Skip to content

feat(cli): add coder stat command #8005

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 48 commits into from
Jun 20, 2023
Merged
Show file tree
Hide file tree
Changes from 37 commits
Commits
Show all changes
48 commits
Select commit Hold shift + click to select a range
5db9006
add stat command
johnstcn May 29, 2023
d6029b4
cpu working on mac
johnstcn Jun 7, 2023
18f4942
add stat memory
johnstcn Jun 7, 2023
251fdda
support values with no total
johnstcn Jun 7, 2023
4c081dc
move clistats to its own package
johnstcn Jun 8, 2023
2ba7392
fix container detection to work with sysbox containers
johnstcn Jun 8, 2023
0e1c96a
add cross-platform declaration for IsContainerized()
johnstcn Jun 8, 2023
0f9859e
add a sync.Once to IsContainerized()
johnstcn Jun 8, 2023
a220c7f
make uptime minutes
johnstcn Jun 8, 2023
89f7e8d
lint
johnstcn Jun 8, 2023
c51e245
extract nproc to variable
johnstcn Jun 8, 2023
3528c00
add skeleton of cgroup stuff
johnstcn Jun 8, 2023
7108c6e
initial cgroupv2 cpu implementation
johnstcn Jun 8, 2023
4ef5f24
fix disk_windows
johnstcn Jun 8, 2023
f0f7b6a
add tests for clistats
johnstcn Jun 8, 2023
6a878b9
improve testing
johnstcn Jun 9, 2023
be7ba72
remove unnecessary os-specific implementations now that we have abstr…
johnstcn Jun 12, 2023
3643407
remove uptime stat as it is trivial to implement in bash
johnstcn Jun 12, 2023
1c8943e
implement cgroupv1 cpu
johnstcn Jun 12, 2023
95b8d1f
unskip container memory tests
johnstcn Jun 12, 2023
495b5b0
flesh out tests
johnstcn Jun 13, 2023
fa0c4c6
cgroupv1 memory
johnstcn Jun 13, 2023
70ef79b
improve tests to allow testing cpu used
johnstcn Jun 13, 2023
7eeefc1
refactor cpu usage calc
johnstcn Jun 13, 2023
305675f
fix tests
johnstcn Jun 13, 2023
d1bb322
fix off-by-10 error
johnstcn Jun 13, 2023
eb2bcf6
remove --sample-interval and collect CPU stats in parallel
johnstcn Jun 13, 2023
44edcf3
fmt; gen
johnstcn Jun 13, 2023
0f3254a
make default_cols consistent to avoid ci surprises
johnstcn Jun 13, 2023
edd99f4
fix race condition
johnstcn Jun 13, 2023
49b6861
remove UPTIME from test
johnstcn Jun 13, 2023
69b1904
update golden files
johnstcn Jun 13, 2023
7eb526d
add stat subcommands
johnstcn Jun 14, 2023
665bf7f
allow modifying unit prefixes
johnstcn Jun 14, 2023
6b11a5c
update docs and examples
johnstcn Jun 14, 2023
c1467f0
fix NaN issue for HostCPU
johnstcn Jun 14, 2023
789c6de
avoid blocking on err chan
johnstcn Jun 14, 2023
482db10
add percentages
johnstcn Jun 15, 2023
0775082
remove outdated comments
johnstcn Jun 15, 2023
73debf8
handle counter reset
johnstcn Jun 15, 2023
d0c992a
add test for large difference between used and total
johnstcn Jun 15, 2023
ef7460a
auto-scale precision, limiting to 3 digits
johnstcn Jun 15, 2023
bec527f
automatically scale precision, remove --prefix arg
johnstcn Jun 15, 2023
08adba7
make gen
johnstcn Jun 15, 2023
78f76e7
improve cli tests
johnstcn Jun 15, 2023
9a82882
update go.mod
johnstcn Jun 15, 2023
19c8a80
Merge remote-tracking branch 'origin/main' into cj/coder-stat
johnstcn Jun 15, 2023
eab2530
update go.sum
johnstcn Jun 15, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
309 changes: 309 additions & 0 deletions cli/clistat/cgroup.go
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Happy that you split a lot of this logic into its own package.

Original file line number Diff line number Diff line change
@@ -0,0 +1,309 @@
package clistat

import (
"bufio"
"bytes"
"strconv"
"strings"

"github.com/spf13/afero"
"golang.org/x/xerrors"
"tailscale.com/types/ptr"
)

// Paths for CGroupV1.
// Ref: https://www.kernel.org/doc/Documentation/cgroup-v1/cpuacct.txt
const (
	// CPU usage of all tasks in cgroup in nanoseconds.
	cgroupV1CPUAcctUsage = "/sys/fs/cgroup/cpu/cpuacct.usage"
	// Alternate path for the same counter; tried when the path above
	// cannot be read.
	cgroupV1CPUAcctUsageAlt = "/sys/fs/cgroup/cpu,cpuacct/cpuacct.usage"
	// CFS quota and period for cgroup in MICROseconds
	cgroupV1CFSQuotaUs  = "/sys/fs/cgroup/cpu,cpuacct/cpu.cfs_quota_us"
	cgroupV1CFSPeriodUs = "/sys/fs/cgroup/cpu,cpuacct/cpu.cfs_period_us"
	// Maximum memory usable by cgroup in bytes.
	// This must be memory.limit_in_bytes (the configured limit) and not
	// memory.max_usage_in_bytes, which only records the peak usage seen
	// so far and would make the reported total track the high-water mark.
	cgroupV1MemoryMaxUsageBytes = "/sys/fs/cgroup/memory/memory.limit_in_bytes"
	// Current memory usage of cgroup in bytes
	cgroupV1MemoryUsageBytes = "/sys/fs/cgroup/memory/memory.usage_in_bytes"
	// Other memory stats - we are interested in total_inactive_file
	cgroupV1MemoryStat = "/sys/fs/cgroup/memory/memory.stat"
)

// Paths for CGroupV2.
// Ref: https://docs.kernel.org/admin-guide/cgroup-v2.html
const (
// Contains quota and period in microseconds separated by a space.
// Field 0 is read as the quota and field 1 as the period. The presence
// of this file is also what isCGroupV2 uses to detect cgroupv2.
cgroupV2CPUMax = "/sys/fs/cgroup/cpu.max"
// Contains current CPU usage under usage_usec
cgroupV2CPUStat = "/sys/fs/cgroup/cpu.stat"
// Contains current cgroup memory usage in bytes.
cgroupV2MemoryUsageBytes = "/sys/fs/cgroup/memory.current"
// Contains max cgroup memory usage in bytes.
// Its value is reported as the memory total of the container.
cgroupV2MemoryMaxBytes = "/sys/fs/cgroup/memory.max"
// Other memory stats - we are interested in inactive_file
// (the cgroupv2 memory code reads the line with that prefix).
cgroupV2MemoryStat = "/sys/fs/cgroup/memory.stat"
)

// ContainerCPU returns the CPU usage of the container cgroup.
// This is calculated as the difference of two samples of the
// cgroup CPU usage counter, taken s.sampleInterval apart.
// The total is read from the relevant path in /sys/fs/cgroup.
// If there is no limit set, the total is assumed to be the
// number of host cores.
// If the system is not containerized, or container detection itself
// fails, this returns a nil result and a nil error.
func (s *Statter) ContainerCPU(m Prefix) (*Result, error) {
	// Firstly, check if we are containerized.
	// A detection error is deliberately treated the same as "not containerized".
	if ok, err := IsContainerized(s.fs); err != nil || !ok {
		return nil, nil //nolint: nilnil
	}

	total, err := s.cGroupCPUTotal()
	if err != nil {
		return nil, xerrors.Errorf("get total cpu: %w", err)
	}

	used1, err := s.cGroupCPUUsed()
	if err != nil {
		return nil, xerrors.Errorf("get cgroup CPU usage: %w", err)
	}

	// The measurements in /sys/fs/cgroup are counters.
	// We need to wait for a bit to get a difference.
	s.wait(s.sampleInterval)

	used2, err := s.cGroupCPUUsed()
	if err != nil {
		return nil, xerrors.Errorf("get cgroup CPU usage: %w", err)
	}

	if used2 < used1 {
		// The counter went backwards, so someone reset it between the two
		// samples. Reporting a negative usage would be nonsense; the best
		// we can do is count from zero.
		used1 = 0
	}

	r := &Result{
		Unit:   "cores",
		Prefix: m,
		Used:   used2 - used1,
		Total:  ptr.To(total),
	}
	return r, nil
}

// cGroupCPUTotal returns the CPU total available to the container cgroup,
// using the cgroupv2 implementation when cgroupv2 is detected and the
// cgroupv1 implementation otherwise.
func (s *Statter) cGroupCPUTotal() (used float64, err error) {
	if !s.isCGroupV2() {
		// Not cgroupv2, so assume CGroupv1.
		return s.cGroupV1CPUTotal()
	}

	return s.cGroupV2CPUTotal()
}

// cGroupCPUUsed returns the current value of the container cgroup CPU
// usage counter, using the cgroupv2 implementation when cgroupv2 is
// detected and the cgroupv1 implementation otherwise.
func (s *Statter) cGroupCPUUsed() (used float64, err error) {
	if !s.isCGroupV2() {
		// Not cgroupv2, so assume CGroupv1.
		return s.cGroupV1CPUUsed()
	}

	return s.cGroupV2CPUUsed()
}

// isCGroupV2 reports whether cgroupv2 appears to be in use.
// The check is simply whether /sys/fs/cgroup/cpu.max can be stat'ed;
// any Stat error is treated as "not cgroupv2".
func (s *Statter) isCGroupV2() bool {
	if _, statErr := s.fs.Stat(cgroupV2CPUMax); statErr != nil {
		return false
	}
	return true
}

// cGroupV2CPUUsed returns the cgroupv2 CPU usage counter (usage_usec from
// cpu.stat) divided by the CFS period (second field of cpu.max).
func (s *Statter) cGroupV2CPUUsed() (used float64, err error) {
	usec, err := readInt64Prefix(s.fs, cgroupV2CPUStat, "usage_usec")
	if err != nil {
		return 0, xerrors.Errorf("get cgroupv2 cpu used: %w", err)
	}

	period, err := readInt64SepIdx(s.fs, cgroupV2CPUMax, " ", 1)
	if err != nil {
		return 0, xerrors.Errorf("get cpu period: %w", err)
	}

	used = float64(usec) / float64(period)
	return used, nil
}

// cGroupV2CPUTotal returns the CPU total for a cgroupv2 container as
// quota divided by period, both taken from cpu.max.
// If the quota field cannot be read as an integer, the total falls back
// to the number of cores (s.nproc).
func (s *Statter) cGroupV2CPUTotal() (total float64, err error) {
	period, err := readInt64SepIdx(s.fs, cgroupV2CPUMax, " ", 1)
	if err != nil {
		return 0, xerrors.Errorf("get cpu period: %w", err)
	}

	quota, quotaErr := readInt64SepIdx(s.fs, cgroupV2CPUMax, " ", 0)
	if quotaErr != nil {
		// No usable quota: pretend the quota is one full period per core,
		// so that the division below yields the core count.
		quota = int64(s.nproc) * period
	}

	total = float64(quota) / float64(period)
	return total, nil
}

// cGroupV1CPUTotal returns the CPU total for a cgroupv1 container as
// CFS quota divided by CFS period.
// A negative quota is treated as "no limit" and replaced by the number
// of cores (s.nproc).
func (s *Statter) cGroupV1CPUTotal() (float64, error) {
	period, err := readInt64(s.fs, cgroupV1CFSPeriodUs)
	if err != nil {
		return 0, xerrors.Errorf("read cpu period: %w", err)
	}

	quota, err := readInt64(s.fs, cgroupV1CFSQuotaUs)
	switch {
	case err != nil:
		return 0, xerrors.Errorf("read cpu quota: %w", err)
	case quota < 0:
		// No quota configured: one full period per core.
		quota = int64(s.nproc) * period
	}

	return float64(quota) / float64(period), nil
}

// cGroupV1CPUUsed returns the cgroupv1 CPU usage counter, converted from
// nanoseconds to microseconds, divided by the CFS period.
// The counter is read from cpuacct.usage, trying the alternate
// "cpu,cpuacct" location if the first path cannot be read.
func (s *Statter) cGroupV1CPUUsed() (float64, error) {
	usageNs, err := readInt64(s.fs, cgroupV1CPUAcctUsage)
	if err != nil {
		// First location failed; give the alternate path a chance.
		if usageNs, err = readInt64(s.fs, cgroupV1CPUAcctUsageAlt); err != nil {
			return 0, xerrors.Errorf("read cpu used: %w", err)
		}
	}

	// The period below is in microseconds, so bring usage to the same
	// unit first (integer division, sub-microsecond remainder is dropped).
	usageUs := usageNs / 1000

	periodUs, err := readInt64(s.fs, cgroupV1CFSPeriodUs)
	if err != nil {
		return 0, xerrors.Errorf("get cpu period: %w", err)
	}

	return float64(usageUs) / float64(periodUs), nil
}

// ContainerMemory returns the memory usage of the container cgroup,
// read via cgroupv2 when it is detected and via cgroupv1 otherwise.
// If the system is not containerized, or container detection itself
// fails, this returns a nil result and a nil error.
func (s *Statter) ContainerMemory(m Prefix) (*Result, error) {
	containerized, err := IsContainerized(s.fs)
	if err != nil || !containerized {
		return nil, nil //nolint:nilnil
	}

	if !s.isCGroupV2() {
		// Not cgroupv2, so assume CGroupv1.
		return s.cGroupV1Memory(m)
	}

	return s.cGroupV2Memory(m)
}

// cGroupV2Memory returns the memory usage of the container cgroup using
// the cgroupv2 files.
// Used is memory.current minus the inactive_file entry of memory.stat.
// Total is the value of memory.max. When that file does not contain an
// integer (the kernel writes the literal "max" when no limit is set),
// Total is left nil to signal that there is no limit, instead of failing
// the whole measurement.
func (s *Statter) cGroupV2Memory(m Prefix) (*Result, error) {
	var total *float64
	maxUsageBytes, err := readInt64(s.fs, cgroupV2MemoryMaxBytes)
	switch {
	case err == nil:
		total = ptr.To(float64(maxUsageBytes))
	case xerrors.Is(err, strconv.ErrSyntax):
		// The file was readable but is not a number: assume it is the
		// string "max", i.e. no limit. total stays nil.
	default:
		return nil, xerrors.Errorf("read memory total: %w", err)
	}

	currUsageBytes, err := readInt64(s.fs, cgroupV2MemoryUsageBytes)
	if err != nil {
		return nil, xerrors.Errorf("read memory usage: %w", err)
	}

	inactiveFileBytes, err := readInt64Prefix(s.fs, cgroupV2MemoryStat, "inactive_file")
	if err != nil {
		return nil, xerrors.Errorf("read memory stats: %w", err)
	}

	// Total memory used is current usage minus inactive_file.
	return &Result{
		Total:  total,
		Used:   float64(currUsageBytes - inactiveFileBytes),
		Unit:   "B",
		Prefix: m,
	}, nil
}

// cGroupV1Memory returns the memory usage of the container cgroup using
// the cgroupv1 files.
// Total is the value read from cgroupV1MemoryMaxUsageBytes.
// Used is memory.usage_in_bytes minus the total_inactive_file entry of
// memory.stat.
func (s *Statter) cGroupV1Memory(m Prefix) (*Result, error) {
	totalBytes, err := readInt64(s.fs, cgroupV1MemoryMaxUsageBytes)
	if err != nil {
		return nil, xerrors.Errorf("read memory total: %w", err)
	}

	currentBytes, err := readInt64(s.fs, cgroupV1MemoryUsageBytes)
	if err != nil {
		return nil, xerrors.Errorf("read memory usage: %w", err)
	}

	inactiveBytes, err := readInt64Prefix(s.fs, cgroupV1MemoryStat, "total_inactive_file")
	if err != nil {
		return nil, xerrors.Errorf("read memory stats: %w", err)
	}

	// Subtract in int64 first, then convert, exactly once.
	usedBytes := float64(currentBytes - inactiveBytes)

	res := &Result{
		Total:  ptr.To(float64(totalBytes)),
		Used:   usedBytes,
		Unit:   "B",
		Prefix: m,
	}
	return res, nil
}

// readInt64 reads the whole file at path and parses its contents, with
// surrounding whitespace removed, as a base-10 int64.
// Read and parse failures are wrapped with the path.
func readInt64(fs afero.Fs, path string) (int64, error) {
	raw, err := afero.ReadFile(fs, path)
	if err != nil {
		return 0, xerrors.Errorf("read %s: %w", path, err)
	}

	trimmed := string(bytes.TrimSpace(raw))
	n, err := strconv.ParseInt(trimmed, 10, 64)
	if err != nil {
		return 0, xerrors.Errorf("parse %s: %w", path, err)
	}

	return n, nil
}

// readInt64SepIdx reads the whole file at path, splits its contents on
// sep, and parses field idx (zero-based, whitespace-trimmed) as a base-10
// int64.
// An error is returned if the file cannot be read, if the contents have
// no field at idx, or if that field is not an integer.
func readInt64SepIdx(fs afero.Fs, path, sep string, idx int) (int64, error) {
	data, err := afero.ReadFile(fs, path)
	if err != nil {
		return 0, xerrors.Errorf("read %s: %w", path, err)
	}

	parts := strings.Split(string(data), sep)
	// idx must be a valid index into parts. The check has to reject
	// idx == len(parts) as well, otherwise parts[idx] below panics.
	if idx < 0 || idx >= len(parts) {
		return 0, xerrors.Errorf("expected line %q to have at least %d parts", string(data), idx+1)
	}

	val, err := strconv.ParseInt(strings.TrimSpace(parts[idx]), 10, 64)
	if err != nil {
		return 0, xerrors.Errorf("parse %s: %w", path, err)
	}

	return val, nil
}

// readInt64Prefix reads the file at path line by line and returns the
// value of the first line that starts with prefix.
// A matching line must consist of exactly two whitespace-separated
// fields, the second of which is parsed as a base-10 int64.
// An error is returned if the file cannot be read, if scanning fails, if
// the matching line is malformed, or if no line starts with prefix.
func readInt64Prefix(fs afero.Fs, path, prefix string) (int64, error) {
	data, err := afero.ReadFile(fs, path)
	if err != nil {
		return 0, xerrors.Errorf("read %s: %w", path, err)
	}

	scn := bufio.NewScanner(bytes.NewReader(data))
	for scn.Scan() {
		line := scn.Text()
		if !strings.HasPrefix(line, prefix) {
			continue
		}

		parts := strings.Fields(line)
		if len(parts) != 2 {
			return 0, xerrors.Errorf("parse %s: expected two fields but got %s", path, line)
		}

		// strings.Fields already strips surrounding whitespace.
		val, err := strconv.ParseInt(parts[1], 10, 64)
		if err != nil {
			return 0, xerrors.Errorf("parse %s: %w", path, err)
		}

		return val, nil
	}

	// Scan returns false both at end of input and on failure (for example
	// a line longer than the scanner buffer). Report a failure as such
	// rather than claiming the prefix is absent.
	if err := scn.Err(); err != nil {
		return 0, xerrors.Errorf("scan %s: %w", path, err)
	}

	return 0, xerrors.Errorf("parse %s: did not find line with prefix %s", path, prefix)
}
61 changes: 61 additions & 0 deletions cli/clistat/container.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
package clistat

import (
"bufio"
"bytes"
"os"

"github.com/spf13/afero"
"golang.org/x/xerrors"
)

const (
// procMounts is the file IsContainerized scans for sysboxfs mounts.
procMounts = "/proc/mounts"
// procOneCgroup is the file IsContainerized scans for container
// runtime markers (docker, .slice, lxc, kubepods).
procOneCgroup = "/proc/1/cgroup"
)

// IsContainerized returns whether the host is containerized.
// This is adapted from https://github.com/elastic/go-sysinfo/tree/main/providers/linux/container.go#L31
// with modifications to support Sysbox containers.
// On non-Linux platforms, it always returns false.
//
// Detection happens in two steps:
//  1. /proc/1/cgroup is scanned for a line mentioning docker, .slice,
//     lxc or kubepods.
//  2. Failing that, /proc/mounts is scanned for a line mentioning
//     sysboxfs.
//
// A missing file is reported as "not containerized" without an error;
// any other read failure is returned to the caller.
func IsContainerized(fs afero.Fs) (ok bool, err error) {
	cgData, err := afero.ReadFile(fs, procOneCgroup)
	switch {
	case err == nil:
		// Got the file, carry on below.
	case os.IsNotExist(err):
		return false, nil // how?
	default:
		return false, xerrors.Errorf("read file %s: %w", procOneCgroup, err)
	}

	runtimeMarkers := [][]byte{
		[]byte("docker"),
		[]byte(".slice"),
		[]byte("lxc"),
		[]byte("kubepods"),
	}
	for scn := bufio.NewScanner(bytes.NewReader(cgData)); scn.Scan(); {
		line := scn.Bytes()
		for _, marker := range runtimeMarkers {
			if bytes.Contains(line, marker) {
				return true, nil
			}
		}
	}

	// Last-ditch effort to detect Sysbox containers.
	// Check if we have anything mounted as type sysboxfs in /proc/mounts
	mountsData, err := afero.ReadFile(fs, procMounts)
	switch {
	case err == nil:
		// Got the file, carry on below.
	case os.IsNotExist(err):
		return false, nil // how??
	default:
		return false, xerrors.Errorf("read file %s: %w", procMounts, err)
	}

	sysboxMarker := []byte("sysboxfs")
	for scn := bufio.NewScanner(bytes.NewReader(mountsData)); scn.Scan(); {
		if bytes.Contains(scn.Bytes(), sysboxMarker) {
			return true, nil
		}
	}

	// If we get here, we are _probably_ not running in a container.
	return false, nil
}
27 changes: 27 additions & 0 deletions cli/clistat/disk.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
//go:build !windows

package clistat

import (
"syscall"

"tailscale.com/types/ptr"
)

// Disk returns the disk usage of the filesystem containing the given path,
// as reported by statfs.
// If path is empty, it returns the usage of the root directory.
// Total is the block count multiplied by the block size; Used is the
// number of non-free blocks (Blocks - Bfree) multiplied by the block size.
// The statfs error, if any, is returned unwrapped.
func (*Statter) Disk(path string, m Prefix) (*Result, error) {
	if path == "" {
		path = "/"
	}

	var fsStat syscall.Statfs_t
	err := syscall.Statfs(path, &fsStat)
	if err != nil {
		return nil, err
	}

	// Bsize has a platform-dependent integer type, hence the explicit
	// conversions.
	totalBytes := float64(fsStat.Blocks * uint64(fsStat.Bsize))
	usedBytes := float64(fsStat.Blocks-fsStat.Bfree) * float64(fsStat.Bsize)

	return &Result{
		Total:  ptr.To(totalBytes),
		Used:   usedBytes,
		Unit:   "B",
		Prefix: m,
	}, nil
}
Loading