diff --git a/agent/agentexec/cli_linux.go b/agent/agentexec/cli_linux.go new file mode 100644 index 0000000000000..4081882712a40 --- /dev/null +++ b/agent/agentexec/cli_linux.go @@ -0,0 +1,145 @@ +//go:build linux +// +build linux + +package agentexec + +import ( + "flag" + "fmt" + "os" + "os/exec" + "runtime" + "strconv" + "strings" + "syscall" + + "golang.org/x/sys/unix" + "golang.org/x/xerrors" +) + +// unset is set to an invalid value for nice and oom scores. +const unset = -2000 + +// CLI runs the agent-exec command. It should only be called by the cli package. +func CLI() error { + // We lock the OS thread here to avoid a race condition where the nice priority + // we get is on a different thread from the one we set it on. + runtime.LockOSThread() + // Nop on success but we do it anyway in case of an error. + defer runtime.UnlockOSThread() + + var ( + fs = flag.NewFlagSet("agent-exec", flag.ExitOnError) + nice = fs.Int("coder-nice", unset, "") + oom = fs.Int("coder-oom", unset, "") + ) + + if len(os.Args) < 3 { + return xerrors.Errorf("malformed command %+v", os.Args) + } + + // Parse everything after "coder agent-exec". + err := fs.Parse(os.Args[2:]) + if err != nil { + return xerrors.Errorf("parse flags: %w", err) + } + + // Get everything after "coder agent-exec --" + args := execArgs(os.Args) + if len(args) == 0 { + return xerrors.Errorf("no exec command provided %+v", os.Args) + } + + if *nice == unset { + // If an explicit nice score isn't set, we use the default. + *nice, err = defaultNiceScore() + if err != nil { + return xerrors.Errorf("get default nice score: %w", err) + } + } + + if *oom == unset { + // If an explicit oom score isn't set, we use the default. + *oom, err = defaultOOMScore() + if err != nil { + return xerrors.Errorf("get default oom score: %w", err) + } + } + + err = unix.Setpriority(unix.PRIO_PROCESS, 0, *nice) + if err != nil { + return xerrors.Errorf("set nice score: %w", err) + } + + err = writeOOMScoreAdj(*oom) + if err != nil { + return xerrors.Errorf("set oom score: %w", err) + } + + path, err := exec.LookPath(args[0]) + if err != nil { + return xerrors.Errorf("look path: %w", err) + } + + return syscall.Exec(path, args, os.Environ()) +} + +func defaultNiceScore() (int, error) { + score, err := unix.Getpriority(unix.PRIO_PROCESS, 0) + if err != nil { + return 0, xerrors.Errorf("get nice score: %w", err) + } + // See https://linux.die.net/man/2/setpriority#Notes + score = 20 - score + + score += 5 + if score > 19 { + return 19, nil + } + return score, nil +} + +func defaultOOMScore() (int, error) { + score, err := oomScoreAdj() + if err != nil { + return 0, xerrors.Errorf("get oom score: %w", err) + } + + // If the agent has a negative oom_score_adj, we set the child to 0 + // so it's treated like every other process. + if score < 0 { + return 0, nil + } + + // If the agent is already almost at the maximum then set it to the max. + if score >= 998 { + return 1000, nil + } + + // If the agent oom_score_adj is >=0, we set the child to slightly + // less than the maximum. If users want a different score they set it + // directly. + return 998, nil +} + +func oomScoreAdj() (int, error) { + scoreStr, err := os.ReadFile("/proc/self/oom_score_adj") + if err != nil { + return 0, xerrors.Errorf("read oom_score_adj: %w", err) + } + return strconv.Atoi(strings.TrimSpace(string(scoreStr))) +} + +func writeOOMScoreAdj(score int) error { + return os.WriteFile("/proc/self/oom_score_adj", []byte(fmt.Sprintf("%d", score)), 0o600) +} + +// execArgs returns the arguments to pass to syscall.Exec after the "--" delimiter. +func execArgs(args []string) []string { + for i, arg := range args { + if arg == "--" { + return args[i+1:] + } + } + return nil +} diff --git a/agent/agentexec/cli_linux_test.go b/agent/agentexec/cli_linux_test.go new file mode 100644 index 0000000000000..6a5345971616d --- /dev/null +++ b/agent/agentexec/cli_linux_test.go @@ -0,0 +1,178 @@ +//go:build linux +// +build linux + +package agentexec_test + +import ( + "bytes" + "context" + "fmt" + "os" + "os/exec" + "path/filepath" + "strconv" + "strings" + "syscall" + "testing" + "time" + + "github.com/stretchr/testify/require" + "golang.org/x/sys/unix" + + "github.com/coder/coder/v2/testutil" +) + +func TestCLI(t *testing.T) { + t.Parallel() + + t.Run("OK", func(t *testing.T) { + t.Parallel() + + ctx := testutil.Context(t, testutil.WaitMedium) + cmd, path := cmd(ctx, t, 123, 12) + err := cmd.Start() + require.NoError(t, err) + go cmd.Wait() + + waitForSentinel(ctx, t, cmd, path) + requireOOMScore(t, cmd.Process.Pid, 123) + requireNiceScore(t, cmd.Process.Pid, 12) + }) + + t.Run("Defaults", func(t *testing.T) { + t.Parallel() + + ctx := testutil.Context(t, testutil.WaitMedium) + cmd, path := cmd(ctx, t, 0, 0) + err := cmd.Start() + require.NoError(t, err) + go cmd.Wait() + + waitForSentinel(ctx, t, cmd, path) + + expectedNice := expectedNiceScore(t) + expectedOOM := expectedOOMScore(t) + requireOOMScore(t, cmd.Process.Pid, expectedOOM) + requireNiceScore(t, cmd.Process.Pid, expectedNice) + }) +} + +func requireNiceScore(t *testing.T, pid int, score int) { + t.Helper() + + nice, err := unix.Getpriority(unix.PRIO_PROCESS, pid) + require.NoError(t, err) + // See https://linux.die.net/man/2/setpriority#Notes + require.Equal(t, score, 20-nice) +} + +func requireOOMScore(t *testing.T, pid int, expected int) { + t.Helper() + + actual, err := os.ReadFile(fmt.Sprintf("/proc/%d/oom_score_adj", pid)) + require.NoError(t, err) + score := strings.TrimSpace(string(actual)) + require.Equal(t, strconv.Itoa(expected), score) +} + +func waitForSentinel(ctx context.Context, t *testing.T, cmd *exec.Cmd, path string) { + t.Helper() + + ticker := time.NewTicker(testutil.IntervalFast) + defer ticker.Stop() + + // RequireEventually doesn't work well with require.NoError or similar require functions. + for { + err := cmd.Process.Signal(syscall.Signal(0)) + require.NoError(t, err) + + _, err = os.Stat(path) + if err == nil { + return + } + + select { + case <-ticker.C: + case <-ctx.Done(): + require.NoError(t, ctx.Err()) + } + } +} + +func cmd(ctx context.Context, t *testing.T, oom, nice int) (*exec.Cmd, string) { + var ( + args = execArgs(oom, nice) + dir = t.TempDir() + file = filepath.Join(dir, "sentinel") + ) + + args = append(args, "sh", "-c", fmt.Sprintf("touch %s && sleep 10m", file)) + //nolint:gosec + cmd := exec.CommandContext(ctx, TestBin, args...) + + // We set this so we can also easily kill the sleep process the shell spawns. + cmd.SysProcAttr = &syscall.SysProcAttr{ + Setpgid: true, + } + + cmd.Env = os.Environ() + var buf bytes.Buffer + cmd.Stdout = &buf + cmd.Stderr = &buf + t.Cleanup(func() { + // Print output of a command if the test fails. + if t.Failed() { + t.Logf("cmd %q output: %s", cmd.Args, buf.String()) + } + if cmd.Process != nil { + // We use -cmd.Process.Pid to kill the whole process group. + _ = syscall.Kill(-cmd.Process.Pid, syscall.SIGINT) + } + }) + return cmd, file +} + +func expectedOOMScore(t *testing.T) int { + t.Helper() + + score, err := os.ReadFile(fmt.Sprintf("/proc/%d/oom_score_adj", os.Getpid())) + require.NoError(t, err) + + scoreInt, err := strconv.Atoi(strings.TrimSpace(string(score))) + require.NoError(t, err) + + if scoreInt < 0 { + return 0 + } + if scoreInt >= 998 { + return 1000 + } + return 998 +} + +func expectedNiceScore(t *testing.T) int { + t.Helper() + + score, err := unix.Getpriority(unix.PRIO_PROCESS, os.Getpid()) + require.NoError(t, err) + + // Priority is niceness + 20. + score = 20 - score + score += 5 + if score > 19 { + return 19 + } + return score +} + +func execArgs(oom int, nice int) []string { + execArgs := []string{"agent-exec"} + if oom != 0 { + execArgs = append(execArgs, fmt.Sprintf("--coder-oom=%d", oom)) + } + if nice != 0 { + execArgs = append(execArgs, fmt.Sprintf("--coder-nice=%d", nice)) + } + execArgs = append(execArgs, "--") + return execArgs +} diff --git a/agent/agentexec/cli_other.go b/agent/agentexec/cli_other.go new file mode 100644 index 0000000000000..67fe7d1eede2b --- /dev/null +++ b/agent/agentexec/cli_other.go @@ -0,0 +1,10 @@ +//go:build !linux +// +build !linux + +package agentexec + +import "golang.org/x/xerrors" + +func CLI() error { + return xerrors.New("agent-exec is only supported on Linux") +} diff --git a/agent/agentexec/cmdtest/main_linux.go b/agent/agentexec/cmdtest/main_linux.go new file mode 100644 index 0000000000000..8cd48f0b21812 --- /dev/null +++ b/agent/agentexec/cmdtest/main_linux.go @@ -0,0 +1,19 @@ +//go:build linux +// +build linux + +package main + +import ( + "fmt" + "os" + + "github.com/coder/coder/v2/agent/agentexec" +) + +func main() { + err := agentexec.CLI() + if err != nil { + _, _ = fmt.Fprintln(os.Stderr, err) + os.Exit(1) + } +} diff --git a/agent/agentexec/exec.go b/agent/agentexec/exec.go new file mode 100644 index 0000000000000..253671aeebe86 --- /dev/null +++ b/agent/agentexec/exec.go @@ -0,0 +1,86 @@ +package agentexec + +import ( + "context" + "fmt" + "os" + "os/exec" + "path/filepath" + "runtime" + "strconv" + + "golang.org/x/xerrors" +) + +const ( + // EnvProcPrioMgmt is the environment variable that determines whether + // we attempt to manage process CPU and OOM Killer priority. + EnvProcPrioMgmt = "CODER_PROC_PRIO_MGMT" + EnvProcOOMScore = "CODER_PROC_OOM_SCORE" + EnvProcNiceScore = "CODER_PROC_NICE_SCORE" +) + +// CommandContext returns an exec.Cmd that calls "coder agent-exec" prior to exec'ing +// the provided command if CODER_PROC_PRIO_MGMT is set, otherwise a normal exec.Cmd +// is returned. All instances of exec.Cmd should flow through this function to ensure +// proper resource constraints are applied to the child process. +func CommandContext(ctx context.Context, cmd string, args ...string) (*exec.Cmd, error) { + _, enabled := os.LookupEnv(EnvProcPrioMgmt) + if runtime.GOOS != "linux" || !enabled { + return exec.CommandContext(ctx, cmd, args...), nil + } + + executable, err := os.Executable() + if err != nil { + return nil, xerrors.Errorf("get executable: %w", err) + } + + bin, err := filepath.EvalSymlinks(executable) + if err != nil { + return nil, xerrors.Errorf("eval symlinks: %w", err) + } + + execArgs := []string{"agent-exec"} + if score, ok := envValInt(EnvProcOOMScore); ok { + execArgs = append(execArgs, oomScoreArg(score)) + } + + if score, ok := envValInt(EnvProcNiceScore); ok { + execArgs = append(execArgs, niceScoreArg(score)) + } + execArgs = append(execArgs, "--", cmd) + execArgs = append(execArgs, args...) + + return exec.CommandContext(ctx, bin, execArgs...), nil +} + +// envValInt searches for a key in a list of environment variables and parses it to an int. +// If the key is not found or cannot be parsed, returns 0 and false. +func envValInt(key string) (int, bool) { + val, ok := os.LookupEnv(key) + if !ok { + return 0, false + } + + i, err := strconv.Atoi(val) + if err != nil { + return 0, false + } + return i, true +} + +// The following are flags used by the agent-exec command. We use flags instead of +// environment variables to avoid having to deal with a caller overriding the +// environment variables. +const ( + niceFlag = "coder-nice" + oomFlag = "coder-oom" +) + +func niceScoreArg(score int) string { + return fmt.Sprintf("--%s=%d", niceFlag, score) +} + +func oomScoreArg(score int) string { + return fmt.Sprintf("--%s=%d", oomFlag, score) +} diff --git a/agent/agentexec/exec_test.go b/agent/agentexec/exec_test.go new file mode 100644 index 0000000000000..26fcde259eea4 --- /dev/null +++ b/agent/agentexec/exec_test.go @@ -0,0 +1,119 @@ +package agentexec_test + +import ( + "context" + "os" + "os/exec" + "runtime" + "testing" + + "github.com/stretchr/testify/require" + + "github.com/coder/coder/v2/agent/agentexec" +) + +//nolint:paralleltest // we need to test environment variables +func TestExec(t *testing.T) { + //nolint:paralleltest // we need to test environment variables + t.Run("NonLinux", func(t *testing.T) { + t.Setenv(agentexec.EnvProcPrioMgmt, "true") + + if runtime.GOOS == "linux" { + t.Skip("skipping on linux") + } + + cmd, err := agentexec.CommandContext(context.Background(), "sh", "-c", "sleep") + require.NoError(t, err) + + path, err := exec.LookPath("sh") + require.NoError(t, err) + require.Equal(t, path, cmd.Path) + require.Equal(t, []string{"sh", "-c", "sleep"}, cmd.Args) + }) + + //nolint:paralleltest // we need to test environment variables + t.Run("Linux", func(t *testing.T) { + //nolint:paralleltest // we need to test environment variables + t.Run("Disabled", func(t *testing.T) { + if runtime.GOOS != "linux" { + t.Skip("skipping on linux") + } + + cmd, err := agentexec.CommandContext(context.Background(), "sh", "-c", "sleep") + require.NoError(t, err) + path, err := exec.LookPath("sh") + require.NoError(t, err) + require.Equal(t, path, cmd.Path) + require.Equal(t, []string{"sh", "-c", "sleep"}, cmd.Args) + }) + + //nolint:paralleltest // we need to test environment variables + t.Run("Enabled", func(t *testing.T) { + t.Setenv(agentexec.EnvProcPrioMgmt, "hello") + + if runtime.GOOS != "linux" { + t.Skip("skipping on linux") + } + + executable, err := os.Executable() + require.NoError(t, err) + + cmd, err := agentexec.CommandContext(context.Background(), "sh", "-c", "sleep") + require.NoError(t, err) + require.Equal(t, executable, cmd.Path) + require.Equal(t, []string{executable, "agent-exec", "--", "sh", "-c", "sleep"}, cmd.Args) + }) + + t.Run("Nice", func(t *testing.T) { + t.Setenv(agentexec.EnvProcPrioMgmt, "hello") + t.Setenv(agentexec.EnvProcNiceScore, "10") + + if runtime.GOOS != "linux" { + t.Skip("skipping on linux") + } + + executable, err := os.Executable() + require.NoError(t, err) + + cmd, err := agentexec.CommandContext(context.Background(), "sh", "-c", "sleep") + require.NoError(t, err) + require.Equal(t, executable, cmd.Path) + require.Equal(t, []string{executable, "agent-exec", "--coder-nice=10", "--", "sh", "-c", "sleep"}, cmd.Args) + }) + + t.Run("OOM", func(t *testing.T) { + t.Setenv(agentexec.EnvProcPrioMgmt, "hello") + t.Setenv(agentexec.EnvProcOOMScore, "123") + + if runtime.GOOS != "linux" { + t.Skip("skipping on linux") + } + + executable, err := os.Executable() + require.NoError(t, err) + + cmd, err := agentexec.CommandContext(context.Background(), "sh", "-c", "sleep") + require.NoError(t, err) + require.Equal(t, executable, cmd.Path) + require.Equal(t, []string{executable, "agent-exec", "--coder-oom=123", "--", "sh", "-c", "sleep"}, cmd.Args) + }) + + t.Run("Both", func(t *testing.T) { + t.Setenv(agentexec.EnvProcPrioMgmt, "hello") + t.Setenv(agentexec.EnvProcOOMScore, "432") + t.Setenv(agentexec.EnvProcNiceScore, "14") + + if runtime.GOOS != "linux" { + t.Skip("skipping on linux") + } + + executable, err := os.Executable() + require.NoError(t, err) + + cmd, err := agentexec.CommandContext(context.Background(), "sh", "-c", "sleep") + require.NoError(t, err) + require.Equal(t, executable, cmd.Path) + require.Equal(t, []string{executable, "agent-exec", "--coder-oom=432", "--coder-nice=14", "--", "sh", "-c", "sleep"}, cmd.Args) + }) + }) +} diff --git a/agent/agentexec/main_linux_test.go b/agent/agentexec/main_linux_test.go new file mode 100644 index 0000000000000..8b5df84d60372 --- /dev/null +++ b/agent/agentexec/main_linux_test.go @@ -0,0 +1,46 @@ +//go:build linux +// +build linux + +package agentexec_test + +import ( + "fmt" + "os" + "os/exec" + "path/filepath" + "testing" +) + +var TestBin string + +func TestMain(m *testing.M) { + code := func() int { + // We generate a unique directory per test invocation to avoid collisions between two + // processes attempting to create the same temp file. + dir := genDir() + defer os.RemoveAll(dir) + TestBin = buildBinary(dir) + return m.Run() + }() + + os.Exit(code) +} + +func buildBinary(dir string) string { + path := filepath.Join(dir, "agent-test") + out, err := exec.Command("go", "build", "-o", path, "./cmdtest").CombinedOutput() + mustf(err, "build binary: %s", out) + return path +} + +func mustf(err error, msg string, args ...any) { + if err != nil { + panic(fmt.Sprintf(msg, args...)) + } +} + +func genDir() string { + dir, err := os.MkdirTemp(os.TempDir(), "agentexec") + mustf(err, "create temp dir: %v", err) + return dir +}