Skip to content

feat: implement agent process management #9461

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 21 commits into from
Sep 15, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
feat: implement agent process management
- An opt-in feature has been added to the agent to allow
  deprioritizing non coder-related processes for both CPU
  and memory. Non coder processes have their niceness set to 10
  and their oom_score_adj set to 100
  • Loading branch information
sreya committed Sep 8, 2023
commit 4ed40699cfd4eb2b18d534d1e3d49f61237a6c4a
85 changes: 85 additions & 0 deletions agent/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import (
"os/exec"
"os/user"
"path/filepath"
"runtime"
"sort"
"strconv"
"strings"
Expand All @@ -34,6 +35,7 @@ import (
"tailscale.com/types/netlogtype"

"cdr.dev/slog"
"github.com/coder/coder/v2/agent/agentproc"
"github.com/coder/coder/v2/agent/agentssh"
"github.com/coder/coder/v2/agent/reconnectingpty"
"github.com/coder/coder/v2/buildinfo"
Expand All @@ -51,6 +53,8 @@ const (
ProtocolDial = "dial"
)

// EnvProcMemNice is the name of the environment variable that opts the agent
// in to process priority management. manageProcessPriorityLoop only runs when
// this variable has a non-empty value and the agent is running on Linux.
const EnvProcMemNice = "CODER_PROC_MEMNICE_ENABLE"

type Options struct {
Filesystem afero.Fs
LogDir string
Expand All @@ -68,6 +72,7 @@ type Options struct {
PrometheusRegistry *prometheus.Registry
ReportMetadataInterval time.Duration
ServiceBannerRefreshInterval time.Duration
Syscaller agentproc.Syscaller
}

type Client interface {
Expand Down Expand Up @@ -197,6 +202,7 @@ type agent struct {

prometheusRegistry *prometheus.Registry
metrics *agentMetrics
syscaller agentproc.Syscaller
}

func (a *agent) TailnetConn() *tailnet.Conn {
Expand Down Expand Up @@ -225,6 +231,7 @@ func (a *agent) runLoop(ctx context.Context) {
go a.reportLifecycleLoop(ctx)
go a.reportMetadataLoop(ctx)
go a.fetchServiceBannerLoop(ctx)
go a.manageProcessPriorityLoop(ctx)

for retrier := retry.New(100*time.Millisecond, 10*time.Second); retrier.Wait(ctx); {
a.logger.Info(ctx, "connecting to coderd")
Expand Down Expand Up @@ -1253,6 +1260,84 @@ func (a *agent) startReportingConnectionStats(ctx context.Context) {
}
}

// exemptProcesses lists the base names (the final path element of argv[0]) of
// processes that manageProcessPriorityLoop skips instead of deprioritizing.
var exemptProcesses = []string{"coder"}

// manageProcessPriorityLoop periodically deprioritizes every process that is
// not listed in exemptProcesses, for both CPU (niceness) and memory
// (oom_score_adj).
//
// The feature is opt-in: the loop returns immediately unless EnvProcMemNice
// is set to a non-empty value in the agent's environment and the agent is
// running on Linux. Once enabled it scans the proc directory once a minute
// until ctx is cancelled. Failures are logged and never abort the loop; a
// process whose niceness cannot be set is skipped for that pass.
func (a *agent) manageProcessPriorityLoop(ctx context.Context) {
	const (
		procDir  = agentproc.DefaultProcDir
		niceness = 10
		// A positive oom_score_adj makes a process a more likely victim of
		// the kernel OOM killer, which is the point of deprioritizing it.
		// A value of -1000 would instead exempt the process from OOM
		// killing entirely (and needs CAP_SYS_RESOURCE to write).
		oomScoreAdj = 100
	)

	if val := a.envVars[EnvProcMemNice]; val == "" || runtime.GOOS != "linux" {
		a.logger.Info(ctx, "process priority not enabled, agent will not manage process niceness/oom_score_adj ",
			slog.F("env_var", EnvProcMemNice),
			slog.F("value", val),
			slog.F("goos", runtime.GOOS),
		)
		return
	}

	// Only allocate the ticker once we know the feature is enabled.
	ticker := time.NewTicker(time.Minute)
	defer ticker.Stop()

	for {
		select {
		case <-ctx.Done():
			return
		case <-ticker.C:
		}

		procs, err := agentproc.List(a.filesystem, procDir)
		if err != nil {
			a.logger.Error(ctx, "failed to list procs",
				slog.F("dir", procDir),
				slog.Error(err),
			)
			continue
		}

		for _, proc := range procs {
			// Trim off the path e.g. "./coder" -> "coder"
			name := filepath.Base(proc.Name())
			if slices.Contains(exemptProcesses, name) {
				a.logger.Debug(ctx, "skipping exempt process",
					slog.F("name", proc.Name()),
					slog.F("pid", proc.PID),
				)
				continue
			}

			err := proc.SetNiceness(a.syscaller, niceness)
			if err != nil {
				a.logger.Error(ctx, "unable to set proc niceness",
					slog.F("name", proc.Name()),
					slog.F("pid", proc.PID),
					slog.F("niceness", niceness),
					slog.Error(err),
				)
				continue
			}

			err = proc.SetOOMAdj(oomScoreAdj)
			if err != nil {
				a.logger.Error(ctx, "unable to set proc oom_score_adj",
					slog.F("name", proc.Name()),
					slog.F("pid", proc.PID),
					slog.F("oom_score_adj", oomScoreAdj),
					slog.Error(err),
				)
				continue
			}

			a.logger.Debug(ctx, "deprioritized process",
				slog.F("name", proc.Name()),
				slog.F("pid", proc.PID),
				slog.F("niceness", niceness),
				slog.F("oom_score_adj", oomScoreAdj),
			)
		}
	}
}

// isClosed returns whether the API is closed or not.
func (a *agent) isClosed() bool {
select {
Expand Down
3 changes: 3 additions & 0 deletions agent/agentproc/doc.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// Package agentproc contains logic for interfacing with local
// processes running in the same context as the agent.
package agentproc
99 changes: 99 additions & 0 deletions agent/agentproc/proc.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
package agentproc

import (
"path/filepath"
"strconv"
"strings"
"syscall"

"github.com/spf13/afero"
"golang.org/x/sys/unix"
"golang.org/x/xerrors"
)

// DefaultProcDir is the directory that is scanned for per-process
// subdirectories when no other location is supplied to List.
const DefaultProcDir = "/proc"

// Syscaller abstracts the system call used to change a process's scheduling
// priority, so that Process.SetNiceness can be driven by an implementation
// other than the real operating system call.
type Syscaller interface {
// SetPriority sets the scheduling priority of the process identified by
// pid and reports any failure.
SetPriority(pid int32, priority int) error
}

// UnixSyscaller is the Syscaller implementation backed by
// golang.org/x/sys/unix. It carries no state.
type UnixSyscaller struct{}

// SetPriority applies the given nice value to the single process identified
// by pid via unix.Setpriority with PRIO_PROCESS. Any failure is returned
// wrapped with a "set priority" prefix.
func (UnixSyscaller) SetPriority(pid int32, nice int) error {
	if setErr := unix.Setpriority(unix.PRIO_PROCESS, int(pid), nice); setErr != nil {
		return xerrors.Errorf("set priority: %w", setErr)
	}
	return nil
}

// Process describes a single process discovered by List.
type Process struct {
// Dir is the process's directory inside the scanned proc directory
// (the scanned directory joined with the PID entry name).
Dir string
// CmdLine is the raw content of the process's "cmdline" file; Name
// treats it as NUL-separated arguments.
CmdLine string
// PID is the process ID parsed from the directory entry name.
PID int32
// fs is the filesystem the process was listed from; SetOOMAdj writes
// through it.
fs afero.Fs
}

// SetOOMAdj writes score, formatted as a decimal integer, to the
// "oom_score_adj" file inside the process's directory using the filesystem
// the process was listed from. A write failure is returned wrapped with the
// path that could not be written.
func (p *Process) SetOOMAdj(score int) error {
	target := filepath.Join(p.Dir, "oom_score_adj")
	payload := []byte(strconv.Itoa(score))

	if writeErr := afero.WriteFile(p.fs, target, payload, 0644); writeErr != nil {
		return xerrors.Errorf("write %q: %w", target, writeErr)
	}
	return nil
}

// SetNiceness asks sc to set the process's priority to score. On failure the
// error is returned wrapped with the process's command line for context.
func (p *Process) SetNiceness(sc Syscaller, score int) error {
	if prioErr := sc.SetPriority(p.PID, score); prioErr != nil {
		return xerrors.Errorf("set priority for %q: %w", p.CmdLine, prioErr)
	}
	return nil
}

// Name returns the first NUL-separated element of the process's command
// line. When the command line contains no NUL byte (including when it is
// empty) the whole command line is returned.
func (p *Process) Name() string {
	first, _, _ := strings.Cut(p.CmdLine, "\x00")
	return first
}

// List returns the processes found in dir, which is expected to be laid out
// like a procfs mount: one subdirectory per process, named after its PID and
// containing a "cmdline" file.
//
// Entries whose name is not a 32-bit integer are ignored. A process is also
// skipped, rather than failing the whole listing, when its cmdline cannot be
// read because of missing permission (EPERM) or because the process exited
// between the directory listing and the read (ENOENT, ESRCH). Any other
// error aborts the listing and is returned.
func List(fs afero.Fs, dir string) ([]*Process, error) {
	d, err := fs.Open(dir)
	if err != nil {
		return nil, xerrors.Errorf("open dir %q: %w", dir, err)
	}
	// The handle is only needed to enumerate entries. Release it on every
	// return path so that periodic callers do not leak file descriptors.
	defer d.Close()

	entries, err := d.Readdirnames(0)
	if err != nil {
		return nil, xerrors.Errorf("readdirnames: %w", err)
	}

	processes := make([]*Process, 0, len(entries))
	for _, entry := range entries {
		pid, err := strconv.ParseInt(entry, 10, 32)
		if err != nil {
			// Not a PID directory (e.g. "self", "meminfo").
			continue
		}

		procDir := filepath.Join(dir, entry)
		cmdline, err := afero.ReadFile(fs, filepath.Join(procDir, "cmdline"))
		if err != nil {
			var errNo syscall.Errno
			if xerrors.As(err, &errNo) {
				switch errNo {
				case syscall.EPERM, syscall.ENOENT, syscall.ESRCH:
					// Either we may not inspect this process or it is
					// already gone; neither should fail the listing.
					continue
				}
			}
			return nil, xerrors.Errorf("read cmdline: %w", err)
		}

		processes = append(processes, &Process{
			PID:     int32(pid),
			CmdLine: string(cmdline),
			Dir:     procDir,
			fs:      fs,
		})
	}

	return processes, nil
}
12 changes: 12 additions & 0 deletions agent/agentproc/proc_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
package agentproc_test

// mockSyscaller is a test double whose SetPriority behavior is supplied by
// the test through SetPriorityFn.
type mockSyscaller struct {
	// SetPriorityFn, when non-nil, is invoked by SetPriority with the same
	// arguments and its result is returned to the caller.
	SetPriorityFn func(int32, int) error
}

// SetPriority delegates to SetPriorityFn when one is configured; otherwise it
// does nothing and reports success.
func (m mockSyscaller) SetPriority(pid int32, nice int) error {
	if fn := m.SetPriorityFn; fn != nil {
		return fn(pid, nice)
	}
	return nil
}