Skip to content

Commit 1b5f0c6

Browse files
committed
feat: implement agent process management
- An opt-in feature has been added to the agent that deprioritizes processes unrelated to Coder for both CPU and memory. Non-Coder processes have their niceness set to 10 and their oom_score_adj set to 100.
1 parent ee24260 commit 1b5f0c6

File tree

4 files changed

+199
-0
lines changed

4 files changed

+199
-0
lines changed

agent/agent.go

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ import (
1515
"os/exec"
1616
"os/user"
1717
"path/filepath"
18+
"runtime"
1819
"sort"
1920
"strconv"
2021
"strings"
@@ -34,6 +35,7 @@ import (
3435
"tailscale.com/types/netlogtype"
3536

3637
"cdr.dev/slog"
38+
"github.com/coder/coder/v2/agent/agentproc"
3739
"github.com/coder/coder/v2/agent/agentssh"
3840
"github.com/coder/coder/v2/agent/reconnectingpty"
3941
"github.com/coder/coder/v2/buildinfo"
@@ -51,6 +53,8 @@ const (
5153
ProtocolDial = "dial"
5254
)
5355

56+
// EnvProcMemNice is the name of the environment variable that opts the agent
// in to process priority management. manageProcessPriorityLoop only runs when
// this variable is set to a non-empty value and the agent runs on Linux.
const EnvProcMemNice = "CODER_PROC_MEMNICE_ENABLE"
57+
5458
type Options struct {
5559
Filesystem afero.Fs
5660
LogDir string
@@ -68,6 +72,7 @@ type Options struct {
6872
PrometheusRegistry *prometheus.Registry
6973
ReportMetadataInterval time.Duration
7074
ServiceBannerRefreshInterval time.Duration
75+
Syscaller agentproc.Syscaller
7176
}
7277

7378
type Client interface {
@@ -197,6 +202,7 @@ type agent struct {
197202

198203
prometheusRegistry *prometheus.Registry
199204
metrics *agentMetrics
205+
syscaller agentproc.Syscaller
200206
}
201207

202208
func (a *agent) TailnetConn() *tailnet.Conn {
@@ -225,6 +231,7 @@ func (a *agent) runLoop(ctx context.Context) {
225231
go a.reportLifecycleLoop(ctx)
226232
go a.reportMetadataLoop(ctx)
227233
go a.fetchServiceBannerLoop(ctx)
234+
go a.manageProcessPriorityLoop(ctx)
228235

229236
for retrier := retry.New(100*time.Millisecond, 10*time.Second); retrier.Wait(ctx); {
230237
a.logger.Info(ctx, "connecting to coderd")
@@ -1253,6 +1260,84 @@ func (a *agent) startReportingConnectionStats(ctx context.Context) {
12531260
}
12541261
}
12551262

1263+
// exemptProcesses lists the executable base names that
// manageProcessPriorityLoop leaves untouched; every other listed process is
// deprioritized.
var exemptProcesses = []string{"coder"}
1264+
1265+
func (a *agent) manageProcessPriorityLoop(ctx context.Context) {
1266+
ticker := time.NewTicker(time.Minute)
1267+
defer ticker.Stop()
1268+
1269+
const (
1270+
procDir = agentproc.DefaultProcDir
1271+
niceness = 10
1272+
oomScoreAdj = -1000
1273+
)
1274+
1275+
if val := a.envVars[EnvProcMemNice]; val == "" || runtime.GOOS != "linux" {
1276+
a.logger.Info(ctx, "process priority not enabled, agent will not manage process niceness/oom_score_adj ",
1277+
slog.F("env_var", EnvProcMemNice),
1278+
slog.F("value", val),
1279+
slog.F("goos", runtime.GOOS),
1280+
)
1281+
return
1282+
}
1283+
1284+
for {
1285+
select {
1286+
case <-ticker.C:
1287+
procs, err := agentproc.List(a.filesystem, agentproc.DefaultProcDir)
1288+
if err != nil {
1289+
a.logger.Error(ctx, "failed to list procs",
1290+
slog.F("dir", agentproc.DefaultProcDir),
1291+
slog.Error(err),
1292+
)
1293+
continue
1294+
}
1295+
for _, proc := range procs {
1296+
// Trim off the path e.g. "./coder" -> "coder"
1297+
name := filepath.Base(proc.Name())
1298+
if slices.Contains(exemptProcesses, name) {
1299+
a.logger.Debug(ctx, "skipping exempt process",
1300+
slog.F("name", proc.Name()),
1301+
slog.F("pid", proc.PID),
1302+
)
1303+
continue
1304+
}
1305+
1306+
err := proc.SetNiceness(a.syscaller, niceness)
1307+
if err != nil {
1308+
a.logger.Error(ctx, "unable to set proc niceness",
1309+
slog.F("name", proc.Name()),
1310+
slog.F("pid", proc.PID),
1311+
slog.F("niceness", niceness),
1312+
slog.Error(err),
1313+
)
1314+
continue
1315+
}
1316+
1317+
err = proc.SetOOMAdj(oomScoreAdj)
1318+
if err != nil {
1319+
a.logger.Error(ctx, "unable to set proc oom_score_adj",
1320+
slog.F("name", proc.Name()),
1321+
slog.F("pid", proc.PID),
1322+
slog.F("oom_score_adj", oomScoreAdj),
1323+
slog.Error(err),
1324+
)
1325+
continue
1326+
}
1327+
1328+
a.logger.Debug(ctx, "deprioritized process",
1329+
slog.F("name", proc.Name()),
1330+
slog.F("pid", proc.PID),
1331+
slog.F("niceness", niceness),
1332+
slog.F("oom_score_adj", oomScoreAdj),
1333+
)
1334+
}
1335+
case <-ctx.Done():
1336+
return
1337+
}
1338+
}
1339+
}
1340+
12561341
// isClosed returns whether the API is closed or not.
12571342
func (a *agent) isClosed() bool {
12581343
select {

agent/agentproc/doc.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
// Package agentproc contains logic for interfacing with local
2+
// processes running in the same context as the agent.
3+
package agentproc

agent/agentproc/proc.go

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
package agentproc
2+
3+
import (
4+
"path/filepath"
5+
"strconv"
6+
"strings"
7+
"syscall"
8+
9+
"github.com/spf13/afero"
10+
"golang.org/x/sys/unix"
11+
"golang.org/x/xerrors"
12+
)
13+
14+
// DefaultProcDir is the directory that is scanned for per-process
// subdirectories by default.
const DefaultProcDir = "/proc"
15+
16+
// Syscaller abstracts the system calls needed to adjust a process's
// scheduling priority so that they can be substituted in tests.
type Syscaller interface {
	// SetPriority sets the scheduling priority of the process identified
	// by pid to priority.
	SetPriority(pid int32, priority int) error
}
19+
20+
type UnixSyscaller struct{}
21+
22+
func (UnixSyscaller) SetPriority(pid int32, nice int) error {
23+
err := unix.Setpriority(unix.PRIO_PROCESS, int(pid), nice)
24+
if err != nil {
25+
return xerrors.Errorf("set priority: %w", err)
26+
}
27+
return nil
28+
}
29+
30+
type Process struct {
31+
Dir string
32+
CmdLine string
33+
PID int32
34+
fs afero.Fs
35+
}
36+
37+
func (p *Process) SetOOMAdj(score int) error {
38+
path := filepath.Join(p.Dir, "oom_score_adj")
39+
err := afero.WriteFile(p.fs,
40+
path,
41+
[]byte(strconv.Itoa(score)),
42+
0644,
43+
)
44+
if err != nil {
45+
return xerrors.Errorf("write %q: %w", path, err)
46+
}
47+
48+
return nil
49+
}
50+
51+
func (p *Process) SetNiceness(sc Syscaller, score int) error {
52+
err := sc.SetPriority(p.PID, score)
53+
if err != nil {
54+
return xerrors.Errorf("set priority for %q: %w", p.CmdLine, err)
55+
}
56+
return nil
57+
}
58+
59+
func (p *Process) Name() string {
60+
args := strings.Split(p.CmdLine, "\x00")
61+
// Split will always return at least one element.
62+
return args[0]
63+
}
64+
65+
func List(fs afero.Fs, dir string) ([]*Process, error) {
66+
d, err := fs.Open(dir)
67+
if err != nil {
68+
return nil, xerrors.Errorf("open dir %q: %w", dir, err)
69+
}
70+
71+
entries, err := d.Readdirnames(0)
72+
if err != nil {
73+
return nil, xerrors.Errorf("readdirnames: %w", err)
74+
}
75+
76+
processes := make([]*Process, 0, len(entries))
77+
for _, entry := range entries {
78+
pid, err := strconv.ParseInt(entry, 10, 32)
79+
if err != nil {
80+
continue
81+
}
82+
cmdline, err := afero.ReadFile(fs, filepath.Join(dir, entry, "cmdline"))
83+
if err != nil {
84+
var errNo syscall.Errno
85+
if xerrors.As(err, &errNo) && errNo == syscall.EPERM {
86+
continue
87+
}
88+
return nil, xerrors.Errorf("read cmdline: %w", err)
89+
}
90+
processes = append(processes, &Process{
91+
PID: int32(pid),
92+
CmdLine: string(cmdline),
93+
Dir: filepath.Join(dir, entry),
94+
fs: fs,
95+
})
96+
}
97+
98+
return processes, nil
99+
}

agent/agentproc/proc_test.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
package agentproc_test
2+
3+
// mockSyscaller is a test double whose SetPriority behavior is supplied by
// the optional SetPriorityFn hook.
type mockSyscaller struct {
	// SetPriorityFn, when non-nil, handles calls to SetPriority.
	SetPriorityFn func(int32, int) error
}

// SetPriority forwards to SetPriorityFn when one is configured and reports
// success otherwise.
func (m mockSyscaller) SetPriority(pid int32, nice int) error {
	if hook := m.SetPriorityFn; hook != nil {
		return hook(pid, nice)
	}
	return nil
}

0 commit comments

Comments
 (0)