Skip to content

Commit ae138bf

Browse files
committed
improve process detection
1 parent 1b5f0c6 commit ae138bf

File tree

2 files changed

+93
-13
lines changed

2 files changed

+93
-13
lines changed

agent/agent.go

Lines changed: 32 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1260,7 +1260,7 @@ func (a *agent) startReportingConnectionStats(ctx context.Context) {
12601260
}
12611261
}
12621262

1263-
var exemptProcesses = []string{"coder"}
1263+
var prioritizedProcs = []string{"coder"}
12641264

12651265
func (a *agent) manageProcessPriorityLoop(ctx context.Context) {
12661266
ticker := time.NewTicker(time.Minute)
@@ -1269,7 +1269,7 @@ func (a *agent) manageProcessPriorityLoop(ctx context.Context) {
12691269
const (
12701270
procDir = agentproc.DefaultProcDir
12711271
niceness = 10
1272-
oomScoreAdj = -1000
1272+
oomScoreAdj = 100
12731273
)
12741274

12751275
if val := a.envVars[EnvProcMemNice]; val == "" || runtime.GOOS != "linux" {
@@ -1284,7 +1284,7 @@ func (a *agent) manageProcessPriorityLoop(ctx context.Context) {
12841284
for {
12851285
select {
12861286
case <-ticker.C:
1287-
procs, err := agentproc.List(a.filesystem, agentproc.DefaultProcDir)
1287+
procs, err := agentproc.List(a.filesystem, a.syscaller, agentproc.DefaultProcDir)
12881288
if err != nil {
12891289
a.logger.Error(ctx, "failed to list procs",
12901290
slog.F("dir", agentproc.DefaultProcDir),
@@ -1295,31 +1295,52 @@ func (a *agent) manageProcessPriorityLoop(ctx context.Context) {
12951295
for _, proc := range procs {
12961296
// Trim off the path e.g. "./coder" -> "coder"
12971297
name := filepath.Base(proc.Name())
1298-
if slices.Contains(exemptProcesses, name) {
1299-
a.logger.Debug(ctx, "skipping exempt process",
1298+
// If the process is prioritized we should adjust
1299+
// its oom_score_adj and avoid raising its niceness (lowering its priority).
1300+
if slices.Contains(prioritizedProcs, name) {
1301+
err = proc.SetOOMAdj(oomScoreAdj)
1302+
if err != nil {
1303+
a.logger.Error(ctx, "unable to set proc oom_score_adj",
1304+
slog.F("name", proc.Name()),
1305+
slog.F("pid", proc.PID),
1306+
slog.F("oom_score_adj", oomScoreAdj),
1307+
slog.Error(err),
1308+
)
1309+
continue
1310+
}
1311+
1312+
a.logger.Debug(ctx, "decreased process oom_score",
13001313
slog.F("name", proc.Name()),
13011314
slog.F("pid", proc.PID),
1315+
slog.F("oom_score_adj", oomScoreAdj),
13021316
)
13031317
continue
13041318
}
13051319

1306-
err := proc.SetNiceness(a.syscaller, niceness)
1320+
score, err := proc.Nice(a.syscaller)
13071321
if err != nil {
1308-
a.logger.Error(ctx, "unable to set proc niceness",
1322+
a.logger.Error(ctx, "unable to get proc niceness",
13091323
slog.F("name", proc.Name()),
13101324
slog.F("pid", proc.PID),
1311-
slog.F("niceness", niceness),
13121325
slog.Error(err),
13131326
)
13141327
continue
13151328
}
1329+
if score != 20 {
1330+
a.logger.Error(ctx, "skipping process due to custom niceness",
1331+
slog.F("name", proc.Name()),
1332+
slog.F("pid", proc.PID),
1333+
slog.F("niceness", score),
1334+
)
1335+
continue
1336+
}
13161337

1317-
err = proc.SetOOMAdj(oomScoreAdj)
1338+
err = proc.SetNiceness(a.syscaller, niceness)
13181339
if err != nil {
1319-
a.logger.Error(ctx, "unable to set proc oom_score_adj",
1340+
a.logger.Error(ctx, "unable to set proc niceness",
13201341
slog.F("name", proc.Name()),
13211342
slog.F("pid", proc.PID),
1322-
slog.F("oom_score_adj", oomScoreAdj),
1343+
slog.F("niceness", niceness),
13231344
slog.Error(err),
13241345
)
13251346
continue
@@ -1329,7 +1350,6 @@ func (a *agent) manageProcessPriorityLoop(ctx context.Context) {
13291350
slog.F("name", proc.Name()),
13301351
slog.F("pid", proc.PID),
13311352
slog.F("niceness", niceness),
1332-
slog.F("oom_score_adj", oomScoreAdj),
13331353
)
13341354
}
13351355
case <-ctx.Done():

agent/agentproc/proc.go

Lines changed: 61 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@ const DefaultProcDir = "/proc"
1515

1616
// Syscaller abstracts the process-related system calls used by this package
// so they can be substituted (for example in tests).
type Syscaller interface {
	// GetPriority returns the scheduling priority reported for process pid.
	GetPriority(pid int32) (int, error)
	// SetPriority sets the scheduling priority of process pid.
	SetPriority(pid int32, priority int) error
	// Kill sends signal sig to process pid.
	Kill(pid int32, sig syscall.Signal) error
}
1921

2022
type UnixSyscaller struct{}
@@ -27,6 +29,23 @@ func (UnixSyscaller) SetPriority(pid int32, nice int) error {
2729
return nil
2830
}
2931

32+
func (UnixSyscaller) GetPriority(pid int32) (int, error) {
33+
nice, err := unix.Getpriority(0, int(pid))
34+
if err != nil {
35+
return 0, xerrors.Errorf("get priority: %w", err)
36+
}
37+
return nice, nil
38+
}
39+
40+
func (UnixSyscaller) Kill(pid int, sig syscall.Signal) error {
41+
err := syscall.Kill(pid, sig)
42+
if err != nil {
43+
return xerrors.Errorf("kill: %w", err)
44+
}
45+
46+
return nil
47+
}
48+
3049
type Process struct {
3150
Dir string
3251
CmdLine string
@@ -56,13 +75,21 @@ func (p *Process) SetNiceness(sc Syscaller, score int) error {
5675
return nil
5776
}
5877

78+
func (p *Process) Nice(sc Syscaller) (int, error) {
79+
nice, err := sc.GetPriority(p.PID)
80+
if err != nil {
81+
return 0, xerrors.Errorf("get priority for %q: %w", p.CmdLine, err)
82+
}
83+
return nice, nil
84+
}
85+
5986
// Name returns the first NUL-separated field of the process command line,
// i.e. the executable as it was invoked (possibly including a path). If the
// command line contains no NUL byte, the whole command line is returned.
func (p *Process) Name() string {
	// Everything before the first NUL is argv[0]; the remainder is ignored.
	argv0, _, _ := strings.Cut(p.CmdLine, "\x00")
	return argv0
}
6491

65-
func List(fs afero.Fs, dir string) ([]*Process, error) {
92+
func List(fs afero.Fs, syscaller Syscaller, dir string) ([]*Process, error) {
6693
d, err := fs.Open(dir)
6794
if err != nil {
6895
return nil, xerrors.Errorf("open dir %q: %w", dir, err)
@@ -79,6 +106,16 @@ func List(fs afero.Fs, dir string) ([]*Process, error) {
79106
if err != nil {
80107
continue
81108
}
109+
110+
// Check that the process still exists.
111+
exists, err := isProcessExist(syscaller, int32(pid), syscall.Signal(0))
112+
if err != nil {
113+
return nil, xerrors.Errorf("check process exists: %w", err)
114+
}
115+
if !exists {
116+
continue
117+
}
118+
82119
cmdline, err := afero.ReadFile(fs, filepath.Join(dir, entry, "cmdline"))
83120
if err != nil {
84121
var errNo syscall.Errno
@@ -97,3 +134,26 @@ func List(fs afero.Fs, dir string) ([]*Process, error) {
97134

98135
return processes, nil
99136
}
137+
138+
func isProcessExist(syscaller Syscaller, pid int32, sig syscall.Signal) (bool, error) {
139+
err := syscaller.Kill(pid, sig)
140+
if err == nil {
141+
return true, nil
142+
}
143+
if err.Error() == "os: process already finished" {
144+
return false, nil
145+
}
146+
147+
errno, ok := err.(syscall.Errno)
148+
if !ok {
149+
return false, err
150+
}
151+
switch errno {
152+
case syscall.ESRCH:
153+
return false, nil
154+
case syscall.EPERM:
155+
return true, nil
156+
}
157+
158+
return false, xerrors.Errorf("kill: %w", err)
159+
}

0 commit comments

Comments
 (0)