Skip to content

Commit f37242b

Browse files
committed
oom score
1 parent b3a787a commit f37242b

File tree

5 files changed

+192
-55
lines changed

5 files changed

+192
-55
lines changed

agent/agent.go

Lines changed: 86 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ const (
6363
// EnvProcPrioMgmt determines whether we attempt to manage
6464
// process CPU and OOM Killer priority.
6565
const EnvProcPrioMgmt = "CODER_PROC_PRIO_MGMT"
66+
const EnvProcOOMScore = "CODER_PROC_OOM_SCORE"
6667

6768
type Options struct {
6869
Filesystem afero.Fs
@@ -1569,28 +1570,22 @@ func (a *agent) manageProcessPriorityUntilGracefulShutdown() {
15691570
return
15701571
}
15711572

1572-
const agentOOMScore = "-1000"
1573-
1574-
err := afero.WriteFile(a.filesystem, "/proc/self/oom_score_adj", []byte(agentOOMScore), 0o600)
1575-
if err != nil {
1576-
a.logger.Error(ctx, "error adjusting agent oom_score_adj",
1577-
slog.F("score", agentOOMScore),
1578-
slog.Error(err),
1579-
)
1580-
} else {
1581-
a.logger.Debug(ctx, "adjusted agent oom_score_adj to avoid OOM Killer",
1582-
slog.F("score", agentOOMScore),
1583-
)
1584-
}
1585-
15861573
if a.processManagementTick == nil {
15871574
ticker := time.NewTicker(time.Second)
15881575
defer ticker.Stop()
15891576
a.processManagementTick = ticker.C
15901577
}
15911578

1579+
oomScore := unsetOOMScore
1580+
if scoreStr, ok := a.environmentVariables[EnvProcOOMScore]; ok {
1581+
score, err := strconv.Atoi(strings.TrimSpace(scoreStr))
1582+
if err == nil {
1583+
oomScore = score
1584+
}
1585+
}
1586+
15921587
for {
1593-
procs, err := a.manageProcessPriority(ctx)
1588+
procs, err := a.manageProcessPriority(ctx, oomScore)
15941589
if err != nil {
15951590
a.logger.Error(ctx, "manage process priority",
15961591
slog.Error(err),
@@ -1608,11 +1603,23 @@ func (a *agent) manageProcessPriorityUntilGracefulShutdown() {
16081603
}
16091604
}
16101605

1611-
func (a *agent) manageProcessPriority(ctx context.Context) ([]*agentproc.Process, error) {
1606+
const unsetOOMScore = 1001
1607+
1608+
func (a *agent) manageProcessPriority(ctx context.Context, oomScore int) ([]*agentproc.Process, error) {
16121609
const (
16131610
niceness = 10
16141611
)
16151612

1613+
agentScore, err := a.getAgentOOMScore()
1614+
if err != nil {
1615+
agentScore = unsetOOMScore
1616+
}
1617+
if oomScore == unsetOOMScore && agentScore != unsetOOMScore {
1618+
// If the child score has not been explicitly specified we should
1619+
// set it to a score relative to the agent score.
1620+
oomScore = childOOMScore(agentScore)
1621+
}
1622+
16161623
procs, err := agentproc.List(a.filesystem, a.syscaller)
16171624
if err != nil {
16181625
return nil, xerrors.Errorf("list: %w", err)
@@ -1640,10 +1647,10 @@ func (a *agent) manageProcessPriority(ctx context.Context) ([]*agentproc.Process
16401647
continue
16411648
}
16421649

1643-
score, err := proc.Niceness(a.syscaller)
1644-
if err != nil && !xerrors.Is(err, os.ErrPermission) {
1650+
score, niceErr := proc.Niceness(a.syscaller)
1651+
if niceErr != nil && !xerrors.Is(niceErr, os.ErrPermission) {
16451652
logger.Warn(ctx, "unable to get proc niceness",
1646-
slog.Error(err),
1653+
slog.Error(niceErr),
16471654
)
16481655
continue
16491656
}
@@ -1653,28 +1660,31 @@ func (a *agent) manageProcessPriority(ctx context.Context) ([]*agentproc.Process
16531660
// Getpriority actually returns priority for the nice value
16541661
// which is niceness + 20, so here 20 = a niceness of 0 (aka unset).
16551662
if score != 20 {
1656-
// We don't log here since it can get spammy
16571663
continue
16581664
}
16591665

1660-
err = proc.SetNiceness(a.syscaller, niceness)
1661-
if err != nil && !xerrors.Is(err, os.ErrPermission) {
1662-
logger.Warn(ctx, "unable to set proc niceness",
1663-
slog.F("niceness", niceness),
1664-
slog.Error(err),
1665-
)
1666-
continue
1666+
if niceErr == nil {
1667+
err := proc.SetNiceness(a.syscaller, niceness)
1668+
if err != nil && !xerrors.Is(err, os.ErrPermission) {
1669+
logger.Warn(ctx, "unable to set proc niceness",
1670+
slog.F("niceness", niceness),
1671+
slog.Error(err),
1672+
)
1673+
}
16671674
}
16681675

1669-
err = afero.WriteFile(a.filesystem, fmt.Sprintf("/proc/%d/oom_score_adj", proc.PID), []byte("0"), 0o600)
1670-
if err != nil && !xerrors.Is(err, os.ErrPermission) {
1671-
logger.Warn(ctx, "unable to set oom_score_adj",
1672-
slog.F("score", "0"),
1673-
slog.Error(err),
1674-
)
1675-
continue
1676+
// If the oom score is valid and it's not already set and isn't a custom value set by another process
1677+
// then it's ok to update it.
1678+
if oomScore != unsetOOMScore && oomScore != proc.OOMScoreAdj && !isCustomOOMScore(agentScore, proc) {
1679+
oomScoreStr := strconv.Itoa(oomScore)
1680+
err := afero.WriteFile(a.filesystem, fmt.Sprintf("/proc/%d/oom_score_adj", proc.PID), []byte(oomScoreStr), 0o644)
1681+
if err != nil && !xerrors.Is(err, os.ErrPermission) {
1682+
logger.Warn(ctx, "unable to set oom_score_adj",
1683+
slog.F("score", "0"),
1684+
slog.Error(err),
1685+
)
1686+
}
16761687
}
1677-
16781688
modProcs = append(modProcs, proc)
16791689
}
16801690
return modProcs, nil
@@ -2028,3 +2038,44 @@ func PrometheusMetricsHandler(prometheusRegistry *prometheus.Registry, logger sl
20282038
}
20292039
})
20302040
}
2041+
2042+
// childOOMScore returns the oom_score_adj for a child process. It is based
2043+
// on the oom_score_adj of the agent process.
2044+
func childOOMScore(agentScore int) int {
2045+
// If the agent has a negative oom_score_adj, we set the child to 0
2046+
// so it's treated like every other process.
2047+
if agentScore < 0 {
2048+
return 0
2049+
}
2050+
2051+
// If the agent is already almost at the maximum then set it to the max.
2052+
if agentScore >= 998 {
2053+
return 1000
2054+
}
2055+
2056+
// If the agent oom_score_adj is >=0, we set the child to slightly
2057+
// less than the maximum. If users want a different score they set it
2058+
// directly.
2059+
return 998
2060+
}
2061+
2062+
func (a *agent) getAgentOOMScore() (int, error) {
2063+
scoreStr, err := afero.ReadFile(a.filesystem, "/proc/self/oom_score_adj")
2064+
if err != nil {
2065+
return 0, xerrors.Errorf("read file: %w", err)
2066+
}
2067+
2068+
score, err := strconv.Atoi(strings.TrimSpace(string(scoreStr)))
2069+
if err != nil {
2070+
return 0, xerrors.Errorf("parse int: %w", err)
2071+
}
2072+
2073+
return score, nil
2074+
}
2075+
2076+
// isCustomOOMScore checks to see if the oom_score_adj is not a value that would
2077+
// originate from an agent-spawned process.
2078+
func isCustomOOMScore(agentScore int, process *agentproc.Process) bool {
2079+
score := process.OOMScoreAdj
2080+
return agentScore != score && score != 1000 && score != 0 && score != 998
2081+
}

agent/agent_test.go

Lines changed: 74 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2529,11 +2529,11 @@ func TestAgent_ManageProcessPriority(t *testing.T) {
25292529
logger = slog.Make(sloghuman.Sink(io.Discard))
25302530
)
25312531

2532+
requireFileWrite(t, fs, "/proc/self/oom_score_adj", "-500")
2533+
25322534
// Create some processes.
25332535
for i := 0; i < 4; i++ {
2534-
// Create a prioritized process. This process should
2535-
// have it's oom_score_adj set to -500 and its nice
2536-
// score should be untouched.
2536+
// Create a prioritized process.
25372537
var proc agentproc.Process
25382538
if i == 0 {
25392539
proc = agentproctest.GenerateProcess(t, fs,
@@ -2551,8 +2551,8 @@ func TestAgent_ManageProcessPriority(t *testing.T) {
25512551
},
25522552
)
25532553

2554-
syscaller.EXPECT().SetPriority(proc.PID, 10).Return(nil)
25552554
syscaller.EXPECT().GetPriority(proc.PID).Return(20, nil)
2555+
syscaller.EXPECT().SetPriority(proc.PID, 10).Return(nil)
25562556
}
25572557
syscaller.EXPECT().
25582558
Kill(proc.PID, syscall.Signal(0)).
@@ -2571,6 +2571,9 @@ func TestAgent_ManageProcessPriority(t *testing.T) {
25712571
})
25722572
actualProcs := <-modProcs
25732573
require.Len(t, actualProcs, len(expectedProcs)-1)
2574+
for _, proc := range actualProcs {
2575+
requireFileEquals(t, fs, fmt.Sprintf("/proc/%d/oom_score_adj", proc.PID), "0")
2576+
}
25742577
})
25752578

25762579
t.Run("IgnoreCustomNice", func(t *testing.T) {
@@ -2589,13 +2592,8 @@ func TestAgent_ManageProcessPriority(t *testing.T) {
25892592
logger = slog.Make(sloghuman.Sink(io.Discard))
25902593
)
25912594

2592-
requireScore := func(t *testing.T, p *agentproc.Process, score string) {
2593-
t.Helper()
2594-
2595-
actual, err := afero.ReadFile(fs, fmt.Sprintf("/proc/%d/oom_score_adj", p.PID))
2596-
require.NoError(t, err)
2597-
require.Equal(t, score, string(actual))
2598-
}
2595+
err := afero.WriteFile(fs, "/proc/self/oom_score_adj", []byte("0"), 0o644)
2596+
require.NoError(t, err)
25992597

26002598
// Create some processes.
26012599
for i := 0; i < 3; i++ {
@@ -2628,8 +2626,58 @@ func TestAgent_ManageProcessPriority(t *testing.T) {
26282626
// We should ignore the process with a custom nice score.
26292627
require.Len(t, actualProcs, 2)
26302628
for _, proc := range actualProcs {
2631-
requireScore(t, proc, "0")
2629+
_, ok := expectedProcs[proc.PID]
2630+
require.True(t, ok)
2631+
requireFileEquals(t, fs, fmt.Sprintf("/proc/%d/oom_score_adj", proc.PID), "998")
2632+
}
2633+
})
2634+
2635+
t.Run("CustomOOMScore", func(t *testing.T) {
2636+
t.Parallel()
2637+
2638+
if runtime.GOOS != "linux" {
2639+
t.Skip("Skipping non-linux environment")
2640+
}
2641+
2642+
var (
2643+
fs = afero.NewMemMapFs()
2644+
ticker = make(chan time.Time)
2645+
syscaller = agentproctest.NewMockSyscaller(gomock.NewController(t))
2646+
modProcs = make(chan []*agentproc.Process)
2647+
logger = slog.Make(sloghuman.Sink(io.Discard))
2648+
)
2649+
2650+
err := afero.WriteFile(fs, "/proc/self/oom_score_adj", []byte("0"), 0o644)
2651+
require.NoError(t, err)
2652+
2653+
// Create some processes.
2654+
for i := 0; i < 3; i++ {
2655+
proc := agentproctest.GenerateProcess(t, fs)
2656+
syscaller.EXPECT().
2657+
Kill(proc.PID, syscall.Signal(0)).
2658+
Return(nil)
2659+
syscaller.EXPECT().GetPriority(proc.PID).Return(20, nil)
2660+
syscaller.EXPECT().SetPriority(proc.PID, 10).Return(nil)
2661+
}
2662+
2663+
_, _, _, _, _ = setupAgent(t, agentsdk.Manifest{}, 0, func(c *agenttest.Client, o *agent.Options) {
2664+
o.Syscaller = syscaller
2665+
o.ModifiedProcesses = modProcs
2666+
o.EnvironmentVariables = map[string]string{
2667+
agent.EnvProcPrioMgmt: "1",
2668+
agent.EnvProcOOMScore: "-567",
2669+
}
2670+
o.Filesystem = fs
2671+
o.Logger = logger
2672+
o.ProcessManagementTick = ticker
2673+
})
2674+
actualProcs := <-modProcs
2675+
// We should ignore the process with a custom nice score.
2676+
require.Len(t, actualProcs, 3)
2677+
for _, proc := range actualProcs {
2678+
requireFileEquals(t, fs, fmt.Sprintf("/proc/%d/oom_score_adj", proc.PID), "-567")
26322679
}
2680+
26332681
})
26342682

26352683
t.Run("DisabledByDefault", func(t *testing.T) {
@@ -2750,3 +2798,17 @@ func requireEcho(t *testing.T, conn net.Conn) {
27502798
require.NoError(t, err)
27512799
require.Equal(t, "test", string(b))
27522800
}
2801+
2802+
func requireFileWrite(t testing.TB, fs afero.Fs, path, data string) {
2803+
t.Helper()
2804+
err := afero.WriteFile(fs, path, []byte(data), 0o600)
2805+
require.NoError(t, err)
2806+
}
2807+
2808+
func requireFileEquals(t testing.TB, fs afero.Fs, path, expect string) {
2809+
t.Helper()
2810+
actual, err := afero.ReadFile(fs, path)
2811+
require.NoError(t, err)
2812+
2813+
require.Equal(t, expect, string(actual))
2814+
}

agent/agentproc/agentproctest/proc.go

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package agentproctest
22

33
import (
44
"fmt"
5+
"strconv"
56
"testing"
67

78
"github.com/spf13/afero"
@@ -29,8 +30,9 @@ func GenerateProcess(t *testing.T, fs afero.Fs, muts ...func(*agentproc.Process)
2930
cmdline := fmt.Sprintf("%s\x00%s\x00%s", arg1, arg2, arg3)
3031

3132
process := agentproc.Process{
32-
CmdLine: cmdline,
33-
PID: int32(pid),
33+
CmdLine: cmdline,
34+
PID: int32(pid),
35+
OOMScoreAdj: 0,
3436
}
3537

3638
for _, mut := range muts {
@@ -45,5 +47,9 @@ func GenerateProcess(t *testing.T, fs afero.Fs, muts ...func(*agentproc.Process)
4547
err = afero.WriteFile(fs, fmt.Sprintf("%s/cmdline", process.Dir), []byte(process.CmdLine), 0o444)
4648
require.NoError(t, err)
4749

50+
score := strconv.Itoa(process.OOMScoreAdj)
51+
err = afero.WriteFile(fs, fmt.Sprintf("%s/oom_score_adj", process.Dir), []byte(score), 0o444)
52+
require.NoError(t, err)
53+
4854
return process
4955
}

agent/agentproc/proc_unix.go

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ package agentproc
55

66
import (
77
"errors"
8+
"os"
89
"path/filepath"
910
"strconv"
1011
"strings"
@@ -50,10 +51,26 @@ func List(fs afero.Fs, syscaller Syscaller) ([]*Process, error) {
5051
}
5152
return nil, xerrors.Errorf("read cmdline: %w", err)
5253
}
54+
55+
oomScore, err := afero.ReadFile(fs, filepath.Join(defaultProcDir, entry, "oom_score_adj"))
56+
if err != nil {
57+
if xerrors.Is(err, os.ErrPermission) {
58+
continue
59+
}
60+
61+
return nil, xerrors.Errorf("read oom_score_adj: %w", err)
62+
}
63+
64+
oom, err := strconv.Atoi(strings.TrimSpace(string(oomScore)))
65+
if err != nil {
66+
return nil, xerrors.Errorf("convert oom score: %w", err)
67+
}
68+
5369
processes = append(processes, &Process{
54-
PID: int32(pid),
55-
CmdLine: string(cmdline),
56-
Dir: filepath.Join(defaultProcDir, entry),
70+
PID: int32(pid),
71+
CmdLine: string(cmdline),
72+
Dir: filepath.Join(defaultProcDir, entry),
73+
OOMScoreAdj: oom,
5774
})
5875
}
5976

agent/agentproc/syscaller.go

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,8 @@ type Syscaller interface {
1414
const defaultProcDir = "/proc"
1515

1616
type Process struct {
17-
Dir string
18-
CmdLine string
19-
PID int32
17+
Dir string
18+
CmdLine string
19+
PID int32
20+
OOMScoreAdj int
2021
}

0 commit comments

Comments
 (0)