Skip to content

Commit f8d938d

Browse files
authored
fix: fix oom_score adjustments failing if caps set (#15758)
- Fixes an issue where oom scores would fail to be adjusted in cases where the `coder` binary has capabilities set on it. This is because `PR_SET_DUMPABLE` is set to `0` when a process is executed with elevated capabilities. The fix is to flip `PR_SET_DUMPABLE` to `1` prior to writing to `oom_score_adj`.
1 parent 0109c9f commit f8d938d

File tree

4 files changed

+101
-14
lines changed

4 files changed

+101
-14
lines changed

agent/agentexec/cli_linux.go

+51-12
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ import (
1515

1616
"golang.org/x/sys/unix"
1717
"golang.org/x/xerrors"
18+
"kernel.org/pub/linux/libs/security/libcap/cap"
1819
)
1920

2021
// CLI runs the agent-exec command. It should only be called by the cli package.
@@ -48,6 +49,14 @@ func CLI() error {
4849
return xerrors.Errorf("no exec command provided %+v", os.Args)
4950
}
5051

52+
if *oom == unset {
53+
// If an explicit oom score isn't set, we use the default.
54+
*oom, err = defaultOOMScore()
55+
if err != nil {
56+
return xerrors.Errorf("get default oom score: %w", err)
57+
}
58+
}
59+
5160
if *nice == unset {
5261
// If an explicit nice score isn't set, we use the default.
5362
*nice, err = defaultNiceScore()
@@ -56,28 +65,45 @@ func CLI() error {
5665
}
5766
}
5867

59-
if *oom == unset {
60-
// If an explicit oom score isn't set, we use the default.
61-
*oom, err = defaultOOMScore()
62-
if err != nil {
63-
return xerrors.Errorf("get default oom score: %w", err)
64-
}
68+
// We drop effective caps prior to setting dumpable so that we limit the
69+
// impact of someone attempting to hijack the process (i.e. with a debugger)
70+
// to take advantage of the capabilities of the agent process. We encourage
71+
// users to set cap_net_admin on the agent binary for improved networking
72+
// performance and doing so results in the process having its SET_DUMPABLE
73+
// attribute disabled (meaning we cannot adjust the oom score).
74+
err = dropEffectiveCaps()
75+
if err != nil {
76+
printfStdErr("failed to drop effective caps: %v", err)
6577
}
6678

67-
err = unix.Setpriority(unix.PRIO_PROCESS, 0, *nice)
79+
// Set dumpable to 1 so that we can adjust the oom score. If the process
80+
// doesn't have capabilities or has an suid/sgid bit set, this is already
81+
// set.
82+
err = unix.Prctl(unix.PR_SET_DUMPABLE, 1, 0, 0, 0)
6883
if err != nil {
69-
// We alert the user instead of failing the command since it can be difficult to debug
70-
// for a template admin otherwise. It's quite possible (and easy) to set an
71-
// inappriopriate value for niceness.
72-
printfStdErr("failed to adjust niceness to %d for cmd %+v: %v", *nice, args, err)
84+
printfStdErr("failed to set dumpable: %v", err)
7385
}
7486

7587
err = writeOOMScoreAdj(*oom)
7688
if err != nil {
7789
// We alert the user instead of failing the command since it can be difficult to debug
7890
// for a template admin otherwise. It's quite possible (and easy) to set an
7991
// inappriopriate value for oom_score_adj.
80-
printfStdErr("failed to adjust oom score to %d for cmd %+v: %v", *oom, args, err)
92+
printfStdErr("failed to adjust oom score to %d for cmd %+v: %v", *oom, execArgs(os.Args), err)
93+
}
94+
95+
// Set dumpable back to 0 just to be safe. It's not inherited for execve anyways.
96+
err = unix.Prctl(unix.PR_SET_DUMPABLE, 0, 0, 0, 0)
97+
if err != nil {
98+
printfStdErr("failed to unset dumpable: %v", err)
99+
}
100+
101+
err = unix.Setpriority(unix.PRIO_PROCESS, 0, *nice)
102+
if err != nil {
103+
// We alert the user instead of failing the command since it can be difficult to debug
104+
// for a template admin otherwise. It's quite possible (and easy) to set an
105+
// inappriopriate value for niceness.
106+
printfStdErr("failed to adjust niceness to %d for cmd %+v: %v", *nice, args, err)
81107
}
82108

83109
path, err := exec.LookPath(args[0])
@@ -151,3 +177,16 @@ func execArgs(args []string) []string {
151177
func printfStdErr(format string, a ...any) {
152178
_, _ = fmt.Fprintf(os.Stderr, "coder-agent: %s\n", fmt.Sprintf(format, a...))
153179
}
180+
181+
func dropEffectiveCaps() error {
182+
proc := cap.GetProc()
183+
err := proc.ClearFlag(cap.Effective)
184+
if err != nil {
185+
return xerrors.Errorf("clear effective caps: %w", err)
186+
}
187+
err = proc.SetProc()
188+
if err != nil {
189+
return xerrors.Errorf("set proc: %w", err)
190+
}
191+
return nil
192+
}

agent/agentexec/cli_linux_test.go

+44-2
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ import (
1818

1919
"github.com/stretchr/testify/require"
2020
"golang.org/x/sys/unix"
21+
"golang.org/x/xerrors"
2122

2223
"github.com/coder/coder/v2/testutil"
2324
)
@@ -50,6 +51,32 @@ func TestCLI(t *testing.T) {
5051
requireOOMScore(t, cmd.Process.Pid, expectedOOM)
5152
requireNiceScore(t, cmd.Process.Pid, expectedNice)
5253
})
54+
55+
t.Run("Capabilities", func(t *testing.T) {
56+
testdir := filepath.Dir(TestBin)
57+
capDir := filepath.Join(testdir, "caps")
58+
err := os.Mkdir(capDir, 0o755)
59+
require.NoError(t, err)
60+
bin := buildBinary(capDir)
61+
// Try to set capabilities on the binary. This should work fine in CI but
62+
// it's possible some developers may be working in an environment where they don't have the necessary permissions.
63+
err = setCaps(t, bin, "cap_net_admin")
64+
if os.Getenv("CI") != "" {
65+
require.NoError(t, err)
66+
} else if err != nil {
67+
t.Skipf("unable to set capabilities for test: %v", err)
68+
}
69+
ctx := testutil.Context(t, testutil.WaitMedium)
70+
cmd, path := binCmd(ctx, t, bin, 123, 12)
71+
err = cmd.Start()
72+
require.NoError(t, err)
73+
go cmd.Wait()
74+
75+
waitForSentinel(ctx, t, cmd, path)
76+
// This is what we're really testing, a binary with added capabilities requires setting dumpable.
77+
requireOOMScore(t, cmd.Process.Pid, 123)
78+
requireNiceScore(t, cmd.Process.Pid, 12)
79+
})
5380
}
5481

5582
func requireNiceScore(t *testing.T, pid int, score int) {
@@ -94,7 +121,7 @@ func waitForSentinel(ctx context.Context, t *testing.T, cmd *exec.Cmd, path stri
94121
}
95122
}
96123

97-
func cmd(ctx context.Context, t *testing.T, oom, nice int) (*exec.Cmd, string) {
124+
func binCmd(ctx context.Context, t *testing.T, bin string, oom, nice int) (*exec.Cmd, string) {
98125
var (
99126
args = execArgs(oom, nice)
100127
dir = t.TempDir()
@@ -103,7 +130,7 @@ func cmd(ctx context.Context, t *testing.T, oom, nice int) (*exec.Cmd, string) {
103130

104131
args = append(args, "sh", "-c", fmt.Sprintf("touch %s && sleep 10m", file))
105132
//nolint:gosec
106-
cmd := exec.CommandContext(ctx, TestBin, args...)
133+
cmd := exec.CommandContext(ctx, bin, args...)
107134

108135
// We set this so we can also easily kill the sleep process the shell spawns.
109136
cmd.SysProcAttr = &syscall.SysProcAttr{
@@ -127,6 +154,10 @@ func cmd(ctx context.Context, t *testing.T, oom, nice int) (*exec.Cmd, string) {
127154
return cmd, file
128155
}
129156

157+
func cmd(ctx context.Context, t *testing.T, oom, nice int) (*exec.Cmd, string) {
158+
return binCmd(ctx, t, TestBin, oom, nice)
159+
}
160+
130161
func expectedOOMScore(t *testing.T) int {
131162
t.Helper()
132163

@@ -171,3 +202,14 @@ func execArgs(oom int, nice int) []string {
171202
execArgs = append(execArgs, "--")
172203
return execArgs
173204
}
205+
206+
func setCaps(t *testing.T, bin string, caps ...string) error {
207+
t.Helper()
208+
209+
setcap := fmt.Sprintf("sudo -n setcap %s=ep %s", strings.Join(caps, ", "), bin)
210+
out, err := exec.CommandContext(context.Background(), "sh", "-c", setcap).CombinedOutput()
211+
if err != nil {
212+
return xerrors.Errorf("setcap %q (%s): %w", setcap, out, err)
213+
}
214+
return nil
215+
}

go.mod

+2
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,7 @@ require (
212212
github.com/google/go-github/v61 v61.0.0
213213
github.com/mocktools/go-smtp-mock/v2 v2.4.0
214214
github.com/natefinch/atomic v1.0.1
215+
kernel.org/pub/linux/libs/security/libcap/cap v1.2.73
215216
)
216217

217218
require (
@@ -248,6 +249,7 @@ require (
248249
github.com/vmihailenco/msgpack/v5 v5.4.1 // indirect
249250
github.com/vmihailenco/tagparser/v2 v2.0.0 // indirect
250251
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.54.0 // indirect
252+
kernel.org/pub/linux/libs/security/libcap/psx v1.2.73 // indirect
251253
)
252254

253255
require (

go.sum

+4
Original file line numberDiff line numberDiff line change
@@ -1266,6 +1266,10 @@ howett.net/plist v1.0.0 h1:7CrbWYbPPO/PyNy38b2EB/+gYbjCe2DXBxgtOOZbSQM=
12661266
howett.net/plist v1.0.0/go.mod h1:lqaXoTrLY4hg8tnEzNru53gicrbv7rrk+2xJA/7hw9g=
12671267
inet.af/peercred v0.0.0-20210906144145-0893ea02156a h1:qdkS8Q5/i10xU2ArJMKYhVa1DORzBfYS/qA2UK2jheg=
12681268
inet.af/peercred v0.0.0-20210906144145-0893ea02156a/go.mod h1:FjawnflS/udxX+SvpsMgZfdqx2aykOlkISeAsADi5IU=
1269+
kernel.org/pub/linux/libs/security/libcap/cap v1.2.73 h1:Th2b8jljYqkyZKS3aD3N9VpYsQpHuXLgea+SZUIfODA=
1270+
kernel.org/pub/linux/libs/security/libcap/cap v1.2.73/go.mod h1:hbeKwKcboEsxARYmcy/AdPVN11wmT/Wnpgv4k4ftyqY=
1271+
kernel.org/pub/linux/libs/security/libcap/psx v1.2.73 h1:SEAEUiPVylTD4vqqi+vtGkSnXeP2FcRO3FoZB1MklMw=
1272+
kernel.org/pub/linux/libs/security/libcap/psx v1.2.73/go.mod h1:+l6Ee2F59XiJ2I6WR5ObpC1utCQJZ/VLsEbQCD8RG24=
12691273
lukechampine.com/uint128 v1.3.0 h1:cDdUVfRwDUDovz610ABgFD17nXD4/uDgVHl2sC3+sbo=
12701274
lukechampine.com/uint128 v1.3.0/go.mod h1:c4eWIwlEGaxC/+H1VguhU4PHXNWDCDMUlWdIWl2j1gk=
12711275
modernc.org/cc/v3 v3.41.0 h1:QoR1Sn3YWlmA1T4vLaKZfawdVtSiGx8H+cEojbC7v1Q=

0 commit comments

Comments
 (0)