Skip to content

fix: fix oom_score adjustments failing if caps set #15758

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Dec 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 51 additions & 12 deletions agent/agentexec/cli_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import (

"golang.org/x/sys/unix"
"golang.org/x/xerrors"
"kernel.org/pub/linux/libs/security/libcap/cap"
)

// CLI runs the agent-exec command. It should only be called by the cli package.
Expand Down Expand Up @@ -48,6 +49,14 @@ func CLI() error {
return xerrors.Errorf("no exec command provided %+v", os.Args)
}

if *oom == unset {
// If an explicit oom score isn't set, we use the default.
*oom, err = defaultOOMScore()
if err != nil {
return xerrors.Errorf("get default oom score: %w", err)
}
}

if *nice == unset {
// If an explicit nice score isn't set, we use the default.
*nice, err = defaultNiceScore()
Expand All @@ -56,28 +65,45 @@ func CLI() error {
}
}

if *oom == unset {
// If an explicit oom score isn't set, we use the default.
*oom, err = defaultOOMScore()
if err != nil {
return xerrors.Errorf("get default oom score: %w", err)
}
// We drop effective caps prior to setting dumpable so that we limit the
// impact of someone attempting to hijack the process (i.e. with a debugger)
// to take advantage of the capabilities of the agent process. We encourage
// users to set cap_net_admin on the agent binary for improved networking
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we mention this in coder.com/docs?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

honestly not sure

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If not, we probably should (wherever we're documenting the nice/oom adjustments).

Copy link
Collaborator Author

@sreya sreya Dec 5, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah once this is merged and there are no bugs, I'm going to write up some documentation for resource management in Coder

// performance and doing so results in the process having its SET_DUMPABLE
// attribute disabled (meaning we cannot adjust the oom score).
err = dropEffectiveCaps()
if err != nil {
printfStdErr("failed to drop effective caps: %v", err)
}

err = unix.Setpriority(unix.PRIO_PROCESS, 0, *nice)
// Set dumpable to 1 so that we can adjust the oom score. If the process
// doesn't have capabilities or has an suid/sgid bit set, this is already
// set.
err = unix.Prctl(unix.PR_SET_DUMPABLE, 1, 0, 0, 0)
if err != nil {
// We alert the user instead of failing the command since it can be difficult to debug
// for a template admin otherwise. It's quite possible (and easy) to set an
// inappriopriate value for niceness.
printfStdErr("failed to adjust niceness to %d for cmd %+v: %v", *nice, args, err)
printfStdErr("failed to set dumpable: %v", err)
}

err = writeOOMScoreAdj(*oom)
if err != nil {
// We alert the user instead of failing the command since it can be difficult to debug
// for a template admin otherwise. It's quite possible (and easy) to set an
// inappriopriate value for oom_score_adj.
printfStdErr("failed to adjust oom score to %d for cmd %+v: %v", *oom, args, err)
printfStdErr("failed to adjust oom score to %d for cmd %+v: %v", *oom, execArgs(os.Args), err)
}

// Set dumpable back to 0 just to be safe. It's not inherited for execve anyways.
err = unix.Prctl(unix.PR_SET_DUMPABLE, 0, 0, 0, 0)
if err != nil {
printfStdErr("failed to unset dumpable: %v", err)
}

err = unix.Setpriority(unix.PRIO_PROCESS, 0, *nice)
if err != nil {
// We alert the user instead of failing the command since it can be difficult to debug
// for a template admin otherwise. It's quite possible (and easy) to set an
// inappriopriate value for niceness.
printfStdErr("failed to adjust niceness to %d for cmd %+v: %v", *nice, args, err)
}

path, err := exec.LookPath(args[0])
Expand Down Expand Up @@ -151,3 +177,16 @@ func execArgs(args []string) []string {
func printfStdErr(format string, a ...any) {
_, _ = fmt.Fprintf(os.Stderr, "coder-agent: %s\n", fmt.Sprintf(format, a...))
}

func dropEffectiveCaps() error {
proc := cap.GetProc()
err := proc.ClearFlag(cap.Effective)
if err != nil {
return xerrors.Errorf("clear effective caps: %w", err)
}
err = proc.SetProc()
if err != nil {
return xerrors.Errorf("set proc: %w", err)
}
return nil
}
46 changes: 44 additions & 2 deletions agent/agentexec/cli_linux_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import (

"github.com/stretchr/testify/require"
"golang.org/x/sys/unix"
"golang.org/x/xerrors"

"github.com/coder/coder/v2/testutil"
)
Expand Down Expand Up @@ -50,6 +51,32 @@ func TestCLI(t *testing.T) {
requireOOMScore(t, cmd.Process.Pid, expectedOOM)
requireNiceScore(t, cmd.Process.Pid, expectedNice)
})

t.Run("Capabilities", func(t *testing.T) {
testdir := filepath.Dir(TestBin)
capDir := filepath.Join(testdir, "caps")
err := os.Mkdir(capDir, 0o755)
require.NoError(t, err)
bin := buildBinary(capDir)
// Try to set capabilities on the binary. This should work fine in CI but
// it's possible some developers may be working in an environment where they don't have the necessary permissions.
err = setCaps(t, bin, "cap_net_admin")
if os.Getenv("CI") != "" {
require.NoError(t, err)
} else if err != nil {
t.Skipf("unable to set capabilities for test: %v", err)
}
ctx := testutil.Context(t, testutil.WaitMedium)
cmd, path := binCmd(ctx, t, bin, 123, 12)
err = cmd.Start()
require.NoError(t, err)
go cmd.Wait()

waitForSentinel(ctx, t, cmd, path)
// This is what we're really testing, a binary with added capabilities requires setting dumpable.
requireOOMScore(t, cmd.Process.Pid, 123)
requireNiceScore(t, cmd.Process.Pid, 12)
})
}

func requireNiceScore(t *testing.T, pid int, score int) {
Expand Down Expand Up @@ -94,7 +121,7 @@ func waitForSentinel(ctx context.Context, t *testing.T, cmd *exec.Cmd, path stri
}
}

func cmd(ctx context.Context, t *testing.T, oom, nice int) (*exec.Cmd, string) {
func binCmd(ctx context.Context, t *testing.T, bin string, oom, nice int) (*exec.Cmd, string) {
var (
args = execArgs(oom, nice)
dir = t.TempDir()
Expand All @@ -103,7 +130,7 @@ func cmd(ctx context.Context, t *testing.T, oom, nice int) (*exec.Cmd, string) {

args = append(args, "sh", "-c", fmt.Sprintf("touch %s && sleep 10m", file))
//nolint:gosec
cmd := exec.CommandContext(ctx, TestBin, args...)
cmd := exec.CommandContext(ctx, bin, args...)

// We set this so we can also easily kill the sleep process the shell spawns.
cmd.SysProcAttr = &syscall.SysProcAttr{
Expand All @@ -127,6 +154,10 @@ func cmd(ctx context.Context, t *testing.T, oom, nice int) (*exec.Cmd, string) {
return cmd, file
}

func cmd(ctx context.Context, t *testing.T, oom, nice int) (*exec.Cmd, string) {
return binCmd(ctx, t, TestBin, oom, nice)
}

func expectedOOMScore(t *testing.T) int {
t.Helper()

Expand Down Expand Up @@ -171,3 +202,14 @@ func execArgs(oom int, nice int) []string {
execArgs = append(execArgs, "--")
return execArgs
}

func setCaps(t *testing.T, bin string, caps ...string) error {
t.Helper()

setcap := fmt.Sprintf("sudo -n setcap %s=ep %s", strings.Join(caps, ", "), bin)
out, err := exec.CommandContext(context.Background(), "sh", "-c", setcap).CombinedOutput()
if err != nil {
return xerrors.Errorf("setcap %q (%s): %w", setcap, out, err)
}
return nil
}
2 changes: 2 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,7 @@ require (
github.com/google/go-github/v61 v61.0.0
github.com/mocktools/go-smtp-mock/v2 v2.4.0
github.com/natefinch/atomic v1.0.1
kernel.org/pub/linux/libs/security/libcap/cap v1.2.73
)

require (
Expand Down Expand Up @@ -248,6 +249,7 @@ require (
github.com/vmihailenco/msgpack/v5 v5.4.1 // indirect
github.com/vmihailenco/tagparser/v2 v2.0.0 // indirect
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.54.0 // indirect
kernel.org/pub/linux/libs/security/libcap/psx v1.2.73 // indirect
)

require (
Expand Down
4 changes: 4 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -1266,6 +1266,10 @@ howett.net/plist v1.0.0 h1:7CrbWYbPPO/PyNy38b2EB/+gYbjCe2DXBxgtOOZbSQM=
howett.net/plist v1.0.0/go.mod h1:lqaXoTrLY4hg8tnEzNru53gicrbv7rrk+2xJA/7hw9g=
inet.af/peercred v0.0.0-20210906144145-0893ea02156a h1:qdkS8Q5/i10xU2ArJMKYhVa1DORzBfYS/qA2UK2jheg=
inet.af/peercred v0.0.0-20210906144145-0893ea02156a/go.mod h1:FjawnflS/udxX+SvpsMgZfdqx2aykOlkISeAsADi5IU=
kernel.org/pub/linux/libs/security/libcap/cap v1.2.73 h1:Th2b8jljYqkyZKS3aD3N9VpYsQpHuXLgea+SZUIfODA=
kernel.org/pub/linux/libs/security/libcap/cap v1.2.73/go.mod h1:hbeKwKcboEsxARYmcy/AdPVN11wmT/Wnpgv4k4ftyqY=
kernel.org/pub/linux/libs/security/libcap/psx v1.2.73 h1:SEAEUiPVylTD4vqqi+vtGkSnXeP2FcRO3FoZB1MklMw=
kernel.org/pub/linux/libs/security/libcap/psx v1.2.73/go.mod h1:+l6Ee2F59XiJ2I6WR5ObpC1utCQJZ/VLsEbQCD8RG24=
lukechampine.com/uint128 v1.3.0 h1:cDdUVfRwDUDovz610ABgFD17nXD4/uDgVHl2sC3+sbo=
lukechampine.com/uint128 v1.3.0/go.mod h1:c4eWIwlEGaxC/+H1VguhU4PHXNWDCDMUlWdIWl2j1gk=
modernc.org/cc/v3 v3.41.0 h1:QoR1Sn3YWlmA1T4vLaKZfawdVtSiGx8H+cEojbC7v1Q=
Expand Down
Loading