Skip to content

fix: add agent exec abstraction #15717

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Dec 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion agent/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ import (
"tailscale.com/util/clientmetric"

"cdr.dev/slog"
"github.com/coder/coder/v2/agent/agentexec"
"github.com/coder/coder/v2/agent/agentscripts"
"github.com/coder/coder/v2/agent/agentssh"
"github.com/coder/coder/v2/agent/proto"
Expand Down Expand Up @@ -80,6 +81,7 @@ type Options struct {
ReportMetadataInterval time.Duration
ServiceBannerRefreshInterval time.Duration
BlockFileTransfer bool
Execer agentexec.Execer
}

type Client interface {
Expand Down Expand Up @@ -139,6 +141,10 @@ func New(options Options) Agent {
prometheusRegistry = prometheus.NewRegistry()
}

if options.Execer == nil {
options.Execer = agentexec.DefaultExecer
}

hardCtx, hardCancel := context.WithCancel(context.Background())
gracefulCtx, gracefulCancel := context.WithCancel(hardCtx)
a := &agent{
Expand Down Expand Up @@ -171,6 +177,7 @@ func New(options Options) Agent {

prometheusRegistry: prometheusRegistry,
metrics: newAgentMetrics(prometheusRegistry),
execer: options.Execer,
}
// Initially, we have a closed channel, reflecting the fact that we are not initially connected.
// Each time we connect we replace the channel (while holding the closeMutex) with a new one
Expand Down Expand Up @@ -239,6 +246,7 @@ type agent struct {
// metrics are prometheus registered metrics that will be collected and
// labeled in Coder with the agent + workspace.
metrics *agentMetrics
execer agentexec.Execer
}

func (a *agent) TailnetConn() *tailnet.Conn {
Expand All @@ -247,7 +255,7 @@ func (a *agent) TailnetConn() *tailnet.Conn {

func (a *agent) init() {
// pass the "hard" context because we explicitly close the SSH server as part of graceful shutdown.
sshSrv, err := agentssh.NewServer(a.hardCtx, a.logger.Named("ssh-server"), a.prometheusRegistry, a.filesystem, &agentssh.Config{
sshSrv, err := agentssh.NewServer(a.hardCtx, a.logger.Named("ssh-server"), a.prometheusRegistry, a.filesystem, a.execer, &agentssh.Config{
MaxTimeout: a.sshMaxTimeout,
MOTDFile: func() string { return a.manifest.Load().MOTDFile },
AnnouncementBanners: func() *[]codersdk.BannerConfig { return a.announcementBanners.Load() },
Expand Down
3 changes: 0 additions & 3 deletions agent/agentexec/cli_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,6 @@ import (
"golang.org/x/xerrors"
)

// unset is set to an invalid value for nice and oom scores.
const unset = -2000

// CLI runs the agent-exec command. It should only be called by the cli package.
func CLI() error {
// We lock the OS thread here to avoid a race condition where the nice priority
Expand Down
103 changes: 72 additions & 31 deletions agent/agentexec/exec.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,60 +20,101 @@ const (
EnvProcPrioMgmt = "CODER_PROC_PRIO_MGMT"
EnvProcOOMScore = "CODER_PROC_OOM_SCORE"
EnvProcNiceScore = "CODER_PROC_NICE_SCORE"
)

// CommandContext returns an exec.Cmd that calls "coder agent-exec" prior to exec'ing
// the provided command if CODER_PROC_PRIO_MGMT is set, otherwise a normal exec.Cmd
// is returned. All instances of exec.Cmd should flow through this function to ensure
// proper resource constraints are applied to the child process.
func CommandContext(ctx context.Context, cmd string, args ...string) (*exec.Cmd, error) {
cmd, args, err := agentExecCmd(cmd, args...)
if err != nil {
return nil, xerrors.Errorf("agent exec cmd: %w", err)
}
return exec.CommandContext(ctx, cmd, args...), nil
}
// unset is set to an invalid value for nice and oom scores.
unset = -2000
)

// PTYCommandContext returns an pty.Cmd that calls "coder agent-exec" prior to exec'ing
// the provided command if CODER_PROC_PRIO_MGMT is set, otherwise a normal pty.Cmd
// is returned. All instances of pty.Cmd should flow through this function to ensure
// proper resource constraints are applied to the child process.
func PTYCommandContext(ctx context.Context, cmd string, args ...string) (*pty.Cmd, error) {
cmd, args, err := agentExecCmd(cmd, args...)
if err != nil {
return nil, xerrors.Errorf("agent exec cmd: %w", err)
}
return pty.CommandContext(ctx, cmd, args...), nil
var DefaultExecer Execer = execer{}

// Execer defines an abstraction for creating exec.Cmd variants. It's unfortunately
// necessary because we need to be able to wrap child processes with "coder agent-exec"
// for templates that expect the agent to manage process priority.
type Execer interface {
// CommandContext returns an exec.Cmd that calls "coder agent-exec" prior to exec'ing
// the provided command if CODER_PROC_PRIO_MGMT is set, otherwise a normal exec.Cmd
// is returned. All instances of exec.Cmd should flow through this function to ensure
// proper resource constraints are applied to the child process.
CommandContext(ctx context.Context, cmd string, args ...string) *exec.Cmd
// PTYCommandContext returns an pty.Cmd that calls "coder agent-exec" prior to exec'ing
// the provided command if CODER_PROC_PRIO_MGMT is set, otherwise a normal pty.Cmd
// is returned. All instances of pty.Cmd should flow through this function to ensure
// proper resource constraints are applied to the child process.
PTYCommandContext(ctx context.Context, cmd string, args ...string) *pty.Cmd
}

func agentExecCmd(cmd string, args ...string) (string, []string, error) {
func NewExecer() (Execer, error) {
_, enabled := os.LookupEnv(EnvProcPrioMgmt)
if runtime.GOOS != "linux" || !enabled {
return cmd, args, nil
return DefaultExecer, nil
}

executable, err := os.Executable()
if err != nil {
return "", nil, xerrors.Errorf("get executable: %w", err)
return nil, xerrors.Errorf("get executable: %w", err)
}

bin, err := filepath.EvalSymlinks(executable)
if err != nil {
return "", nil, xerrors.Errorf("eval symlinks: %w", err)
return nil, xerrors.Errorf("eval symlinks: %w", err)
}

oomScore, ok := envValInt(EnvProcOOMScore)
if !ok {
oomScore = unset
}

niceScore, ok := envValInt(EnvProcNiceScore)
if !ok {
niceScore = unset
}

return priorityExecer{
binPath: bin,
oomScore: oomScore,
niceScore: niceScore,
}, nil
}

type execer struct{}

func (execer) CommandContext(ctx context.Context, cmd string, args ...string) *exec.Cmd {
return exec.CommandContext(ctx, cmd, args...)
}

func (execer) PTYCommandContext(ctx context.Context, cmd string, args ...string) *pty.Cmd {
return pty.CommandContext(ctx, cmd, args...)
}

type priorityExecer struct {
binPath string
oomScore int
niceScore int
}

func (e priorityExecer) CommandContext(ctx context.Context, cmd string, args ...string) *exec.Cmd {
cmd, args = e.agentExecCmd(cmd, args...)
return exec.CommandContext(ctx, cmd, args...)
}

func (e priorityExecer) PTYCommandContext(ctx context.Context, cmd string, args ...string) *pty.Cmd {
cmd, args = e.agentExecCmd(cmd, args...)
return pty.CommandContext(ctx, cmd, args...)
}

func (e priorityExecer) agentExecCmd(cmd string, args ...string) (string, []string) {
execArgs := []string{"agent-exec"}
if score, ok := envValInt(EnvProcOOMScore); ok {
execArgs = append(execArgs, oomScoreArg(score))
if e.oomScore != unset {
execArgs = append(execArgs, oomScoreArg(e.oomScore))
}

if score, ok := envValInt(EnvProcNiceScore); ok {
execArgs = append(execArgs, niceScoreArg(score))
if e.niceScore != unset {
execArgs = append(execArgs, niceScoreArg(e.niceScore))
}
execArgs = append(execArgs, "--", cmd)
execArgs = append(execArgs, args...)

return bin, execArgs, nil
return e.binPath, execArgs
}

// envValInt searches for a key in a list of environment variables and parses it to an int.
Expand Down
84 changes: 84 additions & 0 deletions agent/agentexec/exec_internal_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
package agentexec

import (
"context"
"os/exec"
"testing"

"github.com/stretchr/testify/require"
)

func TestExecer(t *testing.T) {
t.Parallel()

t.Run("Default", func(t *testing.T) {
t.Parallel()

cmd := DefaultExecer.CommandContext(context.Background(), "sh", "-c", "sleep")

path, err := exec.LookPath("sh")
require.NoError(t, err)
require.Equal(t, path, cmd.Path)
require.Equal(t, []string{"sh", "-c", "sleep"}, cmd.Args)
})

t.Run("Priority", func(t *testing.T) {
t.Parallel()

t.Run("OK", func(t *testing.T) {
t.Parallel()

e := priorityExecer{
binPath: "/foo/bar/baz",
oomScore: unset,
niceScore: unset,
}

cmd := e.CommandContext(context.Background(), "sh", "-c", "sleep")
require.Equal(t, e.binPath, cmd.Path)
require.Equal(t, []string{e.binPath, "agent-exec", "--", "sh", "-c", "sleep"}, cmd.Args)
})

t.Run("Nice", func(t *testing.T) {
t.Parallel()

e := priorityExecer{
binPath: "/foo/bar/baz",
oomScore: unset,
niceScore: 10,
}

cmd := e.CommandContext(context.Background(), "sh", "-c", "sleep")
require.Equal(t, e.binPath, cmd.Path)
require.Equal(t, []string{e.binPath, "agent-exec", "--coder-nice=10", "--", "sh", "-c", "sleep"}, cmd.Args)
})

t.Run("OOM", func(t *testing.T) {
t.Parallel()

e := priorityExecer{
binPath: "/foo/bar/baz",
oomScore: 123,
niceScore: unset,
}

cmd := e.CommandContext(context.Background(), "sh", "-c", "sleep")
require.Equal(t, e.binPath, cmd.Path)
require.Equal(t, []string{e.binPath, "agent-exec", "--coder-oom=123", "--", "sh", "-c", "sleep"}, cmd.Args)
})

t.Run("Both", func(t *testing.T) {
t.Parallel()

e := priorityExecer{
binPath: "/foo/bar/baz",
oomScore: 432,
niceScore: 14,
}

cmd := e.CommandContext(context.Background(), "sh", "-c", "sleep")
require.Equal(t, e.binPath, cmd.Path)
require.Equal(t, []string{e.binPath, "agent-exec", "--coder-oom=432", "--coder-nice=14", "--", "sh", "-c", "sleep"}, cmd.Args)
})
})
}
119 changes: 0 additions & 119 deletions agent/agentexec/exec_test.go

This file was deleted.

Loading
Loading