Skip to content

feat: implement agent process management #9461

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 21 commits into from
Sep 15, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
improve process detection
  • Loading branch information
sreya committed Sep 8, 2023
commit 7e59db6ed7af22500be7dcc9a4f243cbabe43858
44 changes: 32 additions & 12 deletions agent/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -1260,7 +1260,7 @@ func (a *agent) startReportingConnectionStats(ctx context.Context) {
}
}

var exemptProcesses = []string{"coder"}
var prioritizedProcs = []string{"coder"}

func (a *agent) manageProcessPriorityLoop(ctx context.Context) {
ticker := time.NewTicker(time.Minute)
Expand All @@ -1269,7 +1269,7 @@ func (a *agent) manageProcessPriorityLoop(ctx context.Context) {
const (
procDir = agentproc.DefaultProcDir
niceness = 10
oomScoreAdj = -1000
oomScoreAdj = 100
)

if val := a.envVars[EnvProcMemNice]; val == "" || runtime.GOOS != "linux" {
Expand All @@ -1284,7 +1284,7 @@ func (a *agent) manageProcessPriorityLoop(ctx context.Context) {
for {
select {
case <-ticker.C:
procs, err := agentproc.List(a.filesystem, agentproc.DefaultProcDir)
procs, err := agentproc.List(a.filesystem, a.syscaller, agentproc.DefaultProcDir)
if err != nil {
a.logger.Error(ctx, "failed to list procs",
slog.F("dir", agentproc.DefaultProcDir),
Expand All @@ -1295,31 +1295,52 @@ func (a *agent) manageProcessPriorityLoop(ctx context.Context) {
for _, proc := range procs {
// Trim off the path e.g. "./coder" -> "coder"
name := filepath.Base(proc.Name())
if slices.Contains(exemptProcesses, name) {
a.logger.Debug(ctx, "skipping exempt process",
// If the process is prioritized we should adjust
// its oom_score_adj and avoid lowering its niceness.
if slices.Contains(prioritizedProcs, name) {
err = proc.SetOOMAdj(oomScoreAdj)
if err != nil {
a.logger.Error(ctx, "unable to set proc oom_score_adj",
slog.F("name", proc.Name()),
slog.F("pid", proc.PID),
slog.F("oom_score_adj", oomScoreAdj),
slog.Error(err),
)
continue
}

a.logger.Debug(ctx, "decreased process oom_score",
slog.F("name", proc.Name()),
slog.F("pid", proc.PID),
slog.F("oom_score_adj", oomScoreAdj),
)
continue
}

err := proc.SetNiceness(a.syscaller, niceness)
score, err := proc.Nice(a.syscaller)
if err != nil {
a.logger.Error(ctx, "unable to set proc niceness",
a.logger.Error(ctx, "unable to get proc niceness",
slog.F("name", proc.Name()),
slog.F("pid", proc.PID),
slog.F("niceness", niceness),
slog.Error(err),
)
continue
}
if score != 20 {
a.logger.Error(ctx, "skipping process due to custom niceness",
slog.F("name", proc.Name()),
slog.F("pid", proc.PID),
slog.F("niceness", score),
)
continue
}

err = proc.SetOOMAdj(oomScoreAdj)
err = proc.SetNiceness(a.syscaller, niceness)
if err != nil {
a.logger.Error(ctx, "unable to set proc oom_score_adj",
a.logger.Error(ctx, "unable to set proc niceness",
slog.F("name", proc.Name()),
slog.F("pid", proc.PID),
slog.F("oom_score_adj", oomScoreAdj),
slog.F("niceness", niceness),
slog.Error(err),
)
continue
Expand All @@ -1329,7 +1350,6 @@ func (a *agent) manageProcessPriorityLoop(ctx context.Context) {
slog.F("name", proc.Name()),
slog.F("pid", proc.PID),
slog.F("niceness", niceness),
slog.F("oom_score_adj", oomScoreAdj),
)
}
case <-ctx.Done():
Expand Down
62 changes: 61 additions & 1 deletion agent/agentproc/proc.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ const DefaultProcDir = "/proc"

// Syscaller is the set of process-related system calls this package relies
// on. Functions in this package receive it as an argument instead of invoking
// the system calls directly.
type Syscaller interface {
// SetPriority applies priority to the process identified by pid.
SetPriority(pid int32, priority int) error
// GetPriority reports the current priority of the process identified by pid.
GetPriority(pid int32) (int, error)
// Kill delivers sig to the process identified by pid.
Kill(pid int32, sig syscall.Signal) error
}

// UnixSyscaller is a stateless type whose methods issue the corresponding
// system calls through the unix and syscall packages.
type UnixSyscaller struct{}
Expand All @@ -27,6 +29,23 @@ func (UnixSyscaller) SetPriority(pid int32, nice int) error {
return nil
}

// GetPriority returns the priority of the process identified by pid exactly
// as reported by unix.Getpriority. On failure it returns 0 and the error
// wrapped with a "get priority" prefix.
//
// NOTE(review): the value is passed through unmodified; presumably on Linux
// this is the raw kernel encoding rather than the conventional nice range —
// confirm against callers that compare the result.
func (UnixSyscaller) GetPriority(pid int32) (int, error) {
	// which selects how the second argument is interpreted; 0 presumably
	// corresponds to PRIO_PROCESS (a single process id) — TODO confirm.
	const which = 0

	prio, err := unix.Getpriority(which, int(pid))
	if err == nil {
		return prio, nil
	}
	return 0, xerrors.Errorf("get priority: %w", err)
}

// Kill sends sig to the process identified by pid via syscall.Kill.
//
// pid is an int32 so that this method has the same signature as
// Syscaller.Kill; with a plain int parameter UnixSyscaller would not satisfy
// the Syscaller interface. The value is widened to int only at the syscall
// boundary.
//
// Any failure is returned wrapped with a "kill" prefix, so callers that need
// the underlying syscall.Errno must unwrap the error rather than type-assert
// on it directly.
func (UnixSyscaller) Kill(pid int32, sig syscall.Signal) error {
	if err := syscall.Kill(int(pid), sig); err != nil {
		return xerrors.Errorf("kill: %w", err)
	}

	return nil
}

type Process struct {
Dir string
CmdLine string
Expand Down Expand Up @@ -56,13 +75,21 @@ func (p *Process) SetNiceness(sc Syscaller, score int) error {
return nil
}

// Nice asks sc for the current priority of the process with p.PID and returns
// it unchanged. If the lookup fails it returns 0 and an error that names the
// process's command line.
func (p *Process) Nice(sc Syscaller) (int, error) {
	prio, err := sc.GetPriority(p.PID)
	if err == nil {
		return prio, nil
	}
	return 0, xerrors.Errorf("get priority for %q: %w", p.CmdLine, err)
}

// Name returns the first NUL-separated field of the process's command line.
// When CmdLine contains no NUL byte (including when it is empty) the whole
// string is returned.
func (p *Process) Name() string {
	// Only the text before the first separator is needed, so the remaining
	// arguments are never split out.
	first, _, _ := strings.Cut(p.CmdLine, "\x00")
	return first
}

func List(fs afero.Fs, dir string) ([]*Process, error) {
func List(fs afero.Fs, syscaller Syscaller, dir string) ([]*Process, error) {
d, err := fs.Open(dir)
if err != nil {
return nil, xerrors.Errorf("open dir %q: %w", dir, err)
Expand All @@ -79,6 +106,16 @@ func List(fs afero.Fs, dir string) ([]*Process, error) {
if err != nil {
continue
}

// Check that the process still exists.
exists, err := isProcessExist(syscaller, int32(pid), syscall.Signal(0))
if err != nil {
return nil, xerrors.Errorf("check process exists: %w", err)
}
if !exists {
continue
}

cmdline, err := afero.ReadFile(fs, filepath.Join(dir, entry, "cmdline"))
if err != nil {
var errNo syscall.Errno
Expand All @@ -97,3 +134,26 @@ func List(fs afero.Fs, dir string) ([]*Process, error) {

return processes, nil
}

// isProcessExist reports whether the process identified by pid exists, judged
// by the result of delivering sig to it through syscaller.
//
// Results:
//   - Kill succeeds: (true, nil).
//   - The error text is exactly "os: process already finished": (false, nil).
//   - The error chain contains syscall.ESRCH: (false, nil).
//   - The error chain contains syscall.EPERM: (true, nil); the signal was
//     refused, which still implies there is a process with that pid.
//   - The error chain contains no syscall.Errno: (false, err) unchanged.
//   - Any other errno: (false, err wrapped with a "kill" prefix).
func isProcessExist(syscaller Syscaller, pid int32, sig syscall.Signal) (bool, error) {
	err := syscaller.Kill(pid, sig)
	if err == nil {
		return true, nil
	}
	if err.Error() == "os: process already finished" {
		return false, nil
	}

	// Syscaller implementations may wrap the errno with extra context
	// (UnixSyscaller.Kill does), so search the whole error chain instead of
	// type-asserting on the outermost error, which would never match.
	var errno syscall.Errno
	if !xerrors.As(err, &errno) {
		return false, err
	}
	switch errno {
	case syscall.ESRCH:
		return false, nil
	case syscall.EPERM:
		return true, nil
	}

	return false, xerrors.Errorf("kill: %w", err)
}