Skip to content

feat: add support for coder_script #9584

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 68 commits into from
Sep 25, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
68 commits
Select commit Hold shift + click to select a range
7199651
Add basic migrations
kylecarbs Sep 3, 2023
51b0079
Merge branch 'main' into execscripts
kylecarbs Sep 5, 2023
c18a401
Improve schema
kylecarbs Sep 5, 2023
9ae6e62
Merge branch 'main' into execscripts
kylecarbs Sep 5, 2023
70ebaf3
Refactor agent scripts into its own package
kylecarbs Sep 7, 2023
89c7af1
Support legacy start and stop script format
kylecarbs Sep 7, 2023
d5df133
Pipe the scripts!
kylecarbs Sep 7, 2023
58964c9
Finish the piping
kylecarbs Sep 7, 2023
00a4e73
Fix context usage
kylecarbs Sep 7, 2023
942fde6
It works!
kylecarbs Sep 7, 2023
92dedad
Fix sql query
kylecarbs Sep 7, 2023
7cf6f0c
Fix SQL query
kylecarbs Sep 7, 2023
5b6f264
Rename `LogSourceID` -> `SourceID`
kylecarbs Sep 7, 2023
e2c9f91
Fix the FE
kylecarbs Sep 7, 2023
6fab755
Merge branch 'main' into execscripts
kylecarbs Sep 7, 2023
51e08f4
fmt
kylecarbs Sep 7, 2023
9a38131
Rename migrations
kylecarbs Sep 7, 2023
c0fac6b
Fix log tests
kylecarbs Sep 8, 2023
f7f1c7a
Fix lint err
kylecarbs Sep 8, 2023
f0a8f53
Fix gen
kylecarbs Sep 8, 2023
66f9185
Fix story type
kylecarbs Sep 8, 2023
78f01d1
Rename source to script
kylecarbs Sep 13, 2023
75388f7
Fix schema jank
kylecarbs Sep 13, 2023
8810326
Uncomment test
kylecarbs Sep 13, 2023
45b395e
Rename proto to TimeoutSeconds
kylecarbs Sep 13, 2023
40bcd9d
Fix comments
kylecarbs Sep 17, 2023
b744c9f
Fix comments
kylecarbs Sep 17, 2023
c844462
Fix legacy endpoint without specified log_source
kylecarbs Sep 18, 2023
139bbf9
Merge branch 'main' into execscripts
kylecarbs Sep 18, 2023
a21f085
Fix non-blocking by default in agent
kylecarbs Sep 18, 2023
49808b1
Fix resources tests
kylecarbs Sep 18, 2023
a47fe28
Fix dbfake
kylecarbs Sep 18, 2023
da40c79
Fix resources
kylecarbs Sep 18, 2023
3631cfa
Fix linting I think
kylecarbs Sep 18, 2023
77bc6e1
Add fixtures
kylecarbs Sep 18, 2023
5b2bd86
fmt
kylecarbs Sep 18, 2023
9d1a4fe
Fix startup script behavior
kylecarbs Sep 18, 2023
ad0d678
Fix comments
kylecarbs Sep 19, 2023
39fb9d3
Fix context
kylecarbs Sep 21, 2023
444decb
Merge branch 'main' into execscripts
kylecarbs Sep 21, 2023
c4d3cb8
Fix cancel
kylecarbs Sep 21, 2023
4c17a8b
Fix SQL tests
kylecarbs Sep 21, 2023
9591e34
Fix e2e tests
kylecarbs Sep 21, 2023
a4a0270
Interrupt on Windows
kylecarbs Sep 21, 2023
35c1c11
Merge branch 'main' into execscripts
matifali Sep 22, 2023
dd5abdf
Fix agent leaking script process
kylecarbs Sep 24, 2023
9e85d7b
Fix migrations
kylecarbs Sep 24, 2023
c26a01b
Merge branch 'main' into execscripts
kylecarbs Sep 24, 2023
9513acf
Fix stories
kylecarbs Sep 24, 2023
2e3611b
Merge branch 'main' into execscripts
matifali Sep 25, 2023
e8b1e43
Fix duplicate logs appearing
kylecarbs Sep 25, 2023
ee1fe11
Merge branch 'execscripts' of https://github.com/coder/coder into exe…
kylecarbs Sep 25, 2023
b837aac
Gen
kylecarbs Sep 25, 2023
f1ff5cc
Fix log location
kylecarbs Sep 25, 2023
4ec3a87
Fix tests
kylecarbs Sep 25, 2023
d36ab53
Fix tests
kylecarbs Sep 25, 2023
eeddb52
Fix log output
kylecarbs Sep 25, 2023
aa5540b
Show display name in output
kylecarbs Sep 25, 2023
f866a92
Fix print
kylecarbs Sep 25, 2023
a99b6dd
Return timeout on start context
kylecarbs Sep 25, 2023
1865590
Gen
kylecarbs Sep 25, 2023
3b26aa0
Fix fixture
kylecarbs Sep 25, 2023
f2f69bb
Fix the agent status
kylecarbs Sep 25, 2023
aa68796
Fix startup timeout msg
kylecarbs Sep 25, 2023
73a7a78
Fix command using shared context
kylecarbs Sep 25, 2023
d1f4963
Fix timeout draining
kylecarbs Sep 25, 2023
784e616
Change signal type
kylecarbs Sep 25, 2023
7ac782b
Add deterministic colors to startup script logs
kylecarbs Sep 25, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
186 changes: 39 additions & 147 deletions agent/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ import (
"net/http"
"net/netip"
"os"
"os/exec"
"os/user"
"path/filepath"
"runtime"
Expand All @@ -37,6 +36,7 @@ import (

"cdr.dev/slog"
"github.com/coder/coder/v2/agent/agentproc"
"github.com/coder/coder/v2/agent/agentscripts"
"github.com/coder/coder/v2/agent/agentssh"
"github.com/coder/coder/v2/agent/reconnectingpty"
"github.com/coder/coder/v2/buildinfo"
Expand Down Expand Up @@ -196,6 +196,7 @@ type agent struct {

manifest atomic.Pointer[agentsdk.Manifest] // manifest is atomic because values can change after reconnection.
reportMetadataInterval time.Duration
scriptRunner *agentscripts.Runner
serviceBanner atomic.Pointer[codersdk.ServiceBannerConfig] // serviceBanner is atomic because it is periodically updated.
serviceBannerRefreshInterval time.Duration
sessionToken atomic.Pointer[string]
Expand Down Expand Up @@ -238,7 +239,13 @@ func (a *agent) init(ctx context.Context) {
sshSrv.Manifest = &a.manifest
sshSrv.ServiceBanner = &a.serviceBanner
a.sshServer = sshSrv

a.scriptRunner = agentscripts.New(agentscripts.Options{
LogDir: a.logDir,
Logger: a.logger,
SSHServer: sshSrv,
Filesystem: a.filesystem,
PatchLogs: a.client.PatchLogs,
})
go a.runLoop(ctx)
}

Expand Down Expand Up @@ -657,41 +664,29 @@ func (a *agent) run(ctx context.Context) error {
}
}

lifecycleState := codersdk.WorkspaceAgentLifecycleReady
scriptDone := make(chan error, 1)
err = a.trackConnGoroutine(func() {
defer close(scriptDone)
scriptDone <- a.runStartupScript(ctx, manifest.StartupScript)
})
err = a.scriptRunner.Init(manifest.Scripts)
if err != nil {
return xerrors.Errorf("track startup script: %w", err)
return xerrors.Errorf("init script runner: %w", err)
}
go func() {
var timeout <-chan time.Time
// If timeout is zero, an older version of the coder
// provider was used. Otherwise a timeout is always > 0.
if manifest.StartupScriptTimeout > 0 {
t := time.NewTimer(manifest.StartupScriptTimeout)
defer t.Stop()
timeout = t.C
}

var err error
select {
case err = <-scriptDone:
case <-timeout:
a.logger.Warn(ctx, "script timed out", slog.F("lifecycle", "startup"), slog.F("timeout", manifest.StartupScriptTimeout))
a.setLifecycle(ctx, codersdk.WorkspaceAgentLifecycleStartTimeout)
err = <-scriptDone // The script can still complete after a timeout.
}
err = a.trackConnGoroutine(func() {
err := a.scriptRunner.Execute(ctx, func(script codersdk.WorkspaceAgentScript) bool {
return script.RunOnStart
})
if err != nil {
if errors.Is(err, context.Canceled) {
return
a.logger.Warn(ctx, "startup script failed", slog.Error(err))
if errors.Is(err, agentscripts.ErrTimeout) {
a.setLifecycle(ctx, codersdk.WorkspaceAgentLifecycleStartTimeout)
} else {
a.setLifecycle(ctx, codersdk.WorkspaceAgentLifecycleStartError)
}
lifecycleState = codersdk.WorkspaceAgentLifecycleStartError
} else {
a.setLifecycle(ctx, codersdk.WorkspaceAgentLifecycleReady)
}
a.setLifecycle(ctx, lifecycleState)
}()
a.scriptRunner.StartCron()
})
if err != nil {
return xerrors.Errorf("track conn goroutine: %w", err)
}
}

// This automatically closes when the context ends!
Expand Down Expand Up @@ -1006,93 +1001,6 @@ func (a *agent) runDERPMapSubscriber(ctx context.Context, network *tailnet.Conn)
}
}

// runStartupScript hands script to runScript, tagging the run with the
// "startup" lifecycle label. It returns whatever runScript returns.
func (a *agent) runStartupScript(ctx context.Context, script string) error {
	const lifecycle = "startup"
	return a.runScript(ctx, lifecycle, script)
}

// runShutdownScript hands script to runScript, tagging the run with the
// "shutdown" lifecycle label. It returns whatever runScript returns.
func (a *agent) runShutdownScript(ctx context.Context, script string) error {
	const lifecycle = "shutdown"
	return a.runScript(ctx, lifecycle, script)
}

// runScript executes script as a command created through the agent's SSH
// server and blocks until it finishes.
//
// lifecycle is a label (the callers in this file pass "startup" or
// "shutdown") used in log messages, in error messages, and in the name of the
// log file, which is "coder-<lifecycle>-script.log" inside a.logDir.
//
// The command's stdout and stderr are always written to that log file. When
// lifecycle is "startup" they are additionally forwarded through
// agentsdk.LogsSender/StartupLogsWriter (stdout at info level, stderr at
// error level).
//
// It returns nil when script is empty or the command succeeds. If the command
// fails and ctx is already done, ctx.Err() is returned; otherwise the run
// error is returned wrapped. Failures to open the log file or to create the
// command are returned wrapped as well.
func (a *agent) runScript(ctx context.Context, lifecycle, script string) (err error) {
// Nothing to run: treat an empty script as success.
if script == "" {
return nil
}

logger := a.logger.With(slog.F("lifecycle", lifecycle))

logger.Info(ctx, fmt.Sprintf("running %s script", lifecycle), slog.F("script", script))
// NOTE(review): the file is opened with O_CREATE|O_RDWR only (no O_TRUNC or
// O_APPEND), so writes start at offset 0 of any existing file — confirm that
// leftover content from a previous, longer log is acceptable.
fileWriter, err := a.filesystem.OpenFile(filepath.Join(a.logDir, fmt.Sprintf("coder-%s-script.log", lifecycle)), os.O_CREATE|os.O_RDWR, 0o600)
if err != nil {
return xerrors.Errorf("open %s script log file: %w", lifecycle, err)
}
// Close errors are only logged. The err declared inside the closure shadows
// the named result, so the function's return value is not affected.
defer func() {
err := fileWriter.Close()
if err != nil {
logger.Warn(ctx, fmt.Sprintf("close %s script log file", lifecycle), slog.Error(err))
}
}()

cmdPty, err := a.sshServer.CreateCommand(ctx, script, nil)
if err != nil {
return xerrors.Errorf("%s script: create command: %w", lifecycle, err)
}
cmd := cmdPty.AsExec()

// By default both streams go to the log file only.
var stdout, stderr io.Writer = fileWriter, fileWriter
if lifecycle == "startup" {
send, flushAndClose := agentsdk.LogsSender(a.client.PatchLogs, logger)
// If ctx is canceled here (or in a writer below), we may be
// discarding logs, but that's okay because we're shutting down
// anyway. We could consider creating a new context here if we
// want better control over flush during shutdown.
defer func() {
if err := flushAndClose(ctx); err != nil {
logger.Warn(ctx, "flush startup logs failed", slog.Error(err))
}
}()

// Deferred calls run in reverse order, so both writers below are closed
// before flushAndClose above is invoked.
infoW := agentsdk.StartupLogsWriter(ctx, send, codersdk.WorkspaceAgentLogSourceStartupScript, codersdk.LogLevelInfo)
defer infoW.Close()
errW := agentsdk.StartupLogsWriter(ctx, send, codersdk.WorkspaceAgentLogSourceStartupScript, codersdk.LogLevelError)
defer errW.Close()

// Startup output is written to both the log file and the log sender.
stdout = io.MultiWriter(fileWriter, infoW)
stderr = io.MultiWriter(fileWriter, errW)
}

cmd.Stdout = stdout
cmd.Stderr = stderr

start := time.Now()
// Log the outcome on the way out. This closure reads the named result err,
// so it sees the value set by whichever return statement is taken below.
defer func() {
end := time.Now()
execTime := end.Sub(start)
exitCode := 0
if err != nil {
exitCode = 255 // Unknown status.
var exitError *exec.ExitError
if xerrors.As(err, &exitError) {
exitCode = exitError.ExitCode()
}
logger.Warn(ctx, fmt.Sprintf("%s script failed", lifecycle), slog.F("execution_time", execTime), slog.F("exit_code", exitCode), slog.Error(err))
} else {
logger.Info(ctx, fmt.Sprintf("%s script completed", lifecycle), slog.F("execution_time", execTime), slog.F("exit_code", exitCode))
}
}()

err = cmd.Run()
if err != nil {
// cmd.Run does not return a context canceled error, it returns "signal: killed".
if ctx.Err() != nil {
return ctx.Err()
}

return xerrors.Errorf("%s script: run: %w", lifecycle, err)
}
return nil
}

func (a *agent) handleReconnectingPTY(ctx context.Context, logger slog.Logger, msg codersdk.WorkspaceAgentReconnectingPTYInit, conn net.Conn) (retErr error) {
defer conn.Close()
a.metrics.connectionsTotal.Add(1)
Expand Down Expand Up @@ -1475,39 +1383,23 @@ func (a *agent) Close() error {
}

lifecycleState := codersdk.WorkspaceAgentLifecycleOff
if manifest := a.manifest.Load(); manifest != nil && manifest.ShutdownScript != "" {
scriptDone := make(chan error, 1)
go func() {
defer close(scriptDone)
scriptDone <- a.runShutdownScript(ctx, manifest.ShutdownScript)
}()

var timeout <-chan time.Time
// If timeout is zero, an older version of the coder
// provider was used. Otherwise a timeout is always > 0.
if manifest.ShutdownScriptTimeout > 0 {
t := time.NewTimer(manifest.ShutdownScriptTimeout)
defer t.Stop()
timeout = t.C
}

var err error
select {
case err = <-scriptDone:
case <-timeout:
a.logger.Warn(ctx, "script timed out", slog.F("lifecycle", "shutdown"), slog.F("timeout", manifest.ShutdownScriptTimeout))
a.setLifecycle(ctx, codersdk.WorkspaceAgentLifecycleShutdownTimeout)
err = <-scriptDone // The script can still complete after a timeout.
}
if err != nil {
err = a.scriptRunner.Execute(ctx, func(script codersdk.WorkspaceAgentScript) bool {
return script.RunOnStop
})
if err != nil {
if errors.Is(err, agentscripts.ErrTimeout) {
lifecycleState = codersdk.WorkspaceAgentLifecycleShutdownTimeout
} else {
lifecycleState = codersdk.WorkspaceAgentLifecycleShutdownError
}
}

// Set final state and wait for it to be reported because context
// cancellation will stop the report loop.
a.setLifecycle(ctx, lifecycleState)

err = a.scriptRunner.Close()
if err != nil {
a.logger.Error(ctx, "script runner close", slog.Error(err))
}

// Wait for the lifecycle to be reported, but don't wait forever so
// that we don't break user expectations.
ctx, cancel := context.WithTimeout(ctx, 5*time.Second)
Expand Down
Loading