Skip to content

fix(agent/agentscripts): display informative error for ErrWaitDelay #10407

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Oct 27, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion agent/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -743,7 +743,7 @@ func (a *agent) run(ctx context.Context) error {
return script.RunOnStart
})
if err != nil {
a.logger.Warn(ctx, "startup script failed", slog.Error(err))
a.logger.Warn(ctx, "startup script(s) failed", slog.Error(err))
if errors.Is(err, agentscripts.ErrTimeout) {
a.setLifecycle(ctx, codersdk.WorkspaceAgentLifecycleStartTimeout)
} else {
Expand Down Expand Up @@ -1465,6 +1465,7 @@ func (a *agent) Close() error {
return script.RunOnStop
})
if err != nil {
a.logger.Warn(ctx, "shutdown script(s) failed", slog.Error(err))
if errors.Is(err, agentscripts.ErrTimeout) {
lifecycleState = codersdk.WorkspaceAgentLifecycleShutdownTimeout
} else {
Expand Down
25 changes: 24 additions & 1 deletion agent/agentscripts/agentscripts.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,14 @@ import (
var (
// ErrTimeout is returned when a script times out.
ErrTimeout = xerrors.New("script timed out")
// ErrOutputPipesOpen is returned when a script exits leaving the output
// pipe(s) (stdout, stderr) open. This happens because we set WaitDelay on
// the command, which gives us two things:
//
// 1. The ability to ensure that a script exits (this is important for e.g.
// blocking login, and avoiding doing so indefinitely)
// 2. Improved command cancellation on timeout
ErrOutputPipesOpen = xerrors.New("script exited without closing output pipes")

parser = cron.NewParser(cron.Second | cron.Minute | cron.Hour | cron.Dom | cron.Month | cron.DowOptional)
)
Expand Down Expand Up @@ -248,7 +256,22 @@ func (r *Runner) run(ctx context.Context, script codersdk.WorkspaceAgentScript)
err = cmdCtx.Err()
case err = <-cmdDone:
}
if errors.Is(err, context.DeadlineExceeded) {
switch {
case errors.Is(err, exec.ErrWaitDelay):
err = ErrOutputPipesOpen
message := fmt.Sprintf("script exited successfully, but output pipes were not closed after %s", cmd.WaitDelay)
details := fmt.Sprint(
"This usually means a child process was started with references to stdout or stderr. As a result, this " +
"process may now have been terminated. Consider redirecting the output or using a separate " +
"\"coder_script\" for the process, see " +
"https://coder.com/docs/v2/latest/templates/troubleshooting#startup-script-issues for more information.",
)
// Inform the user by propagating the message via log writers.
_, _ = fmt.Fprintf(cmd.Stderr, "WARNING: %s. %s\n", message, details)
// Also log to agent logs for ease of debugging.
r.Logger.Warn(ctx, message, slog.F("details", details), slog.Error(err))

case errors.Is(err, context.DeadlineExceeded):
err = ErrTimeout
}
return err
Expand Down