diff --git a/agent/agent.go b/agent/agent.go index 7f7e8cd64dff8..0d2326d2ab9d3 100644 --- a/agent/agent.go +++ b/agent/agent.go @@ -743,7 +743,7 @@ func (a *agent) run(ctx context.Context) error { return script.RunOnStart }) if err != nil { - a.logger.Warn(ctx, "startup script failed", slog.Error(err)) + a.logger.Warn(ctx, "startup script(s) failed", slog.Error(err)) if errors.Is(err, agentscripts.ErrTimeout) { a.setLifecycle(ctx, codersdk.WorkspaceAgentLifecycleStartTimeout) } else { @@ -1465,6 +1465,7 @@ func (a *agent) Close() error { return script.RunOnStop }) if err != nil { + a.logger.Warn(ctx, "shutdown script(s) failed", slog.Error(err)) if errors.Is(err, agentscripts.ErrTimeout) { lifecycleState = codersdk.WorkspaceAgentLifecycleShutdownTimeout } else { diff --git a/agent/agentscripts/agentscripts.go b/agent/agentscripts/agentscripts.go index c3bd525109ced..3acc48b0a140c 100644 --- a/agent/agentscripts/agentscripts.go +++ b/agent/agentscripts/agentscripts.go @@ -27,6 +27,14 @@ import ( var ( // ErrTimeout is returned when a script times out. ErrTimeout = xerrors.New("script timed out") + // ErrOutputPipesOpen is returned when a script exits leaving the output + // pipe(s) (stdout, stderr) open. This happens because we set WaitDelay on + // the command, which gives us two things: + // + // 1. The ability to ensure that a script exits (this is important for e.g. + // blocking login, and avoiding doing so indefinitely) + // 2. Improved command cancellation on timeout + ErrOutputPipesOpen = xerrors.New("script exited without closing output pipes") parser = cron.NewParser(cron.Second | cron.Minute | cron.Hour | cron.Dom | cron.Month | cron.DowOptional) ) @@ -248,7 +256,22 @@ func (r *Runner) run(ctx context.Context, script codersdk.WorkspaceAgentScript) err = cmdCtx.Err() case err = <-cmdDone: } - if errors.Is(err, context.DeadlineExceeded) { + switch { + case errors.Is(err, exec.ErrWaitDelay): + err = ErrOutputPipesOpen + message := fmt.Sprintf("script exited successfully, but output pipes were not closed after %s", cmd.WaitDelay) + details := fmt.Sprint( + "This usually means a child process was started with references to stdout or stderr. As a result, this " + + "process may now have been terminated. Consider redirecting the output or using a separate " + + "\"coder_script\" for the process, see " + + "https://coder.com/docs/v2/latest/templates/troubleshooting#startup-script-issues for more information.", + ) + // Inform the user by propagating the message via log writers. + _, _ = fmt.Fprintf(cmd.Stderr, "WARNING: %s. %s\n", message, details) + // Also log to agent logs for ease of debugging. + r.Logger.Warn(ctx, message, slog.F("details", details), slog.Error(err)) + + case errors.Is(err, context.DeadlineExceeded): err = ErrTimeout } return err