diff --git a/agent/agentscripts/agentscripts.go b/agent/agentscripts/agentscripts.go index 8b3aaf9a22c3d..198bb79360118 100644 --- a/agent/agentscripts/agentscripts.go +++ b/agent/agentscripts/agentscripts.go @@ -129,7 +129,18 @@ func (r *Runner) StartCron() { // has exited by the time the `cron.Stop()` context returns, so we need to // track it manually. err := r.trackCommandGoroutine(func() { - r.cron.Run() + // Since this is run async, in quick unit tests, it is possible the + // Close() function gets called before we even start the cron. + // In these cases, the Run() will never end. + // So if we are closed, we just return, and skip the Run() entirely. + select { + case <-r.cronCtx.Done(): + // The cronCtx is cancelled before cron.Close() happens. So if the ctx is + // cancelled, then Close() will be called, or it is about to be called. + // So do nothing! + default: + r.cron.Run() + } }) if err != nil { r.Logger.Warn(context.Background(), "start cron failed", slog.Error(err)) @@ -315,6 +326,7 @@ func (r *Runner) Close() error { return nil } close(r.closed) + // Must cancel the cron ctx BEFORE stopping the cron. r.cronCtxCancel() <-r.cron.Stop().Done() r.cmdCloseWait.Wait() diff --git a/agent/agentscripts/agentscripts_test.go b/agent/agentscripts/agentscripts_test.go index 1570e35d59b31..9957b8833b608 100644 --- a/agent/agentscripts/agentscripts_test.go +++ b/agent/agentscripts/agentscripts_test.go @@ -53,6 +53,15 @@ func TestTimeout(t *testing.T) { require.ErrorIs(t, runner.Execute(context.Background(), nil), agentscripts.ErrTimeout) } +// TestCronClose exists because cron.Run() can happen after cron.Close(). +// If this happens, there used to be a deadlock. +func TestCronClose(t *testing.T) { + t.Parallel() + runner := agentscripts.New(agentscripts.Options{}) + runner.StartCron() + require.NoError(t, runner.Close(), "close runner") +} + func setup(t *testing.T, patchLogs func(ctx context.Context, req agentsdk.PatchLogs) error) *agentscripts.Runner { t.Helper() if patchLogs == nil {