From 278252100a0379b3d9f9fbe2a0137436d1daec47 Mon Sep 17 00:00:00 2001 From: Steven Masley Date: Fri, 15 Dec 2023 14:20:25 -0600 Subject: [PATCH 1/3] chore: add unit test to excercise flake --- agent/agentscripts/agentscripts_test.go | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/agent/agentscripts/agentscripts_test.go b/agent/agentscripts/agentscripts_test.go index 1570e35d59b31..c645673126554 100644 --- a/agent/agentscripts/agentscripts_test.go +++ b/agent/agentscripts/agentscripts_test.go @@ -53,6 +53,15 @@ func TestTimeout(t *testing.T) { require.ErrorIs(t, runner.Execute(context.Background(), nil), agentscripts.ErrTimeout) } +// TestCronClose exists because StartCront() can happen after Close(), +// so the cron go routine is not exited. +func TestCronClose(t *testing.T) { + t.Parallel() + runner := agentscripts.New(agentscripts.Options{}) + runner.StartCron() + require.NoError(t, runner.Close(), "close runner") +} + func setup(t *testing.T, patchLogs func(ctx context.Context, req agentsdk.PatchLogs) error) *agentscripts.Runner { t.Helper() if patchLogs == nil { From 9b343c49578400cb6c60aa5e92ee0825e9575f91 Mon Sep 17 00:00:00 2001 From: Steven Masley Date: Fri, 15 Dec 2023 14:44:42 -0600 Subject: [PATCH 2/3] Implement a *fix for cron stop() before run() This fix still has a race condition. I do not see a clean solution without modifying the cron libary. The cron library uses a boolean to indicate running, and that boolean needs to be set to "true" before we call "Close()". Or "Close()" should prevent "Run()" from doing anything. In either case, this solves the issue for a niche unit test bug in which the test finishes, calling Close(), before there was an oppertunity to start the go routine. It probably isn't worth a lot of time investment, and this fix will suffice --- agent/agentscripts/agentscripts.go | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/agent/agentscripts/agentscripts.go b/agent/agentscripts/agentscripts.go index 8b3aaf9a22c3d..92f434584b3fd 100644 --- a/agent/agentscripts/agentscripts.go +++ b/agent/agentscripts/agentscripts.go @@ -129,7 +129,18 @@ func (r *Runner) StartCron() { // has exited by the time the `cron.Stop()` context returns, so we need to // track it manually. err := r.trackCommandGoroutine(func() { - r.cron.Run() + // Since this is run async, in quick unit tests, it is possible the + // Close() function gets called before we even start the cron. + // In these cases, the Run() will never end. + // So if we are closed, we just return, and skip the Run() entirely. + select { + case <-r.cronCtx.Done(): + default: + // There is still a chance the Close() happens right here before we' + // r.cron.Run(). Fixing this requires modifying the cron package + // to add a hook for when the cron "running" flag is set. + r.cron.Run() + } }) if err != nil { r.Logger.Warn(context.Background(), "start cron failed", slog.Error(err)) From 687f7f63fc9631df95aeb2d1331c1ac22fdfd41f Mon Sep 17 00:00:00 2001 From: Steven Masley Date: Fri, 15 Dec 2023 14:53:14 -0600 Subject: [PATCH 3/3] Add comment to protect order --- agent/agentscripts/agentscripts.go | 7 ++++--- agent/agentscripts/agentscripts_test.go | 4 ++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/agent/agentscripts/agentscripts.go b/agent/agentscripts/agentscripts.go index 92f434584b3fd..198bb79360118 100644 --- a/agent/agentscripts/agentscripts.go +++ b/agent/agentscripts/agentscripts.go @@ -135,10 +135,10 @@ func (r *Runner) StartCron() { // So if we are closed, we just return, and skip the Run() entirely. select { case <-r.cronCtx.Done(): + // The cronCtx is cancelled before cron.Close() happens. So if the ctx is + // cancelled, then Close() will be called, or it is about to be called. + // So do nothing! default: - // There is still a chance the Close() happens right here before we' - // r.cron.Run(). Fixing this requires modifying the cron package - // to add a hook for when the cron "running" flag is set. r.cron.Run() } }) @@ -326,6 +326,7 @@ func (r *Runner) Close() error { return nil } close(r.closed) + // Must cancel the cron ctx BEFORE stopping the cron. r.cronCtxCancel() <-r.cron.Stop().Done() r.cmdCloseWait.Wait() diff --git a/agent/agentscripts/agentscripts_test.go b/agent/agentscripts/agentscripts_test.go index c645673126554..9957b8833b608 100644 --- a/agent/agentscripts/agentscripts_test.go +++ b/agent/agentscripts/agentscripts_test.go @@ -53,8 +53,8 @@ func TestTimeout(t *testing.T) { require.ErrorIs(t, runner.Execute(context.Background(), nil), agentscripts.ErrTimeout) } -// TestCronClose exists because StartCront() can happen after Close(), -// so the cron go routine is not exited. +// TestCronClose exists because cron.Run() can happen after cron.Close(). +// If this happens, there used to be a deadlock. func TestCronClose(t *testing.T) { t.Parallel() runner := agentscripts.New(agentscripts.Options{})