From 12137692677f5c786e65fcb6761bbf6a45243cdf Mon Sep 17 00:00:00 2001 From: Danielle Maywood Date: Tue, 10 Dec 2024 16:36:46 +0000 Subject: [PATCH 1/5] chore: attempt to fix race --- coderd/autobuild/lifecycle_executor.go | 10 +++ coderd/autobuild/lifecycle_executor_test.go | 69 +++++++++++++++++++++ 2 files changed, 79 insertions(+) diff --git a/coderd/autobuild/lifecycle_executor.go b/coderd/autobuild/lifecycle_executor.go index 5e6f9ae2f034d..fbb0381ef3c4b 100644 --- a/coderd/autobuild/lifecycle_executor.go +++ b/coderd/autobuild/lifecycle_executor.go @@ -3,6 +3,7 @@ package autobuild import ( "context" "database/sql" + "fmt" "net/http" "sync" "sync/atomic" @@ -177,6 +178,15 @@ func (e *Executor) runOnce(t time.Time) Stats { err := e.db.InTx(func(tx database.Store) error { var err error + ok, err := tx.TryAcquireLock(e.ctx, database.GenLockID(fmt.Sprintf("lifecycle-executor:%s", wsID))) + if err != nil { + return xerrors.Errorf("try acquire lifecycle executor lock: %w", err) + } + if !ok { + e.log.Debug(e.ctx, "unable to acquire lock for workspace, skipping", slog.F("workspace_id", wsID)) + return nil + } + // Re-check eligibility since the first check was outside the // transaction and the workspace settings may have changed. ws, err = tx.GetWorkspaceByID(e.ctx, wsID) diff --git a/coderd/autobuild/lifecycle_executor_test.go b/coderd/autobuild/lifecycle_executor_test.go index 794d99f778446..d1b1b3c297958 100644 --- a/coderd/autobuild/lifecycle_executor_test.go +++ b/coderd/autobuild/lifecycle_executor_test.go @@ -18,6 +18,7 @@ import ( "github.com/coder/coder/v2/coderd/coderdtest" "github.com/coder/coder/v2/coderd/database" "github.com/coder/coder/v2/coderd/database/dbauthz" + "github.com/coder/coder/v2/coderd/database/dbtestutil" "github.com/coder/coder/v2/coderd/notifications" "github.com/coder/coder/v2/coderd/notifications/notificationstest" "github.com/coder/coder/v2/coderd/schedule" @@ -72,6 +73,74 @@ func TestExecutorAutostartOK(t *testing.T) { require.Equal(t, template.AutostartRequirement.DaysOfWeek, []string{"monday", "tuesday", "wednesday", "thursday", "friday", "saturday", "sunday"}) } +func TestMultipleLifecycleExecutors(t *testing.T) { + t.Parallel() + + db, ps := dbtestutil.NewDB(t) + + var ( + sched = mustSchedule(t, "CRON_TZ=UTC 0 * * * *") + // Create our first client + tickChA = make(chan time.Time) + statsChA = make(chan autobuild.Stats) + clientA = coderdtest.New(t, &coderdtest.Options{ + IncludeProvisionerDaemon: true, + AutobuildTicker: tickChA, + AutobuildStats: statsChA, + Database: db, + Pubsub: ps, + }) + // ... And then our second client + tickChB = make(chan time.Time) + statsChB = make(chan autobuild.Stats) + _ = coderdtest.New(t, &coderdtest.Options{ + IncludeProvisionerDaemon: true, + AutobuildTicker: tickChB, + AutobuildStats: statsChB, + Database: db, + Pubsub: ps, + }) + // Now create a workspace (we can use either client, it doesn't matter) + workspace = mustProvisionWorkspace(t, clientA, func(cwr *codersdk.CreateWorkspaceRequest) { + cwr.AutostartSchedule = ptr.Ref(sched.String()) + }) + ) + + // Have the workspace stopped so we can perform an autostart + workspace = coderdtest.MustTransitionWorkspace(t, clientA, workspace.ID, database.WorkspaceTransitionStart, database.WorkspaceTransitionStop) + + // Get both clients to perform a lifecycle execution tick + next := sched.Next(workspace.LatestBuild.CreatedAt) + go func() { + tickChA <- next + close(tickChA) + }() + go func() { + tickChB <- next + close(tickChB) + }() + + // Now we want to check the stats for both clients + statsA := <-statsChA + statsB := <-statsChB + + // We expect there to be no errors + assert.Len(t, statsA.Errors, 0) + assert.Len(t, statsB.Errors, 0) + + // We also expect there to have been only one transition + require.Equal(t, 1, len(statsA.Transitions)+len(statsB.Transitions)) + + stats := statsA + if len(statsB.Transitions) == 1 { + stats = statsB + } + + // And we expect this transition to have been a start transition + assert.Contains(t, stats.Transitions, workspace.ID) + assert.Equal(t, database.WorkspaceTransitionStart, stats.Transitions[workspace.ID]) +} + func TestExecutorAutostartTemplateUpdated(t *testing.T) { t.Parallel() From c45817bc62a3e408310fab7818876ee7e546a4a4 Mon Sep 17 00:00:00 2001 From: Danielle Maywood Date: Fri, 13 Dec 2024 09:32:00 +0000 Subject: [PATCH 2/5] chore: use `log` instead of `e.log` --- coderd/autobuild/lifecycle_executor.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/coderd/autobuild/lifecycle_executor.go b/coderd/autobuild/lifecycle_executor.go index fbb0381ef3c4b..157a975db3609 100644 --- a/coderd/autobuild/lifecycle_executor.go +++ b/coderd/autobuild/lifecycle_executor.go @@ -183,7 +183,7 @@ func (e *Executor) runOnce(t time.Time) Stats { return xerrors.Errorf("try acquire lifecycle executor lock: %w", err) } if !ok { - e.log.Debug(e.ctx, "unable to acquire lock for workspace, skipping", slog.F("workspace_id", wsID)) + log.Debug(e.ctx, "unable to acquire lock for workspace, skipping") return nil } From 46fa216a243247bd84903ed65a39c7ef09979516 Mon Sep 17 00:00:00 2001 From: Danielle Maywood Date: Fri, 13 Dec 2024 15:14:05 +0000 Subject: [PATCH 3/5] chore: add check for err != context.Canceled --- coderd/autobuild/lifecycle_executor.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/coderd/autobuild/lifecycle_executor.go b/coderd/autobuild/lifecycle_executor.go index 157a975db3609..cc4e48b43544c 100644 --- a/coderd/autobuild/lifecycle_executor.go +++ b/coderd/autobuild/lifecycle_executor.go @@ -399,7 +399,7 @@ func (e *Executor) runOnce(t time.Time) Stats { } return nil }() - if err != nil { + if err != nil && !xerrors.Is(err, context.Canceled) { log.Error(e.ctx, "failed to transition workspace", slog.Error(err)) statsMu.Lock() stats.Errors[wsID] = err From ae7fca95170dd62a5b71bdc1122e1e012b7d936d Mon Sep 17 00:00:00 2001 From: Danielle Maywood Date: Fri, 13 Dec 2024 16:26:09 +0000 Subject: [PATCH 4/5] chore: use Cian's suggestion --- coderd/autobuild/lifecycle_executor_test.go | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/coderd/autobuild/lifecycle_executor_test.go b/coderd/autobuild/lifecycle_executor_test.go index d1b1b3c297958..12a41c1411619 100644 --- a/coderd/autobuild/lifecycle_executor_test.go +++ b/coderd/autobuild/lifecycle_executor_test.go @@ -81,21 +81,20 @@ func TestMultipleLifecycleExecutors(t *testing.T) { var ( sched = mustSchedule(t, "CRON_TZ=UTC 0 * * * *") // Create our first client - tickChA = make(chan time.Time) + tickCh = make(chan time.Time, 2) statsChA = make(chan autobuild.Stats) clientA = coderdtest.New(t, &coderdtest.Options{ IncludeProvisionerDaemon: true, - AutobuildTicker: tickChA, + AutobuildTicker: tickCh, AutobuildStats: statsChA, Database: db, Pubsub: ps, }) // ... And then our second client - tickChB = make(chan time.Time) statsChB = make(chan autobuild.Stats) _ = coderdtest.New(t, &coderdtest.Options{ IncludeProvisionerDaemon: true, - AutobuildTicker: tickChB, + AutobuildTicker: tickCh, AutobuildStats: statsChB, Database: db, Pubsub: ps, @@ -111,13 +110,11 @@ func TestMultipleLifecycleExecutors(t *testing.T) { // Get both clients to perform a lifecycle execution tick next := sched.Next(workspace.LatestBuild.CreatedAt) + go func() { - tickChA <- next - close(tickChA) - }() - go func() { - tickChB <- next - close(tickChB) + tickCh <- next + tickCh <- next + close(tickCh) }() // Now we want to check the stats for both clients From 6b133e70315d2be8d51100d0d86f92301ba2a13d Mon Sep 17 00:00:00 2001 From: Danielle Maywood Date: Fri, 13 Dec 2024 16:29:03 +0000 Subject: [PATCH 5/5] chore: ignore last commit, misread the comment --- coderd/autobuild/lifecycle_executor_test.go | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/coderd/autobuild/lifecycle_executor_test.go b/coderd/autobuild/lifecycle_executor_test.go index 12a41c1411619..62bb8b2fd2ed2 100644 --- a/coderd/autobuild/lifecycle_executor_test.go +++ b/coderd/autobuild/lifecycle_executor_test.go @@ -111,11 +111,16 @@ func TestMultipleLifecycleExecutors(t *testing.T) { // Get both clients to perform a lifecycle execution tick next := sched.Next(workspace.LatestBuild.CreatedAt) + startCh := make(chan struct{}) go func() { + <-startCh tickCh <- next + }() + go func() { + <-startCh tickCh <- next - close(tickCh) }() + close(startCh) // Now we want to check the stats for both clients statsA := <-statsChA