Skip to content

Commit b7635d7

Browse files
committed
add prebuild mutual exclusion test
1 parent 9dfd0ea commit b7635d7

File tree

2 files changed

+77
-30
lines changed

2 files changed

+77
-30
lines changed

enterprise/coderd/prebuilds/reconcile.go

Lines changed: 30 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -145,26 +145,7 @@ func (c *StoreReconciler) ReconcileAll(ctx context.Context) error {
145145

146146
logger.Debug(ctx, "starting reconciliation")
147147

148-
// This tx holds a global lock, which prevents any other coderd replica from starting a reconciliation and
149-
// possibly getting an inconsistent view of the state.
150-
//
151-
// The lock MUST be held until ALL modifications have been effected.
152-
//
153-
// It is run with RepeatableRead isolation, so it's effectively snapshotting the data at the start of the tx.
154-
//
155-
// This is a read-only tx, so returning an error (i.e. causing a rollback) has no impact.
156-
err := c.store.InTx(func(db database.Store) error {
157-
start := c.clock.Now()
158-
159-
// TODO: use TryAcquireLock here and bail out early.
160-
err := db.AcquireLock(ctx, database.LockIDReconcileTemplatePrebuilds)
161-
if err != nil {
162-
logger.Warn(ctx, "failed to acquire top-level reconciliation lock; likely running on another coderd replica", slog.Error(err))
163-
return nil
164-
}
165-
166-
logger.Debug(ctx, "acquired top-level reconciliation lock", slog.F("acquire_wait_secs", fmt.Sprintf("%.4f", c.clock.Since(start).Seconds())))
167-
148+
err := c.WithReconciliationLock(ctx, logger, func(ctx context.Context, db database.Store) error {
168149
state, err := c.SnapshotState(ctx, db)
169150
if err != nil {
170151
return xerrors.Errorf("determine current state: %w", err)
@@ -209,10 +190,6 @@ func (c *StoreReconciler) ReconcileAll(ctx context.Context) error {
209190
}
210191

211192
return eg.Wait()
212-
}, &database.TxOptions{
213-
Isolation: sql.LevelRepeatableRead,
214-
ReadOnly: true,
215-
TxIdentifier: "template_prebuilds",
216193
})
217194
if err != nil {
218195
logger.Error(ctx, "failed to reconcile", slog.Error(err))
@@ -221,6 +198,35 @@ func (c *StoreReconciler) ReconcileAll(ctx context.Context) error {
221198
return err
222199
}
223200

201+
func (c *StoreReconciler) WithReconciliationLock(ctx context.Context, logger slog.Logger, fn func(ctx context.Context, db database.Store) error) error {
202+
// This tx holds a global lock, which prevents any other coderd replica from starting a reconciliation and
203+
// possibly getting an inconsistent view of the state.
204+
//
205+
// The lock MUST be held until ALL modifications have been effected.
206+
//
207+
// It is run with RepeatableRead isolation, so it's effectively snapshotting the data at the start of the tx.
208+
//
209+
// This is a read-only tx, so returning an error (i.e. causing a rollback) has no impact.
210+
return c.store.InTx(func(db database.Store) error {
211+
start := c.clock.Now()
212+
213+
// TODO: use TryAcquireLock here and bail out early.
214+
err := db.AcquireLock(ctx, database.LockIDReconcileTemplatePrebuilds)
215+
if err != nil {
216+
logger.Warn(ctx, "failed to acquire top-level reconciliation lock; likely running on another coderd replica", slog.Error(err))
217+
return nil
218+
}
219+
220+
logger.Debug(ctx, "acquired top-level reconciliation lock", slog.F("acquire_wait_secs", fmt.Sprintf("%.4f", c.clock.Since(start).Seconds())))
221+
222+
return fn(ctx, db)
223+
}, &database.TxOptions{
224+
Isolation: sql.LevelRepeatableRead,
225+
ReadOnly: true,
226+
TxIdentifier: "template_prebuilds",
227+
})
228+
}
229+
224230
// SnapshotState determines the current state of prebuilds & the presets which define them.
225231
// An application-level lock is used
226232
func (c *StoreReconciler) SnapshotState(ctx context.Context, store database.Store) (*prebuilds.ReconciliationState, error) {

enterprise/coderd/prebuilds/reconcile_test.go

Lines changed: 47 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"context"
55
"database/sql"
66
"fmt"
7+
"sync"
78
"testing"
89
"time"
910

@@ -27,13 +28,13 @@ import (
2728
)
2829

2930
func TestNoReconciliationActionsIfNoPresets(t *testing.T) {
31+
// Scenario: No reconciliation actions are taken if there are no presets
32+
t.Parallel()
33+
3034
if !dbtestutil.WillUsePostgres() {
3135
t.Skip("This test requires postgres")
3236
}
3337

34-
// Scenario: No reconciliation actions are taken if there are no presets
35-
t.Parallel()
36-
3738
clock := quartz.NewMock(t)
3839
ctx := testutil.Context(t, testutil.WaitLong)
3940
db, ps := dbtestutil.NewDB(t)
@@ -72,13 +73,13 @@ func TestNoReconciliationActionsIfNoPresets(t *testing.T) {
7273
}
7374

7475
func TestNoReconciliationActionsIfNoPrebuilds(t *testing.T) {
76+
// Scenario: No reconciliation actions are taken if there are no prebuilds
77+
t.Parallel()
78+
7579
if !dbtestutil.WillUsePostgres() {
7680
t.Skip("This test requires postgres")
7781
}
7882

79-
// Scenario: No reconciliation actions are taken if there are no prebuilds
80-
t.Parallel()
81-
8283
clock := quartz.NewMock(t)
8384
ctx := testutil.Context(t, testutil.WaitLong)
8485
db, ps := dbtestutil.NewDB(t)
@@ -485,6 +486,46 @@ func TestFailedBuildBackoff(t *testing.T) {
485486
require.EqualValues(t, backoffInterval*time.Duration(presetState.Backoff.NumFailed), clock.Until(actions.BackoffUntil).Truncate(backoffInterval))
486487
}
487488

489+
func TestReconciliationLock(t *testing.T) {
490+
t.Parallel()
491+
492+
if !dbtestutil.WillUsePostgres() {
493+
t.Skip("This test requires postgres")
494+
}
495+
496+
ctx := testutil.Context(t, testutil.WaitSuperLong)
497+
logger := slogtest.Make(t, &slogtest.Options{IgnoreErrors: true}).Leveled(slog.LevelDebug)
498+
db, ps := dbtestutil.NewDB(t)
499+
500+
wg := sync.WaitGroup{}
501+
mutex := sync.Mutex{}
502+
for i := 0; i < 5; i++ {
503+
wg.Add(1)
504+
go func() {
505+
defer wg.Done()
506+
reconciler := prebuilds.NewStoreReconciler(
507+
db,
508+
ps,
509+
codersdk.PrebuildsConfig{},
510+
slogtest.Make(t, &slogtest.Options{IgnoreErrors: true}).Leveled(slog.LevelDebug),
511+
quartz.NewMock(t),
512+
)
513+
reconciler.WithReconciliationLock(ctx, logger, func(_ context.Context, _ database.Store) error {
514+
lockObtained := mutex.TryLock()
515+
// As long as the postgres lock is held, this mutex should always be unlocked when we get here.
516+
// If this mutex is ever locked at this point, then that means that the postgres lock is not being held while we're
517+
// inside WithReconciliationLock, which is meant to hold the lock.
518+
require.True(t, lockObtained)
519+
// Sleep a bit to give reconcilers more time to contend for the lock
520+
time.Sleep(time.Second)
521+
defer mutex.Unlock()
522+
return nil
523+
})
524+
}()
525+
}
526+
wg.Wait()
527+
}
528+
488529
func setupTestDBTemplate(
489530
t *testing.T,
490531
db database.Store,

0 commit comments

Comments
 (0)