Skip to content

Commit 9676b65

Browse files
committed
Protect race on cancelFn by using running/stopped atomic bools
Signed-off-by: Danny Kopping <dannykopping@gmail.com>
1 parent 61b82ab commit 9676b65

File tree

5 files changed

+26
-14
lines changed

5 files changed

+26
-14
lines changed

coderd/prebuilds/api.go

+2-2
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,10 @@ var ErrNoClaimablePrebuiltWorkspaces = xerrors.New("no claimable prebuilt worksp
1414
type ReconciliationOrchestrator interface {
1515
Reconciler
1616

17-
// RunLoop starts a continuous reconciliation loop that periodically calls ReconcileAll
17+
// Run starts a continuous reconciliation loop that periodically calls ReconcileAll
1818
// to ensure all prebuilds are in their desired states. The loop runs until the context
1919
// is canceled or Stop is called.
20-
RunLoop(ctx context.Context)
20+
Run(ctx context.Context)
2121

2222
// Stop gracefully shuts down the orchestrator with the given cause.
2323
// The cause is used for logging and error reporting.

coderd/prebuilds/noop.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ import (
1010

1111
type NoopReconciler struct{}
1212

13-
func (NoopReconciler) RunLoop(context.Context) {}
13+
func (NoopReconciler) Run(context.Context) {}
1414
func (NoopReconciler) Stop(context.Context, error) {}
1515
func (NoopReconciler) ReconcileAll(context.Context) error { return nil }
1616
func (NoopReconciler) SnapshotState(context.Context, database.Store) (*GlobalSnapshot, error) {

enterprise/coderd/coderd.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -874,7 +874,7 @@ func (api *API) updateEntitlements(ctx context.Context) error {
874874
}
875875

876876
api.AGPL.PrebuildsReconciler.Store(&reconciler)
877-
go reconciler.RunLoop(context.Background())
877+
go reconciler.Run(context.Background())
878878

879879
api.AGPL.PrebuildsClaimer.Store(&claimer)
880880
}

enterprise/coderd/prebuilds/reconcile.go

+19-7
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ type StoreReconciler struct {
3838
clock quartz.Clock
3939

4040
cancelFn context.CancelCauseFunc
41+
running atomic.Bool
4142
stopped atomic.Bool
4243
done chan struct{}
4344
}
@@ -61,7 +62,7 @@ func NewStoreReconciler(
6162
}
6263
}
6364

64-
func (c *StoreReconciler) RunLoop(ctx context.Context) {
65+
func (c *StoreReconciler) Run(ctx context.Context) {
6566
reconciliationInterval := c.cfg.ReconciliationInterval.Value()
6667
if reconciliationInterval <= 0 { // avoids a panic
6768
reconciliationInterval = 5 * time.Minute
@@ -82,6 +83,11 @@ func (c *StoreReconciler) RunLoop(ctx context.Context) {
8283
ctx, cancel := context.WithCancelCause(dbauthz.AsPrebuildsOrchestrator(ctx))
8384
c.cancelFn = cancel
8485

86+
// Everything is in place, reconciler can now be considered as running.
87+
//
88+
// NOTE: without this atomic bool, Stop might race with Run for the c.cancelFn above.
89+
c.running.Store(true)
90+
8591
for {
8692
select {
8793
// TODO: implement pubsub listener to allow reconciling a specific template imperatively once it has been changed,
@@ -107,16 +113,26 @@ func (c *StoreReconciler) RunLoop(ctx context.Context) {
107113
}
108114

109115
func (c *StoreReconciler) Stop(ctx context.Context, cause error) {
116+
defer c.running.Store(false)
117+
110118
if cause != nil {
111119
c.logger.Error(context.Background(), "stopping reconciler due to an error", slog.Error(cause))
112120
} else {
113121
c.logger.Info(context.Background(), "gracefully stopping reconciler")
114122
}
115123

116-
if c.isStopped() {
124+
// If previously stopped (Swap returns previous value), then short-circuit.
125+
//
126+
// NOTE: we need to *prospectively* mark this as stopped to prevent Stop being called multiple times and causing problems.
127+
if c.stopped.Swap(true) {
128+
return
129+
}
130+
131+
// If the reconciler is not running, there's nothing else to do.
132+
if !c.running.Load() {
117133
return
118134
}
119-
c.stopped.Store(true)
135+
120136
if c.cancelFn != nil {
121137
c.cancelFn(cause)
122138
}
@@ -138,10 +154,6 @@ func (c *StoreReconciler) Stop(ctx context.Context, cause error) {
138154
}
139155
}
140156

141-
func (c *StoreReconciler) isStopped() bool {
142-
return c.stopped.Load()
143-
}
144-
145157
// ReconcileAll will attempt to resolve the desired vs actual state of all templates which have presets with prebuilds configured.
146158
//
147159
// NOTE:

enterprise/coderd/prebuilds/reconcile_test.go

+3-3
Original file line numberDiff line numberDiff line change
@@ -575,7 +575,7 @@ func TestRunLoop(t *testing.T) {
575575
t, &slogtest.Options{IgnoreErrors: true},
576576
).Leveled(slog.LevelDebug)
577577
db, pubSub := dbtestutil.NewDB(t)
578-
controller := prebuilds.NewStoreReconciler(db, pubSub, cfg, logger, clock)
578+
reconciler := prebuilds.NewStoreReconciler(db, pubSub, cfg, logger, clock)
579579

580580
ownerID := uuid.New()
581581
dbgen.User(t, db, database.User{
@@ -639,7 +639,7 @@ func TestRunLoop(t *testing.T) {
639639
// we need to wait until ticker is initialized, and only then use clock.Advance()
640640
// otherwise clock.Advance() will be ignored
641641
trap := clock.Trap().NewTicker()
642-
go controller.RunLoop(ctx)
642+
go reconciler.Run(ctx)
643643
// wait until ticker is initialized
644644
trap.MustWait(ctx).Release()
645645
// start 1st iteration of ReconciliationLoop
@@ -681,7 +681,7 @@ func TestRunLoop(t *testing.T) {
681681
}, testutil.WaitShort, testutil.IntervalFast)
682682

683683
// gracefully stop the reconciliation loop
684-
controller.Stop(ctx, nil)
684+
reconciler.Stop(ctx, nil)
685685
}
686686

687687
func TestFailedBuildBackoff(t *testing.T) {

0 commit comments

Comments
 (0)