Skip to content

Commit d9ed06d

Browse files
committed
Fix data race in provisioner p.acquiredJobDone chan
1 parent 6c28118 commit d9ed06d

File tree

1 file changed

+15
-5
lines changed

1 file changed

+15
-5
lines changed

provisionerd/provisionerd.go

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,9 @@ func (p *provisionerDaemon) connect(ctx context.Context) {
156156

157157
// Locks a job in the database, and runs it!
158158
func (p *provisionerDaemon) acquireJob(ctx context.Context) {
159+
if p.isClosed() {
160+
return
161+
}
159162
p.acquiredJobMutex.Lock()
160163
defer p.acquiredJobMutex.Unlock()
161164
if p.isRunningJob() {
@@ -174,16 +177,14 @@ func (p *provisionerDaemon) acquireJob(ctx context.Context) {
174177
p.opts.Logger.Warn(context.Background(), "acquire job", slog.Error(err))
175178
return
176179
}
177-
if p.isClosed() {
178-
return
179-
}
180180
if p.acquiredJob.JobId == "" {
181181
p.opts.Logger.Debug(context.Background(), "no jobs available")
182182
return
183183
}
184184
ctx, p.acquiredJobCancel = context.WithCancel(ctx)
185185
p.acquiredJobCancelled.Store(false)
186186
p.acquiredJobRunning.Store(true)
187+
187188
p.acquiredJobDone = make(chan struct{})
188189

189190
p.opts.Logger.Info(context.Background(), "acquired job",
@@ -234,8 +235,6 @@ func (p *provisionerDaemon) runJob(ctx context.Context) {
234235
return
235236
}
236237
p.opts.Logger.Debug(ctx, "cleaned up work directory")
237-
p.acquiredJobMutex.Lock()
238-
defer p.acquiredJobMutex.Unlock()
239238
p.acquiredJobRunning.Store(false)
240239
close(p.acquiredJobDone)
241240
}()
@@ -510,11 +509,22 @@ func (p *provisionerDaemon) Close() error {
510509
func (p *provisionerDaemon) closeWithError(err error) error {
511510
p.closeMutex.Lock()
512511
defer p.closeMutex.Unlock()
512+
513513
if p.isClosed() {
514514
return p.closeError
515515
}
516516

517517
if p.isRunningJob() {
518+
519+
// We also need the 'acquire job' mutex here,
520+
// so that a new `p.acquiredJobDone` channel isn't created
521+
// while we're waiting on the mutex.
522+
523+
// Note the mutex order - it's important that we always use the same order of acquisition
524+
// to avoid deadlocks
525+
p.acquiredJobMutex.Lock()
526+
defer p.acquiredJobMutex.Unlock()
527+
518528
errMsg := "provisioner daemon was shutdown gracefully"
519529
if err != nil {
520530
errMsg = err.Error()

0 commit comments

Comments
 (0)