@@ -43,7 +43,7 @@ type Executor struct {
43
43
type Stats struct {
44
44
Transitions map [uuid.UUID ]database.WorkspaceTransition
45
45
Elapsed time.Duration
46
- Error error
46
+ Errors map [uuid. UUID ] error
47
47
}
48
48
49
49
// New returns a new wsactions executor.
@@ -83,9 +83,6 @@ func (e *Executor) Run() {
83
83
return
84
84
}
85
85
stats := e .runOnce (t )
86
- if stats .Error != nil {
87
- e .log .Error (e .ctx , "error running once" , slog .Error (stats .Error ))
88
- }
89
86
if e .statsCh != nil {
90
87
select {
91
88
case <- e .ctx .Done ():
@@ -100,15 +97,14 @@ func (e *Executor) Run() {
100
97
}
101
98
102
99
func (e * Executor ) runOnce (t time.Time ) Stats {
103
- var err error
104
100
stats := Stats {
105
101
Transitions : make (map [uuid.UUID ]database.WorkspaceTransition ),
102
+ Errors : make (map [uuid.UUID ]error ),
106
103
}
107
104
// we build the map of transitions concurrently, so need a mutex to serialize writes to the map
108
105
statsMu := sync.Mutex {}
109
106
defer func () {
110
107
stats .Elapsed = time .Since (t )
111
- stats .Error = err
112
108
}()
113
109
currentTick := t .Truncate (time .Minute )
114
110
@@ -139,152 +135,158 @@ func (e *Executor) runOnce(t time.Time) Stats {
139
135
log := e .log .With (slog .F ("workspace_id" , wsID ))
140
136
141
137
eg .Go (func () error {
142
- var job * database.ProvisionerJob
143
- var auditLog * auditParams
144
- err := e .db .InTx (func (tx database.Store ) error {
145
- // Re-check eligibility since the first check was outside the
146
- // transaction and the workspace settings may have changed.
147
- ws , err := tx .GetWorkspaceByID (e .ctx , wsID )
148
- if err != nil {
149
- return xerrors .Errorf ("get workspace by id: %w" , err )
150
- }
138
+ err := func () error {
139
+ var job * database.ProvisionerJob
140
+ var auditLog * auditParams
141
+ err := e .db .InTx (func (tx database.Store ) error {
142
+ // Re-check eligibility since the first check was outside the
143
+ // transaction and the workspace settings may have changed.
144
+ ws , err := tx .GetWorkspaceByID (e .ctx , wsID )
145
+ if err != nil {
146
+ return xerrors .Errorf ("get workspace by id: %w" , err )
147
+ }
151
148
152
- user , err := tx .GetUserByID (e .ctx , ws .OwnerID )
153
- if err != nil {
154
- return xerrors .Errorf ("get user by id: %w" , err )
155
- }
149
+ user , err := tx .GetUserByID (e .ctx , ws .OwnerID )
150
+ if err != nil {
151
+ return xerrors .Errorf ("get user by id: %w" , err )
152
+ }
156
153
157
- // Determine the workspace state based on its latest build.
158
- latestBuild , err := tx .GetLatestWorkspaceBuildByWorkspaceID (e .ctx , ws .ID )
159
- if err != nil {
160
- return xerrors .Errorf ("get latest workspace build: %w" , err )
161
- }
154
+ // Determine the workspace state based on its latest build.
155
+ latestBuild , err := tx .GetLatestWorkspaceBuildByWorkspaceID (e .ctx , ws .ID )
156
+ if err != nil {
157
+ return xerrors .Errorf ("get latest workspace build: %w" , err )
158
+ }
162
159
163
- latestJob , err := tx .GetProvisionerJobByID (e .ctx , latestBuild .JobID )
164
- if err != nil {
165
- return xerrors .Errorf ("get latest provisioner job: %w" , err )
166
- }
160
+ latestJob , err := tx .GetProvisionerJobByID (e .ctx , latestBuild .JobID )
161
+ if err != nil {
162
+ return xerrors .Errorf ("get latest provisioner job: %w" , err )
163
+ }
167
164
168
- templateSchedule , err := (* (e .templateScheduleStore .Load ())).Get (e .ctx , tx , ws .TemplateID )
169
- if err != nil {
170
- return xerrors .Errorf ("get template scheduling options: %w" , err )
171
- }
165
+ templateSchedule , err := (* (e .templateScheduleStore .Load ())).Get (e .ctx , tx , ws .TemplateID )
166
+ if err != nil {
167
+ return xerrors .Errorf ("get template scheduling options: %w" , err )
168
+ }
172
169
173
- template , err := tx .GetTemplateByID (e .ctx , ws .TemplateID )
174
- if err != nil {
175
- return xerrors .Errorf ("get template by ID: %w" , err )
176
- }
170
+ template , err := tx .GetTemplateByID (e .ctx , ws .TemplateID )
171
+ if err != nil {
172
+ return xerrors .Errorf ("get template by ID: %w" , err )
173
+ }
177
174
178
- accessControl := (* (e .accessControlStore .Load ())).GetTemplateAccessControl (template )
175
+ accessControl := (* (e .accessControlStore .Load ())).GetTemplateAccessControl (template )
179
176
180
- nextTransition , reason , err := getNextTransition (user , ws , latestBuild , latestJob , templateSchedule , currentTick )
181
- if err != nil {
182
- log .Debug (e .ctx , "skipping workspace" , slog .Error (err ))
183
- // err is used to indicate that a workspace is not eligible
184
- // so returning nil here is ok although ultimately the distinction
185
- // doesn't matter since the transaction is read-only up to
186
- // this point.
187
- return nil
188
- }
177
+ nextTransition , reason , err := getNextTransition (user , ws , latestBuild , latestJob , templateSchedule , currentTick )
178
+ if err != nil {
179
+ log .Debug (e .ctx , "skipping workspace" , slog .Error (err ))
180
+ // err is used to indicate that a workspace is not eligible
181
+ // so returning nil here is ok although ultimately the distinction
182
+ // doesn't matter since the transaction is read-only up to
183
+ // this point.
184
+ return nil
185
+ }
189
186
190
- var build * database.WorkspaceBuild
191
- if nextTransition != "" {
192
- builder := wsbuilder .New (ws , nextTransition ).
193
- SetLastWorkspaceBuildInTx (& latestBuild ).
194
- SetLastWorkspaceBuildJobInTx (& latestJob ).
195
- Reason (reason )
196
- log .Debug (e .ctx , "auto building workspace" , slog .F ("transition" , nextTransition ))
197
- if nextTransition == database .WorkspaceTransitionStart &&
198
- useActiveVersion (accessControl , ws ) {
199
- log .Debug (e .ctx , "autostarting with active version" )
200
- builder = builder .ActiveVersion ()
187
+ var build * database.WorkspaceBuild
188
+ if nextTransition != "" {
189
+ builder := wsbuilder .New (ws , nextTransition ).
190
+ SetLastWorkspaceBuildInTx (& latestBuild ).
191
+ SetLastWorkspaceBuildJobInTx (& latestJob ).
192
+ Reason (reason )
193
+ log .Debug (e .ctx , "auto building workspace" , slog .F ("transition" , nextTransition ))
194
+ if nextTransition == database .WorkspaceTransitionStart &&
195
+ useActiveVersion (accessControl , ws ) {
196
+ log .Debug (e .ctx , "autostarting with active version" )
197
+ builder = builder .ActiveVersion ()
198
+ }
199
+
200
+ build , job , err = builder .Build (e .ctx , tx , nil , audit.WorkspaceBuildBaggage {IP : "127.0.0.1" })
201
+ if err != nil {
202
+ return xerrors .Errorf ("build workspace with transition %q: %w" , nextTransition , err )
203
+ }
201
204
}
202
205
203
- build , job , err = builder .Build (e .ctx , tx , nil , audit.WorkspaceBuildBaggage {IP : "127.0.0.1" })
204
- if err != nil {
205
- log .Error (e .ctx , "unable to transition workspace" ,
206
- slog .F ("transition" , nextTransition ),
207
- slog .Error (err ),
206
+ // Transition the workspace to dormant if it has breached the template's
207
+ // threshold for inactivity.
208
+ if reason == database .BuildReasonAutolock {
209
+ wsOld := ws
210
+ ws , err = tx .UpdateWorkspaceDormantDeletingAt (e .ctx , database.UpdateWorkspaceDormantDeletingAtParams {
211
+ ID : ws .ID ,
212
+ DormantAt : sql.NullTime {
213
+ Time : dbtime .Now (),
214
+ Valid : true ,
215
+ },
216
+ })
217
+
218
+ auditLog = & auditParams {
219
+ Build : build ,
220
+ Job : latestJob ,
221
+ Reason : reason ,
222
+ Old : wsOld ,
223
+ New : ws ,
224
+ }
225
+ if err != nil {
226
+ return xerrors .Errorf ("update workspace dormant deleting at: %w" , err )
227
+ }
228
+
229
+ log .Info (e .ctx , "dormant workspace" ,
230
+ slog .F ("last_used_at" , ws .LastUsedAt ),
231
+ slog .F ("time_til_dormant" , templateSchedule .TimeTilDormant ),
232
+ slog .F ("since_last_used_at" , time .Since (ws .LastUsedAt )),
208
233
)
209
- return xerrors .Errorf ("build workspace: %w" , err )
210
234
}
211
- }
212
235
213
- // Transition the workspace to dormant if it has breached the template's
214
- // threshold for inactivity.
215
- if reason == database .BuildReasonAutolock {
216
- wsOld := ws
217
- ws , err = tx .UpdateWorkspaceDormantDeletingAt (e .ctx , database.UpdateWorkspaceDormantDeletingAtParams {
218
- ID : ws .ID ,
219
- DormantAt : sql.NullTime {
220
- Time : dbtime .Now (),
221
- Valid : true ,
222
- },
223
- })
224
-
225
- auditLog = & auditParams {
226
- Build : build ,
227
- Job : latestJob ,
228
- Reason : reason ,
229
- Old : wsOld ,
230
- New : ws ,
236
+ if reason == database .BuildReasonAutodelete {
237
+ log .Info (e .ctx , "deleted workspace" ,
238
+ slog .F ("dormant_at" , ws .DormantAt .Time ),
239
+ slog .F ("time_til_dormant_autodelete" , templateSchedule .TimeTilDormantAutoDelete ),
240
+ )
231
241
}
232
- if err != nil {
233
- return xerrors .Errorf ("update workspace dormant deleting at: %w" , err )
242
+
243
+ if nextTransition == "" {
244
+ return nil
234
245
}
235
246
236
- log .Info (e .ctx , "dormant workspace" ,
237
- slog .F ("last_used_at" , ws .LastUsedAt ),
238
- slog .F ("time_til_dormant" , templateSchedule .TimeTilDormant ),
239
- slog .F ("since_last_used_at" , time .Since (ws .LastUsedAt )),
240
- )
241
- }
247
+ statsMu .Lock ()
248
+ stats .Transitions [ws .ID ] = nextTransition
249
+ statsMu .Unlock ()
242
250
243
- if reason == database .BuildReasonAutodelete {
244
- log .Info (e .ctx , "deleted workspace" ,
245
- slog .F ("dormant_at" , ws .DormantAt .Time ),
246
- slog .F ("time_til_dormant_autodelete" , templateSchedule .TimeTilDormantAutoDelete ),
251
+ log .Info (e .ctx , "scheduling workspace transition" ,
252
+ slog .F ("transition" , nextTransition ),
253
+ slog .F ("reason" , reason ),
247
254
)
248
- }
249
255
250
- if nextTransition == "" {
251
256
return nil
252
- }
253
-
254
- statsMu .Lock ()
255
- stats .Transitions [ws .ID ] = nextTransition
256
- statsMu .Unlock ()
257
257
258
- log .Info (e .ctx , "scheduling workspace transition" ,
259
- slog .F ("transition" , nextTransition ),
260
- slog .F ("reason" , reason ),
261
- )
262
-
263
- return nil
264
-
265
- // Run with RepeatableRead isolation so that the build process sees the same data
266
- // as our calculation that determines whether an autobuild is necessary.
267
- }, & sql.TxOptions {Isolation : sql .LevelRepeatableRead })
268
- if err != nil {
269
- log .Error (e .ctx , "workspace scheduling failed" , slog .Error (err ))
270
- }
271
- if auditLog != nil {
272
- // If the transition didn't succeed then updating the workspace
273
- // to indicate dormant didn't either.
274
- auditLog .Success = err == nil
275
- auditBuild (e .ctx , e .log , * e .auditor .Load (), * auditLog )
276
- }
277
- if job != nil && err == nil {
278
- // Note that we can't refactor such that posting the job happens inside wsbuilder because it's called
279
- // with an outer transaction like this, and we need to make sure the outer transaction commits before
280
- // posting the job. If we post before the transaction commits, provisionerd might try to acquire the
281
- // job, fail, and then sit idle instead of picking up the job.
282
- err = provisionerjobs .PostJob (e .ps , * job )
258
+ // Run with RepeatableRead isolation so that the build process sees the same data
259
+ // as our calculation that determines whether an autobuild is necessary.
260
+ }, & sql.TxOptions {Isolation : sql .LevelRepeatableRead })
261
+ if auditLog != nil {
262
+ // If the transition didn't succeed then updating the workspace
263
+ // to indicate dormant didn't either.
264
+ auditLog .Success = err == nil
265
+ auditBuild (e .ctx , e .log , * e .auditor .Load (), * auditLog )
266
+ }
283
267
if err != nil {
284
- // Client probably doesn't care about this error, so just log it.
285
- log .Error (e .ctx , "failed to post provisioner job to pubsub" , slog .Error (err ))
268
+ return xerrors .Errorf ("transition workspace: %w" , err )
286
269
}
270
+ if job != nil {
271
+ // Note that we can't refactor such that posting the job happens inside wsbuilder because it's called
272
+ // with an outer transaction like this, and we need to make sure the outer transaction commits before
273
+ // posting the job. If we post before the transaction commits, provisionerd might try to acquire the
274
+ // job, fail, and then sit idle instead of picking up the job.
275
+ err = provisionerjobs .PostJob (e .ps , * job )
276
+ if err != nil {
277
+ return xerrors .Errorf ("post provisioner job to pubsub: %w" , err )
278
+ }
279
+ }
280
+ return nil
281
+ }()
282
+ if err != nil {
283
+ e .log .Error (e .ctx , "failed to transition workspace" , slog .Error (err ))
284
+ statsMu .Lock ()
285
+ stats .Errors [wsID ] = err
286
+ statsMu .Unlock ()
287
287
}
288
+ // Even though we got an error we still return nil to avoid
289
+ // short-circuiting the evaluation loop.
288
290
return nil
289
291
})
290
292
}
0 commit comments