Skip to content

Commit 4a9c8f4

Browse files
authored
feat: add auto-locking/deleting workspace based on template config (#8240)
1 parent 818c4a7 commit 4a9c8f4

18 files changed

+727
-71
lines changed

coderd/apidoc/docs.go

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

coderd/apidoc/swagger.json

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

coderd/autobuild/lifecycle_executor.go

Lines changed: 107 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -160,23 +160,65 @@ func (e *Executor) runOnce(t time.Time) Stats {
160160
return nil
161161
}
162162

163-
builder := wsbuilder.New(ws, nextTransition).
164-
SetLastWorkspaceBuildInTx(&latestBuild).
165-
SetLastWorkspaceBuildJobInTx(&latestJob).
166-
Reason(reason)
167-
168-
if _, _, err := builder.Build(e.ctx, tx, nil); err != nil {
169-
log.Error(e.ctx, "workspace build error",
170-
slog.F("transition", nextTransition),
171-
slog.Error(err),
163+
if nextTransition != "" {
164+
builder := wsbuilder.New(ws, nextTransition).
165+
SetLastWorkspaceBuildInTx(&latestBuild).
166+
SetLastWorkspaceBuildJobInTx(&latestJob).
167+
Reason(reason)
168+
169+
if _, _, err := builder.Build(e.ctx, tx, nil); err != nil {
170+
log.Error(e.ctx, "unable to transition workspace",
171+
slog.F("transition", nextTransition),
172+
slog.Error(err),
173+
)
174+
return nil
175+
}
176+
}
177+
178+
// Lock the workspace if it has breached the template's
179+
// threshold for inactivity.
180+
if reason == database.BuildReasonAutolock {
181+
err = tx.UpdateWorkspaceLockedAt(e.ctx, database.UpdateWorkspaceLockedAtParams{
182+
ID: ws.ID,
183+
LockedAt: sql.NullTime{
184+
Time: database.Now(),
185+
Valid: true,
186+
},
187+
})
188+
if err != nil {
189+
log.Error(e.ctx, "unable to lock workspace",
190+
slog.F("transition", nextTransition),
191+
slog.Error(err),
192+
)
193+
return nil
194+
}
195+
196+
log.Info(e.ctx, "locked workspace",
197+
slog.F("last_used_at", ws.LastUsedAt),
198+
slog.F("inactivity_ttl", templateSchedule.InactivityTTL),
199+
slog.F("since_last_used_at", time.Since(ws.LastUsedAt)),
200+
)
201+
}
202+
203+
if reason == database.BuildReasonAutodelete {
204+
log.Info(e.ctx, "deleted workspace",
205+
slog.F("locked_at", ws.LockedAt.Time),
206+
slog.F("locked_ttl", templateSchedule.LockedTTL),
172207
)
208+
}
209+
210+
if nextTransition == "" {
173211
return nil
174212
}
213+
175214
statsMu.Lock()
176215
stats.Transitions[ws.ID] = nextTransition
177216
statsMu.Unlock()
178217

179-
log.Info(e.ctx, "scheduling workspace transition", slog.F("transition", nextTransition))
218+
log.Info(e.ctx, "scheduling workspace transition",
219+
slog.F("transition", nextTransition),
220+
slog.F("reason", reason),
221+
)
180222

181223
return nil
182224

@@ -199,6 +241,12 @@ func (e *Executor) runOnce(t time.Time) Stats {
199241
return stats
200242
}
201243

244+
// getNextTransition returns the next eligible transition for the workspace
245+
// as well as the reason for why it is transitioning. It is possible
246+
// for this function to return a nil error as well as an empty transition.
247+
// In such cases it means no provisioning should occur but the workspace
248+
// may be "transitioning" to a new state (such as an inactive, stopped
249+
// workspace transitioning to the locked state).
202250
func getNextTransition(
203251
ws database.Workspace,
204252
latestBuild database.WorkspaceBuild,
@@ -211,12 +259,23 @@ func getNextTransition(
211259
error,
212260
) {
213261
switch {
214-
case isEligibleForAutostop(latestBuild, latestJob, currentTick):
262+
case isEligibleForAutostop(ws, latestBuild, latestJob, currentTick):
215263
return database.WorkspaceTransitionStop, database.BuildReasonAutostop, nil
216264
case isEligibleForAutostart(ws, latestBuild, latestJob, templateSchedule, currentTick):
217265
return database.WorkspaceTransitionStart, database.BuildReasonAutostart, nil
218-
case isEligibleForFailedStop(latestBuild, latestJob, templateSchedule):
266+
case isEligibleForFailedStop(latestBuild, latestJob, templateSchedule, currentTick):
219267
return database.WorkspaceTransitionStop, database.BuildReasonAutostop, nil
268+
case isEligibleForLockedStop(ws, templateSchedule, currentTick):
269+
// Only stop started workspaces.
270+
if latestBuild.Transition == database.WorkspaceTransitionStart {
271+
return database.WorkspaceTransitionStop, database.BuildReasonAutolock, nil
272+
}
273+
// We shouldn't transition the workspace but we should still
274+
// lock it.
275+
return "", database.BuildReasonAutolock, nil
276+
277+
case isEligibleForDelete(ws, templateSchedule, currentTick):
278+
return database.WorkspaceTransitionDelete, database.BuildReasonAutodelete, nil
220279
default:
221280
return "", "", xerrors.Errorf("last transition not valid for autostart or autostop")
222281
}
@@ -225,7 +284,12 @@ func getNextTransition(
225284
// isEligibleForAutostart returns true if the workspace should be autostarted.
226285
func isEligibleForAutostart(ws database.Workspace, build database.WorkspaceBuild, job database.ProvisionerJob, templateSchedule schedule.TemplateScheduleOptions, currentTick time.Time) bool {
227286
// Don't attempt to autostart failed workspaces.
228-
if !job.CompletedAt.Valid || job.Error.String != "" {
287+
if db2sdk.ProvisionerJobStatus(job) == codersdk.ProvisionerJobFailed {
288+
return false
289+
}
290+
291+
// If the workspace is locked we should not autostart it.
292+
if ws.LockedAt.Valid {
229293
return false
230294
}
231295

@@ -253,9 +317,13 @@ func isEligibleForAutostart(ws database.Workspace, build database.WorkspaceBuild
253317
}
254318

255319
// isEligibleForAutostop returns true if the workspace should be autostopped.
256-
func isEligibleForAutostop(build database.WorkspaceBuild, job database.ProvisionerJob, currentTick time.Time) bool {
257-
// Don't attempt to autostop failed workspaces.
258-
if !job.CompletedAt.Valid || job.Error.String != "" {
320+
func isEligibleForAutostop(ws database.Workspace, build database.WorkspaceBuild, job database.ProvisionerJob, currentTick time.Time) bool {
321+
if db2sdk.ProvisionerJobStatus(job) == codersdk.ProvisionerJobFailed {
322+
return false
323+
}
324+
325+
// If the workspace is locked we should not autostop it.
326+
if ws.LockedAt.Valid {
259327
return false
260328
}
261329

@@ -266,14 +334,35 @@ func isEligibleForAutostop(build database.WorkspaceBuild, job database.Provision
266334
!currentTick.Before(build.Deadline)
267335
}
268336

337+
// isEligibleForLockedStop returns true if the workspace should be locked
338+
// for breaching the inactivity threshold of the template.
339+
func isEligibleForLockedStop(ws database.Workspace, templateSchedule schedule.TemplateScheduleOptions, currentTick time.Time) bool {
340+
// Only attempt to lock workspaces not already locked.
341+
return !ws.LockedAt.Valid &&
342+
// The template must specify an inactivity TTL.
343+
templateSchedule.InactivityTTL > 0 &&
344+
// The workspace must breach the inactivity TTL.
345+
currentTick.Sub(ws.LastUsedAt) > templateSchedule.InactivityTTL
346+
}
347+
348+
func isEligibleForDelete(ws database.Workspace, templateSchedule schedule.TemplateScheduleOptions, currentTick time.Time) bool {
349+
// Only attempt to delete locked workspaces.
350+
return ws.LockedAt.Valid &&
351+
// Locked workspaces should only be deleted if a locked_ttl is specified.
352+
templateSchedule.LockedTTL > 0 &&
353+
// The workspace must breach the locked_ttl.
354+
currentTick.Sub(ws.LockedAt.Time) > templateSchedule.LockedTTL
355+
}
356+
269357
// isEligibleForFailedStop returns true if the workspace is eligible to be stopped
270358
// due to a failed build.
271-
func isEligibleForFailedStop(build database.WorkspaceBuild, job database.ProvisionerJob, templateSchedule schedule.TemplateScheduleOptions) bool {
359+
func isEligibleForFailedStop(build database.WorkspaceBuild, job database.ProvisionerJob, templateSchedule schedule.TemplateScheduleOptions, currentTick time.Time) bool {
272360
// If the template has specified a failure TTL.
273361
return templateSchedule.FailureTTL > 0 &&
274362
// And the job resulted in failure.
275363
db2sdk.ProvisionerJobStatus(job) == codersdk.ProvisionerJobFailed &&
276364
build.Transition == database.WorkspaceTransitionStart &&
277365
// And sufficient time has elapsed since the job has completed.
278-
job.CompletedAt.Valid && database.Now().Sub(job.CompletedAt.Time) > templateSchedule.FailureTTL
366+
job.CompletedAt.Valid &&
367+
currentTick.Sub(job.CompletedAt.Time) > templateSchedule.FailureTTL
279368
}

coderd/autobuild/lifecycle_executor_test.go

Lines changed: 54 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@ import (
2121
"github.com/coder/coder/codersdk"
2222
"github.com/coder/coder/provisioner/echo"
2323
"github.com/coder/coder/provisionersdk/proto"
24-
"github.com/coder/coder/testutil"
2524
)
2625

2726
func TestExecutorAutostartOK(t *testing.T) {
@@ -651,8 +650,9 @@ func TestExecutorAutostartTemplateDisabled(t *testing.T) {
651650
assert.Len(t, stats.Transitions, 0)
652651
}
653652

654-
// TesetExecutorFailedWorkspace tests that failed workspaces that breach
655-
// their template failed_ttl threshold trigger a stop job.
653+
// TestExecutorFailedWorkspace tests AGPL functionality which mainly
654+
// ensures that autostop actions as a result of a failed workspace
655+
// build do not trigger.
656656
// For enterprise functionality see enterprise/coderd/workspaces_test.go
657657
func TestExecutorFailedWorkspace(t *testing.T) {
658658
t.Parallel()
@@ -693,12 +693,57 @@ func TestExecutorFailedWorkspace(t *testing.T) {
693693
ws := coderdtest.CreateWorkspace(t, client, user.OrganizationID, template.ID)
694694
build := coderdtest.AwaitWorkspaceBuildJob(t, client, ws.LatestBuild.ID)
695695
require.Equal(t, codersdk.WorkspaceStatusFailed, build.Status)
696-
require.Eventually(t,
697-
func() bool {
698-
return database.Now().Sub(*build.Job.CompletedAt) > failureTTL
699-
},
700-
testutil.IntervalMedium, testutil.IntervalFast)
701-
ticker <- time.Now()
696+
ticker <- build.Job.CompletedAt.Add(failureTTL * 2)
697+
stats := <-statCh
698+
// Expect no transitions since we're using AGPL.
699+
require.Len(t, stats.Transitions, 0)
700+
})
701+
}
702+
703+
// TestExecutorInactiveWorkspace tests AGPL functionality which mainly
704+
// ensures that autostop actions as a result of an inactive workspace
705+
// do not trigger.
706+
// For enterprise functionality see enterprise/coderd/workspaces_test.go
707+
func TestExecutorInactiveWorkspace(t *testing.T) {
708+
t.Parallel()
709+
710+
// Test that an AGPL TemplateScheduleStore properly disables
711+
// functionality.
712+
t.Run("OK", func(t *testing.T) {
713+
t.Parallel()
714+
715+
var (
716+
ticker = make(chan time.Time)
717+
statCh = make(chan autobuild.Stats)
718+
logger = slogtest.Make(t, &slogtest.Options{
719+
// We ignore errors here since we expect to fail
720+
// builds.
721+
IgnoreErrors: true,
722+
})
723+
inactiveTTL = time.Millisecond
724+
725+
client = coderdtest.New(t, &coderdtest.Options{
726+
Logger: &logger,
727+
AutobuildTicker: ticker,
728+
IncludeProvisionerDaemon: true,
729+
AutobuildStats: statCh,
730+
TemplateScheduleStore: schedule.NewAGPLTemplateScheduleStore(),
731+
})
732+
)
733+
user := coderdtest.CreateFirstUser(t, client)
734+
version := coderdtest.CreateTemplateVersion(t, client, user.OrganizationID, &echo.Responses{
735+
Parse: echo.ParseComplete,
736+
ProvisionPlan: echo.ProvisionComplete,
737+
ProvisionApply: echo.ProvisionComplete,
738+
})
739+
template := coderdtest.CreateTemplate(t, client, user.OrganizationID, version.ID, func(ctr *codersdk.CreateTemplateRequest) {
740+
ctr.InactivityTTLMillis = ptr.Ref[int64](inactiveTTL.Milliseconds())
741+
})
742+
coderdtest.AwaitTemplateVersionJob(t, client, version.ID)
743+
ws := coderdtest.CreateWorkspace(t, client, user.OrganizationID, template.ID)
744+
build := coderdtest.AwaitWorkspaceBuildJob(t, client, ws.LatestBuild.ID)
745+
require.Equal(t, codersdk.WorkspaceStatusRunning, build.Status)
746+
ticker <- ws.LastUsedAt.Add(inactiveTTL * 2)
702747
stats := <-statCh
703748
// Expect no transitions since we're using AGPL.
704749
require.Len(t, stats.Transitions, 0)

coderd/database/dbfake/dbfake.go

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3495,12 +3495,17 @@ func (q *fakeQuerier) GetWorkspacesEligibleForTransition(ctx context.Context, no
34953495
return nil, err
34963496
}
34973497

3498-
if build.Transition == database.WorkspaceTransitionStart && !build.Deadline.IsZero() && build.Deadline.Before(now) {
3498+
if build.Transition == database.WorkspaceTransitionStart &&
3499+
!build.Deadline.IsZero() &&
3500+
build.Deadline.Before(now) &&
3501+
!workspace.LockedAt.Valid {
34993502
workspaces = append(workspaces, workspace)
35003503
continue
35013504
}
35023505

3503-
if build.Transition == database.WorkspaceTransitionStop && workspace.AutostartSchedule.Valid {
3506+
if build.Transition == database.WorkspaceTransitionStop &&
3507+
workspace.AutostartSchedule.Valid &&
3508+
!workspace.LockedAt.Valid {
35043509
workspaces = append(workspaces, workspace)
35053510
continue
35063511
}
@@ -3513,6 +3518,19 @@ func (q *fakeQuerier) GetWorkspacesEligibleForTransition(ctx context.Context, no
35133518
workspaces = append(workspaces, workspace)
35143519
continue
35153520
}
3521+
3522+
template, err := q.GetTemplateByID(ctx, workspace.TemplateID)
3523+
if err != nil {
3524+
return nil, xerrors.Errorf("get template by ID: %w", err)
3525+
}
3526+
if !workspace.LockedAt.Valid && template.InactivityTTL > 0 {
3527+
workspaces = append(workspaces, workspace)
3528+
continue
3529+
}
3530+
if workspace.LockedAt.Valid && template.LockedTTL > 0 {
3531+
workspaces = append(workspaces, workspace)
3532+
continue
3533+
}
35163534
}
35173535

35183536
return workspaces, nil
@@ -4702,6 +4720,7 @@ func (q *fakeQuerier) UpdateTemplateScheduleByID(_ context.Context, arg database
47024720
tpl.MaxTTL = arg.MaxTTL
47034721
tpl.FailureTTL = arg.FailureTTL
47044722
tpl.InactivityTTL = arg.InactivityTTL
4723+
tpl.LockedTTL = arg.LockedTTL
47054724
q.templates[idx] = tpl
47064725
return tpl.DeepCopy(), nil
47074726
}
@@ -5245,6 +5264,7 @@ func (q *fakeQuerier) UpdateWorkspaceLockedAt(_ context.Context, arg database.Up
52455264
continue
52465265
}
52475266
workspace.LockedAt = arg.LockedAt
5267+
workspace.LastUsedAt = database.Now()
52485268
q.workspaces[index] = workspace
52495269
return nil
52505270
}

coderd/database/dump.sql

Lines changed: 4 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
-- It's not possible to delete enum values.
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
BEGIN;
2+
ALTER TYPE build_reason ADD VALUE IF NOT EXISTS 'autolock';
3+
ALTER TYPE build_reason ADD VALUE IF NOT EXISTS 'failedstop';
4+
ALTER TYPE build_reason ADD VALUE IF NOT EXISTS 'autodelete';
5+
COMMIT;

coderd/database/models.go

Lines changed: 13 additions & 4 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)