From 06399d0b94e76f3b7f7412e779f81854b8062909 Mon Sep 17 00:00:00 2001 From: Jon Ayers Date: Tue, 13 Jun 2023 00:29:45 +0000 Subject: [PATCH 1/9] chore: move autobuild/executor into autobuild --- cli/server.go | 4 +- coderd/autobuild/doc.go | 3 + .../{executor => }/lifecycle_executor.go | 134 +++++++++--------- .../{executor => }/lifecycle_executor_test.go | 36 ++--- coderd/autobuild/notify/notifier_test.go | 2 +- coderd/coderdtest/coderdtest.go | 6 +- 6 files changed, 96 insertions(+), 89 deletions(-) create mode 100644 coderd/autobuild/doc.go rename coderd/autobuild/{executor => }/lifecycle_executor.go (62%) rename coderd/autobuild/{executor => }/lifecycle_executor_test.go (97%) diff --git a/cli/server.go b/cli/server.go index e30bf5fd71fde..c77b252429fcd 100644 --- a/cli/server.go +++ b/cli/server.go @@ -62,7 +62,7 @@ import ( "github.com/coder/coder/cli/cliui" "github.com/coder/coder/cli/config" "github.com/coder/coder/coderd" - "github.com/coder/coder/coderd/autobuild/executor" + "github.com/coder/coder/coderd/autobuild" "github.com/coder/coder/coderd/database" "github.com/coder/coder/coderd/database/dbfake" "github.com/coder/coder/coderd/database/dbmetrics" @@ -899,7 +899,7 @@ func (r *RootCmd) Server(newAPI func(context.Context, *coderd.Options) (*coderd. autobuildPoller := time.NewTicker(cfg.AutobuildPollInterval.Value()) defer autobuildPoller.Stop() - autobuildExecutor := executor.New(ctx, options.Database, coderAPI.TemplateScheduleStore, logger, autobuildPoller.C) + autobuildExecutor := autobuild.NewExecutor(ctx, options.Database, coderAPI.TemplateScheduleStore, logger, autobuildPoller.C) autobuildExecutor.Run() // Currently there is no way to ask the server to shut diff --git a/coderd/autobuild/doc.go b/coderd/autobuild/doc.go new file mode 100644 index 0000000000000..2e6e7c5a0c287 --- /dev/null +++ b/coderd/autobuild/doc.go @@ -0,0 +1,3 @@ +// Package autobuild contains logic for scheduling workspace +// builds in the background. +package autobuild diff --git a/coderd/autobuild/executor/lifecycle_executor.go b/coderd/autobuild/lifecycle_executor.go similarity index 62% rename from coderd/autobuild/executor/lifecycle_executor.go rename to coderd/autobuild/lifecycle_executor.go index 761293fc63ae5..6138e8ec58cf2 100644 --- a/coderd/autobuild/executor/lifecycle_executor.go +++ b/coderd/autobuild/lifecycle_executor.go @@ -1,4 +1,4 @@ -package executor +package autobuild import ( "context" @@ -35,8 +35,8 @@ type Stats struct { Error error } -// New returns a new autobuild executor. -func New(ctx context.Context, db database.Store, tss *atomic.Pointer[schedule.TemplateScheduleStore], log slog.Logger, tick <-chan time.Time) *Executor { +// New returns a new wsactions executor. +func NewExecutor(ctx context.Context, db database.Store, tss *atomic.Pointer[schedule.TemplateScheduleStore], log slog.Logger, tick <-chan time.Time) *Executor { le := &Executor{ //nolint:gocritic // Autostart has a limited set of permissions. ctx: dbauthz.AsAutostart(ctx), @@ -125,77 +125,57 @@ func (e *Executor) runOnce(t time.Time) Stats { log := e.log.With(slog.F("workspace_id", wsID)) eg.Go(func() error { - err := e.db.InTx(func(db database.Store) error { + err := e.db.InTx(func(tx database.Store) error { // Re-check eligibility since the first check was outside the // transaction and the workspace settings may have changed. - ws, err := db.GetWorkspaceByID(e.ctx, wsID) + ws, err := tx.GetWorkspaceByID(e.ctx, wsID) if err != nil { log.Error(e.ctx, "get workspace autostart failed", slog.Error(err)) return nil } // Determine the workspace state based on its latest build. - priorHistory, err := db.GetLatestWorkspaceBuildByWorkspaceID(e.ctx, ws.ID) + latestBuild, err := tx.GetLatestWorkspaceBuildByWorkspaceID(e.ctx, ws.ID) if err != nil { log.Warn(e.ctx, "get latest workspace build", slog.Error(err)) return nil } - templateSchedule, err := (*(e.templateScheduleStore.Load())).GetTemplateScheduleOptions(e.ctx, db, ws.TemplateID) + templateSchedule, err := (*(e.templateScheduleStore.Load())).GetTemplateScheduleOptions(e.ctx, tx, ws.TemplateID) if err != nil { log.Warn(e.ctx, "get template schedule options", slog.Error(err)) return nil } - if !isEligibleForAutoStartStop(ws, priorHistory, templateSchedule) { - return nil - } - - priorJob, err := db.GetProvisionerJobByID(e.ctx, priorHistory.JobID) + latestJob, err := tx.GetProvisionerJobByID(e.ctx, latestBuild.JobID) if err != nil { log.Warn(e.ctx, "get last provisioner job for workspace %q: %w", slog.Error(err)) return nil } - validTransition, nextTransition, err := getNextTransition(ws, priorHistory, priorJob) + nextTransition, reason, err := getNextTransition(ws, latestBuild, latestJob, templateSchedule, currentTick) if err != nil { log.Debug(e.ctx, "skipping workspace", slog.Error(err)) return nil } - if currentTick.Before(nextTransition) { - log.Debug(e.ctx, "skipping workspace: too early", - slog.F("next_transition_at", nextTransition), - slog.F("transition", validTransition), - slog.F("current_tick", currentTick), - ) - return nil - } - builder := wsbuilder.New(ws, validTransition). - SetLastWorkspaceBuildInTx(&priorHistory). - SetLastWorkspaceBuildJobInTx(&priorJob) - - switch validTransition { - case database.WorkspaceTransitionStart: - builder = builder.Reason(database.BuildReasonAutostart) - case database.WorkspaceTransitionStop: - builder = builder.Reason(database.BuildReasonAutostop) - default: - log.Error(e.ctx, "unsupported transition", slog.F("transition", validTransition)) - return nil - } - if _, _, err := builder.Build(e.ctx, db, nil); err != nil { + builder := wsbuilder.New(ws, nextTransition). + SetLastWorkspaceBuildInTx(&latestBuild). + SetLastWorkspaceBuildJobInTx(&latestJob). + Reason(reason) + + if _, _, err := builder.Build(e.ctx, tx, nil); err != nil { log.Error(e.ctx, "unable to transition workspace", - slog.F("transition", validTransition), + slog.F("transition", nextTransition), slog.Error(err), ) return nil } statsMu.Lock() - stats.Transitions[ws.ID] = validTransition + stats.Transitions[ws.ID] = nextTransition statsMu.Unlock() - log.Info(e.ctx, "scheduling workspace transition", slog.F("transition", validTransition)) + log.Info(e.ctx, "scheduling workspace transition", slog.F("transition", nextTransition)) return nil @@ -218,7 +198,9 @@ func (e *Executor) runOnce(t time.Time) Stats { return stats } -func isEligibleForAutoStartStop(ws database.Workspace, priorHistory database.WorkspaceBuild, templateSchedule schedule.TemplateScheduleOptions) bool { +// isEligibleForTransition returns true if the workspace meets basic criteria +// for transitioning to a new state. +func isEligibleForTransition(ws database.Workspace, latestBuild database.WorkspaceBuild, templateSchedule schedule.TemplateScheduleOptions) bool { if ws.Deleted { return false } @@ -227,7 +209,7 @@ func isEligibleForAutoStartStop(ws database.Workspace, priorHistory database.Wor } // Don't check the template schedule to see whether it allows autostop, this // is done during the build when determining the deadline. - if priorHistory.Transition == database.WorkspaceTransitionStart && !priorHistory.Deadline.IsZero() { + if latestBuild.Transition == database.WorkspaceTransitionStart && !latestBuild.Deadline.IsZero() { return true } @@ -236,35 +218,57 @@ func isEligibleForAutoStartStop(ws database.Workspace, priorHistory database.Wor func getNextTransition( ws database.Workspace, - priorHistory database.WorkspaceBuild, - priorJob database.ProvisionerJob, + latestBuild database.WorkspaceBuild, + latestJob database.ProvisionerJob, + templateSchedule schedule.TemplateScheduleOptions, + currentTick time.Time, ) ( - validTransition database.WorkspaceTransition, - nextTransition time.Time, - err error, + database.WorkspaceTransition, + database.BuildReason, + error, ) { - if !priorJob.CompletedAt.Valid || priorJob.Error.String != "" { - return "", time.Time{}, xerrors.Errorf("last workspace build did not complete successfully") + if !isEligibleForTransition(ws, latestBuild, templateSchedule) { + return "", "", xerrors.Errorf("workspace ineligible for transition") } - switch priorHistory.Transition { - case database.WorkspaceTransitionStart: - if priorHistory.Deadline.IsZero() { - return "", time.Time{}, xerrors.Errorf("latest workspace build has zero deadline") - } - // For stopping, do not truncate. This is inconsistent with autostart, but - // it ensures we will not stop too early. - return database.WorkspaceTransitionStop, priorHistory.Deadline, nil - case database.WorkspaceTransitionStop: - sched, err := schedule.Weekly(ws.AutostartSchedule.String) - if err != nil { - return "", time.Time{}, xerrors.Errorf("workspace has invalid autostart schedule: %w", err) - } - // Round down to the nearest minute, as this is the finest granularity cron supports. - // Truncate is probably not necessary here, but doing it anyway to be sure. - nextTransition = sched.Next(priorHistory.CreatedAt).Truncate(time.Minute) - return database.WorkspaceTransitionStart, nextTransition, nil + if !latestJob.CompletedAt.Valid || latestJob.Error.String != "" { + return "", "", xerrors.Errorf("last workspace build did not complete successfully") + } + + switch { + case isEligibleForAutostop(latestBuild, currentTick): + return database.WorkspaceTransitionStop, database.BuildReasonAutostop, nil + case isEligibleForAutostart(ws, latestBuild, currentTick): + return database.WorkspaceTransitionStart, database.BuildReasonAutostart, nil default: - return "", time.Time{}, xerrors.Errorf("last transition not valid for autostart or autostop") + return "", "", xerrors.Errorf("last transition not valid for autostart or autostop") + } +} + +// isEligibleForAutostart returns true if the workspace should be autostarted. +func isEligibleForAutostart(ws database.Workspace, build database.WorkspaceBuild, currentTick time.Time) bool { + // If the last transition for the workspace was not 'stop' then the workspace + // cannot be started. + if build.Transition != database.WorkspaceTransitionStop { + return false + } + + sched, err := schedule.Weekly(ws.AutostartSchedule.String) + if err != nil { + return false } + // Round down to the nearest minute, as this is the finest granularity cron supports. + // Truncate is probably not necessary here, but doing it anyway to be sure. + nextTransition := sched.Next(build.CreatedAt).Truncate(time.Minute) + + return !currentTick.Before(nextTransition) +} + +// isEligibleForAutostart returns true if the workspace should be autostopped. +func isEligibleForAutostop(build database.WorkspaceBuild, currentTick time.Time) bool { + // A workspace must be started in order for it to be auto-stopped. + return build.Transition == database.WorkspaceTransitionStart && + !build.Deadline.IsZero() && + // We do not want to stop a workspace prior to it breaching its deadline. + !currentTick.Before(build.Deadline) } diff --git a/coderd/autobuild/executor/lifecycle_executor_test.go b/coderd/autobuild/lifecycle_executor_test.go similarity index 97% rename from coderd/autobuild/executor/lifecycle_executor_test.go rename to coderd/autobuild/lifecycle_executor_test.go index e942ff490c5a6..0be450528fa5d 100644 --- a/coderd/autobuild/executor/lifecycle_executor_test.go +++ b/coderd/autobuild/lifecycle_executor_test.go @@ -1,4 +1,4 @@ -package executor_test +package autobuild_test import ( "context" @@ -11,7 +11,7 @@ import ( "github.com/stretchr/testify/require" "go.uber.org/goleak" - "github.com/coder/coder/coderd/autobuild/executor" + "github.com/coder/coder/coderd/autobuild" "github.com/coder/coder/coderd/coderdtest" "github.com/coder/coder/coderd/database" "github.com/coder/coder/coderd/schedule" @@ -27,7 +27,7 @@ func TestExecutorAutostartOK(t *testing.T) { var ( sched = mustSchedule(t, "CRON_TZ=UTC 0 * * * *") tickCh = make(chan time.Time) - statsCh = make(chan executor.Stats) + statsCh = make(chan autobuild.Stats) client = coderdtest.New(t, &coderdtest.Options{ AutobuildTicker: tickCh, IncludeProvisionerDaemon: true, @@ -66,7 +66,7 @@ func TestExecutorAutostartTemplateUpdated(t *testing.T) { ctx = context.Background() err error tickCh = make(chan time.Time) - statsCh = make(chan executor.Stats) + statsCh = make(chan autobuild.Stats) client = coderdtest.New(t, &coderdtest.Options{ AutobuildTicker: tickCh, IncludeProvisionerDaemon: true, @@ -113,7 +113,7 @@ func TestExecutorAutostartAlreadyRunning(t *testing.T) { var ( sched = mustSchedule(t, "CRON_TZ=UTC 0 * * * *") tickCh = make(chan time.Time) - statsCh = make(chan executor.Stats) + statsCh = make(chan autobuild.Stats) client = coderdtest.New(t, &coderdtest.Options{ AutobuildTicker: tickCh, IncludeProvisionerDaemon: true, @@ -145,7 +145,7 @@ func TestExecutorAutostartNotEnabled(t *testing.T) { var ( tickCh = make(chan time.Time) - statsCh = make(chan executor.Stats) + statsCh = make(chan autobuild.Stats) client = coderdtest.New(t, &coderdtest.Options{ AutobuildTicker: tickCh, IncludeProvisionerDaemon: true, @@ -180,7 +180,7 @@ func TestExecutorAutostopOK(t *testing.T) { var ( tickCh = make(chan time.Time) - statsCh = make(chan executor.Stats) + statsCh = make(chan autobuild.Stats) client = coderdtest.New(t, &coderdtest.Options{ AutobuildTicker: tickCh, IncludeProvisionerDaemon: true, @@ -216,7 +216,7 @@ func TestExecutorAutostopExtend(t *testing.T) { var ( ctx = context.Background() tickCh = make(chan time.Time) - statsCh = make(chan executor.Stats) + statsCh = make(chan autobuild.Stats) client = coderdtest.New(t, &coderdtest.Options{ AutobuildTicker: tickCh, IncludeProvisionerDaemon: true, @@ -266,7 +266,7 @@ func TestExecutorAutostopAlreadyStopped(t *testing.T) { var ( tickCh = make(chan time.Time) - statsCh = make(chan executor.Stats) + statsCh = make(chan autobuild.Stats) client = coderdtest.New(t, &coderdtest.Options{ AutobuildTicker: tickCh, IncludeProvisionerDaemon: true, @@ -299,7 +299,7 @@ func TestExecutorAutostopNotEnabled(t *testing.T) { var ( ctx = context.Background() tickCh = make(chan time.Time) - statsCh = make(chan executor.Stats) + statsCh = make(chan autobuild.Stats) client = coderdtest.New(t, &coderdtest.Options{ AutobuildTicker: tickCh, IncludeProvisionerDaemon: true, @@ -341,7 +341,7 @@ func TestExecutorWorkspaceDeleted(t *testing.T) { var ( sched = mustSchedule(t, "CRON_TZ=UTC 0 * * * *") tickCh = make(chan time.Time) - statsCh = make(chan executor.Stats) + statsCh = make(chan autobuild.Stats) client = coderdtest.New(t, &coderdtest.Options{ AutobuildTicker: tickCh, IncludeProvisionerDaemon: true, @@ -374,7 +374,7 @@ func TestExecutorWorkspaceAutostartTooEarly(t *testing.T) { var ( sched = mustSchedule(t, "CRON_TZ=UTC 0 * * * *") tickCh = make(chan time.Time) - statsCh = make(chan executor.Stats) + statsCh = make(chan autobuild.Stats) client = coderdtest.New(t, &coderdtest.Options{ AutobuildTicker: tickCh, IncludeProvisionerDaemon: true, @@ -405,7 +405,7 @@ func TestExecutorWorkspaceAutostopBeforeDeadline(t *testing.T) { var ( tickCh = make(chan time.Time) - statsCh = make(chan executor.Stats) + statsCh = make(chan autobuild.Stats) client = coderdtest.New(t, &coderdtest.Options{ AutobuildTicker: tickCh, IncludeProvisionerDaemon: true, @@ -433,7 +433,7 @@ func TestExecutorWorkspaceAutostopNoWaitChangedMyMind(t *testing.T) { var ( ctx = context.Background() tickCh = make(chan time.Time) - statsCh = make(chan executor.Stats) + statsCh = make(chan autobuild.Stats) client = coderdtest.New(t, &coderdtest.Options{ AutobuildTicker: tickCh, IncludeProvisionerDaemon: true, @@ -501,8 +501,8 @@ func TestExecutorAutostartMultipleOK(t *testing.T) { sched = mustSchedule(t, "CRON_TZ=UTC 0 * * * *") tickCh = make(chan time.Time) tickCh2 = make(chan time.Time) - statsCh1 = make(chan executor.Stats) - statsCh2 = make(chan executor.Stats) + statsCh1 = make(chan autobuild.Stats) + statsCh2 = make(chan autobuild.Stats) client = coderdtest.New(t, &coderdtest.Options{ AutobuildTicker: tickCh, IncludeProvisionerDaemon: true, @@ -556,7 +556,7 @@ func TestExecutorAutostartWithParameters(t *testing.T) { var ( sched = mustSchedule(t, "CRON_TZ=UTC 0 * * * *") tickCh = make(chan time.Time) - statsCh = make(chan executor.Stats) + statsCh = make(chan autobuild.Stats) client = coderdtest.New(t, &coderdtest.Options{ AutobuildTicker: tickCh, IncludeProvisionerDaemon: true, @@ -609,7 +609,7 @@ func TestExecutorAutostartTemplateDisabled(t *testing.T) { var ( sched = mustSchedule(t, "CRON_TZ=UTC 0 * * * *") tickCh = make(chan time.Time) - statsCh = make(chan executor.Stats) + statsCh = make(chan autobuild.Stats) client = coderdtest.New(t, &coderdtest.Options{ AutobuildTicker: tickCh, diff --git a/coderd/autobuild/notify/notifier_test.go b/coderd/autobuild/notify/notifier_test.go index fb547defd5b36..e241e19f2a7a4 100644 --- a/coderd/autobuild/notify/notifier_test.go +++ b/coderd/autobuild/notify/notifier_test.go @@ -9,7 +9,7 @@ import ( "go.uber.org/atomic" "go.uber.org/goleak" - "github.com/coder/coder/coderd/autobuild/notify" + "github.com/coder/coder/coderd/wsactions/notify" ) func TestNotifier(t *testing.T) { diff --git a/coderd/coderdtest/coderdtest.go b/coderd/coderdtest/coderdtest.go index 5d03b49016f28..a28a52cda0d4e 100644 --- a/coderd/coderdtest/coderdtest.go +++ b/coderd/coderdtest/coderdtest.go @@ -54,7 +54,7 @@ import ( "cdr.dev/slog/sloggers/slogtest" "github.com/coder/coder/coderd" "github.com/coder/coder/coderd/audit" - "github.com/coder/coder/coderd/autobuild/executor" + "github.com/coder/coder/coderd/autobuild" "github.com/coder/coder/coderd/awsidentity" "github.com/coder/coder/coderd/database" "github.com/coder/coder/coderd/database/dbauthz" @@ -102,7 +102,7 @@ type Options struct { GoogleTokenValidator *idtoken.Validator SSHKeygenAlgorithm gitsshkey.Algorithm AutobuildTicker <-chan time.Time - AutobuildStats chan<- executor.Stats + AutobuildStats chan<- autobuild.Stats Auditor audit.Auditor TLSCertificates []tls.Certificate GitAuthConfigs []*gitauth.Config @@ -244,7 +244,7 @@ func NewOptions(t testing.TB, options *Options) (func(http.Handler), context.Can templateScheduleStore.Store(&options.TemplateScheduleStore) ctx, cancelFunc := context.WithCancel(context.Background()) - lifecycleExecutor := executor.New( + lifecycleExecutor := autobuild.NewExecutor( ctx, options.Database, &templateScheduleStore, From 315d869004afa17640f847823dd7b20a6287b557 Mon Sep 17 00:00:00 2001 From: Jon Ayers Date: Tue, 13 Jun 2023 02:41:04 +0000 Subject: [PATCH 2/9] feat: add support for stopping failed workspaces --- coderd/autobuild/lifecycle_executor.go | 71 +++--- coderd/autobuild/lifecycle_executor_test.go | 253 ++++++++++++++++++++ coderd/autobuild/notify/notifier_test.go | 2 +- coderd/coderdtest/coderdtest.go | 12 +- coderd/database/dbauthz/dbauthz.go | 4 +- coderd/database/dbfake/dbfake.go | 2 +- coderd/database/dbmetrics/dbmetrics.go | 4 +- coderd/database/dbmock/dbmock.go | 12 +- coderd/database/querier.go | 2 +- coderd/database/queries.sql.go | 17 +- coderd/database/queries/workspaces.sql | 13 +- coderd/wsbuilder/wsbuilder.go | 30 ++- provisioner/echo/serve.go | 29 +-- 13 files changed, 368 insertions(+), 83 deletions(-) diff --git a/coderd/autobuild/lifecycle_executor.go b/coderd/autobuild/lifecycle_executor.go index 6138e8ec58cf2..8010a7f854185 100644 --- a/coderd/autobuild/lifecycle_executor.go +++ b/coderd/autobuild/lifecycle_executor.go @@ -13,9 +13,11 @@ import ( "cdr.dev/slog" "github.com/coder/coder/coderd/database" + "github.com/coder/coder/coderd/database/db2sdk" "github.com/coder/coder/coderd/database/dbauthz" "github.com/coder/coder/coderd/schedule" "github.com/coder/coder/coderd/wsbuilder" + "github.com/coder/coder/codersdk" ) // Executor automatically starts or stops workspaces. @@ -108,7 +110,7 @@ func (e *Executor) runOnce(t time.Time) Stats { // NOTE: If a workspace build is created with a given TTL and then the user either // changes or unsets the TTL, the deadline for the workspace build will not // have changed. This behavior is as expected per #2229. - workspaces, err := e.db.GetWorkspacesEligibleForAutoStartStop(e.ctx, t) + workspaces, err := e.db.GetWorkspacesEligibleForTransition(e.ctx, t) if err != nil { e.log.Error(e.ctx, "get workspaces for autostart or autostop", slog.Error(err)) return stats @@ -198,24 +200,6 @@ func (e *Executor) runOnce(t time.Time) Stats { return stats } -// isEligibleForTransition returns true if the workspace meets basic criteria -// for transitioning to a new state. -func isEligibleForTransition(ws database.Workspace, latestBuild database.WorkspaceBuild, templateSchedule schedule.TemplateScheduleOptions) bool { - if ws.Deleted { - return false - } - if templateSchedule.UserAutostartEnabled && ws.AutostartSchedule.Valid && ws.AutostartSchedule.String != "" { - return true - } - // Don't check the template schedule to see whether it allows autostop, this - // is done during the build when determining the deadline. - if latestBuild.Transition == database.WorkspaceTransitionStart && !latestBuild.Deadline.IsZero() { - return true - } - - return false -} - func getNextTransition( ws database.Workspace, latestBuild database.WorkspaceBuild, @@ -227,32 +211,37 @@ func getNextTransition( database.BuildReason, error, ) { - if !isEligibleForTransition(ws, latestBuild, templateSchedule) { - return "", "", xerrors.Errorf("workspace ineligible for transition") - } - - if !latestJob.CompletedAt.Valid || latestJob.Error.String != "" { - return "", "", xerrors.Errorf("last workspace build did not complete successfully") - } - switch { - case isEligibleForAutostop(latestBuild, currentTick): + case isEligibleForAutostop(latestBuild, latestJob, currentTick): return database.WorkspaceTransitionStop, database.BuildReasonAutostop, nil - case isEligibleForAutostart(ws, latestBuild, currentTick): + case isEligibleForAutostart(ws, latestBuild, latestJob, templateSchedule, currentTick): return database.WorkspaceTransitionStart, database.BuildReasonAutostart, nil + case isEligibleForFailedStop(latestJob, templateSchedule): + return database.WorkspaceTransitionStop, database.BuildReasonAutostop, nil default: return "", "", xerrors.Errorf("last transition not valid for autostart or autostop") } } // isEligibleForAutostart returns true if the workspace should be autostarted. -func isEligibleForAutostart(ws database.Workspace, build database.WorkspaceBuild, currentTick time.Time) bool { +func isEligibleForAutostart(ws database.Workspace, build database.WorkspaceBuild, job database.ProvisionerJob, templateSchedule schedule.TemplateScheduleOptions, currentTick time.Time) bool { + // Don't attempt to autostart failed workspaces. + if !job.CompletedAt.Valid || job.Error.String != "" { + return false + } + // If the last transition for the workspace was not 'stop' then the workspace // cannot be started. if build.Transition != database.WorkspaceTransitionStop { return false } + // If autostart isn't enabled, or the schedule isn't valid/populated we can't + // autostart the workspace. + if !templateSchedule.UserAutostartEnabled || !ws.AutostartSchedule.Valid || ws.AutostartSchedule.String == "" { + return false + } + sched, err := schedule.Weekly(ws.AutostartSchedule.String) if err != nil { return false @@ -265,10 +254,30 @@ func isEligibleForAutostart(ws database.Workspace, build database.WorkspaceBuild } // isEligibleForAutostart returns true if the workspace should be autostopped. -func isEligibleForAutostop(build database.WorkspaceBuild, currentTick time.Time) bool { +func isEligibleForAutostop(build database.WorkspaceBuild, job database.ProvisionerJob, currentTick time.Time) bool { + // Don't attempt to autostop failed workspaces. + if !job.CompletedAt.Valid || job.Error.String != "" { + return false + } + // A workspace must be started in order for it to be auto-stopped. return build.Transition == database.WorkspaceTransitionStart && !build.Deadline.IsZero() && // We do not want to stop a workspace prior to it breaching its deadline. !currentTick.Before(build.Deadline) } + +// isEligibleForFailedStop returns true if the workspace is eligible to be stopped +// due to a failed build. +func isEligibleForFailedStop(job database.ProvisionerJob, templateSchedule schedule.TemplateScheduleOptions) bool { + // If the template has specified a failure TLL. + return templateSchedule.FailureTTL > 0 && + // And the job resulted in failure. + db2sdk.ProvisionerJobStatus(job) == codersdk.ProvisionerJobFailed && + // And sufficient time has elapsed since the job has completed. + (job.CompletedAt.Valid && database.Now().Sub(job.CompletedAt.Time) > templateSchedule.FailureTTL || + // Or sufficient time has elapsed since the job was canceled. + job.CanceledAt.Valid && database.Now().Sub(job.CanceledAt.Time) > templateSchedule.FailureTTL || + // Or the job is stuck/abandoned. + database.Now().Sub(job.UpdatedAt) > 30*time.Second) +} diff --git a/coderd/autobuild/lifecycle_executor_test.go b/coderd/autobuild/lifecycle_executor_test.go index 0be450528fa5d..b485c85fa7659 100644 --- a/coderd/autobuild/lifecycle_executor_test.go +++ b/coderd/autobuild/lifecycle_executor_test.go @@ -11,6 +11,8 @@ import ( "github.com/stretchr/testify/require" "go.uber.org/goleak" + "cdr.dev/slog/sloggers/slogtest" + "github.com/coder/coder/coderd/autobuild" "github.com/coder/coder/coderd/coderdtest" "github.com/coder/coder/coderd/database" @@ -19,6 +21,7 @@ import ( "github.com/coder/coder/codersdk" "github.com/coder/coder/provisioner/echo" "github.com/coder/coder/provisionersdk/proto" + "github.com/coder/coder/testutil" ) func TestExecutorAutostartOK(t *testing.T) { @@ -648,6 +651,191 @@ func TestExecutorAutostartTemplateDisabled(t *testing.T) { assert.Len(t, stats.Transitions, 0) } +// TesetExecutorFailedWorkspace tests that failed workspaces that breach +// their template failed_ttl threshold trigger a stop job. +func TestExecutorFailedWorkspace(t *testing.T) { + t.Parallel() + + t.Run("AGPLOK", func(t *testing.T) { + t.Parallel() + + var ( + ticker = make(chan time.Time) + statCh = make(chan autobuild.Stats) + logger = slogtest.Make(t, &slogtest.Options{ + // We ignore errors here since we expect to fail + // builds. + IgnoreErrors: true, + }) + failureTTL = time.Millisecond + + client = coderdtest.New(t, &coderdtest.Options{ + Logger: &logger, + AutobuildTicker: ticker, + IncludeProvisionerDaemon: true, + AutobuildStats: statCh, + TemplateScheduleStore: schedule.NewAGPLTemplateScheduleStore(), + }) + ) + user := coderdtest.CreateFirstUser(t, client) + version := coderdtest.CreateTemplateVersion(t, client, user.OrganizationID, &echo.Responses{ + Parse: echo.ParseComplete, + ProvisionPlan: echo.ProvisionComplete, + ProvisionApply: echo.ProvisionFailed, + }) + template := coderdtest.CreateTemplate(t, client, user.OrganizationID, version.ID, func(ctr *codersdk.CreateTemplateRequest) { + ctr.FailureTTLMillis = ptr.Ref[int64](failureTTL.Milliseconds()) + }) + coderdtest.AwaitTemplateVersionJob(t, client, version.ID) + ws := coderdtest.CreateWorkspace(t, client, user.OrganizationID, template.ID) + build := coderdtest.AwaitWorkspaceBuildJob(t, client, ws.LatestBuild.ID) + require.Equal(t, codersdk.WorkspaceStatusFailed, build.Status) + require.Eventually(t, + func() bool { + return database.Now().Sub(*build.Job.CompletedAt) > failureTTL + }, + testutil.IntervalMedium, testutil.IntervalFast) + ticker <- time.Now() + stats := <-statCh + // Expect no transitions since we're using AGPL. + require.Len(t, stats.Transitions, 0) + }) + + t.Run("EnterpriseOK", func(t *testing.T) { + t.Parallel() + + var ( + ticker = make(chan time.Time) + statCh = make(chan autobuild.Stats) + logger = slogtest.Make(t, &slogtest.Options{ + // We ignore errors here since we expect to fail + // builds. + IgnoreErrors: true, + }) + failureTTL = time.Millisecond + + client = coderdtest.New(t, &coderdtest.Options{ + Logger: &logger, + AutobuildTicker: ticker, + IncludeProvisionerDaemon: true, + AutobuildStats: statCh, + TemplateScheduleStore: mockEnterpriseTemplateScheduler(t), + }) + ) + user := coderdtest.CreateFirstUser(t, client) + version := coderdtest.CreateTemplateVersion(t, client, user.OrganizationID, &echo.Responses{ + Parse: echo.ParseComplete, + ProvisionPlan: echo.ProvisionComplete, + ProvisionApply: echo.ProvisionFailed, + }) + template := coderdtest.CreateTemplate(t, client, user.OrganizationID, version.ID, func(ctr *codersdk.CreateTemplateRequest) { + ctr.FailureTTLMillis = ptr.Ref[int64](failureTTL.Milliseconds()) + }) + coderdtest.AwaitTemplateVersionJob(t, client, version.ID) + ws := coderdtest.CreateWorkspace(t, client, user.OrganizationID, template.ID) + build := coderdtest.AwaitWorkspaceBuildJob(t, client, ws.LatestBuild.ID) + require.Equal(t, codersdk.WorkspaceStatusFailed, build.Status) + require.Eventually(t, + func() bool { + return database.Now().Sub(*build.Job.CompletedAt) > failureTTL + }, + testutil.IntervalMedium, testutil.IntervalFast) + ticker <- time.Now() + stats := <-statCh + // Expect workspace to transition to stopped state for breaching + // failure TTL. + require.Len(t, stats.Transitions, 1) + require.Equal(t, stats.Transitions[ws.ID], database.WorkspaceTransitionStop) + }) + + t.Run("TooEarly", func(t *testing.T) { + t.Parallel() + + var ( + ticker = make(chan time.Time) + statCh = make(chan autobuild.Stats) + logger = slogtest.Make(t, &slogtest.Options{ + // We ignore errors here since we expect to fail + // builds. + IgnoreErrors: true, + }) + failureTTL = time.Minute + + client = coderdtest.New(t, &coderdtest.Options{ + Logger: &logger, + AutobuildTicker: ticker, + IncludeProvisionerDaemon: true, + AutobuildStats: statCh, + TemplateScheduleStore: mockEnterpriseTemplateScheduler(t), + }) + ) + user := coderdtest.CreateFirstUser(t, client) + version := coderdtest.CreateTemplateVersion(t, client, user.OrganizationID, &echo.Responses{ + Parse: echo.ParseComplete, + ProvisionPlan: echo.ProvisionComplete, + ProvisionApply: echo.ProvisionFailed, + }) + template := coderdtest.CreateTemplate(t, client, user.OrganizationID, version.ID, func(ctr *codersdk.CreateTemplateRequest) { + ctr.FailureTTLMillis = ptr.Ref[int64](failureTTL.Milliseconds()) + }) + coderdtest.AwaitTemplateVersionJob(t, client, version.ID) + ws := coderdtest.CreateWorkspace(t, client, user.OrganizationID, template.ID) + build := coderdtest.AwaitWorkspaceBuildJob(t, client, ws.LatestBuild.ID) + require.Equal(t, codersdk.WorkspaceStatusFailed, build.Status) + ticker <- time.Now() + stats := <-statCh + // Expect no transitions since not enough time has elapsed. + require.Len(t, stats.Transitions, 0) + }) + + t.Run("StuckJob", func(t *testing.T) { + t.Parallel() + + var ( + ctx = testutil.Context(t, testutil.WaitMedium) + ticker = make(chan time.Time) + statCh = make(chan autobuild.Stats) + logger = slogtest.Make(t, &slogtest.Options{ + // We ignore errors here since we expect to fail + // builds. + IgnoreErrors: true, + }) + failureTTL = time.Hour + client, _, api = coderdtest.NewWithAPI(t, &coderdtest.Options{ + Logger: &logger, + AutobuildTicker: ticker, + IncludeProvisionerDaemon: true, + AutobuildStats: statCh, + TemplateScheduleStore: mockEnterpriseTemplateScheduler(t), + }) + ) + user := coderdtest.CreateFirstUser(t, client) + version := coderdtest.CreateTemplateVersion(t, client, user.OrganizationID, &echo.Responses{ + Parse: echo.ParseComplete, + ProvisionPlan: echo.ProvisionComplete, + ProvisionApply: echo.ProvisionFailed, + }) + template := coderdtest.CreateTemplate(t, client, user.OrganizationID, version.ID, func(ctr *codersdk.CreateTemplateRequest) { + ctr.FailureTTLMillis = ptr.Ref[int64](failureTTL.Milliseconds()) + }) + coderdtest.AwaitTemplateVersionJob(t, client, version.ID) + ws := coderdtest.CreateWorkspace(t, client, user.OrganizationID, template.ID) + build := coderdtest.AwaitWorkspaceBuildJob(t, client, ws.LatestBuild.ID) + + // Emulate a stuck job. + err := api.Database.UpdateProvisionerJobByID(ctx, database.UpdateProvisionerJobByIDParams{ + ID: build.Job.ID, + UpdatedAt: database.Now().Add(-time.Hour), + }) + require.NoError(t, err) + ticker <- time.Now() + stats := <-statCh + // Expect a transition since we should detect that the job stopped + // updating. + require.Len(t, stats.Transitions, 1) + }) +} + func mustProvisionWorkspace(t *testing.T, client *codersdk.Client, mut ...func(*codersdk.CreateWorkspaceRequest)) codersdk.Workspace { t.Helper() user := coderdtest.CreateFirstUser(t, client) @@ -705,3 +893,68 @@ func mustWorkspaceParameters(t *testing.T, client *codersdk.Client, workspaceID func TestMain(m *testing.M) { goleak.VerifyTestMain(m) } + +func mockEnterpriseTemplateScheduler(t *testing.T) schedule.TemplateScheduleStore { + return schedule.MockTemplateScheduleStore{ + GetFn: func(ctx context.Context, store database.Store, id uuid.UUID) (schedule.TemplateScheduleOptions, error) { + t.Helper() + + template, err := store.GetTemplateByID(ctx, id) + require.NoError(t, err) + + return schedule.TemplateScheduleOptions{ + UserAutostartEnabled: template.AllowUserAutostart, + UserAutostopEnabled: template.AllowUserAutostop, + DefaultTTL: time.Duration(template.DefaultTTL), + MaxTTL: time.Duration(template.MaxTTL), + FailureTTL: time.Duration(template.FailureTTL), + }, nil + }, + SetFn: func(ctx context.Context, db database.Store, tpl database.Template, opts schedule.TemplateScheduleOptions) (database.Template, error) { + t.Helper() + + if int64(opts.DefaultTTL) == tpl.DefaultTTL && + int64(opts.MaxTTL) == tpl.MaxTTL && + int64(opts.FailureTTL) == tpl.FailureTTL && + int64(opts.InactivityTTL) == tpl.InactivityTTL && + int64(opts.LockedTTL) == tpl.LockedTTL && + opts.UserAutostartEnabled == tpl.AllowUserAutostart && + opts.UserAutostopEnabled == tpl.AllowUserAutostop { + // Avoid updating the UpdatedAt timestamp if nothing will be changed. + return tpl, nil + } + + template, err := db.UpdateTemplateScheduleByID(ctx, database.UpdateTemplateScheduleByIDParams{ + ID: tpl.ID, + UpdatedAt: database.Now(), + AllowUserAutostart: opts.UserAutostartEnabled, + AllowUserAutostop: opts.UserAutostopEnabled, + DefaultTTL: int64(opts.DefaultTTL), + MaxTTL: int64(opts.MaxTTL), + FailureTTL: int64(opts.FailureTTL), + InactivityTTL: int64(opts.InactivityTTL), + LockedTTL: int64(opts.LockedTTL), + }) + require.NoError(t, err) + // Update all workspaces using the template to set the user defined schedule + // to be within the new bounds. This essentially does the following for each + // workspace using the template. + // if (template.ttl != NULL) { + // workspace.ttl = min(workspace.ttl, template.ttl) + // } + // + // NOTE: this does not apply to currently running workspaces as their + // schedule information is committed to the workspace_build during start. + // This limitation is displayed to the user while editing the template. + if opts.MaxTTL > 0 { + err = db.UpdateWorkspaceTTLToBeWithinTemplateMax(ctx, database.UpdateWorkspaceTTLToBeWithinTemplateMaxParams{ + TemplateID: template.ID, + TemplateMaxTTL: int64(opts.MaxTTL), + }) + require.NoError(t, err) + } + + return template, nil + }, + } +} diff --git a/coderd/autobuild/notify/notifier_test.go b/coderd/autobuild/notify/notifier_test.go index e241e19f2a7a4..fb547defd5b36 100644 --- a/coderd/autobuild/notify/notifier_test.go +++ b/coderd/autobuild/notify/notifier_test.go @@ -9,7 +9,7 @@ import ( "go.uber.org/atomic" "go.uber.org/goleak" - "github.com/coder/coder/coderd/wsactions/notify" + "github.com/coder/coder/coderd/autobuild/notify" ) func TestNotifier(t *testing.T) { diff --git a/coderd/coderdtest/coderdtest.go b/coderd/coderdtest/coderdtest.go index a28a52cda0d4e..b355c7e08c199 100644 --- a/coderd/coderdtest/coderdtest.go +++ b/coderd/coderdtest/coderdtest.go @@ -136,6 +136,9 @@ type Options struct { ConfigSSH codersdk.SSHConfigResponse SwaggerEndpoint bool + // Logger should only be overridden if you expect errors + // as part of your test. + Logger *slog.Logger } // New constructs a codersdk client connected to an in-memory API instance. @@ -310,6 +313,11 @@ func NewOptions(t testing.TB, options *Options) (func(http.Handler), context.Can require.NoError(t, err) } + if options.Logger == nil { + logger := slogtest.Make(t, nil).Leveled(slog.LevelDebug) + options.Logger = &logger + } + return func(h http.Handler) { mutex.Lock() defer mutex.Unlock() @@ -322,7 +330,7 @@ func NewOptions(t testing.TB, options *Options) (func(http.Handler), context.Can AccessURL: accessURL, AppHostname: options.AppHostname, AppHostnameRegex: appHostnameRegex, - Logger: slogtest.Make(t, nil).Leveled(slog.LevelDebug), + Logger: *options.Logger, CacheDir: t.TempDir(), Database: options.Database, Pubsub: options.Pubsub, @@ -427,7 +435,7 @@ func NewProvisionerDaemon(t testing.TB, coderAPI *coderd.API) io.Closer { return coderAPI.CreateInMemoryProvisionerDaemon(ctx, 0) }, &provisionerd.Options{ Filesystem: fs, - Logger: slogtest.Make(t, nil).Named("provisionerd").Leveled(slog.LevelDebug), + Logger: coderAPI.Logger.Named("provisionerd").Leveled(slog.LevelDebug), JobPollInterval: 50 * time.Millisecond, UpdateInterval: 250 * time.Millisecond, ForceCancelInterval: time.Second, diff --git a/coderd/database/dbauthz/dbauthz.go b/coderd/database/dbauthz/dbauthz.go index d9e82c5b6e3e4..4211204f1917d 100644 --- a/coderd/database/dbauthz/dbauthz.go +++ b/coderd/database/dbauthz/dbauthz.go @@ -1682,8 +1682,8 @@ func (q *querier) GetWorkspaces(ctx context.Context, arg database.GetWorkspacesP return q.db.GetAuthorizedWorkspaces(ctx, arg, prep) } -func (q *querier) GetWorkspacesEligibleForAutoStartStop(ctx context.Context, now time.Time) ([]database.Workspace, error) { - return q.db.GetWorkspacesEligibleForAutoStartStop(ctx, now) +func (q *querier) GetWorkspacesEligibleForTransition(ctx context.Context, now time.Time) ([]database.Workspace, error) { + return q.db.GetWorkspacesEligibleForTransition(ctx, now) } func (q *querier) InsertAPIKey(ctx context.Context, arg database.InsertAPIKeyParams) (database.APIKey, error) { diff --git a/coderd/database/dbfake/dbfake.go b/coderd/database/dbfake/dbfake.go index da623da8620f9..ad274c54372ca 100644 --- a/coderd/database/dbfake/dbfake.go +++ b/coderd/database/dbfake/dbfake.go @@ -3404,7 +3404,7 @@ func (q *fakeQuerier) GetWorkspaces(ctx context.Context, arg database.GetWorkspa return workspaceRows, err } -func (q *fakeQuerier) GetWorkspacesEligibleForAutoStartStop(ctx context.Context, now time.Time) ([]database.Workspace, error) { +func (q *fakeQuerier) GetWorkspacesEligibleForTransition(ctx context.Context, now time.Time) ([]database.Workspace, error) { q.mutex.RLock() defer q.mutex.RUnlock() diff --git a/coderd/database/dbmetrics/dbmetrics.go b/coderd/database/dbmetrics/dbmetrics.go index 71171ed6ff630..71ab27bc1d69c 100644 --- a/coderd/database/dbmetrics/dbmetrics.go +++ b/coderd/database/dbmetrics/dbmetrics.go @@ -962,9 +962,9 @@ func (m metricsStore) GetWorkspaces(ctx context.Context, arg database.GetWorkspa return workspaces, err } -func (m metricsStore) GetWorkspacesEligibleForAutoStartStop(ctx context.Context, now time.Time) ([]database.Workspace, error) { +func (m metricsStore) GetWorkspacesEligibleForTransition(ctx context.Context, now time.Time) ([]database.Workspace, error) { start := time.Now() - workspaces, err := m.s.GetWorkspacesEligibleForAutoStartStop(ctx, now) + workspaces, err := m.s.GetWorkspacesEligibleForTransition(ctx, now) m.queryLatencies.WithLabelValues("GetWorkspacesEligibleForAutoStartStop").Observe(time.Since(start).Seconds()) return workspaces, err } diff --git a/coderd/database/dbmock/dbmock.go b/coderd/database/dbmock/dbmock.go index 610795142ba07..3e5cc26e3f4e0 100644 --- a/coderd/database/dbmock/dbmock.go +++ b/coderd/database/dbmock/dbmock.go @@ -1933,19 +1933,19 @@ func (mr *MockStoreMockRecorder) GetWorkspaces(arg0, arg1 interface{}) *gomock.C return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetWorkspaces", reflect.TypeOf((*MockStore)(nil).GetWorkspaces), arg0, arg1) } -// GetWorkspacesEligibleForAutoStartStop mocks base method. -func (m *MockStore) GetWorkspacesEligibleForAutoStartStop(arg0 context.Context, arg1 time.Time) ([]database.Workspace, error) { +// GetWorkspacesEligibleForTransition mocks base method. +func (m *MockStore) GetWorkspacesEligibleForTransition(arg0 context.Context, arg1 time.Time) ([]database.Workspace, error) { m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "GetWorkspacesEligibleForAutoStartStop", arg0, arg1) + ret := m.ctrl.Call(m, "GetWorkspacesEligibleForTransition", arg0, arg1) ret0, _ := ret[0].([]database.Workspace) ret1, _ := ret[1].(error) return ret0, ret1 } -// GetWorkspacesEligibleForAutoStartStop indicates an expected call of GetWorkspacesEligibleForAutoStartStop. -func (mr *MockStoreMockRecorder) GetWorkspacesEligibleForAutoStartStop(arg0, arg1 interface{}) *gomock.Call { +// GetWorkspacesEligibleForTransition indicates an expected call of GetWorkspacesEligibleForTransition. +func (mr *MockStoreMockRecorder) GetWorkspacesEligibleForTransition(arg0, arg1 interface{}) *gomock.Call { mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetWorkspacesEligibleForAutoStartStop", reflect.TypeOf((*MockStore)(nil).GetWorkspacesEligibleForAutoStartStop), arg0, arg1) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetWorkspacesEligibleForTransition", reflect.TypeOf((*MockStore)(nil).GetWorkspacesEligibleForTransition), arg0, arg1) } // InTx mocks base method. diff --git a/coderd/database/querier.go b/coderd/database/querier.go index f2e13580040a8..acd434adb465b 100644 --- a/coderd/database/querier.go +++ b/coderd/database/querier.go @@ -166,7 +166,7 @@ type sqlcQuerier interface { GetWorkspaceResourcesByJobIDs(ctx context.Context, ids []uuid.UUID) ([]WorkspaceResource, error) GetWorkspaceResourcesCreatedAfter(ctx context.Context, createdAt time.Time) ([]WorkspaceResource, error) GetWorkspaces(ctx context.Context, arg GetWorkspacesParams) ([]GetWorkspacesRow, error) - GetWorkspacesEligibleForAutoStartStop(ctx context.Context, now time.Time) ([]Workspace, error) + GetWorkspacesEligibleForTransition(ctx context.Context, now time.Time) ([]Workspace, error) InsertAPIKey(ctx context.Context, arg InsertAPIKeyParams) (APIKey, error) // We use the organization_id as the id // for simplicity since all users is diff --git a/coderd/database/queries.sql.go b/coderd/database/queries.sql.go index 3f6baeb2c50a4..7301c208c22e8 100644 --- a/coderd/database/queries.sql.go +++ b/coderd/database/queries.sql.go @@ -8272,13 +8272,15 @@ func (q *sqlQuerier) GetWorkspaces(ctx context.Context, arg GetWorkspacesParams) return items, nil } -const getWorkspacesEligibleForAutoStartStop = `-- name: GetWorkspacesEligibleForAutoStartStop :many +const getWorkspacesEligibleForTransition = `-- name: GetWorkspacesEligibleForTransition :many SELECT workspaces.id, workspaces.created_at, workspaces.updated_at, workspaces.owner_id, workspaces.organization_id, workspaces.template_id, workspaces.deleted, workspaces.name, workspaces.autostart_schedule, workspaces.ttl, workspaces.last_used_at FROM workspaces LEFT JOIN workspace_builds ON workspace_builds.workspace_id = workspaces.id +INNER JOIN + provisioner_jobs ON workspace_builds.job_id = provisioner_jobs.id WHERE workspace_builds.build_number = ( SELECT @@ -8308,12 +8310,19 @@ WHERE ( workspace_builds.transition = 'stop'::workspace_transition AND workspaces.autostart_schedule IS NOT NULL + ) OR + + -- If the workspace's most recent job resulted in an error + -- it may be eligible for failed stop. + ( + provisioner_jobs.error IS NOT NULL AND + provisioner_jobs.error != '' ) - ) + ) AND workspaces.deleted = 'false' ` -func (q *sqlQuerier) GetWorkspacesEligibleForAutoStartStop(ctx context.Context, now time.Time) ([]Workspace, error) { - rows, err := q.db.QueryContext(ctx, getWorkspacesEligibleForAutoStartStop, now) +func (q *sqlQuerier) GetWorkspacesEligibleForTransition(ctx context.Context, now time.Time) ([]Workspace, error) { + rows, err := q.db.QueryContext(ctx, getWorkspacesEligibleForTransition, now) if err != nil { return nil, err } diff --git a/coderd/database/queries/workspaces.sql b/coderd/database/queries/workspaces.sql index f5d7c35cfd929..98cfcf0bcab25 100644 --- a/coderd/database/queries/workspaces.sql +++ b/coderd/database/queries/workspaces.sql @@ -405,13 +405,15 @@ SELECT stopped_workspaces.count AS stopped_workspaces FROM pending_workspaces, building_workspaces, running_workspaces, failed_workspaces, stopped_workspaces; --- name: GetWorkspacesEligibleForAutoStartStop :many +-- name: GetWorkspacesEligibleForTransition :many SELECT workspaces.* FROM workspaces LEFT JOIN workspace_builds ON workspace_builds.workspace_id = workspaces.id +INNER JOIN + provisioner_jobs ON workspace_builds.job_id = provisioner_jobs.id WHERE workspace_builds.build_number = ( SELECT @@ -441,5 +443,12 @@ WHERE ( workspace_builds.transition = 'stop'::workspace_transition AND workspaces.autostart_schedule IS NOT NULL + ) OR + + -- If the workspace's most recent job resulted in an error + -- it may be eligible for failed stop. + ( + provisioner_jobs.error IS NOT NULL AND + provisioner_jobs.error != '' ) - ); + ) AND workspaces.deleted = 'false'; diff --git a/coderd/wsbuilder/wsbuilder.go b/coderd/wsbuilder/wsbuilder.go index d9288485fdd1e..e6980788cb9e8 100644 --- a/coderd/wsbuilder/wsbuilder.go +++ b/coderd/wsbuilder/wsbuilder.go @@ -617,11 +617,12 @@ func (b *Builder) authorize(authFunc func(action rbac.Action, object rbac.Object case database.WorkspaceTransitionStart, database.WorkspaceTransitionStop: action = rbac.ActionUpdate default: - return BuildError{http.StatusBadRequest, fmt.Sprintf("Transition %q not supported.", b.trans), xerrors.New("")} + msg := fmt.Sprintf("Transition %q not supported.", b.trans) + return BuildError{http.StatusBadRequest, msg, xerrors.New(msg)} } if !authFunc(action, b.workspace) { // We use the same wording as the httpapi to avoid leaking the existence of the workspace - return BuildError{http.StatusNotFound, httpapi.ResourceNotFoundResponse.Message, xerrors.New("")} + return BuildError{http.StatusNotFound, httpapi.ResourceNotFoundResponse.Message, xerrors.New(httpapi.ResourceNotFoundResponse.Message)} } template, err := b.getTemplate() @@ -633,7 +634,7 @@ func (b *Builder) authorize(authFunc func(action rbac.Action, object rbac.Object // cloud state. if b.state.explicit != nil || b.state.orphan { if !authFunc(rbac.ActionUpdate, template.RBACObject()) { - return BuildError{http.StatusForbidden, "Only template managers may provide custom state", xerrors.New("")} + return BuildError{http.StatusForbidden, "Only template managers may provide custom state", xerrors.New("Only template managers may provide custom state")} } } @@ -641,7 +642,7 @@ func (b *Builder) authorize(authFunc func(action rbac.Action, object rbac.Object return BuildError{ http.StatusBadRequest, "Workspace builds with a custom log level are restricted to template authors only.", - xerrors.New(""), + xerrors.New("Workspace builds with a custom log level are restricted to template authors only."), } } return nil @@ -686,22 +687,26 @@ func (b *Builder) checkTemplateJobStatus() error { templateVersionJobStatus := db2sdk.ProvisionerJobStatus(*templateVersionJob) switch templateVersionJobStatus { case codersdk.ProvisionerJobPending, codersdk.ProvisionerJobRunning: + msg := fmt.Sprintf("The provided template version is %s. Wait for it to complete importing!", templateVersionJobStatus) + return BuildError{ http.StatusNotAcceptable, - fmt.Sprintf("The provided template version is %s. Wait for it to complete importing!", templateVersionJobStatus), - xerrors.New(""), + msg, + xerrors.New(msg), } case codersdk.ProvisionerJobFailed: + msg := fmt.Sprintf("The provided template version %q has failed to import: %q. You cannot build workspaces with it!", templateVersion.Name, templateVersionJob.Error.String) return BuildError{ http.StatusBadRequest, - fmt.Sprintf("The provided template version %q has failed to import: %q. You cannot build workspaces with it!", templateVersion.Name, templateVersionJob.Error.String), - xerrors.New(""), + msg, + xerrors.New(msg), } case codersdk.ProvisionerJobCanceled: + msg := fmt.Sprintf("The provided template version %q has failed to import: %q. You cannot build workspaces with it!", templateVersion.Name, templateVersionJob.Error.String) return BuildError{ http.StatusBadRequest, - "The provided template version was canceled during import. You cannot build workspaces with it!", - xerrors.New(""), + msg, + xerrors.New(msg), } } return nil @@ -717,10 +722,11 @@ func (b *Builder) checkRunningBuild() error { return BuildError{http.StatusInternalServerError, "failed to fetch prior build", err} } if db2sdk.ProvisionerJobStatus(*job).Active() { + msg := "A workspace build is already active." return BuildError{ http.StatusConflict, - "A workspace build is already active.", - xerrors.New(""), + msg, + xerrors.New(msg), } } return nil diff --git a/provisioner/echo/serve.go b/provisioner/echo/serve.go index 0da18a1e6392d..6df498a9af495 100644 --- a/provisioner/echo/serve.go +++ b/provisioner/echo/serve.go @@ -18,21 +18,6 @@ import ( "github.com/coder/coder/provisionersdk/proto" ) -const ( - ParameterExecKey = "echo.exec" - - errorKey = "error" - successKey = "success" -) - -func ParameterError(s string) string { - return formatExecValue(errorKey, s) -} - -func ParameterSucceed() string { - return formatExecValue(successKey, "") -} - // ProvisionApplyWithAgent returns provision responses that will mock a fake // "aws_instance" resource with an agent that has the given auth token. func ProvisionApplyWithAgent(authToken string) []*proto.Provision_Response { @@ -55,10 +40,6 @@ func ProvisionApplyWithAgent(authToken string) []*proto.Provision_Response { }} } -func formatExecValue(key, value string) string { - return fmt.Sprintf("%s=%s", key, value) -} - var ( // ParseComplete is a helper to indicate an empty parse completion. ParseComplete = []*proto.Parse_Response{{ @@ -72,6 +53,16 @@ var ( Complete: &proto.Provision_Complete{}, }, }} + + // ProvisionFailed is a helper to convey a failed provision + // operation. + ProvisionFailed = []*proto.Provision_Response{{ + Type: &proto.Provision_Response_Complete{ + Complete: &proto.Provision_Complete{ + Error: "failed!", + }, + }, + }} ) // Serve starts the echo provisioner. From 190c4bcd9667a51f8baeb3ddc102ec9a0d529317 Mon Sep 17 00:00:00 2001 From: Jon Ayers Date: Tue, 20 Jun 2023 20:23:02 +0000 Subject: [PATCH 3/9] fix fake --- coderd/database/dbfake/dbfake.go | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/coderd/database/dbfake/dbfake.go b/coderd/database/dbfake/dbfake.go index ad274c54372ca..8b9ae6d74fdf2 100644 --- a/coderd/database/dbfake/dbfake.go +++ b/coderd/database/dbfake/dbfake.go @@ -20,9 +20,11 @@ import ( "golang.org/x/xerrors" "github.com/coder/coder/coderd/database" + "github.com/coder/coder/coderd/database/db2sdk" "github.com/coder/coder/coderd/httpapi" "github.com/coder/coder/coderd/rbac" "github.com/coder/coder/coderd/util/slice" + "github.com/coder/coder/codersdk" ) var validProxyByHostnameRegex = regexp.MustCompile(`^[a-zA-Z0-9._-]+$`) @@ -3424,6 +3426,15 @@ func (q *fakeQuerier) GetWorkspacesEligibleForTransition(ctx context.Context, no workspaces = append(workspaces, workspace) continue } + + job, err := q.getProvisionerJobByIDNoLock(ctx, build.JobID) + if err != nil { + return nil, xerrors.Errorf("get provisioner job by ID: %w", err) + } + if db2sdk.ProvisionerJobStatus(job) == codersdk.ProvisionerJobFailed { + workspaces = append(workspaces, workspace) + continue + } } return workspaces, nil From a8fa3f001ac61643a39ac7a637ad6855cf0fc831 Mon Sep 17 00:00:00 2001 From: Jon Ayers Date: Wed, 21 Jun 2023 01:14:26 +0000 Subject: [PATCH 4/9] pr comments --- coderd/autobuild/lifecycle_executor.go | 9 ++-- coderd/autobuild/lifecycle_executor_test.go | 47 --------------------- coderd/database/queries/workspaces.sql | 3 +- 3 files changed, 6 insertions(+), 53 deletions(-) diff --git a/coderd/autobuild/lifecycle_executor.go b/coderd/autobuild/lifecycle_executor.go index 8010a7f854185..c43487d291a29 100644 --- a/coderd/autobuild/lifecycle_executor.go +++ b/coderd/autobuild/lifecycle_executor.go @@ -216,7 +216,7 @@ func getNextTransition( return database.WorkspaceTransitionStop, database.BuildReasonAutostop, nil case isEligibleForAutostart(ws, latestBuild, latestJob, templateSchedule, currentTick): return database.WorkspaceTransitionStart, database.BuildReasonAutostart, nil - case isEligibleForFailedStop(latestJob, templateSchedule): + case isEligibleForFailedStop(latestBuild, latestJob, templateSchedule): return database.WorkspaceTransitionStop, database.BuildReasonAutostop, nil default: return "", "", xerrors.Errorf("last transition not valid for autostart or autostop") @@ -269,15 +269,14 @@ func isEligibleForAutostop(build database.WorkspaceBuild, job database.Provision // isEligibleForFailedStop returns true if the workspace is eligible to be stopped // due to a failed build. -func isEligibleForFailedStop(job database.ProvisionerJob, templateSchedule schedule.TemplateScheduleOptions) bool { +func isEligibleForFailedStop(build database.WorkspaceBuild, job database.ProvisionerJob, templateSchedule schedule.TemplateScheduleOptions) bool { // If the template has specified a failure TLL. return templateSchedule.FailureTTL > 0 && // And the job resulted in failure. db2sdk.ProvisionerJobStatus(job) == codersdk.ProvisionerJobFailed && + build.Transition == database.WorkspaceTransitionStart && // And sufficient time has elapsed since the job has completed. (job.CompletedAt.Valid && database.Now().Sub(job.CompletedAt.Time) > templateSchedule.FailureTTL || // Or sufficient time has elapsed since the job was canceled. - job.CanceledAt.Valid && database.Now().Sub(job.CanceledAt.Time) > templateSchedule.FailureTTL || - // Or the job is stuck/abandoned. - database.Now().Sub(job.UpdatedAt) > 30*time.Second) + job.CanceledAt.Valid && database.Now().Sub(job.CanceledAt.Time) > templateSchedule.FailureTTL) } diff --git a/coderd/autobuild/lifecycle_executor_test.go b/coderd/autobuild/lifecycle_executor_test.go index b485c85fa7659..9087f8dc4ab04 100644 --- a/coderd/autobuild/lifecycle_executor_test.go +++ b/coderd/autobuild/lifecycle_executor_test.go @@ -787,53 +787,6 @@ func TestExecutorFailedWorkspace(t *testing.T) { // Expect no transitions since not enough time has elapsed. require.Len(t, stats.Transitions, 0) }) - - t.Run("StuckJob", func(t *testing.T) { - t.Parallel() - - var ( - ctx = testutil.Context(t, testutil.WaitMedium) - ticker = make(chan time.Time) - statCh = make(chan autobuild.Stats) - logger = slogtest.Make(t, &slogtest.Options{ - // We ignore errors here since we expect to fail - // builds. - IgnoreErrors: true, - }) - failureTTL = time.Hour - client, _, api = coderdtest.NewWithAPI(t, &coderdtest.Options{ - Logger: &logger, - AutobuildTicker: ticker, - IncludeProvisionerDaemon: true, - AutobuildStats: statCh, - TemplateScheduleStore: mockEnterpriseTemplateScheduler(t), - }) - ) - user := coderdtest.CreateFirstUser(t, client) - version := coderdtest.CreateTemplateVersion(t, client, user.OrganizationID, &echo.Responses{ - Parse: echo.ParseComplete, - ProvisionPlan: echo.ProvisionComplete, - ProvisionApply: echo.ProvisionFailed, - }) - template := coderdtest.CreateTemplate(t, client, user.OrganizationID, version.ID, func(ctr *codersdk.CreateTemplateRequest) { - ctr.FailureTTLMillis = ptr.Ref[int64](failureTTL.Milliseconds()) - }) - coderdtest.AwaitTemplateVersionJob(t, client, version.ID) - ws := coderdtest.CreateWorkspace(t, client, user.OrganizationID, template.ID) - build := coderdtest.AwaitWorkspaceBuildJob(t, client, ws.LatestBuild.ID) - - // Emulate a stuck job. - err := api.Database.UpdateProvisionerJobByID(ctx, database.UpdateProvisionerJobByIDParams{ - ID: build.Job.ID, - UpdatedAt: database.Now().Add(-time.Hour), - }) - require.NoError(t, err) - ticker <- time.Now() - stats := <-statCh - // Expect a transition since we should detect that the job stopped - // updating. - require.Len(t, stats.Transitions, 1) - }) } func mustProvisionWorkspace(t *testing.T, client *codersdk.Client, mut ...func(*codersdk.CreateWorkspaceRequest)) codersdk.Workspace { diff --git a/coderd/database/queries/workspaces.sql b/coderd/database/queries/workspaces.sql index 98cfcf0bcab25..90368b7e8f5c6 100644 --- a/coderd/database/queries/workspaces.sql +++ b/coderd/database/queries/workspaces.sql @@ -449,6 +449,7 @@ WHERE -- it may be eligible for failed stop. ( provisioner_jobs.error IS NOT NULL AND - provisioner_jobs.error != '' + provisioner_jobs.error != '' AND + workspace_builds.transition = "start"::workspace_transition ) ) AND workspaces.deleted = 'false'; From 4fdaf19cd579fbd6ec5708b58209bfc858482379 Mon Sep 17 00:00:00 2001 From: Jon Ayers Date: Wed, 21 Jun 2023 01:45:33 +0000 Subject: [PATCH 5/9] move enterprise tests --- coderd/autobuild/lifecycle_executor.go | 1 - coderd/autobuild/lifecycle_executor_test.go | 157 +------------------- enterprise/coderd/coderd.go | 2 +- enterprise/coderd/provisionerdaemons.go | 8 +- enterprise/coderd/workspaces_test.go | 156 +++++++++++++++++++ 5 files changed, 165 insertions(+), 159 deletions(-) diff --git a/coderd/autobuild/lifecycle_executor.go b/coderd/autobuild/lifecycle_executor.go index c43487d291a29..e2ab82d8077b7 100644 --- a/coderd/autobuild/lifecycle_executor.go +++ b/coderd/autobuild/lifecycle_executor.go @@ -142,7 +142,6 @@ func (e *Executor) runOnce(t time.Time) Stats { log.Warn(e.ctx, "get latest workspace build", slog.Error(err)) return nil } - templateSchedule, err := (*(e.templateScheduleStore.Load())).GetTemplateScheduleOptions(e.ctx, tx, ws.TemplateID) if err != nil { log.Warn(e.ctx, "get template schedule options", slog.Error(err)) diff --git a/coderd/autobuild/lifecycle_executor_test.go b/coderd/autobuild/lifecycle_executor_test.go index 9087f8dc4ab04..3da342066ad12 100644 --- a/coderd/autobuild/lifecycle_executor_test.go +++ b/coderd/autobuild/lifecycle_executor_test.go @@ -653,10 +653,13 @@ func TestExecutorAutostartTemplateDisabled(t *testing.T) { // TesetExecutorFailedWorkspace tests that failed workspaces that breach // their template failed_ttl threshold trigger a stop job. +// For enterprise functionality see enterprise/coderd/workspaces_test.go func TestExecutorFailedWorkspace(t *testing.T) { t.Parallel() - t.Run("AGPLOK", func(t *testing.T) { + // Test that an AGPL TemplateScheduleStore properly disables + // functionality. + t.Run("OK", func(t *testing.T) { t.Parallel() var ( @@ -700,93 +703,6 @@ func TestExecutorFailedWorkspace(t *testing.T) { // Expect no transitions since we're using AGPL. require.Len(t, stats.Transitions, 0) }) - - t.Run("EnterpriseOK", func(t *testing.T) { - t.Parallel() - - var ( - ticker = make(chan time.Time) - statCh = make(chan autobuild.Stats) - logger = slogtest.Make(t, &slogtest.Options{ - // We ignore errors here since we expect to fail - // builds. - IgnoreErrors: true, - }) - failureTTL = time.Millisecond - - client = coderdtest.New(t, &coderdtest.Options{ - Logger: &logger, - AutobuildTicker: ticker, - IncludeProvisionerDaemon: true, - AutobuildStats: statCh, - TemplateScheduleStore: mockEnterpriseTemplateScheduler(t), - }) - ) - user := coderdtest.CreateFirstUser(t, client) - version := coderdtest.CreateTemplateVersion(t, client, user.OrganizationID, &echo.Responses{ - Parse: echo.ParseComplete, - ProvisionPlan: echo.ProvisionComplete, - ProvisionApply: echo.ProvisionFailed, - }) - template := coderdtest.CreateTemplate(t, client, user.OrganizationID, version.ID, func(ctr *codersdk.CreateTemplateRequest) { - ctr.FailureTTLMillis = ptr.Ref[int64](failureTTL.Milliseconds()) - }) - coderdtest.AwaitTemplateVersionJob(t, client, version.ID) - ws := coderdtest.CreateWorkspace(t, client, user.OrganizationID, template.ID) - build := coderdtest.AwaitWorkspaceBuildJob(t, client, ws.LatestBuild.ID) - require.Equal(t, codersdk.WorkspaceStatusFailed, build.Status) - require.Eventually(t, - func() bool { - return database.Now().Sub(*build.Job.CompletedAt) > failureTTL - }, - testutil.IntervalMedium, testutil.IntervalFast) - ticker <- time.Now() - stats := <-statCh - // Expect workspace to transition to stopped state for breaching - // failure TTL. - require.Len(t, stats.Transitions, 1) - require.Equal(t, stats.Transitions[ws.ID], database.WorkspaceTransitionStop) - }) - - t.Run("TooEarly", func(t *testing.T) { - t.Parallel() - - var ( - ticker = make(chan time.Time) - statCh = make(chan autobuild.Stats) - logger = slogtest.Make(t, &slogtest.Options{ - // We ignore errors here since we expect to fail - // builds. - IgnoreErrors: true, - }) - failureTTL = time.Minute - - client = coderdtest.New(t, &coderdtest.Options{ - Logger: &logger, - AutobuildTicker: ticker, - IncludeProvisionerDaemon: true, - AutobuildStats: statCh, - TemplateScheduleStore: mockEnterpriseTemplateScheduler(t), - }) - ) - user := coderdtest.CreateFirstUser(t, client) - version := coderdtest.CreateTemplateVersion(t, client, user.OrganizationID, &echo.Responses{ - Parse: echo.ParseComplete, - ProvisionPlan: echo.ProvisionComplete, - ProvisionApply: echo.ProvisionFailed, - }) - template := coderdtest.CreateTemplate(t, client, user.OrganizationID, version.ID, func(ctr *codersdk.CreateTemplateRequest) { - ctr.FailureTTLMillis = ptr.Ref[int64](failureTTL.Milliseconds()) - }) - coderdtest.AwaitTemplateVersionJob(t, client, version.ID) - ws := coderdtest.CreateWorkspace(t, client, user.OrganizationID, template.ID) - build := coderdtest.AwaitWorkspaceBuildJob(t, client, ws.LatestBuild.ID) - require.Equal(t, codersdk.WorkspaceStatusFailed, build.Status) - ticker <- time.Now() - stats := <-statCh - // Expect no transitions since not enough time has elapsed. - require.Len(t, stats.Transitions, 0) - }) } func mustProvisionWorkspace(t *testing.T, client *codersdk.Client, mut ...func(*codersdk.CreateWorkspaceRequest)) codersdk.Workspace { @@ -846,68 +762,3 @@ func mustWorkspaceParameters(t *testing.T, client *codersdk.Client, workspaceID func TestMain(m *testing.M) { goleak.VerifyTestMain(m) } - -func mockEnterpriseTemplateScheduler(t *testing.T) schedule.TemplateScheduleStore { - return schedule.MockTemplateScheduleStore{ - GetFn: func(ctx context.Context, store database.Store, id uuid.UUID) (schedule.TemplateScheduleOptions, error) { - t.Helper() - - template, err := store.GetTemplateByID(ctx, id) - require.NoError(t, err) - - return schedule.TemplateScheduleOptions{ - UserAutostartEnabled: template.AllowUserAutostart, - UserAutostopEnabled: template.AllowUserAutostop, - DefaultTTL: time.Duration(template.DefaultTTL), - MaxTTL: time.Duration(template.MaxTTL), - FailureTTL: time.Duration(template.FailureTTL), - }, nil - }, - SetFn: func(ctx context.Context, db database.Store, tpl database.Template, opts schedule.TemplateScheduleOptions) (database.Template, error) { - t.Helper() - - if int64(opts.DefaultTTL) == tpl.DefaultTTL && - int64(opts.MaxTTL) == tpl.MaxTTL && - int64(opts.FailureTTL) == tpl.FailureTTL && - int64(opts.InactivityTTL) == tpl.InactivityTTL && - int64(opts.LockedTTL) == tpl.LockedTTL && - opts.UserAutostartEnabled == tpl.AllowUserAutostart && - opts.UserAutostopEnabled == tpl.AllowUserAutostop { - // Avoid updating the UpdatedAt timestamp if nothing will be changed. - return tpl, nil - } - - template, err := db.UpdateTemplateScheduleByID(ctx, database.UpdateTemplateScheduleByIDParams{ - ID: tpl.ID, - UpdatedAt: database.Now(), - AllowUserAutostart: opts.UserAutostartEnabled, - AllowUserAutostop: opts.UserAutostopEnabled, - DefaultTTL: int64(opts.DefaultTTL), - MaxTTL: int64(opts.MaxTTL), - FailureTTL: int64(opts.FailureTTL), - InactivityTTL: int64(opts.InactivityTTL), - LockedTTL: int64(opts.LockedTTL), - }) - require.NoError(t, err) - // Update all workspaces using the template to set the user defined schedule - // to be within the new bounds. This essentially does the following for each - // workspace using the template. - // if (template.ttl != NULL) { - // workspace.ttl = min(workspace.ttl, template.ttl) - // } - // - // NOTE: this does not apply to currently running workspaces as their - // schedule information is committed to the workspace_build during start. - // This limitation is displayed to the user while editing the template. - if opts.MaxTTL > 0 { - err = db.UpdateWorkspaceTTLToBeWithinTemplateMax(ctx, database.UpdateWorkspaceTTLToBeWithinTemplateMaxParams{ - TemplateID: template.ID, - TemplateMaxTTL: int64(opts.MaxTTL), - }) - require.NoError(t, err) - } - - return template, nil - }, - } -} diff --git a/enterprise/coderd/coderd.go b/enterprise/coderd/coderd.go index 2cc54bd3fa705..aa460629cd929 100644 --- a/enterprise/coderd/coderd.go +++ b/enterprise/coderd/coderd.go @@ -402,7 +402,7 @@ func (api *API) updateEntitlements(ctx context.Context) error { if changed, enabled := featureChanged(codersdk.FeatureAdvancedTemplateScheduling); changed { if enabled { - store := &enterpriseTemplateScheduleStore{} + store := &EnterpriseTemplateScheduleStore{} ptr := schedule.TemplateScheduleStore(store) api.AGPL.TemplateScheduleStore.Store(&ptr) } else { diff --git a/enterprise/coderd/provisionerdaemons.go b/enterprise/coderd/provisionerdaemons.go index 36b98cde73004..5bb35bdbd3b22 100644 --- a/enterprise/coderd/provisionerdaemons.go +++ b/enterprise/coderd/provisionerdaemons.go @@ -310,11 +310,11 @@ func websocketNetConn(ctx context.Context, conn *websocket.Conn, msgType websock } } -type enterpriseTemplateScheduleStore struct{} +type EnterpriseTemplateScheduleStore struct{} -var _ schedule.TemplateScheduleStore = &enterpriseTemplateScheduleStore{} +var _ schedule.TemplateScheduleStore = &EnterpriseTemplateScheduleStore{} -func (*enterpriseTemplateScheduleStore) GetTemplateScheduleOptions(ctx context.Context, db database.Store, templateID uuid.UUID) (schedule.TemplateScheduleOptions, error) { +func (*EnterpriseTemplateScheduleStore) GetTemplateScheduleOptions(ctx context.Context, db database.Store, templateID uuid.UUID) (schedule.TemplateScheduleOptions, error) { tpl, err := db.GetTemplateByID(ctx, templateID) if err != nil { return schedule.TemplateScheduleOptions{}, err @@ -331,7 +331,7 @@ func (*enterpriseTemplateScheduleStore) GetTemplateScheduleOptions(ctx context.C }, nil } -func (*enterpriseTemplateScheduleStore) SetTemplateScheduleOptions(ctx context.Context, db database.Store, tpl database.Template, opts schedule.TemplateScheduleOptions) (database.Template, error) { +func (*EnterpriseTemplateScheduleStore) SetTemplateScheduleOptions(ctx context.Context, db database.Store, tpl database.Template, opts schedule.TemplateScheduleOptions) (database.Template, error) { if int64(opts.DefaultTTL) == tpl.DefaultTTL && int64(opts.MaxTTL) == tpl.MaxTTL && int64(opts.FailureTTL) == tpl.FailureTTL && diff --git a/enterprise/coderd/workspaces_test.go b/enterprise/coderd/workspaces_test.go index 2c1a84a6f8dff..770f3ace3fe3b 100644 --- a/enterprise/coderd/workspaces_test.go +++ b/enterprise/coderd/workspaces_test.go @@ -10,12 +10,17 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + "cdr.dev/slog/sloggers/slogtest" + + "github.com/coder/coder/coderd/autobuild" "github.com/coder/coder/coderd/coderdtest" "github.com/coder/coder/coderd/database" "github.com/coder/coder/coderd/util/ptr" "github.com/coder/coder/codersdk" + "github.com/coder/coder/enterprise/coderd" "github.com/coder/coder/enterprise/coderd/coderdenttest" "github.com/coder/coder/enterprise/coderd/license" + "github.com/coder/coder/provisioner/echo" "github.com/coder/coder/testutil" ) @@ -74,6 +79,157 @@ func TestCreateWorkspace(t *testing.T) { }) } +func TestWorkspaceAutobuild(t *testing.T) { + t.Parallel() + + t.Run("FailureTTLOK", func(t *testing.T) { + t.Parallel() + + var ( + ticker = make(chan time.Time) + statCh = make(chan autobuild.Stats) + logger = slogtest.Make(t, &slogtest.Options{ + // We ignore errors here since we expect to fail + // builds. + IgnoreErrors: true, + }) + failureTTL = time.Millisecond + + client = coderdenttest.New(t, &coderdenttest.Options{ + Options: &coderdtest.Options{ + Logger: &logger, + AutobuildTicker: ticker, + IncludeProvisionerDaemon: true, + AutobuildStats: statCh, + TemplateScheduleStore: &coderd.EnterpriseTemplateScheduleStore{}, + }, + }) + ) + user := coderdtest.CreateFirstUser(t, client) + _ = coderdenttest.AddLicense(t, client, coderdenttest.LicenseOptions{ + Features: license.Features{ + codersdk.FeatureAdvancedTemplateScheduling: 1, + }, + }) + + version := coderdtest.CreateTemplateVersion(t, client, user.OrganizationID, &echo.Responses{ + Parse: echo.ParseComplete, + ProvisionPlan: echo.ProvisionComplete, + ProvisionApply: echo.ProvisionFailed, + }) + template := coderdtest.CreateTemplate(t, client, user.OrganizationID, version.ID, func(ctr *codersdk.CreateTemplateRequest) { + ctr.FailureTTLMillis = ptr.Ref[int64](failureTTL.Milliseconds()) + }) + coderdtest.AwaitTemplateVersionJob(t, client, version.ID) + ws := coderdtest.CreateWorkspace(t, client, user.OrganizationID, template.ID) + build := coderdtest.AwaitWorkspaceBuildJob(t, client, ws.LatestBuild.ID) + require.Equal(t, codersdk.WorkspaceStatusFailed, build.Status) + require.Eventually(t, + func() bool { + return database.Now().Sub(*build.Job.CompletedAt) > failureTTL + }, + testutil.IntervalMedium, testutil.IntervalFast) + ticker <- time.Now() + stats := <-statCh + // Expect workspace to transition to stopped state for breaching + // failure TTL. + require.Len(t, stats.Transitions, 1) + require.Equal(t, stats.Transitions[ws.ID], database.WorkspaceTransitionStop) + }) + + t.Run("FailureTTLTooEarly", func(t *testing.T) { + t.Parallel() + + var ( + ticker = make(chan time.Time) + statCh = make(chan autobuild.Stats) + logger = slogtest.Make(t, &slogtest.Options{ + // We ignore errors here since we expect to fail + // builds. + IgnoreErrors: true, + }) + failureTTL = time.Minute + + client = coderdenttest.New(t, &coderdenttest.Options{ + Options: &coderdtest.Options{ + Logger: &logger, + AutobuildTicker: ticker, + IncludeProvisionerDaemon: true, + AutobuildStats: statCh, + TemplateScheduleStore: &coderd.EnterpriseTemplateScheduleStore{}, + }, + }) + ) + user := coderdtest.CreateFirstUser(t, client) + _ = coderdenttest.AddLicense(t, client, coderdenttest.LicenseOptions{ + Features: license.Features{ + codersdk.FeatureAdvancedTemplateScheduling: 1, + }, + }) + version := coderdtest.CreateTemplateVersion(t, client, user.OrganizationID, &echo.Responses{ + Parse: echo.ParseComplete, + ProvisionPlan: echo.ProvisionComplete, + ProvisionApply: echo.ProvisionFailed, + }) + template := coderdtest.CreateTemplate(t, client, user.OrganizationID, version.ID, func(ctr *codersdk.CreateTemplateRequest) { + ctr.FailureTTLMillis = ptr.Ref[int64](failureTTL.Milliseconds()) + }) + coderdtest.AwaitTemplateVersionJob(t, client, version.ID) + ws := coderdtest.CreateWorkspace(t, client, user.OrganizationID, template.ID) + build := coderdtest.AwaitWorkspaceBuildJob(t, client, ws.LatestBuild.ID) + require.Equal(t, codersdk.WorkspaceStatusFailed, build.Status) + ticker <- time.Now() + stats := <-statCh + // Expect no transitions since not enough time has elapsed. + require.Len(t, stats.Transitions, 0) + }) + + t.Run("FailureTTLUnset", func(t *testing.T) { + t.Parallel() + + var ( + ticker = make(chan time.Time) + statCh = make(chan autobuild.Stats) + logger = slogtest.Make(t, &slogtest.Options{ + // We ignore errors here since we expect to fail + // builds. + IgnoreErrors: true, + }) + + client = coderdenttest.New(t, &coderdenttest.Options{ + Options: &coderdtest.Options{ + Logger: &logger, + AutobuildTicker: ticker, + IncludeProvisionerDaemon: true, + AutobuildStats: statCh, + TemplateScheduleStore: &coderd.EnterpriseTemplateScheduleStore{}, + }, + }) + ) + user := coderdtest.CreateFirstUser(t, client) + _ = coderdenttest.AddLicense(t, client, coderdenttest.LicenseOptions{ + Features: license.Features{ + codersdk.FeatureAdvancedTemplateScheduling: 1, + }, + }) + version := coderdtest.CreateTemplateVersion(t, client, user.OrganizationID, &echo.Responses{ + Parse: echo.ParseComplete, + ProvisionPlan: echo.ProvisionComplete, + ProvisionApply: echo.ProvisionFailed, + }) + // Create a template without setting a failure_ttl. + template := coderdtest.CreateTemplate(t, client, user.OrganizationID, version.ID) + coderdtest.AwaitTemplateVersionJob(t, client, version.ID) + ws := coderdtest.CreateWorkspace(t, client, user.OrganizationID, template.ID) + build := coderdtest.AwaitWorkspaceBuildJob(t, client, ws.LatestBuild.ID) + require.Equal(t, codersdk.WorkspaceStatusFailed, build.Status) + ticker <- time.Now() + stats := <-statCh + // Expect no transitions since the field is unset on the template. + require.Len(t, stats.Transitions, 0) + }) +} + func TestWorkspacesFiltering(t *testing.T) { t.Parallel() From be1e37beac711f17fa088bc91610d824ac11542c Mon Sep 17 00:00:00 2001 From: Jon Ayers Date: Wed, 21 Jun 2023 02:00:53 +0000 Subject: [PATCH 6/9] make gen --- coderd/database/queries.sql.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/coderd/database/queries.sql.go b/coderd/database/queries.sql.go index 7301c208c22e8..21aadf5e758d7 100644 --- a/coderd/database/queries.sql.go +++ b/coderd/database/queries.sql.go @@ -8316,7 +8316,8 @@ WHERE -- it may be eligible for failed stop. ( provisioner_jobs.error IS NOT NULL AND - provisioner_jobs.error != '' + provisioner_jobs.error != '' AND + workspace_builds.transition = "start"::workspace_transition ) ) AND workspaces.deleted = 'false' ` From a418d3087eff9f96b64ad8fd18829bed000c88ed Mon Sep 17 00:00:00 2001 From: Jon Ayers Date: Wed, 21 Jun 2023 02:08:24 +0000 Subject: [PATCH 7/9] quotes --- coderd/database/queries.sql.go | 4 ++-- coderd/database/queries/workspaces.sql | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/coderd/database/queries.sql.go b/coderd/database/queries.sql.go index 21aadf5e758d7..efec848d63814 100644 --- a/coderd/database/queries.sql.go +++ b/coderd/database/queries.sql.go @@ -8316,8 +8316,8 @@ WHERE -- it may be eligible for failed stop. ( provisioner_jobs.error IS NOT NULL AND - provisioner_jobs.error != '' AND - workspace_builds.transition = "start"::workspace_transition + provisioner_jobs.error != '' AND + workspace_builds.transition = 'start'::workspace_transition ) ) AND workspaces.deleted = 'false' ` diff --git a/coderd/database/queries/workspaces.sql b/coderd/database/queries/workspaces.sql index 90368b7e8f5c6..0327ff5420fec 100644 --- a/coderd/database/queries/workspaces.sql +++ b/coderd/database/queries/workspaces.sql @@ -449,7 +449,7 @@ WHERE -- it may be eligible for failed stop. ( provisioner_jobs.error IS NOT NULL AND - provisioner_jobs.error != '' AND - workspace_builds.transition = "start"::workspace_transition + provisioner_jobs.error != '' AND + workspace_builds.transition = 'start'::workspace_transition ) ) AND workspaces.deleted = 'false'; From 7b6e5d1e8a9f9c41c83fb431c5dc367089fc072e Mon Sep 17 00:00:00 2001 From: Jon Ayers Date: Wed, 21 Jun 2023 18:26:06 +0000 Subject: [PATCH 8/9] ignore canceled jobs --- coderd/autobuild/lifecycle_executor.go | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/coderd/autobuild/lifecycle_executor.go b/coderd/autobuild/lifecycle_executor.go index e2ab82d8077b7..ca31ae5ad13e6 100644 --- a/coderd/autobuild/lifecycle_executor.go +++ b/coderd/autobuild/lifecycle_executor.go @@ -275,7 +275,5 @@ func isEligibleForFailedStop(build database.WorkspaceBuild, job database.Provisi db2sdk.ProvisionerJobStatus(job) == codersdk.ProvisionerJobFailed && build.Transition == database.WorkspaceTransitionStart && // And sufficient time has elapsed since the job has completed. - (job.CompletedAt.Valid && database.Now().Sub(job.CompletedAt.Time) > templateSchedule.FailureTTL || - // Or sufficient time has elapsed since the job was canceled. - job.CanceledAt.Valid && database.Now().Sub(job.CanceledAt.Time) > templateSchedule.FailureTTL) + job.CompletedAt.Valid && database.Now().Sub(job.CompletedAt.Time) > templateSchedule.FailureTTL } From 194e9ebd7658e1eccef7c50655e7791f5829b8c3 Mon Sep 17 00:00:00 2001 From: Jon Ayers Date: Wed, 21 Jun 2023 18:30:33 +0000 Subject: [PATCH 9/9] fmt --- coderd/autobuild/lifecycle_executor.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/coderd/autobuild/lifecycle_executor.go b/coderd/autobuild/lifecycle_executor.go index ca31ae5ad13e6..4e0ae5aacfa14 100644 --- a/coderd/autobuild/lifecycle_executor.go +++ b/coderd/autobuild/lifecycle_executor.go @@ -275,5 +275,5 @@ func isEligibleForFailedStop(build database.WorkspaceBuild, job database.Provisi db2sdk.ProvisionerJobStatus(job) == codersdk.ProvisionerJobFailed && build.Transition == database.WorkspaceTransitionStart && // And sufficient time has elapsed since the job has completed. - job.CompletedAt.Valid && database.Now().Sub(job.CompletedAt.Time) > templateSchedule.FailureTTL + job.CompletedAt.Valid && database.Now().Sub(job.CompletedAt.Time) > templateSchedule.FailureTTL }