diff --git a/coderd/autobuild/executor/lifecycle_executor.go b/coderd/autobuild/executor/lifecycle_executor.go index 97582f10de6ca..6cd45823bbf8c 100644 --- a/coderd/autobuild/executor/lifecycle_executor.go +++ b/coderd/autobuild/executor/lifecycle_executor.go @@ -5,14 +5,14 @@ import ( "encoding/json" "time" - "cdr.dev/slog" - - "github.com/coder/coder/coderd/autobuild/schedule" - "github.com/coder/coder/coderd/database" - "github.com/google/uuid" "github.com/moby/moby/pkg/namesgenerator" + "golang.org/x/sync/errgroup" "golang.org/x/xerrors" + + "cdr.dev/slog" + "github.com/coder/coder/coderd/autobuild/schedule" + "github.com/coder/coder/coderd/database" ) // Executor automatically starts or stops workspaces. @@ -89,80 +89,116 @@ func (e *Executor) runOnce(t time.Time) Stats { stats.Error = err }() currentTick := t.Truncate(time.Minute) - err = e.db.InTx(func(db database.Store) error { - // TTL is set at the workspace level, and deadline at the workspace build level. - // When a workspace build is created, its deadline initially starts at zero. - // When provisionerd successfully completes a provision job, the deadline is - // set to now + TTL if the associated workspace has a TTL set. This deadline - // is what we compare against when performing autostop operations, rounded down - // to the minute. - // - // NOTE: If a workspace build is created with a given TTL and then the user either - // changes or unsets the TTL, the deadline for the workspace build will not - // have changed. This behavior is as expected per #2229. - eligibleWorkspaces, err := db.GetWorkspacesAutostart(e.ctx) - if err != nil { - return xerrors.Errorf("get eligible workspaces for autostart or autostop: %w", err) + + // TTL is set at the workspace level, and deadline at the workspace build level. + // When a workspace build is created, its deadline initially starts at zero. + // When provisionerd successfully completes a provision job, the deadline is + // set to now + TTL if the associated workspace has a TTL set. This deadline + // is what we compare against when performing autostop operations, rounded down + // to the minute. + // + // NOTE: If a workspace build is created with a given TTL and then the user either + // changes or unsets the TTL, the deadline for the workspace build will not + // have changed. This behavior is as expected per #2229. + workspaces, err := e.db.GetWorkspaces(e.ctx, database.GetWorkspacesParams{ + Deleted: false, + }) + if err != nil { + e.log.Error(e.ctx, "get workspaces for autostart or autostop", slog.Error(err)) + return stats + } + + var eligibleWorkspaceIDs []uuid.UUID + for _, ws := range workspaces { + if isEligibleForAutoStartStop(ws) { + eligibleWorkspaceIDs = append(eligibleWorkspaceIDs, ws.ID) } + } - for _, ws := range eligibleWorkspaces { - // Determine the workspace state based on its latest build. - priorHistory, err := db.GetLatestWorkspaceBuildByWorkspaceID(e.ctx, ws.ID) - if err != nil { - e.log.Warn(e.ctx, "get latest workspace build", - slog.F("workspace_id", ws.ID), - slog.Error(err), - ) - continue - } + // We only use errgroup here for convenience of API, not for early + // cancellation. This means we only return nil errors in th eg.Go. + eg := errgroup.Group{} + // Limit the concurrency to avoid overloading the database. + eg.SetLimit(10) - priorJob, err := db.GetProvisionerJobByID(e.ctx, priorHistory.JobID) - if err != nil { - e.log.Warn(e.ctx, "get last provisioner job for workspace %q: %w", - slog.F("workspace_id", ws.ID), - slog.Error(err), - ) - continue - } + for _, wsID := range eligibleWorkspaceIDs { + wsID := wsID + log := e.log.With(slog.F("workspace_id", wsID)) - validTransition, nextTransition, err := getNextTransition(ws, priorHistory, priorJob) - if err != nil { - e.log.Debug(e.ctx, "skipping workspace", - slog.Error(err), - slog.F("workspace_id", ws.ID), - ) - continue - } + eg.Go(func() error { + err := e.db.InTx(func(db database.Store) error { + // Re-check eligibility since the first check was outside the + // transaction and the workspace settings may have changed. + ws, err := db.GetWorkspaceByID(e.ctx, wsID) + if err != nil { + log.Error(e.ctx, "get workspace autostart failed", slog.Error(err)) + return nil + } + if !isEligibleForAutoStartStop(ws) { + return nil + } - if currentTick.Before(nextTransition) { - e.log.Debug(e.ctx, "skipping workspace: too early", - slog.F("workspace_id", ws.ID), - slog.F("next_transition_at", nextTransition), - slog.F("transition", validTransition), - slog.F("current_tick", currentTick), - ) - continue - } + // Determine the workspace state based on its latest build. + priorHistory, err := db.GetLatestWorkspaceBuildByWorkspaceID(e.ctx, ws.ID) + if err != nil { + log.Warn(e.ctx, "get latest workspace build", slog.Error(err)) + return nil + } + + priorJob, err := db.GetProvisionerJobByID(e.ctx, priorHistory.JobID) + if err != nil { + log.Warn(e.ctx, "get last provisioner job for workspace %q: %w", slog.Error(err)) + return nil + } + + validTransition, nextTransition, err := getNextTransition(ws, priorHistory, priorJob) + if err != nil { + log.Debug(e.ctx, "skipping workspace", slog.Error(err)) + return nil + } + + if currentTick.Before(nextTransition) { + log.Debug(e.ctx, "skipping workspace: too early", + slog.F("next_transition_at", nextTransition), + slog.F("transition", validTransition), + slog.F("current_tick", currentTick), + ) + return nil + } + + log.Info(e.ctx, "scheduling workspace transition", slog.F("transition", validTransition)) - e.log.Info(e.ctx, "scheduling workspace transition", - slog.F("workspace_id", ws.ID), - slog.F("transition", validTransition), - ) + stats.Transitions[ws.ID] = validTransition + if err := build(e.ctx, db, ws, validTransition, priorHistory, priorJob); err != nil { + log.Error(e.ctx, "unable to transition workspace", + slog.F("transition", validTransition), + slog.Error(err), + ) + return nil + } - stats.Transitions[ws.ID] = validTransition - if err := build(e.ctx, db, ws, validTransition, priorHistory, priorJob); err != nil { - e.log.Error(e.ctx, "unable to transition workspace", - slog.F("workspace_id", ws.ID), - slog.F("transition", validTransition), - slog.Error(err), - ) + return nil + }) + if err != nil { + log.Error(e.ctx, "workspace scheduling failed", slog.Error(err)) } - } - return nil - }) + return nil + }) + } + + // This should not happen since we don't want early cancellation. + err = eg.Wait() + if err != nil { + e.log.Error(e.ctx, "workspace scheduling errgroup failed", slog.Error(err)) + } + return stats } +func isEligibleForAutoStartStop(ws database.Workspace) bool { + return !ws.Deleted && (ws.AutostartSchedule.String != "" || ws.Ttl.Int64 > 0) +} + func getNextTransition( ws database.Workspace, priorHistory database.WorkspaceBuild, diff --git a/coderd/database/databasefake/databasefake.go b/coderd/database/databasefake/databasefake.go index 0958d2e7ffde9..a8b0e564294f8 100644 --- a/coderd/database/databasefake/databasefake.go +++ b/coderd/database/databasefake/databasefake.go @@ -588,20 +588,6 @@ func (q *fakeQuerier) GetWorkspaceAppsByAgentIDs(_ context.Context, ids []uuid.U return apps, nil } -func (q *fakeQuerier) GetWorkspacesAutostart(_ context.Context) ([]database.Workspace, error) { - q.mutex.RLock() - defer q.mutex.RUnlock() - workspaces := make([]database.Workspace, 0) - for _, ws := range q.workspaces { - if ws.AutostartSchedule.String != "" { - workspaces = append(workspaces, ws) - } else if ws.Ttl.Valid { - workspaces = append(workspaces, ws) - } - } - return workspaces, nil -} - func (q *fakeQuerier) GetWorkspaceOwnerCountsByTemplateIDs(_ context.Context, templateIDs []uuid.UUID) ([]database.GetWorkspaceOwnerCountsByTemplateIDsRow, error) { q.mutex.RLock() defer q.mutex.RUnlock() @@ -2383,7 +2369,8 @@ func (q *fakeQuerier) GetDeploymentID(_ context.Context) (string, error) { } func (q *fakeQuerier) InsertLicense( - _ context.Context, arg database.InsertLicenseParams) (database.License, error) { + _ context.Context, arg database.InsertLicenseParams, +) (database.License, error) { q.mutex.Lock() defer q.mutex.Unlock() diff --git a/coderd/database/querier.go b/coderd/database/querier.go index 12985422f5f06..23cd258f49084 100644 --- a/coderd/database/querier.go +++ b/coderd/database/querier.go @@ -99,7 +99,6 @@ type querier interface { GetWorkspaceResourcesByJobID(ctx context.Context, jobID uuid.UUID) ([]WorkspaceResource, error) GetWorkspaceResourcesCreatedAfter(ctx context.Context, createdAt time.Time) ([]WorkspaceResource, error) GetWorkspaces(ctx context.Context, arg GetWorkspacesParams) ([]Workspace, error) - GetWorkspacesAutostart(ctx context.Context) ([]Workspace, error) InsertAPIKey(ctx context.Context, arg InsertAPIKeyParams) (APIKey, error) InsertAgentStat(ctx context.Context, arg InsertAgentStatParams) (AgentStat, error) InsertAuditLog(ctx context.Context, arg InsertAuditLogParams) (AuditLog, error) diff --git a/coderd/database/queries.sql.go b/coderd/database/queries.sql.go index 9d63a7e49424f..213f5ecf0bbf9 100644 --- a/coderd/database/queries.sql.go +++ b/coderd/database/queries.sql.go @@ -4762,56 +4762,6 @@ func (q *sqlQuerier) GetWorkspaces(ctx context.Context, arg GetWorkspacesParams) return items, nil } -const getWorkspacesAutostart = `-- name: GetWorkspacesAutostart :many -SELECT - id, created_at, updated_at, owner_id, organization_id, template_id, deleted, name, autostart_schedule, ttl, last_used_at -FROM - workspaces -WHERE - deleted = false -AND -( - (autostart_schedule IS NOT NULL AND autostart_schedule <> '') - OR - (ttl IS NOT NULL AND ttl > 0) -) -` - -func (q *sqlQuerier) GetWorkspacesAutostart(ctx context.Context) ([]Workspace, error) { - rows, err := q.db.QueryContext(ctx, getWorkspacesAutostart) - if err != nil { - return nil, err - } - defer rows.Close() - var items []Workspace - for rows.Next() { - var i Workspace - if err := rows.Scan( - &i.ID, - &i.CreatedAt, - &i.UpdatedAt, - &i.OwnerID, - &i.OrganizationID, - &i.TemplateID, - &i.Deleted, - &i.Name, - &i.AutostartSchedule, - &i.Ttl, - &i.LastUsedAt, - ); err != nil { - return nil, err - } - items = append(items, i) - } - if err := rows.Close(); err != nil { - return nil, err - } - if err := rows.Err(); err != nil { - return nil, err - } - return items, nil -} - const insertWorkspace = `-- name: InsertWorkspace :one INSERT INTO workspaces ( diff --git a/coderd/database/queries/workspaces.sql b/coderd/database/queries/workspaces.sql index 3af1ca49655d5..f2c1723f84ba4 100644 --- a/coderd/database/queries/workspaces.sql +++ b/coderd/database/queries/workspaces.sql @@ -50,20 +50,6 @@ WHERE END ; --- name: GetWorkspacesAutostart :many -SELECT - * -FROM - workspaces -WHERE - deleted = false -AND -( - (autostart_schedule IS NOT NULL AND autostart_schedule <> '') - OR - (ttl IS NOT NULL AND ttl > 0) -); - -- name: GetWorkspaceByOwnerIDAndName :one SELECT *