From fb857e09b0df01482ebfc29b81eaace9831e3414 Mon Sep 17 00:00:00 2001 From: Danielle Maywood Date: Thu, 7 Nov 2024 09:29:25 +0000 Subject: [PATCH 1/7] fix: rewrite GetWorkspacesEligibleForTransition query --- coderd/database/dbauthz/dbauthz.go | 2 +- coderd/database/dbmem/dbmem.go | 34 +++++-- coderd/database/dbmetrics/querymetrics.go | 2 +- coderd/database/dbmock/dbmock.go | 4 +- coderd/database/querier.go | 2 +- coderd/database/queries.sql.go | 103 +++++++++++----------- coderd/database/queries/workspaces.sql | 76 +++++++++------- 7 files changed, 128 insertions(+), 95 deletions(-) diff --git a/coderd/database/dbauthz/dbauthz.go b/coderd/database/dbauthz/dbauthz.go index c855d5a1984df..218d6681dc771 100644 --- a/coderd/database/dbauthz/dbauthz.go +++ b/coderd/database/dbauthz/dbauthz.go @@ -2801,7 +2801,7 @@ func (q *querier) GetWorkspacesAndAgentsByOwnerID(ctx context.Context, ownerID u return q.db.GetAuthorizedWorkspacesAndAgentsByOwnerID(ctx, ownerID, prep) } -func (q *querier) GetWorkspacesEligibleForTransition(ctx context.Context, now time.Time) ([]database.WorkspaceTable, error) { +func (q *querier) GetWorkspacesEligibleForTransition(ctx context.Context, now time.Time) ([]database.GetWorkspacesEligibleForTransitionRow, error) { return q.db.GetWorkspacesEligibleForTransition(ctx, now) } diff --git a/coderd/database/dbmem/dbmem.go b/coderd/database/dbmem/dbmem.go index 9a306db09785e..5caf644257c82 100644 --- a/coderd/database/dbmem/dbmem.go +++ b/coderd/database/dbmem/dbmem.go @@ -6868,11 +6868,11 @@ func (q *FakeQuerier) GetWorkspacesAndAgentsByOwnerID(ctx context.Context, owner return q.GetAuthorizedWorkspacesAndAgentsByOwnerID(ctx, ownerID, nil) } -func (q *FakeQuerier) GetWorkspacesEligibleForTransition(ctx context.Context, now time.Time) ([]database.WorkspaceTable, error) { +func (q *FakeQuerier) GetWorkspacesEligibleForTransition(ctx context.Context, now time.Time) ([]database.GetWorkspacesEligibleForTransitionRow, error) { q.mutex.RLock() defer q.mutex.RUnlock() - workspaces := []database.WorkspaceTable{} + workspaces := []database.GetWorkspacesEligibleForTransitionRow{} for _, workspace := range q.workspaces { build, err := q.getLatestWorkspaceBuildByWorkspaceIDNoLock(ctx, workspace.ID) if err != nil { @@ -6883,14 +6883,20 @@ func (q *FakeQuerier) GetWorkspacesEligibleForTransition(ctx context.Context, no !build.Deadline.IsZero() && build.Deadline.Before(now) && !workspace.DormantAt.Valid { - workspaces = append(workspaces, workspace) + workspaces = append(workspaces, database.GetWorkspacesEligibleForTransitionRow{ + ID: workspace.ID, + Name: workspace.Name, + }) continue } if build.Transition == database.WorkspaceTransitionStop && workspace.AutostartSchedule.Valid && !workspace.DormantAt.Valid { - workspaces = append(workspaces, workspace) + workspaces = append(workspaces, database.GetWorkspacesEligibleForTransitionRow{ + ID: workspace.ID, + Name: workspace.Name, + }) continue } @@ -6899,7 +6905,10 @@ func (q *FakeQuerier) GetWorkspacesEligibleForTransition(ctx context.Context, no return nil, xerrors.Errorf("get provisioner job by ID: %w", err) } if codersdk.ProvisionerJobStatus(job.JobStatus) == codersdk.ProvisionerJobFailed { - workspaces = append(workspaces, workspace) + workspaces = append(workspaces, database.GetWorkspacesEligibleForTransitionRow{ + ID: workspace.ID, + Name: workspace.Name, + }) continue } @@ -6908,11 +6917,17 @@ func (q *FakeQuerier) GetWorkspacesEligibleForTransition(ctx context.Context, no return nil, xerrors.Errorf("get template by ID: %w", err) } if !workspace.DormantAt.Valid && template.TimeTilDormant > 0 { - workspaces = append(workspaces, workspace) + workspaces = append(workspaces, database.GetWorkspacesEligibleForTransitionRow{ + ID: workspace.ID, + Name: workspace.Name, + }) continue } if workspace.DormantAt.Valid && template.TimeTilDormantAutoDelete > 0 { - workspaces = append(workspaces, workspace) + workspaces = append(workspaces, database.GetWorkspacesEligibleForTransitionRow{ + ID: workspace.ID, + Name: workspace.Name, + }) continue } @@ -6921,7 +6936,10 @@ func (q *FakeQuerier) GetWorkspacesEligibleForTransition(ctx context.Context, no return nil, xerrors.Errorf("get user by ID: %w", err) } if user.Status == database.UserStatusSuspended && build.Transition == database.WorkspaceTransitionStart { - workspaces = append(workspaces, workspace) + workspaces = append(workspaces, database.GetWorkspacesEligibleForTransitionRow{ + ID: workspace.ID, + Name: workspace.Name, + }) continue } } diff --git a/coderd/database/dbmetrics/querymetrics.go b/coderd/database/dbmetrics/querymetrics.go index cee25e482bbaa..b2edda7a8fff4 100644 --- a/coderd/database/dbmetrics/querymetrics.go +++ b/coderd/database/dbmetrics/querymetrics.go @@ -1659,7 +1659,7 @@ func (m queryMetricsStore) GetWorkspacesAndAgentsByOwnerID(ctx context.Context, return r0, r1 } -func (m queryMetricsStore) GetWorkspacesEligibleForTransition(ctx context.Context, now time.Time) ([]database.WorkspaceTable, error) { +func (m queryMetricsStore) GetWorkspacesEligibleForTransition(ctx context.Context, now time.Time) ([]database.GetWorkspacesEligibleForTransitionRow, error) { start := time.Now() workspaces, err := m.s.GetWorkspacesEligibleForTransition(ctx, now) m.queryLatencies.WithLabelValues("GetWorkspacesEligibleForAutoStartStop").Observe(time.Since(start).Seconds()) diff --git a/coderd/database/dbmock/dbmock.go b/coderd/database/dbmock/dbmock.go index d8721f56d3f4e..cb5ebd69f89e7 100644 --- a/coderd/database/dbmock/dbmock.go +++ b/coderd/database/dbmock/dbmock.go @@ -3503,10 +3503,10 @@ func (mr *MockStoreMockRecorder) GetWorkspacesAndAgentsByOwnerID(arg0, arg1 any) } // GetWorkspacesEligibleForTransition mocks base method. -func (m *MockStore) GetWorkspacesEligibleForTransition(arg0 context.Context, arg1 time.Time) ([]database.WorkspaceTable, error) { +func (m *MockStore) GetWorkspacesEligibleForTransition(arg0 context.Context, arg1 time.Time) ([]database.GetWorkspacesEligibleForTransitionRow, error) { m.ctrl.T.Helper() ret := m.ctrl.Call(m, "GetWorkspacesEligibleForTransition", arg0, arg1) - ret0, _ := ret[0].([]database.WorkspaceTable) + ret0, _ := ret[0].([]database.GetWorkspacesEligibleForTransitionRow) ret1, _ := ret[1].(error) return ret0, ret1 } diff --git a/coderd/database/querier.go b/coderd/database/querier.go index 46d1b1ae5b322..297e549f1e300 100644 --- a/coderd/database/querier.go +++ b/coderd/database/querier.go @@ -346,7 +346,7 @@ type sqlcQuerier interface { // be used in a WHERE clause. GetWorkspaces(ctx context.Context, arg GetWorkspacesParams) ([]GetWorkspacesRow, error) GetWorkspacesAndAgentsByOwnerID(ctx context.Context, ownerID uuid.UUID) ([]GetWorkspacesAndAgentsByOwnerIDRow, error) - GetWorkspacesEligibleForTransition(ctx context.Context, now time.Time) ([]WorkspaceTable, error) + GetWorkspacesEligibleForTransition(ctx context.Context, now time.Time) ([]GetWorkspacesEligibleForTransitionRow, error) InsertAPIKey(ctx context.Context, arg InsertAPIKeyParams) (APIKey, error) // We use the organization_id as the id // for simplicity since all users is diff --git a/coderd/database/queries.sql.go b/coderd/database/queries.sql.go index 87d3c17f5400f..27612573a3c2e 100644 --- a/coderd/database/queries.sql.go +++ b/coderd/database/queries.sql.go @@ -15363,7 +15363,8 @@ func (q *sqlQuerier) GetWorkspacesAndAgentsByOwnerID(ctx context.Context, ownerI const getWorkspacesEligibleForTransition = `-- name: GetWorkspacesEligibleForTransition :many SELECT - workspaces.id, workspaces.created_at, workspaces.updated_at, workspaces.owner_id, workspaces.organization_id, workspaces.template_id, workspaces.deleted, workspaces.name, workspaces.autostart_schedule, workspaces.ttl, workspaces.last_used_at, workspaces.dormant_at, workspaces.deleting_at, workspaces.automatic_updates, workspaces.favorite + workspaces.id, + workspaces.name FROM workspaces LEFT JOIN @@ -15385,82 +15386,84 @@ WHERE ) AND ( - -- If the workspace build was a start transition, the workspace is - -- potentially eligible for autostop if it's past the deadline. The - -- deadline is computed at build time upon success and is bumped based - -- on activity (up the max deadline if set). We don't need to check - -- license here since that's done when the values are written to the build. + -- isEligibleForAutostop ( - workspace_builds.transition = 'start'::workspace_transition AND - workspace_builds.deadline IS NOT NULL AND - workspace_builds.deadline < $1 :: timestamptz + provisioner_jobs.job_status != 'failed'::provisioner_job_status AND + workspaces.dormant_at IS NULL AND + workspace_builds.transition = 'start'::workspace_transition AND ( + users.status = 'suspended'::user_status OR ( + workspace_builds.deadline != '0001-01-01 00:00:00+00'::timestamp AND + workspace_builds.deadline < $1 :: timestamptz + ) + ) ) OR - -- If the workspace build was a stop transition, the workspace is - -- potentially eligible for autostart if it has a schedule set. The - -- caller must check if the template allows autostart in a license-aware - -- fashion as we cannot check it here. + -- isEligibleForAutostart ( + users.status = 'active'::user_status AND + provisioner_jobs.job_status != 'failed'::provisioner_job_status AND workspace_builds.transition = 'stop'::workspace_transition AND workspaces.autostart_schedule IS NOT NULL ) OR - -- If the workspace's most recent job resulted in an error - -- it may be eligible for failed stop. - ( - provisioner_jobs.error IS NOT NULL AND - provisioner_jobs.error != '' AND - workspace_builds.transition = 'start'::workspace_transition - ) OR - - -- If the workspace's template has an inactivity_ttl set - -- it may be eligible for dormancy. + -- isEligibleForDormantStop ( + workspaces.dormant_at IS NULL AND templates.time_til_dormant > 0 AND - workspaces.dormant_at IS NULL + ($1 :: timestamptz) - workspaces.last_used_at > (INTERVAL '1 millisecond' * (templates.time_til_dormant / 1000000)) ) OR - -- If the workspace's template has a time_til_dormant_autodelete set - -- and the workspace is already dormant. + -- isEligibleForDelete ( + workspaces.dormant_at IS NOT NULL AND + workspaces.deleting_at IS NOT NULL AND + workspaces.deleting_at < $1 :: timestamptz AND templates.time_til_dormant_autodelete > 0 AND - workspaces.dormant_at IS NOT NULL + CASE + WHEN ( + workspace_builds.transition = 'delete'::workspace_transition AND + provisioner_jobs.job_status = 'failed'::provisioner_job_status + ) THEN ( + ( + provisioner_jobs.canceled_at IS NOT NULL OR + provisioner_jobs.completed_at IS NOT NULL + ) AND ( + ($1 :: timestamptz) - (CASE + WHEN provisioner_jobs.canceled_at IS NOT NULL THEN provisioner_jobs.canceled_at + ELSE provisioner_jobs.completed_at + END) > INTERVAL '24 hours' + ) + ) + ELSE true + END ) OR - -- If the user account is suspended, and the workspace is running. + -- isEligibleForFailedStop ( - users.status = 'suspended'::user_status AND - workspace_builds.transition = 'start'::workspace_transition + templates.failure_ttl > 0 AND + provisioner_jobs.job_status = 'failed'::provisioner_job_status AND + workspace_builds.transition = 'start'::workspace_transition AND + provisioner_jobs.completed_at IS NOT NULL AND + ($1 :: timestamptz) - provisioner_jobs.completed_at > (INTERVAL '1 millisecond' * (templates.failure_ttl / 1000000)) ) ) AND workspaces.deleted = 'false' ` -func (q *sqlQuerier) GetWorkspacesEligibleForTransition(ctx context.Context, now time.Time) ([]WorkspaceTable, error) { +type GetWorkspacesEligibleForTransitionRow struct { + ID uuid.UUID `db:"id" json:"id"` + Name string `db:"name" json:"name"` +} + +func (q *sqlQuerier) GetWorkspacesEligibleForTransition(ctx context.Context, now time.Time) ([]GetWorkspacesEligibleForTransitionRow, error) { rows, err := q.db.QueryContext(ctx, getWorkspacesEligibleForTransition, now) if err != nil { return nil, err } defer rows.Close() - var items []WorkspaceTable + var items []GetWorkspacesEligibleForTransitionRow for rows.Next() { - var i WorkspaceTable - if err := rows.Scan( - &i.ID, - &i.CreatedAt, - &i.UpdatedAt, - &i.OwnerID, - &i.OrganizationID, - &i.TemplateID, - &i.Deleted, - &i.Name, - &i.AutostartSchedule, - &i.Ttl, - &i.LastUsedAt, - &i.DormantAt, - &i.DeletingAt, - &i.AutomaticUpdates, - &i.Favorite, - ); err != nil { + var i GetWorkspacesEligibleForTransitionRow + if err := rows.Scan(&i.ID, &i.Name); err != nil { return nil, err } items = append(items, i) diff --git a/coderd/database/queries/workspaces.sql b/coderd/database/queries/workspaces.sql index a1f41eb84d603..91315170c619c 100644 --- a/coderd/database/queries/workspaces.sql +++ b/coderd/database/queries/workspaces.sql @@ -557,7 +557,8 @@ FROM pending_workspaces, building_workspaces, running_workspaces, failed_workspa -- name: GetWorkspacesEligibleForTransition :many SELECT - workspaces.* + workspaces.id, + workspaces.name FROM workspaces LEFT JOIN @@ -579,52 +580,65 @@ WHERE ) AND ( - -- If the workspace build was a start transition, the workspace is - -- potentially eligible for autostop if it's past the deadline. The - -- deadline is computed at build time upon success and is bumped based - -- on activity (up the max deadline if set). We don't need to check - -- license here since that's done when the values are written to the build. + -- isEligibleForAutostop ( - workspace_builds.transition = 'start'::workspace_transition AND - workspace_builds.deadline IS NOT NULL AND - workspace_builds.deadline < @now :: timestamptz + provisioner_jobs.job_status != 'failed'::provisioner_job_status AND + workspaces.dormant_at IS NULL AND + workspace_builds.transition = 'start'::workspace_transition AND ( + users.status = 'suspended'::user_status OR ( + workspace_builds.deadline != '0001-01-01 00:00:00+00'::timestamp AND + workspace_builds.deadline < @now :: timestamptz + ) + ) ) OR - -- If the workspace build was a stop transition, the workspace is - -- potentially eligible for autostart if it has a schedule set. The - -- caller must check if the template allows autostart in a license-aware - -- fashion as we cannot check it here. + -- isEligibleForAutostart ( + users.status = 'active'::user_status AND + provisioner_jobs.job_status != 'failed'::provisioner_job_status AND workspace_builds.transition = 'stop'::workspace_transition AND workspaces.autostart_schedule IS NOT NULL ) OR - -- If the workspace's most recent job resulted in an error - -- it may be eligible for failed stop. - ( - provisioner_jobs.error IS NOT NULL AND - provisioner_jobs.error != '' AND - workspace_builds.transition = 'start'::workspace_transition - ) OR - - -- If the workspace's template has an inactivity_ttl set - -- it may be eligible for dormancy. + -- isEligibleForDormantStop ( + workspaces.dormant_at IS NULL AND templates.time_til_dormant > 0 AND - workspaces.dormant_at IS NULL + (@now :: timestamptz) - workspaces.last_used_at > (INTERVAL '1 millisecond' * (templates.time_til_dormant / 1000000)) ) OR - -- If the workspace's template has a time_til_dormant_autodelete set - -- and the workspace is already dormant. + -- isEligibleForDelete ( + workspaces.dormant_at IS NOT NULL AND + workspaces.deleting_at IS NOT NULL AND + workspaces.deleting_at < @now :: timestamptz AND templates.time_til_dormant_autodelete > 0 AND - workspaces.dormant_at IS NOT NULL + CASE + WHEN ( + workspace_builds.transition = 'delete'::workspace_transition AND + provisioner_jobs.job_status = 'failed'::provisioner_job_status + ) THEN ( + ( + provisioner_jobs.canceled_at IS NOT NULL OR + provisioner_jobs.completed_at IS NOT NULL + ) AND ( + (@now :: timestamptz) - (CASE + WHEN provisioner_jobs.canceled_at IS NOT NULL THEN provisioner_jobs.canceled_at + ELSE provisioner_jobs.completed_at + END) > INTERVAL '24 hours' + ) + ) + ELSE true + END ) OR - -- If the user account is suspended, and the workspace is running. + -- isEligibleForFailedStop ( - users.status = 'suspended'::user_status AND - workspace_builds.transition = 'start'::workspace_transition + templates.failure_ttl > 0 AND + provisioner_jobs.job_status = 'failed'::provisioner_job_status AND + workspace_builds.transition = 'start'::workspace_transition AND + provisioner_jobs.completed_at IS NOT NULL AND + (@now :: timestamptz) - provisioner_jobs.completed_at > (INTERVAL '1 millisecond' * (templates.failure_ttl / 1000000)) ) ) AND workspaces.deleted = 'false'; @@ -727,5 +741,3 @@ WHERE -- Authorize Filter clause will be injected below in GetAuthorizedWorkspacesAndAgentsByOwnerID -- @authorize_filter GROUP BY workspaces.id, workspaces.name, latest_build.job_status, latest_build.job_id, latest_build.transition; - - From de6e6af41f3302e20564a71938dfd70b71db8591 Mon Sep 17 00:00:00 2001 From: Danielle Maywood Date: Thu, 7 Nov 2024 12:26:30 +0000 Subject: [PATCH 2/7] feat: add caching to reduce db calls --- coderd/autobuild/cache.go | 41 +++++++++++++ coderd/autobuild/cache_test.go | 84 ++++++++++++++++++++++++++ coderd/autobuild/lifecycle_executor.go | 26 ++++++-- 3 files changed, 147 insertions(+), 4 deletions(-) create mode 100644 coderd/autobuild/cache.go create mode 100644 coderd/autobuild/cache_test.go diff --git a/coderd/autobuild/cache.go b/coderd/autobuild/cache.go new file mode 100644 index 0000000000000..7d6122ef2fada --- /dev/null +++ b/coderd/autobuild/cache.go @@ -0,0 +1,41 @@ +package autobuild + +import ( + "errors" + "sync" +) + +var errCacheLoaderNil = errors.New("loader is nil") + +type cacheOf[K comparable, V any] struct { + mu sync.Mutex + m map[K]V +} + +func newCacheOf[K comparable, V any]() *cacheOf[K, V] { + return &cacheOf[K, V]{ + m: make(map[K]V), + } +} + +func (c *cacheOf[K, V]) LoadOrStore(key K, loader func() (V, error)) (V, error) { + c.mu.Lock() + defer c.mu.Unlock() + + value, found := c.m[key] + if !found { + if loader == nil { + return *new(V), errCacheLoaderNil + } + + loaded, err := loader() + if err != nil { + return *new(V), err + } + + c.m[key] = loaded + return loaded, nil + } + + return value, nil +} diff --git a/coderd/autobuild/cache_test.go b/coderd/autobuild/cache_test.go new file mode 100644 index 0000000000000..2a335a65294a2 --- /dev/null +++ b/coderd/autobuild/cache_test.go @@ -0,0 +1,84 @@ +package autobuild + +import ( + "errors" + "testing" + + "github.com/google/uuid" + "github.com/stretchr/testify/require" +) + +func TestCache(t *testing.T) { + t.Parallel() + + t.Run("CallsLoaderOnce", func(t *testing.T) { + callCount := 0 + cache := newCacheOf[uuid.UUID, int]() + key := uuid.New() + + // Call LoadOrStore for key `key` for the first time. + // We expect this to call our loader function. + value, err := cache.LoadOrStore(key, func() (int, error) { + callCount += 1 + return 1, nil + }) + require.NoError(t, err) + require.Equal(t, 1, value) + require.Equal(t, 1, callCount) + + // Call LoadOrStore for key `key` for the second time. + // We expect this to return data from the previous load. + value, err = cache.LoadOrStore(key, func() (int, error) { + callCount += 1 + + // We return a different value to further check + // that this function isn't called. + return 2, nil + }) + require.NoError(t, err) + require.Equal(t, 1, value) + require.Equal(t, 1, callCount) + }) + + t.Run("ReturnsErrOnLoaderErr", func(t *testing.T) { + exampleErr := errors.New("example error") + cache := newCacheOf[uuid.UUID, int]() + key := uuid.New() + + _, err := cache.LoadOrStore(key, func() (int, error) { + return 0, exampleErr + }) + require.Error(t, err) + require.Equal(t, exampleErr, err) + }) + + t.Run("CanCacheWithMultipleKeys", func(t *testing.T) { + cache := newCacheOf[uuid.UUID, int]() + keyA := uuid.New() + keyB := uuid.New() + + // We first insert data with our first key + value, err := cache.LoadOrStore(keyA, func() (int, error) { + return 10, nil + }) + require.NoError(t, err) + require.Equal(t, 10, value) + + // Next we insert data with a different key + value, err = cache.LoadOrStore(keyB, func() (int, error) { + return 20, nil + }) + require.NoError(t, err) + require.Equal(t, 20, value) + + // Now we check the data is still available for the first key + value, err = cache.LoadOrStore(keyA, nil) + require.NoError(t, err) + require.Equal(t, 10, value) + + // And that the data is also still available for the second key + value, err = cache.LoadOrStore(keyB, nil) + require.NoError(t, err) + require.Equal(t, 20, value) + }) +} diff --git a/coderd/autobuild/lifecycle_executor.go b/coderd/autobuild/lifecycle_executor.go index ac2930c9e32c8..3737da035c3b3 100644 --- a/coderd/autobuild/lifecycle_executor.go +++ b/coderd/autobuild/lifecycle_executor.go @@ -154,6 +154,16 @@ func (e *Executor) runOnce(t time.Time) Stats { // Limit the concurrency to avoid overloading the database. eg.SetLimit(10) + // We cache these values to help reduce load on the database. + // These could be outdated during our execution, but this is + // unlikely to be noticed or cause any unwanted behaviour. + var ( + userCache = newCacheOf[uuid.UUID, database.User]() + templateCache = newCacheOf[uuid.UUID, database.Template]() + templateVersionCache = newCacheOf[uuid.UUID, database.TemplateVersion]() + templateScheduleCache = newCacheOf[uuid.UUID, schedule.TemplateScheduleOptions]() + ) + for _, ws := range workspaces { wsID := ws.ID wsName := ws.Name @@ -184,7 +194,9 @@ func (e *Executor) runOnce(t time.Time) Stats { return xerrors.Errorf("get workspace by id: %w", err) } - user, err := tx.GetUserByID(e.ctx, ws.OwnerID) + user, err := userCache.LoadOrStore(ws.OwnerID, func() (database.User, error) { + return tx.GetUserByID(e.ctx, ws.OwnerID) + }) if err != nil { return xerrors.Errorf("get user by id: %w", err) } @@ -200,17 +212,23 @@ func (e *Executor) runOnce(t time.Time) Stats { return xerrors.Errorf("get latest provisioner job: %w", err) } - templateSchedule, err := (*(e.templateScheduleStore.Load())).Get(e.ctx, tx, ws.TemplateID) + templateSchedule, err := templateScheduleCache.LoadOrStore(ws.TemplateID, func() (schedule.TemplateScheduleOptions, error) { + return (*(e.templateScheduleStore.Load())).Get(e.ctx, tx, ws.TemplateID) + }) if err != nil { return xerrors.Errorf("get template scheduling options: %w", err) } - tmpl, err = tx.GetTemplateByID(e.ctx, ws.TemplateID) + tmpl, err := templateCache.LoadOrStore(ws.TemplateID, func() (database.Template, error) { + return tx.GetTemplateByID(e.ctx, ws.TemplateID) + }) if err != nil { return xerrors.Errorf("get template by ID: %w", err) } - activeTemplateVersion, err = tx.GetTemplateVersionByID(e.ctx, tmpl.ActiveVersionID) + activeTemplateVersion, err = templateVersionCache.LoadOrStore(tmpl.ActiveVersionID, func() (database.TemplateVersion, error) { + return tx.GetTemplateVersionByID(e.ctx, tmpl.ActiveVersionID) + }) if err != nil { return xerrors.Errorf("get active template version by ID: %w", err) } From e81a985a94abee23ae2d5776148536b18d88126c Mon Sep 17 00:00:00 2001 From: Danielle Maywood Date: Thu, 7 Nov 2024 14:28:43 +0000 Subject: [PATCH 3/7] revert: cache --- coderd/autobuild/cache.go | 41 ------------- coderd/autobuild/cache_test.go | 84 -------------------------- coderd/autobuild/lifecycle_executor.go | 26 ++------ 3 files changed, 4 insertions(+), 147 deletions(-) delete mode 100644 coderd/autobuild/cache.go delete mode 100644 coderd/autobuild/cache_test.go diff --git a/coderd/autobuild/cache.go b/coderd/autobuild/cache.go deleted file mode 100644 index 7d6122ef2fada..0000000000000 --- a/coderd/autobuild/cache.go +++ /dev/null @@ -1,41 +0,0 @@ -package autobuild - -import ( - "errors" - "sync" -) - -var errCacheLoaderNil = errors.New("loader is nil") - -type cacheOf[K comparable, V any] struct { - mu sync.Mutex - m map[K]V -} - -func newCacheOf[K comparable, V any]() *cacheOf[K, V] { - return &cacheOf[K, V]{ - m: make(map[K]V), - } -} - -func (c *cacheOf[K, V]) LoadOrStore(key K, loader func() (V, error)) (V, error) { - c.mu.Lock() - defer c.mu.Unlock() - - value, found := c.m[key] - if !found { - if loader == nil { - return *new(V), errCacheLoaderNil - } - - loaded, err := loader() - if err != nil { - return *new(V), err - } - - c.m[key] = loaded - return loaded, nil - } - - return value, nil -} diff --git a/coderd/autobuild/cache_test.go b/coderd/autobuild/cache_test.go deleted file mode 100644 index 2a335a65294a2..0000000000000 --- a/coderd/autobuild/cache_test.go +++ /dev/null @@ -1,84 +0,0 @@ -package autobuild - -import ( - "errors" - "testing" - - "github.com/google/uuid" - "github.com/stretchr/testify/require" -) - -func TestCache(t *testing.T) { - t.Parallel() - - t.Run("CallsLoaderOnce", func(t *testing.T) { - callCount := 0 - cache := newCacheOf[uuid.UUID, int]() - key := uuid.New() - - // Call LoadOrStore for key `key` for the first time. - // We expect this to call our loader function. - value, err := cache.LoadOrStore(key, func() (int, error) { - callCount += 1 - return 1, nil - }) - require.NoError(t, err) - require.Equal(t, 1, value) - require.Equal(t, 1, callCount) - - // Call LoadOrStore for key `key` for the second time. - // We expect this to return data from the previous load. - value, err = cache.LoadOrStore(key, func() (int, error) { - callCount += 1 - - // We return a different value to further check - // that this function isn't called. - return 2, nil - }) - require.NoError(t, err) - require.Equal(t, 1, value) - require.Equal(t, 1, callCount) - }) - - t.Run("ReturnsErrOnLoaderErr", func(t *testing.T) { - exampleErr := errors.New("example error") - cache := newCacheOf[uuid.UUID, int]() - key := uuid.New() - - _, err := cache.LoadOrStore(key, func() (int, error) { - return 0, exampleErr - }) - require.Error(t, err) - require.Equal(t, exampleErr, err) - }) - - t.Run("CanCacheWithMultipleKeys", func(t *testing.T) { - cache := newCacheOf[uuid.UUID, int]() - keyA := uuid.New() - keyB := uuid.New() - - // We first insert data with our first key - value, err := cache.LoadOrStore(keyA, func() (int, error) { - return 10, nil - }) - require.NoError(t, err) - require.Equal(t, 10, value) - - // Next we insert data with a different key - value, err = cache.LoadOrStore(keyB, func() (int, error) { - return 20, nil - }) - require.NoError(t, err) - require.Equal(t, 20, value) - - // Now we check the data is still available for the first key - value, err = cache.LoadOrStore(keyA, nil) - require.NoError(t, err) - require.Equal(t, 10, value) - - // And that the data is also still available for the second key - value, err = cache.LoadOrStore(keyB, nil) - require.NoError(t, err) - require.Equal(t, 20, value) - }) -} diff --git a/coderd/autobuild/lifecycle_executor.go b/coderd/autobuild/lifecycle_executor.go index 3737da035c3b3..35f3ba106507a 100644 --- a/coderd/autobuild/lifecycle_executor.go +++ b/coderd/autobuild/lifecycle_executor.go @@ -154,16 +154,6 @@ func (e *Executor) runOnce(t time.Time) Stats { // Limit the concurrency to avoid overloading the database. eg.SetLimit(10) - // We cache these values to help reduce load on the database. - // These could be outdated during our execution, but this is - // unlikely to be noticed or cause any unwanted behaviour. - var ( - userCache = newCacheOf[uuid.UUID, database.User]() - templateCache = newCacheOf[uuid.UUID, database.Template]() - templateVersionCache = newCacheOf[uuid.UUID, database.TemplateVersion]() - templateScheduleCache = newCacheOf[uuid.UUID, schedule.TemplateScheduleOptions]() - ) - for _, ws := range workspaces { wsID := ws.ID wsName := ws.Name @@ -194,9 +184,7 @@ func (e *Executor) runOnce(t time.Time) Stats { return xerrors.Errorf("get workspace by id: %w", err) } - user, err := userCache.LoadOrStore(ws.OwnerID, func() (database.User, error) { - return tx.GetUserByID(e.ctx, ws.OwnerID) - }) + user, err := tx.GetUserByID(e.ctx, ws.OwnerID) if err != nil { return xerrors.Errorf("get user by id: %w", err) } @@ -212,23 +200,17 @@ func (e *Executor) runOnce(t time.Time) Stats { return xerrors.Errorf("get latest provisioner job: %w", err) } - templateSchedule, err := templateScheduleCache.LoadOrStore(ws.TemplateID, func() (schedule.TemplateScheduleOptions, error) { - return (*(e.templateScheduleStore.Load())).Get(e.ctx, tx, ws.TemplateID) - }) + templateSchedule, err := (*(e.templateScheduleStore.Load())).Get(e.ctx, tx, ws.TemplateID) if err != nil { return xerrors.Errorf("get template scheduling options: %w", err) } - tmpl, err := templateCache.LoadOrStore(ws.TemplateID, func() (database.Template, error) { - return tx.GetTemplateByID(e.ctx, ws.TemplateID) - }) + tmpl, err := tx.GetTemplateByID(e.ctx, ws.TemplateID) if err != nil { return xerrors.Errorf("get template by ID: %w", err) } - activeTemplateVersion, err = templateVersionCache.LoadOrStore(tmpl.ActiveVersionID, func() (database.TemplateVersion, error) { - return tx.GetTemplateVersionByID(e.ctx, tmpl.ActiveVersionID) - }) + activeTemplateVersion, err = tx.GetTemplateVersionByID(e.ctx, tmpl.ActiveVersionID) if err != nil { return xerrors.Errorf("get active template version by ID: %w", err) } From 190617c5dbb84503d5e4ecf2b5cd1aa57d7b6f67 Mon Sep 17 00:00:00 2001 From: Danielle Maywood Date: Thu, 7 Nov 2024 15:15:17 +0000 Subject: [PATCH 4/7] revert: mistake --- coderd/autobuild/lifecycle_executor.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/coderd/autobuild/lifecycle_executor.go b/coderd/autobuild/lifecycle_executor.go index 35f3ba106507a..ac2930c9e32c8 100644 --- a/coderd/autobuild/lifecycle_executor.go +++ b/coderd/autobuild/lifecycle_executor.go @@ -205,7 +205,7 @@ func (e *Executor) runOnce(t time.Time) Stats { return xerrors.Errorf("get template scheduling options: %w", err) } - tmpl, err := tx.GetTemplateByID(e.ctx, ws.TemplateID) + tmpl, err = tx.GetTemplateByID(e.ctx, ws.TemplateID) if err != nil { return xerrors.Errorf("get template by ID: %w", err) } From 18e9550d24f62f73ed9ea620e1c633ca61376a2b Mon Sep 17 00:00:00 2001 From: Danielle Maywood Date: Fri, 8 Nov 2024 10:02:13 +0000 Subject: [PATCH 5/7] chore: comment query --- coderd/database/queries.sql.go | 32 +++++++++++++++++++++----- coderd/database/queries/workspaces.sql | 32 +++++++++++++++++++++----- 2 files changed, 52 insertions(+), 12 deletions(-) diff --git a/coderd/database/queries.sql.go b/coderd/database/queries.sql.go index 27612573a3c2e..cc7a9166a729b 100644 --- a/coderd/database/queries.sql.go +++ b/coderd/database/queries.sql.go @@ -15386,7 +15386,11 @@ WHERE ) AND ( - -- isEligibleForAutostop + -- A workspace may be eligible for autostop if the following are true: + -- * The provisioner job has not failed. + -- * The workspace is not dormant. + -- * The workspace build was a start transition. + -- * The workspace's owner is suspended OR the workspace build deadline has passed. ( provisioner_jobs.job_status != 'failed'::provisioner_job_status AND workspaces.dormant_at IS NULL AND @@ -15398,7 +15402,11 @@ WHERE ) ) OR - -- isEligibleForAutostart + -- A workspace may be eligible for autostart if the following are true: + -- * The workspace's owner is active. + -- * The provisioner job did not failed. + -- * The workspace build was a stop transition. + -- * The workspace has an autostart schedule. ( users.status = 'active'::user_status AND provisioner_jobs.job_status != 'failed'::provisioner_job_status AND @@ -15406,14 +15414,21 @@ WHERE workspaces.autostart_schedule IS NOT NULL ) OR - -- isEligibleForDormantStop + -- A workspace may be eligible for dormant stop if the following are true: + -- * The workspace is not dormant. + -- * The template has set a time til dormant. + -- * The workspace has been unused for longer than the time til dormancy. ( workspaces.dormant_at IS NULL AND templates.time_til_dormant > 0 AND ($1 :: timestamptz) - workspaces.last_used_at > (INTERVAL '1 millisecond' * (templates.time_til_dormant / 1000000)) ) OR - -- isEligibleForDelete + -- A workspace may be eligible for deletion if the following are true: + -- * The workspace is dormant. + -- * The workspace is scheduled to be deleted. + -- * If there was a prior attempt to delete the workspace that failed: + -- * This attempt was at least 24 hours ago> ( workspaces.dormant_at IS NOT NULL AND workspaces.deleting_at IS NOT NULL AND @@ -15438,11 +15453,16 @@ WHERE END ) OR - -- isEligibleForFailedStop + -- A workspace may be eligible for failed stop if the following are true: + -- * The template has a failure ttl set. + -- * The workspace build was a start transition. + -- * The provisioner job failed. + -- * The provisioner job had completed. + -- * The provisioner job has been completed for longer than the failure ttl. ( templates.failure_ttl > 0 AND - provisioner_jobs.job_status = 'failed'::provisioner_job_status AND workspace_builds.transition = 'start'::workspace_transition AND + provisioner_jobs.job_status = 'failed'::provisioner_job_status AND provisioner_jobs.completed_at IS NOT NULL AND ($1 :: timestamptz) - provisioner_jobs.completed_at > (INTERVAL '1 millisecond' * (templates.failure_ttl / 1000000)) ) diff --git a/coderd/database/queries/workspaces.sql b/coderd/database/queries/workspaces.sql index 91315170c619c..d27a2569dc216 100644 --- a/coderd/database/queries/workspaces.sql +++ b/coderd/database/queries/workspaces.sql @@ -580,7 +580,11 @@ WHERE ) AND ( - -- isEligibleForAutostop + -- A workspace may be eligible for autostop if the following are true: + -- * The provisioner job has not failed. + -- * The workspace is not dormant. + -- * The workspace build was a start transition. + -- * The workspace's owner is suspended OR the workspace build deadline has passed. ( provisioner_jobs.job_status != 'failed'::provisioner_job_status AND workspaces.dormant_at IS NULL AND @@ -592,7 +596,11 @@ WHERE ) ) OR - -- isEligibleForAutostart + -- A workspace may be eligible for autostart if the following are true: + -- * The workspace's owner is active. + -- * The provisioner job did not failed. + -- * The workspace build was a stop transition. + -- * The workspace has an autostart schedule. ( users.status = 'active'::user_status AND provisioner_jobs.job_status != 'failed'::provisioner_job_status AND @@ -600,14 +608,21 @@ WHERE workspaces.autostart_schedule IS NOT NULL ) OR - -- isEligibleForDormantStop + -- A workspace may be eligible for dormant stop if the following are true: + -- * The workspace is not dormant. + -- * The template has set a time til dormant. + -- * The workspace has been unused for longer than the time til dormancy. ( workspaces.dormant_at IS NULL AND templates.time_til_dormant > 0 AND (@now :: timestamptz) - workspaces.last_used_at > (INTERVAL '1 millisecond' * (templates.time_til_dormant / 1000000)) ) OR - -- isEligibleForDelete + -- A workspace may be eligible for deletion if the following are true: + -- * The workspace is dormant. + -- * The workspace is scheduled to be deleted. + -- * If there was a prior attempt to delete the workspace that failed: + -- * This attempt was at least 24 hours ago> ( workspaces.dormant_at IS NOT NULL AND workspaces.deleting_at IS NOT NULL AND @@ -632,11 +647,16 @@ WHERE END ) OR - -- isEligibleForFailedStop + -- A workspace may be eligible for failed stop if the following are true: + -- * The template has a failure ttl set. + -- * The workspace build was a start transition. + -- * The provisioner job failed. + -- * The provisioner job had completed. + -- * The provisioner job has been completed for longer than the failure ttl. ( templates.failure_ttl > 0 AND - provisioner_jobs.job_status = 'failed'::provisioner_job_status AND workspace_builds.transition = 'start'::workspace_transition AND + provisioner_jobs.job_status = 'failed'::provisioner_job_status AND provisioner_jobs.completed_at IS NOT NULL AND (@now :: timestamptz) - provisioner_jobs.completed_at > (INTERVAL '1 millisecond' * (templates.failure_ttl / 1000000)) ) From 5aff6e3f1240b0de5c0e189064158a8339e9269a Mon Sep 17 00:00:00 2001 From: Danielle Maywood Date: Fri, 8 Nov 2024 10:22:56 +0000 Subject: [PATCH 6/7] fix: update dbmem.go to match new query --- coderd/database/dbmem/dbmem.go | 77 +++++++++++++++++++++------------- 1 file changed, 49 insertions(+), 28 deletions(-) diff --git a/coderd/database/dbmem/dbmem.go b/coderd/database/dbmem/dbmem.go index 5caf644257c82..b465adb96e96d 100644 --- a/coderd/database/dbmem/dbmem.go +++ b/coderd/database/dbmem/dbmem.go @@ -6876,23 +6876,32 @@ func (q *FakeQuerier) GetWorkspacesEligibleForTransition(ctx context.Context, no for _, workspace := range q.workspaces { build, err := q.getLatestWorkspaceBuildByWorkspaceIDNoLock(ctx, workspace.ID) if err != nil { - return nil, err + return nil, xerrors.Errorf("get workspace build by ID: %w", err) } - if build.Transition == database.WorkspaceTransitionStart && - !build.Deadline.IsZero() && - build.Deadline.Before(now) && - !workspace.DormantAt.Valid { - workspaces = append(workspaces, database.GetWorkspacesEligibleForTransitionRow{ - ID: workspace.ID, - Name: workspace.Name, - }) + user, err := q.getUserByIDNoLock(workspace.OwnerID) + if err != nil { + return nil, xerrors.Errorf("get user by ID: %w", err) + } + + job, err := q.getProvisionerJobByIDNoLock(ctx, build.JobID) + if err != nil { + return nil, xerrors.Errorf("get provisioner job by ID: %w", err) + } + + template, err := q.getTemplateByIDNoLock(ctx, workspace.TemplateID) + if err != nil { + return nil, xerrors.Errorf("get template by ID: %w", err) + } + + if workspace.Deleted { continue } - if build.Transition == database.WorkspaceTransitionStop && - workspace.AutostartSchedule.Valid && - !workspace.DormantAt.Valid { + if job.JobStatus != database.ProvisionerJobStatusFailed && + !workspace.DormantAt.Valid && + build.Transition == database.WorkspaceTransitionStart && + (user.Status == database.UserStatusSuspended || (!build.Deadline.IsZero() && build.Deadline.Before(now))) { workspaces = append(workspaces, database.GetWorkspacesEligibleForTransitionRow{ ID: workspace.ID, Name: workspace.Name, @@ -6900,11 +6909,10 @@ func (q *FakeQuerier) GetWorkspacesEligibleForTransition(ctx context.Context, no continue } - job, err := q.getProvisionerJobByIDNoLock(ctx, build.JobID) - if err != nil { - return nil, xerrors.Errorf("get provisioner job by ID: %w", err) - } - if codersdk.ProvisionerJobStatus(job.JobStatus) == codersdk.ProvisionerJobFailed { + if user.Status == database.UserStatusActive && + job.JobStatus != database.ProvisionerJobStatusFailed && + build.Transition == database.WorkspaceTransitionStop && + workspace.AutostartSchedule.Valid { workspaces = append(workspaces, database.GetWorkspacesEligibleForTransitionRow{ ID: workspace.ID, Name: workspace.Name, @@ -6912,18 +6920,31 @@ func (q *FakeQuerier) GetWorkspacesEligibleForTransition(ctx context.Context, no continue } - template, err := q.getTemplateByIDNoLock(ctx, workspace.TemplateID) - if err != nil { - return nil, xerrors.Errorf("get template by ID: %w", err) - } - if !workspace.DormantAt.Valid && template.TimeTilDormant > 0 { + if !workspace.DormantAt.Valid && + template.TimeTilDormant > 0 && + now.Sub(workspace.LastUsedAt) > time.Duration(template.TimeTilDormant) { workspaces = append(workspaces, database.GetWorkspacesEligibleForTransitionRow{ ID: workspace.ID, Name: workspace.Name, }) continue } - if workspace.DormantAt.Valid && template.TimeTilDormantAutoDelete > 0 { + + if workspace.DormantAt.Valid && + workspace.DeletingAt.Valid && + workspace.DeletingAt.Time.Before(now) && + template.TimeTilDormantAutoDelete > 0 { + if build.Transition == database.WorkspaceTransitionDelete && + job.JobStatus == database.ProvisionerJobStatusFailed { + if job.CanceledAt.Valid && now.Sub(job.CanceledAt.Time) <= 24*time.Hour { + continue + } + + if job.CompletedAt.Valid && now.Sub(job.CompletedAt.Time) <= 24*time.Hour { + continue + } + } + workspaces = append(workspaces, database.GetWorkspacesEligibleForTransitionRow{ ID: workspace.ID, Name: workspace.Name, @@ -6931,11 +6952,11 @@ func (q *FakeQuerier) GetWorkspacesEligibleForTransition(ctx context.Context, no continue } - user, err := q.getUserByIDNoLock(workspace.OwnerID) - if err != nil { - return nil, xerrors.Errorf("get user by ID: %w", err) - } - if user.Status == database.UserStatusSuspended && build.Transition == database.WorkspaceTransitionStart { + if template.FailureTTL > 0 && + build.Transition == database.WorkspaceTransitionStart && + job.JobStatus == database.ProvisionerJobStatusFailed && + job.CompletedAt.Valid && + now.Sub(job.CompletedAt.Time) > time.Duration(template.FailureTTL) { workspaces = append(workspaces, database.GetWorkspacesEligibleForTransitionRow{ ID: workspace.ID, Name: workspace.Name, From b9539027faa6191dbd0acf1321bb1f288f399d22 Mon Sep 17 00:00:00 2001 From: Danielle Maywood Date: Mon, 11 Nov 2024 09:39:59 +0000 Subject: [PATCH 7/7] fix: typo --- coderd/database/queries.sql.go | 10 +++++----- coderd/database/queries/workspaces.sql | 10 +++++----- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/coderd/database/queries.sql.go b/coderd/database/queries.sql.go index cc7a9166a729b..14afd75403c89 100644 --- a/coderd/database/queries.sql.go +++ b/coderd/database/queries.sql.go @@ -15396,7 +15396,7 @@ WHERE workspaces.dormant_at IS NULL AND workspace_builds.transition = 'start'::workspace_transition AND ( users.status = 'suspended'::user_status OR ( - workspace_builds.deadline != '0001-01-01 00:00:00+00'::timestamp AND + workspace_builds.deadline != '0001-01-01 00:00:00+00'::timestamptz AND workspace_builds.deadline < $1 :: timestamptz ) ) @@ -15404,7 +15404,7 @@ WHERE -- A workspace may be eligible for autostart if the following are true: -- * The workspace's owner is active. - -- * The provisioner job did not failed. + -- * The provisioner job did not fail. -- * The workspace build was a stop transition. -- * The workspace has an autostart schedule. ( @@ -15416,8 +15416,8 @@ WHERE -- A workspace may be eligible for dormant stop if the following are true: -- * The workspace is not dormant. - -- * The template has set a time til dormant. - -- * The workspace has been unused for longer than the time til dormancy. + -- * The template has set a time 'til dormant. + -- * The workspace has been unused for longer than the time 'til dormancy. ( workspaces.dormant_at IS NULL AND templates.time_til_dormant > 0 AND @@ -15428,7 +15428,7 @@ WHERE -- * The workspace is dormant. -- * The workspace is scheduled to be deleted. -- * If there was a prior attempt to delete the workspace that failed: - -- * This attempt was at least 24 hours ago> + -- * This attempt was at least 24 hours ago. ( workspaces.dormant_at IS NOT NULL AND workspaces.deleting_at IS NOT NULL AND diff --git a/coderd/database/queries/workspaces.sql b/coderd/database/queries/workspaces.sql index d27a2569dc216..4d200a33f1620 100644 --- a/coderd/database/queries/workspaces.sql +++ b/coderd/database/queries/workspaces.sql @@ -590,7 +590,7 @@ WHERE workspaces.dormant_at IS NULL AND workspace_builds.transition = 'start'::workspace_transition AND ( users.status = 'suspended'::user_status OR ( - workspace_builds.deadline != '0001-01-01 00:00:00+00'::timestamp AND + workspace_builds.deadline != '0001-01-01 00:00:00+00'::timestamptz AND workspace_builds.deadline < @now :: timestamptz ) ) @@ -598,7 +598,7 @@ WHERE -- A workspace may be eligible for autostart if the following are true: -- * The workspace's owner is active. - -- * The provisioner job did not failed. + -- * The provisioner job did not fail. -- * The workspace build was a stop transition. -- * The workspace has an autostart schedule. ( @@ -610,8 +610,8 @@ WHERE -- A workspace may be eligible for dormant stop if the following are true: -- * The workspace is not dormant. - -- * The template has set a time til dormant. - -- * The workspace has been unused for longer than the time til dormancy. + -- * The template has set a time 'til dormant. + -- * The workspace has been unused for longer than the time 'til dormancy. ( workspaces.dormant_at IS NULL AND templates.time_til_dormant > 0 AND @@ -622,7 +622,7 @@ WHERE -- * The workspace is dormant. -- * The workspace is scheduled to be deleted. -- * If there was a prior attempt to delete the workspace that failed: - -- * This attempt was at least 24 hours ago> + -- * This attempt was at least 24 hours ago. ( workspaces.dormant_at IS NOT NULL AND workspaces.deleting_at IS NOT NULL AND