Skip to content

fix: make GetWorkspacesEligibleForTransition return less false-positives #15429

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Nov 13, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
fix: rewrite GetWorkspacesEligibleForTransition query
  • Loading branch information
DanielleMaywood committed Nov 7, 2024
commit fb857e09b0df01482ebfc29b81eaace9831e3414
2 changes: 1 addition & 1 deletion coderd/database/dbauthz/dbauthz.go
Original file line number Diff line number Diff line change
Expand Up @@ -2801,7 +2801,7 @@ func (q *querier) GetWorkspacesAndAgentsByOwnerID(ctx context.Context, ownerID u
return q.db.GetAuthorizedWorkspacesAndAgentsByOwnerID(ctx, ownerID, prep)
}

func (q *querier) GetWorkspacesEligibleForTransition(ctx context.Context, now time.Time) ([]database.WorkspaceTable, error) {
func (q *querier) GetWorkspacesEligibleForTransition(ctx context.Context, now time.Time) ([]database.GetWorkspacesEligibleForTransitionRow, error) {
return q.db.GetWorkspacesEligibleForTransition(ctx, now)
}

Expand Down
34 changes: 26 additions & 8 deletions coderd/database/dbmem/dbmem.go
Original file line number Diff line number Diff line change
Expand Up @@ -6868,11 +6868,11 @@ func (q *FakeQuerier) GetWorkspacesAndAgentsByOwnerID(ctx context.Context, owner
return q.GetAuthorizedWorkspacesAndAgentsByOwnerID(ctx, ownerID, nil)
}

func (q *FakeQuerier) GetWorkspacesEligibleForTransition(ctx context.Context, now time.Time) ([]database.WorkspaceTable, error) {
func (q *FakeQuerier) GetWorkspacesEligibleForTransition(ctx context.Context, now time.Time) ([]database.GetWorkspacesEligibleForTransitionRow, error) {
q.mutex.RLock()
defer q.mutex.RUnlock()

workspaces := []database.WorkspaceTable{}
workspaces := []database.GetWorkspacesEligibleForTransitionRow{}
for _, workspace := range q.workspaces {
build, err := q.getLatestWorkspaceBuildByWorkspaceIDNoLock(ctx, workspace.ID)
if err != nil {
Expand All @@ -6883,14 +6883,20 @@ func (q *FakeQuerier) GetWorkspacesEligibleForTransition(ctx context.Context, no
!build.Deadline.IsZero() &&
build.Deadline.Before(now) &&
!workspace.DormantAt.Valid {
workspaces = append(workspaces, workspace)
workspaces = append(workspaces, database.GetWorkspacesEligibleForTransitionRow{
ID: workspace.ID,
Name: workspace.Name,
})
continue
}

if build.Transition == database.WorkspaceTransitionStop &&
workspace.AutostartSchedule.Valid &&
!workspace.DormantAt.Valid {
workspaces = append(workspaces, workspace)
workspaces = append(workspaces, database.GetWorkspacesEligibleForTransitionRow{
ID: workspace.ID,
Name: workspace.Name,
})
continue
}

Expand All @@ -6899,7 +6905,10 @@ func (q *FakeQuerier) GetWorkspacesEligibleForTransition(ctx context.Context, no
return nil, xerrors.Errorf("get provisioner job by ID: %w", err)
}
if codersdk.ProvisionerJobStatus(job.JobStatus) == codersdk.ProvisionerJobFailed {
workspaces = append(workspaces, workspace)
workspaces = append(workspaces, database.GetWorkspacesEligibleForTransitionRow{
ID: workspace.ID,
Name: workspace.Name,
})
continue
}

Expand All @@ -6908,11 +6917,17 @@ func (q *FakeQuerier) GetWorkspacesEligibleForTransition(ctx context.Context, no
return nil, xerrors.Errorf("get template by ID: %w", err)
}
if !workspace.DormantAt.Valid && template.TimeTilDormant > 0 {
workspaces = append(workspaces, workspace)
workspaces = append(workspaces, database.GetWorkspacesEligibleForTransitionRow{
ID: workspace.ID,
Name: workspace.Name,
})
continue
}
if workspace.DormantAt.Valid && template.TimeTilDormantAutoDelete > 0 {
workspaces = append(workspaces, workspace)
workspaces = append(workspaces, database.GetWorkspacesEligibleForTransitionRow{
ID: workspace.ID,
Name: workspace.Name,
})
continue
}

Expand All @@ -6921,7 +6936,10 @@ func (q *FakeQuerier) GetWorkspacesEligibleForTransition(ctx context.Context, no
return nil, xerrors.Errorf("get user by ID: %w", err)
}
if user.Status == database.UserStatusSuspended && build.Transition == database.WorkspaceTransitionStart {
workspaces = append(workspaces, workspace)
workspaces = append(workspaces, database.GetWorkspacesEligibleForTransitionRow{
ID: workspace.ID,
Name: workspace.Name,
})
continue
}
}
Expand Down
2 changes: 1 addition & 1 deletion coderd/database/dbmetrics/querymetrics.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions coderd/database/dbmock/dbmock.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion coderd/database/querier.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

103 changes: 53 additions & 50 deletions coderd/database/queries.sql.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

76 changes: 44 additions & 32 deletions coderd/database/queries/workspaces.sql
Original file line number Diff line number Diff line change
Expand Up @@ -557,7 +557,8 @@ FROM pending_workspaces, building_workspaces, running_workspaces, failed_workspa

-- name: GetWorkspacesEligibleForTransition :many
SELECT
workspaces.*
workspaces.id,
workspaces.name
FROM
workspaces
LEFT JOIN
Expand All @@ -579,52 +580,65 @@ WHERE
) AND

(
-- If the workspace build was a start transition, the workspace is
-- potentially eligible for autostop if it's past the deadline. The
-- deadline is computed at build time upon success and is bumped based
-- on activity (up the max deadline if set). We don't need to check
-- license here since that's done when the values are written to the build.
-- isEligibleForAutostop
(
workspace_builds.transition = 'start'::workspace_transition AND
workspace_builds.deadline IS NOT NULL AND
workspace_builds.deadline < @now :: timestamptz
provisioner_jobs.job_status != 'failed'::provisioner_job_status AND
workspaces.dormant_at IS NULL AND
workspace_builds.transition = 'start'::workspace_transition AND (
users.status = 'suspended'::user_status OR (
workspace_builds.deadline != '0001-01-01 00:00:00+00'::timestamp AND
workspace_builds.deadline < @now :: timestamptz
)
)
) OR

-- If the workspace build was a stop transition, the workspace is
-- potentially eligible for autostart if it has a schedule set. The
-- caller must check if the template allows autostart in a license-aware
-- fashion as we cannot check it here.
-- isEligibleForAutostart
(
users.status = 'active'::user_status AND
provisioner_jobs.job_status != 'failed'::provisioner_job_status AND
workspace_builds.transition = 'stop'::workspace_transition AND
workspaces.autostart_schedule IS NOT NULL
) OR

-- If the workspace's most recent job resulted in an error
-- it may be eligible for failed stop.
(
provisioner_jobs.error IS NOT NULL AND
provisioner_jobs.error != '' AND
workspace_builds.transition = 'start'::workspace_transition
) OR

-- If the workspace's template has an inactivity_ttl set
-- it may be eligible for dormancy.
-- isEligibleForDormantStop
(
workspaces.dormant_at IS NULL AND
templates.time_til_dormant > 0 AND
workspaces.dormant_at IS NULL
(@now :: timestamptz) - workspaces.last_used_at > (INTERVAL '1 millisecond' * (templates.time_til_dormant / 1000000))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I realize this is probably already in use elsewhere but not going to lie, it's weird seeing nanos being converted into millis. I know ms is the smallest unit in pg but feels like this isn't super intuitive for humans to understand what's going on here.

Obviously storing ns in the db is the main problem (and we're not fixing that here). But I'd prefer to see the conversion into seconds instead of ms as that feels like a more intuitive unit.

Even the db column for time_til_dormant doesn't have a comment so this is pretty much hidden magic 😄.

Not a blocker, but we should probably do something about this at some point.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Every day I regret my previous decision 😓

) OR

-- If the workspace's template has a time_til_dormant_autodelete set
-- and the workspace is already dormant.
-- isEligibleForDelete
(
workspaces.dormant_at IS NOT NULL AND
workspaces.deleting_at IS NOT NULL AND
workspaces.deleting_at < @now :: timestamptz AND
templates.time_til_dormant_autodelete > 0 AND
workspaces.dormant_at IS NOT NULL
CASE
WHEN (
workspace_builds.transition = 'delete'::workspace_transition AND
provisioner_jobs.job_status = 'failed'::provisioner_job_status
) THEN (
(
provisioner_jobs.canceled_at IS NOT NULL OR
provisioner_jobs.completed_at IS NOT NULL
) AND (
(@now :: timestamptz) - (CASE
WHEN provisioner_jobs.canceled_at IS NOT NULL THEN provisioner_jobs.canceled_at
ELSE provisioner_jobs.completed_at
END) > INTERVAL '24 hours'
)
)
ELSE true
END
) OR

-- If the user account is suspended, and the workspace is running.
-- isEligibleForFailedStop
(
users.status = 'suspended'::user_status AND
workspace_builds.transition = 'start'::workspace_transition
templates.failure_ttl > 0 AND
provisioner_jobs.job_status = 'failed'::provisioner_job_status AND
workspace_builds.transition = 'start'::workspace_transition AND
provisioner_jobs.completed_at IS NOT NULL AND
(@now :: timestamptz) - provisioner_jobs.completed_at > (INTERVAL '1 millisecond' * (templates.failure_ttl / 1000000))
)
) AND workspaces.deleted = 'false';

Expand Down Expand Up @@ -727,5 +741,3 @@ WHERE
-- Authorize Filter clause will be injected below in GetAuthorizedWorkspacesAndAgentsByOwnerID
-- @authorize_filter
GROUP BY workspaces.id, workspaces.name, latest_build.job_status, latest_build.job_id, latest_build.transition;