Skip to content

fix(coderd/database): exclude canceled jobs in queue position #15835

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Dec 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
105 changes: 81 additions & 24 deletions coderd/database/dbmem/dbmem.go
Original file line number Diff line number Diff line change
Expand Up @@ -3804,35 +3804,92 @@ func (q *FakeQuerier) GetProvisionerJobsByIDsWithQueuePosition(_ context.Context
q.mutex.RLock()
defer q.mutex.RUnlock()

jobs := make([]database.GetProvisionerJobsByIDsWithQueuePositionRow, 0)
queuePosition := int64(1)
// WITH pending_jobs AS (
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is lovely, but I can't wait to get rid of dbmem.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have a love/hate relationship with dbmem. Part of me will be a bit sad 😆

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I’m with you @Emyrk, many times it has been a nice way to verify query logic/results by forcing you to reproduce the logic in an alternative way. Still going to be happy that it’s gone though 🥲

// SELECT
// id, created_at
// FROM
// provisioner_jobs
// WHERE
// started_at IS NULL
// AND
// canceled_at IS NULL
// AND
// completed_at IS NULL
// AND
// error IS NULL
// ),
type pendingJobRow struct {
ID uuid.UUID
CreatedAt time.Time
}
pendingJobs := make([]pendingJobRow, 0)
for _, job := range q.provisionerJobs {
for _, id := range ids {
if id == job.ID {
// clone the Tags before appending, since maps are reference types and
// we don't want the caller to be able to mutate the map we have inside
// dbmem!
job.Tags = maps.Clone(job.Tags)
job := database.GetProvisionerJobsByIDsWithQueuePositionRow{
ProvisionerJob: job,
}
if !job.ProvisionerJob.StartedAt.Valid {
job.QueuePosition = queuePosition
}
jobs = append(jobs, job)
break
}
}
if !job.StartedAt.Valid {
queuePosition++
if job.StartedAt.Valid ||
job.CanceledAt.Valid ||
job.CompletedAt.Valid ||
job.Error.Valid {
continue
}
pendingJobs = append(pendingJobs, pendingJobRow{
ID: job.ID,
CreatedAt: job.CreatedAt,
})
}
for _, job := range jobs {
if !job.ProvisionerJob.StartedAt.Valid {
// Set it to the max position!
job.QueueSize = queuePosition

// queue_position AS (
// SELECT
// id,
// ROW_NUMBER() OVER (ORDER BY created_at ASC) AS queue_position
// FROM
// pending_jobs
// ),
slices.SortFunc(pendingJobs, func(a, b pendingJobRow) int {
c := a.CreatedAt.Compare(b.CreatedAt)
return c
})

queuePosition := make(map[uuid.UUID]int64)
for idx, pj := range pendingJobs {
queuePosition[pj.ID] = int64(idx + 1)
}

// queue_size AS (
// SELECT COUNT(*) AS count FROM pending_jobs
// ),
queueSize := len(pendingJobs)

// SELECT
// sqlc.embed(pj),
// COALESCE(qp.queue_position, 0) AS queue_position,
// COALESCE(qs.count, 0) AS queue_size
// FROM
// provisioner_jobs pj
// LEFT JOIN
// queue_position qp ON pj.id = qp.id
// LEFT JOIN
// queue_size qs ON TRUE
// WHERE
// pj.id IN (...)
jobs := make([]database.GetProvisionerJobsByIDsWithQueuePositionRow, 0)
for _, job := range q.provisionerJobs {
if !slices.Contains(ids, job.ID) {
continue
}
// clone the Tags before appending, since maps are reference types and
// we don't want the caller to be able to mutate the map we have inside
// dbmem!
job.Tags = maps.Clone(job.Tags)
job := database.GetProvisionerJobsByIDsWithQueuePositionRow{
// sqlc.embed(pj),
ProvisionerJob: job,
// COALESCE(qp.queue_position, 0) AS queue_position,
QueuePosition: queuePosition[job.ID],
// COALESCE(qs.count, 0) AS queue_size
QueueSize: int64(queueSize),
}
jobs = append(jobs, job)
}

return jobs, nil
}

Expand Down
121 changes: 121 additions & 0 deletions coderd/database/querier_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (

"github.com/google/uuid"
"github.com/prometheus/client_golang/prometheus"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"

"cdr.dev/slog/sloggers/slogtest"
Expand Down Expand Up @@ -2037,6 +2038,126 @@ func TestExpectOne(t *testing.T) {
})
}

func TestGetProvisionerJobsByIDsWithQueuePosition(t *testing.T) {
t.Parallel()
if !dbtestutil.WillUsePostgres() {
t.SkipNow()
}

db, _ := dbtestutil.NewDB(t)
now := dbtime.Now()
ctx := testutil.Context(t, testutil.WaitShort)

// Given the following provisioner jobs:
allJobs := []database.ProvisionerJob{
// Pending. This will be the last in the queue because
// it was created most recently.
dbgen.ProvisionerJob(t, db, nil, database.ProvisionerJob{
CreatedAt: now.Add(-time.Minute),
StartedAt: sql.NullTime{},
CanceledAt: sql.NullTime{},
CompletedAt: sql.NullTime{},
Error: sql.NullString{},
}),

// Another pending. This will come first in the queue
// because it was created before the previous job.
dbgen.ProvisionerJob(t, db, nil, database.ProvisionerJob{
CreatedAt: now.Add(-2 * time.Minute),
StartedAt: sql.NullTime{},
CanceledAt: sql.NullTime{},
CompletedAt: sql.NullTime{},
Error: sql.NullString{},
}),

// Running
dbgen.ProvisionerJob(t, db, nil, database.ProvisionerJob{
CreatedAt: now.Add(-3 * time.Minute),
StartedAt: sql.NullTime{Valid: true, Time: now},
CanceledAt: sql.NullTime{},
CompletedAt: sql.NullTime{},
Error: sql.NullString{},
}),

// Succeeded
dbgen.ProvisionerJob(t, db, nil, database.ProvisionerJob{
CreatedAt: now.Add(-4 * time.Minute),
StartedAt: sql.NullTime{Valid: true, Time: now},
CanceledAt: sql.NullTime{},
CompletedAt: sql.NullTime{Valid: true, Time: now},
Error: sql.NullString{},
}),

// Canceling
dbgen.ProvisionerJob(t, db, nil, database.ProvisionerJob{
CreatedAt: now.Add(-5 * time.Minute),
StartedAt: sql.NullTime{},
CanceledAt: sql.NullTime{Valid: true, Time: now},
CompletedAt: sql.NullTime{},
Error: sql.NullString{},
}),

// Canceled
dbgen.ProvisionerJob(t, db, nil, database.ProvisionerJob{
CreatedAt: now.Add(-6 * time.Minute),
StartedAt: sql.NullTime{},
CanceledAt: sql.NullTime{Valid: true, Time: now},
CompletedAt: sql.NullTime{Valid: true, Time: now},
Error: sql.NullString{},
}),

// Failed
dbgen.ProvisionerJob(t, db, nil, database.ProvisionerJob{
CreatedAt: now.Add(-7 * time.Minute),
StartedAt: sql.NullTime{},
CanceledAt: sql.NullTime{},
CompletedAt: sql.NullTime{},
Error: sql.NullString{String: "failed", Valid: true},
}),
}

// Assert invariant: the jobs are in the expected order
require.Len(t, allJobs, 7, "expected 7 jobs")
for idx, status := range []database.ProvisionerJobStatus{
database.ProvisionerJobStatusPending,
database.ProvisionerJobStatusPending,
database.ProvisionerJobStatusRunning,
database.ProvisionerJobStatusSucceeded,
database.ProvisionerJobStatusCanceling,
database.ProvisionerJobStatusCanceled,
database.ProvisionerJobStatusFailed,
} {
require.Equal(t, status, allJobs[idx].JobStatus, "expected job %d to have status %s", idx, status)
}

var jobIDs []uuid.UUID
for _, job := range allJobs {
jobIDs = append(jobIDs, job.ID)
}

// When: we fetch the jobs by their IDs
actualJobs, err := db.GetProvisionerJobsByIDsWithQueuePosition(ctx, jobIDs)
require.NoError(t, err)
require.Len(t, actualJobs, len(allJobs), "should return all jobs")

// Then: the jobs should be returned in the correct order (by IDs in the input slice)
for idx, job := range actualJobs {
assert.EqualValues(t, allJobs[idx], job.ProvisionerJob)
}

// Then: the queue size should be set correctly
for _, job := range actualJobs {
assert.EqualValues(t, job.QueueSize, 2, "should have queue size 2")
}

// Then: the queue position should be set correctly:
var queuePositions []int64
for _, job := range actualJobs {
queuePositions = append(queuePositions, job.QueuePosition)
}
assert.EqualValues(t, []int64{2, 1, 0, 0, 0, 0, 0}, queuePositions, "expected queue positions to be set correctly")
}

func TestGroupRemovalTrigger(t *testing.T) {
t.Parallel()

Expand Down
12 changes: 9 additions & 3 deletions coderd/database/queries.sql.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 9 additions & 3 deletions coderd/database/queries/provisionerjobs.sql
Original file line number Diff line number Diff line change
Expand Up @@ -50,23 +50,29 @@ WHERE
id = ANY(@ids :: uuid [ ]);

-- name: GetProvisionerJobsByIDsWithQueuePosition :many
WITH unstarted_jobs AS (
WITH pending_jobs AS (
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

review: Renamed for clarity.

SELECT
id, created_at
FROM
provisioner_jobs
WHERE
started_at IS NULL
AND
canceled_at IS NULL
AND
completed_at IS NULL
AND
error IS NULL
Comment on lines +60 to +65
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

review: this is in line with the generated provisioner job status:

CASE
    WHEN (completed_at IS NOT NULL) THEN
    CASE
        WHEN (error <> ''::text) THEN 'failed'::provisioner_job_status
        WHEN (canceled_at IS NOT NULL) THEN 'canceled'::provisioner_job_status
        ELSE 'succeeded'::provisioner_job_status
    END
    ELSE
    CASE
        WHEN (error <> ''::text) THEN 'failed'::provisioner_job_status
        WHEN (canceled_at IS NOT NULL) THEN 'canceling'::provisioner_job_status
        WHEN (started_at IS NULL) THEN 'pending'::provisioner_job_status
        ELSE 'running'::provisioner_job_status
    END
END)

),
queue_position AS (
SELECT
id,
ROW_NUMBER() OVER (ORDER BY created_at ASC) AS queue_position
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We'll need to amend this to take into account matching tags. Queue position could be 1/3 for the provisioners that match, but 20/50 on a larger scale (but that's nonsensical). Do we have an issue that tracks this?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Account matching tags? Is that provisioner tags?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Take into account 😄. Matching provisioner tags yes 👍🏻

Copy link
Member Author

@johnstcn johnstcn Dec 12, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will create a follow-up issue to track this. 👍

Edit: #15843

FROM
unstarted_jobs
pending_jobs
),
queue_size AS (
SELECT COUNT(*) as count FROM unstarted_jobs
SELECT COUNT(*) AS count FROM pending_jobs
)
SELECT
sqlc.embed(pj),
Expand Down
Loading