Skip to content

feat: add provisioning timings to understand slow build times #14274

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 24 commits into from
Aug 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
7e36010
Initial implementation
dannykopping Aug 7, 2024
45b7fb4
More hacking, now including a job-wide view of timings
dannykopping Aug 13, 2024
38c4197
API
dannykopping Aug 13, 2024
803a9d4
Smol refactor
dannykopping Aug 14, 2024
973ec6d
Capture dependency graph timings
dannykopping Aug 14, 2024
a070e07
Expand hash to include span category so multiple operations on the sa…
dannykopping Aug 14, 2024
4a29b96
Tests
dannykopping Aug 15, 2024
275bfca
lint/fmt
dannykopping Aug 15, 2024
73bac3f
Moar tests
dannykopping Aug 15, 2024
3d77c63
Improve coverage
dannykopping Aug 15, 2024
c0ae1ba
Remove stats API call, will follow up in another PR
dannykopping Aug 15, 2024
28fa2f7
Fixing tests
dannykopping Aug 15, 2024
68b16ff
Use max(end)-min(start) as stage timings, not local maximum
dannykopping Aug 15, 2024
6f0b8f8
make fmt
dannykopping Aug 15, 2024
724f139
Minor fix-ups
dannykopping Aug 15, 2024
c30a900
Pls god let this work
dannykopping Aug 15, 2024
0d68e69
Move terraform test helpers into internal package
dannykopping Aug 19, 2024
15282bb
Review comments
dannykopping Aug 19, 2024
82ca13e
Merge branch 'main' of github.com:coder/coder into dk/provision-detai…
dannykopping Aug 19, 2024
597ec85
More CI happiness
dannykopping Aug 19, 2024
805c0f2
Restrict timings tests to non-Windows
dannykopping Aug 19, 2024
46f3318
Give CI exactly what it wants FFS (see https://github.com/coder/coder…
dannykopping Aug 19, 2024
ebbaf31
@mtojek you legend :)
dannykopping Aug 19, 2024
eb5ec5c
Merge branch 'main' of https://github.com/coder/coder into dk/provisi…
dannykopping Aug 20, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions coderd/database/dbauthz/dbauthz.go
Original file line number Diff line number Diff line change
Expand Up @@ -2793,6 +2793,14 @@ func (q *querier) InsertProvisionerJobLogs(ctx context.Context, arg database.Ins
return q.db.InsertProvisionerJobLogs(ctx, arg)
}

// TODO: We need to create a ProvisionerJob resource type
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See InsertProvisionerJobLogs and InsertProvisionerJob above

// InsertProvisionerJobTimings forwards arg to the wrapped store's
// InsertProvisionerJobTimings and returns its result unchanged.
//
// No authorization check runs here at present: the rbac.ResourceSystem create
// check below is commented out.
func (q *querier) InsertProvisionerJobTimings(ctx context.Context, arg database.InsertProvisionerJobTimingsParams) ([]database.ProvisionerJobTiming, error) {
// if err := q.authorizeContext(ctx, policy.ActionCreate, rbac.ResourceSystem); err != nil {
// return nil, err
// }
return q.db.InsertProvisionerJobTimings(ctx, arg)
}

// InsertProvisionerKey delegates to the insert helper, handing it the
// rbac.ResourceProvisionerKeys object scoped to arg.OrganizationID and arg.ID
// together with the wrapped store's InsertProvisionerKey.
// NOTE(review): presumably the helper authorizes a create on that object before
// calling the store — confirm against the helper's definition.
func (q *querier) InsertProvisionerKey(ctx context.Context, arg database.InsertProvisionerKeyParams) (database.ProvisionerKey, error) {
return insert(q.log, q.auth, rbac.ResourceProvisionerKeys.InOrg(arg.OrganizationID).WithID(arg.ID), q.db.InsertProvisionerKey)(ctx, arg)
}
Expand Down
7 changes: 7 additions & 0 deletions coderd/database/dbauthz/dbauthz_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2470,6 +2470,13 @@ func (s *MethodTestSuite) TestSystemFunctions() {
JobID: j.ID,
}).Asserts( /*rbac.ResourceSystem, policy.ActionCreate*/ )
}))
s.Run("InsertProvisionerJobTimings", s.Subtest(func(db database.Store, check *expects) {
// TODO: we need to create a ProvisionerJob resource
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ditto.

j := dbgen.ProvisionerJob(s.T(), db, nil, database.ProvisionerJob{})
check.Args(database.InsertProvisionerJobTimingsParams{
JobID: j.ID,
}).Asserts( /*rbac.ResourceSystem, policy.ActionCreate*/ )
}))
s.Run("UpsertProvisionerDaemon", s.Subtest(func(db database.Store, check *expects) {
org := dbgen.Organization(s.T(), db, database.Organization{})
pd := rbac.ResourceProvisionerDaemon.InOrg(org.ID)
Expand Down
9 changes: 9 additions & 0 deletions coderd/database/dbmem/dbmem.go
Original file line number Diff line number Diff line change
Expand Up @@ -6652,6 +6652,15 @@ func (q *FakeQuerier) InsertProvisionerJobLogs(_ context.Context, arg database.I
return logs, nil
}

// InsertProvisionerJobTimings validates arg and does nothing else: this fake
// does not record the timings and returns a nil slice with a nil error when
// validation succeeds.
func (*FakeQuerier) InsertProvisionerJobTimings(_ context.Context, arg database.InsertProvisionerJobTimingsParams) ([]database.ProvisionerJobTiming, error) {
	if err := validateDatabaseType(arg); err != nil {
		return nil, err
	}
	return nil, nil
}

func (q *FakeQuerier) InsertProvisionerKey(_ context.Context, arg database.InsertProvisionerKeyParams) (database.ProvisionerKey, error) {
err := validateDatabaseType(arg)
if err != nil {
Expand Down
7 changes: 7 additions & 0 deletions coderd/database/dbmetrics/dbmetrics.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

15 changes: 15 additions & 0 deletions coderd/database/dbmock/dbmock.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions coderd/database/dbtime/dbtime.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@ func Now() time.Time {

// Time rounds t to the nearest microsecond so that it is compatible with
// Postgres, which only stores timestamps with microsecond precision.
//
// FIXME(dannyk): refactor all calls to Time() to expect the input time to be
// modified to UTC; there are currently a few calls whose behavior would change
// subtly.
// See https://github.com/coder/coder/pull/14274#discussion_r1718427461
func Time(t time.Time) time.Time {
	const precision = time.Microsecond
	return t.Round(precision)
}
89 changes: 89 additions & 0 deletions coderd/database/dump.sql

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions coderd/database/foreign_key_constraint.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
-- Reverts the provisioner job timings migration.
-- Objects are dropped in reverse dependency order: the view reads the table,
-- and the table's stage column uses the enum type. Dropping the table before
-- the type means no CASCADE is needed, so nothing else can be removed
-- implicitly.
DROP VIEW IF EXISTS provisioner_job_stats;

DROP TABLE IF EXISTS provisioner_job_timings;

DROP TYPE IF EXISTS provisioner_job_timing_stage;
45 changes: 45 additions & 0 deletions coderd/database/migrations/000245_provisioner_job_timings.up.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
-- Stage labels that a provisioner job timing row can carry.
CREATE TYPE provisioner_job_timing_stage AS ENUM (
'init',
'plan',
'graph',
'apply'
);

-- One row per timed span recorded for a provisioner job. Rows are deleted
-- automatically when the referenced provisioner_jobs row is deleted.
CREATE TABLE provisioner_job_timings (
    job_id     UUID                         NOT NULL REFERENCES provisioner_jobs (id) ON DELETE CASCADE,
    started_at TIMESTAMP WITH TIME ZONE     NOT NULL,
    ended_at   TIMESTAMP WITH TIME ZONE     NOT NULL,
    stage      provisioner_job_timing_stage NOT NULL,
    source     TEXT                         NOT NULL,
    action     TEXT                         NOT NULL,
    resource   TEXT                         NOT NULL
);

-- Per-job summary for provisioner jobs that belong to a workspace build (the
-- inner join on workspace_builds excludes all other jobs). Each *_secs column
-- is a duration in seconds, clamped to a minimum of 0 by GREATEST. PostgreSQL's
-- GREATEST ignores NULL arguments, so a missing timestamp or a stage with no
-- timing rows yields 0 rather than NULL.
CREATE VIEW provisioner_job_stats AS
SELECT pj.id AS job_id,
       pj.job_status,
       wb.workspace_id,
       -- Author's note: worker_id, error and error_code are extraneous right
       -- now, but when an API is put on top of this view it will likely need
       -- this info, and selecting it here saves another database call.
       pj.worker_id,
       pj.error,
       pj.error_code,
       pj.updated_at,
       GREATEST(EXTRACT(EPOCH FROM (pj.started_at - pj.created_at)), 0) AS queued_secs,
       GREATEST(EXTRACT(EPOCH FROM (pj.completed_at - pj.started_at)), 0) AS completion_secs,
       GREATEST(EXTRACT(EPOCH FROM (pj.canceled_at - pj.started_at)), 0) AS canceled_secs,
       -- Author's note: use max(ended_at) - min(started_at) of each stage to
       -- determine stage timings.
       GREATEST(EXTRACT(EPOCH FROM (
           MAX(CASE WHEN pjt.stage = 'init'::provisioner_job_timing_stage THEN pjt.ended_at END) -
           MIN(CASE WHEN pjt.stage = 'init'::provisioner_job_timing_stage THEN pjt.started_at END))), 0) AS init_secs,
       GREATEST(EXTRACT(EPOCH FROM (
           MAX(CASE WHEN pjt.stage = 'plan'::provisioner_job_timing_stage THEN pjt.ended_at END) -
           MIN(CASE WHEN pjt.stage = 'plan'::provisioner_job_timing_stage THEN pjt.started_at END))), 0) AS plan_secs,
       GREATEST(EXTRACT(EPOCH FROM (
           MAX(CASE WHEN pjt.stage = 'graph'::provisioner_job_timing_stage THEN pjt.ended_at END) -
           MIN(CASE WHEN pjt.stage = 'graph'::provisioner_job_timing_stage THEN pjt.started_at END))), 0) AS graph_secs,
       GREATEST(EXTRACT(EPOCH FROM (
           MAX(CASE WHEN pjt.stage = 'apply'::provisioner_job_timing_stage THEN pjt.ended_at END) -
           MIN(CASE WHEN pjt.stage = 'apply'::provisioner_job_timing_stage THEN pjt.started_at END))), 0) AS apply_secs
FROM provisioner_jobs pj
         JOIN workspace_builds wb ON wb.job_id = pj.id
         LEFT JOIN provisioner_job_timings pjt ON pjt.job_id = pj.id
GROUP BY pj.id, wb.workspace_id;
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
-- Fixture data: four timing rows for a single job ID, one per stage
-- (init, plan, graph, apply). The intervals are back-to-back relative to NOW():
-- each stage starts at the moment the previous one ends.
INSERT INTO provisioner_job_timings (job_id, started_at, ended_at, stage, source, action, resource)
VALUES
-- Job 1 - init stage
('424a58cb-61d6-4627-9907-613c396c4a38', NOW() - INTERVAL '1 hour 55 minutes', NOW() - INTERVAL '1 hour 50 minutes', 'init', 'source1', 'action1', 'resource1'),

-- Job 1 - plan stage
('424a58cb-61d6-4627-9907-613c396c4a38', NOW() - INTERVAL '1 hour 50 minutes', NOW() - INTERVAL '1 hour 40 minutes', 'plan', 'source2', 'action2', 'resource2'),

-- Job 1 - graph stage
('424a58cb-61d6-4627-9907-613c396c4a38', NOW() - INTERVAL '1 hour 40 minutes', NOW() - INTERVAL '1 hour 30 minutes', 'graph', 'source3', 'action3', 'resource3'),

-- Job 1 - apply stage
('424a58cb-61d6-4627-9907-613c396c4a38', NOW() - INTERVAL '1 hour 30 minutes', NOW() - INTERVAL '1 hour 20 minutes', 'apply', 'source4', 'action4', 'resource4');
91 changes: 91 additions & 0 deletions coderd/database/models.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions coderd/database/querier.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading
Loading