Skip to content

feat: add provisioning timings to understand slow build times #14274

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 24 commits into from
Aug 21, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
7e36010
Initial implementation
dannykopping Aug 7, 2024
45b7fb4
More hacking, now including a job-wide view of timings
dannykopping Aug 13, 2024
38c4197
API
dannykopping Aug 13, 2024
803a9d4
Smol refactor
dannykopping Aug 14, 2024
973ec6d
Capture dependency graph timings
dannykopping Aug 14, 2024
a070e07
Expand hash to include span category so multiple operations on the sa…
dannykopping Aug 14, 2024
4a29b96
Tests
dannykopping Aug 15, 2024
275bfca
lint/fmt
dannykopping Aug 15, 2024
73bac3f
Moar tests
dannykopping Aug 15, 2024
3d77c63
Improve coverage
dannykopping Aug 15, 2024
c0ae1ba
Remove stats API call, will follow up in another PR
dannykopping Aug 15, 2024
28fa2f7
Fixing tests
dannykopping Aug 15, 2024
68b16ff
Use max(end)-min(start) as stage timings, not local maximum
dannykopping Aug 15, 2024
6f0b8f8
make fmt
dannykopping Aug 15, 2024
724f139
Minor fix-ups
dannykopping Aug 15, 2024
c30a900
Pls god let this work
dannykopping Aug 15, 2024
0d68e69
Move terraform test helpers into internal package
dannykopping Aug 19, 2024
15282bb
Review comments
dannykopping Aug 19, 2024
82ca13e
Merge branch 'main' of github.com:coder/coder into dk/provision-detai…
dannykopping Aug 19, 2024
597ec85
More CI happiness
dannykopping Aug 19, 2024
805c0f2
Restrict timings tests to non-Windows
dannykopping Aug 19, 2024
46f3318
Give CI exactly what it wants FFS (see https://github.com/coder/coder…
dannykopping Aug 19, 2024
ebbaf31
@mtojek you legend :)
dannykopping Aug 19, 2024
eb5ec5c
Merge branch 'main' of https://github.com/coder/coder into dk/provisi…
dannykopping Aug 20, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Minor fix-ups
Signed-off-by: Danny Kopping <danny@coder.com>
  • Loading branch information
dannykopping committed Aug 15, 2024
commit 724f13929a155f393bc29cdc143ca62b1df13e6d
48 changes: 32 additions & 16 deletions coderd/database/dump.sql

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
INSERT INTO provisioner_job_timings (job_id, started_at, ended_at, stage, source, action, resource)
VALUES
-- Job 1 - init stage
('d09b6083-e482-41ac-ad06-3aa731ec4fc6', NOW() - INTERVAL '1 hour 55 minutes', NOW() - INTERVAL '1 hour 50 minutes', 'init', 'source1', 'action1', 'resource1'),

-- Job 1 - plan stage
('d09b6083-e482-41ac-ad06-3aa731ec4fc6', NOW() - INTERVAL '1 hour 50 minutes', NOW() - INTERVAL '1 hour 40 minutes', 'plan', 'source2', 'action2', 'resource2'),

-- Job 1 - graph stage
('d09b6083-e482-41ac-ad06-3aa731ec4fc6', NOW() - INTERVAL '1 hour 40 minutes', NOW() - INTERVAL '1 hour 30 minutes', 'graph', 'source3', 'action3', 'resource3'),

-- Job 1 - apply stage
('d09b6083-e482-41ac-ad06-3aa731ec4fc6', NOW() - INTERVAL '1 hour 30 minutes', NOW() - INTERVAL '1 hour 20 minutes', 'apply', 'source4', 'action4', 'resource4');
2 changes: 1 addition & 1 deletion coderd/provisionerdserver/provisionerdserver.go
Original file line number Diff line number Diff line change
Expand Up @@ -1471,7 +1471,7 @@ func (s *server) CompleteJob(ctx context.Context, completed *proto.CompletedJob)
_, err = s.Database.InsertProvisionerJobTimings(ctx, params)
if err != nil {
// Don't fail the transaction for non-critical data.
s.Logger.Warn(ctx, "failed to update provisioner job timings", slog.Error(err))
s.Logger.Warn(ctx, "failed to update provisioner job timings", slog.F("job_id", jobID), slog.Error(err))
}

// audit the outcome of the workspace build
Expand Down
17 changes: 0 additions & 17 deletions codersdk/workspacebuilds.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,23 +74,6 @@ type WorkspaceBuild struct {
DailyCost int32 `json:"daily_cost"`
}

// WorkspaceBuildStats is a point-in-time representation of a workspace build, including timing information.
type WorkspaceBuildStats struct {
ID uuid.UUID `json:"id" format:"uuid"`
JobStatus ProvisionerJobStatus `json:"job_status"`
WorkerID uuid.UUID `json:"worker_id" format:"uuid"`
WorkspaceID uuid.UUID `json:"workspace_id" format:"uuid"`
Error string `json:"error"`
ErrorCode string `json:"error_code"`
UpdatedAt time.Time `json:"updated_at" format:"date-time"`
QueuedSecs float64 `json:"queued_secs"`
CompletionSecs float64 `json:"completion_secs"`
CanceledSecs float64 `json:"canceled_secs"`
InitSecs float64 `json:"init_secs"`
PlanSecs float64 `json:"plan_secs"`
ApplySecs float64 `json:"apply_secs"`
}

// WorkspaceResource describes resources used to create a workspace, for instance:
// containers, images, volumes.
type WorkspaceResource struct {
Expand Down
2 changes: 1 addition & 1 deletion flake.nix
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@
name = "coder-${osArch}";
# Updated with ./scripts/update-flake.sh`.
# This should be updated whenever go.mod changes!
vendorHash = "sha256-AZ0qzh7H+UwnZNyg2iaNMSUWlGgomI/mo70T+FdF7ws=";
vendorHash = "sha256-i3sseokxA5MoSJDzrWJI+GdS5HOYG1fPIRdr239Zx30=";
proxyVendor = true;
src = ./.;
nativeBuildInputs = with pkgs; [ getopt openssl zstd ];
Expand Down
2 changes: 1 addition & 1 deletion provisioner/terraform/timings_internal_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ func TestAggregation(t *testing.T) {
// txtar is a text-based archive format used in the stdlib for simple and elegant tests.
//
// We ALWAYS expect that the archive contains two or more "files":
// 1. JSON logs generated by a terraform execution, one per line, *one file per stage*
// 1. JSON logs generated by a terraform execution, one per line, *one file per stage*
// N. Expected resulting timings in JSON form, one per line
arc := txtar.Parse(tc.input)
require.GreaterOrEqual(t, len(arc.Files), 2)
Expand Down
1 change: 1 addition & 0 deletions provisioner/terraform/timings_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,11 @@

t.Logf(fakeBin)

ctx, api := setupProvisioner(t, &provisionerServeOptions{

Check failure on line 31 in provisioner/terraform/timings_test.go

View workflow job for this annotation

GitHub Actions / test-go (windows-2022)

undefined: setupProvisioner

Check failure on line 31 in provisioner/terraform/timings_test.go

View workflow job for this annotation

GitHub Actions / test-go (windows-2022)

undefined: provisionerServeOptions

Check failure on line 31 in provisioner/terraform/timings_test.go

View workflow job for this annotation

GitHub Actions / test-go (windows-2022)

undefined: setupProvisioner

Check failure on line 31 in provisioner/terraform/timings_test.go

View workflow job for this annotation

GitHub Actions / test-go (windows-2022)

undefined: provisionerServeOptions
binaryPath: fakeBin,
})
sess := configure(ctx, t, api, &proto.Config{

Check failure on line 34 in provisioner/terraform/timings_test.go

View workflow job for this annotation

GitHub Actions / test-go (windows-2022)

undefined: configure

Check failure on line 34 in provisioner/terraform/timings_test.go

View workflow job for this annotation

GitHub Actions / test-go (windows-2022)

undefined: configure
TemplateSourceArchive: makeTar(t, nil),

Check failure on line 35 in provisioner/terraform/timings_test.go

View workflow job for this annotation

GitHub Actions / test-go (windows-2022)

undefined: makeTar

Check failure on line 35 in provisioner/terraform/timings_test.go

View workflow job for this annotation

GitHub Actions / test-go (windows-2022)

undefined: makeTar
})

ctx, cancel := context.WithTimeout(ctx, testutil.WaitLong)
Expand All @@ -40,7 +40,7 @@

// When: a plan is executed in the provisioner, our fake terraform will be executed and will produce a
// state file and some log content.
err = sendPlan(sess, proto.WorkspaceTransition_START)

Check failure on line 43 in provisioner/terraform/timings_test.go

View workflow job for this annotation

GitHub Actions / test-go (windows-2022)

undefined: sendPlan
require.NoError(t, err)

var timings []*proto.Timing
Expand All @@ -67,7 +67,7 @@
}

// When: the plan has completed, let's trigger an apply.
err = sendApply(sess, proto.WorkspaceTransition_START)

Check failure on line 70 in provisioner/terraform/timings_test.go

View workflow job for this annotation

GitHub Actions / test-go (windows-2022)

undefined: sendApply
require.NoError(t, err)

for {
Expand All @@ -94,6 +94,7 @@
// Sort the timings stably to keep reduce flakiness.
terraform.StableSortTimings(t, timings)

// Then: the received timings should match the expected values below.
// NOTE: These timings have been encoded to JSON format to make the tests more readable.
planTimings := terraform.ParseTimingLines(t, []byte(`{"start":"2024-08-15T08:26:39.194726Z", "end":"2024-08-15T08:26:39.195836Z", "action":"read", "source":"coder", "resource":"data.coder_parameter.memory_size", "stage":"plan", "state":"COMPLETED"}
{"start":"2024-08-15T08:26:39.194726Z", "end":"2024-08-15T08:26:39.195712Z", "action":"read", "source":"coder", "resource":"data.coder_provisioner.me", "stage":"plan", "state":"COMPLETED"}
Expand Down
123 changes: 123 additions & 0 deletions site/e2e/google/protobuf/timestampGenerated.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
/* eslint-disable */

Check failure on line 1 in site/e2e/google/protobuf/timestampGenerated.ts

View workflow job for this annotation

GitHub Actions / lint

Unexpected undescribed directive comment. Include descriptions to explain why the comment is necessary

Check failure on line 1 in site/e2e/google/protobuf/timestampGenerated.ts

View workflow job for this annotation

GitHub Actions / lint

Unexpected unlimited 'eslint-disable' comment. Specify some rule names to disable

Check failure on line 1 in site/e2e/google/protobuf/timestampGenerated.ts

View workflow job for this annotation

GitHub Actions / lint

Unexpected undescribed directive comment. Include descriptions to explain why the comment is necessary

Check failure on line 1 in site/e2e/google/protobuf/timestampGenerated.ts

View workflow job for this annotation

GitHub Actions / lint

Unexpected unlimited 'eslint-disable' comment. Specify some rule names to disable

Check failure on line 1 in site/e2e/google/protobuf/timestampGenerated.ts

View workflow job for this annotation

GitHub Actions / fmt

Unexpected undescribed directive comment. Include descriptions to explain why the comment is necessary

Check failure on line 1 in site/e2e/google/protobuf/timestampGenerated.ts

View workflow job for this annotation

GitHub Actions / fmt

Unexpected unlimited 'eslint-disable' comment. Specify some rule names to disable
import * as _m0 from "protobufjs/minimal";

export const protobufPackage = "google.protobuf";

/**
* A Timestamp represents a point in time independent of any time zone or local
* calendar, encoded as a count of seconds and fractions of seconds at
* nanosecond resolution. The count is relative to an epoch at UTC midnight on
* January 1, 1970, in the proleptic Gregorian calendar which extends the
* Gregorian calendar backwards to year one.
*
* All minutes are 60 seconds long. Leap seconds are "smeared" so that no leap
* second table is needed for interpretation, using a [24-hour linear
* smear](https://developers.google.com/time/smear).
*
* The range is from 0001-01-01T00:00:00Z to 9999-12-31T23:59:59.999999999Z. By
* restricting to that range, we ensure that we can convert to and from [RFC
* 3339](https://www.ietf.org/rfc/rfc3339.txt) date strings.
*
* # Examples
*
* Example 1: Compute Timestamp from POSIX `time()`.
*
* Timestamp timestamp;
* timestamp.set_seconds(time(NULL));
* timestamp.set_nanos(0);
*
* Example 2: Compute Timestamp from POSIX `gettimeofday()`.
*
* struct timeval tv;
* gettimeofday(&tv, NULL);
*
* Timestamp timestamp;
* timestamp.set_seconds(tv.tv_sec);
* timestamp.set_nanos(tv.tv_usec * 1000);
*
* Example 3: Compute Timestamp from Win32 `GetSystemTimeAsFileTime()`.
*
* FILETIME ft;
* GetSystemTimeAsFileTime(&ft);
* UINT64 ticks = (((UINT64)ft.dwHighDateTime) << 32) | ft.dwLowDateTime;
*
* // A Windows tick is 100 nanoseconds. Windows epoch 1601-01-01T00:00:00Z
* // is 11644473600 seconds before Unix epoch 1970-01-01T00:00:00Z.
* Timestamp timestamp;
* timestamp.set_seconds((INT64) ((ticks / 10000000) - 11644473600LL));
* timestamp.set_nanos((INT32) ((ticks % 10000000) * 100));
*
* Example 4: Compute Timestamp from Java `System.currentTimeMillis()`.
*
* long millis = System.currentTimeMillis();
*
* Timestamp timestamp = Timestamp.newBuilder().setSeconds(millis / 1000)
* .setNanos((int) ((millis % 1000) * 1000000)).build();
*
* Example 5: Compute Timestamp from Java `Instant.now()`.
*
* Instant now = Instant.now();
*
* Timestamp timestamp =
* Timestamp.newBuilder().setSeconds(now.getEpochSecond())
* .setNanos(now.getNano()).build();
*
* Example 6: Compute Timestamp from current time in Python.
*
* timestamp = Timestamp()
* timestamp.GetCurrentTime()
*
* # JSON Mapping
*
* In JSON format, the Timestamp type is encoded as a string in the
* [RFC 3339](https://www.ietf.org/rfc/rfc3339.txt) format. That is, the
* format is "{year}-{month}-{day}T{hour}:{min}:{sec}[.{frac_sec}]Z"
* where {year} is always expressed using four digits while {month}, {day},
* {hour}, {min}, and {sec} are zero-padded to two digits each. The fractional
* seconds, which can go up to 9 digits (i.e. up to 1 nanosecond resolution),
* are optional. The "Z" suffix indicates the timezone ("UTC"); the timezone
* is required. A proto3 JSON serializer should always use UTC (as indicated by
* "Z") when printing the Timestamp type and a proto3 JSON parser should be
* able to accept both UTC and other timezones (as indicated by an offset).
*
* For example, "2017-01-15T01:30:15.01Z" encodes 15.01 seconds past
* 01:30 UTC on January 15, 2017.
*
* In JavaScript, one can convert a Date object to this format using the
* standard
* [toISOString()](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Date/toISOString)
* method. In Python, a standard `datetime.datetime` object can be converted
* to this format using
* [`strftime`](https://docs.python.org/2/library/time.html#time.strftime) with
* the time format spec '%Y-%m-%dT%H:%M:%S.%fZ'. Likewise, in Java, one can use
* the Joda Time's [`ISODateTimeFormat.dateTime()`](
* http://joda-time.sourceforge.net/apidocs/org/joda/time/format/ISODateTimeFormat.html#dateTime()
* ) to obtain a formatter capable of generating timestamps in this format.
*/
export interface Timestamp {
/**
* Represents seconds of UTC time since Unix epoch
* 1970-01-01T00:00:00Z. Must be from 0001-01-01T00:00:00Z to
* 9999-12-31T23:59:59Z inclusive.
*/
seconds: number;
/**
* Non-negative fractions of a second at nanosecond resolution. Negative
* second values with fractions must still have non-negative nanos values
* that count forward in time. Must be from 0 to 999,999,999
* inclusive.
*/
nanos: number;
}

export const Timestamp = {
encode(message: Timestamp, writer: _m0.Writer = _m0.Writer.create()): _m0.Writer {
if (message.seconds !== 0) {
writer.uint32(8).int64(message.seconds);
}
if (message.nanos !== 0) {
writer.uint32(16).int32(message.nanos);
}
return writer;
},
};
17 changes: 0 additions & 17 deletions site/src/api/typesGenerated.ts

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading