Skip to content

feat: add provisioning timings to understand slow build times #14274

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 24 commits into from
Aug 21, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
7e36010
Initial implementation
dannykopping Aug 7, 2024
45b7fb4
More hacking, now including a job-wide view of timings
dannykopping Aug 13, 2024
38c4197
API
dannykopping Aug 13, 2024
803a9d4
Smol refactor
dannykopping Aug 14, 2024
973ec6d
Capture dependency graph timings
dannykopping Aug 14, 2024
a070e07
Expand hash to include span category so multiple operations on the sa…
dannykopping Aug 14, 2024
4a29b96
Tests
dannykopping Aug 15, 2024
275bfca
lint/fmt
dannykopping Aug 15, 2024
73bac3f
Moar tests
dannykopping Aug 15, 2024
3d77c63
Improve coverage
dannykopping Aug 15, 2024
c0ae1ba
Remove stats API call, will follow up in another PR
dannykopping Aug 15, 2024
28fa2f7
Fixing tests
dannykopping Aug 15, 2024
68b16ff
Use max(end)-min(start) as stage timings, not local maximum
dannykopping Aug 15, 2024
6f0b8f8
make fmt
dannykopping Aug 15, 2024
724f139
Minor fix-ups
dannykopping Aug 15, 2024
c30a900
Pls god let this work
dannykopping Aug 15, 2024
0d68e69
Move terraform test helpers into internal package
dannykopping Aug 19, 2024
15282bb
Review comments
dannykopping Aug 19, 2024
82ca13e
Merge branch 'main' of github.com:coder/coder into dk/provision-detai…
dannykopping Aug 19, 2024
597ec85
More CI happiness
dannykopping Aug 19, 2024
805c0f2
Restrict timings tests to non-Windows
dannykopping Aug 19, 2024
46f3318
Give CI exactly what it wants FFS (see https://github.com/coder/coder…
dannykopping Aug 19, 2024
ebbaf31
@mtojek you legend :)
dannykopping Aug 19, 2024
eb5ec5c
Merge branch 'main' of https://github.com/coder/coder into dk/provisi…
dannykopping Aug 20, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Smol refactor
Signed-off-by: Danny Kopping <danny@coder.com>
  • Loading branch information
dannykopping committed Aug 15, 2024
commit 803a9d47458f62f1b5398d80d43c9fdf968c4b6e
4 changes: 2 additions & 2 deletions coderd/database/dbtime/dbtime.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,5 @@ func Now() time.Time {
// Time returns a time compatible with Postgres. Postgres only stores dates with
// microsecond precision.
func Time(t time.Time) time.Time {
return t.Round(time.Microsecond)
}
return t.UTC().Round(time.Microsecond)
}
30 changes: 14 additions & 16 deletions provisioner/terraform/executor.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ import (
"go.opentelemetry.io/otel/attribute"
"golang.org/x/xerrors"

"github.com/coder/coder/v2/coderd/database"
"github.com/coder/coder/v2/coderd/tracing"
"github.com/coder/coder/v2/provisionersdk/proto"
)
Expand All @@ -36,6 +35,8 @@ type executor struct {
// cachePath and workdir must not be used by multiple processes at once.
cachePath string
workdir string
// used to capture execution times at various stages
timings *timingAggregator
}

func (e *executor) basicEnv() []string {
Expand Down Expand Up @@ -254,8 +255,7 @@ func (e *executor) plan(ctx, killCtx context.Context, env, vars []string, logr l
args = append(args, "-var", variable)
}

timingsAgg := newTimingsAggregator(database.ProvisionerJobTimingStagePlan)
outWriter, doneOut := e.provisionLogWriter(logr, timingsAgg)
outWriter, doneOut := e.provisionLogWriter(logr)
errWriter, doneErr := logWriter(logr, proto.LogLevel_ERROR)
defer func() {
_ = outWriter.Close()
Expand All @@ -277,7 +277,7 @@ func (e *executor) plan(ctx, killCtx context.Context, env, vars []string, logr l
Parameters: state.Parameters,
Resources: state.Resources,
ExternalAuthProviders: state.ExternalAuthProviders,
Timings: timingsAgg.aggregate(),
Timings: e.timings.aggregate(),
}, nil
}

Expand Down Expand Up @@ -404,8 +404,7 @@ func (e *executor) apply(
getPlanFilePath(e.workdir),
}

timingsAgg := newTimingsAggregator(database.ProvisionerJobTimingStageApply)
outWriter, doneOut := e.provisionLogWriter(logr, timingsAgg)
outWriter, doneOut := e.provisionLogWriter(logr)
errWriter, doneErr := logWriter(logr, proto.LogLevel_ERROR)
defer func() {
_ = outWriter.Close()
Expand Down Expand Up @@ -433,7 +432,7 @@ func (e *executor) apply(
Resources: state.Resources,
ExternalAuthProviders: state.ExternalAuthProviders,
State: stateContent,
Timings: timingsAgg.aggregate(),
Timings: e.timings.aggregate(),
}, nil
}

Expand Down Expand Up @@ -547,15 +546,15 @@ func readAndLog(sink logSink, r io.Reader, done chan<- any, level proto.LogLevel
// provisionLogWriter creates a WriteCloser that will log each JSON formatted terraform log. The WriteCloser must be
// closed by the caller to end logging, after which the returned channel will be closed to indicate that logging of the
// written data has finished. Failure to close the WriteCloser will leak a goroutine.
func (e *executor) provisionLogWriter(sink logSink, timings *timingsAggregator) (io.WriteCloser, <-chan any) {
func (e *executor) provisionLogWriter(sink logSink) (io.WriteCloser, <-chan any) {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Moved these under the executor receiver to get access to the logger.

r, w := io.Pipe()
done := make(chan any)

go e.provisionReadAndLog(sink, r, timings, done)
go e.provisionReadAndLog(sink, r, done)
return w, done
}

func (e *executor) provisionReadAndLog(sink logSink, r io.Reader, timings *timingsAggregator, done chan<- any) {
func (e *executor) provisionReadAndLog(sink logSink, r io.Reader, done chan<- any) {
defer close(done)
scanner := bufio.NewScanner(r)
for scanner.Scan() {
Expand Down Expand Up @@ -588,13 +587,13 @@ func (e *executor) provisionReadAndLog(sink logSink, r io.Reader, timings *timin
logLevel := convertTerraformLogLevel(log.Level, sink)
sink.ProvisionLog(logLevel, log.Message)

ts, te, err := extractTimingsEntry(log)
ts, span, err := extractTimingSpan(log)
if err != nil {
e.logger.Debug(context.Background(), "failed to extract timings entry from log line",
slog.F("line", log.Message), slog.Error(err))
} else {
// Only ingest valid timings.
timings.ingest(ts, te)
e.timings.ingest(ts, span)
}

// If the diagnostic is provided, let's provide a bit more info!
Expand All @@ -608,13 +607,13 @@ func (e *executor) provisionReadAndLog(sink logSink, r io.Reader, timings *timin
}
}

func extractTimingsEntry(log terraformProvisionLog) (time.Time, *timingsEntry, error) {
func extractTimingSpan(log terraformProvisionLog) (time.Time, *timingSpan, error) {
// Input is not well-formed, bail out.
if log.Type == "" {
return time.Time{}, nil, xerrors.Errorf("invalid type: %q", log.Type)
}

typ := logType(log.Type)
typ := timingKind(log.Type)
if !typ.Valid() {
return time.Time{}, nil, xerrors.Errorf("invalid type: %q", log.Type)
}
Expand All @@ -624,9 +623,8 @@ func extractTimingsEntry(log terraformProvisionLog) (time.Time, *timingsEntry, e
// TODO: log
ts = time.Now()
}
ts = ts.UTC()

return ts, &timingsEntry{
return ts, &timingSpan{
kind: typ,
action: log.Hook.Action,
provider: log.Hook.Resource.Provider,
Expand Down
20 changes: 11 additions & 9 deletions provisioner/terraform/provision.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ func (s *server) Plan(
defer cancel()
defer kill()

e := s.executor(sess.WorkDirectory)
e := s.executor(sess.WorkDirectory, database.ProvisionerJobTimingStagePlan)
if err := e.checkMinVersion(ctx); err != nil {
return provisionersdk.PlanErrorf(err.Error())
}
Expand Down Expand Up @@ -104,18 +104,20 @@ func (s *server) Plan(

s.logger.Debug(ctx, "running initialization")

timingsAgg := newTimingsAggregator(database.ProvisionerJobTimingStageInit)
timingsAgg.ingest(createInitTimingsEvent(initStart))
// The JSON output of `terraform init` doesn't include discrete fields for capturing timings of each plugin,
// so we capture the whole init process.
initTimings := newTimingAggregator(database.ProvisionerJobTimingStageInit)
initTimings.ingest(createInitTimingsEvent(initStart))

err = e.init(ctx, killCtx, sess)
if err != nil {
timingsAgg.ingest(createInitTimingsEvent(initErrored))
initTimings.ingest(createInitTimingsEvent(initErrored))

s.logger.Debug(ctx, "init failed", slog.Error(err))
return provisionersdk.PlanErrorf("initialize terraform: %s", err)
}

timingsAgg.ingest(createInitTimingsEvent(initComplete))
initTimings.ingest(createInitTimingsEvent(initComplete))

s.logger.Debug(ctx, "ran initialization")

Expand All @@ -137,12 +139,12 @@ func (s *server) Plan(
return provisionersdk.PlanErrorf(err.Error())
}

resp.Timings = append(resp.Timings, timingsAgg.aggregate()...)
resp.Timings = append(resp.Timings, initTimings.aggregate()...)
return resp
}

func createInitTimingsEvent(event logType) (time.Time, *timingsEntry) {
return dbtime.Now(), &timingsEntry{
func createInitTimingsEvent(event timingKind) (time.Time, *timingSpan) {
return dbtime.Now(), &timingSpan{
kind: event,
action: "initialize terraform",
provider: "terraform",
Expand All @@ -159,7 +161,7 @@ func (s *server) Apply(
defer cancel()
defer kill()

e := s.executor(sess.WorkDirectory)
e := s.executor(sess.WorkDirectory, database.ProvisionerJobTimingStageApply)
if err := e.checkMinVersion(ctx); err != nil {
return provisionersdk.ApplyErrorf(err.Error())
}
Expand Down
5 changes: 4 additions & 1 deletion provisioner/terraform/serve.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ import (
"golang.org/x/xerrors"

"cdr.dev/slog"

"github.com/coder/coder/v2/coderd/database"
"github.com/coder/coder/v2/coderd/unhanger"
"github.com/coder/coder/v2/provisionersdk"
)
Expand Down Expand Up @@ -138,13 +140,14 @@ func (s *server) startTrace(ctx context.Context, name string, opts ...trace.Span
))...)
}

func (s *server) executor(workdir string) *executor {
func (s *server) executor(workdir string, stage database.ProvisionerJobTimingStage) *executor {
return &executor{
server: s,
mut: s.execMut,
binaryPath: s.binaryPath,
cachePath: s.cachePath,
workdir: workdir,
logger: s.logger.Named("executor"),
timings: newTimingAggregator(stage),
}
}
Loading