From 4419ecae67c64f4a4fbd5523e84fdf46d7e18da1 Mon Sep 17 00:00:00 2001 From: Steven Masley Date: Thu, 7 Aug 2025 14:54:25 -0500 Subject: [PATCH] chore: add more pprof labels for various goroutines - ReplicaSync - Notifications - MetricsAggregator - DBPurge --- coderd/database/dbpurge/dbpurge.go | 11 ++++++----- coderd/notifications/manager.go | 7 ++++--- coderd/pproflabel/pproflabel.go | 10 +++++++++- coderd/prometheusmetrics/aggregator.go | 8 ++++---- enterprise/replicasync/replicasync.go | 3 ++- 5 files changed, 25 insertions(+), 14 deletions(-) diff --git a/coderd/database/dbpurge/dbpurge.go b/coderd/database/dbpurge/dbpurge.go index 135d7f40b05dd..5afa9b4ba2975 100644 --- a/coderd/database/dbpurge/dbpurge.go +++ b/coderd/database/dbpurge/dbpurge.go @@ -12,6 +12,7 @@ import ( "github.com/coder/coder/v2/coderd/database" "github.com/coder/coder/v2/coderd/database/dbauthz" "github.com/coder/coder/v2/coderd/database/dbtime" + "github.com/coder/coder/v2/coderd/pproflabel" "github.com/coder/quartz" ) @@ -38,7 +39,7 @@ func New(ctx context.Context, logger slog.Logger, db database.Store, clk quartz. // Start the ticker with the initial delay. ticker := clk.NewTicker(delay) - doTick := func(start time.Time) { + doTick := func(ctx context.Context, start time.Time) { defer ticker.Reset(delay) // Start a transaction to grab advisory lock, we don't want to run // multiple purges at the same time (multiple replicas). @@ -85,21 +86,21 @@ func New(ctx context.Context, logger slog.Logger, db database.Store, clk quartz. } } - go func() { + pproflabel.Go(ctx, pproflabel.Service(pproflabel.ServiceDBPurge), func(ctx context.Context) { defer close(closed) defer ticker.Stop() // Force an initial tick. 
- doTick(dbtime.Time(clk.Now()).UTC()) + doTick(ctx, dbtime.Time(clk.Now()).UTC()) for { select { case <-ctx.Done(): return case tick := <-ticker.C: ticker.Stop() - doTick(dbtime.Time(tick).UTC()) + doTick(ctx, dbtime.Time(tick).UTC()) } } - }() + }) return &instance{ cancel: cancelFunc, closed: closed, diff --git a/coderd/notifications/manager.go b/coderd/notifications/manager.go index 11588a09fb797..943306d443265 100644 --- a/coderd/notifications/manager.go +++ b/coderd/notifications/manager.go @@ -11,12 +11,13 @@ import ( "golang.org/x/xerrors" "cdr.dev/slog" - "github.com/coder/quartz" "github.com/coder/coder/v2/coderd/database" "github.com/coder/coder/v2/coderd/database/pubsub" "github.com/coder/coder/v2/coderd/notifications/dispatch" + "github.com/coder/coder/v2/coderd/pproflabel" "github.com/coder/coder/v2/codersdk" + "github.com/coder/quartz" ) var ErrInvalidDispatchTimeout = xerrors.New("dispatch timeout must be less than lease period") @@ -145,7 +146,7 @@ func (m *Manager) Run(ctx context.Context) { m.runOnce.Do(func() { // Closes when Stop() is called or context is canceled. 
- go func() { + pproflabel.Go(ctx, pproflabel.Service(pproflabel.ServiceNotifications), func(ctx context.Context) { err := m.loop(ctx) if err != nil { if xerrors.Is(err, ErrManagerAlreadyClosed) { @@ -154,7 +155,7 @@ func (m *Manager) Run(ctx context.Context) { m.log.Error(ctx, "notification manager stopped with error", slog.Error(err)) } } - }() + }) }) } diff --git a/coderd/pproflabel/pproflabel.go b/coderd/pproflabel/pproflabel.go index 2bfd071dcdc39..a412ec0bf92c3 100644 --- a/coderd/pproflabel/pproflabel.go +++ b/coderd/pproflabel/pproflabel.go @@ -21,9 +21,17 @@ const ( ServiceHTTPServer = "http-api" ServiceLifecycles = "lifecycle-executor" - ServiceMetricCollector = "metrics-collector" ServicePrebuildReconciler = "prebuilds-reconciler" ServiceTerraformProvisioner = "terraform-provisioner" + ServiceDBPurge = "db-purge" + ServiceNotifications = "notifications" + ServiceReplicaSync = "replica-sync" + // ServiceMetricCollector collects metrics from insights in the database and + // exports them in a prometheus collector format. + ServiceMetricCollector = "metrics-collector" + // ServiceAgentMetricAggregator merges agent metrics and exports them in a + // prometheus collector format. 
+ ServiceAgentMetricAggregator = "agent-metrics-aggregator" RequestTypeTag = "coder_request_type" ) diff --git a/coderd/prometheusmetrics/aggregator.go b/coderd/prometheusmetrics/aggregator.go index 44ade677d5cff..ad51c3e7fa8a7 100644 --- a/coderd/prometheusmetrics/aggregator.go +++ b/coderd/prometheusmetrics/aggregator.go @@ -11,11 +11,11 @@ import ( "github.com/prometheus/common/model" "golang.org/x/xerrors" - "github.com/coder/coder/v2/coderd/agentmetrics" - "cdr.dev/slog" agentproto "github.com/coder/coder/v2/agent/proto" + "github.com/coder/coder/v2/coderd/agentmetrics" + "github.com/coder/coder/v2/coderd/pproflabel" ) const ( @@ -298,7 +298,7 @@ func (ma *MetricsAggregator) Run(ctx context.Context) func() { done := make(chan struct{}) cleanupTicker := time.NewTicker(ma.metricsCleanupInterval) - go func() { + pproflabel.Go(ctx, pproflabel.Service(pproflabel.ServiceAgentMetricAggregator), func(ctx context.Context) { defer close(done) defer cleanupTicker.Stop() @@ -395,7 +395,7 @@ func (ma *MetricsAggregator) Run(ctx context.Context) func() { return } } - }() + }) return func() { cancelFunc() <-done diff --git a/enterprise/replicasync/replicasync.go b/enterprise/replicasync/replicasync.go index 528540a262464..129e652c97de5 100644 --- a/enterprise/replicasync/replicasync.go +++ b/enterprise/replicasync/replicasync.go @@ -23,6 +23,7 @@ import ( "github.com/coder/coder/v2/coderd/database/dbauthz" "github.com/coder/coder/v2/coderd/database/dbtime" "github.com/coder/coder/v2/coderd/database/pubsub" + "github.com/coder/coder/v2/coderd/pproflabel" ) var PubsubEvent = "replica" @@ -104,7 +105,7 @@ func New(ctx context.Context, logger slog.Logger, db database.Store, ps pubsub.P return nil, xerrors.Errorf("subscribe: %w", err) } manager.closeWait.Add(1) - go manager.loop(ctx) + pproflabel.Go(ctx, pproflabel.Service(pproflabel.ServiceReplicaSync), manager.loop) return manager, nil }