Skip to content

Commit 562b56d

Browse files
committed
feat: fetch prebuilds metrics state in background
Signed-off-by: Danny Kopping <dannykopping@gmail.com>
1 parent 7f056da commit 562b56d

File tree

3 files changed

+84
-17
lines changed

3 files changed

+84
-17
lines changed

enterprise/coderd/prebuilds/metricscollector.go

+74-17
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,13 @@ package prebuilds
22

33
import (
44
"context"
5+
"sync/atomic"
56
"time"
67

7-
"cdr.dev/slog"
8-
98
"github.com/prometheus/client_golang/prometheus"
9+
"golang.org/x/xerrors"
10+
11+
"cdr.dev/slog"
1012

1113
"github.com/coder/coder/v2/coderd/database"
1214
"github.com/coder/coder/v2/coderd/database/dbauthz"
@@ -57,18 +59,27 @@ var (
5759
)
5860
)
5961

62+
const (
63+
metricsUpdateInterval = time.Second * 15
64+
metricsUpdateTimeout = time.Second * 10
65+
)
66+
6067
type MetricsCollector struct {
6168
database database.Store
6269
logger slog.Logger
6370
snapshotter prebuilds.StateSnapshotter
71+
72+
latestState atomic.Pointer[state]
6473
}
6574

6675
var _ prometheus.Collector = new(MetricsCollector)
6776

77+
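// NewMetricsCollector returns a MetricsCollector that uses the given store and snapshotter and logs under the name "prebuilds_metrics_collector".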
6878
func NewMetricsCollector(db database.Store, logger slog.Logger, snapshotter prebuilds.StateSnapshotter) *MetricsCollector {
79+
log := logger.Named("prebuilds_metrics_collector")
6980
return &MetricsCollector{
7081
database: db,
71-
logger: logger.Named("prebuilds_metrics_collector"),
82+
logger: log,
7283
snapshotter: snapshotter,
7384
}
7485
}
@@ -82,34 +93,31 @@ func (*MetricsCollector) Describe(descCh chan<- *prometheus.Desc) {
8293
descCh <- eligiblePrebuildsDesc
8394
}
8495

96+
// Collect uses the cached state to set configured metrics.
97+
// The state is cached because this function can be called multiple times per second and retrieving the current state
98+
// is an expensive operation.
8599
func (mc *MetricsCollector) Collect(metricsCh chan<- prometheus.Metric) {
86100
// nolint:gocritic // We need to set an authz context to read metrics from the db.
87-
ctx, cancel := context.WithTimeout(dbauthz.AsPrebuildsOrchestrator(context.Background()), 10*time.Second)
88-
defer cancel()
89-
prebuildMetrics, err := mc.database.GetPrebuildMetrics(ctx)
90-
if err != nil {
91-
mc.logger.Error(ctx, "failed to get prebuild metrics", slog.Error(err))
101+
ctx := dbauthz.AsPrebuildsOrchestrator(context.Background())
102+
103+
currentState := mc.latestState.Load()
104+
if currentState == nil {
105+
mc.logger.Warn(ctx, "failed to set prebuilds metrics; state not set")
92106
return
93107
}
94108

95-
for _, metric := range prebuildMetrics {
109+
for _, metric := range currentState.prebuildMetrics {
96110
metricsCh <- prometheus.MustNewConstMetric(createdPrebuildsDesc, prometheus.CounterValue, float64(metric.CreatedCount), metric.TemplateName, metric.PresetName, metric.OrganizationName)
97111
metricsCh <- prometheus.MustNewConstMetric(failedPrebuildsDesc, prometheus.CounterValue, float64(metric.FailedCount), metric.TemplateName, metric.PresetName, metric.OrganizationName)
98112
metricsCh <- prometheus.MustNewConstMetric(claimedPrebuildsDesc, prometheus.CounterValue, float64(metric.ClaimedCount), metric.TemplateName, metric.PresetName, metric.OrganizationName)
99113
}
100114

101-
snapshot, err := mc.snapshotter.SnapshotState(ctx, mc.database)
102-
if err != nil {
103-
mc.logger.Error(ctx, "failed to get latest prebuild state", slog.Error(err))
104-
return
105-
}
106-
107-
for _, preset := range snapshot.Presets {
115+
for _, preset := range currentState.snapshot.Presets {
108116
if !preset.UsingActiveVersion {
109117
continue
110118
}
111119

112-
presetSnapshot, err := snapshot.FilterByPreset(preset.ID)
120+
presetSnapshot, err := currentState.snapshot.FilterByPreset(preset.ID)
113121
if err != nil {
114122
mc.logger.Error(ctx, "failed to filter by preset", slog.Error(err))
115123
continue
@@ -121,3 +129,52 @@ func (mc *MetricsCollector) Collect(metricsCh chan<- prometheus.Metric) {
121129
metricsCh <- prometheus.MustNewConstMetric(eligiblePrebuildsDesc, prometheus.GaugeValue, float64(state.Eligible), preset.TemplateName, preset.Name, preset.OrganizationName)
122130
}
123131
}
132+
133+
type state struct {
134+
prebuildMetrics []database.GetPrebuildMetricsRow
135+
snapshot *prebuilds.GlobalSnapshot
136+
}
137+
138+
// BackgroundFetch updates the metrics state once immediately and then once per updateInterval until ctx is done. Failed updates are logged rather than returned.
139+
func (mc *MetricsCollector) BackgroundFetch(ctx context.Context, updateInterval, updateTimeout time.Duration) {
140+
tick := time.NewTicker(time.Nanosecond)
141+
defer tick.Stop()
142+
143+
for {
144+
select {
145+
case <-ctx.Done():
146+
return
147+
case <-tick.C:
148+
// Tick immediately, then set regular interval.
149+
tick.Reset(updateInterval)
150+
151+
if err := mc.UpdateState(ctx, updateTimeout); err != nil {
152+
mc.logger.Error(ctx, "failed to update prebuilds metrics state", slog.Error(err))
153+
}
154+
}
155+
}
156+
}
157+
158+
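// UpdateState fetches the prebuild metrics and a state snapshot within the given timeout and stores them as the latest state. If either fetch fails, an error is returned and the previously stored state is left unchanged.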
159+
func (mc *MetricsCollector) UpdateState(ctx context.Context, timeout time.Duration) error {
160+
mc.logger.Debug(ctx, "fetching prebuilds metrics state")
161+
fetchCtx, fetchCancel := context.WithTimeout(ctx, timeout)
162+
defer fetchCancel()
163+
164+
prebuildMetrics, err := mc.database.GetPrebuildMetrics(fetchCtx)
165+
if err != nil {
166+
return xerrors.Errorf("fetch prebuild metrics: %w", err)
167+
}
168+
169+
snapshot, err := mc.snapshotter.SnapshotState(fetchCtx, mc.database)
170+
if err != nil {
171+
return xerrors.Errorf("snapshot state: %w", err)
172+
}
173+
mc.logger.Debug(ctx, "fetched prebuilds metrics state")
174+
175+
mc.latestState.Store(&state{
176+
prebuildMetrics: prebuildMetrics,
177+
snapshot: snapshot,
178+
})
179+
return nil
180+
}

enterprise/coderd/prebuilds/metricscollector_test.go

+5
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ import (
1616
"github.com/coder/quartz"
1717

1818
"github.com/coder/coder/v2/coderd/database"
19+
"github.com/coder/coder/v2/coderd/database/dbauthz"
1920
"github.com/coder/coder/v2/coderd/database/dbgen"
2021
"github.com/coder/coder/v2/coderd/database/dbtestutil"
2122
agplprebuilds "github.com/coder/coder/v2/coderd/prebuilds"
@@ -248,6 +249,10 @@ func TestMetricsCollector(t *testing.T) {
248249
setupTestDBWorkspaceAgent(t, db, workspace.ID, eligible)
249250
}
250251

252+
// Force an update to the metrics state to allow the collector to collect fresh metrics.
253+
// nolint:gocritic // Authz context needed to retrieve state.
254+
require.NoError(t, collector.UpdateState(dbauthz.AsPrebuildsOrchestrator(ctx), testutil.WaitLong))
255+
251256
metricsFamilies, err := registry.Gather()
252257
require.NoError(t, err)
253258

enterprise/coderd/prebuilds/reconcile.go

+5
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,11 @@ func (c *StoreReconciler) Run(ctx context.Context) {
9797
ctx, cancel := context.WithCancelCause(dbauthz.AsPrebuildsOrchestrator(ctx))
9898
c.cancelFn = cancel
9999

100+
// Start updating metrics in the background.
101+
if c.metrics != nil {
102+
go c.metrics.BackgroundFetch(ctx, metricsUpdateInterval, metricsUpdateTimeout)
103+
}
104+
100105
// Everything is in place, reconciler can now be considered as running.
101106
//
102107
// NOTE: without this atomic bool, Stop might race with Run for the c.cancelFn above.

0 commit comments

Comments
 (0)