Skip to content

Commit 2431bbe

Browse files
committed
Add metrics collector
Signed-off-by: Danny Kopping <dannykopping@gmail.com>
1 parent b6f378c commit 2431bbe

File tree

7 files changed

+470
-30
lines changed

7 files changed

+470
-30
lines changed

coderd/prebuilds/api.go

+11-6
Original file line numberDiff line numberDiff line change
@@ -32,19 +32,15 @@ type ReconciliationOrchestrator interface {
3232
// All database operations must be performed within repeatable-read transactions
3333
// to ensure consistency.
3434
type Reconciler interface {
35+
StateSnapshotter
36+
3537
// ReconcileAll orchestrates the reconciliation of all prebuilds across all templates.
3638
// It takes a global snapshot of the system state and then reconciles each preset
3739
// in parallel, creating or deleting prebuilds as needed to reach their desired states.
3840
// For more fine-grained control, you can use the lower-level methods SnapshotState
3941
// and ReconcilePreset directly.
4042
ReconcileAll(ctx context.Context) error
4143

42-
// SnapshotState captures the current state of all prebuilds across templates.
43-
// It creates a global database snapshot that can be viewed as a collection of PresetSnapshots,
44-
// each representing the state of prebuilds for a specific preset.
45-
// MUST be called inside a repeatable-read transaction.
46-
SnapshotState(ctx context.Context, store database.Store) (*GlobalSnapshot, error)
47-
4844
// ReconcilePreset handles a single PresetSnapshot, determining and executing
4945
// the required actions (creating or deleting prebuilds) based on the current state.
5046
// MUST be called inside a repeatable-read transaction.
@@ -57,6 +53,15 @@ type Reconciler interface {
5753
CalculateActions(ctx context.Context, state PresetSnapshot) (*ReconciliationActions, error)
5854
}
5955

56+
// StateSnapshotter defines the operations necessary to capture workspace prebuilds state.
// Reconciler embeds this interface; it is declared separately so that a caller which only
// needs a snapshot of the state can accept this narrower type instead of a full Reconciler.
57+
type StateSnapshotter interface {
58+
// SnapshotState captures the current state of all prebuilds across templates.
59+
// It creates a global database snapshot that can be viewed as a collection of PresetSnapshots,
60+
// each representing the state of prebuilds for a specific preset.
61+
// MUST be called inside a repeatable-read transaction.
// NOTE(review): store is presumably the transaction-scoped handle of that enclosing
// transaction — confirm against the implementation.
62+
SnapshotState(ctx context.Context, store database.Store) (*GlobalSnapshot, error)
63+
}
64+
6065
type Claimer interface {
6166
Claim(ctx context.Context, store database.Store, userID uuid.UUID, name string, presetID uuid.UUID) (*uuid.UUID, error)
6267
Initiator() uuid.UUID

enterprise/coderd/coderd.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -1165,6 +1165,6 @@ func (api *API) setupPrebuilds(featureEnabled bool) (agplprebuilds.Reconciliatio
11651165
}
11661166

11671167
reconciler := prebuilds.NewStoreReconciler(api.Database, api.Pubsub, api.DeploymentValues.Prebuilds,
1168-
api.Logger.Named("prebuilds"), quartz.NewReal())
1168+
api.Logger.Named("prebuilds"), quartz.NewReal(), api.PrometheusRegistry)
11691169
return reconciler, prebuilds.EnterpriseClaimer{}
11701170
}

enterprise/coderd/prebuilds/claim_test.go

+2-1
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import (
99
"time"
1010

1111
"github.com/google/uuid"
12+
"github.com/prometheus/client_golang/prometheus"
1213
"github.com/stretchr/testify/require"
1314

1415
"github.com/coder/quartz"
@@ -115,7 +116,7 @@ func TestClaimPrebuild(t *testing.T) {
115116
EntitlementsUpdateInterval: time.Second,
116117
})
117118

118-
reconciler := prebuilds.NewStoreReconciler(spy, pubsub, codersdk.PrebuildsConfig{}, logger, quartz.NewMock(t))
119+
reconciler := prebuilds.NewStoreReconciler(spy, pubsub, codersdk.PrebuildsConfig{}, logger, quartz.NewMock(t), prometheus.DefaultRegisterer)
119120
var claimer agplprebuilds.Claimer = &prebuilds.EnterpriseClaimer{}
120121
api.AGPL.PrebuildsClaimer.Store(&claimer)
121122

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
package prebuilds
2+
3+
import (
4+
"context"
5+
"time"
6+
7+
"cdr.dev/slog"
8+
9+
"github.com/prometheus/client_golang/prometheus"
10+
11+
"github.com/coder/coder/v2/coderd/database"
12+
"github.com/coder/coder/v2/coderd/database/dbauthz"
13+
"github.com/coder/coder/v2/coderd/prebuilds"
14+
)
15+
16+
// Metric descriptors exposed by MetricsCollector. Every descriptor shares the
// same variable label set (labels), so each emitted metric must supply exactly
// three label values in the order template, preset, organization.
var (
17+
// labels is the ordered list of variable label names used by every descriptor below.
labels = []string{"template_name", "preset_name", "organization_name"}
18+
// createdPrebuildsDesc is emitted by Collect as a counter.
createdPrebuildsDesc = prometheus.NewDesc(
19+
"coderd_prebuilds_created_total",
20+
"The number of prebuilds that have been created to meet the desired count set by presets.",
21+
labels,
22+
nil,
23+
)
24+
// failedPrebuildsDesc is emitted by Collect as a counter.
failedPrebuildsDesc = prometheus.NewDesc(
25+
"coderd_prebuilds_failed_total",
26+
"The number of prebuilds that failed to build during creation.",
27+
labels,
28+
nil,
29+
)
30+
// claimedPrebuildsDesc is emitted by Collect as a counter.
claimedPrebuildsDesc = prometheus.NewDesc(
31+
"coderd_prebuilds_claimed_total",
32+
"The number of prebuilds that were claimed by a user. Each count means that a user created a workspace using a preset and was assigned a prebuild instead of a brand new workspace.",
33+
labels,
34+
nil,
35+
)
36+
// usedPresetsDesc is announced by Describe.
// NOTE(review): Collect in this file never emits a metric for this descriptor — confirm
// whether that is intentional or still to be implemented.
usedPresetsDesc = prometheus.NewDesc(
37+
"coderd_prebuilds_used_presets",
38+
"The number of times a preset was used to build a prebuild.",
39+
labels,
40+
nil,
41+
)
42+
// desiredPrebuildsDesc is emitted by Collect as a gauge.
desiredPrebuildsDesc = prometheus.NewDesc(
43+
"coderd_prebuilds_desired",
44+
"The number of prebuilds desired by each preset of each template.",
45+
labels,
46+
nil,
47+
)
48+
// runningPrebuildsDesc is emitted by Collect as a gauge.
runningPrebuildsDesc = prometheus.NewDesc(
49+
"coderd_prebuilds_running",
50+
"The number of prebuilds that are currently running. Running prebuilds have successfully started, but they may not be ready to be claimed by a user yet.",
51+
labels,
52+
nil,
53+
)
54+
// eligiblePrebuildsDesc is emitted by Collect as a gauge.
eligiblePrebuildsDesc = prometheus.NewDesc(
55+
"coderd_prebuilds_eligible",
56+
"The number of eligible prebuilds. Eligible prebuilds are prebuilds that are ready to be claimed by a user.",
57+
labels,
58+
nil,
59+
)
60+
)
61+
62+
// MetricsCollector is a prometheus.Collector that reports prebuild metrics.
// It reads counters from the database and derives gauges from a prebuilds
// state snapshot each time Collect is invoked.
type MetricsCollector struct {
63+
// database is queried for prebuild counters and is also the store handed to the snapshotter.
database database.Store
64+
// logger is the named logger used to report collection failures.
logger slog.Logger
65+
// snapshotter produces the global prebuilds snapshot from which the gauges are computed.
snapshotter prebuilds.StateSnapshotter
66+
}
67+
68+
// Compile-time assertion that *MetricsCollector satisfies prometheus.Collector.
var _ prometheus.Collector = new(MetricsCollector)
69+
70+
// NewMetricsCollector builds a MetricsCollector that reads from db and takes
// prebuild state snapshots via snapshotter. The supplied logger is wrapped in
// a child logger named "prebuilds_metrics_collector".
func NewMetricsCollector(db database.Store, logger slog.Logger, snapshotter prebuilds.StateSnapshotter) *MetricsCollector {
71+
return &MetricsCollector{
72+
database: db,
73+
logger: logger.Named("prebuilds_metrics_collector"),
74+
snapshotter: snapshotter,
75+
}
76+
}
77+
78+
// Describe implements prometheus.Collector by sending every prebuild metric
// descriptor declared in this file to descCh. It does not read any collector state.
// NOTE(review): usedPresetsDesc is announced here, but Collect does not emit a
// metric for it — confirm whether that is intentional.
func (*MetricsCollector) Describe(descCh chan<- *prometheus.Desc) {
79+
descCh <- createdPrebuildsDesc
80+
descCh <- failedPrebuildsDesc
81+
descCh <- claimedPrebuildsDesc
82+
descCh <- usedPresetsDesc
83+
descCh <- desiredPrebuildsDesc
84+
descCh <- runningPrebuildsDesc
85+
descCh <- eligiblePrebuildsDesc
86+
}
87+
88+
// Collect implements prometheus.Collector. It sends two groups of metrics to metricsCh:
//
//  1. Counters (created, failed, claimed), one set per row returned by GetPrebuildMetrics.
//  2. Gauges (desired, running, eligible), one set per preset in the state snapshot
//     that is using the active template version.
//
// Failures are logged, never returned: if the counter query fails nothing is emitted
// at all; if the snapshot fails only the counters are emitted; if a single preset
// cannot be filtered it is skipped and the remaining presets are still reported.
func (mc *MetricsCollector) Collect(metricsCh chan<- prometheus.Metric) {
89+
// All database work below shares one context: it carries the prebuilds-orchestrator
// authorization subject and is cancelled after 10 seconds.
// NOTE(review): the nolint:gocritic directive further down sits above the
// GetPrebuildMetrics call; if the lint it suppresses targets the dbauthz.As* call on
// the next statement, the directive may need to move here — confirm.
ctx, cancel := context.WithTimeout(dbauthz.AsPrebuildsOrchestrator(context.Background()), 10*time.Second)
90+
defer cancel()
91+
// nolint:gocritic // just until we get back to this
92+
prebuildMetrics, err := mc.database.GetPrebuildMetrics(ctx)
93+
if err != nil {
94+
mc.logger.Error(ctx, "failed to get prebuild metrics", slog.Error(err))
95+
// Returning here also skips the snapshot-derived gauges below.
return
96+
}
97+
98+
// Counter metrics: label values are passed in the same order as the labels slice
// (template, preset, organization).
for _, metric := range prebuildMetrics {
99+
metricsCh <- prometheus.MustNewConstMetric(createdPrebuildsDesc, prometheus.CounterValue, float64(metric.CreatedCount), metric.TemplateName, metric.PresetName, metric.OrganizationName)
100+
metricsCh <- prometheus.MustNewConstMetric(failedPrebuildsDesc, prometheus.CounterValue, float64(metric.FailedCount), metric.TemplateName, metric.PresetName, metric.OrganizationName)
101+
metricsCh <- prometheus.MustNewConstMetric(claimedPrebuildsDesc, prometheus.CounterValue, float64(metric.ClaimedCount), metric.TemplateName, metric.PresetName, metric.OrganizationName)
102+
}
103+
104+
// NOTE(review): StateSnapshotter.SnapshotState is documented as "MUST be called inside
// a repeatable-read transaction", but this function opens no transaction and passes
// mc.database directly — confirm whether the snapshot can be inconsistent here.
snapshot, err := mc.snapshotter.SnapshotState(ctx, mc.database)
105+
if err != nil {
106+
mc.logger.Error(ctx, "failed to get latest prebuild state", slog.Error(err))
107+
return
108+
}
109+
110+
// Gauge metrics: one set per preset, derived from the snapshot.
for _, preset := range snapshot.Presets {
111+
// Presets that are not on the active version are not reported.
if !preset.UsingActiveVersion {
112+
continue
113+
}
114+
115+
presetSnapshot, err := snapshot.FilterByPreset(preset.ID)
116+
if err != nil {
117+
mc.logger.Error(ctx, "failed to filter by preset", slog.Error(err))
118+
// Skip only this preset; keep reporting the others.
continue
119+
}
120+
state := presetSnapshot.CalculateState()
121+
122+
// The "running" gauge is fed from state.Actual.
metricsCh <- prometheus.MustNewConstMetric(desiredPrebuildsDesc, prometheus.GaugeValue, float64(state.Desired), preset.TemplateName, preset.Name, preset.OrganizationName)
123+
metricsCh <- prometheus.MustNewConstMetric(runningPrebuildsDesc, prometheus.GaugeValue, float64(state.Actual), preset.TemplateName, preset.Name, preset.OrganizationName)
124+
metricsCh <- prometheus.MustNewConstMetric(eligiblePrebuildsDesc, prometheus.GaugeValue, float64(state.Eligible), preset.TemplateName, preset.Name, preset.OrganizationName)
125+
}
126+
}

0 commit comments

Comments
 (0)