Skip to content

feat: add prebuilds metrics collector #17547

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Apr 28, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions coderd/prebuilds/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ import (

"github.com/google/uuid"
"golang.org/x/xerrors"

"github.com/coder/coder/v2/coderd/database"
)

var ErrNoClaimablePrebuiltWorkspaces = xerrors.New("no claimable prebuilt workspaces found")
Expand All @@ -25,12 +27,23 @@ type ReconciliationOrchestrator interface {
}

type Reconciler interface {
StateSnapshotter

// ReconcileAll orchestrates the reconciliation of all prebuilds across all templates.
// It takes a global snapshot of the system state and then reconciles each preset
// in parallel, creating or deleting prebuilds as needed to reach their desired states.
ReconcileAll(ctx context.Context) error
}

// StateSnapshotter defines the operations necessary to capture workspace prebuilds state.
type StateSnapshotter interface {
// SnapshotState captures the current state of all prebuilds across templates.
// It creates a global database snapshot that can be viewed as a collection of PresetSnapshots,
// each representing the state of prebuilds for a specific preset.
// MUST be called inside a repeatable-read transaction.
SnapshotState(ctx context.Context, store database.Store) (*GlobalSnapshot, error)
}

type Claimer interface {
Claim(ctx context.Context, userID uuid.UUID, name string, presetID uuid.UUID) (*uuid.UUID, error)
Initiator() uuid.UUID
Expand Down
2 changes: 1 addition & 1 deletion enterprise/coderd/coderd.go
Original file line number Diff line number Diff line change
Expand Up @@ -1165,6 +1165,6 @@ func (api *API) setupPrebuilds(featureEnabled bool) (agplprebuilds.Reconciliatio
}

reconciler := prebuilds.NewStoreReconciler(api.Database, api.Pubsub, api.DeploymentValues.Prebuilds,
api.Logger.Named("prebuilds"), quartz.NewReal())
api.Logger.Named("prebuilds"), quartz.NewReal(), api.PrometheusRegistry)
return reconciler, prebuilds.EnterpriseClaimer{}
}
5 changes: 3 additions & 2 deletions enterprise/coderd/prebuilds/claim_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"time"

"github.com/google/uuid"
"github.com/prometheus/client_golang/prometheus"
"github.com/stretchr/testify/require"
"golang.org/x/xerrors"

Expand Down Expand Up @@ -142,7 +143,7 @@ func TestClaimPrebuild(t *testing.T) {
EntitlementsUpdateInterval: time.Second,
})

reconciler := prebuilds.NewStoreReconciler(spy, pubsub, codersdk.PrebuildsConfig{}, logger, quartz.NewMock(t))
reconciler := prebuilds.NewStoreReconciler(spy, pubsub, codersdk.PrebuildsConfig{}, logger, quartz.NewMock(t), prometheus.NewRegistry())
var claimer agplprebuilds.Claimer = prebuilds.NewEnterpriseClaimer(spy)
api.AGPL.PrebuildsClaimer.Store(&claimer)

Expand Down Expand Up @@ -419,7 +420,7 @@ func TestClaimPrebuild_CheckDifferentErrors(t *testing.T) {
EntitlementsUpdateInterval: time.Second,
})

reconciler := prebuilds.NewStoreReconciler(errorStore, pubsub, codersdk.PrebuildsConfig{}, logger, quartz.NewMock(t))
reconciler := prebuilds.NewStoreReconciler(errorStore, pubsub, codersdk.PrebuildsConfig{}, logger, quartz.NewMock(t), api.PrometheusRegistry)
var claimer agplprebuilds.Claimer = prebuilds.NewEnterpriseClaimer(errorStore)
api.AGPL.PrebuildsClaimer.Store(&claimer)

Expand Down
123 changes: 123 additions & 0 deletions enterprise/coderd/prebuilds/metricscollector.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
package prebuilds

import (
"context"
"time"

"cdr.dev/slog"

"github.com/prometheus/client_golang/prometheus"

"github.com/coder/coder/v2/coderd/database"
"github.com/coder/coder/v2/coderd/database/dbauthz"
"github.com/coder/coder/v2/coderd/prebuilds"
)

var (
labels = []string{"template_name", "preset_name", "organization_name"}
createdPrebuildsDesc = prometheus.NewDesc(
"coderd_prebuilt_workspaces_created_total",
"Total number of prebuilt workspaces that have been created to meet the desired instance count of each "+
"template preset.",
labels,
nil,
)
failedPrebuildsDesc = prometheus.NewDesc(
"coderd_prebuilt_workspaces_failed_total",
"Total number of prebuilt workspaces that failed to build.",
labels,
nil,
)
claimedPrebuildsDesc = prometheus.NewDesc(
"coderd_prebuilt_workspaces_claimed_total",
"Total number of prebuilt workspaces which were claimed by users. Claiming refers to creating a workspace "+
"with a preset selected for which eligible prebuilt workspaces are available and one is reassigned to a user.",
labels,
nil,
)
desiredPrebuildsDesc = prometheus.NewDesc(
"coderd_prebuilt_workspaces_desired",
"Target number of prebuilt workspaces that should be available for each template preset.",
labels,
nil,
)
runningPrebuildsDesc = prometheus.NewDesc(
"coderd_prebuilt_workspaces_running",
"Current number of prebuilt workspaces that are in a running state. These workspaces have started "+
"successfully but may not yet be claimable by users (see coderd_prebuilt_workspaces_eligible).",
labels,
nil,
)
eligiblePrebuildsDesc = prometheus.NewDesc(
"coderd_prebuilt_workspaces_eligible",
"Current number of prebuilt workspaces that are eligible to be claimed by users. These are workspaces that "+
"have completed their build process with their agent reporting 'ready' status.",
labels,
nil,
)
)

type MetricsCollector struct {
database database.Store
logger slog.Logger
snapshotter prebuilds.StateSnapshotter
}

var _ prometheus.Collector = new(MetricsCollector)

func NewMetricsCollector(db database.Store, logger slog.Logger, snapshotter prebuilds.StateSnapshotter) *MetricsCollector {
return &MetricsCollector{
database: db,
logger: logger.Named("prebuilds_metrics_collector"),
snapshotter: snapshotter,
}
}

func (*MetricsCollector) Describe(descCh chan<- *prometheus.Desc) {
descCh <- createdPrebuildsDesc
descCh <- failedPrebuildsDesc
descCh <- claimedPrebuildsDesc
descCh <- desiredPrebuildsDesc
descCh <- runningPrebuildsDesc
descCh <- eligiblePrebuildsDesc
}

func (mc *MetricsCollector) Collect(metricsCh chan<- prometheus.Metric) {
// nolint:gocritic // We need to set an authz context to read metrics from the db.
ctx, cancel := context.WithTimeout(dbauthz.AsPrebuildsOrchestrator(context.Background()), 10*time.Second)
defer cancel()
prebuildMetrics, err := mc.database.GetPrebuildMetrics(ctx)
if err != nil {
mc.logger.Error(ctx, "failed to get prebuild metrics", slog.Error(err))
return
}

for _, metric := range prebuildMetrics {
metricsCh <- prometheus.MustNewConstMetric(createdPrebuildsDesc, prometheus.CounterValue, float64(metric.CreatedCount), metric.TemplateName, metric.PresetName, metric.OrganizationName)
metricsCh <- prometheus.MustNewConstMetric(failedPrebuildsDesc, prometheus.CounterValue, float64(metric.FailedCount), metric.TemplateName, metric.PresetName, metric.OrganizationName)
metricsCh <- prometheus.MustNewConstMetric(claimedPrebuildsDesc, prometheus.CounterValue, float64(metric.ClaimedCount), metric.TemplateName, metric.PresetName, metric.OrganizationName)
}

snapshot, err := mc.snapshotter.SnapshotState(ctx, mc.database)
if err != nil {
mc.logger.Error(ctx, "failed to get latest prebuild state", slog.Error(err))
return
}

for _, preset := range snapshot.Presets {
if !preset.UsingActiveVersion {
continue
}

presetSnapshot, err := snapshot.FilterByPreset(preset.ID)
if err != nil {
mc.logger.Error(ctx, "failed to filter by preset", slog.Error(err))
continue
}
state := presetSnapshot.CalculateState()

metricsCh <- prometheus.MustNewConstMetric(desiredPrebuildsDesc, prometheus.GaugeValue, float64(state.Desired), preset.TemplateName, preset.Name, preset.OrganizationName)
metricsCh <- prometheus.MustNewConstMetric(runningPrebuildsDesc, prometheus.GaugeValue, float64(state.Actual), preset.TemplateName, preset.Name, preset.OrganizationName)
metricsCh <- prometheus.MustNewConstMetric(eligiblePrebuildsDesc, prometheus.GaugeValue, float64(state.Eligible), preset.TemplateName, preset.Name, preset.OrganizationName)
}
}
Loading
Loading