Skip to content

Commit 1f5273b

Browse files
committed
histograms
1 parent 8df9eea commit 1f5273b

File tree

2 files changed

+56
-15
lines changed

2 files changed

+56
-15
lines changed

cli/server.go

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -724,7 +724,11 @@ func (r *RootCmd) Server(newAPI func(context.Context, *coderd.Options) (*coderd.
724724
}
725725
defer closeAgentStatsFunc()
726726

727-
metricsAggregator := prometheusmetrics.NewMetricsAggregator(logger, 0)
727+
metricsAggregator, err := prometheusmetrics.NewMetricsAggregator(logger, options.PrometheusRegistry, 0)
728+
if err != nil {
729+
return xerrors.Errorf("can't initialize metrics aggregator: %w", err)
730+
}
731+
728732
cancelMetricsAggregator := metricsAggregator.Run(ctx)
729733
defer cancelMetricsAggregator()
730734

coderd/prometheusmetrics/aggregator.go

Lines changed: 51 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -35,6 +35,9 @@ type MetricsAggregator struct {
3535

3636
collectCh chan (chan<- prometheus.Metric)
3737
updateCh chan updateRequest
38+
39+
updateHistogram prometheus.Histogram
40+
cleanupHistogram prometheus.Histogram
3841
}
3942

4043
type updateRequest struct {
@@ -59,18 +62,46 @@ type annotatedMetric struct {
5962

6063
var _ prometheus.Collector = new(MetricsAggregator)
6164

62-
func NewMetricsAggregator(logger slog.Logger, duration time.Duration) *MetricsAggregator {
65+
func NewMetricsAggregator(logger slog.Logger, registerer prometheus.Registerer, duration time.Duration) (*MetricsAggregator, error) {
6366
metricsCleanupInterval := defaultMetricsCleanupInterval
6467
if duration > 0 {
6568
metricsCleanupInterval = duration
6669
}
70+
71+
updateHistogram := prometheus.NewHistogram(prometheus.HistogramOpts{
72+
Namespace: "coderd",
73+
Subsystem: "prometheusmetrics",
74+
Name: "metrics_aggregator_execution_update_seconds",
75+
Help: "Histogram for duration of metrics aggregator update in seconds.",
76+
Buckets: []float64{0.001, 0.005, 0.010, 0.025, 0.050, 0.100, 0.500, 1, 5, 10, 30},
77+
})
78+
err := registerer.Register(updateHistogram)
79+
if err != nil {
80+
return nil, err
81+
}
82+
83+
cleanupHistogram := prometheus.NewHistogram(prometheus.HistogramOpts{
84+
Namespace: "coderd",
85+
Subsystem: "prometheusmetrics",
86+
Name: "metrics_aggregator_execution_cleanup_seconds",
87+
Help: "Histogram for duration of metrics aggregator cleanup in seconds.",
88+
Buckets: []float64{0.001, 0.005, 0.010, 0.025, 0.050, 0.100, 0.500, 1, 5, 10, 30},
89+
})
90+
err = registerer.Register(cleanupHistogram)
91+
if err != nil {
92+
return nil, err
93+
}
94+
6795
return &MetricsAggregator{
6896
log: logger,
6997
metricsCleanupInterval: metricsCleanupInterval,
7098

7199
collectCh: make(chan (chan<- prometheus.Metric), sizeCollectCh),
72100
updateCh: make(chan updateRequest, sizeUpdateCh),
73-
}
101+
102+
updateHistogram: updateHistogram,
103+
cleanupHistogram: cleanupHistogram,
104+
}, nil
74105
}
75106

76107
func (ma *MetricsAggregator) Run(ctx context.Context) func() {
@@ -87,6 +118,7 @@ func (ma *MetricsAggregator) Run(ctx context.Context) func() {
87118
case req := <-ma.updateCh:
88119
ma.log.Debug(ctx, "metrics aggregator: update metrics")
89120

121+
timer := prometheus.NewTimer(ma.updateHistogram)
90122
UpdateLoop:
91123
for _, m := range req.metrics {
92124
for i, q := range ma.queue {
@@ -107,6 +139,8 @@ func (ma *MetricsAggregator) Run(ctx context.Context) func() {
107139
expiryDate: req.timestamp.Add(ma.metricsCleanupInterval),
108140
})
109141
}
142+
143+
timer.ObserveDuration()
110144
case inputCh := <-ma.collectCh:
111145
ma.log.Debug(ctx, "metrics aggregator: collect metrics")
112146

@@ -124,6 +158,8 @@ func (ma *MetricsAggregator) Run(ctx context.Context) func() {
124158
case <-cleanupTicker.C:
125159
ma.log.Debug(ctx, "metrics aggregator: clean expired metrics")
126160

161+
timer := prometheus.NewTimer(ma.cleanupHistogram)
162+
127163
now := time.Now()
128164

129165
var hasExpiredMetrics bool
@@ -134,20 +170,21 @@ func (ma *MetricsAggregator) Run(ctx context.Context) func() {
134170
}
135171
}
136172

137-
if !hasExpiredMetrics {
138-
continue
139-
}
140-
141-
var j int
142-
fresh := make([]annotatedMetric, len(ma.queue))
143-
for _, m := range ma.queue {
144-
if m.expiryDate.After(now) {
145-
fresh[j] = m
146-
j++
173+
if hasExpiredMetrics {
174+
var j int
175+
fresh := make([]annotatedMetric, len(ma.queue))
176+
for _, m := range ma.queue {
177+
if m.expiryDate.After(now) {
178+
fresh[j] = m
179+
j++
180+
}
147181
}
182+
fresh = fresh[:j]
183+
ma.queue = fresh
148184
}
149-
fresh = fresh[:j]
150-
ma.queue = fresh
185+
186+
timer.ObserveDuration()
187+
cleanupTicker.Reset(ma.metricsCleanupInterval)
151188
case <-ctx.Done():
152189
ma.log.Debug(ctx, "metrics aggregator: is stopped")
153190
return

0 commit comments

Comments
 (0)