@@ -35,6 +35,9 @@ type MetricsAggregator struct {
35
35
36
36
collectCh chan (chan <- prometheus.Metric )
37
37
updateCh chan updateRequest
38
+
39
+ updateHistogram prometheus.Histogram
40
+ cleanupHistogram prometheus.Histogram
38
41
}
39
42
40
43
type updateRequest struct {
@@ -59,18 +62,46 @@ type annotatedMetric struct {
59
62
60
63
var _ prometheus.Collector = new (MetricsAggregator )
61
64
62
- func NewMetricsAggregator (logger slog.Logger , duration time.Duration ) * MetricsAggregator {
65
+ func NewMetricsAggregator (logger slog.Logger , registerer prometheus. Registerer , duration time.Duration ) ( * MetricsAggregator , error ) {
63
66
metricsCleanupInterval := defaultMetricsCleanupInterval
64
67
if duration > 0 {
65
68
metricsCleanupInterval = duration
66
69
}
70
+
71
+ updateHistogram := prometheus .NewHistogram (prometheus.HistogramOpts {
72
+ Namespace : "coderd" ,
73
+ Subsystem : "prometheusmetrics" ,
74
+ Name : "metrics_aggregator_execution_update_seconds" ,
75
+ Help : "Histogram for duration of metrics aggregator update in seconds." ,
76
+ Buckets : []float64 {0.001 , 0.005 , 0.010 , 0.025 , 0.050 , 0.100 , 0.500 , 1 , 5 , 10 , 30 },
77
+ })
78
+ err := registerer .Register (updateHistogram )
79
+ if err != nil {
80
+ return nil , err
81
+ }
82
+
83
+ cleanupHistogram := prometheus .NewHistogram (prometheus.HistogramOpts {
84
+ Namespace : "coderd" ,
85
+ Subsystem : "prometheusmetrics" ,
86
+ Name : "metrics_aggregator_execution_cleanup_seconds" ,
87
+ Help : "Histogram for duration of metrics aggregator cleanup in seconds." ,
88
+ Buckets : []float64 {0.001 , 0.005 , 0.010 , 0.025 , 0.050 , 0.100 , 0.500 , 1 , 5 , 10 , 30 },
89
+ })
90
+ err = registerer .Register (cleanupHistogram )
91
+ if err != nil {
92
+ return nil , err
93
+ }
94
+
67
95
return & MetricsAggregator {
68
96
log : logger ,
69
97
metricsCleanupInterval : metricsCleanupInterval ,
70
98
71
99
collectCh : make (chan (chan <- prometheus.Metric ), sizeCollectCh ),
72
100
updateCh : make (chan updateRequest , sizeUpdateCh ),
73
- }
101
+
102
+ updateHistogram : updateHistogram ,
103
+ cleanupHistogram : cleanupHistogram ,
104
+ }, nil
74
105
}
75
106
76
107
func (ma * MetricsAggregator ) Run (ctx context.Context ) func () {
@@ -87,6 +118,7 @@ func (ma *MetricsAggregator) Run(ctx context.Context) func() {
87
118
case req := <- ma .updateCh :
88
119
ma .log .Debug (ctx , "metrics aggregator: update metrics" )
89
120
121
+ timer := prometheus .NewTimer (ma .updateHistogram )
90
122
UpdateLoop:
91
123
for _ , m := range req .metrics {
92
124
for i , q := range ma .queue {
@@ -107,6 +139,8 @@ func (ma *MetricsAggregator) Run(ctx context.Context) func() {
107
139
expiryDate : req .timestamp .Add (ma .metricsCleanupInterval ),
108
140
})
109
141
}
142
+
143
+ timer .ObserveDuration ()
110
144
case inputCh := <- ma .collectCh :
111
145
ma .log .Debug (ctx , "metrics aggregator: collect metrics" )
112
146
@@ -124,6 +158,8 @@ func (ma *MetricsAggregator) Run(ctx context.Context) func() {
124
158
case <- cleanupTicker .C :
125
159
ma .log .Debug (ctx , "metrics aggregator: clean expired metrics" )
126
160
161
+ timer := prometheus .NewTimer (ma .cleanupHistogram )
162
+
127
163
now := time .Now ()
128
164
129
165
var hasExpiredMetrics bool
@@ -134,20 +170,21 @@ func (ma *MetricsAggregator) Run(ctx context.Context) func() {
134
170
}
135
171
}
136
172
137
- if ! hasExpiredMetrics {
138
- continue
139
- }
140
-
141
- var j int
142
- fresh := make ([]annotatedMetric , len (ma .queue ))
143
- for _ , m := range ma .queue {
144
- if m .expiryDate .After (now ) {
145
- fresh [j ] = m
146
- j ++
173
+ if hasExpiredMetrics {
174
+ var j int
175
+ fresh := make ([]annotatedMetric , len (ma .queue ))
176
+ for _ , m := range ma .queue {
177
+ if m .expiryDate .After (now ) {
178
+ fresh [j ] = m
179
+ j ++
180
+ }
147
181
}
182
+ fresh = fresh [:j ]
183
+ ma .queue = fresh
148
184
}
149
- fresh = fresh [:j ]
150
- ma .queue = fresh
185
+
186
+ timer .ObserveDuration ()
187
+ cleanupTicker .Reset (ma .metricsCleanupInterval )
151
188
case <- ctx .Done ():
152
189
ma .log .Debug (ctx , "metrics aggregator: is stopped" )
153
190
return
0 commit comments