@@ -2,11 +2,13 @@ package prebuilds
2
2
3
3
import (
4
4
"context"
5
+ "sync/atomic"
5
6
"time"
6
7
7
- "cdr.dev/slog"
8
-
9
8
"github.com/prometheus/client_golang/prometheus"
9
+ "golang.org/x/xerrors"
10
+
11
+ "cdr.dev/slog"
10
12
11
13
"github.com/coder/coder/v2/coderd/database"
12
14
"github.com/coder/coder/v2/coderd/database/dbauthz"
@@ -57,18 +59,27 @@ var (
57
59
)
58
60
)
59
61
62
const (
	// metricsUpdateInterval is a 15-second duration.
	// NOTE(review): presumably the interval passed to BackgroundFetch; the caller is not visible here.
	metricsUpdateInterval = 15 * time.Second
	// metricsUpdateTimeout is a 10-second duration.
	// NOTE(review): presumably the per-update timeout passed to BackgroundFetch; confirm at the call site.
	metricsUpdateTimeout = 10 * time.Second
)
66
+
60
67
type MetricsCollector struct {
61
68
database database.Store
62
69
logger slog.Logger
63
70
snapshotter prebuilds.StateSnapshotter
71
+
72
+ latestState atomic.Pointer [state ]
64
73
}
65
74
66
75
// Compile-time assertion that *MetricsCollector satisfies prometheus.Collector.
var _ prometheus.Collector = (*MetricsCollector)(nil)
67
76
77
+ // NewMetricsCollector returns a
68
78
func NewMetricsCollector (db database.Store , logger slog.Logger , snapshotter prebuilds.StateSnapshotter ) * MetricsCollector {
79
+ log := logger .Named ("prebuilds_metrics_collector" )
69
80
return & MetricsCollector {
70
81
database : db ,
71
- logger : logger . Named ( "prebuilds_metrics_collector" ) ,
82
+ logger : log ,
72
83
snapshotter : snapshotter ,
73
84
}
74
85
}
@@ -82,34 +93,31 @@ func (*MetricsCollector) Describe(descCh chan<- *prometheus.Desc) {
82
93
descCh <- eligiblePrebuildsDesc
83
94
}
84
95
96
+ // Collect uses the cached state to set configured metrics.
97
+ // The state is cached because this function can be called multiple times per second and retrieving the current state
98
+ // is an expensive operation.
85
99
func (mc * MetricsCollector ) Collect (metricsCh chan <- prometheus.Metric ) {
86
100
// nolint:gocritic // We need to set an authz context to read metrics from the db.
87
- ctx , cancel := context . WithTimeout ( dbauthz .AsPrebuildsOrchestrator (context .Background ()), 10 * time . Second )
88
- defer cancel ()
89
- prebuildMetrics , err := mc .database . GetPrebuildMetrics ( ctx )
90
- if err ! = nil {
91
- mc .logger .Error (ctx , "failed to get prebuild metrics" , slog . Error ( err ) )
101
+ ctx := dbauthz .AsPrebuildsOrchestrator (context .Background ())
102
+
103
+ currentState := mc .latestState . Load ( )
104
+ if currentState = = nil {
105
+ mc .logger .Warn (ctx , "failed to set prebuilds metrics; state not set" )
92
106
return
93
107
}
94
108
95
- for _ , metric := range prebuildMetrics {
109
+ for _ , metric := range currentState . prebuildMetrics {
96
110
metricsCh <- prometheus .MustNewConstMetric (createdPrebuildsDesc , prometheus .CounterValue , float64 (metric .CreatedCount ), metric .TemplateName , metric .PresetName , metric .OrganizationName )
97
111
metricsCh <- prometheus .MustNewConstMetric (failedPrebuildsDesc , prometheus .CounterValue , float64 (metric .FailedCount ), metric .TemplateName , metric .PresetName , metric .OrganizationName )
98
112
metricsCh <- prometheus .MustNewConstMetric (claimedPrebuildsDesc , prometheus .CounterValue , float64 (metric .ClaimedCount ), metric .TemplateName , metric .PresetName , metric .OrganizationName )
99
113
}
100
114
101
- snapshot , err := mc .snapshotter .SnapshotState (ctx , mc .database )
102
- if err != nil {
103
- mc .logger .Error (ctx , "failed to get latest prebuild state" , slog .Error (err ))
104
- return
105
- }
106
-
107
- for _ , preset := range snapshot .Presets {
115
+ for _ , preset := range currentState .snapshot .Presets {
108
116
if ! preset .UsingActiveVersion {
109
117
continue
110
118
}
111
119
112
- presetSnapshot , err := snapshot .FilterByPreset (preset .ID )
120
+ presetSnapshot , err := currentState . snapshot .FilterByPreset (preset .ID )
113
121
if err != nil {
114
122
mc .logger .Error (ctx , "failed to filter by preset" , slog .Error (err ))
115
123
continue
@@ -121,3 +129,52 @@ func (mc *MetricsCollector) Collect(metricsCh chan<- prometheus.Metric) {
121
129
metricsCh <- prometheus .MustNewConstMetric (eligiblePrebuildsDesc , prometheus .GaugeValue , float64 (state .Eligible ), preset .TemplateName , preset .Name , preset .OrganizationName )
122
130
}
123
131
}
132
+
133
+ type state struct {
134
+ prebuildMetrics []database.GetPrebuildMetricsRow
135
+ snapshot * prebuilds.GlobalSnapshot
136
+ }
137
+
138
+ // BackgroundFetch updates the metrics state every given interval.
139
+ func (mc * MetricsCollector ) BackgroundFetch (ctx context.Context , updateInterval , updateTimeout time.Duration ) {
140
+ tick := time .NewTicker (time .Nanosecond )
141
+ defer tick .Stop ()
142
+
143
+ for {
144
+ select {
145
+ case <- ctx .Done ():
146
+ return
147
+ case <- tick .C :
148
+ // Tick immediately, then set regular interval.
149
+ tick .Reset (updateInterval )
150
+
151
+ if err := mc .UpdateState (ctx , updateTimeout ); err != nil {
152
+ mc .logger .Error (ctx , "failed to update prebuilds metrics state" , slog .Error (err ))
153
+ }
154
+ }
155
+ }
156
+ }
157
+
158
+ // UpdateState builds the current metrics state.
159
+ func (mc * MetricsCollector ) UpdateState (ctx context.Context , timeout time.Duration ) error {
160
+ mc .logger .Debug (ctx , "fetching prebuilds metrics state" )
161
+ fetchCtx , fetchCancel := context .WithTimeout (ctx , timeout )
162
+ defer fetchCancel ()
163
+
164
+ prebuildMetrics , err := mc .database .GetPrebuildMetrics (fetchCtx )
165
+ if err != nil {
166
+ return xerrors .Errorf ("fetch prebuild metrics: %w" , err )
167
+ }
168
+
169
+ snapshot , err := mc .snapshotter .SnapshotState (fetchCtx , mc .database )
170
+ if err != nil {
171
+ return xerrors .Errorf ("snapshot state: %w" , err )
172
+ }
173
+ mc .logger .Debug (ctx , "fetched prebuilds metrics state" )
174
+
175
+ mc .latestState .Store (& state {
176
+ prebuildMetrics : prebuildMetrics ,
177
+ snapshot : snapshot ,
178
+ })
179
+ return nil
180
+ }
0 commit comments