@@ -2,14 +2,17 @@ package prebuilds
2
2
3
3
import (
4
4
"context"
5
+ "fmt"
6
+ "sync/atomic"
5
7
"time"
6
8
7
- "cdr.dev/slog"
8
-
9
9
"github.com/prometheus/client_golang/prometheus"
10
+ "golang.org/x/xerrors"
11
+
12
+ "cdr.dev/slog"
10
13
11
14
"github.com/coder/coder/v2/coderd/database"
12
- "github.com/coder/coder/v2/coderd/database/dbauthz "
15
+ "github.com/coder/coder/v2/coderd/database/dbtime "
13
16
"github.com/coder/coder/v2/coderd/prebuilds"
14
17
)
15
18
@@ -55,20 +58,34 @@ var (
55
58
labels ,
56
59
nil ,
57
60
)
61
+ lastUpdateDesc = prometheus .NewDesc (
62
+ "coderd_prebuilt_workspaces_metrics_last_updated" ,
63
+ "The unix timestamp when the metrics related to prebuilt workspaces were last updated; these metrics are cached." ,
64
+ []string {},
65
+ nil ,
66
+ )
67
+ )
68
+
// Timing values for refreshing the cached prebuilds metrics state.
// NOTE(review): neither constant is referenced in the visible part of this file; presumably they are
// the arguments callers pass to BackgroundFetch — confirm against the call site.
const (
	// metricsUpdateInterval is the period between refreshes of the cached state.
	metricsUpdateInterval = 15 * time.Second
	// metricsUpdateTimeout is the time limit for a single refresh.
	metricsUpdateTimeout = 10 * time.Second
)
59
73
60
74
type MetricsCollector struct {
61
75
database database.Store
62
76
logger slog.Logger
63
77
snapshotter prebuilds.StateSnapshotter
78
+
79
+ latestState atomic.Pointer [metricsState ]
64
80
}
65
81
66
82
var _ prometheus.Collector = new (MetricsCollector )
67
83
68
84
func NewMetricsCollector (db database.Store , logger slog.Logger , snapshotter prebuilds.StateSnapshotter ) * MetricsCollector {
85
+ log := logger .Named ("prebuilds_metrics_collector" )
69
86
return & MetricsCollector {
70
87
database : db ,
71
- logger : logger . Named ( "prebuilds_metrics_collector" ) ,
88
+ logger : log ,
72
89
snapshotter : snapshotter ,
73
90
}
74
91
}
@@ -80,38 +97,34 @@ func (*MetricsCollector) Describe(descCh chan<- *prometheus.Desc) {
80
97
descCh <- desiredPrebuildsDesc
81
98
descCh <- runningPrebuildsDesc
82
99
descCh <- eligiblePrebuildsDesc
100
+ descCh <- lastUpdateDesc
83
101
}
84
102
103
+ // Collect uses the cached state to set configured metrics.
104
+ // The state is cached because this function can be called multiple times per second and retrieving the current state
105
+ // is an expensive operation.
85
106
func (mc * MetricsCollector ) Collect (metricsCh chan <- prometheus.Metric ) {
86
- // nolint:gocritic // We need to set an authz context to read metrics from the db.
87
- ctx , cancel := context .WithTimeout (dbauthz .AsPrebuildsOrchestrator (context .Background ()), 10 * time .Second )
88
- defer cancel ()
89
- prebuildMetrics , err := mc .database .GetPrebuildMetrics (ctx )
90
- if err != nil {
91
- mc .logger .Error (ctx , "failed to get prebuild metrics" , slog .Error (err ))
107
+ currentState := mc .latestState .Load () // Grab a copy; it's ok if it goes stale during the course of this func.
108
+ if currentState == nil {
109
+ mc .logger .Warn (context .Background (), "failed to set prebuilds metrics; state not set" )
110
+ metricsCh <- prometheus .MustNewConstMetric (lastUpdateDesc , prometheus .GaugeValue , 0 )
92
111
return
93
112
}
94
113
95
- for _ , metric := range prebuildMetrics {
114
+ for _ , metric := range currentState . prebuildMetrics {
96
115
metricsCh <- prometheus .MustNewConstMetric (createdPrebuildsDesc , prometheus .CounterValue , float64 (metric .CreatedCount ), metric .TemplateName , metric .PresetName , metric .OrganizationName )
97
116
metricsCh <- prometheus .MustNewConstMetric (failedPrebuildsDesc , prometheus .CounterValue , float64 (metric .FailedCount ), metric .TemplateName , metric .PresetName , metric .OrganizationName )
98
117
metricsCh <- prometheus .MustNewConstMetric (claimedPrebuildsDesc , prometheus .CounterValue , float64 (metric .ClaimedCount ), metric .TemplateName , metric .PresetName , metric .OrganizationName )
99
118
}
100
119
101
- snapshot , err := mc .snapshotter .SnapshotState (ctx , mc .database )
102
- if err != nil {
103
- mc .logger .Error (ctx , "failed to get latest prebuild state" , slog .Error (err ))
104
- return
105
- }
106
-
107
- for _ , preset := range snapshot .Presets {
120
+ for _ , preset := range currentState .snapshot .Presets {
108
121
if ! preset .UsingActiveVersion {
109
122
continue
110
123
}
111
124
112
- presetSnapshot , err := snapshot .FilterByPreset (preset .ID )
125
+ presetSnapshot , err := currentState . snapshot .FilterByPreset (preset .ID )
113
126
if err != nil {
114
- mc .logger .Error (ctx , "failed to filter by preset" , slog .Error (err ))
127
+ mc .logger .Error (context . Background () , "failed to filter by preset" , slog .Error (err ))
115
128
continue
116
129
}
117
130
state := presetSnapshot .CalculateState ()
@@ -120,4 +133,57 @@ func (mc *MetricsCollector) Collect(metricsCh chan<- prometheus.Metric) {
120
133
metricsCh <- prometheus .MustNewConstMetric (runningPrebuildsDesc , prometheus .GaugeValue , float64 (state .Actual ), preset .TemplateName , preset .Name , preset .OrganizationName )
121
134
metricsCh <- prometheus .MustNewConstMetric (eligiblePrebuildsDesc , prometheus .GaugeValue , float64 (state .Eligible ), preset .TemplateName , preset .Name , preset .OrganizationName )
122
135
}
136
+
137
+ metricsCh <- prometheus .MustNewConstMetric (lastUpdateDesc , prometheus .GaugeValue , float64 (currentState .createdAt .Unix ()))
138
+ }
139
+
140
+ type metricsState struct {
141
+ prebuildMetrics []database.GetPrebuildMetricsRow
142
+ snapshot * prebuilds.GlobalSnapshot
143
+ createdAt time.Time
144
+ }
145
+
146
+ // BackgroundFetch updates the metrics state every given interval.
147
+ func (mc * MetricsCollector ) BackgroundFetch (ctx context.Context , updateInterval , updateTimeout time.Duration ) {
148
+ tick := time .NewTicker (time .Nanosecond )
149
+ defer tick .Stop ()
150
+
151
+ for {
152
+ select {
153
+ case <- ctx .Done ():
154
+ return
155
+ case <- tick .C :
156
+ // Tick immediately, then set regular interval.
157
+ tick .Reset (updateInterval )
158
+
159
+ if err := mc .UpdateState (ctx , updateTimeout ); err != nil {
160
+ mc .logger .Error (ctx , "failed to update prebuilds metrics state" , slog .Error (err ))
161
+ }
162
+ }
163
+ }
164
+ }
165
+
166
+ // UpdateState builds the current metrics state.
167
+ func (mc * MetricsCollector ) UpdateState (ctx context.Context , timeout time.Duration ) error {
168
+ start := time .Now ()
169
+ fetchCtx , fetchCancel := context .WithTimeout (ctx , timeout )
170
+ defer fetchCancel ()
171
+
172
+ prebuildMetrics , err := mc .database .GetPrebuildMetrics (fetchCtx )
173
+ if err != nil {
174
+ return xerrors .Errorf ("fetch prebuild metrics: %w" , err )
175
+ }
176
+
177
+ snapshot , err := mc .snapshotter .SnapshotState (fetchCtx , mc .database )
178
+ if err != nil {
179
+ return xerrors .Errorf ("snapshot state: %w" , err )
180
+ }
181
+ mc .logger .Debug (ctx , "fetched prebuilds metrics state" , slog .F ("duration_secs" , fmt .Sprintf ("%.2f" , time .Since (start ).Seconds ())))
182
+
183
+ mc .latestState .Store (& metricsState {
184
+ prebuildMetrics : prebuildMetrics ,
185
+ snapshot : snapshot ,
186
+ createdAt : dbtime .Now (),
187
+ })
188
+ return nil
123
189
}
0 commit comments