@@ -24,10 +24,12 @@ import (
24
24
"github.com/coder/coder/v2/tailnet"
25
25
)
26
26
27
+ const defaultRefreshRate = time .Minute
28
+
27
29
// ActiveUsers tracks the number of users that have authenticated within the past hour.
28
30
func ActiveUsers (ctx context.Context , registerer prometheus.Registerer , db database.Store , duration time.Duration ) (func (), error ) {
29
31
if duration == 0 {
30
- duration = 5 * time . Minute
32
+ duration = defaultRefreshRate
31
33
}
32
34
33
35
gauge := prometheus .NewGauge (prometheus.GaugeOpts {
@@ -72,36 +74,42 @@ func ActiveUsers(ctx context.Context, registerer prometheus.Registerer, db datab
72
74
}
73
75
74
76
// Workspaces tracks the latest workspace builds by status, and the current workspace statuses labeled by template, template version, owner, and transition.
75
- func Workspaces (ctx context.Context , registerer prometheus.Registerer , db database.Store , duration time.Duration ) (func (), error ) {
77
+ func Workspaces (ctx context.Context , logger slog. Logger , registerer prometheus.Registerer , db database.Store , duration time.Duration ) (func (), error ) {
76
78
if duration == 0 {
77
- duration = 5 * time . Minute
79
+ duration = defaultRefreshRate
78
80
}
79
81
80
- gauge := prometheus .NewGaugeVec (prometheus.GaugeOpts {
82
+ workspaceLatestBuildTotals := prometheus .NewGaugeVec (prometheus.GaugeOpts {
81
83
Namespace : "coderd" ,
82
84
Subsystem : "api" ,
83
85
Name : "workspace_latest_build_total" ,
84
- Help : "The latest workspace builds with a status." ,
86
+ Help : "The current number of workspace builds by status." ,
85
87
}, []string {"status" })
86
- err := registerer .Register (gauge )
87
- if err != nil {
88
+ if err := registerer .Register (workspaceLatestBuildTotals ); err != nil {
89
+ return nil , err
90
+ }
91
+
92
+ workspaceLatestBuildStatuses := prometheus .NewGaugeVec (prometheus.GaugeOpts {
93
+ Namespace : "coderd" ,
94
+ Name : "workspace_latest_build_status" ,
95
+ Help : "The current workspace statuses by template, transition, and owner." ,
96
+ }, []string {"status" , "template_name" , "template_version" , "workspace_owner" , "workspace_transition" })
97
+ if err := registerer .Register (workspaceLatestBuildStatuses ); err != nil {
88
98
return nil , err
89
99
}
90
- // This exists so the prometheus metric exports immediately when set.
91
- // It helps with tests so they don't have to wait for a tick.
92
- gauge .WithLabelValues ("pending" ).Set (0 )
93
100
94
101
ctx , cancelFunc := context .WithCancel (ctx )
95
102
done := make (chan struct {})
96
103
97
- // Use time.Nanosecond to force an initial tick. It will be reset to the
98
- // correct duration after executing once.
99
- ticker := time .NewTicker (time .Nanosecond )
100
- doTick := func () {
101
- defer ticker .Reset (duration )
102
-
104
+ updateWorkspaceTotals := func () {
103
105
builds , err := db .GetLatestWorkspaceBuilds (ctx )
104
106
if err != nil {
107
+ if errors .Is (err , sql .ErrNoRows ) {
108
+ // clear all series if there are no database entries
109
+ workspaceLatestBuildTotals .Reset ()
110
+ }
111
+
112
+ logger .Warn (ctx , "failed to load latest workspace builds" , slog .Error (err ))
105
113
return
106
114
}
107
115
jobIDs := make ([]uuid.UUID , 0 , len (builds ))
@@ -110,16 +118,53 @@ func Workspaces(ctx context.Context, registerer prometheus.Registerer, db databa
110
118
}
111
119
jobs , err := db .GetProvisionerJobsByIDs (ctx , jobIDs )
112
120
if err != nil {
121
+ ids := make ([]string , 0 , len (jobIDs ))
122
+ for _ , id := range jobIDs {
123
+ ids = append (ids , id .String ())
124
+ }
125
+
126
+ logger .Warn (ctx , "failed to load provisioner jobs" , slog .F ("ids" , ids ), slog .Error (err ))
113
127
return
114
128
}
115
129
116
- gauge .Reset ()
130
+ workspaceLatestBuildTotals .Reset ()
117
131
for _ , job := range jobs {
118
132
status := codersdk .ProvisionerJobStatus (job .JobStatus )
119
- gauge .WithLabelValues (string (status )).Add (1 )
133
+ workspaceLatestBuildTotals .WithLabelValues (string (status )).Add (1 )
120
134
}
121
135
}
122
136
137
+ updateWorkspaceStatuses := func () {
138
+ ws , err := db .GetWorkspaces (ctx , database.GetWorkspacesParams {
139
+ Deleted : false ,
140
+ WithSummary : false ,
141
+ })
142
+ if err != nil {
143
+ if errors .Is (err , sql .ErrNoRows ) {
144
+ // clear all series if there are no database entries
145
+ workspaceLatestBuildStatuses .Reset ()
146
+ }
147
+
148
+ logger .Warn (ctx , "failed to load active workspaces" , slog .Error (err ))
149
+ return
150
+ }
151
+
152
+ workspaceLatestBuildStatuses .Reset ()
153
+ for _ , w := range ws {
154
+ workspaceLatestBuildStatuses .WithLabelValues (string (w .LatestBuildStatus ), w .TemplateName , w .TemplateVersionName .String , w .Username , string (w .LatestBuildTransition )).Add (1 )
155
+ }
156
+ }
157
+
158
+ // Use time.Nanosecond to force an initial tick. It will be reset to the
159
+ // correct duration after executing once.
160
+ ticker := time .NewTicker (time .Nanosecond )
161
+ doTick := func () {
162
+ defer ticker .Reset (duration )
163
+
164
+ updateWorkspaceTotals ()
165
+ updateWorkspaceStatuses ()
166
+ }
167
+
123
168
go func () {
124
169
defer close (done )
125
170
defer ticker .Stop ()
@@ -141,7 +186,7 @@ func Workspaces(ctx context.Context, registerer prometheus.Registerer, db databa
141
186
// Agents tracks Prometheus gauges describing agents.
142
187
func Agents (ctx context.Context , logger slog.Logger , registerer prometheus.Registerer , db database.Store , coordinator * atomic.Pointer [tailnet.Coordinator ], derpMapFn func () * tailcfg.DERPMap , agentInactiveDisconnectTimeout , duration time.Duration ) (func (), error ) {
143
188
if duration == 0 {
144
- duration = 1 * time . Minute
189
+ duration = defaultRefreshRate
145
190
}
146
191
147
192
agentsGauge := NewCachedGaugeVec (prometheus .NewGaugeVec (prometheus.GaugeOpts {
@@ -330,7 +375,7 @@ func Agents(ctx context.Context, logger slog.Logger, registerer prometheus.Regis
330
375
331
376
func AgentStats (ctx context.Context , logger slog.Logger , registerer prometheus.Registerer , db database.Store , initialCreateAfter time.Time , duration time.Duration , aggregateByLabels []string ) (func (), error ) {
332
377
if duration == 0 {
333
- duration = 1 * time . Minute
378
+ duration = defaultRefreshRate
334
379
}
335
380
336
381
if len (aggregateByLabels ) == 0 {
0 commit comments