@@ -2,13 +2,20 @@ package prometheusmetrics
2
2
3
3
import (
4
4
"context"
5
+ "fmt"
6
+ "log"
7
+ "strconv"
8
+ "strings"
9
+ "sync/atomic"
5
10
"time"
6
11
7
12
"github.com/google/uuid"
8
13
"github.com/prometheus/client_golang/prometheus"
14
+ "tailscale.com/tailcfg"
9
15
10
16
"github.com/coder/coder/coderd"
11
17
"github.com/coder/coder/coderd/database"
18
+ "github.com/coder/coder/tailnet"
12
19
)
13
20
14
21
// ActiveUsers tracks the number of users that have authenticated within the past hour.
@@ -108,7 +115,7 @@ func Workspaces(ctx context.Context, registerer prometheus.Registerer, db databa
108
115
}
109
116
110
117
// Agents tracks the connection status of workspace agents and their DERP region latencies.
111
- func Agents (ctx context.Context , registerer prometheus.Registerer , db database.Store , duration time.Duration ) (context.CancelFunc , error ) {
118
+ func Agents (ctx context.Context , registerer prometheus.Registerer , db database.Store , coordinator * atomic. Pointer [tailnet. Coordinator ], derpMap * tailcfg. DERPMap , duration time.Duration ) (context.CancelFunc , error ) {
112
119
if duration == 0 {
113
120
duration = 15 * time .Second // TODO 5 * time.Minute
114
121
}
@@ -124,23 +131,26 @@ func Agents(ctx context.Context, registerer prometheus.Registerer, db database.S
124
131
return nil , err
125
132
}
126
133
127
- agentsUserLatenciesHistogram := prometheus .NewHistogramVec (prometheus.HistogramOpts {
134
+ agentsUserLatenciesGauge := prometheus .NewGaugeVec (prometheus.GaugeOpts {
128
135
Namespace : "coderd" ,
129
136
Subsystem : "agents" ,
130
137
Name : "user_latencies_seconds" ,
131
138
Help : "The user's agent latency in seconds." ,
132
- Buckets : []float64 {0.001 , 0.005 , 0.010 , 0.025 , 0.050 , 0.100 , 0.500 , 1 , 5 , 10 , 30 },
133
- }, []string {"agent_id" , "workspace" , "connection_type" , "ide" })
134
- err = registerer .Register (agentsUserLatenciesHistogram )
139
+ }, []string {"agent_id" , "workspace_name" , "derp_region" , "preferred" })
140
+ err = registerer .Register (agentsUserLatenciesGauge )
135
141
if err != nil {
136
142
return nil , err
137
143
}
138
144
145
+ // FIXME connection_type ide
146
+
139
147
ctx , cancelFunc := context .WithCancel (ctx )
140
148
ticker := time .NewTicker (duration )
141
149
go func () {
142
150
defer ticker .Stop ()
143
151
for {
152
+ log .Println ("Agents!!!" )
153
+
144
154
select {
145
155
case <- ctx .Done ():
146
156
return
@@ -151,30 +161,70 @@ func Agents(ctx context.Context, registerer prometheus.Registerer, db database.S
151
161
152
162
builds , err := db .GetLatestWorkspaceBuilds (ctx )
153
163
if err != nil {
164
+ log .Println ("1" , err )
154
165
continue
155
166
}
156
167
157
168
agentsConnectionGauge .Reset ()
169
+ agentsUserLatenciesGauge .Reset ()
158
170
for _ , build := range builds {
159
171
workspace , err := db .GetWorkspaceByID (ctx , build .WorkspaceID )
160
172
if err != nil {
173
+ log .Println ("2" , err )
161
174
continue
162
175
}
163
176
164
177
agents , err := db .GetWorkspaceAgentsInLatestBuildByWorkspaceID (ctx , build .WorkspaceID )
165
178
if err != nil {
179
+ log .Println ("3" , err )
166
180
continue
167
181
}
168
182
169
183
if len (agents ) == 0 {
170
184
continue
171
185
}
172
186
187
+ // FIXME publish workspace even if no agents
188
+
173
189
for _ , agent := range agents {
174
190
connectionStatus := agent .Status (6 * time .Second )
175
191
176
192
// FIXME AgentInactiveDisconnectTimeout
193
+ log .Println ("with value " + agent .Name )
177
194
agentsConnectionGauge .WithLabelValues (agent .Name , workspace .Name , string (connectionStatus .Status )).Set (1 )
195
+
196
+ node := (* coordinator .Load ()).Node (agent .ID )
197
+ if node != nil {
198
+ log .Println ("coordinator" )
199
+
200
+ for rawRegion , latency := range node .DERPLatency {
201
+ log .Println (rawRegion , latency )
202
+
203
+ regionParts := strings .SplitN (rawRegion , "-" , 2 )
204
+ regionID , err := strconv .Atoi (regionParts [0 ])
205
+ if err != nil {
206
+ continue // xerrors.Errorf("convert derp region id %q: %w", rawRegion, err)
207
+ }
208
+ region , found := derpMap .Regions [regionID ]
209
+ if ! found {
210
+ // It's possible that a workspace agent is using an old DERPMap
211
+ // and reports regions that do not exist. If that's the case,
212
+ // report the region as unknown!
213
+ region = & tailcfg.DERPRegion {
214
+ RegionID : regionID ,
215
+ RegionName : fmt .Sprintf ("Unnamed %d" , regionID ),
216
+ }
217
+ }
218
+
219
+ log .Println (region , latency )
220
+ agentsUserLatenciesGauge .WithLabelValues (agent .Name , workspace .Name , region .RegionName , fmt .Sprintf ("%v" , node .PreferredDERP == regionID )).Set (latency )
221
+ }
222
+ } else {
223
+ log .Println ("node is null" )
224
+ }
225
+
226
+ // FIXME publish agent even if DERP is missing
227
+ // FIXME IDE?
178
228
}
179
229
}
180
230
}
0 commit comments