@@ -258,7 +258,7 @@ func (a *agent) collectMetadata(ctx context.Context, md codersdk.WorkspaceAgentM
 	return result
 }
 
-func convertInterval(i int64) time.Duration {
+func adjustIntervalForTests(i int64) time.Duration {
 	// In tests we want to set shorter intervals because engineers are
 	// impatient.
 	base := time.Second
@@ -274,12 +274,14 @@ type metadataResultAndKey struct {
 }
 
 func (a *agent) reportMetadataLoop(ctx context.Context) {
-	baseInterval := convertInterval(1)
+	baseInterval := adjustIntervalForTests(1)
+
+	const metadataLimit = 128
 
 	var (
 		baseTicker       = time.NewTicker(baseInterval)
 		lastCollectedAts = make(map[string]time.Time)
-		metadataResults  = make(chan metadataResultAndKey, 16)
+		metadataResults  = make(chan metadataResultAndKey, metadataLimit)
 	)
 	defer baseTicker.Stop()
 
@@ -294,15 +296,13 @@ func (a *agent) reportMetadataLoop(ctx context.Context) {
 				a.logger.Error(ctx, "report metadata", slog.Error(err))
 			}
 		case <-baseTicker.C:
-			break
 		}
 
-		if len(metadataResults) > cap(metadataResults)/2 {
+		if len(metadataResults) > 0 {
 			// If we're backpressured on sending back results, we risk
 			// runaway goroutine growth and/or overloading coderd. So,
-			// we just skip the collection. Since we never update
-			// the collections map, we'll retry the collection
-			// on the next tick.
+			// we just skip the collection and give the loop another chance to
+			// post metadata.
 			a.logger.Debug(
 				ctx, "metadata collection backpressured",
 				slog.F("queue_len", len(metadataResults)),
@@ -314,6 +314,15 @@ func (a *agent) reportMetadataLoop(ctx context.Context) {
 		if manifest == nil {
 			continue
 		}
+
+		if len(manifest.Metadata) > metadataLimit {
+			a.logger.Error(
+				ctx, "metadata limit exceeded",
+				slog.F("limit", metadataLimit), slog.F("got", len(manifest.Metadata)),
+			)
+			continue
+		}
+
 		// If the manifest changes (e.g. on agent reconnect) we need to
 		// purge old cache values to prevent lastCollectedAt from growing
 		// boundlessly.
@@ -337,7 +346,7 @@ func (a *agent) reportMetadataLoop(ctx context.Context) {
 				continue
 			}
 			if collectedAt.Add(
-				convertInterval(md.Interval),
+				adjustIntervalForTests(md.Interval),
 			).After(time.Now()) {
 				continue
 			}
@@ -351,6 +360,10 @@ func (a *agent) reportMetadataLoop(ctx context.Context) {
 					key:    md.Key,
 					result: a.collectMetadata(ctx, md),
 				}:
+				default:
+					// This should be impossible because the channel is empty
+					// before we start spinning up send goroutines.
+					a.logger.Error(ctx, "metadataResults channel full")
 				}
 			}(md)
 		}
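The renamed helper only shows its first few lines in this hunk. As a hedged illustration (not the function's actual body, which falls outside the diff), a helper of this shape typically multiplies the per-item interval by a base unit and shrinks that base when running under `go test`. The `flag.Lookup("test.v")` check and the 100ms base below are assumptions for the sketch:

```go
package main

import (
	"flag"
	"fmt"
	"time"
)

// adjustIntervalForTests is a hypothetical sketch, not the agent's real body:
// it scales an interval given in whole units by a base duration, and shrinks
// that base when the binary is running under `go test`.
func adjustIntervalForTests(i int64) time.Duration {
	// In tests we want to set shorter intervals because engineers are
	// impatient.
	base := time.Second
	if flag.Lookup("test.v") != nil { // test flags are only registered in test binaries
		base = 100 * time.Millisecond
	}
	return time.Duration(i) * base
}

func main() {
	fmt.Println(adjustIntervalForTests(1)) // 1s in a production binary
}
```

Whatever detection mechanism the real helper uses, centralizing it in one function keeps the production interval and the test speed-up in a single place, which the rename makes explicit.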
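For context on what the loop now relies on, here is a minimal, self-contained sketch of the same pattern, assuming a toy `result` type and illustrative key names rather than the agent's real fields: collection is skipped while earlier results are still queued on the bounded channel, and the `default:` branch of the send only fires if that channel somehow fills up.

```go
package main

import (
	"fmt"
	"time"
)

// result stands in for metadataResultAndKey; the fields are illustrative only.
type result struct {
	key   string
	value string
}

func main() {
	const limit = 4 // analogous to metadataLimit, sized above the number of keys
	results := make(chan result, limit)
	keys := []string{"cpu", "mem", "disk"}

	ticker := time.NewTicker(10 * time.Millisecond)
	defer ticker.Stop()

	for i := 0; i < 3; i++ {
		<-ticker.C

		// Backpressure guard: if earlier results haven't been drained yet,
		// skip this round of collection instead of piling up goroutines.
		if len(results) > 0 {
			fmt.Println("backpressured, skipping collection")
			continue
		}

		for _, k := range keys {
			go func(k string) {
				select {
				case results <- result{key: k, value: "collected " + k}:
				default:
					// Should be unreachable: the channel was empty before these
					// goroutines were spawned and its capacity exceeds len(keys).
					fmt.Println("results channel full")
				}
			}(k)
		}

		// Drain this round's results.
		for range keys {
			fmt.Println(<-results)
		}
	}
}
```

The agent's loop posts drained results to coderd rather than printing them, but the invariant the new `default:` case depends on is the same: the queue is observed empty before the send goroutines are spawned, and its capacity (`metadataLimit`) bounds the number of in-flight sends.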