Skip to content

Commit 706f7ce

Browse files
committed
fix(agent/agentcontainers): avoid logspam in API updaterLoop
Fixes #18709
1 parent 0b82f41 commit 706f7ce

File tree

2 files changed

+107
-1
lines changed

2 files changed

+107
-1
lines changed

agent/agentcontainers/api.go

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -449,6 +449,7 @@ func (api *API) updaterLoop() {
449449
// We utilize a TickerFunc here instead of a regular Ticker so that
450450
// we can guarantee execution of the updateContainers method after
451451
// advancing the clock.
452+
var prevErr error
452453
ticker := api.clock.TickerFunc(api.ctx, api.updateInterval, func() error {
453454
done := make(chan error, 1)
454455
var sent bool
@@ -467,8 +468,14 @@ func (api *API) updaterLoop() {
467468
if errors.Is(err, context.Canceled) {
468469
api.logger.Warn(api.ctx, "updater loop ticker canceled", slog.Error(err))
469470
} else {
470-
api.logger.Error(api.ctx, "updater loop ticker failed", slog.Error(err))
471+
// Avoid excessive logging of the same error.
472+
if prevErr == nil || prevErr.Error() != err.Error() {
473+
api.logger.Error(api.ctx, "updater loop ticker failed", slog.Error(err))
474+
}
471475
}
476+
prevErr = err
477+
} else {
478+
prevErr = nil
472479
}
473480
default:
474481
api.logger.Debug(api.ctx, "updater loop ticker skipped, update in progress")

agent/agentcontainers/api_test.go

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ import (
2626
"golang.org/x/xerrors"
2727

2828
"cdr.dev/slog"
29+
"cdr.dev/slog/sloggers/sloghuman"
2930
"cdr.dev/slog/sloggers/slogtest"
3031
"github.com/coder/coder/v2/agent/agentcontainers"
3132
"github.com/coder/coder/v2/agent/agentcontainers/acmock"
@@ -342,6 +343,104 @@ func (f *fakeExecer) getLastCommand() *exec.Cmd {
342343
func TestAPI(t *testing.T) {
343344
t.Parallel()
344345

346+
t.Run("NoUpdaterLoopLogspam", func(t *testing.T) {
347+
t.Parallel()
348+
349+
var (
350+
ctx = testutil.Context(t, testutil.WaitShort)
351+
logbuf strings.Builder
352+
logger = slogtest.Make(t, &slogtest.Options{IgnoreErrors: true}).Leveled(slog.LevelDebug).AppendSinks(sloghuman.Sink(&logbuf))
353+
mClock = quartz.NewMock(t)
354+
tickerTrap = mClock.Trap().TickerFunc("updaterLoop")
355+
firstErr = xerrors.New("first error")
356+
secondErr = xerrors.New("second error")
357+
fakeCLI = &fakeContainerCLI{
358+
listErr: firstErr,
359+
}
360+
)
361+
362+
api := agentcontainers.NewAPI(logger,
363+
agentcontainers.WithClock(mClock),
364+
agentcontainers.WithContainerCLI(fakeCLI),
365+
)
366+
api.Start()
367+
defer api.Close()
368+
369+
// Make sure the ticker function has been registered
370+
// before advancing the clock.
371+
tickerTrap.MustWait(ctx).MustRelease(ctx)
372+
tickerTrap.Close()
373+
374+
logbuf.Reset()
375+
376+
// First tick should handle the error.
377+
_, aw := mClock.AdvanceNext()
378+
aw.MustWait(ctx)
379+
380+
// Verify first error is logged.
381+
got := logbuf.String()
382+
t.Logf("got log: %q", got)
383+
require.Contains(t, got, "updater loop ticker failed", "first error should be logged")
384+
require.Contains(t, got, "first error", "should contain first error message")
385+
logbuf.Reset()
386+
387+
// Second tick should handle the same error without logging it again.
388+
_, aw = mClock.AdvanceNext()
389+
aw.MustWait(ctx)
390+
391+
// Verify same error is not logged again.
392+
got = logbuf.String()
393+
t.Logf("got log: %q", got)
394+
require.Empty(t, got, "same error should not be logged again")
395+
396+
// Change to a different error.
397+
fakeCLI.listErr = secondErr
398+
399+
// Third tick should handle the different error and log it.
400+
_, aw = mClock.AdvanceNext()
401+
aw.MustWait(ctx)
402+
403+
// Verify different error is logged.
404+
got = logbuf.String()
405+
t.Logf("got log: %q", got)
406+
require.Contains(t, got, "updater loop ticker failed", "different error should be logged")
407+
require.Contains(t, got, "second error", "should contain second error message")
408+
logbuf.Reset()
409+
410+
// Clear the error to simulate success.
411+
fakeCLI.listErr = nil
412+
413+
// Fourth tick should succeed.
414+
_, aw = mClock.AdvanceNext()
415+
aw.MustWait(ctx)
416+
417+
// Fifth tick should continue to succeed.
418+
_, aw = mClock.AdvanceNext()
419+
aw.MustWait(ctx)
420+
421+
// Verify successful operations are logged properly.
422+
got = logbuf.String()
423+
t.Logf("got log: %q", got)
424+
gotSuccessCount := strings.Count(got, "containers updated successfully")
425+
require.GreaterOrEqual(t, gotSuccessCount, 2, "should have successful update got")
426+
require.NotContains(t, got, "updater loop ticker failed", "no errors should be logged during success")
427+
logbuf.Reset()
428+
429+
// Reintroduce the original error.
430+
fakeCLI.listErr = firstErr
431+
432+
// Sixth tick should handle the error after success and log it.
433+
_, aw = mClock.AdvanceNext()
434+
aw.MustWait(ctx)
435+
436+
// Verify error after success is logged.
437+
got = logbuf.String()
438+
t.Logf("got log: %q", got)
439+
require.Contains(t, got, "updater loop ticker failed", "error after success should be logged")
440+
require.Contains(t, got, "first error", "should contain first error message")
441+
logbuf.Reset()
442+
})
443+
345444
// List tests the API.getContainers method using a mock
346445
// implementation. It specifically tests caching behavior.
347446
t.Run("List", func(t *testing.T) {

0 commit comments

Comments
 (0)