Skip to content

Commit ec8e41f

Browse files
authored
chore: add logging around agent app health reporting (#12071)
1 parent c0e169e commit ec8e41f

File tree

2 files changed

+21
-1
lines changed

2 files changed

+21
-1
lines changed

agent/apphealth.go

Lines changed: 16 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -26,7 +26,12 @@ type WorkspaceAppHealthReporter func(ctx context.Context)
2626

2727
// NewWorkspaceAppHealthReporter creates a WorkspaceAppHealthReporter that reports app health to coderd.
2828
func NewWorkspaceAppHealthReporter(logger slog.Logger, apps []codersdk.WorkspaceApp, postWorkspaceAgentAppHealth PostWorkspaceAgentAppHealth) WorkspaceAppHealthReporter {
29+
logger = logger.Named("apphealth")
30+
2931
runHealthcheckLoop := func(ctx context.Context) error {
32+
ctx, cancel := context.WithCancel(ctx)
33+
defer cancel()
34+
3035
// no need to run this loop if no apps for this workspace.
3136
if len(apps) == 0 {
3237
return nil
@@ -87,6 +92,7 @@ func NewWorkspaceAppHealthReporter(logger slog.Logger, apps []codersdk.Workspace
8792
return nil
8893
}()
8994
if err != nil {
95+
nowUnhealthy := false
9096
mu.Lock()
9197
if failures[app.ID] < int(app.Healthcheck.Threshold) {
9298
// increment the failure count and keep status the same.
@@ -96,14 +102,21 @@ func NewWorkspaceAppHealthReporter(logger slog.Logger, apps []codersdk.Workspace
96102
// set to unhealthy if we hit the failure threshold.
97103
// we stop incrementing at the threshold to prevent the failure value from increasing forever.
98104
health[app.ID] = codersdk.WorkspaceAppHealthUnhealthy
105+
nowUnhealthy = true
99106
}
100107
mu.Unlock()
108+
logger.Debug(ctx, "error checking app health",
109+
slog.F("id", app.ID.String()),
110+
slog.F("slug", app.Slug),
111+
slog.F("now_unhealthy", nowUnhealthy), slog.Error(err),
112+
)
101113
} else {
102114
mu.Lock()
103115
// we only need one successful health check to be considered healthy.
104116
health[app.ID] = codersdk.WorkspaceAppHealthHealthy
105117
failures[app.ID] = 0
106118
mu.Unlock()
119+
logger.Debug(ctx, "workspace app healthy", slog.F("id", app.ID.String()), slog.F("slug", app.Slug))
107120
}
108121

109122
t.Reset(time.Duration(app.Healthcheck.Interval) * time.Second)
@@ -137,7 +150,9 @@ func NewWorkspaceAppHealthReporter(logger slog.Logger, apps []codersdk.Workspace
137150
Healths: lastHealth,
138151
})
139152
if err != nil {
140-
logger.Error(ctx, "failed to report workspace app stat", slog.Error(err))
153+
logger.Error(ctx, "failed to report workspace app health", slog.Error(err))
154+
} else {
155+
logger.Debug(ctx, "sent workspace app health", slog.F("health", lastHealth))
141156
}
142157
}
143158
}

coderd/agentapi/apps.go

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -24,6 +24,11 @@ func (a *AppsAPI) BatchUpdateAppHealths(ctx context.Context, req *agentproto.Bat
2424
return nil, err
2525
}
2626

27+
a.Log.Debug(ctx, "got batch app health update",
28+
slog.F("agent_id", workspaceAgent.ID.String()),
29+
slog.F("updates", req.Updates),
30+
)
31+
2732
if len(req.Updates) == 0 {
2833
return &agentproto.BatchUpdateAppHealthResponse{}, nil
2934
}

0 commit comments

Comments
 (0)