Skip to content

Commit 4a702df

Browse files
committed
chore: add logging around agent app health reporting
1 parent c0e169e commit 4a702df

File tree

2 files changed

+17
-1
lines changed

2 files changed

+17
-1
lines changed

agent/apphealth.go

Lines changed: 12 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -26,7 +26,12 @@ type WorkspaceAppHealthReporter func(ctx context.Context)
2626

2727
// NewWorkspaceAppHealthReporter creates a WorkspaceAppHealthReporter that reports app health to coderd.
2828
func NewWorkspaceAppHealthReporter(logger slog.Logger, apps []codersdk.WorkspaceApp, postWorkspaceAgentAppHealth PostWorkspaceAgentAppHealth) WorkspaceAppHealthReporter {
29+
logger = logger.Named("apphealth")
30+
2931
runHealthcheckLoop := func(ctx context.Context) error {
32+
ctx, cancel := context.WithCancel(ctx)
33+
defer cancel()
34+
3035
// no need to run this loop if no apps for this workspace.
3136
if len(apps) == 0 {
3237
return nil
@@ -87,6 +92,7 @@ func NewWorkspaceAppHealthReporter(logger slog.Logger, apps []codersdk.Workspace
8792
return nil
8893
}()
8994
if err != nil {
95+
nowUnhealthy := false
9096
mu.Lock()
9197
if failures[app.ID] < int(app.Healthcheck.Threshold) {
9298
// increment the failure count and keep status the same.
@@ -96,14 +102,17 @@ func NewWorkspaceAppHealthReporter(logger slog.Logger, apps []codersdk.Workspace
96102
// set to unhealthy if we hit the failure threshold.
97103
// we stop incrementing at the threshold to prevent the failure value from increasing forever.
98104
health[app.ID] = codersdk.WorkspaceAppHealthUnhealthy
105+
nowUnhealthy = true
99106
}
100107
mu.Unlock()
108+
logger.Debug(ctx, "error checking app health", slog.F("id", app.ID.String()), slog.F("slug", app.Slug), slog.F("now_unhealthy", nowUnhealthy), slog.Error(err))
101109
} else {
102110
mu.Lock()
103111
// we only need one successful health check to be considered healthy.
104112
health[app.ID] = codersdk.WorkspaceAppHealthHealthy
105113
failures[app.ID] = 0
106114
mu.Unlock()
115+
logger.Debug(ctx, "workspace app healthy", slog.F("id", app.ID.String()), slog.F("slug", app.Slug))
107116
}
108117

109118
t.Reset(time.Duration(app.Healthcheck.Interval) * time.Second)
@@ -137,7 +146,9 @@ func NewWorkspaceAppHealthReporter(logger slog.Logger, apps []codersdk.Workspace
137146
Healths: lastHealth,
138147
})
139148
if err != nil {
140-
logger.Error(ctx, "failed to report workspace app stat", slog.Error(err))
149+
logger.Error(ctx, "failed to report workspace app health", slog.Error(err))
150+
} else {
151+
logger.Debug(ctx, "sent workspace app health", slog.F("health", lastHealth))
141152
}
142153
}
143154
}

coderd/agentapi/apps.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,11 @@ func (a *AppsAPI) BatchUpdateAppHealths(ctx context.Context, req *agentproto.Bat
2424
return nil, err
2525
}
2626

27+
a.Log.Debug(ctx, "got batch app health update",
28+
slog.F("agent_id", workspaceAgent.ID.String()),
29+
slog.F("updates", req.Updates),
30+
)
31+
2732
if len(req.Updates) == 0 {
2833
return &agentproto.BatchUpdateAppHealthResponse{}, nil
2934
}

0 commit comments

Comments
 (0)