Skip to content

Commit 2a000dc

Browse files
committed
Merge branch 'main' into fix-workspace-actions
2 parents 8757691 + 44d6913 commit 2a000dc

38 files changed

+1426
-472
lines changed

agent/apphealth.go

+54-60
Original file line numberDiff line numberDiff line change
@@ -10,14 +10,11 @@ import (
1010
"golang.org/x/xerrors"
1111

1212
"cdr.dev/slog"
13+
"github.com/coder/coder/v2/clock"
1314
"github.com/coder/coder/v2/codersdk"
1415
"github.com/coder/coder/v2/codersdk/agentsdk"
15-
"github.com/coder/retry"
1616
)
1717

18-
// WorkspaceAgentApps fetches the workspace apps.
19-
type WorkspaceAgentApps func(context.Context) ([]codersdk.WorkspaceApp, error)
20-
2118
// PostWorkspaceAgentAppHealth updates the workspace app health.
2219
type PostWorkspaceAgentAppHealth func(context.Context, agentsdk.PostAppHealthsRequest) error
2320

@@ -26,15 +23,26 @@ type WorkspaceAppHealthReporter func(ctx context.Context)
2623

2724
// NewWorkspaceAppHealthReporter creates a WorkspaceAppHealthReporter that reports app health to coderd.
2825
func NewWorkspaceAppHealthReporter(logger slog.Logger, apps []codersdk.WorkspaceApp, postWorkspaceAgentAppHealth PostWorkspaceAgentAppHealth) WorkspaceAppHealthReporter {
26+
return NewAppHealthReporterWithClock(logger, apps, postWorkspaceAgentAppHealth, clock.NewReal())
27+
}
28+
29+
// NewAppHealthReporterWithClock is only called directly by test code. Product code should call
30+
// NewAppHealthReporter.
31+
func NewAppHealthReporterWithClock(
32+
logger slog.Logger,
33+
apps []codersdk.WorkspaceApp,
34+
postWorkspaceAgentAppHealth PostWorkspaceAgentAppHealth,
35+
clk clock.Clock,
36+
) WorkspaceAppHealthReporter {
2937
logger = logger.Named("apphealth")
3038

31-
runHealthcheckLoop := func(ctx context.Context) error {
39+
return func(ctx context.Context) {
3240
ctx, cancel := context.WithCancel(ctx)
3341
defer cancel()
3442

3543
// no need to run this loop if no apps for this workspace.
3644
if len(apps) == 0 {
37-
return nil
45+
return
3846
}
3947

4048
hasHealthchecksEnabled := false
@@ -49,7 +57,7 @@ func NewWorkspaceAppHealthReporter(logger slog.Logger, apps []codersdk.Workspace
4957

5058
// no need to run this loop if no health checks are configured.
5159
if !hasHealthchecksEnabled {
52-
return nil
60+
return
5361
}
5462

5563
// run a ticker for each app health check.
@@ -61,25 +69,29 @@ func NewWorkspaceAppHealthReporter(logger slog.Logger, apps []codersdk.Workspace
6169
}
6270
app := nextApp
6371
go func() {
64-
t := time.NewTicker(time.Duration(app.Healthcheck.Interval) * time.Second)
65-
defer t.Stop()
66-
67-
for {
68-
select {
69-
case <-ctx.Done():
70-
return
71-
case <-t.C:
72-
}
73-
// we set the http timeout to the healthcheck interval to prevent getting too backed up.
74-
client := &http.Client{
75-
Timeout: time.Duration(app.Healthcheck.Interval) * time.Second,
76-
}
72+
_ = clk.TickerFunc(ctx, time.Duration(app.Healthcheck.Interval)*time.Second, func() error {
73+
// We time out at the healthcheck interval to prevent getting too backed up, but
74+
// set it 1ms early so that it's not simultaneous with the next tick in testing,
75+
// which makes the test easier to understand.
76+
//
77+
// It would be idiomatic to use the http.Client.Timeout or a context.WithTimeout,
78+
// but we are passing this off to the native http library, which is not aware
79+
// of the clock library we are using. That means in testing, with a mock clock
80+
// it will compare mocked times with real times, and we will get strange results.
81+
// So, we just implement the timeout as a context we cancel with an AfterFunc
82+
reqCtx, reqCancel := context.WithCancel(ctx)
83+
timeout := clk.AfterFunc(
84+
time.Duration(app.Healthcheck.Interval)*time.Second-time.Millisecond,
85+
reqCancel,
86+
"timeout", app.Slug)
87+
defer timeout.Stop()
88+
7789
err := func() error {
78-
req, err := http.NewRequestWithContext(ctx, http.MethodGet, app.Healthcheck.URL, nil)
90+
req, err := http.NewRequestWithContext(reqCtx, http.MethodGet, app.Healthcheck.URL, nil)
7991
if err != nil {
8092
return err
8193
}
82-
res, err := client.Do(req)
94+
res, err := http.DefaultClient.Do(req)
8395
if err != nil {
8496
return err
8597
}
@@ -118,54 +130,36 @@ func NewWorkspaceAppHealthReporter(logger slog.Logger, apps []codersdk.Workspace
118130
mu.Unlock()
119131
logger.Debug(ctx, "workspace app healthy", slog.F("id", app.ID.String()), slog.F("slug", app.Slug))
120132
}
121-
122-
t.Reset(time.Duration(app.Healthcheck.Interval) * time.Second)
123-
}
133+
return nil
134+
}, "healthcheck", app.Slug)
124135
}()
125136
}
126137

127138
mu.Lock()
128139
lastHealth := copyHealth(health)
129140
mu.Unlock()
130-
reportTicker := time.NewTicker(time.Second)
131-
defer reportTicker.Stop()
132-
// every second we check if the health values of the apps have changed
133-
// and if there is a change we will report the new values.
134-
for {
135-
select {
136-
case <-ctx.Done():
141+
reportTicker := clk.TickerFunc(ctx, time.Second, func() error {
142+
mu.RLock()
143+
changed := healthChanged(lastHealth, health)
144+
mu.RUnlock()
145+
if !changed {
137146
return nil
138-
case <-reportTicker.C:
139-
mu.RLock()
140-
changed := healthChanged(lastHealth, health)
141-
mu.RUnlock()
142-
if !changed {
143-
continue
144-
}
145-
146-
mu.Lock()
147-
lastHealth = copyHealth(health)
148-
mu.Unlock()
149-
err := postWorkspaceAgentAppHealth(ctx, agentsdk.PostAppHealthsRequest{
150-
Healths: lastHealth,
151-
})
152-
if err != nil {
153-
logger.Error(ctx, "failed to report workspace app health", slog.Error(err))
154-
} else {
155-
logger.Debug(ctx, "sent workspace app health", slog.F("health", lastHealth))
156-
}
157147
}
158-
}
159-
}
160148

161-
return func(ctx context.Context) {
162-
for r := retry.New(time.Second, 30*time.Second); r.Wait(ctx); {
163-
err := runHealthcheckLoop(ctx)
164-
if err == nil || xerrors.Is(err, context.Canceled) || xerrors.Is(err, context.DeadlineExceeded) {
165-
return
149+
mu.Lock()
150+
lastHealth = copyHealth(health)
151+
mu.Unlock()
152+
err := postWorkspaceAgentAppHealth(ctx, agentsdk.PostAppHealthsRequest{
153+
Healths: lastHealth,
154+
})
155+
if err != nil {
156+
logger.Error(ctx, "failed to report workspace app health", slog.Error(err))
157+
} else {
158+
logger.Debug(ctx, "sent workspace app health", slog.F("health", lastHealth))
166159
}
167-
logger.Error(ctx, "failed running workspace app reporter", slog.Error(err))
168-
}
160+
return nil
161+
}, "report")
162+
_ = reportTicker.Wait() // only possible error is context done
169163
}
170164
}
171165

0 commit comments

Comments
 (0)