@@ -10,14 +10,11 @@ import (
10
10
"golang.org/x/xerrors"
11
11
12
12
"cdr.dev/slog"
13
+ "github.com/coder/coder/v2/clock"
13
14
"github.com/coder/coder/v2/codersdk"
14
15
"github.com/coder/coder/v2/codersdk/agentsdk"
15
- "github.com/coder/retry"
16
16
)
17
17
18
- // WorkspaceAgentApps fetches the workspace apps.
19
- type WorkspaceAgentApps func (context.Context ) ([]codersdk.WorkspaceApp , error )
20
-
21
18
// PostWorkspaceAgentAppHealth updates the workspace app health.
22
19
type PostWorkspaceAgentAppHealth func (context.Context , agentsdk.PostAppHealthsRequest ) error
23
20
@@ -26,15 +23,26 @@ type WorkspaceAppHealthReporter func(ctx context.Context)
26
23
27
24
// NewWorkspaceAppHealthReporter creates a WorkspaceAppHealthReporter that reports app health to coderd.
28
25
func NewWorkspaceAppHealthReporter (logger slog.Logger , apps []codersdk.WorkspaceApp , postWorkspaceAgentAppHealth PostWorkspaceAgentAppHealth ) WorkspaceAppHealthReporter {
26
+ return NewAppHealthReporterWithClock (logger , apps , postWorkspaceAgentAppHealth , clock .NewReal ())
27
+ }
28
+
29
+ // NewAppHealthReporterWithClock is only called directly by test code. Product code should call
30
+ // NewAppHealthReporter.
31
+ func NewAppHealthReporterWithClock (
32
+ logger slog.Logger ,
33
+ apps []codersdk.WorkspaceApp ,
34
+ postWorkspaceAgentAppHealth PostWorkspaceAgentAppHealth ,
35
+ clk clock.Clock ,
36
+ ) WorkspaceAppHealthReporter {
29
37
logger = logger .Named ("apphealth" )
30
38
31
- runHealthcheckLoop := func (ctx context.Context ) error {
39
+ return func (ctx context.Context ) {
32
40
ctx , cancel := context .WithCancel (ctx )
33
41
defer cancel ()
34
42
35
43
// no need to run this loop if no apps for this workspace.
36
44
if len (apps ) == 0 {
37
- return nil
45
+ return
38
46
}
39
47
40
48
hasHealthchecksEnabled := false
@@ -49,7 +57,7 @@ func NewWorkspaceAppHealthReporter(logger slog.Logger, apps []codersdk.Workspace
49
57
50
58
// no need to run this loop if no health checks are configured.
51
59
if ! hasHealthchecksEnabled {
52
- return nil
60
+ return
53
61
}
54
62
55
63
// run a ticker for each app health check.
@@ -61,25 +69,29 @@ func NewWorkspaceAppHealthReporter(logger slog.Logger, apps []codersdk.Workspace
61
69
}
62
70
app := nextApp
63
71
go func () {
64
- t := time .NewTicker (time .Duration (app .Healthcheck .Interval ) * time .Second )
65
- defer t .Stop ()
66
-
67
- for {
68
- select {
69
- case <- ctx .Done ():
70
- return
71
- case <- t .C :
72
- }
73
- // we set the http timeout to the healthcheck interval to prevent getting too backed up.
74
- client := & http.Client {
75
- Timeout : time .Duration (app .Healthcheck .Interval ) * time .Second ,
76
- }
72
+ _ = clk .TickerFunc (ctx , time .Duration (app .Healthcheck .Interval )* time .Second , func () error {
73
+ // We time out at the healthcheck interval to prevent getting too backed up, but
74
+ // set it 1ms early so that it's not simultaneous with the next tick in testing,
75
+ // which makes the test easier to understand.
76
+ //
77
+ // It would be idiomatic to use the http.Client.Timeout or a context.WithTimeout,
78
+ // but we are passing this off to the native http library, which is not aware
79
+ // of the clock library we are using. That means in testing, with a mock clock
80
+ // it will compare mocked times with real times, and we will get strange results.
81
+ // So, we just implement the timeout as a context we cancel with an AfterFunc
82
+ reqCtx , reqCancel := context .WithCancel (ctx )
83
+ timeout := clk .AfterFunc (
84
+ time .Duration (app .Healthcheck .Interval )* time .Second - time .Millisecond ,
85
+ reqCancel ,
86
+ "timeout" , app .Slug )
87
+ defer timeout .Stop ()
88
+
77
89
err := func () error {
78
- req , err := http .NewRequestWithContext (ctx , http .MethodGet , app .Healthcheck .URL , nil )
90
+ req , err := http .NewRequestWithContext (reqCtx , http .MethodGet , app .Healthcheck .URL , nil )
79
91
if err != nil {
80
92
return err
81
93
}
82
- res , err := client .Do (req )
94
+ res , err := http . DefaultClient .Do (req )
83
95
if err != nil {
84
96
return err
85
97
}
@@ -118,54 +130,36 @@ func NewWorkspaceAppHealthReporter(logger slog.Logger, apps []codersdk.Workspace
118
130
mu .Unlock ()
119
131
logger .Debug (ctx , "workspace app healthy" , slog .F ("id" , app .ID .String ()), slog .F ("slug" , app .Slug ))
120
132
}
121
-
122
- t .Reset (time .Duration (app .Healthcheck .Interval ) * time .Second )
123
- }
133
+ return nil
134
+ }, "healthcheck" , app .Slug )
124
135
}()
125
136
}
126
137
127
138
mu .Lock ()
128
139
lastHealth := copyHealth (health )
129
140
mu .Unlock ()
130
- reportTicker := time .NewTicker (time .Second )
131
- defer reportTicker .Stop ()
132
- // every second we check if the health values of the apps have changed
133
- // and if there is a change we will report the new values.
134
- for {
135
- select {
136
- case <- ctx .Done ():
141
+ reportTicker := clk .TickerFunc (ctx , time .Second , func () error {
142
+ mu .RLock ()
143
+ changed := healthChanged (lastHealth , health )
144
+ mu .RUnlock ()
145
+ if ! changed {
137
146
return nil
138
- case <- reportTicker .C :
139
- mu .RLock ()
140
- changed := healthChanged (lastHealth , health )
141
- mu .RUnlock ()
142
- if ! changed {
143
- continue
144
- }
145
-
146
- mu .Lock ()
147
- lastHealth = copyHealth (health )
148
- mu .Unlock ()
149
- err := postWorkspaceAgentAppHealth (ctx , agentsdk.PostAppHealthsRequest {
150
- Healths : lastHealth ,
151
- })
152
- if err != nil {
153
- logger .Error (ctx , "failed to report workspace app health" , slog .Error (err ))
154
- } else {
155
- logger .Debug (ctx , "sent workspace app health" , slog .F ("health" , lastHealth ))
156
- }
157
147
}
158
- }
159
- }
160
148
161
- return func (ctx context.Context ) {
162
- for r := retry .New (time .Second , 30 * time .Second ); r .Wait (ctx ); {
163
- err := runHealthcheckLoop (ctx )
164
- if err == nil || xerrors .Is (err , context .Canceled ) || xerrors .Is (err , context .DeadlineExceeded ) {
165
- return
149
+ mu .Lock ()
150
+ lastHealth = copyHealth (health )
151
+ mu .Unlock ()
152
+ err := postWorkspaceAgentAppHealth (ctx , agentsdk.PostAppHealthsRequest {
153
+ Healths : lastHealth ,
154
+ })
155
+ if err != nil {
156
+ logger .Error (ctx , "failed to report workspace app health" , slog .Error (err ))
157
+ } else {
158
+ logger .Debug (ctx , "sent workspace app health" , slog .F ("health" , lastHealth ))
166
159
}
167
- logger .Error (ctx , "failed running workspace app reporter" , slog .Error (err ))
168
- }
160
+ return nil
161
+ }, "report" )
162
+ _ = reportTicker .Wait () // only possible error is context done
169
163
}
170
164
}
171
165
0 commit comments