Skip to content

Commit e80c05d

Browse files
committed
feat(coderd/healthcheck): add health check for proxy
1 parent e311e9e commit e80c05d

File tree

4 files changed

+316
-16
lines changed

4 files changed

+316
-16
lines changed

coderd/healthcheck/healthcheck.go

+36-12
Original file line numberDiff line numberDiff line change
@@ -12,17 +12,19 @@ import (
1212
)
1313

1414
const (
15-
SectionDERP string = "DERP"
16-
SectionAccessURL string = "AccessURL"
17-
SectionWebsocket string = "Websocket"
18-
SectionDatabase string = "Database"
15+
SectionDERP string = "DERP"
16+
SectionAccessURL string = "AccessURL"
17+
SectionWebsocket string = "Websocket"
18+
SectionDatabase string = "Database"
19+
SectionWorkspaceProxy string = "WorkspaceProxy"
1920
)
2021

2122
type Checker interface {
2223
DERP(ctx context.Context, opts *derphealth.ReportOptions) derphealth.Report
2324
AccessURL(ctx context.Context, opts *AccessURLReportOptions) AccessURLReport
2425
Websocket(ctx context.Context, opts *WebsocketReportOptions) WebsocketReport
2526
Database(ctx context.Context, opts *DatabaseReportOptions) DatabaseReport
27+
WorkspaceProxy(ctx context.Context, opts *WorkspaceProxyReportOptions) WorkspaceProxyReport
2628
}
2729

2830
// @typescript-generate Report
@@ -34,20 +36,22 @@ type Report struct {
3436
// FailingSections is a list of sections that have failed their healthcheck.
3537
FailingSections []string `json:"failing_sections"`
3638

37-
DERP derphealth.Report `json:"derp"`
38-
AccessURL AccessURLReport `json:"access_url"`
39-
Websocket WebsocketReport `json:"websocket"`
40-
Database DatabaseReport `json:"database"`
39+
DERP derphealth.Report `json:"derp"`
40+
AccessURL AccessURLReport `json:"access_url"`
41+
Websocket WebsocketReport `json:"websocket"`
42+
Database DatabaseReport `json:"database"`
43+
WorkspaceProxy WorkspaceProxyReport `json:"workspace_proxy"`
4144

4245
// The Coder version of the server that the report was generated on.
4346
CoderVersion string `json:"coder_version"`
4447
}
4548

4649
type ReportOptions struct {
47-
AccessURL AccessURLReportOptions
48-
Database DatabaseReportOptions
49-
DerpHealth derphealth.ReportOptions
50-
Websocket WebsocketReportOptions
50+
AccessURL AccessURLReportOptions
51+
Database DatabaseReportOptions
52+
DerpHealth derphealth.ReportOptions
53+
Websocket WebsocketReportOptions
54+
WorkspaceProxy WorkspaceProxyReportOptions
5155

5256
Checker Checker
5357
}
@@ -74,6 +78,11 @@ func (defaultChecker) Database(ctx context.Context, opts *DatabaseReportOptions)
7478
return report
7579
}
7680

81+
func (defaultChecker) WorkspaceProxy(ctx context.Context, opts *WorkspaceProxyReportOptions) (report WorkspaceProxyReport) {
82+
report.Run(ctx, opts)
83+
return report
84+
}
85+
7786
func Run(ctx context.Context, opts *ReportOptions) *Report {
7887
var (
7988
wg sync.WaitGroup
@@ -132,6 +141,18 @@ func Run(ctx context.Context, opts *ReportOptions) *Report {
132141
report.Database = opts.Checker.Database(ctx, &opts.Database)
133142
}()
134143

144+
wg.Add(1)
145+
go func() {
146+
defer wg.Done()
147+
defer func() {
148+
if err := recover(); err != nil {
149+
report.WorkspaceProxy.Error = ptr.Ref(fmt.Sprint(err))
150+
}
151+
}()
152+
153+
report.WorkspaceProxy = opts.Checker.WorkspaceProxy(ctx, &opts.WorkspaceProxy)
154+
}()
155+
135156
report.CoderVersion = buildinfo.Version()
136157
wg.Wait()
137158

@@ -149,6 +170,9 @@ func Run(ctx context.Context, opts *ReportOptions) *Report {
149170
if !report.Database.Healthy {
150171
report.FailingSections = append(report.FailingSections, SectionDatabase)
151172
}
173+
if !report.WorkspaceProxy.Healthy {
174+
report.FailingSections = append(report.FailingSections, SectionWorkspaceProxy)
175+
}
152176

153177
report.Healthy = len(report.FailingSections) == 0
154178
return &report

coderd/healthcheck/healthcheck_test.go

+51-4
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,11 @@ import (
1111
)
1212

1313
type testChecker struct {
14-
DERPReport derphealth.Report
15-
AccessURLReport healthcheck.AccessURLReport
16-
WebsocketReport healthcheck.WebsocketReport
17-
DatabaseReport healthcheck.DatabaseReport
14+
DERPReport derphealth.Report
15+
AccessURLReport healthcheck.AccessURLReport
16+
WebsocketReport healthcheck.WebsocketReport
17+
DatabaseReport healthcheck.DatabaseReport
18+
WorkspaceProxyReport healthcheck.WorkspaceProxyReport
1819
}
1920

2021
func (c *testChecker) DERP(context.Context, *derphealth.ReportOptions) derphealth.Report {
@@ -33,6 +34,10 @@ func (c *testChecker) Database(context.Context, *healthcheck.DatabaseReportOptio
3334
return c.DatabaseReport
3435
}
3536

37+
func (c *testChecker) WorkspaceProxy(context.Context, *healthcheck.WorkspaceProxyReportOptions) healthcheck.WorkspaceProxyReport {
38+
return c.WorkspaceProxyReport
39+
}
40+
3641
func TestHealthcheck(t *testing.T) {
3742
t.Parallel()
3843

@@ -56,6 +61,9 @@ func TestHealthcheck(t *testing.T) {
5661
DatabaseReport: healthcheck.DatabaseReport{
5762
Healthy: true,
5863
},
64+
WorkspaceProxyReport: healthcheck.WorkspaceProxyReport{
65+
Healthy: true,
66+
},
5967
},
6068
healthy: true,
6169
failingSections: []string{},
@@ -74,6 +82,9 @@ func TestHealthcheck(t *testing.T) {
7482
DatabaseReport: healthcheck.DatabaseReport{
7583
Healthy: true,
7684
},
85+
WorkspaceProxyReport: healthcheck.WorkspaceProxyReport{
86+
Healthy: true,
87+
},
7788
},
7889
healthy: false,
7990
failingSections: []string{healthcheck.SectionDERP},
@@ -93,6 +104,9 @@ func TestHealthcheck(t *testing.T) {
93104
DatabaseReport: healthcheck.DatabaseReport{
94105
Healthy: true,
95106
},
107+
WorkspaceProxyReport: healthcheck.WorkspaceProxyReport{
108+
Healthy: true,
109+
},
96110
},
97111
healthy: true,
98112
failingSections: []string{},
@@ -111,6 +125,9 @@ func TestHealthcheck(t *testing.T) {
111125
DatabaseReport: healthcheck.DatabaseReport{
112126
Healthy: true,
113127
},
128+
WorkspaceProxyReport: healthcheck.WorkspaceProxyReport{
129+
Healthy: true,
130+
},
114131
},
115132
healthy: false,
116133
failingSections: []string{healthcheck.SectionAccessURL},
@@ -129,6 +146,9 @@ func TestHealthcheck(t *testing.T) {
129146
DatabaseReport: healthcheck.DatabaseReport{
130147
Healthy: true,
131148
},
149+
WorkspaceProxyReport: healthcheck.WorkspaceProxyReport{
150+
Healthy: true,
151+
},
132152
},
133153
healthy: false,
134154
failingSections: []string{healthcheck.SectionWebsocket},
@@ -147,9 +167,33 @@ func TestHealthcheck(t *testing.T) {
147167
DatabaseReport: healthcheck.DatabaseReport{
148168
Healthy: false,
149169
},
170+
WorkspaceProxyReport: healthcheck.WorkspaceProxyReport{
171+
Healthy: true,
172+
},
150173
},
151174
healthy: false,
152175
failingSections: []string{healthcheck.SectionDatabase},
176+
}, {
177+
name: "ProxyFail",
178+
checker: &testChecker{
179+
DERPReport: derphealth.Report{
180+
Healthy: true,
181+
},
182+
AccessURLReport: healthcheck.AccessURLReport{
183+
Healthy: true,
184+
},
185+
WebsocketReport: healthcheck.WebsocketReport{
186+
Healthy: true,
187+
},
188+
DatabaseReport: healthcheck.DatabaseReport{
189+
Healthy: true,
190+
},
191+
WorkspaceProxyReport: healthcheck.WorkspaceProxyReport{
192+
Healthy: false,
193+
},
194+
},
195+
healthy: false,
196+
failingSections: []string{healthcheck.SectionWorkspaceProxy},
153197
}, {
154198
name: "AllFail",
155199
checker: &testChecker{},
@@ -159,6 +203,7 @@ func TestHealthcheck(t *testing.T) {
159203
healthcheck.SectionAccessURL,
160204
healthcheck.SectionWebsocket,
161205
healthcheck.SectionDatabase,
206+
healthcheck.SectionWorkspaceProxy,
162207
},
163208
}} {
164209
c := c
@@ -175,6 +220,8 @@ func TestHealthcheck(t *testing.T) {
175220
assert.Equal(t, c.checker.DERPReport.Warnings, report.DERP.Warnings)
176221
assert.Equal(t, c.checker.AccessURLReport.Healthy, report.AccessURL.Healthy)
177222
assert.Equal(t, c.checker.WebsocketReport.Healthy, report.Websocket.Healthy)
223+
assert.Equal(t, c.checker.WorkspaceProxyReport.Healthy, report.WorkspaceProxy.Healthy)
224+
assert.Equal(t, c.checker.WorkspaceProxyReport.Warnings, report.WorkspaceProxy.Warnings)
178225
assert.NotZero(t, report.Time)
179226
assert.NotZero(t, report.CoderVersion)
180227
})

coderd/healthcheck/workspaceproxy.go

+87
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
package healthcheck
2+
3+
import (
4+
"context"
5+
"fmt"
6+
7+
"cdr.dev/slog"
8+
"github.com/coder/coder/v2/buildinfo"
9+
"github.com/coder/coder/v2/coderd/util/ptr"
10+
"github.com/coder/coder/v2/codersdk"
11+
)
12+
13+
// WorkspaceProxyReportOptions carries the inputs consumed by
// WorkspaceProxyReport.Run.
type WorkspaceProxyReportOptions struct {
14+
// UpdateProxyHealth is a function called when healthcheck is run.
15+
// This would normally be ProxyHealth.ForceUpdate().
16+
// We do this because if someone mashes the healthcheck refresh button
17+
// they would expect up-to-date data.
// Run records an error if this is nil while FetchWorkspaceProxies is set.
18+
UpdateProxyHealth func(context.Context) error
19+
// FetchWorkspaceProxies is a function that returns the available workspace proxies.
// When it is nil, Run treats workspace proxies as not configured and
// leaves the report healthy.
20+
FetchWorkspaceProxies func(context.Context) (codersdk.RegionsResponse[codersdk.WorkspaceProxy], error)
21+
// CurrentVersion is the current server version.
22+
// We pass this in to make it easier to test.
// Run compares it against each proxy's version via buildinfo.VersionsMatch.
23+
CurrentVersion string
24+
// Logger receives the debug, warning and error messages emitted by Run.
Logger slog.Logger
25+
}
26+
27+
// @typescript-generate Report
28+
type WorkspaceProxyReport struct {
29+
Healthy bool `json:"healthy"`
30+
Warnings []string `json:"warnings"`
31+
Error *string `json:"error"`
32+
33+
WorkspaceProxies codersdk.RegionsResponse[codersdk.WorkspaceProxy]
34+
}
35+
36+
func (r *WorkspaceProxyReport) Run(ctx context.Context, opts *WorkspaceProxyReportOptions) {
37+
r.Healthy = true
38+
r.Warnings = []string{}
39+
40+
if opts.FetchWorkspaceProxies == nil {
41+
opts.Logger.Debug(ctx, "no workspace proxies configured")
42+
return
43+
}
44+
45+
if opts.UpdateProxyHealth == nil {
46+
err := "opts.UpdateProxyHealth must not be nil if opts.FetchWorkspaceProxies is not nil"
47+
opts.Logger.Error(ctx, "developer error: "+err)
48+
r.Error = ptr.Ref(err)
49+
return
50+
}
51+
52+
if err := opts.UpdateProxyHealth(ctx); err != nil {
53+
opts.Logger.Error(ctx, "failed to update proxy health: %w", err)
54+
r.Error = ptr.Ref(err.Error())
55+
return
56+
}
57+
58+
proxies, err := opts.FetchWorkspaceProxies(ctx)
59+
if err != nil {
60+
opts.Logger.Error(ctx, "failed to fetch workspace proxies", slog.Error(err))
61+
r.Healthy = false
62+
r.Error = ptr.Ref(err.Error())
63+
return
64+
}
65+
66+
r.WorkspaceProxies = proxies
67+
68+
var numProxies int
69+
var healthyProxies int
70+
for _, proxy := range r.WorkspaceProxies.Regions {
71+
numProxies++
72+
if proxy.Healthy {
73+
healthyProxies++
74+
}
75+
76+
// check versions
77+
if !buildinfo.VersionsMatch(proxy.Version, opts.CurrentVersion) {
78+
opts.Logger.Warn(ctx, "proxy version mismatch",
79+
slog.F("version", opts.CurrentVersion),
80+
slog.F("proxy_version", proxy.Version),
81+
slog.F("proxy_name", proxy.Name),
82+
)
83+
r.Healthy = false
84+
r.Warnings = append(r.Warnings, fmt.Sprintf("Proxy %q version %q does not match primary server version %q", proxy.Name, proxy.Version, opts.CurrentVersion))
85+
}
86+
}
87+
}

0 commit comments

Comments
 (0)