Skip to content

Commit 2b63492

Browse files
authored
feat(healthcheck): add failing sections to report (#7789)
1 parent 9b8e5c2 commit 2b63492

File tree

11 files changed

+217
-38
lines changed

11 files changed

+217
-38
lines changed

coderd/apidoc/docs.go

+6
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

coderd/apidoc/swagger.json

+6
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

coderd/coderd.go

+2-2
Original file line number | Diff line number | Diff line change
@@ -130,7 +130,7 @@ type Options struct {
130130
// AppSecurityKey is the crypto key used to sign and encrypt tokens related to
131131
// workspace applications. It consists of both a signing and encryption key.
132132
AppSecurityKey workspaceapps.SecurityKey
133-
HealthcheckFunc func(ctx context.Context, apiKey string) (*healthcheck.Report, error)
133+
HealthcheckFunc func(ctx context.Context, apiKey string) *healthcheck.Report
134134
HealthcheckTimeout time.Duration
135135
HealthcheckRefresh time.Duration
136136

@@ -266,7 +266,7 @@ func New(options *Options) *API {
266266
options.TemplateScheduleStore.Store(&v)
267267
}
268268
if options.HealthcheckFunc == nil {
269-
options.HealthcheckFunc = func(ctx context.Context, apiKey string) (*healthcheck.Report, error) {
269+
options.HealthcheckFunc = func(ctx context.Context, apiKey string) *healthcheck.Report {
270270
return healthcheck.Run(ctx, &healthcheck.ReportOptions{
271271
AccessURL: options.AccessURL,
272272
DERPMap: options.DERPMap.Clone(),

coderd/coderdtest/coderdtest.go

+1-1
Original file line number | Diff line number | Diff line change
@@ -107,7 +107,7 @@ type Options struct {
107107
TrialGenerator func(context.Context, string) error
108108
TemplateScheduleStore schedule.TemplateScheduleStore
109109

110-
HealthcheckFunc func(ctx context.Context, apiKey string) (*healthcheck.Report, error)
110+
HealthcheckFunc func(ctx context.Context, apiKey string) *healthcheck.Report
111111
HealthcheckTimeout time.Duration
112112
HealthcheckRefresh time.Duration
113113

coderd/debug.go

+3-5
Original file line number | Diff line number | Diff line change
@@ -47,11 +47,9 @@ func (api *API) debugDeploymentHealth(rw http.ResponseWriter, r *http.Request) {
4747
ctx, cancel := context.WithTimeout(context.Background(), api.HealthcheckTimeout)
4848
defer cancel()
4949

50-
report, err := api.HealthcheckFunc(ctx, apiKey)
51-
if err == nil {
52-
api.healthCheckCache.Store(report)
53-
}
54-
return report, err
50+
report := api.HealthcheckFunc(ctx, apiKey)
51+
api.healthCheckCache.Store(report)
52+
return report, nil
5553
})
5654

5755
select {

coderd/debug_test.go

+7-7
Original file line number | Diff line number | Diff line change
@@ -24,9 +24,9 @@ func TestDebugHealth(t *testing.T) {
2424
ctx, cancel = context.WithTimeout(context.Background(), testutil.WaitShort)
2525
sessionToken string
2626
client = coderdtest.New(t, &coderdtest.Options{
27-
HealthcheckFunc: func(_ context.Context, apiKey string) (*healthcheck.Report, error) {
27+
HealthcheckFunc: func(_ context.Context, apiKey string) *healthcheck.Report {
2828
assert.Equal(t, sessionToken, apiKey)
29-
return &healthcheck.Report{}, nil
29+
return &healthcheck.Report{}
3030
},
3131
})
3232
_ = coderdtest.CreateFirstUser(t, client)
@@ -48,15 +48,15 @@ func TestDebugHealth(t *testing.T) {
4848
ctx, cancel = context.WithTimeout(context.Background(), testutil.WaitShort)
4949
client = coderdtest.New(t, &coderdtest.Options{
5050
HealthcheckTimeout: time.Microsecond,
51-
HealthcheckFunc: func(context.Context, string) (*healthcheck.Report, error) {
51+
HealthcheckFunc: func(context.Context, string) *healthcheck.Report {
5252
t := time.NewTimer(time.Second)
5353
defer t.Stop()
5454

5555
select {
5656
case <-ctx.Done():
57-
return nil, ctx.Err()
57+
return &healthcheck.Report{}
5858
case <-t.C:
59-
return &healthcheck.Report{}, nil
59+
return &healthcheck.Report{}
6060
}
6161
},
6262
})
@@ -80,11 +80,11 @@ func TestDebugHealth(t *testing.T) {
8080
client = coderdtest.New(t, &coderdtest.Options{
8181
HealthcheckRefresh: time.Hour,
8282
HealthcheckTimeout: time.Hour,
83-
HealthcheckFunc: func(context.Context, string) (*healthcheck.Report, error) {
83+
HealthcheckFunc: func(context.Context, string) *healthcheck.Report {
8484
calls++
8585
return &healthcheck.Report{
8686
Time: time.Now(),
87-
}, nil
87+
}
8888
},
8989
})
9090
_ = coderdtest.CreateFirstUser(t, client)

coderd/healthcheck/derp.go

+5-5
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,6 @@ import (
2525
)
2626

2727
type DERPReport struct {
28-
mu sync.Mutex
2928
Healthy bool `json:"healthy"`
3029

3130
Regions map[int]*DERPRegionReport `json:"regions"`
@@ -78,6 +77,7 @@ func (r *DERPReport) Run(ctx context.Context, opts *DERPReportOptions) {
7877
r.Regions = map[int]*DERPRegionReport{}
7978

8079
wg := &sync.WaitGroup{}
80+
mu := sync.Mutex{}
8181

8282
wg.Add(len(opts.DERPMap.Regions))
8383
for _, region := range opts.DERPMap.Regions {
@@ -97,19 +97,19 @@ func (r *DERPReport) Run(ctx context.Context, opts *DERPReportOptions) {
9797

9898
regionReport.Run(ctx)
9999

100-
r.mu.Lock()
100+
mu.Lock()
101101
r.Regions[region.RegionID] = &regionReport
102102
if !regionReport.Healthy {
103103
r.Healthy = false
104104
}
105-
r.mu.Unlock()
105+
mu.Unlock()
106106
}()
107107
}
108108

109109
ncLogf := func(format string, args ...interface{}) {
110-
r.mu.Lock()
110+
mu.Lock()
111111
r.NetcheckLogs = append(r.NetcheckLogs, fmt.Sprintf(format, args...))
112-
r.mu.Unlock()
112+
mu.Unlock()
113113
}
114114
nc := &netcheck.Client{
115115
PortMapper: portmapper.NewClient(tslogger.WithPrefix(ncLogf, "portmap: "), nil),

coderd/healthcheck/healthcheck.go

+57-11
Original file line number | Diff line number | Diff line change
@@ -11,11 +11,24 @@ import (
1111
"tailscale.com/tailcfg"
1212
)
1313

14+
const (
15+
SectionDERP string = "DERP"
16+
SectionAccessURL string = "AccessURL"
17+
SectionWebsocket string = "Websocket"
18+
)
19+
20+
type Checker interface {
21+
DERP(ctx context.Context, opts *DERPReportOptions) DERPReport
22+
AccessURL(ctx context.Context, opts *AccessURLOptions) AccessURLReport
23+
Websocket(ctx context.Context, opts *WebsocketReportOptions) WebsocketReport
24+
}
25+
1426
type Report struct {
1527
// Time is the time the report was generated at.
1628
Time time.Time `json:"time"`
1729
// Healthy is true if the report returns no errors.
18-
Healthy bool `json:"healthy"`
30+
Healthy bool `json:"healthy"`
31+
FailingSections []string `json:"failing_sections"`
1932

2033
DERP DERPReport `json:"derp"`
2134
AccessURL AccessURLReport `json:"access_url"`
@@ -28,12 +41,36 @@ type ReportOptions struct {
2841
AccessURL *url.URL
2942
Client *http.Client
3043
APIKey string
44+
45+
Checker Checker
46+
}
47+
48+
type defaultChecker struct{}
49+
50+
func (defaultChecker) DERP(ctx context.Context, opts *DERPReportOptions) (report DERPReport) {
51+
report.Run(ctx, opts)
52+
return report
53+
}
54+
55+
func (defaultChecker) AccessURL(ctx context.Context, opts *AccessURLOptions) (report AccessURLReport) {
56+
report.Run(ctx, opts)
57+
return report
3158
}
3259

33-
func Run(ctx context.Context, opts *ReportOptions) (*Report, error) {
34-
var report Report
60+
func (defaultChecker) Websocket(ctx context.Context, opts *WebsocketReportOptions) (report WebsocketReport) {
61+
report.Run(ctx, opts)
62+
return report
63+
}
3564

36-
wg := &sync.WaitGroup{}
65+
func Run(ctx context.Context, opts *ReportOptions) *Report {
66+
var (
67+
wg sync.WaitGroup
68+
report Report
69+
)
70+
71+
if opts.Checker == nil {
72+
opts.Checker = defaultChecker{}
73+
}
3774

3875
wg.Add(1)
3976
go func() {
@@ -44,7 +81,7 @@ func Run(ctx context.Context, opts *ReportOptions) (*Report, error) {
4481
}
4582
}()
4683

47-
report.DERP.Run(ctx, &DERPReportOptions{
84+
report.DERP = opts.Checker.DERP(ctx, &DERPReportOptions{
4885
DERPMap: opts.DERPMap,
4986
})
5087
}()
@@ -58,7 +95,7 @@ func Run(ctx context.Context, opts *ReportOptions) (*Report, error) {
5895
}
5996
}()
6097

61-
report.AccessURL.Run(ctx, &AccessURLOptions{
98+
report.AccessURL = opts.Checker.AccessURL(ctx, &AccessURLOptions{
6299
AccessURL: opts.AccessURL,
63100
Client: opts.Client,
64101
})
@@ -72,16 +109,25 @@ func Run(ctx context.Context, opts *ReportOptions) (*Report, error) {
72109
report.Websocket.Error = xerrors.Errorf("%v", err)
73110
}
74111
}()
75-
report.Websocket.Run(ctx, &WebsocketReportOptions{
112+
113+
report.Websocket = opts.Checker.Websocket(ctx, &WebsocketReportOptions{
76114
APIKey: opts.APIKey,
77115
AccessURL: opts.AccessURL,
78116
})
79117
}()
80118

81119
wg.Wait()
82120
report.Time = time.Now()
83-
report.Healthy = report.DERP.Healthy &&
84-
report.AccessURL.Healthy &&
85-
report.Websocket.Healthy
86-
return &report, nil
121+
if !report.DERP.Healthy {
122+
report.FailingSections = append(report.FailingSections, SectionDERP)
123+
}
124+
if !report.AccessURL.Healthy {
125+
report.FailingSections = append(report.FailingSections, SectionAccessURL)
126+
}
127+
if !report.Websocket.Healthy {
128+
report.FailingSections = append(report.FailingSections, SectionWebsocket)
129+
}
130+
131+
report.Healthy = len(report.FailingSections) == 0
132+
return &report
87133
}
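coderd/healthcheck/healthcheck_test.go (new file; path inferred from the package clause and import below — the header was missing from this capture)

+120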
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,120 @@
1+
package healthcheck_test
2+
3+
import (
4+
"context"
5+
"testing"
6+
7+
"github.com/stretchr/testify/assert"
8+
9+
"github.com/coder/coder/coderd/healthcheck"
10+
)
11+
12+
type testChecker struct {
13+
DERPReport healthcheck.DERPReport
14+
AccessURLReport healthcheck.AccessURLReport
15+
WebsocketReport healthcheck.WebsocketReport
16+
}
17+
18+
func (c *testChecker) DERP(context.Context, *healthcheck.DERPReportOptions) healthcheck.DERPReport {
19+
return c.DERPReport
20+
}
21+
22+
func (c *testChecker) AccessURL(context.Context, *healthcheck.AccessURLOptions) healthcheck.AccessURLReport {
23+
return c.AccessURLReport
24+
}
25+
26+
func (c *testChecker) Websocket(context.Context, *healthcheck.WebsocketReportOptions) healthcheck.WebsocketReport {
27+
return c.WebsocketReport
28+
}
29+
30+
func TestHealthcheck(t *testing.T) {
31+
t.Parallel()
32+
33+
for _, c := range []struct {
34+
name string
35+
checker *testChecker
36+
healthy bool
37+
failingSections []string
38+
}{{
39+
name: "OK",
40+
checker: &testChecker{
41+
DERPReport: healthcheck.DERPReport{
42+
Healthy: true,
43+
},
44+
AccessURLReport: healthcheck.AccessURLReport{
45+
Healthy: true,
46+
},
47+
WebsocketReport: healthcheck.WebsocketReport{
48+
Healthy: true,
49+
},
50+
},
51+
healthy: true,
52+
failingSections: nil,
53+
}, {
54+
name: "DERPFail",
55+
checker: &testChecker{
56+
DERPReport: healthcheck.DERPReport{
57+
Healthy: false,
58+
},
59+
AccessURLReport: healthcheck.AccessURLReport{
60+
Healthy: true,
61+
},
62+
WebsocketReport: healthcheck.WebsocketReport{
63+
Healthy: true,
64+
},
65+
},
66+
healthy: false,
67+
failingSections: []string{healthcheck.SectionDERP},
68+
}, {
69+
name: "AccessURLFail",
70+
checker: &testChecker{
71+
DERPReport: healthcheck.DERPReport{
72+
Healthy: true,
73+
},
74+
AccessURLReport: healthcheck.AccessURLReport{
75+
Healthy: false,
76+
},
77+
WebsocketReport: healthcheck.WebsocketReport{
78+
Healthy: true,
79+
},
80+
},
81+
healthy: false,
82+
failingSections: []string{healthcheck.SectionAccessURL},
83+
}, {
84+
name: "WebsocketFail",
85+
checker: &testChecker{
86+
DERPReport: healthcheck.DERPReport{
87+
Healthy: true,
88+
},
89+
AccessURLReport: healthcheck.AccessURLReport{
90+
Healthy: true,
91+
},
92+
WebsocketReport: healthcheck.WebsocketReport{
93+
Healthy: false,
94+
},
95+
},
96+
healthy: false,
97+
failingSections: []string{healthcheck.SectionWebsocket},
98+
}, {
99+
name: "AllFail",
100+
checker: &testChecker{},
101+
healthy: false,
102+
failingSections: []string{healthcheck.SectionDERP, healthcheck.SectionAccessURL, healthcheck.SectionWebsocket},
103+
}} {
104+
c := c
105+
t.Run(c.name, func(t *testing.T) {
106+
t.Parallel()
107+
108+
report := healthcheck.Run(context.Background(), &healthcheck.ReportOptions{
109+
Checker: c.checker,
110+
})
111+
112+
assert.Equal(t, c.healthy, report.Healthy)
113+
assert.Equal(t, c.failingSections, report.FailingSections)
114+
assert.Equal(t, c.checker.DERPReport.Healthy, report.DERP.Healthy)
115+
assert.Equal(t, c.checker.AccessURLReport.Healthy, report.AccessURL.Healthy)
116+
assert.Equal(t, c.checker.WebsocketReport.Healthy, report.Websocket.Healthy)
117+
assert.NotZero(t, report.Time)
118+
})
119+
}
120+
}

docs/api/debug.md

+1
Original file line number | Diff line number | Diff line change
@@ -206,6 +206,7 @@ curl -X GET http://coder-server:8080/api/v2/debug/health \
206206
}
207207
}
208208
},
209+
"failing_sections": ["string"],
209210
"healthy": true,
210211
"time": "string",
211212
"websocket": {

0 commit comments

Comments
 (0)