Skip to content

Commit b62b6af

Browse files
authored
fix(healthcheck): don't allow panics to exit coderd (#7276)
1 parent a983416 commit b62b6af

File tree

9 files changed

+88
-24
lines changed

9 files changed

+88
-24
lines changed

coderd/apidoc/docs.go

+4-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

coderd/apidoc/swagger.json

+4-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

coderd/coderd.go

+2-1
Original file line numberDiff line numberDiff line change
@@ -250,7 +250,8 @@ func New(options *Options) *API {
250250
if options.HealthcheckFunc == nil {
251251
options.HealthcheckFunc = func(ctx context.Context) (*healthcheck.Report, error) {
252252
return healthcheck.Run(ctx, &healthcheck.ReportOptions{
253-
DERPMap: options.DERPMap.Clone(),
253+
AccessURL: options.AccessURL,
254+
DERPMap: options.DERPMap.Clone(),
254255
})
255256
}
256257
}

coderd/healthcheck/accessurl.go

+10-5
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ type AccessURLReport struct {
1515
Reachable bool
1616
StatusCode int
1717
HealthzResponse string
18-
Err error
18+
Error error
1919
}
2020

2121
type AccessURLOptions struct {
@@ -27,32 +27,37 @@ func (r *AccessURLReport) Run(ctx context.Context, opts *AccessURLOptions) {
2727
ctx, cancel := context.WithTimeout(ctx, 5*time.Second)
2828
defer cancel()
2929

30+
if opts.AccessURL == nil {
31+
r.Error = xerrors.New("access URL is nil")
32+
return
33+
}
34+
3035
if opts.Client == nil {
3136
opts.Client = http.DefaultClient
3237
}
3338

3439
accessURL, err := opts.AccessURL.Parse("/healthz")
3540
if err != nil {
36-
r.Err = xerrors.Errorf("parse healthz endpoint: %w", err)
41+
r.Error = xerrors.Errorf("parse healthz endpoint: %w", err)
3742
return
3843
}
3944

4045
req, err := http.NewRequestWithContext(ctx, "GET", accessURL.String(), nil)
4146
if err != nil {
42-
r.Err = xerrors.Errorf("create healthz request: %w", err)
47+
r.Error = xerrors.Errorf("create healthz request: %w", err)
4348
return
4449
}
4550

4651
res, err := opts.Client.Do(req)
4752
if err != nil {
48-
r.Err = xerrors.Errorf("get healthz endpoint: %w", err)
53+
r.Error = xerrors.Errorf("get healthz endpoint: %w", err)
4954
return
5055
}
5156
defer res.Body.Close()
5257

5358
body, err := io.ReadAll(res.Body)
5459
if err != nil {
55-
r.Err = xerrors.Errorf("read healthz response: %w", err)
60+
r.Error = xerrors.Errorf("read healthz response: %w", err)
5661
return
5762
}
5863

coderd/healthcheck/accessurl_test.go

+3-3
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ func TestAccessURL(t *testing.T) {
3636
assert.True(t, report.Reachable)
3737
assert.Equal(t, http.StatusOK, report.StatusCode)
3838
assert.Equal(t, "OK", report.HealthzResponse)
39-
assert.NoError(t, report.Err)
39+
assert.NoError(t, report.Error)
4040
})
4141

4242
t.Run("404", func(t *testing.T) {
@@ -66,7 +66,7 @@ func TestAccessURL(t *testing.T) {
6666
assert.True(t, report.Reachable)
6767
assert.Equal(t, http.StatusNotFound, report.StatusCode)
6868
assert.Equal(t, string(resp), report.HealthzResponse)
69-
assert.NoError(t, report.Err)
69+
assert.NoError(t, report.Error)
7070
})
7171

7272
t.Run("ClientErr", func(t *testing.T) {
@@ -102,7 +102,7 @@ func TestAccessURL(t *testing.T) {
102102
assert.False(t, report.Reachable)
103103
assert.Equal(t, 0, report.StatusCode)
104104
assert.Equal(t, "", report.HealthzResponse)
105-
assert.ErrorIs(t, report.Err, expErr)
105+
assert.ErrorIs(t, report.Error, expErr)
106106
})
107107
}
108108

coderd/healthcheck/derp.go

+27-9
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,8 @@ type DERPReport struct {
3333
Netcheck *netcheck.Report `json:"netcheck"`
3434
NetcheckErr error `json:"netcheck_err"`
3535
NetcheckLogs []string `json:"netcheck_logs"`
36+
37+
Error error `json:"error"`
3638
}
3739

3840
type DERPRegionReport struct {
@@ -41,6 +43,7 @@ type DERPRegionReport struct {
4143

4244
Region *tailcfg.DERPRegion `json:"region"`
4345
NodeReports []*DERPNodeReport `json:"node_reports"`
46+
Error error `json:"error"`
4447
}
4548
type DERPNodeReport struct {
4649
mu sync.Mutex
@@ -55,6 +58,7 @@ type DERPNodeReport struct {
5558
UsesWebsocket bool `json:"uses_websocket"`
5659
ClientLogs [][]string `json:"client_logs"`
5760
ClientErrs [][]error `json:"client_errs"`
61+
Error error `json:"error"`
5862

5963
STUN DERPStunReport `json:"stun"`
6064
}
@@ -77,12 +81,19 @@ func (r *DERPReport) Run(ctx context.Context, opts *DERPReportOptions) {
7781

7882
wg.Add(len(opts.DERPMap.Regions))
7983
for _, region := range opts.DERPMap.Regions {
80-
region := region
81-
go func() {
82-
defer wg.Done()
83-
regionReport := DERPRegionReport{
84+
var (
85+
region = region
86+
regionReport = DERPRegionReport{
8487
Region: region,
8588
}
89+
)
90+
go func() {
91+
defer wg.Done()
92+
defer func() {
93+
if err := recover(); err != nil {
94+
regionReport.Error = xerrors.Errorf("%v", err)
95+
}
96+
}()
8697

8798
regionReport.Run(ctx)
8899

@@ -117,14 +128,21 @@ func (r *DERPRegionReport) Run(ctx context.Context) {
117128

118129
wg.Add(len(r.Region.Nodes))
119130
for _, node := range r.Region.Nodes {
120-
node := node
121-
go func() {
122-
defer wg.Done()
123-
124-
nodeReport := DERPNodeReport{
131+
var (
132+
node = node
133+
nodeReport = DERPNodeReport{
125134
Node: node,
126135
Healthy: true,
127136
}
137+
)
138+
139+
go func() {
140+
defer wg.Done()
141+
defer func() {
142+
if err := recover(); err != nil {
143+
nodeReport.Error = xerrors.Errorf("%v", err)
144+
}
145+
}()
128146

129147
nodeReport.Run(ctx)
130148

coderd/healthcheck/healthcheck.go

+13
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import (
77
"sync"
88
"time"
99

10+
"golang.org/x/xerrors"
1011
"tailscale.com/tailcfg"
1112
)
1213

@@ -38,6 +39,12 @@ func Run(ctx context.Context, opts *ReportOptions) (*Report, error) {
3839
wg.Add(1)
3940
go func() {
4041
defer wg.Done()
42+
defer func() {
43+
if err := recover(); err != nil {
44+
report.DERP.Error = xerrors.Errorf("%v", err)
45+
}
46+
}()
47+
4148
report.DERP.Run(ctx, &DERPReportOptions{
4249
DERPMap: opts.DERPMap,
4350
})
@@ -46,6 +53,12 @@ func Run(ctx context.Context, opts *ReportOptions) (*Report, error) {
4653
wg.Add(1)
4754
go func() {
4855
defer wg.Done()
56+
defer func() {
57+
if err := recover(); err != nil {
58+
report.AccessURL.Error = xerrors.Errorf("%v", err)
59+
}
60+
}()
61+
4962
report.AccessURL.Run(ctx, &AccessURLOptions{
5063
AccessURL: opts.AccessURL,
5164
Client: opts.Client,

docs/api/debug.md

+6-1
Original file line numberDiff line numberDiff line change
@@ -40,13 +40,14 @@ curl -X GET http://coder-server:8080/api/v2/debug/health \
4040
```json
4141
{
4242
"access_url": {
43-
"err": null,
43+
"error": null,
4444
"healthy": true,
4545
"healthzResponse": "string",
4646
"reachable": true,
4747
"statusCode": 0
4848
},
4949
"derp": {
50+
"error": null,
5051
"healthy": true,
5152
"netcheck": {
5253
"captivePortal": "string",
@@ -82,12 +83,14 @@ curl -X GET http://coder-server:8080/api/v2/debug/health \
8283
"netcheck_logs": ["string"],
8384
"regions": {
8485
"property1": {
86+
"error": null,
8587
"healthy": true,
8688
"node_reports": [
8789
{
8890
"can_exchange_messages": true,
8991
"client_errs": [[null]],
9092
"client_logs": [["string"]],
93+
"error": null,
9194
"healthy": true,
9295
"node": {
9396
"certName": "string",
@@ -141,12 +144,14 @@ curl -X GET http://coder-server:8080/api/v2/debug/health \
141144
}
142145
},
143146
"property2": {
147+
"error": null,
144148
"healthy": true,
145149
"node_reports": [
146150
{
147151
"can_exchange_messages": true,
148152
"client_errs": [[null]],
149153
"client_logs": [["string"]],
154+
"error": null,
150155
"healthy": true,
151156
"node": {
152157
"certName": "string",

0 commit comments

Comments
 (0)