Skip to content

Commit 87e5f26

Browse files
committed
feat(coderd/healthcheck): add error codes for the rest of the owl
1 parent 2f5530f commit 87e5f26

File tree

12 files changed

+161
-54
lines changed

12 files changed

+161
-54
lines changed

coderd/healthcheck/accessurl.go

Lines changed: 6 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -7,19 +7,10 @@ import (
77
"net/url"
88
"time"
99

10-
"golang.org/x/xerrors"
11-
1210
"github.com/coder/coder/v2/coderd/healthcheck/health"
1311
"github.com/coder/coder/v2/coderd/util/ptr"
1412
)
1513

16-
var (
17-
ErrAccessURLNotSet = "EACSURL01: Access URL not set"
18-
ErrAccessURLInvalid = "EACSURL02: Access URL invalid: "
19-
ErrAccessURLFetch = "EACSURL03: Failed to fetch /healthz: "
20-
ErrAccessURLNotOK = "EACSURL04: /healthz did not return 200 OK"
21-
)
22-
2314
// @typescript-generate AccessURLReport
2415
type AccessURLReport struct {
2516
// Healthy is deprecated and left for backward compatibility purposes, use `Severity` instead.
@@ -51,7 +42,7 @@ func (r *AccessURLReport) Run(ctx context.Context, opts *AccessURLReportOptions)
5142
r.Dismissed = opts.Dismissed
5243

5344
if opts.AccessURL == nil {
54-
r.Error = ptr.Ref(ErrAccessURLNotSet)
45+
r.Error = ptr.Ref(health.Messagef(health.CodeAccessURLNotSet, "Access URL not set"))
5546
r.Severity = health.SeverityError
5647
return
5748
}
@@ -63,29 +54,29 @@ func (r *AccessURLReport) Run(ctx context.Context, opts *AccessURLReportOptions)
6354

6455
accessURL, err := opts.AccessURL.Parse("/healthz")
6556
if err != nil {
66-
r.Error = convertError(xerrors.Errorf(ErrAccessURLInvalid+"parse healthz endpoint: %w", err))
57+
r.Error = ptr.Ref(health.Messagef(health.CodeAccessURLInvalid, "parse healthz endpoint: %s", err))
6758
r.Severity = health.SeverityError
6859
return
6960
}
7061

7162
req, err := http.NewRequestWithContext(ctx, "GET", accessURL.String(), nil)
7263
if err != nil {
73-
r.Error = convertError(xerrors.Errorf(ErrAccessURLFetch+"create healthz request: %w", err))
64+
r.Error = ptr.Ref(health.Messagef(health.CodeAccessURLFetch, "create healthz request: %s", err))
7465
r.Severity = health.SeverityError
7566
return
7667
}
7768

7869
res, err := opts.Client.Do(req)
7970
if err != nil {
80-
r.Error = convertError(xerrors.Errorf(ErrAccessURLFetch+"get healthz endpoint: %w", err))
71+
r.Error = ptr.Ref(health.Messagef(health.CodeAccessURLFetch, "get healthz endpoint: %s", err))
8172
r.Severity = health.SeverityError
8273
return
8374
}
8475
defer res.Body.Close()
8576

8677
body, err := io.ReadAll(res.Body)
8778
if err != nil {
88-
r.Error = convertError(xerrors.Errorf(ErrAccessURLFetch+"read healthz response: %w", err))
79+
r.Error = ptr.Ref(health.Messagef(health.CodeAccessURLFetch, "read healthz response: %s", err))
8980
r.Severity = health.SeverityError
9081
return
9182
}
@@ -95,7 +86,7 @@ func (r *AccessURLReport) Run(ctx context.Context, opts *AccessURLReportOptions)
9586
r.StatusCode = res.StatusCode
9687
if res.StatusCode != http.StatusOK {
9788
r.Severity = health.SeverityWarning
98-
r.Warnings = append(r.Warnings, ErrAccessURLNotOK)
89+
r.Warnings = append(r.Warnings, health.Messagef(health.CodeAccessURLNotOK, "/healthz did not return 200 OK"))
9990
}
10091
r.HealthzResponse = string(body)
10192
}

coderd/healthcheck/accessurl_test.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ func TestAccessURL(t *testing.T) {
6565
assert.Equal(t, 0, report.StatusCode)
6666
assert.Equal(t, "", report.HealthzResponse)
6767
require.NotNil(t, report.Error)
68-
assert.Contains(t, *report.Error, healthcheck.ErrAccessURLNotSet)
68+
assert.Contains(t, *report.Error, health.CodeAccessURLNotSet)
6969
})
7070

7171
t.Run("ClientErr", func(t *testing.T) {
@@ -101,7 +101,7 @@ func TestAccessURL(t *testing.T) {
101101
assert.Equal(t, "", report.HealthzResponse)
102102
require.NotNil(t, report.Error)
103103
assert.Contains(t, *report.Error, expErr.Error())
104-
assert.Contains(t, *report.Error, healthcheck.ErrAccessURLFetch)
104+
assert.Contains(t, *report.Error, health.CodeAccessURLFetch)
105105
})
106106

107107
t.Run("404", func(t *testing.T) {
@@ -131,7 +131,7 @@ func TestAccessURL(t *testing.T) {
131131
assert.Equal(t, string(resp), report.HealthzResponse)
132132
assert.Nil(t, report.Error)
133133
if assert.NotEmpty(t, report.Warnings) {
134-
assert.Contains(t, report.Warnings[0], healthcheck.ErrAccessURLNotOK)
134+
assert.Contains(t, report.Warnings[0], health.CodeAccessURLNotOK)
135135
}
136136
})
137137

coderd/healthcheck/database.go

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,11 @@ import (
44
"context"
55
"time"
66

7-
"golang.org/x/exp/slices"
8-
"golang.org/x/xerrors"
9-
107
"github.com/coder/coder/v2/coderd/database"
118
"github.com/coder/coder/v2/coderd/healthcheck/health"
9+
"github.com/coder/coder/v2/coderd/util/ptr"
10+
11+
"golang.org/x/exp/slices"
1212
)
1313

1414
const (
@@ -55,8 +55,9 @@ func (r *DatabaseReport) Run(ctx context.Context, opts *DatabaseReportOptions) {
5555
for i := 0; i < pingCount; i++ {
5656
pong, err := opts.DB.Ping(ctx)
5757
if err != nil {
58-
r.Error = convertError(xerrors.Errorf("ping: %w", err))
58+
r.Error = ptr.Ref(health.Messagef(health.CodeDatabasePingFailed, "ping database: %s", err))
5959
r.Severity = health.SeverityError
60+
6061
return
6162
}
6263
pings = append(pings, pong)
@@ -69,6 +70,7 @@ func (r *DatabaseReport) Run(ctx context.Context, opts *DatabaseReportOptions) {
6970
r.LatencyMS = latency.Milliseconds()
7071
if r.LatencyMS >= r.ThresholdMS {
7172
r.Severity = health.SeverityWarning
73+
r.Warnings = append(r.Warnings, health.Messagef(health.CodeDatabasePingSlow, "median database ping above threshold"))
7274
}
7375
r.Healthy = true
7476
r.Reachable = true

coderd/healthcheck/derphealth/derp.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,7 @@ func (r *Report) Run(ctx context.Context, opts *ReportOptions) {
137137
}
138138

139139
for _, w := range regionReport.Warnings {
140-
r.Warnings = append(r.Warnings, fmt.Sprintf("[%s] %s", regionReport.Region.RegionName, w))
140+
r.Warnings = append(r.Warnings, w)
141141
}
142142
mu.Unlock()
143143
}()
@@ -203,7 +203,7 @@ func (r *RegionReport) Run(ctx context.Context) {
203203
}
204204

205205
for _, w := range nodeReport.Warnings {
206-
r.Warnings = append(r.Warnings, fmt.Sprintf("[%s] %s", nodeReport.Node.Name, w))
206+
r.Warnings = append(r.Warnings, w)
207207
}
208208
r.mu.Unlock()
209209
}()
@@ -228,7 +228,7 @@ func (r *RegionReport) Run(ctx context.Context) {
228228
} else if unhealthyNodes == 1 {
229229
// r.Healthy = true (by default)
230230
r.Severity = health.SeverityWarning
231-
r.Warnings = append(r.Warnings, oneNodeUnhealthy)
231+
r.Warnings = append(r.Warnings, health.Messagef(health.CodeDERPOneNodeUnhealthy, oneNodeUnhealthy))
232232
} else if unhealthyNodes > 1 {
233233
r.Healthy = false
234234

@@ -292,7 +292,7 @@ func (r *NodeReport) Run(ctx context.Context) {
292292
}
293293

294294
if r.UsesWebsocket {
295-
r.Warnings = append(r.Warnings, warningNodeUsesWebsocket)
295+
r.Warnings = append(r.Warnings, health.Messagef(health.CodeDERPNodeUsesWebsocket, warningNodeUsesWebsocket))
296296
r.Severity = health.SeverityWarning
297297
}
298298
}

coderd/healthcheck/derphealth/derp_test.go

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,9 @@ func TestDERP(t *testing.T) {
129129
assert.True(t, report.Healthy)
130130
assert.Equal(t, health.SeverityWarning, report.Severity)
131131
assert.True(t, report.Dismissed)
132+
if assert.NotEmpty(t, report.Warnings) {
133+
assert.Contains(t, report.Warnings[0], health.CodeDERPOneNodeUnhealthy)
134+
}
132135
for _, region := range report.Regions {
133136
assert.True(t, region.Healthy)
134137
assert.True(t, region.NodeReports[0].Healthy)
@@ -232,7 +235,9 @@ func TestDERP(t *testing.T) {
232235

233236
assert.True(t, report.Healthy)
234237
assert.Equal(t, health.SeverityWarning, report.Severity)
235-
assert.NotEmpty(t, report.Warnings)
238+
if assert.NotEmpty(t, report.Warnings) {
239+
assert.Contains(t, report.Warnings[0], health.CodeDERPNodeUsesWebsocket)
240+
}
236241
for _, region := range report.Regions {
237242
assert.True(t, region.Healthy)
238243
assert.Equal(t, health.SeverityWarning, region.Severity)

coderd/healthcheck/health/model.go

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,37 @@
11
package health
22

3+
import (
4+
"fmt"
5+
"strings"
6+
)
7+
38
const (
49
SeverityOK Severity = "ok"
510
SeverityWarning Severity = "warning"
611
SeverityError Severity = "error"
12+
13+
// CodeUnknown is a catch-all health code when something unexpected goes wrong (for example, a panic).
14+
CodeUnknown Code = "EUNKNOWN"
15+
16+
CodeProxyUpdate Code = "EWP01"
17+
CodeProxyFetch Code = "EWP02"
18+
CodeProxyVersionMismatch Code = "EWP03"
19+
CodeProxyUnhealthy Code = "EWP04"
20+
21+
CodeDatabasePingFailed Code = "EDB01"
22+
CodeDatabasePingSlow Code = "EDB02"
23+
24+
CodeWebsocketDial Code = "EWS01"
25+
CodeWebsocketEcho Code = "EWS02"
26+
CodeWebsocketMsg Code = "EWS03"
27+
28+
CodeAccessURLNotSet Code = "EACS01"
29+
CodeAccessURLInvalid Code = "EACS02"
30+
CodeAccessURLFetch Code = "EACS03"
31+
CodeAccessURLNotOK Code = "EACS04"
32+
33+
CodeDERPNodeUsesWebsocket Code = `EDERP01`
34+
CodeDERPOneNodeUnhealthy Code = `EDERP02`
735
)
836

937
// @typescript-generate Severity
@@ -18,3 +46,17 @@ var severityRank = map[Severity]int{
1846
func (s Severity) Value() int {
1947
return severityRank[s]
2048
}
49+
50+
// Code is a stable identifier used to link to documentation.
// @typescript-generate Code
type Code string

// Messagef is a convenience function for formatting a healthcheck error message.
//
// The result is the code, followed by ": ", followed by msg formatted with
// args using the same verbs as fmt.Sprintf.
func Messagef(code Code, msg string, args ...any) string {
	var sb strings.Builder
	sb.WriteString(string(code))
	sb.WriteString(": ")
	// Format straight into the builder instead of allocating an intermediate
	// string; writes to a strings.Builder never return a non-nil error.
	_, _ = fmt.Fprintf(&sb, msg, args...)
	return sb.String()
}

coderd/healthcheck/healthcheck.go

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@ package healthcheck
22

33
import (
44
"context"
5-
"fmt"
65
"sync"
76
"time"
87

@@ -104,7 +103,7 @@ func Run(ctx context.Context, opts *ReportOptions) *Report {
104103
defer wg.Done()
105104
defer func() {
106105
if err := recover(); err != nil {
107-
report.DERP.Error = ptr.Ref(fmt.Sprint(err))
106+
report.DERP.Error = ptr.Ref(health.Messagef(health.CodeUnknown, "derp report panic: %s", err))
108107
}
109108
}()
110109

@@ -116,7 +115,7 @@ func Run(ctx context.Context, opts *ReportOptions) *Report {
116115
defer wg.Done()
117116
defer func() {
118117
if err := recover(); err != nil {
119-
report.AccessURL.Error = ptr.Ref(fmt.Sprint(err))
118+
report.AccessURL.Error = ptr.Ref(health.Messagef(health.CodeUnknown, "access url report panic: %s", err))
120119
}
121120
}()
122121

@@ -128,7 +127,7 @@ func Run(ctx context.Context, opts *ReportOptions) *Report {
128127
defer wg.Done()
129128
defer func() {
130129
if err := recover(); err != nil {
131-
report.Websocket.Error = ptr.Ref(fmt.Sprint(err))
130+
report.Websocket.Error = ptr.Ref(health.Messagef(health.CodeUnknown, "websocket report panic: %s", err))
132131
}
133132
}()
134133

@@ -140,7 +139,7 @@ func Run(ctx context.Context, opts *ReportOptions) *Report {
140139
defer wg.Done()
141140
defer func() {
142141
if err := recover(); err != nil {
143-
report.Database.Error = ptr.Ref(fmt.Sprint(err))
142+
report.Database.Error = ptr.Ref(health.Messagef(health.CodeUnknown, "database report panic: %s", err))
144143
}
145144
}()
146145

@@ -152,7 +151,7 @@ func Run(ctx context.Context, opts *ReportOptions) *Report {
152151
defer wg.Done()
153152
defer func() {
154153
if err := recover(); err != nil {
155-
report.WorkspaceProxy.Error = ptr.Ref(fmt.Sprint(err))
154+
report.WorkspaceProxy.Error = ptr.Ref(health.Messagef(health.CodeUnknown, "proxy report panic: %s", err))
156155
}
157156
}()
158157

coderd/healthcheck/websocket.go

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ import (
1313
"nhooyr.io/websocket"
1414

1515
"github.com/coder/coder/v2/coderd/healthcheck/health"
16+
"github.com/coder/coder/v2/coderd/util/ptr"
1617
)
1718

1819
// @typescript-generate WebsocketReport
@@ -75,6 +76,7 @@ func (r *WebsocketReport) Run(ctx context.Context, opts *WebsocketReportOptions)
7576
}
7677
if err != nil {
7778
r.Error = convertError(xerrors.Errorf("websocket dial: %w", err))
79+
r.Error = ptr.Ref(health.Messagef(health.CodeWebsocketDial, "websocket dial: %s", err))
7880
r.Severity = health.SeverityError
7981
return
8082
}
@@ -84,26 +86,26 @@ func (r *WebsocketReport) Run(ctx context.Context, opts *WebsocketReportOptions)
8486
msg := strconv.Itoa(i)
8587
err := c.Write(ctx, websocket.MessageText, []byte(msg))
8688
if err != nil {
87-
r.Error = convertError(xerrors.Errorf("write message: %w", err))
89+
r.Error = ptr.Ref(health.Messagef(health.CodeWebsocketEcho, "write message: %s", err))
8890
r.Severity = health.SeverityError
8991
return
9092
}
9193

9294
ty, got, err := c.Read(ctx)
9395
if err != nil {
94-
r.Error = convertError(xerrors.Errorf("read message: %w", err))
96+
r.Error = ptr.Ref(health.Messagef(health.CodeWebsocketEcho, "read message: %s", err))
9597
r.Severity = health.SeverityError
9698
return
9799
}
98100

99101
if ty != websocket.MessageText {
100-
r.Error = convertError(xerrors.Errorf("received incorrect message type: %v", ty))
102+
r.Error = ptr.Ref(health.Messagef(health.CodeWebsocketMsg, "received incorrect message type: %v", ty))
101103
r.Severity = health.SeverityError
102104
return
103105
}
104106

105107
if string(got) != msg {
106-
r.Error = convertError(xerrors.Errorf("received incorrect message: wanted %q, got %q", msg, string(got)))
108+
r.Error = ptr.Ref(health.Messagef(health.CodeWebsocketMsg, "received incorrect message: wanted %q, got %q", msg, string(got)))
107109
r.Severity = health.SeverityError
108110
return
109111
}

coderd/healthcheck/websocket_test.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,9 @@ func TestWebsocket(t *testing.T) {
6363
APIKey: "test",
6464
})
6565

66-
require.NotNil(t, wsReport.Error)
66+
if assert.NotNil(t, wsReport.Error) {
67+
assert.Contains(t, *wsReport.Error, health.CodeWebsocketDial)
68+
}
6769
require.Equal(t, health.SeverityError, wsReport.Severity)
6870
assert.Equal(t, wsReport.Body, "test error")
6971
assert.Equal(t, wsReport.Code, http.StatusBadRequest)

0 commit comments

Comments
 (0)