Skip to content

feat(coderd/healthcheck): add access URL error codes and healthcheck doc #10915

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 17 commits into from
Nov 30, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
feat(coderd/healthcheck): add error codes for the rest of the owl
  • Loading branch information
johnstcn committed Nov 30, 2023
commit 87e5f26c2cb1006f04d6cf3ffda1e58e0c093035
21 changes: 6 additions & 15 deletions coderd/healthcheck/accessurl.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,19 +7,10 @@ import (
"net/url"
"time"

"golang.org/x/xerrors"

"github.com/coder/coder/v2/coderd/healthcheck/health"
"github.com/coder/coder/v2/coderd/util/ptr"
)

var (
ErrAccessURLNotSet = "EACSURL01: Access URL not set"
ErrAccessURLInvalid = "EACSURL02: Access URL invalid: "
ErrAccessURLFetch = "EACSURL03: Failed to fetch /healthz: "
ErrAccessURLNotOK = "EACSURL04: /healthz did not return 200 OK"
)

// @typescript-generate AccessURLReport
type AccessURLReport struct {
// Healthy is deprecated and left for backward compatibility purposes, use `Severity` instead.
Expand Down Expand Up @@ -51,7 +42,7 @@ func (r *AccessURLReport) Run(ctx context.Context, opts *AccessURLReportOptions)
r.Dismissed = opts.Dismissed

if opts.AccessURL == nil {
r.Error = ptr.Ref(ErrAccessURLNotSet)
r.Error = ptr.Ref(health.Messagef(health.CodeAccessURLNotSet, "Access URL not set"))
r.Severity = health.SeverityError
return
}
Expand All @@ -63,29 +54,29 @@ func (r *AccessURLReport) Run(ctx context.Context, opts *AccessURLReportOptions)

accessURL, err := opts.AccessURL.Parse("/healthz")
if err != nil {
r.Error = convertError(xerrors.Errorf(ErrAccessURLInvalid+"parse healthz endpoint: %w", err))
r.Error = ptr.Ref(health.Messagef(health.CodeAccessURLInvalid, "parse healthz endpoint: %s", err))
r.Severity = health.SeverityError
return
}

req, err := http.NewRequestWithContext(ctx, "GET", accessURL.String(), nil)
if err != nil {
r.Error = convertError(xerrors.Errorf(ErrAccessURLFetch+"create healthz request: %w", err))
r.Error = ptr.Ref(health.Messagef(health.CodeAccessURLFetch, "create healthz request: %s", err))
r.Severity = health.SeverityError
return
}

res, err := opts.Client.Do(req)
if err != nil {
r.Error = convertError(xerrors.Errorf(ErrAccessURLFetch+"get healthz endpoint: %w", err))
r.Error = ptr.Ref(health.Messagef(health.CodeAccessURLFetch, "get healthz endpoint: %s", err))
r.Severity = health.SeverityError
return
}
defer res.Body.Close()

body, err := io.ReadAll(res.Body)
if err != nil {
r.Error = convertError(xerrors.Errorf(ErrAccessURLFetch+"read healthz response: %w", err))
r.Error = ptr.Ref(health.Messagef(health.CodeAccessURLFetch, "read healthz response: %s", err))
r.Severity = health.SeverityError
return
}
Expand All @@ -95,7 +86,7 @@ func (r *AccessURLReport) Run(ctx context.Context, opts *AccessURLReportOptions)
r.StatusCode = res.StatusCode
if res.StatusCode != http.StatusOK {
r.Severity = health.SeverityWarning
r.Warnings = append(r.Warnings, ErrAccessURLNotOK)
r.Warnings = append(r.Warnings, health.Messagef(health.CodeAccessURLNotOK, "/healthz did not return 200 OK"))
}
r.HealthzResponse = string(body)
}
6 changes: 3 additions & 3 deletions coderd/healthcheck/accessurl_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ func TestAccessURL(t *testing.T) {
assert.Equal(t, 0, report.StatusCode)
assert.Equal(t, "", report.HealthzResponse)
require.NotNil(t, report.Error)
assert.Contains(t, *report.Error, healthcheck.ErrAccessURLNotSet)
assert.Contains(t, *report.Error, health.CodeAccessURLNotSet)
})

t.Run("ClientErr", func(t *testing.T) {
Expand Down Expand Up @@ -101,7 +101,7 @@ func TestAccessURL(t *testing.T) {
assert.Equal(t, "", report.HealthzResponse)
require.NotNil(t, report.Error)
assert.Contains(t, *report.Error, expErr.Error())
assert.Contains(t, *report.Error, healthcheck.ErrAccessURLFetch)
assert.Contains(t, *report.Error, health.CodeAccessURLFetch)
})

t.Run("404", func(t *testing.T) {
Expand Down Expand Up @@ -131,7 +131,7 @@ func TestAccessURL(t *testing.T) {
assert.Equal(t, string(resp), report.HealthzResponse)
assert.Nil(t, report.Error)
if assert.NotEmpty(t, report.Warnings) {
assert.Contains(t, report.Warnings[0], healthcheck.ErrAccessURLNotOK)
assert.Contains(t, report.Warnings[0], health.CodeAccessURLNotOK)
}
})

Expand Down
10 changes: 6 additions & 4 deletions coderd/healthcheck/database.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@ import (
"context"
"time"

"golang.org/x/exp/slices"
"golang.org/x/xerrors"

"github.com/coder/coder/v2/coderd/database"
"github.com/coder/coder/v2/coderd/healthcheck/health"
"github.com/coder/coder/v2/coderd/util/ptr"

"golang.org/x/exp/slices"
)

const (
Expand Down Expand Up @@ -55,8 +55,9 @@ func (r *DatabaseReport) Run(ctx context.Context, opts *DatabaseReportOptions) {
for i := 0; i < pingCount; i++ {
pong, err := opts.DB.Ping(ctx)
if err != nil {
r.Error = convertError(xerrors.Errorf("ping: %w", err))
r.Error = ptr.Ref(health.Messagef(health.CodeDatabasePingFailed, "ping database: %s", err))
r.Severity = health.SeverityError

return
}
pings = append(pings, pong)
Expand All @@ -69,6 +70,7 @@ func (r *DatabaseReport) Run(ctx context.Context, opts *DatabaseReportOptions) {
r.LatencyMS = latency.Milliseconds()
if r.LatencyMS >= r.ThresholdMS {
r.Severity = health.SeverityWarning
r.Warnings = append(r.Warnings, health.Messagef(health.CodeDatabasePingSlow, "median database ping above threshold"))
}
r.Healthy = true
r.Reachable = true
Expand Down
8 changes: 4 additions & 4 deletions coderd/healthcheck/derphealth/derp.go
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ func (r *Report) Run(ctx context.Context, opts *ReportOptions) {
}

for _, w := range regionReport.Warnings {
r.Warnings = append(r.Warnings, fmt.Sprintf("[%s] %s", regionReport.Region.RegionName, w))
r.Warnings = append(r.Warnings, w)
}
mu.Unlock()
}()
Expand Down Expand Up @@ -203,7 +203,7 @@ func (r *RegionReport) Run(ctx context.Context) {
}

for _, w := range nodeReport.Warnings {
r.Warnings = append(r.Warnings, fmt.Sprintf("[%s] %s", nodeReport.Node.Name, w))
r.Warnings = append(r.Warnings, w)
}
r.mu.Unlock()
}()
Expand All @@ -228,7 +228,7 @@ func (r *RegionReport) Run(ctx context.Context) {
} else if unhealthyNodes == 1 {
// r.Healthy = true (by default)
r.Severity = health.SeverityWarning
r.Warnings = append(r.Warnings, oneNodeUnhealthy)
r.Warnings = append(r.Warnings, health.Messagef(health.CodeDERPOneNodeUnhealthy, oneNodeUnhealthy))
} else if unhealthyNodes > 1 {
r.Healthy = false

Expand Down Expand Up @@ -292,7 +292,7 @@ func (r *NodeReport) Run(ctx context.Context) {
}

if r.UsesWebsocket {
r.Warnings = append(r.Warnings, warningNodeUsesWebsocket)
r.Warnings = append(r.Warnings, health.Messagef(health.CodeDERPNodeUsesWebsocket, warningNodeUsesWebsocket))
r.Severity = health.SeverityWarning
}
}
Expand Down
7 changes: 6 additions & 1 deletion coderd/healthcheck/derphealth/derp_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,9 @@ func TestDERP(t *testing.T) {
assert.True(t, report.Healthy)
assert.Equal(t, health.SeverityWarning, report.Severity)
assert.True(t, report.Dismissed)
if assert.NotEmpty(t, report.Warnings) {
assert.Contains(t, report.Warnings[0], health.CodeDERPOneNodeUnhealthy)
}
for _, region := range report.Regions {
assert.True(t, region.Healthy)
assert.True(t, region.NodeReports[0].Healthy)
Expand Down Expand Up @@ -232,7 +235,9 @@ func TestDERP(t *testing.T) {

assert.True(t, report.Healthy)
assert.Equal(t, health.SeverityWarning, report.Severity)
assert.NotEmpty(t, report.Warnings)
if assert.NotEmpty(t, report.Warnings) {
assert.Contains(t, report.Warnings[0], health.CodeDERPNodeUsesWebsocket)
}
for _, region := range report.Regions {
assert.True(t, region.Healthy)
assert.Equal(t, health.SeverityWarning, region.Severity)
Expand Down
42 changes: 42 additions & 0 deletions coderd/healthcheck/health/model.go
Original file line number Diff line number Diff line change
@@ -1,9 +1,37 @@
package health

import (
"fmt"
"strings"
)

const (
SeverityOK Severity = "ok"
SeverityWarning Severity = "warning"
SeverityError Severity = "error"

// CodeUnknown is a catch-all health code when something unexpected goes wrong (for example, a panic).
CodeUnknown Code = "EUNKNOWN"

CodeProxyUpdate Code = "EWP01"
CodeProxyFetch Code = "EWP02"
CodeProxyVersionMismatch Code = "EWP03"
CodeProxyUnhealthy Code = "EWP04"

CodeDatabasePingFailed Code = "EDB01"
CodeDatabasePingSlow Code = "EDB02"

CodeWebsocketDial Code = "EWS01"
CodeWebsocketEcho Code = "EWS02"
CodeWebsocketMsg Code = "EWS03"

CodeAccessURLNotSet Code = "EACS01"
CodeAccessURLInvalid Code = "EACS02"
CodeAccessURLFetch Code = "EACS03"
CodeAccessURLNotOK Code = "EACS04"

CodeDERPNodeUsesWebsocket Code = `EDERP01`
CodeDERPOneNodeUnhealthy Code = `EDERP02`
Comment on lines +13 to +34
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

review: these all have to go in here to be generated properly

)

// @typescript-generate Severity
Expand All @@ -18,3 +46,17 @@ var severityRank = map[Severity]int{
func (s Severity) Value() int {
return severityRank[s]
}

// Code identifies a particular healthcheck condition. Codes are intended to
// remain stable so that they can be used to link to documentation.
// @typescript-generate Code
type Code string

// Messagef builds a healthcheck message of the form "<code>: <text>", where
// <text> is msg expanded with args following the fmt formatting rules.
func Messagef(code Code, msg string, args ...any) string {
	var out strings.Builder
	// Prefix with the code and a fixed ": " separator.
	out.WriteString(string(code))
	out.WriteString(": ")
	// Format straight into the builder; writes to a strings.Builder never fail.
	fmt.Fprintf(&out, msg, args...)
	return out.String()
}
11 changes: 5 additions & 6 deletions coderd/healthcheck/healthcheck.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ package healthcheck

import (
"context"
"fmt"
"sync"
"time"

Expand Down Expand Up @@ -104,7 +103,7 @@ func Run(ctx context.Context, opts *ReportOptions) *Report {
defer wg.Done()
defer func() {
if err := recover(); err != nil {
report.DERP.Error = ptr.Ref(fmt.Sprint(err))
report.DERP.Error = ptr.Ref(health.Messagef(health.CodeUnknown, "derp report panic: %s", err))
}
}()

Expand All @@ -116,7 +115,7 @@ func Run(ctx context.Context, opts *ReportOptions) *Report {
defer wg.Done()
defer func() {
if err := recover(); err != nil {
report.AccessURL.Error = ptr.Ref(fmt.Sprint(err))
report.AccessURL.Error = ptr.Ref(health.Messagef(health.CodeUnknown, "access url report panic: %s", err))
}
}()

Expand All @@ -128,7 +127,7 @@ func Run(ctx context.Context, opts *ReportOptions) *Report {
defer wg.Done()
defer func() {
if err := recover(); err != nil {
report.Websocket.Error = ptr.Ref(fmt.Sprint(err))
report.Websocket.Error = ptr.Ref(health.Messagef(health.CodeUnknown, "websocket report panic: %s", err))
}
}()

Expand All @@ -140,7 +139,7 @@ func Run(ctx context.Context, opts *ReportOptions) *Report {
defer wg.Done()
defer func() {
if err := recover(); err != nil {
report.Database.Error = ptr.Ref(fmt.Sprint(err))
report.Database.Error = ptr.Ref(health.Messagef(health.CodeUnknown, "database report panic: %s", err))
}
}()

Expand All @@ -152,7 +151,7 @@ func Run(ctx context.Context, opts *ReportOptions) *Report {
defer wg.Done()
defer func() {
if err := recover(); err != nil {
report.WorkspaceProxy.Error = ptr.Ref(fmt.Sprint(err))
report.WorkspaceProxy.Error = ptr.Ref(health.Messagef(health.CodeUnknown, "proxy report panic: %s", err))
}
}()

Expand Down
10 changes: 6 additions & 4 deletions coderd/healthcheck/websocket.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (
"nhooyr.io/websocket"

"github.com/coder/coder/v2/coderd/healthcheck/health"
"github.com/coder/coder/v2/coderd/util/ptr"
)

// @typescript-generate WebsocketReport
Expand Down Expand Up @@ -75,6 +76,7 @@ func (r *WebsocketReport) Run(ctx context.Context, opts *WebsocketReportOptions)
}
if err != nil {
r.Error = convertError(xerrors.Errorf("websocket dial: %w", err))
r.Error = ptr.Ref(health.Messagef(health.CodeWebsocketDial, "websocket dial: %s", err))
r.Severity = health.SeverityError
return
}
Expand All @@ -84,26 +86,26 @@ func (r *WebsocketReport) Run(ctx context.Context, opts *WebsocketReportOptions)
msg := strconv.Itoa(i)
err := c.Write(ctx, websocket.MessageText, []byte(msg))
if err != nil {
r.Error = convertError(xerrors.Errorf("write message: %w", err))
r.Error = ptr.Ref(health.Messagef(health.CodeWebsocketEcho, "write message: %s", err))
r.Severity = health.SeverityError
return
}

ty, got, err := c.Read(ctx)
if err != nil {
r.Error = convertError(xerrors.Errorf("read message: %w", err))
r.Error = ptr.Ref(health.Messagef(health.CodeWebsocketEcho, "read message: %s", err))
r.Severity = health.SeverityError
return
}

if ty != websocket.MessageText {
r.Error = convertError(xerrors.Errorf("received incorrect message type: %v", ty))
r.Error = ptr.Ref(health.Messagef(health.CodeWebsocketMsg, "received incorrect message type: %v", ty))
r.Severity = health.SeverityError
return
}

if string(got) != msg {
r.Error = convertError(xerrors.Errorf("received incorrect message: wanted %q, got %q", msg, string(got)))
r.Error = ptr.Ref(health.Messagef(health.CodeWebsocketMsg, "received incorrect message: wanted %q, got %q", msg, string(got)))
r.Severity = health.SeverityError
return
}
Expand Down
4 changes: 3 additions & 1 deletion coderd/healthcheck/websocket_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,9 @@ func TestWebsocket(t *testing.T) {
APIKey: "test",
})

require.NotNil(t, wsReport.Error)
if assert.NotNil(t, wsReport.Error) {
assert.Contains(t, *wsReport.Error, health.CodeWebsocketDial)
}
require.Equal(t, health.SeverityError, wsReport.Severity)
assert.Equal(t, wsReport.Body, "test error")
assert.Equal(t, wsReport.Code, http.StatusBadRequest)
Expand Down
Loading