Skip to content

feat(coderd/healthcheck): add access URL error codes and healthcheck doc #10915

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 17 commits into from
Nov 30, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 6 additions & 7 deletions coderd/healthcheck/accessurl.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,6 @@ import (
"net/url"
"time"

"golang.org/x/xerrors"

"github.com/coder/coder/v2/coderd/healthcheck/health"
"github.com/coder/coder/v2/coderd/util/ptr"
)
Expand Down Expand Up @@ -44,7 +42,7 @@ func (r *AccessURLReport) Run(ctx context.Context, opts *AccessURLReportOptions)
r.Dismissed = opts.Dismissed

if opts.AccessURL == nil {
r.Error = ptr.Ref("access URL is nil")
r.Error = ptr.Ref(health.Messagef(health.CodeAccessURLNotSet, "Access URL not set"))
r.Severity = health.SeverityError
return
}
Expand All @@ -56,29 +54,29 @@ func (r *AccessURLReport) Run(ctx context.Context, opts *AccessURLReportOptions)

accessURL, err := opts.AccessURL.Parse("/healthz")
if err != nil {
r.Error = convertError(xerrors.Errorf("parse healthz endpoint: %w", err))
r.Error = ptr.Ref(health.Messagef(health.CodeAccessURLInvalid, "parse healthz endpoint: %s", err))
r.Severity = health.SeverityError
return
}

req, err := http.NewRequestWithContext(ctx, "GET", accessURL.String(), nil)
if err != nil {
r.Error = convertError(xerrors.Errorf("create healthz request: %w", err))
r.Error = ptr.Ref(health.Messagef(health.CodeAccessURLFetch, "create healthz request: %s", err))
r.Severity = health.SeverityError
return
}

res, err := opts.Client.Do(req)
if err != nil {
r.Error = convertError(xerrors.Errorf("get healthz endpoint: %w", err))
r.Error = ptr.Ref(health.Messagef(health.CodeAccessURLFetch, "get healthz endpoint: %s", err))
r.Severity = health.SeverityError
return
}
defer res.Body.Close()

body, err := io.ReadAll(res.Body)
if err != nil {
r.Error = convertError(xerrors.Errorf("read healthz response: %w", err))
r.Error = ptr.Ref(health.Messagef(health.CodeAccessURLFetch, "read healthz response: %s", err))
r.Severity = health.SeverityError
return
}
Expand All @@ -88,6 +86,7 @@ func (r *AccessURLReport) Run(ctx context.Context, opts *AccessURLReportOptions)
r.StatusCode = res.StatusCode
if res.StatusCode != http.StatusOK {
r.Severity = health.SeverityWarning
r.Warnings = append(r.Warnings, health.Messagef(health.CodeAccessURLNotOK, "/healthz did not return 200 OK"))
}
r.HealthzResponse = string(body)
}
82 changes: 57 additions & 25 deletions coderd/healthcheck/accessurl_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ import (
"github.com/stretchr/testify/require"
"golang.org/x/xerrors"

"github.com/coder/coder/v2/coderd/coderdtest"
"github.com/coder/coder/v2/coderd/healthcheck"
"github.com/coder/coder/v2/coderd/healthcheck/health"
)
Expand All @@ -25,12 +24,17 @@ func TestAccessURL(t *testing.T) {
var (
ctx, cancel = context.WithCancel(context.Background())
report healthcheck.AccessURLReport
client = coderdtest.New(t, nil)
resp = []byte("OK")
srv = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusOK)
_, _ = w.Write(resp)
}))
)
defer cancel()

report.Run(ctx, &healthcheck.AccessURLReportOptions{
AccessURL: client.URL,
Client: srv.Client(),
AccessURL: mustURL(t, srv.URL),
})

assert.True(t, report.Healthy)
Expand All @@ -41,35 +45,27 @@ func TestAccessURL(t *testing.T) {
assert.Nil(t, report.Error)
})

t.Run("404", func(t *testing.T) {
t.Run("NotSet", func(t *testing.T) {
t.Parallel()

var (
ctx, cancel = context.WithCancel(context.Background())
report healthcheck.AccessURLReport
resp = []byte("NOT OK")
srv = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusNotFound)
w.Write(resp)
}))
)
defer cancel()
defer srv.Close()

u, err := url.Parse(srv.URL)
require.NoError(t, err)

report.Run(ctx, &healthcheck.AccessURLReportOptions{
Client: srv.Client(),
AccessURL: u,
Client: nil, // defaults to http.DefaultClient
AccessURL: nil,
})

assert.False(t, report.Healthy)
assert.True(t, report.Reachable)
assert.Equal(t, health.SeverityWarning, report.Severity)
assert.Equal(t, http.StatusNotFound, report.StatusCode)
assert.Equal(t, string(resp), report.HealthzResponse)
assert.Nil(t, report.Error)
assert.False(t, report.Reachable)
assert.Equal(t, health.SeverityError, report.Severity)
assert.Equal(t, 0, report.StatusCode)
assert.Equal(t, "", report.HealthzResponse)
require.NotNil(t, report.Error)
assert.Contains(t, *report.Error, health.CodeAccessURLNotSet)
})

t.Run("ClientErr", func(t *testing.T) {
Expand All @@ -81,7 +77,7 @@ func TestAccessURL(t *testing.T) {
resp = []byte("OK")
srv = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusOK)
w.Write(resp)
_, _ = w.Write(resp)
}))
client = srv.Client()
)
Expand All @@ -93,12 +89,9 @@ func TestAccessURL(t *testing.T) {
return nil, expErr
})

u, err := url.Parse(srv.URL)
require.NoError(t, err)

report.Run(ctx, &healthcheck.AccessURLReportOptions{
Client: client,
AccessURL: u,
AccessURL: mustURL(t, srv.URL),
})

assert.False(t, report.Healthy)
Expand All @@ -108,6 +101,38 @@ func TestAccessURL(t *testing.T) {
assert.Equal(t, "", report.HealthzResponse)
require.NotNil(t, report.Error)
assert.Contains(t, *report.Error, expErr.Error())
assert.Contains(t, *report.Error, health.CodeAccessURLFetch)
})

t.Run("404", func(t *testing.T) {
t.Parallel()

var (
ctx, cancel = context.WithCancel(context.Background())
report healthcheck.AccessURLReport
resp = []byte("NOT OK")
srv = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusNotFound)
_, _ = w.Write(resp)
}))
)
defer cancel()
defer srv.Close()

report.Run(ctx, &healthcheck.AccessURLReportOptions{
Client: srv.Client(),
AccessURL: mustURL(t, srv.URL),
})

assert.False(t, report.Healthy)
assert.True(t, report.Reachable)
assert.Equal(t, health.SeverityWarning, report.Severity)
assert.Equal(t, http.StatusNotFound, report.StatusCode)
assert.Equal(t, string(resp), report.HealthzResponse)
assert.Nil(t, report.Error)
if assert.NotEmpty(t, report.Warnings) {
assert.Contains(t, report.Warnings[0], health.CodeAccessURLNotOK)
}
})

t.Run("DismissedError", func(t *testing.T) {
Expand All @@ -133,3 +158,10 @@ type roundTripFunc func(r *http.Request) (*http.Response, error)
func (rt roundTripFunc) RoundTrip(r *http.Request) (*http.Response, error) {
// Invoke the underlying function value with the request and hand back its
// response and error unchanged.
return rt(r)
}

// mustURL parses s with url.Parse and returns the result. A parse error is
// reported through require.NoError, and t.Helper is called so the failure is
// attributed to the caller's line.
func mustURL(t testing.TB, s string) *url.URL {
	t.Helper()

	parsed, parseErr := url.Parse(s)
	require.NoError(t, parseErr)

	return parsed
}
10 changes: 6 additions & 4 deletions coderd/healthcheck/database.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@ import (
"context"
"time"

"golang.org/x/exp/slices"
"golang.org/x/xerrors"

"github.com/coder/coder/v2/coderd/database"
"github.com/coder/coder/v2/coderd/healthcheck/health"
"github.com/coder/coder/v2/coderd/util/ptr"

"golang.org/x/exp/slices"
)

const (
Expand Down Expand Up @@ -55,8 +55,9 @@ func (r *DatabaseReport) Run(ctx context.Context, opts *DatabaseReportOptions) {
for i := 0; i < pingCount; i++ {
pong, err := opts.DB.Ping(ctx)
if err != nil {
r.Error = convertError(xerrors.Errorf("ping: %w", err))
r.Error = ptr.Ref(health.Messagef(health.CodeDatabasePingFailed, "ping database: %s", err))
r.Severity = health.SeverityError

return
}
pings = append(pings, pong)
Expand All @@ -69,6 +70,7 @@ func (r *DatabaseReport) Run(ctx context.Context, opts *DatabaseReportOptions) {
r.LatencyMS = latency.Milliseconds()
if r.LatencyMS >= r.ThresholdMS {
r.Severity = health.SeverityWarning
r.Warnings = append(r.Warnings, health.Messagef(health.CodeDatabasePingSlow, "median database ping above threshold"))
}
r.Healthy = true
r.Reachable = true
Expand Down
6 changes: 6 additions & 0 deletions coderd/healthcheck/database_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ func TestDatabase(t *testing.T) {
require.NotNil(t, report.Error)
assert.Equal(t, healthcheck.DatabaseDefaultThreshold.Milliseconds(), report.ThresholdMS)
assert.Contains(t, *report.Error, err.Error())
assert.Contains(t, *report.Error, health.CodeDatabasePingFailed)
})

t.Run("DismissedError", func(t *testing.T) {
Expand All @@ -85,6 +86,7 @@ func TestDatabase(t *testing.T) {
assert.Equal(t, health.SeverityError, report.Severity)
assert.True(t, report.Dismissed)
require.NotNil(t, report.Error)
assert.Contains(t, *report.Error, health.CodeDatabasePingFailed)
})

t.Run("Median", func(t *testing.T) {
Expand Down Expand Up @@ -112,6 +114,7 @@ func TestDatabase(t *testing.T) {
assert.EqualValues(t, 1, report.LatencyMS)
assert.Equal(t, healthcheck.DatabaseDefaultThreshold.Milliseconds(), report.ThresholdMS)
assert.Nil(t, report.Error)
assert.Empty(t, report.Warnings)
})

t.Run("Threshold", func(t *testing.T) {
Expand Down Expand Up @@ -139,5 +142,8 @@ func TestDatabase(t *testing.T) {
assert.EqualValues(t, 1000, report.LatencyMS)
assert.Equal(t, time.Second.Milliseconds(), report.ThresholdMS)
assert.Nil(t, report.Error)
if assert.NotEmpty(t, report.Warnings) {
assert.Contains(t, report.Warnings[0], health.CodeDatabasePingSlow)
}
})
}
12 changes: 4 additions & 8 deletions coderd/healthcheck/derphealth/derp.go
Original file line number Diff line number Diff line change
Expand Up @@ -136,9 +136,7 @@ func (r *Report) Run(ctx context.Context, opts *ReportOptions) {
r.Healthy = false
}

for _, w := range regionReport.Warnings {
r.Warnings = append(r.Warnings, fmt.Sprintf("[%s] %s", regionReport.Region.RegionName, w))
}
r.Warnings = append(r.Warnings, regionReport.Warnings...)
mu.Unlock()
}()
}
Expand Down Expand Up @@ -202,9 +200,7 @@ func (r *RegionReport) Run(ctx context.Context) {
unhealthyNodes++
}

for _, w := range nodeReport.Warnings {
r.Warnings = append(r.Warnings, fmt.Sprintf("[%s] %s", nodeReport.Node.Name, w))
}
r.Warnings = append(r.Warnings, nodeReport.Warnings...)
r.mu.Unlock()
}()
}
Expand All @@ -228,7 +224,7 @@ func (r *RegionReport) Run(ctx context.Context) {
} else if unhealthyNodes == 1 {
// r.Healthy = true (by default)
r.Severity = health.SeverityWarning
r.Warnings = append(r.Warnings, oneNodeUnhealthy)
r.Warnings = append(r.Warnings, health.Messagef(health.CodeDERPOneNodeUnhealthy, oneNodeUnhealthy))
} else if unhealthyNodes > 1 {
r.Healthy = false

Expand Down Expand Up @@ -292,7 +288,7 @@ func (r *NodeReport) Run(ctx context.Context) {
}

if r.UsesWebsocket {
r.Warnings = append(r.Warnings, warningNodeUsesWebsocket)
r.Warnings = append(r.Warnings, health.Messagef(health.CodeDERPNodeUsesWebsocket, warningNodeUsesWebsocket))
r.Severity = health.SeverityWarning
}
}
Expand Down
7 changes: 6 additions & 1 deletion coderd/healthcheck/derphealth/derp_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,9 @@ func TestDERP(t *testing.T) {
assert.True(t, report.Healthy)
assert.Equal(t, health.SeverityWarning, report.Severity)
assert.True(t, report.Dismissed)
if assert.NotEmpty(t, report.Warnings) {
assert.Contains(t, report.Warnings[0], health.CodeDERPOneNodeUnhealthy)
}
for _, region := range report.Regions {
assert.True(t, region.Healthy)
assert.True(t, region.NodeReports[0].Healthy)
Expand Down Expand Up @@ -232,7 +235,9 @@ func TestDERP(t *testing.T) {

assert.True(t, report.Healthy)
assert.Equal(t, health.SeverityWarning, report.Severity)
assert.NotEmpty(t, report.Warnings)
if assert.NotEmpty(t, report.Warnings) {
assert.Contains(t, report.Warnings[0], health.CodeDERPNodeUsesWebsocket)
}
for _, region := range report.Regions {
assert.True(t, region.Healthy)
assert.Equal(t, health.SeverityWarning, region.Severity)
Expand Down
42 changes: 42 additions & 0 deletions coderd/healthcheck/health/model.go
Original file line number Diff line number Diff line change
@@ -1,9 +1,37 @@
package health

import (
"fmt"
"strings"
)

const (
SeverityOK Severity = "ok"
SeverityWarning Severity = "warning"
SeverityError Severity = "error"

// CodeUnknown is a catch-all health code when something unexpected goes wrong (for example, a panic).
CodeUnknown Code = "EUNKNOWN"

CodeProxyUpdate Code = "EWP01"
CodeProxyFetch Code = "EWP02"
CodeProxyVersionMismatch Code = "EWP03"
CodeProxyUnhealthy Code = "EWP04"

CodeDatabasePingFailed Code = "EDB01"
CodeDatabasePingSlow Code = "EDB02"

CodeWebsocketDial Code = "EWS01"
CodeWebsocketEcho Code = "EWS02"
CodeWebsocketMsg Code = "EWS03"

CodeAccessURLNotSet Code = "EACS01"
CodeAccessURLInvalid Code = "EACS02"
CodeAccessURLFetch Code = "EACS03"
CodeAccessURLNotOK Code = "EACS04"

CodeDERPNodeUsesWebsocket Code = `EDERP01`
CodeDERPOneNodeUnhealthy Code = `EDERP02`
Comment on lines +13 to +34
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

review: these all have to go in here to be generated properly

)

// @typescript-generate Severity
Expand All @@ -18,3 +46,17 @@ var severityRank = map[Severity]int{
func (s Severity) Value() int {
// Look up s in severityRank; a severity that is not a key of the map
// yields the int zero value.
return severityRank[s]
}

// Code is a stable identifier used to link to documentation.
// @typescript-generate Code
type Code string

// Messagef builds a healthcheck message of the form "<code>: <text>", where
// <text> is msg expanded with args using the fmt formatting verbs.
func Messagef(code Code, msg string, args ...any) string {
	var out strings.Builder
	// Prefix first, then format the message body straight into the builder.
	_, _ = out.WriteString(string(code))
	_, _ = out.WriteString(": ")
	_, _ = fmt.Fprintf(&out, msg, args...)
	return out.String()
}
Loading