Skip to content

Commit 2f5530f

Browse files
committed
feat(coderd/healthcheck): add access URL error codes and healthcheck doc
1 parent 5b2f436 commit 2f5530f

File tree

3 files changed

+149
-30
lines changed

3 files changed

+149
-30
lines changed

coderd/healthcheck/accessurl.go

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,13 @@ import (
1313
"github.com/coder/coder/v2/coderd/util/ptr"
1414
)
1515

16+
// Messages reported by the access URL health check. Each one starts with a
// stable "EACSURLnn" code that matches an anchor of the same name in
// docs/admin/healthcheck.md.
//
// Despite the Err prefix these are plain strings, not error values. They are
// declared as constants so that they cannot be reassigned at runtime and so
// that format strings built from them (for example
// ErrAccessURLFetch+"get healthz endpoint: %w") remain compile-time constants.
const (
	// ErrAccessURLNotSet is reported when no access URL was supplied.
	ErrAccessURLNotSet = "EACSURL01: Access URL not set"
	// ErrAccessURLInvalid is a prefix (note the trailing ": ") for failures
	// while deriving the /healthz URL from the access URL.
	ErrAccessURLInvalid = "EACSURL02: Access URL invalid: "
	// ErrAccessURLFetch is a prefix (note the trailing ": ") for failures
	// while building, sending, or reading the /healthz request.
	ErrAccessURLFetch = "EACSURL03: Failed to fetch /healthz: "
	// ErrAccessURLNotOK is added as a warning when /healthz answers with a
	// status other than 200 OK.
	ErrAccessURLNotOK = "EACSURL04: /healthz did not return 200 OK"
)
22+
1623
// @typescript-generate AccessURLReport
1724
type AccessURLReport struct {
1825
// Healthy is deprecated and left for backward compatibility purposes, use `Severity` instead.
@@ -44,7 +51,7 @@ func (r *AccessURLReport) Run(ctx context.Context, opts *AccessURLReportOptions)
4451
r.Dismissed = opts.Dismissed
4552

4653
if opts.AccessURL == nil {
47-
r.Error = ptr.Ref("access URL is nil")
54+
r.Error = ptr.Ref(ErrAccessURLNotSet)
4855
r.Severity = health.SeverityError
4956
return
5057
}
@@ -56,29 +63,29 @@ func (r *AccessURLReport) Run(ctx context.Context, opts *AccessURLReportOptions)
5663

5764
accessURL, err := opts.AccessURL.Parse("/healthz")
5865
if err != nil {
59-
r.Error = convertError(xerrors.Errorf("parse healthz endpoint: %w", err))
66+
r.Error = convertError(xerrors.Errorf(ErrAccessURLInvalid+"parse healthz endpoint: %w", err))
6067
r.Severity = health.SeverityError
6168
return
6269
}
6370

6471
req, err := http.NewRequestWithContext(ctx, "GET", accessURL.String(), nil)
6572
if err != nil {
66-
r.Error = convertError(xerrors.Errorf("create healthz request: %w", err))
73+
r.Error = convertError(xerrors.Errorf(ErrAccessURLFetch+"create healthz request: %w", err))
6774
r.Severity = health.SeverityError
6875
return
6976
}
7077

7178
res, err := opts.Client.Do(req)
7279
if err != nil {
73-
r.Error = convertError(xerrors.Errorf("get healthz endpoint: %w", err))
80+
r.Error = convertError(xerrors.Errorf(ErrAccessURLFetch+"get healthz endpoint: %w", err))
7481
r.Severity = health.SeverityError
7582
return
7683
}
7784
defer res.Body.Close()
7885

7986
body, err := io.ReadAll(res.Body)
8087
if err != nil {
81-
r.Error = convertError(xerrors.Errorf("read healthz response: %w", err))
88+
r.Error = convertError(xerrors.Errorf(ErrAccessURLFetch+"read healthz response: %w", err))
8289
r.Severity = health.SeverityError
8390
return
8491
}
@@ -88,6 +95,7 @@ func (r *AccessURLReport) Run(ctx context.Context, opts *AccessURLReportOptions)
8895
r.StatusCode = res.StatusCode
8996
if res.StatusCode != http.StatusOK {
9097
r.Severity = health.SeverityWarning
98+
r.Warnings = append(r.Warnings, ErrAccessURLNotOK)
9199
}
92100
r.HealthzResponse = string(body)
93101
}

coderd/healthcheck/accessurl_test.go

Lines changed: 57 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@ import (
1111
"github.com/stretchr/testify/require"
1212
"golang.org/x/xerrors"
1313

14-
"github.com/coder/coder/v2/coderd/coderdtest"
1514
"github.com/coder/coder/v2/coderd/healthcheck"
1615
"github.com/coder/coder/v2/coderd/healthcheck/health"
1716
)
@@ -25,12 +24,17 @@ func TestAccessURL(t *testing.T) {
2524
var (
2625
ctx, cancel = context.WithCancel(context.Background())
2726
report healthcheck.AccessURLReport
28-
client = coderdtest.New(t, nil)
27+
resp = []byte("OK")
28+
srv = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
29+
w.WriteHeader(http.StatusOK)
30+
_, _ = w.Write(resp)
31+
}))
2932
)
3033
defer cancel()
3134

3235
report.Run(ctx, &healthcheck.AccessURLReportOptions{
33-
AccessURL: client.URL,
36+
Client: srv.Client(),
37+
AccessURL: mustURL(t, srv.URL),
3438
})
3539

3640
assert.True(t, report.Healthy)
@@ -41,35 +45,27 @@ func TestAccessURL(t *testing.T) {
4145
assert.Nil(t, report.Error)
4246
})
4347

44-
t.Run("404", func(t *testing.T) {
48+
t.Run("NotSet", func(t *testing.T) {
4549
t.Parallel()
4650

4751
var (
4852
ctx, cancel = context.WithCancel(context.Background())
4953
report healthcheck.AccessURLReport
50-
resp = []byte("NOT OK")
51-
srv = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
52-
w.WriteHeader(http.StatusNotFound)
53-
w.Write(resp)
54-
}))
5554
)
5655
defer cancel()
57-
defer srv.Close()
58-
59-
u, err := url.Parse(srv.URL)
60-
require.NoError(t, err)
6156

6257
report.Run(ctx, &healthcheck.AccessURLReportOptions{
63-
Client: srv.Client(),
64-
AccessURL: u,
58+
Client: nil, // defaults to http.DefaultClient
59+
AccessURL: nil,
6560
})
6661

6762
assert.False(t, report.Healthy)
68-
assert.True(t, report.Reachable)
69-
assert.Equal(t, health.SeverityWarning, report.Severity)
70-
assert.Equal(t, http.StatusNotFound, report.StatusCode)
71-
assert.Equal(t, string(resp), report.HealthzResponse)
72-
assert.Nil(t, report.Error)
63+
assert.False(t, report.Reachable)
64+
assert.Equal(t, health.SeverityError, report.Severity)
65+
assert.Equal(t, 0, report.StatusCode)
66+
assert.Equal(t, "", report.HealthzResponse)
67+
require.NotNil(t, report.Error)
68+
assert.Contains(t, *report.Error, healthcheck.ErrAccessURLNotSet)
7369
})
7470

7571
t.Run("ClientErr", func(t *testing.T) {
@@ -81,7 +77,7 @@ func TestAccessURL(t *testing.T) {
8177
resp = []byte("OK")
8278
srv = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
8379
w.WriteHeader(http.StatusOK)
84-
w.Write(resp)
80+
_, _ = w.Write(resp)
8581
}))
8682
client = srv.Client()
8783
)
@@ -93,12 +89,9 @@ func TestAccessURL(t *testing.T) {
9389
return nil, expErr
9490
})
9591

96-
u, err := url.Parse(srv.URL)
97-
require.NoError(t, err)
98-
9992
report.Run(ctx, &healthcheck.AccessURLReportOptions{
10093
Client: client,
101-
AccessURL: u,
94+
AccessURL: mustURL(t, srv.URL),
10295
})
10396

10497
assert.False(t, report.Healthy)
@@ -108,6 +101,38 @@ func TestAccessURL(t *testing.T) {
108101
assert.Equal(t, "", report.HealthzResponse)
109102
require.NotNil(t, report.Error)
110103
assert.Contains(t, *report.Error, expErr.Error())
104+
assert.Contains(t, *report.Error, healthcheck.ErrAccessURLFetch)
105+
})
106+
107+
t.Run("404", func(t *testing.T) {
108+
t.Parallel()
109+
110+
var (
111+
ctx, cancel = context.WithCancel(context.Background())
112+
report healthcheck.AccessURLReport
113+
resp = []byte("NOT OK")
114+
srv = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
115+
w.WriteHeader(http.StatusNotFound)
116+
_, _ = w.Write(resp)
117+
}))
118+
)
119+
defer cancel()
120+
defer srv.Close()
121+
122+
report.Run(ctx, &healthcheck.AccessURLReportOptions{
123+
Client: srv.Client(),
124+
AccessURL: mustURL(t, srv.URL),
125+
})
126+
127+
assert.False(t, report.Healthy)
128+
assert.True(t, report.Reachable)
129+
assert.Equal(t, health.SeverityWarning, report.Severity)
130+
assert.Equal(t, http.StatusNotFound, report.StatusCode)
131+
assert.Equal(t, string(resp), report.HealthzResponse)
132+
assert.Nil(t, report.Error)
133+
if assert.NotEmpty(t, report.Warnings) {
134+
assert.Contains(t, report.Warnings[0], healthcheck.ErrAccessURLNotOK)
135+
}
111136
})
112137

113138
t.Run("DismissedError", func(t *testing.T) {
@@ -133,3 +158,10 @@ type roundTripFunc func(r *http.Request) (*http.Response, error)
133158
// RoundTrip invokes the underlying function with the request and returns
// its response and error unchanged.
func (fn roundTripFunc) RoundTrip(req *http.Request) (*http.Response, error) {
	return fn(req)
}
161+
162+
func mustURL(t testing.TB, s string) *url.URL {
163+
t.Helper()
164+
u, err := url.Parse(s)
165+
require.NoError(t, err)
166+
return u
167+
}

docs/admin/healthcheck.md

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
# Deployment Health
2+
3+
Coder includes an operator-friendly deployment health page that provides a
4+
number of details about the health of your Coder deployment.
5+
6+
You can view it at `https://${CODER_URL}/health`, or you can alternatively view
7+
the [JSON response directly](../api/debug.md#debug-info-deployment-health).
8+
9+
The deployment health page is broken up into the following sections:
10+
11+
## Access URL
12+
13+
The Access URL section shows checks related to Coder's
14+
[access URL](./configure.md#access-url).
15+
16+
Coder will periodically send a GET request to `${CODER_ACCESS_URL}/healthz` and
17+
validate that the response is `200 OK`.
18+
19+
If there is an issue, you may see one of the following errors reported:
20+
21+
### <a name="EACSURL01">EACSURL01: Access URL not set</a>
22+
23+
**Problem:** No access URL has been configured.
24+
25+
**Solution:** Configure an [access URL](./configure.md#access-url) for Coder.
26+
27+
### <a name="EACSURL02">EACSURL02: Access URL invalid</a>
28+
29+
**Problem:** `${CODER_ACCESS_URL}/healthz` is not a valid URL.
30+
31+
**Solution:** Ensure that the access URL is a valid URL accepted by
32+
[`url.Parse`](https://pkg.go.dev/net/url#Parse).
33+
34+
### <a name="EACSURL03">EACSURL03: Failed to fetch /healthz</a>
35+
36+
**Problem:** Coder was unable to execute a GET request to
37+
`${CODER_ACCESS_URL}/healthz`.
38+
39+
This could be due to a number of reasons, including but not limited to:
40+
41+
- DNS lookup failure
42+
- A misconfigured firewall
43+
- A misconfigured reverse proxy
44+
- Invalid or expired SSL certificates
45+
46+
**Solution:** Investigate and resolve the root cause of the connection issue.
47+
48+
To troubleshoot further, you can log into the machine running Coder and attempt
49+
to run the following command:
50+
51+
```shell
52+
curl -v ${CODER_ACCESS_URL}/healthz
53+
```
54+
55+
The output of this command should aid further diagnosis.
56+
57+
### <a name="EACSURL04">EACSURL04: /healthz did not return 200 OK</a>
58+
59+
**Problem:** Coder was able to execute a GET request to
60+
`${CODER_ACCESS_URL}/healthz`, but the response code was not `200 OK` as
61+
expected.
62+
63+
This could mean, for instance, that:
64+
65+
- The request did not actually hit your Coder instance (potentially an incorrect
66+
DNS entry)
67+
- The request hit your Coder instance, but on an unexpected path (potentially a
68+
misconfigured reverse proxy)
69+
70+
**Solution:** Inspect the `HealthzResponse` in the health check output. This
71+
should give you a good indication of the root cause.
72+
73+
## Database
74+
75+
## DERP
76+
77+
## Websocket
78+
79+
## Workspace Proxy

0 commit comments

Comments
 (0)