Skip to content

feat(coderd/healthcheck): add access URL error codes and healthcheck doc #10915

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 17 commits into from
Nov 30, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
feat(coderd/healthcheck): add access URL error codes and healthcheck doc
  • Loading branch information
johnstcn committed Nov 30, 2023
commit 2f5530f3cab0b2b9ec31dd039dbd9e83d48c36a3
18 changes: 13 additions & 5 deletions coderd/healthcheck/accessurl.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,13 @@ import (
"github.com/coder/coder/v2/coderd/util/ptr"
)

// Messages reported by the access URL health check. Each one begins with a
// stable code (EACSURLnn) that matches a section anchor in
// docs/admin/healthcheck.md. They are constants so the codes cannot be
// reassigned at runtime and so format strings built from them stay constant
// expressions.
const (
	// ErrAccessURLNotSet is reported when no access URL was supplied to the check.
	ErrAccessURLNotSet = "EACSURL01: Access URL not set"
	// ErrAccessURLInvalid prefixes the error raised when the /healthz URL
	// cannot be derived from the access URL. The trailing ": " is intentional:
	// the underlying error detail is appended after it.
	ErrAccessURLInvalid = "EACSURL02: Access URL invalid: "
	// ErrAccessURLFetch prefixes errors from creating the request, executing
	// it, or reading the response body. The trailing ": " is intentional: the
	// underlying error detail is appended after it.
	ErrAccessURLFetch = "EACSURL03: Failed to fetch /healthz: "
	// ErrAccessURLNotOK is added as a warning when /healthz responds with a
	// status code other than 200 OK.
	ErrAccessURLNotOK = "EACSURL04: /healthz did not return 200 OK"
)

// @typescript-generate AccessURLReport
type AccessURLReport struct {
// Healthy is deprecated and left for backward compatibility purposes, use `Severity` instead.
Expand Down Expand Up @@ -44,7 +51,7 @@ func (r *AccessURLReport) Run(ctx context.Context, opts *AccessURLReportOptions)
r.Dismissed = opts.Dismissed

if opts.AccessURL == nil {
r.Error = ptr.Ref("access URL is nil")
r.Error = ptr.Ref(ErrAccessURLNotSet)
r.Severity = health.SeverityError
return
}
Expand All @@ -56,29 +63,29 @@ func (r *AccessURLReport) Run(ctx context.Context, opts *AccessURLReportOptions)

accessURL, err := opts.AccessURL.Parse("/healthz")
if err != nil {
r.Error = convertError(xerrors.Errorf("parse healthz endpoint: %w", err))
r.Error = convertError(xerrors.Errorf(ErrAccessURLInvalid+"parse healthz endpoint: %w", err))
r.Severity = health.SeverityError
return
}

req, err := http.NewRequestWithContext(ctx, "GET", accessURL.String(), nil)
if err != nil {
r.Error = convertError(xerrors.Errorf("create healthz request: %w", err))
r.Error = convertError(xerrors.Errorf(ErrAccessURLFetch+"create healthz request: %w", err))
r.Severity = health.SeverityError
return
}

res, err := opts.Client.Do(req)
if err != nil {
r.Error = convertError(xerrors.Errorf("get healthz endpoint: %w", err))
r.Error = convertError(xerrors.Errorf(ErrAccessURLFetch+"get healthz endpoint: %w", err))
r.Severity = health.SeverityError
return
}
defer res.Body.Close()

body, err := io.ReadAll(res.Body)
if err != nil {
r.Error = convertError(xerrors.Errorf("read healthz response: %w", err))
r.Error = convertError(xerrors.Errorf(ErrAccessURLFetch+"read healthz response: %w", err))
r.Severity = health.SeverityError
return
}
Expand All @@ -88,6 +95,7 @@ func (r *AccessURLReport) Run(ctx context.Context, opts *AccessURLReportOptions)
r.StatusCode = res.StatusCode
if res.StatusCode != http.StatusOK {
r.Severity = health.SeverityWarning
r.Warnings = append(r.Warnings, ErrAccessURLNotOK)
}
r.HealthzResponse = string(body)
}
82 changes: 57 additions & 25 deletions coderd/healthcheck/accessurl_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ import (
"github.com/stretchr/testify/require"
"golang.org/x/xerrors"

"github.com/coder/coder/v2/coderd/coderdtest"
"github.com/coder/coder/v2/coderd/healthcheck"
"github.com/coder/coder/v2/coderd/healthcheck/health"
)
Expand All @@ -25,12 +24,17 @@ func TestAccessURL(t *testing.T) {
var (
ctx, cancel = context.WithCancel(context.Background())
report healthcheck.AccessURLReport
client = coderdtest.New(t, nil)
resp = []byte("OK")
srv = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusOK)
_, _ = w.Write(resp)
}))
)
defer cancel()

report.Run(ctx, &healthcheck.AccessURLReportOptions{
AccessURL: client.URL,
Client: srv.Client(),
AccessURL: mustURL(t, srv.URL),
})

assert.True(t, report.Healthy)
Expand All @@ -41,35 +45,27 @@ func TestAccessURL(t *testing.T) {
assert.Nil(t, report.Error)
})

t.Run("404", func(t *testing.T) {
t.Run("NotSet", func(t *testing.T) {
t.Parallel()

var (
ctx, cancel = context.WithCancel(context.Background())
report healthcheck.AccessURLReport
resp = []byte("NOT OK")
srv = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusNotFound)
w.Write(resp)
}))
)
defer cancel()
defer srv.Close()

u, err := url.Parse(srv.URL)
require.NoError(t, err)

report.Run(ctx, &healthcheck.AccessURLReportOptions{
Client: srv.Client(),
AccessURL: u,
Client: nil, // defaults to http.DefaultClient
AccessURL: nil,
})

assert.False(t, report.Healthy)
assert.True(t, report.Reachable)
assert.Equal(t, health.SeverityWarning, report.Severity)
assert.Equal(t, http.StatusNotFound, report.StatusCode)
assert.Equal(t, string(resp), report.HealthzResponse)
assert.Nil(t, report.Error)
assert.False(t, report.Reachable)
assert.Equal(t, health.SeverityError, report.Severity)
assert.Equal(t, 0, report.StatusCode)
assert.Equal(t, "", report.HealthzResponse)
require.NotNil(t, report.Error)
assert.Contains(t, *report.Error, healthcheck.ErrAccessURLNotSet)
})

t.Run("ClientErr", func(t *testing.T) {
Expand All @@ -81,7 +77,7 @@ func TestAccessURL(t *testing.T) {
resp = []byte("OK")
srv = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusOK)
w.Write(resp)
_, _ = w.Write(resp)
}))
client = srv.Client()
)
Expand All @@ -93,12 +89,9 @@ func TestAccessURL(t *testing.T) {
return nil, expErr
})

u, err := url.Parse(srv.URL)
require.NoError(t, err)

report.Run(ctx, &healthcheck.AccessURLReportOptions{
Client: client,
AccessURL: u,
AccessURL: mustURL(t, srv.URL),
})

assert.False(t, report.Healthy)
Expand All @@ -108,6 +101,38 @@ func TestAccessURL(t *testing.T) {
assert.Equal(t, "", report.HealthzResponse)
require.NotNil(t, report.Error)
assert.Contains(t, *report.Error, expErr.Error())
assert.Contains(t, *report.Error, healthcheck.ErrAccessURLFetch)
})

t.Run("404", func(t *testing.T) {
t.Parallel()

var (
ctx, cancel = context.WithCancel(context.Background())
report healthcheck.AccessURLReport
resp = []byte("NOT OK")
srv = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusNotFound)
_, _ = w.Write(resp)
}))
)
defer cancel()
defer srv.Close()

report.Run(ctx, &healthcheck.AccessURLReportOptions{
Client: srv.Client(),
AccessURL: mustURL(t, srv.URL),
})

assert.False(t, report.Healthy)
assert.True(t, report.Reachable)
assert.Equal(t, health.SeverityWarning, report.Severity)
assert.Equal(t, http.StatusNotFound, report.StatusCode)
assert.Equal(t, string(resp), report.HealthzResponse)
assert.Nil(t, report.Error)
if assert.NotEmpty(t, report.Warnings) {
assert.Contains(t, report.Warnings[0], healthcheck.ErrAccessURLNotOK)
}
})

t.Run("DismissedError", func(t *testing.T) {
Expand All @@ -133,3 +158,10 @@ type roundTripFunc func(r *http.Request) (*http.Response, error)
// RoundTrip hands the request to the wrapped function and returns its result
// unchanged. The signature matches the single method of http.RoundTripper, so
// a plain function can stand in as an HTTP transport in tests.
func (fn roundTripFunc) RoundTrip(req *http.Request) (*http.Response, error) {
	res, err := fn(req)
	return res, err
}

// mustURL parses s with url.Parse and returns the result. A parse error fails
// the test immediately through require.NoError. The function is marked as a
// test helper so failures are attributed to the calling line.
func mustURL(t testing.TB, s string) *url.URL {
	t.Helper()
	parsed, parseErr := url.Parse(s)
	require.NoError(t, parseErr)
	return parsed
}
79 changes: 79 additions & 0 deletions docs/admin/healthcheck.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
# Deployment Health

Coder includes an operator-friendly deployment health page that provides a
number of details about the health of your Coder deployment.

You can view it at `https://${CODER_URL}/health`, or view the
[JSON response directly](../api/debug.md#debug-info-deployment-health).

The deployment health page is broken up into the following sections:

## Access URL

The Access URL section shows checks related to Coder's
[access URL](./configure.md#access-url).

Coder will periodically send a GET request to `${CODER_ACCESS_URL}/healthz` and
validate that the response is `200 OK`.

If there is an issue, you may see one of the following errors reported:

### <a name="EACSURL01">EACSURL01: Access URL not set</a>

**Problem:** No access URL has been configured.

**Solution:** Configure an [access URL](./configure.md#access-url) for Coder.

### <a name="EACSURL02">EACSURL02: Access URL invalid</a>

**Problem:** `${CODER_ACCESS_URL}/healthz` is not a valid URL.

**Solution:** Ensure that the access URL is a valid URL accepted by
[`url.Parse`](https://pkg.go.dev/net/url#Parse).

### <a name="EACSURL03">EACSURL03: Failed to fetch /healthz</a>

**Problem:** Coder was unable to execute a GET request to
`${CODER_ACCESS_URL}/healthz`.

This could be due to a number of reasons, including but not limited to:

- DNS lookup failure
- A misconfigured firewall
- A misconfigured reverse proxy
- Invalid or expired SSL certificates

**Solution:** Investigate and resolve the root cause of the connection issue.

To troubleshoot further, you can log into the machine running Coder and attempt
to run the following command:

```shell
curl -v ${CODER_ACCESS_URL}/healthz
```

The output of this command should aid further diagnosis.

### <a name="EACSURL04">EACSURL04: /healthz did not return 200 OK</a>

**Problem:** Coder was able to execute a GET request to
`${CODER_ACCESS_URL}/healthz`, but the response code was not `200 OK` as
expected.

This could mean, for instance, that:

- The request did not actually hit your Coder instance (potentially an incorrect
DNS entry)
- The request hit your Coder instance, but on an unexpected path (potentially a
misconfigured reverse proxy)

**Solution:** Inspect the `HealthzResponse` in the health check output. This
should give you a good indication of the root cause.

## Database

## DERP

## Websocket

## Workspace Proxy