Skip to content

Commit ca13285

Browse files
committed
feat(coderd): add DERP healthcheck
1 parent ca4fa81 commit ca13285

File tree

11 files changed

+814
-7
lines changed

11 files changed

+814
-7
lines changed

coderd/coderd.go

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ import (
3333
"tailscale.com/derp/derphttp"
3434
"tailscale.com/tailcfg"
3535
"tailscale.com/types/key"
36+
"tailscale.com/util/singleflight"
3637

3738
"cdr.dev/slog"
3839
"github.com/coder/coder/buildinfo"
@@ -46,6 +47,7 @@ import (
4647
"github.com/coder/coder/coderd/database/dbtype"
4748
"github.com/coder/coder/coderd/gitauth"
4849
"github.com/coder/coder/coderd/gitsshkey"
50+
"github.com/coder/coder/coderd/healthcheck"
4951
"github.com/coder/coder/coderd/httpapi"
5052
"github.com/coder/coder/coderd/httpmw"
5153
"github.com/coder/coder/coderd/metricscache"
@@ -123,7 +125,10 @@ type Options struct {
123125
TemplateScheduleStore schedule.TemplateScheduleStore
124126
// AppSigningKey denotes the symmetric key to use for signing app tickets.
125127
// The key must be 64 bytes long.
126-
AppSigningKey []byte
128+
AppSigningKey []byte
129+
HealthcheckFunc func(ctx context.Context) (*healthcheck.Report, error)
130+
HealthcheckTimeout time.Duration
131+
HealthcheckRefresh time.Duration
127132

128133
// APIRateLimit is the minutely throughput rate limit per user or ip.
129134
// Setting a rate limit <0 will disable the rate limiter across the entire
@@ -235,6 +240,19 @@ func New(options *Options) *API {
235240
if len(options.AppSigningKey) != 64 {
236241
panic("coderd: AppSigningKey must be 64 bytes long")
237242
}
243+
if options.HealthcheckFunc == nil {
244+
options.HealthcheckFunc = func(ctx context.Context) (*healthcheck.Report, error) {
245+
return healthcheck.Run(ctx, &healthcheck.ReportOptions{
246+
DERPMap: options.DERPMap.Clone(),
247+
})
248+
}
249+
}
250+
if options.HealthcheckTimeout == 0 {
251+
options.HealthcheckTimeout = 30 * time.Second
252+
}
253+
if options.HealthcheckRefresh == 0 {
254+
options.HealthcheckRefresh = 10 * time.Minute
255+
}
238256

239257
siteCacheDir := options.CacheDir
240258
if siteCacheDir != "" {
@@ -293,6 +311,7 @@ func New(options *Options) *API {
293311
Auditor: atomic.Pointer[audit.Auditor]{},
294312
TemplateScheduleStore: atomic.Pointer[schedule.TemplateScheduleStore]{},
295313
Experiments: experiments,
314+
healthCheckGroup: &singleflight.Group[string, *healthcheck.Report]{},
296315
}
297316
if options.UpdateCheckOptions != nil {
298317
api.updateChecker = updatecheck.New(
@@ -718,6 +737,7 @@ func New(options *Options) *API {
718737
)
719738

720739
r.Get("/coordinator", api.debugCoordinator)
740+
r.Get("/health", api.debugDeploymentHealth)
721741
})
722742
})
723743

@@ -773,6 +793,8 @@ type API struct {
773793
// Experiments contains the list of experiments currently enabled.
774794
// This is used to gate features that are not yet ready for production.
775795
Experiments codersdk.Experiments
796+
797+
healthCheckGroup *singleflight.Group[string, *healthcheck.Report]
776798
}
777799

778800
// Close waits for all WebSocket connections to drain before returning.

coderd/coderdtest/coderdtest.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ import (
6060
"github.com/coder/coder/coderd/database/dbtestutil"
6161
"github.com/coder/coder/coderd/gitauth"
6262
"github.com/coder/coder/coderd/gitsshkey"
63+
"github.com/coder/coder/coderd/healthcheck"
6364
"github.com/coder/coder/coderd/httpapi"
6465
"github.com/coder/coder/coderd/httpmw"
6566
"github.com/coder/coder/coderd/rbac"
@@ -105,6 +106,10 @@ type Options struct {
105106
TrialGenerator func(context.Context, string) error
106107
TemplateScheduleStore schedule.TemplateScheduleStore
107108

109+
HealthcheckFunc func(ctx context.Context) (*healthcheck.Report, error)
110+
HealthcheckTimeout time.Duration
111+
HealthcheckRefresh time.Duration
112+
108113
// All rate limits default to -1 (unlimited) in tests if not set.
109114
APIRateLimit int
110115
LoginRateLimit int
@@ -335,6 +340,9 @@ func NewOptions(t *testing.T, options *Options) (func(http.Handler), context.Can
335340
SwaggerEndpoint: options.SwaggerEndpoint,
336341
AppSigningKey: AppSigningKey,
337342
SSHConfig: options.ConfigSSH,
343+
HealthcheckFunc: options.HealthcheckFunc,
344+
HealthcheckTimeout: options.HealthcheckTimeout,
345+
HealthcheckRefresh: options.HealthcheckRefresh,
338346
}
339347
}
340348

coderd/debug.go

Lines changed: 43 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,14 @@
11
package coderd
22

3-
import "net/http"
3+
import (
4+
"context"
5+
"net/http"
6+
"time"
7+
8+
"github.com/coder/coder/coderd/healthcheck"
9+
"github.com/coder/coder/coderd/httpapi"
10+
"github.com/coder/coder/codersdk"
11+
)
412

513
// @Summary Debug Info Wireguard Coordinator
614
// @ID debug-info-wireguard-coordinator
@@ -12,3 +20,37 @@ import "net/http"
1220
func (api *API) debugCoordinator(rw http.ResponseWriter, r *http.Request) {
	// Load the currently stored coordinator and let it render its own debug
	// page for this request.
	coordinator := api.TailnetCoordinator.Load()
	(*coordinator).ServeHTTPDebug(rw, r)
}
23+
24+
// @Summary Debug Info Deployment Health
25+
// @ID debug-info-deployment-health
26+
// @Security CoderSessionToken
27+
// @Produce text/html
28+
// @Produce json
29+
// @Tags Debug
30+
// @Success 200
31+
// @Router /debug/health [get]
32+
func (api *API) debugDeploymentHealth(rw http.ResponseWriter, r *http.Request) {
33+
ctx, cancel := context.WithTimeout(r.Context(), api.HealthcheckTimeout)
34+
defer cancel()
35+
36+
resChan := api.healthCheckGroup.DoChan("", func() (*healthcheck.Report, error) {
37+
return api.HealthcheckFunc(ctx)
38+
})
39+
40+
select {
41+
case <-ctx.Done():
42+
httpapi.Write(ctx, rw, http.StatusNotFound, codersdk.Response{
43+
Message: "Healthcheck is in progress and did not complete in time. Try again in a few seconds.",
44+
})
45+
return
46+
case res := <-resChan:
47+
if time.Since(res.Val.Time) > api.HealthcheckRefresh {
48+
api.healthCheckGroup.Forget("")
49+
api.debugDeploymentHealth(rw, r)
50+
return
51+
}
52+
53+
httpapi.Write(ctx, rw, http.StatusOK, res.Val)
54+
return
55+
}
56+
}

coderd/debug_test.go

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
package coderd_test
2+
3+
import (
4+
"context"
5+
"fmt"
6+
"net/http"
7+
"net/http/httputil"
8+
"testing"
9+
"time"
10+
11+
"github.com/stretchr/testify/require"
12+
13+
"github.com/coder/coder/coderd/coderdtest"
14+
"github.com/coder/coder/coderd/healthcheck"
15+
"github.com/coder/coder/testutil"
16+
)
17+
18+
func TestDebug(t *testing.T) {
19+
t.Parallel()
20+
t.Run("Health/OK", func(t *testing.T) {
21+
t.Parallel()
22+
23+
var (
24+
ctx, cancel = context.WithTimeout(context.Background(), testutil.WaitShort)
25+
client = coderdtest.New(t, &coderdtest.Options{
26+
HealthcheckFunc: func(context.Context) (*healthcheck.Report, error) {
27+
return &healthcheck.Report{}, nil
28+
},
29+
})
30+
_ = coderdtest.CreateFirstUser(t, client)
31+
)
32+
defer cancel()
33+
34+
res, err := client.Request(ctx, "GET", "/debug/health", nil)
35+
require.NoError(t, err)
36+
defer res.Body.Close()
37+
require.Equal(t, http.StatusOK, res.StatusCode)
38+
39+
})
40+
41+
t.Run("Health/Timeout", func(t *testing.T) {
42+
t.Parallel()
43+
44+
var (
45+
ctx, cancel = context.WithTimeout(context.Background(), testutil.WaitShort)
46+
client = coderdtest.New(t, &coderdtest.Options{
47+
HealthcheckTimeout: time.Microsecond,
48+
HealthcheckFunc: func(context.Context) (*healthcheck.Report, error) {
49+
t := time.NewTimer(time.Second)
50+
defer t.Stop()
51+
52+
select {
53+
case <-ctx.Done():
54+
return nil, ctx.Err()
55+
case <-t.C:
56+
return &healthcheck.Report{}, nil
57+
}
58+
},
59+
})
60+
_ = coderdtest.CreateFirstUser(t, client)
61+
)
62+
defer cancel()
63+
64+
res, err := client.Request(ctx, "GET", "/api/v2/debug/health", nil)
65+
require.NoError(t, err)
66+
defer res.Body.Close()
67+
dump, _ := httputil.DumpResponse(res, true)
68+
fmt.Println(string(dump))
69+
require.Equal(t, http.StatusNotFound, res.StatusCode)
70+
})
71+
}

coderd/healthcheck/accessurl.go

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
package healthcheck
2+
3+
import (
4+
"context"
5+
"io"
6+
"net/http"
7+
"net/url"
8+
9+
"golang.org/x/xerrors"
10+
)
11+
12+
// AccessURLReport holds the outcome of probing an access URL's /healthz
// endpoint. It is populated by Run.
type AccessURLReport struct {
13+
// Reachable is set to true by Run only after a response was received and
// its body was read in full.
Reachable bool
14+
// StatusCode is the HTTP status code of the /healthz response.
StatusCode int
15+
// HealthzResponse is the body of the /healthz response as a string.
HealthzResponse string
16+
// Err is the error from the first step of Run that failed, if any.
Err error
17+
}
18+
19+
func (r *AccessURLReport) Run(ctx context.Context, accessURL *url.URL) {
20+
accessURL, err := accessURL.Parse("/healthz")
21+
if err != nil {
22+
r.Err = xerrors.Errorf("parse healthz endpoint: %w", err)
23+
return
24+
}
25+
26+
req, err := http.NewRequestWithContext(ctx, "GET", accessURL.String(), nil)
27+
if err != nil {
28+
r.Err = xerrors.Errorf("create healthz request: %w", err)
29+
return
30+
}
31+
32+
res, err := http.DefaultClient.Do(req)
33+
if err != nil {
34+
r.Err = xerrors.Errorf("get healthz endpoint: %w", err)
35+
return
36+
}
37+
defer res.Body.Close()
38+
39+
body, err := io.ReadAll(res.Body)
40+
if err != nil {
41+
r.Err = xerrors.Errorf("read healthz response: %w", err)
42+
return
43+
}
44+
45+
r.Reachable = true
46+
r.StatusCode = res.StatusCode
47+
r.HealthzResponse = string(body)
48+
}

0 commit comments

Comments
 (0)