From 2ef5eeeaf61072eb05c12942c9e87f97958d782f Mon Sep 17 00:00:00 2001 From: Colin Adler Date: Thu, 1 Jun 2023 21:34:35 +0000 Subject: [PATCH 1/2] feat(healthcheck): add failing sections to report --- coderd/coderd.go | 4 +- coderd/coderdtest/coderdtest.go | 2 +- coderd/debug.go | 8 +- coderd/debug_test.go | 14 +-- coderd/healthcheck/derp.go | 10 +-- coderd/healthcheck/healthcheck.go | 68 +++++++++++--- coderd/healthcheck/healthcheck_test.go | 120 +++++++++++++++++++++++++ 7 files changed, 195 insertions(+), 31 deletions(-) create mode 100644 coderd/healthcheck/healthcheck_test.go diff --git a/coderd/coderd.go b/coderd/coderd.go index 7319fee6aa70e..4360939414384 100644 --- a/coderd/coderd.go +++ b/coderd/coderd.go @@ -130,7 +130,7 @@ type Options struct { // AppSecurityKey is the crypto key used to sign and encrypt tokens related to // workspace applications. It consists of both a signing and encryption key. AppSecurityKey workspaceapps.SecurityKey - HealthcheckFunc func(ctx context.Context, apiKey string) (*healthcheck.Report, error) + HealthcheckFunc func(ctx context.Context, apiKey string) *healthcheck.Report HealthcheckTimeout time.Duration HealthcheckRefresh time.Duration @@ -266,7 +266,7 @@ func New(options *Options) *API { options.TemplateScheduleStore.Store(&v) } if options.HealthcheckFunc == nil { - options.HealthcheckFunc = func(ctx context.Context, apiKey string) (*healthcheck.Report, error) { + options.HealthcheckFunc = func(ctx context.Context, apiKey string) *healthcheck.Report { return healthcheck.Run(ctx, &healthcheck.ReportOptions{ AccessURL: options.AccessURL, DERPMap: options.DERPMap.Clone(), diff --git a/coderd/coderdtest/coderdtest.go b/coderd/coderdtest/coderdtest.go index 879acc737e75b..c058925f99403 100644 --- a/coderd/coderdtest/coderdtest.go +++ b/coderd/coderdtest/coderdtest.go @@ -107,7 +107,7 @@ type Options struct { TrialGenerator func(context.Context, string) error TemplateScheduleStore schedule.TemplateScheduleStore - HealthcheckFunc func(ctx context.Context, apiKey string) (*healthcheck.Report, error) + HealthcheckFunc func(ctx context.Context, apiKey string) *healthcheck.Report HealthcheckTimeout time.Duration HealthcheckRefresh time.Duration diff --git a/coderd/debug.go b/coderd/debug.go index 8fef634b5aacf..045cb0076c953 100644 --- a/coderd/debug.go +++ b/coderd/debug.go @@ -47,11 +47,9 @@ func (api *API) debugDeploymentHealth(rw http.ResponseWriter, r *http.Request) { ctx, cancel := context.WithTimeout(context.Background(), api.HealthcheckTimeout) defer cancel() - report, err := api.HealthcheckFunc(ctx, apiKey) - if err == nil { - api.healthCheckCache.Store(report) - } - return report, err + report := api.HealthcheckFunc(ctx, apiKey) + api.healthCheckCache.Store(report) + return report, nil }) select { diff --git a/coderd/debug_test.go b/coderd/debug_test.go index 9742ce959834b..2cff8f176c4c3 100644 --- a/coderd/debug_test.go +++ b/coderd/debug_test.go @@ -24,9 +24,9 @@ func TestDebugHealth(t *testing.T) { ctx, cancel = context.WithTimeout(context.Background(), testutil.WaitShort) sessionToken string client = coderdtest.New(t, &coderdtest.Options{ - HealthcheckFunc: func(_ context.Context, apiKey string) (*healthcheck.Report, error) { + HealthcheckFunc: func(_ context.Context, apiKey string) *healthcheck.Report { assert.Equal(t, sessionToken, apiKey) - return &healthcheck.Report{}, nil + return &healthcheck.Report{} }, }) _ = coderdtest.CreateFirstUser(t, client) @@ -48,15 +48,15 @@ func TestDebugHealth(t *testing.T) { ctx, cancel = context.WithTimeout(context.Background(), testutil.WaitShort) client = coderdtest.New(t, &coderdtest.Options{ HealthcheckTimeout: time.Microsecond, - HealthcheckFunc: func(context.Context, string) (*healthcheck.Report, error) { + HealthcheckFunc: func(context.Context, string) *healthcheck.Report { t := time.NewTimer(time.Second) defer t.Stop() select { case <-ctx.Done(): - return nil, ctx.Err() + return &healthcheck.Report{} case <-t.C: - return &healthcheck.Report{}, nil + return &healthcheck.Report{} } }, }) @@ -80,11 +80,11 @@ func TestDebugHealth(t *testing.T) { client = coderdtest.New(t, &coderdtest.Options{ HealthcheckRefresh: time.Hour, HealthcheckTimeout: time.Hour, - HealthcheckFunc: func(context.Context, string) (*healthcheck.Report, error) { + HealthcheckFunc: func(context.Context, string) *healthcheck.Report { calls++ return &healthcheck.Report{ Time: time.Now(), - }, nil + } }, }) _ = coderdtest.CreateFirstUser(t, client) diff --git a/coderd/healthcheck/derp.go b/coderd/healthcheck/derp.go index 0e7c66f474113..7a5e8bbf550e1 100644 --- a/coderd/healthcheck/derp.go +++ b/coderd/healthcheck/derp.go @@ -25,7 +25,6 @@ import ( ) type DERPReport struct { - mu sync.Mutex Healthy bool `json:"healthy"` Regions map[int]*DERPRegionReport `json:"regions"` @@ -78,6 +77,7 @@ func (r *DERPReport) Run(ctx context.Context, opts *DERPReportOptions) { r.Regions = map[int]*DERPRegionReport{} wg := &sync.WaitGroup{} + mu := sync.Mutex{} wg.Add(len(opts.DERPMap.Regions)) for _, region := range opts.DERPMap.Regions { @@ -97,19 +97,19 @@ func (r *DERPReport) Run(ctx context.Context, opts *DERPReportOptions) { regionReport.Run(ctx) - r.mu.Lock() + mu.Lock() r.Regions[region.RegionID] = ®ionReport if !regionReport.Healthy { r.Healthy = false } - r.mu.Unlock() + mu.Unlock() }() } ncLogf := func(format string, args ...interface{}) { - r.mu.Lock() + mu.Lock() r.NetcheckLogs = append(r.NetcheckLogs, fmt.Sprintf(format, args...)) - r.mu.Unlock() + mu.Unlock() } nc := &netcheck.Client{ PortMapper: portmapper.NewClient(tslogger.WithPrefix(ncLogf, "portmap: "), nil), diff --git a/coderd/healthcheck/healthcheck.go b/coderd/healthcheck/healthcheck.go index 101fe18613570..1acf448de6ae3 100644 --- a/coderd/healthcheck/healthcheck.go +++ b/coderd/healthcheck/healthcheck.go @@ -11,11 +11,24 @@ import ( "tailscale.com/tailcfg" ) +const ( + SectionDERP string = "DERP" + SectionAccessURL string = "AccessURL" + SectionWebsocket string = "Websocket" +) + +type Checker interface { + DERP(ctx context.Context, opts *DERPReportOptions) DERPReport + AccessURL(ctx context.Context, opts *AccessURLOptions) AccessURLReport + Websocket(ctx context.Context, opts *WebsocketReportOptions) WebsocketReport +} + type Report struct { // Time is the time the report was generated at. Time time.Time `json:"time"` // Healthy is true if the report returns no errors. - Healthy bool `json:"healthy"` + Healthy bool `json:"healthy"` + FailingSections []string `json:"failing_sections"` DERP DERPReport `json:"derp"` AccessURL AccessURLReport `json:"access_url"` @@ -28,12 +41,36 @@ type ReportOptions struct { AccessURL *url.URL Client *http.Client APIKey string + + Checker Checker +} + +type defaultChecker struct{} + +func (defaultChecker) DERP(ctx context.Context, opts *DERPReportOptions) (report DERPReport) { + report.Run(ctx, opts) + return report +} + +func (defaultChecker) AccessURL(ctx context.Context, opts *AccessURLOptions) (report AccessURLReport) { + report.Run(ctx, opts) + return report } -func Run(ctx context.Context, opts *ReportOptions) (*Report, error) { - var report Report +func (defaultChecker) Websocket(ctx context.Context, opts *WebsocketReportOptions) (report WebsocketReport) { + report.Run(ctx, opts) + return report +} - wg := &sync.WaitGroup{} +func Run(ctx context.Context, opts *ReportOptions) *Report { + var ( + wg sync.WaitGroup + report Report + ) + + if opts.Checker == nil { + opts.Checker = defaultChecker{} + } wg.Add(1) go func() { @@ -44,7 +81,7 @@ func Run(ctx context.Context, opts *ReportOptions) (*Report, error) { } }() - report.DERP.Run(ctx, &DERPReportOptions{ + report.DERP = opts.Checker.DERP(ctx, &DERPReportOptions{ DERPMap: opts.DERPMap, }) }() @@ -58,7 +95,7 @@ func Run(ctx context.Context, opts *ReportOptions) (*Report, error) { } }() - report.AccessURL.Run(ctx, &AccessURLOptions{ + report.AccessURL = opts.Checker.AccessURL(ctx, &AccessURLOptions{ AccessURL: opts.AccessURL, Client: opts.Client, }) @@ -72,7 +109,8 @@ func Run(ctx context.Context, opts *ReportOptions) (*Report, error) { report.Websocket.Error = xerrors.Errorf("%v", err) } }() - report.Websocket.Run(ctx, &WebsocketReportOptions{ + + report.Websocket = opts.Checker.Websocket(ctx, &WebsocketReportOptions{ APIKey: opts.APIKey, AccessURL: opts.AccessURL, }) @@ -80,8 +118,16 @@ func Run(ctx context.Context, opts *ReportOptions) (*Report, error) { wg.Wait() report.Time = time.Now() - report.Healthy = report.DERP.Healthy && - report.AccessURL.Healthy && - report.Websocket.Healthy - return &report, nil + if !report.DERP.Healthy { + report.FailingSections = append(report.FailingSections, SectionDERP) + } + if !report.AccessURL.Healthy { + report.FailingSections = append(report.FailingSections, SectionAccessURL) + } + if !report.Websocket.Healthy { + report.FailingSections = append(report.FailingSections, SectionWebsocket) + } + + report.Healthy = len(report.FailingSections) == 0 + return &report } diff --git a/coderd/healthcheck/healthcheck_test.go b/coderd/healthcheck/healthcheck_test.go new file mode 100644 index 0000000000000..714ceb577e8a9 --- /dev/null +++ b/coderd/healthcheck/healthcheck_test.go @@ -0,0 +1,120 @@ +package healthcheck_test + +import ( + "context" + "testing" + + "github.com/stretchr/testify/assert" + + "github.com/coder/coder/coderd/healthcheck" +) + +type testChecker struct { + DERPReport healthcheck.DERPReport + AccessURLReport healthcheck.AccessURLReport + WebsocketReport healthcheck.WebsocketReport +} + +func (c *testChecker) DERP(context.Context, *healthcheck.DERPReportOptions) healthcheck.DERPReport { + return c.DERPReport +} + +func (c *testChecker) AccessURL(context.Context, *healthcheck.AccessURLOptions) healthcheck.AccessURLReport { + return c.AccessURLReport +} + +func (c *testChecker) Websocket(context.Context, *healthcheck.WebsocketReportOptions) healthcheck.WebsocketReport { + return c.WebsocketReport +} + +func TestHealthcheck(t *testing.T) { + t.Parallel() + + for _, c := range []struct { + name string + checker *testChecker + healthy bool + failingSections []string + }{{ + name: "OK", + checker: &testChecker{ + DERPReport: healthcheck.DERPReport{ + Healthy: true, + }, + AccessURLReport: healthcheck.AccessURLReport{ + Healthy: true, + }, + WebsocketReport: healthcheck.WebsocketReport{ + Healthy: true, + }, + }, + healthy: true, + failingSections: []string{}, + }, { + name: "DERPFail", + checker: &testChecker{ + DERPReport: healthcheck.DERPReport{ + Healthy: false, + }, + AccessURLReport: healthcheck.AccessURLReport{ + Healthy: true, + }, + WebsocketReport: healthcheck.WebsocketReport{ + Healthy: true, + }, + }, + healthy: false, + failingSections: []string{healthcheck.SectionDERP}, + }, { + name: "AccessURLFail", + checker: &testChecker{ + DERPReport: healthcheck.DERPReport{ + Healthy: true, + }, + AccessURLReport: healthcheck.AccessURLReport{ + Healthy: false, + }, + WebsocketReport: healthcheck.WebsocketReport{ + Healthy: true, + }, + }, + healthy: false, + failingSections: []string{healthcheck.SectionAccessURL}, + }, { + name: "WebsocketFail", + checker: &testChecker{ + DERPReport: healthcheck.DERPReport{ + Healthy: true, + }, + AccessURLReport: healthcheck.AccessURLReport{ + Healthy: true, + }, + WebsocketReport: healthcheck.WebsocketReport{ + Healthy: false, + }, + }, + healthy: false, + failingSections: []string{healthcheck.SectionWebsocket}, + }, { + name: "AllFail", + checker: &testChecker{}, + healthy: false, + failingSections: []string{healthcheck.SectionDERP, healthcheck.SectionAccessURL, healthcheck.SectionWebsocket}, + }} { + c := c + t.Run(c.name, func(t *testing.T) { + t.Parallel() + + report := healthcheck.Run(context.Background(), &healthcheck.ReportOptions{ + Checker: c.checker, + }) + + assert.Equal(t, c.healthy, report.Healthy) + assert.Equal(t, c.failingSections, report.FailingSections) + assert.Equal(t, c.checker.DERPReport.Healthy, report.DERP.Healthy) + assert.Equal(t, c.checker.AccessURLReport.Healthy, report.AccessURL.Healthy) + assert.Equal(t, c.checker.WebsocketReport.Healthy, report.Websocket.Healthy) + assert.NotZero(t, report.Time) + }) + } +} From e07c2806032adada5646589ac628ea8af9b8160b Mon Sep 17 00:00:00 2001 From: Colin Adler Date: Thu, 1 Jun 2023 21:46:44 +0000 Subject: [PATCH 2/2] fixup! feat(healthcheck): add failing sections to report --- coderd/apidoc/docs.go | 6 ++++++ coderd/apidoc/swagger.json | 6 ++++++ coderd/healthcheck/healthcheck_test.go | 2 +- docs/api/debug.md | 1 + docs/api/schemas.md | 16 +++++++++------- 5 files changed, 23 insertions(+), 8 deletions(-) diff --git a/coderd/apidoc/docs.go b/coderd/apidoc/docs.go index 08ef7df926c76..c084a2aaca070 100644 --- a/coderd/apidoc/docs.go +++ b/coderd/apidoc/docs.go @@ -10440,6 +10440,12 @@ const docTemplate = `{ "derp": { "$ref": "#/definitions/healthcheck.DERPReport" }, + "failing_sections": { + "type": "array", + "items": { + "type": "string" + } + }, "healthy": { "description": "Healthy is true if the report returns no errors.", "type": "boolean" diff --git a/coderd/apidoc/swagger.json b/coderd/apidoc/swagger.json index 1437f446d94d6..deeaeae7c20f3 100644 --- a/coderd/apidoc/swagger.json +++ b/coderd/apidoc/swagger.json @@ -9425,6 +9425,12 @@ "derp": { "$ref": "#/definitions/healthcheck.DERPReport" }, + "failing_sections": { + "type": "array", + "items": { + "type": "string" + } + }, "healthy": { "description": "Healthy is true if the report returns no errors.", "type": "boolean" diff --git a/coderd/healthcheck/healthcheck_test.go b/coderd/healthcheck/healthcheck_test.go index 714ceb577e8a9..2b6737c6b3a9f 100644 --- a/coderd/healthcheck/healthcheck_test.go +++ b/coderd/healthcheck/healthcheck_test.go @@ -49,7 +49,7 @@ func TestHealthcheck(t *testing.T) { }, }, healthy: true, - failingSections: []string{}, + failingSections: nil, }, { name: "DERPFail", checker: &testChecker{ diff --git a/docs/api/debug.md b/docs/api/debug.md index 0f9a33d993f55..f905b20062119 100644 --- a/docs/api/debug.md +++ b/docs/api/debug.md @@ -206,6 +206,7 @@ curl -X GET http://coder-server:8080/api/v2/debug/health \ } } }, + "failing_sections": ["string"], "healthy": true, "time": "string", "websocket": { diff --git a/docs/api/schemas.md b/docs/api/schemas.md index 39c4e55907270..7536ee767c46a 100644 --- a/docs/api/schemas.md +++ b/docs/api/schemas.md @@ -6376,6 +6376,7 @@ Parameter represents a set value for the scope. } } }, + "failing_sections": ["string"], "healthy": true, "time": "string", "websocket": { @@ -6391,13 +6392,14 @@ Parameter represents a set value for the scope. ### Properties -| Name | Type | Required | Restrictions | Description | -| ------------ | ---------------------------------------------------------- | -------- | ------------ | ------------------------------------------------ | -| `access_url` | [healthcheck.AccessURLReport](#healthcheckaccessurlreport) | false | | | -| `derp` | [healthcheck.DERPReport](#healthcheckderpreport) | false | | | -| `healthy` | boolean | false | | Healthy is true if the report returns no errors. | -| `time` | string | false | | Time is the time the report was generated at. | -| `websocket` | [healthcheck.WebsocketReport](#healthcheckwebsocketreport) | false | | | +| Name | Type | Required | Restrictions | Description | +| ------------------ | ---------------------------------------------------------- | -------- | ------------ | ------------------------------------------------ | +| `access_url` | [healthcheck.AccessURLReport](#healthcheckaccessurlreport) | false | | | +| `derp` | [healthcheck.DERPReport](#healthcheckderpreport) | false | | | +| `failing_sections` | array of string | false | | | +| `healthy` | boolean | false | | Healthy is true if the report returns no errors. | +| `time` | string | false | | Time is the time the report was generated at. | +| `websocket` | [healthcheck.WebsocketReport](#healthcheckwebsocketreport) | false | | | ## healthcheck.WebsocketReport