From a1c73fed4c4cb475ec1b637f6ad04af5abb7355b Mon Sep 17 00:00:00 2001 From: Colin Adler Date: Thu, 15 Jun 2023 23:50:36 +0000 Subject: [PATCH] feat(healthcheck): add database section --- coderd/apidoc/docs.go | 18 ++++++ coderd/apidoc/swagger.json | 18 ++++++ coderd/coderd.go | 1 + coderd/healthcheck/accessurl.go | 4 +- coderd/healthcheck/accessurl_test.go | 6 +- coderd/healthcheck/database.go | 49 +++++++++++++++ coderd/healthcheck/database_test.go | 85 ++++++++++++++++++++++++++ coderd/healthcheck/healthcheck.go | 34 ++++++++++- coderd/healthcheck/healthcheck_test.go | 48 +++++++++++++-- docs/api/debug.md | 6 ++ docs/api/schemas.md | 27 ++++++++ 11 files changed, 284 insertions(+), 12 deletions(-) create mode 100644 coderd/healthcheck/database.go create mode 100644 coderd/healthcheck/database_test.go diff --git a/coderd/apidoc/docs.go b/coderd/apidoc/docs.go index c43da403aa372..33e09f3f94fcf 100644 --- a/coderd/apidoc/docs.go +++ b/coderd/apidoc/docs.go @@ -9993,12 +9993,30 @@ const docTemplate = `{ "error": {} } }, + "healthcheck.DatabaseReport": { + "type": "object", + "properties": { + "error": {}, + "healthy": { + "type": "boolean" + }, + "latency": { + "type": "integer" + }, + "reachable": { + "type": "boolean" + } + } + }, "healthcheck.Report": { "type": "object", "properties": { "access_url": { "$ref": "#/definitions/healthcheck.AccessURLReport" }, + "database": { + "$ref": "#/definitions/healthcheck.DatabaseReport" + }, "derp": { "$ref": "#/definitions/healthcheck.DERPReport" }, diff --git a/coderd/apidoc/swagger.json b/coderd/apidoc/swagger.json index c83e77a8ea8e1..d6110c0c53680 100644 --- a/coderd/apidoc/swagger.json +++ b/coderd/apidoc/swagger.json @@ -9052,12 +9052,30 @@ "error": {} } }, + "healthcheck.DatabaseReport": { + "type": "object", + "properties": { + "error": {}, + "healthy": { + "type": "boolean" + }, + "latency": { + "type": "integer" + }, + "reachable": { + "type": "boolean" + } + } + }, "healthcheck.Report": { "type": "object", 
"properties": { "access_url": { "$ref": "#/definitions/healthcheck.AccessURLReport" }, + "database": { + "$ref": "#/definitions/healthcheck.DatabaseReport" + }, "derp": { "$ref": "#/definitions/healthcheck.DERPReport" }, diff --git a/coderd/coderd.go b/coderd/coderd.go index fe59fd8726467..ee94458ca72bb 100644 --- a/coderd/coderd.go +++ b/coderd/coderd.go @@ -262,6 +262,7 @@ func New(options *Options) *API { if options.HealthcheckFunc == nil { options.HealthcheckFunc = func(ctx context.Context, apiKey string) *healthcheck.Report { return healthcheck.Run(ctx, &healthcheck.ReportOptions{ + DB: options.Database, AccessURL: options.AccessURL, DERPMap: options.DERPMap.Clone(), APIKey: apiKey, diff --git a/coderd/healthcheck/accessurl.go b/coderd/healthcheck/accessurl.go index c773c5560eb64..0edc827a624b3 100644 --- a/coderd/healthcheck/accessurl.go +++ b/coderd/healthcheck/accessurl.go @@ -18,12 +18,12 @@ type AccessURLReport struct { Error error `json:"error"` } -type AccessURLOptions struct { +type AccessURLReportOptions struct { AccessURL *url.URL Client *http.Client } -func (r *AccessURLReport) Run(ctx context.Context, opts *AccessURLOptions) { +func (r *AccessURLReport) Run(ctx context.Context, opts *AccessURLReportOptions) { ctx, cancel := context.WithTimeout(ctx, 5*time.Second) defer cancel() diff --git a/coderd/healthcheck/accessurl_test.go b/coderd/healthcheck/accessurl_test.go index 71e5a8d0e94dd..95545c25289cb 100644 --- a/coderd/healthcheck/accessurl_test.go +++ b/coderd/healthcheck/accessurl_test.go @@ -28,7 +28,7 @@ func TestAccessURL(t *testing.T) { ) defer cancel() - report.Run(ctx, &healthcheck.AccessURLOptions{ + report.Run(ctx, &healthcheck.AccessURLReportOptions{ AccessURL: client.URL, }) @@ -57,7 +57,7 @@ func TestAccessURL(t *testing.T) { u, err := url.Parse(srv.URL) require.NoError(t, err) - report.Run(ctx, &healthcheck.AccessURLOptions{ + report.Run(ctx, &healthcheck.AccessURLReportOptions{ Client: srv.Client(), AccessURL: u, }) @@ -93,7 +93,7 @@ 
func TestAccessURL(t *testing.T) { u, err := url.Parse(srv.URL) require.NoError(t, err) - report.Run(ctx, &healthcheck.AccessURLOptions{ + report.Run(ctx, &healthcheck.AccessURLReportOptions{ Client: client, AccessURL: u, }) diff --git a/coderd/healthcheck/database.go b/coderd/healthcheck/database.go new file mode 100644 index 0000000000000..eb37744d02ea3 --- /dev/null +++ b/coderd/healthcheck/database.go @@ -0,0 +1,49 @@ +package healthcheck + +import ( + "context" + "time" + + "golang.org/x/exp/slices" + "golang.org/x/xerrors" + + "github.com/coder/coder/coderd/database" +) + +type DatabaseReport struct { + Healthy bool `json:"healthy"` + Reachable bool `json:"reachable"` + Latency time.Duration `json:"latency"` + Error error `json:"error"` +} + +type DatabaseReportOptions struct { + DB database.Store +} + +func (r *DatabaseReport) Run(ctx context.Context, opts *DatabaseReportOptions) { + ctx, cancel := context.WithTimeout(ctx, 5*time.Second) + defer cancel() + + pingCount := 5 + pings := make([]time.Duration, 0, pingCount) + // Ping 5 times and collect the latencies so the median can be taken. + for i := 0; i < pingCount; i++ { + pong, err := opts.DB.Ping(ctx) + if err != nil { + r.Error = xerrors.Errorf("ping: %w", err) + return + } + pings = append(pings, pong) + } + slices.Sort(pings) + + // Take the median ping. + r.Latency = pings[pingCount/2] + // Somewhat arbitrary, but if the latency is over 15ms, we consider it + // unhealthy. 
+ if r.Latency < 15*time.Millisecond { + r.Healthy = true + } + r.Reachable = true +} diff --git a/coderd/healthcheck/database_test.go b/coderd/healthcheck/database_test.go new file mode 100644 index 0000000000000..5d5dc3035d145 --- /dev/null +++ b/coderd/healthcheck/database_test.go @@ -0,0 +1,85 @@ +package healthcheck_test + +import ( + "context" + "testing" + "time" + + "github.com/golang/mock/gomock" + "github.com/stretchr/testify/assert" + "golang.org/x/xerrors" + + "github.com/coder/coder/coderd/database/dbmock" + "github.com/coder/coder/coderd/healthcheck" + "github.com/coder/coder/testutil" +) + +func TestDatabase(t *testing.T) { + t.Parallel() + + t.Run("OK", func(t *testing.T) { + t.Parallel() + + var ( + ctx, cancel = context.WithTimeout(context.Background(), testutil.WaitShort) + report = healthcheck.DatabaseReport{} + db = dbmock.NewMockStore(gomock.NewController(t)) + ping = 10 * time.Millisecond + ) + defer cancel() + + db.EXPECT().Ping(gomock.Any()).Return(ping, nil).Times(5) + + report.Run(ctx, &healthcheck.DatabaseReportOptions{DB: db}) + + assert.True(t, report.Healthy) + assert.True(t, report.Reachable) + assert.Equal(t, ping, report.Latency) + assert.NoError(t, report.Error) + }) + + t.Run("Error", func(t *testing.T) { + t.Parallel() + + var ( + ctx, cancel = context.WithTimeout(context.Background(), testutil.WaitShort) + report = healthcheck.DatabaseReport{} + db = dbmock.NewMockStore(gomock.NewController(t)) + err = xerrors.New("ping error") + ) + defer cancel() + + db.EXPECT().Ping(gomock.Any()).Return(time.Duration(0), err) + + report.Run(ctx, &healthcheck.DatabaseReportOptions{DB: db}) + + assert.False(t, report.Healthy) + assert.False(t, report.Reachable) + assert.Zero(t, report.Latency) + assert.ErrorIs(t, report.Error, err) + }) + + t.Run("Median", func(t *testing.T) { + t.Parallel() + + var ( + ctx, cancel = context.WithTimeout(context.Background(), testutil.WaitShort) + report = healthcheck.DatabaseReport{} + db = 
dbmock.NewMockStore(gomock.NewController(t)) + ) + defer cancel() + + db.EXPECT().Ping(gomock.Any()).Return(time.Microsecond, nil) + db.EXPECT().Ping(gomock.Any()).Return(time.Second, nil) + db.EXPECT().Ping(gomock.Any()).Return(time.Nanosecond, nil) + db.EXPECT().Ping(gomock.Any()).Return(time.Minute, nil) + db.EXPECT().Ping(gomock.Any()).Return(time.Millisecond, nil) + + report.Run(ctx, &healthcheck.DatabaseReportOptions{DB: db}) + + assert.True(t, report.Healthy) + assert.True(t, report.Reachable) + assert.Equal(t, time.Millisecond, report.Latency) + assert.NoError(t, report.Error) + }) +} diff --git a/coderd/healthcheck/healthcheck.go b/coderd/healthcheck/healthcheck.go index 1acf448de6ae3..951e982fd3403 100644 --- a/coderd/healthcheck/healthcheck.go +++ b/coderd/healthcheck/healthcheck.go @@ -9,18 +9,22 @@ import ( "golang.org/x/xerrors" "tailscale.com/tailcfg" + + "github.com/coder/coder/coderd/database" ) const ( SectionDERP string = "DERP" SectionAccessURL string = "AccessURL" SectionWebsocket string = "Websocket" + SectionDatabase string = "Database" ) type Checker interface { DERP(ctx context.Context, opts *DERPReportOptions) DERPReport - AccessURL(ctx context.Context, opts *AccessURLOptions) AccessURLReport + AccessURL(ctx context.Context, opts *AccessURLReportOptions) AccessURLReport Websocket(ctx context.Context, opts *WebsocketReportOptions) WebsocketReport + Database(ctx context.Context, opts *DatabaseReportOptions) DatabaseReport } type Report struct { @@ -33,9 +37,11 @@ type Report struct { DERP DERPReport `json:"derp"` AccessURL AccessURLReport `json:"access_url"` Websocket WebsocketReport `json:"websocket"` + Database DatabaseReport `json:"database"` } type ReportOptions struct { + DB database.Store // TODO: support getting this over HTTP? 
DERPMap *tailcfg.DERPMap AccessURL *url.URL @@ -52,7 +58,7 @@ func (defaultChecker) DERP(ctx context.Context, opts *DERPReportOptions) (report return report } -func (defaultChecker) AccessURL(ctx context.Context, opts *AccessURLOptions) (report AccessURLReport) { +func (defaultChecker) AccessURL(ctx context.Context, opts *AccessURLReportOptions) (report AccessURLReport) { report.Run(ctx, opts) return report } @@ -62,6 +68,11 @@ func (defaultChecker) Websocket(ctx context.Context, opts *WebsocketReportOption return report } +func (defaultChecker) Database(ctx context.Context, opts *DatabaseReportOptions) (report DatabaseReport) { + report.Run(ctx, opts) + return report +} + func Run(ctx context.Context, opts *ReportOptions) *Report { var ( wg sync.WaitGroup @@ -95,7 +106,7 @@ func Run(ctx context.Context, opts *ReportOptions) *Report { } }() - report.AccessURL = opts.Checker.AccessURL(ctx, &AccessURLOptions{ + report.AccessURL = opts.Checker.AccessURL(ctx, &AccessURLReportOptions{ AccessURL: opts.AccessURL, Client: opts.Client, }) @@ -116,6 +127,20 @@ func Run(ctx context.Context, opts *ReportOptions) *Report { }) }() + wg.Add(1) + go func() { + defer wg.Done() + defer func() { + if err := recover(); err != nil { + report.Database.Error = xerrors.Errorf("%v", err) + } + }() + + report.Database = opts.Checker.Database(ctx, &DatabaseReportOptions{ + DB: opts.DB, + }) + }() + wg.Wait() report.Time = time.Now() if !report.DERP.Healthy { @@ -127,6 +152,9 @@ func Run(ctx context.Context, opts *ReportOptions) *Report { if !report.Websocket.Healthy { report.FailingSections = append(report.FailingSections, SectionWebsocket) } + if !report.Database.Healthy { + report.FailingSections = append(report.FailingSections, SectionDatabase) + } report.Healthy = len(report.FailingSections) == 0 return &report diff --git a/coderd/healthcheck/healthcheck_test.go b/coderd/healthcheck/healthcheck_test.go index 2b6737c6b3a9f..60dac7deb99b0 100644 --- a/coderd/healthcheck/healthcheck_test.go 
+++ b/coderd/healthcheck/healthcheck_test.go @@ -13,13 +13,14 @@ type testChecker struct { DERPReport healthcheck.DERPReport AccessURLReport healthcheck.AccessURLReport WebsocketReport healthcheck.WebsocketReport + DatabaseReport healthcheck.DatabaseReport } func (c *testChecker) DERP(context.Context, *healthcheck.DERPReportOptions) healthcheck.DERPReport { return c.DERPReport } -func (c *testChecker) AccessURL(context.Context, *healthcheck.AccessURLOptions) healthcheck.AccessURLReport { +func (c *testChecker) AccessURL(context.Context, *healthcheck.AccessURLReportOptions) healthcheck.AccessURLReport { return c.AccessURLReport } @@ -27,6 +28,10 @@ func (c *testChecker) Websocket(context.Context, *healthcheck.WebsocketReportOpt return c.WebsocketReport } +func (c *testChecker) Database(context.Context, *healthcheck.DatabaseReportOptions) healthcheck.DatabaseReport { + return c.DatabaseReport +} + func TestHealthcheck(t *testing.T) { t.Parallel() @@ -47,6 +52,9 @@ func TestHealthcheck(t *testing.T) { WebsocketReport: healthcheck.WebsocketReport{ Healthy: true, }, + DatabaseReport: healthcheck.DatabaseReport{ + Healthy: true, + }, }, healthy: true, failingSections: nil, @@ -62,6 +70,9 @@ func TestHealthcheck(t *testing.T) { WebsocketReport: healthcheck.WebsocketReport{ Healthy: true, }, + DatabaseReport: healthcheck.DatabaseReport{ + Healthy: true, + }, }, healthy: false, failingSections: []string{healthcheck.SectionDERP}, @@ -77,6 +88,9 @@ func TestHealthcheck(t *testing.T) { WebsocketReport: healthcheck.WebsocketReport{ Healthy: true, }, + DatabaseReport: healthcheck.DatabaseReport{ + Healthy: true, + }, }, healthy: false, failingSections: []string{healthcheck.SectionAccessURL}, @@ -92,14 +106,40 @@ func TestHealthcheck(t *testing.T) { WebsocketReport: healthcheck.WebsocketReport{ Healthy: false, }, + DatabaseReport: healthcheck.DatabaseReport{ + Healthy: true, + }, }, healthy: false, failingSections: []string{healthcheck.SectionWebsocket}, }, { - name: "AllFail", - 
checker: &testChecker{}, + name: "DatabaseFail", + checker: &testChecker{ + DERPReport: healthcheck.DERPReport{ + Healthy: true, + }, + AccessURLReport: healthcheck.AccessURLReport{ + Healthy: true, + }, + WebsocketReport: healthcheck.WebsocketReport{ + Healthy: true, + }, + DatabaseReport: healthcheck.DatabaseReport{ + Healthy: false, + }, + }, healthy: false, - failingSections: []string{healthcheck.SectionDERP, healthcheck.SectionAccessURL, healthcheck.SectionWebsocket}, + failingSections: []string{healthcheck.SectionDatabase}, + }, { + name: "AllFail", + checker: &testChecker{}, + healthy: false, + failingSections: []string{ + healthcheck.SectionDERP, + healthcheck.SectionAccessURL, + healthcheck.SectionWebsocket, + healthcheck.SectionDatabase, + }, }} { c := c t.Run(c.name, func(t *testing.T) { diff --git a/docs/api/debug.md b/docs/api/debug.md index f905b20062119..d44f40d557191 100644 --- a/docs/api/debug.md +++ b/docs/api/debug.md @@ -46,6 +46,12 @@ curl -X GET http://coder-server:8080/api/v2/debug/health \ "reachable": true, "status_code": 0 }, + "database": { + "error": null, + "healthy": true, + "latency": 0, + "reachable": true + }, "derp": { "error": null, "healthy": true, diff --git a/docs/api/schemas.md b/docs/api/schemas.md index 88580cdb6b957..5e0e8629e2cc8 100644 --- a/docs/api/schemas.md +++ b/docs/api/schemas.md @@ -5959,6 +5959,26 @@ AuthorizationObject can represent a "set" of objects, such as: all workspaces in | `enabled` | boolean | false | | | | `error` | any | false | | | +## healthcheck.DatabaseReport + +```json +{ + "error": null, + "healthy": true, + "latency": 0, + "reachable": true +} +``` + +### Properties + +| Name | Type | Required | Restrictions | Description | +| ----------- | ------- | -------- | ------------ | ----------- | +| `error` | any | false | | | +| `healthy` | boolean | false | | | +| `latency` | integer | false | | | +| `reachable` | boolean | false | | | + ## healthcheck.Report ```json @@ -5970,6 +5990,12 @@ 
AuthorizationObject can represent a "set" of objects, such as: all workspaces in "reachable": true, "status_code": 0 }, + "database": { + "error": null, + "healthy": true, + "latency": 0, + "reachable": true + }, "derp": { "error": null, "healthy": true, @@ -6149,6 +6175,7 @@ AuthorizationObject can represent a "set" of objects, such as: all workspaces in | Name | Type | Required | Restrictions | Description | | ------------------ | ---------------------------------------------------------- | -------- | ------------ | ------------------------------------------------ | | `access_url` | [healthcheck.AccessURLReport](#healthcheckaccessurlreport) | false | | | +| `database` | [healthcheck.DatabaseReport](#healthcheckdatabasereport) | false | | | | `derp` | [healthcheck.DERPReport](#healthcheckderpreport) | false | | | | `failing_sections` | array of string | false | | | | `healthy` | boolean | false | | Healthy is true if the report returns no errors. |