From 2484951b16827d29b6939e7a5bd2f25de4b7d200 Mon Sep 17 00:00:00 2001 From: Cian Johnston Date: Fri, 10 Nov 2023 19:07:58 +0530 Subject: [PATCH 1/9] feat(coderd/healthcheck): allow configuring database hc threshold --- coderd/healthcheck/database.go | 28 +++++++++++++++--------- coderd/healthcheck/database_test.go | 33 +++++++++++++++++++++++++++-- 2 files changed, 49 insertions(+), 12 deletions(-) diff --git a/coderd/healthcheck/database.go b/coderd/healthcheck/database.go index 70005dc5b3d9f..58f0d48843fcd 100644 --- a/coderd/healthcheck/database.go +++ b/coderd/healthcheck/database.go @@ -10,20 +10,30 @@ import ( "github.com/coder/coder/v2/coderd/database" ) +const ( + DatabaseDefaultThreshold = 15 * time.Millisecond +) + // @typescript-generate DatabaseReport type DatabaseReport struct { - Healthy bool `json:"healthy"` - Reachable bool `json:"reachable"` - Latency string `json:"latency"` - LatencyMs int `json:"latency_ms"` - Error *string `json:"error"` + Healthy bool `json:"healthy"` + Reachable bool `json:"reachable"` + Latency string `json:"latency"` + LatencyMs int64 `json:"latency_ms"` + ThresholdMs int64 `json:"threshold_ms"` + Error *string `json:"error"` } type DatabaseReportOptions struct { - DB database.Store + DB database.Store + Threshold time.Duration } func (r *DatabaseReport) Run(ctx context.Context, opts *DatabaseReportOptions) { + r.ThresholdMs = opts.Threshold.Milliseconds() + if r.ThresholdMs == 0 { + r.ThresholdMs = DatabaseDefaultThreshold.Milliseconds() + } ctx, cancel := context.WithTimeout(ctx, 5*time.Second) defer cancel() @@ -43,10 +53,8 @@ func (r *DatabaseReport) Run(ctx context.Context, opts *DatabaseReportOptions) { // Take the median ping. latency := pings[pingCount/2] r.Latency = latency.String() - r.LatencyMs = int(latency.Milliseconds()) - // Somewhat arbitrary, but if the latency is over 15ms, we consider it - // unhealthy. - if latency < 15*time.Millisecond { + r.LatencyMs = latency.Milliseconds() + if r.LatencyMs < r.ThresholdMs { r.Healthy = true } r.Reachable = true diff --git a/coderd/healthcheck/database_test.go b/coderd/healthcheck/database_test.go index f6c2782aacacd..39da1bc657458 100644 --- a/coderd/healthcheck/database_test.go +++ b/coderd/healthcheck/database_test.go @@ -36,7 +36,8 @@ func TestDatabase(t *testing.T) { assert.True(t, report.Healthy) assert.True(t, report.Reachable) assert.Equal(t, ping.String(), report.Latency) - assert.Equal(t, int(ping.Milliseconds()), report.LatencyMs) + assert.Equal(t, ping.Milliseconds(), report.LatencyMs) + assert.Equal(t, healthcheck.DatabaseDefaultThreshold.Milliseconds(), report.ThresholdMs) assert.Nil(t, report.Error) }) @@ -59,6 +60,7 @@ func TestDatabase(t *testing.T) { assert.False(t, report.Reachable) assert.Zero(t, report.Latency) require.NotNil(t, report.Error) + assert.Equal(t, healthcheck.DatabaseDefaultThreshold.Milliseconds(), report.ThresholdMs) assert.Contains(t, *report.Error, err.Error()) }) @@ -83,7 +85,34 @@ func TestDatabase(t *testing.T) { assert.True(t, report.Healthy) assert.True(t, report.Reachable) assert.Equal(t, time.Millisecond.String(), report.Latency) - assert.Equal(t, 1, report.LatencyMs) + assert.EqualValues(t, 1, report.LatencyMs) + assert.Equal(t, healthcheck.DatabaseDefaultThreshold.Milliseconds(), report.ThresholdMs) + assert.Nil(t, report.Error) + }) + + t.Run("Threshold", func(t *testing.T) { + t.Parallel() + + var ( + ctx, cancel = context.WithTimeout(context.Background(), testutil.WaitShort) + report = healthcheck.DatabaseReport{} + db = dbmock.NewMockStore(gomock.NewController(t)) + ) + defer cancel() + + db.EXPECT().Ping(gomock.Any()).Return(time.Second, nil) + db.EXPECT().Ping(gomock.Any()).Return(time.Millisecond, nil) + db.EXPECT().Ping(gomock.Any()).Return(time.Second, nil) + db.EXPECT().Ping(gomock.Any()).Return(time.Millisecond, nil) + db.EXPECT().Ping(gomock.Any()).Return(time.Second, nil) + + report.Run(ctx, &healthcheck.DatabaseReportOptions{DB: db, Threshold: time.Second}) + + assert.False(t, report.Healthy) + assert.True(t, report.Reachable) + assert.Equal(t, time.Second.String(), report.Latency) + assert.EqualValues(t, 1000, report.LatencyMs) + assert.Equal(t, time.Second.Milliseconds(), report.ThresholdMs) assert.Nil(t, report.Error) }) } From 9e5c030343bca9460697ea906c1ea45ae6002610 Mon Sep 17 00:00:00 2001 From: Cian Johnston Date: Fri, 10 Nov 2023 20:55:27 +0530 Subject: [PATCH 2/9] feat(coderd): add database hc latency, plumb through --- coderd/coderd.go | 19 ++++++++++++++---- coderd/healthcheck/healthcheck.go | 33 ++++++++----------------------- codersdk/deployment.go | 21 ++++++++++++++++++++ 3 files changed, 44 insertions(+), 29 deletions(-) diff --git a/coderd/coderd.go b/coderd/coderd.go index 600e66404f327..9b9f1195e455f 100644 --- a/coderd/coderd.go +++ b/coderd/coderd.go @@ -38,6 +38,7 @@ import ( // Used for swagger docs. _ "github.com/coder/coder/v2/coderd/apidoc" "github.com/coder/coder/v2/coderd/externalauth" + "github.com/coder/coder/v2/coderd/healthcheck/derphealth" "cdr.dev/slog" "github.com/coder/coder/v2/buildinfo" @@ -398,10 +399,20 @@ func New(options *Options) *API { if options.HealthcheckFunc == nil { options.HealthcheckFunc = func(ctx context.Context, apiKey string) *healthcheck.Report { return healthcheck.Run(ctx, &healthcheck.ReportOptions{ - DB: options.Database, - AccessURL: options.AccessURL, - DERPMap: api.DERPMap(), - APIKey: apiKey, + Database: healthcheck.DatabaseReportOptions{ + DB: options.Database, + Threshold: options.DeploymentValues.Healthcheck.ThresholdDatabase.Value(), + }, + Websocket: healthcheck.WebsocketReportOptions{ + AccessURL: options.AccessURL, + APIKey: apiKey, + }, + AccessURL: healthcheck.AccessURLReportOptions{ + AccessURL: options.AccessURL, + }, + DerpHealth: derphealth.ReportOptions{ + DERPMap: api.DERPMap(), + }, }) } } diff --git a/coderd/healthcheck/healthcheck.go b/coderd/healthcheck/healthcheck.go index 61c6e40c1e1be..d59de08592203 100644 --- a/coderd/healthcheck/healthcheck.go +++ b/coderd/healthcheck/healthcheck.go @@ -3,15 +3,10 @@ package healthcheck import ( "context" "fmt" - "net/http" - "net/url" "sync" "time" - "tailscale.com/tailcfg" - "github.com/coder/coder/v2/buildinfo" - "github.com/coder/coder/v2/coderd/database" "github.com/coder/coder/v2/coderd/healthcheck/derphealth" "github.com/coder/coder/v2/coderd/util/ptr" ) @@ -49,12 +44,10 @@ type Report struct { } type ReportOptions struct { - DB database.Store - // TODO: support getting this over HTTP? - DERPMap *tailcfg.DERPMap - AccessURL *url.URL - Client *http.Client - APIKey string + AccessURL AccessURLReportOptions + Database DatabaseReportOptions + DerpHealth derphealth.ReportOptions + Websocket WebsocketReportOptions Checker Checker } @@ -100,9 +93,7 @@ func Run(ctx context.Context, opts *ReportOptions) *Report { } }() - report.DERP = opts.Checker.DERP(ctx, &derphealth.ReportOptions{ - DERPMap: opts.DERPMap, - }) + report.DERP = opts.Checker.DERP(ctx, &opts.DerpHealth) }() wg.Add(1) @@ -114,10 +105,7 @@ func Run(ctx context.Context, opts *ReportOptions) *Report { } }() - report.AccessURL = opts.Checker.AccessURL(ctx, &AccessURLReportOptions{ - AccessURL: opts.AccessURL, - Client: opts.Client, - }) + report.AccessURL = opts.Checker.AccessURL(ctx, &opts.AccessURL) }() wg.Add(1) @@ -129,10 +117,7 @@ func Run(ctx context.Context, opts *ReportOptions) *Report { } }() - report.Websocket = opts.Checker.Websocket(ctx, &WebsocketReportOptions{ - APIKey: opts.APIKey, - AccessURL: opts.AccessURL, - }) + report.Websocket = opts.Checker.Websocket(ctx, &opts.Websocket) }() wg.Add(1) @@ -144,9 +129,7 @@ func Run(ctx context.Context, opts *ReportOptions) *Report { } }() - report.Database = opts.Checker.Database(ctx, &DatabaseReportOptions{ - DB: opts.DB, - }) + report.Database = opts.Checker.Database(ctx, &opts.Database) }() report.CoderVersion = buildinfo.Version() diff --git a/codersdk/deployment.go b/codersdk/deployment.go index 586de0774849f..600840fa5bb61 100644 --- a/codersdk/deployment.go +++ b/codersdk/deployment.go @@ -183,6 +183,7 @@ type DeploymentValues struct { EnableTerraformDebugMode clibase.Bool `json:"enable_terraform_debug_mode,omitempty" typescript:",notnull"` UserQuietHoursSchedule UserQuietHoursScheduleConfig `json:"user_quiet_hours_schedule,omitempty" typescript:",notnull"` WebTerminalRenderer clibase.String `json:"web_terminal_renderer,omitempty" typescript:",notnull"` + Healthcheck HealthcheckConfig `json:"healthcheck,omitempty" typescript:",notnull"` Config clibase.YAMLConfigPath `json:"config,omitempty" typescript:",notnull"` WriteConfig clibase.Bool `json:"write_config,omitempty" typescript:",notnull"` @@ -395,6 +396,11 @@ type UserQuietHoursScheduleConfig struct { // WindowDuration clibase.Duration `json:"window_duration" typescript:",notnull"` } +// HealthcheckConfig contains configuration for healthchecks. +type HealthcheckConfig struct { + ThresholdDatabase clibase.Duration `json:"threshold_database" typescript:",notnull"` +} + const ( annotationEnterpriseKey = "enterprise" annotationSecretKey = "secret" @@ -489,6 +495,11 @@ func (c *DeploymentValues) Options() clibase.OptionSet { Name: "Logging", YAML: "logging", } + deploymentGroupIntrospectionHealthcheck = clibase.Group{ + Parent: &deploymentGroupIntrospection, + Name: "Healthcheck", + YAML: "healthcheck", + } deploymentGroupOAuth2 = clibase.Group{ Name: "OAuth2", Description: `Configure login and user-provisioning with GitHub via oAuth2.`, @@ -1799,6 +1810,16 @@ Write out the current server config as YAML to stdout.`, Group: &deploymentGroupClient, YAML: "webTerminalRenderer", }, + { + Name: "Healthcheck Threshold Database", + Description: "The threshold for the database healthcheck. If the median latency of the database exceeds this threshold over 5 attempts, the database is considered unhealthy. The default value is 15ms.", + Flag: "", + Env: "CODER_HEALTHCHECK_THRESHOLD_DATABASE", + Default: (15 * time.Millisecond).String(), + Value: &c.Healthcheck.ThresholdDatabase, + Group: &deploymentGroupIntrospectionHealthcheck, + YAML: "thresholdDatabase", + }, } return opts From 42477e343ebfc9728cbf45ae7d484131a1d3a571 Mon Sep 17 00:00:00 2001 From: Cian Johnston Date: Fri, 10 Nov 2023 20:55:55 +0530 Subject: [PATCH 3/9] gen etc. --- cli/testdata/coder_server_--help.golden | 6 +++ cli/testdata/server-config.yaml.golden | 6 +++ coderd/apidoc/docs.go | 14 +++++++ coderd/apidoc/swagger.json | 14 +++++++ docs/api/debug.md | 3 +- docs/api/general.md | 3 ++ docs/api/schemas.md | 42 +++++++++++++++---- docs/cli/server.md | 11 +++++ .../cli/testdata/coder_server_--help.golden | 6 +++ site/src/api/typesGenerated.ts | 7 ++++ 10 files changed, 102 insertions(+), 10 deletions(-) diff --git a/cli/testdata/coder_server_--help.golden b/cli/testdata/coder_server_--help.golden index 8981ea5f30d8f..b80ef4336b818 100644 --- a/cli/testdata/coder_server_--help.golden +++ b/cli/testdata/coder_server_--help.golden @@ -80,6 +80,12 @@ Use a YAML configuration file when your server launch become unwieldy. Write out the current server config as YAML to stdout. +INTROSPECTION / HEALTHCHECK OPTIONS: + duration, $CODER_HEALTHCHECK_THRESHOLD_DATABASE (default: 15ms) + The threshold for the database healthcheck. If the median latency of + the database exceeds this threshold over 5 attempts, the database is + considered unhealthy. The default value is 15ms. + INTROSPECTION / LOGGING OPTIONS: --enable-terraform-debug-mode bool, $CODER_ENABLE_TERRAFORM_DEBUG_MODE (default: false) Allow administrators to enable Terraform debug output. diff --git a/cli/testdata/server-config.yaml.golden b/cli/testdata/server-config.yaml.golden index a52ec496de6c1..c83ec2d728817 100644 --- a/cli/testdata/server-config.yaml.golden +++ b/cli/testdata/server-config.yaml.golden @@ -232,6 +232,12 @@ introspection: # Allow administrators to enable Terraform debug output. # (default: false, type: bool) enableTerraformDebugMode: false + healthcheck: + # The threshold for the database healthcheck. If the median latency of the + # database exceeds this threshold over 5 attempts, the database is considered + # unhealthy. The default value is 15ms. + # (default: 15ms, type: duration) + thresholdDatabase: 15ms oauth2: github: # Client ID for Login with GitHub. diff --git a/coderd/apidoc/docs.go b/coderd/apidoc/docs.go index b55e9d6d23225..d934a0f2c5e48 100644 --- a/coderd/apidoc/docs.go +++ b/coderd/apidoc/docs.go @@ -8380,6 +8380,9 @@ const docTemplate = `{ "type": "string" } }, + "healthcheck": { + "$ref": "#/definitions/codersdk.HealthcheckConfig" + }, "http_address": { "description": "HTTPAddress is a string because it may be set to zero to disable.", "type": "string" @@ -8859,6 +8862,14 @@ const docTemplate = `{ } } }, + "codersdk.HealthcheckConfig": { + "type": "object", + "properties": { + "threshold_database": { + "type": "integer" + } + } + }, "codersdk.InsightsReportInterval": { "type": "string", "enum": [ @@ -12177,6 +12188,9 @@ const docTemplate = `{ }, "reachable": { "type": "boolean" + }, + "threshold_ms": { + "type": "integer" } } }, diff --git a/coderd/apidoc/swagger.json b/coderd/apidoc/swagger.json index 8b474ff8ecbee..21f8d5f8dd7ee 100644 --- a/coderd/apidoc/swagger.json +++ b/coderd/apidoc/swagger.json @@ -7492,6 +7492,9 @@ "type": "string" } }, + "healthcheck": { + "$ref": "#/definitions/codersdk.HealthcheckConfig" + }, "http_address": { "description": "HTTPAddress is a string because it may be set to zero to disable.", "type": "string" @@ -7961,6 +7964,14 @@ } } }, + "codersdk.HealthcheckConfig": { + "type": "object", + "properties": { + "threshold_database": { + "type": "integer" + } + } + }, "codersdk.InsightsReportInterval": { "type": "string", "enum": ["day", "week"], @@ -11102,6 +11113,9 @@ }, "reachable": { "type": "boolean" + }, + "threshold_ms": { + "type": "integer" } } }, diff --git a/docs/api/debug.md b/docs/api/debug.md index 5016f6a87b256..24ccf253398cb 100644 --- a/docs/api/debug.md +++ b/docs/api/debug.md @@ -53,7 +53,8 @@ curl -X GET http://coder-server:8080/api/v2/debug/health \ "healthy": true, "latency": "string", "latency_ms": 0, - "reachable": true + "reachable": true, + "threshold_ms": 0 }, "derp": { "error": "string", diff --git a/docs/api/general.md b/docs/api/general.md index 6d000836670ea..1187cfb79f1f0 100644 --- a/docs/api/general.md +++ b/docs/api/general.md @@ -235,6 +235,9 @@ curl -X GET http://coder-server:8080/api/v2/deployment/config \ ] }, "external_token_encryption_keys": ["string"], + "healthcheck": { + "threshold_database": 0 + }, "http_address": "string", "in_memory_database": true, "job_hang_detector_interval": 0, diff --git a/docs/api/schemas.md b/docs/api/schemas.md index 517e76981c567..f529dea988205 100644 --- a/docs/api/schemas.md +++ b/docs/api/schemas.md @@ -2156,6 +2156,9 @@ AuthorizationObject can represent a "set" of objects, such as: all workspaces in ] }, "external_token_encryption_keys": ["string"], + "healthcheck": { + "threshold_database": 0 + }, "http_address": "string", "in_memory_database": true, "job_hang_detector_interval": 0, @@ -2527,6 +2530,9 @@ AuthorizationObject can represent a "set" of objects, such as: all workspaces in ] }, "external_token_encryption_keys": ["string"], + "healthcheck": { + "threshold_database": 0 + }, "http_address": "string", "in_memory_database": true, "job_hang_detector_interval": 0, @@ -2726,6 +2732,7 @@ AuthorizationObject can represent a "set" of objects, such as: all workspaces in | `experiments` | array of string | false | | | | `external_auth` | [clibase.Struct-array_codersdk_ExternalAuthConfig](#clibasestruct-array_codersdk_externalauthconfig) | false | | | | `external_token_encryption_keys` | array of string | false | | | +| `healthcheck` | [codersdk.HealthcheckConfig](#codersdkhealthcheckconfig) | false | | | | `http_address` | string | false | | Http address is a string because it may be set to zero to disable. | | `in_memory_database` | boolean | false | | | | `job_hang_detector_interval` | integer | false | | | @@ -3176,6 +3183,20 @@ AuthorizationObject can represent a "set" of objects, such as: all workspaces in | `threshold` | integer | false | | Threshold specifies the number of consecutive failed health checks before returning "unhealthy". | | `url` | string | false | | URL specifies the endpoint to check for the app health. | +## codersdk.HealthcheckConfig + +```json +{ + "threshold_database": 0 +} +``` + +### Properties + +| Name | Type | Required | Restrictions | Description | +| -------------------- | ------- | -------- | ------------ | ----------- | +| `threshold_database` | integer | false | | | + ## codersdk.InsightsReportInterval ```json @@ -7444,19 +7465,21 @@ If the schedule is empty, the user will be updated to use the default schedule.| "healthy": true, "latency": "string", "latency_ms": 0, - "reachable": true + "reachable": true, + "threshold_ms": 0 } ``` ### Properties -| Name | Type | Required | Restrictions | Description | -| ------------ | ------- | -------- | ------------ | ----------- | -| `error` | string | false | | | -| `healthy` | boolean | false | | | -| `latency` | string | false | | | -| `latency_ms` | integer | false | | | -| `reachable` | boolean | false | | | +| Name | Type | Required | Restrictions | Description | +| -------------- | ------- | -------- | ------------ | ----------- | +| `error` | string | false | | | +| `healthy` | boolean | false | | | +| `latency` | string | false | | | +| `latency_ms` | integer | false | | | +| `reachable` | boolean | false | | | +| `threshold_ms` | integer | false | | | ## healthcheck.Report @@ -7476,7 +7499,8 @@ If the schedule is empty, the user will be updated to use the default schedule.| "healthy": true, "latency": "string", "latency_ms": 0, - "reachable": true + "reachable": true, + "threshold_ms": 0 }, "derp": { "error": "string", diff --git a/docs/cli/server.md b/docs/cli/server.md index 93a07b72f98a8..99cc0a52b02f9 100644 --- a/docs/cli/server.md +++ b/docs/cli/server.md @@ -305,6 +305,17 @@ Time to force cancel provisioning tasks that are stuck. HTTP bind address of the server. Unset to disable the HTTP endpoint. +### -- + +| | | +| ----------- | -------------------------------------------------------- | +| Type | duration | +| Environment | $CODER_HEALTHCHECK_THRESHOLD_DATABASE | +| YAML | introspection.healthcheck.thresholdDatabase | +| Default | 15ms | + +The threshold for the database healthcheck. If the median latency of the database exceeds this threshold over 5 attempts, the database is considered unhealthy. The default value is 15ms. + ### --log-human | | | diff --git a/enterprise/cli/testdata/coder_server_--help.golden b/enterprise/cli/testdata/coder_server_--help.golden index 6e3b4a502ed27..46fece16cfa00 100644 --- a/enterprise/cli/testdata/coder_server_--help.golden +++ b/enterprise/cli/testdata/coder_server_--help.golden @@ -81,6 +81,12 @@ Use a YAML configuration file when your server launch become unwieldy. Write out the current server config as YAML to stdout. +INTROSPECTION / HEALTHCHECK OPTIONS: + duration, $CODER_HEALTHCHECK_THRESHOLD_DATABASE (default: 15ms) + The threshold for the database healthcheck. If the median latency of + the database exceeds this threshold over 5 attempts, the database is + considered unhealthy. The default value is 15ms. + INTROSPECTION / LOGGING OPTIONS: --enable-terraform-debug-mode bool, $CODER_ENABLE_TERRAFORM_DEBUG_MODE (default: false) Allow administrators to enable Terraform debug output. diff --git a/site/src/api/typesGenerated.ts b/site/src/api/typesGenerated.ts index 920237160dfd1..af5fec2b435da 100644 --- a/site/src/api/typesGenerated.ts +++ b/site/src/api/typesGenerated.ts @@ -423,6 +423,7 @@ export interface DeploymentValues { readonly enable_terraform_debug_mode?: boolean; readonly user_quiet_hours_schedule?: UserQuietHoursScheduleConfig; readonly web_terminal_renderer?: string; + readonly healthcheck?: HealthcheckConfig; readonly config?: string; readonly write_config?: boolean; readonly address?: string; @@ -548,6 +549,11 @@ export interface Healthcheck { readonly threshold: number; } +// From codersdk/deployment.go +export interface HealthcheckConfig { + readonly threshold_database: number; +} + // From codersdk/workspaceagents.go export interface IssueReconnectingPTYSignedTokenRequest { readonly url: string; @@ -2088,6 +2094,7 @@ export interface HealthcheckDatabaseReport { readonly reachable: boolean; readonly latency: string; readonly latency_ms: number; + readonly threshold_ms: number; readonly error?: string; } From 15361a3d1362b56fc563fd82c530d316ebd73260 Mon Sep 17 00:00:00 2001 From: Cian Johnston Date: Fri, 10 Nov 2023 22:33:58 +0530 Subject: [PATCH 4/9] plumb through healthcheck refresh / timeout --- coderd/coderd.go | 4 ++-- coderd/debug.go | 6 +++--- coderd/debug_test.go | 45 ++++++++++++++++++++++++++++++++++++++++++ codersdk/deployment.go | 25 ++++++++++++++++++++++- 4 files changed, 74 insertions(+), 6 deletions(-) diff --git a/coderd/coderd.go b/coderd/coderd.go index 9b9f1195e455f..d0adaaf517e23 100644 --- a/coderd/coderd.go +++ b/coderd/coderd.go @@ -417,10 +417,10 @@ func New(options *Options) *API { } } if options.HealthcheckTimeout == 0 { - options.HealthcheckTimeout = 30 * time.Second + options.HealthcheckTimeout = options.DeploymentValues.Healthcheck.Timeout.Value() } if options.HealthcheckRefresh == 0 { - options.HealthcheckRefresh = 10 * time.Minute + options.HealthcheckRefresh = options.DeploymentValues.Healthcheck.Refresh.Value() } var oidcAuthURLParams map[string]string diff --git a/coderd/debug.go b/coderd/debug.go index 1e50b91ba69d3..6b53198046c7c 100644 --- a/coderd/debug.go +++ b/coderd/debug.go @@ -32,12 +32,12 @@ func (api *API) debugCoordinator(rw http.ResponseWriter, r *http.Request) { // @Router /debug/health [get] func (api *API) debugDeploymentHealth(rw http.ResponseWriter, r *http.Request) { apiKey := httpmw.APITokenFromRequest(r) - ctx, cancel := context.WithTimeout(r.Context(), api.HealthcheckTimeout) + ctx, cancel := context.WithTimeout(r.Context(), api.Options.HealthcheckTimeout) defer cancel() // Get cached report if it exists. if report := api.healthCheckCache.Load(); report != nil { - if time.Since(report.Time) < api.HealthcheckRefresh { + if time.Since(report.Time) < api.Options.HealthcheckRefresh { formatHealthcheck(ctx, rw, r, report) return } @@ -45,7 +45,7 @@ func (api *API) debugDeploymentHealth(rw http.ResponseWriter, r *http.Request) { resChan := api.healthCheckGroup.DoChan("", func() (*healthcheck.Report, error) { // Create a new context not tied to the request. - ctx, cancel := context.WithTimeout(context.Background(), api.HealthcheckTimeout) + ctx, cancel := context.WithTimeout(context.Background(), api.Options.HealthcheckTimeout) defer cancel() report := api.HealthcheckFunc(ctx, apiKey) diff --git a/coderd/debug_test.go b/coderd/debug_test.go index f9241a303bcd4..2136ca2d9d6ac 100644 --- a/coderd/debug_test.go +++ b/coderd/debug_test.go @@ -72,6 +72,51 @@ func TestDebugHealth(t *testing.T) { require.Equal(t, http.StatusNotFound, res.StatusCode) }) + t.Run("Refresh", func(t *testing.T) { + t.Parallel() + + var ( + calls = make(chan struct{}) + callsDone = make(chan struct{}) + ctx, cancel = context.WithTimeout(context.Background(), testutil.WaitShort) + client = coderdtest.New(t, &coderdtest.Options{ + HealthcheckRefresh: time.Microsecond, + HealthcheckFunc: func(context.Context, string) *healthcheck.Report { + calls <- struct{}{} + return &healthcheck.Report{} + }, + }) + _ = coderdtest.CreateFirstUser(t, client) + ) + + defer cancel() + + go func() { + defer close(callsDone) + <-calls + <-time.After(testutil.IntervalFast) + <-calls + }() + + res, err := client.Request(ctx, "GET", "/api/v2/debug/health", nil) + require.NoError(t, err) + defer res.Body.Close() + _, _ = io.ReadAll(res.Body) + require.Equal(t, http.StatusOK, res.StatusCode) + + res, err = client.Request(ctx, "GET", "/api/v2/debug/health", nil) + require.NoError(t, err) + defer res.Body.Close() + _, _ = io.ReadAll(res.Body) + require.Equal(t, http.StatusOK, res.StatusCode) + + select { + case <-callsDone: + case <-ctx.Done(): + t.Fatal("timed out waiting for calls to finish") + } + }) + t.Run("Deduplicated", func(t *testing.T) { t.Parallel() diff --git a/codersdk/deployment.go b/codersdk/deployment.go index 600840fa5bb61..7f21350680e09 100644 --- a/codersdk/deployment.go +++ b/codersdk/deployment.go @@ -398,6 +398,8 @@ type UserQuietHoursScheduleConfig struct { // HealthcheckConfig contains configuration for healthchecks. type HealthcheckConfig struct { + Timeout clibase.Duration `json:"timeout" typescript:",notnull"` + Refresh clibase.Duration `json:"refresh" typescript:",notnull"` ThresholdDatabase clibase.Duration `json:"threshold_database" typescript:",notnull"` } @@ -1810,10 +1812,31 @@ Write out the current server config as YAML to stdout.`, Group: &deploymentGroupClient, YAML: "webTerminalRenderer", }, + // Healthcheck Options + { + Name: "Healthcheck Timeout", + Description: "Overall timeout for healthchecks.", + Flag: "healthcheck-timeout", + Env: "CODER_HEALTHCHECK_TIMEOUT", + Default: (30 * time.Second).String(), + Value: &c.Healthcheck.Timeout, + Group: &deploymentGroupIntrospectionHealthcheck, + YAML: "timeout", + }, + { + Name: "Healthcheck Refresh", + Description: "Refresh interval for healthchecks.", + Flag: "healthcheck-refresh", + Env: "CODER_HEALTHCHECK_REFRESH", + Default: (10 * time.Minute).String(), + Value: &c.Healthcheck.Refresh, + Group: &deploymentGroupIntrospectionHealthcheck, + YAML: "refresh", + }, { Name: "Healthcheck Threshold Database", Description: "The threshold for the database healthcheck. If the median latency of the database exceeds this threshold over 5 attempts, the database is considered unhealthy. The default value is 15ms.", - Flag: "", + Flag: "threshold-database", Env: "CODER_HEALTHCHECK_THRESHOLD_DATABASE", Default: (15 * time.Millisecond).String(), Value: &c.Healthcheck.ThresholdDatabase, From 20e713e4a60fcf7ed05f14d7e0874318298c3064 Mon Sep 17 00:00:00 2001 From: Cian Johnston Date: Fri, 10 Nov 2023 22:34:13 +0530 Subject: [PATCH 5/9] make gen etc. again --- cli/testdata/coder_server_--help.golden | 8 ++++++- cli/testdata/server-config.yaml.golden | 6 +++++ coderd/apidoc/docs.go | 6 +++++ coderd/apidoc/swagger.json | 6 +++++ docs/api/general.md | 4 +++- docs/api/schemas.md | 14 ++++++++--- docs/cli/server.md | 24 ++++++++++++++++++- .../cli/testdata/coder_server_--help.golden | 8 ++++++- site/src/api/typesGenerated.ts | 2 ++ 9 files changed, 71 insertions(+), 7 deletions(-) diff --git a/cli/testdata/coder_server_--help.golden b/cli/testdata/coder_server_--help.golden index b80ef4336b818..e82571bb64fbd 100644 --- a/cli/testdata/coder_server_--help.golden +++ b/cli/testdata/coder_server_--help.golden @@ -81,11 +81,17 @@ Use a YAML configuration file when your server launch become unwieldy. Write out the current server config as YAML to stdout. INTROSPECTION / HEALTHCHECK OPTIONS: - duration, $CODER_HEALTHCHECK_THRESHOLD_DATABASE (default: 15ms) + --healthcheck-refresh duration, $CODER_HEALTHCHECK_REFRESH (default: 10m0s) + Refresh interval for healthchecks. + + --threshold-database duration, $CODER_HEALTHCHECK_THRESHOLD_DATABASE (default: 15ms) The threshold for the database healthcheck. If the median latency of the database exceeds this threshold over 5 attempts, the database is considered unhealthy. The default value is 15ms. + --healthcheck-timeout duration, $CODER_HEALTHCHECK_TIMEOUT (default: 30s) + Overall timeout for healthchecks. + INTROSPECTION / LOGGING OPTIONS: --enable-terraform-debug-mode bool, $CODER_ENABLE_TERRAFORM_DEBUG_MODE (default: false) Allow administrators to enable Terraform debug output. diff --git a/cli/testdata/server-config.yaml.golden b/cli/testdata/server-config.yaml.golden index c83ec2d728817..e4f588485a271 100644 --- a/cli/testdata/server-config.yaml.golden +++ b/cli/testdata/server-config.yaml.golden @@ -233,6 +233,12 @@ introspection: # (default: false, type: bool) enableTerraformDebugMode: false healthcheck: + # Overall timeout for healthchecks. + # (default: 30s, type: duration) + timeout: 30s + # Refresh interval for healthchecks. + # (default: 10m0s, type: duration) + refresh: 10m0s # The threshold for the database healthcheck. If the median latency of the # database exceeds this threshold over 5 attempts, the database is considered # unhealthy. The default value is 15ms. diff --git a/coderd/apidoc/docs.go b/coderd/apidoc/docs.go index d934a0f2c5e48..b9b271f17e3b7 100644 --- a/coderd/apidoc/docs.go +++ b/coderd/apidoc/docs.go @@ -8865,8 +8865,14 @@ const docTemplate = `{ "codersdk.HealthcheckConfig": { "type": "object", "properties": { + "refresh": { + "type": "integer" + }, "threshold_database": { "type": "integer" + }, + "timeout": { + "type": "integer" } } }, diff --git a/coderd/apidoc/swagger.json b/coderd/apidoc/swagger.json index 21f8d5f8dd7ee..9b7ccf8111c57 100644 --- a/coderd/apidoc/swagger.json +++ b/coderd/apidoc/swagger.json @@ -7967,8 +7967,14 @@ "codersdk.HealthcheckConfig": { "type": "object", "properties": { + "refresh": { + "type": "integer" + }, "threshold_database": { "type": "integer" + }, + "timeout": { + "type": "integer" } } }, diff --git a/docs/api/general.md b/docs/api/general.md index 1187cfb79f1f0..fd63c832e3bb9 100644 --- a/docs/api/general.md +++ b/docs/api/general.md @@ -236,7 +236,9 @@ curl -X GET http://coder-server:8080/api/v2/deployment/config \ }, "external_token_encryption_keys": ["string"], "healthcheck": { - "threshold_database": 0 + "refresh": 0, + "threshold_database": 0, + "timeout": 0 }, "http_address": "string", "in_memory_database": true, diff --git a/docs/api/schemas.md b/docs/api/schemas.md index f529dea988205..6d85ed2b8577a 100644 --- a/docs/api/schemas.md +++ b/docs/api/schemas.md @@ -2157,7 +2157,9 @@ AuthorizationObject can represent a "set" of objects, such as: all workspaces in }, "external_token_encryption_keys": ["string"], "healthcheck": { - "threshold_database": 0 + "refresh": 0, + "threshold_database": 0, + "timeout": 0 }, "http_address": "string", "in_memory_database": true, @@ -2531,7 +2533,9 @@ AuthorizationObject can represent a "set" of objects, such as: all workspaces in }, "external_token_encryption_keys": ["string"], "healthcheck": { - "threshold_database": 0 + "refresh": 0, + "threshold_database": 0, + "timeout": 0 }, "http_address": "string", "in_memory_database": true, @@ -3187,7 +3191,9 @@ AuthorizationObject can represent a "set" of objects, such as: all workspaces in ```json { - "threshold_database": 0 + "refresh": 0, + "threshold_database": 0, + "timeout": 0 } ``` @@ -3195,7 +3201,9 @@ AuthorizationObject can represent a "set" of objects, such as: all workspaces in | Name | Type | Required | Restrictions | Description | | -------------------- | ------- | -------- | ------------ | ----------- | +| `refresh` | integer | false | | | | `threshold_database` | integer | false | | | +| `timeout` | integer | false | | | ## codersdk.InsightsReportInterval diff --git a/docs/cli/server.md b/docs/cli/server.md index 99cc0a52b02f9..7e152408a19ca 100644 --- a/docs/cli/server.md +++ b/docs/cli/server.md @@ -305,7 +305,18 @@ Time to force cancel provisioning tasks that are stuck. HTTP bind address of the server. Unset to disable the HTTP endpoint. -### -- +### --healthcheck-refresh + +| | | +| ----------- | ---------------------------------------------- | +| Type | duration | +| Environment | $CODER_HEALTHCHECK_REFRESH | +| YAML | introspection.healthcheck.refresh | +| Default | 10m0s | + +Refresh interval for healthchecks. + +### --threshold-database | | | | ----------- | -------------------------------------------------------- | @@ -316,6 +327,17 @@ HTTP bind address of the server. Unset to disable the HTTP endpoint. The threshold for the database healthcheck. If the median latency of the database exceeds this threshold over 5 attempts, the database is considered unhealthy. The default value is 15ms. +### --healthcheck-timeout + +| | | +| ----------- | ---------------------------------------------- | +| Type | duration | +| Environment | $CODER_HEALTHCHECK_TIMEOUT | +| YAML | introspection.healthcheck.timeout | +| Default | 30s | + +Overall timeout for healthchecks. + ### --log-human | | | diff --git a/enterprise/cli/testdata/coder_server_--help.golden b/enterprise/cli/testdata/coder_server_--help.golden index 46fece16cfa00..fbda947c754d7 100644 --- a/enterprise/cli/testdata/coder_server_--help.golden +++ b/enterprise/cli/testdata/coder_server_--help.golden @@ -82,11 +82,17 @@ Use a YAML configuration file when your server launch become unwieldy. Write out the current server config as YAML to stdout. INTROSPECTION / HEALTHCHECK OPTIONS: - duration, $CODER_HEALTHCHECK_THRESHOLD_DATABASE (default: 15ms) + --healthcheck-refresh duration, $CODER_HEALTHCHECK_REFRESH (default: 10m0s) + Refresh interval for healthchecks. + + --threshold-database duration, $CODER_HEALTHCHECK_THRESHOLD_DATABASE (default: 15ms) The threshold for the database healthcheck. If the median latency of the database exceeds this threshold over 5 attempts, the database is considered unhealthy. The default value is 15ms. + --healthcheck-timeout duration, $CODER_HEALTHCHECK_TIMEOUT (default: 30s) + Overall timeout for healthchecks. + INTROSPECTION / LOGGING OPTIONS: --enable-terraform-debug-mode bool, $CODER_ENABLE_TERRAFORM_DEBUG_MODE (default: false) Allow administrators to enable Terraform debug output. diff --git a/site/src/api/typesGenerated.ts b/site/src/api/typesGenerated.ts index af5fec2b435da..4304016695d87 100644 --- a/site/src/api/typesGenerated.ts +++ b/site/src/api/typesGenerated.ts @@ -551,6 +551,8 @@ export interface Healthcheck { // From codersdk/deployment.go export interface HealthcheckConfig { + readonly timeout: number; + readonly refresh: number; readonly threshold_database: number; } From 8115ffe7a298fc5f0e5fa69caa10b8ba8884aebb Mon Sep 17 00:00:00 2001 From: Cian Johnston Date: Mon, 13 Nov 2023 08:43:50 +0000 Subject: [PATCH 6/9] renaming --- cli/testdata/coder_server_--help.golden | 14 +++---- cli/testdata/server-config.yaml.golden | 4 +- coderd/healthcheck/database.go | 14 +++---- coderd/healthcheck/database_test.go | 14 +++---- codersdk/deployment.go | 14 +++---- docs/cli/server.md | 42 +++++++++---------- .../cli/testdata/coder_server_--help.golden | 14 +++---- 7 files changed, 58 insertions(+), 58 deletions(-) diff --git a/cli/testdata/coder_server_--help.golden b/cli/testdata/coder_server_--help.golden index e82571bb64fbd..68199a030abcf 100644 --- a/cli/testdata/coder_server_--help.golden +++ b/cli/testdata/coder_server_--help.golden @@ -81,16 +81,16 @@ Use a YAML configuration file when your server launch become unwieldy. Write out the current server config as YAML to stdout. INTROSPECTION / HEALTHCHECK OPTIONS: - --healthcheck-refresh duration, $CODER_HEALTHCHECK_REFRESH (default: 10m0s) - Refresh interval for healthchecks. - - --threshold-database duration, $CODER_HEALTHCHECK_THRESHOLD_DATABASE (default: 15ms) - The threshold for the database healthcheck. If the median latency of + --threshold-database duration, $CODER_HEALTH_CHECK_THRESHOLD_DATABASE (default: 15ms) + The threshold for the database health check. If the median latency of the database exceeds this threshold over 5 attempts, the database is considered unhealthy. The default value is 15ms. - --healthcheck-timeout duration, $CODER_HEALTHCHECK_TIMEOUT (default: 30s) - Overall timeout for healthchecks. + --healthcheck-timeout duration, $CODER_HEALTH_CHECK_TIMEOUT (default: 30s) + Overall timeout for health checks. + + --healthcheck-refresh duration, $CODER_HEALTH_CHECK_REFRESH (default: 10m0s) + Refresh interval for healthchecks. INTROSPECTION / LOGGING OPTIONS: --enable-terraform-debug-mode bool, $CODER_ENABLE_TERRAFORM_DEBUG_MODE (default: false) diff --git a/cli/testdata/server-config.yaml.golden b/cli/testdata/server-config.yaml.golden index e4f588485a271..46e744bb74694 100644 --- a/cli/testdata/server-config.yaml.golden +++ b/cli/testdata/server-config.yaml.golden @@ -233,13 +233,13 @@ introspection: # (default: false, type: bool) enableTerraformDebugMode: false healthcheck: - # Overall timeout for healthchecks. + # Overall timeout for health checks. # (default: 30s, type: duration) timeout: 30s # Refresh interval for healthchecks. # (default: 10m0s, type: duration) refresh: 10m0s - # The threshold for the database healthcheck. If the median latency of the + # The threshold for the database health check. If the median latency of the # database exceeds this threshold over 5 attempts, the database is considered # unhealthy. The default value is 15ms. # (default: 15ms, type: duration) diff --git a/coderd/healthcheck/database.go b/coderd/healthcheck/database.go index 58f0d48843fcd..9ee92d7a71a9f 100644 --- a/coderd/healthcheck/database.go +++ b/coderd/healthcheck/database.go @@ -19,8 +19,8 @@ type DatabaseReport struct { Healthy bool `json:"healthy"` Reachable bool `json:"reachable"` Latency string `json:"latency"` - LatencyMs int64 `json:"latency_ms"` - ThresholdMs int64 `json:"threshold_ms"` + LatencyMS int64 `json:"latency_ms"` + ThresholdMS int64 `json:"threshold_ms"` Error *string `json:"error"` } @@ -30,9 +30,9 @@ type DatabaseReportOptions struct { } func (r *DatabaseReport) Run(ctx context.Context, opts *DatabaseReportOptions) { - r.ThresholdMs = opts.Threshold.Milliseconds() - if r.ThresholdMs == 0 { - r.ThresholdMs = DatabaseDefaultThreshold.Milliseconds() + r.ThresholdMS = opts.Threshold.Milliseconds() + if r.ThresholdMS == 0 { + r.ThresholdMS = DatabaseDefaultThreshold.Milliseconds() } ctx, cancel := context.WithTimeout(ctx, 5*time.Second) defer cancel() @@ -53,8 +53,8 @@ func (r *DatabaseReport) Run(ctx context.Context, opts *DatabaseReportOptions) { // Take the median ping. latency := pings[pingCount/2] r.Latency = latency.String() - r.LatencyMs = latency.Milliseconds() - if r.LatencyMs < r.ThresholdMs { + r.LatencyMS = latency.Milliseconds() + if r.LatencyMS < r.ThresholdMS { r.Healthy = true } r.Reachable = true diff --git a/coderd/healthcheck/database_test.go b/coderd/healthcheck/database_test.go index 39da1bc657458..be97c6424a47a 100644 --- a/coderd/healthcheck/database_test.go +++ b/coderd/healthcheck/database_test.go @@ -36,8 +36,8 @@ func TestDatabase(t *testing.T) { assert.True(t, report.Healthy) assert.True(t, report.Reachable) assert.Equal(t, ping.String(), report.Latency) - assert.Equal(t, ping.Milliseconds(), report.LatencyMs) - assert.Equal(t, healthcheck.DatabaseDefaultThreshold.Milliseconds(), report.ThresholdMs) + assert.Equal(t, ping.Milliseconds(), report.LatencyMS) + assert.Equal(t, healthcheck.DatabaseDefaultThreshold.Milliseconds(), report.ThresholdMS) assert.Nil(t, report.Error) }) @@ -60,7 +60,7 @@ func TestDatabase(t *testing.T) { assert.False(t, report.Reachable) assert.Zero(t, report.Latency) require.NotNil(t, report.Error) - assert.Equal(t, healthcheck.DatabaseDefaultThreshold.Milliseconds(), report.ThresholdMs) + assert.Equal(t, healthcheck.DatabaseDefaultThreshold.Milliseconds(), report.ThresholdMS) assert.Contains(t, *report.Error, err.Error()) }) @@ -85,8 +85,8 @@ func TestDatabase(t *testing.T) { assert.True(t, report.Healthy) assert.True(t, report.Reachable) assert.Equal(t, time.Millisecond.String(), report.Latency) - assert.EqualValues(t, 1, report.LatencyMs) - assert.Equal(t, healthcheck.DatabaseDefaultThreshold.Milliseconds(), report.ThresholdMs) + assert.EqualValues(t, 1, report.LatencyMS) + assert.Equal(t, healthcheck.DatabaseDefaultThreshold.Milliseconds(), report.ThresholdMS) assert.Nil(t, report.Error) }) @@ -111,8 +111,8 @@ func TestDatabase(t *testing.T) { assert.False(t, report.Healthy) assert.True(t, report.Reachable) assert.Equal(t, time.Second.String(), report.Latency) - assert.EqualValues(t, 1000, report.LatencyMs) - assert.Equal(t, time.Second.Milliseconds(), report.ThresholdMs) + assert.EqualValues(t, 1000, report.LatencyMS) + assert.Equal(t, time.Second.Milliseconds(), report.ThresholdMS) assert.Nil(t, report.Error) }) } diff --git a/codersdk/deployment.go b/codersdk/deployment.go index 7f21350680e09..44f1ce79eee45 100644 --- a/codersdk/deployment.go +++ b/codersdk/deployment.go @@ -1814,10 +1814,10 @@ Write out the current server config as YAML to stdout.`, }, // Healthcheck Options { - Name: "Healthcheck Timeout", - Description: "Overall timeout for healthchecks.", + Name: "Health Check Timeout", + Description: "Overall timeout for health checks.", Flag: "healthcheck-timeout", - Env: "CODER_HEALTHCHECK_TIMEOUT", + Env: "CODER_HEALTH_CHECK_TIMEOUT", Default: (30 * time.Second).String(), Value: &c.Healthcheck.Timeout, Group: &deploymentGroupIntrospectionHealthcheck, @@ -1827,17 +1827,17 @@ Write out the current server config as YAML to stdout.`, Name: "Healthcheck Refresh", Description: "Refresh interval for healthchecks.", Flag: "healthcheck-refresh", - Env: "CODER_HEALTHCHECK_REFRESH", + Env: "CODER_HEALTH_CHECK_REFRESH", Default: (10 * time.Minute).String(), Value: &c.Healthcheck.Refresh, Group: &deploymentGroupIntrospectionHealthcheck, YAML: "refresh", }, { - Name: "Healthcheck Threshold Database", - Description: "The threshold for the database healthcheck. If the median latency of the database exceeds this threshold over 5 attempts, the database is considered unhealthy. The default value is 15ms.", + Name: "Database Health Check Threshold", + Description: "The threshold for the database health check. If the median latency of the database exceeds this threshold over 5 attempts, the database is considered unhealthy. The default value is 15ms.", Flag: "threshold-database", - Env: "CODER_HEALTHCHECK_THRESHOLD_DATABASE", + Env: "CODER_HEALTH_CHECK_THRESHOLD_DATABASE", Default: (15 * time.Millisecond).String(), Value: &c.Healthcheck.ThresholdDatabase, Group: &deploymentGroupIntrospectionHealthcheck, diff --git a/docs/cli/server.md b/docs/cli/server.md index 7e152408a19ca..fe8c26cf00024 100644 --- a/docs/cli/server.md +++ b/docs/cli/server.md @@ -172,6 +172,17 @@ An HTTP URL that is accessible by other replicas to relay DERP traffic. Required Addresses for STUN servers to establish P2P connections. It's recommended to have at least two STUN servers to give users the best chance of connecting P2P to workspaces. Each STUN server will get it's own DERP region, with region IDs starting at `--derp-server-region-id + 1`. Use special value 'disable' to turn off STUN completely. +### --threshold-database + +| | | +| ----------- | -------------------------------------------------------- | +| Type | duration | +| Environment | $CODER_HEALTH_CHECK_THRESHOLD_DATABASE | +| YAML | introspection.healthcheck.thresholdDatabase | +| Default | 15ms | + +The threshold for the database health check. If the median latency of the database exceeds this threshold over 5 attempts, the database is considered unhealthy. The default value is 15ms. + ### --default-quiet-hours-schedule | | | @@ -305,38 +316,27 @@ Time to force cancel provisioning tasks that are stuck. HTTP bind address of the server. Unset to disable the HTTP endpoint. -### --healthcheck-refresh +### --healthcheck-timeout | | | | ----------- | ---------------------------------------------- | | Type | duration | -| Environment | $CODER_HEALTHCHECK_REFRESH | -| YAML | introspection.healthcheck.refresh | -| Default | 10m0s | - -Refresh interval for healthchecks. - -### --threshold-database - -| | | -| ----------- | -------------------------------------------------------- | -| Type | duration | -| Environment | $CODER_HEALTHCHECK_THRESHOLD_DATABASE | -| YAML | introspection.healthcheck.thresholdDatabase | -| Default | 15ms | +| Environment | $CODER_HEALTH_CHECK_TIMEOUT | +| YAML | introspection.healthcheck.timeout | +| Default | 30s | -The threshold for the database healthcheck. If the median latency of the database exceeds this threshold over 5 attempts, the database is considered unhealthy. The default value is 15ms. +Overall timeout for health checks. -### --healthcheck-timeout +### --healthcheck-refresh | | | | ----------- | ---------------------------------------------- | | Type | duration | -| Environment | $CODER_HEALTHCHECK_TIMEOUT | -| YAML | introspection.healthcheck.timeout | -| Default | 30s | +| Environment | $CODER_HEALTH_CHECK_REFRESH | +| YAML | introspection.healthcheck.refresh | +| Default | 10m0s | -Overall timeout for healthchecks. +Refresh interval for healthchecks. ### --log-human diff --git a/enterprise/cli/testdata/coder_server_--help.golden b/enterprise/cli/testdata/coder_server_--help.golden index fbda947c754d7..a8c8a64f58c8d 100644 --- a/enterprise/cli/testdata/coder_server_--help.golden +++ b/enterprise/cli/testdata/coder_server_--help.golden @@ -82,16 +82,16 @@ Use a YAML configuration file when your server launch become unwieldy. Write out the current server config as YAML to stdout. INTROSPECTION / HEALTHCHECK OPTIONS: - --healthcheck-refresh duration, $CODER_HEALTHCHECK_REFRESH (default: 10m0s) - Refresh interval for healthchecks. - - --threshold-database duration, $CODER_HEALTHCHECK_THRESHOLD_DATABASE (default: 15ms) - The threshold for the database healthcheck. If the median latency of + --threshold-database duration, $CODER_HEALTH_CHECK_THRESHOLD_DATABASE (default: 15ms) + The threshold for the database health check. If the median latency of the database exceeds this threshold over 5 attempts, the database is considered unhealthy. The default value is 15ms. - --healthcheck-timeout duration, $CODER_HEALTHCHECK_TIMEOUT (default: 30s) - Overall timeout for healthchecks. + --healthcheck-timeout duration, $CODER_HEALTH_CHECK_TIMEOUT (default: 30s) + Overall timeout for health checks. + + --healthcheck-refresh duration, $CODER_HEALTH_CHECK_REFRESH (default: 10m0s) + Refresh interval for healthchecks. INTROSPECTION / LOGGING OPTIONS: --enable-terraform-debug-mode bool, $CODER_ENABLE_TERRAFORM_DEBUG_MODE (default: false) From bc8e2007dab5a90f478006562c53d2b4d1c01755 Mon Sep 17 00:00:00 2001 From: Cian Johnston Date: Mon, 13 Nov 2023 09:15:23 +0000 Subject: [PATCH 7/9] more renaming --- cli/testdata/coder_server_--help.golden | 10 ++--- codersdk/deployment.go | 10 ++--- docs/cli/server.md | 42 +++++++++---------- .../cli/testdata/coder_server_--help.golden | 10 ++--- 4 files changed, 36 insertions(+), 36 deletions(-) diff --git a/cli/testdata/coder_server_--help.golden b/cli/testdata/coder_server_--help.golden index 68199a030abcf..587d11260b826 100644 --- a/cli/testdata/coder_server_--help.golden +++ b/cli/testdata/coder_server_--help.golden @@ -81,17 +81,17 @@ Use a YAML configuration file when your server launch become unwieldy. Write out the current server config as YAML to stdout. INTROSPECTION / HEALTHCHECK OPTIONS: - --threshold-database duration, $CODER_HEALTH_CHECK_THRESHOLD_DATABASE (default: 15ms) + --health-check-refresh duration, $CODER_HEALTH_CHECK_REFRESH (default: 10m0s) + Refresh interval for healthchecks. + + --health-check-threshold-database duration, $CODER_HEALTH_CHECK_THRESHOLD_DATABASE (default: 15ms) The threshold for the database health check. If the median latency of the database exceeds this threshold over 5 attempts, the database is considered unhealthy. The default value is 15ms. - --healthcheck-timeout duration, $CODER_HEALTH_CHECK_TIMEOUT (default: 30s) + --health-check-timeout duration, $CODER_HEALTH_CHECK_TIMEOUT (default: 30s) Overall timeout for health checks. - --healthcheck-refresh duration, $CODER_HEALTH_CHECK_REFRESH (default: 10m0s) - Refresh interval for healthchecks. - INTROSPECTION / LOGGING OPTIONS: --enable-terraform-debug-mode bool, $CODER_ENABLE_TERRAFORM_DEBUG_MODE (default: false) Allow administrators to enable Terraform debug output. diff --git a/codersdk/deployment.go b/codersdk/deployment.go index 44f1ce79eee45..69fd4edbb30ba 100644 --- a/codersdk/deployment.go +++ b/codersdk/deployment.go @@ -1816,7 +1816,7 @@ Write out the current server config as YAML to stdout.`, { Name: "Health Check Timeout", Description: "Overall timeout for health checks.", - Flag: "healthcheck-timeout", + Flag: "health-check-timeout", Env: "CODER_HEALTH_CHECK_TIMEOUT", Default: (30 * time.Second).String(), Value: &c.Healthcheck.Timeout, @@ -1824,9 +1824,9 @@ Write out the current server config as YAML to stdout.`, YAML: "timeout", }, { - Name: "Healthcheck Refresh", + Name: "Health Check Refresh", Description: "Refresh interval for healthchecks.", - Flag: "healthcheck-refresh", + Flag: "health-check-refresh", Env: "CODER_HEALTH_CHECK_REFRESH", Default: (10 * time.Minute).String(), Value: &c.Healthcheck.Refresh, @@ -1834,9 +1834,9 @@ Write out the current server config as YAML to stdout.`, YAML: "refresh", }, { - Name: "Database Health Check Threshold", + Name: "Health Check Threshold: Database", Description: "The threshold for the database health check. If the median latency of the database exceeds this threshold over 5 attempts, the database is considered unhealthy. The default value is 15ms.", - Flag: "threshold-database", + Flag: "health-check-threshold-database", Env: "CODER_HEALTH_CHECK_THRESHOLD_DATABASE", Default: (15 * time.Millisecond).String(), Value: &c.Healthcheck.ThresholdDatabase, diff --git a/docs/cli/server.md b/docs/cli/server.md index fe8c26cf00024..d0000e32e98db 100644 --- a/docs/cli/server.md +++ b/docs/cli/server.md @@ -172,17 +172,6 @@ An HTTP URL that is accessible by other replicas to relay DERP traffic. Required Addresses for STUN servers to establish P2P connections. It's recommended to have at least two STUN servers to give users the best chance of connecting P2P to workspaces. Each STUN server will get it's own DERP region, with region IDs starting at `--derp-server-region-id + 1`. Use special value 'disable' to turn off STUN completely. -### --threshold-database - -| | | -| ----------- | -------------------------------------------------------- | -| Type | duration | -| Environment | $CODER_HEALTH_CHECK_THRESHOLD_DATABASE | -| YAML | introspection.healthcheck.thresholdDatabase | -| Default | 15ms | - -The threshold for the database health check. If the median latency of the database exceeds this threshold over 5 attempts, the database is considered unhealthy. The default value is 15ms. - ### --default-quiet-hours-schedule | | | @@ -316,27 +305,38 @@ Time to force cancel provisioning tasks that are stuck. HTTP bind address of the server. Unset to disable the HTTP endpoint. -### --healthcheck-timeout +### --health-check-refresh | | | | ----------- | ---------------------------------------------- | | Type | duration | -| Environment | $CODER_HEALTH_CHECK_TIMEOUT | -| YAML | introspection.healthcheck.timeout | -| Default | 30s | +| Environment | $CODER_HEALTH_CHECK_REFRESH | +| YAML | introspection.healthcheck.refresh | +| Default | 10m0s | -Overall timeout for health checks. +Refresh interval for healthchecks. -### --healthcheck-refresh +### --health-check-threshold-database + +| | | +| ----------- | -------------------------------------------------------- | +| Type | duration | +| Environment | $CODER_HEALTH_CHECK_THRESHOLD_DATABASE | +| YAML | introspection.healthcheck.thresholdDatabase | +| Default | 15ms | + +The threshold for the database health check. If the median latency of the database exceeds this threshold over 5 attempts, the database is considered unhealthy. The default value is 15ms. + +### --health-check-timeout | | | | ----------- | ---------------------------------------------- | | Type | duration | -| Environment | $CODER_HEALTH_CHECK_REFRESH | -| YAML | introspection.healthcheck.refresh | -| Default | 10m0s | +| Environment | $CODER_HEALTH_CHECK_TIMEOUT | +| YAML | introspection.healthcheck.timeout | +| Default | 30s | -Refresh interval for healthchecks. +Overall timeout for health checks. ### --log-human diff --git a/enterprise/cli/testdata/coder_server_--help.golden b/enterprise/cli/testdata/coder_server_--help.golden index a8c8a64f58c8d..32c2321c8c089 100644 --- a/enterprise/cli/testdata/coder_server_--help.golden +++ b/enterprise/cli/testdata/coder_server_--help.golden @@ -82,17 +82,17 @@ Use a YAML configuration file when your server launch become unwieldy. Write out the current server config as YAML to stdout. INTROSPECTION / HEALTHCHECK OPTIONS: - --threshold-database duration, $CODER_HEALTH_CHECK_THRESHOLD_DATABASE (default: 15ms) + --health-check-refresh duration, $CODER_HEALTH_CHECK_REFRESH (default: 10m0s) + Refresh interval for healthchecks. + + --health-check-threshold-database duration, $CODER_HEALTH_CHECK_THRESHOLD_DATABASE (default: 15ms) The threshold for the database health check. If the median latency of the database exceeds this threshold over 5 attempts, the database is considered unhealthy. The default value is 15ms. - --healthcheck-timeout duration, $CODER_HEALTH_CHECK_TIMEOUT (default: 30s) + --health-check-timeout duration, $CODER_HEALTH_CHECK_TIMEOUT (default: 30s) Overall timeout for health checks. - --healthcheck-refresh duration, $CODER_HEALTH_CHECK_REFRESH (default: 10m0s) - Refresh interval for healthchecks. - INTROSPECTION / LOGGING OPTIONS: --enable-terraform-debug-mode bool, $CODER_ENABLE_TERRAFORM_DEBUG_MODE (default: false) Allow administrators to enable Terraform debug output. From 973a9bb0f5ac18db872d3ce9b1615a318105a3ce Mon Sep 17 00:00:00 2001 From: Cian Johnston Date: Mon, 13 Nov 2023 13:26:55 +0000 Subject: [PATCH 8/9] yet more renaming --- cli/testdata/coder_server_--help.golden | 2 +- codersdk/deployment.go | 2 +- enterprise/cli/testdata/coder_server_--help.golden | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cli/testdata/coder_server_--help.golden b/cli/testdata/coder_server_--help.golden index 587d11260b826..acc971f0e1daa 100644 --- a/cli/testdata/coder_server_--help.golden +++ b/cli/testdata/coder_server_--help.golden @@ -80,7 +80,7 @@ Use a YAML configuration file when your server launch become unwieldy. Write out the current server config as YAML to stdout. -INTROSPECTION / HEALTHCHECK OPTIONS: +INTROSPECTION / HEALTH CHECK OPTIONS: --health-check-refresh duration, $CODER_HEALTH_CHECK_REFRESH (default: 10m0s) Refresh interval for healthchecks. diff --git a/codersdk/deployment.go b/codersdk/deployment.go index 69fd4edbb30ba..eaaebc6586a9a 100644 --- a/codersdk/deployment.go +++ b/codersdk/deployment.go @@ -499,7 +499,7 @@ func (c *DeploymentValues) Options() clibase.OptionSet { } deploymentGroupIntrospectionHealthcheck = clibase.Group{ Parent: &deploymentGroupIntrospection, - Name: "Healthcheck", + Name: "Health Check", YAML: "healthcheck", } deploymentGroupOAuth2 = clibase.Group{ diff --git a/enterprise/cli/testdata/coder_server_--help.golden b/enterprise/cli/testdata/coder_server_--help.golden index 32c2321c8c089..6bc5719c59916 100644 --- a/enterprise/cli/testdata/coder_server_--help.golden +++ b/enterprise/cli/testdata/coder_server_--help.golden @@ -81,7 +81,7 @@ Use a YAML configuration file when your server launch become unwieldy. Write out the current server config as YAML to stdout. -INTROSPECTION / HEALTHCHECK OPTIONS: +INTROSPECTION / HEALTH CHECK OPTIONS: --health-check-refresh duration, $CODER_HEALTH_CHECK_REFRESH (default: 10m0s) Refresh interval for healthchecks. From 58361e877cd49d5b17897085ee9b81a9aa1375e5 Mon Sep 17 00:00:00 2001 From: Cian Johnston Date: Mon, 13 Nov 2023 13:34:56 +0000 Subject: [PATCH 9/9] revert configurability for hc timeout, keep hard-coded at 30s --- cli/testdata/coder_server_--help.golden | 3 --- cli/testdata/server-config.yaml.golden | 3 --- coderd/apidoc/docs.go | 3 --- coderd/apidoc/swagger.json | 3 --- coderd/coderd.go | 2 +- codersdk/deployment.go | 11 ----------- docs/api/general.md | 3 +-- docs/api/schemas.md | 10 +++------- docs/cli/server.md | 11 ----------- enterprise/cli/testdata/coder_server_--help.golden | 3 --- site/src/api/typesGenerated.ts | 1 - 11 files changed, 5 insertions(+), 48 deletions(-) diff --git a/cli/testdata/coder_server_--help.golden b/cli/testdata/coder_server_--help.golden index acc971f0e1daa..4d1b9609aa6f4 100644 --- a/cli/testdata/coder_server_--help.golden +++ b/cli/testdata/coder_server_--help.golden @@ -89,9 +89,6 @@ INTROSPECTION / HEALTH CHECK OPTIONS: the database exceeds this threshold over 5 attempts, the database is considered unhealthy. The default value is 15ms. - --health-check-timeout duration, $CODER_HEALTH_CHECK_TIMEOUT (default: 30s) - Overall timeout for health checks. - INTROSPECTION / LOGGING OPTIONS: --enable-terraform-debug-mode bool, $CODER_ENABLE_TERRAFORM_DEBUG_MODE (default: false) Allow administrators to enable Terraform debug output. diff --git a/cli/testdata/server-config.yaml.golden b/cli/testdata/server-config.yaml.golden index 46e744bb74694..3163a6e2cab33 100644 --- a/cli/testdata/server-config.yaml.golden +++ b/cli/testdata/server-config.yaml.golden @@ -233,9 +233,6 @@ introspection: # (default: false, type: bool) enableTerraformDebugMode: false healthcheck: - # Overall timeout for health checks. - # (default: 30s, type: duration) - timeout: 30s # Refresh interval for healthchecks. # (default: 10m0s, type: duration) refresh: 10m0s diff --git a/coderd/apidoc/docs.go b/coderd/apidoc/docs.go index b9b271f17e3b7..e8689850eb081 100644 --- a/coderd/apidoc/docs.go +++ b/coderd/apidoc/docs.go @@ -8870,9 +8870,6 @@ const docTemplate = `{ }, "threshold_database": { "type": "integer" - }, - "timeout": { - "type": "integer" } } }, diff --git a/coderd/apidoc/swagger.json b/coderd/apidoc/swagger.json index 9b7ccf8111c57..10a54546fde8f 100644 --- a/coderd/apidoc/swagger.json +++ b/coderd/apidoc/swagger.json @@ -7972,9 +7972,6 @@ }, "threshold_database": { "type": "integer" - }, - "timeout": { - "type": "integer" } } }, diff --git a/coderd/coderd.go b/coderd/coderd.go index d0adaaf517e23..8f138dcc2c6f2 100644 --- a/coderd/coderd.go +++ b/coderd/coderd.go @@ -417,7 +417,7 @@ func New(options *Options) *API { } } if options.HealthcheckTimeout == 0 { - options.HealthcheckTimeout = options.DeploymentValues.Healthcheck.Timeout.Value() + options.HealthcheckTimeout = 30 * time.Second } if options.HealthcheckRefresh == 0 { options.HealthcheckRefresh = options.DeploymentValues.Healthcheck.Refresh.Value() diff --git a/codersdk/deployment.go b/codersdk/deployment.go index eaaebc6586a9a..613e3b17045fa 100644 --- a/codersdk/deployment.go +++ b/codersdk/deployment.go @@ -398,7 +398,6 @@ type UserQuietHoursScheduleConfig struct { // HealthcheckConfig contains configuration for healthchecks. type HealthcheckConfig struct { - Timeout clibase.Duration `json:"timeout" typescript:",notnull"` Refresh clibase.Duration `json:"refresh" typescript:",notnull"` ThresholdDatabase clibase.Duration `json:"threshold_database" typescript:",notnull"` } @@ -1813,16 +1812,6 @@ Write out the current server config as YAML to stdout.`, YAML: "webTerminalRenderer", }, // Healthcheck Options - { - Name: "Health Check Timeout", - Description: "Overall timeout for health checks.", - Flag: "health-check-timeout", - Env: "CODER_HEALTH_CHECK_TIMEOUT", - Default: (30 * time.Second).String(), - Value: &c.Healthcheck.Timeout, - Group: &deploymentGroupIntrospectionHealthcheck, - YAML: "timeout", - }, { Name: "Health Check Refresh", Description: "Refresh interval for healthchecks.", diff --git a/docs/api/general.md b/docs/api/general.md index fd63c832e3bb9..de89e07e558c5 100644 --- a/docs/api/general.md +++ b/docs/api/general.md @@ -237,8 +237,7 @@ curl -X GET http://coder-server:8080/api/v2/deployment/config \ "external_token_encryption_keys": ["string"], "healthcheck": { "refresh": 0, - "threshold_database": 0, - "timeout": 0 + "threshold_database": 0 }, "http_address": "string", "in_memory_database": true, diff --git a/docs/api/schemas.md b/docs/api/schemas.md index 6d85ed2b8577a..d3a61585d096c 100644 --- a/docs/api/schemas.md +++ b/docs/api/schemas.md @@ -2158,8 +2158,7 @@ AuthorizationObject can represent a "set" of objects, such as: all workspaces in "external_token_encryption_keys": ["string"], "healthcheck": { "refresh": 0, - "threshold_database": 0, - "timeout": 0 + "threshold_database": 0 }, "http_address": "string", "in_memory_database": true, @@ -2534,8 +2533,7 @@ AuthorizationObject can represent a "set" of objects, such as: all workspaces in "external_token_encryption_keys": ["string"], "healthcheck": { "refresh": 0, - "threshold_database": 0, - "timeout": 0 + "threshold_database": 0 }, "http_address": "string", "in_memory_database": true, @@ -3192,8 +3190,7 @@ AuthorizationObject can represent a "set" of objects, such as: all workspaces in ```json { "refresh": 0, - "threshold_database": 0, - "timeout": 0 + "threshold_database": 0 } ``` @@ -3203,7 +3200,6 @@ AuthorizationObject can represent a "set" of objects, such as: all workspaces in | -------------------- | ------- | -------- | ------------ | ----------- | | `refresh` | integer | false | | | | `threshold_database` | integer | false | | | -| `timeout` | integer | false | | | ## codersdk.InsightsReportInterval diff --git a/docs/cli/server.md b/docs/cli/server.md index d0000e32e98db..3fb0c57b3ee48 100644 --- a/docs/cli/server.md +++ b/docs/cli/server.md @@ -327,17 +327,6 @@ Refresh interval for healthchecks. The threshold for the database health check. If the median latency of the database exceeds this threshold over 5 attempts, the database is considered unhealthy. The default value is 15ms. -### --health-check-timeout - -| | | -| ----------- | ---------------------------------------------- | -| Type | duration | -| Environment | $CODER_HEALTH_CHECK_TIMEOUT | -| YAML | introspection.healthcheck.timeout | -| Default | 30s | - -Overall timeout for health checks. - ### --log-human | | | diff --git a/enterprise/cli/testdata/coder_server_--help.golden b/enterprise/cli/testdata/coder_server_--help.golden index 6bc5719c59916..85c924474c206 100644 --- a/enterprise/cli/testdata/coder_server_--help.golden +++ b/enterprise/cli/testdata/coder_server_--help.golden @@ -90,9 +90,6 @@ INTROSPECTION / HEALTH CHECK OPTIONS: the database exceeds this threshold over 5 attempts, the database is considered unhealthy. The default value is 15ms. - --health-check-timeout duration, $CODER_HEALTH_CHECK_TIMEOUT (default: 30s) - Overall timeout for health checks. - INTROSPECTION / LOGGING OPTIONS: --enable-terraform-debug-mode bool, $CODER_ENABLE_TERRAFORM_DEBUG_MODE (default: false) Allow administrators to enable Terraform debug output. diff --git a/site/src/api/typesGenerated.ts b/site/src/api/typesGenerated.ts index 4304016695d87..dcfb60584d7c8 100644 --- a/site/src/api/typesGenerated.ts +++ b/site/src/api/typesGenerated.ts @@ -551,7 +551,6 @@ export interface Healthcheck { // From codersdk/deployment.go export interface HealthcheckConfig { - readonly timeout: number; readonly refresh: number; readonly threshold_database: number; }