From a15b2c732bac5f038d4e89dbcea0c2bf90c2c70c Mon Sep 17 00:00:00 2001 From: Colin Adler Date: Mon, 24 Apr 2023 21:10:32 +0000 Subject: [PATCH 1/2] fix(healthcheck) don't allow panics to exit coderd --- coderd/coderd.go | 3 ++- coderd/healthcheck/accessurl.go | 15 ++++++++---- coderd/healthcheck/accessurl_test.go | 6 ++--- coderd/healthcheck/derp.go | 36 +++++++++++++++++++++------- coderd/healthcheck/healthcheck.go | 13 ++++++++++ 5 files changed, 55 insertions(+), 18 deletions(-) diff --git a/coderd/coderd.go b/coderd/coderd.go index a5cf693a3d8b6..15af81d00aa01 100644 --- a/coderd/coderd.go +++ b/coderd/coderd.go @@ -250,7 +250,8 @@ func New(options *Options) *API { if options.HealthcheckFunc == nil { options.HealthcheckFunc = func(ctx context.Context) (*healthcheck.Report, error) { return healthcheck.Run(ctx, &healthcheck.ReportOptions{ - DERPMap: options.DERPMap.Clone(), + AccessURL: options.AccessURL, + DERPMap: options.DERPMap.Clone(), }) } } diff --git a/coderd/healthcheck/accessurl.go b/coderd/healthcheck/accessurl.go index fd55fc7634203..f8babac89595c 100644 --- a/coderd/healthcheck/accessurl.go +++ b/coderd/healthcheck/accessurl.go @@ -15,7 +15,7 @@ type AccessURLReport struct { Reachable bool StatusCode int HealthzResponse string - Err error + Error error } type AccessURLOptions struct { @@ -27,32 +27,37 @@ func (r *AccessURLReport) Run(ctx context.Context, opts *AccessURLOptions) { ctx, cancel := context.WithTimeout(ctx, 5*time.Second) defer cancel() + if opts.AccessURL == nil { + r.Error = xerrors.New("access URL is nil") + return + } + if opts.Client == nil { opts.Client = http.DefaultClient } accessURL, err := opts.AccessURL.Parse("/healthz") if err != nil { - r.Err = xerrors.Errorf("parse healthz endpoint: %w", err) + r.Error = xerrors.Errorf("parse healthz endpoint: %w", err) return } req, err := http.NewRequestWithContext(ctx, "GET", accessURL.String(), nil) if err != nil { - r.Err = xerrors.Errorf("create healthz request: %w", err) + r.Error = xerrors.Errorf("create healthz request: %w", err) return } res, err := opts.Client.Do(req) if err != nil { - r.Err = xerrors.Errorf("get healthz endpoint: %w", err) + r.Error = xerrors.Errorf("get healthz endpoint: %w", err) return } defer res.Body.Close() body, err := io.ReadAll(res.Body) if err != nil { - r.Err = xerrors.Errorf("read healthz response: %w", err) + r.Error = xerrors.Errorf("read healthz response: %w", err) return } diff --git a/coderd/healthcheck/accessurl_test.go b/coderd/healthcheck/accessurl_test.go index 808888771e2f4..71e5a8d0e94dd 100644 --- a/coderd/healthcheck/accessurl_test.go +++ b/coderd/healthcheck/accessurl_test.go @@ -36,7 +36,7 @@ func TestAccessURL(t *testing.T) { assert.True(t, report.Reachable) assert.Equal(t, http.StatusOK, report.StatusCode) assert.Equal(t, "OK", report.HealthzResponse) - assert.NoError(t, report.Err) + assert.NoError(t, report.Error) }) t.Run("404", func(t *testing.T) { @@ -66,7 +66,7 @@ func TestAccessURL(t *testing.T) { assert.True(t, report.Reachable) assert.Equal(t, http.StatusNotFound, report.StatusCode) assert.Equal(t, string(resp), report.HealthzResponse) - assert.NoError(t, report.Err) + assert.NoError(t, report.Error) }) t.Run("ClientErr", func(t *testing.T) { @@ -102,7 +102,7 @@ func TestAccessURL(t *testing.T) { assert.False(t, report.Reachable) assert.Equal(t, 0, report.StatusCode) assert.Equal(t, "", report.HealthzResponse) - assert.ErrorIs(t, report.Err, expErr) + assert.ErrorIs(t, report.Error, expErr) }) } diff --git a/coderd/healthcheck/derp.go b/coderd/healthcheck/derp.go index 64e255bdeda49..0e7c66f474113 100644 --- a/coderd/healthcheck/derp.go +++ b/coderd/healthcheck/derp.go @@ -33,6 +33,8 @@ type DERPReport struct { Netcheck *netcheck.Report `json:"netcheck"` NetcheckErr error `json:"netcheck_err"` NetcheckLogs []string `json:"netcheck_logs"` + + Error error `json:"error"` } type DERPRegionReport struct { @@ -41,6 +43,7 @@ type DERPRegionReport struct { Region *tailcfg.DERPRegion `json:"region"` NodeReports []*DERPNodeReport `json:"node_reports"` + Error error `json:"error"` } type DERPNodeReport struct { mu sync.Mutex @@ -55,6 +58,7 @@ type DERPNodeReport struct { UsesWebsocket bool `json:"uses_websocket"` ClientLogs [][]string `json:"client_logs"` ClientErrs [][]error `json:"client_errs"` + Error error `json:"error"` STUN DERPStunReport `json:"stun"` } @@ -77,12 +81,19 @@ func (r *DERPReport) Run(ctx context.Context, opts *DERPReportOptions) { wg.Add(len(opts.DERPMap.Regions)) for _, region := range opts.DERPMap.Regions { - region := region - go func() { - defer wg.Done() - regionReport := DERPRegionReport{ + var ( + region = region + regionReport = DERPRegionReport{ Region: region, } + ) + go func() { + defer wg.Done() + defer func() { + if err := recover(); err != nil { + regionReport.Error = xerrors.Errorf("%v", err) + } + }() regionReport.Run(ctx) @@ -117,14 +128,21 @@ func (r *DERPRegionReport) Run(ctx context.Context) { wg.Add(len(r.Region.Nodes)) for _, node := range r.Region.Nodes { - node := node - go func() { - defer wg.Done() - - nodeReport := DERPNodeReport{ + var ( + node = node + nodeReport = DERPNodeReport{ Node: node, Healthy: true, } + ) + + go func() { + defer wg.Done() + defer func() { + if err := recover(); err != nil { + nodeReport.Error = xerrors.Errorf("%v", err) + } + }() nodeReport.Run(ctx) diff --git a/coderd/healthcheck/healthcheck.go b/coderd/healthcheck/healthcheck.go index 26dedaa5a97a8..88f9f0ad075d0 100644 --- a/coderd/healthcheck/healthcheck.go +++ b/coderd/healthcheck/healthcheck.go @@ -7,6 +7,7 @@ import ( "sync" "time" + "golang.org/x/xerrors" "tailscale.com/tailcfg" ) @@ -38,6 +39,12 @@ func Run(ctx context.Context, opts *ReportOptions) (*Report, error) { wg.Add(1) go func() { defer wg.Done() + defer func() { + if err := recover(); err != nil { + report.DERP.Error = xerrors.Errorf("%v", err) + } + }() + report.DERP.Run(ctx, &DERPReportOptions{ DERPMap: opts.DERPMap, }) @@ -46,6 +53,12 @@ func Run(ctx context.Context, opts *ReportOptions) (*Report, error) { wg.Add(1) go func() { defer wg.Done() + defer func() { + if err := recover(); err != nil { + report.AccessURL.Error = xerrors.Errorf("%v", err) + } + }() + report.AccessURL.Run(ctx, &AccessURLOptions{ AccessURL: opts.AccessURL, Client: opts.Client, From 94deb6f2da438a40a8f6013d0746879209efa904 Mon Sep 17 00:00:00 2001 From: Colin Adler Date: Mon, 24 Apr 2023 21:23:19 +0000 Subject: [PATCH 2/2] fixup! fix(healthcheck) don't allow panics to exit coderd --- coderd/apidoc/docs.go | 5 ++++- coderd/apidoc/swagger.json | 5 ++++- docs/api/debug.md | 7 ++++++- docs/api/schemas.md | 22 +++++++++++++++++++--- 4 files changed, 33 insertions(+), 6 deletions(-) diff --git a/coderd/apidoc/docs.go b/coderd/apidoc/docs.go index 4cb4940a03775..23ad0d23293a6 100644 --- a/coderd/apidoc/docs.go +++ b/coderd/apidoc/docs.go @@ -9965,7 +9965,7 @@ const docTemplate = `{ "healthcheck.AccessURLReport": { "type": "object", "properties": { - "err": {}, + "error": {}, "healthy": { "type": "boolean" }, @@ -10002,6 +10002,7 @@ const docTemplate = `{ } } }, + "error": {}, "healthy": { "type": "boolean" }, @@ -10025,6 +10026,7 @@ const docTemplate = `{ "healthcheck.DERPRegionReport": { "type": "object", "properties": { + "error": {}, "healthy": { "type": "boolean" }, @@ -10042,6 +10044,7 @@ const docTemplate = `{ "healthcheck.DERPReport": { "type": "object", "properties": { + "error": {}, "healthy": { "type": "boolean" }, diff --git a/coderd/apidoc/swagger.json b/coderd/apidoc/swagger.json index 558360d0c0cc6..c32c1c4b85429 100644 --- a/coderd/apidoc/swagger.json +++ b/coderd/apidoc/swagger.json @@ -9004,7 +9004,7 @@ "healthcheck.AccessURLReport": { "type": "object", "properties": { - "err": {}, + "error": {}, "healthy": { "type": "boolean" }, @@ -9041,6 +9041,7 @@ } } }, + "error": {}, "healthy": { "type": "boolean" }, @@ -9064,6 +9065,7 @@ "healthcheck.DERPRegionReport": { "type": "object", "properties": { + "error": {}, "healthy": { "type": "boolean" }, @@ -9081,6 +9083,7 @@ "healthcheck.DERPReport": { "type": "object", "properties": { + "error": {}, "healthy": { "type": "boolean" }, diff --git a/docs/api/debug.md b/docs/api/debug.md index 634f6bbc907e3..0f68215501c4e 100644 --- a/docs/api/debug.md +++ b/docs/api/debug.md @@ -40,13 +40,14 @@ curl -X GET http://coder-server:8080/api/v2/debug/health \ ```json { "access_url": { - "err": null, + "error": null, "healthy": true, "healthzResponse": "string", "reachable": true, "statusCode": 0 }, "derp": { + "error": null, "healthy": true, "netcheck": { "captivePortal": "string", @@ -82,12 +83,14 @@ curl -X GET http://coder-server:8080/api/v2/debug/health \ "netcheck_logs": ["string"], "regions": { "property1": { + "error": null, "healthy": true, "node_reports": [ { "can_exchange_messages": true, "client_errs": [[null]], "client_logs": [["string"]], + "error": null, "healthy": true, "node": { "certName": "string", @@ -141,12 +144,14 @@ curl -X GET http://coder-server:8080/api/v2/debug/health \ } }, "property2": { + "error": null, "healthy": true, "node_reports": [ { "can_exchange_messages": true, "client_errs": [[null]], "client_logs": [["string"]], + "error": null, "healthy": true, "node": { "certName": "string", diff --git a/docs/api/schemas.md b/docs/api/schemas.md index d297b31169065..d327b405d40fe 100644 --- a/docs/api/schemas.md +++ b/docs/api/schemas.md @@ -5674,7 +5674,7 @@ Parameter represents a set value for the scope. ```json { - "err": null, + "error": null, "healthy": true, "healthzResponse": "string", "reachable": true, @@ -5686,7 +5686,7 @@ Parameter represents a set value for the scope. | Name | Type | Required | Restrictions | Description | | ----------------- | ------- | -------- | ------------ | ----------- | -| `err` | any | false | | | +| `error` | any | false | | | | `healthy` | boolean | false | | | | `healthzResponse` | string | false | | | | `reachable` | boolean | false | | | @@ -5699,6 +5699,7 @@ Parameter represents a set value for the scope. "can_exchange_messages": true, "client_errs": [[null]], "client_logs": [["string"]], + "error": null, "healthy": true, "node": { "certName": "string", @@ -5735,6 +5736,7 @@ Parameter represents a set value for the scope. | `can_exchange_messages` | boolean | false | | | | `client_errs` | array of array | false | | | | `client_logs` | array of array | false | | | +| `error` | any | false | | | | `healthy` | boolean | false | | | | `node` | [tailcfg.DERPNode](#tailcfgderpnode) | false | | | | `node_info` | [derp.ServerInfoMessage](#derpserverinfomessage) | false | | | @@ -5746,12 +5748,14 @@ Parameter represents a set value for the scope. ```json { + "error": null, "healthy": true, "node_reports": [ { "can_exchange_messages": true, "client_errs": [[null]], "client_logs": [["string"]], + "error": null, "healthy": true, "node": { "certName": "string", @@ -5810,6 +5814,7 @@ Parameter represents a set value for the scope. | Name | Type | Required | Restrictions | Description | | -------------- | ----------------------------------------------------------------- | -------- | ------------ | ----------- | +| `error` | any | false | | | | `healthy` | boolean | false | | | | `node_reports` | array of [healthcheck.DERPNodeReport](#healthcheckderpnodereport) | false | | | | `region` | [tailcfg.DERPRegion](#tailcfgderpregion) | false | | | @@ -5818,6 +5823,7 @@ Parameter represents a set value for the scope. ```json { + "error": null, "healthy": true, "netcheck": { "captivePortal": "string", @@ -5853,12 +5859,14 @@ Parameter represents a set value for the scope. "netcheck_logs": ["string"], "regions": { "property1": { + "error": null, "healthy": true, "node_reports": [ { "can_exchange_messages": true, "client_errs": [[null]], "client_logs": [["string"]], + "error": null, "healthy": true, "node": { "certName": "string", @@ -5912,12 +5920,14 @@ Parameter represents a set value for the scope. } }, "property2": { + "error": null, "healthy": true, "node_reports": [ { "can_exchange_messages": true, "client_errs": [[null]], "client_logs": [["string"]], + "error": null, "healthy": true, "node": { "certName": "string", @@ -5978,6 +5988,7 @@ Parameter represents a set value for the scope. | Name | Type | Required | Restrictions | Description | | ------------------ | ------------------------------------------------------------ | -------- | ------------ | ----------- | +| `error` | any | false | | | | `healthy` | boolean | false | | | | `netcheck` | [netcheck.Report](#netcheckreport) | false | | | | `netcheck_err` | any | false | | | @@ -6008,13 +6019,14 @@ Parameter represents a set value for the scope. ```json { "access_url": { - "err": null, + "error": null, "healthy": true, "healthzResponse": "string", "reachable": true, "statusCode": 0 }, "derp": { + "error": null, "healthy": true, "netcheck": { "captivePortal": "string", @@ -6050,12 +6062,14 @@ Parameter represents a set value for the scope. "netcheck_logs": ["string"], "regions": { "property1": { + "error": null, "healthy": true, "node_reports": [ { "can_exchange_messages": true, "client_errs": [[null]], "client_logs": [["string"]], + "error": null, "healthy": true, "node": { "certName": "string", @@ -6109,12 +6123,14 @@ Parameter represents a set value for the scope. } }, "property2": { + "error": null, "healthy": true, "node_reports": [ { "can_exchange_messages": true, "client_errs": [[null]], "client_logs": [["string"]], + "error": null, "healthy": true, "node": { "certName": "string",