Skip to content

Commit 38ed816

Browse files
authored
fix(coderd/debug): fix caching issue with dismissed sections (#11051)
1 parent 53453c0 commit 38ed816

File tree

3 files changed

+64
-12
lines changed

3 files changed

+64
-12
lines changed

coderd/coderd.go

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@ import (
2525
"github.com/prometheus/client_golang/prometheus"
2626
httpSwagger "github.com/swaggo/http-swagger/v2"
2727
"go.opentelemetry.io/otel/trace"
28-
"golang.org/x/exp/slices"
2928
"golang.org/x/xerrors"
3029
"google.golang.org/api/idtoken"
3130
"storj.io/drpc/drpcmux"
@@ -408,30 +407,26 @@ func New(options *Options) *API {
408407

409408
if options.HealthcheckFunc == nil {
410409
options.HealthcheckFunc = func(ctx context.Context, apiKey string) *healthcheck.Report {
411-
dismissedHealthchecks := loadDismissedHealthchecks(ctx, options.Database, options.Logger)
410+
// NOTE: dismissed healthchecks are marked in formatHealthcheck.
411+
// Not here, as this result gets cached.
412412
return healthcheck.Run(ctx, &healthcheck.ReportOptions{
413413
Database: healthcheck.DatabaseReportOptions{
414414
DB: options.Database,
415415
Threshold: options.DeploymentValues.Healthcheck.ThresholdDatabase.Value(),
416-
Dismissed: slices.Contains(dismissedHealthchecks, codersdk.HealthSectionDatabase),
417416
},
418417
Websocket: healthcheck.WebsocketReportOptions{
419418
AccessURL: options.AccessURL,
420419
APIKey: apiKey,
421-
Dismissed: slices.Contains(dismissedHealthchecks, codersdk.HealthSectionWebsocket),
422420
},
423421
AccessURL: healthcheck.AccessURLReportOptions{
424422
AccessURL: options.AccessURL,
425-
Dismissed: slices.Contains(dismissedHealthchecks, codersdk.HealthSectionAccessURL),
426423
},
427424
DerpHealth: derphealth.ReportOptions{
428-
DERPMap: api.DERPMap(),
429-
Dismissed: slices.Contains(dismissedHealthchecks, codersdk.HealthSectionDERP),
425+
DERPMap: api.DERPMap(),
430426
},
431427
WorkspaceProxy: healthcheck.WorkspaceProxyReportOptions{
432428
CurrentVersion: buildinfo.Version(),
433429
WorkspaceProxiesFetchUpdater: *(options.WorkspaceProxiesFetchUpdater).Load(),
434-
Dismissed: slices.Contains(dismissedHealthchecks, codersdk.HealthSectionWorkspaceProxy),
435430
},
436431
})
437432
}

coderd/debug.go

Lines changed: 33 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -59,14 +59,19 @@ func (api *API) debugDeploymentHealth(rw http.ResponseWriter, r *http.Request) {
5959
ctx, cancel := context.WithTimeout(r.Context(), api.Options.HealthcheckTimeout)
6060
defer cancel()
6161

62+
// Load sections previously marked as dismissed.
63+
// We hydrate this here as we cache the healthcheck and hydrating in the
64+
// healthcheck function itself can lead to stale results.
65+
dismissed := loadDismissedHealthchecks(ctx, api.Database, api.Logger)
66+
6267
// Check if the forced query parameter is set.
6368
forced := r.URL.Query().Get("force") == "true"
6469

6570
// Get cached report if it exists and the requester did not force a refresh.
6671
if !forced {
6772
if report := api.healthCheckCache.Load(); report != nil {
6873
if time.Since(report.Time) < api.Options.HealthcheckRefresh {
69-
formatHealthcheck(ctx, rw, r, report)
74+
formatHealthcheck(ctx, rw, r, *report, dismissed...)
7075
return
7176
}
7277
}
@@ -89,12 +94,36 @@ func (api *API) debugDeploymentHealth(rw http.ResponseWriter, r *http.Request) {
8994
})
9095
return
9196
case res := <-resChan:
92-
formatHealthcheck(ctx, rw, r, res.Val)
97+
report := res.Val
98+
if report == nil {
99+
httpapi.Write(ctx, rw, http.StatusInternalServerError, codersdk.Response{
100+
Message: "There was an unknown error completing the healthcheck.",
101+
Detail: "nil report from healthcheck result channel",
102+
})
103+
return
104+
}
105+
formatHealthcheck(ctx, rw, r, *report, dismissed...)
93106
return
94107
}
95108
}
96109

97-
func formatHealthcheck(ctx context.Context, rw http.ResponseWriter, r *http.Request, hc *healthcheck.Report) {
110+
func formatHealthcheck(ctx context.Context, rw http.ResponseWriter, r *http.Request, hc healthcheck.Report, dismissed ...codersdk.HealthSection) {
111+
// Mark any sections previously marked as dismissed.
112+
for _, d := range dismissed {
113+
switch d {
114+
case codersdk.HealthSectionAccessURL:
115+
hc.AccessURL.Dismissed = true
116+
case codersdk.HealthSectionDERP:
117+
hc.DERP.Dismissed = true
118+
case codersdk.HealthSectionDatabase:
119+
hc.Database.Dismissed = true
120+
case codersdk.HealthSectionWebsocket:
121+
hc.Websocket.Dismissed = true
122+
case codersdk.HealthSectionWorkspaceProxy:
123+
hc.WorkspaceProxy.Dismissed = true
124+
}
125+
}
126+
98127
format := r.URL.Query().Get("format")
99128
switch format {
100129
case "text":
@@ -269,7 +298,7 @@ func loadDismissedHealthchecks(ctx context.Context, db database.Store, logger sl
269298
}
270299
}
271300
if err != nil && !xerrors.Is(err, sql.ErrNoRows) {
272-
logger.Error(ctx, "unable to fetch health settings: %w", err)
301+
logger.Error(ctx, "unable to fetch health settings", slog.Error(err))
273302
}
274303
return dismissedHealthchecks
275304
}

coderd/debug_test.go

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package coderd_test
22

33
import (
44
"context"
5+
"encoding/json"
56
"io"
67
"net/http"
78
"sync/atomic"
@@ -11,6 +12,8 @@ import (
1112
"github.com/stretchr/testify/assert"
1213
"github.com/stretchr/testify/require"
1314

15+
"cdr.dev/slog/sloggers/slogtest"
16+
1417
"github.com/coder/coder/v2/coderd/coderdtest"
1518
"github.com/coder/coder/v2/coderd/healthcheck"
1619
"github.com/coder/coder/v2/coderd/healthcheck/derphealth"
@@ -90,8 +93,11 @@ func TestDebugHealth(t *testing.T) {
9093
t.Parallel()
9194

9295
var (
96+
// Need to ignore errors due to ctx timeout
97+
logger = slogtest.Make(t, &slogtest.Options{IgnoreErrors: true})
9398
ctx, cancel = context.WithTimeout(context.Background(), testutil.WaitShort)
9499
client = coderdtest.New(t, &coderdtest.Options{
100+
Logger: &logger,
95101
HealthcheckTimeout: time.Microsecond,
96102
HealthcheckFunc: func(context.Context, string) *healthcheck.Report {
97103
t := time.NewTimer(time.Second)
@@ -276,6 +282,17 @@ func TestHealthSettings(t *testing.T) {
276282
settings, err := adminClient.HealthSettings(ctx)
277283
require.NoError(t, err)
278284
require.Equal(t, expected, settings)
285+
286+
// then
287+
res, err := adminClient.Request(ctx, "GET", "/api/v2/debug/health", nil)
288+
require.NoError(t, err)
289+
bs, err := io.ReadAll(res.Body)
290+
require.NoError(t, err)
291+
defer res.Body.Close()
292+
var hc healthcheck.Report
293+
require.NoError(t, json.Unmarshal(bs, &hc))
294+
require.True(t, hc.DERP.Dismissed)
295+
require.True(t, hc.Websocket.Dismissed)
279296
})
280297

281298
t.Run("UnDismissSection", func(t *testing.T) {
@@ -307,6 +324,17 @@ func TestHealthSettings(t *testing.T) {
307324
settings, err := adminClient.HealthSettings(ctx)
308325
require.NoError(t, err)
309326
require.Equal(t, expected, settings)
327+
328+
// then
329+
res, err := adminClient.Request(ctx, "GET", "/api/v2/debug/health", nil)
330+
require.NoError(t, err)
331+
bs, err := io.ReadAll(res.Body)
332+
require.NoError(t, err)
333+
defer res.Body.Close()
334+
var hc healthcheck.Report
335+
require.NoError(t, json.Unmarshal(bs, &hc))
336+
require.True(t, hc.DERP.Dismissed)
337+
require.False(t, hc.Websocket.Dismissed)
310338
})
311339

312340
t.Run("NotModified", func(t *testing.T) {

0 commit comments

Comments
 (0)