Skip to content

feat: add healthcheck database section #8060

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jun 20, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions coderd/apidoc/docs.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

18 changes: 18 additions & 0 deletions coderd/apidoc/swagger.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions coderd/coderd.go
Original file line number Diff line number Diff line change
Expand Up @@ -262,6 +262,7 @@ func New(options *Options) *API {
if options.HealthcheckFunc == nil {
options.HealthcheckFunc = func(ctx context.Context, apiKey string) *healthcheck.Report {
return healthcheck.Run(ctx, &healthcheck.ReportOptions{
DB: options.Database,
AccessURL: options.AccessURL,
DERPMap: options.DERPMap.Clone(),
APIKey: apiKey,
Expand Down
4 changes: 2 additions & 2 deletions coderd/healthcheck/accessurl.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,12 @@ type AccessURLReport struct {
Error error `json:"error"`
}

// AccessURLReportOptions carries the inputs for AccessURLReport.Run.
//
// The type was previously named AccessURLOptions; only the renamed
// declaration is kept so the type is declared exactly once.
type AccessURLReportOptions struct {
	// AccessURL is the URL the check is run against.
	AccessURL *url.URL
	// Client is the HTTP client handed to the check.
	// NOTE(review): some callers leave this nil, so Run presumably falls back
	// to a default client — confirm against Run's body.
	Client *http.Client
}

func (r *AccessURLReport) Run(ctx context.Context, opts *AccessURLOptions) {
func (r *AccessURLReport) Run(ctx context.Context, opts *AccessURLReportOptions) {
ctx, cancel := context.WithTimeout(ctx, 5*time.Second)
defer cancel()

Expand Down
6 changes: 3 additions & 3 deletions coderd/healthcheck/accessurl_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ func TestAccessURL(t *testing.T) {
)
defer cancel()

report.Run(ctx, &healthcheck.AccessURLOptions{
report.Run(ctx, &healthcheck.AccessURLReportOptions{
AccessURL: client.URL,
})

Expand Down Expand Up @@ -57,7 +57,7 @@ func TestAccessURL(t *testing.T) {
u, err := url.Parse(srv.URL)
require.NoError(t, err)

report.Run(ctx, &healthcheck.AccessURLOptions{
report.Run(ctx, &healthcheck.AccessURLReportOptions{
Client: srv.Client(),
AccessURL: u,
})
Expand Down Expand Up @@ -93,7 +93,7 @@ func TestAccessURL(t *testing.T) {
u, err := url.Parse(srv.URL)
require.NoError(t, err)

report.Run(ctx, &healthcheck.AccessURLOptions{
report.Run(ctx, &healthcheck.AccessURLReportOptions{
Client: client,
AccessURL: u,
})
Expand Down
49 changes: 49 additions & 0 deletions coderd/healthcheck/database.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
package healthcheck

import (
"context"
"time"

"golang.org/x/exp/slices"
"golang.org/x/xerrors"

"github.com/coder/coder/coderd/database"
)

// DatabaseReport is the outcome of the database healthcheck. It is filled in
// by Run and serialized as part of the overall healthcheck report.
type DatabaseReport struct {
	// Healthy is set by Run when the measured latency is under its threshold.
	Healthy bool `json:"healthy"`
	// Reachable is set by Run once every ping has succeeded.
	Reachable bool `json:"reachable"`
	// Latency is the median round-trip time of the pings issued by Run.
	Latency time.Duration `json:"latency"`
	// Error holds the wrapped ping failure, if any; nil on success.
	Error error `json:"error"`
}

// DatabaseReportOptions carries the inputs for DatabaseReport.Run.
type DatabaseReportOptions struct {
	// DB is the store whose Ping method is used to measure latency.
	DB database.Store
}

// Run pings the database several times and records the outcome in r.
//
// All pings share a single 5-second budget derived from ctx. If any ping
// fails, r.Error is set to the wrapped error and Run returns immediately
// without touching Healthy, Reachable or Latency. Otherwise Latency is the
// median of the measured round trips, Reachable is true, and Healthy is true
// only when that median is below the latency threshold.
//
// A nil opts or nil opts.DB is reported through r.Error rather than causing a
// nil-pointer panic.
func (r *DatabaseReport) Run(ctx context.Context, opts *DatabaseReportOptions) {
	const (
		// pingCount is the number of latency samples taken. It is odd so the
		// median is always one of the actual samples.
		pingCount = 5
		// latencyThreshold is somewhat arbitrary: a median latency at or
		// above it marks the database as unhealthy.
		latencyThreshold = 15 * time.Millisecond
	)

	if opts == nil || opts.DB == nil {
		r.Error = xerrors.New("database healthcheck: no database store provided")
		return
	}

	ctx, cancel := context.WithTimeout(ctx, 5*time.Second)
	defer cancel()

	pings := make([]time.Duration, 0, pingCount)
	for i := 0; i < pingCount; i++ {
		pong, err := opts.DB.Ping(ctx)
		if err != nil {
			r.Error = xerrors.Errorf("ping: %w", err)
			return
		}
		pings = append(pings, pong)
	}

	// Use the median rather than the mean so a single slow outlier does not
	// skew the reported latency.
	slices.Sort(pings)
	r.Latency = pings[pingCount/2]
	r.Healthy = r.Latency < latencyThreshold
	r.Reachable = true
}
85 changes: 85 additions & 0 deletions coderd/healthcheck/database_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
package healthcheck_test

import (
"context"
"testing"
"time"

"github.com/golang/mock/gomock"
"github.com/stretchr/testify/assert"
"golang.org/x/xerrors"

"github.com/coder/coder/coderd/database/dbmock"
"github.com/coder/coder/coderd/healthcheck"
"github.com/coder/coder/testutil"
)

// TestDatabase exercises DatabaseReport.Run against a mocked store in three
// scenarios: all pings succeed quickly, the first ping fails, and the pings
// return widely varying latencies.
func TestDatabase(t *testing.T) {
	t.Parallel()

	t.Run("OK", func(t *testing.T) {
		t.Parallel()

		ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitShort)
		defer cancel()

		latency := 10 * time.Millisecond
		store := dbmock.NewMockStore(gomock.NewController(t))
		// Every one of the five pings reports the same latency.
		store.EXPECT().Ping(gomock.Any()).Return(latency, nil).Times(5)

		var report healthcheck.DatabaseReport
		report.Run(ctx, &healthcheck.DatabaseReportOptions{DB: store})

		assert.True(t, report.Healthy)
		assert.True(t, report.Reachable)
		assert.Equal(t, latency, report.Latency)
		assert.NoError(t, report.Error)
	})

	t.Run("Error", func(t *testing.T) {
		t.Parallel()

		ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitShort)
		defer cancel()

		pingErr := xerrors.New("ping error")
		store := dbmock.NewMockStore(gomock.NewController(t))
		// Only one call is expected: Run stops at the first failed ping.
		store.EXPECT().Ping(gomock.Any()).Return(time.Duration(0), pingErr)

		var report healthcheck.DatabaseReport
		report.Run(ctx, &healthcheck.DatabaseReportOptions{DB: store})

		assert.False(t, report.Healthy)
		assert.False(t, report.Reachable)
		assert.Zero(t, report.Latency)
		assert.ErrorIs(t, report.Error, pingErr)
	})

	t.Run("Median", func(t *testing.T) {
		t.Parallel()

		ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitShort)
		defer cancel()

		store := dbmock.NewMockStore(gomock.NewController(t))
		// Unsorted samples whose median is one millisecond.
		samples := []time.Duration{
			time.Microsecond,
			time.Second,
			time.Nanosecond,
			time.Minute,
			time.Millisecond,
		}
		for _, sample := range samples {
			store.EXPECT().Ping(gomock.Any()).Return(sample, nil)
		}

		var report healthcheck.DatabaseReport
		report.Run(ctx, &healthcheck.DatabaseReportOptions{DB: store})

		assert.True(t, report.Healthy)
		assert.True(t, report.Reachable)
		assert.Equal(t, time.Millisecond, report.Latency)
		assert.NoError(t, report.Error)
	})
}
34 changes: 31 additions & 3 deletions coderd/healthcheck/healthcheck.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,18 +9,22 @@ import (

"golang.org/x/xerrors"
"tailscale.com/tailcfg"

"github.com/coder/coder/coderd/database"
)

// Names of the individual healthcheck sections. Run appends the name of each
// unhealthy section to Report.FailingSections.
const (
	// SectionDERP names the DERP section.
	SectionDERP string = "DERP"
	// SectionAccessURL names the access URL section.
	SectionAccessURL string = "AccessURL"
	// SectionWebsocket names the websocket section.
	SectionWebsocket string = "Websocket"
	// SectionDatabase names the database section.
	SectionDatabase string = "Database"
)

// Checker runs the individual healthcheck sections. Run invokes it through
// ReportOptions.Checker, so a custom implementation can replace the real
// checks.
//
// Each method receives the section's options and returns that section's
// report. AccessURL takes *AccessURLReportOptions (formerly AccessURLOptions);
// the method is declared once, with the renamed type.
type Checker interface {
	DERP(ctx context.Context, opts *DERPReportOptions) DERPReport
	AccessURL(ctx context.Context, opts *AccessURLReportOptions) AccessURLReport
	Websocket(ctx context.Context, opts *WebsocketReportOptions) WebsocketReport
	Database(ctx context.Context, opts *DatabaseReportOptions) DatabaseReport
}

type Report struct {
Expand All @@ -33,9 +37,11 @@ type Report struct {
DERP DERPReport `json:"derp"`
AccessURL AccessURLReport `json:"access_url"`
Websocket WebsocketReport `json:"websocket"`
Database DatabaseReport `json:"database"`
}

type ReportOptions struct {
DB database.Store
// TODO: support getting this over HTTP?
DERPMap *tailcfg.DERPMap
AccessURL *url.URL
Expand All @@ -52,7 +58,7 @@ func (defaultChecker) DERP(ctx context.Context, opts *DERPReportOptions) (report
return report
}

func (defaultChecker) AccessURL(ctx context.Context, opts *AccessURLOptions) (report AccessURLReport) {
func (defaultChecker) AccessURL(ctx context.Context, opts *AccessURLReportOptions) (report AccessURLReport) {
report.Run(ctx, opts)
return report
}
Expand All @@ -62,6 +68,11 @@ func (defaultChecker) Websocket(ctx context.Context, opts *WebsocketReportOption
return report
}

// Database runs the database check by delegating to DatabaseReport.Run and
// returns the populated report.
func (defaultChecker) Database(ctx context.Context, opts *DatabaseReportOptions) (report DatabaseReport) {
	// Run fills in the named result in place.
	(&report).Run(ctx, opts)
	return report
}

func Run(ctx context.Context, opts *ReportOptions) *Report {
var (
wg sync.WaitGroup
Expand Down Expand Up @@ -95,7 +106,7 @@ func Run(ctx context.Context, opts *ReportOptions) *Report {
}
}()

report.AccessURL = opts.Checker.AccessURL(ctx, &AccessURLOptions{
report.AccessURL = opts.Checker.AccessURL(ctx, &AccessURLReportOptions{
AccessURL: opts.AccessURL,
Client: opts.Client,
})
Expand All @@ -116,6 +127,20 @@ func Run(ctx context.Context, opts *ReportOptions) *Report {
})
}()

wg.Add(1)
go func() {
defer wg.Done()
defer func() {
if err := recover(); err != nil {
report.Database.Error = xerrors.Errorf("%v", err)
}
}()

report.Database = opts.Checker.Database(ctx, &DatabaseReportOptions{
DB: opts.DB,
})
}()

wg.Wait()
report.Time = time.Now()
if !report.DERP.Healthy {
Expand All @@ -127,6 +152,9 @@ func Run(ctx context.Context, opts *ReportOptions) *Report {
if !report.Websocket.Healthy {
report.FailingSections = append(report.FailingSections, SectionWebsocket)
}
if !report.Database.Healthy {
report.FailingSections = append(report.FailingSections, SectionDatabase)
}

report.Healthy = len(report.FailingSections) == 0
return &report
Expand Down
Loading