Skip to content

fix: reduce excessive logging when database is unreachable #17363

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Apr 15, 2025
Prev Previous commit
Next Next commit
Replace DatabaseHealthcheckFn with interface
Signed-off-by: Danny Kopping <dannykopping@gmail.com>
  • Loading branch information
dannykopping committed Apr 14, 2025
commit 68867da2ecc58eb068b2cfd93e81c5be03084593
13 changes: 5 additions & 8 deletions coderd/coderd.go
Original file line number Diff line number Diff line change
Expand Up @@ -675,14 +675,11 @@ func New(options *Options) *API {
api.Auditor.Store(&options.Auditor)
api.TailnetCoordinator.Store(&options.TailnetCoordinator)
dialer := &InmemTailnetDialer{
CoordPtr: &api.TailnetCoordinator,
DERPFn: api.DERPMap,
Logger: options.Logger,
ClientID: uuid.New(),
DatabaseHealthcheckFn: func(ctx context.Context) error {
_, err := api.Database.Ping(ctx)
return err
},
CoordPtr: &api.TailnetCoordinator,
DERPFn: api.DERPMap,
Logger: options.Logger,
ClientID: uuid.New(),
DatabaseHealthCheck: api.Database,
}
stn, err := NewServerTailnet(api.ctx,
options.Logger,
Expand Down
19 changes: 12 additions & 7 deletions coderd/tailnet.go
Original file line number Diff line number Diff line change
Expand Up @@ -536,19 +536,24 @@ func NewMultiAgentController(ctx context.Context, logger slog.Logger, tracer tra
return m
}

type Pinger interface {
Ping(context.Context) (time.Duration, error)
}

// InmemTailnetDialer is a tailnet.ControlProtocolDialer that connects to a Coordinator and DERPMap
// service running in the same memory space.
type InmemTailnetDialer struct {
CoordPtr *atomic.Pointer[tailnet.Coordinator]
DERPFn func() *tailcfg.DERPMap
Logger slog.Logger
ClientID uuid.UUID
DatabaseHealthcheckFn func(ctx context.Context) error
CoordPtr *atomic.Pointer[tailnet.Coordinator]
DERPFn func() *tailcfg.DERPMap
Logger slog.Logger
ClientID uuid.UUID
// DatabaseHealthCheck is used to validate that the store is reachable.
DatabaseHealthCheck Pinger
}

func (a *InmemTailnetDialer) Dial(ctx context.Context, _ tailnet.ResumeTokenController) (tailnet.ControlProtocolClients, error) {
if a.DatabaseHealthcheckFn != nil {
if err := a.DatabaseHealthcheckFn(ctx); err != nil {
if a.DatabaseHealthCheck != nil {
if _, err := a.DatabaseHealthCheck.Ping(ctx); err != nil {
return tailnet.ControlProtocolClients{}, xerrors.Errorf("%w: %v", codersdk.ErrDatabaseNotReachable, err)
}
}
Expand Down
37 changes: 22 additions & 15 deletions coderd/tailnet_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
"strconv"
"sync/atomic"
"testing"
"time"

"github.com/google/uuid"
"github.com/prometheus/client_golang/prometheus"
Expand Down Expand Up @@ -365,6 +366,14 @@ func TestServerTailnet_ReverseProxy(t *testing.T) {
})
}

type fakePing struct {
err error
}

func (f *fakePing) Ping(context.Context) (time.Duration, error) {
return time.Duration(0), f.err
}

func TestServerTailnet_Healthcheck(t *testing.T) {
t.Parallel()

Expand All @@ -373,9 +382,8 @@ func TestServerTailnet_Healthcheck(t *testing.T) {
t.Parallel()

ctx := testutil.Context(t, testutil.WaitMedium)
fn := func(ctx context.Context) error { return nil }

agents, serverTailnet := setupServerTailnetAgent(t, 1, withHealthcheckFn(fn))
agents, serverTailnet := setupServerTailnetAgent(t, 1, withHealthChecker(&fakePing{}))

a := agents[0]
conn, release, err := serverTailnet.AgentConn(ctx, a.id)
Expand All @@ -391,9 +399,8 @@ func TestServerTailnet_Healthcheck(t *testing.T) {
t.Parallel()

ctx := testutil.Context(t, testutil.WaitMedium)
fn := func(ctx context.Context) error { return xerrors.Errorf("oops, db gone") }

agents, serverTailnet := setupServerTailnetAgent(t, 1, withHealthcheckFn(fn))
agents, serverTailnet := setupServerTailnetAgent(t, 1, withHealthChecker(&fakePing{err: xerrors.New("oops")}))

a := agents[0]
_, release, err := serverTailnet.AgentConn(ctx, a.id)
Expand Down Expand Up @@ -427,13 +434,13 @@ type agentWithID struct {
}

type serverOption struct {
HealthcheckFn func(ctx context.Context) error
HealthCheck coderd.Pinger
DERPAndStunOptions []tailnettest.DERPAndStunOption
}

func withHealthcheckFn(fn func(ctx context.Context) error) serverOption {
func withHealthChecker(p coderd.Pinger) serverOption {
return serverOption{
HealthcheckFn: fn,
HealthCheck: p,
}
}

Expand All @@ -446,12 +453,12 @@ func withDERPAndStunOptions(opts ...tailnettest.DERPAndStunOption) serverOption
func setupServerTailnetAgent(t *testing.T, agentNum int, opts ...serverOption) ([]agentWithID, *coderd.ServerTailnet) {
logger := testutil.Logger(t)

var healthcheckFn func(ctx context.Context) error
var healthChecker coderd.Pinger
var derpAndStunOptions []tailnettest.DERPAndStunOption
for _, opt := range opts {
derpAndStunOptions = append(derpAndStunOptions, opt.DERPAndStunOptions...)
if opt.HealthcheckFn != nil {
healthcheckFn = opt.HealthcheckFn
if opt.HealthCheck != nil {
healthChecker = opt.HealthCheck
}
}

Expand Down Expand Up @@ -495,11 +502,11 @@ func setupServerTailnetAgent(t *testing.T, agentNum int, opts ...serverOption) (
}

dialer := &coderd.InmemTailnetDialer{
CoordPtr: &coordPtr,
DERPFn: func() *tailcfg.DERPMap { return derpMap },
Logger: logger,
ClientID: uuid.UUID{5},
DatabaseHealthcheckFn: healthcheckFn,
CoordPtr: &coordPtr,
DERPFn: func() *tailcfg.DERPMap { return derpMap },
Logger: logger,
ClientID: uuid.UUID{5},
DatabaseHealthCheck: healthChecker,
}
serverTailnet, err := coderd.NewServerTailnet(
context.Background(),
Expand Down