Skip to content

Commit 03f0745

Browse files
committed
fix: routinely ping agent websocket to ensure liveness
1 parent ba8dd49 commit 03f0745

File tree

4 files changed

+39
-4
lines changed

4 files changed

+39
-4
lines changed

agent/agent.go

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -430,6 +430,9 @@ func (a *agent) createTailnet(ctx context.Context, derpMap *tailcfg.DERPMap) (_
430430
// runCoordinator runs a coordinator and returns whether a reconnect
431431
// should occur.
432432
func (a *agent) runCoordinator(ctx context.Context, network *tailnet.Conn) error {
433+
ctx, cancel := context.WithCancel(ctx)
434+
defer cancel()
435+
433436
coordinator, err := a.client.ListenWorkspaceAgent(ctx)
434437
if err != nil {
435438
return err

cli/agent.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@ func workspaceAgent() *cobra.Command {
8383
slog.F("version", version),
8484
)
8585
client := codersdk.New(coderURL)
86+
client.Logger = logger
8687
// Set a reasonable timeout so requests can't hang forever!
8788
client.HTTPClient.Timeout = 10 * time.Second
8889

codersdk/workspaceagents.go

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -340,6 +340,39 @@ func (c *Client) ListenWorkspaceAgent(ctx context.Context) (net.Conn, error) {
340340
return nil, readBodyAsError(res)
341341
}
342342

343+
go func() {
344+
tick := 30 * time.Second
345+
ticker := time.NewTicker(tick)
346+
defer ticker.Stop()
347+
defer func() {
348+
c.Logger.Debug(ctx, "coordinate pinger exited")
349+
}()
350+
for {
351+
select {
352+
case <-ctx.Done():
353+
return
354+
case start := <-ticker.C:
355+
ctx, cancel := context.WithTimeout(ctx, tick)
356+
357+
err := conn.Ping(ctx)
358+
if err != nil {
359+
c.Logger.Error(ctx, "workspace agent coordinate ping", slog.Error(err))
360+
361+
err := conn.Close(websocket.StatusAbnormalClosure, "Ping failed")
362+
if err != nil {
363+
c.Logger.Error(ctx, "close workspace agent coordinate websocket", slog.Error(err))
364+
}
365+
366+
cancel()
367+
return
368+
}
369+
370+
c.Logger.Debug(ctx, "got coordinate pong", slog.F("took", time.Since(start)))
371+
cancel()
372+
}
373+
}
374+
}()
375+
343376
return websocket.NetConn(ctx, conn, websocket.MessageBinary), nil
344377
}
345378

provisionerd/provisionerd_test.go

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,6 @@ import (
1212
"testing"
1313
"time"
1414

15-
"github.com/coder/coder/provisionerd/runner"
16-
"github.com/coder/coder/testutil"
17-
1815
"github.com/hashicorp/yamux"
1916
"github.com/stretchr/testify/assert"
2017
"github.com/stretchr/testify/require"
@@ -26,11 +23,12 @@ import (
2623

2724
"cdr.dev/slog"
2825
"cdr.dev/slog/sloggers/slogtest"
29-
3026
"github.com/coder/coder/provisionerd"
3127
"github.com/coder/coder/provisionerd/proto"
28+
"github.com/coder/coder/provisionerd/runner"
3229
"github.com/coder/coder/provisionersdk"
3330
sdkproto "github.com/coder/coder/provisionersdk/proto"
31+
"github.com/coder/coder/testutil"
3432
)
3533

3634
func TestMain(m *testing.M) {

0 commit comments

Comments
 (0)