Skip to content

Commit b117b5c

Browse files
committed
encapsulate WaitForReinitLoop for easier testing
1 parent 9feebef commit b117b5c

File tree

7 files changed

+1739
-1774
lines changed

7 files changed

+1739
-1774
lines changed

cli/agent.go

Lines changed: 1 addition & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,6 @@ import (
1919
"golang.org/x/xerrors"
2020
"gopkg.in/natefinch/lumberjack.v2"
2121

22-
"github.com/coder/retry"
23-
2422
"github.com/prometheus/client_golang/prometheus"
2523

2624
"cdr.dev/slog"
@@ -332,27 +330,7 @@ func (r *RootCmd) workspaceAgent() *serpent.Command {
332330
containerLister = agentcontainers.NewDocker(execer)
333331
}
334332

335-
// TODO: timeout ok?
336-
reinitCtx, reinitCancel := context.WithTimeout(context.Background(), time.Hour*24)
337-
defer reinitCancel()
338-
reinitEvents := make(chan agentsdk.ReinitializationEvent)
339-
340-
go func() {
341-
// Retry to wait for reinit, main context cancels the retrier.
342-
for retrier := retry.New(100*time.Millisecond, 10*time.Second); retrier.Wait(ctx); {
343-
select {
344-
case <-reinitCtx.Done():
345-
return
346-
default:
347-
}
348-
349-
err := client.WaitForReinit(reinitCtx, reinitEvents)
350-
if err != nil {
351-
logger.Error(ctx, "failed to wait for reinit instructions, will retry", slog.Error(err))
352-
}
353-
}
354-
}()
355-
333+
reinitEvents := agentsdk.WaitForReinitLoop(ctx, logger, client)
356334
var (
357335
lastErr error
358336
mustExit bool
@@ -409,7 +387,6 @@ func (r *RootCmd) workspaceAgent() *serpent.Command {
409387
prometheusSrvClose()
410388

411389
if mustExit {
412-
reinitCancel()
413390
break
414391
}
415392

coderd/apidoc/docs.go

Lines changed: 11 additions & 11 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

coderd/apidoc/swagger.json

Lines changed: 7 additions & 7 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

codersdk/agentsdk/agentsdk.go

Lines changed: 45 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ import (
1919
"tailscale.com/tailcfg"
2020

2121
"cdr.dev/slog"
22+
"github.com/coder/retry"
2223
"github.com/coder/websocket"
2324

2425
"github.com/coder/coder/v2/agent/proto"
@@ -707,49 +708,84 @@ func PrebuildClaimedChannel(id uuid.UUID) string {
707708
// - ping: ignored, keepalive
708709
// - prebuild claimed: a prebuilt workspace is claimed, so the agent must reinitialize.
709710
// NOTE: the caller is responsible for closing the events chan.
710-
func (c *Client) WaitForReinit(ctx context.Context, events chan<- ReinitializationEvent) error {
711+
func (c *Client) WaitForReinit(ctx context.Context) (*ReinitializationEvent, error) {
711712
// TODO: allow configuring httpclient
712713
c.SDK.HTTPClient.Timeout = time.Hour * 24
713714

715+
// TODO (sasswart): tried the following to fix the above, it won't work. The shorter timeout wins.
716+
// I also considered cloning c.SDK.HTTPClient and setting the timeout on the cloned client.
717+
// That won't work because we can't pass the cloned HTTPClient into s.SDK.Request.
718+
// Looks like we're going to need a separate client to be able to have a longer timeout.
719+
//
720+
// timeoutCtx, cancelTimeoutCtx := context.WithTimeout(ctx, 24*time.Hour)
721+
// defer cancelTimeoutCtx()
722+
714723
res, err := c.SDK.Request(ctx, http.MethodGet, "/api/v2/workspaceagents/me/reinit", nil)
715724
if err != nil {
716-
return xerrors.Errorf("execute request: %w", err)
725+
return nil, xerrors.Errorf("execute request: %w", err)
717726
}
718727
defer res.Body.Close()
719728

720729
if res.StatusCode != http.StatusOK {
721-
return codersdk.ReadBodyAsError(res)
730+
return nil, codersdk.ReadBodyAsError(res)
722731
}
723732

724733
nextEvent := codersdk.ServerSentEventReader(ctx, res.Body)
725734

726735
for {
736+
// TODO (Sasswart): I don't like that we do this select at the start and at the end.
737+
// nextEvent should return an error if the context is canceled, but that feels like a larger refactor.
738+
// if it did, we'd only have the select at the end of the loop.
727739
select {
728740
case <-ctx.Done():
729-
return ctx.Err()
741+
return nil, ctx.Err()
730742
default:
731743
}
732744

733745
sse, err := nextEvent()
734746
if err != nil {
735-
return xerrors.Errorf("failed to read server-sent event: %w", err)
747+
return nil, xerrors.Errorf("failed to read server-sent event: %w", err)
736748
}
737749
if sse.Type != codersdk.ServerSentEventTypeData {
738750
continue
739751
}
740752
var reinitEvent ReinitializationEvent
741753
b, ok := sse.Data.([]byte)
742754
if !ok {
743-
return xerrors.Errorf("expected data as []byte, got %T", sse.Data)
755+
return nil, xerrors.Errorf("expected data as []byte, got %T", sse.Data)
744756
}
745757
err = json.Unmarshal(b, &reinitEvent)
746758
if err != nil {
747-
return xerrors.Errorf("unmarshal reinit response: %w", err)
759+
return nil, xerrors.Errorf("unmarshal reinit response: %w", err)
748760
}
749761
select {
750762
case <-ctx.Done():
751-
return ctx.Err()
752-
case events <- reinitEvent:
763+
return nil, ctx.Err()
764+
default:
765+
return &reinitEvent, nil
753766
}
754767
}
755768
}
769+
770+
func WaitForReinitLoop(ctx context.Context, logger slog.Logger, client *Client) <-chan ReinitializationEvent {
771+
reinitEvents := make(chan ReinitializationEvent)
772+
773+
go func() {
774+
for retrier := retry.New(100*time.Millisecond, 10*time.Second); retrier.Wait(ctx); {
775+
logger.Debug(ctx, "waiting for agent reinitialization instructions")
776+
reinitEvent, err := client.WaitForReinit(ctx)
777+
if err != nil {
778+
logger.Error(ctx, "failed to wait for agent reinitialization instructions", slog.Error(err))
779+
}
780+
reinitEvents <- *reinitEvent
781+
select {
782+
case <-ctx.Done():
783+
close(reinitEvents)
784+
return
785+
case reinitEvents <- *reinitEvent:
786+
}
787+
}
788+
}()
789+
790+
return reinitEvents
791+
}

0 commit comments

Comments
 (0)