@@ -19,6 +19,7 @@ import (
19
19
"tailscale.com/tailcfg"
20
20
21
21
"cdr.dev/slog"
22
+ "github.com/coder/retry"
22
23
"github.com/coder/websocket"
23
24
24
25
"github.com/coder/coder/v2/agent/proto"
@@ -707,49 +708,84 @@ func PrebuildClaimedChannel(id uuid.UUID) string {
707
708
// - ping: ignored, keepalive
708
709
// - prebuild claimed: a prebuilt workspace is claimed, so the agent must reinitialize.
709
710
// NOTE: the first reinitialization event received is returned directly; no events channel is involved.
710
- func (c * Client ) WaitForReinit (ctx context.Context , events chan <- ReinitializationEvent ) error {
711
+ func (c * Client ) WaitForReinit (ctx context.Context ) ( * ReinitializationEvent , error ) {
711
712
// TODO: allow configuring httpclient
712
713
c .SDK .HTTPClient .Timeout = time .Hour * 24
713
714
715
+ // TODO (sasswart): tried the following to fix the above, it won't work. The shorter timeout wins.
716
+ // I also considered cloning c.SDK.HTTPClient and setting the timeout on the cloned client.
717
+ // That won't work because we can't pass the cloned HTTPClient into s.SDK.Request.
718
+ // Looks like we're going to need a separate client to be able to have a longer timeout.
719
+ //
720
+ // timeoutCtx, cancelTimeoutCtx := context.WithTimeout(ctx, 24*time.Hour)
721
+ // defer cancelTimeoutCtx()
722
+
714
723
res , err := c .SDK .Request (ctx , http .MethodGet , "/api/v2/workspaceagents/me/reinit" , nil )
715
724
if err != nil {
716
- return xerrors .Errorf ("execute request: %w" , err )
725
+ return nil , xerrors .Errorf ("execute request: %w" , err )
717
726
}
718
727
defer res .Body .Close ()
719
728
720
729
if res .StatusCode != http .StatusOK {
721
- return codersdk .ReadBodyAsError (res )
730
+ return nil , codersdk .ReadBodyAsError (res )
722
731
}
723
732
724
733
nextEvent := codersdk .ServerSentEventReader (ctx , res .Body )
725
734
726
735
for {
736
+ // TODO (Sasswart): I don't like that we do this select at the start and at the end.
737
+ // nextEvent should return an error if the context is canceled, but that feels like a larger refactor.
738
+ // if it did, we'd only have the select at the end of the loop.
727
739
select {
728
740
case <- ctx .Done ():
729
- return ctx .Err ()
741
+ return nil , ctx .Err ()
730
742
default :
731
743
}
732
744
733
745
sse , err := nextEvent ()
734
746
if err != nil {
735
- return xerrors .Errorf ("failed to read server-sent event: %w" , err )
747
+ return nil , xerrors .Errorf ("failed to read server-sent event: %w" , err )
736
748
}
737
749
if sse .Type != codersdk .ServerSentEventTypeData {
738
750
continue
739
751
}
740
752
var reinitEvent ReinitializationEvent
741
753
b , ok := sse .Data .([]byte )
742
754
if ! ok {
743
- return xerrors .Errorf ("expected data as []byte, got %T" , sse .Data )
755
+ return nil , xerrors .Errorf ("expected data as []byte, got %T" , sse .Data )
744
756
}
745
757
err = json .Unmarshal (b , & reinitEvent )
746
758
if err != nil {
747
- return xerrors .Errorf ("unmarshal reinit response: %w" , err )
759
+ return nil , xerrors .Errorf ("unmarshal reinit response: %w" , err )
748
760
}
749
761
select {
750
762
case <- ctx .Done ():
751
- return ctx .Err ()
752
- case events <- reinitEvent :
763
+ return nil , ctx .Err ()
764
+ default :
765
+ return & reinitEvent , nil
753
766
}
754
767
}
755
768
}
769
+
770
+ func WaitForReinitLoop (ctx context.Context , logger slog.Logger , client * Client ) <- chan ReinitializationEvent {
771
+ reinitEvents := make (chan ReinitializationEvent )
772
+
773
+ go func () {
774
+ for retrier := retry .New (100 * time .Millisecond , 10 * time .Second ); retrier .Wait (ctx ); {
775
+ logger .Debug (ctx , "waiting for agent reinitialization instructions" )
776
+ reinitEvent , err := client .WaitForReinit (ctx )
777
+ if err != nil {
778
+ logger .Error (ctx , "failed to wait for agent reinitialization instructions" , slog .Error (err ))
779
+ }
780
+ reinitEvents <- * reinitEvent
781
+ select {
782
+ case <- ctx .Done ():
783
+ close (reinitEvents )
784
+ return
785
+ case reinitEvents <- * reinitEvent :
786
+ }
787
+ }
788
+ }()
789
+
790
+ return reinitEvents
791
+ }
0 commit comments