@@ -90,7 +90,6 @@ type Options struct {
90
90
91
91
type Client interface {
92
92
ConnectRPC (ctx context.Context ) (drpc.Conn , error )
93
- PostLifecycle (ctx context.Context , state agentsdk.PostLifecycleRequest ) error
94
93
PostMetadata (ctx context.Context , req agentsdk.PostMetadataRequest ) error
95
94
RewriteDERPMap (derpMap * tailcfg.DERPMap )
96
95
}
@@ -299,7 +298,6 @@ func (a *agent) init() {
299
298
// may be happening, but regardless after the intermittent
300
299
// failure, you'll want the agent to reconnect.
301
300
func (a * agent ) runLoop () {
302
- go a .reportLifecycleUntilClose ()
303
301
go a .reportMetadataUntilGracefulShutdown ()
304
302
go a .manageProcessPriorityUntilGracefulShutdown ()
305
303
@@ -618,21 +616,19 @@ func (a *agent) reportMetadataUntilGracefulShutdown() {
618
616
}
619
617
}
620
618
621
- // reportLifecycleUntilClose reports the current lifecycle state once. All state
619
+ // reportLifecycle reports each new lifecycle state once over conn. All state
622
620
// changes are reported in order.
623
- func (a * agent ) reportLifecycleUntilClose () {
624
- // part of graceful shut down is reporting the final lifecycle states, e.g "ShuttingDown" so the
625
- // lifecycle reporting has to be via the "hard" context.
626
- ctx := a .hardCtx
621
+ func (a * agent ) reportLifecycle (ctx context.Context , conn drpc.Conn ) error {
622
+ aAPI := proto .NewDRPCAgentClient (conn )
627
623
lastReportedIndex := 0 // Start off with the created state without reporting it.
628
624
for {
629
625
select {
630
626
case <- a .lifecycleUpdate :
631
627
case <- ctx .Done ():
632
- return
628
+ return ctx . Err ()
633
629
}
634
630
635
- for r := retry . New ( time . Second , 15 * time . Second ); r . Wait ( ctx ); {
631
+ for {
636
632
a .lifecycleMu .RLock ()
637
633
lastIndex := len (a .lifecycleStates ) - 1
638
634
report := a .lifecycleStates [lastReportedIndex ]
@@ -644,33 +640,35 @@ func (a *agent) reportLifecycleUntilClose() {
644
640
if lastIndex == lastReportedIndex {
645
641
break
646
642
}
643
+ l , err := agentsdk .ProtoFromLifecycle (report )
644
+ if err != nil {
645
+ a .logger .Critical (ctx , "failed to convert lifecycle state" , slog .F ("report" , report ))
646
+ // Skip this report; there is no point retrying. Maybe we can successfully convert the next one?
647
+ lastReportedIndex ++
648
+ continue
649
+ }
650
+ logger := a .logger .With (slog .F ("payload" , l ))
651
+ logger .Debug (ctx , "reporting lifecycle state" )
647
652
648
- a .logger .Debug (ctx , "reporting lifecycle state" , slog .F ("payload" , report ))
653
+ _ , err = aAPI .UpdateLifecycle (ctx , & proto.UpdateLifecycleRequest {Lifecycle : l })
654
+ if err != nil {
655
+ return xerrors .Errorf ("failed to update lifecycle: %w" , err )
656
+ }
649
657
650
- err := a .client .PostLifecycle (ctx , report )
651
- if err == nil {
652
- a .logger .Debug (ctx , "successfully reported lifecycle state" , slog .F ("payload" , report ))
653
- r .Reset () // don't back off when we are successful
654
- lastReportedIndex ++
655
- select {
656
- case a .lifecycleReported <- report .State :
657
- case <- a .lifecycleReported :
658
- a .lifecycleReported <- report .State
659
- }
660
- if lastReportedIndex < lastIndex {
661
- // Keep reporting until we've sent all messages, we can't
662
- // rely on the channel triggering us before the backlog is
663
- // consumed.
664
- continue
665
- }
666
- break
658
+ logger .Debug (ctx , "successfully reported lifecycle state" )
659
+ lastReportedIndex ++
660
+ select {
661
+ case a .lifecycleReported <- report .State :
662
+ case <- a .lifecycleReported :
663
+ a .lifecycleReported <- report .State
667
664
}
668
- if xerrors .Is (err , context .Canceled ) || xerrors .Is (err , context .DeadlineExceeded ) {
669
- a .logger .Debug (ctx , "canceled reporting lifecycle state" , slog .F ("payload" , report ))
670
- return
665
+ if lastReportedIndex < lastIndex {
666
+ // Keep reporting until we've sent all messages, we can't
667
+ // rely on the channel triggering us before the backlog is
668
+ // consumed.
669
+ continue
671
670
}
672
- // If we fail to report the state we probably shouldn't exit, log only.
673
- a .logger .Error (ctx , "agent failed to report the lifecycle state" , slog .Error (err ))
671
+ break
674
672
}
675
673
}
676
674
}
@@ -780,6 +778,10 @@ func (a *agent) run() (retErr error) {
780
778
return err
781
779
})
782
780
781
+ // Part of graceful shutdown is reporting the final lifecycle states, e.g. "ShuttingDown", so the
782
+ // lifecycle reporter must be started with gracefulShutdownBehaviorRemain.
783
+ connMan .start ("report lifecycle" , gracefulShutdownBehaviorRemain , a .reportLifecycle )
784
+
783
785
// channels to sync goroutines below
784
786
// handle manifest
785
787
// |
0 commit comments