@@ -11,8 +11,10 @@ import (
11
11
"net/http"
12
12
"net/netip"
13
13
"net/url"
14
+ "runtime/pprof"
14
15
"strconv"
15
16
"strings"
17
+ "sync"
16
18
"time"
17
19
18
20
"github.com/google/uuid"
@@ -291,11 +293,12 @@ func (api *API) workspaceAgentPTY(rw http.ResponseWriter, r *http.Request) {
291
293
})
292
294
return
293
295
}
294
- go httpapi .Heartbeat (ctx , conn )
295
296
296
- _ , wsNetConn := websocketNetConn (ctx , conn , websocket .MessageBinary )
297
+ ctx , wsNetConn := websocketNetConn (ctx , conn , websocket .MessageBinary )
297
298
defer wsNetConn .Close () // Also closes conn.
298
299
300
+ go httpapi .Heartbeat (ctx , conn )
301
+
299
302
agentConn , release , err := api .workspaceAgentCache .Acquire (r , workspaceAgent .ID )
300
303
if err != nil {
301
304
_ = conn .Close (websocket .StatusInternalError , httpapi .WebsocketCloseSprintf ("dial workspace agent: %s" , err ))
@@ -606,11 +609,40 @@ func (api *API) workspaceAgentCoordinate(rw http.ResponseWriter, r *http.Request
606
609
})
607
610
return
608
611
}
609
- go httpapi .Heartbeat (ctx , conn )
610
612
611
613
ctx , wsNetConn := websocketNetConn (ctx , conn , websocket .MessageBinary )
612
614
defer wsNetConn .Close ()
613
615
616
+ // We use a custom heartbeat routine here instead of `httpapi.Heartbeat`
617
+ // because we want to log the agent's last ping time.
618
+ var lastPing time.Time
619
+ var pingMu sync.Mutex
620
+ go pprof .Do (ctx , pprof .Labels ("agent" , workspaceAgent .ID .String ()), func (ctx context.Context ) {
621
+ // TODO(mafredri): Is this too frequent? Use separate ping disconnect timeout?
622
+ t := time .NewTicker (api .AgentConnectionUpdateFrequency )
623
+ defer t .Stop ()
624
+
625
+ for {
626
+ select {
627
+ case <- t .C :
628
+ case <- ctx .Done ():
629
+ return
630
+ }
631
+
632
+ // We don't need a context that times out here because the ping will
633
+ // eventually go through. If the context times out, then other
634
+ // websocket read operations will receive an error, obfuscating the
635
+ // actual problem.
636
+ err := conn .Ping (ctx )
637
+ if err != nil {
638
+ return
639
+ }
640
+ pingMu .Lock ()
641
+ lastPing = time .Now ()
642
+ pingMu .Unlock ()
643
+ }
644
+ })
645
+
614
646
firstConnectedAt := workspaceAgent .FirstConnectedAt
615
647
if ! firstConnectedAt .Valid {
616
648
firstConnectedAt = sql.NullTime {
@@ -654,9 +686,12 @@ func (api *API) workspaceAgentCoordinate(rw http.ResponseWriter, r *http.Request
654
686
ctx , cancel := context .WithTimeout (dbauthz .AsSystemRestricted (api .ctx ), api .AgentInactiveDisconnectTimeout )
655
687
defer cancel ()
656
688
657
- disconnectedAt = sql.NullTime {
658
- Time : database .Now (),
659
- Valid : true ,
689
+ // Only update timestamp if the disconnect is new.
690
+ if ! disconnectedAt .Valid {
691
+ disconnectedAt = sql.NullTime {
692
+ Time : database .Now (),
693
+ Valid : true ,
694
+ }
660
695
}
661
696
err := updateConnectionTimes (ctx )
662
697
if err != nil {
@@ -711,15 +746,37 @@ func (api *API) workspaceAgentCoordinate(rw http.ResponseWriter, r *http.Request
711
746
return
712
747
case <- ticker .C :
713
748
}
714
- lastConnectedAt = sql.NullTime {
715
- Time : database .Now (),
716
- Valid : true ,
749
+
750
+ pingMu .Lock ()
751
+ lastPing := lastPing
752
+ pingMu .Unlock ()
753
+
754
+ var connectionStatusChanged bool
755
+ if time .Since (lastPing ) > api .AgentInactiveDisconnectTimeout {
756
+ if ! disconnectedAt .Valid {
757
+ connectionStatusChanged = true
758
+ disconnectedAt = sql.NullTime {
759
+ Time : database .Now (),
760
+ Valid : true ,
761
+ }
762
+ }
763
+ } else {
764
+ connectionStatusChanged = disconnectedAt .Valid
765
+ // TODO(mafredri): Should we update it here or allow lastConnectedAt to shadow it?
766
+ disconnectedAt = sql.NullTime {}
767
+ lastConnectedAt = sql.NullTime {
768
+ Time : database .Now (),
769
+ Valid : true ,
770
+ }
717
771
}
718
772
err = updateConnectionTimes (ctx )
719
773
if err != nil {
720
774
_ = conn .Close (websocket .StatusGoingAway , err .Error ())
721
775
return
722
776
}
777
+ if connectionStatusChanged {
778
+ api .publishWorkspaceUpdate (ctx , build .WorkspaceID )
779
+ }
723
780
err := ensureLatestBuild ()
724
781
if err != nil {
725
782
// Disconnect agents that are no longer valid.
0 commit comments