
Commit 214d380

comments

1 parent 2896d6b, commit 214d380

File tree: 5 files changed, +176 -47 lines changed


codersdk/workspacesdk/connector.go: 7 additions & 1 deletion
@@ -27,6 +27,9 @@ import (
 type tailnetConn interface {
     tailnet.Coordinatee
     SetDERPMap(derpMap *tailcfg.DERPMap)
+    // SetTunnelDestination indicates to tailnet that the peer id is a
+    // destination.
+    SetTunnelDestination(id uuid.UUID)
 }
 
 // tailnetAPIConnector dials the tailnet API (v2+) and then uses the API with a tailnet.Conn to
@@ -75,6 +78,7 @@ func runTailnetAPIConnector(
         connected: make(chan error, 1),
         closed:    make(chan struct{}),
     }
+    conn.SetTunnelDestination(agentID)
     tac.gracefulCtx, tac.cancelGracefulCtx = context.WithCancel(context.Background())
     go tac.manageGracefulTimeout()
     go tac.run()
@@ -86,9 +90,11 @@
 func (tac *tailnetAPIConnector) manageGracefulTimeout() {
     defer tac.cancelGracefulCtx()
     <-tac.ctx.Done()
+    timer := time.NewTimer(time.Second)
+    defer timer.Stop()
     select {
     case <-tac.closed:
-    case <-time.After(time.Second):
+    case <-timer.C:
     }
 }
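The manageGracefulTimeout change swaps time.After for an explicit timer. time.After's underlying timer is not reclaimed until it fires, so when tac.closed wins the select the old code kept a live timer for up to a second. A minimal stdlib-only sketch of the pattern (function and variable names here are illustrative, not from the repo):

package main

import (
    "fmt"
    "time"
)

// waitOrTimeout blocks until done is closed or d elapses. Using NewTimer
// plus a deferred Stop (instead of time.After) lets the runtime reclaim
// the timer immediately when done wins the select.
func waitOrTimeout(done <-chan struct{}, d time.Duration) bool {
    timer := time.NewTimer(d)
    defer timer.Stop()
    select {
    case <-done:
        return true
    case <-timer.C:
        return false
    }
}

func main() {
    done := make(chan struct{})
    close(done)
    fmt.Println(waitOrTimeout(done, time.Second)) // true, and the timer is stopped right away
}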

codersdk/workspacesdk/connector_internal_test.go: 2 additions & 0 deletions
@@ -102,6 +102,8 @@ func (*fakeTailnetConn) SetNodeCallback(func(*tailnet.Node)) {}
 
 func (*fakeTailnetConn) SetDERPMap(*tailcfg.DERPMap) {}
 
+func (*fakeTailnetConn) SetTunnelDestination(uuid.UUID) {}
+
 func newFakeTailnetConn() *fakeTailnetConn {
     return &fakeTailnetConn{}
 }
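The fake gains the new method so it keeps satisfying the widened tailnetConn interface. One common safeguard for fakes like this is a compile-time assertion; here is a hedged, self-contained sketch with simplified stand-in types (none of these names are from the codebase):

package main

import "fmt"

// A cut-down stand-in for an interface that a fake must implement.
type tunnelConn interface {
    SetTunnelDestination(id string)
}

type fakeConn struct{ dest string }

func (f *fakeConn) SetTunnelDestination(id string) { f.dest = id }

// Compile-time assertion: the build fails on this line, with a clear
// message, if *fakeConn ever stops implementing tunnelConn.
var _ tunnelConn = (*fakeConn)(nil)

func main() {
    f := &fakeConn{}
    f.SetTunnelDestination("agent-1")
    fmt.Println(f.dest)
}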

tailnet/configmaps.go: 81 additions & 37 deletions
@@ -56,11 +56,10 @@ type phased struct {
 
 type configMaps struct {
     phased
-    netmapDirty           bool
-    derpMapDirty          bool
-    filterDirty           bool
-    closing               bool
-    waitReadyForHandshake bool
+    netmapDirty  bool
+    derpMapDirty bool
+    filterDirty  bool
+    closing      bool
 
     engine engineConfigurable
     static netmap.NetworkMap
@@ -218,8 +217,9 @@ func (c *configMaps) peerConfigLocked() []*tailcfg.Node {
     out := make([]*tailcfg.Node, 0, len(c.peers))
     for _, p := range c.peers {
         // Don't add nodes that we havent received a READY_FOR_HANDSHAKE for
-        // yet, if we want to wait for them.
-        if !p.readyForHandshake && c.waitReadyForHandshake {
+        // yet, if they're a destination. If we received a READY_FOR_HANDSHAKE
+        // for a peer before we receive their node, the node will be nil.
+        if (!p.readyForHandshake && p.isDestination) || p.node == nil {
             continue
         }
         n := p.node.Clone()
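Read as a predicate, the new guard programs a peer into wireguard only when its node is known and, for tunnel destinations, only once READY_FOR_HANDSHAKE has arrived. A standalone sketch of that logic, with illustrative names rather than the package's actual types:

package main

import "fmt"

type peerState struct {
    hasNode           bool // stands in for node != nil
    isDestination     bool
    readyForHandshake bool
}

// shouldProgram mirrors the guard in peerConfigLocked: skip peers whose
// node we have not seen, and destinations that have not acknowledged us.
func shouldProgram(p peerState) bool {
    if !p.hasNode {
        return false // READY_FOR_HANDSHAKE arrived before the NODE update
    }
    if p.isDestination && !p.readyForHandshake {
        return false // wait for the destination to acknowledge us
    }
    return true
}

func main() {
    fmt.Println(shouldProgram(peerState{hasNode: true, isDestination: true}))                          // false
    fmt.Println(shouldProgram(peerState{hasNode: true, isDestination: true, readyForHandshake: true})) // true
    fmt.Println(shouldProgram(peerState{hasNode: true}))                                               // true: tunnel source
}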
@@ -231,10 +231,17 @@ func (c *configMaps) peerConfigLocked() []*tailcfg.Node {
     return out
 }
 
-func (c *configMaps) setWaitForHandshake(wait bool) {
+func (c *configMaps) setTunnelDestinaion(id uuid.UUID) {
     c.L.Lock()
     defer c.L.Unlock()
-    c.waitReadyForHandshake = wait
+    lc, ok := c.peers[id]
+    if !ok {
+        lc = &peerLifecycle{
+            peerID: id,
+        }
+        c.peers[id] = lc
+    }
+    lc.isDestination = true
 }
 
 // setAddresses sets the addresses belonging to this node to the given slice. It
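setTunnelDestinaion can run before the coordinator has ever mentioned the peer, so it get-or-creates the lifecycle entry under the lock instead of assuming it exists. A generic sketch of that upsert pattern, using hypothetical names:

package main

import (
    "fmt"
    "sync"
)

type entry struct {
    id            string
    isDestination bool
}

type registry struct {
    mu      sync.Mutex
    entries map[string]*entry
}

// markDestination flags id as a destination, creating the entry if the
// caller got here before any other event introduced the peer.
func (r *registry) markDestination(id string) {
    r.mu.Lock()
    defer r.mu.Unlock()
    e, ok := r.entries[id]
    if !ok {
        e = &entry{id: id}
        r.entries[id] = e
    }
    e.isDestination = true
}

func main() {
    r := &registry{entries: map[string]*entry{}}
    r.markDestination("peer-1") // works even though "peer-1" was never added
    fmt.Println(r.entries["peer-1"].isDestination)
}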
@@ -343,8 +350,10 @@ func (c *configMaps) updatePeers(updates []*proto.CoordinateResponse_PeerUpdate)
     // worry about them being up-to-date when handling updates below, and it covers
     // all peers, not just the ones we got updates about.
     for _, lc := range c.peers {
-        if peerStatus, ok := status.Peer[lc.node.Key]; ok {
-            lc.lastHandshake = peerStatus.LastHandshake
+        if lc.node != nil {
+            if peerStatus, ok := status.Peer[lc.node.Key]; ok {
+                lc.lastHandshake = peerStatus.LastHandshake
+            }
         }
     }
 
@@ -399,7 +408,7 @@ func (c *configMaps) updatePeerLocked(update *proto.CoordinateResponse_PeerUpdat
         // SSH connections don't send packets while idle, so we use keep alives
         // to avoid random hangs while we set up the connection again after
         // inactivity.
-        node.KeepAlive = (statusOk && peerStatus.Active) || (peerOk && lc.node.KeepAlive)
+        node.KeepAlive = (statusOk && peerStatus.Active) || (peerOk && lc.node != nil && lc.node.KeepAlive)
     }
     switch {
     case !peerOk && update.Kind == proto.CoordinateResponse_PeerUpdate_NODE:
@@ -409,17 +418,14 @@ func (c *configMaps) updatePeerLocked(update *proto.CoordinateResponse_PeerUpdat
             lastHandshake = ps.LastHandshake
         }
         lc = &peerLifecycle{
-            peerID:            id,
-            node:              node,
-            lastHandshake:     lastHandshake,
-            lost:              false,
-            readyForHandshake: !c.waitReadyForHandshake,
-        }
-        if c.waitReadyForHandshake {
-            lc.readyForHandshakeTimer = c.clock.AfterFunc(5*time.Second, func() {
-                logger.Debug(context.Background(), "ready for handshake timeout")
-                c.peerReadyForHandshakeTimeout(id)
-            })
+            peerID:        id,
+            node:          node,
+            lastHandshake: lastHandshake,
+            lost:          false,
+            // If we're receiving a NODE update for a peer we don't already have
+            // a lifecycle for, it's likely the source of a tunnel. We don't
+            // need to wait for a READY_FOR_HANDSHAKE.
+            readyForHandshake: true,
         }
         c.peers[id] = lc
         logger.Debug(context.Background(), "adding new peer")
@@ -428,27 +434,50 @@ func (c *configMaps) updatePeerLocked(update *proto.CoordinateResponse_PeerUpdat
         return lc.readyForHandshake
     case peerOk && update.Kind == proto.CoordinateResponse_PeerUpdate_NODE:
         // update
-        node.Created = lc.node.Created
+        if lc.node != nil {
+            node.Created = lc.node.Created
+        }
         dirty = !lc.node.Equal(node) && lc.readyForHandshake
         lc.node = node
         lc.lost = false
         lc.resetTimer()
+        if lc.isDestination {
+            if !lc.readyForHandshake {
+                lc.setReadyForHandshakeTimer(c)
+            } else {
+                lc.node.KeepAlive = true
+            }
+        }
         logger.Debug(context.Background(), "node update to existing peer", slog.F("dirty", dirty))
         return dirty
     case peerOk && update.Kind == proto.CoordinateResponse_PeerUpdate_READY_FOR_HANDSHAKE:
-        wasReady := lc.readyForHandshake
+        dirty := !lc.readyForHandshake
         lc.readyForHandshake = true
         if lc.readyForHandshakeTimer != nil {
             lc.readyForHandshakeTimer.Stop()
             lc.readyForHandshakeTimer = nil
         }
-        lc.node.KeepAlive = true
+        if lc.node != nil {
+            dirty = dirty || !lc.node.KeepAlive
+            lc.node.KeepAlive = true
+        }
         logger.Debug(context.Background(), "peer ready for handshake")
-        return !wasReady
+        // only force a reconfig if the node populated
+        return dirty && lc.node != nil
     case !peerOk && update.Kind == proto.CoordinateResponse_PeerUpdate_READY_FOR_HANDSHAKE:
-        // TODO: should we keep track of ready for handshake messages we get
-        // from unknown nodes?
+        // When we receive a READY_FOR_HANDSHAKE for a peer we don't know about,
+        // we create a peerLifecycle with the peerID and set readyForHandshake
+        // to true. Eventually we should receive a NODE update for this peer,
+        // and it'll be programmed into wireguard.
         logger.Debug(context.Background(), "got peer ready for handshake for unknown peer")
+        lc = &peerLifecycle{
+            peerID:            id,
+            lost:              true,
+            readyForHandshake: true,
+        }
+        // Timeout the peer in case we never receive a NODE update for it.
+        lc.setLostTimer(c)
+        c.peers[id] = lc
         return false
     case !peerOk:
         // disconnected or lost, but we don't have the node. No op
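The reworked switch tolerates NODE and READY_FOR_HANDSHAKE arriving in either order: each arm records what it knows and leaves the rest nil or timer-guarded until the counterpart message lands. A simplified, self-contained sketch of that convergence (the event strings and peer struct are invented for illustration):

package main

import "fmt"

type peer struct {
    node  *string // nil until a NODE event arrives
    ready bool    // false until READY_FOR_HANDSHAKE arrives
}

// apply folds one event into the peer's state.
func (p *peer) apply(event string) {
    switch event {
    case "NODE":
        n := "node-config"
        p.node = &n
    case "READY_FOR_HANDSHAKE":
        p.ready = true
    }
}

// programmable reports whether the peer could be handed to wireguard.
func (p *peer) programmable() bool {
    return p.node != nil && p.ready
}

func main() {
    // Either delivery order converges to the same programmable state.
    for _, order := range [][]string{
        {"NODE", "READY_FOR_HANDSHAKE"},
        {"READY_FOR_HANDSHAKE", "NODE"},
    } {
        p := &peer{}
        for _, ev := range order {
            p.apply(ev)
        }
        fmt.Println(order, p.programmable()) // true both times
    }
}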
@@ -606,35 +635,50 @@ func (c *configMaps) peerReadyForHandshakeTimeout(peerID uuid.UUID) {
 }
 
 type peerLifecycle struct {
-    peerID                 uuid.UUID
+    peerID uuid.UUID
+    // isDestination specifies if the peer is a destination, meaning we
+    // initiated a tunnel to the peer. When the peer is a destination, we do not
+    // respond to node updates with READY_FOR_HANDSHAKEs, and we wait to program
+    // the peer into wireguard until we receive a READY_FOR_HANDSHAKE from the
+    // peer or the timeout is reached.
+    isDestination bool
+    // node is the tailcfg.Node for the peer. It may be nil until we receive a
+    // NODE update for it.
     node                   *tailcfg.Node
     lost                   bool
     lastHandshake          time.Time
-    timer                  *clock.Timer
+    lostTimer              *clock.Timer
     readyForHandshake      bool
     readyForHandshakeTimer *clock.Timer
 }
 
 func (l *peerLifecycle) resetTimer() {
-    if l.timer != nil {
-        l.timer.Stop()
-        l.timer = nil
+    if l.lostTimer != nil {
+        l.lostTimer.Stop()
+        l.lostTimer = nil
     }
 }
 
 func (l *peerLifecycle) setLostTimer(c *configMaps) {
-    if l.timer != nil {
-        l.timer.Stop()
+    if l.lostTimer != nil {
+        l.lostTimer.Stop()
     }
     ttl := lostTimeout - c.clock.Since(l.lastHandshake)
     if ttl <= 0 {
         ttl = time.Nanosecond
     }
-    l.timer = c.clock.AfterFunc(ttl, func() {
+    l.lostTimer = c.clock.AfterFunc(ttl, func() {
         c.peerLostTimeout(l.peerID)
     })
 }
 
+func (l *peerLifecycle) setReadyForHandshakeTimer(c *configMaps) {
+    l.readyForHandshakeTimer = c.clock.AfterFunc(5*time.Second, func() {
+        c.logger.Debug(context.Background(), "ready for handshake timeout", slog.F("peer_id", l.peerID))
+        c.peerReadyForHandshakeTimeout(l.peerID)
+    })
+}
+
 // prefixesDifferent returns true if the two slices contain different prefixes
 // where order doesn't matter.
 func prefixesDifferent(a, b []netip.Prefix) bool {
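Renaming timer to lostTimer makes room for the second timer, and both follow the same discipline: stop any previously armed timer before scheduling a new one, so a stale callback cannot fire against updated state. A stdlib-only sketch of that discipline, assuming time.AfterFunc in place of the mockable clock.Timer the real code uses:

package main

import (
    "fmt"
    "sync"
    "time"
)

type watchdog struct {
    mu    sync.Mutex
    timer *time.Timer
}

// arm (re)schedules onExpiry after ttl, stopping any previous timer
// first so at most one callback is pending at a time.
func (w *watchdog) arm(ttl time.Duration, onExpiry func()) {
    w.mu.Lock()
    defer w.mu.Unlock()
    if w.timer != nil {
        w.timer.Stop()
    }
    w.timer = time.AfterFunc(ttl, onExpiry)
}

func main() {
    var w watchdog
    w.arm(10*time.Millisecond, func() { fmt.Println("first (replaced, never fires)") })
    w.arm(20*time.Millisecond, func() { fmt.Println("second fires") })
    time.Sleep(50 * time.Millisecond) // only "second fires" prints
}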
