Skip to content

Commit 48d4734

Browse files
committed
feat: add setAllPeersLost to configMaps
1 parent 8156019 commit 48d4734

File tree

2 files changed

+108
-0
lines changed

2 files changed

+108
-0
lines changed

tailnet/configmaps.go

+23
Original file line numberDiff line numberDiff line change
@@ -430,6 +430,29 @@ func (c *configMaps) updatePeerLocked(update *proto.CoordinateResponse_PeerUpdat
430430
}
431431
}
432432

433+
// setAllPeersLost marks all peers as lost. Typically, this is called when we lose connection to
434+
// the Coordinator. (When we reconnect, we will get NODE updates for all peers that are still connected
435+
// and mark them as not lost.)
436+
func (c *configMaps) setAllPeersLost() {
437+
c.L.Lock()
438+
defer c.L.Unlock()
439+
for _, lc := range c.peers {
440+
if lc.lost {
441+
// skip processing already lost nodes, as this just results in timer churn
442+
continue
443+
}
444+
lc.lost = true
445+
lc.setLostTimer(c)
446+
// it's important to drop a log here so that we see it get marked lost if grepping thru
447+
// the logs for a specific peer
448+
c.logger.Debug(context.Background(),
449+
"setAllPeersLost marked peer lost",
450+
slog.F("peer_id", lc.peerID),
451+
slog.F("key_id", lc.node.Key.ShortString()),
452+
)
453+
}
454+
}
455+
433456
// peerLostTimeout is the callback that peerLifecycle uses when a peer is lost the timeout to
434457
// receive a handshake fires.
435458
func (c *configMaps) peerLostTimeout(id uuid.UUID) {

tailnet/configmaps_internal_test.go

+85
Original file line numberDiff line numberDiff line change
@@ -491,6 +491,91 @@ func TestConfigMaps_updatePeers_lost_and_found(t *testing.T) {
491491
_ = testutil.RequireRecvCtx(ctx, t, done)
492492
}
493493

494+
func TestConfigMaps_setAllPeersLost(t *testing.T) {
495+
t.Parallel()
496+
ctx := testutil.Context(t, testutil.WaitShort)
497+
logger := slogtest.Make(t, nil).Leveled(slog.LevelDebug)
498+
fEng := newFakeEngineConfigurable()
499+
nodePrivateKey := key.NewNode()
500+
nodeID := tailcfg.NodeID(5)
501+
discoKey := key.NewDisco()
502+
uut := newConfigMaps(logger, fEng, nodeID, nodePrivateKey, discoKey.Public())
503+
defer uut.close()
504+
start := time.Date(2024, time.January, 1, 8, 0, 0, 0, time.UTC)
505+
mClock := clock.NewMock()
506+
mClock.Set(start)
507+
uut.clock = mClock
508+
509+
p1ID := uuid.UUID{1}
510+
p1Node := newTestNode(1)
511+
p1n, err := NodeToProto(p1Node)
512+
require.NoError(t, err)
513+
p2ID := uuid.UUID{2}
514+
p2Node := newTestNode(2)
515+
p2n, err := NodeToProto(p2Node)
516+
require.NoError(t, err)
517+
518+
s1 := expectStatusWithHandshake(ctx, t, fEng, p1Node.Key, start)
519+
520+
updates := []*proto.CoordinateResponse_PeerUpdate{
521+
{
522+
Id: p1ID[:],
523+
Kind: proto.CoordinateResponse_PeerUpdate_NODE,
524+
Node: p1n,
525+
},
526+
{
527+
Id: p2ID[:],
528+
Kind: proto.CoordinateResponse_PeerUpdate_NODE,
529+
Node: p2n,
530+
},
531+
}
532+
uut.updatePeers(updates)
533+
nm := testutil.RequireRecvCtx(ctx, t, fEng.setNetworkMap)
534+
r := testutil.RequireRecvCtx(ctx, t, fEng.reconfig)
535+
require.Len(t, nm.Peers, 2)
536+
require.Len(t, r.wg.Peers, 2)
537+
_ = testutil.RequireRecvCtx(ctx, t, s1)
538+
539+
mClock.Add(5 * time.Second)
540+
uut.setAllPeersLost()
541+
542+
// No reprogramming yet, since we keep the peer around.
543+
select {
544+
case <-fEng.setNetworkMap:
545+
t.Fatal("should not reprogram")
546+
default:
547+
// OK!
548+
}
549+
550+
// When we advance the clock, even by a few ms, the timeout for peer 2 pops
551+
// because our status only includes a handshake for peer 1
552+
s2 := expectStatusWithHandshake(ctx, t, fEng, p1Node.Key, start)
553+
mClock.Add(time.Millisecond * 10)
554+
_ = testutil.RequireRecvCtx(ctx, t, s2)
555+
556+
nm = testutil.RequireRecvCtx(ctx, t, fEng.setNetworkMap)
557+
r = testutil.RequireRecvCtx(ctx, t, fEng.reconfig)
558+
require.Len(t, nm.Peers, 1)
559+
require.Len(t, r.wg.Peers, 1)
560+
561+
// Finally, advance the clock until after the timeout
562+
s3 := expectStatusWithHandshake(ctx, t, fEng, p1Node.Key, start)
563+
mClock.Add(lostTimeout)
564+
_ = testutil.RequireRecvCtx(ctx, t, s3)
565+
566+
nm = testutil.RequireRecvCtx(ctx, t, fEng.setNetworkMap)
567+
r = testutil.RequireRecvCtx(ctx, t, fEng.reconfig)
568+
require.Len(t, nm.Peers, 0)
569+
require.Len(t, r.wg.Peers, 0)
570+
571+
done := make(chan struct{})
572+
go func() {
573+
defer close(done)
574+
uut.close()
575+
}()
576+
_ = testutil.RequireRecvCtx(ctx, t, done)
577+
}
578+
494579
func TestConfigMaps_setBlockEndpoints_different(t *testing.T) {
495580
t.Parallel()
496581
ctx := testutil.Context(t, testutil.WaitShort)

0 commit comments

Comments
 (0)