From b0d789b06f2f2e6911f873c2f9b829eae8cfb158 Mon Sep 17 00:00:00 2001 From: Mathias Fredriksson Date: Tue, 15 Nov 2022 14:23:25 +0000 Subject: [PATCH 1/2] fix: Refactor tailnet conn AwaitReachable to allow for pings >1s RTT --- tailnet/conn.go | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/tailnet/conn.go b/tailnet/conn.go index 5a5c8e50f394d..86491d1fac690 100644 --- a/tailnet/conn.go +++ b/tailnet/conn.go @@ -424,19 +424,22 @@ func (c *Conn) Ping(ctx context.Context, ip netip.Addr) (time.Duration, error) { // address is reachable. It's the callers responsibility to provide // a timeout, otherwise this function will block forever. func (c *Conn) AwaitReachable(ctx context.Context, ip netip.Addr) bool { - ticker := time.NewTicker(time.Millisecond * 100) - defer ticker.Stop() - completedCtx, completed := context.WithCancel(ctx) + ctx, cancel := context.WithCancel(ctx) + defer cancel() // Cancel all pending pings on exit. + + completedCtx, completed := context.WithCancel(context.Background()) + defer completed() + run := func() { - ctx, cancelFunc := context.WithTimeout(completedCtx, time.Second) - defer cancelFunc() _, err := c.Ping(ctx, ip) if err == nil { completed() } } + + ticker := time.NewTicker(time.Millisecond * 100) + defer ticker.Stop() go run() - defer completed() for { select { case <-completedCtx.Done(): From b7256c5682b60cbf56f31bccc3b79c46ef2eb0d5 Mon Sep 17 00:00:00 2001 From: Mathias Fredriksson Date: Tue, 15 Nov 2022 17:39:08 +0000 Subject: [PATCH 2/2] fix: Add exponential backoff --- tailnet/conn.go | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/tailnet/conn.go b/tailnet/conn.go index 86491d1fac690..325783c48bda8 100644 --- a/tailnet/conn.go +++ b/tailnet/conn.go @@ -11,6 +11,7 @@ import ( "sync" "time" + "github.com/cenkalti/backoff/v4" "github.com/google/uuid" "go4.org/netipx" "golang.org/x/xerrors" @@ -431,20 +432,36 @@ func (c *Conn) AwaitReachable(ctx context.Context, ip netip.Addr) bool { defer completed() run := func() { + // Safety timeout, initially we'll have around 10-20 goroutines + // running in parallel. The exponential backoff will converge + // around ~1 ping / 30s, this means we'll have around 10-20 + // goroutines pending towards the end as well. + ctx, cancel := context.WithTimeout(ctx, 5*time.Minute) + defer cancel() + _, err := c.Ping(ctx, ip) if err == nil { completed() } } - ticker := time.NewTicker(time.Millisecond * 100) - defer ticker.Stop() + eb := backoff.NewExponentialBackOff() + eb.MaxElapsedTime = 0 + eb.InitialInterval = 50 * time.Millisecond + eb.MaxInterval = 30 * time.Second + // Consume the first interval since + // we'll fire off a ping immediately. + _ = eb.NextBackOff() + + t := backoff.NewTicker(eb) + defer t.Stop() + go run() for { select { case <-completedCtx.Done(): return true - case <-ticker.C: + case <-t.C: // Pings can take a while, so we can run multiple // in parallel to return ASAP. go run()