Skip to content

fix: use TSMP for pings and checking reachability #11306

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jan 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 13 additions & 12 deletions agent/agent_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -174,10 +174,10 @@ func TestAgent_Stats_Magic(t *testing.T) {
require.NoError(t, err)
err = session.Shell()
require.NoError(t, err)
var s *agentsdk.Stats
require.Eventuallyf(t, func() bool {
var ok bool
s, ok = <-stats
s, ok := <-stats
t.Logf("got stats: ok=%t, ConnectionCount=%d, RxBytes=%d, TxBytes=%d, SessionCountVSCode=%d, ConnectionMedianLatencyMS=%f",
ok, s.ConnectionCount, s.RxBytes, s.TxBytes, s.SessionCountVSCode, s.ConnectionMedianLatencyMS)
return ok && s.ConnectionCount > 0 && s.RxBytes > 0 && s.TxBytes > 0 &&
// Ensure that the connection didn't count as a "normal" SSH session.
// This was a special one, so it should be labeled specially in the stats!
Expand All @@ -186,7 +186,7 @@ func TestAgent_Stats_Magic(t *testing.T) {
// If it isn't, it's set to -1.
s.ConnectionMedianLatencyMS >= 0
}, testutil.WaitLong, testutil.IntervalFast,
"never saw stats: %+v", s,
"never saw stats",
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This gives misleading information since the pointer is passed on the initial call to Eventually. It is not updated since <-stats returns a new pointer, rather than updating the data at the old pointer. Changed in favor of logging the relevant info as we get it.

)
// The shell will automatically exit if there is no stdin!
_ = stdin.Close()
Expand Down Expand Up @@ -240,14 +240,14 @@ func TestAgent_Stats_Magic(t *testing.T) {
_ = tunneledConn.Close()
})

var s *agentsdk.Stats
require.Eventuallyf(t, func() bool {
var ok bool
s, ok = <-stats
s, ok := <-stats
t.Logf("got stats with conn open: ok=%t, ConnectionCount=%d, SessionCountJetBrains=%d",
ok, s.ConnectionCount, s.SessionCountJetBrains)
return ok && s.ConnectionCount > 0 &&
s.SessionCountJetBrains == 1
}, testutil.WaitLong, testutil.IntervalFast,
"never saw stats with conn open: %+v", s,
"never saw stats with conn open",
)

// Kill the server and connection after checking for the echo.
Expand All @@ -256,12 +256,13 @@ func TestAgent_Stats_Magic(t *testing.T) {
_ = tunneledConn.Close()

require.Eventuallyf(t, func() bool {
var ok bool
s, ok = <-stats
return ok && s.ConnectionCount == 0 &&
s, ok := <-stats
t.Logf("got stats after disconnect %t, %d",
ok, s.SessionCountJetBrains)
return ok &&
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

TSMP ping counts as a "connection" in the agent stats so it never goes to zero. I'm not sure why we were testing it going to zero in the first place @code-asher

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we wanted to make sure ConnectionCount was also updating correctly, but did not realize there would be other connections going on.

s.SessionCountJetBrains == 0
}, testutil.WaitLong, testutil.IntervalFast,
"never saw stats after conn closes: %+v", s,
"never saw stats after conn closes",
)
})
}
Expand Down
4 changes: 2 additions & 2 deletions tailnet/conn.go
Original file line number Diff line number Diff line change
Expand Up @@ -670,12 +670,12 @@ func (c *Conn) Status() *ipnstate.Status {
return sb.Status()
}

// Ping sends a Disco ping to the Wireguard engine.
// Ping sends a ping to the Wireguard engine.
// The bool returned is true if the ping was performed P2P.
func (c *Conn) Ping(ctx context.Context, ip netip.Addr) (time.Duration, bool, *ipnstate.PingResult, error) {
errCh := make(chan error, 1)
prChan := make(chan *ipnstate.PingResult, 1)
go c.wireguardEngine.Ping(ip, tailcfg.PingDisco, func(pr *ipnstate.PingResult) {
go c.wireguardEngine.Ping(ip, tailcfg.PingTSMP, func(pr *ipnstate.PingResult) {
if pr.Err != "" {
errCh <- xerrors.New(pr.Err)
return
Expand Down