Skip to content

Commit 4e7beee

Browse files
authored
feat: show tailnet peer diagnostics after coder ping (#12314)
Beginnings of a solution to #12297.

Doesn't cover disco or definitively display whether we successfully connected to DERP, but shows some checklist diagnostics for connecting to an agent.

For this first PR, I just added it to `coder ping` to see how we like it, but it could be incorporated into `coder ssh` _et al._ after a timeout.

```
$ coder ping dogfood2
p2p connection established in 147ms
pong from dogfood2 p2p via 95.217.xxx.yyy:42631 in 147ms
pong from dogfood2 p2p via 95.217.xxx.yyy:42631 in 140ms
pong from dogfood2 p2p via 95.217.xxx.yyy:42631 in 140ms
✔ preferred DERP region 999 (Council Bluffs, Iowa)
✔ sent local data to Coder networking coodinator
✔ received remote agent data from Coder networking coordinator
  preferred DERP 10013 (Europe Fly.io (Paris))
  endpoints: 95.217.xxx.yyy:42631, 95.217.xxx.yyy:37576, 172.17.0.1:37576, 172.20.0.10:37576
✔ Wireguard handshake 11s ago
```
1 parent 32691e6 commit 4e7beee

11 files changed

+503
-8
lines changed

cli/cliui/agent.go

+56
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,17 @@ package cliui
22

33
import (
44
"context"
5+
"fmt"
56
"io"
7+
"strconv"
8+
"strings"
69
"time"
710

811
"github.com/google/uuid"
912
"golang.org/x/xerrors"
1013

1114
"github.com/coder/coder/v2/codersdk"
15+
"github.com/coder/coder/v2/tailnet"
1216
)
1317

1418
var errAgentShuttingDown = xerrors.New("agent is shutting down")
@@ -281,3 +285,55 @@ type closeFunc func() error
281285
func (c closeFunc) Close() error {
282286
return c()
283287
}
288+
289+
func PeerDiagnostics(w io.Writer, d tailnet.PeerDiagnostics) {
290+
if d.PreferredDERP > 0 {
291+
rn, ok := d.DERPRegionNames[d.PreferredDERP]
292+
if !ok {
293+
rn = "unknown"
294+
}
295+
_, _ = fmt.Fprintf(w, "✔ preferred DERP region: %d (%s)\n", d.PreferredDERP, rn)
296+
} else {
297+
_, _ = fmt.Fprint(w, "✘ not connected to DERP\n")
298+
}
299+
if d.SentNode {
300+
_, _ = fmt.Fprint(w, "✔ sent local data to Coder networking coodinator\n")
301+
} else {
302+
_, _ = fmt.Fprint(w, "✘ have not sent local data to Coder networking coordinator\n")
303+
}
304+
if d.ReceivedNode != nil {
305+
dp := d.ReceivedNode.DERP
306+
dn := ""
307+
// should be 127.3.3.40:N where N is the DERP region
308+
ap := strings.Split(dp, ":")
309+
if len(ap) == 2 {
310+
dp = ap[1]
311+
di, err := strconv.Atoi(dp)
312+
if err == nil {
313+
var ok bool
314+
dn, ok = d.DERPRegionNames[di]
315+
if ok {
316+
dn = fmt.Sprintf("(%s)", dn)
317+
} else {
318+
dn = "(unknown)"
319+
}
320+
}
321+
}
322+
_, _ = fmt.Fprintf(w,
323+
"✔ received remote agent data from Coder networking coordinator\n preferred DERP region: %s %s\n endpoints: %s\n",
324+
dp, dn, strings.Join(d.ReceivedNode.Endpoints, ", "))
325+
} else {
326+
_, _ = fmt.Fprint(w, "✘ have not received remote agent data from Coder networking coordinator\n")
327+
}
328+
if !d.LastWireguardHandshake.IsZero() {
329+
ago := time.Since(d.LastWireguardHandshake)
330+
symbol := "✔"
331+
// wireguard is supposed to refresh handshake on 5 minute intervals
332+
if ago > 5*time.Minute {
333+
symbol = "⚠"
334+
}
335+
_, _ = fmt.Fprintf(w, "%s Wireguard handshake %s ago\n", symbol, ago.Round(time.Second))
336+
} else {
337+
_, _ = fmt.Fprint(w, "✘ Wireguard is not connected\n")
338+
}
339+
}

cli/cliui/agent_test.go

+191
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import (
66
"context"
77
"io"
88
"os"
9+
"regexp"
910
"strings"
1011
"sync/atomic"
1112
"testing"
@@ -15,12 +16,14 @@ import (
1516
"github.com/stretchr/testify/assert"
1617
"github.com/stretchr/testify/require"
1718
"golang.org/x/xerrors"
19+
"tailscale.com/tailcfg"
1820

1921
"github.com/coder/coder/v2/cli/clibase"
2022
"github.com/coder/coder/v2/cli/clitest"
2123
"github.com/coder/coder/v2/cli/cliui"
2224
"github.com/coder/coder/v2/coderd/util/ptr"
2325
"github.com/coder/coder/v2/codersdk"
26+
"github.com/coder/coder/v2/tailnet"
2427
"github.com/coder/coder/v2/testutil"
2528
)
2629

@@ -476,3 +479,191 @@ func TestAgent(t *testing.T) {
476479
require.NoError(t, cmd.Invoke().Run())
477480
})
478481
}
482+
483+
func TestPeerDiagnostics(t *testing.T) {
484+
t.Parallel()
485+
testCases := []struct {
486+
name string
487+
diags tailnet.PeerDiagnostics
488+
want []*regexp.Regexp // must be ordered, can omit lines
489+
}{
490+
{
491+
name: "noPreferredDERP",
492+
diags: tailnet.PeerDiagnostics{
493+
PreferredDERP: 0,
494+
DERPRegionNames: make(map[int]string),
495+
SentNode: true,
496+
ReceivedNode: &tailcfg.Node{DERP: "127.3.3.40:999"},
497+
LastWireguardHandshake: time.Now(),
498+
},
499+
want: []*regexp.Regexp{
500+
regexp.MustCompile("^✘ not connected to DERP$"),
501+
},
502+
},
503+
{
504+
name: "preferredDERP",
505+
diags: tailnet.PeerDiagnostics{
506+
PreferredDERP: 23,
507+
DERPRegionNames: map[int]string{
508+
23: "testo",
509+
},
510+
SentNode: true,
511+
ReceivedNode: &tailcfg.Node{DERP: "127.3.3.40:999"},
512+
LastWireguardHandshake: time.Now(),
513+
},
514+
want: []*regexp.Regexp{
515+
regexp.MustCompile(`^✔ preferred DERP region: 23 \(testo\)$`),
516+
},
517+
},
518+
{
519+
name: "sentNode",
520+
diags: tailnet.PeerDiagnostics{
521+
PreferredDERP: 0,
522+
DERPRegionNames: map[int]string{},
523+
SentNode: true,
524+
ReceivedNode: &tailcfg.Node{DERP: "127.3.3.40:999"},
525+
LastWireguardHandshake: time.Time{},
526+
},
527+
want: []*regexp.Regexp{
528+
regexp.MustCompile(`^✔ sent local data to Coder networking coodinator$`),
529+
},
530+
},
531+
{
532+
name: "didntSendNode",
533+
diags: tailnet.PeerDiagnostics{
534+
PreferredDERP: 0,
535+
DERPRegionNames: map[int]string{},
536+
SentNode: false,
537+
ReceivedNode: &tailcfg.Node{DERP: "127.3.3.40:999"},
538+
LastWireguardHandshake: time.Time{},
539+
},
540+
want: []*regexp.Regexp{
541+
regexp.MustCompile(`^✘ have not sent local data to Coder networking coordinator$`),
542+
},
543+
},
544+
{
545+
name: "receivedNodeDERPOKNoEndpoints",
546+
diags: tailnet.PeerDiagnostics{
547+
PreferredDERP: 0,
548+
DERPRegionNames: map[int]string{999: "Embedded"},
549+
SentNode: true,
550+
ReceivedNode: &tailcfg.Node{DERP: "127.3.3.40:999"},
551+
LastWireguardHandshake: time.Time{},
552+
},
553+
want: []*regexp.Regexp{
554+
regexp.MustCompile(`^✔ received remote agent data from Coder networking coordinator$`),
555+
regexp.MustCompile(`preferred DERP region: 999 \(Embedded\)$`),
556+
regexp.MustCompile(`endpoints: $`),
557+
},
558+
},
559+
{
560+
name: "receivedNodeDERPUnknownNoEndpoints",
561+
diags: tailnet.PeerDiagnostics{
562+
PreferredDERP: 0,
563+
DERPRegionNames: map[int]string{},
564+
SentNode: true,
565+
ReceivedNode: &tailcfg.Node{DERP: "127.3.3.40:999"},
566+
LastWireguardHandshake: time.Time{},
567+
},
568+
want: []*regexp.Regexp{
569+
regexp.MustCompile(`^✔ received remote agent data from Coder networking coordinator$`),
570+
regexp.MustCompile(`preferred DERP region: 999 \(unknown\)$`),
571+
regexp.MustCompile(`endpoints: $`),
572+
},
573+
},
574+
{
575+
name: "receivedNodeEndpointsNoDERP",
576+
diags: tailnet.PeerDiagnostics{
577+
PreferredDERP: 0,
578+
DERPRegionNames: map[int]string{999: "Embedded"},
579+
SentNode: true,
580+
ReceivedNode: &tailcfg.Node{Endpoints: []string{"99.88.77.66:4555", "33.22.11.0:3444"}},
581+
LastWireguardHandshake: time.Time{},
582+
},
583+
want: []*regexp.Regexp{
584+
regexp.MustCompile(`^✔ received remote agent data from Coder networking coordinator$`),
585+
regexp.MustCompile(`preferred DERP region:\s*$`),
586+
regexp.MustCompile(`endpoints: 99\.88\.77\.66:4555, 33\.22\.11\.0:3444$`),
587+
},
588+
},
589+
{
590+
name: "didntReceiveNode",
591+
diags: tailnet.PeerDiagnostics{
592+
PreferredDERP: 0,
593+
DERPRegionNames: map[int]string{},
594+
SentNode: false,
595+
ReceivedNode: nil,
596+
LastWireguardHandshake: time.Time{},
597+
},
598+
want: []*regexp.Regexp{
599+
regexp.MustCompile(`^✘ have not received remote agent data from Coder networking coordinator$`),
600+
},
601+
},
602+
{
603+
name: "noWireguardHandshake",
604+
diags: tailnet.PeerDiagnostics{
605+
PreferredDERP: 0,
606+
DERPRegionNames: map[int]string{},
607+
SentNode: false,
608+
ReceivedNode: nil,
609+
LastWireguardHandshake: time.Time{},
610+
},
611+
want: []*regexp.Regexp{
612+
regexp.MustCompile(`^✘ Wireguard is not connected$`),
613+
},
614+
},
615+
{
616+
name: "wireguardHandshakeRecent",
617+
diags: tailnet.PeerDiagnostics{
618+
PreferredDERP: 0,
619+
DERPRegionNames: map[int]string{},
620+
SentNode: false,
621+
ReceivedNode: nil,
622+
LastWireguardHandshake: time.Now().Add(-5 * time.Second),
623+
},
624+
want: []*regexp.Regexp{
625+
regexp.MustCompile(`^✔ Wireguard handshake \d+s ago$`),
626+
},
627+
},
628+
{
629+
name: "wireguardHandshakeOld",
630+
diags: tailnet.PeerDiagnostics{
631+
PreferredDERP: 0,
632+
DERPRegionNames: map[int]string{},
633+
SentNode: false,
634+
ReceivedNode: nil,
635+
LastWireguardHandshake: time.Now().Add(-450 * time.Second), // 7m30s
636+
},
637+
want: []*regexp.Regexp{
638+
regexp.MustCompile(`^⚠ Wireguard handshake 7m\d+s ago$`),
639+
},
640+
},
641+
}
642+
for _, tc := range testCases {
643+
tc := tc
644+
t.Run(tc.name, func(t *testing.T) {
645+
t.Parallel()
646+
r, w := io.Pipe()
647+
go func() {
648+
defer w.Close()
649+
cliui.PeerDiagnostics(w, tc.diags)
650+
}()
651+
s := bufio.NewScanner(r)
652+
i := 0
653+
got := make([]string, 0)
654+
for s.Scan() {
655+
got = append(got, s.Text())
656+
if i < len(tc.want) {
657+
reg := tc.want[i]
658+
if reg.Match(s.Bytes()) {
659+
i++
660+
}
661+
}
662+
}
663+
if i < len(tc.want) {
664+
t.Logf("failed to match regexp: %s\ngot:\n%s", tc.want[i].String(), strings.Join(got, "\n"))
665+
t.FailNow()
666+
}
667+
})
668+
}
669+
}

cli/ping.go

+2
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,8 @@ func (r *RootCmd) ping() *clibase.Cmd {
135135
)
136136

137137
if n == int(pingNum) {
138+
diags := conn.GetPeerDiagnostics()
139+
cliui.PeerDiagnostics(inv.Stdout, diags)
138140
return nil
139141
}
140142
}

cli/ping_test.go

+28
Original file line numberDiff line numberDiff line change
@@ -42,4 +42,32 @@ func TestPing(t *testing.T) {
4242
cancel()
4343
<-cmdDone
4444
})
45+
46+
t.Run("1Ping", func(t *testing.T) {
47+
t.Parallel()
48+
49+
client, workspace, agentToken := setupWorkspaceForAgent(t)
50+
inv, root := clitest.New(t, "ping", "-n", "1", workspace.Name)
51+
clitest.SetupConfig(t, client, root)
52+
pty := ptytest.New(t)
53+
inv.Stdin = pty.Input()
54+
inv.Stderr = pty.Output()
55+
inv.Stdout = pty.Output()
56+
57+
_ = agenttest.New(t, client.URL, agentToken)
58+
_ = coderdtest.AwaitWorkspaceAgents(t, client, workspace.ID)
59+
60+
ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong)
61+
defer cancel()
62+
63+
cmdDone := tGo(t, func() {
64+
err := inv.WithContext(ctx).Run()
65+
assert.NoError(t, err)
66+
})
67+
68+
pty.ExpectMatch("pong from " + workspace.Name)
69+
pty.ExpectMatch("✔ received remote agent data from Coder networking coordinator")
70+
cancel()
71+
<-cmdDone
72+
})
4573
}

cli/ssh.go

+2-2
Original file line numberDiff line numberDiff line change
@@ -563,7 +563,7 @@ func getWorkspaceAndAgent(ctx context.Context, inv *clibase.Invocation, client *
563563

564564
if workspace.LatestBuild.Transition != codersdk.WorkspaceTransitionStart {
565565
if !autostart {
566-
return codersdk.Workspace{}, codersdk.WorkspaceAgent{}, xerrors.New("workspace must be in start transition to ssh")
566+
return codersdk.Workspace{}, codersdk.WorkspaceAgent{}, xerrors.New("workspace must be started")
567567
}
568568
// Autostart the workspace for the user.
569569
// For some failure modes, return a better message.
@@ -579,7 +579,7 @@ func getWorkspaceAndAgent(ctx context.Context, inv *clibase.Invocation, client *
579579
// It cannot be in any pending or failed state.
580580
if workspace.LatestBuild.Status != codersdk.WorkspaceStatusStopped {
581581
return codersdk.Workspace{}, codersdk.WorkspaceAgent{},
582-
xerrors.Errorf("workspace must be in start transition to ssh, was unable to autostart as the last build job is %q, expected %q",
582+
xerrors.Errorf("workspace must be started; was unable to autostart as the last build job is %q, expected %q",
583583
workspace.LatestBuild.Status,
584584
codersdk.WorkspaceStatusStopped,
585585
)

codersdk/workspaceagentconn.go

+4
Original file line numberDiff line numberDiff line change
@@ -414,3 +414,7 @@ func (c *WorkspaceAgentConn) apiClient() *http.Client {
414414
},
415415
}
416416
}
417+
418+
func (c *WorkspaceAgentConn) GetPeerDiagnostics() tailnet.PeerDiagnostics {
419+
return c.Conn.GetPeerDiagnostics(c.opts.AgentID)
420+
}

tailnet/configmaps.go

+21
Original file line numberDiff line numberDiff line change
@@ -521,6 +521,27 @@ func (c *configMaps) nodeAddresses(publicKey key.NodePublic) ([]netip.Prefix, bo
521521
return nil, false
522522
}
523523

524+
func (c *configMaps) fillPeerDiagnostics(d *PeerDiagnostics, peerID uuid.UUID) {
525+
status := c.status()
526+
c.L.Lock()
527+
defer c.L.Unlock()
528+
if c.derpMap != nil {
529+
for j, r := range c.derpMap.Regions {
530+
d.DERPRegionNames[j] = r.RegionName
531+
}
532+
}
533+
lc, ok := c.peers[peerID]
534+
if !ok {
535+
return
536+
}
537+
d.ReceivedNode = lc.node
538+
ps, ok := status.Peer[lc.node.Key]
539+
if !ok {
540+
return
541+
}
542+
d.LastWireguardHandshake = ps.LastHandshake
543+
}
544+
524545
type peerLifecycle struct {
525546
peerID uuid.UUID
526547
node *tailcfg.Node

0 commit comments

Comments
 (0)