From 4fc8c94c67ca265b9f83b681a1faad98febd4fe2 Mon Sep 17 00:00:00 2001 From: Ethan Dickson Date: Wed, 4 Sep 2024 04:47:05 +0000 Subject: [PATCH] docs: add networking troubleshooting page --- cli/cliui/agent.go | 32 +++++--- cli/cliui/agent_test.go | 2 +- docs/manifest.json | 5 ++ docs/networking/index.md | 36 +-------- docs/networking/troubleshooting.md | 124 +++++++++++++++++++++++++++++ 5 files changed, 154 insertions(+), 45 deletions(-) create mode 100644 docs/networking/troubleshooting.md diff --git a/cli/cliui/agent.go b/cli/cliui/agent.go index 9f528a6d69f20..889fdb982d8c3 100644 --- a/cli/cliui/agent.go +++ b/cli/cliui/agent.go @@ -309,7 +309,7 @@ func PeerDiagnostics(w io.Writer, d tailnet.PeerDiagnostics) { _, _ = fmt.Fprint(w, "✘ not connected to DERP\n") } if d.SentNode { - _, _ = fmt.Fprint(w, "✔ sent local data to Coder networking coodinator\n") + _, _ = fmt.Fprint(w, "✔ sent local data to Coder networking coordinator\n") } else { _, _ = fmt.Fprint(w, "✘ have not sent local data to Coder networking coordinator\n") } @@ -394,12 +394,18 @@ func (d ConnDiags) splitDiagnostics() (general, client, agent []string) { for _, msg := range d.AgentNetcheck.Interfaces.Warnings { agent = append(agent, msg.Message) } + if len(d.AgentNetcheck.Interfaces.Warnings) > 0 { + agent[len(agent)-1] += "\nhttps://coder.com/docs/networking/troubleshooting#low-mtu" + } } if d.LocalInterfaces != nil { for _, msg := range d.LocalInterfaces.Warnings { client = append(client, msg.Message) } + if len(d.LocalInterfaces.Warnings) > 0 { + client[len(client)-1] += "\nhttps://coder.com/docs/networking/troubleshooting#low-mtu" + } } if d.PingP2P && !d.Verbose { @@ -414,37 +420,45 @@ func (d ConnDiags) splitDiagnostics() (general, client, agent []string) { } if d.ConnInfo.DisableDirectConnections { - general = append(general, "❗ Your Coder administrator has blocked direct connections") + general = append(general, "❗ Your Coder administrator has blocked direct connections\n"+ + " 
https://coder.com/docs/networking/troubleshooting#disabled-deployment-wide") if !d.Verbose { return general, client, agent } } if !d.ConnInfo.DERPMap.HasSTUN() { - general = append(general, "The DERP map is not configured to use STUN") + general = append(general, "❗ The DERP map is not configured to use STUN\n"+ + " https://coder.com/docs/networking/troubleshooting#no-stun-servers") } else if d.LocalNetInfo != nil && !d.LocalNetInfo.UDP { - client = append(client, "Client could not connect to STUN over UDP") + client = append(client, "Client could not connect to STUN over UDP\n"+ + " https://coder.com/docs/networking/troubleshooting#udp-blocked") } if d.LocalNetInfo != nil && d.LocalNetInfo.MappingVariesByDestIP.EqualBool(true) { - client = append(client, "Client is potentially behind a hard NAT, as multiple endpoints were retrieved from different STUN servers") + client = append(client, "Client is potentially behind a hard NAT, as multiple endpoints were retrieved from different STUN servers\n"+ + " https://coder.com/docs/networking/troubleshooting#Endpoint-Dependent-Nat-Hard-NAT") } if d.AgentNetcheck != nil && d.AgentNetcheck.NetInfo != nil { if d.AgentNetcheck.NetInfo.MappingVariesByDestIP.EqualBool(true) { - agent = append(agent, "Agent is potentially behind a hard NAT, as multiple endpoints were retrieved from different STUN servers") + agent = append(agent, "Agent is potentially behind a hard NAT, as multiple endpoints were retrieved from different STUN servers\n"+ + " https://coder.com/docs/networking/troubleshooting#Endpoint-Dependent-Nat-Hard-NAT") } if !d.AgentNetcheck.NetInfo.UDP { - agent = append(agent, "Agent could not connect to STUN over UDP") + agent = append(agent, "Agent could not connect to STUN over UDP\n"+ + " https://coder.com/docs/networking/troubleshooting#udp-blocked") } } if d.ClientIPIsAWS { - client = append(client, "Client IP address is within an AWS range (AWS uses hard NAT)") + client = append(client, "Client IP address is within an 
AWS range (AWS uses hard NAT)\n"+ + " https://coder.com/docs/networking/troubleshooting#Endpoint-Dependent-Nat-Hard-NAT") } if d.AgentIPIsAWS { - agent = append(agent, "Agent IP address is within an AWS range (AWS uses hard NAT)") + agent = append(agent, "Agent IP address is within an AWS range (AWS uses hard NAT)\n"+ + " https://coder.com/docs/networking/troubleshooting#Endpoint-Dependent-Nat-Hard-NAT") } return general, client, agent } diff --git a/cli/cliui/agent_test.go b/cli/cliui/agent_test.go index aeb41130d344e..31442ae0224da 100644 --- a/cli/cliui/agent_test.go +++ b/cli/cliui/agent_test.go @@ -533,7 +533,7 @@ func TestPeerDiagnostics(t *testing.T) { LastWireguardHandshake: time.Time{}, }, want: []*regexp.Regexp{ - regexp.MustCompile(`^✔ sent local data to Coder networking coodinator$`), + regexp.MustCompile(`^✔ sent local data to Coder networking coordinator$`), }, }, { diff --git a/docs/manifest.json b/docs/manifest.json index 0da8eab196642..eb7d2b576b555 100644 --- a/docs/manifest.json +++ b/docs/manifest.json @@ -351,6 +351,11 @@ "title": "STUN and NAT", "description": "Learn how Coder establishes direct connections", "path": "./networking/stun.md" + }, + { + "title": "Troubleshooting", + "description": "Troubleshoot networking issues in Coder", + "path": "./networking/troubleshooting.md" } ] }, diff --git a/docs/networking/index.md b/docs/networking/index.md index b5f26cacd7689..d4abddc5718c8 100644 --- a/docs/networking/index.md +++ b/docs/networking/index.md @@ -169,41 +169,7 @@ with security policies. In these cases, pass the `--browser-only` flag to With browser-only connections, developers can only connect to their workspaces via the web terminal and [web IDEs](../ides/web-ides.md). -## Troubleshooting - -The `coder ping -v ` will ping a workspace and return debug logs for -the connection. We recommend running this command and inspecting the output when -debugging SSH connections to a workspace. 
For example: - -```console -$ coder ping -v my-workspace - -2023-06-21 17:50:22.412 [debu] wgengine: ping(fd7a:115c:a1e0:49d6:b259:b7ac:b1b2:48f4): sending disco ping to [cFYPo] ... -pong from my-workspace proxied via DERP(Denver) in 90ms -2023-06-21 17:50:22.503 [debu] wgengine: magicsock: closing connection to derp-13 (conn-close), age 5s -2023-06-21 17:50:22.503 [debu] wgengine: magicsock: 0 active derp conns -2023-06-21 17:50:22.504 [debu] wgengine: wg: [v2] Routine: receive incoming v6 - stopped -2023-06-21 17:50:22.504 [debu] wgengine: wg: [v2] Device closed -``` - -The `coder speedtest <workspace>` command measures user <-> workspace -throughput. E.g.: - -``` -$ coder speedtest dev -29ms via coder -Starting a 5s download test... -INTERVAL TRANSFER BANDWIDTH -0.00-1.00 sec 630.7840 MBits 630.7404 Mbits/sec -1.00-2.00 sec 913.9200 MBits 913.8106 Mbits/sec -2.00-3.00 sec 943.1040 MBits 943.0399 Mbits/sec -3.00-4.00 sec 933.3760 MBits 933.2143 Mbits/sec -4.00-5.00 sec 848.8960 MBits 848.7019 Mbits/sec -5.00-5.02 sec 13.5680 MBits 828.8189 Mbits/sec ----------------------------------------------------- -0.00-5.02 sec 4283.6480 MBits 853.8217 Mbits/sec -``` - ## Up next - Learn about [Port Forwarding](./port-forwarding.md) +- Troubleshoot [Networking Issues](./troubleshooting.md) diff --git a/docs/networking/troubleshooting.md b/docs/networking/troubleshooting.md new file mode 100644 index 0000000000000..deab8bdc15a6f --- /dev/null +++ b/docs/networking/troubleshooting.md @@ -0,0 +1,124 @@ +# Troubleshooting + +`coder ping <workspace>` will ping the workspace agent and print diagnostics on +the state of the connection. These diagnostics are created by inspecting both +the client and agent network configurations, and provide insights into why a +direct connection may be impeded, or why the quality of one might be degraded. + +The `-v/--verbose` flag can be appended to the command to print client debug +logs.
+ +```console +$ coder ping dev +pong from workspace proxied via DERP(Council Bluffs, Iowa) in 42ms +pong from workspace proxied via DERP(Council Bluffs, Iowa) in 41ms +pong from workspace proxied via DERP(Council Bluffs, Iowa) in 39ms +✔ preferred DERP region: 999 (Council Bluffs, Iowa) +✔ sent local data to Coder networking coordinator +✔ received remote agent data from Coder networking coordinator + preferred DERP region: 999 (Council Bluffs, Iowa) + endpoints: x.x.x.x:46433, x.x.x.x:46433, x.x.x.x:46433 +✔ Wireguard handshake 11s ago + +❗ You are connected via a DERP relay, not directly (p2p) +Possible client-side issues with direct connection: + - Network interface utun0 has MTU 1280, (less than 1378), which may degrade the quality of direct connections + +Possible agent-side issues with direct connection: + - Agent is potentially behind a hard NAT, as multiple endpoints were retrieved from different STUN servers + - Agent IP address is within an AWS range (AWS uses hard NAT) +``` + +## Common Problems with Direct Connections + +### Disabled Deployment-wide + +Direct connections can be disabled at the deployment level by setting the +`CODER_BLOCK_DIRECT` environment variable or the `--block-direct-connections` +flag on the server. When set, this will be reflected in the output of +`coder ping`. + +### UDP Blocked + +Some corporate firewalls block UDP traffic. Direct connections require UDP +traffic to be allowed between the client and agent, as well as between the +client/agent and STUN servers in most cases. `coder ping` will indicate if +either the Coder agent or client had issues sending or receiving UDP packets to +STUN servers. + +If this is the case, you may need to add exceptions to the firewall to allow UDP +for Coder workspaces, clients, and STUN servers. + +### Endpoint-Dependent NAT (Hard NAT) + +Hard NATs prevent public endpoints gathered from STUN servers from being used by +the peer to establish a direct connection. 
Typically, if only one side of the +connection is behind a hard NAT, direct connections can still be established +easily. However, if both sides are behind hard NATs, direct connections may take +longer to establish or may not be possible at all. + +`coder ping` will indicate if it's possible the client or agent is behind a hard +NAT. + +Learn more about [STUN and NAT](./stun.md). + +### No STUN Servers + +If there are no STUN servers available within a deployment's DERP map, direct +connections may not be possible. Notable exceptions are if the client and agent +are on the same network, or if either is able to use UPnP instead of STUN to +resolve the public IP of the other. `coder ping` will indicate if no STUN +servers were found. + +### Endpoint Firewalls + +Direct connections may also be impeded if one side is behind a hard NAT and the +other is running a firewall that blocks ingress traffic from unknown 5-tuples +(Protocol, Source IP, Source Port, Destination IP, Destination Port). + +If this is suspected, you may need to add an exception for Coder to the +firewall, or reconfigure the hard NAT. + +### VPNs + +If a VPN is the default route for all IP traffic, it may interfere with the +ability for clients and agents to form direct connections. This happens if the +NAT does not permit traffic to be +['hairpinned'](./stun.md#3-direct-connections-with-vpn-and-nat-hairpinning) from +the public IP address of the NAT (determined via STUN) to the internal IP +address of the agent. + +If this is the case, you may need to add exceptions to the VPN for Coder, modify +the NAT configuration, or deploy an internal STUN server. + +### Low MTU + +If a network interface on the side of either the client or agent has an MTU +smaller than 1378, any direct connections formed may have degraded quality or +performance, as IP packets are fragmented. `coder ping` will indicate if this is +the case by inspecting network interfaces on both the client and the workspace +agent.
+ +If another interface cannot be used, and the MTU cannot be changed, you may need +to disable direct connections, and relay all traffic via DERP instead, which +will not be affected by the low MTU. + +## Throughput + +The `coder speedtest <workspace>` command measures the throughput between the +client and the workspace agent. + +```console +$ coder speedtest workspace +29ms via coder +Starting a 5s download test... +INTERVAL TRANSFER BANDWIDTH +0.00-1.00 sec 630.7840 MBits 630.7404 Mbits/sec +1.00-2.00 sec 913.9200 MBits 913.8106 Mbits/sec +2.00-3.00 sec 943.1040 MBits 943.0399 Mbits/sec +3.00-4.00 sec 933.3760 MBits 933.2143 Mbits/sec +4.00-5.00 sec 848.8960 MBits 848.7019 Mbits/sec +5.00-5.02 sec 13.5680 MBits 828.8189 Mbits/sec +---------------------------------------------------- +0.00-5.02 sec 4283.6480 MBits 853.8217 Mbits/sec +```