Skip to content

fix: fix tailnet netcheck issues #8802

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Aug 1, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
fixup! fix: fix tailnet netcheck issues
  • Loading branch information
deansheather committed Jul 31, 2023
commit 7c9468f018b5b756375309460f322d9a60102d46
20 changes: 16 additions & 4 deletions coderd/coderdtest/coderdtest.go
Original file line number Diff line number Diff line change
Expand Up @@ -303,9 +303,21 @@ func NewOptions(t testing.TB, options *Options) (func(http.Handler), context.Can
accessURL = serverURL
}

stunAddr, stunCleanup := stuntest.ServeWithPacketListener(t, nettype.Std{})
stunAddr.IP = net.ParseIP("127.0.0.1")
t.Cleanup(stunCleanup)
// If the STUNAddresses setting is empty or the default, start a STUN
// server. Otherwise, use the value as is.
var (
stunAddresses []string
dvStunAddresses = options.DeploymentValues.DERP.Server.STUNAddresses.Value()
)
if len(dvStunAddresses) == 0 || (len(dvStunAddresses) == 1 && dvStunAddresses[0] == "stun.l.google.com:19302") {
stunAddr, stunCleanup := stuntest.ServeWithPacketListener(t, nettype.Std{})
stunAddr.IP = net.ParseIP("127.0.0.1")
t.Cleanup(stunCleanup)
stunAddresses = []string{stunAddr.String()}
options.DeploymentValues.DERP.Server.STUNAddresses = stunAddresses
} else if dvStunAddresses[0] != "disable" {
stunAddresses = options.DeploymentValues.DERP.Server.STUNAddresses.Value()
}

derpServer := derp.NewServer(key.NewNode(), tailnet.Logger(slogtest.Make(t, nil).Named("derp").Leveled(slog.LevelDebug)))
derpServer.SetMeshKey("test-key")
Expand Down Expand Up @@ -346,7 +358,7 @@ func NewOptions(t testing.TB, options *Options) (func(http.Handler), context.Can
if !options.DeploymentValues.DERP.Server.Enable.Value() {
region = nil
}
derpMap, err := tailnet.NewDERPMap(ctx, region, []string{stunAddr.String()}, "", "", options.DeploymentValues.DERP.Config.BlockDirect.Value())
derpMap, err := tailnet.NewDERPMap(ctx, region, stunAddresses, "", "", options.DeploymentValues.DERP.Config.BlockDirect.Value())
require.NoError(t, err)

return func(h http.Handler) {
Expand Down
2 changes: 1 addition & 1 deletion coderd/healthcheck/derp.go
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ func (r *DERPReport) Run(ctx context.Context, opts *DERPReportOptions) {
mu.Unlock()
}
nc := &netcheck.Client{
PortMapper: portmapper.NewClient(tslogger.WithPrefix(ncLogf, "portmap: "), nil, nil),
PortMapper: portmapper.NewClient(tslogger.WithPrefix(ncLogf, "portmap: "), nil),
Logf: tslogger.WithPrefix(ncLogf, "netcheck: "),
}
ncReport, netcheckErr := nc.GetReport(ctx, opts.DERPMap)
Expand Down
127 changes: 127 additions & 0 deletions enterprise/wsproxy/wsproxy_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import (
"github.com/coder/coder/enterprise/coderd/coderdenttest"
"github.com/coder/coder/enterprise/coderd/license"
"github.com/coder/coder/provisioner/echo"
"github.com/coder/coder/tailnet"
"github.com/coder/coder/testutil"
)

Expand Down Expand Up @@ -239,6 +240,132 @@ resourceLoop:
})
}

func TestDERPMapStunNodes(t *testing.T) {
t.Parallel()

deploymentValues := coderdtest.DeploymentValues(t)
deploymentValues.Experiments = []string{
string(codersdk.ExperimentMoons),
"*",
}
stunAddresses := []string{
"stun.l.google.com:19302",
"stun1.l.google.com:19302",
"stun2.l.google.com:19302",
"stun3.l.google.com:19302",
"stun4.l.google.com:19302",
}
deploymentValues.DERP.Server.STUNAddresses = stunAddresses

client, closer, api, _ := coderdenttest.NewWithAPI(t, &coderdenttest.Options{
Options: &coderdtest.Options{
DeploymentValues: deploymentValues,
AppHostname: "*.primary.test.coder.com",
IncludeProvisionerDaemon: true,
RealIPConfig: &httpmw.RealIPConfig{
TrustedOrigins: []*net.IPNet{{
IP: net.ParseIP("127.0.0.1"),
Mask: net.CIDRMask(8, 32),
}},
TrustedHeaders: []string{
"CF-Connecting-IP",
},
},
},
LicenseOptions: &coderdenttest.LicenseOptions{
Features: license.Features{
codersdk.FeatureWorkspaceProxy: 1,
},
},
})
t.Cleanup(func() {
_ = closer.Close()
})

// Create a running external proxy.
_ = coderdenttest.NewWorkspaceProxy(t, api, client, &coderdenttest.ProxyOptions{
Name: "cool-proxy",
})

// Wait for both running proxies to become healthy.
require.Eventually(t, func() bool {
healthCtx := testutil.Context(t, testutil.WaitLong)
err := api.ProxyHealth.ForceUpdate(healthCtx)
if !assert.NoError(t, err) {
return false
}

regions, err := client.Regions(healthCtx)
if !assert.NoError(t, err) {
return false
}
if !assert.Len(t, regions, 2) {
return false
}

// All regions should be healthy.
for _, r := range regions {
if !r.Healthy {
return false
}
}
return true
}, testutil.WaitLong, testutil.IntervalMedium)

// Get the DERP map and ensure that the built-in region and the proxy region
// both have the STUN nodes.
ctx := testutil.Context(t, testutil.WaitLong)
connInfo, err := client.WorkspaceAgentConnectionInfoGeneric(ctx)
require.NoError(t, err)

// There should be two DERP servers in the map: the primary and the
// proxy.
require.NotNil(t, connInfo.DERPMap)
require.Len(t, connInfo.DERPMap.Regions, 2)

var (
primaryRegion *tailcfg.DERPRegion
proxyRegion *tailcfg.DERPRegion
)
for _, r := range connInfo.DERPMap.Regions {
if r.EmbeddedRelay {
primaryRegion = r
continue
}
if r.RegionName == "cool-proxy" {
proxyRegion = r
continue
}

t.Fatalf("unexpected region: %+v", r)
}

// The primary region:
require.Equal(t, "Coder Embedded Relay", primaryRegion.RegionName)
require.Equal(t, "coder", primaryRegion.RegionCode)
require.Equal(t, 999, primaryRegion.RegionID)
require.True(t, primaryRegion.EmbeddedRelay)
require.Len(t, primaryRegion.Nodes, len(stunAddresses)+1)

// The proxy region:
require.Equal(t, "cool-proxy", proxyRegion.RegionName)
require.Equal(t, "coder_cool-proxy", proxyRegion.RegionCode)
require.Equal(t, 10001, proxyRegion.RegionID)
require.False(t, proxyRegion.EmbeddedRelay)
require.Len(t, proxyRegion.Nodes, len(stunAddresses)+1)

for _, region := range []*tailcfg.DERPRegion{primaryRegion, proxyRegion} {
stunNodes, err := tailnet.STUNNodes(region.RegionID, stunAddresses)
require.NoError(t, err)
require.Len(t, stunNodes, len(stunAddresses))

require.Equal(t, stunNodes, region.Nodes[:len(stunNodes)])

// The last node should be the Coder server.
require.NotZero(t, region.Nodes[len(region.Nodes)-1].DERPPort)
}
}

func TestDERPEndToEnd(t *testing.T) {
t.Parallel()

Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ replace github.com/dlclark/regexp2 => github.com/dlclark/regexp2 v1.7.0

// There are a few minor changes we make to Tailscale that we're slowly upstreaming. Compare here:
// https://github.com/tailscale/tailscale/compare/main...coder:tailscale:main
replace tailscale.com => github.com/coder/tailscale v0.0.0-20230522123520-74712221d00f
replace tailscale.com => github.com/coder/tailscale v0.0.0-20230731105344-d1b7f8087191

// Use our tempfork of gvisor that includes a fix for TCP connection stalls:
// https://github.com/coder/coder/issues/7388
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -194,8 +194,8 @@ github.com/coder/retry v1.4.0 h1:g0fojHFxcdgM3sBULqgjFDxw1UIvaCqk4ngUDu0EWag=
github.com/coder/retry v1.4.0/go.mod h1:blHMk9vs6LkoRT9ZHyuZo360cufXEhrxqvEzeMtRGoY=
github.com/coder/ssh v0.0.0-20230621095435-9a7e23486f1c h1:TI7TzdFI0UvQmwgyQhtI1HeyYNRxAQpr8Tw/rjT8VSA=
github.com/coder/ssh v0.0.0-20230621095435-9a7e23486f1c/go.mod h1:aGQbuCLyhRLMzZF067xc84Lh7JDs1FKwCmF1Crl9dxQ=
github.com/coder/tailscale v0.0.0-20230522123520-74712221d00f h1:F0Xr1d8h8dAHn7tab1HXuzYFkcjmCydnEfdMbkOhlVk=
github.com/coder/tailscale v0.0.0-20230522123520-74712221d00f/go.mod h1:jpg+77g19FpXL43U1VoIqoSg1K/Vh5CVxycGldQ8KhA=
github.com/coder/tailscale v0.0.0-20230731105344-d1b7f8087191 h1:FweUPlasdC67APP0jS8LTUdVlWcl49cX4NqTkr0ZL/4=
github.com/coder/tailscale v0.0.0-20230731105344-d1b7f8087191/go.mod h1:jpg+77g19FpXL43U1VoIqoSg1K/Vh5CVxycGldQ8KhA=
github.com/coder/terraform-provider-coder v0.11.1 h1:1sXcHfQrX8XhmLbtKxBED2lZ5jk3/ezBtaw6uVhpJZ4=
github.com/coder/terraform-provider-coder v0.11.1/go.mod h1:UIfU3bYNeSzJJvHyJ30tEKjD6Z9utloI+HUM/7n94CY=
github.com/coder/wgtunnel v0.1.5 h1:WP3sCj/3iJ34eKvpMQEp1oJHvm24RYh0NHbj1kfUKfs=
Expand Down
53 changes: 26 additions & 27 deletions tailnet/conn.go
Original file line number Diff line number Diff line change
Expand Up @@ -292,7 +292,8 @@ func NewConn(options *Options) (conn *Conn, err error) {
server.sendNode()
})

netStack.GetTCPHandlerForFlow = server.getTCPHandlerForFlow
netStack.ForwardTCPIn = server.forwardTCP
netStack.ForwardTCPSockOpts = server.forwardTCPSockOpts

err = netStack.Start(nil)
if err != nil {
Expand Down Expand Up @@ -797,39 +798,37 @@ func (c *Conn) DialContextUDP(ctx context.Context, ipp netip.AddrPort) (*gonet.U
return c.netStack.DialContextUDP(ctx, ipp)
}

func (c *Conn) getTCPHandlerForFlow(_, dst netip.AddrPort) (func(net.Conn), []tcpip.SettableSocketOption, bool) {
var (
port = dst.Port()
opts = []tcpip.SettableSocketOption{}
)
func (c *Conn) forwardTCP(conn net.Conn, port uint16) {
c.mutex.Lock()
ln, ok := c.listeners[listenKey{"tcp", "", fmt.Sprint(port)}]
c.mutex.Unlock()
if !ok {
c.forwardTCPToLocal(conn, port)
return
}

t := time.NewTimer(time.Second)
defer t.Stop()
select {
case ln.conn <- conn:
return
case <-ln.closed:
case <-c.closed:
case <-t.C:
}
_ = conn.Close()
}

func (*Conn) forwardTCPSockOpts(port uint16) []tcpip.SettableSocketOption {
opts := []tcpip.SettableSocketOption{}

// See: https://github.com/tailscale/tailscale/blob/c7cea825aea39a00aca71ea02bab7266afc03e7c/wgengine/netstack/netstack.go#L888
if port == WorkspaceAgentSSHPort || port == 22 {
opt := tcpip.KeepaliveIdleOption(72*time.Hour + time.Minute) // Default ssh-max-timeout is 72h, so let's add some extra time.
opts = append(opts, &opt)
}

c.mutex.Lock()
ln, ok := c.listeners[listenKey{"tcp", "", fmt.Sprint(port)}]
c.mutex.Unlock()
if !ok {
return func(conn net.Conn) {
c.forwardTCPToLocal(conn, port)
}, opts, true
}

return func(conn net.Conn) {
t := time.NewTimer(time.Second)
defer t.Stop()
select {
case ln.conn <- conn:
return
case <-ln.closed:
case <-c.closed:
case <-t.C:
}
_ = conn.Close()
}, opts, true
return opts
}

func (c *Conn) forwardTCPToLocal(conn net.Conn, port uint16) {
Expand Down