Skip to content

fix: fix tailnet netcheck issues #8802

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Aug 1, 2023
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions agent/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,8 @@ type Client interface {

type Agent interface {
HTTPDebug() http.Handler
// TailnetConn may be nil.
TailnetConn() *tailnet.Conn
io.Closer
}

Expand Down Expand Up @@ -200,6 +202,10 @@ type agent struct {
metrics *agentMetrics
}

func (a *agent) TailnetConn() *tailnet.Conn {
return a.network
}

func (a *agent) init(ctx context.Context) {
sshSrv, err := agentssh.NewServer(ctx, a.logger.Named("ssh-server"), a.prometheusRegistry, a.filesystem, a.sshMaxTimeout, "")
if err != nil {
Expand Down
20 changes: 16 additions & 4 deletions coderd/coderdtest/coderdtest.go
Original file line number Diff line number Diff line change
Expand Up @@ -303,9 +303,21 @@ func NewOptions(t testing.TB, options *Options) (func(http.Handler), context.Can
accessURL = serverURL
}

stunAddr, stunCleanup := stuntest.ServeWithPacketListener(t, nettype.Std{})
stunAddr.IP = net.ParseIP("127.0.0.1")
t.Cleanup(stunCleanup)
// If the STUNAddresses setting is empty or the default, start a STUN
// server. Otherwise, use the value as is.
var (
stunAddresses []string
dvStunAddresses = options.DeploymentValues.DERP.Server.STUNAddresses.Value()
)
if len(dvStunAddresses) == 0 || (len(dvStunAddresses) == 1 && dvStunAddresses[0] == "stun.l.google.com:19302") {
stunAddr, stunCleanup := stuntest.ServeWithPacketListener(t, nettype.Std{})
stunAddr.IP = net.ParseIP("127.0.0.1")
t.Cleanup(stunCleanup)
stunAddresses = []string{stunAddr.String()}
options.DeploymentValues.DERP.Server.STUNAddresses = stunAddresses
} else if dvStunAddresses[0] != "disable" {
stunAddresses = options.DeploymentValues.DERP.Server.STUNAddresses.Value()
}

derpServer := derp.NewServer(key.NewNode(), tailnet.Logger(slogtest.Make(t, nil).Named("derp").Leveled(slog.LevelDebug)))
derpServer.SetMeshKey("test-key")
Expand Down Expand Up @@ -346,7 +358,7 @@ func NewOptions(t testing.TB, options *Options) (func(http.Handler), context.Can
if !options.DeploymentValues.DERP.Server.Enable.Value() {
region = nil
}
derpMap, err := tailnet.NewDERPMap(ctx, region, []string{stunAddr.String()}, "", "", options.DeploymentValues.DERP.Config.BlockDirect.Value())
derpMap, err := tailnet.NewDERPMap(ctx, region, stunAddresses, "", "", options.DeploymentValues.DERP.Config.BlockDirect.Value())
require.NoError(t, err)

return func(h http.Handler) {
Expand Down
2 changes: 1 addition & 1 deletion coderd/healthcheck/derp.go
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ func (r *DERPNodeReport) derpURL() *url.URL {
if r.Node.HostName == "" {
derpURL.Host = r.Node.IPv4
}
if r.Node.DERPPort != 0 {
if r.Node.DERPPort != 0 && !(r.Node.DERPPort == 443 && derpURL.Scheme == "https") && !(r.Node.DERPPort == 80 && derpURL.Scheme == "http") {
derpURL.Host = fmt.Sprintf("%s:%d", derpURL.Host, r.Node.DERPPort)
}

Expand Down
9 changes: 8 additions & 1 deletion coderd/workspaceagents_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1328,7 +1328,14 @@ func TestWorkspaceAgent_UpdatedDERP(t *testing.T) {
currentDerpMap.Store(newDerpMap)

// Wait for the agent's DERP map to be updated.
// TODO: this
require.Eventually(t, func() bool {
conn := agentCloser.TailnetConn()
if conn == nil {
return false
}
regionIDs := conn.DERPMap().RegionIDs()
return len(regionIDs) == 1 && regionIDs[0] == 2
}, testutil.WaitLong, testutil.IntervalFast)

// Wait for the DERP map to be updated on the existing client.
require.Eventually(t, func() bool {
Expand Down
45 changes: 34 additions & 11 deletions enterprise/coderd/coderd.go
Original file line number Diff line number Diff line change
Expand Up @@ -578,7 +578,7 @@ func (api *API) updateEntitlements(ctx context.Context) error {

if initial, changed, enabled := featureChanged(codersdk.FeatureWorkspaceProxy); shouldUpdate(initial, changed, enabled) {
if enabled {
fn := derpMapper(api.Logger, api.ProxyHealth)
fn := derpMapper(api.Logger, api.DeploymentValues, api.ProxyHealth)
api.AGPL.DERPMapper.Store(&fn)
} else {
api.AGPL.DERPMapper.Store(nil)
Expand Down Expand Up @@ -643,7 +643,7 @@ var (
lastDerpConflictLog time.Time
)

func derpMapper(logger slog.Logger, proxyHealth *proxyhealth.ProxyHealth) func(*tailcfg.DERPMap) *tailcfg.DERPMap {
func derpMapper(logger slog.Logger, cfg *codersdk.DeploymentValues, proxyHealth *proxyhealth.ProxyHealth) func(*tailcfg.DERPMap) *tailcfg.DERPMap {
return func(derpMap *tailcfg.DERPMap) *tailcfg.DERPMap {
derpMap = derpMap.Clone()

Expand Down Expand Up @@ -718,7 +718,7 @@ func derpMapper(logger slog.Logger, proxyHealth *proxyhealth.ProxyHealth) func(*
lastDerpConflictMutex.Unlock()
if shouldLog {
logger.Warn(context.Background(),
"proxy region ID or code conflict, ignoring workspace proxy for DERP map. Please change the flags on the affected proxy to use a different region ID and code",
"proxy region ID or code conflict, ignoring workspace proxy for DERP map",
slog.F("proxy_id", status.Proxy.ID),
slog.F("proxy_name", status.Proxy.Name),
slog.F("proxy_display_name", status.Proxy.DisplayName),
Expand All @@ -733,20 +733,43 @@ func derpMapper(logger slog.Logger, proxyHealth *proxyhealth.ProxyHealth) func(*
}
}

var stunNodes []*tailcfg.DERPNode
if !cfg.DERP.Config.BlockDirect.Value() {
stunNodes, err = agpltailnet.STUNNodes(regionID, cfg.DERP.Server.STUNAddresses)
if err != nil {
// Log a warning if we haven't logged one in the last
// minute.
lastDerpConflictMutex.Lock()
shouldLog := lastDerpConflictLog.IsZero() || time.Since(lastDerpConflictLog) > time.Minute
if shouldLog {
lastDerpConflictLog = time.Now()
}
lastDerpConflictMutex.Unlock()
if shouldLog {
logger.Error(context.Background(), "failed to calculate STUN nodes", slog.Error(err))
}

// No continue because we can keep going.
stunNodes = []*tailcfg.DERPNode{}
}
}

nodes := append(stunNodes, &tailcfg.DERPNode{
Name: fmt.Sprintf("%da", regionID),
RegionID: regionID,
HostName: u.Hostname(),
DERPPort: portInt,
STUNPort: -1,
ForceHTTP: u.Scheme == "http",
})

derpMap.Regions[regionID] = &tailcfg.DERPRegion{
// EmbeddedRelay ONLY applies to the primary.
EmbeddedRelay: false,
RegionID: regionID,
RegionCode: regionCode,
RegionName: status.Proxy.Name,
Nodes: []*tailcfg.DERPNode{{
Name: fmt.Sprintf("%da", regionID),
RegionID: regionID,
HostName: u.Hostname(),
DERPPort: portInt,
STUNPort: -1,
ForceHTTP: u.Scheme == "http",
}},
Nodes: nodes,
}
}

Expand Down
151 changes: 139 additions & 12 deletions enterprise/wsproxy/wsproxy_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import (
"github.com/coder/coder/enterprise/coderd/coderdenttest"
"github.com/coder/coder/enterprise/coderd/license"
"github.com/coder/coder/provisioner/echo"
"github.com/coder/coder/tailnet"
"github.com/coder/coder/testutil"
)

Expand Down Expand Up @@ -184,24 +185,24 @@ resourceLoop:
require.Equal(t, "coder_best-proxy", proxy1Region.RegionCode)
require.Equal(t, 10001, proxy1Region.RegionID)
require.False(t, proxy1Region.EmbeddedRelay)
require.Len(t, proxy1Region.Nodes, 1)
require.Equal(t, "10001a", proxy1Region.Nodes[0].Name)
require.Equal(t, 10001, proxy1Region.Nodes[0].RegionID)
require.Equal(t, proxyAPI1.Options.AccessURL.Hostname(), proxy1Region.Nodes[0].HostName)
require.Equal(t, proxyAPI1.Options.AccessURL.Port(), fmt.Sprint(proxy1Region.Nodes[0].DERPPort))
require.Equal(t, proxyAPI1.Options.AccessURL.Scheme == "http", proxy1Region.Nodes[0].ForceHTTP)
require.Len(t, proxy1Region.Nodes, 2) // proxy + stun
require.Equal(t, "10001a", proxy1Region.Nodes[1].Name)
require.Equal(t, 10001, proxy1Region.Nodes[1].RegionID)
require.Equal(t, proxyAPI1.Options.AccessURL.Hostname(), proxy1Region.Nodes[1].HostName)
require.Equal(t, proxyAPI1.Options.AccessURL.Port(), fmt.Sprint(proxy1Region.Nodes[1].DERPPort))
require.Equal(t, proxyAPI1.Options.AccessURL.Scheme == "http", proxy1Region.Nodes[1].ForceHTTP)

// The second proxy region:
require.Equal(t, "worst-proxy", proxy2Region.RegionName)
require.Equal(t, "coder_worst-proxy", proxy2Region.RegionCode)
require.Equal(t, 10002, proxy2Region.RegionID)
require.False(t, proxy2Region.EmbeddedRelay)
require.Len(t, proxy2Region.Nodes, 1)
require.Equal(t, "10002a", proxy2Region.Nodes[0].Name)
require.Equal(t, 10002, proxy2Region.Nodes[0].RegionID)
require.Equal(t, proxyAPI2.Options.AccessURL.Hostname(), proxy2Region.Nodes[0].HostName)
require.Equal(t, proxyAPI2.Options.AccessURL.Port(), fmt.Sprint(proxy2Region.Nodes[0].DERPPort))
require.Equal(t, proxyAPI2.Options.AccessURL.Scheme == "http", proxy2Region.Nodes[0].ForceHTTP)
require.Len(t, proxy2Region.Nodes, 2) // proxy + stun
require.Equal(t, "10002a", proxy2Region.Nodes[1].Name)
require.Equal(t, 10002, proxy2Region.Nodes[1].RegionID)
require.Equal(t, proxyAPI2.Options.AccessURL.Hostname(), proxy2Region.Nodes[1].HostName)
require.Equal(t, proxyAPI2.Options.AccessURL.Port(), fmt.Sprint(proxy2Region.Nodes[1].DERPPort))
require.Equal(t, proxyAPI2.Options.AccessURL.Scheme == "http", proxy2Region.Nodes[1].ForceHTTP)
})

t.Run("ConnectDERP", func(t *testing.T) {
Expand Down Expand Up @@ -239,6 +240,132 @@ resourceLoop:
})
}

func TestDERPMapStunNodes(t *testing.T) {
t.Parallel()

deploymentValues := coderdtest.DeploymentValues(t)
deploymentValues.Experiments = []string{
string(codersdk.ExperimentMoons),
"*",
}
stunAddresses := []string{
"stun.l.google.com:19302",
"stun1.l.google.com:19302",
"stun2.l.google.com:19302",
"stun3.l.google.com:19302",
"stun4.l.google.com:19302",
}
deploymentValues.DERP.Server.STUNAddresses = stunAddresses

client, closer, api, _ := coderdenttest.NewWithAPI(t, &coderdenttest.Options{
Options: &coderdtest.Options{
DeploymentValues: deploymentValues,
AppHostname: "*.primary.test.coder.com",
IncludeProvisionerDaemon: true,
RealIPConfig: &httpmw.RealIPConfig{
TrustedOrigins: []*net.IPNet{{
IP: net.ParseIP("127.0.0.1"),
Mask: net.CIDRMask(8, 32),
}},
TrustedHeaders: []string{
"CF-Connecting-IP",
},
},
},
LicenseOptions: &coderdenttest.LicenseOptions{
Features: license.Features{
codersdk.FeatureWorkspaceProxy: 1,
},
},
})
t.Cleanup(func() {
_ = closer.Close()
})

// Create a running external proxy.
_ = coderdenttest.NewWorkspaceProxy(t, api, client, &coderdenttest.ProxyOptions{
Name: "cool-proxy",
})

// Wait for both running proxies to become healthy.
require.Eventually(t, func() bool {
healthCtx := testutil.Context(t, testutil.WaitLong)
err := api.ProxyHealth.ForceUpdate(healthCtx)
if !assert.NoError(t, err) {
return false
}

regions, err := client.Regions(healthCtx)
if !assert.NoError(t, err) {
return false
}
if !assert.Len(t, regions, 2) {
return false
}

// All regions should be healthy.
for _, r := range regions {
if !r.Healthy {
return false
}
}
return true
}, testutil.WaitLong, testutil.IntervalMedium)

// Get the DERP map and ensure that the built-in region and the proxy region
// both have the STUN nodes.
ctx := testutil.Context(t, testutil.WaitLong)
connInfo, err := client.WorkspaceAgentConnectionInfoGeneric(ctx)
require.NoError(t, err)

// There should be two DERP servers in the map: the primary and the
// proxy.
require.NotNil(t, connInfo.DERPMap)
require.Len(t, connInfo.DERPMap.Regions, 2)

var (
primaryRegion *tailcfg.DERPRegion
proxyRegion *tailcfg.DERPRegion
)
for _, r := range connInfo.DERPMap.Regions {
if r.EmbeddedRelay {
primaryRegion = r
continue
}
if r.RegionName == "cool-proxy" {
proxyRegion = r
continue
}

t.Fatalf("unexpected region: %+v", r)
}

// The primary region:
require.Equal(t, "Coder Embedded Relay", primaryRegion.RegionName)
require.Equal(t, "coder", primaryRegion.RegionCode)
require.Equal(t, 999, primaryRegion.RegionID)
require.True(t, primaryRegion.EmbeddedRelay)
require.Len(t, primaryRegion.Nodes, len(stunAddresses)+1)

// The proxy region:
require.Equal(t, "cool-proxy", proxyRegion.RegionName)
require.Equal(t, "coder_cool-proxy", proxyRegion.RegionCode)
require.Equal(t, 10001, proxyRegion.RegionID)
require.False(t, proxyRegion.EmbeddedRelay)
require.Len(t, proxyRegion.Nodes, len(stunAddresses)+1)

for _, region := range []*tailcfg.DERPRegion{primaryRegion, proxyRegion} {
stunNodes, err := tailnet.STUNNodes(region.RegionID, stunAddresses)
require.NoError(t, err)
require.Len(t, stunNodes, len(stunAddresses))

require.Equal(t, stunNodes, region.Nodes[:len(stunNodes)])

// The last node should be the Coder server.
require.NotZero(t, region.Nodes[len(region.Nodes)-1].DERPPort)
}
}

func TestDERPEndToEnd(t *testing.T) {
t.Parallel()

Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ replace github.com/dlclark/regexp2 => github.com/dlclark/regexp2 v1.7.0

// There are a few minor changes we make to Tailscale that we're slowly upstreaming. Compare here:
// https://github.com/tailscale/tailscale/compare/main...coder:tailscale:main
replace tailscale.com => github.com/coder/tailscale v0.0.0-20230522123520-74712221d00f
replace tailscale.com => github.com/coder/tailscale v0.0.0-20230731105344-d1b7f8087191

// Use our tempfork of gvisor that includes a fix for TCP connection stalls:
// https://github.com/coder/coder/issues/7388
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -194,8 +194,8 @@ github.com/coder/retry v1.4.0 h1:g0fojHFxcdgM3sBULqgjFDxw1UIvaCqk4ngUDu0EWag=
github.com/coder/retry v1.4.0/go.mod h1:blHMk9vs6LkoRT9ZHyuZo360cufXEhrxqvEzeMtRGoY=
github.com/coder/ssh v0.0.0-20230621095435-9a7e23486f1c h1:TI7TzdFI0UvQmwgyQhtI1HeyYNRxAQpr8Tw/rjT8VSA=
github.com/coder/ssh v0.0.0-20230621095435-9a7e23486f1c/go.mod h1:aGQbuCLyhRLMzZF067xc84Lh7JDs1FKwCmF1Crl9dxQ=
github.com/coder/tailscale v0.0.0-20230522123520-74712221d00f h1:F0Xr1d8h8dAHn7tab1HXuzYFkcjmCydnEfdMbkOhlVk=
github.com/coder/tailscale v0.0.0-20230522123520-74712221d00f/go.mod h1:jpg+77g19FpXL43U1VoIqoSg1K/Vh5CVxycGldQ8KhA=
github.com/coder/tailscale v0.0.0-20230731105344-d1b7f8087191 h1:FweUPlasdC67APP0jS8LTUdVlWcl49cX4NqTkr0ZL/4=
github.com/coder/tailscale v0.0.0-20230731105344-d1b7f8087191/go.mod h1:jpg+77g19FpXL43U1VoIqoSg1K/Vh5CVxycGldQ8KhA=
github.com/coder/terraform-provider-coder v0.11.1 h1:1sXcHfQrX8XhmLbtKxBED2lZ5jk3/ezBtaw6uVhpJZ4=
github.com/coder/terraform-provider-coder v0.11.1/go.mod h1:UIfU3bYNeSzJJvHyJ30tEKjD6Z9utloI+HUM/7n94CY=
github.com/coder/wgtunnel v0.1.5 h1:WP3sCj/3iJ34eKvpMQEp1oJHvm24RYh0NHbj1kfUKfs=
Expand Down
5 changes: 4 additions & 1 deletion scripts/coder-dev.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,10 @@ source "${SCRIPT_DIR}/lib.sh"
GOOS="$(go env GOOS)"
GOARCH="$(go env GOARCH)"
BINARY_TYPE=coder-slim
if [[ $1 == server ]]; then
if [[ ${1:-} == server ]]; then
BINARY_TYPE=coder
fi
if [[ ${1:-} == wsproxy ]] && [[ ${2:-} == server ]]; then
BINARY_TYPE=coder
fi
RELATIVE_BINARY_PATH="build/${BINARY_TYPE}_${GOOS}_${GOARCH}"
Expand Down
4 changes: 2 additions & 2 deletions scripts/develop.sh
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ fatal() {
trap 'fatal "Script encountered an error"' ERR

cdroot
start_cmd API "" "${CODER_DEV_SHIM}" server --http-address 0.0.0.0:3000 --swagger-enable --access-url "http://127.0.0.1:3000" --dangerous-allow-cors-requests=true --experiments "*" "$@"
start_cmd API "" "${CODER_DEV_SHIM}" server --http-address 0.0.0.0:3000 --swagger-enable --access-url "http://127.0.0.1:3000" --dangerous-allow-cors-requests=true --experiments "*,moons" "$@"

echo '== Waiting for Coder to become ready'
# Start the timeout in the background so interrupting this script
Expand Down Expand Up @@ -181,7 +181,7 @@ fatal() {
log "Using external workspace proxy"
(
# Attempt to delete the proxy first, in case it already exists.
"${CODER_DEV_SHIM}" wsproxy delete local-proxy || true
"${CODER_DEV_SHIM}" wsproxy delete local-proxy --yes || true
# Create the proxy
proxy_session_token=$("${CODER_DEV_SHIM}" wsproxy create --name=local-proxy --display-name="Local Proxy" --icon="/emojis/1f4bb.png" --only-token)
# Start the proxy
Expand Down
Loading