Skip to content

Commit c575292

Browse files
authored
fix: fix tailnet netcheck issues (#8802)
1 parent 929f2d5 commit c575292

File tree

13 files changed

+260
-62
lines changed

13 files changed

+260
-62
lines changed

agent/agent.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,8 @@ type Client interface {
8585

8686
type Agent interface {
8787
HTTPDebug() http.Handler
88+
// TailnetConn may be nil.
89+
TailnetConn() *tailnet.Conn
8890
io.Closer
8991
}
9092

@@ -200,6 +202,10 @@ type agent struct {
200202
metrics *agentMetrics
201203
}
202204

205+
func (a *agent) TailnetConn() *tailnet.Conn {
206+
return a.network
207+
}
208+
203209
func (a *agent) init(ctx context.Context) {
204210
sshSrv, err := agentssh.NewServer(ctx, a.logger.Named("ssh-server"), a.prometheusRegistry, a.filesystem, a.sshMaxTimeout, "")
205211
if err != nil {

agent/agent_test.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1818,6 +1818,15 @@ func TestAgent_UpdatedDERP(t *testing.T) {
18181818
})
18191819
require.NoError(t, err)
18201820

1821+
require.Eventually(t, func() bool {
1822+
conn := closer.TailnetConn()
1823+
if conn == nil {
1824+
return false
1825+
}
1826+
regionIDs := conn.DERPMap().RegionIDs()
1827+
return len(regionIDs) == 1 && regionIDs[0] == 2 && conn.Node().PreferredDERP == 2
1828+
}, testutil.WaitLong, testutil.IntervalFast)
1829+
18211830
// Connect from a second client and make sure it uses the new DERP map.
18221831
conn2 := newClientConn(newDerpMap)
18231832
require.Equal(t, []int{2}, conn2.DERPMap().RegionIDs())

coderd/coderdtest/coderdtest.go

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -303,9 +303,21 @@ func NewOptions(t testing.TB, options *Options) (func(http.Handler), context.Can
303303
accessURL = serverURL
304304
}
305305

306-
stunAddr, stunCleanup := stuntest.ServeWithPacketListener(t, nettype.Std{})
307-
stunAddr.IP = net.ParseIP("127.0.0.1")
308-
t.Cleanup(stunCleanup)
306+
// If the STUNAddresses setting is empty or the default, start a STUN
307+
// server. Otherwise, use the value as is.
308+
var (
309+
stunAddresses []string
310+
dvStunAddresses = options.DeploymentValues.DERP.Server.STUNAddresses.Value()
311+
)
312+
if len(dvStunAddresses) == 0 || (len(dvStunAddresses) == 1 && dvStunAddresses[0] == "stun.l.google.com:19302") {
313+
stunAddr, stunCleanup := stuntest.ServeWithPacketListener(t, nettype.Std{})
314+
stunAddr.IP = net.ParseIP("127.0.0.1")
315+
t.Cleanup(stunCleanup)
316+
stunAddresses = []string{stunAddr.String()}
317+
options.DeploymentValues.DERP.Server.STUNAddresses = stunAddresses
318+
} else if dvStunAddresses[0] != "disable" {
319+
stunAddresses = options.DeploymentValues.DERP.Server.STUNAddresses.Value()
320+
}
309321

310322
derpServer := derp.NewServer(key.NewNode(), tailnet.Logger(slogtest.Make(t, nil).Named("derp").Leveled(slog.LevelDebug)))
311323
derpServer.SetMeshKey("test-key")
@@ -346,7 +358,7 @@ func NewOptions(t testing.TB, options *Options) (func(http.Handler), context.Can
346358
if !options.DeploymentValues.DERP.Server.Enable.Value() {
347359
region = nil
348360
}
349-
derpMap, err := tailnet.NewDERPMap(ctx, region, []string{stunAddr.String()}, "", "", options.DeploymentValues.DERP.Config.BlockDirect.Value())
361+
derpMap, err := tailnet.NewDERPMap(ctx, region, stunAddresses, "", "", options.DeploymentValues.DERP.Config.BlockDirect.Value())
350362
require.NoError(t, err)
351363

352364
return func(h http.Handler) {

coderd/healthcheck/derp.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -172,7 +172,7 @@ func (r *DERPNodeReport) derpURL() *url.URL {
172172
if r.Node.HostName == "" {
173173
derpURL.Host = r.Node.IPv4
174174
}
175-
if r.Node.DERPPort != 0 {
175+
if r.Node.DERPPort != 0 && !(r.Node.DERPPort == 443 && derpURL.Scheme == "https") && !(r.Node.DERPPort == 80 && derpURL.Scheme == "http") {
176176
derpURL.Host = fmt.Sprintf("%s:%d", derpURL.Host, r.Node.DERPPort)
177177
}
178178

coderd/workspaceagents.go

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -866,15 +866,14 @@ func (api *API) derpMapUpdates(rw http.ResponseWriter, r *http.Request) {
866866
lastDERPMap = derpMap
867867
}
868868

869+
ticker.Reset(api.Options.DERPMapUpdateFrequency)
869870
select {
870871
case <-ctx.Done():
871872
return
872873
case <-api.ctx.Done():
873874
return
874875
case <-ticker.C:
875876
}
876-
877-
ticker.Reset(api.Options.DERPMapUpdateFrequency)
878877
}
879878
}
880879

coderd/workspaceagents_test.go

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1270,11 +1270,9 @@ func TestWorkspaceAgent_UpdatedDERP(t *testing.T) {
12701270
defer closer.Close()
12711271
user := coderdtest.CreateFirstUser(t, client)
12721272

1273-
originalDerpMap := api.DERPMap()
1274-
require.NotNil(t, originalDerpMap)
1275-
12761273
// Change the DERP mapper to our custom one.
12771274
var currentDerpMap atomic.Pointer[tailcfg.DERPMap]
1275+
originalDerpMap, _ := tailnettest.RunDERPAndSTUN(t)
12781276
currentDerpMap.Store(originalDerpMap)
12791277
derpMapFn := func(_ *tailcfg.DERPMap) *tailcfg.DERPMap {
12801278
return currentDerpMap.Load().Clone()
@@ -1304,10 +1302,8 @@ func TestWorkspaceAgent_UpdatedDERP(t *testing.T) {
13041302
resources := coderdtest.AwaitWorkspaceAgents(t, client, workspace.ID)
13051303
agentID := resources[0].Agents[0].ID
13061304

1307-
ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong)
1308-
defer cancel()
1309-
13101305
// Connect from a client.
1306+
ctx := testutil.Context(t, testutil.WaitLong)
13111307
conn1, err := client.DialWorkspaceAgent(ctx, agentID, &codersdk.DialWorkspaceAgentOptions{
13121308
Logger: logger.Named("client1"),
13131309
})
@@ -1328,7 +1324,14 @@ func TestWorkspaceAgent_UpdatedDERP(t *testing.T) {
13281324
currentDerpMap.Store(newDerpMap)
13291325

13301326
// Wait for the agent's DERP map to be updated.
1331-
// TODO: this
1327+
require.Eventually(t, func() bool {
1328+
conn := agentCloser.TailnetConn()
1329+
if conn == nil {
1330+
return false
1331+
}
1332+
regionIDs := conn.DERPMap().RegionIDs()
1333+
return len(regionIDs) == 1 && regionIDs[0] == 2 && conn.Node().PreferredDERP == 2
1334+
}, testutil.WaitLong, testutil.IntervalFast)
13321335

13331336
// Wait for the DERP map to be updated on the existing client.
13341337
require.Eventually(t, func() bool {

enterprise/coderd/coderd.go

Lines changed: 34 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -578,7 +578,7 @@ func (api *API) updateEntitlements(ctx context.Context) error {
578578

579579
if initial, changed, enabled := featureChanged(codersdk.FeatureWorkspaceProxy); shouldUpdate(initial, changed, enabled) {
580580
if enabled {
581-
fn := derpMapper(api.Logger, api.ProxyHealth)
581+
fn := derpMapper(api.Logger, api.DeploymentValues, api.ProxyHealth)
582582
api.AGPL.DERPMapper.Store(&fn)
583583
} else {
584584
api.AGPL.DERPMapper.Store(nil)
@@ -643,7 +643,7 @@ var (
643643
lastDerpConflictLog time.Time
644644
)
645645

646-
func derpMapper(logger slog.Logger, proxyHealth *proxyhealth.ProxyHealth) func(*tailcfg.DERPMap) *tailcfg.DERPMap {
646+
func derpMapper(logger slog.Logger, cfg *codersdk.DeploymentValues, proxyHealth *proxyhealth.ProxyHealth) func(*tailcfg.DERPMap) *tailcfg.DERPMap {
647647
return func(derpMap *tailcfg.DERPMap) *tailcfg.DERPMap {
648648
derpMap = derpMap.Clone()
649649

@@ -718,7 +718,7 @@ func derpMapper(logger slog.Logger, proxyHealth *proxyhealth.ProxyHealth) func(*
718718
lastDerpConflictMutex.Unlock()
719719
if shouldLog {
720720
logger.Warn(context.Background(),
721-
"proxy region ID or code conflict, ignoring workspace proxy for DERP map. Please change the flags on the affected proxy to use a different region ID and code",
721+
"proxy region ID or code conflict, ignoring workspace proxy for DERP map",
722722
slog.F("proxy_id", status.Proxy.ID),
723723
slog.F("proxy_name", status.Proxy.Name),
724724
slog.F("proxy_display_name", status.Proxy.DisplayName),
@@ -733,20 +733,43 @@ func derpMapper(logger slog.Logger, proxyHealth *proxyhealth.ProxyHealth) func(*
733733
}
734734
}
735735

736+
var stunNodes []*tailcfg.DERPNode
737+
if !cfg.DERP.Config.BlockDirect.Value() {
738+
stunNodes, err = agpltailnet.STUNNodes(regionID, cfg.DERP.Server.STUNAddresses)
739+
if err != nil {
740+
// Log a warning if we haven't logged one in the last
741+
// minute.
742+
lastDerpConflictMutex.Lock()
743+
shouldLog := lastDerpConflictLog.IsZero() || time.Since(lastDerpConflictLog) > time.Minute
744+
if shouldLog {
745+
lastDerpConflictLog = time.Now()
746+
}
747+
lastDerpConflictMutex.Unlock()
748+
if shouldLog {
749+
logger.Error(context.Background(), "failed to calculate STUN nodes", slog.Error(err))
750+
}
751+
752+
// No continue because we can keep going.
753+
stunNodes = []*tailcfg.DERPNode{}
754+
}
755+
}
756+
757+
nodes := append(stunNodes, &tailcfg.DERPNode{
758+
Name: fmt.Sprintf("%da", regionID),
759+
RegionID: regionID,
760+
HostName: u.Hostname(),
761+
DERPPort: portInt,
762+
STUNPort: -1,
763+
ForceHTTP: u.Scheme == "http",
764+
})
765+
736766
derpMap.Regions[regionID] = &tailcfg.DERPRegion{
737767
// EmbeddedRelay ONLY applies to the primary.
738768
EmbeddedRelay: false,
739769
RegionID: regionID,
740770
RegionCode: regionCode,
741771
RegionName: status.Proxy.Name,
742-
Nodes: []*tailcfg.DERPNode{{
743-
Name: fmt.Sprintf("%da", regionID),
744-
RegionID: regionID,
745-
HostName: u.Hostname(),
746-
DERPPort: portInt,
747-
STUNPort: -1,
748-
ForceHTTP: u.Scheme == "http",
749-
}},
772+
Nodes: nodes,
750773
}
751774
}
752775

enterprise/wsproxy/wsproxy_test.go

Lines changed: 139 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ import (
2525
"github.com/coder/coder/enterprise/coderd/coderdenttest"
2626
"github.com/coder/coder/enterprise/coderd/license"
2727
"github.com/coder/coder/provisioner/echo"
28+
"github.com/coder/coder/tailnet"
2829
"github.com/coder/coder/testutil"
2930
)
3031

@@ -184,24 +185,24 @@ resourceLoop:
184185
require.Equal(t, "coder_best-proxy", proxy1Region.RegionCode)
185186
require.Equal(t, 10001, proxy1Region.RegionID)
186187
require.False(t, proxy1Region.EmbeddedRelay)
187-
require.Len(t, proxy1Region.Nodes, 1)
188-
require.Equal(t, "10001a", proxy1Region.Nodes[0].Name)
189-
require.Equal(t, 10001, proxy1Region.Nodes[0].RegionID)
190-
require.Equal(t, proxyAPI1.Options.AccessURL.Hostname(), proxy1Region.Nodes[0].HostName)
191-
require.Equal(t, proxyAPI1.Options.AccessURL.Port(), fmt.Sprint(proxy1Region.Nodes[0].DERPPort))
192-
require.Equal(t, proxyAPI1.Options.AccessURL.Scheme == "http", proxy1Region.Nodes[0].ForceHTTP)
188+
require.Len(t, proxy1Region.Nodes, 2) // proxy + stun
189+
require.Equal(t, "10001a", proxy1Region.Nodes[1].Name)
190+
require.Equal(t, 10001, proxy1Region.Nodes[1].RegionID)
191+
require.Equal(t, proxyAPI1.Options.AccessURL.Hostname(), proxy1Region.Nodes[1].HostName)
192+
require.Equal(t, proxyAPI1.Options.AccessURL.Port(), fmt.Sprint(proxy1Region.Nodes[1].DERPPort))
193+
require.Equal(t, proxyAPI1.Options.AccessURL.Scheme == "http", proxy1Region.Nodes[1].ForceHTTP)
193194

194195
// The second proxy region:
195196
require.Equal(t, "worst-proxy", proxy2Region.RegionName)
196197
require.Equal(t, "coder_worst-proxy", proxy2Region.RegionCode)
197198
require.Equal(t, 10002, proxy2Region.RegionID)
198199
require.False(t, proxy2Region.EmbeddedRelay)
199-
require.Len(t, proxy2Region.Nodes, 1)
200-
require.Equal(t, "10002a", proxy2Region.Nodes[0].Name)
201-
require.Equal(t, 10002, proxy2Region.Nodes[0].RegionID)
202-
require.Equal(t, proxyAPI2.Options.AccessURL.Hostname(), proxy2Region.Nodes[0].HostName)
203-
require.Equal(t, proxyAPI2.Options.AccessURL.Port(), fmt.Sprint(proxy2Region.Nodes[0].DERPPort))
204-
require.Equal(t, proxyAPI2.Options.AccessURL.Scheme == "http", proxy2Region.Nodes[0].ForceHTTP)
200+
require.Len(t, proxy2Region.Nodes, 2) // proxy + stun
201+
require.Equal(t, "10002a", proxy2Region.Nodes[1].Name)
202+
require.Equal(t, 10002, proxy2Region.Nodes[1].RegionID)
203+
require.Equal(t, proxyAPI2.Options.AccessURL.Hostname(), proxy2Region.Nodes[1].HostName)
204+
require.Equal(t, proxyAPI2.Options.AccessURL.Port(), fmt.Sprint(proxy2Region.Nodes[1].DERPPort))
205+
require.Equal(t, proxyAPI2.Options.AccessURL.Scheme == "http", proxy2Region.Nodes[1].ForceHTTP)
205206
})
206207

207208
t.Run("ConnectDERP", func(t *testing.T) {
@@ -239,6 +240,132 @@ resourceLoop:
239240
})
240241
}
241242

243+
func TestDERPMapStunNodes(t *testing.T) {
244+
t.Parallel()
245+
246+
deploymentValues := coderdtest.DeploymentValues(t)
247+
deploymentValues.Experiments = []string{
248+
string(codersdk.ExperimentMoons),
249+
"*",
250+
}
251+
stunAddresses := []string{
252+
"stun.l.google.com:19302",
253+
"stun1.l.google.com:19302",
254+
"stun2.l.google.com:19302",
255+
"stun3.l.google.com:19302",
256+
"stun4.l.google.com:19302",
257+
}
258+
deploymentValues.DERP.Server.STUNAddresses = stunAddresses
259+
260+
client, closer, api, _ := coderdenttest.NewWithAPI(t, &coderdenttest.Options{
261+
Options: &coderdtest.Options{
262+
DeploymentValues: deploymentValues,
263+
AppHostname: "*.primary.test.coder.com",
264+
IncludeProvisionerDaemon: true,
265+
RealIPConfig: &httpmw.RealIPConfig{
266+
TrustedOrigins: []*net.IPNet{{
267+
IP: net.ParseIP("127.0.0.1"),
268+
Mask: net.CIDRMask(8, 32),
269+
}},
270+
TrustedHeaders: []string{
271+
"CF-Connecting-IP",
272+
},
273+
},
274+
},
275+
LicenseOptions: &coderdenttest.LicenseOptions{
276+
Features: license.Features{
277+
codersdk.FeatureWorkspaceProxy: 1,
278+
},
279+
},
280+
})
281+
t.Cleanup(func() {
282+
_ = closer.Close()
283+
})
284+
285+
// Create a running external proxy.
286+
_ = coderdenttest.NewWorkspaceProxy(t, api, client, &coderdenttest.ProxyOptions{
287+
Name: "cool-proxy",
288+
})
289+
290+
// Wait for both running proxies to become healthy.
291+
require.Eventually(t, func() bool {
292+
healthCtx := testutil.Context(t, testutil.WaitLong)
293+
err := api.ProxyHealth.ForceUpdate(healthCtx)
294+
if !assert.NoError(t, err) {
295+
return false
296+
}
297+
298+
regions, err := client.Regions(healthCtx)
299+
if !assert.NoError(t, err) {
300+
return false
301+
}
302+
if !assert.Len(t, regions, 2) {
303+
return false
304+
}
305+
306+
// All regions should be healthy.
307+
for _, r := range regions {
308+
if !r.Healthy {
309+
return false
310+
}
311+
}
312+
return true
313+
}, testutil.WaitLong, testutil.IntervalMedium)
314+
315+
// Get the DERP map and ensure that the built-in region and the proxy region
316+
// both have the STUN nodes.
317+
ctx := testutil.Context(t, testutil.WaitLong)
318+
connInfo, err := client.WorkspaceAgentConnectionInfoGeneric(ctx)
319+
require.NoError(t, err)
320+
321+
// There should be two DERP servers in the map: the primary and the
322+
// proxy.
323+
require.NotNil(t, connInfo.DERPMap)
324+
require.Len(t, connInfo.DERPMap.Regions, 2)
325+
326+
var (
327+
primaryRegion *tailcfg.DERPRegion
328+
proxyRegion *tailcfg.DERPRegion
329+
)
330+
for _, r := range connInfo.DERPMap.Regions {
331+
if r.EmbeddedRelay {
332+
primaryRegion = r
333+
continue
334+
}
335+
if r.RegionName == "cool-proxy" {
336+
proxyRegion = r
337+
continue
338+
}
339+
340+
t.Fatalf("unexpected region: %+v", r)
341+
}
342+
343+
// The primary region:
344+
require.Equal(t, "Coder Embedded Relay", primaryRegion.RegionName)
345+
require.Equal(t, "coder", primaryRegion.RegionCode)
346+
require.Equal(t, 999, primaryRegion.RegionID)
347+
require.True(t, primaryRegion.EmbeddedRelay)
348+
require.Len(t, primaryRegion.Nodes, len(stunAddresses)+1)
349+
350+
// The proxy region:
351+
require.Equal(t, "cool-proxy", proxyRegion.RegionName)
352+
require.Equal(t, "coder_cool-proxy", proxyRegion.RegionCode)
353+
require.Equal(t, 10001, proxyRegion.RegionID)
354+
require.False(t, proxyRegion.EmbeddedRelay)
355+
require.Len(t, proxyRegion.Nodes, len(stunAddresses)+1)
356+
357+
for _, region := range []*tailcfg.DERPRegion{primaryRegion, proxyRegion} {
358+
stunNodes, err := tailnet.STUNNodes(region.RegionID, stunAddresses)
359+
require.NoError(t, err)
360+
require.Len(t, stunNodes, len(stunAddresses))
361+
362+
require.Equal(t, stunNodes, region.Nodes[:len(stunNodes)])
363+
364+
// The last node should be the Coder server.
365+
require.NotZero(t, region.Nodes[len(region.Nodes)-1].DERPPort)
366+
}
367+
}
368+
242369
func TestDERPEndToEnd(t *testing.T) {
243370
t.Parallel()
244371

0 commit comments

Comments
 (0)