From eae4c3a5965cabe673bb9f43e6b231cf6865fee3 Mon Sep 17 00:00:00 2001 From: Dean Sheather Date: Thu, 27 Apr 2023 16:31:36 +0000 Subject: [PATCH 01/22] chore: add derpserver to proxy, add proxies to derpmap --- cli/server.go | 2 +- coderd/coderd.go | 35 +++++---- coderd/prometheusmetrics/prometheusmetrics.go | 3 +- .../prometheusmetrics_test.go | 6 +- coderd/provisionerjobs.go | 2 +- coderd/workspaceagents.go | 14 ++-- coderd/workspacebuilds.go | 2 +- enterprise/coderd/coderd.go | 75 +++++++++++++++++++ enterprise/wsproxy/wsproxy.go | 34 ++++++--- 9 files changed, 139 insertions(+), 34 deletions(-) diff --git a/cli/server.go b/cli/server.go index 039eeecef8d0a..786306137f0c8 100644 --- a/cli/server.go +++ b/cli/server.go @@ -759,7 +759,7 @@ func (r *RootCmd) Server(newAPI func(context.Context, *coderd.Options) (*coderd. if cfg.Prometheus.Enable { // Agent metrics require reference to the tailnet coordinator, so must be initiated after Coder API. - closeAgentsFunc, err := prometheusmetrics.Agents(ctx, logger, options.PrometheusRegistry, coderAPI.Database, &coderAPI.TailnetCoordinator, options.DERPMap, coderAPI.Options.AgentInactiveDisconnectTimeout, 0) + closeAgentsFunc, err := prometheusmetrics.Agents(ctx, logger, options.PrometheusRegistry, coderAPI.Database, &coderAPI.TailnetCoordinator, coderAPI.DERPMap, coderAPI.Options.AgentInactiveDisconnectTimeout, 0) if err != nil { return xerrors.Errorf("register agents prometheus metric: %w", err) } diff --git a/coderd/coderd.go b/coderd/coderd.go index 3a274cf7deca6..2b6cf1f57d603 100644 --- a/coderd/coderd.go +++ b/coderd/coderd.go @@ -35,13 +35,11 @@ import ( "tailscale.com/types/key" "tailscale.com/util/singleflight" - "cdr.dev/slog" - - "github.com/coder/coder/buildinfo" - "github.com/coder/coder/codersdk/agentsdk" - // Used for swagger docs. 
_ "github.com/coder/coder/coderd/apidoc" + + "cdr.dev/slog" + "github.com/coder/coder/buildinfo" "github.com/coder/coder/coderd/audit" "github.com/coder/coder/coderd/awsidentity" "github.com/coder/coder/coderd/database" @@ -63,6 +61,7 @@ import ( "github.com/coder/coder/coderd/workspaceapps" "github.com/coder/coder/coderd/wsconncache" "github.com/coder/coder/codersdk" + "github.com/coder/coder/codersdk/agentsdk" "github.com/coder/coder/provisionerd/proto" "github.com/coder/coder/provisionersdk" "github.com/coder/coder/site" @@ -251,14 +250,6 @@ func New(options *Options) *API { v := schedule.NewAGPLTemplateScheduleStore() options.TemplateScheduleStore.Store(&v) } - if options.HealthcheckFunc == nil { - options.HealthcheckFunc = func(ctx context.Context) (*healthcheck.Report, error) { - return healthcheck.Run(ctx, &healthcheck.ReportOptions{ - AccessURL: options.AccessURL, - DERPMap: options.DERPMap.Clone(), - }) - } - } if options.HealthcheckTimeout == 0 { options.HealthcheckTimeout = 30 * time.Second } @@ -325,6 +316,14 @@ func New(options *Options) *API { Experiments: experiments, healthCheckGroup: &singleflight.Group[string, *healthcheck.Report]{}, } + if options.HealthcheckFunc == nil { + options.HealthcheckFunc = func(ctx context.Context) (*healthcheck.Report, error) { + return healthcheck.Run(ctx, &healthcheck.ReportOptions{ + AccessURL: options.AccessURL, + DERPMap: api.DERPMap().Clone(), + }) + } + } if options.UpdateCheckOptions != nil { api.updateChecker = updatecheck.New( options.Database, @@ -814,6 +813,7 @@ type API struct { TailnetCoordinator atomic.Pointer[tailnet.Coordinator] QuotaCommitter atomic.Pointer[proto.QuotaCommitter] TemplateScheduleStore *atomic.Pointer[schedule.TemplateScheduleStore] + DERPMapper atomic.Pointer[func(derpMap *tailcfg.DERPMap) *tailcfg.DERPMap] HTTPAuth *HTTPAuthorizer @@ -954,6 +954,15 @@ func (api *API) CreateInMemoryProvisionerDaemon(ctx context.Context, debounce ti return 
proto.NewDRPCProvisionerDaemonClient(clientSession), nil } +func (api *API) DERPMap() *tailcfg.DERPMap { + fn := api.DERPMapper.Load() + if fn != nil { + return (*fn)(api.Options.DERPMap) + } + + return api.Options.DERPMap +} + // nolint:revive func initExperiments(log slog.Logger, raw []string) codersdk.Experiments { exps := make([]codersdk.Experiment, 0, len(raw)) diff --git a/coderd/prometheusmetrics/prometheusmetrics.go b/coderd/prometheusmetrics/prometheusmetrics.go index 6a616bcc05438..039f454e25a91 100644 --- a/coderd/prometheusmetrics/prometheusmetrics.go +++ b/coderd/prometheusmetrics/prometheusmetrics.go @@ -136,7 +136,7 @@ func Workspaces(ctx context.Context, registerer prometheus.Registerer, db databa } // Agents tracks the total number of workspaces with labels on status. -func Agents(ctx context.Context, logger slog.Logger, registerer prometheus.Registerer, db database.Store, coordinator *atomic.Pointer[tailnet.Coordinator], derpMap *tailcfg.DERPMap, agentInactiveDisconnectTimeout, duration time.Duration) (func(), error) { +func Agents(ctx context.Context, logger slog.Logger, registerer prometheus.Registerer, db database.Store, coordinator *atomic.Pointer[tailnet.Coordinator], derpMapFn func() *tailcfg.DERPMap, agentInactiveDisconnectTimeout, duration time.Duration) (func(), error) { if duration == 0 { duration = 1 * time.Minute } @@ -215,6 +215,7 @@ func Agents(ctx context.Context, logger slog.Logger, registerer prometheus.Regis logger.Debug(ctx, "Agent metrics collection is starting") timer := prometheus.NewTimer(metricsCollectorAgents) + derpMap := derpMapFn() workspaceRows, err := db.GetWorkspaces(ctx, database.GetWorkspacesParams{ AgentInactiveDisconnectTimeoutSeconds: int64(agentInactiveDisconnectTimeout.Seconds()), diff --git a/coderd/prometheusmetrics/prometheusmetrics_test.go b/coderd/prometheusmetrics/prometheusmetrics_test.go index 9101288cca570..822cedc0ae75f 100644 --- a/coderd/prometheusmetrics/prometheusmetrics_test.go +++ 
b/coderd/prometheusmetrics/prometheusmetrics_test.go @@ -15,6 +15,7 @@ import ( "github.com/prometheus/client_golang/prometheus" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + "tailscale.com/tailcfg" "cdr.dev/slog" "cdr.dev/slog/sloggers/slogtest" @@ -303,6 +304,9 @@ func TestAgents(t *testing.T) { coordinatorPtr := atomic.Pointer[tailnet.Coordinator]{} coordinatorPtr.Store(&coordinator) derpMap := tailnettest.RunDERPAndSTUN(t) + derpMapFn := func() *tailcfg.DERPMap { + return derpMap + } agentInactiveDisconnectTimeout := 1 * time.Hour // don't need to focus on this value in tests registry := prometheus.NewRegistry() @@ -312,7 +316,7 @@ func TestAgents(t *testing.T) { // when closeFunc, err := prometheusmetrics.Agents(ctx, slogtest.Make(t, &slogtest.Options{ IgnoreErrors: true, - }), registry, db, &coordinatorPtr, derpMap, agentInactiveDisconnectTimeout, time.Millisecond) + }), registry, db, &coordinatorPtr, derpMapFn, agentInactiveDisconnectTimeout, time.Millisecond) require.NoError(t, err) t.Cleanup(closeFunc) diff --git a/coderd/provisionerjobs.go b/coderd/provisionerjobs.go index e9c7273dab5e8..211b8f5aa7c59 100644 --- a/coderd/provisionerjobs.go +++ b/coderd/provisionerjobs.go @@ -261,7 +261,7 @@ func (api *API) provisionerJobResources(rw http.ResponseWriter, r *http.Request, } apiAgent, err := convertWorkspaceAgent( - api.DERPMap, *api.TailnetCoordinator.Load(), agent, convertApps(dbApps), api.AgentInactiveDisconnectTimeout, + api.DERPMap(), *api.TailnetCoordinator.Load(), agent, convertApps(dbApps), api.AgentInactiveDisconnectTimeout, api.DeploymentValues.AgentFallbackTroubleshootingURL.String(), ) if err != nil { diff --git a/coderd/workspaceagents.go b/coderd/workspaceagents.go index 1b58c9f2c3c0c..076c94fc40dc8 100644 --- a/coderd/workspaceagents.go +++ b/coderd/workspaceagents.go @@ -63,7 +63,7 @@ func (api *API) workspaceAgent(rw http.ResponseWriter, r *http.Request) { return } apiAgent, err := convertWorkspaceAgent( - 
api.DERPMap, *api.TailnetCoordinator.Load(), workspaceAgent, convertApps(dbApps), api.AgentInactiveDisconnectTimeout, + api.DERPMap(), *api.TailnetCoordinator.Load(), workspaceAgent, convertApps(dbApps), api.AgentInactiveDisconnectTimeout, api.DeploymentValues.AgentFallbackTroubleshootingURL.String(), ) if err != nil { @@ -88,7 +88,7 @@ func (api *API) workspaceAgentManifest(rw http.ResponseWriter, r *http.Request) ctx := r.Context() workspaceAgent := httpmw.WorkspaceAgent(r) apiAgent, err := convertWorkspaceAgent( - api.DERPMap, *api.TailnetCoordinator.Load(), workspaceAgent, nil, api.AgentInactiveDisconnectTimeout, + api.DERPMap(), *api.TailnetCoordinator.Load(), workspaceAgent, nil, api.AgentInactiveDisconnectTimeout, api.DeploymentValues.AgentFallbackTroubleshootingURL.String(), ) if err != nil { @@ -162,7 +162,7 @@ func (api *API) workspaceAgentManifest(rw http.ResponseWriter, r *http.Request) httpapi.Write(ctx, rw, http.StatusOK, agentsdk.Manifest{ Apps: convertApps(dbApps), - DERPMap: api.DERPMap, + DERPMap: api.DERPMap(), GitAuthConfigs: len(api.GitAuthConfigs), EnvironmentVariables: apiAgent.EnvironmentVariables, StartupScript: apiAgent.StartupScript, @@ -190,7 +190,7 @@ func (api *API) postWorkspaceAgentStartup(rw http.ResponseWriter, r *http.Reques ctx := r.Context() workspaceAgent := httpmw.WorkspaceAgent(r) apiAgent, err := convertWorkspaceAgent( - api.DERPMap, *api.TailnetCoordinator.Load(), workspaceAgent, nil, api.AgentInactiveDisconnectTimeout, + api.DERPMap(), *api.TailnetCoordinator.Load(), workspaceAgent, nil, api.AgentInactiveDisconnectTimeout, api.DeploymentValues.AgentFallbackTroubleshootingURL.String(), ) if err != nil { @@ -567,7 +567,7 @@ func (api *API) workspaceAgentListeningPorts(rw http.ResponseWriter, r *http.Req workspaceAgent := httpmw.WorkspaceAgentParam(r) apiAgent, err := convertWorkspaceAgent( - api.DERPMap, *api.TailnetCoordinator.Load(), workspaceAgent, nil, api.AgentInactiveDisconnectTimeout, + api.DERPMap(), 
*api.TailnetCoordinator.Load(), workspaceAgent, nil, api.AgentInactiveDisconnectTimeout, api.DeploymentValues.AgentFallbackTroubleshootingURL.String(), ) if err != nil { @@ -663,7 +663,7 @@ func (api *API) dialWorkspaceAgentTailnet(agentID uuid.UUID) (*codersdk.Workspac clientConn, serverConn := net.Pipe() conn, err := tailnet.NewConn(&tailnet.Options{ Addresses: []netip.Prefix{netip.PrefixFrom(tailnet.IP(), 128)}, - DERPMap: api.DERPMap, + DERPMap: api.DERPMap(), Logger: api.Logger.Named("tailnet"), }) if err != nil { @@ -732,7 +732,7 @@ func (api *API) workspaceAgentConnection(rw http.ResponseWriter, r *http.Request ctx := r.Context() httpapi.Write(ctx, rw, http.StatusOK, codersdk.WorkspaceAgentConnectionInfo{ - DERPMap: api.DERPMap, + DERPMap: api.DERPMap(), }) } diff --git a/coderd/workspacebuilds.go b/coderd/workspacebuilds.go index c99e3b9f75d9b..6f0c6f8e83b5b 100644 --- a/coderd/workspacebuilds.go +++ b/coderd/workspacebuilds.go @@ -1126,7 +1126,7 @@ func (api *API) convertWorkspaceBuild( for _, agent := range agents { apps := appsByAgentID[agent.ID] apiAgent, err := convertWorkspaceAgent( - api.DERPMap, *api.TailnetCoordinator.Load(), agent, convertApps(apps), api.AgentInactiveDisconnectTimeout, + api.DERPMap(), *api.TailnetCoordinator.Load(), agent, convertApps(apps), api.AgentInactiveDisconnectTimeout, api.DeploymentValues.AgentFallbackTroubleshootingURL.String(), ) if err != nil { diff --git a/enterprise/coderd/coderd.go b/enterprise/coderd/coderd.go index 0979a25809d43..7de6a943fb84a 100644 --- a/enterprise/coderd/coderd.go +++ b/enterprise/coderd/coderd.go @@ -5,11 +5,15 @@ import ( "crypto/ed25519" "crypto/tls" "crypto/x509" + "fmt" "net/http" + "net/url" + "strconv" "sync" "time" "golang.org/x/xerrors" + "tailscale.com/tailcfg" "github.com/cenkalti/backoff/v4" "github.com/go-chi/chi/v5" @@ -429,11 +433,82 @@ func (api *API) updateEntitlements(ctx context.Context) error { } } + if changed, enabled := featureChanged(codersdk.FeatureWorkspaceProxy); 
changed { + if enabled { + fn := derpMapper(api.ProxyHealth) + api.AGPL.DERPMapper.Store(&fn) + } else { + api.AGPL.DERPMapper.Store(nil) + } + } + api.entitlements = entitlements return nil } +func derpMapper(proxyHealth *proxyhealth.ProxyHealth) func(*tailcfg.DERPMap) *tailcfg.DERPMap { + return func(derpMap *tailcfg.DERPMap) *tailcfg.DERPMap { + derpMap = derpMap.Clone() + + // Add all healthy proxies to the DERP map. + // TODO: @dean proxies should be able to disable DERP and report that + // when they register, and this should respect that. + statusMap := proxyHealth.HealthStatus() + for _, status := range statusMap { + // TODO: @dean region ID should be constant and unique for each + // proxy. Make the proxies report these values (from a flag like the + // primary) when they register. + const ( + regionID = -999 + regionCode = "proxy" + ) + + if status.Status != proxyhealth.Healthy { + // Only add healthy proxies to the DERP map. + continue + } + + u, err := url.Parse(status.Proxy.Url) + if err != nil { + // Not really any need to log, the proxy should be unreachable + // anyways and filtered out by the above condition. + continue + } + port := u.Port() + if port == "" { + port = "80" + if u.Scheme == "https" { + port = "443" + } + } + portInt, err := strconv.Atoi(port) + if err != nil { + // Not really any need to log, the proxy should be unreachable + // anyways and filtered out by the above condition. 
+ continue + } + + derpMap.Regions[regionID] = &tailcfg.DERPRegion{ + EmbeddedRelay: true, + RegionID: regionID, + RegionCode: regionCode, + RegionName: status.Proxy.Name, + Nodes: []*tailcfg.DERPNode{{ + Name: fmt.Sprintf("%db", regionID), + RegionID: regionID, + HostName: u.Hostname(), + DERPPort: portInt, + STUNPort: -1, + ForceHTTP: u.Scheme == "http", + }}, + } + } + + return derpMap + } +} + // @Summary Get entitlements // @ID get-entitlements // @Security CoderSessionToken diff --git a/enterprise/wsproxy/wsproxy.go b/enterprise/wsproxy/wsproxy.go index 3f03d486fe87c..e03288a70726f 100644 --- a/enterprise/wsproxy/wsproxy.go +++ b/enterprise/wsproxy/wsproxy.go @@ -10,22 +10,25 @@ import ( "strings" "time" - "github.com/coder/coder/coderd/httpapi" - "github.com/go-chi/chi/v5" "github.com/google/uuid" "github.com/prometheus/client_golang/prometheus" "go.opentelemetry.io/otel/trace" "golang.org/x/xerrors" + "tailscale.com/derp" + "tailscale.com/derp/derphttp" + "tailscale.com/types/key" "cdr.dev/slog" "github.com/coder/coder/buildinfo" + "github.com/coder/coder/coderd/httpapi" "github.com/coder/coder/coderd/httpmw" "github.com/coder/coder/coderd/tracing" "github.com/coder/coder/coderd/workspaceapps" "github.com/coder/coder/coderd/wsconncache" "github.com/coder/coder/codersdk" "github.com/coder/coder/enterprise/wsproxy/wsproxysdk" + "github.com/coder/coder/tailnet" ) type Options struct { @@ -49,8 +52,8 @@ type Options struct { // options.AppHostname is set. AppHostnameRegex *regexp.Regexp - RealIPConfig *httpmw.RealIPConfig - + RealIPConfig *httpmw.RealIPConfig + DERPServer *derp.Server Tracing trace.TracerProvider PrometheusRegistry *prometheus.Registry @@ -96,12 +99,10 @@ type Server struct { // the moon's token. SDKClient *wsproxysdk.Client - // TODO: Missing: - // - derpserver - // Used for graceful shutdown. Required for the dialer. 
- ctx context.Context - cancel context.CancelFunc + ctx context.Context + cancel context.CancelFunc + derpCloseFunc func() } // New creates a new workspace proxy server. This requires a primary coderd @@ -185,6 +186,12 @@ func New(ctx context.Context, opts *Options) (*Server, error) { SecureAuthCookie: opts.SecureAuthCookie, } + if opts.DERPServer == nil { + opts.DERPServer = derp.NewServer(key.NewNode(), tailnet.Logger(opts.Logger.Named("derp"))) + } + derpHandler := derphttp.Handler(opts.DERPServer) + derpHandler, s.derpCloseFunc = tailnet.WithWebsocketSupport(opts.DERPServer, derpHandler) + // Routes apiRateLimiter := httpmw.RateLimit(opts.APIRateLimit, time.Minute) // Persistent middlewares to all routes @@ -229,6 +236,14 @@ func New(ctx context.Context, opts *Options) (*Server, error) { s.AppServer.Attach(r) }) + r.Route("/derp", func(r chi.Router) { + r.Get("/", derpHandler.ServeHTTP) + // This is used when UDP is blocked, and latency must be checked via HTTP(s). + r.Get("/latency-check", func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + }) + }) + r.Get("/buildinfo", s.buildInfo) r.Get("/healthz", func(w http.ResponseWriter, r *http.Request) { _, _ = w.Write([]byte("OK")) }) // TODO: @emyrk should this be authenticated or debounced? 
@@ -239,6 +254,7 @@ func New(ctx context.Context, opts *Options) (*Server, error) { func (s *Server) Close() error { s.cancel() + s.derpCloseFunc() return s.AppServer.Close() } From ac995253b0139c5845b4aa7daf9b64b38e1fbe0b Mon Sep 17 00:00:00 2001 From: Dean Sheather Date: Mon, 1 May 2023 18:30:36 +0000 Subject: [PATCH 02/22] progress --- coderd/apidoc/docs.go | 10 ++ coderd/apidoc/swagger.json | 10 ++ coderd/database/dbfake/databasefake.go | 2 + coderd/database/dump.sql | 19 +++- .../000121_workspace_proxy_region_id.down.sql | 4 + .../000121_workspace_proxy_region_id.up.sql | 6 ++ coderd/database/models.go | 2 + coderd/database/queries.sql.go | 42 ++++++-- coderd/database/queries/proxies.sql | 8 +- coderd/database/unique_constraint.go | 1 + codersdk/deployment.go | 8 +- codersdk/workspaceproxy.go | 8 +- docs/admin/audit-logs.md | 2 +- docs/api/enterprise.md | 6 ++ docs/api/schemas.md | 14 ++- docs/cli/server.md | 4 +- enterprise/audit/table.go | 2 + enterprise/cli/proxyserver.go | 1 + enterprise/coderd/coderd.go | 101 +++++++++++++++--- enterprise/coderd/workspaceproxy.go | 12 ++- enterprise/wsproxy/wsproxy.go | 13 +-- enterprise/wsproxy/wsproxysdk/wsproxysdk.go | 3 + site/src/api/typesGenerated.ts | 2 + 23 files changed, 232 insertions(+), 48 deletions(-) create mode 100644 coderd/database/migrations/000121_workspace_proxy_region_id.down.sql create mode 100644 coderd/database/migrations/000121_workspace_proxy_region_id.up.sql diff --git a/coderd/apidoc/docs.go b/coderd/apidoc/docs.go index 91bae7945e422..d75b0e13e560e 100644 --- a/coderd/apidoc/docs.go +++ b/coderd/apidoc/docs.go @@ -9835,6 +9835,12 @@ const docTemplate = `{ "deleted": { "type": "boolean" }, + "derp_enabled": { + "type": "boolean" + }, + "display_name": { + "type": "string" + }, "icon": { "type": "string" }, @@ -10534,6 +10540,10 @@ const docTemplate = `{ "description": "AccessURL that hits the workspace proxy api.", "type": "string" }, + "derp_enabled": { + "description": "DerpEnabled indicates 
whether the proxy should be included in the DERP\nmap or not.", + "type": "boolean" + }, "wildcard_hostname": { "description": "WildcardHostname that the workspace proxy api is serving for subdomain apps.", "type": "string" diff --git a/coderd/apidoc/swagger.json b/coderd/apidoc/swagger.json index 7e279b3643e56..67c7bd2a68d9c 100644 --- a/coderd/apidoc/swagger.json +++ b/coderd/apidoc/swagger.json @@ -8876,6 +8876,12 @@ "deleted": { "type": "boolean" }, + "derp_enabled": { + "type": "boolean" + }, + "display_name": { + "type": "string" + }, "icon": { "type": "string" }, @@ -9552,6 +9558,10 @@ "description": "AccessURL that hits the workspace proxy api.", "type": "string" }, + "derp_enabled": { + "description": "DerpEnabled indicates whether the proxy should be included in the DERP\nmap or not.", + "type": "boolean" + }, "wildcard_hostname": { "description": "WildcardHostname that the workspace proxy api is serving for subdomain apps.", "type": "string" diff --git a/coderd/database/dbfake/databasefake.go b/coderd/database/dbfake/databasefake.go index 7f80385af2dfb..ffdf01e5afaf6 100644 --- a/coderd/database/dbfake/databasefake.go +++ b/coderd/database/dbfake/databasefake.go @@ -5206,6 +5206,7 @@ func (q *fakeQuerier) InsertWorkspaceProxy(_ context.Context, arg database.Inser Name: arg.Name, DisplayName: arg.DisplayName, Icon: arg.Icon, + DerpEnabled: arg.DerpEnabled, TokenHashedSecret: arg.TokenHashedSecret, CreatedAt: arg.CreatedAt, UpdatedAt: arg.UpdatedAt, @@ -5223,6 +5224,7 @@ func (q *fakeQuerier) RegisterWorkspaceProxy(_ context.Context, arg database.Reg if p.ID == arg.ID { p.Url = arg.Url p.WildcardHostname = arg.WildcardHostname + p.DerpEnabled = arg.DerpEnabled p.UpdatedAt = database.Now() q.workspaceProxies[i] = p return p, nil diff --git a/coderd/database/dump.sql b/coderd/database/dump.sql index bb8c12d2bc654..650bb609d3bbe 100644 --- a/coderd/database/dump.sql +++ b/coderd/database/dump.sql @@ -676,7 +676,9 @@ CREATE TABLE workspace_proxies ( created_at 
timestamp with time zone NOT NULL, updated_at timestamp with time zone NOT NULL, deleted boolean NOT NULL, - token_hashed_secret bytea NOT NULL + token_hashed_secret bytea NOT NULL, + region_id integer NOT NULL, + derp_enabled boolean DEFAULT true NOT NULL ); COMMENT ON COLUMN workspace_proxies.icon IS 'Expects an emoji character. (/emojis/1f1fa-1f1f8.png)'; @@ -689,6 +691,16 @@ COMMENT ON COLUMN workspace_proxies.deleted IS 'Boolean indicator of a deleted w COMMENT ON COLUMN workspace_proxies.token_hashed_secret IS 'Hashed secret is used to authenticate the workspace proxy using a session token.'; +CREATE SEQUENCE workspace_proxies_region_id_seq + AS integer + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + +ALTER SEQUENCE workspace_proxies_region_id_seq OWNED BY workspace_proxies.region_id; + CREATE TABLE workspace_resource_metadata ( workspace_resource_id uuid NOT NULL, key character varying(1024) NOT NULL, @@ -739,6 +751,8 @@ ALTER TABLE ONLY provisioner_job_logs ALTER COLUMN id SET DEFAULT nextval('provi ALTER TABLE ONLY workspace_agent_startup_logs ALTER COLUMN id SET DEFAULT nextval('workspace_agent_startup_logs_id_seq'::regclass); +ALTER TABLE ONLY workspace_proxies ALTER COLUMN region_id SET DEFAULT nextval('workspace_proxies_region_id_seq'::regclass); + ALTER TABLE ONLY workspace_resource_metadata ALTER COLUMN id SET DEFAULT nextval('workspace_resource_metadata_id_seq'::regclass); ALTER TABLE ONLY workspace_agent_stats @@ -861,6 +875,9 @@ ALTER TABLE ONLY workspace_builds ALTER TABLE ONLY workspace_proxies ADD CONSTRAINT workspace_proxies_pkey PRIMARY KEY (id); +ALTER TABLE ONLY workspace_proxies + ADD CONSTRAINT workspace_proxies_region_id_unique UNIQUE (region_id); + ALTER TABLE ONLY workspace_resource_metadata ADD CONSTRAINT workspace_resource_metadata_name UNIQUE (workspace_resource_id, key); diff --git a/coderd/database/migrations/000121_workspace_proxy_region_id.down.sql 
b/coderd/database/migrations/000121_workspace_proxy_region_id.down.sql new file mode 100644 index 0000000000000..8c2b96c37447c --- /dev/null +++ b/coderd/database/migrations/000121_workspace_proxy_region_id.down.sql @@ -0,0 +1,4 @@ +ALTER TABLE workspace_proxies + DROP CONSTRAINT workspace_proxies_region_id_unique, + DROP COLUMN region_id, + DROP COLUMN derp_enabled; diff --git a/coderd/database/migrations/000121_workspace_proxy_region_id.up.sql b/coderd/database/migrations/000121_workspace_proxy_region_id.up.sql new file mode 100644 index 0000000000000..9964b2110b09c --- /dev/null +++ b/coderd/database/migrations/000121_workspace_proxy_region_id.up.sql @@ -0,0 +1,6 @@ +ALTER TABLE workspace_proxies + -- adding a serial to a table without a default value will be filled as you + -- would expect + ADD COLUMN region_id serial NOT NULL, + ADD COLUMN derp_enabled boolean NOT NULL DEFAULT true, + ADD CONSTRAINT workspace_proxies_region_id_unique UNIQUE (region_id); diff --git a/coderd/database/models.go b/coderd/database/models.go index bda061b89448d..babbb39548dfe 100644 --- a/coderd/database/models.go +++ b/coderd/database/models.go @@ -1686,6 +1686,8 @@ type WorkspaceProxy struct { Deleted bool `db:"deleted" json:"deleted"` // Hashed secret is used to authenticate the workspace proxy using a session token. 
TokenHashedSecret []byte `db:"token_hashed_secret" json:"token_hashed_secret"` + RegionID int32 `db:"region_id" json:"region_id"` + DerpEnabled bool `db:"derp_enabled" json:"derp_enabled"` } type WorkspaceResource struct { diff --git a/coderd/database/queries.sql.go b/coderd/database/queries.sql.go index e78e08632d835..bbe11cf399835 100644 --- a/coderd/database/queries.sql.go +++ b/coderd/database/queries.sql.go @@ -2817,7 +2817,7 @@ func (q *sqlQuerier) UpdateProvisionerJobWithCompleteByID(ctx context.Context, a const getWorkspaceProxies = `-- name: GetWorkspaceProxies :many SELECT - id, name, display_name, icon, url, wildcard_hostname, created_at, updated_at, deleted, token_hashed_secret + id, name, display_name, icon, url, wildcard_hostname, created_at, updated_at, deleted, token_hashed_secret, region_id, derp_enabled FROM workspace_proxies WHERE @@ -2844,6 +2844,8 @@ func (q *sqlQuerier) GetWorkspaceProxies(ctx context.Context) ([]WorkspaceProxy, &i.UpdatedAt, &i.Deleted, &i.TokenHashedSecret, + &i.RegionID, + &i.DerpEnabled, ); err != nil { return nil, err } @@ -2860,7 +2862,7 @@ func (q *sqlQuerier) GetWorkspaceProxies(ctx context.Context) ([]WorkspaceProxy, const getWorkspaceProxyByHostname = `-- name: GetWorkspaceProxyByHostname :one SELECT - id, name, display_name, icon, url, wildcard_hostname, created_at, updated_at, deleted, token_hashed_secret + id, name, display_name, icon, url, wildcard_hostname, created_at, updated_at, deleted, token_hashed_secret, region_id, derp_enabled FROM workspace_proxies WHERE @@ -2916,13 +2918,15 @@ func (q *sqlQuerier) GetWorkspaceProxyByHostname(ctx context.Context, arg GetWor &i.UpdatedAt, &i.Deleted, &i.TokenHashedSecret, + &i.RegionID, + &i.DerpEnabled, ) return i, err } const getWorkspaceProxyByID = `-- name: GetWorkspaceProxyByID :one SELECT - id, name, display_name, icon, url, wildcard_hostname, created_at, updated_at, deleted, token_hashed_secret + id, name, display_name, icon, url, wildcard_hostname, created_at, 
updated_at, deleted, token_hashed_secret, region_id, derp_enabled FROM workspace_proxies WHERE @@ -2945,13 +2949,15 @@ func (q *sqlQuerier) GetWorkspaceProxyByID(ctx context.Context, id uuid.UUID) (W &i.UpdatedAt, &i.Deleted, &i.TokenHashedSecret, + &i.RegionID, + &i.DerpEnabled, ) return i, err } const getWorkspaceProxyByName = `-- name: GetWorkspaceProxyByName :one SELECT - id, name, display_name, icon, url, wildcard_hostname, created_at, updated_at, deleted, token_hashed_secret + id, name, display_name, icon, url, wildcard_hostname, created_at, updated_at, deleted, token_hashed_secret, region_id, derp_enabled FROM workspace_proxies WHERE @@ -2975,6 +2981,8 @@ func (q *sqlQuerier) GetWorkspaceProxyByName(ctx context.Context, name string) ( &i.UpdatedAt, &i.Deleted, &i.TokenHashedSecret, + &i.RegionID, + &i.DerpEnabled, ) return i, err } @@ -2988,13 +2996,14 @@ INSERT INTO name, display_name, icon, + derp_enabled, token_hashed_secret, created_at, updated_at, deleted ) VALUES - ($1, '', '', $2, $3, $4, $5, $6, $7, false) RETURNING id, name, display_name, icon, url, wildcard_hostname, created_at, updated_at, deleted, token_hashed_secret + ($1, '', '', $2, $3, $4, $5, $6, $7, $8, false) RETURNING id, name, display_name, icon, url, wildcard_hostname, created_at, updated_at, deleted, token_hashed_secret, region_id, derp_enabled ` type InsertWorkspaceProxyParams struct { @@ -3002,6 +3011,7 @@ type InsertWorkspaceProxyParams struct { Name string `db:"name" json:"name"` DisplayName string `db:"display_name" json:"display_name"` Icon string `db:"icon" json:"icon"` + DerpEnabled bool `db:"derp_enabled" json:"derp_enabled"` TokenHashedSecret []byte `db:"token_hashed_secret" json:"token_hashed_secret"` CreatedAt time.Time `db:"created_at" json:"created_at"` UpdatedAt time.Time `db:"updated_at" json:"updated_at"` @@ -3013,6 +3023,7 @@ func (q *sqlQuerier) InsertWorkspaceProxy(ctx context.Context, arg InsertWorkspa arg.Name, arg.DisplayName, arg.Icon, + arg.DerpEnabled, 
arg.TokenHashedSecret, arg.CreatedAt, arg.UpdatedAt, @@ -3029,6 +3040,8 @@ func (q *sqlQuerier) InsertWorkspaceProxy(ctx context.Context, arg InsertWorkspa &i.UpdatedAt, &i.Deleted, &i.TokenHashedSecret, + &i.RegionID, + &i.DerpEnabled, ) return i, err } @@ -3037,22 +3050,29 @@ const registerWorkspaceProxy = `-- name: RegisterWorkspaceProxy :one UPDATE workspace_proxies SET - url = $1, - wildcard_hostname = $2, + url = $1 :: text, + wildcard_hostname = $2 :: text, + derp_enabled = $3 :: boolean, updated_at = Now() WHERE - id = $3 -RETURNING id, name, display_name, icon, url, wildcard_hostname, created_at, updated_at, deleted, token_hashed_secret + id = $4 +RETURNING id, name, display_name, icon, url, wildcard_hostname, created_at, updated_at, deleted, token_hashed_secret, region_id, derp_enabled ` type RegisterWorkspaceProxyParams struct { Url string `db:"url" json:"url"` WildcardHostname string `db:"wildcard_hostname" json:"wildcard_hostname"` + DerpEnabled bool `db:"derp_enabled" json:"derp_enabled"` ID uuid.UUID `db:"id" json:"id"` } func (q *sqlQuerier) RegisterWorkspaceProxy(ctx context.Context, arg RegisterWorkspaceProxyParams) (WorkspaceProxy, error) { - row := q.db.QueryRowContext(ctx, registerWorkspaceProxy, arg.Url, arg.WildcardHostname, arg.ID) + row := q.db.QueryRowContext(ctx, registerWorkspaceProxy, + arg.Url, + arg.WildcardHostname, + arg.DerpEnabled, + arg.ID, + ) var i WorkspaceProxy err := row.Scan( &i.ID, @@ -3065,6 +3085,8 @@ func (q *sqlQuerier) RegisterWorkspaceProxy(ctx context.Context, arg RegisterWor &i.UpdatedAt, &i.Deleted, &i.TokenHashedSecret, + &i.RegionID, + &i.DerpEnabled, ) return i, err } diff --git a/coderd/database/queries/proxies.sql b/coderd/database/queries/proxies.sql index 722138938f1ae..e46b0a55e037c 100644 --- a/coderd/database/queries/proxies.sql +++ b/coderd/database/queries/proxies.sql @@ -7,20 +7,22 @@ INSERT INTO name, display_name, icon, + derp_enabled, token_hashed_secret, created_at, updated_at, deleted ) VALUES - 
($1, '', '', $2, $3, $4, $5, $6, $7, false) RETURNING *; + ($1, '', '', $2, $3, $4, $5, $6, $7, $8, false) RETURNING *; -- name: RegisterWorkspaceProxy :one UPDATE workspace_proxies SET - url = @url, - wildcard_hostname = @wildcard_hostname, + url = @url :: text, + wildcard_hostname = @wildcard_hostname :: text, + derp_enabled = @derp_enabled :: boolean, updated_at = Now() WHERE id = @id diff --git a/coderd/database/unique_constraint.go b/coderd/database/unique_constraint.go index f0ba6c702ac93..c8dbc831e8651 100644 --- a/coderd/database/unique_constraint.go +++ b/coderd/database/unique_constraint.go @@ -22,6 +22,7 @@ const ( UniqueWorkspaceBuildParametersWorkspaceBuildIDNameKey UniqueConstraint = "workspace_build_parameters_workspace_build_id_name_key" // ALTER TABLE ONLY workspace_build_parameters ADD CONSTRAINT workspace_build_parameters_workspace_build_id_name_key UNIQUE (workspace_build_id, name); UniqueWorkspaceBuildsJobIDKey UniqueConstraint = "workspace_builds_job_id_key" // ALTER TABLE ONLY workspace_builds ADD CONSTRAINT workspace_builds_job_id_key UNIQUE (job_id); UniqueWorkspaceBuildsWorkspaceIDBuildNumberKey UniqueConstraint = "workspace_builds_workspace_id_build_number_key" // ALTER TABLE ONLY workspace_builds ADD CONSTRAINT workspace_builds_workspace_id_build_number_key UNIQUE (workspace_id, build_number); + UniqueWorkspaceProxiesRegionIDUnique UniqueConstraint = "workspace_proxies_region_id_unique" // ALTER TABLE ONLY workspace_proxies ADD CONSTRAINT workspace_proxies_region_id_unique UNIQUE (region_id); UniqueWorkspaceResourceMetadataName UniqueConstraint = "workspace_resource_metadata_name" // ALTER TABLE ONLY workspace_resource_metadata ADD CONSTRAINT workspace_resource_metadata_name UNIQUE (workspace_resource_id, key); UniqueIndexApiKeyName UniqueConstraint = "idx_api_key_name" // CREATE UNIQUE INDEX idx_api_key_name ON api_keys USING btree (user_id, token_name) WHERE (login_type = 'token'::login_type); UniqueIndexOrganizationName 
UniqueConstraint = "idx_organization_name" // CREATE UNIQUE INDEX idx_organization_name ON organizations USING btree (name); diff --git a/codersdk/deployment.go b/codersdk/deployment.go index 61ab6658f3732..2eb74e0d7d40b 100644 --- a/codersdk/deployment.go +++ b/codersdk/deployment.go @@ -657,6 +657,7 @@ when required by your organization's security policy.`, Value: &c.DERP.Server.Enable, Group: &deploymentGroupNetworkingDERP, YAML: "enable", + Annotations: clibase.Annotations{}.Mark(annotationExternalProxies, "true"), }, { Name: "DERP Server Region ID", @@ -667,6 +668,7 @@ when required by your organization's security policy.`, Value: &c.DERP.Server.RegionID, Group: &deploymentGroupNetworkingDERP, YAML: "regionID", + // Does not apply to external proxies as this value is generated. }, { Name: "DERP Server Region Code", @@ -677,6 +679,7 @@ when required by your organization's security policy.`, Value: &c.DERP.Server.RegionCode, Group: &deploymentGroupNetworkingDERP, YAML: "regionCode", + // Does not apply to external proxies as we use the proxy name. }, { Name: "DERP Server Region Name", @@ -687,6 +690,7 @@ when required by your organization's security policy.`, Value: &c.DERP.Server.RegionName, Group: &deploymentGroupNetworkingDERP, YAML: "regionName", + // Does not apply to external proxies as we use the proxy name. }, { Name: "DERP Server STUN Addresses", @@ -703,10 +707,12 @@ when required by your organization's security policy.`, Description: "An HTTP URL that is accessible by other replicas to relay DERP traffic. Required for high availability.", Flag: "derp-server-relay-url", Env: "CODER_DERP_SERVER_RELAY_URL", - Annotations: clibase.Annotations{}.Mark(annotationEnterpriseKey, "true"), Value: &c.DERP.Server.RelayURL, Group: &deploymentGroupNetworkingDERP, YAML: "relayURL", + Annotations: clibase.Annotations{}. + Mark(annotationEnterpriseKey, "true"). 
+ Mark(annotationExternalProxies, "true"), }, { Name: "DERP Config URL", diff --git a/codersdk/workspaceproxy.go b/codersdk/workspaceproxy.go index 336d37e30b283..9ea1522ec0853 100644 --- a/codersdk/workspaceproxy.go +++ b/codersdk/workspaceproxy.go @@ -46,13 +46,15 @@ type ProxyHealthReport struct { } type WorkspaceProxy struct { - ID uuid.UUID `json:"id" format:"uuid" table:"id"` - Name string `json:"name" table:"name,default_sort"` - Icon string `json:"icon" table:"icon"` + ID uuid.UUID `json:"id" format:"uuid" table:"id"` + Name string `json:"name" table:"name,default_sort"` + DisplayName string `json:"display_name" table:"display_name"` + Icon string `json:"icon" table:"icon"` // Full url including scheme of the proxy api url: https://us.example.com URL string `json:"url" table:"url"` // WildcardHostname with the wildcard for subdomain based app hosting: *.us.example.com WildcardHostname string `json:"wildcard_hostname" table:"wildcard_hostname"` + DerpEnabled bool `json:"derp_enabled" table:"derp_enabled"` CreatedAt time.Time `json:"created_at" format:"date-time" table:"created_at"` UpdatedAt time.Time `json:"updated_at" format:"date-time" table:"updated_at"` Deleted bool `json:"deleted" table:"deleted"` diff --git a/docs/admin/audit-logs.md b/docs/admin/audit-logs.md index 643ae0d76e9c6..c8e55692ae01f 100644 --- a/docs/admin/audit-logs.md +++ b/docs/admin/audit-logs.md @@ -20,7 +20,7 @@ We track the following resources: | User
create, write, delete |
FieldTracked
avatar_urlfalse
created_atfalse
deletedtrue
emailtrue
hashed_passwordtrue
idtrue
last_seen_atfalse
login_typefalse
rbac_rolestrue
statustrue
updated_atfalse
usernametrue
| | Workspace
create, write, delete |
FieldTracked
autostart_scheduletrue
created_atfalse
deletedfalse
idtrue
last_used_atfalse
nametrue
organization_idfalse
owner_idtrue
template_idtrue
ttltrue
updated_atfalse
| | WorkspaceBuild
start, stop |
FieldTracked
build_numberfalse
created_atfalse
daily_costfalse
deadlinefalse
idfalse
initiator_idfalse
job_idfalse
max_deadlinefalse
provisioner_statefalse
reasonfalse
template_version_idtrue
transitionfalse
updated_atfalse
workspace_idfalse
| -| WorkspaceProxy
|
FieldTracked
created_attrue
deletedfalse
display_nametrue
icontrue
idtrue
nametrue
token_hashed_secrettrue
updated_atfalse
urltrue
wildcard_hostnametrue
| +| WorkspaceProxy
|
FieldTracked
created_attrue
deletedfalse
derp_enabledtrue
display_nametrue
icontrue
idtrue
nametrue
region_idtrue
token_hashed_secrettrue
updated_atfalse
urltrue
wildcard_hostnametrue
| diff --git a/docs/api/enterprise.md b/docs/api/enterprise.md index fbee85b9970f1..26bddd5a8931f 100644 --- a/docs/api/enterprise.md +++ b/docs/api/enterprise.md @@ -1182,6 +1182,8 @@ curl -X GET http://coder-server:8080/api/v2/workspaceproxies \ { "created_at": "2019-08-24T14:15:22Z", "deleted": true, + "derp_enabled": true, + "display_name": "string", "icon": "string", "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08", "name": "string", @@ -1215,6 +1217,8 @@ Status Code **200** | `[array item]` | array | false | | | | `» created_at` | string(date-time) | false | | | | `» deleted` | boolean | false | | | +| `» derp_enabled` | boolean | false | | | +| `» display_name` | string | false | | | | `» icon` | string | false | | | | `» id` | string(uuid) | false | | | | `» name` | string | false | | | @@ -1277,6 +1281,8 @@ curl -X POST http://coder-server:8080/api/v2/workspaceproxies \ { "created_at": "2019-08-24T14:15:22Z", "deleted": true, + "derp_enabled": true, + "display_name": "string", "icon": "string", "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08", "name": "string", diff --git a/docs/api/schemas.md b/docs/api/schemas.md index ee8e52e07a4a4..db4e53f027e02 100644 --- a/docs/api/schemas.md +++ b/docs/api/schemas.md @@ -5320,6 +5320,8 @@ Parameter represents a set value for the scope. { "created_at": "2019-08-24T14:15:22Z", "deleted": true, + "derp_enabled": true, + "display_name": "string", "icon": "string", "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08", "name": "string", @@ -5343,6 +5345,8 @@ Parameter represents a set value for the scope. 
| ------------------- | -------------------------------------------------------------- | -------- | ------------ | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | `created_at` | string | false | | | | `deleted` | boolean | false | | | +| `derp_enabled` | boolean | false | | | +| `display_name` | string | false | | | | `icon` | string | false | | | | `id` | string | false | | | | `name` | string | false | | | @@ -6658,16 +6662,18 @@ _None_ ```json { "access_url": "string", + "derp_enabled": true, "wildcard_hostname": "string" } ``` ### Properties -| Name | Type | Required | Restrictions | Description | -| ------------------- | ------ | -------- | ------------ | ----------------------------------------------------------------------------- | -| `access_url` | string | false | | Access URL that hits the workspace proxy api. | -| `wildcard_hostname` | string | false | | Wildcard hostname that the workspace proxy api is serving for subdomain apps. | +| Name | Type | Required | Restrictions | Description | +| ------------------- | ------- | -------- | ------------ | ----------------------------------------------------------------------------------- | +| `access_url` | string | false | | Access URL that hits the workspace proxy api. | +| `derp_enabled` | boolean | false | | Derp enabled indicates whether the proxy should be included in the DERP map or not. | +| `wildcard_hostname` | string | false | | Wildcard hostname that the workspace proxy api is serving for subdomain apps. | ## wsproxysdk.RegisterWorkspaceProxyResponse diff --git a/docs/cli/server.md b/docs/cli/server.md index 3cbcafe9bbc35..887281006c504 100644 --- a/docs/cli/server.md +++ b/docs/cli/server.md @@ -128,7 +128,7 @@ Whether to enable or disable the embedded DERP relay server. 
| YAML | networking.derp.regionCode | | Default | coder | -Region code to use for the embedded DERP server. +Region code to use for the embedded DERP server. On external proxies, an empty string or "coder" will use the previous (or randomly generated) value. ### --derp-server-region-id @@ -139,7 +139,7 @@ Region code to use for the embedded DERP server. | YAML | networking.derp.regionID | | Default | 999 | -Region ID to use for the embedded DERP server. +Region ID to use for the embedded DERP server. On external proxies, a value of < 0 or 999 will use the previous (or randomly generated) value. ### --derp-server-region-name diff --git a/enterprise/audit/table.go b/enterprise/audit/table.go index 38378cf678be1..8d6d4557ef63c 100644 --- a/enterprise/audit/table.go +++ b/enterprise/audit/table.go @@ -173,6 +173,8 @@ var auditableResourcesTypes = map[any]map[string]Action{ "updated_at": ActionIgnore, "deleted": ActionIgnore, "token_hashed_secret": ActionSecret, + "derp_enabled": ActionTrack, + "region_id": ActionTrack, }, } diff --git a/enterprise/cli/proxyserver.go b/enterprise/cli/proxyserver.go index af5716424bc0e..5e437141f48fd 100644 --- a/enterprise/cli/proxyserver.go +++ b/enterprise/cli/proxyserver.go @@ -238,6 +238,7 @@ func (*RootCmd) proxyServer() *clibase.Cmd { APIRateLimit: int(cfg.RateLimit.API.Value()), SecureAuthCookie: cfg.SecureAuthCookie.Value(), DisablePathApps: cfg.DisablePathApps.Value(), + DERPEnabled: cfg.DERP.Server.Enable.Value(), ProxySessionToken: proxySessionToken.Value(), }) if err != nil { diff --git a/enterprise/coderd/coderd.go b/enterprise/coderd/coderd.go index 7de6a943fb84a..e9d877e750fdd 100644 --- a/enterprise/coderd/coderd.go +++ b/enterprise/coderd/coderd.go @@ -6,9 +6,11 @@ import ( "crypto/tls" "crypto/x509" "fmt" + "math" "net/http" "net/url" "strconv" + "strings" "sync" "time" @@ -435,7 +437,7 @@ func (api *API) updateEntitlements(ctx context.Context) error { if changed, enabled := 
featureChanged(codersdk.FeatureWorkspaceProxy); changed { if enabled { - fn := derpMapper(api.ProxyHealth) + fn := derpMapper(api.Logger, api.ProxyHealth) api.AGPL.DERPMapper.Store(&fn) } else { api.AGPL.DERPMapper.Store(nil) @@ -447,25 +449,62 @@ func (api *API) updateEntitlements(ctx context.Context) error { return nil } -func derpMapper(proxyHealth *proxyhealth.ProxyHealth) func(*tailcfg.DERPMap) *tailcfg.DERPMap { +var ( + lastDerpConflictMutex sync.Mutex + lastDerpConflictLog time.Time +) + +func derpMapper(logger slog.Logger, proxyHealth *proxyhealth.ProxyHealth) func(*tailcfg.DERPMap) *tailcfg.DERPMap { return func(derpMap *tailcfg.DERPMap) *tailcfg.DERPMap { derpMap = derpMap.Clone() + // Find the starting region ID that we'll use for proxies. This must be + // deterministic based on the derp map. + startingRegionID := 0 + for _, region := range derpMap.Regions { + if region.RegionID > startingRegionID { + startingRegionID = region.RegionID + } + } + if startingRegionID < 0 { + startingRegionID = 0 + } + if startingRegionID >= 1<<32 { + // Enforce an upper bound on the region ID. This shouldn't be hit in + // practice, but it's a good sanity check. + lastDerpConflictMutex.Lock() + shouldLog := lastDerpConflictLog.IsZero() || time.Since(lastDerpConflictLog) > time.Minute + if shouldLog { + lastDerpConflictLog = time.Now() + } + lastDerpConflictMutex.Unlock() + if shouldLog { + logger.Warn( + context.Background(), + "existing DERP region IDs are too large, proxy region IDs will not be populated in the derp map. Please ensure that all DERP region IDs are less than 2^32.", + slog.F("starting_region_id", startingRegionID), + slog.F("max_region_id", 1<<32-1), + ) + return derpMap + } + } + + // Round to the nearest 10,000 with a sufficient buffer of at least + // 2,000. 
+ const roundStartingRegionID = 10_000 + const startingRegionIDBuffer = 2_000 + startingRegionID += startingRegionIDBuffer + startingRegionID = int(math.Ceil(float64(startingRegionID)/roundStartingRegionID) * roundStartingRegionID) + if startingRegionID < roundStartingRegionID { + startingRegionID = roundStartingRegionID + } + // Add all healthy proxies to the DERP map. - // TODO: @dean proxies should be able to disable DERP and report that - // when they register, and this should respect that. statusMap := proxyHealth.HealthStatus() + statusLoop: for _, status := range statusMap { - // TODO: @dean region ID should be constant and unique for each - // proxy. Make the proxies report these values (from a flag like the - // primary) when they register. - const ( - regionID = -999 - regionCode = "proxy" - ) - - if status.Status != proxyhealth.Healthy { - // Only add healthy proxies to the DERP map. + if status.Status != proxyhealth.Healthy || !status.Proxy.DerpEnabled { + // Only add healthy proxies with DERP enabled to the DERP map. continue } @@ -489,6 +528,40 @@ func derpMapper(proxyHealth *proxyhealth.ProxyHealth) func(*tailcfg.DERPMap) *ta continue } + // Sanity check that the region ID and code is unique. + // + // This should be impossible to hit as the IDs are enforced to be + // unique by the database and the computed ID is greater than any + // existing ID in the DERP map. + regionID := startingRegionID + int(status.Proxy.RegionID) + regionCode := fmt.Sprintf("coder_%s", strings.ToLower(status.Proxy.Name)) + for _, r := range derpMap.Regions { + if r.RegionID == regionID || r.RegionCode == regionCode { + // Log a warning if we haven't logged one in the last + // minute. 
+ lastDerpConflictMutex.Lock() + shouldLog := lastDerpConflictLog.IsZero() || time.Since(lastDerpConflictLog) > time.Minute + if shouldLog { + lastDerpConflictLog = time.Now() + } + lastDerpConflictMutex.Unlock() + if shouldLog { + logger.Warn(context.Background(), + "proxy region ID or code conflict, ignoring workspace proxy for DERP map. Please change the flags on the affected proxy to use a different region ID and code.", + slog.F("proxy_id", status.Proxy.ID), + slog.F("proxy_name", status.Proxy.Name), + slog.F("proxy_display_name", status.Proxy.DisplayName), + slog.F("proxy_url", status.Proxy.Url), + slog.F("proxy_region_id", status.Proxy.RegionID), + slog.F("proxy_computed_region_id", regionID), + slog.F("proxy_computed_region_code", regionCode), + ) + } + + continue statusLoop + } + } + derpMap.Regions[regionID] = &tailcfg.DERPRegion{ EmbeddedRelay: true, RegionID: regionID, diff --git a/enterprise/coderd/workspaceproxy.go b/enterprise/coderd/workspaceproxy.go index c2bae1560a823..bd88c4f241332 100644 --- a/enterprise/coderd/workspaceproxy.go +++ b/enterprise/coderd/workspaceproxy.go @@ -171,10 +171,13 @@ func (api *API) postWorkspaceProxy(rw http.ResponseWriter, r *http.Request) { DisplayName: req.DisplayName, Icon: req.Icon, TokenHashedSecret: hashedSecret[:], - CreatedAt: database.Now(), - UpdatedAt: database.Now(), + // Enabled by default, but will be disabled on register if the proxy has + // it disabled. 
+ DerpEnabled: true, + CreatedAt: database.Now(), + UpdatedAt: database.Now(), }) - if database.IsUniqueViolation(err) { + if database.IsUniqueViolation(err, database.UniqueWorkspaceProxiesLowerNameIndex) { httpapi.Write(ctx, rw, http.StatusConflict, codersdk.Response{ Message: fmt.Sprintf("Workspace proxy with name %q already exists.", req.Name), }) @@ -333,6 +336,7 @@ func (api *API) workspaceProxyRegister(rw http.ResponseWriter, r *http.Request) _, err := api.Database.RegisterWorkspaceProxy(ctx, database.RegisterWorkspaceProxyParams{ ID: proxy.ID, Url: req.AccessURL, + DerpEnabled: req.DerpEnabled, WildcardHostname: req.WildcardHostname, }) if httpapi.Is404Error(err) { @@ -459,9 +463,11 @@ func convertProxy(p database.WorkspaceProxy, status proxyhealth.ProxyStatus) cod return codersdk.WorkspaceProxy{ ID: p.ID, Name: p.Name, + DisplayName: p.DisplayName, Icon: p.Icon, URL: p.Url, WildcardHostname: p.WildcardHostname, + DerpEnabled: p.DerpEnabled, CreatedAt: p.CreatedAt, UpdatedAt: p.UpdatedAt, Deleted: p.Deleted, diff --git a/enterprise/wsproxy/wsproxy.go b/enterprise/wsproxy/wsproxy.go index e03288a70726f..8b7085dde023d 100644 --- a/enterprise/wsproxy/wsproxy.go +++ b/enterprise/wsproxy/wsproxy.go @@ -53,13 +53,13 @@ type Options struct { AppHostnameRegex *regexp.Regexp RealIPConfig *httpmw.RealIPConfig - DERPServer *derp.Server Tracing trace.TracerProvider PrometheusRegistry *prometheus.Registry APIRateLimit int SecureAuthCookie bool DisablePathApps bool + DERPEnabled bool ProxySessionToken string } @@ -140,6 +140,7 @@ func New(ctx context.Context, opts *Options) (*Server, error) { regResp, err := client.RegisterWorkspaceProxy(ctx, wsproxysdk.RegisterWorkspaceProxyRequest{ AccessURL: opts.AccessURL.String(), WildcardHostname: opts.AppHostname, + DerpEnabled: opts.DERPEnabled, }) if err != nil { return nil, xerrors.Errorf("register proxy: %w", err) @@ -186,11 +187,11 @@ func New(ctx context.Context, opts *Options) (*Server, error) { SecureAuthCookie: 
opts.SecureAuthCookie, } - if opts.DERPServer == nil { - opts.DERPServer = derp.NewServer(key.NewNode(), tailnet.Logger(opts.Logger.Named("derp"))) - } - derpHandler := derphttp.Handler(opts.DERPServer) - derpHandler, s.derpCloseFunc = tailnet.WithWebsocketSupport(opts.DERPServer, derpHandler) + derpServer := derp.NewServer(key.NewNode(), tailnet.Logger(opts.Logger.Named("derp"))) + // TODO: mesh and derpmesh package stuff + // derpServer.SetMeshKey(regResp.DERPMeshKey) + derpHandler := derphttp.Handler(derpServer) + derpHandler, s.derpCloseFunc = tailnet.WithWebsocketSupport(derpServer, derpHandler) // Routes apiRateLimiter := httpmw.RateLimit(opts.APIRateLimit, time.Minute) diff --git a/enterprise/wsproxy/wsproxysdk/wsproxysdk.go b/enterprise/wsproxy/wsproxysdk/wsproxysdk.go index cd2fdf27882dc..73117beef4459 100644 --- a/enterprise/wsproxy/wsproxysdk/wsproxysdk.go +++ b/enterprise/wsproxy/wsproxysdk/wsproxysdk.go @@ -148,6 +148,9 @@ type RegisterWorkspaceProxyRequest struct { AccessURL string `json:"access_url"` // WildcardHostname that the workspace proxy api is serving for subdomain apps. WildcardHostname string `json:"wildcard_hostname"` + // DerpEnabled indicates whether the proxy should be included in the DERP + // map or not. 
+ DerpEnabled bool `json:"derp_enabled"` } type RegisterWorkspaceProxyResponse struct { diff --git a/site/src/api/typesGenerated.ts b/site/src/api/typesGenerated.ts index 6c3e7f0cea6bf..dd4f99e1956a1 100644 --- a/site/src/api/typesGenerated.ts +++ b/site/src/api/typesGenerated.ts @@ -1258,9 +1258,11 @@ export interface WorkspaceOptions { export interface WorkspaceProxy { readonly id: string readonly name: string + readonly display_name: string readonly icon: string readonly url: string readonly wildcard_hostname: string + readonly derp_enabled: boolean readonly created_at: string readonly updated_at: string readonly deleted: boolean From 4ba7af6ad73447b60287367cfd16cff085b6320b Mon Sep 17 00:00:00 2001 From: Dean Sheather Date: Wed, 3 May 2023 18:10:10 +0000 Subject: [PATCH 03/22] progress --- coderd/apidoc/docs.go | 27 +++++ coderd/apidoc/swagger.json | 27 +++++ coderd/database/dbauthz/system.go | 7 ++ coderd/database/dbfake/databasefake.go | 15 +++ coderd/database/dump.sql | 3 +- .../000121_workspace_proxy_region_id.up.sql | 4 +- .../000122_replica_primary.down.sql | 10 ++ .../migrations/000122_replica_primary.up.sql | 2 + coderd/database/models.go | 1 + coderd/database/querier.go | 1 + coderd/database/queries.sql.go | 43 ++++++- coderd/database/queries/replicas.sql | 11 +- docs/api/schemas.md | 41 +++++-- docs/cli/server.md | 4 +- enterprise/coderd/coderd.go | 2 +- enterprise/coderd/coderdenttest/proxytest.go | 1 + enterprise/coderd/replicas.go | 2 +- enterprise/coderd/workspaceproxy.go | 113 ++++++++++++++++-- enterprise/replicasync/replicasync.go | 22 +++- enterprise/replicasync/replicasync_test.go | 8 +- enterprise/wsproxy/wsproxy.go | 14 +++ enterprise/wsproxy/wsproxysdk/wsproxysdk.go | 24 ++++ 22 files changed, 343 insertions(+), 39 deletions(-) create mode 100644 coderd/database/migrations/000122_replica_primary.down.sql create mode 100644 coderd/database/migrations/000122_replica_primary.up.sql diff --git a/coderd/apidoc/docs.go b/coderd/apidoc/docs.go 
index d75b0e13e560e..5daa32422b012 100644 --- a/coderd/apidoc/docs.go +++ b/coderd/apidoc/docs.go @@ -10544,6 +10544,26 @@ const docTemplate = `{ "description": "DerpEnabled indicates whether the proxy should be included in the DERP\nmap or not.", "type": "boolean" }, + "hostname": { + "description": "ReplicaHostname is the OS hostname of the machine that the proxy is running\non. This is only used for tracking purposes in the replicas table.", + "type": "string" + }, + "replica_error": { + "description": "ReplicaError is the error that the replica encountered when trying to\ndial it's peers. This is stored in the replicas table for debugging\npurposes but does not affect the proxy's ability to register.\n\nThis value is only stored on subsequent requests to the register\nendpoint, not the first request.", + "type": "string" + }, + "replica_id": { + "description": "ReplicaID is a unique identifier for the replica of the proxy that is\nregistering. It should be generated by the client on startup and\npersisted (in memory only) until the process is restarted.", + "type": "string" + }, + "replica_relay_address": { + "description": "ReplicaRelayAddress is the DERP address of the replica that other\nreplicas may use to connect internally for DERP meshing.", + "type": "string" + }, + "version": { + "description": "Version is the Coder version of the proxy.", + "type": "string" + }, "wildcard_hostname": { "description": "WildcardHostname that the workspace proxy api is serving for subdomain apps.", "type": "string" @@ -10555,6 +10575,13 @@ const docTemplate = `{ "properties": { "app_security_key": { "type": "string" + }, + "sibling_replicas": { + "description": "SiblingReplicas is a list of all other replicas of the proxy that have\nnot timed out.", + "type": "array", + "items": { + "$ref": "#/definitions/codersdk.Replica" + } } } } diff --git a/coderd/apidoc/swagger.json b/coderd/apidoc/swagger.json index 67c7bd2a68d9c..0b715087ea08e 100644 --- 
a/coderd/apidoc/swagger.json +++ b/coderd/apidoc/swagger.json @@ -9562,6 +9562,26 @@ "description": "DerpEnabled indicates whether the proxy should be included in the DERP\nmap or not.", "type": "boolean" }, + "hostname": { + "description": "ReplicaHostname is the OS hostname of the machine that the proxy is running\non. This is only used for tracking purposes in the replicas table.", + "type": "string" + }, + "replica_error": { + "description": "ReplicaError is the error that the replica encountered when trying to\ndial it's peers. This is stored in the replicas table for debugging\npurposes but does not affect the proxy's ability to register.\n\nThis value is only stored on subsequent requests to the register\nendpoint, not the first request.", + "type": "string" + }, + "replica_id": { + "description": "ReplicaID is a unique identifier for the replica of the proxy that is\nregistering. It should be generated by the client on startup and\npersisted (in memory only) until the process is restarted.", + "type": "string" + }, + "replica_relay_address": { + "description": "ReplicaRelayAddress is the DERP address of the replica that other\nreplicas may use to connect internally for DERP meshing.", + "type": "string" + }, + "version": { + "description": "Version is the Coder version of the proxy.", + "type": "string" + }, "wildcard_hostname": { "description": "WildcardHostname that the workspace proxy api is serving for subdomain apps.", "type": "string" @@ -9573,6 +9593,13 @@ "properties": { "app_security_key": { "type": "string" + }, + "sibling_replicas": { + "description": "SiblingReplicas is a list of all other replicas of the proxy that have\nnot timed out.", + "type": "array", + "items": { + "$ref": "#/definitions/codersdk.Replica" + } } } } diff --git a/coderd/database/dbauthz/system.go b/coderd/database/dbauthz/system.go index 1252788f375ce..fe1f65c4d9c44 100644 --- a/coderd/database/dbauthz/system.go +++ b/coderd/database/dbauthz/system.go @@ -154,6 +154,13 @@ 
func (q *querier) InsertDeploymentID(ctx context.Context, value string) error { return q.db.InsertDeploymentID(ctx, value) } +func (q *querier) GetReplicaByID(ctx context.Context, id uuid.UUID) (database.Replica, error) { + if err := q.authorizeContext(ctx, rbac.ActionRead, rbac.ResourceSystem); err != nil { + return database.Replica{}, err + } + return q.db.GetReplicaByID(ctx, id) +} + func (q *querier) InsertReplica(ctx context.Context, arg database.InsertReplicaParams) (database.Replica, error) { if err := q.authorizeContext(ctx, rbac.ActionCreate, rbac.ResourceSystem); err != nil { return database.Replica{}, err diff --git a/coderd/database/dbfake/databasefake.go b/coderd/database/dbfake/databasefake.go index ffdf01e5afaf6..95d16564c7af8 100644 --- a/coderd/database/dbfake/databasefake.go +++ b/coderd/database/dbfake/databasefake.go @@ -4908,6 +4908,19 @@ func (q *fakeQuerier) DeleteReplicasUpdatedBefore(_ context.Context, before time return nil } +func (q *fakeQuerier) GetReplicaByID(_ context.Context, id uuid.UUID) (database.Replica, error) { + q.mutex.RLock() + defer q.mutex.RUnlock() + + for _, replica := range q.replicas { + if replica.ID == id { + return replica, nil + } + } + + return database.Replica{}, sql.ErrNoRows +} + func (q *fakeQuerier) InsertReplica(_ context.Context, arg database.InsertReplicaParams) (database.Replica, error) { if err := validateDatabaseType(arg); err != nil { return database.Replica{}, err @@ -4926,6 +4939,7 @@ func (q *fakeQuerier) InsertReplica(_ context.Context, arg database.InsertReplic RelayAddress: arg.RelayAddress, Version: arg.Version, DatabaseLatency: arg.DatabaseLatency, + Primary: arg.Primary, } q.replicas = append(q.replicas, replica) return replica, nil @@ -4952,6 +4966,7 @@ func (q *fakeQuerier) UpdateReplica(_ context.Context, arg database.UpdateReplic replica.Version = arg.Version replica.Error = arg.Error replica.DatabaseLatency = arg.DatabaseLatency + replica.Primary = arg.Primary q.replicas[index] = replica 
return replica, nil } diff --git a/coderd/database/dump.sql b/coderd/database/dump.sql index 650bb609d3bbe..e9e34553e3cf2 100644 --- a/coderd/database/dump.sql +++ b/coderd/database/dump.sql @@ -360,7 +360,8 @@ CREATE TABLE replicas ( relay_address text NOT NULL, database_latency integer NOT NULL, version text NOT NULL, - error text DEFAULT ''::text NOT NULL + error text DEFAULT ''::text NOT NULL, + "primary" boolean DEFAULT true NOT NULL ); CREATE TABLE site_configs ( diff --git a/coderd/database/migrations/000121_workspace_proxy_region_id.up.sql b/coderd/database/migrations/000121_workspace_proxy_region_id.up.sql index 9964b2110b09c..727926362b2bd 100644 --- a/coderd/database/migrations/000121_workspace_proxy_region_id.up.sql +++ b/coderd/database/migrations/000121_workspace_proxy_region_id.up.sql @@ -1,6 +1,6 @@ ALTER TABLE workspace_proxies - -- adding a serial to a table without a default value will be filled as you - -- would expect + -- Adding a serial to a table without a default value will be filled as you + -- would expect on versions of Postgres >= 9 AFAIK (which we require). 
ADD COLUMN region_id serial NOT NULL, ADD COLUMN derp_enabled boolean NOT NULL DEFAULT true, ADD CONSTRAINT workspace_proxies_region_id_unique UNIQUE (region_id); diff --git a/coderd/database/migrations/000122_replica_primary.down.sql b/coderd/database/migrations/000122_replica_primary.down.sql new file mode 100644 index 0000000000000..ad84fc48681e9 --- /dev/null +++ b/coderd/database/migrations/000122_replica_primary.down.sql @@ -0,0 +1,10 @@ +BEGIN; + +-- drop any rows that aren't primary replicas +DELETE FROM replicas + WHERE "primary" = false; + +ALTER TABLE replicas + DROP COLUMN "primary"; + +COMMIT; diff --git a/coderd/database/migrations/000122_replica_primary.up.sql b/coderd/database/migrations/000122_replica_primary.up.sql new file mode 100644 index 0000000000000..1f3081d1789a7 --- /dev/null +++ b/coderd/database/migrations/000122_replica_primary.up.sql @@ -0,0 +1,2 @@ +ALTER TABLE replicas + ADD COLUMN "primary" boolean NOT NULL DEFAULT true; diff --git a/coderd/database/models.go b/coderd/database/models.go index babbb39548dfe..12eee12d8f5c2 100644 --- a/coderd/database/models.go +++ b/coderd/database/models.go @@ -1404,6 +1404,7 @@ type Replica struct { DatabaseLatency int32 `db:"database_latency" json:"database_latency"` Version string `db:"version" json:"version"` Error string `db:"error" json:"error"` + Primary bool `db:"primary" json:"primary"` } type SiteConfig struct { diff --git a/coderd/database/querier.go b/coderd/database/querier.go index 01b79c0ae1f0f..a197f172f8168 100644 --- a/coderd/database/querier.go +++ b/coderd/database/querier.go @@ -96,6 +96,7 @@ type sqlcQuerier interface { GetProvisionerLogsAfterID(ctx context.Context, arg GetProvisionerLogsAfterIDParams) ([]ProvisionerJobLog, error) GetQuotaAllowanceForUser(ctx context.Context, userID uuid.UUID) (int64, error) GetQuotaConsumedForUser(ctx context.Context, ownerID uuid.UUID) (int64, error) + GetReplicaByID(ctx context.Context, id uuid.UUID) (Replica, error) 
GetReplicasUpdatedAfter(ctx context.Context, updatedAt time.Time) ([]Replica, error) GetServiceBanner(ctx context.Context) (string, error) GetTemplateAverageBuildTime(ctx context.Context, arg GetTemplateAverageBuildTimeParams) (GetTemplateAverageBuildTimeRow, error) diff --git a/coderd/database/queries.sql.go b/coderd/database/queries.sql.go index bbe11cf399835..7b7571339ddf7 100644 --- a/coderd/database/queries.sql.go +++ b/coderd/database/queries.sql.go @@ -3167,8 +3167,32 @@ func (q *sqlQuerier) DeleteReplicasUpdatedBefore(ctx context.Context, updatedAt return err } +const getReplicaByID = `-- name: GetReplicaByID :one +SELECT id, created_at, started_at, stopped_at, updated_at, hostname, region_id, relay_address, database_latency, version, error, "primary" FROM replicas WHERE id = $1 +` + +func (q *sqlQuerier) GetReplicaByID(ctx context.Context, id uuid.UUID) (Replica, error) { + row := q.db.QueryRowContext(ctx, getReplicaByID, id) + var i Replica + err := row.Scan( + &i.ID, + &i.CreatedAt, + &i.StartedAt, + &i.StoppedAt, + &i.UpdatedAt, + &i.Hostname, + &i.RegionID, + &i.RelayAddress, + &i.DatabaseLatency, + &i.Version, + &i.Error, + &i.Primary, + ) + return i, err +} + const getReplicasUpdatedAfter = `-- name: GetReplicasUpdatedAfter :many -SELECT id, created_at, started_at, stopped_at, updated_at, hostname, region_id, relay_address, database_latency, version, error FROM replicas WHERE updated_at > $1 AND stopped_at IS NULL +SELECT id, created_at, started_at, stopped_at, updated_at, hostname, region_id, relay_address, database_latency, version, error, "primary" FROM replicas WHERE updated_at > $1 AND stopped_at IS NULL ` func (q *sqlQuerier) GetReplicasUpdatedAfter(ctx context.Context, updatedAt time.Time) ([]Replica, error) { @@ -3192,6 +3216,7 @@ func (q *sqlQuerier) GetReplicasUpdatedAfter(ctx context.Context, updatedAt time &i.DatabaseLatency, &i.Version, &i.Error, + &i.Primary, ); err != nil { return nil, err } @@ -3216,8 +3241,9 @@ INSERT INTO replicas ( 
region_id, relay_address, version, - database_latency -) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9) RETURNING id, created_at, started_at, stopped_at, updated_at, hostname, region_id, relay_address, database_latency, version, error + database_latency, + "primary" +) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10) RETURNING id, created_at, started_at, stopped_at, updated_at, hostname, region_id, relay_address, database_latency, version, error, "primary" ` type InsertReplicaParams struct { @@ -3230,6 +3256,7 @@ type InsertReplicaParams struct { RelayAddress string `db:"relay_address" json:"relay_address"` Version string `db:"version" json:"version"` DatabaseLatency int32 `db:"database_latency" json:"database_latency"` + Primary bool `db:"primary" json:"primary"` } func (q *sqlQuerier) InsertReplica(ctx context.Context, arg InsertReplicaParams) (Replica, error) { @@ -3243,6 +3270,7 @@ func (q *sqlQuerier) InsertReplica(ctx context.Context, arg InsertReplicaParams) arg.RelayAddress, arg.Version, arg.DatabaseLatency, + arg.Primary, ) var i Replica err := row.Scan( @@ -3257,6 +3285,7 @@ func (q *sqlQuerier) InsertReplica(ctx context.Context, arg InsertReplicaParams) &i.DatabaseLatency, &i.Version, &i.Error, + &i.Primary, ) return i, err } @@ -3271,8 +3300,9 @@ UPDATE replicas SET hostname = $7, version = $8, error = $9, - database_latency = $10 -WHERE id = $1 RETURNING id, created_at, started_at, stopped_at, updated_at, hostname, region_id, relay_address, database_latency, version, error + database_latency = $10, + "primary" = $11 +WHERE id = $1 RETURNING id, created_at, started_at, stopped_at, updated_at, hostname, region_id, relay_address, database_latency, version, error, "primary" ` type UpdateReplicaParams struct { @@ -3286,6 +3316,7 @@ type UpdateReplicaParams struct { Version string `db:"version" json:"version"` Error string `db:"error" json:"error"` DatabaseLatency int32 `db:"database_latency" json:"database_latency"` + Primary bool `db:"primary" json:"primary"` 
} func (q *sqlQuerier) UpdateReplica(ctx context.Context, arg UpdateReplicaParams) (Replica, error) { @@ -3300,6 +3331,7 @@ func (q *sqlQuerier) UpdateReplica(ctx context.Context, arg UpdateReplicaParams) arg.Version, arg.Error, arg.DatabaseLatency, + arg.Primary, ) var i Replica err := row.Scan( @@ -3314,6 +3346,7 @@ func (q *sqlQuerier) UpdateReplica(ctx context.Context, arg UpdateReplicaParams) &i.DatabaseLatency, &i.Version, &i.Error, + &i.Primary, ) return i, err } diff --git a/coderd/database/queries/replicas.sql b/coderd/database/queries/replicas.sql index e87c1f46432f2..5a0b4ac0fe95e 100644 --- a/coderd/database/queries/replicas.sql +++ b/coderd/database/queries/replicas.sql @@ -1,6 +1,9 @@ -- name: GetReplicasUpdatedAfter :many SELECT * FROM replicas WHERE updated_at > $1 AND stopped_at IS NULL; +-- name: GetReplicaByID :one +SELECT * FROM replicas WHERE id = $1; + -- name: InsertReplica :one INSERT INTO replicas ( id, @@ -11,8 +14,9 @@ INSERT INTO replicas ( region_id, relay_address, version, - database_latency -) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9) RETURNING *; + database_latency, + "primary" +) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10) RETURNING *; -- name: UpdateReplica :one UPDATE replicas SET @@ -24,7 +28,8 @@ UPDATE replicas SET hostname = $7, version = $8, error = $9, - database_latency = $10 + database_latency = $10, + "primary" = $11 WHERE id = $1 RETURNING *; -- name: DeleteReplicasUpdatedBefore :exec diff --git a/docs/api/schemas.md b/docs/api/schemas.md index db4e53f027e02..9c2566588757e 100644 --- a/docs/api/schemas.md +++ b/docs/api/schemas.md @@ -6663,28 +6663,51 @@ _None_ { "access_url": "string", "derp_enabled": true, + "hostname": "string", + "replica_error": "string", + "replica_id": "string", + "replica_relay_address": "string", + "version": "string", "wildcard_hostname": "string" } ``` ### Properties -| Name | Type | Required | Restrictions | Description | -| ------------------- | ------- | -------- | ------------ | 
----------------------------------------------------------------------------------- | -| `access_url` | string | false | | Access URL that hits the workspace proxy api. | -| `derp_enabled` | boolean | false | | Derp enabled indicates whether the proxy should be included in the DERP map or not. | -| `wildcard_hostname` | string | false | | Wildcard hostname that the workspace proxy api is serving for subdomain apps. | +| Name | Type | Required | Restrictions | Description | +| ------------------------------------------------------------------------------------------------- | ------- | -------- | ------------ | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `access_url` | string | false | | Access URL that hits the workspace proxy api. | +| `derp_enabled` | boolean | false | | Derp enabled indicates whether the proxy should be included in the DERP map or not. | +| `hostname` | string | false | | Hostname is the OS hostname of the machine that the proxy is running on. This is only used for tracking purposes in the replicas table. | +| `replica_error` | string | false | | Replica error is the error that the replica encountered when trying to dial it's peers. This is stored in the replicas table for debugging purposes but does not affect the proxy's ability to register. | +| This value is only stored on subsequent requests to the register endpoint, not the first request. | +| `replica_id` | string | false | | Replica ID is a unique identifier for the replica of the proxy that is registering. It should be generated by the client on startup and persisted (in memory only) until the process is restarted. | +| `replica_relay_address` | string | false | | Replica relay address is the DERP address of the replica that other replicas may use to connect internally for DERP meshing. 
| +| `version` | string | false | | Version is the Coder version of the proxy. | +| `wildcard_hostname` | string | false | | Wildcard hostname that the workspace proxy api is serving for subdomain apps. | ## wsproxysdk.RegisterWorkspaceProxyResponse ```json { - "app_security_key": "string" + "app_security_key": "string", + "sibling_replicas": [ + { + "created_at": "2019-08-24T14:15:22Z", + "database_latency": 0, + "error": "string", + "hostname": "string", + "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08", + "region_id": 0, + "relay_address": "string" + } + ] } ``` ### Properties -| Name | Type | Required | Restrictions | Description | -| ------------------ | ------ | -------- | ------------ | ----------- | -| `app_security_key` | string | false | | | +| Name | Type | Required | Restrictions | Description | +| ------------------ | --------------------------------------------- | -------- | ------------ | -------------------------------------------------------------------------------------- | +| `app_security_key` | string | false | | | +| `sibling_replicas` | array of [codersdk.Replica](#codersdkreplica) | false | | Sibling replicas is a list of all other replicas of the proxy that have not timed out. | diff --git a/docs/cli/server.md b/docs/cli/server.md index 887281006c504..3cbcafe9bbc35 100644 --- a/docs/cli/server.md +++ b/docs/cli/server.md @@ -128,7 +128,7 @@ Whether to enable or disable the embedded DERP relay server. | YAML | networking.derp.regionCode | | Default | coder | -Region code to use for the embedded DERP server. On external proxies, an empty string or "coder" will use the previous (or randomly generated) value. +Region code to use for the embedded DERP server. ### --derp-server-region-id @@ -139,7 +139,7 @@ Region code to use for the embedded DERP server. On external proxies, an empty s | YAML | networking.derp.regionID | | Default | 999 | -Region ID to use for the embedded DERP server. 
On external proxies, a value of < 0 or 999 will use the previous (or randomly generated) value. +Region ID to use for the embedded DERP server. ### --derp-server-region-name diff --git a/enterprise/coderd/coderd.go b/enterprise/coderd/coderd.go index 71b43d4ddebb0..c97a0e3550de7 100644 --- a/enterprise/coderd/coderd.go +++ b/enterprise/coderd/coderd.go @@ -318,7 +318,7 @@ func (api *API) updateEntitlements(ctx context.Context) error { entitlements, err := license.Entitlements( ctx, api.Database, - api.Logger, len(api.replicaManager.All()), len(api.GitAuthConfigs), api.Keys, map[codersdk.FeatureName]bool{ + api.Logger, len(api.replicaManager.AllPrimary()), len(api.GitAuthConfigs), api.Keys, map[codersdk.FeatureName]bool{ codersdk.FeatureAuditLog: api.AuditLogging, codersdk.FeatureBrowserOnly: api.BrowserOnly, codersdk.FeatureSCIM: len(api.SCIMAPIKey) != 0, diff --git a/enterprise/coderd/coderdenttest/proxytest.go b/enterprise/coderd/coderdenttest/proxytest.go index 6b517a5994ca4..784cc7cdd4e00 100644 --- a/enterprise/coderd/coderdenttest/proxytest.go +++ b/enterprise/coderd/coderdenttest/proxytest.go @@ -55,6 +55,7 @@ func NewWorkspaceProxy(t *testing.T, coderdAPI *coderd.API, owner *codersdk.Clie defer mutex.RUnlock() if handler == nil { http.Error(w, "handler not set", http.StatusServiceUnavailable) + return } handler.ServeHTTP(w, r) diff --git a/enterprise/coderd/replicas.go b/enterprise/coderd/replicas.go index b5c5af2743385..77e0c45aeff2c 100644 --- a/enterprise/coderd/replicas.go +++ b/enterprise/coderd/replicas.go @@ -24,7 +24,7 @@ func (api *API) replicas(rw http.ResponseWriter, r *http.Request) { return } - replicas := api.replicaManager.All() + replicas := api.replicaManager.AllPrimary() res := make([]codersdk.Replica, 0, len(replicas)) for _, replica := range replicas { res = append(res, convertReplica(replica)) diff --git a/enterprise/coderd/workspaceproxy.go b/enterprise/coderd/workspaceproxy.go index b2a8ff5230444..62d6373c71bd2 100644 --- 
a/enterprise/coderd/workspaceproxy.go +++ b/enterprise/coderd/workspaceproxy.go @@ -14,6 +14,7 @@ import ( "golang.org/x/xerrors" "cdr.dev/slog" + "github.com/coder/coder/buildinfo" agpl "github.com/coder/coder/coderd" "github.com/coder/coder/coderd/audit" "github.com/coder/coder/coderd/database" @@ -25,6 +26,7 @@ import ( "github.com/coder/coder/codersdk" "github.com/coder/coder/cryptorand" "github.com/coder/coder/enterprise/coderd/proxyhealth" + "github.com/coder/coder/enterprise/replicasync" "github.com/coder/coder/enterprise/wsproxy/wsproxysdk" ) @@ -314,6 +316,10 @@ func (api *API) workspaceProxyIssueSignedAppToken(rw http.ResponseWriter, r *htt // in the database and returns a signed token that can be used to authenticate // tokens. // +// This is called periodically by the proxy in the background (once per minute +// per replica) to ensure that the proxy is still registered and the +// corresponding replica table entry is refreshed. +// // @Summary Register workspace proxy // @ID register-workspace-proxy // @Security CoderSessionToken @@ -335,6 +341,14 @@ func (api *API) workspaceProxyRegister(rw http.ResponseWriter, r *http.Request) return } + if req.Version != buildinfo.Version() { + httpapi.Write(ctx, rw, http.StatusBadRequest, codersdk.Response{ + Message: "Version mismatch.", + Detail: fmt.Sprintf("Proxy version %q does not match primary server version %q", req.Version, buildinfo.Version()), + }) + return + } + if err := validateProxyURL(req.AccessURL); err != nil { httpapi.Write(ctx, rw, http.StatusBadRequest, codersdk.Response{ Message: "URL is invalid.", @@ -353,12 +367,75 @@ func (api *API) workspaceProxyRegister(rw http.ResponseWriter, r *http.Request) } } - _, err := api.Database.RegisterWorkspaceProxy(ctx, database.RegisterWorkspaceProxyParams{ - ID: proxy.ID, - Url: req.AccessURL, - DerpEnabled: req.DerpEnabled, - WildcardHostname: req.WildcardHostname, - }) + // TODO: get region ID + var regionID int32 = 1234 + + err := 
api.Database.InTx(func(db database.Store) error { + // First, update the proxy's values in the database. + _, err := db.RegisterWorkspaceProxy(ctx, database.RegisterWorkspaceProxyParams{ + ID: proxy.ID, + Url: req.AccessURL, + DerpEnabled: req.DerpEnabled, + WildcardHostname: req.WildcardHostname, + }) + if err != nil { + return xerrors.Errorf("register workspace proxy: %w", err) + } + + // Second, find the replica that corresponds to this proxy and refresh + // it if it exists. If it doesn't exist, create it. + now := time.Now() + replica, err := db.GetReplicaByID(ctx, req.ReplicaID) + if err == nil { + // Replica exists, update it. + if replica.StoppedAt.Valid && !replica.StartedAt.IsZero() { + // If the replica deregistered, it shouldn't be able to + // re-register before restarting. + // TODO: sadly this results in 500 + return xerrors.Errorf("replica %s is stopped but not deregistered", replica.ID) + } + + replica, err = db.UpdateReplica(ctx, database.UpdateReplicaParams{ + ID: replica.ID, + UpdatedAt: now, + StartedAt: replica.StartedAt, + StoppedAt: replica.StoppedAt, + RelayAddress: req.ReplicaRelayAddress, + RegionID: regionID, + Hostname: req.ReplicaHostname, + Version: req.Version, + Error: req.ReplicaError, + DatabaseLatency: 0, + Primary: false, + }) + if err != nil { + return xerrors.Errorf("update replica: %w", err) + } + } + if xerrors.Is(err, sql.ErrNoRows) { + // Replica doesn't exist, create it. 
+ replica, err = db.InsertReplica(ctx, database.InsertReplicaParams{ + ID: req.ReplicaID, + CreatedAt: now, + StartedAt: now, + UpdatedAt: now, + Hostname: req.ReplicaHostname, + RegionID: regionID, + RelayAddress: req.ReplicaRelayAddress, + Version: req.Version, + DatabaseLatency: 0, + Primary: false, + }) + if err != nil { + return xerrors.Errorf("insert replica: %w", err) + } + } + if err != nil { + return xerrors.Errorf("get replica: %w", err) + } + + return nil + }, nil) if httpapi.Is404Error(err) { httpapi.ResourceNotFound(rw) return @@ -368,8 +445,27 @@ func (api *API) workspaceProxyRegister(rw http.ResponseWriter, r *http.Request) return } + // Publish a replicasync event with a nil ID so every replica (yes, even the + // current replica) will refresh its replicas list. + err = api.Pubsub.Publish(replicasync.PubsubEvent, []byte(uuid.Nil.String())) + if err != nil { + httpapi.InternalServerError(rw, err) + return + } + + // Find sibling regions to respond with for derpmesh. + siblings := api.replicaManager.InRegion(regionID) + siblingsRes := make([]codersdk.Replica, 0, len(siblings)) + for _, replica := range siblings { + if replica.ID == req.ReplicaID { + continue + } + siblingsRes = append(siblingsRes, convertReplica(replica)) + } + httpapi.Write(ctx, rw, http.StatusCreated, wsproxysdk.RegisterWorkspaceProxyResponse{ - AppSecurityKey: api.AppSecurityKey.String(), + AppSecurityKey: api.AppSecurityKey.String(), + SiblingReplicas: siblingsRes, }) go api.forceWorkspaceProxyHealthUpdate(api.ctx) @@ -455,7 +551,8 @@ func (api *API) reconnectingPTYSignedToken(rw http.ResponseWriter, r *http.Reque }, SessionToken: httpmw.APITokenFromRequest(r), // The following fields aren't required as long as the request is authed - // with a valid API key. + // with a valid API key, which we know since this endpoint is protected + // by auth middleware already. PathAppBaseURL: "", AppHostname: "", // The following fields are empty for terminal apps. 
diff --git a/enterprise/replicasync/replicasync.go b/enterprise/replicasync/replicasync.go index 4b31b912ea673..a4cfd8840c282 100644 --- a/enterprise/replicasync/replicasync.go +++ b/enterprise/replicasync/replicasync.go @@ -73,6 +73,7 @@ func New(ctx context.Context, logger slog.Logger, db database.Store, pubsub data RelayAddress: options.RelayAddress, Version: buildinfo.Version(), DatabaseLatency: int32(databaseLatency.Microseconds()), + Primary: true, }) if err != nil { return nil, xerrors.Errorf("insert replica: %w", err) @@ -298,6 +299,7 @@ func (m *Manager) syncReplicas(ctx context.Context) error { Version: m.self.Version, Error: replicaError, DatabaseLatency: int32(databaseLatency.Microseconds()), + Primary: m.self.Primary, }) if err != nil { return xerrors.Errorf("update replica: %w", err) @@ -323,12 +325,17 @@ func (m *Manager) Self() database.Replica { return m.self } -// All returns every replica, including itself. -func (m *Manager) All() []database.Replica { +// AllPrimary returns every primary replica (not workspace proxy replicas), +// including itself. +func (m *Manager) AllPrimary() []database.Replica { m.mutex.Lock() defer m.mutex.Unlock() replicas := make([]database.Replica, 0, len(m.peers)) for _, replica := range append(m.peers, m.self) { + if !replica.Primary { + continue + } + // When we assign the non-pointer to a // variable it loses the reference. replica := replica @@ -337,13 +344,13 @@ func (m *Manager) All() []database.Replica { return replicas } -// Regional returns all replicas in the same region excluding itself. -func (m *Manager) Regional() []database.Replica { +// InRegion returns every replica in the given DERP region excluding itself. 
+func (m *Manager) InRegion(regionID int32) []database.Replica { m.mutex.Lock() defer m.mutex.Unlock() replicas := make([]database.Replica, 0) for _, replica := range m.peers { - if replica.RegionID != m.self.RegionID { + if replica.RegionID != regionID { continue } replicas = append(replicas, replica) @@ -351,6 +358,11 @@ func (m *Manager) Regional() []database.Replica { return replicas } +// Regional returns all replicas in the same region excluding itself. +func (m *Manager) Regional() []database.Replica { + return m.InRegion(m.self.RegionID) +} + // SetCallback sets a function to execute whenever new peers // are refreshed or updated. func (m *Manager) SetCallback(callback func()) { diff --git a/enterprise/replicasync/replicasync_test.go b/enterprise/replicasync/replicasync_test.go index 49890524c9d78..6866a056009ec 100644 --- a/enterprise/replicasync/replicasync_test.go +++ b/enterprise/replicasync/replicasync_test.go @@ -60,6 +60,7 @@ func TestReplica(t *testing.T) { UpdatedAt: database.Now(), Hostname: "something", RelayAddress: srv.URL, + Primary: true, }) require.NoError(t, err) server, err := replicasync.New(context.Background(), slogtest.Make(t, nil), db, pubsub, &replicasync.Options{ @@ -100,6 +101,7 @@ func TestReplica(t *testing.T) { UpdatedAt: database.Now(), Hostname: "something", RelayAddress: srv.URL, + Primary: true, }) require.NoError(t, err) server, err := replicasync.New(context.Background(), slogtest.Make(t, nil), db, pubsub, &replicasync.Options{ @@ -123,6 +125,7 @@ func TestReplica(t *testing.T) { Hostname: "something", // Fake address to dial! 
RelayAddress: "http://127.0.0.1:1", + Primary: true, }) require.NoError(t, err) server, err := replicasync.New(context.Background(), slogtest.Make(t, nil), db, pubsub, &replicasync.Options{ @@ -150,6 +153,7 @@ func TestReplica(t *testing.T) { ID: uuid.New(), RelayAddress: srv.URL, UpdatedAt: database.Now(), + Primary: true, }) require.NoError(t, err) // Publish multiple times to ensure it can handle that case. @@ -168,6 +172,7 @@ func TestReplica(t *testing.T) { _, err := db.InsertReplica(context.Background(), database.InsertReplicaParams{ ID: uuid.New(), UpdatedAt: database.Now().Add(-time.Hour), + Primary: true, }) require.NoError(t, err) server, err := replicasync.New(context.Background(), slogtest.Make(t, nil), db, pubsub, &replicasync.Options{ @@ -211,8 +216,7 @@ func TestReplica(t *testing.T) { server.SetCallback(func() { m.Lock() defer m.Unlock() - - if len(server.All()) != count { + if len(server.AllPrimary()) != count { return } if done { diff --git a/enterprise/wsproxy/wsproxy.go b/enterprise/wsproxy/wsproxy.go index a39572ee8b8ab..a32886d7e32f0 100644 --- a/enterprise/wsproxy/wsproxy.go +++ b/enterprise/wsproxy/wsproxy.go @@ -5,6 +5,7 @@ import ( "fmt" "net/http" "net/url" + "os" "reflect" "regexp" "strings" @@ -138,10 +139,23 @@ func New(ctx context.Context, opts *Options) (*Server, error) { return nil, xerrors.Errorf("%q is a workspace proxy, not a primary coderd instance", opts.DashboardURL) } + // TODO: registering logic need to be moved to a struct that calls it + // periodically + replicaID := uuid.New() + osHostname, err := os.Hostname() + if err != nil { + return nil, xerrors.Errorf("get OS hostname: %w", err) + } regResp, err := client.RegisterWorkspaceProxy(ctx, wsproxysdk.RegisterWorkspaceProxyRequest{ AccessURL: opts.AccessURL.String(), WildcardHostname: opts.AppHostname, DerpEnabled: opts.DERPEnabled, + ReplicaID: replicaID, + ReplicaHostname: osHostname, + ReplicaError: "", + // TODO: replica relay address + ReplicaRelayAddress: "", + 
Version: buildinfo.Version(), }) if err != nil { return nil, xerrors.Errorf("register proxy: %w", err) diff --git a/enterprise/wsproxy/wsproxysdk/wsproxysdk.go b/enterprise/wsproxy/wsproxysdk/wsproxysdk.go index 73117beef4459..b3b9e4135f896 100644 --- a/enterprise/wsproxy/wsproxysdk/wsproxysdk.go +++ b/enterprise/wsproxy/wsproxysdk/wsproxysdk.go @@ -151,10 +151,34 @@ type RegisterWorkspaceProxyRequest struct { // DerpEnabled indicates whether the proxy should be included in the DERP // map or not. DerpEnabled bool `json:"derp_enabled"` + + // ReplicaID is a unique identifier for the replica of the proxy that is + // registering. It should be generated by the client on startup and + // persisted (in memory only) until the process is restarted. + ReplicaID uuid.UUID `json:"replica_id"` + // ReplicaHostname is the OS hostname of the machine that the proxy is running + // on. This is only used for tracking purposes in the replicas table. + ReplicaHostname string `json:"hostname"` + // ReplicaError is the error that the replica encountered when trying to + // dial it's peers. This is stored in the replicas table for debugging + // purposes but does not affect the proxy's ability to register. + // + // This value is only stored on subsequent requests to the register + // endpoint, not the first request. + ReplicaError string `json:"replica_error"` + // ReplicaRelayAddress is the DERP address of the replica that other + // replicas may use to connect internally for DERP meshing. + ReplicaRelayAddress string `json:"replica_relay_address"` + + // Version is the Coder version of the proxy. + Version string `json:"version"` } type RegisterWorkspaceProxyResponse struct { AppSecurityKey string `json:"app_security_key"` + // SiblingReplicas is a list of all other replicas of the proxy that have + // not timed out. 
+ SiblingReplicas []codersdk.Replica `json:"sibling_replicas"` } func (c *Client) RegisterWorkspaceProxy(ctx context.Context, req RegisterWorkspaceProxyRequest) (RegisterWorkspaceProxyResponse, error) { From dcf072ebc1a6d64377082f6eb589017e5f76dbb3 Mon Sep 17 00:00:00 2001 From: Dean Sheather Date: Thu, 4 May 2023 21:06:23 +0000 Subject: [PATCH 04/22] derp mesh probably working --- enterprise/cli/proxyserver.go | 37 ++--- enterprise/coderd/coderdenttest/proxytest.go | 5 + enterprise/coderd/workspaceproxy.go | 5 +- enterprise/wsproxy/wsproxy.go | 144 ++++++++++++++----- enterprise/wsproxy/wsproxysdk/wsproxysdk.go | 135 +++++++++++++++++ 5 files changed, 276 insertions(+), 50 deletions(-) diff --git a/enterprise/cli/proxyserver.go b/enterprise/cli/proxyserver.go index 5e437141f48fd..c99e046a67dd9 100644 --- a/enterprise/cli/proxyserver.go +++ b/enterprise/cli/proxyserver.go @@ -225,22 +225,27 @@ func (*RootCmd) proxyServer() *clibase.Cmd { closers.Add(closeFunc) } - proxy, err := wsproxy.New(ctx, &wsproxy.Options{ - Logger: logger, - HTTPClient: httpClient, - DashboardURL: primaryAccessURL.Value(), - AccessURL: cfg.AccessURL.Value(), - AppHostname: appHostname, - AppHostnameRegex: appHostnameRegex, - RealIPConfig: realIPConfig, - Tracing: tracer, - PrometheusRegistry: prometheusRegistry, - APIRateLimit: int(cfg.RateLimit.API.Value()), - SecureAuthCookie: cfg.SecureAuthCookie.Value(), - DisablePathApps: cfg.DisablePathApps.Value(), - DERPEnabled: cfg.DERP.Server.Enable.Value(), - ProxySessionToken: proxySessionToken.Value(), - }) + opts := &wsproxy.Options{ + Logger: logger, + HTTPClient: httpClient, + DashboardURL: primaryAccessURL.Value(), + AccessURL: cfg.AccessURL.Value(), + AppHostname: appHostname, + AppHostnameRegex: appHostnameRegex, + RealIPConfig: realIPConfig, + Tracing: tracer, + PrometheusRegistry: prometheusRegistry, + APIRateLimit: int(cfg.RateLimit.API.Value()), + SecureAuthCookie: cfg.SecureAuthCookie.Value(), + DisablePathApps: 
cfg.DisablePathApps.Value(), + DERPEnabled: cfg.DERP.Server.Enable.Value(), + DERPServerRelayAddress: cfg.DERP.Server.RelayURL.String(), + ProxySessionToken: proxySessionToken.Value(), + } + if httpServers.TLSConfig != nil { + opts.TLSCertificates = httpServers.TLSConfig.Certificates + } + proxy, err := wsproxy.New(ctx, opts) if err != nil { return xerrors.Errorf("create workspace proxy: %w", err) } diff --git a/enterprise/coderd/coderdenttest/proxytest.go b/enterprise/coderd/coderdenttest/proxytest.go index 784cc7cdd4e00..6e1364b2c4a9e 100644 --- a/enterprise/coderd/coderdenttest/proxytest.go +++ b/enterprise/coderd/coderdenttest/proxytest.go @@ -14,6 +14,7 @@ import ( "github.com/moby/moby/pkg/namesgenerator" "github.com/prometheus/client_golang/prometheus" + "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "cdr.dev/slog" @@ -131,6 +132,10 @@ func NewWorkspaceProxy(t *testing.T, coderdAPI *coderd.API, owner *codersdk.Clie PrometheusRegistry: prometheus.NewRegistry(), }) require.NoError(t, err) + t.Cleanup(func() { + err := wssrv.Close() + assert.NoError(t, err) + }) mutex.Lock() handler = wssrv.Handler diff --git a/enterprise/coderd/workspaceproxy.go b/enterprise/coderd/workspaceproxy.go index 62d6373c71bd2..2a1558cc044b9 100644 --- a/enterprise/coderd/workspaceproxy.go +++ b/enterprise/coderd/workspaceproxy.go @@ -391,8 +391,8 @@ func (api *API) workspaceProxyRegister(rw http.ResponseWriter, r *http.Request) if replica.StoppedAt.Valid && !replica.StartedAt.IsZero() { // If the replica deregistered, it shouldn't be able to // re-register before restarting. 
- // TODO: sadly this results in 500 - return xerrors.Errorf("replica %s is stopped but not deregistered", replica.ID) + // TODO: sadly this results in 500 when it should be 400 + return xerrors.Errorf("replica %s is marked stopped", replica.ID) } replica, err = db.UpdateReplica(ctx, database.UpdateReplicaParams{ @@ -465,6 +465,7 @@ func (api *API) workspaceProxyRegister(rw http.ResponseWriter, r *http.Request) httpapi.Write(ctx, rw, http.StatusCreated, wsproxysdk.RegisterWorkspaceProxyResponse{ AppSecurityKey: api.AppSecurityKey.String(), + DERPMeshKey: api.DERPServer.MeshKey(), SiblingReplicas: siblingsRes, }) diff --git a/enterprise/wsproxy/wsproxy.go b/enterprise/wsproxy/wsproxy.go index a32886d7e32f0..73d20efd13ea4 100644 --- a/enterprise/wsproxy/wsproxy.go +++ b/enterprise/wsproxy/wsproxy.go @@ -2,6 +2,8 @@ package wsproxy import ( "context" + "crypto/tls" + "crypto/x509" "fmt" "net/http" "net/url" @@ -13,6 +15,7 @@ import ( "github.com/go-chi/chi/v5" "github.com/google/uuid" + "github.com/hashicorp/go-multierror" "github.com/prometheus/client_golang/prometheus" "go.opentelemetry.io/otel/trace" "golang.org/x/xerrors" @@ -28,6 +31,7 @@ import ( "github.com/coder/coder/coderd/workspaceapps" "github.com/coder/coder/coderd/wsconncache" "github.com/coder/coder/codersdk" + "github.com/coder/coder/enterprise/derpmesh" "github.com/coder/coder/enterprise/wsproxy/wsproxysdk" "github.com/coder/coder/site" "github.com/coder/coder/tailnet" @@ -57,11 +61,13 @@ type Options struct { RealIPConfig *httpmw.RealIPConfig Tracing trace.TracerProvider PrometheusRegistry *prometheus.Registry + TLSCertificates []tls.Certificate - APIRateLimit int - SecureAuthCookie bool - DisablePathApps bool - DERPEnabled bool + APIRateLimit int + SecureAuthCookie bool + DisablePathApps bool + DERPEnabled bool + DERPServerRelayAddress string ProxySessionToken string } @@ -101,10 +107,14 @@ type Server struct { // the moon's token. 
SDKClient *wsproxysdk.Client + // DERP + derpMesh *derpmesh.Mesh + // Used for graceful shutdown. Required for the dialer. ctx context.Context cancel context.CancelFunc derpCloseFunc func() + registerDone <-chan struct{} } // New creates a new workspace proxy server. This requires a primary coderd @@ -139,35 +149,33 @@ func New(ctx context.Context, opts *Options) (*Server, error) { return nil, xerrors.Errorf("%q is a workspace proxy, not a primary coderd instance", opts.DashboardURL) } - // TODO: registering logic need to be moved to a struct that calls it - // periodically - replicaID := uuid.New() - osHostname, err := os.Hostname() - if err != nil { - return nil, xerrors.Errorf("get OS hostname: %w", err) + meshRootCA := x509.NewCertPool() + for _, certificate := range opts.TLSCertificates { + for _, certificatePart := range certificate.Certificate { + certificate, err := x509.ParseCertificate(certificatePart) + if err != nil { + return nil, xerrors.Errorf("parse certificate %s: %w", certificate.Subject.CommonName, err) + } + meshRootCA.AddCert(certificate) + } } - regResp, err := client.RegisterWorkspaceProxy(ctx, wsproxysdk.RegisterWorkspaceProxyRequest{ - AccessURL: opts.AccessURL.String(), - WildcardHostname: opts.AppHostname, - DerpEnabled: opts.DERPEnabled, - ReplicaID: replicaID, - ReplicaHostname: osHostname, - ReplicaError: "", - // TODO: replica relay address - ReplicaRelayAddress: "", - Version: buildinfo.Version(), - }) - if err != nil { - return nil, xerrors.Errorf("register proxy: %w", err) + // This TLS configuration spoofs access from the access URL hostname + // assuming that the certificates provided will cover that hostname. + // + // Replica sync and DERP meshing require accessing replicas via their + // internal IP addresses, and if TLS is configured we use the same + // certificates. 
+ meshTLSConfig := &tls.Config{ + MinVersion: tls.VersionTLS12, + Certificates: opts.TLSCertificates, + RootCAs: meshRootCA, + ServerName: opts.AccessURL.Hostname(), } - secKey, err := workspaceapps.KeyFromString(regResp.AppSecurityKey) - if err != nil { - return nil, xerrors.Errorf("parse app security key: %w", err) - } + derpServer := derp.NewServer(key.NewNode(), tailnet.Logger(opts.Logger.Named("derp"))) - r := chi.NewRouter() ctx, cancel := context.WithCancel(context.Background()) + r := chi.NewRouter() s := &Server{ Options: opts, Handler: r, @@ -176,10 +184,48 @@ func New(ctx context.Context, opts *Options) (*Server, error) { TracerProvider: opts.Tracing, PrometheusRegistry: opts.PrometheusRegistry, SDKClient: client, + derpMesh: derpmesh.New(opts.Logger.Named("derpmesh"), derpServer, meshTLSConfig), ctx: ctx, cancel: cancel, } + // Register the workspace proxy with the primary coderd instance and start a + // goroutine to periodically re-register. + replicaID := uuid.New() + osHostname, err := os.Hostname() + if err != nil { + return nil, xerrors.Errorf("get OS hostname: %w", err) + } + regResp, registerDone, err := client.RegisterWorkspaceProxyLoop(ctx, wsproxysdk.RegisterWorkspaceProxyLoopOpts{ + Logger: opts.Logger, + Request: wsproxysdk.RegisterWorkspaceProxyRequest{ + AccessURL: opts.AccessURL.String(), + WildcardHostname: opts.AppHostname, + DerpEnabled: opts.DERPEnabled, + ReplicaID: replicaID, + ReplicaHostname: osHostname, + ReplicaError: "", + ReplicaRelayAddress: opts.DERPServerRelayAddress, + Version: buildinfo.Version(), + }, + MutateFn: s.mutateRegister, + CallbackFn: s.handleRegister, + FailureFn: s.handleRegisterFailure, + }) + if err != nil { + return nil, xerrors.Errorf("register proxy: %w", err) + } + s.registerDone = registerDone + err = s.handleRegister(ctx, regResp) + if err != nil { + return nil, xerrors.Errorf("handle register: %w", err) + } + derpServer.SetMeshKey(regResp.DERPMeshKey) + + secKey, err := 
workspaceapps.KeyFromString(regResp.AppSecurityKey) + if err != nil { + return nil, xerrors.Errorf("parse app security key: %w", err) + } s.AppServer = &workspaceapps.Server{ Logger: opts.Logger.Named("workspaceapps"), DashboardURL: opts.DashboardURL, @@ -202,9 +248,6 @@ func New(ctx context.Context, opts *Options) (*Server, error) { SecureAuthCookie: opts.SecureAuthCookie, } - derpServer := derp.NewServer(key.NewNode(), tailnet.Logger(opts.Logger.Named("derp"))) - // TODO: mesh and derpmesh package stuff - // derpServer.SetMeshKey(regResp.DERPMeshKey) derpHandler := derphttp.Handler(derpServer) derpHandler, s.derpCloseFunc = tailnet.WithWebsocketSupport(derpServer, derpHandler) @@ -279,14 +322,51 @@ func New(ctx context.Context, opts *Options) (*Server, error) { func (s *Server) Close() error { s.cancel() + + var err error + registerDoneWaitTicker := time.NewTicker(3 * time.Second) + select { + case <-registerDoneWaitTicker.C: + err = multierror.Append(err, xerrors.New("timed out waiting for registerDone")) + case <-s.registerDone: + } s.derpCloseFunc() - return s.AppServer.Close() + appServerErr := s.AppServer.Close() + if appServerErr != nil { + err = multierror.Append(err, appServerErr) + } + return err } func (s *Server) DialWorkspaceAgent(id uuid.UUID) (*codersdk.WorkspaceAgentConn, error) { return s.SDKClient.DialWorkspaceAgent(s.ctx, id, nil) } +func (*Server) mutateRegister(_ *wsproxysdk.RegisterWorkspaceProxyRequest) { + // TODO: we should probably ping replicas similarly to the replicasync + // package in the primary and update req.ReplicaError accordingly. 
+} + +func (s *Server) handleRegister(_ context.Context, res wsproxysdk.RegisterWorkspaceProxyResponse) error { + addresses := make([]string, len(res.SiblingReplicas)) + for i, replica := range res.SiblingReplicas { + addresses[i] = replica.RelayAddress + } + s.derpMesh.SetAddresses(addresses, false) + + return nil +} + +func (s *Server) handleRegisterFailure(err error) { + if s.ctx.Err() != nil { + return + } + s.Logger.Fatal(s.ctx, + "failed to periodically re-register workspace proxy with primary Coder deployment", + slog.Error(err), + ) +} + func (s *Server) buildInfo(rw http.ResponseWriter, r *http.Request) { httpapi.Write(r.Context(), rw, http.StatusOK, codersdk.BuildInfoResponse{ ExternalURL: buildinfo.ExternalURL(), diff --git a/enterprise/wsproxy/wsproxysdk/wsproxysdk.go b/enterprise/wsproxy/wsproxysdk/wsproxysdk.go index b3b9e4135f896..5396e7ec68edb 100644 --- a/enterprise/wsproxy/wsproxysdk/wsproxysdk.go +++ b/enterprise/wsproxy/wsproxysdk/wsproxysdk.go @@ -6,10 +6,13 @@ import ( "io" "net/http" "net/url" + "time" "github.com/google/uuid" "golang.org/x/xerrors" + "cdr.dev/slog" + "github.com/coder/coder/coderd/httpmw" "github.com/coder/coder/coderd/workspaceapps" "github.com/coder/coder/codersdk" @@ -176,6 +179,7 @@ type RegisterWorkspaceProxyRequest struct { type RegisterWorkspaceProxyResponse struct { AppSecurityKey string `json:"app_security_key"` + DERPMeshKey string `json:"derp_mesh_key"` // SiblingReplicas is a list of all other replicas of the proxy that have // not timed out. SiblingReplicas []codersdk.Replica `json:"sibling_replicas"` @@ -197,3 +201,134 @@ func (c *Client) RegisterWorkspaceProxy(ctx context.Context, req RegisterWorkspa var resp RegisterWorkspaceProxyResponse return resp, json.NewDecoder(res.Body).Decode(&resp) } + +type RegisterWorkspaceProxyLoopOpts struct { + Logger slog.Logger + Request RegisterWorkspaceProxyRequest + + // Interval between registration attempts. Defaults to 30 seconds. 
Note that + // the initial registration is not delayed by this interval. + Interval time.Duration + // MaxFailureCount is the maximum amount of attempts that the loop will + // retry registration before giving up. Defaults to 10 (for ~5 minutes). + MaxFailureCount int + // AttemptTimeout is the maximum amount of time that the loop will wait for + // a response from the server before considering the attempt a failure. + // Defaults to 10 seconds. + AttemptTimeout time.Duration + + // MutateFn is called before each request to mutate the request struct. This + // can be used to update fields like ReplicaError. + MutateFn func(req *RegisterWorkspaceProxyRequest) + // CallbackFn is called with the response from the server after each + // successful registration, except the first. The callback function is + // called in a blocking manner, so it should avoid blocking for too long. If + // the callback returns an error, the loop will stop immediately and the + // error will be returned to the FailureFn. + CallbackFn func(ctx context.Context, res RegisterWorkspaceProxyResponse) error + // FailureFn is called with the last error returned from the server if the + // context is canceled, registration fails for more than MaxFailureCount, + // or if any permanent values in the response change. + FailureFn func(err error) +} + +// RegisterWorkspaceProxyLoop will register the workspace proxy and then start a +// goroutine to keep registering periodically in the background. +// +// The first response is returned immediately, and subsequent responses will be +// notified to the given CallbackFn. When the context is canceled the loop will +// stop immediately and the context error will be returned to the FailureFn. +// +// The returned channel will be closed when the loop stops and can be used to +// ensure the loop is dead before continuing. 
+func (c *Client) RegisterWorkspaceProxyLoop(ctx context.Context, opts RegisterWorkspaceProxyLoopOpts) (RegisterWorkspaceProxyResponse, <-chan struct{}, error) { + if opts.Interval == 0 { + opts.Interval = 30 * time.Second + } + if opts.MaxFailureCount == 0 { + opts.MaxFailureCount = 10 + } + if opts.AttemptTimeout == 0 { + opts.AttemptTimeout = 10 * time.Second + } + if opts.MutateFn == nil { + opts.MutateFn = func(_ *RegisterWorkspaceProxyRequest) {} + } + if opts.CallbackFn == nil { + opts.CallbackFn = func(_ context.Context, _ RegisterWorkspaceProxyResponse) error { + return nil + } + } + if opts.FailureFn == nil { + opts.FailureFn = func(_ error) {} + } + + originalRes, err := c.RegisterWorkspaceProxy(ctx, opts.Request) + if err != nil { + return RegisterWorkspaceProxyResponse{}, nil, xerrors.Errorf("register workspace proxy: %w", err) + } + + done := make(chan struct{}) + go func() { + defer close(done) + + var ( + failedAttempts = 0 + ticker = time.NewTicker(opts.Interval) + ) + for { + select { + case <-ctx.Done(): + opts.FailureFn(ctx.Err()) + return + case <-ticker.C: + } + + opts.Logger.Debug(ctx, + "re-registering workspace proxy with Coder primary", + slog.F("req", opts.Request), + slog.F("timeout", opts.AttemptTimeout), + slog.F("failed_attempts", failedAttempts), + ) + opts.MutateFn(&opts.Request) + registerCtx, cancel := context.WithTimeout(ctx, opts.AttemptTimeout) + res, err := c.RegisterWorkspaceProxy(registerCtx, opts.Request) + cancel() + if err != nil { + failedAttempts++ + opts.Logger.Warn(ctx, + "failed to re-register workspace proxy with Coder primary", + slog.F("req", opts.Request), + slog.F("timeout", opts.AttemptTimeout), + slog.F("failed_attempts", failedAttempts), + slog.F("err", err), + ) + + if failedAttempts > opts.MaxFailureCount { + opts.FailureFn(xerrors.Errorf("exceeded re-registration failure count of %d: last error: %w", opts.MaxFailureCount, err)) + return + } + } + failedAttempts = 0 + + if res.AppSecurityKey != 
originalRes.AppSecurityKey { + opts.FailureFn(xerrors.New("app security key has changed, proxy must be restarted")) + return + } + if res.DERPMeshKey != originalRes.DERPMeshKey { + opts.FailureFn(xerrors.New("DERP mesh key has changed, proxy must be restarted")) + return + } + + err = opts.CallbackFn(ctx, res) + if err != nil { + opts.FailureFn(xerrors.Errorf("callback fn returned error: %w", err)) + return + } + + ticker.Reset(opts.Interval) + } + }() + + return originalRes, done, nil +} From 2d2f1a3226449f0e644533545418e784a0b804b0 Mon Sep 17 00:00:00 2001 From: Dean Sheather Date: Thu, 4 May 2023 22:17:06 +0000 Subject: [PATCH 05/22] deregister --- enterprise/coderd/coderd.go | 1 + enterprise/coderd/workspaceproxy.go | 89 +++++++++++++++++++-- enterprise/wsproxy/wsproxy.go | 3 +- enterprise/wsproxy/wsproxysdk/wsproxysdk.go | 58 ++++++++++++-- 4 files changed, 137 insertions(+), 14 deletions(-) diff --git a/enterprise/coderd/coderd.go b/enterprise/coderd/coderd.go index c97a0e3550de7..59048906fb70f 100644 --- a/enterprise/coderd/coderd.go +++ b/enterprise/coderd/coderd.go @@ -119,6 +119,7 @@ func New(ctx context.Context, options *Options) (*API, error) { ) r.Post("/issue-signed-app-token", api.workspaceProxyIssueSignedAppToken) r.Post("/register", api.workspaceProxyRegister) + r.Post("/deregister", api.workspaceProxyDeregister) }) r.Route("/{workspaceproxy}", func(r chi.Router) { r.Use( diff --git a/enterprise/coderd/workspaceproxy.go b/enterprise/coderd/workspaceproxy.go index 2a1558cc044b9..5eccac4899407 100644 --- a/enterprise/coderd/workspaceproxy.go +++ b/enterprise/coderd/workspaceproxy.go @@ -34,7 +34,7 @@ import ( // This is useful when a proxy is created or deleted. Errors will be logged. 
func (api *API) forceWorkspaceProxyHealthUpdate(ctx context.Context) { if err := api.ProxyHealth.ForceUpdate(ctx); err != nil { - api.Logger.Error(ctx, "force proxy health update", slog.Error(err)) + api.Logger.Warn(ctx, "force proxy health update", slog.Error(err)) } } @@ -316,9 +316,9 @@ func (api *API) workspaceProxyIssueSignedAppToken(rw http.ResponseWriter, r *htt // in the database and returns a signed token that can be used to authenticate // tokens. // -// This is called periodically by the proxy in the background (once per minute -// per replica) to ensure that the proxy is still registered and the -// corresponding replica table entry is refreshed. +// This is called periodically by the proxy in the background (every 30s per +// replica) to ensure that the proxy is still registered and the corresponding +// replica table entry is refreshed. // // @Summary Register workspace proxy // @ID register-workspace-proxy @@ -326,7 +326,7 @@ func (api *API) workspaceProxyIssueSignedAppToken(rw http.ResponseWriter, r *htt // @Accept json // @Produce json // @Tags Enterprise -// @Param request body wsproxysdk.RegisterWorkspaceProxyRequest true "Issue signed app token request" +// @Param request body wsproxysdk.RegisterWorkspaceProxyRequest true "Register workspace proxy request" // @Success 201 {object} wsproxysdk.RegisterWorkspaceProxyResponse // @Router /workspaceproxies/me/register [post] // @x-apidocgen {"skip": true} @@ -367,6 +367,13 @@ func (api *API) workspaceProxyRegister(rw http.ResponseWriter, r *http.Request) } } + if req.ReplicaID == uuid.Nil { + httpapi.Write(ctx, rw, http.StatusBadRequest, codersdk.Response{ + Message: "Replica ID is invalid.", + }) + return + } + // TODO: get region ID var regionID int32 = 1234 @@ -472,6 +479,78 @@ func (api *API) workspaceProxyRegister(rw http.ResponseWriter, r *http.Request) go api.forceWorkspaceProxyHealthUpdate(api.ctx) } +// @Summary Deregister workspace proxy +// @ID deregister-workspace-proxy +// @Security 
CoderSessionToken +// @Accept json +// @Tags Enterprise +// @Param request body wsproxysdk.DeregisterWorkspaceProxyRequest true "Deregister workspace proxy request" +// @Success 204 +// @Router /workspaceproxies/me/deregister [post] +// @x-apidocgen {"skip": true} +func (api *API) workspaceProxyDeregister(rw http.ResponseWriter, r *http.Request) { + ctx := r.Context() + + var req wsproxysdk.DeregisterWorkspaceProxyRequest + if !httpapi.Read(ctx, rw, r, &req) { + return + } + + err := api.Database.InTx(func(db database.Store) error { + now := time.Now() + replica, err := db.GetReplicaByID(ctx, req.ReplicaID) + if err != nil { + return xerrors.Errorf("get replica: %w", err) + } + + if replica.StoppedAt.Valid && !replica.StartedAt.IsZero() { + // TODO: sadly this results in 500 when it should be 400 + return xerrors.Errorf("replica %s is already marked stopped", replica.ID) + } + + replica, err = db.UpdateReplica(ctx, database.UpdateReplicaParams{ + ID: replica.ID, + UpdatedAt: now, + StartedAt: replica.StartedAt, + StoppedAt: sql.NullTime{ + Valid: true, + Time: now, + }, + RelayAddress: replica.RelayAddress, + RegionID: replica.RegionID, + Hostname: replica.Hostname, + Version: replica.Version, + Error: replica.Error, + DatabaseLatency: replica.DatabaseLatency, + Primary: replica.Primary, + }) + if err != nil { + return xerrors.Errorf("update replica: %w", err) + } + + return nil + }, nil) + if httpapi.Is404Error(err) { + httpapi.ResourceNotFound(rw) + return + } + if err != nil { + httpapi.InternalServerError(rw, err) + return + } + + // Publish a replicasync event with a nil ID so every replica (yes, even the + // current replica) will refresh its replicas list. 
+ err = api.Pubsub.Publish(replicasync.PubsubEvent, []byte(uuid.Nil.String())) + if err != nil { + httpapi.InternalServerError(rw, err) + return + } + + rw.WriteHeader(http.StatusNoContent) + go api.forceWorkspaceProxyHealthUpdate(api.ctx) +} + // reconnectingPTYSignedToken issues a signed app token for use when connecting // to the reconnecting PTY websocket on an external workspace proxy. This is set // by the client as a query parameter when connecting. diff --git a/enterprise/wsproxy/wsproxy.go b/enterprise/wsproxy/wsproxy.go index 73d20efd13ea4..427cb18f9fc07 100644 --- a/enterprise/wsproxy/wsproxy.go +++ b/enterprise/wsproxy/wsproxy.go @@ -324,7 +324,7 @@ func (s *Server) Close() error { s.cancel() var err error - registerDoneWaitTicker := time.NewTicker(3 * time.Second) + registerDoneWaitTicker := time.NewTicker(11 * time.Second) // the attempt timeout is 10s select { case <-registerDoneWaitTicker.C: err = multierror.Append(err, xerrors.New("timed out waiting for registerDone")) @@ -335,6 +335,7 @@ func (s *Server) Close() error { if appServerErr != nil { err = multierror.Append(err, appServerErr) } + s.SDKClient.SDKClient.HTTPClient.CloseIdleConnections() return err } diff --git a/enterprise/wsproxy/wsproxysdk/wsproxysdk.go b/enterprise/wsproxy/wsproxysdk/wsproxysdk.go index 5396e7ec68edb..73edd40d21fae 100644 --- a/enterprise/wsproxy/wsproxysdk/wsproxysdk.go +++ b/enterprise/wsproxy/wsproxysdk/wsproxysdk.go @@ -202,6 +202,29 @@ func (c *Client) RegisterWorkspaceProxy(ctx context.Context, req RegisterWorkspa return resp, json.NewDecoder(res.Body).Decode(&resp) } +type DeregisterWorkspaceProxyRequest struct { + // ReplicaID is a unique identifier for the replica of the proxy that is + // deregistering. It should be generated by the client on startup and + // should've already been passed to the register endpoint. 
+ ReplicaID uuid.UUID `json:"replica_id"` +} + +func (c *Client) DeregisterWorkspaceProxy(ctx context.Context, req DeregisterWorkspaceProxyRequest) error { + res, err := c.Request(ctx, http.MethodPost, + "/api/v2/workspaceproxies/me/deregister", + req, + ) + if err != nil { + return xerrors.Errorf("make request: %w", err) + } + defer res.Body.Close() + + if res.StatusCode != http.StatusNoContent { + return codersdk.ReadBodyAsError(res) + } + return nil +} + type RegisterWorkspaceProxyLoopOpts struct { Logger slog.Logger Request RegisterWorkspaceProxyRequest @@ -240,7 +263,9 @@ type RegisterWorkspaceProxyLoopOpts struct { // stop immediately and the context error will be returned to the FailureFn. // // The returned channel will be closed when the loop stops and can be used to -// ensure the loop is dead before continuing. +// ensure the loop is dead before continuing. When a fatal error is encountered, +// the proxy will be deregistered (with the same ReplicaID and AttemptTimeout) +// before calling the FailureFn. func (c *Client) RegisterWorkspaceProxyLoop(ctx context.Context, opts RegisterWorkspaceProxyLoopOpts) (RegisterWorkspaceProxyResponse, <-chan struct{}, error) { if opts.Interval == 0 { opts.Interval = 30 * time.Second @@ -259,8 +284,25 @@ func (c *Client) RegisterWorkspaceProxyLoop(ctx context.Context, opts RegisterWo return nil } } - if opts.FailureFn == nil { - opts.FailureFn = func(_ error) {} + + failureFn := func(err error) { + // We have to use background context here because the original context + // may be canceled. 
+ deregisterCtx, cancel := context.WithTimeout(context.Background(), opts.AttemptTimeout) + defer cancel() + deregisterErr := c.DeregisterWorkspaceProxy(deregisterCtx, DeregisterWorkspaceProxyRequest{ + ReplicaID: opts.Request.ReplicaID, + }) + if deregisterErr != nil { + opts.Logger.Error(ctx, + "failed to deregister workspace proxy with Coder primary (it will be automatically deregistered shortly)", + slog.F("err", deregisterErr), + ) + } + + if opts.FailureFn != nil { + opts.FailureFn(err) + } } originalRes, err := c.RegisterWorkspaceProxy(ctx, opts.Request) @@ -279,7 +321,7 @@ func (c *Client) RegisterWorkspaceProxyLoop(ctx context.Context, opts RegisterWo for { select { case <-ctx.Done(): - opts.FailureFn(ctx.Err()) + failureFn(ctx.Err()) return case <-ticker.C: } @@ -305,24 +347,24 @@ func (c *Client) RegisterWorkspaceProxyLoop(ctx context.Context, opts RegisterWo ) if failedAttempts > opts.MaxFailureCount { - opts.FailureFn(xerrors.Errorf("exceeded re-registration failure count of %d: last error: %w", opts.MaxFailureCount, err)) + failureFn(xerrors.Errorf("exceeded re-registration failure count of %d: last error: %w", opts.MaxFailureCount, err)) return } } failedAttempts = 0 if res.AppSecurityKey != originalRes.AppSecurityKey { - opts.FailureFn(xerrors.New("app security key has changed, proxy must be restarted")) + failureFn(xerrors.New("app security key has changed, proxy must be restarted")) return } if res.DERPMeshKey != originalRes.DERPMeshKey { - opts.FailureFn(xerrors.New("DERP mesh key has changed, proxy must be restarted")) + failureFn(xerrors.New("DERP mesh key has changed, proxy must be restarted")) return } err = opts.CallbackFn(ctx, res) if err != nil { - opts.FailureFn(xerrors.Errorf("callback fn returned error: %w", err)) + failureFn(xerrors.Errorf("callback fn returned error: %w", err)) return } From 28ae1558226ec0087fdd421fbae7917fa462d1f4 Mon Sep 17 00:00:00 2001 From: Dean Sheather Date: Thu, 4 May 2023 23:38:15 +0000 Subject: [PATCH 06/22] 
tests and various fixes --- coderd/database/dbfake/databasefake.go | 5 + codersdk/workspaceagents.go | 36 ++- enterprise/coderd/coderd.go | 58 +++-- enterprise/coderd/coderdenttest/proxytest.go | 4 +- enterprise/coderd/workspaceproxy.go | 5 +- enterprise/wsproxy/wsproxy_test.go | 238 ++++++++++++++++++- enterprise/wsproxy/wsproxysdk/wsproxysdk.go | 4 + 7 files changed, 311 insertions(+), 39 deletions(-) diff --git a/coderd/database/dbfake/databasefake.go b/coderd/database/dbfake/databasefake.go index 95d16564c7af8..1d8b7efb78137 100644 --- a/coderd/database/dbfake/databasefake.go +++ b/coderd/database/dbfake/databasefake.go @@ -5210,10 +5210,14 @@ func (q *fakeQuerier) InsertWorkspaceProxy(_ context.Context, arg database.Inser q.mutex.Lock() defer q.mutex.Unlock() + lastRegionID := int32(0) for _, p := range q.workspaceProxies { if !p.Deleted && p.Name == arg.Name { return database.WorkspaceProxy{}, errDuplicateKey } + if p.RegionID > lastRegionID { + lastRegionID = p.RegionID + } } p := database.WorkspaceProxy{ @@ -5223,6 +5227,7 @@ func (q *fakeQuerier) InsertWorkspaceProxy(_ context.Context, arg database.Inser Icon: arg.Icon, DerpEnabled: arg.DerpEnabled, TokenHashedSecret: arg.TokenHashedSecret, + RegionID: lastRegionID + 1, CreatedAt: arg.CreatedAt, UpdatedAt: arg.UpdatedAt, Deleted: false, diff --git a/codersdk/workspaceagents.go b/codersdk/workspaceagents.go index 8f418eebf29ff..23bf0d2a39d3e 100644 --- a/codersdk/workspaceagents.go +++ b/codersdk/workspaceagents.go @@ -147,29 +147,41 @@ type WorkspaceAgentConnectionInfo struct { DERPMap *tailcfg.DERPMap `json:"derp_map"` } +func (c *Client) WorkspaceAgentConnectionInfo(ctx context.Context, agentID uuid.UUID) (WorkspaceAgentConnectionInfo, error) { + res, err := c.Request(ctx, http.MethodGet, fmt.Sprintf("/api/v2/workspaceagents/%s/connection", agentID), nil) + if err != nil { + return WorkspaceAgentConnectionInfo{}, err + } + defer res.Body.Close() + if res.StatusCode != http.StatusOK { + return 
WorkspaceAgentConnectionInfo{}, ReadBodyAsError(res) + } + + var connInfo WorkspaceAgentConnectionInfo + return connInfo, json.NewDecoder(res.Body).Decode(&connInfo) +} + // @typescript-ignore DialWorkspaceAgentOptions type DialWorkspaceAgentOptions struct { Logger slog.Logger // BlockEndpoints forced a direct connection through DERP. BlockEndpoints bool + // CustomConnectionInfo avoids hitting the API to get connection info. + CustomConnectionInfo *WorkspaceAgentConnectionInfo } func (c *Client) DialWorkspaceAgent(ctx context.Context, agentID uuid.UUID, options *DialWorkspaceAgentOptions) (agentConn *WorkspaceAgentConn, err error) { if options == nil { options = &DialWorkspaceAgentOptions{} } - res, err := c.Request(ctx, http.MethodGet, fmt.Sprintf("/api/v2/workspaceagents/%s/connection", agentID), nil) - if err != nil { - return nil, err - } - defer res.Body.Close() - if res.StatusCode != http.StatusOK { - return nil, ReadBodyAsError(res) - } - var connInfo WorkspaceAgentConnectionInfo - err = json.NewDecoder(res.Body).Decode(&connInfo) - if err != nil { - return nil, xerrors.Errorf("decode conn info: %w", err) + + connInfo := options.CustomConnectionInfo + if connInfo == nil { + res, err := c.WorkspaceAgentConnectionInfo(ctx, agentID) + if err != nil { + return nil, xerrors.Errorf("get connection info: %w", err) + } + connInfo = &res } ip := tailnet.IP() diff --git a/enterprise/coderd/coderd.go b/enterprise/coderd/coderd.go index 59048906fb70f..9befee249f3db 100644 --- a/enterprise/coderd/coderd.go +++ b/enterprise/coderd/coderd.go @@ -450,6 +450,35 @@ func (api *API) updateEntitlements(ctx context.Context) error { return nil } +// getProxyDERPStartingRegionID returns the starting region ID that should be +// used for workspace proxies. A proxy's actual region ID is the return value +// from this function + it's RegionID field. 
+// +// Two ints are returned, the first is the starting region ID for proxies, and +// the second is the maximum region ID that already exists in the DERP map. +func getProxyDERPStartingRegionID(derpMap *tailcfg.DERPMap) (sID int, mID int) { + maxRegionID := 0 + for _, region := range derpMap.Regions { + if region.RegionID > maxRegionID { + maxRegionID = region.RegionID + } + } + if maxRegionID < 0 { + maxRegionID = 0 + } + + // Round to the nearest 10,000 with a sufficient buffer of at least 2,000. + const roundStartingRegionID = 10_000 + const startingRegionIDBuffer = 2_000 + startingRegionID := maxRegionID + startingRegionIDBuffer + startingRegionID = int(math.Ceil(float64(startingRegionID)/roundStartingRegionID) * roundStartingRegionID) + if startingRegionID < roundStartingRegionID { + startingRegionID = roundStartingRegionID + } + + return startingRegionID, maxRegionID +} + var ( lastDerpConflictMutex sync.Mutex lastDerpConflictLog time.Time @@ -461,16 +490,8 @@ func derpMapper(logger slog.Logger, proxyHealth *proxyhealth.ProxyHealth) func(* // Find the starting region ID that we'll use for proxies. This must be // deterministic based on the derp map. - startingRegionID := 0 - for _, region := range derpMap.Regions { - if region.RegionID > startingRegionID { - startingRegionID = region.RegionID - } - } - if startingRegionID < 0 { - startingRegionID = 0 - } - if startingRegionID >= 1<<32 { + startingRegionID, largestRegionID := getProxyDERPStartingRegionID(derpMap) + if largestRegionID >= 1<<32 { // Enforce an upper bound on the region ID. This shouldn't be hit in // practice, but it's a good sanity check. lastDerpConflictMutex.Lock() @@ -483,23 +504,13 @@ func derpMapper(logger slog.Logger, proxyHealth *proxyhealth.ProxyHealth) func(* logger.Warn( context.Background(), "existing DERP region IDs are too large, proxy region IDs will not be populated in the derp map. 
Please ensure that all DERP region IDs are less than 2^32.", - slog.F("starting_region_id", startingRegionID), + slog.F("largest_region_id", largestRegionID), slog.F("max_region_id", 1<<32-1), ) return derpMap } } - // Round to the nearest 10,000 with a sufficient buffer of at least - // 2,000. - const roundStartingRegionID = 10_000 - const startingRegionIDBuffer = 2_000 - startingRegionID += startingRegionIDBuffer - startingRegionID = int(math.Ceil(float64(startingRegionID)/roundStartingRegionID) * roundStartingRegionID) - if startingRegionID < roundStartingRegionID { - startingRegionID = roundStartingRegionID - } - // Add all healthy proxies to the DERP map. statusMap := proxyHealth.HealthStatus() statusLoop: @@ -564,12 +575,13 @@ func derpMapper(logger slog.Logger, proxyHealth *proxyhealth.ProxyHealth) func(* } derpMap.Regions[regionID] = &tailcfg.DERPRegion{ - EmbeddedRelay: true, + // EmbeddedRelay ONLY applies to the primary. + EmbeddedRelay: false, RegionID: regionID, RegionCode: regionCode, RegionName: status.Proxy.Name, Nodes: []*tailcfg.DERPNode{{ - Name: fmt.Sprintf("%db", regionID), + Name: fmt.Sprintf("%da", regionID), RegionID: regionID, HostName: u.Hostname(), DERPPort: portInt, diff --git a/enterprise/coderd/coderdenttest/proxytest.go b/enterprise/coderd/coderdenttest/proxytest.go index 6e1364b2c4a9e..f1081d78d8ed6 100644 --- a/enterprise/coderd/coderdenttest/proxytest.go +++ b/enterprise/coderd/coderdenttest/proxytest.go @@ -129,7 +129,9 @@ func NewWorkspaceProxy(t *testing.T, coderdAPI *coderd.API, owner *codersdk.Clie DisablePathApps: options.DisablePathApps, // We need a new registry to not conflict with the coderd internal // proxy metrics. 
- PrometheusRegistry: prometheus.NewRegistry(), + PrometheusRegistry: prometheus.NewRegistry(), + DERPEnabled: true, + DERPServerRelayAddress: accessURL.String(), }) require.NoError(t, err) t.Cleanup(func() { diff --git a/enterprise/coderd/workspaceproxy.go b/enterprise/coderd/workspaceproxy.go index 5eccac4899407..e95c3be200730 100644 --- a/enterprise/coderd/workspaceproxy.go +++ b/enterprise/coderd/workspaceproxy.go @@ -374,8 +374,8 @@ func (api *API) workspaceProxyRegister(rw http.ResponseWriter, r *http.Request) return } - // TODO: get region ID - var regionID int32 = 1234 + startingRegionID, _ := getProxyDERPStartingRegionID(api.Options.DERPMap) + regionID := int32(startingRegionID) + proxy.RegionID err := api.Database.InTx(func(db database.Store) error { // First, update the proxy's values in the database. @@ -473,6 +473,7 @@ func (api *API) workspaceProxyRegister(rw http.ResponseWriter, r *http.Request) httpapi.Write(ctx, rw, http.StatusCreated, wsproxysdk.RegisterWorkspaceProxyResponse{ AppSecurityKey: api.AppSecurityKey.String(), DERPMeshKey: api.DERPServer.MeshKey(), + DERPRegionID: regionID, SiblingReplicas: siblingsRes, }) diff --git a/enterprise/wsproxy/wsproxy_test.go b/enterprise/wsproxy/wsproxy_test.go index 6b4ef67bbfeb1..611dcfbb6058a 100644 --- a/enterprise/wsproxy/wsproxy_test.go +++ b/enterprise/wsproxy/wsproxy_test.go @@ -1,18 +1,251 @@ package wsproxy_test import ( + "fmt" "net" "testing" + "time" + "github.com/google/uuid" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "tailscale.com/tailcfg" + + "cdr.dev/slog" + "cdr.dev/slog/sloggers/slogtest" + "github.com/coder/coder/agent" "github.com/coder/coder/cli/clibase" "github.com/coder/coder/coderd/coderdtest" "github.com/coder/coder/coderd/httpmw" "github.com/coder/coder/coderd/workspaceapps/apptest" "github.com/coder/coder/codersdk" + "github.com/coder/coder/codersdk/agentsdk" "github.com/coder/coder/enterprise/coderd/coderdenttest" 
"github.com/coder/coder/enterprise/coderd/license" + "github.com/coder/coder/provisioner/echo" + "github.com/coder/coder/testutil" ) +func TestDERP(t *testing.T) { + t.Parallel() + + deploymentValues := coderdtest.DeploymentValues(t) + deploymentValues.Experiments = []string{ + string(codersdk.ExperimentMoons), + "*", + } + + client, closer, api := coderdenttest.NewWithAPI(t, &coderdenttest.Options{ + Options: &coderdtest.Options{ + DeploymentValues: deploymentValues, + AppHostname: "*.primary.test.coder.com", + IncludeProvisionerDaemon: true, + RealIPConfig: &httpmw.RealIPConfig{ + TrustedOrigins: []*net.IPNet{{ + IP: net.ParseIP("127.0.0.1"), + Mask: net.CIDRMask(8, 32), + }}, + TrustedHeaders: []string{ + "CF-Connecting-IP", + }, + }, + }, + }) + t.Cleanup(func() { + _ = closer.Close() + }) + + user := coderdtest.CreateFirstUser(t, client) + _ = coderdenttest.AddLicense(t, client, coderdenttest.LicenseOptions{ + Features: license.Features{ + codersdk.FeatureWorkspaceProxy: 1, + }, + }) + + // Create two running external proxies. + proxyAPI1 := coderdenttest.NewWorkspaceProxy(t, api, client, &coderdenttest.ProxyOptions{ + Name: "best-proxy", + }) + proxyAPI2 := coderdenttest.NewWorkspaceProxy(t, api, client, &coderdenttest.ProxyOptions{ + Name: "worst-proxy", + }) + + // Create a proxy that is never started. + createProxyCtx := testutil.Context(t, testutil.WaitLong) + _, err := client.CreateWorkspaceProxy(createProxyCtx, codersdk.CreateWorkspaceProxyRequest{ + Name: "never-started-proxy", + }) + require.NoError(t, err) + + // Wait for both running proxies to become healthy. + for i := 0; i < 10; i++ { + regionsCtx := testutil.Context(t, testutil.WaitLong) + regions, err := client.Regions(regionsCtx) + require.NoError(t, err) + require.Len(t, regions, 4) + // The first 3 regions should be healthy. + for _, r := range regions[:3] { + require.True(t, r.Healthy) + } + // The last region should be unhealthy. 
+ require.False(t, regions[3].Healthy) + time.Sleep(time.Second) + } + + // Create a workspace + apps + authToken := uuid.NewString() + version := coderdtest.CreateTemplateVersion(t, client, user.OrganizationID, &echo.Responses{ + Parse: echo.ParseComplete, + ProvisionApply: echo.ProvisionApplyWithAgent(authToken), + }) + template := coderdtest.CreateTemplate(t, client, user.OrganizationID, version.ID) + coderdtest.AwaitTemplateVersionJob(t, client, version.ID) + workspace := coderdtest.CreateWorkspace(t, client, user.OrganizationID, template.ID) + build := coderdtest.AwaitWorkspaceBuildJob(t, client, workspace.LatestBuild.ID) + workspace.LatestBuild = build + + agentID := uuid.Nil +resourceLoop: + for _, res := range build.Resources { + for _, agnt := range res.Agents { + agentID = agnt.ID + break resourceLoop + } + } + require.NotEqual(t, uuid.Nil, agentID) + + // Connect an agent to the workspace + agentClient := agentsdk.New(client.URL) + agentClient.SetSessionToken(authToken) + agentCloser := agent.New(agent.Options{ + Client: agentClient, + Logger: slogtest.Make(t, nil).Named("agent").Leveled(slog.LevelDebug), + }) + t.Cleanup(func() { + _ = agentCloser.Close() + }) + coderdtest.AwaitWorkspaceAgents(t, client, workspace.ID) + + t.Run("ReturnedInDERPMap", func(t *testing.T) { + t.Parallel() + + ctx := testutil.Context(t, testutil.WaitLong) + connInfo, err := client.WorkspaceAgentConnectionInfo(ctx, agentID) + require.NoError(t, err) + + // There should be three DERP servers in the map: the primary, and each + // of the two running proxies. 
+ require.NotNil(t, connInfo.DERPMap) + require.Len(t, connInfo.DERPMap.Regions, 3) + + var ( + primaryRegion *tailcfg.DERPRegion + proxy1Region *tailcfg.DERPRegion + proxy2Region *tailcfg.DERPRegion + ) + for _, r := range connInfo.DERPMap.Regions { + if r.RegionName == "Coder" { + primaryRegion = r + continue + } + if r.RegionName == "best-proxy" { + proxy1Region = r + continue + } + if r.RegionName == "worst-proxy" { + proxy2Region = r + continue + } + + t.Fatalf("unexpected region: %+v", r) + } + + // The primary region: + require.Equal(t, "Coder", primaryRegion.RegionName) + require.Equal(t, "coder", primaryRegion.RegionCode) + require.Equal(t, 1, primaryRegion.RegionID) + require.True(t, primaryRegion.EmbeddedRelay) + require.Len(t, primaryRegion.Nodes, 1) + require.Equal(t, "1a", primaryRegion.Nodes[0].Name) + require.Equal(t, 1, primaryRegion.Nodes[0].RegionID) + require.Equal(t, "", primaryRegion.Nodes[0].HostName) // embedded region has no hostname returned + require.Equal(t, api.AccessURL.Port(), fmt.Sprint(primaryRegion.Nodes[0].DERPPort)) + require.Equal(t, api.AccessURL.Scheme == "http", primaryRegion.Nodes[0].ForceHTTP) + + // The first proxy region: + require.Equal(t, "best-proxy", proxy1Region.RegionName) + require.Equal(t, "coder_best-proxy", proxy1Region.RegionCode) + require.Equal(t, 10001, proxy1Region.RegionID) + require.False(t, proxy1Region.EmbeddedRelay) + require.Len(t, proxy1Region.Nodes, 1) + require.Equal(t, "10001a", proxy1Region.Nodes[0].Name) + require.Equal(t, 10001, proxy1Region.Nodes[0].RegionID) + require.Equal(t, proxyAPI1.Options.AccessURL.Hostname(), proxy1Region.Nodes[0].HostName) + require.Equal(t, proxyAPI1.Options.AccessURL.Port(), fmt.Sprint(proxy1Region.Nodes[0].DERPPort)) + require.Equal(t, proxyAPI1.Options.AccessURL.Scheme == "http", proxy1Region.Nodes[0].ForceHTTP) + + // The second proxy region: + require.Equal(t, "worst-proxy", proxy2Region.RegionName) + require.Equal(t, "coder_worst-proxy", 
proxy2Region.RegionCode) + require.Equal(t, 10002, proxy2Region.RegionID) + require.False(t, proxy2Region.EmbeddedRelay) + require.Len(t, proxy2Region.Nodes, 1) + require.Equal(t, "10002a", proxy2Region.Nodes[0].Name) + require.Equal(t, 10002, proxy2Region.Nodes[0].RegionID) + require.Equal(t, proxyAPI2.Options.AccessURL.Hostname(), proxy2Region.Nodes[0].HostName) + require.Equal(t, proxyAPI2.Options.AccessURL.Port(), fmt.Sprint(proxy2Region.Nodes[0].DERPPort)) + require.Equal(t, proxyAPI2.Options.AccessURL.Scheme == "http", proxy2Region.Nodes[0].ForceHTTP) + }) + + t.Run("ConnectDERP", func(t *testing.T) { + t.Parallel() + + connInfo, err := client.WorkspaceAgentConnectionInfo(testutil.Context(t, testutil.WaitLong), agentID) + require.NoError(t, err) + require.NotNil(t, connInfo.DERPMap) + require.Len(t, connInfo.DERPMap.Regions, 3) + + // Connect to each region. + for _, r := range connInfo.DERPMap.Regions { + r := r + + t.Run(r.RegionName, func(t *testing.T) { + t.Parallel() + + ctx := testutil.Context(t, testutil.WaitLong) + conn, err := client.DialWorkspaceAgent(ctx, agentID, &codersdk.DialWorkspaceAgentOptions{ + Logger: slogtest.Make(t, &slogtest.Options{ + IgnoreErrors: true, + }).Named("agent").Leveled(slog.LevelDebug), + // Force DERP. + BlockEndpoints: true, + // Force connecting to this region only. 
+ CustomConnectionInfo: &codersdk.WorkspaceAgentConnectionInfo{ + DERPMap: &tailcfg.DERPMap{ + Regions: map[int]*tailcfg.DERPRegion{ + r.RegionID: r, + }, + OmitDefaultRegions: true, + }, + }, + }) + require.NoError(t, err) + t.Cleanup(func() { + err := conn.Close() + assert.NoError(t, err) + }) + + ok := conn.AwaitReachable(ctx) + require.True(t, ok) + + _, p2p, _, err := conn.Ping(ctx) + require.NoError(t, err) + require.False(t, p2p) + }) + } + }) +} + func TestWorkspaceProxyWorkspaceApps(t *testing.T) { t.Parallel() @@ -26,7 +259,7 @@ func TestWorkspaceProxyWorkspaceApps(t *testing.T) { "*", } - client, _, api := coderdenttest.NewWithAPI(t, &coderdenttest.Options{ + client, closer, api := coderdenttest.NewWithAPI(t, &coderdenttest.Options{ Options: &coderdtest.Options{ DeploymentValues: deploymentValues, AppHostname: "*.primary.test.coder.com", @@ -42,6 +275,9 @@ func TestWorkspaceProxyWorkspaceApps(t *testing.T) { }, }, }) + t.Cleanup(func() { + _ = closer.Close() + }) user := coderdtest.CreateFirstUser(t, client) _ = coderdenttest.AddLicense(t, client, coderdenttest.LicenseOptions{ diff --git a/enterprise/wsproxy/wsproxysdk/wsproxysdk.go b/enterprise/wsproxy/wsproxysdk/wsproxysdk.go index 73edd40d21fae..fad1f0a33bb8c 100644 --- a/enterprise/wsproxy/wsproxysdk/wsproxysdk.go +++ b/enterprise/wsproxy/wsproxysdk/wsproxysdk.go @@ -180,6 +180,7 @@ type RegisterWorkspaceProxyRequest struct { type RegisterWorkspaceProxyResponse struct { AppSecurityKey string `json:"app_security_key"` DERPMeshKey string `json:"derp_mesh_key"` + DERPRegionID int32 `json:"derp_region_id"` // SiblingReplicas is a list of all other replicas of the proxy that have // not timed out. 
SiblingReplicas []codersdk.Replica `json:"sibling_replicas"` @@ -361,6 +362,9 @@ func (c *Client) RegisterWorkspaceProxyLoop(ctx context.Context, opts RegisterWo failureFn(xerrors.New("DERP mesh key has changed, proxy must be restarted")) return } + if res.DERPRegionID != originalRes.DERPRegionID { + failureFn(xerrors.New("DERP region ID has changed, proxy must be restarted")) + } err = opts.CallbackFn(ctx, res) if err != nil { From 5f5d4ff6cf8db5bc78f71ff10a5165db93fc3325 Mon Sep 17 00:00:00 2001 From: Dean Sheather Date: Fri, 5 May 2023 07:32:31 +0000 Subject: [PATCH 07/22] more tests --- enterprise/coderd/workspaceproxy.go | 6 +- enterprise/coderd/workspaceproxy_test.go | 338 +++++++++++++++++++++++ 2 files changed, 340 insertions(+), 4 deletions(-) diff --git a/enterprise/coderd/workspaceproxy.go b/enterprise/coderd/workspaceproxy.go index e95c3be200730..9690af167ab4c 100644 --- a/enterprise/coderd/workspaceproxy.go +++ b/enterprise/coderd/workspaceproxy.go @@ -418,8 +418,7 @@ func (api *API) workspaceProxyRegister(rw http.ResponseWriter, r *http.Request) if err != nil { return xerrors.Errorf("update replica: %w", err) } - } - if xerrors.Is(err, sql.ErrNoRows) { + } else if xerrors.Is(err, sql.ErrNoRows) { // Replica doesn't exist, create it. 
replica, err = db.InsertReplica(ctx, database.InsertReplicaParams{ ID: req.ReplicaID, @@ -436,8 +435,7 @@ func (api *API) workspaceProxyRegister(rw http.ResponseWriter, r *http.Request) if err != nil { return xerrors.Errorf("insert replica: %w", err) } - } - if err != nil { + } else if err != nil { return xerrors.Errorf("get replica: %w", err) } diff --git a/enterprise/coderd/workspaceproxy_test.go b/enterprise/coderd/workspaceproxy_test.go index 4a48a0b7349da..9d589830b9008 100644 --- a/enterprise/coderd/workspaceproxy_test.go +++ b/enterprise/coderd/workspaceproxy_test.go @@ -7,6 +7,7 @@ import ( "net/http/httputil" "net/url" "testing" + "time" "github.com/google/uuid" "github.com/moby/moby/pkg/namesgenerator" @@ -16,7 +17,9 @@ import ( "cdr.dev/slog" "cdr.dev/slog/sloggers/slogtest" "github.com/coder/coder/agent" + "github.com/coder/coder/buildinfo" "github.com/coder/coder/coderd/coderdtest" + "github.com/coder/coder/coderd/database" "github.com/coder/coder/coderd/database/dbtestutil" "github.com/coder/coder/coderd/workspaceapps" "github.com/coder/coder/codersdk" @@ -245,6 +248,341 @@ func TestWorkspaceProxyCRUD(t *testing.T) { }) } +func TestProxyRegisterDeregister(t *testing.T) { + t.Parallel() + + setup := func(t *testing.T) (*codersdk.Client, database.Store) { + dv := coderdtest.DeploymentValues(t) + dv.Experiments = []string{ + string(codersdk.ExperimentMoons), + "*", + } + + db, pubsub := dbtestutil.NewDB(t) + client := coderdenttest.New(t, &coderdenttest.Options{ + Options: &coderdtest.Options{ + DeploymentValues: dv, + Database: db, + Pubsub: pubsub, + IncludeProvisionerDaemon: true, + }, + }) + + _ = coderdtest.CreateFirstUser(t, client) + _ = coderdenttest.AddLicense(t, client, coderdenttest.LicenseOptions{ + Features: license.Features{ + codersdk.FeatureWorkspaceProxy: 1, + }, + }) + + return client, db + } + + t.Run("OK", func(t *testing.T) { + t.Parallel() + + client, db := setup(t) + + ctx := testutil.Context(t, testutil.WaitLong) + const ( + 
proxyName = "hello" + proxyDisplayName = "Hello World" + proxyIcon = "/emojis/flag.png" + ) + createRes, err := client.CreateWorkspaceProxy(ctx, codersdk.CreateWorkspaceProxyRequest{ + Name: proxyName, + DisplayName: proxyDisplayName, + Icon: proxyIcon, + }) + require.NoError(t, err) + + proxyClient := wsproxysdk.New(client.URL) + proxyClient.SetSessionToken(createRes.ProxyToken) + + // Register + req := wsproxysdk.RegisterWorkspaceProxyRequest{ + AccessURL: "https://proxy.coder.test", + WildcardHostname: "*.proxy.coder.test", + DerpEnabled: true, + ReplicaID: uuid.New(), + ReplicaHostname: "mars", + ReplicaError: "", + ReplicaRelayAddress: "http://127.0.0.1:8080", + Version: buildinfo.Version(), + } + registerRes1, err := proxyClient.RegisterWorkspaceProxy(ctx, req) + require.NoError(t, err) + require.NotEmpty(t, registerRes1.AppSecurityKey) + require.NotEmpty(t, registerRes1.DERPMeshKey) + require.EqualValues(t, 10001, registerRes1.DERPRegionID) + require.Empty(t, registerRes1.SiblingReplicas) + + // Get the proxy to ensure fields have updated. + // TODO: we don't have a way to get the proxy by ID yet. + proxies, err := client.WorkspaceProxies(ctx) + require.NoError(t, err) + require.Len(t, proxies, 1) + require.Equal(t, createRes.Proxy.ID, proxies[0].ID) + require.Equal(t, proxyName, proxies[0].Name) + require.Equal(t, proxyDisplayName, proxies[0].DisplayName) + require.Equal(t, proxyIcon, proxies[0].Icon) + require.Equal(t, req.AccessURL, proxies[0].URL) + require.Equal(t, req.AccessURL, proxies[0].URL) + require.Equal(t, req.WildcardHostname, proxies[0].WildcardHostname) + require.Equal(t, req.DerpEnabled, proxies[0].DerpEnabled) + require.False(t, proxies[0].Deleted) + + // Get the replica from the DB. 
+ replica, err := db.GetReplicaByID(ctx, req.ReplicaID) + require.NoError(t, err) + require.Equal(t, req.ReplicaID, replica.ID) + require.Equal(t, req.ReplicaHostname, replica.Hostname) + require.Equal(t, req.ReplicaError, replica.Error) + require.Equal(t, req.ReplicaRelayAddress, replica.RelayAddress) + require.Equal(t, req.Version, replica.Version) + require.EqualValues(t, 10001, replica.RegionID) + require.False(t, replica.StoppedAt.Valid) + require.Zero(t, replica.DatabaseLatency) + require.False(t, replica.Primary) + + // Re-register with most fields changed. + req = wsproxysdk.RegisterWorkspaceProxyRequest{ + AccessURL: "https://cool.proxy.coder.test", + WildcardHostname: "*.cool.proxy.coder.test", + DerpEnabled: false, + ReplicaID: req.ReplicaID, + ReplicaHostname: "venus", + ReplicaError: "error", + ReplicaRelayAddress: "http://127.0.0.1:9090", + Version: buildinfo.Version(), + } + registerRes2, err := proxyClient.RegisterWorkspaceProxy(ctx, req) + require.NoError(t, err) + require.Equal(t, registerRes1, registerRes2) + + // Get the proxy to ensure nothing has changed except updated_at. + // TODO: we don't have a way to get the proxy by ID yet. + proxiesNew, err := client.WorkspaceProxies(ctx) + require.NoError(t, err) + require.Len(t, proxiesNew, 1) + require.Equal(t, createRes.Proxy.ID, proxiesNew[0].ID) + require.Equal(t, proxyName, proxiesNew[0].Name) + require.Equal(t, proxyDisplayName, proxiesNew[0].DisplayName) + require.Equal(t, proxyIcon, proxiesNew[0].Icon) + require.Equal(t, req.AccessURL, proxiesNew[0].URL) + require.Equal(t, req.AccessURL, proxiesNew[0].URL) + require.Equal(t, req.WildcardHostname, proxiesNew[0].WildcardHostname) + require.Equal(t, req.DerpEnabled, proxiesNew[0].DerpEnabled) + require.False(t, proxiesNew[0].Deleted) + + // Get the replica from the DB and ensure the fields have been updated, + // especially the updated_at. 
+ replica, err = db.GetReplicaByID(ctx, req.ReplicaID) + require.NoError(t, err) + require.Equal(t, req.ReplicaID, replica.ID) + require.Equal(t, req.ReplicaHostname, replica.Hostname) + require.Equal(t, req.ReplicaError, replica.Error) + require.Equal(t, req.ReplicaRelayAddress, replica.RelayAddress) + require.Equal(t, req.Version, replica.Version) + require.EqualValues(t, 10001, replica.RegionID) + require.False(t, replica.StoppedAt.Valid) + require.Zero(t, replica.DatabaseLatency) + require.False(t, replica.Primary) + + // Deregister + err = proxyClient.DeregisterWorkspaceProxy(ctx, wsproxysdk.DeregisterWorkspaceProxyRequest{ + ReplicaID: req.ReplicaID, + }) + require.NoError(t, err) + + // Ensure the replica has been fully stopped. + replica, err = db.GetReplicaByID(ctx, req.ReplicaID) + require.NoError(t, err) + require.Equal(t, req.ReplicaID, replica.ID) + require.True(t, replica.StoppedAt.Valid) + + // Re-register should fail + _, err = proxyClient.RegisterWorkspaceProxy(ctx, wsproxysdk.RegisterWorkspaceProxyRequest{}) + require.Error(t, err) + }) + + t.Run("BlockMismatchingVersion", func(t *testing.T) { + t.Parallel() + + client, _ := setup(t) + + ctx := testutil.Context(t, testutil.WaitLong) + createRes, err := client.CreateWorkspaceProxy(ctx, codersdk.CreateWorkspaceProxyRequest{ + Name: "hi", + }) + require.NoError(t, err) + + proxyClient := wsproxysdk.New(client.URL) + proxyClient.SetSessionToken(createRes.ProxyToken) + + _, err = proxyClient.RegisterWorkspaceProxy(ctx, wsproxysdk.RegisterWorkspaceProxyRequest{ + AccessURL: "https://proxy.coder.test", + WildcardHostname: "*.proxy.coder.test", + DerpEnabled: true, + ReplicaID: uuid.New(), + ReplicaHostname: "mars", + ReplicaError: "", + ReplicaRelayAddress: "http://127.0.0.1:8080", + Version: "v0.0.0", + }) + require.Error(t, err) + var sdkErr *codersdk.Error + require.ErrorAs(t, err, &sdkErr) + require.Equal(t, http.StatusBadRequest, sdkErr.StatusCode()) + require.Contains(t, sdkErr.Response.Message, 
"Version mismatch") + }) + + t.Run("ReregisterUpdateReplica", func(t *testing.T) { + t.Parallel() + + client, db := setup(t) + + ctx := testutil.Context(t, testutil.WaitLong) + createRes, err := client.CreateWorkspaceProxy(ctx, codersdk.CreateWorkspaceProxyRequest{ + Name: "hi", + }) + require.NoError(t, err) + + proxyClient := wsproxysdk.New(client.URL) + proxyClient.SetSessionToken(createRes.ProxyToken) + + req := wsproxysdk.RegisterWorkspaceProxyRequest{ + AccessURL: "https://proxy.coder.test", + WildcardHostname: "*.proxy.coder.test", + DerpEnabled: true, + ReplicaID: uuid.New(), + ReplicaHostname: "mars", + ReplicaError: "", + ReplicaRelayAddress: "http://127.0.0.1:8080", + Version: buildinfo.Version(), + } + _, err = proxyClient.RegisterWorkspaceProxy(ctx, req) + require.NoError(t, err) + + // Get the replica from the DB. + replica, err := db.GetReplicaByID(ctx, req.ReplicaID) + require.NoError(t, err) + require.Equal(t, req.ReplicaID, replica.ID) + + time.Sleep(time.Millisecond) + + // Re-register with no changed fields. + _, err = proxyClient.RegisterWorkspaceProxy(ctx, req) + require.NoError(t, err) + + // Get the replica from the DB and make sure updated_at has changed. 
+ replica, err = db.GetReplicaByID(ctx, req.ReplicaID) + require.NoError(t, err) + require.Equal(t, req.ReplicaID, replica.ID) + require.Greater(t, replica.UpdatedAt.UnixNano(), replica.CreatedAt.UnixNano()) + }) + + t.Run("DeregisterNonExistentReplica", func(t *testing.T) { + t.Parallel() + + client, _ := setup(t) + + ctx := testutil.Context(t, testutil.WaitLong) + createRes, err := client.CreateWorkspaceProxy(ctx, codersdk.CreateWorkspaceProxyRequest{ + Name: "hi", + }) + require.NoError(t, err) + + proxyClient := wsproxysdk.New(client.URL) + proxyClient.SetSessionToken(createRes.ProxyToken) + + err = proxyClient.DeregisterWorkspaceProxy(ctx, wsproxysdk.DeregisterWorkspaceProxyRequest{ + ReplicaID: uuid.New(), + }) + require.Error(t, err) + var sdkErr *codersdk.Error + require.ErrorAs(t, err, &sdkErr) + require.Equal(t, http.StatusNotFound, sdkErr.StatusCode()) + }) + + t.Run("ReturnSiblings", func(t *testing.T) { + t.Parallel() + + client, _ := setup(t) + + ctx := testutil.Context(t, testutil.WaitLong) + createRes1, err := client.CreateWorkspaceProxy(ctx, codersdk.CreateWorkspaceProxyRequest{ + Name: "one", + }) + require.NoError(t, err) + createRes2, err := client.CreateWorkspaceProxy(ctx, codersdk.CreateWorkspaceProxyRequest{ + Name: "two", + }) + require.NoError(t, err) + + // Register a replica on proxy 2. This shouldn't be returned by replicas + // for proxy 1. + proxyClient2 := wsproxysdk.New(client.URL) + proxyClient2.SetSessionToken(createRes2.ProxyToken) + _, err = proxyClient2.RegisterWorkspaceProxy(ctx, wsproxysdk.RegisterWorkspaceProxyRequest{ + AccessURL: "https://other.proxy.coder.test", + WildcardHostname: "*.other.proxy.coder.test", + DerpEnabled: true, + ReplicaID: uuid.New(), + ReplicaHostname: "venus", + ReplicaError: "", + ReplicaRelayAddress: "http://127.0.0.1:9090", + Version: buildinfo.Version(), + }) + require.NoError(t, err) + + // Register replica 1. 
+ proxyClient1 := wsproxysdk.New(client.URL) + proxyClient1.SetSessionToken(createRes1.ProxyToken) + req1 := wsproxysdk.RegisterWorkspaceProxyRequest{ + AccessURL: "https://one.proxy.coder.test", + WildcardHostname: "*.one.proxy.coder.test", + DerpEnabled: true, + ReplicaID: uuid.New(), + ReplicaHostname: "mars1", + ReplicaError: "", + ReplicaRelayAddress: "http://127.0.0.1:8081", + Version: buildinfo.Version(), + } + registerRes1, err := proxyClient1.RegisterWorkspaceProxy(ctx, req1) + require.NoError(t, err) + require.Empty(t, registerRes1.SiblingReplicas) + + // Register replica 2 and expect to get replica 1 as a sibling. + req2 := wsproxysdk.RegisterWorkspaceProxyRequest{ + AccessURL: "https://two.proxy.coder.test", + WildcardHostname: "*.two.proxy.coder.test", + DerpEnabled: true, + ReplicaID: uuid.New(), + ReplicaHostname: "mars2", + ReplicaError: "", + ReplicaRelayAddress: "http://127.0.0.1:8082", + Version: buildinfo.Version(), + } + registerRes2, err := proxyClient1.RegisterWorkspaceProxy(ctx, req2) + require.NoError(t, err) + require.Len(t, registerRes2.SiblingReplicas, 1) + require.Equal(t, req1.ReplicaID, registerRes2.SiblingReplicas[0].ID) + require.Equal(t, req1.ReplicaHostname, registerRes2.SiblingReplicas[0].Hostname) + require.Equal(t, req1.ReplicaRelayAddress, registerRes2.SiblingReplicas[0].RelayAddress) + require.EqualValues(t, 10001, registerRes2.SiblingReplicas[0].RegionID) + + // Re-register replica 1 and expect to get replica 2 as a sibling. 
+ registerRes1, err = proxyClient1.RegisterWorkspaceProxy(ctx, req1) + require.NoError(t, err) + require.Len(t, registerRes1.SiblingReplicas, 1) + require.Equal(t, req2.ReplicaID, registerRes1.SiblingReplicas[0].ID) + require.Equal(t, req2.ReplicaHostname, registerRes1.SiblingReplicas[0].Hostname) + require.Equal(t, req2.ReplicaRelayAddress, registerRes1.SiblingReplicas[0].RelayAddress) + require.EqualValues(t, 10001, registerRes1.SiblingReplicas[0].RegionID) + }) +} + func TestIssueSignedAppToken(t *testing.T) { t.Parallel() From e4a3008e066b5128b5fcfd06377cff43cc5e2fe5 Mon Sep 17 00:00:00 2001 From: Dean Sheather Date: Tue, 30 May 2023 19:26:17 +0000 Subject: [PATCH 08/22] derp tests work --- ...000125_workspace_proxy_region_id.down.sql} | 0 ...> 000125_workspace_proxy_region_id.up.sql} | 0 ...wn.sql => 000126_replica_primary.down.sql} | 0 ...y.up.sql => 000126_replica_primary.up.sql} | 0 coderd/healthcheck/derp.go | 5 +- codersdk/workspaceagents.go | 12 +- enterprise/wsproxy/wsproxy_test.go | 208 ++++++++++++++---- enterprise/wsproxy/wsproxysdk/wsproxysdk.go | 1 - 8 files changed, 178 insertions(+), 48 deletions(-) rename coderd/database/migrations/{000121_workspace_proxy_region_id.down.sql => 000125_workspace_proxy_region_id.down.sql} (100%) rename coderd/database/migrations/{000121_workspace_proxy_region_id.up.sql => 000125_workspace_proxy_region_id.up.sql} (100%) rename coderd/database/migrations/{000122_replica_primary.down.sql => 000126_replica_primary.down.sql} (100%) rename coderd/database/migrations/{000122_replica_primary.up.sql => 000126_replica_primary.up.sql} (100%) diff --git a/coderd/database/migrations/000121_workspace_proxy_region_id.down.sql b/coderd/database/migrations/000125_workspace_proxy_region_id.down.sql similarity index 100% rename from coderd/database/migrations/000121_workspace_proxy_region_id.down.sql rename to coderd/database/migrations/000125_workspace_proxy_region_id.down.sql diff --git 
a/coderd/database/migrations/000121_workspace_proxy_region_id.up.sql b/coderd/database/migrations/000125_workspace_proxy_region_id.up.sql similarity index 100% rename from coderd/database/migrations/000121_workspace_proxy_region_id.up.sql rename to coderd/database/migrations/000125_workspace_proxy_region_id.up.sql diff --git a/coderd/database/migrations/000122_replica_primary.down.sql b/coderd/database/migrations/000126_replica_primary.down.sql similarity index 100% rename from coderd/database/migrations/000122_replica_primary.down.sql rename to coderd/database/migrations/000126_replica_primary.down.sql diff --git a/coderd/database/migrations/000122_replica_primary.up.sql b/coderd/database/migrations/000126_replica_primary.up.sql similarity index 100% rename from coderd/database/migrations/000122_replica_primary.up.sql rename to coderd/database/migrations/000126_replica_primary.up.sql diff --git a/coderd/healthcheck/derp.go b/coderd/healthcheck/derp.go index 0e7c66f474113..0e9e8ec34d415 100644 --- a/coderd/healthcheck/derp.go +++ b/coderd/healthcheck/derp.go @@ -168,7 +168,10 @@ func (r *DERPNodeReport) derpURL() *url.URL { derpURL.Scheme = "http" } if r.Node.HostName == "" { - derpURL.Host = fmt.Sprintf("%s:%d", r.Node.IPv4, r.Node.DERPPort) + derpURL.Host = r.Node.IPv4 + } + if r.Node.DERPPort != 0 { + derpURL.Host = fmt.Sprintf("%s:%d", derpURL.Host, r.Node.DERPPort) } return derpURL diff --git a/codersdk/workspaceagents.go b/codersdk/workspaceagents.go index 36bce0a55485a..4fb3e460343d5 100644 --- a/codersdk/workspaceagents.go +++ b/codersdk/workspaceagents.go @@ -167,8 +167,6 @@ type DialWorkspaceAgentOptions struct { Logger slog.Logger // BlockEndpoints forced a direct connection through DERP. BlockEndpoints bool - // CustomConnectionInfo avoids hitting the API to get connection info. 
- CustomConnectionInfo *WorkspaceAgentConnectionInfo } func (c *Client) DialWorkspaceAgent(ctx context.Context, agentID uuid.UUID, options *DialWorkspaceAgentOptions) (agentConn *WorkspaceAgentConn, err error) { @@ -176,13 +174,9 @@ func (c *Client) DialWorkspaceAgent(ctx context.Context, agentID uuid.UUID, opti options = &DialWorkspaceAgentOptions{} } - connInfo := options.CustomConnectionInfo - if connInfo == nil { - res, err := c.WorkspaceAgentConnectionInfo(ctx, agentID) - if err != nil { - return nil, xerrors.Errorf("get connection info: %w", err) - } - connInfo = &res + connInfo, err := c.WorkspaceAgentConnectionInfo(ctx, agentID) + if err != nil { + return nil, xerrors.Errorf("get connection info: %w", err) } ip := tailnet.IP() diff --git a/enterprise/wsproxy/wsproxy_test.go b/enterprise/wsproxy/wsproxy_test.go index f1cb40b1d0789..9f6007192fdf8 100644 --- a/enterprise/wsproxy/wsproxy_test.go +++ b/enterprise/wsproxy/wsproxy_test.go @@ -4,8 +4,8 @@ import ( "fmt" "net" "testing" - "time" + "github.com/davecgh/go-spew/spew" "github.com/google/uuid" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -16,6 +16,7 @@ import ( "github.com/coder/coder/agent" "github.com/coder/coder/cli/clibase" "github.com/coder/coder/coderd/coderdtest" + "github.com/coder/coder/coderd/healthcheck" "github.com/coder/coder/coderd/httpmw" "github.com/coder/coder/coderd/workspaceapps/apptest" "github.com/coder/coder/codersdk" @@ -78,19 +79,32 @@ func TestDERP(t *testing.T) { require.NoError(t, err) // Wait for both running proxies to become healthy. 
- for i := 0; i < 10; i++ { - regionsCtx := testutil.Context(t, testutil.WaitLong) - regions, err := client.Regions(regionsCtx) - require.NoError(t, err) - require.Len(t, regions, 4) + require.Eventually(t, func() bool { + healthCtx := testutil.Context(t, testutil.WaitLong) + err := api.ProxyHealth.ForceUpdate(healthCtx) + if !assert.NoError(t, err) { + return false + } + + regions, err := client.Regions(healthCtx) + if !assert.NoError(t, err) { + return false + } + if !assert.Len(t, regions, 4) { + return false + } + // The first 3 regions should be healthy. for _, r := range regions[:3] { - require.True(t, r.Healthy) + if !r.Healthy { + return false + } } - // The last region should be unhealthy. - require.False(t, regions[3].Healthy) - time.Sleep(time.Second) - } + + // The last region should never be healthy. + assert.False(t, regions[3].Healthy) + return true + }, testutil.WaitLong, testutil.IntervalMedium) // Create a workspace + apps authToken := uuid.NewString() @@ -212,40 +226,160 @@ resourceLoop: t.Run(r.RegionName, func(t *testing.T) { t.Parallel() - ctx := testutil.Context(t, testutil.WaitLong) - conn, err := client.DialWorkspaceAgent(ctx, agentID, &codersdk.DialWorkspaceAgentOptions{ - Logger: slogtest.Make(t, &slogtest.Options{ - IgnoreErrors: true, - }).Named("agent").Leveled(slog.LevelDebug), - // Force DERP. - BlockEndpoints: true, - // Force connecting to this region only. 
- CustomConnectionInfo: &codersdk.WorkspaceAgentConnectionInfo{ - DERPMap: &tailcfg.DERPMap{ - Regions: map[int]*tailcfg.DERPRegion{ - r.RegionID: r, - }, - OmitDefaultRegions: true, - }, + derpMap := &tailcfg.DERPMap{ + Regions: map[int]*tailcfg.DERPRegion{ + r.RegionID: r, }, - }) - require.NoError(t, err) - t.Cleanup(func() { - err := conn.Close() - assert.NoError(t, err) - }) + OmitDefaultRegions: true, + } - ok := conn.AwaitReachable(ctx) - require.True(t, ok) + ctx := testutil.Context(t, testutil.WaitLong) + report := healthcheck.DERPReport{} + report.Run(ctx, &healthcheck.DERPReportOptions{ + DERPMap: derpMap, + }) - _, p2p, _, err := conn.Ping(ctx) - require.NoError(t, err) - require.False(t, p2p) + t.Log("healthcheck report: " + spew.Sdump(&report)) + require.True(t, report.Healthy, "healthcheck failed, see report dump") }) } }) } +func TestDERPEndToEnd(t *testing.T) { + t.Parallel() + + deploymentValues := coderdtest.DeploymentValues(t) + deploymentValues.Experiments = []string{ + string(codersdk.ExperimentMoons), + "*", + } + + client, closer, api := coderdenttest.NewWithAPI(t, &coderdenttest.Options{ + Options: &coderdtest.Options{ + DeploymentValues: deploymentValues, + AppHostname: "*.primary.test.coder.com", + IncludeProvisionerDaemon: true, + RealIPConfig: &httpmw.RealIPConfig{ + TrustedOrigins: []*net.IPNet{{ + IP: net.ParseIP("127.0.0.1"), + Mask: net.CIDRMask(8, 32), + }}, + TrustedHeaders: []string{ + "CF-Connecting-IP", + }, + }, + }, + }) + t.Cleanup(func() { + _ = closer.Close() + }) + + user := coderdtest.CreateFirstUser(t, client) + _ = coderdenttest.AddLicense(t, client, coderdenttest.LicenseOptions{ + Features: license.Features{ + codersdk.FeatureWorkspaceProxy: 1, + }, + }) + + coderdenttest.NewWorkspaceProxy(t, api, client, &coderdenttest.ProxyOptions{ + Name: "best-proxy", + }) + + // Wait for the proxy to become healthy. 
+ require.Eventually(t, func() bool { + healthCtx := testutil.Context(t, testutil.WaitLong) + err := api.ProxyHealth.ForceUpdate(healthCtx) + if !assert.NoError(t, err) { + return false + } + + regions, err := client.Regions(healthCtx) + if !assert.NoError(t, err) { + return false + } + if !assert.Len(t, regions, 2) { + return false + } + for _, r := range regions { + if !r.Healthy { + return false + } + } + return true + }, testutil.WaitLong, testutil.IntervalMedium) + + // Swap out the DERPMapper for a fake one that only returns the proxy. This + // allows us to force the agent to pick the proxy as its preferred region. + oldDERPMapper := *api.AGPL.DERPMapper.Load() + newDERPMapper := func(derpMap *tailcfg.DERPMap) *tailcfg.DERPMap { + derpMap = oldDERPMapper(derpMap) + // Strip everything but the proxy, which is region ID 10001. + derpMap.Regions = map[int]*tailcfg.DERPRegion{ + 10001: derpMap.Regions[10001], + } + derpMap.OmitDefaultRegions = true + return derpMap + } + api.AGPL.DERPMapper.Store(&newDERPMapper) + + // Create a workspace + apps + authToken := uuid.NewString() + version := coderdtest.CreateTemplateVersion(t, client, user.OrganizationID, &echo.Responses{ + Parse: echo.ParseComplete, + ProvisionApply: echo.ProvisionApplyWithAgent(authToken), + }) + template := coderdtest.CreateTemplate(t, client, user.OrganizationID, version.ID) + coderdtest.AwaitTemplateVersionJob(t, client, version.ID) + workspace := coderdtest.CreateWorkspace(t, client, user.OrganizationID, template.ID) + build := coderdtest.AwaitWorkspaceBuildJob(t, client, workspace.LatestBuild.ID) + workspace.LatestBuild = build + + agentID := uuid.Nil +resourceLoop: + for _, res := range build.Resources { + for _, agnt := range res.Agents { + agentID = agnt.ID + break resourceLoop + } + } + require.NotEqual(t, uuid.Nil, agentID) + + // Connect an agent to the workspace + agentClient := agentsdk.New(client.URL) + agentClient.SetSessionToken(authToken) + agentCloser := agent.New(agent.Options{ 
+ Client: agentClient, + Logger: slogtest.Make(t, nil).Named("agent").Leveled(slog.LevelDebug), + }) + t.Cleanup(func() { + _ = agentCloser.Close() + }) + coderdtest.AwaitWorkspaceAgents(t, client, workspace.ID) + + // Connect to the workspace agent. + ctx := testutil.Context(t, testutil.WaitLong) + conn, err := client.DialWorkspaceAgent(ctx, agentID, &codersdk.DialWorkspaceAgentOptions{ + Logger: slogtest.Make(t, &slogtest.Options{ + IgnoreErrors: true, + }).Named("client").Leveled(slog.LevelDebug), + // Force DERP. + BlockEndpoints: true, + }) + require.NoError(t, err) + t.Cleanup(func() { + err := conn.Close() + assert.NoError(t, err) + }) + + ok := conn.AwaitReachable(ctx) + require.True(t, ok) + + _, p2p, _, err := conn.Ping(ctx) + require.NoError(t, err) + require.False(t, p2p) +} + func TestWorkspaceProxyWorkspaceApps(t *testing.T) { t.Parallel() diff --git a/enterprise/wsproxy/wsproxysdk/wsproxysdk.go b/enterprise/wsproxy/wsproxysdk/wsproxysdk.go index fad1f0a33bb8c..89fa4ed388058 100644 --- a/enterprise/wsproxy/wsproxysdk/wsproxysdk.go +++ b/enterprise/wsproxy/wsproxysdk/wsproxysdk.go @@ -12,7 +12,6 @@ import ( "golang.org/x/xerrors" "cdr.dev/slog" - "github.com/coder/coder/coderd/httpmw" "github.com/coder/coder/coderd/workspaceapps" "github.com/coder/coder/codersdk" From 404c3e4d94ec1d3b49c400871b1ba132ecc7dd03 Mon Sep 17 00:00:00 2001 From: Dean Sheather Date: Wed, 31 May 2023 16:17:11 +0000 Subject: [PATCH 09/22] update derp map on new connection --- agent/agent.go | 10 +- agent/agent_test.go | 24 ++- coderd/coderd.go | 6 +- .../prometheusmetrics_test.go | 6 +- coderd/workspaceagents.go | 9 +- coderd/wsconncache/wsconncache_test.go | 11 +- codersdk/workspaceagents.go | 9 +- enterprise/coderd/coderd.go | 4 +- enterprise/tailnet/coordinator.go | 34 +++- enterprise/tailnet/coordinator_test.go | 138 ++++++++++++--- tailnet/coordinator.go | 71 +++++--- tailnet/coordinator_test.go | 157 ++++++++++++++---- tailnet/derpmap.go | 105 ++++++++++++ 13 files 
changed, 481 insertions(+), 103 deletions(-) diff --git a/agent/agent.go b/agent/agent.go index 165c73598939c..d560226c82d0d 100644 --- a/agent/agent.go +++ b/agent/agent.go @@ -810,8 +810,14 @@ func (a *agent) runCoordinator(ctx context.Context, network *tailnet.Conn) error } defer coordinator.Close() a.logger.Info(ctx, "connected to coordination endpoint") - sendNodes, errChan := tailnet.ServeCoordinator(coordinator, func(nodes []*tailnet.Node) error { - return network.UpdateNodes(nodes, false) + sendNodes, errChan := tailnet.ServeCoordinator(coordinator, func(update tailnet.CoordinatorNodeUpdate) error { + // Check if we need to update our DERP map. + if !tailnet.CompareDERPMaps(network.DERPMap(), update.DERPMap) { + a.logger.Info(ctx, "updating DERP map on connection request due to changes", slog.F("old", network.DERPMap()), slog.F("new", update.DERPMap)) + network.SetDERPMap(update.DERPMap) + } + + return network.UpdateNodes(update.Nodes, false) }) network.SetNodeCallback(sendNodes) select { diff --git a/agent/agent_test.go b/agent/agent_test.go index 832919e7bf5df..afeff7ebda1bf 100644 --- a/agent/agent_test.go +++ b/agent/agent_test.go @@ -889,7 +889,7 @@ func TestAgent_StartupScript(t *testing.T) { DERPMap: &tailcfg.DERPMap{}, }, statsChan: make(chan *agentsdk.Stats), - coordinator: tailnet.NewCoordinator(logger), + coordinator: tailnet.NewCoordinator(logger, emptyDerpMapFn), } closer := agent.New(agent.Options{ Client: client, @@ -930,7 +930,7 @@ func TestAgent_StartupScript(t *testing.T) { return codersdk.ReadBodyAsError(res) }, statsChan: make(chan *agentsdk.Stats), - coordinator: tailnet.NewCoordinator(logger), + coordinator: tailnet.NewCoordinator(logger, emptyDerpMapFn), } closer := agent.New(agent.Options{ Client: client, @@ -1327,7 +1327,7 @@ func TestAgent_Lifecycle(t *testing.T) { ShutdownScript: "echo " + expected, }, statsChan: make(chan *agentsdk.Stats), - coordinator: tailnet.NewCoordinator(logger), + coordinator: 
tailnet.NewCoordinator(logger, emptyDerpMapFn), } fs := afero.NewMemMapFs() @@ -1585,7 +1585,7 @@ func TestAgent_Reconnect(t *testing.T) { logger := slogtest.Make(t, nil).Leveled(slog.LevelDebug) // After the agent is disconnected from a coordinator, it's supposed // to reconnect! - coordinator := tailnet.NewCoordinator(logger) + coordinator := tailnet.NewCoordinator(logger, emptyDerpMapFn) defer coordinator.Close() agentID := uuid.New() @@ -1623,7 +1623,7 @@ func TestAgent_Reconnect(t *testing.T) { func TestAgent_WriteVSCodeConfigs(t *testing.T) { t.Parallel() logger := slogtest.Make(t, nil).Leveled(slog.LevelDebug) - coordinator := tailnet.NewCoordinator(logger) + coordinator := tailnet.NewCoordinator(logger, emptyDerpMapFn) defer coordinator.Close() client := &client{ @@ -1737,7 +1737,9 @@ func setupAgent(t *testing.T, metadata agentsdk.Manifest, ptyTimeout time.Durati if metadata.DERPMap == nil { metadata.DERPMap = tailnettest.RunDERPAndSTUN(t) } - coordinator := tailnet.NewCoordinator(logger) + coordinator := tailnet.NewCoordinator(logger, func() *tailcfg.DERPMap { + return metadata.DERPMap + }) t.Cleanup(func() { _ = coordinator.Close() }) @@ -1785,8 +1787,10 @@ func setupAgent(t *testing.T, metadata agentsdk.Manifest, ptyTimeout time.Durati defer close(serveClientDone) coordinator.ServeClient(serverConn, uuid.New(), agentID) }() - sendNode, _ := tailnet.ServeCoordinator(clientConn, func(node []*tailnet.Node) error { - return conn.UpdateNodes(node, false) + sendNode, _ := tailnet.ServeCoordinator(clientConn, func(update tailnet.CoordinatorNodeUpdate) error { + // Don't need to worry about updating the DERP map since it'll never + // change in this test (as we aren't dealing with proxies etc.) 
+ return conn.UpdateNodes(update.Nodes, false) }) conn.SetNodeCallback(sendNode) agentConn := &codersdk.WorkspaceAgentConn{ @@ -2095,3 +2099,7 @@ func verifyCollectedMetrics(t *testing.T, expected []agentsdk.AgentMetric, actua } return true } + +func emptyDerpMapFn() *tailcfg.DERPMap { + return &tailcfg.DERPMap{} +} diff --git a/coderd/coderd.go b/coderd/coderd.go index 4782c03490fbc..46405d33cc62c 100644 --- a/coderd/coderd.go +++ b/coderd/coderd.go @@ -224,9 +224,6 @@ func New(options *Options) *API { if options.PrometheusRegistry == nil { options.PrometheusRegistry = prometheus.NewRegistry() } - if options.TailnetCoordinator == nil { - options.TailnetCoordinator = tailnet.NewCoordinator(options.Logger) - } if options.DERPServer == nil { options.DERPServer = derp.NewServer(key.NewNode(), tailnet.Logger(options.Logger.Named("derp"))) } @@ -314,6 +311,9 @@ func New(options *Options) *API { Experiments: experiments, healthCheckGroup: &singleflight.Group[string, *healthcheck.Report]{}, } + if options.TailnetCoordinator == nil { + options.TailnetCoordinator = tailnet.NewCoordinator(options.Logger, api.DERPMap) + } if options.HealthcheckFunc == nil { options.HealthcheckFunc = func(ctx context.Context, apiKey string) (*healthcheck.Report, error) { return healthcheck.Run(ctx, &healthcheck.ReportOptions{ diff --git a/coderd/prometheusmetrics/prometheusmetrics_test.go b/coderd/prometheusmetrics/prometheusmetrics_test.go index 822cedc0ae75f..2176fea217338 100644 --- a/coderd/prometheusmetrics/prometheusmetrics_test.go +++ b/coderd/prometheusmetrics/prometheusmetrics_test.go @@ -300,13 +300,13 @@ func TestAgents(t *testing.T) { coderdtest.AwaitWorkspaceBuildJob(t, client, workspace.LatestBuild.ID) // given - coordinator := tailnet.NewCoordinator(slogtest.Make(t, nil).Leveled(slog.LevelDebug)) - coordinatorPtr := atomic.Pointer[tailnet.Coordinator]{} - coordinatorPtr.Store(&coordinator) derpMap := tailnettest.RunDERPAndSTUN(t) derpMapFn := func() *tailcfg.DERPMap { return 
derpMap } + coordinator := tailnet.NewCoordinator(slogtest.Make(t, nil).Leveled(slog.LevelDebug), derpMapFn) + coordinatorPtr := atomic.Pointer[tailnet.Coordinator]{} + coordinatorPtr.Store(&coordinator) agentInactiveDisconnectTimeout := 1 * time.Hour // don't need to focus on this value in tests registry := prometheus.NewRegistry() diff --git a/coderd/workspaceagents.go b/coderd/workspaceagents.go index b04b0bbfd0023..e807ec679ba7c 100644 --- a/coderd/workspaceagents.go +++ b/coderd/workspaceagents.go @@ -687,8 +687,13 @@ func (api *API) dialWorkspaceAgentTailnet(agentID uuid.UUID) (*codersdk.Workspac return left }) - sendNodes, _ := tailnet.ServeCoordinator(clientConn, func(node []*tailnet.Node) error { - err = conn.UpdateNodes(node, true) + sendNodes, _ := tailnet.ServeCoordinator(clientConn, func(update tailnet.CoordinatorNodeUpdate) error { + // Check if we need to update the DERP map used by the connection. + if !tailnet.CompareDERPMaps(conn.DERPMap(), update.DERPMap) { + conn.SetDERPMap(update.DERPMap) + } + + err = conn.UpdateNodes(update.Nodes, true) if err != nil { return xerrors.Errorf("update nodes: %w", err) } diff --git a/coderd/wsconncache/wsconncache_test.go b/coderd/wsconncache/wsconncache_test.go index 6fdecbcf7bf3f..205cedef9ce2d 100644 --- a/coderd/wsconncache/wsconncache_test.go +++ b/coderd/wsconncache/wsconncache_test.go @@ -20,6 +20,7 @@ import ( "github.com/stretchr/testify/require" "go.uber.org/atomic" "go.uber.org/goleak" + "tailscale.com/tailcfg" "cdr.dev/slog" "cdr.dev/slog/sloggers/slogtest" @@ -159,7 +160,9 @@ func setupAgent(t *testing.T, manifest agentsdk.Manifest, ptyTimeout time.Durati logger := slogtest.Make(t, nil).Leveled(slog.LevelDebug) manifest.DERPMap = tailnettest.RunDERPAndSTUN(t) - coordinator := tailnet.NewCoordinator(logger) + coordinator := tailnet.NewCoordinator(logger, func() *tailcfg.DERPMap { + return manifest.DERPMap + }) t.Cleanup(func() { _ = coordinator.Close() }) @@ -190,8 +193,10 @@ func setupAgent(t 
*testing.T, manifest agentsdk.Manifest, ptyTimeout time.Durati _ = conn.Close() }) go coordinator.ServeClient(serverConn, uuid.New(), agentID) - sendNode, _ := tailnet.ServeCoordinator(clientConn, func(node []*tailnet.Node) error { - return conn.UpdateNodes(node, false) + sendNode, _ := tailnet.ServeCoordinator(clientConn, func(update tailnet.CoordinatorNodeUpdate) error { + // Don't need to worry about updating the DERP map since it'll never + // change in this test (as we aren't dealing with proxies etc.) + return conn.UpdateNodes(update.Nodes, false) }) conn.SetNodeCallback(sendNode) agentConn := &codersdk.WorkspaceAgentConn{ diff --git a/codersdk/workspaceagents.go b/codersdk/workspaceagents.go index 4fb3e460343d5..c06dd4299f0cc 100644 --- a/codersdk/workspaceagents.go +++ b/codersdk/workspaceagents.go @@ -248,8 +248,13 @@ func (c *Client) DialWorkspaceAgent(ctx context.Context, agentID uuid.UUID, opti options.Logger.Debug(ctx, "failed to dial", slog.Error(err)) continue } - sendNode, errChan := tailnet.ServeCoordinator(websocket.NetConn(ctx, ws, websocket.MessageBinary), func(node []*tailnet.Node) error { - return conn.UpdateNodes(node, false) + sendNode, errChan := tailnet.ServeCoordinator(websocket.NetConn(ctx, ws, websocket.MessageBinary), func(update tailnet.CoordinatorNodeUpdate) error { + // Check if we need to update the DERP map used by the connection. 
+ if !tailnet.CompareDERPMaps(conn.DERPMap(), update.DERPMap) { + options.Logger.Debug(ctx, "updating DERP map on connection request due to changes", slog.F("old", conn.DERPMap()), slog.F("new", update.DERPMap)) + conn.SetDERPMap(update.DERPMap) + } + return conn.UpdateNodes(update.Nodes, false) }) conn.SetNodeCallback(sendNode) options.Logger.Debug(ctx, "serving coordinator") diff --git a/enterprise/coderd/coderd.go b/enterprise/coderd/coderd.go index 4f673dde38353..44dcd361e7c7d 100644 --- a/enterprise/coderd/coderd.go +++ b/enterprise/coderd/coderd.go @@ -403,9 +403,9 @@ func (api *API) updateEntitlements(ctx context.Context) error { } if changed, enabled := featureChanged(codersdk.FeatureHighAvailability); changed { - coordinator := agpltailnet.NewCoordinator(api.Logger) + coordinator := agpltailnet.NewCoordinator(api.Logger, api.AGPL.DERPMap) if enabled { - haCoordinator, err := tailnet.NewCoordinator(api.Logger, api.Pubsub) + haCoordinator, err := tailnet.NewCoordinator(api.Logger, api.Pubsub, api.AGPL.DERPMap) if err != nil { api.Logger.Error(ctx, "unable to set up high availability coordinator", slog.Error(err)) // If we try to setup the HA coordinator and it fails, nothing diff --git a/enterprise/tailnet/coordinator.go b/enterprise/tailnet/coordinator.go index c25a9c2f773f3..6f9b163cbebae 100644 --- a/enterprise/tailnet/coordinator.go +++ b/enterprise/tailnet/coordinator.go @@ -14,6 +14,7 @@ import ( "github.com/google/uuid" lru "github.com/hashicorp/golang-lru/v2" "golang.org/x/xerrors" + "tailscale.com/tailcfg" "cdr.dev/slog" "github.com/coder/coder/coderd/database" @@ -22,7 +23,7 @@ import ( // NewCoordinator creates a new high availability coordinator // that uses PostgreSQL pubsub to exchange handshakes. 
-func NewCoordinator(logger slog.Logger, pubsub database.Pubsub) (agpl.Coordinator, error) { +func NewCoordinator(logger slog.Logger, pubsub database.Pubsub, derpMapFn func() *tailcfg.DERPMap) (agpl.Coordinator, error) { ctx, cancelFunc := context.WithCancel(context.Background()) nameCache, err := lru.New[uuid.UUID, string](512) @@ -34,8 +35,9 @@ func NewCoordinator(logger slog.Logger, pubsub database.Pubsub) (agpl.Coordinato id: uuid.New(), log: logger, pubsub: pubsub, - closeFunc: cancelFunc, close: make(chan struct{}), + closeFunc: cancelFunc, + derpMapFn: derpMapFn, nodes: map[uuid.UUID]*agpl.Node{}, agentSockets: map[uuid.UUID]*agpl.TrackedConn{}, agentToConnectionSockets: map[uuid.UUID]map[uuid.UUID]*agpl.TrackedConn{}, @@ -57,6 +59,8 @@ type haCoordinator struct { close chan struct{} closeFunc context.CancelFunc + derpMapFn func() *tailcfg.DERPMap + // nodes maps agent and connection IDs their respective node. nodes map[uuid.UUID]*agpl.Node // agentSockets maps agent IDs to their open websocket. @@ -109,7 +113,10 @@ func (c *haCoordinator) ServeClient(conn net.Conn, id uuid.UUID, agent uuid.UUID // node of the agent. This allows the connection to establish. node, ok := c.nodes[agent] if ok { - err := tc.Enqueue([]*agpl.Node{node}) + err := tc.Enqueue(agpl.CoordinatorNodeUpdate{ + DERPMap: c.derpMapFn(), + Nodes: []*agpl.Node{node}, + }) c.mutex.Unlock() if err != nil { return xerrors.Errorf("enqueue node: %w", err) @@ -177,7 +184,10 @@ func (c *haCoordinator) handleNextClientMessage(id, agent uuid.UUID, decoder *js } return nil } - err = agentSocket.Enqueue([]*agpl.Node{&node}) + err = agentSocket.Enqueue(agpl.CoordinatorNodeUpdate{ + DERPMap: c.derpMapFn(), + Nodes: []*agpl.Node{&node}, + }) c.mutex.Unlock() if err != nil { return xerrors.Errorf("enqueu nodes: %w", err) @@ -212,7 +222,10 @@ func (c *haCoordinator) ServeAgent(conn net.Conn, id uuid.UUID, name string) err // Publish all nodes on this instance that want to connect to this agent. 
nodes := c.nodesSubscribedToAgent(id) if len(nodes) > 0 { - err := tc.Enqueue(nodes) + err := tc.Enqueue(agpl.CoordinatorNodeUpdate{ + DERPMap: c.derpMapFn(), + Nodes: nodes, + }) if err != nil { c.mutex.Unlock() return xerrors.Errorf("enqueue nodes: %w", err) @@ -308,8 +321,12 @@ func (c *haCoordinator) handleAgentUpdate(id uuid.UUID, decoder *json.Decoder) ( } // Publish the new node to every listening socket. + derpMap := c.derpMapFn() for _, connectionSocket := range connectionSockets { - _ = connectionSocket.Enqueue([]*agpl.Node{&node}) + _ = connectionSocket.Enqueue(agpl.CoordinatorNodeUpdate{ + DERPMap: derpMap, + Nodes: []*agpl.Node{&node}, + }) } c.mutex.Unlock() return &node, nil @@ -486,7 +503,10 @@ func (c *haCoordinator) handlePubsubMessage(ctx context.Context, message []byte) if err != nil { c.log.Error(ctx, "invalid nodes JSON", slog.F("id", agentID), slog.Error(err), slog.F("node", string(nodeJSON))) } - err = agentSocket.Enqueue(nodes) + err = agentSocket.Enqueue(agpl.CoordinatorNodeUpdate{ + DERPMap: c.derpMapFn(), + Nodes: nodes, + }) if err != nil { c.log.Error(ctx, "send callmemaybe to agent", slog.Error(err)) return diff --git a/enterprise/tailnet/coordinator_test.go b/enterprise/tailnet/coordinator_test.go index cf85af4a5a565..7c23331850811 100644 --- a/enterprise/tailnet/coordinator_test.go +++ b/enterprise/tailnet/coordinator_test.go @@ -1,12 +1,14 @@ package tailnet_test import ( + "context" "net" "testing" "github.com/google/uuid" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + "tailscale.com/tailcfg" "cdr.dev/slog/sloggers/slogtest" @@ -21,12 +23,12 @@ func TestCoordinatorSingle(t *testing.T) { t.Parallel() t.Run("ClientWithoutAgent", func(t *testing.T) { t.Parallel() - coordinator, err := tailnet.NewCoordinator(slogtest.Make(t, nil), database.NewPubsubInMemory()) + coordinator, err := tailnet.NewCoordinator(slogtest.Make(t, nil), database.NewPubsubInMemory(), emptyDerpMapFn) require.NoError(t, err) defer 
coordinator.Close() client, server := net.Pipe() - sendNode, errChan := agpl.ServeCoordinator(client, func(node []*agpl.Node) error { + sendNode, errChan := agpl.ServeCoordinator(client, func(update agpl.CoordinatorNodeUpdate) error { return nil }) id := uuid.New() @@ -49,12 +51,12 @@ func TestCoordinatorSingle(t *testing.T) { t.Run("AgentWithoutClients", func(t *testing.T) { t.Parallel() - coordinator, err := tailnet.NewCoordinator(slogtest.Make(t, nil), database.NewPubsubInMemory()) + coordinator, err := tailnet.NewCoordinator(slogtest.Make(t, nil), database.NewPubsubInMemory(), emptyDerpMapFn) require.NoError(t, err) defer coordinator.Close() client, server := net.Pipe() - sendNode, errChan := agpl.ServeCoordinator(client, func(node []*agpl.Node) error { + sendNode, errChan := agpl.ServeCoordinator(client, func(update agpl.CoordinatorNodeUpdate) error { return nil }) id := uuid.New() @@ -77,15 +79,15 @@ func TestCoordinatorSingle(t *testing.T) { t.Run("AgentWithClient", func(t *testing.T) { t.Parallel() - coordinator, err := tailnet.NewCoordinator(slogtest.Make(t, nil), database.NewPubsubInMemory()) + coordinator, err := tailnet.NewCoordinator(slogtest.Make(t, nil), database.NewPubsubInMemory(), emptyDerpMapFn) require.NoError(t, err) defer coordinator.Close() agentWS, agentServerWS := net.Pipe() defer agentWS.Close() agentNodeChan := make(chan []*agpl.Node) - sendAgentNode, agentErrChan := agpl.ServeCoordinator(agentWS, func(nodes []*agpl.Node) error { - agentNodeChan <- nodes + sendAgentNode, agentErrChan := agpl.ServeCoordinator(agentWS, func(update agpl.CoordinatorNodeUpdate) error { + agentNodeChan <- update.Nodes return nil }) agentID := uuid.New() @@ -104,8 +106,8 @@ func TestCoordinatorSingle(t *testing.T) { defer clientWS.Close() defer clientServerWS.Close() clientNodeChan := make(chan []*agpl.Node) - sendClientNode, clientErrChan := agpl.ServeCoordinator(clientWS, func(nodes []*agpl.Node) error { - clientNodeChan <- nodes + sendClientNode, 
clientErrChan := agpl.ServeCoordinator(clientWS, func(update agpl.CoordinatorNodeUpdate) error { + clientNodeChan <- update.Nodes return nil }) clientID := uuid.New() @@ -136,8 +138,8 @@ func TestCoordinatorSingle(t *testing.T) { agentWS, agentServerWS = net.Pipe() defer agentWS.Close() agentNodeChan = make(chan []*agpl.Node) - _, agentErrChan = agpl.ServeCoordinator(agentWS, func(nodes []*agpl.Node) error { - agentNodeChan <- nodes + _, agentErrChan = agpl.ServeCoordinator(agentWS, func(update agpl.CoordinatorNodeUpdate) error { + agentNodeChan <- update.Nodes return nil }) closeAgentChan = make(chan struct{}) @@ -160,6 +162,100 @@ func TestCoordinatorSingle(t *testing.T) { <-clientErrChan <-closeClientChan }) + + t.Run("SendsDERPMap", func(t *testing.T) { + t.Parallel() + + derpMapFn := func() *tailcfg.DERPMap { + return &tailcfg.DERPMap{ + Regions: map[int]*tailcfg.DERPRegion{ + 1: { + RegionID: 1, + Nodes: []*tailcfg.DERPNode{ + { + Name: "derp1", + RegionID: 1, + HostName: "derp1.example.com", + // blah + }, + }, + }, + }, + } + } + + coordinator, err := tailnet.NewCoordinator(slogtest.Make(t, nil), database.NewPubsubInMemory(), derpMapFn) + require.NoError(t, err) + defer coordinator.Close() + + ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitSuperLong) + defer cancel() + agentWS, agentServerWS := net.Pipe() + defer agentWS.Close() + agentUpdateChan := make(chan agpl.CoordinatorNodeUpdate) + sendAgentNode, agentErrChan := agpl.ServeCoordinator(agentWS, func(update agpl.CoordinatorNodeUpdate) error { + agentUpdateChan <- update + return nil + }) + agentID := uuid.New() + closeAgentChan := make(chan struct{}) + go func() { + err := coordinator.ServeAgent(agentServerWS, agentID, "") + assert.NoError(t, err) + close(closeAgentChan) + }() + sendAgentNode(&agpl.Node{}) + require.Eventually(t, func() bool { + return coordinator.Node(agentID) != nil + }, testutil.WaitShort, testutil.IntervalFast) + + clientWS, clientServerWS := net.Pipe() + 
defer clientWS.Close() + defer clientServerWS.Close() + clientUpdateChan := make(chan agpl.CoordinatorNodeUpdate) + sendClientNode, clientErrChan := agpl.ServeCoordinator(clientWS, func(update agpl.CoordinatorNodeUpdate) error { + clientUpdateChan <- update + return nil + }) + clientID := uuid.New() + closeClientChan := make(chan struct{}) + go func() { + err := coordinator.ServeClient(clientServerWS, clientID, agentID) + assert.NoError(t, err) + close(closeClientChan) + }() + select { + case clientUpdate := <-clientUpdateChan: + require.Equal(t, derpMapFn(), clientUpdate.DERPMap) + require.Len(t, clientUpdate.Nodes, 1) + case <-ctx.Done(): + t.Fatal("timed out") + } + sendClientNode(&agpl.Node{}) + agentUpdate := <-agentUpdateChan + require.Equal(t, derpMapFn(), agentUpdate.DERPMap) + require.Len(t, agentUpdate.Nodes, 1) + + // Ensure an update to the agent node reaches the client! + sendAgentNode(&agpl.Node{}) + select { + case clientUpdate := <-clientUpdateChan: + require.Equal(t, derpMapFn(), clientUpdate.DERPMap) + require.Len(t, clientUpdate.Nodes, 1) + case <-ctx.Done(): + t.Fatal("timed out") + } + + err = agentWS.Close() + require.NoError(t, err) + <-agentErrChan + <-closeAgentChan + + err = clientWS.Close() + require.NoError(t, err) + <-clientErrChan + <-closeClientChan + }) } func TestCoordinatorHA(t *testing.T) { @@ -170,15 +266,15 @@ func TestCoordinatorHA(t *testing.T) { _, pubsub := dbtestutil.NewDB(t) - coordinator1, err := tailnet.NewCoordinator(slogtest.Make(t, nil), pubsub) + coordinator1, err := tailnet.NewCoordinator(slogtest.Make(t, nil), pubsub, emptyDerpMapFn) require.NoError(t, err) defer coordinator1.Close() agentWS, agentServerWS := net.Pipe() defer agentWS.Close() agentNodeChan := make(chan []*agpl.Node) - sendAgentNode, agentErrChan := agpl.ServeCoordinator(agentWS, func(nodes []*agpl.Node) error { - agentNodeChan <- nodes + sendAgentNode, agentErrChan := agpl.ServeCoordinator(agentWS, func(update agpl.CoordinatorNodeUpdate) error { + 
 agentNodeChan <- update.Nodes
 return nil
 })
 agentID := uuid.New()
@@ -193,7 +289,7 @@ func TestCoordinatorHA(t *testing.T) {
 return coordinator1.Node(agentID) != nil
 }, testutil.WaitShort, testutil.IntervalFast)
 
- coordinator2, err := tailnet.NewCoordinator(slogtest.Make(t, nil), pubsub)
+ coordinator2, err := tailnet.NewCoordinator(slogtest.Make(t, nil), pubsub, emptyDerpMapFn)
 require.NoError(t, err)
 defer coordinator2.Close()
 
@@ -201,8 +297,8 @@ func TestCoordinatorHA(t *testing.T) {
 defer clientWS.Close()
 defer clientServerWS.Close()
 clientNodeChan := make(chan []*agpl.Node)
- sendClientNode, clientErrChan := agpl.ServeCoordinator(clientWS, func(nodes []*agpl.Node) error {
- clientNodeChan <- nodes
+ sendClientNode, clientErrChan := agpl.ServeCoordinator(clientWS, func(update agpl.CoordinatorNodeUpdate) error {
+ clientNodeChan <- update.Nodes
 return nil
 })
 clientID := uuid.New()
@@ -234,8 +330,8 @@ func TestCoordinatorHA(t *testing.T) {
 agentWS, agentServerWS = net.Pipe()
 defer agentWS.Close()
 agentNodeChan = make(chan []*agpl.Node)
- _, agentErrChan = agpl.ServeCoordinator(agentWS, func(nodes []*agpl.Node) error {
- agentNodeChan <- nodes
+ _, agentErrChan = agpl.ServeCoordinator(agentWS, func(update agpl.CoordinatorNodeUpdate) error {
+ agentNodeChan <- update.Nodes
 return nil
 })
 closeAgentChan = make(chan struct{})
@@ -259,3 +355,7 @@ func TestCoordinatorHA(t *testing.T) {
 <-closeClientChan
 })
 }
+
+func emptyDerpMapFn() *tailcfg.DERPMap {
+ return &tailcfg.DERPMap{}
+}
diff --git a/tailnet/coordinator.go b/tailnet/coordinator.go
index 5ee49cd194f16..b9ed75e27c5c1 100644
--- a/tailnet/coordinator.go
+++ b/tailnet/coordinator.go
@@ -75,8 +75,21 @@ type Node struct {
 Endpoints []string `json:"endpoints"`
 }
 
+// CoordinatorNodeUpdate is written to a coordinator websocket when there are new
+// nodes or existing nodes have been updated.
+//
+// The DERPMap is provided so the client can always use an up-to-date DERPMap. 
+// The DERPMap should only be applied to the tailnet if it is different from +// the current one. +type CoordinatorNodeUpdate struct { + // DERPMap is the current DERP map used by Coder. + DERPMap *tailcfg.DERPMap `json:"derp_map"` + // Nodes are the new list of nodes to add to the tailnet. + Nodes []*Node `json:"nodes"` +} + // ServeCoordinator matches the RW structure of a coordinator to exchange node messages. -func ServeCoordinator(conn net.Conn, updateNodes func(node []*Node) error) (func(node *Node), <-chan error) { +func ServeCoordinator(conn net.Conn, callback func(update CoordinatorNodeUpdate) error) (func(node *Node), <-chan error) { errChan := make(chan error, 1) sendErr := func(err error) { select { @@ -87,15 +100,15 @@ func ServeCoordinator(conn net.Conn, updateNodes func(node []*Node) error) (func go func() { decoder := json.NewDecoder(conn) for { - var nodes []*Node - err := decoder.Decode(&nodes) + var data CoordinatorNodeUpdate + err := decoder.Decode(&data) if err != nil { sendErr(xerrors.Errorf("read: %w", err)) return } - err = updateNodes(nodes) + err = callback(data) if err != nil { - sendErr(xerrors.Errorf("update nodes: %w", err)) + sendErr(xerrors.Errorf("run callback fn: %w", err)) } } }() @@ -118,9 +131,9 @@ const LoggerName = "coord" // NewCoordinator constructs a new in-memory connection coordinator. This // coordinator is incompatible with multiple Coder replicas as all node data is // in-memory. -func NewCoordinator(logger slog.Logger) Coordinator { +func NewCoordinator(logger slog.Logger, derpMapFn func() *tailcfg.DERPMap) Coordinator { return &coordinator{ - core: newCore(logger), + core: newCore(logger, derpMapFn), } } @@ -142,6 +155,8 @@ type core struct { mutex sync.RWMutex closed bool + derpMapFn func() *tailcfg.DERPMap + // nodes maps agent and connection IDs their respective node. nodes map[uuid.UUID]*Node // agentSockets maps agent IDs to their open websocket. 
@@ -155,7 +170,7 @@ type core struct { agentNameCache *lru.Cache[uuid.UUID, string] } -func newCore(logger slog.Logger) *core { +func newCore(logger slog.Logger, derpMapFn func() *tailcfg.DERPMap) *core { nameCache, err := lru.New[uuid.UUID, string](512) if err != nil { panic("make lru cache: " + err.Error()) @@ -164,6 +179,7 @@ func newCore(logger slog.Logger) *core { return &core{ logger: logger, closed: false, + derpMapFn: derpMapFn, nodes: make(map[uuid.UUID]*Node), agentSockets: map[uuid.UUID]*TrackedConn{}, agentToConnectionSockets: map[uuid.UUID]map[uuid.UUID]*TrackedConn{}, @@ -177,7 +193,7 @@ type TrackedConn struct { ctx context.Context cancel func() conn net.Conn - updates chan []*Node + updates chan CoordinatorNodeUpdate logger slog.Logger // ID is an ephemeral UUID used to uniquely identify the owner of the @@ -190,10 +206,10 @@ type TrackedConn struct { Overwrites int64 } -func (t *TrackedConn) Enqueue(n []*Node) (err error) { +func (t *TrackedConn) Enqueue(update CoordinatorNodeUpdate) (err error) { atomic.StoreInt64(&t.LastWrite, time.Now().Unix()) select { - case t.updates <- n: + case t.updates <- update: return nil default: return ErrWouldBlock @@ -218,10 +234,10 @@ func (t *TrackedConn) SendUpdates() { case <-t.ctx.Done(): t.logger.Debug(t.ctx, "done sending updates") return - case nodes := <-t.updates: - data, err := json.Marshal(nodes) + case update := <-t.updates: + data, err := json.Marshal(update) if err != nil { - t.logger.Error(t.ctx, "unable to marshal nodes update", slog.Error(err), slog.F("nodes", nodes)) + t.logger.Error(t.ctx, "unable to marshal nodes update", slog.Error(err), slog.F("data", update)) return } @@ -237,11 +253,11 @@ func (t *TrackedConn) SendUpdates() { _, err = t.conn.Write(data) if err != nil { // often, this is just because the connection is closed/broken, so only log at debug. 
- t.logger.Debug(t.ctx, "could not write nodes to connection", slog.Error(err), slog.F("nodes", nodes)) + t.logger.Debug(t.ctx, "could not write nodes to connection", slog.Error(err), slog.F("nodes", update)) _ = t.Close() return } - t.logger.Debug(t.ctx, "wrote nodes", slog.F("nodes", nodes)) + t.logger.Debug(t.ctx, "wrote node update", slog.F("data", update)) // nhooyr.io/websocket has a bugged implementation of deadlines on a websocket net.Conn. What they are // *supposed* to do is set a deadline for any subsequent writes to complete, otherwise the call to Write() @@ -264,7 +280,7 @@ func NewTrackedConn(ctx context.Context, cancel func(), conn net.Conn, id uuid.U // coordinator mutex while queuing. Node updates don't // come quickly, so 512 should be plenty for all but // the most pathological cases. - updates := make(chan []*Node, 512) + updates := make(chan CoordinatorNodeUpdate, 512) now := time.Now().Unix() return &TrackedConn{ ctx: ctx, @@ -365,7 +381,10 @@ func (c *core) initAndTrackClient( // node of the agent. This allows the connection to establish. node, ok := c.nodes[agent] if ok { - err := tc.Enqueue([]*Node{node}) + err := tc.Enqueue(CoordinatorNodeUpdate{ + DERPMap: c.derpMapFn(), + Nodes: []*Node{node}, + }) // this should never error since we're still the only goroutine that // knows about the TrackedConn. 
If we hit an error something really // wrong is happening @@ -433,7 +452,10 @@ func (c *core) clientNodeUpdate(id, agent uuid.UUID, node *Node) error { return nil } - err := agentSocket.Enqueue([]*Node{node}) + err := agentSocket.Enqueue(CoordinatorNodeUpdate{ + DERPMap: c.derpMapFn(), + Nodes: []*Node{node}, + }) if err != nil { return xerrors.Errorf("Enqueue node: %w", err) } @@ -529,7 +551,10 @@ func (c *core) initAndTrackAgent(ctx context.Context, cancel func(), conn net.Co } nodes = append(nodes, node) } - err := tc.Enqueue(nodes) + err := tc.Enqueue(CoordinatorNodeUpdate{ + DERPMap: c.derpMapFn(), + Nodes: nodes, + }) // this should never error since we're still the only goroutine that // knows about the TrackedConn. If we hit an error something really // wrong is happening @@ -568,8 +593,12 @@ func (c *core) agentNodeUpdate(id uuid.UUID, node *Node) error { } // Publish the new node to every listening socket. + derpMap := c.derpMapFn() for clientID, connectionSocket := range connectionSockets { - err := connectionSocket.Enqueue([]*Node{node}) + err := connectionSocket.Enqueue(CoordinatorNodeUpdate{ + DERPMap: derpMap, + Nodes: []*Node{node}, + }) if err == nil { logger.Debug(context.Background(), "enqueued agent node to client", slog.F("client_id", clientID)) diff --git a/tailnet/coordinator_test.go b/tailnet/coordinator_test.go index 94c6f6da58341..15d0fb642d1ab 100644 --- a/tailnet/coordinator_test.go +++ b/tailnet/coordinator_test.go @@ -10,6 +10,7 @@ import ( "time" "nhooyr.io/websocket" + "tailscale.com/tailcfg" "cdr.dev/slog" "cdr.dev/slog/sloggers/slogtest" @@ -27,9 +28,9 @@ func TestCoordinator(t *testing.T) { t.Run("ClientWithoutAgent", func(t *testing.T) { t.Parallel() logger := slogtest.Make(t, nil).Leveled(slog.LevelDebug) - coordinator := tailnet.NewCoordinator(logger) + coordinator := tailnet.NewCoordinator(logger, emptyDerpMapFn) client, server := net.Pipe() - sendNode, errChan := tailnet.ServeCoordinator(client, func(node []*tailnet.Node) 
error { + sendNode, errChan := tailnet.ServeCoordinator(client, func(_ tailnet.CoordinatorNodeUpdate) error { return nil }) id := uuid.New() @@ -52,9 +53,9 @@ func TestCoordinator(t *testing.T) { t.Run("AgentWithoutClients", func(t *testing.T) { t.Parallel() logger := slogtest.Make(t, nil).Leveled(slog.LevelDebug) - coordinator := tailnet.NewCoordinator(logger) + coordinator := tailnet.NewCoordinator(logger, emptyDerpMapFn) client, server := net.Pipe() - sendNode, errChan := tailnet.ServeCoordinator(client, func(node []*tailnet.Node) error { + sendNode, errChan := tailnet.ServeCoordinator(client, func(_ tailnet.CoordinatorNodeUpdate) error { return nil }) id := uuid.New() @@ -77,7 +78,7 @@ func TestCoordinator(t *testing.T) { t.Run("AgentWithClient", func(t *testing.T) { t.Parallel() logger := slogtest.Make(t, nil).Leveled(slog.LevelDebug) - coordinator := tailnet.NewCoordinator(logger) + coordinator := tailnet.NewCoordinator(logger, emptyDerpMapFn) // in this test we use real websockets to test use of deadlines ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitSuperLong) @@ -85,8 +86,8 @@ func TestCoordinator(t *testing.T) { agentWS, agentServerWS := websocketConn(ctx, t) defer agentWS.Close() agentNodeChan := make(chan []*tailnet.Node) - sendAgentNode, agentErrChan := tailnet.ServeCoordinator(agentWS, func(nodes []*tailnet.Node) error { - agentNodeChan <- nodes + sendAgentNode, agentErrChan := tailnet.ServeCoordinator(agentWS, func(update tailnet.CoordinatorNodeUpdate) error { + agentNodeChan <- update.Nodes return nil }) agentID := uuid.New() @@ -105,8 +106,8 @@ func TestCoordinator(t *testing.T) { defer clientWS.Close() defer clientServerWS.Close() clientNodeChan := make(chan []*tailnet.Node) - sendClientNode, clientErrChan := tailnet.ServeCoordinator(clientWS, func(nodes []*tailnet.Node) error { - clientNodeChan <- nodes + sendClientNode, clientErrChan := tailnet.ServeCoordinator(clientWS, func(update tailnet.CoordinatorNodeUpdate) error { 
+ clientNodeChan <- update.Nodes return nil }) clientID := uuid.New() @@ -149,8 +150,8 @@ func TestCoordinator(t *testing.T) { agentWS, agentServerWS = net.Pipe() defer agentWS.Close() agentNodeChan = make(chan []*tailnet.Node) - _, agentErrChan = tailnet.ServeCoordinator(agentWS, func(nodes []*tailnet.Node) error { - agentNodeChan <- nodes + _, agentErrChan = tailnet.ServeCoordinator(agentWS, func(update tailnet.CoordinatorNodeUpdate) error { + agentNodeChan <- update.Nodes return nil }) closeAgentChan = make(chan struct{}) @@ -177,13 +178,13 @@ func TestCoordinator(t *testing.T) { t.Run("AgentDoubleConnect", func(t *testing.T) { t.Parallel() logger := slogtest.Make(t, nil).Leveled(slog.LevelDebug) - coordinator := tailnet.NewCoordinator(logger) + coordinator := tailnet.NewCoordinator(logger, emptyDerpMapFn) agentWS1, agentServerWS1 := net.Pipe() defer agentWS1.Close() agentNodeChan1 := make(chan []*tailnet.Node) - sendAgentNode1, agentErrChan1 := tailnet.ServeCoordinator(agentWS1, func(nodes []*tailnet.Node) error { - agentNodeChan1 <- nodes + sendAgentNode1, agentErrChan1 := tailnet.ServeCoordinator(agentWS1, func(update tailnet.CoordinatorNodeUpdate) error { + agentNodeChan1 <- update.Nodes return nil }) agentID := uuid.New() @@ -202,8 +203,8 @@ func TestCoordinator(t *testing.T) { defer clientWS.Close() defer clientServerWS.Close() clientNodeChan := make(chan []*tailnet.Node) - sendClientNode, clientErrChan := tailnet.ServeCoordinator(clientWS, func(nodes []*tailnet.Node) error { - clientNodeChan <- nodes + sendClientNode, clientErrChan := tailnet.ServeCoordinator(clientWS, func(update tailnet.CoordinatorNodeUpdate) error { + clientNodeChan <- update.Nodes return nil }) clientID := uuid.New() @@ -228,8 +229,8 @@ func TestCoordinator(t *testing.T) { agentWS2, agentServerWS2 := net.Pipe() defer agentWS2.Close() agentNodeChan2 := make(chan []*tailnet.Node) - _, agentErrChan2 := tailnet.ServeCoordinator(agentWS2, func(nodes []*tailnet.Node) error { - 
agentNodeChan2 <- nodes + _, agentErrChan2 := tailnet.ServeCoordinator(agentWS2, func(update tailnet.CoordinatorNodeUpdate) error { + agentNodeChan2 <- update.Nodes return nil }) closeAgentChan2 := make(chan struct{}) @@ -268,6 +269,99 @@ func TestCoordinator(t *testing.T) { <-agentErrChan1 <-closeAgentChan1 }) + + t.Run("SendsDERPMap", func(t *testing.T) { + t.Parallel() + logger := slogtest.Make(t, nil).Leveled(slog.LevelDebug) + + derpMapFn := func() *tailcfg.DERPMap { + return &tailcfg.DERPMap{ + Regions: map[int]*tailcfg.DERPRegion{ + 1: { + RegionID: 1, + Nodes: []*tailcfg.DERPNode{ + { + Name: "derp1", + RegionID: 1, + HostName: "derp1.example.com", + // blah + }, + }, + }, + }, + } + } + + coordinator := tailnet.NewCoordinator(logger, derpMapFn) + + ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitSuperLong) + defer cancel() + agentWS, agentServerWS := websocketConn(ctx, t) + defer agentWS.Close() + agentUpdateChan := make(chan tailnet.CoordinatorNodeUpdate) + sendAgentNode, agentErrChan := tailnet.ServeCoordinator(agentWS, func(update tailnet.CoordinatorNodeUpdate) error { + agentUpdateChan <- update + return nil + }) + agentID := uuid.New() + closeAgentChan := make(chan struct{}) + go func() { + err := coordinator.ServeAgent(agentServerWS, agentID, "") + assert.NoError(t, err) + close(closeAgentChan) + }() + sendAgentNode(&tailnet.Node{}) + require.Eventually(t, func() bool { + return coordinator.Node(agentID) != nil + }, testutil.WaitShort, testutil.IntervalFast) + + clientWS, clientServerWS := websocketConn(ctx, t) + defer clientWS.Close() + defer clientServerWS.Close() + clientUpdateChan := make(chan tailnet.CoordinatorNodeUpdate) + sendClientNode, clientErrChan := tailnet.ServeCoordinator(clientWS, func(update tailnet.CoordinatorNodeUpdate) error { + clientUpdateChan <- update + return nil + }) + clientID := uuid.New() + closeClientChan := make(chan struct{}) + go func() { + err := coordinator.ServeClient(clientServerWS, clientID, 
agentID) + assert.NoError(t, err) + close(closeClientChan) + }() + select { + case clientUpdate := <-clientUpdateChan: + require.Equal(t, derpMapFn(), clientUpdate.DERPMap) + require.Len(t, clientUpdate.Nodes, 1) + case <-ctx.Done(): + t.Fatal("timed out") + } + sendClientNode(&tailnet.Node{}) + agentUpdate := <-agentUpdateChan + require.Equal(t, derpMapFn(), agentUpdate.DERPMap) + require.Len(t, agentUpdate.Nodes, 1) + + // Ensure an update to the agent node reaches the client! + sendAgentNode(&tailnet.Node{}) + select { + case clientUpdate := <-clientUpdateChan: + require.Equal(t, derpMapFn(), clientUpdate.DERPMap) + require.Len(t, clientUpdate.Nodes, 1) + case <-ctx.Done(): + t.Fatal("timed out") + } + + err := agentWS.Close() + require.NoError(t, err) + <-agentErrChan + <-closeAgentChan + + err = clientWS.Close() + require.NoError(t, err) + <-clientErrChan + <-closeClientChan + }) } // TestCoordinator_AgentUpdateWhileClientConnects tests for regression on @@ -275,7 +369,7 @@ func TestCoordinator(t *testing.T) { func TestCoordinator_AgentUpdateWhileClientConnects(t *testing.T) { t.Parallel() logger := slogtest.Make(t, nil).Leveled(slog.LevelDebug) - coordinator := tailnet.NewCoordinator(logger) + coordinator := tailnet.NewCoordinator(logger, emptyDerpMapFn) agentWS, agentServerWS := net.Pipe() defer agentWS.Close() @@ -310,9 +404,7 @@ func TestCoordinator_AgentUpdateWhileClientConnects(t *testing.T) { // peek one byte from the node update, so we know the coordinator is // trying to write to the client. 
- // buffer needs to be 2 characters longer because return value is a list - // so, it needs [ and ] - buf := make([]byte, len(aData)+2) + buf := make([]byte, 2048) err = clientWS.SetReadDeadline(time.Now().Add(testutil.WaitShort)) require.NoError(t, err) n, err := clientWS.Read(buf[:1]) @@ -334,25 +426,24 @@ func TestCoordinator_AgentUpdateWhileClientConnects(t *testing.T) { require.NoError(t, err) n, err = clientWS.Read(buf[1:]) require.NoError(t, err) - require.Equal(t, len(buf)-1, n) - var cNodes []*tailnet.Node - err = json.Unmarshal(buf, &cNodes) + var cUpdate tailnet.CoordinatorNodeUpdate + err = json.Unmarshal(buf[:n+1], &cUpdate) require.NoError(t, err) - require.Len(t, cNodes, 1) - require.Equal(t, 0, cNodes[0].PreferredDERP) + require.Len(t, cUpdate.Nodes, 1) + require.Equal(t, 0, cUpdate.Nodes[0].PreferredDERP) // read second update // without a fix for https://github.com/coder/coder/issues/7295 our // read would time out here. err = clientWS.SetReadDeadline(time.Now().Add(testutil.WaitShort)) require.NoError(t, err) + buf = make([]byte, 2048) n, err = clientWS.Read(buf) require.NoError(t, err) - require.Equal(t, len(buf), n) - err = json.Unmarshal(buf, &cNodes) + err = json.Unmarshal(buf[:n], &cUpdate) require.NoError(t, err) - require.Len(t, cNodes, 1) - require.Equal(t, 1, cNodes[0].PreferredDERP) + require.Len(t, cUpdate.Nodes, 1) + require.Equal(t, 1, cUpdate.Nodes[0].PreferredDERP) } func websocketConn(ctx context.Context, t *testing.T) (client net.Conn, server net.Conn) { @@ -377,3 +468,7 @@ func websocketConn(ctx context.Context, t *testing.T) (client net.Conn, server n require.True(t, ok) return client, server } + +func emptyDerpMapFn() *tailcfg.DERPMap { + return &tailcfg.DERPMap{} +} diff --git a/tailnet/derpmap.go b/tailnet/derpmap.go index 13a998177c24f..34ebf5f472e6c 100644 --- a/tailnet/derpmap.go +++ b/tailnet/derpmap.go @@ -76,3 +76,108 @@ func NewDERPMap(ctx context.Context, region *tailcfg.DERPRegion, stunAddrs []str } return derpMap, 
nil } + +// CompareDERPMaps returns true if the given DERPMaps are equivalent. Ordering +// of slices is ignored. +func CompareDERPMaps(a *tailcfg.DERPMap, b *tailcfg.DERPMap) bool { + if a == nil || b == nil { + return false + } + if len(a.Regions) != len(b.Regions) { + return false + } + if a.OmitDefaultRegions != b.OmitDefaultRegions { + return false + } + + for id, region := range a.Regions { + other, ok := b.Regions[id] + if !ok { + return false + } + if !compareDERPRegions(region, other) { + return false + } + } + return true +} + +func compareDERPRegions(a *tailcfg.DERPRegion, b *tailcfg.DERPRegion) bool { + if a == nil || b == nil { + return false + } + if a.EmbeddedRelay != b.EmbeddedRelay { + return false + } + if a.RegionID != b.RegionID { + return false + } + if a.RegionCode != b.RegionCode { + return false + } + if a.RegionName != b.RegionName { + return false + } + if a.Avoid != b.Avoid { + return false + } + if len(a.Nodes) != len(b.Nodes) { + return false + } + + // Convert both slices to maps so ordering can be ignored easier. 
+ aNodes := map[string]*tailcfg.DERPNode{} + for _, node := range a.Nodes { + aNodes[node.Name] = node + } + bNodes := map[string]*tailcfg.DERPNode{} + for _, node := range b.Nodes { + bNodes[node.Name] = node + } + + for name, aNode := range aNodes { + bNode, ok := bNodes[name] + if !ok { + return false + } + + if aNode.Name != bNode.Name { + return false + } + if aNode.RegionID != bNode.RegionID { + return false + } + if aNode.HostName != bNode.HostName { + return false + } + if aNode.CertName != bNode.CertName { + return false + } + if aNode.IPv4 != bNode.IPv4 { + return false + } + if aNode.IPv6 != bNode.IPv6 { + return false + } + if aNode.STUNPort != bNode.STUNPort { + return false + } + if aNode.STUNOnly != bNode.STUNOnly { + return false + } + if aNode.DERPPort != bNode.DERPPort { + return false + } + if aNode.InsecureForTests != bNode.InsecureForTests { + return false + } + if aNode.ForceHTTP != bNode.ForceHTTP { + return false + } + if aNode.STUNTestIP != bNode.STUNTestIP { + return false + } + } + + return true +} From 9b503fad60675f1c1c15aa744e7e339d443eade4 Mon Sep 17 00:00:00 2001 From: Dean Sheather Date: Tue, 13 Jun 2023 16:26:31 +0000 Subject: [PATCH 10/22] fixes --- coderd/database/dbfake/dbfake.go | 26 +++++----- coderd/database/dbmetrics/dbmetrics.go | 7 +++ ...000127_workspace_proxy_region_id.down.sql} | 0 ...> 000127_workspace_proxy_region_id.up.sql} | 0 coderd/database/queries.sql.go | 4 +- enterprise/coderd/workspaceproxy_test.go | 48 +++++++++---------- 6 files changed, 45 insertions(+), 40 deletions(-) rename coderd/database/migrations/{000125_workspace_proxy_region_id.down.sql => 000127_workspace_proxy_region_id.down.sql} (100%) rename coderd/database/migrations/{000125_workspace_proxy_region_id.up.sql => 000127_workspace_proxy_region_id.up.sql} (100%) diff --git a/coderd/database/dbfake/dbfake.go b/coderd/database/dbfake/dbfake.go index b0140a4998785..0297a4b6a6cc2 100644 --- a/coderd/database/dbfake/dbfake.go +++ 
b/coderd/database/dbfake/dbfake.go @@ -2130,6 +2130,19 @@ func (q *fakeQuerier) GetQuotaConsumedForUser(_ context.Context, userID uuid.UUI return sum, nil } +func (q *fakeQuerier) GetReplicaByID(_ context.Context, id uuid.UUID) (database.Replica, error) { + q.mutex.RLock() + defer q.mutex.RUnlock() + + for _, replica := range q.replicas { + if replica.ID == id { + return replica, nil + } + } + + return database.Replica{}, sql.ErrNoRows +} + func (q *fakeQuerier) GetReplicasUpdatedAfter(_ context.Context, updatedAt time.Time) ([]database.Replica, error) { q.mutex.RLock() defer q.mutex.RUnlock() @@ -3708,19 +3721,6 @@ func (q *fakeQuerier) InsertProvisionerJobLogs(_ context.Context, arg database.I return logs, nil } -func (q *fakeQuerier) GetReplicaByID(_ context.Context, id uuid.UUID) (database.Replica, error) { - q.mutex.RLock() - defer q.mutex.RUnlock() - - for _, replica := range q.replicas { - if replica.ID == id { - return replica, nil - } - } - - return database.Replica{}, sql.ErrNoRows -} - func (q *fakeQuerier) InsertReplica(_ context.Context, arg database.InsertReplicaParams) (database.Replica, error) { if err := validateDatabaseType(arg); err != nil { return database.Replica{}, err diff --git a/coderd/database/dbmetrics/dbmetrics.go b/coderd/database/dbmetrics/dbmetrics.go index 136d972f4510b..9d3eefce57a68 100644 --- a/coderd/database/dbmetrics/dbmetrics.go +++ b/coderd/database/dbmetrics/dbmetrics.go @@ -1532,3 +1532,10 @@ func (m metricsStore) GetDefaultProxyConfig(ctx context.Context) (database.GetDe m.queryLatencies.WithLabelValues("GetDefaultProxyConfig").Observe(time.Since(start).Seconds()) return resp, err } + +func (m metricsStore) GetReplicaByID(ctx context.Context, id uuid.UUID) (database.Replica, error) { + start := time.Now() + replica, err := m.s.GetReplicaByID(ctx, id) + m.queryLatencies.WithLabelValues("GetReplicaByID").Observe(time.Since(start).Seconds()) + return replica, err +} diff --git 
a/coderd/database/migrations/000125_workspace_proxy_region_id.down.sql b/coderd/database/migrations/000127_workspace_proxy_region_id.down.sql similarity index 100% rename from coderd/database/migrations/000125_workspace_proxy_region_id.down.sql rename to coderd/database/migrations/000127_workspace_proxy_region_id.down.sql diff --git a/coderd/database/migrations/000125_workspace_proxy_region_id.up.sql b/coderd/database/migrations/000127_workspace_proxy_region_id.up.sql similarity index 100% rename from coderd/database/migrations/000125_workspace_proxy_region_id.up.sql rename to coderd/database/migrations/000127_workspace_proxy_region_id.up.sql diff --git a/coderd/database/queries.sql.go b/coderd/database/queries.sql.go index 9f004777ebc37..d4ffa1f9fbc26 100644 --- a/coderd/database/queries.sql.go +++ b/coderd/database/queries.sql.go @@ -2773,7 +2773,7 @@ SET updated_at = Now() WHERE id = $5 -RETURNING id, name, display_name, icon, url, wildcard_hostname, created_at, updated_at, deleted, token_hashed_secret +RETURNING id, name, display_name, icon, url, wildcard_hostname, created_at, updated_at, deleted, token_hashed_secret, region_id, derp_enabled ` type UpdateWorkspaceProxyParams struct { @@ -2805,6 +2805,8 @@ func (q *sqlQuerier) UpdateWorkspaceProxy(ctx context.Context, arg UpdateWorkspa &i.UpdatedAt, &i.Deleted, &i.TokenHashedSecret, + &i.RegionID, + &i.DerpEnabled, ) return i, err } diff --git a/enterprise/coderd/workspaceproxy_test.go b/enterprise/coderd/workspaceproxy_test.go index 9cf119667d429..0227fe47320ca 100644 --- a/enterprise/coderd/workspaceproxy_test.go +++ b/enterprise/coderd/workspaceproxy_test.go @@ -334,20 +334,17 @@ func TestProxyRegisterDeregister(t *testing.T) { require.EqualValues(t, 10001, registerRes1.DERPRegionID) require.Empty(t, registerRes1.SiblingReplicas) - // Get the proxy to ensure fields have updated. - // TODO: we don't have a way to get the proxy by ID yet. 
- proxies, err := client.WorkspaceProxies(ctx) - require.NoError(t, err) - require.Len(t, proxies, 1) - require.Equal(t, createRes.Proxy.ID, proxies[0].ID) - require.Equal(t, proxyName, proxies[0].Name) - require.Equal(t, proxyDisplayName, proxies[0].DisplayName) - require.Equal(t, proxyIcon, proxies[0].Icon) - require.Equal(t, req.AccessURL, proxies[0].URL) - require.Equal(t, req.AccessURL, proxies[0].URL) - require.Equal(t, req.WildcardHostname, proxies[0].WildcardHostname) - require.Equal(t, req.DerpEnabled, proxies[0].DerpEnabled) - require.False(t, proxies[0].Deleted) + proxy, err := client.WorkspaceProxyByID(ctx, createRes.Proxy.ID) + require.NoError(t, err) + require.Equal(t, createRes.Proxy.ID, proxy.ID) + require.Equal(t, proxyName, proxy.Name) + require.Equal(t, proxyDisplayName, proxy.DisplayName) + require.Equal(t, proxyIcon, proxy.Icon) + require.Equal(t, req.AccessURL, proxy.URL) + require.Equal(t, req.AccessURL, proxy.URL) + require.Equal(t, req.WildcardHostname, proxy.WildcardHostname) + require.Equal(t, req.DerpEnabled, proxy.DerpEnabled) + require.False(t, proxy.Deleted) // Get the replica from the DB. replica, err := db.GetReplicaByID(ctx, req.ReplicaID) @@ -379,18 +376,17 @@ func TestProxyRegisterDeregister(t *testing.T) { // Get the proxy to ensure nothing has changed except updated_at. // TODO: we don't have a way to get the proxy by ID yet. 
- proxiesNew, err := client.WorkspaceProxies(ctx) - require.NoError(t, err) - require.Len(t, proxiesNew, 1) - require.Equal(t, createRes.Proxy.ID, proxiesNew[0].ID) - require.Equal(t, proxyName, proxiesNew[0].Name) - require.Equal(t, proxyDisplayName, proxiesNew[0].DisplayName) - require.Equal(t, proxyIcon, proxiesNew[0].Icon) - require.Equal(t, req.AccessURL, proxiesNew[0].URL) - require.Equal(t, req.AccessURL, proxiesNew[0].URL) - require.Equal(t, req.WildcardHostname, proxiesNew[0].WildcardHostname) - require.Equal(t, req.DerpEnabled, proxiesNew[0].DerpEnabled) - require.False(t, proxiesNew[0].Deleted) + proxyNew, err := client.WorkspaceProxyByID(ctx, createRes.Proxy.ID) + require.NoError(t, err) + require.Equal(t, createRes.Proxy.ID, proxyNew.ID) + require.Equal(t, proxyName, proxyNew.Name) + require.Equal(t, proxyDisplayName, proxyNew.DisplayName) + require.Equal(t, proxyIcon, proxyNew.Icon) + require.Equal(t, req.AccessURL, proxyNew.URL) + require.Equal(t, req.AccessURL, proxyNew.URL) + require.Equal(t, req.WildcardHostname, proxyNew.WildcardHostname) + require.Equal(t, req.DerpEnabled, proxyNew.DerpEnabled) + require.False(t, proxyNew.Deleted) // Get the replica from the DB and ensure the fields have been updated, // especially the updated_at. From 0e6d39a7abbe2496b74053b600853d11ffabf7ba Mon Sep 17 00:00:00 2001 From: Dean Sheather Date: Tue, 13 Jun 2023 19:18:20 +0000 Subject: [PATCH 11/22] tests for derp map changing --- agent/agent_test.go | 103 ++++++++++++++++++++++++++++++++++++ tailnet/conn_test.go | 121 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 224 insertions(+) diff --git a/agent/agent_test.go b/agent/agent_test.go index afeff7ebda1bf..05f095655bd0d 100644 --- a/agent/agent_test.go +++ b/agent/agent_test.go @@ -1564,6 +1564,109 @@ func TestAgent_Dial(t *testing.T) { } } +// TestAgent_UpdatedDERP checks that agents can handle their DERP map being +// updated, and that clients can also handle it. 
+func TestAgent_UpdatedDERP(t *testing.T) { + t.Parallel() + + logger := slogtest.Make(t, nil).Leveled(slog.LevelDebug) + + derpMap := *tailnettest.RunDERPAndSTUN(t) + metadata := agentsdk.Manifest{ + DERPMap: &derpMap, + } + coordinator := tailnet.NewCoordinator(logger, func() *tailcfg.DERPMap { + return &derpMap + }) + defer func() { + _ = coordinator.Close() + }() + agentID := uuid.New() + statsCh := make(chan *agentsdk.Stats, 50) + fs := afero.NewMemMapFs() + c := &client{ + t: t, + agentID: agentID, + manifest: metadata, + statsChan: statsCh, + coordinator: coordinator, + } + closer := agent.New(agent.Options{ + Client: c, + Filesystem: fs, + Logger: logger.Named("agent"), + ReconnectingPTYTimeout: time.Minute, + }) + defer func() { + _ = closer.Close() + }() + + // Setup a client connection. + newClientConn := func() *codersdk.WorkspaceAgentConn { + conn, err := tailnet.NewConn(&tailnet.Options{ + Addresses: []netip.Prefix{netip.PrefixFrom(tailnet.IP(), 128)}, + DERPMap: metadata.DERPMap, + Logger: logger.Named("client"), + }) + require.NoError(t, err) + clientConn, serverConn := net.Pipe() + serveClientDone := make(chan struct{}) + t.Cleanup(func() { + _ = clientConn.Close() + _ = serverConn.Close() + _ = conn.Close() + <-serveClientDone + }) + go func() { + defer close(serveClientDone) + err := coordinator.ServeClient(serverConn, uuid.New(), agentID) + assert.NoError(t, err) + }() + sendNode, _ := tailnet.ServeCoordinator(clientConn, func(update tailnet.CoordinatorNodeUpdate) error { + if !tailnet.CompareDERPMaps(conn.DERPMap(), update.DERPMap) { + conn.SetDERPMap(update.DERPMap) + } + return conn.UpdateNodes(update.Nodes, false) + }) + conn.SetNodeCallback(sendNode) + + sdkConn := &codersdk.WorkspaceAgentConn{ + Conn: conn, + } + t.Cleanup(func() { + _ = sdkConn.Close() + }) + ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong) + defer cancel() + if !sdkConn.AwaitReachable(ctx) { + t.Fatal("agent not reachable") + } + + return
sdkConn + } + conn1 := newClientConn() + + // Change the DERP map. + derpMap = *tailnettest.RunDERPAndSTUN(t) + // Change the region ID. + derpMap.Regions[2] = derpMap.Regions[1] + delete(derpMap.Regions, 1) + derpMap.Regions[2].RegionID = 2 + for _, node := range derpMap.Regions[2].Nodes { + node.RegionID = 2 + } + + // Connect from a second client and make sure it uses the new DERP map. + conn2 := newClientConn() + require.Equal(t, []int{2}, conn2.DERPMap().RegionIDs()) + + // Make sure the first client is still reachable (it received a DERP map + // update when the agent's nodes changed). + ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong) + defer cancel() + require.True(t, conn1.AwaitReachable(ctx)) +} + func TestAgent_Speedtest(t *testing.T) { t.Parallel() t.Skip("This test is relatively flakey because of Tailscale's speedtest code...") diff --git a/tailnet/conn_test.go b/tailnet/conn_test.go index 2e19379e6df03..964a668307fd5 100644 --- a/tailnet/conn_test.go +++ b/tailnet/conn_test.go @@ -195,3 +195,124 @@ func TestConn_PreferredDERP(t *testing.T) { t.Fatal("timed out waiting for node") } } + +// TestConn_UpdateDERP tests that when we update the DERP map we pick a new +// preferred DERP server and new connections can be made from clients.
+func TestConn_UpdateDERP(t *testing.T) { + t.Parallel() + logger := slogtest.Make(t, nil).Leveled(slog.LevelDebug) + + derpMap1 := tailnettest.RunDERPAndSTUN(t) + ip := tailnet.IP() + conn, err := tailnet.NewConn(&tailnet.Options{ + Addresses: []netip.Prefix{netip.PrefixFrom(ip, 128)}, + Logger: logger.Named("w1"), + DERPMap: derpMap1, + BlockEndpoints: true, + }) + require.NoError(t, err) + defer func() { + err := conn.Close() + assert.NoError(t, err) + }() + + // Buffer channel so callback doesn't block + nodes := make(chan *tailnet.Node, 50) + conn.SetNodeCallback(func(node *tailnet.Node) { + nodes <- node + }) + + ctx1, cancel1 := context.WithTimeout(context.Background(), testutil.WaitShort) + defer cancel1() + select { + case node := <-nodes: + require.Equal(t, 1, node.PreferredDERP) + case <-ctx1.Done(): + t.Fatal("timed out waiting for node") + } + + // Connect from a different client. + client1, err := tailnet.NewConn(&tailnet.Options{ + Addresses: []netip.Prefix{netip.PrefixFrom(tailnet.IP(), 128)}, + Logger: logger.Named("client1"), + DERPMap: derpMap1, + BlockEndpoints: true, + }) + require.NoError(t, err) + defer func() { + err := client1.Close() + assert.NoError(t, err) + }() + client1.SetNodeCallback(func(node *tailnet.Node) { + err := conn.UpdateNodes([]*tailnet.Node{node}, false) + assert.NoError(t, err) + }) + client1.UpdateNodes([]*tailnet.Node{conn.Node()}, false) + + awaitReachableCtx1, awaitReachableCancel1 := context.WithTimeout(context.Background(), testutil.WaitShort) + defer awaitReachableCancel1() + require.True(t, client1.AwaitReachable(awaitReachableCtx1, ip)) + + // Update the DERP map and wait for the preferred DERP server to change. + derpMap2 := tailnettest.RunDERPAndSTUN(t) + // Change the region ID. 
+ derpMap2.Regions[2] = derpMap2.Regions[1] + delete(derpMap2.Regions, 1) + derpMap2.Regions[2].RegionID = 2 + for _, node := range derpMap2.Regions[2].Nodes { + node.RegionID = 2 + } + conn.SetDERPMap(derpMap2) + + ctx2, cancel2 := context.WithTimeout(context.Background(), testutil.WaitShort) + defer cancel2() +parentLoop: + for { + select { + case node := <-nodes: + if node.PreferredDERP != 2 { + t.Logf("waiting for preferred DERP server to change, got %v", node.PreferredDERP) + continue + } + t.Log("preferred DERP server changed!") + break parentLoop + case <-ctx2.Done(): + t.Fatal("timed out waiting for preferred DERP server to change") + } + } + + // Client1 should be dropped... + awaitReachableCtx2, awaitReachableCancel2 := context.WithTimeout(context.Background(), testutil.WaitShort) + defer awaitReachableCancel2() + require.False(t, client1.AwaitReachable(awaitReachableCtx2, ip)) + + // ... unless the client updates its derp map and nodes. + client1.SetDERPMap(derpMap2) + client1.UpdateNodes([]*tailnet.Node{conn.Node()}, false) + awaitReachableCtx3, awaitReachableCancel3 := context.WithTimeout(context.Background(), testutil.WaitShort) + defer awaitReachableCancel3() + require.True(t, client1.AwaitReachable(awaitReachableCtx3, ip)) + + // Connect from a different client with up-to-date derp map and + // nodes.
+ client2, err := tailnet.NewConn(&tailnet.Options{ + Addresses: []netip.Prefix{netip.PrefixFrom(tailnet.IP(), 128)}, + Logger: logger.Named("client2"), + DERPMap: derpMap2, + BlockEndpoints: true, + }) + require.NoError(t, err) + defer func() { + err := client2.Close() + assert.NoError(t, err) + }() + client2.SetNodeCallback(func(node *tailnet.Node) { + err := conn.UpdateNodes([]*tailnet.Node{node}, false) + assert.NoError(t, err) + }) + client2.UpdateNodes([]*tailnet.Node{conn.Node()}, false) + + awaitReachableCtx4, awaitReachableCancel4 := context.WithTimeout(context.Background(), testutil.WaitShort) + defer awaitReachableCancel4() + require.True(t, client2.AwaitReachable(awaitReachableCtx4, ip)) +} From 2943ac27c1cf1a6b664037c909655f524e3aa19e Mon Sep 17 00:00:00 2001 From: Dean Sheather Date: Tue, 20 Jun 2023 14:18:55 +0000 Subject: [PATCH 12/22] backwards compatible --- tailnet/coordinator.go | 2 +- tailnet/derpmap.go | 11 +++++++++-- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/tailnet/coordinator.go b/tailnet/coordinator.go index b9ed75e27c5c1..6043ec2aeb365 100644 --- a/tailnet/coordinator.go +++ b/tailnet/coordinator.go @@ -83,7 +83,7 @@ type Node struct { // the current one. type CoordinatorNodeUpdate struct { // DERPMap is the current DERP map used by Coder. - DERPMap *tailcfg.DERPMap `json:"derp_map"` + DERPMap *tailcfg.DERPMap `json:"derp_map,omitempty"` // Nodes are the new list of nodes to add to the tailnet. Nodes []*Node `json:"nodes"` } diff --git a/tailnet/derpmap.go b/tailnet/derpmap.go index 34ebf5f472e6c..dc3d22795d8cd 100644 --- a/tailnet/derpmap.go +++ b/tailnet/derpmap.go @@ -79,9 +79,16 @@ func NewDERPMap(ctx context.Context, region *tailcfg.DERPRegion, stunAddrs []str // CompareDERPMaps returns true if the given DERPMaps are equivalent. Ordering // of slices is ignored. +// +// If the first map is nil, the second map must also be nil for them to be +// considered equivalent. 
If the second map is nil, the first map can be any +// value and the function will return true. func CompareDERPMaps(a *tailcfg.DERPMap, b *tailcfg.DERPMap) bool { - if a == nil || b == nil { - return false + if a == nil { + return b == nil + } + if b == nil { + return true } if len(a.Regions) != len(b.Regions) { return false From f0fa5784cf2772b3a9150b68827fc1e929682a51 Mon Sep 17 00:00:00 2001 From: Dean Sheather Date: Tue, 20 Jun 2023 14:41:00 +0000 Subject: [PATCH 13/22] other comments --- coderd/coderd.go | 2 +- enterprise/coderd/coderd.go | 28 ++++++++++++++++----- enterprise/coderd/workspaceproxy.go | 6 ++++- enterprise/wsproxy/wsproxysdk/wsproxysdk.go | 1 + 4 files changed, 29 insertions(+), 8 deletions(-) diff --git a/coderd/coderd.go b/coderd/coderd.go index f9b738536f860..0992d0ac58688 100644 --- a/coderd/coderd.go +++ b/coderd/coderd.go @@ -327,7 +327,7 @@ func New(options *Options) *API { options.HealthcheckFunc = func(ctx context.Context, apiKey string) *healthcheck.Report { return healthcheck.Run(ctx, &healthcheck.ReportOptions{ AccessURL: options.AccessURL, - DERPMap: api.DERPMap().Clone(), + DERPMap: api.DERPMap(), APIKey: apiKey, }) } diff --git a/enterprise/coderd/coderd.go b/enterprise/coderd/coderd.go index a3788c80456a3..77f60858cca46 100644 --- a/enterprise/coderd/coderd.go +++ b/enterprise/coderd/coderd.go @@ -476,11 +476,12 @@ func (api *API) updateEntitlements(ctx context.Context) error { // // Two ints are returned, the first is the starting region ID for proxies, and // the second is the maximum region ID that already exists in the DERP map. 
-func getProxyDERPStartingRegionID(derpMap *tailcfg.DERPMap) (sID int, mID int) { - maxRegionID := 0 +func getProxyDERPStartingRegionID(derpMap *tailcfg.DERPMap) (sID int64, mID int64) { + var maxRegionID int64 for _, region := range derpMap.Regions { - if region.RegionID > maxRegionID { - maxRegionID = region.RegionID + rid := int64(region.RegionID) + if rid > maxRegionID { + maxRegionID = rid } } if maxRegionID < 0 { @@ -488,10 +489,25 @@ func getProxyDERPStartingRegionID(derpMap *tailcfg.DERPMap) (sID int, mID int) { } // Round to the nearest 10,000 with a sufficient buffer of at least 2,000. + // The buffer allows for future "fixed" regions to be added to the base DERP + // map without conflicting with proxy region IDs (standard DERP maps usually + // use incrementing IDs for new regions). + // + // Example: + // maxRegionID = -2_000 -> startingRegionID = 10_000 + // maxRegionID = 8_000 -> startingRegionID = 10_000 + // maxRegionID = 8_500 -> startingRegionID = 20_000 + // maxRegionID = 12_000 -> startingRegionID = 20_000 + // maxRegionID = 20_000 -> startingRegionID = 30_000 const roundStartingRegionID = 10_000 const startingRegionIDBuffer = 2_000 + // Add the buffer first. startingRegionID := maxRegionID + startingRegionIDBuffer - startingRegionID = int(math.Ceil(float64(startingRegionID)/roundStartingRegionID) * roundStartingRegionID) + // Round UP to the nearest 10,000. Go's math.Ceil rounds up to the nearest + // integer, so we need to divide by 10,000 first and then multiply by + // 10,000. + startingRegionID = int64(math.Ceil(float64(startingRegionID)/roundStartingRegionID) * roundStartingRegionID) + // This should never be hit but it's here just in case. 
if startingRegionID < roundStartingRegionID { startingRegionID = roundStartingRegionID } @@ -565,7 +581,7 @@ func derpMapper(logger slog.Logger, proxyHealth *proxyhealth.ProxyHealth) func(* // This should be impossible to hit as the IDs are enforced to be // unique by the database and the computed ID is greater than any // existing ID in the DERP map. - regionID := startingRegionID + int(status.Proxy.RegionID) + regionID := int(startingRegionID) + int(status.Proxy.RegionID) regionCode := fmt.Sprintf("coder_%s", strings.ToLower(status.Proxy.Name)) for _, r := range derpMap.Regions { if r.RegionID == regionID || r.RegionCode == regionCode { diff --git a/enterprise/coderd/workspaceproxy.go b/enterprise/coderd/workspaceproxy.go index 81da38bfd075e..e9b985ef319f1 100644 --- a/enterprise/coderd/workspaceproxy.go +++ b/enterprise/coderd/workspaceproxy.go @@ -4,6 +4,7 @@ import ( "context" "crypto/sha256" "database/sql" + "flag" "fmt" "net/http" "net/url" @@ -531,7 +532,10 @@ func (api *API) workspaceProxyRegister(rw http.ResponseWriter, r *http.Request) return } - if req.Version != buildinfo.Version() { + // Version check should be forced in non-dev builds and when running in + // tests. 
+ shouldForceVersion := !buildinfo.IsDev() || flag.Lookup("test.v") != nil + if shouldForceVersion && req.Version != buildinfo.Version() { httpapi.Write(ctx, rw, http.StatusBadRequest, codersdk.Response{ Message: "Version mismatch.", Detail: fmt.Sprintf("Proxy version %q does not match primary server version %q", req.Version, buildinfo.Version()), diff --git a/enterprise/wsproxy/wsproxysdk/wsproxysdk.go b/enterprise/wsproxy/wsproxysdk/wsproxysdk.go index 89fa4ed388058..04b5e850c8014 100644 --- a/enterprise/wsproxy/wsproxysdk/wsproxysdk.go +++ b/enterprise/wsproxy/wsproxysdk/wsproxysdk.go @@ -350,6 +350,7 @@ func (c *Client) RegisterWorkspaceProxyLoop(ctx context.Context, opts RegisterWo failureFn(xerrors.Errorf("exceeded re-registration failure count of %d: last error: %w", opts.MaxFailureCount, err)) return } + continue } failedAttempts = 0 From b4051132563b97f8ae6d9ffa116919f3c8d67cfc Mon Sep 17 00:00:00 2001 From: Dean Sheather Date: Sun, 25 Jun 2023 14:24:20 +0000 Subject: [PATCH 14/22] fixup! 
Merge branch 'main' into dean/proxy-derp-map --- agent/agent.go | 10 +- agent/agent_test.go | 36 ++-- cli/netcheck.go | 2 +- coderd/coderd.go | 6 +- ...wn.sql => 000131_replica_primary.down.sql} | 0 ...y.up.sql => 000131_replica_primary.up.sql} | 0 ...000132_workspace_proxy_region_id.down.sql} | 0 ...> 000132_workspace_proxy_region_id.up.sql} | 0 .../prometheusmetrics_test.go | 2 +- coderd/workspaceagents.go | 13 +- coderd/wsconncache/wsconncache_test.go | 11 +- codersdk/workspaceagents.go | 25 +-- docs/admin/audit-logs.md | 30 +--- enterprise/coderd/coderd.go | 4 +- enterprise/tailnet/coordinator.go | 34 +--- enterprise/tailnet/coordinator_test.go | 138 +++------------ enterprise/wsproxy/wsproxy_test.go | 12 +- tailnet/coordinator.go | 73 +++----- tailnet/coordinator_test.go | 157 ++++-------------- 19 files changed, 116 insertions(+), 437 deletions(-) rename coderd/database/migrations/{000126_replica_primary.down.sql => 000131_replica_primary.down.sql} (100%) rename coderd/database/migrations/{000126_replica_primary.up.sql => 000131_replica_primary.up.sql} (100%) rename coderd/database/migrations/{000127_workspace_proxy_region_id.down.sql => 000132_workspace_proxy_region_id.down.sql} (100%) rename coderd/database/migrations/{000127_workspace_proxy_region_id.up.sql => 000132_workspace_proxy_region_id.up.sql} (100%) diff --git a/agent/agent.go b/agent/agent.go index 41b5d3576c636..b1218190bbcb4 100644 --- a/agent/agent.go +++ b/agent/agent.go @@ -817,14 +817,8 @@ func (a *agent) runCoordinator(ctx context.Context, network *tailnet.Conn) error } defer coordinator.Close() a.logger.Info(ctx, "connected to coordination endpoint") - sendNodes, errChan := tailnet.ServeCoordinator(coordinator, func(update tailnet.CoordinatorNodeUpdate) error { - // Check if we need to update our DERP map. 
- if !tailnet.CompareDERPMaps(network.DERPMap(), update.DERPMap) { - a.logger.Info(ctx, "updating DERP map on connection request due to changes", slog.F("old", network.DERPMap()), slog.F("new", update.DERPMap)) - network.SetDERPMap(update.DERPMap) - } - - return network.UpdateNodes(update.Nodes, false) + sendNodes, errChan := tailnet.ServeCoordinator(coordinator, func(nodes []*tailnet.Node) error { + return network.UpdateNodes(nodes, false) }) network.SetNodeCallback(sendNodes) select { diff --git a/agent/agent_test.go b/agent/agent_test.go index ea18b3be1925e..8771f36860611 100644 --- a/agent/agent_test.go +++ b/agent/agent_test.go @@ -889,7 +889,7 @@ func TestAgent_StartupScript(t *testing.T) { DERPMap: &tailcfg.DERPMap{}, }, statsChan: make(chan *agentsdk.Stats), - coordinator: tailnet.NewCoordinator(logger, emptyDerpMapFn), + coordinator: tailnet.NewCoordinator(logger), } closer := agent.New(agent.Options{ Client: client, @@ -930,7 +930,7 @@ func TestAgent_StartupScript(t *testing.T) { return codersdk.ReadBodyAsError(res) }, statsChan: make(chan *agentsdk.Stats), - coordinator: tailnet.NewCoordinator(logger, emptyDerpMapFn), + coordinator: tailnet.NewCoordinator(logger), } closer := agent.New(agent.Options{ Client: client, @@ -1290,7 +1290,7 @@ func TestAgent_Lifecycle(t *testing.T) { ShutdownScript: "echo " + expected, }, statsChan: make(chan *agentsdk.Stats), - coordinator: tailnet.NewCoordinator(logger, emptyDerpMapFn), + coordinator: tailnet.NewCoordinator(logger), } fs := afero.NewMemMapFs() @@ -1538,9 +1538,7 @@ func TestAgent_UpdatedDERP(t *testing.T) { metadata := agentsdk.Manifest{ DERPMap: &derpMap, } - coordinator := tailnet.NewCoordinator(logger, func() *tailcfg.DERPMap { - return &derpMap - }) + coordinator := tailnet.NewCoordinator(logger) defer func() { _ = coordinator.Close() }() @@ -1585,11 +1583,8 @@ func TestAgent_UpdatedDERP(t *testing.T) { err := coordinator.ServeClient(serverConn, uuid.New(), agentID) assert.NoError(t, err) }() - sendNode, 
_ := tailnet.ServeCoordinator(clientConn, func(update tailnet.CoordinatorNodeUpdate) error { - if tailnet.CompareDERPMaps(conn.DERPMap(), update.DERPMap) { - conn.SetDERPMap(update.DERPMap) - } - return conn.UpdateNodes(update.Nodes, false) + sendNode, _ := tailnet.ServeCoordinator(clientConn, func(nodes []*tailnet.Node) error { + return conn.UpdateNodes(nodes, false) }) conn.SetNodeCallback(sendNode) @@ -1610,6 +1605,7 @@ func TestAgent_UpdatedDERP(t *testing.T) { conn1 := newClientConn() // Change the DERP map. + // TODO: fix this test derpMap = *tailnettest.RunDERPAndSTUN(t) // Change the region ID. derpMap.Regions[2] = derpMap.Regions[1] @@ -1651,7 +1647,7 @@ func TestAgent_Reconnect(t *testing.T) { logger := slogtest.Make(t, nil).Leveled(slog.LevelDebug) // After the agent is disconnected from a coordinator, it's supposed // to reconnect! - coordinator := tailnet.NewCoordinator(logger, emptyDerpMapFn) + coordinator := tailnet.NewCoordinator(logger) defer coordinator.Close() agentID := uuid.New() @@ -1689,7 +1685,7 @@ func TestAgent_Reconnect(t *testing.T) { func TestAgent_WriteVSCodeConfigs(t *testing.T) { t.Parallel() logger := slogtest.Make(t, nil).Leveled(slog.LevelDebug) - coordinator := tailnet.NewCoordinator(logger, emptyDerpMapFn) + coordinator := tailnet.NewCoordinator(logger) defer coordinator.Close() client := &client{ @@ -1803,9 +1799,7 @@ func setupAgent(t *testing.T, metadata agentsdk.Manifest, ptyTimeout time.Durati if metadata.DERPMap == nil { metadata.DERPMap = tailnettest.RunDERPAndSTUN(t) } - coordinator := tailnet.NewCoordinator(logger, func() *tailcfg.DERPMap { - return metadata.DERPMap - }) + coordinator := tailnet.NewCoordinator(logger) t.Cleanup(func() { _ = coordinator.Close() }) @@ -1853,10 +1847,8 @@ func setupAgent(t *testing.T, metadata agentsdk.Manifest, ptyTimeout time.Durati defer close(serveClientDone) coordinator.ServeClient(serverConn, uuid.New(), agentID) }() - sendNode, _ := tailnet.ServeCoordinator(clientConn, func(update 
tailnet.CoordinatorNodeUpdate) error { - // Don't need to worry about updating the DERP map since it'll never - // change in this test (as we aren't dealing with proxies etc.) - return conn.UpdateNodes(update.Nodes, false) + sendNode, _ := tailnet.ServeCoordinator(clientConn, func(nodes []*tailnet.Node) error { + return conn.UpdateNodes(nodes, false) }) conn.SetNodeCallback(sendNode) agentConn := &codersdk.WorkspaceAgentConn{ @@ -2165,7 +2157,3 @@ func verifyCollectedMetrics(t *testing.T, expected []agentsdk.AgentMetric, actua } return true } - -func emptyDerpMapFn() *tailcfg.DERPMap { - return &tailcfg.DERPMap{} -} diff --git a/cli/netcheck.go b/cli/netcheck.go index 4215c4dbf09b1..17ada6e5a95d3 100644 --- a/cli/netcheck.go +++ b/cli/netcheck.go @@ -27,7 +27,7 @@ func (r *RootCmd) netcheck() *clibase.Cmd { ctx, cancel := context.WithTimeout(inv.Context(), 30*time.Second) defer cancel() - connInfo, err := client.WorkspaceAgentConnectionInfo(ctx) + connInfo, err := client.WorkspaceAgentConnectionInfoGeneric(ctx) if err != nil { return err } diff --git a/coderd/coderd.go b/coderd/coderd.go index 4159c6b6d69cc..b36201dfae5b9 100644 --- a/coderd/coderd.go +++ b/coderd/coderd.go @@ -232,6 +232,9 @@ func New(options *Options) *API { if options.DERPServer == nil { options.DERPServer = derp.NewServer(key.NewNode(), tailnet.Logger(options.Logger.Named("derp"))) } + if options.TailnetCoordinator == nil { + options.TailnetCoordinator = tailnet.NewCoordinator(options.Logger) + } if options.Auditor == nil { options.Auditor = audit.NewNop() } @@ -318,9 +321,6 @@ func New(options *Options) *API { Experiments: experiments, healthCheckGroup: &singleflight.Group[string, *healthcheck.Report]{}, } - if options.TailnetCoordinator == nil { - options.TailnetCoordinator = tailnet.NewCoordinator(options.Logger, api.DERPMap) - } if options.HealthcheckFunc == nil { options.HealthcheckFunc = func(ctx context.Context, apiKey string) *healthcheck.Report { return healthcheck.Run(ctx, 
&healthcheck.ReportOptions{ diff --git a/coderd/database/migrations/000126_replica_primary.down.sql b/coderd/database/migrations/000131_replica_primary.down.sql similarity index 100% rename from coderd/database/migrations/000126_replica_primary.down.sql rename to coderd/database/migrations/000131_replica_primary.down.sql diff --git a/coderd/database/migrations/000126_replica_primary.up.sql b/coderd/database/migrations/000131_replica_primary.up.sql similarity index 100% rename from coderd/database/migrations/000126_replica_primary.up.sql rename to coderd/database/migrations/000131_replica_primary.up.sql diff --git a/coderd/database/migrations/000127_workspace_proxy_region_id.down.sql b/coderd/database/migrations/000132_workspace_proxy_region_id.down.sql similarity index 100% rename from coderd/database/migrations/000127_workspace_proxy_region_id.down.sql rename to coderd/database/migrations/000132_workspace_proxy_region_id.down.sql diff --git a/coderd/database/migrations/000127_workspace_proxy_region_id.up.sql b/coderd/database/migrations/000132_workspace_proxy_region_id.up.sql similarity index 100% rename from coderd/database/migrations/000127_workspace_proxy_region_id.up.sql rename to coderd/database/migrations/000132_workspace_proxy_region_id.up.sql diff --git a/coderd/prometheusmetrics/prometheusmetrics_test.go b/coderd/prometheusmetrics/prometheusmetrics_test.go index 2176fea217338..3c3759bc30fd0 100644 --- a/coderd/prometheusmetrics/prometheusmetrics_test.go +++ b/coderd/prometheusmetrics/prometheusmetrics_test.go @@ -304,7 +304,7 @@ func TestAgents(t *testing.T) { derpMapFn := func() *tailcfg.DERPMap { return derpMap } - coordinator := tailnet.NewCoordinator(slogtest.Make(t, nil).Leveled(slog.LevelDebug), derpMapFn) + coordinator := tailnet.NewCoordinator(slogtest.Make(t, nil).Leveled(slog.LevelDebug)) coordinatorPtr := atomic.Pointer[tailnet.Coordinator]{} coordinatorPtr.Store(&coordinator) agentInactiveDisconnectTimeout := 1 * time.Hour // don't need to 
focus on this value in tests diff --git a/coderd/workspaceagents.go b/coderd/workspaceagents.go index 560c7a32a988b..5240718abf3a7 100644 --- a/coderd/workspaceagents.go +++ b/coderd/workspaceagents.go @@ -757,17 +757,8 @@ func (api *API) dialWorkspaceAgentTailnet(agentID uuid.UUID) (*codersdk.Workspac return left }) - sendNodes, _ := tailnet.ServeCoordinator(clientConn, func(update tailnet.CoordinatorNodeUpdate) error { - // Check if we need to update the DERP map used by the connection. - if !tailnet.CompareDERPMaps(conn.DERPMap(), update.DERPMap) { - conn.SetDERPMap(update.DERPMap) - } - - err = conn.UpdateNodes(update.Nodes, true) - if err != nil { - return xerrors.Errorf("update nodes: %w", err) - } - return nil + sendNodes, _ := tailnet.ServeCoordinator(clientConn, func(nodes []*tailnet.Node) error { + return conn.UpdateNodes(nodes, true) }) conn.SetNodeCallback(sendNodes) agentConn := &codersdk.WorkspaceAgentConn{ diff --git a/coderd/wsconncache/wsconncache_test.go b/coderd/wsconncache/wsconncache_test.go index 205cedef9ce2d..9021fdf23e290 100644 --- a/coderd/wsconncache/wsconncache_test.go +++ b/coderd/wsconncache/wsconncache_test.go @@ -20,7 +20,6 @@ import ( "github.com/stretchr/testify/require" "go.uber.org/atomic" "go.uber.org/goleak" - "tailscale.com/tailcfg" "cdr.dev/slog" "cdr.dev/slog/sloggers/slogtest" @@ -160,9 +159,7 @@ func setupAgent(t *testing.T, manifest agentsdk.Manifest, ptyTimeout time.Durati logger := slogtest.Make(t, nil).Leveled(slog.LevelDebug) manifest.DERPMap = tailnettest.RunDERPAndSTUN(t) - coordinator := tailnet.NewCoordinator(logger, func() *tailcfg.DERPMap { - return manifest.DERPMap - }) + coordinator := tailnet.NewCoordinator(logger) t.Cleanup(func() { _ = coordinator.Close() }) @@ -193,10 +190,8 @@ func setupAgent(t *testing.T, manifest agentsdk.Manifest, ptyTimeout time.Durati _ = conn.Close() }) go coordinator.ServeClient(serverConn, uuid.New(), agentID) - sendNode, _ := tailnet.ServeCoordinator(clientConn, func(update 
tailnet.CoordinatorNodeUpdate) error { - // Don't need to worry about updating the DERP map since it'll never - // change in this test (as we aren't dealing with proxies etc.) - return conn.UpdateNodes(update.Nodes, false) + sendNode, _ := tailnet.ServeCoordinator(clientConn, func(nodes []*tailnet.Node) error { + return conn.UpdateNodes(nodes, false) }) conn.SetNodeCallback(sendNode) agentConn := &codersdk.WorkspaceAgentConn{ diff --git a/codersdk/workspaceagents.go b/codersdk/workspaceagents.go index 1f67083ec76c8..e02b720abe0b4 100644 --- a/codersdk/workspaceagents.go +++ b/codersdk/workspaceagents.go @@ -171,24 +171,18 @@ type WorkspaceAgentConnectionInfo struct { DisableDirectConnections bool `json:"disable_direct_connections"` } -func (c *Client) WorkspaceAgentConnectionInfoGeneric(ctx context.Context) (*WorkspaceAgentConnectionInfo, error) { +func (c *Client) WorkspaceAgentConnectionInfoGeneric(ctx context.Context) (WorkspaceAgentConnectionInfo, error) { res, err := c.Request(ctx, http.MethodGet, "/api/v2/workspaceagents/connection", nil) if err != nil { - return nil, err + return WorkspaceAgentConnectionInfo{}, err } defer res.Body.Close() - if res.StatusCode != http.StatusOK { - return nil, ReadBodyAsError(res) - } - - var info WorkspaceAgentConnectionInfo - err = json.NewDecoder(res.Body).Decode(&info) - if err != nil { - return nil, xerrors.Errorf("decode connection info: %w", err) + return WorkspaceAgentConnectionInfo{}, ReadBodyAsError(res) } - return &info, nil + var connInfo WorkspaceAgentConnectionInfo + return connInfo, json.NewDecoder(res.Body).Decode(&connInfo) } func (c *Client) WorkspaceAgentConnectionInfo(ctx context.Context, agentID uuid.UUID) (WorkspaceAgentConnectionInfo, error) { @@ -295,13 +289,8 @@ func (c *Client) DialWorkspaceAgent(ctx context.Context, agentID uuid.UUID, opti options.Logger.Debug(ctx, "failed to dial", slog.Error(err)) continue } - sendNode, errChan := tailnet.ServeCoordinator(websocket.NetConn(ctx, ws, 
websocket.MessageBinary), func(update tailnet.CoordinatorNodeUpdate) error { - // Check if we need to update the DERP map used by the connection. - if !tailnet.CompareDERPMaps(conn.DERPMap(), update.DERPMap) { - options.Logger.Debug(ctx, "updating DERP map on connection request due to changes", slog.F("old", conn.DERPMap()), slog.F("new", update.DERPMap)) - conn.SetDERPMap(update.DERPMap) - } - return conn.UpdateNodes(update.Nodes, false) + sendNode, errChan := tailnet.ServeCoordinator(websocket.NetConn(ctx, ws, websocket.MessageBinary), func(nodes []*tailnet.Node) error { + return conn.UpdateNodes(nodes, false) }) conn.SetNodeCallback(sendNode) options.Logger.Debug(ctx, "serving coordinator") diff --git a/docs/admin/audit-logs.md b/docs/admin/audit-logs.md index 4b1d62cfc0364..576c58f3eaa5a 100644 --- a/docs/admin/audit-logs.md +++ b/docs/admin/audit-logs.md @@ -9,33 +9,6 @@ We track the following resources: -<<<<<<< HEAD -| Resource | | -| -------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- 
| -| APIKey
login, logout, register, create, delete |
FieldTracked
created_attrue
expires_attrue
hashed_secretfalse
idfalse
ip_addressfalse
last_usedtrue
lifetime_secondsfalse
login_typefalse
scopefalse
token_namefalse
updated_atfalse
user_idtrue
| -| Group
create, write, delete |
FieldTracked
avatar_urltrue
idtrue
memberstrue
nametrue
organization_idfalse
quota_allowancetrue
| -| GitSSHKey
create |
FieldTracked
created_atfalse
private_keytrue
public_keytrue
updated_atfalse
user_idtrue
| -| License
create, delete |
FieldTracked
exptrue
idfalse
jwtfalse
uploaded_attrue
uuidtrue
| -| Template
write, delete |
FieldTracked
active_version_idtrue
allow_user_autostarttrue
allow_user_autostoptrue
allow_user_cancel_workspace_jobstrue
created_atfalse
created_bytrue
default_ttltrue
deletedfalse
descriptiontrue
display_nametrue
failure_ttltrue
group_acltrue
icontrue
idtrue
inactivity_ttltrue
max_ttltrue
nametrue
organization_idfalse
provisionertrue
updated_atfalse
user_acltrue
| -| TemplateVersion
create, write |
FieldTracked
created_atfalse
created_bytrue
git_auth_providersfalse
idtrue
job_idfalse
nametrue
organization_idfalse
readmetrue
template_idtrue
updated_atfalse
| -| User
create, write, delete |
FieldTracked
avatar_urlfalse
created_atfalse
deletedtrue
emailtrue
hashed_passwordtrue
idtrue
last_seen_atfalse
login_typefalse
rbac_rolestrue
statustrue
updated_atfalse
usernametrue
| -| Workspace
create, write, delete |
FieldTracked
autostart_scheduletrue
created_atfalse
deletedfalse
idtrue
last_used_atfalse
nametrue
organization_idfalse
owner_idtrue
template_idtrue
ttltrue
updated_atfalse
| -| WorkspaceBuild
start, stop |
FieldTracked
build_numberfalse
created_atfalse
daily_costfalse
deadlinefalse
idfalse
initiator_idfalse
job_idfalse
max_deadlinefalse
provisioner_statefalse
reasonfalse
template_version_idtrue
transitionfalse
updated_atfalse
workspace_idfalse
| -| WorkspaceProxy
|
FieldTracked
created_attrue
deletedfalse
derp_enabledtrue
display_nametrue
icontrue
idtrue
nametrue
region_idtrue
token_hashed_secrettrue
updated_atfalse
urltrue
wildcard_hostnametrue
| -||||||| 8c4b7c01e -| Resource | | -| -------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| APIKey
login, logout, register, create, delete |
FieldTracked
created_attrue
expires_attrue
hashed_secretfalse
idfalse
ip_addressfalse
last_usedtrue
lifetime_secondsfalse
login_typefalse
scopefalse
token_namefalse
updated_atfalse
user_idtrue
| -| Group
create, write, delete |
FieldTracked
avatar_urltrue
idtrue
memberstrue
nametrue
organization_idfalse
quota_allowancetrue
| -| GitSSHKey
create |
FieldTracked
created_atfalse
private_keytrue
public_keytrue
updated_atfalse
user_idtrue
| -| License
create, delete |
FieldTracked
exptrue
idfalse
jwtfalse
uploaded_attrue
uuidtrue
| -| Template
write, delete |
FieldTracked
active_version_idtrue
allow_user_autostarttrue
allow_user_autostoptrue
allow_user_cancel_workspace_jobstrue
created_atfalse
created_bytrue
default_ttltrue
deletedfalse
descriptiontrue
display_nametrue
failure_ttltrue
group_acltrue
icontrue
idtrue
inactivity_ttltrue
max_ttltrue
nametrue
organization_idfalse
provisionertrue
updated_atfalse
user_acltrue
| -| TemplateVersion
create, write |
FieldTracked
created_atfalse
created_bytrue
git_auth_providersfalse
idtrue
job_idfalse
nametrue
organization_idfalse
readmetrue
template_idtrue
updated_atfalse
| -| User
create, write, delete |
FieldTracked
avatar_urlfalse
created_atfalse
deletedtrue
emailtrue
hashed_passwordtrue
idtrue
last_seen_atfalse
login_typefalse
rbac_rolestrue
statustrue
updated_atfalse
usernametrue
| -| Workspace
create, write, delete |
FieldTracked
autostart_scheduletrue
created_atfalse
deletedfalse
idtrue
last_used_atfalse
nametrue
organization_idfalse
owner_idtrue
template_idtrue
ttltrue
updated_atfalse
| -| WorkspaceBuild
start, stop |
FieldTracked
build_numberfalse
created_atfalse
daily_costfalse
deadlinefalse
idfalse
initiator_idfalse
job_idfalse
max_deadlinefalse
provisioner_statefalse
reasonfalse
template_version_idtrue
transitionfalse
updated_atfalse
workspace_idfalse
| -| WorkspaceProxy
|
FieldTracked
created_attrue
deletedfalse
display_nametrue
icontrue
idtrue
nametrue
token_hashed_secrettrue
updated_atfalse
urltrue
wildcard_hostnametrue
| -======= | Resource | | | -------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | APIKey
login, logout, register, create, delete |
FieldTracked
created_attrue
expires_attrue
hashed_secretfalse
idfalse
ip_addressfalse
last_usedtrue
lifetime_secondsfalse
login_typefalse
scopefalse
token_namefalse
updated_atfalse
user_idtrue
| @@ -47,8 +20,7 @@ We track the following resources: | User
create, write, delete |
FieldTracked
avatar_urlfalse
created_atfalse
deletedtrue
emailtrue
hashed_passwordtrue
idtrue
last_seen_atfalse
login_typefalse
rbac_rolestrue
statustrue
updated_atfalse
usernametrue
| | Workspace
create, write, delete |
FieldTracked
autostart_scheduletrue
created_atfalse
deletedfalse
idtrue
last_used_atfalse
nametrue
organization_idfalse
owner_idtrue
template_idtrue
ttltrue
updated_atfalse
| | WorkspaceBuild
start, stop |
FieldTracked
build_numberfalse
created_atfalse
daily_costfalse
deadlinefalse
idfalse
initiator_idfalse
job_idfalse
max_deadlinefalse
provisioner_statefalse
reasonfalse
template_version_idtrue
transitionfalse
updated_atfalse
workspace_idfalse
| -| WorkspaceProxy
|
FieldTracked
created_attrue
deletedfalse
display_nametrue
icontrue
idtrue
nametrue
token_hashed_secrettrue
updated_atfalse
urltrue
wildcard_hostnametrue
| ->>>>>>> main +| WorkspaceProxy
|
FieldTracked
created_attrue
deletedfalse
derp_enabledtrue
display_nametrue
icontrue
idtrue
nametrue
region_idtrue
token_hashed_secrettrue
updated_atfalse
urltrue
wildcard_hostnametrue
| diff --git a/enterprise/coderd/coderd.go b/enterprise/coderd/coderd.go index 468bcaf58eea8..36682535ba2bd 100644 --- a/enterprise/coderd/coderd.go +++ b/enterprise/coderd/coderd.go @@ -418,13 +418,13 @@ func (api *API) updateEntitlements(ctx context.Context) error { } if changed, enabled := featureChanged(codersdk.FeatureHighAvailability); changed { - coordinator := agpltailnet.NewCoordinator(api.Logger, api.AGPL.DERPMap) + coordinator := agpltailnet.NewCoordinator(api.Logger) if enabled { var haCoordinator agpltailnet.Coordinator if api.AGPL.Experiments.Enabled(codersdk.ExperimentTailnetPGCoordinator) { haCoordinator, err = tailnet.NewPGCoord(ctx, api.Logger, api.Pubsub, api.Database) } else { - haCoordinator, err = tailnet.NewCoordinator(api.Logger, api.Pubsub, api.AGPL.DERPMap, api.AGPL.DERPMap) + haCoordinator, err = tailnet.NewCoordinator(api.Logger, api.Pubsub) } if err != nil { api.Logger.Error(ctx, "unable to set up high availability coordinator", slog.Error(err)) diff --git a/enterprise/tailnet/coordinator.go b/enterprise/tailnet/coordinator.go index 6e3471ef8941c..b0d9cfa64032f 100644 --- a/enterprise/tailnet/coordinator.go +++ b/enterprise/tailnet/coordinator.go @@ -14,7 +14,6 @@ import ( "github.com/google/uuid" lru "github.com/hashicorp/golang-lru/v2" "golang.org/x/xerrors" - "tailscale.com/tailcfg" "cdr.dev/slog" "github.com/coder/coder/coderd/database/pubsub" @@ -23,7 +22,7 @@ import ( // NewCoordinator creates a new high availability coordinator // that uses PostgreSQL pubsub to exchange handshakes. 
-func NewCoordinator(logger slog.Logger, ps pubsub.Pubsub, derpMapFn func() *tailcfg.DERPMap) (agpl.Coordinator, error) { +func NewCoordinator(logger slog.Logger, ps pubsub.Pubsub) (agpl.Coordinator, error) { ctx, cancelFunc := context.WithCancel(context.Background()) nameCache, err := lru.New[uuid.UUID, string](512) @@ -35,9 +34,8 @@ func NewCoordinator(logger slog.Logger, ps pubsub.Pubsub, derpMapFn func() *tail id: uuid.New(), log: logger, pubsub: ps, - close: make(chan struct{}), closeFunc: cancelFunc, - derpMapFn: derpMapFn, + close: make(chan struct{}), nodes: map[uuid.UUID]*agpl.Node{}, agentSockets: map[uuid.UUID]*agpl.TrackedConn{}, agentToConnectionSockets: map[uuid.UUID]map[uuid.UUID]*agpl.TrackedConn{}, @@ -59,8 +57,6 @@ type haCoordinator struct { close chan struct{} closeFunc context.CancelFunc - derpMapFn func() *tailcfg.DERPMap - // nodes maps agent and connection IDs their respective node. nodes map[uuid.UUID]*agpl.Node // agentSockets maps agent IDs to their open websocket. @@ -113,10 +109,7 @@ func (c *haCoordinator) ServeClient(conn net.Conn, id uuid.UUID, agent uuid.UUID // node of the agent. This allows the connection to establish. node, ok := c.nodes[agent] if ok { - err := tc.Enqueue(agpl.CoordinatorNodeUpdate{ - DERPMap: c.derpMapFn(), - Nodes: []*agpl.Node{node}, - }) + err := tc.Enqueue([]*agpl.Node{node}) c.mutex.Unlock() if err != nil { return xerrors.Errorf("enqueue node: %w", err) @@ -184,10 +177,7 @@ func (c *haCoordinator) handleNextClientMessage(id, agent uuid.UUID, decoder *js } return nil } - err = agentSocket.Enqueue(agpl.CoordinatorNodeUpdate{ - DERPMap: c.derpMapFn(), - Nodes: []*agpl.Node{&node}, - }) + err = agentSocket.Enqueue([]*agpl.Node{&node}) c.mutex.Unlock() if err != nil { return xerrors.Errorf("enqueu nodes: %w", err) @@ -222,10 +212,7 @@ func (c *haCoordinator) ServeAgent(conn net.Conn, id uuid.UUID, name string) err // Publish all nodes on this instance that want to connect to this agent. 
nodes := c.nodesSubscribedToAgent(id) if len(nodes) > 0 { - err := tc.Enqueue(agpl.CoordinatorNodeUpdate{ - DERPMap: c.derpMapFn(), - Nodes: nodes, - }) + err := tc.Enqueue(nodes) if err != nil { c.mutex.Unlock() return xerrors.Errorf("enqueue nodes: %w", err) @@ -321,12 +308,8 @@ func (c *haCoordinator) handleAgentUpdate(id uuid.UUID, decoder *json.Decoder) ( } // Publish the new node to every listening socket. - derpMap := c.derpMapFn() for _, connectionSocket := range connectionSockets { - _ = connectionSocket.Enqueue(agpl.CoordinatorNodeUpdate{ - DERPMap: derpMap, - Nodes: []*agpl.Node{&node}, - }) + _ = connectionSocket.Enqueue([]*agpl.Node{&node}) } c.mutex.Unlock() return &node, nil @@ -503,10 +486,7 @@ func (c *haCoordinator) handlePubsubMessage(ctx context.Context, message []byte) if err != nil { c.log.Error(ctx, "invalid nodes JSON", slog.F("id", agentID), slog.Error(err), slog.F("node", string(nodeJSON))) } - err = agentSocket.Enqueue(agpl.CoordinatorNodeUpdate{ - DERPMap: c.derpMapFn(), - Nodes: nodes, - }) + err = agentSocket.Enqueue(nodes) if err != nil { c.log.Error(ctx, "send callmemaybe to agent", slog.Error(err)) return diff --git a/enterprise/tailnet/coordinator_test.go b/enterprise/tailnet/coordinator_test.go index 6e6012580f2ed..a29bf2ad273a9 100644 --- a/enterprise/tailnet/coordinator_test.go +++ b/enterprise/tailnet/coordinator_test.go @@ -1,14 +1,12 @@ package tailnet_test import ( - "context" "net" "testing" "github.com/google/uuid" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" - "tailscale.com/tailcfg" "cdr.dev/slog/sloggers/slogtest" @@ -23,12 +21,12 @@ func TestCoordinatorSingle(t *testing.T) { t.Parallel() t.Run("ClientWithoutAgent", func(t *testing.T) { t.Parallel() - coordinator, err := tailnet.NewCoordinator(slogtest.Make(t, nil), pubsub.NewInMemory(), emptyDerpMapFn) + coordinator, err := tailnet.NewCoordinator(slogtest.Make(t, nil), pubsub.NewInMemory()) require.NoError(t, err) defer coordinator.Close() 
client, server := net.Pipe() - sendNode, errChan := agpl.ServeCoordinator(client, func(update agpl.CoordinatorNodeUpdate) error { + sendNode, errChan := agpl.ServeCoordinator(client, func(node []*agpl.Node) error { return nil }) id := uuid.New() @@ -51,12 +49,12 @@ func TestCoordinatorSingle(t *testing.T) { t.Run("AgentWithoutClients", func(t *testing.T) { t.Parallel() - coordinator, err := tailnet.NewCoordinator(slogtest.Make(t, nil), pubsub.NewInMemory(), emptyDerpMapFn) + coordinator, err := tailnet.NewCoordinator(slogtest.Make(t, nil), pubsub.NewInMemory()) require.NoError(t, err) defer coordinator.Close() client, server := net.Pipe() - sendNode, errChan := agpl.ServeCoordinator(client, func(update agpl.CoordinatorNodeUpdate) error { + sendNode, errChan := agpl.ServeCoordinator(client, func(node []*agpl.Node) error { return nil }) id := uuid.New() @@ -79,15 +77,15 @@ func TestCoordinatorSingle(t *testing.T) { t.Run("AgentWithClient", func(t *testing.T) { t.Parallel() - coordinator, err := tailnet.NewCoordinator(slogtest.Make(t, nil), pubsub.NewInMemory(), emptyDerpMapFn) + coordinator, err := tailnet.NewCoordinator(slogtest.Make(t, nil), pubsub.NewInMemory()) require.NoError(t, err) defer coordinator.Close() agentWS, agentServerWS := net.Pipe() defer agentWS.Close() agentNodeChan := make(chan []*agpl.Node) - sendAgentNode, agentErrChan := agpl.ServeCoordinator(agentWS, func(update agpl.CoordinatorNodeUpdate) error { - agentNodeChan <- update.Nodes + sendAgentNode, agentErrChan := agpl.ServeCoordinator(agentWS, func(nodes []*agpl.Node) error { + agentNodeChan <- nodes return nil }) agentID := uuid.New() @@ -106,8 +104,8 @@ func TestCoordinatorSingle(t *testing.T) { defer clientWS.Close() defer clientServerWS.Close() clientNodeChan := make(chan []*agpl.Node) - sendClientNode, clientErrChan := agpl.ServeCoordinator(clientWS, func(update agpl.CoordinatorNodeUpdate) error { - clientNodeChan <- update.Nodes + sendClientNode, clientErrChan := 
agpl.ServeCoordinator(clientWS, func(nodes []*agpl.Node) error { + clientNodeChan <- nodes return nil }) clientID := uuid.New() @@ -138,8 +136,8 @@ func TestCoordinatorSingle(t *testing.T) { agentWS, agentServerWS = net.Pipe() defer agentWS.Close() agentNodeChan = make(chan []*agpl.Node) - _, agentErrChan = agpl.ServeCoordinator(agentWS, func(update agpl.CoordinatorNodeUpdate) error { - agentNodeChan <- update.Nodes + _, agentErrChan = agpl.ServeCoordinator(agentWS, func(nodes []*agpl.Node) error { + agentNodeChan <- nodes return nil }) closeAgentChan = make(chan struct{}) @@ -162,100 +160,6 @@ func TestCoordinatorSingle(t *testing.T) { <-clientErrChan <-closeClientChan }) - - t.Run("SendsDERPMap", func(t *testing.T) { - t.Parallel() - - derpMapFn := func() *tailcfg.DERPMap { - return &tailcfg.DERPMap{ - Regions: map[int]*tailcfg.DERPRegion{ - 1: { - RegionID: 1, - Nodes: []*tailcfg.DERPNode{ - { - Name: "derp1", - RegionID: 1, - HostName: "derp1.example.com", - // blah - }, - }, - }, - }, - } - } - - coordinator, err := tailnet.NewCoordinator(slogtest.Make(t, nil), pubsub.NewInMemory(), derpMapFn) - require.NoError(t, err) - defer coordinator.Close() - - ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitSuperLong) - defer cancel() - agentWS, agentServerWS := net.Pipe() - defer agentWS.Close() - agentUpdateChan := make(chan agpl.CoordinatorNodeUpdate) - sendAgentNode, agentErrChan := agpl.ServeCoordinator(agentWS, func(update agpl.CoordinatorNodeUpdate) error { - agentUpdateChan <- update - return nil - }) - agentID := uuid.New() - closeAgentChan := make(chan struct{}) - go func() { - err := coordinator.ServeAgent(agentServerWS, agentID, "") - assert.NoError(t, err) - close(closeAgentChan) - }() - sendAgentNode(&agpl.Node{}) - require.Eventually(t, func() bool { - return coordinator.Node(agentID) != nil - }, testutil.WaitShort, testutil.IntervalFast) - - clientWS, clientServerWS := net.Pipe() - defer clientWS.Close() - defer 
clientServerWS.Close() - clientUpdateChan := make(chan agpl.CoordinatorNodeUpdate) - sendClientNode, clientErrChan := agpl.ServeCoordinator(clientWS, func(update agpl.CoordinatorNodeUpdate) error { - clientUpdateChan <- update - return nil - }) - clientID := uuid.New() - closeClientChan := make(chan struct{}) - go func() { - err := coordinator.ServeClient(clientServerWS, clientID, agentID) - assert.NoError(t, err) - close(closeClientChan) - }() - select { - case clientUpdate := <-clientUpdateChan: - require.Equal(t, derpMapFn(), clientUpdate.DERPMap) - require.Len(t, clientUpdate.Nodes, 1) - case <-ctx.Done(): - t.Fatal("timed out") - } - sendClientNode(&agpl.Node{}) - agentUpdate := <-agentUpdateChan - require.Equal(t, derpMapFn(), agentUpdate.DERPMap) - require.Len(t, agentUpdate.Nodes, 1) - - // Ensure an update to the agent node reaches the client! - sendAgentNode(&agpl.Node{}) - select { - case clientUpdate := <-clientUpdateChan: - require.Equal(t, derpMapFn(), clientUpdate.DERPMap) - require.Len(t, clientUpdate.Nodes, 1) - case <-ctx.Done(): - t.Fatal("timed out") - } - - err = agentWS.Close() - require.NoError(t, err) - <-agentErrChan - <-closeAgentChan - - err = clientWS.Close() - require.NoError(t, err) - <-clientErrChan - <-closeClientChan - }) } func TestCoordinatorHA(t *testing.T) { @@ -266,15 +170,15 @@ func TestCoordinatorHA(t *testing.T) { _, pubsub := dbtestutil.NewDB(t) - coordinator1, err := tailnet.NewCoordinator(slogtest.Make(t, nil), pubsub, emptyDerpMapFn) + coordinator1, err := tailnet.NewCoordinator(slogtest.Make(t, nil), pubsub) require.NoError(t, err) defer coordinator1.Close() agentWS, agentServerWS := net.Pipe() defer agentWS.Close() agentNodeChan := make(chan []*agpl.Node) - sendAgentNode, agentErrChan := agpl.ServeCoordinator(agentWS, func(update agpl.CoordinatorNodeUpdate) error { - agentNodeChan <- update.Nodes + sendAgentNode, agentErrChan := agpl.ServeCoordinator(agentWS, func(nodes []*agpl.Node) error { + agentNodeChan <- nodes 
return nil }) agentID := uuid.New() @@ -289,7 +193,7 @@ func TestCoordinatorHA(t *testing.T) { return coordinator1.Node(agentID) != nil }, testutil.WaitShort, testutil.IntervalFast) - coordinator2, err := tailnet.NewCoordinator(slogtest.Make(t, nil), pubsub, emptyDerpMapFn) + coordinator2, err := tailnet.NewCoordinator(slogtest.Make(t, nil), pubsub) require.NoError(t, err) defer coordinator2.Close() @@ -297,8 +201,8 @@ func TestCoordinatorHA(t *testing.T) { defer clientWS.Close() defer clientServerWS.Close() clientNodeChan := make(chan []*agpl.Node) - sendClientNode, clientErrChan := agpl.ServeCoordinator(clientWS, func(update agpl.CoordinatorNodeUpdate) error { - clientNodeChan <- update.Nodes + sendClientNode, clientErrChan := agpl.ServeCoordinator(clientWS, func(nodes []*agpl.Node) error { + clientNodeChan <- nodes return nil }) clientID := uuid.New() @@ -330,8 +234,8 @@ func TestCoordinatorHA(t *testing.T) { agentWS, agentServerWS = net.Pipe() defer agentWS.Close() agentNodeChan = make(chan []*agpl.Node) - _, agentErrChan = agpl.ServeCoordinator(agentWS, func(update agpl.CoordinatorNodeUpdate) error { - agentNodeChan <- update.Nodes + _, agentErrChan = agpl.ServeCoordinator(agentWS, func(nodes []*agpl.Node) error { + agentNodeChan <- nodes return nil }) closeAgentChan = make(chan struct{}) @@ -355,7 +259,3 @@ func TestCoordinatorHA(t *testing.T) { <-closeClientChan }) } - -func emptyDerpMapFn() *tailcfg.DERPMap { - return &tailcfg.DERPMap{} -} diff --git a/enterprise/wsproxy/wsproxy_test.go b/enterprise/wsproxy/wsproxy_test.go index 9f6007192fdf8..9d60f2f625212 100644 --- a/enterprise/wsproxy/wsproxy_test.go +++ b/enterprise/wsproxy/wsproxy_test.go @@ -158,7 +158,7 @@ resourceLoop: proxy2Region *tailcfg.DERPRegion ) for _, r := range connInfo.DERPMap.Regions { - if r.RegionName == "Coder" { + if r.EmbeddedRelay { primaryRegion = r continue } @@ -175,16 +175,10 @@ resourceLoop: } // The primary region: - require.Equal(t, "Coder", primaryRegion.RegionName) + 
require.Equal(t, "Coder Embedded Relay", primaryRegion.RegionName) require.Equal(t, "coder", primaryRegion.RegionCode) - require.Equal(t, 1, primaryRegion.RegionID) + require.Equal(t, 999, primaryRegion.RegionID) require.True(t, primaryRegion.EmbeddedRelay) - require.Len(t, primaryRegion.Nodes, 1) - require.Equal(t, "1a", primaryRegion.Nodes[0].Name) - require.Equal(t, 1, primaryRegion.Nodes[0].RegionID) - require.Equal(t, "", primaryRegion.Nodes[0].HostName) // embedded region has no hostname returned - require.Equal(t, api.AccessURL.Port(), fmt.Sprint(primaryRegion.Nodes[0].DERPPort)) - require.Equal(t, api.AccessURL.Scheme == "http", primaryRegion.Nodes[0].ForceHTTP) // The first proxy region: require.Equal(t, "best-proxy", proxy1Region.RegionName) diff --git a/tailnet/coordinator.go b/tailnet/coordinator.go index 5e483d944f748..ee675ef6665e7 100644 --- a/tailnet/coordinator.go +++ b/tailnet/coordinator.go @@ -76,21 +76,8 @@ type Node struct { Endpoints []string `json:"endpoints"` } -// CoordinatorNewNodes is written to a coordinator websocket when there are new -// nodes or existing nodes have been updated. -// -// The DERPMap is provided so the client can always use an up-to-date DERPMap. -// The DERPMap should only be applied to the tailnet if it is different from -// the current one. -type CoordinatorNodeUpdate struct { - // DERPMap is the current DERP map used by Coder. - DERPMap *tailcfg.DERPMap `json:"derp_map,omitempty"` - // Nodes are the new list of nodes to add to the tailnet. - Nodes []*Node `json:"nodes"` -} - // ServeCoordinator matches the RW structure of a coordinator to exchange node messages. 
-func ServeCoordinator(conn net.Conn, callback func(update CoordinatorNodeUpdate) error) (func(node *Node), <-chan error) { +func ServeCoordinator(conn net.Conn, updateNodes func(node []*Node) error) (func(node *Node), <-chan error) { errChan := make(chan error, 1) sendErr := func(err error) { select { @@ -101,15 +88,15 @@ func ServeCoordinator(conn net.Conn, callback func(update CoordinatorNodeUpdate) go func() { decoder := json.NewDecoder(conn) for { - var data CoordinatorNodeUpdate - err := decoder.Decode(&data) + var nodes []*Node + err := decoder.Decode(&nodes) if err != nil { sendErr(xerrors.Errorf("read: %w", err)) return } - err = callback(data) + err = updateNodes(nodes) if err != nil { - sendErr(xerrors.Errorf("run callback fn: %w", err)) + sendErr(xerrors.Errorf("update nodes: %w", err)) } } }() @@ -132,9 +119,9 @@ const LoggerName = "coord" // NewCoordinator constructs a new in-memory connection coordinator. This // coordinator is incompatible with multiple Coder replicas as all node data is // in-memory. -func NewCoordinator(logger slog.Logger, derpMapFn func() *tailcfg.DERPMap) Coordinator { +func NewCoordinator(logger slog.Logger) Coordinator { return &coordinator{ - core: newCore(logger, derpMapFn), + core: newCore(logger), } } @@ -156,8 +143,6 @@ type core struct { mutex sync.RWMutex closed bool - derpMapFn func() *tailcfg.DERPMap - // nodes maps agent and connection IDs their respective node. nodes map[uuid.UUID]*Node // agentSockets maps agent IDs to their open websocket. 
@@ -171,7 +156,7 @@ type core struct { agentNameCache *lru.Cache[uuid.UUID, string] } -func newCore(logger slog.Logger, derpMapFn func() *tailcfg.DERPMap) *core { +func newCore(logger slog.Logger) *core { nameCache, err := lru.New[uuid.UUID, string](512) if err != nil { panic("make lru cache: " + err.Error()) @@ -180,7 +165,6 @@ func newCore(logger slog.Logger, derpMapFn func() *tailcfg.DERPMap) *core { return &core{ logger: logger, closed: false, - derpMapFn: derpMapFn, nodes: make(map[uuid.UUID]*Node), agentSockets: map[uuid.UUID]*TrackedConn{}, agentToConnectionSockets: map[uuid.UUID]map[uuid.UUID]*TrackedConn{}, @@ -194,7 +178,7 @@ type TrackedConn struct { ctx context.Context cancel func() conn net.Conn - updates chan CoordinatorNodeUpdate + updates chan []*Node logger slog.Logger lastData []byte @@ -208,10 +192,10 @@ type TrackedConn struct { Overwrites int64 } -func (t *TrackedConn) Enqueue(update CoordinatorNodeUpdate) (err error) { +func (t *TrackedConn) Enqueue(n []*Node) (err error) { atomic.StoreInt64(&t.LastWrite, time.Now().Unix()) select { - case t.updates <- update: + case t.updates <- n: return nil default: return ErrWouldBlock @@ -236,14 +220,14 @@ func (t *TrackedConn) SendUpdates() { case <-t.ctx.Done(): t.logger.Debug(t.ctx, "done sending updates") return - case update := <-t.updates: - data, err := json.Marshal(update) + case nodes := <-t.updates: + data, err := json.Marshal(nodes) if err != nil { - t.logger.Error(t.ctx, "unable to marshal nodes update", slog.Error(err), slog.F("data", update)) + t.logger.Error(t.ctx, "unable to marshal nodes update", slog.Error(err), slog.F("nodes", nodes)) return } if bytes.Equal(t.lastData, data) { - t.logger.Debug(t.ctx, "skipping duplicate update", slog.F("update", update)) + t.logger.Debug(t.ctx, "skipping duplicate update", slog.F("nodes", nodes)) continue } @@ -259,11 +243,11 @@ func (t *TrackedConn) SendUpdates() { _, err = t.conn.Write(data) if err != nil { // often, this is just because the 
connection is closed/broken, so only log at debug. - t.logger.Debug(t.ctx, "could not write nodes to connection", slog.Error(err), slog.F("nodes", update)) + t.logger.Debug(t.ctx, "could not write nodes to connection", slog.Error(err), slog.F("nodes", nodes)) _ = t.Close() return } - t.logger.Debug(t.ctx, "wrote node update", slog.F("data", update)) + t.logger.Debug(t.ctx, "wrote nodes", slog.F("nodes", nodes)) // nhooyr.io/websocket has a bugged implementation of deadlines on a websocket net.Conn. What they are // *supposed* to do is set a deadline for any subsequent writes to complete, otherwise the call to Write() @@ -287,7 +271,7 @@ func NewTrackedConn(ctx context.Context, cancel func(), conn net.Conn, id uuid.U // coordinator mutex while queuing. Node updates don't // come quickly, so 512 should be plenty for all but // the most pathological cases. - updates := make(chan CoordinatorNodeUpdate, 512) + updates := make(chan []*Node, 512) now := time.Now().Unix() return &TrackedConn{ ctx: ctx, @@ -388,10 +372,7 @@ func (c *core) initAndTrackClient( // node of the agent. This allows the connection to establish. node, ok := c.nodes[agent] if ok { - err := tc.Enqueue(CoordinatorNodeUpdate{ - DERPMap: c.derpMapFn(), - Nodes: []*Node{node}, - }) + err := tc.Enqueue([]*Node{node}) // this should never error since we're still the only goroutine that // knows about the TrackedConn. 
If we hit an error something really // wrong is happening @@ -459,10 +440,7 @@ func (c *core) clientNodeUpdate(id, agent uuid.UUID, node *Node) error { return nil } - err := agentSocket.Enqueue(CoordinatorNodeUpdate{ - DERPMap: c.derpMapFn(), - Nodes: []*Node{node}, - }) + err := agentSocket.Enqueue([]*Node{node}) if err != nil { return xerrors.Errorf("Enqueue node: %w", err) } @@ -558,10 +536,7 @@ func (c *core) initAndTrackAgent(ctx context.Context, cancel func(), conn net.Co } nodes = append(nodes, node) } - err := tc.Enqueue(CoordinatorNodeUpdate{ - DERPMap: c.derpMapFn(), - Nodes: nodes, - }) + err := tc.Enqueue(nodes) // this should never error since we're still the only goroutine that // knows about the TrackedConn. If we hit an error something really // wrong is happening @@ -600,12 +575,8 @@ func (c *core) agentNodeUpdate(id uuid.UUID, node *Node) error { } // Publish the new node to every listening socket. - derpMap := c.derpMapFn() for clientID, connectionSocket := range connectionSockets { - err := connectionSocket.Enqueue(CoordinatorNodeUpdate{ - DERPMap: derpMap, - Nodes: []*Node{node}, - }) + err := connectionSocket.Enqueue([]*Node{node}) if err == nil { logger.Debug(context.Background(), "enqueued agent node to client", slog.F("client_id", clientID)) diff --git a/tailnet/coordinator_test.go b/tailnet/coordinator_test.go index 4b75491dbdb99..300a89ad5f9e0 100644 --- a/tailnet/coordinator_test.go +++ b/tailnet/coordinator_test.go @@ -10,7 +10,6 @@ import ( "time" "nhooyr.io/websocket" - "tailscale.com/tailcfg" "cdr.dev/slog" "cdr.dev/slog/sloggers/slogtest" @@ -28,9 +27,9 @@ func TestCoordinator(t *testing.T) { t.Run("ClientWithoutAgent", func(t *testing.T) { t.Parallel() logger := slogtest.Make(t, nil).Leveled(slog.LevelDebug) - coordinator := tailnet.NewCoordinator(logger, emptyDerpMapFn) + coordinator := tailnet.NewCoordinator(logger) client, server := net.Pipe() - sendNode, errChan := tailnet.ServeCoordinator(client, func(_ 
tailnet.CoordinatorNodeUpdate) error { + sendNode, errChan := tailnet.ServeCoordinator(client, func(node []*tailnet.Node) error { return nil }) id := uuid.New() @@ -53,9 +52,9 @@ func TestCoordinator(t *testing.T) { t.Run("AgentWithoutClients", func(t *testing.T) { t.Parallel() logger := slogtest.Make(t, nil).Leveled(slog.LevelDebug) - coordinator := tailnet.NewCoordinator(logger, emptyDerpMapFn) + coordinator := tailnet.NewCoordinator(logger) client, server := net.Pipe() - sendNode, errChan := tailnet.ServeCoordinator(client, func(_ tailnet.CoordinatorNodeUpdate) error { + sendNode, errChan := tailnet.ServeCoordinator(client, func(node []*tailnet.Node) error { return nil }) id := uuid.New() @@ -78,7 +77,7 @@ func TestCoordinator(t *testing.T) { t.Run("AgentWithClient", func(t *testing.T) { t.Parallel() logger := slogtest.Make(t, nil).Leveled(slog.LevelDebug) - coordinator := tailnet.NewCoordinator(logger, emptyDerpMapFn) + coordinator := tailnet.NewCoordinator(logger) // in this test we use real websockets to test use of deadlines ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitSuperLong) @@ -86,8 +85,8 @@ func TestCoordinator(t *testing.T) { agentWS, agentServerWS := websocketConn(ctx, t) defer agentWS.Close() agentNodeChan := make(chan []*tailnet.Node) - sendAgentNode, agentErrChan := tailnet.ServeCoordinator(agentWS, func(update tailnet.CoordinatorNodeUpdate) error { - agentNodeChan <- update.Nodes + sendAgentNode, agentErrChan := tailnet.ServeCoordinator(agentWS, func(nodes []*tailnet.Node) error { + agentNodeChan <- nodes return nil }) agentID := uuid.New() @@ -106,8 +105,8 @@ func TestCoordinator(t *testing.T) { defer clientWS.Close() defer clientServerWS.Close() clientNodeChan := make(chan []*tailnet.Node) - sendClientNode, clientErrChan := tailnet.ServeCoordinator(clientWS, func(update tailnet.CoordinatorNodeUpdate) error { - clientNodeChan <- update.Nodes + sendClientNode, clientErrChan := tailnet.ServeCoordinator(clientWS, 
func(nodes []*tailnet.Node) error { + clientNodeChan <- nodes return nil }) clientID := uuid.New() @@ -150,8 +149,8 @@ func TestCoordinator(t *testing.T) { agentWS, agentServerWS = net.Pipe() defer agentWS.Close() agentNodeChan = make(chan []*tailnet.Node) - _, agentErrChan = tailnet.ServeCoordinator(agentWS, func(update tailnet.CoordinatorNodeUpdate) error { - agentNodeChan <- update.Nodes + _, agentErrChan = tailnet.ServeCoordinator(agentWS, func(nodes []*tailnet.Node) error { + agentNodeChan <- nodes return nil }) closeAgentChan = make(chan struct{}) @@ -178,13 +177,13 @@ func TestCoordinator(t *testing.T) { t.Run("AgentDoubleConnect", func(t *testing.T) { t.Parallel() logger := slogtest.Make(t, nil).Leveled(slog.LevelDebug) - coordinator := tailnet.NewCoordinator(logger, emptyDerpMapFn) + coordinator := tailnet.NewCoordinator(logger) agentWS1, agentServerWS1 := net.Pipe() defer agentWS1.Close() agentNodeChan1 := make(chan []*tailnet.Node) - sendAgentNode1, agentErrChan1 := tailnet.ServeCoordinator(agentWS1, func(update tailnet.CoordinatorNodeUpdate) error { - agentNodeChan1 <- update.Nodes + sendAgentNode1, agentErrChan1 := tailnet.ServeCoordinator(agentWS1, func(nodes []*tailnet.Node) error { + agentNodeChan1 <- nodes return nil }) agentID := uuid.New() @@ -203,8 +202,8 @@ func TestCoordinator(t *testing.T) { defer clientWS.Close() defer clientServerWS.Close() clientNodeChan := make(chan []*tailnet.Node) - sendClientNode, clientErrChan := tailnet.ServeCoordinator(clientWS, func(update tailnet.CoordinatorNodeUpdate) error { - clientNodeChan <- update.Nodes + sendClientNode, clientErrChan := tailnet.ServeCoordinator(clientWS, func(nodes []*tailnet.Node) error { + clientNodeChan <- nodes return nil }) clientID := uuid.New() @@ -229,8 +228,8 @@ func TestCoordinator(t *testing.T) { agentWS2, agentServerWS2 := net.Pipe() defer agentWS2.Close() agentNodeChan2 := make(chan []*tailnet.Node) - _, agentErrChan2 := tailnet.ServeCoordinator(agentWS2, func(update 
tailnet.CoordinatorNodeUpdate) error { - agentNodeChan2 <- update.Nodes + _, agentErrChan2 := tailnet.ServeCoordinator(agentWS2, func(nodes []*tailnet.Node) error { + agentNodeChan2 <- nodes return nil }) closeAgentChan2 := make(chan struct{}) @@ -269,99 +268,6 @@ func TestCoordinator(t *testing.T) { <-agentErrChan1 <-closeAgentChan1 }) - - t.Run("SendsDERPMap", func(t *testing.T) { - t.Parallel() - logger := slogtest.Make(t, nil).Leveled(slog.LevelDebug) - - derpMapFn := func() *tailcfg.DERPMap { - return &tailcfg.DERPMap{ - Regions: map[int]*tailcfg.DERPRegion{ - 1: { - RegionID: 1, - Nodes: []*tailcfg.DERPNode{ - { - Name: "derp1", - RegionID: 1, - HostName: "derp1.example.com", - // blah - }, - }, - }, - }, - } - } - - coordinator := tailnet.NewCoordinator(logger, derpMapFn) - - ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitSuperLong) - defer cancel() - agentWS, agentServerWS := websocketConn(ctx, t) - defer agentWS.Close() - agentUpdateChan := make(chan tailnet.CoordinatorNodeUpdate) - sendAgentNode, agentErrChan := tailnet.ServeCoordinator(agentWS, func(update tailnet.CoordinatorNodeUpdate) error { - agentUpdateChan <- update - return nil - }) - agentID := uuid.New() - closeAgentChan := make(chan struct{}) - go func() { - err := coordinator.ServeAgent(agentServerWS, agentID, "") - assert.NoError(t, err) - close(closeAgentChan) - }() - sendAgentNode(&tailnet.Node{}) - require.Eventually(t, func() bool { - return coordinator.Node(agentID) != nil - }, testutil.WaitShort, testutil.IntervalFast) - - clientWS, clientServerWS := websocketConn(ctx, t) - defer clientWS.Close() - defer clientServerWS.Close() - clientUpdateChan := make(chan tailnet.CoordinatorNodeUpdate) - sendClientNode, clientErrChan := tailnet.ServeCoordinator(clientWS, func(update tailnet.CoordinatorNodeUpdate) error { - clientUpdateChan <- update - return nil - }) - clientID := uuid.New() - closeClientChan := make(chan struct{}) - go func() { - err := 
coordinator.ServeClient(clientServerWS, clientID, agentID) - assert.NoError(t, err) - close(closeClientChan) - }() - select { - case clientUpdate := <-clientUpdateChan: - require.Equal(t, derpMapFn(), clientUpdate.DERPMap) - require.Len(t, clientUpdate.Nodes, 1) - case <-ctx.Done(): - t.Fatal("timed out") - } - sendClientNode(&tailnet.Node{}) - agentUpdate := <-agentUpdateChan - require.Equal(t, derpMapFn(), agentUpdate.DERPMap) - require.Len(t, agentUpdate.Nodes, 1) - - // Ensure an update to the agent node reaches the client! - sendAgentNode(&tailnet.Node{}) - select { - case clientUpdate := <-clientUpdateChan: - require.Equal(t, derpMapFn(), clientUpdate.DERPMap) - require.Len(t, clientUpdate.Nodes, 1) - case <-ctx.Done(): - t.Fatal("timed out") - } - - err := agentWS.Close() - require.NoError(t, err) - <-agentErrChan - <-closeAgentChan - - err = clientWS.Close() - require.NoError(t, err) - <-clientErrChan - <-closeClientChan - }) } // TestCoordinator_AgentUpdateWhileClientConnects tests for regression on @@ -369,7 +275,7 @@ func TestCoordinator(t *testing.T) { func TestCoordinator_AgentUpdateWhileClientConnects(t *testing.T) { t.Parallel() logger := slogtest.Make(t, nil).Leveled(slog.LevelDebug) - coordinator := tailnet.NewCoordinator(logger, emptyDerpMapFn) + coordinator := tailnet.NewCoordinator(logger) agentWS, agentServerWS := net.Pipe() defer agentWS.Close() @@ -404,7 +310,9 @@ func TestCoordinator_AgentUpdateWhileClientConnects(t *testing.T) { // peek one byte from the node update, so we know the coordinator is // trying to write to the client. 
- buf := make([]byte, 2048) + // buffer needs to be 2 characters longer because return value is a list + // so, it needs [ and ] + buf := make([]byte, len(aData)+2) err = clientWS.SetReadDeadline(time.Now().Add(testutil.WaitShort)) require.NoError(t, err) n, err := clientWS.Read(buf[:1]) @@ -426,24 +334,25 @@ func TestCoordinator_AgentUpdateWhileClientConnects(t *testing.T) { require.NoError(t, err) n, err = clientWS.Read(buf[1:]) require.NoError(t, err) - var cUpdate tailnet.CoordinatorNodeUpdate - err = json.Unmarshal(buf[:n+1], &cUpdate) + require.Equal(t, len(buf)-1, n) + var cNodes []*tailnet.Node + err = json.Unmarshal(buf, &cNodes) require.NoError(t, err) - require.Len(t, cUpdate.Nodes, 1) - require.Equal(t, 0, cUpdate.Nodes[0].PreferredDERP) + require.Len(t, cNodes, 1) + require.Equal(t, 0, cNodes[0].PreferredDERP) // read second update // without a fix for https://github.com/coder/coder/issues/7295 our // read would time out here. err = clientWS.SetReadDeadline(time.Now().Add(testutil.WaitShort)) require.NoError(t, err) - buf = make([]byte, 2048) n, err = clientWS.Read(buf) require.NoError(t, err) - err = json.Unmarshal(buf[:n], &cUpdate) + require.Equal(t, len(buf), n) + err = json.Unmarshal(buf, &cNodes) require.NoError(t, err) - require.Len(t, cUpdate.Nodes, 1) - require.Equal(t, 1, cUpdate.Nodes[0].PreferredDERP) + require.Len(t, cNodes, 1) + require.Equal(t, 1, cNodes[0].PreferredDERP) } func websocketConn(ctx context.Context, t *testing.T) (client net.Conn, server net.Conn) { @@ -468,7 +377,3 @@ func websocketConn(ctx context.Context, t *testing.T) (client net.Conn, server n require.True(t, ok) return client, server } - -func emptyDerpMapFn() *tailcfg.DERPMap { - return &tailcfg.DERPMap{} -} From 6a08a59c5d4e1e1c30c5f87eafef5e8b3f141f8f Mon Sep 17 00:00:00 2001 From: Dean Sheather Date: Wed, 28 Jun 2023 13:14:18 +0000 Subject: [PATCH 15/22] change derp map updates to be separate websocket --- agent/agent.go | 56 ++++++++- agent/agent_test.go | 25 
++++ coderd/coderd.go | 4 + coderd/workspaceagents.go | 76 ++++++++++++ coderd/wsconncache/wsconncache_test.go | 18 +++ codersdk/agentsdk/agentsdk.go | 162 +++++++++++++++++++------ codersdk/workspaceagents.go | 93 +++++++++++--- 7 files changed, 375 insertions(+), 59 deletions(-) diff --git a/agent/agent.go b/agent/agent.go index b1218190bbcb4..beb2c12ce3754 100644 --- a/agent/agent.go +++ b/agent/agent.go @@ -28,6 +28,7 @@ import ( "github.com/spf13/afero" "go.uber.org/atomic" "golang.org/x/exp/slices" + "golang.org/x/sync/errgroup" "golang.org/x/xerrors" "tailscale.com/net/speedtest" "tailscale.com/tailcfg" @@ -71,6 +72,7 @@ type Options struct { type Client interface { Manifest(ctx context.Context) (agentsdk.Manifest, error) Listen(ctx context.Context) (net.Conn, error) + DERPMapUpdates(ctx context.Context) (<-chan agentsdk.DERPMapUpdate, io.Closer, error) ReportStats(ctx context.Context, log slog.Logger, statsChan <-chan *agentsdk.Stats, setInterval func(time.Duration)) (io.Closer, error) PostLifecycle(ctx context.Context, state agentsdk.PostLifecycleRequest) error PostAppHealth(ctx context.Context, req agentsdk.PostAppHealthsRequest) error @@ -615,12 +617,26 @@ func (a *agent) run(ctx context.Context) error { network.SetBlockEndpoints(manifest.DisableDirectConnections) } - a.logger.Debug(ctx, "running tailnet connection coordinator") - err = a.runCoordinator(ctx, network) - if err != nil { - return xerrors.Errorf("run coordinator: %w", err) - } - return nil + eg, egCtx := errgroup.WithContext(ctx) + eg.Go(func() error { + a.logger.Debug(egCtx, "running tailnet connection coordinator") + err = a.runCoordinator(egCtx, network) + if err != nil { + return xerrors.Errorf("run coordinator: %w", err) + } + return nil + }) + + eg.Go(func() error { + a.logger.Debug(egCtx, "running derp map subscriber") + err = a.runDERPMapSubscriber(egCtx, network) + if err != nil { + return xerrors.Errorf("run derp map subscriber: %w", err) + } + return nil + }) + + return eg.Wait() 
} func (a *agent) trackConnGoroutine(fn func()) error { @@ -829,6 +845,34 @@ func (a *agent) runCoordinator(ctx context.Context, network *tailnet.Conn) error } } +// runDERPMapSubscriber runs a coordinator and returns if a reconnect should occur. +func (a *agent) runDERPMapSubscriber(ctx context.Context, network *tailnet.Conn) error { + ctx, cancel := context.WithCancel(ctx) + defer cancel() + + updates, closer, err := a.client.DERPMapUpdates(ctx) + if err != nil { + return err + } + defer closer.Close() + + a.logger.Info(ctx, "connected to derp map endpoint") + for { + select { + case <-ctx.Done(): + return ctx.Err() + case update := <-updates: + if update.Err != nil { + return update.Err + } + if update.DERPMap != nil && !tailnet.CompareDERPMaps(a.network.DERPMap(), update.DERPMap) { + a.logger.Info(ctx, "updating derp map due to detected changes") + network.SetDERPMap(update.DERPMap) + } + } + } +} + func (a *agent) runStartupScript(ctx context.Context, script string) error { return a.runScript(ctx, "startup", script) } diff --git a/agent/agent_test.go b/agent/agent_test.go index 8771f36860611..017fb076fc034 100644 --- a/agent/agent_test.go +++ b/agent/agent_test.go @@ -1912,12 +1912,37 @@ type client struct { lifecycleStates []codersdk.WorkspaceAgentLifecycle startup agentsdk.PostStartupRequest logs []agentsdk.StartupLog + + derpMapUpdates chan agentsdk.DERPMapUpdate } func (c *client) Manifest(_ context.Context) (agentsdk.Manifest, error) { return c.manifest, nil } +type closer struct { + closeFunc func() error +} + +func (c *closer) Close() error { + return c.closeFunc() +} + +func (c *client) DERPMapUpdates(_ context.Context) (<-chan agentsdk.DERPMapUpdate, io.Closer, error) { + updates := c.derpMapUpdates + if updates == nil { + updates = make(chan agentsdk.DERPMapUpdate) + } + + closed := make(chan struct{}) + return updates, &closer{ + closeFunc: func() error { + close(closed) + return nil + }, + }, nil +} + func (c *client) Listen(_ context.Context) 
(net.Conn, error) { clientConn, serverConn := net.Pipe() closed := make(chan struct{}) diff --git a/coderd/coderd.go b/coderd/coderd.go index b36201dfae5b9..b15e62d5ed44d 100644 --- a/coderd/coderd.go +++ b/coderd/coderd.go @@ -486,6 +486,10 @@ func New(options *Options) *API { r.Use(apiKeyMiddleware) r.Get("/regions", api.regions) }) + r.Route("/derp-map", func(r chi.Router) { + r.Use(apiKeyMiddleware) + r.Get("/", api.derpMapUpdates) + }) r.Route("/deployment", func(r chi.Router) { r.Use(apiKeyMiddleware) r.Get("/config", api.deploymentValues) diff --git a/coderd/workspaceagents.go b/coderd/workspaceagents.go index 5240718abf3a7..31d04e06718e7 100644 --- a/coderd/workspaceagents.go +++ b/coderd/workspaceagents.go @@ -731,6 +731,8 @@ func (api *API) workspaceAgentListeningPorts(rw http.ResponseWriter, r *http.Req func (api *API) dialWorkspaceAgentTailnet(agentID uuid.UUID) (*codersdk.WorkspaceAgentConn, error) { clientConn, serverConn := net.Pipe() + + derpMap := api.DERPMap() conn, err := tailnet.NewConn(&tailnet.Options{ Addresses: []netip.Prefix{netip.PrefixFrom(tailnet.IP(), 128)}, DERPMap: api.DERPMap(), @@ -761,6 +763,28 @@ func (api *API) dialWorkspaceAgentTailnet(agentID uuid.UUID) (*codersdk.Workspac return conn.UpdateNodes(nodes, true) }) conn.SetNodeCallback(sendNodes) + go func() { + for { + ticker := time.NewTicker(5 * time.Second) + defer ticker.Stop() + + lastDERPMap := derpMap + for { + select { + case <-ctx.Done(): + return + case <-ticker.C: + } + + derpMap := api.DERPMap() + if lastDERPMap == nil || !tailnet.CompareDERPMaps(lastDERPMap, derpMap) { + conn.SetDERPMap(derpMap) + lastDERPMap = derpMap + } + } + } + }() + agentConn := &codersdk.WorkspaceAgentConn{ Conn: conn, CloseFunc: func() { @@ -782,6 +806,9 @@ func (api *API) dialWorkspaceAgentTailnet(agentID uuid.UUID) (*codersdk.Workspac }() if !agentConn.AwaitReachable(ctx) { _ = agentConn.Close() + _ = serverConn.Close() + _ = clientConn.Close() + cancel() return nil, xerrors.Errorf("agent 
 not reachable") } return agentConn, nil @@ -824,6 +851,55 @@ func (api *API) workspaceAgentConnectionGeneric(rw http.ResponseWriter, r *http. }) } +// @Summary Get DERP map updates +// @ID get-derp-map-updates +// @Security CoderSessionToken +// @Tags Agents +// @Success 101 +// @Router /derp-map [get] +func (api *API) derpMapUpdates(rw http.ResponseWriter, r *http.Request) { + ctx := r.Context() + + api.WebsocketWaitMutex.Lock() + api.WebsocketWaitGroup.Add(1) + api.WebsocketWaitMutex.Unlock() + defer api.WebsocketWaitGroup.Done() + + ws, err := websocket.Accept(rw, r, nil) + if err != nil { + httpapi.Write(ctx, rw, http.StatusBadRequest, codersdk.Response{ + Message: "Failed to accept websocket.", + Detail: err.Error(), + }) + return + } + nconn := websocket.NetConn(ctx, ws, websocket.MessageBinary) + + ticker := time.NewTicker(5 * time.Second) + defer ticker.Stop() + + var lastDERPMap *tailcfg.DERPMap + for { + derpMap := api.DERPMap() + if lastDERPMap == nil || !tailnet.CompareDERPMaps(lastDERPMap, derpMap) { + err := json.NewEncoder(nconn).Encode(derpMap) + if err != nil { + _ = ws.Close(websocket.StatusInternalError, err.Error()) + return + } + lastDERPMap = derpMap + } + + select { + case <-ctx.Done(): + return + case <-api.ctx.Done(): + return + case <-ticker.C: + } + } +} + // @Summary Coordinate workspace agent via Tailnet // @Description It accepts a WebSocket connection to an agent that listens to // @Description incoming connections and publishes node updates. 
diff --git a/coderd/wsconncache/wsconncache_test.go b/coderd/wsconncache/wsconncache_test.go index 9021fdf23e290..6389a571f2910 100644 --- a/coderd/wsconncache/wsconncache_test.go +++ b/coderd/wsconncache/wsconncache_test.go @@ -219,6 +219,24 @@ func (c *client) Manifest(_ context.Context) (agentsdk.Manifest, error) { return c.manifest, nil } +type closer struct { + closeFunc func() error +} + +func (c *closer) Close() error { + return c.closeFunc() +} + +func (c *client) DERPMapUpdates(_ context.Context) (<-chan agentsdk.DERPMapUpdate, io.Closer, error) { + closed := make(chan struct{}) + return make(<-chan agentsdk.DERPMapUpdate), &closer{ + closeFunc: func() error { + close(closed) + return nil + }, + }, nil +} + func (c *client) Listen(_ context.Context) (net.Conn, error) { clientConn, serverConn := net.Pipe() closed := make(chan struct{}) diff --git a/codersdk/agentsdk/agentsdk.go b/codersdk/agentsdk/agentsdk.go index ac0211cf2d37e..4a1a3c2bfe684 100644 --- a/codersdk/agentsdk/agentsdk.go +++ b/codersdk/agentsdk/agentsdk.go @@ -151,6 +151,76 @@ func (c *Client) Manifest(ctx context.Context) (Manifest, error) { return agentMeta, nil } +type DERPMapUpdate struct { + Err error + DERPMap *tailcfg.DERPMap +} + +// DERPMapUpdates connects to the DERP map updates WebSocket. 
+func (c *Client) DERPMapUpdates(ctx context.Context) (<-chan DERPMapUpdate, io.Closer, error) { + derpMapURL, err := c.SDK.URL.Parse("/api/v2/derp-map") + if err != nil { + return nil, nil, xerrors.Errorf("parse url: %w", err) + } + jar, err := cookiejar.New(nil) + if err != nil { + return nil, nil, xerrors.Errorf("create cookie jar: %w", err) + } + jar.SetCookies(derpMapURL, []*http.Cookie{{ + Name: codersdk.SessionTokenCookie, + Value: c.SDK.SessionToken(), + }}) + httpClient := &http.Client{ + Jar: jar, + Transport: c.SDK.HTTPClient.Transport, + } + // nolint:bodyclose + conn, res, err := websocket.Dial(ctx, derpMapURL.String(), &websocket.DialOptions{ + HTTPClient: httpClient, + }) + if err != nil { + if res == nil { + return nil, nil, err + } + return nil, nil, codersdk.ReadBodyAsError(res) + } + + ctx, cancelFunc := context.WithCancel(ctx) + ctx, wsNetConn := websocketNetConn(ctx, conn, websocket.MessageBinary) + pingClosed := pingWebSocket(ctx, c.SDK.Logger, conn, "derp map") + + updates := make(chan DERPMapUpdate) + dec := json.NewDecoder(wsNetConn) + go func() { + defer close(updates) + defer cancelFunc() + defer conn.Close(websocket.StatusGoingAway, "Listen closed") + for { + var update DERPMapUpdate + err := dec.Decode(&update.DERPMap) + if err != nil { + update.Err = err + update.DERPMap = nil + return + } + select { + case updates <- update: + case <-ctx.Done(): + return + } + } + }() + + return updates, &closer{ + closeFunc: func() error { + cancelFunc() + _ = conn.Close(websocket.StatusGoingAway, "Listen closed") + <-pingClosed + return nil + }, + }, nil +} + // Listen connects to the workspace agent coordinate WebSocket // that handles connection negotiation. 
func (c *Client) Listen(ctx context.Context) (net.Conn, error) { @@ -183,50 +253,14 @@ func (c *Client) Listen(ctx context.Context) (net.Conn, error) { ctx, cancelFunc := context.WithCancel(ctx) ctx, wsNetConn := websocketNetConn(ctx, conn, websocket.MessageBinary) - - // Ping once every 30 seconds to ensure that the websocket is alive. If we - // don't get a response within 30s we kill the websocket and reconnect. - // See: https://github.com/coder/coder/pull/5824 - closed := make(chan struct{}) - go func() { - defer close(closed) - tick := 30 * time.Second - ticker := time.NewTicker(tick) - defer ticker.Stop() - defer func() { - c.SDK.Logger.Debug(ctx, "coordinate pinger exited") - }() - for { - select { - case <-ctx.Done(): - return - case start := <-ticker.C: - ctx, cancel := context.WithTimeout(ctx, tick) - - err := conn.Ping(ctx) - if err != nil { - c.SDK.Logger.Error(ctx, "workspace agent coordinate ping", slog.Error(err)) - - err := conn.Close(websocket.StatusGoingAway, "Ping failed") - if err != nil { - c.SDK.Logger.Error(ctx, "close workspace agent coordinate websocket", slog.Error(err)) - } - - cancel() - return - } - - c.SDK.Logger.Debug(ctx, "got coordinate pong", slog.F("took", time.Since(start))) - cancel() - } - } - }() + pingClosed := pingWebSocket(ctx, c.SDK.Logger, conn, "coordinate") return &closeNetConn{ Conn: wsNetConn, closeFunc: func() { cancelFunc() - <-closed + _ = conn.Close(websocket.StatusGoingAway, "Listen closed") + <-pingClosed }, }, nil } @@ -687,3 +721,53 @@ func (c *closeNetConn) Close() error { c.closeFunc() return c.Conn.Close() } + +func pingWebSocket(ctx context.Context, logger slog.Logger, conn *websocket.Conn, name string) <-chan struct{} { + // Ping once every 30 seconds to ensure that the websocket is alive. If we + // don't get a response within 30s we kill the websocket and reconnect. 
+ // See: https://github.com/coder/coder/pull/5824 + closed := make(chan struct{}) + go func() { + defer close(closed) + tick := 30 * time.Second + ticker := time.NewTicker(tick) + defer ticker.Stop() + defer func() { + logger.Debug(ctx, fmt.Sprintf("%s pinger exited", name)) + }() + for { + select { + case <-ctx.Done(): + return + case start := <-ticker.C: + ctx, cancel := context.WithTimeout(ctx, tick) + + err := conn.Ping(ctx) + if err != nil { + logger.Error(ctx, fmt.Sprintf("workspace agent %s ping", name), slog.Error(err)) + + err := conn.Close(websocket.StatusGoingAway, "Ping failed") + if err != nil { + logger.Error(ctx, fmt.Sprintf("close workspace agent %s websocket", name), slog.Error(err)) + } + + cancel() + return + } + + logger.Debug(ctx, fmt.Sprintf("got %s ping", name), slog.F("took", time.Since(start))) + cancel() + } + } + }() + + return closed +} + +type closer struct { + closeFunc func() error +} + +func (c *closer) Close() error { + return c.closeFunc() +} diff --git a/codersdk/workspaceagents.go b/codersdk/workspaceagents.go index e02b720abe0b4..eb4ed3933579f 100644 --- a/codersdk/workspaceagents.go +++ b/codersdk/workspaceagents.go @@ -244,43 +244,44 @@ func (c *Client) DialWorkspaceAgent(ctx context.Context, agentID uuid.UUID, opti } }() - coordinateURL, err := c.URL.Parse(fmt.Sprintf("/api/v2/workspaceagents/%s/coordinate", agentID)) - if err != nil { - return nil, xerrors.Errorf("parse url: %w", err) - } - coordinateHeaders := make(http.Header) + headers := make(http.Header) tokenHeader := SessionTokenHeader if c.SessionTokenHeader != "" { tokenHeader = c.SessionTokenHeader } - coordinateHeaders.Set(tokenHeader, c.SessionToken()) + headers.Set(tokenHeader, c.SessionToken()) ctx, cancel := context.WithCancel(ctx) defer func() { if err != nil { cancel() } }() - closed := make(chan struct{}) - first := make(chan error) + + coordinateURL, err := c.URL.Parse(fmt.Sprintf("/api/v2/workspaceagents/%s/coordinate", agentID)) + if err != nil { + 
return nil, xerrors.Errorf("parse url: %w", err) + } + closedCoordinator := make(chan struct{}) + firstCoordinator := make(chan error) go func() { - defer close(closed) + defer close(closedCoordinator) isFirst := true for retrier := retry.New(50*time.Millisecond, 10*time.Second); retrier.Wait(ctx); { options.Logger.Debug(ctx, "connecting") // nolint:bodyclose ws, res, err := websocket.Dial(ctx, coordinateURL.String(), &websocket.DialOptions{ HTTPClient: c.HTTPClient, - HTTPHeader: coordinateHeaders, + HTTPHeader: headers, // Need to disable compression to avoid a data-race. CompressionMode: websocket.CompressionDisabled, }) if isFirst { if res != nil && res.StatusCode == http.StatusConflict { - first <- ReadBodyAsError(res) + firstCoordinator <- ReadBodyAsError(res) return } isFirst = false - close(first) + close(firstCoordinator) } if err != nil { if errors.Is(err, context.Canceled) { @@ -307,7 +308,71 @@ func (c *Client) DialWorkspaceAgent(ctx context.Context, agentID uuid.UUID, opti _ = ws.Close(websocket.StatusGoingAway, "") } }() - err = <-first + + derpMapURL, err := c.URL.Parse("/api/v2/derp-map") + if err != nil { + return nil, xerrors.Errorf("parse url: %w", err) + } + closedDerpMap := make(chan struct{}) + firstDerpMap := make(chan error) + go func() { + defer close(closedDerpMap) + isFirst := true + for retrier := retry.New(50*time.Millisecond, 10*time.Second); retrier.Wait(ctx); { + options.Logger.Debug(ctx, "connecting to server for derp map updates") + // nolint:bodyclose + ws, res, err := websocket.Dial(ctx, derpMapURL.String(), &websocket.DialOptions{ + HTTPClient: c.HTTPClient, + HTTPHeader: headers, + // Need to disable compression to avoid a data-race. 
+ CompressionMode: websocket.CompressionDisabled, + }) + if isFirst { + if res != nil && res.StatusCode == http.StatusConflict { + firstDerpMap <- ReadBodyAsError(res) + return + } + isFirst = false + close(firstDerpMap) + } + if err != nil { + if errors.Is(err, context.Canceled) { + return + } + options.Logger.Debug(ctx, "failed to dial", slog.Error(err)) + continue + } + + var ( + nconn = websocket.NetConn(ctx, ws, websocket.MessageBinary) + dec = json.NewDecoder(nconn) + lastDERPMap *tailcfg.DERPMap + ) + for { + var derpMap tailcfg.DERPMap + err := dec.Decode(&derpMap) + if err != nil { + if !xerrors.Is(err, context.Canceled) { + options.Logger.Debug(ctx, "failed to decode derp map", slog.Error(err)) + } + _ = ws.Close(websocket.StatusGoingAway, "") + return + } + + if lastDERPMap == nil || !tailnet.CompareDERPMaps(lastDERPMap, &derpMap) { + options.Logger.Debug(ctx, "updating derp map due to detected changes") + conn.SetDERPMap(&derpMap) + } + lastDERPMap = &derpMap + } + } + }() + + err = <-firstCoordinator + if err != nil { + return nil, err + } + err = <-firstDerpMap if err != nil { return nil, err } @@ -316,7 +381,7 @@ func (c *Client) DialWorkspaceAgent(ctx context.Context, agentID uuid.UUID, opti Conn: conn, CloseFunc: func() { cancel() - <-closed + <-closedCoordinator }, } if !agentConn.AwaitReachable(ctx) { From 9e658d6f233e118dabd73756881dcf58aa8de7bd Mon Sep 17 00:00:00 2001 From: Dean Sheather Date: Mon, 24 Jul 2023 07:40:12 +0000 Subject: [PATCH 16/22] fixup! 
Merge branch 'main' into dean/proxy-derp-map --- agent/agent_test.go | 1 + coderd/coderd.go | 7 +++++++ ...0138_proxy_derp.down.sql => 000141_proxy_derp.down.sql} | 0 .../{000138_proxy_derp.up.sql => 000141_proxy_derp.up.sql} | 0 coderd/workspaceagents.go | 2 +- enterprise/coderd/coderd.go | 2 +- 6 files changed, 10 insertions(+), 2 deletions(-) rename coderd/database/migrations/{000138_proxy_derp.down.sql => 000141_proxy_derp.down.sql} (100%) rename coderd/database/migrations/{000138_proxy_derp.up.sql => 000141_proxy_derp.up.sql} (100%) diff --git a/agent/agent_test.go b/agent/agent_test.go index dfb46ed39b341..d897951496896 100644 --- a/agent/agent_test.go +++ b/agent/agent_test.go @@ -1735,6 +1735,7 @@ func TestAgent_UpdatedDERP(t *testing.T) { statsCh := make(chan *agentsdk.Stats, 50) fs := afero.NewMemMapFs() client := agenttest.NewClient(t, + logger.Named("agent"), agentID, agentsdk.Manifest{ DERPMap: originalDerpMap, diff --git a/coderd/coderd.go b/coderd/coderd.go index 7fadf31843d15..4e334b825e3ed 100644 --- a/coderd/coderd.go +++ b/coderd/coderd.go @@ -344,6 +344,13 @@ func New(options *Options) *API { Experiments: experiments, healthCheckGroup: &singleflight.Group[string, *healthcheck.Report]{}, } + if options.UpdateCheckOptions != nil { + api.updateChecker = updatecheck.New( + options.Database, + options.Logger.Named("update_checker"), + *options.UpdateCheckOptions, + ) + } if options.HealthcheckFunc == nil { options.HealthcheckFunc = func(ctx context.Context, apiKey string) *healthcheck.Report { return healthcheck.Run(ctx, &healthcheck.ReportOptions{ diff --git a/coderd/database/migrations/000138_proxy_derp.down.sql b/coderd/database/migrations/000141_proxy_derp.down.sql similarity index 100% rename from coderd/database/migrations/000138_proxy_derp.down.sql rename to coderd/database/migrations/000141_proxy_derp.down.sql diff --git a/coderd/database/migrations/000138_proxy_derp.up.sql b/coderd/database/migrations/000141_proxy_derp.up.sql similarity 
index 100% rename from coderd/database/migrations/000138_proxy_derp.up.sql rename to coderd/database/migrations/000141_proxy_derp.up.sql diff --git a/coderd/workspaceagents.go b/coderd/workspaceagents.go index 3a014b6d83977..5bf53d338536b 100644 --- a/coderd/workspaceagents.go +++ b/coderd/workspaceagents.go @@ -816,7 +816,7 @@ func (api *API) workspaceAgentConnectionGeneric(rw http.ResponseWriter, r *http. // @Security CoderSessionToken // @Tags Agents // @Success 101 -// @Router /derpmap [get] +// @Router /derp-map [get] func (api *API) derpMapUpdates(rw http.ResponseWriter, r *http.Request) { ctx := r.Context() diff --git a/enterprise/coderd/coderd.go b/enterprise/coderd/coderd.go index cf5195b536f21..5f81781a9f368 100644 --- a/enterprise/coderd/coderd.go +++ b/enterprise/coderd/coderd.go @@ -571,7 +571,7 @@ func (api *API) updateEntitlements(ctx context.Context) error { } } - if changed, enabled := featureChanged(codersdk.FeatureWorkspaceProxy); changed { + if initial, changed, enabled := featureChanged(codersdk.FeatureWorkspaceProxy); shouldUpdate(initial, changed, enabled) { if enabled { fn := derpMapper(api.Logger, api.ProxyHealth) api.AGPL.DERPMapper.Store(&fn) From 67f2e5cd831d18e3774db5999b28b454d91071ca Mon Sep 17 00:00:00 2001 From: Dean Sheather Date: Tue, 25 Jul 2023 13:39:38 +0000 Subject: [PATCH 17/22] Working tests --- cli/server.go | 2 +- coderd/coderd.go | 20 ++-- coderd/coderdtest/coderdtest.go | 3 +- coderd/httpapi/httpapi.go | 16 ++- coderd/workspaceagents.go | 5 +- coderd/workspaceagents_test.go | 101 ++++++++++++++++++ coderd/workspacebuilds_test.go | 5 +- codersdk/agentsdk/agentsdk.go | 44 +++++--- .../coderd/coderdenttest/coderdenttest.go | 2 +- enterprise/coderd/workspaceproxy.go | 2 +- 10 files changed, 164 insertions(+), 36 deletions(-) diff --git a/cli/server.go b/cli/server.go index bae3ebf71477e..e6a9e90d0f044 100644 --- a/cli/server.go +++ b/cli/server.go @@ -477,7 +477,7 @@ func (r *RootCmd) Server(newAPI func(context.Context, 
*coderd.Options) (*coderd. AppHostnameRegex: appHostnameRegex, Logger: logger.Named("coderd"), Database: dbfake.New(), - DERPMap: derpMap, + BaseDERPMap: derpMap, Pubsub: pubsub.NewInMemory(), CacheDir: cacheDir, GoogleTokenValidator: googleTokenValidator, diff --git a/coderd/coderd.go b/coderd/coderd.go index 4e334b825e3ed..4691c97ec06a5 100644 --- a/coderd/coderd.go +++ b/coderd/coderd.go @@ -118,10 +118,13 @@ type Options struct { RealIPConfig *httpmw.RealIPConfig TrialGenerator func(ctx context.Context, email string) error // TLSCertificates is used to mesh DERP servers securely. - TLSCertificates []tls.Certificate - TailnetCoordinator tailnet.Coordinator - DERPServer *derp.Server - DERPMap *tailcfg.DERPMap + TLSCertificates []tls.Certificate + TailnetCoordinator tailnet.Coordinator + DERPServer *derp.Server + // BaseDERPMap is used as the base DERP map for all clients and agents. + // Proxies are added to this list. + BaseDERPMap *tailcfg.DERPMap + DERPMapUpdateFrequency time.Duration SwaggerEndpoint bool SetUserGroups func(ctx context.Context, tx database.Store, userID uuid.UUID, groupNames []string) error TemplateScheduleStore *atomic.Pointer[schedule.TemplateScheduleStore] @@ -238,6 +241,9 @@ func New(options *Options) *API { if options.DERPServer == nil { options.DERPServer = derp.NewServer(key.NewNode(), tailnet.Logger(options.Logger.Named("derp"))) } + if options.DERPMapUpdateFrequency == 0 { + options.DERPMapUpdateFrequency = 5 * time.Second + } if options.TailnetCoordinator == nil { options.TailnetCoordinator = tailnet.NewCoordinator(options.Logger) } @@ -379,7 +385,7 @@ func New(options *Options) *API { api.agentProvider, err = NewServerTailnet(api.ctx, options.Logger, options.DERPServer, - options.DERPMap, + options.BaseDERPMap, func(context.Context) (tailnet.MultiAgentConn, error) { return (*api.TailnetCoordinator.Load()).ServeMultiAgent(uuid.New()), nil }, @@ -1107,10 +1113,10 @@ func (api *API) CreateInMemoryProvisionerDaemon(ctx context.Context, 
debounce ti func (api *API) DERPMap() *tailcfg.DERPMap { fn := api.DERPMapper.Load() if fn != nil { - return (*fn)(api.Options.DERPMap) + return (*fn)(api.Options.BaseDERPMap) } - return api.Options.DERPMap + return api.Options.BaseDERPMap } // nolint:revive diff --git a/coderd/coderdtest/coderdtest.go b/coderd/coderdtest/coderdtest.go index 71882acec4a10..546bb60b1c1bd 100644 --- a/coderd/coderdtest/coderdtest.go +++ b/coderd/coderdtest/coderdtest.go @@ -385,7 +385,8 @@ func NewOptions(t testing.TB, options *Options) (func(http.Handler), context.Can TLSCertificates: options.TLSCertificates, TrialGenerator: options.TrialGenerator, TailnetCoordinator: options.Coordinator, - DERPMap: derpMap, + BaseDERPMap: derpMap, + DERPMapUpdateFrequency: 150 * time.Millisecond, MetricsCacheRefreshInterval: options.MetricsCacheRefreshInterval, AgentStatsRefreshInterval: options.AgentStatsRefreshInterval, DeploymentValues: options.DeploymentValues, diff --git a/coderd/httpapi/httpapi.go b/coderd/httpapi/httpapi.go index 658c0cc39294b..b7559d5feeabe 100644 --- a/coderd/httpapi/httpapi.go +++ b/coderd/httpapi/httpapi.go @@ -151,11 +151,9 @@ func Write(ctx context.Context, rw http.ResponseWriter, status int, response int enc := json.NewEncoder(rw) enc.SetEscapeHTML(true) - err := enc.Encode(response) - if err != nil { - http.Error(rw, err.Error(), http.StatusInternalServerError) - return - } + // We can't really do much about these errors, it's probably due to a + // dropped connection. + _ = enc.Encode(response) } func WriteIndent(ctx context.Context, rw http.ResponseWriter, status int, response interface{}) { @@ -169,11 +167,9 @@ func WriteIndent(ctx context.Context, rw http.ResponseWriter, status int, respon enc.SetEscapeHTML(true) enc.SetIndent("", "\t") - err := enc.Encode(response) - if err != nil { - http.Error(rw, err.Error(), http.StatusInternalServerError) - return - } + // We can't really do much about these errors, it's probably due to a + // dropped connection. 
+ _ = enc.Encode(response) } // Read decodes JSON from the HTTP request into the value provided. It uses diff --git a/coderd/workspaceagents.go b/coderd/workspaceagents.go index 5bf53d338536b..2fd5aa49aeb44 100644 --- a/coderd/workspaceagents.go +++ b/coderd/workspaceagents.go @@ -834,8 +834,9 @@ func (api *API) derpMapUpdates(rw http.ResponseWriter, r *http.Request) { return } nconn := websocket.NetConn(ctx, ws, websocket.MessageBinary) + defer nconn.Close() - ticker := time.NewTicker(5 * time.Second) + ticker := time.NewTicker(api.Options.DERPMapUpdateFrequency) defer ticker.Stop() var lastDERPMap *tailcfg.DERPMap @@ -857,6 +858,8 @@ func (api *API) derpMapUpdates(rw http.ResponseWriter, r *http.Request) { return case <-ticker.C: } + + ticker.Reset(api.Options.DERPMapUpdateFrequency) } } diff --git a/coderd/workspaceagents_test.go b/coderd/workspaceagents_test.go index 6afec803bbc49..e5fb66b909807 100644 --- a/coderd/workspaceagents_test.go +++ b/coderd/workspaceagents_test.go @@ -15,6 +15,7 @@ import ( "github.com/google/uuid" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + "tailscale.com/tailcfg" "cdr.dev/slog" "cdr.dev/slog/sloggers/slogtest" @@ -25,6 +26,7 @@ import ( "github.com/coder/coder/codersdk/agentsdk" "github.com/coder/coder/provisioner/echo" "github.com/coder/coder/provisionersdk/proto" + "github.com/coder/coder/tailnet/tailnettest" "github.com/coder/coder/testutil" ) @@ -1247,3 +1249,102 @@ func TestWorkspaceAgent_Startup(t *testing.T) { require.Equal(t, http.StatusBadRequest, cerr.StatusCode()) }) } + +// TestWorkspaceAgent_UpdatedDERP runs a real coderd server, with a real agent +// and a real client, and updates the DERP map live to ensure connections still +// work. 
+func TestWorkspaceAgent_UpdatedDERP(t *testing.T) { + t.Parallel() + + logger := slogtest.Make(t, nil).Leveled(slog.LevelDebug) + + dv := coderdtest.DeploymentValues(t) + err := dv.DERP.Config.BlockDirect.Set("true") + require.NoError(t, err) + + client, closer, api := coderdtest.NewWithAPI(t, &coderdtest.Options{ + IncludeProvisionerDaemon: true, + DeploymentValues: dv, + }) + defer closer.Close() + user := coderdtest.CreateFirstUser(t, client) + + originalDerpMap := api.DERPMap() + require.NotNil(t, originalDerpMap) + + // Change the DERP mapper to our custom one. + currentDerpMap := originalDerpMap + derpMapFn := func(_ *tailcfg.DERPMap) *tailcfg.DERPMap { + return currentDerpMap + } + api.DERPMapper.Store(&derpMapFn) + + // Start workspace a workspace agent. + agentToken := uuid.NewString() + version := coderdtest.CreateTemplateVersion(t, client, user.OrganizationID, &echo.Responses{ + Parse: echo.ParseComplete, + ProvisionPlan: echo.ProvisionComplete, + ProvisionApply: echo.ProvisionApplyWithAgent(agentToken), + }) + template := coderdtest.CreateTemplate(t, client, user.OrganizationID, version.ID) + coderdtest.AwaitTemplateVersionJob(t, client, version.ID) + workspace := coderdtest.CreateWorkspace(t, client, user.OrganizationID, template.ID) + coderdtest.AwaitWorkspaceBuildJob(t, client, workspace.LatestBuild.ID) + agentClient := agentsdk.New(client.URL) + agentClient.SetSessionToken(agentToken) + agentCloser := agent.New(agent.Options{ + Client: agentClient, + Logger: logger.Named("agent"), + }) + defer func() { + _ = agentCloser.Close() + }() + resources := coderdtest.AwaitWorkspaceAgents(t, client, workspace.ID) + agentID := resources[0].Agents[0].ID + + ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong) + defer cancel() + + // Connect from a client. 
+ conn1, err := client.DialWorkspaceAgent(ctx, agentID, &codersdk.DialWorkspaceAgentOptions{ + Logger: logger.Named("client1"), + }) + require.NoError(t, err) + defer conn1.Close() + ok := conn1.AwaitReachable(ctx) + require.True(t, ok) + + // Change the DERP map and change the region ID. + newDerpMap, _ := tailnettest.RunDERPAndSTUN(t) + require.NotNil(t, newDerpMap) + newDerpMap.Regions[2] = newDerpMap.Regions[1] + delete(newDerpMap.Regions, 1) + newDerpMap.Regions[2].RegionID = 2 + for _, node := range newDerpMap.Regions[2].Nodes { + node.RegionID = 2 + } + currentDerpMap = newDerpMap + + // Wait for the agent's DERP map to be updated. + // TODO: this + + // Wait for the DERP map to be updated on the existing client. + require.Eventually(t, func() bool { + regionIDs := conn1.Conn.DERPMap().RegionIDs() + return len(regionIDs) == 1 && regionIDs[0] == 2 + }, testutil.WaitLong, testutil.IntervalFast) + + // The first client should still be able to reach the agent. + ok = conn1.AwaitReachable(ctx) + require.True(t, ok) + + // Connect from a second client. 
+ conn2, err := client.DialWorkspaceAgent(ctx, agentID, &codersdk.DialWorkspaceAgentOptions{ + Logger: logger.Named("client2"), + }) + require.NoError(t, err) + defer conn2.Close() + ok = conn2.AwaitReachable(ctx) + require.True(t, ok) + require.Equal(t, []int{2}, conn2.DERPMap().RegionIDs()) +} diff --git a/coderd/workspacebuilds_test.go b/coderd/workspacebuilds_test.go index 0fb5b03139224..b838e39e3b251 100644 --- a/coderd/workspacebuilds_test.go +++ b/coderd/workspacebuilds_test.go @@ -645,7 +645,8 @@ func TestWorkspaceBuildDebugMode(t *testing.T) { // Create user deploymentValues := coderdtest.DeploymentValues(t) - deploymentValues.EnableTerraformDebugMode = false + err := deploymentValues.EnableTerraformDebugMode.Set("false") + require.NoError(t, err) adminClient := coderdtest.New(t, &coderdtest.Options{IncludeProvisionerDaemon: true, DeploymentValues: deploymentValues}) owner := coderdtest.CreateFirstUser(t, adminClient) @@ -663,7 +664,7 @@ func TestWorkspaceBuildDebugMode(t *testing.T) { ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong) defer cancel() - _, err := adminClient.CreateWorkspaceBuild(ctx, workspace.ID, codersdk.CreateWorkspaceBuildRequest{ + _, err = adminClient.CreateWorkspaceBuild(ctx, workspace.ID, codersdk.CreateWorkspaceBuildRequest{ TemplateVersionID: workspace.LatestBuild.TemplateVersionID, Transition: codersdk.WorkspaceTransitionStart, LogLevel: "debug", diff --git a/codersdk/agentsdk/agentsdk.go b/codersdk/agentsdk/agentsdk.go index be605c7bb8cd1..9d3ad05aa1d79 100644 --- a/codersdk/agentsdk/agentsdk.go +++ b/codersdk/agentsdk/agentsdk.go @@ -115,6 +115,20 @@ func (c *Client) Manifest(ctx context.Context) (Manifest, error) { if err != nil { return Manifest{}, err } + err = c.rewriteDerpMap(agentMeta.DERPMap) + if err != nil { + return Manifest{}, err + } + return agentMeta, nil +} + +// rewriteDerpMap rewrites the DERP map to use the access URL of the SDK as the +// "embedded relay" access URL. 
The passed derp map is modified in place. +// +// Agents can provide an arbitrary access URL that may be different that the +// globally configured one. This breaks the built-in DERP, which would continue +// to reference the global access URL. +func (c *Client) rewriteDerpMap(derpMap *tailcfg.DERPMap) error { accessingPort := c.SDK.URL.Port() if accessingPort == "" { accessingPort = "80" @@ -124,15 +138,9 @@ func (c *Client) Manifest(ctx context.Context) (Manifest, error) { } accessPort, err := strconv.Atoi(accessingPort) if err != nil { - return Manifest{}, xerrors.Errorf("convert accessing port %q: %w", accessingPort, err) + return xerrors.Errorf("convert accessing port %q: %w", accessingPort, err) } - // Agents can provide an arbitrary access URL that may be different - // that the globally configured one. This breaks the built-in DERP, - // which would continue to reference the global access URL. - // - // This converts all built-in DERPs to use the access URL that the - // manifest request was performed with. 
- for _, region := range agentMeta.DERPMap.Regions { + for _, region := range derpMap.Regions { if !region.EmbeddedRelay { continue } @@ -146,7 +154,7 @@ func (c *Client) Manifest(ctx context.Context) (Manifest, error) { node.ForceHTTP = c.SDK.URL.Scheme == "http" } } - return agentMeta, nil + return nil } type DERPMapUpdate struct { @@ -187,10 +195,14 @@ func (c *Client) DERPMapUpdates(ctx context.Context) (<-chan DERPMapUpdate, io.C ctx, wsNetConn := websocketNetConn(ctx, conn, websocket.MessageBinary) pingClosed := pingWebSocket(ctx, c.SDK.Logger(), conn, "derp map") - updates := make(chan DERPMapUpdate) - dec := json.NewDecoder(wsNetConn) + var ( + updates = make(chan DERPMapUpdate) + updatesClosed = make(chan struct{}) + dec = json.NewDecoder(wsNetConn) + ) go func() { defer close(updates) + defer close(updatesClosed) defer cancelFunc() defer conn.Close(websocket.StatusGoingAway, "Listen closed") for { @@ -201,6 +213,13 @@ func (c *Client) DERPMapUpdates(ctx context.Context) (<-chan DERPMapUpdate, io.C update.DERPMap = nil return } + err = c.rewriteDerpMap(update.DERPMap) + if err != nil { + update.Err = err + update.DERPMap = nil + return + } + select { case updates <- update: case <-ctx.Done(): @@ -212,8 +231,9 @@ func (c *Client) DERPMapUpdates(ctx context.Context) (<-chan DERPMapUpdate, io.C return updates, &closer{ closeFunc: func() error { cancelFunc() - _ = conn.Close(websocket.StatusGoingAway, "Listen closed") + _ = wsNetConn.Close() <-pingClosed + <-updatesClosed return nil }, }, nil diff --git a/enterprise/coderd/coderdenttest/coderdenttest.go b/enterprise/coderd/coderdenttest/coderdenttest.go index e8eed329a29d0..e9ac69ac24f17 100644 --- a/enterprise/coderd/coderdenttest/coderdenttest.go +++ b/enterprise/coderd/coderdenttest/coderdenttest.go @@ -86,7 +86,7 @@ func NewWithAPI(t *testing.T, options *Options) ( BrowserOnly: options.BrowserOnly, SCIMAPIKey: options.SCIMAPIKey, DERPServerRelayAddress: oop.AccessURL.String(), - DERPServerRegionID: 
oop.DERPMap.RegionIDs()[0], + DERPServerRegionID: oop.BaseDERPMap.RegionIDs()[0], Options: oop, EntitlementsUpdateInterval: options.EntitlementsUpdateInterval, Keys: Keys, diff --git a/enterprise/coderd/workspaceproxy.go b/enterprise/coderd/workspaceproxy.go index c900344878609..923746eff424a 100644 --- a/enterprise/coderd/workspaceproxy.go +++ b/enterprise/coderd/workspaceproxy.go @@ -569,7 +569,7 @@ func (api *API) workspaceProxyRegister(rw http.ResponseWriter, r *http.Request) return } - startingRegionID, _ := getProxyDERPStartingRegionID(api.Options.DERPMap) + startingRegionID, _ := getProxyDERPStartingRegionID(api.Options.BaseDERPMap) regionID := int32(startingRegionID) + proxy.RegionID err := api.Database.InTx(func(db database.Store) error { From c26936ab61bd617e3ba3f6b5bc55f504fa86a417 Mon Sep 17 00:00:00 2001 From: Dean Sheather Date: Tue, 25 Jul 2023 13:46:47 +0000 Subject: [PATCH 18/22] fixup! Merge branch 'main' into dean/proxy-derp-map --- .../{000141_proxy_derp.down.sql => 000142_proxy_derp.down.sql} | 0 .../{000141_proxy_derp.up.sql => 000142_proxy_derp.up.sql} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename coderd/database/migrations/{000141_proxy_derp.down.sql => 000142_proxy_derp.down.sql} (100%) rename coderd/database/migrations/{000141_proxy_derp.up.sql => 000142_proxy_derp.up.sql} (100%) diff --git a/coderd/database/migrations/000141_proxy_derp.down.sql b/coderd/database/migrations/000142_proxy_derp.down.sql similarity index 100% rename from coderd/database/migrations/000141_proxy_derp.down.sql rename to coderd/database/migrations/000142_proxy_derp.down.sql diff --git a/coderd/database/migrations/000141_proxy_derp.up.sql b/coderd/database/migrations/000142_proxy_derp.up.sql similarity index 100% rename from coderd/database/migrations/000141_proxy_derp.up.sql rename to coderd/database/migrations/000142_proxy_derp.up.sql From e59de5a052186c2c4921bfb3edf3b19f7992d86a Mon Sep 17 00:00:00 2001 From: Dean Sheather Date: Tue, 25 Jul 2023 
14:12:45 +0000 Subject: [PATCH 19/22] fixup! Merge branch 'main' into dean/proxy-derp-map --- coderd/apidoc/docs.go | 32 +++++++++++--------------------- coderd/apidoc/swagger.json | 32 +++++++++++--------------------- docs/admin/audit-logs.md | 32 +------------------------------- docs/api/agents.md | 4 ++-- docs/api/schemas.md | 28 ++++++++++++---------------- 5 files changed, 37 insertions(+), 91 deletions(-) diff --git a/coderd/apidoc/docs.go b/coderd/apidoc/docs.go index 68cf63aca6099..7612020709a89 100644 --- a/coderd/apidoc/docs.go +++ b/coderd/apidoc/docs.go @@ -530,7 +530,7 @@ const docTemplate = `{ } } }, - "/derpmap": { + "/derp-map": { "get": { "security": [ { @@ -5340,11 +5340,6 @@ const docTemplate = `{ } } }, -<<<<<<< HEAD - "/workspaceproxies/me/deregister": { -||||||| 616e1d7e9 - "/workspaceproxies/me/goingaway": { -======= "/workspaceproxies/me/coordinate": { "get": { "security": [ @@ -5367,8 +5362,7 @@ const docTemplate = `{ } } }, - "/workspaceproxies/me/goingaway": { ->>>>>>> main + "/workspaceproxies/me/deregister": { "post": { "security": [ { @@ -11437,18 +11431,6 @@ const docTemplate = `{ } } }, -<<<<<<< HEAD - "wsproxysdk.DeregisterWorkspaceProxyRequest": { - "type": "object", - "properties": { - "replica_id": { - "description": "ReplicaID is a unique identifier for the replica of the proxy that is\nderegistering. It should be generated by the client on startup and\nshould've already been passed to the register endpoint.", - "type": "string" - } - } - }, -||||||| 616e1d7e9 -======= "wsproxysdk.AgentIsLegacyResponse": { "type": "object", "properties": { @@ -11460,7 +11442,15 @@ const docTemplate = `{ } } }, ->>>>>>> main + "wsproxysdk.DeregisterWorkspaceProxyRequest": { + "type": "object", + "properties": { + "replica_id": { + "description": "ReplicaID is a unique identifier for the replica of the proxy that is\nderegistering. 
It should be generated by the client on startup and\nshould've already been passed to the register endpoint.", + "type": "string" + } + } + }, "wsproxysdk.IssueSignedAppTokenResponse": { "type": "object", "properties": { diff --git a/coderd/apidoc/swagger.json b/coderd/apidoc/swagger.json index 4ca4ea65c91c0..2dbc5da3ed05e 100644 --- a/coderd/apidoc/swagger.json +++ b/coderd/apidoc/swagger.json @@ -450,7 +450,7 @@ } } }, - "/derpmap": { + "/derp-map": { "get": { "security": [ { @@ -4706,11 +4706,6 @@ } } }, -<<<<<<< HEAD - "/workspaceproxies/me/deregister": { -||||||| 616e1d7e9 - "/workspaceproxies/me/goingaway": { -======= "/workspaceproxies/me/coordinate": { "get": { "security": [ @@ -4731,8 +4726,7 @@ } } }, - "/workspaceproxies/me/goingaway": { ->>>>>>> main + "/workspaceproxies/me/deregister": { "post": { "security": [ { @@ -10429,18 +10423,6 @@ } } }, -<<<<<<< HEAD - "wsproxysdk.DeregisterWorkspaceProxyRequest": { - "type": "object", - "properties": { - "replica_id": { - "description": "ReplicaID is a unique identifier for the replica of the proxy that is\nderegistering. It should be generated by the client on startup and\nshould've already been passed to the register endpoint.", - "type": "string" - } - } - }, -||||||| 616e1d7e9 -======= "wsproxysdk.AgentIsLegacyResponse": { "type": "object", "properties": { @@ -10452,7 +10434,15 @@ } } }, ->>>>>>> main + "wsproxysdk.DeregisterWorkspaceProxyRequest": { + "type": "object", + "properties": { + "replica_id": { + "description": "ReplicaID is a unique identifier for the replica of the proxy that is\nderegistering. 
It should be generated by the client on startup and\nshould've already been passed to the register endpoint.", + "type": "string" + } + } + }, "wsproxysdk.IssueSignedAppTokenResponse": { "type": "object", "properties": { diff --git a/docs/admin/audit-logs.md b/docs/admin/audit-logs.md index a31d70b26a380..27ccbb763ab2a 100644 --- a/docs/admin/audit-logs.md +++ b/docs/admin/audit-logs.md @@ -9,35 +9,6 @@ We track the following resources: -<<<<<<< HEAD -| Resource | | -| -------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| APIKey
login, logout, register, create, delete |
FieldTracked
created_attrue
expires_attrue
hashed_secretfalse
idfalse
ip_addressfalse
last_usedtrue
lifetime_secondsfalse
login_typefalse
scopefalse
token_namefalse
updated_atfalse
user_idtrue
| -| AuditOAuthConvertState
|
FieldTracked
created_attrue
expires_attrue
from_login_typetrue
to_login_typetrue
user_idtrue
| -| Group
create, write, delete |
FieldTracked
avatar_urltrue
idtrue
memberstrue
nametrue
organization_idfalse
quota_allowancetrue
| -| GitSSHKey
create |
FieldTracked
created_atfalse
private_keytrue
public_keytrue
updated_atfalse
user_idtrue
| -| License
create, delete |
FieldTracked
exptrue
idfalse
jwtfalse
uploaded_attrue
uuidtrue
| -| Template
write, delete |
FieldTracked
active_version_idtrue
allow_user_autostarttrue
allow_user_autostoptrue
allow_user_cancel_workspace_jobstrue
created_atfalse
created_bytrue
default_ttltrue
deletedfalse
descriptiontrue
display_nametrue
failure_ttltrue
group_acltrue
icontrue
idtrue
inactivity_ttltrue
locked_ttltrue
max_ttltrue
nametrue
organization_idfalse
provisionertrue
updated_atfalse
user_acltrue
| -| TemplateVersion
create, write |
FieldTracked
created_atfalse
created_bytrue
git_auth_providersfalse
idtrue
job_idfalse
messagefalse
nametrue
organization_idfalse
readmetrue
template_idtrue
updated_atfalse
| -| User
create, write, delete |
FieldTracked
avatar_urlfalse
created_atfalse
deletedtrue
emailtrue
hashed_passwordtrue
idtrue
last_seen_atfalse
login_typetrue
rbac_rolestrue
statustrue
updated_atfalse
usernametrue
| -| Workspace
create, write, delete |
FieldTracked
autostart_scheduletrue
created_atfalse
deletedfalse
idtrue
last_used_atfalse
locked_attrue
nametrue
organization_idfalse
owner_idtrue
template_idtrue
ttltrue
updated_atfalse
| -| WorkspaceBuild
start, stop |
FieldTracked
build_numberfalse
created_atfalse
daily_costfalse
deadlinefalse
idfalse
initiator_idfalse
job_idfalse
max_deadlinefalse
provisioner_statefalse
reasonfalse
template_version_idtrue
transitionfalse
updated_atfalse
workspace_idfalse
| -| WorkspaceProxy
|
FieldTracked
created_attrue
deletedfalse
derp_enabledtrue
display_nametrue
icontrue
idtrue
nametrue
region_idtrue
token_hashed_secrettrue
updated_atfalse
urltrue
wildcard_hostnametrue
| -||||||| 616e1d7e9 -| Resource | | -| -------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| APIKey
login, logout, register, create, delete |
FieldTracked
created_attrue
expires_attrue
hashed_secretfalse
idfalse
ip_addressfalse
last_usedtrue
lifetime_secondsfalse
login_typefalse
scopefalse
token_namefalse
updated_atfalse
user_idtrue
| -| AuditOAuthConvertState
|
FieldTracked
created_attrue
expires_attrue
from_login_typetrue
to_login_typetrue
user_idtrue
| -| Group
create, write, delete |
FieldTracked
avatar_urltrue
idtrue
memberstrue
nametrue
organization_idfalse
quota_allowancetrue
| -| GitSSHKey
create |
FieldTracked
created_atfalse
private_keytrue
public_keytrue
updated_atfalse
user_idtrue
| -| License
create, delete |
FieldTracked
exptrue
idfalse
jwtfalse
uploaded_attrue
uuidtrue
| -| Template
write, delete |
FieldTracked
active_version_idtrue
allow_user_autostarttrue
allow_user_autostoptrue
allow_user_cancel_workspace_jobstrue
created_atfalse
created_bytrue
default_ttltrue
deletedfalse
descriptiontrue
display_nametrue
failure_ttltrue
group_acltrue
icontrue
idtrue
inactivity_ttltrue
locked_ttltrue
max_ttltrue
nametrue
organization_idfalse
provisionertrue
updated_atfalse
user_acltrue
| -| TemplateVersion
create, write |
FieldTracked
created_atfalse
created_bytrue
git_auth_providersfalse
idtrue
job_idfalse
messagefalse
nametrue
organization_idfalse
readmetrue
template_idtrue
updated_atfalse
| -| User
create, write, delete |
FieldTracked
avatar_urlfalse
created_atfalse
deletedtrue
emailtrue
hashed_passwordtrue
idtrue
last_seen_atfalse
login_typetrue
rbac_rolestrue
statustrue
updated_atfalse
usernametrue
| -| Workspace
create, write, delete |
FieldTracked
autostart_scheduletrue
created_atfalse
deletedfalse
idtrue
last_used_atfalse
locked_attrue
nametrue
organization_idfalse
owner_idtrue
template_idtrue
ttltrue
updated_atfalse
| -| WorkspaceBuild
start, stop |
FieldTracked
build_numberfalse
created_atfalse
daily_costfalse
deadlinefalse
idfalse
initiator_idfalse
job_idfalse
max_deadlinefalse
provisioner_statefalse
reasonfalse
template_version_idtrue
transitionfalse
updated_atfalse
workspace_idfalse
| -| WorkspaceProxy
|
FieldTracked
created_attrue
deletedfalse
display_nametrue
icontrue
idtrue
nametrue
token_hashed_secrettrue
updated_atfalse
urltrue
wildcard_hostnametrue
| -======= | Resource | | | -------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | APIKey
login, logout, register, create, delete |
FieldTracked
created_attrue
expires_attrue
hashed_secretfalse
idfalse
ip_addressfalse
last_usedtrue
lifetime_secondsfalse
login_typefalse
scopefalse
token_namefalse
updated_atfalse
user_idtrue
| @@ -50,8 +21,7 @@ We track the following resources: | User
create, write, delete |
FieldTracked
avatar_urlfalse
created_atfalse
deletedtrue
emailtrue
hashed_passwordtrue
idtrue
last_seen_atfalse
login_typetrue
quiet_hours_scheduletrue
rbac_rolestrue
statustrue
updated_atfalse
usernametrue
| | Workspace
create, write, delete |
FieldTracked
autostart_scheduletrue
created_atfalse
deletedfalse
deleting_attrue
idtrue
last_used_atfalse
locked_attrue
nametrue
organization_idfalse
owner_idtrue
template_idtrue
ttltrue
updated_atfalse
| | WorkspaceBuild
start, stop |
FieldTracked
build_numberfalse
created_atfalse
daily_costfalse
deadlinefalse
idfalse
initiator_by_avatar_urlfalse
initiator_by_usernamefalse
initiator_idfalse
job_idfalse
max_deadlinefalse
provisioner_statefalse
reasonfalse
template_version_idtrue
transitionfalse
updated_atfalse
workspace_idfalse
| -| WorkspaceProxy
|
FieldTracked
created_attrue
deletedfalse
display_nametrue
icontrue
idtrue
nametrue
token_hashed_secrettrue
updated_atfalse
urltrue
wildcard_hostnametrue
| ->>>>>>> main +| WorkspaceProxy
|
FieldTracked
created_attrue
deletedfalse
derp_enabledtrue
display_nametrue
icontrue
idtrue
nametrue
region_idtrue
token_hashed_secrettrue
updated_atfalse
urltrue
wildcard_hostnametrue
| diff --git a/docs/api/agents.md b/docs/api/agents.md index 344ace32d8a10..919fa06923c82 100644 --- a/docs/api/agents.md +++ b/docs/api/agents.md @@ -6,11 +6,11 @@ ```shell # Example request using curl -curl -X GET http://coder-server:8080/api/v2/derpmap \ +curl -X GET http://coder-server:8080/api/v2/derp-map \ -H 'Coder-Session-Token: API_KEY' ``` -`GET /derpmap` +`GET /derp-map` ### Responses diff --git a/docs/api/schemas.md b/docs/api/schemas.md index 5a542758f3304..652aea46b2c74 100644 --- a/docs/api/schemas.md +++ b/docs/api/schemas.md @@ -7282,40 +7282,36 @@ _None_ | `username_or_id` | string | false | | For the following fields, if the AccessMethod is AccessMethodTerminal, then only AgentNameOrID may be set and it must be a UUID. The other fields must be left blank. | | `workspace_name_or_id` | string | false | | | -<<<<<<< HEAD -## wsproxysdk.DeregisterWorkspaceProxyRequest +## wsproxysdk.AgentIsLegacyResponse ```json { - "replica_id": "string" + "found": true, + "legacy": true } ``` ### Properties -| Name | Type | Required | Restrictions | Description | -| ------------ | ------ | -------- | ------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `replica_id` | string | false | | Replica ID is a unique identifier for the replica of the proxy that is deregistering. It should be generated by the client on startup and should've already been passed to the register endpoint. 
| +| Name | Type | Required | Restrictions | Description | +| -------- | ------- | -------- | ------------ | ----------- | +| `found` | boolean | false | | | +| `legacy` | boolean | false | | | -||||||| 616e1d7e9 -======= -## wsproxysdk.AgentIsLegacyResponse +## wsproxysdk.DeregisterWorkspaceProxyRequest ```json { - "found": true, - "legacy": true + "replica_id": "string" } ``` ### Properties -| Name | Type | Required | Restrictions | Description | -| -------- | ------- | -------- | ------------ | ----------- | -| `found` | boolean | false | | | -| `legacy` | boolean | false | | | +| Name | Type | Required | Restrictions | Description | +| ------------ | ------ | -------- | ------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `replica_id` | string | false | | Replica ID is a unique identifier for the replica of the proxy that is deregistering. It should be generated by the client on startup and should've already been passed to the register endpoint. | ->>>>>>> main ## wsproxysdk.IssueSignedAppTokenResponse ```json From 2df067f6895d7bf30920268cfb3cb3934470c875 Mon Sep 17 00:00:00 2001 From: Dean Sheather Date: Tue, 25 Jul 2023 15:17:21 +0000 Subject: [PATCH 20/22] fixup! 
Merge branch 'main' into dean/proxy-derp-map --- enterprise/coderd/workspaceproxy.go | 13 +++++++--- enterprise/coderd/workspaceproxy_test.go | 33 ++++++++++++++++++++++++ enterprise/replicasync/replicasync.go | 16 +++++++++++- 3 files changed, 58 insertions(+), 4 deletions(-) diff --git a/enterprise/coderd/workspaceproxy.go b/enterprise/coderd/workspaceproxy.go index 923746eff424a..cce631b18d844 100644 --- a/enterprise/coderd/workspaceproxy.go +++ b/enterprise/coderd/workspaceproxy.go @@ -645,9 +645,16 @@ func (api *API) workspaceProxyRegister(rw http.ResponseWriter, r *http.Request) return } - // Publish a replicasync event with a nil ID so every replica (yes, even the - // current replica) will refresh its replicas list. - err = api.Pubsub.Publish(replicasync.PubsubEvent, []byte(uuid.Nil.String())) + // Update replica sync and notify all other replicas to update their + // replica list. + err = api.replicaManager.PublishUpdate() + if err != nil { + httpapi.InternalServerError(rw, err) + return + } + replicaUpdateCtx, replicaUpdateCancel := context.WithTimeout(ctx, 5*time.Second) + defer replicaUpdateCancel() + err = api.replicaManager.UpdateNow(replicaUpdateCtx) if err != nil { httpapi.InternalServerError(rw, err) return diff --git a/enterprise/coderd/workspaceproxy_test.go b/enterprise/coderd/workspaceproxy_test.go index cd1b83ff61a3d..6487f0758b51c 100644 --- a/enterprise/coderd/workspaceproxy_test.go +++ b/enterprise/coderd/workspaceproxy_test.go @@ -586,6 +586,39 @@ func TestProxyRegisterDeregister(t *testing.T) { require.Equal(t, req2.ReplicaRelayAddress, registerRes1.SiblingReplicas[0].RelayAddress) require.EqualValues(t, 10001, registerRes1.SiblingReplicas[0].RegionID) }) + + // ReturnSiblings2 tries to create 1000 proxy replicas and ensures that they + // all return the correct number of siblings. 
+ t.Run("ReturnSiblings2", func(t *testing.T) { + t.Parallel() + + client, _ := setup(t) + ctx := testutil.Context(t, testutil.WaitLong) + + createRes, err := client.CreateWorkspaceProxy(ctx, codersdk.CreateWorkspaceProxyRequest{ + Name: "proxy", + }) + require.NoError(t, err) + + proxyClient := wsproxysdk.New(client.URL) + proxyClient.SetSessionToken(createRes.ProxyToken) + + for i := 0; i < 1000; i++ { + registerRes, err := proxyClient.RegisterWorkspaceProxy(ctx, wsproxysdk.RegisterWorkspaceProxyRequest{ + AccessURL: "https://proxy.coder.test", + WildcardHostname: "*.proxy.coder.test", + DerpEnabled: true, + ReplicaID: uuid.New(), + ReplicaHostname: "venus", + ReplicaError: "", + ReplicaRelayAddress: fmt.Sprintf("http://127.0.0.1:%d", 8080+i), + Version: buildinfo.Version(), + }) + require.NoErrorf(t, err, "register proxy %d", i) + + require.Lenf(t, registerRes.SiblingReplicas, i, "siblings for proxy %d", i) + } + }) } func TestIssueSignedAppToken(t *testing.T) { diff --git a/enterprise/replicasync/replicasync.go b/enterprise/replicasync/replicasync.go index 5a08e2922f243..42bf402a6682e 100644 --- a/enterprise/replicasync/replicasync.go +++ b/enterprise/replicasync/replicasync.go @@ -126,6 +126,20 @@ type Manager struct { callback func() } +func (m *Manager) ID() uuid.UUID { + return m.id +} + +// UpdateNow synchronously updates replicas. +func (m *Manager) UpdateNow(ctx context.Context) error { + return m.syncReplicas(ctx) +} + +// PublishUpdate notifies all other replicas to update. +func (m *Manager) PublishUpdate() error { + return m.pubsub.Publish(PubsubEvent, []byte(m.id.String())) +} + // updateInterval is used to determine a replicas state. // If the replica was updated > the time, it's considered healthy. // If the replica was updated < the time, it's considered stale. @@ -307,7 +321,7 @@ func (m *Manager) syncReplicas(ctx context.Context) error { } if m.self.Error != replica.Error { // Publish an update occurred! 
- err = m.pubsub.Publish(PubsubEvent, []byte(m.self.ID.String())) + err = m.PublishUpdate() if err != nil { return xerrors.Errorf("publish replica update: %w", err) } From dfbfa96025ca4cfc48b9ffc3fd9d3398c4c14ed2 Mon Sep 17 00:00:00 2001 From: Dean Sheather Date: Wed, 26 Jul 2023 14:37:55 +0000 Subject: [PATCH 21/22] Please --- agent/agent.go | 6 +-- coderd/wsconncache/wsconncache_test.go | 2 +- enterprise/coderd/coderd.go | 4 +- enterprise/coderd/workspaceproxy_test.go | 48 ++++++++++++++------- enterprise/wsproxy/wsproxysdk/wsproxysdk.go | 4 +- site/src/testHelpers/entities.ts | 4 ++ 6 files changed, 45 insertions(+), 23 deletions(-) diff --git a/agent/agent.go b/agent/agent.go index 657c5558619c3..e3cac00663c71 100644 --- a/agent/agent.go +++ b/agent/agent.go @@ -704,7 +704,7 @@ func (a *agent) run(ctx context.Context) error { eg, egCtx := errgroup.WithContext(ctx) eg.Go(func() error { a.logger.Debug(egCtx, "running tailnet connection coordinator") - err = a.runCoordinator(egCtx, network) + err := a.runCoordinator(egCtx, network) if err != nil { return xerrors.Errorf("run coordinator: %w", err) } @@ -713,7 +713,7 @@ func (a *agent) run(ctx context.Context) error { eg.Go(func() error { a.logger.Debug(egCtx, "running derp map subscriber") - err = a.runDERPMapSubscriber(egCtx, network) + err := a.runDERPMapSubscriber(egCtx, network) if err != nil { return xerrors.Errorf("run derp map subscriber: %w", err) } @@ -963,7 +963,7 @@ func (a *agent) runDERPMapSubscriber(ctx context.Context, network *tailnet.Conn) if update.Err != nil { return update.Err } - if update.DERPMap != nil && !tailnet.CompareDERPMaps(a.network.DERPMap(), update.DERPMap) { + if update.DERPMap != nil && !tailnet.CompareDERPMaps(network.DERPMap(), update.DERPMap) { a.logger.Info(ctx, "updating derp map due to detected changes") network.SetDERPMap(update.DERPMap) } diff --git a/coderd/wsconncache/wsconncache_test.go b/coderd/wsconncache/wsconncache_test.go index b922e2eb4fdaf..00526fa0d27fb 100644 
--- a/coderd/wsconncache/wsconncache_test.go +++ b/coderd/wsconncache/wsconncache_test.go @@ -229,7 +229,7 @@ func (c *closer) Close() error { return c.closeFunc() } -func (c *client) DERPMapUpdates(_ context.Context) (<-chan agentsdk.DERPMapUpdate, io.Closer, error) { +func (*client) DERPMapUpdates(_ context.Context) (<-chan agentsdk.DERPMapUpdate, io.Closer, error) { closed := make(chan struct{}) return make(<-chan agentsdk.DERPMapUpdate), &closer{ closeFunc: func() error { diff --git a/enterprise/coderd/coderd.go b/enterprise/coderd/coderd.go index 52dd76b26e30e..da6fed76d61fc 100644 --- a/enterprise/coderd/coderd.go +++ b/enterprise/coderd/coderd.go @@ -659,7 +659,7 @@ func derpMapper(logger slog.Logger, proxyHealth *proxyhealth.ProxyHealth) func(* if shouldLog { logger.Warn( context.Background(), - "existing DERP region IDs are too large, proxy region IDs will not be populated in the derp map. Please ensure that all DERP region IDs are less than 2^32.", + "existing DERP region IDs are too large, proxy region IDs will not be populated in the derp map. Please ensure that all DERP region IDs are less than 2^32", slog.F("largest_region_id", largestRegionID), slog.F("max_region_id", 1<<32-1), ) @@ -715,7 +715,7 @@ func derpMapper(logger slog.Logger, proxyHealth *proxyhealth.ProxyHealth) func(* lastDerpConflictMutex.Unlock() if shouldLog { logger.Warn(context.Background(), - "proxy region ID or code conflict, ignoring workspace proxy for DERP map. Please change the flags on the affected proxy to use a different region ID and code.", + "proxy region ID or code conflict, ignoring workspace proxy for DERP map. 
Please change the flags on the affected proxy to use a different region ID and code", slog.F("proxy_id", status.Proxy.ID), slog.F("proxy_name", status.Proxy.Name), slog.F("proxy_display_name", status.Proxy.DisplayName), diff --git a/enterprise/coderd/workspaceproxy_test.go b/enterprise/coderd/workspaceproxy_test.go index 6487f0758b51c..bade66db7212c 100644 --- a/enterprise/coderd/workspaceproxy_test.go +++ b/enterprise/coderd/workspaceproxy_test.go @@ -587,7 +587,7 @@ func TestProxyRegisterDeregister(t *testing.T) { require.EqualValues(t, 10001, registerRes1.SiblingReplicas[0].RegionID) }) - // ReturnSiblings2 tries to create 1000 proxy replicas and ensures that they + // ReturnSiblings2 tries to create 100 proxy replicas and ensures that they // all return the correct number of siblings. t.Run("ReturnSiblings2", func(t *testing.T) { t.Parallel() @@ -603,20 +603,38 @@ func TestProxyRegisterDeregister(t *testing.T) { proxyClient := wsproxysdk.New(client.URL) proxyClient.SetSessionToken(createRes.ProxyToken) - for i := 0; i < 1000; i++ { - registerRes, err := proxyClient.RegisterWorkspaceProxy(ctx, wsproxysdk.RegisterWorkspaceProxyRequest{ - AccessURL: "https://proxy.coder.test", - WildcardHostname: "*.proxy.coder.test", - DerpEnabled: true, - ReplicaID: uuid.New(), - ReplicaHostname: "venus", - ReplicaError: "", - ReplicaRelayAddress: fmt.Sprintf("http://127.0.0.1:%d", 8080+i), - Version: buildinfo.Version(), - }) - require.NoErrorf(t, err, "register proxy %d", i) - - require.Lenf(t, registerRes.SiblingReplicas, i, "siblings for proxy %d", i) + for i := 0; i < 100; i++ { + ok := false + for j := 0; j < 2; j++ { + registerRes, err := proxyClient.RegisterWorkspaceProxy(ctx, wsproxysdk.RegisterWorkspaceProxyRequest{ + AccessURL: "https://proxy.coder.test", + WildcardHostname: "*.proxy.coder.test", + DerpEnabled: true, + ReplicaID: uuid.New(), + ReplicaHostname: "venus", + ReplicaError: "", + ReplicaRelayAddress: fmt.Sprintf("http://127.0.0.1:%d", 8080+i), + Version: 
buildinfo.Version(), + }) + require.NoErrorf(t, err, "register proxy %d", i) + + // If the sibling replica count is wrong, try again. The impact + // of this not being immediate is that proxies may not function + // as DERP relays until they register again in 30 seconds. + // + // In the real world, replicas will not be registering this + // quickly. Kubernetes rolls out gradually in practice. + if len(registerRes.SiblingReplicas) != i { + t.Logf("%d: expected %d siblings, got %d", i, i, len(registerRes.SiblingReplicas)) + time.Sleep(100 * time.Millisecond) + continue + } + + ok = true + break + } + + require.True(t, ok, "expected to register replica %d", i) } }) } diff --git a/enterprise/wsproxy/wsproxysdk/wsproxysdk.go b/enterprise/wsproxy/wsproxysdk/wsproxysdk.go index 8979317627b3f..f98ab3673eadd 100644 --- a/enterprise/wsproxy/wsproxysdk/wsproxysdk.go +++ b/enterprise/wsproxy/wsproxysdk/wsproxysdk.go @@ -303,7 +303,7 @@ func (c *Client) RegisterWorkspaceProxyLoop(ctx context.Context, opts RegisterWo if deregisterErr != nil { opts.Logger.Error(ctx, "failed to deregister workspace proxy with Coder primary (it will be automatically deregistered shortly)", - slog.F("err", deregisterErr), + slog.Error(deregisterErr), ) } @@ -350,7 +350,7 @@ func (c *Client) RegisterWorkspaceProxyLoop(ctx context.Context, opts RegisterWo slog.F("req", opts.Request), slog.F("timeout", opts.AttemptTimeout), slog.F("failed_attempts", failedAttempts), - slog.F("err", err), + slog.Error(err), ) if failedAttempts > opts.MaxFailureCount { diff --git a/site/src/testHelpers/entities.ts b/site/src/testHelpers/entities.ts index ad7259c05a76b..a8e1baaf0056f 100644 --- a/site/src/testHelpers/entities.ts +++ b/site/src/testHelpers/entities.ts @@ -79,6 +79,7 @@ export const MockPrimaryWorkspaceProxy: TypesGen.WorkspaceProxy = { healthy: true, path_app_url: "https://coder.com", wildcard_hostname: "*.coder.com", + derp_enabled: true, created_at: new Date().toISOString(), updated_at: new 
Date().toISOString(), deleted: false, @@ -96,6 +97,7 @@ export const MockHealthyWildWorkspaceProxy: TypesGen.WorkspaceProxy = { healthy: true, path_app_url: "https://external.com", wildcard_hostname: "*.external.com", + derp_enabled: true, created_at: new Date().toISOString(), updated_at: new Date().toISOString(), deleted: false, @@ -113,6 +115,7 @@ export const MockUnhealthyWildWorkspaceProxy: TypesGen.WorkspaceProxy = { healthy: false, path_app_url: "https://unhealthy.coder.com", wildcard_hostname: "*unhealthy..coder.com", + derp_enabled: true, created_at: new Date().toISOString(), updated_at: new Date().toISOString(), deleted: false, @@ -138,6 +141,7 @@ export const MockWorkspaceProxies: TypesGen.WorkspaceProxy[] = [ healthy: true, path_app_url: "https://cowboy.coder.com", wildcard_hostname: "", + derp_enabled: false, created_at: new Date().toISOString(), updated_at: new Date().toISOString(), deleted: false, From 8223a35df58270af9aff0ebbaeda8eb689835a1d Mon Sep 17 00:00:00 2001 From: Dean Sheather Date: Wed, 26 Jul 2023 15:03:32 +0000 Subject: [PATCH 22/22] fixup! Please --- coderd/workspaceagents_test.go | 8 +++++--- enterprise/coderd/coderd.go | 14 ++++++++------ enterprise/coderd/coderdenttest/coderdenttest.go | 2 ++ enterprise/coderd/workspaceproxy_test.go | 1 + 4 files changed, 16 insertions(+), 9 deletions(-) diff --git a/coderd/workspaceagents_test.go b/coderd/workspaceagents_test.go index e5fb66b909807..87603361780e1 100644 --- a/coderd/workspaceagents_test.go +++ b/coderd/workspaceagents_test.go @@ -9,6 +9,7 @@ import ( "runtime" "strconv" "strings" + "sync/atomic" "testing" "time" @@ -1273,9 +1274,10 @@ func TestWorkspaceAgent_UpdatedDERP(t *testing.T) { require.NotNil(t, originalDerpMap) // Change the DERP mapper to our custom one. 
- currentDerpMap := originalDerpMap + var currentDerpMap atomic.Pointer[tailcfg.DERPMap] + currentDerpMap.Store(originalDerpMap) derpMapFn := func(_ *tailcfg.DERPMap) *tailcfg.DERPMap { - return currentDerpMap + return currentDerpMap.Load().Clone() } api.DERPMapper.Store(&derpMapFn) @@ -1323,7 +1325,7 @@ func TestWorkspaceAgent_UpdatedDERP(t *testing.T) { for _, node := range newDerpMap.Regions[2].Nodes { node.RegionID = 2 } - currentDerpMap = newDerpMap + currentDerpMap.Store(newDerpMap) // Wait for the agent's DERP map to be updated. // TODO: this diff --git a/enterprise/coderd/coderd.go b/enterprise/coderd/coderd.go index da6fed76d61fc..25e4dc7b89c99 100644 --- a/enterprise/coderd/coderd.go +++ b/enterprise/coderd/coderd.go @@ -299,10 +299,11 @@ func New(ctx context.Context, options *Options) (_ *API, err error) { ServerName: options.AccessURL.Hostname(), } api.replicaManager, err = replicasync.New(ctx, options.Logger, options.Database, options.Pubsub, &replicasync.Options{ - ID: api.AGPL.ID, - RelayAddress: options.DERPServerRelayAddress, - RegionID: int32(options.DERPServerRegionID), - TLSConfig: meshTLSConfig, + ID: api.AGPL.ID, + RelayAddress: options.DERPServerRelayAddress, + RegionID: int32(options.DERPServerRegionID), + TLSConfig: meshTLSConfig, + UpdateInterval: options.ReplicaSyncUpdateInterval, }) if err != nil { return nil, xerrors.Errorf("initialize replica: %w", err) @@ -350,8 +351,9 @@ type Options struct { SCIMAPIKey []byte // Used for high availability. - DERPServerRelayAddress string - DERPServerRegionID int + ReplicaSyncUpdateInterval time.Duration + DERPServerRelayAddress string + DERPServerRegionID int // Used for user quiet hours schedules. 
DefaultQuietHoursSchedule string // cron schedule, if empty user quiet hours schedules are disabled diff --git a/enterprise/coderd/coderdenttest/coderdenttest.go b/enterprise/coderd/coderdenttest/coderdenttest.go index e9ac69ac24f17..92e0b627d60ae 100644 --- a/enterprise/coderd/coderdenttest/coderdenttest.go +++ b/enterprise/coderd/coderdenttest/coderdenttest.go @@ -55,6 +55,7 @@ type Options struct { NoDefaultQuietHoursSchedule bool DontAddLicense bool DontAddFirstUser bool + ReplicaSyncUpdateInterval time.Duration } // New constructs a codersdk client connected to an in-memory Enterprise API instance. @@ -87,6 +88,7 @@ func NewWithAPI(t *testing.T, options *Options) ( SCIMAPIKey: options.SCIMAPIKey, DERPServerRelayAddress: oop.AccessURL.String(), DERPServerRegionID: oop.BaseDERPMap.RegionIDs()[0], + ReplicaSyncUpdateInterval: options.ReplicaSyncUpdateInterval, Options: oop, EntitlementsUpdateInterval: options.EntitlementsUpdateInterval, Keys: Keys, diff --git a/enterprise/coderd/workspaceproxy_test.go b/enterprise/coderd/workspaceproxy_test.go index bade66db7212c..781ef3974ed15 100644 --- a/enterprise/coderd/workspaceproxy_test.go +++ b/enterprise/coderd/workspaceproxy_test.go @@ -278,6 +278,7 @@ func TestProxyRegisterDeregister(t *testing.T) { Pubsub: pubsub, IncludeProvisionerDaemon: true, }, + ReplicaSyncUpdateInterval: time.Minute, LicenseOptions: &coderdenttest.LicenseOptions{ Features: license.Features{ codersdk.FeatureWorkspaceProxy: 1,