diff --git a/CODEOWNERS b/CODEOWNERS new file mode 100644 index 0000000000000..a24dfad099030 --- /dev/null +++ b/CODEOWNERS @@ -0,0 +1,6 @@ +# These APIs are versioned, so any changes need to be carefully reviewed for whether +# to bump API major or minor versions. +agent/proto/ @spikecurtis @johnstcn +tailnet/proto/ @spikecurtis @johnstcn +vpn/vpn.proto @spikecurtis @johnstcn +vpn/version.go @spikecurtis @johnstcn diff --git a/agent/agent.go b/agent/agent.go index 4c8497d105acc..6d27802d1291a 100644 --- a/agent/agent.go +++ b/agent/agent.go @@ -31,7 +31,6 @@ import ( "golang.org/x/exp/slices" "golang.org/x/sync/errgroup" "golang.org/x/xerrors" - "storj.io/drpc" "tailscale.com/net/speedtest" "tailscale.com/tailcfg" "tailscale.com/types/netlogtype" @@ -94,7 +93,9 @@ type Options struct { } type Client interface { - ConnectRPC(ctx context.Context) (drpc.Conn, error) + ConnectRPC23(ctx context.Context) ( + proto.DRPCAgentClient23, tailnetproto.DRPCTailnetClient23, error, + ) RewriteDERPMap(derpMap *tailcfg.DERPMap) } @@ -410,7 +411,7 @@ func (t *trySingleflight) Do(key string, fn func()) { fn() } -func (a *agent) reportMetadata(ctx context.Context, conn drpc.Conn) error { +func (a *agent) reportMetadata(ctx context.Context, aAPI proto.DRPCAgentClient23) error { tickerDone := make(chan struct{}) collectDone := make(chan struct{}) ctx, cancel := context.WithCancel(ctx) @@ -572,7 +573,6 @@ func (a *agent) reportMetadata(ctx context.Context, conn drpc.Conn) error { reportTimeout = 30 * time.Second reportError = make(chan error, 1) reportInFlight = false - aAPI = proto.NewDRPCAgentClient(conn) ) for { @@ -627,8 +627,7 @@ func (a *agent) reportMetadata(ctx context.Context, conn drpc.Conn) error { // reportLifecycle reports the current lifecycle state once. All state // changes are reported in order. -func (a *agent) reportLifecycle(ctx context.Context, conn drpc.Conn) error { - aAPI := proto.NewDRPCAgentClient(conn) +func (a *agent) reportLifecycle(ctx context.Context, aAPI proto.DRPCAgentClient23) error { for { select { case <-a.lifecycleUpdate: @@ -710,8 +709,7 @@ func (a *agent) setLifecycle(state codersdk.WorkspaceAgentLifecycle) { // fetchServiceBannerLoop fetches the service banner on an interval. It will // not be fetched immediately; the expectation is that it is primed elsewhere // (and must be done before the session actually starts). -func (a *agent) fetchServiceBannerLoop(ctx context.Context, conn drpc.Conn) error { - aAPI := proto.NewDRPCAgentClient(conn) +func (a *agent) fetchServiceBannerLoop(ctx context.Context, aAPI proto.DRPCAgentClient23) error { ticker := time.NewTicker(a.announcementBannersRefreshInterval) defer ticker.Stop() for { @@ -737,7 +735,7 @@ func (a *agent) fetchServiceBannerLoop(ctx context.Context, conn drpc.Conn) erro } func (a *agent) run() (retErr error) { - // This allows the agent to refresh it's token if necessary. + // This allows the agent to refresh its token if necessary. // For instance identity this is required, since the instance // may not have re-provisioned, but a new agent ID was created. sessionToken, err := a.exchangeToken(a.hardCtx) @@ -747,12 +745,12 @@ func (a *agent) run() (retErr error) { a.sessionToken.Store(&sessionToken) // ConnectRPC returns the dRPC connection we use for the Agent and Tailnet v2+ APIs - conn, err := a.client.ConnectRPC(a.hardCtx) + aAPI, tAPI, err := a.client.ConnectRPC23(a.hardCtx) if err != nil { return err } defer func() { - cErr := conn.Close() + cErr := aAPI.DRPCConn().Close() if cErr != nil { a.logger.Debug(a.hardCtx, "error closing drpc connection", slog.Error(err)) } @@ -761,11 +759,10 @@ func (a *agent) run() (retErr error) { // A lot of routines need the agent API / tailnet API connection. We run them in their own // goroutines in parallel, but errors in any routine will cause them all to exit so we can // redial the coder server and retry. - connMan := newAPIConnRoutineManager(a.gracefulCtx, a.hardCtx, a.logger, conn) + connMan := newAPIConnRoutineManager(a.gracefulCtx, a.hardCtx, a.logger, aAPI, tAPI) - connMan.start("init notification banners", gracefulShutdownBehaviorStop, - func(ctx context.Context, conn drpc.Conn) error { - aAPI := proto.NewDRPCAgentClient(conn) + connMan.startAgentAPI("init notification banners", gracefulShutdownBehaviorStop, + func(ctx context.Context, aAPI proto.DRPCAgentClient23) error { bannersProto, err := aAPI.GetAnnouncementBanners(ctx, &proto.GetAnnouncementBannersRequest{}) if err != nil { return xerrors.Errorf("fetch service banner: %w", err) @@ -781,9 +778,9 @@ func (a *agent) run() (retErr error) { // sending logs gets gracefulShutdownBehaviorRemain because we want to send logs generated by // shutdown scripts. - connMan.start("send logs", gracefulShutdownBehaviorRemain, - func(ctx context.Context, conn drpc.Conn) error { - err := a.logSender.SendLoop(ctx, proto.NewDRPCAgentClient(conn)) + connMan.startAgentAPI("send logs", gracefulShutdownBehaviorRemain, + func(ctx context.Context, aAPI proto.DRPCAgentClient23) error { + err := a.logSender.SendLoop(ctx, aAPI) if xerrors.Is(err, agentsdk.LogLimitExceededError) { // we don't want this error to tear down the API connection and propagate to the // other routines that use the API. The LogSender has already dropped a warning @@ -795,10 +792,10 @@ func (a *agent) run() (retErr error) { // part of graceful shut down is reporting the final lifecycle states, e.g "ShuttingDown" so the // lifecycle reporting has to be via gracefulShutdownBehaviorRemain - connMan.start("report lifecycle", gracefulShutdownBehaviorRemain, a.reportLifecycle) + connMan.startAgentAPI("report lifecycle", gracefulShutdownBehaviorRemain, a.reportLifecycle) // metadata reporting can cease as soon as we start gracefully shutting down - connMan.start("report metadata", gracefulShutdownBehaviorStop, a.reportMetadata) + connMan.startAgentAPI("report metadata", gracefulShutdownBehaviorStop, a.reportMetadata) // channels to sync goroutines below // handle manifest @@ -819,55 +816,55 @@ func (a *agent) run() (retErr error) { networkOK := newCheckpoint(a.logger) manifestOK := newCheckpoint(a.logger) - connMan.start("handle manifest", gracefulShutdownBehaviorStop, a.handleManifest(manifestOK)) + connMan.startAgentAPI("handle manifest", gracefulShutdownBehaviorStop, a.handleManifest(manifestOK)) - connMan.start("app health reporter", gracefulShutdownBehaviorStop, - func(ctx context.Context, conn drpc.Conn) error { + connMan.startAgentAPI("app health reporter", gracefulShutdownBehaviorStop, + func(ctx context.Context, aAPI proto.DRPCAgentClient23) error { if err := manifestOK.wait(ctx); err != nil { return xerrors.Errorf("no manifest: %w", err) } manifest := a.manifest.Load() NewWorkspaceAppHealthReporter( - a.logger, manifest.Apps, agentsdk.AppHealthPoster(proto.NewDRPCAgentClient(conn)), + a.logger, manifest.Apps, agentsdk.AppHealthPoster(aAPI), )(ctx) return nil }) - connMan.start("create or update network", gracefulShutdownBehaviorStop, + connMan.startAgentAPI("create or update network", gracefulShutdownBehaviorStop, a.createOrUpdateNetwork(manifestOK, networkOK)) - connMan.start("coordination", gracefulShutdownBehaviorStop, - func(ctx context.Context, conn drpc.Conn) error { + connMan.startTailnetAPI("coordination", gracefulShutdownBehaviorStop, + func(ctx context.Context, tAPI tailnetproto.DRPCTailnetClient23) error { if err := networkOK.wait(ctx); err != nil { return xerrors.Errorf("no network: %w", err) } - return a.runCoordinator(ctx, conn, a.network) + return a.runCoordinator(ctx, tAPI, a.network) }, ) - connMan.start("derp map subscriber", gracefulShutdownBehaviorStop, - func(ctx context.Context, conn drpc.Conn) error { + connMan.startTailnetAPI("derp map subscriber", gracefulShutdownBehaviorStop, + func(ctx context.Context, tAPI tailnetproto.DRPCTailnetClient23) error { if err := networkOK.wait(ctx); err != nil { return xerrors.Errorf("no network: %w", err) } - return a.runDERPMapSubscriber(ctx, conn, a.network) + return a.runDERPMapSubscriber(ctx, tAPI, a.network) }) - connMan.start("fetch service banner loop", gracefulShutdownBehaviorStop, a.fetchServiceBannerLoop) + connMan.startAgentAPI("fetch service banner loop", gracefulShutdownBehaviorStop, a.fetchServiceBannerLoop) - connMan.start("stats report loop", gracefulShutdownBehaviorStop, func(ctx context.Context, conn drpc.Conn) error { + connMan.startAgentAPI("stats report loop", gracefulShutdownBehaviorStop, func(ctx context.Context, aAPI proto.DRPCAgentClient23) error { if err := networkOK.wait(ctx); err != nil { return xerrors.Errorf("no network: %w", err) } - return a.statsReporter.reportLoop(ctx, proto.NewDRPCAgentClient(conn)) + return a.statsReporter.reportLoop(ctx, aAPI) }) return connMan.wait() } // handleManifest returns a function that fetches and processes the manifest -func (a *agent) handleManifest(manifestOK *checkpoint) func(ctx context.Context, conn drpc.Conn) error { - return func(ctx context.Context, conn drpc.Conn) error { +func (a *agent) handleManifest(manifestOK *checkpoint) func(ctx context.Context, aAPI proto.DRPCAgentClient23) error { + return func(ctx context.Context, aAPI proto.DRPCAgentClient23) error { var ( sentResult = false err error @@ -877,7 +874,6 @@ func (a *agent) handleManifest(manifestOK *checkpoint) func(ctx context.Context, manifestOK.complete(err) } }() - aAPI := proto.NewDRPCAgentClient(conn) mp, err := aAPI.GetManifest(ctx, &proto.GetManifestRequest{}) if err != nil { return xerrors.Errorf("fetch metadata: %w", err) @@ -977,8 +973,8 @@ func (a *agent) handleManifest(manifestOK *checkpoint) func(ctx context.Context, // createOrUpdateNetwork waits for the manifest to be set using manifestOK, then creates or updates // the tailnet using the information in the manifest -func (a *agent) createOrUpdateNetwork(manifestOK, networkOK *checkpoint) func(context.Context, drpc.Conn) error { - return func(ctx context.Context, _ drpc.Conn) (retErr error) { +func (a *agent) createOrUpdateNetwork(manifestOK, networkOK *checkpoint) func(context.Context, proto.DRPCAgentClient23) error { + return func(ctx context.Context, _ proto.DRPCAgentClient23) (retErr error) { if err := manifestOK.wait(ctx); err != nil { return xerrors.Errorf("no manifest: %w", err) } @@ -1325,9 +1321,8 @@ func (a *agent) createTailnet(ctx context.Context, agentID uuid.UUID, derpMap *t // runCoordinator runs a coordinator and returns whether a reconnect // should occur. -func (a *agent) runCoordinator(ctx context.Context, conn drpc.Conn, network *tailnet.Conn) error { +func (a *agent) runCoordinator(ctx context.Context, tClient tailnetproto.DRPCTailnetClient23, network *tailnet.Conn) error { defer a.logger.Debug(ctx, "disconnected from coordination RPC") - tClient := tailnetproto.NewDRPCTailnetClient(conn) // we run the RPC on the hardCtx so that we have a chance to send the disconnect message if we // gracefully shut down. coordinate, err := tClient.Coordinate(a.hardCtx) @@ -1373,11 +1368,10 @@ func (a *agent) runCoordinator(ctx context.Context, conn drpc.Conn, network *tai } // runDERPMapSubscriber runs a coordinator and returns if a reconnect should occur. -func (a *agent) runDERPMapSubscriber(ctx context.Context, conn drpc.Conn, network *tailnet.Conn) error { +func (a *agent) runDERPMapSubscriber(ctx context.Context, tClient tailnetproto.DRPCTailnetClient23, network *tailnet.Conn) error { defer a.logger.Debug(ctx, "disconnected from derp map RPC") ctx, cancel := context.WithCancel(ctx) defer cancel() - tClient := tailnetproto.NewDRPCTailnetClient(conn) stream, err := tClient.StreamDERPMaps(ctx, &tailnetproto.StreamDERPMapsRequest{}) if err != nil { return xerrors.Errorf("stream DERP Maps: %w", err) @@ -1981,13 +1975,17 @@ const ( type apiConnRoutineManager struct { logger slog.Logger - conn drpc.Conn + aAPI proto.DRPCAgentClient23 + tAPI tailnetproto.DRPCTailnetClient23 eg *errgroup.Group stopCtx context.Context remainCtx context.Context } -func newAPIConnRoutineManager(gracefulCtx, hardCtx context.Context, logger slog.Logger, conn drpc.Conn) *apiConnRoutineManager { +func newAPIConnRoutineManager( + gracefulCtx, hardCtx context.Context, logger slog.Logger, + aAPI proto.DRPCAgentClient23, tAPI tailnetproto.DRPCTailnetClient23, +) *apiConnRoutineManager { // routines that remain in operation during graceful shutdown use the remainCtx. They'll still // exit if the errgroup hits an error, which usually means a problem with the conn. eg, remainCtx := errgroup.WithContext(hardCtx) @@ -2007,17 +2005,60 @@ func newAPIConnRoutineManager(gracefulCtx, hardCtx context.Context, logger slog. stopCtx := eitherContext(remainCtx, gracefulCtx) return &apiConnRoutineManager{ logger: logger, - conn: conn, + aAPI: aAPI, + tAPI: tAPI, eg: eg, stopCtx: stopCtx, remainCtx: remainCtx, } } -func (a *apiConnRoutineManager) start(name string, b gracefulShutdownBehavior, f func(context.Context, drpc.Conn) error) { +// startAgentAPI starts a routine that uses the Agent API. c.f. startTailnetAPI which is the same +// but for Tailnet. +func (a *apiConnRoutineManager) startAgentAPI( + name string, behavior gracefulShutdownBehavior, + f func(context.Context, proto.DRPCAgentClient23) error, +) { + logger := a.logger.With(slog.F("name", name)) + var ctx context.Context + switch behavior { + case gracefulShutdownBehaviorStop: + ctx = a.stopCtx + case gracefulShutdownBehaviorRemain: + ctx = a.remainCtx + default: + panic("unknown behavior") + } + a.eg.Go(func() error { + logger.Debug(ctx, "starting agent routine") + err := f(ctx, a.aAPI) + if xerrors.Is(err, context.Canceled) && ctx.Err() != nil { + logger.Debug(ctx, "swallowing context canceled") + // Don't propagate context canceled errors to the error group, because we don't want the + // graceful context being canceled to halt the work of routines with + // gracefulShutdownBehaviorRemain. Note that we check both that the error is + // context.Canceled and that *our* context is currently canceled, because when Coderd + // unilaterally closes the API connection (for example if the build is outdated), it can + // sometimes show up as context.Canceled in our RPC calls. + return nil + } + logger.Debug(ctx, "routine exited", slog.Error(err)) + if err != nil { + return xerrors.Errorf("error in routine %s: %w", name, err) + } + return nil + }) +} + +// startTailnetAPI starts a routine that uses the Tailnet API. c.f. startAgentAPI which is the same +// but for the Agent API. +func (a *apiConnRoutineManager) startTailnetAPI( + name string, behavior gracefulShutdownBehavior, + f func(context.Context, tailnetproto.DRPCTailnetClient23) error, +) { logger := a.logger.With(slog.F("name", name)) var ctx context.Context - switch b { + switch behavior { case gracefulShutdownBehaviorStop: ctx = a.stopCtx case gracefulShutdownBehaviorRemain: @@ -2026,8 +2067,8 @@ func (a *apiConnRoutineManager) start(name string, b gracefulShutdownBehavior, f panic("unknown behavior") } a.eg.Go(func() error { - logger.Debug(ctx, "starting routine") - err := f(ctx, a.conn) + logger.Debug(ctx, "starting tailnet routine") + err := f(ctx, a.tAPI) if xerrors.Is(err, context.Canceled) && ctx.Err() != nil { logger.Debug(ctx, "swallowing context canceled") // Don't propagate context canceled errors to the error group, because we don't want the diff --git a/agent/agenttest/client.go b/agent/agenttest/client.go index 8817b311fcda6..6b2581e7831f2 100644 --- a/agent/agenttest/client.go +++ b/agent/agenttest/client.go @@ -15,7 +15,6 @@ import ( "golang.org/x/exp/slices" "golang.org/x/xerrors" "google.golang.org/protobuf/types/known/durationpb" - "storj.io/drpc" "storj.io/drpc/drpcmux" "storj.io/drpc/drpcserver" "tailscale.com/tailcfg" @@ -97,7 +96,9 @@ func (c *Client) Close() { c.derpMapOnce.Do(func() { close(c.derpMapUpdates) }) } -func (c *Client) ConnectRPC(ctx context.Context) (drpc.Conn, error) { +func (c *Client) ConnectRPC23(ctx context.Context) ( + agentproto.DRPCAgentClient23, proto.DRPCTailnetClient23, error, +) { conn, lis := drpcsdk.MemTransportPipe() c.LastWorkspaceAgent = func() { _ = conn.Close() @@ -115,7 +116,7 @@ func (c *Client) ConnectRPC(ctx context.Context) (drpc.Conn, error) { go func() { _ = c.server.Serve(serveCtx, lis) }() - return conn, nil + return agentproto.NewDRPCAgentClient(conn), proto.NewDRPCTailnetClient(conn), nil } func (c *Client) GetLifecycleStates() []codersdk.WorkspaceAgentLifecycle { diff --git a/agent/proto/agent_drpc_old.go b/agent/proto/agent_drpc_old.go index 9da7f6dee49ac..f46afaba42596 100644 --- a/agent/proto/agent_drpc_old.go +++ b/agent/proto/agent_drpc_old.go @@ -24,15 +24,19 @@ type DRPCAgentClient20 interface { // DRPCAgentClient21 is the Agent API at v2.1. It is useful if you want to be maximally compatible // with Coderd Release Versions from 2.12+ type DRPCAgentClient21 interface { - DRPCConn() drpc.Conn - - GetManifest(ctx context.Context, in *GetManifestRequest) (*Manifest, error) - GetServiceBanner(ctx context.Context, in *GetServiceBannerRequest) (*ServiceBanner, error) - UpdateStats(ctx context.Context, in *UpdateStatsRequest) (*UpdateStatsResponse, error) - UpdateLifecycle(ctx context.Context, in *UpdateLifecycleRequest) (*Lifecycle, error) - BatchUpdateAppHealths(ctx context.Context, in *BatchUpdateAppHealthRequest) (*BatchUpdateAppHealthResponse, error) - UpdateStartup(ctx context.Context, in *UpdateStartupRequest) (*Startup, error) - BatchUpdateMetadata(ctx context.Context, in *BatchUpdateMetadataRequest) (*BatchUpdateMetadataResponse, error) - BatchCreateLogs(ctx context.Context, in *BatchCreateLogsRequest) (*BatchCreateLogsResponse, error) + DRPCAgentClient20 GetAnnouncementBanners(ctx context.Context, in *GetAnnouncementBannersRequest) (*GetAnnouncementBannersResponse, error) } + +// DRPCAgentClient22 is the Agent API at v2.2. It is identical to 2.1, since the change was made on +// the Tailnet API, which uses the same version number. Compatible with Coder v2.13+ +type DRPCAgentClient22 interface { + DRPCAgentClient21 +} + +// DRPCAgentClient23 is the Agent API at v2.3. It adds the ScriptCompleted RPC. Compatible with +// Coder v2.18+ +type DRPCAgentClient23 interface { + DRPCAgentClient22 + ScriptCompleted(ctx context.Context, in *WorkspaceAgentScriptCompletedRequest) (*WorkspaceAgentScriptCompletedResponse, error) +} diff --git a/coderd/workspaceagents_test.go b/coderd/workspaceagents_test.go index 1ab2eb64b874a..c1c6ee35aa5df 100644 --- a/coderd/workspaceagents_test.go +++ b/coderd/workspaceagents_test.go @@ -2041,13 +2041,12 @@ func requireGetManifest(ctx context.Context, t testing.TB, aAPI agentproto.DRPCA } func postStartup(ctx context.Context, t testing.TB, client agent.Client, startup *agentproto.Startup) error { - conn, err := client.ConnectRPC(ctx) + aAPI, _, err := client.ConnectRPC23(ctx) require.NoError(t, err) defer func() { - cErr := conn.Close() + cErr := aAPI.DRPCConn().Close() require.NoError(t, cErr) }() - aAPI := agentproto.NewDRPCAgentClient(conn) _, err = aAPI.UpdateStartup(ctx, &agentproto.UpdateStartupRequest{Startup: startup}) return err } diff --git a/codersdk/agentsdk/agentsdk.go b/codersdk/agentsdk/agentsdk.go index 243b672a8007c..2965fdec2b269 100644 --- a/codersdk/agentsdk/agentsdk.go +++ b/codersdk/agentsdk/agentsdk.go @@ -24,6 +24,7 @@ import ( "github.com/coder/coder/v2/apiversion" "github.com/coder/coder/v2/codersdk" drpcsdk "github.com/coder/coder/v2/codersdk/drpc" + tailnetproto "github.com/coder/coder/v2/tailnet/proto" ) // ExternalLogSourceID is the statically-defined ID of a log-source that @@ -159,6 +160,7 @@ func (c *Client) RewriteDERPMap(derpMap *tailcfg.DERPMap) { // ConnectRPC20 returns a dRPC client to the Agent API v2.0. Notably, it is missing // GetAnnouncementBanners, but is useful when you want to be maximally compatible with Coderd // Release Versions from 2.9+ +// Deprecated: use ConnectRPC20WithTailnet func (c *Client) ConnectRPC20(ctx context.Context) (proto.DRPCAgentClient20, error) { conn, err := c.connectRPCVersion(ctx, apiversion.New(2, 0)) if err != nil { @@ -167,8 +169,22 @@ func (c *Client) ConnectRPC20(ctx context.Context) (proto.DRPCAgentClient20, err return proto.NewDRPCAgentClient(conn), nil } +// ConnectRPC20WithTailnet returns a dRPC client to the Agent API v2.0. Notably, it is missing +// GetAnnouncementBanners, but is useful when you want to be maximally compatible with Coderd +// Release Versions from 2.9+ +func (c *Client) ConnectRPC20WithTailnet(ctx context.Context) ( + proto.DRPCAgentClient20, tailnetproto.DRPCTailnetClient20, error, +) { + conn, err := c.connectRPCVersion(ctx, apiversion.New(2, 0)) + if err != nil { + return nil, nil, err + } + return proto.NewDRPCAgentClient(conn), tailnetproto.NewDRPCTailnetClient(conn), nil +} + // ConnectRPC21 returns a dRPC client to the Agent API v2.1. It is useful when you want to be // maximally compatible with Coderd Release Versions from 2.12+ +// Deprecated: use ConnectRPC21WithTailnet func (c *Client) ConnectRPC21(ctx context.Context) (proto.DRPCAgentClient21, error) { conn, err := c.connectRPCVersion(ctx, apiversion.New(2, 1)) if err != nil { @@ -177,6 +193,42 @@ func (c *Client) ConnectRPC21(ctx context.Context) (proto.DRPCAgentClient21, err return proto.NewDRPCAgentClient(conn), nil } +// ConnectRPC21WithTailnet returns a dRPC client to the Agent API v2.1. It is useful when you want to be +// maximally compatible with Coderd Release Versions from 2.12+ +func (c *Client) ConnectRPC21WithTailnet(ctx context.Context) ( + proto.DRPCAgentClient21, tailnetproto.DRPCTailnetClient21, error, +) { + conn, err := c.connectRPCVersion(ctx, apiversion.New(2, 1)) + if err != nil { + return nil, nil, err + } + return proto.NewDRPCAgentClient(conn), tailnetproto.NewDRPCTailnetClient(conn), nil +} + +// ConnectRPC22 returns a dRPC client to the Agent API v2.2. It is useful when you want to be +// maximally compatible with Coderd Release Versions from 2.13+ +func (c *Client) ConnectRPC22(ctx context.Context) ( + proto.DRPCAgentClient22, tailnetproto.DRPCTailnetClient22, error, +) { + conn, err := c.connectRPCVersion(ctx, apiversion.New(2, 2)) + if err != nil { + return nil, nil, err + } + return proto.NewDRPCAgentClient(conn), tailnetproto.NewDRPCTailnetClient(conn), nil +} + +// ConnectRPC23 returns a dRPC client to the Agent API v2.3. It is useful when you want to be +// maximally compatible with Coderd Release Versions from 2.18+ +func (c *Client) ConnectRPC23(ctx context.Context) ( + proto.DRPCAgentClient23, tailnetproto.DRPCTailnetClient23, error, +) { + conn, err := c.connectRPCVersion(ctx, apiversion.New(2, 3)) + if err != nil { + return nil, nil, err + } + return proto.NewDRPCAgentClient(conn), tailnetproto.NewDRPCTailnetClient(conn), nil +} + // ConnectRPC connects to the workspace agent API and tailnet API func (c *Client) ConnectRPC(ctx context.Context) (drpc.Conn, error) { return c.connectRPCVersion(ctx, proto.CurrentVersion) diff --git a/codersdk/workspacesdk/workspacesdk.go b/codersdk/workspacesdk/workspacesdk.go index 2e0214fdc1010..2724b733ea16a 100644 --- a/codersdk/workspacesdk/workspacesdk.go +++ b/codersdk/workspacesdk/workspacesdk.go @@ -217,14 +217,14 @@ func (c *Client) DialAgent(dialCtx context.Context, agentID uuid.UUID, options * return nil, xerrors.Errorf("parse url: %w", err) } q := coordinateURL.Query() - // TODO (ethanndickson) - the current version includes 2 additions we don't currently use: + // The current version includes additions // // 2.1 GetAnnouncementBanners on the Agent API (version locked to Tailnet API) // 2.2 PostTelemetry on the Tailnet API + // 2.3 RefreshResumeToken, WorkspaceUpdates // - // So, asking for API 2.2 just makes us incompatible back level servers, for no real benefit. - // As a temporary measure, we'll specifically ask for API version 2.0 until we implement sending - // telemetry. + // Since resume tokens and telemetry are optional, and fail gracefully, and we don't use + // WorkspaceUpdates to talk to a single agent, we ask for version 2.0 for maximum compatibility q.Add("version", "2.0") coordinateURL.RawQuery = q.Encode() diff --git a/enterprise/wsproxy/wsproxysdk/wsproxysdk.go b/enterprise/wsproxy/wsproxysdk/wsproxysdk.go index 09483456a96cf..9825f43c4129d 100644 --- a/enterprise/wsproxy/wsproxysdk/wsproxysdk.go +++ b/enterprise/wsproxy/wsproxysdk/wsproxysdk.go @@ -19,9 +19,20 @@ import ( "github.com/coder/coder/v2/codersdk" "github.com/coder/coder/v2/codersdk/workspacesdk" agpl "github.com/coder/coder/v2/tailnet" - "github.com/coder/coder/v2/tailnet/proto" ) +// TailnetAPIVersion is the version of the Tailnet API we use for wsproxy. +// +// # The current version of the Tailnet API includes additions +// +// 2.1 GetAnnouncementBanners on the Agent API (version locked to Tailnet API) +// 2.2 PostTelemetry on the Tailnet API +// 2.3 RefreshResumeToken, WorkspaceUpdates +// +// Since resume tokens and telemetry are optional, and fail gracefully, and we don't use +// WorkspaceUpdates in the wsproxy, we ask for version 2.0 for maximum compatibility +const TailnetAPIVersion = "2.0" + // Client is a HTTP client for a subset of Coder API routes that external // proxies need. type Client struct { @@ -508,7 +519,7 @@ func (c *Client) TailnetDialer() (*workspacesdk.WebsocketDialer, error) { return nil, xerrors.Errorf("parse url: %w", err) } q := coordinateURL.Query() - q.Add("version", proto.CurrentVersion.String()) + q.Add("version", TailnetAPIVersion) coordinateURL.RawQuery = q.Encode() coordinateHeaders := make(http.Header) tokenHeader := codersdk.SessionTokenHeader diff --git a/tailnet/controllers.go b/tailnet/controllers.go index 5c2c3151ab26e..7ddcfd9e8de36 100644 --- a/tailnet/controllers.go +++ b/tailnet/controllers.go @@ -684,9 +684,7 @@ func sendTelemetry( _, err := client.PostTelemetry(ctx, &proto.TelemetryRequest{ Events: []*proto.TelemetryEvent{event}, }) - if drpcerr.Code(err) == drpcerr.Unimplemented || - drpc.ProtocolError.Has(err) && - strings.Contains(err.Error(), "unknown rpc: ") { + if IsDRPCUnimplementedError(err) { logger.Debug( context.Background(), "attempted to send telemetry to a server that doesn't support it", @@ -703,6 +701,14 @@ func sendTelemetry( return false } +// IsDRPCUnimplementedError returns true if the error indicates the RPC called is not implemented +// by the server. +func IsDRPCUnimplementedError(err error) bool { + return drpcerr.Code(err) == drpcerr.Unimplemented || + drpc.ProtocolError.Has(err) && + strings.Contains(err.Error(), "unknown rpc: ") +} + type basicResumeTokenController struct { logger slog.Logger @@ -810,7 +816,14 @@ func (r *basicResumeTokenRefresher) refresh() { } return } - if err != nil { + if IsDRPCUnimplementedError(err) { + r.logger.Info(r.ctx, "resume token is not supported by the server") + select { + case r.errCh <- nil: + default: // already have an error + } + return + } else if err != nil { r.logger.Error(r.ctx, "error refreshing coordinator resume token", slog.Error(err)) select { case r.errCh <- err: diff --git a/tailnet/controllers_test.go b/tailnet/controllers_test.go index 1b11ebbe16419..994a524b5c258 100644 --- a/tailnet/controllers_test.go +++ b/tailnet/controllers_test.go @@ -35,6 +35,8 @@ import ( "github.com/coder/quartz" ) +var unimplementedError = drpcerr.WithCode(xerrors.New("Unimplemented"), drpcerr.Unimplemented) + func TestInMemoryCoordination(t *testing.T) { t.Parallel() ctx := testutil.Context(t, testutil.WaitShort) @@ -706,7 +708,7 @@ func TestBasicTelemetryController_Unimplemented(t *testing.T) { call = testutil.RequireRecvCtx(ctx, t, ft.calls) // for real this time - telemetryError = drpcerr.WithCode(xerrors.New("Unimplemented"), drpcerr.Unimplemented) + telemetryError = unimplementedError testutil.RequireSendCtx(ctx, t, call.errCh, telemetryError) testutil.RequireRecvCtx(ctx, t, sendDone) @@ -932,6 +934,27 @@ func TestBasicResumeTokenController_NewWhileRefreshing(t *testing.T) { require.NoError(t, err) } +func TestBasicResumeTokenController_Unimplemented(t *testing.T) { + t.Parallel() + ctx := testutil.Context(t, testutil.WaitShort) + logger := slogtest.Make(t, nil).Leveled(slog.LevelDebug) + mClock := quartz.NewMock(t) + + uut := tailnet.NewBasicResumeTokenController(logger, mClock) + _, ok := uut.Token() + require.False(t, ok) + + fr := newFakeResumeTokenClient(ctx) + cw := uut.New(fr) + + call := testutil.RequireRecvCtx(ctx, t, fr.calls) + testutil.RequireSendCtx(ctx, t, call.errCh, unimplementedError) + err := testutil.RequireRecvCtx(ctx, t, cw.Wait()) + require.NoError(t, err) + _, ok = uut.Token() + require.False(t, ok) +} + func newFakeResumeTokenClient(ctx context.Context) *fakeResumeTokenClient { return &fakeResumeTokenClient{ ctx: ctx, diff --git a/tailnet/proto/tailnet_drpc_old.go b/tailnet/proto/tailnet_drpc_old.go new file mode 100644 index 0000000000000..64be85d87542f --- /dev/null +++ b/tailnet/proto/tailnet_drpc_old.go @@ -0,0 +1,36 @@ +package proto + +import ( + "context" + + "storj.io/drpc" +) + +// DRPCTailnetClient20 is the Tailnet API at v2.0. +type DRPCTailnetClient20 interface { + DRPCConn() drpc.Conn + + StreamDERPMaps(ctx context.Context, in *StreamDERPMapsRequest) (DRPCTailnet_StreamDERPMapsClient, error) + Coordinate(ctx context.Context) (DRPCTailnet_CoordinateClient, error) +} + +// DRPCTailnetClient21 is the Tailnet API at v2.1. It is functionally identical to 2.0, because the +// change was to the Agent API (GetAnnouncementBanners). +type DRPCTailnetClient21 interface { + DRPCTailnetClient20 +} + +// DRPCTailnetClient22 is the Tailnet API at v2.2. It adds telemetry support. Compatible with Coder +// v2.13+ +type DRPCTailnetClient22 interface { + DRPCTailnetClient21 + PostTelemetry(ctx context.Context, in *TelemetryRequest) (*TelemetryResponse, error) +} + +// DRPCTailnetClient23 is the Tailnet API at v2.3. It adds resume token and workspace updates +// support. Compatible with Coder v2.18+. +type DRPCTailnetClient23 interface { + DRPCTailnetClient22 + RefreshResumeToken(ctx context.Context, in *RefreshResumeTokenRequest) (*RefreshResumeTokenResponse, error) + WorkspaceUpdates(ctx context.Context, in *WorkspaceUpdatesRequest) (DRPCTailnet_WorkspaceUpdatesClient, error) +} diff --git a/tailnet/proto/version.go b/tailnet/proto/version.go index 4eaf60f2a9ef5..8d8bd5343d2ee 100644 --- a/tailnet/proto/version.go +++ b/tailnet/proto/version.go @@ -4,9 +4,43 @@ import ( "github.com/coder/coder/v2/apiversion" ) +// Version history: +// +// API v1: +// - retroactively applied name for the HTTP Rest APIs for the Agent and the +// JSON over websocket coordination and DERP Map APIs for Tailnet +// +// API v2.0: +// - Shipped in Coder v2.8.0 +// - first dRPC over yamux over websocket APIs for tailnet and agent +// +// API v2.1: +// - Shipped in Coder v2.12.0 +// - Added support for multiple banners via the GetAnnouncementBanners RPC on +// the Agent API. +// - No changes to the Tailnet API. +// +// API v2.2: +// - Shipped in Coder v2.13.0 +// - Added support for network telemetry via the PostTelemetry RPC on the +// Tailnet API. +// - No changes to the Agent API. +// +// API v2.3: +// - Shipped in Coder v2.18.0 +// - Added support for client Resume Tokens on the Tailnet API via the +// RefreshResumeToken RPC. (This actually shipped in Coder v2.15.0, but we +// forgot to increment the API version. If you dial for API v2.2, you MAY +// be connected to a server that supports RefreshResumeToken, but be +// prepared to process "unsupported" errors.) +// - Added support for WorkspaceUpdates RPC on the Tailnet API. +// - Added support for ScriptCompleted RPC on the Agent API. (This actually +// shipped in Coder v2.16.0, but we forgot to increment the API version. If +// you dial for API v2.2, you MAY be connected to a server that supports +// ScriptCompleted, but be prepared to process "unsupported" errors.) const ( CurrentMajor = 2 - CurrentMinor = 2 + CurrentMinor = 3 ) var CurrentVersion = apiversion.New(CurrentMajor, CurrentMinor) diff --git a/vpn/speaker.go b/vpn/speaker.go index 98a623df50e4d..e4305ad4ae4d0 100644 --- a/vpn/speaker.go +++ b/vpn/speaker.go @@ -60,13 +60,6 @@ const ( SpeakerRoleTunnel SpeakerRole = "tunnel" ) -const ( - CurrentMajor = 1 - CurrentMinor = 0 -) - -var CurrentVersion = apiversion.New(CurrentMajor, CurrentMinor) - // speaker is an implementation of the CoderVPN protocol. It handles unary RPCs and their responses, // as well as the low-level serialization & deserialization to the ReadWriteCloser (rwc). // diff --git a/vpn/version.go b/vpn/version.go new file mode 100644 index 0000000000000..d869ad38ce07d --- /dev/null +++ b/vpn/version.go @@ -0,0 +1,10 @@ +package vpn + +import "github.com/coder/coder/v2/apiversion" + +const ( + CurrentMajor = 1 + CurrentMinor = 0 +) + +var CurrentVersion = apiversion.New(CurrentMajor, CurrentMinor)