From 3eb9abcbd3305cc2823a0493d889e031a850b563 Mon Sep 17 00:00:00 2001 From: Colin Adler Date: Fri, 5 Apr 2024 12:29:08 -0500 Subject: [PATCH 1/7] fix(coderd): prevent agent reverse proxy from using `HTTP[S]_PROXY` envs (#12875) Updates https://github.com/coder/coder/issues/12790 (cherry picked from commit a2b28f80d7a14e1a42af2f6527aedfce757727b1) --- coderd/tailnet.go | 7 +++++-- coderd/tailnet_test.go | 29 +++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 2 deletions(-) diff --git a/coderd/tailnet.go b/coderd/tailnet.go index f684b05cd2756..0bcf21bb9d3a1 100644 --- a/coderd/tailnet.go +++ b/coderd/tailnet.go @@ -32,11 +32,14 @@ import ( var tailnetTransport *http.Transport func init() { - var valid bool - tailnetTransport, valid = http.DefaultTransport.(*http.Transport) + tp, valid := http.DefaultTransport.(*http.Transport) if !valid { panic("dev error: default transport is the wrong type") } + tailnetTransport = tp.Clone() + // We do not want to respect the proxy settings from the environment, since + // all network traffic happens over wireguard. + tailnetTransport.Proxy = nil } var _ workspaceapps.AgentProvider = (*ServerTailnet)(nil) diff --git a/coderd/tailnet_test.go b/coderd/tailnet_test.go index b7b7ad1df938c..0a78a8349c0df 100644 --- a/coderd/tailnet_test.go +++ b/coderd/tailnet_test.go @@ -68,6 +68,35 @@ func TestServerTailnet_AgentConn_NoSTUN(t *testing.T) { assert.True(t, conn.AwaitReachable(ctx)) } +//nolint:paralleltest // t.Setenv +func TestServerTailnet_ReverseProxy_ProxyEnv(t *testing.T) { + t.Setenv("HTTP_PROXY", "http://169.254.169.254:12345") + + ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong) + defer cancel() + + agents, serverTailnet := setupServerTailnetAgent(t, 1) + a := agents[0] + + u, err := url.Parse(fmt.Sprintf("http://127.0.0.1:%d", workspacesdk.AgentHTTPAPIServerPort)) + require.NoError(t, err) + + rp := serverTailnet.ReverseProxy(u, u, a.id) + + rw := httptest.NewRecorder() + req := httptest.NewRequest( + http.MethodGet, + u.String(), + nil, + ).WithContext(ctx) + + rp.ServeHTTP(rw, req) + res := rw.Result() + defer res.Body.Close() + + assert.Equal(t, http.StatusOK, res.StatusCode) +} + func TestServerTailnet_ReverseProxy(t *testing.T) { t.Parallel() From 3fc6111994b5e129afee416f002ae4f9710915ac Mon Sep 17 00:00:00 2001 From: Spike Curtis Date: Wed, 10 Apr 2024 22:49:13 +0400 Subject: [PATCH 2/7] fix: stop sending DeleteTailnetPeer when coordinator is unhealthy (#12925) fixes #12923 Prevents Coordinate peer connections from generating spurious database queries like DeleteTailnetPeer when the coordinator is unhealthy. It does this by checking the health of the querier before accepting a connection, rather than unconditionally accepting it only for it to get swatted down later. (cherry picked from commit 06eae954c978db7767d984348f3d471f64858b19) --- enterprise/tailnet/pgcoord.go | 24 +++++++++- enterprise/tailnet/pgcoord_internal_test.go | 51 +++++++++++++++++++++ 2 files changed, 74 insertions(+), 1 deletion(-) diff --git a/enterprise/tailnet/pgcoord.go b/enterprise/tailnet/pgcoord.go index aabb21eef6b28..aecdcde828d78 100644 --- a/enterprise/tailnet/pgcoord.go +++ b/enterprise/tailnet/pgcoord.go @@ -231,6 +231,17 @@ func (c *pgCoord) Coordinate( logger := c.logger.With(slog.F("peer_id", id)) reqs := make(chan *proto.CoordinateRequest, agpl.RequestBufferSize) resps := make(chan *proto.CoordinateResponse, agpl.ResponseBufferSize) + if !c.querier.isHealthy() { + // If the coordinator is unhealthy, we don't want to hook this Coordinate call up to the + // binder, as that can cause an unnecessary call to DeleteTailnetPeer when the connIO is + // closed. Instead, we just close the response channel and bail out. + // c.f. https://github.com/coder/coder/issues/12923 + c.logger.Info(ctx, "closed incoming coordinate call while unhealthy", + slog.F("peer_id", id), + ) + close(resps) + return reqs, resps + } cIO := newConnIO(c.ctx, ctx, logger, c.bindings, c.tunnelerCh, reqs, resps, id, name, a) err := agpl.SendCtx(c.ctx, c.newConnections, cIO) if err != nil { @@ -842,7 +853,12 @@ func (q *querier) newConn(c *connIO) { defer q.mu.Unlock() if !q.healthy { err := c.Close() - q.logger.Info(q.ctx, "closed incoming connection while unhealthy", + // This can only happen during a narrow window where we were healthy + // when pgCoord checked before accepting the connection, but now are + // unhealthy now that we get around to processing it. Seeing a small + // number of these logs is not worrying, but a large number probably + // indicates something is amiss. + q.logger.Warn(q.ctx, "closed incoming connection while unhealthy", slog.Error(err), slog.F("peer_id", c.UniqueID()), ) @@ -865,6 +881,12 @@ func (q *querier) newConn(c *connIO) { }) } +func (q *querier) isHealthy() bool { + q.mu.Lock() + defer q.mu.Unlock() + return q.healthy +} + func (q *querier) cleanupConn(c *connIO) { logger := q.logger.With(slog.F("peer_id", c.UniqueID())) q.mu.Lock() diff --git a/enterprise/tailnet/pgcoord_internal_test.go b/enterprise/tailnet/pgcoord_internal_test.go index d5b79d6225d2c..b1bfb371f0959 100644 --- a/enterprise/tailnet/pgcoord_internal_test.go +++ b/enterprise/tailnet/pgcoord_internal_test.go @@ -13,6 +13,7 @@ import ( "github.com/google/uuid" "github.com/stretchr/testify/require" "go.uber.org/mock/gomock" + "golang.org/x/xerrors" gProto "google.golang.org/protobuf/proto" "cdr.dev/slog" @@ -21,6 +22,8 @@ import ( "github.com/coder/coder/v2/coderd/database" "github.com/coder/coder/v2/coderd/database/dbmock" "github.com/coder/coder/v2/coderd/database/dbtestutil" + "github.com/coder/coder/v2/coderd/database/pubsub" + agpl "github.com/coder/coder/v2/tailnet" "github.com/coder/coder/v2/tailnet/proto" "github.com/coder/coder/v2/testutil" ) @@ -291,3 +294,51 @@ func TestGetDebug(t *testing.T) { require.Equal(t, peerID, debug.Tunnels[0].SrcID) require.Equal(t, dstID, debug.Tunnels[0].DstID) } + +// TestPGCoordinatorUnhealthy tests that when the coordinator fails to send heartbeats and is +// unhealthy it disconnects any peers and does not send any extraneous database queries. +func TestPGCoordinatorUnhealthy(t *testing.T) { + t.Parallel() + ctx := testutil.Context(t, testutil.WaitShort) + logger := slogtest.Make(t, &slogtest.Options{IgnoreErrors: true}).Leveled(slog.LevelDebug) + + ctrl := gomock.NewController(t) + mStore := dbmock.NewMockStore(ctrl) + ps := pubsub.NewInMemory() + + // after 3 failed heartbeats, the coordinator is unhealthy + mStore.EXPECT(). + UpsertTailnetCoordinator(gomock.Any(), gomock.Any()). + MinTimes(3). + Return(database.TailnetCoordinator{}, xerrors.New("badness")) + mStore.EXPECT(). + DeleteCoordinator(gomock.Any(), gomock.Any()). + Times(1). + Return(nil) + // But, in particular we DO NOT want the coordinator to call DeleteTailnetPeer, as this is + // unnecessary and can spam the database. c.f. https://github.com/coder/coder/issues/12923 + + // these cleanup queries run, but we don't care for this test + mStore.EXPECT().CleanTailnetCoordinators(gomock.Any()).AnyTimes().Return(nil) + mStore.EXPECT().CleanTailnetLostPeers(gomock.Any()).AnyTimes().Return(nil) + mStore.EXPECT().CleanTailnetTunnels(gomock.Any()).AnyTimes().Return(nil) + + coordinator, err := newPGCoordInternal(ctx, logger, ps, mStore) + require.NoError(t, err) + + require.Eventually(t, func() bool { + return !coordinator.querier.isHealthy() + }, testutil.WaitShort, testutil.IntervalFast) + + pID := uuid.UUID{5} + _, resps := coordinator.Coordinate(ctx, pID, "test", agpl.AgentCoordinateeAuth{ID: pID}) + resp := testutil.RequireRecvCtx(ctx, t, resps) + require.Nil(t, resp, "channel should be closed") + + // give the coordinator some time to process any pending work. We are + // testing here that a database call is absent, so we don't want to race to + // shut down the test. + time.Sleep(testutil.IntervalMedium) + _ = coordinator.Close() + require.Eventually(t, ctrl.Satisfied, testutil.WaitShort, testutil.IntervalFast) +} From 353888a5d8fd971c2ed7e68260199eadadc60aa9 Mon Sep 17 00:00:00 2001 From: Spike Curtis Date: Thu, 11 Apr 2024 10:05:53 +0400 Subject: [PATCH 3/7] feat: add src_id and dst_id indexes to tailnet_tunnels (#12911) Fixes #12780 Adds indexes to the `tailnet_tunnels` table to speed up `GetTailnetTunnelPeerIDs` and `GetTailnetTunnelPeerBindings` queries, which match on `src_id` and `dst_id`. (cherry picked from commit a231b5aef503ada90e37bd755a9574b858d8d60e) --- coderd/database/dump.sql | 4 ++++ .../migrations/000206_add_tailnet_tunnels_indexes.down.sql | 2 ++ .../migrations/000206_add_tailnet_tunnels_indexes.up.sql | 3 +++ 3 files changed, 9 insertions(+) create mode 100644 coderd/database/migrations/000206_add_tailnet_tunnels_indexes.down.sql create mode 100644 coderd/database/migrations/000206_add_tailnet_tunnels_indexes.up.sql diff --git a/coderd/database/dump.sql b/coderd/database/dump.sql index 830f8a1825b20..03d3640f8d28a 100644 --- a/coderd/database/dump.sql +++ b/coderd/database/dump.sql @@ -1624,6 +1624,10 @@ CREATE INDEX idx_tailnet_clients_coordinator ON tailnet_clients USING btree (coo CREATE INDEX idx_tailnet_peers_coordinator ON tailnet_peers USING btree (coordinator_id); +CREATE INDEX idx_tailnet_tunnels_dst_id ON tailnet_tunnels USING hash (dst_id); + +CREATE INDEX idx_tailnet_tunnels_src_id ON tailnet_tunnels USING hash (src_id); + CREATE UNIQUE INDEX idx_users_email ON users USING btree (email) WHERE (deleted = false); CREATE UNIQUE INDEX idx_users_username ON users USING btree (username) WHERE (deleted = false); diff --git a/coderd/database/migrations/000206_add_tailnet_tunnels_indexes.down.sql b/coderd/database/migrations/000206_add_tailnet_tunnels_indexes.down.sql new file mode 100644 index 0000000000000..475e509ac6843 --- /dev/null +++ b/coderd/database/migrations/000206_add_tailnet_tunnels_indexes.down.sql @@ -0,0 +1,2 @@ +DROP INDEX idx_tailnet_tunnels_src_id; +DROP INDEX idx_tailnet_tunnels_dst_id; diff --git a/coderd/database/migrations/000206_add_tailnet_tunnels_indexes.up.sql b/coderd/database/migrations/000206_add_tailnet_tunnels_indexes.up.sql new file mode 100644 index 0000000000000..42f5729e1410c --- /dev/null +++ b/coderd/database/migrations/000206_add_tailnet_tunnels_indexes.up.sql @@ -0,0 +1,3 @@ +-- Since src_id and dst_id are UUIDs, we only ever compare them with equality, so hash is better +CREATE INDEX idx_tailnet_tunnels_src_id ON tailnet_tunnels USING hash (src_id); +CREATE INDEX idx_tailnet_tunnels_dst_id ON tailnet_tunnels USING hash (dst_id); From bda13a2818b71cd104db2afd0e21a56acdd46f9d Mon Sep 17 00:00:00 2001 From: Dean Sheather Date: Wed, 17 Apr 2024 11:01:20 -0700 Subject: [PATCH 4/7] fix: make terminal raw in ssh command on windows (#12990) (cherry picked from commit d426569d4a1bb40042af52244768848c56af5aac) --- cli/ssh.go | 22 +++++++++----- pty/terminal.go | 31 ++++++++++++++++++++ pty/terminal_other.go | 36 +++++++++++++++++++++++ pty/terminal_windows.go | 65 +++++++++++++++++++++++++++++++++++++++++ 4 files changed, 146 insertions(+), 8 deletions(-) create mode 100644 pty/terminal.go create mode 100644 pty/terminal_other.go create mode 100644 pty/terminal_windows.go diff --git a/cli/ssh.go b/cli/ssh.go index 31b020917518b..83f1f4105170d 100644 --- a/cli/ssh.go +++ b/cli/ssh.go @@ -25,12 +25,8 @@ import ( "golang.org/x/xerrors" "gvisor.dev/gvisor/pkg/tcpip/adapters/gonet" - "github.com/coder/retry" - "github.com/coder/serpent" - "cdr.dev/slog" "cdr.dev/slog/sloggers/sloghuman" - "github.com/coder/coder/v2/cli/cliui" "github.com/coder/coder/v2/cli/cliutil" "github.com/coder/coder/v2/coderd/autobuild/notify" @@ -38,6 +34,9 @@ import ( "github.com/coder/coder/v2/codersdk" "github.com/coder/coder/v2/codersdk/workspacesdk" "github.com/coder/coder/v2/cryptorand" + "github.com/coder/coder/v2/pty" + "github.com/coder/retry" + "github.com/coder/serpent" ) var ( @@ -341,15 +340,22 @@ func (r *RootCmd) ssh() *serpent.Command { } } - stdoutFile, validOut := inv.Stdout.(*os.File) stdinFile, validIn := inv.Stdin.(*os.File) - if validOut && validIn && isatty.IsTerminal(stdoutFile.Fd()) { - state, err := term.MakeRaw(int(stdinFile.Fd())) + stdoutFile, validOut := inv.Stdout.(*os.File) + if validIn && validOut && isatty.IsTerminal(stdinFile.Fd()) && isatty.IsTerminal(stdoutFile.Fd()) { + inState, err := pty.MakeInputRaw(stdinFile.Fd()) + if err != nil { + return err + } + defer func() { + _ = pty.RestoreTerminal(stdinFile.Fd(), inState) + }() + outState, err := pty.MakeOutputRaw(stdoutFile.Fd()) if err != nil { return err } defer func() { - _ = term.Restore(int(stdinFile.Fd()), state) + _ = pty.RestoreTerminal(stdoutFile.Fd(), outState) }() windowChange := listenWindowSize(ctx) diff --git a/pty/terminal.go b/pty/terminal.go new file mode 100644 index 0000000000000..2c1a35c3ee35f --- /dev/null +++ b/pty/terminal.go @@ -0,0 +1,31 @@ +package pty + +// TerminalState differs per-platform. +type TerminalState struct { + state terminalState +} + +// MakeInputRaw calls term.MakeRaw on non-Windows platforms. On Windows it sets +// special terminal modes that enable VT100 emulation as well as setting the +// same modes that term.MakeRaw sets. +// +//nolint:revive +func MakeInputRaw(fd uintptr) (*TerminalState, error) { + return makeInputRaw(fd) +} + +// MakeOutputRaw does nothing on non-Windows platforms. On Windows it sets +// special terminal modes that enable VT100 emulation as well as setting the +// same modes that term.MakeRaw sets. +// +//nolint:revive +func MakeOutputRaw(fd uintptr) (*TerminalState, error) { + return makeOutputRaw(fd) +} + +// RestoreTerminal restores the terminal back to its original state. +// +//nolint:revive +func RestoreTerminal(fd uintptr, state *TerminalState) error { + return restoreTerminal(fd, state) +} diff --git a/pty/terminal_other.go b/pty/terminal_other.go new file mode 100644 index 0000000000000..9c04354715253 --- /dev/null +++ b/pty/terminal_other.go @@ -0,0 +1,36 @@ +//go:build !windows +// +build !windows + +package pty + +import "golang.org/x/term" + +type terminalState *term.State + +//nolint:revive +func makeInputRaw(fd uintptr) (*TerminalState, error) { + s, err := term.MakeRaw(int(fd)) + if err != nil { + return nil, err + } + return &TerminalState{ + state: s, + }, nil +} + +//nolint:revive +func makeOutputRaw(_ uintptr) (*TerminalState, error) { + // Does nothing. makeInputRaw does enough for both input and output. + return &TerminalState{ + state: nil, + }, nil +} + +//nolint:revive +func restoreTerminal(fd uintptr, state *TerminalState) error { + if state == nil || state.state == nil { + return nil + } + + return term.Restore(int(fd), state.state) +} diff --git a/pty/terminal_windows.go b/pty/terminal_windows.go new file mode 100644 index 0000000000000..1d8f99d5b9eb1 --- /dev/null +++ b/pty/terminal_windows.go @@ -0,0 +1,65 @@ +//go:build windows +// +build windows + +package pty + +import "golang.org/x/sys/windows" + +type terminalState uint32 + +// This is adapted from term.MakeRaw, but adds +// ENABLE_VIRTUAL_TERMINAL_PROCESSING to the output mode and +// ENABLE_VIRTUAL_TERMINAL_INPUT to the input mode. +// +// See: https://github.com/golang/term/blob/5b15d269ba1f54e8da86c8aa5574253aea0c2198/term_windows.go#L23 +// +// Copyright 2019 The Go Authors. BSD-3-Clause license. See: +// https://github.com/golang/term/blob/master/LICENSE +func makeRaw(handle windows.Handle, input bool) (uint32, error) { + var prevState uint32 + if err := windows.GetConsoleMode(handle, &prevState); err != nil { + return 0, err + } + + var raw uint32 + if input { + raw = prevState &^ (windows.ENABLE_ECHO_INPUT | windows.ENABLE_PROCESSED_INPUT | windows.ENABLE_LINE_INPUT | windows.ENABLE_PROCESSED_OUTPUT) + raw |= windows.ENABLE_VIRTUAL_TERMINAL_INPUT + } else { + raw = prevState | windows.ENABLE_VIRTUAL_TERMINAL_PROCESSING + } + + if err := windows.SetConsoleMode(handle, raw); err != nil { + return 0, err + } + return prevState, nil +} + +//nolint:revive +func makeInputRaw(handle uintptr) (*TerminalState, error) { + prevState, err := makeRaw(windows.Handle(handle), true) + if err != nil { + return nil, err + } + + return &TerminalState{ + state: terminalState(prevState), + }, nil +} + +//nolint:revive +func makeOutputRaw(handle uintptr) (*TerminalState, error) { + prevState, err := makeRaw(windows.Handle(handle), false) + if err != nil { + return nil, err + } + + return &TerminalState{ + state: terminalState(prevState), + }, nil +} + +//nolint:revive +func restoreTerminal(handle uintptr, state *TerminalState) error { + return windows.SetConsoleMode(windows.Handle(handle), uint32(state.state)) +} From cdeba67944f284fa623a5f0165db78999eaab1d9 Mon Sep 17 00:00:00 2001 From: Stephen Kirby Date: Wed, 17 Apr 2024 22:33:20 +0000 Subject: [PATCH 5/7] v2.10.1 changelog --- docs/changelogs/v2.10.1.md | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 docs/changelogs/v2.10.1.md diff --git a/docs/changelogs/v2.10.1.md b/docs/changelogs/v2.10.1.md new file mode 100644 index 0000000000000..55a68c7147218 --- /dev/null +++ b/docs/changelogs/v2.10.1.md @@ -0,0 +1,22 @@ +## Changelog + +### Features + +- Added `src_id` and `dst_id` indexes to tailnet_tunnels to mitigate the risk of DB overloading (#12911) + +### Bug fixes + +- Fixed an issue where multiple unhealthy PGCoordinators would cause outages (#12925) +- Fixed the terminal in `ssh` command on Windows, allowing keyboard navigation (#12990) +- Fixed an issue where `code-server` would not connect, responding with 502 (#12875) + +Compare: [`v2.10.0...v2.10.1`](https://github.com/coder/coder/compare/v2.10.0...v2.10.1) + +## Container image + +- `docker pull ghcr.io/coder/coder:v2.10.1` + +## Install/upgrade + +Refer to our docs to [install](https://coder.com/docs/v2/latest/install) or [upgrade](https://coder.com/docs/v2/latest/admin/upgrade) Coder, or use a release asset below. + From 2101dbce034f035808d236da5d68521380e4dd70 Mon Sep 17 00:00:00 2001 From: Stephen Kirby Date: Wed, 17 Apr 2024 22:35:02 +0000 Subject: [PATCH 6/7] updated version flags in kube install --- docs/install/kubernetes.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/install/kubernetes.md b/docs/install/kubernetes.md index 2e7fd125de07a..3bf6203cf003a 100644 --- a/docs/install/kubernetes.md +++ b/docs/install/kubernetes.md @@ -127,7 +127,7 @@ locally in order to log in and manage templates. helm install coder coder-v2/coder \ --namespace coder \ --values values.yaml \ - --version 2.10.0 + --version 2.10.1 ``` For the **stable** Coder release: @@ -136,7 +136,7 @@ locally in order to log in and manage templates. helm install coder coder-v2/coder \ --namespace coder \ --values values.yaml \ - --version 2.9.1 + --version 2.9.3 ``` You can watch Coder start up by running `kubectl get pods -n coder`. Once From 2ed7226e85a397f6d1351d6fa976430c71da5a5d Mon Sep 17 00:00:00 2001 From: Stephen Kirby Date: Wed, 17 Apr 2024 22:38:41 +0000 Subject: [PATCH 7/7] added mainline disclaimer --- docs/changelogs/v2.10.1.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/changelogs/v2.10.1.md b/docs/changelogs/v2.10.1.md index 55a68c7147218..ca2e28a2a9779 100644 --- a/docs/changelogs/v2.10.1.md +++ b/docs/changelogs/v2.10.1.md @@ -1,5 +1,8 @@ ## Changelog +> [!NOTE] +> This is a mainline Coder release. We advise enterprise customers without a staging environment to install our [latest stable release](https://github.com/coder/coder/releases/latest) while we refine this version. Learn more about our [Release Schedule](../install/releases.md). + ### Features - Added `src_id` and `dst_id` indexes to tailnet_tunnels to mitigate the risk of DB overloading (#12911)