From b0db1ee6d426895cfd87fac80e0f76f5a5bf5edd Mon Sep 17 00:00:00 2001 From: Kyle Carberry Date: Thu, 10 Nov 2022 00:52:34 +0000 Subject: [PATCH 01/12] chore: Close idle connections on test cleanup It's possible that this was the source of a leak on Windows... --- Makefile | 1 + cli/server.go | 1 + cli/server_test.go | 1 + coderd/coderdtest/coderdtest.go | 4 +++- coderd/workspaceapps_test.go | 9 +++++++++ 5 files changed, 15 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 1db45f268840d..67e9a073a8371 100644 --- a/Makefile +++ b/Makefile @@ -465,6 +465,7 @@ test: test-clean test-postgres: test-clean test-postgres-docker DB=ci DB_FROM=$(shell go run scripts/migrate-ci/main.go) gotestsum --junitfile="gotests.xml" --packages="./..." -- \ -covermode=atomic -coverprofile="gotests.coverage" -timeout=20m \ + -parallel=4 \ -coverpkg=./... \ -count=1 -race -failfast .PHONY: test-postgres diff --git a/cli/server.go b/cli/server.go index bc4ccad533d7f..14fd59f2b6b00 100644 --- a/cli/server.go +++ b/cli/server.go @@ -566,6 +566,7 @@ func Server(vip *viper.Viper, newAPI func(context.Context, *coderd.Options) (*co InsecureSkipVerify: true, }, } + defer client.HTTPClient.CloseIdleConnections() } // Since errCh only has one buffered slot, all routines diff --git a/cli/server_test.go b/cli/server_test.go index 596730237a731..e890de93b2ce8 100644 --- a/cli/server_test.go +++ b/cli/server_test.go @@ -425,6 +425,7 @@ func TestServer(t *testing.T) { }, }, } + defer client.HTTPClient.CloseIdleConnections() // Use the first certificate and hostname. client.URL.Host = "alpaca.com:443" diff --git a/coderd/coderdtest/coderdtest.go b/coderd/coderdtest/coderdtest.go index 0fe4b5a340d2e..39bf2e52348dc 100644 --- a/coderd/coderdtest/coderdtest.go +++ b/coderd/coderdtest/coderdtest.go @@ -298,12 +298,14 @@ func NewWithAPI(t *testing.T, options *Options) (*codersdk.Client, io.Closer, *c if options.IncludeProvisionerDaemon { provisionerCloser = NewProvisionerDaemon(t, coderAPI) } + client := codersdk.New(coderAPI.AccessURL) t.Cleanup(func() { cancelFunc() _ = provisionerCloser.Close() _ = coderAPI.Close() + client.HTTPClient.CloseIdleConnections() }) - return codersdk.New(coderAPI.AccessURL), provisionerCloser, coderAPI + return client, provisionerCloser, coderAPI } // NewProvisionerDaemon launches a provisionerd instance configured to work diff --git a/coderd/workspaceapps_test.go b/coderd/workspaceapps_test.go index 2605a740c699c..7261b29e4308e 100644 --- a/coderd/workspaceapps_test.go +++ b/coderd/workspaceapps_test.go @@ -135,6 +135,9 @@ func setupProxyTest(t *testing.T, customAppHost ...string) (*codersdk.Client, co return (&net.Dialer{}).DialContext(ctx, network, client.URL.Host) } client.HTTPClient.Transport = transport + t.Cleanup(func() { + transport.CloseIdleConnections() + }) return client, user, workspace, uint16(tcpAddr.Port) } @@ -540,6 +543,9 @@ func TestWorkspaceAppsProxySubdomainPassthrough(t *testing.T) { return (&net.Dialer{}).DialContext(ctx, network, client.URL.Host) } client.HTTPClient.Transport = transport + t.Cleanup(func() { + transport.CloseIdleConnections() + }) ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong) defer cancel() @@ -579,6 +585,9 @@ func TestWorkspaceAppsProxySubdomainBlocked(t *testing.T) { return (&net.Dialer{}).DialContext(ctx, network, client.URL.Host) } client.HTTPClient.Transport = transport + t.Cleanup(func() { + transport.CloseIdleConnections() + }) return client } From 8880b9ac0bcddb8cbeb537c3a0653df4a19d72dd Mon Sep 17 00:00:00 2001 From: Ammar Bandukwala Date: Wed, 9 Nov 2022 22:05:34 +0000 Subject: [PATCH 02/12] ci: use big runners --- .github/workflows/coder.yaml | 12 ++++++------ .github/workflows/release.yaml | 2 +- Makefile | 2 ++ 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/.github/workflows/coder.yaml b/.github/workflows/coder.yaml index ae76147c0a329..cb4ee05dea33e 100644 --- a/.github/workflows/coder.yaml +++ b/.github/workflows/coder.yaml @@ -89,7 +89,7 @@ jobs: style-lint-golangci: name: style/lint/golangci timeout-minutes: 5 - runs-on: ${{ github.repository_owner == 'coder' && 'buildjet-8vcpu-ubuntu-2204' || 'ubuntu-latest' }} + runs-on: ${{ github.repository_owner == 'coder' && 'ubuntu-latest-16-cores' || 'ubuntu-latest' }} steps: - uses: actions/checkout@v3 - uses: actions/setup-go@v3 @@ -171,7 +171,7 @@ jobs: gen: name: "style/gen" timeout-minutes: 8 - runs-on: ${{ github.repository_owner == 'coder' && 'buildjet-8vcpu-ubuntu-2204' || 'ubuntu-latest' }} + runs-on: ${{ github.repository_owner == 'coder' && 'ubuntu-latest-16-cores' || 'ubuntu-latest' }} needs: changes if: needs.changes.outputs.docs-only == 'false' steps: @@ -276,7 +276,7 @@ jobs: test-go: name: "test/go" - runs-on: ${{ matrix.os == 'ubuntu-latest' && github.repository_owner == 'coder' && 'buildjet-8vcpu-ubuntu-2204' || matrix.os }} + runs-on: ${{ matrix.os == 'ubuntu-latest' && github.repository_owner == 'coder' && 'ubuntu-latest-16-cores' || matrix.os == 'windows-2022' && github.repository_owner == 'coder' && 'windows-latest-8-cores'|| matrix.os }} timeout-minutes: 20 strategy: matrix: @@ -356,7 +356,7 @@ jobs: test-go-postgres: name: "test/go/postgres" - runs-on: ${{ github.repository_owner == 'coder' && 'buildjet-8vcpu-ubuntu-2204' || 'ubuntu-latest' }} + runs-on: ${{ github.repository_owner == 'coder' && 'ubuntu-latest-16-cores' || 'ubuntu-latest' }} # This timeout must be greater than the timeout set by `go test` in # `make test-postgres` to ensure we receive a trace of running # goroutines. Setting this to the timeout +5m should work quite well @@ -417,7 +417,7 @@ jobs: deploy: name: "deploy" - runs-on: ${{ github.repository_owner == 'coder' && 'buildjet-8vcpu-ubuntu-2204' || 'ubuntu-latest' }} + runs-on: ${{ github.repository_owner == 'coder' && 'ubuntu-latest-16-cores' || 'ubuntu-latest' }} timeout-minutes: 30 needs: changes if: | @@ -514,7 +514,7 @@ jobs: test-js: name: "test/js" - runs-on: ubuntu-latest + runs-on: ${{ github.repository_owner == 'coder' && 'ubuntu-latest-16-cores' || 'ubuntu-latest' }} timeout-minutes: 20 steps: - uses: actions/checkout@v3 diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 49284702fc583..8444e30baa030 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -28,7 +28,7 @@ env: jobs: release: - runs-on: ${{ github.repository_owner == 'coder' && 'buildjet-8vcpu-ubuntu-2204' || 'ubuntu-latest' }} + runs-on: ${{ github.repository_owner == 'coder' && 'ubuntu-latest-16-cores' || 'ubuntu-latest' }} env: # Necessary for Docker manifest DOCKER_CLI_EXPERIMENTAL: "enabled" diff --git a/Makefile b/Makefile index 67e9a073a8371..02d71810c9165 100644 --- a/Makefile +++ b/Makefile @@ -463,6 +463,8 @@ test: test-clean # When updating -timeout for this test, keep in sync with # test-go-postgres (.github/workflows/coder.yaml). test-postgres: test-clean test-postgres-docker + # The postgres test is prone to failure, so we limit parallelism for + # more consistent execution. DB=ci DB_FROM=$(shell go run scripts/migrate-ci/main.go) gotestsum --junitfile="gotests.xml" --packages="./..." -- \ -covermode=atomic -coverprofile="gotests.coverage" -timeout=20m \ -parallel=4 \ From 2662aa3bb20dc39ce8069556697471eb4e71f9e4 Mon Sep 17 00:00:00 2001 From: Kyle Carberry Date: Sat, 12 Nov 2022 20:56:22 -0600 Subject: [PATCH 03/12] fix: Improve tailnet connections by reducing timeouts This awaits connection ping before running a dial. Before, we were hitting the TCP retransmission and handshake timeouts, which could intermittently add 1 or 5 seconds to a connection being initialized. --- agent/agent_test.go | 7 +--- cli/agent_test.go | 23 ++----------- cli/portforward.go | 18 +--------- cli/speedtest.go | 56 ++++++++++++++++-------------- cli/ssh.go | 4 +-- coderd/workspaceagents_test.go | 7 +--- codersdk/agentconn.go | 27 +++++---------- codersdk/workspaceagents.go | 3 +- go.mod | 2 +- go.sum | 4 +-- tailnet/conn.go | 63 ++++++++++++++++++++++++++++++---- tailnet/conn_test.go | 6 ++-- 12 files changed, 112 insertions(+), 108 deletions(-) diff --git a/agent/agent_test.go b/agent/agent_test.go index c53438404a2fb..133167e45e952 100644 --- a/agent/agent_test.go +++ b/agent/agent_test.go @@ -498,12 +498,7 @@ func TestAgent(t *testing.T) { }() conn, _ := setupAgent(t, codersdk.WorkspaceAgentMetadata{}, 0) - require.Eventually(t, func() bool { - ctx, cancelFunc := context.WithTimeout(context.Background(), testutil.IntervalFast) - defer cancelFunc() - _, err := conn.Ping(ctx) - return err == nil - }, testutil.WaitMedium, testutil.IntervalFast) + require.True(t, conn.AwaitReachable(context.Background())) conn1, err := conn.DialContext(context.Background(), l.Addr().Network(), l.Addr().String()) require.NoError(t, err) defer conn1.Close() diff --git a/cli/agent_test.go b/cli/agent_test.go index a2c79ceae2753..edcab3ac4a0e5 100644 --- a/cli/agent_test.go +++ b/cli/agent_test.go @@ -14,7 +14,6 @@ import ( "github.com/coder/coder/coderd/coderdtest" "github.com/coder/coder/provisioner/echo" "github.com/coder/coder/provisionersdk/proto" - "github.com/coder/coder/testutil" ) func TestWorkspaceAgent(t *testing.T) { @@ -71,12 +70,7 @@ func TestWorkspaceAgent(t *testing.T) { dialer, err := client.DialWorkspaceAgent(ctx, resources[0].Agents[0].ID, nil) require.NoError(t, err) defer dialer.Close() - require.Eventually(t, func() bool { - ctx, cancelFunc := context.WithTimeout(ctx, testutil.IntervalFast) - defer cancelFunc() - _, err := dialer.Ping(ctx) - return err == nil - }, testutil.WaitMedium, testutil.IntervalFast) + require.True(t, dialer.AwaitReachable(context.Background())) cancelFunc() err = <-errC require.NoError(t, err) @@ -134,12 +128,7 @@ func TestWorkspaceAgent(t *testing.T) { dialer, err := client.DialWorkspaceAgent(ctx, resources[0].Agents[0].ID, nil) require.NoError(t, err) defer dialer.Close() - require.Eventually(t, func() bool { - ctx, cancelFunc := context.WithTimeout(ctx, testutil.IntervalFast) - defer cancelFunc() - _, err := dialer.Ping(ctx) - return err == nil - }, testutil.WaitMedium, testutil.IntervalFast) + require.True(t, dialer.AwaitReachable(context.Background())) cancelFunc() err = <-errC require.NoError(t, err) @@ -197,13 +186,7 @@ func TestWorkspaceAgent(t *testing.T) { dialer, err := client.DialWorkspaceAgent(ctx, resources[0].Agents[0].ID, nil) require.NoError(t, err) defer dialer.Close() - require.Eventually(t, func() bool { - ctx, cancelFunc := context.WithTimeout(ctx, testutil.IntervalFast) - defer cancelFunc() - _, err := dialer.Ping(ctx) - return err == nil - }, testutil.WaitMedium, testutil.IntervalFast) - + require.True(t, dialer.AwaitReachable(context.Background())) sshClient, err := dialer.SSHClient(ctx) require.NoError(t, err) defer sshClient.Close() diff --git a/cli/portforward.go b/cli/portforward.go index ca7cb51f14719..ea6edb2c9d89e 100644 --- a/cli/portforward.go +++ b/cli/portforward.go @@ -10,7 +10,6 @@ import ( "strings" "sync" "syscall" - "time" "github.com/pion/udp" "github.com/spf13/cobra" @@ -145,22 +144,7 @@ func portForward() *cobra.Command { closeAllListeners() }() - ticker := time.NewTicker(250 * time.Millisecond) - defer ticker.Stop() - for { - select { - case <-ctx.Done(): - return ctx.Err() - case <-ticker.C: - } - - _, err = conn.Ping(ctx) - if err != nil { - continue - } - break - } - ticker.Stop() + conn.AwaitReachable(ctx) _, _ = fmt.Fprintln(cmd.OutOrStderr(), "Ready!") wg.Wait() return closeErr diff --git a/cli/speedtest.go b/cli/speedtest.go index 873e5e2794963..0761b558ef39f 100644 --- a/cli/speedtest.go +++ b/cli/speedtest.go @@ -62,33 +62,37 @@ func speedtest() *cobra.Command { return err } defer conn.Close() - ticker := time.NewTicker(time.Second) - defer ticker.Stop() - for { - select { - case <-ctx.Done(): - return ctx.Err() - case <-ticker.C: + if direct { + ticker := time.NewTicker(time.Second) + defer ticker.Stop() + for { + select { + case <-ctx.Done(): + return ctx.Err() + case <-ticker.C: + } + dur, err := conn.Ping(ctx) + if err != nil { + continue + } + status := conn.Status() + if len(status.Peers()) != 1 { + continue + } + peer := status.Peer[status.Peers()[0]] + if peer.CurAddr == "" && direct { + cmd.Printf("Waiting for a direct connection... (%dms via %s)\n", dur.Milliseconds(), peer.Relay) + continue + } + via := peer.Relay + if via == "" { + via = "direct" + } + cmd.Printf("%dms via %s\n", dur.Milliseconds(), via) + break } - dur, err := conn.Ping(ctx) - if err != nil { - continue - } - status := conn.Status() - if len(status.Peers()) != 1 { - continue - } - peer := status.Peer[status.Peers()[0]] - if peer.CurAddr == "" && direct { - cmd.Printf("Waiting for a direct connection... (%dms via %s)\n", dur.Milliseconds(), peer.Relay) - continue - } - via := peer.Relay - if via == "" { - via = "direct" - } - cmd.Printf("%dms via %s\n", dur.Milliseconds(), via) - break + } else { + conn.AwaitReachable(ctx) } dir := tsspeedtest.Download if reverse { diff --git a/cli/ssh.go b/cli/ssh.go index 811d87af18ff1..57a8c4aab4ac4 100644 --- a/cli/ssh.go +++ b/cli/ssh.go @@ -90,12 +90,12 @@ func ssh() *cobra.Command { return xerrors.Errorf("await agent: %w", err) } - conn, err := client.DialWorkspaceAgent(ctx, workspaceAgent.ID, nil) + conn, err := client.DialWorkspaceAgent(ctx, workspaceAgent.ID, &codersdk.DialWorkspaceAgentOptions{}) if err != nil { return err } defer conn.Close() - + conn.AwaitReachable(ctx) stopPolling := tryPollWorkspaceAutostop(ctx, client, workspace) defer stopPolling() diff --git a/coderd/workspaceagents_test.go b/coderd/workspaceagents_test.go index 9a3088711fe24..e89b913f1bf17 100644 --- a/coderd/workspaceagents_test.go +++ b/coderd/workspaceagents_test.go @@ -178,12 +178,7 @@ func TestWorkspaceAgentListen(t *testing.T) { defer func() { _ = conn.Close() }() - require.Eventually(t, func() bool { - ctx, cancelFunc := context.WithTimeout(ctx, testutil.IntervalFast) - defer cancelFunc() - _, err := conn.Ping(ctx) - return err == nil - }, testutil.WaitLong, testutil.IntervalFast) + conn.AwaitReachable(ctx) }) t.Run("FailNonLatestBuild", func(t *testing.T) { diff --git a/codersdk/agentconn.go b/codersdk/agentconn.go index a68ab0672ad6b..f980767336daa 100644 --- a/codersdk/agentconn.go +++ b/codersdk/agentconn.go @@ -16,9 +16,7 @@ import ( "golang.org/x/crypto/ssh" "golang.org/x/xerrors" - "tailscale.com/ipn/ipnstate" "tailscale.com/net/speedtest" - "tailscale.com/tailcfg" "github.com/coder/coder/coderd/tracing" "github.com/coder/coder/tailnet" @@ -133,27 +131,18 @@ type AgentConn struct { CloseFunc func() } +func (c *AgentConn) AwaitReachable(ctx context.Context) bool { + ctx, span := tracing.StartSpan(ctx) + defer span.End() + + return c.Conn.AwaitReachable(ctx, TailnetIP) +} + func (c *AgentConn) Ping(ctx context.Context) (time.Duration, error) { ctx, span := tracing.StartSpan(ctx) defer span.End() - errCh := make(chan error, 1) - durCh := make(chan time.Duration, 1) - go c.Conn.Ping(TailnetIP, tailcfg.PingDisco, func(pr *ipnstate.PingResult) { - if pr.Err != "" { - errCh <- xerrors.New(pr.Err) - return - } - durCh <- time.Duration(pr.LatencySeconds * float64(time.Second)) - }) - select { - case err := <-errCh: - return 0, err - case <-ctx.Done(): - return 0, ctx.Err() - case dur := <-durCh: - return dur, nil - } + return c.Conn.Ping(ctx, TailnetIP) } func (c *AgentConn) CloseWithError(_ error) error { diff --git a/codersdk/workspaceagents.go b/codersdk/workspaceagents.go index 8f6d43f6eaf4e..a5e1b0ce2e3a0 100644 --- a/codersdk/workspaceagents.go +++ b/codersdk/workspaceagents.go @@ -447,13 +447,14 @@ func (c *Client) DialWorkspaceAgent(ctx context.Context, agentID uuid.UUID, opti _ = conn.Close() return nil, err } + return &AgentConn{ Conn: conn, CloseFunc: func() { cancelFunc() <-closed }, - }, err + }, nil } // WorkspaceAgent returns an agent by ID. diff --git a/go.mod b/go.mod index 517f5bedb3f47..b23b5df68c7ae 100644 --- a/go.mod +++ b/go.mod @@ -40,7 +40,7 @@ replace github.com/tcnksm/go-httpstat => github.com/kylecarbs/go-httpstat v0.0.0 // There are a few minor changes we make to Tailscale that we're slowly upstreaming. Compare here: // https://github.com/tailscale/tailscale/compare/main...coder:tailscale:main -replace tailscale.com => github.com/coder/tailscale v1.1.1-0.20221104170440-ef53dca69a41 +replace tailscale.com => github.com/coder/tailscale v1.1.1-0.20221113021630-95a755422ae8 // Switch to our fork that imports fixes from http://github.com/tailscale/ssh. // See: https://github.com/coder/coder/issues/3371 diff --git a/go.sum b/go.sum index e6525b0464a99..5a6e0d97912d3 100644 --- a/go.sum +++ b/go.sum @@ -355,8 +355,8 @@ github.com/coder/retry v1.3.0 h1:5lAAwt/2Cm6lVmnfBY7sOMXcBOwcwJhmV5QGSELIVWY= github.com/coder/retry v1.3.0/go.mod h1:tXuRgZgWjUnU5LZPT4lJh4ew2elUhexhlnXzrJWdyFY= github.com/coder/ssh v0.0.0-20220811105153-fcea99919338 h1:tN5GKFT68YLVzJoA8AHuiMNJ0qlhoD3pGN3JY9gxSko= github.com/coder/ssh v0.0.0-20220811105153-fcea99919338/go.mod h1:ZSS+CUoKHDrqVakTfTWUlKSr9MtMFkC4UvtQKD7O914= -github.com/coder/tailscale v1.1.1-0.20221104170440-ef53dca69a41 h1:/mjNjfUarvH8BdmvDVLvtIIENoe3PevqCyZQmAlILuw= -github.com/coder/tailscale v1.1.1-0.20221104170440-ef53dca69a41/go.mod h1:lkCb74eSJwxeNq8YwyILoHD5vtHktiZnTOxBxo3tbNc= +github.com/coder/tailscale v1.1.1-0.20221113021630-95a755422ae8 h1:09dWJOeJLuCQlcj0/Xmnz1ii2GIkdiSBKhVDMGE33iI= +github.com/coder/tailscale v1.1.1-0.20221113021630-95a755422ae8/go.mod h1:lkCb74eSJwxeNq8YwyILoHD5vtHktiZnTOxBxo3tbNc= github.com/containerd/aufs v0.0.0-20200908144142-dab0cbea06f4/go.mod h1:nukgQABAEopAHvB6j7cnP5zJ+/3aVcE7hCYqvIwAHyE= github.com/containerd/aufs v0.0.0-20201003224125-76a6863f2989/go.mod h1:AkGGQs9NM2vtYHaUen+NljV0/baGCAPELGm2q9ZXpWU= github.com/containerd/aufs v0.0.0-20210316121734-20793ff83c97/go.mod h1:kL5kd6KM5TzQjR79jljyi4olc1Vrx6XBlcyj3gNv2PU= diff --git a/tailnet/conn.go b/tailnet/conn.go index 108a7cc5dda47..5a5c8e50f394d 100644 --- a/tailnet/conn.go +++ b/tailnet/conn.go @@ -2,6 +2,7 @@ package tailnet import ( "context" + "errors" "fmt" "io" "net" @@ -198,7 +199,7 @@ func NewConn(options *Options) (*Conn, error) { wireguardEngine: wireguardEngine, } wireguardEngine.SetStatusCallback(func(s *wgengine.Status, err error) { - server.logger.Info(context.Background(), "wireguard status", slog.F("status", s), slog.F("err", err)) + server.logger.Debug(context.Background(), "wireguard status", slog.F("status", s), slog.F("err", err)) if err != nil { return } @@ -217,7 +218,7 @@ func NewConn(options *Options) (*Conn, error) { server.sendNode() }) wireguardEngine.SetNetInfoCallback(func(ni *tailcfg.NetInfo) { - server.logger.Info(context.Background(), "netinfo callback", slog.F("netinfo", ni)) + server.logger.Debug(context.Background(), "netinfo callback", slog.F("netinfo", ni)) // If the lastMutex is blocked, it's possible that // multiple NetInfo callbacks occur at the same time. // @@ -383,6 +384,9 @@ func (c *Conn) UpdateNodes(nodes []*Node) error { if c.isClosed() { return nil } + if errors.Is(err, wgengine.ErrNoChanges) { + return nil + } return xerrors.Errorf("reconfig: %w", err) } return nil @@ -395,9 +399,56 @@ func (c *Conn) Status() *ipnstate.Status { return sb.Status() } -// Ping sends a ping to the Wireguard engine. -func (c *Conn) Ping(ip netip.Addr, pingType tailcfg.PingType, cb func(*ipnstate.PingResult)) { - c.wireguardEngine.Ping(ip, pingType, cb) +// Ping sends a Disco ping to the Wireguard engine. +func (c *Conn) Ping(ctx context.Context, ip netip.Addr) (time.Duration, error) { + errCh := make(chan error, 1) + durCh := make(chan time.Duration, 1) + go c.wireguardEngine.Ping(ip, tailcfg.PingDisco, func(pr *ipnstate.PingResult) { + if pr.Err != "" { + errCh <- xerrors.New(pr.Err) + return + } + durCh <- time.Duration(pr.LatencySeconds * float64(time.Second)) + }) + select { + case err := <-errCh: + return 0, err + case <-ctx.Done(): + return 0, ctx.Err() + case dur := <-durCh: + return dur, nil + } +} + +// AwaitReachable pings the provided IP continually until the +// address is reachable. It's the callers responsibility to provide +// a timeout, otherwise this function will block forever. +func (c *Conn) AwaitReachable(ctx context.Context, ip netip.Addr) bool { + ticker := time.NewTicker(time.Millisecond * 100) + defer ticker.Stop() + completedCtx, completed := context.WithCancel(ctx) + run := func() { + ctx, cancelFunc := context.WithTimeout(completedCtx, time.Second) + defer cancelFunc() + _, err := c.Ping(ctx, ip) + if err == nil { + completed() + } + } + go run() + defer completed() + for { + select { + case <-completedCtx.Done(): + return true + case <-ticker.C: + // Pings can take a while, so we can run multiple + // in parallel to return ASAP. + go run() + case <-ctx.Done(): + return false + } + } } // Closed is a channel that ends when the connection has @@ -466,7 +517,7 @@ func (c *Conn) sendNode() { } c.nodeSending = true go func() { - c.logger.Info(context.Background(), "sending node", slog.F("node", node)) + c.logger.Debug(context.Background(), "sending node", slog.F("node", node)) nodeCallback(node) c.lastMutex.Lock() c.nodeSending = false diff --git a/tailnet/conn_test.go b/tailnet/conn_test.go index bc3d5aec284af..a967a0772cdd8 100644 --- a/tailnet/conn_test.go +++ b/tailnet/conn_test.go @@ -62,14 +62,16 @@ func TestTailnet(t *testing.T) { err := w1.UpdateNodes([]*tailnet.Node{node}) require.NoError(t, err) }) - + require.True(t, w2.AwaitReachable(context.Background(), w1IP)) conn := make(chan struct{}) go func() { listener, err := w1.Listen("tcp", ":35565") assert.NoError(t, err) defer listener.Close() nc, err := listener.Accept() - assert.NoError(t, err) + if !assert.NoError(t, err) { + return + } _ = nc.Close() conn <- struct{}{} }() From 3146d46143c017ccac3b6b1aeceb5e4f6aac73d2 Mon Sep 17 00:00:00 2001 From: Kyle Carberry Date: Sun, 13 Nov 2022 17:59:25 +0000 Subject: [PATCH 04/12] Add logging to Startupscript test --- agent/agent_test.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/agent/agent_test.go b/agent/agent_test.go index c53438404a2fb..ebd52a47e6549 100644 --- a/agent/agent_test.go +++ b/agent/agent_test.go @@ -370,9 +370,11 @@ func TestAgent(t *testing.T) { require.Eventually(t, func() bool { content, err := os.ReadFile(tempPath) if err != nil { + t.Logf("read file %q: %s", tempPath, err) return false } if len(content) == 0 { + t.Logf("no content in %q", tempPath) return false } if runtime.GOOS == "windows" { From fbb1bdb25e29d6a974389cc3b8e83cc0e7c3ddf5 Mon Sep 17 00:00:00 2001 From: Kyle Carberry Date: Sun, 13 Nov 2022 18:13:51 +0000 Subject: [PATCH 05/12] Add better logging --- agent/agent.go | 1 + agent/agent_test.go | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/agent/agent.go b/agent/agent.go index d82008a5a0c5c..da0f97990724f 100644 --- a/agent/agent.go +++ b/agent/agent.go @@ -375,6 +375,7 @@ func (a *agent) runStartupScript(ctx context.Context, script string) error { return nil } + a.logger.Info(ctx, "running startup script", slog.F("script", script)) writer, err := os.OpenFile(filepath.Join(os.TempDir(), "coder-startup-script.log"), os.O_CREATE|os.O_RDWR, 0o600) if err != nil { return xerrors.Errorf("open startup script log file: %w", err) diff --git a/agent/agent_test.go b/agent/agent_test.go index 0d7f37cf8cc6d..f87c70ca3162d 100644 --- a/agent/agent_test.go +++ b/agent/agent_test.go @@ -363,7 +363,7 @@ func TestAgent(t *testing.T) { tempPath := filepath.Join(t.TempDir(), "content.txt") content := "somethingnice" setupAgent(t, codersdk.WorkspaceAgentMetadata{ - StartupScript: fmt.Sprintf("echo %s > %s", content, tempPath), + StartupScript: fmt.Sprintf("echo %s> %s", content, tempPath), }, 0) var gotContent string From 8c289e505eb311b4ccb6d0a7ddd27764a99f8703 Mon Sep 17 00:00:00 2001 From: Kyle Carberry Date: Sun, 13 Nov 2022 18:31:50 +0000 Subject: [PATCH 06/12] Write startup script logs to fs dir --- agent/agent.go | 6 +++++- agent/agent_test.go | 1 + 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/agent/agent.go b/agent/agent.go index da0f97990724f..b3edb7612c1d4 100644 --- a/agent/agent.go +++ b/agent/agent.go @@ -376,7 +376,11 @@ func (a *agent) runStartupScript(ctx context.Context, script string) error { } a.logger.Info(ctx, "running startup script", slog.F("script", script)) - writer, err := os.OpenFile(filepath.Join(os.TempDir(), "coder-startup-script.log"), os.O_CREATE|os.O_RDWR, 0o600) + tempDir, err := afero.TempDir(a.filesystem, "", "") + if err != nil { + return xerrors.Errorf("create temp dir: %w", err) + } + writer, err := a.filesystem.OpenFile(filepath.Join(tempDir, "coder-startup-script.log"), os.O_CREATE|os.O_RDWR, 0o600) if err != nil { return xerrors.Errorf("open startup script log file: %w", err) } diff --git a/agent/agent_test.go b/agent/agent_test.go index f87c70ca3162d..fac3ae273c9ac 100644 --- a/agent/agent_test.go +++ b/agent/agent_test.go @@ -681,6 +681,7 @@ func setupAgent(t *testing.T, metadata codersdk.WorkspaceAgentMetadata, ptyTimeo statsChan: statsCh, coordinator: coordinator, }, + Filesystem: afero.NewMemMapFs(), Logger: slogtest.Make(t, nil).Leveled(slog.LevelDebug), ReconnectingPTYTimeout: ptyTimeout, }) From a839dbb1384ba3f12f395b7f4d219eca5cdea2a8 Mon Sep 17 00:00:00 2001 From: unknown Date: Sun, 13 Nov 2022 18:40:42 +0000 Subject: [PATCH 07/12] Fix startup script test --- agent/agent_test.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/agent/agent_test.go b/agent/agent_test.go index fac3ae273c9ac..b9dc3f19039b1 100644 --- a/agent/agent_test.go +++ b/agent/agent_test.go @@ -673,6 +673,7 @@ func setupAgent(t *testing.T, metadata codersdk.WorkspaceAgentMetadata, ptyTimeo coordinator := tailnet.NewCoordinator() agentID := uuid.New() statsCh := make(chan *codersdk.AgentStats) + fs := afero.NewMemMapFs() closer := agent.New(agent.Options{ Client: &client{ t: t, @@ -681,7 +682,7 @@ func setupAgent(t *testing.T, metadata codersdk.WorkspaceAgentMetadata, ptyTimeo statsChan: statsCh, coordinator: coordinator, }, - Filesystem: afero.NewMemMapFs(), + Filesystem: fs, Logger: slogtest.Make(t, nil).Leveled(slog.LevelDebug), ReconnectingPTYTimeout: ptyTimeout, }) From 318f68ea6b8521b2875213ce0e77591a1800fe73 Mon Sep 17 00:00:00 2001 From: unknown Date: Sun, 13 Nov 2022 19:11:22 +0000 Subject: [PATCH 08/12] Fix startup script test --- agent/agent.go | 1 - agent/agent_test.go | 27 ++++++++++++++------------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/agent/agent.go b/agent/agent.go index b3edb7612c1d4..b36949b365fe1 100644 --- a/agent/agent.go +++ b/agent/agent.go @@ -387,7 +387,6 @@ func (a *agent) runStartupScript(ctx context.Context, script string) error { defer func() { _ = writer.Close() }() - cmd, err := a.createCommand(ctx, script, nil) if err != nil { return xerrors.Errorf("create command: %w", err) diff --git a/agent/agent_test.go b/agent/agent_test.go index b9dc3f19039b1..277177dbf21d8 100644 --- a/agent/agent_test.go +++ b/agent/agent_test.go @@ -61,7 +61,7 @@ func TestAgent(t *testing.T) { ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong) defer cancel() - conn, stats := setupAgent(t, codersdk.WorkspaceAgentMetadata{}, 0) + conn, stats, _ := setupAgent(t, codersdk.WorkspaceAgentMetadata{}, 0) sshClient, err := conn.SSHClient(ctx) require.NoError(t, err) @@ -81,7 +81,7 @@ func TestAgent(t *testing.T) { ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong) defer cancel() - conn, stats := setupAgent(t, codersdk.WorkspaceAgentMetadata{}, 0) + conn, stats, _ := setupAgent(t, codersdk.WorkspaceAgentMetadata{}, 0) ptyConn, err := conn.ReconnectingPTY(ctx, uuid.NewString(), 128, 128, "/bin/bash") require.NoError(t, err) @@ -231,7 +231,7 @@ func TestAgent(t *testing.T) { if runtime.GOOS == "windows" { home = "/" + strings.ReplaceAll(home, "\\", "/") } - conn, _ := setupAgent(t, codersdk.WorkspaceAgentMetadata{}, 0) + conn, _, _ := setupAgent(t, codersdk.WorkspaceAgentMetadata{}, 0) sshClient, err := conn.SSHClient(ctx) require.NoError(t, err) defer sshClient.Close() @@ -261,7 +261,7 @@ func TestAgent(t *testing.T) { ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong) defer cancel() - conn, _ := setupAgent(t, codersdk.WorkspaceAgentMetadata{}, 0) + conn, _, _ := setupAgent(t, codersdk.WorkspaceAgentMetadata{}, 0) sshClient, err := conn.SSHClient(ctx) require.NoError(t, err) defer sshClient.Close() @@ -361,9 +361,9 @@ func TestAgent(t *testing.T) { t.Run("StartupScript", func(t *testing.T) { t.Parallel() tempPath := filepath.Join(t.TempDir(), "content.txt") - content := "somethingnice" + content := "hello" setupAgent(t, codersdk.WorkspaceAgentMetadata{ - StartupScript: fmt.Sprintf("echo %s> %s", content, tempPath), + StartupScript: "echo " + content + " > " + tempPath, }, 0) var gotContent string @@ -386,7 +386,7 @@ func TestAgent(t *testing.T) { } gotContent = string(content) return true - }, testutil.WaitMedium, testutil.IntervalMedium) + }, testutil.WaitShort, testutil.IntervalMedium) require.Equal(t, content, strings.TrimSpace(gotContent)) }) @@ -402,7 +402,7 @@ func TestAgent(t *testing.T) { ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong) defer cancel() - conn, _ := setupAgent(t, codersdk.WorkspaceAgentMetadata{}, 0) + conn, _, _ := setupAgent(t, codersdk.WorkspaceAgentMetadata{}, 0) id := uuid.NewString() netConn, err := conn.ReconnectingPTY(ctx, id, 100, 100, "/bin/bash") require.NoError(t, err) @@ -499,7 +499,7 @@ func TestAgent(t *testing.T) { } }() - conn, _ := setupAgent(t, codersdk.WorkspaceAgentMetadata{}, 0) + conn, _, _ := setupAgent(t, codersdk.WorkspaceAgentMetadata{}, 0) require.True(t, conn.AwaitReachable(context.Background())) conn1, err := conn.DialContext(context.Background(), l.Addr().Network(), l.Addr().String()) require.NoError(t, err) @@ -520,7 +520,7 @@ func TestAgent(t *testing.T) { ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong) defer cancel() derpMap := tailnettest.RunDERPAndSTUN(t) - conn, _ := setupAgent(t, codersdk.WorkspaceAgentMetadata{ + conn, _, _ := setupAgent(t, codersdk.WorkspaceAgentMetadata{ DERPMap: derpMap, }, 0) defer conn.Close() @@ -603,7 +603,7 @@ func TestAgent(t *testing.T) { } func setupSSHCommand(t *testing.T, beforeArgs []string, afterArgs []string) *exec.Cmd { - agentConn, _ := setupAgent(t, codersdk.WorkspaceAgentMetadata{}, 0) + agentConn, _, _ := setupAgent(t, codersdk.WorkspaceAgentMetadata{}, 0) listener, err := net.Listen("tcp", "127.0.0.1:0") require.NoError(t, err) waitGroup := sync.WaitGroup{} @@ -646,7 +646,7 @@ func setupSSHCommand(t *testing.T, beforeArgs []string, afterArgs []string) *exe func setupSSHSession(t *testing.T, options codersdk.WorkspaceAgentMetadata) *ssh.Session { ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong) defer cancel() - conn, _ := setupAgent(t, options, 0) + conn, _, _ := setupAgent(t, options, 0) sshClient, err := conn.SSHClient(ctx) require.NoError(t, err) t.Cleanup(func() { @@ -666,6 +666,7 @@ func (c closeFunc) Close() error { func setupAgent(t *testing.T, metadata codersdk.WorkspaceAgentMetadata, ptyTimeout time.Duration) ( *codersdk.AgentConn, <-chan *codersdk.AgentStats, + afero.Fs, ) { if metadata.DERPMap == nil { metadata.DERPMap = tailnettest.RunDERPAndSTUN(t) @@ -708,7 +709,7 @@ func setupAgent(t *testing.T, metadata codersdk.WorkspaceAgentMetadata, ptyTimeo conn.SetNodeCallback(sendNode) return &codersdk.AgentConn{ Conn: conn, - }, statsCh + }, statsCh, fs } var dialTestPayload = []byte("dean-was-here123") From 774ae5b353904560fef7677f7e2af086b7627811 Mon Sep 17 00:00:00 2001 From: Kyle Carberry Date: Sun, 13 Nov 2022 19:19:05 +0000 Subject: [PATCH 09/12] Reduce test timeout --- .github/workflows/coder.yaml | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/.github/workflows/coder.yaml b/.github/workflows/coder.yaml index cb4ee05dea33e..d2bd0bf4a5dfa 100644 --- a/.github/workflows/coder.yaml +++ b/.github/workflows/coder.yaml @@ -336,11 +336,7 @@ jobs: echo ::set-output name=cover::false fi set -x - test_timeout=5m - if [[ "${{ matrix.os }}" == windows* ]]; then - test_timeout=10m - fi - gotestsum --junitfile="gotests.xml" --packages="./..." -- -parallel=8 -timeout=$test_timeout -short -failfast $COVERAGE_FLAGS + gotestsum --junitfile="gotests.xml" --packages="./..." -- -parallel=8 -timeout=3m -short -failfast $COVERAGE_FLAGS - uses: codecov/codecov-action@v3 # This action has a tendency to error out unexpectedly, it has From 0144c63e6c8ee01481be3a789c152fca02af86fe Mon Sep 17 00:00:00 2001 From: Kyle Carberry Date: Sun, 13 Nov 2022 19:47:24 +0000 Subject: [PATCH 10/12] Use central tmp dir in agent --- agent/agent.go | 12 +++++++----- agent/agent_test.go | 15 +++++++-------- 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/agent/agent.go b/agent/agent.go index b36949b365fe1..b81d0ca12fe36 100644 --- a/agent/agent.go +++ b/agent/agent.go @@ -56,6 +56,7 @@ const ( type Options struct { Filesystem afero.Fs + TempDir string ExchangeToken func(ctx context.Context) (string, error) Client Client ReconnectingPTYTimeout time.Duration @@ -78,6 +79,9 @@ func New(options Options) io.Closer { if options.Filesystem == nil { options.Filesystem = afero.NewOsFs() } + if options.TempDir == "" { + options.TempDir = os.TempDir() + } if options.ExchangeToken == nil { options.ExchangeToken = func(ctx context.Context) (string, error) { return "", nil @@ -93,6 +97,7 @@ func New(options Options) io.Closer { client: options.Client, exchangeToken: options.ExchangeToken, filesystem: options.Filesystem, + tempDir: options.TempDir, stats: &Stats{}, } server.init(ctx) @@ -104,6 +109,7 @@ type agent struct { client Client exchangeToken func(ctx context.Context) (string, error) filesystem afero.Fs + tempDir string reconnectingPTYs sync.Map reconnectingPTYTimeout time.Duration @@ -376,11 +382,7 @@ func (a *agent) runStartupScript(ctx context.Context, script string) error { } a.logger.Info(ctx, "running startup script", slog.F("script", script)) - tempDir, err := afero.TempDir(a.filesystem, "", "") - if err != nil { - return xerrors.Errorf("create temp dir: %w", err) - } - writer, err := a.filesystem.OpenFile(filepath.Join(tempDir, "coder-startup-script.log"), os.O_CREATE|os.O_RDWR, 0o600) + writer, err := a.filesystem.OpenFile(filepath.Join(a.tempDir, "coder-startup-script.log"), os.O_CREATE|os.O_RDWR, 0o600) if err != nil { return xerrors.Errorf("open startup script log file: %w", err) } diff --git a/agent/agent_test.go b/agent/agent_test.go index 277177dbf21d8..70acb77e51902 100644 --- a/agent/agent_test.go +++ b/agent/agent_test.go @@ -360,21 +360,20 @@ func TestAgent(t *testing.T) { t.Run("StartupScript", func(t *testing.T) { t.Parallel() - tempPath := filepath.Join(t.TempDir(), "content.txt") - content := "hello" - setupAgent(t, codersdk.WorkspaceAgentMetadata{ - StartupScript: "echo " + content + " > " + tempPath, + content := "some output" + _, _, fs := setupAgent(t, codersdk.WorkspaceAgentMetadata{ + StartupScript: "echo " + content, }, 0) - var gotContent string require.Eventually(t, func() bool { - content, err := os.ReadFile(tempPath) + outputPath := filepath.Join(os.TempDir(), "coder-startup-script.log") + content, err := afero.ReadFile(fs, outputPath) if err != nil { - t.Logf("read file %q: %s", tempPath, err) + t.Logf("read file %q: %s", outputPath, err) return false } if len(content) == 0 { - t.Logf("no content in %q", tempPath) + t.Logf("no content in %q", outputPath) return false } if runtime.GOOS == "windows" { From a46376232ba0dcef3b1999c49a71455e638d9f9b Mon Sep 17 00:00:00 2001 From: Kyle Carberry Date: Sun, 13 Nov 2022 20:04:33 +0000 Subject: [PATCH 11/12] Adjust output --- agent/agent_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/agent/agent_test.go b/agent/agent_test.go index 70acb77e51902..564b29e967896 100644 --- a/agent/agent_test.go +++ b/agent/agent_test.go @@ -360,7 +360,7 @@ func TestAgent(t *testing.T) { t.Run("StartupScript", func(t *testing.T) { t.Parallel() - content := "some output" + content := "output" _, _, fs := setupAgent(t, codersdk.WorkspaceAgentMetadata{ StartupScript: "echo " + content, }, 0) From 5193feb1296ad8580edb7b4883993d3230c70233 Mon Sep 17 00:00:00 2001 From: Kyle Carberry Date: Sun, 13 Nov 2022 20:13:20 +0000 Subject: [PATCH 12/12] Skip startup script test on Windows --- agent/agent_test.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/agent/agent_test.go b/agent/agent_test.go index 564b29e967896..5f368dc74ef28 100644 --- a/agent/agent_test.go +++ b/agent/agent_test.go @@ -360,6 +360,9 @@ func TestAgent(t *testing.T) { t.Run("StartupScript", func(t *testing.T) { t.Parallel() + if runtime.GOOS == "windows" { + t.Skip("This test doesn't work on Windows for some reason...") + } content := "output" _, _, fs := setupAgent(t, codersdk.WorkspaceAgentMetadata{ StartupScript: "echo " + content,