From df574a8f1f16ddfae9cc548557dd53abd7547f2a Mon Sep 17 00:00:00 2001 From: Mathias Fredriksson Date: Tue, 7 Jan 2025 16:45:31 +0000 Subject: [PATCH 1/6] fix(cli/ssh): retry on autostart conflict Fixes #13032 --- cli/ssh.go | 17 +++++++++----- cli/ssh_test.go | 59 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 71 insertions(+), 5 deletions(-) diff --git a/cli/ssh.go b/cli/ssh.go index 7df590946fd6b..7a1d5940bfd01 100644 --- a/cli/ssh.go +++ b/cli/ssh.go @@ -657,12 +657,19 @@ func getWorkspaceAndAgent(ctx context.Context, inv *serpent.Invocation, client * // workspaces with the active version. _, _ = fmt.Fprintf(inv.Stderr, "Workspace was stopped, starting workspace to allow connecting to %q...\n", workspace.Name) _, err = startWorkspace(inv, client, workspace, workspaceParameterFlags{}, buildFlags{}, WorkspaceStart) - if cerr, ok := codersdk.AsError(err); ok && cerr.StatusCode() == http.StatusForbidden { - _, err = startWorkspace(inv, client, workspace, workspaceParameterFlags{}, buildFlags{}, WorkspaceUpdate) - if err != nil { - return codersdk.Workspace{}, codersdk.WorkspaceAgent{}, xerrors.Errorf("start workspace with active template version: %w", err) + if cerr, ok := codersdk.AsError(err); ok { + switch cerr.StatusCode() { + case http.StatusConflict: + _, _ = fmt.Fprintln(inv.Stderr, "Unable to start the workspace due to conflict, the workspace may be starting, retrying without autostart...") + return getWorkspaceAndAgent(ctx, inv, client, false, input) + + case http.StatusForbidden: + _, err = startWorkspace(inv, client, workspace, workspaceParameterFlags{}, buildFlags{}, WorkspaceUpdate) + if err != nil { + return codersdk.Workspace{}, codersdk.WorkspaceAgent{}, xerrors.Errorf("start workspace with active template version: %w", err) + } + _, _ = fmt.Fprintln(inv.Stdout, "Unable to start the workspace with template version from last build. Your workspace has been updated to the current active template version.") } - _, _ = fmt.Fprintln(inv.Stdout, "Unable to start the workspace with template version from last build. Your workspace has been updated to the current active template version.") } else if err != nil { return codersdk.Workspace{}, codersdk.WorkspaceAgent{}, xerrors.Errorf("start workspace with current template version: %w", err) } diff --git a/cli/ssh_test.go b/cli/ssh_test.go index 62feaf2b61e95..4533522cdb2fa 100644 --- a/cli/ssh_test.go +++ b/cli/ssh_test.go @@ -145,6 +145,65 @@ func TestSSH(t *testing.T) { pty.WriteLine("exit") <-cmdDone }) + t.Run("StartStoppedWorkspaceConflict", func(t *testing.T) { + t.Parallel() + + authToken := uuid.NewString() + ownerClient := coderdtest.New(t, &coderdtest.Options{IncludeProvisionerDaemon: true}) + owner := coderdtest.CreateFirstUser(t, ownerClient) + client, _ := coderdtest.CreateAnotherUser(t, ownerClient, owner.OrganizationID, rbac.RoleTemplateAdmin()) + version := coderdtest.CreateTemplateVersion(t, client, owner.OrganizationID, &echo.Responses{ + Parse: echo.ParseComplete, + ProvisionPlan: echo.PlanComplete, + ProvisionApply: echo.ProvisionApplyWithAgent(authToken), + }) + coderdtest.AwaitTemplateVersionJobCompleted(t, client, version.ID) + template := coderdtest.CreateTemplate(t, client, owner.OrganizationID, version.ID) + workspace := coderdtest.CreateWorkspace(t, client, template.ID) + coderdtest.AwaitWorkspaceBuildJobCompleted(t, client, workspace.LatestBuild.ID) + // Stop the workspace + workspaceBuild := coderdtest.CreateWorkspaceBuild(t, client, workspace, database.WorkspaceTransitionStop) + coderdtest.AwaitWorkspaceBuildJobCompleted(t, client, workspaceBuild.ID) + + ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitSuperLong) + defer cancel() + + type proc struct { + stdout bytes.Buffer + pty *ptytest.PTY + err chan error + } + procs := make([]proc, 3) + for i := range procs { + // SSH to the workspace which should autostart it + inv, root := clitest.New(t, "ssh", workspace.Name) + + proc := proc{ + pty: ptytest.New(t).Attach(inv), + err: make(chan error, 1), + } + procs[i] = proc + inv.Stdout = io.MultiWriter(proc.pty.Output(), &proc.stdout) + clitest.SetupConfig(t, client, root) + clitest.StartWithAssert(t, inv, func(*testing.T, error) { + // Noop. + }) + } + + var foundConflict int + for _, proc := range procs { + // Either allow the command to start the workspace or fail + // due to conflict (race), in which case it retries. + match := proc.pty.ExpectRegexMatchContext(ctx, "(Waiting for the workspace agent to connect|Unable to start the workspace due to conflict, the workspace may be starting, retrying without autostart...)") + if strings.Contains(match, "Unable to start the workspace due to conflict, the workspace may be starting, retrying without autostart...") { + foundConflict++ + // It should retry without autostart. + proc.pty.ExpectMatchContext(ctx, "Waiting for the workspace agent to connect") + } + } + // TODO(mafredri): Remove this if it's racy. + require.Greater(t, foundConflict, 0, "expected at least one conflict") + }) t.Run("RequireActiveVersion", func(t *testing.T) { t.Parallel() From e5bee0b4c325e81d74d30090448faa4041709c44 Mon Sep 17 00:00:00 2001 From: Mathias Fredriksson Date: Tue, 7 Jan 2025 17:23:17 +0000 Subject: [PATCH 2/6] cleanup test --- cli/ssh_test.go | 22 ++++++---------------- 1 file changed, 6 insertions(+), 16 deletions(-) diff --git a/cli/ssh_test.go b/cli/ssh_test.go index 4533522cdb2fa..c2b86f93cb767 100644 --- a/cli/ssh_test.go +++ b/cli/ssh_test.go @@ -168,22 +168,12 @@ func TestSSH(t *testing.T) { ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitSuperLong) defer cancel() - type proc struct { - stdout bytes.Buffer - pty *ptytest.PTY - err chan error - } - procs := make([]proc, 3) - for i := range procs { + ptys := make([]*ptytest.PTY, 3) + for i := range ptys { // SSH to the workspace which should autostart it inv, root := clitest.New(t, "ssh", workspace.Name) - proc := proc{ - pty: ptytest.New(t).Attach(inv), - err: make(chan error, 1), - } - procs[i] = proc - inv.Stdout = io.MultiWriter(proc.pty.Output(), &proc.stdout) + ptys[i] = ptytest.New(t).Attach(inv) clitest.SetupConfig(t, client, root) clitest.StartWithAssert(t, inv, func(*testing.T, error) { // Noop. @@ -191,14 +181,14 @@ func TestSSH(t *testing.T) { } var foundConflict int - for _, proc := range procs { + for _, pty := range ptys { // Either allow the command to start the workspace or fail // due to conflict (race), in which case it retries. - match := proc.pty.ExpectRegexMatchContext(ctx, "(Waiting for the workspace agent to connect|Unable to start the workspace due to conflict, the workspace may be starting, retrying without autostart...)") + match := pty.ExpectRegexMatchContext(ctx, "(Waiting for the workspace agent to connect|Unable to start the workspace due to conflict, the workspace may be starting, retrying without autostart...)") if strings.Contains(match, "Unable to start the workspace due to conflict, the workspace may be starting, retrying without autostart...") { foundConflict++ // It should retry without autostart. - proc.pty.ExpectMatchContext(ctx, "Waiting for the workspace agent to connect") + pty.ExpectMatchContext(ctx, "Waiting for the workspace agent to connect") } } // TODO(mafredri): Remove this if it's racy. From 042bd427cc3454e4580f91c57592aa852447b1c1 Mon Sep 17 00:00:00 2001 From: Mathias Fredriksson Date: Wed, 8 Jan 2025 11:38:58 +0000 Subject: [PATCH 3/6] fix test racyness, add apimiddleware to coderdtest --- cli/ssh_test.go | 50 +++++++++++++++++++++++++++------ coderd/coderdtest/coderdtest.go | 15 +++++++++- 2 files changed, 55 insertions(+), 10 deletions(-) diff --git a/cli/ssh_test.go b/cli/ssh_test.go index c2b86f93cb767..5d7c5ef4b2699 100644 --- a/cli/ssh_test.go +++ b/cli/ssh_test.go @@ -17,6 +17,7 @@ import ( "os/exec" "path" "path/filepath" + "regexp" "runtime" "strings" "testing" @@ -148,8 +149,33 @@ func TestSSH(t *testing.T) { t.Run("StartStoppedWorkspaceConflict", func(t *testing.T) { t.Parallel() + // Intercept builds to synchronize execution of the SSH command. + // The purpose here is to make sure all commands try to trigger + // a start build of the workspace. + isFirstBuild := true + buildURL := regexp.MustCompile("/api/v2/workspaces/.*/builds") + buildReq := make(chan struct{}) + buildResume := make(chan struct{}) + buildSyncMW := func(next http.Handler) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.Method == http.MethodPost && buildURL.MatchString(r.URL.Path) { + if !isFirstBuild { + t.Log("buildSyncMW: blocking build") + buildReq <- struct{}{} + <-buildResume + t.Log("buildSyncMW: resuming build") + } + isFirstBuild = false + } + next.ServeHTTP(w, r) + }) + } + authToken := uuid.NewString() - ownerClient := coderdtest.New(t, &coderdtest.Options{IncludeProvisionerDaemon: true}) + ownerClient := coderdtest.New(t, &coderdtest.Options{ + IncludeProvisionerDaemon: true, + APIMiddleware: buildSyncMW, + }) owner := coderdtest.CreateFirstUser(t, ownerClient) client, _ := coderdtest.CreateAnotherUser(t, ownerClient, owner.OrganizationID, rbac.RoleTemplateAdmin()) version := coderdtest.CreateTemplateVersion(t, client, owner.OrganizationID, &echo.Responses{ @@ -165,21 +191,28 @@ func TestSSH(t *testing.T) { workspaceBuild := coderdtest.CreateWorkspaceBuild(t, client, workspace, database.WorkspaceTransitionStop) coderdtest.AwaitWorkspaceBuildJobCompleted(t, client, workspaceBuild.ID) - ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitSuperLong) + ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitMedium) defer cancel() - ptys := make([]*ptytest.PTY, 3) - for i := range ptys { + var ptys []*ptytest.PTY + for i := 0; i < 3; i++ { // SSH to the workspace which should autostart it inv, root := clitest.New(t, "ssh", workspace.Name) - ptys[i] = ptytest.New(t).Attach(inv) + pty := ptytest.New(t).Attach(inv) + ptys = append(ptys, pty) clitest.SetupConfig(t, client, root) - clitest.StartWithAssert(t, inv, func(*testing.T, error) { - // Noop. + testutil.Go(t, func() { + _ = inv.WithContext(ctx).Run() }) } + for _, pty := range ptys { + pty.ExpectMatchContext(ctx, "Workspace was stopped, starting workspace to allow connecting to") + testutil.RequireRecvCtx(ctx, t, buildReq) + } + close(buildResume) + var foundConflict int for _, pty := range ptys { // Either allow the command to start the workspace or fail @@ -191,8 +224,7 @@ func TestSSH(t *testing.T) { pty.ExpectMatchContext(ctx, "Waiting for the workspace agent to connect") } } - // TODO(mafredri): Remove this if it's racy. - require.Greater(t, foundConflict, 0, "expected at least one conflict") + require.Equal(t, foundConflict, 2, "expected 2 conflicts") }) t.Run("RequireActiveVersion", func(t *testing.T) { t.Parallel() diff --git a/coderd/coderdtest/coderdtest.go b/coderd/coderdtest/coderdtest.go index bd1ed740a7ce7..aa096707b8fb7 100644 --- a/coderd/coderdtest/coderdtest.go +++ b/coderd/coderdtest/coderdtest.go @@ -33,6 +33,7 @@ import ( "cloud.google.com/go/compute/metadata" "github.com/fullsailor/pkcs7" + "github.com/go-chi/chi/v5" "github.com/golang-jwt/jwt/v4" "github.com/google/uuid" "github.com/moby/moby/pkg/namesgenerator" @@ -146,6 +147,11 @@ type Options struct { Database database.Store Pubsub pubsub.Pubsub + // APIMiddleware inserts middleware before api.RootHandler, this can be + // useful in certain tests where you want to intercept requests before + // passing them on to the API, e.g. for synchronization of execution. + APIMiddleware func(http.Handler) http.Handler + ConfigSSH codersdk.SSHConfigResponse SwaggerEndpoint bool @@ -555,7 +561,14 @@ func NewWithAPI(t testing.TB, options *Options) (*codersdk.Client, io.Closer, *c setHandler, cancelFunc, serverURL, newOptions := NewOptions(t, options) // We set the handler after server creation for the access URL. coderAPI := coderd.New(newOptions) - setHandler(coderAPI.RootHandler) + rootHandler := coderAPI.RootHandler + if options.APIMiddleware != nil { + r := chi.NewRouter() + r.Use(options.APIMiddleware) + r.Mount("/", rootHandler) + rootHandler = r + } + setHandler(rootHandler) var provisionerCloser io.Closer = nopcloser{} if options.IncludeProvisionerDaemon { provisionerCloser = NewTaggedProvisionerDaemon(t, coderAPI, "test", options.ProvisionerDaemonTags) From 331aa872d9889dd250739c4b9705b59a9d4f8a86 Mon Sep 17 00:00:00 2001 From: Mathias Fredriksson Date: Wed, 8 Jan 2025 12:21:36 +0000 Subject: [PATCH 4/6] race --- cli/ssh_test.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cli/ssh_test.go b/cli/ssh_test.go index 5d7c5ef4b2699..00ab385f5c82c 100644 --- a/cli/ssh_test.go +++ b/cli/ssh_test.go @@ -164,8 +164,9 @@ func TestSSH(t *testing.T) { buildReq <- struct{}{} <-buildResume t.Log("buildSyncMW: resuming build") + } else { + isFirstBuild = false } - isFirstBuild = false } next.ServeHTTP(w, r) }) From 8fe33b8beb878ef516743426d01c03ba90a7336d Mon Sep 17 00:00:00 2001 From: Mathias Fredriksson Date: Wed, 8 Jan 2025 12:34:47 +0000 Subject: [PATCH 5/6] try to fix test --- cli/ssh_test.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cli/ssh_test.go b/cli/ssh_test.go index 00ab385f5c82c..bd107852251f7 100644 --- a/cli/ssh_test.go +++ b/cli/ssh_test.go @@ -210,6 +210,8 @@ func TestSSH(t *testing.T) { for _, pty := range ptys { pty.ExpectMatchContext(ctx, "Workspace was stopped, starting workspace to allow connecting to") + } + for range ptys { testutil.RequireRecvCtx(ctx, t, buildReq) } close(buildResume) @@ -218,14 +220,12 @@ func TestSSH(t *testing.T) { for _, pty := range ptys { // Either allow the command to start the workspace or fail // due to conflict (race), in which case it retries. - match := pty.ExpectRegexMatchContext(ctx, "(Waiting for the workspace agent to connect|Unable to start the workspace due to conflict, the workspace may be starting, retrying without autostart...)") + match := pty.ExpectRegexMatchContext(ctx, "Waiting for the workspace agent to connect") if strings.Contains(match, "Unable to start the workspace due to conflict, the workspace may be starting, retrying without autostart...") { foundConflict++ - // It should retry without autostart. - pty.ExpectMatchContext(ctx, "Waiting for the workspace agent to connect") } } - require.Equal(t, foundConflict, 2, "expected 2 conflicts") + require.Equal(t, 2, foundConflict, "expected 2 conflicts") }) t.Run("RequireActiveVersion", func(t *testing.T) { t.Parallel() From 7a703f0bcfa1db996b18cc3f17c133a43d4911be Mon Sep 17 00:00:00 2001 From: Mathias Fredriksson Date: Wed, 8 Jan 2025 13:00:31 +0000 Subject: [PATCH 6/6] handle unique violation in wsbuilder --- coderd/wsbuilder/wsbuilder.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/coderd/wsbuilder/wsbuilder.go b/coderd/wsbuilder/wsbuilder.go index 2123322356a3c..3d757f4c5590b 100644 --- a/coderd/wsbuilder/wsbuilder.go +++ b/coderd/wsbuilder/wsbuilder.go @@ -381,6 +381,10 @@ func (b *Builder) buildTx(authFunc func(action policy.Action, object rbac.Object code := http.StatusInternalServerError if rbac.IsUnauthorizedError(err) { code = http.StatusForbidden + } else if database.IsUniqueViolation(err) { + // Concurrent builds may result in duplicate + // workspace_builds_workspace_id_build_number_key. + code = http.StatusConflict } return BuildError{code, "insert workspace build", err} }