From 4142fb3576ef2899efdd8a5eba024be95556779a Mon Sep 17 00:00:00 2001 From: Steven Masley Date: Wed, 1 May 2024 13:00:12 -0500 Subject: [PATCH 1/3] chore: shutdown provisioner should stop waiting on client --- enterprise/cli/provisionerdaemons.go | 6 ++++++ provisionerd/provisionerd.go | 3 +++ 2 files changed, 9 insertions(+) diff --git a/enterprise/cli/provisionerdaemons.go b/enterprise/cli/provisionerdaemons.go index 0b0548cfd0c72..079b1891346eb 100644 --- a/enterprise/cli/provisionerdaemons.go +++ b/enterprise/cli/provisionerdaemons.go @@ -239,6 +239,12 @@ func (r *RootCmd) provisionerDaemonStart() *serpent.Command { return xerrors.Errorf("shutdown: %w", err) } + // Shutdown does not call close. Must call it manually. + err = srv.Close() + if err != nil { + return xerrors.Errorf("close server: %w", err) + } + cancel() if xerrors.Is(exitErr, context.Canceled) { return nil diff --git a/provisionerd/provisionerd.go b/provisionerd/provisionerd.go index 3e49648700f2f..deac80466b48d 100644 --- a/provisionerd/provisionerd.go +++ b/provisionerd/provisionerd.go @@ -236,6 +236,9 @@ func (p *Server) client() (proto.DRPCProvisionerDaemonClient, bool) { select { case <-p.closeContext.Done(): return nil, false + case <-p.shuttingDownCh: + // Shutting down should return a nil client and unblock + return nil, false case client := <-p.clientCh: return client, true } From 52b0e83a9eee831b2817eef6d5842c9f0f771c87 Mon Sep 17 00:00:00 2001 From: Steven Masley Date: Thu, 2 May 2024 09:24:41 -0500 Subject: [PATCH 2/3] chore: add unit test that replicates failed client conn This fails on `main` --- provisionerd/provisionerd_test.go | 44 +++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/provisionerd/provisionerd_test.go b/provisionerd/provisionerd_test.go index 2031fa6c3939e..68b3afc26ca10 100644 --- a/provisionerd/provisionerd_test.go +++ b/provisionerd/provisionerd_test.go @@ -597,6 +597,50 @@ func TestProvisionerd(t *testing.T) { assert.True(t, didFail.Load(), "should fail the job") }) + // When there is no coderd, the connect loop will never succeed. + t.Run("ShutdownNoCoderd", func(t *testing.T) { + t.Parallel() + done := make(chan struct{}) + t.Cleanup(func() { + close(done) + }) + + connectAttemptedClose := sync.Once{} + connectAttempted := make(chan struct{}) + server := createProvisionerd(t, func(ctx context.Context) (proto.DRPCProvisionerDaemonClient, error) { + connectAttemptedClose.Do(func() { close(connectAttempted) }) + return nil, fmt.Errorf("client connection always fails") + }, provisionerd.LocalProvisioners{ + "someprovisioner": createProvisionerClient(t, done, provisionerTestServer{ + plan: func( + s *provisionersdk.Session, + _ *sdkproto.PlanRequest, + canceledOrComplete <-chan struct{}, + ) *sdkproto.PlanComplete { + s.ProvisionLog(sdkproto.LogLevel_DEBUG, "in progress") + <-canceledOrComplete + return &sdkproto.PlanComplete{ + Error: "some error", + } + }, + apply: func( + _ *provisionersdk.Session, + _ *sdkproto.ApplyRequest, + _ <-chan struct{}, + ) *sdkproto.ApplyComplete { + t.Error("should never apply") + return &sdkproto.ApplyComplete{} + }, + }), + }) + + // At least 1 attempt to connect + require.Condition(t, closedWithin(connectAttempted, testutil.WaitShort)) + err := server.Shutdown(context.Background(), true) + require.NoError(t, err) + require.NoError(t, server.Close()) + }) + t.Run("Shutdown", func(t *testing.T) { t.Parallel() done := make(chan struct{}) From 418c4ddfc8c458c923a5d0495adb54aa5ea1b090 Mon Sep 17 00:00:00 2001 From: Steven Masley Date: Thu, 2 May 2024 09:29:03 -0500 Subject: [PATCH 3/3] cleanup test --- provisionerd/provisionerd_test.go | 38 +++++++++++-------------------- 1 file changed, 13 insertions(+), 25 deletions(-) diff --git a/provisionerd/provisionerd_test.go b/provisionerd/provisionerd_test.go index 68b3afc26ca10..bca072707f491 100644 --- a/provisionerd/provisionerd_test.go +++ b/provisionerd/provisionerd_test.go @@ -597,7 +597,8 @@ func TestProvisionerd(t *testing.T) { assert.True(t, didFail.Load(), "should fail the job") }) - // When there is no coderd, the connect loop will never succeed. + // Simulates when there is no coderd to connect to. So the client connection + // will never be established. t.Run("ShutdownNoCoderd", func(t *testing.T) { t.Parallel() done := make(chan struct{}) @@ -608,36 +609,23 @@ func TestProvisionerd(t *testing.T) { connectAttemptedClose := sync.Once{} connectAttempted := make(chan struct{}) server := createProvisionerd(t, func(ctx context.Context) (proto.DRPCProvisionerDaemonClient, error) { + // This is the dial out to Coderd, which in this unit test will always fail. connectAttemptedClose.Do(func() { close(connectAttempted) }) return nil, fmt.Errorf("client connection always fails") }, provisionerd.LocalProvisioners{ - "someprovisioner": createProvisionerClient(t, done, provisionerTestServer{ - plan: func( - s *provisionersdk.Session, - _ *sdkproto.PlanRequest, - canceledOrComplete <-chan struct{}, - ) *sdkproto.PlanComplete { - s.ProvisionLog(sdkproto.LogLevel_DEBUG, "in progress") - <-canceledOrComplete - return &sdkproto.PlanComplete{ - Error: "some error", - } - }, - apply: func( - _ *provisionersdk.Session, - _ *sdkproto.ApplyRequest, - _ <-chan struct{}, - ) *sdkproto.ApplyComplete { - t.Error("should never apply") - return &sdkproto.ApplyComplete{} - }, - }), + "someprovisioner": createProvisionerClient(t, done, provisionerTestServer{}), }) - // At least 1 attempt to connect + // Wait for at least 1 attempt to connect to ensure the connect go routine + // is running. require.Condition(t, closedWithin(connectAttempted, testutil.WaitShort)) - err := server.Shutdown(context.Background(), true) - require.NoError(t, err) + + // The test is ensuring this Shutdown call does not block indefinitely. + // If it does, the context will return with an error, and the test will + // fail. + shutdownCtx := testutil.Context(t, testutil.WaitShort) + err := server.Shutdown(shutdownCtx, true) + require.NoError(t, err, "shutdown did not unblock. Failed to close the server gracefully.") require.NoError(t, server.Close()) })