From 249218b383c6315399f6356e09b9acde81c51f1e Mon Sep 17 00:00:00 2001 From: Cian Johnston Date: Fri, 13 Oct 2023 15:21:11 +0000 Subject: [PATCH 1/5] fix(coderd/provisionerdserver): avoid logging on potentially canceled context --- coderd/provisionerdserver/provisionerdserver.go | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/coderd/provisionerdserver/provisionerdserver.go b/coderd/provisionerdserver/provisionerdserver.go index dd8bed7fef1b5..bd9430e70e6c9 100644 --- a/coderd/provisionerdserver/provisionerdserver.go +++ b/coderd/provisionerdserver/provisionerdserver.go @@ -1190,7 +1190,12 @@ func (s *server) CompleteJob(ctx context.Context, completed *proto.CompletedJob) // have a shutdown signal we can listen to. <-wait if err := s.Pubsub.Publish(codersdk.WorkspaceNotifyChannel(workspaceBuild.WorkspaceID), []byte{}); err != nil { - s.Logger.Error(ctx, "workspace notification after agent timeout failed", + // If the publish failed due to the context being canceled, there's nothing more for us + // to do here. + if errors.Is(err, context.Canceled) { + return + } + s.Logger.Error(context.Background(), "workspace notification after agent timeout failed", slog.F("workspace_build_id", workspaceBuild.ID), slog.Error(err), ) From ac5a8d356689d6560401b372b9717f3f1389982c Mon Sep 17 00:00:00 2001 From: Cian Johnston Date: Mon, 16 Oct 2023 10:58:33 +0000 Subject: [PATCH 2/5] fix(coderd): pass server ctx into provisionerd server --- coderd/coderd.go | 1 + .../provisionerdserver/provisionerdserver.go | 33 ++++++++++++------- .../provisionerdserver_test.go | 1 + enterprise/coderd/provisionerdaemons.go | 1 + 4 files changed, 24 insertions(+), 12 deletions(-) diff --git a/coderd/coderd.go b/coderd/coderd.go index 780386c4a5db1..174685a8137d1 100644 --- a/coderd/coderd.go +++ b/coderd/coderd.go @@ -1108,6 +1108,7 @@ func (api *API) CreateInMemoryProvisionerDaemon(ctx context.Context) (client pro logger := api.Logger.Named(fmt.Sprintf("inmem-provisionerd-%s", name)) logger.Info(ctx, "starting in-memory provisioner daemon") srv, err := provisionerdserver.NewServer( + ctx, api.AccessURL, uuid.New(), logger, diff --git a/coderd/provisionerdserver/provisionerdserver.go b/coderd/provisionerdserver/provisionerdserver.go index bd9430e70e6c9..2b5250c1e487d 100644 --- a/coderd/provisionerdserver/provisionerdserver.go +++ b/coderd/provisionerdserver/provisionerdserver.go @@ -58,6 +58,7 @@ type Options struct { } type server struct { + ctx context.Context AccessURL *url.URL ID uuid.UUID Logger slog.Logger @@ -107,6 +108,7 @@ func (t Tags) Valid() error { } func NewServer( + ctx context.Context, accessURL *url.URL, id uuid.UUID, logger slog.Logger, @@ -125,6 +127,9 @@ func NewServer( options Options, ) (proto.DRPCProvisionerDaemonServer, error) { // Panic early if pointers are nil + if ctx == nil { + return nil, xerrors.New("ctx is nil") + } if quotaCommitter == nil { return nil, xerrors.New("quotaCommitter is nil") } @@ -153,6 +158,7 @@ func NewServer( options.AcquireJobLongPollDur = DefaultAcquireJobLongPollDur } return &server{ + ctx: ctx, AccessURL: accessURL, ID: id, Logger: logger, @@ -1184,21 +1190,24 @@ func (s *server) CompleteJob(ctx context.Context, completed *proto.CompletedJob) } go func() { for _, wait := range updates { - // Wait for the next potential timeout to occur. Note that we - // can't listen on the context here because we will hang around - // after this function has returned. The s also doesn't - // have a shutdown signal we can listen to. - <-wait - if err := s.Pubsub.Publish(codersdk.WorkspaceNotifyChannel(workspaceBuild.WorkspaceID), []byte{}); err != nil { - // If the publish failed due to the context being canceled, there's nothing more for us - // to do here. - if errors.Is(err, context.Canceled) { - return - } - s.Logger.Error(context.Background(), "workspace notification after agent timeout failed", + select { + case <-s.ctx.Done(): + // If the server is shutting down, we don't want to wait around. + s.Logger.Warn(context.Background(), "stopping notifications due to server shutdown", slog.F("workspace_build_id", workspaceBuild.ID), slog.Error(err), ) + case <-wait: + // Wait for the next potential timeout to occur. Note that we + // can't listen on the context here because we will hang around + // after this function has returned. The s also doesn't + // have a shutdown signal we can listen to. + if err := s.Pubsub.Publish(codersdk.WorkspaceNotifyChannel(workspaceBuild.WorkspaceID), []byte{}); err != nil { + s.Logger.Error(context.Background(), "workspace notification after agent timeout failed", + slog.F("workspace_build_id", workspaceBuild.ID), + slog.Error(err), + ) + } } } }() diff --git a/coderd/provisionerdserver/provisionerdserver_test.go b/coderd/provisionerdserver/provisionerdserver_test.go index 34f3b8377c5d1..db97724c72987 100644 --- a/coderd/provisionerdserver/provisionerdserver_test.go +++ b/coderd/provisionerdserver/provisionerdserver_test.go @@ -1733,6 +1733,7 @@ func setup(t *testing.T, ignoreLogErrors bool, ov *overrides) (proto.DRPCProvisi } srv, err := provisionerdserver.NewServer( + ctx, &url.URL{}, srvID, slogtest.Make(t, &slogtest.Options{IgnoreErrors: ignoreLogErrors}), diff --git a/enterprise/coderd/provisionerdaemons.go b/enterprise/coderd/provisionerdaemons.go index c74a439e2db87..70f59f40308f0 100644 --- a/enterprise/coderd/provisionerdaemons.go +++ b/enterprise/coderd/provisionerdaemons.go @@ -243,6 +243,7 @@ func (api *API) provisionerDaemonServe(rw http.ResponseWriter, r *http.Request) logger := api.Logger.Named(fmt.Sprintf("ext-provisionerd-%s", name)) logger.Info(ctx, "starting external provisioner daemon") srv, err := provisionerdserver.NewServer( + api.ctx, api.AccessURL, uuid.New(), logger, From 17cece2296c7cfb6d7d2d2c86c6d1a0e34e808bc Mon Sep 17 00:00:00 2001 From: Cian Johnston Date: Mon, 16 Oct 2023 11:01:14 +0000 Subject: [PATCH 3/5] fixup! fix(coderd): pass server ctx into provisionerd server --- coderd/provisionerdserver/provisionerdserver.go | 1 + 1 file changed, 1 insertion(+) diff --git a/coderd/provisionerdserver/provisionerdserver.go b/coderd/provisionerdserver/provisionerdserver.go index 2b5250c1e487d..be45f6d26dbbb 100644 --- a/coderd/provisionerdserver/provisionerdserver.go +++ b/coderd/provisionerdserver/provisionerdserver.go @@ -1197,6 +1197,7 @@ func (s *server) CompleteJob(ctx context.Context, completed *proto.CompletedJob) slog.F("workspace_build_id", workspaceBuild.ID), slog.Error(err), ) + return case <-wait: // Wait for the next potential timeout to occur. Note that we // can't listen on the context here because we will hang around From 1d58f8b1d25a38390f33221ffa35f47ee208c10a Mon Sep 17 00:00:00 2001 From: Cian Johnston Date: Mon, 16 Oct 2023 11:43:27 +0000 Subject: [PATCH 4/5] address PR nits --- coderd/provisionerdserver/provisionerdserver.go | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/coderd/provisionerdserver/provisionerdserver.go b/coderd/provisionerdserver/provisionerdserver.go index be45f6d26dbbb..34ebe1e326372 100644 --- a/coderd/provisionerdserver/provisionerdserver.go +++ b/coderd/provisionerdserver/provisionerdserver.go @@ -58,7 +58,10 @@ type Options struct { } type server struct { - ctx context.Context + // lifecycleCtx must be tied to the API server's lifecycle + // as when the API server shuts down, we want to cancel any + // long-running operations. + lifecycleCtx context.Context AccessURL *url.URL ID uuid.UUID Logger slog.Logger @@ -108,7 +111,7 @@ func (t Tags) Valid() error { } func NewServer( - ctx context.Context, + lifecycleCtx context.Context, accessURL *url.URL, id uuid.UUID, logger slog.Logger, @@ -126,8 +129,8 @@ func NewServer( deploymentValues *codersdk.DeploymentValues, options Options, ) (proto.DRPCProvisionerDaemonServer, error) { - // Panic early if pointers are nil - if ctx == nil { + // Fail-fast if pointers are nil + if lifecycleCtx == nil { return nil, xerrors.New("ctx is nil") } if quotaCommitter == nil { @@ -158,7 +161,7 @@ func NewServer( options.AcquireJobLongPollDur = DefaultAcquireJobLongPollDur } return &server{ - ctx: ctx, + lifecycleCtx: lifecycleCtx, AccessURL: accessURL, ID: id, Logger: logger, @@ -1191,7 +1194,7 @@ func (s *server) CompleteJob(ctx context.Context, completed *proto.CompletedJob) go func() { for _, wait := range updates { select { - case <-s.ctx.Done(): + case <-s.lifecycleCtx.Done(): // If the server is shutting down, we don't want to wait around. s.Logger.Warn(context.Background(), "stopping notifications due to server shutdown", slog.F("workspace_build_id", workspaceBuild.ID), From 9cba8b1fda4dea2f4dda54a07c0f61597a966c20 Mon Sep 17 00:00:00 2001 From: Cian Johnston Date: Mon, 16 Oct 2023 12:04:56 +0000 Subject: [PATCH 5/5] address PR comments --- coderd/coderd.go | 2 +- coderd/provisionerdserver/provisionerdserver.go | 10 +++------- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/coderd/coderd.go b/coderd/coderd.go index 174685a8137d1..f301265cc5ad7 100644 --- a/coderd/coderd.go +++ b/coderd/coderd.go @@ -1108,7 +1108,7 @@ func (api *API) CreateInMemoryProvisionerDaemon(ctx context.Context) (client pro logger := api.Logger.Named(fmt.Sprintf("inmem-provisionerd-%s", name)) logger.Info(ctx, "starting in-memory provisioner daemon") srv, err := provisionerdserver.NewServer( - ctx, + api.ctx, api.AccessURL, uuid.New(), logger, diff --git a/coderd/provisionerdserver/provisionerdserver.go b/coderd/provisionerdserver/provisionerdserver.go index 34ebe1e326372..5afb85565c50b 100644 --- a/coderd/provisionerdserver/provisionerdserver.go +++ b/coderd/provisionerdserver/provisionerdserver.go @@ -1196,18 +1196,14 @@ func (s *server) CompleteJob(ctx context.Context, completed *proto.CompletedJob) select { case <-s.lifecycleCtx.Done(): // If the server is shutting down, we don't want to wait around. - s.Logger.Warn(context.Background(), "stopping notifications due to server shutdown", + s.Logger.Debug(ctx, "stopping notifications due to server shutdown", slog.F("workspace_build_id", workspaceBuild.ID), - slog.Error(err), ) return case <-wait: - // Wait for the next potential timeout to occur. Note that we - // can't listen on the context here because we will hang around - // after this function has returned. The s also doesn't - // have a shutdown signal we can listen to. + // Wait for the next potential timeout to occur. if err := s.Pubsub.Publish(codersdk.WorkspaceNotifyChannel(workspaceBuild.WorkspaceID), []byte{}); err != nil { - s.Logger.Error(context.Background(), "workspace notification after agent timeout failed", + s.Logger.Error(ctx, "workspace notification after agent timeout failed", slog.F("workspace_build_id", workspaceBuild.ID), slog.Error(err), )