Skip to content

Commit c55793c

Browse files
committed
feat: Add graceful exits to provisionerd
Terraform (or other provisioners) may need to clean up state or cancel actions before exiting. This adds the ability to gracefully exit provisionerd.
1 parent cb1c883 commit c55793c

File tree

13 files changed

+1092
-537
lines changed

13 files changed

+1092
-537
lines changed

coderd/provisionerdaemons.go

Lines changed: 23 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -404,8 +404,8 @@ func (server *provisionerdServer) UpdateJob(ctx context.Context, request *proto.
404404
return &proto.UpdateJobResponse{}, nil
405405
}
406406

407-
func (server *provisionerdServer) CancelJob(ctx context.Context, cancelJob *proto.CancelledJob) (*proto.Empty, error) {
408-
jobID, err := uuid.Parse(cancelJob.JobId)
407+
func (server *provisionerdServer) FailJob(ctx context.Context, failJob *proto.FailedJob) (*proto.Empty, error) {
408+
jobID, err := uuid.Parse(failJob.JobId)
409409
if err != nil {
410410
return nil, xerrors.Errorf("parse job id: %w", err)
411411
}
@@ -422,19 +422,34 @@ func (server *provisionerdServer) CancelJob(ctx context.Context, cancelJob *prot
422422
Time: database.Now(),
423423
Valid: true,
424424
},
425-
CancelledAt: sql.NullTime{
426-
Time: database.Now(),
427-
Valid: true,
428-
},
429425
UpdatedAt: database.Now(),
430426
Error: sql.NullString{
431-
String: cancelJob.Error,
432-
Valid: cancelJob.Error != "",
427+
String: failJob.Error,
428+
Valid: failJob.Error != "",
433429
},
434430
})
435431
if err != nil {
436432
return nil, xerrors.Errorf("update provisioner job: %w", err)
437433
}
434+
switch jobType := failJob.Type.(type) {
435+
case *proto.FailedJob_WorkspaceProvision_:
436+
if jobType.WorkspaceProvision.State == nil {
437+
break
438+
}
439+
var input workspaceProvisionJob
440+
err = json.Unmarshal(job.Input, &input)
441+
if err != nil {
442+
return nil, xerrors.Errorf("unmarshal workspace provision input: %w", err)
443+
}
444+
err = server.Database.UpdateWorkspaceHistoryByID(ctx, database.UpdateWorkspaceHistoryByIDParams{
445+
ID: jobID,
446+
UpdatedAt: database.Now(),
447+
ProvisionerState: jobType.WorkspaceProvision.State,
448+
})
449+
if err != nil {
450+
return nil, xerrors.Errorf("update workspace history state: %w", err)
451+
}
452+
}
438453
return &proto.Empty{}, nil
439454
}
440455

coderd/provisionerjobs.go

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -276,18 +276,18 @@ func convertProvisionerJob(provisionerJob database.ProvisionerJob) ProvisionerJo
276276
case !provisionerJob.StartedAt.Valid:
277277
job.Status = ProvisionerJobStatusPending
278278
case provisionerJob.CompletedAt.Valid:
279-
job.Status = ProvisionerJobStatusSucceeded
279+
if job.Error == "" {
280+
job.Status = ProvisionerJobStatusSucceeded
281+
} else {
282+
job.Status = ProvisionerJobStatusFailed
283+
}
280284
case database.Now().Sub(provisionerJob.UpdatedAt) > 30*time.Second:
281285
job.Status = ProvisionerJobStatusFailed
282286
job.Error = "Worker failed to update job in time."
283287
default:
284288
job.Status = ProvisionerJobStatusRunning
285289
}
286290

287-
if !provisionerJob.CancelledAt.Valid && job.Error != "" {
288-
job.Status = ProvisionerJobStatusFailed
289-
}
290-
291291
return job
292292
}
293293

provisioner/echo/serve.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,10 @@ func (*echo) Provision(request *proto.Provision_Request, stream proto.DRPCProvis
102102
return stream.Context().Err()
103103
}
104104

105+
// Shutdown ignores both of its arguments and immediately returns an empty
// response with a nil error.
func (*echo) Shutdown(_ context.Context, _ *proto.Empty) (*proto.Empty, error) {
106+
return &proto.Empty{}, nil
107+
}
108+
105109
type Responses struct {
106110
Parse []*proto.Parse_Response
107111
Provision []*proto.Provision_Response

provisioner/terraform/provision.go

Lines changed: 53 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import (
77
"fmt"
88
"io"
99
"os"
10+
"os/exec"
1011
"path/filepath"
1112
"reflect"
1213
"strings"
@@ -253,25 +254,21 @@ func (t *terraform) runTerraformPlan(ctx context.Context, terraform *tfexec.Terr
253254
}
254255

255256
func (t *terraform) runTerraformApply(ctx context.Context, terraform *tfexec.Terraform, request *proto.Provision_Request, stream proto.DRPCProvisioner_ProvisionStream, statefilePath string) error {
256-
env := map[string]string{
257-
"CODER_URL": request.Metadata.CoderUrl,
258-
"CODER_WORKSPACE_TRANSITION": strings.ToLower(request.Metadata.WorkspaceTransition.String()),
257+
env := []string{
258+
"CODER_URL=" + request.Metadata.CoderUrl,
259+
"CODER_WORKSPACE_TRANSITION=" + strings.ToLower(request.Metadata.WorkspaceTransition.String()),
259260
}
260-
options := []tfexec.ApplyOption{tfexec.JSON(true)}
261+
vars := []string{}
261262
for _, param := range request.ParameterValues {
262263
switch param.DestinationScheme {
263264
case proto.ParameterDestination_ENVIRONMENT_VARIABLE:
264-
env[param.Name] = param.Value
265+
env = append(env, fmt.Sprintf("%s=%s", param.Name, param.Value))
265266
case proto.ParameterDestination_PROVISIONER_VARIABLE:
266-
options = append(options, tfexec.Var(fmt.Sprintf("%s=%s", param.Name, param.Value)))
267+
vars = append(vars, fmt.Sprintf("%s=%s", param.Name, param.Value))
267268
default:
268269
return xerrors.Errorf("unsupported parameter type %q for %q", param.DestinationScheme, param.Name)
269270
}
270271
}
271-
err := terraform.SetEnv(env)
272-
if err != nil {
273-
return xerrors.Errorf("apply environment variables: %w", err)
274-
}
275272

276273
reader, writer := io.Pipe()
277274
defer reader.Close()
@@ -319,11 +316,24 @@ func (t *terraform) runTerraformApply(ctx context.Context, terraform *tfexec.Ter
319316
}
320317
}()
321318

322-
terraform.SetStdout(writer)
323-
t.logger.Debug(ctx, "running apply", slog.F("options", options))
324-
err = terraform.Apply(ctx, options...)
319+
t.logger.Debug(ctx, "running apply", slog.F("vars", len(vars)), slog.F("env", len(env)))
320+
err := runApplyCommand(ctx, t.shutdownCtx, terraform.ExecPath(), terraform.WorkingDir(), writer, env, vars)
325321
if err != nil {
326-
return xerrors.Errorf("apply terraform: %w", err)
322+
errorMessage := err.Error()
323+
// Terraform can fail an apply and still need to store its state.
324+
// In this case, we return Complete with an explicit error message.
325+
statefileContent, err := os.ReadFile(statefilePath)
326+
if err != nil {
327+
return xerrors.Errorf("read file %q: %w", statefilePath, err)
328+
}
329+
return stream.Send(&proto.Provision_Response{
330+
Type: &proto.Provision_Response_Complete{
331+
Complete: &proto.Provision_Complete{
332+
State: statefileContent,
333+
Error: errorMessage,
334+
},
335+
},
336+
})
327337
}
328338
t.logger.Debug(ctx, "ran apply")
329339

@@ -428,6 +438,35 @@ func (t *terraform) runTerraformApply(ctx context.Context, terraform *tfexec.Ter
428438
})
429439
}
430440

441+
// runApplyCommand runs "terraform apply" by invoking the binary directly.
//
// This couldn't use terraform-exec, because it doesn't support cancellation,
// and there didn't appear to be a straight-forward way to add it.
//
// bin is the executable to run and dir its working directory. Each entry of
// vars is passed as a separate "-var" argument, env is assigned as the process
// environment, and stdout receives the command's standard output. The process
// is bound to ctx through exec.CommandContext; additionally, if shutdownCtx is
// done while the process is running, the process is sent os.Kill.
//
// The error from starting or waiting on the command is returned unmodified.
func runApplyCommand(ctx, shutdownCtx context.Context, bin, dir string, stdout io.Writer, env, vars []string) error {
	args := []string{
		"apply",
		"-no-color",
		"-auto-approve",
		"-input=false",
		"-json",
		"-refresh=true",
	}
	for _, variable := range vars {
		args = append(args, "-var", variable)
	}

	cmd := exec.CommandContext(ctx, bin, args...)
	cmd.Stdout = stdout
	// NOTE(review): a non-nil Env replaces the inherited environment rather
	// than extending it; confirm the child does not rely on variables such as
	// PATH being passed through.
	cmd.Env = env
	cmd.Dir = dir

	// Start the process before watching for shutdown. cmd.Process is nil until
	// Start succeeds, so signaling from a goroutine launched earlier could
	// dereference a nil pointer (and raced with Start populating the field).
	if err := cmd.Start(); err != nil {
		return err
	}

	// done releases the watcher as soon as the command has finished, so the
	// goroutine never outlives this call.
	done := make(chan struct{})
	defer close(done)
	go func() {
		select {
		case <-done:
		case <-ctx.Done():
			// exec.CommandContext already stops the process in this case.
		case <-shutdownCtx.Done():
			// NOTE(review): os.Kill cannot be handled by the child process.
			// os.Interrupt would give it a chance to clean up, but sending it
			// is not implemented on Windows; confirm which is intended.
			_ = cmd.Process.Signal(os.Kill)
		}
	}()

	return cmd.Wait()
}
469+
431470
type terraformProvisionLog struct {
432471
Level string `json:"@level"`
433472
Message string `json:"@message"`

provisioner/terraform/serve.go

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ import (
1010
"cdr.dev/slog"
1111

1212
"github.com/coder/coder/provisionersdk"
13+
"github.com/coder/coder/provisionersdk/proto"
1314
)
1415

1516
var (
@@ -43,14 +44,25 @@ func Serve(ctx context.Context, options *ServeOptions) error {
4344
}
4445
options.BinaryPath = binaryPath
4546
}
46-
47+
shutdownCtx, shutdownCancel := context.WithCancel(ctx)
4748
return provisionersdk.Serve(ctx, &terraform{
48-
binaryPath: options.BinaryPath,
49-
logger: options.Logger,
49+
binaryPath: options.BinaryPath,
50+
logger: options.Logger,
51+
shutdownCtx: shutdownCtx,
52+
shutdownCancel: shutdownCancel,
5053
}, options.ServeOptions)
5154
}
5255

5356
type terraform struct {
5457
binaryPath string
5558
logger slog.Logger
59+
60+
shutdownCtx context.Context
61+
shutdownCancel context.CancelFunc
62+
}
63+
64+
// Shutdown signals to begin graceful shutdown of any running operations.
// It does so by calling t.shutdownCancel; the ctx argument is not used.
// It always returns an empty response and a nil error.
65+
func (t *terraform) Shutdown(ctx context.Context, _ *proto.Empty) (*proto.Empty, error) {
66+
t.shutdownCancel()
67+
return &proto.Empty{}, nil
5668
}

0 commit comments

Comments
 (0)