refactor: change template archive extraction to be on provisioner #9264

Merged: 17 commits merged on Aug 25, 2023
Code review fixes
Signed-off-by: Spike Curtis <spike@coder.com>
spikecurtis committed Aug 24, 2023
commit d1a9b2a7c2032c89286a670531639fd1ac3129d1
4 changes: 3 additions & 1 deletion coderd/coderdtest/coderdtest.go
@@ -468,10 +468,12 @@ func NewProvisionerDaemon(t testing.TB, coderAPI *coderd.API) io.Closer {
_ = echoServer.Close()
cancelFunc()
})
// seems t.TempDir() is not safe to call from a different goroutine
workDir := t.TempDir()
go func() {
err := echo.Serve(ctx, &provisionersdk.ServeOptions{
Listener: echoServer,
WorkDirectory: t.TempDir(),
WorkDirectory: workDir,
Logger: coderAPI.Logger.Named("echo").Leveled(slog.LevelDebug),
})
assert.NoError(t, err)
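The change above captures t.TempDir() on the test goroutine before starting the server goroutine, since (per the new code comment) it does not appear safe to call from another goroutine. A minimal sketch of the pattern, with illustrative names not taken from the PR:

```go
package example

import "testing"

func TestServeWithWorkDir(t *testing.T) {
	// Call t.TempDir on the test goroutine: it registers cleanup on t,
	// so creating the directory here avoids racing with the test's lifecycle.
	workDir := t.TempDir()

	done := make(chan struct{})
	go func() {
		defer close(done)
		// The goroutine only consumes the pre-created path; it never
		// touches testing.T directly.
		serveFromDir(workDir)
	}()
	<-done
}

// serveFromDir stands in for provisionersdk's echo.Serve in this sketch.
func serveFromDir(dir string) { _ = dir }
```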
10 changes: 5 additions & 5 deletions provisionerd/proto/provisionerd.proto
@@ -45,9 +45,9 @@ message AcquiredJob {
TemplateImport template_import = 7;
TemplateDryRun template_dry_run = 8;
}
// trace_metadata is currently used for tracing information only. It allows
// jobs to be tied to the request that created them.
map<string, string> trace_metadata = 9;
// trace_metadata is currently used for tracing information only. It allows
// jobs to be tied to the request that created them.
map<string, string> trace_metadata = 9;
}

message FailedJob {
@@ -113,15 +113,15 @@ message UpdateJobRequest {
string job_id = 1;
repeated Log logs = 2;
repeated provisioner.TemplateVariable template_variables = 4;
repeated provisioner.VariableValue user_variable_values = 5;
repeated provisioner.VariableValue user_variable_values = 5;
bytes readme = 6;
}

message UpdateJobResponse {
reserved 2;

bool canceled = 1;
repeated provisioner.VariableValue variable_values = 3;
repeated provisioner.VariableValue variable_values = 3;
}

message CommitQuotaRequest {
26 changes: 13 additions & 13 deletions provisionersdk/proto/provisioner.proto
@@ -82,8 +82,8 @@ message InstanceIdentityAuth {
}

message GitAuthProvider {
string id = 1;
string access_token = 2;
string id = 1;
string access_token = 2;
}

// Agent represents a running agent on the workspace.
@@ -110,15 +110,15 @@ message Agent {
string token = 9;
string instance_id = 10;
}
int32 connection_timeout_seconds = 11;
string troubleshooting_url = 12;
string motd_file = 13;
// Field 14 was bool login_before_ready = 14, now removed.
int32 startup_script_timeout_seconds = 15;
string shutdown_script = 16;
int32 shutdown_script_timeout_seconds = 17;
int32 connection_timeout_seconds = 11;
string troubleshooting_url = 12;
string motd_file = 13;
// Field 14 was bool login_before_ready = 14, now removed.
int32 startup_script_timeout_seconds = 15;
string shutdown_script = 16;
int32 shutdown_script_timeout_seconds = 17;
repeated Metadata metadata = 18;
string startup_script_behavior = 19;
string startup_script_behavior = 19;
}

enum AppSharingLevel {
@@ -207,12 +207,12 @@ message ParseRequest {
message ParseComplete {
string error = 1;
repeated TemplateVariable template_variables = 2;
bytes readme = 3;
bytes readme = 3;
}

// PlanRequest asks the provisioner to plan what resources & parameters it will create
message PlanRequest {
Metadata metadata = 1;
Metadata metadata = 1;
repeated RichParameterValue rich_parameter_values = 2;
repeated VariableValue variable_values = 3;
repeated GitAuthProvider git_auth_providers = 4;
@@ -229,7 +229,7 @@ message PlanComplete {
// ApplyRequest asks the provisioner to apply the changes. Apply MUST be preceded by a successful plan request/response
// in the same Session. The plan data is not transmitted over the wire and is cached by the provisioner in the Session.
message ApplyRequest {
Metadata metadata = 1;
Metadata metadata = 1;
}

// ApplyComplete indicates a request to apply completed.
21 changes: 16 additions & 5 deletions provisionersdk/session.go
@@ -36,14 +36,23 @@ func (p *protoServer) Session(stream proto.DRPCProvisioner_SessionStream) error
}
sessDir := fmt.Sprintf("Session%p", s)
Review thread on this line:

Member:
Why %p? Isn't it better to link it with the PID plus some ID? If the process dies, it would be easier to find.

Contributor Author:
We need something unique to the Session, for the lifetime of the Session. The pointer is a simple and easy choice.

Each provisioner needs to be run with a unique work directory so that they don't trample on each other's caches, so there is no need to qualify the pointer with a PID.

The PID might make things easier to find... if you know the PID, which is rarely the case after a crash.

Member:
Point in favour of PID: the PID will at least show up in dmesg in the case of an OOM kill.

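For context on the thread above, a small sketch of the naming trade-off; the PID-qualified variant is only the reviewer's suggestion and is illustrative, not what the PR does:

```go
package main

import (
	"fmt"
	"os"
	"path/filepath"
)

type Session struct{ workDirectory string }

func main() {
	s := &Session{}
	root := os.TempDir() // stand-in for the provisioner's configured work directory

	// PR behavior: the session pointer is unique for as long as the session
	// lives, so %p gives each concurrent session its own subdirectory.
	perSession := filepath.Join(root, fmt.Sprintf("Session%p", s))

	// Reviewer's suggestion (illustrative only): include the PID so a directory
	// left behind by a crashed process can be matched against dmesg output.
	withPID := filepath.Join(root, fmt.Sprintf("Session_%d_%p", os.Getpid(), s))

	fmt.Println(perSession)
	fmt.Println(withPID)
}
```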
s.WorkDirectory = filepath.Join(p.opts.WorkDirectory, sessDir)
err := os.MkdirAll(s.WorkDirectory, 0o700)
// the WorkDirectory shouldn't exist, but it's possible it does from a restarted process or failed cleanup
err := os.RemoveAll(s.WorkDirectory)
if err != nil {
// RemoveAll returns nil if (as expected) the path doesn't exist. So, if we hit an error, the WorkDirectory
// exists AND we failed to clean it up.
s.Logger.Error(s.Context(), "failed to pre-clean work directory", slog.Error(err))
return xerrors.Errorf("failed to pre-clean work directory: %w", err)
}
err = os.MkdirAll(s.WorkDirectory, 0o700)
if err != nil {
return xerrors.Errorf("create work directory %q: %w", s.WorkDirectory, err)
}
defer func() {
var err error
// Cleanup the work directory after execution.
for attempt := 0; attempt < 5; attempt++ {
err := os.RemoveAll(s.WorkDirectory)
err = os.RemoveAll(s.WorkDirectory)
if err != nil {
// On Windows, open files cannot be removed.
// When the provisioner daemon is shutting down,
@@ -54,8 +63,10 @@ func (p *protoServer) Session(stream proto.DRPCProvisioner_SessionStream) error
continue
}
s.Logger.Debug(s.Context(), "cleaned up work directory")
break
return
}
s.Logger.Error(s.Context(), "failed to clean up work directory after multiple attempts",
slog.F("path", s.WorkDirectory), slog.Error(err))
}()
req, err := stream.Recv()
if err != nil {
Expand All @@ -70,7 +81,7 @@ func (p *protoServer) Session(stream proto.DRPCProvisioner_SessionStream) error
s.logLevel = proto.LogLevel_value[strings.ToUpper(s.Config.ProvisionerLogLevel)]
}

err = s.ExtractArchive()
err = s.extractArchive()
if err != nil {
return xerrors.Errorf("extract archive: %w", err)
}
@@ -176,7 +187,7 @@ func (s *Session) Context() context.Context {
return s.stream.Context()
}

func (s *Session) ExtractArchive() error {
func (s *Session) extractArchive() error {
ctx := s.Context()

s.Logger.Info(ctx, "unpacking template source archive",
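The session.go changes above do two things: pre-clean the work directory before recreating it (a directory left over from a restarted process or failed cleanup would otherwise linger), and retry the deferred removal because on Windows open files cannot be deleted while the daemon is shutting down. A standalone sketch of both behaviors under assumed retry count and backoff (the retry interval is not shown in this hunk):

```go
package example

import (
	"fmt"
	"os"
	"time"
)

// prepareWorkDir removes any directory left over from a restarted process or
// failed cleanup, then recreates it. RemoveAll returns nil when the path does
// not exist, so an error here means the directory exists and could not be cleaned.
func prepareWorkDir(path string) error {
	if err := os.RemoveAll(path); err != nil {
		return fmt.Errorf("failed to pre-clean work directory: %w", err)
	}
	if err := os.MkdirAll(path, 0o700); err != nil {
		return fmt.Errorf("create work directory %q: %w", path, err)
	}
	return nil
}

// cleanupWorkDir retries removal because on Windows files that are still open
// (for example, by a child process that has not yet exited) cannot be deleted.
// Only the final error is logged, after all attempts fail.
func cleanupWorkDir(path string, logf func(format string, args ...any)) {
	var err error
	for attempt := 0; attempt < 5; attempt++ {
		err = os.RemoveAll(path)
		if err != nil {
			time.Sleep(250 * time.Millisecond) // assumed backoff; not shown in the diff
			continue
		}
		return // cleaned up successfully
	}
	logf("failed to clean up work directory %q after multiple attempts: %v", path, err)
}
```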