Commit b34ee64

feat: pass through signals to inner container
1 parent 8e68914 commit b34ee64
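
This commit makes the outer envbox process trap SIGTERM/SIGINT/SIGWINCH, forward a TERM to the bootstrap process inside the inner container, and wait for that process to exit instead of dying immediately. A condensed sketch of that flow using only the standard library; the forwarding helper named in the comment is hypothetical and stands in for the dockerutil calls in the diff below.

package main

import (
    "context"
    "os"
    "os/signal"
    "syscall"
)

func main() {
    // Cancel signalCtx on the first signal, mirroring the goroutine added in
    // cli/docker.go below.
    sigs := make(chan os.Signal, 1)
    signal.Notify(sigs, syscall.SIGTERM, syscall.SIGINT)

    signalCtx, signalCancel := context.WithCancel(context.Background())
    go func() {
        defer signalCancel()
        <-sigs
    }()

    // ... start the inner container, remember the bootstrap exec ID ...

    <-signalCtx.Done()
    // forwardTERMToBootstrap(bootstrapExecID) // hypothetical: send TERM to the
    // inner bootstrap process and wait for it to exit before this process dies.
}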

File tree

7 files changed: +268 −63 lines changed


cli/docker.go

Lines changed: 129 additions & 56 deletions
@@ -7,11 +7,15 @@ import (
     "io"
     "net/url"
     "os"
+    "os/exec"
+    "os/signal"
     "path"
     "path/filepath"
     "sort"
     "strconv"
     "strings"
+    "syscall"
+    "time"

     "github.com/docker/docker/api/types/container"
     "github.com/google/go-containerregistry/pkg/name"
@@ -157,11 +161,26 @@ func dockerCmd() *cobra.Command {
         Short: "Create a docker-based CVM",
         RunE: func(cmd *cobra.Command, args []string) (err error) {
             var (
-                ctx = cmd.Context()
-                log = slog.Make(slogjson.Sink(cmd.ErrOrStderr()), slogkubeterminate.Make()).Leveled(slog.LevelDebug)
-                blog buildlog.Logger = buildlog.JSONLogger{Encoder: json.NewEncoder(os.Stderr)}
+                ctx, cancel = context.WithCancel(cmd.Context()) //nolint
+                log = slog.Make(slogjson.Sink(cmd.ErrOrStderr()), slogkubeterminate.Make()).Leveled(slog.LevelDebug)
+                blog buildlog.Logger = buildlog.JSONLogger{Encoder: json.NewEncoder(os.Stderr)}
             )

+            // We technically leak a context here, but it's impact is negligible.
+            signalCtx, signalCancel := context.WithCancel(cmd.Context())
+            sigs := make(chan os.Signal, 1)
+            signal.Notify(sigs, syscall.SIGTERM, syscall.SIGINT, syscall.SIGWINCH)
+
+            // Spawn a goroutine to wait for a signal.
+            go func() {
+                defer signalCancel()
+                log.Info(ctx, "waiting for signal")
+                <-sigs
+                log.Info(ctx, "got signal, canceling context")
+            }()
+
+            cmd.SetContext(ctx)
+
             if flags.noStartupLogs {
                 log = slog.Make(slogjson.Sink(io.Discard))
                 blog = buildlog.NopLogger{}
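
Aside: the notify-then-cancel wiring in the hunk above is essentially what signal.NotifyContext (Go 1.16+) packages up; the hand-rolled version here keeps the channel around so it can log before and after the signal arrives. A minimal equivalent for comparison only, not a suggested change to the commit:

package main

import (
    "context"
    "os/signal"
    "syscall"
)

func main() {
    // signalCtx is canceled on the first matching signal; stop() releases the
    // signal registration, much like the deferred signalCancel above.
    signalCtx, stop := signal.NotifyContext(context.Background(),
        syscall.SIGTERM, syscall.SIGINT, syscall.SIGWINCH)
    defer stop()

    <-signalCtx.Done()
}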
@@ -210,11 +229,13 @@ func dockerCmd() *cobra.Command {
                 case err := <-background.New(ctx, log, "sysbox-mgr", sysboxArgs...).Run():
                     blog.Info(sysboxErrMsg)
                     //nolint
-                    log.Fatal(ctx, "sysbox-mgr exited", slog.Error(err))
+                    log.Critical(ctx, "sysbox-mgr exited", slog.Error(err))
+                    panic(err)
                 case err := <-background.New(ctx, log, "sysbox-fs").Run():
                     blog.Info(sysboxErrMsg)
                     //nolint
-                    log.Fatal(ctx, "sysbox-fs exited", slog.Error(err))
+                    log.Critical(ctx, "sysbox-fs exited", slog.Error(err))
+                    panic(err)
                 }
             }()

@@ -316,7 +337,7 @@ func dockerCmd() *cobra.Command {
                 )
             }

-            err = runDockerCVM(ctx, log, client, blog, flags)
+            bootstrapExecID, err := runDockerCVM(ctx, log, client, blog, flags)
             if err != nil {
                 // It's possible we failed because we ran out of disk while
                 // pulling the image. We should restart the daemon and use
@@ -345,14 +366,53 @@ func dockerCmd() *cobra.Command {
                     }()

                     log.Debug(ctx, "reattempting container creation")
-                    err = runDockerCVM(ctx, log, client, blog, flags)
+                    bootstrapExecID, err = runDockerCVM(ctx, log, client, blog, flags)
                 }
                 if err != nil {
                     blog.Errorf("Failed to run envbox: %v", err)
                     return xerrors.Errorf("run: %w", err)
                 }
             }

+            go func() {
+                defer cancel()
+
+                <-signalCtx.Done()
+                log.Debug(ctx, "ctx canceled, forwarding signal to inner container")
+
+                time.Sleep(time.Second * 10)
+                if bootstrapExecID == "" {
+                    log.Debug(ctx, "no bootstrap exec id, skipping")
+                    return
+                }
+
+                ctx, cancel := context.WithTimeout(context.Background(), time.Second*90)
+                defer cancel()
+
+                bootstrapPID, err := dockerutil.GetExecPID(ctx, client, bootstrapExecID)
+                if err != nil {
+                    log.Error(ctx, "get exec pid", slog.Error(err))
+                }
+
+                log.Debug(ctx, "killing container", slog.F("bootstrap_pid", bootstrapPID))
+
+                // The PID returned is the PID _outside_ the container...
+                out, err := exec.Command("kill", "-TERM", strconv.Itoa(bootstrapPID)).CombinedOutput()
+                if err != nil {
+                    log.Error(ctx, "kill bootstrap process", slog.Error(err), slog.F("output", string(out)))
+                    return
+                }
+
+                log.Debug(ctx, "sent kill signal waiting for process to exit")
+                err = dockerutil.WaitForExit(ctx, client, bootstrapExecID)
+                if err != nil {
+                    log.Error(ctx, "wait for exit", slog.Error(err))
+                    return
+                }
+
+                log.Debug(ctx, "bootstrap process successfully exited")
+            }()
+
             return nil
         },
     }
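
dockerutil.GetExecPID and dockerutil.WaitForExit are envbox-internal helpers whose bodies are not part of this diff. The following is a plausible sketch of that kind of helper, written directly against the upstream Docker SDK and assuming they poll ContainerExecInspect; the real implementation may differ, and envbox's own dockerutil.Client wrapper is replaced by the SDK's client.APIClient here.

package example

import (
    "context"
    "time"

    "github.com/docker/docker/client"
    "golang.org/x/xerrors"
)

// GetExecPID polls until the exec has been assigned a PID, sidestepping the
// race noted in the diff where inspecting too early returns no PID.
func GetExecPID(ctx context.Context, cli client.APIClient, execID string) (int, error) {
    for {
        inspect, err := cli.ContainerExecInspect(ctx, execID)
        if err != nil {
            return 0, xerrors.Errorf("inspect exec: %w", err)
        }
        if inspect.Pid != 0 {
            return inspect.Pid, nil
        }
        select {
        case <-ctx.Done():
            return 0, ctx.Err()
        case <-time.After(time.Second):
        }
    }
}

// WaitForExit polls until the exec'd process is no longer running.
func WaitForExit(ctx context.Context, cli client.APIClient, execID string) error {
    for {
        inspect, err := cli.ContainerExecInspect(ctx, execID)
        if err != nil {
            return xerrors.Errorf("inspect exec: %w", err)
        }
        if !inspect.Running {
            return nil
        }
        select {
        case <-ctx.Done():
            return ctx.Err()
        case <-time.After(time.Second):
        }
    }
}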
@@ -390,25 +450,22 @@ func dockerCmd() *cobra.Command {
     return cmd
 }

-func runDockerCVM(ctx context.Context, log slog.Logger, client dockerutil.Client, blog buildlog.Logger, flags flags) error {
+func runDockerCVM(ctx context.Context, log slog.Logger, client dockerutil.Client, blog buildlog.Logger, flags flags) (string, error) {
     fs := xunix.GetFS(ctx)
-
-    // Set our OOM score to something really unfavorable to avoid getting killed
-    // in memory-scarce scenarios.
     err := xunix.SetOOMScore(ctx, "self", "-1000")
     if err != nil {
-        return xerrors.Errorf("set oom score: %w", err)
+        return "", xerrors.Errorf("set oom score: %w", err)
     }
     ref, err := name.ParseReference(flags.innerImage)
     if err != nil {
-        return xerrors.Errorf("parse ref: %w", err)
+        return "", xerrors.Errorf("parse ref: %w", err)
     }

     var dockerAuth dockerutil.AuthConfig
     if flags.imagePullSecret != "" {
         dockerAuth, err = dockerutil.AuthConfigFromString(flags.imagePullSecret, ref.Context().RegistryStr())
         if err != nil {
-            return xerrors.Errorf("parse auth config: %w", err)
+            return "", xerrors.Errorf("parse auth config: %w", err)
         }
     }

@@ -417,7 +474,7 @@ func runDockerCVM(ctx context.Context, log slog.Logger, client dockerutil.Client
         log.Info(ctx, "detected file", slog.F("image", flags.innerImage))
         dockerAuth, err = dockerutil.AuthConfigFromPath(flags.dockerConfig, ref.Context().RegistryStr())
         if err != nil && !xerrors.Is(err, os.ErrNotExist) {
-            return xerrors.Errorf("auth config from file: %w", err)
+            return "", xerrors.Errorf("auth config from file: %w", err)
         }
     }

@@ -430,7 +487,7 @@ func runDockerCVM(ctx context.Context, log slog.Logger, client dockerutil.Client
     // Add any user-specified mounts to our mounts list.
     extraMounts, err := parseMounts(flags.containerMounts)
     if err != nil {
-        return xerrors.Errorf("read mounts: %w", err)
+        return "", xerrors.Errorf("read mounts: %w", err)
     }
     mounts = append(mounts, extraMounts...)

@@ -442,7 +499,7 @@ func runDockerCVM(ctx context.Context, log slog.Logger, client dockerutil.Client
     blog.Info("Creating TUN device")
     dev, err := xunix.CreateTUNDevice(ctx, OuterTUNPath)
     if err != nil {
-        return xerrors.Errorf("creat tun device: %w", err)
+        return "", xerrors.Errorf("creat tun device: %w", err)
     }

     devices = append(devices, container.DeviceMapping{
@@ -457,7 +514,7 @@ func runDockerCVM(ctx context.Context, log slog.Logger, client dockerutil.Client
     blog.Info("Creating FUSE device")
     dev, err := xunix.CreateFuseDevice(ctx, OuterFUSEPath)
     if err != nil {
-        return xerrors.Errorf("create fuse device: %w", err)
+        return "", xerrors.Errorf("create fuse device: %w", err)
     }

     devices = append(devices, container.DeviceMapping{
@@ -479,7 +536,7 @@ func runDockerCVM(ctx context.Context, log slog.Logger, client dockerutil.Client
         )
         err = fs.Chown(device.PathOnHost, UserNamespaceOffset, UserNamespaceOffset)
         if err != nil {
-            return xerrors.Errorf("chown device %q: %w", device.PathOnHost, err)
+            return "", xerrors.Errorf("chown device %q: %w", device.PathOnHost, err)
         }
     }

@@ -492,27 +549,27 @@ func runDockerCVM(ctx context.Context, log slog.Logger, client dockerutil.Client
         ProgressFn: dockerutil.DefaultLogImagePullFn(blog),
     })
     if err != nil {
-        return xerrors.Errorf("pull image: %w", err)
+        return "", xerrors.Errorf("pull image: %w", err)
     }

     log.Debug(ctx, "remounting /sys")

     // After image pull we remount /sys so sysbox can have appropriate perms to create a container.
     err = xunix.MountFS(ctx, "/sys", "/sys", "", "remount", "rw")
     if err != nil {
-        return xerrors.Errorf("remount /sys: %w", err)
+        return "", xerrors.Errorf("remount /sys: %w", err)
     }

     if flags.addGPU {
         if flags.hostUsrLibDir == "" {
-            return xerrors.Errorf("when using GPUs, %q must be specified", EnvUsrLibDir)
+            return "", xerrors.Errorf("when using GPUs, %q must be specified", EnvUsrLibDir)
         }

         // Unmount GPU drivers in /proc as it causes issues when creating any
         // container in some cases (even the image metadata container).
         _, err = xunix.TryUnmountProcGPUDrivers(ctx, log)
         if err != nil {
-            return xerrors.Errorf("unmount /proc GPU drivers: %w", err)
+            return "", xerrors.Errorf("unmount /proc GPU drivers: %w", err)
         }
     }

@@ -528,7 +585,7 @@ func runDockerCVM(ctx context.Context, log slog.Logger, client dockerutil.Client
     // with /sbin/init or something simple like 'sleep infinity'.
     imgMeta, err := dockerutil.GetImageMetadata(ctx, log, client, flags.innerImage, flags.innerUsername)
     if err != nil {
-        return xerrors.Errorf("get image metadata: %w", err)
+        return "", xerrors.Errorf("get image metadata: %w", err)
     }

     blog.Infof("Detected entrypoint user '%s:%s' with home directory %q", imgMeta.UID, imgMeta.UID, imgMeta.HomeDir)
@@ -543,11 +600,11 @@ func runDockerCVM(ctx context.Context, log slog.Logger, client dockerutil.Client

     uid, err := strconv.ParseInt(imgMeta.UID, 10, 32)
     if err != nil {
-        return xerrors.Errorf("parse image uid: %w", err)
+        return "", xerrors.Errorf("parse image uid: %w", err)
     }
     gid, err := strconv.ParseInt(imgMeta.GID, 10, 32)
     if err != nil {
-        return xerrors.Errorf("parse image gid: %w", err)
+        return "", xerrors.Errorf("parse image gid: %w", err)
     }

     for _, m := range mounts {
@@ -568,13 +625,13 @@ func runDockerCVM(ctx context.Context, log slog.Logger, client dockerutil.Client
            mounter := xunix.Mounter(ctx)
            err := mounter.Mount("", m.Source, "", []string{"remount,rw"})
            if err != nil {
-               return xerrors.Errorf("remount: %w", err)
+               return "", xerrors.Errorf("remount: %w", err)
            }
        }

        err := fs.Chmod(m.Source, 0o2755)
        if err != nil {
-           return xerrors.Errorf("chmod mountpoint %q: %w", m.Source, err)
+           return "", xerrors.Errorf("chmod mountpoint %q: %w", m.Source, err)
        }

        var (
@@ -601,14 +658,14 @@ func runDockerCVM(ctx context.Context, log slog.Logger, client dockerutil.Client
            // user.
            err = fs.Chown(m.Source, shiftedUID, shiftedGID)
            if err != nil {
-               return xerrors.Errorf("chown mountpoint %q: %w", m.Source, err)
+               return "", xerrors.Errorf("chown mountpoint %q: %w", m.Source, err)
            }
        }

        if flags.addGPU {
            devs, binds, err := xunix.GPUs(ctx, log, flags.hostUsrLibDir)
            if err != nil {
-               return xerrors.Errorf("find gpus: %w", err)
+               return "", xerrors.Errorf("find gpus: %w", err)
            }

            for _, dev := range devs {
679736
MemoryLimit: int64(flags.memory),
680737
})
681738
if err != nil {
682-
return xerrors.Errorf("create container: %w", err)
739+
return "", xerrors.Errorf("create container: %w", err)
683740
}
684741

685742
blog.Info("Pruning images to free up disk...")
686743
// Prune images to avoid taking up any unnecessary disk from the user.
687744
_, err = dockerutil.PruneImages(ctx, client)
688745
if err != nil {
689-
return xerrors.Errorf("prune images: %w", err)
746+
return "", xerrors.Errorf("prune images: %w", err)
690747
}
691748

692749
// TODO fix iptables when istio detected.
693750

694751
blog.Info("Starting up workspace...")
695752
err = client.ContainerStart(ctx, containerID, container.StartOptions{})
696753
if err != nil {
697-
return xerrors.Errorf("start container: %w", err)
754+
return "", xerrors.Errorf("start container: %w", err)
698755
}
699756

700757
log.Debug(ctx, "creating bootstrap directory", slog.F("directory", imgMeta.HomeDir))
@@ -714,7 +771,7 @@ func runDockerCVM(ctx context.Context, log slog.Logger, client dockerutil.Client
         Args: []string{"-p", bootDir},
     })
     if err != nil {
-        return xerrors.Errorf("make bootstrap dir: %w", err)
+        return "", xerrors.Errorf("make bootstrap dir: %w", err)
     }

     cpuQuota, err := xunix.ReadCPUQuota(ctx, log)
@@ -736,34 +793,50 @@ func runDockerCVM(ctx context.Context, log slog.Logger, client dockerutil.Client
     }

     blog.Info("Envbox startup complete!")
-
-    // The bootstrap script doesn't return since it execs the agent
-    // meaning that it can get pretty noisy if we were to log by default.
-    // In order to allow users to discern issues getting the bootstrap script
-    // to complete successfully we pipe the output to stdout if
-    // CODER_DEBUG=true.
-    debugWriter := io.Discard
-    if flags.debug {
-        debugWriter = os.Stdout
+    if flags.boostrapScript == "" {
+        return "", nil
     }
-    // Bootstrap the container if a script has been provided.
     blog.Infof("Bootstrapping workspace...")
-    err = dockerutil.BootstrapContainer(ctx, client, dockerutil.BootstrapConfig{
-        ContainerID: containerID,
-        User: imgMeta.UID,
-        Script: flags.boostrapScript,
-        // We set this because the default behavior is to download the agent
-        // to /tmp/coder.XXXX. This causes a race to happen where we finish
-        // downloading the binary but before we can execute systemd remounts
-        // /tmp.
-        Env: []string{fmt.Sprintf("BINARY_DIR=%s", bootDir)},
-        StdOutErr: debugWriter,
+
+    bootstrapExec, err := client.ContainerExecCreate(ctx, containerID, container.ExecOptions{
+        User: imgMeta.UID,
+        Cmd: []string{"/bin/sh", "-s"},
+        Env: []string{fmt.Sprintf("BINARY_DIR=%s", bootDir)},
+        AttachStdin: true,
+        AttachStdout: true,
+        AttachStderr: true,
+        Detach: true,
     })
     if err != nil {
-        return xerrors.Errorf("boostrap container: %w", err)
+        return "", xerrors.Errorf("create exec: %w", err)
     }

-    return nil
+    resp, err := client.ContainerExecAttach(ctx, bootstrapExec.ID, container.ExecStartOptions{})
+    if err != nil {
+        return "", xerrors.Errorf("attach exec: %w", err)
+    }
+
+    _, err = io.Copy(resp.Conn, strings.NewReader(flags.boostrapScript))
+    if err != nil {
+        return "", xerrors.Errorf("copy stdin: %w", err)
+    }
+    err = resp.CloseWrite()
+    if err != nil {
+        return "", xerrors.Errorf("close write: %w", err)
+    }
+
+    go func() {
+        defer resp.Close()
+        rd := io.LimitReader(resp.Reader, 1<<10)
+        _, err := io.Copy(blog, rd)
+        if err != nil {
+            log.Error(ctx, "copy bootstrap output", slog.Error(err))
+        }
+    }()
+
+    // We can't just call ExecInspect because there's a race where the cmd
+    // hasn't been assigned a PID yet.
+    return bootstrapExec.ID, nil
 }

 //nolint:revive
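
For readers unfamiliar with the exec-attach flow in the hunk above: the pattern is to create an exec running /bin/sh -s, attach to it, stream the bootstrap script over the hijacked connection's stdin, and close the write side so the shell sees EOF and starts executing. Below is a self-contained sketch of the same pattern against the upstream Docker SDK, using the same container.ExecOptions/ExecStartOptions types as the diff; the function name and error handling are illustrative, not envbox code.

package example

import (
    "context"
    "io"
    "os"
    "strings"

    "github.com/docker/docker/api/types/container"
    "github.com/docker/docker/client"
)

// runScriptViaExec pipes script to a shell inside the container and returns
// the exec ID so a caller can later signal or wait on the process.
func runScriptViaExec(ctx context.Context, cli client.APIClient, containerID, script string) (string, error) {
    created, err := cli.ContainerExecCreate(ctx, containerID, container.ExecOptions{
        Cmd:          []string{"/bin/sh", "-s"},
        AttachStdin:  true,
        AttachStdout: true,
        AttachStderr: true,
    })
    if err != nil {
        return "", err
    }

    resp, err := cli.ContainerExecAttach(ctx, created.ID, container.ExecStartOptions{})
    if err != nil {
        return "", err
    }

    // Write the script to the shell's stdin, then send EOF so it runs.
    if _, err := io.Copy(resp.Conn, strings.NewReader(script)); err != nil {
        return "", err
    }
    if err := resp.CloseWrite(); err != nil {
        return "", err
    }

    // Drain a bounded amount of output so startup failures remain visible.
    go func() {
        defer resp.Close()
        _, _ = io.Copy(os.Stdout, io.LimitReader(resp.Reader, 1<<10))
    }()

    return created.ID, nil
}

Returning the exec ID rather than blocking (as the removed dockerutil.BootstrapContainer call did) is what lets the signal handler in dockerCmd later resolve the bootstrap PID and wait for it to exit.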
