From 361631ddbcf655b46b0872036f4266312aa66c0b Mon Sep 17 00:00:00 2001
From: Cian Johnston
Date: Mon, 27 Jan 2025 12:11:08 +0000
Subject: [PATCH 1/7] fix(xunix): also mount symlinked shared object files
 (#123)

* fix(xunix): also mount shared object files with .so.N
* chore(Makefile): add test and test-integration targets
* chore(README.md): add hacking and troubleshooting sections
* chore(integration): fix tests under cgroupv2

Signed-off-by: Cian Johnston
Co-authored-by: Dean Sheather
---
 Makefile                   |  8 +++++
 README.md                  | 34 +++++++++++++++++++++
 integration/docker_test.go | 61 +++++++++++++++++++++++++++-----------
 xunix/gpu.go               |  9 +++---
 xunix/gpu_test.go          |  4 +--
 5 files changed, 92 insertions(+), 24 deletions(-)

diff --git a/Makefile b/Makefile
index 2c3655b..a2c8341 100644
--- a/Makefile
+++ b/Makefile
@@ -32,3 +32,11 @@ fmt/go:
 .PHONY: fmt/md
 fmt/md:
 	go run github.com/Kunde21/markdownfmt/v3/cmd/markdownfmt@v3.1.0 -w ./README.md
+
+.PHONY: test
+test:
+	go test -v -count=1 ./...
+
+.PHONY: test-integration
+test-integration:
+	go test -v -count=1 -tags=integration ./integration/
diff --git a/README.md b/README.md
index 7ed5bda..fba30bc 100644
--- a/README.md
+++ b/README.md
@@ -86,3 +86,37 @@ env {
 >     }
 >   }
 > ```
+
+## GPUs
+
+When passing through GPUs to the inner container, you may end up using associated tooling such as the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/index.html) or the [NVIDIA GPU Operator](https://docs.nvidia.com/datacenter/cloud-native/gpu-operator/latest/index.html). These will inject required utilities and libraries inside the inner container. You can verify this by directly running (without Envbox) a barebones image like `debian:bookworm` and running `mount` or `nvidia-smi` inside the container.
+
+Envbox will detect these mounts and pass them inside the inner container it creates, so that GPU-aware tools running inside the inner container can still utilize these libraries.
+
+## Hacking
+
+Here's a simple one-liner to run the `codercom/enterprise-minimal:ubuntu` image in Envbox using Docker:
+
+```
+docker run -it --rm \
+    -v /tmp/envbox/docker:/var/lib/coder/docker \
+    -v /tmp/envbox/containers:/var/lib/coder/containers \
+    -v /tmp/envbox/sysbox:/var/lib/sysbox \
+    -v /tmp/envbox/docker:/var/lib/docker \
+    -v /usr/src:/usr/src:ro \
+    -v /lib/modules:/lib/modules:ro \
+    --privileged \
+    -e CODER_INNER_IMAGE=codercom/enterprise-minimal:ubuntu \
+    -e CODER_INNER_USERNAME=coder \
+    envbox:latest /envbox docker
+```
+
+This will store persistent data under `/tmp/envbox`.
+
+## Troubleshooting
+
+### `failed to write to cgroup.procs: write /sys/fs/cgroup/docker//init.scope/cgroup.procs: operation not supported: unknown`
+
+This issue occurs in Docker if you have `cgroupns-mode` set to `private`. To validate, add `--cgroupns=host` to your `docker run` invocation and re-run.
+
+To permanently set this as the default in your Docker daemon, add `"default-cgroupns-mode": "host"` to your `/etc/docker/daemon.json` and restart Docker.
diff --git a/integration/docker_test.go b/integration/docker_test.go
index 9f88b09..6bf0f7e 100644
--- a/integration/docker_test.go
+++ b/integration/docker_test.go
@@ -240,28 +240,53 @@ func TestDocker(t *testing.T) {
 		require.Equal(t, "1000", strings.TrimSpace(string(out)))
 
 		// Validate that memory limit is being applied to the inner container.
-		out, err = integrationtest.ExecInnerContainer(t, pool, integrationtest.ExecConfig{
+		// First check under cgroupv2 path.
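+		// (cgroup v2's unified hierarchy exposes the limit as memory.max,
+		// whereas cgroup v1 uses memory/memory.limit_in_bytes.)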
+		if out, err = integrationtest.ExecInnerContainer(t, pool, integrationtest.ExecConfig{
 			ContainerID: resource.Container.ID,
-			Cmd:         []string{"cat", "/sys/fs/cgroup/memory/memory.limit_in_bytes"},
-		})
-		require.NoError(t, err)
-		require.Equal(t, expectedMemoryLimit, strings.TrimSpace(string(out)))
+			Cmd:         []string{"cat", "/sys/fs/cgroup/memory.max"},
+		}); err == nil {
+			require.Equal(t, expectedMemoryLimit, strings.TrimSpace(string(out)))
+		} else { // fall back to cgroupv1 path.
+			out, err = integrationtest.ExecInnerContainer(t, pool, integrationtest.ExecConfig{
+				ContainerID: resource.Container.ID,
+				Cmd:         []string{"cat", "/sys/fs/cgroup/memory/memory.limit_in_bytes"},
+			})
+			require.NoError(t, err)
+			require.Equal(t, expectedMemoryLimit, strings.TrimSpace(string(out)))
+		}
 
-		periodStr, err := integrationtest.ExecInnerContainer(t, pool, integrationtest.ExecConfig{
+		// Validate the cpu limits are being applied to the inner container.
+		// First check under cgroupv2 path.
+		var quota, period int64
+		if out, err = integrationtest.ExecInnerContainer(t, pool, integrationtest.ExecConfig{
 			ContainerID: resource.Container.ID,
-			Cmd:         []string{"cat", "/sys/fs/cgroup/cpu,cpuacct/cpu.cfs_period_us"},
-		})
-		require.NoError(t, err)
-		period, err := strconv.ParseInt(strings.TrimSpace(string(periodStr)), 10, 64)
-		require.NoError(t, err)
+			Cmd:         []string{"cat", "/sys/fs/cgroup/cpu.max"},
+		}); err == nil {
+			// out is in the format "quota period" (cpu.max is "$MAX $PERIOD"),
+			// e.g. "100000 100000".
+			fields := strings.Fields(string(out))
+			require.Len(t, fields, 2)
+			quota, err = strconv.ParseInt(fields[0], 10, 64)
+			require.NoError(t, err)
+			period, err = strconv.ParseInt(fields[1], 10, 64)
+			require.NoError(t, err)
+		} else { // fall back to cgroupv1 path.
+			periodStr, err := integrationtest.ExecInnerContainer(t, pool, integrationtest.ExecConfig{
+				ContainerID: resource.Container.ID,
+				Cmd:         []string{"cat", "/sys/fs/cgroup/cpu,cpuacct/cpu.cfs_period_us"},
+			})
+			require.NoError(t, err)
+			period, err = strconv.ParseInt(strings.TrimSpace(string(periodStr)), 10, 64)
+			require.NoError(t, err)
 
-		quotaStr, err := integrationtest.ExecInnerContainer(t, pool, integrationtest.ExecConfig{
-			ContainerID: resource.Container.ID,
-			Cmd:         []string{"cat", "/sys/fs/cgroup/cpu/cpu.cfs_quota_us"},
-		})
-		require.NoError(t, err)
-		quota, err := strconv.ParseInt(strings.TrimSpace(string(quotaStr)), 10, 64)
-		require.NoError(t, err)
+			quotaStr, err := integrationtest.ExecInnerContainer(t, pool, integrationtest.ExecConfig{
+				ContainerID: resource.Container.ID,
+				Cmd:         []string{"cat", "/sys/fs/cgroup/cpu/cpu.cfs_quota_us"},
+			})
+			require.NoError(t, err)
+			quota, err = strconv.ParseInt(strings.TrimSpace(string(quotaStr)), 10, 64)
+			require.NoError(t, err)
+		}
 
 		// Validate that the CPU limit is being applied to the inner container.
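 		// e.g. a quota of 50000 with a period of 100000 corresponds to a
 		// limit of 0.5 CPUs.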
 		actualLimit := float64(quota) / float64(period)
diff --git a/xunix/gpu.go b/xunix/gpu.go
index a494ab5..0708667 100644
--- a/xunix/gpu.go
+++ b/xunix/gpu.go
@@ -17,9 +17,10 @@ import (
 )
 
 var (
-	gpuMountRegex = regexp.MustCompile("(?i)(nvidia|vulkan|cuda)")
-	gpuExtraRegex = regexp.MustCompile("(?i)(libgl|nvidia|vulkan|cuda)")
-	gpuEnvRegex   = regexp.MustCompile("(?i)nvidia")
+	gpuMountRegex     = regexp.MustCompile("(?i)(nvidia|vulkan|cuda)")
+	gpuExtraRegex     = regexp.MustCompile("(?i)(libgl|nvidia|vulkan|cuda)")
+	gpuEnvRegex       = regexp.MustCompile("(?i)nvidia")
+	sharedObjectRegex = regexp.MustCompile(`\.so(\.[0-9\.]+)?$`)
 )
 
 func GPUEnvs(ctx context.Context) []string {
@@ -103,7 +104,7 @@ func usrLibGPUs(ctx context.Context, log slog.Logger, usrLibDir string) ([]mount
 		return nil
 	}
 
-	if filepath.Ext(path) != ".so" || !gpuExtraRegex.MatchString(path) {
+	if !sharedObjectRegex.MatchString(path) || !gpuExtraRegex.MatchString(path) {
 		return nil
 	}
 
diff --git a/xunix/gpu_test.go b/xunix/gpu_test.go
index 4cbf5f0..f8d8d47 100644
--- a/xunix/gpu_test.go
+++ b/xunix/gpu_test.go
@@ -56,13 +56,13 @@ func TestGPUs(t *testing.T) {
 		expectedUsrLibFiles = []string{
 			filepath.Join(usrLibMountpoint, "nvidia", "libglxserver_nvidia.so"),
 			filepath.Join(usrLibMountpoint, "libnvidia-ml.so"),
+			filepath.Join(usrLibMountpoint, "nvidia", "libglxserver_nvidia.so.1"),
 		}
 
 		// fakeUsrLibFiles are files that should be written to the "mounted"
 		// /usr/lib directory. It includes files that shouldn't be returned.
 		fakeUsrLibFiles = append([]string{
 			filepath.Join(usrLibMountpoint, "libcurl-gnutls.so"),
-			filepath.Join(usrLibMountpoint, "nvidia", "libglxserver_nvidia.so.1"),
 		}, expectedUsrLibFiles...)
 	)
 
@@ -98,7 +98,7 @@ func TestGPUs(t *testing.T) {
 	devices, binds, err := xunix.GPUs(ctx, log, usrLibMountpoint)
 	require.NoError(t, err)
 	require.Len(t, devices, 2, "expected 2 nvidia devices")
-	require.Len(t, binds, 3, "expected 4 nvidia binds")
+	require.Len(t, binds, 4, "expected 4 nvidia binds")
 	require.Contains(t, binds, mount.MountPoint{
 		Device: "/dev/sda1",
 		Path:   "/usr/local/nvidia",

From a556bac7b6cd446bb39064cd3449c10e9956cab2 Mon Sep 17 00:00:00 2001
From: Cian Johnston
Date: Thu, 6 Mar 2025 12:43:04 +0000
Subject: [PATCH 2/7] fix(dockerutil): GetImageMetadata: detect correct usr
 lib dir based on os release (#127)

We had been mounting GPU libraries into the destination /usr/lib inside
the inner container by default. This doesn't hold true for Redhat-based
distributions, which use /usr/lib64 instead.

- Adds the capability to sniff /etc/os-release inside the inner container
- Modifies the destination path for bind-mounting GPU libraries to the
  inner container based on the detected OS release ID
- Adds integration testing for Envbox+NVidia GPUs (not run by default in CI)
- Attempts to automatically set CODER_USR_LIB_DIR if not specified.
- Adds the capability to manually set the inner mount path via
  CODER_INNER_USR_LIB_DIR.
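For example, an inner image whose /etc/os-release contains `ID=ubuntu` maps
to the Debian-style multiarch directory (/usr/lib/x86_64-linux-gnu on amd64),
while `ID=rhel` maps to /usr/lib64; unrecognized IDs fall back to /usr/lib.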
---
 .github/workflows/ci.yaml             |   4 +-
 Makefile                              |   2 +-
 README.md                             |  56 ++++++-
 cli/docker.go                         |  26 +++-
 dockerutil/image.go                   |  81 +++++++++--
 dockerutil/image_linux_amd64.go       |   5 +
 dockerutil/image_linux_arm64.go       |   5 +
 integration/docker_test.go            |   6 +-
 integration/gpu_test.go               | 202 ++++++++++++++++++++++++++
 integration/integrationtest/docker.go |   2 +
 10 files changed, 369 insertions(+), 20 deletions(-)
 create mode 100644 dockerutil/image_linux_amd64.go
 create mode 100644 dockerutil/image_linux_arm64.go
 create mode 100644 integration/gpu_test.go

diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index f3ffc86..c106dd0 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -8,7 +8,7 @@ on:
   pull_request:
   workflow_dispatch:
 
-  
+
 permissions:
   actions: read
   checks: none
@@ -209,7 +209,7 @@ jobs:
         category: "Trivy"
 
     - name: Upload Trivy scan results as an artifact
-      uses: actions/upload-artifact@v3
+      uses: actions/upload-artifact@v4
       with:
         name: trivy
        path: trivy-results.sarif
diff --git a/Makefile b/Makefile
index a2c8341..b230ab7 100644
--- a/Makefile
+++ b/Makefile
@@ -39,4 +39,4 @@ test:
 
 .PHONY: test-integration
 test-integration:
-	go test -v -count=1 -tags=integration ./integration/
+	CODER_TEST_INTEGRATION=1 go test -v -count=1 ./integration/
diff --git a/README.md b/README.md
index fba30bc..437e381 100644
--- a/README.md
+++ b/README.md
@@ -21,6 +21,7 @@ The environment variables can be used to configure various aspects of the inner
 | `CODER_DOCKER_BRIDGE_CIDR` | The bridge CIDR to start the Docker daemon with. | false |
 | `CODER_MOUNTS` | A list of mounts to mount into the inner container. Mounts default to `rw`. Ex: `CODER_MOUNTS=/home/coder:/home/coder,/var/run/mysecret:/var/run/mysecret:ro` | false |
 | `CODER_USR_LIB_DIR` | The mountpoint of the host `/usr/lib` directory. Only required when using GPUs. | false |
+| `CODER_INNER_USR_LIB_DIR` | The inner /usr/lib mountpoint. This is automatically detected based on `/etc/os-release` in the inner image, but may optionally be overridden. | false |
 | `CODER_ADD_TUN` | If `CODER_ADD_TUN=true` add a TUN device to the inner container. | false |
 | `CODER_ADD_FUSE` | If `CODER_ADD_FUSE=true` add a FUSE device to the inner container. | false |
 | `CODER_ADD_GPU` | If `CODER_ADD_GPU=true` add detected GPUs and related files to the inner container. Requires setting `CODER_USR_LIB_DIR` and mounting in the host's `/usr/lib/` directory. | false |
@@ -43,7 +44,7 @@ It is not possible to develop `envbox` effectively using a containerized environ
 
 If a login is required to pull images from a private repository, create a secret following the instructions from the [Kubernetes Documentation](https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/#create-a-secret-by-providing-credentials-on-the-command-line) as such:
 
-```
+```shell
 kubectl -n <namespace> create secret docker-registry regcred \
   --docker-server=<registry> \
   --docker-username=<username> \
@@ -53,7 +54,7 @@ kubectl -n <namespace> create secret docker-registry regcred \
 
 Then reference the secret in your template as such:
 
-```
+```shell
 env {
   name = "CODER_IMAGE_PULL_SECRET"
   value_from {
@@ -93,11 +94,60 @@ When passing through GPUs to the inner container, you may end up using associate
 
 Envbox will detect these mounts and pass them inside the inner container it creates, so that GPU-aware tools running inside the inner container can still utilize these libraries.
 
+Here's an example Docker command to run a GPU-enabled workload in Envbox. Note the following:
+
+1) The NVidia container runtime must be installed on the host (`--runtime=nvidia`).
+2) `CODER_ADD_GPU=true` must be set to enable GPU-specific functionality.
+3) When `CODER_ADD_GPU` is set, it is required to also set `CODER_USR_LIB_DIR` to a location where the relevant host directory has been mounted inside the outer container. In the below example, this is `/usr/lib/x86_64-linux-gnu` on the underlying host. It is mounted into the container under the path `/var/coder/usr/lib`. We then set `CODER_USR_LIB_DIR=/var/coder/usr/lib`. The actual location inside the container is not important **as long as it does not overwrite any pre-existing directories containing system libraries**.
+4) The location to mount the libraries in the inner container is determined by the distribution ID in the `/etc/os-release` of the inner container. If the automatically determined location is incorrect (e.g. `nvidia-smi` complains about not being able to find libraries), you can set it manually via `CODER_INNER_USR_LIB_DIR`.
+
+   > Note: this step is required in case user workloads require libraries from the underlying host that are not added in by the container runtime.
+
+```shell
+docker run -it --rm \
+    --runtime=nvidia \
+    --gpus=all \
+    --name=envbox-gpu-test \
+    -v /tmp/envbox/docker:/var/lib/coder/docker \
+    -v /tmp/envbox/containers:/var/lib/coder/containers \
+    -v /tmp/envbox/sysbox:/var/lib/sysbox \
+    -v /tmp/envbox/docker:/var/lib/docker \
+    -v /usr/src:/usr/src:ro \
+    -v /lib/modules:/lib/modules:ro \
+    -v /usr/lib/x86_64-linux-gnu:/var/coder/usr/lib \
+    --privileged \
+    -e CODER_INNER_IMAGE=nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda10.2 \
+    -e CODER_INNER_USERNAME=root \
+    -e CODER_ADD_GPU=true \
+    -e CODER_USR_LIB_DIR=/var/coder/usr/lib \
+    envbox:latest /envbox docker
+```
+
+To validate GPU functionality, you can run the following commands:
+
+1) To validate that the container runtime correctly passed the required GPU tools into the outer container, run:
+
+   ```shell
+   docker exec -it envbox-gpu-test nvidia-smi
+   ```
+
+2) To validate the same inside the inner container, run:
+
+   ```shell
+   docker exec -it envbox-gpu-test docker exec -it workspace_cvm nvidia-smi
+   ```
+
+3) To validate that the sample CUDA application inside the container runs correctly:
+
+   ```shell
+   docker exec -it envbox-gpu-test docker exec -it workspace_cvm /tmp/vectorAdd
+   ```
+
 ## Hacking
 
 Here's a simple one-liner to run the `codercom/enterprise-minimal:ubuntu` image in Envbox using Docker:
 
-```
+```shell
 docker run -it --rm \
     -v /tmp/envbox/docker:/var/lib/coder/docker \
     -v /tmp/envbox/containers:/var/lib/coder/containers \
diff --git a/cli/docker.go b/cli/docker.go
index f48ea64..2f6ec5f 100644
--- a/cli/docker.go
+++ b/cli/docker.go
@@ -98,6 +98,7 @@ var (
 	EnvMemory               = "CODER_MEMORY"
 	EnvAddGPU               = "CODER_ADD_GPU"
 	EnvUsrLibDir            = "CODER_USR_LIB_DIR"
+	EnvInnerUsrLibDir       = "CODER_INNER_USR_LIB_DIR"
 	EnvDockerConfig         = "CODER_DOCKER_CONFIG"
 	EnvDebug                = "CODER_DEBUG"
 	EnvDisableIDMappedMount = "CODER_DISABLE_IDMAPPED_MOUNT"
@@ -135,6 +136,7 @@ type flags struct {
 	boostrapScript  string
 	containerMounts string
 	hostUsrLibDir   string
+	innerUsrLibDir  string
 	dockerConfig    string
 	cpus            int
 	memory          int
@@ -370,6 +372,7 @@ func dockerCmd() *cobra.Command {
 	cliflag.StringVarP(cmd.Flags(), &flags.boostrapScript, "boostrap-script", "", EnvBootstrap, "", "The script to use to bootstrap the container. This should typically install and start the agent.")
 	cliflag.StringVarP(cmd.Flags(), &flags.containerMounts, "mounts", "", EnvMounts, "", "Comma separated list of mounts in the form of '<source>:<target>[:options]' (e.g. /var/lib/docker:/var/lib/docker:ro,/usr/src:/usr/src).")
 	cliflag.StringVarP(cmd.Flags(), &flags.hostUsrLibDir, "usr-lib-dir", "", EnvUsrLibDir, "", "The host /usr/lib mountpoint. Used to detect GPU drivers to mount into inner container.")
+	cliflag.StringVarP(cmd.Flags(), &flags.innerUsrLibDir, "inner-usr-lib-dir", "", EnvInnerUsrLibDir, "", "The inner /usr/lib mountpoint. This is automatically detected based on /etc/os-release in the inner image, but may optionally be overridden.")
 	cliflag.StringVarP(cmd.Flags(), &flags.dockerConfig, "docker-config", "", EnvDockerConfig, "/root/.docker/config.json", "The path to the docker config to consult when pulling an image.")
 	cliflag.BoolVarP(cmd.Flags(), &flags.addTUN, "add-tun", "", EnvAddTun, false, "Add a TUN device to the inner container.")
 	cliflag.BoolVarP(cmd.Flags(), &flags.addFUSE, "add-fuse", "", EnvAddFuse, false, "Add a FUSE device to the inner container.")
@@ -523,7 +526,7 @@ func runDockerCVM(ctx context.Context, log slog.Logger, client dockerutil.Client
 	// of the user so that we can chown directories to the namespaced UID inside
 	// the inner container as well as whether we should be starting the container
 	// with /sbin/init or something simple like 'sleep infinity'.
-	imgMeta, err := dockerutil.GetImageMetadata(ctx, client, flags.innerImage, flags.innerUsername)
+	imgMeta, err := dockerutil.GetImageMetadata(ctx, log, client, flags.innerImage, flags.innerUsername)
 	if err != nil {
 		return xerrors.Errorf("get image metadata: %w", err)
 	}
@@ -534,6 +537,8 @@ func runDockerCVM(ctx context.Context, log slog.Logger, client dockerutil.Client
 		slog.F("uid", imgMeta.UID),
 		slog.F("gid", imgMeta.GID),
 		slog.F("has_init", imgMeta.HasInit),
+		slog.F("os_release", imgMeta.OsReleaseID),
+		slog.F("home_dir", imgMeta.HomeDir),
 	)
 
 	uid, err := strconv.ParseInt(imgMeta.UID, 10, 32)
@@ -614,16 +619,33 @@ func runDockerCVM(ctx context.Context, log slog.Logger, client dockerutil.Client
 		})
 	}
 
+	innerUsrLibDir := imgMeta.UsrLibDir()
+	if flags.innerUsrLibDir != "" {
+		log.Info(ctx, "overriding auto-detected inner usr lib dir",
+			slog.F("before", innerUsrLibDir),
+			slog.F("after", flags.innerUsrLibDir))
+		innerUsrLibDir = flags.innerUsrLibDir
+	}
 	for _, bind := range binds {
 		// If the bind has a path that points to the host-mounted /usr/lib
 		// directory we need to remap it to /usr/lib inside the container.
 		mountpoint := bind.Path
 		if strings.HasPrefix(mountpoint, flags.hostUsrLibDir) {
 			mountpoint = filepath.Join(
-				"/usr/lib",
+				// Note: we used to mount into /usr/lib, but this can change
+				// based on the distro inside the container.
+				innerUsrLibDir,
 				strings.TrimPrefix(mountpoint, strings.TrimSuffix(flags.hostUsrLibDir, "/")),
 			)
 		}
+		// Even though xunix.GPUs checks for duplicate mounts, we need to check
+		// for duplicates again here after remapping the path.
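+		// (After the prefix substitution, two distinct host paths can map to
+		// the same inner mountpoint.)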
+		if slices.ContainsFunc(mounts, func(m xunix.Mount) bool {
+			return m.Mountpoint == mountpoint
+		}) {
+			log.Debug(ctx, "skipping duplicate mount", slog.F("path", mountpoint))
+			continue
+		}
 		mounts = append(mounts, xunix.Mount{
 			Source:     bind.Path,
 			Mountpoint: mountpoint,
diff --git a/dockerutil/image.go b/dockerutil/image.go
index ffb2bdb..a5323f2 100644
--- a/dockerutil/image.go
+++ b/dockerutil/image.go
@@ -9,6 +9,7 @@ import (
 	"strings"
 	"time"
 
+	"cdr.dev/slog"
 	dockertypes "github.com/docker/docker/api/types"
 	"github.com/docker/docker/api/types/container"
 	"github.com/docker/docker/api/types/filters"
@@ -22,6 +23,23 @@ import (
 
 const diskFullStorageDriver = "vfs"
 
+// Adapted from https://github.com/NVIDIA/libnvidia-container/blob/v1.15.0/src/nvc_container.c#L152-L165
+var UsrLibDirs = map[string]string{
+	// Debian-based distros use a multi-arch directory.
+	"debian": usrLibMultiarchDir,
+	"ubuntu": usrLibMultiarchDir,
+	// Fedora and Redhat use the standard /usr/lib64.
+	"fedora": "/usr/lib64",
+	"rhel":   "/usr/lib64",
+	// Fall back to the standard /usr/lib.
+	"linux": "/usr/lib",
+}
+
+// /etc/os-release is the standard location for system identification data on
+// Linux systems running systemd.
+// Ref: https://www.freedesktop.org/software/systemd/man/latest/os-release.html
+var etcOsRelease = "/etc/os-release"
+
 type PullImageConfig struct {
 	Client Client
 	Image  string
@@ -148,15 +166,16 @@ func processImagePullEvents(r io.Reader, fn ImagePullProgressFn) error {
 }
 
 type ImageMetadata struct {
-	UID     string
-	GID     string
-	HomeDir string
-	HasInit bool
+	UID         string
+	GID         string
+	HomeDir     string
+	HasInit     bool
+	OsReleaseID string
 }
 
 // GetImageMetadata returns metadata about an image such as the UID/GID of the
 // provided username and whether it contains an /sbin/init that we should run.
-func GetImageMetadata(ctx context.Context, client Client, img, username string) (ImageMetadata, error) {
+func GetImageMetadata(ctx context.Context, log slog.Logger, client Client, img, username string) (ImageMetadata, error) {
 	// Creating a dummy container to inspect the filesystem.
 	created, err := client.ContainerCreate(ctx,
 		&container.Config{
@@ -226,14 +245,58 @@ func GetImageMetadata(ctx context.Context, client Client, img, username string)
 		return ImageMetadata{}, xerrors.Errorf("no users returned for username %s", username)
 	}
 
+	// Read the /etc/os-release file to get the ID of the OS.
+	// We only care about the ID field.
+	var osReleaseID string
+	out, err = ExecContainer(ctx, client, ExecConfig{
+		ContainerID: inspect.ID,
+		Cmd:         "cat",
+		Args:        []string{etcOsRelease},
+	})
+	if err != nil {
+		log.Error(ctx, "read os-release", slog.Error(err))
+		log.Error(ctx, "falling back to linux for os-release ID")
+		osReleaseID = "linux"
+	} else {
+		osReleaseID = GetOSReleaseID(out)
+	}
+
 	return ImageMetadata{
-		UID:     users[0].Uid,
-		GID:     users[0].Gid,
-		HomeDir: users[0].HomeDir,
-		HasInit: initExists,
+		UID:         users[0].Uid,
+		GID:         users[0].Gid,
+		HomeDir:     users[0].HomeDir,
+		HasInit:     initExists,
+		OsReleaseID: osReleaseID,
 	}, nil
 }
 
+// UsrLibDir returns the path to the /usr/lib directory for the given
+// operating system determined by the /etc/os-release file.
+func (im ImageMetadata) UsrLibDir() string {
+	if val, ok := UsrLibDirs[im.OsReleaseID]; ok && val != "" {
+		return val
+	}
+	return UsrLibDirs["linux"] // fallback
+}
+
+// GetOSReleaseID returns the ID of the operating system from the
+// raw contents of /etc/os-release.
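+// Quoted values (e.g. ID="rhel") are unquoted, and "linux" is returned as a
+// fallback when no ID field is present.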
+func GetOSReleaseID(raw []byte) string {
+	var osReleaseID string
+	for _, line := range strings.Split(string(raw), "\n") {
+		if strings.HasPrefix(line, "ID=") {
+			osReleaseID = strings.TrimPrefix(line, "ID=")
+			// The value may be quoted.
+			osReleaseID = strings.Trim(osReleaseID, "\"")
+			break
+		}
+	}
+	if osReleaseID == "" {
+		return "linux"
+	}
+	return osReleaseID
+}
+
 func DefaultLogImagePullFn(log buildlog.Logger) func(ImagePullEvent) error {
 	var (
 		// Avoid spamming too frequently, the messages can come quickly
diff --git a/dockerutil/image_linux_amd64.go b/dockerutil/image_linux_amd64.go
new file mode 100644
index 0000000..577c1be
--- /dev/null
+++ b/dockerutil/image_linux_amd64.go
@@ -0,0 +1,5 @@
+package dockerutil
+
+// usrLibMultiarchDir is defined for arm64 and amd64 architectures.
+// Envbox is not published for other architectures.
+var usrLibMultiarchDir = "/usr/lib/aarch64-linux-gnu"
diff --git a/dockerutil/image_linux_arm64.go b/dockerutil/image_linux_arm64.go
new file mode 100644
index 0000000..5952a29
--- /dev/null
+++ b/dockerutil/image_linux_arm64.go
@@ -0,0 +1,5 @@
+package dockerutil
+
+// usrLibMultiarchDir is defined for arm64 and amd64 architectures.
+// Envbox is not published for other architectures.
+var usrLibMultiarchDir = "/usr/lib/x86_64-linux-gnu"
diff --git a/integration/docker_test.go b/integration/docker_test.go
index 6bf0f7e..533d2ba 100644
--- a/integration/docker_test.go
+++ b/integration/docker_test.go
@@ -1,6 +1,3 @@
-//go:build integration
-// +build integration
-
 package integration_test
 
 import (
@@ -24,6 +21,9 @@ import (
 
 func TestDocker(t *testing.T) {
 	t.Parallel()
+	if val, ok := os.LookupEnv("CODER_TEST_INTEGRATION"); !ok || val != "1" {
+		t.Skip("integration tests are skipped unless CODER_TEST_INTEGRATION=1")
+	}
 
 	// Dockerd just tests that dockerd can spin up and function correctly.
 	t.Run("Dockerd", func(t *testing.T) {
diff --git a/integration/gpu_test.go b/integration/gpu_test.go
new file mode 100644
index 0000000..47edef4
--- /dev/null
+++ b/integration/gpu_test.go
@@ -0,0 +1,202 @@
+package integration_test
+
+import (
+	"context"
+	"os"
+	"os/exec"
+	"slices"
+	"strings"
+	"testing"
+	"time"
+
+	"github.com/stretchr/testify/require"
+
+	"github.com/coder/envbox/integration/integrationtest"
+)
+
+func TestDocker_Nvidia(t *testing.T) {
+	t.Parallel()
+	if val, ok := os.LookupEnv("CODER_TEST_INTEGRATION"); !ok || val != "1" {
+		t.Skip("integration tests are skipped unless CODER_TEST_INTEGRATION=1")
+	}
+	// Only run this test if the nvidia container runtime is detected.
+	// Check if the nvidia runtime is available using `docker info`.
+	// The docker client doesn't expose this information so we need to fetch it ourselves.
+	if !slices.Contains(dockerRuntimes(t), "nvidia") {
+		t.Skip("this test requires nvidia runtime to be available")
+	}
+
+	t.Run("Ubuntu", func(t *testing.T) {
+		t.Parallel()
+		ctx, cancel := context.WithCancel(context.Background())
+		t.Cleanup(cancel)
+
+		// Start the envbox container.
+		ctID := startEnvboxCmd(ctx, t, integrationtest.UbuntuImage, "root",
+			"-v", "/usr/lib/x86_64-linux-gnu:/var/coder/usr/lib",
+			"--env", "CODER_ADD_GPU=true",
+			"--env", "CODER_USR_LIB_DIR=/var/coder/usr/lib",
+			"--runtime=nvidia",
+			"--gpus=all",
+		)
+
+		// Assert that we can run nvidia-smi in the inner container.
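+		// (Note the nested exec: the outer `docker exec` targets the envbox
+		// container, while the inner one targets the workspace_cvm container
+		// running inside it.)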
+		_, err := execContainerCmd(ctx, t, ctID, "docker", "exec", "workspace_cvm", "nvidia-smi")
+		require.NoError(t, err, "failed to run nvidia-smi in the inner container")
+	})
+
+	t.Run("Redhat", func(t *testing.T) {
+		t.Parallel()
+		ctx, cancel := context.WithCancel(context.Background())
+		t.Cleanup(cancel)
+
+		// Start the envbox container.
+		ctID := startEnvboxCmd(ctx, t, integrationtest.RedhatImage, "root",
+			"-v", "/usr/lib/x86_64-linux-gnu:/var/coder/usr/lib64",
+			"--env", "CODER_ADD_GPU=true",
+			"--env", "CODER_USR_LIB_DIR=/var/coder/usr/lib64",
+			"--runtime=nvidia",
+			"--gpus=all",
+		)
+
+		// Assert that we can run nvidia-smi in the inner container.
+		_, err := execContainerCmd(ctx, t, ctID, "docker", "exec", "workspace_cvm", "nvidia-smi")
+		require.NoError(t, err, "failed to run nvidia-smi in the inner container")
+	})
+
+	t.Run("InnerUsrLibDirOverride", func(t *testing.T) {
+		t.Parallel()
+		ctx, cancel := context.WithCancel(context.Background())
+		t.Cleanup(cancel)
+
+		// Start the envbox container.
+		ctID := startEnvboxCmd(ctx, t, integrationtest.UbuntuImage, "root",
+			"-v", "/usr/lib/x86_64-linux-gnu:/var/coder/usr/lib",
+			"--env", "CODER_ADD_GPU=true",
+			"--env", "CODER_USR_LIB_DIR=/var/coder/usr/lib",
+			"--env", "CODER_INNER_USR_LIB_DIR=/usr/lib/coder",
+			"--runtime=nvidia",
+			"--gpus=all",
+		)
+
+		// Assert that the libraries end up in the expected location in the inner container.
+		out, err := execContainerCmd(ctx, t, ctID, "docker", "exec", "workspace_cvm", "ls", "-l", "/usr/lib/coder")
+		require.NoError(t, err, "inner usr lib dir override failed")
+		require.Regexp(t, `(?i)(libgl|nvidia|vulkan|cuda)`, out)
+	})
+}
+
+// dockerRuntimes returns the list of container runtimes available on the host.
+// It does this by running `docker info` and parsing the output.
+func dockerRuntimes(t *testing.T) []string {
+	t.Helper()
+
+	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
+	defer cancel()
+
+	cmd := exec.CommandContext(ctx, "docker", "info", "--format", "{{ range $k, $v := .Runtimes}}{{ println $k }}{{ end }}")
+	out, err := cmd.CombinedOutput()
+	require.NoError(t, err, "failed to get docker runtimes: %s", out)
+	raw := strings.TrimSpace(string(out))
+	return strings.Split(raw, "\n")
+}
+
+// startEnvboxCmd starts the envbox container with the given arguments.
+// Ideally we would use ory/dockertest for this, but it doesn't support
+// specifying the runtime. We have alternatively used the docker client library,
+// but a nice property of using the docker cli is that if a test fails, you can
+// just run the command manually to debug!
+func startEnvboxCmd(ctx context.Context, t *testing.T, innerImage, innerUser string, addlArgs ...string) (containerID string) {
+	t.Helper()
+
+	var (
+		tmpDir            = integrationtest.TmpDir(t)
+		binds             = integrationtest.DefaultBinds(t, tmpDir)
+		cancelCtx, cancel = context.WithCancel(ctx)
+	)
+	t.Cleanup(cancel)
+
+	// Unfortunately ory/dockertest does not allow us to specify runtime.
+	// We're instead going to just run the container directly via the docker cli.
+	startEnvboxArgs := []string{
+		"run",
+		"--detach",
+		"--rm",
+		"--privileged",
+		"--env", "CODER_INNER_IMAGE=" + innerImage,
+		"--env", "CODER_INNER_USERNAME=" + innerUser,
+	}
+	for _, bind := range binds {
+		bindParts := []string{bind.Source, bind.Target}
+		if bind.ReadOnly {
+			bindParts = append(bindParts, "ro")
+		}
+		startEnvboxArgs = append(startEnvboxArgs, []string{"-v", strings.Join(bindParts, ":")}...)
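+		// Each bind expands to a "-v source:target[:ro]" argument.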
+	}
+	startEnvboxArgs = append(startEnvboxArgs, addlArgs...)
+	startEnvboxArgs = append(startEnvboxArgs, "envbox:latest", "/envbox", "docker")
+	t.Logf("envbox docker cmd: docker %s", strings.Join(startEnvboxArgs, " "))
+
+	// Start the envbox container without attaching.
+	startEnvboxCmd := exec.CommandContext(cancelCtx, "docker", startEnvboxArgs...)
+	out, err := startEnvboxCmd.CombinedOutput()
+	require.NoError(t, err, "failed to start envbox container")
+	containerID = strings.TrimSpace(string(out))
+	t.Logf("envbox container ID: %s", containerID)
+	t.Cleanup(func() {
+		if t.Failed() {
+			// Dump the logs if the test failed.
+			logsCmd := exec.Command("docker", "logs", containerID)
+			out, err := logsCmd.CombinedOutput()
+			if err != nil {
+				t.Logf("failed to read logs: %s", err)
+			}
+			t.Logf("envbox logs:\n%s", string(out))
+		}
+		// Stop the envbox container.
+		stopEnvboxCmd := exec.Command("docker", "rm", "-f", containerID)
+		out, err := stopEnvboxCmd.CombinedOutput()
+		if err != nil {
+			t.Errorf("failed to stop envbox container: %s", out)
+		}
+	})
+
+	// Wait for the Docker CVM to come up.
+	waitCtx, waitCancel := context.WithTimeout(cancelCtx, 5*time.Minute)
+	defer waitCancel()
+WAITLOOP:
+	for {
+		select {
+		case <-waitCtx.Done():
+			t.Fatal("timed out waiting for inner container to come up")
+		default:
+			execCmd := exec.CommandContext(cancelCtx, "docker", "exec", containerID, "docker", "inspect", "workspace_cvm")
+			out, err := execCmd.CombinedOutput()
+			if err != nil {
+				t.Logf("waiting for inner container to come up:\n%s", string(out))
+				<-time.After(time.Second)
+				continue WAITLOOP
+			}
+			t.Logf("inner container is up")
+			break WAITLOOP
+		}
+	}
+
+	return containerID
+}
+
+func execContainerCmd(ctx context.Context, t *testing.T, containerID string, cmdArgs ...string) (string, error) {
+	t.Helper()
+
+	execArgs := []string{"exec", containerID}
+	execArgs = append(execArgs, cmdArgs...)
+	t.Logf("exec cmd: docker %s", strings.Join(execArgs, " "))
+	execCmd := exec.CommandContext(ctx, "docker", execArgs...)
+	out, err := execCmd.CombinedOutput()
+	if err != nil {
+		t.Logf("exec cmd failed: %s\n%s", err.Error(), string(out))
+	} else {
+		t.Logf("exec cmd success: %s", out)
+	}
+	return strings.TrimSpace(string(out)), err
+}
diff --git a/integration/integrationtest/docker.go b/integration/integrationtest/docker.go
index 6b5e07c..4390ee2 100644
--- a/integration/integrationtest/docker.go
+++ b/integration/integrationtest/docker.go
@@ -40,6 +40,8 @@ const (
 	// UbuntuImage is just vanilla ubuntu (80MB) but the user is set to a non-root
 	// user.
 	UbuntuImage = "gcr.io/coder-dev-1/sreya/ubuntu-coder"
+	// Redhat UBI9 image as of 2025-03-05
+	RedhatImage = "registry.access.redhat.com/ubi9/ubi:9.5"
 
 	// RegistryImage is used to assert that we add certs
 	// correctly to the docker daemon when pulling an image

From a9acf2c339aa44746cbe020b0dcfe0c4ea7e3497 Mon Sep 17 00:00:00 2001
From: Cian Johnston
Date: Thu, 6 Mar 2025 12:57:23 +0000
Subject: [PATCH 3/7] fix(dockerutil): correct usrLibMultiarchDir (#128)

---
 dockerutil/image.go             | 3 ++-
 dockerutil/image_linux_amd64.go | 2 +-
 dockerutil/image_linux_arm64.go | 2 +-
 3 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/dockerutil/image.go b/dockerutil/image.go
index a5323f2..f49cba0 100644
--- a/dockerutil/image.go
+++ b/dockerutil/image.go
@@ -9,13 +9,14 @@ import (
 	"strings"
 	"time"
 
-	"cdr.dev/slog"
 	dockertypes "github.com/docker/docker/api/types"
 	"github.com/docker/docker/api/types/container"
 	"github.com/docker/docker/api/types/filters"
 	"github.com/docker/docker/api/types/image"
 	"golang.org/x/xerrors"
 
+	"cdr.dev/slog"
+
 	"github.com/coder/envbox/buildlog"
 	"github.com/coder/envbox/xunix"
 	"github.com/coder/retry"
diff --git a/dockerutil/image_linux_amd64.go b/dockerutil/image_linux_amd64.go
index 577c1be..5952a29 100644
--- a/dockerutil/image_linux_amd64.go
+++ b/dockerutil/image_linux_amd64.go
@@ -2,4 +2,4 @@ package dockerutil
 
 // usrLibMultiarchDir is defined for arm64 and amd64 architectures.
 // Envbox is not published for other architectures.
-var usrLibMultiarchDir = "/usr/lib/aarch64-linux-gnu"
+var usrLibMultiarchDir = "/usr/lib/x86_64-linux-gnu"
diff --git a/dockerutil/image_linux_arm64.go b/dockerutil/image_linux_arm64.go
index 5952a29..577c1be 100644
--- a/dockerutil/image_linux_arm64.go
+++ b/dockerutil/image_linux_arm64.go
@@ -2,4 +2,4 @@ package dockerutil
 
 // usrLibMultiarchDir is defined for arm64 and amd64 architectures.
 // Envbox is not published for other architectures.
-var usrLibMultiarchDir = "/usr/lib/x86_64-linux-gnu"
+var usrLibMultiarchDir = "/usr/lib/aarch64-linux-gnu"

From a09c7c22907c42908f2255415bdf6a368efb35ac Mon Sep 17 00:00:00 2001
From: Cian Johnston
Date: Fri, 7 Mar 2025 10:22:47 +0000
Subject: [PATCH 4/7] fix(xunix): improve handling of gpu library mounts (#129)

* Mounts symlinks to auto-added GPU libs to inner container
* Tightens up gpuExtraRegex to avoid extraneous matches (e.g. libglib.so)
* Adds GPU integration tests to include regression test for redhat
* Adds GPU integration test with sample NVIDIA CUDA image
---
 integration/gpu_test.go               | 119 ++++++++++++++++++++--
 integration/integrationtest/docker.go |   3 +
 xunix/gpu.go                          |  99 +++++++++++++++++-
 xunix/gpu_test.go                     | 129 +++++++++++++++++++++-
 4 files changed, 333 insertions(+), 17 deletions(-)

diff --git a/integration/gpu_test.go b/integration/gpu_test.go
index 47edef4..f3fc1ac 100644
--- a/integration/gpu_test.go
+++ b/integration/gpu_test.go
@@ -9,6 +9,7 @@ import (
 	"testing"
 	"time"
 
+	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
 
 	"github.com/coder/envbox/integration/integrationtest"
@@ -41,8 +42,7 @@ func TestDocker_Nvidia(t *testing.T) {
 		)
 
 		// Assert that we can run nvidia-smi in the inner container.
-		_, err := execContainerCmd(ctx, t, ctID, "docker", "exec", "workspace_cvm", "nvidia-smi")
-		require.NoError(t, err, "failed to run nvidia-smi in the inner container")
+		assertInnerNvidiaSMI(ctx, t, ctID)
 	})
 
 	t.Run("Redhat", func(t *testing.T) {
@@ -52,16 +52,29 @@ func TestDocker_Nvidia(t *testing.T) {
 		// Start the envbox container.
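 		// The inner mount destination is auto-detected from the image's
 		// /etc/os-release (rhel maps to /usr/lib64), so the host libraries
 		// are staged under the generic /var/coder/usr/lib path.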
 		ctID := startEnvboxCmd(ctx, t, integrationtest.RedhatImage, "root",
-			"-v", "/usr/lib/x86_64-linux-gnu:/var/coder/usr/lib64",
+			"-v", "/usr/lib/x86_64-linux-gnu:/var/coder/usr/lib",
 			"--env", "CODER_ADD_GPU=true",
-			"--env", "CODER_USR_LIB_DIR=/var/coder/usr/lib64",
+			"--env", "CODER_USR_LIB_DIR=/var/coder/usr/lib",
 			"--runtime=nvidia",
 			"--gpus=all",
 		)
 
 		// Assert that we can run nvidia-smi in the inner container.
-		_, err := execContainerCmd(ctx, t, ctID, "docker", "exec", "workspace_cvm", "nvidia-smi")
-		require.NoError(t, err, "failed to run nvidia-smi in the inner container")
+		assertInnerNvidiaSMI(ctx, t, ctID)
+
+		// Make sure dnf still works. This checks for a regression due to
+		// gpuExtraRegex matching `libglib.so` in the outer container.
+		// This had a dependency on `libpcre.so.3` which would cause dnf to fail.
+		out, err := execContainerCmd(ctx, t, ctID, "docker", "exec", "workspace_cvm", "dnf")
+		if !assert.NoError(t, err, "failed to run dnf in the inner container") {
+			t.Logf("dnf output:\n%s", strings.TrimSpace(out))
+		}
+
+		// Make sure libglib.so is not present in the inner container.
+		out, err = execContainerCmd(ctx, t, ctID, "docker", "exec", "workspace_cvm", "ls", "-1", "/usr/lib/x86_64-linux-gnu/libglib*")
+		// An error is expected here.
+		assert.Error(t, err, "libglib should not be present in the inner container")
+		assert.Contains(t, out, "No such file or directory", "libglib should not be present in the inner container")
 	})
 
 	t.Run("InnerUsrLibDirOverride", func(t *testing.T) {
@@ -79,11 +92,58 @@ func TestDocker_Nvidia(t *testing.T) {
 			"--gpus=all",
 		)
 
-		// Assert that the libraries end up in the expected location in the inner container.
-		out, err := execContainerCmd(ctx, t, ctID, "docker", "exec", "workspace_cvm", "ls", "-l", "/usr/lib/coder")
+		// Assert that the libraries end up in the expected location in the inner
+		// container.
+		out, err := execContainerCmd(ctx, t, ctID, "docker", "exec", "workspace_cvm", "ls", "-1", "/usr/lib/coder")
 		require.NoError(t, err, "inner usr lib dir override failed")
 		require.Regexp(t, `(?i)(libgl|nvidia|vulkan|cuda)`, out)
 	})
+
+	t.Run("EmptyHostUsrLibDir", func(t *testing.T) {
+		t.Parallel()
+		ctx, cancel := context.WithCancel(context.Background())
+		t.Cleanup(cancel)
+		emptyUsrLibDir := t.TempDir()
+
+		// Start the envbox container.
+		ctID := startEnvboxCmd(ctx, t, integrationtest.UbuntuImage, "root",
+			"-v", emptyUsrLibDir+":/var/coder/usr/lib",
+			"--env", "CODER_ADD_GPU=true",
+			"--env", "CODER_USR_LIB_DIR=/var/coder/usr/lib",
+			"--runtime=nvidia",
+			"--gpus=all",
+		)
+
+		ofs := outerFiles(ctx, t, ctID, "/usr/lib/x86_64-linux-gnu/libnv*")
+		// Assert invariant: the outer container has the files we expect.
+		require.NotEmpty(t, ofs, "failed to list outer container files")
+		// Assert that expected files are available in the inner container.
+		assertInnerFiles(ctx, t, ctID, "/usr/lib/x86_64-linux-gnu/libnv*", ofs...)
+		assertInnerNvidiaSMI(ctx, t, ctID)
+	})
+
+	t.Run("CUDASample", func(t *testing.T) {
+		t.Parallel()
+
+		ctx, cancel := context.WithCancel(context.Background())
+		t.Cleanup(cancel)
+
+		// Start the envbox container.
+		ctID := startEnvboxCmd(ctx, t, integrationtest.CUDASampleImage, "root",
+			"-v", "/usr/lib/x86_64-linux-gnu:/var/coder/usr/lib",
+			"--env", "CODER_ADD_GPU=true",
+			"--env", "CODER_USR_LIB_DIR=/var/coder/usr/lib",
+			"--runtime=nvidia",
+			"--gpus=all",
+		)
+
+		// Assert that we can run nvidia-smi in the inner container.
+		assertInnerNvidiaSMI(ctx, t, ctID)
+
+		// Assert that /tmp/vectorAdd runs successfully in the inner container.
+		_, err := execContainerCmd(ctx, t, ctID, "docker", "exec", "workspace_cvm", "/tmp/vectorAdd")
+		require.NoError(t, err, "failed to run /tmp/vectorAdd in the inner container")
+	})
+}
 
 // dockerRuntimes returns the list of container runtimes available on the host.
@@ -101,6 +161,49 @@ func dockerRuntimes(t *testing.T) []string {
 	return strings.Split(raw, "\n")
 }
 
+// outerFiles returns the list of files in the outer container matching the
+// given pattern. It does this by running `ls -1` in the outer container.
+func outerFiles(ctx context.Context, t *testing.T, containerID, pattern string) []string {
+	t.Helper()
+	// We need to use /bin/sh -c to avoid the shell interpreting the glob.
+	out, err := execContainerCmd(ctx, t, containerID, "/bin/sh", "-c", "ls -1 "+pattern)
+	require.NoError(t, err, "failed to list outer container files")
+	files := strings.Split(strings.TrimSpace(out), "\n")
+	slices.Sort(files)
+	return files
+}
+
+// assertInnerFiles checks that all the files matching the given pattern exist in the
+// inner container.
+func assertInnerFiles(ctx context.Context, t *testing.T, containerID, pattern string, expected ...string) {
+	t.Helper()
+
+	// Get the list of files in the inner container.
+	// We need to use /bin/sh -c to avoid the shell interpreting the glob.
+	out, err := execContainerCmd(ctx, t, containerID, "docker", "exec", "workspace_cvm", "/bin/sh", "-c", "ls -1 "+pattern)
+	require.NoError(t, err, "failed to list inner container files")
+	innerFiles := strings.Split(strings.TrimSpace(out), "\n")
+
+	// Check that the expected files exist in the inner container.
+	missingFiles := make([]string, 0)
+	for _, expectedFile := range expected {
+		if !slices.Contains(innerFiles, expectedFile) {
+			missingFiles = append(missingFiles, expectedFile)
+		}
+	}
+	require.Empty(t, missingFiles, "missing files in inner container: %s", strings.Join(missingFiles, ", "))
+}
+
+// assertInnerNvidiaSMI checks that nvidia-smi runs successfully in the inner
+// container.
+func assertInnerNvidiaSMI(ctx context.Context, t *testing.T, containerID string) {
+	t.Helper()
+	// Assert that we can run nvidia-smi in the inner container.
+	out, err := execContainerCmd(ctx, t, containerID, "docker", "exec", "workspace_cvm", "nvidia-smi")
+	require.NoError(t, err, "failed to run nvidia-smi in the inner container")
+	require.Contains(t, out, "NVIDIA-SMI", "nvidia-smi output does not contain NVIDIA-SMI")
+}
+
 // startEnvboxCmd starts the envbox container with the given arguments.
 // Ideally we would use ory/dockertest for this, but it doesn't support
 // specifying the runtime. We have alternatively used the docker client library,
diff --git a/integration/integrationtest/docker.go b/integration/integrationtest/docker.go
index 4390ee2..5b42bbe 100644
--- a/integration/integrationtest/docker.go
+++ b/integration/integrationtest/docker.go
@@ -42,6 +42,9 @@ const (
 	UbuntuImage = "gcr.io/coder-dev-1/sreya/ubuntu-coder"
 	// Redhat UBI9 image as of 2025-03-05
 	RedhatImage = "registry.access.redhat.com/ubi9/ubi:9.5"
+	// CUDASampleImage is a CUDA sample image from NVIDIA's container registry.
+	// It contains a binary /tmp/vectorAdd which can be run to test the CUDA setup.
+	CUDASampleImage = "nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda10.2"
 
 	// RegistryImage is used to assert that we add certs
 	// correctly to the docker daemon when pulling an image
diff --git a/xunix/gpu.go b/xunix/gpu.go
index 0708667..a9129d5 100644
--- a/xunix/gpu.go
+++ b/xunix/gpu.go
@@ -6,6 +6,7 @@ import (
 	"os"
 	"path/filepath"
 	"regexp"
+	"slices"
 	"sort"
 	"strings"
 
@@ -17,9 +18,9 @@ import (
 )
 
 var (
-	gpuMountRegex     = regexp.MustCompile("(?i)(nvidia|vulkan|cuda)")
-	gpuExtraRegex     = regexp.MustCompile("(?i)(libgl|nvidia|vulkan|cuda)")
-	gpuEnvRegex       = regexp.MustCompile("(?i)nvidia")
+	gpuMountRegex     = regexp.MustCompile(`(?i)(nvidia|vulkan|cuda)`)
+	gpuExtraRegex     = regexp.MustCompile(`(?i)(libgl(e|sx|\.)|nvidia|vulkan|cuda)`)
+	gpuEnvRegex       = regexp.MustCompile(`(?i)nvidia`)
 	sharedObjectRegex = regexp.MustCompile(`\.so(\.[0-9\.]+)?$`)
 )
 
@@ -39,6 +40,7 @@ func GPUEnvs(ctx context.Context) []string {
 
 func GPUs(ctx context.Context, log slog.Logger, usrLibDir string) ([]Device, []mount.MountPoint, error) {
 	var (
+		afs     = GetFS(ctx)
 		mounter = Mounter(ctx)
 		devices = []Device{}
 		binds   = []mount.MountPoint{}
@@ -64,6 +66,22 @@ func GPUs(ctx context.Context, log slog.Logger, usrLibDir string) ([]Device, []m
 			// If it's not in /dev treat it as a bind mount.
 			binds = append(binds, m)
+			// We also want to find any symlinks that point to the target.
+			// This is important for the nvidia driver as it mounts the driver
+			// files with the driver version appended to the end, and creates
+			// symlinks that point to the actual files.
+			links, err := SameDirSymlinks(afs, m.Path)
+			if err != nil {
+				log.Error(ctx, "find symlinks", slog.F("path", m.Path), slog.Error(err))
+			} else {
+				for _, link := range links {
+					log.Debug(ctx, "found symlink", slog.F("link", link), slog.F("target", m.Path))
+					binds = append(binds, mount.MountPoint{
+						Path: link,
+						Opts: []string{"ro"},
+					})
+				}
+			}
 		}
 	}
 
@@ -104,7 +122,11 @@ func usrLibGPUs(ctx context.Context, log slog.Logger, usrLibDir string) ([]mount
 		return nil
 	}
 
-	if !sharedObjectRegex.MatchString(path) || !gpuExtraRegex.MatchString(path) {
+	if !gpuExtraRegex.MatchString(path) {
+		return nil
+	}
+
+	if !sharedObjectRegex.MatchString(path) {
 		return nil
 	}
 
@@ -176,6 +198,75 @@ func recursiveSymlinks(afs FS, mountpoint string, path string) ([]string, error)
 	return paths, nil
 }
 
+// SameDirSymlinks returns all links in the same directory as `target` that
+// point to target, either indirectly or directly. Only symlinks in the same
+// directory as `target` are considered.
+func SameDirSymlinks(afs FS, target string) ([]string, error) {
+	// Get the list of files in the directory of the target.
+	fis, err := afero.ReadDir(afs, filepath.Dir(target))
+	if err != nil {
+		return nil, xerrors.Errorf("read dir %q: %w", filepath.Dir(target), err)
+	}
+
+	// Do an initial pass to map all symlinks to their destinations.
+	allLinks := make(map[string]string)
+	for _, fi := range fis {
+		// Ignore non-symlinks.
+		if fi.Mode()&os.ModeSymlink == 0 {
+			continue
+		}
+
+		absPath := filepath.Join(filepath.Dir(target), fi.Name())
+		link, err := afs.Readlink(filepath.Join(filepath.Dir(target), fi.Name()))
+		if err != nil {
+			return nil, xerrors.Errorf("readlink %q: %w", fi.Name(), err)
+		}
+
+		if !filepath.IsAbs(link) {
+			link = filepath.Join(filepath.Dir(target), link)
+		}
+		allLinks[absPath] = link
+	}
+
+	// Now we can start checking for symlinks that point to the target.
+	var (
+		found = make([]string, 0)
+		// Set an arbitrary upper limit to prevent infinite loops.
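+		// Each pass below resolves one more level of indirection, so chains
+		// of up to ten symlinks are handled.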
+		maxIterations = 10
+	)
+	for range maxIterations {
+		var foundThisTime bool
+		for linkName, linkDest := range allLinks {
+			// Ignore symlinks that point outside of target's directory.
+			if filepath.Dir(linkName) != filepath.Dir(target) {
+				continue
+			}
+
+			// If the symlink points to the target, add it to the list.
+			if linkDest == target {
+				if !slices.Contains(found, linkName) {
+					found = append(found, linkName)
+					foundThisTime = true
+				}
+			}
+
+			// If the symlink points to another symlink that we already determined
+			// points to the target, add it to the list.
+			if slices.Contains(found, linkDest) {
+				if !slices.Contains(found, linkName) {
+					found = append(found, linkName)
+					foundThisTime = true
+				}
+			}
+		}
+		// If we didn't find any new symlinks, we're done.
+		if !foundThisTime {
+			break
+		}
+	}
+	return found, nil
+}
+
 // TryUnmountProcGPUDrivers unmounts any GPU-related mounts under /proc as it causes
 // issues when creating any container in some cases. Errors encountered while
 // unmounting are treated as non-fatal.
diff --git a/xunix/gpu_test.go b/xunix/gpu_test.go
index f8d8d47..4324fcf 100644
--- a/xunix/gpu_test.go
+++ b/xunix/gpu_test.go
@@ -2,10 +2,13 @@ package xunix_test
 
 import (
 	"context"
+	"os"
 	"path/filepath"
+	"sort"
 	"testing"
 
 	"github.com/spf13/afero"
+	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
 	"k8s.io/mount-utils"
 
@@ -56,11 +62,17 @@ func TestGPUs(t *testing.T) {
 			filepath.Join(usrLibMountpoint, "nvidia", "libglxserver_nvidia.so.1"),
 		}
 
-		// fakeUsrLibFiles are files that should be written to the "mounted"
-		// /usr/lib directory. It includes files that shouldn't be returned.
-		fakeUsrLibFiles = append([]string{
+		// fakeUsrLibFiles are files that we do not expect to be returned
+		// bind mounts for.
+		fakeUsrLibFiles = []string{
 			filepath.Join(usrLibMountpoint, "libcurl-gnutls.so"),
-		}, expectedUsrLibFiles...)
+			filepath.Join(usrLibMountpoint, "libglib.so"),
+		}
+
+		// allUsrLibFiles are all the files that should be written to the
+		// "mounted" /usr/lib directory. It includes files that shouldn't
+		// be returned.
+		allUsrLibFiles = append(expectedUsrLibFiles, fakeUsrLibFiles...)
 	)
 
 	ctx := xunix.WithFS(context.Background(), fs)
@@ -90,10 +99,14 @@ func TestGPUs(t *testing.T) {
 	err := fs.MkdirAll(filepath.Join(usrLibMountpoint, "nvidia"), 0o755)
 	require.NoError(t, err)
 
-	for _, file := range fakeUsrLibFiles {
+	for _, file := range allUsrLibFiles {
 		_, err = fs.Create(file)
 		require.NoError(t, err)
 	}
+	for _, mp := range mounter.MountPoints {
+		_, err = fs.Create(mp.Path)
+		require.NoError(t, err)
+	}
 
 	devices, binds, err := xunix.GPUs(ctx, log, usrLibMountpoint)
 	require.NoError(t, err)
@@ -110,5 +123,111 @@ func TestGPUs(t *testing.T) {
 			Opts: []string{"ro"},
 		})
 	}
+	for _, file := range fakeUsrLibFiles {
+		require.NotContains(t, binds, mount.MountPoint{
+			Path: file,
+			Opts: []string{"ro"},
+		})
+	}
 })
}
+
+func Test_SameDirSymlinks(t *testing.T) {
+	t.Parallel()
+
+	var (
+		ctx = context.Background()
+		// We need to test with a real filesystem as the fake one doesn't
+		// support creating symlinks.
+		tmpDir = t.TempDir()
+		// We do test with the interface though!
+		afs = xunix.GetFS(ctx)
+	)
+
+	// Create some files in the temporary directory.
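+	// Layout: file1.real <- file1.link1 <- file1.link2; file2.real is only
+	// linked from a subdirectory; file3.real has no links; file4.real gets
+	// one relative symlink.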
+	_, err := os.Create(filepath.Join(tmpDir, "file1.real"))
+	require.NoError(t, err, "create file")
+	_, err = os.Create(filepath.Join(tmpDir, "file2.real"))
+	require.NoError(t, err, "create file2")
+	_, err = os.Create(filepath.Join(tmpDir, "file3.real"))
+	require.NoError(t, err, "create file3")
+	_, err = os.Create(filepath.Join(tmpDir, "file4.real"))
+	require.NoError(t, err, "create file4")
+
+	// Create a symlink to the file in the temporary directory.
+	// This needs to be done by the real os package.
+	err = os.Symlink(filepath.Join(tmpDir, "file1.real"), filepath.Join(tmpDir, "file1.link1"))
+	require.NoError(t, err, "create first symlink")
+
+	// Create another symlink to the previous symlink.
+	err = os.Symlink(filepath.Join(tmpDir, "file1.link1"), filepath.Join(tmpDir, "file1.link2"))
+	require.NoError(t, err, "create second symlink")
+
+	// Create a symlink to the file from outside of the target's directory.
+	err = os.MkdirAll(filepath.Join(tmpDir, "dir"), 0o755)
+	require.NoError(t, err, "create dir")
+	// Create a symlink from file2 to inside the dir.
+	err = os.Symlink(filepath.Join(tmpDir, "file2.real"), filepath.Join(tmpDir, "dir", "file2.link1"))
+	require.NoError(t, err, "create dir symlink")
+
+	// Create a symlink with a relative path. To do this, we need to
+	// change the working directory to the temporary directory.
+	oldWorkingDir, err := os.Getwd()
+	require.NoError(t, err, "get working dir")
+	// Change the working directory to the temporary directory.
+	require.NoError(t, os.Chdir(tmpDir), "change working dir")
+	err = os.Symlink(filepath.Join(tmpDir, "file4.real"), "file4.link1")
+	require.NoError(t, err, "create relative symlink")
+	// Change the working directory back to the original.
+	require.NoError(t, os.Chdir(oldWorkingDir), "change working dir back")
+
+	for _, tt := range []struct {
+		name     string
+		expected []string
+	}{
+		{
+			// Two symlinks to the same file.
+			name: "file1.real",
+			expected: []string{
+				filepath.Join(tmpDir, "file1.link1"),
+				filepath.Join(tmpDir, "file1.link2"),
+			},
+		},
+		{
+			// Mid-way in the symlink chain.
+			name: "file1.link1",
+			expected: []string{
+				filepath.Join(tmpDir, "file1.link2"),
+			},
+		},
+		{
+			// End of the symlink chain.
+			name:     "file1.link2",
+			expected: []string{},
+		},
+		{
+			// Only symlinked from outside of the target's directory.
+			name:     "file2.real",
+			expected: []string{},
+		},
+		{
+			// No symlinks to this file.
+			name:     "file3.real",
+			expected: []string{},
+		},
+		{
+			// One relative symlink.
+			name:     "file4.real",
+			expected: []string{filepath.Join(tmpDir, "file4.link1")},
+		},
+	} {
+		t.Run(tt.name, func(t *testing.T) {
+			t.Parallel()
+			fullPath := filepath.Join(tmpDir, tt.name)
+			actual, err := xunix.SameDirSymlinks(afs, fullPath)
+			require.NoError(t, err, "find symlink")
+			sort.Strings(actual)
+			assert.Equal(t, tt.expected, actual, "find symlinks %q", tt.name)
+		})
+	}
+}

From 2f38dbc66e9246a9adf55490cdc36b2b3845febc Mon Sep 17 00:00:00 2001
From: Bjorn Robertsson
Date: Fri, 7 Mar 2025 12:56:01 +0000
Subject: [PATCH 5/7] chore(README.md): document CODER_BOOTSTRAP_SCRIPT (#130)

Adds missing docs regarding CODER_BOOTSTRAP_SCRIPT in README

Co-authored-by: Cian Johnston
---
 README.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/README.md b/README.md
index 437e381..4fb1ef9 100644
--- a/README.md
+++ b/README.md
@@ -19,6 +19,8 @@ The environment variables can be used to configure various aspects of the inner
 | `CODER_INNER_HOSTNAME` | The hostname to use for the inner container. | false |
 | `CODER_IMAGE_PULL_SECRET` | The docker credentials to use when pulling the inner container. The recommended way to do this is to create an [Image Pull Secret](https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/#create-a-secret-by-providing-credentials-on-the-command-line) and then reference the secret using an [environment variable](https://kubernetes.io/docs/tasks/inject-data-application/distribute-credentials-secure/#define-container-environment-variables-using-secret-data). See below for example. | false |
 | `CODER_DOCKER_BRIDGE_CIDR` | The bridge CIDR to start the Docker daemon with. | false |
+| `CODER_BOOTSTRAP_SCRIPT` | The script to use to bootstrap the container. This should typically install and start the agent. | |
+| false | | |
 | `CODER_MOUNTS` | A list of mounts to mount into the inner container. Mounts default to `rw`. Ex: `CODER_MOUNTS=/home/coder:/home/coder,/var/run/mysecret:/var/run/mysecret:ro` | false |
 | `CODER_USR_LIB_DIR` | The mountpoint of the host `/usr/lib` directory. Only required when using GPUs. | false |
 | `CODER_INNER_USR_LIB_DIR` | The inner /usr/lib mountpoint. This is automatically detected based on `/etc/os-release` in the inner image, but may optionally be overridden. | false |

From c72a26466a5e9f4f952146d7bb60f7ffac71f806 Mon Sep 17 00:00:00 2001
From: Cian Johnston
Date: Fri, 7 Mar 2025 15:28:36 +0000
Subject: [PATCH 6/7] chore(README.md): fix row formatting (#131)

---
 README.md | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 4fb1ef9..d09c4a5 100644
--- a/README.md
+++ b/README.md
@@ -19,8 +19,7 @@ The environment variables can be used to configure various aspects of the inner
 | `CODER_INNER_HOSTNAME` | The hostname to use for the inner container. | false |
 | `CODER_IMAGE_PULL_SECRET` | The docker credentials to use when pulling the inner container. The recommended way to do this is to create an [Image Pull Secret](https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/#create-a-secret-by-providing-credentials-on-the-command-line) and then reference the secret using an [environment variable](https://kubernetes.io/docs/tasks/inject-data-application/distribute-credentials-secure/#define-container-environment-variables-using-secret-data). See below for example. | false |
 | `CODER_DOCKER_BRIDGE_CIDR` | The bridge CIDR to start the Docker daemon with. | false |
-| `CODER_BOOTSTRAP_SCRIPT` | The script to use to bootstrap the container. This should typically install and start the agent. | |
-| false | | |
+| `CODER_BOOTSTRAP_SCRIPT` | The script to use to bootstrap the container. This should typically install and start the agent. | false |
 | `CODER_MOUNTS` | A list of mounts to mount into the inner container. Mounts default to `rw`. Ex: `CODER_MOUNTS=/home/coder:/home/coder,/var/run/mysecret:/var/run/mysecret:ro` | false |
 | `CODER_USR_LIB_DIR` | The mountpoint of the host `/usr/lib` directory. Only required when using GPUs. | false |
 | `CODER_INNER_USR_LIB_DIR` | The inner /usr/lib mountpoint. This is automatically detected based on `/etc/os-release` in the inner image, but may optionally be overridden. | false |

From 8e68914158e856de652699ab314c9b1b23010993 Mon Sep 17 00:00:00 2001
From: Jon Ayers
Date: Sun, 16 Mar 2025 22:48:14 -0400
Subject: [PATCH 7/7] fix: correctly parse image digest (#133)

---
 cli/docker.go      | 13 ++++++++++---
 cli/docker_test.go |  5 +++++
 2 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/cli/docker.go b/cli/docker.go
index 2f6ec5f..9ac872c 100644
--- a/cli/docker.go
+++ b/cli/docker.go
@@ -399,14 +399,14 @@ func runDockerCVM(ctx context.Context, log slog.Logger, client dockerutil.Client
 	if err != nil {
 		return xerrors.Errorf("set oom score: %w", err)
 	}
 
-	ref, err := name.NewTag(flags.innerImage)
+	ref, err := name.ParseReference(flags.innerImage)
 	if err != nil {
 		return xerrors.Errorf("parse ref: %w", err)
 	}
 
 	var dockerAuth dockerutil.AuthConfig
 	if flags.imagePullSecret != "" {
-		dockerAuth, err = dockerutil.AuthConfigFromString(flags.imagePullSecret, ref.RegistryStr())
+		dockerAuth, err = dockerutil.AuthConfigFromString(flags.imagePullSecret, ref.Context().RegistryStr())
 		if err != nil {
 			return xerrors.Errorf("parse auth config: %w", err)
 		}
@@ -415,7 +415,7 @@ func runDockerCVM(ctx context.Context, log slog.Logger, client dockerutil.Client
 	log.Info(ctx, "checking for docker config file", slog.F("path", flags.dockerConfig))
 	if _, err := fs.Stat(flags.dockerConfig); err == nil {
 		log.Info(ctx, "detected file", slog.F("image", flags.innerImage))
-		dockerAuth, err = dockerutil.AuthConfigFromPath(flags.dockerConfig, ref.RegistryStr())
+		dockerAuth, err = dockerutil.AuthConfigFromPath(flags.dockerConfig, ref.Context().RegistryStr())
 		if err != nil && !xerrors.Is(err, os.ErrNotExist) {
 			return xerrors.Errorf("auth config from file: %w", err)
 		}
@@ -656,6 +656,13 @@ func runDockerCVM(ctx context.Context, log slog.Logger, client dockerutil.Client
 	}
 
 	blog.Info("Creating workspace...")
+	// If imgMeta.HasInit is true, we just use flags.boostrapScript as the entrypoint.
+	// But if it's false, we need to run /sbin/init as the entrypoint.
+	// We need to mount or run some exec command that injects a systemd service for starting
+	// the coder agent.
+
+	// We need to check that if PID1 is systemd (or /sbin/init) that systemd propagates SIGTERM
+	// to service units. If it doesn't then this solution doesn't help us.
 
 	// Create the inner container.
 	containerID, err := dockerutil.CreateContainer(ctx, client, &dockerutil.ContainerConfig{
diff --git a/cli/docker_test.go b/cli/docker_test.go
index 88e9bc3..9f05201 100644
--- a/cli/docker_test.go
+++ b/cli/docker_test.go
@@ -110,6 +110,11 @@ func TestDocker(t *testing.T) {
 			image:   "gcr.io/ubuntu:24.04",
 			success: true,
 		},
+		{
+			name:    "RegistryRepositorySha",
+			image:   "gcr.io/images/helloworld@sha256:13e101dd511a26a2147e123456bdff5845c9461aaa53d856845745b063001234",
+			success: true,
+		},
 	}
 
 	for _, tc := range testcases {