Skip to content

Commit a2174ed

Browse files
Merge branch 'main' into PYT-996-update-upstream-opinfo-to-generate-appropriately-scaled-sample-inputs
2 parents e793cf1 + 178515d commit a2174ed

File tree

1,603 files changed

+64781
-32404
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

1,603 files changed

+64781
-32404
lines changed

.ci/aarch64_linux/build_aarch64_wheel.py

Lines changed: 4 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -438,9 +438,7 @@ def build_torchvision(
438438
)
439439
build_vars += f"BUILD_VERSION={version}.dev{build_date}"
440440
elif build_version is not None:
441-
build_vars += (
442-
f"BUILD_VERSION={build_version} PYTORCH_VERSION={branch[1:].split('-')[0]}"
443-
)
441+
build_vars += f"BUILD_VERSION={build_version} PYTORCH_VERSION={branch[1:].split('-', maxsplit=1)[0]}"
444442
if host.using_docker():
445443
build_vars += " CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000"
446444

@@ -495,9 +493,7 @@ def build_torchdata(
495493
)
496494
build_vars += f"BUILD_VERSION={version}.dev{build_date}"
497495
elif build_version is not None:
498-
build_vars += (
499-
f"BUILD_VERSION={build_version} PYTORCH_VERSION={branch[1:].split('-')[0]}"
500-
)
496+
build_vars += f"BUILD_VERSION={build_version} PYTORCH_VERSION={branch[1:].split('-', maxsplit=1)[0]}"
501497
if host.using_docker():
502498
build_vars += " CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000"
503499

@@ -553,9 +549,7 @@ def build_torchtext(
553549
)
554550
build_vars += f"BUILD_VERSION={version}.dev{build_date}"
555551
elif build_version is not None:
556-
build_vars += (
557-
f"BUILD_VERSION={build_version} PYTORCH_VERSION={branch[1:].split('-')[0]}"
558-
)
552+
build_vars += f"BUILD_VERSION={build_version} PYTORCH_VERSION={branch[1:].split('-', maxsplit=1)[0]}"
559553
if host.using_docker():
560554
build_vars += " CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000"
561555

@@ -613,9 +607,7 @@ def build_torchaudio(
613607
)
614608
build_vars += f"BUILD_VERSION={version}.dev{build_date}"
615609
elif build_version is not None:
616-
build_vars += (
617-
f"BUILD_VERSION={build_version} PYTORCH_VERSION={branch[1:].split('-')[0]}"
618-
)
610+
build_vars += f"BUILD_VERSION={build_version} PYTORCH_VERSION={branch[1:].split('-', maxsplit=1)[0]}"
619611
if host.using_docker():
620612
build_vars += " CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000"
621613

.ci/docker/README.md

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,3 +36,104 @@ See `build.sh` for valid build environments (it's the giant switch).
3636
# Set flags (see build.sh) and build image
3737
sudo bash -c 'TRITON=1 ./build.sh pytorch-linux-bionic-py3.8-gcc9 -t myimage:latest'
3838
```
39+
40+
## [Guidance] Adding a New Base Docker Image
41+
42+
### Background
43+
44+
The base Docker images in directory `.ci/docker/` are built by the `docker-builds.yml` workflow. Those images are used throughout the PyTorch CI/CD pipeline. You should only create or modify a base Docker image if you need specific environment changes or dependencies before building PyTorch on CI.
45+
46+
1. **Automatic Rebuilding**:
47+
- The Docker image building process is triggered automatically when changes are made to files in the `.ci/docker/*` directory
48+
- This ensures all images stay up-to-date with the latest dependencies and configurations
49+
50+
2. **Image Reuse in PyTorch Build Workflows** (example: linux-build):
51+
- The images generated by `docker-builds.yml` are reused in `_linux-build.yml` through the `calculate-docker-image` step
52+
- The `_linux-build.yml` workflow:
53+
- Pulls the Docker image determined by the `calculate-docker-image` step
54+
- Runs a Docker container with that image
55+
- Executes `.ci/pytorch/build.sh` inside the container to build PyTorch
56+
57+
3. **Usage in Test Workflows** (example: linux-test):
58+
- The same Docker images are also used in `_linux-test.yml` for running tests
59+
- The `_linux-test.yml` workflow follows a similar pattern:
60+
- It uses the `calculate-docker-image` step to determine which Docker image to use
61+
- It pulls the Docker image and runs a container with that image
62+
- It installs the wheels from the artifacts generated by PyTorch build jobs
63+
- It executes test scripts (like `.ci/pytorch/test.sh` or `.ci/pytorch/multigpu-test.sh`) inside the container
64+
65+
### Understanding File Purposes
66+
67+
#### `.ci/docker/build.sh` vs `.ci/pytorch/build.sh`
68+
- **`.ci/docker/build.sh`**:
69+
- Used for building base Docker images
70+
- Executed by the `docker-builds.yml` workflow to pre-build Docker images for CI
71+
- Contains configurations for different Docker build environments
72+
73+
- **`.ci/pytorch/build.sh`**:
74+
- Used for building PyTorch inside a Docker container
75+
- Called by workflows like `_linux-build.yml` after the Docker container is started
76+
- Builds PyTorch wheels and other artifacts
77+
78+
#### `.ci/docker/ci_commit_pins/` vs `.github/ci_commit_pins`
79+
- **`.ci/docker/ci_commit_pins/`**:
80+
- Used for pinning dependency versions during base Docker image building
81+
- Ensures consistent environments for building PyTorch
82+
- Changes here trigger base Docker image rebuilds
83+
84+
- **`.github/ci_commit_pins`**:
85+
- Used for pinning dependency versions during PyTorch building and tests
86+
- Ensures consistent dependencies for PyTorch across different builds
87+
- Used by build scripts running inside Docker containers
88+
89+
### Step-by-Step Guide for Adding a New Base Docker Image
90+
91+
#### 1. Add Pinned Commits (If Applicable)
92+
93+
We use pinned commits for build stability. The `nightly.yml` workflow checks and updates pinned commits for certain repository dependencies daily.
94+
95+
If your new Docker image needs a library installed from a specific pinned commit or built from source:
96+
97+
1. Add the repository you want to track in `nightly.yml` and `merge-rules.yml`
98+
2. Add the initial pinned commit in `.ci/docker/ci_commit_pins/`. The name of the text file must match the one defined in step 1
99+
100+
#### 2. Configure the Base Docker Image
101+
1. **Add new Base Docker image configuration** (if applicable):
102+
103+
Add the configuration in `.ci/docker/build.sh`. For example:
104+
```bash
105+
pytorch-linux-jammy-cuda12.8-cudnn9-py3.12-gcc11-new1)
106+
CUDA_VERSION=12.8.1
107+
ANACONDA_PYTHON_VERSION=3.12
108+
GCC_VERSION=11
109+
VISION=yes
110+
KATEX=yes
111+
UCX_COMMIT=${_UCX_COMMIT}
112+
UCC_COMMIT=${_UCC_COMMIT}
113+
TRITON=yes
114+
NEW_ARG_1=yes
115+
;;
116+
```
117+
118+
2. **Add build arguments to Docker build command**:
119+
120+
If you're introducing a new argument to the Docker build, make sure to add it in the Docker build step in `.ci/docker/build.sh`:
121+
```bash
122+
docker build \
123+
....
124+
--build-arg "NEW_ARG_1=${NEW_ARG_1}"
125+
```
126+
127+
3. **Update Dockerfile logic**:
128+
129+
Update the Dockerfile to use the new argument. For example, in `ubuntu/Dockerfile`:
130+
```dockerfile
131+
ARG NEW_ARG_1
132+
# Set up environment for NEW_ARG_1
133+
RUN if [ -n "${NEW_ARG_1}" ]; then bash ./do_something.sh; fi
134+
```
135+
136+
4. **Add the Docker configuration** in `.github/workflows/docker-builds.yml`:
137+
138+
The `docker-builds.yml` workflow pre-builds the Docker images whenever changes occur in the `.ci/docker/` directory. This includes the
139+
pinned commit updates.

.ci/docker/build.sh

Lines changed: 8 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,6 @@ tag=$(echo $image | awk -F':' '{print $2}')
9393
case "$tag" in
9494
pytorch-linux-jammy-cuda12.4-cudnn9-py3-gcc11)
9595
CUDA_VERSION=12.4
96-
CUDNN_VERSION=9
9796
ANACONDA_PYTHON_VERSION=3.10
9897
GCC_VERSION=11
9998
VISION=yes
@@ -104,7 +103,6 @@ case "$tag" in
104103
;;
105104
pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc11)
106105
CUDA_VERSION=12.8.1
107-
CUDNN_VERSION=9
108106
ANACONDA_PYTHON_VERSION=3.10
109107
GCC_VERSION=11
110108
VISION=yes
@@ -115,7 +113,6 @@ case "$tag" in
115113
;;
116114
pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc9-inductor-benchmarks)
117115
CUDA_VERSION=12.8.1
118-
CUDNN_VERSION=9
119116
ANACONDA_PYTHON_VERSION=3.10
120117
GCC_VERSION=9
121118
VISION=yes
@@ -127,7 +124,6 @@ case "$tag" in
127124
;;
128125
pytorch-linux-jammy-cuda12.8-cudnn9-py3.12-gcc9-inductor-benchmarks)
129126
CUDA_VERSION=12.8.1
130-
CUDNN_VERSION=9
131127
ANACONDA_PYTHON_VERSION=3.12
132128
GCC_VERSION=9
133129
VISION=yes
@@ -139,7 +135,6 @@ case "$tag" in
139135
;;
140136
pytorch-linux-jammy-cuda12.8-cudnn9-py3.13-gcc9-inductor-benchmarks)
141137
CUDA_VERSION=12.8.1
142-
CUDNN_VERSION=9
143138
ANACONDA_PYTHON_VERSION=3.13
144139
GCC_VERSION=9
145140
VISION=yes
@@ -149,56 +144,18 @@ case "$tag" in
149144
TRITON=yes
150145
INDUCTOR_BENCHMARKS=yes
151146
;;
152-
pytorch-linux-jammy-cuda12.6-cudnn9-py3-gcc9)
153-
CUDA_VERSION=12.6.3
154-
CUDNN_VERSION=9
155-
ANACONDA_PYTHON_VERSION=3.10
156-
GCC_VERSION=9
157-
VISION=yes
158-
KATEX=yes
159-
UCX_COMMIT=${_UCX_COMMIT}
160-
UCC_COMMIT=${_UCC_COMMIT}
161-
TRITON=yes
162-
;;
163-
pytorch-linux-jammy-cuda12.6-cudnn9-py3-gcc9-inductor-benchmarks)
164-
CUDA_VERSION=12.6
165-
CUDNN_VERSION=9
166-
ANACONDA_PYTHON_VERSION=3.10
167-
GCC_VERSION=9
168-
VISION=yes
169-
KATEX=yes
170-
UCX_COMMIT=${_UCX_COMMIT}
171-
UCC_COMMIT=${_UCC_COMMIT}
172-
TRITON=yes
173-
INDUCTOR_BENCHMARKS=yes
174-
;;
175-
pytorch-linux-jammy-cuda12.6-cudnn9-py3.12-gcc9-inductor-benchmarks)
176-
CUDA_VERSION=12.6
177-
CUDNN_VERSION=9
147+
pytorch-linux-jammy-cuda12.8-cudnn9-py3.12-gcc11-vllm)
148+
CUDA_VERSION=12.8.1
178149
ANACONDA_PYTHON_VERSION=3.12
179-
GCC_VERSION=9
180-
VISION=yes
181-
KATEX=yes
182-
UCX_COMMIT=${_UCX_COMMIT}
183-
UCC_COMMIT=${_UCC_COMMIT}
184-
TRITON=yes
185-
INDUCTOR_BENCHMARKS=yes
186-
;;
187-
pytorch-linux-jammy-cuda12.6-cudnn9-py3.13-gcc9-inductor-benchmarks)
188-
CUDA_VERSION=12.6
189-
CUDNN_VERSION=9
190-
ANACONDA_PYTHON_VERSION=3.13
191-
GCC_VERSION=9
150+
GCC_VERSION=11
192151
VISION=yes
193152
KATEX=yes
194153
UCX_COMMIT=${_UCX_COMMIT}
195154
UCC_COMMIT=${_UCC_COMMIT}
196155
TRITON=yes
197-
INDUCTOR_BENCHMARKS=yes
198156
;;
199157
pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc9)
200158
CUDA_VERSION=12.8.1
201-
CUDNN_VERSION=9
202159
ANACONDA_PYTHON_VERSION=3.10
203160
GCC_VERSION=9
204161
VISION=yes
@@ -219,19 +176,7 @@ case "$tag" in
219176
VISION=yes
220177
TRITON=yes
221178
;;
222-
pytorch-linux-jammy-py3.11-clang12)
223-
ANACONDA_PYTHON_VERSION=3.11
224-
CLANG_VERSION=12
225-
VISION=yes
226-
TRITON=yes
227-
;;
228-
pytorch-linux-jammy-py3.9-gcc9)
229-
ANACONDA_PYTHON_VERSION=3.9
230-
GCC_VERSION=9
231-
VISION=yes
232-
TRITON=yes
233-
;;
234-
pytorch-linux-jammy-rocm-n-py3 | pytorch-linux-noble-rocm-n-py3)
179+
pytorch-linux-jammy-rocm-n-py3 | pytorch-linux-jammy-rocm-n-py3-benchmarks | pytorch-linux-noble-rocm-n-py3)
235180
if [[ $tag =~ "jammy" ]]; then
236181
ANACONDA_PYTHON_VERSION=3.10
237182
else
@@ -245,7 +190,9 @@ case "$tag" in
245190
KATEX=yes
246191
UCX_COMMIT=${_UCX_COMMIT}
247192
UCC_COMMIT=${_UCC_COMMIT}
248-
INDUCTOR_BENCHMARKS=yes
193+
if [[ $tag =~ "benchmarks" ]]; then
194+
INDUCTOR_BENCHMARKS=yes
195+
fi
249196
;;
250197
pytorch-linux-noble-rocm-alpha-py3)
251198
ANACONDA_PYTHON_VERSION=3.12
@@ -257,7 +204,6 @@ case "$tag" in
257204
KATEX=yes
258205
UCX_COMMIT=${_UCX_COMMIT}
259206
UCC_COMMIT=${_UCC_COMMIT}
260-
INDUCTOR_BENCHMARKS=yes
261207
PYTORCH_ROCM_ARCH="gfx90a;gfx942;gfx950"
262208
;;
263209
pytorch-linux-jammy-xpu-2025.0-py3)
@@ -276,7 +222,7 @@ case "$tag" in
276222
NINJA_VERSION=1.9.0
277223
TRITON=yes
278224
;;
279-
pytorch-linux-jammy-py3.9-gcc11-inductor-benchmarks)
225+
pytorch-linux-jammy-py3.9-gcc11-inductor-benchmarks)
280226
ANACONDA_PYTHON_VERSION=3.9
281227
GCC_VERSION=11
282228
VISION=yes
@@ -288,7 +234,6 @@ case "$tag" in
288234
pytorch-linux-jammy-cuda12.8-cudnn9-py3.9-clang12)
289235
ANACONDA_PYTHON_VERSION=3.9
290236
CUDA_VERSION=12.8.1
291-
CUDNN_VERSION=9
292237
CLANG_VERSION=12
293238
VISION=yes
294239
TRITON=yes
@@ -367,7 +312,6 @@ case "$tag" in
367312
fi
368313
if [[ "$image" == *cuda* ]]; then
369314
extract_version_from_image_name cuda CUDA_VERSION
370-
extract_version_from_image_name cudnn CUDNN_VERSION
371315
fi
372316
if [[ "$image" == *rocm* ]]; then
373317
extract_version_from_image_name rocm ROCM_VERSION
@@ -419,9 +363,6 @@ docker build \
419363
--build-arg "PYTHON_VERSION=${PYTHON_VERSION}" \
420364
--build-arg "GCC_VERSION=${GCC_VERSION}" \
421365
--build-arg "CUDA_VERSION=${CUDA_VERSION}" \
422-
--build-arg "CUDNN_VERSION=${CUDNN_VERSION}" \
423-
--build-arg "TENSORRT_VERSION=${TENSORRT_VERSION}" \
424-
--build-arg "GRADLE_VERSION=${GRADLE_VERSION}" \
425366
--build-arg "NINJA_VERSION=${NINJA_VERSION:-}" \
426367
--build-arg "KATEX=${KATEX:-}" \
427368
--build-arg "ROCM_VERSION=${ROCM_VERSION:-}" \

.ci/docker/ci_commit_pins/triton.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
ae848267bebc65c6181e8cc5e64a6357d2679260
1+
f7888497a1eb9e98d4c07537f0d0bcfe180d1363

.ci/docker/common/install_cpython.sh

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -66,8 +66,9 @@ function do_cpython_build {
6666
ln -s pip3 ${prefix}/bin/pip
6767
fi
6868
# install setuptools, which is required to use distutils since python 3.12
69-
${prefix}/bin/pip install wheel==0.45.1 setuptools==80.9.0
70-
local abi_tag=$(${prefix}/bin/python -c "from wheel.pep425tags import get_abbr_impl, get_impl_ver, get_abi_tag; print('{0}{1}-{2}'.format(get_abbr_impl(), get_impl_ver(), get_abi_tag()))")
69+
# packaging is needed to create symlink since wheel no longer provides needed information
70+
${prefix}/bin/pip install packaging==25.0 wheel==0.45.1 setuptools==80.9.0
71+
local abi_tag=$(${prefix}/bin/python -c "from packaging.tags import interpreter_name, interpreter_version; import sysconfig ; from sysconfig import get_config_var; print('{0}{1}-{0}{1}{2}'.format(interpreter_name(), interpreter_version(), 't' if sysconfig.get_config_var('Py_GIL_DISABLED') else ''))")
7172
ln -sf ${prefix} /opt/python/${abi_tag}
7273
}
7374

.ci/docker/common/install_cuda.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -68,8 +68,8 @@ function install_nvshmem {
6868
# download, unpack, install
6969
wget -q "${url}"
7070
tar xf "${filename}.tar.gz"
71-
cp -a "libnvshmem/include/"* /usr/local/include/
72-
cp -a "libnvshmem/lib/"* /usr/local/lib/
71+
cp -a "libnvshmem/include/"* /usr/local/cuda/include/
72+
cp -a "libnvshmem/lib/"* /usr/local/cuda/lib64/
7373

7474
# cleanup
7575
cd ..

.ci/docker/common/install_cudnn.sh

Lines changed: 0 additions & 26 deletions
This file was deleted.

0 commit comments

Comments
 (0)