
Commit c33e136

Merge branch 'refs/heads/main' into moksiucik_torchrun_xpu

2 parents: aa0a90b + 2ffb510

202 files changed (+7951 lines added, -4823 lines removed)


.github/actionlint.yaml

Lines changed: 3 additions & 3 deletions

@@ -53,9 +53,9 @@ self-hosted-runner:
   - linux.rocm.gpu.mi250
   - linux.rocm.gpu.2
   - linux.rocm.gpu.4
-  # MI300 runners
-  - linux.rocm.gpu.mi300.2
-  - linux.rocm.gpu.mi300.4
+  # gfx942 runners
+  - linux.rocm.gpu.gfx942.2
+  - linux.rocm.gpu.gfx942.4
   - rocm-docker
   # Org wise AWS `mac2.metal` runners (2020 Mac mini hardware powered by Apple silicon M1 processors)
   - macos-m1-stable

.github/ci_commit_pins/vllm.txt

Lines changed: 1 addition & 1 deletion

@@ -1 +1 @@
-8f605ee30912541126c0fe46d0c8c413101b600a
+ca9e2be3ed6320b51f52f536595cd24e254f8bb2

.github/scripts/trymerge.py

Lines changed: 3 additions & 1 deletion

@@ -1891,7 +1891,9 @@ def validate_revert(
         else pr.get_comment_by_id(comment_id)
     )
     if comment.editor_login is not None:
-        raise PostCommentError("Don't want to revert based on edited command")
+        raise PostCommentError(
+            "Halting the revert as the revert comment has been edited."
+        )
     author_association = comment.author_association
     author_login = comment.author_login
     allowed_reverters = ["COLLABORATOR", "MEMBER", "OWNER"]
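
The guard above refuses to act on a revert command whose comment was edited after being posted. A minimal, self-contained sketch of the same pattern follows; the Comment dataclass and the driver code are hypothetical stand-ins for the GitHubComment object and its caller in trymerge.py, and only the editor_login check and the error type mirror the diff:

from dataclasses import dataclass
from typing import Optional


class PostCommentError(Exception):
    """Raised when a comment-driven command must not be acted on."""


@dataclass
class Comment:
    body: str
    author_login: str
    editor_login: Optional[str] = None  # populated only if the comment was edited


def check_revert_comment(comment: Comment) -> None:
    # An edited command may no longer say what was originally approved,
    # so the revert is halted instead of trusted.
    if comment.editor_login is not None:
        raise PostCommentError(
            "Halting the revert as the revert comment has been edited."
        )


# Usage: an untouched comment passes, an edited one is rejected.
check_revert_comment(Comment(body="@pytorchbot revert", author_login="dev"))
try:
    check_revert_comment(
        Comment(body="@pytorchbot revert", author_login="dev", editor_login="dev")
    )
except PostCommentError as err:
    print(err)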

.github/workflows/_rocm-test.yml

Lines changed: 2 additions & 2 deletions

@@ -269,8 +269,8 @@ jobs:
           # copy test results back to the mounted workspace, needed sudo, resulting permissions were correct
           docker exec -t "${{ env.CONTAINER_NAME }}" sh -c "cd ../pytorch && sudo cp -R test/test-reports ../workspace/test"

-      - name: Change permissions (only needed for MI300 and MI355 kubernetes runners for now)
-        if: ${{ always() && steps.test.conclusion && (contains(matrix.runner, 'mi300') || contains(matrix.runner, 'mi355')) }}
+      - name: Change permissions (only needed for kubernetes runners for now)
+        if: ${{ always() && steps.test.conclusion && (contains(matrix.runner, 'gfx942') || contains(matrix.runner, 'mi355')) }}
         run: |
           docker exec -t "${{ env.CONTAINER_NAME }}" sh -c "sudo chown -R 1001:1001 test"
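
The renamed step now keys off the gfx942 architecture label rather than the MI300 product name, using GitHub Actions' contains() on matrix.runner. A rough Python model of that test (contains() on string operands is a case-insensitive substring check; the function below is only an illustration, and the runner labels are taken from this commit):

def contains(search: str, item: str) -> bool:
    # Approximation of GitHub Actions' contains() for string operands.
    return item.casefold() in search.casefold()


for runner in ["linux.rocm.gpu.gfx942.2", "linux.rocm.gpu.gfx942.4", "linux.rocm.gpu.mi250"]:
    needs_chown = contains(runner, "gfx942") or contains(runner, "mi355")
    print(runner, needs_chown)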

.github/workflows/inductor-perf-test-nightly-rocm.yml

Lines changed: 17 additions & 17 deletions

@@ -88,23 +88,23 @@ jobs:
     docker-image-name: ci-image:pytorch-linux-jammy-rocm-n-py3
     test-matrix: |
       { include: [
-        { config: "inductor_huggingface_perf_rocm", shard: 1, num_shards: 4, runner: "linux.rocm.gpu.mi300.2" },
-        { config: "inductor_huggingface_perf_rocm", shard: 2, num_shards: 4, runner: "linux.rocm.gpu.mi300.2" },
-        { config: "inductor_huggingface_perf_rocm", shard: 3, num_shards: 4, runner: "linux.rocm.gpu.mi300.2" },
-        { config: "inductor_huggingface_perf_rocm", shard: 4, num_shards: 4, runner: "linux.rocm.gpu.mi300.2" },
-        { config: "inductor_timm_perf_rocm", shard: 1, num_shards: 5, runner: "linux.rocm.gpu.mi300.2" },
-        { config: "inductor_timm_perf_rocm", shard: 2, num_shards: 5, runner: "linux.rocm.gpu.mi300.2" },
-        { config: "inductor_timm_perf_rocm", shard: 3, num_shards: 5, runner: "linux.rocm.gpu.mi300.2" },
-        { config: "inductor_timm_perf_rocm", shard: 4, num_shards: 5, runner: "linux.rocm.gpu.mi300.2" },
-        { config: "inductor_timm_perf_rocm", shard: 5, num_shards: 5, runner: "linux.rocm.gpu.mi300.2" },
-        { config: "inductor_torchbench_perf_rocm", shard: 1, num_shards: 8, runner: "linux.rocm.gpu.mi300.2" },
-        { config: "inductor_torchbench_perf_rocm", shard: 2, num_shards: 8, runner: "linux.rocm.gpu.mi300.2" },
-        { config: "inductor_torchbench_perf_rocm", shard: 3, num_shards: 8, runner: "linux.rocm.gpu.mi300.2" },
-        { config: "inductor_torchbench_perf_rocm", shard: 4, num_shards: 8, runner: "linux.rocm.gpu.mi300.2" },
-        { config: "inductor_torchbench_perf_rocm", shard: 5, num_shards: 8, runner: "linux.rocm.gpu.mi300.2" },
-        { config: "inductor_torchbench_perf_rocm", shard: 6, num_shards: 8, runner: "linux.rocm.gpu.mi300.2" },
-        { config: "inductor_torchbench_perf_rocm", shard: 7, num_shards: 8, runner: "linux.rocm.gpu.mi300.2" },
-        { config: "inductor_torchbench_perf_rocm", shard: 8, num_shards: 8, runner: "linux.rocm.gpu.mi300.2" },
+        { config: "inductor_huggingface_perf_rocm", shard: 1, num_shards: 4, runner: "linux.rocm.gpu.gfx942.2" },
+        { config: "inductor_huggingface_perf_rocm", shard: 2, num_shards: 4, runner: "linux.rocm.gpu.gfx942.2" },
+        { config: "inductor_huggingface_perf_rocm", shard: 3, num_shards: 4, runner: "linux.rocm.gpu.gfx942.2" },
+        { config: "inductor_huggingface_perf_rocm", shard: 4, num_shards: 4, runner: "linux.rocm.gpu.gfx942.2" },
+        { config: "inductor_timm_perf_rocm", shard: 1, num_shards: 5, runner: "linux.rocm.gpu.gfx942.2" },
+        { config: "inductor_timm_perf_rocm", shard: 2, num_shards: 5, runner: "linux.rocm.gpu.gfx942.2" },
+        { config: "inductor_timm_perf_rocm", shard: 3, num_shards: 5, runner: "linux.rocm.gpu.gfx942.2" },
+        { config: "inductor_timm_perf_rocm", shard: 4, num_shards: 5, runner: "linux.rocm.gpu.gfx942.2" },
+        { config: "inductor_timm_perf_rocm", shard: 5, num_shards: 5, runner: "linux.rocm.gpu.gfx942.2" },
+        { config: "inductor_torchbench_perf_rocm", shard: 1, num_shards: 8, runner: "linux.rocm.gpu.gfx942.2" },
+        { config: "inductor_torchbench_perf_rocm", shard: 2, num_shards: 8, runner: "linux.rocm.gpu.gfx942.2" },
+        { config: "inductor_torchbench_perf_rocm", shard: 3, num_shards: 8, runner: "linux.rocm.gpu.gfx942.2" },
+        { config: "inductor_torchbench_perf_rocm", shard: 4, num_shards: 8, runner: "linux.rocm.gpu.gfx942.2" },
+        { config: "inductor_torchbench_perf_rocm", shard: 5, num_shards: 8, runner: "linux.rocm.gpu.gfx942.2" },
+        { config: "inductor_torchbench_perf_rocm", shard: 6, num_shards: 8, runner: "linux.rocm.gpu.gfx942.2" },
+        { config: "inductor_torchbench_perf_rocm", shard: 7, num_shards: 8, runner: "linux.rocm.gpu.gfx942.2" },
+        { config: "inductor_torchbench_perf_rocm", shard: 8, num_shards: 8, runner: "linux.rocm.gpu.gfx942.2" },
       ]}
     secrets: inherit
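
Every entry in the test-matrix above identifies itself by shard and num_shards, so each benchmark suite is split across several linux.rocm.gpu.gfx942.2 runners. The diff does not show how a shard selects its portion of the work; a minimal sketch of one common convention (round-robin by index, purely illustrative and not PyTorch's actual sharding code) is:

def select_shard(items: list[str], shard: int, num_shards: int) -> list[str]:
    # Return the slice of work a 1-indexed shard should run.
    assert 1 <= shard <= num_shards
    return items[shard - 1::num_shards]


benchmarks = [f"model_{i}" for i in range(10)]
# Shard 1 of 4 gets model_0, model_4, model_8; shard 2 gets model_1, model_5, model_9; ...
print(select_shard(benchmarks, shard=1, num_shards=4))
print(select_shard(benchmarks, shard=2, num_shards=4))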

.github/workflows/inductor-rocm-mi300.yml

Lines changed: 2 additions & 2 deletions

@@ -47,8 +47,8 @@ jobs:
     docker-image-name: ci-image:pytorch-linux-jammy-rocm-n-py3
     test-matrix: |
       { include: [
-        { config: "inductor", shard: 1, num_shards: 2, runner: "linux.rocm.gpu.mi300.2" },
-        { config: "inductor", shard: 2, num_shards: 2, runner: "linux.rocm.gpu.mi300.2" },
+        { config: "inductor", shard: 1, num_shards: 2, runner: "linux.rocm.gpu.gfx942.2" },
+        { config: "inductor", shard: 2, num_shards: 2, runner: "linux.rocm.gpu.gfx942.2" },
       ]}
     secrets: inherit

.github/workflows/periodic-rocm-mi300.yml

Lines changed: 3 additions & 3 deletions

@@ -59,9 +59,9 @@ jobs:
     docker-image-name: ci-image:pytorch-linux-jammy-rocm-n-py3
     test-matrix: |
       { include: [
-        { config: "distributed", shard: 1, num_shards: 3, runner: "linux.rocm.gpu.mi300.4", owners: ["module:rocm", "oncall:distributed"] },
-        { config: "distributed", shard: 2, num_shards: 3, runner: "linux.rocm.gpu.mi300.4", owners: ["module:rocm", "oncall:distributed"] },
-        { config: "distributed", shard: 3, num_shards: 3, runner: "linux.rocm.gpu.mi300.4", owners: ["module:rocm", "oncall:distributed"] },
+        { config: "distributed", shard: 1, num_shards: 3, runner: "linux.rocm.gpu.gfx942.4", owners: ["module:rocm", "oncall:distributed"] },
+        { config: "distributed", shard: 2, num_shards: 3, runner: "linux.rocm.gpu.gfx942.4", owners: ["module:rocm", "oncall:distributed"] },
+        { config: "distributed", shard: 3, num_shards: 3, runner: "linux.rocm.gpu.gfx942.4", owners: ["module:rocm", "oncall:distributed"] },
       ]}
     secrets: inherit

.github/workflows/rocm-mi300.yml

Lines changed: 6 additions & 6 deletions

@@ -48,12 +48,12 @@ jobs:
     sync-tag: rocm-build
     test-matrix: |
       { include: [
-        { config: "default", shard: 1, num_shards: 6, runner: "linux.rocm.gpu.mi300.2" },
-        { config: "default", shard: 2, num_shards: 6, runner: "linux.rocm.gpu.mi300.2" },
-        { config: "default", shard: 3, num_shards: 6, runner: "linux.rocm.gpu.mi300.2" },
-        { config: "default", shard: 4, num_shards: 6, runner: "linux.rocm.gpu.mi300.2" },
-        { config: "default", shard: 5, num_shards: 6, runner: "linux.rocm.gpu.mi300.2" },
-        { config: "default", shard: 6, num_shards: 6, runner: "linux.rocm.gpu.mi300.2" },
+        { config: "default", shard: 1, num_shards: 6, runner: "linux.rocm.gpu.gfx942.2" },
+        { config: "default", shard: 2, num_shards: 6, runner: "linux.rocm.gpu.gfx942.2" },
+        { config: "default", shard: 3, num_shards: 6, runner: "linux.rocm.gpu.gfx942.2" },
+        { config: "default", shard: 4, num_shards: 6, runner: "linux.rocm.gpu.gfx942.2" },
+        { config: "default", shard: 5, num_shards: 6, runner: "linux.rocm.gpu.gfx942.2" },
+        { config: "default", shard: 6, num_shards: 6, runner: "linux.rocm.gpu.gfx942.2" },
       ]}
     secrets: inherit

CMakeLists.txt

Lines changed: 12 additions & 0 deletions

@@ -872,6 +872,14 @@ cmake_dependent_option(
   "USE_CUDA OR USE_ROCM;NOT MSVC"
   OFF)

+cmake_dependent_option(
+  USE_FBGEMM_GENAI
+  "Whether to build FBGEMM GenAI quantized GEMM kernels.\
+  Will be disabled if not supported by the platform"
+  OFF
+  "USE_CUDA OR USE_ROCM"
+  OFF)
+
 # CAVEAT: Again, Flash Attention2 will error while building for sm52 while Mem
 # Eff Attention won't
 cmake_dependent_option(

@@ -905,6 +913,10 @@ if(USE_FBGEMM)
   string(APPEND CMAKE_CXX_FLAGS " -DUSE_FBGEMM")
 endif()

+if(USE_FBGEMM_GENAI)
+  string(APPEND CMAKE_CXX_FLAGS " -DUSE_FBGEMM_GENAI")
+endif()
+
 if(USE_PYTORCH_QNNPACK)
   string(APPEND CMAKE_CXX_FLAGS " -DUSE_PYTORCH_QNNPACK")
 endif()
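
For reference, the new cmake_dependent_option(USE_FBGEMM_GENAI ... OFF "USE_CUDA OR USE_ROCM" OFF) declares an option whose default is OFF and that is only selectable when a CUDA or ROCm build is configured; on any other platform it is forced to the trailing OFF value. A small Python model of that resolution rule (illustrative only, not CMake itself):

def cmake_dependent_option(user_value, default, depends_ok, force):
    # If the <depends> condition holds, behave like a normal option():
    # the user's cached value wins, otherwise fall back to <default>.
    # If it does not hold, the option is hidden and forced to <force>.
    if depends_ok:
        return default if user_value is None else user_value
    return force


# USE_FBGEMM_GENAI resolution for a few hypothetical configure runs:
print(cmake_dependent_option(None, default=False, depends_ok=True, force=False))   # CUDA/ROCm build, default: OFF
print(cmake_dependent_option(True, default=False, depends_ok=True, force=False))   # -DUSE_FBGEMM_GENAI=ON: ON
print(cmake_dependent_option(True, default=False, depends_ok=False, force=False))  # CPU-only build: forced OFF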

CODEOWNERS

Lines changed: 6 additions & 6 deletions

@@ -51,12 +51,12 @@ nn/qat/ @jerryzh168
 /torch/csrc/distributed/c10d/Ops.* @kwen2501

 # ONNX Export
-/torch/_dynamo/backends/onnxrt.py @wschin
-/torch/csrc/jit/passes/onnx.h @titaiwangms @shubhambhokare1
-/torch/csrc/jit/passes/onnx.cpp @titaiwangms @shubhambhokare1
-/torch/csrc/jit/passes/onnx/ @titaiwangms @shubhambhokare1
-/torch/onnx/ @titaiwangms @shubhambhokare1 @justinchuby @wschin
-/test/onnx/ @titaiwangms @shubhambhokare1 @justinchuby @wschin
+/torch/_dynamo/backends/onnxrt.py @titaiwangms @xadupre @justinchuby
+/torch/csrc/jit/passes/onnx.h @titaiwangms @xadupre
+/torch/csrc/jit/passes/onnx.cpp @titaiwangms @xadupre
+/torch/csrc/jit/passes/onnx/ @titaiwangms @xadupre
+/torch/onnx/ @titaiwangms @xadupre @justinchuby
+/test/onnx/ @titaiwangms @xadupre @justinchuby

 # CI
 /.ci @pytorch/pytorch-dev-infra
