
Commit a87ee60

Update on "[WIP][DeviceMesh] Add _unflatten_ api for device mesh"
After some initial feedback on the implementation of `_split`, we realized that we can first implement `_unflatten` for the urgent use cases at hand, and do further refactoring and iteration based on the discussions in this PR and in this RFC: #159013. We will also ensure that none of these changes regress DTensor's CPU overhead. This PR:

1. Does not support flattening an unflattened mesh. By the time users decide to flatten an unflattened mesh, they would essentially be redoing the unflatten operations, which would complicate bookkeeping, and we do not see a use case for it yet. (We throw a `NotImplementedError` for now.)
2. Adds extra bookkeeping for the unflatten API: we track which sub-mesh contains each unflattened `dim_name`, so that users can slice these dim_names from the root mesh as well, and we swap in the correct mesh when slicing from the root mesh. To ensure the same `dim_name` is never unflattened into different sizes, we also keep the total accumulated numel for that `dim_name` in the root.
3. Reuses process groups already created for the same `dim_name`. When a different `dim_name` happens to have the same shape, we create a new PG, because with a different name users might want to use that dimension for a different purpose, so we'd rather not reuse. (This assumption can be changed; I am open to suggestions.)
4. Adds unit tests for two situations: (a) directly unflattening a 2D device mesh; (b) first creating a dummy 1D device mesh and then splitting it into two 3D device meshes.

cc H-Huang awgu wanchaol fegin wz337 wconstab d4l3k pragupta

[ghstack-poisoned]
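The bookkeeping rules described in points 2 and 3 (a reused `dim_name` must keep a consistent size, and process groups are reused only for the same `dim_name`) can be sketched in plain Python. This is a conceptual illustration only, not the real `DeviceMesh._unflatten` implementation; the names `unflatten_dim` and `MeshBookkeeping` are hypothetical.

```python
from math import prod

def unflatten_dim(shape, dim, new_sizes):
    """Return the mesh shape after unflattening dimension `dim` into `new_sizes`.

    The product of the new sizes must equal the size of the dimension being
    unflattened, otherwise the operation is rejected.
    """
    if prod(new_sizes) != shape[dim]:
        raise ValueError(
            f"cannot unflatten dim of size {shape[dim]} into sizes {new_sizes}"
        )
    return shape[:dim] + tuple(new_sizes) + shape[dim + 1:]

class MeshBookkeeping:
    """Tracks, per dim_name, the size it was unflattened to and a reusable group."""

    def __init__(self):
        self._numel = {}   # dim_name -> size first registered for that name
        self._groups = {}  # (dim_name, size) -> opaque group handle

    def register(self, dim_name, size, make_group):
        # Reject unflattening the same dim_name into a different size.
        if dim_name in self._numel and self._numel[dim_name] != size:
            raise ValueError(
                f"dim_name {dim_name!r} already registered with size "
                f"{self._numel[dim_name]}, got {size}"
            )
        self._numel[dim_name] = size
        # Reuse the group created for the same (dim_name, size); a different
        # dim_name always gets a fresh group, even if the sizes coincide.
        key = (dim_name, size)
        if key not in self._groups:
            self._groups[key] = make_group()
        return self._groups[key]

# Unflatten an 8-way dim 0 into two new dims of sizes (2, 4).
print(unflatten_dim((8, 3), 0, (2, 4)))  # (2, 4, 3)
```

As a usage note, registering `("dp", 2)` twice returns the same group handle, while registering `"dp"` again with size 4 raises, mirroring the size-consistency check the PR describes.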
2 parents 8b6eb8e + 01daab9 commit a87ee60


566 files changed: +61151 −16054 lines


.ci/docker/ci_commit_pins/triton.txt

Lines changed: 1 addition & 1 deletion
@@ -1 +1 @@
-11ec6354315768a85da41032535e3b7b99c5f706
+f7888497a1eb9e98d4c07537f0d0bcfe180d1363

.ci/docker/requirements-docs.txt

Lines changed: 2 additions & 2 deletions
@@ -1,7 +1,7 @@
 sphinx==5.3.0
 #Description: This is used to generate PyTorch docs
 #Pinned versions: 5.3.0
--e git+https://github.com/pytorch/pytorch_sphinx_theme.git@pytorch_sphinx_theme2#egg=pytorch_sphinx_theme2
+-e git+https://github.com/pytorch/pytorch_sphinx_theme.git@722b7e6f9ca512fcc526ad07d62b3d28c50bb6cd#egg=pytorch_sphinx_theme2
 
 # TODO: sphinxcontrib.katex 0.9.0 adds a local KaTeX server to speed up pre-rendering
 # but it doesn't seem to work and hangs around idly. The initial thought that it is probably
@@ -50,7 +50,7 @@ IPython==8.12.0
 #Pinned versions: 8.12.0
 
 myst-nb==0.17.2
-#Description: This is used to generate PyTorch functorch and torch.compile docs
+#Description: This is used to generate PyTorch functorch and torch.compile docs.
 #Pinned versions: 0.17.2
 
 # The following are required to build torch.distributed.elastic.rendezvous.etcd* docs

.ci/manywheel/build_rocm.sh

Lines changed: 1 addition & 1 deletion
@@ -194,7 +194,7 @@ ROCBLAS_LIB_SRC=$ROCM_HOME/lib/rocblas/library
 ROCBLAS_LIB_DST=lib/rocblas/library
 ROCBLAS_ARCH_SPECIFIC_FILES=$(ls $ROCBLAS_LIB_SRC | grep -E $ARCH)
 ROCBLAS_OTHER_FILES=$(ls $ROCBLAS_LIB_SRC | grep -v gfx)
-ROCBLAS_LIB_FILES=($ROCBLAS_ARCH_SPECIFIC_FILES $OTHER_FILES)
+ROCBLAS_LIB_FILES=($ROCBLAS_ARCH_SPECIFIC_FILES $ROCBLAS_OTHER_FILES)
 
 # hipblaslt library files
 HIPBLASLT_LIB_SRC=$ROCM_HOME/lib/hipblaslt/library

.ci/pytorch/build.sh

Lines changed: 4 additions & 0 deletions
@@ -50,6 +50,9 @@ if [[ ${BUILD_ENVIRONMENT} == *"parallelnative"* ]]; then
   export ATEN_THREADING=NATIVE
 fi
 
+# Enable LLVM dependency for TensorExpr testing
+export USE_LLVM=/opt/llvm
+export LLVM_DIR=/opt/llvm/lib/cmake/llvm
 
 if ! which conda; then
   # In ROCm CIs, we are doing cross compilation on build machines with
@@ -189,6 +192,7 @@ if [[ "$BUILD_ENVIRONMENT" == *-clang*-asan* ]]; then
   export USE_ASAN=1
   export REL_WITH_DEB_INFO=1
   export UBSAN_FLAGS="-fno-sanitize-recover=all"
+  unset USE_LLVM
 fi
 
 if [[ "${BUILD_ENVIRONMENT}" == *no-ops* ]]; then

.ci/pytorch/common_utils.sh

Lines changed: 1 addition & 1 deletion
@@ -245,7 +245,7 @@ function install_torchrec_and_fbgemm() {
   if [ "${found_whl}" == "0" ]; then
     git clone --recursive https://github.com/pytorch/fbgemm
     pushd fbgemm/fbgemm_gpu
-    git checkout "${fbgemm_commit}"
+    git checkout "${fbgemm_commit}" --recurse-submodules
     python setup.py bdist_wheel \
       --build-variant=rocm \
       -DHIP_ROOT_DIR="${ROCM_PATH}" \

.ci/pytorch/test.sh

Lines changed: 22 additions & 0 deletions
@@ -627,6 +627,8 @@ test_perf_for_dashboard() {
     device=cuda_a10g
   elif [[ "${TEST_CONFIG}" == *h100* ]]; then
     device=cuda_h100
+  elif [[ "${TEST_CONFIG}" == *b200* ]]; then
+    device=cuda_b200
   elif [[ "${TEST_CONFIG}" == *rocm* ]]; then
     device=rocm
   fi
@@ -801,6 +803,16 @@ test_dynamo_benchmark() {
   if [[ "${TEST_CONFIG}" == *perf_compare* ]]; then
     test_single_dynamo_benchmark "training" "$suite" "$shard_id" --training --amp "$@"
   elif [[ "${TEST_CONFIG}" == *perf* ]]; then
+    # TODO (huydhn): Just smoke test some sample models
+    if [[ "${TEST_CONFIG}" == *b200* ]]; then
+      if [[ "${suite}" == "huggingface" ]]; then
+        export TORCHBENCH_ONLY_MODELS="DistillGPT2"
+      elif [[ "${suite}" == "timm_models" ]]; then
+        export TORCHBENCH_ONLY_MODELS="inception_v3"
+      elif [[ "${suite}" == "torchbench" ]]; then
+        export TORCHBENCH_ONLY_MODELS="hf_Bert"
+      fi
+    fi
     test_single_dynamo_benchmark "dashboard" "$suite" "$shard_id" "$@"
   else
     if [[ "${TEST_CONFIG}" == *cpu* ]]; then
@@ -1039,10 +1051,20 @@ test_libtorch_api() {
     mkdir -p $TEST_REPORTS_DIR
 
     OMP_NUM_THREADS=2 TORCH_CPP_TEST_MNIST_PATH="${MNIST_DIR}" "$TORCH_BIN_DIR"/test_api --gtest_filter='-IMethodTest.*' --gtest_output=xml:$TEST_REPORTS_DIR/test_api.xml
+    "$TORCH_BIN_DIR"/test_tensorexpr --gtest_output=xml:$TEST_REPORTS_DIR/test_tensorexpr.xml
   else
     # Exclude IMethodTest that relies on torch::deploy, which will instead be ran in test_deploy
     OMP_NUM_THREADS=2 TORCH_CPP_TEST_MNIST_PATH="${MNIST_DIR}" python test/run_test.py --cpp --verbose -i cpp/test_api -k "not IMethodTest"
 
+    # On s390x, pytorch is built without llvm.
+    # Even if it would be built with llvm, llvm currently doesn't support used features on s390x and
+    # test fails with errors like:
+    # JIT session error: Unsupported target machine architecture in ELF object pytorch-jitted-objectbuffer
+    # unknown file: Failure
+    # C++ exception with description "valOrErr INTERNAL ASSERT FAILED at "/var/lib/jenkins/workspace/torch/csrc/jit/tensorexpr/llvm_jit.h":34, please report a bug to PyTorch. Unexpected failure in LLVM JIT: Failed to materialize symbols: { (main, { func }) }
+    if [[ "${BUILD_ENVIRONMENT}" != *s390x* ]]; then
+      python test/run_test.py --cpp --verbose -i cpp/test_tensorexpr
+    fi
   fi
 
   # quantization is not fully supported on s390x yet

.github/actionlint.yaml

Lines changed: 3 additions & 3 deletions
@@ -53,9 +53,9 @@ self-hosted-runner:
   - linux.rocm.gpu.mi250
   - linux.rocm.gpu.2
   - linux.rocm.gpu.4
-  # MI300 runners
-  - linux.rocm.gpu.mi300.2
-  - linux.rocm.gpu.mi300.4
+  # gfx942 runners
+  - linux.rocm.gpu.gfx942.2
+  - linux.rocm.gpu.gfx942.4
   - rocm-docker
   # Org wise AWS `mac2.metal` runners (2020 Mac mini hardware powered by Apple silicon M1 processors)
   - macos-m1-stable

.github/ci_commit_pins/audio.txt

Lines changed: 1 addition & 1 deletion
@@ -1 +1 @@
-f6dfe1231dcdd221a68416e49ab85c2575cbb824
+9b57c7bd5ad4db093c5bb31c802df9f04d933ac9

.github/ci_commit_pins/vllm.txt

Lines changed: 1 addition & 1 deletion
@@ -1 +1 @@
-8f605ee30912541126c0fe46d0c8c413101b600a
+6a39ba85fe0f2fff9494b5eccea717c93510c230

.github/ci_commit_pins/xla.txt

Lines changed: 1 addition & 1 deletion
@@ -1 +1 @@
-29ae4c76c026185f417a25e841d2cd5e65f087a3
+b6a5b82b9948b610fa4c304d0d869c82b8f17db1
