
Commit 5ed2137

Update
[ghstack-poisoned]
2 parents: afaf3dc + 78fd036


59 files changed: +938, -770 lines

.github/ci_commit_pins/xla.txt

Lines changed: 1 addition & 1 deletion

@@ -1 +1 @@
-b6a5b82b9948b610fa4c304d0d869c82b8f17db1
+095faec1e7b6cc47220181e74ae9cde2605f9b00

CMakeLists.txt

Lines changed: 5 additions & 5 deletions

@@ -253,7 +253,6 @@ cmake_dependent_option(USE_CUFILE "Use cuFile" ON "USE_CUDA AND NOT WIN32" OFF)
 option(USE_FBGEMM "Use FBGEMM (quantized 8-bit server operators)" ON)
 option(USE_KINETO "Use Kineto profiling library" ON)
 option(USE_CUPTI_SO "Use CUPTI as a shared library" ON)
-option(USE_FAKELOWP "Use FakeLowp operators" OFF)
 option(USE_GFLAGS "Use GFLAGS" OFF)
 option(USE_GLOG "Use GLOG" OFF)
 option(USE_LITE_PROTO "Use lite protobuf instead of full." OFF)
@@ -836,10 +835,11 @@ include(ExternalProject)
 
 # ---[ Dependencies ---[ FBGEMM doesn't work on x86 32bit and
 # CMAKE_SYSTEM_PROCESSOR thinks its 64bit
-if(USE_FBGEMM
-   AND((CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND CMAKE_SIZEOF_VOID_P EQUAL
-        4)
-       OR CMAKE_SYSTEM_PROCESSOR STREQUAL "x86"))
+if(USE_FBGEMM AND NOT CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
+  message(WARNING
+    "x64 operating system is required for FBGEMM. "
+    "Not compiling with FBGEMM. "
+    "Turn this warning off by USE_FBGEMM=OFF.")
   set(USE_FBGEMM OFF)
 endif()
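Note (an inference, not part of the diff): with this change FBGEMM is disabled with a warning on any target whose CMAKE_SYSTEM_PROCESSOR is not x86_64. If the warning is unwanted, the option can presumably be switched off explicitly when configuring, e.g. `-DUSE_FBGEMM=OFF` for a direct CMake configure, or `USE_FBGEMM=0` in the environment for a setup.py build.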

README.md

Lines changed: 1 addition & 1 deletion

@@ -243,7 +243,7 @@ git submodule update --init --recursive
 
 ```bash
 conda install cmake ninja
-# Run this command from the PyTorch directory after cloning the source code using the “Get the PyTorch Source“ section below
+# Run this command from the PyTorch directory after cloning the source code using the “Get the PyTorch Source“ section above
 pip install -r requirements.txt
 ```

aten/src/ATen/autocast_mode.cpp

Lines changed: 1 addition & 0 deletions

@@ -239,6 +239,7 @@ TORCH_LIBRARY_IMPL(aten, AutocastMPS, m) {
   KERNEL_MPS(scaled_dot_product_attention, lower_precision_fp)
 
   // fp32
+  KERNEL_MPS(conv_transpose3d, input, fp32)
   KERNEL_MPS(acos, fp32)
   KERNEL_MPS(asin, fp32)
   KERNEL_MPS(cosh, fp32)
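For illustration, a minimal Python sketch of the intended effect of registering conv_transpose3d as an fp32 autocast op on MPS (this example is an assumption, not part of the commit, and requires an MPS-capable build):

```python
import torch
import torch.nn.functional as F

# Minimal sketch, assuming an Apple-silicon build where MPS is available.
if torch.backends.mps.is_available():
    x = torch.randn(1, 2, 4, 4, 4, device="mps")
    w = torch.randn(2, 3, 3, 3, 3, device="mps")
    with torch.autocast(device_type="mps", dtype=torch.float16):
        out = F.conv_transpose3d(x, w)
    # With conv_transpose3d on the fp32 list, the output is expected to
    # remain float32 even though autocast lowers precision elsewhere.
    print(out.dtype)
```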

aten/src/ATen/detail/MTIAHooksInterface.cpp

Lines changed: 4 additions & 0 deletions

@@ -21,6 +21,10 @@ bool isMTIAHooksBuilt() {
 
 } // namespace detail
 
+bool MTIAHooksInterface::isAvailable() const {
+  return detail::isMTIAHooksBuilt() && detail::getMTIAHooks().deviceCount() > 0;
+}
+
 C10_DEFINE_REGISTRY(MTIAHooksRegistry, MTIAHooksInterface, MTIAHooksArgs)
 
 } // namespace at
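A rough Python-side sketch of what the new isAvailable() check corresponds to (assuming the torch.mtia module is present in the build; the exact Python wiring is not shown in this diff):

```python
import torch

# Minimal sketch: availability mirrors the C++ logic above, i.e. the MTIA
# hooks must be built and at least one MTIA device must be enumerated.
if hasattr(torch, "mtia") and torch.mtia.is_available():
    print("MTIA devices:", torch.mtia.device_count())
else:
    print("MTIA not available")
```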

aten/src/ATen/detail/MTIAHooksInterface.h

Lines changed: 2 additions & 0 deletions

@@ -149,6 +149,8 @@ struct TORCH_API MTIAHooksInterface : AcceleratorHooksInterface {
     FAIL_MTIAHOOKS_FUNC(__func__);
     return;
   }
+
+  virtual bool isAvailable() const override;
 };
 
 struct TORCH_API MTIAHooksArgs {};

aten/src/ATen/native/mps/operations/BinaryKernel.mm

Lines changed: 1 addition & 0 deletions

@@ -53,6 +53,7 @@ void binary_op_kernel(const std::string func_name,
                   .add_input(input)
                   .add_input(other)
                   .check_all_same_dtype(false)
+                  .promote_inputs_to_common_dtype(true)
                   .build();
 
   lib.exec_binary_kernel(iter, func_name, alpha);
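A small Python sketch of the behavior this enables: mixed-dtype operands should now be promoted to a common dtype before the MPS binary kernel runs. The use of copysign as an op routed through this kernel is an assumption for illustration, and the example requires an MPS-capable build.

```python
import torch

# Minimal sketch, assuming an MPS-capable build.
if torch.backends.mps.is_available():
    a = torch.arange(4, device="mps")                        # int64
    b = torch.tensor([-1.0, 1.0, -1.0, 1.0], device="mps")   # float32
    # With promote_inputs_to_common_dtype(true), mixed int/float inputs
    # should follow the usual type-promotion rules (result: float32).
    print(torch.copysign(a, b).dtype)
```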

c10/cuda/CUDAFunctions.cpp

Lines changed: 1 addition & 3 deletions

@@ -53,21 +53,19 @@ int device_count_impl(bool fail_if_no_driver) {
             "https://pytorch.org to install a PyTorch version that has been "
             "compiled with your version of the CUDA driver.");
       }
-    } break;
+    }
     case cudaErrorInitializationError:
       TORCH_CHECK(
           false,
           "CUDA driver initialization failed, you might not "
           "have a CUDA gpu.");
-      break;
     case cudaErrorUnknown:
       TORCH_CHECK(
           false,
           "CUDA unknown error - this may be due to an "
           "incorrectly set up environment, e.g. changing env "
           "variable CUDA_VISIBLE_DEVICES after program start. "
          "Setting the available devices to be zero.");
-      break;
 #if C10_ASAN_ENABLED
     case cudaErrorMemoryAllocation:
       // In ASAN mode, we know that a cudaErrorMemoryAllocation error will

c10/cuda/CUDAStream.cpp

Lines changed: 0 additions & 3 deletions

@@ -216,9 +216,6 @@ static void initSingleStream(int p, DeviceIndex device_index, int i) {
 // Creates the low and high priority stream pools for the specified device
 // Warning: only call once per device!
 static void initDeviceStreamState(DeviceIndex device_index) {
-  // Switches to the requested device so streams are properly associated
-  // with it.
-  CUDAGuard device_guard{device_index};
   for (const auto i : c10::irange(kStreamsPerPool)) {
     for (const auto p : c10::irange(max_stream_priorities)) {
       initSingleStream(p, device_index, i);

cmake/BLAS_ABI.cmake

Lines changed: 1 addition & 0 deletions

@@ -1,3 +1,4 @@
+include(CMakePushCheckState)
 # Push host architecture when cross-compiling otherwise check would fail
 # when cross-compiling for arm64 on x86_64
 cmake_push_check_state(RESET)
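Brief note (an inference, not stated in the diff): CMakePushCheckState is the standard CMake module that defines cmake_push_check_state() and cmake_pop_check_state(), so including it here makes the file self-contained rather than relying on the module having been included earlier in the build.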
