
Commit 5ed2137

Update
[ghstack-poisoned]
2 parents: afaf3dc + 78fd036


59 files changed: +938, -770 lines

.github/ci_commit_pins/xla.txt

Lines changed: 1 addition & 1 deletion

@@ -1 +1 @@
-b6a5b82b9948b610fa4c304d0d869c82b8f17db1
+095faec1e7b6cc47220181e74ae9cde2605f9b00

CMakeLists.txt

Lines changed: 5 additions & 5 deletions

@@ -253,7 +253,6 @@ cmake_dependent_option(USE_CUFILE "Use cuFile" ON "USE_CUDA AND NOT WIN32" OFF)
 option(USE_FBGEMM "Use FBGEMM (quantized 8-bit server operators)" ON)
 option(USE_KINETO "Use Kineto profiling library" ON)
 option(USE_CUPTI_SO "Use CUPTI as a shared library" ON)
-option(USE_FAKELOWP "Use FakeLowp operators" OFF)
 option(USE_GFLAGS "Use GFLAGS" OFF)
 option(USE_GLOG "Use GLOG" OFF)
 option(USE_LITE_PROTO "Use lite protobuf instead of full." OFF)
@@ -836,10 +835,11 @@ include(ExternalProject)
 
 # ---[ Dependencies ---[ FBGEMM doesn't work on x86 32bit and
 # CMAKE_SYSTEM_PROCESSOR thinks its 64bit
-if(USE_FBGEMM
-   AND((CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND CMAKE_SIZEOF_VOID_P EQUAL
-        4)
-       OR CMAKE_SYSTEM_PROCESSOR STREQUAL "x86"))
+if(USE_FBGEMM AND NOT CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
+  message(WARNING
+    "x64 operating system is required for FBGEMM. "
+    "Not compiling with FBGEMM. "
+    "Turn this warning off by USE_FBGEMM=OFF.")
   set(USE_FBGEMM OFF)
 endif()
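Note (an inference, not part of the diff): with this change FBGEMM is disabled with a warning on any target whose CMAKE_SYSTEM_PROCESSOR is not x86_64. If the warning is unwanted, the option can presumably be switched off explicitly when configuring, e.g. `-DUSE_FBGEMM=OFF` for a direct CMake configure, or `USE_FBGEMM=0` in the environment for a setup.py build.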

README.md

Lines changed: 1 addition & 1 deletion

@@ -243,7 +243,7 @@ git submodule update --init --recursive
 
 ```bash
 conda install cmake ninja
-# Run this command from the PyTorch directory after cloning the source code using the “Get the PyTorch Source“ section below
+# Run this command from the PyTorch directory after cloning the source code using the “Get the PyTorch Source“ section above
 pip install -r requirements.txt
 ```

aten/src/ATen/autocast_mode.cpp

Lines changed: 1 addition & 0 deletions

@@ -239,6 +239,7 @@ TORCH_LIBRARY_IMPL(aten, AutocastMPS, m) {
   KERNEL_MPS(scaled_dot_product_attention, lower_precision_fp)
 
   // fp32
+  KERNEL_MPS(conv_transpose3d, input, fp32)
   KERNEL_MPS(acos, fp32)
   KERNEL_MPS(asin, fp32)
   KERNEL_MPS(cosh, fp32)
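For illustration, a minimal Python sketch of the intended effect of registering conv_transpose3d as an fp32 autocast op on MPS (this example is an assumption, not part of the commit, and requires an MPS-capable build):

```python
import torch
import torch.nn.functional as F

# Minimal sketch, assuming an Apple-silicon build where MPS is available.
if torch.backends.mps.is_available():
    x = torch.randn(1, 2, 4, 4, 4, device="mps")
    w = torch.randn(2, 3, 3, 3, 3, device="mps")
    with torch.autocast(device_type="mps", dtype=torch.float16):
        out = F.conv_transpose3d(x, w)
    # With conv_transpose3d on the fp32 list, the output is expected to
    # remain float32 even though autocast lowers precision elsewhere.
    print(out.dtype)
```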

aten/src/ATen/detail/MTIAHooksInterface.cpp

Lines changed: 4 additions & 0 deletions

@@ -21,6 +21,10 @@ bool isMTIAHooksBuilt() {
 
 } // namespace detail
 
+bool MTIAHooksInterface::isAvailable() const {
+  return detail::isMTIAHooksBuilt() && detail::getMTIAHooks().deviceCount() > 0;
+}
+
 C10_DEFINE_REGISTRY(MTIAHooksRegistry, MTIAHooksInterface, MTIAHooksArgs)
 
 } // namespace at
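A rough Python-side sketch of what the new isAvailable() check corresponds to (assuming the torch.mtia module is present in the build; the exact Python wiring is not shown in this diff):

```python
import torch

# Minimal sketch: availability mirrors the C++ logic above, i.e. the MTIA
# hooks must be built and at least one MTIA device must be enumerated.
if hasattr(torch, "mtia") and torch.mtia.is_available():
    print("MTIA devices:", torch.mtia.device_count())
else:
    print("MTIA not available")
```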

aten/src/ATen/detail/MTIAHooksInterface.h

Lines changed: 2 additions & 0 deletions

@@ -149,6 +149,8 @@ struct TORCH_API MTIAHooksInterface : AcceleratorHooksInterface {
     FAIL_MTIAHOOKS_FUNC(__func__);
     return;
   }
+
+  virtual bool isAvailable() const override;
 };
 
 struct TORCH_API MTIAHooksArgs {};

aten/src/ATen/native/mps/operations/BinaryKernel.mm

Lines changed: 1 addition & 0 deletions

@@ -53,6 +53,7 @@ void binary_op_kernel(const std::string func_name,
                   .add_input(input)
                   .add_input(other)
                   .check_all_same_dtype(false)
+                  .promote_inputs_to_common_dtype(true)
                   .build();
 
   lib.exec_binary_kernel(iter, func_name, alpha);
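A small Python sketch of the behavior this enables: mixed-dtype operands should now be promoted to a common dtype before the MPS binary kernel runs. The use of copysign as an op routed through this kernel is an assumption for illustration, and the example requires an MPS-capable build.

```python
import torch

# Minimal sketch, assuming an MPS-capable build.
if torch.backends.mps.is_available():
    a = torch.arange(4, device="mps")                        # int64
    b = torch.tensor([-1.0, 1.0, -1.0, 1.0], device="mps")   # float32
    # With promote_inputs_to_common_dtype(true), mixed int/float inputs
    # should follow the usual type-promotion rules (result: float32).
    print(torch.copysign(a, b).dtype)
```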

c10/cuda/CUDAFunctions.cpp

Lines changed: 1 addition & 3 deletions

@@ -53,21 +53,19 @@ int device_count_impl(bool fail_if_no_driver) {
             "https://pytorch.org to install a PyTorch version that has been "
             "compiled with your version of the CUDA driver.");
       }
-    } break;
+    }
     case cudaErrorInitializationError:
       TORCH_CHECK(
           false,
           "CUDA driver initialization failed, you might not "
           "have a CUDA gpu.");
-      break;
     case cudaErrorUnknown:
       TORCH_CHECK(
           false,
           "CUDA unknown error - this may be due to an "
           "incorrectly set up environment, e.g. changing env "
           "variable CUDA_VISIBLE_DEVICES after program start. "
          "Setting the available devices to be zero.");
-      break;
 #if C10_ASAN_ENABLED
     case cudaErrorMemoryAllocation:
       // In ASAN mode, we know that a cudaErrorMemoryAllocation error will

c10/cuda/CUDAStream.cpp

Lines changed: 0 additions & 3 deletions

@@ -216,9 +216,6 @@ static void initSingleStream(int p, DeviceIndex device_index, int i) {
 // Creates the low and high priority stream pools for the specified device
 // Warning: only call once per device!
 static void initDeviceStreamState(DeviceIndex device_index) {
-  // Switches to the requested device so streams are properly associated
-  // with it.
-  CUDAGuard device_guard{device_index};
   for (const auto i : c10::irange(kStreamsPerPool)) {
     for (const auto p : c10::irange(max_stream_priorities)) {
       initSingleStream(p, device_index, i);

cmake/BLAS_ABI.cmake

Lines changed: 1 addition & 0 deletions

@@ -1,3 +1,4 @@
+include(CMakePushCheckState)
 # Push host architecture when cross-compiling otherwise check would fail
 # when cross-compiling for arm64 on x86_64
 cmake_push_check_state(RESET)
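Brief note (an inference, not stated in the diff): CMakePushCheckState is the standard CMake module that defines cmake_push_check_state() and cmake_pop_check_state(), so including it here makes the file self-contained rather than relying on the module having been included earlier in the build.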
