From 829389179f7ecee227516d08251e6731ba0d85a5 Mon Sep 17 00:00:00 2001 From: "Yu, Guangye" Date: Tue, 29 Jul 2025 15:42:33 +0000 Subject: [PATCH 1/2] Update [ghstack-poisoned] --- aten/src/ATen/core/CachingHostAllocator.h | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/aten/src/ATen/core/CachingHostAllocator.h b/aten/src/ATen/core/CachingHostAllocator.h index 5049018d731e..ee2832aee0c3 100644 --- a/aten/src/ATen/core/CachingHostAllocator.h +++ b/aten/src/ATen/core/CachingHostAllocator.h @@ -251,6 +251,7 @@ struct CachingHostAllocatorImpl { auto* block = reinterpret_cast(ctx); std::optional> events; + ska::flat_hash_set streams; { std::lock_guard g(block->mutex_); block->allocated_ = false; @@ -259,14 +260,19 @@ struct CachingHostAllocatorImpl { } else { events = std::vector(); events->reserve(block->streams_.size()); - for (auto stream : block->streams_) { - record_stream(events, stream); - } block->event_count_ += events->size(); + // Move out streams to avoid holding the mutex during event recording + streams = std::move(block->streams_); block->streams_.clear(); } } + // Event recording must be done outside the mutex to avoid potential + // deadlocks (e.g., when Python GIL is involved) + for (auto stream : streams) { + record_stream(events, stream); + } + if (!events) { auto index = size_index(block->size_); std::lock_guard g(free_list_[index].mutex_); From ccfbfae6dceec8565ee1b75d4fd548942722db06 Mon Sep 17 00:00:00 2001 From: "Yu, Guangye" Date: Tue, 29 Jul 2025 16:00:36 +0000 Subject: [PATCH 2/2] Update [ghstack-poisoned] --- aten/src/ATen/core/CachingHostAllocator.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aten/src/ATen/core/CachingHostAllocator.h b/aten/src/ATen/core/CachingHostAllocator.h index ee2832aee0c3..a8f5f2fd7997 100644 --- a/aten/src/ATen/core/CachingHostAllocator.h +++ b/aten/src/ATen/core/CachingHostAllocator.h @@ -260,7 +260,7 @@ struct CachingHostAllocatorImpl { } else { events = std::vector(); events->reserve(block->streams_.size()); - block->event_count_ += events->size(); + block->event_count_ += block->streams_.size(); // Move out streams to avoid holding the mutex during event recording streams = std::move(block->streams_); block->streams_.clear();