From 78d437359de3ffeb7a2dc806468e3433e62b0e0b Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" Date: Mon, 28 Jul 2025 21:34:06 -0700 Subject: [PATCH] gh-83065: Fix import deadlock by implementing hierarchical module locking MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix a deadlock in the import system that occurs when concurrent threads import modules at different levels of the same package hierarchy while `__init__.py` files have circular imports. The deadlock scenario (correctly analyzed by @emmatyping): - Thread 1 imports `package.subpackage`, which imports `package.subpackage.module` in its `__init__.py` - Thread 2 imports `package.subpackage.module`, which needs to ensure `package.subpackage` is loaded first - Each thread holds a lock the other needs, causing deadlock The fix introduces _HierarchicalLockManager that acquires all necessary module locks in a consistent order (parent before child) for nested modules. This ensures all threads acquire locks in the same order, preventing circular wait conditions. 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude --- Lib/importlib/_bootstrap.py | 48 ++++++++++- .../test_importlib/test_threaded_import.py | 83 +++++++++++++++++++ 2 files changed, 130 insertions(+), 1 deletion(-) diff --git a/Lib/importlib/_bootstrap.py b/Lib/importlib/_bootstrap.py index 499da1e04efea8..a09abab6d716cb 100644 --- a/Lib/importlib/_bootstrap.py +++ b/Lib/importlib/_bootstrap.py @@ -424,6 +424,45 @@ def __exit__(self, *args, **kwargs): self._lock.release() +class _HierarchicalLockManager: + """Manages acquisition of multiple module locks in hierarchical order. + + This prevents deadlocks by ensuring all threads acquire locks in the + same order (parent modules before child modules). + """ + + def __init__(self, name): + self._name = name + self._module_chain = self._get_module_chain(name) + self._locks = [] + + def _get_module_chain(self, name): + """Get all modules in the hierarchy from root to leaf. + + For example: 'a.b.c' -> ['a', 'a.b', 'a.b.c'] + """ + parts = name.split('.') + return ['.'.join(parts[:i+1]) for i in range(len(parts))] + + def __enter__(self): + # Acquire locks for all modules in hierarchy order (parent to child) + for module_name in self._module_chain: + # Only acquire lock if module is not already fully loaded + module = sys.modules.get(module_name) + if (module is None or + getattr(getattr(module, "__spec__", None), "_initializing", False)): + lock = _get_module_lock(module_name) + lock.acquire() + self._locks.append((module_name, lock)) + return self + + def __exit__(self, *args, **kwargs): + # Release locks in reverse order (child to parent) + for module_name, lock in reversed(self._locks): + lock.release() + self._locks.clear() + + # The following two functions are for consumption by Python/import.c. def _get_module_lock(name): @@ -1365,7 +1404,14 @@ def _find_and_load(name, import_): module = sys.modules.get(name, _NEEDS_LOADING) if (module is _NEEDS_LOADING or getattr(getattr(module, "__spec__", None), "_initializing", False)): - with _ModuleLockManager(name): + + # Use hierarchical locking for nested modules to prevent deadlocks + if '.' in name: + lock_manager = _HierarchicalLockManager(name) + else: + lock_manager = _ModuleLockManager(name) + + with lock_manager: module = sys.modules.get(name, _NEEDS_LOADING) if module is _NEEDS_LOADING: return _find_and_load_unlocked(name, import_) diff --git a/Lib/test/test_importlib/test_threaded_import.py b/Lib/test/test_importlib/test_threaded_import.py index f78dc399720c86..373810b17b2caa 100644 --- a/Lib/test/test_importlib/test_threaded_import.py +++ b/Lib/test/test_importlib/test_threaded_import.py @@ -259,6 +259,89 @@ def test_multiprocessing_pool_circular_import(self, size): 'partial', 'pool_in_threads.py') script_helper.assert_python_ok(fn) + def test_hierarchical_import_deadlock(self): + # Regression test for bpo-38884 / gh-83065 + # Tests that concurrent imports at different hierarchy levels + # don't deadlock when parent imports child in __init__.py + + # Create package structure: + # package/__init__.py: from package import subpackage + # package/subpackage/__init__.py: from package.subpackage.module import * + # package/subpackage/module.py: class SomeClass: pass + + pkg_dir = os.path.join(TESTFN, 'hier_deadlock_pkg') + os.makedirs(pkg_dir) + self.addCleanup(shutil.rmtree, TESTFN) + + subpkg_dir = os.path.join(pkg_dir, 'subpackage') + os.makedirs(subpkg_dir) + + # Create package files + with open(os.path.join(pkg_dir, "__init__.py"), "w") as f: + f.write("from hier_deadlock_pkg import subpackage\n") + + with open(os.path.join(subpkg_dir, "__init__.py"), "w") as f: + f.write("from hier_deadlock_pkg.subpackage.module import *\n") + + with open(os.path.join(subpkg_dir, "module.py"), "w") as f: + f.write("class SomeClass:\n pass\n") + + sys.path.insert(0, TESTFN) + self.addCleanup(sys.path.remove, TESTFN) + self.addCleanup(forget, 'hier_deadlock_pkg') + self.addCleanup(forget, 'hier_deadlock_pkg.subpackage') + self.addCleanup(forget, 'hier_deadlock_pkg.subpackage.module') + + importlib.invalidate_caches() + + errors = [] + results = [] + + def t1(): + try: + import hier_deadlock_pkg.subpackage + results.append('t1_success') + except Exception as e: + errors.append(('t1', e)) + + def t2(): + try: + import hier_deadlock_pkg.subpackage.module + results.append('t2_success') + except Exception as e: + errors.append(('t2', e)) + + # Run multiple times to increase chance of hitting race condition + for i in range(10): + # Clear modules between runs + for mod in ['hier_deadlock_pkg', 'hier_deadlock_pkg.subpackage', + 'hier_deadlock_pkg.subpackage.module']: + sys.modules.pop(mod, None) + + errors.clear() + results.clear() + + thread1 = threading.Thread(target=t1) + thread2 = threading.Thread(target=t2) + + thread1.start() + thread2.start() + + thread1.join(timeout=5) + thread2.join(timeout=5) + + # Check that both threads completed successfully + if thread1.is_alive() or thread2.is_alive(): + self.fail(f"Threads deadlocked on iteration {i}") + + # No deadlock errors should occur + for thread_name, error in errors: + if isinstance(error, ImportError) and "deadlock" in str(error): + self.fail(f"Deadlock detected in {thread_name} on iteration {i}: {error}") + + # Both imports should succeed + self.assertEqual(len(results), 2, f"Not all imports succeeded on iteration {i}") + def setUpModule(): thread_info = threading_helper.threading_setup()