Skip to content

Commit 50e4c5d

Browse files
committed
GH-106176: Fix reference leak when importing across multiple threads
1 parent 4eae1e5 commit 50e4c5d

File tree

2 files changed

+132
-12
lines changed

2 files changed

+132
-12
lines changed

Lib/importlib/_bootstrap.py

+128-12
Original file line numberDiff line numberDiff line change
@@ -51,18 +51,132 @@ def _new_module(name):
5151

5252
# Module-level locking ########################################################
5353

54-
# A dict mapping module names to weakrefs of _ModuleLock instances
55-
# Dictionary protected by the global import lock
54+
# For a list that can have a weakref to it.
55+
class List(list):
56+
pass
57+
58+
59+
# From weakref.py with some simplifications.
60+
class WeakValueDictionary:
61+
62+
def __init__(self):
63+
self_weakref = _weakref.ref(self)
64+
65+
# Inlined to avoid issues with inheriting from _weakref before it's
66+
# set. Since there's only one instance of this class, this is
67+
# not expensive.
68+
class KeyedRef(_weakref.ref):
69+
70+
__slots__ = "key",
71+
72+
def __new__(type, ob, key):
73+
self = super().__new__(type, ob, type.remove)
74+
self.key = key
75+
return self
76+
77+
def __init__(self, ob, key):
78+
super().__init__(ob, self.remove)
79+
80+
@staticmethod
81+
def remove(wr):
82+
nonlocal self_weakref
83+
84+
self = self_weakref()
85+
if self is not None:
86+
if self._iterating:
87+
self._pending_removals.append(wr.key)
88+
else:
89+
# Atomic removal is necessary since this function
90+
# can be called asynchronously by the GC
91+
_weakref._remove_dead_weakref(self.data, wr.key)
92+
93+
self._KeyedRef = KeyedRef
94+
self.clear()
95+
96+
def clear(self):
97+
self._pending_removals = []
98+
self._iterating = set()
99+
self.data = {}
100+
101+
def _commit_removals(self):
102+
pop = self._pending_removals.pop
103+
d = self.data
104+
# We shouldn't encounter any KeyError, because this method should
105+
# always be called *before* mutating the dict.
106+
while True:
107+
try:
108+
key = pop()
109+
except IndexError:
110+
return
111+
_weakref._remove_dead_weakref(d, key)
112+
113+
def __getitem__(self, key):
114+
if self._pending_removals:
115+
self._commit_removals()
116+
o = self.data[key]()
117+
if o is None:
118+
raise KeyError(key)
119+
else:
120+
return o
121+
122+
def __delitem__(self, key):
123+
if self._pending_removals:
124+
self._commit_removals()
125+
del self.data[key]
126+
127+
def __repr__(self):
128+
return "<%s at %#x>" % (self.__class__.__name__, id(self))
129+
130+
def __setitem__(self, key, value):
131+
if self._pending_removals:
132+
self._commit_removals()
133+
self.data[key] = self._KeyedRef(value, key)
134+
135+
def get(self, key, default=None):
136+
if self._pending_removals:
137+
self._commit_removals()
138+
try:
139+
wr = self.data[key]
140+
except KeyError:
141+
return default
142+
else:
143+
o = wr()
144+
if o is None:
145+
# This should only happen
146+
return default
147+
else:
148+
return o
149+
150+
def setdefault(self, key, default=None):
151+
try:
152+
o = self.data[key]()
153+
except KeyError:
154+
o = None
155+
if o is None:
156+
if self._pending_removals:
157+
self._commit_removals()
158+
self.data[key] = self._KeyedRef(default, key)
159+
return default
160+
else:
161+
return o
162+
163+
164+
# A dict mapping module names to weakrefs of _ModuleLock instances.
165+
# Dictionary protected by the global import lock.
56166
_module_locks = {}
57167

58-
# A dict mapping thread IDs to lists of _ModuleLock instances. This maps a
59-
# thread to the module locks it is blocking on acquiring. The values are
60-
# lists because a single thread could perform a re-entrant import and be "in
61-
# the process" of blocking on locks for more than one module. A thread can
62-
# be "in the process" because a thread cannot actually block on acquiring
63-
# more than one lock but it can have set up bookkeeping that reflects that
64-
# it intends to block on acquiring more than one lock.
65-
_blocking_on = {}
168+
# A dict mapping thread IDs to weakref'ed lists of _ModuleLock instances.
169+
# This maps a thread to the module locks it is blocking on acquiring. The
170+
# values are lists because a single thread could perform a re-entrant import
171+
# and be "in the process" of blocking on locks for more than one module. A
172+
# thread can be "in the process" because a thread cannot actually block on
173+
# acquiring more than one lock but it can have set up bookkeeping that reflects
174+
# that it intends to block on acquiring more than one lock.
175+
#
176+
# The dictionary uses a WeakValueDictionary to avoid keeping unnecessary
177+
# lists around, regardless of GC runs. This way there's no memory leak if
178+
# the list is no longer needed.
179+
_blocking_on = None
66180

67181

68182
class _BlockingOnManager:
@@ -79,7 +193,7 @@ def __enter__(self):
79193
# re-entrant (i.e., a single thread may take it more than once) so it
80194
# wouldn't help us be correct in the face of re-entrancy either.
81195

82-
self.blocked_on = _blocking_on.setdefault(self.thread_id, [])
196+
self.blocked_on = _blocking_on.setdefault(self.thread_id, List())
83197
self.blocked_on.append(self.lock)
84198

85199
def __exit__(self, *args, **kwargs):
@@ -1409,7 +1523,7 @@ def _setup(sys_module, _imp_module):
14091523
modules, those two modules must be explicitly passed in.
14101524
14111525
"""
1412-
global _imp, sys
1526+
global _imp, sys, _blocking_on
14131527
_imp = _imp_module
14141528
sys = sys_module
14151529

@@ -1437,6 +1551,8 @@ def _setup(sys_module, _imp_module):
14371551
builtin_module = sys.modules[builtin_name]
14381552
setattr(self_module, builtin_name, builtin_module)
14391553

1554+
_blocking_on = WeakValueDictionary()
1555+
14401556

14411557
def _install(sys_module, _imp_module):
14421558
"""Install importers for builtin and frozen modules"""
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
Use a ``WeakValueDictionary`` to track the lists containing the modules each
2+
thread is currently importing. This helps avoid a reference leak from
3+
keeping the list around longer than necessary. Weakrefs are used as GC can't
4+
interrupt the cleanup.

0 commit comments

Comments
 (0)