From c314217f90ce5ba586b815cf9a4a7b4c82d31896 Mon Sep 17 00:00:00 2001 From: Alex Waygood Date: Tue, 23 Apr 2024 23:36:41 +0100 Subject: [PATCH 1/7] wip --- Lib/inspect.py | 13 ++++++++++--- Lib/test/libregrtest/utils.py | 1 - 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/Lib/inspect.py b/Lib/inspect.py index 422c09a92ad141..0105c1a0838f76 100644 --- a/Lib/inspect.py +++ b/Lib/inspect.py @@ -157,6 +157,7 @@ import types import functools import builtins +import weakref from keyword import iskeyword from operator import attrgetter from collections import namedtuple, OrderedDict @@ -1832,9 +1833,11 @@ def _check_class(klass, attr): return entry.__dict__[attr] return _sentinel + @functools.lru_cache() -def _shadowed_dict_from_mro_tuple(mro): - for entry in mro: +def _shadowed_dict_from_weakref_mro_tuple(weakref_mro): + for weakref_entry in weakref_mro: + entry = weakref_entry() dunder_dict = _get_dunder_dict_of_class(entry) if '__dict__' in dunder_dict: class_dict = dunder_dict['__dict__'] @@ -1844,8 +1847,12 @@ def _shadowed_dict_from_mro_tuple(mro): return class_dict return _sentinel + def _shadowed_dict(klass): - return _shadowed_dict_from_mro_tuple(_static_getmro(klass)) + return _shadowed_dict_from_weakref_mro_tuple( + tuple(map(weakref.ref, _static_getmro(klass))) + ) + def getattr_static(obj, attr, default=_sentinel): """Retrieve attributes without triggering dynamic lookup via the diff --git a/Lib/test/libregrtest/utils.py b/Lib/test/libregrtest/utils.py index 791f996127ea58..e0bcd18100532f 100644 --- a/Lib/test/libregrtest/utils.py +++ b/Lib/test/libregrtest/utils.py @@ -275,7 +275,6 @@ def clear_caches(): except KeyError: pass else: - inspect._shadowed_dict_from_mro_tuple.cache_clear() inspect._filesbymodname.clear() inspect.modulesbyfile.clear() From e4b29893e611f0bd2b36dadcc5ce808e76d61d34 Mon Sep 17 00:00:00 2001 From: Alex Waygood Date: Tue, 23 Apr 2024 23:41:14 +0100 Subject: [PATCH 2/7] wip --- Lib/inspect.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/inspect.py b/Lib/inspect.py index 0105c1a0838f76..d5da6c0012c5a7 100644 --- a/Lib/inspect.py +++ b/Lib/inspect.py @@ -1850,7 +1850,7 @@ def _shadowed_dict_from_weakref_mro_tuple(weakref_mro): def _shadowed_dict(klass): return _shadowed_dict_from_weakref_mro_tuple( - tuple(map(weakref.ref, _static_getmro(klass))) + tuple([weakref.ref(entry) for entry in _static_getmro(klass)]) ) From 57da345a8d904705e82b141ef8a69f28cf60c634 Mon Sep 17 00:00:00 2001 From: Alex Waygood Date: Tue, 23 Apr 2024 23:43:12 +0100 Subject: [PATCH 3/7] wip --- Lib/inspect.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/inspect.py b/Lib/inspect.py index d5da6c0012c5a7..39ebc31f59fcdd 100644 --- a/Lib/inspect.py +++ b/Lib/inspect.py @@ -1835,7 +1835,7 @@ def _check_class(klass, attr): @functools.lru_cache() -def _shadowed_dict_from_weakref_mro_tuple(weakref_mro): +def _shadowed_dict_from_weakref_mro_tuple(*weakref_mro): for weakref_entry in weakref_mro: entry = weakref_entry() dunder_dict = _get_dunder_dict_of_class(entry) @@ -1850,7 +1850,7 @@ def _shadowed_dict_from_weakref_mro_tuple(weakref_mro): def _shadowed_dict(klass): return _shadowed_dict_from_weakref_mro_tuple( - tuple([weakref.ref(entry) for entry in _static_getmro(klass)]) + *[weakref.ref(entry) for entry in _static_getmro(klass)] ) From 4df5d081be32e718a7fc72614ef8d16141a1fb0b Mon Sep 17 00:00:00 2001 From: Alex Waygood Date: Tue, 23 Apr 2024 23:49:47 +0100 Subject: [PATCH 4/7] wip --- Lib/inspect.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/inspect.py b/Lib/inspect.py index 39ebc31f59fcdd..616f7d0324ec94 100644 --- a/Lib/inspect.py +++ b/Lib/inspect.py @@ -157,10 +157,10 @@ import types import functools import builtins -import weakref from keyword import iskeyword from operator import attrgetter from collections import namedtuple, OrderedDict +from weakref import ref as make_weakref # Create constants for the compiler flags in Include/code.h # We try to get them from dis to avoid duplication @@ -1850,7 +1850,7 @@ def _shadowed_dict_from_weakref_mro_tuple(*weakref_mro): def _shadowed_dict(klass): return _shadowed_dict_from_weakref_mro_tuple( - *[weakref.ref(entry) for entry in _static_getmro(klass)] + *[make_weakref(entry) for entry in _static_getmro(klass)] ) From 5847027b18817905987ba3ab807d30be4ab4feb5 Mon Sep 17 00:00:00 2001 From: Alex Waygood Date: Wed, 24 Apr 2024 12:07:00 +0100 Subject: [PATCH 5/7] Tests and comments --- Lib/inspect.py | 12 ++++++++++++ Lib/test/test_inspect/test_inspect.py | 24 ++++++++++++++++++++++++ 2 files changed, 36 insertions(+) diff --git a/Lib/inspect.py b/Lib/inspect.py index 616f7d0324ec94..3c346b27b1f06d 100644 --- a/Lib/inspect.py +++ b/Lib/inspect.py @@ -1837,6 +1837,11 @@ def _check_class(klass, attr): @functools.lru_cache() def _shadowed_dict_from_weakref_mro_tuple(*weakref_mro): for weakref_entry in weakref_mro: + # Normally we'd have to check whether the result of weakref_entry() + # is None here, in case the object the weakref is pointing to has died. + # In this specific case, however, we know that the only caller of this + # function is `_shadowed_dict()`, and that therefore this weakref is + # guaranteed to point to an object that is still alive. entry = weakref_entry() dunder_dict = _get_dunder_dict_of_class(entry) if '__dict__' in dunder_dict: @@ -1849,6 +1854,13 @@ def _shadowed_dict_from_weakref_mro_tuple(*weakref_mro): def _shadowed_dict(klass): + # gh-118013: the inner function here is decorated with lru_cache for + # performance reasons, *but* make sure not to pass strong references + # to the items in the mro. Doing so can lead to unexpected memory + # consumption in cases where classes are dynamically created and + # destroyed, and the dynamically created classes happen to be the only + # objects that hold strong references to other objects that take up a + # significant amount of memory. return _shadowed_dict_from_weakref_mro_tuple( *[make_weakref(entry) for entry in _static_getmro(klass)] ) diff --git a/Lib/test/test_inspect/test_inspect.py b/Lib/test/test_inspect/test_inspect.py index b2265e44e0c79b..169d1edb706fc3 100644 --- a/Lib/test/test_inspect/test_inspect.py +++ b/Lib/test/test_inspect/test_inspect.py @@ -4,6 +4,7 @@ import copy import datetime import functools +import gc import importlib import inspect import io @@ -25,6 +26,7 @@ import unittest import unittest.mock import warnings +import weakref try: @@ -2302,6 +2304,13 @@ def __dict__(self): self.assertEqual(inspect.getattr_static(foo, 'a'), 3) self.assertFalse(test.called) + class Bar(Foo): pass + + bar = Bar() + bar.a = 5 + self.assertEqual(inspect.getattr_static(bar, 'a'), 3) + self.assertFalse(test.called) + def test_mutated_mro(self): test = self test.called = False @@ -2406,6 +2415,21 @@ def __getattribute__(self, attr): self.assertFalse(test.called) + def test_cache_does_not_cause_classes_to_persist(self): + # regression test for gh-118013: + # check that the internal _shadowed_dict cache does not cause + # dynamically created classes to have extended lifetimes even + # when no other strong references to those classes remain. + # Since these classes can themselves hold strong references to + # other objects, this can cause unexpected memory consumption. + class Foo: pass + Foo.instance = Foo() + weakref_to_class = weakref.ref(Foo) + inspect.getattr_static(Foo.instance, 'whatever', 'irrelevant') + del Foo + gc.collect() + self.assertIsNone(weakref_to_class()) + class TestGetGeneratorState(unittest.TestCase): From 677577e07067e49ed03e61225ab98ea2c53bb9cd Mon Sep 17 00:00:00 2001 From: Alex Waygood Date: Wed, 24 Apr 2024 12:12:09 +0100 Subject: [PATCH 6/7] Clear the cache anyway after running `test_inspect`, for extra safety --- Lib/test/libregrtest/utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/Lib/test/libregrtest/utils.py b/Lib/test/libregrtest/utils.py index e0bcd18100532f..8253d330b95b81 100644 --- a/Lib/test/libregrtest/utils.py +++ b/Lib/test/libregrtest/utils.py @@ -275,6 +275,7 @@ def clear_caches(): except KeyError: pass else: + inspect._shadowed_dict_from_weakref_mro_tuple.cache_clear() inspect._filesbymodname.clear() inspect.modulesbyfile.clear() From 6891c70cebf41b77300a6814adb9976305a9acf2 Mon Sep 17 00:00:00 2001 From: Alex Waygood Date: Wed, 24 Apr 2024 12:21:03 +0100 Subject: [PATCH 7/7] docs --- Doc/whatsnew/3.12.rst | 7 +++---- .../2024-04-24-12-20-48.gh-issue-118013.TKn_kZ.rst | 9 +++++++++ 2 files changed, 12 insertions(+), 4 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-04-24-12-20-48.gh-issue-118013.TKn_kZ.rst diff --git a/Doc/whatsnew/3.12.rst b/Doc/whatsnew/3.12.rst index ce3d9ec6a29de8..8757311a484257 100644 --- a/Doc/whatsnew/3.12.rst +++ b/Doc/whatsnew/3.12.rst @@ -734,8 +734,7 @@ inspect * The performance of :func:`inspect.getattr_static` has been considerably improved. Most calls to the function should be at least 2x faster than they - were in Python 3.11, and some may be 6x faster or more. (Contributed by Alex - Waygood in :gh:`103193`.) + were in Python 3.11. (Contributed by Alex Waygood in :gh:`103193`.) itertools --------- @@ -1006,8 +1005,8 @@ typing :func:`runtime-checkable protocols ` has changed significantly. Most ``isinstance()`` checks against protocols with only a few members should be at least 2x faster than in 3.11, and some may be 20x - faster or more. However, ``isinstance()`` checks against protocols with fourteen - or more members may be slower than in Python 3.11. (Contributed by Alex + faster or more. However, ``isinstance()`` checks against protocols with many + members may be slower than in Python 3.11. (Contributed by Alex Waygood in :gh:`74690` and :gh:`103193`.) * All :data:`typing.TypedDict` and :data:`typing.NamedTuple` classes now have the diff --git a/Misc/NEWS.d/next/Library/2024-04-24-12-20-48.gh-issue-118013.TKn_kZ.rst b/Misc/NEWS.d/next/Library/2024-04-24-12-20-48.gh-issue-118013.TKn_kZ.rst new file mode 100644 index 00000000000000..8eb68ebe99ba15 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-04-24-12-20-48.gh-issue-118013.TKn_kZ.rst @@ -0,0 +1,9 @@ +Fix regression introduced in gh-103193 that meant that calling +:func:`inspect.getattr_static` on an instance would cause a strong reference +to that instance's class to persist in an internal cache in the +:mod:`inspect` module. This caused unexpected memory consumption if the +class was dynamically created, the class held strong references to other +objects which took up a significant amount of memory, and the cache +contained the sole strong reference to the class. The fix for the regression +leads to a slowdown in :func:`getattr_static`, but the function should still +be signficantly faster than it was in Python 3.11. Patch by Alex Waygood.