Skip to content

GH-94822: Respect metaclasses when caching class attributes #94863

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
294 changes: 294 additions & 0 deletions Lib/test/test_opcache.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import unittest


class TestLoadAttrCache(unittest.TestCase):
def test_descriptor_added_after_optimization(self):
class Descriptor:
Expand All @@ -21,3 +22,296 @@ def f(o):
Descriptor.__set__ = lambda *args: None

self.assertEqual(f(o), 2)

def test_metaclass_descriptor_added_after_optimization(self):
class Descriptor:
pass

class Metaclass(type):
attribute = Descriptor()

class Class(metaclass=Metaclass):
attribute = True

def __get__(self, instance, owner):
return False

def __set__(self, instance, value):
return None

def f():
return Class.attribute

for _ in range(1025):
self.assertTrue(f())

Descriptor.__get__ = __get__
Descriptor.__set__ = __set__

for _ in range(1025):
self.assertFalse(f())

def test_metaclass_descriptor_shadows_class_attribute(self):
class Metaclass(type):
@property
def attribute(self):
return True

class Class(metaclass=Metaclass):
attribute = False

def f():
return Class.attribute

for _ in range(1025):
self.assertTrue(f())

def test_metaclass_set_descriptor_after_optimization(self):
class Metaclass(type):
pass

class Class(metaclass=Metaclass):
attribute = True

@property
def attribute(self):
return False

def f():
return Class.attribute

for _ in range(1025):
self.assertTrue(f())

Metaclass.attribute = attribute

for _ in range(1025):
self.assertFalse(f())

def test_metaclass_del_descriptor_after_optimization(self):
class Metaclass(type):
@property
def attribute(self):
return True

class Class(metaclass=Metaclass):
attribute = False

def f():
return Class.attribute

for _ in range(1025):
self.assertTrue(f())

del Metaclass.attribute

for _ in range(1025):
self.assertFalse(f())

def test_type_descriptor_shadows_attribute_method(self):
class Class:
mro = None

def f():
return Class.mro

for _ in range(1025):
self.assertIsNone(f())

def test_type_descriptor_shadows_attribute_member(self):
class Class:
__base__ = None

def f():
return Class.__base__

for _ in range(1025):
self.assertIs(f(), object)

def test_type_descriptor_shadows_attribute_getset(self):
class Class:
__name__ = "Spam"

def f():
return Class.__name__

for _ in range(1025):
self.assertEqual(f(), "Class")

def test_metaclass_getattribute(self):
class Metaclass(type):
def __getattribute__(self, name):
return True

class Class(metaclass=Metaclass):
attribute = False

def f():
return Class.attribute

for _ in range(1025):
self.assertTrue(f())


class TestLoadMethodCache(unittest.TestCase):
def test_descriptor_added_after_optimization(self):
class Descriptor:
pass

class Class:
attribute = Descriptor()

def __get__(self, instance, owner):
return lambda: False

def __set__(self, instance, value):
return None

def attribute():
return True

instance = Class()
instance.attribute = attribute

def f():
return instance.attribute()

for _ in range(1025):
self.assertTrue(f())

Descriptor.__get__ = __get__
Descriptor.__set__ = __set__

for _ in range(1025):
self.assertFalse(f())

def test_metaclass_descriptor_added_after_optimization(self):
class Descriptor:
pass

class Metaclass(type):
attribute = Descriptor()

class Class(metaclass=Metaclass):
def attribute():
return True

def __get__(self, instance, owner):
return lambda: False

def __set__(self, instance, value):
return None

def f():
return Class.attribute()

for _ in range(1025):
self.assertTrue(f())

Descriptor.__get__ = __get__
Descriptor.__set__ = __set__

for _ in range(1025):
self.assertFalse(f())

def test_metaclass_descriptor_shadows_class_attribute(self):
class Metaclass(type):
@property
def attribute(self):
return lambda: True

class Class(metaclass=Metaclass):
def attribute():
return False

def f():
return Class.attribute()

for _ in range(1025):
self.assertTrue(f())

def test_metaclass_set_descriptor_after_optimization(self):
class Metaclass(type):
pass

class Class(metaclass=Metaclass):
def attribute():
return True

@property
def attribute(self):
return lambda: False

def f():
return Class.attribute()

for _ in range(1025):
self.assertTrue(f())

Metaclass.attribute = attribute

for _ in range(1025):
self.assertFalse(f())

def test_metaclass_del_descriptor_after_optimization(self):
class Metaclass(type):
@property
def attribute(self):
return lambda: True

class Class(metaclass=Metaclass):
def attribute():
return False

def f():
return Class.attribute()

for _ in range(1025):
self.assertTrue(f())

del Metaclass.attribute

for _ in range(1025):
self.assertFalse(f())

def test_type_descriptor_shadows_attribute_method(self):
class Class:
def mro():
return ["Spam", "eggs"]

def f():
return Class.mro()

for _ in range(1025):
self.assertEqual(f(), ["Spam", "eggs"])

def test_type_descriptor_shadows_attribute_member(self):
class Class:
def __base__():
return "Spam"

def f():
return Class.__base__()

for _ in range(1025):
self.assertNotEqual(f(), "Spam")

def test_metaclass_getattribute(self):
class Metaclass(type):
def __getattribute__(self, name):
return lambda: True

class Class(metaclass=Metaclass):
def attribute():
return False

def f():
return Class.attribute()

for _ in range(1025):
self.assertTrue(f())


if __name__ == "__main__":
import unittest
unittest.main()
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Fix an issue where lookups of metaclass descriptors may be ignored when an
identically-named attribute also exists on the class itself.
4 changes: 3 additions & 1 deletion Python/ceval.c
Original file line number Diff line number Diff line change
Expand Up @@ -3672,7 +3672,9 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
DEOPT_IF(((PyTypeObject *)cls)->tp_version_tag != type_version,
LOAD_ATTR);
assert(type_version != 0);

uint32_t meta_version = read_u32(cache->keys_version);
DEOPT_IF(Py_TYPE(cls)->tp_version_tag != meta_version, LOAD_ATTR);
assert(meta_version != 0);
STAT_INC(LOAD_ATTR, hit);
PyObject *res = read_obj(cache->descr);
assert(res != NULL);
Expand Down
22 changes: 16 additions & 6 deletions Python/specialize.c
Original file line number Diff line number Diff line change
Expand Up @@ -945,24 +945,34 @@ specialize_class_load_attr(PyObject *owner, _Py_CODEUNIT *instr,
PyObject *name)
{
_PyLoadMethodCache *cache = (_PyLoadMethodCache *)(instr + 1);
PyTypeObject *metaclass = Py_TYPE(owner);
if (_PyType_Lookup(metaclass, name)) {
// The metaclass has the named attribute. This breaks LOAD_ATTR_CLASS
// when the attribute *also* exists on the class... but it also means we
// can't specialize for things like Class.__doc__, Class.__name__, etc.
// Perhaps LOAD_ATTR_METACLASS may be worth adding in the future?
SPECIALIZATION_FAIL(LOAD_ATTR, SPEC_FAIL_ATTR_METACLASS_ATTRIBUTE);
return -1;
}
if (metaclass->tp_getattro != PyType_Type.tp_getattro) {
// The metaclass defines __getattribute__. All bets are off:
SPECIALIZATION_FAIL(LOAD_ATTR, SPEC_FAIL_OVERRIDDEN);
return -1;
}
Comment on lines +957 to +961
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you think we should just play safe and fail whenever we see a non-type type? (ie whenever there's a metaclass)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We have simple ways of checking for all known problematic cases (names present in the metaclass, __getattribute__ defined, mutating metaclasses). I guess we could do that, but it sort of feels like overkill to me.

I'm currently running the benchmarks and gathering stats for this branch, but I worry that disallowing all metaclasses would really hurt benchmarks like sympy which have tons of metaclasses. So I'd want to make sure we don't suffer any big regressions before disallowing them entirely.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the quick work @brandtbucher

I don't know if this helps but if you want to time SymPy specifically then one way is:

import sympy
sympy.test()

That takes about half an hour to run on this computer.

Comparing different CPython versions in CI shows that 3.11.0b4 currently gives a speedup for some jobs and is neutral for others:
https://github.com/sympy/sympy/actions/runs/2662492934

If you look at for example the test2 matrix then the timings from there are:

3.8: 19m28s
3.9: 16m44s
3.10: 16m4s
3.11.0b4: 14m45s

The test2 job does roughly half of what sympy.test() does and can be reproduce directly with sympy.test(split='2/2'). The 3.10 timing is shown in CI as test2-latest separately from the test2 matrix of different Python versions.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks, I'll take a look.

Side note, I just realized that potentially removing support for any metaclasses here would probably (further) trash enum performance. :( So that's also something we want visibility into before going that route.

PyObject *descr = NULL;
DescriptorClassification kind = 0;
kind = analyze_descriptor((PyTypeObject *)owner, name, &descr, 0);
switch (kind) {
case METHOD:
case NON_DESCRIPTOR:
write_u32(cache->type_version, ((PyTypeObject *)owner)->tp_version_tag);
write_u32(cache->keys_version, metaclass->tp_version_tag);
write_obj(cache->descr, descr);
_Py_SET_OPCODE(*instr, LOAD_ATTR_CLASS);
return 0;
#ifdef Py_STATS
case ABSENT:
if (_PyType_Lookup(Py_TYPE(owner), name) != NULL) {
SPECIALIZATION_FAIL(LOAD_ATTR, SPEC_FAIL_ATTR_METACLASS_ATTRIBUTE);
}
else {
SPECIALIZATION_FAIL(LOAD_ATTR, SPEC_FAIL_EXPECTED_ERROR);
}
SPECIALIZATION_FAIL(LOAD_ATTR, SPEC_FAIL_EXPECTED_ERROR);
return -1;
#endif
default:
Expand Down