Skip to content

gh-101346: compare and hash code objects by identity #103297

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 7 additions & 46 deletions Lib/test/test_code.py
Original file line number Diff line number Diff line change
Expand Up @@ -330,7 +330,8 @@ def func(arg):
return lambda: arg
code = func.__code__
newcode = code.replace(co_name="func") # Should not raise SystemError
self.assertEqual(code, newcode)
for attr in ("co_names", "co_varnames", "co_cellvars", "co_freevars"):
self.assertEqual(getattr(code, attr), getattr(newcode, attr))

def test_empty_linetable(self):
def func():
Expand Down Expand Up @@ -451,52 +452,12 @@ def func():
self.assertIsNone(line)
self.assertEqual(end_line, new_code.co_firstlineno + 1)

def test_code_equality(self):
def f():
try:
a()
except:
b()
else:
c()
finally:
d()
code_a = f.__code__
code_b = code_a.replace(co_linetable=b"")
code_c = code_a.replace(co_exceptiontable=b"")
code_d = code_b.replace(co_exceptiontable=b"")
self.assertNotEqual(code_a, code_b)
self.assertNotEqual(code_a, code_c)
self.assertNotEqual(code_a, code_d)
self.assertNotEqual(code_b, code_c)
self.assertNotEqual(code_b, code_d)
self.assertNotEqual(code_c, code_d)

def test_code_hash_uses_firstlineno(self):
c1 = (lambda: 1).__code__
c2 = (lambda: 1).__code__
def test_code_equality_is_identity(self):
def func():
pass
c1 = func.__code__
c2 = c1.replace()
self.assertNotEqual(c1, c2)
self.assertNotEqual(hash(c1), hash(c2))
c3 = c1.replace(co_firstlineno=17)
self.assertNotEqual(c1, c3)
self.assertNotEqual(hash(c1), hash(c3))

def test_code_hash_uses_order(self):
# Swapping posonlyargcount and kwonlyargcount should change the hash.
c = (lambda x, y, *, z=1, w=1: 1).__code__
self.assertEqual(c.co_argcount, 2)
self.assertEqual(c.co_posonlyargcount, 0)
self.assertEqual(c.co_kwonlyargcount, 2)
swapped = c.replace(co_posonlyargcount=2, co_kwonlyargcount=0)
self.assertNotEqual(c, swapped)
self.assertNotEqual(hash(c), hash(swapped))

def test_code_hash_uses_bytecode(self):
c = (lambda x, y: x + y).__code__
d = (lambda x, y: x * y).__code__
c1 = c.replace(co_code=d.co_code)
self.assertNotEqual(c, c1)
self.assertNotEqual(hash(c), hash(c1))


def isinterned(s):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Code objects are now compared by identity rather than by value.
137 changes: 2 additions & 135 deletions Objects/codeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -1764,139 +1764,6 @@ code_repr(PyCodeObject *co)
}
}

static PyObject *
code_richcompare(PyObject *self, PyObject *other, int op)
{
PyCodeObject *co, *cp;
int eq;
PyObject *consts1, *consts2;
PyObject *res;

if ((op != Py_EQ && op != Py_NE) ||
!PyCode_Check(self) ||
!PyCode_Check(other)) {
Py_RETURN_NOTIMPLEMENTED;
}

co = (PyCodeObject *)self;
cp = (PyCodeObject *)other;

eq = PyObject_RichCompareBool(co->co_name, cp->co_name, Py_EQ);
if (!eq) goto unequal;
eq = co->co_argcount == cp->co_argcount;
if (!eq) goto unequal;
eq = co->co_posonlyargcount == cp->co_posonlyargcount;
if (!eq) goto unequal;
eq = co->co_kwonlyargcount == cp->co_kwonlyargcount;
if (!eq) goto unequal;
eq = co->co_flags == cp->co_flags;
if (!eq) goto unequal;
eq = co->co_firstlineno == cp->co_firstlineno;
if (!eq) goto unequal;
eq = Py_SIZE(co) == Py_SIZE(cp);
if (!eq) {
goto unequal;
}
for (int i = 0; i < Py_SIZE(co); i++) {
_Py_CODEUNIT co_instr = _PyCode_CODE(co)[i];
_Py_CODEUNIT cp_instr = _PyCode_CODE(cp)[i];
co_instr.op.code = _PyOpcode_Deopt[co_instr.op.code];
cp_instr.op.code = _PyOpcode_Deopt[cp_instr.op.code];
eq = co_instr.cache == cp_instr.cache;
if (!eq) {
goto unequal;
}
i += _PyOpcode_Caches[co_instr.op.code];
}

/* compare constants */
consts1 = _PyCode_ConstantKey(co->co_consts);
if (!consts1)
return NULL;
consts2 = _PyCode_ConstantKey(cp->co_consts);
if (!consts2) {
Py_DECREF(consts1);
return NULL;
}
eq = PyObject_RichCompareBool(consts1, consts2, Py_EQ);
Py_DECREF(consts1);
Py_DECREF(consts2);
if (eq <= 0) goto unequal;

eq = PyObject_RichCompareBool(co->co_names, cp->co_names, Py_EQ);
if (eq <= 0) goto unequal;
eq = PyObject_RichCompareBool(co->co_localsplusnames,
cp->co_localsplusnames, Py_EQ);
if (eq <= 0) goto unequal;
eq = PyObject_RichCompareBool(co->co_linetable, cp->co_linetable, Py_EQ);
if (eq <= 0) {
goto unequal;
}
eq = PyObject_RichCompareBool(co->co_exceptiontable,
cp->co_exceptiontable, Py_EQ);
if (eq <= 0) {
goto unequal;
}

if (op == Py_EQ)
res = Py_True;
else
res = Py_False;
goto done;

unequal:
if (eq < 0)
return NULL;
if (op == Py_NE)
res = Py_True;
else
res = Py_False;

done:
return Py_NewRef(res);
}

static Py_hash_t
code_hash(PyCodeObject *co)
{
Py_uhash_t uhash = 20221211;
#define SCRAMBLE_IN(H) do { \
uhash ^= (Py_uhash_t)(H); \
uhash *= _PyHASH_MULTIPLIER; \
} while (0)
#define SCRAMBLE_IN_HASH(EXPR) do { \
Py_hash_t h = PyObject_Hash(EXPR); \
if (h == -1) { \
return -1; \
} \
SCRAMBLE_IN(h); \
} while (0)

SCRAMBLE_IN_HASH(co->co_name);
SCRAMBLE_IN_HASH(co->co_consts);
SCRAMBLE_IN_HASH(co->co_names);
SCRAMBLE_IN_HASH(co->co_localsplusnames);
SCRAMBLE_IN_HASH(co->co_linetable);
SCRAMBLE_IN_HASH(co->co_exceptiontable);
SCRAMBLE_IN(co->co_argcount);
SCRAMBLE_IN(co->co_posonlyargcount);
SCRAMBLE_IN(co->co_kwonlyargcount);
SCRAMBLE_IN(co->co_flags);
SCRAMBLE_IN(co->co_firstlineno);
SCRAMBLE_IN(Py_SIZE(co));
for (int i = 0; i < Py_SIZE(co); i++) {
int deop = _PyOpcode_Deopt[_PyCode_CODE(co)[i].op.code];
SCRAMBLE_IN(deop);
SCRAMBLE_IN(_PyCode_CODE(co)[i].op.arg);
i += _PyOpcode_Caches[deop];
}
if ((Py_hash_t)uhash == -1) {
return -2;
}
return (Py_hash_t)uhash;
}


#define OFF(x) offsetof(PyCodeObject, x)

static PyMemberDef code_memberlist[] = {
Expand Down Expand Up @@ -2150,7 +2017,7 @@ PyTypeObject PyCode_Type = {
0, /* tp_as_number */
0, /* tp_as_sequence */
0, /* tp_as_mapping */
(hashfunc)code_hash, /* tp_hash */
0, /* tp_hash */
0, /* tp_call */
0, /* tp_str */
PyObject_GenericGetAttr, /* tp_getattro */
Expand All @@ -2160,7 +2027,7 @@ PyTypeObject PyCode_Type = {
code_new__doc__, /* tp_doc */
0, /* tp_traverse */
0, /* tp_clear */
code_richcompare, /* tp_richcompare */
0, /* tp_richcompare */
offsetof(PyCodeObject, co_weakreflist), /* tp_weaklistoffset */
0, /* tp_iter */
0, /* tp_iternext */
Expand Down