From abc121c0268e350cc7482e32d0523e70dd211efc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 27 Oct 2024 14:43:14 +0100 Subject: [PATCH 01/26] Add versioning to XML elements. --- Modules/_elementtree.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/Modules/_elementtree.c b/Modules/_elementtree.c index e134e096e044b7..cbad0a063bb981 100644 --- a/Modules/_elementtree.c +++ b/Modules/_elementtree.c @@ -230,6 +230,10 @@ typedef struct { PyObject* _children[STATIC_CHILDREN]; + /* incremented whenever 'attrib' is externally mutated */ + uint64_t attrib_version; + /* incremented whenever children are externally mutated */ + uint64_t version; } ElementObjectExtra; typedef struct { @@ -279,6 +283,9 @@ create_extra(ElementObject* self, PyObject* attrib) self->extra->allocated = STATIC_CHILDREN; self->extra->children = self->extra->_children; + self->extra->attrib_version = 0; + self->extra->version = 0; + return 0; } @@ -506,6 +513,7 @@ element_resize(ElementObject* self, Py_ssize_t extra) } self->extra->children = children; self->extra->allocated = size; + self->extra->version++; } return 0; @@ -539,6 +547,7 @@ element_add_subelement(elementtreestate *st, ElementObject *self, self->extra->children[self->extra->length] = Py_NewRef(element); self->extra->length++; + self->extra->version++; return 0; } @@ -780,6 +789,7 @@ _elementtree_Element___copy___impl(ElementObject *self, PyTypeObject *cls) assert(!element->extra->length); element->extra->length = self->extra->length; + element->extra->version = self->extra->version; } return (PyObject*) element; @@ -847,6 +857,7 @@ _elementtree_Element___deepcopy___impl(ElementObject *self, PyObject *memo) if (element_resize(element, self->extra->length) < 0) goto error; + // TODO(picnixz): check for an evil child's __deepcopy__ on 'self' for (i = 0; i < self->extra->length; i++) { PyObject* child = deepcopy(st, self->extra->children[i], memo); if (!child || !Element_Check(st, child)) { @@ -862,6 +873,7 @@ _elementtree_Element___deepcopy___impl(ElementObject *self, PyObject *memo) assert(!element->extra->length); element->extra->length = self->extra->length; + element->extra->version = 0; } /* add object to memo dictionary (so deepcopy won't visit it again) */ @@ -1017,10 +1029,12 @@ element_setstate_from_attributes(elementtreestate *st, Py_XSETREF(self->tag, Py_NewRef(tag)); + // TODO(picnix): determine how versioning would work with an evil text text = text ? JOIN_SET(text, PyList_CheckExact(text)) : Py_None; Py_INCREF(JOIN_OBJ(text)); _set_joined_ptr(&self->text, text); + // TODO(picnix): determine how versioning would work with an evil tail tail = tail ? JOIN_SET(tail, PyList_CheckExact(tail)) : Py_None; Py_INCREF(JOIN_OBJ(tail)); _set_joined_ptr(&self->tail, tail); @@ -1071,6 +1085,7 @@ element_setstate_from_attributes(elementtreestate *st, assert(!self->extra->length); self->extra->length = nchildren; + self->extra->version++; } else { if (element_resize(self, 0)) { @@ -1080,6 +1095,7 @@ element_setstate_from_attributes(elementtreestate *st, /* Stash attrib. */ Py_XSETREF(self->extra->attrib, Py_XNewRef(attrib)); + self->extra->attrib_version++; dealloc_extra(oldextra); Py_RETURN_NONE; @@ -1548,6 +1564,7 @@ _elementtree_Element_insert_impl(ElementObject *self, Py_ssize_t index, self->extra->children[index] = Py_NewRef(subelement); self->extra->length++; + self->extra->version++; Py_RETURN_NONE; } @@ -1645,6 +1662,7 @@ _elementtree_Element_remove_impl(ElementObject *self, PyObject *subelement) return NULL; } + // TODO(picnixz): check against evil __eq__ for (i = 0; i < self->extra->length; i++) { if (self->extra->children[i] == subelement) break; @@ -1669,6 +1687,7 @@ _elementtree_Element_remove_impl(ElementObject *self, PyObject *subelement) self->extra->length--; for (; i < self->extra->length; i++) self->extra->children[i] = self->extra->children[i+1]; + self->extra->version++; Py_DECREF(found); Py_RETURN_NONE; @@ -1724,6 +1743,7 @@ _elementtree_Element_set_impl(ElementObject *self, PyObject *key, if (PyDict_SetItem(attrib, key, value) < 0) return NULL; + self->extra->attrib_version++; Py_RETURN_NONE; } @@ -1757,6 +1777,7 @@ element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item) self->extra->children[i] = self->extra->children[i+1]; } + self->extra->version++; Py_DECREF(old); return 0; @@ -1907,6 +1928,7 @@ element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value) } self->extra->length -= slicelen; + self->extra->version++; /* Discard the recycle list with all the deleted sub-elements */ Py_DECREF(recycle); @@ -1982,6 +2004,7 @@ element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value) } self->extra->length += newlen - slicelen; + self->extra->version++; Py_DECREF(seq); @@ -2078,6 +2101,7 @@ element_attrib_setter(ElementObject *self, PyObject *value, void *closure) return -1; } Py_XSETREF(self->extra->attrib, Py_NewRef(value)); + self->extra->attrib_version++; return 0; } From 4efa51739e6f5ff42b97314ad9bf660983bc3cdd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 27 Oct 2024 15:44:55 +0100 Subject: [PATCH 02/26] fix tests --- Lib/test/test_xml_etree_c.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_xml_etree_c.py b/Lib/test/test_xml_etree_c.py index 3a0fc572f457ff..8f371951f1c08d 100644 --- a/Lib/test/test_xml_etree_c.py +++ b/Lib/test/test_xml_etree_c.py @@ -234,7 +234,7 @@ class SizeofTest(unittest.TestCase): def setUp(self): self.elementsize = support.calcobjsize('5P') # extra - self.extra = struct.calcsize('PnnP4P') + self.extra = struct.calcsize('PnnP4Pnn') check_sizeof = support.check_sizeof From 4ca3cf04a5c56599c39f7b8caabdc3188d914fe4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 27 Oct 2024 16:24:26 +0100 Subject: [PATCH 03/26] fix portability issues --- Lib/test/test_xml_etree_c.py | 2 +- Modules/_elementtree.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Lib/test/test_xml_etree_c.py b/Lib/test/test_xml_etree_c.py index 8f371951f1c08d..aa95753fed2a49 100644 --- a/Lib/test/test_xml_etree_c.py +++ b/Lib/test/test_xml_etree_c.py @@ -234,7 +234,7 @@ class SizeofTest(unittest.TestCase): def setUp(self): self.elementsize = support.calcobjsize('5P') # extra - self.extra = struct.calcsize('PnnP4Pnn') + self.extra = struct.calcsize('PnnP4PNN') check_sizeof = support.check_sizeof diff --git a/Modules/_elementtree.c b/Modules/_elementtree.c index cbad0a063bb981..75870ee27fdd62 100644 --- a/Modules/_elementtree.c +++ b/Modules/_elementtree.c @@ -231,9 +231,9 @@ typedef struct { PyObject* _children[STATIC_CHILDREN]; /* incremented whenever 'attrib' is externally mutated */ - uint64_t attrib_version; + size_t attrib_version; /* incremented whenever children are externally mutated */ - uint64_t version; + size_t version; } ElementObjectExtra; typedef struct { From a1950d1d4b1ac24ddf849623e8da03bad8191fd2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 28 Oct 2024 11:49:21 +0100 Subject: [PATCH 04/26] fixup --- Modules/_elementtree.c | 34 +++++++++++++++------------------- 1 file changed, 15 insertions(+), 19 deletions(-) diff --git a/Modules/_elementtree.c b/Modules/_elementtree.c index 75870ee27fdd62..11fee5c911422e 100644 --- a/Modules/_elementtree.c +++ b/Modules/_elementtree.c @@ -231,9 +231,9 @@ typedef struct { PyObject* _children[STATIC_CHILDREN]; /* incremented whenever 'attrib' is externally mutated */ - size_t attrib_version; + size_t attrs_version; /* incremented whenever children are externally mutated */ - size_t version; + size_t nodes_version; } ElementObjectExtra; typedef struct { @@ -283,8 +283,8 @@ create_extra(ElementObject* self, PyObject* attrib) self->extra->allocated = STATIC_CHILDREN; self->extra->children = self->extra->_children; - self->extra->attrib_version = 0; - self->extra->version = 0; + self->extra->attrs_version = 0; + self->extra->nodes_version = 0; return 0; } @@ -513,7 +513,7 @@ element_resize(ElementObject* self, Py_ssize_t extra) } self->extra->children = children; self->extra->allocated = size; - self->extra->version++; + self->extra->nodes_version++; } return 0; @@ -547,7 +547,7 @@ element_add_subelement(elementtreestate *st, ElementObject *self, self->extra->children[self->extra->length] = Py_NewRef(element); self->extra->length++; - self->extra->version++; + self->extra->nodes_version++; return 0; } @@ -789,7 +789,7 @@ _elementtree_Element___copy___impl(ElementObject *self, PyTypeObject *cls) assert(!element->extra->length); element->extra->length = self->extra->length; - element->extra->version = self->extra->version; + element->extra->nodes_version = self->extra->nodes_version; } return (PyObject*) element; @@ -873,7 +873,7 @@ _elementtree_Element___deepcopy___impl(ElementObject *self, PyObject *memo) assert(!element->extra->length); element->extra->length = self->extra->length; - element->extra->version = 0; + element->extra->nodes_version = 0; } /* add object to memo dictionary (so deepcopy won't visit it again) */ @@ -1029,12 +1029,10 @@ element_setstate_from_attributes(elementtreestate *st, Py_XSETREF(self->tag, Py_NewRef(tag)); - // TODO(picnix): determine how versioning would work with an evil text text = text ? JOIN_SET(text, PyList_CheckExact(text)) : Py_None; Py_INCREF(JOIN_OBJ(text)); _set_joined_ptr(&self->text, text); - // TODO(picnix): determine how versioning would work with an evil tail tail = tail ? JOIN_SET(tail, PyList_CheckExact(tail)) : Py_None; Py_INCREF(JOIN_OBJ(tail)); _set_joined_ptr(&self->tail, tail); @@ -1085,7 +1083,6 @@ element_setstate_from_attributes(elementtreestate *st, assert(!self->extra->length); self->extra->length = nchildren; - self->extra->version++; } else { if (element_resize(self, 0)) { @@ -1095,7 +1092,6 @@ element_setstate_from_attributes(elementtreestate *st, /* Stash attrib. */ Py_XSETREF(self->extra->attrib, Py_XNewRef(attrib)); - self->extra->attrib_version++; dealloc_extra(oldextra); Py_RETURN_NONE; @@ -1564,7 +1560,7 @@ _elementtree_Element_insert_impl(ElementObject *self, Py_ssize_t index, self->extra->children[index] = Py_NewRef(subelement); self->extra->length++; - self->extra->version++; + self->extra->nodes_version++; Py_RETURN_NONE; } @@ -1687,7 +1683,7 @@ _elementtree_Element_remove_impl(ElementObject *self, PyObject *subelement) self->extra->length--; for (; i < self->extra->length; i++) self->extra->children[i] = self->extra->children[i+1]; - self->extra->version++; + self->extra->nodes_version++; Py_DECREF(found); Py_RETURN_NONE; @@ -1743,7 +1739,7 @@ _elementtree_Element_set_impl(ElementObject *self, PyObject *key, if (PyDict_SetItem(attrib, key, value) < 0) return NULL; - self->extra->attrib_version++; + self->extra->attrs_version++; Py_RETURN_NONE; } @@ -1777,7 +1773,7 @@ element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item) self->extra->children[i] = self->extra->children[i+1]; } - self->extra->version++; + self->extra->nodes_version++; Py_DECREF(old); return 0; @@ -1928,7 +1924,7 @@ element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value) } self->extra->length -= slicelen; - self->extra->version++; + self->extra->nodes_version++; /* Discard the recycle list with all the deleted sub-elements */ Py_DECREF(recycle); @@ -2004,7 +2000,7 @@ element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value) } self->extra->length += newlen - slicelen; - self->extra->version++; + self->extra->nodes_version++; Py_DECREF(seq); @@ -2101,7 +2097,7 @@ element_attrib_setter(ElementObject *self, PyObject *value, void *closure) return -1; } Py_XSETREF(self->extra->attrib, Py_NewRef(value)); - self->extra->attrib_version++; + self->extra->attrs_version++; return 0; } From 9b6f55970711d78423ca44c522f6d8e8b93ff0cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 29 Oct 2024 09:05:31 +0100 Subject: [PATCH 05/26] unify versioning --- Lib/test/test_xml_etree_c.py | 2 +- Modules/_elementtree.c | 31 ++++++++++++++----------------- 2 files changed, 15 insertions(+), 18 deletions(-) diff --git a/Lib/test/test_xml_etree_c.py b/Lib/test/test_xml_etree_c.py index aa95753fed2a49..aae8c3b0019951 100644 --- a/Lib/test/test_xml_etree_c.py +++ b/Lib/test/test_xml_etree_c.py @@ -234,7 +234,7 @@ class SizeofTest(unittest.TestCase): def setUp(self): self.elementsize = support.calcobjsize('5P') # extra - self.extra = struct.calcsize('PnnP4PNN') + self.extra = struct.calcsize('PnnP4PN') check_sizeof = support.check_sizeof diff --git a/Modules/_elementtree.c b/Modules/_elementtree.c index 11fee5c911422e..3e206aa4318eee 100644 --- a/Modules/_elementtree.c +++ b/Modules/_elementtree.c @@ -230,10 +230,8 @@ typedef struct { PyObject* _children[STATIC_CHILDREN]; - /* incremented whenever 'attrib' is externally mutated */ - size_t attrs_version; - /* incremented whenever children are externally mutated */ - size_t nodes_version; + /* incremented whenever the object is externally mutated */ + size_t version; } ElementObjectExtra; typedef struct { @@ -283,8 +281,7 @@ create_extra(ElementObject* self, PyObject* attrib) self->extra->allocated = STATIC_CHILDREN; self->extra->children = self->extra->_children; - self->extra->attrs_version = 0; - self->extra->nodes_version = 0; + self->extra->version = 0; return 0; } @@ -513,7 +510,7 @@ element_resize(ElementObject* self, Py_ssize_t extra) } self->extra->children = children; self->extra->allocated = size; - self->extra->nodes_version++; + self->extra->version++; } return 0; @@ -547,7 +544,7 @@ element_add_subelement(elementtreestate *st, ElementObject *self, self->extra->children[self->extra->length] = Py_NewRef(element); self->extra->length++; - self->extra->nodes_version++; + self->extra->version++; return 0; } @@ -789,7 +786,7 @@ _elementtree_Element___copy___impl(ElementObject *self, PyTypeObject *cls) assert(!element->extra->length); element->extra->length = self->extra->length; - element->extra->nodes_version = self->extra->nodes_version; + element->extra->version = self->extra->version; } return (PyObject*) element; @@ -873,7 +870,7 @@ _elementtree_Element___deepcopy___impl(ElementObject *self, PyObject *memo) assert(!element->extra->length); element->extra->length = self->extra->length; - element->extra->nodes_version = 0; + element->extra->version = 0; } /* add object to memo dictionary (so deepcopy won't visit it again) */ @@ -1560,7 +1557,7 @@ _elementtree_Element_insert_impl(ElementObject *self, Py_ssize_t index, self->extra->children[index] = Py_NewRef(subelement); self->extra->length++; - self->extra->nodes_version++; + self->extra->version++; Py_RETURN_NONE; } @@ -1683,7 +1680,7 @@ _elementtree_Element_remove_impl(ElementObject *self, PyObject *subelement) self->extra->length--; for (; i < self->extra->length; i++) self->extra->children[i] = self->extra->children[i+1]; - self->extra->nodes_version++; + self->extra->version++; Py_DECREF(found); Py_RETURN_NONE; @@ -1739,7 +1736,7 @@ _elementtree_Element_set_impl(ElementObject *self, PyObject *key, if (PyDict_SetItem(attrib, key, value) < 0) return NULL; - self->extra->attrs_version++; + self->extra->version++; Py_RETURN_NONE; } @@ -1773,7 +1770,7 @@ element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item) self->extra->children[i] = self->extra->children[i+1]; } - self->extra->nodes_version++; + self->extra->version++; Py_DECREF(old); return 0; @@ -1924,7 +1921,7 @@ element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value) } self->extra->length -= slicelen; - self->extra->nodes_version++; + self->extra->version++; /* Discard the recycle list with all the deleted sub-elements */ Py_DECREF(recycle); @@ -2000,7 +1997,7 @@ element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value) } self->extra->length += newlen - slicelen; - self->extra->nodes_version++; + self->extra->version++; Py_DECREF(seq); @@ -2097,7 +2094,7 @@ element_attrib_setter(ElementObject *self, PyObject *value, void *closure) return -1; } Py_XSETREF(self->extra->attrib, Py_NewRef(value)); - self->extra->attrs_version++; + self->extra->version++; return 0; } From 00a7a7e1fed502e0a333c29a7fcd7a4cd3058479 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 29 Oct 2024 12:57:15 +0100 Subject: [PATCH 06/26] handle evil mutations in `Element.remove` --- Lib/test/test_xml_etree.py | 51 +++++++++++++++++++++++++++------ Modules/_elementtree.c | 58 ++++++++++++++++++++++---------------- 2 files changed, 76 insertions(+), 33 deletions(-) diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py index ae06a9cc11855f..dd2aa58118abe3 100644 --- a/Lib/test/test_xml_etree.py +++ b/Lib/test/test_xml_etree.py @@ -2680,18 +2680,51 @@ class Y(X, ET.Element): e = ET.Element('foo') e.extend(L) - def test_remove_with_mutating(self): - class X(ET.Element): + def test_remove_with_clear_children(self): + # See: https://github.com/python/cpython/issues/126033 + + class EvilElement(ET.Element): def __eq__(self, o): - del e[:] + root.clear() return False - e = ET.Element('foo') - e.extend([X('bar')]) - self.assertRaises(ValueError, e.remove, ET.Element('baz')) - e = ET.Element('foo') - e.extend([ET.Element('bar')]) - self.assertRaises(ValueError, e.remove, X('baz')) + # The pure Python implementation raises a ValueError but the C + # implementation raises a RuntimeError (like OrderedDict does). + exc_type = ValueError if ET is pyET else RuntimeError + + root = ET.Element('.') + root.append(EvilElement('foo')) + root.append(ET.Element('bar')) + self.assertRaises(exc_type, root.remove, ET.Element('pouet')) + + root = ET.Element('.') + root.append(ET.Element('foo')) + root.append(EvilElement('bar')) + self.assertRaises(exc_type, root.remove, EvilElement('pouet')) + + def test_remove_with_mutate_children(self): + # See: https://github.com/python/cpython/issues/126033 + + class EvilElement(ET.Element): + def __eq__(self, o): + # Remove the first element so that the list size changes. + # This causes an infinite recursion error in the Python + # implementation, but we do not really care about it. + root.remove(ET.Element('foo')) + return False + + # The pure Python implementation raises a ValueError (or hits the + # recursion limit) but the C implementation raises a RuntimeError + # (like OrderedDict does). + exc_type = (RecursionError, ValueError) if ET is pyET else RuntimeError + + root = ET.Element('.') + root.extend([ET.Element('foo'), EvilElement('bar')]) + self.assertRaises(exc_type, root.remove, ET.Element('baz')) + + root = ET.Element('.') + root.extend([ET.Element('foo'), EvilElement('bar')]) + self.assertRaises(exc_type, root.remove, EvilElement('baz')) @support.infinite_recursion(25) def test_recursive_repr(self): diff --git a/Modules/_elementtree.c b/Modules/_elementtree.c index 3e206aa4318eee..2afaf5ad68d2ed 100644 --- a/Modules/_elementtree.c +++ b/Modules/_elementtree.c @@ -1642,48 +1642,58 @@ static PyObject * _elementtree_Element_remove_impl(ElementObject *self, PyObject *subelement) /*[clinic end generated code: output=38fe6c07d6d87d1f input=6133e1d05597d5ee]*/ { - Py_ssize_t i; - int rc; - PyObject *found; - - if (!self->extra) { + if (self->extra == NULL) { /* element has no children, so raise exception */ - PyErr_SetString( - PyExc_ValueError, - "list.remove(x): x not in list" - ); - return NULL; + goto not_found; } - // TODO(picnixz): check against evil __eq__ - for (i = 0; i < self->extra->length; i++) { - if (self->extra->children[i] == subelement) + Py_ssize_t i; + size_t old_version = self->extra->version; + for (i = 0; self->extra && i < self->extra->length; i++) { + if (old_version != self->extra->version) { + goto fail; + } + else if (self->extra->children[i] == subelement) { break; - rc = PyObject_RichCompareBool(self->extra->children[i], subelement, Py_EQ); - if (rc > 0) + } + PyObject *child = Py_NewRef(self->extra->children[i]); + int rc = PyObject_RichCompareBool(child, subelement, Py_EQ); + Py_DECREF(child); + if (rc > 0) { break; - if (rc < 0) + } + else if (rc < 0) { return NULL; + } } - if (i >= self->extra->length) { + // An extra check must be done if the mutation occurs at the very last step. + if (self->extra == NULL || old_version != self->extra->version) { + goto fail; + } + else if (i >= self->extra->length) { /* subelement is not in children, so raise exception */ - PyErr_SetString( - PyExc_ValueError, - "list.remove(x): x not in list" - ); - return NULL; + goto not_found; } - found = self->extra->children[i]; + PyObject *found = self->extra->children[i]; self->extra->length--; - for (; i < self->extra->length; i++) + for (; i < self->extra->length; i++) { self->extra->children[i] = self->extra->children[i+1]; + } self->extra->version++; Py_DECREF(found); Py_RETURN_NONE; + +not_found: + PyErr_SetString(PyExc_ValueError, "list.remove(x): x not in list"); + return NULL; + +fail: + PyErr_SetString(PyExc_RuntimeError, "children mutated during iteration"); + return NULL; } static PyObject* From 59ade8fe865f18897d5423faa78e2408d4928cb8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 29 Oct 2024 12:59:52 +0100 Subject: [PATCH 07/26] blurb --- .../Library/2024-10-29-12-59-45.gh-issue-126033.sM3uCn.rst | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2024-10-29-12-59-45.gh-issue-126033.sM3uCn.rst diff --git a/Misc/NEWS.d/next/Library/2024-10-29-12-59-45.gh-issue-126033.sM3uCn.rst b/Misc/NEWS.d/next/Library/2024-10-29-12-59-45.gh-issue-126033.sM3uCn.rst new file mode 100644 index 00000000000000..5fd2476f955e9c --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-10-29-12-59-45.gh-issue-126033.sM3uCn.rst @@ -0,0 +1,3 @@ +Fix a crash in :meth:`Element.remove ` +when either the element to remove or one of the children implements an evil +:meth:`~object.__eq__` method. Patch by Bénédikt Tran. From 7fc993206c59249a39f1b792ef4b7c73576adf02 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 6 Dec 2024 18:37:55 +0100 Subject: [PATCH 08/26] improve NEWS entry formulation --- .../Library/2024-10-29-12-59-45.gh-issue-126033.sM3uCn.rst | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Misc/NEWS.d/next/Library/2024-10-29-12-59-45.gh-issue-126033.sM3uCn.rst b/Misc/NEWS.d/next/Library/2024-10-29-12-59-45.gh-issue-126033.sM3uCn.rst index 5fd2476f955e9c..223130b5c2113b 100644 --- a/Misc/NEWS.d/next/Library/2024-10-29-12-59-45.gh-issue-126033.sM3uCn.rst +++ b/Misc/NEWS.d/next/Library/2024-10-29-12-59-45.gh-issue-126033.sM3uCn.rst @@ -1,3 +1,4 @@ Fix a crash in :meth:`Element.remove ` -when either the element to remove or one of the children implements an evil -:meth:`~object.__eq__` method. Patch by Bénédikt Tran. +when either the element to remove or one of the children implements an +:meth:`~object.__eq__` method mutating its container node. +Patch by Bénédikt Tran. From e30756d7abc33bc85bcf27c764fc43ba8156d35d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 8 Dec 2024 14:06:35 +0100 Subject: [PATCH 09/26] remove versioning --- Modules/_elementtree.c | 32 +++++++++----------------------- 1 file changed, 9 insertions(+), 23 deletions(-) diff --git a/Modules/_elementtree.c b/Modules/_elementtree.c index 2afaf5ad68d2ed..2bc3e4f22b614d 100644 --- a/Modules/_elementtree.c +++ b/Modules/_elementtree.c @@ -229,9 +229,6 @@ typedef struct { PyObject* *children; PyObject* _children[STATIC_CHILDREN]; - - /* incremented whenever the object is externally mutated */ - size_t version; } ElementObjectExtra; typedef struct { @@ -281,8 +278,6 @@ create_extra(ElementObject* self, PyObject* attrib) self->extra->allocated = STATIC_CHILDREN; self->extra->children = self->extra->_children; - self->extra->version = 0; - return 0; } @@ -510,7 +505,6 @@ element_resize(ElementObject* self, Py_ssize_t extra) } self->extra->children = children; self->extra->allocated = size; - self->extra->version++; } return 0; @@ -544,7 +538,6 @@ element_add_subelement(elementtreestate *st, ElementObject *self, self->extra->children[self->extra->length] = Py_NewRef(element); self->extra->length++; - self->extra->version++; return 0; } @@ -786,7 +779,6 @@ _elementtree_Element___copy___impl(ElementObject *self, PyTypeObject *cls) assert(!element->extra->length); element->extra->length = self->extra->length; - element->extra->version = self->extra->version; } return (PyObject*) element; @@ -870,7 +862,6 @@ _elementtree_Element___deepcopy___impl(ElementObject *self, PyObject *memo) assert(!element->extra->length); element->extra->length = self->extra->length; - element->extra->version = 0; } /* add object to memo dictionary (so deepcopy won't visit it again) */ @@ -1557,7 +1548,6 @@ _elementtree_Element_insert_impl(ElementObject *self, Py_ssize_t index, self->extra->children[index] = Py_NewRef(subelement); self->extra->length++; - self->extra->version++; Py_RETURN_NONE; } @@ -1648,17 +1638,17 @@ _elementtree_Element_remove_impl(ElementObject *self, PyObject *subelement) } Py_ssize_t i; - size_t old_version = self->extra->version; + const Py_ssize_t length = self->extra->length; for (i = 0; self->extra && i < self->extra->length; i++) { - if (old_version != self->extra->version) { - goto fail; - } - else if (self->extra->children[i] == subelement) { + if (self->extra->children[i] == subelement) { break; } PyObject *child = Py_NewRef(self->extra->children[i]); int rc = PyObject_RichCompareBool(child, subelement, Py_EQ); Py_DECREF(child); + if (self->extra == NULL || self->extra->length != length) { + goto fail; + } if (rc > 0) { break; } @@ -1667,8 +1657,10 @@ _elementtree_Element_remove_impl(ElementObject *self, PyObject *subelement) } } - // An extra check must be done if the mutation occurs at the very last step. - if (self->extra == NULL || old_version != self->extra->version) { + // An extra check must be done if the mutation occurs at the very last + // step and removes or clears the 'extra' list (the condition on the + // length would not be satisfied any more). + if (self->extra == NULL || self->extra->length != length) { goto fail; } else if (i >= self->extra->length) { @@ -1682,7 +1674,6 @@ _elementtree_Element_remove_impl(ElementObject *self, PyObject *subelement) for (; i < self->extra->length; i++) { self->extra->children[i] = self->extra->children[i+1]; } - self->extra->version++; Py_DECREF(found); Py_RETURN_NONE; @@ -1746,7 +1737,6 @@ _elementtree_Element_set_impl(ElementObject *self, PyObject *key, if (PyDict_SetItem(attrib, key, value) < 0) return NULL; - self->extra->version++; Py_RETURN_NONE; } @@ -1780,7 +1770,6 @@ element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item) self->extra->children[i] = self->extra->children[i+1]; } - self->extra->version++; Py_DECREF(old); return 0; @@ -1931,7 +1920,6 @@ element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value) } self->extra->length -= slicelen; - self->extra->version++; /* Discard the recycle list with all the deleted sub-elements */ Py_DECREF(recycle); @@ -2007,7 +1995,6 @@ element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value) } self->extra->length += newlen - slicelen; - self->extra->version++; Py_DECREF(seq); @@ -2104,7 +2091,6 @@ element_attrib_setter(ElementObject *self, PyObject *value, void *closure) return -1; } Py_XSETREF(self->extra->attrib, Py_NewRef(value)); - self->extra->version++; return 0; } From f2b5bb1d26fb2c54a38c16fe8ceeb2322fb73d7d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 8 Dec 2024 14:06:42 +0100 Subject: [PATCH 10/26] fix tests --- Lib/test/test_xml_etree.py | 63 ++++++++++++++++++++++-------------- Lib/test/test_xml_etree_c.py | 2 +- 2 files changed, 40 insertions(+), 25 deletions(-) diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py index dd2aa58118abe3..d98aec7d03695b 100644 --- a/Lib/test/test_xml_etree.py +++ b/Lib/test/test_xml_etree.py @@ -2680,10 +2680,28 @@ class Y(X, ET.Element): e = ET.Element('foo') e.extend(L) + def test_remove_with_clear_child(self): + class X(ET.Element): + def __eq__(self, o): + del e[:] + return False + + # The pure Python implementation raises a ValueError but the C + # implementation raises a RuntimeError (like OrderedDict does). + exc_type = ValueError if ET is pyET else RuntimeError + + e = ET.Element('foo') + e.extend([X('bar')]) + self.assertRaises(exc_type, e.remove, ET.Element('baz')) + + e = ET.Element('foo') + e.extend([ET.Element('bar')]) + self.assertRaises(exc_type, e.remove, X('baz')) + def test_remove_with_clear_children(self): # See: https://github.com/python/cpython/issues/126033 - class EvilElement(ET.Element): + class X(ET.Element): def __eq__(self, o): root.clear() return False @@ -2692,39 +2710,36 @@ def __eq__(self, o): # implementation raises a RuntimeError (like OrderedDict does). exc_type = ValueError if ET is pyET else RuntimeError - root = ET.Element('.') - root.append(EvilElement('foo')) - root.append(ET.Element('bar')) - self.assertRaises(exc_type, root.remove, ET.Element('pouet')) + for foo_type, rem_type in [(X, ET.Element), (ET.Element, X)]: + with self.subTest(foo_type=foo_type, rem_type=rem_type): + root = ET.Element('.') + root.extend([foo_type('foo'), rem_type('bar')]) + self.assertRaises(exc_type, root.remove, rem_type('baz')) - root = ET.Element('.') - root.append(ET.Element('foo')) - root.append(EvilElement('bar')) - self.assertRaises(exc_type, root.remove, EvilElement('pouet')) - - def test_remove_with_mutate_children(self): + def test_remove_with_mutate_root(self): # See: https://github.com/python/cpython/issues/126033 - class EvilElement(ET.Element): + first_element = ET.Element('foo') + + class X(ET.Element): def __eq__(self, o): # Remove the first element so that the list size changes. # This causes an infinite recursion error in the Python # implementation, but we do not really care about it. - root.remove(ET.Element('foo')) + # + # Depending on whether the first element is or is not + root.remove(first_element) return False - # The pure Python implementation raises a ValueError (or hits the - # recursion limit) but the C implementation raises a RuntimeError - # (like OrderedDict does). - exc_type = (RecursionError, ValueError) if ET is pyET else RuntimeError - - root = ET.Element('.') - root.extend([ET.Element('foo'), EvilElement('bar')]) - self.assertRaises(exc_type, root.remove, ET.Element('baz')) + # The pure Python implementation raises a ValueError but the C + # implementation raises a RuntimeError (like OrderedDict does). + exc_type = ValueError if ET is pyET else RuntimeError - root = ET.Element('.') - root.extend([ET.Element('foo'), EvilElement('bar')]) - self.assertRaises(exc_type, root.remove, EvilElement('baz')) + for bar_type, rem_type in [(X, ET.Element), (ET.Element, X), (X, X)]: + with self.subTest(bar_type=bar_type, rem_type=rem_type): + root = ET.Element('.') + root.extend([first_element, bar_type('bar')]) + self.assertRaises(exc_type, root.remove, rem_type('baz')) @support.infinite_recursion(25) def test_recursive_repr(self): diff --git a/Lib/test/test_xml_etree_c.py b/Lib/test/test_xml_etree_c.py index aae8c3b0019951..3a0fc572f457ff 100644 --- a/Lib/test/test_xml_etree_c.py +++ b/Lib/test/test_xml_etree_c.py @@ -234,7 +234,7 @@ class SizeofTest(unittest.TestCase): def setUp(self): self.elementsize = support.calcobjsize('5P') # extra - self.extra = struct.calcsize('PnnP4PN') + self.extra = struct.calcsize('PnnP4P') check_sizeof = support.check_sizeof From 9f735176246e1a901ebec266f8ac16bc72ac7f96 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 15 Dec 2024 14:08:55 +0100 Subject: [PATCH 11/26] improve detection and tests --- Lib/test/test_xml_etree.py | 20 ++++---------------- Modules/_elementtree.c | 32 +++++++++++++------------------- 2 files changed, 17 insertions(+), 35 deletions(-) diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py index d98aec7d03695b..b0b5ce14093b93 100644 --- a/Lib/test/test_xml_etree.py +++ b/Lib/test/test_xml_etree.py @@ -2686,17 +2686,13 @@ def __eq__(self, o): del e[:] return False - # The pure Python implementation raises a ValueError but the C - # implementation raises a RuntimeError (like OrderedDict does). - exc_type = ValueError if ET is pyET else RuntimeError - e = ET.Element('foo') e.extend([X('bar')]) - self.assertRaises(exc_type, e.remove, ET.Element('baz')) + self.assertRaises(ValueError, e.remove, ET.Element('baz')) e = ET.Element('foo') e.extend([ET.Element('bar')]) - self.assertRaises(exc_type, e.remove, X('baz')) + self.assertRaises(ValueError, e.remove, X('baz')) def test_remove_with_clear_children(self): # See: https://github.com/python/cpython/issues/126033 @@ -2706,15 +2702,11 @@ def __eq__(self, o): root.clear() return False - # The pure Python implementation raises a ValueError but the C - # implementation raises a RuntimeError (like OrderedDict does). - exc_type = ValueError if ET is pyET else RuntimeError - for foo_type, rem_type in [(X, ET.Element), (ET.Element, X)]: with self.subTest(foo_type=foo_type, rem_type=rem_type): root = ET.Element('.') root.extend([foo_type('foo'), rem_type('bar')]) - self.assertRaises(exc_type, root.remove, rem_type('baz')) + self.assertRaises(ValueError, root.remove, rem_type('baz')) def test_remove_with_mutate_root(self): # See: https://github.com/python/cpython/issues/126033 @@ -2731,15 +2723,11 @@ def __eq__(self, o): root.remove(first_element) return False - # The pure Python implementation raises a ValueError but the C - # implementation raises a RuntimeError (like OrderedDict does). - exc_type = ValueError if ET is pyET else RuntimeError - for bar_type, rem_type in [(X, ET.Element), (ET.Element, X), (X, X)]: with self.subTest(bar_type=bar_type, rem_type=rem_type): root = ET.Element('.') root.extend([first_element, bar_type('bar')]) - self.assertRaises(exc_type, root.remove, rem_type('baz')) + self.assertRaises(ValueError, root.remove, rem_type('baz')) @support.infinite_recursion(25) def test_recursive_repr(self): diff --git a/Modules/_elementtree.c b/Modules/_elementtree.c index 2bc3e4f22b614d..88a539f4f83c98 100644 --- a/Modules/_elementtree.c +++ b/Modules/_elementtree.c @@ -1634,11 +1634,16 @@ _elementtree_Element_remove_impl(ElementObject *self, PyObject *subelement) { if (self->extra == NULL) { /* element has no children, so raise exception */ - goto not_found; + goto error; } Py_ssize_t i; - const Py_ssize_t length = self->extra->length; + // When iterating over the list of children, we need to check that the + // list is not cleared (self->extra != NULL) and that we are still within + // the correct bounds (i < self->extra->length). + // + // We deliberately avoid protecting against children lists that grow + // faster than the index since list objects do not protect against it. for (i = 0; self->extra && i < self->extra->length; i++) { if (self->extra->children[i] == subelement) { break; @@ -1646,26 +1651,19 @@ _elementtree_Element_remove_impl(ElementObject *self, PyObject *subelement) PyObject *child = Py_NewRef(self->extra->children[i]); int rc = PyObject_RichCompareBool(child, subelement, Py_EQ); Py_DECREF(child); - if (self->extra == NULL || self->extra->length != length) { - goto fail; + if (rc < 0) { + return NULL; } - if (rc > 0) { + else if (rc > 0) { break; } - else if (rc < 0) { - return NULL; - } } // An extra check must be done if the mutation occurs at the very last // step and removes or clears the 'extra' list (the condition on the // length would not be satisfied any more). - if (self->extra == NULL || self->extra->length != length) { - goto fail; - } - else if (i >= self->extra->length) { - /* subelement is not in children, so raise exception */ - goto not_found; + if (self->extra == NULL || i >= self->extra->length) { + goto error; } PyObject *found = self->extra->children[i]; @@ -1678,13 +1676,9 @@ _elementtree_Element_remove_impl(ElementObject *self, PyObject *subelement) Py_DECREF(found); Py_RETURN_NONE; -not_found: +error: PyErr_SetString(PyExc_ValueError, "list.remove(x): x not in list"); return NULL; - -fail: - PyErr_SetString(PyExc_RuntimeError, "children mutated during iteration"); - return NULL; } static PyObject* From 70f2aad5177c325ebc3a998c2b36d345c91c277b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 15 Dec 2024 15:08:06 +0100 Subject: [PATCH 12/26] revert whitespaces --- Modules/_elementtree.c | 1 + 1 file changed, 1 insertion(+) diff --git a/Modules/_elementtree.c b/Modules/_elementtree.c index 88a539f4f83c98..bd24cc9f0addfd 100644 --- a/Modules/_elementtree.c +++ b/Modules/_elementtree.c @@ -229,6 +229,7 @@ typedef struct { PyObject* *children; PyObject* _children[STATIC_CHILDREN]; + } ElementObjectExtra; typedef struct { From 756b1ebe3825f08d9ece792c16f7c6392bbd9809 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 17 Dec 2024 10:36:38 +0100 Subject: [PATCH 13/26] amend NEWS --- .../Library/2024-10-29-12-59-45.gh-issue-126033.sM3uCn.rst | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/Misc/NEWS.d/next/Library/2024-10-29-12-59-45.gh-issue-126033.sM3uCn.rst b/Misc/NEWS.d/next/Library/2024-10-29-12-59-45.gh-issue-126033.sM3uCn.rst index 223130b5c2113b..8c1d981b54717f 100644 --- a/Misc/NEWS.d/next/Library/2024-10-29-12-59-45.gh-issue-126033.sM3uCn.rst +++ b/Misc/NEWS.d/next/Library/2024-10-29-12-59-45.gh-issue-126033.sM3uCn.rst @@ -1,4 +1,2 @@ Fix a crash in :meth:`Element.remove ` -when either the element to remove or one of the children implements an -:meth:`~object.__eq__` method mutating its container node. -Patch by Bénédikt Tran. +when the element is concurrently mutated. Patch by Bénédikt Tran. From 4b74caf9e8fecb97384cf6b716629ceaf52975bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 17 Dec 2024 12:30:43 +0100 Subject: [PATCH 14/26] improve test coverage --- Lib/test/test_xml_etree.py | 103 ++++++++++++++++++++++++++----------- 1 file changed, 72 insertions(+), 31 deletions(-) diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py index b0b5ce14093b93..7729617498dcf4 100644 --- a/Lib/test/test_xml_etree.py +++ b/Lib/test/test_xml_etree.py @@ -2642,6 +2642,11 @@ def test_pickle_issue18997(self): class BadElementTest(ElementTestCase, unittest.TestCase): + + @classmethod + def setUpClass(cls): + cls.is_c = ET is not pyET + def test_extend_mutable_list(self): class X: @property @@ -2680,54 +2685,90 @@ class Y(X, ET.Element): e = ET.Element('foo') e.extend(L) - def test_remove_with_clear_child(self): - class X(ET.Element): - def __eq__(self, o): - del e[:] - return False + def test_remove_with_clear(self): + # See: https://github.com/python/cpython/issues/126033 - e = ET.Element('foo') - e.extend([X('bar')]) - self.assertRaises(ValueError, e.remove, ET.Element('baz')) + # Until the discrepency between "del root[:]" and "root.clear()" is + # resolved, we need to keep two tests. Previously, using "del root[:]" + # did not crash with the reproducer of gh-126033 while "root.clear()" + # did. - e = ET.Element('foo') - e.extend([ET.Element('bar')]) - self.assertRaises(ValueError, e.remove, X('baz')) + E = ET.Element - def test_remove_with_clear_children(self): - # See: https://github.com/python/cpython/issues/126033 + class X1(E): + def __eq__(self, o): + del root[:] + return False - class X(ET.Element): + class X2(E): def __eq__(self, o): root.clear() return False - for foo_type, rem_type in [(X, ET.Element), (ET.Element, X)]: - with self.subTest(foo_type=foo_type, rem_type=rem_type): - root = ET.Element('.') - root.extend([foo_type('foo'), rem_type('bar')]) - self.assertRaises(ValueError, root.remove, rem_type('baz')) + class Y1(E): + def __eq__(self, o): + del root[:] + return True + + class Y2(E): + def __eq__(self, o): + root.clear() + return True + + def test_remove(root, target, raises): + if raises: + self.assertRaises(ValueError, root.remove, target) + else: + root.remove(target) + self.assertNotIn(target, root) + + for etype, rem_type, raises in [ + (E, X1, True), (E, X2, True), + (X1, E, True), (X2, E, True), + (Y1, E, self.is_c), (Y2, E, self.is_c), + (E, Y1, self.is_c), (E, Y2, self.is_c), + ]: + with self.subTest(etype=etype, rem_type=rem_type, raises=raises): + with self.subTest("single child"): + root = E('.') + root.append(etype('one')) + test_remove(root, rem_type('baz'), raises) + + with self.subTest("with children"): + root = E('.') + root.extend([etype('one'), rem_type('two')]) + test_remove(root, rem_type('baz'), raises) def test_remove_with_mutate_root(self): # See: https://github.com/python/cpython/issues/126033 - first_element = ET.Element('foo') + E = ET.Element class X(ET.Element): def __eq__(self, o): - # Remove the first element so that the list size changes. - # This causes an infinite recursion error in the Python - # implementation, but we do not really care about it. - # - # Depending on whether the first element is or is not - root.remove(first_element) + del root[0] return False - for bar_type, rem_type in [(X, ET.Element), (ET.Element, X), (X, X)]: - with self.subTest(bar_type=bar_type, rem_type=rem_type): - root = ET.Element('.') - root.extend([first_element, bar_type('bar')]) - self.assertRaises(ValueError, root.remove, rem_type('baz')) + class Y(ET.Element): + def __eq__(self, o): + del root[0] + return True + + for bar_type, rem_type, raises in [ + (E, X, True), + (X, E, True), + (Y, E, True), + (E, Y, False), + ]: + with self.subTest(bar_type=bar_type, rem_type=rem_type, raises=raises): + root = E('.') + root.extend([E('first'), rem_type('bar')]) + to_remove = rem_type('baz') + if raises: + self.assertRaises(ValueError, root.remove, to_remove) + else: + root.remove(to_remove) + self.assertNotIn(to_remove, root) @support.infinite_recursion(25) def test_recursive_repr(self): From fd29203c8370ed4b1a7a6a69b2f3575d037e32c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 17 Dec 2024 13:55:34 +0100 Subject: [PATCH 15/26] align C implementation with Python implementation as much as possible --- Modules/_elementtree.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/Modules/_elementtree.c b/Modules/_elementtree.c index bd24cc9f0addfd..c574e4be9879c2 100644 --- a/Modules/_elementtree.c +++ b/Modules/_elementtree.c @@ -1633,11 +1633,6 @@ static PyObject * _elementtree_Element_remove_impl(ElementObject *self, PyObject *subelement) /*[clinic end generated code: output=38fe6c07d6d87d1f input=6133e1d05597d5ee]*/ { - if (self->extra == NULL) { - /* element has no children, so raise exception */ - goto error; - } - Py_ssize_t i; // When iterating over the list of children, we need to check that the // list is not cleared (self->extra != NULL) and that we are still within @@ -1645,12 +1640,14 @@ _elementtree_Element_remove_impl(ElementObject *self, PyObject *subelement) // // We deliberately avoid protecting against children lists that grow // faster than the index since list objects do not protect against it. + int rc = 0; for (i = 0; self->extra && i < self->extra->length; i++) { if (self->extra->children[i] == subelement) { + rc = 1; break; } PyObject *child = Py_NewRef(self->extra->children[i]); - int rc = PyObject_RichCompareBool(child, subelement, Py_EQ); + rc = PyObject_RichCompareBool(child, subelement, Py_EQ); Py_DECREF(child); if (rc < 0) { return NULL; @@ -1660,11 +1657,16 @@ _elementtree_Element_remove_impl(ElementObject *self, PyObject *subelement) } } + if (rc == 0) { + PyErr_SetString(PyExc_ValueError, "list.remove(x): x not in list"); + return NULL; + } + // An extra check must be done if the mutation occurs at the very last // step and removes or clears the 'extra' list (the condition on the // length would not be satisfied any more). if (self->extra == NULL || i >= self->extra->length) { - goto error; + Py_RETURN_NONE; } PyObject *found = self->extra->children[i]; @@ -1676,10 +1678,6 @@ _elementtree_Element_remove_impl(ElementObject *self, PyObject *subelement) Py_DECREF(found); Py_RETURN_NONE; - -error: - PyErr_SetString(PyExc_ValueError, "list.remove(x): x not in list"); - return NULL; } static PyObject* From e7033b63b1da09e3cf9b6391c40f5f876938a784 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 17 Dec 2024 13:55:39 +0100 Subject: [PATCH 16/26] fix tests and improve coverage --- Lib/test/test_xml_etree.py | 114 ++++++++++++++++++------------------- 1 file changed, 57 insertions(+), 57 deletions(-) diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py index 7729617498dcf4..55cec36ed1b737 100644 --- a/Lib/test/test_xml_etree.py +++ b/Lib/test/test_xml_etree.py @@ -2643,10 +2643,6 @@ def test_pickle_issue18997(self): class BadElementTest(ElementTestCase, unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.is_c = ET is not pyET - def test_extend_mutable_list(self): class X: @property @@ -2695,26 +2691,6 @@ def test_remove_with_clear(self): E = ET.Element - class X1(E): - def __eq__(self, o): - del root[:] - return False - - class X2(E): - def __eq__(self, o): - root.clear() - return False - - class Y1(E): - def __eq__(self, o): - del root[:] - return True - - class Y2(E): - def __eq__(self, o): - root.clear() - return True - def test_remove(root, target, raises): if raises: self.assertRaises(ValueError, root.remove, target) @@ -2722,53 +2698,77 @@ def test_remove(root, target, raises): root.remove(target) self.assertNotIn(target, root) - for etype, rem_type, raises in [ - (E, X1, True), (E, X2, True), - (X1, E, True), (X2, E, True), - (Y1, E, self.is_c), (Y2, E, self.is_c), - (E, Y1, self.is_c), (E, Y2, self.is_c), - ]: - with self.subTest(etype=etype, rem_type=rem_type, raises=raises): - with self.subTest("single child"): - root = E('.') - root.append(etype('one')) - test_remove(root, rem_type('baz'), raises) - - with self.subTest("with children"): - root = E('.') - root.extend([etype('one'), rem_type('two')]) - test_remove(root, rem_type('baz'), raises) - - def test_remove_with_mutate_root(self): + for raises in [True, False]: + + class X(E): + def __eq__(self, o): + del root[:] + return not raises + + class Y(E): + def __eq__(self, o): + root.clear() + return not raises + + for etype, rem_type in [(E, X), (X, E), (E, Y), (Y, E)]: + with self.subTest( + etype=etype, rem_type=rem_type, raises=raises, + ): + with self.subTest("single child"): + root = E('.') + root.append(etype('one')) + test_remove(root, rem_type('baz'), raises) + + with self.subTest("with children"): + root = E('.') + root.extend([etype('one'), rem_type('two')]) + test_remove(root, rem_type('baz'), raises) + + def test_remove_with_mutate_root_1(self): # See: https://github.com/python/cpython/issues/126033 E = ET.Element - class X(ET.Element): + class X(E): def __eq__(self, o): del root[0] return False - class Y(ET.Element): + for etype, rem_type in [(E, X), (X, E)]: + with self.subTest('missing', etype=etype, rem_type=rem_type): + root = E('.') + root.extend([E('one'), etype('two')]) + to_remove = rem_type('baz') + self.assertRaises(ValueError, root.remove, to_remove) + + with self.subTest('existing', etype=etype, rem_type=rem_type): + root = E('.') + root.extend([E('one'), etype('same')]) + to_remove = rem_type('same') + self.assertRaises(ValueError, root.remove, to_remove) + + def test_remove_with_mutate_root_2(self): + # See: https://github.com/python/cpython/issues/126033 + + E = ET.Element + + class X(E): def __eq__(self, o): del root[0] return True - for bar_type, rem_type, raises in [ - (E, X, True), - (X, E, True), - (Y, E, True), - (E, Y, False), - ]: - with self.subTest(bar_type=bar_type, rem_type=rem_type, raises=raises): + for etype, rem_type in [(E, X), (X, E)]: + with self.subTest('missing', etype=etype, rem_type=rem_type): root = E('.') - root.extend([E('first'), rem_type('bar')]) + root.extend([E('one'), etype('two')]) to_remove = rem_type('baz') - if raises: - self.assertRaises(ValueError, root.remove, to_remove) - else: - root.remove(to_remove) - self.assertNotIn(to_remove, root) + root.remove(to_remove) + + with self.subTest('existing', etype=etype, rem_type=rem_type): + root = E('.') + root.extend([E('one'), etype('same')]) + to_remove = rem_type('same') + root.remove(to_remove) @support.infinite_recursion(25) def test_recursive_repr(self): From 4d3cdd744c9bdee1b67bd5f66dd64bca3f7a11fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 17 Dec 2024 14:08:35 +0100 Subject: [PATCH 17/26] fix tests (i'll explain afterwards) --- Lib/test/test_xml_etree.py | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py index 55cec36ed1b737..3091f8d6a4fc8f 100644 --- a/Lib/test/test_xml_etree.py +++ b/Lib/test/test_xml_etree.py @@ -2714,12 +2714,12 @@ def __eq__(self, o): with self.subTest( etype=etype, rem_type=rem_type, raises=raises, ): - with self.subTest("single child"): + with self.subTest('single child'): root = E('.') root.append(etype('one')) test_remove(root, rem_type('baz'), raises) - with self.subTest("with children"): + with self.subTest('with children'): root = E('.') root.extend([etype('one'), rem_type('two')]) test_remove(root, rem_type('baz'), raises) @@ -2741,11 +2741,14 @@ def __eq__(self, o): to_remove = rem_type('baz') self.assertRaises(ValueError, root.remove, to_remove) - with self.subTest('existing', etype=etype, rem_type=rem_type): + for rem_type, raises in [(X, True), (E, False)]: + with self.subTest('existing', rem_type=rem_type): root = E('.') - root.extend([E('one'), etype('same')]) - to_remove = rem_type('same') - self.assertRaises(ValueError, root.remove, to_remove) + root.extend([E('one'), same := rem_type('same')]) + if raises: + self.assertRaises(ValueError, root.remove, same) + else: + root.remove(same) def test_remove_with_mutate_root_2(self): # See: https://github.com/python/cpython/issues/126033 @@ -2764,11 +2767,11 @@ def __eq__(self, o): to_remove = rem_type('baz') root.remove(to_remove) - with self.subTest('existing', etype=etype, rem_type=rem_type): + for rem_type in [E, X]: + with self.subTest('existing', rem_type=rem_type): root = E('.') - root.extend([E('one'), etype('same')]) - to_remove = rem_type('same') - root.remove(to_remove) + root.extend([E('one'), same := rem_type('same')]) + root.remove(same) @support.infinite_recursion(25) def test_recursive_repr(self): From 883e8d22035b8d3a7f5f5ed10763d19a7aa05c4e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 17 Dec 2024 14:27:51 +0100 Subject: [PATCH 18/26] improve comments --- Lib/test/test_xml_etree.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py index 3091f8d6a4fc8f..286ee5261e2533 100644 --- a/Lib/test/test_xml_etree.py +++ b/Lib/test/test_xml_etree.py @@ -2717,14 +2717,17 @@ def __eq__(self, o): with self.subTest('single child'): root = E('.') root.append(etype('one')) - test_remove(root, rem_type('baz'), raises) + test_remove(root, rem_type('missing'), raises) with self.subTest('with children'): root = E('.') root.extend([etype('one'), rem_type('two')]) - test_remove(root, rem_type('baz'), raises) + test_remove(root, rem_type('missing'), raises) - def test_remove_with_mutate_root_1(self): + def test_remove_with_mutate_root_assume_missing(self): + # Check that a concurrent mutation for an assumed-to-be + # missing element does not make the interpreter crash. + # # See: https://github.com/python/cpython/issues/126033 E = ET.Element @@ -2738,8 +2741,7 @@ def __eq__(self, o): with self.subTest('missing', etype=etype, rem_type=rem_type): root = E('.') root.extend([E('one'), etype('two')]) - to_remove = rem_type('baz') - self.assertRaises(ValueError, root.remove, to_remove) + self.assertRaises(ValueError, root.remove, rem_type('missing')) for rem_type, raises in [(X, True), (E, False)]: with self.subTest('existing', rem_type=rem_type): @@ -2750,7 +2752,10 @@ def __eq__(self, o): else: root.remove(same) - def test_remove_with_mutate_root_2(self): + def test_remove_with_mutate_root_assume_existing(self): + # Check that a concurrent mutation for an assumed-to-be + # existing element does not make the interpreter crash. + # # See: https://github.com/python/cpython/issues/126033 E = ET.Element @@ -2764,8 +2769,7 @@ def __eq__(self, o): with self.subTest('missing', etype=etype, rem_type=rem_type): root = E('.') root.extend([E('one'), etype('two')]) - to_remove = rem_type('baz') - root.remove(to_remove) + root.remove(rem_type('missing')) for rem_type in [E, X]: with self.subTest('existing', rem_type=rem_type): From 220b669540ac7809256c9987d4ed55fc9a03f666 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Thu, 19 Dec 2024 13:51:39 +0100 Subject: [PATCH 19/26] change root name to avoid special wildcards --- Lib/test/test_xml_etree.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py index 286ee5261e2533..88f1004dead82f 100644 --- a/Lib/test/test_xml_etree.py +++ b/Lib/test/test_xml_etree.py @@ -2715,12 +2715,12 @@ def __eq__(self, o): etype=etype, rem_type=rem_type, raises=raises, ): with self.subTest('single child'): - root = E('.') + root = E('top') root.append(etype('one')) test_remove(root, rem_type('missing'), raises) with self.subTest('with children'): - root = E('.') + root = E('top') root.extend([etype('one'), rem_type('two')]) test_remove(root, rem_type('missing'), raises) @@ -2739,13 +2739,13 @@ def __eq__(self, o): for etype, rem_type in [(E, X), (X, E)]: with self.subTest('missing', etype=etype, rem_type=rem_type): - root = E('.') + root = E('top') root.extend([E('one'), etype('two')]) self.assertRaises(ValueError, root.remove, rem_type('missing')) for rem_type, raises in [(X, True), (E, False)]: with self.subTest('existing', rem_type=rem_type): - root = E('.') + root = E('top') root.extend([E('one'), same := rem_type('same')]) if raises: self.assertRaises(ValueError, root.remove, same) @@ -2767,13 +2767,13 @@ def __eq__(self, o): for etype, rem_type in [(E, X), (X, E)]: with self.subTest('missing', etype=etype, rem_type=rem_type): - root = E('.') + root = E('top') root.extend([E('one'), etype('two')]) root.remove(rem_type('missing')) for rem_type in [E, X]: with self.subTest('existing', rem_type=rem_type): - root = E('.') + root = E('top') root.extend([E('one'), same := rem_type('same')]) root.remove(same) From 7f26430212f8b9f43d0088f3c4e2ae7cdba8bd0a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Thu, 19 Dec 2024 13:53:35 +0100 Subject: [PATCH 20/26] avoid strong reference on the child to remove --- Lib/test/test_xml_etree.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py index 88f1004dead82f..0e6d12ea9500e0 100644 --- a/Lib/test/test_xml_etree.py +++ b/Lib/test/test_xml_etree.py @@ -2746,11 +2746,11 @@ def __eq__(self, o): for rem_type, raises in [(X, True), (E, False)]: with self.subTest('existing', rem_type=rem_type): root = E('top') - root.extend([E('one'), same := rem_type('same')]) + root.extend([E('one'), rem_type('same')]) if raises: - self.assertRaises(ValueError, root.remove, same) + self.assertRaises(ValueError, root.remove, root[1]) else: - root.remove(same) + root.remove(root[1]) def test_remove_with_mutate_root_assume_existing(self): # Check that a concurrent mutation for an assumed-to-be @@ -2774,8 +2774,8 @@ def __eq__(self, o): for rem_type in [E, X]: with self.subTest('existing', rem_type=rem_type): root = E('top') - root.extend([E('one'), same := rem_type('same')]) - root.remove(same) + root.extend([E('one'), rem_type('same')]) + root.remove(root[1]) @support.infinite_recursion(25) def test_recursive_repr(self): From e8d84c81e4c97093709a00421d43be390597627f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Thu, 19 Dec 2024 17:40:35 +0100 Subject: [PATCH 21/26] address Serhiy's review In this commit, we carefully determine the tests to run and justify why some tests are needed while some are not. Some cases might seem redundant but they ensure that the implementation does not change without us noticing. --- Lib/test/test_xml_etree.py | 269 ++++++++++++++++++++++++++++--------- 1 file changed, 204 insertions(+), 65 deletions(-) diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py index 0e6d12ea9500e0..b1cab72a4a9972 100644 --- a/Lib/test/test_xml_etree.py +++ b/Lib/test/test_xml_etree.py @@ -18,9 +18,11 @@ import textwrap import types import unittest +import unittest.mock as mock import warnings import weakref +from contextlib import nullcontext from functools import partial from itertools import product, islice from test import support @@ -121,6 +123,21 @@ """ +def is_python_implementation(): + assert ET is not None, "ET must be initialized" + assert pyET is not None, "pyET must be initialized" + return ET is pyET + + +def equal_wrapper(cls): + """Mock cls.__eq__ to check whether it has been called or not. + + The behaviour of cls.__eq__ (side-effects included) is left as is. + """ + eq = cls.__eq__ + return mock.patch.object(cls, "__eq__", autospec=True, wraps=eq) + + def checkwarnings(*filters, quiet=False): def decorator(test): def newtest(*args, **kwargs): @@ -2681,101 +2698,223 @@ class Y(X, ET.Element): e = ET.Element('foo') e.extend(L) - def test_remove_with_clear(self): + def test_remove_with_clear_assume_missing(self): + # Check that a concurrent clear() for an assumed-to-be + # missing element does not make the interpreter crash. + # + # See: https://github.com/python/cpython/issues/126033 + self.do_test_remove_with_clear(raises=True) + + def test_remove_with_clear_assume_existing(self): + # Check that a concurrent clear() for an assumed-to-be + # existing element does not make the interpreter crash. + # # See: https://github.com/python/cpython/issues/126033 + self.do_test_remove_with_clear(raises=False) + + def do_test_remove_with_clear(self, *, raises): # Until the discrepency between "del root[:]" and "root.clear()" is # resolved, we need to keep two tests. Previously, using "del root[:]" # did not crash with the reproducer of gh-126033 while "root.clear()" # did. - E = ET.Element + class E(ET.Element): + """Local class to be able to mock E.__eq__ for introspection.""" - def test_remove(root, target, raises): - if raises: - self.assertRaises(ValueError, root.remove, target) - else: - root.remove(target) - self.assertNotIn(target, root) - - for raises in [True, False]: - - class X(E): - def __eq__(self, o): - del root[:] - return not raises - - class Y(E): - def __eq__(self, o): - root.clear() - return not raises - - for etype, rem_type in [(E, X), (X, E), (E, Y), (Y, E)]: - with self.subTest( - etype=etype, rem_type=rem_type, raises=raises, - ): - with self.subTest('single child'): - root = E('top') - root.append(etype('one')) - test_remove(root, rem_type('missing'), raises) + class X(E): + def __eq__(self, o): + del root[:] + return not raises - with self.subTest('with children'): + class Y(E): + def __eq__(self, o): + root.clear() + return not raises + + if raises: + get_checker_context = lambda: self.assertRaises(ValueError) + else: + get_checker_context = nullcontext + + self.assertIs(E.__eq__, object.__eq__) + + self.enterContext(self.subTest(raises=raises)) + + for Z, side_effect in [(X, 'del root[:]'), (Y, 'root.clear()')]: + self.enterContext(self.subTest(side_effect=side_effect)) + + # test removing rem_type() from [child_type()] + for child_type, rem_type, description in [ + (Z, E, "remove missing E() from [Z()]"), + (E, Z, "remove missing Z() from [E()]"), + (Z, Z, "remove missing Z() from [Z()]"), + ]: + with self.subTest(description): + root = E('top') + root.extend([child_type('one')]) + with get_checker_context(): + root.remove(rem_type('missing')) + + # test removing rem_type() from [child_one_type(), child_two_type()] + for child_one_type, child_two_type, rem_type, description in [ + (E, Z, E, "remove missing E() from [E(), Z()]"), + (Z, E, E, "remove missing E() from [Z(), E()]"), + (Z, Z, E, "remove missing E() from [Z(), Z()]"), + (E, E, Z, "remove missing Z() from [E(), E()]"), + (E, Z, Z, "remove missing Z() from [E(), Z()]"), + (Z, E, Z, "remove missing Z() from [Z(), E()]"), + (Z, Z, Z, "remove missing Z() from [Z(), Z()]"), + ]: + with self.subTest(description): + root = E('top') + root.extend([child_one_type('one'), child_two_type('two')]) + with get_checker_context(): + root.remove(rem_type('missing')) + + # Test removing root[0] from [Z()]. + # + # Since we call root.remove() with root[0], Z.__eq__() + # will not be called (we branch on the fast Py_EQ path). + with self.subTest("remove root[0] from [Z()]"): + root = E('top') + root.append(Z('rem')) + with equal_wrapper(E) as f, equal_wrapper(Z) as g: + root.remove(root[0]) + f.assert_not_called() + g.assert_not_called() + + # Test removing root[1] from [child_one_type(), child_rem_type()]. + # + # In pure Python, using root.clear() sets the children + # list to [] without calling list.clear(). + # + # For this reason, the call to root.remove() first + # checks root[0] and sets the children list to [] + # since either root[0] or root[1] is an evil element. + # + # Since checking root[1] still uses the old reference + # to the children list, PyObject_RichCompareBool() branches + # to the fast Py_EQ path and Y.__eq__() is called exactly + # once (when checking root[0]). + # + # NOTE(picnixz): the Python and C implementations + # could be aligned if 'self._children = []' is + # replaced by 'self._children.clear()'; however, + # this should be carefully addressed since any + # reference to 'self._children' will be affected. + is_special = is_python_implementation() and raises and Z is Y + if is_special: + with self.subTest("remove root[1] from [E(), Z()]"): + root = E('top') + root.extend([E('one'), Z('rem')]) + with equal_wrapper(E) as never, equal_wrapper(Z) as f: + root.remove(root[1]) + # Calling PyObject_RichCompareBool(root[0], root[1], Py_EQ) + # delegates to Z.__eq__(root[1], root[0]) since E.__eq__ is + # not implemented. In particular, E.__eq__ is never called + # but Z.__eq__ is called when checking root[0]. + never.assert_not_called() + f.assert_called_once() + self.assertIs(f.call_args[0][0].__class__, Z) + self.assertIs(f.call_args[0][0].tag, 'rem') + self.assertIs(f.call_args[0][1].__class__, E) + self.assertIs(f.call_args[0][1].tag, 'one') + + with self.subTest("remove root[1] from [Z(), E()]"): + root = E('top') + root.extend([Z('one'), E('rem')]) + with equal_wrapper(E) as never, equal_wrapper(Z) as f: + root.remove(root[1]) + # Calling PyObject_RichCompareBool(root[0], root[1], Py_EQ) + # delegates to Z.__eq__(root[0], root[1]). In particular, + # E.__eq__ is never called due to the Py_EQ fast path. + never.assert_not_called() + f.assert_called_once() + self.assertIs(f.call_args[0][0].__class__, Z) + self.assertIs(f.call_args[0][0].tag, 'one') + self.assertIs(f.call_args[0][1].__class__, E) + self.assertIs(f.call_args[0][1].tag, 'rem') + + with self.subTest("remove root[1] from [Z(), Z()]"): + root = E('top') + root.extend([Z('one'), Z('rem')]) + self.assertNotEqual(list(root), []) + with equal_wrapper(E) as never, equal_wrapper(Z) as f: + root.remove(root[1]) + # Same arguments as for the [Z(), E()] case. + never.assert_not_called() + f.assert_called_once() + self.assertIs(f.call_args[0][0].__class__, Z) + self.assertIs(f.call_args[0][0].tag, 'one') + self.assertIs(f.call_args[0][1].__class__, Z) + self.assertIs(f.call_args[0][1].tag, 'rem') + else: + for child_one_type, child_rem_type, description in [ + (E, Z, "remove root[1] from [E(), Z()]"), + (Z, E, "remove root[1] from [Z(), E()]"), + (Z, Z, "remove root[1] from [Z(), Z()]"), + ]: + with self.subTest(description): root = E('top') - root.extend([etype('one'), rem_type('two')]) - test_remove(root, rem_type('missing'), raises) + root.extend([child_one_type('one'), child_rem_type('rem')]) + with get_checker_context(): + root.remove(root[1]) def test_remove_with_mutate_root_assume_missing(self): # Check that a concurrent mutation for an assumed-to-be # missing element does not make the interpreter crash. # # See: https://github.com/python/cpython/issues/126033 - - E = ET.Element - - class X(E): - def __eq__(self, o): - del root[0] - return False - - for etype, rem_type in [(E, X), (X, E)]: - with self.subTest('missing', etype=etype, rem_type=rem_type): - root = E('top') - root.extend([E('one'), etype('two')]) - self.assertRaises(ValueError, root.remove, rem_type('missing')) - - for rem_type, raises in [(X, True), (E, False)]: - with self.subTest('existing', rem_type=rem_type): - root = E('top') - root.extend([E('one'), rem_type('same')]) - if raises: - self.assertRaises(ValueError, root.remove, root[1]) - else: - root.remove(root[1]) + self.do_test_remove_with_mutate_root(raises=True) def test_remove_with_mutate_root_assume_existing(self): # Check that a concurrent mutation for an assumed-to-be # existing element does not make the interpreter crash. # # See: https://github.com/python/cpython/issues/126033 + self.do_test_remove_with_mutate_root(raises=False) + def do_test_remove_with_mutate_root(self, *, raises): E = ET.Element - class X(E): + class Z(E): def __eq__(self, o): del root[0] - return True + return not raises - for etype, rem_type in [(E, X), (X, E)]: - with self.subTest('missing', etype=etype, rem_type=rem_type): + if raises: + get_checker_context = lambda: self.assertRaises(ValueError) + else: + get_checker_context = nullcontext + + # test removing rem_type() from [child_one_type(), child_two_type()] + for child_one_type, child_two_type, rem_type, description in [ + (E, Z, E, "remove missing E() from [E(), Z()]"), + (Z, E, E, "remove missing E() from [Z(), E()]"), + (Z, Z, E, "remove missing E() from [Z(), Z()]"), + (E, E, Z, "remove missing Z() from [E(), E()]"), + (E, Z, Z, "remove missing Z() from [E(), Z()]"), + (Z, E, Z, "remove missing Z() from [Z(), E()]"), + (Z, Z, Z, "remove missing Z() from [Z(), Z()]"), + ]: + with self.subTest(description): root = E('top') - root.extend([E('one'), etype('two')]) - root.remove(rem_type('missing')) - - for rem_type in [E, X]: - with self.subTest('existing', rem_type=rem_type): + root.extend([child_one_type('one'), child_two_type('two')]) + with get_checker_context(): + root.remove(rem_type('missing')) + + # test removing root[1] from [child_one_type(), child_rem_type()] + for child_one_type, child_rem_type, description in [ + (E, Z, "remove root[1] from [E(), Z()]"), + (Z, E, "remove root[1] from [Z(), E()]"), + (Z, Z, "remove root[1] from [Z(), Z()]"), + ]: + with self.subTest(description): root = E('top') - root.extend([E('one'), rem_type('same')]) - root.remove(root[1]) + root.extend([child_one_type('one'), child_rem_type('rem')]) + with get_checker_context(): + root.remove(root[1]) @support.infinite_recursion(25) def test_recursive_repr(self): From bc52c04ebbf55a7a40d2f9a9921fff502a4f4e12 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 4 Feb 2025 18:16:27 +0100 Subject: [PATCH 22/26] Update Misc/NEWS.d/next/Library/2024-10-29-12-59-45.gh-issue-126033.sM3uCn.rst Co-authored-by: Victor Stinner --- .../next/Library/2024-10-29-12-59-45.gh-issue-126033.sM3uCn.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Library/2024-10-29-12-59-45.gh-issue-126033.sM3uCn.rst b/Misc/NEWS.d/next/Library/2024-10-29-12-59-45.gh-issue-126033.sM3uCn.rst index 8c1d981b54717f..56234cd62aab31 100644 --- a/Misc/NEWS.d/next/Library/2024-10-29-12-59-45.gh-issue-126033.sM3uCn.rst +++ b/Misc/NEWS.d/next/Library/2024-10-29-12-59-45.gh-issue-126033.sM3uCn.rst @@ -1,2 +1,2 @@ -Fix a crash in :meth:`Element.remove ` +:mod:`xml.etree`: Fix a crash in :meth:`Element.remove ` when the element is concurrently mutated. Patch by Bénédikt Tran. From f4a4daefcb3c001269ba588a4286ee84e8873dd9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 8 Feb 2025 11:46:27 +0100 Subject: [PATCH 23/26] remove un-necessary subtest parameter --- Lib/test/test_xml_etree.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py index b1cab72a4a9972..6537f4097fd9ce 100644 --- a/Lib/test/test_xml_etree.py +++ b/Lib/test/test_xml_etree.py @@ -2739,8 +2739,6 @@ def __eq__(self, o): self.assertIs(E.__eq__, object.__eq__) - self.enterContext(self.subTest(raises=raises)) - for Z, side_effect in [(X, 'del root[:]'), (Y, 'root.clear()')]: self.enterContext(self.subTest(side_effect=side_effect)) From 09a9fa97309b3809fa1027f642f11a7a4ac4e684 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 8 Feb 2025 12:20:43 +0100 Subject: [PATCH 24/26] Reduce the visual size of the tests --- Lib/test/test_xml_etree.py | 144 ++++++++++++++++++------------------- 1 file changed, 69 insertions(+), 75 deletions(-) diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py index 6537f4097fd9ce..dd816a11bd1488 100644 --- a/Lib/test/test_xml_etree.py +++ b/Lib/test/test_xml_etree.py @@ -2699,17 +2699,13 @@ class Y(X, ET.Element): e.extend(L) def test_remove_with_clear_assume_missing(self): - # Check that a concurrent clear() for an assumed-to-be + # gh-126033: Check that a concurrent clear() for an assumed-to-be # missing element does not make the interpreter crash. - # - # See: https://github.com/python/cpython/issues/126033 self.do_test_remove_with_clear(raises=True) def test_remove_with_clear_assume_existing(self): - # Check that a concurrent clear() for an assumed-to-be + # gh-126033: Check that a concurrent clear() for an assumed-to-be # existing element does not make the interpreter crash. - # - # See: https://github.com/python/cpython/issues/126033 self.do_test_remove_with_clear(raises=False) def do_test_remove_with_clear(self, *, raises): @@ -2742,33 +2738,26 @@ def __eq__(self, o): for Z, side_effect in [(X, 'del root[:]'), (Y, 'root.clear()')]: self.enterContext(self.subTest(side_effect=side_effect)) - # test removing rem_type() from [child_type()] - for child_type, rem_type, description in [ - (Z, E, "remove missing E() from [Z()]"), - (E, Z, "remove missing Z() from [E()]"), + # test removing R() from [U()] + for R, U, description in [ + (E, Z, "remove missing E() from [Z()]"), + (Z, E, "remove missing Z() from [E()]"), (Z, Z, "remove missing Z() from [Z()]"), ]: with self.subTest(description): root = E('top') - root.extend([child_type('one')]) + root.extend([U('one')]) with get_checker_context(): - root.remove(rem_type('missing')) - - # test removing rem_type() from [child_one_type(), child_two_type()] - for child_one_type, child_two_type, rem_type, description in [ - (E, Z, E, "remove missing E() from [E(), Z()]"), - (Z, E, E, "remove missing E() from [Z(), E()]"), - (Z, Z, E, "remove missing E() from [Z(), Z()]"), - (E, E, Z, "remove missing Z() from [E(), E()]"), - (E, Z, Z, "remove missing Z() from [E(), Z()]"), - (Z, E, Z, "remove missing Z() from [Z(), E()]"), - (Z, Z, Z, "remove missing Z() from [Z(), Z()]"), - ]: + root.remove(R('missing')) + + # test removing R() from [U(), V()] + cases = self.cases_for_remove_missing_with_mutations(E, Z) + for R, U, V, description in cases: with self.subTest(description): root = E('top') - root.extend([child_one_type('one'), child_two_type('two')]) + root.extend([U('one'), V('two')]) with get_checker_context(): - root.remove(rem_type('missing')) + root.remove(R('missing')) # Test removing root[0] from [Z()]. # @@ -2782,7 +2771,7 @@ def __eq__(self, o): f.assert_not_called() g.assert_not_called() - # Test removing root[1] from [child_one_type(), child_rem_type()]. + # Test removing root[1] (of type R) from [U(), R()]. # # In pure Python, using root.clear() sets the children # list to [] without calling list.clear(). @@ -2803,6 +2792,18 @@ def __eq__(self, o): # reference to 'self._children' will be affected. is_special = is_python_implementation() and raises and Z is Y if is_special: + def check(f, never, arg0spec, arg1spec): + never.assert_not_called() + f.assert_called_once() + + arg0cls, arg0tag = arg0spec + self.assertIs(f.call_args[0][0].__class__, arg0cls) + self.assertIs(f.call_args[0][0].tag, arg0tag) + + arg1cls, arg1tag = arg1spec + self.assertIs(f.call_args[0][1].__class__, arg1cls) + self.assertIs(f.call_args[0][1].tag, arg1tag) + with self.subTest("remove root[1] from [E(), Z()]"): root = E('top') root.extend([E('one'), Z('rem')]) @@ -2812,12 +2813,7 @@ def __eq__(self, o): # delegates to Z.__eq__(root[1], root[0]) since E.__eq__ is # not implemented. In particular, E.__eq__ is never called # but Z.__eq__ is called when checking root[0]. - never.assert_not_called() - f.assert_called_once() - self.assertIs(f.call_args[0][0].__class__, Z) - self.assertIs(f.call_args[0][0].tag, 'rem') - self.assertIs(f.call_args[0][1].__class__, E) - self.assertIs(f.call_args[0][1].tag, 'one') + check(f, never, (Z, 'rem'), (E, 'one')) with self.subTest("remove root[1] from [Z(), E()]"): root = E('top') @@ -2827,12 +2823,7 @@ def __eq__(self, o): # Calling PyObject_RichCompareBool(root[0], root[1], Py_EQ) # delegates to Z.__eq__(root[0], root[1]). In particular, # E.__eq__ is never called due to the Py_EQ fast path. - never.assert_not_called() - f.assert_called_once() - self.assertIs(f.call_args[0][0].__class__, Z) - self.assertIs(f.call_args[0][0].tag, 'one') - self.assertIs(f.call_args[0][1].__class__, E) - self.assertIs(f.call_args[0][1].tag, 'rem') + check(f, never, (Z, 'one'), (E, 'rem')) with self.subTest("remove root[1] from [Z(), Z()]"): root = E('top') @@ -2841,36 +2832,24 @@ def __eq__(self, o): with equal_wrapper(E) as never, equal_wrapper(Z) as f: root.remove(root[1]) # Same arguments as for the [Z(), E()] case. - never.assert_not_called() - f.assert_called_once() - self.assertIs(f.call_args[0][0].__class__, Z) - self.assertIs(f.call_args[0][0].tag, 'one') - self.assertIs(f.call_args[0][1].__class__, Z) - self.assertIs(f.call_args[0][1].tag, 'rem') + check(f, never, (Z, 'one'), (Z, 'rem')) else: - for child_one_type, child_rem_type, description in [ - (E, Z, "remove root[1] from [E(), Z()]"), - (Z, E, "remove root[1] from [Z(), E()]"), - (Z, Z, "remove root[1] from [Z(), Z()]"), - ]: + cases = self.cases_for_remove_existing_with_mutations(E, Z) + for R, U, description in cases: with self.subTest(description): root = E('top') - root.extend([child_one_type('one'), child_rem_type('rem')]) + root.extend([U('one'), R('rem')]) with get_checker_context(): root.remove(root[1]) def test_remove_with_mutate_root_assume_missing(self): - # Check that a concurrent mutation for an assumed-to-be + # gh-126033: Check that a concurrent mutation for an assumed-to-be # missing element does not make the interpreter crash. - # - # See: https://github.com/python/cpython/issues/126033 self.do_test_remove_with_mutate_root(raises=True) def test_remove_with_mutate_root_assume_existing(self): - # Check that a concurrent mutation for an assumed-to-be + # gh-126033: Check that a concurrent mutation for an assumed-to-be # existing element does not make the interpreter crash. - # - # See: https://github.com/python/cpython/issues/126033 self.do_test_remove_with_mutate_root(raises=False) def do_test_remove_with_mutate_root(self, *, raises): @@ -2886,34 +2865,49 @@ def __eq__(self, o): else: get_checker_context = nullcontext - # test removing rem_type() from [child_one_type(), child_two_type()] - for child_one_type, child_two_type, rem_type, description in [ - (E, Z, E, "remove missing E() from [E(), Z()]"), - (Z, E, E, "remove missing E() from [Z(), E()]"), - (Z, Z, E, "remove missing E() from [Z(), Z()]"), - (E, E, Z, "remove missing Z() from [E(), E()]"), - (E, Z, Z, "remove missing Z() from [E(), Z()]"), - (Z, E, Z, "remove missing Z() from [Z(), E()]"), - (Z, Z, Z, "remove missing Z() from [Z(), Z()]"), - ]: + # test removing R() from [U(), V()] + cases = self.cases_for_remove_missing_with_mutations(E, Z) + for R, U, V, description in cases: with self.subTest(description): root = E('top') - root.extend([child_one_type('one'), child_two_type('two')]) + root.extend([U('one'), V('two')]) with get_checker_context(): - root.remove(rem_type('missing')) + root.remove(R('missing')) - # test removing root[1] from [child_one_type(), child_rem_type()] - for child_one_type, child_rem_type, description in [ - (E, Z, "remove root[1] from [E(), Z()]"), - (Z, E, "remove root[1] from [Z(), E()]"), - (Z, Z, "remove root[1] from [Z(), Z()]"), - ]: + # test removing root[1] (of type R) from [U(), R()] + cases = self.cases_for_remove_existing_with_mutations(E, Z) + for R, U, description in cases: with self.subTest(description): root = E('top') - root.extend([child_one_type('one'), child_rem_type('rem')]) + root.extend([U('one'), R('rem')]) with get_checker_context(): root.remove(root[1]) + def cases_for_remove_missing_with_mutations(self, E, Z): + # Cases for removing R() from [U(), V()]. + # The case U = V = R = E is not interesting as there is no mutation. + for U, V in [(E, Z), (Z, E), (Z, Z)]: + description = (f"remove missing {E.__name__}() from " + f"[{U.__name__}(), {V.__name__}()]") + yield E, U, V, description + + for U, V in [(E, E), (E, Z), (Z, E), (Z, Z)]: + description = (f"remove missing {Z.__name__}() from " + f"[{U.__name__}(), {V.__name__}()]") + yield Z, U, V, description + + def cases_for_remove_existing_with_mutations(self, E, Z): + # Cases for removing root[1] (of type R) from [U(), R()]. + # The case U = R = E is not interesting as there is no mutation. + for U, R, description in [ + (E, Z, "remove root[1] from [E(), Z()]"), + (Z, E, "remove root[1] from [Z(), E()]"), + (Z, Z, "remove root[1] from [Z(), Z()]"), + ]: + description = (f"remove root[1] (of type {R.__name__}) " + f"from [{U.__name__}(), {R.__name__}()]") + yield R, U, description + @support.infinite_recursion(25) def test_recursive_repr(self): # Issue #25455 From f5d352f8ec06a5d39891d93fa0529f39e5d45436 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 8 Feb 2025 12:22:49 +0100 Subject: [PATCH 25/26] fixup docs --- .../Library/2024-10-29-12-59-45.gh-issue-126033.sM3uCn.rst | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Misc/NEWS.d/next/Library/2024-10-29-12-59-45.gh-issue-126033.sM3uCn.rst b/Misc/NEWS.d/next/Library/2024-10-29-12-59-45.gh-issue-126033.sM3uCn.rst index 56234cd62aab31..fa09c712aa0c4a 100644 --- a/Misc/NEWS.d/next/Library/2024-10-29-12-59-45.gh-issue-126033.sM3uCn.rst +++ b/Misc/NEWS.d/next/Library/2024-10-29-12-59-45.gh-issue-126033.sM3uCn.rst @@ -1,2 +1,3 @@ -:mod:`xml.etree`: Fix a crash in :meth:`Element.remove ` -when the element is concurrently mutated. Patch by Bénédikt Tran. +:mod:`xml.etree.ElementTree`: Fix a crash in :meth:`Element.remove +` when the element is +concurrently mutated. Patch by Bénédikt Tran. From 04cb600a784440c826163e7fcf1f81a97cffe296 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 15 Mar 2025 11:21:08 +0100 Subject: [PATCH 26/26] do not pedantically test the pure Python implementation --- Lib/test/test_xml_etree.py | 73 +++++++------------------------------- 1 file changed, 13 insertions(+), 60 deletions(-) diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py index dd816a11bd1488..2a13553a4c9a36 100644 --- a/Lib/test/test_xml_etree.py +++ b/Lib/test/test_xml_etree.py @@ -2772,67 +2772,20 @@ def __eq__(self, o): g.assert_not_called() # Test removing root[1] (of type R) from [U(), R()]. - # - # In pure Python, using root.clear() sets the children - # list to [] without calling list.clear(). - # - # For this reason, the call to root.remove() first - # checks root[0] and sets the children list to [] - # since either root[0] or root[1] is an evil element. - # - # Since checking root[1] still uses the old reference - # to the children list, PyObject_RichCompareBool() branches - # to the fast Py_EQ path and Y.__eq__() is called exactly - # once (when checking root[0]). - # - # NOTE(picnixz): the Python and C implementations - # could be aligned if 'self._children = []' is - # replaced by 'self._children.clear()'; however, - # this should be carefully addressed since any - # reference to 'self._children' will be affected. is_special = is_python_implementation() and raises and Z is Y - if is_special: - def check(f, never, arg0spec, arg1spec): - never.assert_not_called() - f.assert_called_once() - - arg0cls, arg0tag = arg0spec - self.assertIs(f.call_args[0][0].__class__, arg0cls) - self.assertIs(f.call_args[0][0].tag, arg0tag) - - arg1cls, arg1tag = arg1spec - self.assertIs(f.call_args[0][1].__class__, arg1cls) - self.assertIs(f.call_args[0][1].tag, arg1tag) - - with self.subTest("remove root[1] from [E(), Z()]"): - root = E('top') - root.extend([E('one'), Z('rem')]) - with equal_wrapper(E) as never, equal_wrapper(Z) as f: - root.remove(root[1]) - # Calling PyObject_RichCompareBool(root[0], root[1], Py_EQ) - # delegates to Z.__eq__(root[1], root[0]) since E.__eq__ is - # not implemented. In particular, E.__eq__ is never called - # but Z.__eq__ is called when checking root[0]. - check(f, never, (Z, 'rem'), (E, 'one')) - - with self.subTest("remove root[1] from [Z(), E()]"): - root = E('top') - root.extend([Z('one'), E('rem')]) - with equal_wrapper(E) as never, equal_wrapper(Z) as f: - root.remove(root[1]) - # Calling PyObject_RichCompareBool(root[0], root[1], Py_EQ) - # delegates to Z.__eq__(root[0], root[1]). In particular, - # E.__eq__ is never called due to the Py_EQ fast path. - check(f, never, (Z, 'one'), (E, 'rem')) - - with self.subTest("remove root[1] from [Z(), Z()]"): - root = E('top') - root.extend([Z('one'), Z('rem')]) - self.assertNotEqual(list(root), []) - with equal_wrapper(E) as never, equal_wrapper(Z) as f: - root.remove(root[1]) - # Same arguments as for the [Z(), E()] case. - check(f, never, (Z, 'one'), (Z, 'rem')) + if is_python_implementation() and raises and Z is Y: + # In pure Python, using root.clear() sets the children + # list to [] without calling list.clear(). + # + # For this reason, the call to root.remove() first + # checks root[0] and sets the children list to [] + # since either root[0] or root[1] is an evil element. + # + # Since checking root[1] still uses the old reference + # to the children list, PyObject_RichCompareBool() branches + # to the fast Py_EQ path and Y.__eq__() is called exactly + # once (when checking root[0]). + continue else: cases = self.cases_for_remove_existing_with_mutations(E, Z) for R, U, description in cases: