From e6d9756fb2fbc94317109ea16488f6788bade650 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Tue, 23 Jan 2024 15:14:46 -0500 Subject: [PATCH] gh-101438: Avoid reference cycle in ElementTree.iterparse. (GH-114269) The iterator returned by ElementTree.iterparse() may hold on to a file descriptor. The reference cycle prevented prompt clean-up of the file descriptor if the returned iterator was not exhausted. (cherry picked from commit ce01ab536f22a3cf095d621f3b3579c1e3567859) Co-authored-by: Sam Gross --- Lib/xml/etree/ElementTree.py | 27 ++++++++++++------- ...-01-18-22-29-28.gh-issue-101438.1-uUi_.rst | 4 +++ 2 files changed, 21 insertions(+), 10 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-01-18-22-29-28.gh-issue-101438.1-uUi_.rst diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py index 42574eefd81beb..ae6575028be11c 100644 --- a/Lib/xml/etree/ElementTree.py +++ b/Lib/xml/etree/ElementTree.py @@ -99,6 +99,7 @@ import collections import collections.abc import contextlib +import weakref from . import ElementPath @@ -1223,13 +1224,14 @@ def iterparse(source, events=None, parser=None): # parser argument of iterparse is removed, this can be killed. pullparser = XMLPullParser(events=events, _parser=parser) - def iterator(source): + if not hasattr(source, "read"): + source = open(source, "rb") + close_source = True + else: close_source = False + + def iterator(source): try: - if not hasattr(source, "read"): - source = open(source, "rb") - close_source = True - yield None while True: yield from pullparser.read_events() # load event buffer @@ -1239,18 +1241,23 @@ def iterator(source): pullparser.feed(data) root = pullparser._close_and_return_root() yield from pullparser.read_events() - it.root = root + it = wr() + if it is not None: + it.root = root finally: if close_source: source.close() class IterParseIterator(collections.abc.Iterator): __next__ = iterator(source).__next__ - it = IterParseIterator() - it.root = None - del iterator, IterParseIterator - next(it) + def __del__(self): + if close_source: + source.close() + + it = IterParseIterator() + wr = weakref.ref(it) + del IterParseIterator return it diff --git a/Misc/NEWS.d/next/Library/2024-01-18-22-29-28.gh-issue-101438.1-uUi_.rst b/Misc/NEWS.d/next/Library/2024-01-18-22-29-28.gh-issue-101438.1-uUi_.rst new file mode 100644 index 00000000000000..9b69b5deb1b5a0 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-01-18-22-29-28.gh-issue-101438.1-uUi_.rst @@ -0,0 +1,4 @@ +Avoid reference cycle in ElementTree.iterparse. The iterator returned by +``ElementTree.iterparse`` may hold on to a file descriptor. The reference +cycle prevented prompt clean-up of the file descriptor if the returned +iterator was not exhausted.