Skip to content

[3.8] bpo-42536: GC track recycled tuples (GH-23623) #23652

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Dec 7, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 14 additions & 1 deletion Lib/test/test_builtin.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import collections
import decimal
import fractions
import gc
import io
import locale
import os
Expand All @@ -27,7 +28,7 @@
from operator import neg
from test.support import (
EnvironmentVarGuard, TESTFN, check_warnings, swap_attr, unlink,
maybe_get_event_loop_policy)
maybe_get_event_loop_policy, cpython_only)
from test.support.script_helper import assert_python_ok
from unittest.mock import MagicMock, patch
try:
Expand Down Expand Up @@ -1573,6 +1574,18 @@ def __iter__(self):

self.assertIs(cm.exception, exception)

@cpython_only
def test_zip_result_gc(self):
# bpo-42536: zip's tuple-reuse speed trick breaks the GC's assumptions
# about what can be untracked. Make sure we re-track result tuples
# whenever we reuse them.
it = zip([[]])
gc.collect()
# That GC collection probably untracked the recycled internal result
# tuple, which is initialized to (None,). Make sure it's re-tracked when
# it's mutated and returned from __next__:
self.assertTrue(gc.is_tracked(next(it)))

def test_format(self):
# Test the basic machinery of the format() builtin. Don't test
# the specifics of the various formatters
Expand Down
19 changes: 19 additions & 0 deletions Lib/test/test_dict.py
Original file line number Diff line number Diff line change
Expand Up @@ -1377,6 +1377,25 @@ def items(self):
d = CustomReversedDict(pairs)
self.assertEqual(pairs[::-1], list(dict(d).items()))

@support.cpython_only
def test_dict_items_result_gc(self):
# bpo-42536: dict.items's tuple-reuse speed trick breaks the GC's
# assumptions about what can be untracked. Make sure we re-track result
# tuples whenever we reuse them.
it = iter({None: []}.items())
gc.collect()
# That GC collection probably untracked the recycled internal result
# tuple, which is initialized to (None, None). Make sure it's re-tracked
# when it's mutated and returned from __next__:
self.assertTrue(gc.is_tracked(next(it)))

@support.cpython_only
def test_dict_items_result_gc(self):
# Same as test_dict_items_result_gc above, but reversed.
it = reversed({None: []}.items())
gc.collect()
self.assertTrue(gc.is_tracked(next(it)))


class CAPITest(unittest.TestCase):

Expand Down
13 changes: 13 additions & 0 deletions Lib/test/test_enumerate.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import operator
import sys
import pickle
import gc

from test import support

Expand Down Expand Up @@ -134,6 +135,18 @@ def test_tuple_reuse(self):
self.assertEqual(len(set(map(id, list(enumerate(self.seq))))), len(self.seq))
self.assertEqual(len(set(map(id, enumerate(self.seq)))), min(1,len(self.seq)))

@support.cpython_only
def test_enumerate_result_gc(self):
# bpo-42536: enumerate's tuple-reuse speed trick breaks the GC's
# assumptions about what can be untracked. Make sure we re-track result
# tuples whenever we reuse them.
it = self.enum([[]])
gc.collect()
# That GC collection probably untracked the recycled internal result
# tuple, which is initialized to (None, None). Make sure it's re-tracked
# when it's mutated and returned from __next__:
self.assertTrue(gc.is_tracked(next(it)))

class MyEnum(enumerate):
pass

Expand Down
47 changes: 47 additions & 0 deletions Lib/test/test_itertools.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
import sys
import struct
import threading
import gc

maxsize = support.MAX_Py_ssize_t
minsize = -maxsize-1

Expand Down Expand Up @@ -1554,6 +1556,51 @@ def test_StopIteration(self):
self.assertRaises(StopIteration, next, f(lambda x:x, []))
self.assertRaises(StopIteration, next, f(lambda x:x, StopNow()))

@support.cpython_only
def test_combinations_result_gc(self):
# bpo-42536: combinations's tuple-reuse speed trick breaks the GC's
# assumptions about what can be untracked. Make sure we re-track result
# tuples whenever we reuse them.
it = combinations([None, []], 1)
next(it)
gc.collect()
# That GC collection probably untracked the recycled internal result
# tuple, which has the value (None,). Make sure it's re-tracked when
# it's mutated and returned from __next__:
self.assertTrue(gc.is_tracked(next(it)))

@support.cpython_only
def test_combinations_with_replacement_result_gc(self):
# Ditto for combinations_with_replacement.
it = combinations_with_replacement([None, []], 1)
next(it)
gc.collect()
self.assertTrue(gc.is_tracked(next(it)))

@support.cpython_only
def test_permutations_result_gc(self):
# Ditto for permutations.
it = permutations([None, []], 1)
next(it)
gc.collect()
self.assertTrue(gc.is_tracked(next(it)))

@support.cpython_only
def test_product_result_gc(self):
# Ditto for product.
it = product([None, []])
next(it)
gc.collect()
self.assertTrue(gc.is_tracked(next(it)))

@support.cpython_only
def test_zip_longest_result_gc(self):
# Ditto for zip_longest.
it = zip_longest([[]])
gc.collect()
self.assertTrue(gc.is_tracked(next(it)))


class TestExamples(unittest.TestCase):

def test_accumulate(self):
Expand Down
11 changes: 11 additions & 0 deletions Lib/test/test_ordered_dict.py
Original file line number Diff line number Diff line change
Expand Up @@ -654,6 +654,17 @@ def test_free_after_iterating(self):
support.check_free_after_iterating(self, lambda d: iter(d.values()), self.OrderedDict)
support.check_free_after_iterating(self, lambda d: iter(d.items()), self.OrderedDict)

@support.cpython_only
def test_ordered_dict_items_result_gc(self):
# bpo-42536: OrderedDict.items's tuple-reuse speed trick breaks the GC's
# assumptions about what can be untracked. Make sure we re-track result
# tuples whenever we reuse them.
it = iter(self.OrderedDict({None: []}).items())
gc.collect()
# That GC collection probably untracked the recycled internal result
# tuple, which is initialized to (None, None). Make sure it's re-tracked
# when it's mutated and returned from __next__:
self.assertTrue(gc.is_tracked(next(it)))

class PurePythonOrderedDictTests(OrderedDictTests, unittest.TestCase):

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
Several built-in and standard library types now ensure that their internal
result tuples are always tracked by the :term:`garbage collector
<garbage collection>`:

- :meth:`collections.OrderedDict.items() <collections.OrderedDict>`

- :meth:`dict.items`

- :func:`enumerate`

- :func:`functools.reduce`

- :func:`itertools.combinations`

- :func:`itertools.combinations_with_replacement`

- :func:`itertools.permutations`

- :func:`itertools.product`

- :func:`itertools.zip_longest`

- :func:`zip`

Previously, they could have become untracked by a prior garbage collection.
Patch by Brandt Bucher.
6 changes: 6 additions & 0 deletions Modules/_functoolsmodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#include "pycore_pystate.h"
#include "pycore_tupleobject.h"
#include "structmember.h"
#include "pycore_object.h" // _PyObject_GC_TRACK

/* _functools module written and maintained
by Hye-Shik Chang <perky@FreeBSD.org>
Expand Down Expand Up @@ -633,6 +634,11 @@ functools_reduce(PyObject *self, PyObject *args)
if ((result = PyObject_Call(func, args, NULL)) == NULL) {
goto Fail;
}
// bpo-42536: The GC may have untracked this args tuple. Since we're
// recycling it, make sure it's tracked again:
if (!_PyObject_GC_IS_TRACKED(args)) {
_PyObject_GC_TRACK(args);
}
}
}

Expand Down
26 changes: 26 additions & 0 deletions Modules/itertoolsmodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#include "Python.h"
#include "pycore_tupleobject.h"
#include "structmember.h"
#include "pycore_object.h" // _PyObject_GC_TRACK()

/* Itertools module written and maintained
by Raymond D. Hettinger <python@rcn.com>
Expand Down Expand Up @@ -2255,6 +2256,11 @@ product_next(productobject *lz)
lz->result = result;
Py_DECREF(old_result);
}
// bpo-42536: The GC may have untracked this result tuple. Since we're
// recycling it, make sure it's tracked again:
else if (!_PyObject_GC_IS_TRACKED(result)) {
_PyObject_GC_TRACK(result);
}
/* Now, we've got the only copy so we can update it in-place */
assert (npools==0 || Py_REFCNT(result) == 1);

Expand Down Expand Up @@ -2580,6 +2586,11 @@ combinations_next(combinationsobject *co)
co->result = result;
Py_DECREF(old_result);
}
// bpo-42536: The GC may have untracked this result tuple. Since we're
// recycling it, make sure it's tracked again:
else if (!_PyObject_GC_IS_TRACKED(result)) {
_PyObject_GC_TRACK(result);
}
/* Now, we've got the only copy so we can update it in-place
* CPython's empty tuple is a singleton and cached in
* PyTuple's freelist.
Expand Down Expand Up @@ -2916,6 +2927,11 @@ cwr_next(cwrobject *co)
co->result = result;
Py_DECREF(old_result);
}
// bpo-42536: The GC may have untracked this result tuple. Since we're
// recycling it, make sure it's tracked again:
else if (!_PyObject_GC_IS_TRACKED(result)) {
_PyObject_GC_TRACK(result);
}
/* Now, we've got the only copy so we can update it in-place CPython's
empty tuple is a singleton and cached in PyTuple's freelist. */
assert(r == 0 || Py_REFCNT(result) == 1);
Expand Down Expand Up @@ -3259,6 +3275,11 @@ permutations_next(permutationsobject *po)
po->result = result;
Py_DECREF(old_result);
}
// bpo-42536: The GC may have untracked this result tuple. Since we're
// recycling it, make sure it's tracked again:
else if (!_PyObject_GC_IS_TRACKED(result)) {
_PyObject_GC_TRACK(result);
}
/* Now, we've got the only copy so we can update it in-place */
assert(r == 0 || Py_REFCNT(result) == 1);

Expand Down Expand Up @@ -4536,6 +4557,11 @@ zip_longest_next(ziplongestobject *lz)
PyTuple_SET_ITEM(result, i, item);
Py_DECREF(olditem);
}
// bpo-42536: The GC may have untracked this result tuple. Since we're
// recycling it, make sure it's tracked again:
if (!_PyObject_GC_IS_TRACKED(result)) {
_PyObject_GC_TRACK(result);
}
} else {
result = PyTuple_New(tuplesize);
if (result == NULL)
Expand Down
10 changes: 10 additions & 0 deletions Objects/dictobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -3769,6 +3769,11 @@ dictiter_iternextitem(dictiterobject *di)
Py_INCREF(result);
Py_DECREF(oldkey);
Py_DECREF(oldvalue);
// bpo-42536: The GC may have untracked this result tuple. Since we're
// recycling it, make sure it's tracked again:
if (!_PyObject_GC_IS_TRACKED(result)) {
_PyObject_GC_TRACK(result);
}
}
else {
result = PyTuple_New(2);
Expand Down Expand Up @@ -3884,6 +3889,11 @@ dictreviter_iternext(dictiterobject *di)
Py_INCREF(result);
Py_DECREF(oldkey);
Py_DECREF(oldvalue);
// bpo-42536: The GC may have untracked this result tuple. Since
// we're recycling it, make sure it's tracked again:
if (!_PyObject_GC_IS_TRACKED(result)) {
_PyObject_GC_TRACK(result);
}
}
else {
result = PyTuple_New(2);
Expand Down
11 changes: 11 additions & 0 deletions Objects/enumobject.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
/* enumerate object */

#include "Python.h"
#include "pycore_object.h" // _PyObject_GC_TRACK()

#include "clinic/enumobject.c.h"

Expand Down Expand Up @@ -130,6 +131,11 @@ enum_next_long(enumobject *en, PyObject* next_item)
PyTuple_SET_ITEM(result, 1, next_item);
Py_DECREF(old_index);
Py_DECREF(old_item);
// bpo-42536: The GC may have untracked this result tuple. Since we're
// recycling it, make sure it's tracked again:
if (!_PyObject_GC_IS_TRACKED(result)) {
_PyObject_GC_TRACK(result);
}
return result;
}
result = PyTuple_New(2);
Expand Down Expand Up @@ -175,6 +181,11 @@ enum_next(enumobject *en)
PyTuple_SET_ITEM(result, 1, next_item);
Py_DECREF(old_index);
Py_DECREF(old_item);
// bpo-42536: The GC may have untracked this result tuple. Since we're
// recycling it, make sure it's tracked again:
if (!_PyObject_GC_IS_TRACKED(result)) {
_PyObject_GC_TRACK(result);
}
return result;
}
result = PyTuple_New(2);
Expand Down
5 changes: 5 additions & 0 deletions Objects/odictobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -1766,6 +1766,11 @@ odictiter_iternext(odictiterobject *di)
Py_INCREF(result);
Py_DECREF(PyTuple_GET_ITEM(result, 0)); /* borrowed */
Py_DECREF(PyTuple_GET_ITEM(result, 1)); /* borrowed */
// bpo-42536: The GC may have untracked this result tuple. Since we're
// recycling it, make sure it's tracked again:
if (!_PyObject_GC_IS_TRACKED(result)) {
_PyObject_GC_TRACK(result);
}
}
else {
result = PyTuple_New(2);
Expand Down
6 changes: 6 additions & 0 deletions Python/bltinmodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#include <ctype.h>
#include "ast.h"
#undef Yield /* undefine macro conflicting with <winbase.h> */
#include "pycore_object.h" // _PyObject_GC_TRACK()
#include "pycore_pystate.h"
#include "pycore_tupleobject.h"

Expand Down Expand Up @@ -2618,6 +2619,11 @@ zip_next(zipobject *lz)
PyTuple_SET_ITEM(result, i, item);
Py_DECREF(olditem);
}
// bpo-42536: The GC may have untracked this result tuple. Since we're
// recycling it, make sure it's tracked again:
if (!_PyObject_GC_IS_TRACKED(result)) {
_PyObject_GC_TRACK(result);
}
} else {
result = PyTuple_New(tuplesize);
if (result == NULL)
Expand Down