Skip to content

Commit 1533d1d

Browse files
committed
Make list, tuple and range iteration more thread-safe, and actually test
concurrent iteration. (This is prep work for enabling specialization of FOR_ITER in free-threaded builds.) The basic premise is: - Iterating over a shared _iterable_ (list, tuple or range) should be safe, not involve data races, and behave like iteration normally does. - Using a shared _iterator_ should not crash or involve data races, and should only produce items regular iteration would produce. It is _not_ guaranteed to produce all items, or produce each item only once. Providing stronger guarantees is possible for some of these iterators, but it's not always straight-forward and can significantly hamper the common case. Since iterators in general aren't shared between threads, and it's simply impossible to concurrently use many iterators (like generators), better to make sharing iterators without explicit synchronization clearly wrong. Specific issues fixed in order to make the tests pass: - List iteration could occasionally crash when a shared list wasn't already marked as shared when reallocated. - Tuple iteration could occasionally crash when the iterator's reference to the tuple was cleared on exhaustion. Like with list iteration, in free-threaded builds we can't safely and efficiently clear the iterator's reference to the iterable (doing it safely would mean extra, slow refcount operations), so just keep the iterable reference around. - Fast range iterators (for integers that fit in C longs) shared between threads would sometimes produce values past the end of the range, because the iterators use two bits of state that we can't efficiently update atomically. Rewriting the iterators to have a single bit of state is possible, but probably means more math for each iteration and may not be worth it. - Long range iterators (for other numbers) shared between threads would crash catastrophically in a variety of ways. This now uses a critical section. 
Rewriting this to be more efficient is probably possible, but since it deals with arbitrary Python objects it's difficult to get right. There seem to be no more existing races in list_get_item_ref, so drop it from the tsan suppression list.
1 parent f9a5a3a commit 1533d1d

File tree

7 files changed

+201
-33
lines changed

7 files changed

+201
-33
lines changed

Include/internal/pycore_range.h

+6
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,12 @@ typedef struct {
1313
long start;
1414
long step;
1515
long len;
16+
#ifdef Py_GIL_DISABLED
17+
// Make sure multi-threaded use of a single iterator doesn't produce
18+
// values past the end of the range (even though it is NOT guaranteed to
19+
// uniquely produce all the values in the range!)
20+
long stop;
21+
#endif
1622
} _PyRangeIterObject;
1723

1824
#ifdef __cplusplus

Lib/test/libregrtest/tsan.py

+1
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
'test_threadsignals',
2727
'test_weakref',
2828
'test_free_threading.test_slots',
29+
'test_free_threading.test_iteration',
2930
]
3031

3132

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
import sys
2+
import threading
3+
import unittest
4+
from test import support
5+
6+
# The race conditions these tests were written for only happen every now and
7+
# then, even with the current numbers. To find rare race conditions, bumping
8+
# these up will help, but it makes the test runtime highly variable under
9+
# free-threading. Overhead is much higher under ThreadSanitizer, but it's
10+
# also much better at detecting certain races, so we don't need as many
11+
# items/threads.
12+
if support.check_sanitizer(thread=True):
13+
NUMITEMS = 1000
14+
NUMTHREADS = 2
15+
else:
16+
NUMITEMS = 50000
17+
NUMTHREADS = 3
18+
NUMMUTATORS = 2
19+
20+
class ContendedTupleIterationTest(unittest.TestCase):
21+
def make_testdata(self, n):
22+
return tuple(range(n))
23+
24+
def assert_iterator_results(self, results, expected):
25+
# Most iterators are not atomic (yet?) so they can skip or duplicate
26+
# items, but they should not invent new items (like the range
27+
# iterator has done in the past).
28+
extra_items = set(results) - set(expected)
29+
self.assertEqual(set(), extra_items)
30+
31+
def run_threads(self, func, *args, numthreads=NUMTHREADS):
32+
threads = []
33+
for _ in range(numthreads):
34+
t = threading.Thread(target=func, args=args)
35+
t.start()
36+
threads.append(t)
37+
return threads
38+
39+
def test_iteration(self):
40+
"""Test iteration over a shared container"""
41+
seq = self.make_testdata(NUMITEMS)
42+
results = []
43+
start = threading.Event()
44+
def worker():
45+
idx = 0
46+
start.wait()
47+
for item in seq:
48+
idx += 1
49+
results.append(idx)
50+
threads = self.run_threads(worker)
51+
start.set()
52+
for t in threads:
53+
t.join()
54+
# Each thread has its own iterator, so results should be entirely predictable.
55+
self.assertEqual(results, [NUMITEMS] * NUMTHREADS)
56+
57+
def test_shared_iterator(self):
58+
"""Test iteration over a shared iterator"""
59+
seq = self.make_testdata(NUMITEMS)
60+
it = iter(seq)
61+
results = []
62+
start = threading.Event()
63+
def worker():
64+
items = []
65+
start.wait()
66+
# We want a tight loop, so put items in the shared list at the end.
67+
for item in it:
68+
items.append(item)
69+
results.extend(items)
70+
threads = self.run_threads(worker)
71+
start.set()
72+
for t in threads:
73+
t.join()
74+
self.assert_iterator_results(sorted(results), seq)
75+
76+
class ContendedListIterationTest(ContendedTupleIterationTest):
77+
def make_testdata(self, n):
78+
return list(range(n))
79+
80+
def test_iteration_while_mutating(self):
81+
"""Test iteration over a shared mutating container."""
82+
seq = self.make_testdata(NUMITEMS)
83+
results = []
84+
start = threading.Event()
85+
endmutate = threading.Event()
86+
def mutator():
87+
orig = seq[:]
88+
# Make changes big enough to cause resizing of the list, with
89+
# items shifted around for good measure.
90+
replacement = (orig * 3)[NUMITEMS//2:]
91+
start.wait()
92+
while not endmutate.is_set():
93+
seq[:] = replacement
94+
seq[:] = orig
95+
def worker():
96+
items = []
97+
start.wait()
98+
# We want a tight loop, so put items in the shared list at the end.
99+
for item in seq:
100+
items.append(item)
101+
results.extend(items)
102+
mutators = ()
103+
try:
104+
threads = self.run_threads(worker)
105+
mutators = self.run_threads(mutator, numthreads=NUMMUTATORS)
106+
start.set()
107+
for t in threads:
108+
t.join()
109+
finally:
110+
endmutate.set()
111+
for m in mutators:
112+
m.join()
113+
self.assert_iterator_results(results, list(seq))
114+
115+
116+
class ContendedRangeIterationTest(ContendedTupleIterationTest):
117+
def make_testdata(self, n):
118+
return range(n)
119+
120+
121+
class ContendedLongRangeIterationTest(ContendedTupleIterationTest):
122+
def make_testdata(self, n):
123+
return range(0, sys.maxsize*n, sys.maxsize)
124+

Objects/listobject.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -344,7 +344,7 @@ list_item_impl(PyListObject *self, Py_ssize_t idx)
344344
static inline PyObject*
345345
list_get_item_ref(PyListObject *op, Py_ssize_t i)
346346
{
347-
if (!_Py_IsOwnedByCurrentThread((PyObject *)op) && !_PyObject_GC_IS_SHARED(op)) {
347+
if (!_Py_IsOwnedByCurrentThread((PyObject *)op)) {
348348
return list_item_impl(op, i);
349349
}
350350
// Need atomic operation for the getting size.

Objects/rangeobject.c

+63-28
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
#include "pycore_modsupport.h" // _PyArg_NoKwnames()
88
#include "pycore_range.h"
99
#include "pycore_tuple.h" // _PyTuple_ITEMS()
10+
#include "pycore_pyatomic_ft_wrappers.h"
1011

1112

1213
/* Support objects whose length is > PY_SSIZE_T_MAX.
@@ -816,10 +817,19 @@ PyTypeObject PyRange_Type = {
816817
static PyObject *
817818
rangeiter_next(_PyRangeIterObject *r)
818819
{
819-
if (r->len > 0) {
820-
long result = r->start;
821-
r->start = result + r->step;
822-
r->len--;
820+
long len = FT_ATOMIC_LOAD_LONG_RELAXED(r->len);
821+
if (len > 0) {
822+
long result = FT_ATOMIC_LOAD_LONG_RELAXED(r->start);
823+
FT_ATOMIC_STORE_LONG_RELAXED(r->start, result + r->step);
824+
FT_ATOMIC_STORE_LONG_RELAXED(r->len, len - 1);
825+
#ifdef Py_GIL_DISABLED
826+
// Concurrent calls can cause start to be updated by another thread
827+
// after our len check, so double-check that we're not past the end.
828+
if ((r->step > 0 && result >= r->stop) ||
829+
(r->step < 0 && result <= r->stop)) {
830+
return NULL;
831+
}
832+
#endif
823833
return PyLong_FromLong(result);
824834
}
825835
return NULL;
@@ -828,7 +838,7 @@ rangeiter_next(_PyRangeIterObject *r)
828838
static PyObject *
829839
rangeiter_len(_PyRangeIterObject *r, PyObject *Py_UNUSED(ignored))
830840
{
831-
return PyLong_FromLong(r->len);
841+
return PyLong_FromLong(FT_ATOMIC_LOAD_LONG_RELAXED(r->len));
832842
}
833843

834844
PyDoc_STRVAR(length_hint_doc,
@@ -841,10 +851,14 @@ rangeiter_reduce(_PyRangeIterObject *r, PyObject *Py_UNUSED(ignored))
841851
PyObject *range;
842852

843853
/* create a range object for pickling */
844-
start = PyLong_FromLong(r->start);
854+
start = PyLong_FromLong(FT_ATOMIC_LOAD_LONG_RELAXED(r->start));
845855
if (start == NULL)
846856
goto err;
857+
#ifdef Py_GIL_DISABLED
858+
stop = PyLong_FromLong(r->stop);
859+
#else
847860
stop = PyLong_FromLong(r->start + r->len * r->step);
861+
#endif
848862
if (stop == NULL)
849863
goto err;
850864
step = PyLong_FromLong(r->step);
@@ -870,13 +884,15 @@ rangeiter_setstate(_PyRangeIterObject *r, PyObject *state)
870884
long index = PyLong_AsLong(state);
871885
if (index == -1 && PyErr_Occurred())
872886
return NULL;
887+
long len = FT_ATOMIC_LOAD_LONG_RELAXED(r->len);
873888
/* silently clip the index value */
874889
if (index < 0)
875890
index = 0;
876-
else if (index > r->len)
877-
index = r->len; /* exhausted iterator */
878-
r->start += index * r->step;
879-
r->len -= index;
891+
else if (index > len)
892+
index = len; /* exhausted iterator */
893+
FT_ATOMIC_STORE_LONG_RELAXED(r->start,
894+
FT_ATOMIC_LOAD_LONG_RELAXED(r->start) + index * r->step);
895+
FT_ATOMIC_STORE_LONG_RELAXED(r->len, len - index);
880896
Py_RETURN_NONE;
881897
}
882898

@@ -966,6 +982,9 @@ fast_range_iter(long start, long stop, long step, long len)
966982
it->start = start;
967983
it->step = step;
968984
it->len = len;
985+
#ifdef Py_GIL_DISABLED
986+
it->stop = stop;
987+
#endif
969988
return (PyObject *)it;
970989
}
971990

@@ -979,75 +998,87 @@ typedef struct {
979998
static PyObject *
980999
longrangeiter_len(longrangeiterobject *r, PyObject *no_args)
9811000
{
982-
Py_INCREF(r->len);
983-
return r->len;
1001+
PyObject *len;
1002+
Py_BEGIN_CRITICAL_SECTION(r);
1003+
len = Py_NewRef(r->len);
1004+
Py_END_CRITICAL_SECTION();
1005+
return len;
9841006
}
9851007

9861008
static PyObject *
9871009
longrangeiter_reduce(longrangeiterobject *r, PyObject *Py_UNUSED(ignored))
9881010
{
9891011
PyObject *product, *stop=NULL;
990-
PyObject *range;
1012+
PyObject *range, *result=NULL;
9911013

1014+
Py_BEGIN_CRITICAL_SECTION(r);
9921015
/* create a range object for pickling. Must calculate the "stop" value */
9931016
product = PyNumber_Multiply(r->len, r->step);
9941017
if (product == NULL)
995-
return NULL;
1018+
goto fail;
9961019
stop = PyNumber_Add(r->start, product);
9971020
Py_DECREF(product);
9981021
if (stop == NULL)
999-
return NULL;
1022+
goto fail;
10001023
range = (PyObject*)make_range_object(&PyRange_Type,
10011024
Py_NewRef(r->start), stop, Py_NewRef(r->step));
10021025
if (range == NULL) {
10031026
Py_DECREF(r->start);
10041027
Py_DECREF(stop);
10051028
Py_DECREF(r->step);
1006-
return NULL;
1029+
goto fail;
10071030
}
10081031

10091032
/* return the result */
1010-
return Py_BuildValue("N(N)O", _PyEval_GetBuiltin(&_Py_ID(iter)),
1011-
range, Py_None);
1033+
result = Py_BuildValue("N(N)O", _PyEval_GetBuiltin(&_Py_ID(iter)),
1034+
range, Py_None);
1035+
fail:
1036+
Py_END_CRITICAL_SECTION();
1037+
return result;
10121038
}
10131039

10141040
static PyObject *
10151041
longrangeiter_setstate(longrangeiterobject *r, PyObject *state)
10161042
{
10171043
PyObject *zero = _PyLong_GetZero(); // borrowed reference
10181044
int cmp;
1045+
PyObject *result = NULL;
10191046

1047+
Py_BEGIN_CRITICAL_SECTION(r);
10201048
/* clip the value */
10211049
cmp = PyObject_RichCompareBool(state, zero, Py_LT);
10221050
if (cmp < 0)
1023-
return NULL;
1051+
goto fail;
10241052
if (cmp > 0) {
10251053
state = zero;
10261054
}
10271055
else {
10281056
cmp = PyObject_RichCompareBool(r->len, state, Py_LT);
10291057
if (cmp < 0)
1030-
return NULL;
1058+
goto fail;
10311059
if (cmp > 0)
10321060
state = r->len;
10331061
}
10341062
PyObject *product = PyNumber_Multiply(state, r->step);
10351063
if (product == NULL)
1036-
return NULL;
1064+
goto fail;
10371065
PyObject *new_start = PyNumber_Add(r->start, product);
10381066
Py_DECREF(product);
10391067
if (new_start == NULL)
1040-
return NULL;
1068+
goto fail;
10411069
PyObject *new_len = PyNumber_Subtract(r->len, state);
10421070
if (new_len == NULL) {
10431071
Py_DECREF(new_start);
1044-
return NULL;
1072+
goto fail;
10451073
}
10461074
PyObject *tmp = r->start;
10471075
r->start = new_start;
10481076
Py_SETREF(r->len, new_len);
10491077
Py_DECREF(tmp);
1050-
Py_RETURN_NONE;
1078+
result = Py_NewRef(Py_None);
1079+
fail:
1080+
Py_END_CRITICAL_SECTION();
1081+
return result;
10511082
}
10521083

10531084
static PyMethodDef longrangeiter_methods[] = {
@@ -1072,21 +1103,25 @@ longrangeiter_dealloc(longrangeiterobject *r)
10721103
static PyObject *
10731104
longrangeiter_next(longrangeiterobject *r)
10741105
{
1106+
PyObject *result = NULL;
1107+
Py_BEGIN_CRITICAL_SECTION(r);
10751108
if (PyObject_RichCompareBool(r->len, _PyLong_GetZero(), Py_GT) != 1)
1076-
return NULL;
1109+
goto fail;
10771110

10781111
PyObject *new_start = PyNumber_Add(r->start, r->step);
10791112
if (new_start == NULL) {
1080-
return NULL;
1113+
goto fail;
10811114
}
10821115
PyObject *new_len = PyNumber_Subtract(r->len, _PyLong_GetOne());
10831116
if (new_len == NULL) {
10841117
Py_DECREF(new_start);
1085-
return NULL;
1118+
goto fail;
10861119
}
1087-
PyObject *result = r->start;
1120+
result = r->start;
10881121
r->start = new_start;
10891122
Py_SETREF(r->len, new_len);
1123+
fail:
1124+
Py_END_CRITICAL_SECTION();
10901125
return result;
10911126
}
10921127

Objects/tupleobject.c

+6-3
Original file line numberDiff line numberDiff line change
@@ -1017,14 +1017,17 @@ tupleiter_next(PyObject *obj)
10171017
return NULL;
10181018
assert(PyTuple_Check(seq));
10191019

1020-
if (it->it_index < PyTuple_GET_SIZE(seq)) {
1021-
item = PyTuple_GET_ITEM(seq, it->it_index);
1022-
++it->it_index;
1020+
Py_ssize_t index = FT_ATOMIC_LOAD_SSIZE_RELAXED(it->it_index);
1021+
if (index < PyTuple_GET_SIZE(seq)) {
1022+
item = PyTuple_GET_ITEM(seq, index);
1023+
FT_ATOMIC_STORE_SSIZE_RELAXED(it->it_index, index + 1);
10231024
return Py_NewRef(item);
10241025
}
10251026

1027+
#ifndef Py_GIL_DISABLED
10261028
it->it_seq = NULL;
10271029
Py_DECREF(seq);
1030+
#endif
10281031
return NULL;
10291032
}
10301033

0 commit comments

Comments
 (0)