Skip to content

Commit 1e34da4

Browse files
embgrhettinger
authored andcommitted
bpo-28685: Optimize sorted() list.sort() with type-specialized comparisons (#582)
1 parent 6c6ddf9 commit 1e34da4

File tree

5 files changed

+462
-71
lines changed

5 files changed

+462
-71
lines changed

Lib/test/test_sort.py

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -260,6 +260,120 @@ def my_cmp_reversed(x, y):
260260
self.assertEqual(data, copy2)
261261

262262
#==============================================================================
263+
def check_against_PyObject_RichCompareBool(self, L):
264+
## The idea here is to exploit the fact that unsafe_tuple_compare uses
265+
## PyObject_RichCompareBool for the second elements of tuples. So we have,
266+
## for (most) L, sorted(L) == [y[1] for y in sorted([(0,x) for x in L])]
267+
## This will work as long as __eq__ => not __lt__ for all the objects in L,
268+
## which holds for all the types used below.
269+
##
270+
## Testing this way ensures that the optimized implementation remains consistent
271+
## with the naive implementation, even if changes are made to any of the
272+
## richcompares.
273+
##
274+
## This function tests sorting for three lists (it randomly shuffles each one):
275+
## 1. L
276+
## 2. [(x,) for x in L]
277+
## 3. [((x,),) for x in L]
278+
279+
random.seed(0)
280+
random.shuffle(L)
281+
L_1 = L[:]
282+
L_2 = [(x,) for x in L]
283+
L_3 = [((x,),) for x in L]
284+
for L in [L_1, L_2, L_3]:
285+
optimized = sorted(L)
286+
reference = [y[1] for y in sorted([(0,x) for x in L])]
287+
for (opt, ref) in zip(optimized, reference):
288+
self.assertIs(opt, ref)
289+
#note: not assertEqual! We want to ensure *identical* behavior.
290+
291+
class TestOptimizedCompares(unittest.TestCase):
292+
def test_safe_object_compare(self):
293+
heterogeneous_lists = [[0, 'foo'],
294+
[0.0, 'foo'],
295+
[('foo',), 'foo']]
296+
for L in heterogeneous_lists:
297+
self.assertRaises(TypeError, L.sort)
298+
self.assertRaises(TypeError, [(x,) for x in L].sort)
299+
self.assertRaises(TypeError, [((x,),) for x in L].sort)
300+
301+
float_int_lists = [[1,1.1],
302+
[1<<70,1.1],
303+
[1.1,1],
304+
[1.1,1<<70]]
305+
for L in float_int_lists:
306+
check_against_PyObject_RichCompareBool(self, L)
307+
308+
def test_unsafe_object_compare(self):
309+
310+
# This test is by ppperry. It ensures that unsafe_object_compare is
311+
# verifying ms->key_richcompare == tp->richcompare before comparing.
312+
313+
class WackyComparator(int):
314+
def __lt__(self, other):
315+
elem.__class__ = WackyList2
316+
return int.__lt__(self, other)
317+
318+
class WackyList1(list):
319+
pass
320+
321+
class WackyList2(list):
322+
def __lt__(self, other):
323+
raise ValueError
324+
325+
L = [WackyList1([WackyComparator(i), i]) for i in range(10)]
326+
elem = L[-1]
327+
with self.assertRaises(ValueError):
328+
L.sort()
329+
330+
L = [WackyList1([WackyComparator(i), i]) for i in range(10)]
331+
elem = L[-1]
332+
with self.assertRaises(ValueError):
333+
[(x,) for x in L].sort()
334+
335+
# The following test is also by ppperry. It ensures that
336+
# unsafe_object_compare handles Py_NotImplemented appropriately.
337+
class PointlessComparator:
338+
def __lt__(self, other):
339+
return NotImplemented
340+
L = [PointlessComparator(), PointlessComparator()]
341+
self.assertRaises(TypeError, L.sort)
342+
self.assertRaises(TypeError, [(x,) for x in L].sort)
343+
344+
# The following tests go through various types that would trigger
345+
# ms->key_compare = unsafe_object_compare
346+
lists = [list(range(100)) + [(1<<70)],
347+
[str(x) for x in range(100)] + ['\uffff'],
348+
[bytes(x) for x in range(100)],
349+
[cmp_to_key(lambda x,y: x<y)(x) for x in range(100)]]
350+
for L in lists:
351+
check_against_PyObject_RichCompareBool(self, L)
352+
353+
def test_unsafe_latin_compare(self):
354+
check_against_PyObject_RichCompareBool(self, [str(x) for
355+
x in range(100)])
356+
357+
def test_unsafe_long_compare(self):
358+
check_against_PyObject_RichCompareBool(self, [x for
359+
x in range(100)])
360+
361+
def test_unsafe_float_compare(self):
362+
check_against_PyObject_RichCompareBool(self, [float(x) for
363+
x in range(100)])
364+
365+
def test_unsafe_tuple_compare(self):
366+
# This test was suggested by Tim Peters. It verifies that the tuple
367+
# comparison respects the current tuple compare semantics, which do not
368+
# guarantee that x < x <=> (x,) < (x,)
369+
#
370+
# Note that we don't have to put anything in tuples here, because
371+
# the check function does a tuple test automatically.
372+
373+
check_against_PyObject_RichCompareBool(self, [float('nan')]*100)
374+
check_against_PyObject_RichCompareBool(self, [float('nan') for
375+
_ in range(100)])
376+
#==============================================================================
263377

264378
if __name__ == "__main__":
265379
unittest.main()

Misc/ACKS

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -554,6 +554,7 @@ Tiago Gonçalves
554554
Chris Gonnerman
555555
Shelley Gooch
556556
David Goodger
557+
Elliot Gorokhovsky
557558
Hans de Graaff
558559
Tim Graham
559560
Kim Gräsman
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Optimize list.sort() and sorted() by using type specialized comparisons when
2+
possible.

0 commit comments

Comments
 (0)