From 89c278ff549b14e64231b44e15e5fc019cd1a99a Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Tue, 7 Mar 2017 20:41:57 -0700 Subject: [PATCH 01/47] Added data-aware optimizations to list.sort() --- Objects/listobject.c | 339 +++++++++++++++++++++++++++++++++++++++---- 1 file changed, 308 insertions(+), 31 deletions(-) diff --git a/Objects/listobject.c b/Objects/listobject.c index 473bd20874d0ff..f2da0d252f3de6 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -1031,11 +1031,8 @@ sortslice_advance(sortslice *slice, Py_ssize_t n) slice->values += n; } -/* Comparison function: PyObject_RichCompareBool with Py_LT. - * Returns -1 on error, 1 if x < y, 0 if x >= y. - */ - -#define ISLT(X, Y) (PyObject_RichCompareBool(X, Y, Py_LT)) +/* Macros for comparing keys: */ +#define ISLT(X, Y) ((*compare_funcs.key_compare)(X, Y, compare_funcs)) /* Compare X to Y via "<". Goto "fail" if the comparison raises an error. Else "k" is set to true iff Xob_type->tp_richcompare */ + PyObject* (*key_richcompare)(PyObject*, PyObject*, int); + + /* This function is used by unsafe_tuple_compare to compare the first elements + * of tuples. It may be set to safe_object_compare, but the idea is that hopefully + * we can assume more, and use one of the special-case compares. */ + int (*tuple_elem_compare)(PyObject*, PyObject*, CompareFuncs); +}; + +/* These are the special case compare functions. + * compare_funcs.key_compare will always point to one of these: */ + +/* Heterogeneous compare: default, always safe to fall back on. */ +static int +safe_object_compare(PyObject* v, PyObject* w, CompareFuncs compare_funcs) +{ + /* No assumptions necessary! */ + return PyObject_RichCompareBool(v, w, Py_LT); +} + +/* Homogeneous compare: safe for any two compareable objects of the same type. + * (compare_funcs.key_richcompare is set to ob_type->tp_richcompare in the + * pre-sort check.) + */ +static int +unsafe_object_compare(PyObject* v, PyObject* w, CompareFuncs compare_funcs) +{ + /* Modified from Objects/object.c:PyObject_RichCompareBool, assuming: */ + #ifdef Py_DEBUG + assert(v->ob_type == w->ob_type && + v->ob_type->tp_richcompare != NULL && + v->ob_type->tp_richcompare == compare_funcs.key_richcompare); + #endif + + PyObject* res = (*compare_funcs.key_richcompare)(v, w, Py_LT); + if (res == NULL) + return -1; + int ok; + if (PyBool_Check(res)){ + ok = (res == Py_True); + } + else { + ok = PyObject_IsTrue(res); + } + Py_DECREF(res); + return ok; +} + +/* Latin string compare: safe for any two latin (one byte per char) strings. */ +static int +unsafe_latin_compare(PyObject* v, PyObject* w, CompareFuncs compare_funcs){ + /* Modified from Objects/unicodeobject.c:unicode_compare, assuming: */ + #ifdef Py_DEBUG + assert(v->ob_type == w->ob_type && + v->ob_type == &PyUnicode_Type && + PyUnicode_KIND(v) == PyUnicode_KIND(w) && + PyUnicode_KIND(v) == PyUnicode_1BYTE_KIND); + #endif + + int len = Py_MIN(PyUnicode_GET_LENGTH(v), PyUnicode_GET_LENGTH(w)); + int res = memcmp(PyUnicode_DATA(v), PyUnicode_DATA(w), len); + + return (res != 0 ? + res < 0 : + PyUnicode_GET_LENGTH(v) < PyUnicode_GET_LENGTH(w)); +} + +/* Bounded int compare: compare any two longs that fit in a single machine word. */ +static int +unsafe_long_compare(PyObject *v, PyObject *w, CompareFuncs compare_funcs) +{ + /* Modified from Objects/longobject.c:long_compare, assuming: */ + #ifdef Py_DEBUG + assert(v->ob_type == w->ob_type && + v->ob_type == PyLong_Type && + Py_ABS(Py_SIZE(v)) == Py_ABS(Py_SIZE(w)) && + Py_ABS(Py_SIZE(v)) <= 1); + #endif + + PyLongObject *vl, *wl; + vl = (PyLongObject*)v; + wl = (PyLongObject*)w; + + sdigit v0 = Py_SIZE(vl) == 0 ? 0 : (sdigit)vl->ob_digit[0]; + sdigit w0 = Py_SIZE(wl) == 0 ? 0 : (sdigit)wl->ob_digit[0]; + + if (Py_SIZE(vl) < 0) + v0 = -v0; + if (Py_SIZE(wl) < 0) + w0 = -w0; + + return v0 < w0; +} + +/* Float compare: compare any two floats. */ +static int +unsafe_float_compare(PyObject *v, PyObject *w, CompareFuncs compare_funcs){ + /* Modified from Objects/floatobject.c:float_richcompare, assuming: */ + #ifdef Py_DEBUG + assert(v->ob_type == w->ob_type && + v->ob_type == &PyFloat_Type); + #endif + + return PyFloat_AS_DOUBLE(v) < PyFloat_AS_DOUBLE(w); +} + +/* Tuple compare: compare any two non-empty tuples. + * This is the most complicated special case: since the tuple elements themselves + * must of course be compared, we can optimize on two levels. Namely, we make + * the same homogeneity assumptions about the first elements of the tuples in + * our list as we do about the list elements themselves. We then replace the call to + * PyObject_RichCompareBool within the tuple comparison with special case compare, + * based on which assumptions the first elements of the tuples satisfy. + * + * Note that we must therefore ensure assumptions in both unsafe_tuple_compare and + * compare_funcs.tuple_elem_compare are satisfied. If the first elements are not all + * homogeneous, we can always set + * compare_funcs.tuple_elem_compare = safe_object_compare. */ +static int +unsafe_tuple_compare(PyObject* v, PyObject* w, CompareFuncs compare_funcs) +{ + /* Modified from Objects/tupleobject.c:tuplerichcompare, assuming: */ + #ifdef Py_DEBUG + assert(v->ob_type == w->ob_type && + v->ob_type == &PyTuple_Type && + Py_SIZE(v) > 0 && + Py_SIZE(w) > 0); + #endif + + PyTupleObject *vt, *wt; + Py_ssize_t i; + Py_ssize_t vlen, wlen; + + vt = (PyTupleObject *)v; + wt = (PyTupleObject *)w; + + /* Is v[0] < w[0]? */ + int k = (*compare_funcs.tuple_elem_compare)(vt->ob_item[0], + wt->ob_item[0], + compare_funcs); + if (k < 0) + return -1; + if (k) + return 1; + + vlen = Py_SIZE(vt); + wlen = Py_SIZE(wt); + + /* Well, are either of the tuples are singleton? */ + if (vlen == 1 || wlen == 1) + return 0; + + /* Well, is w[0] < v[0]? */ + k = (*compare_funcs.tuple_elem_compare)(wt->ob_item[0], + vt->ob_item[0], + compare_funcs); + if (k < 0) + return -1; + if (k) + return 0; + + /* Out of options: v[0] == w[0]! We need to look at v[1:] and w[1:]. + * We can use code copied straight from tupleobject.c:tuplerichcompare: */ + for (i = 0; i < vlen && i < wlen; i++) { + k = PyObject_RichCompareBool(vt->ob_item[i], + wt->ob_item[i], + Py_EQ); + if (k < 0) + return -1; + if (!k) + break; + } + + if (i >= vlen || i >= wlen) { + return vlen < wlen; + } + + return PyObject_RichCompareBool(vt->ob_item[i], wt->ob_item[i], Py_LT); +} + /* binarysort is the best method for sorting small arrays: it does few compares, but can do data movement quadratic in the number of elements. @@ -1056,7 +1247,7 @@ sortslice_advance(sortslice *slice, Py_ssize_t n) the input (nothing is lost or duplicated). */ static int -binarysort(sortslice lo, PyObject **hi, PyObject **start) +binarysort(sortslice lo, PyObject **hi, PyObject **start, CompareFuncs compare_funcs) { Py_ssize_t k; PyObject **l, **p, **r; @@ -1130,7 +1321,7 @@ elements to get out of order). Returns -1 in case of error. */ static Py_ssize_t -count_run(PyObject **lo, PyObject **hi, int *descending) +count_run(PyObject **lo, PyObject **hi, int *descending, CompareFuncs compare_funcs) { Py_ssize_t k; Py_ssize_t n; @@ -1185,7 +1376,8 @@ key, and the last n-k should follow key. Returns -1 on error. See listsort.txt for info on the method. */ static Py_ssize_t -gallop_left(PyObject *key, PyObject **a, Py_ssize_t n, Py_ssize_t hint) +gallop_left(PyObject *key, PyObject **a, Py_ssize_t n, Py_ssize_t hint, + CompareFuncs compare_funcs) { Py_ssize_t ofs; Py_ssize_t lastofs; @@ -1276,7 +1468,8 @@ we're sticking to "<" comparisons that it's much harder to follow if written as one routine with yet another "left or right?" flag. */ static Py_ssize_t -gallop_right(PyObject *key, PyObject **a, Py_ssize_t n, Py_ssize_t hint) +gallop_right(PyObject *key, PyObject **a, Py_ssize_t n, Py_ssize_t hint, + CompareFuncs compare_funcs) { Py_ssize_t ofs; Py_ssize_t lastofs; @@ -1490,7 +1683,7 @@ merge_getmem(MergeState *ms, Py_ssize_t need) */ static Py_ssize_t merge_lo(MergeState *ms, sortslice ssa, Py_ssize_t na, - sortslice ssb, Py_ssize_t nb) + sortslice ssb, Py_ssize_t nb, CompareFuncs compare_funcs) { Py_ssize_t k; sortslice dest; @@ -1557,7 +1750,7 @@ merge_lo(MergeState *ms, sortslice ssa, Py_ssize_t na, assert(na > 1 && nb > 0); min_gallop -= min_gallop > 1; ms->min_gallop = min_gallop; - k = gallop_right(ssb.keys[0], ssa.keys, na, 0); + k = gallop_right(ssb.keys[0], ssa.keys, na, 0, compare_funcs); acount = k; if (k) { if (k < 0) @@ -1580,7 +1773,7 @@ merge_lo(MergeState *ms, sortslice ssa, Py_ssize_t na, if (nb == 0) goto Succeed; - k = gallop_left(ssa.keys[0], ssb.keys, nb, 0); + k = gallop_left(ssa.keys[0], ssb.keys, nb, 0, compare_funcs); bcount = k; if (k) { if (k < 0) @@ -1622,7 +1815,7 @@ merge_lo(MergeState *ms, sortslice ssa, Py_ssize_t na, */ static Py_ssize_t merge_hi(MergeState *ms, sortslice ssa, Py_ssize_t na, - sortslice ssb, Py_ssize_t nb) + sortslice ssb, Py_ssize_t nb, CompareFuncs compare_funcs) { Py_ssize_t k; sortslice dest, basea, baseb; @@ -1695,7 +1888,7 @@ merge_hi(MergeState *ms, sortslice ssa, Py_ssize_t na, assert(na > 0 && nb > 1); min_gallop -= min_gallop > 1; ms->min_gallop = min_gallop; - k = gallop_right(ssb.keys[0], basea.keys, na, na-1); + k = gallop_right(ssb.keys[0], basea.keys, na, na-1, compare_funcs); if (k < 0) goto Fail; k = na - k; @@ -1713,7 +1906,7 @@ merge_hi(MergeState *ms, sortslice ssa, Py_ssize_t na, if (nb == 1) goto CopyA; - k = gallop_left(ssa.keys[0], baseb.keys, nb, nb-1); + k = gallop_left(ssa.keys[0], baseb.keys, nb, nb-1, compare_funcs); if (k < 0) goto Fail; k = nb - k; @@ -1760,7 +1953,7 @@ merge_hi(MergeState *ms, sortslice ssa, Py_ssize_t na, * Returns 0 on success, -1 on error. */ static Py_ssize_t -merge_at(MergeState *ms, Py_ssize_t i) +merge_at(MergeState *ms, Py_ssize_t i, CompareFuncs compare_funcs) { sortslice ssa, ssb; Py_ssize_t na, nb; @@ -1790,7 +1983,7 @@ merge_at(MergeState *ms, Py_ssize_t i) /* Where does b start in a? Elements in a before that can be * ignored (already in place). */ - k = gallop_right(*ssb.keys, ssa.keys, na, 0); + k = gallop_right(*ssb.keys, ssa.keys, na, 0, compare_funcs); if (k < 0) return -1; sortslice_advance(&ssa, k); @@ -1801,7 +1994,7 @@ merge_at(MergeState *ms, Py_ssize_t i) /* Where does a end in b? Elements in b after that can be * ignored (already in place). */ - nb = gallop_left(ssa.keys[na-1], ssb.keys, nb, nb-1); + nb = gallop_left(ssa.keys[na-1], ssb.keys, nb, nb-1, compare_funcs); if (nb <= 0) return nb; @@ -1809,9 +2002,9 @@ merge_at(MergeState *ms, Py_ssize_t i) * min(na, nb) elements. */ if (na <= nb) - return merge_lo(ms, ssa, na, ssb, nb); + return merge_lo(ms, ssa, na, ssb, nb, compare_funcs); else - return merge_hi(ms, ssa, na, ssb, nb); + return merge_hi(ms, ssa, na, ssb, nb, compare_funcs); } /* Examine the stack of runs waiting to be merged, merging adjacent runs @@ -1825,7 +2018,7 @@ merge_at(MergeState *ms, Py_ssize_t i) * Returns 0 on success, -1 on error. */ static int -merge_collapse(MergeState *ms) +merge_collapse(MergeState *ms, CompareFuncs compare_funcs) { struct s_slice *p = ms->pending; @@ -1836,12 +2029,12 @@ merge_collapse(MergeState *ms) (n > 1 && p[n-2].len <= p[n-1].len + p[n].len)) { if (p[n-1].len < p[n+1].len) --n; - if (merge_at(ms, n) < 0) + if (merge_at(ms, n, compare_funcs) < 0) return -1; } else if (p[n].len <= p[n+1].len) { - if (merge_at(ms, n) < 0) - return -1; + if (merge_at(ms, n, compare_funcs) < 0) + return -1; } else break; @@ -1855,7 +2048,7 @@ merge_collapse(MergeState *ms) * Returns 0 on success, -1 on error. */ static int -merge_force_collapse(MergeState *ms) +merge_force_collapse(MergeState *ms, CompareFuncs compare_funcs) { struct s_slice *p = ms->pending; @@ -1864,7 +2057,7 @@ merge_force_collapse(MergeState *ms) Py_ssize_t n = ms->n - 2; if (n > 0 && p[n-1].len < p[n+1].len) --n; - if (merge_at(ms, n) < 0) + if (merge_at(ms, n, compare_funcs) < 0) return -1; } return 0; @@ -1908,7 +2101,7 @@ reverse_sortslice(sortslice *s, Py_ssize_t n) */ static PyObject * listsort_impl(PyListObject *self, PyObject *keyfunc, int reverse) -{ +{ MergeState ms; Py_ssize_t nremaining; Py_ssize_t minrun; @@ -1970,6 +2163,89 @@ listsort_impl(PyListObject *self, PyObject *keyfunc, int reverse) lo.values = saved_ob_item; } + + /* The pre-sort check: here's where we decide which compare function to use. + * How much optimization is safe? We test for homogeneity with respect to + * several properties that are expensive to check at compare-time, and + * set compare_funcs appropriately. */ + CompareFuncs compare_funcs; + if (saved_ob_size > 1) { + /* Assume the first element is representative of the whole list. */ + int keys_are_in_tuples = (lo.keys[0]->ob_type == &PyTuple_Type && + Py_SIZE(lo.keys[0]) > 0); + + PyTypeObject* key_type = (keys_are_in_tuples ? + PyTuple_GET_ITEM(lo.keys[0],0)->ob_type : + lo.keys[0]->ob_type); + + int keys_are_all_same_type = 1; + int strings_are_latin = 1; + int ints_are_bounded = 1; + + /* Prove that assumption by checking every key. */ + int i; + for (i=0; i< saved_ob_size; i++) { + + if (keys_are_in_tuples && + (lo.keys[i]->ob_type != &PyTuple_Type || Py_SIZE(lo.keys[i]) == 0)) { + keys_are_in_tuples = 0; + keys_are_all_same_type = 0; + break; + } + + /* Note: for lists of tuples, key is the first element of the tuple + * lo.keys[i], not lo.keys[i] itself! We verify type-homogeneity + * for lists of tuples in the if-statement directly above. */ + PyObject* key = (keys_are_in_tuples ? + PyTuple_GET_ITEM(lo.keys[i],0) : + lo.keys[i]); + + if (key->ob_type != key_type) { + keys_are_all_same_type = 0; + break; + } + + else if (key_type == &PyLong_Type && ints_are_bounded && + Py_ABS(Py_SIZE(key)) > 1) + ints_are_bounded = 0; + + else if (key_type == &PyUnicode_Type && strings_are_latin && + PyUnicode_KIND(key) != PyUnicode_1BYTE_KIND) + strings_are_latin = 0; + } + + /* Choose the best compare, given what we now know about the keys. */ + if (keys_are_all_same_type) { + + if (key_type == &PyUnicode_Type && strings_are_latin) + compare_funcs.key_compare = unsafe_latin_compare; + + else if (key_type == &PyLong_Type && ints_are_bounded) + compare_funcs.key_compare = unsafe_long_compare; + + else if (key_type == &PyFloat_Type) + compare_funcs.key_compare = unsafe_float_compare; + + else if ((compare_funcs.key_richcompare = key_type->tp_richcompare) != NULL) + compare_funcs.key_compare = unsafe_object_compare; + + } else { + compare_funcs.key_compare = safe_object_compare; + } + + if (keys_are_in_tuples) { + /* Make sure we're not dealing with tuples of tuples + * (remember: here, key_type refers list [key[0] for key in keys]) */ + if (key_type == &PyTuple_Type) + compare_funcs.tuple_elem_compare = safe_object_compare; + else + compare_funcs.tuple_elem_compare = compare_funcs.key_compare; + + compare_funcs.key_compare = unsafe_tuple_compare; + } + } + /* End of pre-sort check: compare_funcs is now set properly! */ + merge_init(&ms, saved_ob_size, keys != NULL); nremaining = saved_ob_size; @@ -1993,7 +2269,7 @@ listsort_impl(PyListObject *self, PyObject *keyfunc, int reverse) Py_ssize_t n; /* Identify next run. */ - n = count_run(lo.keys, lo.keys + nremaining, &descending); + n = count_run(lo.keys, lo.keys + nremaining, &descending, compare_funcs); if (n < 0) goto fail; if (descending) @@ -2002,7 +2278,7 @@ listsort_impl(PyListObject *self, PyObject *keyfunc, int reverse) if (n < minrun) { const Py_ssize_t force = nremaining <= minrun ? nremaining : minrun; - if (binarysort(lo, lo.keys + force, lo.keys + n) < 0) + if (binarysort(lo, lo.keys + force, lo.keys + n, compare_funcs) < 0) goto fail; n = force; } @@ -2011,14 +2287,14 @@ listsort_impl(PyListObject *self, PyObject *keyfunc, int reverse) ms.pending[ms.n].base = lo; ms.pending[ms.n].len = n; ++ms.n; - if (merge_collapse(&ms) < 0) + if (merge_collapse(&ms, compare_funcs) < 0) goto fail; /* Advance to find next run. */ sortslice_advance(&lo, n); nremaining -= n; } while (nremaining); - if (merge_force_collapse(&ms) < 0) + if (merge_force_collapse(&ms, compare_funcs) < 0) goto fail; assert(ms.n == 1); assert(keys == NULL @@ -2080,6 +2356,7 @@ listsort(PyListObject *self, PyObject *args, PyObject *kwds) if (!PyArg_ParseTupleAndKeywords(args, kwds, "|$Oi:sort", kwlist, &keyfunc, &reverse)) return NULL; + return listsort_impl(self, keyfunc, reverse); } From 2ce5e5e7a451ad04f9a75912946307b2f9dea81c Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Tue, 7 Mar 2017 20:46:44 -0700 Subject: [PATCH 02/47] Removed trailing whitespace from listsort_impl --- Objects/listobject.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Objects/listobject.c b/Objects/listobject.c index f2da0d252f3de6..2bc9694198cfb7 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -2101,7 +2101,7 @@ reverse_sortslice(sortslice *s, Py_ssize_t n) */ static PyObject * listsort_impl(PyListObject *self, PyObject *keyfunc, int reverse) -{ +{ MergeState ms; Py_ssize_t nremaining; Py_ssize_t minrun; From 7d2f44a02e62efd07ca98f713df41ef6d8ae4471 Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Tue, 7 Mar 2017 20:50:30 -0700 Subject: [PATCH 03/47] fixed typo --- Objects/listobject.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Objects/listobject.c b/Objects/listobject.c index 2bc9694198cfb7..b9363f4f4b1c7a 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -2235,7 +2235,7 @@ listsort_impl(PyListObject *self, PyObject *keyfunc, int reverse) if (keys_are_in_tuples) { /* Make sure we're not dealing with tuples of tuples - * (remember: here, key_type refers list [key[0] for key in keys]) */ + * (remember: here, key_type refers list [key[0] for key in keys]) */ if (key_type == &PyTuple_Type) compare_funcs.tuple_elem_compare = safe_object_compare; else From d752fc790f682fc1e4f85415b773036d357dc466 Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Tue, 7 Mar 2017 20:54:55 -0700 Subject: [PATCH 04/47] Added myself to Misc/ACKS --- Misc/ACKS | 1 + 1 file changed, 1 insertion(+) diff --git a/Misc/ACKS b/Misc/ACKS index b7f1282c69c0ce..9c77c620806900 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -539,6 +539,7 @@ Tiago Gonçalves Chris Gonnerman Shelley Gooch David Goodger +Elliot Gorokhovsky Hans de Graaff Tim Graham Kim Gräsman From e19728ed3919e5ef52b5f77808be5e92473abee3 Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Tue, 7 Mar 2017 21:02:00 -0700 Subject: [PATCH 05/47] Made ISLT comment more in line with the current text --- Objects/listobject.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Objects/listobject.c b/Objects/listobject.c index b9363f4f4b1c7a..0181c00fc1047c 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -1031,7 +1031,10 @@ sortslice_advance(sortslice *slice, Py_ssize_t n) slice->values += n; } -/* Macros for comparing keys: */ +/* Comparison function: compare_funcs.key_compare, which is set at run-time in + * listsort_impl to optimize for various special cases. + * Returns -1 on error, 1 if x < y, 0 if x >= y. + */ #define ISLT(X, Y) ((*compare_funcs.key_compare)(X, Y, compare_funcs)) /* Compare X to Y via "<". Goto "fail" if the comparison raises an From 7e74c27fc208f77fd855e16ffab5d23f10ab34f8 Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Tue, 7 Mar 2017 21:02:46 -0700 Subject: [PATCH 06/47] Remove newline --- Objects/listobject.c | 1 + 1 file changed, 1 insertion(+) diff --git a/Objects/listobject.c b/Objects/listobject.c index 0181c00fc1047c..1030aa61124e10 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -1035,6 +1035,7 @@ sortslice_advance(sortslice *slice, Py_ssize_t n) * listsort_impl to optimize for various special cases. * Returns -1 on error, 1 if x < y, 0 if x >= y. */ + #define ISLT(X, Y) ((*compare_funcs.key_compare)(X, Y, compare_funcs)) /* Compare X to Y via "<". Goto "fail" if the comparison raises an From 9c566b1c49d148efad927a01e7089704cdab480c Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Tue, 7 Mar 2017 21:06:04 -0700 Subject: [PATCH 07/47] untabify --- Objects/listobject.c | 140 +++++++++++++++++++++---------------------- 1 file changed, 70 insertions(+), 70 deletions(-) diff --git a/Objects/listobject.c b/Objects/listobject.c index 1030aa61124e10..3177879b1ae271 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -1089,8 +1089,8 @@ unsafe_object_compare(PyObject* v, PyObject* w, CompareFuncs compare_funcs) /* Modified from Objects/object.c:PyObject_RichCompareBool, assuming: */ #ifdef Py_DEBUG assert(v->ob_type == w->ob_type && - v->ob_type->tp_richcompare != NULL && - v->ob_type->tp_richcompare == compare_funcs.key_richcompare); + v->ob_type->tp_richcompare != NULL && + v->ob_type->tp_richcompare == compare_funcs.key_richcompare); #endif PyObject* res = (*compare_funcs.key_richcompare)(v, w, Py_LT); @@ -1197,8 +1197,8 @@ unsafe_tuple_compare(PyObject* v, PyObject* w, CompareFuncs compare_funcs) /* Is v[0] < w[0]? */ int k = (*compare_funcs.tuple_elem_compare)(vt->ob_item[0], - wt->ob_item[0], - compare_funcs); + wt->ob_item[0], + compare_funcs); if (k < 0) return -1; if (k) @@ -1213,8 +1213,8 @@ unsafe_tuple_compare(PyObject* v, PyObject* w, CompareFuncs compare_funcs) /* Well, is w[0] < v[0]? */ k = (*compare_funcs.tuple_elem_compare)(wt->ob_item[0], - vt->ob_item[0], - compare_funcs); + vt->ob_item[0], + compare_funcs); if (k < 0) return -1; if (k) @@ -1381,7 +1381,7 @@ Returns -1 on error. See listsort.txt for info on the method. */ static Py_ssize_t gallop_left(PyObject *key, PyObject **a, Py_ssize_t n, Py_ssize_t hint, - CompareFuncs compare_funcs) + CompareFuncs compare_funcs) { Py_ssize_t ofs; Py_ssize_t lastofs; @@ -1473,7 +1473,7 @@ written as one routine with yet another "left or right?" flag. */ static Py_ssize_t gallop_right(PyObject *key, PyObject **a, Py_ssize_t n, Py_ssize_t hint, - CompareFuncs compare_funcs) + CompareFuncs compare_funcs) { Py_ssize_t ofs; Py_ssize_t lastofs; @@ -2037,8 +2037,8 @@ merge_collapse(MergeState *ms, CompareFuncs compare_funcs) return -1; } else if (p[n].len <= p[n+1].len) { - if (merge_at(ms, n, compare_funcs) < 0) - return -1; + if (merge_at(ms, n, compare_funcs) < 0) + return -1; } else break; @@ -2174,79 +2174,79 @@ listsort_impl(PyListObject *self, PyObject *keyfunc, int reverse) * set compare_funcs appropriately. */ CompareFuncs compare_funcs; if (saved_ob_size > 1) { - /* Assume the first element is representative of the whole list. */ - int keys_are_in_tuples = (lo.keys[0]->ob_type == &PyTuple_Type && - Py_SIZE(lo.keys[0]) > 0); - - PyTypeObject* key_type = (keys_are_in_tuples ? - PyTuple_GET_ITEM(lo.keys[0],0)->ob_type : - lo.keys[0]->ob_type); - - int keys_are_all_same_type = 1; - int strings_are_latin = 1; - int ints_are_bounded = 1; - - /* Prove that assumption by checking every key. */ - int i; - for (i=0; i< saved_ob_size; i++) { - - if (keys_are_in_tuples && - (lo.keys[i]->ob_type != &PyTuple_Type || Py_SIZE(lo.keys[i]) == 0)) { - keys_are_in_tuples = 0; - keys_are_all_same_type = 0; - break; - } - - /* Note: for lists of tuples, key is the first element of the tuple + /* Assume the first element is representative of the whole list. */ + int keys_are_in_tuples = (lo.keys[0]->ob_type == &PyTuple_Type && + Py_SIZE(lo.keys[0]) > 0); + + PyTypeObject* key_type = (keys_are_in_tuples ? + PyTuple_GET_ITEM(lo.keys[0],0)->ob_type : + lo.keys[0]->ob_type); + + int keys_are_all_same_type = 1; + int strings_are_latin = 1; + int ints_are_bounded = 1; + + /* Prove that assumption by checking every key. */ + int i; + for (i=0; i< saved_ob_size; i++) { + + if (keys_are_in_tuples && + (lo.keys[i]->ob_type != &PyTuple_Type || Py_SIZE(lo.keys[i]) == 0)) { + keys_are_in_tuples = 0; + keys_are_all_same_type = 0; + break; + } + + /* Note: for lists of tuples, key is the first element of the tuple * lo.keys[i], not lo.keys[i] itself! We verify type-homogeneity * for lists of tuples in the if-statement directly above. */ - PyObject* key = (keys_are_in_tuples ? - PyTuple_GET_ITEM(lo.keys[i],0) : - lo.keys[i]); + PyObject* key = (keys_are_in_tuples ? + PyTuple_GET_ITEM(lo.keys[i],0) : + lo.keys[i]); - if (key->ob_type != key_type) { - keys_are_all_same_type = 0; - break; - } + if (key->ob_type != key_type) { + keys_are_all_same_type = 0; + break; + } - else if (key_type == &PyLong_Type && ints_are_bounded && - Py_ABS(Py_SIZE(key)) > 1) - ints_are_bounded = 0; + else if (key_type == &PyLong_Type && ints_are_bounded && + Py_ABS(Py_SIZE(key)) > 1) + ints_are_bounded = 0; - else if (key_type == &PyUnicode_Type && strings_are_latin && - PyUnicode_KIND(key) != PyUnicode_1BYTE_KIND) - strings_are_latin = 0; - } + else if (key_type == &PyUnicode_Type && strings_are_latin && + PyUnicode_KIND(key) != PyUnicode_1BYTE_KIND) + strings_are_latin = 0; + } - /* Choose the best compare, given what we now know about the keys. */ - if (keys_are_all_same_type) { + /* Choose the best compare, given what we now know about the keys. */ + if (keys_are_all_same_type) { - if (key_type == &PyUnicode_Type && strings_are_latin) - compare_funcs.key_compare = unsafe_latin_compare; + if (key_type == &PyUnicode_Type && strings_are_latin) + compare_funcs.key_compare = unsafe_latin_compare; - else if (key_type == &PyLong_Type && ints_are_bounded) - compare_funcs.key_compare = unsafe_long_compare; + else if (key_type == &PyLong_Type && ints_are_bounded) + compare_funcs.key_compare = unsafe_long_compare; - else if (key_type == &PyFloat_Type) - compare_funcs.key_compare = unsafe_float_compare; + else if (key_type == &PyFloat_Type) + compare_funcs.key_compare = unsafe_float_compare; - else if ((compare_funcs.key_richcompare = key_type->tp_richcompare) != NULL) - compare_funcs.key_compare = unsafe_object_compare; + else if ((compare_funcs.key_richcompare = key_type->tp_richcompare) != NULL) + compare_funcs.key_compare = unsafe_object_compare; - } else { - compare_funcs.key_compare = safe_object_compare; - } + } else { + compare_funcs.key_compare = safe_object_compare; + } - if (keys_are_in_tuples) { - /* Make sure we're not dealing with tuples of tuples - * (remember: here, key_type refers list [key[0] for key in keys]) */ - if (key_type == &PyTuple_Type) - compare_funcs.tuple_elem_compare = safe_object_compare; - else - compare_funcs.tuple_elem_compare = compare_funcs.key_compare; + if (keys_are_in_tuples) { + /* Make sure we're not dealing with tuples of tuples + * (remember: here, key_type refers list [key[0] for key in keys]) */ + if (key_type == &PyTuple_Type) + compare_funcs.tuple_elem_compare = safe_object_compare; + else + compare_funcs.tuple_elem_compare = compare_funcs.key_compare; - compare_funcs.key_compare = unsafe_tuple_compare; - } + compare_funcs.key_compare = unsafe_tuple_compare; + } } /* End of pre-sort check: compare_funcs is now set properly! */ From 8876e26f27f7b61161135416ec56d56323e7d5b4 Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Tue, 7 Mar 2017 21:10:58 -0700 Subject: [PATCH 08/47] removed newline --- Objects/listobject.c | 1 - 1 file changed, 1 deletion(-) diff --git a/Objects/listobject.c b/Objects/listobject.c index 3177879b1ae271..15e5d666484f69 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -2360,7 +2360,6 @@ listsort(PyListObject *self, PyObject *args, PyObject *kwds) if (!PyArg_ParseTupleAndKeywords(args, kwds, "|$Oi:sort", kwlist, &keyfunc, &reverse)) return NULL; - return listsort_impl(self, keyfunc, reverse); } From 8accd71f36838887fdcb93944547b98196d915a4 Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Wed, 8 Mar 2017 22:07:45 -0700 Subject: [PATCH 09/47] simplified description of the tuple compare --- Objects/listobject.c | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/Objects/listobject.c b/Objects/listobject.c index 15e5d666484f69..c2b6d8125c33ac 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -1165,18 +1165,11 @@ unsafe_float_compare(PyObject *v, PyObject *w, CompareFuncs compare_funcs){ return PyFloat_AS_DOUBLE(v) < PyFloat_AS_DOUBLE(w); } -/* Tuple compare: compare any two non-empty tuples. - * This is the most complicated special case: since the tuple elements themselves - * must of course be compared, we can optimize on two levels. Namely, we make - * the same homogeneity assumptions about the first elements of the tuples in - * our list as we do about the list elements themselves. We then replace the call to - * PyObject_RichCompareBool within the tuple comparison with special case compare, - * based on which assumptions the first elements of the tuples satisfy. - * - * Note that we must therefore ensure assumptions in both unsafe_tuple_compare and - * compare_funcs.tuple_elem_compare are satisfied. If the first elements are not all - * homogeneous, we can always set - * compare_funcs.tuple_elem_compare = safe_object_compare. */ +/* Tuple compare: compare any two non-empty tuples, using + * compare_funcs.tuple_elem_compare to compare the first elements, which is set + * using the same pre-sort check as we use for compare_funcs.key_compare, + * but run on the list [x[0] for x in L]. This allows us to optimize compares + * on two levels as long as [x[0] for x in L] is type-homogeneous. */ static int unsafe_tuple_compare(PyObject* v, PyObject* w, CompareFuncs compare_funcs) { From 1567801c7ce6feef7dc957c44c5530d503d3fe61 Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Wed, 8 Mar 2017 22:08:34 -0700 Subject: [PATCH 10/47] grammar --- Objects/listobject.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Objects/listobject.c b/Objects/listobject.c index c2b6d8125c33ac..a08f7bca3ad397 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -1169,7 +1169,7 @@ unsafe_float_compare(PyObject *v, PyObject *w, CompareFuncs compare_funcs){ * compare_funcs.tuple_elem_compare to compare the first elements, which is set * using the same pre-sort check as we use for compare_funcs.key_compare, * but run on the list [x[0] for x in L]. This allows us to optimize compares - * on two levels as long as [x[0] for x in L] is type-homogeneous. */ + * on two levels (as long as [x[0] for x in L] is type-homogeneous.) */ static int unsafe_tuple_compare(PyObject* v, PyObject* w, CompareFuncs compare_funcs) { From 3820cdba0d2228ab79435c84f282b02c472746d3 Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Thu, 9 Mar 2017 08:32:34 -0700 Subject: [PATCH 11/47] Bugfix -- gcc ignored the error, but clang caught it! --- Objects/listobject.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Objects/listobject.c b/Objects/listobject.c index a08f7bca3ad397..e56a75b9650d5d 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -1133,7 +1133,7 @@ unsafe_long_compare(PyObject *v, PyObject *w, CompareFuncs compare_funcs) /* Modified from Objects/longobject.c:long_compare, assuming: */ #ifdef Py_DEBUG assert(v->ob_type == w->ob_type && - v->ob_type == PyLong_Type && + v->ob_type == &PyLong_Type && Py_ABS(Py_SIZE(v)) == Py_ABS(Py_SIZE(w)) && Py_ABS(Py_SIZE(v)) <= 1); #endif From 201a4681fb8bfe595a2efa1a0c24bea3dff6f8f5 Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Thu, 9 Mar 2017 08:40:41 -0700 Subject: [PATCH 12/47] Bugfix -- assertion in unsafe_long_compare was phrased incorrectly. --- Objects/listobject.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Objects/listobject.c b/Objects/listobject.c index e56a75b9650d5d..0d0aa887855a1c 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -1134,8 +1134,8 @@ unsafe_long_compare(PyObject *v, PyObject *w, CompareFuncs compare_funcs) #ifdef Py_DEBUG assert(v->ob_type == w->ob_type && v->ob_type == &PyLong_Type && - Py_ABS(Py_SIZE(v)) == Py_ABS(Py_SIZE(w)) && - Py_ABS(Py_SIZE(v)) <= 1); + Py_ABS(Py_SIZE(v)) <= 1 && + Py_ABS(Py_SIZE(w)) <= 1); #endif PyLongObject *vl, *wl; From c2a9df260cedee8d98635b8188dad4c4db9681f9 Mon Sep 17 00:00:00 2001 From: embg Date: Thu, 9 Mar 2017 14:00:21 -0700 Subject: [PATCH 13/47] fix typo --- Objects/listobject.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Objects/listobject.c b/Objects/listobject.c index 0d0aa887855a1c..f479e48c9818b1 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -1046,7 +1046,7 @@ sortslice_advance(sortslice *slice, Py_ssize_t n) if (k) /* Here we define custom comparison functions to optimize for the cases one commonly - * in practice: homogeneous lists, often of one of the basic types. */ + * encounters in practice: homogeneous lists, often of one of the basic types. */ /* This struct holds the comparison function and helper functions * selected in the pre-sort check. */ From 37b15b80fa7d3ebc29947d480bf6cf2b64cfb673 Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Sat, 11 Mar 2017 10:21:49 -0700 Subject: [PATCH 14/47] Added if (v == w) return 1; to all compares --- Objects/listobject.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/Objects/listobject.c b/Objects/listobject.c index 0d0aa887855a1c..0e03fbbf31c176 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -1092,7 +1092,8 @@ unsafe_object_compare(PyObject* v, PyObject* w, CompareFuncs compare_funcs) v->ob_type->tp_richcompare != NULL && v->ob_type->tp_richcompare == compare_funcs.key_richcompare); #endif - + if (v == w) return 1; + PyObject* res = (*compare_funcs.key_richcompare)(v, w, Py_LT); if (res == NULL) return -1; @@ -1117,7 +1118,8 @@ unsafe_latin_compare(PyObject* v, PyObject* w, CompareFuncs compare_funcs){ PyUnicode_KIND(v) == PyUnicode_KIND(w) && PyUnicode_KIND(v) == PyUnicode_1BYTE_KIND); #endif - + if (v == w) return 1; + int len = Py_MIN(PyUnicode_GET_LENGTH(v), PyUnicode_GET_LENGTH(w)); int res = memcmp(PyUnicode_DATA(v), PyUnicode_DATA(w), len); @@ -1137,7 +1139,8 @@ unsafe_long_compare(PyObject *v, PyObject *w, CompareFuncs compare_funcs) Py_ABS(Py_SIZE(v)) <= 1 && Py_ABS(Py_SIZE(w)) <= 1); #endif - + if (v == w) return 1; + PyLongObject *vl, *wl; vl = (PyLongObject*)v; wl = (PyLongObject*)w; @@ -1161,7 +1164,8 @@ unsafe_float_compare(PyObject *v, PyObject *w, CompareFuncs compare_funcs){ assert(v->ob_type == w->ob_type && v->ob_type == &PyFloat_Type); #endif - + if (v == w) return 1; + return PyFloat_AS_DOUBLE(v) < PyFloat_AS_DOUBLE(w); } @@ -1180,7 +1184,8 @@ unsafe_tuple_compare(PyObject* v, PyObject* w, CompareFuncs compare_funcs) Py_SIZE(v) > 0 && Py_SIZE(w) > 0); #endif - + if (v == w) return 1; + PyTupleObject *vt, *wt; Py_ssize_t i; Py_ssize_t vlen, wlen; @@ -1215,7 +1220,7 @@ unsafe_tuple_compare(PyObject* v, PyObject* w, CompareFuncs compare_funcs) /* Out of options: v[0] == w[0]! We need to look at v[1:] and w[1:]. * We can use code copied straight from tupleobject.c:tuplerichcompare: */ - for (i = 0; i < vlen && i < wlen; i++) { + for (i = 1; i < vlen && i < wlen; i++) { k = PyObject_RichCompareBool(vt->ob_item[i], wt->ob_item[i], Py_EQ); From ed9b21f5b3ef10fe601142d0b5bbb389de3101cd Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Sat, 11 Mar 2017 10:28:32 -0700 Subject: [PATCH 15/47] Added if (v == w) return 0; to all compares, apologies for previous commit --- Objects/listobject.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Objects/listobject.c b/Objects/listobject.c index f36c3fe8bdfe8a..25536acafa757e 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -1092,7 +1092,7 @@ unsafe_object_compare(PyObject* v, PyObject* w, CompareFuncs compare_funcs) v->ob_type->tp_richcompare != NULL && v->ob_type->tp_richcompare == compare_funcs.key_richcompare); #endif - if (v == w) return 1; + if (v == w) return 0; PyObject* res = (*compare_funcs.key_richcompare)(v, w, Py_LT); if (res == NULL) @@ -1118,7 +1118,7 @@ unsafe_latin_compare(PyObject* v, PyObject* w, CompareFuncs compare_funcs){ PyUnicode_KIND(v) == PyUnicode_KIND(w) && PyUnicode_KIND(v) == PyUnicode_1BYTE_KIND); #endif - if (v == w) return 1; + if (v == w) return 0; int len = Py_MIN(PyUnicode_GET_LENGTH(v), PyUnicode_GET_LENGTH(w)); int res = memcmp(PyUnicode_DATA(v), PyUnicode_DATA(w), len); @@ -1139,7 +1139,7 @@ unsafe_long_compare(PyObject *v, PyObject *w, CompareFuncs compare_funcs) Py_ABS(Py_SIZE(v)) <= 1 && Py_ABS(Py_SIZE(w)) <= 1); #endif - if (v == w) return 1; + if (v == w) return 0; PyLongObject *vl, *wl; vl = (PyLongObject*)v; @@ -1164,7 +1164,7 @@ unsafe_float_compare(PyObject *v, PyObject *w, CompareFuncs compare_funcs){ assert(v->ob_type == w->ob_type && v->ob_type == &PyFloat_Type); #endif - if (v == w) return 1; + if (v == w) return 0; return PyFloat_AS_DOUBLE(v) < PyFloat_AS_DOUBLE(w); } @@ -1184,7 +1184,7 @@ unsafe_tuple_compare(PyObject* v, PyObject* w, CompareFuncs compare_funcs) Py_SIZE(v) > 0 && Py_SIZE(w) > 0); #endif - if (v == w) return 1; + if (v == w) return 0; PyTupleObject *vt, *wt; Py_ssize_t i; From acf4c9dedf16feb0fb34be1fc148d09e2eb30bc7 Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Sun, 12 Mar 2017 14:11:04 -0600 Subject: [PATCH 16/47] Folded CompareFuncs into MergeState and added safety check to unsafe_object_compare --- Objects/listobject.c | 542 +++++++++++++++++++++---------------------- 1 file changed, 267 insertions(+), 275 deletions(-) diff --git a/Objects/listobject.c b/Objects/listobject.c index 25536acafa757e..dace48312e3c74 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -1031,12 +1031,12 @@ sortslice_advance(sortslice *slice, Py_ssize_t n) slice->values += n; } -/* Comparison function: compare_funcs.key_compare, which is set at run-time in +/* Comparison function: ms->key_compare, which is set at run-time in * listsort_impl to optimize for various special cases. * Returns -1 on error, 1 if x < y, 0 if x >= y. */ -#define ISLT(X, Y) ((*compare_funcs.key_compare)(X, Y, compare_funcs)) +#define ISLT(X, Y) (*(ms->key_compare))(X, Y, ms) /* Compare X to Y via "<". Goto "fail" if the comparison raises an error. Else "k" is set to true iff Xob_type->tp_richcompare */ - PyObject* (*key_richcompare)(PyObject*, PyObject*, int); - - /* This function is used by unsafe_tuple_compare to compare the first elements - * of tuples. It may be set to safe_object_compare, but the idea is that hopefully - * we can assume more, and use one of the special-case compares. */ - int (*tuple_elem_compare)(PyObject*, PyObject*, CompareFuncs); -}; - -/* These are the special case compare functions. - * compare_funcs.key_compare will always point to one of these: */ - -/* Heterogeneous compare: default, always safe to fall back on. */ -static int -safe_object_compare(PyObject* v, PyObject* w, CompareFuncs compare_funcs) -{ - /* No assumptions necessary! */ - return PyObject_RichCompareBool(v, w, Py_LT); -} - -/* Homogeneous compare: safe for any two compareable objects of the same type. - * (compare_funcs.key_richcompare is set to ob_type->tp_richcompare in the - * pre-sort check.) +/* The maximum number of entries in a MergeState's pending-runs stack. + * This is enough to sort arrays of size up to about + * 32 * phi ** MAX_MERGE_PENDING + * where phi ~= 1.618. 85 is ridiculouslylarge enough, good for an array + * with 2**64 elements. */ -static int -unsafe_object_compare(PyObject* v, PyObject* w, CompareFuncs compare_funcs) -{ - /* Modified from Objects/object.c:PyObject_RichCompareBool, assuming: */ - #ifdef Py_DEBUG - assert(v->ob_type == w->ob_type && - v->ob_type->tp_richcompare != NULL && - v->ob_type->tp_richcompare == compare_funcs.key_richcompare); - #endif - if (v == w) return 0; - - PyObject* res = (*compare_funcs.key_richcompare)(v, w, Py_LT); - if (res == NULL) - return -1; - int ok; - if (PyBool_Check(res)){ - ok = (res == Py_True); - } - else { - ok = PyObject_IsTrue(res); - } - Py_DECREF(res); - return ok; -} - -/* Latin string compare: safe for any two latin (one byte per char) strings. */ -static int -unsafe_latin_compare(PyObject* v, PyObject* w, CompareFuncs compare_funcs){ - /* Modified from Objects/unicodeobject.c:unicode_compare, assuming: */ - #ifdef Py_DEBUG - assert(v->ob_type == w->ob_type && - v->ob_type == &PyUnicode_Type && - PyUnicode_KIND(v) == PyUnicode_KIND(w) && - PyUnicode_KIND(v) == PyUnicode_1BYTE_KIND); - #endif - if (v == w) return 0; - - int len = Py_MIN(PyUnicode_GET_LENGTH(v), PyUnicode_GET_LENGTH(w)); - int res = memcmp(PyUnicode_DATA(v), PyUnicode_DATA(w), len); - - return (res != 0 ? - res < 0 : - PyUnicode_GET_LENGTH(v) < PyUnicode_GET_LENGTH(w)); -} - -/* Bounded int compare: compare any two longs that fit in a single machine word. */ -static int -unsafe_long_compare(PyObject *v, PyObject *w, CompareFuncs compare_funcs) -{ - /* Modified from Objects/longobject.c:long_compare, assuming: */ - #ifdef Py_DEBUG - assert(v->ob_type == w->ob_type && - v->ob_type == &PyLong_Type && - Py_ABS(Py_SIZE(v)) <= 1 && - Py_ABS(Py_SIZE(w)) <= 1); - #endif - if (v == w) return 0; - - PyLongObject *vl, *wl; - vl = (PyLongObject*)v; - wl = (PyLongObject*)w; - - sdigit v0 = Py_SIZE(vl) == 0 ? 0 : (sdigit)vl->ob_digit[0]; - sdigit w0 = Py_SIZE(wl) == 0 ? 0 : (sdigit)wl->ob_digit[0]; - - if (Py_SIZE(vl) < 0) - v0 = -v0; - if (Py_SIZE(wl) < 0) - w0 = -w0; - - return v0 < w0; -} +#define MAX_MERGE_PENDING 85 -/* Float compare: compare any two floats. */ -static int -unsafe_float_compare(PyObject *v, PyObject *w, CompareFuncs compare_funcs){ - /* Modified from Objects/floatobject.c:float_richcompare, assuming: */ - #ifdef Py_DEBUG - assert(v->ob_type == w->ob_type && - v->ob_type == &PyFloat_Type); - #endif - if (v == w) return 0; - - return PyFloat_AS_DOUBLE(v) < PyFloat_AS_DOUBLE(w); -} +/* When we get into galloping mode, we stay there until both runs win less + * often than MIN_GALLOP consecutive times. See listsort.txt for more info. + */ +#define MIN_GALLOP 7 -/* Tuple compare: compare any two non-empty tuples, using - * compare_funcs.tuple_elem_compare to compare the first elements, which is set - * using the same pre-sort check as we use for compare_funcs.key_compare, - * but run on the list [x[0] for x in L]. This allows us to optimize compares - * on two levels (as long as [x[0] for x in L] is type-homogeneous.) */ -static int -unsafe_tuple_compare(PyObject* v, PyObject* w, CompareFuncs compare_funcs) -{ - /* Modified from Objects/tupleobject.c:tuplerichcompare, assuming: */ - #ifdef Py_DEBUG - assert(v->ob_type == w->ob_type && - v->ob_type == &PyTuple_Type && - Py_SIZE(v) > 0 && - Py_SIZE(w) > 0); - #endif - if (v == w) return 0; - - PyTupleObject *vt, *wt; - Py_ssize_t i; - Py_ssize_t vlen, wlen; +/* Avoid malloc for small temp arrays. */ +#define MERGESTATE_TEMP_SIZE 256 - vt = (PyTupleObject *)v; - wt = (PyTupleObject *)w; +/* One MergeState exists on the stack per invocation of mergesort. It's just + * a convenient way to pass state around among the helper functions. + */ +struct s_slice { + sortslice base; + Py_ssize_t len; +}; - /* Is v[0] < w[0]? */ - int k = (*compare_funcs.tuple_elem_compare)(vt->ob_item[0], - wt->ob_item[0], - compare_funcs); - if (k < 0) - return -1; - if (k) - return 1; +typedef struct s_MergeState MergeState; +struct s_MergeState { + /* This controls when we get *into* galloping mode. It's initialized + * to MIN_GALLOP. merge_lo and merge_hi tend to nudge it higher for + * random data, and lower for highly structured data. + */ + Py_ssize_t min_gallop; - vlen = Py_SIZE(vt); - wlen = Py_SIZE(wt); + /* 'a' is temp storage to help with merges. It contains room for + * alloced entries. + */ + sortslice a; /* may point to temparray below */ + Py_ssize_t alloced; - /* Well, are either of the tuples are singleton? */ - if (vlen == 1 || wlen == 1) - return 0; + /* A stack of n pending runs yet to be merged. Run #i starts at + * address base[i] and extends for len[i] elements. It's always + * true (so long as the indices are in bounds) that + * + * pending[i].base + pending[i].len == pending[i+1].base + * + * so we could cut the storage for this, but it's a minor amount, + * and keeping all the info explicit simplifies the code. + */ + int n; + struct s_slice pending[MAX_MERGE_PENDING]; - /* Well, is w[0] < v[0]? */ - k = (*compare_funcs.tuple_elem_compare)(wt->ob_item[0], - vt->ob_item[0], - compare_funcs); - if (k < 0) - return -1; - if (k) - return 0; + /* 'a' points to this when possible, rather than muck with malloc. */ + PyObject *temparray[MERGESTATE_TEMP_SIZE]; - /* Out of options: v[0] == w[0]! We need to look at v[1:] and w[1:]. - * We can use code copied straight from tupleobject.c:tuplerichcompare: */ - for (i = 1; i < vlen && i < wlen; i++) { - k = PyObject_RichCompareBool(vt->ob_item[i], - wt->ob_item[i], - Py_EQ); - if (k < 0) - return -1; - if (!k) - break; - } + /* This is the function we will use to compare two keys, + * even when none of our special cases apply and we have to use + * safe_object_compare. */ + int (*key_compare)(PyObject*, PyObject*, MergeState*); - if (i >= vlen || i >= wlen) { - return vlen < wlen; - } + /* This function is used by unsafe_object_compare to optimize comparisons + * when we know our list is type-homogeneous but we can't assume anything else. + * In the pre-sort check it is set equal to key->ob_type->tp_richcompare */ + PyObject* (*key_richcompare)(PyObject*, PyObject*, int); - return PyObject_RichCompareBool(vt->ob_item[i], wt->ob_item[i], Py_LT); -} + /* This function is used by unsafe_tuple_compare to compare the first elements + * of tuples. It may be set to safe_object_compare, but the idea is that hopefully + * we can assume more, and use one of the special-case compares. */ + int (*tuple_elem_compare)(PyObject*, PyObject*, MergeState*); +}; /* binarysort is the best method for sorting small arrays: it does few compares, but can do data movement quadratic in the number of @@ -1249,7 +1126,7 @@ unsafe_tuple_compare(PyObject* v, PyObject* w, CompareFuncs compare_funcs) the input (nothing is lost or duplicated). */ static int -binarysort(sortslice lo, PyObject **hi, PyObject **start, CompareFuncs compare_funcs) +binarysort(MergeState* ms, sortslice lo, PyObject **hi, PyObject **start) { Py_ssize_t k; PyObject **l, **p, **r; @@ -1323,7 +1200,7 @@ elements to get out of order). Returns -1 in case of error. */ static Py_ssize_t -count_run(PyObject **lo, PyObject **hi, int *descending, CompareFuncs compare_funcs) +count_run(MergeState* ms, PyObject **lo, PyObject **hi, int *descending) { Py_ssize_t k; Py_ssize_t n; @@ -1378,8 +1255,7 @@ key, and the last n-k should follow key. Returns -1 on error. See listsort.txt for info on the method. */ static Py_ssize_t -gallop_left(PyObject *key, PyObject **a, Py_ssize_t n, Py_ssize_t hint, - CompareFuncs compare_funcs) +gallop_left(MergeState* ms, PyObject *key, PyObject **a, Py_ssize_t n, Py_ssize_t hint) { Py_ssize_t ofs; Py_ssize_t lastofs; @@ -1470,8 +1346,7 @@ we're sticking to "<" comparisons that it's much harder to follow if written as one routine with yet another "left or right?" flag. */ static Py_ssize_t -gallop_right(PyObject *key, PyObject **a, Py_ssize_t n, Py_ssize_t hint, - CompareFuncs compare_funcs) +gallop_right(MergeState* ms, PyObject *key, PyObject **a, Py_ssize_t n, Py_ssize_t hint) { Py_ssize_t ofs; Py_ssize_t lastofs; @@ -1547,59 +1422,6 @@ gallop_right(PyObject *key, PyObject **a, Py_ssize_t n, Py_ssize_t hint, return -1; } -/* The maximum number of entries in a MergeState's pending-runs stack. - * This is enough to sort arrays of size up to about - * 32 * phi ** MAX_MERGE_PENDING - * where phi ~= 1.618. 85 is ridiculouslylarge enough, good for an array - * with 2**64 elements. - */ -#define MAX_MERGE_PENDING 85 - -/* When we get into galloping mode, we stay there until both runs win less - * often than MIN_GALLOP consecutive times. See listsort.txt for more info. - */ -#define MIN_GALLOP 7 - -/* Avoid malloc for small temp arrays. */ -#define MERGESTATE_TEMP_SIZE 256 - -/* One MergeState exists on the stack per invocation of mergesort. It's just - * a convenient way to pass state around among the helper functions. - */ -struct s_slice { - sortslice base; - Py_ssize_t len; -}; - -typedef struct s_MergeState { - /* This controls when we get *into* galloping mode. It's initialized - * to MIN_GALLOP. merge_lo and merge_hi tend to nudge it higher for - * random data, and lower for highly structured data. - */ - Py_ssize_t min_gallop; - - /* 'a' is temp storage to help with merges. It contains room for - * alloced entries. - */ - sortslice a; /* may point to temparray below */ - Py_ssize_t alloced; - - /* A stack of n pending runs yet to be merged. Run #i starts at - * address base[i] and extends for len[i] elements. It's always - * true (so long as the indices are in bounds) that - * - * pending[i].base + pending[i].len == pending[i+1].base - * - * so we could cut the storage for this, but it's a minor amount, - * and keeping all the info explicit simplifies the code. - */ - int n; - struct s_slice pending[MAX_MERGE_PENDING]; - - /* 'a' points to this when possible, rather than muck with malloc. */ - PyObject *temparray[MERGESTATE_TEMP_SIZE]; -} MergeState; - /* Conceptually a MergeState's constructor. */ static void merge_init(MergeState *ms, Py_ssize_t list_size, int has_keyfunc) @@ -1684,8 +1506,7 @@ merge_getmem(MergeState *ms, Py_ssize_t need) * successful, -1 if error. */ static Py_ssize_t -merge_lo(MergeState *ms, sortslice ssa, Py_ssize_t na, - sortslice ssb, Py_ssize_t nb, CompareFuncs compare_funcs) +merge_lo(MergeState *ms, sortslice ssa, Py_ssize_t na, sortslice ssb, Py_ssize_t nb) { Py_ssize_t k; sortslice dest; @@ -1752,7 +1573,7 @@ merge_lo(MergeState *ms, sortslice ssa, Py_ssize_t na, assert(na > 1 && nb > 0); min_gallop -= min_gallop > 1; ms->min_gallop = min_gallop; - k = gallop_right(ssb.keys[0], ssa.keys, na, 0, compare_funcs); + k = gallop_right(ms, ssb.keys[0], ssa.keys, na, 0); acount = k; if (k) { if (k < 0) @@ -1775,7 +1596,7 @@ merge_lo(MergeState *ms, sortslice ssa, Py_ssize_t na, if (nb == 0) goto Succeed; - k = gallop_left(ssa.keys[0], ssb.keys, nb, 0, compare_funcs); + k = gallop_left(ms, ssa.keys[0], ssb.keys, nb, 0); bcount = k; if (k) { if (k < 0) @@ -1816,8 +1637,7 @@ merge_lo(MergeState *ms, sortslice ssa, Py_ssize_t na, * successful, -1 if error. */ static Py_ssize_t -merge_hi(MergeState *ms, sortslice ssa, Py_ssize_t na, - sortslice ssb, Py_ssize_t nb, CompareFuncs compare_funcs) +merge_hi(MergeState *ms, sortslice ssa, Py_ssize_t na, sortslice ssb, Py_ssize_t nb) { Py_ssize_t k; sortslice dest, basea, baseb; @@ -1890,7 +1710,7 @@ merge_hi(MergeState *ms, sortslice ssa, Py_ssize_t na, assert(na > 0 && nb > 1); min_gallop -= min_gallop > 1; ms->min_gallop = min_gallop; - k = gallop_right(ssb.keys[0], basea.keys, na, na-1, compare_funcs); + k = gallop_right(ms, ssb.keys[0], basea.keys, na, na-1); if (k < 0) goto Fail; k = na - k; @@ -1908,7 +1728,7 @@ merge_hi(MergeState *ms, sortslice ssa, Py_ssize_t na, if (nb == 1) goto CopyA; - k = gallop_left(ssa.keys[0], baseb.keys, nb, nb-1, compare_funcs); + k = gallop_left(ms, ssa.keys[0], baseb.keys, nb, nb-1); if (k < 0) goto Fail; k = nb - k; @@ -1955,7 +1775,7 @@ merge_hi(MergeState *ms, sortslice ssa, Py_ssize_t na, * Returns 0 on success, -1 on error. */ static Py_ssize_t -merge_at(MergeState *ms, Py_ssize_t i, CompareFuncs compare_funcs) +merge_at(MergeState *ms, Py_ssize_t i) { sortslice ssa, ssb; Py_ssize_t na, nb; @@ -1985,7 +1805,7 @@ merge_at(MergeState *ms, Py_ssize_t i, CompareFuncs compare_funcs) /* Where does b start in a? Elements in a before that can be * ignored (already in place). */ - k = gallop_right(*ssb.keys, ssa.keys, na, 0, compare_funcs); + k = gallop_right(ms, *ssb.keys, ssa.keys, na, 0); if (k < 0) return -1; sortslice_advance(&ssa, k); @@ -1996,7 +1816,7 @@ merge_at(MergeState *ms, Py_ssize_t i, CompareFuncs compare_funcs) /* Where does a end in b? Elements in b after that can be * ignored (already in place). */ - nb = gallop_left(ssa.keys[na-1], ssb.keys, nb, nb-1, compare_funcs); + nb = gallop_left(ms, ssa.keys[na-1], ssb.keys, nb, nb-1); if (nb <= 0) return nb; @@ -2004,9 +1824,9 @@ merge_at(MergeState *ms, Py_ssize_t i, CompareFuncs compare_funcs) * min(na, nb) elements. */ if (na <= nb) - return merge_lo(ms, ssa, na, ssb, nb, compare_funcs); + return merge_lo(ms, ssa, na, ssb, nb); else - return merge_hi(ms, ssa, na, ssb, nb, compare_funcs); + return merge_hi(ms, ssa, na, ssb, nb); } /* Examine the stack of runs waiting to be merged, merging adjacent runs @@ -2020,7 +1840,7 @@ merge_at(MergeState *ms, Py_ssize_t i, CompareFuncs compare_funcs) * Returns 0 on success, -1 on error. */ static int -merge_collapse(MergeState *ms, CompareFuncs compare_funcs) +merge_collapse(MergeState *ms) { struct s_slice *p = ms->pending; @@ -2031,11 +1851,11 @@ merge_collapse(MergeState *ms, CompareFuncs compare_funcs) (n > 1 && p[n-2].len <= p[n-1].len + p[n].len)) { if (p[n-1].len < p[n+1].len) --n; - if (merge_at(ms, n, compare_funcs) < 0) + if (merge_at(ms, n) < 0) return -1; } else if (p[n].len <= p[n+1].len) { - if (merge_at(ms, n, compare_funcs) < 0) + if (merge_at(ms, n) < 0) return -1; } else @@ -2050,7 +1870,7 @@ merge_collapse(MergeState *ms, CompareFuncs compare_funcs) * Returns 0 on success, -1 on error. */ static int -merge_force_collapse(MergeState *ms, CompareFuncs compare_funcs) +merge_force_collapse(MergeState *ms) { struct s_slice *p = ms->pending; @@ -2059,7 +1879,7 @@ merge_force_collapse(MergeState *ms, CompareFuncs compare_funcs) Py_ssize_t n = ms->n - 2; if (n > 0 && p[n-1].len < p[n+1].len) --n; - if (merge_at(ms, n, compare_funcs) < 0) + if (merge_at(ms, n) < 0) return -1; } return 0; @@ -2096,6 +1916,179 @@ reverse_sortslice(sortslice *s, Py_ssize_t n) reverse_slice(s->values, &s->values[n]); } +/* Here we define custom comparison functions to optimize for the cases one commonly + * encounters in practice: homogeneous lists, often of one of the basic types. */ + +/* This struct holds the comparison function and helper functions + * selected in the pre-sort check. */ + +/* These are the special case compare functions. + * ms->key_compare will always point to one of these: */ + +/* Heterogeneous compare: default, always safe to fall back on. */ +static int +safe_object_compare(PyObject* v, PyObject* w, MergeState* ms) +{ + /* No assumptions necessary! */ + return PyObject_RichCompareBool(v, w, Py_LT); +} + +/* Homogeneous compare: safe for any two compareable objects of the same type. + * (ms->key_richcompare is set to ob_type->tp_richcompare in the + * pre-sort check.) + */ +static int +unsafe_object_compare(PyObject* v, PyObject* w, MergeState* ms) +{ + /* Modified from Objects/object.c:PyObject_RichCompareBool, assuming: */ + #ifdef Py_DEBUG + assert(v->ob_type == w->ob_type && + v->ob_type->tp_richcompare != NULL); + #endif + if (v == w) return 0; + + if (v->ob_type->tp_richcompare != ms->key_richcompare) + return PyObject_RichCompareBool(v, w, Py_LT); + + PyObject* res = (*(ms->key_richcompare))(v, w, Py_LT); + if (res == NULL) + return -1; + int ok; + if (PyBool_Check(res)){ + ok = (res == Py_True); + } + else { + ok = PyObject_IsTrue(res); + } + Py_DECREF(res); + return ok; +} + +/* Latin string compare: safe for any two latin (one byte per char) strings. */ +static int +unsafe_latin_compare(PyObject* v, PyObject* w, MergeState* ms){ + /* Modified from Objects/unicodeobject.c:unicode_compare, assuming: */ + #ifdef Py_DEBUG + assert(v->ob_type == w->ob_type && + v->ob_type == &PyUnicode_Type && + PyUnicode_KIND(v) == PyUnicode_KIND(w) && + PyUnicode_KIND(v) == PyUnicode_1BYTE_KIND); + #endif + if (v == w) return 0; + + int len = Py_MIN(PyUnicode_GET_LENGTH(v), PyUnicode_GET_LENGTH(w)); + int res = memcmp(PyUnicode_DATA(v), PyUnicode_DATA(w), len); + + return (res != 0 ? + res < 0 : + PyUnicode_GET_LENGTH(v) < PyUnicode_GET_LENGTH(w)); +} + +/* Bounded int compare: compare any two longs that fit in a single machine word. */ +static int +unsafe_long_compare(PyObject *v, PyObject *w, MergeState* ms) +{ + /* Modified from Objects/longobject.c:long_compare, assuming: */ + #ifdef Py_DEBUG + assert(v->ob_type == w->ob_type && + v->ob_type == &PyLong_Type && + Py_ABS(Py_SIZE(v)) <= 1 && + Py_ABS(Py_SIZE(w)) <= 1); + #endif + if (v == w) return 0; + + PyLongObject *vl, *wl; + vl = (PyLongObject*)v; + wl = (PyLongObject*)w; + + sdigit v0 = Py_SIZE(vl) == 0 ? 0 : (sdigit)vl->ob_digit[0]; + sdigit w0 = Py_SIZE(wl) == 0 ? 0 : (sdigit)wl->ob_digit[0]; + + if (Py_SIZE(vl) < 0) + v0 = -v0; + if (Py_SIZE(wl) < 0) + w0 = -w0; + + return v0 < w0; +} + +/* Float compare: compare any two floats. */ +static int +unsafe_float_compare(PyObject *v, PyObject *w, MergeState* ms){ + /* Modified from Objects/floatobject.c:float_richcompare, assuming: */ + #ifdef Py_DEBUG + assert(v->ob_type == w->ob_type && + v->ob_type == &PyFloat_Type); + #endif + if (v == w) return 0; + + return PyFloat_AS_DOUBLE(v) < PyFloat_AS_DOUBLE(w); +} + +/* Tuple compare: compare any two non-empty tuples, using + * ms->tuple_elem_compare to compare the first elements, which is set + * using the same pre-sort check as we use for ms->key_compare, + * but run on the list [x[0] for x in L]. This allows us to optimize compares + * on two levels (as long as [x[0] for x in L] is type-homogeneous.) */ +static int +unsafe_tuple_compare(PyObject* v, PyObject* w, MergeState* ms) +{ + /* Modified from Objects/tupleobject.c:tuplerichcompare, assuming: */ + #ifdef Py_DEBUG + assert(v->ob_type == w->ob_type && + v->ob_type == &PyTuple_Type && + Py_SIZE(v) > 0 && + Py_SIZE(w) > 0); + #endif + if (v == w) return 0; + + PyTupleObject *vt, *wt; + Py_ssize_t i; + Py_ssize_t vlen, wlen; + + vt = (PyTupleObject *)v; + wt = (PyTupleObject *)w; + + /* Is v[0] < w[0]? */ + int k = (*(ms->tuple_elem_compare))(vt->ob_item[0], wt->ob_item[0], ms); + if (k < 0) + return -1; + if (k) + return 1; + + vlen = Py_SIZE(vt); + wlen = Py_SIZE(wt); + + /* Well, are either of the tuples are singleton? */ + if (vlen == 1 || wlen == 1) + return 0; + + /* Well, is w[0] < v[0]? */ + k = (*(ms->tuple_elem_compare))(wt->ob_item[0], vt->ob_item[0], ms); + if (k < 0) + return -1; + if (k) + return 0; + + /* Out of options: v[0] == w[0]! We need to look at v[1:] and w[1:]. + * We can use code copied straight from tupleobject.c:tuplerichcompare: */ + for (i = 1; i < vlen && i < wlen; i++) { + k = PyObject_RichCompareBool(vt->ob_item[i], + wt->ob_item[i], + Py_EQ); + if (k < 0) + return -1; + if (!k) + break; + } + + if (i >= vlen || i >= wlen) { + return vlen < wlen; + } + + return PyObject_RichCompareBool(vt->ob_item[i], wt->ob_item[i], Py_LT); +} + /* An adaptive, stable, natural mergesort. See listsort.txt. * Returns Py_None on success, NULL on error. Even in case of error, the * list will be some permutation of its input state (nothing is lost or @@ -2169,8 +2162,7 @@ listsort_impl(PyListObject *self, PyObject *keyfunc, int reverse) /* The pre-sort check: here's where we decide which compare function to use. * How much optimization is safe? We test for homogeneity with respect to * several properties that are expensive to check at compare-time, and - * set compare_funcs appropriately. */ - CompareFuncs compare_funcs; + * set ms appropriately. */ if (saved_ob_size > 1) { /* Assume the first element is representative of the whole list. */ int keys_are_in_tuples = (lo.keys[0]->ob_type == &PyTuple_Type && @@ -2220,33 +2212,33 @@ listsort_impl(PyListObject *self, PyObject *keyfunc, int reverse) if (keys_are_all_same_type) { if (key_type == &PyUnicode_Type && strings_are_latin) - compare_funcs.key_compare = unsafe_latin_compare; + ms.key_compare = unsafe_latin_compare; else if (key_type == &PyLong_Type && ints_are_bounded) - compare_funcs.key_compare = unsafe_long_compare; + ms.key_compare = unsafe_long_compare; else if (key_type == &PyFloat_Type) - compare_funcs.key_compare = unsafe_float_compare; + ms.key_compare = unsafe_float_compare; - else if ((compare_funcs.key_richcompare = key_type->tp_richcompare) != NULL) - compare_funcs.key_compare = unsafe_object_compare; + else if ((ms.key_richcompare = key_type->tp_richcompare) != NULL) + ms.key_compare = unsafe_object_compare; } else { - compare_funcs.key_compare = safe_object_compare; + ms.key_compare = safe_object_compare; } if (keys_are_in_tuples) { /* Make sure we're not dealing with tuples of tuples * (remember: here, key_type refers list [key[0] for key in keys]) */ if (key_type == &PyTuple_Type) - compare_funcs.tuple_elem_compare = safe_object_compare; + ms.tuple_elem_compare = safe_object_compare; else - compare_funcs.tuple_elem_compare = compare_funcs.key_compare; + ms.tuple_elem_compare = ms.key_compare; - compare_funcs.key_compare = unsafe_tuple_compare; + ms.key_compare = unsafe_tuple_compare; } } - /* End of pre-sort check: compare_funcs is now set properly! */ + /* End of pre-sort check: ms is now set properly! */ merge_init(&ms, saved_ob_size, keys != NULL); @@ -2271,7 +2263,7 @@ listsort_impl(PyListObject *self, PyObject *keyfunc, int reverse) Py_ssize_t n; /* Identify next run. */ - n = count_run(lo.keys, lo.keys + nremaining, &descending, compare_funcs); + n = count_run(&ms, lo.keys, lo.keys + nremaining, &descending); if (n < 0) goto fail; if (descending) @@ -2280,7 +2272,7 @@ listsort_impl(PyListObject *self, PyObject *keyfunc, int reverse) if (n < minrun) { const Py_ssize_t force = nremaining <= minrun ? nremaining : minrun; - if (binarysort(lo, lo.keys + force, lo.keys + n, compare_funcs) < 0) + if (binarysort(&ms, lo, lo.keys + force, lo.keys + n) < 0) goto fail; n = force; } @@ -2289,14 +2281,14 @@ listsort_impl(PyListObject *self, PyObject *keyfunc, int reverse) ms.pending[ms.n].base = lo; ms.pending[ms.n].len = n; ++ms.n; - if (merge_collapse(&ms, compare_funcs) < 0) + if (merge_collapse(&ms) < 0) goto fail; /* Advance to find next run. */ sortslice_advance(&lo, n); nremaining -= n; } while (nremaining); - if (merge_force_collapse(&ms, compare_funcs) < 0) + if (merge_force_collapse(&ms) < 0) goto fail; assert(ms.n == 1); assert(keys == NULL From 395bc7d0a3444f6238ed03d57984e308dda45715 Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Sun, 12 Mar 2017 14:12:44 -0600 Subject: [PATCH 17/47] formatting --- Objects/listobject.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Objects/listobject.c b/Objects/listobject.c index dace48312e3c74..53691692cceb66 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -1506,7 +1506,8 @@ merge_getmem(MergeState *ms, Py_ssize_t need) * successful, -1 if error. */ static Py_ssize_t -merge_lo(MergeState *ms, sortslice ssa, Py_ssize_t na, sortslice ssb, Py_ssize_t nb) +merge_lo(MergeState *ms, sortslice ssa, Py_ssize_t na, + sortslice ssb, Py_ssize_t nb) { Py_ssize_t k; sortslice dest; @@ -1637,7 +1638,8 @@ merge_lo(MergeState *ms, sortslice ssa, Py_ssize_t na, sortslice ssb, Py_ssize_t * successful, -1 if error. */ static Py_ssize_t -merge_hi(MergeState *ms, sortslice ssa, Py_ssize_t na, sortslice ssb, Py_ssize_t nb) +merge_hi(MergeState *ms, sortslice ssa, Py_ssize_t na, + sortslice ssb, Py_ssize_t nb) { Py_ssize_t k; sortslice dest, basea, baseb; From e67758616b010a51c767d18b80f5fe9751f27611 Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Sun, 12 Mar 2017 14:14:21 -0600 Subject: [PATCH 18/47] formatting --- Objects/listobject.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Objects/listobject.c b/Objects/listobject.c index 53691692cceb66..de2f5893c3863e 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -1507,7 +1507,7 @@ merge_getmem(MergeState *ms, Py_ssize_t need) */ static Py_ssize_t merge_lo(MergeState *ms, sortslice ssa, Py_ssize_t na, - sortslice ssb, Py_ssize_t nb) + sortslice ssb, Py_ssize_t nb) { Py_ssize_t k; sortslice dest; @@ -1639,7 +1639,7 @@ merge_lo(MergeState *ms, sortslice ssa, Py_ssize_t na, */ static Py_ssize_t merge_hi(MergeState *ms, sortslice ssa, Py_ssize_t na, - sortslice ssb, Py_ssize_t nb) + sortslice ssb, Py_ssize_t nb) { Py_ssize_t k; sortslice dest, basea, baseb; @@ -1950,7 +1950,7 @@ unsafe_object_compare(PyObject* v, PyObject* w, MergeState* ms) if (v == w) return 0; if (v->ob_type->tp_richcompare != ms->key_richcompare) - return PyObject_RichCompareBool(v, w, Py_LT); + return PyObject_RichCompareBool(v, w, Py_LT); PyObject* res = (*(ms->key_richcompare))(v, w, Py_LT); if (res == NULL) From 6070c72def67df06602f2a788e3d2155dacbdc1b Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Sun, 12 Mar 2017 14:25:08 -0600 Subject: [PATCH 19/47] don't need (v==w) for ints/strings --- Objects/listobject.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/Objects/listobject.c b/Objects/listobject.c index de2f5893c3863e..ab9647d79433a9 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -1976,7 +1976,6 @@ unsafe_latin_compare(PyObject* v, PyObject* w, MergeState* ms){ PyUnicode_KIND(v) == PyUnicode_KIND(w) && PyUnicode_KIND(v) == PyUnicode_1BYTE_KIND); #endif - if (v == w) return 0; int len = Py_MIN(PyUnicode_GET_LENGTH(v), PyUnicode_GET_LENGTH(w)); int res = memcmp(PyUnicode_DATA(v), PyUnicode_DATA(w), len); @@ -1997,7 +1996,6 @@ unsafe_long_compare(PyObject *v, PyObject *w, MergeState* ms) Py_ABS(Py_SIZE(v)) <= 1 && Py_ABS(Py_SIZE(w)) <= 1); #endif - if (v == w) return 0; PyLongObject *vl, *wl; vl = (PyLongObject*)v; From 294aa1c1496d297de6a8fe33f1d5203084567d7a Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Sun, 12 Mar 2017 14:32:48 -0600 Subject: [PATCH 20/47] went back to i=0 for tuples; we can't infer == from < and >, even with v==w --- Objects/listobject.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Objects/listobject.c b/Objects/listobject.c index ab9647d79433a9..0997dd46c03d4b 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -2072,7 +2072,7 @@ unsafe_tuple_compare(PyObject* v, PyObject* w, MergeState* ms) /* Out of options: v[0] == w[0]! We need to look at v[1:] and w[1:]. * We can use code copied straight from tupleobject.c:tuplerichcompare: */ - for (i = 1; i < vlen && i < wlen; i++) { + for (i = 0; i < vlen && i < wlen; i++) { k = PyObject_RichCompareBool(vt->ob_item[i], wt->ob_item[i], Py_EQ); @@ -2083,7 +2083,7 @@ unsafe_tuple_compare(PyObject* v, PyObject* w, MergeState* ms) } if (i >= vlen || i >= wlen) { - return vlen < wlen; + return vlen < wlen; } return PyObject_RichCompareBool(vt->ob_item[i], wt->ob_item[i], Py_LT); From ba05b2ac67bde2749f9ec8ae94280b99265d6ad7 Mon Sep 17 00:00:00 2001 From: embg Date: Sun, 12 Mar 2017 22:54:17 -0600 Subject: [PATCH 21/47] move all declarations to top of their blocks --- Objects/listobject.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/Objects/listobject.c b/Objects/listobject.c index 0997dd46c03d4b..4efcbcc9cb18f6 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -1947,15 +1947,16 @@ unsafe_object_compare(PyObject* v, PyObject* w, MergeState* ms) assert(v->ob_type == w->ob_type && v->ob_type->tp_richcompare != NULL); #endif - if (v == w) return 0; + int ok; + if (v == w) return 0; if (v->ob_type->tp_richcompare != ms->key_richcompare) return PyObject_RichCompareBool(v, w, Py_LT); PyObject* res = (*(ms->key_richcompare))(v, w, Py_LT); if (res == NULL) return -1; - int ok; + if (PyBool_Check(res)){ ok = (res == Py_True); } @@ -2040,7 +2041,6 @@ unsafe_tuple_compare(PyObject* v, PyObject* w, MergeState* ms) Py_SIZE(v) > 0 && Py_SIZE(w) > 0); #endif - if (v == w) return 0; PyTupleObject *vt, *wt; Py_ssize_t i; @@ -2048,6 +2048,10 @@ unsafe_tuple_compare(PyObject* v, PyObject* w, MergeState* ms) vt = (PyTupleObject *)v; wt = (PyTupleObject *)w; + + int ok; + + if (v == w) return 0; /* Is v[0] < w[0]? */ int k = (*(ms->tuple_elem_compare))(vt->ob_item[0], wt->ob_item[0], ms); From 40ba266d072d2dc5412cb998247308ee49e749c0 Mon Sep 17 00:00:00 2001 From: embg Date: Sun, 12 Mar 2017 22:57:30 -0600 Subject: [PATCH 22/47] typo --- Objects/listobject.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Objects/listobject.c b/Objects/listobject.c index 4efcbcc9cb18f6..006f5edaf0f668 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -2049,7 +2049,7 @@ unsafe_tuple_compare(PyObject* v, PyObject* w, MergeState* ms) vt = (PyTupleObject *)v; wt = (PyTupleObject *)w; - int ok; + int k; if (v == w) return 0; From a1759392153e6cbae88b5f85c493450d380f17be Mon Sep 17 00:00:00 2001 From: embg Date: Sun, 12 Mar 2017 22:58:46 -0600 Subject: [PATCH 23/47] typo --- Objects/listobject.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Objects/listobject.c b/Objects/listobject.c index 006f5edaf0f668..c656db68216882 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -2054,7 +2054,7 @@ unsafe_tuple_compare(PyObject* v, PyObject* w, MergeState* ms) if (v == w) return 0; /* Is v[0] < w[0]? */ - int k = (*(ms->tuple_elem_compare))(vt->ob_item[0], wt->ob_item[0], ms); + k = (*(ms->tuple_elem_compare))(vt->ob_item[0], wt->ob_item[0], ms); if (k < 0) return -1; if (k) From f0dc847b72424d29386a461aa2dfc78a4c50b768 Mon Sep 17 00:00:00 2001 From: embg Date: Sun, 12 Mar 2017 23:04:32 -0600 Subject: [PATCH 24/47] added Py_NotImplemented check to unsafe_object_compare --- Objects/listobject.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Objects/listobject.c b/Objects/listobject.c index c656db68216882..bad947c9ede479 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -1954,6 +1954,11 @@ unsafe_object_compare(PyObject* v, PyObject* w, MergeState* ms) return PyObject_RichCompareBool(v, w, Py_LT); PyObject* res = (*(ms->key_richcompare))(v, w, Py_LT); + + if (res == Py_NotImplemented) { + Py_DECREF(res); + return PyObject_RichCompareBool(v, w, Py_LT); + } if (res == NULL) return -1; From 15f87a26511b97e5de080122d81f1dd5b7438e23 Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Sun, 12 Mar 2017 23:21:00 -0600 Subject: [PATCH 25/47] ACTUALLY moved declarations to the tops of blocks --- Objects/listobject.c | 99 +++++++++++++++++++++++--------------------- 1 file changed, 51 insertions(+), 48 deletions(-) diff --git a/Objects/listobject.c b/Objects/listobject.c index bad947c9ede479..1baa730b278a48 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -1942,18 +1942,20 @@ safe_object_compare(PyObject* v, PyObject* w, MergeState* ms) static int unsafe_object_compare(PyObject* v, PyObject* w, MergeState* ms) { - /* Modified from Objects/object.c:PyObject_RichCompareBool, assuming: */ - #ifdef Py_DEBUG - assert(v->ob_type == w->ob_type && - v->ob_type->tp_richcompare != NULL); - #endif - int ok; + int ok; PyObject* res; + + /* Modified from Objects/object.c:PyObject_RichCompareBool, assuming: */ + #ifdef Py_DEBUG + assert(v->ob_type == w->ob_type && + v->ob_type->tp_richcompare != NULL); + #endif + if (v == w) return 0; if (v->ob_type->tp_richcompare != ms->key_richcompare) return PyObject_RichCompareBool(v, w, Py_LT); - PyObject* res = (*(ms->key_richcompare))(v, w, Py_LT); + res = (*(ms->key_richcompare))(v, w, Py_LT); if (res == Py_NotImplemented) { Py_DECREF(res); @@ -1975,16 +1977,18 @@ unsafe_object_compare(PyObject* v, PyObject* w, MergeState* ms) /* Latin string compare: safe for any two latin (one byte per char) strings. */ static int unsafe_latin_compare(PyObject* v, PyObject* w, MergeState* ms){ - /* Modified from Objects/unicodeobject.c:unicode_compare, assuming: */ - #ifdef Py_DEBUG - assert(v->ob_type == w->ob_type && - v->ob_type == &PyUnicode_Type && - PyUnicode_KIND(v) == PyUnicode_KIND(w) && - PyUnicode_KIND(v) == PyUnicode_1BYTE_KIND); - #endif + int len, res; + + /* Modified from Objects/unicodeobject.c:unicode_compare, assuming: */ + #ifdef Py_DEBUG + assert(v->ob_type == w->ob_type && + v->ob_type == &PyUnicode_Type && + PyUnicode_KIND(v) == PyUnicode_KIND(w) && + PyUnicode_KIND(v) == PyUnicode_1BYTE_KIND); + #endif - int len = Py_MIN(PyUnicode_GET_LENGTH(v), PyUnicode_GET_LENGTH(w)); - int res = memcmp(PyUnicode_DATA(v), PyUnicode_DATA(w), len); + len = Py_MIN(PyUnicode_GET_LENGTH(v), PyUnicode_GET_LENGTH(w)); + res = memcmp(PyUnicode_DATA(v), PyUnicode_DATA(w), len); return (res != 0 ? res < 0 : @@ -1995,20 +1999,21 @@ unsafe_latin_compare(PyObject* v, PyObject* w, MergeState* ms){ static int unsafe_long_compare(PyObject *v, PyObject *w, MergeState* ms) { - /* Modified from Objects/longobject.c:long_compare, assuming: */ - #ifdef Py_DEBUG - assert(v->ob_type == w->ob_type && - v->ob_type == &PyLong_Type && - Py_ABS(Py_SIZE(v)) <= 1 && - Py_ABS(Py_SIZE(w)) <= 1); - #endif + PyLongObject *vl, *wl; sdigit v0, w0; + + /* Modified from Objects/longobject.c:long_compare, assuming: */ + #ifdef Py_DEBUG + assert(v->ob_type == w->ob_type && + v->ob_type == &PyLong_Type && + Py_ABS(Py_SIZE(v)) <= 1 && + Py_ABS(Py_SIZE(w)) <= 1); + #endif - PyLongObject *vl, *wl; vl = (PyLongObject*)v; wl = (PyLongObject*)w; - sdigit v0 = Py_SIZE(vl) == 0 ? 0 : (sdigit)vl->ob_digit[0]; - sdigit w0 = Py_SIZE(wl) == 0 ? 0 : (sdigit)wl->ob_digit[0]; + v0 = Py_SIZE(vl) == 0 ? 0 : (sdigit)vl->ob_digit[0]; + w0 = Py_SIZE(wl) == 0 ? 0 : (sdigit)wl->ob_digit[0]; if (Py_SIZE(vl) < 0) v0 = -v0; @@ -2021,13 +2026,13 @@ unsafe_long_compare(PyObject *v, PyObject *w, MergeState* ms) /* Float compare: compare any two floats. */ static int unsafe_float_compare(PyObject *v, PyObject *w, MergeState* ms){ - /* Modified from Objects/floatobject.c:float_richcompare, assuming: */ - #ifdef Py_DEBUG - assert(v->ob_type == w->ob_type && - v->ob_type == &PyFloat_Type); - #endif - if (v == w) return 0; - + /* Modified from Objects/floatobject.c:float_richcompare, assuming: */ + #ifdef Py_DEBUG + assert(v->ob_type == w->ob_type && + v->ob_type == &PyFloat_Type); + #endif + + if (v == w) return 0; return PyFloat_AS_DOUBLE(v) < PyFloat_AS_DOUBLE(w); } @@ -2038,25 +2043,23 @@ unsafe_float_compare(PyObject *v, PyObject *w, MergeState* ms){ * on two levels (as long as [x[0] for x in L] is type-homogeneous.) */ static int unsafe_tuple_compare(PyObject* v, PyObject* w, MergeState* ms) -{ - /* Modified from Objects/tupleobject.c:tuplerichcompare, assuming: */ - #ifdef Py_DEBUG - assert(v->ob_type == w->ob_type && - v->ob_type == &PyTuple_Type && - Py_SIZE(v) > 0 && - Py_SIZE(w) > 0); - #endif - +{ PyTupleObject *vt, *wt; - Py_ssize_t i; - Py_ssize_t vlen, wlen; + Py_ssize_t i, vlen, wlen; + int k; + + /* Modified from Objects/tupleobject.c:tuplerichcompare, assuming: */ + #ifdef Py_DEBUG + assert(v->ob_type == w->ob_type && + v->ob_type == &PyTuple_Type && + Py_SIZE(v) > 0 && + Py_SIZE(w) > 0); + #endif + + if (v == w) return 0; vt = (PyTupleObject *)v; wt = (PyTupleObject *)w; - - int k; - - if (v == w) return 0; /* Is v[0] < w[0]? */ k = (*(ms->tuple_elem_compare))(vt->ob_item[0], wt->ob_item[0], ms); From 15f2f01cb80ee95285a1e4fe341bc66cb2d68a5b Mon Sep 17 00:00:00 2001 From: embg Date: Thu, 9 Mar 2017 14:00:21 -0700 Subject: [PATCH 26/47] fix typo --- Objects/listobject.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Objects/listobject.c b/Objects/listobject.c index 0e03fbbf31c176..f36c3fe8bdfe8a 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -1046,7 +1046,7 @@ sortslice_advance(sortslice *slice, Py_ssize_t n) if (k) /* Here we define custom comparison functions to optimize for the cases one commonly - * in practice: homogeneous lists, often of one of the basic types. */ + * encounters in practice: homogeneous lists, often of one of the basic types. */ /* This struct holds the comparison function and helper functions * selected in the pre-sort check. */ From 6afa847f599815cf853ad16dfa5b0443b5766697 Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Sat, 11 Mar 2017 10:28:32 -0700 Subject: [PATCH 27/47] Added if (v == w) return 0; to all compares, apologies for previous commit --- Objects/listobject.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Objects/listobject.c b/Objects/listobject.c index f36c3fe8bdfe8a..25536acafa757e 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -1092,7 +1092,7 @@ unsafe_object_compare(PyObject* v, PyObject* w, CompareFuncs compare_funcs) v->ob_type->tp_richcompare != NULL && v->ob_type->tp_richcompare == compare_funcs.key_richcompare); #endif - if (v == w) return 1; + if (v == w) return 0; PyObject* res = (*compare_funcs.key_richcompare)(v, w, Py_LT); if (res == NULL) @@ -1118,7 +1118,7 @@ unsafe_latin_compare(PyObject* v, PyObject* w, CompareFuncs compare_funcs){ PyUnicode_KIND(v) == PyUnicode_KIND(w) && PyUnicode_KIND(v) == PyUnicode_1BYTE_KIND); #endif - if (v == w) return 1; + if (v == w) return 0; int len = Py_MIN(PyUnicode_GET_LENGTH(v), PyUnicode_GET_LENGTH(w)); int res = memcmp(PyUnicode_DATA(v), PyUnicode_DATA(w), len); @@ -1139,7 +1139,7 @@ unsafe_long_compare(PyObject *v, PyObject *w, CompareFuncs compare_funcs) Py_ABS(Py_SIZE(v)) <= 1 && Py_ABS(Py_SIZE(w)) <= 1); #endif - if (v == w) return 1; + if (v == w) return 0; PyLongObject *vl, *wl; vl = (PyLongObject*)v; @@ -1164,7 +1164,7 @@ unsafe_float_compare(PyObject *v, PyObject *w, CompareFuncs compare_funcs){ assert(v->ob_type == w->ob_type && v->ob_type == &PyFloat_Type); #endif - if (v == w) return 1; + if (v == w) return 0; return PyFloat_AS_DOUBLE(v) < PyFloat_AS_DOUBLE(w); } @@ -1184,7 +1184,7 @@ unsafe_tuple_compare(PyObject* v, PyObject* w, CompareFuncs compare_funcs) Py_SIZE(v) > 0 && Py_SIZE(w) > 0); #endif - if (v == w) return 1; + if (v == w) return 0; PyTupleObject *vt, *wt; Py_ssize_t i; From af7c027773850307cf7809e84fa71abc9f88602d Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Sun, 12 Mar 2017 14:11:04 -0600 Subject: [PATCH 28/47] Folded CompareFuncs into MergeState and added safety check to unsafe_object_compare --- Objects/listobject.c | 542 +++++++++++++++++++++---------------------- 1 file changed, 267 insertions(+), 275 deletions(-) diff --git a/Objects/listobject.c b/Objects/listobject.c index 25536acafa757e..dace48312e3c74 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -1031,12 +1031,12 @@ sortslice_advance(sortslice *slice, Py_ssize_t n) slice->values += n; } -/* Comparison function: compare_funcs.key_compare, which is set at run-time in +/* Comparison function: ms->key_compare, which is set at run-time in * listsort_impl to optimize for various special cases. * Returns -1 on error, 1 if x < y, 0 if x >= y. */ -#define ISLT(X, Y) ((*compare_funcs.key_compare)(X, Y, compare_funcs)) +#define ISLT(X, Y) (*(ms->key_compare))(X, Y, ms) /* Compare X to Y via "<". Goto "fail" if the comparison raises an error. Else "k" is set to true iff Xob_type->tp_richcompare */ - PyObject* (*key_richcompare)(PyObject*, PyObject*, int); - - /* This function is used by unsafe_tuple_compare to compare the first elements - * of tuples. It may be set to safe_object_compare, but the idea is that hopefully - * we can assume more, and use one of the special-case compares. */ - int (*tuple_elem_compare)(PyObject*, PyObject*, CompareFuncs); -}; - -/* These are the special case compare functions. - * compare_funcs.key_compare will always point to one of these: */ - -/* Heterogeneous compare: default, always safe to fall back on. */ -static int -safe_object_compare(PyObject* v, PyObject* w, CompareFuncs compare_funcs) -{ - /* No assumptions necessary! */ - return PyObject_RichCompareBool(v, w, Py_LT); -} - -/* Homogeneous compare: safe for any two compareable objects of the same type. - * (compare_funcs.key_richcompare is set to ob_type->tp_richcompare in the - * pre-sort check.) +/* The maximum number of entries in a MergeState's pending-runs stack. + * This is enough to sort arrays of size up to about + * 32 * phi ** MAX_MERGE_PENDING + * where phi ~= 1.618. 85 is ridiculouslylarge enough, good for an array + * with 2**64 elements. */ -static int -unsafe_object_compare(PyObject* v, PyObject* w, CompareFuncs compare_funcs) -{ - /* Modified from Objects/object.c:PyObject_RichCompareBool, assuming: */ - #ifdef Py_DEBUG - assert(v->ob_type == w->ob_type && - v->ob_type->tp_richcompare != NULL && - v->ob_type->tp_richcompare == compare_funcs.key_richcompare); - #endif - if (v == w) return 0; - - PyObject* res = (*compare_funcs.key_richcompare)(v, w, Py_LT); - if (res == NULL) - return -1; - int ok; - if (PyBool_Check(res)){ - ok = (res == Py_True); - } - else { - ok = PyObject_IsTrue(res); - } - Py_DECREF(res); - return ok; -} - -/* Latin string compare: safe for any two latin (one byte per char) strings. */ -static int -unsafe_latin_compare(PyObject* v, PyObject* w, CompareFuncs compare_funcs){ - /* Modified from Objects/unicodeobject.c:unicode_compare, assuming: */ - #ifdef Py_DEBUG - assert(v->ob_type == w->ob_type && - v->ob_type == &PyUnicode_Type && - PyUnicode_KIND(v) == PyUnicode_KIND(w) && - PyUnicode_KIND(v) == PyUnicode_1BYTE_KIND); - #endif - if (v == w) return 0; - - int len = Py_MIN(PyUnicode_GET_LENGTH(v), PyUnicode_GET_LENGTH(w)); - int res = memcmp(PyUnicode_DATA(v), PyUnicode_DATA(w), len); - - return (res != 0 ? - res < 0 : - PyUnicode_GET_LENGTH(v) < PyUnicode_GET_LENGTH(w)); -} - -/* Bounded int compare: compare any two longs that fit in a single machine word. */ -static int -unsafe_long_compare(PyObject *v, PyObject *w, CompareFuncs compare_funcs) -{ - /* Modified from Objects/longobject.c:long_compare, assuming: */ - #ifdef Py_DEBUG - assert(v->ob_type == w->ob_type && - v->ob_type == &PyLong_Type && - Py_ABS(Py_SIZE(v)) <= 1 && - Py_ABS(Py_SIZE(w)) <= 1); - #endif - if (v == w) return 0; - - PyLongObject *vl, *wl; - vl = (PyLongObject*)v; - wl = (PyLongObject*)w; - - sdigit v0 = Py_SIZE(vl) == 0 ? 0 : (sdigit)vl->ob_digit[0]; - sdigit w0 = Py_SIZE(wl) == 0 ? 0 : (sdigit)wl->ob_digit[0]; - - if (Py_SIZE(vl) < 0) - v0 = -v0; - if (Py_SIZE(wl) < 0) - w0 = -w0; - - return v0 < w0; -} +#define MAX_MERGE_PENDING 85 -/* Float compare: compare any two floats. */ -static int -unsafe_float_compare(PyObject *v, PyObject *w, CompareFuncs compare_funcs){ - /* Modified from Objects/floatobject.c:float_richcompare, assuming: */ - #ifdef Py_DEBUG - assert(v->ob_type == w->ob_type && - v->ob_type == &PyFloat_Type); - #endif - if (v == w) return 0; - - return PyFloat_AS_DOUBLE(v) < PyFloat_AS_DOUBLE(w); -} +/* When we get into galloping mode, we stay there until both runs win less + * often than MIN_GALLOP consecutive times. See listsort.txt for more info. + */ +#define MIN_GALLOP 7 -/* Tuple compare: compare any two non-empty tuples, using - * compare_funcs.tuple_elem_compare to compare the first elements, which is set - * using the same pre-sort check as we use for compare_funcs.key_compare, - * but run on the list [x[0] for x in L]. This allows us to optimize compares - * on two levels (as long as [x[0] for x in L] is type-homogeneous.) */ -static int -unsafe_tuple_compare(PyObject* v, PyObject* w, CompareFuncs compare_funcs) -{ - /* Modified from Objects/tupleobject.c:tuplerichcompare, assuming: */ - #ifdef Py_DEBUG - assert(v->ob_type == w->ob_type && - v->ob_type == &PyTuple_Type && - Py_SIZE(v) > 0 && - Py_SIZE(w) > 0); - #endif - if (v == w) return 0; - - PyTupleObject *vt, *wt; - Py_ssize_t i; - Py_ssize_t vlen, wlen; +/* Avoid malloc for small temp arrays. */ +#define MERGESTATE_TEMP_SIZE 256 - vt = (PyTupleObject *)v; - wt = (PyTupleObject *)w; +/* One MergeState exists on the stack per invocation of mergesort. It's just + * a convenient way to pass state around among the helper functions. + */ +struct s_slice { + sortslice base; + Py_ssize_t len; +}; - /* Is v[0] < w[0]? */ - int k = (*compare_funcs.tuple_elem_compare)(vt->ob_item[0], - wt->ob_item[0], - compare_funcs); - if (k < 0) - return -1; - if (k) - return 1; +typedef struct s_MergeState MergeState; +struct s_MergeState { + /* This controls when we get *into* galloping mode. It's initialized + * to MIN_GALLOP. merge_lo and merge_hi tend to nudge it higher for + * random data, and lower for highly structured data. + */ + Py_ssize_t min_gallop; - vlen = Py_SIZE(vt); - wlen = Py_SIZE(wt); + /* 'a' is temp storage to help with merges. It contains room for + * alloced entries. + */ + sortslice a; /* may point to temparray below */ + Py_ssize_t alloced; - /* Well, are either of the tuples are singleton? */ - if (vlen == 1 || wlen == 1) - return 0; + /* A stack of n pending runs yet to be merged. Run #i starts at + * address base[i] and extends for len[i] elements. It's always + * true (so long as the indices are in bounds) that + * + * pending[i].base + pending[i].len == pending[i+1].base + * + * so we could cut the storage for this, but it's a minor amount, + * and keeping all the info explicit simplifies the code. + */ + int n; + struct s_slice pending[MAX_MERGE_PENDING]; - /* Well, is w[0] < v[0]? */ - k = (*compare_funcs.tuple_elem_compare)(wt->ob_item[0], - vt->ob_item[0], - compare_funcs); - if (k < 0) - return -1; - if (k) - return 0; + /* 'a' points to this when possible, rather than muck with malloc. */ + PyObject *temparray[MERGESTATE_TEMP_SIZE]; - /* Out of options: v[0] == w[0]! We need to look at v[1:] and w[1:]. - * We can use code copied straight from tupleobject.c:tuplerichcompare: */ - for (i = 1; i < vlen && i < wlen; i++) { - k = PyObject_RichCompareBool(vt->ob_item[i], - wt->ob_item[i], - Py_EQ); - if (k < 0) - return -1; - if (!k) - break; - } + /* This is the function we will use to compare two keys, + * even when none of our special cases apply and we have to use + * safe_object_compare. */ + int (*key_compare)(PyObject*, PyObject*, MergeState*); - if (i >= vlen || i >= wlen) { - return vlen < wlen; - } + /* This function is used by unsafe_object_compare to optimize comparisons + * when we know our list is type-homogeneous but we can't assume anything else. + * In the pre-sort check it is set equal to key->ob_type->tp_richcompare */ + PyObject* (*key_richcompare)(PyObject*, PyObject*, int); - return PyObject_RichCompareBool(vt->ob_item[i], wt->ob_item[i], Py_LT); -} + /* This function is used by unsafe_tuple_compare to compare the first elements + * of tuples. It may be set to safe_object_compare, but the idea is that hopefully + * we can assume more, and use one of the special-case compares. */ + int (*tuple_elem_compare)(PyObject*, PyObject*, MergeState*); +}; /* binarysort is the best method for sorting small arrays: it does few compares, but can do data movement quadratic in the number of @@ -1249,7 +1126,7 @@ unsafe_tuple_compare(PyObject* v, PyObject* w, CompareFuncs compare_funcs) the input (nothing is lost or duplicated). */ static int -binarysort(sortslice lo, PyObject **hi, PyObject **start, CompareFuncs compare_funcs) +binarysort(MergeState* ms, sortslice lo, PyObject **hi, PyObject **start) { Py_ssize_t k; PyObject **l, **p, **r; @@ -1323,7 +1200,7 @@ elements to get out of order). Returns -1 in case of error. */ static Py_ssize_t -count_run(PyObject **lo, PyObject **hi, int *descending, CompareFuncs compare_funcs) +count_run(MergeState* ms, PyObject **lo, PyObject **hi, int *descending) { Py_ssize_t k; Py_ssize_t n; @@ -1378,8 +1255,7 @@ key, and the last n-k should follow key. Returns -1 on error. See listsort.txt for info on the method. */ static Py_ssize_t -gallop_left(PyObject *key, PyObject **a, Py_ssize_t n, Py_ssize_t hint, - CompareFuncs compare_funcs) +gallop_left(MergeState* ms, PyObject *key, PyObject **a, Py_ssize_t n, Py_ssize_t hint) { Py_ssize_t ofs; Py_ssize_t lastofs; @@ -1470,8 +1346,7 @@ we're sticking to "<" comparisons that it's much harder to follow if written as one routine with yet another "left or right?" flag. */ static Py_ssize_t -gallop_right(PyObject *key, PyObject **a, Py_ssize_t n, Py_ssize_t hint, - CompareFuncs compare_funcs) +gallop_right(MergeState* ms, PyObject *key, PyObject **a, Py_ssize_t n, Py_ssize_t hint) { Py_ssize_t ofs; Py_ssize_t lastofs; @@ -1547,59 +1422,6 @@ gallop_right(PyObject *key, PyObject **a, Py_ssize_t n, Py_ssize_t hint, return -1; } -/* The maximum number of entries in a MergeState's pending-runs stack. - * This is enough to sort arrays of size up to about - * 32 * phi ** MAX_MERGE_PENDING - * where phi ~= 1.618. 85 is ridiculouslylarge enough, good for an array - * with 2**64 elements. - */ -#define MAX_MERGE_PENDING 85 - -/* When we get into galloping mode, we stay there until both runs win less - * often than MIN_GALLOP consecutive times. See listsort.txt for more info. - */ -#define MIN_GALLOP 7 - -/* Avoid malloc for small temp arrays. */ -#define MERGESTATE_TEMP_SIZE 256 - -/* One MergeState exists on the stack per invocation of mergesort. It's just - * a convenient way to pass state around among the helper functions. - */ -struct s_slice { - sortslice base; - Py_ssize_t len; -}; - -typedef struct s_MergeState { - /* This controls when we get *into* galloping mode. It's initialized - * to MIN_GALLOP. merge_lo and merge_hi tend to nudge it higher for - * random data, and lower for highly structured data. - */ - Py_ssize_t min_gallop; - - /* 'a' is temp storage to help with merges. It contains room for - * alloced entries. - */ - sortslice a; /* may point to temparray below */ - Py_ssize_t alloced; - - /* A stack of n pending runs yet to be merged. Run #i starts at - * address base[i] and extends for len[i] elements. It's always - * true (so long as the indices are in bounds) that - * - * pending[i].base + pending[i].len == pending[i+1].base - * - * so we could cut the storage for this, but it's a minor amount, - * and keeping all the info explicit simplifies the code. - */ - int n; - struct s_slice pending[MAX_MERGE_PENDING]; - - /* 'a' points to this when possible, rather than muck with malloc. */ - PyObject *temparray[MERGESTATE_TEMP_SIZE]; -} MergeState; - /* Conceptually a MergeState's constructor. */ static void merge_init(MergeState *ms, Py_ssize_t list_size, int has_keyfunc) @@ -1684,8 +1506,7 @@ merge_getmem(MergeState *ms, Py_ssize_t need) * successful, -1 if error. */ static Py_ssize_t -merge_lo(MergeState *ms, sortslice ssa, Py_ssize_t na, - sortslice ssb, Py_ssize_t nb, CompareFuncs compare_funcs) +merge_lo(MergeState *ms, sortslice ssa, Py_ssize_t na, sortslice ssb, Py_ssize_t nb) { Py_ssize_t k; sortslice dest; @@ -1752,7 +1573,7 @@ merge_lo(MergeState *ms, sortslice ssa, Py_ssize_t na, assert(na > 1 && nb > 0); min_gallop -= min_gallop > 1; ms->min_gallop = min_gallop; - k = gallop_right(ssb.keys[0], ssa.keys, na, 0, compare_funcs); + k = gallop_right(ms, ssb.keys[0], ssa.keys, na, 0); acount = k; if (k) { if (k < 0) @@ -1775,7 +1596,7 @@ merge_lo(MergeState *ms, sortslice ssa, Py_ssize_t na, if (nb == 0) goto Succeed; - k = gallop_left(ssa.keys[0], ssb.keys, nb, 0, compare_funcs); + k = gallop_left(ms, ssa.keys[0], ssb.keys, nb, 0); bcount = k; if (k) { if (k < 0) @@ -1816,8 +1637,7 @@ merge_lo(MergeState *ms, sortslice ssa, Py_ssize_t na, * successful, -1 if error. */ static Py_ssize_t -merge_hi(MergeState *ms, sortslice ssa, Py_ssize_t na, - sortslice ssb, Py_ssize_t nb, CompareFuncs compare_funcs) +merge_hi(MergeState *ms, sortslice ssa, Py_ssize_t na, sortslice ssb, Py_ssize_t nb) { Py_ssize_t k; sortslice dest, basea, baseb; @@ -1890,7 +1710,7 @@ merge_hi(MergeState *ms, sortslice ssa, Py_ssize_t na, assert(na > 0 && nb > 1); min_gallop -= min_gallop > 1; ms->min_gallop = min_gallop; - k = gallop_right(ssb.keys[0], basea.keys, na, na-1, compare_funcs); + k = gallop_right(ms, ssb.keys[0], basea.keys, na, na-1); if (k < 0) goto Fail; k = na - k; @@ -1908,7 +1728,7 @@ merge_hi(MergeState *ms, sortslice ssa, Py_ssize_t na, if (nb == 1) goto CopyA; - k = gallop_left(ssa.keys[0], baseb.keys, nb, nb-1, compare_funcs); + k = gallop_left(ms, ssa.keys[0], baseb.keys, nb, nb-1); if (k < 0) goto Fail; k = nb - k; @@ -1955,7 +1775,7 @@ merge_hi(MergeState *ms, sortslice ssa, Py_ssize_t na, * Returns 0 on success, -1 on error. */ static Py_ssize_t -merge_at(MergeState *ms, Py_ssize_t i, CompareFuncs compare_funcs) +merge_at(MergeState *ms, Py_ssize_t i) { sortslice ssa, ssb; Py_ssize_t na, nb; @@ -1985,7 +1805,7 @@ merge_at(MergeState *ms, Py_ssize_t i, CompareFuncs compare_funcs) /* Where does b start in a? Elements in a before that can be * ignored (already in place). */ - k = gallop_right(*ssb.keys, ssa.keys, na, 0, compare_funcs); + k = gallop_right(ms, *ssb.keys, ssa.keys, na, 0); if (k < 0) return -1; sortslice_advance(&ssa, k); @@ -1996,7 +1816,7 @@ merge_at(MergeState *ms, Py_ssize_t i, CompareFuncs compare_funcs) /* Where does a end in b? Elements in b after that can be * ignored (already in place). */ - nb = gallop_left(ssa.keys[na-1], ssb.keys, nb, nb-1, compare_funcs); + nb = gallop_left(ms, ssa.keys[na-1], ssb.keys, nb, nb-1); if (nb <= 0) return nb; @@ -2004,9 +1824,9 @@ merge_at(MergeState *ms, Py_ssize_t i, CompareFuncs compare_funcs) * min(na, nb) elements. */ if (na <= nb) - return merge_lo(ms, ssa, na, ssb, nb, compare_funcs); + return merge_lo(ms, ssa, na, ssb, nb); else - return merge_hi(ms, ssa, na, ssb, nb, compare_funcs); + return merge_hi(ms, ssa, na, ssb, nb); } /* Examine the stack of runs waiting to be merged, merging adjacent runs @@ -2020,7 +1840,7 @@ merge_at(MergeState *ms, Py_ssize_t i, CompareFuncs compare_funcs) * Returns 0 on success, -1 on error. */ static int -merge_collapse(MergeState *ms, CompareFuncs compare_funcs) +merge_collapse(MergeState *ms) { struct s_slice *p = ms->pending; @@ -2031,11 +1851,11 @@ merge_collapse(MergeState *ms, CompareFuncs compare_funcs) (n > 1 && p[n-2].len <= p[n-1].len + p[n].len)) { if (p[n-1].len < p[n+1].len) --n; - if (merge_at(ms, n, compare_funcs) < 0) + if (merge_at(ms, n) < 0) return -1; } else if (p[n].len <= p[n+1].len) { - if (merge_at(ms, n, compare_funcs) < 0) + if (merge_at(ms, n) < 0) return -1; } else @@ -2050,7 +1870,7 @@ merge_collapse(MergeState *ms, CompareFuncs compare_funcs) * Returns 0 on success, -1 on error. */ static int -merge_force_collapse(MergeState *ms, CompareFuncs compare_funcs) +merge_force_collapse(MergeState *ms) { struct s_slice *p = ms->pending; @@ -2059,7 +1879,7 @@ merge_force_collapse(MergeState *ms, CompareFuncs compare_funcs) Py_ssize_t n = ms->n - 2; if (n > 0 && p[n-1].len < p[n+1].len) --n; - if (merge_at(ms, n, compare_funcs) < 0) + if (merge_at(ms, n) < 0) return -1; } return 0; @@ -2096,6 +1916,179 @@ reverse_sortslice(sortslice *s, Py_ssize_t n) reverse_slice(s->values, &s->values[n]); } +/* Here we define custom comparison functions to optimize for the cases one commonly + * encounters in practice: homogeneous lists, often of one of the basic types. */ + +/* This struct holds the comparison function and helper functions + * selected in the pre-sort check. */ + +/* These are the special case compare functions. + * ms->key_compare will always point to one of these: */ + +/* Heterogeneous compare: default, always safe to fall back on. */ +static int +safe_object_compare(PyObject* v, PyObject* w, MergeState* ms) +{ + /* No assumptions necessary! */ + return PyObject_RichCompareBool(v, w, Py_LT); +} + +/* Homogeneous compare: safe for any two compareable objects of the same type. + * (ms->key_richcompare is set to ob_type->tp_richcompare in the + * pre-sort check.) + */ +static int +unsafe_object_compare(PyObject* v, PyObject* w, MergeState* ms) +{ + /* Modified from Objects/object.c:PyObject_RichCompareBool, assuming: */ + #ifdef Py_DEBUG + assert(v->ob_type == w->ob_type && + v->ob_type->tp_richcompare != NULL); + #endif + if (v == w) return 0; + + if (v->ob_type->tp_richcompare != ms->key_richcompare) + return PyObject_RichCompareBool(v, w, Py_LT); + + PyObject* res = (*(ms->key_richcompare))(v, w, Py_LT); + if (res == NULL) + return -1; + int ok; + if (PyBool_Check(res)){ + ok = (res == Py_True); + } + else { + ok = PyObject_IsTrue(res); + } + Py_DECREF(res); + return ok; +} + +/* Latin string compare: safe for any two latin (one byte per char) strings. */ +static int +unsafe_latin_compare(PyObject* v, PyObject* w, MergeState* ms){ + /* Modified from Objects/unicodeobject.c:unicode_compare, assuming: */ + #ifdef Py_DEBUG + assert(v->ob_type == w->ob_type && + v->ob_type == &PyUnicode_Type && + PyUnicode_KIND(v) == PyUnicode_KIND(w) && + PyUnicode_KIND(v) == PyUnicode_1BYTE_KIND); + #endif + if (v == w) return 0; + + int len = Py_MIN(PyUnicode_GET_LENGTH(v), PyUnicode_GET_LENGTH(w)); + int res = memcmp(PyUnicode_DATA(v), PyUnicode_DATA(w), len); + + return (res != 0 ? + res < 0 : + PyUnicode_GET_LENGTH(v) < PyUnicode_GET_LENGTH(w)); +} + +/* Bounded int compare: compare any two longs that fit in a single machine word. */ +static int +unsafe_long_compare(PyObject *v, PyObject *w, MergeState* ms) +{ + /* Modified from Objects/longobject.c:long_compare, assuming: */ + #ifdef Py_DEBUG + assert(v->ob_type == w->ob_type && + v->ob_type == &PyLong_Type && + Py_ABS(Py_SIZE(v)) <= 1 && + Py_ABS(Py_SIZE(w)) <= 1); + #endif + if (v == w) return 0; + + PyLongObject *vl, *wl; + vl = (PyLongObject*)v; + wl = (PyLongObject*)w; + + sdigit v0 = Py_SIZE(vl) == 0 ? 0 : (sdigit)vl->ob_digit[0]; + sdigit w0 = Py_SIZE(wl) == 0 ? 0 : (sdigit)wl->ob_digit[0]; + + if (Py_SIZE(vl) < 0) + v0 = -v0; + if (Py_SIZE(wl) < 0) + w0 = -w0; + + return v0 < w0; +} + +/* Float compare: compare any two floats. */ +static int +unsafe_float_compare(PyObject *v, PyObject *w, MergeState* ms){ + /* Modified from Objects/floatobject.c:float_richcompare, assuming: */ + #ifdef Py_DEBUG + assert(v->ob_type == w->ob_type && + v->ob_type == &PyFloat_Type); + #endif + if (v == w) return 0; + + return PyFloat_AS_DOUBLE(v) < PyFloat_AS_DOUBLE(w); +} + +/* Tuple compare: compare any two non-empty tuples, using + * ms->tuple_elem_compare to compare the first elements, which is set + * using the same pre-sort check as we use for ms->key_compare, + * but run on the list [x[0] for x in L]. This allows us to optimize compares + * on two levels (as long as [x[0] for x in L] is type-homogeneous.) */ +static int +unsafe_tuple_compare(PyObject* v, PyObject* w, MergeState* ms) +{ + /* Modified from Objects/tupleobject.c:tuplerichcompare, assuming: */ + #ifdef Py_DEBUG + assert(v->ob_type == w->ob_type && + v->ob_type == &PyTuple_Type && + Py_SIZE(v) > 0 && + Py_SIZE(w) > 0); + #endif + if (v == w) return 0; + + PyTupleObject *vt, *wt; + Py_ssize_t i; + Py_ssize_t vlen, wlen; + + vt = (PyTupleObject *)v; + wt = (PyTupleObject *)w; + + /* Is v[0] < w[0]? */ + int k = (*(ms->tuple_elem_compare))(vt->ob_item[0], wt->ob_item[0], ms); + if (k < 0) + return -1; + if (k) + return 1; + + vlen = Py_SIZE(vt); + wlen = Py_SIZE(wt); + + /* Well, are either of the tuples are singleton? */ + if (vlen == 1 || wlen == 1) + return 0; + + /* Well, is w[0] < v[0]? */ + k = (*(ms->tuple_elem_compare))(wt->ob_item[0], vt->ob_item[0], ms); + if (k < 0) + return -1; + if (k) + return 0; + + /* Out of options: v[0] == w[0]! We need to look at v[1:] and w[1:]. + * We can use code copied straight from tupleobject.c:tuplerichcompare: */ + for (i = 1; i < vlen && i < wlen; i++) { + k = PyObject_RichCompareBool(vt->ob_item[i], + wt->ob_item[i], + Py_EQ); + if (k < 0) + return -1; + if (!k) + break; + } + + if (i >= vlen || i >= wlen) { + return vlen < wlen; + } + + return PyObject_RichCompareBool(vt->ob_item[i], wt->ob_item[i], Py_LT); +} + /* An adaptive, stable, natural mergesort. See listsort.txt. * Returns Py_None on success, NULL on error. Even in case of error, the * list will be some permutation of its input state (nothing is lost or @@ -2169,8 +2162,7 @@ listsort_impl(PyListObject *self, PyObject *keyfunc, int reverse) /* The pre-sort check: here's where we decide which compare function to use. * How much optimization is safe? We test for homogeneity with respect to * several properties that are expensive to check at compare-time, and - * set compare_funcs appropriately. */ - CompareFuncs compare_funcs; + * set ms appropriately. */ if (saved_ob_size > 1) { /* Assume the first element is representative of the whole list. */ int keys_are_in_tuples = (lo.keys[0]->ob_type == &PyTuple_Type && @@ -2220,33 +2212,33 @@ listsort_impl(PyListObject *self, PyObject *keyfunc, int reverse) if (keys_are_all_same_type) { if (key_type == &PyUnicode_Type && strings_are_latin) - compare_funcs.key_compare = unsafe_latin_compare; + ms.key_compare = unsafe_latin_compare; else if (key_type == &PyLong_Type && ints_are_bounded) - compare_funcs.key_compare = unsafe_long_compare; + ms.key_compare = unsafe_long_compare; else if (key_type == &PyFloat_Type) - compare_funcs.key_compare = unsafe_float_compare; + ms.key_compare = unsafe_float_compare; - else if ((compare_funcs.key_richcompare = key_type->tp_richcompare) != NULL) - compare_funcs.key_compare = unsafe_object_compare; + else if ((ms.key_richcompare = key_type->tp_richcompare) != NULL) + ms.key_compare = unsafe_object_compare; } else { - compare_funcs.key_compare = safe_object_compare; + ms.key_compare = safe_object_compare; } if (keys_are_in_tuples) { /* Make sure we're not dealing with tuples of tuples * (remember: here, key_type refers list [key[0] for key in keys]) */ if (key_type == &PyTuple_Type) - compare_funcs.tuple_elem_compare = safe_object_compare; + ms.tuple_elem_compare = safe_object_compare; else - compare_funcs.tuple_elem_compare = compare_funcs.key_compare; + ms.tuple_elem_compare = ms.key_compare; - compare_funcs.key_compare = unsafe_tuple_compare; + ms.key_compare = unsafe_tuple_compare; } } - /* End of pre-sort check: compare_funcs is now set properly! */ + /* End of pre-sort check: ms is now set properly! */ merge_init(&ms, saved_ob_size, keys != NULL); @@ -2271,7 +2263,7 @@ listsort_impl(PyListObject *self, PyObject *keyfunc, int reverse) Py_ssize_t n; /* Identify next run. */ - n = count_run(lo.keys, lo.keys + nremaining, &descending, compare_funcs); + n = count_run(&ms, lo.keys, lo.keys + nremaining, &descending); if (n < 0) goto fail; if (descending) @@ -2280,7 +2272,7 @@ listsort_impl(PyListObject *self, PyObject *keyfunc, int reverse) if (n < minrun) { const Py_ssize_t force = nremaining <= minrun ? nremaining : minrun; - if (binarysort(lo, lo.keys + force, lo.keys + n, compare_funcs) < 0) + if (binarysort(&ms, lo, lo.keys + force, lo.keys + n) < 0) goto fail; n = force; } @@ -2289,14 +2281,14 @@ listsort_impl(PyListObject *self, PyObject *keyfunc, int reverse) ms.pending[ms.n].base = lo; ms.pending[ms.n].len = n; ++ms.n; - if (merge_collapse(&ms, compare_funcs) < 0) + if (merge_collapse(&ms) < 0) goto fail; /* Advance to find next run. */ sortslice_advance(&lo, n); nremaining -= n; } while (nremaining); - if (merge_force_collapse(&ms, compare_funcs) < 0) + if (merge_force_collapse(&ms) < 0) goto fail; assert(ms.n == 1); assert(keys == NULL From 20716cb0c1ddeddec039b21f3959d58a5abc66bd Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Sun, 12 Mar 2017 14:12:44 -0600 Subject: [PATCH 29/47] formatting --- Objects/listobject.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Objects/listobject.c b/Objects/listobject.c index dace48312e3c74..53691692cceb66 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -1506,7 +1506,8 @@ merge_getmem(MergeState *ms, Py_ssize_t need) * successful, -1 if error. */ static Py_ssize_t -merge_lo(MergeState *ms, sortslice ssa, Py_ssize_t na, sortslice ssb, Py_ssize_t nb) +merge_lo(MergeState *ms, sortslice ssa, Py_ssize_t na, + sortslice ssb, Py_ssize_t nb) { Py_ssize_t k; sortslice dest; @@ -1637,7 +1638,8 @@ merge_lo(MergeState *ms, sortslice ssa, Py_ssize_t na, sortslice ssb, Py_ssize_t * successful, -1 if error. */ static Py_ssize_t -merge_hi(MergeState *ms, sortslice ssa, Py_ssize_t na, sortslice ssb, Py_ssize_t nb) +merge_hi(MergeState *ms, sortslice ssa, Py_ssize_t na, + sortslice ssb, Py_ssize_t nb) { Py_ssize_t k; sortslice dest, basea, baseb; From 5960fbe7ae50400359ce9c99c0743aa841c77fe5 Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Sun, 12 Mar 2017 14:14:21 -0600 Subject: [PATCH 30/47] formatting --- Objects/listobject.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Objects/listobject.c b/Objects/listobject.c index 53691692cceb66..de2f5893c3863e 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -1507,7 +1507,7 @@ merge_getmem(MergeState *ms, Py_ssize_t need) */ static Py_ssize_t merge_lo(MergeState *ms, sortslice ssa, Py_ssize_t na, - sortslice ssb, Py_ssize_t nb) + sortslice ssb, Py_ssize_t nb) { Py_ssize_t k; sortslice dest; @@ -1639,7 +1639,7 @@ merge_lo(MergeState *ms, sortslice ssa, Py_ssize_t na, */ static Py_ssize_t merge_hi(MergeState *ms, sortslice ssa, Py_ssize_t na, - sortslice ssb, Py_ssize_t nb) + sortslice ssb, Py_ssize_t nb) { Py_ssize_t k; sortslice dest, basea, baseb; @@ -1950,7 +1950,7 @@ unsafe_object_compare(PyObject* v, PyObject* w, MergeState* ms) if (v == w) return 0; if (v->ob_type->tp_richcompare != ms->key_richcompare) - return PyObject_RichCompareBool(v, w, Py_LT); + return PyObject_RichCompareBool(v, w, Py_LT); PyObject* res = (*(ms->key_richcompare))(v, w, Py_LT); if (res == NULL) From 804807bc82956aa2432050b989da67b9ae585afa Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Sun, 12 Mar 2017 14:25:08 -0600 Subject: [PATCH 31/47] don't need (v==w) for ints/strings --- Objects/listobject.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/Objects/listobject.c b/Objects/listobject.c index de2f5893c3863e..ab9647d79433a9 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -1976,7 +1976,6 @@ unsafe_latin_compare(PyObject* v, PyObject* w, MergeState* ms){ PyUnicode_KIND(v) == PyUnicode_KIND(w) && PyUnicode_KIND(v) == PyUnicode_1BYTE_KIND); #endif - if (v == w) return 0; int len = Py_MIN(PyUnicode_GET_LENGTH(v), PyUnicode_GET_LENGTH(w)); int res = memcmp(PyUnicode_DATA(v), PyUnicode_DATA(w), len); @@ -1997,7 +1996,6 @@ unsafe_long_compare(PyObject *v, PyObject *w, MergeState* ms) Py_ABS(Py_SIZE(v)) <= 1 && Py_ABS(Py_SIZE(w)) <= 1); #endif - if (v == w) return 0; PyLongObject *vl, *wl; vl = (PyLongObject*)v; From 5db7158a89ec872a70f5e766811cbd56c1489c7e Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Sun, 12 Mar 2017 14:32:48 -0600 Subject: [PATCH 32/47] went back to i=0 for tuples; we can't infer == from < and >, even with v==w --- Objects/listobject.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Objects/listobject.c b/Objects/listobject.c index ab9647d79433a9..0997dd46c03d4b 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -2072,7 +2072,7 @@ unsafe_tuple_compare(PyObject* v, PyObject* w, MergeState* ms) /* Out of options: v[0] == w[0]! We need to look at v[1:] and w[1:]. * We can use code copied straight from tupleobject.c:tuplerichcompare: */ - for (i = 1; i < vlen && i < wlen; i++) { + for (i = 0; i < vlen && i < wlen; i++) { k = PyObject_RichCompareBool(vt->ob_item[i], wt->ob_item[i], Py_EQ); @@ -2083,7 +2083,7 @@ unsafe_tuple_compare(PyObject* v, PyObject* w, MergeState* ms) } if (i >= vlen || i >= wlen) { - return vlen < wlen; + return vlen < wlen; } return PyObject_RichCompareBool(vt->ob_item[i], wt->ob_item[i], Py_LT); From 934d83f0b993e1b2ec6270094364ac95630e1f14 Mon Sep 17 00:00:00 2001 From: embg Date: Sun, 12 Mar 2017 22:54:17 -0600 Subject: [PATCH 33/47] move all declarations to top of their blocks --- Objects/listobject.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/Objects/listobject.c b/Objects/listobject.c index 0997dd46c03d4b..4efcbcc9cb18f6 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -1947,15 +1947,16 @@ unsafe_object_compare(PyObject* v, PyObject* w, MergeState* ms) assert(v->ob_type == w->ob_type && v->ob_type->tp_richcompare != NULL); #endif - if (v == w) return 0; + int ok; + if (v == w) return 0; if (v->ob_type->tp_richcompare != ms->key_richcompare) return PyObject_RichCompareBool(v, w, Py_LT); PyObject* res = (*(ms->key_richcompare))(v, w, Py_LT); if (res == NULL) return -1; - int ok; + if (PyBool_Check(res)){ ok = (res == Py_True); } @@ -2040,7 +2041,6 @@ unsafe_tuple_compare(PyObject* v, PyObject* w, MergeState* ms) Py_SIZE(v) > 0 && Py_SIZE(w) > 0); #endif - if (v == w) return 0; PyTupleObject *vt, *wt; Py_ssize_t i; @@ -2048,6 +2048,10 @@ unsafe_tuple_compare(PyObject* v, PyObject* w, MergeState* ms) vt = (PyTupleObject *)v; wt = (PyTupleObject *)w; + + int ok; + + if (v == w) return 0; /* Is v[0] < w[0]? */ int k = (*(ms->tuple_elem_compare))(vt->ob_item[0], wt->ob_item[0], ms); From c536ed3c12dce6f6d1c602275910f5881aba4a5f Mon Sep 17 00:00:00 2001 From: embg Date: Sun, 12 Mar 2017 22:57:30 -0600 Subject: [PATCH 34/47] typo --- Objects/listobject.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Objects/listobject.c b/Objects/listobject.c index 4efcbcc9cb18f6..006f5edaf0f668 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -2049,7 +2049,7 @@ unsafe_tuple_compare(PyObject* v, PyObject* w, MergeState* ms) vt = (PyTupleObject *)v; wt = (PyTupleObject *)w; - int ok; + int k; if (v == w) return 0; From 0b85ac5c087f225035bb3fa3a98678d9f11b14cc Mon Sep 17 00:00:00 2001 From: embg Date: Sun, 12 Mar 2017 22:58:46 -0600 Subject: [PATCH 35/47] typo --- Objects/listobject.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Objects/listobject.c b/Objects/listobject.c index 006f5edaf0f668..c656db68216882 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -2054,7 +2054,7 @@ unsafe_tuple_compare(PyObject* v, PyObject* w, MergeState* ms) if (v == w) return 0; /* Is v[0] < w[0]? */ - int k = (*(ms->tuple_elem_compare))(vt->ob_item[0], wt->ob_item[0], ms); + k = (*(ms->tuple_elem_compare))(vt->ob_item[0], wt->ob_item[0], ms); if (k < 0) return -1; if (k) From a12d7840670f04bab1905bc7804ed340e122ee4d Mon Sep 17 00:00:00 2001 From: embg Date: Sun, 12 Mar 2017 23:04:32 -0600 Subject: [PATCH 36/47] added Py_NotImplemented check to unsafe_object_compare --- Objects/listobject.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Objects/listobject.c b/Objects/listobject.c index c656db68216882..bad947c9ede479 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -1954,6 +1954,11 @@ unsafe_object_compare(PyObject* v, PyObject* w, MergeState* ms) return PyObject_RichCompareBool(v, w, Py_LT); PyObject* res = (*(ms->key_richcompare))(v, w, Py_LT); + + if (res == Py_NotImplemented) { + Py_DECREF(res); + return PyObject_RichCompareBool(v, w, Py_LT); + } if (res == NULL) return -1; From a54a4e4cfaa58b7fbcbd24ef799d3d87f8c7a4fb Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Sun, 12 Mar 2017 23:21:00 -0600 Subject: [PATCH 37/47] ACTUALLY moved declarations to the tops of blocks --- Objects/listobject.c | 99 +++++++++++++++++++++++--------------------- 1 file changed, 51 insertions(+), 48 deletions(-) diff --git a/Objects/listobject.c b/Objects/listobject.c index bad947c9ede479..1baa730b278a48 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -1942,18 +1942,20 @@ safe_object_compare(PyObject* v, PyObject* w, MergeState* ms) static int unsafe_object_compare(PyObject* v, PyObject* w, MergeState* ms) { - /* Modified from Objects/object.c:PyObject_RichCompareBool, assuming: */ - #ifdef Py_DEBUG - assert(v->ob_type == w->ob_type && - v->ob_type->tp_richcompare != NULL); - #endif - int ok; + int ok; PyObject* res; + + /* Modified from Objects/object.c:PyObject_RichCompareBool, assuming: */ + #ifdef Py_DEBUG + assert(v->ob_type == w->ob_type && + v->ob_type->tp_richcompare != NULL); + #endif + if (v == w) return 0; if (v->ob_type->tp_richcompare != ms->key_richcompare) return PyObject_RichCompareBool(v, w, Py_LT); - PyObject* res = (*(ms->key_richcompare))(v, w, Py_LT); + res = (*(ms->key_richcompare))(v, w, Py_LT); if (res == Py_NotImplemented) { Py_DECREF(res); @@ -1975,16 +1977,18 @@ unsafe_object_compare(PyObject* v, PyObject* w, MergeState* ms) /* Latin string compare: safe for any two latin (one byte per char) strings. */ static int unsafe_latin_compare(PyObject* v, PyObject* w, MergeState* ms){ - /* Modified from Objects/unicodeobject.c:unicode_compare, assuming: */ - #ifdef Py_DEBUG - assert(v->ob_type == w->ob_type && - v->ob_type == &PyUnicode_Type && - PyUnicode_KIND(v) == PyUnicode_KIND(w) && - PyUnicode_KIND(v) == PyUnicode_1BYTE_KIND); - #endif + int len, res; + + /* Modified from Objects/unicodeobject.c:unicode_compare, assuming: */ + #ifdef Py_DEBUG + assert(v->ob_type == w->ob_type && + v->ob_type == &PyUnicode_Type && + PyUnicode_KIND(v) == PyUnicode_KIND(w) && + PyUnicode_KIND(v) == PyUnicode_1BYTE_KIND); + #endif - int len = Py_MIN(PyUnicode_GET_LENGTH(v), PyUnicode_GET_LENGTH(w)); - int res = memcmp(PyUnicode_DATA(v), PyUnicode_DATA(w), len); + len = Py_MIN(PyUnicode_GET_LENGTH(v), PyUnicode_GET_LENGTH(w)); + res = memcmp(PyUnicode_DATA(v), PyUnicode_DATA(w), len); return (res != 0 ? res < 0 : @@ -1995,20 +1999,21 @@ unsafe_latin_compare(PyObject* v, PyObject* w, MergeState* ms){ static int unsafe_long_compare(PyObject *v, PyObject *w, MergeState* ms) { - /* Modified from Objects/longobject.c:long_compare, assuming: */ - #ifdef Py_DEBUG - assert(v->ob_type == w->ob_type && - v->ob_type == &PyLong_Type && - Py_ABS(Py_SIZE(v)) <= 1 && - Py_ABS(Py_SIZE(w)) <= 1); - #endif + PyLongObject *vl, *wl; sdigit v0, w0; + + /* Modified from Objects/longobject.c:long_compare, assuming: */ + #ifdef Py_DEBUG + assert(v->ob_type == w->ob_type && + v->ob_type == &PyLong_Type && + Py_ABS(Py_SIZE(v)) <= 1 && + Py_ABS(Py_SIZE(w)) <= 1); + #endif - PyLongObject *vl, *wl; vl = (PyLongObject*)v; wl = (PyLongObject*)w; - sdigit v0 = Py_SIZE(vl) == 0 ? 0 : (sdigit)vl->ob_digit[0]; - sdigit w0 = Py_SIZE(wl) == 0 ? 0 : (sdigit)wl->ob_digit[0]; + v0 = Py_SIZE(vl) == 0 ? 0 : (sdigit)vl->ob_digit[0]; + w0 = Py_SIZE(wl) == 0 ? 0 : (sdigit)wl->ob_digit[0]; if (Py_SIZE(vl) < 0) v0 = -v0; @@ -2021,13 +2026,13 @@ unsafe_long_compare(PyObject *v, PyObject *w, MergeState* ms) /* Float compare: compare any two floats. */ static int unsafe_float_compare(PyObject *v, PyObject *w, MergeState* ms){ - /* Modified from Objects/floatobject.c:float_richcompare, assuming: */ - #ifdef Py_DEBUG - assert(v->ob_type == w->ob_type && - v->ob_type == &PyFloat_Type); - #endif - if (v == w) return 0; - + /* Modified from Objects/floatobject.c:float_richcompare, assuming: */ + #ifdef Py_DEBUG + assert(v->ob_type == w->ob_type && + v->ob_type == &PyFloat_Type); + #endif + + if (v == w) return 0; return PyFloat_AS_DOUBLE(v) < PyFloat_AS_DOUBLE(w); } @@ -2038,25 +2043,23 @@ unsafe_float_compare(PyObject *v, PyObject *w, MergeState* ms){ * on two levels (as long as [x[0] for x in L] is type-homogeneous.) */ static int unsafe_tuple_compare(PyObject* v, PyObject* w, MergeState* ms) -{ - /* Modified from Objects/tupleobject.c:tuplerichcompare, assuming: */ - #ifdef Py_DEBUG - assert(v->ob_type == w->ob_type && - v->ob_type == &PyTuple_Type && - Py_SIZE(v) > 0 && - Py_SIZE(w) > 0); - #endif - +{ PyTupleObject *vt, *wt; - Py_ssize_t i; - Py_ssize_t vlen, wlen; + Py_ssize_t i, vlen, wlen; + int k; + + /* Modified from Objects/tupleobject.c:tuplerichcompare, assuming: */ + #ifdef Py_DEBUG + assert(v->ob_type == w->ob_type && + v->ob_type == &PyTuple_Type && + Py_SIZE(v) > 0 && + Py_SIZE(w) > 0); + #endif + + if (v == w) return 0; vt = (PyTupleObject *)v; wt = (PyTupleObject *)w; - - int k; - - if (v == w) return 0; /* Is v[0] < w[0]? */ k = (*(ms->tuple_elem_compare))(vt->ob_item[0], wt->ob_item[0], ms); From 862c7619bce74c13dd16ca888828c4730932c578 Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Mon, 13 Mar 2017 15:08:38 -0600 Subject: [PATCH 38/47] Made tuple compare precisely consistent with PyObject_RichCompareBool; it was not earler --- Objects/listobject.c | 43 +++++++++---------------------------------- 1 file changed, 9 insertions(+), 34 deletions(-) diff --git a/Objects/listobject.c b/Objects/listobject.c index 1baa730b278a48..649de77b57aa86 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -1950,8 +1950,6 @@ unsafe_object_compare(PyObject* v, PyObject* w, MergeState* ms) v->ob_type->tp_richcompare != NULL); #endif - - if (v == w) return 0; if (v->ob_type->tp_richcompare != ms->key_richcompare) return PyObject_RichCompareBool(v, w, Py_LT); @@ -2031,16 +2029,15 @@ unsafe_float_compare(PyObject *v, PyObject *w, MergeState* ms){ assert(v->ob_type == w->ob_type && v->ob_type == &PyFloat_Type); #endif - - if (v == w) return 0; return PyFloat_AS_DOUBLE(v) < PyFloat_AS_DOUBLE(w); } -/* Tuple compare: compare any two non-empty tuples, using +/* Tuple compare: compare *any* two tuples, using * ms->tuple_elem_compare to compare the first elements, which is set * using the same pre-sort check as we use for ms->key_compare, * but run on the list [x[0] for x in L]. This allows us to optimize compares - * on two levels (as long as [x[0] for x in L] is type-homogeneous.) */ + * on two levels (as long as [x[0] for x in L] is type-homogeneous.) The idea is + * that most tuple compares don't involve x[1:]. */ static int unsafe_tuple_compare(PyObject* v, PyObject* w, MergeState* ms) { @@ -2056,49 +2053,27 @@ unsafe_tuple_compare(PyObject* v, PyObject* w, MergeState* ms) Py_SIZE(w) > 0); #endif - if (v == w) return 0; - vt = (PyTupleObject *)v; wt = (PyTupleObject *)w; - /* Is v[0] < w[0]? */ - k = (*(ms->tuple_elem_compare))(vt->ob_item[0], wt->ob_item[0], ms); - if (k < 0) - return -1; - if (k) - return 1; - vlen = Py_SIZE(vt); wlen = Py_SIZE(wt); - /* Well, are either of the tuples are singleton? */ - if (vlen == 1 || wlen == 1) - return 0; - - /* Well, is w[0] < v[0]? */ - k = (*(ms->tuple_elem_compare))(wt->ob_item[0], vt->ob_item[0], ms); - if (k < 0) - return -1; - if (k) - return 0; - - /* Out of options: v[0] == w[0]! We need to look at v[1:] and w[1:]. - * We can use code copied straight from tupleobject.c:tuplerichcompare: */ for (i = 0; i < vlen && i < wlen; i++) { - k = PyObject_RichCompareBool(vt->ob_item[i], - wt->ob_item[i], - Py_EQ); + k = PyObject_RichCompareBool(vt->ob_item[i], wt->ob_item[i], Py_EQ); if (k < 0) return -1; if (!k) break; } - if (i >= vlen || i >= wlen) { + if (i >= vlen || i >= wlen) return vlen < wlen; - } - return PyObject_RichCompareBool(vt->ob_item[i], wt->ob_item[i], Py_LT); + if (i == 0) + return ms->tuple_elem_compare(vt->ob_item[i], wt->ob_item[i], ms); + else + return PyObject_RichCompareBool(vt->ob_item[i], wt->ob_item[i], Py_LT); } /* An adaptive, stable, natural mergesort. See listsort.txt. From dd302b50e93bf259597607855cf60646540e6c54 Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Wed, 15 Mar 2017 11:19:43 -0600 Subject: [PATCH 39/47] Added tests --- Lib/test/test_sort.py | 107 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 107 insertions(+) diff --git a/Lib/test/test_sort.py b/Lib/test/test_sort.py index 98ccab5c3930a6..8c498aed4cee9b 100644 --- a/Lib/test/test_sort.py +++ b/Lib/test/test_sort.py @@ -260,6 +260,113 @@ def my_cmp_reversed(x, y): self.assertEqual(data, copy2) #============================================================================== +def check_against_PyObject_RichCompareBool(self, L): + ## The idea here is to exploit the fact that unsafe_tuple_compare uses + ## PyObject_RichCompareBool for the second elements of tuples. So we have, + ## for (most) L, sorted(L) == [y[1] for y in sorted([(0,x) for x in L])] + ## This will work as long as __eq__ => not __lt__ for all the objects in L, + ## which holds for all the types used below. + ## + ## Testing this way ensures that the optimized implementation remains consistent + ## with the naive implementation, even if changes are made to any of the + ## richcompares. + ## + ## This function tests sorting for three lists (it randomly shuffles each one): + ## 1. L + ## 2. [(x,) for x in L] + ## 3. [((x,),) for x in L] + + random.seed(0) + random.shuffle(L) + L_1 = L[:] + L_2 = [(x,) for x in L] + L_3 = [((x,),) for x in L] + for L in [L_1, L_2, L_3]: + optimized = sorted(L) + reference = [y[1] for y in sorted([(0,x) for x in L])] + for (opt, ref) in zip(optimized, reference): + self.assertIs(opt, ref) + #note: not assertEqual! We want to ensure *identical* behavior. + +class TestOptimizedCompares(unittest.TestCase): + def test_safe_object_compare(self): + heterogeneous_lists = [[0, 'foo'], + [0.0, 'foo'], + ['foo', b'foo'], + [('foo',), 'foo']] + for L in heterogeneous_lists: + self.assertRaises(TypeError, L.sort) + self.assertRaises(TypeError, [(x,) for x in L].sort) + self.assertRaises(TypeError, [((x,),) for x in L].sort) + + float_int_lists = [[1,1.1], + [1<<70,1.1], + [1.1,1], + [1.1,1<<70]] + for L in float_int_lists: + check_against_PyObject_RichCompareBool(self, L) + + def test_unsafe_object_compare(self): + + # This test is by ppperry. It ensures that unsafe_object_compare is + # verifying ms->key_richcompare == tp->richcompare before comparing. + class WackyComparator(int): + def __lt__(self, other): + elem.__class__ = WackyList2 + return int.__lt__(self, other) + + class WackyList1(list):pass + class WackyList2(list): + def __lt__(self, other): + raise ValueError + + L = [WackyList1([WackyComparator(i), i]) for i in range(10)] + elem = L[-1] + self.assertRaises(ValueError, L.sort) + self.assertRaises(ValueError, [(x,) for x in L].sort) + + # The following test is also by ppperry. It ensures that + # unsafe_object_compare handles Py_NotImplemented appropriately. + class PointlessComparator: + def __lt__(self, other): + return NotImplemented + L = [PointlessComparator(), PointlessComparator()] + self.assertRaises(TypeError, L.sort) + self.assertRaises(TypeError, [(x,) for x in L].sort) + + # The following tests go through various types that would trigger + # ms->key_compare = unsafe_object_compare + lists = [list(range(100)) + [(1<<70)], + [str(x) for x in range(100)] + ['\uffff'], + [bytes(x) for x in range(100)], + [cmp_to_key(lambda x,y: x (x,) < (x,) + # + # Note that we don't have to put anything in tuples here, because + # the check function does a tuple test automatically. + + check_against_PyObject_RichCompareBool(self, [float('nan')]*100) + check_against_PyObject_RichCompareBool(self, [float('nan') for + _ in range(100)]) +#============================================================================== if __name__ == "__main__": unittest.main() From ab3d520ed6f46e9a8352fda2c4665de9c0ab6d96 Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Wed, 15 Mar 2017 11:59:00 -0600 Subject: [PATCH 40/47] Implemented all of serhiy-storchaka's changes --- Objects/listobject.c | 168 +++++++++++++++++++++++-------------------- 1 file changed, 89 insertions(+), 79 deletions(-) diff --git a/Objects/listobject.c b/Objects/listobject.c index 649de77b57aa86..4aa4849c940176 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -1126,7 +1126,7 @@ struct s_MergeState { the input (nothing is lost or duplicated). */ static int -binarysort(MergeState* ms, sortslice lo, PyObject **hi, PyObject **start) +binarysort(MergeState *ms, sortslice lo, PyObject **hi, PyObject **start) { Py_ssize_t k; PyObject **l, **p, **r; @@ -1200,7 +1200,7 @@ elements to get out of order). Returns -1 in case of error. */ static Py_ssize_t -count_run(MergeState* ms, PyObject **lo, PyObject **hi, int *descending) +count_run(MergeState *ms, PyObject **lo, PyObject **hi, int *descending) { Py_ssize_t k; Py_ssize_t n; @@ -1255,7 +1255,7 @@ key, and the last n-k should follow key. Returns -1 on error. See listsort.txt for info on the method. */ static Py_ssize_t -gallop_left(MergeState* ms, PyObject *key, PyObject **a, Py_ssize_t n, Py_ssize_t hint) +gallop_left(MergeState *ms, PyObject *key, PyObject **a, Py_ssize_t n, Py_ssize_t hint) { Py_ssize_t ofs; Py_ssize_t lastofs; @@ -1346,7 +1346,7 @@ we're sticking to "<" comparisons that it's much harder to follow if written as one routine with yet another "left or right?" flag. */ static Py_ssize_t -gallop_right(MergeState* ms, PyObject *key, PyObject **a, Py_ssize_t n, Py_ssize_t hint) +gallop_right(MergeState *ms, PyObject *key, PyObject **a, Py_ssize_t n, Py_ssize_t hint) { Py_ssize_t ofs; Py_ssize_t lastofs; @@ -1929,9 +1929,9 @@ reverse_sortslice(sortslice *s, Py_ssize_t n) /* Heterogeneous compare: default, always safe to fall back on. */ static int -safe_object_compare(PyObject* v, PyObject* w, MergeState* ms) +safe_object_compare(PyObject *v, PyObject *w, MergeState *ms) { - /* No assumptions necessary! */ + /* No assumptions necessary! */ return PyObject_RichCompareBool(v, w, Py_LT); } @@ -1940,72 +1940,74 @@ safe_object_compare(PyObject* v, PyObject* w, MergeState* ms) * pre-sort check.) */ static int -unsafe_object_compare(PyObject* v, PyObject* w, MergeState* ms) +unsafe_object_compare(PyObject *v, PyObject *w, MergeState *ms) { - int ok; PyObject* res; - - /* Modified from Objects/object.c:PyObject_RichCompareBool, assuming: */ - #ifdef Py_DEBUG - assert(v->ob_type == w->ob_type && - v->ob_type->tp_richcompare != NULL); - #endif + PyObject* res_obj; int res; + /* No assumptions, because we check first: */ if (v->ob_type->tp_richcompare != ms->key_richcompare) return PyObject_RichCompareBool(v, w, Py_LT); + + assert(ms->key_richcompare != NULL); + res_obj = (*(ms->key_richcompare))(v, w, Py_LT); - res = (*(ms->key_richcompare))(v, w, Py_LT); - - if (res == Py_NotImplemented) { - Py_DECREF(res); + if (res_obj == Py_NotImplemented) { + Py_DECREF(res_obj); return PyObject_RichCompareBool(v, w, Py_LT); } - if (res == NULL) + if (res_obj == NULL) return -1; - if (PyBool_Check(res)){ - ok = (res == Py_True); + if (PyBool_Check(res_obj)) { + res = (res_obj == Py_True); } else { - ok = PyObject_IsTrue(res); + res = PyObject_IsTrue(res_obj); } - Py_DECREF(res); - return ok; + Py_DECREF(res_obj); + + /* Note that we can't assert + * res == PyObject_RichCompareBool(v, w, Py_LT); + * because of evil compare functions like this: + * lambda a, b: int(random.random() * 3) - 1) + * (which is actually in test_sort.py) */ + return res; } /* Latin string compare: safe for any two latin (one byte per char) strings. */ static int -unsafe_latin_compare(PyObject* v, PyObject* w, MergeState* ms){ +unsafe_latin_compare(PyObject *v, PyObject *w, MergeState *ms) +{ int len, res; /* Modified from Objects/unicodeobject.c:unicode_compare, assuming: */ - #ifdef Py_DEBUG - assert(v->ob_type == w->ob_type && - v->ob_type == &PyUnicode_Type && - PyUnicode_KIND(v) == PyUnicode_KIND(w) && - PyUnicode_KIND(v) == PyUnicode_1BYTE_KIND); - #endif + assert(v->ob_type == w->ob_type); + assert(v->ob_type == &PyUnicode_Type); + assert(PyUnicode_KIND(v) == PyUnicode_KIND(w)); + assert(PyUnicode_KIND(v) == PyUnicode_1BYTE_KIND); len = Py_MIN(PyUnicode_GET_LENGTH(v), PyUnicode_GET_LENGTH(w)); res = memcmp(PyUnicode_DATA(v), PyUnicode_DATA(w), len); - return (res != 0 ? - res < 0 : - PyUnicode_GET_LENGTH(v) < PyUnicode_GET_LENGTH(w)); + res = (res != 0 ? + res < 0 : + PyUnicode_GET_LENGTH(v) < PyUnicode_GET_LENGTH(w)); + + assert(res == PyObject_RichCompareBool(v, w, Py_LT));; + return res; } /* Bounded int compare: compare any two longs that fit in a single machine word. */ static int -unsafe_long_compare(PyObject *v, PyObject *w, MergeState* ms) +unsafe_long_compare(PyObject *v, PyObject *w, MergeState *ms) { - PyLongObject *vl, *wl; sdigit v0, w0; + PyLongObject *vl, *wl; sdigit v0, w0; int res; /* Modified from Objects/longobject.c:long_compare, assuming: */ - #ifdef Py_DEBUG - assert(v->ob_type == w->ob_type && - v->ob_type == &PyLong_Type && - Py_ABS(Py_SIZE(v)) <= 1 && - Py_ABS(Py_SIZE(w)) <= 1); - #endif + assert(v->ob_type == w->ob_type); + assert(v->ob_type == &PyLong_Type); + assert(Py_ABS(Py_SIZE(v)) <= 1); + assert(Py_ABS(Py_SIZE(w)) <= 1); vl = (PyLongObject*)v; wl = (PyLongObject*)w; @@ -2018,18 +2020,24 @@ unsafe_long_compare(PyObject *v, PyObject *w, MergeState* ms) if (Py_SIZE(wl) < 0) w0 = -w0; - return v0 < w0; + res = v0 < w0; + assert(res == PyObject_RichCompareBool(v, w, Py_LT)); + return res; } /* Float compare: compare any two floats. */ static int -unsafe_float_compare(PyObject *v, PyObject *w, MergeState* ms){ +unsafe_float_compare(PyObject *v, PyObject *w, MergeState *ms) +{ + int res; + /* Modified from Objects/floatobject.c:float_richcompare, assuming: */ - #ifdef Py_DEBUG - assert(v->ob_type == w->ob_type && - v->ob_type == &PyFloat_Type); - #endif - return PyFloat_AS_DOUBLE(v) < PyFloat_AS_DOUBLE(w); + assert(v->ob_type == w->ob_type); + assert(v->ob_type == &PyFloat_Type); + + res = PyFloat_AS_DOUBLE(v) < PyFloat_AS_DOUBLE(w); + assert(res == PyObject_RichCompareBool(v, w, Py_LT)); + return res; } /* Tuple compare: compare *any* two tuples, using @@ -2039,23 +2047,21 @@ unsafe_float_compare(PyObject *v, PyObject *w, MergeState* ms){ * on two levels (as long as [x[0] for x in L] is type-homogeneous.) The idea is * that most tuple compares don't involve x[1:]. */ static int -unsafe_tuple_compare(PyObject* v, PyObject* w, MergeState* ms) +unsafe_tuple_compare(PyObject *v, PyObject *w, MergeState *ms) { PyTupleObject *vt, *wt; Py_ssize_t i, vlen, wlen; int k; /* Modified from Objects/tupleobject.c:tuplerichcompare, assuming: */ - #ifdef Py_DEBUG - assert(v->ob_type == w->ob_type && - v->ob_type == &PyTuple_Type && - Py_SIZE(v) > 0 && - Py_SIZE(w) > 0); - #endif + assert(v->ob_type == w->ob_type); + assert(v->ob_type == &PyTuple_Type); + assert(Py_SIZE(v) > 0); + assert(Py_SIZE(w) > 0); vt = (PyTupleObject *)v; wt = (PyTupleObject *)w; - + vlen = Py_SIZE(vt); wlen = Py_SIZE(wt); @@ -2070,10 +2076,11 @@ unsafe_tuple_compare(PyObject* v, PyObject* w, MergeState* ms) if (i >= vlen || i >= wlen) return vlen < wlen; - if (i == 0) - return ms->tuple_elem_compare(vt->ob_item[i], wt->ob_item[i], ms); - else + if (i == 0) { + return ms->tuple_elem_compare(vt->ob_item[i], wt->ob_item[i], ms); + } else { return PyObject_RichCompareBool(vt->ob_item[i], wt->ob_item[i], Py_LT); + } } /* An adaptive, stable, natural mergesort. See listsort.txt. @@ -2156,7 +2163,7 @@ listsort_impl(PyListObject *self, PyObject *keyfunc, int reverse) Py_SIZE(lo.keys[0]) > 0); PyTypeObject* key_type = (keys_are_in_tuples ? - PyTuple_GET_ITEM(lo.keys[0],0)->ob_type : + PyTuple_GET_ITEM(lo.keys[0], 0)->ob_type : lo.keys[0]->ob_type); int keys_are_all_same_type = 1; @@ -2165,10 +2172,10 @@ listsort_impl(PyListObject *self, PyObject *keyfunc, int reverse) /* Prove that assumption by checking every key. */ int i; - for (i=0; i< saved_ob_size; i++) { + for (i=0; i < saved_ob_size; i++) { if (keys_are_in_tuples && - (lo.keys[i]->ob_type != &PyTuple_Type || Py_SIZE(lo.keys[i]) == 0)) { + !(lo.keys[i]->ob_type == &PyTuple_Type && Py_SIZE(lo.keys[i]) != 0)) { keys_are_in_tuples = 0; keys_are_all_same_type = 0; break; @@ -2178,7 +2185,7 @@ listsort_impl(PyListObject *self, PyObject *keyfunc, int reverse) * lo.keys[i], not lo.keys[i] itself! We verify type-homogeneity * for lists of tuples in the if-statement directly above. */ PyObject* key = (keys_are_in_tuples ? - PyTuple_GET_ITEM(lo.keys[i],0) : + PyTuple_GET_ITEM(lo.keys[i], 0) : lo.keys[i]); if (key->ob_type != key_type) { @@ -2186,31 +2193,34 @@ listsort_impl(PyListObject *self, PyObject *keyfunc, int reverse) break; } - else if (key_type == &PyLong_Type && ints_are_bounded && - Py_ABS(Py_SIZE(key)) > 1) - ints_are_bounded = 0; - - else if (key_type == &PyUnicode_Type && strings_are_latin && - PyUnicode_KIND(key) != PyUnicode_1BYTE_KIND) + if (key_type == &PyLong_Type) { + if (ints_are_bounded && Py_ABS(Py_SIZE(key)) > 1) + ints_are_bounded = 0; + } + else if (key_type == &PyUnicode_Type){ + if (strings_are_latin && + PyUnicode_KIND(key) != PyUnicode_1BYTE_KIND) strings_are_latin = 0; + } } /* Choose the best compare, given what we now know about the keys. */ if (keys_are_all_same_type) { - if (key_type == &PyUnicode_Type && strings_are_latin) + if (key_type == &PyUnicode_Type && strings_are_latin) { ms.key_compare = unsafe_latin_compare; - - else if (key_type == &PyLong_Type && ints_are_bounded) + } + else if (key_type == &PyLong_Type && ints_are_bounded) { ms.key_compare = unsafe_long_compare; - - else if (key_type == &PyFloat_Type) + } + else if (key_type == &PyFloat_Type) { ms.key_compare = unsafe_float_compare; - - else if ((ms.key_richcompare = key_type->tp_richcompare) != NULL) + } + else if ((ms.key_richcompare = key_type->tp_richcompare) != NULL) { ms.key_compare = unsafe_object_compare; - - } else { + } + } + else { ms.key_compare = safe_object_compare; } From dba3f27f2367fb47aec13fcdee86e77b416bbe2f Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Wed, 15 Mar 2017 12:00:20 -0600 Subject: [PATCH 41/47] Removed braces at the end of unsafe_tuple_compare --- Objects/listobject.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/Objects/listobject.c b/Objects/listobject.c index 4aa4849c940176..977369e967c58d 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -2076,11 +2076,10 @@ unsafe_tuple_compare(PyObject *v, PyObject *w, MergeState *ms) if (i >= vlen || i >= wlen) return vlen < wlen; - if (i == 0) { + if (i == 0) return ms->tuple_elem_compare(vt->ob_item[i], wt->ob_item[i], ms); - } else { + else return PyObject_RichCompareBool(vt->ob_item[i], wt->ob_item[i], Py_LT); - } } /* An adaptive, stable, natural mergesort. See listsort.txt. From c796422f7d9e6726e445bd97a00aec0d89cc2654 Mon Sep 17 00:00:00 2001 From: embg Date: Wed, 15 Mar 2017 20:14:09 -0700 Subject: [PATCH 42/47] Fixed test_safe_object_compare Comparing bytes and strings yields a warning, not an error, so assertRaises fails. --- Lib/test/test_sort.py | 1 - 1 file changed, 1 deletion(-) diff --git a/Lib/test/test_sort.py b/Lib/test/test_sort.py index 8c498aed4cee9b..18a62317dd69cb 100644 --- a/Lib/test/test_sort.py +++ b/Lib/test/test_sort.py @@ -292,7 +292,6 @@ class TestOptimizedCompares(unittest.TestCase): def test_safe_object_compare(self): heterogeneous_lists = [[0, 'foo'], [0.0, 'foo'], - ['foo', b'foo'], [('foo',), 'foo']] for L in heterogeneous_lists: self.assertRaises(TypeError, L.sort) From fa19903f3f3c799ebfb5e47e48d32d9c35753475 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sun, 28 Jan 2018 14:49:34 -0800 Subject: [PATCH 43/47] Fix spacing around PyObject * --- Objects/listobject.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/Objects/listobject.c b/Objects/listobject.c index 977369e967c58d..1e469f54e9742d 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -1101,17 +1101,17 @@ struct s_MergeState { /* This is the function we will use to compare two keys, * even when none of our special cases apply and we have to use * safe_object_compare. */ - int (*key_compare)(PyObject*, PyObject*, MergeState*); + int (*key_compare)(PyObject *, PyObject *, MergeState *); /* This function is used by unsafe_object_compare to optimize comparisons * when we know our list is type-homogeneous but we can't assume anything else. * In the pre-sort check it is set equal to key->ob_type->tp_richcompare */ - PyObject* (*key_richcompare)(PyObject*, PyObject*, int); + PyObject *(*key_richcompare)(PyObject *, PyObject *, int); /* This function is used by unsafe_tuple_compare to compare the first elements * of tuples. It may be set to safe_object_compare, but the idea is that hopefully * we can assume more, and use one of the special-case compares. */ - int (*tuple_elem_compare)(PyObject*, PyObject*, MergeState*); + int (*tuple_elem_compare)(PyObject *, PyObject *, MergeState *); }; /* binarysort is the best method for sorting small arrays: it does @@ -1481,11 +1481,11 @@ merge_getmem(MergeState *ms, Py_ssize_t need) * we don't care what's in the block. */ merge_freemem(ms); - if ((size_t)need > PY_SSIZE_T_MAX / sizeof(PyObject*) / multiplier) { + if ((size_t)need > PY_SSIZE_T_MAX / sizeof(PyObject *) / multiplier) { PyErr_NoMemory(); return -1; } - ms->a.keys = (PyObject**)PyMem_Malloc(multiplier * need + ms->a.keys = (PyObject **)PyMem_Malloc(multiplier * need * sizeof(PyObject *)); if (ms->a.keys != NULL) { ms->alloced = need; @@ -1942,7 +1942,7 @@ safe_object_compare(PyObject *v, PyObject *w, MergeState *ms) static int unsafe_object_compare(PyObject *v, PyObject *w, MergeState *ms) { - PyObject* res_obj; int res; + PyObject *res_obj; int res; /* No assumptions, because we check first: */ if (v->ob_type->tp_richcompare != ms->key_richcompare) @@ -2183,7 +2183,7 @@ listsort_impl(PyListObject *self, PyObject *keyfunc, int reverse) /* Note: for lists of tuples, key is the first element of the tuple * lo.keys[i], not lo.keys[i] itself! We verify type-homogeneity * for lists of tuples in the if-statement directly above. */ - PyObject* key = (keys_are_in_tuples ? + PyObject *key = (keys_are_in_tuples ? PyTuple_GET_ITEM(lo.keys[i], 0) : lo.keys[i]); @@ -2594,7 +2594,7 @@ list_sizeof(PyListObject *self) } static PyObject *list_iter(PyObject *seq); -static PyObject *list_reversed(PyListObject* seq, PyObject* unused); +static PyObject *list_reversed(PyListObject *seq, PyObject *unused); PyDoc_STRVAR(getitem_doc, "x.__getitem__(y) <==> x[y]"); From e4679e2847f94fe92f963587a0d3cd56bef49613 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sun, 28 Jan 2018 15:10:05 -0800 Subject: [PATCH 44/47] Add news blurb --- .../Core and Builtins/2018-01-28-15-09-33.bpo-28685.cHThLM.rst | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2018-01-28-15-09-33.bpo-28685.cHThLM.rst diff --git a/Misc/NEWS.d/next/Core and Builtins/2018-01-28-15-09-33.bpo-28685.cHThLM.rst b/Misc/NEWS.d/next/Core and Builtins/2018-01-28-15-09-33.bpo-28685.cHThLM.rst new file mode 100644 index 00000000000000..ccc3c0857bc089 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2018-01-28-15-09-33.bpo-28685.cHThLM.rst @@ -0,0 +1,2 @@ +Optimize list.sort() and sorted() by using type specialized comparisons when +possible. From 3b3ce5280411d81bf18218b89893b9095508bb2a Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sun, 28 Jan 2018 17:31:32 -0800 Subject: [PATCH 45/47] Update listsort.txt for the optimization --- Objects/listsort.txt | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Objects/listsort.txt b/Objects/listsort.txt index 17d27973f82676..8c877515c72e88 100644 --- a/Objects/listsort.txt +++ b/Objects/listsort.txt @@ -753,3 +753,11 @@ example, with the region of uncertainty B[4], B[5], B[6], there are 4 locations: before B[4], between B[4] and B[5], between B[5] and B[6], and after B[6]. In general, across 2**(k-1)-1 elements, there are 2**(k-1) locations. That's why k-1 binary searches are necessary and sufficient. + +OPTIMIZATION OF INDIVIDUAL COMPARISONS +As noted above, even the simplest Python comparison triggers a large pile of +C-level pointer dereferences, conditionals, and function calls. This can be +partially mitigated by pre-scanning the data to determine whether the data is +homogenous with respect to type. If so, it is sometimes possible to +substitute faster type-specific comparisons for the slower, generic +PyObject_RichCompareBool. From afed812b8672f238e9385f1ac3ead19a89c6fc59 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sun, 28 Jan 2018 17:45:05 -0800 Subject: [PATCH 46/47] Fix whitespace --- Lib/test/test_sort.py | 18 +++++++++--------- Objects/listobject.c | 42 +++++++++++++++++++++--------------------- 2 files changed, 30 insertions(+), 30 deletions(-) diff --git a/Lib/test/test_sort.py b/Lib/test/test_sort.py index 18a62317dd69cb..15fc1dd72a5f65 100644 --- a/Lib/test/test_sort.py +++ b/Lib/test/test_sort.py @@ -275,7 +275,7 @@ def check_against_PyObject_RichCompareBool(self, L): ## 1. L ## 2. [(x,) for x in L] ## 3. [((x,),) for x in L] - + random.seed(0) random.shuffle(L) L_1 = L[:] @@ -287,7 +287,7 @@ def check_against_PyObject_RichCompareBool(self, L): for (opt, ref) in zip(optimized, reference): self.assertIs(opt, ref) #note: not assertEqual! We want to ensure *identical* behavior. - + class TestOptimizedCompares(unittest.TestCase): def test_safe_object_compare(self): heterogeneous_lists = [[0, 'foo'], @@ -304,9 +304,9 @@ def test_safe_object_compare(self): [1.1,1<<70]] for L in float_int_lists: check_against_PyObject_RichCompareBool(self, L) - + def test_unsafe_object_compare(self): - + # This test is by ppperry. It ensures that unsafe_object_compare is # verifying ms->key_richcompare == tp->richcompare before comparing. class WackyComparator(int): @@ -318,13 +318,13 @@ class WackyList1(list):pass class WackyList2(list): def __lt__(self, other): raise ValueError - + L = [WackyList1([WackyComparator(i), i]) for i in range(10)] elem = L[-1] self.assertRaises(ValueError, L.sort) self.assertRaises(ValueError, [(x,) for x in L].sort) - # The following test is also by ppperry. It ensures that + # The following test is also by ppperry. It ensures that # unsafe_object_compare handles Py_NotImplemented appropriately. class PointlessComparator: def __lt__(self, other): @@ -332,7 +332,7 @@ def __lt__(self, other): L = [PointlessComparator(), PointlessComparator()] self.assertRaises(TypeError, L.sort) self.assertRaises(TypeError, [(x,) for x in L].sort) - + # The following tests go through various types that would trigger # ms->key_compare = unsafe_object_compare lists = [list(range(100)) + [(1<<70)], @@ -341,7 +341,7 @@ def __lt__(self, other): [cmp_to_key(lambda x,y: xvalues += n; } -/* Comparison function: ms->key_compare, which is set at run-time in +/* Comparison function: ms->key_compare, which is set at run-time in * listsort_impl to optimize for various special cases. * Returns -1 on error, 1 if x < y, 0 if x >= y. */ @@ -1968,10 +1968,10 @@ reverse_sortslice(sortslice *s, Py_ssize_t n) reverse_slice(s->values, &s->values[n]); } -/* Here we define custom comparison functions to optimize for the cases one commonly +/* Here we define custom comparison functions to optimize for the cases one commonly * encounters in practice: homogeneous lists, often of one of the basic types. */ -/* This struct holds the comparison function and helper functions +/* This struct holds the comparison function and helper functions * selected in the pre-sort check. */ /* These are the special case compare functions. @@ -2000,7 +2000,7 @@ unsafe_object_compare(PyObject *v, PyObject *w, MergeState *ms) assert(ms->key_richcompare != NULL); res_obj = (*(ms->key_richcompare))(v, w, Py_LT); - + if (res_obj == Py_NotImplemented) { Py_DECREF(res_obj); return PyObject_RichCompareBool(v, w, Py_LT); @@ -2029,13 +2029,13 @@ static int unsafe_latin_compare(PyObject *v, PyObject *w, MergeState *ms) { int len, res; - + /* Modified from Objects/unicodeobject.c:unicode_compare, assuming: */ - assert(v->ob_type == w->ob_type); + assert(v->ob_type == w->ob_type); assert(v->ob_type == &PyUnicode_Type); assert(PyUnicode_KIND(v) == PyUnicode_KIND(w)); assert(PyUnicode_KIND(v) == PyUnicode_1BYTE_KIND); - + len = Py_MIN(PyUnicode_GET_LENGTH(v), PyUnicode_GET_LENGTH(w)); res = memcmp(PyUnicode_DATA(v), PyUnicode_DATA(w), len); @@ -2054,11 +2054,11 @@ unsafe_long_compare(PyObject *v, PyObject *w, MergeState *ms) PyLongObject *vl, *wl; sdigit v0, w0; int res; /* Modified from Objects/longobject.c:long_compare, assuming: */ - assert(v->ob_type == w->ob_type); + assert(v->ob_type == w->ob_type); assert(v->ob_type == &PyLong_Type); assert(Py_ABS(Py_SIZE(v)) <= 1); assert(Py_ABS(Py_SIZE(w)) <= 1); - + vl = (PyLongObject*)v; wl = (PyLongObject*)w; @@ -2082,23 +2082,23 @@ unsafe_float_compare(PyObject *v, PyObject *w, MergeState *ms) int res; /* Modified from Objects/floatobject.c:float_richcompare, assuming: */ - assert(v->ob_type == w->ob_type); + assert(v->ob_type == w->ob_type); assert(v->ob_type == &PyFloat_Type); res = PyFloat_AS_DOUBLE(v) < PyFloat_AS_DOUBLE(w); assert(res == PyObject_RichCompareBool(v, w, Py_LT)); - return res; + return res; } -/* Tuple compare: compare *any* two tuples, using - * ms->tuple_elem_compare to compare the first elements, which is set +/* Tuple compare: compare *any* two tuples, using + * ms->tuple_elem_compare to compare the first elements, which is set * using the same pre-sort check as we use for ms->key_compare, * but run on the list [x[0] for x in L]. This allows us to optimize compares - * on two levels (as long as [x[0] for x in L] is type-homogeneous.) The idea is + * on two levels (as long as [x[0] for x in L] is type-homogeneous.) The idea is * that most tuple compares don't involve x[1:]. */ static int unsafe_tuple_compare(PyObject *v, PyObject *w, MergeState *ms) -{ +{ PyTupleObject *vt, *wt; Py_ssize_t i, vlen, wlen; int k; @@ -2111,7 +2111,7 @@ unsafe_tuple_compare(PyObject *v, PyObject *w, MergeState *ms) vt = (PyTupleObject *)v; wt = (PyTupleObject *)w; - + vlen = Py_SIZE(vt); wlen = Py_SIZE(wt); @@ -2126,10 +2126,10 @@ unsafe_tuple_compare(PyObject *v, PyObject *w, MergeState *ms) if (i >= vlen || i >= wlen) return vlen < wlen; - if (i == 0) + if (i == 0) return ms->tuple_elem_compare(vt->ob_item[i], wt->ob_item[i], ms); else - return PyObject_RichCompareBool(vt->ob_item[i], wt->ob_item[i], Py_LT); + return PyObject_RichCompareBool(vt->ob_item[i], wt->ob_item[i], Py_LT); } /* An adaptive, stable, natural mergesort. See listsort.txt. @@ -2214,8 +2214,8 @@ list_sort_impl(PyListObject *self, PyObject *keyfunc, int reverse) /* The pre-sort check: here's where we decide which compare function to use. - * How much optimization is safe? We test for homogeneity with respect to - * several properties that are expensive to check at compare-time, and + * How much optimization is safe? We test for homogeneity with respect to + * several properties that are expensive to check at compare-time, and * set ms appropriately. */ if (saved_ob_size > 1) { /* Assume the first element is representative of the whole list. */ @@ -2242,7 +2242,7 @@ list_sort_impl(PyListObject *self, PyObject *keyfunc, int reverse) } /* Note: for lists of tuples, key is the first element of the tuple - * lo.keys[i], not lo.keys[i] itself! We verify type-homogeneity + * lo.keys[i], not lo.keys[i] itself! We verify type-homogeneity * for lists of tuples in the if-statement directly above. */ PyObject *key = (keys_are_in_tuples ? PyTuple_GET_ITEM(lo.keys[i], 0) : From ebb4c1f520a85be2ae01c63caea0d20fcc42c724 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sun, 28 Jan 2018 17:59:16 -0800 Subject: [PATCH 47/47] Clean-up and fix tests for mutation of __class__. * Use the with-statement form of self.assertRaises. * Make the two assertions independent of one another. The second test was invalid because the first list was already sorted, making the ValueError inevitable. --- Lib/test/test_sort.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/Lib/test/test_sort.py b/Lib/test/test_sort.py index 15fc1dd72a5f65..f2f53cb1a72f63 100644 --- a/Lib/test/test_sort.py +++ b/Lib/test/test_sort.py @@ -309,20 +309,28 @@ def test_unsafe_object_compare(self): # This test is by ppperry. It ensures that unsafe_object_compare is # verifying ms->key_richcompare == tp->richcompare before comparing. + class WackyComparator(int): def __lt__(self, other): elem.__class__ = WackyList2 return int.__lt__(self, other) - class WackyList1(list):pass + class WackyList1(list): + pass + class WackyList2(list): def __lt__(self, other): raise ValueError L = [WackyList1([WackyComparator(i), i]) for i in range(10)] elem = L[-1] - self.assertRaises(ValueError, L.sort) - self.assertRaises(ValueError, [(x,) for x in L].sort) + with self.assertRaises(ValueError): + L.sort() + + L = [WackyList1([WackyComparator(i), i]) for i in range(10)] + elem = L[-1] + with self.assertRaises(ValueError): + [(x,) for x in L].sort() # The following test is also by ppperry. It ensures that # unsafe_object_compare handles Py_NotImplemented appropriately.