From 89c278ff549b14e64231b44e15e5fc019cd1a99a Mon Sep 17 00:00:00 2001
From: Elliot Gorokhovsky <elliot.gorokhovsky@gmail.com>
Date: Tue, 7 Mar 2017 20:41:57 -0700
Subject: [PATCH 01/47] Added data-aware optimizations to list.sort()

---
 Objects/listobject.c | 339 +++++++++++++++++++++++++++++++++++++++----
 1 file changed, 308 insertions(+), 31 deletions(-)

diff --git a/Objects/listobject.c b/Objects/listobject.c
index 473bd20874d0ff..f2da0d252f3de6 100644
--- a/Objects/listobject.c
+++ b/Objects/listobject.c
@@ -1031,11 +1031,8 @@ sortslice_advance(sortslice *slice, Py_ssize_t n)
         slice->values += n;
 }
 
-/* Comparison function: PyObject_RichCompareBool with Py_LT.
- * Returns -1 on error, 1 if x < y, 0 if x >= y.
- */
-
-#define ISLT(X, Y) (PyObject_RichCompareBool(X, Y, Py_LT))
+/* Macros for comparing keys: */
+#define ISLT(X, Y) ((*compare_funcs.key_compare)(X, Y, compare_funcs))
 
 /* Compare X to Y via "<".  Goto "fail" if the comparison raises an
    error.  Else "k" is set to true iff X<Y, and an "if (k)" block is
@@ -1044,6 +1041,200 @@ sortslice_advance(sortslice *slice, Py_ssize_t n)
 #define IFLT(X, Y) if ((k = ISLT(X, Y)) < 0) goto fail;  \
            if (k)
 
+/* Here we define custom comparison functions to optimize for the cases one commonly 
+ * in practice: homogeneous lists, often of one of the basic types. */
+
+/* This struct holds the comparison function and helper functions 
+ * selected in the pre-sort check. */
+typedef struct CompareFuncs CompareFuncs;
+struct CompareFuncs {
+  /* This is the function we will use to compare two keys,
+   * even when none of our special cases apply and we have to use
+   * safe_object_compare. */
+  int (*key_compare)(PyObject*, PyObject*, CompareFuncs);
+
+  /* This function is used by unsafe_object_compare to optimize comparisons
+   * when we know our list is type-homogeneous but we can't assume anything else.
+   * In the pre-sort check it is set equal to key->ob_type->tp_richcompare */
+  PyObject* (*key_richcompare)(PyObject*, PyObject*, int);
+
+  /* This function is used by unsafe_tuple_compare to compare the first elements
+   * of tuples. It may be set to safe_object_compare, but the idea is that hopefully 
+   * we can assume more, and use one of the special-case compares. */
+  int (*tuple_elem_compare)(PyObject*, PyObject*, CompareFuncs);
+};
+
+/* These are the special case compare functions.
+ * compare_funcs.key_compare will always point to one of these: */
+
+/* Heterogeneous compare: default, always safe to fall back on. */
+static int
+safe_object_compare(PyObject* v, PyObject* w, CompareFuncs compare_funcs)
+{
+  /* No assumptions necessary! */
+    return PyObject_RichCompareBool(v, w, Py_LT);
+}
+
+/* Homogeneous compare: safe for any two compareable objects of the same type.
+ * (compare_funcs.key_richcompare is set to ob_type->tp_richcompare in the
+ *  pre-sort check.)
+ */
+static int
+unsafe_object_compare(PyObject* v, PyObject* w, CompareFuncs compare_funcs)
+{
+  /* Modified from Objects/object.c:PyObject_RichCompareBool, assuming: */
+  #ifdef Py_DEBUG
+    assert(v->ob_type == w->ob_type &&
+	   v->ob_type->tp_richcompare != NULL &&
+	   v->ob_type->tp_richcompare == compare_funcs.key_richcompare);
+  #endif
+
+    PyObject* res = (*compare_funcs.key_richcompare)(v, w, Py_LT);
+    if (res == NULL)
+        return -1;
+    int ok;
+    if (PyBool_Check(res)){
+        ok = (res == Py_True);
+    }
+    else {
+        ok = PyObject_IsTrue(res);
+    }
+    Py_DECREF(res);
+    return ok;
+}
+
+/* Latin string compare: safe for any two latin (one byte per char) strings. */
+static int
+unsafe_latin_compare(PyObject* v, PyObject* w, CompareFuncs compare_funcs){
+  /* Modified from Objects/unicodeobject.c:unicode_compare, assuming: */
+  #ifdef Py_DEBUG
+    assert(v->ob_type == w->ob_type &&
+           v->ob_type == &PyUnicode_Type &&
+           PyUnicode_KIND(v) == PyUnicode_KIND(w) &&
+           PyUnicode_KIND(v) == PyUnicode_1BYTE_KIND);
+  #endif
+
+    int len = Py_MIN(PyUnicode_GET_LENGTH(v), PyUnicode_GET_LENGTH(w));
+    int res = memcmp(PyUnicode_DATA(v), PyUnicode_DATA(w), len);
+
+    return (res != 0 ?
+            res < 0 :
+            PyUnicode_GET_LENGTH(v) < PyUnicode_GET_LENGTH(w));
+}
+
+/* Bounded int compare: compare any two longs that fit in a single machine word. */
+static int
+unsafe_long_compare(PyObject *v, PyObject *w, CompareFuncs compare_funcs)
+{
+  /* Modified from Objects/longobject.c:long_compare, assuming: */
+  #ifdef Py_DEBUG
+    assert(v->ob_type == w->ob_type &&
+           v->ob_type == PyLong_Type &&
+           Py_ABS(Py_SIZE(v)) == Py_ABS(Py_SIZE(w)) &&
+           Py_ABS(Py_SIZE(v)) <= 1);
+  #endif
+
+    PyLongObject *vl, *wl;
+    vl = (PyLongObject*)v;
+    wl = (PyLongObject*)w;
+
+    sdigit v0 = Py_SIZE(vl) == 0 ? 0 : (sdigit)vl->ob_digit[0];
+    sdigit w0 = Py_SIZE(wl) == 0 ? 0 : (sdigit)wl->ob_digit[0];
+
+    if (Py_SIZE(vl) < 0)
+        v0 = -v0;
+    if (Py_SIZE(wl) < 0)
+        w0 = -w0;
+
+    return v0 < w0;
+}
+
+/* Float compare: compare any two floats. */
+static int
+unsafe_float_compare(PyObject *v, PyObject *w, CompareFuncs compare_funcs){
+  /* Modified from Objects/floatobject.c:float_richcompare, assuming: */
+  #ifdef Py_DEBUG
+    assert(v->ob_type == w->ob_type &&
+           v->ob_type == &PyFloat_Type);
+  #endif
+
+    return PyFloat_AS_DOUBLE(v) < PyFloat_AS_DOUBLE(w);
+}
+
+/* Tuple compare: compare any two non-empty tuples.
+ * This is the most complicated special case: since the tuple elements themselves
+ * must of course be compared, we can optimize on two levels. Namely, we make
+ * the same homogeneity assumptions about the first elements of the tuples in
+ * our list as we do about the list elements themselves. We then replace the call to 
+ * PyObject_RichCompareBool within the tuple comparison with special case compare, 
+ * based on which assumptions the first elements of the tuples satisfy.
+ *
+ * Note that we must therefore ensure assumptions in both unsafe_tuple_compare and
+ * compare_funcs.tuple_elem_compare are satisfied. If the first elements are not all 
+ * homogeneous, we can always set 
+ * compare_funcs.tuple_elem_compare = safe_object_compare. */
+static int
+unsafe_tuple_compare(PyObject* v, PyObject* w, CompareFuncs compare_funcs)
+{
+  /* Modified from Objects/tupleobject.c:tuplerichcompare, assuming: */
+  #ifdef Py_DEBUG
+    assert(v->ob_type == w->ob_type &&
+           v->ob_type == &PyTuple_Type &&
+           Py_SIZE(v) > 0 &&
+           Py_SIZE(w) > 0);
+  #endif
+
+    PyTupleObject *vt, *wt;
+    Py_ssize_t i;
+    Py_ssize_t vlen, wlen;
+
+    vt = (PyTupleObject *)v;
+    wt = (PyTupleObject *)w;
+
+    /* Is v[0] < w[0]? */
+    int k = (*compare_funcs.tuple_elem_compare)(vt->ob_item[0],
+						wt->ob_item[0],
+						compare_funcs);
+    if (k < 0)
+        return -1;
+    if (k)
+        return 1;
+
+    vlen = Py_SIZE(vt);
+    wlen = Py_SIZE(wt);
+
+    /* Well, are either of the tuples are singleton? */
+    if (vlen == 1 || wlen == 1)
+        return 0;
+
+    /* Well, is w[0] < v[0]? */
+    k = (*compare_funcs.tuple_elem_compare)(wt->ob_item[0],
+					    vt->ob_item[0],
+					    compare_funcs);
+    if (k < 0)
+        return -1;
+    if (k)
+        return 0;
+
+    /* Out of options: v[0] == w[0]! We need to look at v[1:] and w[1:].
+     * We can use code copied straight from tupleobject.c:tuplerichcompare: */
+    for (i = 0; i < vlen && i < wlen; i++) {
+        k = PyObject_RichCompareBool(vt->ob_item[i],
+                                     wt->ob_item[i],
+                                     Py_EQ);
+        if (k < 0)
+            return -1;
+        if (!k)
+            break;
+    }
+
+    if (i >= vlen || i >= wlen) {
+        return vlen <  wlen;
+    }
+
+    return PyObject_RichCompareBool(vt->ob_item[i], wt->ob_item[i], Py_LT);
+}
+
 /* binarysort is the best method for sorting small arrays: it does
    few compares, but can do data movement quadratic in the number of
    elements.
@@ -1056,7 +1247,7 @@ sortslice_advance(sortslice *slice, Py_ssize_t n)
    the input (nothing is lost or duplicated).
 */
 static int
-binarysort(sortslice lo, PyObject **hi, PyObject **start)
+binarysort(sortslice lo, PyObject **hi, PyObject **start, CompareFuncs compare_funcs)
 {
     Py_ssize_t k;
     PyObject **l, **p, **r;
@@ -1130,7 +1321,7 @@ elements to get out of order).
 Returns -1 in case of error.
 */
 static Py_ssize_t
-count_run(PyObject **lo, PyObject **hi, int *descending)
+count_run(PyObject **lo, PyObject **hi, int *descending, CompareFuncs compare_funcs)
 {
     Py_ssize_t k;
     Py_ssize_t n;
@@ -1185,7 +1376,8 @@ key, and the last n-k should follow key.
 Returns -1 on error.  See listsort.txt for info on the method.
 */
 static Py_ssize_t
-gallop_left(PyObject *key, PyObject **a, Py_ssize_t n, Py_ssize_t hint)
+gallop_left(PyObject *key, PyObject **a, Py_ssize_t n, Py_ssize_t hint,
+	    CompareFuncs compare_funcs)
 {
     Py_ssize_t ofs;
     Py_ssize_t lastofs;
@@ -1276,7 +1468,8 @@ we're sticking to "<" comparisons that it's much harder to follow if
 written as one routine with yet another "left or right?" flag.
 */
 static Py_ssize_t
-gallop_right(PyObject *key, PyObject **a, Py_ssize_t n, Py_ssize_t hint)
+gallop_right(PyObject *key, PyObject **a, Py_ssize_t n, Py_ssize_t hint,
+	     CompareFuncs compare_funcs)
 {
     Py_ssize_t ofs;
     Py_ssize_t lastofs;
@@ -1490,7 +1683,7 @@ merge_getmem(MergeState *ms, Py_ssize_t need)
  */
 static Py_ssize_t
 merge_lo(MergeState *ms, sortslice ssa, Py_ssize_t na,
-         sortslice ssb, Py_ssize_t nb)
+         sortslice ssb, Py_ssize_t nb, CompareFuncs compare_funcs)
 {
     Py_ssize_t k;
     sortslice dest;
@@ -1557,7 +1750,7 @@ merge_lo(MergeState *ms, sortslice ssa, Py_ssize_t na,
             assert(na > 1 && nb > 0);
             min_gallop -= min_gallop > 1;
             ms->min_gallop = min_gallop;
-            k = gallop_right(ssb.keys[0], ssa.keys, na, 0);
+            k = gallop_right(ssb.keys[0], ssa.keys, na, 0, compare_funcs);
             acount = k;
             if (k) {
                 if (k < 0)
@@ -1580,7 +1773,7 @@ merge_lo(MergeState *ms, sortslice ssa, Py_ssize_t na,
             if (nb == 0)
                 goto Succeed;
 
-            k = gallop_left(ssa.keys[0], ssb.keys, nb, 0);
+            k = gallop_left(ssa.keys[0], ssb.keys, nb, 0, compare_funcs);
             bcount = k;
             if (k) {
                 if (k < 0)
@@ -1622,7 +1815,7 @@ merge_lo(MergeState *ms, sortslice ssa, Py_ssize_t na,
  */
 static Py_ssize_t
 merge_hi(MergeState *ms, sortslice ssa, Py_ssize_t na,
-         sortslice ssb, Py_ssize_t nb)
+         sortslice ssb, Py_ssize_t nb, CompareFuncs compare_funcs)
 {
     Py_ssize_t k;
     sortslice dest, basea, baseb;
@@ -1695,7 +1888,7 @@ merge_hi(MergeState *ms, sortslice ssa, Py_ssize_t na,
             assert(na > 0 && nb > 1);
             min_gallop -= min_gallop > 1;
             ms->min_gallop = min_gallop;
-            k = gallop_right(ssb.keys[0], basea.keys, na, na-1);
+            k = gallop_right(ssb.keys[0], basea.keys, na, na-1, compare_funcs);
             if (k < 0)
                 goto Fail;
             k = na - k;
@@ -1713,7 +1906,7 @@ merge_hi(MergeState *ms, sortslice ssa, Py_ssize_t na,
             if (nb == 1)
                 goto CopyA;
 
-            k = gallop_left(ssa.keys[0], baseb.keys, nb, nb-1);
+            k = gallop_left(ssa.keys[0], baseb.keys, nb, nb-1, compare_funcs);
             if (k < 0)
                 goto Fail;
             k = nb - k;
@@ -1760,7 +1953,7 @@ merge_hi(MergeState *ms, sortslice ssa, Py_ssize_t na,
  * Returns 0 on success, -1 on error.
  */
 static Py_ssize_t
-merge_at(MergeState *ms, Py_ssize_t i)
+merge_at(MergeState *ms, Py_ssize_t i, CompareFuncs compare_funcs)
 {
     sortslice ssa, ssb;
     Py_ssize_t na, nb;
@@ -1790,7 +1983,7 @@ merge_at(MergeState *ms, Py_ssize_t i)
     /* Where does b start in a?  Elements in a before that can be
      * ignored (already in place).
      */
-    k = gallop_right(*ssb.keys, ssa.keys, na, 0);
+    k = gallop_right(*ssb.keys, ssa.keys, na, 0, compare_funcs);
     if (k < 0)
         return -1;
     sortslice_advance(&ssa, k);
@@ -1801,7 +1994,7 @@ merge_at(MergeState *ms, Py_ssize_t i)
     /* Where does a end in b?  Elements in b after that can be
      * ignored (already in place).
      */
-    nb = gallop_left(ssa.keys[na-1], ssb.keys, nb, nb-1);
+    nb = gallop_left(ssa.keys[na-1], ssb.keys, nb, nb-1, compare_funcs);
     if (nb <= 0)
         return nb;
 
@@ -1809,9 +2002,9 @@ merge_at(MergeState *ms, Py_ssize_t i)
      * min(na, nb) elements.
      */
     if (na <= nb)
-        return merge_lo(ms, ssa, na, ssb, nb);
+        return merge_lo(ms, ssa, na, ssb, nb, compare_funcs);
     else
-        return merge_hi(ms, ssa, na, ssb, nb);
+        return merge_hi(ms, ssa, na, ssb, nb, compare_funcs);
 }
 
 /* Examine the stack of runs waiting to be merged, merging adjacent runs
@@ -1825,7 +2018,7 @@ merge_at(MergeState *ms, Py_ssize_t i)
  * Returns 0 on success, -1 on error.
  */
 static int
-merge_collapse(MergeState *ms)
+merge_collapse(MergeState *ms, CompareFuncs compare_funcs)
 {
     struct s_slice *p = ms->pending;
 
@@ -1836,12 +2029,12 @@ merge_collapse(MergeState *ms)
             (n > 1 && p[n-2].len <= p[n-1].len + p[n].len)) {
             if (p[n-1].len < p[n+1].len)
                 --n;
-            if (merge_at(ms, n) < 0)
+            if (merge_at(ms, n, compare_funcs) < 0)
                 return -1;
         }
         else if (p[n].len <= p[n+1].len) {
-                 if (merge_at(ms, n) < 0)
-                        return -1;
+	    if (merge_at(ms, n, compare_funcs) < 0)
+	        return -1;
         }
         else
             break;
@@ -1855,7 +2048,7 @@ merge_collapse(MergeState *ms)
  * Returns 0 on success, -1 on error.
  */
 static int
-merge_force_collapse(MergeState *ms)
+merge_force_collapse(MergeState *ms, CompareFuncs compare_funcs)
 {
     struct s_slice *p = ms->pending;
 
@@ -1864,7 +2057,7 @@ merge_force_collapse(MergeState *ms)
         Py_ssize_t n = ms->n - 2;
         if (n > 0 && p[n-1].len < p[n+1].len)
             --n;
-        if (merge_at(ms, n) < 0)
+        if (merge_at(ms, n, compare_funcs) < 0)
             return -1;
     }
     return 0;
@@ -1908,7 +2101,7 @@ reverse_sortslice(sortslice *s, Py_ssize_t n)
  */
 static PyObject *
 listsort_impl(PyListObject *self, PyObject *keyfunc, int reverse)
-{
+{   
     MergeState ms;
     Py_ssize_t nremaining;
     Py_ssize_t minrun;
@@ -1970,6 +2163,89 @@ listsort_impl(PyListObject *self, PyObject *keyfunc, int reverse)
         lo.values = saved_ob_item;
     }
 
+
+    /* The pre-sort check: here's where we decide which compare function to use.
+     * How much optimization is safe? We test for homogeneity with respect to 
+     * several properties that are expensive to check at compare-time, and 
+     * set compare_funcs appropriately. */
+    CompareFuncs compare_funcs;
+    if (saved_ob_size > 1) {
+	/* Assume the first element is representative of the whole list. */
+	int keys_are_in_tuples = (lo.keys[0]->ob_type == &PyTuple_Type &&
+				  Py_SIZE(lo.keys[0]) > 0);
+
+	PyTypeObject* key_type = (keys_are_in_tuples ?
+				  PyTuple_GET_ITEM(lo.keys[0],0)->ob_type :
+				  lo.keys[0]->ob_type);
+
+	int keys_are_all_same_type = 1;
+	int strings_are_latin = 1;
+	int ints_are_bounded = 1;
+
+	/* Prove that assumption by checking every key. */
+	int i;
+	for (i=0; i< saved_ob_size; i++) {
+
+	    if (keys_are_in_tuples &&
+		(lo.keys[i]->ob_type != &PyTuple_Type || Py_SIZE(lo.keys[i]) == 0)) {
+		keys_are_in_tuples = 0;
+		keys_are_all_same_type = 0;
+		break;
+	    }
+
+	    /* Note: for lists of tuples, key is the first element of the tuple
+             * lo.keys[i], not lo.keys[i] itself! We verify type-homogeneity 
+             * for lists of tuples in the if-statement directly above. */
+	    PyObject* key = (keys_are_in_tuples ?
+			     PyTuple_GET_ITEM(lo.keys[i],0) :
+			     lo.keys[i]);
+
+	    if (key->ob_type != key_type) {
+		keys_are_all_same_type = 0;
+		break;
+	    }
+
+	    else if (key_type == &PyLong_Type && ints_are_bounded &&
+		     Py_ABS(Py_SIZE(key)) > 1)
+		ints_are_bounded = 0;
+
+	    else if (key_type == &PyUnicode_Type && strings_are_latin &&
+		     PyUnicode_KIND(key) != PyUnicode_1BYTE_KIND)
+		strings_are_latin = 0;
+	}
+
+	/* Choose the best compare, given what we now know about the keys. */
+	if (keys_are_all_same_type) {
+
+	    if (key_type == &PyUnicode_Type && strings_are_latin)
+		compare_funcs.key_compare = unsafe_latin_compare;
+
+	    else if (key_type == &PyLong_Type && ints_are_bounded)
+		compare_funcs.key_compare = unsafe_long_compare;
+
+	    else if (key_type == &PyFloat_Type)
+		compare_funcs.key_compare = unsafe_float_compare;
+
+	    else if ((compare_funcs.key_richcompare = key_type->tp_richcompare) != NULL)
+		compare_funcs.key_compare = unsafe_object_compare;
+
+	} else {
+	    compare_funcs.key_compare = safe_object_compare;
+	}
+
+	if (keys_are_in_tuples) {
+	    /* Make sure we're not dealing with tuples of tuples
+             * (remember: here, key_type refers list [key[0] for key in keys]) */
+	    if (key_type == &PyTuple_Type)
+		compare_funcs.tuple_elem_compare = safe_object_compare;
+	    else
+		compare_funcs.tuple_elem_compare = compare_funcs.key_compare;
+
+	    compare_funcs.key_compare = unsafe_tuple_compare;
+	}
+    }
+    /* End of pre-sort check: compare_funcs is now set properly! */
+
     merge_init(&ms, saved_ob_size, keys != NULL);
 
     nremaining = saved_ob_size;
@@ -1993,7 +2269,7 @@ listsort_impl(PyListObject *self, PyObject *keyfunc, int reverse)
         Py_ssize_t n;
 
         /* Identify next run. */
-        n = count_run(lo.keys, lo.keys + nremaining, &descending);
+        n = count_run(lo.keys, lo.keys + nremaining, &descending, compare_funcs);
         if (n < 0)
             goto fail;
         if (descending)
@@ -2002,7 +2278,7 @@ listsort_impl(PyListObject *self, PyObject *keyfunc, int reverse)
         if (n < minrun) {
             const Py_ssize_t force = nremaining <= minrun ?
                               nremaining : minrun;
-            if (binarysort(lo, lo.keys + force, lo.keys + n) < 0)
+            if (binarysort(lo, lo.keys + force, lo.keys + n, compare_funcs) < 0)
                 goto fail;
             n = force;
         }
@@ -2011,14 +2287,14 @@ listsort_impl(PyListObject *self, PyObject *keyfunc, int reverse)
         ms.pending[ms.n].base = lo;
         ms.pending[ms.n].len = n;
         ++ms.n;
-        if (merge_collapse(&ms) < 0)
+        if (merge_collapse(&ms, compare_funcs) < 0)
             goto fail;
         /* Advance to find next run. */
         sortslice_advance(&lo, n);
         nremaining -= n;
     } while (nremaining);
 
-    if (merge_force_collapse(&ms) < 0)
+    if (merge_force_collapse(&ms, compare_funcs) < 0)
         goto fail;
     assert(ms.n == 1);
     assert(keys == NULL
@@ -2080,6 +2356,7 @@ listsort(PyListObject *self, PyObject *args, PyObject *kwds)
     if (!PyArg_ParseTupleAndKeywords(args, kwds, "|$Oi:sort",
         kwlist, &keyfunc, &reverse))
         return NULL;
+
     return listsort_impl(self, keyfunc, reverse);
 }
 

From 2ce5e5e7a451ad04f9a75912946307b2f9dea81c Mon Sep 17 00:00:00 2001
From: Elliot Gorokhovsky <elliot.gorokhovsky@gmail.com>
Date: Tue, 7 Mar 2017 20:46:44 -0700
Subject: [PATCH 02/47] Removed trailing whitespace from listsort_impl

---
 Objects/listobject.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Objects/listobject.c b/Objects/listobject.c
index f2da0d252f3de6..2bc9694198cfb7 100644
--- a/Objects/listobject.c
+++ b/Objects/listobject.c
@@ -2101,7 +2101,7 @@ reverse_sortslice(sortslice *s, Py_ssize_t n)
  */
 static PyObject *
 listsort_impl(PyListObject *self, PyObject *keyfunc, int reverse)
-{   
+{
     MergeState ms;
     Py_ssize_t nremaining;
     Py_ssize_t minrun;

From 7d2f44a02e62efd07ca98f713df41ef6d8ae4471 Mon Sep 17 00:00:00 2001
From: Elliot Gorokhovsky <elliot.gorokhovsky@gmail.com>
Date: Tue, 7 Mar 2017 20:50:30 -0700
Subject: [PATCH 03/47] fixed comment indentation

---
 Objects/listobject.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Objects/listobject.c b/Objects/listobject.c
index 2bc9694198cfb7..b9363f4f4b1c7a 100644
--- a/Objects/listobject.c
+++ b/Objects/listobject.c
@@ -2235,7 +2235,7 @@ listsort_impl(PyListObject *self, PyObject *keyfunc, int reverse)
 
 	if (keys_are_in_tuples) {
 	    /* Make sure we're not dealing with tuples of tuples
-             * (remember: here, key_type refers list [key[0] for key in keys]) */
+	     * (remember: here, key_type refers list [key[0] for key in keys]) */
 	    if (key_type == &PyTuple_Type)
 		compare_funcs.tuple_elem_compare = safe_object_compare;
 	    else

From d752fc790f682fc1e4f85415b773036d357dc466 Mon Sep 17 00:00:00 2001
From: Elliot Gorokhovsky <elliot.gorokhovsky@gmail.com>
Date: Tue, 7 Mar 2017 20:54:55 -0700
Subject: [PATCH 04/47] Added myself to Misc/ACKS

---
 Misc/ACKS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Misc/ACKS b/Misc/ACKS
index b7f1282c69c0ce..9c77c620806900 100644
--- a/Misc/ACKS
+++ b/Misc/ACKS
@@ -539,6 +539,7 @@ Tiago Gonçalves
 Chris Gonnerman
 Shelley Gooch
 David Goodger
+Elliot Gorokhovsky
 Hans de Graaff
 Tim Graham
 Kim Gräsman

From e19728ed3919e5ef52b5f77808be5e92473abee3 Mon Sep 17 00:00:00 2001
From: Elliot Gorokhovsky <elliot.gorokhovsky@gmail.com>
Date: Tue, 7 Mar 2017 21:02:00 -0700
Subject: [PATCH 05/47] Made ISLT comment more in line with the current text

---
 Objects/listobject.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/Objects/listobject.c b/Objects/listobject.c
index b9363f4f4b1c7a..0181c00fc1047c 100644
--- a/Objects/listobject.c
+++ b/Objects/listobject.c
@@ -1031,7 +1031,10 @@ sortslice_advance(sortslice *slice, Py_ssize_t n)
         slice->values += n;
 }
 
-/* Macros for comparing keys: */
+/* Comparison function: compare_funcs.key_compare, which is set at run-time in 
+ * listsort_impl to optimize for various special cases.
+ * Returns -1 on error, 1 if x < y, 0 if x >= y.
+ */
 #define ISLT(X, Y) ((*compare_funcs.key_compare)(X, Y, compare_funcs))
 
 /* Compare X to Y via "<".  Goto "fail" if the comparison raises an

From 7e74c27fc208f77fd855e16ffab5d23f10ab34f8 Mon Sep 17 00:00:00 2001
From: Elliot Gorokhovsky <elliot.gorokhovsky@gmail.com>
Date: Tue, 7 Mar 2017 21:02:46 -0700
Subject: [PATCH 06/47] Add blank line after ISLT comment

---
 Objects/listobject.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Objects/listobject.c b/Objects/listobject.c
index 0181c00fc1047c..1030aa61124e10 100644
--- a/Objects/listobject.c
+++ b/Objects/listobject.c
@@ -1035,6 +1035,7 @@ sortslice_advance(sortslice *slice, Py_ssize_t n)
  * listsort_impl to optimize for various special cases.
  * Returns -1 on error, 1 if x < y, 0 if x >= y.
  */
+
 #define ISLT(X, Y) ((*compare_funcs.key_compare)(X, Y, compare_funcs))
 
 /* Compare X to Y via "<".  Goto "fail" if the comparison raises an

From 9c566b1c49d148efad927a01e7089704cdab480c Mon Sep 17 00:00:00 2001
From: Elliot Gorokhovsky <elliot.gorokhovsky@gmail.com>
Date: Tue, 7 Mar 2017 21:06:04 -0700
Subject: [PATCH 07/47] untabify

---
 Objects/listobject.c | 140 +++++++++++++++++++++----------------------
 1 file changed, 70 insertions(+), 70 deletions(-)

diff --git a/Objects/listobject.c b/Objects/listobject.c
index 1030aa61124e10..3177879b1ae271 100644
--- a/Objects/listobject.c
+++ b/Objects/listobject.c
@@ -1089,8 +1089,8 @@ unsafe_object_compare(PyObject* v, PyObject* w, CompareFuncs compare_funcs)
   /* Modified from Objects/object.c:PyObject_RichCompareBool, assuming: */
   #ifdef Py_DEBUG
     assert(v->ob_type == w->ob_type &&
-	   v->ob_type->tp_richcompare != NULL &&
-	   v->ob_type->tp_richcompare == compare_funcs.key_richcompare);
+           v->ob_type->tp_richcompare != NULL &&
+           v->ob_type->tp_richcompare == compare_funcs.key_richcompare);
   #endif
 
     PyObject* res = (*compare_funcs.key_richcompare)(v, w, Py_LT);
@@ -1197,8 +1197,8 @@ unsafe_tuple_compare(PyObject* v, PyObject* w, CompareFuncs compare_funcs)
 
     /* Is v[0] < w[0]? */
     int k = (*compare_funcs.tuple_elem_compare)(vt->ob_item[0],
-						wt->ob_item[0],
-						compare_funcs);
+                                                wt->ob_item[0],
+                                                compare_funcs);
     if (k < 0)
         return -1;
     if (k)
@@ -1213,8 +1213,8 @@ unsafe_tuple_compare(PyObject* v, PyObject* w, CompareFuncs compare_funcs)
 
     /* Well, is w[0] < v[0]? */
     k = (*compare_funcs.tuple_elem_compare)(wt->ob_item[0],
-					    vt->ob_item[0],
-					    compare_funcs);
+                                            vt->ob_item[0],
+                                            compare_funcs);
     if (k < 0)
         return -1;
     if (k)
@@ -1381,7 +1381,7 @@ Returns -1 on error.  See listsort.txt for info on the method.
 */
 static Py_ssize_t
 gallop_left(PyObject *key, PyObject **a, Py_ssize_t n, Py_ssize_t hint,
-	    CompareFuncs compare_funcs)
+            CompareFuncs compare_funcs)
 {
     Py_ssize_t ofs;
     Py_ssize_t lastofs;
@@ -1473,7 +1473,7 @@ written as one routine with yet another "left or right?" flag.
 */
 static Py_ssize_t
 gallop_right(PyObject *key, PyObject **a, Py_ssize_t n, Py_ssize_t hint,
-	     CompareFuncs compare_funcs)
+             CompareFuncs compare_funcs)
 {
     Py_ssize_t ofs;
     Py_ssize_t lastofs;
@@ -2037,8 +2037,8 @@ merge_collapse(MergeState *ms, CompareFuncs compare_funcs)
                 return -1;
         }
         else if (p[n].len <= p[n+1].len) {
-	    if (merge_at(ms, n, compare_funcs) < 0)
-	        return -1;
+            if (merge_at(ms, n, compare_funcs) < 0)
+                return -1;
         }
         else
             break;
@@ -2174,79 +2174,79 @@ listsort_impl(PyListObject *self, PyObject *keyfunc, int reverse)
      * set compare_funcs appropriately. */
     CompareFuncs compare_funcs;
     if (saved_ob_size > 1) {
-	/* Assume the first element is representative of the whole list. */
-	int keys_are_in_tuples = (lo.keys[0]->ob_type == &PyTuple_Type &&
-				  Py_SIZE(lo.keys[0]) > 0);
-
-	PyTypeObject* key_type = (keys_are_in_tuples ?
-				  PyTuple_GET_ITEM(lo.keys[0],0)->ob_type :
-				  lo.keys[0]->ob_type);
-
-	int keys_are_all_same_type = 1;
-	int strings_are_latin = 1;
-	int ints_are_bounded = 1;
-
-	/* Prove that assumption by checking every key. */
-	int i;
-	for (i=0; i< saved_ob_size; i++) {
-
-	    if (keys_are_in_tuples &&
-		(lo.keys[i]->ob_type != &PyTuple_Type || Py_SIZE(lo.keys[i]) == 0)) {
-		keys_are_in_tuples = 0;
-		keys_are_all_same_type = 0;
-		break;
-	    }
-
-	    /* Note: for lists of tuples, key is the first element of the tuple
+        /* Assume the first element is representative of the whole list. */
+        int keys_are_in_tuples = (lo.keys[0]->ob_type == &PyTuple_Type &&
+                                  Py_SIZE(lo.keys[0]) > 0);
+
+        PyTypeObject* key_type = (keys_are_in_tuples ?
+                                  PyTuple_GET_ITEM(lo.keys[0],0)->ob_type :
+                                  lo.keys[0]->ob_type);
+
+        int keys_are_all_same_type = 1;
+        int strings_are_latin = 1;
+        int ints_are_bounded = 1;
+
+        /* Prove that assumption by checking every key. */
+        int i;
+        for (i=0; i< saved_ob_size; i++) {
+
+            if (keys_are_in_tuples &&
+                (lo.keys[i]->ob_type != &PyTuple_Type || Py_SIZE(lo.keys[i]) == 0)) {
+                keys_are_in_tuples = 0;
+                keys_are_all_same_type = 0;
+                break;
+            }
+
+            /* Note: for lists of tuples, key is the first element of the tuple
              * lo.keys[i], not lo.keys[i] itself! We verify type-homogeneity 
              * for lists of tuples in the if-statement directly above. */
-	    PyObject* key = (keys_are_in_tuples ?
-			     PyTuple_GET_ITEM(lo.keys[i],0) :
-			     lo.keys[i]);
+            PyObject* key = (keys_are_in_tuples ?
+                             PyTuple_GET_ITEM(lo.keys[i],0) :
+                             lo.keys[i]);
 
-	    if (key->ob_type != key_type) {
-		keys_are_all_same_type = 0;
-		break;
-	    }
+            if (key->ob_type != key_type) {
+                keys_are_all_same_type = 0;
+                break;
+            }
 
-	    else if (key_type == &PyLong_Type && ints_are_bounded &&
-		     Py_ABS(Py_SIZE(key)) > 1)
-		ints_are_bounded = 0;
+            else if (key_type == &PyLong_Type && ints_are_bounded &&
+                     Py_ABS(Py_SIZE(key)) > 1)
+                ints_are_bounded = 0;
 
-	    else if (key_type == &PyUnicode_Type && strings_are_latin &&
-		     PyUnicode_KIND(key) != PyUnicode_1BYTE_KIND)
-		strings_are_latin = 0;
-	}
+            else if (key_type == &PyUnicode_Type && strings_are_latin &&
+                     PyUnicode_KIND(key) != PyUnicode_1BYTE_KIND)
+                strings_are_latin = 0;
+        }
 
-	/* Choose the best compare, given what we now know about the keys. */
-	if (keys_are_all_same_type) {
+        /* Choose the best compare, given what we now know about the keys. */
+        if (keys_are_all_same_type) {
 
-	    if (key_type == &PyUnicode_Type && strings_are_latin)
-		compare_funcs.key_compare = unsafe_latin_compare;
+            if (key_type == &PyUnicode_Type && strings_are_latin)
+                compare_funcs.key_compare = unsafe_latin_compare;
 
-	    else if (key_type == &PyLong_Type && ints_are_bounded)
-		compare_funcs.key_compare = unsafe_long_compare;
+            else if (key_type == &PyLong_Type && ints_are_bounded)
+                compare_funcs.key_compare = unsafe_long_compare;
 
-	    else if (key_type == &PyFloat_Type)
-		compare_funcs.key_compare = unsafe_float_compare;
+            else if (key_type == &PyFloat_Type)
+                compare_funcs.key_compare = unsafe_float_compare;
 
-	    else if ((compare_funcs.key_richcompare = key_type->tp_richcompare) != NULL)
-		compare_funcs.key_compare = unsafe_object_compare;
+            else if ((compare_funcs.key_richcompare = key_type->tp_richcompare) != NULL)
+                compare_funcs.key_compare = unsafe_object_compare;
 
-	} else {
-	    compare_funcs.key_compare = safe_object_compare;
-	}
+        } else {
+            compare_funcs.key_compare = safe_object_compare;
+        }
 
-	if (keys_are_in_tuples) {
-	    /* Make sure we're not dealing with tuples of tuples
-	     * (remember: here, key_type refers list [key[0] for key in keys]) */
-	    if (key_type == &PyTuple_Type)
-		compare_funcs.tuple_elem_compare = safe_object_compare;
-	    else
-		compare_funcs.tuple_elem_compare = compare_funcs.key_compare;
+        if (keys_are_in_tuples) {
+            /* Make sure we're not dealing with tuples of tuples
+             * (remember: here, key_type refers list [key[0] for key in keys]) */
+            if (key_type == &PyTuple_Type)
+                compare_funcs.tuple_elem_compare = safe_object_compare;
+            else
+                compare_funcs.tuple_elem_compare = compare_funcs.key_compare;
 
-	    compare_funcs.key_compare = unsafe_tuple_compare;
-	}
+            compare_funcs.key_compare = unsafe_tuple_compare;
+        }
     }
     /* End of pre-sort check: compare_funcs is now set properly! */
 

From 8876e26f27f7b61161135416ec56d56323e7d5b4 Mon Sep 17 00:00:00 2001
From: Elliot Gorokhovsky <elliot.gorokhovsky@gmail.com>
Date: Tue, 7 Mar 2017 21:10:58 -0700
Subject: [PATCH 08/47] removed newline

---
 Objects/listobject.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/Objects/listobject.c b/Objects/listobject.c
index 3177879b1ae271..15e5d666484f69 100644
--- a/Objects/listobject.c
+++ b/Objects/listobject.c
@@ -2360,7 +2360,6 @@ listsort(PyListObject *self, PyObject *args, PyObject *kwds)
     if (!PyArg_ParseTupleAndKeywords(args, kwds, "|$Oi:sort",
         kwlist, &keyfunc, &reverse))
         return NULL;
-
     return listsort_impl(self, keyfunc, reverse);
 }
 

From 8accd71f36838887fdcb93944547b98196d915a4 Mon Sep 17 00:00:00 2001
From: Elliot Gorokhovsky <elliot.gorokhovsky@gmail.com>
Date: Wed, 8 Mar 2017 22:07:45 -0700
Subject: [PATCH 09/47] simplified description of the tuple compare

---
 Objects/listobject.c | 17 +++++------------
 1 file changed, 5 insertions(+), 12 deletions(-)

diff --git a/Objects/listobject.c b/Objects/listobject.c
index 15e5d666484f69..c2b6d8125c33ac 100644
--- a/Objects/listobject.c
+++ b/Objects/listobject.c
@@ -1165,18 +1165,11 @@ unsafe_float_compare(PyObject *v, PyObject *w, CompareFuncs compare_funcs){
     return PyFloat_AS_DOUBLE(v) < PyFloat_AS_DOUBLE(w);
 }
 
-/* Tuple compare: compare any two non-empty tuples.
- * This is the most complicated special case: since the tuple elements themselves
- * must of course be compared, we can optimize on two levels. Namely, we make
- * the same homogeneity assumptions about the first elements of the tuples in
- * our list as we do about the list elements themselves. We then replace the call to 
- * PyObject_RichCompareBool within the tuple comparison with special case compare, 
- * based on which assumptions the first elements of the tuples satisfy.
- *
- * Note that we must therefore ensure assumptions in both unsafe_tuple_compare and
- * compare_funcs.tuple_elem_compare are satisfied. If the first elements are not all 
- * homogeneous, we can always set 
- * compare_funcs.tuple_elem_compare = safe_object_compare. */
+/* Tuple compare: compare any two non-empty tuples, using 
+ * compare_funcs.tuple_elem_compare to compare the first elements, which is set 
+ * using the same pre-sort check as we use for compare_funcs.key_compare,
+ * but run on the list [x[0] for x in L]. This allows us to optimize compares
+ * on two levels as long as [x[0] for x in L] is type-homogeneous. */
 static int
 unsafe_tuple_compare(PyObject* v, PyObject* w, CompareFuncs compare_funcs)
 {

From 1567801c7ce6feef7dc957c44c5530d503d3fe61 Mon Sep 17 00:00:00 2001
From: Elliot Gorokhovsky <elliot.gorokhovsky@gmail.com>
Date: Wed, 8 Mar 2017 22:08:34 -0700
Subject: [PATCH 10/47] grammar

---
 Objects/listobject.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Objects/listobject.c b/Objects/listobject.c
index c2b6d8125c33ac..a08f7bca3ad397 100644
--- a/Objects/listobject.c
+++ b/Objects/listobject.c
@@ -1169,7 +1169,7 @@ unsafe_float_compare(PyObject *v, PyObject *w, CompareFuncs compare_funcs){
  * compare_funcs.tuple_elem_compare to compare the first elements, which is set 
  * using the same pre-sort check as we use for compare_funcs.key_compare,
  * but run on the list [x[0] for x in L]. This allows us to optimize compares
- * on two levels as long as [x[0] for x in L] is type-homogeneous. */
+ * on two levels (as long as [x[0] for x in L] is type-homogeneous.) */
 static int
 unsafe_tuple_compare(PyObject* v, PyObject* w, CompareFuncs compare_funcs)
 {

From 3820cdba0d2228ab79435c84f282b02c472746d3 Mon Sep 17 00:00:00 2001
From: Elliot Gorokhovsky <elliot.gorokhovsky@gmail.com>
Date: Thu, 9 Mar 2017 08:32:34 -0700
Subject: [PATCH 11/47] Bugfix -- gcc ignored the error, but clang caught it!

---
 Objects/listobject.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Objects/listobject.c b/Objects/listobject.c
index a08f7bca3ad397..e56a75b9650d5d 100644
--- a/Objects/listobject.c
+++ b/Objects/listobject.c
@@ -1133,7 +1133,7 @@ unsafe_long_compare(PyObject *v, PyObject *w, CompareFuncs compare_funcs)
   /* Modified from Objects/longobject.c:long_compare, assuming: */
   #ifdef Py_DEBUG
     assert(v->ob_type == w->ob_type &&
-           v->ob_type == PyLong_Type &&
+           v->ob_type == &PyLong_Type &&
            Py_ABS(Py_SIZE(v)) == Py_ABS(Py_SIZE(w)) &&
            Py_ABS(Py_SIZE(v)) <= 1);
   #endif

From 201a4681fb8bfe595a2efa1a0c24bea3dff6f8f5 Mon Sep 17 00:00:00 2001
From: Elliot Gorokhovsky <elliot.gorokhovsky@gmail.com>
Date: Thu, 9 Mar 2017 08:40:41 -0700
Subject: [PATCH 12/47] Bugfix -- assertion in unsafe_long_compare was phrased
 incorrectly.

---
 Objects/listobject.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Objects/listobject.c b/Objects/listobject.c
index e56a75b9650d5d..0d0aa887855a1c 100644
--- a/Objects/listobject.c
+++ b/Objects/listobject.c
@@ -1134,8 +1134,8 @@ unsafe_long_compare(PyObject *v, PyObject *w, CompareFuncs compare_funcs)
   #ifdef Py_DEBUG
     assert(v->ob_type == w->ob_type &&
            v->ob_type == &PyLong_Type &&
-           Py_ABS(Py_SIZE(v)) == Py_ABS(Py_SIZE(w)) &&
-           Py_ABS(Py_SIZE(v)) <= 1);
+           Py_ABS(Py_SIZE(v)) <= 1 &&
+           Py_ABS(Py_SIZE(w)) <= 1);
   #endif
 
     PyLongObject *vl, *wl;

From c2a9df260cedee8d98635b8188dad4c4db9681f9 Mon Sep 17 00:00:00 2001
From: embg <elliot.gorokhovsky@gmail.com>
Date: Thu, 9 Mar 2017 14:00:21 -0700
Subject: [PATCH 13/47] fix typo

---
 Objects/listobject.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Objects/listobject.c b/Objects/listobject.c
index 0d0aa887855a1c..f479e48c9818b1 100644
--- a/Objects/listobject.c
+++ b/Objects/listobject.c
@@ -1046,7 +1046,7 @@ sortslice_advance(sortslice *slice, Py_ssize_t n)
            if (k)
 
 /* Here we define custom comparison functions to optimize for the cases one commonly 
- * in practice: homogeneous lists, often of one of the basic types. */
+ * encounters in practice: homogeneous lists, often of one of the basic types. */
 
 /* This struct holds the comparison function and helper functions 
  * selected in the pre-sort check. */

From 37b15b80fa7d3ebc29947d480bf6cf2b64cfb673 Mon Sep 17 00:00:00 2001
From: Elliot Gorokhovsky <elliot.gorokhovsky@gmail.com>
Date: Sat, 11 Mar 2017 10:21:49 -0700
Subject: [PATCH 14/47] Added if (v == w) return 1; to all compares

---
 Objects/listobject.c | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/Objects/listobject.c b/Objects/listobject.c
index 0d0aa887855a1c..0e03fbbf31c176 100644
--- a/Objects/listobject.c
+++ b/Objects/listobject.c
@@ -1092,7 +1092,8 @@ unsafe_object_compare(PyObject* v, PyObject* w, CompareFuncs compare_funcs)
            v->ob_type->tp_richcompare != NULL &&
            v->ob_type->tp_richcompare == compare_funcs.key_richcompare);
   #endif
-
+    if (v == w) return 1;
+    
     PyObject* res = (*compare_funcs.key_richcompare)(v, w, Py_LT);
     if (res == NULL)
         return -1;
@@ -1117,7 +1118,8 @@ unsafe_latin_compare(PyObject* v, PyObject* w, CompareFuncs compare_funcs){
            PyUnicode_KIND(v) == PyUnicode_KIND(w) &&
            PyUnicode_KIND(v) == PyUnicode_1BYTE_KIND);
   #endif
-
+    if (v == w) return 1;
+    
     int len = Py_MIN(PyUnicode_GET_LENGTH(v), PyUnicode_GET_LENGTH(w));
     int res = memcmp(PyUnicode_DATA(v), PyUnicode_DATA(w), len);
 
@@ -1137,7 +1139,8 @@ unsafe_long_compare(PyObject *v, PyObject *w, CompareFuncs compare_funcs)
            Py_ABS(Py_SIZE(v)) <= 1 &&
            Py_ABS(Py_SIZE(w)) <= 1);
   #endif
-
+    if (v == w) return 1;
+    
     PyLongObject *vl, *wl;
     vl = (PyLongObject*)v;
     wl = (PyLongObject*)w;
@@ -1161,7 +1164,8 @@ unsafe_float_compare(PyObject *v, PyObject *w, CompareFuncs compare_funcs){
     assert(v->ob_type == w->ob_type &&
            v->ob_type == &PyFloat_Type);
   #endif
-
+    if (v == w) return 1;
+    
     return PyFloat_AS_DOUBLE(v) < PyFloat_AS_DOUBLE(w);
 }
 
@@ -1180,7 +1184,8 @@ unsafe_tuple_compare(PyObject* v, PyObject* w, CompareFuncs compare_funcs)
            Py_SIZE(v) > 0 &&
            Py_SIZE(w) > 0);
   #endif
-
+    if (v == w) return 1;
+    
     PyTupleObject *vt, *wt;
     Py_ssize_t i;
     Py_ssize_t vlen, wlen;
@@ -1215,7 +1220,7 @@ unsafe_tuple_compare(PyObject* v, PyObject* w, CompareFuncs compare_funcs)
 
     /* Out of options: v[0] == w[0]! We need to look at v[1:] and w[1:].
      * We can use code copied straight from tupleobject.c:tuplerichcompare: */
-    for (i = 0; i < vlen && i < wlen; i++) {
+    for (i = 1; i < vlen && i < wlen; i++) {
         k = PyObject_RichCompareBool(vt->ob_item[i],
                                      wt->ob_item[i],
                                      Py_EQ);

From ed9b21f5b3ef10fe601142d0b5bbb389de3101cd Mon Sep 17 00:00:00 2001
From: Elliot Gorokhovsky <elliot.gorokhovsky@gmail.com>
Date: Sat, 11 Mar 2017 10:28:32 -0700
Subject: [PATCH 15/47] Added if (v == w) return 0; to all compares, apologies
 for previous commit

---
 Objects/listobject.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/Objects/listobject.c b/Objects/listobject.c
index f36c3fe8bdfe8a..25536acafa757e 100644
--- a/Objects/listobject.c
+++ b/Objects/listobject.c
@@ -1092,7 +1092,7 @@ unsafe_object_compare(PyObject* v, PyObject* w, CompareFuncs compare_funcs)
            v->ob_type->tp_richcompare != NULL &&
            v->ob_type->tp_richcompare == compare_funcs.key_richcompare);
   #endif
-    if (v == w) return 1;
+    if (v == w) return 0;
     
     PyObject* res = (*compare_funcs.key_richcompare)(v, w, Py_LT);
     if (res == NULL)
@@ -1118,7 +1118,7 @@ unsafe_latin_compare(PyObject* v, PyObject* w, CompareFuncs compare_funcs){
            PyUnicode_KIND(v) == PyUnicode_KIND(w) &&
            PyUnicode_KIND(v) == PyUnicode_1BYTE_KIND);
   #endif
-    if (v == w) return 1;
+    if (v == w) return 0;
     
     int len = Py_MIN(PyUnicode_GET_LENGTH(v), PyUnicode_GET_LENGTH(w));
     int res = memcmp(PyUnicode_DATA(v), PyUnicode_DATA(w), len);
@@ -1139,7 +1139,7 @@ unsafe_long_compare(PyObject *v, PyObject *w, CompareFuncs compare_funcs)
            Py_ABS(Py_SIZE(v)) <= 1 &&
            Py_ABS(Py_SIZE(w)) <= 1);
   #endif
-    if (v == w) return 1;
+    if (v == w) return 0;
     
     PyLongObject *vl, *wl;
     vl = (PyLongObject*)v;
@@ -1164,7 +1164,7 @@ unsafe_float_compare(PyObject *v, PyObject *w, CompareFuncs compare_funcs){
     assert(v->ob_type == w->ob_type &&
            v->ob_type == &PyFloat_Type);
   #endif
-    if (v == w) return 1;
+    if (v == w) return 0;
     
     return PyFloat_AS_DOUBLE(v) < PyFloat_AS_DOUBLE(w);
 }
@@ -1184,7 +1184,7 @@ unsafe_tuple_compare(PyObject* v, PyObject* w, CompareFuncs compare_funcs)
            Py_SIZE(v) > 0 &&
            Py_SIZE(w) > 0);
   #endif
-    if (v == w) return 1;
+    if (v == w) return 0;
     
     PyTupleObject *vt, *wt;
     Py_ssize_t i;

From acf4c9dedf16feb0fb34be1fc148d09e2eb30bc7 Mon Sep 17 00:00:00 2001
From: Elliot Gorokhovsky <elliot.gorokhovsky@gmail.com>
Date: Sun, 12 Mar 2017 14:11:04 -0600
Subject: [PATCH 16/47] Folded CompareFuncs into MergeState and added safety
 check to unsafe_object_compare

---
 Objects/listobject.c | 542 +++++++++++++++++++++----------------------
 1 file changed, 267 insertions(+), 275 deletions(-)

diff --git a/Objects/listobject.c b/Objects/listobject.c
index 25536acafa757e..dace48312e3c74 100644
--- a/Objects/listobject.c
+++ b/Objects/listobject.c
@@ -1031,12 +1031,12 @@ sortslice_advance(sortslice *slice, Py_ssize_t n)
         slice->values += n;
 }
 
-/* Comparison function: compare_funcs.key_compare, which is set at run-time in 
+/* Comparison function: ms->key_compare, which is set at run-time in 
  * listsort_impl to optimize for various special cases.
  * Returns -1 on error, 1 if x < y, 0 if x >= y.
  */
 
-#define ISLT(X, Y) ((*compare_funcs.key_compare)(X, Y, compare_funcs))
+#define ISLT(X, Y) ((*(ms->key_compare))(X, Y, ms))
 
 /* Compare X to Y via "<".  Goto "fail" if the comparison raises an
    error.  Else "k" is set to true iff X<Y, and an "if (k)" block is
@@ -1045,197 +1045,74 @@ sortslice_advance(sortslice *slice, Py_ssize_t n)
 #define IFLT(X, Y) if ((k = ISLT(X, Y)) < 0) goto fail;  \
            if (k)
 
-/* Here we define custom comparison functions to optimize for the cases one commonly 
- * encounters in practice: homogeneous lists, often of one of the basic types. */
-
-/* This struct holds the comparison function and helper functions 
- * selected in the pre-sort check. */
-typedef struct CompareFuncs CompareFuncs;
-struct CompareFuncs {
-  /* This is the function we will use to compare two keys,
-   * even when none of our special cases apply and we have to use
-   * safe_object_compare. */
-  int (*key_compare)(PyObject*, PyObject*, CompareFuncs);
-
-  /* This function is used by unsafe_object_compare to optimize comparisons
-   * when we know our list is type-homogeneous but we can't assume anything else.
-   * In the pre-sort check it is set equal to key->ob_type->tp_richcompare */
-  PyObject* (*key_richcompare)(PyObject*, PyObject*, int);
-
-  /* This function is used by unsafe_tuple_compare to compare the first elements
-   * of tuples. It may be set to safe_object_compare, but the idea is that hopefully 
-   * we can assume more, and use one of the special-case compares. */
-  int (*tuple_elem_compare)(PyObject*, PyObject*, CompareFuncs);
-};
-
-/* These are the special case compare functions.
- * compare_funcs.key_compare will always point to one of these: */
-
-/* Heterogeneous compare: default, always safe to fall back on. */
-static int
-safe_object_compare(PyObject* v, PyObject* w, CompareFuncs compare_funcs)
-{
-  /* No assumptions necessary! */
-    return PyObject_RichCompareBool(v, w, Py_LT);
-}
-
-/* Homogeneous compare: safe for any two compareable objects of the same type.
- * (compare_funcs.key_richcompare is set to ob_type->tp_richcompare in the
- *  pre-sort check.)
+/* The maximum number of entries in a MergeState's pending-runs stack.
+ * This is enough to sort arrays of size up to about
+ *     32 * phi ** MAX_MERGE_PENDING
+ * where phi ~= 1.618.  85 is ridiculously large enough, good for an array
+ * with 2**64 elements.
  */
-static int
-unsafe_object_compare(PyObject* v, PyObject* w, CompareFuncs compare_funcs)
-{
-  /* Modified from Objects/object.c:PyObject_RichCompareBool, assuming: */
-  #ifdef Py_DEBUG
-    assert(v->ob_type == w->ob_type &&
-           v->ob_type->tp_richcompare != NULL &&
-           v->ob_type->tp_richcompare == compare_funcs.key_richcompare);
-  #endif
-    if (v == w) return 0;
-    
-    PyObject* res = (*compare_funcs.key_richcompare)(v, w, Py_LT);
-    if (res == NULL)
-        return -1;
-    int ok;
-    if (PyBool_Check(res)){
-        ok = (res == Py_True);
-    }
-    else {
-        ok = PyObject_IsTrue(res);
-    }
-    Py_DECREF(res);
-    return ok;
-}
-
-/* Latin string compare: safe for any two latin (one byte per char) strings. */
-static int
-unsafe_latin_compare(PyObject* v, PyObject* w, CompareFuncs compare_funcs){
-  /* Modified from Objects/unicodeobject.c:unicode_compare, assuming: */
-  #ifdef Py_DEBUG
-    assert(v->ob_type == w->ob_type &&
-           v->ob_type == &PyUnicode_Type &&
-           PyUnicode_KIND(v) == PyUnicode_KIND(w) &&
-           PyUnicode_KIND(v) == PyUnicode_1BYTE_KIND);
-  #endif
-    if (v == w) return 0;
-    
-    int len = Py_MIN(PyUnicode_GET_LENGTH(v), PyUnicode_GET_LENGTH(w));
-    int res = memcmp(PyUnicode_DATA(v), PyUnicode_DATA(w), len);
-
-    return (res != 0 ?
-            res < 0 :
-            PyUnicode_GET_LENGTH(v) < PyUnicode_GET_LENGTH(w));
-}
-
-/* Bounded int compare: compare any two longs that fit in a single machine word. */
-static int
-unsafe_long_compare(PyObject *v, PyObject *w, CompareFuncs compare_funcs)
-{
-  /* Modified from Objects/longobject.c:long_compare, assuming: */
-  #ifdef Py_DEBUG
-    assert(v->ob_type == w->ob_type &&
-           v->ob_type == &PyLong_Type &&
-           Py_ABS(Py_SIZE(v)) <= 1 &&
-           Py_ABS(Py_SIZE(w)) <= 1);
-  #endif
-    if (v == w) return 0;
-    
-    PyLongObject *vl, *wl;
-    vl = (PyLongObject*)v;
-    wl = (PyLongObject*)w;
-
-    sdigit v0 = Py_SIZE(vl) == 0 ? 0 : (sdigit)vl->ob_digit[0];
-    sdigit w0 = Py_SIZE(wl) == 0 ? 0 : (sdigit)wl->ob_digit[0];
-
-    if (Py_SIZE(vl) < 0)
-        v0 = -v0;
-    if (Py_SIZE(wl) < 0)
-        w0 = -w0;
-
-    return v0 < w0;
-}
+#define MAX_MERGE_PENDING 85
 
-/* Float compare: compare any two floats. */
-static int
-unsafe_float_compare(PyObject *v, PyObject *w, CompareFuncs compare_funcs){
-  /* Modified from Objects/floatobject.c:float_richcompare, assuming: */
-  #ifdef Py_DEBUG
-    assert(v->ob_type == w->ob_type &&
-           v->ob_type == &PyFloat_Type);
-  #endif
-    if (v == w) return 0;
-    
-    return PyFloat_AS_DOUBLE(v) < PyFloat_AS_DOUBLE(w);
-}
+/* When we get into galloping mode, we stay there until both runs win less
+ * often than MIN_GALLOP consecutive times.  See listsort.txt for more info.
+ */
+#define MIN_GALLOP 7
 
-/* Tuple compare: compare any two non-empty tuples, using 
- * compare_funcs.tuple_elem_compare to compare the first elements, which is set 
- * using the same pre-sort check as we use for compare_funcs.key_compare,
- * but run on the list [x[0] for x in L]. This allows us to optimize compares
- * on two levels (as long as [x[0] for x in L] is type-homogeneous.) */
-static int
-unsafe_tuple_compare(PyObject* v, PyObject* w, CompareFuncs compare_funcs)
-{
-  /* Modified from Objects/tupleobject.c:tuplerichcompare, assuming: */
-  #ifdef Py_DEBUG
-    assert(v->ob_type == w->ob_type &&
-           v->ob_type == &PyTuple_Type &&
-           Py_SIZE(v) > 0 &&
-           Py_SIZE(w) > 0);
-  #endif
-    if (v == w) return 0;
-    
-    PyTupleObject *vt, *wt;
-    Py_ssize_t i;
-    Py_ssize_t vlen, wlen;
+/* Avoid malloc for small temp arrays. */
+#define MERGESTATE_TEMP_SIZE 256
 
-    vt = (PyTupleObject *)v;
-    wt = (PyTupleObject *)w;
+/* One MergeState exists on the stack per invocation of mergesort.  It's just
+ * a convenient way to pass state around among the helper functions.
+ */
+struct s_slice {
+    sortslice base;
+    Py_ssize_t len;
+};
 
-    /* Is v[0] < w[0]? */
-    int k = (*compare_funcs.tuple_elem_compare)(vt->ob_item[0],
-                                                wt->ob_item[0],
-                                                compare_funcs);
-    if (k < 0)
-        return -1;
-    if (k)
-        return 1;
+typedef struct s_MergeState MergeState;
+struct s_MergeState {
+    /* This controls when we get *into* galloping mode.  It's initialized
+     * to MIN_GALLOP.  merge_lo and merge_hi tend to nudge it higher for
+     * random data, and lower for highly structured data.
+     */
+    Py_ssize_t min_gallop;
 
-    vlen = Py_SIZE(vt);
-    wlen = Py_SIZE(wt);
+    /* 'a' is temp storage to help with merges.  It contains room for
+     * alloced entries.
+     */
+    sortslice a;        /* may point to temparray below */
+    Py_ssize_t alloced;
 
-    /* Well, are either of the tuples are singleton? */
-    if (vlen == 1 || wlen == 1)
-        return 0;
+    /* A stack of n pending runs yet to be merged.  Run #i starts at
+     * address base[i] and extends for len[i] elements.  It's always
+     * true (so long as the indices are in bounds) that
+     *
+     *     pending[i].base + pending[i].len == pending[i+1].base
+     *
+     * so we could cut the storage for this, but it's a minor amount,
+     * and keeping all the info explicit simplifies the code.
+     */
+    int n;
+    struct s_slice pending[MAX_MERGE_PENDING];
 
-    /* Well, is w[0] < v[0]? */
-    k = (*compare_funcs.tuple_elem_compare)(wt->ob_item[0],
-                                            vt->ob_item[0],
-                                            compare_funcs);
-    if (k < 0)
-        return -1;
-    if (k)
-        return 0;
+    /* 'a' points to this when possible, rather than muck with malloc. */
+    PyObject *temparray[MERGESTATE_TEMP_SIZE];
 
-    /* Out of options: v[0] == w[0]! We need to look at v[1:] and w[1:].
-     * We can use code copied straight from tupleobject.c:tuplerichcompare: */
-    for (i = 1; i < vlen && i < wlen; i++) {
-        k = PyObject_RichCompareBool(vt->ob_item[i],
-                                     wt->ob_item[i],
-                                     Py_EQ);
-        if (k < 0)
-            return -1;
-        if (!k)
-            break;
-    }
+    /* The function used to compare two keys (called through ISLT).  It is
+     * always set: when none of the special cases apply it points to
+     * safe_object_compare. */
+    int (*key_compare)(PyObject*, PyObject*, MergeState*);
 
-    if (i >= vlen || i >= wlen) {
-        return vlen <  wlen;
-    }
+    /* This function is used by unsafe_object_compare to optimize comparisons
+     * when we know our list is type-homogeneous but we can't assume anything else.
+     * In the pre-sort check it is set equal to key->ob_type->tp_richcompare */
+    PyObject* (*key_richcompare)(PyObject*, PyObject*, int);
 
-    return PyObject_RichCompareBool(vt->ob_item[i], wt->ob_item[i], Py_LT);
-}
+    /* This function is used by unsafe_tuple_compare to compare the first elements
+     * of tuples. It may be set to safe_object_compare, but the idea is that hopefully
+     * we can assume more, and use one of the special-case compares. */
+    int (*tuple_elem_compare)(PyObject*, PyObject*, MergeState*);
+};
 
 /* binarysort is the best method for sorting small arrays: it does
    few compares, but can do data movement quadratic in the number of
@@ -1249,7 +1126,7 @@ unsafe_tuple_compare(PyObject* v, PyObject* w, CompareFuncs compare_funcs)
    the input (nothing is lost or duplicated).
 */
 static int
-binarysort(sortslice lo, PyObject **hi, PyObject **start, CompareFuncs compare_funcs)
+binarysort(MergeState* ms, sortslice lo, PyObject **hi, PyObject **start)
 {
     Py_ssize_t k;
     PyObject **l, **p, **r;
@@ -1323,7 +1200,7 @@ elements to get out of order).
 Returns -1 in case of error.
 */
 static Py_ssize_t
-count_run(PyObject **lo, PyObject **hi, int *descending, CompareFuncs compare_funcs)
+count_run(MergeState* ms, PyObject **lo, PyObject **hi, int *descending)
 {
     Py_ssize_t k;
     Py_ssize_t n;
@@ -1378,8 +1255,7 @@ key, and the last n-k should follow key.
 Returns -1 on error.  See listsort.txt for info on the method.
 */
 static Py_ssize_t
-gallop_left(PyObject *key, PyObject **a, Py_ssize_t n, Py_ssize_t hint,
-            CompareFuncs compare_funcs)
+gallop_left(MergeState* ms, PyObject *key, PyObject **a, Py_ssize_t n, Py_ssize_t hint)
 {
     Py_ssize_t ofs;
     Py_ssize_t lastofs;
@@ -1470,8 +1346,7 @@ we're sticking to "<" comparisons that it's much harder to follow if
 written as one routine with yet another "left or right?" flag.
 */
 static Py_ssize_t
-gallop_right(PyObject *key, PyObject **a, Py_ssize_t n, Py_ssize_t hint,
-             CompareFuncs compare_funcs)
+gallop_right(MergeState* ms, PyObject *key, PyObject **a, Py_ssize_t n, Py_ssize_t hint)
 {
     Py_ssize_t ofs;
     Py_ssize_t lastofs;
@@ -1547,59 +1422,6 @@ gallop_right(PyObject *key, PyObject **a, Py_ssize_t n, Py_ssize_t hint,
     return -1;
 }
 
-/* The maximum number of entries in a MergeState's pending-runs stack.
- * This is enough to sort arrays of size up to about
- *     32 * phi ** MAX_MERGE_PENDING
- * where phi ~= 1.618.  85 is ridiculouslylarge enough, good for an array
- * with 2**64 elements.
- */
-#define MAX_MERGE_PENDING 85
-
-/* When we get into galloping mode, we stay there until both runs win less
- * often than MIN_GALLOP consecutive times.  See listsort.txt for more info.
- */
-#define MIN_GALLOP 7
-
-/* Avoid malloc for small temp arrays. */
-#define MERGESTATE_TEMP_SIZE 256
-
-/* One MergeState exists on the stack per invocation of mergesort.  It's just
- * a convenient way to pass state around among the helper functions.
- */
-struct s_slice {
-    sortslice base;
-    Py_ssize_t len;
-};
-
-typedef struct s_MergeState {
-    /* This controls when we get *into* galloping mode.  It's initialized
-     * to MIN_GALLOP.  merge_lo and merge_hi tend to nudge it higher for
-     * random data, and lower for highly structured data.
-     */
-    Py_ssize_t min_gallop;
-
-    /* 'a' is temp storage to help with merges.  It contains room for
-     * alloced entries.
-     */
-    sortslice a;        /* may point to temparray below */
-    Py_ssize_t alloced;
-
-    /* A stack of n pending runs yet to be merged.  Run #i starts at
-     * address base[i] and extends for len[i] elements.  It's always
-     * true (so long as the indices are in bounds) that
-     *
-     *     pending[i].base + pending[i].len == pending[i+1].base
-     *
-     * so we could cut the storage for this, but it's a minor amount,
-     * and keeping all the info explicit simplifies the code.
-     */
-    int n;
-    struct s_slice pending[MAX_MERGE_PENDING];
-
-    /* 'a' points to this when possible, rather than muck with malloc. */
-    PyObject *temparray[MERGESTATE_TEMP_SIZE];
-} MergeState;
-
 /* Conceptually a MergeState's constructor. */
 static void
 merge_init(MergeState *ms, Py_ssize_t list_size, int has_keyfunc)
@@ -1684,8 +1506,7 @@ merge_getmem(MergeState *ms, Py_ssize_t need)
  * successful, -1 if error.
  */
 static Py_ssize_t
-merge_lo(MergeState *ms, sortslice ssa, Py_ssize_t na,
-         sortslice ssb, Py_ssize_t nb, CompareFuncs compare_funcs)
+merge_lo(MergeState *ms, sortslice ssa, Py_ssize_t na, sortslice ssb, Py_ssize_t nb)
 {
     Py_ssize_t k;
     sortslice dest;
@@ -1752,7 +1573,7 @@ merge_lo(MergeState *ms, sortslice ssa, Py_ssize_t na,
             assert(na > 1 && nb > 0);
             min_gallop -= min_gallop > 1;
             ms->min_gallop = min_gallop;
-            k = gallop_right(ssb.keys[0], ssa.keys, na, 0, compare_funcs);
+            k = gallop_right(ms, ssb.keys[0], ssa.keys, na, 0);
             acount = k;
             if (k) {
                 if (k < 0)
@@ -1775,7 +1596,7 @@ merge_lo(MergeState *ms, sortslice ssa, Py_ssize_t na,
             if (nb == 0)
                 goto Succeed;
 
-            k = gallop_left(ssa.keys[0], ssb.keys, nb, 0, compare_funcs);
+            k = gallop_left(ms, ssa.keys[0], ssb.keys, nb, 0);
             bcount = k;
             if (k) {
                 if (k < 0)
@@ -1816,8 +1637,7 @@ merge_lo(MergeState *ms, sortslice ssa, Py_ssize_t na,
  * successful, -1 if error.
  */
 static Py_ssize_t
-merge_hi(MergeState *ms, sortslice ssa, Py_ssize_t na,
-         sortslice ssb, Py_ssize_t nb, CompareFuncs compare_funcs)
+merge_hi(MergeState *ms, sortslice ssa, Py_ssize_t na, sortslice ssb, Py_ssize_t nb)
 {
     Py_ssize_t k;
     sortslice dest, basea, baseb;
@@ -1890,7 +1710,7 @@ merge_hi(MergeState *ms, sortslice ssa, Py_ssize_t na,
             assert(na > 0 && nb > 1);
             min_gallop -= min_gallop > 1;
             ms->min_gallop = min_gallop;
-            k = gallop_right(ssb.keys[0], basea.keys, na, na-1, compare_funcs);
+            k = gallop_right(ms, ssb.keys[0], basea.keys, na, na-1);
             if (k < 0)
                 goto Fail;
             k = na - k;
@@ -1908,7 +1728,7 @@ merge_hi(MergeState *ms, sortslice ssa, Py_ssize_t na,
             if (nb == 1)
                 goto CopyA;
 
-            k = gallop_left(ssa.keys[0], baseb.keys, nb, nb-1, compare_funcs);
+            k = gallop_left(ms, ssa.keys[0], baseb.keys, nb, nb-1);
             if (k < 0)
                 goto Fail;
             k = nb - k;
@@ -1955,7 +1775,7 @@ merge_hi(MergeState *ms, sortslice ssa, Py_ssize_t na,
  * Returns 0 on success, -1 on error.
  */
 static Py_ssize_t
-merge_at(MergeState *ms, Py_ssize_t i, CompareFuncs compare_funcs)
+merge_at(MergeState *ms, Py_ssize_t i)
 {
     sortslice ssa, ssb;
     Py_ssize_t na, nb;
@@ -1985,7 +1805,7 @@ merge_at(MergeState *ms, Py_ssize_t i, CompareFuncs compare_funcs)
     /* Where does b start in a?  Elements in a before that can be
      * ignored (already in place).
      */
-    k = gallop_right(*ssb.keys, ssa.keys, na, 0, compare_funcs);
+    k = gallop_right(ms, *ssb.keys, ssa.keys, na, 0);
     if (k < 0)
         return -1;
     sortslice_advance(&ssa, k);
@@ -1996,7 +1816,7 @@ merge_at(MergeState *ms, Py_ssize_t i, CompareFuncs compare_funcs)
     /* Where does a end in b?  Elements in b after that can be
      * ignored (already in place).
      */
-    nb = gallop_left(ssa.keys[na-1], ssb.keys, nb, nb-1, compare_funcs);
+    nb = gallop_left(ms, ssa.keys[na-1], ssb.keys, nb, nb-1);
     if (nb <= 0)
         return nb;
 
@@ -2004,9 +1824,9 @@ merge_at(MergeState *ms, Py_ssize_t i, CompareFuncs compare_funcs)
      * min(na, nb) elements.
      */
     if (na <= nb)
-        return merge_lo(ms, ssa, na, ssb, nb, compare_funcs);
+        return merge_lo(ms, ssa, na, ssb, nb);
     else
-        return merge_hi(ms, ssa, na, ssb, nb, compare_funcs);
+        return merge_hi(ms, ssa, na, ssb, nb);
 }
 
 /* Examine the stack of runs waiting to be merged, merging adjacent runs
@@ -2020,7 +1840,7 @@ merge_at(MergeState *ms, Py_ssize_t i, CompareFuncs compare_funcs)
  * Returns 0 on success, -1 on error.
  */
 static int
-merge_collapse(MergeState *ms, CompareFuncs compare_funcs)
+merge_collapse(MergeState *ms)
 {
     struct s_slice *p = ms->pending;
 
@@ -2031,11 +1851,11 @@ merge_collapse(MergeState *ms, CompareFuncs compare_funcs)
             (n > 1 && p[n-2].len <= p[n-1].len + p[n].len)) {
             if (p[n-1].len < p[n+1].len)
                 --n;
-            if (merge_at(ms, n, compare_funcs) < 0)
+            if (merge_at(ms, n) < 0)
                 return -1;
         }
         else if (p[n].len <= p[n+1].len) {
-            if (merge_at(ms, n, compare_funcs) < 0)
+            if (merge_at(ms, n) < 0)
                 return -1;
         }
         else
@@ -2050,7 +1870,7 @@ merge_collapse(MergeState *ms, CompareFuncs compare_funcs)
  * Returns 0 on success, -1 on error.
  */
 static int
-merge_force_collapse(MergeState *ms, CompareFuncs compare_funcs)
+merge_force_collapse(MergeState *ms)
 {
     struct s_slice *p = ms->pending;
 
@@ -2059,7 +1879,7 @@ merge_force_collapse(MergeState *ms, CompareFuncs compare_funcs)
         Py_ssize_t n = ms->n - 2;
         if (n > 0 && p[n-1].len < p[n+1].len)
             --n;
-        if (merge_at(ms, n, compare_funcs) < 0)
+        if (merge_at(ms, n) < 0)
             return -1;
     }
     return 0;
@@ -2096,6 +1916,179 @@ reverse_sortslice(sortslice *s, Py_ssize_t n)
         reverse_slice(s->values, &s->values[n]);
 }
 
+/* Here we define custom comparison functions to optimize for the cases one commonly 
+ * encounters in practice: homogeneous lists, often of one of the basic types. */
+
+/* This struct holds the comparison function and helper functions 
+ * selected in the pre-sort check. */
+
+/* These are the special case compare functions.
+ * ms->key_compare will always point to one of these: */
+
+/* Heterogeneous compare: default, always safe to fall back on. */
+static int
+safe_object_compare(PyObject* v, PyObject* w, MergeState* ms)
+{
+  /* No assumptions necessary! */
+    return PyObject_RichCompareBool(v, w, Py_LT);
+}
+
+/* Homogeneous compare: safe for any two compareable objects of the same type.
+ * (ms->key_richcompare is set to ob_type->tp_richcompare in the
+ *  pre-sort check.)
+ */
+static int
+unsafe_object_compare(PyObject* v, PyObject* w, MergeState* ms)
+{
+  /* Modified from Objects/object.c:PyObject_RichCompareBool, assuming: */
+  #ifdef Py_DEBUG
+    assert(v->ob_type == w->ob_type &&
+           v->ob_type->tp_richcompare != NULL);
+  #endif
+    if (v == w) return 0;
+    
+    if (v->ob_type->tp_richcompare != ms->key_richcompare)
+	return PyObject_RichCompareBool(v, w, Py_LT);
+    
+    PyObject* res = (*(ms->key_richcompare))(v, w, Py_LT);
+    if (res == NULL)
+        return -1;
+    int ok;
+    if (PyBool_Check(res)){
+        ok = (res == Py_True);
+    }
+    else {
+        ok = PyObject_IsTrue(res);
+    }
+    Py_DECREF(res);
+    return ok;
+}
+
+/* Latin string compare: safe for any two latin (one byte per char) strings. */
+static int
+unsafe_latin_compare(PyObject* v, PyObject* w, MergeState* ms){
+  /* Modified from Objects/unicodeobject.c:unicode_compare, assuming: */
+  #ifdef Py_DEBUG
+    assert(v->ob_type == w->ob_type &&
+           v->ob_type == &PyUnicode_Type &&
+           PyUnicode_KIND(v) == PyUnicode_KIND(w) &&
+           PyUnicode_KIND(v) == PyUnicode_1BYTE_KIND);
+  #endif
+    if (v == w) return 0;
+    
+    int len = Py_MIN(PyUnicode_GET_LENGTH(v), PyUnicode_GET_LENGTH(w));
+    int res = memcmp(PyUnicode_DATA(v), PyUnicode_DATA(w), len);
+
+    return (res != 0 ?
+            res < 0 :
+            PyUnicode_GET_LENGTH(v) < PyUnicode_GET_LENGTH(w));
+}
+
+/* Bounded int compare: compare any two longs that fit in a single machine word. */
+static int
+unsafe_long_compare(PyObject *v, PyObject *w, MergeState* ms)
+{
+  /* Modified from Objects/longobject.c:long_compare, assuming: */
+  #ifdef Py_DEBUG
+    assert(v->ob_type == w->ob_type &&
+           v->ob_type == &PyLong_Type &&
+           Py_ABS(Py_SIZE(v)) <= 1 &&
+           Py_ABS(Py_SIZE(w)) <= 1);
+  #endif
+    if (v == w) return 0;
+    
+    PyLongObject *vl, *wl;
+    vl = (PyLongObject*)v;
+    wl = (PyLongObject*)w;
+
+    sdigit v0 = Py_SIZE(vl) == 0 ? 0 : (sdigit)vl->ob_digit[0];
+    sdigit w0 = Py_SIZE(wl) == 0 ? 0 : (sdigit)wl->ob_digit[0];
+
+    if (Py_SIZE(vl) < 0)
+        v0 = -v0;
+    if (Py_SIZE(wl) < 0)
+        w0 = -w0;
+
+    return v0 < w0;
+}
+
+/* Float compare: compare any two floats. */
+static int
+unsafe_float_compare(PyObject *v, PyObject *w, MergeState* ms){
+  /* Modified from Objects/floatobject.c:float_richcompare, assuming: */
+  #ifdef Py_DEBUG
+    assert(v->ob_type == w->ob_type &&
+           v->ob_type == &PyFloat_Type);
+  #endif
+    if (v == w) return 0;
+    
+    return PyFloat_AS_DOUBLE(v) < PyFloat_AS_DOUBLE(w);
+}
+
+/* Tuple compare: compare any two non-empty tuples, using 
+ * ms->tuple_elem_compare to compare the first elements, which is set 
+ * using the same pre-sort check as we use for ms->key_compare,
+ * but run on the list [x[0] for x in L]. This allows us to optimize compares
+ * on two levels (as long as [x[0] for x in L] is type-homogeneous.) */
+static int
+unsafe_tuple_compare(PyObject* v, PyObject* w, MergeState* ms)
+{
+  /* Modified from Objects/tupleobject.c:tuplerichcompare, assuming: */
+  #ifdef Py_DEBUG
+    assert(v->ob_type == w->ob_type &&
+           v->ob_type == &PyTuple_Type &&
+           Py_SIZE(v) > 0 &&
+           Py_SIZE(w) > 0);
+  #endif
+    if (v == w) return 0;
+    
+    PyTupleObject *vt, *wt;
+    Py_ssize_t i;
+    Py_ssize_t vlen, wlen;
+
+    vt = (PyTupleObject *)v;
+    wt = (PyTupleObject *)w;
+
+    /* Is v[0] < w[0]? */
+    int k = (*(ms->tuple_elem_compare))(vt->ob_item[0], wt->ob_item[0], ms);
+    if (k < 0)
+        return -1;
+    if (k)
+        return 1;
+
+    vlen = Py_SIZE(vt);
+    wlen = Py_SIZE(wt);
+
+    /* Well, are either of the tuples are singleton? */
+    if (vlen == 1 || wlen == 1)
+        return 0;
+
+    /* Well, is w[0] < v[0]? */
+    k = (*(ms->tuple_elem_compare))(wt->ob_item[0], vt->ob_item[0], ms);
+    if (k < 0)
+        return -1;
+    if (k)
+        return 0;
+
+    /* Out of options: v[0] == w[0]! We need to look at v[1:] and w[1:].
+     * We can use code copied straight from tupleobject.c:tuplerichcompare: */
+    for (i = 1; i < vlen && i < wlen; i++) {
+        k = PyObject_RichCompareBool(vt->ob_item[i],
+                                     wt->ob_item[i],
+                                     Py_EQ);
+        if (k < 0)
+            return -1;
+        if (!k)
+            break;
+    }
+
+    if (i >= vlen || i >= wlen) {
+        return vlen <  wlen;
+    }
+
+    return PyObject_RichCompareBool(vt->ob_item[i], wt->ob_item[i], Py_LT);
+}
+
 /* An adaptive, stable, natural mergesort.  See listsort.txt.
  * Returns Py_None on success, NULL on error.  Even in case of error, the
  * list will be some permutation of its input state (nothing is lost or
@@ -2169,8 +2162,7 @@ listsort_impl(PyListObject *self, PyObject *keyfunc, int reverse)
     /* The pre-sort check: here's where we decide which compare function to use.
      * How much optimization is safe? We test for homogeneity with respect to 
      * several properties that are expensive to check at compare-time, and 
-     * set compare_funcs appropriately. */
-    CompareFuncs compare_funcs;
+     * set ms appropriately. */
     if (saved_ob_size > 1) {
         /* Assume the first element is representative of the whole list. */
         int keys_are_in_tuples = (lo.keys[0]->ob_type == &PyTuple_Type &&
@@ -2220,33 +2212,33 @@ listsort_impl(PyListObject *self, PyObject *keyfunc, int reverse)
         if (keys_are_all_same_type) {
 
             if (key_type == &PyUnicode_Type && strings_are_latin)
-                compare_funcs.key_compare = unsafe_latin_compare;
+                ms.key_compare = unsafe_latin_compare;
 
             else if (key_type == &PyLong_Type && ints_are_bounded)
-                compare_funcs.key_compare = unsafe_long_compare;
+                ms.key_compare = unsafe_long_compare;
 
             else if (key_type == &PyFloat_Type)
-                compare_funcs.key_compare = unsafe_float_compare;
+                ms.key_compare = unsafe_float_compare;
 
-            else if ((compare_funcs.key_richcompare = key_type->tp_richcompare) != NULL)
-                compare_funcs.key_compare = unsafe_object_compare;
+            else if ((ms.key_richcompare = key_type->tp_richcompare) != NULL)
+                ms.key_compare = unsafe_object_compare;
 
         } else {
-            compare_funcs.key_compare = safe_object_compare;
+            ms.key_compare = safe_object_compare;
         }
 
         if (keys_are_in_tuples) {
             /* Make sure we're not dealing with tuples of tuples
              * (remember: here, key_type refers list [key[0] for key in keys]) */
             if (key_type == &PyTuple_Type)
-                compare_funcs.tuple_elem_compare = safe_object_compare;
+                ms.tuple_elem_compare = safe_object_compare;
             else
-                compare_funcs.tuple_elem_compare = compare_funcs.key_compare;
+                ms.tuple_elem_compare = ms.key_compare;
 
-            compare_funcs.key_compare = unsafe_tuple_compare;
+            ms.key_compare = unsafe_tuple_compare;
         }
     }
-    /* End of pre-sort check: compare_funcs is now set properly! */
+    /* End of pre-sort check: ms is now set properly! */
 
     merge_init(&ms, saved_ob_size, keys != NULL);
 
@@ -2271,7 +2263,7 @@ listsort_impl(PyListObject *self, PyObject *keyfunc, int reverse)
         Py_ssize_t n;
 
         /* Identify next run. */
-        n = count_run(lo.keys, lo.keys + nremaining, &descending, compare_funcs);
+        n = count_run(&ms, lo.keys, lo.keys + nremaining, &descending);
         if (n < 0)
             goto fail;
         if (descending)
@@ -2280,7 +2272,7 @@ listsort_impl(PyListObject *self, PyObject *keyfunc, int reverse)
         if (n < minrun) {
             const Py_ssize_t force = nremaining <= minrun ?
                               nremaining : minrun;
-            if (binarysort(lo, lo.keys + force, lo.keys + n, compare_funcs) < 0)
+            if (binarysort(&ms, lo, lo.keys + force, lo.keys + n) < 0)
                 goto fail;
             n = force;
         }
@@ -2289,14 +2281,14 @@ listsort_impl(PyListObject *self, PyObject *keyfunc, int reverse)
         ms.pending[ms.n].base = lo;
         ms.pending[ms.n].len = n;
         ++ms.n;
-        if (merge_collapse(&ms, compare_funcs) < 0)
+        if (merge_collapse(&ms) < 0)
             goto fail;
         /* Advance to find next run. */
         sortslice_advance(&lo, n);
         nremaining -= n;
     } while (nremaining);
 
-    if (merge_force_collapse(&ms, compare_funcs) < 0)
+    if (merge_force_collapse(&ms) < 0)
         goto fail;
     assert(ms.n == 1);
     assert(keys == NULL

From 395bc7d0a3444f6238ed03d57984e308dda45715 Mon Sep 17 00:00:00 2001
From: Elliot Gorokhovsky <elliot.gorokhovsky@gmail.com>
Date: Sun, 12 Mar 2017 14:12:44 -0600
Subject: [PATCH 17/47] formatting

---
 Objects/listobject.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/Objects/listobject.c b/Objects/listobject.c
index dace48312e3c74..53691692cceb66 100644
--- a/Objects/listobject.c
+++ b/Objects/listobject.c
@@ -1506,7 +1506,8 @@ merge_getmem(MergeState *ms, Py_ssize_t need)
  * successful, -1 if error.
  */
 static Py_ssize_t
-merge_lo(MergeState *ms, sortslice ssa, Py_ssize_t na, sortslice ssb, Py_ssize_t nb)
+merge_lo(MergeState *ms, sortslice ssa, Py_ssize_t na,
+	 sortslice ssb, Py_ssize_t nb)
 {
     Py_ssize_t k;
     sortslice dest;
@@ -1637,7 +1638,8 @@ merge_lo(MergeState *ms, sortslice ssa, Py_ssize_t na, sortslice ssb, Py_ssize_t
  * successful, -1 if error.
  */
 static Py_ssize_t
-merge_hi(MergeState *ms, sortslice ssa, Py_ssize_t na, sortslice ssb, Py_ssize_t nb)
+merge_hi(MergeState *ms, sortslice ssa, Py_ssize_t na,
+	 sortslice ssb, Py_ssize_t nb)
 {
     Py_ssize_t k;
     sortslice dest, basea, baseb;

From e67758616b010a51c767d18b80f5fe9751f27611 Mon Sep 17 00:00:00 2001
From: Elliot Gorokhovsky <elliot.gorokhovsky@gmail.com>
Date: Sun, 12 Mar 2017 14:14:21 -0600
Subject: [PATCH 18/47] formatting

---
 Objects/listobject.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Objects/listobject.c b/Objects/listobject.c
index 53691692cceb66..de2f5893c3863e 100644
--- a/Objects/listobject.c
+++ b/Objects/listobject.c
@@ -1507,7 +1507,7 @@ merge_getmem(MergeState *ms, Py_ssize_t need)
  */
 static Py_ssize_t
 merge_lo(MergeState *ms, sortslice ssa, Py_ssize_t na,
-	 sortslice ssb, Py_ssize_t nb)
+         sortslice ssb, Py_ssize_t nb)
 {
     Py_ssize_t k;
     sortslice dest;
@@ -1639,7 +1639,7 @@ merge_lo(MergeState *ms, sortslice ssa, Py_ssize_t na,
  */
 static Py_ssize_t
 merge_hi(MergeState *ms, sortslice ssa, Py_ssize_t na,
-	 sortslice ssb, Py_ssize_t nb)
+         sortslice ssb, Py_ssize_t nb)
 {
     Py_ssize_t k;
     sortslice dest, basea, baseb;
@@ -1950,7 +1950,7 @@ unsafe_object_compare(PyObject* v, PyObject* w, MergeState* ms)
     if (v == w) return 0;
     
     if (v->ob_type->tp_richcompare != ms->key_richcompare)
-	return PyObject_RichCompareBool(v, w, Py_LT);
+        return PyObject_RichCompareBool(v, w, Py_LT);
     
     PyObject* res = (*(ms->key_richcompare))(v, w, Py_LT);
     if (res == NULL)

From 6070c72def67df06602f2a788e3d2155dacbdc1b Mon Sep 17 00:00:00 2001
From: Elliot Gorokhovsky <elliot.gorokhovsky@gmail.com>
Date: Sun, 12 Mar 2017 14:25:08 -0600
Subject: [PATCH 19/47] don't need (v==w) for ints/strings

---
 Objects/listobject.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/Objects/listobject.c b/Objects/listobject.c
index de2f5893c3863e..ab9647d79433a9 100644
--- a/Objects/listobject.c
+++ b/Objects/listobject.c
@@ -1976,7 +1976,6 @@ unsafe_latin_compare(PyObject* v, PyObject* w, MergeState* ms){
            PyUnicode_KIND(v) == PyUnicode_KIND(w) &&
            PyUnicode_KIND(v) == PyUnicode_1BYTE_KIND);
   #endif
-    if (v == w) return 0;
     
     int len = Py_MIN(PyUnicode_GET_LENGTH(v), PyUnicode_GET_LENGTH(w));
     int res = memcmp(PyUnicode_DATA(v), PyUnicode_DATA(w), len);
@@ -1997,7 +1996,6 @@ unsafe_long_compare(PyObject *v, PyObject *w, MergeState* ms)
            Py_ABS(Py_SIZE(v)) <= 1 &&
            Py_ABS(Py_SIZE(w)) <= 1);
   #endif
-    if (v == w) return 0;
     
     PyLongObject *vl, *wl;
     vl = (PyLongObject*)v;

From 294aa1c1496d297de6a8fe33f1d5203084567d7a Mon Sep 17 00:00:00 2001
From: Elliot Gorokhovsky <elliot.gorokhovsky@gmail.com>
Date: Sun, 12 Mar 2017 14:32:48 -0600
Subject: [PATCH 20/47] went back to i=0 for tuples; we can't infer == from <
 and >, even with v==w

---
 Objects/listobject.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Objects/listobject.c b/Objects/listobject.c
index ab9647d79433a9..0997dd46c03d4b 100644
--- a/Objects/listobject.c
+++ b/Objects/listobject.c
@@ -2072,7 +2072,7 @@ unsafe_tuple_compare(PyObject* v, PyObject* w, MergeState* ms)
 
     /* Out of options: v[0] == w[0]! We need to look at v[1:] and w[1:].
      * We can use code copied straight from tupleobject.c:tuplerichcompare: */
-    for (i = 1; i < vlen && i < wlen; i++) {
+    for (i = 0; i < vlen && i < wlen; i++) {
         k = PyObject_RichCompareBool(vt->ob_item[i],
                                      wt->ob_item[i],
                                      Py_EQ);
@@ -2083,7 +2083,7 @@ unsafe_tuple_compare(PyObject* v, PyObject* w, MergeState* ms)
     }
 
     if (i >= vlen || i >= wlen) {
-        return vlen <  wlen;
+        return vlen < wlen;
     }
 
     return PyObject_RichCompareBool(vt->ob_item[i], wt->ob_item[i], Py_LT);

From ba05b2ac67bde2749f9ec8ae94280b99265d6ad7 Mon Sep 17 00:00:00 2001
From: embg <elliot.gorokhovsky@gmail.com>
Date: Sun, 12 Mar 2017 22:54:17 -0600
Subject: [PATCH 21/47] move all declarations to top of their blocks

---
 Objects/listobject.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/Objects/listobject.c b/Objects/listobject.c
index 0997dd46c03d4b..4efcbcc9cb18f6 100644
--- a/Objects/listobject.c
+++ b/Objects/listobject.c
@@ -1947,15 +1947,16 @@ unsafe_object_compare(PyObject* v, PyObject* w, MergeState* ms)
     assert(v->ob_type == w->ob_type &&
            v->ob_type->tp_richcompare != NULL);
   #endif
-    if (v == w) return 0;
+    int ok; 
     
+    if (v == w) return 0;
     if (v->ob_type->tp_richcompare != ms->key_richcompare)
         return PyObject_RichCompareBool(v, w, Py_LT);
     
     PyObject* res = (*(ms->key_richcompare))(v, w, Py_LT);
     if (res == NULL)
         return -1;
-    int ok;
+
     if (PyBool_Check(res)){
         ok = (res == Py_True);
     }
@@ -2040,7 +2041,6 @@ unsafe_tuple_compare(PyObject* v, PyObject* w, MergeState* ms)
            Py_SIZE(v) > 0 &&
            Py_SIZE(w) > 0);
   #endif
-    if (v == w) return 0;
     
     PyTupleObject *vt, *wt;
     Py_ssize_t i;
@@ -2048,6 +2048,10 @@ unsafe_tuple_compare(PyObject* v, PyObject* w, MergeState* ms)
 
     vt = (PyTupleObject *)v;
     wt = (PyTupleObject *)w;
+    
+    int ok;
+    
+    if (v == w) return 0;
 
     /* Is v[0] < w[0]? */
     int k = (*(ms->tuple_elem_compare))(vt->ob_item[0], wt->ob_item[0], ms);

From 40ba266d072d2dc5412cb998247308ee49e749c0 Mon Sep 17 00:00:00 2001
From: embg <elliot.gorokhovsky@gmail.com>
Date: Sun, 12 Mar 2017 22:57:30 -0600
Subject: [PATCH 22/47] typo

---
 Objects/listobject.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Objects/listobject.c b/Objects/listobject.c
index 4efcbcc9cb18f6..006f5edaf0f668 100644
--- a/Objects/listobject.c
+++ b/Objects/listobject.c
@@ -2049,7 +2049,7 @@ unsafe_tuple_compare(PyObject* v, PyObject* w, MergeState* ms)
     vt = (PyTupleObject *)v;
     wt = (PyTupleObject *)w;
     
-    int ok;
+    int k;
     
     if (v == w) return 0;
 

From a1759392153e6cbae88b5f85c493450d380f17be Mon Sep 17 00:00:00 2001
From: embg <elliot.gorokhovsky@gmail.com>
Date: Sun, 12 Mar 2017 22:58:46 -0600
Subject: [PATCH 23/47] typo

---
 Objects/listobject.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Objects/listobject.c b/Objects/listobject.c
index 006f5edaf0f668..c656db68216882 100644
--- a/Objects/listobject.c
+++ b/Objects/listobject.c
@@ -2054,7 +2054,7 @@ unsafe_tuple_compare(PyObject* v, PyObject* w, MergeState* ms)
     if (v == w) return 0;
 
     /* Is v[0] < w[0]? */
-    int k = (*(ms->tuple_elem_compare))(vt->ob_item[0], wt->ob_item[0], ms);
+    k = (*(ms->tuple_elem_compare))(vt->ob_item[0], wt->ob_item[0], ms);
     if (k < 0)
         return -1;
     if (k)

From f0dc847b72424d29386a461aa2dfc78a4c50b768 Mon Sep 17 00:00:00 2001
From: embg <elliot.gorokhovsky@gmail.com>
Date: Sun, 12 Mar 2017 23:04:32 -0600
Subject: [PATCH 24/47] added Py_NotImplemented check to unsafe_object_compare

---
 Objects/listobject.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/Objects/listobject.c b/Objects/listobject.c
index c656db68216882..bad947c9ede479 100644
--- a/Objects/listobject.c
+++ b/Objects/listobject.c
@@ -1954,6 +1954,11 @@ unsafe_object_compare(PyObject* v, PyObject* w, MergeState* ms)
         return PyObject_RichCompareBool(v, w, Py_LT);
     
     PyObject* res = (*(ms->key_richcompare))(v, w, Py_LT);
+    
+    if (res == Py_NotImplemented) {
+        Py_DECREF(res);
+        return PyObject_RichCompareBool(v, w, Py_LT);
+    }
     if (res == NULL)
         return -1;
 

From 15f87a26511b97e5de080122d81f1dd5b7438e23 Mon Sep 17 00:00:00 2001
From: Elliot Gorokhovsky <elliot.gorokhovsky@gmail.com>
Date: Sun, 12 Mar 2017 23:21:00 -0600
Subject: [PATCH 25/47] ACTUALLY moved declarations to the tops of blocks

---
 Objects/listobject.c | 99 +++++++++++++++++++++++---------------------
 1 file changed, 51 insertions(+), 48 deletions(-)

diff --git a/Objects/listobject.c b/Objects/listobject.c
index bad947c9ede479..1baa730b278a48 100644
--- a/Objects/listobject.c
+++ b/Objects/listobject.c
@@ -1942,18 +1942,20 @@ safe_object_compare(PyObject* v, PyObject* w, MergeState* ms)
 static int
 unsafe_object_compare(PyObject* v, PyObject* w, MergeState* ms)
 {
-  /* Modified from Objects/object.c:PyObject_RichCompareBool, assuming: */
-  #ifdef Py_DEBUG
-    assert(v->ob_type == w->ob_type &&
-           v->ob_type->tp_richcompare != NULL);
-  #endif
-    int ok; 
+    int ok; PyObject* res;
+    
+    /* Modified from Objects/object.c:PyObject_RichCompareBool, assuming: */
+    #ifdef Py_DEBUG
+        assert(v->ob_type == w->ob_type &&
+	       v->ob_type->tp_richcompare != NULL);
+    #endif
+
     
     if (v == w) return 0;
     if (v->ob_type->tp_richcompare != ms->key_richcompare)
         return PyObject_RichCompareBool(v, w, Py_LT);
     
-    PyObject* res = (*(ms->key_richcompare))(v, w, Py_LT);
+    res = (*(ms->key_richcompare))(v, w, Py_LT);
     
     if (res == Py_NotImplemented) {
         Py_DECREF(res);
@@ -1975,16 +1977,18 @@ unsafe_object_compare(PyObject* v, PyObject* w, MergeState* ms)
 /* Latin string compare: safe for any two latin (one byte per char) strings. */
 static int
 unsafe_latin_compare(PyObject* v, PyObject* w, MergeState* ms){
-  /* Modified from Objects/unicodeobject.c:unicode_compare, assuming: */
-  #ifdef Py_DEBUG
-    assert(v->ob_type == w->ob_type &&
-           v->ob_type == &PyUnicode_Type &&
-           PyUnicode_KIND(v) == PyUnicode_KIND(w) &&
-           PyUnicode_KIND(v) == PyUnicode_1BYTE_KIND);
-  #endif
+    int len, res;
+    
+    /* Modified from Objects/unicodeobject.c:unicode_compare, assuming: */
+    #ifdef Py_DEBUG
+        assert(v->ob_type == w->ob_type &&
+	       v->ob_type == &PyUnicode_Type &&
+	       PyUnicode_KIND(v) == PyUnicode_KIND(w) &&
+	       PyUnicode_KIND(v) == PyUnicode_1BYTE_KIND);
+    #endif
     
-    int len = Py_MIN(PyUnicode_GET_LENGTH(v), PyUnicode_GET_LENGTH(w));
-    int res = memcmp(PyUnicode_DATA(v), PyUnicode_DATA(w), len);
+    len = Py_MIN(PyUnicode_GET_LENGTH(v), PyUnicode_GET_LENGTH(w));
+    res = memcmp(PyUnicode_DATA(v), PyUnicode_DATA(w), len);
 
     return (res != 0 ?
             res < 0 :
@@ -1995,20 +1999,21 @@ unsafe_latin_compare(PyObject* v, PyObject* w, MergeState* ms){
 static int
 unsafe_long_compare(PyObject *v, PyObject *w, MergeState* ms)
 {
-  /* Modified from Objects/longobject.c:long_compare, assuming: */
-  #ifdef Py_DEBUG
-    assert(v->ob_type == w->ob_type &&
-           v->ob_type == &PyLong_Type &&
-           Py_ABS(Py_SIZE(v)) <= 1 &&
-           Py_ABS(Py_SIZE(w)) <= 1);
-  #endif
+    PyLongObject *vl, *wl; sdigit v0, w0;
+
+    /* Modified from Objects/longobject.c:long_compare, assuming: */
+    #ifdef Py_DEBUG
+        assert(v->ob_type == w->ob_type &&
+	       v->ob_type == &PyLong_Type &&
+	       Py_ABS(Py_SIZE(v)) <= 1 &&
+	       Py_ABS(Py_SIZE(w)) <= 1);
+    #endif
     
-    PyLongObject *vl, *wl;
     vl = (PyLongObject*)v;
     wl = (PyLongObject*)w;
 
-    sdigit v0 = Py_SIZE(vl) == 0 ? 0 : (sdigit)vl->ob_digit[0];
-    sdigit w0 = Py_SIZE(wl) == 0 ? 0 : (sdigit)wl->ob_digit[0];
+    v0 = Py_SIZE(vl) == 0 ? 0 : (sdigit)vl->ob_digit[0];
+    w0 = Py_SIZE(wl) == 0 ? 0 : (sdigit)wl->ob_digit[0];
 
     if (Py_SIZE(vl) < 0)
         v0 = -v0;
@@ -2021,13 +2026,13 @@ unsafe_long_compare(PyObject *v, PyObject *w, MergeState* ms)
 /* Float compare: compare any two floats. */
 static int
 unsafe_float_compare(PyObject *v, PyObject *w, MergeState* ms){
-  /* Modified from Objects/floatobject.c:float_richcompare, assuming: */
-  #ifdef Py_DEBUG
-    assert(v->ob_type == w->ob_type &&
-           v->ob_type == &PyFloat_Type);
-  #endif
-    if (v == w) return 0;
-    
+    /* Modified from Objects/floatobject.c:float_richcompare, assuming: */
+    #ifdef Py_DEBUG
+        assert(v->ob_type == w->ob_type &&
+	       v->ob_type == &PyFloat_Type);
+    #endif
+      
+    if (v == w) return 0;    
     return PyFloat_AS_DOUBLE(v) < PyFloat_AS_DOUBLE(w);
 }
 
@@ -2038,25 +2043,23 @@ unsafe_float_compare(PyObject *v, PyObject *w, MergeState* ms){
  * on two levels (as long as [x[0] for x in L] is type-homogeneous.) */
 static int
 unsafe_tuple_compare(PyObject* v, PyObject* w, MergeState* ms)
-{
-  /* Modified from Objects/tupleobject.c:tuplerichcompare, assuming: */
-  #ifdef Py_DEBUG
-    assert(v->ob_type == w->ob_type &&
-           v->ob_type == &PyTuple_Type &&
-           Py_SIZE(v) > 0 &&
-           Py_SIZE(w) > 0);
-  #endif
-    
+{   
     PyTupleObject *vt, *wt;
-    Py_ssize_t i;
-    Py_ssize_t vlen, wlen;
+    Py_ssize_t i, vlen, wlen;
+    int k;
+
+    /* Modified from Objects/tupleobject.c:tuplerichcompare, assuming: */
+    #ifdef Py_DEBUG
+        assert(v->ob_type == w->ob_type &&
+	       v->ob_type == &PyTuple_Type &&
+	       Py_SIZE(v) > 0 &&
+	       Py_SIZE(w) > 0);
+    #endif
+
+    if (v == w) return 0;
 
     vt = (PyTupleObject *)v;
     wt = (PyTupleObject *)w;
-    
-    int k;
-    
-    if (v == w) return 0;
 
     /* Is v[0] < w[0]? */
     k = (*(ms->tuple_elem_compare))(vt->ob_item[0], wt->ob_item[0], ms);

From 15f2f01cb80ee95285a1e4fe341bc66cb2d68a5b Mon Sep 17 00:00:00 2001
From: embg <elliot.gorokhovsky@gmail.com>
Date: Thu, 9 Mar 2017 14:00:21 -0700
Subject: [PATCH 26/47] fix typo

---
 Objects/listobject.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Objects/listobject.c b/Objects/listobject.c
index 0e03fbbf31c176..f36c3fe8bdfe8a 100644
--- a/Objects/listobject.c
+++ b/Objects/listobject.c
@@ -1046,7 +1046,7 @@ sortslice_advance(sortslice *slice, Py_ssize_t n)
            if (k)
 
 /* Here we define custom comparison functions to optimize for the cases one commonly 
- * in practice: homogeneous lists, often of one of the basic types. */
+ * encounters in practice: homogeneous lists, often of one of the basic types. */
 
 /* This struct holds the comparison function and helper functions 
  * selected in the pre-sort check. */

From 6afa847f599815cf853ad16dfa5b0443b5766697 Mon Sep 17 00:00:00 2001
From: Elliot Gorokhovsky <elliot.gorokhovsky@gmail.com>
Date: Sat, 11 Mar 2017 10:28:32 -0700
Subject: [PATCH 27/47] Added if (v == w) return 0; to all compares, apologies
 for previous commit

---
 Objects/listobject.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/Objects/listobject.c b/Objects/listobject.c
index f36c3fe8bdfe8a..25536acafa757e 100644
--- a/Objects/listobject.c
+++ b/Objects/listobject.c
@@ -1092,7 +1092,7 @@ unsafe_object_compare(PyObject* v, PyObject* w, CompareFuncs compare_funcs)
            v->ob_type->tp_richcompare != NULL &&
            v->ob_type->tp_richcompare == compare_funcs.key_richcompare);
   #endif
-    if (v == w) return 1;
+    if (v == w) return 0;
     
     PyObject* res = (*compare_funcs.key_richcompare)(v, w, Py_LT);
     if (res == NULL)
@@ -1118,7 +1118,7 @@ unsafe_latin_compare(PyObject* v, PyObject* w, CompareFuncs compare_funcs){
            PyUnicode_KIND(v) == PyUnicode_KIND(w) &&
            PyUnicode_KIND(v) == PyUnicode_1BYTE_KIND);
   #endif
-    if (v == w) return 1;
+    if (v == w) return 0;
     
     int len = Py_MIN(PyUnicode_GET_LENGTH(v), PyUnicode_GET_LENGTH(w));
     int res = memcmp(PyUnicode_DATA(v), PyUnicode_DATA(w), len);
@@ -1139,7 +1139,7 @@ unsafe_long_compare(PyObject *v, PyObject *w, CompareFuncs compare_funcs)
            Py_ABS(Py_SIZE(v)) <= 1 &&
            Py_ABS(Py_SIZE(w)) <= 1);
   #endif
-    if (v == w) return 1;
+    if (v == w) return 0;
     
     PyLongObject *vl, *wl;
     vl = (PyLongObject*)v;
@@ -1164,7 +1164,7 @@ unsafe_float_compare(PyObject *v, PyObject *w, CompareFuncs compare_funcs){
     assert(v->ob_type == w->ob_type &&
            v->ob_type == &PyFloat_Type);
   #endif
-    if (v == w) return 1;
+    if (v == w) return 0;
     
     return PyFloat_AS_DOUBLE(v) < PyFloat_AS_DOUBLE(w);
 }
@@ -1184,7 +1184,7 @@ unsafe_tuple_compare(PyObject* v, PyObject* w, CompareFuncs compare_funcs)
            Py_SIZE(v) > 0 &&
            Py_SIZE(w) > 0);
   #endif
-    if (v == w) return 1;
+    if (v == w) return 0;
     
     PyTupleObject *vt, *wt;
     Py_ssize_t i;

From af7c027773850307cf7809e84fa71abc9f88602d Mon Sep 17 00:00:00 2001
From: Elliot Gorokhovsky <elliot.gorokhovsky@gmail.com>
Date: Sun, 12 Mar 2017 14:11:04 -0600
Subject: [PATCH 28/47] Folded CompareFuncs into MergeState and added safety
 check to unsafe_object_compare

---
 Objects/listobject.c | 542 +++++++++++++++++++++----------------------
 1 file changed, 267 insertions(+), 275 deletions(-)

diff --git a/Objects/listobject.c b/Objects/listobject.c
index 25536acafa757e..dace48312e3c74 100644
--- a/Objects/listobject.c
+++ b/Objects/listobject.c
@@ -1031,12 +1031,12 @@ sortslice_advance(sortslice *slice, Py_ssize_t n)
         slice->values += n;
 }
 
-/* Comparison function: compare_funcs.key_compare, which is set at run-time in 
+/* Comparison function: ms->key_compare, which is set at run-time in 
  * listsort_impl to optimize for various special cases.
  * Returns -1 on error, 1 if x < y, 0 if x >= y.
  */
 
-#define ISLT(X, Y) ((*compare_funcs.key_compare)(X, Y, compare_funcs))
+#define ISLT(X, Y) (*(ms->key_compare))(X, Y, ms)
 
 /* Compare X to Y via "<".  Goto "fail" if the comparison raises an
    error.  Else "k" is set to true iff X<Y, and an "if (k)" block is
@@ -1045,197 +1045,74 @@ sortslice_advance(sortslice *slice, Py_ssize_t n)
 #define IFLT(X, Y) if ((k = ISLT(X, Y)) < 0) goto fail;  \
            if (k)
 
-/* Here we define custom comparison functions to optimize for the cases one commonly 
- * encounters in practice: homogeneous lists, often of one of the basic types. */
-
-/* This struct holds the comparison function and helper functions 
- * selected in the pre-sort check. */
-typedef struct CompareFuncs CompareFuncs;
-struct CompareFuncs {
-  /* This is the function we will use to compare two keys,
-   * even when none of our special cases apply and we have to use
-   * safe_object_compare. */
-  int (*key_compare)(PyObject*, PyObject*, CompareFuncs);
-
-  /* This function is used by unsafe_object_compare to optimize comparisons
-   * when we know our list is type-homogeneous but we can't assume anything else.
-   * In the pre-sort check it is set equal to key->ob_type->tp_richcompare */
-  PyObject* (*key_richcompare)(PyObject*, PyObject*, int);
-
-  /* This function is used by unsafe_tuple_compare to compare the first elements
-   * of tuples. It may be set to safe_object_compare, but the idea is that hopefully 
-   * we can assume more, and use one of the special-case compares. */
-  int (*tuple_elem_compare)(PyObject*, PyObject*, CompareFuncs);
-};
-
-/* These are the special case compare functions.
- * compare_funcs.key_compare will always point to one of these: */
-
-/* Heterogeneous compare: default, always safe to fall back on. */
-static int
-safe_object_compare(PyObject* v, PyObject* w, CompareFuncs compare_funcs)
-{
-  /* No assumptions necessary! */
-    return PyObject_RichCompareBool(v, w, Py_LT);
-}
-
-/* Homogeneous compare: safe for any two compareable objects of the same type.
- * (compare_funcs.key_richcompare is set to ob_type->tp_richcompare in the
- *  pre-sort check.)
+/* The maximum number of entries in a MergeState's pending-runs stack.
+ * This is enough to sort arrays of size up to about
+ *     32 * phi ** MAX_MERGE_PENDING
+ * where phi ~= 1.618.  85 is ridiculouslylarge enough, good for an array
+ * with 2**64 elements.
  */
-static int
-unsafe_object_compare(PyObject* v, PyObject* w, CompareFuncs compare_funcs)
-{
-  /* Modified from Objects/object.c:PyObject_RichCompareBool, assuming: */
-  #ifdef Py_DEBUG
-    assert(v->ob_type == w->ob_type &&
-           v->ob_type->tp_richcompare != NULL &&
-           v->ob_type->tp_richcompare == compare_funcs.key_richcompare);
-  #endif
-    if (v == w) return 0;
-    
-    PyObject* res = (*compare_funcs.key_richcompare)(v, w, Py_LT);
-    if (res == NULL)
-        return -1;
-    int ok;
-    if (PyBool_Check(res)){
-        ok = (res == Py_True);
-    }
-    else {
-        ok = PyObject_IsTrue(res);
-    }
-    Py_DECREF(res);
-    return ok;
-}
-
-/* Latin string compare: safe for any two latin (one byte per char) strings. */
-static int
-unsafe_latin_compare(PyObject* v, PyObject* w, CompareFuncs compare_funcs){
-  /* Modified from Objects/unicodeobject.c:unicode_compare, assuming: */
-  #ifdef Py_DEBUG
-    assert(v->ob_type == w->ob_type &&
-           v->ob_type == &PyUnicode_Type &&
-           PyUnicode_KIND(v) == PyUnicode_KIND(w) &&
-           PyUnicode_KIND(v) == PyUnicode_1BYTE_KIND);
-  #endif
-    if (v == w) return 0;
-    
-    int len = Py_MIN(PyUnicode_GET_LENGTH(v), PyUnicode_GET_LENGTH(w));
-    int res = memcmp(PyUnicode_DATA(v), PyUnicode_DATA(w), len);
-
-    return (res != 0 ?
-            res < 0 :
-            PyUnicode_GET_LENGTH(v) < PyUnicode_GET_LENGTH(w));
-}
-
-/* Bounded int compare: compare any two longs that fit in a single machine word. */
-static int
-unsafe_long_compare(PyObject *v, PyObject *w, CompareFuncs compare_funcs)
-{
-  /* Modified from Objects/longobject.c:long_compare, assuming: */
-  #ifdef Py_DEBUG
-    assert(v->ob_type == w->ob_type &&
-           v->ob_type == &PyLong_Type &&
-           Py_ABS(Py_SIZE(v)) <= 1 &&
-           Py_ABS(Py_SIZE(w)) <= 1);
-  #endif
-    if (v == w) return 0;
-    
-    PyLongObject *vl, *wl;
-    vl = (PyLongObject*)v;
-    wl = (PyLongObject*)w;
-
-    sdigit v0 = Py_SIZE(vl) == 0 ? 0 : (sdigit)vl->ob_digit[0];
-    sdigit w0 = Py_SIZE(wl) == 0 ? 0 : (sdigit)wl->ob_digit[0];
-
-    if (Py_SIZE(vl) < 0)
-        v0 = -v0;
-    if (Py_SIZE(wl) < 0)
-        w0 = -w0;
-
-    return v0 < w0;
-}
+#define MAX_MERGE_PENDING 85
 
-/* Float compare: compare any two floats. */
-static int
-unsafe_float_compare(PyObject *v, PyObject *w, CompareFuncs compare_funcs){
-  /* Modified from Objects/floatobject.c:float_richcompare, assuming: */
-  #ifdef Py_DEBUG
-    assert(v->ob_type == w->ob_type &&
-           v->ob_type == &PyFloat_Type);
-  #endif
-    if (v == w) return 0;
-    
-    return PyFloat_AS_DOUBLE(v) < PyFloat_AS_DOUBLE(w);
-}
+/* When we get into galloping mode, we stay there until both runs win less
+ * often than MIN_GALLOP consecutive times.  See listsort.txt for more info.
+ */
+#define MIN_GALLOP 7
 
-/* Tuple compare: compare any two non-empty tuples, using 
- * compare_funcs.tuple_elem_compare to compare the first elements, which is set 
- * using the same pre-sort check as we use for compare_funcs.key_compare,
- * but run on the list [x[0] for x in L]. This allows us to optimize compares
- * on two levels (as long as [x[0] for x in L] is type-homogeneous.) */
-static int
-unsafe_tuple_compare(PyObject* v, PyObject* w, CompareFuncs compare_funcs)
-{
-  /* Modified from Objects/tupleobject.c:tuplerichcompare, assuming: */
-  #ifdef Py_DEBUG
-    assert(v->ob_type == w->ob_type &&
-           v->ob_type == &PyTuple_Type &&
-           Py_SIZE(v) > 0 &&
-           Py_SIZE(w) > 0);
-  #endif
-    if (v == w) return 0;
-    
-    PyTupleObject *vt, *wt;
-    Py_ssize_t i;
-    Py_ssize_t vlen, wlen;
+/* Avoid malloc for small temp arrays. */
+#define MERGESTATE_TEMP_SIZE 256
 
-    vt = (PyTupleObject *)v;
-    wt = (PyTupleObject *)w;
+/* One MergeState exists on the stack per invocation of mergesort.  It's just
+ * a convenient way to pass state around among the helper functions.
+ */
+struct s_slice {
+    sortslice base;
+    Py_ssize_t len;
+};
 
-    /* Is v[0] < w[0]? */
-    int k = (*compare_funcs.tuple_elem_compare)(vt->ob_item[0],
-                                                wt->ob_item[0],
-                                                compare_funcs);
-    if (k < 0)
-        return -1;
-    if (k)
-        return 1;
+typedef struct s_MergeState MergeState;
+struct s_MergeState {
+    /* This controls when we get *into* galloping mode.  It's initialized
+     * to MIN_GALLOP.  merge_lo and merge_hi tend to nudge it higher for
+     * random data, and lower for highly structured data.
+     */
+    Py_ssize_t min_gallop;
 
-    vlen = Py_SIZE(vt);
-    wlen = Py_SIZE(wt);
+    /* 'a' is temp storage to help with merges.  It contains room for
+     * alloced entries.
+     */
+    sortslice a;        /* may point to temparray below */
+    Py_ssize_t alloced;
 
-    /* Well, are either of the tuples are singleton? */
-    if (vlen == 1 || wlen == 1)
-        return 0;
+    /* A stack of n pending runs yet to be merged.  Run #i starts at
+     * address base[i] and extends for len[i] elements.  It's always
+     * true (so long as the indices are in bounds) that
+     *
+     *     pending[i].base + pending[i].len == pending[i+1].base
+     *
+     * so we could cut the storage for this, but it's a minor amount,
+     * and keeping all the info explicit simplifies the code.
+     */
+    int n;
+    struct s_slice pending[MAX_MERGE_PENDING];
 
-    /* Well, is w[0] < v[0]? */
-    k = (*compare_funcs.tuple_elem_compare)(wt->ob_item[0],
-                                            vt->ob_item[0],
-                                            compare_funcs);
-    if (k < 0)
-        return -1;
-    if (k)
-        return 0;
+    /* 'a' points to this when possible, rather than muck with malloc. */
+    PyObject *temparray[MERGESTATE_TEMP_SIZE];
 
-    /* Out of options: v[0] == w[0]! We need to look at v[1:] and w[1:].
-     * We can use code copied straight from tupleobject.c:tuplerichcompare: */
-    for (i = 1; i < vlen && i < wlen; i++) {
-        k = PyObject_RichCompareBool(vt->ob_item[i],
-                                     wt->ob_item[i],
-                                     Py_EQ);
-        if (k < 0)
-            return -1;
-        if (!k)
-            break;
-    }
+    /* This is the function we will use to compare two keys,
+     * even when none of our special cases apply and we have to use
+     * safe_object_compare. */
+    int (*key_compare)(PyObject*, PyObject*, MergeState*);
 
-    if (i >= vlen || i >= wlen) {
-        return vlen <  wlen;
-    }
+    /* This function is used by unsafe_object_compare to optimize comparisons
+     * when we know our list is type-homogeneous but we can't assume anything else.
+     * In the pre-sort check it is set equal to key->ob_type->tp_richcompare */
+    PyObject* (*key_richcompare)(PyObject*, PyObject*, int);
 
-    return PyObject_RichCompareBool(vt->ob_item[i], wt->ob_item[i], Py_LT);
-}
+    /* This function is used by unsafe_tuple_compare to compare the first elements
+     * of tuples. It may be set to safe_object_compare, but the idea is that hopefully
+     * we can assume more, and use one of the special-case compares. */
+    int (*tuple_elem_compare)(PyObject*, PyObject*, MergeState*);
+};
 
 /* binarysort is the best method for sorting small arrays: it does
    few compares, but can do data movement quadratic in the number of
@@ -1249,7 +1126,7 @@ unsafe_tuple_compare(PyObject* v, PyObject* w, CompareFuncs compare_funcs)
    the input (nothing is lost or duplicated).
 */
 static int
-binarysort(sortslice lo, PyObject **hi, PyObject **start, CompareFuncs compare_funcs)
+binarysort(MergeState* ms, sortslice lo, PyObject **hi, PyObject **start)
 {
     Py_ssize_t k;
     PyObject **l, **p, **r;
@@ -1323,7 +1200,7 @@ elements to get out of order).
 Returns -1 in case of error.
 */
 static Py_ssize_t
-count_run(PyObject **lo, PyObject **hi, int *descending, CompareFuncs compare_funcs)
+count_run(MergeState* ms, PyObject **lo, PyObject **hi, int *descending)
 {
     Py_ssize_t k;
     Py_ssize_t n;
@@ -1378,8 +1255,7 @@ key, and the last n-k should follow key.
 Returns -1 on error.  See listsort.txt for info on the method.
 */
 static Py_ssize_t
-gallop_left(PyObject *key, PyObject **a, Py_ssize_t n, Py_ssize_t hint,
-            CompareFuncs compare_funcs)
+gallop_left(MergeState* ms, PyObject *key, PyObject **a, Py_ssize_t n, Py_ssize_t hint)
 {
     Py_ssize_t ofs;
     Py_ssize_t lastofs;
@@ -1470,8 +1346,7 @@ we're sticking to "<" comparisons that it's much harder to follow if
 written as one routine with yet another "left or right?" flag.
 */
 static Py_ssize_t
-gallop_right(PyObject *key, PyObject **a, Py_ssize_t n, Py_ssize_t hint,
-             CompareFuncs compare_funcs)
+gallop_right(MergeState* ms, PyObject *key, PyObject **a, Py_ssize_t n, Py_ssize_t hint)
 {
     Py_ssize_t ofs;
     Py_ssize_t lastofs;
@@ -1547,59 +1422,6 @@ gallop_right(PyObject *key, PyObject **a, Py_ssize_t n, Py_ssize_t hint,
     return -1;
 }
 
-/* The maximum number of entries in a MergeState's pending-runs stack.
- * This is enough to sort arrays of size up to about
- *     32 * phi ** MAX_MERGE_PENDING
- * where phi ~= 1.618.  85 is ridiculouslylarge enough, good for an array
- * with 2**64 elements.
- */
-#define MAX_MERGE_PENDING 85
-
-/* When we get into galloping mode, we stay there until both runs win less
- * often than MIN_GALLOP consecutive times.  See listsort.txt for more info.
- */
-#define MIN_GALLOP 7
-
-/* Avoid malloc for small temp arrays. */
-#define MERGESTATE_TEMP_SIZE 256
-
-/* One MergeState exists on the stack per invocation of mergesort.  It's just
- * a convenient way to pass state around among the helper functions.
- */
-struct s_slice {
-    sortslice base;
-    Py_ssize_t len;
-};
-
-typedef struct s_MergeState {
-    /* This controls when we get *into* galloping mode.  It's initialized
-     * to MIN_GALLOP.  merge_lo and merge_hi tend to nudge it higher for
-     * random data, and lower for highly structured data.
-     */
-    Py_ssize_t min_gallop;
-
-    /* 'a' is temp storage to help with merges.  It contains room for
-     * alloced entries.
-     */
-    sortslice a;        /* may point to temparray below */
-    Py_ssize_t alloced;
-
-    /* A stack of n pending runs yet to be merged.  Run #i starts at
-     * address base[i] and extends for len[i] elements.  It's always
-     * true (so long as the indices are in bounds) that
-     *
-     *     pending[i].base + pending[i].len == pending[i+1].base
-     *
-     * so we could cut the storage for this, but it's a minor amount,
-     * and keeping all the info explicit simplifies the code.
-     */
-    int n;
-    struct s_slice pending[MAX_MERGE_PENDING];
-
-    /* 'a' points to this when possible, rather than muck with malloc. */
-    PyObject *temparray[MERGESTATE_TEMP_SIZE];
-} MergeState;
-
 /* Conceptually a MergeState's constructor. */
 static void
 merge_init(MergeState *ms, Py_ssize_t list_size, int has_keyfunc)
@@ -1684,8 +1506,7 @@ merge_getmem(MergeState *ms, Py_ssize_t need)
  * successful, -1 if error.
  */
 static Py_ssize_t
-merge_lo(MergeState *ms, sortslice ssa, Py_ssize_t na,
-         sortslice ssb, Py_ssize_t nb, CompareFuncs compare_funcs)
+merge_lo(MergeState *ms, sortslice ssa, Py_ssize_t na, sortslice ssb, Py_ssize_t nb)
 {
     Py_ssize_t k;
     sortslice dest;
@@ -1752,7 +1573,7 @@ merge_lo(MergeState *ms, sortslice ssa, Py_ssize_t na,
             assert(na > 1 && nb > 0);
             min_gallop -= min_gallop > 1;
             ms->min_gallop = min_gallop;
-            k = gallop_right(ssb.keys[0], ssa.keys, na, 0, compare_funcs);
+            k = gallop_right(ms, ssb.keys[0], ssa.keys, na, 0);
             acount = k;
             if (k) {
                 if (k < 0)
@@ -1775,7 +1596,7 @@ merge_lo(MergeState *ms, sortslice ssa, Py_ssize_t na,
             if (nb == 0)
                 goto Succeed;
 
-            k = gallop_left(ssa.keys[0], ssb.keys, nb, 0, compare_funcs);
+            k = gallop_left(ms, ssa.keys[0], ssb.keys, nb, 0);
             bcount = k;
             if (k) {
                 if (k < 0)
@@ -1816,8 +1637,7 @@ merge_lo(MergeState *ms, sortslice ssa, Py_ssize_t na,
  * successful, -1 if error.
  */
 static Py_ssize_t
-merge_hi(MergeState *ms, sortslice ssa, Py_ssize_t na,
-         sortslice ssb, Py_ssize_t nb, CompareFuncs compare_funcs)
+merge_hi(MergeState *ms, sortslice ssa, Py_ssize_t na, sortslice ssb, Py_ssize_t nb)
 {
     Py_ssize_t k;
     sortslice dest, basea, baseb;
@@ -1890,7 +1710,7 @@ merge_hi(MergeState *ms, sortslice ssa, Py_ssize_t na,
             assert(na > 0 && nb > 1);
             min_gallop -= min_gallop > 1;
             ms->min_gallop = min_gallop;
-            k = gallop_right(ssb.keys[0], basea.keys, na, na-1, compare_funcs);
+            k = gallop_right(ms, ssb.keys[0], basea.keys, na, na-1);
             if (k < 0)
                 goto Fail;
             k = na - k;
@@ -1908,7 +1728,7 @@ merge_hi(MergeState *ms, sortslice ssa, Py_ssize_t na,
             if (nb == 1)
                 goto CopyA;
 
-            k = gallop_left(ssa.keys[0], baseb.keys, nb, nb-1, compare_funcs);
+            k = gallop_left(ms, ssa.keys[0], baseb.keys, nb, nb-1);
             if (k < 0)
                 goto Fail;
             k = nb - k;
@@ -1955,7 +1775,7 @@ merge_hi(MergeState *ms, sortslice ssa, Py_ssize_t na,
  * Returns 0 on success, -1 on error.
  */
 static Py_ssize_t
-merge_at(MergeState *ms, Py_ssize_t i, CompareFuncs compare_funcs)
+merge_at(MergeState *ms, Py_ssize_t i)
 {
     sortslice ssa, ssb;
     Py_ssize_t na, nb;
@@ -1985,7 +1805,7 @@ merge_at(MergeState *ms, Py_ssize_t i, CompareFuncs compare_funcs)
     /* Where does b start in a?  Elements in a before that can be
      * ignored (already in place).
      */
-    k = gallop_right(*ssb.keys, ssa.keys, na, 0, compare_funcs);
+    k = gallop_right(ms, *ssb.keys, ssa.keys, na, 0);
     if (k < 0)
         return -1;
     sortslice_advance(&ssa, k);
@@ -1996,7 +1816,7 @@ merge_at(MergeState *ms, Py_ssize_t i, CompareFuncs compare_funcs)
     /* Where does a end in b?  Elements in b after that can be
      * ignored (already in place).
      */
-    nb = gallop_left(ssa.keys[na-1], ssb.keys, nb, nb-1, compare_funcs);
+    nb = gallop_left(ms, ssa.keys[na-1], ssb.keys, nb, nb-1);
     if (nb <= 0)
         return nb;
 
@@ -2004,9 +1824,9 @@ merge_at(MergeState *ms, Py_ssize_t i, CompareFuncs compare_funcs)
      * min(na, nb) elements.
      */
     if (na <= nb)
-        return merge_lo(ms, ssa, na, ssb, nb, compare_funcs);
+        return merge_lo(ms, ssa, na, ssb, nb);
     else
-        return merge_hi(ms, ssa, na, ssb, nb, compare_funcs);
+        return merge_hi(ms, ssa, na, ssb, nb);
 }
 
 /* Examine the stack of runs waiting to be merged, merging adjacent runs
@@ -2020,7 +1840,7 @@ merge_at(MergeState *ms, Py_ssize_t i, CompareFuncs compare_funcs)
  * Returns 0 on success, -1 on error.
  */
 static int
-merge_collapse(MergeState *ms, CompareFuncs compare_funcs)
+merge_collapse(MergeState *ms)
 {
     struct s_slice *p = ms->pending;
 
@@ -2031,11 +1851,11 @@ merge_collapse(MergeState *ms, CompareFuncs compare_funcs)
             (n > 1 && p[n-2].len <= p[n-1].len + p[n].len)) {
             if (p[n-1].len < p[n+1].len)
                 --n;
-            if (merge_at(ms, n, compare_funcs) < 0)
+            if (merge_at(ms, n) < 0)
                 return -1;
         }
         else if (p[n].len <= p[n+1].len) {
-            if (merge_at(ms, n, compare_funcs) < 0)
+            if (merge_at(ms, n) < 0)
                 return -1;
         }
         else
@@ -2050,7 +1870,7 @@ merge_collapse(MergeState *ms, CompareFuncs compare_funcs)
  * Returns 0 on success, -1 on error.
  */
 static int
-merge_force_collapse(MergeState *ms, CompareFuncs compare_funcs)
+merge_force_collapse(MergeState *ms)
 {
     struct s_slice *p = ms->pending;
 
@@ -2059,7 +1879,7 @@ merge_force_collapse(MergeState *ms, CompareFuncs compare_funcs)
         Py_ssize_t n = ms->n - 2;
         if (n > 0 && p[n-1].len < p[n+1].len)
             --n;
-        if (merge_at(ms, n, compare_funcs) < 0)
+        if (merge_at(ms, n) < 0)
             return -1;
     }
     return 0;
@@ -2096,6 +1916,179 @@ reverse_sortslice(sortslice *s, Py_ssize_t n)
         reverse_slice(s->values, &s->values[n]);
 }
 
+/* Here we define custom comparison functions to optimize for the cases one commonly 
+ * encounters in practice: homogeneous lists, often of one of the basic types. */
+
+/* This struct holds the comparison function and helper functions 
+ * selected in the pre-sort check. */
+
+/* These are the special case compare functions.
+ * ms->key_compare will always point to one of these: */
+
+/* Heterogeneous compare: default, always safe to fall back on. */
+static int
+safe_object_compare(PyObject* v, PyObject* w, MergeState* ms)
+{
+  /* No assumptions necessary! */
+    return PyObject_RichCompareBool(v, w, Py_LT);
+}
+
+/* Homogeneous compare: safe for any two compareable objects of the same type.
+ * (ms->key_richcompare is set to ob_type->tp_richcompare in the
+ *  pre-sort check.)
+ */
+static int
+unsafe_object_compare(PyObject* v, PyObject* w, MergeState* ms)
+{
+  /* Modified from Objects/object.c:PyObject_RichCompareBool, assuming: */
+  #ifdef Py_DEBUG
+    assert(v->ob_type == w->ob_type &&
+           v->ob_type->tp_richcompare != NULL);
+  #endif
+    if (v == w) return 0;
+    
+    if (v->ob_type->tp_richcompare != ms->key_richcompare)
+	return PyObject_RichCompareBool(v, w, Py_LT);
+    
+    PyObject* res = (*(ms->key_richcompare))(v, w, Py_LT);
+    if (res == NULL)
+        return -1;
+    int ok;
+    if (PyBool_Check(res)){
+        ok = (res == Py_True);
+    }
+    else {
+        ok = PyObject_IsTrue(res);
+    }
+    Py_DECREF(res);
+    return ok;
+}
+
+/* Latin string compare: safe for any two latin (one byte per char) strings. */
+static int
+unsafe_latin_compare(PyObject* v, PyObject* w, MergeState* ms){
+  /* Modified from Objects/unicodeobject.c:unicode_compare, assuming: */
+  #ifdef Py_DEBUG
+    assert(v->ob_type == w->ob_type &&
+           v->ob_type == &PyUnicode_Type &&
+           PyUnicode_KIND(v) == PyUnicode_KIND(w) &&
+           PyUnicode_KIND(v) == PyUnicode_1BYTE_KIND);
+  #endif
+    if (v == w) return 0;
+    
+    int len = Py_MIN(PyUnicode_GET_LENGTH(v), PyUnicode_GET_LENGTH(w));
+    int res = memcmp(PyUnicode_DATA(v), PyUnicode_DATA(w), len);
+
+    return (res != 0 ?
+            res < 0 :
+            PyUnicode_GET_LENGTH(v) < PyUnicode_GET_LENGTH(w));
+}
+
+/* Bounded int compare: compare any two longs that fit in a single machine word. */
+static int
+unsafe_long_compare(PyObject *v, PyObject *w, MergeState* ms)
+{
+  /* Modified from Objects/longobject.c:long_compare, assuming: */
+  #ifdef Py_DEBUG
+    assert(v->ob_type == w->ob_type &&
+           v->ob_type == &PyLong_Type &&
+           Py_ABS(Py_SIZE(v)) <= 1 &&
+           Py_ABS(Py_SIZE(w)) <= 1);
+  #endif
+    if (v == w) return 0;
+    
+    PyLongObject *vl, *wl;
+    vl = (PyLongObject*)v;
+    wl = (PyLongObject*)w;
+
+    sdigit v0 = Py_SIZE(vl) == 0 ? 0 : (sdigit)vl->ob_digit[0];
+    sdigit w0 = Py_SIZE(wl) == 0 ? 0 : (sdigit)wl->ob_digit[0];
+
+    if (Py_SIZE(vl) < 0)
+        v0 = -v0;
+    if (Py_SIZE(wl) < 0)
+        w0 = -w0;
+
+    return v0 < w0;
+}
+
+/* Float compare: compare any two floats. */
+static int
+unsafe_float_compare(PyObject *v, PyObject *w, MergeState* ms){
+  /* Modified from Objects/floatobject.c:float_richcompare, assuming: */
+  #ifdef Py_DEBUG
+    assert(v->ob_type == w->ob_type &&
+           v->ob_type == &PyFloat_Type);
+  #endif
+    if (v == w) return 0;
+    
+    return PyFloat_AS_DOUBLE(v) < PyFloat_AS_DOUBLE(w);
+}
+
+/* Tuple compare: compare any two non-empty tuples, using 
+ * ms->tuple_elem_compare to compare the first elements, which is set 
+ * using the same pre-sort check as we use for ms->key_compare,
+ * but run on the list [x[0] for x in L]. This allows us to optimize compares
+ * on two levels (as long as [x[0] for x in L] is type-homogeneous.) */
+static int
+unsafe_tuple_compare(PyObject* v, PyObject* w, MergeState* ms)
+{
+  /* Modified from Objects/tupleobject.c:tuplerichcompare, assuming: */
+  #ifdef Py_DEBUG
+    assert(v->ob_type == w->ob_type &&
+           v->ob_type == &PyTuple_Type &&
+           Py_SIZE(v) > 0 &&
+           Py_SIZE(w) > 0);
+  #endif
+    if (v == w) return 0;
+    
+    PyTupleObject *vt, *wt;
+    Py_ssize_t i;
+    Py_ssize_t vlen, wlen;
+
+    vt = (PyTupleObject *)v;
+    wt = (PyTupleObject *)w;
+
+    /* Is v[0] < w[0]? */
+    int k = (*(ms->tuple_elem_compare))(vt->ob_item[0], wt->ob_item[0], ms);
+    if (k < 0)
+        return -1;
+    if (k)
+        return 1;
+
+    vlen = Py_SIZE(vt);
+    wlen = Py_SIZE(wt);
+
+    /* Well, are either of the tuples are singleton? */
+    if (vlen == 1 || wlen == 1)
+        return 0;
+
+    /* Well, is w[0] < v[0]? */
+    k = (*(ms->tuple_elem_compare))(wt->ob_item[0], vt->ob_item[0], ms);
+    if (k < 0)
+        return -1;
+    if (k)
+        return 0;
+
+    /* Out of options: v[0] == w[0]! We need to look at v[1:] and w[1:].
+     * We can use code copied straight from tupleobject.c:tuplerichcompare: */
+    for (i = 1; i < vlen && i < wlen; i++) {
+        k = PyObject_RichCompareBool(vt->ob_item[i],
+                                     wt->ob_item[i],
+                                     Py_EQ);
+        if (k < 0)
+            return -1;
+        if (!k)
+            break;
+    }
+
+    if (i >= vlen || i >= wlen) {
+        return vlen <  wlen;
+    }
+
+    return PyObject_RichCompareBool(vt->ob_item[i], wt->ob_item[i], Py_LT);
+}
+
 /* An adaptive, stable, natural mergesort.  See listsort.txt.
  * Returns Py_None on success, NULL on error.  Even in case of error, the
  * list will be some permutation of its input state (nothing is lost or
@@ -2169,8 +2162,7 @@ listsort_impl(PyListObject *self, PyObject *keyfunc, int reverse)
     /* The pre-sort check: here's where we decide which compare function to use.
      * How much optimization is safe? We test for homogeneity with respect to 
      * several properties that are expensive to check at compare-time, and 
-     * set compare_funcs appropriately. */
-    CompareFuncs compare_funcs;
+     * set ms appropriately. */
     if (saved_ob_size > 1) {
         /* Assume the first element is representative of the whole list. */
         int keys_are_in_tuples = (lo.keys[0]->ob_type == &PyTuple_Type &&
@@ -2220,33 +2212,33 @@ listsort_impl(PyListObject *self, PyObject *keyfunc, int reverse)
         if (keys_are_all_same_type) {
 
             if (key_type == &PyUnicode_Type && strings_are_latin)
-                compare_funcs.key_compare = unsafe_latin_compare;
+                ms.key_compare = unsafe_latin_compare;
 
             else if (key_type == &PyLong_Type && ints_are_bounded)
-                compare_funcs.key_compare = unsafe_long_compare;
+                ms.key_compare = unsafe_long_compare;
 
             else if (key_type == &PyFloat_Type)
-                compare_funcs.key_compare = unsafe_float_compare;
+                ms.key_compare = unsafe_float_compare;
 
-            else if ((compare_funcs.key_richcompare = key_type->tp_richcompare) != NULL)
-                compare_funcs.key_compare = unsafe_object_compare;
+            else if ((ms.key_richcompare = key_type->tp_richcompare) != NULL)
+                ms.key_compare = unsafe_object_compare;
 
         } else {
-            compare_funcs.key_compare = safe_object_compare;
+            ms.key_compare = safe_object_compare;
         }
 
         if (keys_are_in_tuples) {
             /* Make sure we're not dealing with tuples of tuples
              * (remember: here, key_type refers list [key[0] for key in keys]) */
             if (key_type == &PyTuple_Type)
-                compare_funcs.tuple_elem_compare = safe_object_compare;
+                ms.tuple_elem_compare = safe_object_compare;
             else
-                compare_funcs.tuple_elem_compare = compare_funcs.key_compare;
+                ms.tuple_elem_compare = ms.key_compare;
 
-            compare_funcs.key_compare = unsafe_tuple_compare;
+            ms.key_compare = unsafe_tuple_compare;
         }
     }
-    /* End of pre-sort check: compare_funcs is now set properly! */
+    /* End of pre-sort check: ms is now set properly! */
 
     merge_init(&ms, saved_ob_size, keys != NULL);
 
@@ -2271,7 +2263,7 @@ listsort_impl(PyListObject *self, PyObject *keyfunc, int reverse)
         Py_ssize_t n;
 
         /* Identify next run. */
-        n = count_run(lo.keys, lo.keys + nremaining, &descending, compare_funcs);
+        n = count_run(&ms, lo.keys, lo.keys + nremaining, &descending);
         if (n < 0)
             goto fail;
         if (descending)
@@ -2280,7 +2272,7 @@ listsort_impl(PyListObject *self, PyObject *keyfunc, int reverse)
         if (n < minrun) {
             const Py_ssize_t force = nremaining <= minrun ?
                               nremaining : minrun;
-            if (binarysort(lo, lo.keys + force, lo.keys + n, compare_funcs) < 0)
+            if (binarysort(&ms, lo, lo.keys + force, lo.keys + n) < 0)
                 goto fail;
             n = force;
         }
@@ -2289,14 +2281,14 @@ listsort_impl(PyListObject *self, PyObject *keyfunc, int reverse)
         ms.pending[ms.n].base = lo;
         ms.pending[ms.n].len = n;
         ++ms.n;
-        if (merge_collapse(&ms, compare_funcs) < 0)
+        if (merge_collapse(&ms) < 0)
             goto fail;
         /* Advance to find next run. */
         sortslice_advance(&lo, n);
         nremaining -= n;
     } while (nremaining);
 
-    if (merge_force_collapse(&ms, compare_funcs) < 0)
+    if (merge_force_collapse(&ms) < 0)
         goto fail;
     assert(ms.n == 1);
     assert(keys == NULL

From 20716cb0c1ddeddec039b21f3959d58a5abc66bd Mon Sep 17 00:00:00 2001
From: Elliot Gorokhovsky <elliot.gorokhovsky@gmail.com>
Date: Sun, 12 Mar 2017 14:12:44 -0600
Subject: [PATCH 29/47] formatting

---
 Objects/listobject.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/Objects/listobject.c b/Objects/listobject.c
index dace48312e3c74..53691692cceb66 100644
--- a/Objects/listobject.c
+++ b/Objects/listobject.c
@@ -1506,7 +1506,8 @@ merge_getmem(MergeState *ms, Py_ssize_t need)
  * successful, -1 if error.
  */
 static Py_ssize_t
-merge_lo(MergeState *ms, sortslice ssa, Py_ssize_t na, sortslice ssb, Py_ssize_t nb)
+merge_lo(MergeState *ms, sortslice ssa, Py_ssize_t na,
+	 sortslice ssb, Py_ssize_t nb)
 {
     Py_ssize_t k;
     sortslice dest;
@@ -1637,7 +1638,8 @@ merge_lo(MergeState *ms, sortslice ssa, Py_ssize_t na, sortslice ssb, Py_ssize_t
  * successful, -1 if error.
  */
 static Py_ssize_t
-merge_hi(MergeState *ms, sortslice ssa, Py_ssize_t na, sortslice ssb, Py_ssize_t nb)
+merge_hi(MergeState *ms, sortslice ssa, Py_ssize_t na,
+	 sortslice ssb, Py_ssize_t nb)
 {
     Py_ssize_t k;
     sortslice dest, basea, baseb;

From 5960fbe7ae50400359ce9c99c0743aa841c77fe5 Mon Sep 17 00:00:00 2001
From: Elliot Gorokhovsky <elliot.gorokhovsky@gmail.com>
Date: Sun, 12 Mar 2017 14:14:21 -0600
Subject: [PATCH 30/47] formatting

---
 Objects/listobject.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Objects/listobject.c b/Objects/listobject.c
index 53691692cceb66..de2f5893c3863e 100644
--- a/Objects/listobject.c
+++ b/Objects/listobject.c
@@ -1507,7 +1507,7 @@ merge_getmem(MergeState *ms, Py_ssize_t need)
  */
 static Py_ssize_t
 merge_lo(MergeState *ms, sortslice ssa, Py_ssize_t na,
-	 sortslice ssb, Py_ssize_t nb)
+         sortslice ssb, Py_ssize_t nb)
 {
     Py_ssize_t k;
     sortslice dest;
@@ -1639,7 +1639,7 @@ merge_lo(MergeState *ms, sortslice ssa, Py_ssize_t na,
  */
 static Py_ssize_t
 merge_hi(MergeState *ms, sortslice ssa, Py_ssize_t na,
-	 sortslice ssb, Py_ssize_t nb)
+         sortslice ssb, Py_ssize_t nb)
 {
     Py_ssize_t k;
     sortslice dest, basea, baseb;
@@ -1950,7 +1950,7 @@ unsafe_object_compare(PyObject* v, PyObject* w, MergeState* ms)
     if (v == w) return 0;
     
     if (v->ob_type->tp_richcompare != ms->key_richcompare)
-	return PyObject_RichCompareBool(v, w, Py_LT);
+        return PyObject_RichCompareBool(v, w, Py_LT);
     
     PyObject* res = (*(ms->key_richcompare))(v, w, Py_LT);
     if (res == NULL)

From 804807bc82956aa2432050b989da67b9ae585afa Mon Sep 17 00:00:00 2001
From: Elliot Gorokhovsky <elliot.gorokhovsky@gmail.com>
Date: Sun, 12 Mar 2017 14:25:08 -0600
Subject: [PATCH 31/47] don't need (v==w) for ints/strings

---
 Objects/listobject.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/Objects/listobject.c b/Objects/listobject.c
index de2f5893c3863e..ab9647d79433a9 100644
--- a/Objects/listobject.c
+++ b/Objects/listobject.c
@@ -1976,7 +1976,6 @@ unsafe_latin_compare(PyObject* v, PyObject* w, MergeState* ms){
            PyUnicode_KIND(v) == PyUnicode_KIND(w) &&
            PyUnicode_KIND(v) == PyUnicode_1BYTE_KIND);
   #endif
-    if (v == w) return 0;
     
     int len = Py_MIN(PyUnicode_GET_LENGTH(v), PyUnicode_GET_LENGTH(w));
     int res = memcmp(PyUnicode_DATA(v), PyUnicode_DATA(w), len);
@@ -1997,7 +1996,6 @@ unsafe_long_compare(PyObject *v, PyObject *w, MergeState* ms)
            Py_ABS(Py_SIZE(v)) <= 1 &&
            Py_ABS(Py_SIZE(w)) <= 1);
   #endif
-    if (v == w) return 0;
     
     PyLongObject *vl, *wl;
     vl = (PyLongObject*)v;

From 5db7158a89ec872a70f5e766811cbd56c1489c7e Mon Sep 17 00:00:00 2001
From: Elliot Gorokhovsky <elliot.gorokhovsky@gmail.com>
Date: Sun, 12 Mar 2017 14:32:48 -0600
Subject: [PATCH 32/47] went back to i=0 for tuples; we can't infer == from <
 and >, even with v==w

---
 Objects/listobject.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Objects/listobject.c b/Objects/listobject.c
index ab9647d79433a9..0997dd46c03d4b 100644
--- a/Objects/listobject.c
+++ b/Objects/listobject.c
@@ -2072,7 +2072,7 @@ unsafe_tuple_compare(PyObject* v, PyObject* w, MergeState* ms)
 
     /* Out of options: v[0] == w[0]! We need to look at v[1:] and w[1:].
      * We can use code copied straight from tupleobject.c:tuplerichcompare: */
-    for (i = 1; i < vlen && i < wlen; i++) {
+    for (i = 0; i < vlen && i < wlen; i++) {
         k = PyObject_RichCompareBool(vt->ob_item[i],
                                      wt->ob_item[i],
                                      Py_EQ);
@@ -2083,7 +2083,7 @@ unsafe_tuple_compare(PyObject* v, PyObject* w, MergeState* ms)
     }
 
     if (i >= vlen || i >= wlen) {
-        return vlen <  wlen;
+        return vlen < wlen;
     }
 
     return PyObject_RichCompareBool(vt->ob_item[i], wt->ob_item[i], Py_LT);

From 934d83f0b993e1b2ec6270094364ac95630e1f14 Mon Sep 17 00:00:00 2001
From: embg <elliot.gorokhovsky@gmail.com>
Date: Sun, 12 Mar 2017 22:54:17 -0600
Subject: [PATCH 33/47] move all declarations to top of their blocks

---
 Objects/listobject.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/Objects/listobject.c b/Objects/listobject.c
index 0997dd46c03d4b..4efcbcc9cb18f6 100644
--- a/Objects/listobject.c
+++ b/Objects/listobject.c
@@ -1947,15 +1947,16 @@ unsafe_object_compare(PyObject* v, PyObject* w, MergeState* ms)
     assert(v->ob_type == w->ob_type &&
            v->ob_type->tp_richcompare != NULL);
   #endif
-    if (v == w) return 0;
+    int ok; 
     
+    if (v == w) return 0;
     if (v->ob_type->tp_richcompare != ms->key_richcompare)
         return PyObject_RichCompareBool(v, w, Py_LT);
     
     PyObject* res = (*(ms->key_richcompare))(v, w, Py_LT);
     if (res == NULL)
         return -1;
-    int ok;
+
     if (PyBool_Check(res)){
         ok = (res == Py_True);
     }
@@ -2040,7 +2041,6 @@ unsafe_tuple_compare(PyObject* v, PyObject* w, MergeState* ms)
            Py_SIZE(v) > 0 &&
            Py_SIZE(w) > 0);
   #endif
-    if (v == w) return 0;
     
     PyTupleObject *vt, *wt;
     Py_ssize_t i;
@@ -2048,6 +2048,10 @@ unsafe_tuple_compare(PyObject* v, PyObject* w, MergeState* ms)
 
     vt = (PyTupleObject *)v;
     wt = (PyTupleObject *)w;
+    
+    int ok;
+    
+    if (v == w) return 0;
 
     /* Is v[0] < w[0]? */
     int k = (*(ms->tuple_elem_compare))(vt->ob_item[0], wt->ob_item[0], ms);

From c536ed3c12dce6f6d1c602275910f5881aba4a5f Mon Sep 17 00:00:00 2001
From: embg <elliot.gorokhovsky@gmail.com>
Date: Sun, 12 Mar 2017 22:57:30 -0600
Subject: [PATCH 34/47] typo

---
 Objects/listobject.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Objects/listobject.c b/Objects/listobject.c
index 4efcbcc9cb18f6..006f5edaf0f668 100644
--- a/Objects/listobject.c
+++ b/Objects/listobject.c
@@ -2049,7 +2049,7 @@ unsafe_tuple_compare(PyObject* v, PyObject* w, MergeState* ms)
     vt = (PyTupleObject *)v;
     wt = (PyTupleObject *)w;
     
-    int ok;
+    int k;
     
     if (v == w) return 0;
 

From 0b85ac5c087f225035bb3fa3a98678d9f11b14cc Mon Sep 17 00:00:00 2001
From: embg <elliot.gorokhovsky@gmail.com>
Date: Sun, 12 Mar 2017 22:58:46 -0600
Subject: [PATCH 35/47] typo

---
 Objects/listobject.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Objects/listobject.c b/Objects/listobject.c
index 006f5edaf0f668..c656db68216882 100644
--- a/Objects/listobject.c
+++ b/Objects/listobject.c
@@ -2054,7 +2054,7 @@ unsafe_tuple_compare(PyObject* v, PyObject* w, MergeState* ms)
     if (v == w) return 0;
 
     /* Is v[0] < w[0]? */
-    int k = (*(ms->tuple_elem_compare))(vt->ob_item[0], wt->ob_item[0], ms);
+    k = (*(ms->tuple_elem_compare))(vt->ob_item[0], wt->ob_item[0], ms);
     if (k < 0)
         return -1;
     if (k)

From a12d7840670f04bab1905bc7804ed340e122ee4d Mon Sep 17 00:00:00 2001
From: embg <elliot.gorokhovsky@gmail.com>
Date: Sun, 12 Mar 2017 23:04:32 -0600
Subject: [PATCH 36/47] added Py_NotImplemented check to unsafe_object_compare

---
 Objects/listobject.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/Objects/listobject.c b/Objects/listobject.c
index c656db68216882..bad947c9ede479 100644
--- a/Objects/listobject.c
+++ b/Objects/listobject.c
@@ -1954,6 +1954,11 @@ unsafe_object_compare(PyObject* v, PyObject* w, MergeState* ms)
         return PyObject_RichCompareBool(v, w, Py_LT);
     
     PyObject* res = (*(ms->key_richcompare))(v, w, Py_LT);
+    
+    if (res == Py_NotImplemented) {
+        Py_DECREF(res);
+        return PyObject_RichCompareBool(v, w, Py_LT);
+    }
     if (res == NULL)
         return -1;
 

From a54a4e4cfaa58b7fbcbd24ef799d3d87f8c7a4fb Mon Sep 17 00:00:00 2001
From: Elliot Gorokhovsky <elliot.gorokhovsky@gmail.com>
Date: Sun, 12 Mar 2017 23:21:00 -0600
Subject: [PATCH 37/47] ACTUALLY moved declarations to the tops of blocks

---
 Objects/listobject.c | 99 +++++++++++++++++++++++---------------------
 1 file changed, 51 insertions(+), 48 deletions(-)

diff --git a/Objects/listobject.c b/Objects/listobject.c
index bad947c9ede479..1baa730b278a48 100644
--- a/Objects/listobject.c
+++ b/Objects/listobject.c
@@ -1942,18 +1942,20 @@ safe_object_compare(PyObject* v, PyObject* w, MergeState* ms)
 static int
 unsafe_object_compare(PyObject* v, PyObject* w, MergeState* ms)
 {
-  /* Modified from Objects/object.c:PyObject_RichCompareBool, assuming: */
-  #ifdef Py_DEBUG
-    assert(v->ob_type == w->ob_type &&
-           v->ob_type->tp_richcompare != NULL);
-  #endif
-    int ok; 
+    int ok; PyObject* res;
+    
+    /* Modified from Objects/object.c:PyObject_RichCompareBool, assuming: */
+    #ifdef Py_DEBUG
+        assert(v->ob_type == w->ob_type &&
+	       v->ob_type->tp_richcompare != NULL);
+    #endif
+
     
     if (v == w) return 0;
     if (v->ob_type->tp_richcompare != ms->key_richcompare)
         return PyObject_RichCompareBool(v, w, Py_LT);
     
-    PyObject* res = (*(ms->key_richcompare))(v, w, Py_LT);
+    res = (*(ms->key_richcompare))(v, w, Py_LT);
     
     if (res == Py_NotImplemented) {
         Py_DECREF(res);
@@ -1975,16 +1977,18 @@ unsafe_object_compare(PyObject* v, PyObject* w, MergeState* ms)
 /* Latin string compare: safe for any two latin (one byte per char) strings. */
 static int
 unsafe_latin_compare(PyObject* v, PyObject* w, MergeState* ms){
-  /* Modified from Objects/unicodeobject.c:unicode_compare, assuming: */
-  #ifdef Py_DEBUG
-    assert(v->ob_type == w->ob_type &&
-           v->ob_type == &PyUnicode_Type &&
-           PyUnicode_KIND(v) == PyUnicode_KIND(w) &&
-           PyUnicode_KIND(v) == PyUnicode_1BYTE_KIND);
-  #endif
+    int len, res;
+    
+    /* Modified from Objects/unicodeobject.c:unicode_compare, assuming: */
+    #ifdef Py_DEBUG
+        assert(v->ob_type == w->ob_type &&
+	       v->ob_type == &PyUnicode_Type &&
+	       PyUnicode_KIND(v) == PyUnicode_KIND(w) &&
+	       PyUnicode_KIND(v) == PyUnicode_1BYTE_KIND);
+    #endif
     
-    int len = Py_MIN(PyUnicode_GET_LENGTH(v), PyUnicode_GET_LENGTH(w));
-    int res = memcmp(PyUnicode_DATA(v), PyUnicode_DATA(w), len);
+    len = Py_MIN(PyUnicode_GET_LENGTH(v), PyUnicode_GET_LENGTH(w));
+    res = memcmp(PyUnicode_DATA(v), PyUnicode_DATA(w), len);
 
     return (res != 0 ?
             res < 0 :
@@ -1995,20 +1999,21 @@ unsafe_latin_compare(PyObject* v, PyObject* w, MergeState* ms){
 static int
 unsafe_long_compare(PyObject *v, PyObject *w, MergeState* ms)
 {
-  /* Modified from Objects/longobject.c:long_compare, assuming: */
-  #ifdef Py_DEBUG
-    assert(v->ob_type == w->ob_type &&
-           v->ob_type == &PyLong_Type &&
-           Py_ABS(Py_SIZE(v)) <= 1 &&
-           Py_ABS(Py_SIZE(w)) <= 1);
-  #endif
+    PyLongObject *vl, *wl; sdigit v0, w0;
+
+    /* Modified from Objects/longobject.c:long_compare, assuming: */
+    #ifdef Py_DEBUG
+        assert(v->ob_type == w->ob_type &&
+	       v->ob_type == &PyLong_Type &&
+	       Py_ABS(Py_SIZE(v)) <= 1 &&
+	       Py_ABS(Py_SIZE(w)) <= 1);
+    #endif
     
-    PyLongObject *vl, *wl;
     vl = (PyLongObject*)v;
     wl = (PyLongObject*)w;
 
-    sdigit v0 = Py_SIZE(vl) == 0 ? 0 : (sdigit)vl->ob_digit[0];
-    sdigit w0 = Py_SIZE(wl) == 0 ? 0 : (sdigit)wl->ob_digit[0];
+    v0 = Py_SIZE(vl) == 0 ? 0 : (sdigit)vl->ob_digit[0];
+    w0 = Py_SIZE(wl) == 0 ? 0 : (sdigit)wl->ob_digit[0];
 
     if (Py_SIZE(vl) < 0)
         v0 = -v0;
@@ -2021,13 +2026,13 @@ unsafe_long_compare(PyObject *v, PyObject *w, MergeState* ms)
 /* Float compare: compare any two floats. */
 static int
 unsafe_float_compare(PyObject *v, PyObject *w, MergeState* ms){
-  /* Modified from Objects/floatobject.c:float_richcompare, assuming: */
-  #ifdef Py_DEBUG
-    assert(v->ob_type == w->ob_type &&
-           v->ob_type == &PyFloat_Type);
-  #endif
-    if (v == w) return 0;
-    
+    /* Modified from Objects/floatobject.c:float_richcompare, assuming: */
+    #ifdef Py_DEBUG
+        assert(v->ob_type == w->ob_type &&
+	       v->ob_type == &PyFloat_Type);
+    #endif
+      
+    if (v == w) return 0;    
     return PyFloat_AS_DOUBLE(v) < PyFloat_AS_DOUBLE(w);
 }
 
@@ -2038,25 +2043,23 @@ unsafe_float_compare(PyObject *v, PyObject *w, MergeState* ms){
  * on two levels (as long as [x[0] for x in L] is type-homogeneous.) */
 static int
 unsafe_tuple_compare(PyObject* v, PyObject* w, MergeState* ms)
-{
-  /* Modified from Objects/tupleobject.c:tuplerichcompare, assuming: */
-  #ifdef Py_DEBUG
-    assert(v->ob_type == w->ob_type &&
-           v->ob_type == &PyTuple_Type &&
-           Py_SIZE(v) > 0 &&
-           Py_SIZE(w) > 0);
-  #endif
-    
+{   
     PyTupleObject *vt, *wt;
-    Py_ssize_t i;
-    Py_ssize_t vlen, wlen;
+    Py_ssize_t i, vlen, wlen;
+    int k;
+
+    /* Modified from Objects/tupleobject.c:tuplerichcompare, assuming: */
+    #ifdef Py_DEBUG
+        assert(v->ob_type == w->ob_type &&
+	       v->ob_type == &PyTuple_Type &&
+	       Py_SIZE(v) > 0 &&
+	       Py_SIZE(w) > 0);
+    #endif
+
+    if (v == w) return 0;
 
     vt = (PyTupleObject *)v;
     wt = (PyTupleObject *)w;
-    
-    int k;
-    
-    if (v == w) return 0;
 
     /* Is v[0] < w[0]? */
     k = (*(ms->tuple_elem_compare))(vt->ob_item[0], wt->ob_item[0], ms);

From 862c7619bce74c13dd16ca888828c4730932c578 Mon Sep 17 00:00:00 2001
From: Elliot Gorokhovsky <elliot.gorokhovsky@gmail.com>
Date: Mon, 13 Mar 2017 15:08:38 -0600
Subject: [PATCH 38/47] Made tuple compare precisely consistent with
 PyObject_RichCompareBool; it was not earlier

---
 Objects/listobject.c | 43 +++++++++----------------------------------
 1 file changed, 9 insertions(+), 34 deletions(-)

diff --git a/Objects/listobject.c b/Objects/listobject.c
index 1baa730b278a48..649de77b57aa86 100644
--- a/Objects/listobject.c
+++ b/Objects/listobject.c
@@ -1950,8 +1950,6 @@ unsafe_object_compare(PyObject* v, PyObject* w, MergeState* ms)
 	       v->ob_type->tp_richcompare != NULL);
     #endif
 
-    
-    if (v == w) return 0;
     if (v->ob_type->tp_richcompare != ms->key_richcompare)
         return PyObject_RichCompareBool(v, w, Py_LT);
     
@@ -2031,16 +2029,15 @@ unsafe_float_compare(PyObject *v, PyObject *w, MergeState* ms){
         assert(v->ob_type == w->ob_type &&
 	       v->ob_type == &PyFloat_Type);
     #endif
-      
-    if (v == w) return 0;    
     return PyFloat_AS_DOUBLE(v) < PyFloat_AS_DOUBLE(w);
 }
 
-/* Tuple compare: compare any two non-empty tuples, using 
+/* Tuple compare: compare *any* two tuples, using 
  * ms->tuple_elem_compare to compare the first elements, which is set 
  * using the same pre-sort check as we use for ms->key_compare,
  * but run on the list [x[0] for x in L]. This allows us to optimize compares
- * on two levels (as long as [x[0] for x in L] is type-homogeneous.) */
+ * on two levels (as long as [x[0] for x in L] is type-homogeneous.) The idea is 
+ * that most tuple compares don't involve x[1:]. */
 static int
 unsafe_tuple_compare(PyObject* v, PyObject* w, MergeState* ms)
 {   
@@ -2056,49 +2053,27 @@ unsafe_tuple_compare(PyObject* v, PyObject* w, MergeState* ms)
 	       Py_SIZE(w) > 0);
     #endif
 
-    if (v == w) return 0;
-
     vt = (PyTupleObject *)v;
     wt = (PyTupleObject *)w;
 
-    /* Is v[0] < w[0]? */
-    k = (*(ms->tuple_elem_compare))(vt->ob_item[0], wt->ob_item[0], ms);
-    if (k < 0)
-        return -1;
-    if (k)
-        return 1;
-
     vlen = Py_SIZE(vt);
     wlen = Py_SIZE(wt);
 
-    /* Well, are either of the tuples are singleton? */
-    if (vlen == 1 || wlen == 1)
-        return 0;
-
-    /* Well, is w[0] < v[0]? */
-    k = (*(ms->tuple_elem_compare))(wt->ob_item[0], vt->ob_item[0], ms);
-    if (k < 0)
-        return -1;
-    if (k)
-        return 0;
-
-    /* Out of options: v[0] == w[0]! We need to look at v[1:] and w[1:].
-     * We can use code copied straight from tupleobject.c:tuplerichcompare: */
     for (i = 0; i < vlen && i < wlen; i++) {
-        k = PyObject_RichCompareBool(vt->ob_item[i],
-                                     wt->ob_item[i],
-                                     Py_EQ);
+        k = PyObject_RichCompareBool(vt->ob_item[i], wt->ob_item[i], Py_EQ);
         if (k < 0)
             return -1;
         if (!k)
             break;
     }
 
-    if (i >= vlen || i >= wlen) {
+    if (i >= vlen || i >= wlen)
         return vlen < wlen;
-    }
 
-    return PyObject_RichCompareBool(vt->ob_item[i], wt->ob_item[i], Py_LT);
+    if (i == 0)
+	return ms->tuple_elem_compare(vt->ob_item[i], wt->ob_item[i], ms);
+    else
+	return PyObject_RichCompareBool(vt->ob_item[i], wt->ob_item[i], Py_LT);
 }
 
 /* An adaptive, stable, natural mergesort.  See listsort.txt.

From dd302b50e93bf259597607855cf60646540e6c54 Mon Sep 17 00:00:00 2001
From: Elliot Gorokhovsky <elliot.gorokhovsky@gmail.com>
Date: Wed, 15 Mar 2017 11:19:43 -0600
Subject: [PATCH 39/47] Added tests

---
 Lib/test/test_sort.py | 107 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 107 insertions(+)

diff --git a/Lib/test/test_sort.py b/Lib/test/test_sort.py
index 98ccab5c3930a6..8c498aed4cee9b 100644
--- a/Lib/test/test_sort.py
+++ b/Lib/test/test_sort.py
@@ -260,6 +260,113 @@ def my_cmp_reversed(x, y):
         self.assertEqual(data, copy2)
 
 #==============================================================================
+def check_against_PyObject_RichCompareBool(self, L):
+    ## The idea here is to exploit the fact that unsafe_tuple_compare uses
+    ## PyObject_RichCompareBool for the second elements of tuples. So we have,
+    ## for (most) L, sorted(L) == [y[1] for y in sorted([(0,x) for x in L])]
+    ## This will work as long as __eq__ => not __lt__ for all the objects in L,
+    ## which holds for all the types used below.
+    ##
+    ## Testing this way ensures that the optimized implementation remains consistent
+    ## with the naive implementation, even if changes are made to any of the
+    ## richcompares.
+    ##
+    ## This function tests sorting for three lists (it randomly shuffles each one):
+    ##                        1. L
+    ##                        2. [(x,) for x in L]
+    ##                        3. [((x,),) for x in L]
+    
+    random.seed(0)
+    random.shuffle(L)
+    L_1 = L[:]
+    L_2 = [(x,) for x in L]
+    L_3 = [((x,),) for x in L]
+    for L in [L_1, L_2, L_3]:
+        optimized = sorted(L)
+        reference = [y[1] for y in sorted([(0,x) for x in L])]
+        for (opt, ref) in zip(optimized, reference):
+            self.assertIs(opt, ref)
+            #note: not assertEqual! We want to ensure *identical* behavior.
+                         
+class TestOptimizedCompares(unittest.TestCase):
+    def test_safe_object_compare(self):
+        heterogeneous_lists = [[0, 'foo'],
+                               [0.0, 'foo'],
+                               ['foo', b'foo'],
+                               [('foo',), 'foo']]
+        for L in heterogeneous_lists:
+            self.assertRaises(TypeError, L.sort)
+            self.assertRaises(TypeError, [(x,) for x in L].sort)
+            self.assertRaises(TypeError, [((x,),) for x in L].sort)
+
+        float_int_lists = [[1,1.1],
+                           [1<<70,1.1],
+                           [1.1,1],
+                           [1.1,1<<70]]
+        for L in float_int_lists:
+            check_against_PyObject_RichCompareBool(self, L)
+      
+    def test_unsafe_object_compare(self):
+        
+        # This test is by ppperry. It ensures that unsafe_object_compare is
+        # verifying ms->key_richcompare == tp->richcompare before comparing.
+        class WackyComparator(int):
+            def __lt__(self, other):
+                elem.__class__ = WackyList2
+                return int.__lt__(self, other)
+
+        class WackyList1(list):pass
+        class WackyList2(list):
+            def __lt__(self, other):
+                raise ValueError
+                    
+        L = [WackyList1([WackyComparator(i), i]) for i in range(10)]
+        elem = L[-1]
+        self.assertRaises(ValueError, L.sort)
+        self.assertRaises(ValueError, [(x,) for x in L].sort)
+
+        # The following test is also by ppperry. It ensures that 
+        # unsafe_object_compare handles Py_NotImplemented appropriately.
+        class PointlessComparator:
+            def __lt__(self, other):
+                return NotImplemented
+        L = [PointlessComparator(), PointlessComparator()]
+        self.assertRaises(TypeError, L.sort)
+        self.assertRaises(TypeError, [(x,) for x in L].sort)
+        
+        # The following tests go through various types that would trigger
+        # ms->key_compare = unsafe_object_compare
+        lists = [list(range(100)) + [(1<<70)],
+                 [str(x) for x in range(100)] + ['\uffff'],
+                 [bytes(x) for x in range(100)],
+                 [cmp_to_key(lambda x,y: x<y)(x) for x in range(100)]]
+        for L in lists:
+            check_against_PyObject_RichCompareBool(self, L)
+            
+    def test_unsafe_latin_compare(self):
+        check_against_PyObject_RichCompareBool(self, [str(x) for
+                                                      x in range(100)])
+
+    def test_unsafe_long_compare(self):
+        check_against_PyObject_RichCompareBool(self, [x for
+                                                      x in range(100)])
+
+    def test_unsafe_float_compare(self):
+        check_against_PyObject_RichCompareBool(self, [float(x) for
+                                                      x in range(100)])
+
+    def test_unsafe_tuple_compare(self):
+        # This test was suggested by Tim Peters. It verifies that the tuple
+        # comparison respects the current tuple compare semantics, which do not
+        # guarantee that x < x <=> (x,) < (x,)
+        #
+        # Note that we don't have to put anything in tuples here, because
+        # the check function does a tuple test automatically.
+        
+        check_against_PyObject_RichCompareBool(self, [float('nan')]*100)
+        check_against_PyObject_RichCompareBool(self, [float('nan') for
+                                                      _ in range(100)])
+#==============================================================================
 
 if __name__ == "__main__":
     unittest.main()

From ab3d520ed6f46e9a8352fda2c4665de9c0ab6d96 Mon Sep 17 00:00:00 2001
From: Elliot Gorokhovsky <elliot.gorokhovsky@gmail.com>
Date: Wed, 15 Mar 2017 11:59:00 -0600
Subject: [PATCH 40/47] Implemented all of serhiy-storchaka's changes

---
 Objects/listobject.c | 168 +++++++++++++++++++++++--------------------
 1 file changed, 89 insertions(+), 79 deletions(-)

diff --git a/Objects/listobject.c b/Objects/listobject.c
index 649de77b57aa86..4aa4849c940176 100644
--- a/Objects/listobject.c
+++ b/Objects/listobject.c
@@ -1126,7 +1126,7 @@ struct s_MergeState {
    the input (nothing is lost or duplicated).
 */
 static int
-binarysort(MergeState* ms, sortslice lo, PyObject **hi, PyObject **start)
+binarysort(MergeState *ms, sortslice lo, PyObject **hi, PyObject **start)
 {
     Py_ssize_t k;
     PyObject **l, **p, **r;
@@ -1200,7 +1200,7 @@ elements to get out of order).
 Returns -1 in case of error.
 */
 static Py_ssize_t
-count_run(MergeState* ms, PyObject **lo, PyObject **hi, int *descending)
+count_run(MergeState *ms, PyObject **lo, PyObject **hi, int *descending)
 {
     Py_ssize_t k;
     Py_ssize_t n;
@@ -1255,7 +1255,7 @@ key, and the last n-k should follow key.
 Returns -1 on error.  See listsort.txt for info on the method.
 */
 static Py_ssize_t
-gallop_left(MergeState* ms, PyObject *key, PyObject **a, Py_ssize_t n, Py_ssize_t hint)
+gallop_left(MergeState *ms, PyObject *key, PyObject **a, Py_ssize_t n, Py_ssize_t hint)
 {
     Py_ssize_t ofs;
     Py_ssize_t lastofs;
@@ -1346,7 +1346,7 @@ we're sticking to "<" comparisons that it's much harder to follow if
 written as one routine with yet another "left or right?" flag.
 */
 static Py_ssize_t
-gallop_right(MergeState* ms, PyObject *key, PyObject **a, Py_ssize_t n, Py_ssize_t hint)
+gallop_right(MergeState *ms, PyObject *key, PyObject **a, Py_ssize_t n, Py_ssize_t hint)
 {
     Py_ssize_t ofs;
     Py_ssize_t lastofs;
@@ -1929,9 +1929,9 @@ reverse_sortslice(sortslice *s, Py_ssize_t n)
 
 /* Heterogeneous compare: default, always safe to fall back on. */
 static int
-safe_object_compare(PyObject* v, PyObject* w, MergeState* ms)
+safe_object_compare(PyObject *v, PyObject *w, MergeState *ms)
 {
-  /* No assumptions necessary! */
+    /* No assumptions necessary! */
     return PyObject_RichCompareBool(v, w, Py_LT);
 }
 
@@ -1940,72 +1940,74 @@ safe_object_compare(PyObject* v, PyObject* w, MergeState* ms)
  *  pre-sort check.)
  */
 static int
-unsafe_object_compare(PyObject* v, PyObject* w, MergeState* ms)
+unsafe_object_compare(PyObject *v, PyObject *w, MergeState *ms)
 {
-    int ok; PyObject* res;
-    
-    /* Modified from Objects/object.c:PyObject_RichCompareBool, assuming: */
-    #ifdef Py_DEBUG
-        assert(v->ob_type == w->ob_type &&
-	       v->ob_type->tp_richcompare != NULL);
-    #endif
+    PyObject* res_obj; int res;
 
+    /* No assumptions, because we check first: */
     if (v->ob_type->tp_richcompare != ms->key_richcompare)
         return PyObject_RichCompareBool(v, w, Py_LT);
+
+    assert(ms->key_richcompare != NULL);
+    res_obj = (*(ms->key_richcompare))(v, w, Py_LT);
     
-    res = (*(ms->key_richcompare))(v, w, Py_LT);
-    
-    if (res == Py_NotImplemented) {
-        Py_DECREF(res);
+    if (res_obj == Py_NotImplemented) {
+        Py_DECREF(res_obj);
         return PyObject_RichCompareBool(v, w, Py_LT);
     }
-    if (res == NULL)
+    if (res_obj == NULL)
         return -1;
 
-    if (PyBool_Check(res)){
-        ok = (res == Py_True);
+    if (PyBool_Check(res_obj)) {
+        res = (res_obj == Py_True);
     }
     else {
-        ok = PyObject_IsTrue(res);
+        res = PyObject_IsTrue(res_obj);
     }
-    Py_DECREF(res);
-    return ok;
+    Py_DECREF(res_obj);
+
+    /* Note that we can't assert
+     *     res == PyObject_RichCompareBool(v, w, Py_LT);
+     * because of evil compare functions like this:
+     *     lambda a, b:  int(random.random() * 3) - 1)
+     * (which is actually in test_sort.py) */
+    return res;
 }
 
 /* Latin string compare: safe for any two latin (one byte per char) strings. */
 static int
-unsafe_latin_compare(PyObject* v, PyObject* w, MergeState* ms){
+unsafe_latin_compare(PyObject *v, PyObject *w, MergeState *ms)
+{
     int len, res;
     
     /* Modified from Objects/unicodeobject.c:unicode_compare, assuming: */
-    #ifdef Py_DEBUG
-        assert(v->ob_type == w->ob_type &&
-	       v->ob_type == &PyUnicode_Type &&
-	       PyUnicode_KIND(v) == PyUnicode_KIND(w) &&
-	       PyUnicode_KIND(v) == PyUnicode_1BYTE_KIND);
-    #endif
+    assert(v->ob_type == w->ob_type); 
+    assert(v->ob_type == &PyUnicode_Type);
+    assert(PyUnicode_KIND(v) == PyUnicode_KIND(w));
+    assert(PyUnicode_KIND(v) == PyUnicode_1BYTE_KIND);
     
     len = Py_MIN(PyUnicode_GET_LENGTH(v), PyUnicode_GET_LENGTH(w));
     res = memcmp(PyUnicode_DATA(v), PyUnicode_DATA(w), len);
 
-    return (res != 0 ?
-            res < 0 :
-            PyUnicode_GET_LENGTH(v) < PyUnicode_GET_LENGTH(w));
+    res = (res != 0 ?
+           res < 0 :
+           PyUnicode_GET_LENGTH(v) < PyUnicode_GET_LENGTH(w));
+
+    assert(res == PyObject_RichCompareBool(v, w, Py_LT));;
+    return res;
 }
 
 /* Bounded int compare: compare any two longs that fit in a single machine word. */
 static int
-unsafe_long_compare(PyObject *v, PyObject *w, MergeState* ms)
+unsafe_long_compare(PyObject *v, PyObject *w, MergeState *ms)
 {
-    PyLongObject *vl, *wl; sdigit v0, w0;
+    PyLongObject *vl, *wl; sdigit v0, w0; int res;
 
     /* Modified from Objects/longobject.c:long_compare, assuming: */
-    #ifdef Py_DEBUG
-        assert(v->ob_type == w->ob_type &&
-	       v->ob_type == &PyLong_Type &&
-	       Py_ABS(Py_SIZE(v)) <= 1 &&
-	       Py_ABS(Py_SIZE(w)) <= 1);
-    #endif
+    assert(v->ob_type == w->ob_type); 
+    assert(v->ob_type == &PyLong_Type);
+    assert(Py_ABS(Py_SIZE(v)) <= 1);
+    assert(Py_ABS(Py_SIZE(w)) <= 1);
     
     vl = (PyLongObject*)v;
     wl = (PyLongObject*)w;
@@ -2018,18 +2020,24 @@ unsafe_long_compare(PyObject *v, PyObject *w, MergeState* ms)
     if (Py_SIZE(wl) < 0)
         w0 = -w0;
 
-    return v0 < w0;
+    res = v0 < w0;
+    assert(res == PyObject_RichCompareBool(v, w, Py_LT));
+    return res;
 }
 
 /* Float compare: compare any two floats. */
 static int
-unsafe_float_compare(PyObject *v, PyObject *w, MergeState* ms){
+unsafe_float_compare(PyObject *v, PyObject *w, MergeState *ms)
+{
+    int res;
+
     /* Modified from Objects/floatobject.c:float_richcompare, assuming: */
-    #ifdef Py_DEBUG
-        assert(v->ob_type == w->ob_type &&
-	       v->ob_type == &PyFloat_Type);
-    #endif
-    return PyFloat_AS_DOUBLE(v) < PyFloat_AS_DOUBLE(w);
+    assert(v->ob_type == w->ob_type); 
+    assert(v->ob_type == &PyFloat_Type);
+
+    res = PyFloat_AS_DOUBLE(v) < PyFloat_AS_DOUBLE(w);
+    assert(res == PyObject_RichCompareBool(v, w, Py_LT));
+    return res; 
 }
 
 /* Tuple compare: compare *any* two tuples, using 
@@ -2039,23 +2047,21 @@ unsafe_float_compare(PyObject *v, PyObject *w, MergeState* ms){
  * on two levels (as long as [x[0] for x in L] is type-homogeneous.) The idea is 
  * that most tuple compares don't involve x[1:]. */
 static int
-unsafe_tuple_compare(PyObject* v, PyObject* w, MergeState* ms)
+unsafe_tuple_compare(PyObject *v, PyObject *w, MergeState *ms)
 {   
     PyTupleObject *vt, *wt;
     Py_ssize_t i, vlen, wlen;
     int k;
 
     /* Modified from Objects/tupleobject.c:tuplerichcompare, assuming: */
-    #ifdef Py_DEBUG
-        assert(v->ob_type == w->ob_type &&
-	       v->ob_type == &PyTuple_Type &&
-	       Py_SIZE(v) > 0 &&
-	       Py_SIZE(w) > 0);
-    #endif
+    assert(v->ob_type == w->ob_type);
+    assert(v->ob_type == &PyTuple_Type);
+    assert(Py_SIZE(v) > 0);
+    assert(Py_SIZE(w) > 0);
 
     vt = (PyTupleObject *)v;
     wt = (PyTupleObject *)w;
-
+    
     vlen = Py_SIZE(vt);
     wlen = Py_SIZE(wt);
 
@@ -2070,10 +2076,11 @@ unsafe_tuple_compare(PyObject* v, PyObject* w, MergeState* ms)
     if (i >= vlen || i >= wlen)
         return vlen < wlen;
 
-    if (i == 0)
-	return ms->tuple_elem_compare(vt->ob_item[i], wt->ob_item[i], ms);
-    else
+    if (i == 0) {
+        return ms->tuple_elem_compare(vt->ob_item[i], wt->ob_item[i], ms);
+    } else {
 	return PyObject_RichCompareBool(vt->ob_item[i], wt->ob_item[i], Py_LT);
+    }
 }
 
 /* An adaptive, stable, natural mergesort.  See listsort.txt.
@@ -2156,7 +2163,7 @@ listsort_impl(PyListObject *self, PyObject *keyfunc, int reverse)
                                   Py_SIZE(lo.keys[0]) > 0);
 
         PyTypeObject* key_type = (keys_are_in_tuples ?
-                                  PyTuple_GET_ITEM(lo.keys[0],0)->ob_type :
+                                  PyTuple_GET_ITEM(lo.keys[0], 0)->ob_type :
                                   lo.keys[0]->ob_type);
 
         int keys_are_all_same_type = 1;
@@ -2165,10 +2172,10 @@ listsort_impl(PyListObject *self, PyObject *keyfunc, int reverse)
 
         /* Prove that assumption by checking every key. */
         int i;
-        for (i=0; i< saved_ob_size; i++) {
+        for (i=0; i < saved_ob_size; i++) {
 
             if (keys_are_in_tuples &&
-                (lo.keys[i]->ob_type != &PyTuple_Type || Py_SIZE(lo.keys[i]) == 0)) {
+                !(lo.keys[i]->ob_type == &PyTuple_Type && Py_SIZE(lo.keys[i]) != 0)) {
                 keys_are_in_tuples = 0;
                 keys_are_all_same_type = 0;
                 break;
@@ -2178,7 +2185,7 @@ listsort_impl(PyListObject *self, PyObject *keyfunc, int reverse)
              * lo.keys[i], not lo.keys[i] itself! We verify type-homogeneity 
              * for lists of tuples in the if-statement directly above. */
             PyObject* key = (keys_are_in_tuples ?
-                             PyTuple_GET_ITEM(lo.keys[i],0) :
+                             PyTuple_GET_ITEM(lo.keys[i], 0) :
                              lo.keys[i]);
 
             if (key->ob_type != key_type) {
@@ -2186,31 +2193,34 @@ listsort_impl(PyListObject *self, PyObject *keyfunc, int reverse)
                 break;
             }
 
-            else if (key_type == &PyLong_Type && ints_are_bounded &&
-                     Py_ABS(Py_SIZE(key)) > 1)
-                ints_are_bounded = 0;
-
-            else if (key_type == &PyUnicode_Type && strings_are_latin &&
-                     PyUnicode_KIND(key) != PyUnicode_1BYTE_KIND)
+            if (key_type == &PyLong_Type) {
+                if (ints_are_bounded && Py_ABS(Py_SIZE(key)) > 1)
+                    ints_are_bounded = 0;
+            }
+            else if (key_type == &PyUnicode_Type){
+                if (strings_are_latin &&
+                    PyUnicode_KIND(key) != PyUnicode_1BYTE_KIND)
                 strings_are_latin = 0;
+            }
         }
 
         /* Choose the best compare, given what we now know about the keys. */
         if (keys_are_all_same_type) {
 
-            if (key_type == &PyUnicode_Type && strings_are_latin)
+            if (key_type == &PyUnicode_Type && strings_are_latin) {
                 ms.key_compare = unsafe_latin_compare;
-
-            else if (key_type == &PyLong_Type && ints_are_bounded)
+            }
+            else if (key_type == &PyLong_Type && ints_are_bounded) {
                 ms.key_compare = unsafe_long_compare;
-
-            else if (key_type == &PyFloat_Type)
+            }
+            else if (key_type == &PyFloat_Type) {
                 ms.key_compare = unsafe_float_compare;
-
-            else if ((ms.key_richcompare = key_type->tp_richcompare) != NULL)
+            }
+            else if ((ms.key_richcompare = key_type->tp_richcompare) != NULL) {
                 ms.key_compare = unsafe_object_compare;
-
-        } else {
+            }
+        }
+        else {
             ms.key_compare = safe_object_compare;
         }
 

From dba3f27f2367fb47aec13fcdee86e77b416bbe2f Mon Sep 17 00:00:00 2001
From: Elliot Gorokhovsky <elliot.gorokhovsky@gmail.com>
Date: Wed, 15 Mar 2017 12:00:20 -0600
Subject: [PATCH 41/47] Removed braces at the end of unsafe_tuple_compare

---
 Objects/listobject.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/Objects/listobject.c b/Objects/listobject.c
index 4aa4849c940176..977369e967c58d 100644
--- a/Objects/listobject.c
+++ b/Objects/listobject.c
@@ -2076,11 +2076,10 @@ unsafe_tuple_compare(PyObject *v, PyObject *w, MergeState *ms)
     if (i >= vlen || i >= wlen)
         return vlen < wlen;
 
-    if (i == 0) {
+    if (i == 0) 
         return ms->tuple_elem_compare(vt->ob_item[i], wt->ob_item[i], ms);
-    } else {
+    else
 	return PyObject_RichCompareBool(vt->ob_item[i], wt->ob_item[i], Py_LT);
-    }
 }
 
 /* An adaptive, stable, natural mergesort.  See listsort.txt.

From c796422f7d9e6726e445bd97a00aec0d89cc2654 Mon Sep 17 00:00:00 2001
From: embg <elliot.gorokhovsky@gmail.com>
Date: Wed, 15 Mar 2017 20:14:09 -0700
Subject: [PATCH 42/47] Fixed test_safe_object_compare

Comparing bytes and strings yields a warning, not an error, so assertRaises fails.
---
 Lib/test/test_sort.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/Lib/test/test_sort.py b/Lib/test/test_sort.py
index 8c498aed4cee9b..18a62317dd69cb 100644
--- a/Lib/test/test_sort.py
+++ b/Lib/test/test_sort.py
@@ -292,7 +292,6 @@ class TestOptimizedCompares(unittest.TestCase):
     def test_safe_object_compare(self):
         heterogeneous_lists = [[0, 'foo'],
                                [0.0, 'foo'],
-                               ['foo', b'foo'],
                                [('foo',), 'foo']]
         for L in heterogeneous_lists:
             self.assertRaises(TypeError, L.sort)

From fa19903f3f3c799ebfb5e47e48d32d9c35753475 Mon Sep 17 00:00:00 2001
From: Raymond Hettinger <raymond.hettinger@gmail.com>
Date: Sun, 28 Jan 2018 14:49:34 -0800
Subject: [PATCH 43/47] Fix spacing around PyObject *

---
 Objects/listobject.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/Objects/listobject.c b/Objects/listobject.c
index 977369e967c58d..1e469f54e9742d 100644
--- a/Objects/listobject.c
+++ b/Objects/listobject.c
@@ -1101,17 +1101,17 @@ struct s_MergeState {
     /* This is the function we will use to compare two keys,
      * even when none of our special cases apply and we have to use
      * safe_object_compare. */
-    int (*key_compare)(PyObject*, PyObject*, MergeState*);
+    int (*key_compare)(PyObject *, PyObject *, MergeState *);
 
     /* This function is used by unsafe_object_compare to optimize comparisons
      * when we know our list is type-homogeneous but we can't assume anything else.
      * In the pre-sort check it is set equal to key->ob_type->tp_richcompare */
-    PyObject* (*key_richcompare)(PyObject*, PyObject*, int);
+    PyObject *(*key_richcompare)(PyObject *, PyObject *, int);
 
     /* This function is used by unsafe_tuple_compare to compare the first elements
      * of tuples. It may be set to safe_object_compare, but the idea is that hopefully
      * we can assume more, and use one of the special-case compares. */
-    int (*tuple_elem_compare)(PyObject*, PyObject*, MergeState*);
+    int (*tuple_elem_compare)(PyObject *, PyObject *, MergeState *);
 };
 
 /* binarysort is the best method for sorting small arrays: it does
@@ -1481,11 +1481,11 @@ merge_getmem(MergeState *ms, Py_ssize_t need)
      * we don't care what's in the block.
      */
     merge_freemem(ms);
-    if ((size_t)need > PY_SSIZE_T_MAX / sizeof(PyObject*) / multiplier) {
+    if ((size_t)need > PY_SSIZE_T_MAX / sizeof(PyObject *) / multiplier) {
         PyErr_NoMemory();
         return -1;
     }
-    ms->a.keys = (PyObject**)PyMem_Malloc(multiplier * need
+    ms->a.keys = (PyObject **)PyMem_Malloc(multiplier * need
                                           * sizeof(PyObject *));
     if (ms->a.keys != NULL) {
         ms->alloced = need;
@@ -1942,7 +1942,7 @@ safe_object_compare(PyObject *v, PyObject *w, MergeState *ms)
 static int
 unsafe_object_compare(PyObject *v, PyObject *w, MergeState *ms)
 {
-    PyObject* res_obj; int res;
+    PyObject *res_obj; int res;
 
     /* No assumptions, because we check first: */
     if (v->ob_type->tp_richcompare != ms->key_richcompare)
@@ -2183,7 +2183,7 @@ listsort_impl(PyListObject *self, PyObject *keyfunc, int reverse)
             /* Note: for lists of tuples, key is the first element of the tuple
              * lo.keys[i], not lo.keys[i] itself! We verify type-homogeneity 
              * for lists of tuples in the if-statement directly above. */
-            PyObject* key = (keys_are_in_tuples ?
+            PyObject *key = (keys_are_in_tuples ?
                              PyTuple_GET_ITEM(lo.keys[i], 0) :
                              lo.keys[i]);
 
@@ -2594,7 +2594,7 @@ list_sizeof(PyListObject *self)
 }
 
 static PyObject *list_iter(PyObject *seq);
-static PyObject *list_reversed(PyListObject* seq, PyObject* unused);
+static PyObject *list_reversed(PyListObject *seq, PyObject *unused);
 
 PyDoc_STRVAR(getitem_doc,
 "x.__getitem__(y) <==> x[y]");

From e4679e2847f94fe92f963587a0d3cd56bef49613 Mon Sep 17 00:00:00 2001
From: Raymond Hettinger <raymond.hettinger@gmail.com>
Date: Sun, 28 Jan 2018 15:10:05 -0800
Subject: [PATCH 44/47] Add news blurb

---
 .../Core and Builtins/2018-01-28-15-09-33.bpo-28685.cHThLM.rst  | 2 ++
 1 file changed, 2 insertions(+)
 create mode 100644 Misc/NEWS.d/next/Core and Builtins/2018-01-28-15-09-33.bpo-28685.cHThLM.rst

diff --git a/Misc/NEWS.d/next/Core and Builtins/2018-01-28-15-09-33.bpo-28685.cHThLM.rst b/Misc/NEWS.d/next/Core and Builtins/2018-01-28-15-09-33.bpo-28685.cHThLM.rst
new file mode 100644
index 00000000000000..ccc3c0857bc089
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2018-01-28-15-09-33.bpo-28685.cHThLM.rst	
@@ -0,0 +1,2 @@
+Optimize list.sort() and sorted() by using type-specialized comparisons when
+possible.

From 3b3ce5280411d81bf18218b89893b9095508bb2a Mon Sep 17 00:00:00 2001
From: Raymond Hettinger <raymond.hettinger@gmail.com>
Date: Sun, 28 Jan 2018 17:31:32 -0800
Subject: [PATCH 45/47] Update listsort.txt for the optimization

---
 Objects/listsort.txt | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/Objects/listsort.txt b/Objects/listsort.txt
index 17d27973f82676..8c877515c72e88 100644
--- a/Objects/listsort.txt
+++ b/Objects/listsort.txt
@@ -753,3 +753,11 @@ example, with the region of uncertainty B[4], B[5], B[6], there are 4
 locations:  before B[4], between B[4] and B[5], between B[5] and B[6], and
 after B[6].  In general, across 2**(k-1)-1 elements, there are 2**(k-1)
 locations.  That's why k-1 binary searches are necessary and sufficient.
+
+OPTIMIZATION OF INDIVIDUAL COMPARISONS
+As noted above, even the simplest Python comparison triggers a large pile of
+C-level pointer dereferences, conditionals, and function calls.  This can be
+partially mitigated by pre-scanning the data to determine whether the data is
+homogeneous with respect to type.  If so, it is sometimes possible to
+substitute faster type-specific comparisons for the slower, generic
+PyObject_RichCompareBool.

From afed812b8672f238e9385f1ac3ead19a89c6fc59 Mon Sep 17 00:00:00 2001
From: Raymond Hettinger <raymond.hettinger@gmail.com>
Date: Sun, 28 Jan 2018 17:45:05 -0800
Subject: [PATCH 46/47] Fix whitespace

---
 Lib/test/test_sort.py | 18 +++++++++---------
 Objects/listobject.c  | 42 +++++++++++++++++++++---------------------
 2 files changed, 30 insertions(+), 30 deletions(-)

diff --git a/Lib/test/test_sort.py b/Lib/test/test_sort.py
index 18a62317dd69cb..15fc1dd72a5f65 100644
--- a/Lib/test/test_sort.py
+++ b/Lib/test/test_sort.py
@@ -275,7 +275,7 @@ def check_against_PyObject_RichCompareBool(self, L):
     ##                        1. L
     ##                        2. [(x,) for x in L]
     ##                        3. [((x,),) for x in L]
-    
+
     random.seed(0)
     random.shuffle(L)
     L_1 = L[:]
@@ -287,7 +287,7 @@ def check_against_PyObject_RichCompareBool(self, L):
         for (opt, ref) in zip(optimized, reference):
             self.assertIs(opt, ref)
             #note: not assertEqual! We want to ensure *identical* behavior.
-                         
+
 class TestOptimizedCompares(unittest.TestCase):
     def test_safe_object_compare(self):
         heterogeneous_lists = [[0, 'foo'],
@@ -304,9 +304,9 @@ def test_safe_object_compare(self):
                            [1.1,1<<70]]
         for L in float_int_lists:
             check_against_PyObject_RichCompareBool(self, L)
-      
+
     def test_unsafe_object_compare(self):
-        
+
         # This test is by ppperry. It ensures that unsafe_object_compare is
         # verifying ms->key_richcompare == tp->richcompare before comparing.
         class WackyComparator(int):
@@ -318,13 +318,13 @@ class WackyList1(list):pass
         class WackyList2(list):
             def __lt__(self, other):
                 raise ValueError
-                    
+
         L = [WackyList1([WackyComparator(i), i]) for i in range(10)]
         elem = L[-1]
         self.assertRaises(ValueError, L.sort)
         self.assertRaises(ValueError, [(x,) for x in L].sort)
 
-        # The following test is also by ppperry. It ensures that 
+        # The following test is also by ppperry. It ensures that
         # unsafe_object_compare handles Py_NotImplemented appropriately.
         class PointlessComparator:
             def __lt__(self, other):
@@ -332,7 +332,7 @@ def __lt__(self, other):
         L = [PointlessComparator(), PointlessComparator()]
         self.assertRaises(TypeError, L.sort)
         self.assertRaises(TypeError, [(x,) for x in L].sort)
-        
+
         # The following tests go through various types that would trigger
         # ms->key_compare = unsafe_object_compare
         lists = [list(range(100)) + [(1<<70)],
@@ -341,7 +341,7 @@ def __lt__(self, other):
                  [cmp_to_key(lambda x,y: x<y)(x) for x in range(100)]]
         for L in lists:
             check_against_PyObject_RichCompareBool(self, L)
-            
+
     def test_unsafe_latin_compare(self):
         check_against_PyObject_RichCompareBool(self, [str(x) for
                                                       x in range(100)])
@@ -361,7 +361,7 @@ def test_unsafe_tuple_compare(self):
         #
         # Note that we don't have to put anything in tuples here, because
         # the check function does a tuple test automatically.
-        
+
         check_against_PyObject_RichCompareBool(self, [float('nan')]*100)
         check_against_PyObject_RichCompareBool(self, [float('nan') for
                                                       _ in range(100)])
diff --git a/Objects/listobject.c b/Objects/listobject.c
index b940b5ce6a47a4..9e321371aae97f 100644
--- a/Objects/listobject.c
+++ b/Objects/listobject.c
@@ -1081,7 +1081,7 @@ sortslice_advance(sortslice *slice, Py_ssize_t n)
         slice->values += n;
 }
 
-/* Comparison function: ms->key_compare, which is set at run-time in 
+/* Comparison function: ms->key_compare, which is set at run-time in
  * listsort_impl to optimize for various special cases.
  * Returns -1 on error, 1 if x < y, 0 if x >= y.
  */
@@ -1968,10 +1968,10 @@ reverse_sortslice(sortslice *s, Py_ssize_t n)
         reverse_slice(s->values, &s->values[n]);
 }
 
-/* Here we define custom comparison functions to optimize for the cases one commonly 
+/* Here we define custom comparison functions to optimize for the cases one commonly
  * encounters in practice: homogeneous lists, often of one of the basic types. */
 
-/* This struct holds the comparison function and helper functions 
+/* This struct holds the comparison function and helper functions
  * selected in the pre-sort check. */
 
 /* These are the special case compare functions.
@@ -2000,7 +2000,7 @@ unsafe_object_compare(PyObject *v, PyObject *w, MergeState *ms)
 
     assert(ms->key_richcompare != NULL);
     res_obj = (*(ms->key_richcompare))(v, w, Py_LT);
-    
+
     if (res_obj == Py_NotImplemented) {
         Py_DECREF(res_obj);
         return PyObject_RichCompareBool(v, w, Py_LT);
@@ -2029,13 +2029,13 @@ static int
 unsafe_latin_compare(PyObject *v, PyObject *w, MergeState *ms)
 {
     int len, res;
-    
+
     /* Modified from Objects/unicodeobject.c:unicode_compare, assuming: */
-    assert(v->ob_type == w->ob_type); 
+    assert(v->ob_type == w->ob_type);
     assert(v->ob_type == &PyUnicode_Type);
     assert(PyUnicode_KIND(v) == PyUnicode_KIND(w));
     assert(PyUnicode_KIND(v) == PyUnicode_1BYTE_KIND);
-    
+
     len = Py_MIN(PyUnicode_GET_LENGTH(v), PyUnicode_GET_LENGTH(w));
     res = memcmp(PyUnicode_DATA(v), PyUnicode_DATA(w), len);
 
@@ -2054,11 +2054,11 @@ unsafe_long_compare(PyObject *v, PyObject *w, MergeState *ms)
     PyLongObject *vl, *wl; sdigit v0, w0; int res;
 
     /* Modified from Objects/longobject.c:long_compare, assuming: */
-    assert(v->ob_type == w->ob_type); 
+    assert(v->ob_type == w->ob_type);
     assert(v->ob_type == &PyLong_Type);
     assert(Py_ABS(Py_SIZE(v)) <= 1);
     assert(Py_ABS(Py_SIZE(w)) <= 1);
-    
+
     vl = (PyLongObject*)v;
     wl = (PyLongObject*)w;
 
@@ -2082,23 +2082,23 @@ unsafe_float_compare(PyObject *v, PyObject *w, MergeState *ms)
     int res;
 
     /* Modified from Objects/floatobject.c:float_richcompare, assuming: */
-    assert(v->ob_type == w->ob_type); 
+    assert(v->ob_type == w->ob_type);
     assert(v->ob_type == &PyFloat_Type);
 
     res = PyFloat_AS_DOUBLE(v) < PyFloat_AS_DOUBLE(w);
     assert(res == PyObject_RichCompareBool(v, w, Py_LT));
-    return res; 
+    return res;
 }
 
-/* Tuple compare: compare *any* two tuples, using 
- * ms->tuple_elem_compare to compare the first elements, which is set 
+/* Tuple compare: compare *any* two tuples, using
+ * ms->tuple_elem_compare to compare the first elements, which is set
  * using the same pre-sort check as we use for ms->key_compare,
  * but run on the list [x[0] for x in L]. This allows us to optimize compares
- * on two levels (as long as [x[0] for x in L] is type-homogeneous.) The idea is 
+ * on two levels (as long as [x[0] for x in L] is type-homogeneous.) The idea is
  * that most tuple compares don't involve x[1:]. */
 static int
 unsafe_tuple_compare(PyObject *v, PyObject *w, MergeState *ms)
-{   
+{
     PyTupleObject *vt, *wt;
     Py_ssize_t i, vlen, wlen;
     int k;
@@ -2111,7 +2111,7 @@ unsafe_tuple_compare(PyObject *v, PyObject *w, MergeState *ms)
 
     vt = (PyTupleObject *)v;
     wt = (PyTupleObject *)w;
-    
+
     vlen = Py_SIZE(vt);
     wlen = Py_SIZE(wt);
 
@@ -2126,10 +2126,10 @@ unsafe_tuple_compare(PyObject *v, PyObject *w, MergeState *ms)
     if (i >= vlen || i >= wlen)
         return vlen < wlen;
 
-    if (i == 0) 
+    if (i == 0)
         return ms->tuple_elem_compare(vt->ob_item[i], wt->ob_item[i], ms);
     else
-	return PyObject_RichCompareBool(vt->ob_item[i], wt->ob_item[i], Py_LT);
+        return PyObject_RichCompareBool(vt->ob_item[i], wt->ob_item[i], Py_LT);
 }
 
 /* An adaptive, stable, natural mergesort.  See listsort.txt.
@@ -2214,8 +2214,8 @@ list_sort_impl(PyListObject *self, PyObject *keyfunc, int reverse)
 
 
     /* The pre-sort check: here's where we decide which compare function to use.
-     * How much optimization is safe? We test for homogeneity with respect to 
-     * several properties that are expensive to check at compare-time, and 
+     * How much optimization is safe? We test for homogeneity with respect to
+     * several properties that are expensive to check at compare-time, and
      * set ms appropriately. */
     if (saved_ob_size > 1) {
         /* Assume the first element is representative of the whole list. */
@@ -2242,7 +2242,7 @@ list_sort_impl(PyListObject *self, PyObject *keyfunc, int reverse)
             }
 
             /* Note: for lists of tuples, key is the first element of the tuple
-             * lo.keys[i], not lo.keys[i] itself! We verify type-homogeneity 
+             * lo.keys[i], not lo.keys[i] itself! We verify type-homogeneity
              * for lists of tuples in the if-statement directly above. */
             PyObject *key = (keys_are_in_tuples ?
                              PyTuple_GET_ITEM(lo.keys[i], 0) :

From ebb4c1f520a85be2ae01c63caea0d20fcc42c724 Mon Sep 17 00:00:00 2001
From: Raymond Hettinger <raymond.hettinger@gmail.com>
Date: Sun, 28 Jan 2018 17:59:16 -0800
Subject: [PATCH 47/47] Clean-up and fix tests for mutation of __class__.

* Use the with-statement form of self.assertRaises.

* Make the two assertions independent of one another.
  The second test was invalid because the first list
  was already sorted, making the ValueError inevitable.
---
 Lib/test/test_sort.py | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/Lib/test/test_sort.py b/Lib/test/test_sort.py
index 15fc1dd72a5f65..f2f53cb1a72f63 100644
--- a/Lib/test/test_sort.py
+++ b/Lib/test/test_sort.py
@@ -309,20 +309,28 @@ def test_unsafe_object_compare(self):
 
         # This test is by ppperry. It ensures that unsafe_object_compare is
         # verifying ms->key_richcompare == tp->richcompare before comparing.
+
         class WackyComparator(int):
             def __lt__(self, other):
                 elem.__class__ = WackyList2
                 return int.__lt__(self, other)
 
-        class WackyList1(list):pass
+        class WackyList1(list):
+            pass
+
         class WackyList2(list):
             def __lt__(self, other):
                 raise ValueError
 
         L = [WackyList1([WackyComparator(i), i]) for i in range(10)]
         elem = L[-1]
-        self.assertRaises(ValueError, L.sort)
-        self.assertRaises(ValueError, [(x,) for x in L].sort)
+        with self.assertRaises(ValueError):
+            L.sort()
+
+        L = [WackyList1([WackyComparator(i), i]) for i in range(10)]
+        elem = L[-1]
+        with self.assertRaises(ValueError):
+            [(x,) for x in L].sort()
 
         # The following test is also by ppperry. It ensures that
         # unsafe_object_compare handles Py_NotImplemented appropriately.