From 5f6c6149e36bc36f5ed86c55b27c0b7e6136099e Mon Sep 17 00:00:00 2001
From: sergul <sergulaydore@gmail.com>
Date: Mon, 16 Jul 2018 06:17:25 -0400
Subject: [PATCH 01/11] Add check that sample_weight=None is equivalent to sample_weight=ones

---
 sklearn/utils/estimator_checks.py | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index d25abbe6377db..bfcf58737f75a 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -88,6 +88,7 @@ def _yield_non_meta_checks(name, estimator):
     yield check_dtype_object
     yield check_sample_weights_pandas_series
     yield check_sample_weights_list
+    yield check_sample_weight_invariance
     yield check_estimators_fit_returns_self
     yield partial(check_estimators_fit_returns_self, readonly_memmap=True)
     yield check_complex_data
@@ -553,6 +554,24 @@ def check_sample_weights_list(name, estimator_orig):
         estimator.fit(X, y, sample_weight=sample_weight)
 
 
+@ignore_warnings(category=(DeprecationWarning, FutureWarning))
+def check_sample_weight_invariance(name, estimator_orig):
+    if has_fit_parameter(estimator_orig, "sample_weight"):
+        estimator1 = clone(estimator_orig)
+        estimator2 = clone(estimator_orig)
+        X = np.array([[1, 1], [1, 2], [1, 3], [1, 4],
+                      [2, 1], [2, 2], [2, 3], [2, 4]])
+        y = np.array([1, 1, 1, 1, 2, 2, 2, 2])
+        estimator1.fit(X, y, sample_weight=None)
+        estimator2.fit(X, y, sample_weight=np.ones(shape=len(y)))
+        X_pred1 = estimator1.predict(X)
+        X_pred2 = estimator2.predict(X)
+        try:
+            assert_array_equal(X_pred1, X_pred2)
+        except ValueError:
+            raise ValueError("For %s sample_weight=None is not equivalent to "
+                             "sample_weight=ones" % name)
+
 @ignore_warnings(category=(DeprecationWarning, FutureWarning, UserWarning))
 def check_dtype_object(name, estimator_orig):
     # check that estimators treat dtype object as numeric if possible

From e47a059306b7b1d53af8e30172ed2b252a786f15 Mon Sep 17 00:00:00 2001
From: sergul <sergulaydore@gmail.com>
Date: Mon, 16 Jul 2018 06:17:25 -0400
Subject: [PATCH 02/11] test for None and ones for sample_weight added

---
 sklearn/utils/estimator_checks.py | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index d25abbe6377db..bfcf58737f75a 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -88,6 +88,7 @@ def _yield_non_meta_checks(name, estimator):
     yield check_dtype_object
     yield check_sample_weights_pandas_series
     yield check_sample_weights_list
+    yield check_sample_weight_invariance
     yield check_estimators_fit_returns_self
     yield partial(check_estimators_fit_returns_self, readonly_memmap=True)
     yield check_complex_data
@@ -553,6 +554,24 @@ def check_sample_weights_list(name, estimator_orig):
         estimator.fit(X, y, sample_weight=sample_weight)
 
 
+@ignore_warnings(category=(DeprecationWarning, FutureWarning))
+def check_sample_weight_invariance(name, estimator_orig):
+    if has_fit_parameter(estimator_orig, "sample_weight"):
+        estimator1 = clone(estimator_orig)
+        estimator2 = clone(estimator_orig)
+        X = np.array([[1, 1], [1, 2], [1, 3], [1, 4],
+                      [2, 1], [2, 2], [2, 3], [2, 4]])
+        y = np.array([1, 1, 1, 1, 2, 2, 2, 2])
+        estimator1.fit(X, y, sample_weight=None)
+        estimator2.fit(X, y, sample_weight=np.ones(shape=len(y)))
+        X_pred1 = estimator1.predict(X)
+        X_pred2 = estimator2.predict(X)
+        try:
+            assert_array_equal(X_pred1, X_pred2)
+        except ValueError:
+            raise ValueError("For %s sample_weight=None is not equivalent to "
+                             "sample_weight=ones" % name)
+
 @ignore_warnings(category=(DeprecationWarning, FutureWarning, UserWarning))
 def check_dtype_object(name, estimator_orig):
     # check that estimators treat dtype object as numeric if possible

From 688be6af17eda17f54b3193c771d921dcc989412 Mon Sep 17 00:00:00 2001
From: sergul <sergulaydore@gmail.com>
Date: Mon, 16 Jul 2018 10:47:22 -0400
Subject: [PATCH 03/11] Skip KMeans-based estimators; compare predict and transform with assert_allclose

---
 sklearn/utils/estimator_checks.py | 46 ++++++++++++++++++++++---------
 1 file changed, 33 insertions(+), 13 deletions(-)

diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index bfcf58737f75a..d54532c6644fc 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -556,21 +556,41 @@ def check_sample_weights_list(name, estimator_orig):
 
 @ignore_warnings(category=(DeprecationWarning, FutureWarning))
 def check_sample_weight_invariance(name, estimator_orig):
-    if has_fit_parameter(estimator_orig, "sample_weight"):
+    if (has_fit_parameter(estimator_orig, "sample_weight") and
+                    name not in ["KMeans", "MiniBatchKMeans"]):
         estimator1 = clone(estimator_orig)
         estimator2 = clone(estimator_orig)
-        X = np.array([[1, 1], [1, 2], [1, 3], [1, 4],
-                      [2, 1], [2, 2], [2, 3], [2, 4]])
-        y = np.array([1, 1, 1, 1, 2, 2, 2, 2])
-        estimator1.fit(X, y, sample_weight=None)
-        estimator2.fit(X, y, sample_weight=np.ones(shape=len(y)))
-        X_pred1 = estimator1.predict(X)
-        X_pred2 = estimator2.predict(X)
-        try:
-            assert_array_equal(X_pred1, X_pred2)
-        except ValueError:
-            raise ValueError("For %s sample_weight=None is not equivalent to "
-                             "sample_weight=ones" % name)
+
+        X = np.array([[1, 3], [1, 3], [1, 3], [1, 3],
+                      [2, 1], [2, 1], [2, 1], [2, 1],
+                      [3, 3], [3, 3], [3, 3], [3, 3],
+                      [4, 1], [4, 1], [4, 1], [4, 1]])
+        y = np.array([1, 1, 1, 1, 2, 2, 2, 2,
+                      1, 1, 1, 1, 2, 2, 2, 2])
+
+        if has_fit_parameter(estimator_orig, "random_state"):
+            estimator1.fit(X, y=y, sample_weight=np.ones(shape=len(y)), random_state=0)
+            estimator2.fit(X, y=y, sample_weight=None, random_state=0)
+        else:
+            estimator1.fit(X, y=y, sample_weight=np.ones(shape=len(y)))
+            estimator2.fit(X, y=y, sample_weight=None)
+
+        if hasattr(estimator_orig, "predict"):
+            X_pred1 = estimator1.predict(X)
+            X_pred2 = estimator2.predict(X)
+            try:
+                assert_allclose(X_pred1, X_pred2, rtol=0.5)
+            except ValueError:
+                raise ValueError("For %s sample_weight=None is not equivalent to "
+                                 "sample_weight=ones" % name)
+        if hasattr(estimator_orig, "transform"):
+            X_pred1 = estimator1.transform(X)
+            X_pred2 = estimator2.transform(X)
+            try:
+                assert_allclose(X_pred1, X_pred2, rtol=0.5)
+            except ValueError:
+                raise ValueError("For %s sample_weight=None is not equivalent to "
+                                 "sample_weight=ones" % name)
 
 @ignore_warnings(category=(DeprecationWarning, FutureWarning, UserWarning))
 def check_dtype_object(name, estimator_orig):

From 3533647e365ffbae6b9c8cfde67b4f5dcabd7a94 Mon Sep 17 00:00:00 2001
From: sergul <sergulaydore@gmail.com>
Date: Mon, 16 Jul 2018 10:59:22 -0400
Subject: [PATCH 04/11] Remove leftover merge-conflict marker and trailing whitespace

---
 sklearn/utils/estimator_checks.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index 5234e92323da8..744406f9a6829 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -556,7 +556,6 @@ def check_sample_weights_list(name, estimator_orig):
 
 @ignore_warnings(category=(DeprecationWarning, FutureWarning))
 def check_sample_weight_invariance(name, estimator_orig):
-<<<<<<< HEAD
     if (has_fit_parameter(estimator_orig, "sample_weight") and
                     name not in ["KMeans", "MiniBatchKMeans"]):
         estimator1 = clone(estimator_orig)
@@ -592,7 +591,7 @@ def check_sample_weight_invariance(name, estimator_orig):
             except ValueError:
                 raise ValueError("For %s sample_weight=None is not equivalent to "
                                  "sample_weight=ones" % name)
-            
+
 
 @ignore_warnings(category=(DeprecationWarning, FutureWarning, UserWarning))
 def check_dtype_object(name, estimator_orig):

From 0738abc8a4f682f998a6893d4eb0cd4fc6b48706 Mon Sep 17 00:00:00 2001
From: sergul <sergulaydore@gmail.com>
Date: Mon, 16 Jul 2018 11:38:32 -0400
Subject: [PATCH 05/11] Set explicit dtypes on test data; rename transform outputs to X_trans

---
 sklearn/utils/estimator_checks.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index 744406f9a6829..873fe91bace60 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -564,9 +564,9 @@ def check_sample_weight_invariance(name, estimator_orig):
         X = np.array([[1, 3], [1, 3], [1, 3], [1, 3],
                       [2, 1], [2, 1], [2, 1], [2, 1],
                       [3, 3], [3, 3], [3, 3], [3, 3],
-                      [4, 1], [4, 1], [4, 1], [4, 1]])
+                      [4, 1], [4, 1], [4, 1], [4, 1]], dtype=np.dtype('float'))
         y = np.array([1, 1, 1, 1, 2, 2, 2, 2,
-                      1, 1, 1, 1, 2, 2, 2, 2])
+                      1, 1, 1, 1, 2, 2, 2, 2], dtype=np.dtype('float'))
 
         if has_fit_parameter(estimator_orig, "random_state"):
             estimator1.fit(X, y=y, sample_weight=np.ones(shape=len(y)), random_state=0)
@@ -584,10 +584,10 @@ def check_sample_weight_invariance(name, estimator_orig):
                 raise ValueError("For %s sample_weight=None is not equivalent to "
                                  "sample_weight=ones" % name)
         if hasattr(estimator_orig, "transform"):
-            X_pred1 = estimator1.transform(X)
-            X_pred2 = estimator2.transform(X)
+            X_trans1 = estimator1.transform(X)
+            X_trans2 = estimator2.transform(X)
             try:
-                assert_allclose(X_pred1, X_pred2, rtol=0.5)
+                assert_allclose(X_trans1, X_trans2, rtol=0.5)
             except ValueError:
                 raise ValueError("For %s sample_weight=None is not equivalent to "
                                  "sample_weight=ones" % name)

From 1d93961215937b45dfbdc07378578d068c643494 Mon Sep 17 00:00:00 2001
From: sergul <sergulaydore@gmail.com>
Date: Tue, 17 Jul 2018 02:12:52 -0400
Subject: [PATCH 06/11] Address review: use set_random_state and loop over predict/transform

---
 sklearn/utils/estimator_checks.py | 41 ++++++++++++-------------------
 1 file changed, 16 insertions(+), 25 deletions(-)

diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index 873fe91bace60..d3ea1b5b866f4 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -556,41 +556,32 @@ def check_sample_weights_list(name, estimator_orig):
 
 @ignore_warnings(category=(DeprecationWarning, FutureWarning))
 def check_sample_weight_invariance(name, estimator_orig):
+    # check that the estimators yield same results for
+    # unit weights and no weights
     if (has_fit_parameter(estimator_orig, "sample_weight") and
                     name not in ["KMeans", "MiniBatchKMeans"]):
         estimator1 = clone(estimator_orig)
         estimator2 = clone(estimator_orig)
+        set_random_state(estimator1, random_state=42)
+        set_random_state(estimator2, random_state=42)
 
         X = np.array([[1, 3], [1, 3], [1, 3], [1, 3],
                       [2, 1], [2, 1], [2, 1], [2, 1],
                       [3, 3], [3, 3], [3, 3], [3, 3],
                       [4, 1], [4, 1], [4, 1], [4, 1]], dtype=np.dtype('float'))
         y = np.array([1, 1, 1, 1, 2, 2, 2, 2,
-                      1, 1, 1, 1, 2, 2, 2, 2], dtype=np.dtype('float'))
-
-        if has_fit_parameter(estimator_orig, "random_state"):
-            estimator1.fit(X, y=y, sample_weight=np.ones(shape=len(y)), random_state=0)
-            estimator2.fit(X, y=y, sample_weight=None, random_state=0)
-        else:
-            estimator1.fit(X, y=y, sample_weight=np.ones(shape=len(y)))
-            estimator2.fit(X, y=y, sample_weight=None)
-
-        if hasattr(estimator_orig, "predict"):
-            X_pred1 = estimator1.predict(X)
-            X_pred2 = estimator2.predict(X)
-            try:
-                assert_allclose(X_pred1, X_pred2, rtol=0.5)
-            except ValueError:
-                raise ValueError("For %s sample_weight=None is not equivalent to "
-                                 "sample_weight=ones" % name)
-        if hasattr(estimator_orig, "transform"):
-            X_trans1 = estimator1.transform(X)
-            X_trans2 = estimator2.transform(X)
-            try:
-                assert_allclose(X_trans1, X_trans2, rtol=0.5)
-            except ValueError:
-                raise ValueError("For %s sample_weight=None is not equivalent to "
-                                 "sample_weight=ones" % name)
+                      1, 1, 1, 1, 2, 2, 2, 2], dtype=np.dtype('int'))
+
+        estimator1.fit(X, y=y, sample_weight=np.ones(shape=len(y)))
+        estimator2.fit(X, y=y, sample_weight=None)
+
+        for method in ["predict", "transform"]:
+            if hasattr(estimator_orig, method):
+                X_pred1 = getattr(estimator1, method)(X)
+                X_pred2 = getattr(estimator2, method)(X)
+                assert_allclose(X_pred1, X_pred2, rtol=0.5,
+                                err_msg="For %s sample_weight=None is not equivalent to "
+                                "sample_weight=ones" % name)
 
 
 @ignore_warnings(category=(DeprecationWarning, FutureWarning, UserWarning))

From 6b7f20a1d4793d29b9486aa87d4c0980733d008e Mon Sep 17 00:00:00 2001
From: sergul <sergulaydore@gmail.com>
Date: Tue, 17 Jul 2018 03:38:22 -0400
Subject: [PATCH 07/11] Fix lint errors (continuation-line indentation, line length)

---
 sklearn/utils/estimator_checks.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index d3ea1b5b866f4..c003aff3996af 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -559,7 +559,7 @@ def check_sample_weight_invariance(name, estimator_orig):
     # check that the estimators yield same results for
     # unit weights and no weights
     if (has_fit_parameter(estimator_orig, "sample_weight") and
-                    name not in ["KMeans", "MiniBatchKMeans"]):
+            name not in ["KMeans", "MiniBatchKMeans"]):
         estimator1 = clone(estimator_orig)
         estimator2 = clone(estimator_orig)
         set_random_state(estimator1, random_state=42)
@@ -580,8 +580,8 @@ def check_sample_weight_invariance(name, estimator_orig):
                 X_pred1 = getattr(estimator1, method)(X)
                 X_pred2 = getattr(estimator2, method)(X)
                 assert_allclose(X_pred1, X_pred2, rtol=0.5,
-                                err_msg="For %s sample_weight=None is not equivalent to "
-                                "sample_weight=ones" % name)
+                                err_msg="For %s sample_weight=None is not equivalent"
+                                        " to sample_weight=ones" % name)
 
 
 @ignore_warnings(category=(DeprecationWarning, FutureWarning, UserWarning))

From ad50ab98fa3d0bfab7f68478407dd4a1f09d4fa7 Mon Sep 17 00:00:00 2001
From: sergul <sergulaydore@gmail.com>
Date: Tue, 17 Jul 2018 07:03:03 -0400
Subject: [PATCH 08/11] Rename to check_sample_weights_invariance and skip pairwise estimators

---
 sklearn/utils/estimator_checks.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index 481b7c511bee0..3af29350067b8 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -88,7 +88,7 @@ def _yield_non_meta_checks(name, estimator):
     yield check_dtype_object
     yield check_sample_weights_pandas_series
     yield check_sample_weights_list
-    yield check_sample_weight_invariance
+    yield check_sample_weights_invariance
     yield check_estimators_fit_returns_self
     yield partial(check_estimators_fit_returns_self, readonly_memmap=True)
     yield check_complex_data
@@ -556,10 +556,12 @@ def check_sample_weights_list(name, estimator_orig):
 
 
 @ignore_warnings(category=(DeprecationWarning, FutureWarning))
-def check_sample_weight_invariance(name, estimator_orig):
+def check_sample_weights_invariance(name, estimator_orig):
     # check that the estimators yield same results for
     # unit weights and no weights
     if (has_fit_parameter(estimator_orig, "sample_weight") and
+            not (hasattr(estimator_orig, "_pairwise")
+                 and estimator_orig._pairwise) and
             name not in ["KMeans", "MiniBatchKMeans"]):
         estimator1 = clone(estimator_orig)
         estimator2 = clone(estimator_orig)

From 033a84a123d197b36806eda161216f0d2fbd8f03 Mon Sep 17 00:00:00 2001
From: Gael Varoquaux <gael.varoquaux@normalesup.org>
Date: Tue, 17 Jul 2018 15:03:16 +0200
Subject: [PATCH 09/11] PEP8: line too long

---
 sklearn/utils/estimator_checks.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index 3af29350067b8..6246d45974e45 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -583,8 +583,8 @@ def check_sample_weights_invariance(name, estimator_orig):
                 X_pred1 = getattr(estimator1, method)(X)
                 X_pred2 = getattr(estimator2, method)(X)
                 assert_allclose(X_pred1, X_pred2, rtol=0.5,
-                                err_msg="For %s sample_weight=None is not equivalent"
-                                        " to sample_weight=ones" % name)
+                    err_msg="For %s sample_weight=None is not equivalent"
+                            " to sample_weight=ones" % name)
 
 
 @ignore_warnings(category=(DeprecationWarning, FutureWarning, UserWarning))

From ef0371c4aecda03cadb3a402c1975aa71d4dc6ac Mon Sep 17 00:00:00 2001
From: sergul <sergulaydore@gmail.com>
Date: Tue, 17 Jul 2018 09:59:07 -0400
Subject: [PATCH 10/11] Add explanatory comments and fix PEP8 issues

---
 sklearn/utils/estimator_checks.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index 6246d45974e45..00abf31187d0a 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -563,6 +563,8 @@ def check_sample_weights_invariance(name, estimator_orig):
             not (hasattr(estimator_orig, "_pairwise")
                  and estimator_orig._pairwise) and
             name not in ["KMeans", "MiniBatchKMeans"]):
+        # We skip pairwise because the data is not pairwise
+        # KMeans and MiniBatchKMeans were unstable; hence skipped.
         estimator1 = clone(estimator_orig)
         estimator2 = clone(estimator_orig)
         set_random_state(estimator1, random_state=42)
@@ -583,8 +585,9 @@ def check_sample_weights_invariance(name, estimator_orig):
                 X_pred1 = getattr(estimator1, method)(X)
                 X_pred2 = getattr(estimator2, method)(X)
                 assert_allclose(X_pred1, X_pred2, rtol=0.5,
-                    err_msg="For %s sample_weight=None is not equivalent"
-                            " to sample_weight=ones" % name)
+                                err_msg="For %s sample_weight=None is not"
+                                        " equivalent to sample_weight=ones"
+                                        % name)
 
 
 @ignore_warnings(category=(DeprecationWarning, FutureWarning, UserWarning))

From 1ca44abb85e0915b28a85bd6ab57c7110d006e30 Mon Sep 17 00:00:00 2001
From: sergul <sergulaydore@gmail.com>
Date: Tue, 17 Jul 2018 10:20:31 -0400
Subject: [PATCH 11/11] Stop skipping KMeans estimators; use random_state=0

---
 sklearn/utils/estimator_checks.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index 00abf31187d0a..1db6031e8d702 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -561,14 +561,13 @@ def check_sample_weights_invariance(name, estimator_orig):
     # unit weights and no weights
     if (has_fit_parameter(estimator_orig, "sample_weight") and
             not (hasattr(estimator_orig, "_pairwise")
-                 and estimator_orig._pairwise) and
-            name not in ["KMeans", "MiniBatchKMeans"]):
+                 and estimator_orig._pairwise)):
         # We skip pairwise because the data is not pairwise
-        # KMeans and MiniBatchKMeans were unstable; hence skipped.
+
         estimator1 = clone(estimator_orig)
         estimator2 = clone(estimator_orig)
-        set_random_state(estimator1, random_state=42)
-        set_random_state(estimator2, random_state=42)
+        set_random_state(estimator1, random_state=0)
+        set_random_state(estimator2, random_state=0)
 
         X = np.array([[1, 3], [1, 3], [1, 3], [1, 3],
                       [2, 1], [2, 1], [2, 1], [2, 1],