Common check for sample weight invariance with removed samples #16507
**`sklearn/svm/_classes.py`**

```diff
@@ -237,6 +237,14 @@ def fit(self, X, y, sample_weight=None):
 
         return self
 
+    def _more_tags(self):
+        return {
+            '_xfail_test': {
+                'check_sample_weights_invariance(kind=zeros)':
+                'zero sample_weight is not equivalent to removing samples',
+            }
+        }
+
 
 class LinearSVR(RegressorMixin, LinearModel):
     """Linear Support Vector Regression.
```
```diff
@@ -408,6 +416,14 @@ def fit(self, X, y, sample_weight=None):
 
         return self
 
+    def _more_tags(self):
+        return {
+            '_xfail_test': {
+                'check_sample_weights_invariance(kind=zeros)':
+                'zero sample_weight is not equivalent to removing samples',
+            }
+        }
+
 
 class SVC(BaseSVC):
     """C-Support Vector Classification.
```
```diff
@@ -625,6 +641,14 @@ def __init__(self, C=1.0, kernel='rbf', degree=3, gamma='scale',
             break_ties=break_ties,
             random_state=random_state)
 
+    def _more_tags(self):
+        return {
+            '_xfail_test': {
+                'check_sample_weights_invariance(kind=zeros)':
+                'zero sample_weight is not equivalent to removing samples',
+            }
+        }
+
 
 class NuSVC(BaseSVC):
     """Nu-Support Vector Classification.
```

**Comment:** Of course the amount of repetitions could be reduced by defining this tag in […]
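A rough illustration of that suggestion (a hypothetical refactoring, not what this PR does; the base class name is an assumption, and it would not cover `LinearSVC`/`LinearSVR`, which do not derive from it):

```python
# Hypothetical sketch: declaring the tag once on a shared base class
# would let every libsvm-based subclass inherit it. Not part of this PR.
class BaseLibSVM:
    def _more_tags(self):
        return {
            '_xfail_test': {
                'check_sample_weights_invariance(kind=zeros)':
                'zero sample_weight is not equivalent to removing samples',
            }
        }

# SVC, NuSVC, SVR, NuSVR and OneClassSVM would then pick the tag up
# through inheritance instead of each repeating the same override.
```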
```diff
@@ -833,7 +857,9 @@ def _more_tags(self):
             '_xfail_test': {
                 'check_methods_subset_invariance':
                 'fails for the decision_function method',
-                'check_class_weight_classifiers': 'class_weight is ignored.'
+                'check_class_weight_classifiers': 'class_weight is ignored.',
+                'check_sample_weights_invariance(kind=zeros)':
+                'zero sample_weight is not equivalent to removing samples',
             }
         }
```
```diff
@@ -986,6 +1012,14 @@ def probA_(self):
     def probB_(self):
         return self._probB
 
+    def _more_tags(self):
+        return {
+            '_xfail_test': {
+                'check_sample_weights_invariance(kind=zeros)':
+                'zero sample_weight is not equivalent to removing samples',
+            }
+        }
+
 
 class NuSVR(RegressorMixin, BaseLibSVM):
     """Nu Support Vector Regression.
```
```diff
@@ -1111,6 +1145,14 @@ def __init__(self, nu=0.5, C=1.0, kernel='rbf', degree=3,
             probability=False, cache_size=cache_size, class_weight=None,
             verbose=verbose, max_iter=max_iter, random_state=None)
 
+    def _more_tags(self):
+        return {
+            '_xfail_test': {
+                'check_sample_weights_invariance(kind=zeros)':
+                'zero sample_weight is not equivalent to removing samples',
+            }
+        }
+
 
 class OneClassSVM(OutlierMixin, BaseLibSVM):
     """Unsupervised Outlier Detection.
```
```diff
@@ -1319,3 +1361,11 @@ def probA_(self):
     @property
     def probB_(self):
         return self._probB
+
+    def _more_tags(self):
+        return {
+            '_xfail_test': {
+                'check_sample_weights_invariance(kind=zeros)':
+                'zero sample_weight is not equivalent to removing samples',
+            }
+        }
```
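For context, a small usage sketch, assuming a scikit-learn build that includes this branch: the tag declared in `_more_tags` surfaces through the standard estimator tag machinery.

```python
# Sketch: reading the new tag back via the estimator tag mechanism.
# Assumes a scikit-learn version containing this PR.
from sklearn.svm import SVC

tags = SVC()._get_tags()  # merges _more_tags() across the class hierarchy
print(tags['_xfail_test'])
# {'check_sample_weights_invariance(kind=zeros)':
#  'zero sample_weight is not equivalent to removing samples'}
```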
**`sklearn/utils/estimator_checks.py`**

```diff
@@ -81,7 +81,8 @@ def _yield_checks(name, estimator):
     yield check_sample_weights_not_an_array
     yield check_sample_weights_list
     yield check_sample_weights_shape
-    yield check_sample_weights_invariance
+    yield partial(check_sample_weights_invariance, kind='ones')
+    yield partial(check_sample_weights_invariance, kind='zeros')
     yield check_estimators_fit_returns_self
     yield partial(check_estimators_fit_returns_self, readonly_memmap=True)
```
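The keys used in the `_xfail_test` tags above have to match the ids generated for these `partial`-wrapped checks. A simplified sketch of that naming scheme (the real `_set_check_estimator_ids` handles more cases; this toy version is only an illustration):

```python
from functools import partial

def check_id(check):
    # Toy version of _set_check_estimator_ids: a partial with keyword
    # arguments is rendered as 'func_name(key=value)'.
    if isinstance(check, partial):
        if not check.keywords:
            return check.func.__name__
        kwargs = ', '.join(f'{k}={v}' for k, v in check.keywords.items())
        return f'{check.func.__name__}({kwargs})'
    return check.__name__

def check_sample_weights_invariance(name, estimator_orig, kind='ones'):
    pass  # stand-in for the real check

print(check_id(partial(check_sample_weights_invariance, kind='zeros')))
# -> check_sample_weights_invariance(kind=zeros)
```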
```diff
@@ -443,6 +444,7 @@ def check_estimator(Estimator, generate_only=False):
     if isinstance(Estimator, type):
         # got a class
         checks_generator = _generate_class_checks(Estimator)
+        estimator = _construct_instance(Estimator)
     else:
         # got an instance
         estimator = Estimator
```
```diff
@@ -452,12 +454,19 @@ def check_estimator(Estimator, generate_only=False):
     if generate_only:
         return checks_generator
 
+    xfail_checks = _safe_tags(estimator, '_xfail_test')
+
     for estimator, check in checks_generator:
+        check_name = _set_check_estimator_ids(check)
+        if xfail_checks and check_name in xfail_checks:
+            # skip tests marked as a known failure and raise a warning
+            msg = xfail_checks[check_name]
+            warnings.warn(f'Skipping {check_name}: {msg}', SkipTestWarning)
+            continue
+
         try:
             check(estimator)
         except SkipTest as exception:
             # the only SkipTest thrown currently results from not
             # being able to import pandas.
             # raise warning for tests that are skipped
             warnings.warn(str(exception), SkipTestWarning)
```

**Author:** This is a change following #16502, analogous to what was added there: tests marked as xfail are skipped with a warning in `check_estimator`.

**Reviewer** (on lines +462 to +465): Can we apply the xfail decorator as a function instead of manually replicating its behavior? I.e. `check = pytest.mark.xfail(check)` or something like that? Because we have the try / except logic just below (and we might want to update the comment about pandas).

**Author:** We can't use pytest in this file, since it's supposed to work without it. There is indeed some slight redundancy with `_mark_xfail_checks`, but I think trying to factorize it might be more trouble than it's worth. It's just 6 extra lines in the end. Updated the comment about pandas.

**Reviewer:** I'd say the introduction of the tag is a good reason to start requiring […]

**Author:** Sure, though I find our whole test suite really hard to maintain in general.

**Reviewer:** Agreed, I'm just saying that slightly more verbosity and 2 repetitions is easier to maintain than coming up with some function wrappers. The problem is not so much lines of code as complexity.
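A usage sketch of the new code path (assuming a scikit-learn build with this branch): checks listed in an estimator's `_xfail_test` tag no longer fail `check_estimator`; they are skipped with a `SkipTestWarning`.

```python
# Sketch: xfail-tagged checks are skipped with a warning rather than run.
import warnings
from sklearn.exceptions import SkipTestWarning
from sklearn.svm import NuSVC
from sklearn.utils.estimator_checks import check_estimator

with warnings.catch_warnings(record=True) as records:
    warnings.simplefilter("always")
    check_estimator(NuSVC())

skipped = [str(r.message) for r in records
           if issubclass(r.category, SkipTestWarning)]
# Expected to contain, among others:
# "Skipping check_sample_weights_invariance(kind=zeros): zero
#  sample_weight is not equivalent to removing samples"
```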
```diff
@@ -816,7 +825,7 @@ def check_sample_weights_shape(name, estimator_orig):
 
 
 @ignore_warnings(category=FutureWarning)
-def check_sample_weights_invariance(name, estimator_orig):
+def check_sample_weights_invariance(name, estimator_orig, kind="ones"):
     # check that the estimators yield same results for
     # unit weights and no weights
     if (has_fit_parameter(estimator_orig, "sample_weight") and
```

**Reviewer:** Should this check be tested?
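One way the check itself could be tested (a hypothetical meta-test, not part of this diff): feed the `kind='zeros'` variant an estimator that accepts but silently ignores `sample_weight`, and assert that the check fails.

```python
# Hypothetical meta-test sketch: an estimator that ignores
# sample_weight should trip the kind='zeros' check.
import numpy as np
import pytest
from sklearn.base import BaseEstimator, RegressorMixin
from sklearn.utils.estimator_checks import check_sample_weights_invariance

class IgnoresWeights(RegressorMixin, BaseEstimator):
    def fit(self, X, y, sample_weight=None):
        # sample_weight is accepted but unused, so the fit depends on
        # every row, including the zero-weighted ones.
        self.mean_ = np.asarray(X, dtype=float).mean(axis=0)
        return self

    def predict(self, X):
        return np.asarray(X, dtype=float) @ self.mean_

with pytest.raises(AssertionError):
    check_sample_weights_invariance('IgnoresWeights', IgnoresWeights(),
                                    kind='zeros')
```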
```diff
@@ -832,25 +841,45 @@ def check_sample_weights_invariance(name, estimator_orig):
     X = np.array([[1, 3], [1, 3], [1, 3], [1, 3],
                   [2, 1], [2, 1], [2, 1], [2, 1],
                   [3, 3], [3, 3], [3, 3], [3, 3],
-                  [4, 1], [4, 1], [4, 1], [4, 1]], dtype=np.dtype('float'))
+                  [4, 1], [4, 1], [4, 1], [4, 1]], dtype=np.float64)
     y = np.array([1, 1, 1, 1, 2, 2, 2, 2,
-                  1, 1, 1, 1, 2, 2, 2, 2], dtype=np.dtype('int'))
+                  1, 1, 1, 1, 2, 2, 2, 2], dtype=np.int)
 
+    if kind == 'ones':
+        X2 = X
+        y2 = y
+        sw2 = np.ones(shape=len(y))
+        err_msg = (f"For {name} sample_weight=None is not equivalent to "
+                   f"sample_weight=ones")
+    elif kind == 'zeros':
+        # Construct a dataset that is very different to (X, y) if weights
+        # are disregarded, but identical to (X, y) given weights.
+        X2 = np.vstack([X, X + 1])
+        y2 = np.hstack([y, 3 - y])
+        sw2 = np.ones(shape=len(y) * 2)
+        sw2[len(y):] = 0
+        X2, y2, sw2 = shuffle(X2, y2, sw2, random_state=0)
+
+        err_msg = (f"For {name} sample_weight is not equivalent "
+                   f"to removing samples")
+    else:
+        raise ValueError
+
     y = _enforce_estimator_tags_y(estimator1, y)
+    y2 = _enforce_estimator_tags_y(estimator2, y2)
 
-    estimator1.fit(X, y=y, sample_weight=np.ones(shape=len(y)))
-    estimator2.fit(X, y=y, sample_weight=None)
+    estimator1.fit(X, y=y, sample_weight=None)
+    estimator2.fit(X2, y=y2, sample_weight=sw2)
 
-    for method in ["predict", "transform"]:
+    for method in ["predict", "predict_proba",
+                   "decision_function", "transform"]:
         if hasattr(estimator_orig, method):
             X_pred1 = getattr(estimator1, method)(X)
             X_pred2 = getattr(estimator2, method)(X)
             if sparse.issparse(X_pred1):
                 X_pred1 = X_pred1.toarray()
                 X_pred2 = X_pred2.toarray()
-            assert_allclose(X_pred1, X_pred2,
-                            err_msg="For %s sample_weight=None is not"
-                                    " equivalent to sample_weight=ones"
-                                    % name)
+            assert_allclose(X_pred1, X_pred2, err_msg=err_msg)
 
 
 @ignore_warnings(category=(FutureWarning, UserWarning))
```
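To make the property the new `kind='zeros'` branch asserts concrete, here is a standalone sketch; the choice of `LogisticRegression` and the tolerance are assumptions, it is simply an estimator expected to satisfy the invariance.

```python
# Sketch of the invariance being checked: fitting with zero-weighted
# extra rows should match fitting with those rows removed entirely.
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.utils import shuffle

rng = np.random.RandomState(0)
X = rng.rand(20, 3)
y = (X[:, 0] > 0.5).astype(int)

# Augment with misleading rows, then zero out their weights.
X2 = np.vstack([X, X + 10])
y2 = np.hstack([y, 1 - y])
sw2 = np.hstack([np.ones(len(y)), np.zeros(len(y))])
X2, y2, sw2 = shuffle(X2, y2, sw2, random_state=0)

clf1 = LogisticRegression().fit(X, y)
clf2 = LogisticRegression().fit(X2, y2, sample_weight=sw2)

np.testing.assert_allclose(clf1.predict_proba(X), clf2.predict_proba(X),
                           rtol=1e-4)
```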
**Comment:** RidgeClassifierCV now also started to fail, with a quite high tolerance required to pass.

**Reviewer:** Completely unrelated to this PR, but since you've been doing this (which is helpful, thanks): do you think it'd be useful for GitHub to support comments from the PR author to reviewers, as something distinct from regular review comments?

**Author:** Maybe, but I can't say the current way it works is an issue for me.