[MRG] Prototype 4 for strict check_estimator mode #17361

Merged

Changes from all commits (23 commits):

8775c1e
treat strict checks as xfail checks
NicolasHug May 16, 2020
8c07ca6
Merge branch 'master' of github.com:scikit-learn/scikit-learn into st…
NicolasHug May 19, 2020
664552f
different names
NicolasHug May 19, 2020
ecff04c
Comments
NicolasHug May 19, 2020
8e66d47
Merge branch 'master' of github.com:scikit-learn/scikit-learn into st…
NicolasHug May 20, 2020
c7c5c8d
some clearning
NicolasHug May 20, 2020
e7d5f7c
Merge branch 'master' of github.com:scikit-learn/scikit-learn into st…
NicolasHug May 25, 2020
ff09e3a
Merge branch 'master' of github.com:scikit-learn/scikit-learn into st…
NicolasHug May 26, 2020
a2b5bf5
This is hard
NicolasHug May 26, 2020
2e2bebd
put back reasons
NicolasHug May 27, 2020
0a61d69
comments and cleaning
NicolasHug May 27, 2020
e1f1761
Merge branch 'master' of github.com:scikit-learn/scikit-learn into st…
NicolasHug May 27, 2020
9c9757b
typo
NicolasHug May 27, 2020
92b45e1
check name in xfail message
NicolasHug Jun 3, 2020
6c8af6a
Merge branch 'master' into strict_mode_xfails_partially_strict_checks
rth Jul 10, 2020
a92320d
Lint
rth Jul 10, 2020
82584dc
Lint
rth Jul 11, 2020
c88329b
Merge branch 'master' of github.com:scikit-learn/scikit-learn into st…
NicolasHug Jul 20, 2020
a0195f7
removed use of parenthesis in xfail_checks tag
NicolasHug Jul 20, 2020
3b69426
Addressed comments from Joel
NicolasHug Aug 2, 2020
735791c
Merge branch 'master' of github.com:scikit-learn/scikit-learn into st…
NicolasHug Aug 2, 2020
993dd94
probably fixed test
NicolasHug Aug 2, 2020
b55ee3c
use generator function instead of comprehension, hopefully clearer
NicolasHug Aug 2, 2020
2 changes: 1 addition & 1 deletion sklearn/calibration.py

@@ -371,7 +371,7 @@ class that has the highest probability, and can thus be different
    def _more_tags(self):
        return {
            '_xfail_checks': {
-               'check_sample_weights_invariance(kind=zeros)':
+               'check_sample_weights_invariance':
                'zero sample_weight is not equivalent to removing samples',
            }
        }

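For context on the pattern repeated across the estimator files below: an estimator declares known failures of the common checks in the _xfail_checks entry of its tags, and this PR changes the dictionary keys from parenthesized names like 'check_sample_weights_invariance(kind=zeros)' to bare check names. A minimal sketch of the tag declaration, using a hypothetical estimator (only the key/reason pair is taken from the diff above; the rest is illustrative):

from sklearn.base import BaseEstimator


class MyEstimator(BaseEstimator):
    # Hypothetical estimator that marks one common check as expected to fail.

    def fit(self, X, y=None):
        return self

    def _more_tags(self):
        # Keys are bare check names (the parenthesized kwargs suffix is
        # gone); values are the reasons reported when the check is xfailed.
        return {
            '_xfail_checks': {
                'check_sample_weights_invariance':
                'zero sample_weight is not equivalent to removing samples',
            }
        }
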
4 changes: 2 additions & 2 deletions sklearn/cluster/_kmeans.py

@@ -1163,7 +1163,7 @@ def score(self, X, y=None, sample_weight=None):
    def _more_tags(self):
        return {
            '_xfail_checks': {
-               'check_sample_weights_invariance(kind=zeros)':
+               'check_sample_weights_invariance':
                'zero sample_weight is not equivalent to removing samples',
            }
        }

@@ -1889,7 +1889,7 @@ def predict(self, X, sample_weight=None):
    def _more_tags(self):
        return {
            '_xfail_checks': {
-               'check_sample_weights_invariance(kind=zeros)':
+               'check_sample_weights_invariance':
                'zero sample_weight is not equivalent to removing samples',
            }
        }

2 changes: 1 addition & 1 deletion sklearn/ensemble/_iforest.py

@@ -457,7 +457,7 @@ def _compute_score_samples(self, X, subsample_features):
    def _more_tags(self):
        return {
            '_xfail_checks': {
-               'check_sample_weights_invariance(kind=zeros)':
+               'check_sample_weights_invariance':
                'zero sample_weight is not equivalent to removing samples',
            }
        }

2 changes: 1 addition & 1 deletion sklearn/linear_model/_logistic.py

@@ -2090,7 +2090,7 @@ def score(self, X, y, sample_weight=None):
    def _more_tags(self):
        return {
            '_xfail_checks': {
-               'check_sample_weights_invariance(kind=zeros)':
+               'check_sample_weights_invariance':
                'zero sample_weight is not equivalent to removing samples',
            }
        }

2 changes: 1 addition & 1 deletion sklearn/linear_model/_ransac.py

@@ -506,7 +506,7 @@ def score(self, X, y):
    def _more_tags(self):
        return {
            '_xfail_checks': {
-               'check_sample_weights_invariance(kind=zeros)':
+               'check_sample_weights_invariance':
                'zero sample_weight is not equivalent to removing samples',
            }
        }

2 changes: 1 addition & 1 deletion sklearn/linear_model/_ridge.py

@@ -1913,7 +1913,7 @@ def classes_(self):
    def _more_tags(self):
        return {
            '_xfail_checks': {
-               'check_sample_weights_invariance(kind=zeros)':
+               'check_sample_weights_invariance':
                'zero sample_weight is not equivalent to removing samples',
            }
        }

4 changes: 2 additions & 2 deletions sklearn/linear_model/_stochastic_gradient.py

@@ -1098,7 +1098,7 @@ def _predict_log_proba(self, X):
    def _more_tags(self):
        return {
            '_xfail_checks': {
-               'check_sample_weights_invariance(kind=zeros)':
+               'check_sample_weights_invariance':
                'zero sample_weight is not equivalent to removing samples',
            }
        }

@@ -1588,7 +1588,7 @@ def __init__(self, loss="squared_loss", *, penalty="l2", alpha=0.0001,
    def _more_tags(self):
        return {
            '_xfail_checks': {
-               'check_sample_weights_invariance(kind=zeros)':
+               'check_sample_weights_invariance':
                'zero sample_weight is not equivalent to removing samples',
            }
        }

2 changes: 1 addition & 1 deletion sklearn/neighbors/_kde.py

@@ -284,7 +284,7 @@ def sample(self, n_samples=1, random_state=None):
    def _more_tags(self):
        return {
            '_xfail_checks': {
-               'check_sample_weights_invariance(kind=zeros)':
+               'check_sample_weights_invariance':
                'sample_weight must have positive values',
            }
        }

14 changes: 7 additions & 7 deletions sklearn/svm/_classes.py

@@ -248,7 +248,7 @@ def fit(self, X, y, sample_weight=None):
    def _more_tags(self):
        return {
            '_xfail_checks': {
-               'check_sample_weights_invariance(kind=zeros)':
+               'check_sample_weights_invariance':
                'zero sample_weight is not equivalent to removing samples',
            }
        }

@@ -436,7 +436,7 @@ def fit(self, X, y, sample_weight=None):
    def _more_tags(self):
        return {
            '_xfail_checks': {
-               'check_sample_weights_invariance(kind=zeros)':
+               'check_sample_weights_invariance':
                'zero sample_weight is not equivalent to removing samples',
            }
        }

@@ -670,7 +670,7 @@ def __init__(self, *, C=1.0, kernel='rbf', degree=3, gamma='scale',
    def _more_tags(self):
        return {
            '_xfail_checks': {
-               'check_sample_weights_invariance(kind=zeros)':
+               'check_sample_weights_invariance':
                'zero sample_weight is not equivalent to removing samples',
            }
        }

@@ -895,7 +895,7 @@ def _more_tags(self):
                'check_methods_subset_invariance':
                'fails for the decision_function method',
                'check_class_weight_classifiers': 'class_weight is ignored.',
-               'check_sample_weights_invariance(kind=zeros)':
+               'check_sample_weights_invariance':
                'zero sample_weight is not equivalent to removing samples',
            }
        }

@@ -1072,7 +1072,7 @@ def probB_(self):
    def _more_tags(self):
        return {
            '_xfail_checks': {
-               'check_sample_weights_invariance(kind=zeros)':
+               'check_sample_weights_invariance':
                'zero sample_weight is not equivalent to removing samples',
            }
        }

@@ -1226,7 +1226,7 @@ def __init__(self, *, nu=0.5, C=1.0, kernel='rbf', degree=3,
    def _more_tags(self):
        return {
            '_xfail_checks': {
-               'check_sample_weights_invariance(kind=zeros)':
+               'check_sample_weights_invariance':
                'zero sample_weight is not equivalent to removing samples',
            }
        }

@@ -1459,7 +1459,7 @@ def probB_(self):
    def _more_tags(self):
        return {
            '_xfail_checks': {
-               'check_sample_weights_invariance(kind=zeros)':
+               'check_sample_weights_invariance':
                'zero sample_weight is not equivalent to removing samples',
            }
        }

74 changes: 69 additions & 5 deletions sklearn/tests/test_common.py

@@ -15,24 +15,27 @@
from functools import partial

import pytest

import numpy as np

from sklearn.utils import all_estimators
from sklearn.utils._testing import ignore_warnings
-from sklearn.exceptions import ConvergenceWarning
+from sklearn.exceptions import ConvergenceWarning, SkipTestWarning
from sklearn.utils.estimator_checks import check_estimator

import sklearn
from sklearn.base import BiclusterMixin

from sklearn.decomposition import NMF
from sklearn.utils.validation import check_non_negative, check_array
from sklearn.linear_model._base import LinearClassifierMixin
from sklearn.linear_model import LogisticRegression
from sklearn.svm import NuSVC
from sklearn.utils import IS_PYPY
from sklearn.utils._testing import SkipTest
from sklearn.utils.estimator_checks import (
    _construct_instance,
    _set_checking_parameters,
-   _set_check_estimator_ids,
+   _get_check_estimator_ids,
    check_class_weight_balanced_linear_classifier,
    parametrize_with_checks)

@@ -59,8 +62,8 @@ def _sample_func(x, y=1):
        "LogisticRegression(class_weight='balanced',random_state=1,"
        "solver='newton-cg',warm_start=True)")
])
-def test_set_check_estimator_ids(val, expected):
-    assert _set_check_estimator_ids(val) == expected
+def test_get_check_estimator_ids(val, expected):
+    assert _get_check_estimator_ids(val) == expected


def _tested_estimators():

@@ -204,3 +207,64 @@ def test_class_support_removed():

    with pytest.raises(TypeError, match=msg):
        parametrize_with_checks([LogisticRegression])
+
+
+class MyNMFWithBadErrorMessage(NMF):
+    # Same as NMF, but raises an uninformative error message if X has
+    # negative values. This estimator would fail the check suite in strict
+    # mode; specifically, it would fail check_fit_non_negative.
+    def fit(self, X, y=None, **params):
+        X = check_array(X, accept_sparse=('csr', 'csc'),
+                        dtype=[np.float64, np.float32])
+        try:
+            check_non_negative(X, whom='')
+        except ValueError:
+            raise ValueError("Some non-informative error msg")
+
+        return super().fit(X, y, **params)
+
+
+def test_strict_mode_check_estimator():

[Inline review thread on test_strict_mode_check_estimator]
Reviewer (scikit-learn member): Just curious, how long does this test take? Running check_estimator on an estimator takes a while, and we should likely avoid it if possible, though here I guess we have no choice?
NicolasHug (member, author): About 6 seconds on my machine. I agree it's a bit long, but I think it's worth it?

+    # Tests various conditions for the strict mode of check_estimator().
+    # Details are in the comments.
+
+    # LogisticRegression has no _xfail_checks, so when strict_mode is on,
+    # there should be no skipped tests.
+    with pytest.warns(None) as catched_warnings:
+        check_estimator(LogisticRegression(), strict_mode=True)
+    assert not any(isinstance(w, SkipTestWarning) for w in catched_warnings)
+    # When strict mode is off, check_n_features_in should be skipped because
+    # it's a fully strict check.
+    msg_check_n_features_in = 'check_n_features_in is fully strict '
+    with pytest.warns(SkipTestWarning, match=msg_check_n_features_in):
+        check_estimator(LogisticRegression(), strict_mode=False)
+
+    # NuSVC has some _xfail_checks. They should be skipped regardless of
+    # strict_mode.
+    with pytest.warns(SkipTestWarning,
+                      match='fails for the decision_function method'):
+        check_estimator(NuSVC(), strict_mode=True)
+    # When strict mode is off, check_n_features_in is skipped along with the
+    # rest of the xfail_checks.
+    with pytest.warns(SkipTestWarning, match=msg_check_n_features_in):
+        check_estimator(NuSVC(), strict_mode=False)
+
+    # MyNMFWithBadErrorMessage will fail check_fit_non_negative() in strict
+    # mode because it yields a bad error message.
+    with pytest.raises(AssertionError, match='does not match'):
+        check_estimator(MyNMFWithBadErrorMessage(), strict_mode=True)
+    # However, it should pass the test suite in non-strict mode because when
+    # strict mode is off, check_fit_non_negative() will not check the exact
+    # error message. (We still assert that the warning from
+    # check_n_features_in is raised.)
+    with pytest.warns(SkipTestWarning, match=msg_check_n_features_in):
+        check_estimator(MyNMFWithBadErrorMessage(), strict_mode=False)
+
+
+@parametrize_with_checks([LogisticRegression(),
+                          NuSVC(),
+                          MyNMFWithBadErrorMessage()],
+                         strict_mode=False)
+def test_strict_mode_parametrize_with_checks(estimator, check):
+    # Ideally we should assert that the strict checks are xfailed...
+    check(estimator)
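
Based on the call signatures exercised in this test file, this is how a third-party estimator developer might run the common-check suite with the new parameter. A minimal sketch, assuming only that check_estimator and parametrize_with_checks accept the strict_mode keyword added in this PR; the test function name is hypothetical:

from sklearn.linear_model import LogisticRegression
from sklearn.utils.estimator_checks import (check_estimator,
                                            parametrize_with_checks)

# Run all common checks at once; with strict_mode=False, the fully strict
# checks (e.g. check_n_features_in) are skipped with a SkipTestWarning.
check_estimator(LogisticRegression(), strict_mode=False)


# Or generate one pytest test per (estimator, check) pair, forwarding
# strict_mode to the underlying checks.
@parametrize_with_checks([LogisticRegression()], strict_mode=False)
def test_sklearn_compatible_estimator(estimator, check):
    check(estimator)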