From 4f5097520c95ed3089c45397e1def893e09a61b2 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Mon, 28 Sep 2020 18:50:24 -0400 Subject: [PATCH 01/21] WIP --- sklearn/utils/estimator_checks.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index 5b99e8e56c420..257da7dad4ada 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -760,6 +760,9 @@ def _generate_sparse_matrix(X_csr): def check_estimator_sparse_data(name, estimator_orig, strict_mode=True): + # Make sure that the estimator either accepts sparse data in fit and + # predict, or that it fails with a helpful error message. + # XXX this is a non-API check rng = np.random.RandomState(0) X = rng.rand(40, 10) X[X < .8] = 0 @@ -817,6 +820,7 @@ def check_estimator_sparse_data(name, estimator_orig, strict_mode=True): def check_sample_weights_pandas_series(name, estimator_orig, strict_mode=True): # check that estimators will accept a 'sample_weight' parameter of # type pandas.Series in the 'fit' function. + # XXX pure API check estimator = clone(estimator_orig) if has_fit_parameter(estimator, "sample_weight"): try: @@ -844,6 +848,7 @@ def check_sample_weights_pandas_series(name, estimator_orig, strict_mode=True): def check_sample_weights_not_an_array(name, estimator_orig, strict_mode=True): # check that estimators will accept a 'sample_weight' parameter of # type _NotAnArray in the 'fit' function. + # XXX pure API check estimator = clone(estimator_orig) if has_fit_parameter(estimator, "sample_weight"): X = np.array([[1, 1], [1, 2], [1, 3], [1, 4], @@ -861,6 +866,7 @@ def check_sample_weights_not_an_array(name, estimator_orig, strict_mode=True): def check_sample_weights_list(name, estimator_orig, strict_mode=True): # check that estimators will accept a 'sample_weight' parameter of # type list in the 'fit' function. + # XXX: pure API check if has_fit_parameter(estimator_orig, "sample_weight"): estimator = clone(estimator_orig) rnd = np.random.RandomState(0) @@ -878,6 +884,7 @@ def check_sample_weights_list(name, estimator_orig, strict_mode=True): def check_sample_weights_shape(name, estimator_orig, strict_mode=True): # check that estimators raise an error if sample_weight # shape mismatches the input + # XXX: pure API check?????? Are error checks API checks????? if (has_fit_parameter(estimator_orig, "sample_weight") and not (hasattr(estimator_orig, "_pairwise") and estimator_orig._pairwise)): @@ -906,6 +913,7 @@ def check_sample_weights_invariance(name, estimator_orig, kind="ones", # unit weights and no weights # For kind="zeros" check that setting sample_weight to 0 is equivalent # to removing corresponding samples. + # XXX: non-API check estimator1 = clone(estimator_orig) estimator2 = clone(estimator_orig) set_random_state(estimator1, random_state=0) @@ -955,6 +963,7 @@ def check_sample_weights_invariance(name, estimator_orig, kind="ones", @ignore_warnings(category=(FutureWarning, UserWarning)) def check_dtype_object(name, estimator_orig, strict_mode=True): # check that estimators treat dtype object as numeric if possible + # XXXX api or not????? partially???? 
rng = np.random.RandomState(0) X = _pairwise_estimator_convert_X(rng.rand(40, 10), estimator_orig) X = X.astype(object) @@ -988,6 +997,7 @@ def check_dtype_object(name, estimator_orig, strict_mode=True): def check_complex_data(name, estimator_orig, strict_mode=True): # check that estimators raise an exception on providing complex data + #XXX: error check... ????? X = np.random.sample(10) + 1j * np.random.sample(10) X = X.reshape(-1, 1) y = np.random.sample(10) + 1j * np.random.sample(10) @@ -998,12 +1008,9 @@ def check_complex_data(name, estimator_orig, strict_mode=True): @ignore_warnings def check_dict_unchanged(name, estimator_orig, strict_mode=True): - # this estimator raises - # ValueError: Found array with 0 feature(s) (shape=(23, 0)) - # while a minimum of 1 is required. - # error - if name in ['SpectralCoclustering']: - return + # check that calling the prediction method does not alter the __dict__ + # attribute of the estimator. + # XXX: pure API check rnd = np.random.RandomState(0) if name in ['RANSACRegressor']: X = 3 * rnd.uniform(size=(20, 3)) From ef04fceb957a73869412514929439902a3f98127 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Mon, 5 Oct 2020 13:39:41 -0400 Subject: [PATCH 02/21] WIP --- sklearn/model_selection/_split.py | 15 ++-- sklearn/utils/estimator_checks.py | 115 +++++++++++++++++++++++------- 2 files changed, 100 insertions(+), 30 deletions(-) diff --git a/sklearn/model_selection/_split.py b/sklearn/model_selection/_split.py index dfdbdebeb8b58..65aac104af6e4 100644 --- a/sklearn/model_selection/_split.py +++ b/sklearn/model_selection/_split.py @@ -1744,15 +1744,20 @@ def _iter_indices(self, X, y, groups=None): test = [] for i in range(n_classes): - permutation = rng.permutation(class_counts[i]) - perm_indices_class_i = class_indices[i].take(permutation, - mode='clip') + # permutation = rng.permutation(class_counts[i]) + # perm_indices_class_i = class_indices[i].take(permutation, + # mode='clip') + perm_indices_class_i = class_indices[i] + + train.extend(perm_indices_class_i[:n_i[i]]) test.extend(perm_indices_class_i[n_i[i]:n_i[i] + t_i[i]]) - train = rng.permutation(train) - test = rng.permutation(test) + # train = rng.permutation(train) + # test = rng.permutation(test) + train = np.array(train) + test = np.array(test) yield train, test diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index 257da7dad4ada..9456dae78ffe3 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -963,7 +963,7 @@ def check_sample_weights_invariance(name, estimator_orig, kind="ones", @ignore_warnings(category=(FutureWarning, UserWarning)) def check_dtype_object(name, estimator_orig, strict_mode=True): # check that estimators treat dtype object as numeric if possible - # XXXX api or not????? partially???? 
+ # XXX probably API except for error msg rng = np.random.RandomState(0) X = _pairwise_estimator_convert_X(rng.rand(40, 10), estimator_orig) X = X.astype(object) @@ -1050,6 +1050,7 @@ def _is_public_parameter(attr): @ignore_warnings(category=FutureWarning) def check_dont_overwrite_parameters(name, estimator_orig, strict_mode=True): # check that fit method only changes or sets private attributes + #XXX pure API check if hasattr(estimator_orig.__init__, "deprecated_original"): # to not check deprecated classes return @@ -1101,7 +1102,8 @@ def check_dont_overwrite_parameters(name, estimator_orig, strict_mode=True): @ignore_warnings(category=FutureWarning) def check_fit2d_predict1d(name, estimator_orig, strict_mode=True): - # check by fitting a 2d array and predicting with a 1d array + # check that predicting with a 1d array raises an error + # XXX Make message validation optional rnd = np.random.RandomState(0) X = 3 * rnd.uniform(size=(20, 3)) X = _pairwise_estimator_convert_X(X, estimator_orig) @@ -1151,6 +1153,7 @@ def _apply_on_subsets(func, X): def check_methods_subset_invariance(name, estimator_orig, strict_mode=True): # check that method gives invariant results if applied # on mini batches or the whole set + # XXX: non API check rnd = np.random.RandomState(0) X = 3 * rnd.uniform(size=(20, 3)) X = _pairwise_estimator_convert_X(X, estimator_orig) @@ -1184,6 +1187,7 @@ def check_fit2d_1sample(name, estimator_orig, strict_mode=True): # Check that fitting a 2d array with only one sample either works or # returns an informative message. The error message should either mention # the number of samples or the number of classes. + # XXX Non API check rnd = np.random.RandomState(0) X = 3 * rnd.uniform(size=(1, 10)) X = _pairwise_estimator_convert_X(X, estimator_orig) @@ -1214,6 +1218,7 @@ def check_fit2d_1sample(name, estimator_orig, strict_mode=True): def check_fit2d_1feature(name, estimator_orig, strict_mode=True): # check fitting a 2d array with only 1 feature either works or returns # informative message + # XXX non API check rnd = np.random.RandomState(0) X = 3 * rnd.uniform(size=(10, 1)) X = _pairwise_estimator_convert_X(X, estimator_orig) @@ -1244,6 +1249,7 @@ def check_fit2d_1feature(name, estimator_orig, strict_mode=True): @ignore_warnings def check_fit1d(name, estimator_orig, strict_mode=True): # check fitting 1d X array raises a ValueError + # XXX Pure API check rnd = np.random.RandomState(0) X = 3 * rnd.uniform(size=(20)) y = X.astype(int) @@ -1297,6 +1303,9 @@ def check_transformer_data_not_an_array(name, transformer, strict_mode=True): @ignore_warnings(category=FutureWarning) def check_transformers_unfitted(name, transformer, strict_mode=True): + # Make sure the unfitted transformer raises an error when transform is + # called + # XXX: non API check X, y = _regression_dataset() transformer = clone(transformer) @@ -1311,6 +1320,13 @@ def check_transformers_unfitted(name, transformer, strict_mode=True): def _check_transformer(name, transformer_orig, X, y, strict_mode=True): + # Check that: + # - fit_transform returns n_samples transformed samples + # - fit_transform and transform give equivalent results. 
+ # - fit_transform gives the same results twice + # - an error is raised if transform is called with an incorrect number of + # features + # XXX: Only make first and last checks part of API n_samples, n_features = np.asarray(X).shape transformer = clone(transformer_orig) set_random_state(transformer) @@ -1331,12 +1347,13 @@ def _check_transformer(name, transformer_orig, X, y, strict_mode=True): X_pred = transformer_clone.fit_transform(X, y=y_) if isinstance(X_pred, tuple): + # for cross-decomposition estimators that transform both X and y for x_pred in X_pred: assert x_pred.shape[0] == n_samples else: # check for consistent n_samples assert X_pred.shape[0] == n_samples - + if hasattr(transformer, 'transform'): if name in CROSS_DECOMPOSITION: X_pred2 = transformer.transform(X, y_) @@ -1379,7 +1396,6 @@ def _check_transformer(name, transformer_orig, X, y, strict_mode=True): not transformer._get_tags()["stateless"] and \ X.ndim == 2 and X.shape[1] > 1: - # If it's not an array, it does not have a 'T' property with raises( ValueError, err_msg=f"The transformer {name} does not raise an error " @@ -1391,11 +1407,14 @@ def _check_transformer(name, transformer_orig, X, y, strict_mode=True): @ignore_warnings def check_pipeline_consistency(name, estimator_orig, strict_mode=True): + # check that make_pipeline(est) gives results as est for scores and + # transforms + # XXX: full API + if estimator_orig._get_tags()['non_deterministic']: msg = name + ' is non deterministic' raise SkipTest(msg) - # check that make_pipeline(est) gives same score as est X, y = make_blobs(n_samples=30, centers=[[0, 0, 0], [1, 1, 1]], random_state=0, n_features=2, cluster_std=0.1) X -= X.min() @@ -1422,6 +1441,7 @@ def check_pipeline_consistency(name, estimator_orig, strict_mode=True): def check_fit_score_takes_y(name, estimator_orig, strict_mode=True): # check that all estimators accept an optional y # in fit and score so they can be used in pipelines + # XXX : full API check rnd = np.random.RandomState(0) n_samples = 30 X = rnd.uniform(size=(n_samples, 3)) @@ -1449,6 +1469,8 @@ def check_fit_score_takes_y(name, estimator_orig, strict_mode=True): @ignore_warnings def check_estimators_dtypes(name, estimator_orig, strict_mode=True): + # Check that methods can handle X input of different float and int dtypes + # XXX not an API check rnd = np.random.RandomState(0) X_train_32 = 3 * rnd.uniform(size=(20, 5)).astype(np.float32) X_train_32 = _pairwise_estimator_convert_X(X_train_32, estimator_orig) @@ -1475,6 +1497,7 @@ def check_transformer_preserve_dtypes( ): # check that dtype are preserved meaning if input X is of some dtype # X_transformed should be from the same dtype. + # XXX: not an API check X, y = make_blobs( n_samples=30, centers=[[0, 0, 0], [1, 1, 1]], @@ -1506,6 +1529,9 @@ def check_transformer_preserve_dtypes( @ignore_warnings(category=FutureWarning) def check_estimators_empty_data_messages(name, estimator_orig, strict_mode=True): + # Make sure that a ValueError is raised when fit is called on data with no + # sample or no features. + # XXX: API or not? e = clone(estimator_orig) set_random_state(e, 1) @@ -1531,7 +1557,9 @@ def check_estimators_empty_data_messages(name, estimator_orig, @ignore_warnings(category=FutureWarning) def check_estimators_nan_inf(name, estimator_orig, strict_mode=True): - # Checks that Estimator X's do not contain NaN or inf. + # Checks that fit, predict and transform raise an error if X contains nans + # or inf. + # XXX: probably not API? 
rnd = np.random.RandomState(0) X_train_finite = _pairwise_estimator_convert_X(rnd.uniform(size=(10, 3)), estimator_orig) @@ -1581,7 +1609,8 @@ def check_estimators_nan_inf(name, estimator_orig, strict_mode=True): @ignore_warnings def check_nonsquare_error(name, estimator_orig, strict_mode=True): - """Test that error is thrown when non-square data provided.""" + # Check that error is raised when non-square data is provided in fit + # XXX: API X, y = make_blobs(n_samples=20, n_features=10) estimator = clone(estimator_orig) @@ -1596,7 +1625,9 @@ def check_nonsquare_error(name, estimator_orig, strict_mode=True): @ignore_warnings def check_estimators_pickle(name, estimator_orig, strict_mode=True): - """Test that we can pickle all estimators.""" + # Test that we can pickle all estimators and that the pickled estimator + # gives the same predictions + # XXX: Non API check check_methods = ["predict", "transform", "decision_function", "predict_proba"] @@ -1641,7 +1672,9 @@ def check_estimators_pickle(name, estimator_orig, strict_mode=True): @ignore_warnings(category=FutureWarning) def check_estimators_partial_fit_n_features(name, estimator_orig, strict_mode=True): - # check if number of features changes between calls to partial_fit. + # check that an error is raised when number of features changes between + # calls to partial_fit. + # XXX: non API check if not hasattr(estimator_orig, 'partial_fit'): return estimator = clone(estimator_orig) @@ -1668,6 +1701,11 @@ def check_estimators_partial_fit_n_features(name, estimator_orig, @ignore_warnings(category=FutureWarning) def check_classifier_multioutput(name, estimator, strict_mode=True): + # Make sure that the output of predict_proba and decision_function is + # correct for multiouput classification (multilabel, multiclass). Also + # checks that predict_proba and decision_function have consistent + # predictions, i.e. the orders are consistent. + # XXX: full API check n_samples, n_labels, n_classes = 42, 5, 3 tags = estimator._get_tags() estimator = clone(estimator) @@ -1726,6 +1764,9 @@ def check_classifier_multioutput(name, estimator, strict_mode=True): @ignore_warnings(category=FutureWarning) def check_regressor_multioutput(name, estimator, strict_mode=True): + # Make sure that multioutput regressors output float64 predictions and that + # the shape is correct. + # XXX: make the first check not an API check estimator = clone(estimator) n_samples = n_features = 10 @@ -1743,7 +1784,7 @@ def check_regressor_multioutput(name, estimator, strict_mode=True): "Multioutput predictions by a regressor are expected to be" " floating-point precision. Got {} instead".format(y_pred.dtype)) assert y_pred.shape == y.shape, ( - "The shape of the orediction for multioutput data is incorrect." + "The shape of the prediction for multioutput data is incorrect." 
" Expected {}, got {}.") @@ -1776,6 +1817,7 @@ def check_clustering(name, clusterer_orig, readonly_memmap=False, pred = clusterer.labels_ assert pred.shape == (n_samples,) + # XXX: skip the rest when api_only is True assert adjusted_rand_score(pred, y) > 0.4 if clusterer._get_tags()['non_deterministic']: return @@ -1810,7 +1852,8 @@ def check_clustering(name, clusterer_orig, readonly_memmap=False, @ignore_warnings(category=FutureWarning) def check_clusterer_compute_labels_predict(name, clusterer_orig, strict_mode=True): - """Check that predict is invariant of compute_labels.""" + # Check that predict is invariant of compute_labels + # XXX: non API check X, y = make_blobs(n_samples=20, random_state=0) clusterer = clone(clusterer_orig) set_random_state(clusterer) @@ -1825,6 +1868,10 @@ def check_clusterer_compute_labels_predict(name, clusterer_orig, @ignore_warnings(category=FutureWarning) def check_classifiers_one_label(name, classifier_orig, strict_mode=True): + # Check that a classifier can fit when there's only 1 class, or that it + # raises a proper error. If it can fit, we also make sure that it can + # predict. + # XXX: non API check error_string_fit = "Classifier can't train when only one class is present." error_string_predict = ("Classifier can't predict when only one class is " "present.") @@ -1902,7 +1949,7 @@ def check_classifiers_train(name, classifier_orig, readonly_memmap=False, assert y_pred.shape == (n_samples,) # training set performance - if not tags['poor_score']: + if not tags['poor_score']: # XXX: not API assert accuracy_score(y, y_pred) > 0.83 # raises error on malformed input for predict @@ -1933,10 +1980,10 @@ def check_classifiers_train(name, classifier_orig, readonly_memmap=False, else: assert decision.shape == (n_samples, 1) dec_pred = (decision.ravel() > 0).astype(int) - assert_array_equal(dec_pred, y_pred) + assert_array_equal(dec_pred, y_pred) # XXX not API else: assert decision.shape == (n_samples, n_classes) - assert_array_equal(np.argmax(decision, axis=1), y_pred) + assert_array_equal(np.argmax(decision, axis=1), y_pred) # XXX not API # raises error on malformed input for decision_function if not tags["no_validation"]: @@ -1961,9 +2008,9 @@ def check_classifiers_train(name, classifier_orig, readonly_memmap=False, # predict_proba agrees with predict y_prob = classifier.predict_proba(X) assert y_prob.shape == (n_samples, n_classes) - assert_array_equal(np.argmax(y_prob, axis=1), y_pred) + assert_array_equal(np.argmax(y_prob, axis=1), y_pred)# XXX not API # check that probas for all classes sum to one - assert_array_almost_equal(np.sum(y_prob, axis=1), + assert_array_almost_equal(np.sum(y_prob, axis=1),# XXX not API np.ones(n_samples)) if not tags["no_validation"]: # raises error on malformed input for predict_proba @@ -1979,7 +2026,7 @@ def check_classifiers_train(name, classifier_orig, readonly_memmap=False, err_msg=msg.format(name, "predict_proba"), ): classifier.predict_proba(X.T) - if hasattr(classifier, "predict_log_proba"): + if hasattr(classifier, "predict_log_proba"):# XXX not API # predict_log_proba is a transformation of predict_proba y_log_prob = classifier.predict_log_proba(X) assert_allclose(y_log_prob, np.log(y_prob), 8, atol=1e-9) @@ -2040,7 +2087,7 @@ def check_outliers_train(name, estimator_orig, readonly_memmap=True, with raises(ValueError): estimator.predict(X.T) - # decision_function agrees with predict + # decision_function agrees with predict XXX not API dec_pred = (decision >= 0).astype(int) dec_pred[dec_pred == 0] = -1 
assert_array_equal(dec_pred, y_pred) @@ -2058,6 +2105,7 @@ def check_outliers_train(name, estimator_orig, readonly_memmap=True, estimator.score_samples(X.T) # contamination parameter (not for OneClassSVM which has the nu parameter) + # XXX: not API if (hasattr(estimator, 'contamination') and not hasattr(estimator, 'novelty')): # proportion of outliers equal to contamination parameter when not @@ -2090,6 +2138,8 @@ def check_outliers_train(name, estimator_orig, readonly_memmap=True, @ignore_warnings(category=(FutureWarning)) def check_classifiers_multilabel_representation_invariance( name, classifier_orig, strict_mode=True): + # check different target representations for multilabel classifiers + # XXX: pure API check X, y = make_multilabel_classification(n_samples=100, n_features=20, n_classes=5, n_labels=3, @@ -2125,7 +2175,8 @@ def check_classifiers_multilabel_representation_invariance( @ignore_warnings(category=FutureWarning) def check_estimators_fit_returns_self(name, estimator_orig, readonly_memmap=False, strict_mode=True): - """Check if self is returned when calling fit.""" + # Check that self is returned when calling fit. + # XXX pure API check X, y = make_blobs(random_state=0, n_samples=21) # some want non-negative input X -= X.min() @@ -2143,11 +2194,10 @@ def check_estimators_fit_returns_self(name, estimator_orig, @ignore_warnings def check_estimators_unfitted(name, estimator_orig, strict_mode=True): - """Check that predict raises an exception in an unfitted estimator. - - Unfitted estimators should raise a NotFittedError. - """ + # Check that predict raises an exception in an unfitted estimator. + # Unfitted estimators should raise a NotFittedError. # Common test for Regressors, Classifiers and Outlier detection estimators + # XXX pure API X, y = _regression_dataset() estimator = clone(estimator_orig) @@ -2160,6 +2210,9 @@ def check_estimators_unfitted(name, estimator_orig, strict_mode=True): @ignore_warnings(category=FutureWarning) def check_supervised_y_2d(name, estimator_orig, strict_mode=True): + # Check that estimators that don't support multi-ouput raise a warning if y + # is not 1d, and that they just ravel y + # XXX pure API check tags = estimator_orig._get_tags() if tags['multioutput_only']: # These only work on 2d, so this test makes no sense @@ -2227,7 +2280,6 @@ def check_classifiers_predictions(X, y, name, classifier_orig, (classifier, ", ".join(map(str, y_exp)), ", ".join(map(str, y_pred)))) - # training set performance if name != "ComplementNB": # This is a pathological data set for ComplementNB. # For some specific cases 'ComplementNB' predicts less classes @@ -2245,6 +2297,9 @@ def _choose_check_classifiers_labels(name, y, y_names): def check_classifiers_classes(name, classifier_orig, strict_mode=True): + # Check that decision function > 0 => pos class + # Also checks the classes_ attribute. + # XXX pure API check X_multiclass, y_multiclass = make_blobs(n_samples=30, random_state=0, cluster_std=0.1) X_multiclass, y_multiclass = shuffle(X_multiclass, y_multiclass, @@ -2283,6 +2338,9 @@ def check_classifiers_classes(name, classifier_orig, strict_mode=True): @ignore_warnings(category=FutureWarning) def check_regressors_int(name, regressor_orig, strict_mode=True): + # Check that regressors give same prediction when y is encoded as int or + # float + # XXX: API check ? 
X, _ = _regression_dataset() X = _pairwise_estimator_convert_X(X[:50], regressor_orig) rnd = np.random.RandomState(0) @@ -2312,6 +2370,12 @@ def check_regressors_int(name, regressor_orig, strict_mode=True): @ignore_warnings(category=FutureWarning) def check_regressors_train(name, regressor_orig, readonly_memmap=False, X_dtype=np.float64, strict_mode=True): + # Check that regressors: + # - raise an error when X and y have different number of samples + # - accept lists as input to fit + # - predict n_samples predictions + # - have a score > .5 on simple data + # XXX: all API checks except the last one X, y = _regression_dataset() X = X.astype(X_dtype) X = _pairwise_estimator_convert_X(X, regressor_orig) @@ -2353,6 +2417,7 @@ def check_regressors_train(name, regressor_orig, readonly_memmap=False, # TODO: find out why PLS and CCA fail. RANSAC is random # and furthermore assumes the presence of outliers, hence # skipped + # XXX: non API if not regressor._get_tags()["poor_score"]: assert regressor.score(X, y_) > 0.5 @@ -2360,7 +2425,7 @@ def check_regressors_train(name, regressor_orig, readonly_memmap=False, @ignore_warnings def check_regressors_no_decision_function(name, regressor_orig, strict_mode=True): - # checks whether regressors have decision_function or predict_proba + # check that regressors decision_function or predict_proba rng = np.random.RandomState(0) regressor = clone(regressor_orig) From 94b069ea73bc9c7eb89b0e6bec2ce8777348e4e4 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Tue, 6 Oct 2020 11:24:20 -0400 Subject: [PATCH 03/21] WIP --- sklearn/utils/estimator_checks.py | 46 +++++++++++++++++++++++++++---- 1 file changed, 40 insertions(+), 6 deletions(-) diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index 9456dae78ffe3..79ce263441381 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -2426,6 +2426,7 @@ def check_regressors_train(name, regressor_orig, readonly_memmap=False, def check_regressors_no_decision_function(name, regressor_orig, strict_mode=True): # check that regressors decision_function or predict_proba + # XXX: full API check rng = np.random.RandomState(0) regressor = clone(regressor_orig) @@ -2451,7 +2452,11 @@ def check_regressors_no_decision_function(name, regressor_orig, @ignore_warnings(category=FutureWarning) def check_class_weight_classifiers(name, classifier_orig, strict_mode=True): - + # Make sure that classifiers take class_weight into account by creating a + # very noisy balanced dataset. We make sure that passing a very imbalanced + # class_weights helps recovering a good score. + # XXX: full non-API check + if classifier_orig._get_tags()['binary_only']: problems = [2] else: @@ -2499,6 +2504,7 @@ def check_class_weight_classifiers(name, classifier_orig, strict_mode=True): def check_class_weight_balanced_classifiers(name, classifier_orig, X_train, y_train, X_test, y_test, weights, strict_mode=True): + # XXX: it's never ever used, just ignore classifier = clone(classifier_orig) if hasattr(classifier, "n_iter"): classifier.set_params(n_iter=100) @@ -2519,8 +2525,10 @@ def check_class_weight_balanced_classifiers(name, classifier_orig, X_train, @ignore_warnings(category=FutureWarning) def check_class_weight_balanced_linear_classifier(name, Classifier, strict_mode=True): - """Test class weights with non-contiguous class labels.""" + # Check that class_weight='balanced' is equivalent to manually passing + # class proportions. 
# this is run on classes, not instances, though this should be changed + # XXX: non API check X = np.array([[-1.0, -1.0], [-1.0, 0], [-.8, -1.0], [1.0, 1.0], [1.0, 0.0]]) y = np.array([1, 1, 1, -1, -1]) @@ -2558,6 +2566,8 @@ def check_class_weight_balanced_linear_classifier(name, Classifier, @ignore_warnings(category=FutureWarning) def check_estimators_overwrite_params(name, estimator_orig, strict_mode=True): + # Check that calling fit does not alter the output of get_params + # XXX: full API check X, y = make_blobs(random_state=0, n_samples=21) # some want non-negative input X -= X.min() @@ -2593,7 +2603,10 @@ def check_estimators_overwrite_params(name, estimator_orig, strict_mode=True): @ignore_warnings(category=FutureWarning) def check_no_attributes_set_in_init(name, estimator_orig, strict_mode=True): - """Check setting during init.""" + # Check that: + # - init does not set any attribute apart from the parameters + # - all parameters of init are set as attributes + # XXX: full API check estimator = clone(estimator_orig) if hasattr(type(estimator).__init__, "deprecated_original"): return @@ -2627,6 +2640,9 @@ def check_no_attributes_set_in_init(name, estimator_orig, strict_mode=True): @ignore_warnings(category=FutureWarning) def check_sparsify_coefficients(name, estimator_orig, strict_mode=True): + # Check that sparsified coefs produce the same predictions as the + # originals coefs + # XXX: full non API check X = np.array([[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1], [-1, -2], [2, 2], [-2, -2]]) y = np.array([1, 1, 1, 2, 2, 2, 3, 3, 3]) @@ -2651,6 +2667,9 @@ def check_sparsify_coefficients(name, estimator_orig, strict_mode=True): @ignore_warnings(category=FutureWarning) def check_classifier_data_not_an_array(name, estimator_orig, strict_mode=True): + # Check that estimator yields same predictions whether an array was passed + # or not + # XXX: full API X = np.array([[3, 0], [0, 1], [0, 2], [1, 1], [1, 2], [2, 1], [0, 3], [1, 0], [2, 0], [4, 4], [2, 3], [3, 2]]) X = _pairwise_estimator_convert_X(X, estimator_orig) @@ -2663,6 +2682,9 @@ def check_classifier_data_not_an_array(name, estimator_orig, strict_mode=True): @ignore_warnings(category=FutureWarning) def check_regressor_data_not_an_array(name, estimator_orig, strict_mode=True): + # Check that estimator yields same predictions whether an array was passed + # or not + # XXX: full API X, y = _regression_dataset() X = _pairwise_estimator_convert_X(X, estimator_orig) y = _enforce_estimator_tags_y(estimator_orig, y) @@ -2716,8 +2738,9 @@ def check_estimators_data_not_an_array(name, estimator_orig, X, y, obj_type, def check_parameters_default_constructible(name, Estimator, strict_mode=True): - # test default-constructibility - # get rid of deprecation warnings + # Check that the estimator's default parameters are immutable (sort of). + # Also check that get_params returns exactly the default parameters values + # XXX: full API check Estimator = Estimator.__class__ @@ -2846,6 +2869,7 @@ def check_non_transformer_estimators_n_iter(name, estimator_orig, strict_mode=True): # Test that estimators that are not transformers with a parameter # max_iter, return the attribute of n_iter_ at least 1. + # XXX: full API # These models are dependent on external solvers like # libsvm and accessing the iter parameter is non-trivial. 
@@ -2880,6 +2904,7 @@ def check_non_transformer_estimators_n_iter(name, estimator_orig, def check_transformer_n_iter(name, estimator_orig, strict_mode=True): # Test that transformers with a parameter max_iter, return the # attribute of n_iter_ at least 1. + # XXX: full API estimator = clone(estimator_orig) if hasattr(estimator, "max_iter"): if name in CROSS_DECOMPOSITION: @@ -2904,7 +2929,8 @@ def check_transformer_n_iter(name, estimator_orig, strict_mode=True): @ignore_warnings(category=FutureWarning) def check_get_params_invariance(name, estimator_orig, strict_mode=True): - # Checks if get_params(deep=False) is a subset of get_params(deep=True) + # Checks that get_params(deep=False) is a subset of get_params(deep=True) + # XXX: full API e = clone(estimator_orig) shallow_params = e.get_params(deep=False) @@ -2918,6 +2944,7 @@ def check_get_params_invariance(name, estimator_orig, strict_mode=True): def check_set_params(name, estimator_orig, strict_mode=True): # Check that get_params() returns the same thing # before and after set_params() with some fuzz + # XXX: full API check estimator = clone(estimator_orig) orig_params = estimator.get_params(deep=False) @@ -2972,6 +2999,7 @@ def check_set_params(name, estimator_orig, strict_mode=True): def check_classifiers_regression_target(name, estimator_orig, strict_mode=True): # Check if classifier throws an exception when fed regression targets + # XXX API check X, y = _regression_dataset() @@ -2987,6 +3015,7 @@ def check_classifiers_regression_target(name, estimator_orig, def check_decision_proba_consistency(name, estimator_orig, strict_mode=True): # Check whether an estimator having both decision_function and # predict_proba methods has outputs with perfect rank correlation. + # XXX: fulll non API check centers = [(2, 2), (4, 4)] X, y = make_blobs(n_samples=100, random_state=0, n_features=4, @@ -3028,6 +3057,8 @@ def check_outliers_fit_predict(name, estimator_orig, strict_mode=True): if hasattr(estimator, 'predict'): y_pred_2 = estimator.fit(X).predict(X) assert_array_equal(y_pred, y_pred_2) + + # XXX: next check isn't API check if hasattr(estimator, "contamination"): # proportion of outliers equal to contamination parameter when not @@ -3057,6 +3088,7 @@ def check_outliers_fit_predict(name, estimator_orig, strict_mode=True): def check_fit_non_negative(name, estimator_orig, strict_mode=True): # Check that proper warning is raised for non-negative X # when tag requires_positive_X is present + # XXX: full non API check + remove if else X = np.array([[-1., 1], [-1., 1]]) y = np.array([1, 2]) estimator = clone(estimator_orig) @@ -3076,6 +3108,8 @@ def check_fit_idempotent(name, estimator_orig, strict_mode=True): # predict(), predict_proba(), decision_function() and transform() return # the same results. 
+ # XXX full API check + check_methods = ["predict", "transform", "decision_function", "predict_proba"] rng = np.random.RandomState(0) From e4f889ffa44b31f92f375130789062c77c21415b Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Fri, 9 Oct 2020 10:35:45 -0400 Subject: [PATCH 04/21] some more --- sklearn/tests/test_common.py | 62 ++--- sklearn/utils/estimator_checks.py | 434 ++++++++++++++---------------- 2 files changed, 236 insertions(+), 260 deletions(-) diff --git a/sklearn/tests/test_common.py b/sklearn/tests/test_common.py index b84b66d1fb919..e0ed782fe596b 100644 --- a/sklearn/tests/test_common.py +++ b/sklearn/tests/test_common.py @@ -212,8 +212,9 @@ def test_class_support_removed(): class MyNMFWithBadErrorMessage(NMF): # Same as NMF but raises an uninformative error message if X has negative - # value. This estimator would fail the check suite in strict mode, - # specifically it would fail check_fit_non_negative + # value. This estimator would fail the check suite with api_only=False, + # specifically it would fail check_fit_non_negative because its error + # message doesn't match def fit(self, X, y=None, **params): X = check_array(X, accept_sparse=('csr', 'csc'), dtype=[np.float64, np.float32]) @@ -225,51 +226,52 @@ def fit(self, X, y=None, **params): return super().fit(X, y, **params) -def test_strict_mode_check_estimator(): - # Tests various conditions for the strict mode of check_estimator() +def test_api_only_check_estimator(): + # Tests various conditions for the api_only parameter of check_estimator() # Details are in the comments - # LogisticRegression has no _xfail_checks, so when strict_mode is on, there + # LogisticRegression has no _xfail_checks, so when api_only=False, there # should be no skipped tests. with pytest.warns(None) as catched_warnings: - check_estimator(LogisticRegression(), strict_mode=True) + check_estimator(LogisticRegression(), api_only=False) assert not any(isinstance(w, SkipTestWarning) for w in catched_warnings) - # When strict mode is off, check_n_features should be skipped because it's - # a fully strict check - msg_check_n_features_in = 'check_n_features_in is fully strict ' - with pytest.warns(SkipTestWarning, match=msg_check_n_features_in): - check_estimator(LogisticRegression(), strict_mode=False) + # When api_only is True, check_fit2d_1sample should be skipped + # because it's not an API check + skip_match = 'check_fit2d_1sample is not an API check' + with pytest.warns(SkipTestWarning, match=skip_match): + check_estimator(LogisticRegression(), api_only=True) # NuSVC has some _xfail_checks. 
They should be skipped regardless of - # strict_mode + # api_only with pytest.warns(SkipTestWarning, match='fails for the decision_function method'): - check_estimator(NuSVC(), strict_mode=True) - # When strict mode is off, check_n_features_in is skipped along with the - # rest of the xfail_checks - with pytest.warns(SkipTestWarning, match=msg_check_n_features_in): - check_estimator(NuSVC(), strict_mode=False) - - # MyNMF will fail check_fit_non_negative() in strict mode because it yields - # a bad error message + check_estimator(NuSVC(), api_only=False) + # When api_only is True, check_fit2d_1sample is skipped along + # with the rest of the xfail_checks + with pytest.warns(SkipTestWarning, match=skip_match): + check_estimator(NuSVC(), api_only=True) + + # MyNMF will fail check_fit_non_negative() with api_only=False because it + # yields a bad error message with pytest.raises( AssertionError, match="The error message should contain" ): - check_estimator(MyNMFWithBadErrorMessage(), strict_mode=True) - # However, it should pass the test suite in non-strict mode because when - # strict mode is off, check_fit_non_negative() will not check the exact - # error messsage. (We still assert that the warning from - # check_n_features_in is raised) - with pytest.warns(SkipTestWarning, match=msg_check_n_features_in): - check_estimator(MyNMFWithBadErrorMessage(), strict_mode=False) + check_estimator(MyNMFWithBadErrorMessage(), api_only=False) + # However, it should pass the test suite with api_only=True because when in + # this case, check_fit_non_negative() will not check the exact error + # messsage. (We still assert that the warning from + # check_fit2d_1sample is raised) + with pytest.warns(SkipTestWarning, match=skip_match): + check_estimator(MyNMFWithBadErrorMessage(), api_only=True) @parametrize_with_checks([LogisticRegression(), NuSVC(), MyNMFWithBadErrorMessage()], - strict_mode=False) -def test_strict_mode_parametrize_with_checks(estimator, check): - # Ideally we should assert that the strict checks are Xfailed... + api_only=True) +def test_api_only_parametrize_with_checks(estimator, check): + # Ideally we should assert that the NON_API checks are either Xfailed or + # Xpassed check(estimator) diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index 6dfbc55d6d956..267c5677831c6 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -14,7 +14,6 @@ from . import IS_PYPY from .. import config_context from ._testing import _get_args -from ._testing import assert_raise_message from ._testing import assert_array_equal from ._testing import assert_array_almost_equal from ._testing import assert_allclose @@ -150,7 +149,7 @@ def _yield_classifier_checks(classifier): @ignore_warnings(category=FutureWarning) -def check_supervised_y_no_nan(name, estimator_orig, strict_mode=True): +def check_supervised_y_no_nan(name, estimator_orig, api_only=False): # Checks that the Estimator targets are not NaN. 
estimator = clone(estimator_orig) rng = np.random.RandomState(888) @@ -370,14 +369,14 @@ def _construct_instance(Estimator): return estimator -def _maybe_mark_xfail(estimator, check, strict_mode, pytest): +def _maybe_mark_xfail(estimator, check, api_only, pytest): # Mark (estimator, check) pairs as XFAIL if needed (see conditions in # _should_be_skipped_or_marked()) # This is similar to _maybe_skip(), but this one is used by # @parametrize_with_checks() instead of check_estimator() should_be_marked, reason = _should_be_skipped_or_marked(estimator, check, - strict_mode) + api_only) if not should_be_marked: return estimator, check else: @@ -385,14 +384,14 @@ def _maybe_mark_xfail(estimator, check, strict_mode, pytest): marks=pytest.mark.xfail(reason=reason)) -def _maybe_skip(estimator, check, strict_mode): +def _maybe_skip(estimator, check, api_only): # Wrap a check so that it's skipped if needed (see conditions in # _should_be_skipped_or_marked()) # This is similar to _maybe_mark_xfail(), but this one is used by # check_estimator() instead of @parametrize_with_checks which requires # pytest should_be_skipped, reason = _should_be_skipped_or_marked(estimator, check, - strict_mode) + api_only) if not should_be_skipped: return check @@ -409,15 +408,15 @@ def wrapped(*args, **kwargs): return wrapped -def _should_be_skipped_or_marked(estimator, check, strict_mode): +def _should_be_skipped_or_marked(estimator, check, api_only): # Return whether a check should be skipped (when using check_estimator()) # or marked as XFAIL (when using @parametrize_with_checks()), along with a # reason. # A check should be skipped or marked if either: # - the check is in the _xfail_checks tag of the estimator - # - the check is fully strict and strict mode is off - # Checks that are only partially strict will not be skipped since we want - # to run their non-strict parts. + # - the check is not an API check and api_only is True + # Checks that are a mix of API and non-API checks will not be skipped since + # we want to run their API-checking parts. check_name = (check.func.__name__ if isinstance(check, partial) else check.__name__) @@ -426,13 +425,13 @@ def _should_be_skipped_or_marked(estimator, check, strict_mode): if check_name in xfail_checks: return True, xfail_checks[check_name] - if check_name in _FULLY_STRICT_CHECKS and not strict_mode: - return True, f'{check_name} is fully strict and strict mode is off' + if check_name in _NON_API_CHECKS and api_only: + return True, f'{check_name} is not an API check and api_only is True.' return False, 'placeholder reason that will never be used' -def parametrize_with_checks(estimators, strict_mode=True): +def parametrize_with_checks(estimators, api_only=False): """Pytest specific decorator for parametrizing estimator checks. The `id` of each check is set to be a pprint version of the estimator @@ -450,18 +449,18 @@ def parametrize_with_checks(estimators, strict_mode=True): Passing a class was deprecated in version 0.23, and support for classes was removed in 0.24. Pass an instance instead. - strict_mode : bool, default=True - If True, the full check suite is run. - If False, only the non-strict part of the check suite is run. + api_only : bool, default=False + If True, the check suite will only ensure pure API-compatibility, and + will ignore other checks like controlling error messages or + prediction performance on easy datasets. + By default, the entire check suite is run. 
- In non-strict mode, some checks will be easier to pass: e.g., they - will only make sure an error is raised instead of also checking the - full error message. - Some checks are considered completely strict, in which case they are - treated as if they were in the estimators' `_xfails_checks` tag: they - will be marked as `xfail` for pytest. See :ref:`estimator_tags` for - more info on the `_xfails_check` tag. The set of strict checks is in - `sklearn.utils.estimator_checks._FULLY_STRICT_CHECKS`. + When True, some checks will be easier to pass. Some other checks will + be treated as if they were in the estimators' `_xfails_checks` tag: + they will be marked as `xfail` for pytest, but they will still be + run. If they pass, pytest will label them as `xpass`. These checks + are in `sklearn.utils.estimator_checks._NON_API_CHECKS`. See + :ref:`estimator_tags` for more info on the `_xfails_check` tag. .. versionadded:: 0.24 @@ -493,14 +492,14 @@ def checks_generator(): for estimator in estimators: name = type(estimator).__name__ for check in _yield_all_checks(estimator): - check = partial(check, name, strict_mode=strict_mode) - yield _maybe_mark_xfail(estimator, check, strict_mode, pytest) + check = partial(check, name, api_only=api_only) + yield _maybe_mark_xfail(estimator, check, api_only, pytest) return pytest.mark.parametrize("estimator, check", checks_generator(), ids=_get_check_estimator_ids) -def check_estimator(Estimator, generate_only=False, strict_mode=True): +def check_estimator(Estimator, generate_only=False, api_only=False): """Check if estimator adheres to scikit-learn conventions. This estimator will run an extensive test-suite for input validation, @@ -536,18 +535,17 @@ def check_estimator(Estimator, generate_only=False, strict_mode=True): .. versionadded:: 0.22 - strict_mode : bool, default=True - If True, the full check suite is run. - If False, only the non-strict part of the check suite is run. + api_only : bool, default=False + If True, the check suite will only ensure pure API-compatibility, and + will ignore other checks like controlling error messages or + prediction performance on easy datasets. + By default, the entire check suite is run. - In non-strict mode, some checks will be easier to pass: e.g., they - will only make sure an error is raised instead of also checking the - full error message. - Some checks are considered completely strict, in which case they are - treated as if they were in the estimators' `_xfails_checks` tag: they - will be ignored with a warning. See :ref:`estimator_tags` for more - info on the `_xfails_check` tag. The set of strict checks is in - `sklearn.utils.estimator_checks._FULLY_STRICT_CHECKS`. + When True, some checks will be easier to pass. Some other checks will + be treated as if they were in the estimators' `_xfails_checks` tag: + they will be ignored with a warning. These checks are in + `sklearn.utils.estimator_checks._NON_API_CHECKS`. See + :ref:`estimator_tags` for more info on the `_xfails_check` tag. .. 
versionadded:: 0.24 @@ -568,8 +566,8 @@ def check_estimator(Estimator, generate_only=False, strict_mode=True): def checks_generator(): for check in _yield_all_checks(estimator): - check = _maybe_skip(estimator, check, strict_mode) - yield estimator, partial(check, name, strict_mode=strict_mode) + check = _maybe_skip(estimator, check, api_only) + yield estimator, partial(check, name, api_only=api_only) if generate_only: return checks_generator() @@ -761,10 +759,9 @@ def _generate_sparse_matrix(X_csr): yield sparse_format + "_64", X -def check_estimator_sparse_data(name, estimator_orig, strict_mode=True): +def check_estimator_sparse_data(name, estimator_orig, api_only=False): # Make sure that the estimator either accepts sparse data in fit and # predict, or that it fails with a helpful error message. - # XXX this is a non-API check rng = np.random.RandomState(0) X = rng.rand(40, 10) X[X < .8] = 0 @@ -819,10 +816,9 @@ def check_estimator_sparse_data(name, estimator_orig, strict_mode=True): @ignore_warnings(category=FutureWarning) -def check_sample_weights_pandas_series(name, estimator_orig, strict_mode=True): +def check_sample_weights_pandas_series(name, estimator_orig, api_only=False): # check that estimators will accept a 'sample_weight' parameter of # type pandas.Series in the 'fit' function. - # XXX pure API check estimator = clone(estimator_orig) if has_fit_parameter(estimator, "sample_weight"): try: @@ -847,10 +843,9 @@ def check_sample_weights_pandas_series(name, estimator_orig, strict_mode=True): @ignore_warnings(category=(FutureWarning)) -def check_sample_weights_not_an_array(name, estimator_orig, strict_mode=True): +def check_sample_weights_not_an_array(name, estimator_orig, api_only=False): # check that estimators will accept a 'sample_weight' parameter of # type _NotAnArray in the 'fit' function. - # XXX pure API check estimator = clone(estimator_orig) if has_fit_parameter(estimator, "sample_weight"): X = np.array([[1, 1], [1, 2], [1, 3], [1, 4], @@ -865,10 +860,9 @@ def check_sample_weights_not_an_array(name, estimator_orig, strict_mode=True): @ignore_warnings(category=(FutureWarning)) -def check_sample_weights_list(name, estimator_orig, strict_mode=True): +def check_sample_weights_list(name, estimator_orig, api_only=False): # check that estimators will accept a 'sample_weight' parameter of # type list in the 'fit' function. - # XXX: pure API check if has_fit_parameter(estimator_orig, "sample_weight"): estimator = clone(estimator_orig) rnd = np.random.RandomState(0) @@ -883,10 +877,9 @@ def check_sample_weights_list(name, estimator_orig, strict_mode=True): @ignore_warnings(category=FutureWarning) -def check_sample_weights_shape(name, estimator_orig, strict_mode=True): +def check_sample_weights_shape(name, estimator_orig, api_only=False): # check that estimators raise an error if sample_weight # shape mismatches the input - # XXX: pure API check?????? Are error checks API checks????? if (has_fit_parameter(estimator_orig, "sample_weight") and not _is_pairwise(estimator_orig)): estimator = clone(estimator_orig) @@ -909,12 +902,11 @@ def check_sample_weights_shape(name, estimator_orig, strict_mode=True): @ignore_warnings(category=FutureWarning) def check_sample_weights_invariance(name, estimator_orig, kind="ones", - strict_mode=True): + api_only=False): # For kind="ones" check that the estimators yield same results for # unit weights and no weights # For kind="zeros" check that setting sample_weight to 0 is equivalent # to removing corresponding samples. 
- # XXX: non-API check estimator1 = clone(estimator_orig) estimator2 = clone(estimator_orig) set_random_state(estimator1, random_state=0) @@ -962,9 +954,8 @@ def check_sample_weights_invariance(name, estimator_orig, kind="ones", @ignore_warnings(category=(FutureWarning, UserWarning)) -def check_dtype_object(name, estimator_orig, strict_mode=True): +def check_dtype_object(name, estimator_orig, api_only=False): # check that estimators treat dtype object as numeric if possible - # XXX probably API except for error msg rng = np.random.RandomState(0) X = _pairwise_estimator_convert_X(rng.rand(40, 10), estimator_orig) X = X.astype(object) @@ -985,8 +976,8 @@ def check_dtype_object(name, estimator_orig, strict_mode=True): if 'string' not in tags['X_types']: X[0, 0] = {'foo': 'bar'} - msg = "argument must be a string.* number" - with raises(TypeError, match=msg): + match = None if api_only else "argument must be a string.* number" + with raises(TypeError, match=match): estimator.fit(X, y) else: # Estimators supporting string will not call np.asarray to convert the @@ -996,9 +987,8 @@ def check_dtype_object(name, estimator_orig, strict_mode=True): estimator.fit(X, y) -def check_complex_data(name, estimator_orig, strict_mode=True): +def check_complex_data(name, estimator_orig, api_only=False): # check that estimators raise an exception on providing complex data - #XXX: error check... ????? X = np.random.sample(10) + 1j * np.random.sample(10) X = X.reshape(-1, 1) y = np.random.sample(10) + 1j * np.random.sample(10) @@ -1008,10 +998,9 @@ def check_complex_data(name, estimator_orig, strict_mode=True): @ignore_warnings -def check_dict_unchanged(name, estimator_orig, strict_mode=True): +def check_dict_unchanged(name, estimator_orig, api_only=False): # check that calling the prediction method does not alter the __dict__ # attribute of the estimator. 
- # XXX: pure API check rnd = np.random.RandomState(0) if name in ['RANSACRegressor']: X = 3 * rnd.uniform(size=(20, 3)) @@ -1049,9 +1038,8 @@ def _is_public_parameter(attr): @ignore_warnings(category=FutureWarning) -def check_dont_overwrite_parameters(name, estimator_orig, strict_mode=True): +def check_dont_overwrite_parameters(name, estimator_orig, api_only=False): # check that fit method only changes or sets private attributes - #XXX pure API check if hasattr(estimator_orig.__init__, "deprecated_original"): # to not check deprecated classes return @@ -1102,9 +1090,8 @@ def check_dont_overwrite_parameters(name, estimator_orig, strict_mode=True): @ignore_warnings(category=FutureWarning) -def check_fit2d_predict1d(name, estimator_orig, strict_mode=True): +def check_fit2d_predict1d(name, estimator_orig, api_only=False): # check that predicting with a 1d array raises an error - # XXX Make message validation optional rnd = np.random.RandomState(0) X = 3 * rnd.uniform(size=(20, 3)) X = _pairwise_estimator_convert_X(X, estimator_orig) @@ -1123,8 +1110,9 @@ def check_fit2d_predict1d(name, estimator_orig, strict_mode=True): for method in ["predict", "transform", "decision_function", "predict_proba"]: if hasattr(estimator, method): - assert_raise_message(ValueError, "Reshape your data", - getattr(estimator, method), X[0]) + match = None if api_only else "Reshape your data" + with raises(ValueError, match=match): + getattr(estimator, method)(X[0]) def _apply_on_subsets(func, X): @@ -1147,10 +1135,9 @@ def _apply_on_subsets(func, X): @ignore_warnings(category=FutureWarning) -def check_methods_subset_invariance(name, estimator_orig, strict_mode=True): +def check_methods_subset_invariance(name, estimator_orig, api_only=False): # check that method gives invariant results if applied # on mini batches or the whole set - # XXX: non API check rnd = np.random.RandomState(0) X = 3 * rnd.uniform(size=(20, 3)) X = _pairwise_estimator_convert_X(X, estimator_orig) @@ -1181,7 +1168,7 @@ def check_methods_subset_invariance(name, estimator_orig, strict_mode=True): @ignore_warnings(category=FutureWarning) def check_methods_sample_order_invariance( - name, estimator_orig, strict_mode=True + name, estimator_orig, api_only=False ): # check that method gives invariant results if applied # on a subset with different sample order @@ -1217,11 +1204,10 @@ def check_methods_sample_order_invariance( @ignore_warnings -def check_fit2d_1sample(name, estimator_orig, strict_mode=True): +def check_fit2d_1sample(name, estimator_orig, api_only=False): # Check that fitting a 2d array with only one sample either works or # returns an informative message. The error message should either mention # the number of samples or the number of classes. 
- # XXX Non API check rnd = np.random.RandomState(0) X = 3 * rnd.uniform(size=(1, 10)) X = _pairwise_estimator_convert_X(X, estimator_orig) @@ -1249,10 +1235,9 @@ def check_fit2d_1sample(name, estimator_orig, strict_mode=True): @ignore_warnings -def check_fit2d_1feature(name, estimator_orig, strict_mode=True): +def check_fit2d_1feature(name, estimator_orig, api_only=False): # check fitting a 2d array with only 1 feature either works or returns # informative message - # XXX non API check rnd = np.random.RandomState(0) X = 3 * rnd.uniform(size=(10, 1)) X = _pairwise_estimator_convert_X(X, estimator_orig) @@ -1281,9 +1266,8 @@ def check_fit2d_1feature(name, estimator_orig, strict_mode=True): @ignore_warnings -def check_fit1d(name, estimator_orig, strict_mode=True): +def check_fit1d(name, estimator_orig, api_only=False): # check fitting 1d X array raises a ValueError - # XXX Pure API check rnd = np.random.RandomState(0) X = 3 * rnd.uniform(size=(20)) y = X.astype(int) @@ -1302,7 +1286,7 @@ def check_fit1d(name, estimator_orig, strict_mode=True): @ignore_warnings(category=FutureWarning) def check_transformer_general(name, transformer, readonly_memmap=False, - strict_mode=True): + api_only=False): X, y = make_blobs(n_samples=30, centers=[[0, 0, 0], [1, 1, 1]], random_state=0, n_features=2, cluster_std=0.1) X = StandardScaler().fit_transform(X) @@ -1316,7 +1300,7 @@ def check_transformer_general(name, transformer, readonly_memmap=False, @ignore_warnings(category=FutureWarning) -def check_transformer_data_not_an_array(name, transformer, strict_mode=True): +def check_transformer_data_not_an_array(name, transformer, api_only=False): X, y = make_blobs(n_samples=30, centers=[[0, 0, 0], [1, 1, 1]], random_state=0, n_features=2, cluster_std=0.1) X = StandardScaler().fit_transform(X) @@ -1332,10 +1316,9 @@ def check_transformer_data_not_an_array(name, transformer, strict_mode=True): @ignore_warnings(category=FutureWarning) -def check_transformers_unfitted(name, transformer, strict_mode=True): +def check_transformers_unfitted(name, transformer, api_only=False): # Make sure the unfitted transformer raises an error when transform is # called - # XXX: non API check X, y = _regression_dataset() transformer = clone(transformer) @@ -1349,14 +1332,13 @@ def check_transformers_unfitted(name, transformer, strict_mode=True): transformer.transform(X) -def _check_transformer(name, transformer_orig, X, y, strict_mode=True): +def _check_transformer(name, transformer_orig, X, y, api_only=False): # Check that: # - fit_transform returns n_samples transformed samples - # - fit_transform and transform give equivalent results. - # - fit_transform gives the same results twice # - an error is raised if transform is called with an incorrect number of # features - # XXX: Only make first and last checks part of API + # - fit_transform and transform give equivalent results. 
+ # - fit_transform gives the same results twice n_samples, n_features = np.asarray(X).shape transformer = clone(transformer_orig) set_random_state(transformer) @@ -1385,6 +1367,22 @@ def _check_transformer(name, transformer_orig, X, y, strict_mode=True): assert X_pred.shape[0] == n_samples if hasattr(transformer, 'transform'): + + # raises error on malformed input for transform + if hasattr(X, 'shape') and \ + not transformer._get_tags()["stateless"] and \ + X.ndim == 2 and X.shape[1] > 1: + + with raises( + ValueError, + err_msg=f"The transformer {name} does not raise an error " + "when the number of features in transform is different from " + "the number of features in fit." + ): + transformer.transform(X[:, :-1]) + if api_only: + return + if name in CROSS_DECOMPOSITION: X_pred2 = transformer.transform(X, y_) X_pred3 = transformer.fit_transform(X, y=y_) @@ -1421,25 +1419,11 @@ def _check_transformer(name, transformer_orig, X, y, strict_mode=True): assert _num_samples(X_pred2) == n_samples assert _num_samples(X_pred3) == n_samples - # raises error on malformed input for transform - if hasattr(X, 'shape') and \ - not transformer._get_tags()["stateless"] and \ - X.ndim == 2 and X.shape[1] > 1: - - with raises( - ValueError, - err_msg=f"The transformer {name} does not raise an error " - "when the number of features in transform is different from " - "the number of features in fit." - ): - transformer.transform(X[:, :-1]) - @ignore_warnings -def check_pipeline_consistency(name, estimator_orig, strict_mode=True): +def check_pipeline_consistency(name, estimator_orig, api_only=False): # check that make_pipeline(est) gives results as est for scores and # transforms - # XXX: full API if estimator_orig._get_tags()['non_deterministic']: msg = name + ' is non deterministic' @@ -1468,10 +1452,9 @@ def check_pipeline_consistency(name, estimator_orig, strict_mode=True): @ignore_warnings -def check_fit_score_takes_y(name, estimator_orig, strict_mode=True): +def check_fit_score_takes_y(name, estimator_orig, api_only=False): # check that all estimators accept an optional y # in fit and score so they can be used in pipelines - # XXX : full API check rnd = np.random.RandomState(0) n_samples = 30 X = rnd.uniform(size=(n_samples, 3)) @@ -1498,9 +1481,8 @@ def check_fit_score_takes_y(name, estimator_orig, strict_mode=True): @ignore_warnings -def check_estimators_dtypes(name, estimator_orig, strict_mode=True): +def check_estimators_dtypes(name, estimator_orig, api_only=False): # Check that methods can handle X input of different float and int dtypes - # XXX not an API check rnd = np.random.RandomState(0) X_train_32 = 3 * rnd.uniform(size=(20, 5)).astype(np.float32) X_train_32 = _pairwise_estimator_convert_X(X_train_32, estimator_orig) @@ -1523,11 +1505,10 @@ def check_estimators_dtypes(name, estimator_orig, strict_mode=True): def check_transformer_preserve_dtypes( - name, transformer_orig, strict_mode=True + name, transformer_orig, api_only=False ): # check that dtype are preserved meaning if input X is of some dtype # X_transformed should be from the same dtype. - # XXX: not an API check X, y = make_blobs( n_samples=30, centers=[[0, 0, 0], [1, 1, 1]], @@ -1558,10 +1539,9 @@ def check_transformer_preserve_dtypes( @ignore_warnings(category=FutureWarning) def check_estimators_empty_data_messages(name, estimator_orig, - strict_mode=True): + api_only=False): # Make sure that a ValueError is raised when fit is called on data with no # sample or no features. - # XXX: API or not? 
e = clone(estimator_orig) set_random_state(e, 1) @@ -1580,16 +1560,15 @@ def check_estimators_empty_data_messages(name, estimator_orig, # and ignored by unsupervised models y = _enforce_estimator_tags_y(e, np.array([1, 0, 1])) msg = (r"0 feature\(s\) \(shape=\(3, 0\)\) while a minimum of \d* " - "is required.") + "is required.") if not api_only else None with raises(ValueError, match=msg): e.fit(X_zero_features, y) @ignore_warnings(category=FutureWarning) -def check_estimators_nan_inf(name, estimator_orig, strict_mode=True): +def check_estimators_nan_inf(name, estimator_orig, api_only=False): # Checks that fit, predict and transform raise an error if X contains nans # or inf. - # XXX: probably not API? rnd = np.random.RandomState(0) X_train_finite = _pairwise_estimator_convert_X(rnd.uniform(size=(10, 3)), estimator_orig) @@ -1638,9 +1617,9 @@ def check_estimators_nan_inf(name, estimator_orig, strict_mode=True): @ignore_warnings -def check_nonsquare_error(name, estimator_orig, strict_mode=True): - # Check that error is raised when non-square data is provided in fit - # XXX: API +def check_nonsquare_error(name, estimator_orig, api_only=False): + # Check that error is raised when non-square data is provided in fit for a + # pairwise estimator X, y = make_blobs(n_samples=20, n_features=10) estimator = clone(estimator_orig) @@ -1654,10 +1633,9 @@ def check_nonsquare_error(name, estimator_orig, strict_mode=True): @ignore_warnings -def check_estimators_pickle(name, estimator_orig, strict_mode=True): +def check_estimators_pickle(name, estimator_orig, api_only=False): # Test that we can pickle all estimators and that the pickled estimator # gives the same predictions - # XXX: Non API check check_methods = ["predict", "transform", "decision_function", "predict_proba"] @@ -1701,10 +1679,9 @@ def check_estimators_pickle(name, estimator_orig, strict_mode=True): @ignore_warnings(category=FutureWarning) def check_estimators_partial_fit_n_features(name, estimator_orig, - strict_mode=True): + api_only=False): # check that an error is raised when number of features changes between # calls to partial_fit. - # XXX: non API check if not hasattr(estimator_orig, 'partial_fit'): return estimator = clone(estimator_orig) @@ -1730,12 +1707,11 @@ def check_estimators_partial_fit_n_features(name, estimator_orig, @ignore_warnings(category=FutureWarning) -def check_classifier_multioutput(name, estimator, strict_mode=True): +def check_classifier_multioutput(name, estimator, api_only=False): # Make sure that the output of predict_proba and decision_function is # correct for multiouput classification (multilabel, multiclass). Also # checks that predict_proba and decision_function have consistent # predictions, i.e. the orders are consistent. - # XXX: full API check n_samples, n_labels, n_classes = 42, 5, 3 tags = estimator._get_tags() estimator = clone(estimator) @@ -1760,9 +1736,10 @@ def check_classifier_multioutput(name, estimator, strict_mode=True): "multioutput data is incorrect. Expected {}, got {}." 
.format((n_samples, n_classes), decision.shape)) - dec_pred = (decision > 0).astype(int) - dec_exp = estimator.classes_[dec_pred] - assert_array_equal(dec_exp, y_pred) + if not api_only: + dec_pred = (decision > 0).astype(int) + dec_exp = estimator.classes_[dec_pred] + assert_array_equal(dec_exp, y_pred) if hasattr(estimator, "predict_proba"): y_prob = estimator.predict_proba(X) @@ -1773,16 +1750,21 @@ def check_classifier_multioutput(name, estimator, strict_mode=True): "The shape of the probability for multioutput data is" " incorrect. Expected {}, got {}." .format((n_samples, 2), y_prob[i].shape)) - assert_array_equal( - np.argmax(y_prob[i], axis=1).astype(int), - y_pred[:, i] - ) + if not api_only: + assert_array_equal( + np.argmax(y_prob[i], axis=1).astype(int), + y_pred[:, i] + ) elif not tags['poor_score']: assert y_prob.shape == (n_samples, n_classes), ( "The shape of the probability for multioutput data is" " incorrect. Expected {}, got {}." .format((n_samples, n_classes), y_prob.shape)) - assert_array_equal(y_prob.round().astype(int), y_pred) + if not api_only: + assert_array_equal(y_prob.round().astype(int), y_pred) + + if api_only: + return if (hasattr(estimator, "decision_function") and hasattr(estimator, "predict_proba")): @@ -1793,10 +1775,9 @@ def check_classifier_multioutput(name, estimator, strict_mode=True): @ignore_warnings(category=FutureWarning) -def check_regressor_multioutput(name, estimator, strict_mode=True): +def check_regressor_multioutput(name, estimator, api_only=False): # Make sure that multioutput regressors output float64 predictions and that # the shape is correct. - # XXX: make the first check not an API check estimator = clone(estimator) n_samples = n_features = 10 @@ -1810,9 +1791,10 @@ def check_regressor_multioutput(name, estimator, strict_mode=True): estimator.fit(X, y) y_pred = estimator.predict(X) - assert y_pred.dtype == np.dtype('float64'), ( - "Multioutput predictions by a regressor are expected to be" - " floating-point precision. Got {} instead".format(y_pred.dtype)) + if not api_only: + assert y_pred.dtype == np.dtype('float64'), ( + "Multioutput predictions by a regressor are expected to be" + " floating-point precision. Got {} instead".format(y_pred.dtype)) assert y_pred.shape == y.shape, ( "The shape of the prediction for multioutput data is incorrect." 
" Expected {}, got {}.") @@ -1820,7 +1802,7 @@ def check_regressor_multioutput(name, estimator, strict_mode=True): @ignore_warnings(category=FutureWarning) def check_clustering(name, clusterer_orig, readonly_memmap=False, - strict_mode=True): + api_only=False): clusterer = clone(clusterer_orig) X, y = make_blobs(n_samples=50, random_state=1) X, y = shuffle(X, y, random_state=7) @@ -1847,7 +1829,10 @@ def check_clustering(name, clusterer_orig, readonly_memmap=False, pred = clusterer.labels_ assert pred.shape == (n_samples,) - # XXX: skip the rest when api_only is True + + if api_only: + return + assert adjusted_rand_score(pred, y) > 0.4 if clusterer._get_tags()['non_deterministic']: return @@ -1881,9 +1866,8 @@ def check_clustering(name, clusterer_orig, readonly_memmap=False, @ignore_warnings(category=FutureWarning) def check_clusterer_compute_labels_predict(name, clusterer_orig, - strict_mode=True): + api_only=False): # Check that predict is invariant of compute_labels - # XXX: non API check X, y = make_blobs(n_samples=20, random_state=0) clusterer = clone(clusterer_orig) set_random_state(clusterer) @@ -1897,11 +1881,10 @@ def check_clusterer_compute_labels_predict(name, clusterer_orig, @ignore_warnings(category=FutureWarning) -def check_classifiers_one_label(name, classifier_orig, strict_mode=True): +def check_classifiers_one_label(name, classifier_orig, api_only=False): # Check that a classifier can fit when there's only 1 class, or that it # raises a proper error. If it can fit, we also make sure that it can # predict. - # XXX: non API check error_string_fit = "Classifier can't train when only one class is present." error_string_predict = ("Classifier can't predict when only one class is " "present.") @@ -1928,7 +1911,7 @@ def check_classifiers_one_label(name, classifier_orig, strict_mode=True): @ignore_warnings # Warnings are raised by decision function def check_classifiers_train(name, classifier_orig, readonly_memmap=False, - X_dtype='float64', strict_mode=True): + X_dtype='float64', api_only=False): X_m, y_m = make_blobs(n_samples=300, random_state=0) X_m = X_m.astype(X_dtype) X_m, y_m = shuffle(X_m, y_m, random_state=7) @@ -1979,7 +1962,7 @@ def check_classifiers_train(name, classifier_orig, readonly_memmap=False, assert y_pred.shape == (n_samples,) # training set performance - if not tags['poor_score']: # XXX: not API + if not tags['poor_score'] and not api_only: assert accuracy_score(y, y_pred) > 0.83 # raises error on malformed input for predict @@ -2009,11 +1992,13 @@ def check_classifiers_train(name, classifier_orig, readonly_memmap=False, assert decision.shape == (n_samples,) else: assert decision.shape == (n_samples, 1) - dec_pred = (decision.ravel() > 0).astype(int) - assert_array_equal(dec_pred, y_pred) # XXX not API + if not api_only: + dec_pred = (decision.ravel() > 0).astype(int) + assert_array_equal(dec_pred, y_pred) else: assert decision.shape == (n_samples, n_classes) - assert_array_equal(np.argmax(decision, axis=1), y_pred) # XXX not API + if not api_only: + assert_array_equal(np.argmax(decision, axis=1), y_pred) # raises error on malformed input for decision_function if not tags["no_validation"]: @@ -2038,10 +2023,11 @@ def check_classifiers_train(name, classifier_orig, readonly_memmap=False, # predict_proba agrees with predict y_prob = classifier.predict_proba(X) assert y_prob.shape == (n_samples, n_classes) - assert_array_equal(np.argmax(y_prob, axis=1), y_pred)# XXX not API - # check that probas for all classes sum to one - 
assert_array_almost_equal(np.sum(y_prob, axis=1),# XXX not API - np.ones(n_samples)) + if not api_only: + assert_array_equal(np.argmax(y_prob, axis=1), y_pred) + # check that probas for all classes sum to one + assert_array_almost_equal(np.sum(y_prob, axis=1), + np.ones(n_samples)) if not tags["no_validation"]: # raises error on malformed input for predict_proba if _is_pairwise(classifier_orig): @@ -2056,7 +2042,7 @@ def check_classifiers_train(name, classifier_orig, readonly_memmap=False, err_msg=msg.format(name, "predict_proba"), ): classifier.predict_proba(X.T) - if hasattr(classifier, "predict_log_proba"):# XXX not API + if hasattr(classifier, "predict_log_proba") and not api_only: # predict_log_proba is a transformation of predict_proba y_log_prob = classifier.predict_log_proba(X) assert_allclose(y_log_prob, np.log(y_prob), 8, atol=1e-9) @@ -2064,7 +2050,7 @@ def check_classifiers_train(name, classifier_orig, readonly_memmap=False, def check_outlier_corruption(num_outliers, expected_outliers, decision, - strict_mode=True): + api_only=False): # Check for deviation from the precise given contamination level that may # be due to ties in the anomaly scores. if num_outliers < expected_outliers: @@ -2085,7 +2071,7 @@ def check_outlier_corruption(num_outliers, expected_outliers, decision, def check_outliers_train(name, estimator_orig, readonly_memmap=True, - strict_mode=True): + api_only=False): n_samples = 300 X, _ = make_blobs(n_samples=n_samples, random_state=0) X = shuffle(X, random_state=7) @@ -2117,10 +2103,11 @@ def check_outliers_train(name, estimator_orig, readonly_memmap=True, with raises(ValueError): estimator.predict(X.T) - # decision_function agrees with predict XXX not API - dec_pred = (decision >= 0).astype(int) - dec_pred[dec_pred == 0] = -1 - assert_array_equal(dec_pred, y_pred) + # decision_function agrees with predict + if not api_only: + dec_pred = (decision >= 0).astype(int) + dec_pred[dec_pred == 0] = -1 + assert_array_equal(dec_pred, y_pred) # raises error on malformed input for decision_function with raises(ValueError): @@ -2135,9 +2122,9 @@ def check_outliers_train(name, estimator_orig, readonly_memmap=True, estimator.score_samples(X.T) # contamination parameter (not for OneClassSVM which has the nu parameter) - # XXX: not API if (hasattr(estimator, 'contamination') - and not hasattr(estimator, 'novelty')): + and not hasattr(estimator, 'novelty') + and not api_only): # proportion of outliers equal to contamination parameter when not # set to 'auto'. This is true for the training set and cannot thus be # checked as follows for estimators with a novelty parameter such as @@ -2167,9 +2154,8 @@ def check_outliers_train(name, estimator_orig, readonly_memmap=True, @ignore_warnings(category=(FutureWarning)) def check_classifiers_multilabel_representation_invariance( - name, classifier_orig, strict_mode=True): + name, classifier_orig, api_only=False): # check different target representations for multilabel classifiers - # XXX: pure API check X, y = make_multilabel_classification(n_samples=100, n_features=20, n_classes=5, n_labels=3, @@ -2204,9 +2190,8 @@ def check_classifiers_multilabel_representation_invariance( @ignore_warnings(category=FutureWarning) def check_estimators_fit_returns_self(name, estimator_orig, - readonly_memmap=False, strict_mode=True): + readonly_memmap=False, api_only=False): # Check that self is returned when calling fit. 
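    # A conforming estimator simply ends fit with ``return self``, which is
    # what makes method chaining such as ``est.fit(X, y).predict(X)`` work.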
- # XXX pure API check X, y = make_blobs(random_state=0, n_samples=21) # some want non-negative input X -= X.min() @@ -2223,11 +2208,10 @@ def check_estimators_fit_returns_self(name, estimator_orig, @ignore_warnings -def check_estimators_unfitted(name, estimator_orig, strict_mode=True): +def check_estimators_unfitted(name, estimator_orig, api_only=False): # Check that predict raises an exception in an unfitted estimator. # Unfitted estimators should raise a NotFittedError. # Common test for Regressors, Classifiers and Outlier detection estimators - # XXX pure API X, y = _regression_dataset() estimator = clone(estimator_orig) @@ -2239,10 +2223,9 @@ def check_estimators_unfitted(name, estimator_orig, strict_mode=True): @ignore_warnings(category=FutureWarning) -def check_supervised_y_2d(name, estimator_orig, strict_mode=True): +def check_supervised_y_2d(name, estimator_orig, api_only=False): # Check that estimators that don't support multi-ouput raise a warning if y # is not 1d, and that they just ravel y - # XXX pure API check tags = estimator_orig._get_tags() rnd = np.random.RandomState(0) n_samples = 30 @@ -2277,7 +2260,7 @@ def check_supervised_y_2d(name, estimator_orig, strict_mode=True): @ignore_warnings def check_classifiers_predictions(X, y, name, classifier_orig, - strict_mode=True): + api_only=False): classes = np.unique(y) classifier = clone(classifier_orig) if name == 'BernoulliNB': @@ -2323,10 +2306,9 @@ def _choose_check_classifiers_labels(name, y, y_names): return y if name in ["LabelPropagation", "LabelSpreading"] else y_names -def check_classifiers_classes(name, classifier_orig, strict_mode=True): +def check_classifiers_classes(name, classifier_orig, api_only=False): # Check that decision function > 0 => pos class # Also checks the classes_ attribute. - # XXX pure API check X_multiclass, y_multiclass = make_blobs(n_samples=30, random_state=0, cluster_std=0.1) X_multiclass, y_multiclass = shuffle(X_multiclass, y_multiclass, @@ -2364,10 +2346,9 @@ def check_classifiers_classes(name, classifier_orig, strict_mode=True): @ignore_warnings(category=FutureWarning) -def check_regressors_int(name, regressor_orig, strict_mode=True): +def check_regressors_int(name, regressor_orig, api_only=False): # Check that regressors give same prediction when y is encoded as int or # float - # XXX: API check ? X, _ = _regression_dataset() X = _pairwise_estimator_convert_X(X[:50], regressor_orig) rnd = np.random.RandomState(0) @@ -2396,13 +2377,12 @@ def check_regressors_int(name, regressor_orig, strict_mode=True): @ignore_warnings(category=FutureWarning) def check_regressors_train(name, regressor_orig, readonly_memmap=False, - X_dtype=np.float64, strict_mode=True): + X_dtype=np.float64, api_only=False): # Check that regressors: # - raise an error when X and y have different number of samples # - accept lists as input to fit # - predict n_samples predictions # - have a score > .5 on simple data - # XXX: all API checks except the last one X, y = _regression_dataset() X = X.astype(X_dtype) X = _pairwise_estimator_convert_X(X, regressor_orig) @@ -2444,17 +2424,15 @@ def check_regressors_train(name, regressor_orig, readonly_memmap=False, # TODO: find out why PLS and CCA fail. 
RANSAC is random # and furthermore assumes the presence of outliers, hence # skipped - # XXX: non API - if not regressor._get_tags()["poor_score"]: + if not regressor._get_tags()["poor_score"] and not api_only: assert regressor.score(X, y_) > 0.5 @ignore_warnings def check_regressors_no_decision_function(name, regressor_orig, - strict_mode=True): + api_only=False): # check that regressors don't have a decision_function, predict_proba, or # predict_log_proba method. - # XXX: full API check rng = np.random.RandomState(0) regressor = clone(regressor_orig) @@ -2469,11 +2447,10 @@ def check_regressors_no_decision_function(name, regressor_orig, @ignore_warnings(category=FutureWarning) -def check_class_weight_classifiers(name, classifier_orig, strict_mode=True): +def check_class_weight_classifiers(name, classifier_orig, api_only=False): # Make sure that classifiers take class_weight into account by creating a # very noisy balanced dataset. We make sure that passing a very imbalanced # class_weights helps recovering a good score. - # XXX: full non-API check if classifier_orig._get_tags()['binary_only']: problems = [2] @@ -2521,8 +2498,7 @@ def check_class_weight_classifiers(name, classifier_orig, strict_mode=True): @ignore_warnings(category=FutureWarning) def check_class_weight_balanced_classifiers(name, classifier_orig, X_train, y_train, X_test, y_test, weights, - strict_mode=True): - # XXX: it's never ever used, just ignore + api_only=False): classifier = clone(classifier_orig) if hasattr(classifier, "n_iter"): classifier.set_params(n_iter=100) @@ -2542,11 +2518,10 @@ def check_class_weight_balanced_classifiers(name, classifier_orig, X_train, @ignore_warnings(category=FutureWarning) def check_class_weight_balanced_linear_classifier(name, Classifier, - strict_mode=True): + api_only=False): # Check that class_weight='balanced' is equivalent to manually passing # class proportions. 
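    # As a reminder, 'balanced' weights amount to
    # n_samples / (n_classes * np.bincount(y)), i.e. weights inversely
    # proportional to the class frequencies, so every class contributes
    # equally to the loss.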
# this is run on classes, not instances, though this should be changed - # XXX: non API check X = np.array([[-1.0, -1.0], [-1.0, 0], [-.8, -1.0], [1.0, 1.0], [1.0, 0.0]]) y = np.array([1, 1, 1, -1, -1]) @@ -2583,9 +2558,8 @@ def check_class_weight_balanced_linear_classifier(name, Classifier, @ignore_warnings(category=FutureWarning) -def check_estimators_overwrite_params(name, estimator_orig, strict_mode=True): +def check_estimators_overwrite_params(name, estimator_orig, api_only=False): # Check that calling fit does not alter the output of get_params - # XXX: full API check X, y = make_blobs(random_state=0, n_samples=21) # some want non-negative input X -= X.min() @@ -2620,11 +2594,10 @@ def check_estimators_overwrite_params(name, estimator_orig, strict_mode=True): @ignore_warnings(category=FutureWarning) -def check_no_attributes_set_in_init(name, estimator_orig, strict_mode=True): +def check_no_attributes_set_in_init(name, estimator_orig, api_only=False): # Check that: # - init does not set any attribute apart from the parameters # - all parameters of init are set as attributes - # XXX: full API check estimator = clone(estimator_orig) if hasattr(type(estimator).__init__, "deprecated_original"): return @@ -2657,10 +2630,9 @@ def check_no_attributes_set_in_init(name, estimator_orig, strict_mode=True): @ignore_warnings(category=FutureWarning) -def check_sparsify_coefficients(name, estimator_orig, strict_mode=True): +def check_sparsify_coefficients(name, estimator_orig, api_only=False): # Check that sparsified coefs produce the same predictions as the # originals coefs - # XXX: full non API check X = np.array([[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1], [-1, -2], [2, 2], [-2, -2]]) y = np.array([1, 1, 1, 2, 2, 2, 3, 3, 3]) @@ -2684,10 +2656,9 @@ def check_sparsify_coefficients(name, estimator_orig, strict_mode=True): @ignore_warnings(category=FutureWarning) -def check_classifier_data_not_an_array(name, estimator_orig, strict_mode=True): +def check_classifier_data_not_an_array(name, estimator_orig, api_only=False): # Check that estimator yields same predictions whether an array was passed # or not - # XXX: full API X = np.array([[3, 0], [0, 1], [0, 2], [1, 1], [1, 2], [2, 1], [0, 3], [1, 0], [2, 0], [4, 4], [2, 3], [3, 2]]) X = _pairwise_estimator_convert_X(X, estimator_orig) @@ -2699,10 +2670,9 @@ def check_classifier_data_not_an_array(name, estimator_orig, strict_mode=True): @ignore_warnings(category=FutureWarning) -def check_regressor_data_not_an_array(name, estimator_orig, strict_mode=True): +def check_regressor_data_not_an_array(name, estimator_orig, api_only=False): # Check that estimator yields same predictions whether an array was passed # or not - # XXX: full API X, y = _regression_dataset() X = _pairwise_estimator_convert_X(X, estimator_orig) y = _enforce_estimator_tags_y(estimator_orig, y) @@ -2713,7 +2683,7 @@ def check_regressor_data_not_an_array(name, estimator_orig, strict_mode=True): @ignore_warnings(category=FutureWarning) def check_estimators_data_not_an_array(name, estimator_orig, X, y, obj_type, - strict_mode=True): + api_only=False): if name in CROSS_DECOMPOSITION: raise SkipTest("Skipping check_estimators_data_not_an_array " "for cross decomposition module as estimators " @@ -2755,10 +2725,10 @@ def check_estimators_data_not_an_array(name, estimator_orig, X, y, obj_type, assert_allclose(pred1, pred2, atol=1e-2, err_msg=name) -def check_parameters_default_constructible(name, Estimator, strict_mode=True): +def check_parameters_default_constructible(name, 
Estimator, api_only=False): # Check that the estimator's default parameters are immutable (sort of). - # Also check that get_params returns exactly the default parameters values - # XXX: full API check + # Also check that get_params returns exactly the default parameters values + # on an unfitted estimator Estimator = Estimator.__class__ @@ -2884,10 +2854,9 @@ def _enforce_estimator_tags_x(estimator, X): @ignore_warnings(category=FutureWarning) def check_non_transformer_estimators_n_iter(name, estimator_orig, - strict_mode=True): + api_only=False): # Test that estimators that are not transformers with a parameter # max_iter, return the attribute of n_iter_ at least 1. - # XXX: full API # These models are dependent on external solvers like # libsvm and accessing the iter parameter is non-trivial. @@ -2919,10 +2888,9 @@ def check_non_transformer_estimators_n_iter(name, estimator_orig, @ignore_warnings(category=FutureWarning) -def check_transformer_n_iter(name, estimator_orig, strict_mode=True): +def check_transformer_n_iter(name, estimator_orig, api_only=False): # Test that transformers with a parameter max_iter, return the # attribute of n_iter_ at least 1. - # XXX: full API estimator = clone(estimator_orig) if hasattr(estimator, "max_iter"): if name in CROSS_DECOMPOSITION: @@ -2946,9 +2914,8 @@ def check_transformer_n_iter(name, estimator_orig, strict_mode=True): @ignore_warnings(category=FutureWarning) -def check_get_params_invariance(name, estimator_orig, strict_mode=True): +def check_get_params_invariance(name, estimator_orig, api_only=False): # Checks that get_params(deep=False) is a subset of get_params(deep=True) - # XXX: full API e = clone(estimator_orig) shallow_params = e.get_params(deep=False) @@ -2959,10 +2926,9 @@ def check_get_params_invariance(name, estimator_orig, strict_mode=True): @ignore_warnings(category=FutureWarning) -def check_set_params(name, estimator_orig, strict_mode=True): +def check_set_params(name, estimator_orig, api_only=False): # Check that get_params() returns the same thing # before and after set_params() with some fuzz - # XXX: full API check estimator = clone(estimator_orig) orig_params = estimator.get_params(deep=False) @@ -3015,25 +2981,23 @@ def check_set_params(name, estimator_orig, strict_mode=True): @ignore_warnings(category=FutureWarning) def check_classifiers_regression_target(name, estimator_orig, - strict_mode=True): + api_only=False): # Check if classifier throws an exception when fed regression targets - # XXX API check X, y = _regression_dataset() X = X + 1 + abs(X.min(axis=0)) # be sure that X is non-negative e = clone(estimator_orig) - msg = "Unknown label type: " + match = None if api_only else "Unknown label type: " if not e._get_tags()["no_validation"]: - with raises(ValueError, match=msg): + with raises(ValueError, match=match): e.fit(X, y) @ignore_warnings(category=FutureWarning) -def check_decision_proba_consistency(name, estimator_orig, strict_mode=True): +def check_decision_proba_consistency(name, estimator_orig, api_only=False): # Check whether an estimator having both decision_function and # predict_proba methods has outputs with perfect rank correlation. 
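    # For instance, a binary logistic model exposes
    # predict_proba = sigmoid(decision_function); any such strictly
    # monotonic mapping preserves the ordering of the samples, which is
    # what the rankdata comparison below verifies.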
-    # XXX: fulll non API check
    centers = [(2, 2), (4, 4)]
    X, y = make_blobs(n_samples=100, random_state=0, n_features=4,
@@ -3053,7 +3017,7 @@ def check_decision_proba_consistency(name, estimator_orig, strict_mode=True):
        assert_array_equal(rankdata(a), rankdata(b))


-def check_outliers_fit_predict(name, estimator_orig, strict_mode=True):
+def check_outliers_fit_predict(name, estimator_orig, api_only=False):
    # Check fit_predict for outlier detectors.

    n_samples = 300
@@ -3076,9 +3040,7 @@ def check_outliers_fit_predict(name, estimator_orig, strict_mode=True):
        y_pred_2 = estimator.fit(X).predict(X)
        assert_array_equal(y_pred, y_pred_2)

-    # XXX: next check isn't API check
-
-    if hasattr(estimator, "contamination"):
+    if hasattr(estimator, "contamination") and not api_only:
        # proportion of outliers equal to contamination parameter when not
        # set to 'auto'
        expected_outliers = 30
@@ -3103,22 +3065,18 @@ def check_outliers_fit_predict(name, estimator_orig, strict_mode=True):
            estimator.fit_predict(X)


-def check_fit_non_negative(name, estimator_orig, strict_mode=True):
-    # Check that proper warning is raised for negative X
+def check_fit_non_negative(name, estimator_orig, api_only=False):
+    # Check that a proper error is raised for negative X
    # when tag requires_positive_X is present
-    # XXX: full non API check + remove if else
    X = np.array([[-1., 1], [-1., 1]])
    y = np.array([1, 2])
    estimator = clone(estimator_orig)
-    if strict_mode:
-        with raises(ValueError, match="Negative values in data passed to"):
-            estimator.fit(X, y)
-    else:  # Don't check error message if strict mode is off
-        with raises(ValueError):
-            estimator.fit(X, y)
+    match = None if api_only else "Negative values in data passed to"
+    with raises(ValueError, match=match):
+        estimator.fit(X, y)


-def check_fit_idempotent(name, estimator_orig, strict_mode=True):
+def check_fit_idempotent(name, estimator_orig, api_only=False):
    # Check that est.fit(X) is the same as est.fit(X).fit(X). Ideally we would
    # check that the estimated parameters during training (e.g. coefs_) are
    # the same, but having a universal comparison function for those
@@ -3126,8 +3084,6 @@ def check_fit_idempotent(name, estimator_orig, strict_mode=True):
    # predict(), predict_proba(), decision_function() and transform() return
    # the same results.

-    # XXX full API check
-
    check_methods = ["predict", "transform", "decision_function",
                     "predict_proba"]
    rng = np.random.RandomState(0)
@@ -3175,7 +3131,7 @@ def check_fit_idempotent(name, estimator_orig, strict_mode=True):
    )


-def check_n_features_in(name, estimator_orig, strict_mode=True):
+def check_n_features_in(name, estimator_orig, api_only=False):
    # Make sure that n_features_in_ attribute doesn't exist until fit is
    # called, and that its value is correct.
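    # A hedged sketch of a conforming fit (illustration only, not code from
    # this patch):
    #
    #     def fit(self, X, y=None):
    #         X = check_array(X)
    #         self.n_features_in_ = X.shape[1]
    #         return self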
@@ -3213,7 +3169,7 @@ def check_n_features_in(name, estimator_orig, strict_mode=True): ) -def check_requires_y_none(name, estimator_orig, strict_mode=True): +def check_requires_y_none(name, estimator_orig, api_only=False): # Make sure that an estimator with requires_y=True fails gracefully when # given y=None @@ -3245,7 +3201,7 @@ def check_requires_y_none(name, estimator_orig, strict_mode=True): warnings.warn(warning_msg, FutureWarning) -def check_n_features_in_after_fitting(name, estimator_orig, strict_mode=True): +def check_n_features_in_after_fitting(name, estimator_orig, api_only=False): # Make sure that n_features_in are checked after fitting tags = estimator_orig._get_tags() @@ -3299,7 +3255,25 @@ def check_n_features_in_after_fitting(name, estimator_orig, strict_mode=True): estimator.partial_fit(X_bad, y) -# set of checks that are completely strict, i.e. they have no non-strict part -_FULLY_STRICT_CHECKS = set([ - 'check_n_features_in', +# set of checks that do not check API-compatibility. They are ignored when +# api_only is True. +_NON_API_CHECKS = set([ + 'check_estimator_sparse_data', + 'check_sample_weights_invariance', + 'check_complex_data', + 'check_methods_subset_invariance', + 'check_methods_sample_order_invariance', + 'check_fit2d_1sample', + 'check_fit2d_1featureucheck_estimators_dtypes', + 'check_estimators_dtypes', + 'check_transformer_preserve_dtypes', + 'check_estimators_nan_inf', + 'check_estimators_pickle', + 'check_clusterer_compute_labels_predict', + 'check_classifiers_one_label', + 'check_regressors_int', + 'check_class_weight_classifiers', + 'check_class_weight_balanced_linear_classifier', + 'check_sparsify_coefficients', + 'check_decision_proba_consistency', ]) From 6fda30c3cb7621d9dee285aa2f3c179fecb1b5b1 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Fri, 9 Oct 2020 10:37:01 -0400 Subject: [PATCH 05/21] ooops --- sklearn/model_selection/_split.py | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/sklearn/model_selection/_split.py b/sklearn/model_selection/_split.py index b99985cf91eca..c48cdc486e2b0 100644 --- a/sklearn/model_selection/_split.py +++ b/sklearn/model_selection/_split.py @@ -1744,20 +1744,15 @@ def _iter_indices(self, X, y, groups=None): test = [] for i in range(n_classes): - # permutation = rng.permutation(class_counts[i]) - # perm_indices_class_i = class_indices[i].take(permutation, - # mode='clip') - perm_indices_class_i = class_indices[i] - - + permutation = rng.permutation(class_counts[i]) + perm_indices_class_i = class_indices[i].take(permutation, + mode='clip') train.extend(perm_indices_class_i[:n_i[i]]) test.extend(perm_indices_class_i[n_i[i]:n_i[i] + t_i[i]]) - # train = rng.permutation(train) - # test = rng.permutation(test) - train = np.array(train) - test = np.array(test) + train = rng.permutation(train) + test = rng.permutation(test) yield train, test From db71e0f92f92d2d9fc7125e3859a6011a13a9a66 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Fri, 9 Oct 2020 11:23:53 -0400 Subject: [PATCH 06/21] some more --- doc/developers/develop.rst | 43 ++++++++++++++++++++++++------- doc/glossary.rst | 4 ++- doc/whats_new/v0.24.rst | 6 +++++ sklearn/utils/estimator_checks.py | 6 ++--- 4 files changed, 45 insertions(+), 14 deletions(-) diff --git a/doc/developers/develop.rst b/doc/developers/develop.rst index b7b5d2ac0316f..858fd92d2e69b 100644 --- a/doc/developers/develop.rst +++ b/doc/developers/develop.rst @@ -249,22 +249,16 @@ Rolling your own estimator If you want to implement a new estimator that is 
scikit-learn-compatible, whether it is just for you or for contributing it to
scikit-learn, there are several internals of scikit-learn that you should be
aware of in addition to
-the scikit-learn API outlined above. You can check whether your estimator
-adheres to the scikit-learn interface and standards by running
-:func:`~sklearn.utils.estimator_checks.check_estimator` on an instance. The
-:func:`~sklearn.utils.estimator_checks.parametrize_with_checks` pytest
-decorator can also be used (see its docstring for details and possible
-interactions with `pytest`)::
-
-    >>> from sklearn.utils.estimator_checks import check_estimator
-    >>> from sklearn.svm import LinearSVC
-    >>> check_estimator(LinearSVC())  # passes
+the scikit-learn API outlined above.

 The main motivation to make a class compatible to the scikit-learn estimator
 interface might be that you want to use it together with model evaluation and
 selection tools such as :class:`model_selection.GridSearchCV` and
 :class:`pipeline.Pipeline`.

+Checking the compatibility of your estimator with scikit-learn is described
+in :ref:`checking_compatibility`.
+
 Before detailing the required interface below, we describe two ways to
 achieve the correct interface more easily.

@@ -499,6 +493,35 @@ patterns.
 The :mod:`sklearn.utils.multiclass` module contains useful functions
 for working with multiclass and multilabel problems.

+.. _checking_compatibility:
+
+Checking the estimator's compatibility
+--------------------------------------
+
+You can check whether your estimator adheres to the scikit-learn interface
+and standards by running
+:func:`~sklearn.utils.estimator_checks.check_estimator` on an instance.
+
+The :func:`~sklearn.utils.estimator_checks.parametrize_with_checks` pytest
+decorator can also be used (see its docstring for details and possible
+interactions with `pytest`)::
+
+    >>> from sklearn.utils.estimator_checks import check_estimator
+    >>> from sklearn.svm import LinearSVC
+    >>> check_estimator(LinearSVC())  # passes
+
+Both :func:`~sklearn.utils.estimator_checks.check_estimator` and
+:func:`~sklearn.utils.estimator_checks.parametrize_with_checks` expose an
+`api_only` parameter: when True, the check suite will only consider pure
+API-compatibility checks. Some more advanced checks will be ignored, such as
+ensuring that error messages are informative, or ensuring that a classifier
+is able to properly discriminate classes on a simple problem. We recommend
+leaving this parameter set to False to guarantee robust and user-friendly
+estimators.
+
+The kind of checks that the check suite will run can also be partially
+controlled by setting estimator tags, as described below.
+
 .. _estimator_tags:

 Estimator Tags
diff --git a/doc/glossary.rst b/doc/glossary.rst
index 8530e966486aa..624a2a73c0a07 100644
--- a/doc/glossary.rst
+++ b/doc/glossary.rst
@@ -142,7 +142,9 @@ General Concepts
         We provide limited backwards compatibility assurances for the
         estimator checks: we may add extra requirements on estimators
         tested with this function, usually when these were informally
-        assumed but not formally tested.
+        assumed but not formally tested. In particular, checks that are
+        not API-related (i.e. those that are ignored when `api_only` is
+        True) may enforce backward-incompatible requirements.

         Despite this informal contract with our users, the software is
         provided as is, as stated in the license.
When a release inadvertently diff --git a/doc/whats_new/v0.24.rst b/doc/whats_new/v0.24.rst index b52fbfc14bd40..ba305c5da03b7 100644 --- a/doc/whats_new/v0.24.rst +++ b/doc/whats_new/v0.24.rst @@ -621,6 +621,12 @@ Changelog estimator methods are invariant if applied to the same dataset with different sample order :pr:`17598` by :user:`Jason Ngo `. +- |Feature| :func:`~utils.estimator_checks.check_estimator` and + :func:`~utils.estimator_checks.parametrize_with_checks` now expose an + `api_only` parameter which allows to control whether the check suite should + only check for pure API-compatibility, or also run more advanced checks. + :pr:`TODO` and :pr:`17361` by `Nicolas Hug`_. + Miscellaneous ............. diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index 267c5677831c6..a76b17b7a48bf 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -1365,7 +1365,7 @@ def _check_transformer(name, transformer_orig, X, y, api_only=False): else: # check for consistent n_samples assert X_pred.shape[0] == n_samples - + if hasattr(transformer, 'transform'): # raises error on malformed input for transform @@ -2451,7 +2451,7 @@ def check_class_weight_classifiers(name, classifier_orig, api_only=False): # Make sure that classifiers take class_weight into account by creating a # very noisy balanced dataset. We make sure that passing a very imbalanced # class_weights helps recovering a good score. - + if classifier_orig._get_tags()['binary_only']: problems = [2] else: @@ -3039,7 +3039,7 @@ def check_outliers_fit_predict(name, estimator_orig, api_only=False): if hasattr(estimator, 'predict'): y_pred_2 = estimator.fit(X).predict(X) assert_array_equal(y_pred, y_pred_2) - + if hasattr(estimator, "contamination") and not api_only: # proportion of outliers equal to contamination parameter when not # set to 'auto' From b4b813820402a21c21a9b5e9441ff354f3952d84 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Fri, 9 Oct 2020 11:43:54 -0400 Subject: [PATCH 07/21] whatsnew --- doc/whats_new/v0.24.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats_new/v0.24.rst b/doc/whats_new/v0.24.rst index ba305c5da03b7..8550dea9afd8f 100644 --- a/doc/whats_new/v0.24.rst +++ b/doc/whats_new/v0.24.rst @@ -625,7 +625,7 @@ Changelog :func:`~utils.estimator_checks.parametrize_with_checks` now expose an `api_only` parameter which allows to control whether the check suite should only check for pure API-compatibility, or also run more advanced checks. - :pr:`TODO` and :pr:`17361` by `Nicolas Hug`_. + :pr:`18582` and :pr:`17361` by `Nicolas Hug`_. Miscellaneous ............. From 41393fa9915cada6a80659addc530456977a47b8 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Sat, 17 Oct 2020 09:07:30 -0400 Subject: [PATCH 08/21] addressed comments --- sklearn/tests/test_common.py | 2 +- sklearn/utils/estimator_checks.py | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/sklearn/tests/test_common.py b/sklearn/tests/test_common.py index c662b8fc55c11..8eac4f25a82f3 100644 --- a/sklearn/tests/test_common.py +++ b/sklearn/tests/test_common.py @@ -221,7 +221,7 @@ class MyNMFWithBadErrorMessage(NMF): # Same as NMF but raises an uninformative error message if X has negative # value. This estimator would fail the check suite with api_only=False, # specifically it would fail check_fit_non_negative because its error - # message doesn't match + # message doesn't match the expected one. 
def __init__(self): # declare init to avoid deprecation warning since default has changed diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index 5d4417d8d7105..fa48d71f2f963 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -1297,7 +1297,7 @@ def check_transformer_general(name, transformer, readonly_memmap=False, if readonly_memmap: X, y = create_memmap_backed_data([X, y]) - _check_transformer(name, transformer, X, y) + _check_transformer(name, transformer, X, y, api_only=api_only) @ignore_warnings(category=FutureWarning) @@ -1311,9 +1311,10 @@ def check_transformer_data_not_an_array(name, transformer, api_only=False): X = _pairwise_estimator_convert_X(X, transformer) this_X = _NotAnArray(X) this_y = _NotAnArray(np.asarray(y)) - _check_transformer(name, transformer, this_X, this_y) + _check_transformer(name, transformer, this_X, this_y, api_only=api_only) # try the same with some list - _check_transformer(name, transformer, X.tolist(), y.tolist()) + _check_transformer(name, transformer, X.tolist(), y.tolist(), + api_only=api_only) @ignore_warnings(category=FutureWarning) @@ -3269,8 +3270,7 @@ def check_n_features_in_after_fitting(name, estimator_orig, api_only=False): 'check_methods_subset_invariance', 'check_methods_sample_order_invariance', 'check_fit2d_1sample', - 'check_fit2d_1featureucheck_estimators_dtypes', - 'check_estimators_dtypes', + 'check_fit2d_1feature', 'check_transformer_preserve_dtypes', 'check_estimators_nan_inf', 'check_estimators_pickle', From cb66293d245de4f0f0f44c29cce141ad6f931713 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Wed, 28 Oct 2020 18:01:15 -0400 Subject: [PATCH 09/21] make pickle full API check --- sklearn/utils/estimator_checks.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index aa9d81bba5ad0..f61ffcbc2d7a8 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -3272,7 +3272,6 @@ def check_n_features_in_after_fitting(name, estimator_orig, api_only=False): 'check_fit2d_1feature', 'check_transformer_preserve_dtypes', 'check_estimators_nan_inf', - 'check_estimators_pickle', 'check_clusterer_compute_labels_predict', 'check_classifiers_one_label', 'check_regressors_int', From 1ff888766a2fe90937af8ed67923aecba95fc980 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Wed, 28 Oct 2020 18:02:35 -0400 Subject: [PATCH 10/21] Apply suggestions from code review Co-authored-by: Guillaume Lemaitre --- sklearn/utils/estimator_checks.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index bd2566c12dcce..9d84d1de60f15 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -1382,6 +1382,7 @@ def _check_transformer(name, transformer_orig, X, y, api_only=False): ): transformer.transform(X[:, :-1]) if api_only: + # The remaining asserts are non-API asserts return if name in CROSS_DECOMPOSITION: @@ -1769,6 +1770,7 @@ def check_classifier_multioutput(name, estimator, api_only=False): assert_array_equal(y_prob.round().astype(int), y_pred) if api_only: + # The remaining asserts are non-API asserts return if (hasattr(estimator, "decision_function") and @@ -1836,6 +1838,8 @@ def check_clustering(name, clusterer_orig, readonly_memmap=False, assert pred.shape == (n_samples,) if api_only: + # The remaining asserts are non-API asserts + return assert adjusted_rand_score(pred, y) > 0.4 From 
a68194be052e781e01151a5c009996774ac22931 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Mon, 9 Nov 2020 16:59:51 +0100 Subject: [PATCH 11/21] TST reintroduce _safe_tags for estimator not inheriting from BaseEstimator --- sklearn/utils/estimator_checks.py | 97 ++++++++++++++++++++----------- 1 file changed, 64 insertions(+), 33 deletions(-) diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index 3cd19967ba9c1..98fd46a0b776d 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -32,6 +32,7 @@ from ..base import ( clone, ClusterMixin, + _DEFAULT_TAGS, is_classifier, is_regressor, is_outlier_detector, @@ -66,9 +67,39 @@ CROSS_DECOMPOSITION = ['PLSCanonical', 'PLSRegression', 'CCA', 'PLSSVD'] +def _safe_tags(estimator, key=None): + """Safely get estimator tags for common checks. + + :class:`~sklearn.BaseEstimator` provides the estimator tags machinery. + However, if a compatible estimator does not inherit from this base class, + we should default to the default tag. + + Parameters + ---------- + estimator : estimator object + The estimator from which to get the tag. + key : str, default=None + Tag name to get. By default (`None`), all tags are returned. + + Returns + ------- + tags : dict + The estimator tags. + """ + if hasattr(estimator, "_get_tags"): + if key is not None: + return estimator._get_tags().get(key, _DEFAULT_TAGS[key]) + tags = estimator._get_tags() + return {key: tags.get(key, _DEFAULT_TAGS[key]) + for key in _DEFAULT_TAGS.keys()} + if key is not None: + return _DEFAULT_TAGS[key] + return _DEFAULT_TAGS + + def _yield_checks(estimator): name = estimator.__class__.__name__ - tags = estimator._get_tags() + tags = _safe_tags(estimator) pairwise = _is_pairwise(estimator) yield check_no_attributes_set_in_init @@ -116,7 +147,7 @@ def _yield_checks(estimator): def _yield_classifier_checks(classifier): - tags = classifier._get_tags() + tags = _safe_tags(classifier) # test classifiers can handle non-array data and pandas objects yield check_classifier_data_not_an_array @@ -170,7 +201,7 @@ def check_supervised_y_no_nan(name, estimator_orig, strict_mode=True): def _yield_regressor_checks(regressor): - tags = regressor._get_tags() + tags = _safe_tags(regressor) # TODO: test with intercept # TODO: test with multiple responses # basic testing @@ -196,7 +227,7 @@ def _yield_regressor_checks(regressor): def _yield_transformer_checks(transformer): - tags = transformer._get_tags() + tags = _safe_tags(transformer) # All transformers should either deal with sparse data or raise an # exception with type TypeError and an intelligible error message if not tags["no_validation"]: @@ -206,7 +237,7 @@ def _yield_transformer_checks(transformer): if tags["preserves_dtype"]: yield check_transformer_preserve_dtypes yield partial(check_transformer_general, readonly_memmap=True) - if not transformer._get_tags()["stateless"]: + if not _safe_tags(transformer, key="stateless"): yield check_transformers_unfitted # Dependent on external solvers and hence accessing the iter # param is non-trivial. 
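    # To illustrate the fallback behaviour that _safe_tags provides,
    # consider the sketch below (NotScikitLearnNative is hypothetical and
    # only used for illustration):
    #
    #     class NotScikitLearnNative:
    #         # A compatible estimator that does not inherit from
    #         # BaseEstimator and therefore has no _get_tags method.
    #         def fit(self, X, y=None):
    #             return self
    #
    #     # Falls back to _DEFAULT_TAGS instead of raising AttributeError:
    #     _safe_tags(NotScikitLearnNative(), key="binary_only")
    #     # Estimators inheriting from BaseEstimator still go through
    #     # _get_tags:
    #     _safe_tags(LogisticRegression(), key="binary_only")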
@@ -243,13 +274,13 @@ def _yield_outliers_checks(estimator): # test outlier detectors can handle non-array data yield check_classifier_data_not_an_array # test if NotFittedError is raised - if estimator._get_tags()["requires_fit"]: + if _safe_tags(estimator, key="requires_fit"): yield check_estimators_unfitted def _yield_all_checks(estimator): name = estimator.__class__.__name__ - tags = estimator._get_tags() + tags = _safe_tags(estimator) if "2darray" not in tags["X_types"]: warnings.warn("Can't test estimator {} which requires input " " of type {}".format(name, tags["X_types"]), @@ -421,7 +452,7 @@ def _should_be_skipped_or_marked(estimator, check, strict_mode): check_name = (check.func.__name__ if isinstance(check, partial) else check.__name__) - xfail_checks = estimator._get_tags()['_xfail_checks'] or {} + xfail_checks = _safe_tags(estimator, key='_xfail_checks') or {} if check_name in xfail_checks: return True, xfail_checks[check_name] @@ -772,7 +803,7 @@ def check_estimator_sparse_data(name, estimator_orig, strict_mode=True): with ignore_warnings(category=FutureWarning): estimator = clone(estimator_orig) y = _enforce_estimator_tags_y(estimator, y) - tags = estimator_orig._get_tags() + tags = _safe_tags(estimator_orig) for matrix_format, X in _generate_sparse_matrix(X_csr): # catch deprecation warnings with ignore_warnings(category=FutureWarning): @@ -829,7 +860,7 @@ def check_sample_weights_pandas_series(name, estimator_orig, strict_mode=True): X = pd.DataFrame(_pairwise_estimator_convert_X(X, estimator_orig)) y = pd.Series([1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 2, 2]) weights = pd.Series([1] * 12) - if estimator._get_tags()["multioutput_only"]: + if _safe_tags(estimator, key="multioutput_only"): y = pd.DataFrame(y) try: estimator.fit(X, y, sample_weight=weights) @@ -854,7 +885,7 @@ def check_sample_weights_not_an_array(name, estimator_orig, strict_mode=True): X = _NotAnArray(_pairwise_estimator_convert_X(X, estimator_orig)) y = _NotAnArray([1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 2, 2]) weights = _NotAnArray([1] * 12) - if estimator._get_tags()["multioutput_only"]: + if _safe_tags(estimator, key="multioutput_only"): y = _NotAnArray(y.data.reshape(-1, 1)) estimator.fit(X, y, sample_weight=weights) @@ -959,7 +990,7 @@ def check_dtype_object(name, estimator_orig, strict_mode=True): rng = np.random.RandomState(0) X = _pairwise_estimator_convert_X(rng.rand(40, 10), estimator_orig) X = X.astype(object) - tags = estimator_orig._get_tags() + tags = _safe_tags(estimator_orig) y = (X[:, 0] * 4).astype(int) estimator = clone(estimator_orig) y = _enforce_estimator_tags_y(estimator, y) @@ -1179,7 +1210,7 @@ def check_methods_sample_order_invariance( X = 3 * rnd.uniform(size=(20, 3)) X = _pairwise_estimator_convert_X(X, estimator_orig) y = X[:, 0].astype(np.int) - if estimator_orig._get_tags()['binary_only']: + if _safe_tags(estimator_orig, key='binary_only'): y[y == 2] = 1 estimator = clone(estimator_orig) y = _enforce_estimator_tags_y(estimator, y) @@ -1368,7 +1399,7 @@ def _check_transformer(name, transformer_orig, X, y, strict_mode=True): X_pred2 = transformer.transform(X) X_pred3 = transformer.fit_transform(X, y=y_) - if transformer_orig._get_tags()['non_deterministic']: + if _safe_tags(transformer_orig, key='non_deterministic'): msg = name + ' is non deterministic' raise SkipTest(msg) if isinstance(X_pred, tuple) and isinstance(X_pred2, tuple): @@ -1399,7 +1430,7 @@ def _check_transformer(name, transformer_orig, X, y, strict_mode=True): # raises error on malformed input for transform if hasattr(X, 'shape') 
and \ - not transformer._get_tags()["stateless"] and \ + not _safe_tags(transformer, key="stateless") and \ X.ndim == 2 and X.shape[1] > 1: # If it's not an array, it does not have a 'T' property @@ -1414,7 +1445,7 @@ def _check_transformer(name, transformer_orig, X, y, strict_mode=True): @ignore_warnings def check_pipeline_consistency(name, estimator_orig, strict_mode=True): - if estimator_orig._get_tags()['non_deterministic']: + if _safe_tags(estimator_orig, key='non_deterministic'): msg = name + ' is non deterministic' raise SkipTest(msg) @@ -1508,7 +1539,7 @@ def check_transformer_preserve_dtypes( X -= X.min() X = _pairwise_estimator_convert_X(X, transformer_orig) - for dtype in transformer_orig._get_tags()["preserves_dtype"]: + for dtype in _safe_tags(transformer_orig, key="preserves_dtype"): X_cast = X.astype(dtype) transformer = clone(transformer_orig) set_random_state(transformer) @@ -1634,7 +1665,7 @@ def check_estimators_pickle(name, estimator_orig, strict_mode=True): X -= X.min() X = _pairwise_estimator_convert_X(X, estimator_orig, kernel=rbf_kernel) - tags = estimator_orig._get_tags() + tags = _safe_tags(estimator_orig) # include NaN values when the estimator should deal with them if tags['allow_nan']: # set randomly 10 elements to np.nan @@ -1696,7 +1727,7 @@ def check_estimators_partial_fit_n_features(name, estimator_orig, @ignore_warnings(category=FutureWarning) def check_classifier_multioutput(name, estimator, strict_mode=True): n_samples, n_labels, n_classes = 42, 5, 3 - tags = estimator._get_tags() + tags = _safe_tags(estimator) estimator = clone(estimator) X, y = make_multilabel_classification(random_state=42, n_samples=n_samples, @@ -1804,7 +1835,7 @@ def check_clustering(name, clusterer_orig, readonly_memmap=False, pred = clusterer.labels_ assert pred.shape == (n_samples,) assert adjusted_rand_score(pred, y) > 0.4 - if clusterer._get_tags()['non_deterministic']: + if _safe_tags(clusterer, key='non_deterministic'): return set_random_state(clusterer) with warnings.catch_warnings(record=True): @@ -1896,7 +1927,7 @@ def check_classifiers_train(name, classifier_orig, readonly_memmap=False, X_m, y_m, X_b, y_b = create_memmap_backed_data([X_m, y_m, X_b, y_b]) problems = [(X_b, y_b)] - tags = classifier_orig._get_tags() + tags = _safe_tags(classifier_orig) if not tags['binary_only']: problems.append((X_m, y_m)) @@ -2187,7 +2218,7 @@ def check_estimators_unfitted(name, estimator_orig, strict_mode=True): @ignore_warnings(category=FutureWarning) def check_supervised_y_2d(name, estimator_orig, strict_mode=True): - tags = estimator_orig._get_tags() + tags = _safe_tags(estimator_orig) rnd = np.random.RandomState(0) n_samples = 30 X = _pairwise_estimator_convert_X( @@ -2291,7 +2322,7 @@ def check_classifiers_classes(name, classifier_orig, strict_mode=True): y_names_binary = np.take(labels_binary, y_binary) problems = [(X_binary, y_binary, y_names_binary)] - if not classifier_orig._get_tags()['binary_only']: + if not _safe_tags(classifier_orig, key='binary_only'): problems.append((X_multiclass, y_multiclass, y_names_multiclass)) for X, y, y_names in problems: @@ -2377,7 +2408,7 @@ def check_regressors_train(name, regressor_orig, readonly_memmap=False, # TODO: find out why PLS and CCA fail. 
RANSAC is random # and furthermore assumes the presence of outliers, hence # skipped - if not regressor._get_tags()["poor_score"]: + if not _safe_tags(regressor, key="poor_score"): assert regressor.score(X, y_) > 0.5 @@ -2402,7 +2433,7 @@ def check_regressors_no_decision_function(name, regressor_orig, @ignore_warnings(category=FutureWarning) def check_class_weight_classifiers(name, classifier_orig, strict_mode=True): - if classifier_orig._get_tags()['binary_only']: + if _safe_tags(classifier_orig, key='binary_only'): problems = [2] else: problems = [2, 3] @@ -2441,7 +2472,7 @@ def check_class_weight_classifiers(name, classifier_orig, strict_mode=True): y_pred = classifier.predict(X_test) # XXX: Generally can use 0.89 here. On Windows, LinearSVC gets # 0.88 (Issue #9111) - if not classifier_orig._get_tags()['poor_score']: + if not _safe_tags(classifier_orig, key='poor_score'): assert np.mean(y_pred == 0) > 0.87 @@ -2761,16 +2792,16 @@ def param_filter(p): def _enforce_estimator_tags_y(estimator, y): # Estimators with a `requires_positive_y` tag only accept strictly positive # data - if estimator._get_tags()["requires_positive_y"]: + if _safe_tags(estimator, key="requires_positive_y"): # Create strictly positive y. The minimal increment above 0 is 1, as # y could be of integer dtype. y += 1 + abs(y.min()) # Estimators with a `binary_only` tag only accept up to two unique y values - if estimator._get_tags()["binary_only"] and y.size > 0: + if _safe_tags(estimator, key="binary_only") and y.size > 0: y = np.where(y == y.flat[0], y, y.flat[0] + 1) # Estimators in mono_output_task_error raise ValueError if y is of 1-D # Convert into a 2-D y for those estimators. - if estimator._get_tags()["multioutput_only"]: + if _safe_tags(estimator, key="multioutput_only"): return np.reshape(y, (-1, 1)) return y @@ -2782,11 +2813,11 @@ def _enforce_estimator_tags_x(estimator, X): X = X.dot(X.T) # Estimators with `1darray` in `X_types` tag only accept # X of shape (`n_samples`,) - if '1darray' in estimator._get_tags()['X_types']: + if '1darray' in _safe_tags(estimator, key='X_types'): X = X[:, 0] # Estimators with a `requires_positive_X` tag only accept # strictly positive data - if estimator._get_tags()['requires_positive_X']: + if _safe_tags(estimator, key='requires_positive_X'): X -= X.min() return X @@ -2928,7 +2959,7 @@ def check_classifiers_regression_target(name, estimator_orig, X = X + 1 + abs(X.min(axis=0)) # be sure that X is non-negative e = clone(estimator_orig) msg = "Unknown label type: " - if not e._get_tags()["no_validation"]: + if not _safe_tags(e, keyy="no_validation"): with raises(ValueError, match=msg): e.fit(X, y) @@ -3145,7 +3176,7 @@ def check_requires_y_none(name, estimator_orig, strict_mode=True): def check_n_features_in_after_fitting(name, estimator_orig, strict_mode=True): # Make sure that n_features_in are checked after fitting - tags = estimator_orig._get_tags() + tags = _safe_tags(estimator_orig) if "2darray" not in tags["X_types"] or tags["no_validation"]: return From 36f1c5c68f3ff16a5b9fa936e100de2bc7a59ffa Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Mon, 9 Nov 2020 17:06:29 +0100 Subject: [PATCH 12/21] typo --- sklearn/utils/estimator_checks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index 98fd46a0b776d..ac30f66d41866 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -2959,7 +2959,7 @@ def check_classifiers_regression_target(name, 
estimator_orig,
     X = X + 1 + abs(X.min(axis=0))  # be sure that X is non-negative
     e = clone(estimator_orig)
     msg = "Unknown label type: "
-    if not _safe_tags(e, keyy="no_validation"):
+    if not _safe_tags(e, key="no_validation"):
         with raises(ValueError, match=msg):
             e.fit(X, y)

From 9e540141319126dce275b76a996679d45495de2e Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre
Date: Wed, 11 Nov 2020 13:20:26 +0100
Subject: [PATCH 13/21] TST implement minimal classifier

---
 sklearn/utils/tests/test_estimator_checks.py | 41 +++++++++++++++++++-
 1 file changed, 39 insertions(+), 2 deletions(-)

diff --git a/sklearn/utils/tests/test_estimator_checks.py b/sklearn/utils/tests/test_estimator_checks.py
index ecbf7cb7be7f4..9a069224f88ba 100644
--- a/sklearn/utils/tests/test_estimator_checks.py
+++ b/sklearn/utils/tests/test_estimator_checks.py
@@ -23,6 +23,7 @@ from sklearn.utils.estimator_checks import check_regressor_data_not_an_array
 from sklearn.utils.validation import check_is_fitted
 from sklearn.utils.estimator_checks import check_outlier_corruption
+from sklearn.utils.estimator_checks import parametrize_with_checks
 from sklearn.utils.fixes import np_version, parse_version
 from sklearn.ensemble import RandomForestClassifier
 from sklearn.linear_model import LinearRegression, SGDClassifier
@@ -418,8 +419,8 @@ def test_check_estimator():
     # check that we have a set_params and can clone
     msg = "Passing a class was deprecated"
     assert_raises_regex(TypeError, msg, check_estimator, object)
-    msg = "object has no attribute '_get_tags'"
-    assert_raises_regex(AttributeError, msg, check_estimator, object())
+    # msg = "object has no attribute '_get_tags'"
+    # assert_raises_regex(AttributeError, msg, check_estimator, object())
     msg = (
         "Parameter 'p' of estimator 'HasMutableParameters' is of type "
         "object which is not allowed"
     )
@@ -620,6 +621,42 @@ def test_check_estimator_pairwise():
     check_estimator(est)


+class MinimalEstimator:
+
+    # Our minimal contract requires that the following are implemented
+    _get_param_names = BaseEstimator._get_param_names  # used by get_params
+    set_params = BaseEstimator.set_params
+    get_params = BaseEstimator.get_params
+    __setstate__ = BaseEstimator.__setstate__
+    __getstate__ = BaseEstimator.__getstate__
+
+    def fit(self, X, y):
+        return self
+
+
+class MinimalClassifier(MinimalEstimator):
+
+    def fit(self, X, y):
+        self.classes_ = np.unique(y)
+        return super().fit(X, y)
+
+    def predict_proba(self, X):
+        proba_shape = (len(X), self.classes_.size)
+        y_proba = np.zeros(shape=proba_shape, dtype=np.float64)
+        y_proba[:, 0] = 1.0
+        return y_proba
+
+    def predict(self, X):
+        y_proba = self.predict_proba(X)
+        y_pred = y_proba.argmax(axis=1)
+        return self.classes_[y_pred]
+
+
+@parametrize_with_checks([MinimalClassifier()], strict_mode=False)
+def test_check_estimator_minimal(estimator, check):
+    check(estimator)
+
+
 def test_check_classifier_data_not_an_array():
     assert_raises_regex(AssertionError,
                         'Not equal to tolerance',

From eaca564fff2482539ea8ed822ec579698aca2eb5 Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre
Date: Wed, 11 Nov 2020 14:06:15 +0100
Subject: [PATCH 14/21] allow pickling

---
 sklearn/utils/tests/test_estimator_checks.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/sklearn/utils/tests/test_estimator_checks.py b/sklearn/utils/tests/test_estimator_checks.py
index 84098d052d551..14555069a73e4 100644
--- a/sklearn/utils/tests/test_estimator_checks.py
+++ b/sklearn/utils/tests/test_estimator_checks.py
@@ -627,8 +627,15 @@ class MinimalEstimator:
_get_param_names = BaseEstimator._get_param_names # used by get_params set_params = BaseEstimator.set_params get_params = BaseEstimator.get_params - # __setstate__ = BaseEstimator.__setstate__ - # __getstate__ = BaseEstimator.__getstate__ + + def __getstate__(self): + state = self.__dict__.copy() + # only because we are within scikit-learn source code + from sklearn import __version__ + return dict(state.items(), _sklearn_version=__version__) + + def __setstate__(self, state): + self.__dict__.update(state) def fit(self, X, y): X = check_array(X) From a06dfc469bcccb7334fc7420e3bb6d4bab671797 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Mon, 23 Nov 2020 11:45:50 +0100 Subject: [PATCH 15/21] remove base class --- sklearn/utils/estimator_checks.py | 3 +- sklearn/utils/tests/test_estimator_checks.py | 61 ++++++++++++++------ 2 files changed, 44 insertions(+), 20 deletions(-) diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index cf3b2d52f4230..1e3d684b219e9 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -1700,7 +1700,8 @@ def check_estimators_pickle(name, estimator_orig, api_only=False): # pickle and unpickle! pickled_estimator = pickle.dumps(estimator) - if estimator.__module__.startswith('sklearn.'): + module_name = estimator.__module__ + if module_name.startswith('sklearn.') and "test_" not in module_name: assert b"version" in pickled_estimator unpickled_estimator = pickle.loads(pickled_estimator) diff --git a/sklearn/utils/tests/test_estimator_checks.py b/sklearn/utils/tests/test_estimator_checks.py index 14555069a73e4..c1e1c468574f3 100644 --- a/sklearn/utils/tests/test_estimator_checks.py +++ b/sklearn/utils/tests/test_estimator_checks.py @@ -621,18 +621,16 @@ def test_check_estimator_pairwise(): check_estimator(est) -class MinimalEstimator: +class MinimalClassifier: - # Our minimal required supposed that the following are implemented - _get_param_names = BaseEstimator._get_param_names # used by get_params - set_params = BaseEstimator.set_params - get_params = BaseEstimator.get_params + def get_params(self, **params): + return {} + + def set_params(self, deep=True): + return self def __getstate__(self): - state = self.__dict__.copy() - # only because we are within scikit-learn source code - from sklearn import __version__ - return dict(state.items(), _sklearn_version=__version__) + return self.__dict__.copy() def __setstate__(self, state): self.__dict__.update(state) @@ -640,14 +638,8 @@ def __setstate__(self, state): def fit(self, X, y): X = check_array(X) self.n_features_in_ = X.shape[1] - return self - - -class MinimalClassifier(MinimalEstimator): - - def fit(self, X, y): self.classes_ = np.unique(y) - return super().fit(X, y) + return self def predict_proba(self, X): check_is_fitted(self) @@ -666,11 +658,25 @@ def score(self, X, y): return 1.0 -class MinimalRegressor(MinimalEstimator): +class MinimalRegressor: + + def get_params(self, **params): + return {} + + def set_params(self, deep=True): + return self + + def __getstate__(self): + return self.__dict__.copy() + + def __setstate__(self, state): + self.__dict__.update(state) def fit(self, X, y): + X = check_array(X) + self.n_features_in_ = X.shape[1] self._mean = np.mean(y) - return super().fit(X, y) + return self def predict(self, X): X = check_array(X) @@ -680,7 +686,24 @@ def score(self, X, y): return 1.0 -class MinimalTransformer(MinimalEstimator): +class MinimalTransformer: + + def get_params(self, **params): + return {} + + def 
set_params(self, deep=True): + return self + + def __getstate__(self): + return self.__dict__.copy() + + def __setstate__(self, state): + self.__dict__.update(state) + + def fit(self, X, y): + X = check_array(X) + self.n_features_in_ = X.shape[1] + return self def transform(self, X, y=None): check_is_fitted(self) From 111ef8ebda12004acb84385aab84cdf81f022865 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Mon, 23 Nov 2020 12:03:43 +0100 Subject: [PATCH 16/21] fix issue with id --- sklearn/utils/tests/test_estimator_checks.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/sklearn/utils/tests/test_estimator_checks.py b/sklearn/utils/tests/test_estimator_checks.py index c1e1c468574f3..c174fd6beb56a 100644 --- a/sklearn/utils/tests/test_estimator_checks.py +++ b/sklearn/utils/tests/test_estimator_checks.py @@ -623,6 +623,11 @@ def test_check_estimator_pairwise(): class MinimalClassifier: + def __repr__(self): + # Only required when using pytest-xdist to get an id not associated + # with the memory location + return self.__name__ + def get_params(self, **params): return {} @@ -660,6 +665,11 @@ def score(self, X, y): class MinimalRegressor: + def __repr__(self): + # Only required when using pytest-xdist to get an id not associated + # with the memory location + return self.__name__ + def get_params(self, **params): return {} @@ -688,6 +698,11 @@ def score(self, X, y): class MinimalTransformer: + def __repr__(self): + # Only required when using pytest-xdist to get an id not associated + # with the memory location + return self.__name__ + def get_params(self, **params): return {} From 3383bda3669cbf9802dcbc7646408d23114f1e28 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Mon, 23 Nov 2020 12:06:34 +0100 Subject: [PATCH 17/21] fix --- sklearn/utils/tests/test_estimator_checks.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/sklearn/utils/tests/test_estimator_checks.py b/sklearn/utils/tests/test_estimator_checks.py index c174fd6beb56a..bce33e6d44f93 100644 --- a/sklearn/utils/tests/test_estimator_checks.py +++ b/sklearn/utils/tests/test_estimator_checks.py @@ -626,7 +626,7 @@ class MinimalClassifier: def __repr__(self): # Only required when using pytest-xdist to get an id not associated # with the memory location - return self.__name__ + return self.__class__.__name__ def get_params(self, **params): return {} @@ -660,7 +660,8 @@ def predict(self, X): return self.classes_[y_pred] def score(self, X, y): - return 1.0 + from sklearn.metrics import accuracy_score + return accuracy_score(y, self.predict(X)) class MinimalRegressor: @@ -668,7 +669,7 @@ class MinimalRegressor: def __repr__(self): # Only required when using pytest-xdist to get an id not associated # with the memory location - return self.__name__ + return self.__class__.__name__ def get_params(self, **params): return {} @@ -693,7 +694,8 @@ def predict(self, X): return np.ones(shape=(X.shape[0],)) * self._mean def score(self, X, y): - return 1.0 + from sklearn.metrics import r2_score + return r2_score(y, self.predict(X)) class MinimalTransformer: @@ -701,7 +703,7 @@ class MinimalTransformer: def __repr__(self): # Only required when using pytest-xdist to get an id not associated # with the memory location - return self.__name__ + return self.__class__.__name__ def get_params(self, **params): return {} From 425746d5662a72ebd8335d6368637ced36833624 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Mon, 23 Nov 2020 12:09:03 +0100 Subject: [PATCH 18/21] create most frequent 
for classifier --- sklearn/utils/tests/test_estimator_checks.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sklearn/utils/tests/test_estimator_checks.py b/sklearn/utils/tests/test_estimator_checks.py index bce33e6d44f93..2f7a23bc8afec 100644 --- a/sklearn/utils/tests/test_estimator_checks.py +++ b/sklearn/utils/tests/test_estimator_checks.py @@ -643,7 +643,8 @@ def __setstate__(self, state): def fit(self, X, y): X = check_array(X) self.n_features_in_ = X.shape[1] - self.classes_ = np.unique(y) + self.classes_, counts = np.unique(y, return_counts=True) + self._most_frequent_class = self.classes_[counts.argmax()] return self def predict_proba(self, X): @@ -651,7 +652,7 @@ def predict_proba(self, X): X = check_array(X) proba_shape = (X.shape[0], self.classes_.size) y_proba = np.zeros(shape=proba_shape, dtype=np.float64) - y_proba[:, 0] = 1.0 + y_proba[:, self._most_frequent_class] = 1.0 return y_proba def predict(self, X): From c012e093efea2a658cac00f536196949a10032d4 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Mon, 23 Nov 2020 12:39:35 +0100 Subject: [PATCH 19/21] iter --- sklearn/utils/tests/test_estimator_checks.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sklearn/utils/tests/test_estimator_checks.py b/sklearn/utils/tests/test_estimator_checks.py index 2f7a23bc8afec..bead3db69de96 100644 --- a/sklearn/utils/tests/test_estimator_checks.py +++ b/sklearn/utils/tests/test_estimator_checks.py @@ -622,6 +622,7 @@ def test_check_estimator_pairwise(): class MinimalClassifier: + _estimator_type = "classifier" def __repr__(self): # Only required when using pytest-xdist to get an id not associated @@ -666,6 +667,7 @@ def score(self, X, y): class MinimalRegressor: + _estimator_type = "regressor" def __repr__(self): # Only required when using pytest-xdist to get an id not associated From 9bf5cbcabf1d0ec07f74a70b7485ecbd50b85ee9 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Mon, 23 Nov 2020 13:30:04 +0100 Subject: [PATCH 20/21] iter --- sklearn/utils/estimator_checks.py | 2 +- sklearn/utils/tests/test_estimator_checks.py | 24 ++++++++++++++------ 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index 56907be9136ff..67d269d62f168 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -2331,7 +2331,7 @@ def check_classifiers_predictions(X, y, name, classifier_orig, (classifier, ", ".join(map(str, y_exp)), ", ".join(map(str, y_pred)))) - if name != "ComplementNB": + if not api_only and name != "ComplementNB": # This is a pathological data set for ComplementNB. 
# For some specific cases 'ComplementNB' predicts less classes # than expected diff --git a/sklearn/utils/tests/test_estimator_checks.py b/sklearn/utils/tests/test_estimator_checks.py index bead3db69de96..61e7099d64084 100644 --- a/sklearn/utils/tests/test_estimator_checks.py +++ b/sklearn/utils/tests/test_estimator_checks.py @@ -21,7 +21,6 @@ from sklearn.utils.estimator_checks import check_no_attributes_set_in_init from sklearn.utils.estimator_checks import check_classifier_data_not_an_array from sklearn.utils.estimator_checks import check_regressor_data_not_an_array -from sklearn.utils.validation import check_is_fitted from sklearn.utils.estimator_checks import check_outlier_corruption from sklearn.utils.estimator_checks import parametrize_with_checks from sklearn.utils.fixes import np_version, parse_version @@ -33,7 +32,12 @@ from sklearn.linear_model import MultiTaskElasticNet, LogisticRegression from sklearn.svm import SVC, NuSVC from sklearn.neighbors import KNeighborsRegressor -from sklearn.utils.validation import check_array +from sklearn.utils.multiclass import check_classification_targets +from sklearn.utils.validation import ( + check_array, + check_is_fitted, + check_X_y, +) from sklearn.utils import all_estimators from sklearn.exceptions import SkipTestWarning @@ -642,18 +646,21 @@ def __setstate__(self, state): self.__dict__.update(state) def fit(self, X, y): - X = check_array(X) + X, y = check_X_y(X, y) + check_classification_targets(y) self.n_features_in_ = X.shape[1] self.classes_, counts = np.unique(y, return_counts=True) - self._most_frequent_class = self.classes_[counts.argmax()] + self._most_frequent_class_idx = counts.argmax() return self def predict_proba(self, X): check_is_fitted(self) X = check_array(X) + if X.shape[1] != self.n_features_in_: + raise ValueError proba_shape = (X.shape[0], self.classes_.size) y_proba = np.zeros(shape=proba_shape, dtype=np.float64) - y_proba[:, self._most_frequent_class] = 1.0 + y_proba[:, self._most_frequent_class_idx] = 1.0 return y_proba def predict(self, X): @@ -687,13 +694,16 @@ def __setstate__(self, state): self.__dict__.update(state) def fit(self, X, y): - X = check_array(X) + X, y = check_X_y(X, y) self.n_features_in_ = X.shape[1] self._mean = np.mean(y) return self def predict(self, X): + check_is_fitted(self) X = check_array(X) + if X.shape[1] != self.n_features_in_: + raise ValueError return np.ones(shape=(X.shape[0],)) * self._mean def score(self, X, y): @@ -720,7 +730,7 @@ def __getstate__(self): def __setstate__(self, state): self.__dict__.update(state) - def fit(self, X, y): + def fit(self, X, y=None): X = check_array(X) self.n_features_in_ = X.shape[1] return self From d254c880297ef01b8b3177c34d976402033f2221 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Mon, 23 Nov 2020 13:41:50 +0100 Subject: [PATCH 21/21] iter --- sklearn/utils/estimator_checks.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index 67d269d62f168..ff3e32978dbb8 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -2382,12 +2382,16 @@ def check_classifiers_classes(name, classifier_orig, api_only=False): for X, y, y_names in problems: for y_names_i in [y_names, y_names.astype('O')]: y_ = _choose_check_classifiers_labels(name, y, y_names_i) - check_classifiers_predictions(X, y_, name, classifier_orig) + check_classifiers_predictions( + X, y_, name, classifier_orig, api_only + ) labels_binary = [-1, 1] 
y_names_binary = np.take(labels_binary, y_binary) y_binary = _choose_check_classifiers_labels(name, y_binary, y_names_binary) - check_classifiers_predictions(X_binary, y_binary, name, classifier_orig) + check_classifiers_predictions( + X_binary, y_binary, name, classifier_orig, api_only + ) @ignore_warnings(category=FutureWarning)