From 305a63cfa40129e5a5e0d389d98a800546492603 Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre <g.lemaitre58@gmail.com>
Date: Mon, 26 Jul 2021 16:38:45 +0200
Subject: [PATCH] TST split API checks from other checks

---
 sklearn/utils/common_utils_checks.py  | 114 ++++++++++++++++++++
 sklearn/utils/estimator_api_checks.py |  92 ++++++++++++++++
 sklearn/utils/estimator_checks.py     | 148 +++-----------------------
 3 files changed, 222 insertions(+), 132 deletions(-)
 create mode 100644 sklearn/utils/common_utils_checks.py
 create mode 100644 sklearn/utils/estimator_api_checks.py

diff --git a/sklearn/utils/common_utils_checks.py b/sklearn/utils/common_utils_checks.py
new file mode 100644
index 0000000000000..589747d03acbb
--- /dev/null
+++ b/sklearn/utils/common_utils_checks.py
@@ -0,0 +1,114 @@
+import numpy as np
+
+from ..base import _is_pairwise
+from ..metrics.pairwise import linear_kernel, pairwise_distances
+
+from ._tags import _safe_tags
+
+
+def _enforce_estimator_tags_y(estimator, y):
+    """Modify `y` to be compatible with the available estimator tags.
+
+    Parameters
+    ----------
+    estimator : object
+        Estimator object to test.
+
+    y : ndarray
+        The data to be converted.
+
+    Returns
+    -------
+    y : ndarray
+        The converted data.
+    """
+    # Estimators with a `requires_positive_y` tag only accept strictly positive
+    # data
+    if _safe_tags(estimator, key="requires_positive_y"):
+        # Create strictly positive y. The minimal increment above 0 is 1, as
+        # y could be of integer dtype.
+        y += 1 + abs(y.min())
+    # Estimators with a `binary_only` tag only accept up to two unique y values
+    if _safe_tags(estimator, key="binary_only") and y.size > 0:
+        y = np.where(y == y.flat[0], y, y.flat[0] + 1)
+    # Estimators in mono_output_task_error raise ValueError if y is of 1-D
+    # Convert into a 2-D y for those estimators.
+    if _safe_tags(estimator, key="multioutput_only"):
+        return np.reshape(y, (-1, 1))
+    return y
+
+
+def _enforce_estimator_tags_x(estimator, X):
+    """Modify `X` to be compatible with the available estimator tags.
+
+    Parameters
+    ----------
+    estimator : object
+        Estimator object to test.
+
+    X : ndarray
+        The data to be converted.
+
+    Returns
+    -------
+    X : ndarray
+        The converted data.
+    """
+    # Pairwise estimators only accept
+    # X of shape (`n_samples`, `n_samples`)
+    if _is_pairwise(estimator):
+        X = X.dot(X.T)
+    # Estimators with `1darray` in `X_types` tag only accept
+    # X of shape (`n_samples`,)
+    if "1darray" in _safe_tags(estimator, key="X_types"):
+        X = X[:, 0]
+    # Estimators with a `requires_positive_X` tag only accept
+    # strictly positive data
+    if _safe_tags(estimator, key="requires_positive_X"):
+        X -= X.min()
+    return X
+
+
+def _is_pairwise_metric(estimator):
+    """Returns True if estimator accepts pairwise metric.
+
+    Parameters
+    ----------
+    estimator : object
+        Estimator object to test.
+
+    Returns
+    -------
+    out : bool
+        True if _pairwise is set to True and False otherwise.
+    """
+    metric = getattr(estimator, "metric", None)
+
+    return bool(metric == "precomputed")
+
+
+def _pairwise_estimator_convert_X(X, estimator, kernel=linear_kernel):
+    """Convert `X` so to be used by a pairwise estimator.
+
+    Parameters
+    ----------
+    X : ndarray of shape (n_samples, n_features)
+        The data to be converted.
+
+    estimator : object
+        An estimator to apply on `X`.
+
+    kernel : callable, default=linear_kernel
+        If `estimator` requires a kernel, this parameter will transform `X`
+        into a kernel matrix.
+
+    Returns
+    -------
+    X_new : ndarray of shape (n_samples, n_features) or (n_samples, n_samples)
+        The converted `X`.
+    """
+    if _is_pairwise_metric(estimator):
+        return pairwise_distances(X, metric="euclidean")
+    if _is_pairwise(estimator):
+        return kernel(X, X)
+    return X
diff --git a/sklearn/utils/estimator_api_checks.py b/sklearn/utils/estimator_api_checks.py
new file mode 100644
index 0000000000000..bb153189b863d
--- /dev/null
+++ b/sklearn/utils/estimator_api_checks.py
@@ -0,0 +1,92 @@
+"""Checks for minimal scikit-learn estimator support."""
+from inspect import signature
+
+import numpy as np
+
+# from . import IS_PYPY
+from ..base import clone
+from ._testing import _get_args, ignore_warnings, set_random_state
+
+from .common_utils_checks import (
+    # _enforce_estimator_tags_x,
+    _enforce_estimator_tags_y,
+    _pairwise_estimator_convert_X,
+)
+
+
+def _yield_api_estimator_checks(estimator):
+    # name = estimator.__class__.__name__
+    # tags = _safe_tags(estimator)
+    # pairwise = _is_pairwise(estimator)
+
+    yield check_no_attributes_set_in_init
+    yield check_takes_at_least_optional_y
+
+
+@ignore_warnings(category=FutureWarning)
+def check_no_attributes_set_in_init(name, estimator_orig):
+    """Check attribute setting at `__init__`."""
+    try:
+        # Clone fails if the estimator does not store
+        # all parameters as an attribute during init
+        estimator = clone(estimator_orig)
+    except AttributeError:
+        raise AttributeError(
+            f"Estimator {name} should store all parameters as an attribute during init."
+            " Cloning mechanism will not work otherwise."
+        )
+
+    init_params = _get_args(type(estimator).__init__)
+    # TODO: check if we can get more generic and not have a special case for PyPy
+    # if IS_PYPY:
+    #     # __init__ signature has additional objects in PyPy
+    #     for key in ["obj"]:
+    #         if key in init_params:
+    #             init_params.remove(key)
+    parents_init_params = [
+        param
+        for params_parent in (_get_args(parent) for parent in type(estimator).__mro__)
+        for param in params_parent
+    ]
+
+    # Test for no setting apart from parameters during init
+    invalid_attr = set(vars(estimator)) - set(init_params) - set(parents_init_params)
+    assert not invalid_attr, (
+        f"Estimator {name} should not set any attribute apart"
+        f" from parameters during init. Found attributes {sorted(invalid_attr)}."
+    )
+
+
+@ignore_warnings
+def check_takes_at_least_optional_y(name, estimator_orig):
+    """Check that estimator accepts an optional `y` to be compatible with
+    `Pipeline`.
+
+    Tha parameter `y` should be available in the following methods: `fit`,
+    `score`, `partial_fit`, `fit_predict`, `fit_transform`.
+    """
+    estimator = clone(estimator_orig)
+    set_random_state(estimator)
+
+    rnd = np.random.RandomState(0)
+    n_samples = 30
+    X = rnd.uniform(size=(n_samples, 3))
+    X = _pairwise_estimator_convert_X(X, estimator_orig)
+    y = np.arange(n_samples) % 3
+    y = _enforce_estimator_tags_y(estimator, y)
+
+    supported_methods = ["fit", "score", "partial_fit", "fit_predict", "fit_transform"]
+    for method_name in supported_methods:
+        method = getattr(estimator, method_name, None)
+        if method is not None:
+            method(X, y)
+            args = [p.name for p in signature(method).parameters.values()]
+            if args[0] == "self":
+                # `if_delegate_has_method` or `available_if` makes methods
+                # into functions with an explicit "self", so need to shift
+                # arguments
+                args = args[1:]
+            assert args[1] in ["y", "Y"], (
+                "Expected `y` or `Y` as second argument for method "
+                f"{method_name}of {name}. Got arguments: {repr(args)}."
+            )
diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index ff853be22f663..eb3aa425d76c6 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -11,9 +11,10 @@
 from scipy.stats import rankdata
 import joblib
 
-from . import IS_PYPY
+# from . import IS_PYPY
 from .. import config_context
-from ._testing import _get_args
+
+# from ._testing import _get_args
 from ._testing import assert_raise_message
 from ._testing import assert_array_equal
 from ._testing import assert_array_almost_equal
@@ -49,7 +50,8 @@
 from ..model_selection import train_test_split
 from ..model_selection import ShuffleSplit
 from ..model_selection._validation import _safe_split
-from ..metrics.pairwise import rbf_kernel, linear_kernel, pairwise_distances
+from ..metrics.pairwise import rbf_kernel
+
 
 from . import shuffle
 from ._tags import (
@@ -66,6 +68,14 @@
     make_regression,
 )
 
+from .common_utils_checks import (
+    _enforce_estimator_tags_x,
+    _enforce_estimator_tags_y,
+    _is_pairwise_metric,
+    _pairwise_estimator_convert_X,
+)
+from .estimator_api_checks import _yield_api_estimator_checks
+
 REGRESSION_DATASET = None
 CROSS_DECOMPOSITION = ["PLSCanonical", "PLSRegression", "CCA", "PLSSVD"]
 
@@ -75,9 +85,7 @@ def _yield_checks(estimator):
     tags = _safe_tags(estimator)
     pairwise = _is_pairwise(estimator)
 
-    yield check_no_attributes_set_in_init
     yield check_estimators_dtypes
-    yield check_fit_score_takes_y
     if has_fit_parameter(estimator, "sample_weight"):
         yield check_sample_weights_pandas_series
         yield check_sample_weights_not_an_array
@@ -274,6 +282,9 @@ def _yield_all_checks(estimator):
         )
         return
 
+    for check in _yield_api_estimator_checks(estimator):
+        yield check
+
     for check in _yield_checks(estimator):
         yield check
     if is_classifier(estimator):
@@ -685,34 +696,6 @@ def __array_function__(self, func, types, args, kwargs):
         raise TypeError("Don't want to call array_function {}!".format(func.__name__))
 
 
-def _is_pairwise_metric(estimator):
-    """Returns True if estimator accepts pairwise metric.
-
-    Parameters
-    ----------
-    estimator : object
-        Estimator object to test.
-
-    Returns
-    -------
-    out : bool
-        True if _pairwise is set to True and False otherwise.
-    """
-    metric = getattr(estimator, "metric", None)
-
-    return bool(metric == "precomputed")
-
-
-def _pairwise_estimator_convert_X(X, estimator, kernel=linear_kernel):
-
-    if _is_pairwise_metric(estimator):
-        return pairwise_distances(X, metric="euclidean")
-    if _is_pairwise(estimator):
-        return kernel(X, X)
-
-    return X
-
-
 def _generate_sparse_matrix(X_csr):
     """Generate sparse matrices with {32,64}bit indices of diverse format.
 
@@ -1580,36 +1563,6 @@ def check_pipeline_consistency(name, estimator_orig):
             assert_allclose_dense_sparse(result, result_pipe)
 
 
-@ignore_warnings
-def check_fit_score_takes_y(name, estimator_orig):
-    # check that all estimators accept an optional y
-    # in fit and score so they can be used in pipelines
-    rnd = np.random.RandomState(0)
-    n_samples = 30
-    X = rnd.uniform(size=(n_samples, 3))
-    X = _pairwise_estimator_convert_X(X, estimator_orig)
-    y = np.arange(n_samples) % 3
-    estimator = clone(estimator_orig)
-    y = _enforce_estimator_tags_y(estimator, y)
-    set_random_state(estimator)
-
-    funcs = ["fit", "score", "partial_fit", "fit_predict", "fit_transform"]
-    for func_name in funcs:
-        func = getattr(estimator, func_name, None)
-        if func is not None:
-            func(X, y)
-            args = [p.name for p in signature(func).parameters.values()]
-            if args[0] == "self":
-                # if_delegate_has_method makes methods into functions
-                # with an explicit "self", so need to shift arguments
-                args = args[1:]
-            assert args[1] in ["y", "Y"], (
-                "Expected y or Y as second argument for method "
-                "%s of %s. Got arguments: %r."
-                % (func_name, type(estimator).__name__, args)
-            )
-
-
 @ignore_warnings
 def check_estimators_dtypes(name, estimator_orig):
     rnd = np.random.RandomState(0)
@@ -2723,42 +2676,6 @@ def check_estimators_overwrite_params(name, estimator_orig):
         )
 
 
-@ignore_warnings(category=FutureWarning)
-def check_no_attributes_set_in_init(name, estimator_orig):
-    """Check setting during init."""
-    try:
-        # Clone fails if the estimator does not store
-        # all parameters as an attribute during init
-        estimator = clone(estimator_orig)
-    except AttributeError:
-        raise AttributeError(
-            f"Estimator {name} should store all parameters as an attribute during init."
-        )
-
-    if hasattr(type(estimator).__init__, "deprecated_original"):
-        return
-
-    init_params = _get_args(type(estimator).__init__)
-    if IS_PYPY:
-        # __init__ signature has additional objects in PyPy
-        for key in ["obj"]:
-            if key in init_params:
-                init_params.remove(key)
-    parents_init_params = [
-        param
-        for params_parent in (_get_args(parent) for parent in type(estimator).__mro__)
-        for param in params_parent
-    ]
-
-    # Test for no setting apart from parameters during init
-    invalid_attr = set(vars(estimator)) - set(init_params) - set(parents_init_params)
-    assert not invalid_attr, (
-        "Estimator %s should not set any attribute apart"
-        " from parameters during init. Found attributes %s."
-        % (name, sorted(invalid_attr))
-    )
-
-
 @ignore_warnings(category=FutureWarning)
 def check_sparsify_coefficients(name, estimator_orig):
     X = np.array(
@@ -2973,39 +2890,6 @@ def param_filter(p):
                     assert param_value == init_param.default, failure_text
 
 
-def _enforce_estimator_tags_y(estimator, y):
-    # Estimators with a `requires_positive_y` tag only accept strictly positive
-    # data
-    if _safe_tags(estimator, key="requires_positive_y"):
-        # Create strictly positive y. The minimal increment above 0 is 1, as
-        # y could be of integer dtype.
-        y += 1 + abs(y.min())
-    # Estimators with a `binary_only` tag only accept up to two unique y values
-    if _safe_tags(estimator, key="binary_only") and y.size > 0:
-        y = np.where(y == y.flat[0], y, y.flat[0] + 1)
-    # Estimators in mono_output_task_error raise ValueError if y is of 1-D
-    # Convert into a 2-D y for those estimators.
-    if _safe_tags(estimator, key="multioutput_only"):
-        return np.reshape(y, (-1, 1))
-    return y
-
-
-def _enforce_estimator_tags_x(estimator, X):
-    # Pairwise estimators only accept
-    # X of shape (`n_samples`, `n_samples`)
-    if _is_pairwise(estimator):
-        X = X.dot(X.T)
-    # Estimators with `1darray` in `X_types` tag only accept
-    # X of shape (`n_samples`,)
-    if "1darray" in _safe_tags(estimator, key="X_types"):
-        X = X[:, 0]
-    # Estimators with a `requires_positive_X` tag only accept
-    # strictly positive data
-    if _safe_tags(estimator, key="requires_positive_X"):
-        X -= X.min()
-    return X
-
-
 @ignore_warnings(category=FutureWarning)
 def check_non_transformer_estimators_n_iter(name, estimator_orig):
     # Test that estimators that are not transformers with a parameter