Skip to content

MAINT validate parameters of Pipeline #25133

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 26 additions & 5 deletions sklearn/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
from .utils.validation import check_memory
from .utils.validation import check_is_fitted
from .utils import check_pandas_support
from .utils._param_validation import HasMethods, Hidden
from .utils._set_output import _safe_set_output, _get_output_config
from .utils.fixes import delayed
from .exceptions import NotFittedError
Expand All @@ -40,7 +41,7 @@
def _final_estimator_has(attr):
"""Check that final_estimator has `attr`.

Used together with `avaliable_if` in `Pipeline`."""
Used together with `available_if` in `Pipeline`."""

def check(self):
# raise original `AttributeError` if `attr` does not exist
Expand Down Expand Up @@ -143,6 +144,12 @@ class Pipeline(_BaseComposition):
# BaseEstimator interface
_required_parameters = ["steps"]

_parameter_constraints: dict = {
"steps": [list, Hidden(tuple)],
"memory": [None, str, HasMethods(["cache"])],
"verbose": ["boolean"],
}

def __init__(self, steps, *, memory=None, verbose=False):
self.steps = steps
self.memory = memory
Expand Down Expand Up @@ -307,8 +314,15 @@ def named_steps(self):

@property
def _final_estimator(self):
estimator = self.steps[-1][1]
return "passthrough" if estimator is None else estimator
    try:
        estimator = self.steps[-1][1]
        return "passthrough" if estimator is None else estimator
    except (ValueError, AttributeError, TypeError):
        # This condition happens when a call to a method is first calling
        # `_available_if` and `fit` did not validate `steps` yet. We
        # return `None` and an `InvalidParameterError` will be raised
        # right after.
        return None

[Review comment — Member Author]: This is an additional change that we did not think of at first.

def _log_message(self, step_idx):
if not self.verbose:
Expand Down Expand Up @@ -398,6 +412,7 @@ def fit(self, X, y=None, **fit_params):
self : object
Pipeline with fitted steps.
"""
self._validate_params()
fit_params_steps = self._check_fit_params(**fit_params)
Xt = self._fit(X, y, **fit_params_steps)
with _print_elapsed_time("Pipeline", self._log_message(len(self.steps) - 1)):
Expand Down Expand Up @@ -434,6 +449,7 @@ def fit_transform(self, X, y=None, **fit_params):
Xt : ndarray of shape (n_samples, n_transformed_features)
Transformed samples.
"""
self._validate_params()
fit_params_steps = self._check_fit_params(**fit_params)
Xt = self._fit(X, y, **fit_params_steps)

Expand Down Expand Up @@ -510,6 +526,7 @@ def fit_predict(self, X, y=None, **fit_params):
y_pred : ndarray
Result of calling `fit_predict` on the final estimator.
"""
self._validate_params()
fit_params_steps = self._check_fit_params(**fit_params)
Xt = self._fit(X, y, **fit_params_steps)

Expand Down Expand Up @@ -728,8 +745,12 @@ def classes_(self):
return self.steps[-1][1].classes_

def _more_tags(self):
# check if first estimator expects pairwise input
return {"pairwise": _safe_tags(self.steps[0][1], "pairwise")}
try:
return {"pairwise": _safe_tags(self.steps[0][1], "pairwise")}
except (ValueError, AttributeError, TypeError):
            # This happens when `steps` is not a list of (name, estimator)
# tuples and `fit` is not called yet to validate the steps.
return {}

def get_feature_names_out(self, input_features=None):
"""Get output feature names for transformation.
Expand Down
16 changes: 14 additions & 2 deletions sklearn/tests/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import HalvingGridSearchCV
from sklearn.model_selection import HalvingRandomSearchCV
from sklearn.pipeline import make_pipeline
from sklearn.pipeline import make_pipeline, Pipeline

from sklearn.utils import IS_PYPY
from sklearn.utils._tags import _DEFAULT_TAGS, _safe_tags
Expand Down Expand Up @@ -273,6 +273,16 @@ def test_class_support_removed():
parametrize_with_checks([LogisticRegression])


def _generate_pipeline():
for final_estimator in [Ridge(), LogisticRegression()]:
yield Pipeline(
steps=[
("scaler", StandardScaler()),
("final_estimator", final_estimator),
]
)


def _generate_search_cv_instances():
for SearchCV, (Estimator, param_grid) in product(
[
Expand Down Expand Up @@ -458,7 +468,9 @@ def test_estimators_do_not_raise_errors_in_init_or_set_params(Estimator):


@pytest.mark.parametrize(
"estimator", _tested_estimators(), ids=_get_check_estimator_ids
"estimator",
chain(_tested_estimators(), _generate_pipeline()),
ids=_get_check_estimator_ids,
)
def test_check_param_validation(estimator):
name = estimator.__class__.__name__
Expand Down
40 changes: 0 additions & 40 deletions sklearn/tests/test_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -1185,46 +1185,6 @@ def test_set_params_nested_pipeline():
estimator.set_params(a__steps=[("b", LogisticRegression())], a__b__C=5)


def test_pipeline_wrong_memory():
# Test that an error is raised when memory is not a string or a Memory
# instance
X = iris.data
y = iris.target
# Define memory as an integer
memory = 1
cached_pipe = Pipeline([("transf", DummyTransf()), ("svc", SVC())], memory=memory)

msg = re.escape(
"'memory' should be None, a string or have the same interface "
"as joblib.Memory. Got memory='1' instead."
)
with pytest.raises(ValueError, match=msg):
cached_pipe.fit(X, y)


class DummyMemory:
def cache(self, func):
return func


class WrongDummyMemory:
pass


def test_pipeline_with_cache_attribute():
X = np.array([[1, 2]])
pipe = Pipeline([("transf", Transf()), ("clf", Mult())], memory=DummyMemory())
pipe.fit(X, y=None)
dummy = WrongDummyMemory()
pipe = Pipeline([("transf", Transf()), ("clf", Mult())], memory=dummy)
msg = re.escape(
"'memory' should be None, a string or have the same interface "
f"as joblib.Memory. Got memory='{dummy}' instead."
)
with pytest.raises(ValueError, match=msg):
pipe.fit(X)


def test_pipeline_memory():
X = iris.data
y = iris.target
Expand Down