diff --git a/doc/whats_new/v1.3.rst b/doc/whats_new/v1.3.rst index 12f50ca7fc2b5..7ca3e28847736 100644 --- a/doc/whats_new/v1.3.rst +++ b/doc/whats_new/v1.3.rst @@ -459,6 +459,10 @@ Changelog `pandas.DataFrame` :pr:`25220` by :user:`Ian Thompson `. +- |Fix| :meth:`pipeline.Pipeline.fit_transform` now raises an `AttributeError` + if the last step of the pipeline does not support `fit_transform`. + :pr:`26325` by `Adrin Jalali`_. + :mod:`sklearn.preprocessing` ............................ diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py index a604bb6fc6e6e..8c5dc3bd82917 100644 --- a/sklearn/pipeline.py +++ b/sklearn/pipeline.py @@ -421,6 +421,14 @@ def fit(self, X, y=None, **fit_params): return self + def _can_fit_transform(self): + return ( + self._final_estimator == "passthrough" + or hasattr(self._final_estimator, "transform") + or hasattr(self._final_estimator, "fit_transform") + ) + + @available_if(_can_fit_transform) def fit_transform(self, X, y=None, **fit_params): """Fit the model and transform with the final estimator. @@ -744,12 +752,34 @@ def classes_(self): return self.steps[-1][1].classes_ def _more_tags(self): + tags = { + "_xfail_checks": { + "check_dont_overwrite_parameters": ( + "Pipeline changes the `steps` parameter, which it shouldn't." + "Therefore this test is x-fail until we fix this." + ), + "check_estimators_overwrite_params": ( + "Pipeline changes the `steps` parameter, which it shouldn't." + "Therefore this test is x-fail until we fix this." + ), + } + } + try: - return {"pairwise": _safe_tags(self.steps[0][1], "pairwise")} + tags["pairwise"] = _safe_tags(self.steps[0][1], "pairwise") except (ValueError, AttributeError, TypeError): # This happens when the `steps` is not a list of (name, estimator) # tuples and `fit` is not called yet to validate the steps. - return {} + pass + + try: + tags["multioutput"] = _safe_tags(self.steps[-1][1], "multioutput") + except (ValueError, AttributeError, TypeError): + # This happens when the `steps` is not a list of (name, estimator) + # tuples and `fit` is not called yet to validate the steps. + pass + + return tags def get_feature_names_out(self, input_features=None): """Get output feature names for transformation. diff --git a/sklearn/tests/test_common.py b/sklearn/tests/test_common.py index ae5c2d9cd6953..176a2d463d162 100644 --- a/sklearn/tests/test_common.py +++ b/sklearn/tests/test_common.py @@ -133,7 +133,17 @@ def _tested_estimators(type_filter=None): yield estimator -@parametrize_with_checks(list(_tested_estimators())) +def _generate_pipeline(): + for final_estimator in [Ridge(), LogisticRegression()]: + yield Pipeline( + steps=[ + ("scaler", StandardScaler()), + ("final_estimator", final_estimator), + ] + ) + + +@parametrize_with_checks(list(chain(_tested_estimators(), _generate_pipeline()))) def test_estimators(estimator, check, request): # Common tests for estimator instances with ignore_warnings(category=(FutureWarning, ConvergenceWarning, UserWarning)): @@ -283,16 +293,6 @@ def _generate_column_transformer_instances(): ) -def _generate_pipeline(): - for final_estimator in [Ridge(), LogisticRegression()]: - yield Pipeline( - steps=[ - ("scaler", StandardScaler()), - ("final_estimator", final_estimator), - ] - ) - - def _generate_search_cv_instances(): for SearchCV, (Estimator, param_grid) in product( [ diff --git a/sklearn/tests/test_pipeline.py b/sklearn/tests/test_pipeline.py index fa7fa2ad20dcf..28067ea316074 100644 --- a/sklearn/tests/test_pipeline.py +++ b/sklearn/tests/test_pipeline.py @@ -670,7 +670,8 @@ def test_set_pipeline_steps(): with pytest.raises(TypeError, match=msg): pipeline.fit([[1]], [1]) - with pytest.raises(TypeError, match=msg): + msg = "This 'Pipeline' has no attribute 'fit_transform'" + with pytest.raises(AttributeError, match=msg): pipeline.fit_transform([[1]], [1])