Skip to content

FIX make pipeline pass check_estimator #26325

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
May 5, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions doc/whats_new/v1.3.rst
Original file line number Diff line number Diff line change
Expand Up @@ -459,6 +459,10 @@ Changelog
`pandas.DataFrame`
:pr:`25220` by :user:`Ian Thompson <it176131>`.

- |Fix| :meth:`pipeline.Pipeline.fit_transform` now raises an `AttributeError`
if the last step of the pipeline does not support `fit_transform`.
:pr:`26325` by `Adrin Jalali`_.

:mod:`sklearn.preprocessing`
............................

Expand Down
34 changes: 32 additions & 2 deletions sklearn/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -421,6 +421,14 @@ def fit(self, X, y=None, **fit_params):

return self

def _can_fit_transform(self):
    """Tell whether `fit_transform` should be exposed on this pipeline.

    Used as the condition passed to the `available_if` decorator on
    `fit_transform`.

    Returns
    -------
    bool
        True when the final estimator is the string "passthrough", or when
        it has a `transform` or a `fit_transform` attribute.
    """
    final_estimator = self._final_estimator
    return (
        final_estimator == "passthrough"
        or hasattr(final_estimator, "transform")
        or hasattr(final_estimator, "fit_transform")
    )

@available_if(_can_fit_transform)
def fit_transform(self, X, y=None, **fit_params):
"""Fit the model and transform with the final estimator.

Expand Down Expand Up @@ -744,12 +752,34 @@ def classes_(self):
return self.steps[-1][1].classes_

def _more_tags(self):
    """Build the estimator tags reported by this pipeline.

    The returned dict always carries the `_xfail_checks` entries. The
    `pairwise` tag is read from the first step and the `multioutput` tag
    from the last step, both through `_safe_tags`. A tag is left out when
    the corresponding step cannot be looked up or queried.

    Returns
    -------
    tags : dict
        Mapping of tag name to tag value.
    """
    # Both checks are expected to fail for the same reason, so the message is
    # built once; the trailing space keeps the two sentences from fusing.
    steps_mutation_reason = (
        "Pipeline changes the `steps` parameter, which it shouldn't. "
        "Therefore this test is x-fail until we fix this."
    )
    tags = {
        "_xfail_checks": {
            "check_dont_overwrite_parameters": steps_mutation_reason,
            "check_estimators_overwrite_params": steps_mutation_reason,
        }
    }

    # `pairwise` is delegated to the first step, `multioutput` to the last.
    for tag_name, step_index in (("pairwise", 0), ("multioutput", -1)):
        try:
            estimator = self.steps[step_index][1]
            tags[tag_name] = _safe_tags(estimator, tag_name)
        except (ValueError, AttributeError, TypeError):
            # This happens when `steps` is not a list of (name, estimator)
            # tuples and `fit` has not been called yet to validate the steps.
            continue

    return tags

def get_feature_names_out(self, input_features=None):
"""Get output feature names for transformation.
Expand Down
22 changes: 11 additions & 11 deletions sklearn/tests/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,17 @@ def _tested_estimators(type_filter=None):
yield estimator


@parametrize_with_checks(list(_tested_estimators()))
def _generate_pipeline():
    """Yield two-step pipelines to run through the common estimator checks.

    Each pipeline chains a `StandardScaler` with one final estimator: first
    a `Ridge`, then a `LogisticRegression`.
    """
    final_estimators = [Ridge(), LogisticRegression()]
    for final_estimator in final_estimators:
        steps = [
            ("scaler", StandardScaler()),
            ("final_estimator", final_estimator),
        ]
        yield Pipeline(steps=steps)


@parametrize_with_checks(list(chain(_tested_estimators(), _generate_pipeline())))
def test_estimators(estimator, check, request):
# Common tests for estimator instances
with ignore_warnings(category=(FutureWarning, ConvergenceWarning, UserWarning)):
Expand Down Expand Up @@ -283,16 +293,6 @@ def _generate_column_transformer_instances():
)


def _generate_pipeline():
    """Yield two-step pipelines to run through the common estimator checks.

    Each pipeline chains a `StandardScaler` with one final estimator: first
    a `Ridge`, then a `LogisticRegression`.
    """
    final_estimators = [Ridge(), LogisticRegression()]
    for final_estimator in final_estimators:
        steps = [
            ("scaler", StandardScaler()),
            ("final_estimator", final_estimator),
        ]
        yield Pipeline(steps=steps)


def _generate_search_cv_instances():
for SearchCV, (Estimator, param_grid) in product(
[
Expand Down
3 changes: 2 additions & 1 deletion sklearn/tests/test_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -670,7 +670,8 @@ def test_set_pipeline_steps():
with pytest.raises(TypeError, match=msg):
pipeline.fit([[1]], [1])

with pytest.raises(TypeError, match=msg):
msg = "This 'Pipeline' has no attribute 'fit_transform'"
with pytest.raises(AttributeError, match=msg):
pipeline.fit_transform([[1]], [1])


Expand Down