Skip to content

BUG Fixes FunctionTransformer validation in inverse_transform #20961

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions doc/whats_new/v1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -807,6 +807,9 @@ Changelog
`n_features_in_` and will be removed in 1.2. :pr:`20240` by
:user:`Jérémie du Boisberranger <jeremiedbb>`.

- |Fix| :class:`preprocessing.FunctionTransformer` no longer sets `n_features_in_`
based on the input to `inverse_transform`. :pr:`20961` by `Thomas Fan`_.

:mod:`sklearn.svm`
...................

Expand Down
27 changes: 21 additions & 6 deletions sklearn/preprocessing/_function_transformer.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import warnings

from ..base import BaseEstimator, TransformerMixin
from ..utils.validation import _allclose_dense_sparse
from ..utils.validation import _allclose_dense_sparse, check_array


def _identity(X):
Expand Down Expand Up @@ -71,6 +71,20 @@ class FunctionTransformer(TransformerMixin, BaseEstimator):

.. versionadded:: 0.18

Attributes
----------
n_features_in_ : int
Number of features seen during :term:`fit`. Defined only when
`validate=True`.

.. versionadded:: 0.24

feature_names_in_ : ndarray of shape (`n_features_in_`,)
Names of features seen during :term:`fit`. Defined only when `validate=True`
and `X` has feature names that are all strings.

.. versionadded:: 1.0

See Also
--------
MaxAbsScaler : Scale each feature by its maximum absolute value.
Expand Down Expand Up @@ -110,9 +124,9 @@ def __init__(
self.kw_args = kw_args
self.inv_kw_args = inv_kw_args

def _check_input(self, X):
def _check_input(self, X, *, reset):
if self.validate:
return self._validate_data(X, accept_sparse=self.accept_sparse)
return self._validate_data(X, accept_sparse=self.accept_sparse, reset=reset)
return X

def _check_inverse_transform(self, X):
Expand Down Expand Up @@ -146,7 +160,7 @@ def fit(self, X, y=None):
self : object
FunctionTransformer class instance.
"""
X = self._check_input(X)
X = self._check_input(X, reset=True)
if self.check_inverse and not (self.func is None or self.inverse_func is None):
self._check_inverse_transform(X)
return self
Expand All @@ -164,6 +178,7 @@ def transform(self, X):
X_out : array-like, shape (n_samples, n_features)
Transformed input.
"""
X = self._check_input(X, reset=False)
return self._transform(X, func=self.func, kw_args=self.kw_args)

def inverse_transform(self, X):
Expand All @@ -179,11 +194,11 @@ def inverse_transform(self, X):
X_out : array-like, shape (n_samples, n_features)
Transformed input.
"""
if self.validate:
X = check_array(X, accept_sparse=self.accept_sparse)
return self._transform(X, func=self.inverse_func, kw_args=self.inv_kw_args)

def _transform(self, X, func=None, kw_args=None):
X = self._check_input(X)

if func is None:
func = _identity

Expand Down
24 changes: 24 additions & 0 deletions sklearn/preprocessing/tests/test_function_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,3 +174,27 @@ def test_function_transformer_frame():
transformer = FunctionTransformer()
X_df_trans = transformer.fit_transform(X_df)
assert hasattr(X_df_trans, "loc")


def test_function_transformer_validate_inverse():
    """Check that `inverse_transform` leaves `n_features_in_` untouched.

    The forward function widens the data by one column, so the array handed
    to `inverse_transform` has a different width than the array seen in
    `fit`. The fitted `n_features_in_` must still match the original input.
    """

    def append_ones_column(X):
        # Widen X by a trailing column of ones.
        ones = np.ones((X.shape[0], 1))
        return np.hstack((X, ones))

    def drop_last_column(X):
        # Undo `append_ones_column` by slicing the extra column off.
        return X[:, :-1]

    X = np.array([[1, 2], [3, 4], [3, 4]])
    n_features_expected = X.shape[1]

    transformer = FunctionTransformer(
        func=append_ones_column,
        inverse_func=drop_last_column,
        validate=True,
    )

    X_widened = transformer.fit_transform(X)
    assert transformer.n_features_in_ == n_features_expected

    # The inverse input has one more feature than the fit input; validating
    # it must not overwrite the fitted feature count.
    transformer.inverse_transform(X_widened)
    assert transformer.n_features_in_ == n_features_expected