scikit-learn · thomasjpfan · Nov 30, 2021 · Nov 6, 2021 · Nov 6, 2021 · Nov 6, 2021
diff --git a/doc/whats_new/v1.1.rst b/doc/whats_new/v1.1.rst
@@ -237,6 +237,15 @@ Changelog
   instead of `__init__`.
   :pr:`21434` by :user:`Krum Arnaudov <krumeto>`.
 
+- |Enhancement| Added the `get_feature_names_out` method and a new parameter
+  `feature_names_out` to :class:`preprocessing.FunctionTransformer`. You can set
+  `feature_names_out` to 'one-to-one' to use the input feature names as the
+  output feature names, or you can set it to a callable that returns the output
+  feature names. This is especially useful when the transformer changes the
+  number of features. If `feature_names_out` is None (which is the default),
+  then `get_feature_names_out` is not defined.
+  :pr:`21569` by :user:`Aurélien Geron <ageron>`.
+
 :mod:`sklearn.svm`
 ..................
 

diff --git a/sklearn/preprocessing/_function_transformer.py b/sklearn/preprocessing/_function_transformer.py
@@ -1,7 +1,14 @@
 import warnings
 
+import numpy as np
+
 from ..base import BaseEstimator, TransformerMixin
-from ..utils.validation import _allclose_dense_sparse, check_array
+from ..utils.metaestimators import available_if
+from ..utils.validation import (
+    _allclose_dense_sparse,
+    _check_feature_names_in,
+    check_array,
+)
 
 
 def _identity(X):
@@ -61,6 +68,20 @@ class FunctionTransformer(TransformerMixin, BaseEstimator):
 
        .. versionadded:: 0.20
 
+    feature_names_out : callable, 'one-to-one' or None, default=None
+        Determines the list of feature names that will be returned by the
+        `get_feature_names_out` method. If it is 'one-to-one', then the output
+        feature names will be equal to the input feature names. If it is a
+        callable, then it must take two positional arguments: this
+        `FunctionTransformer` (`self`) and an array-like of input feature names
+        (`input_features`). It must return an array-like of output feature
+        names. The `get_feature_names_out` method is only defined if
+        `feature_names_out` is not None.
+
+        See ``get_feature_names_out`` for more details.
+
+        .. versionadded:: 1.1
+
     kw_args : dict, default=None
         Dictionary of additional keyword arguments to pass to func.
 
@@ -113,6 +134,7 @@ def __init__(
         validate=False,
         accept_sparse=False,
         check_inverse=True,
+        feature_names_out=None,
         kw_args=None,
         inv_kw_args=None,
     ):
@@ -121,6 +143,7 @@ def __init__(
         self.validate = validate
         self.accept_sparse = accept_sparse
         self.check_inverse = check_inverse
+        self.feature_names_out = feature_names_out
         self.kw_args = kw_args
         self.inv_kw_args = inv_kw_args
 
@@ -198,6 +221,63 @@ def inverse_transform(self, X):
             X = check_array(X, accept_sparse=self.accept_sparse)
         return self._transform(X, func=self.inverse_func, kw_args=self.inv_kw_args)
 
+    @available_if(lambda self: self.feature_names_out is not None)
+    def get_feature_names_out(self, input_features=None):
+        """Get output feature names for transformation.
+
+        This method is only defined if `feature_names_out` is not None.
+
+        Parameters
+        ----------
+        input_features : array-like of str or None, default=None
+            Input feature names.
+
+            - If `input_features` is None, then `feature_names_in_` is
+              used as the input feature names. If `feature_names_in_` is not
+              defined, then names are generated:
+              `[x0, x1, ..., x(n_features_in_ - 1)]`.
+            - If `input_features` is array-like, then `input_features` must
+              match `feature_names_in_` if `feature_names_in_` is defined.
+
+        Returns
+        -------
+        feature_names_out : ndarray of str objects
+            Transformed feature names.
+
+            - If `feature_names_out` is 'one-to-one', the input feature names
+              are returned (see `input_features` above). This requires
+              `feature_names_in_` and/or `n_features_in_` to be defined, which
+              is done automatically if `validate=True`. Alternatively, you can
+              set them in `func`.
+            - If `feature_names_out` is a callable, then it is called with two
+              arguments, `self` and `input_features`, and its return value is
+              returned by this method.
+        """
+        if hasattr(self, "n_features_in_") or input_features is not None:
+            input_features = _check_feature_names_in(self, input_features)
+        if self.feature_names_out == "one-to-one":
+            if input_features is None:
+                raise ValueError(
+                    "When 'feature_names_out' is 'one-to-one', either "
+                    "'input_features' must be passed, or 'feature_names_in_' "
+                    "and/or 'n_features_in_' must be defined. If you set "
+                    "'validate' to 'True', then they will be defined "
+                    "automatically when 'fit' is called. Alternatively, you "
+                    "can set them in 'func'."
+                )
+            names_out = input_features
+        elif callable(self.feature_names_out):
+            names_out = self.feature_names_out(self, input_features)
+        else:
+            raise ValueError(
+                f"feature_names_out={self.feature_names_out!r} is invalid. "
+                'It must either be "one-to-one" or a callable with two '
+                "arguments: the function transformer and an array-like of "
+                "input feature names. The callable must return an array-like "
+                "of output feature names."
+            )
+        return np.asarray(names_out, dtype=object)
+
     def _transform(self, X, func=None, kw_args=None):
         if func is None:
             func = _identity

diff --git a/sklearn/preprocessing/tests/test_function_transformer.py b/sklearn/preprocessing/tests/test_function_transformer.py
@@ -176,6 +176,158 @@ def test_function_transformer_frame():
     assert hasattr(X_df_trans, "loc")
 
 
+@pytest.mark.parametrize(
+    "X, feature_names_out, input_features, expected",
+    [
+        (
+            # NumPy inputs, default behavior: generate names
+            np.random.rand(100, 3),
+            "one-to-one",
+            None,
+            ("x0", "x1", "x2"),
+        ),
+        (
+            # Pandas input, default behavior: use input feature names
+            {"a": np.random.rand(100), "b": np.random.rand(100)},
+            "one-to-one",
+            None,
+            ("a", "b"),
+        ),
+        (
+            # NumPy input, feature_names_out=callable
+            np.random.rand(100, 3),
+            lambda transformer, input_features: ("a", "b"),
+            None,
+            ("a", "b"),
+        ),
+        (
+            # Pandas input, feature_names_out=callable
+            {"a": np.random.rand(100), "b": np.random.rand(100)},
+            lambda transformer, input_features: ("c", "d", "e"),
+            None,
+            ("c", "d", "e"),
+        ),
+        (
+            # NumPy input, feature_names_out=callable – default input_features
+            np.random.rand(100, 3),
+            lambda transformer, input_features: tuple(input_features) + ("a",),
+            None,
+            ("x0", "x1", "x2", "a"),
+        ),
+        (
+            # Pandas input, feature_names_out=callable – default input_features
+            {"a": np.random.rand(100), "b": np.random.rand(100)},
+            lambda transformer, input_features: tuple(input_features) + ("c",),
+            None,
+            ("a", "b", "c"),
+        ),
+        (
+            # NumPy input, input_features=list of names
+            np.random.rand(100, 3),
+            "one-to-one",
+            ("a", "b", "c"),
+            ("a", "b", "c"),
+        ),
+        (
+            # Pandas input, input_features=list of names
+            {"a": np.random.rand(100), "b": np.random.rand(100)},
+            "one-to-one",
+            ("a", "b"),  # must match feature_names_in_
+            ("a", "b"),
+        ),
+        (
+            # NumPy input, feature_names_out=callable, input_features=list
+            np.random.rand(100, 3),
+            lambda transformer, input_features: tuple(input_features) + ("d",),
+            ("a", "b", "c"),
+            ("a", "b", "c", "d"),
+        ),
+        (
+            # Pandas input, feature_names_out=callable, input_features=list
+            {"a": np.random.rand(100), "b": np.random.rand(100)},
+            lambda transformer, input_features: tuple(input_features) + ("c",),
+            ("a", "b"),  # must match feature_names_in_
+            ("a", "b", "c"),
+        ),
+    ],
+)
+def test_function_transformer_get_feature_names_out(
+    X, feature_names_out, input_features, expected
+):
+    if isinstance(X, dict):
+        pd = pytest.importorskip("pandas")
+        X = pd.DataFrame(X)
+
+    transformer = FunctionTransformer(
+        feature_names_out=feature_names_out, validate=True
+    )
+    transformer.fit_transform(X)
+    names = transformer.get_feature_names_out(input_features)
+    assert isinstance(names, np.ndarray)
+    assert names.dtype == object
+    assert_array_equal(names, expected)
+
+
+def test_function_transformer_get_feature_names_out_without_validation():
+    transformer = FunctionTransformer(feature_names_out="one-to-one", validate=False)
+    X = np.random.rand(100, 2)
+    transformer.fit_transform(X)
+
+    msg = "When 'feature_names_out' is 'one-to-one', either"
+    with pytest.raises(ValueError, match=msg):
+        transformer.get_feature_names_out()
+
+    names = transformer.get_feature_names_out(("a", "b"))
+    assert isinstance(names, np.ndarray)
+    assert names.dtype == object
+    assert_array_equal(names, ("a", "b"))
+
+
+@pytest.mark.parametrize("feature_names_out", ["x0", ["x0"], ("x0",)])
+def test_function_transformer_feature_names_out_string(feature_names_out):
+    transformer = FunctionTransformer(feature_names_out=feature_names_out)
+    X = np.random.rand(100, 2)
+    transformer.fit_transform(X)
+
+    msg = """must either be "one-to-one" or a callable"""
+    with pytest.raises(ValueError, match=msg):
+        transformer.get_feature_names_out()
+
+
+def test_function_transformer_feature_names_out_is_None():
+    transformer = FunctionTransformer()
+    X = np.random.rand(100, 2)
+    transformer.fit_transform(X)
+
+    msg = "This 'FunctionTransformer' has no attribute 'get_feature_names_out'"
+    with pytest.raises(AttributeError, match=msg):
+        transformer.get_feature_names_out()
+
+
+def test_function_transformer_feature_names_out_uses_estimator():
+    def add_n_random_features(X, n):
+        return np.concatenate([X, np.random.rand(len(X), n)], axis=1)
+
+    def feature_names_out(transformer, input_features):
+        n = transformer.kw_args["n"]
+        return list(input_features) + [f"rnd{i}" for i in range(n)]
+
+    transformer = FunctionTransformer(
+        func=add_n_random_features,
+        feature_names_out=feature_names_out,
+        kw_args=dict(n=3),
+        validate=True,
+    )
+    pd = pytest.importorskip("pandas")
+    df = pd.DataFrame({"a": np.random.rand(100), "b": np.random.rand(100)})
+    transformer.fit_transform(df)
+    names = transformer.get_feature_names_out()
+
+    assert isinstance(names, np.ndarray)
+    assert names.dtype == object
+    assert_array_equal(names, ("a", "b", "rnd0", "rnd1", "rnd2"))
+
+
 def test_function_transformer_validate_inverse():
     """Test that function transformer does not reset estimator in
     `inverse_transform`."""