From 0476ca857549bf24d0dad4f7e7c6b05e26a3af17 Mon Sep 17 00:00:00 2001
From: Ian Thompson <ian.thompson@hrblock.com>
Date: Wed, 21 Dec 2022 17:10:05 -0600
Subject: [PATCH 01/10] modified:   sklearn/pipeline.py 	- added
 self._check_feature_names(...) to the .fit(...) method in FeatureUnion to
 allow access to the `.feature_names_in_` attribute if `X` has feature names,
 e.g. a pandas.DataFrame 	- updated FeatureUnion docstring to reflect
 the addition of .feature_names_in_ attribute

modified:   sklearn/tests/test_pipeline.py
	- added test_feature_union_feature_names_in_() to test that FeatureUnion has a `.feature_names_in_` attribute if fitted with a pandas.DataFrame and not if fitted with a numpy array
---
 sklearn/pipeline.py            |  7 +++++++
 sklearn/tests/test_pipeline.py | 21 +++++++++++++++++++++
 2 files changed, 28 insertions(+)
diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py
index 5236c4499a728..7930732f886a9 100644
--- a/sklearn/pipeline.py
+++ b/sklearn/pipeline.py
@@ -995,6 +995,12 @@ class FeatureUnion(TransformerMixin, _BaseComposition):
 
         .. versionadded:: 0.24
 
+    feature_names_in_ : ndarray of shape (`n_features_in_`,)
+        Names of features seen during :term:`fit`. Defined only when
+        `X` has feature names that are all strings.
+
+        .. versionadded:: 1.3
+
     See Also
     --------
     make_union : Convenience function for simplified feature union
@@ -1182,6 +1188,7 @@ def fit(self, X, y=None, **fit_params):
         self : object
             FeatureUnion class instance.
         """
+        self._check_feature_names(X, reset=True)
         transformers = self._parallel_func(X, y, fit_params, _fit_one)
         if not transformers:
             # All transformers are None
diff --git a/sklearn/tests/test_pipeline.py b/sklearn/tests/test_pipeline.py
index 342dc12b966c9..7b6384d7521ef 100644
--- a/sklearn/tests/test_pipeline.py
+++ b/sklearn/tests/test_pipeline.py
@@ -1647,3 +1647,24 @@ def test_feature_union_getitem_error(key):
     msg = "Only string keys are supported"
     with pytest.raises(KeyError, match=msg):
         union[key]
+
+
+def test_feature_union_feature_names_in_():
+    """Ensure feature union has `.feature_names_in_` attribute if input
+    is pandas.DataFrame.
+
+    Test for #24754"""
+    X, _ = load_iris(as_frame=True, return_X_y=True)
+    X_train, X_test = train_test_split(X, random_state=0)
+
+    # fit with pandas.DataFrame
+    union = FeatureUnion([("pass", "passthrough")])
+    union.fit(X_train)
+    assert hasattr(union, "feature_names_in_")
+    assert_array_equal(X_train.columns, union.feature_names_in_)
+
+    # fit with numpy array
+    X_array = X_train.to_numpy()
+    union = FeatureUnion([("pass", "passthrough")])
+    union.fit(X_array)
+    assert not hasattr(union, "feature_names_in_")

From 58c907ace34ab61767c38b6b41546ab231deee91 Mon Sep 17 00:00:00 2001
From: Ian Thompson <ian.thompson@hrblock.com>
Date: Wed, 21 Dec 2022 18:09:26 -0600
Subject: [PATCH 02/10] modified:   doc/whats_new/v1.3.rst 	- changelog
 updated with description of work

---
 doc/whats_new/v1.3.rst | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/doc/whats_new/v1.3.rst b/doc/whats_new/v1.3.rst
index 68a569acb14e5..c6b3e93509d5b 100644
--- a/doc/whats_new/v1.3.rst
+++ b/doc/whats_new/v1.3.rst
@@ -41,6 +41,10 @@ Changelog
 - |Feature| :class:`pipeline.FeatureUnion` can now use indexing notation (e.g.
   `feature_union["scalar"]`) to access transformers by name. :pr:`25093` by
   `Thomas Fan`_.
+- |Feature| :class:`pipeline.FeatureUnion` can now access the
+  `feature_names_in_` attribute if the value seen during `.fit` was a `pandas
+  .DataFrame`.
+  :pr:`25220` by :user:`Ian Thompson <ianiat11>`.
 
 :mod:`sklearn.preprocessing`
 ............................

From cebfa4a4147074682e82b5b91ddd810212e0dbae Mon Sep 17 00:00:00 2001
From: Ian Thompson <ian.thompson@hrblock.com>
Date: Thu, 22 Dec 2022 09:32:39 -0600
Subject: [PATCH 03/10] modified:   doc/whats_new/v1.3.rst 	- made
 changelog description more precise

---
 doc/whats_new/v1.3.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/doc/whats_new/v1.3.rst b/doc/whats_new/v1.3.rst
index c6b3e93509d5b..fc69d2c88e574 100644
--- a/doc/whats_new/v1.3.rst
+++ b/doc/whats_new/v1.3.rst
@@ -42,8 +42,8 @@ Changelog
   `feature_union["scalar"]`) to access transformers by name. :pr:`25093` by
   `Thomas Fan`_.
 - |Feature| :class:`pipeline.FeatureUnion` can now access the
-  `feature_names_in_` attribute if the value seen during `.fit` was a `pandas
-  .DataFrame`.
+  `feature_names_in_` attribute if the `X` value seen during `.fit` has a
+  `.columns` attribute and all columns are strings.
   :pr:`25220` by :user:`Ian Thompson <ianiat11>`.
 
 :mod:`sklearn.preprocessing`

From e60a2d9f1ec41e3d85342ac35fe2ef3278d517f3 Mon Sep 17 00:00:00 2001
From: Ian Thompson <ian.thompson@hrblock.com>
Date: Thu, 22 Dec 2022 09:36:19 -0600
Subject: [PATCH 04/10] modified:   doc/whats_new/v1.3.rst 	- typo --
 removed period (.) before `columns`

---
 doc/whats_new/v1.3.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/whats_new/v1.3.rst b/doc/whats_new/v1.3.rst
index fc69d2c88e574..6a9c3632e0124 100644
--- a/doc/whats_new/v1.3.rst
+++ b/doc/whats_new/v1.3.rst
@@ -43,7 +43,7 @@ Changelog
   `Thomas Fan`_.
 - |Feature| :class:`pipeline.FeatureUnion` can now access the
   `feature_names_in_` attribute if the `X` value seen during `.fit` has a
-  `.columns` attribute and all columns are strings.
+  `columns` attribute and all columns are strings.
   :pr:`25220` by :user:`Ian Thompson <ianiat11>`.
 
 :mod:`sklearn.preprocessing`

From 4fcfac9dad0e719b9b438c814382b779731243c8 Mon Sep 17 00:00:00 2001
From: Ian Thompson <ian.thompson@hrblock.com>
Date: Thu, 22 Dec 2022 12:06:42 -0600
Subject: [PATCH 05/10] modified:   sklearn/pipeline.py 	- removed
 `self._check_feature_names(...)` from `.fit(...)` method in `FeatureUnion` 
 - added `feature_names_in_()` property to `FeatureUnion` to use first
 transformer's `feature_names_in_` attribute if present

modified:   sklearn/tests/test_pipeline.py
	- updated docstring for `test_feature_union_feature_names_in_()` to be more precise
	- added additional assertions to check if the `feature_names_in_` attribute is available to `FeatureUnion` if it's instantiated with a transformer that has already been fit
---
 sklearn/pipeline.py            |  7 ++++++-
 sklearn/tests/test_pipeline.py | 13 +++++++++++--
 2 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py
index 7930732f886a9..5107064f73576 100644
--- a/sklearn/pipeline.py
+++ b/sklearn/pipeline.py
@@ -1188,7 +1188,6 @@ def fit(self, X, y=None, **fit_params):
         self : object
             FeatureUnion class instance.
         """
-        self._check_feature_names(X, reset=True)
         transformers = self._parallel_func(X, y, fit_params, _fit_one)
         if not transformers:
             # All transformers are None
@@ -1304,6 +1303,12 @@ def n_features_in_(self):
         # X is passed to all transformers so we just delegate to the first one
         return self.transformer_list[0][1].n_features_in_
 
+    @property
+    def feature_names_in_(self):
+        """Names of features seen during :term:`fit`."""
+        # X is passed to all transformers -- delegate to the first one
+        return self.transformer_list[0][1].feature_names_in_
+
     def __sklearn_is_fitted__(self):
         # Delegate whether feature union was fitted
         for _, transformer, _ in self._iter():
diff --git a/sklearn/tests/test_pipeline.py b/sklearn/tests/test_pipeline.py
index 7b6384d7521ef..f430f7e192800 100644
--- a/sklearn/tests/test_pipeline.py
+++ b/sklearn/tests/test_pipeline.py
@@ -1650,13 +1650,22 @@ def test_feature_union_getitem_error(key):
 
 
 def test_feature_union_feature_names_in_():
-    """Ensure feature union has `.feature_names_in_` attribute if input
-    is pandas.DataFrame.
+    """Ensure feature union has `.feature_names_in_` attribute if `X` has a
+    `columns` attribute.
 
     Test for #24754"""
     X, _ = load_iris(as_frame=True, return_X_y=True)
     X_train, X_test = train_test_split(X, random_state=0)
 
+    # FeatureUnion should have the feature_names_in_ attribute if the
+    # first transformer also has it
+    scaler = StandardScaler()
+    scaler.fit(X_train)
+    union = FeatureUnion([("scale", scaler)])
+    assert hasattr(union, "feature_names_in_")
+    assert_array_equal(X_train.columns, union.feature_names_in_)
+    assert_array_equal(scaler.feature_names_in_, union.feature_names_in_)
+
     # fit with pandas.DataFrame
     union = FeatureUnion([("pass", "passthrough")])
     union.fit(X_train)

From 62c5267d2ccbad586339dd0820203e08b80be869 Mon Sep 17 00:00:00 2001
From: Ian Thompson <ian.thompson@hrblock.com>
Date: Thu, 22 Dec 2022 13:05:38 -0600
Subject: [PATCH 06/10] modified:   doc/whats_new/v1.3.rst 	- updated
 changelog description to include `pandas.DataFrame` 	- corrected user
 signature to match github account

---
 doc/whats_new/v1.3.rst | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/doc/whats_new/v1.3.rst b/doc/whats_new/v1.3.rst
index 9886155ac947d..ad773ce924cd4 100644
--- a/doc/whats_new/v1.3.rst
+++ b/doc/whats_new/v1.3.rst
@@ -55,8 +55,9 @@ Changelog
   `Thomas Fan`_.
 - |Feature| :class:`pipeline.FeatureUnion` can now access the
   `feature_names_in_` attribute if the `X` value seen during `.fit` has a
-  `columns` attribute and all columns are strings.
-  :pr:`25220` by :user:`Ian Thompson <ianiat11>`.
+  `columns` attribute and all columns are strings. e.g. when `X` is a
+  `pandas.DataFrame`
+  :pr:`25220` by :user:`Ian Thompson <it176131>`.
 
 :mod:`sklearn.preprocessing`
 ............................

From d73327adc02c64f7dcc6e271aacd106468204830 Mon Sep 17 00:00:00 2001
From: Ian Thompson <ian.thompson@hrblock.com>
Date: Wed, 28 Dec 2022 13:11:11 -0600
Subject: [PATCH 07/10] modified:   sklearn/tests/test_pipeline.py 	-
 added `pytest.importorskip("pandas")` to `test_feature_union_feature_names_in_`
 so the test is skipped instead of raising ImportError in azure-pipelines

---
 sklearn/tests/test_pipeline.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/sklearn/tests/test_pipeline.py b/sklearn/tests/test_pipeline.py
index f430f7e192800..dbebe1f0c892b 100644
--- a/sklearn/tests/test_pipeline.py
+++ b/sklearn/tests/test_pipeline.py
@@ -1654,6 +1654,8 @@ def test_feature_union_feature_names_in_():
     `columns` attribute.
 
     Test for #24754"""
+    pytest.importorskip("pandas")
+
     X, _ = load_iris(as_frame=True, return_X_y=True)
     X_train, X_test = train_test_split(X, random_state=0)
 

From 1ec500063ea1e6e9b593ab3e3aac993e5e843dca Mon Sep 17 00:00:00 2001
From: Ian Thompson <ianiat11@gmail.com>
Date: Tue, 3 Jan 2023 08:56:27 -0600
Subject: [PATCH 08/10] Update doc/whats_new/v1.3.rst

newline/whitespace between change log updates.

Co-authored-by: Thomas J. Fan <thomasjpfan@gmail.com>
---
 doc/whats_new/v1.3.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/doc/whats_new/v1.3.rst b/doc/whats_new/v1.3.rst
index 69ed2d88813a7..85dbd9378d558 100644
--- a/doc/whats_new/v1.3.rst
+++ b/doc/whats_new/v1.3.rst
@@ -91,6 +91,7 @@ Changelog
 - |Feature| :class:`pipeline.FeatureUnion` can now use indexing notation (e.g.
   `feature_union["scalar"]`) to access transformers by name. :pr:`25093` by
   `Thomas Fan`_.
+
 - |Feature| :class:`pipeline.FeatureUnion` can now access the
   `feature_names_in_` attribute if the `X` value seen during `.fit` has a
   `columns` attribute and all columns are strings. e.g. when `X` is a

From af78798e080d31f7932aeef59d2432be334e19d1 Mon Sep 17 00:00:00 2001
From: Ian Thompson <ianiat11@gmail.com>
Date: Tue, 3 Jan 2023 08:57:16 -0600
Subject: [PATCH 09/10] Update sklearn/tests/test_pipeline.py

added period at end of docstring

Co-authored-by: Thomas J. Fan <thomasjpfan@gmail.com>
---
 sklearn/tests/test_pipeline.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/sklearn/tests/test_pipeline.py b/sklearn/tests/test_pipeline.py
index dbebe1f0c892b..aab775f475342 100644
--- a/sklearn/tests/test_pipeline.py
+++ b/sklearn/tests/test_pipeline.py
@@ -1653,7 +1653,8 @@ def test_feature_union_feature_names_in_():
     """Ensure feature union has `.feature_names_in_` attribute if `X` has a
     `columns` attribute.
 
-    Test for #24754"""
+    Test for #24754.
+    """
     pytest.importorskip("pandas")
 
     X, _ = load_iris(as_frame=True, return_X_y=True)

From 073daf7a37dfb69c79ba4fba692be5544a2cdaba Mon Sep 17 00:00:00 2001
From: Ian Thompson <ian.thompson@hrblock.com>
Date: Tue, 3 Jan 2023 09:37:13 -0600
Subject: [PATCH 10/10] modified:   sklearn/tests/test_pipeline.py 	-
 removed train-test-split per code suggestion -- using `X` directly

---
 sklearn/tests/test_pipeline.py | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/sklearn/tests/test_pipeline.py b/sklearn/tests/test_pipeline.py
index aab775f475342..fa7fa2ad20dcf 100644
--- a/sklearn/tests/test_pipeline.py
+++ b/sklearn/tests/test_pipeline.py
@@ -1658,25 +1658,24 @@ def test_feature_union_feature_names_in_():
     pytest.importorskip("pandas")
 
     X, _ = load_iris(as_frame=True, return_X_y=True)
-    X_train, X_test = train_test_split(X, random_state=0)
 
     # FeatureUnion should have the feature_names_in_ attribute if the
     # first transformer also has it
     scaler = StandardScaler()
-    scaler.fit(X_train)
+    scaler.fit(X)
     union = FeatureUnion([("scale", scaler)])
     assert hasattr(union, "feature_names_in_")
-    assert_array_equal(X_train.columns, union.feature_names_in_)
+    assert_array_equal(X.columns, union.feature_names_in_)
     assert_array_equal(scaler.feature_names_in_, union.feature_names_in_)
 
     # fit with pandas.DataFrame
     union = FeatureUnion([("pass", "passthrough")])
-    union.fit(X_train)
+    union.fit(X)
     assert hasattr(union, "feature_names_in_")
-    assert_array_equal(X_train.columns, union.feature_names_in_)
+    assert_array_equal(X.columns, union.feature_names_in_)
 
     # fit with numpy array
-    X_array = X_train.to_numpy()
+    X_array = X.to_numpy()
     union = FeatureUnion([("pass", "passthrough")])
     union.fit(X_array)
     assert not hasattr(union, "feature_names_in_")