From 3241ead7cdb79a1ff5dee6098de9cfa7c8167073 Mon Sep 17 00:00:00 2001
From: Olivier Grisel <olivier.grisel@ensta.org>
Date: Mon, 23 Aug 2021 18:28:49 +0200
Subject: [PATCH] ENH feature_names_in_ for sklearn.ensemble

---
 sklearn/ensemble/_forest.py   | 27 ++++++++++++++++++++++++++-
 sklearn/ensemble/_stacking.py | 15 ++++++++++++++-
 sklearn/ensemble/_voting.py   | 13 +++++++++++++
 sklearn/tests/test_common.py  |  1 -
 4 files changed, 53 insertions(+), 3 deletions(-)

diff --git a/sklearn/ensemble/_forest.py b/sklearn/ensemble/_forest.py
index d56a5e9856872..eaecf77d16023 100644
--- a/sklearn/ensemble/_forest.py
+++ b/sklearn/ensemble/_forest.py
@@ -565,7 +565,7 @@ def _validate_X_predict(self, X):
         """
         Validate X whenever one tries to predict, apply, predict_proba."""
         check_is_fitted(self)
-
+        self._check_feature_names(X, reset=False)
         return self.estimators_[0]._validate_X_predict(X, check_input=True)
 
     @property
@@ -1265,6 +1265,12 @@ class labels (multi-output problem).
 
         .. versionadded:: 0.24
 
+    feature_names_in_ : ndarray of shape (`n_features_in_`,)
+        Names of features seen during :term:`fit`. Defined only when `X`
+        has feature names that are all strings.
+
+        .. versionadded:: 1.0
+
     n_outputs_ : int
         The number of outputs when ``fit`` is performed.
 
@@ -1590,6 +1595,12 @@ class RandomForestRegressor(ForestRegressor):
 
         .. versionadded:: 0.24
 
+    feature_names_in_ : ndarray of shape (`n_features_in_`,)
+        Names of features seen during :term:`fit`. Defined only when `X`
+        has feature names that are all strings.
+
+        .. versionadded:: 1.0
+
     n_outputs_ : int
         The number of outputs when ``fit`` is performed.
 
@@ -1920,6 +1930,12 @@ class labels (multi-output problem).
 
         .. versionadded:: 0.24
 
+    feature_names_in_ : ndarray of shape (`n_features_in_`,)
+        Names of features seen during :term:`fit`. Defined only when `X`
+        has feature names that are all strings.
+
+        .. versionadded:: 1.0
+
     n_outputs_ : int
         The number of outputs when ``fit`` is performed.
 
@@ -2222,6 +2237,12 @@ class ExtraTreesRegressor(ForestRegressor):
 
         .. versionadded:: 0.24
 
+    feature_names_in_ : ndarray of shape (`n_features_in_`,)
+        Names of features seen during :term:`fit`. Defined only when `X`
+        has feature names that are all strings.
+
+        .. versionadded:: 1.0
+
     n_outputs_ : int
         The number of outputs.
 
@@ -2454,6 +2474,12 @@ class RandomTreesEmbedding(BaseForest):
 
         .. versionadded:: 0.24
 
+    feature_names_in_ : ndarray of shape (`n_features_in_`,)
+        Names of features seen during :term:`fit`. Defined only when `X`
+        has feature names that are all strings.
+
+        .. versionadded:: 1.0
+
     n_outputs_ : int
         The number of outputs when ``fit`` is performed.
 
diff --git a/sklearn/ensemble/_stacking.py b/sklearn/ensemble/_stacking.py
index bf86b6d9f08de..7e2af439d22ec 100644
--- a/sklearn/ensemble/_stacking.py
+++ b/sklearn/ensemble/_stacking.py
@@ -165,8 +165,11 @@ def fit(self, X, y, sample_weight=None):
         est_fitted_idx = 0
         for name_est, org_est in zip(names, all_estimators):
             if org_est != "drop":
-                self.named_estimators_[name_est] = self.estimators_[est_fitted_idx]
+                current_estimator = self.estimators_[est_fitted_idx]
+                self.named_estimators_[name_est] = current_estimator
                 est_fitted_idx += 1
+                if hasattr(current_estimator, "feature_names_in_"):
+                    self.feature_names_in_ = current_estimator.feature_names_in_
             else:
                 self.named_estimators_[name_est] = "drop"
 
@@ -373,6 +376,12 @@ class StackingClassifier(ClassifierMixin, _BaseStacking):
 
         .. versionadded:: 0.24
 
+    feature_names_in_ : ndarray of shape (`n_features_in_`,)
+        Names of features seen during :term:`fit`. Only defined if the
+        underlying estimators expose such an attribute when fit.
+
+        .. versionadded:: 1.0
+
     final_estimator_ : estimator
         The classifier which predicts given the output of `estimators_`.
 
@@ -649,6 +657,12 @@ class StackingRegressor(RegressorMixin, _BaseStacking):
 
         .. versionadded:: 0.24
 
+    feature_names_in_ : ndarray of shape (`n_features_in_`,)
+        Names of features seen during :term:`fit`. Only defined if the
+        underlying estimators expose such an attribute when fit.
+
+        .. versionadded:: 1.0
+
     final_estimator_ : estimator
         The regressor to stacked the base estimators fitted.
 
diff --git a/sklearn/ensemble/_voting.py b/sklearn/ensemble/_voting.py
index cb315c0695191..3d296154f7dc4 100644
--- a/sklearn/ensemble/_voting.py
+++ b/sklearn/ensemble/_voting.py
@@ -92,6 +92,9 @@ def fit(self, X, y, sample_weight=None):
             current_est = est if est == "drop" else next(est_iter)
             self.named_estimators_[name] = current_est
 
+            if hasattr(current_est, "feature_names_in_"):
+                self.feature_names_in_ = current_est.feature_names_in_
+
         return self
 
     def fit_transform(self, X, y=None, **fit_params):
@@ -217,6 +220,12 @@ class VotingClassifier(ClassifierMixin, _BaseVoting):
 
         .. versionadded:: 0.24
 
+    feature_names_in_ : ndarray of shape (`n_features_in_`,)
+        Names of features seen during :term:`fit`. Only defined if the
+        underlying estimators expose such an attribute when fit.
+
+        .. versionadded:: 1.0
+
     See Also
     --------
     VotingRegressor : Prediction voting regressor.
@@ -466,6 +474,12 @@ class VotingRegressor(RegressorMixin, _BaseVoting):
 
         .. versionadded:: 0.24
 
+    feature_names_in_ : ndarray of shape (`n_features_in_`,)
+        Names of features seen during :term:`fit`. Only defined if the
+        underlying estimators expose such an attribute when fit.
+
+        .. versionadded:: 1.0
+
     See Also
     --------
     VotingClassifier : Soft Voting/Majority Rule classifier.
diff --git a/sklearn/tests/test_common.py b/sklearn/tests/test_common.py
index 008bdee7e646b..cf4accea7290c 100644
--- a/sklearn/tests/test_common.py
+++ b/sklearn/tests/test_common.py
@@ -325,7 +325,6 @@ def test_check_n_features_in_after_fitting(estimator):
 
 COLUMN_NAME_MODULES_TO_IGNORE = {
     "compose",
-    "ensemble",
     "feature_extraction",
     "kernel_approximation",
     "model_selection",