From 3241ead7cdb79a1ff5dee6098de9cfa7c8167073 Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Mon, 23 Aug 2021 18:28:49 +0200 Subject: [PATCH] ENH feature_names_in_ for sklearn.ensemble --- sklearn/ensemble/_forest.py | 27 ++++++++++++++++++++++++++- sklearn/ensemble/_stacking.py | 15 ++++++++++++++- sklearn/ensemble/_voting.py | 13 +++++++++++++ sklearn/tests/test_common.py | 1 - 4 files changed, 53 insertions(+), 3 deletions(-) diff --git a/sklearn/ensemble/_forest.py b/sklearn/ensemble/_forest.py index d56a5e9856872..eaecf77d16023 100644 --- a/sklearn/ensemble/_forest.py +++ b/sklearn/ensemble/_forest.py @@ -565,7 +565,7 @@ def _validate_X_predict(self, X): """ Validate X whenever one tries to predict, apply, predict_proba.""" check_is_fitted(self) - + self._check_feature_names(X, reset=False) return self.estimators_[0]._validate_X_predict(X, check_input=True) @property @@ -1265,6 +1265,11 @@ class labels (multi-output problem). .. versionadded:: 0.24 + feature_names_in_ : ndarray of shape (`n_features_in_`,) + Names of features seen during :term:`fit`. Defined only when `X` + has feature names that are all strings. + .. versionadded:: 1.0 + n_outputs_ : int The number of outputs when ``fit`` is performed. @@ -1590,6 +1595,11 @@ class RandomForestRegressor(ForestRegressor): .. versionadded:: 0.24 + feature_names_in_ : ndarray of shape (`n_features_in_`,) + Names of features seen during :term:`fit`. Defined only when `X` + has feature names that are all strings. + .. versionadded:: 1.0 + n_outputs_ : int The number of outputs when ``fit`` is performed. @@ -1920,6 +1930,11 @@ class labels (multi-output problem). .. versionadded:: 0.24 + feature_names_in_ : ndarray of shape (`n_features_in_`,) + Names of features seen during :term:`fit`. Defined only when `X` + has feature names that are all strings. + .. versionadded:: 1.0 + n_outputs_ : int The number of outputs when ``fit`` is performed. @@ -2222,6 +2237,11 @@ class ExtraTreesRegressor(ForestRegressor): .. versionadded:: 0.24 + feature_names_in_ : ndarray of shape (`n_features_in_`,) + Names of features seen during :term:`fit`. Defined only when `X` + has feature names that are all strings. + .. versionadded:: 1.0 + n_outputs_ : int The number of outputs. @@ -2454,6 +2474,11 @@ class RandomTreesEmbedding(BaseForest): .. versionadded:: 0.24 + feature_names_in_ : ndarray of shape (`n_features_in_`,) + Names of features seen during :term:`fit`. Defined only when `X` + has feature names that are all strings. + .. versionadded:: 1.0 + n_outputs_ : int The number of outputs when ``fit`` is performed. diff --git a/sklearn/ensemble/_stacking.py b/sklearn/ensemble/_stacking.py index bf86b6d9f08de..7e2af439d22ec 100644 --- a/sklearn/ensemble/_stacking.py +++ b/sklearn/ensemble/_stacking.py @@ -165,8 +165,11 @@ def fit(self, X, y, sample_weight=None): est_fitted_idx = 0 for name_est, org_est in zip(names, all_estimators): if org_est != "drop": - self.named_estimators_[name_est] = self.estimators_[est_fitted_idx] + current_estimator = self.estimators_[est_fitted_idx] + self.named_estimators_[name_est] = current_estimator est_fitted_idx += 1 + if hasattr(current_estimator, "feature_names_in_"): + self.feature_names_in_ = current_estimator.feature_names_in_ else: self.named_estimators_[name_est] = "drop" @@ -373,6 +376,11 @@ class StackingClassifier(ClassifierMixin, _BaseStacking): .. versionadded:: 0.24 + feature_names_in_ : ndarray of shape (`n_features_in_`,) + Names of features seen during :term:`fit`. Only defined if the + underlying estimators expose such an attribute when fit. + .. versionadded:: 1.0 + final_estimator_ : estimator The classifier which predicts given the output of `estimators_`. @@ -649,6 +657,11 @@ class StackingRegressor(RegressorMixin, _BaseStacking): .. versionadded:: 0.24 + feature_names_in_ : ndarray of shape (`n_features_in_`,) + Names of features seen during :term:`fit`. Only defined if the + underlying estimators expose such an attribute when fit. + .. versionadded:: 1.0 + final_estimator_ : estimator The regressor to stacked the base estimators fitted. diff --git a/sklearn/ensemble/_voting.py b/sklearn/ensemble/_voting.py index cb315c0695191..3d296154f7dc4 100644 --- a/sklearn/ensemble/_voting.py +++ b/sklearn/ensemble/_voting.py @@ -92,6 +92,9 @@ def fit(self, X, y, sample_weight=None): current_est = est if est == "drop" else next(est_iter) self.named_estimators_[name] = current_est + if hasattr(current_est, "feature_names_in_"): + self.feature_names_in_ = current_est.feature_names_in_ + return self def fit_transform(self, X, y=None, **fit_params): @@ -217,6 +220,11 @@ class VotingClassifier(ClassifierMixin, _BaseVoting): .. versionadded:: 0.24 + feature_names_in_ : ndarray of shape (`n_features_in_`,) + Names of features seen during :term:`fit`. Only defined if the + underlying estimators expose such an attribute when fit. + .. versionadded:: 1.0 + See Also -------- VotingRegressor : Prediction voting regressor. @@ -466,6 +474,11 @@ class VotingRegressor(RegressorMixin, _BaseVoting): .. versionadded:: 0.24 + feature_names_in_ : ndarray of shape (`n_features_in_`,) + Names of features seen during :term:`fit`. Only defined if the + underlying estimators expose such an attribute when fit. + .. versionadded:: 1.0 + See Also -------- VotingClassifier : Soft Voting/Majority Rule classifier. diff --git a/sklearn/tests/test_common.py b/sklearn/tests/test_common.py index 008bdee7e646b..cf4accea7290c 100644 --- a/sklearn/tests/test_common.py +++ b/sklearn/tests/test_common.py @@ -325,7 +325,6 @@ def test_check_n_features_in_after_fitting(estimator): COLUMN_NAME_MODULES_TO_IGNORE = { "compose", - "ensemble", "feature_extraction", "kernel_approximation", "model_selection",