@@ -62,7 +62,7 @@ def _rfe_single_fit(rfe, estimator, X, y, train, test, scorer, routed_params):
62
62
** fit_params ,
63
63
)
64
64
65
- return rfe .step_scores_ , rfe .step_n_features_
65
+ return rfe .step_scores_ , rfe .step_support_ , rfe . step_ranking_ , rfe . step_n_features_
66
66
67
67
68
68
class RFE (SelectorMixin , MetaEstimatorMixin , BaseEstimator ):
@@ -318,6 +318,8 @@ def _fit(self, X, y, step_score=None, **fit_params):
318
318
if step_score :
319
319
self .step_n_features_ = []
320
320
self .step_scores_ = []
321
+ self .step_support_ = []
322
+ self .step_ranking_ = []
321
323
322
324
# Elimination
323
325
while np .sum (support_ ) > n_features_to_select :
@@ -331,6 +333,14 @@ def _fit(self, X, y, step_score=None, **fit_params):
331
333
332
334
estimator .fit (X [:, features ], y , ** fit_params )
333
335
336
+ # Compute step values on the previous selection iteration because
337
+ # 'estimator' must use features that have not been eliminated yet
338
+ if step_score :
339
+ self .step_n_features_ .append (len (features ))
340
+ self .step_scores_ .append (step_score (estimator , features ))
341
+ self .step_support_ .append (list (support_ ))
342
+ self .step_ranking_ .append (list (ranking_ ))
343
+
334
344
# Get importance and rank them
335
345
importances = _get_feature_importances (
336
346
estimator ,
@@ -345,12 +355,6 @@ def _fit(self, X, y, step_score=None, **fit_params):
345
355
# Eliminate the worse features
346
356
threshold = min (step , np .sum (support_ ) - n_features_to_select )
347
357
348
- # Compute step score on the previous selection iteration
349
- # because 'estimator' must use features
350
- # that have not been eliminated yet
351
- if step_score :
352
- self .step_n_features_ .append (len (features ))
353
- self .step_scores_ .append (step_score (estimator , features ))
354
358
support_ [features [ranks ][:threshold ]] = False
355
359
ranking_ [np .logical_not (support_ )] += 1
356
360
@@ -359,10 +363,12 @@ def _fit(self, X, y, step_score=None, **fit_params):
359
363
self .estimator_ = clone (self .estimator )
360
364
self .estimator_ .fit (X [:, features ], y , ** fit_params )
361
365
362
- # Compute step score when only n_features_to_select features left
366
+ # Compute step values when only n_features_to_select features left
363
367
if step_score :
364
368
self .step_n_features_ .append (len (features ))
365
369
self .step_scores_ .append (step_score (self .estimator_ , features ))
370
+ self .step_support_ .append (support_ )
371
+ self .step_ranking_ .append (ranking_ )
366
372
self .n_features_ = support_ .sum ()
367
373
self .support_ = support_
368
374
self .ranking_ = ranking_
@@ -674,6 +680,20 @@ class RFECV(RFE):
674
680
675
681
.. versionadded:: 1.5
676
682
683
+ split(k)_ranking : ndarray of shape (n_subsets_of_features,)
684
+ The cross-validation rankings across the (k)th fold.
685
+ Selected (i.e., estimated best) features are assigned rank 1.
686
+ Illustration in
687
+ :ref:`sphx_glr_auto_examples_feature_selection_plot_rfe_with_cross_validation.py`
688
+
689
+ .. versionadded:: 1.7
690
+
691
+ split(k)_support : ndarray of shape (n_subsets_of_features,)
692
+ The cross-validation supports across the (k)th fold. The support
693
+ is the mask of selected features.
694
+
695
+ .. versionadded:: 1.7
696
+
677
697
n_features_ : int
678
698
The number of selected features with cross-validation.
679
699
@@ -874,14 +894,16 @@ def fit(self, X, y, *, groups=None, **params):
874
894
parallel = Parallel (n_jobs = self .n_jobs )
875
895
func = delayed (_rfe_single_fit )
876
896
877
- scores_features = parallel (
897
+ step_results = parallel (
878
898
func (clone (rfe ), self .estimator , X , y , train , test , scorer , routed_params )
879
899
for train , test in cv .split (X , y , ** routed_params .splitter .split )
880
900
)
881
- scores , step_n_features = zip (* scores_features )
901
+ scores , supports , rankings , step_n_features = zip (* step_results )
882
902
883
903
step_n_features_rev = np .array (step_n_features [0 ])[::- 1 ]
884
904
scores = np .array (scores )
905
+ rankings = np .array (rankings )
906
+ supports = np .array (supports )
885
907
886
908
# Reverse order such that lowest number of features is selected in case of tie.
887
909
scores_sum_rev = np .sum (scores , axis = 0 )[::- 1 ]
@@ -907,10 +929,14 @@ def fit(self, X, y, *, groups=None, **params):
907
929
908
930
# reverse to stay consistent with before
909
931
scores_rev = scores [:, ::- 1 ]
932
+ supports_rev = supports [:, ::- 1 ]
933
+ rankings_rev = rankings [:, ::- 1 ]
910
934
self .cv_results_ = {
911
935
"mean_test_score" : np .mean (scores_rev , axis = 0 ),
912
936
"std_test_score" : np .std (scores_rev , axis = 0 ),
913
937
** {f"split{ i } _test_score" : scores_rev [i ] for i in range (scores .shape [0 ])},
938
+ ** {f"split{ i } _ranking" : rankings_rev [i ] for i in range (rankings .shape [0 ])},
939
+ ** {f"split{ i } _support" : supports_rev [i ] for i in range (supports .shape [0 ])},
914
940
"n_features" : step_n_features_rev ,
915
941
}
916
942
return self
0 commit comments