@@ -560,71 +560,6 @@ def _set_oob_score_and_ufi_attributes(
560
560
(accuracy score).
561
561
"""
562
562
563
def _compute_oob_predictions(self, X, y):
    """Compute the out-of-bag (OOB) predictions of the ensemble.

    For every fitted estimator, the sample indices returned by
    `_generate_unsampled_indices` are predicted with
    `self._get_oob_predictions`, accumulated per sample, and finally
    divided by the number of estimators that contributed to each sample.
    Nothing is stored on the estimator; the averaged predictions are
    returned to the caller.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        The data matrix.
    y : ndarray of shape (n_samples, n_outputs)
        The target matrix.

    Returns
    -------
    oob_pred : ndarray of shape (n_samples, n_classes, n_outputs) or \
            (n_samples, 1, n_outputs)
        The OOB predictions.
    """
    # Prediction requires X to be in CSR format
    if issparse(X):
        X = X.tocsr()

    n_samples = y.shape[0]
    n_outputs = self.n_outputs_
    if is_classifier(self) and hasattr(self, "n_classes_"):
        # n_classes_ is a ndarray at this stage
        # all the supported type of target will have the same number of
        # classes in all outputs
        oob_pred_shape = (n_samples, self.n_classes_[0], n_outputs)
    else:
        # for regression, n_classes_ does not exist and we create an empty
        # axis to be consistent with the classification case and make
        # the array operations compatible with the 2 settings
        oob_pred_shape = (n_samples, 1, n_outputs)

    oob_pred = np.zeros(shape=oob_pred_shape, dtype=np.float64)
    n_oob_pred = np.zeros((n_samples, n_outputs), dtype=np.int64)

    n_samples_bootstrap = _get_n_samples_bootstrap(
        n_samples,
        self.max_samples,
    )
    for estimator in self.estimators_:
        unsampled_indices = _generate_unsampled_indices(
            estimator.random_state,
            n_samples,
            n_samples_bootstrap,
        )
        y_pred = self._get_oob_predictions(estimator, X[unsampled_indices, :])
        oob_pred[unsampled_indices, ...] += y_pred
        n_oob_pred[unsampled_indices, :] += 1

    # The zero-count check does not depend on the output index, so it is
    # done once up front instead of once per output.
    if (n_oob_pred == 0).any():
        warn(
            (
                "Some inputs do not have OOB scores. This probably means "
                "too few trees were used to compute any reliable OOB "
                "estimates."
            ),
            UserWarning,
        )
        # Avoid dividing by zero; the accumulated prediction for such
        # samples is still all zeros.
        n_oob_pred[n_oob_pred == 0] = 1

    # Insert the new axis in the *middle* so the counts of shape
    # (n_samples, 1, n_outputs) broadcast over the class axis.
    oob_pred /= n_oob_pred[:, np.newaxis, :]

    return oob_pred
627
-
628
563
def _validate_y_class_weight (self , y ):
629
564
# Default implementation
630
565
return y , None
@@ -757,18 +692,17 @@ def _compute_unbiased_feature_importance_and_oob_predictions(
757
692
758
693
importances /= self .n_estimators
759
694
760
- for k in range (self .n_outputs_ ):
761
- if (n_oob_pred == 0 ).any ():
762
- warn (
763
- (
764
- "Some inputs do not have OOB scores. This probably means "
765
- "too few trees were used to compute any reliable OOB "
766
- "estimates."
767
- ),
768
- UserWarning ,
769
- )
770
- n_oob_pred [n_oob_pred == 0 ] = 1
771
- oob_pred [..., k ] /= n_oob_pred [..., [k ]]
695
+ if (n_oob_pred == 0 ).any ():
696
+ warn (
697
+ (
698
+ "Some inputs do not have OOB scores. This probably means "
699
+ "too few trees were used to compute any reliable OOB "
700
+ "estimates."
701
+ ),
702
+ UserWarning ,
703
+ )
704
+ n_oob_pred [n_oob_pred == 0 ] = 1
705
+ oob_pred /= n_oob_pred [..., np .newaxis ]
772
706
773
707
return importances , oob_pred
774
708
@@ -865,34 +799,6 @@ def __init__(
865
799
max_samples = max_samples ,
866
800
)
867
801
868
- @staticmethod
869
- def _get_oob_predictions (tree , X ):
870
- """Compute the OOB predictions for an individual tree.
871
-
872
- Parameters
873
- ----------
874
- tree : DecisionTreeClassifier object
875
- A single decision tree classifier.
876
- X : ndarray of shape (n_samples, n_features)
877
- The OOB samples.
878
-
879
- Returns
880
- -------
881
- y_pred : ndarray of shape (n_samples, n_classes, n_outputs)
882
- The OOB associated predictions.
883
- """
884
- y_pred = tree .predict_proba (X , check_input = False )
885
- y_pred = np .asarray (y_pred )
886
- if y_pred .ndim == 2 :
887
- # binary and multiclass
888
- y_pred = y_pred [..., np .newaxis ]
889
- else :
890
- # Roll the first `n_outputs` axis to the last axis. We will reshape
891
- # from a shape of (n_outputs, n_samples, n_classes) to a shape of
892
- # (n_samples, n_classes, n_outputs).
893
- y_pred = np .rollaxis (y_pred , axis = 0 , start = 3 )
894
- return y_pred
895
-
896
802
def _set_oob_score_and_ufi_attributes (
897
803
self , X , y , sample_weight , scoring_function = None
898
804
):
@@ -1215,31 +1121,6 @@ def predict(self, X):
1215
1121
1216
1122
return y_hat
1217
1123
1218
- @staticmethod
1219
- def _get_oob_predictions (tree , X ):
1220
- """Compute the OOB predictions for an individual tree.
1221
-
1222
- Parameters
1223
- ----------
1224
- tree : DecisionTreeRegressor object
1225
- A single decision tree regressor.
1226
- X : ndarray of shape (n_samples, n_features)
1227
- The OOB samples.
1228
-
1229
- Returns
1230
- -------
1231
- y_pred : ndarray of shape (n_samples, 1, n_outputs)
1232
- The OOB associated predictions.
1233
- """
1234
- y_pred = tree .predict (X , check_input = False )
1235
- if y_pred .ndim == 1 :
1236
- # single output regression
1237
- y_pred = y_pred [:, np .newaxis , np .newaxis ]
1238
- else :
1239
- # multioutput regression
1240
- y_pred = y_pred [:, np .newaxis , :]
1241
- return y_pred
1242
-
1243
1124
def _set_oob_score_and_ufi_attributes (
1244
1125
self , X , y , sample_weight , scoring_function = None
1245
1126
):
@@ -1288,7 +1169,8 @@ def unbiased_feature_importances_(self):
1288
1169
Corrected version of the Mean Decrease Impurity, proposed by Zhou and Hooker in
1289
1170
"Unbiased Measurement of Feature Importance in Tree-Based Methods".
1290
1171
1291
- It is only available if the chosen split criterion is `squared_error` or `friedman_mse`.
1172
+ It is only available if the chosen split criterion is `squared_error` or
1173
+ `friedman_mse`.
1292
1174
1293
1175
Returns
1294
1176
-------
0 commit comments