Skip to content

Commit cd131bc

Browse files
committed
remove unused code for oob_pred, clean aggregation of oob_pred
1 parent 227ec3d commit cd131bc

File tree

1 file changed: +13 additions, -131 deletions

sklearn/ensemble/_forest.py

Lines changed: 13 additions & 131 deletions
Original file line numberDiff line numberDiff line change
@@ -560,71 +560,6 @@ def _set_oob_score_and_ufi_attributes(
560560
(accuracy score).
561561
"""
562562

563-
def _compute_oob_predictions(self, X, y):
564-
"""Compute and set the OOB score.
565-
566-
Parameters
567-
----------
568-
X : array-like of shape (n_samples, n_features)
569-
The data matrix.
570-
y : ndarray of shape (n_samples, n_outputs)
571-
The target matrix.
572-
573-
Returns
574-
-------
575-
oob_pred : ndarray of shape (n_samples, n_classes, n_outputs) or \
576-
(n_samples, 1, n_outputs)
577-
The OOB predictions.
578-
"""
579-
# Prediction requires X to be in CSR format
580-
if issparse(X):
581-
X = X.tocsr()
582-
583-
n_samples = y.shape[0]
584-
n_outputs = self.n_outputs_
585-
if is_classifier(self) and hasattr(self, "n_classes_"):
586-
# n_classes_ is a ndarray at this stage
587-
# all the supported type of target will have the same number of
588-
# classes in all outputs
589-
oob_pred_shape = (n_samples, self.n_classes_[0], n_outputs)
590-
else:
591-
# for regression, n_classes_ does not exist and we create an empty
592-
# axis to be consistent with the classification case and make
593-
# the array operations compatible with the 2 settings
594-
oob_pred_shape = (n_samples, 1, n_outputs)
595-
596-
oob_pred = np.zeros(shape=oob_pred_shape, dtype=np.float64)
597-
n_oob_pred = np.zeros((n_samples, n_outputs), dtype=np.int64)
598-
599-
n_samples_bootstrap = _get_n_samples_bootstrap(
600-
n_samples,
601-
self.max_samples,
602-
)
603-
for estimator in self.estimators_:
604-
unsampled_indices = _generate_unsampled_indices(
605-
estimator.random_state,
606-
n_samples,
607-
n_samples_bootstrap,
608-
)
609-
y_pred = self._get_oob_predictions(estimator, X[unsampled_indices, :])
610-
oob_pred[unsampled_indices, ...] += y_pred
611-
n_oob_pred[unsampled_indices, :] += 1
612-
613-
for k in range(n_outputs):
614-
if (n_oob_pred == 0).any():
615-
warn(
616-
(
617-
"Some inputs do not have OOB scores. This probably means "
618-
"too few trees were used to compute any reliable OOB "
619-
"estimates."
620-
),
621-
UserWarning,
622-
)
623-
n_oob_pred[n_oob_pred == 0] = 1
624-
oob_pred[..., k] /= n_oob_pred[..., [k]]
625-
626-
return oob_pred
627-
628563
def _validate_y_class_weight(self, y):
629564
# Default implementation
630565
return y, None
@@ -757,18 +692,17 @@ def _compute_unbiased_feature_importance_and_oob_predictions(
757692

758693
importances /= self.n_estimators
759694

760-
for k in range(self.n_outputs_):
761-
if (n_oob_pred == 0).any():
762-
warn(
763-
(
764-
"Some inputs do not have OOB scores. This probably means "
765-
"too few trees were used to compute any reliable OOB "
766-
"estimates."
767-
),
768-
UserWarning,
769-
)
770-
n_oob_pred[n_oob_pred == 0] = 1
771-
oob_pred[..., k] /= n_oob_pred[..., [k]]
695+
if (n_oob_pred == 0).any():
696+
warn(
697+
(
698+
"Some inputs do not have OOB scores. This probably means "
699+
"too few trees were used to compute any reliable OOB "
700+
"estimates."
701+
),
702+
UserWarning,
703+
)
704+
n_oob_pred[n_oob_pred == 0] = 1
705+
oob_pred /= n_oob_pred[..., np.newaxis]
772706

773707
return importances, oob_pred
774708

@@ -865,34 +799,6 @@ def __init__(
865799
max_samples=max_samples,
866800
)
867801

868-
@staticmethod
869-
def _get_oob_predictions(tree, X):
870-
"""Compute the OOB predictions for an individual tree.
871-
872-
Parameters
873-
----------
874-
tree : DecisionTreeClassifier object
875-
A single decision tree classifier.
876-
X : ndarray of shape (n_samples, n_features)
877-
The OOB samples.
878-
879-
Returns
880-
-------
881-
y_pred : ndarray of shape (n_samples, n_classes, n_outputs)
882-
The OOB associated predictions.
883-
"""
884-
y_pred = tree.predict_proba(X, check_input=False)
885-
y_pred = np.asarray(y_pred)
886-
if y_pred.ndim == 2:
887-
# binary and multiclass
888-
y_pred = y_pred[..., np.newaxis]
889-
else:
890-
# Roll the first `n_outputs` axis to the last axis. We will reshape
891-
# from a shape of (n_outputs, n_samples, n_classes) to a shape of
892-
# (n_samples, n_classes, n_outputs).
893-
y_pred = np.rollaxis(y_pred, axis=0, start=3)
894-
return y_pred
895-
896802
def _set_oob_score_and_ufi_attributes(
897803
self, X, y, sample_weight, scoring_function=None
898804
):
@@ -1215,31 +1121,6 @@ def predict(self, X):
12151121

12161122
return y_hat
12171123

1218-
@staticmethod
1219-
def _get_oob_predictions(tree, X):
1220-
"""Compute the OOB predictions for an individual tree.
1221-
1222-
Parameters
1223-
----------
1224-
tree : DecisionTreeRegressor object
1225-
A single decision tree regressor.
1226-
X : ndarray of shape (n_samples, n_features)
1227-
The OOB samples.
1228-
1229-
Returns
1230-
-------
1231-
y_pred : ndarray of shape (n_samples, 1, n_outputs)
1232-
The OOB associated predictions.
1233-
"""
1234-
y_pred = tree.predict(X, check_input=False)
1235-
if y_pred.ndim == 1:
1236-
# single output regression
1237-
y_pred = y_pred[:, np.newaxis, np.newaxis]
1238-
else:
1239-
# multioutput regression
1240-
y_pred = y_pred[:, np.newaxis, :]
1241-
return y_pred
1242-
12431124
def _set_oob_score_and_ufi_attributes(
12441125
self, X, y, sample_weight, scoring_function=None
12451126
):
@@ -1288,7 +1169,8 @@ def unbiased_feature_importances_(self):
12881169
Corrected version of the Mean Decrease Impurity, proposed by Zhou and Hooker in
12891170
"Unbiased Measurement of Feature Importance in Tree-Based Methods".
12901171
1291-
It is only available if the chosen split criterion is `squared_error` or `friedman_mse`.
1172+
It is only available if the chosen split criterion is `squared_error` or
1173+
`friedman_mse`.
12921174
12931175
Returns
12941176
-------

0 commit comments