17
17
"""
18
18
19
19
# Authors: Peter Prettenhofer, Scott White, Gilles Louppe, Emanuele Olivetti,
20
- # Arnaud Joly
20
+ # Arnaud Joly, Jacob Schreiber
21
21
# License: BSD 3 clause
22
22
23
23
from __future__ import print_function
@@ -898,6 +898,13 @@ def _resize_state(self):
898
898
def _is_initialized (self ):
899
899
return len (getattr (self , 'estimators_' , [])) > 0
900
900
901
+ def _check_initialized (self ):
902
+ """Check that the estimator is initialized, raising an error if not."""
903
+ if self .estimators_ is None or len (self .estimators_ ) == 0 :
904
+ raise NotFittedError ("Estimator not fitted, call `fit`"
905
+ " before making predictions`." )
906
+
907
+
901
908
def fit (self , X , y , sample_weight = None , monitor = None ):
902
909
"""Fit the gradient boosting model.
903
910
@@ -1067,9 +1074,7 @@ def _make_estimator(self, append=True):
1067
1074
1068
1075
def _init_decision_function (self , X ):
1069
1076
"""Check input and compute prediction of ``init``. """
1070
- if self .estimators_ is None or len (self .estimators_ ) == 0 :
1071
- raise NotFittedError ("Estimator not fitted, call `fit`"
1072
- " before making predictions`." )
1077
+ self ._check_initialized ()
1073
1078
if X .shape [1 ] != self .n_features :
1074
1079
raise ValueError ("X.shape[1] should be {0:d}, not {1:d}." .format (
1075
1080
self .n_features , X .shape [1 ]))
@@ -1164,9 +1169,7 @@ def feature_importances_(self):
1164
1169
-------
1165
1170
feature_importances_ : array, shape = [n_features]
1166
1171
"""
1167
- if self .estimators_ is None or len (self .estimators_ ) == 0 :
1168
- raise NotFittedError ("Estimator not fitted, call `fit` before"
1169
- " `feature_importances_`." )
1172
+ self ._check_initialized ()
1170
1173
1171
1174
total_sum = np .zeros ((self .n_features , ), dtype = np .float64 )
1172
1175
for stage in self .estimators_ :
@@ -1184,6 +1187,36 @@ def _validate_y(self, y):
1184
1187
# Default implementation
1185
1188
return y
1186
1189
1190
def apply(self, X):
    """Apply trees in the ensemble to X, return leaf indices.

    Parameters
    ----------
    X : array-like or sparse matrix, shape = [n_samples, n_features]
        The input samples. Internally, it will be converted to
        ``dtype=np.float32`` and if a sparse matrix is provided
        to a sparse ``csr_matrix``.

    Returns
    -------
    X_leaves : array_like, shape = [n_samples, n_estimators, n_classes]
        For each datapoint x in X and for each tree in the ensemble,
        return the index of the leaf x ends up in each estimator.
        In the case of binary classification n_classes is 1.
    """
    self._check_initialized()

    # Validate/convert X once, using the first fitted tree's checker,
    # then skip per-tree input checks below.
    first_tree = self.estimators_[0, 0]
    X = first_tree._validate_X_predict(X, check_input=True)

    n_stages, n_outputs = self.estimators_.shape
    leaves = np.zeros((X.shape[0], n_stages, n_outputs))

    for stage_idx in range(n_stages):
        stage = self.estimators_[stage_idx]
        for out_idx, tree in enumerate(stage):
            leaves[:, stage_idx, out_idx] = tree.apply(X, check_input=False)

    return leaves
1187
1220
1188
1221
class GradientBoostingClassifier (BaseGradientBoosting , ClassifierMixin ):
1189
1222
"""Gradient Boosting for classification.
@@ -1704,3 +1737,24 @@ def staged_predict(self, X):
1704
1737
"""
1705
1738
for y in self ._staged_decision_function (X ):
1706
1739
yield y .ravel ()
1740
+
1741
def apply(self, X):
    """Apply trees in the ensemble to X, return leaf indices.

    Parameters
    ----------
    X : array-like or sparse matrix, shape = [n_samples, n_features]
        The input samples. Internally, it will be converted to
        ``dtype=np.float32`` and if a sparse matrix is provided
        to a sparse ``csr_matrix``.

    Returns
    -------
    X_leaves : array_like, shape = [n_samples, n_estimators]
        For each datapoint x in X and for each tree in the ensemble,
        return the index of the leaf x ends up in each estimator.
    """
    leaves = super(GradientBoostingRegressor, self).apply(X)
    # Regression uses a single output per stage, so collapse the
    # trailing class axis.  Use ``leaves.shape[0]`` rather than
    # ``X.shape[0]``: the base ``apply`` validates/converts X, so plain
    # Python lists (which have no ``.shape``) are accepted too.
    leaves = leaves.reshape(leaves.shape[0], self.estimators_.shape[0])
    return leaves
0 commit comments