@@ -123,7 +123,8 @@ class and a random sample with replacement of the same size from all
     random_instance = check_random_state(random_state)
     sample_indices = np.empty(n_class * min_count, dtype=int)
 
-    for i, cls, count, indices in zip(range(n_class), classes, class_counts, class_indices):
+    for i, cls, count, indices in zip(range(n_class), classes, class_counts,
+                                      class_indices):
         random_instances = random_instance.randint(0, count, min_count)
         random_indices = indices[random_instances]
         sample_indices[i * min_count:(i + 1) * min_count] = random_indices
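For reference, the balanced draw this hunk implements can be sketched standalone: every class contributes min_count row indices drawn with replacement from its own rows, so each tree sees a class-balanced bootstrap. The helper name and the setup below are illustrative assumptions based on the variables visible in the hunk, not the committed _generate_balanced_sample_indices:

import numpy as np
from sklearn.utils import check_random_state

def balanced_sample_indices_sketch(y, random_state=None):
    # Assumed setup: derive the per-class data directly from y; the real
    # code receives it precomputed as balance_data.
    random_instance = check_random_state(random_state)
    classes, class_counts = np.unique(y, return_counts=True)
    class_indices = [np.flatnonzero(y == cls) for cls in classes]
    n_class, min_count = len(classes), class_counts.min()

    sample_indices = np.empty(n_class * min_count, dtype=int)
    for i, (count, indices) in enumerate(zip(class_counts, class_indices)):
        # Draw min_count positions with replacement within this class.
        random_instances = random_instance.randint(0, count, min_count)
        sample_indices[i * min_count:(i + 1) * min_count] = \
            indices[random_instances]
    return sample_indices

y = np.array([0, 0, 0, 0, 0, 1, 1, 2, 2, 2])
print(balanced_sample_indices_sketch(y, random_state=0))  # 2 draws per class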
@@ -158,7 +159,8 @@ def _parallel_build_trees(tree, forest, X, y, sample_weight, tree_idx, n_trees,
         if balance_data is None:
             indices = _generate_sample_indices(tree.random_state, n_samples)
         else:
-            indices = _generate_balanced_sample_indices(tree.random_state, balance_data)
+            indices = _generate_balanced_sample_indices(tree.random_state,
+                                                        balance_data)
 
         sample_counts = bincount(indices, minlength=n_samples)
         curr_sample_weight *= sample_counts
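Worth noting for context: _parallel_build_trees never materializes the resampled rows. It converts the drawn indices into per-sample weights via bincount, so duplicated rows get proportionally larger weight and undrawn rows get zero. A plain-NumPy illustration, independent of the patch:

import numpy as np

n_samples = 6
indices = np.array([0, 0, 2, 4, 4, 4])  # a bootstrap draw; some rows repeat
sample_counts = np.bincount(indices, minlength=n_samples)

curr_sample_weight = np.ones(n_samples)
curr_sample_weight *= sample_counts      # undrawn rows end up with weight 0
print(curr_sample_weight)                # [2. 0. 1. 0. 3. 0.]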
@@ -278,8 +280,8 @@ def fit(self, X, y, sample_weight=None):
         Parameters
         ----------
         X : array-like or sparse matrix of shape = [n_samples, n_features]
-            The training input samples. Internally, its dtype will be converted to
-            ``dtype=np.float32``. If a sparse matrix is provided, it will be
+            The training input samples. Internally, its dtype will be converted
+            to ``dtype=np.float32``. If a sparse matrix is provided, it will be
             converted into a sparse ``csc_matrix``.
 
         y : array-like, shape = [n_samples] or [n_samples, n_outputs]
@@ -326,8 +328,6 @@ def fit(self, X, y, sample_weight=None):
         self.n_outputs_ = y.shape[1]
 
         y, expanded_class_weight = self._validate_y_class_weight(y)
-        # if self.balanced and self.n_outputs_ > 1:
-        #     raise NotImplementedError("Multi-output balanced random forest is not impemented.")
 
         if getattr(y, "dtype", None) != DOUBLE or not y.flags.contiguous:
             y = np.ascontiguousarray(y, dtype=DOUBLE)
@@ -373,7 +373,8 @@ def fit(self, X, y, sample_weight=None):
                                             random_state=random_state)
                 trees.append(tree)
 
-            balance_data = _get_class_balance_data(y) if self.balanced else None
+            balance_data = _get_class_balance_data(y)\
+                if self.balanced else None
 
             # Parallel loop: we use the threading backend as the Cython code
             # for fitting the trees is internally releasing the Python GIL
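_get_class_balance_data itself is not shown in this diff. Judging from how the first hunk consumes balance_data (n_class, classes, class_counts, class_indices, min_count), a plausible reconstruction, offered as an assumption rather than the committed helper, is:

import numpy as np

def get_class_balance_data(y):
    # Hypothetical: precompute the per-class bookkeeping once, so that each
    # tree's balanced bootstrap only has to draw indices.
    y = np.ravel(y)
    classes, class_counts = np.unique(y, return_counts=True)
    class_indices = [np.flatnonzero(y == cls) for cls in classes]
    min_count = class_counts.min()
    return classes, class_counts, class_indices, min_count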
@@ -542,7 +543,8 @@ def _validate_y_class_weight(self, y):
 
         y_store_unique_indices = np.zeros(y.shape, dtype=np.int)
         for k in range(self.n_outputs_):
-            classes_k, y_store_unique_indices[:, k] = np.unique(y[:, k], return_inverse=True)
+            classes_k, y_store_unique_indices[:, k] = np.unique(
+                y[:, k], return_inverse=True)
             self.classes_.append(classes_k)
             self.n_classes_.append(classes_k.shape[0])
         y = y_store_unique_indices
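The call being rewrapped here relies on np.unique's return_inverse to map each output column of y onto integer class indices; for example:

import numpy as np

y_col = np.array(['spam', 'ham', 'spam', 'eggs'])
classes_k, inverse = np.unique(y_col, return_inverse=True)
print(classes_k)  # ['eggs' 'ham' 'spam']  (sorted unique labels)
print(inverse)    # [2 1 2 0]              (y_col == classes_k[inverse])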
@@ -552,16 +554,18 @@ def _validate_y_class_weight(self, y):
             if isinstance(self.class_weight, six.string_types):
                 if self.class_weight not in valid_presets:
                     raise ValueError('Valid presets for class_weight include '
-                                     '"balanced" and "balanced_subsample". Given "%s".'
+                                     '"balanced" and "balanced_subsample". '
+                                     'Given "%s".'
                                      % self.class_weight)
                 if self.warm_start:
-                    warn('class_weight presets "balanced" or "balanced_subsample" are '
+                    warn('class_weight presets "balanced" or '
+                         '"balanced_subsample" are '
                          'not recommended for warm_start if the fitted data '
                          'differs from the full dataset. In order to use '
-                         '"balanced" weights, use compute_class_weight("balanced", '
-                         'classes, y). In place of y you can use a large '
-                         'enough sample of the full training set target to '
-                         'properly estimate the class frequency '
+                         '"balanced" weights, use compute_class_weight('
+                         '"balanced", classes, y). In place of y you can use a '
+                         'large enough sample of the full training set target '
+                         'to properly estimate the class frequency '
                          'distributions. Pass the resulting weights as the '
                          'class_weight parameter.')
 
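The reworded warning steers warm-start users toward compute_class_weight. For this vintage of scikit-learn the call it describes would look roughly like the following (the positional signature is assumed from the era; later releases require keyword arguments):

import numpy as np
from sklearn.utils.class_weight import compute_class_weight

y = np.array([0, 0, 0, 0, 1, 1])
classes = np.unique(y)
weights = compute_class_weight('balanced', classes, y)
print(dict(zip(classes, weights)))  # {0: 0.75, 1: 1.5}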
@@ -617,8 +621,8 @@ def predict_proba(self, X):
 
         The predicted class probabilities of an input sample are computed as
         the mean predicted class probabilities of the trees in the forest. The
-        class probability of a single tree is the fraction of samples of the same
-        class in a leaf.
+        class probability of a single tree is the fraction of samples of the
+        same class in a leaf.
 
         Parameters
         ----------
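As the rewrapped docstring states, the forest probability is just the mean of the per-tree leaf fractions; numerically, with made-up per-tree outputs for one sample:

import numpy as np

# Each row: one tree's class probabilities for a single sample, i.e. the
# class fractions in the leaf that the sample reaches.
tree_probas = np.array([[0.9, 0.1],
                        [0.6, 0.4],
                        [1.0, 0.0]])
print(tree_probas.mean(axis=0))  # [0.83333333 0.16666667]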
@@ -1376,8 +1380,9 @@ class ExtraTreesClassifier(ForestClassifier):
         weights inversely proportional to class frequencies in the input data
         as ``n_samples / (n_classes * np.bincount(y))``
 
-        The "balanced_subsample" mode is the same as "balanced" except that weights are
-        computed based on the bootstrap sample for every tree grown.
+        The "balanced_subsample" mode is the same as "balanced" except that
+        weights are computed based on the bootstrap sample for every tree
+        grown.
 
         For multi-output, the weights of each column of y will be multiplied.
 
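The ``n_samples / (n_classes * np.bincount(y))`` formula quoted in this docstring, worked through on a small label vector:

import numpy as np

y = np.array([0, 0, 0, 0, 0, 0, 1, 1, 2])   # 6 / 2 / 1 samples per class
weights = len(y) / (len(np.unique(y)) * np.bincount(y))
print(weights)  # [0.5 1.5 3. ]  (rarer classes get larger weights)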