scikit-learn-contrib · glemaitre · Dec 6, 2019 · Dec 5, 2019 · Dec 6, 2019 · Dec 6, 2019
diff --git a/doc/whats_new/v0.6.rst b/doc/whats_new/v0.6.rst
@@ -1,3 +1,24 @@
+.. _changes_0_6_1:
+
+Version 0.6.1
+==============
+
+**In Development**
+
+This is a bug-fix release to primarily resolve some packaging issues in version
+0.6.0. It also includes minor documentation improvements and some bug fixes.
+
+Changelog
+---------
+
+Bug fixes
+.........
+
+- Fix a bug in :class:`imblearn.ensemble.BalancedRandomForestClassifier`
+  leading to a wrong number of samples used during fitting due to
+  `max_samples` and therefore a bad computation of the OOB score.
+  :pr:`656` by :user:`Guillaume Lemaitre <glemaitre>`.
+
 .. _changes_0_6:
 
 Version 0.6.0

diff --git a/imblearn/ensemble/_forest.py b/imblearn/ensemble/_forest.py
@@ -53,6 +53,8 @@ def _local_parallel_build_trees(
     X_resampled, y_resampled = sampler.fit_resample(X, y)
     if sample_weight is not None:
         sample_weight = _safe_indexing(sample_weight, sampler.sample_indices_)
+    if _get_n_samples_bootstrap is not None:
+        n_samples_bootstrap = min(n_samples_bootstrap, X_resampled.shape[0])
     tree = _parallel_build_trees(
         tree,
         forest,
@@ -214,6 +216,9 @@ class BalancedRandomForestClassifier(RandomForestClassifier):
             - If int, then draw `max_samples` samples.
             - If float, then draw `max_samples * X.shape[0]` samples. Thus,
               `max_samples` should be in the interval `(0, 1)`.
+        Be aware that the final number of samples used will be the minimum
+        between the number of samples given in `max_samples` and the number
+        of samples obtained after resampling.
 
         .. versionadded:: 0.22
            Added in `scikit-learn` in 0.22

diff --git a/imblearn/ensemble/tests/test_forest.py b/imblearn/ensemble/tests/test_forest.py
@@ -115,7 +115,9 @@ def test_balanced_random_forest_oob(imbalanced_dataset):
     X_train, X_test, y_train, y_test = train_test_split(
         X, y, random_state=42, stratify=y
     )
-    est = BalancedRandomForestClassifier(oob_score=True, random_state=0)
+    est = BalancedRandomForestClassifier(
+        oob_score=True, random_state=0, n_estimators=1000
+    )
 
     est.fit(X_train, y_train)
     test_score = est.score(X_test, y_test)
@@ -182,14 +184,16 @@ def test_balanced_random_forest_pruning(imbalanced_dataset):
     assert n_nodes_no_pruning > n_nodes_pruning
 
 
-def test_balanced_random_forest_oob_binomial():
+@pytest.mark.parametrize("ratio", [0.5, 0.1])
+@pytest.mark.filterwarnings("ignore:Some inputs do not have OOB scores")
+def test_balanced_random_forest_oob_binomial(ratio):
     # Regression test for #655: check that the oob score is closed to 0.5
     # a binomial experiment.
     rng = np.random.RandomState(42)
     n_samples = 1000
     X = np.arange(n_samples).reshape(-1, 1)
-    y = rng.binomial(1, 0.5, size=n_samples)
+    y = rng.binomial(1, ratio, size=n_samples)
 
     erf = BalancedRandomForestClassifier(oob_score=True, random_state=42)
     erf.fit(X, y)
-    assert np.abs(erf.oob_score_ - 0.5) < 0.05
+    assert np.abs(erf.oob_score_ - 0.5) < 0.1