diff --git a/doc/ensemble.rst b/doc/ensemble.rst index f86765f43..8a73837ce 100644 --- a/doc/ensemble.rst +++ b/doc/ensemble.rst @@ -100,13 +100,29 @@ takes the same parameters than the scikit-learn ... ratio='auto', ... replacement=False, ... random_state=0) - >>> bbc.fit(X, y) # doctest: +ELLIPSIS + >>> bbc.fit(X_train, y_train) # doctest: +ELLIPSIS BalancedBaggingClassifier(...) >>> y_pred = bbc.predict(X_test) >>> confusion_matrix(y_test, y_pred) - array([[ 12, 0, 0], - [ 1, 54, 4], - [ 49, 53, 1077]]) + array([[ 9, 1, 2], + [ 0, 55, 4], + [ 42, 46, 1091]]) + +It is also possible to turn a balanced bagging classifier into a balanced random +forest using a decision tree classifier and setting the parameter +``max_features='auto'``. It allows randomly selecting a subset of features for +each tree:: + + >>> brf = BalancedBaggingClassifier( + ... base_estimator=DecisionTreeClassifier(max_features='auto'), + ... random_state=0) + >>> brf.fit(X_train, y_train) # doctest: +ELLIPSIS + BalancedBaggingClassifier(...) + >>> y_pred = brf.predict(X_test) + >>> confusion_matrix(y_test, y_pred) + array([[ 9, 1, 2], + [ 0, 54, 5], + [ 31, 34, 1114]]) See :ref:`sphx_glr_auto_examples_ensemble_plot_comparison_bagging_classifier.py`. diff --git a/doc/whats_new/v0.0.4.rst b/doc/whats_new/v0.0.4.rst index 4c1e92347..3cf54dc59 100644 --- a/doc/whats_new/v0.0.4.rst +++ b/doc/whats_new/v0.0.4.rst @@ -6,6 +6,12 @@ Version 0.4 (under development) Changelog --------- +Enhancement +........... + +- Add a documentation note to create a balanced random forest from a balanced + bagging classifier. :issue:`372` by :user:`Guillaume Lemaitre `. + Bug fixes .........
diff --git a/examples/ensemble/plot_comparison_bagging_classifier.py b/examples/ensemble/plot_comparison_bagging_classifier.py index 37f05cb3f..c6e148c9d 100644 --- a/examples/ensemble/plot_comparison_bagging_classifier.py +++ b/examples/ensemble/plot_comparison_bagging_classifier.py @@ -26,6 +26,7 @@ from sklearn.model_selection import train_test_split from sklearn.ensemble import BaggingClassifier +from sklearn.tree import DecisionTreeClassifier from sklearn.metrics import confusion_matrix from imblearn.datasets import fetch_datasets @@ -99,4 +100,25 @@ def plot_confusion_matrix(cm, classes, plot_confusion_matrix(cm_balanced_bagging, classes=np.unique(ozone.target), title='Confusion matrix using BalancedBaggingClassifier') +############################################################################### +# Turning the balanced bagging classifier into a balanced random forest +############################################################################### +# It is possible to turn the ``BalancedBaggingClassifier`` into a balanced +# random forest by using a ``DecisionTreeClassifier`` with +# ``max_features='auto'``. We illustrate such changes below. 
+ +balanced_random_forest = BalancedBaggingClassifier( + base_estimator=DecisionTreeClassifier(max_features='auto'), + random_state=0) + +balanced_random_forest.fit(X_train, y_train) +print('Classification results using a balanced random forest classifier on' + ' imbalanced data') +y_pred_balanced_rf = balanced_random_forest.predict(X_test) +print(classification_report_imbalanced(y_test, y_pred_balanced_rf)) +cm_bagging = confusion_matrix(y_test, y_pred_balanced_rf) +plt.figure() +plot_confusion_matrix(cm_bagging, classes=np.unique(ozone.target), + title='Confusion matrix using balanced random forest') + plt.show() diff --git a/imblearn/ensemble/classifier.py b/imblearn/ensemble/classifier.py index ad4cd09cf..98a6243c4 100644 --- a/imblearn/ensemble/classifier.py +++ b/imblearn/ensemble/classifier.py @@ -8,12 +8,10 @@ import numpy as np -import sklearn from sklearn.base import clone from sklearn.ensemble import BaggingClassifier from sklearn.tree import DecisionTreeClassifier from sklearn.ensemble.bagging import _generate_bagging_indices -from sklearn.utils import indices_to_mask from ..pipeline import Pipeline from ..under_sampling import RandomUnderSampler @@ -136,6 +134,9 @@ class BalancedBaggingClassifier(BaggingClassifier): Notes ----- + It is possible to turn this classifier into a balanced random forest [5]_ + by passing a :class:`sklearn.tree.DecisionTreeClassifier` with + ``max_features='auto'`` as a base estimator. See :ref:`sphx_glr_auto_examples_ensemble_plot_comparison_bagging_classifier.py`. @@ -155,6 +156,9 @@ class BalancedBaggingClassifier(BaggingClassifier): 1998. .. [4] G. Louppe and P. Geurts, "Ensembles on Random Patches", Machine Learning and Knowledge Discovery in Databases, 346-361, 2012. + .. [5] Chen, Chao, Andy Liaw, and Leo Breiman. "Using random forest to + learn imbalanced data." University of California, Berkeley 110, + 2004. Examples --------