MAINT add compatibility for sklearn 1.0 #949

Merged
2 changes: 1 addition & 1 deletion README.rst
@@ -30,7 +30,7 @@
.. |PythonMinVersion| replace:: 3.8
.. |NumPyMinVersion| replace:: 1.17.3
.. |SciPyMinVersion| replace:: 1.3.2
.. |ScikitLearnMinVersion| replace:: 1.1.3
.. |ScikitLearnMinVersion| replace:: 1.0.2
.. |MatplotlibMinVersion| replace:: 3.1.2
.. |PandasMinVersion| replace:: 1.0.5
.. |TensorflowMinVersion| replace:: 2.4.3
6 changes: 3 additions & 3 deletions azure-pipelines.yml
@@ -158,7 +158,7 @@ jobs:
# Linux environment to test the latest available dependencies and MKL.
pylatest_pip_openblas_pandas:
DISTRIB: 'conda-pip-latest'
PYTHON_VERSION: '3.9'
PYTHON_VERSION: '*'
TEST_DOCS: 'true'
TEST_DOCSTRINGS: 'true'
CHECK_WARNINGS: 'true'
@@ -185,7 +185,7 @@ jobs:
TENSORFLOW_VERSION: 'min'
TEST_DOCS: 'true'
TEST_DOCSTRINGS: 'false' # it is going to fail because of scikit-learn inheritance
CHECK_WARNINGS: 'true'
CHECK_WARNINGS: 'false' # in case the older versions raise some FutureWarnings
pylatest_pip_keras:
DISTRIB: 'conda-pip-latest-keras'
CONDA_CHANNEL: 'conda-forge'
@@ -209,7 +209,7 @@ jobs:
KERAS_VERSION: 'min'
TEST_DOCS: 'true'
TEST_DOCSTRINGS: 'false' # it is going to fail because of scikit-learn inheritance
CHECK_WARNINGS: 'true'
CHECK_WARNINGS: 'false' # in case the older versions raise some FutureWarnings

# Currently runs on Python 3.8 while only Python 3.7 is available
# - template: build_tools/azure/posix-docker.yml
1 change: 1 addition & 0 deletions build_tools/azure/posix-docker.yml
@@ -30,6 +30,7 @@ jobs:
THREADPOOLCTL_VERSION: 'latest'
COVERAGE: 'false'
TEST_DOCSTRINGS: 'false'
CHECK_WARNINGS: 'false'
BLAS: 'openblas'
# Set in azure-pipelines.yml
DISTRIB: ''
1 change: 1 addition & 0 deletions build_tools/azure/posix.yml
@@ -36,6 +36,7 @@ jobs:
COVERAGE: 'true'
TEST_DOCS: 'false'
TEST_DOCSTRINGS: 'false'
CHECK_WARNINGS: 'false'
SHOW_SHORT_SUMMARY: 'false'
strategy:
matrix:
2 changes: 1 addition & 1 deletion build_tools/azure/test_script.sh
@@ -34,7 +34,7 @@ if [[ "$COVERAGE" == "true" ]]; then
TEST_CMD="$TEST_CMD --cov-config='$COVERAGE_PROCESS_START' --cov imblearn --cov-report="
fi

if [[ -n "$CHECK_WARNINGS" ]]; then
if [[ "$CHECK_WARNINGS" == "true" ]]; then
# numpy 1.19.0's tostring() deprecation is ignored until scipy and joblib remove its usage
TEST_CMD="$TEST_CMD -Werror::DeprecationWarning -Werror::FutureWarning -Wignore:tostring:DeprecationWarning"

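When CHECK_WARNINGS is 'true', the two -Werror flags above promote deprecation-type warnings to test failures while still ignoring numpy's tostring message. As a rough, illustrative Python equivalent of those pytest -W filters (not part of the diff):

import warnings

# Mirrors -Werror::DeprecationWarning and -Werror::FutureWarning.
warnings.filterwarnings("error", category=DeprecationWarning)
warnings.filterwarnings("error", category=FutureWarning)
# Mirrors -Wignore:tostring:DeprecationWarning; added last, so it is consulted
# first and shadows the error filter for messages starting with "tostring".
warnings.filterwarnings("ignore", message="tostring", category=DeprecationWarning)

try:
    warnings.warn("tostring() is deprecated", DeprecationWarning)  # silenced
    warnings.warn("default will change", FutureWarning)  # raised as an error
except FutureWarning as exc:
    print(f"caught: {exc}")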
1 change: 1 addition & 0 deletions build_tools/azure/windows.yml
@@ -21,6 +21,7 @@ jobs:
PYTEST_XDIST_VERSION: 'latest'
TEST_DIR: '$(Agent.WorkFolder)/tmp_folder'
CPU_COUNT: '2'
CHECK_WARNINGS: 'false'
strategy:
matrix:
${{ insert }}: ${{ parameters.matrix }}
18 changes: 9 additions & 9 deletions doc/ensemble.rst
@@ -38,7 +38,7 @@ data set, this classifier will favor the majority classes::
>>> bc.fit(X_train, y_train) #doctest:
BaggingClassifier(...)
>>> y_pred = bc.predict(X_test)
>>> balanced_accuracy_score(y_test, y_pred) # doctest:
>>> balanced_accuracy_score(y_test, y_pred)
0.77...

In :class:`BalancedBaggingClassifier`, each bootstrap sample will be further
@@ -54,10 +54,10 @@ sampling is controlled by the parameter `sampler` or the two parameters
... sampling_strategy='auto',
... replacement=False,
... random_state=0)
>>> bbc.fit(X_train, y_train) # doctest:
>>> bbc.fit(X_train, y_train)
BalancedBaggingClassifier(...)
>>> y_pred = bbc.predict(X_test)
>>> balanced_accuracy_score(y_test, y_pred) # doctest:
>>> balanced_accuracy_score(y_test, y_pred)
0.8...

Changing the `sampler` will give rise to different known implementations
@@ -78,10 +78,10 @@ each tree of the forest will be provided a balanced bootstrap sample

>>> from imblearn.ensemble import BalancedRandomForestClassifier
>>> brf = BalancedRandomForestClassifier(n_estimators=100, random_state=0)
>>> brf.fit(X_train, y_train) # doctest:
>>> brf.fit(X_train, y_train)
BalancedRandomForestClassifier(...)
>>> y_pred = brf.predict(X_test)
>>> balanced_accuracy_score(y_test, y_pred) # doctest:
>>> balanced_accuracy_score(y_test, y_pred)
0.8...

.. _boosting:
@@ -97,10 +97,10 @@ a boosting iteration :cite:`seiffert2009rusboost`::
>>> from imblearn.ensemble import RUSBoostClassifier
>>> rusboost = RUSBoostClassifier(n_estimators=200, algorithm='SAMME.R',
... random_state=0)
>>> rusboost.fit(X_train, y_train) # doctest:
>>> rusboost.fit(X_train, y_train)
RUSBoostClassifier(...)
>>> y_pred = rusboost.predict(X_test)
>>> balanced_accuracy_score(y_test, y_pred) # doctest:
>>> balanced_accuracy_score(y_test, y_pred)
0...

A specific method which uses :class:`~sklearn.ensemble.AdaBoostClassifier` as
@@ -111,10 +111,10 @@ the :class:`BalancedBaggingClassifier` API, one can construct the ensemble as::

>>> from imblearn.ensemble import EasyEnsembleClassifier
>>> eec = EasyEnsembleClassifier(random_state=0)
>>> eec.fit(X_train, y_train) # doctest:
>>> eec.fit(X_train, y_train)
EasyEnsembleClassifier(...)
>>> y_pred = eec.predict(X_test)
>>> balanced_accuracy_score(y_test, y_pred) # doctest:
>>> balanced_accuracy_score(y_test, y_pred)
0.6...

.. topic:: Examples
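The `sampler` parameter shown in the doc/ensemble.rst hunks above accepts any imblearn sampler. A minimal sketch of swapping the default random under-sampler (the NearMiss choice is illustrative, not from the diff):

from imblearn.ensemble import BalancedBaggingClassifier
from imblearn.under_sampling import NearMiss

# Any object exposing fit_resample works here; NearMiss-3 gives one of the
# "different known implementations" mentioned in the documentation.
bbc = BalancedBaggingClassifier(sampler=NearMiss(version=3), random_state=0)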
2 changes: 1 addition & 1 deletion imblearn/_min_dependencies.py
@@ -4,7 +4,7 @@
NUMPY_MIN_VERSION = "1.17.3"
SCIPY_MIN_VERSION = "1.3.2"
PANDAS_MIN_VERSION = "1.0.5"
SKLEARN_MIN_VERSION = "1.1.3"
SKLEARN_MIN_VERSION = "1.0.2"
TENSORFLOW_MIN_VERSION = "2.4.3"
KERAS_MIN_VERSION = "2.4.3"
JOBLIB_MIN_VERSION = "1.1.1"
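These constants centralize the minimum supported versions; lowering SKLEARN_MIN_VERSION to "1.0.2" is the core of this PR. A hypothetical sketch of how such a pin can be enforced at import time (this guard is not part of the diff, and `packaging` is an assumed dependency):

import sklearn
from packaging.version import Version

SKLEARN_MIN_VERSION = "1.0.2"

if Version(sklearn.__version__) < Version(SKLEARN_MIN_VERSION):
    raise ImportError(
        f"scikit-learn >= {SKLEARN_MIN_VERSION} is required, "
        f"but {sklearn.__version__} is installed."
    )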
2 changes: 1 addition & 1 deletion imblearn/combine/_smote_enn.py
@@ -89,7 +89,7 @@ class SMOTEENN(BaseSampler):

>>> from collections import Counter
>>> from sklearn.datasets import make_classification
>>> from imblearn.combine import SMOTEENN # doctest:
>>> from imblearn.combine import SMOTEENN
>>> X, y = make_classification(n_classes=2, class_sep=2,
... weights=[0.1, 0.9], n_informative=3, n_redundant=1, flip_y=0,
... n_features=20, n_clusters_per_class=1, n_samples=1000, random_state=10)
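The docstring example above is cut off by the hunk boundary; a self-contained sketch of the same pattern (printed counts are indicative):

from collections import Counter

from sklearn.datasets import make_classification
from imblearn.combine import SMOTEENN

X, y = make_classification(n_classes=2, class_sep=2, weights=[0.1, 0.9],
                           n_informative=3, n_redundant=1, flip_y=0,
                           n_features=20, n_clusters_per_class=1,
                           n_samples=1000, random_state=10)
print(Counter(y))  # imbalanced input, roughly 900 vs 100
X_res, y_res = SMOTEENN(random_state=42).fit_resample(X, y)
print(Counter(y_res))  # approximately balanced after SMOTE + ENN cleaning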
3 changes: 1 addition & 2 deletions imblearn/combine/_smote_tomek.py
@@ -87,8 +87,7 @@ class SMOTETomek(BaseSampler):

>>> from collections import Counter
>>> from sklearn.datasets import make_classification
>>> from imblearn.combine import \
SMOTETomek # doctest:
>>> from imblearn.combine import SMOTETomek
>>> X, y = make_classification(n_classes=2, class_sep=2,
... weights=[0.1, 0.9], n_informative=3, n_redundant=1, flip_y=0,
... n_features=20, n_clusters_per_class=1, n_samples=1000, random_state=10)
59 changes: 56 additions & 3 deletions imblearn/ensemble/_bagging.py
@@ -9,16 +9,23 @@
import warnings

import numpy as np
from joblib import Parallel
from sklearn.base import clone
from sklearn.ensemble import BaggingClassifier
from sklearn.ensemble._bagging import _parallel_decision_function
from sklearn.ensemble._base import _partition_estimators
from sklearn.tree import DecisionTreeClassifier
from sklearn.utils.fixes import delayed
from sklearn.utils.validation import check_is_fitted

from ..pipeline import Pipeline
from ..under_sampling import RandomUnderSampler
from ..under_sampling.base import BaseUnderSampler
from ..utils import Substitution, check_sampling_strategy, check_target_type
from ..utils._available_if import available_if
from ..utils._docstring import _n_jobs_docstring, _random_state_docstring
from ..utils._validation import _deprecate_positional_args
from ._common import _estimator_has


@Substitution(
@@ -229,8 +236,7 @@ class BalancedBaggingClassifier(BaggingClassifier):
>>> from sklearn.datasets import make_classification
>>> from sklearn.model_selection import train_test_split
>>> from sklearn.metrics import confusion_matrix
>>> from imblearn.ensemble import \
BalancedBaggingClassifier # doctest:
>>> from imblearn.ensemble import BalancedBaggingClassifier
>>> X, y = make_classification(n_classes=2, class_sep=2,
... weights=[0.1, 0.9], n_informative=3, n_redundant=1, flip_y=0,
... n_features=20, n_clusters_per_class=1, n_samples=1000, random_state=10)
@@ -239,7 +245,7 @@ class BalancedBaggingClassifier(BaggingClassifier):
>>> X_train, X_test, y_train, y_test = train_test_split(X, y,
... random_state=0)
>>> bbc = BalancedBaggingClassifier(random_state=42)
>>> bbc.fit(X_train, y_train) # doctest:
>>> bbc.fit(X_train, y_train)
BalancedBaggingClassifier(...)
>>> y_pred = bbc.predict(X_test)
>>> print(confusion_matrix(y_test, y_pred))
@@ -408,6 +414,53 @@ def _fit(self, X, y, max_samples=None, max_depth=None, sample_weight=None):
# None.
return super()._fit(X, y, self.max_samples, sample_weight=None)

# TODO: remove when minimum supported version of scikit-learn is 1.1
@available_if(_estimator_has("decision_function"))
def decision_function(self, X):
"""Average of the decision functions of the base classifiers.

Parameters
----------
X : {array-like, sparse matrix} of shape (n_samples, n_features)
The training input samples. Sparse matrices are accepted only if
they are supported by the base estimator.

Returns
-------
score : ndarray of shape (n_samples, k)
The decision function of the input samples. The columns correspond
to the classes in sorted order, as they appear in the attribute
``classes_``. Regression and binary classification are special
cases with ``k == 1``, otherwise ``k==n_classes``.
"""
check_is_fitted(self)

# Check data
X = self._validate_data(
X,
accept_sparse=["csr", "csc"],
dtype=None,
force_all_finite=False,
reset=False,
)

# Parallel loop
n_jobs, _, starts = _partition_estimators(self.n_estimators, self.n_jobs)

all_decisions = Parallel(n_jobs=n_jobs, verbose=self.verbose)(
delayed(_parallel_decision_function)(
self.estimators_[starts[i] : starts[i + 1]],
self.estimators_features_[starts[i] : starts[i + 1]],
X,
)
for i in range(n_jobs)
)

# Reduce
decisions = sum(all_decisions) / self.n_estimators

return decisions

def _more_tags(self):
tags = super()._more_tags()
tags_key = "_xfail_checks"
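Because of the `available_if` guard, `decision_function` only appears on the ensemble when the underlying base estimator provides one. A minimal usage sketch (the dataset and LinearSVC base learner are assumptions, not from the diff; older releases may spell the constructor argument `base_estimator`, per the fallback in `_common.py`):

from sklearn.datasets import make_classification
from sklearn.svm import LinearSVC
from imblearn.ensemble import BalancedBaggingClassifier

X, y = make_classification(weights=[0.1, 0.9], random_state=0)

# LinearSVC implements decision_function, so the backported method above is
# exposed and averages the scores of the fitted base pipelines.
bbc = BalancedBaggingClassifier(estimator=LinearSVC(), random_state=0)
bbc.fit(X, y)
print(bbc.decision_function(X).shape)  # (100,) for this binary toy problem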
15 changes: 15 additions & 0 deletions imblearn/ensemble/_common.py
@@ -0,0 +1,15 @@
def _estimator_has(attr):
"""Check if we can delegate a method to the underlying estimator.
First, we check the first fitted estimator if available, otherwise we
check the estimator attribute.
"""

def check(self):
if hasattr(self, "estimators_"):
return hasattr(self.estimators_[0], attr)
elif self.estimator is not None:
return hasattr(self.estimator, attr)
else: # TODO(1.4): Remove when the base_estimator deprecation cycle ends
return hasattr(self.base_estimator, attr)

return check
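A short sketch of the helper in action (the Holder class is a hypothetical stand-in for an unfitted meta-estimator; only the attribute-lookup chain above is exercised):

from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import LinearSVC

from imblearn.ensemble._common import _estimator_has


class Holder:
    """Hypothetical stand-in mimicking an unfitted bagging meta-estimator."""

    def __init__(self, estimator):
        self.estimator = estimator


check = _estimator_has("predict_proba")
print(check(Holder(RandomForestClassifier())))  # True: method can be delegated
print(check(Holder(LinearSVC())))  # False: LinearSVC has no predict_proba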
61 changes: 57 additions & 4 deletions imblearn/ensemble/_easy_ensemble.py
@@ -9,15 +9,22 @@
import warnings

import numpy as np
from joblib import Parallel
from sklearn.base import clone
from sklearn.ensemble import AdaBoostClassifier, BaggingClassifier
from sklearn.ensemble._bagging import _parallel_decision_function
from sklearn.ensemble._base import _partition_estimators
from sklearn.utils.fixes import delayed
from sklearn.utils.validation import check_is_fitted

from ..pipeline import Pipeline
from ..under_sampling import RandomUnderSampler
from ..under_sampling.base import BaseUnderSampler
from ..utils import Substitution, check_sampling_strategy, check_target_type
from ..utils._available_if import available_if
from ..utils._docstring import _n_jobs_docstring, _random_state_docstring
from ..utils._validation import _deprecate_positional_args
from ._common import _estimator_has

MAX_INT = np.iinfo(np.int32).max

@@ -31,7 +38,7 @@ class EasyEnsembleClassifier(BaggingClassifier):
"""Bag of balanced boosted learners also known as EasyEnsemble.

This algorithm is known as EasyEnsemble [1]_. The classifier is an
ensemble of AdaBoost learners trained on different balanced boostrap
ensemble of AdaBoost learners trained on different balanced bootstrap
samples. The balancing is achieved by random under-sampling.

Read more in the :ref:`User Guide <boosting>`.
@@ -154,8 +161,7 @@ class EasyEnsembleClassifier(BaggingClassifier):
>>> from sklearn.datasets import make_classification
>>> from sklearn.model_selection import train_test_split
>>> from sklearn.metrics import confusion_matrix
>>> from imblearn.ensemble import \
EasyEnsembleClassifier # doctest:
>>> from imblearn.ensemble import EasyEnsembleClassifier
>>> X, y = make_classification(n_classes=2, class_sep=2,
... weights=[0.1, 0.9], n_informative=3, n_redundant=1, flip_y=0,
... n_features=20, n_clusters_per_class=1, n_samples=1000, random_state=10)
@@ -164,7 +170,7 @@ class EasyEnsembleClassifier(BaggingClassifier):
>>> X_train, X_test, y_train, y_test = train_test_split(X, y,
... random_state=0)
>>> eec = EasyEnsembleClassifier(random_state=42)
>>> eec.fit(X_train, y_train) # doctest:
>>> eec.fit(X_train, y_train)
EasyEnsembleClassifier(...)
>>> y_pred = eec.predict(X_test)
>>> print(confusion_matrix(y_test, y_pred))
@@ -314,3 +320,50 @@ def _fit(self, X, y, max_samples=None, max_depth=None, sample_weight=None):
# RandomUnderSampler does not support sample_weight. We need to pass
# None.
return super()._fit(X, y, self.max_samples, sample_weight=None)

# TODO: remove when minimum supported version of scikit-learn is 1.1
@available_if(_estimator_has("decision_function"))
def decision_function(self, X):
"""Average of the decision functions of the base classifiers.

Parameters
----------
X : {array-like, sparse matrix} of shape (n_samples, n_features)
The training input samples. Sparse matrices are accepted only if
they are supported by the base estimator.

Returns
-------
score : ndarray of shape (n_samples, k)
The decision function of the input samples. The columns correspond
to the classes in sorted order, as they appear in the attribute
``classes_``. Regression and binary classification are special
cases with ``k == 1``, otherwise ``k==n_classes``.
"""
check_is_fitted(self)

# Check data
X = self._validate_data(
X,
accept_sparse=["csr", "csc"],
dtype=None,
force_all_finite=False,
reset=False,
)

# Parallel loop
n_jobs, _, starts = _partition_estimators(self.n_estimators, self.n_jobs)

all_decisions = Parallel(n_jobs=n_jobs, verbose=self.verbose)(
delayed(_parallel_decision_function)(
self.estimators_[starts[i] : starts[i + 1]],
self.estimators_features_[starts[i] : starts[i + 1]],
X,
)
for i in range(n_jobs)
)

# Reduce
decisions = sum(all_decisions) / self.n_estimators

return decisions
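This mirrors the backport in _bagging.py. EasyEnsembleClassifier defaults to AdaBoost base learners, which always implement decision_function, so the method is available out of the box; a minimal sketch (dataset is an assumption):

from sklearn.datasets import make_classification
from imblearn.ensemble import EasyEnsembleClassifier

X, y = make_classification(weights=[0.1, 0.9], random_state=0)
eec = EasyEnsembleClassifier(n_estimators=5, random_state=0).fit(X, y)

# Averaged AdaBoost decision scores across the balanced bags.
print(eec.decision_function(X).shape)  # (100,) for this binary toy problem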