scikit-learn-contrib · glemaitre · Dec 2, 2022 · Dec 2, 2022 · Dec 2, 2022 · Dec 2, 2022
diff --git a/README.rst b/README.rst
@@ -30,7 +30,7 @@
 .. |PythonMinVersion| replace:: 3.8
 .. |NumPyMinVersion| replace:: 1.17.3
 .. |SciPyMinVersion| replace:: 1.3.2
-.. |ScikitLearnMinVersion| replace:: 1.1.0
+.. |ScikitLearnMinVersion| replace:: 1.1.3
 .. |MatplotlibMinVersion| replace:: 3.1.2
 .. |PandasMinVersion| replace:: 1.0.5
 .. |TensorflowMinVersion| replace:: 2.4.3

diff --git a/azure-pipelines.yml b/azure-pipelines.yml
@@ -51,7 +51,7 @@ jobs:
         black --check --diff .
       displayName: Run black
     - bash: |
-        ./build_tools/circle/linting.sh
+        ./build_tools/azure/linting.sh
       displayName: Run linting
     - bash: |
         mypy imblearn/
@@ -112,7 +112,7 @@ jobs:
         ne(variables['Build.Reason'], 'Schedule')
       )
     matrix:
-      py37_conda_forge_openblas_ubuntu_1804:
+      py38_conda_forge_openblas_ubuntu_1804:
         DISTRIB: 'conda'
         CONDA_CHANNEL: 'conda-forge'
         PYTHON_VERSION: '3.8'
@@ -141,12 +141,12 @@ jobs:
         THREADPOOLCTL_VERSION: 'min'
         COVERAGE: 'false'
       # Linux + Python 3.8 build with OpenBLAS and without SITE_JOBLIB
-      py37_conda_defaults_openblas:
+      py38_conda_defaults_openblas:
         DISTRIB: 'conda'
         CONDA_CHANNEL: 'conda-forge'
         PYTHON_VERSION: '3.8'
         BLAS: 'openblas'
-        NUMPY_VERSION: '1.19.5'  # we cannot get an older version of the dependencies resolution
+        NUMPY_VERSION: '1.21.0'  # we cannot get an older version because of the dependency resolution
         SCIPY_VERSION: 'min'
         SKLEARN_VERSION: 'min'
         MATPLOTLIB_VERSION: 'none'
@@ -275,6 +275,3 @@ jobs:
         PYTHON_ARCH: '64'
         PYTEST_VERSION: '*'
         COVERAGE: 'true'
-      py38_pip_openblas_32bit:
-        PYTHON_VERSION: '3.8'
-        PYTHON_ARCH: '32'
diff --git a/build_tools/azure/install.sh b/build_tools/azure/install.sh
@@ -67,7 +67,8 @@ elif [[ "$DISTRIB" == "conda-pip-latest" ]]; then
     make_conda "python=$PYTHON_VERSION"
     python -m pip install -U pip
 
-    python -m pip install scikit-learn pandas matplotlib
+    python -m pip install pandas matplotlib
+    python -m pip install --pre scikit-learn
 
 elif [[ "$DISTRIB" == "conda-pip-latest-tensorflow" ]]; then
     make_conda "python=$PYTHON_VERSION"

diff --git a/build_tools/azure/linting.sh b/build_tools/azure/linting.sh
@@ -0,0 +1,43 @@
+#!/bin/bash
+# CI lint checks: flake8 first, then git-grep based convention checks.
+set -e
+# pipefail is necessary to propagate exit codes
+set -o pipefail
+
+flake8 --show-source .
+echo -e "No problem detected by flake8\n"
+
+# For docstrings and warnings of deprecated attributes to be rendered
+# properly, the property decorator must come before the deprecated decorator
+# (else they are treated as functions)
+
+# do not error when grep -B1 "@deprecated" finds nothing (errexit stays off below)
+set +e
+bad_deprecation_property_order="$(git grep -A 10 "@property"  -- "*.py" | awk '/@property/,/def /' | grep -B1 "@deprecated")"
+
+if [ -n "$bad_deprecation_property_order" ]
+then
+    echo "property decorator should come before deprecated decorator"
+    echo "found the following occurrencies:"
+    echo "$bad_deprecation_property_order"
+    exit 1
+fi
+
+# Check for default doctest directives ELLIPSIS and NORMALIZE_WHITESPACE
+
+doctest_directive="$(git grep -nw -E "# doctest\: \+(ELLIPSIS|NORMALIZE_WHITESPACE)")"
+
+if [ -n "$doctest_directive" ]
+then
+    echo "ELLIPSIS and NORMALIZE_WHITESPACE doctest directives are enabled by default, but were found in:"
+    echo "$doctest_directive"
+    exit 1
+fi
+# Check that `delayed` is not imported directly from joblib
+joblib_import="$(git grep -l -A 10 -E "joblib import.+delayed" -- "*.py" ":!sklearn/utils/_joblib.py" ":!sklearn/utils/fixes.py")"
+
+if [ -n "$joblib_import" ]; then
+    echo "Use from sklearn.utils.fixes import delayed instead of joblib delayed. The following files contains imports to joblib.delayed:"
+    echo "$joblib_import"
+    exit 1
+fi
diff --git a/doc/ensemble.rst b/doc/ensemble.rst
@@ -35,10 +35,10 @@ data set, this classifier will favor the majority classes::
   >>> X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
   >>> bc = BaggingClassifier(base_estimator=DecisionTreeClassifier(),
   ...                        random_state=0)
-  >>> bc.fit(X_train, y_train) #doctest: +ELLIPSIS
+  >>> bc.fit(X_train, y_train)
   BaggingClassifier(...)
   >>> y_pred = bc.predict(X_test)
-  >>> balanced_accuracy_score(y_test, y_pred)  # doctest: +ELLIPSIS
+  >>> balanced_accuracy_score(y_test, y_pred)
   0.77...
 
 In :class:`BalancedBaggingClassifier`, each bootstrap sample will be further
@@ -54,10 +54,10 @@ sampling is controlled by the parameter `sampler` or the two parameters
   ...                                 sampling_strategy='auto',
   ...                                 replacement=False,
   ...                                 random_state=0)
-  >>> bbc.fit(X_train, y_train) # doctest: +ELLIPSIS
+  >>> bbc.fit(X_train, y_train)
   BalancedBaggingClassifier(...)
   >>> y_pred = bbc.predict(X_test)
-  >>> balanced_accuracy_score(y_test, y_pred)  # doctest: +ELLIPSIS
+  >>> balanced_accuracy_score(y_test, y_pred)
   0.8...
 
 Changing the `sampler` will give rise to different known implementation
@@ -78,10 +78,10 @@ each tree of the forest will be provided a balanced bootstrap sample
 
   >>> from imblearn.ensemble import BalancedRandomForestClassifier
   >>> brf = BalancedRandomForestClassifier(n_estimators=100, random_state=0)
-  >>> brf.fit(X_train, y_train) # doctest: +ELLIPSIS
+  >>> brf.fit(X_train, y_train)
   BalancedRandomForestClassifier(...)
   >>> y_pred = brf.predict(X_test)
-  >>> balanced_accuracy_score(y_test, y_pred)  # doctest: +ELLIPSIS
+  >>> balanced_accuracy_score(y_test, y_pred)
   0.8...
 
 .. _boosting:
@@ -97,10 +97,10 @@ a boosting iteration :cite:`seiffert2009rusboost`::
   >>> from imblearn.ensemble import RUSBoostClassifier
   >>> rusboost = RUSBoostClassifier(n_estimators=200, algorithm='SAMME.R',
   ...                               random_state=0)
-  >>> rusboost.fit(X_train, y_train)  # doctest: +ELLIPSIS
+  >>> rusboost.fit(X_train, y_train)
   RUSBoostClassifier(...)
   >>> y_pred = rusboost.predict(X_test)
-  >>> balanced_accuracy_score(y_test, y_pred)  # doctest: +ELLIPSIS
+  >>> balanced_accuracy_score(y_test, y_pred)
   0...
 
 A specific method which uses :class:`~sklearn.ensemble.AdaBoostClassifier` as
@@ -111,10 +111,10 @@ the :class:`BalancedBaggingClassifier` API, one can construct the ensemble as::
 
   >>> from imblearn.ensemble import EasyEnsembleClassifier
   >>> eec = EasyEnsembleClassifier(random_state=0)
-  >>> eec.fit(X_train, y_train) # doctest: +ELLIPSIS
+  >>> eec.fit(X_train, y_train)
   EasyEnsembleClassifier(...)
   >>> y_pred = eec.predict(X_test)
-  >>> balanced_accuracy_score(y_test, y_pred)  # doctest: +ELLIPSIS
+  >>> balanced_accuracy_score(y_test, y_pred)
   0.6...
 
 .. topic:: Examples

diff --git a/doc/over_sampling.rst b/doc/over_sampling.rst
@@ -40,7 +40,7 @@ a classifier::
 
   >>> from sklearn.svm import LinearSVC
   >>> clf = LinearSVC()
-  >>> clf.fit(X_resampled, y_resampled) # doctest : +ELLIPSIS
+  >>> clf.fit(X_resampled, y_resampled)
   LinearSVC(...)
 
 In the figure below, we compare the decision functions of a classifier trained

diff --git a/imblearn/_min_dependencies.py b/imblearn/_min_dependencies.py
@@ -4,10 +4,10 @@
 NUMPY_MIN_VERSION = "1.17.3"
 SCIPY_MIN_VERSION = "1.3.2"
 PANDAS_MIN_VERSION = "1.0.5"
-SKLEARN_MIN_VERSION = "1.1.0"
+SKLEARN_MIN_VERSION = "1.1.3"
 TENSORFLOW_MIN_VERSION = "2.4.3"
 KERAS_MIN_VERSION = "2.4.3"
-JOBLIB_MIN_VERSION = "1.0.0"
+JOBLIB_MIN_VERSION = "1.1.1"
 THREADPOOLCTL_MIN_VERSION = "2.0.0"
 PYTEST_MIN_VERSION = "5.0.1"
 

diff --git a/imblearn/combine/_smote_enn.py b/imblearn/combine/_smote_enn.py
@@ -91,7 +91,7 @@ class SMOTEENN(BaseSampler):
 
     >>> from collections import Counter
     >>> from sklearn.datasets import make_classification
-    >>> from imblearn.combine import SMOTEENN # doctest: +NORMALIZE_WHITESPACE
+    >>> from imblearn.combine import SMOTEENN
     >>> X, y = make_classification(n_classes=2, class_sep=2,
     ... weights=[0.1, 0.9], n_informative=3, n_redundant=1, flip_y=0,
     ... n_features=20, n_clusters_per_class=1, n_samples=1000, random_state=10)

diff --git a/imblearn/combine/_smote_tomek.py b/imblearn/combine/_smote_tomek.py
@@ -90,7 +90,7 @@ class SMOTETomek(BaseSampler):
     >>> from collections import Counter
     >>> from sklearn.datasets import make_classification
     >>> from imblearn.combine import \
-SMOTETomek # doctest: +NORMALIZE_WHITESPACE
+SMOTETomek
     >>> X, y = make_classification(n_classes=2, class_sep=2,
     ... weights=[0.1, 0.9], n_informative=3, n_redundant=1, flip_y=0,
     ... n_features=20, n_clusters_per_class=1, n_samples=1000, random_state=10)

diff --git a/imblearn/ensemble/_bagging.py b/imblearn/ensemble/_bagging.py
@@ -4,7 +4,9 @@
 #          Christos Aridas
 # License: MIT
 
+import inspect
 import numbers
+import warnings
 
 import numpy as np
 
@@ -41,10 +43,12 @@ class BalancedBaggingClassifier(BaggingClassifier):
 
     Parameters
     ----------
-    base_estimator : estimator object, default=None
+    estimator : estimator object, default=None
         The base estimator to fit on random subsets of the dataset.
         If None, then the base estimator is a decision tree.
 
+        .. versionadded:: 0.10
+
     n_estimators : int, default=10
         The number of base estimators in the ensemble.
 
@@ -100,18 +104,37 @@ class BalancedBaggingClassifier(BaggingClassifier):
 
         .. versionadded:: 0.8
 
+    base_estimator : estimator object, default=None
+        The base estimator to fit on random subsets of the dataset.
+        If None, then the base estimator is a decision tree.
+
+        .. deprecated:: 0.10
+           `base_estimator` was renamed to `estimator` in version 0.10 and
+           will be removed in 0.12.
+
     Attributes
     ----------
+    estimator_ : estimator
+        The base estimator from which the ensemble is grown.
+
+        .. versionadded:: 0.10
+
     base_estimator_ : estimator
         The base estimator from which the ensemble is grown.
 
+        .. deprecated:: 1.2
+           `base_estimator_` is deprecated in `scikit-learn` 1.2 and will be
+           removed in 1.4. Use `estimator_` instead. When the minimum version
+           of `scikit-learn` supported by `imbalanced-learn` reaches 1.4,
+           this attribute will be removed.
+
     n_features_ : int
         The number of features when `fit` is performed.
 
         .. deprecated:: 1.0
            `n_features_` is deprecated in `scikit-learn` 1.0 and will be removed
-           in version 1.2. Depending of the version of `scikit-learn` installed,
-           you will get be warned or not.
+           in version 1.2. When the minimum version of `scikit-learn` supported
+           by `imbalanced-learn` reaches 1.2, this attribute will be removed.
 
     estimators_ : list of estimators
         The collection of fitted base estimators.
@@ -209,7 +232,7 @@ class BalancedBaggingClassifier(BaggingClassifier):
     >>> from sklearn.model_selection import train_test_split
     >>> from sklearn.metrics import confusion_matrix
     >>> from imblearn.ensemble import \
-BalancedBaggingClassifier # doctest: +NORMALIZE_WHITESPACE
+BalancedBaggingClassifier
     >>> X, y = make_classification(n_classes=2, class_sep=2,
     ... weights=[0.1, 0.9], n_informative=3, n_redundant=1, flip_y=0,
     ... n_features=20, n_clusters_per_class=1, n_samples=1000, random_state=10)
@@ -218,7 +241,7 @@ class BalancedBaggingClassifier(BaggingClassifier):
     >>> X_train, X_test, y_train, y_test = train_test_split(X, y,
     ...                                                     random_state=0)
     >>> bbc = BalancedBaggingClassifier(random_state=42)
-    >>> bbc.fit(X_train, y_train) # doctest: +ELLIPSIS
+    >>> bbc.fit(X_train, y_train)
     BalancedBaggingClassifier(...)
     >>> y_pred = bbc.predict(X_test)
     >>> print(confusion_matrix(y_test, y_pred))
@@ -229,7 +252,7 @@ class BalancedBaggingClassifier(BaggingClassifier):
     @_deprecate_positional_args
     def __init__(
         self,
-        base_estimator=None,
+        estimator=None,
         n_estimators=10,
         *,
         max_samples=1.0,
@@ -244,10 +267,18 @@ def __init__(
         random_state=None,
         verbose=0,
         sampler=None,
+        base_estimator="deprecated",
     ):
+        # TODO: remove when supporting scikit-learn>=1.2
+        bagging_classifier_signature = inspect.signature(super().__init__)
+        estimator_params = {"base_estimator": base_estimator}
+        if "estimator" in bagging_classifier_signature.parameters:
+            estimator_params["estimator"] = estimator
+        else:
+            self.estimator = estimator
 
         super().__init__(
-            base_estimator,
+            **estimator_params,
             n_estimators=n_estimators,
             max_samples=max_samples,
             max_features=max_features,
@@ -294,20 +325,54 @@ def _validate_estimator(self, default=DecisionTreeClassifier()):
                 f"n_estimators must be greater than zero, " f"got {self.n_estimators}."
             )
 
-        if self.base_estimator is not None:
+        if self.estimator is not None and (
+            self.base_estimator not in [None, "deprecated"]
+        ):
+            raise ValueError(
+                "Both `estimator` and `base_estimator` were set. Only set `estimator`."
+            )
+
+        if self.estimator is not None:
+            base_estimator = clone(self.estimator)
+        elif self.base_estimator not in [None, "deprecated"]:
+            warnings.warn(
+                "`base_estimator` was renamed to `estimator` in version 0.10 and "
+                "will be removed in 0.12.",
+                FutureWarning,
+            )
             base_estimator = clone(self.base_estimator)
         else:
             base_estimator = clone(default)
 
         if self.sampler_._sampling_type != "bypass":
             self.sampler_.set_params(sampling_strategy=self._sampling_strategy)
 
-        self.base_estimator_ = Pipeline(
-            [
-                ("sampler", self.sampler_),
-                ("classifier", base_estimator),
-            ]
+        self._estimator = Pipeline(
+            [("sampler", self.sampler_), ("classifier", base_estimator)]
+        )
+        try:
+            # scikit-learn < 1.2
+            self.base_estimator_ = self._estimator
+        except AttributeError:
+            pass
+
+    # TODO: remove when supporting scikit-learn>=1.4
+    @property
+    def estimator_(self):
+        """Estimator used to grow the ensemble, as stored in ``_estimator``."""
+        return self._estimator
+
+    # TODO: remove when supporting scikit-learn>=1.2
+    @property
+    def n_features_(self):
+        """Deprecated alias of ``n_features_in_``; warns with ``FutureWarning``."""
+        warnings.warn(
+            "`n_features_` was deprecated in scikit-learn 1.0. This attribute will "
+            "not be accessible when the minimum supported version of scikit-learn "
+            "is 1.2.",
+            FutureWarning,
         )
+        return self.n_features_in_
 
     def fit(self, X, y):
         """Build a Bagging ensemble of estimators from the training set (X, y).