scikit-learn · thomasjpfan · Mar 22, 2023 · Mar 22, 2023 · Mar 22, 2023
diff --git a/sklearn/ensemble/_base.py b/sklearn/ensemble/_base.py
@@ -15,11 +15,6 @@
 from ..base import is_classifier, is_regressor
 from ..base import BaseEstimator
 from ..base import MetaEstimatorMixin
-from ..tree import (
-    DecisionTreeRegressor,
-    BaseDecisionTree,
-    DecisionTreeClassifier,
-)
 from ..utils import Bunch, _print_elapsed_time, deprecated
 from ..utils import check_random_state
 from ..utils.metaestimators import _BaseComposition
@@ -192,16 +187,6 @@ def _make_estimator(self, append=True, random_state=None):
         estimator = clone(self.estimator_)
         estimator.set_params(**{p: getattr(self, p) for p in self.estimator_params})
 
-        # TODO(1.3): Remove
-        # max_features = 'auto' would cause warnings in every call to
-        # Tree.fit(..)
-        if isinstance(estimator, BaseDecisionTree):
-            if getattr(estimator, "max_features", None) == "auto":
-                if isinstance(estimator, DecisionTreeClassifier):
-                    estimator.set_params(max_features="sqrt")
-                elif isinstance(estimator, DecisionTreeRegressor):
-                    estimator.set_params(max_features=1.0)
-
         if random_state is not None:
             _set_random_states(estimator, random_state)
 

diff --git a/sklearn/ensemble/_forest.py b/sklearn/ensemble/_forest.py
@@ -408,28 +408,6 @@ def fit(self, X, y, sample_weight=None):
             n_samples_bootstrap = None
 
         self._validate_estimator()
-        if isinstance(self, (RandomForestRegressor, ExtraTreesRegressor)):
-            # TODO(1.3): Remove "auto"
-            if self.max_features == "auto":
-                warn(
-                    "`max_features='auto'` has been deprecated in 1.1 "
-                    "and will be removed in 1.3. To keep the past behaviour, "
-                    "explicitly set `max_features=1.0` or remove this "
-                    "parameter as it is also the default value for "
-                    "RandomForestRegressors and ExtraTreesRegressors.",
-                    FutureWarning,
-                )
-        elif isinstance(self, (RandomForestClassifier, ExtraTreesClassifier)):
-            # TODO(1.3): Remove "auto"
-            if self.max_features == "auto":
-                warn(
-                    "`max_features='auto'` has been deprecated in 1.1 "
-                    "and will be removed in 1.3. To keep the past behaviour, "
-                    "explicitly set `max_features='sqrt'` or remove this "
-                    "parameter as it is also the default value for "
-                    "RandomForestClassifiers and ExtraTreesClassifiers.",
-                    FutureWarning,
-                )
 
         if not self.bootstrap and self.oob_score:
             raise ValueError("Out of bag estimation only available if bootstrap=True")
@@ -1172,18 +1150,13 @@ class RandomForestClassifier(ForestClassifier):
         - If float, then `max_features` is a fraction and
           `max(1, int(max_features * n_features_in_))` features are considered at each
           split.
-        - If "auto", then `max_features=sqrt(n_features)`.
         - If "sqrt", then `max_features=sqrt(n_features)`.
         - If "log2", then `max_features=log2(n_features)`.
         - If None, then `max_features=n_features`.
 
         .. versionchanged:: 1.1
             The default of `max_features` changed from `"auto"` to `"sqrt"`.
 
-        .. deprecated:: 1.1
-            The `"auto"` option was deprecated in 1.1 and will be removed
-            in 1.3.
-
         Note: the search for a split does not stop until at least one
         valid partition of the node samples is found, even if it requires to
         effectively inspect more than ``max_features`` features.
@@ -1547,7 +1520,6 @@ class RandomForestRegressor(ForestRegressor):
         - If float, then `max_features` is a fraction and
           `max(1, int(max_features * n_features_in_))` features are considered at each
           split.
-        - If "auto", then `max_features=n_features`.
         - If "sqrt", then `max_features=sqrt(n_features)`.
         - If "log2", then `max_features=log2(n_features)`.
         - If None or 1.0, then `max_features=n_features`.
@@ -1559,10 +1531,6 @@ class RandomForestRegressor(ForestRegressor):
         .. versionchanged:: 1.1
             The default of `max_features` changed from `"auto"` to 1.0.
 
-        .. deprecated:: 1.1
-            The `"auto"` option was deprecated in 1.1 and will be removed
-            in 1.3.
-
         Note: the search for a split does not stop until at least one
         valid partition of the node samples is found, even if it requires to
         effectively inspect more than ``max_features`` features.
@@ -1716,7 +1684,7 @@ class RandomForestRegressor(ForestRegressor):
     search of the best split. To obtain a deterministic behaviour during
     fitting, ``random_state`` has to be fixed.
 
-    The default value ``max_features="auto"`` uses ``n_features``
+    The default value ``max_features=1.0`` uses ``n_features``
     rather than ``n_features / 3``. The latter was originally suggested in
     [1], whereas the former was more recently justified empirically in [2].
 
@@ -1871,18 +1839,13 @@ class ExtraTreesClassifier(ForestClassifier):
         - If float, then `max_features` is a fraction and
           `max(1, int(max_features * n_features_in_))` features are considered at each
           split.
-        - If "auto", then `max_features=sqrt(n_features)`.
         - If "sqrt", then `max_features=sqrt(n_features)`.
         - If "log2", then `max_features=log2(n_features)`.
         - If None, then `max_features=n_features`.
 
         .. versionchanged:: 1.1
             The default of `max_features` changed from `"auto"` to `"sqrt"`.
 
-        .. deprecated:: 1.1
-            The `"auto"` option was deprecated in 1.1 and will be removed
-            in 1.3.
-
         Note: the search for a split does not stop until at least one
         valid partition of the node samples is found, even if it requires to
         effectively inspect more than ``max_features`` features.
@@ -2237,7 +2200,6 @@ class ExtraTreesRegressor(ForestRegressor):
         - If float, then `max_features` is a fraction and
           `max(1, int(max_features * n_features_in_))` features are considered at each
           split.
-        - If "auto", then `max_features=n_features`.
         - If "sqrt", then `max_features=sqrt(n_features)`.
         - If "log2", then `max_features=log2(n_features)`.
         - If None or 1.0, then `max_features=n_features`.
@@ -2249,10 +2211,6 @@ class ExtraTreesRegressor(ForestRegressor):
         .. versionchanged:: 1.1
             The default of `max_features` changed from `"auto"` to 1.0.
 
-        .. deprecated:: 1.1
-            The `"auto"` option was deprecated in 1.1 and will be removed
-            in 1.3.
-
         Note: the search for a split does not stop until at least one
         valid partition of the node samples is found, even if it requires to
         effectively inspect more than ``max_features`` features.

diff --git a/sklearn/ensemble/_gb.py b/sklearn/ensemble/_gb.py
@@ -965,13 +965,12 @@ class GradientBoostingClassifier(ClassifierMixin, BaseGradientBoosting):
         Pass an int for reproducible output across multiple function calls.
         See :term:`Glossary <random_state>`.
 
-    max_features : {'auto', 'sqrt', 'log2'}, int or float, default=None
+    max_features : {'sqrt', 'log2'}, int or float, default=None
         The number of features to consider when looking for the best split:
 
         - If int, values must be in the range `[1, inf)`.
         - If float, values must be in the range `(0.0, 1.0]` and the features
           considered at each split will be `max(1, int(max_features * n_features_in_))`.
-        - If 'auto', then `max_features=sqrt(n_features)`.
         - If 'sqrt', then `max_features=sqrt(n_features)`.
         - If 'log2', then `max_features=log2(n_features)`.
         - If None, then `max_features=n_features`.
@@ -1531,13 +1530,12 @@ class GradientBoostingRegressor(RegressorMixin, BaseGradientBoosting):
         Pass an int for reproducible output across multiple function calls.
         See :term:`Glossary <random_state>`.
 
-    max_features : {'auto', 'sqrt', 'log2'}, int or float, default=None
+    max_features : {'sqrt', 'log2'}, int or float, default=None
         The number of features to consider when looking for the best split:
 
         - If int, values must be in the range `[1, inf)`.
         - If float, values must be in the range `(0.0, 1.0]` and the features
           considered at each split will be `max(1, int(max_features * n_features_in_))`.
-        - If "auto", then `max_features=n_features`.
         - If "sqrt", then `max_features=sqrt(n_features)`.
         - If "log2", then `max_features=log2(n_features)`.
         - If None, then `max_features=n_features`.

diff --git a/sklearn/ensemble/tests/test_forest.py b/sklearn/ensemble/tests/test_forest.py
@@ -1700,35 +1700,6 @@ def test_little_tree_with_small_max_samples(ForestClass):
     assert tree1.node_count > tree2.node_count, msg
 
 
-# TODO: Remove in v1.3
-@pytest.mark.parametrize(
-    "Estimator",
-    [
-        ExtraTreesClassifier,
-        ExtraTreesRegressor,
-        RandomForestClassifier,
-        RandomForestRegressor,
-    ],
-)
-def test_max_features_deprecation(Estimator):
-    """Check warning raised for max_features="auto" deprecation."""
-    X = np.array([[1, 2], [3, 4]])
-    y = np.array([1, 0])
-    est = Estimator(max_features="auto")
-
-    err_msg = (
-        r"`max_features='auto'` has been deprecated in 1.1 "
-        r"and will be removed in 1.3. To keep the past behaviour, "
-        r"explicitly set `max_features=(1.0|'sqrt')` or remove this "
-        r"parameter as it is also the default value for RandomForest"
-        r"(Regressors|Classifiers) and ExtraTrees(Regressors|"
-        r"Classifiers)\."
-    )
-
-    with pytest.warns(FutureWarning, match=err_msg):
-        est.fit(X, y)
-
-
 @pytest.mark.parametrize("Forest", FOREST_REGRESSORS)
 def test_mse_criterion_object_segfault_smoke_test(Forest):
     # This is a smoke test to ensure that passing a mutable criterion

diff --git a/sklearn/ensemble/tests/test_gradient_boosting.py b/sklearn/ensemble/tests/test_gradient_boosting.py
@@ -345,21 +345,19 @@ def test_feature_importance_regression(
     assert set(sorted_features[1:4]) == {"Longitude", "AveOccup", "Latitude"}
 
 
-# TODO(1.3): Remove warning filter
-@pytest.mark.filterwarnings("ignore:`max_features='auto'` has been deprecated in 1.1")
-def test_max_feature_auto():
+def test_max_features():
     # Test if max features is set properly for floats and str.
     X, y = datasets.make_hastie_10_2(n_samples=12000, random_state=1)
     _, n_features = X.shape
 
     X_train = X[:2000]
     y_train = y[:2000]
 
-    gbrt = GradientBoostingClassifier(n_estimators=1, max_features="auto")
+    gbrt = GradientBoostingClassifier(n_estimators=1, max_features=None)
     gbrt.fit(X_train, y_train)
-    assert gbrt.max_features_ == int(np.sqrt(n_features))
+    assert gbrt.max_features_ == n_features
 
-    gbrt = GradientBoostingRegressor(n_estimators=1, max_features="auto")
+    gbrt = GradientBoostingRegressor(n_estimators=1, max_features=None)
     gbrt.fit(X_train, y_train)
     assert gbrt.max_features_ == n_features
 

diff --git a/sklearn/tree/_classes.py b/sklearn/tree/_classes.py
@@ -110,7 +110,7 @@ class BaseDecisionTree(MultiOutputMixin, BaseEstimator, metaclass=ABCMeta):
         "max_features": [
             Interval(Integral, 1, None, closed="left"),
             Interval(RealNotInt, 0.0, 1.0, closed="right"),
-            StrOptions({"auto", "sqrt", "log2"}, deprecated={"auto"}),
+            StrOptions({"sqrt", "log2"}),
             None,
         ],
         "random_state": ["random_state"],
@@ -653,15 +653,10 @@ class DecisionTreeClassifier(ClassifierMixin, BaseDecisionTree):
             - If float, then `max_features` is a fraction and
               `max(1, int(max_features * n_features_in_))` features are considered at
               each split.
-            - If "auto", then `max_features=sqrt(n_features)`.
             - If "sqrt", then `max_features=sqrt(n_features)`.
             - If "log2", then `max_features=log2(n_features)`.
             - If None, then `max_features=n_features`.
 
-            .. deprecated:: 1.1
-                The `"auto"` option was deprecated in 1.1 and will be removed
-                in 1.3.
-
         Note: the search for a split does not stop until at least one
         valid partition of the node samples is found, even if it requires to
         effectively inspect more than ``max_features`` features.
@@ -1047,15 +1042,10 @@ class DecisionTreeRegressor(RegressorMixin, BaseDecisionTree):
         - If float, then `max_features` is a fraction and
           `max(1, int(max_features * n_features_in_))` features are considered at each
           split.
-        - If "auto", then `max_features=n_features`.
         - If "sqrt", then `max_features=sqrt(n_features)`.
         - If "log2", then `max_features=log2(n_features)`.
         - If None, then `max_features=n_features`.
 
-        .. deprecated:: 1.1
-            The `"auto"` option was deprecated in 1.1 and will be removed
-            in 1.3.
-
         Note: the search for a split does not stop until at least one
         valid partition of the node samples is found, even if it requires to
         effectively inspect more than ``max_features`` features.
@@ -1350,18 +1340,13 @@ class ExtraTreeClassifier(DecisionTreeClassifier):
             - If float, then `max_features` is a fraction and
               `max(1, int(max_features * n_features_in_))` features are considered at
               each split.
-            - If "auto", then `max_features=sqrt(n_features)`.
             - If "sqrt", then `max_features=sqrt(n_features)`.
             - If "log2", then `max_features=log2(n_features)`.
             - If None, then `max_features=n_features`.
 
             .. versionchanged:: 1.1
                 The default of `max_features` changed from `"auto"` to `"sqrt"`.
 
-            .. deprecated:: 1.1
-                The `"auto"` option was deprecated in 1.1 and will be removed
-                in 1.3.
-
         Note: the search for a split does not stop until at least one
         valid partition of the node samples is found, even if it requires to
         effectively inspect more than ``max_features`` features.
@@ -1620,18 +1605,13 @@ class ExtraTreeRegressor(DecisionTreeRegressor):
         - If float, then `max_features` is a fraction and
           `max(1, int(max_features * n_features_in_))` features are considered at each
           split.
-        - If "auto", then `max_features=n_features`.
         - If "sqrt", then `max_features=sqrt(n_features)`.
         - If "log2", then `max_features=log2(n_features)`.
         - If None, then `max_features=n_features`.
 
         .. versionchanged:: 1.1
             The default of `max_features` changed from `"auto"` to `1.0`.
 
-        .. deprecated:: 1.1
-            The `"auto"` option was deprecated in 1.1 and will be removed
-            in 1.3.
-
         Note: the search for a split does not stop until at least one
         valid partition of the node samples is found, even if it requires to
         effectively inspect more than ``max_features`` features.

diff --git a/sklearn/tree/tests/test_tree.py b/sklearn/tree/tests/test_tree.py
@@ -503,20 +503,8 @@ def test_importances_gini_equal_squared_error():
     assert_array_equal(clf.tree_.n_node_samples, reg.tree_.n_node_samples)
 
 
-# TODO(1.3): Remove warning filter
-@pytest.mark.filterwarnings("ignore:`max_features='auto'` has been deprecated in 1.1")
 def test_max_features():
     # Check max_features.
-    for name, TreeRegressor in REG_TREES.items():
-        reg = TreeRegressor(max_features="auto")
-        reg.fit(diabetes.data, diabetes.target)
-        assert reg.max_features_ == diabetes.data.shape[1]
-
-    for name, TreeClassifier in CLF_TREES.items():
-        clf = TreeClassifier(max_features="auto")
-        clf.fit(iris.data, iris.target)
-        assert clf.max_features_ == 2
-
     for name, TreeEstimator in ALL_TREES.items():
         est = TreeEstimator(max_features="sqrt")
         est.fit(iris.data, iris.target)
@@ -2369,27 +2357,6 @@ def test_check_node_ndarray():
         _check_node_ndarray(problematic_node_ndarray, expected_dtype=expected_dtype)
 
 
-# TODO(1.3): Remove
-def test_max_features_auto_deprecated():
-    for Tree in CLF_TREES.values():
-        tree = Tree(max_features="auto")
-        msg = (
-            "`max_features='auto'` has been deprecated in 1.1 and will be removed in"
-            " 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'`."
-        )
-        with pytest.warns(FutureWarning, match=msg):
-            tree.fit(X, y)
-
-    for Tree in REG_TREES.values():
-        tree = Tree(max_features="auto")
-        msg = (
-            "`max_features='auto'` has been deprecated in 1.1 and will be removed in"
-            " 1.3. To keep the past behaviour, explicitly set `max_features=1.0'`."
-        )
-        with pytest.warns(FutureWarning, match=msg):
-            tree.fit(X, y)
-
-
 @pytest.mark.parametrize(
     "Splitter", chain(DENSE_SPLITTERS.values(), SPARSE_SPLITTERS.values())
 )