diff --git a/sklearn/ensemble/_base.py b/sklearn/ensemble/_base.py index 5ee48b5645a64..fa1041799afcf 100644 --- a/sklearn/ensemble/_base.py +++ b/sklearn/ensemble/_base.py @@ -15,11 +15,6 @@ from ..base import is_classifier, is_regressor from ..base import BaseEstimator from ..base import MetaEstimatorMixin -from ..tree import ( - DecisionTreeRegressor, - BaseDecisionTree, - DecisionTreeClassifier, -) from ..utils import Bunch, _print_elapsed_time, deprecated from ..utils import check_random_state from ..utils.metaestimators import _BaseComposition @@ -192,16 +187,6 @@ def _make_estimator(self, append=True, random_state=None): estimator = clone(self.estimator_) estimator.set_params(**{p: getattr(self, p) for p in self.estimator_params}) - # TODO(1.3): Remove - # max_features = 'auto' would cause warnings in every call to - # Tree.fit(..) - if isinstance(estimator, BaseDecisionTree): - if getattr(estimator, "max_features", None) == "auto": - if isinstance(estimator, DecisionTreeClassifier): - estimator.set_params(max_features="sqrt") - elif isinstance(estimator, DecisionTreeRegressor): - estimator.set_params(max_features=1.0) - if random_state is not None: _set_random_states(estimator, random_state) diff --git a/sklearn/ensemble/_forest.py b/sklearn/ensemble/_forest.py index c33d833aeb95f..19203da4fce1f 100644 --- a/sklearn/ensemble/_forest.py +++ b/sklearn/ensemble/_forest.py @@ -408,28 +408,6 @@ def fit(self, X, y, sample_weight=None): n_samples_bootstrap = None self._validate_estimator() - if isinstance(self, (RandomForestRegressor, ExtraTreesRegressor)): - # TODO(1.3): Remove "auto" - if self.max_features == "auto": - warn( - "`max_features='auto'` has been deprecated in 1.1 " - "and will be removed in 1.3. 
To keep the past behaviour, " - "explicitly set `max_features=1.0` or remove this " - "parameter as it is also the default value for " - "RandomForestRegressors and ExtraTreesRegressors.", - FutureWarning, - ) - elif isinstance(self, (RandomForestClassifier, ExtraTreesClassifier)): - # TODO(1.3): Remove "auto" - if self.max_features == "auto": - warn( - "`max_features='auto'` has been deprecated in 1.1 " - "and will be removed in 1.3. To keep the past behaviour, " - "explicitly set `max_features='sqrt'` or remove this " - "parameter as it is also the default value for " - "RandomForestClassifiers and ExtraTreesClassifiers.", - FutureWarning, - ) if not self.bootstrap and self.oob_score: raise ValueError("Out of bag estimation only available if bootstrap=True") @@ -1172,7 +1150,6 @@ class RandomForestClassifier(ForestClassifier): - If float, then `max_features` is a fraction and `max(1, int(max_features * n_features_in_))` features are considered at each split. - - If "auto", then `max_features=sqrt(n_features)`. - If "sqrt", then `max_features=sqrt(n_features)`. - If "log2", then `max_features=log2(n_features)`. - If None, then `max_features=n_features`. @@ -1180,10 +1157,6 @@ class RandomForestClassifier(ForestClassifier): .. versionchanged:: 1.1 The default of `max_features` changed from `"auto"` to `"sqrt"`. - .. deprecated:: 1.1 - The `"auto"` option was deprecated in 1.1 and will be removed - in 1.3. - Note: the search for a split does not stop until at least one valid partition of the node samples is found, even if it requires to effectively inspect more than ``max_features`` features. @@ -1547,7 +1520,6 @@ class RandomForestRegressor(ForestRegressor): - If float, then `max_features` is a fraction and `max(1, int(max_features * n_features_in_))` features are considered at each split. - - If "auto", then `max_features=n_features`. - If "sqrt", then `max_features=sqrt(n_features)`. - If "log2", then `max_features=log2(n_features)`. 
- If None or 1.0, then `max_features=n_features`. @@ -1559,10 +1531,6 @@ class RandomForestRegressor(ForestRegressor): .. versionchanged:: 1.1 The default of `max_features` changed from `"auto"` to 1.0. - .. deprecated:: 1.1 - The `"auto"` option was deprecated in 1.1 and will be removed - in 1.3. - Note: the search for a split does not stop until at least one valid partition of the node samples is found, even if it requires to effectively inspect more than ``max_features`` features. @@ -1716,7 +1684,7 @@ class RandomForestRegressor(ForestRegressor): search of the best split. To obtain a deterministic behaviour during fitting, ``random_state`` has to be fixed. - The default value ``max_features="auto"`` uses ``n_features`` + The default value ``max_features=1.0`` uses ``n_features`` rather than ``n_features / 3``. The latter was originally suggested in [1], whereas the former was more recently justified empirically in [2]. @@ -1871,7 +1839,6 @@ class ExtraTreesClassifier(ForestClassifier): - If float, then `max_features` is a fraction and `max(1, int(max_features * n_features_in_))` features are considered at each split. - - If "auto", then `max_features=sqrt(n_features)`. - If "sqrt", then `max_features=sqrt(n_features)`. - If "log2", then `max_features=log2(n_features)`. - If None, then `max_features=n_features`. @@ -1879,10 +1846,6 @@ class ExtraTreesClassifier(ForestClassifier): .. versionchanged:: 1.1 The default of `max_features` changed from `"auto"` to `"sqrt"`. - .. deprecated:: 1.1 - The `"auto"` option was deprecated in 1.1 and will be removed - in 1.3. - Note: the search for a split does not stop until at least one valid partition of the node samples is found, even if it requires to effectively inspect more than ``max_features`` features. @@ -2237,7 +2200,6 @@ class ExtraTreesRegressor(ForestRegressor): - If float, then `max_features` is a fraction and `max(1, int(max_features * n_features_in_))` features are considered at each split. 
- - If "auto", then `max_features=n_features`. - If "sqrt", then `max_features=sqrt(n_features)`. - If "log2", then `max_features=log2(n_features)`. - If None or 1.0, then `max_features=n_features`. @@ -2249,10 +2211,6 @@ class ExtraTreesRegressor(ForestRegressor): .. versionchanged:: 1.1 The default of `max_features` changed from `"auto"` to 1.0. - .. deprecated:: 1.1 - The `"auto"` option was deprecated in 1.1 and will be removed - in 1.3. - Note: the search for a split does not stop until at least one valid partition of the node samples is found, even if it requires to effectively inspect more than ``max_features`` features. diff --git a/sklearn/ensemble/_gb.py b/sklearn/ensemble/_gb.py index fff35ab6c33b4..fab8d8710a868 100644 --- a/sklearn/ensemble/_gb.py +++ b/sklearn/ensemble/_gb.py @@ -965,13 +965,12 @@ class GradientBoostingClassifier(ClassifierMixin, BaseGradientBoosting): Pass an int for reproducible output across multiple function calls. See :term:`Glossary <random_state>`. - max_features : {'auto', 'sqrt', 'log2'}, int or float, default=None + max_features : {'sqrt', 'log2'}, int or float, default=None The number of features to consider when looking for the best split: - If int, values must be in the range `[1, inf)`. - If float, values must be in the range `(0.0, 1.0]` and the features considered at each split will be `max(1, int(max_features * n_features_in_))`. - - If 'auto', then `max_features=sqrt(n_features)`. - If 'sqrt', then `max_features=sqrt(n_features)`. - If 'log2', then `max_features=log2(n_features)`. - If None, then `max_features=n_features`. @@ -1531,13 +1530,12 @@ class GradientBoostingRegressor(RegressorMixin, BaseGradientBoosting): Pass an int for reproducible output across multiple function calls. See :term:`Glossary <random_state>`.
- max_features : {'auto', 'sqrt', 'log2'}, int or float, default=None + max_features : {'sqrt', 'log2'}, int or float, default=None The number of features to consider when looking for the best split: - If int, values must be in the range `[1, inf)`. - If float, values must be in the range `(0.0, 1.0]` and the features considered at each split will be `max(1, int(max_features * n_features_in_))`. - - If "auto", then `max_features=n_features`. - If "sqrt", then `max_features=sqrt(n_features)`. - If "log2", then `max_features=log2(n_features)`. - If None, then `max_features=n_features`. diff --git a/sklearn/ensemble/tests/test_forest.py b/sklearn/ensemble/tests/test_forest.py index f063026a6ba33..9bf0bb2becd9b 100644 --- a/sklearn/ensemble/tests/test_forest.py +++ b/sklearn/ensemble/tests/test_forest.py @@ -1700,35 +1700,6 @@ def test_little_tree_with_small_max_samples(ForestClass): assert tree1.node_count > tree2.node_count, msg -# TODO: Remove in v1.3 -@pytest.mark.parametrize( - "Estimator", - [ - ExtraTreesClassifier, - ExtraTreesRegressor, - RandomForestClassifier, - RandomForestRegressor, - ], -) -def test_max_features_deprecation(Estimator): - """Check warning raised for max_features="auto" deprecation.""" - X = np.array([[1, 2], [3, 4]]) - y = np.array([1, 0]) - est = Estimator(max_features="auto") - - err_msg = ( - r"`max_features='auto'` has been deprecated in 1.1 " - r"and will be removed in 1.3. To keep the past behaviour, " - r"explicitly set `max_features=(1.0|'sqrt')` or remove this " - r"parameter as it is also the default value for RandomForest" - r"(Regressors|Classifiers) and ExtraTrees(Regressors|" - r"Classifiers)\." 
- ) - - with pytest.warns(FutureWarning, match=err_msg): - est.fit(X, y) - - @pytest.mark.parametrize("Forest", FOREST_REGRESSORS) def test_mse_criterion_object_segfault_smoke_test(Forest): # This is a smoke test to ensure that passing a mutable criterion diff --git a/sklearn/ensemble/tests/test_gradient_boosting.py b/sklearn/ensemble/tests/test_gradient_boosting.py index 3f2a1aae31bcb..ad31b2ed732e9 100644 --- a/sklearn/ensemble/tests/test_gradient_boosting.py +++ b/sklearn/ensemble/tests/test_gradient_boosting.py @@ -345,9 +345,7 @@ def test_feature_importance_regression( assert set(sorted_features[1:4]) == {"Longitude", "AveOccup", "Latitude"} -# TODO(1.3): Remove warning filter -@pytest.mark.filterwarnings("ignore:`max_features='auto'` has been deprecated in 1.1") -def test_max_feature_auto(): +def test_max_features(): # Test if max features is set properly for floats and str. X, y = datasets.make_hastie_10_2(n_samples=12000, random_state=1) _, n_features = X.shape @@ -355,11 +353,11 @@ def test_max_feature_auto(): X_train = X[:2000] y_train = y[:2000] - gbrt = GradientBoostingClassifier(n_estimators=1, max_features="auto") + gbrt = GradientBoostingClassifier(n_estimators=1, max_features=None) gbrt.fit(X_train, y_train) - assert gbrt.max_features_ == int(np.sqrt(n_features)) + assert gbrt.max_features_ == n_features - gbrt = GradientBoostingRegressor(n_estimators=1, max_features="auto") + gbrt = GradientBoostingRegressor(n_estimators=1, max_features=None) gbrt.fit(X_train, y_train) assert gbrt.max_features_ == n_features diff --git a/sklearn/tree/_classes.py b/sklearn/tree/_classes.py index e0e341d9a89f6..b175275ea92dc 100644 --- a/sklearn/tree/_classes.py +++ b/sklearn/tree/_classes.py @@ -110,7 +110,7 @@ class BaseDecisionTree(MultiOutputMixin, BaseEstimator, metaclass=ABCMeta): "max_features": [ Interval(Integral, 1, None, closed="left"), Interval(RealNotInt, 0.0, 1.0, closed="right"), - StrOptions({"auto", "sqrt", "log2"}, deprecated={"auto"}), + 
StrOptions({"sqrt", "log2"}), None, ], "random_state": ["random_state"], @@ -653,15 +653,10 @@ class DecisionTreeClassifier(ClassifierMixin, BaseDecisionTree): - If float, then `max_features` is a fraction and `max(1, int(max_features * n_features_in_))` features are considered at each split. - - If "auto", then `max_features=sqrt(n_features)`. - If "sqrt", then `max_features=sqrt(n_features)`. - If "log2", then `max_features=log2(n_features)`. - If None, then `max_features=n_features`. - .. deprecated:: 1.1 - The `"auto"` option was deprecated in 1.1 and will be removed - in 1.3. - Note: the search for a split does not stop until at least one valid partition of the node samples is found, even if it requires to effectively inspect more than ``max_features`` features. @@ -1047,15 +1042,10 @@ class DecisionTreeRegressor(RegressorMixin, BaseDecisionTree): - If float, then `max_features` is a fraction and `max(1, int(max_features * n_features_in_))` features are considered at each split. - - If "auto", then `max_features=n_features`. - If "sqrt", then `max_features=sqrt(n_features)`. - If "log2", then `max_features=log2(n_features)`. - If None, then `max_features=n_features`. - .. deprecated:: 1.1 - The `"auto"` option was deprecated in 1.1 and will be removed - in 1.3. - Note: the search for a split does not stop until at least one valid partition of the node samples is found, even if it requires to effectively inspect more than ``max_features`` features. @@ -1350,7 +1340,6 @@ class ExtraTreeClassifier(DecisionTreeClassifier): - If float, then `max_features` is a fraction and `max(1, int(max_features * n_features_in_))` features are considered at each split. - - If "auto", then `max_features=sqrt(n_features)`. - If "sqrt", then `max_features=sqrt(n_features)`. - If "log2", then `max_features=log2(n_features)`. - If None, then `max_features=n_features`. @@ -1358,10 +1347,6 @@ class ExtraTreeClassifier(DecisionTreeClassifier): .. 
versionchanged:: 1.1 The default of `max_features` changed from `"auto"` to `"sqrt"`. - .. deprecated:: 1.1 - The `"auto"` option was deprecated in 1.1 and will be removed - in 1.3. - Note: the search for a split does not stop until at least one valid partition of the node samples is found, even if it requires to effectively inspect more than ``max_features`` features. @@ -1620,7 +1605,6 @@ class ExtraTreeRegressor(DecisionTreeRegressor): - If float, then `max_features` is a fraction and `max(1, int(max_features * n_features_in_))` features are considered at each split. - - If "auto", then `max_features=n_features`. - If "sqrt", then `max_features=sqrt(n_features)`. - If "log2", then `max_features=log2(n_features)`. - If None, then `max_features=n_features`. @@ -1628,10 +1612,6 @@ class ExtraTreeRegressor(DecisionTreeRegressor): .. versionchanged:: 1.1 The default of `max_features` changed from `"auto"` to `1.0`. - .. deprecated:: 1.1 - The `"auto"` option was deprecated in 1.1 and will be removed - in 1.3. - Note: the search for a split does not stop until at least one valid partition of the node samples is found, even if it requires to effectively inspect more than ``max_features`` features. diff --git a/sklearn/tree/tests/test_tree.py b/sklearn/tree/tests/test_tree.py index c796177ad814c..1f3a9bf394b9b 100644 --- a/sklearn/tree/tests/test_tree.py +++ b/sklearn/tree/tests/test_tree.py @@ -503,20 +503,8 @@ def test_importances_gini_equal_squared_error(): assert_array_equal(clf.tree_.n_node_samples, reg.tree_.n_node_samples) -# TODO(1.3): Remove warning filter -@pytest.mark.filterwarnings("ignore:`max_features='auto'` has been deprecated in 1.1") def test_max_features(): # Check max_features. 
- for name, TreeRegressor in REG_TREES.items(): - reg = TreeRegressor(max_features="auto") - reg.fit(diabetes.data, diabetes.target) - assert reg.max_features_ == diabetes.data.shape[1] - - for name, TreeClassifier in CLF_TREES.items(): - clf = TreeClassifier(max_features="auto") - clf.fit(iris.data, iris.target) - assert clf.max_features_ == 2 - for name, TreeEstimator in ALL_TREES.items(): est = TreeEstimator(max_features="sqrt") est.fit(iris.data, iris.target) @@ -2369,27 +2357,6 @@ def test_check_node_ndarray(): _check_node_ndarray(problematic_node_ndarray, expected_dtype=expected_dtype) -# TODO(1.3): Remove -def test_max_features_auto_deprecated(): - for Tree in CLF_TREES.values(): - tree = Tree(max_features="auto") - msg = ( - "`max_features='auto'` has been deprecated in 1.1 and will be removed in" - " 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'`." - ) - with pytest.warns(FutureWarning, match=msg): - tree.fit(X, y) - - for Tree in REG_TREES.values(): - tree = Tree(max_features="auto") - msg = ( - "`max_features='auto'` has been deprecated in 1.1 and will be removed in" - " 1.3. To keep the past behaviour, explicitly set `max_features=1.0'`." - ) - with pytest.warns(FutureWarning, match=msg): - tree.fit(X, y) - - @pytest.mark.parametrize( "Splitter", chain(DENSE_SPLITTERS.values(), SPARSE_SPLITTERS.values()) )