diff --git a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py index 85e1d8477b991..24d8a55df4f7d 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py +++ b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py @@ -4,7 +4,6 @@ # SPDX-License-Identifier: BSD-3-Clause import itertools -import warnings from abc import ABC, abstractmethod from contextlib import contextmanager, nullcontext, suppress from functools import partial @@ -37,7 +36,7 @@ from ...utils import check_random_state, compute_sample_weight, resample from ...utils._missing import is_scalar_nan from ...utils._openmp_helpers import _openmp_effective_n_threads -from ...utils._param_validation import Hidden, Interval, RealNotInt, StrOptions +from ...utils._param_validation import Interval, RealNotInt, StrOptions from ...utils.multiclass import check_classification_targets from ...utils.validation import ( _check_monotonic_cst, @@ -166,12 +165,7 @@ class BaseHistGradientBoosting(BaseEstimator, ABC): ], "tol": [Interval(Real, 0, None, closed="left")], "max_bins": [Interval(Integral, 2, 255, closed="both")], - "categorical_features": [ - "array-like", - StrOptions({"from_dtype"}), - Hidden(StrOptions({"warn"})), - None, - ], + "categorical_features": ["array-like", StrOptions({"from_dtype"}), None], "warm_start": ["boolean"], "early_stopping": [StrOptions({"auto"}), "boolean"], "scoring": [str, callable, None], @@ -378,7 +372,6 @@ def _check_categorical_features(self, X): if _is_pandas_df(X): X_is_dataframe = True categorical_columns_mask = np.asarray(X.dtypes == "category") - X_has_categorical_columns = categorical_columns_mask.any() elif hasattr(X, "__dataframe__"): X_is_dataframe = True categorical_columns_mask = np.asarray( @@ -387,29 +380,11 @@ def _check_categorical_features(self, X): for c in X.__dataframe__().get_columns() ] ) - X_has_categorical_columns = categorical_columns_mask.any() else: 
X_is_dataframe = False categorical_columns_mask = None - X_has_categorical_columns = False - # TODO(1.6): Remove warning and change default to "from_dtype" in v1.6 - if ( - isinstance(self.categorical_features, str) - and self.categorical_features == "warn" - ): - if X_has_categorical_columns: - warnings.warn( - ( - "The categorical_features parameter will change to 'from_dtype'" - " in v1.6. The 'from_dtype' option automatically treats" - " categorical dtypes in a DataFrame as categorical features." - ), - FutureWarning, - ) - categorical_features = None - else: - categorical_features = self.categorical_features + categorical_features = self.categorical_features categorical_by_dtype = ( isinstance(categorical_features, str) @@ -1545,8 +1520,10 @@ class HistGradientBoostingRegressor(RegressorMixin, BaseHistGradientBoosting): Added support for feature names. .. versionchanged:: 1.4 - Added `"from_dtype"` option. The default will change to `"from_dtype"` in - v1.6. + Added `"from_dtype"` option. + + .. versionchanged:: 1.6 + The default value changed from `None` to `"from_dtype"`. monotonic_cst : array-like of int of shape (n_features) or dict, default=None Monotonic constraint to enforce on each feature are specified using the @@ -1719,7 +1696,7 @@ def __init__( l2_regularization=0.0, max_features=1.0, max_bins=255, - categorical_features="warn", + categorical_features="from_dtype", monotonic_cst=None, interaction_cst=None, warm_start=False, @@ -1923,8 +1900,10 @@ class HistGradientBoostingClassifier(ClassifierMixin, BaseHistGradientBoosting): Added support for feature names. .. versionchanged:: 1.4 - Added `"from_dtype"` option. The default will change to `"from_dtype"` in - v1.6. + Added `"from_dtype"` option. + + .. versionchanged:: 1.6 + The default value changed from `None` to `"from_dtype"`. 
monotonic_cst : array-like of int of shape (n_features) or dict, default=None Monotonic constraint to enforce on each feature are specified using the @@ -2099,7 +2078,7 @@ def __init__( l2_regularization=0.0, max_features=1.0, max_bins=255, - categorical_features="warn", + categorical_features="from_dtype", monotonic_cst=None, interaction_cst=None, warm_start=False, diff --git a/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py b/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py index b5711413f9b75..190251da92615 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py +++ b/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py @@ -1569,26 +1569,6 @@ def test_categorical_different_order_same_model(dataframe_lib): assert len(predictor_1[0].nodes) == len(predictor_2[0].nodes) -# TODO(1.6): Remove warning and change default in 1.6 -def test_categorical_features_warn(): - """Raise warning when there are categorical features in the input DataFrame. - - This is not tested for polars because polars categories must always be - strings and strings can only be handled as categories. Therefore the - situation in which a categorical column is currently being treated as - numbers and in the future will be treated as categories cannot occur with - polars. - """ - pd = pytest.importorskip("pandas") - X = pd.DataFrame({"a": pd.Series([1, 2, 3], dtype="category"), "b": [4, 5, 6]}) - y = [0, 1, 0] - hist = HistGradientBoostingClassifier(random_state=0) - - msg = "The categorical_features parameter will change to 'from_dtype' in v1.6" - with pytest.warns(FutureWarning, match=msg): - hist.fit(X, y) - - def get_different_bitness_node_ndarray(node_ndarray): new_dtype_for_indexing_fields = np.int64 if _IS_32BIT else np.int32