MAINT Clean up deprecations for 1.6: in HGBT #30002

Merged
47 changes: 13 additions & 34 deletions sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py
@@ -4,7 +4,6 @@
 # SPDX-License-Identifier: BSD-3-Clause

 import itertools
-import warnings
 from abc import ABC, abstractmethod
 from contextlib import contextmanager, nullcontext, suppress
 from functools import partial
@@ -37,7 +36,7 @@
 from ...utils import check_random_state, compute_sample_weight, resample
 from ...utils._missing import is_scalar_nan
 from ...utils._openmp_helpers import _openmp_effective_n_threads
-from ...utils._param_validation import Hidden, Interval, RealNotInt, StrOptions
+from ...utils._param_validation import Interval, RealNotInt, StrOptions
 from ...utils.multiclass import check_classification_targets
 from ...utils.validation import (
     _check_monotonic_cst,
@@ -166,12 +165,7 @@ class BaseHistGradientBoosting(BaseEstimator, ABC):
         ],
         "tol": [Interval(Real, 0, None, closed="left")],
         "max_bins": [Interval(Integral, 2, 255, closed="both")],
-        "categorical_features": [
-            "array-like",
-            StrOptions({"from_dtype"}),
-            Hidden(StrOptions({"warn"})),
-            None,
-        ],
+        "categorical_features": ["array-like", StrOptions({"from_dtype"}), None],
         "warm_start": ["boolean"],
         "early_stopping": [StrOptions({"auto"}), "boolean"],
         "scoring": [str, callable, None],
@@ -378,7 +372,6 @@ def _check_categorical_features(self, X):
         if _is_pandas_df(X):
             X_is_dataframe = True
             categorical_columns_mask = np.asarray(X.dtypes == "category")
-            X_has_categorical_columns = categorical_columns_mask.any()
         elif hasattr(X, "__dataframe__"):
             X_is_dataframe = True
             categorical_columns_mask = np.asarray(
@@ -387,29 +380,11 @@ def _check_categorical_features(self, X):
                     for c in X.__dataframe__().get_columns()
                 ]
             )
-            X_has_categorical_columns = categorical_columns_mask.any()
         else:
             X_is_dataframe = False
             categorical_columns_mask = None
-            X_has_categorical_columns = False

-        # TODO(1.6): Remove warning and change default to "from_dtype" in v1.6
-        if (
-            isinstance(self.categorical_features, str)
-            and self.categorical_features == "warn"
-        ):
-            if X_has_categorical_columns:
-                warnings.warn(
-                    (
-                        "The categorical_features parameter will change to 'from_dtype'"
-                        " in v1.6. The 'from_dtype' option automatically treats"
-                        " categorical dtypes in a DataFrame as categorical features."
-                    ),
-                    FutureWarning,
-                )
-            categorical_features = None
-        else:
-            categorical_features = self.categorical_features
+        categorical_features = self.categorical_features

         categorical_by_dtype = (
             isinstance(categorical_features, str)
@@ -1545,8 +1520,10 @@ class HistGradientBoostingRegressor(RegressorMixin, BaseHistGradientBoosting):
             Added support for feature names.

         .. versionchanged:: 1.4
-            Added `"from_dtype"` option. The default will change to `"from_dtype"` in
-            v1.6.
+            Added `"from_dtype"` option.
+
+        .. versionchanged:: 1.6
+            The default value changed from `None` to `"from_dtype"`.

     monotonic_cst : array-like of int of shape (n_features) or dict, default=None
         Monotonic constraint to enforce on each feature are specified using the
@@ -1719,7 +1696,7 @@ def __init__(
         l2_regularization=0.0,
         max_features=1.0,
         max_bins=255,
-        categorical_features="warn",
+        categorical_features="from_dtype",
         monotonic_cst=None,
         interaction_cst=None,
         warm_start=False,
@@ -1923,8 +1900,10 @@ class HistGradientBoostingClassifier(ClassifierMixin, BaseHistGradientBoosting):
             Added support for feature names.

         .. versionchanged:: 1.4
-            Added `"from_dtype"` option. The default will change to `"from_dtype"` in
-            v1.6.
+            Added `"from_dtype"` option.
+
+        .. versionchanged:: 1.6
+            The default value changed from `None` to `"from_dtype"`.

     monotonic_cst : array-like of int of shape (n_features) or dict, default=None
         Monotonic constraint to enforce on each feature are specified using the
@@ -2099,7 +2078,7 @@ def __init__(
         l2_regularization=0.0,
         max_features=1.0,
         max_bins=255,
-        categorical_features="warn",
+        categorical_features="from_dtype",
         monotonic_cst=None,
         interaction_cst=None,
         warm_start=False,
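Taken together, the changes above make `categorical_features="from_dtype"` the default in 1.6, so pandas categorical dtypes are picked up as categorical features automatically and the transitional `FutureWarning` is gone. A small sketch of the resulting default behavior (column names and data are illustrative, not from the PR; `is_categorical_` is the fitted boolean mask documented on the estimator):

import pandas as pd

from sklearn.ensemble import HistGradientBoostingClassifier

# One pandas categorical column and one numeric column.
X = pd.DataFrame(
    {
        "color": pd.Series(["red", "blue", "red", "blue"] * 5, dtype="category"),
        "size": [float(i) for i in range(20)],
    }
)
y = [0, 1, 0, 1] * 5

# With the new default categorical_features="from_dtype", "color" is treated as
# a categorical feature directly from its dtype; no FutureWarning is emitted.
clf = HistGradientBoostingClassifier(random_state=0).fit(X, y)
print(clf.is_categorical_)  # expected: [ True False]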
sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py
@@ -1569,26 +1569,6 @@ def test_categorical_different_order_same_model(dataframe_lib):
     assert len(predictor_1[0].nodes) == len(predictor_2[0].nodes)


-# TODO(1.6): Remove warning and change default in 1.6
-def test_categorical_features_warn():
-    """Raise warning when there are categorical features in the input DataFrame.
-
-    This is not tested for polars because polars categories must always be
-    strings and strings can only be handled as categories. Therefore the
-    situation in which a categorical column is currently being treated as
-    numbers and in the future will be treated as categories cannot occur with
-    polars.
-    """
-    pd = pytest.importorskip("pandas")
-    X = pd.DataFrame({"a": pd.Series([1, 2, 3], dtype="category"), "b": [4, 5, 6]})
-    y = [0, 1, 0]
-    hist = HistGradientBoostingClassifier(random_state=0)
-
-    msg = "The categorical_features parameter will change to 'from_dtype' in v1.6"
-    with pytest.warns(FutureWarning, match=msg):
-        hist.fit(X, y)
-
-
 def get_different_bitness_node_ndarray(node_ndarray):
     new_dtype_for_indexing_fields = np.int64 if _IS_32BIT else np.int32
