MAINT Clean up deprecations for 1.6: in HGBT #30002

Merged
47 changes: 13 additions & 34 deletions sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py
@@ -4,7 +4,6 @@
 # SPDX-License-Identifier: BSD-3-Clause

 import itertools
-import warnings
 from abc import ABC, abstractmethod
 from contextlib import contextmanager, nullcontext, suppress
 from functools import partial
@@ -37,7 +36,7 @@
 from ...utils import check_random_state, compute_sample_weight, resample
 from ...utils._missing import is_scalar_nan
 from ...utils._openmp_helpers import _openmp_effective_n_threads
-from ...utils._param_validation import Hidden, Interval, RealNotInt, StrOptions
+from ...utils._param_validation import Interval, RealNotInt, StrOptions
 from ...utils.multiclass import check_classification_targets
 from ...utils.validation import (
     _check_monotonic_cst,
@@ -166,12 +165,7 @@ class BaseHistGradientBoosting(BaseEstimator, ABC):
         ],
         "tol": [Interval(Real, 0, None, closed="left")],
         "max_bins": [Interval(Integral, 2, 255, closed="both")],
-        "categorical_features": [
-            "array-like",
-            StrOptions({"from_dtype"}),
-            Hidden(StrOptions({"warn"})),
-            None,
-        ],
+        "categorical_features": ["array-like", StrOptions({"from_dtype"}), None],
         "warm_start": ["boolean"],
         "early_stopping": [StrOptions({"auto"}), "boolean"],
         "scoring": [str, callable, None],
@@ -378,7 +372,6 @@ def _check_categorical_features(self, X):
         if _is_pandas_df(X):
             X_is_dataframe = True
             categorical_columns_mask = np.asarray(X.dtypes == "category")
-            X_has_categorical_columns = categorical_columns_mask.any()
         elif hasattr(X, "__dataframe__"):
             X_is_dataframe = True
             categorical_columns_mask = np.asarray(
@@ -387,29 +380,11 @@ def _check_categorical_features(self, X):
                     for c in X.__dataframe__().get_columns()
                 ]
             )
-            X_has_categorical_columns = categorical_columns_mask.any()
         else:
             X_is_dataframe = False
             categorical_columns_mask = None
-            X_has_categorical_columns = False

-        # TODO(1.6): Remove warning and change default to "from_dtype" in v1.6
-        if (
-            isinstance(self.categorical_features, str)
-            and self.categorical_features == "warn"
-        ):
-            if X_has_categorical_columns:
-                warnings.warn(
-                    (
-                        "The categorical_features parameter will change to 'from_dtype'"
-                        " in v1.6. The 'from_dtype' option automatically treats"
-                        " categorical dtypes in a DataFrame as categorical features."
-                    ),
-                    FutureWarning,
-                )
-            categorical_features = None
-        else:
-            categorical_features = self.categorical_features
+        categorical_features = self.categorical_features

         categorical_by_dtype = (
             isinstance(categorical_features, str)
@@ -1545,8 +1520,10 @@ class HistGradientBoostingRegressor(RegressorMixin, BaseHistGradientBoosting):
             Added support for feature names.

         .. versionchanged:: 1.4
-            Added `"from_dtype"` option. The default will change to `"from_dtype"` in
-            v1.6.
+            Added `"from_dtype"` option.
+
+        .. versionchanged:: 1.6
+            The default value changed from `None` to `"from_dtype"`.

     monotonic_cst : array-like of int of shape (n_features) or dict, default=None
         Monotonic constraint to enforce on each feature are specified using the
@@ -1719,7 +1696,7 @@ def __init__(
         l2_regularization=0.0,
         max_features=1.0,
         max_bins=255,
-        categorical_features="warn",
+        categorical_features="from_dtype",
         monotonic_cst=None,
         interaction_cst=None,
         warm_start=False,
@@ -1923,8 +1900,10 @@ class HistGradientBoostingClassifier(ClassifierMixin, BaseHistGradientBoosting):
             Added support for feature names.

         .. versionchanged:: 1.4
-            Added `"from_dtype"` option. The default will change to `"from_dtype"` in
-            v1.6.
+            Added `"from_dtype"` option.
+
+        .. versionchanged:: 1.6
+            The default value changed from `None` to `"from_dtype"`.

     monotonic_cst : array-like of int of shape (n_features) or dict, default=None
         Monotonic constraint to enforce on each feature are specified using the
@@ -2099,7 +2078,7 @@ def __init__(
         l2_regularization=0.0,
         max_features=1.0,
         max_bins=255,
-        categorical_features="warn",
+        categorical_features="from_dtype",
         monotonic_cst=None,
         interaction_cst=None,
         warm_start=False,
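Taken together, the changes above make `categorical_features="from_dtype"` the default in 1.6, so pandas categorical dtypes are picked up as categorical features automatically and the transitional `FutureWarning` is gone. A small sketch of the resulting default behavior (column names and data are illustrative, not from the PR; `is_categorical_` is the fitted boolean mask documented on the estimator):

import pandas as pd

from sklearn.ensemble import HistGradientBoostingClassifier

# One pandas categorical column and one numeric column.
X = pd.DataFrame(
    {
        "color": pd.Series(["red", "blue", "red", "blue"] * 5, dtype="category"),
        "size": [float(i) for i in range(20)],
    }
)
y = [0, 1, 0, 1] * 5

# With the new default categorical_features="from_dtype", "color" is treated as
# a categorical feature directly from its dtype; no FutureWarning is emitted.
clf = HistGradientBoostingClassifier(random_state=0).fit(X, y)
print(clf.is_categorical_)  # expected: [ True False]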
sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py
@@ -1569,26 +1569,6 @@ def test_categorical_different_order_same_model(dataframe_lib):
     assert len(predictor_1[0].nodes) == len(predictor_2[0].nodes)


-# TODO(1.6): Remove warning and change default in 1.6
-def test_categorical_features_warn():
-    """Raise warning when there are categorical features in the input DataFrame.
-
-    This is not tested for polars because polars categories must always be
-    strings and strings can only be handled as categories. Therefore the
-    situation in which a categorical column is currently being treated as
-    numbers and in the future will be treated as categories cannot occur with
-    polars.
-    """
-    pd = pytest.importorskip("pandas")
-    X = pd.DataFrame({"a": pd.Series([1, 2, 3], dtype="category"), "b": [4, 5, 6]})
-    y = [0, 1, 0]
-    hist = HistGradientBoostingClassifier(random_state=0)
-
-    msg = "The categorical_features parameter will change to 'from_dtype' in v1.6"
-    with pytest.warns(FutureWarning, match=msg):
-        hist.fit(X, y)
-
-
 def get_different_bitness_node_ndarray(node_ndarray):
     new_dtype_for_indexing_fields = np.int64 if _IS_32BIT else np.int32
