diff --git a/doc/whats_new/v1.6.rst b/doc/whats_new/v1.6.rst index 87814e102ad98..060ebca31722f 100644 --- a/doc/whats_new/v1.6.rst +++ b/doc/whats_new/v1.6.rst @@ -80,6 +80,13 @@ Changelog - |Enhancement| Added a function :func:`base.is_clusterer` which determines whether a given estimator is of category clusterer. :pr:`28936` by :user:`Christian Veenhuis <ChVeen>`. + +:mod:`sklearn.linear_model` +........................... + +- |API| Deprecates `copy_X` in :class:`linear_model.TheilSenRegressor` as the parameter + has no effect. `copy_X` will be removed in 1.8. + :pr:`29105` by :user:`Adam Li <adam2392>`. :mod:`sklearn.metrics` ...................... diff --git a/sklearn/linear_model/_theil_sen.py b/sklearn/linear_model/_theil_sen.py index cc774e8783762..b1ebd6524cb27 100644 --- a/sklearn/linear_model/_theil_sen.py +++ b/sklearn/linear_model/_theil_sen.py @@ -20,7 +20,7 @@ from ..base import RegressorMixin, _fit_context from ..exceptions import ConvergenceWarning from ..utils import check_random_state -from ..utils._param_validation import Interval +from ..utils._param_validation import Hidden, Interval, StrOptions from ..utils.parallel import Parallel, delayed from ._base import LinearModel @@ -228,6 +228,10 @@ class TheilSenRegressor(RegressorMixin, LinearModel): copy_X : bool, default=True If True, X will be copied; else, it may be overwritten. + .. deprecated:: 1.6 + `copy_X` was deprecated in 1.6 and will be removed in 1.8. + It has no effect as a copy is always made. 
+ max_subpopulation : int, default=1e4 Instead of computing with a set of cardinality 'n choose k', where n is the number of samples and k is the number of subsamples (at least @@ -324,7 +328,7 @@ class TheilSenRegressor(RegressorMixin, LinearModel): _parameter_constraints: dict = { "fit_intercept": ["boolean"], - "copy_X": ["boolean"], + "copy_X": ["boolean", Hidden(StrOptions({"deprecated"}))], # target_type should be Integral but can accept Real for backward compatibility "max_subpopulation": [Interval(Real, 1, None, closed="left")], "n_subsamples": [None, Integral], @@ -339,7 +343,7 @@ def __init__( self, *, fit_intercept=True, - copy_X=True, + copy_X="deprecated", max_subpopulation=1e4, n_subsamples=None, max_iter=300, @@ -411,6 +415,14 @@ def fit(self, X, y): self : returns an instance of self. Fitted `TheilSenRegressor` estimator. """ + if self.copy_X != "deprecated": + warnings.warn( + "`copy_X` was deprecated in 1.6 and will be removed in 1.8 since it " + "has no effect internally. Simply leave this parameter to its default " + "value to avoid this warning.", + FutureWarning, + ) + random_state = check_random_state(self.random_state) X, y = self._validate_data(X, y, y_numeric=True) n_samples, n_features = X.shape diff --git a/sklearn/linear_model/tests/test_theil_sen.py b/sklearn/linear_model/tests/test_theil_sen.py index c8415d02be80a..f72c18c24a809 100644 --- a/sklearn/linear_model/tests/test_theil_sen.py +++ b/sklearn/linear_model/tests/test_theil_sen.py @@ -292,3 +292,11 @@ def test_less_samples_than_features(): theil_sen = TheilSenRegressor(fit_intercept=True, random_state=0).fit(X, y) y_pred = theil_sen.predict(X) assert_array_almost_equal(y_pred, y, 12) + + +# TODO(1.8): Remove +def test_copy_X_deprecated(): + X, y, _, _ = gen_toy_problem_1d() + theil_sen = TheilSenRegressor(copy_X=True, random_state=0) + with pytest.warns(FutureWarning, match="`copy_X` was deprecated"): + theil_sen.fit(X, y)