MNT Clean-up deprecations for 1.7: sample_weight as positional arg when not consumed #31119

Merged
29 changes: 17 additions & 12 deletions sklearn/ensemble/_bagging.py
@@ -40,7 +40,6 @@
from ..utils.validation import (
_check_method_params,
_check_sample_weight,
_deprecate_positional_args,
_estimator_has,
check_is_fitted,
has_fit_parameter,
@@ -338,15 +337,11 @@ def __init__(
self.random_state = random_state
self.verbose = verbose

# TODO(1.7): remove `sample_weight` from the signature after deprecation
# cycle; pop it from `fit_params` before the `_raise_for_params` check and
# reinsert later, for backwards compatibility
@_deprecate_positional_args(version="1.7")
@_fit_context(
# BaseBagging.estimator is not validated yet
prefer_skip_nested_validation=False
)
def fit(self, X, y, *, sample_weight=None, **fit_params):
def fit(self, X, y, sample_weight=None, **fit_params):
"""Build a Bagging ensemble of estimators from the training set (X, y).

Parameters
@@ -363,7 +358,6 @@ def fit(self, X, y, *, sample_weight=None, **fit_params):
Sample weights. If None, then samples are equally weighted.
Note that this is supported only if the base estimator supports
sample weighting.

**fit_params : dict
Parameters to pass to the underlying estimators.

@@ -393,11 +387,13 @@ def fit(self, X, y, *, sample_weight=None, **fit_params):
multi_output=True,
)

if sample_weight is not None:
sample_weight = _check_sample_weight(sample_weight, X, dtype=None)
fit_params["sample_weight"] = sample_weight

Comment on lines -397 to -399
Member Author

The allow suggestion didn't work here, but I removed this validation because bagging doesn't consume sample_weight, so it should delegate validation to the underlying estimator.

Otherwise, if it turns out that bagging should be a consumer, we'll need to put back sample_weight as an explicit param, right?

Contributor

+1 to keep sample_weight as an explicit param for this release, as indeed we will need it for a PR fixing sample weight in Bagging estimators.

Member

I don't understand how that affects your work; can you elaborate?

Contributor

It's for #31165, where we use sample_weight for drawing the samples instead of passing them to the sub-estimators. Bagging is then a consumer of sample_weight, and consumer-only, as it should not pass it to the underlying estimators.

Member

Oh interesting. I'd say we can merge this one, and your PR can add it as an explicit parameter. Nothing changes for the user's code, since they can pass sample_weight=sample_weight regardless of whether it is part of kwargs or an explicit parameter.
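
To make that last point concrete, here is a minimal sketch with made-up data (assuming this branch's behavior): the call site is identical whether `sample_weight` is an explicit parameter of `fit` or is collected via `**fit_params`.

    import numpy as np
    from sklearn.ensemble import BaggingClassifier
    from sklearn.tree import DecisionTreeClassifier

    X = np.array([[0.0], [1.0], [2.0], [3.0]])
    y = np.array([0, 0, 1, 1])
    w = np.array([1.0, 1.0, 2.0, 2.0])

    # Passing `sample_weight` by keyword works before and after the change;
    # only positional passing is affected by the deprecation.
    clf = BaggingClassifier(estimator=DecisionTreeClassifier(), n_estimators=3)
    clf.fit(X, y, sample_weight=w)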

return self._fit(X, y, max_samples=self.max_samples, **fit_params)
return self._fit(
X,
y,
max_samples=self.max_samples,
sample_weight=sample_weight,
**fit_params,
)

def _parallel_args(self):
return {}
@@ -409,6 +405,7 @@ def _fit(
max_samples=None,
max_depth=None,
check_input=True,
sample_weight=None,
**fit_params,
):
"""Build a Bagging ensemble of estimators from the training
@@ -437,6 +434,11 @@ def _fit(
If the meta-estimator already checks the input, set this value to
False to prevent redundant input validation.

sample_weight : array-like of shape (n_samples,), default=None
Sample weights. If None, then samples are equally weighted.
Note that this is supported only if the base estimator supports
sample weighting.

**fit_params : dict, default=None
Parameters to pass to the :term:`fit` method of the underlying
estimator.
@@ -456,6 +458,9 @@ def _fit(
# Check parameters
self._validate_estimator(self._get_estimator())

if sample_weight is not None:
fit_params["sample_weight"] = sample_weight

if _routing_enabled():
routed_params = process_routing(self, "fit", **fit_params)
else:
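
As an aside, a standalone sketch of the delegation pattern adopted in `_fit` above (hypothetical name, not the actual implementation): the meta-estimator no longer calls `_check_sample_weight` itself; it reinserts the weights into `fit_params` so that validation happens in the estimators that actually consume them.

    def _fit_sketch(X, y, sample_weight=None, **fit_params):
        # Reinsert for backwards compatibility; the sub-estimator's own `fit`
        # (or metadata routing) performs the actual validation.
        if sample_weight is not None:
            fit_params["sample_weight"] = sample_weight
        return fit_params

    assert _fit_sketch([[0.0]], [0], sample_weight=[1.0]) == {"sample_weight": [1.0]}
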
50 changes: 10 additions & 40 deletions sklearn/ensemble/_stacking.py
@@ -39,7 +39,6 @@
from ..utils.validation import (
_check_feature_names_in,
_check_response_method,
_deprecate_positional_args,
_estimator_has,
check_is_fitted,
column_or_1d,
@@ -657,11 +656,7 @@ def _validate_estimators(self):

return names, estimators

# TODO(1.7): remove `sample_weight` from the signature after deprecation
# cycle; pop it from `fit_params` before the `_raise_for_params` check and
# reinsert afterwards, for backwards compatibility
@_deprecate_positional_args(version="1.7")
def fit(self, X, y, *, sample_weight=None, **fit_params):
def fit(self, X, y, **fit_params):
"""Fit the estimators.

Parameters
@@ -676,11 +671,6 @@ def fit(self, X, y, *, sample_weight=None, **fit_params):
matter (e.g. for ordinal regression), one should numerically encode
the target `y` before calling :term:`fit`.

sample_weight : array-like of shape (n_samples,), default=None
Sample weights. If None, then samples are equally weighted.
Note that this is supported only if all underlying estimators
support sample weights.

**fit_params : dict
Parameters to pass to the underlying estimators.

@@ -696,7 +686,8 @@ def fit(self, X, y, *, sample_weight=None, **fit_params):
self : object
Returns a fitted instance of estimator.
"""
_raise_for_params(fit_params, self, "fit")
_raise_for_params(fit_params, self, "fit", allow=["sample_weight"])

check_classification_targets(y)
if type_of_target(y) == "multilabel-indicator":
self._label_encoder = [LabelEncoder().fit(yk) for yk in y.T]
@@ -712,8 +703,6 @@ def fit(self, X, y, *, sample_weight=None, **fit_params):
self.classes_ = self._label_encoder.classes_
y_encoded = self._label_encoder.transform(y)

if sample_weight is not None:
fit_params["sample_weight"] = sample_weight
return super().fit(X, y_encoded, **fit_params)

@available_if(
@@ -1020,11 +1009,7 @@ def _validate_final_estimator(self):
)
)

# TODO(1.7): remove `sample_weight` from the signature after deprecation
# cycle; pop it from `fit_params` before the `_raise_for_params` check and
# reinsert afterwards, for backwards compatibility
@_deprecate_positional_args(version="1.7")
def fit(self, X, y, *, sample_weight=None, **fit_params):
def fit(self, X, y, **fit_params):
"""Fit the estimators.

Parameters
@@ -1036,11 +1021,6 @@ def fit(self, X, y, *, sample_weight=None, **fit_params):
y : array-like of shape (n_samples,)
Target values.

sample_weight : array-like of shape (n_samples,), default=None
Sample weights. If None, then samples are equally weighted.
Note that this is supported only if all underlying estimators
support sample weights.

**fit_params : dict
Parameters to pass to the underlying estimators.

@@ -1056,10 +1036,10 @@ def fit(self, X, y, *, sample_weight=None, **fit_params):
self : object
Returns a fitted instance.
"""
_raise_for_params(fit_params, self, "fit")
_raise_for_params(fit_params, self, "fit", allow=["sample_weight"])

y = column_or_1d(y, warn=True)
if sample_weight is not None:
fit_params["sample_weight"] = sample_weight

return super().fit(X, y, **fit_params)

def transform(self, X):
@@ -1078,11 +1058,7 @@ def transform(self, X):
"""
return self._transform(X)

# TODO(1.7): remove `sample_weight` from the signature after deprecation
# cycle; pop it from `fit_params` before the `_raise_for_params` check and
# reinsert afterwards, for backwards compatibility
@_deprecate_positional_args(version="1.7")
def fit_transform(self, X, y, *, sample_weight=None, **fit_params):
def fit_transform(self, X, y, **fit_params):
"""Fit the estimators and return the predictions for X for each estimator.

Parameters
@@ -1094,11 +1070,6 @@ def fit_transform(self, X, y, *, sample_weight=None, **fit_params):
y : array-like of shape (n_samples,)
Target values.

sample_weight : array-like of shape (n_samples,), default=None
Sample weights. If None, then samples are equally weighted.
Note that this is supported only if all underlying estimators
support sample weights.

**fit_params : dict
Parameters to pass to the underlying estimators.

@@ -1114,9 +1085,8 @@ def fit_transform(self, X, y, *, sample_weight=None, **fit_params):
y_preds : ndarray of shape (n_samples, n_estimators)
Prediction outputs for each estimator.
"""
_raise_for_params(fit_params, self, "fit")
if sample_weight is not None:
fit_params["sample_weight"] = sample_weight
_raise_for_params(fit_params, self, "fit", allow=["sample_weight"])

return super().fit_transform(X, y, **fit_params)

@available_if(
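
A hedged usage sketch for the stacking change (illustrative data, assuming this branch): `sample_weight` still reaches `fit` by keyword without enabling metadata routing, because `_raise_for_params` now lets it through via `allow`.

    import numpy as np
    from sklearn.ensemble import StackingClassifier
    from sklearn.linear_model import LogisticRegression

    X = np.array([[0.0], [1.0], [2.0], [3.0]])
    y = np.array([0, 0, 1, 1])
    w = np.array([1.0, 1.0, 2.0, 2.0])

    # `sample_weight` now travels through **fit_params; no
    # enable_metadata_routing is required because it is explicitly allowed.
    stack = StackingClassifier(estimators=[("lr", LogisticRegression())], cv=2)
    stack.fit(X, y, sample_weight=w)
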
37 changes: 7 additions & 30 deletions sklearn/ensemble/_voting.py
@@ -38,7 +38,6 @@
from ..utils.parallel import Parallel, delayed
from ..utils.validation import (
_check_feature_names_in,
_deprecate_positional_args,
check_is_fitted,
column_or_1d,
)
@@ -352,11 +351,7 @@ def __init__(
# estimators in VotingClassifier.estimators are not validated yet
prefer_skip_nested_validation=False
)
# TODO(1.7): remove `sample_weight` from the signature after deprecation
# cycle; pop it from `fit_params` before the `_raise_for_params` check and
# reinsert later, for backwards compatibility
@_deprecate_positional_args(version="1.7")
def fit(self, X, y, *, sample_weight=None, **fit_params):
def fit(self, X, y, **fit_params):
"""Fit the estimators.

Parameters
@@ -368,13 +363,6 @@ def fit(self, X, y, *, sample_weight=None, **fit_params):
y : array-like of shape (n_samples,)
Target values.

sample_weight : array-like of shape (n_samples,), default=None
Sample weights. If None, then samples are equally weighted.
Note that this is supported only if all underlying estimators
support sample weights.

.. versionadded:: 0.18

**fit_params : dict
Parameters to pass to the underlying estimators.

@@ -391,7 +379,8 @@ def fit(self, X, y, *, sample_weight=None, **fit_params):
self : object
Returns the instance itself.
"""
_raise_for_params(fit_params, self, "fit")
_raise_for_params(fit_params, self, "fit", allow=["sample_weight"])

y_type = type_of_target(y, input_name="y")
if y_type in ("unknown", "continuous"):
# raise a specific ValueError for non-classification tasks
@@ -413,9 +402,6 @@ def fit(self, X, y, *, sample_weight=None, **fit_params):
self.classes_ = self.le_.classes_
transformed_y = self.le_.transform(y)

if sample_weight is not None:
fit_params["sample_weight"] = sample_weight

return super().fit(X, transformed_y, **fit_params)

def predict(self, X):
@@ -657,11 +643,7 @@ def __init__(self, estimators, *, weights=None, n_jobs=None, verbose=False):
# estimators in VotingRegressor.estimators are not validated yet
prefer_skip_nested_validation=False
)
# TODO(1.7): remove `sample_weight` from the signature after deprecation cycle;
# pop it from `fit_params` before the `_raise_for_params` check and reinsert later,
# for backwards compatibility
@_deprecate_positional_args(version="1.7")
def fit(self, X, y, *, sample_weight=None, **fit_params):
def fit(self, X, y, **fit_params):
"""Fit the estimators.

Parameters
Expand All @@ -673,11 +655,6 @@ def fit(self, X, y, *, sample_weight=None, **fit_params):
y : array-like of shape (n_samples,)
Target values.

sample_weight : array-like of shape (n_samples,), default=None
Sample weights. If None, then samples are equally weighted.
Note that this is supported only if all underlying estimators
support sample weights.

**fit_params : dict
Parameters to pass to the underlying estimators.

@@ -694,10 +671,10 @@ def fit(self, X, y, *, sample_weight=None, **fit_params):
self : object
Fitted estimator.
"""
_raise_for_params(fit_params, self, "fit")
_raise_for_params(fit_params, self, "fit", allow=["sample_weight"])

y = column_or_1d(y, warn=True)
if sample_weight is not None:
fit_params["sample_weight"] = sample_weight

return super().fit(X, y, **fit_params)

def predict(self, X):
4 changes: 2 additions & 2 deletions sklearn/ensemble/tests/test_voting.py
@@ -340,7 +340,7 @@ def test_sample_weight(global_random_seed):
)
sample_weight = np.random.RandomState(global_random_seed).uniform(size=(len(y),))
eclf3 = VotingClassifier(estimators=[("lr", clf1)], voting="soft")
eclf3.fit(X_scaled, y, sample_weight)
eclf3.fit(X_scaled, y, sample_weight=sample_weight)
clf1.fit(X_scaled, y, sample_weight)
assert_array_equal(eclf3.predict(X_scaled), clf1.predict(X_scaled))
assert_array_almost_equal(
@@ -355,7 +355,7 @@
)
msg = "Underlying estimator KNeighborsClassifier does not support sample weights."
with pytest.raises(TypeError, match=msg):
eclf3.fit(X_scaled, y, sample_weight)
eclf3.fit(X_scaled, y, sample_weight=sample_weight)

# check that _fit_single_estimator will raise the right error
# it should raise the original error if this is not linked to sample_weight
7 changes: 1 addition & 6 deletions sklearn/linear_model/_ransac.py
@@ -35,7 +35,6 @@
from ..utils.validation import (
_check_method_params,
_check_sample_weight,
_deprecate_positional_args,
check_is_fitted,
has_fit_parameter,
validate_data,
@@ -319,11 +318,7 @@ def __init__(
# RansacRegressor.estimator is not validated yet
prefer_skip_nested_validation=False
)
# TODO(1.7): remove `sample_weight` from the signature after deprecation
# cycle; for backwards compatibility: pop it from `fit_params` before the
# `_raise_for_params` check and reinsert it after the check
@_deprecate_positional_args(version="1.7")
def fit(self, X, y, *, sample_weight=None, **fit_params):
def fit(self, X, y, sample_weight=None, **fit_params):
"""Fit estimator using RANSAC algorithm.

Parameters
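
Note the contrast with the meta-estimators above: `RANSACRegressor` handles `sample_weight` itself (subsetting it for each trial fit), so the parameter stays explicit in the signature; only the keyword-only marker and the deprecation decorator are dropped. An illustrative check with synthetic data:

    import numpy as np
    from sklearn.linear_model import RANSACRegressor

    rng = np.random.RandomState(0)
    X = rng.uniform(size=(50, 1))
    y = 3.0 * X.ravel() + rng.normal(scale=0.1, size=50)
    w = rng.uniform(0.5, 1.5, size=50)

    # `sample_weight` remains a named parameter of `fit` and is forwarded to
    # the base estimator during the RANSAC trials.
    reg = RANSACRegressor(random_state=0).fit(X, y, sample_weight=w)
    print(reg.estimator_.coef_)
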
11 changes: 9 additions & 2 deletions sklearn/utils/_metadata_requests.py
@@ -130,7 +130,7 @@ def _routing_enabled():
return get_config().get("enable_metadata_routing", False)


def _raise_for_params(params, owner, method):
def _raise_for_params(params, owner, method, allow=None):
"""Raise an error if metadata routing is not enabled and params are passed.

.. versionadded:: 1.4
Expand All @@ -146,6 +146,10 @@ def _raise_for_params(params, owner, method):
method : str
The name of the method, e.g. "fit".

allow : list of str, default=None
A list of parameters which are allowed to be passed even if metadata
routing is not enabled.

Raises
------
ValueError
@@ -154,7 +158,10 @@
caller = (
f"{owner.__class__.__name__}.{method}" if method else owner.__class__.__name__
)
if not _routing_enabled() and params:

allow = allow if allow is not None else {}

if not _routing_enabled() and (params.keys() - allow):
raise ValueError(
f"Passing extra keyword arguments to {caller} is only supported if"
" enable_metadata_routing=True, which you can set using"
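
Finally, a self-contained sketch of the check added above (simplified; the name is hypothetical, and the real helper also builds a caller string for the error message): extra kwargs still raise when routing is disabled, unless they are listed in `allow`.

    def raise_for_params_sketch(params, allow=None, routing_enabled=False):
        allow = allow if allow is not None else {}
        # Set difference against `allow`: only un-allowed extras trigger the error.
        if not routing_enabled and (params.keys() - allow):
            raise ValueError(
                "Passing extra keyword arguments is only supported if "
                "enable_metadata_routing=True."
            )

    raise_for_params_sketch({"sample_weight": [1.0]}, allow=["sample_weight"])  # passes
    try:
        raise_for_params_sketch({"custom_param": 0})
    except ValueError as exc:
        print(exc)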