[WIP] Add a supports_sample_weight tag #13565

Closed
3 changes: 2 additions & 1 deletion sklearn/base.py
@@ -25,7 +25,8 @@
'stateless': False,
'multilabel': False,
'_skip_test': False,
-    'multioutput_only': False}
+    'multioutput_only': False,
+    'supports_sample_weight': False}


def clone(estimator, safe=True):
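For context, a minimal sketch of how the new default would surface through the existing BaseEstimator._get_tags() machinery, which merges _DEFAULT_TAGS with each class's _more_tags() overrides (estimator choices are illustrative and assume only the changes shown in this diff):

    from sklearn.linear_model import LinearRegression
    from sklearn.neighbors import KNeighborsRegressor

    # LinearRegression gains a _more_tags() override further down in this
    # diff, so its tag reads True; KNeighborsRegressor is untouched here and
    # keeps the new False default from _DEFAULT_TAGS.
    print(LinearRegression()._get_tags()['supports_sample_weight'])     # True
    print(KNeighborsRegressor()._get_tags()['supports_sample_weight'])  # False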
11 changes: 11 additions & 0 deletions sklearn/calibration.py
@@ -199,6 +199,17 @@ def fit(self, X, y, sample_weight=None):

return self

+    def _more_tags(self):
+        if self.base_estimator is None:
+            # base_estimator can be None in which case we use LinearSVC
+            # which accepts sample_weight
+            supports_sample_weight = True
+        else:
+            supports_sample_weight = (
+                self.base_estimator._get_tags()['supports_sample_weight'])
+
+        return {'supports_sample_weight': supports_sample_weight}

def predict_proba(self, X):
"""Posterior probabilities of classification

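The pattern above — hard-code True for the LinearSVC default, otherwise mirror the wrapped estimator's tag — is the delegation scheme reused by the other meta-estimators in this diff. A hedged sketch of the resulting behaviour (estimator choices are illustrative):

    from sklearn.calibration import CalibratedClassifierCV
    from sklearn.linear_model import LogisticRegression
    from sklearn.neighbors import KNeighborsClassifier

    # The meta-estimator reports whatever its base_estimator reports.
    print(CalibratedClassifierCV(LogisticRegression())
          ._get_tags()['supports_sample_weight'])   # True (tag set in this diff)
    print(CalibratedClassifierCV(KNeighborsClassifier())
          ._get_tags()['supports_sample_weight'])   # False (default)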
3 changes: 3 additions & 0 deletions sklearn/cluster/dbscan_.py
@@ -382,3 +382,6 @@ def fit_predict(self, X, y=None, sample_weight=None):
"""
self.fit(X, sample_weight=sample_weight)
return self.labels_

+    def _more_tags(self):
+        return {'supports_sample_weight': True}
2 changes: 2 additions & 0 deletions sklearn/cluster/k_means_.py
@@ -1103,6 +1103,8 @@ def score(self, X, y=None, sample_weight=None):
return -_labels_inertia(X, sample_weight, x_squared_norms,
self.cluster_centers_)[1]

+    def _more_tags(self):
+        return {'supports_sample_weight': True}

def _mini_batch_step(X, sample_weight, x_squared_norms, centers, weight_sums,
old_center_buffer, compute_squared_diff,
12 changes: 11 additions & 1 deletion sklearn/compose/_target.py
@@ -235,4 +235,14 @@ def predict(self, X):
return pred_trans

def _more_tags(self):
-        return {'poor_score': True, 'no_validation': True}
+        if self.regressor is None:
+            # regressor can be None in which case we use LinearRegression
+            # which accepts sample_weight
+            supports_sample_weight = True
+        else:
+            supports_sample_weight = (
+                self.regressor._get_tags()['supports_sample_weight'])
+
+        return {'poor_score': True, 'no_validation': True,
+                'supports_sample_weight': supports_sample_weight}

6 changes: 4 additions & 2 deletions sklearn/dummy.py
@@ -318,7 +318,8 @@ def predict_log_proba(self, X):
return [np.log(p) for p in proba]

def _more_tags(self):
-        return {'poor_score': True, 'no_validation': True}
+        return {'poor_score': True, 'no_validation': True,
+                'supports_sample_weight': True}

def score(self, X, y, sample_weight=None):
"""Returns the mean accuracy on the given test data and labels.
@@ -510,7 +511,8 @@ def predict(self, X, return_std=False):
return (y, y_std) if return_std else y

def _more_tags(self):
-        return {'poor_score': True, 'no_validation': True}
+        return {'poor_score': True, 'no_validation': True,
+                'supports_sample_weight': True}

def score(self, X, y, sample_weight=None):
"""Returns the coefficient of determination R^2 of the prediction.
16 changes: 13 additions & 3 deletions sklearn/ensemble/bagging.py
@@ -20,7 +20,7 @@
from ..utils.metaestimators import if_delegate_has_method
from ..utils.multiclass import check_classification_targets
from ..utils.random import sample_without_replacement
-from ..utils.validation import has_fit_parameter, check_is_fitted
+from ..utils.validation import check_is_fitted


__all__ = ["BaggingClassifier",
@@ -66,8 +66,7 @@ def _parallel_build_estimators(n_estimators, ensemble, X, y, sample_weight,
max_samples = ensemble._max_samples
bootstrap = ensemble.bootstrap
bootstrap_features = ensemble.bootstrap_features
-    support_sample_weight = has_fit_parameter(ensemble.base_estimator_,
-                                              "sample_weight")
+    support_sample_weight = ensemble._get_tags()['supports_sample_weight']
if not support_sample_weight and sample_weight is not None:
raise ValueError("The base estimator doesn't support sample weight")

@@ -427,6 +426,17 @@ def estimators_samples_(self):
return [sample_indices
for _, sample_indices in self._get_estimators_indices()]

+    def _more_tags(self):
+        if self.base_estimator is None:
+            # base_estimator can be None in which case we use a decision tree,
+            # which accepts sample_weight
+            supports_sample_weight = True
+        else:
+            supports_sample_weight = (
+                self.base_estimator._get_tags()['supports_sample_weight'])
+
+        return {'supports_sample_weight': supports_sample_weight}


class BaggingClassifier(BaseBagging, ClassifierMixin):
"""A Bagging classifier.
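Note that BaseBagging._more_tags() deliberately shadows the unconditional True added to BaseEnsemble in the next file: _get_tags() walks the MRO, so the most derived class wins. With the tag in place, passing sample_weight alongside an unsupporting base estimator should fail fast; a hypothetical illustration, assuming KNeighborsClassifier keeps the False default:

    import numpy as np
    from sklearn.ensemble import BaggingClassifier
    from sklearn.neighbors import KNeighborsClassifier

    X = np.arange(20).reshape(10, 2)
    y = np.array([0, 1] * 5)

    # KNeighborsClassifier does not declare the tag, so the check raises.
    bag = BaggingClassifier(base_estimator=KNeighborsClassifier(n_neighbors=3))
    try:
        bag.fit(X, y, sample_weight=np.ones(10))
    except ValueError as e:
        print(e)  # The base estimator doesn't support sample weight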
3 changes: 3 additions & 0 deletions sklearn/ensemble/base.py
@@ -147,6 +147,9 @@ def __iter__(self):
"""Returns iterator over estimators in the ensemble."""
return iter(self.estimators_)

+    def _more_tags(self):
+        return {'supports_sample_weight': True}


def _partition_estimators(n_estimators, n_jobs):
"""Private function used to partition estimators between jobs."""
16 changes: 5 additions & 11 deletions sklearn/ensemble/gradient_boosting.py
@@ -1478,21 +1478,15 @@ def fit(self, X, y, sample_weight=None, monitor=None):
raw_predictions = np.zeros(shape=(X.shape[0], self.loss_.K),
dtype=np.float64)
else:
-            # XXX clean this once we have a support_sample_weight tag
             if sample_weight_is_none:
                 self.init_.fit(X, y)
             else:
-                msg = ("The initial estimator {} does not support sample "
-                       "weights.".format(self.init_.__class__.__name__))
-                try:
-                    self.init_.fit(X, y, sample_weight=sample_weight)
-                except TypeError:  # regular estimator without SW support
+                if not self.init_._get_tags()['supports_sample_weight']:
+                    msg = ("The initial estimator {} does not "
+                           "support sample weights."
+                           .format(self.init_.__class__.__name__))
                     raise ValueError(msg)
-                except ValueError as e:
-                    if 'not enough values to unpack' in str(e):  # pipeline
-                        raise ValueError(msg) from e
-                    else:  # regular estimator whose input checking failed
-                        raise
+                self.init_.fit(X, y, sample_weight=sample_weight)

raw_predictions = \
self.loss_.get_init_raw_predictions(X, self.init_)
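The net effect of the rewrite: an unsupported init estimator is now rejected up front by the tag check instead of by catching TypeError/ValueError around init_.fit, which is what makes the pipeline special-casing (and the test removed below) unnecessary. A sketch, again assuming KNeighborsRegressor keeps the False default:

    import numpy as np
    from sklearn.ensemble import GradientBoostingRegressor
    from sklearn.neighbors import KNeighborsRegressor

    rng = np.random.RandomState(0)
    X, y = rng.rand(30, 3), rng.rand(30)

    gb = GradientBoostingRegressor(init=KNeighborsRegressor())
    try:
        gb.fit(X, y, sample_weight=np.ones(30))
    except ValueError as e:
        print(e)  # The initial estimator KNeighborsRegressor does not
                  # support sample weights.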
12 changes: 0 additions & 12 deletions sklearn/ensemble/tests/test_gradient_boosting.py
@@ -1395,18 +1395,6 @@ def test_gradient_boosting_with_init_pipeline():
'weights'):
gb.fit(X, y, sample_weight=np.ones(X.shape[0]))

-    # Passing sample_weight to a pipeline raises a ValueError. This test makes
-    # sure we make the distinction between ValueError raised by a pipeline that
-    # was passed sample_weight, and a ValueError raised by a regular estimator
-    # whose input checking failed.
-    with pytest.raises(
-            ValueError,
-            match='nu <= 0 or nu > 1'):
-        # Note that NuSVR properly supports sample_weight
-        init = NuSVR(gamma='auto', nu=1.5)
-        gb = GradientBoostingRegressor(init=init)
-        gb.fit(X, y, sample_weight=np.ones(X.shape[0]))


@pytest.mark.parametrize('estimator, missing_method', [
(GradientBoostingClassifier(init=LinearSVC()), 'predict_proba'),
17 changes: 13 additions & 4 deletions sklearn/ensemble/voting_classifier.py
@@ -18,7 +18,7 @@
from ..base import clone
from ..preprocessing import LabelEncoder
from ..utils._joblib import Parallel, delayed
-from ..utils.validation import has_fit_parameter, check_is_fitted
+from ..utils.validation import check_is_fitted
from ..utils.metaestimators import _BaseComposition
from ..utils import Bunch

@@ -176,10 +176,11 @@ def fit(self, X, y, sample_weight=None):
% (len(self.weights), len(self.estimators)))

if sample_weight is not None:
-            for name, step in self.estimators:
-                if not has_fit_parameter(step, 'sample_weight'):
+            for _, est in self.estimators:
+                if not est._get_tags()['supports_sample_weight']:
                    raise ValueError('Underlying estimator \'%s\' does not'
-                                     ' support sample weights.' % name)
+                                     ' support sample weights.' %
+                                     est.__class__.__name__)
names, clfs = zip(*self.estimators)
self._validate_names(names)

@@ -343,3 +344,11 @@ def get_params(self, deep=True):
def _predict(self, X):
"""Collect results from clf.predict calls. """
return np.asarray([clf.predict(X) for clf in self.estimators_]).T

+    def _more_tags(self):
+        supports_sample_weight = all(
+            est._get_tags()['supports_sample_weight']
+            for _, est in self.estimators
+        )
+
+        return {'supports_sample_weight': supports_sample_weight}
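A hedged sketch of the all() aggregation above: the ensemble advertises sample_weight support only when every named estimator does (estimator choices are illustrative):

    from sklearn.ensemble import VotingClassifier
    from sklearn.linear_model import LogisticRegression
    from sklearn.neighbors import KNeighborsClassifier

    both = VotingClassifier([('lr1', LogisticRegression()),
                             ('lr2', LogisticRegression(C=0.1))])
    mixed = VotingClassifier([('lr', LogisticRegression()),
                              ('knn', KNeighborsClassifier())])

    print(both._get_tags()['supports_sample_weight'])   # True
    print(mixed._get_tags()['supports_sample_weight'])  # False: KNN keeps the default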
3 changes: 1 addition & 2 deletions sklearn/ensemble/weight_boosting.py
@@ -37,7 +37,6 @@
from ..utils.extmath import stable_cumsum
from ..metrics import accuracy_score, r2_score
from ..utils.validation import check_is_fitted
-from ..utils.validation import has_fit_parameter
from ..utils.validation import _num_samples

__all__ = [
@@ -440,7 +439,7 @@ def _validate_estimator(self):
"probabilities with a predict_proba method.\n"
"Please change the base estimator or set "
"algorithm='SAMME' instead.")
-        if not has_fit_parameter(self.base_estimator_, "sample_weight"):
+        if not self.base_estimator_._get_tags()['supports_sample_weight']:
raise ValueError("%s doesn't support sample_weight."
% self.base_estimator_.__class__.__name__)

3 changes: 2 additions & 1 deletion sklearn/isotonic.py
@@ -407,4 +407,5 @@ def __setstate__(self, state):
self._build_f(self._necessary_X_, self._necessary_y_)

def _more_tags(self):
-        return {'X_types': ['1darray']}
+        return {'X_types': ['1darray'],
+                'supports_sample_weight': True}
3 changes: 3 additions & 0 deletions sklearn/kernel_ridge.py
@@ -192,3 +192,6 @@ def predict(self, X):
check_is_fitted(self, ["X_fit_", "dual_coef_"])
K = self._get_kernel(X, self.X_fit_)
return np.dot(K, self.dual_coef_)

+    def _more_tags(self):
+        return {'supports_sample_weight': True}
3 changes: 3 additions & 0 deletions sklearn/linear_model/base.py
@@ -508,6 +508,9 @@ def rmatvec(b):
self._set_intercept(X_offset, y_offset, X_scale)
return self

+    def _more_tags(self):
+        return {'supports_sample_weight': True}


def _pre_fit(X, y, Xy, precompute, normalize, fit_intercept, copy,
check_input=True):
3 changes: 3 additions & 0 deletions sklearn/linear_model/bayes.py
@@ -359,6 +359,9 @@ def _log_marginal_likelihood(self, n_samples, n_features, eigen_vals,

return score

+    def _more_tags(self):
+        return {'supports_sample_weight': True}


###############################################################################
# ARD (Automatic Relevance Determination) regression
3 changes: 3 additions & 0 deletions sklearn/linear_model/huber.py
@@ -305,3 +305,6 @@ def fit(self, X, y, sample_weight=None):
y - safe_sparse_dot(X, self.coef_) - self.intercept_)
self.outliers_ = residual > self.scale_ * self.epsilon
return self

+    def _more_tags(self):
+        return {'supports_sample_weight': True}
6 changes: 6 additions & 0 deletions sklearn/linear_model/logistic.py
@@ -1680,6 +1680,9 @@ def predict_log_proba(self, X):
"""
return np.log(self.predict_proba(X))

+    def _more_tags(self):
+        return {'supports_sample_weight': True}


class LogisticRegressionCV(LogisticRegression, BaseEstimator,
LinearClassifierMixin):
@@ -2260,3 +2263,6 @@ def score(self, X, y, sample_weight=None):
scoring = get_scorer(scoring)

return scoring(self, X, y, sample_weight=sample_weight)

+    def _more_tags(self):
+        return {'supports_sample_weight': True}
3 changes: 3 additions & 0 deletions sklearn/linear_model/perceptron.py
@@ -153,3 +153,6 @@ def __init__(self, penalty=None, alpha=0.0001, fit_intercept=True,
validation_fraction=validation_fraction,
n_iter_no_change=n_iter_no_change, power_t=0.5,
warm_start=warm_start, class_weight=class_weight, n_jobs=n_jobs)

+    def _more_tags(self):
+        return {'supports_sample_weight': True}
18 changes: 13 additions & 5 deletions sklearn/linear_model/ransac.py
@@ -13,7 +13,6 @@
from ..utils.random import sample_without_replacement
from ..utils.validation import check_is_fitted
from .base import LinearRegression
-from ..utils.validation import has_fit_parameter
from ..exceptions import ConvergenceWarning

_EPSILON = np.spacing(1)
@@ -316,11 +315,9 @@ def fit(self, X, y, sample_weight=None):
except ValueError:
pass

-        estimator_fit_has_sample_weight = has_fit_parameter(base_estimator,
-                                                            "sample_weight")
+        supports_sample_weight = self._get_tags()['supports_sample_weight']
         estimator_name = type(base_estimator).__name__
-        if (sample_weight is not None and not
-                estimator_fit_has_sample_weight):
+        if sample_weight is not None and not supports_sample_weight:
raise ValueError("%s does not support sample_weight. Samples"
" weights are only used for the calibration"
" itself." % estimator_name)
@@ -492,3 +489,14 @@ def score(self, X, y):
check_is_fitted(self, 'estimator_')

return self.estimator_.score(X, y)

+    def _more_tags(self):
+        if self.base_estimator is None:
+            # base_estimator can be None in which case we use LinearRegression
+            # which accepts sample_weight
+            supports_sample_weight = True
+        else:
+            supports_sample_weight = (
+                self.base_estimator._get_tags()['supports_sample_weight'])
+
+        return {'supports_sample_weight': supports_sample_weight}
6 changes: 6 additions & 0 deletions sklearn/linear_model/ridge.py
@@ -572,6 +572,9 @@ def fit(self, X, y, sample_weight=None):

return self

+    def _more_tags(self):
+        return {'supports_sample_weight': True}


class Ridge(_BaseRidge, RegressorMixin):
"""Linear least squares with l2 regularization.
@@ -1223,6 +1226,9 @@ def fit(self, X, y, sample_weight=None):

return self

+    def _more_tags(self):
+        return {'supports_sample_weight': True}


class RidgeCV(_BaseRidgeCV, RegressorMixin):
"""Ridge regression with built-in cross-validation.
6 changes: 6 additions & 0 deletions sklearn/linear_model/stochastic_gradient.py
@@ -1048,6 +1048,9 @@ def predict_log_proba(self):
def _predict_log_proba(self, X):
return np.log(self.predict_proba(X))

+    def _more_tags(self):
+        return {'supports_sample_weight': True}


class BaseSGDRegressor(BaseSGD, RegressorMixin):

@@ -1526,3 +1529,6 @@ def __init__(self, loss="squared_loss", penalty="l2", alpha=0.0001,
validation_fraction=validation_fraction,
n_iter_no_change=n_iter_no_change, warm_start=warm_start,
average=average)

+    def _more_tags(self):
+        return {'supports_sample_weight': True}