Fixes #4632: Allow cross validation to have weighted test scores #10806


Closed · wants to merge 2 commits
38 changes: 28 additions & 10 deletions sklearn/model_selection/_search.py
@@ -273,7 +273,8 @@ def __len__(self):


def fit_grid_point(X, y, estimator, parameters, train, test, scorer,
verbose, error_score='raise-deprecating', **fit_params):
verbose, error_score='raise-deprecating',
test_score_weight=None, **fit_params):

Review comment: Instead of a test_score_weight, I would consider a Boolean parameter (called sample_weight_in_score or something) that says whether to use the sample_weight parameter that would appear in fit_params. Unless you can think of a realistic example where you would want different importance weights for metric calculation than for training, a flag would be better because it would help avoid mistakes.
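For concreteness, a minimal sketch of the two designs side by side. test_score_weight is the keyword this PR proposes and sample_weight_in_score is the reviewer's hypothetical name; neither exists in released scikit-learn, so this only runs with a patch along these lines applied:

```python
import numpy as np
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV

rng = np.random.RandomState(0)
X = rng.randn(40, 2)
y = (X[:, 0] > 0).astype(int)
w = rng.uniform(0.5, 2.0, size=40)

# This PR's design: name the fit_params key that holds the weights.
search = GridSearchCV(SVC(), {'C': [0.1, 1.0]},
                      test_score_weight='sample_weight')
search.fit(X, y, sample_weight=w)

# Reviewer's alternative: a Boolean flag (hypothetical name) that reuses
# the sample_weight already passed for fitting, so training and scoring
# can never disagree about which weights apply.
search = GridSearchCV(SVC(), {'C': [0.1, 1.0]}, sample_weight_in_score=True)
search.fit(X, y, sample_weight=w)
```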

"""Run fit on one set of parameters.

Parameters
@@ -318,6 +319,10 @@ def fit_grid_point(X, y, estimator, parameters, train, test, scorer,
step, which will always raise the error. Default is 'raise' but from
version 0.22 it will change to np.nan.

test_score_weight : string, optional, default: None
If specified, the key under which the sample weights are stored in the `fit_params` dict. If no such key is found, the test score is computed unweighted.

Returns
-------
score : float
@@ -337,7 +342,9 @@ def fit_grid_point(X, y, estimator, parameters, train, test, scorer,
test, verbose, parameters,
fit_params=fit_params,
return_n_test_samples=True,
error_score=error_score)
error_score=error_score,
test_score_weight=test_score_weight
)
return scores, parameters, n_samples_test
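A hedged usage sketch of the patched fit_grid_point: the test_score_weight keyword exists only in this PR, while fit_grid_point, check_scoring, and the scorer's sample_weight support are standard API of this era.

```python
import numpy as np
from sklearn.svm import SVC
from sklearn.metrics import check_scoring
from sklearn.model_selection import fit_grid_point

rng = np.random.RandomState(0)
X = rng.randn(20, 2)
y = (X[:, 0] > 0).astype(int)
w = rng.uniform(0.5, 2.0, size=20)
train, test = np.arange(15), np.arange(15, 20)

clf = SVC(kernel='linear')
scorer = check_scoring(clf, scoring='accuracy')

# 'sample_weight' is used for fitting and, because test_score_weight names
# that key, its test-fold slice also weights the returned score.
score, params, n_test = fit_grid_point(
    X, y, clf, {'C': 1.0}, train, test, scorer, verbose=0,
    test_score_weight='sample_weight', sample_weight=w)
```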


@@ -392,7 +399,8 @@ class BaseSearchCV(six.with_metaclass(ABCMeta, BaseEstimator,
def __init__(self, estimator, scoring=None,
fit_params=None, n_jobs=1, iid='warn',
refit=True, cv=None, verbose=0, pre_dispatch='2*n_jobs',
error_score='raise-deprecating', return_train_score=True):
error_score='raise-deprecating', return_train_score=True,
test_score_weight=None):

self.scoring = scoring
self.estimator = estimator
@@ -405,12 +413,13 @@ def __init__(self, estimator, scoring=None,
self.pre_dispatch = pre_dispatch
self.error_score = error_score
self.return_train_score = return_train_score
self.test_score_weight = test_score_weight

@property
def _estimator_type(self):
return self.estimator._estimator_type

def score(self, X, y=None):
def score(self, X, y=None, sample_weight=None):
"""Returns the score on the given data, if the estimator has been refit.

This uses the score defined by ``scoring`` where provided, and the
@@ -426,6 +435,9 @@ def score(self, X, y=None):
Target relative to X for classification or regression;
None for unsupervised learning.

sample_weight : array-like, shape (n_samples,), optional
Sample weights.

Returns
-------
score : float
@@ -436,7 +448,9 @@ def score(self, X, y=None):
"and the estimator doesn't provide one %s"
% self.best_estimator_)
score = self.scorer_[self.refit] if self.multimetric_ else self.scorer_
return score(self.best_estimator_, X, y)
score_kwgs = {} if sample_weight is None else \
{'sample_weight': sample_weight}
return score(self.best_estimator_, X, y, **score_kwgs)
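A short usage sketch of the patched score method; the sample_weight keyword on score() is what this diff adds, and when it is omitted the kwarg is not forwarded, preserving the old unweighted behaviour:

```python
import numpy as np
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV, train_test_split

rng = np.random.RandomState(0)
X = rng.randn(60, 2)
y = (X[:, 0] > 0).astype(int)
w = rng.uniform(0.5, 2.0, size=60)

X_tr, X_te, y_tr, y_te, w_tr, w_te = train_test_split(X, y, w,
                                                      random_state=0)

search = GridSearchCV(SVC(), {'C': [0.1, 1.0]}).fit(X_tr, y_tr)
# Weighted held-out score; with sample_weight omitted, score_kwgs stays
# empty and the call behaves exactly as before the patch.
print(search.score(X_te, y_te, sample_weight=w_te))
```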

def _check_is_fitted(self, method_name):
if not self.refit:
@@ -636,7 +650,8 @@ def fit(self, X, y=None, groups=None, **fit_params):
return_train_score=self.return_train_score,
return_n_test_samples=True,
return_times=True, return_parameters=False,
error_score=self.error_score)
error_score=self.error_score,
test_score_weight=self.test_score_weight)

Review comment: I wouldn't pass the test_score_weight here, just the flag to use the sample_weight in the scoring.

for parameters, (train, test) in product(candidate_params,
cv.split(X, y, groups)))

@@ -1088,12 +1103,13 @@ class GridSearchCV(BaseSearchCV):
def __init__(self, estimator, param_grid, scoring=None, fit_params=None,
n_jobs=1, iid='warn', refit=True, cv=None, verbose=0,
pre_dispatch='2*n_jobs', error_score='raise-deprecating',
return_train_score="warn"):
return_train_score="warn", test_score_weight=None):
super(GridSearchCV, self).__init__(
estimator=estimator, scoring=scoring, fit_params=fit_params,
n_jobs=n_jobs, iid=iid, refit=refit, cv=cv, verbose=verbose,
pre_dispatch=pre_dispatch, error_score=error_score,
return_train_score=return_train_score)
return_train_score=return_train_score,
test_score_weight=test_score_weight)
self.param_grid = param_grid
_check_param_grid(param_grid)

@@ -1389,15 +1405,17 @@ class RandomizedSearchCV(BaseSearchCV):
def __init__(self, estimator, param_distributions, n_iter=10, scoring=None,
fit_params=None, n_jobs=1, iid='warn', refit=True, cv=None,
verbose=0, pre_dispatch='2*n_jobs', random_state=None,
error_score='raise-deprecating', return_train_score="warn"):
error_score='raise-deprecating', return_train_score="warn",
test_score_weight=None):
self.param_distributions = param_distributions
self.n_iter = n_iter
self.random_state = random_state
super(RandomizedSearchCV, self).__init__(
estimator=estimator, scoring=scoring, fit_params=fit_params,
n_jobs=n_jobs, iid=iid, refit=refit, cv=cv, verbose=verbose,
pre_dispatch=pre_dispatch, error_score=error_score,
return_train_score=return_train_score)
return_train_score=return_train_score,
test_score_weight=test_score_weight)

def _get_param_iterator(self):
"""Return ParameterSampler instance for the given distributions"""
54 changes: 42 additions & 12 deletions sklearn/model_selection/_validation.py
@@ -361,7 +361,7 @@ def _fit_and_score(estimator, X, y, scorer, train, test, verbose,
parameters, fit_params, return_train_score=False,
return_parameters=False, return_n_test_samples=False,
return_times=False, return_estimator=False,
error_score='raise-deprecating'):
error_score='raise-deprecating', test_score_weight=None):

Review comment: I would have the flag here. Whether it's a parameter or in fit_params doesn't really matter that much, in my opinion.
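A minimal runnable sketch of the flag-based variant the reviewer describes, assuming a hypothetical sample_weight_in_score flag and helper name (neither is in this diff): the same array that was passed for fitting is sliced per fold for scoring, so the two can never disagree.

```python
import numpy as np

def _split_score_weights(fit_params, train, test,
                         sample_weight_in_score=False):
    # With the flag set, reuse fit_params['sample_weight'] for scoring,
    # sliced per fold; otherwise score unweighted.
    if not sample_weight_in_score or 'sample_weight' not in fit_params:
        return None, None
    sw = np.asarray(fit_params['sample_weight'])
    return sw[train], sw[test]

w = np.arange(6, dtype=float)
train_w, test_w = _split_score_weights(
    {'sample_weight': w}, np.array([0, 1, 2, 3]), np.array([4, 5]),
    sample_weight_in_score=True)
print(train_w, test_w)  # [0. 1. 2. 3.] [4. 5.]
```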

"""Fit estimator and compute scores for a given dataset split.

Parameters
@@ -402,6 +402,10 @@ def _fit_and_score(estimator, X, y, scorer, train, test, verbose,
step, which will always raise the error. Default is 'raise' but from
version 0.22 it will change to np.nan.

test_score_weight : string, optional, default: None
If specified, the key under which the sample weights are stored in the `fit_params` dict. If no such key is found, the test score is computed unweighted.

parameters : dict or None
Parameters to be set on the estimator.

@@ -455,8 +459,26 @@ def _fit_and_score(estimator, X, y, scorer, train, test, verbose,
for k, v in parameters.items()))
print("[CV] %s %s" % (msg, (64 - len(msg)) * '.'))

# Init to an empty dict if passed in as None
fit_params = fit_params if fit_params is not None else {}

sample_weight_key, weighted_test_score = None, False

if test_score_weight is not None and test_score_weight in fit_params:
sample_weight_key, weighted_test_score = test_score_weight, True

if weighted_test_score:
train_sample_weight = _index_param_value(X,
fit_params[sample_weight_key],
train)
test_sample_weight = _index_param_value(X,
fit_params[sample_weight_key],
test)
else:
train_sample_weight = None
test_sample_weight = None

Review comment: Does this work? I'm not fully convinced. When I discovered this issue, I debugged through it, and it seemed to me that _index_param_value was destructive to the calling parameter(s); I had to copy the sample_weight in fit_params in order to call _index_param_value twice, once with train and once with test.
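Whether _index_param_value actually mutates its argument is left unverified here, but a defensive copy per call sidesteps the question entirely. A sketch using plain NumPy fancy indexing, which always returns new arrays (the helper name is illustrative, not part of the diff):

```python
import numpy as np

def _slice_weights(fit_params, key, train, test):
    """Slice one stored weight vector for both folds without mutating it."""
    # The copy guarantees each slice below works on fresh data, so nothing
    # done with the train slice can leak into the test slice (or vice versa).
    sw = np.asarray(fit_params[key]).copy()
    return sw[train], sw[test]

fp = {'sample_weight': np.arange(5, dtype=float)}
tr_w, te_w = _slice_weights(fp, 'sample_weight',
                            np.array([0, 1, 2]), np.array([3, 4]))
print(fp['sample_weight'])  # unchanged: [0. 1. 2. 3. 4.]
```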

# Adjust length of sample weights
fit_params = dict([(k, _index_param_value(X, v, train))
for k, v in fit_params.items()])

@@ -516,11 +538,13 @@ def _fit_and_score(estimator, X, y, scorer, train, test, verbose,
else:
fit_time = time.time() - start_time
# _score will return dict if is_multimetric is True
test_scores = _score(estimator, X_test, y_test, scorer, is_multimetric)
test_scores = _score(estimator, X_test, y_test, scorer, is_multimetric,
sample_weight=test_sample_weight)
score_time = time.time() - start_time - fit_time
if return_train_score:
train_scores = _score(estimator, X_train, y_train, scorer,
is_multimetric)
is_multimetric,
sample_weight=train_sample_weight)

if verbose > 2:
if is_multimetric:
@@ -546,19 +570,23 @@ def _fit_and_score(estimator, X, y, scorer, train, test, verbose,
return ret


def _score(estimator, X_test, y_test, scorer, is_multimetric=False):
def _score(estimator, X_test, y_test, scorer, is_multimetric=False,
sample_weight=None):
"""Compute the score(s) of an estimator on a given test set.

Will return a single float if is_multimetric is False and a dict of floats,
if is_multimetric is True
"""
if is_multimetric:
return _multimetric_score(estimator, X_test, y_test, scorer)
return _multimetric_score(estimator, X_test, y_test, scorer,
sample_weight=sample_weight)
else:
score_kwgs = {} if sample_weight is None else \
{'sample_weight': sample_weight}
if y_test is None:
score = scorer(estimator, X_test)
score = scorer(estimator, X_test, **score_kwgs)
else:
score = scorer(estimator, X_test, y_test)
score = scorer(estimator, X_test, y_test, **score_kwgs)

if hasattr(score, 'item'):
try:
@@ -575,15 +603,17 @@ def _score(estimator, X_test, y_test, scorer, is_multimetric=False):
return score


def _multimetric_score(estimator, X_test, y_test, scorers):
def _multimetric_score(estimator, X_test, y_test, scorers,
sample_weight=None):
"""Return a dict of score for multimetric scoring"""
scores = {}

score_kwgs = {} if sample_weight is None else \
{'sample_weight': sample_weight}
for name, scorer in scorers.items():
if y_test is None:
score = scorer(estimator, X_test)
score = scorer(estimator, X_test, **score_kwgs)
else:
score = scorer(estimator, X_test, y_test)
score = scorer(estimator, X_test, y_test, **score_kwgs)

if hasattr(score, 'item'):
try:
Expand Down