-
-
Notifications
You must be signed in to change notification settings - Fork 25.8k
Fixes #4632: Allow cross validation to have weighted test scores #10806
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -273,7 +273,8 @@ def __len__(self): | |
|
||
|
||
def fit_grid_point(X, y, estimator, parameters, train, test, scorer, | ||
verbose, error_score='raise-deprecating', **fit_params): | ||
verbose, error_score='raise-deprecating', | ||
test_score_weight=None, **fit_params): | ||
"""Run fit on one set of parameters. | ||
|
||
Parameters | ||
|
@@ -318,6 +319,10 @@ def fit_grid_point(X, y, estimator, parameters, train, test, scorer, | |
step, which will always raise the error. Default is 'raise' but from | ||
version 0.22 it will change to np.nan. | ||
|
||
test_score_weight : string, optional, default: None | ||
If specified, this is the sample weight key in the `fit_params` dict. | ||
If there's no such key found, we will assume unweighted test score. | ||
|
||
Returns | ||
------- | ||
score : float | ||
|
@@ -337,7 +342,9 @@ def fit_grid_point(X, y, estimator, parameters, train, test, scorer, | |
test, verbose, parameters, | ||
fit_params=fit_params, | ||
return_n_test_samples=True, | ||
error_score=error_score) | ||
error_score=error_score, | ||
test_score_weight=test_score_weight | ||
) | ||
return scores, parameters, n_samples_test | ||
|
||
|
||
|
@@ -392,7 +399,8 @@ class BaseSearchCV(six.with_metaclass(ABCMeta, BaseEstimator, | |
def __init__(self, estimator, scoring=None, | ||
fit_params=None, n_jobs=1, iid='warn', | ||
refit=True, cv=None, verbose=0, pre_dispatch='2*n_jobs', | ||
error_score='raise-deprecating', return_train_score=True): | ||
error_score='raise-deprecating', return_train_score=True, | ||
test_score_weight=None): | ||
|
||
self.scoring = scoring | ||
self.estimator = estimator | ||
|
@@ -405,12 +413,13 @@ def __init__(self, estimator, scoring=None, | |
self.pre_dispatch = pre_dispatch | ||
self.error_score = error_score | ||
self.return_train_score = return_train_score | ||
self.test_score_weight = test_score_weight | ||
|
||
@property | ||
def _estimator_type(self): | ||
return self.estimator._estimator_type | ||
|
||
def score(self, X, y=None): | ||
def score(self, X, y=None, sample_weight=None): | ||
"""Returns the score on the given data, if the estimator has been refit. | ||
|
||
This uses the score defined by ``scoring`` where provided, and the | ||
|
@@ -426,6 +435,9 @@ def score(self, X, y=None): | |
Target relative to X for classification or regression; | ||
None for unsupervised learning. | ||
|
||
sample_weight : array-like of shape = (n_samples), optional | ||
Sample weights. | ||
|
||
Returns | ||
------- | ||
score : float | ||
|
@@ -436,7 +448,9 @@ def score(self, X, y=None): | |
"and the estimator doesn't provide one %s" | ||
% self.best_estimator_) | ||
score = self.scorer_[self.refit] if self.multimetric_ else self.scorer_ | ||
return score(self.best_estimator_, X, y) | ||
score_kwgs = {} if sample_weight is None else \ | ||
{'sample_weight': sample_weight} | ||
return score(self.best_estimator_, X, y, **score_kwgs) | ||
|
||
def _check_is_fitted(self, method_name): | ||
if not self.refit: | ||
|
@@ -636,7 +650,8 @@ def fit(self, X, y=None, groups=None, **fit_params): | |
return_train_score=self.return_train_score, | ||
return_n_test_samples=True, | ||
return_times=True, return_parameters=False, | ||
error_score=self.error_score) | ||
error_score=self.error_score, | ||
test_score_weight=self.test_score_weight) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I wouldn't pass the |
||
for parameters, (train, test) in product(candidate_params, | ||
cv.split(X, y, groups))) | ||
|
||
|
@@ -1088,12 +1103,13 @@ class GridSearchCV(BaseSearchCV): | |
def __init__(self, estimator, param_grid, scoring=None, fit_params=None, | ||
n_jobs=1, iid='warn', refit=True, cv=None, verbose=0, | ||
pre_dispatch='2*n_jobs', error_score='raise-deprecating', | ||
return_train_score="warn"): | ||
return_train_score="warn", test_score_weight=None): | ||
super(GridSearchCV, self).__init__( | ||
estimator=estimator, scoring=scoring, fit_params=fit_params, | ||
n_jobs=n_jobs, iid=iid, refit=refit, cv=cv, verbose=verbose, | ||
pre_dispatch=pre_dispatch, error_score=error_score, | ||
return_train_score=return_train_score) | ||
return_train_score=return_train_score, | ||
test_score_weight=test_score_weight) | ||
self.param_grid = param_grid | ||
_check_param_grid(param_grid) | ||
|
||
|
@@ -1389,15 +1405,17 @@ class RandomizedSearchCV(BaseSearchCV): | |
def __init__(self, estimator, param_distributions, n_iter=10, scoring=None, | ||
fit_params=None, n_jobs=1, iid='warn', refit=True, cv=None, | ||
verbose=0, pre_dispatch='2*n_jobs', random_state=None, | ||
error_score='raise-deprecating', return_train_score="warn"): | ||
error_score='raise-deprecating', return_train_score="warn", | ||
test_score_weight=None): | ||
self.param_distributions = param_distributions | ||
self.n_iter = n_iter | ||
self.random_state = random_state | ||
super(RandomizedSearchCV, self).__init__( | ||
estimator=estimator, scoring=scoring, fit_params=fit_params, | ||
n_jobs=n_jobs, iid=iid, refit=refit, cv=cv, verbose=verbose, | ||
pre_dispatch=pre_dispatch, error_score=error_score, | ||
return_train_score=return_train_score) | ||
return_train_score=return_train_score, | ||
test_score_weight=test_score_weight) | ||
|
||
def _get_param_iterator(self): | ||
"""Return ParameterSampler instance for the given distributions""" | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -361,7 +361,7 @@ def _fit_and_score(estimator, X, y, scorer, train, test, verbose, | |
parameters, fit_params, return_train_score=False, | ||
return_parameters=False, return_n_test_samples=False, | ||
return_times=False, return_estimator=False, | ||
error_score='raise-deprecating'): | ||
error_score='raise-deprecating', test_score_weight=None): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I would have the flag here. Whether it's a parameter or in |
||
"""Fit estimator and compute scores for a given dataset split. | ||
|
||
Parameters | ||
|
@@ -402,6 +402,10 @@ def _fit_and_score(estimator, X, y, scorer, train, test, verbose, | |
step, which will always raise the error. Default is 'raise' but from | ||
version 0.22 it will change to np.nan. | ||
|
||
test_score_weight : string, optional, default: None | ||
If specified, this is the sample weight key in the `fit_params` dict. | ||
If there's no such key found, we will assume unweighted test score. | ||
|
||
parameters : dict or None | ||
Parameters to be set on the estimator. | ||
|
||
|
@@ -455,8 +459,26 @@ def _fit_and_score(estimator, X, y, scorer, train, test, verbose, | |
for k, v in parameters.items())) | ||
print("[CV] %s %s" % (msg, (64 - len(msg)) * '.')) | ||
|
||
# Adjust length of sample weights | ||
# Init to empty dict if pass in as None | ||
fit_params = fit_params if fit_params is not None else {} | ||
|
||
sample_weight_key, weighted_test_score = None, False | ||
|
||
if test_score_weight is not None and test_score_weight in fit_params: | ||
sample_weight_key, weighted_test_score = test_score_weight, True | ||
|
||
if weighted_test_score: | ||
train_sample_weight = _index_param_value(X, | ||
fit_params[sample_weight_key], | ||
train) | ||
test_sample_weight = _index_param_value(X, | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Does this work? I'm not fully convinced. When I discovered this issue, I debugged through it and it seemed to me that |
||
fit_params[sample_weight_key], | ||
test) | ||
else: | ||
train_sample_weight = None | ||
test_sample_weight = None | ||
|
||
# Adjust length of sample weights | ||
fit_params = dict([(k, _index_param_value(X, v, train)) | ||
for k, v in fit_params.items()]) | ||
|
||
|
@@ -516,11 +538,13 @@ def _fit_and_score(estimator, X, y, scorer, train, test, verbose, | |
else: | ||
fit_time = time.time() - start_time | ||
# _score will return dict if is_multimetric is True | ||
test_scores = _score(estimator, X_test, y_test, scorer, is_multimetric) | ||
test_scores = _score(estimator, X_test, y_test, scorer, is_multimetric, | ||
sample_weight=test_sample_weight) | ||
score_time = time.time() - start_time - fit_time | ||
if return_train_score: | ||
train_scores = _score(estimator, X_train, y_train, scorer, | ||
is_multimetric) | ||
is_multimetric, | ||
sample_weight=train_sample_weight) | ||
|
||
if verbose > 2: | ||
if is_multimetric: | ||
|
@@ -546,19 +570,23 @@ def _fit_and_score(estimator, X, y, scorer, train, test, verbose, | |
return ret | ||
|
||
|
||
def _score(estimator, X_test, y_test, scorer, is_multimetric=False): | ||
def _score(estimator, X_test, y_test, scorer, is_multimetric=False, | ||
sample_weight=None): | ||
"""Compute the score(s) of an estimator on a given test set. | ||
|
||
Will return a single float if is_multimetric is False and a dict of floats, | ||
if is_multimetric is True | ||
""" | ||
if is_multimetric: | ||
return _multimetric_score(estimator, X_test, y_test, scorer) | ||
return _multimetric_score(estimator, X_test, y_test, scorer, | ||
sample_weight=sample_weight) | ||
else: | ||
score_kwgs = {} if sample_weight is None else \ | ||
{'sample_weight': sample_weight} | ||
if y_test is None: | ||
score = scorer(estimator, X_test) | ||
score = scorer(estimator, X_test, **score_kwgs) | ||
else: | ||
score = scorer(estimator, X_test, y_test) | ||
score = scorer(estimator, X_test, y_test, **score_kwgs) | ||
|
||
if hasattr(score, 'item'): | ||
try: | ||
|
@@ -575,15 +603,17 @@ def _score(estimator, X_test, y_test, scorer, is_multimetric=False): | |
return score | ||
|
||
|
||
def _multimetric_score(estimator, X_test, y_test, scorers): | ||
def _multimetric_score(estimator, X_test, y_test, scorers, | ||
sample_weight=None): | ||
"""Return a dict of score for multimetric scoring""" | ||
scores = {} | ||
|
||
score_kwgs = {} if sample_weight is None else \ | ||
{'sample_weight': sample_weight} | ||
for name, scorer in scorers.items(): | ||
if y_test is None: | ||
score = scorer(estimator, X_test) | ||
score = scorer(estimator, X_test, **score_kwgs) | ||
else: | ||
score = scorer(estimator, X_test, y_test) | ||
score = scorer(estimator, X_test, y_test, **score_kwgs) | ||
|
||
if hasattr(score, 'item'): | ||
try: | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment.
The reason will be displayed to describe this comment to others. Learn more.
Instead of using a `test_score_weight`, I would consider a (Boolean) parameter (called `sample_weight_in_score` or something) that says whether to use the `sample_weight` parameter that would appear in `fit_params`. I think that unless you can think of a realistic example where you would want different importance weights for metric calculation than for training, a flag would be better because it would potentially avoid screw-ups.