Skip to content

Weighted scoring in cross validation (Closes #4632) #13432

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 29 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
b305d1a
Updated _validation to use sample_weight from training in the metric …
ryan-deak-zefr Feb 14, 2019
3f2e9da
Removed old commented out code.
ryan-deak-zefr Feb 14, 2019
8a8faea
test comment
ryan-deak-zefr Feb 14, 2019
a8801a3
rearranged comments in test
ryan-deak-zefr Feb 15, 2019
81100fa
formatting and comment
ryan-deak-zefr Feb 15, 2019
255c720
comments
ryan-deak-zefr Feb 15, 2019
e09ecf9
added comment for motivation behind directly passing sample_weight to…
ryan-deak-zefr Feb 15, 2019
e12bd17
added else: raise e to _apply_scorer
ryan-deak-zefr Feb 15, 2019
b364f40
Merge pull request #1 from ZEFR-INC/DAS-1145_sample_wt_validation
Feb 15, 2019
699a661
sample weighting for cross validation inside model_selection/_search.py
ryan-deak-zefr Feb 26, 2019
38313b6
make tests more difficult to pass.
ryan-deak-zefr Feb 26, 2019
a32c4de
Fixed bugs that caused other tests to fail.
ryan-deak-zefr Feb 27, 2019
73bc931
collape_nones (sic) -> collapse_nones
ryan-deak-zefr Feb 27, 2019
c83f5eb
Swapped subtraction arguments.
ryan-deak-zefr Feb 27, 2019
2e6c892
added more tests
ryan-deak-zefr Feb 27, 2019
b042193
even more tests.
ryan-deak-zefr Feb 27, 2019
7e54469
Show that the math in test_sample_weight_cross_validation works
ryan-deak-zefr Feb 27, 2019
853c92c
Use exact rational values for expected value
ryan-deak-zefr Feb 27, 2019
5d39641
renaming test variables
ryan-deak-zefr Feb 27, 2019
4c823fb
more testing the tests
ryan-deak-zefr Feb 27, 2019
e5b876f
updated tests
ryan-deak-zefr Feb 27, 2019
d145b50
simplified test
ryan-deak-zefr Feb 27, 2019
fc84b51
testing testing testing
ryan-deak-zefr Feb 27, 2019
0ac5755
added comment
ryan-deak-zefr Feb 27, 2019
940b9a0
removed commented code
ryan-deak-zefr Feb 27, 2019
0e5704e
comma in comment
ryan-deak-zefr Feb 27, 2019
5913e40
Merge pull request #2 from ZEFR-INC/DAS-1145_sample_wt_validation
ryan-deak-zefr Feb 27, 2019
0688a16
whitespace issues failing on public build.
ryan-deak-zefr Mar 11, 2019
f3493bd
Merge pull request #3 from ZEFR-INC/pub_PR_build_check_failures
ryan-deak-zefr Mar 11, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
82 changes: 75 additions & 7 deletions sklearn/model_selection/_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -648,6 +648,35 @@ def fit(self, X, y=None, groups=None, **fit_params):
all_candidate_params = []
all_out = []

def is_none(x):
    """Return True iff ``x`` is the ``None`` singleton (identity test)."""
    return x is None

def collapse_nones(xs):
    """Collapse a sequence to ``None`` when it is missing or incomplete.

    Returns ``None`` if ``xs`` itself is ``None`` or if any element of
    ``xs`` is ``None``; otherwise returns ``xs`` unchanged.
    """
    if xs is None:
        return None
    for item in xs:
        if item is None:
            return None
    return xs

def weights_sums(train_ind, test_ind, sample_weight):
    """Sum the sample weights over the train and test index sets.

    Returns ``(train_weight_sum, test_weight_sum)``, or ``(None, None)``
    when no ``sample_weight`` array was supplied.
    """
    if sample_weight is None:
        return None, None
    return np.sum(sample_weight[train_ind]), np.sum(sample_weight[test_ind])

def fit_and_score_and_sw_sum(est, X, y, train, test,
                             parameters,
                             **fit_and_score_kwargs):
    """Run ``_fit_and_score`` and additionally report weight sums.

    Returns a 3-tuple ``(fit_and_score_result, train_weight_sum,
    test_weight_sum)`` where the weight sums are ``None`` when no
    ``sample_weight`` was supplied via ``fit_params``.
    """
    result = _fit_and_score(est, X, y, train=train, test=test,
                            parameters=parameters,
                            **fit_and_score_kwargs)

    # sample_weight travels inside the nested fit_params dict, if at all.
    fit_params = fit_and_score_kwargs.get("fit_params", {})
    sample_weight = fit_params.get("sample_weight", None)

    train_wt, test_wt = weights_sums(train, test, sample_weight)
    return result, train_wt, test_wt

def evaluate_candidates(candidate_params):
candidate_params = list(candidate_params)
n_candidates = len(candidate_params)
Expand All @@ -657,7 +686,8 @@ def evaluate_candidates(candidate_params):
" totalling {2} fits".format(
n_splits, n_candidates, n_candidates * n_splits))

out = parallel(delayed(_fit_and_score)(clone(base_estimator),
out = parallel(delayed(fit_and_score_and_sw_sum)(
clone(base_estimator),
X, y,
train=train, test=test,
parameters=parameters,
Expand All @@ -666,6 +696,15 @@ def evaluate_candidates(candidate_params):
in product(candidate_params,
cv.split(X, y, groups)))

out = list(out)
if 0 < len(out):
out, train_wts, test_wts = zip(*out)
else:
out, train_wts, test_wts = ([], [], [])

train_wts = collapse_nones(train_wts)
test_wts = collapse_nones(test_wts)

if len(out) < 1:
raise ValueError('No fits were performed. '
'Was the CV iterator empty? '
Expand All @@ -682,7 +721,8 @@ def evaluate_candidates(candidate_params):

nonlocal results
results = self._format_results(
all_candidate_params, scorers, n_splits, all_out)
all_candidate_params, scorers, n_splits, all_out,
train_wts, test_wts)
return results

self._run_search(evaluate_candidates)
Expand Down Expand Up @@ -725,7 +765,13 @@ def evaluate_candidates(candidate_params):

return self

def _format_results(self, candidate_params, scorers, n_splits, out):
def _format_results(self, candidate_params, scorers, n_splits, out,
train_sample_weight_sums=None,
test_sample_weight_sums=None):
# train_sample_weight_sums is a tuple/list of float. If not supplied,
# the corresponding number of examples associated with the fold(s)
# will be used instead. The same is true for test_sample_weight_sums.

n_candidates = len(candidate_params)

# if one choose to see train score, "out" will contain train score info
Expand Down Expand Up @@ -788,9 +834,30 @@ def _store(key_name, array, weights=None, splits=False, rank=False):
# Store a list of param dicts at the key 'params'
results['params'] = candidate_params

# NOTE test_sample counts (weights) remain the same for all candidates
test_sample_counts = np.array(test_sample_counts[:n_splits],
dtype=np.int)
# training train_sample_weight_sums needs to be done first because
# test_sample_counts overrides itself in the IF statement below with
# test_sample_weight_sums.
if self.return_train_score:
if train_sample_weight_sums is None:
# Because the cv iterators may not iterate over the entire
# dataset, we can't just use the dataset size directly.
samples = int(np.sum(test_sample_counts[:n_splits]))
train_sample_counts = samples - \
np.array(test_sample_counts[:n_splits], dtype=np.int)
else:
train_sample_counts = np.array(
train_sample_weight_sums[:n_splits],
dtype=np.float64)

if test_sample_weight_sums is None:
# NOTE test_sample counts (weights) remain the same for all
# candidates
test_sample_counts = np.array(test_sample_counts[:n_splits],
dtype=np.int)
else:
test_sample_counts = np.array(test_sample_weight_sums[:n_splits],
dtype=np.float64)

iid = self.iid
if self.iid == 'warn':
warn = False
Expand Down Expand Up @@ -820,7 +887,8 @@ def _store(key_name, array, weights=None, splits=False, rank=False):
weights=test_sample_counts if iid else None)
if self.return_train_score:
_store('train_%s' % scorer_name, train_scores[scorer_name],
splits=True)
splits=True,
weights=train_sample_counts if iid else None)

return results

Expand Down
122 changes: 104 additions & 18 deletions sklearn/model_selection/_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -391,6 +391,10 @@ def _fit_and_score(estimator, X, y, scorer, train, test, verbose,
error_score='raise-deprecating'):
"""Fit estimator and compute scores for a given dataset split.

NOTE: If sample_weight is supplied in ``fit_params``, it will be used
both for fitting the estimator and passed on to the scorer for use in
metric calculations.

Parameters
----------
estimator : estimator object implementing 'fit'
Expand Down Expand Up @@ -487,6 +491,18 @@ def _fit_and_score(estimator, X, y, scorer, train, test, verbose,

# Adjust length of sample weights
fit_params = fit_params if fit_params is not None else {}

# Appears before fit_params indexing because the update to fit_params
# is reassigned to fit_params and throws away test-based sample weights.
if 'sample_weight' in fit_params and \
fit_params['sample_weight'] is not None:
test_sample_weight = _index_param_value(
X,
fit_params['sample_weight'],
test)
else:
test_sample_weight = None

fit_params = {k: _index_param_value(X, v, train)
for k, v in fit_params.items()}

Expand Down Expand Up @@ -515,8 +531,8 @@ def _fit_and_score(estimator, X, y, scorer, train, test, verbose,
raise
elif error_score == 'raise-deprecating':
warnings.warn("From version 0.22, errors during fit will result "
"in a cross validation score of NaN by default. Use "
"error_score='raise' if you want an exception "
"in a cross validation score of NaN by default. Use"
" error_score='raise' if you want an exception "
"raised or error_score=np.nan to adopt the "
"behavior from version 0.22.",
FutureWarning)
Expand All @@ -533,23 +549,27 @@ def _fit_and_score(estimator, X, y, scorer, train, test, verbose,
if return_train_score:
train_scores = error_score
warnings.warn("Estimator fit failed. The score on this train-test"
" partition for these parameters will be set to %f. "
"Details: \n%s" %
" partition for these parameters will be set to %f."
" Details: \n%s" %
(error_score, format_exception_only(type(e), e)[0]),
FitFailedWarning)
else:
raise ValueError("error_score must be the string 'raise' or a"
" numeric value. (Hint: if using 'raise', please"
" make sure that it has been spelled correctly.)")
" make sure that it has been spelled correctly.)"
)

else:
fit_time = time.time() - start_time
# _score will return dict if is_multimetric is True
test_scores = _score(estimator, X_test, y_test, scorer, is_multimetric)
test_scores = _score(estimator, X_test, y_test, scorer,
is_multimetric, test_sample_weight)

score_time = time.time() - start_time - fit_time
if return_train_score:
train_scores = _score(estimator, X_train, y_train, scorer,
is_multimetric)
is_multimetric,
fit_params.get('sample_weight', None))
if verbose > 2:
if is_multimetric:
for scorer_name in sorted(test_scores):
Expand Down Expand Up @@ -582,19 +602,22 @@ def _fit_and_score(estimator, X, y, scorer, train, test, verbose,
return ret


def _score(estimator, X_test, y_test, scorer, is_multimetric=False):
def _score(estimator, X_test, y_test, scorer, is_multimetric=False,
sample_weight=None):
"""Compute the score(s) of an estimator on a given test set.

Will return a single float if is_multimetric is False and a dict of floats,
if is_multimetric is True
"""

# sample_weight is optional because we want to put it at the end to allow
# backward compatibility.

if is_multimetric:
return _multimetric_score(estimator, X_test, y_test, scorer)
return _multimetric_score(estimator, X_test, y_test, scorer,
sample_weight)
else:
if y_test is None:
score = scorer(estimator, X_test)
else:
score = scorer(estimator, X_test, y_test)
score = _apply_scorer(estimator, X_test, y_test, scorer, sample_weight)

if hasattr(score, 'item'):
try:
Expand All @@ -611,15 +634,12 @@ def _score(estimator, X_test, y_test, scorer, is_multimetric=False):
return score


def _multimetric_score(estimator, X_test, y_test, scorers):
def _multimetric_score(estimator, X_test, y_test, scorers, sample_weight):
"""Return a dict of score for multimetric scoring"""
scores = {}

for name, scorer in scorers.items():
if y_test is None:
score = scorer(estimator, X_test)
else:
score = scorer(estimator, X_test, y_test)
score = _apply_scorer(estimator, X_test, y_test, scorer, sample_weight)

if hasattr(score, 'item'):
try:
Expand All @@ -637,6 +657,72 @@ def _multimetric_score(estimator, X_test, y_test, scorers):
return scores


def _apply_scorer(estimator, X, y, scorer, sample_weight):
"""Applies the scorer to the estimator, given the data and sample_weight.

If ``sample_weight`` is None or contains all ones, ``sample_weight`` WILL
NOT be passed to ``scorer``; otherwise, it will be passed.

In the event that ``sample_weight`` is provided and used but ``scorer``
doesn't accept a ``sample_weight`` parameter, then a ``TypeError`` should
likely be raised.

Parameters
----------
estimator : estimator object implementing 'fit'
The object that was used to fit the data.

X : array-like of shape at least 2D
The data to fit.

y : array-like
The target variable to try to predict in the case of
supervised learning. (May be None)

scorer : A single callable.
Should return a single float.

The callable object / fn should have signature
``scorer(estimator, X, y, sample_weight=None)`` if ``sample_weight``.

sample_weight : array-like, shape (y)
sample weights to use during metric calculation. May be None.

Returns
-------
score : float
Score returned by ``scorer`` applied to ``X`` and ``y`` given
``sample_weight``.
"""
if sample_weight is None or np.all(sample_weight == 1):
if y is None:
score = scorer(estimator, X)
else:
score = scorer(estimator, X, y)
else:
try:
# Explicitly force the sample_weight parameter so that an error
# will be raised in the event that the scorer doesn't take a
# sample_weight argument. This is preferable to passing it as
# a keyword args dict in the case that it just ignores parameters
# that are not accepted by the scorer.
if y is None:
score = scorer(estimator, X, sample_weight=sample_weight)
else:
score = scorer(estimator, X, y, sample_weight=sample_weight)
except TypeError as e:
if 'sample_weight' in str(e):
raise TypeError(
(
"Attempted to use 'sample_weight' for training "
"but supplied a scorer that doesn't accept a "
"'sample_weight' parameter."
), e)
else:
raise e
return score


def cross_val_predict(estimator, X, y=None, groups=None, cv='warn',
n_jobs=None, verbose=0, fit_params=None,
pre_dispatch='2*n_jobs', method='predict'):
Expand Down
Loading