Skip to content

Weighted scoring in cross validation (Closes #4632) #13432

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 29 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
b305d1a
Updated _validation to use sample_weight from training in the metric …
ryan-deak-zefr Feb 14, 2019
3f2e9da
Removed old commented out code.
ryan-deak-zefr Feb 14, 2019
8a8faea
test comment
ryan-deak-zefr Feb 14, 2019
a8801a3
rearranged comments in test
ryan-deak-zefr Feb 15, 2019
81100fa
formatting and comment
ryan-deak-zefr Feb 15, 2019
255c720
comments
ryan-deak-zefr Feb 15, 2019
e09ecf9
added comment for motivation behind directly passing sample_weight to…
ryan-deak-zefr Feb 15, 2019
e12bd17
added else: raise e to _apply_scorer
ryan-deak-zefr Feb 15, 2019
b364f40
Merge pull request #1 from ZEFR-INC/DAS-1145_sample_wt_validation
Feb 15, 2019
699a661
sample weighting for cross validation inside model_selection/_search.py
ryan-deak-zefr Feb 26, 2019
38313b6
make tests more difficult to pass.
ryan-deak-zefr Feb 26, 2019
a32c4de
Fixed bugs that caused other tests to fail.
ryan-deak-zefr Feb 27, 2019
73bc931
collape_nones (sic) -> collapse_nones
ryan-deak-zefr Feb 27, 2019
c83f5eb
Swapped subtraction arguments.
ryan-deak-zefr Feb 27, 2019
2e6c892
added more tests
ryan-deak-zefr Feb 27, 2019
b042193
even more tests.
ryan-deak-zefr Feb 27, 2019
7e54469
Show that the math in test_sample_weight_cross_validation works
ryan-deak-zefr Feb 27, 2019
853c92c
Use exact rational values for expected value
ryan-deak-zefr Feb 27, 2019
5d39641
renaming test variables
ryan-deak-zefr Feb 27, 2019
4c823fb
more testing the tests
ryan-deak-zefr Feb 27, 2019
e5b876f
updated tests
ryan-deak-zefr Feb 27, 2019
d145b50
simplified test
ryan-deak-zefr Feb 27, 2019
fc84b51
testing testing testing
ryan-deak-zefr Feb 27, 2019
0ac5755
added comment
ryan-deak-zefr Feb 27, 2019
940b9a0
removed commented code
ryan-deak-zefr Feb 27, 2019
0e5704e
comma in comment
ryan-deak-zefr Feb 27, 2019
5913e40
Merge pull request #2 from ZEFR-INC/DAS-1145_sample_wt_validation
ryan-deak-zefr Feb 27, 2019
0688a16
whitespace issues failing on public build.
ryan-deak-zefr Mar 11, 2019
f3493bd
Merge pull request #3 from ZEFR-INC/pub_PR_build_check_failures
ryan-deak-zefr Mar 11, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
82 changes: 75 additions & 7 deletions sklearn/model_selection/_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -648,6 +648,35 @@ def fit(self, X, y=None, groups=None, **fit_params):
all_candidate_params = []
all_out = []

def is_none(x):
    """Return True iff ``x`` is the ``None`` singleton (identity test)."""
    return x is None

def collapse_nones(xs):
    """Collapse a sequence to ``None`` when it is missing or incomplete.

    Returns ``None`` if ``xs`` itself is ``None`` or if any element of
    ``xs`` is ``None``; otherwise returns ``xs`` unchanged.
    """
    if xs is None:
        return None
    for item in xs:
        if item is None:
            return None
    return xs

def weights_sums(train_ind, test_ind, sample_weight):
    """Sum the sample weights over the train and test index sets.

    Returns ``(train_weight_sum, test_weight_sum)``, or ``(None, None)``
    when no ``sample_weight`` array was supplied.
    """
    if sample_weight is None:
        return None, None
    return np.sum(sample_weight[train_ind]), np.sum(sample_weight[test_ind])

def fit_and_score_and_sw_sum(est, X, y, train, test,
                             parameters,
                             **fit_and_score_kwargs):
    """Run ``_fit_and_score`` and additionally report weight sums.

    Returns a 3-tuple ``(fit_and_score_result, train_weight_sum,
    test_weight_sum)`` where the weight sums are ``None`` when no
    ``sample_weight`` was supplied via ``fit_params``.
    """
    result = _fit_and_score(est, X, y, train=train, test=test,
                            parameters=parameters,
                            **fit_and_score_kwargs)

    # sample_weight travels inside the nested fit_params dict, if at all.
    fit_params = fit_and_score_kwargs.get("fit_params", {})
    sample_weight = fit_params.get("sample_weight", None)

    train_wt, test_wt = weights_sums(train, test, sample_weight)
    return result, train_wt, test_wt

def evaluate_candidates(candidate_params):
candidate_params = list(candidate_params)
n_candidates = len(candidate_params)
Expand All @@ -657,7 +686,8 @@ def evaluate_candidates(candidate_params):
" totalling {2} fits".format(
n_splits, n_candidates, n_candidates * n_splits))

out = parallel(delayed(_fit_and_score)(clone(base_estimator),
out = parallel(delayed(fit_and_score_and_sw_sum)(
clone(base_estimator),
X, y,
train=train, test=test,
parameters=parameters,
Expand All @@ -666,6 +696,15 @@ def evaluate_candidates(candidate_params):
in product(candidate_params,
cv.split(X, y, groups)))

out = list(out)
if 0 < len(out):
out, train_wts, test_wts = zip(*out)
else:
out, train_wts, test_wts = ([], [], [])

train_wts = collapse_nones(train_wts)
test_wts = collapse_nones(test_wts)

if len(out) < 1:
raise ValueError('No fits were performed. '
'Was the CV iterator empty? '
Expand All @@ -682,7 +721,8 @@ def evaluate_candidates(candidate_params):

nonlocal results
results = self._format_results(
all_candidate_params, scorers, n_splits, all_out)
all_candidate_params, scorers, n_splits, all_out,
train_wts, test_wts)
return results

self._run_search(evaluate_candidates)
Expand Down Expand Up @@ -725,7 +765,13 @@ def evaluate_candidates(candidate_params):

return self

def _format_results(self, candidate_params, scorers, n_splits, out):
def _format_results(self, candidate_params, scorers, n_splits, out,
train_sample_weight_sums=None,
test_sample_weight_sums=None):
# train_sample_weight_sums is a tuple/list of float. If not supplied,
# the corresponding number of examples associated with the fold(s)
# will be used instead. The same is true for test_sample_weight_sums.

n_candidates = len(candidate_params)

# if one choose to see train score, "out" will contain train score info
Expand Down Expand Up @@ -788,9 +834,30 @@ def _store(key_name, array, weights=None, splits=False, rank=False):
# Store a list of param dicts at the key 'params'
results['params'] = candidate_params

# NOTE test_sample counts (weights) remain the same for all candidates
test_sample_counts = np.array(test_sample_counts[:n_splits],
dtype=np.int)
# training train_sample_weight_sums needs to be done first because
# test_sample_counts overrides itself in the IF statement below with
# test_sample_weight_sums.
if self.return_train_score:
if train_sample_weight_sums is None:
# Because the cv iterators may not iterate over the entire
# dataset, we can't just use the dataset size directly.
samples = int(np.sum(test_sample_counts[:n_splits]))
train_sample_counts = samples - \
np.array(test_sample_counts[:n_splits], dtype=np.int)
else:
train_sample_counts = np.array(
train_sample_weight_sums[:n_splits],
dtype=np.float64)

if test_sample_weight_sums is None:
# NOTE test_sample counts (weights) remain the same for all
# candidates
test_sample_counts = np.array(test_sample_counts[:n_splits],
dtype=np.int)
else:
test_sample_counts = np.array(test_sample_weight_sums[:n_splits],
dtype=np.float64)

iid = self.iid
if self.iid == 'warn':
warn = False
Expand Down Expand Up @@ -820,7 +887,8 @@ def _store(key_name, array, weights=None, splits=False, rank=False):
weights=test_sample_counts if iid else None)
if self.return_train_score:
_store('train_%s' % scorer_name, train_scores[scorer_name],
splits=True)
splits=True,
weights=train_sample_counts if iid else None)

return results

Expand Down
122 changes: 104 additions & 18 deletions sklearn/model_selection/_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -391,6 +391,10 @@ def _fit_and_score(estimator, X, y, scorer, train, test, verbose,
error_score='raise-deprecating'):
"""Fit estimator and compute scores for a given dataset split.

NOTE: If sample_weight is supplied in ``fit_params``, it will be used
both for fitting the estimator and passed on to the scorer for use in
metric calculations.

Parameters
----------
estimator : estimator object implementing 'fit'
Expand Down Expand Up @@ -487,6 +491,18 @@ def _fit_and_score(estimator, X, y, scorer, train, test, verbose,

# Adjust length of sample weights
fit_params = fit_params if fit_params is not None else {}

# Appears before fit_params indexing because the update to fit_params
# is reassigned to fit_params and throws away test-based sample weights.
if 'sample_weight' in fit_params and \
fit_params['sample_weight'] is not None:
test_sample_weight = _index_param_value(
X,
fit_params['sample_weight'],
test)
else:
test_sample_weight = None

fit_params = {k: _index_param_value(X, v, train)
for k, v in fit_params.items()}

Expand Down Expand Up @@ -515,8 +531,8 @@ def _fit_and_score(estimator, X, y, scorer, train, test, verbose,
raise
elif error_score == 'raise-deprecating':
warnings.warn("From version 0.22, errors during fit will result "
"in a cross validation score of NaN by default. Use "
"error_score='raise' if you want an exception "
"in a cross validation score of NaN by default. Use"
" error_score='raise' if you want an exception "
"raised or error_score=np.nan to adopt the "
"behavior from version 0.22.",
FutureWarning)
Expand All @@ -533,23 +549,27 @@ def _fit_and_score(estimator, X, y, scorer, train, test, verbose,
if return_train_score:
train_scores = error_score
warnings.warn("Estimator fit failed. The score on this train-test"
" partition for these parameters will be set to %f. "
"Details: \n%s" %
" partition for these parameters will be set to %f."
" Details: \n%s" %
(error_score, format_exception_only(type(e), e)[0]),
FitFailedWarning)
else:
raise ValueError("error_score must be the string 'raise' or a"
" numeric value. (Hint: if using 'raise', please"
" make sure that it has been spelled correctly.)")
" make sure that it has been spelled correctly.)"
)

else:
fit_time = time.time() - start_time
# _score will return dict if is_multimetric is True
test_scores = _score(estimator, X_test, y_test, scorer, is_multimetric)
test_scores = _score(estimator, X_test, y_test, scorer,
is_multimetric, test_sample_weight)

score_time = time.time() - start_time - fit_time
if return_train_score:
train_scores = _score(estimator, X_train, y_train, scorer,
is_multimetric)
is_multimetric,
fit_params.get('sample_weight', None))
if verbose > 2:
if is_multimetric:
for scorer_name in sorted(test_scores):
Expand Down Expand Up @@ -582,19 +602,22 @@ def _fit_and_score(estimator, X, y, scorer, train, test, verbose,
return ret


def _score(estimator, X_test, y_test, scorer, is_multimetric=False):
def _score(estimator, X_test, y_test, scorer, is_multimetric=False,
sample_weight=None):
"""Compute the score(s) of an estimator on a given test set.

Will return a single float if is_multimetric is False and a dict of floats,
if is_multimetric is True
"""

# sample_weight is optional because we want to put it at the end to allow
# backward compatibility.

if is_multimetric:
return _multimetric_score(estimator, X_test, y_test, scorer)
return _multimetric_score(estimator, X_test, y_test, scorer,
sample_weight)
else:
if y_test is None:
score = scorer(estimator, X_test)
else:
score = scorer(estimator, X_test, y_test)
score = _apply_scorer(estimator, X_test, y_test, scorer, sample_weight)

if hasattr(score, 'item'):
try:
Expand All @@ -611,15 +634,12 @@ def _score(estimator, X_test, y_test, scorer, is_multimetric=False):
return score


def _multimetric_score(estimator, X_test, y_test, scorers):
def _multimetric_score(estimator, X_test, y_test, scorers, sample_weight):
"""Return a dict of score for multimetric scoring"""
scores = {}

for name, scorer in scorers.items():
if y_test is None:
score = scorer(estimator, X_test)
else:
score = scorer(estimator, X_test, y_test)
score = _apply_scorer(estimator, X_test, y_test, scorer, sample_weight)

if hasattr(score, 'item'):
try:
Expand All @@ -637,6 +657,72 @@ def _multimetric_score(estimator, X_test, y_test, scorers):
return scores


def _apply_scorer(estimator, X, y, scorer, sample_weight):
"""Applies the scorer to the estimator, given the data and sample_weight.

If ``sample_weight`` is None or contains all ones, ``sample_weight`` WILL
NOT be passed to ``scorer``; otherwise, it will be passed.

In the event that ``sample_weight`` is provided and used but ``scorer``
doesn't accept a ``sample_weight`` parameter, then a ``TypeError`` should
likely be raised.

Parameters
----------
estimator : estimator object implementing 'fit'
The object that was used to fit the data.

X : array-like of shape at least 2D
The data to fit.

y : array-like
The target variable to try to predict in the case of
supervised learning. (May be None)

scorer : A single callable.
Should return a single float.

The callable object / fn should have signature
``scorer(estimator, X, y, sample_weight=None)`` if ``sample_weight``.

sample_weight : array-like, shape (y)
sample weights to use during metric calculation. May be None.

Returns
-------
score : float
Score returned by ``scorer`` applied to ``X`` and ``y`` given
``sample_weight``.
"""
if sample_weight is None or np.all(sample_weight == 1):
if y is None:
score = scorer(estimator, X)
else:
score = scorer(estimator, X, y)
else:
try:
# Explicitly force the sample_weight parameter so that an error
# will be raised in the event that the scorer doesn't take a
# sample_weight argument. This is preferable to passing it as
# a keyword args dict in the case that it just ignores parameters
# that are not accepted by the scorer.
if y is None:
score = scorer(estimator, X, sample_weight=sample_weight)
else:
score = scorer(estimator, X, y, sample_weight=sample_weight)
except TypeError as e:
if 'sample_weight' in str(e):
raise TypeError(
(
"Attempted to use 'sample_weight' for training "
"but supplied a scorer that doesn't accept a "
"'sample_weight' parameter."
), e)
else:
raise e
return score


def cross_val_predict(estimator, X, y=None, groups=None, cv='warn',
n_jobs=None, verbose=0, fit_params=None,
pre_dispatch='2*n_jobs', method='predict'):
Expand Down
Loading