-
-
Notifications
You must be signed in to change notification settings - Fork 25.8k
Fixes #4632: Allow cross validation to have weighted test scores #10806
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -273,7 +273,8 @@ def __len__(self): | |
|
||
|
||
def fit_grid_point(X, y, estimator, parameters, train, test, scorer, | ||
verbose, error_score='raise-deprecating', **fit_params): | ||
verbose, error_score='raise-deprecating', | ||
test_score_weight=None, **fit_params): | ||
"""Run fit on one set of parameters. | ||
|
||
Parameters | ||
|
@@ -318,6 +319,10 @@ def fit_grid_point(X, y, estimator, parameters, train, test, scorer, | |
step, which will always raise the error. Default is 'raise' but from | ||
version 0.22 it will change to np.nan. | ||
|
||
test_score_weight : string, optional, default: None | ||
If specified, this is the sample weight key in the `fit_params` dict. | ||
If there's no such key found, we will assume unweighted test score. | ||
|
||
Returns | ||
------- | ||
score : float | ||
|
@@ -337,7 +342,9 @@ def fit_grid_point(X, y, estimator, parameters, train, test, scorer, | |
test, verbose, parameters, | ||
fit_params=fit_params, | ||
return_n_test_samples=True, | ||
error_score=error_score) | ||
error_score=error_score, | ||
test_score_weight=test_score_weight | ||
) | ||
return scores, parameters, n_samples_test | ||
|
||
|
||
|
@@ -392,7 +399,8 @@ class BaseSearchCV(six.with_metaclass(ABCMeta, BaseEstimator, | |
def __init__(self, estimator, scoring=None, | ||
fit_params=None, n_jobs=1, iid='warn', | ||
refit=True, cv=None, verbose=0, pre_dispatch='2*n_jobs', | ||
error_score='raise-deprecating', return_train_score=True): | ||
error_score='raise-deprecating', return_train_score=True, | ||
test_score_weight=None): | ||
|
||
self.scoring = scoring | ||
self.estimator = estimator | ||
|
@@ -405,12 +413,13 @@ def __init__(self, estimator, scoring=None, | |
self.pre_dispatch = pre_dispatch | ||
self.error_score = error_score | ||
self.return_train_score = return_train_score | ||
self.test_score_weight = test_score_weight | ||
|
||
@property | ||
def _estimator_type(self): | ||
return self.estimator._estimator_type | ||
|
||
def score(self, X, y=None): | ||
def score(self, X, y=None, sample_weight=None): | ||
"""Returns the score on the given data, if the estimator has been refit. | ||
|
||
This uses the score defined by ``scoring`` where provided, and the | ||
|
@@ -426,6 +435,9 @@ def score(self, X, y=None): | |
Target relative to X for classification or regression; | ||
None for unsupervised learning. | ||
|
||
sample_weight : array-like of shape = (n_samples), optional | ||
Sample weights. | ||
|
||
Returns | ||
------- | ||
score : float | ||
|
@@ -436,7 +448,9 @@ def score(self, X, y=None): | |
"and the estimator doesn't provide one %s" | ||
% self.best_estimator_) | ||
score = self.scorer_[self.refit] if self.multimetric_ else self.scorer_ | ||
return score(self.best_estimator_, X, y) | ||
score_kwgs = {} if sample_weight is None else \ | ||
{'sample_weight': sample_weight} | ||
return score(self.best_estimator_, X, y, **score_kwgs) | ||
|
||
def _check_is_fitted(self, method_name): | ||
if not self.refit: | ||
|
@@ -636,7 +650,8 @@ def fit(self, X, y=None, groups=None, **fit_params): | |
return_train_score=self.return_train_score, | ||
return_n_test_samples=True, | ||
return_times=True, return_parameters=False, | ||
error_score=self.error_score) | ||
error_score=self.error_score, | ||
test_score_weight=self.test_score_weight) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I wouldn't pass the |
||
for parameters, (train, test) in product(candidate_params, | ||
cv.split(X, y, groups))) | ||
|
||
|
@@ -1088,12 +1103,13 @@ class GridSearchCV(BaseSearchCV): | |
def __init__(self, estimator, param_grid, scoring=None, fit_params=None, | ||
n_jobs=1, iid='warn', refit=True, cv=None, verbose=0, | ||
pre_dispatch='2*n_jobs', error_score='raise-deprecating', | ||
return_train_score="warn"): | ||
return_train_score="warn", test_score_weight=None): | ||
super(GridSearchCV, self).__init__( | ||
estimator=estimator, scoring=scoring, fit_params=fit_params, | ||
n_jobs=n_jobs, iid=iid, refit=refit, cv=cv, verbose=verbose, | ||
pre_dispatch=pre_dispatch, error_score=error_score, | ||
return_train_score=return_train_score) | ||
return_train_score=return_train_score, | ||
test_score_weight=test_score_weight) | ||
self.param_grid = param_grid | ||
_check_param_grid(param_grid) | ||
|
||
|
@@ -1389,15 +1405,17 @@ class RandomizedSearchCV(BaseSearchCV): | |
def __init__(self, estimator, param_distributions, n_iter=10, scoring=None, | ||
fit_params=None, n_jobs=1, iid='warn', refit=True, cv=None, | ||
verbose=0, pre_dispatch='2*n_jobs', random_state=None, | ||
error_score='raise-deprecating', return_train_score="warn"): | ||
error_score='raise-deprecating', return_train_score="warn", | ||
test_score_weight=None): | ||
self.param_distributions = param_distributions | ||
self.n_iter = n_iter | ||
self.random_state = random_state | ||
super(RandomizedSearchCV, self).__init__( | ||
estimator=estimator, scoring=scoring, fit_params=fit_params, | ||
n_jobs=n_jobs, iid=iid, refit=refit, cv=cv, verbose=verbose, | ||
pre_dispatch=pre_dispatch, error_score=error_score, | ||
return_train_score=return_train_score) | ||
return_train_score=return_train_score, | ||
test_score_weight=test_score_weight) | ||
|
||
def _get_param_iterator(self): | ||
"""Return ParameterSampler instance for the given distributions""" | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -361,7 +361,7 @@ def _fit_and_score(estimator, X, y, scorer, train, test, verbose, | |
parameters, fit_params, return_train_score=False, | ||
return_parameters=False, return_n_test_samples=False, | ||
return_times=False, return_estimator=False, | ||
error_score='raise-deprecating'): | ||
error_score='raise-deprecating', test_score_weight=None): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I would have the flag here. Whether it's a parameter or in |
||
"""Fit estimator and compute scores for a given dataset split. | ||
|
||
Parameters | ||
|
@@ -402,6 +402,10 @@ def _fit_and_score(estimator, X, y, scorer, train, test, verbose, | |
step, which will always raise the error. Default is 'raise' but from | ||
version 0.22 it will change to np.nan. | ||
|
||
test_score_weight : string, optional, default: None | ||
If specified, this is the sample weight key in the `fit_params` dict. | ||
If there's no such key found, we will assume unweighted test score. | ||
|
||
parameters : dict or None | ||
Parameters to be set on the estimator. | ||
|
||
|
@@ -455,8 +459,26 @@ def _fit_and_score(estimator, X, y, scorer, train, test, verbose, | |
for k, v in parameters.items())) | ||
print("[CV] %s %s" % (msg, (64 - len(msg)) * '.')) | ||
|
||
# Adjust length of sample weights | ||
# Init to empty dict if pass in as None | ||
fit_params = fit_params if fit_params is not None else {} | ||
|
||
sample_weight_key, weighted_test_score = None, False | ||
|
||
if test_score_weight is not None and test_score_weight in fit_params: | ||
sample_weight_key, weighted_test_score = test_score_weight, True | ||
|
||
if weighted_test_score: | ||
train_sample_weight = _index_param_value(X, | ||
fit_params[sample_weight_key], | ||
train) | ||
test_sample_weight = _index_param_value(X, | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Does this work? I'm not fully convinced. When I discovered this issue, I debugged through it and it seemed to me that |
||
fit_params[sample_weight_key], | ||
test) | ||
else: | ||
train_sample_weight = None | ||
test_sample_weight = None | ||
|
||
# Adjust length of sample weights | ||
fit_params = dict([(k, _index_param_value(X, v, train)) | ||
for k, v in fit_params.items()]) | ||
|
||
|
@@ -516,11 +538,13 @@ def _fit_and_score(estimator, X, y, scorer, train, test, verbose, | |
else: | ||
fit_time = time.time() - start_time | ||
# _score will return dict if is_multimetric is True | ||
test_scores = _score(estimator, X_test, y_test, scorer, is_multimetric) | ||
test_scores = _score(estimator, X_test, y_test, scorer, is_multimetric, | ||
sample_weight=test_sample_weight) | ||
score_time = time.time() - start_time - fit_time | ||
if return_train_score: | ||
train_scores = _score(estimator, X_train, y_train, scorer, | ||
is_multimetric) | ||
is_multimetric, | ||
sample_weight=train_sample_weight) | ||
|
||
if verbose > 2: | ||
if is_multimetric: | ||
|
@@ -546,19 +570,23 @@ def _fit_and_score(estimator, X, y, scorer, train, test, verbose, | |
return ret | ||
|
||
|
||
def _score(estimator, X_test, y_test, scorer, is_multimetric=False): | ||
def _score(estimator, X_test, y_test, scorer, is_multimetric=False, | ||
sample_weight=None): | ||
"""Compute the score(s) of an estimator on a given test set. | ||
|
||
Will return a single float if is_multimetric is False and a dict of floats, | ||
if is_multimetric is True | ||
""" | ||
if is_multimetric: | ||
return _multimetric_score(estimator, X_test, y_test, scorer) | ||
return _multimetric_score(estimator, X_test, y_test, scorer, | ||
sample_weight=sample_weight) | ||
else: | ||
score_kwgs = {} if sample_weight is None else \ | ||
{'sample_weight': sample_weight} | ||
if y_test is None: | ||
score = scorer(estimator, X_test) | ||
score = scorer(estimator, X_test, **score_kwgs) | ||
else: | ||
score = scorer(estimator, X_test, y_test) | ||
score = scorer(estimator, X_test, y_test, **score_kwgs) | ||
|
||
if hasattr(score, 'item'): | ||
try: | ||
|
@@ -575,15 +603,17 @@ def _score(estimator, X_test, y_test, scorer, is_multimetric=False): | |
return score | ||
|
||
|
||
def _multimetric_score(estimator, X_test, y_test, scorers): | ||
def _multimetric_score(estimator, X_test, y_test, scorers, | ||
sample_weight=None): | ||
"""Return a dict of score for multimetric scoring""" | ||
scores = {} | ||
|
||
score_kwgs = {} if sample_weight is None else \ | ||
{'sample_weight': sample_weight} | ||
for name, scorer in scorers.items(): | ||
if y_test is None: | ||
score = scorer(estimator, X_test) | ||
score = scorer(estimator, X_test, **score_kwgs) | ||
else: | ||
score = scorer(estimator, X_test, y_test) | ||
score = scorer(estimator, X_test, y_test, **score_kwgs) | ||
|
||
if hasattr(score, 'item'): | ||
try: | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment.
The reason will be displayed to describe this comment to others. Learn more.
Instead of using a `test_score_weight`, I would consider a (Boolean) parameter (called `sample_weight_in_score` or something) that says whether to use the `sample_weight` parameter that would appear in `fit_params`. I think that unless you can think of a realistic example where you would want different importance weights for metric calculation than for training, a flag would be better because it would potentially avoid screw-ups.