From e313f81c93ac1d6d913ea7cad0827d9f08eea9c3 Mon Sep 17 00:00:00 2001 From: Noel Dawe Date: Sun, 6 Apr 2014 15:08:37 -0700 Subject: [PATCH 1/5] grid_search: add sample_weight support --- sklearn/cross_validation.py | 31 ++++++++++++---- sklearn/grid_search.py | 49 ++++++++++++++++++-------- sklearn/tests/test_cross_validation.py | 8 ++--- sklearn/tests/test_grid_search.py | 27 ++++++++++++-- 4 files changed, 87 insertions(+), 28 deletions(-) diff --git a/sklearn/cross_validation.py b/sklearn/cross_validation.py index ebcf4f934f043..bb0d7cc85c226 100644 --- a/sklearn/cross_validation.py +++ b/sklearn/cross_validation.py @@ -1150,7 +1150,8 @@ def cross_val_score(estimator, X, y=None, scoring=None, cv=None, n_jobs=1, return np.array(scores)[:, 0] -def _fit_and_score(estimator, X, y, scorer, train, test, verbose, parameters, +def _fit_and_score(estimator, X, y, sample_weight, + scorer, train, test, verbose, parameters, fit_params, return_train_score=False, return_parameters=False): """Fit estimator and compute scores for a given dataset split. @@ -1163,10 +1164,13 @@ def _fit_and_score(estimator, X, y, scorer, train, test, verbose, parameters, X : array-like of shape at least 2D The data to fit. - y : array-like, optional, default: None + y : array-like or None The target variable to try to predict in the case of supervised learning. + sample_weight : array-like or None + Sample weights. + scoring : callable A scorer callable object / function with signature ``scorer(estimator, X, y)``. @@ -1231,13 +1235,26 @@ def _fit_and_score(estimator, X, y, scorer, train, test, verbose, parameters, X_train, y_train = _safe_split(estimator, X, y, train) X_test, y_test = _safe_split(estimator, X, y, test, train) + + test_score_params = dict() + train_score_params = dict() + if sample_weight is not None: + # move to _safe_split? + sample_weight_train = sample_weight[safe_mask(sample_weight, train)] + sample_weight_test = sample_weight[safe_mask(sample_weight, test)] + fit_params['sample_weight'] = sample_weight_train + test_score_params['sample_weight'] = sample_weight_test + train_score_params['sample_weight'] = sample_weight_train + if y_train is None: estimator.fit(X_train, **fit_params) else: estimator.fit(X_train, y_train, **fit_params) - test_score = _score(estimator, X_test, y_test, scorer) + test_score = _score(estimator, X_test, y_test, scorer, + **test_score_params) if return_train_score: - train_score = _score(estimator, X_train, y_train, scorer) + train_score = _score(estimator, X_train, y_train, scorer, + **train_score_params) scoring_time = time.time() - start_time @@ -1286,12 +1303,12 @@ def _safe_split(estimator, X, y, indices, train_indices=None): return X_subset, y_subset -def _score(estimator, X_test, y_test, scorer): +def _score(estimator, X_test, y_test, scorer, **params): """Compute the score of an estimator on a given test set.""" if y_test is None: - score = scorer(estimator, X_test) + score = scorer(estimator, X_test, **params) else: - score = scorer(estimator, X_test, y_test) + score = scorer(estimator, X_test, y_test, **params) if not isinstance(score, numbers.Number): raise ValueError("scoring must return a number, got %s (%s) instead." 
% (str(score), type(score))) diff --git a/sklearn/grid_search.py b/sklearn/grid_search.py index 280dbb32b1e54..65c32fef6109b 100644 --- a/sklearn/grid_search.py +++ b/sklearn/grid_search.py @@ -8,6 +8,7 @@ # Gael Varoquaux # Andreas Mueller # Olivier Grisel +# Noel Dawe # License: BSD 3 clause from abc import ABCMeta, abstractmethod @@ -226,7 +227,8 @@ def fit_grid_point(X, y, estimator, parameters, train, test, scorer, n_samples_test : int Number of test samples in this split. """ - score, n_samples_test, _ = _fit_and_score(estimator, X, y, scorer, train, + score, n_samples_test, _ = _fit_and_score(estimator, X, y, None, + scorer, train, test, verbose, parameters, fit_params) return score, parameters, n_samples_test @@ -291,7 +293,7 @@ def __init__(self, estimator, scoring=None, self.verbose = verbose self.pre_dispatch = pre_dispatch - def score(self, X, y=None): + def score(self, X, y=None, sample_weight=None): """Returns the score on the given test data and labels, if the search estimator has been refit. The ``score`` function of the best estimator is used, or the ``scoring`` parameter where unavailable. @@ -306,18 +308,24 @@ def score(self, X, y=None): Target relative to X for classification or regression; None for unsupervised learning. + sample_weight : array-like, shape = [n_samples], optional + Sample weights. + Returns ------- score : float """ + kwargs = {} + if sample_weight is not None: + kwargs['sample_weight'] = sample_weight if hasattr(self.best_estimator_, 'score'): - return self.best_estimator_.score(X, y) + return self.best_estimator_.score(X, y, **kwargs) if self.scorer_ is None: raise ValueError("No score function explicitly defined, " "and the estimator doesn't provide one %s" % self.best_estimator_) - return self.scorer_(self.best_estimator_, X, y) + return self.scorer_(self.best_estimator_, X, y, **kwargs) @property def predict(self): @@ -335,7 +343,7 @@ def decision_function(self): def transform(self): return self.best_estimator_.transform - def _fit(self, X, y, parameter_iterable): + def _fit(self, X, y, sample_weight, parameter_iterable): """Actual fitting, performing the search over parameters.""" estimator = self.estimator @@ -343,13 +351,14 @@ def _fit(self, X, y, parameter_iterable): self.scorer_ = check_scoring(self.estimator, scoring=self.scoring) n_samples = _num_samples(X) - X, y = indexable(X, y) + X, y, sample_weight = indexable(X, y, sample_weight) if y is not None: if len(y) != n_samples: raise ValueError('Target variable (y) has a different number ' 'of samples (%i) than data (X: %i samples)' % (len(y), n_samples)) + cv = check_cv(cv, X, y, classifier=is_classifier(estimator)) if self.verbose > 0: @@ -367,9 +376,10 @@ def _fit(self, X, y, parameter_iterable): n_jobs=self.n_jobs, verbose=self.verbose, pre_dispatch=pre_dispatch )( - delayed(_fit_and_score)(clone(base_estimator), X, y, self.scorer_, - train, test, self.verbose, parameters, - self.fit_params, return_parameters=True) + delayed(_fit_and_score)(clone(base_estimator), X, y, sample_weight, + self.scorer_, train, test, + self.verbose, parameters, self.fit_params, + return_parameters=True) for parameters in parameter_iterable for train, test in cv) @@ -411,14 +421,18 @@ def _fit(self, X, y, parameter_iterable): self.best_score_ = best.mean_validation_score if self.refit: + fit_params = self.fit_params + if sample_weight is not None: + fit_params = fit_params.copy() + fit_params['sample_weight'] = sample_weight # fit the best estimator using the entire dataset # clone first to work around broken 
estimators best_estimator = clone(base_estimator).set_params( **best.parameters) if y is not None: - best_estimator.fit(X, y, **self.fit_params) + best_estimator.fit(X, y, **fit_params) else: - best_estimator.fit(X, **self.fit_params) + best_estimator.fit(X, **fit_params) self.best_estimator_ = best_estimator return self @@ -573,7 +587,7 @@ def __init__(self, estimator, param_grid, scoring=None, self.param_grid = param_grid _check_param_grid(param_grid) - def fit(self, X, y=None): + def fit(self, X, y=None, sample_weight=None): """Run fit with all sets of parameters. Parameters @@ -587,8 +601,10 @@ def fit(self, X, y=None): Target relative to X for classification or regression; None for unsupervised learning. + sample_weight : array-like, shape = [n_samples], optional + Sample weights. """ - return self._fit(X, y, ParameterGrid(self.param_grid)) + return self._fit(X, y, sample_weight, ParameterGrid(self.param_grid)) class RandomizedSearchCV(BaseSearchCV): @@ -724,7 +740,7 @@ def __init__(self, estimator, param_distributions, n_iter=10, scoring=None, n_jobs=n_jobs, iid=iid, refit=refit, cv=cv, verbose=verbose, pre_dispatch=pre_dispatch) - def fit(self, X, y=None): + def fit(self, X, y=None, sample_weight=None): """Run fit on the estimator with randomly drawn parameters. Parameters @@ -737,8 +753,11 @@ def fit(self, X, y=None): Target relative to X for classification or regression; None for unsupervised learning. + sample_weight : array-like, shape = [n_samples], optional + Sample weights. + """ sampled_params = ParameterSampler(self.param_distributions, self.n_iter, random_state=self.random_state) - return self._fit(X, y, sampled_params) + return self._fit(X, y, sample_weight, sampled_params) diff --git a/sklearn/tests/test_cross_validation.py b/sklearn/tests/test_cross_validation.py index e3af30a1b2bae..a495871290ce4 100644 --- a/sklearn/tests/test_cross_validation.py +++ b/sklearn/tests/test_cross_validation.py @@ -914,12 +914,12 @@ def test_safe_split_with_precomputed_kernel(): cv = cval.ShuffleSplit(X.shape[0], test_size=0.25, random_state=0) tr, te = list(cv)[0] - X_tr, y_tr = cval._safe_split(clf, X, y, tr) - K_tr, y_tr2 = cval._safe_split(clfp, K, y, tr) + X_tr, y_tr, _ = cval._safe_split(clf, X, y, None, tr) + K_tr, y_tr2, _ = cval._safe_split(clfp, K, y, None, tr) assert_array_almost_equal(K_tr, np.dot(X_tr, X_tr.T)) - X_te, y_te = cval._safe_split(clf, X, y, te, tr) - K_te, y_te2 = cval._safe_split(clfp, K, y, te, tr) + X_te, y_te, _ = cval._safe_split(clf, X, y, None, te, tr) + K_te, y_te2, _ = cval._safe_split(clfp, K, y, None, te, tr) assert_array_almost_equal(K_te, np.dot(X_te, X_tr.T)) diff --git a/sklearn/tests/test_grid_search.py b/sklearn/tests/test_grid_search.py index 02183d18cd2fc..b1573abbb4fb7 100644 --- a/sklearn/tests/test_grid_search.py +++ b/sklearn/tests/test_grid_search.py @@ -51,8 +51,13 @@ class MockClassifier(object): def __init__(self, foo_param=0): self.foo_param = foo_param - def fit(self, X, Y): + def fit(self, X, Y, sample_weight=None): assert_true(len(X) == len(Y)) + if sample_weight is not None: + assert_true(len(sample_weight) == len(X), + 'MockClassifier sample_weight.shape[0]' + ' is {0}, should be {1}'.format(len(sample_weight), + len(X))) return self def predict(self, T): @@ -62,7 +67,12 @@ def predict(self, T): decision_function = predict transform = predict - def score(self, X=None, Y=None): + def score(self, X=None, Y=None, sample_weight=None): + if X is not None and sample_weight is not None: + assert_true(len(sample_weight) == len(X), + 
'MockClassifier sample_weight.shape[0]' + ' is {0}, should be {1}'.format(len(sample_weight), + len(X))) if self.foo_param > 1: score = 1. else: @@ -85,6 +95,7 @@ def score(self): X = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]]) y = np.array([1, 1, 2, 2]) +sample_weight = np.array([1, 2, 3, 4]) def test_parameter_grid(): @@ -638,3 +649,15 @@ def test_grid_search_allows_nans(): ('classifier', MockClassifier()), ]) GridSearchCV(p, {'classifier__foo_param': [1, 2, 3]}, cv=2).fit(X, y) + + +def test_grid_search_with_sample_weights(): + """Test grid searching with sample weights""" + est_parameters = {"foo_param": [1, 2, 3]} + cv = KFold(y.shape[0], n_folds=2, random_state=0) + for search_cls in (GridSearchCV, RandomizedSearchCV): + grid_search = search_cls(MockClassifier(), est_parameters, cv=cv) + grid_search.fit(X, y, sample_weight=sample_weight) + # check that sample_weight can be a list + grid_search = GridSearchCV(MockClassifier(), est_parameters, cv=cv) + grid_search.fit(X, y, sample_weight=sample_weight.tolist()) From 5816618981e334fd58fc49b44e1f5e054f137f59 Mon Sep 17 00:00:00 2001 From: Noel Dawe Date: Mon, 21 Apr 2014 18:53:08 -0700 Subject: [PATCH 2/5] cross_validation: add sample_weight support --- sklearn/cross_validation.py | 34 +++++++++++++++++--------- sklearn/tests/test_cross_validation.py | 16 ++++++++---- 2 files changed, 33 insertions(+), 17 deletions(-) diff --git a/sklearn/cross_validation.py b/sklearn/cross_validation.py index bb0d7cc85c226..64ed338ffdc2b 100644 --- a/sklearn/cross_validation.py +++ b/sklearn/cross_validation.py @@ -1077,7 +1077,8 @@ def __len__(self): def cross_val_score(estimator, X, y=None, scoring=None, cv=None, n_jobs=1, - verbose=0, fit_params=None, pre_dispatch='2*n_jobs'): + verbose=0, fit_params=None, pre_dispatch='2*n_jobs', + sample_weight=None): """Evaluate a score by cross-validation Parameters @@ -1092,6 +1093,9 @@ def cross_val_score(estimator, X, y=None, scoring=None, cv=None, n_jobs=1, The target variable to try to predict in the case of supervised learning. + sample_weight : array-like, optional, default: None + Sample weights. + scoring : string, callable or None, optional, default: None A string (see model evaluation documentation) or a scorer callable object / function with signature @@ -1135,7 +1139,7 @@ def cross_val_score(estimator, X, y=None, scoring=None, cv=None, n_jobs=1, scores : array of float, shape=(len(list(cv)),) Array of scores of the estimator for each run of the cross validation. """ - X, y = indexable(X, y) + X, y, sample_weight = indexable(X, y, sample_weight) cv = _check_cv(cv, X, y, classifier=is_classifier(estimator)) scorer = check_scoring(estimator, scoring=scoring) @@ -1143,7 +1147,8 @@ def cross_val_score(estimator, X, y=None, scoring=None, cv=None, n_jobs=1, # independent, and that it is pickle-able. 
parallel = Parallel(n_jobs=n_jobs, verbose=verbose, pre_dispatch=pre_dispatch) - scores = parallel(delayed(_fit_and_score)(clone(estimator), X, y, scorer, + scores = parallel(delayed(_fit_and_score)(clone(estimator), X, y, + sample_weight, scorer, train, test, verbose, None, fit_params) for train, test in cv) @@ -1233,15 +1238,15 @@ def _fit_and_score(estimator, X, y, sample_weight, start_time = time.time() - X_train, y_train = _safe_split(estimator, X, y, train) - X_test, y_test = _safe_split(estimator, X, y, test, train) + X_train, y_train, sample_weight_train = _safe_split( + estimator, X, y, sample_weight, train) + X_test, y_test, sample_weight_test = _safe_split( + estimator, X, y, sample_weight, test, train) - test_score_params = dict() - train_score_params = dict() + test_score_params = {} + train_score_params = {} if sample_weight is not None: - # move to _safe_split? - sample_weight_train = sample_weight[safe_mask(sample_weight, train)] - sample_weight_test = sample_weight[safe_mask(sample_weight, test)] + fit_params = fit_params.copy() fit_params['sample_weight'] = sample_weight_train test_score_params['sample_weight'] = sample_weight_test train_score_params['sample_weight'] = sample_weight_train @@ -1271,7 +1276,7 @@ def _fit_and_score(estimator, X, y, sample_weight, return ret -def _safe_split(estimator, X, y, indices, train_indices=None): +def _safe_split(estimator, X, y, sample_weight, indices, train_indices=None): """Create subset of dataset and properly handle kernels.""" if hasattr(estimator, 'kernel') and callable(estimator.kernel): # cannot compute the kernel values with custom function @@ -1300,7 +1305,12 @@ def _safe_split(estimator, X, y, indices, train_indices=None): else: y_subset = None - return X_subset, y_subset + if sample_weight is not None: + sample_weight_subset = np.asarray(sample_weight)[indices] + else: + sample_weight_subset = None + + return X_subset, y_subset, sample_weight_subset def _score(estimator, X_test, y_test, scorer, **params): diff --git a/sklearn/tests/test_cross_validation.py b/sklearn/tests/test_cross_validation.py index a495871290ce4..bdb791ad080f7 100644 --- a/sklearn/tests/test_cross_validation.py +++ b/sklearn/tests/test_cross_validation.py @@ -54,9 +54,9 @@ def fit(self, X, Y=None, sample_weight=None, class_prior=None): if X.ndim >= 3 and not self.allow_nd: raise ValueError('X cannot be d') if sample_weight is not None: - assert_true(sample_weight.shape[0] == X.shape[0], + assert_true(len(sample_weight) == X.shape[0], 'MockClassifier extra fit_param sample_weight.shape[0]' - ' is {0}, should be {1}'.format(sample_weight.shape[0], + ' is {0}, should be {1}'.format(len(sample_weight), X.shape[0])) if class_prior is not None: assert_true(class_prior.shape[0] == len(np.unique(y)), @@ -70,13 +70,15 @@ def predict(self, T): T = T.reshape(len(T), -1) return T.shape[0] - def score(self, X=None, Y=None): + def score(self, X=None, Y=None, sample_weight=None): return 1. 
/ (1 + np.abs(self.a)) X = np.ones((10, 2)) X_sparse = coo_matrix(X) y = np.arange(10) // 2 +rng = np.random.RandomState(0) +int_weights = rng.randint(10, size=y.shape) ############################################################################## # Tests @@ -466,8 +468,8 @@ def test_cross_val_score(): for a in range(-10, 10): clf.a = a # Smoke test - scores = cval.cross_val_score(clf, X, y) - assert_array_equal(scores, clf.score(X, y)) + scores = cval.cross_val_score(clf, X, y, sample_weight=int_weights) + assert_array_equal(scores, clf.score(X, y, sample_weight=int_weights)) # test with multioutput y scores = cval.cross_val_score(clf, X_sparse, X) @@ -480,6 +482,10 @@ def test_cross_val_score(): scores = cval.cross_val_score(clf, X_sparse, X) assert_array_equal(scores, clf.score(X_sparse, X)) + # test with sample_weight as list + scores = cval.cross_val_score( + clf, X, y, sample_weight=int_weights.tolist()) + # test with X and y as list list_check = lambda x: isinstance(x, list) clf = CheckingClassifier(check_X=list_check) From 1e5c53687bfa9e0cc5b6ea7ba1b94b6ad60b0dbf Mon Sep 17 00:00:00 2001 From: Noel Dawe Date: Mon, 21 Apr 2014 18:31:40 -0700 Subject: [PATCH 3/5] rfe: sample_weight support --- sklearn/feature_selection/rfe.py | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/sklearn/feature_selection/rfe.py b/sklearn/feature_selection/rfe.py index 86c56e1f3264a..c3fd283b1ca34 100644 --- a/sklearn/feature_selection/rfe.py +++ b/sklearn/feature_selection/rfe.py @@ -306,7 +306,7 @@ def __init__(self, estimator, step=1, cv=None, scoring=None, self.estimator_params = estimator_params self.verbose = verbose - def fit(self, X, y): + def fit(self, X, y, sample_weight=None): """Fit the RFE model and automatically tune the number of selected features. @@ -319,6 +319,9 @@ def fit(self, X, y): y : array-like, shape = [n_samples] Target values (integers for classification, real numbers for regression). + + sample_weight : array-like, shape = [n_samples], optional (default=None) + Sample weights. 
""" X, y = check_X_y(X, y, "csr") # Initialization @@ -332,17 +335,26 @@ def fit(self, X, y): # Cross-validation for n, (train, test) in enumerate(cv): - X_train, y_train = _safe_split(self.estimator, X, y, train) - X_test, y_test = _safe_split(self.estimator, X, y, test, train) + X_train, y_train, sample_weight_train = _safe_split( + self.estimator, X, y, sample_weight, train) + X_test, y_test, sample_weight_test = _safe_split( + self.estimator, X, y, sample_weight, test, train) + + fit_params = dict() + score_params = dict() + if sample_weight is not None: + fit_params['sample_weight'] = sample_weight_train + score_params['sample_weight'] = sample_weight_test # Compute a full ranking of the features - ranking_ = rfe.fit(X_train, y_train).ranking_ + ranking_ = rfe.fit(X_train, y_train, **fit_params).ranking_ # Score each subset of features for k in range(0, max(ranking_)): mask = np.where(ranking_ <= k + 1)[0] estimator = clone(self.estimator) - estimator.fit(X_train[:, mask], y_train) - score = _score(estimator, X_test[:, mask], y_test, scorer) + estimator.fit(X_train[:, mask], y_train, **fit_params) + score = _score( + estimator, X_test[:, mask], y_test, scorer, **score_params) if self.verbose > 0: print("Finished fold with %d / %d feature ranks, score=%f" @@ -358,7 +370,10 @@ def fit(self, X, y): n_features_to_select=k+1, step=self.step, estimator_params=self.estimator_params) - rfe.fit(X, y) + if sample_weight is not None: + rfe.fit(X, y, sample_weight=sample_weight) + else: + rfe.fit(X, y) # Set final attributes self.support_ = rfe.support_ From 7612a1fe5b19a70d116959c79692b5b37366ac98 Mon Sep 17 00:00:00 2001 From: Noel Dawe Date: Mon, 21 Apr 2014 18:32:01 -0700 Subject: [PATCH 4/5] learning_curve: sample_weight support --- sklearn/learning_curve.py | 55 ++++++++++++++++++++++++++++----------- 1 file changed, 40 insertions(+), 15 deletions(-) diff --git a/sklearn/learning_curve.py b/sklearn/learning_curve.py index 55c4cf6547d86..835a4ec9ac398 100644 --- a/sklearn/learning_curve.py +++ b/sklearn/learning_curve.py @@ -17,7 +17,8 @@ from .utils.fixes import astype -def learning_curve(estimator, X, y, train_sizes=np.linspace(0.1, 1.0, 5), +def learning_curve(estimator, X, y, sample_weight=None, + train_sizes=np.linspace(0.1, 1.0, 10), cv=None, scoring=None, exploit_incremental_learning=False, n_jobs=1, pre_dispatch="all", verbose=0): """Learning curve. @@ -44,6 +45,9 @@ def learning_curve(estimator, X, y, train_sizes=np.linspace(0.1, 1.0, 5), Target relative to X for classification or regression; None for unsupervised learning. + sample_weight : array-like, shape (n_samples), optional + Sample weights. + train_sizes : array-like, shape (n_ticks,), dtype float or int Relative or absolute numbers of training examples that will be used to generate the learning curve. 
If the dtype is float, it is regarded as a @@ -128,11 +132,13 @@ def learning_curve(estimator, X, y, train_sizes=np.linspace(0.1, 1.0, 5), if exploit_incremental_learning: classes = np.unique(y) if is_classifier(estimator) else None out = parallel(delayed(_incremental_fit_estimator)( - clone(estimator), X, y, classes, train, test, train_sizes_abs, + clone(estimator), X, y, sample_weight, + classes, train, test, train_sizes_abs, scorer, verbose) for train, test in cv) else: out = parallel(delayed(_fit_and_score)( - clone(estimator), X, y, scorer, train[:n_train_samples], test, + clone(estimator), X, y, sample_weight, + scorer, train[:n_train_samples], test, verbose, parameters=None, fit_params=None, return_train_score=True) for train, test in cv for n_train_samples in train_sizes_abs) out = np.array(out)[:, :2] @@ -203,29 +209,45 @@ def _translate_train_sizes(train_sizes, n_max_training_samples): return train_sizes_abs -def _incremental_fit_estimator(estimator, X, y, classes, train, test, +def _incremental_fit_estimator(estimator, X, y, sample_weight, + classes, train, test, train_sizes, scorer, verbose): """Train estimator on training subsets incrementally and compute scores.""" train_scores, test_scores = [], [] partitions = zip(train_sizes, np.split(train, train_sizes)[:-1]) for n_train_samples, partial_train in partitions: train_subset = train[:n_train_samples] - X_train, y_train = _safe_split(estimator, X, y, train_subset) - X_partial_train, y_partial_train = _safe_split(estimator, X, y, - partial_train) - X_test, y_test = _safe_split(estimator, X, y, test, train_subset) + X_train, y_train, sample_weight_train = _safe_split( + estimator, X, y, sample_weight, train_subset) + X_partial_train, y_partial_train, sample_weight_partial_train = \ + _safe_split(estimator, X, y, sample_weight, partial_train) + X_test, y_test, sample_weight_test = _safe_split( + estimator, X, y, sample_weight, test, train_subset) + + fit_params = dict() + train_score_params = dict() + test_score_params = dict() + if sample_weight is not None: + fit_params['sample_weight'] = sample_weight_partial_train + train_score_params['sample_weight'] = sample_weight_train + test_score_params['sample_weight'] = sample_weight_test + if y_partial_train is None: - estimator.partial_fit(X_partial_train, classes=classes) + estimator.partial_fit(X_partial_train, + classes=classes, **fit_params) else: estimator.partial_fit(X_partial_train, y_partial_train, - classes=classes) - train_scores.append(_score(estimator, X_train, y_train, scorer)) - test_scores.append(_score(estimator, X_test, y_test, scorer)) + classes=classes, **fit_params) + train_scores.append(_score( + estimator, X_train, y_train, scorer, **train_score_params)) + test_scores.append(_score( + estimator, X_test, y_test, scorer, **test_score_params)) return np.array((train_scores, test_scores)).T -def validation_curve(estimator, X, y, param_name, param_range, cv=None, - scoring=None, n_jobs=1, pre_dispatch="all", verbose=0): +def validation_curve(estimator, X, y, param_name, param_range, + sample_weight=None, cv=None, scoring=None, + n_jobs=1, pre_dispatch="all", verbose=0): """Validation curve. Determine training and test scores for varying parameter values. @@ -254,6 +276,9 @@ def validation_curve(estimator, X, y, param_name, param_range, cv=None, param_range : array-like, shape (n_values,) The values of the parameter that will be evaluated. + sample_weight : array-like, shape (n_samples,), optional + Sample weights. 
+ cv : integer, cross-validation generator, optional If an integer is passed, it is the number of folds (defaults to 3). Specific cross-validation objects can be passed, see @@ -296,7 +321,7 @@ def validation_curve(estimator, X, y, param_name, param_range, cv=None, parallel = Parallel(n_jobs=n_jobs, pre_dispatch=pre_dispatch, verbose=verbose) out = parallel(delayed(_fit_and_score)( - estimator, X, y, scorer, train, test, verbose, + estimator, X, y, sample_weight, scorer, train, test, verbose, parameters={param_name: v}, fit_params=None, return_train_score=True) for train, test in cv for v in param_range) From f1f6a3cb9beab85e1f108a463f8a9fea6af205f6 Mon Sep 17 00:00:00 2001 From: Vlad Niculae Date: Fri, 1 Aug 2014 16:11:30 +0200 Subject: [PATCH 5/5] Refactor sample_weights as generic scorer_params --- sklearn/cross_validation.py | 67 ++++++++++++-------------- sklearn/feature_selection/rfe.py | 13 ++--- sklearn/grid_search.py | 52 ++++++++------------ sklearn/learning_curve.py | 48 ++++++++++++------ sklearn/tests/test_cross_validation.py | 15 +++--- sklearn/tests/test_grid_search.py | 12 +++-- sklearn/tests/test_learning_curve.py | 4 +- 7 files changed, 110 insertions(+), 101 deletions(-) diff --git a/sklearn/cross_validation.py b/sklearn/cross_validation.py index 64ed338ffdc2b..22e370de35577 100644 --- a/sklearn/cross_validation.py +++ b/sklearn/cross_validation.py @@ -1078,7 +1078,7 @@ def __len__(self): def cross_val_score(estimator, X, y=None, scoring=None, cv=None, n_jobs=1, verbose=0, fit_params=None, pre_dispatch='2*n_jobs', - sample_weight=None): + scorer_params=None): """Evaluate a score by cross-validation Parameters @@ -1093,9 +1093,6 @@ def cross_val_score(estimator, X, y=None, scoring=None, cv=None, n_jobs=1, The target variable to try to predict in the case of supervised learning. - sample_weight : array-like, optional, default: None - Sample weights. - scoring : string, callable or None, optional, default: None A string (see model evaluation documentation) or a scorer callable object / function with signature @@ -1134,12 +1131,16 @@ def cross_val_score(estimator, X, y=None, scoring=None, cv=None, n_jobs=1, - A string, giving an expression as a function of n_jobs, as in '2*n_jobs' + scorer_params : dict, optional + Parameters to pass to the scorer. Can be used for sample weights + and sample groups. + Returns ------- scores : array of float, shape=(len(list(cv)),) Array of scores of the estimator for each run of the cross validation. """ - X, y, sample_weight = indexable(X, y, sample_weight) + X, y = indexable(X, y) cv = _check_cv(cv, X, y, classifier=is_classifier(estimator)) scorer = check_scoring(estimator, scoring=scoring) @@ -1148,16 +1149,14 @@ def cross_val_score(estimator, X, y=None, scoring=None, cv=None, n_jobs=1, parallel = Parallel(n_jobs=n_jobs, verbose=verbose, pre_dispatch=pre_dispatch) scores = parallel(delayed(_fit_and_score)(clone(estimator), X, y, - sample_weight, scorer, - train, test, verbose, None, - fit_params) + scorer, train, test, verbose, + None, fit_params, scorer_params) for train, test in cv) return np.array(scores)[:, 0] -def _fit_and_score(estimator, X, y, sample_weight, - scorer, train, test, verbose, parameters, - fit_params, return_train_score=False, +def _fit_and_score(estimator, X, y, scorer, train, test, verbose, parameters, + fit_params, scorer_params, return_train_score=False, return_parameters=False): """Fit estimator and compute scores for a given dataset split. 
@@ -1173,9 +1172,6 @@ def _fit_and_score(estimator, X, y, sample_weight, The target variable to try to predict in the case of supervised learning. - sample_weight : array-like or None - Sample weights. - scoring : callable A scorer callable object / function with signature ``scorer(estimator, X, y)``. @@ -1195,6 +1191,9 @@ def _fit_and_score(estimator, X, y, sample_weight, fit_params : dict or None Parameters that will be passed to ``estimator.fit``. + scorer_params : dict or None + Parameters that will be passed to the scorer. + return_train_score : boolean, optional, default: False Compute and return score on training set. @@ -1233,33 +1232,36 @@ def _fit_and_score(estimator, X, y, sample_weight, if hasattr(v, '__len__') and len(v) == n_samples else v) for k, v in fit_params.items()]) + # Same, but take both slices + scorer_params = scorer_params if scorer_params is not None else {} + train_scorer_params = dict([(k, np.asarray(v)[train] + if hasattr(v, '__len__') + and len(v) == n_samples + else v) + for k, v in scorer_params.items()]) + test_scorer_params = dict([(k, np.asarray(v)[test] + if hasattr(v, '__len__') + and len(v) == n_samples + else v) + for k, v in scorer_params.items()]) + if parameters is not None: estimator.set_params(**parameters) start_time = time.time() - X_train, y_train, sample_weight_train = _safe_split( - estimator, X, y, sample_weight, train) - X_test, y_test, sample_weight_test = _safe_split( - estimator, X, y, sample_weight, test, train) - - test_score_params = {} - train_score_params = {} - if sample_weight is not None: - fit_params = fit_params.copy() - fit_params['sample_weight'] = sample_weight_train - test_score_params['sample_weight'] = sample_weight_test - train_score_params['sample_weight'] = sample_weight_train + X_train, y_train = _safe_split(estimator, X, y, train) + X_test, y_test = _safe_split(estimator, X, y, test, train) if y_train is None: estimator.fit(X_train, **fit_params) else: estimator.fit(X_train, y_train, **fit_params) test_score = _score(estimator, X_test, y_test, scorer, - **test_score_params) + **test_scorer_params) if return_train_score: train_score = _score(estimator, X_train, y_train, scorer, - **train_score_params) + **train_scorer_params) scoring_time = time.time() - start_time @@ -1276,7 +1278,7 @@ def _fit_and_score(estimator, X, y, sample_weight, return ret -def _safe_split(estimator, X, y, sample_weight, indices, train_indices=None): +def _safe_split(estimator, X, y, indices, train_indices=None): """Create subset of dataset and properly handle kernels.""" if hasattr(estimator, 'kernel') and callable(estimator.kernel): # cannot compute the kernel values with custom function @@ -1305,12 +1307,7 @@ def _safe_split(estimator, X, y, sample_weight, indices, train_indices=None): else: y_subset = None - if sample_weight is not None: - sample_weight_subset = np.asarray(sample_weight)[indices] - else: - sample_weight_subset = None - - return X_subset, y_subset, sample_weight_subset + return X_subset, y_subset def _score(estimator, X_test, y_test, scorer, **params): diff --git a/sklearn/feature_selection/rfe.py b/sklearn/feature_selection/rfe.py index c3fd283b1ca34..abe3caf370a26 100644 --- a/sklearn/feature_selection/rfe.py +++ b/sklearn/feature_selection/rfe.py @@ -335,16 +335,17 @@ def fit(self, X, y, sample_weight=None): # Cross-validation for n, (train, test) in enumerate(cv): - X_train, y_train, sample_weight_train = _safe_split( - self.estimator, X, y, sample_weight, train) - X_test, y_test, sample_weight_test = _safe_split( - 
self.estimator, X, y, sample_weight, test, train) + X_train, y_train = _safe_split( + self.estimator, X, y, train) + X_test, y_test = _safe_split( + self.estimator, X, y, test, train) fit_params = dict() score_params = dict() if sample_weight is not None: - fit_params['sample_weight'] = sample_weight_train - score_params['sample_weight'] = sample_weight_test + sample_weight = np.asarray(sample_weight) + fit_params['sample_weight'] = sample_weight[train] + score_params['sample_weight'] = sample_weight[test] # Compute a full ranking of the features ranking_ = rfe.fit(X_train, y_train, **fit_params).ranking_ diff --git a/sklearn/grid_search.py b/sklearn/grid_search.py index 65c32fef6109b..4d7cb28816762 100644 --- a/sklearn/grid_search.py +++ b/sklearn/grid_search.py @@ -281,7 +281,8 @@ class BaseSearchCV(six.with_metaclass(ABCMeta, BaseEstimator, @abstractmethod def __init__(self, estimator, scoring=None, fit_params=None, n_jobs=1, iid=True, - refit=True, cv=None, verbose=0, pre_dispatch='2*n_jobs'): + refit=True, cv=None, verbose=0, pre_dispatch='2*n_jobs', + scorer_params=None): self.scoring = scoring self.estimator = estimator @@ -292,8 +293,9 @@ def __init__(self, estimator, scoring=None, self.cv = cv self.verbose = verbose self.pre_dispatch = pre_dispatch + self.scorer_params = scorer_params - def score(self, X, y=None, sample_weight=None): + def score(self, X, y=None, **scorer_params): """Returns the score on the given test data and labels, if the search estimator has been refit. The ``score`` function of the best estimator is used, or the ``scoring`` parameter where unavailable. @@ -308,24 +310,18 @@ def score(self, X, y=None, sample_weight=None): Target relative to X for classification or regression; None for unsupervised learning. - sample_weight : array-like, shape = [n_samples], optional - Sample weights. 
- Returns ------- score : float """ - kwargs = {} - if sample_weight is not None: - kwargs['sample_weight'] = sample_weight if hasattr(self.best_estimator_, 'score'): - return self.best_estimator_.score(X, y, **kwargs) + return self.best_estimator_.score(X, y, **scorer_params) if self.scorer_ is None: raise ValueError("No score function explicitly defined, " "and the estimator doesn't provide one %s" % self.best_estimator_) - return self.scorer_(self.best_estimator_, X, y, **kwargs) + return self.scorer_(self.best_estimator_, X, y, **scorer_params) @property def predict(self): @@ -343,7 +339,7 @@ def decision_function(self): def transform(self): return self.best_estimator_.transform - def _fit(self, X, y, sample_weight, parameter_iterable): + def _fit(self, X, y, parameter_iterable): """Actual fitting, performing the search over parameters.""" estimator = self.estimator @@ -351,7 +347,7 @@ def _fit(self, X, y, sample_weight, parameter_iterable): self.scorer_ = check_scoring(self.estimator, scoring=self.scoring) n_samples = _num_samples(X) - X, y, sample_weight = indexable(X, y, sample_weight) + X, y = indexable(X, y) if y is not None: if len(y) != n_samples: @@ -376,10 +372,10 @@ def _fit(self, X, y, sample_weight, parameter_iterable): n_jobs=self.n_jobs, verbose=self.verbose, pre_dispatch=pre_dispatch )( - delayed(_fit_and_score)(clone(base_estimator), X, y, sample_weight, + delayed(_fit_and_score)(clone(base_estimator), X, y, self.scorer_, train, test, self.verbose, parameters, self.fit_params, - return_parameters=True) + self.scorer_params, return_parameters=True) for parameters in parameter_iterable for train, test in cv) @@ -422,9 +418,6 @@ def _fit(self, X, y, sample_weight, parameter_iterable): if self.refit: fit_params = self.fit_params - if sample_weight is not None: - fit_params = fit_params.copy() - fit_params['sample_weight'] = sample_weight # fit the best estimator using the entire dataset # clone first to work around broken estimators best_estimator = clone(base_estimator).set_params( @@ -580,14 +573,15 @@ class GridSearchCV(BaseSearchCV): def __init__(self, estimator, param_grid, scoring=None, fit_params=None, n_jobs=1, iid=True, - refit=True, cv=None, verbose=0, pre_dispatch='2*n_jobs'): + refit=True, cv=None, verbose=0, pre_dispatch='2*n_jobs', + scorer_params=None): super(GridSearchCV, self).__init__( estimator, scoring, fit_params, n_jobs, iid, - refit, cv, verbose, pre_dispatch) + refit, cv, verbose, pre_dispatch, scorer_params) self.param_grid = param_grid _check_param_grid(param_grid) - def fit(self, X, y=None, sample_weight=None): + def fit(self, X, y=None): """Run fit with all sets of parameters. Parameters @@ -600,11 +594,8 @@ def fit(self, X, y=None, sample_weight=None): y : array-like, shape = [n_samples] or [n_samples, n_output], optional Target relative to X for classification or regression; None for unsupervised learning. - - sample_weight : array-like, shape = [n_samples], optional - Sample weights. 
""" - return self._fit(X, y, sample_weight, ParameterGrid(self.param_grid)) + return self._fit(X, y, ParameterGrid(self.param_grid)) class RandomizedSearchCV(BaseSearchCV): @@ -730,7 +721,8 @@ class RandomizedSearchCV(BaseSearchCV): def __init__(self, estimator, param_distributions, n_iter=10, scoring=None, fit_params=None, n_jobs=1, iid=True, refit=True, cv=None, - verbose=0, pre_dispatch='2*n_jobs', random_state=None): + verbose=0, pre_dispatch='2*n_jobs', random_state=None, + scorer_params=None): self.param_distributions = param_distributions self.n_iter = n_iter @@ -738,9 +730,9 @@ def __init__(self, estimator, param_distributions, n_iter=10, scoring=None, super(RandomizedSearchCV, self).__init__( estimator=estimator, scoring=scoring, fit_params=fit_params, n_jobs=n_jobs, iid=iid, refit=refit, cv=cv, verbose=verbose, - pre_dispatch=pre_dispatch) + pre_dispatch=pre_dispatch, scorer_params=scorer_params) - def fit(self, X, y=None, sample_weight=None): + def fit(self, X, y=None): """Run fit on the estimator with randomly drawn parameters. Parameters @@ -752,12 +744,8 @@ def fit(self, X, y=None, sample_weight=None): y : array-like, shape = [n_samples] or [n_samples, n_output], optional Target relative to X for classification or regression; None for unsupervised learning. - - sample_weight : array-like, shape = [n_samples], optional - Sample weights. - """ sampled_params = ParameterSampler(self.param_distributions, self.n_iter, random_state=self.random_state) - return self._fit(X, y, sample_weight, sampled_params) + return self._fit(X, y, sampled_params) diff --git a/sklearn/learning_curve.py b/sklearn/learning_curve.py index 835a4ec9ac398..3b9bff5613561 100644 --- a/sklearn/learning_curve.py +++ b/sklearn/learning_curve.py @@ -136,10 +136,15 @@ def learning_curve(estimator, X, y, sample_weight=None, classes, train, test, train_sizes_abs, scorer, verbose) for train, test in cv) else: + if sample_weight is not None: + params = dict(sample_weight=sample_weight) + else: + params = None out = parallel(delayed(_fit_and_score)( - clone(estimator), X, y, sample_weight, + clone(estimator), X, y, scorer, train[:n_train_samples], test, - verbose, parameters=None, fit_params=None, return_train_score=True) + verbose, parameters=None, fit_params=params, scorer_params=params, + return_train_score=True) for train, test in cv for n_train_samples in train_sizes_abs) out = np.array(out)[:, :2] n_cv_folds = out.shape[0] // n_unique_ticks @@ -217,20 +222,26 @@ def _incremental_fit_estimator(estimator, X, y, sample_weight, partitions = zip(train_sizes, np.split(train, train_sizes)[:-1]) for n_train_samples, partial_train in partitions: train_subset = train[:n_train_samples] - X_train, y_train, sample_weight_train = _safe_split( - estimator, X, y, sample_weight, train_subset) - X_partial_train, y_partial_train, sample_weight_partial_train = \ - _safe_split(estimator, X, y, sample_weight, partial_train) - X_test, y_test, sample_weight_test = _safe_split( - estimator, X, y, sample_weight, test, train_subset) + X_train, y_train = _safe_split( + estimator, X, y, train_subset) + X_partial_train, y_partial_train = \ + _safe_split(estimator, X, y, partial_train) + X_test, y_test = _safe_split( + estimator, X, y, test, train_subset) + + # TODO: replace sample_weight with fit_params and scorer_params fit_params = dict() - train_score_params = dict() - test_score_params = dict() + train_scorer_params = dict() + test_scorer_params = dict() if sample_weight is not None: + sample_weight = np.asarray(sample_weight) + 
sample_weight_train = sample_weight[train_subset] + sample_weight_partial_train = sample_weight[partial_train] + sample_weight_test = sample_weight[test] fit_params['sample_weight'] = sample_weight_partial_train - train_score_params['sample_weight'] = sample_weight_train - test_score_params['sample_weight'] = sample_weight_test + train_scorer_params['sample_weight'] = sample_weight_train + test_scorer_params['sample_weight'] = sample_weight_test if y_partial_train is None: estimator.partial_fit(X_partial_train, @@ -239,9 +250,9 @@ def _incremental_fit_estimator(estimator, X, y, sample_weight, estimator.partial_fit(X_partial_train, y_partial_train, classes=classes, **fit_params) train_scores.append(_score( - estimator, X_train, y_train, scorer, **train_score_params)) + estimator, X_train, y_train, scorer, **train_scorer_params)) test_scores.append(_score( - estimator, X_test, y_test, scorer, **test_score_params)) + estimator, X_test, y_test, scorer, **test_scorer_params)) return np.array((train_scores, test_scores)).T @@ -320,9 +331,14 @@ def validation_curve(estimator, X, y, param_name, param_range, parallel = Parallel(n_jobs=n_jobs, pre_dispatch=pre_dispatch, verbose=verbose) + if sample_weight is not None: + params = dict(sample_weight=sample_weight) + else: + params = None out = parallel(delayed(_fit_and_score)( - estimator, X, y, sample_weight, scorer, train, test, verbose, - parameters={param_name: v}, fit_params=None, return_train_score=True) + estimator, X, y, scorer, train, test, verbose, + parameters={param_name: v}, fit_params=params, scorer_params=params, + return_train_score=True) for train, test in cv for v in param_range) out = np.asarray(out)[:, :2] diff --git a/sklearn/tests/test_cross_validation.py b/sklearn/tests/test_cross_validation.py index bdb791ad080f7..f0ccca66da02c 100644 --- a/sklearn/tests/test_cross_validation.py +++ b/sklearn/tests/test_cross_validation.py @@ -468,7 +468,9 @@ def test_cross_val_score(): for a in range(-10, 10): clf.a = a # Smoke test - scores = cval.cross_val_score(clf, X, y, sample_weight=int_weights) + params = dict(sample_weight=int_weights) + scores = cval.cross_val_score(clf, X, y, + fit_params=params, scorer_params=params) assert_array_equal(scores, clf.score(X, y, sample_weight=int_weights)) # test with multioutput y @@ -483,8 +485,9 @@ def test_cross_val_score(): assert_array_equal(scores, clf.score(X_sparse, X)) # test with sample_weight as list + params = dict(sample_weight=int_weights.tolist()) scores = cval.cross_val_score( - clf, X, y, sample_weight=int_weights.tolist()) + clf, X, y, fit_params=params, scorer_params=params) # test with X and y as list list_check = lambda x: isinstance(x, list) @@ -920,12 +923,12 @@ def test_safe_split_with_precomputed_kernel(): cv = cval.ShuffleSplit(X.shape[0], test_size=0.25, random_state=0) tr, te = list(cv)[0] - X_tr, y_tr, _ = cval._safe_split(clf, X, y, None, tr) - K_tr, y_tr2, _ = cval._safe_split(clfp, K, y, None, tr) + X_tr, y_tr = cval._safe_split(clf, X, y, tr) + K_tr, y_tr2 = cval._safe_split(clfp, K, y, tr) assert_array_almost_equal(K_tr, np.dot(X_tr, X_tr.T)) - X_te, y_te, _ = cval._safe_split(clf, X, y, None, te, tr) - K_te, y_te2, _ = cval._safe_split(clfp, K, y, None, te, tr) + X_te, y_te = cval._safe_split(clf, X, y, te, tr) + K_te, y_te2 = cval._safe_split(clfp, K, y, te, tr) assert_array_almost_equal(K_te, np.dot(X_te, X_tr.T)) diff --git a/sklearn/tests/test_grid_search.py b/sklearn/tests/test_grid_search.py index b1573abbb4fb7..81245a6ccf22f 100644 --- 
a/sklearn/tests/test_grid_search.py +++ b/sklearn/tests/test_grid_search.py @@ -656,8 +656,12 @@ def test_grid_search_with_sample_weights(): est_parameters = {"foo_param": [1, 2, 3]} cv = KFold(y.shape[0], n_folds=2, random_state=0) for search_cls in (GridSearchCV, RandomizedSearchCV): - grid_search = search_cls(MockClassifier(), est_parameters, cv=cv) - grid_search.fit(X, y, sample_weight=sample_weight) + params = dict(sample_weight=sample_weight) + grid_search = search_cls(MockClassifier(), est_parameters, cv=cv, + fit_params=params, scorer_params=params) + grid_search.fit(X, y) # check that sample_weight can be a list - grid_search = GridSearchCV(MockClassifier(), est_parameters, cv=cv) - grid_search.fit(X, y, sample_weight=sample_weight.tolist()) + params = dict(sample_weight=sample_weight.tolist()) + grid_search = GridSearchCV(MockClassifier(), est_parameters, cv=cv, + fit_params=params, scorer_params=params) + grid_search.fit(X, y) diff --git a/sklearn/tests/test_learning_curve.py b/sklearn/tests/test_learning_curve.py index 62a05dd19799e..c039567669274 100644 --- a/sklearn/tests/test_learning_curve.py +++ b/sklearn/tests/test_learning_curve.py @@ -25,7 +25,7 @@ def __init__(self, n_max_train_sizes): self.train_sizes = 0 self.X_subset = None - def fit(self, X_subset, y_subset=None): + def fit(self, X_subset, y_subset=None, **params): self.X_subset = X_subset self.train_sizes = X_subset.shape[0] return self @@ -65,7 +65,7 @@ def __init__(self, param=0.5): self.X_subset = None self.param = param - def fit(self, X_subset, y_subset): + def fit(self, X_subset, y_subset, **params): self.X_subset = X_subset self.train_sizes = X_subset.shape[0] return self
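
Usage sketch for the final API. This is a hypothetical example, not part of the patch series: it assumes a checkout with all five patches applied, and an estimator whose ``fit`` and ``score`` both accept ``sample_weight`` (here ``sklearn.svm.SVC`` with the default passthrough scorer; the data and weights are made up). The names ``fit_params`` and ``scorer_params`` come from PATCH 5/5.

    import numpy as np
    from sklearn.svm import SVC
    from sklearn.cross_validation import cross_val_score
    from sklearn.grid_search import GridSearchCV

    rng = np.random.RandomState(0)
    X = rng.randn(100, 5)
    y = (X[:, 0] > 0).astype(int)
    weights = rng.rand(100)

    # _fit_and_score slices any value in these dicts whose length equals
    # n_samples, so on each fold the train slice reaches fit() and the
    # matching train/test slices reach the scorer.
    params = dict(sample_weight=weights)
    scores = cross_val_score(SVC(kernel='linear'), X, y,
                             fit_params=params, scorer_params=params)

    # The search estimators take both dicts at construction time, so
    # fit() keeps its plain (X, y) signature after PATCH 5/5.
    search = GridSearchCV(SVC(kernel='linear'), {'C': [0.1, 1.0, 10.0]},
                          fit_params=params, scorer_params=params)
    search.fit(X, y)

Note the design choice visible in PATCH 5/5: with ``refit=True`` the unsliced ``fit_params`` are passed straight to ``best_estimator.fit(X, y, **fit_params)``, so the full-length weight vector is used when retraining on the whole dataset.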