From b217697e4c3b6e2cf5f01cb87fe7a094b2168669 Mon Sep 17 00:00:00 2001 From: Alexander Fabisch Date: Thu, 9 Jan 2014 09:18:02 +0100 Subject: [PATCH 01/51] Refactor cv code --- sklearn/cross_validation.py | 153 ++++++++++++++++++++++++------------ sklearn/grid_search.py | 75 +----------------- sklearn/learning_curve.py | 2 +- 3 files changed, 103 insertions(+), 127 deletions(-) diff --git a/sklearn/cross_validation.py b/sklearn/cross_validation.py index 84045105d56a9..06d909ce35dfa 100644 --- a/sklearn/cross_validation.py +++ b/sklearn/cross_validation.py @@ -22,6 +22,7 @@ from .base import is_classifier, clone from .utils import check_arrays, check_random_state, safe_mask +from .utils.validation import _num_samples from .utils.fixes import unique from .externals.joblib import Parallel, delayed from .externals.six import string_types, with_metaclass @@ -1023,48 +1024,6 @@ def __len__(self): ############################################################################## -def _cross_val_score(estimator, X, y, scorer, train, test, verbose, - fit_params): - """Inner loop for cross validation""" - n_samples = X.shape[0] if sp.issparse(X) else len(X) - fit_params = dict([(k, np.asarray(v)[train] - if hasattr(v, '__len__') and len(v) == n_samples else v) - for k, v in fit_params.items()]) - if not hasattr(X, "shape"): - if getattr(estimator, "_pairwise", False): - raise ValueError("Precomputed kernels or affinity matrices have " - "to be passed as arrays or sparse matrices.") - X_train = [X[idx] for idx in train] - X_test = [X[idx] for idx in test] - else: - if getattr(estimator, "_pairwise", False): - # X is a precomputed square kernel matrix - if X.shape[0] != X.shape[1]: - raise ValueError("X should be a square kernel matrix") - X_train = X[np.ix_(train, train)] - X_test = X[np.ix_(test, train)] - else: - X_train = X[safe_mask(X, train)] - X_test = X[safe_mask(X, test)] - - if y is None: - y_train = None - y_test = None - else: - y_train = y[train] - y_test = y[test] - estimator.fit(X_train, y_train, **fit_params) - if scorer is None: - score = estimator.score(X_test, y_test) - else: - score = scorer(estimator, X_test, y_test) - if not isinstance(score, numbers.Number): - raise ValueError("scoring must return a number, got %s (%s)" - " instead." % (str(score), type(score))) - if verbose > 1: - print("score: %f" % score) - return score - def cross_val_score(estimator, X, y=None, scoring=None, cv=None, n_jobs=1, verbose=0, fit_params=None, score_func=None, @@ -1127,16 +1086,9 @@ def cross_val_score(estimator, X, y=None, scoring=None, cv=None, n_jobs=1, """ X, y = check_arrays(X, y, sparse_format='csr', allow_lists=True) cv = _check_cv(cv, X, y, classifier=is_classifier(estimator)) - scorer = _deprecate_loss_and_score_funcs( - loss_func=None, - score_func=score_func, - scoring=scoring - ) - if scorer is None and not hasattr(estimator, 'score'): - raise TypeError( - "If no scoring is specified, the estimator passed " - "should have a 'score' method. The estimator %s " - "does not." % estimator) + _check_scorable(estimator, score_func=score_func, scoring=scoring) + scorer = _deprecate_loss_and_score_funcs(score_func=score_func, + scoring=scoring) # We clone the estimator to make sure that all the folds are # independent, and that it is pickle-able. 
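# ---------------------------------------------------------------------------
# [Editorial sketch -- not part of the patch above.] For orientation, the
# public entry point whose internals this patch series reshuffles is
# cross_val_score; its observable behaviour is meant to stay the same. A
# minimal, hedged usage example (iris and SVC are illustrative stand-ins,
# assuming a scikit-learn checkout of this vintage):
from sklearn import datasets, svm
from sklearn.cross_validation import cross_val_score

iris = datasets.load_iris()
clf = svm.SVC(kernel='linear', C=1)
# One score per CV fold; scorer resolution and per-fold fit/score are exactly
# the pieces being factored into helpers in this commit.
scores = cross_val_score(clf, iris.data, iris.target, cv=5, scoring='accuracy')
# ---------------------------------------------------------------------------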
fit_params = fit_params if fit_params is not None else {} @@ -1149,6 +1101,85 @@ def cross_val_score(estimator, X, y=None, scoring=None, cv=None, n_jobs=1, return np.array(scores) +def _cross_val_score(estimator, X, y, scorer, train, test, verbose, + fit_params): + """Inner loop for cross validation""" + # TODO replace with grid_search.fit_grid_point() + n_samples = _num_samples(X) + fit_params = dict([(k, np.asarray(v)[train] # TODO why is this necessary? + if hasattr(v, '__len__') and len(v) == n_samples else v) + for k, v in fit_params.items()]) + + X_train, y_train = _split(estimator, X, y, train) + X_test, y_test = _split(estimator, X, y, test, train) + estimator.fit(X_train, y_train, **fit_params) + score = _score(estimator, X_test, y_test, scorer) + + if verbose > 1: + print("score: %f" % score) + return score + + +def _split(estimator, X, y, indices, train_indices=None): + """Create subset of dataset.""" + if hasattr(estimator, 'kernel') and callable(estimator.kernel): + # cannot compute the kernel values with custom function + raise ValueError("Cannot use a custom kernel function. " + "Precompute the kernel matrix instead.") + + if not hasattr(X, "shape"): + if getattr(estimator, "_pairwise", False): + raise ValueError("Precomputed kernels or affinity matrices have " + "to be passed as arrays or sparse matrices.") + X_subset = [X[idx] for idx in indices] + else: + if getattr(estimator, "_pairwise", False): + # X is a precomputed square kernel matrix + if X.shape[0] != X.shape[1]: + raise ValueError("X should be a square kernel matrix") + if train_indices is None: + X_subset = X[np.ix_(indices, indices)] + else: + X_subset = X[np.ix_(indices, train_indices)] + else: + X_subset = X[safe_mask(X, indices)] + + if y is not None: + y_subset = y[safe_mask(y, indices)] + else: + y_subset = None + + return X_subset, y_subset + + +def _fit(fit_function, X_train, y_train, **fit_params): + """Fit and estimator on a given training set.""" + if y_train is None: + fit_function(X_train, **fit_params) + else: + fit_function(X_train, y_train, **fit_params) + + +def _score(estimator, X_test, y_test, scorer): + """Compute the score of an estimator on a given test set.""" + if y_test is None: + if scorer is None: + score = estimator.score(X_test) + else: + score = scorer(estimator, X_test) + else: + if scorer is None: + score = estimator.score(X_test, y_test) + else: + score = scorer(estimator, X_test, y_test) + + if not isinstance(score, numbers.Number): + raise ValueError("scoring must return a number, got %s (%s) instead." + % (str(score), type(score))) + + return score + + def _permutation_test_score(estimator, X, y, cv, scorer): """Auxiliary function for permutation_test_score""" avg_score = [] @@ -1226,6 +1257,24 @@ def _check_cv(cv, X=None, y=None, classifier=False, warn_mask=False): return cv +def _check_scorable(estimator, scoring=None, loss_func=None, score_func=None): + """Check that estimator can be fitted and score can be computed.""" + if (not hasattr(estimator, 'fit') or + not (hasattr(estimator, 'predict') + or hasattr(estimator, 'score'))): + raise TypeError("estimator should a be an estimator implementing" + " 'fit' and 'predict' or 'score' methods," + " %s (type %s) was passed" % + (estimator, type(estimator))) + if (scoring is None and loss_func is None and score_func + is None): + if not hasattr(estimator, 'score'): + raise TypeError( + "If no scoring is specified, the estimator passed " + "should have a 'score' method. The estimator %s " + "does not." 
% estimator) + + def permutation_test_score(estimator, X, y, score_func=None, cv=None, n_permutations=100, n_jobs=1, labels=None, random_state=0, verbose=0, scoring=None): diff --git a/sklearn/grid_search.py b/sklearn/grid_search.py index 108d320139c2e..b3fa04dc4bc8a 100644 --- a/sklearn/grid_search.py +++ b/sklearn/grid_search.py @@ -24,6 +24,7 @@ from .base import BaseEstimator, is_classifier, clone from .base import MetaEstimatorMixin from .cross_validation import _check_cv as check_cv +from .cross_validation import _check_scorable, _split, _fit, _score from .externals.joblib import Parallel, delayed, logger from .externals import six from .utils import safe_mask, check_random_state @@ -255,62 +256,6 @@ def fit_grid_point(X, y, base_estimator, parameters, train, test, scorer, return this_score, parameters, _num_samples(X_test) -def _split(estimator, X, y, indices, train_indices=None): - """Create subset of dataset.""" - if hasattr(estimator, 'kernel') and callable(estimator.kernel): - # cannot compute the kernel values with custom function - raise ValueError("Cannot use a custom kernel function. " - "Precompute the kernel matrix instead.") - - if not hasattr(X, "shape"): - if getattr(estimator, "_pairwise", False): - raise ValueError("Precomputed kernels or affinity matrices have " - "to be passed as arrays or sparse matrices.") - X_subset = [X[idx] for idx in indices] - else: - if getattr(estimator, "_pairwise", False): - # X is a precomputed square kernel matrix - if X.shape[0] != X.shape[1]: - raise ValueError("X should be a square kernel matrix") - if train_indices is None: - X_subset = X[np.ix_(indices, indices)] - else: - X_subset = X[np.ix_(indices, train_indices)] - else: - X_subset = X[safe_mask(X, indices)] - - if y is not None: - y_subset = y[safe_mask(y, indices)] - else: - y_subset = None - - return X_subset, y_subset - - -def _fit(fit_function, X_train, y_train, **fit_params): - """Fit and estimator on a given training set.""" - if y_train is None: - fit_function(X_train, **fit_params) - else: - fit_function(X_train, y_train, **fit_params) - - -def _score(estimator, X_test, y_test, scorer): - """Compute the score of an estimator on a given test set.""" - if y_test is None: - if scorer is None: - this_score = estimator.score(X_test) - else: - this_score = scorer(estimator, X_test) - else: - if scorer is None: - this_score = estimator.score(X_test, y_test) - else: - this_score = scorer(estimator, X_test, y_test) - - return this_score - - def _check_param_grid(param_grid): if hasattr(param_grid, 'items'): param_grid = [param_grid] @@ -351,24 +296,6 @@ def __repr__(self): self.parameters) -def _check_scorable(estimator, scoring=None, loss_func=None, score_func=None): - """Check that estimator can be fitted and score can be computed.""" - if (not hasattr(estimator, 'fit') or - not (hasattr(estimator, 'predict') - or hasattr(estimator, 'score'))): - raise TypeError("estimator should a be an estimator implementing" - " 'fit' and 'predict' or 'score' methods," - " %s (type %s) was passed" % - (estimator, type(estimator))) - if (scoring is None and loss_func is None and score_func - is None): - if not hasattr(estimator, 'score'): - raise TypeError( - "If no scoring is specified, the estimator passed " - "should have a 'score' method. The estimator %s " - "does not." 
% estimator) - - class BaseSearchCV(six.with_metaclass(ABCMeta, BaseEstimator, MetaEstimatorMixin)): """Base class for hyper parameter search with cross-validation.""" diff --git a/sklearn/learning_curve.py b/sklearn/learning_curve.py index 406f28b12c280..f17c9a5a9fe30 100644 --- a/sklearn/learning_curve.py +++ b/sklearn/learning_curve.py @@ -11,7 +11,7 @@ from .utils import check_arrays from .externals.joblib import Parallel, delayed from .metrics.scorer import get_scorer -from .grid_search import _check_scorable, _split, _fit, _score +from .cross_validation import _check_scorable, _split, _fit, _score def learning_curve(estimator, X, y, train_sizes=np.linspace(0.1, 1.0, 10), From c4d6278c83859de73bb6c53a62dc98973d6a9c79 Mon Sep 17 00:00:00 2001 From: Alexander Fabisch Date: Thu, 9 Jan 2014 23:37:01 +0100 Subject: [PATCH 02/51] Clean up --- sklearn/cross_validation.py | 5 ++--- sklearn/grid_search.py | 11 +++++------ 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/sklearn/cross_validation.py b/sklearn/cross_validation.py index 06d909ce35dfa..4d42a626c24eb 100644 --- a/sklearn/cross_validation.py +++ b/sklearn/cross_validation.py @@ -1091,7 +1091,6 @@ def cross_val_score(estimator, X, y=None, scoring=None, cv=None, n_jobs=1, scoring=scoring) # We clone the estimator to make sure that all the folds are # independent, and that it is pickle-able. - fit_params = fit_params if fit_params is not None else {} parallel = Parallel(n_jobs=n_jobs, verbose=verbose, pre_dispatch=pre_dispatch) scores = parallel( @@ -1104,15 +1103,15 @@ def cross_val_score(estimator, X, y=None, scoring=None, cv=None, n_jobs=1, def _cross_val_score(estimator, X, y, scorer, train, test, verbose, fit_params): """Inner loop for cross validation""" - # TODO replace with grid_search.fit_grid_point() n_samples = _num_samples(X) + fit_params = fit_params if fit_params is not None else {} fit_params = dict([(k, np.asarray(v)[train] # TODO why is this necessary? 
if hasattr(v, '__len__') and len(v) == n_samples else v) for k, v in fit_params.items()]) X_train, y_train = _split(estimator, X, y, train) X_test, y_test = _split(estimator, X, y, test, train) - estimator.fit(X_train, y_train, **fit_params) + _fit(estimator.fit, X_train, y_train, **fit_params) score = _score(estimator, X_test, y_test, scorer) if verbose > 1: diff --git a/sklearn/grid_search.py b/sklearn/grid_search.py index b3fa04dc4bc8a..bdbc26c9436c6 100644 --- a/sklearn/grid_search.py +++ b/sklearn/grid_search.py @@ -243,17 +243,16 @@ def fit_grid_point(X, y, base_estimator, parameters, train, test, scorer, X_train, y_train = _split(estimator, X, y, train) X_test, y_test = _split(estimator, X, y, test, train) _fit(estimator.fit, X_train, y_train, **fit_params) - this_score = _score(estimator, X_test, y_test, scorer) + score = _score(estimator, X_test, y_test, scorer) if verbose > 2: - msg += ", score=%f" % this_score + msg += ", score=%f" % score if verbose > 1: - end_msg = "%s -%s" % (msg, - logger.short_format_time(time.time() - - start_time)) + end_msg = "%s -%s" % (msg, logger.short_format_time(time.time() - + start_time)) print("[GridSearchCV] %s %s" % ((64 - len(end_msg)) * '.', end_msg)) - return this_score, parameters, _num_samples(X_test) + return score, parameters, _num_samples(X_test) def _check_param_grid(param_grid): From 1599952d022fee81fc043a712dee4eae5a2dae5a Mon Sep 17 00:00:00 2001 From: Alexander Fabisch Date: Fri, 10 Jan 2014 09:27:43 +0100 Subject: [PATCH 03/51] Refactor RFE and add _check_scorable --- sklearn/feature_selection/rfe.py | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/sklearn/feature_selection/rfe.py b/sklearn/feature_selection/rfe.py index cc80d7ffdcbda..49820742289f8 100644 --- a/sklearn/feature_selection/rfe.py +++ b/sklearn/feature_selection/rfe.py @@ -13,6 +13,7 @@ from ..base import clone from ..base import is_classifier from ..cross_validation import _check_cv as check_cv +from ..cross_validation import _check_scorable, _split, _score from .base import SelectorMixin from ..metrics.scorer import _deprecate_loss_and_score_funcs @@ -325,29 +326,31 @@ def fit(self, X, y): verbose=self.verbose - 1) cv = check_cv(self.cv, X, y, is_classifier(self.estimator)) + _check_scorable(self.estimator, scoring=self.scoring, + loss_func=self.loss_func) scores = np.zeros(X.shape[1]) # Cross-validation for n, (train, test) in enumerate(cv): - X_train, X_test = X[train], X[test] - y_train, y_test = y[train], y[test] + X_train, y_train = _split(self.estimator, X, y, train) + X_test, y_test = _split(self.estimator, X, y, test, train) # Compute a full ranking of the features ranking_ = rfe.fit(X_train, y_train).ranking_ # Score each subset of features for k in range(0, max(ranking_)): mask = np.where(ranking_ <= k + 1)[0] + X_train_subset = X_train[:, mask] + X_test_subset = X_test[:, mask] + estimator = clone(self.estimator) - estimator.fit(X_train[:, mask], y_train) - - if self.loss_func is None and self.scoring is None: - score = estimator.score(X_test[:, mask], y_test) - else: - scorer = _deprecate_loss_and_score_funcs( - loss_func=self.loss_func, - scoring=self.scoring - ) - score = scorer(estimator, X_test[:, mask], y_test) + estimator.fit(X_train_subset, y_train) + + scorer = _deprecate_loss_and_score_funcs( + loss_func=self.loss_func, + scoring=self.scoring + ) + score = _score(estimator, X_test_subset, y_test, scorer) if self.verbose > 0: print("Finished fold with %d / %d feature ranks, score=%f" From 
5e520318c508d7fa151495e637ecbdb23264dc6c Mon Sep 17 00:00:00 2001 From: Alexander Fabisch Date: Fri, 10 Jan 2014 09:41:08 +0100 Subject: [PATCH 04/51] FIX typo in docstring --- sklearn/cross_validation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/cross_validation.py b/sklearn/cross_validation.py index 4d42a626c24eb..ca818076c3dcb 100644 --- a/sklearn/cross_validation.py +++ b/sklearn/cross_validation.py @@ -1152,7 +1152,7 @@ def _split(estimator, X, y, indices, train_indices=None): def _fit(fit_function, X_train, y_train, **fit_params): - """Fit and estimator on a given training set.""" + """Fit an estimator on a given training set.""" if y_train is None: fit_function(X_train, **fit_params) else: From 4b5f468c05814efa9baf8d8d3c34a54f5ae61f1e Mon Sep 17 00:00:00 2001 From: Alexander Fabisch Date: Fri, 10 Jan 2014 10:53:54 +0100 Subject: [PATCH 05/51] Merge `fit_grid_point` into `_cross_val_score` --- sklearn/cross_validation.py | 35 +++++++++++++++++++++++-------- sklearn/grid_search.py | 41 ++++++++++--------------------------- 2 files changed, 38 insertions(+), 38 deletions(-) diff --git a/sklearn/cross_validation.py b/sklearn/cross_validation.py index ca818076c3dcb..050e70c2d8089 100644 --- a/sklearn/cross_validation.py +++ b/sklearn/cross_validation.py @@ -15,6 +15,7 @@ from itertools import chain, combinations from math import ceil, floor, factorial import numbers +import time from abc import ABCMeta, abstractmethod import numpy as np @@ -24,7 +25,7 @@ from .utils import check_arrays, check_random_state, safe_mask from .utils.validation import _num_samples from .utils.fixes import unique -from .externals.joblib import Parallel, delayed +from .externals.joblib import Parallel, delayed, logger from .externals.six import string_types, with_metaclass from .metrics.scorer import _deprecate_loss_and_score_funcs @@ -1095,17 +1096,30 @@ def cross_val_score(estimator, X, y=None, scoring=None, cv=None, n_jobs=1, pre_dispatch=pre_dispatch) scores = parallel( delayed(_cross_val_score)(clone(estimator), X, y, scorer, train, test, - verbose, fit_params) + parameters=None, verbose=verbose, + fit_params=fit_params, + log_label="cross_val_score") for train, test in cv) - return np.array(scores) + return np.array(scores)[:, 0] -def _cross_val_score(estimator, X, y, scorer, train, test, verbose, - fit_params): +def _cross_val_score(estimator, X, y, scorer, train, test, parameters, verbose, + fit_params, log_label): """Inner loop for cross validation""" + if parameters is not None: + estimator.set_params(**parameters) + if verbose > 1: + start_time = time.time() + if parameters is None: + msg = "Evaluating..." + else: + msg = '%s' % (', '.join('%s=%s' % (k, v) + for k, v in parameters.items())) + print("[%s] %s %s" % (log_label, msg, (64 - len(msg)) * '.')) + n_samples = _num_samples(X) fit_params = fit_params if fit_params is not None else {} - fit_params = dict([(k, np.asarray(v)[train] # TODO why is this necessary? 
+ fit_params = dict([(k, np.asarray(v)[train] if hasattr(v, '__len__') and len(v) == n_samples else v) for k, v in fit_params.items()]) @@ -1114,9 +1128,14 @@ def _cross_val_score(estimator, X, y, scorer, train, test, verbose, _fit(estimator.fit, X_train, y_train, **fit_params) score = _score(estimator, X_test, y_test, scorer) + if verbose > 2: + msg += ", score=%f" % score if verbose > 1: - print("score: %f" % score) - return score + end_msg = "%s -%s" % (msg, logger.short_format_time(time.time() - + start_time)) + print("[%s] %s %s" % (log_label, (64 - len(end_msg)) * '.', end_msg)) + + return score, _num_samples(X_test) def _split(estimator, X, y, indices, train_indices=None): diff --git a/sklearn/grid_search.py b/sklearn/grid_search.py index bdbc26c9436c6..87ff9dedbdb1b 100644 --- a/sklearn/grid_search.py +++ b/sklearn/grid_search.py @@ -16,7 +16,6 @@ from itertools import product import numbers import operator -import time import warnings import numpy as np @@ -24,8 +23,8 @@ from .base import BaseEstimator, is_classifier, clone from .base import MetaEstimatorMixin from .cross_validation import _check_cv as check_cv -from .cross_validation import _check_scorable, _split, _fit, _score -from .externals.joblib import Parallel, delayed, logger +from .cross_validation import _check_scorable, _cross_val_score +from .externals.joblib import Parallel, delayed from .externals import six from .utils import safe_mask, check_random_state from .utils.validation import _num_samples, check_arrays @@ -184,7 +183,7 @@ def __len__(self): return self.n_iter -def fit_grid_point(X, y, base_estimator, parameters, train, test, scorer, +def fit_grid_point(X, y, estimator, parameters, train, test, scorer, verbose, loss_func=None, **fit_params): """Run fit on one set of parameters. @@ -196,11 +195,11 @@ def fit_grid_point(X, y, base_estimator, parameters, train, test, scorer, y : array-like or None Targets for input data. - base_estimator : estimator object + estimator : estimator object This estimator will be cloned and then fitted. parameters : dict - Parameters to be set on base_estimator clone for this grid point. + Parameters to be set on estimator for this grid point. train : ndarray, dtype int or bool Boolean mask or indices for training set. @@ -230,29 +229,11 @@ def fit_grid_point(X, y, base_estimator, parameters, train, test, scorer, n_samples_test : int Number of test samples in this split. 
""" - if verbose > 1: - start_time = time.time() - msg = '%s' % (', '.join('%s=%s' % (k, v) - for k, v in parameters.items())) - print("[GridSearchCV] %s %s" % (msg, (64 - len(msg)) * '.')) - - # update parameters of the classifier after a copy of its base structure - estimator = clone(base_estimator) - estimator.set_params(**parameters) - - X_train, y_train = _split(estimator, X, y, train) - X_test, y_test = _split(estimator, X, y, test, train) - _fit(estimator.fit, X_train, y_train, **fit_params) - score = _score(estimator, X_test, y_test, scorer) - - if verbose > 2: - msg += ", score=%f" % score - if verbose > 1: - end_msg = "%s -%s" % (msg, logger.short_format_time(time.time() - - start_time)) - print("[GridSearchCV] %s %s" % ((64 - len(end_msg)) * '.', end_msg)) - - return score, parameters, _num_samples(X_test) + score, n_samples_test = _cross_val_score(estimator, X, y, scorer, train, + test, parameters, verbose, + fit_params, + log_label="GridSearchCV") + return score, parameters, n_samples_test def _check_param_grid(param_grid): @@ -397,7 +378,7 @@ def _fit(self, X, y, parameter_iterable): n_jobs=self.n_jobs, verbose=self.verbose, pre_dispatch=pre_dispatch)( delayed(fit_grid_point)( - X, y, base_estimator, parameters, train, test, + X, y, clone(base_estimator), parameters, train, test, self.scorer_, self.verbose, **self.fit_params) for parameters in parameter_iterable for train, test in cv) From 38081fdd56b6372a3ef6e768f134d46fd6a187ec Mon Sep 17 00:00:00 2001 From: Alexander Fabisch Date: Sat, 11 Jan 2014 00:21:23 +0100 Subject: [PATCH 06/51] Return time --- sklearn/cross_validation.py | 29 +++++++++++++++-------------- sklearn/grid_search.py | 10 +++++----- 2 files changed, 20 insertions(+), 19 deletions(-) diff --git a/sklearn/cross_validation.py b/sklearn/cross_validation.py index 050e70c2d8089..25efa841fcf35 100644 --- a/sklearn/cross_validation.py +++ b/sklearn/cross_validation.py @@ -1103,39 +1103,40 @@ def cross_val_score(estimator, X, y=None, scoring=None, cv=None, n_jobs=1, return np.array(scores)[:, 0] -def _cross_val_score(estimator, X, y, scorer, train, test, parameters, verbose, - fit_params, log_label): +def _cross_val_score(estimator, X, y, scorer, train, test, parameters, + verbose, fit_params, log_label): """Inner loop for cross validation""" if parameters is not None: estimator.set_params(**parameters) - if verbose > 1: - start_time = time.time() - if parameters is None: - msg = "Evaluating..." 
- else: - msg = '%s' % (', '.join('%s=%s' % (k, v) - for k, v in parameters.items())) - print("[%s] %s %s" % (log_label, msg, (64 - len(msg)) * '.')) - n_samples = _num_samples(X) fit_params = fit_params if fit_params is not None else {} fit_params = dict([(k, np.asarray(v)[train] if hasattr(v, '__len__') and len(v) == n_samples else v) for k, v in fit_params.items()]) + start_time = time.time() + + if verbose > 1: + if parameters is None: + msg = "" + else: + msg = '%s' % (', '.join('%s=%s' % (k, v) + for k, v in parameters.items())) + print("[%s] %s %s" % (log_label, msg, (64 - len(msg)) * '.')) + X_train, y_train = _split(estimator, X, y, train) X_test, y_test = _split(estimator, X, y, test, train) _fit(estimator.fit, X_train, y_train, **fit_params) score = _score(estimator, X_test, y_test, scorer) + scoring_time = time.time() - start_time if verbose > 2: msg += ", score=%f" % score if verbose > 1: - end_msg = "%s -%s" % (msg, logger.short_format_time(time.time() - - start_time)) + end_msg = "%s -%s" % (msg, logger.short_format_time(scoring_time)) print("[%s] %s %s" % (log_label, (64 - len(end_msg)) * '.', end_msg)) - return score, _num_samples(X_test) + return score, _num_samples(X_test), scoring_time def _split(estimator, X, y, indices, train_indices=None): diff --git a/sklearn/grid_search.py b/sklearn/grid_search.py index 87ff9dedbdb1b..c7824f2e8b63f 100644 --- a/sklearn/grid_search.py +++ b/sklearn/grid_search.py @@ -184,7 +184,7 @@ def __len__(self): def fit_grid_point(X, y, estimator, parameters, train, test, scorer, - verbose, loss_func=None, **fit_params): + verbose, **fit_params): """Run fit on one set of parameters. Parameters @@ -229,10 +229,10 @@ def fit_grid_point(X, y, estimator, parameters, train, test, scorer, n_samples_test : int Number of test samples in this split. 
""" - score, n_samples_test = _cross_val_score(estimator, X, y, scorer, train, - test, parameters, verbose, - fit_params, - log_label="GridSearchCV") + score, n_samples_test, _ = _cross_val_score(estimator, X, y, scorer, + train, test, parameters, + verbose, fit_params, + log_label="GridSearchCV") return score, parameters, n_samples_test From 30c86ea2ddccab0610944a83bffea1719cc810fd Mon Sep 17 00:00:00 2001 From: Alexander Fabisch Date: Sat, 11 Jan 2014 00:32:19 +0100 Subject: [PATCH 07/51] Move set_params back to fit_grid_point --- sklearn/cross_validation.py | 25 ++++--------------------- sklearn/grid_search.py | 21 ++++++++++++++++----- 2 files changed, 20 insertions(+), 26 deletions(-) diff --git a/sklearn/cross_validation.py b/sklearn/cross_validation.py index 25efa841fcf35..377dbcced6572 100644 --- a/sklearn/cross_validation.py +++ b/sklearn/cross_validation.py @@ -25,7 +25,7 @@ from .utils import check_arrays, check_random_state, safe_mask from .utils.validation import _num_samples from .utils.fixes import unique -from .externals.joblib import Parallel, delayed, logger +from .externals.joblib import Parallel, delayed from .externals.six import string_types, with_metaclass from .metrics.scorer import _deprecate_loss_and_score_funcs @@ -1096,18 +1096,14 @@ def cross_val_score(estimator, X, y=None, scoring=None, cv=None, n_jobs=1, pre_dispatch=pre_dispatch) scores = parallel( delayed(_cross_val_score)(clone(estimator), X, y, scorer, train, test, - parameters=None, verbose=verbose, - fit_params=fit_params, - log_label="cross_val_score") + verbose=verbose, fit_params=fit_params) for train, test in cv) return np.array(scores)[:, 0] -def _cross_val_score(estimator, X, y, scorer, train, test, parameters, - verbose, fit_params, log_label): +def _cross_val_score(estimator, X, y, scorer, train, test, + verbose, fit_params): """Inner loop for cross validation""" - if parameters is not None: - estimator.set_params(**parameters) n_samples = _num_samples(X) fit_params = fit_params if fit_params is not None else {} fit_params = dict([(k, np.asarray(v)[train] @@ -1116,25 +1112,12 @@ def _cross_val_score(estimator, X, y, scorer, train, test, parameters, start_time = time.time() - if verbose > 1: - if parameters is None: - msg = "" - else: - msg = '%s' % (', '.join('%s=%s' % (k, v) - for k, v in parameters.items())) - print("[%s] %s %s" % (log_label, msg, (64 - len(msg)) * '.')) - X_train, y_train = _split(estimator, X, y, train) X_test, y_test = _split(estimator, X, y, test, train) _fit(estimator.fit, X_train, y_train, **fit_params) score = _score(estimator, X_test, y_test, scorer) scoring_time = time.time() - start_time - if verbose > 2: - msg += ", score=%f" % score - if verbose > 1: - end_msg = "%s -%s" % (msg, logger.short_format_time(scoring_time)) - print("[%s] %s %s" % (log_label, (64 - len(end_msg)) * '.', end_msg)) return score, _num_samples(X_test), scoring_time diff --git a/sklearn/grid_search.py b/sklearn/grid_search.py index c7824f2e8b63f..4db7d435256a8 100644 --- a/sklearn/grid_search.py +++ b/sklearn/grid_search.py @@ -24,7 +24,7 @@ from .base import MetaEstimatorMixin from .cross_validation import _check_cv as check_cv from .cross_validation import _check_scorable, _cross_val_score -from .externals.joblib import Parallel, delayed +from .externals.joblib import Parallel, delayed, logger from .externals import six from .utils import safe_mask, check_random_state from .utils.validation import _num_samples, check_arrays @@ -229,10 +229,21 @@ def fit_grid_point(X, y, estimator, parameters, 
train, test, scorer, n_samples_test : int Number of test samples in this split. """ - score, n_samples_test, _ = _cross_val_score(estimator, X, y, scorer, - train, test, parameters, - verbose, fit_params, - log_label="GridSearchCV") + if verbose > 1: + msg = '%s' % (', '.join('%s=%s' % (k, v) + for k, v in parameters.items())) + print("[GridSearchCV] %s %s" % (msg, (64 - len(msg)) * '.')) + + estimator.set_params(**parameters) + score, n_samples_test, scoring_time = _cross_val_score( + estimator, X, y, scorer, train, test, verbose, fit_params) + + if verbose > 2: + msg += ", score=%f" % score + if verbose > 1: + end_msg = "%s -%s" % (msg, logger.short_format_time(scoring_time)) + print("[GridSearchCV] %s %s" % ((64 - len(end_msg)) * '.', end_msg)) + return score, parameters, n_samples_test From 389ed8dbfd018c5ebf6bbf510cb739b1133aca71 Mon Sep 17 00:00:00 2001 From: Alexander Fabisch Date: Sat, 11 Jan 2014 16:34:17 +0100 Subject: [PATCH 08/51] Log score and time in 'cross_val_score' --- sklearn/cross_validation.py | 3 +++ sklearn/grid_search.py | 3 ++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/sklearn/cross_validation.py b/sklearn/cross_validation.py index 377dbcced6572..080206f03441c 100644 --- a/sklearn/cross_validation.py +++ b/sklearn/cross_validation.py @@ -1119,6 +1119,9 @@ def _cross_val_score(estimator, X, y, scorer, train, test, scoring_time = time.time() - start_time + if verbose > 1: + print("score %f in %f s" % (score, scoring_time)) + return score, _num_samples(X_test), scoring_time diff --git a/sklearn/grid_search.py b/sklearn/grid_search.py index 4db7d435256a8..eac8823656439 100644 --- a/sklearn/grid_search.py +++ b/sklearn/grid_search.py @@ -236,7 +236,8 @@ def fit_grid_point(X, y, estimator, parameters, train, test, scorer, estimator.set_params(**parameters) score, n_samples_test, scoring_time = _cross_val_score( - estimator, X, y, scorer, train, test, verbose, fit_params) + estimator, X, y, scorer, train, test, verbose=0, + fit_params=fit_params) if verbose > 2: msg += ", score=%f" % score From 1fa3ec363e3fb261b87b5c0ccc681e4fa5df70b3 Mon Sep 17 00:00:00 2001 From: Alexander Fabisch Date: Sun, 12 Jan 2014 00:40:19 +0100 Subject: [PATCH 09/51] check_scorable returns scorer --- sklearn/cross_validation.py | 42 +++------------------ sklearn/feature_selection/rfe.py | 13 ++----- sklearn/grid_search.py | 12 +++--- sklearn/learning_curve.py | 7 ++-- sklearn/metrics/scorer.py | 63 +++++++++++++++++++++++++++++++ sklearn/tests/test_grid_search.py | 11 +++--- 6 files changed, 86 insertions(+), 62 deletions(-) diff --git a/sklearn/cross_validation.py b/sklearn/cross_validation.py index 080206f03441c..c03cc4076c6b7 100644 --- a/sklearn/cross_validation.py +++ b/sklearn/cross_validation.py @@ -27,7 +27,7 @@ from .utils.fixes import unique from .externals.joblib import Parallel, delayed from .externals.six import string_types, with_metaclass -from .metrics.scorer import _deprecate_loss_and_score_funcs +from .metrics.scorer import check_scorable __all__ = ['Bootstrap', 'KFold', @@ -1087,9 +1087,7 @@ def cross_val_score(estimator, X, y=None, scoring=None, cv=None, n_jobs=1, """ X, y = check_arrays(X, y, sparse_format='csr', allow_lists=True) cv = _check_cv(cv, X, y, classifier=is_classifier(estimator)) - _check_scorable(estimator, score_func=score_func, scoring=scoring) - scorer = _deprecate_loss_and_score_funcs(score_func=score_func, - scoring=scoring) + scorer = check_scorable(estimator, score_func=score_func, scoring=scoring) # We clone the estimator to make sure 
that all the folds are # independent, and that it is pickle-able. parallel = Parallel(n_jobs=n_jobs, verbose=verbose, @@ -1168,20 +1166,12 @@ def _fit(fit_function, X_train, y_train, **fit_params): def _score(estimator, X_test, y_test, scorer): """Compute the score of an estimator on a given test set.""" if y_test is None: - if scorer is None: - score = estimator.score(X_test) - else: - score = scorer(estimator, X_test) + score = scorer(estimator, X_test) else: - if scorer is None: - score = estimator.score(X_test, y_test) - else: - score = scorer(estimator, X_test, y_test) - + score = scorer(estimator, X_test, y_test) if not isinstance(score, numbers.Number): raise ValueError("scoring must return a number, got %s (%s) instead." % (str(score), type(score))) - return score @@ -1262,24 +1252,6 @@ def _check_cv(cv, X=None, y=None, classifier=False, warn_mask=False): return cv -def _check_scorable(estimator, scoring=None, loss_func=None, score_func=None): - """Check that estimator can be fitted and score can be computed.""" - if (not hasattr(estimator, 'fit') or - not (hasattr(estimator, 'predict') - or hasattr(estimator, 'score'))): - raise TypeError("estimator should a be an estimator implementing" - " 'fit' and 'predict' or 'score' methods," - " %s (type %s) was passed" % - (estimator, type(estimator))) - if (scoring is None and loss_func is None and score_func - is None): - if not hasattr(estimator, 'score'): - raise TypeError( - "If no scoring is specified, the estimator passed " - "should have a 'score' method. The estimator %s " - "does not." % estimator) - - def permutation_test_score(estimator, X, y, score_func=None, cv=None, n_permutations=100, n_jobs=1, labels=None, random_state=0, verbose=0, scoring=None): @@ -1351,11 +1323,7 @@ def permutation_test_score(estimator, X, y, score_func=None, cv=None, """ X, y = check_arrays(X, y, sparse_format='csr') cv = _check_cv(cv, X, y, classifier=is_classifier(estimator)) - scorer = _deprecate_loss_and_score_funcs( - loss_func=None, - score_func=score_func, - scoring=scoring - ) + scorer = check_scorable(estimator, scoring=scoring, score_func=score_func) random_state = check_random_state(random_state) # We clone the estimator to make sure that all the folds are diff --git a/sklearn/feature_selection/rfe.py b/sklearn/feature_selection/rfe.py index 49820742289f8..54941036e044a 100644 --- a/sklearn/feature_selection/rfe.py +++ b/sklearn/feature_selection/rfe.py @@ -13,9 +13,9 @@ from ..base import clone from ..base import is_classifier from ..cross_validation import _check_cv as check_cv -from ..cross_validation import _check_scorable, _split, _score +from ..cross_validation import _split, _score from .base import SelectorMixin -from ..metrics.scorer import _deprecate_loss_and_score_funcs +from ..metrics.scorer import check_scorable class RFE(BaseEstimator, MetaEstimatorMixin, SelectorMixin): @@ -326,8 +326,8 @@ def fit(self, X, y): verbose=self.verbose - 1) cv = check_cv(self.cv, X, y, is_classifier(self.estimator)) - _check_scorable(self.estimator, scoring=self.scoring, - loss_func=self.loss_func) + scorer = check_scorable(self.estimator, scoring=self.scoring, + loss_func=self.loss_func) scores = np.zeros(X.shape[1]) # Cross-validation @@ -345,11 +345,6 @@ def fit(self, X, y): estimator = clone(self.estimator) estimator.fit(X_train_subset, y_train) - - scorer = _deprecate_loss_and_score_funcs( - loss_func=self.loss_func, - scoring=self.scoring - ) score = _score(estimator, X_test_subset, y_test, scorer) if self.verbose > 0: diff --git 
a/sklearn/grid_search.py b/sklearn/grid_search.py index eac8823656439..d26f2a74fdd89 100644 --- a/sklearn/grid_search.py +++ b/sklearn/grid_search.py @@ -23,12 +23,12 @@ from .base import BaseEstimator, is_classifier, clone from .base import MetaEstimatorMixin from .cross_validation import _check_cv as check_cv -from .cross_validation import _check_scorable, _cross_val_score +from .cross_validation import _cross_val_score from .externals.joblib import Parallel, delayed, logger from .externals import six from .utils import safe_mask, check_random_state from .utils.validation import _num_samples, check_arrays -from .metrics.scorer import _deprecate_loss_and_score_funcs +from .metrics.scorer import check_scorable __all__ = ['GridSearchCV', 'ParameterGrid', 'fit_grid_point', @@ -308,8 +308,6 @@ def __init__(self, estimator, scoring=None, loss_func=None, self.cv = cv self.verbose = verbose self.pre_dispatch = pre_dispatch - _check_scorable(self.estimator, scoring=self.scoring, - loss_func=self.loss_func, score_func=self.score_func) def score(self, X, y=None): """Returns the score on the given test data and labels, if the search @@ -360,13 +358,13 @@ def _fit(self, X, y, parameter_iterable): estimator = self.estimator cv = self.cv + self.scorer_ = check_scorable(self.estimator, scoring=self.scoring, + loss_func=self.loss_func, + score_func=self.score_func) n_samples = _num_samples(X) X, y = check_arrays(X, y, allow_lists=True, sparse_format='csr') - self.scorer_ = _deprecate_loss_and_score_funcs( - self.loss_func, self.score_func, self.scoring) - if y is not None: if len(y) != n_samples: raise ValueError('Target variable (y) has a different number ' diff --git a/sklearn/learning_curve.py b/sklearn/learning_curve.py index f17c9a5a9fe30..a7fa2c28ce4e0 100644 --- a/sklearn/learning_curve.py +++ b/sklearn/learning_curve.py @@ -11,7 +11,8 @@ from .utils import check_arrays from .externals.joblib import Parallel, delayed from .metrics.scorer import get_scorer -from .cross_validation import _check_scorable, _split, _fit, _score +from .cross_validation import _split, _fit, _score +from .metrics.scorer import check_scorable def learning_curve(estimator, X, y, train_sizes=np.linspace(0.1, 1.0, 10), @@ -101,6 +102,7 @@ def learning_curve(estimator, X, y, train_sizes=np.linspace(0.1, 1.0, 10), X, y = check_arrays(X, y, sparse_format='csr', allow_lists=True) # Make a list since we will be iterating multiple times over the folds cv = list(_check_cv(cv, X, y, classifier=is_classifier(estimator))) + scorer = check_scorable(estimator, scoring=scoring) # HACK as long as boolean indices are allowed in cv generators if cv[0][0].dtype == bool: @@ -119,9 +121,6 @@ def learning_curve(estimator, X, y, train_sizes=np.linspace(0.1, 1.0, 10), if verbose > 0: print("[learning_curve] Training set sizes: " + str(train_sizes_abs)) - _check_scorable(estimator, scoring=scoring) - scorer = get_scorer(scoring) - parallel = Parallel(n_jobs=n_jobs, pre_dispatch=pre_dispatch, verbose=verbose) if exploit_incremental_learning: diff --git a/sklearn/metrics/scorer.py b/sklearn/metrics/scorer.py index 2a28495890ba2..73c32d60836cf 100644 --- a/sklearn/metrics/scorer.py +++ b/sklearn/metrics/scorer.py @@ -198,6 +198,69 @@ def get_scorer(scoring): return scorer +class _passthrough_scorer(object): + """Callable that wraps estimator.score""" + def __call__(self, estimator, *args, **kwargs): + return estimator.score(*args, **kwargs) + + +def check_scorable(estimator, scoring=None, loss_func=None, score_func=None): + """Check if estimator can 
be scored. + + A TypeError will be thrown if the estimator cannot be scored. + + Parameters + ---------- + estimator : estimator object implementing 'fit' + The object to use to fit the data. + + scoring : string, callable or None, optional, default: None + A string (see model evaluation documentation) or + a scorer callable object / function with signature + ``scorer(estimator, X, y)``. + + loss_func : callable or None, optional, default: None + A loss function callable object / function with signature + ``loss_func(estimator, X, y)``. + + score_func : callable or None, optional, default: None + A scoring function with signature + ``score_func(estimator, X, y)``. + + Returns + ------- + scoring : callable + A scorer callable object / function with signature + ``scorer(estimator, X, y)``. + """ + if not hasattr(estimator, 'fit'): + raise TypeError("estimator should a be an estimator implementing " + "'fit' method, %s (type %s) was passed" % + (estimator, type(estimator))) + + if scoring is None and loss_func is None and score_func is None: + if hasattr(estimator, 'score'): + return _passthrough_scorer() + else: + raise TypeError( + "If no scoring is specified, the estimator passed should " + "have a 'score' method. The estimator %s (type %s) " + "does not." % (estimator, type(estimator))) + else: + if hasattr(estimator, 'predict'): + scorer = _deprecate_loss_and_score_funcs(scoring=scoring, + loss_func=loss_func, score_func=score_func) + if scorer is None: + return ValueError("no scoring") + else: + return scorer + else: + raise TypeError( + "If a scoring is specified, the estimator passed should " + "have a 'predict' method. The estimator %s (type %s) " + "does not." % (estimator, type(estimator))) + + def make_scorer(score_func, greater_is_better=True, needs_proba=False, needs_threshold=False, **kwargs): """Make a scorer from a performance metric or loss function. 
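# ---------------------------------------------------------------------------
# [Editorial sketch -- not part of the patch above.] How the check_scorable
# helper added to sklearn/metrics/scorer.py in this commit is meant to be
# used once the patch is applied (a later commit in the series renames it to
# check_scoring); SVC is just a stand-in estimator:
from sklearn.metrics.scorer import check_scorable
from sklearn.svm import SVC

clf = SVC()
scorer = check_scorable(clf, scoring='accuracy')  # explicit scoring -> accuracy scorer
default = check_scorable(clf)                     # no scoring given -> wraps clf.score
# Both results are callables invoked as scorer(fitted_estimator, X_test, y_test).
# ---------------------------------------------------------------------------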
diff --git a/sklearn/tests/test_grid_search.py b/sklearn/tests/test_grid_search.py index ff2510bce79fa..8d7107d10d9db 100644 --- a/sklearn/tests/test_grid_search.py +++ b/sklearn/tests/test_grid_search.py @@ -190,8 +190,9 @@ def test_grid_search_no_score(): assert_equal(grid_search.score(X, y), grid_search_no_score.score(X, y)) # giving no scoring function raises an error - assert_raise_message(TypeError, "no scoring", - GridSearchCV, clf_no_score, {'C': Cs}) + grid_search_no_score = GridSearchCV(clf_no_score, {'C': Cs}) + assert_raise_message(TypeError, "no scoring", grid_search_no_score.fit, + [[1]]) def test_trivial_grid_scores(): @@ -494,9 +495,9 @@ def test_bad_estimator(): # test grid-search with clustering algorithm which doesn't support # "predict" sc = SpectralClustering() - assert_raises(TypeError, GridSearchCV, sc, - param_grid=dict(gamma=[.1, 1, 10]), - scoring='ari') + grid_search = GridSearchCV(sc, param_grid=dict(gamma=[.1, 1, 10]), + scoring='ari') + assert_raises(TypeError, grid_search.fit, [[1]]) def test_param_sampler(): From 5b8933d3b2257390eca62df98bcffe36e1b04c07 Mon Sep 17 00:00:00 2001 From: Alexander Fabisch Date: Sun, 12 Jan 2014 11:35:27 +0100 Subject: [PATCH 10/51] Clean up --- sklearn/cross_validation.py | 6 +-- sklearn/feature_selection/rfe.py | 6 +-- sklearn/grid_search.py | 12 ++--- sklearn/learning_curve.py | 5 +-- sklearn/metrics/scorer.py | 49 ++++++++------------ sklearn/metrics/tests/test_score_objects.py | 50 +++++++++++++++++++++ 6 files changed, 82 insertions(+), 46 deletions(-) diff --git a/sklearn/cross_validation.py b/sklearn/cross_validation.py index c03cc4076c6b7..43c27904b73aa 100644 --- a/sklearn/cross_validation.py +++ b/sklearn/cross_validation.py @@ -27,7 +27,7 @@ from .utils.fixes import unique from .externals.joblib import Parallel, delayed from .externals.six import string_types, with_metaclass -from .metrics.scorer import check_scorable +from .metrics.scorer import check_scoring __all__ = ['Bootstrap', 'KFold', @@ -1087,7 +1087,7 @@ def cross_val_score(estimator, X, y=None, scoring=None, cv=None, n_jobs=1, """ X, y = check_arrays(X, y, sparse_format='csr', allow_lists=True) cv = _check_cv(cv, X, y, classifier=is_classifier(estimator)) - scorer = check_scorable(estimator, score_func=score_func, scoring=scoring) + scorer = check_scoring(estimator, score_func=score_func, scoring=scoring) # We clone the estimator to make sure that all the folds are # independent, and that it is pickle-able. 
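# ---------------------------------------------------------------------------
# [Editorial note -- not part of the patch above.] As of this commit, each
# parallel _cross_val_score call returns a (score, n_test_samples,
# scoring_time) tuple, and cross_val_score keeps only the score column via
# np.array(...)[:, 0]. A tiny illustration with made-up per-fold values:
import numpy as np

fold_results = [(0.97, 50, 0.012), (0.93, 50, 0.011), (1.00, 50, 0.010)]
scores = np.array(fold_results)[:, 0]  # -> array([0.97, 0.93, 1.0])
# ---------------------------------------------------------------------------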
parallel = Parallel(n_jobs=n_jobs, verbose=verbose, @@ -1323,7 +1323,7 @@ def permutation_test_score(estimator, X, y, score_func=None, cv=None, """ X, y = check_arrays(X, y, sparse_format='csr') cv = _check_cv(cv, X, y, classifier=is_classifier(estimator)) - scorer = check_scorable(estimator, scoring=scoring, score_func=score_func) + scorer = check_scoring(estimator, scoring=scoring, score_func=score_func) random_state = check_random_state(random_state) # We clone the estimator to make sure that all the folds are diff --git a/sklearn/feature_selection/rfe.py b/sklearn/feature_selection/rfe.py index 54941036e044a..a58fd33d61f47 100644 --- a/sklearn/feature_selection/rfe.py +++ b/sklearn/feature_selection/rfe.py @@ -15,7 +15,7 @@ from ..cross_validation import _check_cv as check_cv from ..cross_validation import _split, _score from .base import SelectorMixin -from ..metrics.scorer import check_scorable +from ..metrics.scorer import check_scoring class RFE(BaseEstimator, MetaEstimatorMixin, SelectorMixin): @@ -326,8 +326,8 @@ def fit(self, X, y): verbose=self.verbose - 1) cv = check_cv(self.cv, X, y, is_classifier(self.estimator)) - scorer = check_scorable(self.estimator, scoring=self.scoring, - loss_func=self.loss_func) + scorer = check_scoring(self.estimator, scoring=self.scoring, + loss_func=self.loss_func) scores = np.zeros(X.shape[1]) # Cross-validation diff --git a/sklearn/grid_search.py b/sklearn/grid_search.py index d26f2a74fdd89..4b440d23381e0 100644 --- a/sklearn/grid_search.py +++ b/sklearn/grid_search.py @@ -28,7 +28,7 @@ from .externals import six from .utils import safe_mask, check_random_state from .utils.validation import _num_samples, check_arrays -from .metrics.scorer import check_scorable +from .metrics.scorer import check_scoring __all__ = ['GridSearchCV', 'ParameterGrid', 'fit_grid_point', @@ -232,7 +232,7 @@ def fit_grid_point(X, y, estimator, parameters, train, test, scorer, if verbose > 1: msg = '%s' % (', '.join('%s=%s' % (k, v) for k, v in parameters.items())) - print("[GridSearchCV] %s %s" % (msg, (64 - len(msg)) * '.')) + print("[CV] %s %s" % (msg, (64 - len(msg)) * '.')) estimator.set_params(**parameters) score, n_samples_test, scoring_time = _cross_val_score( @@ -243,7 +243,7 @@ def fit_grid_point(X, y, estimator, parameters, train, test, scorer, msg += ", score=%f" % score if verbose > 1: end_msg = "%s -%s" % (msg, logger.short_format_time(scoring_time)) - print("[GridSearchCV] %s %s" % ((64 - len(end_msg)) * '.', end_msg)) + print("[CV] %s %s" % ((64 - len(end_msg)) * '.', end_msg)) return score, parameters, n_samples_test @@ -358,9 +358,9 @@ def _fit(self, X, y, parameter_iterable): estimator = self.estimator cv = self.cv - self.scorer_ = check_scorable(self.estimator, scoring=self.scoring, - loss_func=self.loss_func, - score_func=self.score_func) + self.scorer_ = check_scoring(self.estimator, scoring=self.scoring, + loss_func=self.loss_func, + score_func=self.score_func) n_samples = _num_samples(X) X, y = check_arrays(X, y, allow_lists=True, sparse_format='csr') diff --git a/sklearn/learning_curve.py b/sklearn/learning_curve.py index a7fa2c28ce4e0..26a742a22ed2f 100644 --- a/sklearn/learning_curve.py +++ b/sklearn/learning_curve.py @@ -12,7 +12,7 @@ from .externals.joblib import Parallel, delayed from .metrics.scorer import get_scorer from .cross_validation import _split, _fit, _score -from .metrics.scorer import check_scorable +from .metrics.scorer import check_scoring def learning_curve(estimator, X, y, train_sizes=np.linspace(0.1, 1.0, 10), @@ -94,7 +94,6 
@@ def learning_curve(estimator, X, y, train_sizes=np.linspace(0.1, 1.0, 10), ----- See :ref:`examples/plot_learning_curve.py ` """ - if exploit_incremental_learning and not hasattr(estimator, "partial_fit"): raise ValueError("An estimator must support the partial_fit interface " "to exploit incremental learning") @@ -102,7 +101,7 @@ def learning_curve(estimator, X, y, train_sizes=np.linspace(0.1, 1.0, 10), X, y = check_arrays(X, y, sparse_format='csr', allow_lists=True) # Make a list since we will be iterating multiple times over the folds cv = list(_check_cv(cv, X, y, classifier=is_classifier(estimator))) - scorer = check_scorable(estimator, scoring=scoring) + scorer = check_scoring(estimator, scoring=scoring) # HACK as long as boolean indices are allowed in cv generators if cv[0][0].dtype == bool: diff --git a/sklearn/metrics/scorer.py b/sklearn/metrics/scorer.py index 73c32d60836cf..c4b89d2dfe1bb 100644 --- a/sklearn/metrics/scorer.py +++ b/sklearn/metrics/scorer.py @@ -198,13 +198,13 @@ def get_scorer(scoring): return scorer -class _passthrough_scorer(object): +class _PassthroughScorer(object): """Callable that wraps estimator.score""" def __call__(self, estimator, *args, **kwargs): return estimator.score(*args, **kwargs) -def check_scorable(estimator, scoring=None, loss_func=None, score_func=None): +def check_scoring(estimator, scoring=None, loss_func=None, score_func=None): """Check if estimator can be scored. A TypeError will be thrown if the estimator cannot be scored. @@ -219,46 +219,33 @@ def check_scorable(estimator, scoring=None, loss_func=None, score_func=None): a scorer callable object / function with signature ``scorer(estimator, X, y)``. - loss_func : callable or None, optional, default: None - A loss function callable object / function with signature - ``loss_func(estimator, X, y)``. - - score_func : callable or None, optional, default: None - A scoring function with signature - ``score_func(estimator, X, y)``. - Returns ------- scoring : callable A scorer callable object / function with signature ``scorer(estimator, X, y)``. """ + has_scoring = not (scoring is None and loss_func is None and + score_func is None) if not hasattr(estimator, 'fit'): raise TypeError("estimator should a be an estimator implementing " "'fit' method, %s (type %s) was passed" % (estimator, type(estimator))) - - if scoring is None and loss_func is None and score_func is None: - if hasattr(estimator, 'score'): - return _passthrough_scorer() - else: - raise TypeError( - "If no scoring is specified, the estimator passed should " - "have a 'score' method. The estimator %s (type %s) " - "does not." % (estimator, type(estimator))) + elif hasattr(estimator, 'predict') and has_scoring: + return _deprecate_loss_and_score_funcs(scoring=scoring, + loss_func=loss_func, score_func=score_func) + elif hasattr(estimator, 'score'): + return _PassthroughScorer() + elif not has_scoring: + raise TypeError( + "If no scoring is specified, the estimator passed should " + "have a 'score' method. The estimator %s (type %s) " + "does not." % (estimator, type(estimator))) else: - if hasattr(estimator, 'predict'): - scorer = _deprecate_loss_and_score_funcs(scoring=scoring, - loss_func=loss_func, score_func=score_func) - if scorer is None: - return ValueError("no scoring") - else: - return scorer - else: - raise TypeError( - "If a scoring is specified, the estimator passed should " - "have a 'predict' method. The estimator %s (type %s) " - "does not." 
% (estimator, type(estimator))) + raise TypeError( + "The estimator passed should have a 'score' or a 'predict' " + "method. The estimator %s (type %s) does not." + % (estimator, type(estimator))) def make_scorer(score_func, greater_is_better=True, needs_proba=False, diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py index d7ea9f427074d..265e35b9b5034 100644 --- a/sklearn/metrics/tests/test_score_objects.py +++ b/sklearn/metrics/tests/test_score_objects.py @@ -9,6 +9,7 @@ from sklearn.metrics import (f1_score, r2_score, roc_auc_score, fbeta_score, log_loss) from sklearn.metrics.cluster import adjusted_rand_score +from sklearn.metrics.scorer import check_scoring from sklearn.metrics import make_scorer, SCORERS from sklearn.svm import LinearSVC from sklearn.cluster import KMeans @@ -22,6 +23,55 @@ from sklearn.multiclass import OneVsRestClassifier +class EstimatorWithoutFit(object): + """Dummy estimator to test check_scoring""" + pass + + +class EstimatorWithFit(object): + """Dummy estimator to test check_scoring""" + def fit(self, X, y): + return self + + +class EstimatorWithFitAndScore(object): + """Dummy estimator to test check_scoring""" + def fit(self, X, y): + return self + def score(self, X, y): + return 1.0 + + +class EstimatorWithFitAndPredict(object): + """Dummy estimator to test check_scoring""" + def fit(self, X, y): + self.y = y + return self + def predict(self, X): + return self.y + + +def test_check_scoring(): + """Test all branches of check_scoring""" + estimator = EstimatorWithoutFit() + assert_raises(TypeError, check_scoring, estimator) + + estimator = EstimatorWithFitAndScore() + estimator.fit([[1]], [1]) + scorer = check_scoring(estimator) + assert_almost_equal(scorer(estimator, [[1]], [1]), 1.0) + + estimator = EstimatorWithFitAndPredict() + estimator.fit([[1]], [1]) + assert_raises(TypeError, check_scoring, estimator) + + scorer = check_scoring(estimator, "accuracy") + assert_almost_equal(scorer(estimator, [[1]], [1]), 1.0) + + estimator = EstimatorWithFit() + assert_raises(TypeError, check_scoring, estimator) + + def test_make_scorer(): """Sanity check on the make_scorer factory function.""" f = lambda *args: 0 From 70aaef24e8f6dd2eb8921fd0e0f6a9504c7c9358 Mon Sep 17 00:00:00 2001 From: Alexander Fabisch Date: Sun, 12 Jan 2014 12:29:31 +0100 Subject: [PATCH 11/51] Replace '_fit_estimator' by '_cross_val_score' --- sklearn/cross_validation.py | 8 ++++++-- sklearn/learning_curve.py | 30 +++++++++------------------- sklearn/tests/test_learning_curve.py | 2 +- 3 files changed, 16 insertions(+), 24 deletions(-) diff --git a/sklearn/cross_validation.py b/sklearn/cross_validation.py index 43c27904b73aa..8de3e1474c082 100644 --- a/sklearn/cross_validation.py +++ b/sklearn/cross_validation.py @@ -1100,7 +1100,7 @@ def cross_val_score(estimator, X, y=None, scoring=None, cv=None, n_jobs=1, def _cross_val_score(estimator, X, y, scorer, train, test, - verbose, fit_params): + verbose, fit_params, return_train_score=False): """Inner loop for cross validation""" n_samples = _num_samples(X) fit_params = fit_params if fit_params is not None else {} @@ -1120,7 +1120,11 @@ def _cross_val_score(estimator, X, y, scorer, train, test, if verbose > 1: print("score %f in %f s" % (score, scoring_time)) - return score, _num_samples(X_test), scoring_time + if return_train_score: + return (_score(estimator, X_train, y_train, scorer), score, + _num_samples(X_test), scoring_time) + else: + return score, _num_samples(X_test), scoring_time def 
_split(estimator, X, y, indices, train_indices=None): diff --git a/sklearn/learning_curve.py b/sklearn/learning_curve.py index 26a742a22ed2f..c803ac242649d 100644 --- a/sklearn/learning_curve.py +++ b/sklearn/learning_curve.py @@ -11,7 +11,7 @@ from .utils import check_arrays from .externals.joblib import Parallel, delayed from .metrics.scorer import get_scorer -from .cross_validation import _split, _fit, _score +from .cross_validation import _split, _fit, _score, _cross_val_score from .metrics.scorer import check_scoring @@ -127,14 +127,16 @@ def learning_curve(estimator, X, y, train_sizes=np.linspace(0.1, 1.0, 10), classes = np.unique(y) else: classes = None + out = parallel(delayed(_incremental_fit_estimator)( - estimator, X, y, classes, train, test, train_sizes_abs, scorer, - verbose) for train, test in cv) + clone(estimator), X, y, classes, train, test, train_sizes_abs, + scorer, verbose) for train, test in cv) else: - out = parallel(delayed(_fit_estimator)( - estimator, X, y, train, test, n_train_samples, scorer, verbose) + out = parallel(delayed(_cross_val_score)( + clone(estimator), X, y, scorer, train[:n_train_samples], test, + verbose, fit_params=None, return_train_score=True) for train, test in cv for n_train_samples in train_sizes_abs) - out = np.array(out) + out = np.array(out)[:, :2] n_cv_folds = out.shape[0]/n_unique_ticks out = out.reshape(n_cv_folds, n_unique_ticks, 2) @@ -202,23 +204,9 @@ def _translate_train_sizes(train_sizes, n_max_training_samples): return train_sizes_abs -def _fit_estimator(base_estimator, X, y, train, test, - n_train_samples, scorer, verbose): - """Train estimator on a training subset and compute scores.""" - train_subset = train[:n_train_samples] - estimator = clone(base_estimator) - X_train, y_train = _split(estimator, X, y, train_subset) - X_test, y_test = _split(estimator, X, y, test, train_subset) - _fit(estimator.fit, X_train, y_train) - train_score = _score(estimator, X_train, y_train, scorer) - test_score = _score(estimator, X_test, y_test, scorer) - return train_score, test_score - - -def _incremental_fit_estimator(base_estimator, X, y, classes, train, test, +def _incremental_fit_estimator(estimator, X, y, classes, train, test, train_sizes, scorer, verbose): """Train estimator on training subsets incrementally and compute scores.""" - estimator = clone(base_estimator) train_scores, test_scores = [], [] partitions = zip(train_sizes, np.split(train, train_sizes)[:-1]) for n_train_samples, partial_train in partitions: diff --git a/sklearn/tests/test_learning_curve.py b/sklearn/tests/test_learning_curve.py index 66b8f36279b97..1d43fdfb0eb4a 100644 --- a/sklearn/tests/test_learning_curve.py +++ b/sklearn/tests/test_learning_curve.py @@ -170,7 +170,7 @@ def test_learning_curve_with_boolean_indices(): estimator = MockImprovingClassifier(20) cv = KFold(n=30, n_folds=3, indices=False) train_sizes, train_scores, test_scores = learning_curve(estimator, X, y, - cv=cv) + cv=cv) assert_array_equal(train_sizes, np.linspace(2, 20, 10)) assert_array_almost_equal(train_scores, np.linspace(1.9, 1.0, 10)) assert_array_almost_equal(test_scores, np.linspace(0.1, 1.0, 10)) From 13c791595ca59456f861a6f1a88ecfb4fc7c1ade Mon Sep 17 00:00:00 2001 From: Alexander Fabisch Date: Sun, 12 Jan 2014 13:09:22 +0100 Subject: [PATCH 12/51] Fix PEP8, style and documentation --- sklearn/cross_validation.py | 2 +- sklearn/grid_search.py | 3 +-- sklearn/learning_curve.py | 7 +------ sklearn/metrics/scorer.py | 14 ++++++-------- sklearn/metrics/tests/test_score_objects.py | 2 ++ 5 
files changed, 11 insertions(+), 17 deletions(-) diff --git a/sklearn/cross_validation.py b/sklearn/cross_validation.py index 8de3e1474c082..8eb08f9a95deb 100644 --- a/sklearn/cross_validation.py +++ b/sklearn/cross_validation.py @@ -26,7 +26,7 @@ from .utils.validation import _num_samples from .utils.fixes import unique from .externals.joblib import Parallel, delayed -from .externals.six import string_types, with_metaclass +from .externals.six import with_metaclass from .metrics.scorer import check_scoring __all__ = ['Bootstrap', diff --git a/sklearn/grid_search.py b/sklearn/grid_search.py index 4b440d23381e0..d6a29273f7aa6 100644 --- a/sklearn/grid_search.py +++ b/sklearn/grid_search.py @@ -14,7 +14,6 @@ from collections import Mapping, namedtuple, Sized from functools import partial, reduce from itertools import product -import numbers import operator import warnings @@ -26,7 +25,7 @@ from .cross_validation import _cross_val_score from .externals.joblib import Parallel, delayed, logger from .externals import six -from .utils import safe_mask, check_random_state +from .utils import check_random_state from .utils.validation import _num_samples, check_arrays from .metrics.scorer import check_scoring diff --git a/sklearn/learning_curve.py b/sklearn/learning_curve.py index c803ac242649d..5ca556a99417f 100644 --- a/sklearn/learning_curve.py +++ b/sklearn/learning_curve.py @@ -10,7 +10,6 @@ from .cross_validation import _check_cv from .utils import check_arrays from .externals.joblib import Parallel, delayed -from .metrics.scorer import get_scorer from .cross_validation import _split, _fit, _score, _cross_val_score from .metrics.scorer import check_scoring @@ -123,11 +122,7 @@ def learning_curve(estimator, X, y, train_sizes=np.linspace(0.1, 1.0, 10), parallel = Parallel(n_jobs=n_jobs, pre_dispatch=pre_dispatch, verbose=verbose) if exploit_incremental_learning: - if is_classifier(estimator): - classes = np.unique(y) - else: - classes = None - + classes = np.unique(y) if is_classifier(estimator) else None out = parallel(delayed(_incremental_fit_estimator)( clone(estimator), X, y, classes, train, test, train_sizes_abs, scorer, verbose) for train, test in cv) diff --git a/sklearn/metrics/scorer.py b/sklearn/metrics/scorer.py index c4b89d2dfe1bb..9fbf4893652c6 100644 --- a/sklearn/metrics/scorer.py +++ b/sklearn/metrics/scorer.py @@ -205,7 +205,7 @@ def __call__(self, estimator, *args, **kwargs): def check_scoring(estimator, scoring=None, loss_func=None, score_func=None): - """Check if estimator can be scored. + """Determine scorer from user options. A TypeError will be thrown if the estimator cannot be scored. @@ -229,23 +229,21 @@ def check_scoring(estimator, scoring=None, loss_func=None, score_func=None): score_func is None) if not hasattr(estimator, 'fit'): raise TypeError("estimator should a be an estimator implementing " - "'fit' method, %s (type %s) was passed" % - (estimator, type(estimator))) + "'fit' method, %r was passed" % estimator) elif hasattr(estimator, 'predict') and has_scoring: return _deprecate_loss_and_score_funcs(scoring=scoring, - loss_func=loss_func, score_func=score_func) + loss_func=loss_func, + score_func=score_func) elif hasattr(estimator, 'score'): return _PassthroughScorer() elif not has_scoring: raise TypeError( "If no scoring is specified, the estimator passed should " - "have a 'score' method. The estimator %s (type %s) " - "does not." % (estimator, type(estimator))) + "have a 'score' method. The estimator %r does not." 
% estimator) else: raise TypeError( "The estimator passed should have a 'score' or a 'predict' " - "method. The estimator %s (type %s) does not." - % (estimator, type(estimator))) + "method. The estimator %r does not." % estimator) def make_scorer(score_func, greater_is_better=True, needs_proba=False, diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py index 265e35b9b5034..3cda7aadece52 100644 --- a/sklearn/metrics/tests/test_score_objects.py +++ b/sklearn/metrics/tests/test_score_objects.py @@ -38,6 +38,7 @@ class EstimatorWithFitAndScore(object): """Dummy estimator to test check_scoring""" def fit(self, X, y): return self + def score(self, X, y): return 1.0 @@ -47,6 +48,7 @@ class EstimatorWithFitAndPredict(object): def fit(self, X, y): self.y = y return self + def predict(self, X): return self.y From 7b951d8c63220c556c9d90a65720dad6cbb78174 Mon Sep 17 00:00:00 2001 From: Alexander Fabisch Date: Sun, 12 Jan 2014 13:41:37 +0100 Subject: [PATCH 13/51] Remove wrong variable names --- sklearn/feature_selection/rfe.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/sklearn/feature_selection/rfe.py b/sklearn/feature_selection/rfe.py index a58fd33d61f47..ca6ede9a7c760 100644 --- a/sklearn/feature_selection/rfe.py +++ b/sklearn/feature_selection/rfe.py @@ -340,12 +340,9 @@ def fit(self, X, y): # Score each subset of features for k in range(0, max(ranking_)): mask = np.where(ranking_ <= k + 1)[0] - X_train_subset = X_train[:, mask] - X_test_subset = X_test[:, mask] - estimator = clone(self.estimator) - estimator.fit(X_train_subset, y_train) - score = _score(estimator, X_test_subset, y_test, scorer) + estimator.fit(X_train[:, mask], y_train) + score = _score(estimator, X_test[:, mask], y_test, scorer) if self.verbose > 0: print("Finished fold with %d / %d feature ranks, score=%f" From 5b211cd96543090ac5a3684b85e4fcae620003c9 Mon Sep 17 00:00:00 2001 From: Alexander Fabisch Date: Tue, 14 Jan 2014 11:12:02 +0100 Subject: [PATCH 14/51] Remove helper function '_fit' --- sklearn/cross_validation.py | 13 ++++--------- sklearn/learning_curve.py | 10 +++++++--- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/sklearn/cross_validation.py b/sklearn/cross_validation.py index 8eb08f9a95deb..7eaadafa2dfec 100644 --- a/sklearn/cross_validation.py +++ b/sklearn/cross_validation.py @@ -1112,7 +1112,10 @@ def _cross_val_score(estimator, X, y, scorer, train, test, X_train, y_train = _split(estimator, X, y, train) X_test, y_test = _split(estimator, X, y, test, train) - _fit(estimator.fit, X_train, y_train, **fit_params) + if y_train is None: + estimator.fit(X_train, **fit_params) + else: + estimator.fit(X_train, y_train, **fit_params) score = _score(estimator, X_test, y_test, scorer) scoring_time = time.time() - start_time @@ -1159,14 +1162,6 @@ def _split(estimator, X, y, indices, train_indices=None): return X_subset, y_subset -def _fit(fit_function, X_train, y_train, **fit_params): - """Fit an estimator on a given training set.""" - if y_train is None: - fit_function(X_train, **fit_params) - else: - fit_function(X_train, y_train, **fit_params) - - def _score(estimator, X_test, y_test, scorer): """Compute the score of an estimator on a given test set.""" if y_test is None: diff --git a/sklearn/learning_curve.py b/sklearn/learning_curve.py index 5ca556a99417f..6ba706ed7442f 100644 --- a/sklearn/learning_curve.py +++ b/sklearn/learning_curve.py @@ -10,7 +10,7 @@ from .cross_validation import _check_cv from .utils import 
check_arrays from .externals.joblib import Parallel, delayed -from .cross_validation import _split, _fit, _score, _cross_val_score +from .cross_validation import _split, _score, _cross_val_score from .metrics.scorer import check_scoring @@ -209,8 +209,12 @@ def _incremental_fit_estimator(estimator, X, y, classes, train, test, X_partial_train, y_partial_train = _split(estimator, X, y, partial_train) X_test, y_test = _split(estimator, X, y, test, train[:n_train_samples]) - _fit(estimator.partial_fit, X_partial_train, y_partial_train, - classes=classes) + if y_partial_train is None: + estimator.partial_fit(X_partial_train, classes=classes) + else: + estimator.partial_fit(X_partial_train, y_partial_train, + classes=classes) train_scores.append(_score(estimator, X_train, y_train, scorer)) test_scores.append(_score(estimator, X_test, y_test, scorer)) return np.array((train_scores, test_scores)).T + From 13bc90e35cb37cc4e054413057d8d7f0b29ef8a5 Mon Sep 17 00:00:00 2001 From: Mathieu Blondel Date: Wed, 15 Jan 2014 02:20:56 +0900 Subject: [PATCH 15/51] Add evaluate_scorers function. --- sklearn/metrics/scorer.py | 210 ++++++++------------ sklearn/metrics/tests/test_score_objects.py | 20 +- 2 files changed, 105 insertions(+), 125 deletions(-) diff --git a/sklearn/metrics/scorer.py b/sklearn/metrics/scorer.py index 9fbf4893652c6..e3e5a45f793a6 100644 --- a/sklearn/metrics/scorer.py +++ b/sklearn/metrics/scorer.py @@ -31,130 +31,94 @@ from ..externals import six -class _BaseScorer(six.with_metaclass(ABCMeta, object)): - def __init__(self, score_func, sign, kwargs): - self._kwargs = kwargs - self._score_func = score_func - self._sign = sign +class _Scorer(object): + + def __init__(self, score_func, greater_is_better=True, needs_proba=False, + needs_threshold=False, kwargs={}): + self.score_func = score_func + self.greater_is_better = greater_is_better + self.needs_proba = needs_proba + self.needs_threshold = needs_threshold + self.kwargs = kwargs - @abstractmethod def __call__(self, estimator, X, y): - pass - - def __repr__(self): - kwargs_string = "".join([", %s=%s" % (str(k), str(v)) - for k, v in self._kwargs.items()]) - return ("make_scorer(%s%s%s%s)" - % (self._score_func.__name__, - "" if self._sign > 0 else ", greater_is_better=False", - self._factory_args(), kwargs_string)) - - def _factory_args(self): - """Return non-default make_scorer arguments for repr.""" - return "" - - -class _PredictScorer(_BaseScorer): - def __call__(self, estimator, X, y_true): - """Evaluate predicted target values for X relative to y_true. - - Parameters - ---------- - estimator : object - Trained estimator to use for scoring. Must have a predict_proba - method; the output of that is used to compute the score. - - X : array-like or sparse matrix - Test data that will be fed to estimator.predict. - - y_true : array-like - Gold standard target values for X. - - Returns - ------- - score : float - Score function applied to prediction of estimator on X. - """ + return evaluate_scorers(estimator, X, y, [self])[0] + + +def evaluate_scorers(estimator, X, y, scorers): + has_pb = hasattr(estimator, "predict_proba") + has_df = hasattr(estimator, "decision_function") + + # Make a first pass through scorers to determine if we need + # predict_proba or decision_function. + compute_proba = False + compute_df = False + for scorer in scorers: + if scorer.needs_proba: + if not has_pb: + raise ValueError("%s needs probabilities but predict_proba is" + "not available in %s." 
% (scorer, estimator)) + compute_proba = True + + elif scorer.needs_threshold: + if has_pb: + # We choose predict_proba first because its interface + # is more consistent across the project. + compute_proba = True + elif has_df: + compute_df = True + else: + raise ValueError("%s needs continuous outputs but neither" + "predict_proba nor decision_function " + "are available in %s." % (scorer, estimator)) + + # Compute predict_proba or decision_function if needed. + y_pred = None + if compute_proba: + y_proba = estimator.predict_proba(X) + + # For multi-output multi-class estimator + #if isinstance(y_proba, list): + #y_proba = np.vstack([p[:, -1] for p in y_proba]).T + + y_pred = estimator.classes_[y_proba.argmax(axis=1)] + + + elif compute_df: + df = estimator.decision_function(X) + + # For multi-output multi-class estimator + #if isinstance(df, list): + #df = np.vstack(p for p in df).T + + if len(df.shape) == 2 and df.shape[1] >= 2: + y_pred = estimator.classes_[df.argmax(axis=1)] + else: + y_pred = estimator.classes_[(df >= 0).astype(int)] + + # Compute y_pred if needed + if y_pred is None: y_pred = estimator.predict(X) - return self._sign * self._score_func(y_true, y_pred, **self._kwargs) - - -class _ProbaScorer(_BaseScorer): - def __call__(self, clf, X, y): - """Evaluate predicted probabilities for X relative to y_true. - - Parameters - ---------- - clf : object - Trained classifier to use for scoring. Must have a predict_proba - method; the output of that is used to compute the score. - - X : array-like or sparse matrix - Test data that will be fed to clf.predict_proba. - - y : array-like - Gold standard target values for X. These must be class labels, - not probabilities. - - Returns - ------- - score : float - Score function applied to prediction of estimator on X. - """ - y_pred = clf.predict_proba(X) - return self._sign * self._score_func(y, y_pred, **self._kwargs) - - def _factory_args(self): - return ", needs_proba=True" - - -class _ThresholdScorer(_BaseScorer): - def __call__(self, clf, X, y): - """Evaluate decision function output for X relative to y_true. - - Parameters - ---------- - clf : object - Trained classifier to use for scoring. Must have either a - decision_function method or a predict_proba method; the output of - that is used to compute the score. - - X : array-like or sparse matrix - Test data that will be fed to clf.decision_function or - clf.predict_proba. - - y : array-like - Gold standard target values for X. These must be class labels, - not decision function values. - - Returns - ------- - score : float - Score function applied to prediction of estimator on X. - """ - y_type = type_of_target(y) - if y_type not in ("binary", "multilabel-indicator"): - raise ValueError("{0} format is not supported".format(y_type)) - - try: - y_pred = clf.decision_function(X) - # For multi-output multi-class estimator - if isinstance(y_pred, list): - y_pred = np.vstack(p for p in y_pred).T + # Compute scores. 
+ scores = [] + for scorer in scorers: + if scorer.needs_proba: + score = scorer.score_func(y, y_proba, **scorer.kwargs) - except (NotImplementedError, AttributeError): - y_pred = clf.predict_proba(X) + elif scorer.needs_threshold: + if compute_proba: + score = scorer.score_func(y, y_proba[:, 1], **scorer.kwargs) + else: + score = scorer.score_func(y, df.ravel(), **scorer.kwargs) - if y_type == "binary": - y_pred = y_pred[:, 1] - elif isinstance(y_pred, list): - y_pred = np.vstack([p[:, -1] for p in y_pred]).T + else: + score = scorer.score_func(y, y_pred, **scorer.kwargs) - return self._sign * self._score_func(y, y_pred, **self._kwargs) + sign = 1 if scorer.greater_is_better else -1 + scores.append(sign * score) - def _factory_args(self): - return ", needs_threshold=True" + return np.array(scores) def _deprecate_loss_and_score_funcs( @@ -297,17 +261,15 @@ def make_scorer(score_func, greater_is_better=True, needs_proba=False, >>> grid = GridSearchCV(LinearSVC(), param_grid={'C': [1, 10]}, ... scoring=ftwo_scorer) """ - sign = 1 if greater_is_better else -1 if needs_proba and needs_threshold: raise ValueError("Set either needs_proba or needs_threshold to True," " but not both.") - if needs_proba: - cls = _ProbaScorer - elif needs_threshold: - cls = _ThresholdScorer - else: - cls = _PredictScorer - return cls(score_func, sign, kwargs) + + return _Scorer(score_func, + greater_is_better=greater_is_better, + needs_proba=needs_proba, + needs_threshold=needs_threshold, + kwargs=kwargs) # Standard regression scores diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py index 3cda7aadece52..b7d698ca58163 100644 --- a/sklearn/metrics/tests/test_score_objects.py +++ b/sklearn/metrics/tests/test_score_objects.py @@ -5,17 +5,19 @@ from sklearn.utils.testing import assert_almost_equal from sklearn.utils.testing import assert_raises from sklearn.utils.testing import ignore_warnings +from sklearn.utils.testing import SkipTest from sklearn.metrics import (f1_score, r2_score, roc_auc_score, fbeta_score, log_loss) from sklearn.metrics.cluster import adjusted_rand_score -from sklearn.metrics.scorer import check_scoring +from sklearn.metrics.scorer import check_scoring, evaluate_scorers from sklearn.metrics import make_scorer, SCORERS from sklearn.svm import LinearSVC from sklearn.cluster import KMeans from sklearn.linear_model import Ridge, LogisticRegression from sklearn.tree import DecisionTreeClassifier from sklearn.datasets import make_blobs +from sklearn.datasets import make_classification from sklearn.datasets import make_multilabel_classification from sklearn.datasets import load_diabetes from sklearn.cross_validation import train_test_split, cross_val_score @@ -152,6 +154,7 @@ def test_thresholded_scorers_multilabel_indicator_data(): """Test that the scorer work with multilabel-indicator format for multilabel and multi-output multi-class classifier """ + raise SkipTest X, y = make_multilabel_classification(return_indicator=True, allow_unlabeled=False, random_state=0) @@ -205,6 +208,21 @@ def test_unsupervised_scorers(): assert_almost_equal(score1, score2) +def test_evaluate_scorers(): + X, y = make_classification(n_classes=2, random_state=0) + clf = LinearSVC() + clf.fit(X, y) + s1, s2 = evaluate_scorers(clf, X, y, [SCORERS["f1"], + SCORERS["roc_auc"]]) + df = clf.decision_function(X) + y_pred = clf.predict(X) + f1 = f1_score(y, y_pred) + roc = roc_auc_score(y, df.ravel()) + + assert_almost_equal(s1, f1) + assert_almost_equal(s2, roc) + + 
@ignore_warnings def test_raises_on_score_list(): """Test that when a list of scores is returned, we raise proper errors.""" From 4b2cd18a3e58536acba3fd41eb6e35b7e86cc168 Mon Sep 17 00:00:00 2001 From: Mathieu Blondel Date: Wed, 15 Jan 2014 23:33:03 +0900 Subject: [PATCH 16/51] Add more tests for evaluate_scorers. --- sklearn/metrics/scorer.py | 2 +- sklearn/metrics/tests/test_score_objects.py | 80 +++++++++++++++++++-- 2 files changed, 74 insertions(+), 8 deletions(-) diff --git a/sklearn/metrics/scorer.py b/sklearn/metrics/scorer.py index e3e5a45f793a6..e03130d411ccf 100644 --- a/sklearn/metrics/scorer.py +++ b/sklearn/metrics/scorer.py @@ -96,7 +96,7 @@ def evaluate_scorers(estimator, X, y, scorers): else: y_pred = estimator.classes_[(df >= 0).astype(int)] - # Compute y_pred if needed + # Compute y_pred if needed. if y_pred is None: y_pred = estimator.predict(X) diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py index b7d698ca58163..8d2f7b202cab1 100644 --- a/sklearn/metrics/tests/test_score_objects.py +++ b/sklearn/metrics/tests/test_score_objects.py @@ -7,8 +7,8 @@ from sklearn.utils.testing import ignore_warnings from sklearn.utils.testing import SkipTest -from sklearn.metrics import (f1_score, r2_score, roc_auc_score, fbeta_score, - log_loss) +from sklearn.metrics import (accuracy_score, f1_score, r2_score, roc_auc_score, + fbeta_score, log_loss, mean_squared_error) from sklearn.metrics.cluster import adjusted_rand_score from sklearn.metrics.scorer import check_scoring, evaluate_scorers from sklearn.metrics import make_scorer, SCORERS @@ -17,6 +17,7 @@ from sklearn.linear_model import Ridge, LogisticRegression from sklearn.tree import DecisionTreeClassifier from sklearn.datasets import make_blobs +from sklearn.datasets import load_iris from sklearn.datasets import make_classification from sklearn.datasets import make_multilabel_classification from sklearn.datasets import load_diabetes @@ -208,19 +209,84 @@ def test_unsupervised_scorers(): assert_almost_equal(score1, score2) -def test_evaluate_scorers(): +def test_evaluate_scorers_binary(): X, y = make_classification(n_classes=2, random_state=0) + + # Test a classifier with decision_function. clf = LinearSVC() clf.fit(X, y) + s1, s2 = evaluate_scorers(clf, X, y, [SCORERS["f1"], SCORERS["roc_auc"]]) df = clf.decision_function(X) y_pred = clf.predict(X) - f1 = f1_score(y, y_pred) - roc = roc_auc_score(y, df.ravel()) - assert_almost_equal(s1, f1) - assert_almost_equal(s2, roc) + assert_almost_equal(s1, f1_score(y, y_pred)) + assert_almost_equal(s2, roc_auc_score(y, df)) + + # Test a classifier with predict_proba. + clf = LogisticRegression() + clf.fit(X, y) + + s1, s2 = evaluate_scorers(clf, X, y, [SCORERS["f1"], + SCORERS["roc_auc"]]) + y_proba = clf.predict_proba(X)[:, 1] + y_pred = clf.predict(X) + + assert_almost_equal(s1, f1_score(y, y_pred)) + assert_almost_equal(s2, roc_auc_score(y, y_proba)) + + +def test_evaluate_scorers_multiclass(): + iris = load_iris() + X, y = iris.data, iris.target + + # Test a classifier with decision_function. + clf = LinearSVC() + clf.fit(X, y) + + s1, s2 = evaluate_scorers(clf, X, y, [SCORERS["f1"], + SCORERS["accuracy"]]) + y_pred = clf.predict(X) + + assert_almost_equal(s1, f1_score(y, y_pred)) + assert_almost_equal(s2, accuracy_score(y, y_pred)) + + # Test a classifier with predict_proba. 
+ clf = LogisticRegression() + clf.fit(X, y) + + s1, s2, s3 = evaluate_scorers(clf, X, y, [SCORERS["f1"], + SCORERS["accuracy"], + SCORERS["log_loss"]]) + y_proba = clf.predict_proba(X) + y_pred = clf.predict(X) + + assert_almost_equal(s1, f1_score(y, y_pred)) + assert_almost_equal(s2, accuracy_score(y, y_pred)) + assert_almost_equal(s3, -log_loss(y, y_proba)) + + +def test_evaluate_scorers_regression(): + diabetes = load_diabetes() + X, y = diabetes.data, diabetes.target + + reg = Ridge() + reg.fit(X, y) + + s1, s2 = evaluate_scorers(reg, X, y, [SCORERS["r2"], + SCORERS["mean_squared_error"]]) + y_pred = reg.predict(X) + + assert_almost_equal(s1, r2_score(y, y_pred)) + assert_almost_equal(s2, -mean_squared_error(y, y_pred)) + + +def test_evaluate_scorers_exceptions(): + clf = LinearSVC() + # log_loss needs probabilities but LinearSVC does not have predict_proba. + assert_raises(ValueError, evaluate_scorers, clf, [], [], + [SCORERS["log_loss"]]) @ignore_warnings From 91ff4981dc8def2c952946c026221b4bf75f69a9 Mon Sep 17 00:00:00 2001 From: Mathieu Blondel Date: Thu, 16 Jan 2014 00:08:36 +0900 Subject: [PATCH 17/51] Support ranking by regression. --- sklearn/metrics/scorer.py | 16 +++++++++++----- sklearn/metrics/tests/test_score_objects.py | 19 +++++++++++++++++-- 2 files changed, 28 insertions(+), 7 deletions(-) diff --git a/sklearn/metrics/scorer.py b/sklearn/metrics/scorer.py index e03130d411ccf..e98b666a366e8 100644 --- a/sklearn/metrics/scorer.py +++ b/sklearn/metrics/scorer.py @@ -29,6 +29,7 @@ from .cluster import adjusted_rand_score from ..utils.multiclass import type_of_target from ..externals import six +from ..base import is_classifier class _Scorer(object): @@ -65,13 +66,16 @@ def evaluate_scorers(estimator, X, y, scorers): # We choose predict_proba first because its interface # is more consistent across the project. compute_proba = True - elif has_df: - compute_df = True - else: + continue + + if is_classifier(estimator) and not has_df: raise ValueError("%s needs continuous outputs but neither" "predict_proba nor decision_function " "are available in %s." % (scorer, estimator)) + if is_classifier(estimator): + compute_df = True + # Compute predict_proba or decision_function if needed. 
y_pred = None if compute_proba: @@ -84,7 +88,7 @@ def evaluate_scorers(estimator, X, y, scorers): y_pred = estimator.classes_[y_proba.argmax(axis=1)] - elif compute_df: + if compute_df: df = estimator.decision_function(X) # For multi-output multi-class estimator @@ -109,8 +113,10 @@ def evaluate_scorers(estimator, X, y, scorers): elif scorer.needs_threshold: if compute_proba: score = scorer.score_func(y, y_proba[:, 1], **scorer.kwargs) - else: + elif is_classifier(estimator): score = scorer.score_func(y, df.ravel(), **scorer.kwargs) + else: + score = scorer.score_func(y, y_pred, **scorer.kwargs) else: score = scorer.score_func(y, y_pred, **scorer.kwargs) diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py index 8d2f7b202cab1..b1ca52bf55477 100644 --- a/sklearn/metrics/tests/test_score_objects.py +++ b/sklearn/metrics/tests/test_score_objects.py @@ -8,14 +8,15 @@ from sklearn.utils.testing import SkipTest from sklearn.metrics import (accuracy_score, f1_score, r2_score, roc_auc_score, - fbeta_score, log_loss, mean_squared_error) + fbeta_score, log_loss, mean_squared_error, + average_precision_score) from sklearn.metrics.cluster import adjusted_rand_score from sklearn.metrics.scorer import check_scoring, evaluate_scorers from sklearn.metrics import make_scorer, SCORERS from sklearn.svm import LinearSVC from sklearn.cluster import KMeans from sklearn.linear_model import Ridge, LogisticRegression -from sklearn.tree import DecisionTreeClassifier +from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor from sklearn.datasets import make_blobs from sklearn.datasets import load_iris from sklearn.datasets import make_classification @@ -282,6 +283,20 @@ def test_evaluate_scorers_regression(): assert_almost_equal(s2, -mean_squared_error(y, y_pred)) +def test_evaluate_scorers_ranking_by_regression(): + X, y = make_classification(n_classes=2, random_state=0) + + reg = DecisionTreeRegressor() + reg.fit(X, y) + + s1, s2 = evaluate_scorers(reg, X, y, [SCORERS["roc_auc"], + SCORERS["average_precision"]]) + y_pred = reg.predict(X) + + assert_almost_equal(s1, roc_auc_score(y, y_pred)) + assert_almost_equal(s2, average_precision_score(y, y_pred)) + + def test_evaluate_scorers_exceptions(): clf = LinearSVC() # log_loss needs probabilities but LinearSVC does not have predict_proba. From 4a934f092487d7941ae01c074ec0294807ff2121 Mon Sep 17 00:00:00 2001 From: Mathieu Blondel Date: Thu, 16 Jan 2014 00:09:24 +0900 Subject: [PATCH 18/51] Support SVC. --- sklearn/metrics/scorer.py | 16 +++++++++++----- sklearn/metrics/tests/test_score_objects.py | 18 +++++++++--------- 2 files changed, 20 insertions(+), 14 deletions(-) diff --git a/sklearn/metrics/scorer.py b/sklearn/metrics/scorer.py index e98b666a366e8..758715df9072f 100644 --- a/sklearn/metrics/scorer.py +++ b/sklearn/metrics/scorer.py @@ -79,14 +79,20 @@ def evaluate_scorers(estimator, X, y, scorers): # Compute predict_proba or decision_function if needed. 
y_pred = None if compute_proba: - y_proba = estimator.predict_proba(X) + try: + y_proba = estimator.predict_proba(X) - # For multi-output multi-class estimator - #if isinstance(y_proba, list): - #y_proba = np.vstack([p[:, -1] for p in y_proba]).T + # For multi-output multi-class estimator + #if isinstance(y_proba, list): + #y_proba = np.vstack([p[:, -1] for p in y_proba]).T - y_pred = estimator.classes_[y_proba.argmax(axis=1)] + y_pred = estimator.classes_[y_proba.argmax(axis=1)] + except NotImplementedError: + # SVC has predict_proba but it may raise NotImplementedError + # if probabilities are not enabled. + compute_proba = False + compute_df = True if compute_df: df = estimator.decision_function(X) diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py index b1ca52bf55477..8f35064621a2a 100644 --- a/sklearn/metrics/tests/test_score_objects.py +++ b/sklearn/metrics/tests/test_score_objects.py @@ -13,7 +13,7 @@ from sklearn.metrics.cluster import adjusted_rand_score from sklearn.metrics.scorer import check_scoring, evaluate_scorers from sklearn.metrics import make_scorer, SCORERS -from sklearn.svm import LinearSVC +from sklearn.svm import LinearSVC, SVC from sklearn.cluster import KMeans from sklearn.linear_model import Ridge, LogisticRegression from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor @@ -214,16 +214,16 @@ def test_evaluate_scorers_binary(): X, y = make_classification(n_classes=2, random_state=0) # Test a classifier with decision_function. - clf = LinearSVC() - clf.fit(X, y) + for clf in (SVC(), LinearSVC()): + clf.fit(X, y) - s1, s2 = evaluate_scorers(clf, X, y, [SCORERS["f1"], - SCORERS["roc_auc"]]) - df = clf.decision_function(X) - y_pred = clf.predict(X) + s1, s2 = evaluate_scorers(clf, X, y, [SCORERS["f1"], + SCORERS["roc_auc"]]) + df = clf.decision_function(X) + y_pred = clf.predict(X) - assert_almost_equal(s1, f1_score(y, y_pred)) - assert_almost_equal(s2, roc_auc_score(y, df)) + assert_almost_equal(s1, f1_score(y, y_pred)) + assert_almost_equal(s2, roc_auc_score(y, df)) # Test a classifier with predict_proba. clf = LogisticRegression() From 314497a623af152265760a01e1cdd29d2ba4e3c3 Mon Sep 17 00:00:00 2001 From: Mathieu Blondel Date: Thu, 16 Jan 2014 01:24:07 +0900 Subject: [PATCH 19/51] Handle multi-label case. --- sklearn/metrics/scorer.py | 50 ++++++++++----------- sklearn/metrics/tests/test_score_objects.py | 34 +++++++------- 2 files changed, 41 insertions(+), 43 deletions(-) diff --git a/sklearn/metrics/scorer.py b/sklearn/metrics/scorer.py index 758715df9072f..96b6005ad7ad7 100644 --- a/sklearn/metrics/scorer.py +++ b/sklearn/metrics/scorer.py @@ -49,58 +49,58 @@ def __call__(self, estimator, X, y): def evaluate_scorers(estimator, X, y, scorers): has_pb = hasattr(estimator, "predict_proba") has_df = hasattr(estimator, "decision_function") + _is_classifier = is_classifier(estimator) + _type_of_y = type_of_target(y) # Make a first pass through scorers to determine if we need # predict_proba or decision_function. - compute_proba = False - compute_df = False + needs_proba = False + needs_df = False for scorer in scorers: if scorer.needs_proba: if not has_pb: raise ValueError("%s needs probabilities but predict_proba is" "not available in %s." % (scorer, estimator)) - compute_proba = True + needs_proba = True elif scorer.needs_threshold: if has_pb: # We choose predict_proba first because its interface # is more consistent across the project. 
- compute_proba = True + needs_proba = True continue - if is_classifier(estimator) and not has_df: + if _is_classifier and not has_df: raise ValueError("%s needs continuous outputs but neither" "predict_proba nor decision_function " "are available in %s." % (scorer, estimator)) - if is_classifier(estimator): - compute_df = True + if _is_classifier: + needs_df = True - # Compute predict_proba or decision_function if needed. + # Compute predict_proba if needed. + y_proba = None y_pred = None - if compute_proba: + if needs_proba: try: y_proba = estimator.predict_proba(X) - # For multi-output multi-class estimator - #if isinstance(y_proba, list): - #y_proba = np.vstack([p[:, -1] for p in y_proba]).T - y_pred = estimator.classes_[y_proba.argmax(axis=1)] - except NotImplementedError: + if _type_of_y == "binary": + y_proba = y_proba[:, 1] + + except (NotImplementedError, AttributeError): # SVC has predict_proba but it may raise NotImplementedError # if probabilities are not enabled. - compute_proba = False - compute_df = True + needs_proba = False + needs_df = True - if compute_df: + # Compute decision_function. + df = None + if needs_df: df = estimator.decision_function(X) - # For multi-output multi-class estimator - #if isinstance(df, list): - #df = np.vstack(p for p in df).T - if len(df.shape) == 2 and df.shape[1] >= 2: y_pred = estimator.classes_[df.argmax(axis=1)] else: @@ -117,10 +117,10 @@ def evaluate_scorers(estimator, X, y, scorers): score = scorer.score_func(y, y_proba, **scorer.kwargs) elif scorer.needs_threshold: - if compute_proba: - score = scorer.score_func(y, y_proba[:, 1], **scorer.kwargs) - elif is_classifier(estimator): - score = scorer.score_func(y, df.ravel(), **scorer.kwargs) + if y_proba is not None: + score = scorer.score_func(y, y_proba, **scorer.kwargs) + elif df is not None: + score = scorer.score_func(y, df, **scorer.kwargs) else: score = scorer.score_func(y, y_pred, **scorer.kwargs) diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py index 8f35064621a2a..1dbb1ffa32970 100644 --- a/sklearn/metrics/tests/test_score_objects.py +++ b/sklearn/metrics/tests/test_score_objects.py @@ -5,7 +5,6 @@ from sklearn.utils.testing import assert_almost_equal from sklearn.utils.testing import assert_raises from sklearn.utils.testing import ignore_warnings -from sklearn.utils.testing import SkipTest from sklearn.metrics import (accuracy_score, f1_score, r2_score, roc_auc_score, fbeta_score, log_loss, mean_squared_error, @@ -156,32 +155,31 @@ def test_thresholded_scorers_multilabel_indicator_data(): """Test that the scorer work with multilabel-indicator format for multilabel and multi-output multi-class classifier """ - raise SkipTest X, y = make_multilabel_classification(return_indicator=True, allow_unlabeled=False, random_state=0) X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0) # Multi-output multi-class predict_proba - clf = DecisionTreeClassifier() - clf.fit(X_train, y_train) - y_proba = clf.predict_proba(X_test) - score1 = SCORERS['roc_auc'](clf, X_test, y_test) - score2 = roc_auc_score(y_test, np.vstack(p[:, -1] for p in y_proba).T) - assert_almost_equal(score1, score2) + #clf = DecisionTreeClassifier() + #clf.fit(X_train, y_train) + #y_proba = clf.predict_proba(X_test) + #score1 = SCORERS['roc_auc'](clf, X_test, y_test) + #score2 = roc_auc_score(y_test, np.vstack(p[:, -1] for p in y_proba).T) + #assert_almost_equal(score1, score2) # Multi-output multi-class decision_function # TODO Is there any yet? 
- clf = DecisionTreeClassifier() - clf.fit(X_train, y_train) - clf._predict_proba = clf.predict_proba - clf.predict_proba = None - clf.decision_function = lambda X: [p[:, 1] for p in clf._predict_proba(X)] - - y_proba = clf.decision_function(X_test) - score1 = SCORERS['roc_auc'](clf, X_test, y_test) - score2 = roc_auc_score(y_test, np.vstack(p for p in y_proba).T) - assert_almost_equal(score1, score2) + #clf = DecisionTreeClassifier() + #clf.fit(X_train, y_train) + #clf._predict_proba = clf.predict_proba + #clf.predict_proba = None + #clf.decision_function = lambda X: [p[:, 1] for p in clf._predict_proba(X)] + + #y_proba = clf.decision_function(X_test) + #score1 = SCORERS['roc_auc'](clf, X_test, y_test) + #score2 = roc_auc_score(y_test, np.vstack(p for p in y_proba).T) + #assert_almost_equal(score1, score2) # Multilabel predict_proba clf = OneVsRestClassifier(DecisionTreeClassifier()) From 754c72d2b6a40acc42e432eb3cc2e918270cedf7 Mon Sep 17 00:00:00 2001 From: Mathieu Blondel Date: Thu, 16 Jan 2014 01:34:37 +0900 Subject: [PATCH 20/51] Test ranking with more than two relevance levels. --- sklearn/metrics/tests/test_score_objects.py | 30 +++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py index 1dbb1ffa32970..f99e1a2badcb7 100644 --- a/sklearn/metrics/tests/test_score_objects.py +++ b/sklearn/metrics/tests/test_score_objects.py @@ -25,6 +25,24 @@ from sklearn.grid_search import GridSearchCV from sklearn.multiclass import OneVsRestClassifier +# FIXME: temporary, to demonstrate ranking with several relevance levels. +def dcg_score(y_true, y_score, k=10, gains="exponential"): + order = np.argsort(y_score)[::-1] + y_true = np.take(y_true, order[:k]) + + if gains == "exponential": + gains = 2 ** y_true - 1 + elif gains == "linear": + gains = y_true + else: + raise ValueError("Invalid gains option.") + + # highest rank is 1 so +2 instead of +1 + discounts = np.log2(np.arange(len(y_true)) + 2) + return np.sum(gains / discounts) + +dcg_scorer = make_scorer(dcg_score, needs_threshold=True) + class EstimatorWithoutFit(object): """Dummy estimator to test check_scoring""" @@ -294,6 +312,18 @@ def test_evaluate_scorers_ranking_by_regression(): assert_almost_equal(s1, roc_auc_score(y, y_pred)) assert_almost_equal(s2, average_precision_score(y, y_pred)) + diabetes = load_diabetes() + X, y = diabetes.data, diabetes.target + + reg.fit(X, y) + + s1, s2 = evaluate_scorers(reg, X, y, [SCORERS["r2"], + dcg_scorer]) + y_pred = reg.predict(X) + + assert_almost_equal(s1, r2_score(y, y_pred)) + assert_almost_equal(s2, dcg_score(y, y_pred)) + def test_evaluate_scorers_exceptions(): clf = LinearSVC() From 7f4d7ad130085d6907050a6969fb38a597770218 Mon Sep 17 00:00:00 2001 From: Mathieu Blondel Date: Thu, 16 Jan 2014 13:38:38 +0900 Subject: [PATCH 21/51] Rename evaluate_scorers to _evaluate_scorers. 
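Batch evaluation of several scorers is an implementation detail of the metrics module, so the helper gets a leading underscore; make_scorer, check_scoring and the SCORERS registry remain the public entry points. For reference, a minimal sketch of the internal call pattern exercised by the tests in this series (the _evaluate_scorers helper and its batched return value exist only on this branch, not in a released scikit-learn):

    from sklearn.datasets import make_classification
    from sklearn.svm import LinearSVC
    from sklearn.metrics import SCORERS
    from sklearn.metrics.scorer import _evaluate_scorers

    X, y = make_classification(n_classes=2, random_state=0)
    clf = LinearSVC().fit(X, y)
    # decision_function is computed once and shared by both scorers;
    # the helper returns one score per scorer, in the order given.
    f1, auc = _evaluate_scorers(clf, X, y, [SCORERS["f1"], SCORERS["roc_auc"]])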
--- sklearn/metrics/scorer.py | 4 ++-- sklearn/metrics/tests/test_score_objects.py | 18 +++++++++--------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/sklearn/metrics/scorer.py b/sklearn/metrics/scorer.py index cd40c2a09ed71..18d629b04908a 100644 --- a/sklearn/metrics/scorer.py +++ b/sklearn/metrics/scorer.py @@ -43,10 +43,10 @@ def __init__(self, score_func, greater_is_better=True, needs_proba=False, self.kwargs = kwargs def __call__(self, estimator, X, y): - return evaluate_scorers(estimator, X, y, [self])[0] + return _evaluate_scorers(estimator, X, y, [self])[0] -def evaluate_scorers(estimator, X, y, scorers): +def _evaluate_scorers(estimator, X, y, scorers): has_pb = hasattr(estimator, "predict_proba") has_df = hasattr(estimator, "decision_function") _is_classifier = is_classifier(estimator) diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py index 9f23fdbe7f23b..83fce651bc693 100644 --- a/sklearn/metrics/tests/test_score_objects.py +++ b/sklearn/metrics/tests/test_score_objects.py @@ -12,7 +12,7 @@ fbeta_score, log_loss, mean_squared_error, average_precision_score) from sklearn.metrics.cluster import adjusted_rand_score -from sklearn.metrics.scorer import check_scoring, evaluate_scorers +from sklearn.metrics.scorer import check_scoring, _evaluate_scorers from sklearn.metrics import make_scorer, SCORERS from sklearn.svm import LinearSVC, SVC from sklearn.cluster import KMeans @@ -291,7 +291,7 @@ def test_evaluate_scorers_binary(): for clf in (SVC(), LinearSVC()): clf.fit(X, y) - s1, s2 = evaluate_scorers(clf, X, y, [SCORERS["f1"], + s1, s2 = _evaluate_scorers(clf, X, y, [SCORERS["f1"], SCORERS["roc_auc"]]) df = clf.decision_function(X) y_pred = clf.predict(X) @@ -303,7 +303,7 @@ def test_evaluate_scorers_binary(): clf = LogisticRegression() clf.fit(X, y) - s1, s2 = evaluate_scorers(clf, X, y, [SCORERS["f1"], + s1, s2 = _evaluate_scorers(clf, X, y, [SCORERS["f1"], SCORERS["roc_auc"]]) y_proba = clf.predict_proba(X)[:, 1] y_pred = clf.predict(X) @@ -320,7 +320,7 @@ def test_evaluate_scorers_multiclass(): clf = LinearSVC() clf.fit(X, y) - s1, s2 = evaluate_scorers(clf, X, y, [SCORERS["f1"], + s1, s2 = _evaluate_scorers(clf, X, y, [SCORERS["f1"], SCORERS["accuracy"]]) y_pred = clf.predict(X) @@ -331,7 +331,7 @@ def test_evaluate_scorers_multiclass(): clf = LogisticRegression() clf.fit(X, y) - s1, s2, s3 = evaluate_scorers(clf, X, y, [SCORERS["f1"], + s1, s2, s3 = _evaluate_scorers(clf, X, y, [SCORERS["f1"], SCORERS["accuracy"], SCORERS["log_loss"]]) y_proba = clf.predict_proba(X) @@ -349,7 +349,7 @@ def test_evaluate_scorers_regression(): reg = Ridge() reg.fit(X, y) - s1, s2 = evaluate_scorers(reg, X, y, [SCORERS["r2"], + s1, s2 = _evaluate_scorers(reg, X, y, [SCORERS["r2"], SCORERS["mean_squared_error"]]) y_pred = reg.predict(X) @@ -363,7 +363,7 @@ def test_evaluate_scorers_ranking_by_regression(): reg = DecisionTreeRegressor() reg.fit(X, y) - s1, s2 = evaluate_scorers(reg, X, y, [SCORERS["roc_auc"], + s1, s2 = _evaluate_scorers(reg, X, y, [SCORERS["roc_auc"], SCORERS["average_precision"]]) y_pred = reg.predict(X) @@ -375,7 +375,7 @@ def test_evaluate_scorers_ranking_by_regression(): reg.fit(X, y) - s1, s2 = evaluate_scorers(reg, X, y, [SCORERS["r2"], + s1, s2 = _evaluate_scorers(reg, X, y, [SCORERS["r2"], dcg_scorer]) y_pred = reg.predict(X) @@ -386,7 +386,7 @@ def test_evaluate_scorers_ranking_by_regression(): def test_evaluate_scorers_exceptions(): clf = LinearSVC() # log_loss needs probabilities but LinearSVC does 
not have predict_proba. - assert_raises(ValueError, evaluate_scorers, clf, [], [], + assert_raises(ValueError, _evaluate_scorers, clf, [], [], [SCORERS["log_loss"]]) From a756083e7767eff1862559d13ffb075a7807c3e5 Mon Sep 17 00:00:00 2001 From: Mathieu Blondel Date: Thu, 16 Jan 2014 14:11:44 +0900 Subject: [PATCH 22/51] Remove _score utility function. --- sklearn/cross_validation.py | 16 ++-------------- sklearn/feature_selection/rfe.py | 4 ++-- sklearn/learning_curve.py | 10 +++++++--- sklearn/metrics/scorer.py | 14 +++++++++++--- 4 files changed, 22 insertions(+), 22 deletions(-) diff --git a/sklearn/cross_validation.py b/sklearn/cross_validation.py index df042e2b6a5ac..01251a072e8ea 100644 --- a/sklearn/cross_validation.py +++ b/sklearn/cross_validation.py @@ -1184,9 +1184,9 @@ def _fit_and_score(estimator, X, y, scorer, train, test, verbose, parameters, estimator.fit(X_train, **fit_params) else: estimator.fit(X_train, y_train, **fit_params) - test_score = _score(estimator, X_test, y_test, scorer) + test_score = scorer(estimator, X_test, y_test) if return_train_score: - train_score = _score(estimator, X_train, y_train, scorer) + train_score = scorer(estimator, X_train, y_train) scoring_time = time.time() - start_time @@ -1235,18 +1235,6 @@ def _safe_split(estimator, X, y, indices, train_indices=None): return X_subset, y_subset -def _score(estimator, X_test, y_test, scorer): - """Compute the score of an estimator on a given test set.""" - if y_test is None: - score = scorer(estimator, X_test) - else: - score = scorer(estimator, X_test, y_test) - if not isinstance(score, numbers.Number): - raise ValueError("scoring must return a number, got %s (%s) instead." - % (str(score), type(score))) - return score - - def _permutation_test_score(estimator, X, y, cv, scorer): """Auxiliary function for permutation_test_score""" avg_score = [] diff --git a/sklearn/feature_selection/rfe.py b/sklearn/feature_selection/rfe.py index 01c99ceb526f4..05f376250c024 100644 --- a/sklearn/feature_selection/rfe.py +++ b/sklearn/feature_selection/rfe.py @@ -13,7 +13,7 @@ from ..base import clone from ..base import is_classifier from ..cross_validation import _check_cv as check_cv -from ..cross_validation import _safe_split, _score +from ..cross_validation import _safe_split from .base import SelectorMixin from ..metrics.scorer import check_scoring @@ -342,7 +342,7 @@ def fit(self, X, y): mask = np.where(ranking_ <= k + 1)[0] estimator = clone(self.estimator) estimator.fit(X_train[:, mask], y_train) - score = _score(estimator, X_test[:, mask], y_test, scorer) + score = scorer(estimator, X_test[:, mask], y_test) if self.verbose > 0: print("Finished fold with %d / %d feature ranks, score=%f" diff --git a/sklearn/learning_curve.py b/sklearn/learning_curve.py index 7989bc3534658..a650a11f75af3 100644 --- a/sklearn/learning_curve.py +++ b/sklearn/learning_curve.py @@ -10,7 +10,7 @@ from .cross_validation import _check_cv from .utils import check_arrays from .externals.joblib import Parallel, delayed -from .cross_validation import _safe_split, _score, _fit_and_score +from .cross_validation import _safe_split, _fit_and_score from .metrics.scorer import check_scoring @@ -204,18 +204,22 @@ def _incremental_fit_estimator(estimator, X, y, classes, train, test, """Train estimator on training subsets incrementally and compute scores.""" train_scores, test_scores = [], [] partitions = zip(train_sizes, np.split(train, train_sizes)[:-1]) + for n_train_samples, partial_train in partitions: train_subset = train[:n_train_samples] 
X_train, y_train = _safe_split(estimator, X, y, train_subset) X_partial_train, y_partial_train = _safe_split(estimator, X, y, partial_train) X_test, y_test = _safe_split(estimator, X, y, test, train_subset) + if y_partial_train is None: estimator.partial_fit(X_partial_train, classes=classes) else: estimator.partial_fit(X_partial_train, y_partial_train, classes=classes) - train_scores.append(_score(estimator, X_train, y_train, scorer)) - test_scores.append(_score(estimator, X_test, y_test, scorer)) + + train_scores.append(scorer(estimator, X_train, y_train)) + test_scores.append(scorer(estimator, X_test, y_test)) + return np.array((train_scores, test_scores)).T diff --git a/sklearn/metrics/scorer.py b/sklearn/metrics/scorer.py index 18d629b04908a..beae0ee27c56d 100644 --- a/sklearn/metrics/scorer.py +++ b/sklearn/metrics/scorer.py @@ -20,6 +20,7 @@ from abc import ABCMeta, abstractmethod from warnings import warn +import numbers import numpy as np @@ -127,6 +128,10 @@ def _evaluate_scorers(estimator, X, y, scorers): else: score = scorer.score_func(y, y_pred, **scorer.kwargs) + if not isinstance(score, numbers.Number): + raise ValueError("scoring must return a number, got %s (%s)" + " instead." % (str(score), type(score))) + sign = 1 if scorer.greater_is_better else -1 scores.append(sign * score) @@ -146,9 +151,12 @@ def get_scorer(scoring): return scorer -def _passthrough_scorer(estimator, *args, **kwargs): +def _default_scorer(estimator, X, y, *args, **kwargs): """Function that wraps estimator.score""" - return estimator.score(*args, **kwargs) + if y is None: + return estimator.score(X, *args, **kwargs) + else: + return estimator.score(X, y, *args, **kwargs) def check_scoring(estimator, scoring=None, allow_none=False, loss_func=None, @@ -204,7 +212,7 @@ def check_scoring(estimator, scoring=None, allow_none=False, loss_func=None, scorer = get_scorer(scoring) return scorer elif hasattr(estimator, 'score'): - return _passthrough_scorer + return _default_scorer elif not has_scoring: if allow_none: return None From b4255d8699ce8792ff04ee8075081f5c6086cefd Mon Sep 17 00:00:00 2001 From: Mathieu Blondel Date: Thu, 16 Jan 2014 17:11:38 +0900 Subject: [PATCH 23/51] Support for multiple scorers in cross_val_score. 
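With this change, scoring may also be a list of scorer specifications (e.g. metric names): each fold is fit once, every scorer is evaluated on that fold, and the return value becomes a 2-d array of shape (n_scorers, n_folds) instead of a 1-d array of fold scores. A usage sketch mirroring the test added in this patch (the list form of scoring is specific to this branch):

    from sklearn.datasets import make_classification
    from sklearn.linear_model import Perceptron
    from sklearn.cross_validation import cross_val_score

    X, y = make_classification(n_classes=2, random_state=0)
    clf = Perceptron(random_state=0)

    # One row per scorer, one column per fold.
    scores = cross_val_score(clf, X, y, cv=3, scoring=["f1", "roc_auc"])
    print(scores.shape)        # (2, 3)

    # A single scoring string keeps the old 1-d return value.
    f1_scores = cross_val_score(clf, X, y, cv=3, scoring="f1")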
--- sklearn/cross_validation.py | 77 ++++++++++++++++++-------- sklearn/grid_search.py | 9 ++- sklearn/learning_curve.py | 12 +++- sklearn/tests/test_cross_validation.py | 18 +++++- 4 files changed, 86 insertions(+), 30 deletions(-) diff --git a/sklearn/cross_validation.py b/sklearn/cross_validation.py index 01251a072e8ea..6e0fe40d330d2 100644 --- a/sklearn/cross_validation.py +++ b/sklearn/cross_validation.py @@ -27,7 +27,8 @@ from .utils.fixes import unique from .externals.joblib import Parallel, delayed, logger from .externals.six import with_metaclass -from .metrics.scorer import check_scoring +from .metrics.scorer import check_scoring, _evaluate_scorers + __all__ = ['Bootstrap', 'KFold', @@ -1087,20 +1088,38 @@ def cross_val_score(estimator, X, y=None, scoring=None, cv=None, n_jobs=1, """ X, y = check_arrays(X, y, sparse_format='csr', allow_lists=True) cv = _check_cv(cv, X, y, classifier=is_classifier(estimator)) - scorer = check_scoring(estimator, score_func=score_func, scoring=scoring) + + if isinstance(scoring, list): + scorers = [check_scoring(estimator, scoring=s) for s in scoring] + ret_1d = False + else: + scorers = [check_scoring(estimator, score_func=score_func, + scoring=scoring)] + ret_1d = True + # We clone the estimator to make sure that all the folds are # independent, and that it is pickle-able. parallel = Parallel(n_jobs=n_jobs, verbose=verbose, pre_dispatch=pre_dispatch) - scores = parallel(delayed(_fit_and_score)(clone(estimator), X, y, scorer, - train, test, verbose, None, - fit_params) + + # ret is a list of size n_folds. Each element of the list contains the tuple + # returned by _fit_and_score. + ret = parallel(delayed(_fit_and_score)(clone(estimator), X, y, scorers, + train, test, verbose, None, + fit_params) for train, test in cv) - return np.array(scores)[:, 0] + + # Retrieve n_scorers x n_folds 2d-array. + scores = np.array([r[0] for r in ret]).T + + if ret_1d: + return scores[0] + else: + return scores -def _fit_and_score(estimator, X, y, scorer, train, test, verbose, parameters, - fit_params, return_train_score=False, +def _fit_and_score(estimator, X, y, scorers, train, test, verbose, parameters, + fit_params, return_train_scores=False, return_parameters=False): """Fit estimator and compute scores for a given dataset split. @@ -1116,9 +1135,8 @@ def _fit_and_score(estimator, X, y, scorer, train, test, verbose, parameters, The target variable to try to predict in the case of supervised learning. - scoring : callable - A scorer callable object / function with signature - ``scorer(estimator, X, y)``. + scorers : list + A list of scorer objects train : array-like, shape = (n_train_samples,) Indices of training samples. @@ -1135,19 +1153,19 @@ def _fit_and_score(estimator, X, y, scorer, train, test, verbose, parameters, fit_params : dict or None Parameters that will be passed to ``estimator.fit``. - return_train_score : boolean, optional, default: False - Compute and return score on training set. + return_train_scores : boolean, optional, default: False + Compute and return scores on training set. return_parameters : boolean, optional, default: False Return parameters that has been used for the estimator. Returns ------- - test_score : float - Score on test set. + test_score : array of floats + Scores on test set. - train_score : float, optional - Score on training set. + train_score : array of floats, optional + Scores on training set. n_test_samples : int Number of test samples. 
@@ -1180,24 +1198,37 @@ def _fit_and_score(estimator, X, y, scorer, train, test, verbose, parameters, X_train, y_train = _safe_split(estimator, X, y, train) X_test, y_test = _safe_split(estimator, X, y, test, train) + if y_train is None: estimator.fit(X_train, **fit_params) else: estimator.fit(X_train, y_train, **fit_params) - test_score = scorer(estimator, X_test, y_test) - if return_train_score: - train_score = scorer(estimator, X_train, y_train) + + if len(scorers) == 1: + # We cannot use _evaluate_scorers here because the scorer might be + # estimator.score. + test_scores = np.array([scorers[0](estimator, X_test, y_test)]) + else: + test_scores = _evaluate_scorers(estimator, X_test, y_test, scorers) + + + if return_train_scores: + if len(scorers) == 1: + train_scores = np.array([scorers[0](estimator, X_train, y_train)]) + else: + train_scores = _evaluate_scorers(estimator, X_train, y_train, + scorers) scoring_time = time.time() - start_time if verbose > 2: - msg += ", score=%f" % test_score + msg += ", score=%s" % test_scores if verbose > 1: end_msg = "%s -%s" % (msg, logger.short_format_time(scoring_time)) print("[CV] %s %s" % ((64 - len(end_msg)) * '.', end_msg)) - ret = [train_score] if return_train_score else [] - ret.extend([test_score, _num_samples(X_test), scoring_time]) + ret = [train_scores] if return_train_scores else [] + ret.extend([test_scores, _num_samples(X_test), scoring_time]) if return_parameters: ret.append(parameters) return ret diff --git a/sklearn/grid_search.py b/sklearn/grid_search.py index 2dff365c568d2..e54116fdaa476 100644 --- a/sklearn/grid_search.py +++ b/sklearn/grid_search.py @@ -228,10 +228,10 @@ def fit_grid_point(X, y, estimator, parameters, train, test, scorer, n_samples_test : int Number of test samples in this split. """ - score, n_samples_test, _ = _fit_and_score(estimator, X, y, scorer, train, + scores, n_samples_test, _ = _fit_and_score(estimator, X, y, [scorer], train, test, verbose, parameters, fit_params) - return score, parameters, n_samples_test + return scores[0], parameters, n_samples_test def _check_param_grid(param_grid): @@ -374,7 +374,7 @@ def _fit(self, X, y, parameter_iterable): n_jobs=self.n_jobs, verbose=self.verbose, pre_dispatch=pre_dispatch)( delayed(_fit_and_score)( - clone(base_estimator), X, y, self.scorer_, train, test, + clone(base_estimator), X, y, [self.scorer_], train, test, self.verbose, parameters, self.fit_params, return_parameters=True) for parameters in parameter_iterable @@ -392,6 +392,9 @@ def _fit(self, X, y, parameter_iterable): all_scores = [] for this_score, this_n_test_samples, _, parameters in \ out[grid_start:grid_start + n_folds]: + # _fit_and_score returns a list even if there is only one + # scorer in the list. + this_score = this_score[0] all_scores.append(this_score) if self.iid: this_score *= this_n_test_samples diff --git a/sklearn/learning_curve.py b/sklearn/learning_curve.py index a650a11f75af3..91ca179ba34a5 100644 --- a/sklearn/learning_curve.py +++ b/sklearn/learning_curve.py @@ -127,11 +127,17 @@ def learning_curve(estimator, X, y, train_sizes=np.linspace(0.1, 1.0, 10), clone(estimator), X, y, classes, train, test, train_sizes_abs, scorer, verbose) for train, test in cv) else: + # ret is a list of size n_folds. Each element of the list contains the + # tuple returned by _fit_and_score. 
out = parallel(delayed(_fit_and_score)( - clone(estimator), X, y, scorer, train[:n_train_samples], test, - verbose, parameters=None, fit_params=None, return_train_score=True) + clone(estimator), X, y, [scorer], train[:n_train_samples], test, + verbose, parameters=None, fit_params=None, return_train_scores=True) for train, test in cv for n_train_samples in train_sizes_abs) - out = np.array(out)[:, :2] + + test_scores = [r[0][0] for r in out] + train_scores = [r[1][0] for r in out] + out = np.array([test_scores, train_scores]).T + n_cv_folds = out.shape[0]/n_unique_ticks out = out.reshape(n_cv_folds, n_unique_ticks, 2) diff --git a/sklearn/tests/test_cross_validation.py b/sklearn/tests/test_cross_validation.py index 73d82da7437c3..e2191565e8ba6 100644 --- a/sklearn/tests/test_cross_validation.py +++ b/sklearn/tests/test_cross_validation.py @@ -21,6 +21,7 @@ from sklearn import cross_validation as cval from sklearn.base import BaseEstimator +from sklearn.datasets import make_classification from sklearn.datasets import make_regression from sklearn.datasets import load_digits from sklearn.datasets import load_iris @@ -31,7 +32,7 @@ from sklearn.metrics import make_scorer from sklearn.externals import six -from sklearn.linear_model import Ridge +from sklearn.linear_model import Ridge, Perceptron from sklearn.svm import SVC @@ -460,6 +461,21 @@ def test_cross_val_score_precomputed(): linear_kernel.tolist(), y) +def test_cross_val_score_multiple_scorers(): + X, y = make_classification(n_classes=2) + clf = Perceptron(random_state=0) + + scores = cval.cross_val_score(clf, X, y, cv=3, scoring=["f1", "roc_auc"]) + assert_equal(scores.shape, (2, 3)) + + f1_scores = cval.cross_val_score(clf, X, y, cv=3, scoring="f1") + auc_scores = cval.cross_val_score(clf, X, y, cv=3, scoring="roc_auc") + scores2 = np.array([f1_scores, auc_scores]) + assert_equal(scores2.shape, (2, 3)) + + assert_array_almost_equal(scores, scores2) + + def test_cross_val_score_fit_params(): clf = MockClassifier() n_samples = X.shape[0] From 264013f31ce151806dd71991bdcbb6b64a4dd975 Mon Sep 17 00:00:00 2001 From: Mathieu Blondel Date: Thu, 16 Jan 2014 18:39:24 +0900 Subject: [PATCH 24/51] Refactoring for allowing mutiple scorers. 
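The search loop moves out of BaseSearchCV._fit into a module-level helper that fits every (parameter, fold) pair once and then aggregates results per scorer, returning parallel lists: one grid_scores list, one best_params entry and one best_score entry per scorer. The calling convention, as used by GridSearchCV in the diff below (single-scorer callers simply take element 0; the helper is private to this branch and is renamed again two patches later):

    # Inside BaseSearchCV._fit; names as in this patch, not a standalone snippet.
    grid_scores, best_params, best_scores = _fit(
        self.estimator, X, y, [self.scorer_], parameter_iterable,
        self.cv, self.pre_dispatch, self.fit_params, self.iid,
        self.n_jobs, self.verbose)

    self.grid_scores_ = grid_scores[0]   # one _CVScoreTuple per grid point
    self.best_params_ = best_params[0]
    self.best_score_ = best_scores[0]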
--- sklearn/grid_search.py | 167 ++++++++++++++++++++++------------------- 1 file changed, 90 insertions(+), 77 deletions(-) diff --git a/sklearn/grid_search.py b/sklearn/grid_search.py index e54116fdaa476..b16989df73dd8 100644 --- a/sklearn/grid_search.py +++ b/sklearn/grid_search.py @@ -182,6 +182,83 @@ def __len__(self): return self.n_iter +def _fit(estimator, X, y, scorers, parameter_iterable, cv, pre_dispatch, + fit_params, iid, n_jobs, verbose): + """Actual fitting, performing the search over parameters.""" + + n_samples = _num_samples(X) + X, y = check_arrays(X, y, allow_lists=True, sparse_format='csr') + + if y is not None: + if len(y) != n_samples: + raise ValueError('Target variable (y) has a different number ' + 'of samples (%i) than data (X: %i samples)' + % (len(y), n_samples)) + y = np.asarray(y) + cv = check_cv(cv, X, y, classifier=is_classifier(estimator)) + + if verbose > 0: + if isinstance(parameter_iterable, Sized): + n_candidates = len(parameter_iterable) + print("Fitting {0} folds for each of {1} candidates, totalling" + " {2} fits".format(len(cv), n_candidates, + n_candidates * len(cv))) + + base_estimator = clone(estimator) + + out = Parallel(n_jobs=n_jobs, verbose=verbose, pre_dispatch=pre_dispatch)( + delayed(_fit_and_score)( + clone(base_estimator), X, y, scorers, train, test, + verbose, parameters, fit_params, + return_parameters=True) + for parameters in parameter_iterable + for train, test in cv) + + # Out is a list of triplet: score, estimator, n_test_samples + n_fits = len(out) + n_folds = len(cv) + n_scorers = len(scorers) + + grid_scores = [] + for i in xrange(n_scorers): + grid_scores.append([]) + + for grid_start in range(0, n_fits, n_folds): + n_test_samples = 0 + scores = np.zeros(n_scorers) + all_scores = np.zeros((n_scorers, n_folds)) + + for j, (curr_scores, curr_n_test_samples, _, parameters) in \ + enumerate(out[grid_start:grid_start + n_folds]): + + all_scores[:, j] = curr_scores + + if iid: + curr_scores *= curr_n_test_samples + n_test_samples += curr_n_test_samples + + scores += curr_scores + + if iid: + scores /= float(n_test_samples) + else: + scores /= float(n_folds) + + for i in xrange(n_scorers): + # TODO: shall we also store the test_fold_sizes? + tup = _CVScoreTuple(parameters, scores[i], all_scores[i]) + grid_scores[i].append(tup) + + # Find the best parameters by comparing on the mean validation score: + # note that `sorted` is deterministic in the way it breaks ties + bests = [sorted(grid_scores[i], key=lambda x: x.mean_validation_score, + reverse=True)[0] for i in xrange(n_scorers)] + best_params = [best.parameters for best in bests] + best_scores = [best.mean_validation_score for best in bests] + + return grid_scores, best_params, best_scores + + def fit_grid_point(X, y, estimator, parameters, train, test, scorer, verbose, **fit_params): """Run fit on one set of parameters. 
@@ -340,96 +417,32 @@ def transform(self): return self.best_estimator_.transform def _fit(self, X, y, parameter_iterable): - """Actual fitting, performing the search over parameters.""" - - estimator = self.estimator - cv = self.cv self.scorer_ = check_scoring(self.estimator, scoring=self.scoring, loss_func=self.loss_func, score_func=self.score_func) - n_samples = _num_samples(X) - X, y = check_arrays(X, y, allow_lists=True, sparse_format='csr') - - if y is not None: - if len(y) != n_samples: - raise ValueError('Target variable (y) has a different number ' - 'of samples (%i) than data (X: %i samples)' - % (len(y), n_samples)) - y = np.asarray(y) - cv = check_cv(cv, X, y, classifier=is_classifier(estimator)) - - if self.verbose > 0: - if isinstance(parameter_iterable, Sized): - n_candidates = len(parameter_iterable) - print("Fitting {0} folds for each of {1} candidates, totalling" - " {2} fits".format(len(cv), n_candidates, - n_candidates * len(cv))) - - base_estimator = clone(self.estimator) - - pre_dispatch = self.pre_dispatch - - out = Parallel( - n_jobs=self.n_jobs, verbose=self.verbose, - pre_dispatch=pre_dispatch)( - delayed(_fit_and_score)( - clone(base_estimator), X, y, [self.scorer_], train, test, - self.verbose, parameters, self.fit_params, - return_parameters=True) - for parameters in parameter_iterable - for train, test in cv) - - # Out is a list of triplet: score, estimator, n_test_samples - n_fits = len(out) - n_folds = len(cv) - - scores = list() - grid_scores = list() - for grid_start in range(0, n_fits, n_folds): - n_test_samples = 0 - score = 0 - all_scores = [] - for this_score, this_n_test_samples, _, parameters in \ - out[grid_start:grid_start + n_folds]: - # _fit_and_score returns a list even if there is only one - # scorer in the list. - this_score = this_score[0] - all_scores.append(this_score) - if self.iid: - this_score *= this_n_test_samples - n_test_samples += this_n_test_samples - score += this_score - if self.iid: - score /= float(n_test_samples) - else: - score /= float(n_folds) - scores.append((score, parameters)) - # TODO: shall we also store the test_fold_sizes? - grid_scores.append(_CVScoreTuple( - parameters, - score, - np.array(all_scores))) - # Store the computed scores - self.grid_scores_ = grid_scores - - # Find the best parameters by comparing on the mean validation score: - # note that `sorted` is deterministic in the way it breaks ties - best = sorted(grid_scores, key=lambda x: x.mean_validation_score, - reverse=True)[0] - self.best_params_ = best.parameters - self.best_score_ = best.mean_validation_score + grid_scores, best_params, best_scores = _fit(self.estimator, X, y, + [self.scorer_], + parameter_iterable, + self.cv, self.pre_dispatch, + self.fit_params, self.iid, + self.n_jobs, self.verbose) + + self.grid_scores_ = grid_scores[0] + self.best_params_ = best_params[0] + self.best_score_ = best_scores[0] if self.refit: # fit the best estimator using the entire dataset # clone first to work around broken estimators - best_estimator = clone(base_estimator).set_params( - **best.parameters) + base_estimator = clone(self.estimator) + best_estimator = base_estimator.set_params(**self.best_params_) if y is not None: best_estimator.fit(X, y, **self.fit_params) else: best_estimator.fit(X, **self.fit_params) self.best_estimator_ = best_estimator + return self From 0feed968ef4aabd14921877895f9642868e180ee Mon Sep 17 00:00:00 2001 From: Mathieu Blondel Date: Thu, 16 Jan 2014 18:39:52 +0900 Subject: [PATCH 25/51] Define `parameters` upfront. 
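A minimal sketch of the pattern this commit removes (hypothetical values, not the actual grid-search results):

    # The loop variable stays bound after the loop ends, so code placed
    # below the loop silently depends on the *last* iteration.
    results = [([0.8], 25, {"C": 1}), ([0.9], 25, {"C": 1})]
    for scores, n_test, parameters in results:
        pass
    print(parameters)  # still the last iteration's value

    # Clearer: bind it once, explicitly, before iterating.
    parameters = results[0][-1]
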
We are not supposed to use `parameters` outside of the loop. And this makes the code very difficult to read. --- sklearn/grid_search.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sklearn/grid_search.py b/sklearn/grid_search.py index b16989df73dd8..0d8a32dc99f96 100644 --- a/sklearn/grid_search.py +++ b/sklearn/grid_search.py @@ -228,7 +228,10 @@ def _fit(estimator, X, y, scorers, parameter_iterable, cv, pre_dispatch, scores = np.zeros(n_scorers) all_scores = np.zeros((n_scorers, n_folds)) - for j, (curr_scores, curr_n_test_samples, _, parameters) in \ + # Parameters for this part of the grid. + parameters = out[grid_start][3] + + for j, (curr_scores, curr_n_test_samples, _, _) in \ enumerate(out[grid_start:grid_start + n_folds]): all_scores[:, j] = curr_scores From 0a667489918c0267e7438a4456562ccc1fb3e6e3 Mon Sep 17 00:00:00 2001 From: Mathieu Blondel Date: Thu, 16 Jan 2014 18:42:54 +0900 Subject: [PATCH 26/51] Use more informative name. --- sklearn/grid_search.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/sklearn/grid_search.py b/sklearn/grid_search.py index 0d8a32dc99f96..25c469604db44 100644 --- a/sklearn/grid_search.py +++ b/sklearn/grid_search.py @@ -182,8 +182,8 @@ def __len__(self): return self.n_iter -def _fit(estimator, X, y, scorers, parameter_iterable, cv, pre_dispatch, - fit_params, iid, n_jobs, verbose): +def _fit_parameter_iterable(estimator, X, y, scorers, parameter_iterable, cv, + pre_dispatch, fit_params, iid, n_jobs, verbose): """Actual fitting, performing the search over parameters.""" n_samples = _num_samples(X) @@ -424,12 +424,11 @@ def _fit(self, X, y, parameter_iterable): loss_func=self.loss_func, score_func=self.score_func) - grid_scores, best_params, best_scores = _fit(self.estimator, X, y, - [self.scorer_], - parameter_iterable, - self.cv, self.pre_dispatch, - self.fit_params, self.iid, - self.n_jobs, self.verbose) + grid_scores, best_params, best_scores = \ + _fit_parameter_iterable(self.estimator, X, y, [self.scorer_], + parameter_iterable, self.cv, + self.pre_dispatch, self.fit_params, + self.iid, self.n_jobs, self.verbose) self.grid_scores_ = grid_scores[0] self.best_params_ = best_params[0] From 6f68bfb9ea1e9f08e74a0af661d0c6eed580501e Mon Sep 17 00:00:00 2001 From: Mathieu Blondel Date: Thu, 16 Jan 2014 19:02:11 +0900 Subject: [PATCH 27/51] Put __repr__ back. --- sklearn/metrics/scorer.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sklearn/metrics/scorer.py b/sklearn/metrics/scorer.py index beae0ee27c56d..7f51320b62656 100644 --- a/sklearn/metrics/scorer.py +++ b/sklearn/metrics/scorer.py @@ -46,6 +46,14 @@ def __init__(self, score_func, greater_is_better=True, needs_proba=False, def __call__(self, estimator, X, y): return _evaluate_scorers(estimator, X, y, [self])[0] + def __repr__(self): + kwargs_string = "".join([", %s=%s" % (str(k), str(v)) + for k, v in self.kwargs.items()]) + return ("make_scorer(%s%s%s)" + % (self.score_func.__name__, + "" if self.greater_is_better else ", greater_is_better=False", + kwargs_string)) + def _evaluate_scorers(estimator, X, y, scorers): has_pb = hasattr(estimator, "predict_proba") From 114bec6c97cb0aa0bf45640854ff9c080adedd14 Mon Sep 17 00:00:00 2001 From: Mathieu Blondel Date: Thu, 16 Jan 2014 22:39:51 +0900 Subject: [PATCH 28/51] Deprecate fit_grid_point. 
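Callers of the public fit_grid_point helper should now see a DeprecationWarning, as added in the diff below. An illustrative sketch of how that surfaces (not part of the patch; it assumes the fit_grid_point signature shown earlier in this series and the branch's check_scoring helper):

    import warnings
    import numpy as np
    from sklearn.datasets import make_classification
    from sklearn.grid_search import fit_grid_point
    from sklearn.metrics.scorer import check_scoring
    from sklearn.svm import LinearSVC

    X, y = make_classification(random_state=0)      # 100 samples, 2 classes
    train, test = np.arange(80), np.arange(80, 100)
    clf = LinearSVC(random_state=0)
    scorer = check_scoring(clf, scoring="accuracy")

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        # Positional arguments follow fit_grid_point's signature; verbose=0.
        fit_grid_point(X, y, clf, {"C": 1.0}, train, test, scorer, 0)
    assert any(issubclass(w.category, DeprecationWarning) for w in caught)
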
--- sklearn/grid_search.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/sklearn/grid_search.py b/sklearn/grid_search.py index 25c469604db44..e5c6dd6c0f7c6 100644 --- a/sklearn/grid_search.py +++ b/sklearn/grid_search.py @@ -30,8 +30,8 @@ from .metrics.scorer import check_scoring -__all__ = ['GridSearchCV', 'ParameterGrid', 'fit_grid_point', - 'ParameterSampler', 'RandomizedSearchCV'] +__all__ = ['GridSearchCV', 'ParameterGrid', 'ParameterSampler', + 'RandomizedSearchCV'] class ParameterGrid(object): @@ -308,6 +308,9 @@ def fit_grid_point(X, y, estimator, parameters, train, test, scorer, n_samples_test : int Number of test samples in this split. """ + warnings.warn("fit_grid_point is deprecated and will be " + "removed in 0.17", DeprecationWarning, stacklevel=1) + scores, n_samples_test, _ = _fit_and_score(estimator, X, y, [scorer], train, test, verbose, parameters, fit_params) From aff769d2738bf3fcceb6189d6e41b9c3f00ffb12 Mon Sep 17 00:00:00 2001 From: Mathieu Blondel Date: Thu, 16 Jan 2014 23:46:40 +0900 Subject: [PATCH 29/51] Add grid_search_cv. --- sklearn/grid_search.py | 108 ++++++++++++++++++++++++++++++ sklearn/tests/test_grid_search.py | 21 +++++- 2 files changed, 127 insertions(+), 2 deletions(-) diff --git a/sklearn/grid_search.py b/sklearn/grid_search.py index e5c6dd6c0f7c6..66bba0df4caf3 100644 --- a/sklearn/grid_search.py +++ b/sklearn/grid_search.py @@ -316,6 +316,114 @@ def fit_grid_point(X, y, estimator, parameters, train, test, scorer, fit_params) return scores[0], parameters, n_samples_test +def grid_search_cv(estimator, param_grid, X, y=None, scoring=None, + fit_params=None, n_jobs=1, iid=True, + refit=True, cv=None, verbose=0, pre_dispatch='2*n_jobs'): + """Exhaustive search over specified parameter values for an estimator. + + Parameters + ---------- + estimator : object type that implements the "fit" and "predict" methods + A object of that type is instantiated for each grid point. + + param_grid : dict or list of dictionaries + Dictionary with parameters names (string) as keys and lists of + parameter settings to try as values, or a list of such + dictionaries, in which case the grids spanned by each dictionary + in the list are explored. This enables searching over any sequence + of parameter settings. + + X : array-like, shape = [n_samples, n_features] + Training vector, where n_samples is the number of samples and + n_features is the number of features. + + y : array-like, shape = [n_samples] or [n_samples, n_output], optional + Target relative to X for classification or regression; + None for unsupervised learning. + + scoring : string, callable or None, optional, default: None + A string (see model evaluation documentation) or + a scorer callable object / function with signature + ``scorer(estimator, X, y)``. + + fit_params : dict, optional + Parameters to pass to the fit method. + + n_jobs : int, optional + Number of jobs to run in parallel (default 1). + + iid : boolean, optional + If True, the data is assumed to be identically distributed across + the folds, and the loss minimized is the total loss per sample, + and not the mean loss across the folds. + + refit : boolean + Refit the best estimator with the entire dataset. + If "False", it is impossible to make predictions using + this GridSearchCV instance after fitting. + + cv : integer or cross-validation generator, optional + If an integer is passed, it is the number of folds (default 3). 
+ Specific cross-validation objects can be passed, see + sklearn.cross_validation module for the list of possible objects + + verbose : integer + Controls the verbosity: the higher, the more messages. + + pre_dispatch : int, or string, optional + Controls the number of jobs that get dispatched during parallel + execution. Reducing this number can be useful to avoid an + explosion of memory consumption when more jobs get dispatched + than CPUs can process. This parameter can be: + + - None, in which case all the jobs are immediately + created and spawned. Use this for lightweight and + fast-running jobs, to avoid delays due to on-demand + spawning of the jobs + + - An int, giving the exact number of total jobs that are + spawned + + - A string, giving an expression as a function of n_jobs, + as in '2*n_jobs' + """ + fit_params = fit_params if fit_params is not None else {} + if isinstance(scoring, list): + scorers = [check_scoring(estimator, scoring=s) for s in scoring] + ret_1d = False + else: + scorers = [check_scoring(estimator, scoring=scoring)] + ret_1d = True + + grid_scores, best_params, best_scores = \ + _fit_parameter_iterable(estimator, X, y, scorers, + ParameterGrid(param_grid), cv, pre_dispatch, + fit_params, iid, n_jobs, verbose) + best_estimators = [] + if refit: + for i in xrange(len(scorers)): + base_estimator = clone(estimator) + best_estimator = base_estimator.set_params(**best_params[i]) + best_estimators.append(best_estimator) + if y is not None: + best_estimator.fit(X, y, **fit_params) + else: + best_estimator.fit(X, **fit_params) + + if ret_1d: + grid_scores = grid_scores[0] + best_params = best_params[0] + best_scores = best_scores[0] + if refit: + best_estimators = best_estimators[0] + + ret = [best_params, best_scores, grid_scores] + + if refit: + ret.append(best_estimators) + + return ret + def _check_param_grid(param_grid): if hasattr(param_grid, 'items'): diff --git a/sklearn/tests/test_grid_search.py b/sklearn/tests/test_grid_search.py index 6d1a0d4f2ccc6..da80b7f8e26d9 100644 --- a/sklearn/tests/test_grid_search.py +++ b/sklearn/tests/test_grid_search.py @@ -28,8 +28,9 @@ from sklearn.datasets import make_classification from sklearn.datasets import make_blobs from sklearn.datasets import make_multilabel_classification -from sklearn.grid_search import (GridSearchCV, RandomizedSearchCV, - ParameterGrid, ParameterSampler) +from sklearn.grid_search import (grid_search_cv, GridSearchCV, + RandomizedSearchCV, ParameterGrid, + ParameterSampler) from sklearn.svm import LinearSVC, SVC from sklearn.tree import DecisionTreeRegressor from sklearn.tree import DecisionTreeClassifier @@ -644,3 +645,19 @@ def test_grid_search_with_multioutput_data(): correct_score = est.score(X[test], y[test]) assert_almost_equal(correct_score, cv_validation_scores[i]) + +def test_multiple_grid_search(): + clf = LinearSVC(random_state=0) + X, y = make_blobs(random_state=0, centers=2) + param_grid = {"C": [0.1, 1, 10]} + + ret = grid_search_cv(clf, param_grid, X, y, scoring=["f1", "roc_auc"]) + ret_f1 = grid_search_cv(clf, param_grid, X, y, scoring="f1") + ret_auc = grid_search_cv(clf, param_grid, X, y, scoring="roc_auc") + + for i in xrange(len(ret)): + assert_equal(len(ret[i]), 2) + + for i in (0, 1): + assert_equal(ret[i][0], ret_f1[i]) + assert_equal(ret[i][1], ret_auc[i]) From 55f41266f493905399438e8ca0e2736728e20e5e Mon Sep 17 00:00:00 2001 From: Mathieu Blondel Date: Fri, 17 Jan 2014 00:47:30 +0900 Subject: [PATCH 30/51] Refactor code. 
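This commit only moves code around; the behaviour of the grid_search_cv helper added in the previous commit is meant to stay the same. For reference, a usage sketch (illustrative only, mirroring test_multiple_grid_search above; with refit left at its default, a fitted best estimator per scorer is appended to the returned list):

    from sklearn.datasets import make_blobs
    from sklearn.grid_search import grid_search_cv
    from sklearn.svm import LinearSVC

    X, y = make_blobs(random_state=0, centers=2)
    clf = LinearSVC(random_state=0)
    param_grid = {"C": [0.1, 1, 10]}

    # With a list of scorers, each returned element is itself a list with
    # one entry per scorer, in the order
    # [best_params, best_scores, grid_scores, best_estimators].
    best_params, best_scores, grid_scores, best_estimators = \
        grid_search_cv(clf, param_grid, X, y, scoring=["f1", "roc_auc"])
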
--- sklearn/grid_search.py | 104 ++++++++++++++++++-------------------- sklearn/metrics/scorer.py | 3 ++ 2 files changed, 51 insertions(+), 56 deletions(-) diff --git a/sklearn/grid_search.py b/sklearn/grid_search.py index 66bba0df4caf3..09c64574daff6 100644 --- a/sklearn/grid_search.py +++ b/sklearn/grid_search.py @@ -182,10 +182,19 @@ def __len__(self): return self.n_iter -def _fit_parameter_iterable(estimator, X, y, scorers, parameter_iterable, cv, - pre_dispatch, fit_params, iid, n_jobs, verbose): +def _fit_param_iter(estimator, X, y, scoring, parameter_iterable, refit, + cv, pre_dispatch, fit_params, iid, n_jobs, verbose): """Actual fitting, performing the search over parameters.""" + fit_params = fit_params if fit_params is not None else {} + + if isinstance(scoring, list): + scorers = [check_scoring(estimator, scoring=s) for s in scoring] + ret_1d = False + else: + scorers = [check_scoring(estimator, scoring=scoring)] + ret_1d = True + n_samples = _num_samples(X) X, y = check_arrays(X, y, allow_lists=True, sparse_format='csr') @@ -259,7 +268,30 @@ def _fit_parameter_iterable(estimator, X, y, scorers, parameter_iterable, cv, best_params = [best.parameters for best in bests] best_scores = [best.mean_validation_score for best in bests] - return grid_scores, best_params, best_scores + best_estimators = [] + if refit: + for i in xrange(len(scorers)): + base_estimator = clone(estimator) + best_estimator = base_estimator.set_params(**best_params[i]) + best_estimators.append(best_estimator) + if y is not None: + best_estimator.fit(X, y, **fit_params) + else: + best_estimator.fit(X, **fit_params) + + if ret_1d: + grid_scores = grid_scores[0] + best_params = best_params[0] + best_scores = best_scores[0] + if refit: + best_estimators = best_estimators[0] + + ret = [best_params, best_scores, grid_scores] + + if refit: + ret.append(best_estimators) + + return ret def fit_grid_point(X, y, estimator, parameters, train, test, scorer, @@ -387,42 +419,9 @@ def grid_search_cv(estimator, param_grid, X, y=None, scoring=None, - A string, giving an expression as a function of n_jobs, as in '2*n_jobs' """ - fit_params = fit_params if fit_params is not None else {} - if isinstance(scoring, list): - scorers = [check_scoring(estimator, scoring=s) for s in scoring] - ret_1d = False - else: - scorers = [check_scoring(estimator, scoring=scoring)] - ret_1d = True - - grid_scores, best_params, best_scores = \ - _fit_parameter_iterable(estimator, X, y, scorers, - ParameterGrid(param_grid), cv, pre_dispatch, - fit_params, iid, n_jobs, verbose) - best_estimators = [] - if refit: - for i in xrange(len(scorers)): - base_estimator = clone(estimator) - best_estimator = base_estimator.set_params(**best_params[i]) - best_estimators.append(best_estimator) - if y is not None: - best_estimator.fit(X, y, **fit_params) - else: - best_estimator.fit(X, **fit_params) - - if ret_1d: - grid_scores = grid_scores[0] - best_params = best_params[0] - best_scores = best_scores[0] - if refit: - best_estimators = best_estimators[0] - - ret = [best_params, best_scores, grid_scores] - - if refit: - ret.append(best_estimators) - - return ret + param_grid = ParameterGrid(param_grid) + return _fit_param_iter(estimator, X, y, scoring, param_grid, refit, cv, + pre_dispatch, fit_params, iid, n_jobs, verbose) def _check_param_grid(param_grid): @@ -535,26 +534,18 @@ def _fit(self, X, y, parameter_iterable): loss_func=self.loss_func, score_func=self.score_func) - grid_scores, best_params, best_scores = \ - _fit_parameter_iterable(self.estimator, X, 
y, [self.scorer_], - parameter_iterable, self.cv, - self.pre_dispatch, self.fit_params, - self.iid, self.n_jobs, self.verbose) + ret = _fit_param_iter(self.estimator, X, y, self.scorer_, + parameter_iterable, self.refit, self.cv, + self.pre_dispatch, self.fit_params, self.iid, + self.n_jobs, self.verbose) - self.grid_scores_ = grid_scores[0] - self.best_params_ = best_params[0] - self.best_score_ = best_scores[0] + + self.best_params_ = ret[0] + self.best_score_ = ret[1] + self.grid_scores_ = ret[2] if self.refit: - # fit the best estimator using the entire dataset - # clone first to work around broken estimators - base_estimator = clone(self.estimator) - best_estimator = base_estimator.set_params(**self.best_params_) - if y is not None: - best_estimator.fit(X, y, **self.fit_params) - else: - best_estimator.fit(X, **self.fit_params) - self.best_estimator_ = best_estimator + self.best_estimator_ = ret[3] return self @@ -728,6 +719,7 @@ def fit(self, X, y=None, **params): warnings.warn("Additional parameters to GridSearchCV are ignored!" " The params argument will be removed in 0.15.", DeprecationWarning) + return self._fit(X, y, ParameterGrid(self.param_grid)) diff --git a/sklearn/metrics/scorer.py b/sklearn/metrics/scorer.py index 7f51320b62656..7b21ddcb75fdb 100644 --- a/sklearn/metrics/scorer.py +++ b/sklearn/metrics/scorer.py @@ -193,6 +193,9 @@ def check_scoring(estimator, scoring=None, allow_none=False, loss_func=None, A scorer callable object / function with signature ``scorer(estimator, X, y)``. """ + if isinstance(scoring, _Scorer): + return scoring + has_scoring = not (scoring is None and loss_func is None and score_func is None) if not hasattr(estimator, 'fit'): From 4bd6c9129cec932800c2f48959cbcf369a6c75ed Mon Sep 17 00:00:00 2001 From: Mathieu Blondel Date: Fri, 17 Jan 2014 00:57:16 +0900 Subject: [PATCH 31/51] Add randomized_search_cv. --- sklearn/grid_search.py | 82 +++++++++++++++++++++++++++++++ sklearn/tests/test_grid_search.py | 36 ++++++++------ 2 files changed, 104 insertions(+), 14 deletions(-) diff --git a/sklearn/grid_search.py b/sklearn/grid_search.py index 09c64574daff6..e876de8d86799 100644 --- a/sklearn/grid_search.py +++ b/sklearn/grid_search.py @@ -424,6 +424,88 @@ def grid_search_cv(estimator, param_grid, X, y=None, scoring=None, pre_dispatch, fit_params, iid, n_jobs, verbose) +def randomized_search_cv(estimator, param_distributions, X, y, n_iter=10, + scoring=None, fit_params=None, n_jobs=1, iid=True, + refit=True, cv=None, verbose=0, + pre_dispatch='2*n_jobs', random_state=None): + """Randomized search on hyper parameters. + + Parameters + ---------- + estimator : object type that implements the "fit" and "predict" methods + A object of that type is instantiated for each parameter setting. + + param_distributions : dict + Dictionary with parameters names (string) as keys and distributions + or lists of parameters to try. Distributions must provide a ``rvs`` + method for sampling (such as those from scipy.stats.distributions). + If a list is given, it is sampled uniformly. + + X : array-like, shape = [n_samples, n_features] + Training vector, where n_samples is the number of samples and + n_features is the number of features. + + y : array-like, shape = [n_samples] or [n_samples, n_output], optional + Target relative to X for classification or regression; + None for unsupervised learning. + + n_iter : int, default=10 + Number of parameter settings that are sampled. n_iter trades + off runtime vs quality of the solution. 
+ + scoring : string, callable or None, optional, default: None + A string (see model evaluation documentation) or + a scorer callable object / function with signature + ``scorer(estimator, X, y)``. + + fit_params : dict, optional + Parameters to pass to the fit method. + + n_jobs : int, optional + Number of jobs to run in parallel (default 1). + + pre_dispatch : int, or string, optional + Controls the number of jobs that get dispatched during parallel + execution. Reducing this number can be useful to avoid an + explosion of memory consumption when more jobs get dispatched + than CPUs can process. This parameter can be: + + - None, in which case all the jobs are immediately + created and spawned. Use this for lightweight and + fast-running jobs, to avoid delays due to on-demand + spawning of the jobs + + - An int, giving the exact number of total jobs that are + spawned + + - A string, giving an expression as a function of n_jobs, + as in '2*n_jobs' + + iid : boolean, optional + If True, the data is assumed to be identically distributed across + the folds, and the loss minimized is the total loss per sample, + and not the mean loss across the folds. + + cv : integer or cross-validation generator, optional + If an integer is passed, it is the number of folds (default 3). + Specific cross-validation objects can be passed, see + sklearn.cross_validation module for the list of possible objects + + refit : boolean + Refit the best estimator with the entire dataset. + If "False", it is impossible to make predictions using + this RandomizedSearchCV instance after fitting. + + verbose : integer + Controls the verbosity: the higher, the more messages. + """ + sampled_params = ParameterSampler(param_distributions, + n_iter, + random_state=random_state) + return _fit_param_iter(estimator, X, y, scoring, sampled_params, refit, cv, + pre_dispatch, fit_params, iid, n_jobs, verbose) + + def _check_param_grid(param_grid): if hasattr(param_grid, 'items'): param_grid = [param_grid] diff --git a/sklearn/tests/test_grid_search.py b/sklearn/tests/test_grid_search.py index da80b7f8e26d9..6d57e89855a8e 100644 --- a/sklearn/tests/test_grid_search.py +++ b/sklearn/tests/test_grid_search.py @@ -29,8 +29,8 @@ from sklearn.datasets import make_blobs from sklearn.datasets import make_multilabel_classification from sklearn.grid_search import (grid_search_cv, GridSearchCV, - RandomizedSearchCV, ParameterGrid, - ParameterSampler) + randomized_search_cv, RandomizedSearchCV, + ParameterGrid, ParameterSampler) from sklearn.svm import LinearSVC, SVC from sklearn.tree import DecisionTreeRegressor from sklearn.tree import DecisionTreeClassifier @@ -646,18 +646,26 @@ def test_grid_search_with_multioutput_data(): assert_almost_equal(correct_score, cv_validation_scores[i]) -def test_multiple_grid_search(): - clf = LinearSVC(random_state=0) - X, y = make_blobs(random_state=0, centers=2) - param_grid = {"C": [0.1, 1, 10]} - ret = grid_search_cv(clf, param_grid, X, y, scoring=["f1", "roc_auc"]) - ret_f1 = grid_search_cv(clf, param_grid, X, y, scoring="f1") - ret_auc = grid_search_cv(clf, param_grid, X, y, scoring="roc_auc") +def test_multiple_grid_search_cv(): + for n, func in enumerate((grid_search_cv, randomized_search_cv)): + clf = LinearSVC(random_state=0) + X, y = make_blobs(random_state=0, centers=2) + param_grid = {"C": [0.1, 1, 10]} + + if n == 0: + kwargs = dict() + else: + kwargs = dict(random_state=0) + + ret = func(clf, param_grid, X, y, scoring=["f1", "roc_auc"], **kwargs) + ret_f1 = func(clf, param_grid, X, y, 
scoring="f1", **kwargs) + ret_auc = func(clf, param_grid, X, y, scoring="roc_auc", **kwargs) + + for i in xrange(len(ret)): - for i in xrange(len(ret)): - assert_equal(len(ret[i]), 2) + assert_equal(len(ret[i]), 2) - for i in (0, 1): - assert_equal(ret[i][0], ret_f1[i]) - assert_equal(ret[i][1], ret_auc[i]) + for i in (0, 1): + assert_equal(ret[i][0], ret_f1[i]) + assert_equal(ret[i][1], ret_auc[i]) From b02a7e80a3de71067849e4782eb461b1fe8a170f Mon Sep 17 00:00:00 2001 From: Mathieu Blondel Date: Fri, 17 Jan 2014 00:59:46 +0900 Subject: [PATCH 32/51] Remove multi-output multiclass support from scorers for now. As per discussion with @arjoly and @jnothman. --- sklearn/metrics/tests/test_score_objects.py | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py index 83fce651bc693..2d2a39176a0ea 100644 --- a/sklearn/metrics/tests/test_score_objects.py +++ b/sklearn/metrics/tests/test_score_objects.py @@ -236,27 +236,6 @@ def test_thresholded_scorers_multilabel_indicator_data(): random_state=0) X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0) - # Multi-output multi-class predict_proba - #clf = DecisionTreeClassifier() - #clf.fit(X_train, y_train) - #y_proba = clf.predict_proba(X_test) - #score1 = SCORERS['roc_auc'](clf, X_test, y_test) - #score2 = roc_auc_score(y_test, np.vstack(p[:, -1] for p in y_proba).T) - #assert_almost_equal(score1, score2) - - # Multi-output multi-class decision_function - # TODO Is there any yet? - #clf = DecisionTreeClassifier() - #clf.fit(X_train, y_train) - #clf._predict_proba = clf.predict_proba - #clf.predict_proba = None - #clf.decision_function = lambda X: [p[:, 1] for p in clf._predict_proba(X)] - - #y_proba = clf.decision_function(X_test) - #score1 = SCORERS['roc_auc'](clf, X_test, y_test) - #score2 = roc_auc_score(y_test, np.vstack(p for p in y_proba).T) - #assert_almost_equal(score1, score2) - # Multilabel predict_proba clf = OneVsRestClassifier(DecisionTreeClassifier()) clf.fit(X_train, y_train) From 47dd41c625ad316e38a205fb0615d3aefb57b317 Mon Sep 17 00:00:00 2001 From: Mathieu Blondel Date: Fri, 17 Jan 2014 01:11:30 +0900 Subject: [PATCH 33/51] Update docstrings. --- sklearn/cross_validation.py | 7 +++-- sklearn/grid_search.py | 58 +++++++++++++++++++++++++++++++++++-- 2 files changed, 61 insertions(+), 4 deletions(-) diff --git a/sklearn/cross_validation.py b/sklearn/cross_validation.py index 6e0fe40d330d2..19daf190fa093 100644 --- a/sklearn/cross_validation.py +++ b/sklearn/cross_validation.py @@ -1044,10 +1044,12 @@ def cross_val_score(estimator, X, y=None, scoring=None, cv=None, n_jobs=1, The target variable to try to predict in the case of supervised learning. - scoring : string, callable or None, optional, default: None + scoring : string, callable, list of strings/callables or None, optional, + default: None A string (see model evaluation documentation) or a scorer callable object / function with signature ``scorer(estimator, X, y)``. + Lists can be used for randomized search of multiple metrics. cv : cross-validation generator, optional, default: None A cross-validation generator. If None, a 3-fold cross @@ -1083,8 +1085,9 @@ def cross_val_score(estimator, X, y=None, scoring=None, cv=None, n_jobs=1, Returns ------- - scores : array of float, shape=(len(list(cv)),) + scores : array of float, shape=(n_folds,) or (n_scoring, n_folds) Array of scores of the estimator for each run of the cross validation. 
+ The returned array is 2d is `scoring` is a list. """ X, y = check_arrays(X, y, sparse_format='csr', allow_lists=True) cv = _check_cv(cv, X, y, classifier=is_classifier(estimator)) diff --git a/sklearn/grid_search.py b/sklearn/grid_search.py index e876de8d86799..2f58c20ce9c55 100644 --- a/sklearn/grid_search.py +++ b/sklearn/grid_search.py @@ -373,10 +373,12 @@ def grid_search_cv(estimator, param_grid, X, y=None, scoring=None, Target relative to X for classification or regression; None for unsupervised learning. - scoring : string, callable or None, optional, default: None + scoring : string, callable, list of strings/callables or None, optional, + default: None A string (see model evaluation documentation) or a scorer callable object / function with signature ``scorer(estimator, X, y)``. + Lists can be used for randomized search of multiple metrics. fit_params : dict, optional Parameters to pass to the fit method. @@ -418,6 +420,31 @@ def grid_search_cv(estimator, param_grid, X, y=None, scoring=None, - A string, giving an expression as a function of n_jobs, as in '2*n_jobs' + + Returns + ------- + `best_params` : dict or list of dicts + Parameter setting that gave the best results on the hold out data. + + `best_score` : float or list of floats + Score of best_estimator on the left out data. + + `grid_scores` : list of named tuples or list of lists of named tuples + Contains scores for all parameter combinations in param_grid. + Each entry corresponds to one parameter setting. + Each named tuple has the attributes: + + * ``parameters``, a dict of parameter settings + * ``mean_validation_score``, the mean score over the + cross-validation folds + * ``cv_validation_scores``, the list of scores for each fold + + `best_estimator` : estimator or list of estimators (only if refit=True) + Estimator that was chosen by the search, i.e. estimator + which gave highest score (or smallest loss if specified) + on the left out data. + + Lists are returned when `scoring` is a list. """ param_grid = ParameterGrid(param_grid) return _fit_param_iter(estimator, X, y, scoring, param_grid, refit, cv, @@ -453,10 +480,12 @@ def randomized_search_cv(estimator, param_distributions, X, y, n_iter=10, Number of parameter settings that are sampled. n_iter trades off runtime vs quality of the solution. - scoring : string, callable or None, optional, default: None + scoring : string, callable, list of strings/callables or None, optional, + default: None A string (see model evaluation documentation) or a scorer callable object / function with signature ``scorer(estimator, X, y)``. + Lists can be used for randomized search of multiple metrics. fit_params : dict, optional Parameters to pass to the fit method. @@ -498,6 +527,31 @@ def randomized_search_cv(estimator, param_distributions, X, y, n_iter=10, verbose : integer Controls the verbosity: the higher, the more messages. + + Returns + ------- + `best_params` : dict or list of dicts + Parameter setting that gave the best results on the hold out data. + + `best_score` : float or list of floats + Score of best_estimator on the left out data. + + `grid_scores` : list of named tuples or list of lists of named tuples + Contains scores for all parameter combinations in param_grid. + Each entry corresponds to one parameter setting. 
+ Each named tuple has the attributes: + + * ``parameters``, a dict of parameter settings + * ``mean_validation_score``, the mean score over the + cross-validation folds + * ``cv_validation_scores``, the list of scores for each fold + + `best_estimator` : estimator or list of estimators (only if refit=True) + Estimator that was chosen by the search, i.e. estimator + which gave highest score (or smallest loss if specified) + on the left out data. + + Lists are returned when `scoring` is a list. """ sampled_params = ParameterSampler(param_distributions, n_iter, From c4905c3176a07519dc3da2cb4694c65e4c95b71b Mon Sep 17 00:00:00 2001 From: Mathieu Blondel Date: Fri, 17 Jan 2014 17:23:18 +0900 Subject: [PATCH 34/51] Support multiple metrics directly in GridSearchCV and RandomizedSearchCV. --- sklearn/grid_search.py | 240 ++---------------------------- sklearn/tests/test_grid_search.py | 44 +++--- 2 files changed, 40 insertions(+), 244 deletions(-) diff --git a/sklearn/grid_search.py b/sklearn/grid_search.py index 2f58c20ce9c55..abe6beae1ab53 100644 --- a/sklearn/grid_search.py +++ b/sklearn/grid_search.py @@ -185,9 +185,6 @@ def __len__(self): def _fit_param_iter(estimator, X, y, scoring, parameter_iterable, refit, cv, pre_dispatch, fit_params, iid, n_jobs, verbose): """Actual fitting, performing the search over parameters.""" - - fit_params = fit_params if fit_params is not None else {} - if isinstance(scoring, list): scorers = [check_scoring(estimator, scoring=s) for s in scoring] ret_1d = False @@ -280,13 +277,14 @@ def _fit_param_iter(estimator, X, y, scoring, parameter_iterable, refit, best_estimator.fit(X, **fit_params) if ret_1d: - grid_scores = grid_scores[0] + scorers = scorers[0] best_params = best_params[0] best_scores = best_scores[0] + grid_scores = grid_scores[0] if refit: best_estimators = best_estimators[0] - ret = [best_params, best_scores, grid_scores] + ret = [scorers, best_params, best_scores, grid_scores] if refit: ret.append(best_estimators) @@ -348,217 +346,6 @@ def fit_grid_point(X, y, estimator, parameters, train, test, scorer, fit_params) return scores[0], parameters, n_samples_test -def grid_search_cv(estimator, param_grid, X, y=None, scoring=None, - fit_params=None, n_jobs=1, iid=True, - refit=True, cv=None, verbose=0, pre_dispatch='2*n_jobs'): - """Exhaustive search over specified parameter values for an estimator. - - Parameters - ---------- - estimator : object type that implements the "fit" and "predict" methods - A object of that type is instantiated for each grid point. - - param_grid : dict or list of dictionaries - Dictionary with parameters names (string) as keys and lists of - parameter settings to try as values, or a list of such - dictionaries, in which case the grids spanned by each dictionary - in the list are explored. This enables searching over any sequence - of parameter settings. - - X : array-like, shape = [n_samples, n_features] - Training vector, where n_samples is the number of samples and - n_features is the number of features. - - y : array-like, shape = [n_samples] or [n_samples, n_output], optional - Target relative to X for classification or regression; - None for unsupervised learning. - - scoring : string, callable, list of strings/callables or None, optional, - default: None - A string (see model evaluation documentation) or - a scorer callable object / function with signature - ``scorer(estimator, X, y)``. - Lists can be used for randomized search of multiple metrics. 
- - fit_params : dict, optional - Parameters to pass to the fit method. - - n_jobs : int, optional - Number of jobs to run in parallel (default 1). - - iid : boolean, optional - If True, the data is assumed to be identically distributed across - the folds, and the loss minimized is the total loss per sample, - and not the mean loss across the folds. - - refit : boolean - Refit the best estimator with the entire dataset. - If "False", it is impossible to make predictions using - this GridSearchCV instance after fitting. - - cv : integer or cross-validation generator, optional - If an integer is passed, it is the number of folds (default 3). - Specific cross-validation objects can be passed, see - sklearn.cross_validation module for the list of possible objects - - verbose : integer - Controls the verbosity: the higher, the more messages. - - pre_dispatch : int, or string, optional - Controls the number of jobs that get dispatched during parallel - execution. Reducing this number can be useful to avoid an - explosion of memory consumption when more jobs get dispatched - than CPUs can process. This parameter can be: - - - None, in which case all the jobs are immediately - created and spawned. Use this for lightweight and - fast-running jobs, to avoid delays due to on-demand - spawning of the jobs - - - An int, giving the exact number of total jobs that are - spawned - - - A string, giving an expression as a function of n_jobs, - as in '2*n_jobs' - - Returns - ------- - `best_params` : dict or list of dicts - Parameter setting that gave the best results on the hold out data. - - `best_score` : float or list of floats - Score of best_estimator on the left out data. - - `grid_scores` : list of named tuples or list of lists of named tuples - Contains scores for all parameter combinations in param_grid. - Each entry corresponds to one parameter setting. - Each named tuple has the attributes: - - * ``parameters``, a dict of parameter settings - * ``mean_validation_score``, the mean score over the - cross-validation folds - * ``cv_validation_scores``, the list of scores for each fold - - `best_estimator` : estimator or list of estimators (only if refit=True) - Estimator that was chosen by the search, i.e. estimator - which gave highest score (or smallest loss if specified) - on the left out data. - - Lists are returned when `scoring` is a list. - """ - param_grid = ParameterGrid(param_grid) - return _fit_param_iter(estimator, X, y, scoring, param_grid, refit, cv, - pre_dispatch, fit_params, iid, n_jobs, verbose) - - -def randomized_search_cv(estimator, param_distributions, X, y, n_iter=10, - scoring=None, fit_params=None, n_jobs=1, iid=True, - refit=True, cv=None, verbose=0, - pre_dispatch='2*n_jobs', random_state=None): - """Randomized search on hyper parameters. - - Parameters - ---------- - estimator : object type that implements the "fit" and "predict" methods - A object of that type is instantiated for each parameter setting. - - param_distributions : dict - Dictionary with parameters names (string) as keys and distributions - or lists of parameters to try. Distributions must provide a ``rvs`` - method for sampling (such as those from scipy.stats.distributions). - If a list is given, it is sampled uniformly. - - X : array-like, shape = [n_samples, n_features] - Training vector, where n_samples is the number of samples and - n_features is the number of features. 
- - y : array-like, shape = [n_samples] or [n_samples, n_output], optional - Target relative to X for classification or regression; - None for unsupervised learning. - - n_iter : int, default=10 - Number of parameter settings that are sampled. n_iter trades - off runtime vs quality of the solution. - - scoring : string, callable, list of strings/callables or None, optional, - default: None - A string (see model evaluation documentation) or - a scorer callable object / function with signature - ``scorer(estimator, X, y)``. - Lists can be used for randomized search of multiple metrics. - - fit_params : dict, optional - Parameters to pass to the fit method. - - n_jobs : int, optional - Number of jobs to run in parallel (default 1). - - pre_dispatch : int, or string, optional - Controls the number of jobs that get dispatched during parallel - execution. Reducing this number can be useful to avoid an - explosion of memory consumption when more jobs get dispatched - than CPUs can process. This parameter can be: - - - None, in which case all the jobs are immediately - created and spawned. Use this for lightweight and - fast-running jobs, to avoid delays due to on-demand - spawning of the jobs - - - An int, giving the exact number of total jobs that are - spawned - - - A string, giving an expression as a function of n_jobs, - as in '2*n_jobs' - - iid : boolean, optional - If True, the data is assumed to be identically distributed across - the folds, and the loss minimized is the total loss per sample, - and not the mean loss across the folds. - - cv : integer or cross-validation generator, optional - If an integer is passed, it is the number of folds (default 3). - Specific cross-validation objects can be passed, see - sklearn.cross_validation module for the list of possible objects - - refit : boolean - Refit the best estimator with the entire dataset. - If "False", it is impossible to make predictions using - this RandomizedSearchCV instance after fitting. - - verbose : integer - Controls the verbosity: the higher, the more messages. - - Returns - ------- - `best_params` : dict or list of dicts - Parameter setting that gave the best results on the hold out data. - - `best_score` : float or list of floats - Score of best_estimator on the left out data. - - `grid_scores` : list of named tuples or list of lists of named tuples - Contains scores for all parameter combinations in param_grid. - Each entry corresponds to one parameter setting. - Each named tuple has the attributes: - - * ``parameters``, a dict of parameter settings - * ``mean_validation_score``, the mean score over the - cross-validation folds - * ``cv_validation_scores``, the list of scores for each fold - - `best_estimator` : estimator or list of estimators (only if refit=True) - Estimator that was chosen by the search, i.e. estimator - which gave highest score (or smallest loss if specified) - on the left out data. - - Lists are returned when `scoring` is a list. 
- """ - sampled_params = ParameterSampler(param_distributions, - n_iter, - random_state=random_state) - return _fit_param_iter(estimator, X, y, scoring, sampled_params, refit, cv, - pre_dispatch, fit_params, iid, n_jobs, verbose) - def _check_param_grid(param_grid): if hasattr(param_grid, 'items'): @@ -666,22 +453,25 @@ def transform(self): return self.best_estimator_.transform def _fit(self, X, y, parameter_iterable): - self.scorer_ = check_scoring(self.estimator, scoring=self.scoring, - loss_func=self.loss_func, - score_func=self.score_func) - - ret = _fit_param_iter(self.estimator, X, y, self.scorer_, + ret = _fit_param_iter(self.estimator, X, y, self.scoring, parameter_iterable, self.refit, self.cv, self.pre_dispatch, self.fit_params, self.iid, self.n_jobs, self.verbose) + self.scorer_ = ret[0] + self.best_params_ = ret[1] + self.best_score_ = ret[2] + self.grid_scores_ = ret[3] - self.best_params_ = ret[0] - self.best_score_ = ret[1] - self.grid_scores_ = ret[2] if self.refit: - self.best_estimator_ = ret[3] + if isinstance(ret[4], list): + self.best_estimators_ = ret[4] + # By default, select the best estimator corresponding to the + # first scorer. + self.best_estimator_ = ret[4][0] + else: + self.best_estimator_ = ret[4] return self diff --git a/sklearn/tests/test_grid_search.py b/sklearn/tests/test_grid_search.py index 6d57e89855a8e..ea188b66ecfe4 100644 --- a/sklearn/tests/test_grid_search.py +++ b/sklearn/tests/test_grid_search.py @@ -24,12 +24,11 @@ from scipy.stats import distributions -from sklearn.base import BaseEstimator +from sklearn.base import BaseEstimator, clone from sklearn.datasets import make_classification from sklearn.datasets import make_blobs from sklearn.datasets import make_multilabel_classification -from sklearn.grid_search import (grid_search_cv, GridSearchCV, - randomized_search_cv, RandomizedSearchCV, +from sklearn.grid_search import (GridSearchCV, RandomizedSearchCV, ParameterGrid, ParameterSampler) from sklearn.svm import LinearSVC, SVC from sklearn.tree import DecisionTreeRegressor @@ -648,24 +647,31 @@ def test_grid_search_with_multioutput_data(): def test_multiple_grid_search_cv(): - for n, func in enumerate((grid_search_cv, randomized_search_cv)): - clf = LinearSVC(random_state=0) - X, y = make_blobs(random_state=0, centers=2) - param_grid = {"C": [0.1, 1, 10]} + clf = LinearSVC(random_state=0) + X, y = make_blobs(random_state=0, centers=2) + param_grid = {"C": [0.1, 1, 10]} + scoring = ["f1", "roc_auc"] - if n == 0: - kwargs = dict() - else: - kwargs = dict(random_state=0) + gs = GridSearchCV(clf, param_grid, scoring=scoring) + rs = RandomizedSearchCV(clf, param_grid, scoring=scoring, random_state=0) + + for n, est in enumerate((gs, rs)): + est.fit(X, y) - ret = func(clf, param_grid, X, y, scoring=["f1", "roc_auc"], **kwargs) - ret_f1 = func(clf, param_grid, X, y, scoring="f1", **kwargs) - ret_auc = func(clf, param_grid, X, y, scoring="roc_auc", **kwargs) + for attr in ("scorer_", "best_score_", "grid_scores_", "best_params_"): + attr = getattr(est, attr) + assert_equal(len(attr), 2) - for i in xrange(len(ret)): + est_f1 = clone(est) + est_f1.scoring = "f1" + est_f1.fit(X, y) - assert_equal(len(ret[i]), 2) + est_auc = clone(est) + est_auc.scoring = "roc_auc" + est_auc.fit(X, y) - for i in (0, 1): - assert_equal(ret[i][0], ret_f1[i]) - assert_equal(ret[i][1], ret_auc[i]) + for attr in ("best_score_", "best_params_"): + assert_equal(getattr(est, attr)[0], + getattr(est_f1, attr)) + assert_equal(getattr(est, attr)[1], + getattr(est_auc, attr)) From 
aad77c87e37a5d459ed6b67ed0b2ea7712e0db71 Mon Sep 17 00:00:00 2001 From: Mathieu Blondel Date: Mon, 3 Feb 2014 17:08:28 +0900 Subject: [PATCH 35/51] Simplify inner loop. Code snippet by @jnothman. --- sklearn/grid_search.py | 31 +++++++++---------------------- 1 file changed, 9 insertions(+), 22 deletions(-) diff --git a/sklearn/grid_search.py b/sklearn/grid_search.py index abe6beae1ab53..1692689c1c2c2 100644 --- a/sklearn/grid_search.py +++ b/sklearn/grid_search.py @@ -230,32 +230,19 @@ def _fit_param_iter(estimator, X, y, scoring, parameter_iterable, refit, grid_scores.append([]) for grid_start in range(0, n_fits, n_folds): - n_test_samples = 0 - scores = np.zeros(n_scorers) - all_scores = np.zeros((n_scorers, n_folds)) - # Parameters for this part of the grid. - parameters = out[grid_start][3] - - for j, (curr_scores, curr_n_test_samples, _, _) in \ - enumerate(out[grid_start:grid_start + n_folds]): - - all_scores[:, j] = curr_scores - - if iid: - curr_scores *= curr_n_test_samples - n_test_samples += curr_n_test_samples - - scores += curr_scores - - if iid: - scores /= float(n_test_samples) - else: - scores /= float(n_folds) + grid_stop = grid_start + n_folds + fold_scores, n_test, _, parameters = zip(*out[grid_start:grid_stop]) + # `params` contains the same parameters n_fold times. + parameters = parameters[0] + # `fold_scores` is an n_folds x n_scorers 2-d array. + fold_scores = np.array(fold_scores) + weights = n_test if iid else None + mean_scores = np.average(fold_scores, axis=0, weights=weights) for i in xrange(n_scorers): # TODO: shall we also store the test_fold_sizes? - tup = _CVScoreTuple(parameters, scores[i], all_scores[i]) + tup = _CVScoreTuple(parameters, mean_scores[i], fold_scores[:, i]) grid_scores[i].append(tup) # Find the best parameters by comparing on the mean validation score: From a7e79f3c4ad818cbc7cb41add8fa476049fd4480 Mon Sep 17 00:00:00 2001 From: Mathieu Blondel Date: Mon, 3 Feb 2014 17:15:24 +0900 Subject: [PATCH 36/51] Fix incorrect comment. --- sklearn/grid_search.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/grid_search.py b/sklearn/grid_search.py index 1692689c1c2c2..1904fffa67871 100644 --- a/sklearn/grid_search.py +++ b/sklearn/grid_search.py @@ -220,7 +220,7 @@ def _fit_param_iter(estimator, X, y, scoring, parameter_iterable, refit, for parameters in parameter_iterable for train, test in cv) - # Out is a list of triplet: score, estimator, n_test_samples + # `out` is a list of tuples (fold_score, n_test, scoring_time, params). n_fits = len(out) n_folds = len(cv) n_scorers = len(scorers) From 8040432c17d982935368efa73fb223b966847911 Mon Sep 17 00:00:00 2001 From: Mathieu Blondel Date: Mon, 3 Feb 2014 17:24:51 +0900 Subject: [PATCH 37/51] Fix comments. --- sklearn/cross_validation.py | 10 ++++------ sklearn/grid_search.py | 2 +- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/sklearn/cross_validation.py b/sklearn/cross_validation.py index 19daf190fa093..1ef0290602b69 100644 --- a/sklearn/cross_validation.py +++ b/sklearn/cross_validation.py @@ -1100,20 +1100,18 @@ def cross_val_score(estimator, X, y=None, scoring=None, cv=None, n_jobs=1, scoring=scoring)] ret_1d = True - # We clone the estimator to make sure that all the folds are - # independent, and that it is pickle-able. parallel = Parallel(n_jobs=n_jobs, verbose=verbose, pre_dispatch=pre_dispatch) - # ret is a list of size n_folds. Each element of the list contains the tuple - # returned by _fit_and_score. 
- ret = parallel(delayed(_fit_and_score)(clone(estimator), X, y, scorers, + # `out` is a list of size n_folds. Each element of the list is a tuple + # (fold_scores, n_test, scoring_time) + out = parallel(delayed(_fit_and_score)(clone(estimator), X, y, scorers, train, test, verbose, None, fit_params) for train, test in cv) # Retrieve n_scorers x n_folds 2d-array. - scores = np.array([r[0] for r in ret]).T + scores = np.array([o[0] for o in out]).T if ret_1d: return scores[0] diff --git a/sklearn/grid_search.py b/sklearn/grid_search.py index 1904fffa67871..cbab51a175d5e 100644 --- a/sklearn/grid_search.py +++ b/sklearn/grid_search.py @@ -220,7 +220,7 @@ def _fit_param_iter(estimator, X, y, scoring, parameter_iterable, refit, for parameters in parameter_iterable for train, test in cv) - # `out` is a list of tuples (fold_score, n_test, scoring_time, params). + # `out` is a list of tuples (fold_scores, n_test, scoring_time, params). n_fits = len(out) n_folds = len(cv) n_scorers = len(scorers) From 2a9638477efd8795183d6925b04e6ea4549802c0 Mon Sep 17 00:00:00 2001 From: Mathieu Blondel Date: Mon, 3 Feb 2014 18:01:39 +0900 Subject: [PATCH 38/51] Return training time only. --- sklearn/cross_validation.py | 18 +++++++++--------- sklearn/grid_search.py | 2 +- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/sklearn/cross_validation.py b/sklearn/cross_validation.py index 1ef0290602b69..9ee8482b47718 100644 --- a/sklearn/cross_validation.py +++ b/sklearn/cross_validation.py @@ -1104,7 +1104,7 @@ def cross_val_score(estimator, X, y=None, scoring=None, cv=None, n_jobs=1, pre_dispatch=pre_dispatch) # `out` is a list of size n_folds. Each element of the list is a tuple - # (fold_scores, n_test, scoring_time) + # (fold_scores, n_test, train_time) out = parallel(delayed(_fit_and_score)(clone(estimator), X, y, scorers, train, test, verbose, None, fit_params) @@ -1171,8 +1171,8 @@ def _fit_and_score(estimator, X, y, scorers, train, test, verbose, parameters, n_test_samples : int Number of test samples. - scoring_time : float - Time spent for fitting and scoring in seconds. + train_time : float + Time spent for fitting in seconds. parameters : dict or None, optional The parameters that have been evaluated. @@ -1195,16 +1195,18 @@ def _fit_and_score(estimator, X, y, scorers, train, test, verbose, parameters, if parameters is not None: estimator.set_params(**parameters) - start_time = time.time() - X_train, y_train = _safe_split(estimator, X, y, train) X_test, y_test = _safe_split(estimator, X, y, test, train) + start_time = time.time() + if y_train is None: estimator.fit(X_train, **fit_params) else: estimator.fit(X_train, y_train, **fit_params) + train_time = time.time() - start_time + if len(scorers) == 1: # We cannot use _evaluate_scorers here because the scorer might be # estimator.score. 
@@ -1220,16 +1222,14 @@ def _fit_and_score(estimator, X, y, scorers, train, test, verbose, parameters, train_scores = _evaluate_scorers(estimator, X_train, y_train, scorers) - scoring_time = time.time() - start_time - if verbose > 2: msg += ", score=%s" % test_scores if verbose > 1: - end_msg = "%s -%s" % (msg, logger.short_format_time(scoring_time)) + end_msg = "%s -%s" % (msg, logger.short_format_time(train_time)) print("[CV] %s %s" % ((64 - len(end_msg)) * '.', end_msg)) ret = [train_scores] if return_train_scores else [] - ret.extend([test_scores, _num_samples(X_test), scoring_time]) + ret.extend([test_scores, _num_samples(X_test), train_time]) if return_parameters: ret.append(parameters) return ret diff --git a/sklearn/grid_search.py b/sklearn/grid_search.py index cbab51a175d5e..0272fb0975c4b 100644 --- a/sklearn/grid_search.py +++ b/sklearn/grid_search.py @@ -220,7 +220,7 @@ def _fit_param_iter(estimator, X, y, scoring, parameter_iterable, refit, for parameters in parameter_iterable for train, test in cv) - # `out` is a list of tuples (fold_scores, n_test, scoring_time, params). + # `out` is a list of tuples (fold_scores, n_test, train_time, params). n_fits = len(out) n_folds = len(cv) n_scorers = len(scorers) From 5933d981151da594a2ffbf4badb1c9edeeed73d3 Mon Sep 17 00:00:00 2001 From: Mathieu Blondel Date: Mon, 3 Feb 2014 18:14:47 +0900 Subject: [PATCH 39/51] Remove return_parameters. --- sklearn/cross_validation.py | 49 ++++--------------------------------- sklearn/grid_search.py | 3 +-- 2 files changed, 6 insertions(+), 46 deletions(-) diff --git a/sklearn/cross_validation.py b/sklearn/cross_validation.py index 9ee8482b47718..ad0d3f0cccbfe 100644 --- a/sklearn/cross_validation.py +++ b/sklearn/cross_validation.py @@ -1120,54 +1120,17 @@ def cross_val_score(estimator, X, y=None, scoring=None, cv=None, n_jobs=1, def _fit_and_score(estimator, X, y, scorers, train, test, verbose, parameters, - fit_params, return_train_scores=False, - return_parameters=False): + fit_params, return_train_scores=False): """Fit estimator and compute scores for a given dataset split. - Parameters - ---------- - estimator : estimator object implementing 'fit' - The object to use to fit the data. - - X : array-like of shape at least 2D - The data to fit. - - y : array-like, optional, default: None - The target variable to try to predict in the case of - supervised learning. - - scorers : list - A list of scorer objects - - train : array-like, shape = (n_train_samples,) - Indices of training samples. - - test : array-like, shape = (n_test_samples,) - Indices of test samples. - - verbose : integer - The verbosity level. - - parameters : dict or None - Parameters to be set on the estimator. - - fit_params : dict or None - Parameters that will be passed to ``estimator.fit``. - - return_train_scores : boolean, optional, default: False - Compute and return scores on training set. - - return_parameters : boolean, optional, default: False - Return parameters that has been used for the estimator. - Returns ------- - test_score : array of floats - Scores on test set. - train_score : array of floats, optional Scores on training set. + test_score : array of floats + Scores on test set. + n_test_samples : int Number of test samples. 
@@ -1229,9 +1192,7 @@ def _fit_and_score(estimator, X, y, scorers, train, test, verbose, parameters, print("[CV] %s %s" % ((64 - len(end_msg)) * '.', end_msg)) ret = [train_scores] if return_train_scores else [] - ret.extend([test_scores, _num_samples(X_test), train_time]) - if return_parameters: - ret.append(parameters) + ret.extend([test_scores, _num_samples(X_test), train_time, parameters]) return ret diff --git a/sklearn/grid_search.py b/sklearn/grid_search.py index 0272fb0975c4b..5aae5a77427a2 100644 --- a/sklearn/grid_search.py +++ b/sklearn/grid_search.py @@ -215,8 +215,7 @@ def _fit_param_iter(estimator, X, y, scoring, parameter_iterable, refit, out = Parallel(n_jobs=n_jobs, verbose=verbose, pre_dispatch=pre_dispatch)( delayed(_fit_and_score)( clone(base_estimator), X, y, scorers, train, test, - verbose, parameters, fit_params, - return_parameters=True) + verbose, parameters, fit_params) for parameters in parameter_iterable for train, test in cv) From 4ee0a8e83f28d9052870561eb9cb255a379d9d2b Mon Sep 17 00:00:00 2001 From: Mathieu Blondel Date: Mon, 3 Feb 2014 18:16:44 +0900 Subject: [PATCH 40/51] Cosmit: used += instead of extend. --- sklearn/cross_validation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/cross_validation.py b/sklearn/cross_validation.py index ad0d3f0cccbfe..a3a86e281e409 100644 --- a/sklearn/cross_validation.py +++ b/sklearn/cross_validation.py @@ -1192,7 +1192,7 @@ def _fit_and_score(estimator, X, y, scorers, train, test, verbose, parameters, print("[CV] %s %s" % ((64 - len(end_msg)) * '.', end_msg)) ret = [train_scores] if return_train_scores else [] - ret.extend([test_scores, _num_samples(X_test), train_time, parameters]) + ret += [test_scores, _num_samples(X_test), train_time, parameters] return ret From c08bdd8960f3da4ff7b343ef6f787e09e82040e5 Mon Sep 17 00:00:00 2001 From: Mathieu Blondel Date: Mon, 3 Feb 2014 18:48:56 +0900 Subject: [PATCH 41/51] Add cross_val_report. --- sklearn/cross_validation.py | 115 +++++++++++++++++++++++-- sklearn/tests/test_cross_validation.py | 41 ++++++++- 2 files changed, 149 insertions(+), 7 deletions(-) diff --git a/sklearn/cross_validation.py b/sklearn/cross_validation.py index a3a86e281e409..1cc8ce7cdfe29 100644 --- a/sklearn/cross_validation.py +++ b/sklearn/cross_validation.py @@ -1030,7 +1030,7 @@ def __len__(self): def cross_val_score(estimator, X, y=None, scoring=None, cv=None, n_jobs=1, verbose=0, fit_params=None, score_func=None, pre_dispatch='2*n_jobs'): - """Evaluate a score by cross-validation + """Evaluate test score by cross-validation Parameters ---------- @@ -1087,7 +1087,7 @@ def cross_val_score(estimator, X, y=None, scoring=None, cv=None, n_jobs=1, ------- scores : array of float, shape=(n_folds,) or (n_scoring, n_folds) Array of scores of the estimator for each run of the cross validation. - The returned array is 2d is `scoring` is a list. + The returned array is 2d if `scoring` is a list. """ X, y = check_arrays(X, y, sparse_format='csr', allow_lists=True) cv = _check_cv(cv, X, y, classifier=is_classifier(estimator)) @@ -1104,19 +1104,122 @@ def cross_val_score(estimator, X, y=None, scoring=None, cv=None, n_jobs=1, pre_dispatch=pre_dispatch) # `out` is a list of size n_folds. Each element of the list is a tuple - # (fold_scores, n_test, train_time) + # (test_scores, n_test, train_time) out = parallel(delayed(_fit_and_score)(clone(estimator), X, y, scorers, train, test, verbose, None, fit_params) for train, test in cv) # Retrieve n_scorers x n_folds 2d-array. 
- scores = np.array([o[0] for o in out]).T + test_scores = np.array([o[0] for o in out]).T + + if ret_1d: + return test_scores[0] + else: + return test_scores + + +def cross_val_report(estimator, X, y=None, scoring=None, cv=None, n_jobs=1, + verbose=0, fit_params=None, score_func=None, + pre_dispatch='2*n_jobs'): + """Evaluate a score by cross-validation + + Parameters + ---------- + estimator : estimator object implementing 'fit' + The object to use to fit the data. + + X : array-like of shape at least 2D + The data to fit. + + y : array-like, optional, default: None + The target variable to try to predict in the case of + supervised learning. + + scoring : string, callable, list of strings/callables or None, optional, + default: None + A string (see model evaluation documentation) or + a scorer callable object / function with signature + ``scorer(estimator, X, y)``. + Lists can be used for randomized search of multiple metrics. + + cv : cross-validation generator, optional, default: None + A cross-validation generator. If None, a 3-fold cross + validation is used or 3-fold stratified cross-validation + when y is supplied and estimator is a classifier. + + n_jobs : integer, optional + The number of CPUs to use to do the computation. -1 means + 'all CPUs'. + + verbose : integer, optional + The verbosity level. + + fit_params : dict, optional + Parameters to pass to the fit method of the estimator. + + pre_dispatch : int, or string, optional + Controls the number of jobs that get dispatched during parallel + execution. Reducing this number can be useful to avoid an + explosion of memory consumption when more jobs get dispatched + than CPUs can process. This parameter can be: + + - None, in which case all the jobs are immediately + created and spawned. Use this for lightweight and + fast-running jobs, to avoid delays due to on-demand + spawning of the jobs + + - An int, giving the exact number of total jobs that are + spawned + + - A string, giving an expression as a function of n_jobs, + as in '2*n_jobs' + + Returns + ------- + train_scores : array of float, shape=(n_folds,) or (n_scoring, n_folds) + Array of trainng scores of the estimator for each run of the cross + validation. The returned array is 2d if `scoring` is a list. + + test_scores : array of float, shape=(n_folds,) or (n_scoring, n_folds) + Array of test scores of the estimator for each run of the cross + validation. The returned array is 2d if `scoring` is a list. + + train_times : array of float, shape=(n_folds,) + Array of training times of the estimator for each run of the cross + validation. + """ + X, y = check_arrays(X, y, sparse_format='csr', allow_lists=True) + cv = _check_cv(cv, X, y, classifier=is_classifier(estimator)) + + if isinstance(scoring, list): + scorers = [check_scoring(estimator, scoring=s) for s in scoring] + ret_1d = False + else: + scorers = [check_scoring(estimator, score_func=score_func, + scoring=scoring)] + ret_1d = True + + parallel = Parallel(n_jobs=n_jobs, verbose=verbose, + pre_dispatch=pre_dispatch) + + # `out` is a list of size n_folds. Each element of the list is a tuple + # (train_scores, test_scores, n_test, train_time) + out = parallel(delayed(_fit_and_score)(clone(estimator), X, y, scorers, + train, test, verbose, None, + fit_params, + return_train_scores=True) + for train, test in cv) + + # Retrieve n_scorers x n_folds 2d-array. 
+ train_scores = np.array([o[0] for o in out]).T + test_scores = np.array([o[1] for o in out]).T + train_times = np.array([o[3] for o in out]) if ret_1d: - return scores[0] + return train_scores[0], test_scores[0], train_times else: - return scores + return train_scores, test_scores, train_times def _fit_and_score(estimator, X, y, scorers, train, test, verbose, parameters, diff --git a/sklearn/tests/test_cross_validation.py b/sklearn/tests/test_cross_validation.py index e2191565e8ba6..744045f51b97c 100644 --- a/sklearn/tests/test_cross_validation.py +++ b/sklearn/tests/test_cross_validation.py @@ -462,12 +462,14 @@ def test_cross_val_score_precomputed(): def test_cross_val_score_multiple_scorers(): - X, y = make_classification(n_classes=2) + X, y = make_classification(n_classes=2, random_state=0) clf = Perceptron(random_state=0) scores = cval.cross_val_score(clf, X, y, cv=3, scoring=["f1", "roc_auc"]) assert_equal(scores.shape, (2, 3)) + # Check that the results are the same as when cross_val_score is called + # individually. f1_scores = cval.cross_val_score(clf, X, y, cv=3, scoring="f1") auc_scores = cval.cross_val_score(clf, X, y, cv=3, scoring="roc_auc") scores2 = np.array([f1_scores, auc_scores]) @@ -506,6 +508,43 @@ class BrokenEstimator: assert_raises(TypeError, cval.cross_val_score, BrokenEstimator(), X) +def test_cross_val_report(): + X, y = make_classification(n_classes=2, random_state=0) + clf = Perceptron(random_state=0) + + tr_scores, te_scores, tr_times = cval.cross_val_report(clf, X, y, cv=3, + scoring="f1") + assert_equal(tr_scores.shape, (3,)) + assert_equal(te_scores.shape, (3,)) + assert_equal(tr_times.shape, (3,)) + + assert_greater(tr_scores.mean(), te_scores.mean()) + + +def test_cross_val_report_multiple_scorers(): + X, y = make_classification(n_classes=2, random_state=0) + clf = Perceptron(random_state=0) + + tr_scores, te_scores, tr_times = cval.cross_val_report(clf, X, y, cv=3, + scoring=["f1", + "roc_auc"]) + assert_equal(tr_scores.shape, (2, 3)) + assert_equal(te_scores.shape, (2, 3)) + assert_equal(tr_times.shape, (3,)) + + # Check that the results are the same as when cross_val_report is called + # individually. + f1_tr, f1_te, _ = cval.cross_val_report(clf, X, y, cv=3, scoring="f1") + auc_tr, auc_te, _ = cval.cross_val_report(clf, X, y, cv=3, + scoring="roc_auc") + + assert_array_almost_equal(tr_scores[0], f1_tr) + assert_array_almost_equal(te_scores[0], f1_te) + assert_array_almost_equal(tr_scores[1], auc_tr) + assert_array_almost_equal(te_scores[1], auc_te) + + + def test_train_test_split_errors(): assert_raises(ValueError, cval.train_test_split) assert_raises(ValueError, cval.train_test_split, range(3), train_size=1.1) From e0dfe238d08497c492b10d9bcf87cb99ea51af98 Mon Sep 17 00:00:00 2001 From: Mathieu Blondel Date: Tue, 4 Feb 2014 14:26:57 +0900 Subject: [PATCH 42/51] Remove score_func from cross_val_report. This function is deprecated, no need to add it to a new function. 
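For reference, a minimal usage sketch of the function after this change. The
`cross_val_report` name and signature are specific to this branch (they never
shipped in a scikit-learn release); the call below mirrors the tests added in
the previous patch:

    from sklearn.cross_validation import cross_val_report
    from sklearn.datasets import make_classification
    from sklearn.linear_model import Perceptron

    X, y = make_classification(n_classes=2, random_state=0)
    clf = Perceptron(random_state=0)
    # Per-fold training scores, test scores and fit times in one call;
    # metrics are now selected with `scoring` only, `score_func` is gone.
    tr_scores, te_scores, tr_times = cross_val_report(clf, X, y, cv=3,
                                                      scoring="f1")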
--- sklearn/cross_validation.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/sklearn/cross_validation.py b/sklearn/cross_validation.py index 1cc8ce7cdfe29..35051a97a3a91 100644 --- a/sklearn/cross_validation.py +++ b/sklearn/cross_validation.py @@ -1120,8 +1120,7 @@ def cross_val_score(estimator, X, y=None, scoring=None, cv=None, n_jobs=1, def cross_val_report(estimator, X, y=None, scoring=None, cv=None, n_jobs=1, - verbose=0, fit_params=None, score_func=None, - pre_dispatch='2*n_jobs'): + verbose=0, fit_params=None, pre_dispatch='2*n_jobs'): """Evaluate a score by cross-validation Parameters @@ -1196,8 +1195,7 @@ def cross_val_report(estimator, X, y=None, scoring=None, cv=None, n_jobs=1, scorers = [check_scoring(estimator, scoring=s) for s in scoring] ret_1d = False else: - scorers = [check_scoring(estimator, score_func=score_func, - scoring=scoring)] + scorers = [check_scoring(estimator, scoring=scoring)] ret_1d = True parallel = Parallel(n_jobs=n_jobs, verbose=verbose, From 9b5fe9a709d92a953dc9cd1444814477da1e1ac6 Mon Sep 17 00:00:00 2001 From: Mathieu Blondel Date: Fri, 7 Feb 2014 16:33:07 +0900 Subject: [PATCH 43/51] Accept tuples too. --- sklearn/cross_validation.py | 4 ++-- sklearn/grid_search.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/sklearn/cross_validation.py b/sklearn/cross_validation.py index 35051a97a3a91..60e40d3348a3d 100644 --- a/sklearn/cross_validation.py +++ b/sklearn/cross_validation.py @@ -1092,7 +1092,7 @@ def cross_val_score(estimator, X, y=None, scoring=None, cv=None, n_jobs=1, X, y = check_arrays(X, y, sparse_format='csr', allow_lists=True) cv = _check_cv(cv, X, y, classifier=is_classifier(estimator)) - if isinstance(scoring, list): + if isinstance(scoring, (tuple, list)): scorers = [check_scoring(estimator, scoring=s) for s in scoring] ret_1d = False else: @@ -1191,7 +1191,7 @@ def cross_val_report(estimator, X, y=None, scoring=None, cv=None, n_jobs=1, X, y = check_arrays(X, y, sparse_format='csr', allow_lists=True) cv = _check_cv(cv, X, y, classifier=is_classifier(estimator)) - if isinstance(scoring, list): + if isinstance(scoring, (tuple, list)): scorers = [check_scoring(estimator, scoring=s) for s in scoring] ret_1d = False else: diff --git a/sklearn/grid_search.py b/sklearn/grid_search.py index 5aae5a77427a2..15930f62b1065 100644 --- a/sklearn/grid_search.py +++ b/sklearn/grid_search.py @@ -185,7 +185,7 @@ def __len__(self): def _fit_param_iter(estimator, X, y, scoring, parameter_iterable, refit, cv, pre_dispatch, fit_params, iid, n_jobs, verbose): """Actual fitting, performing the search over parameters.""" - if isinstance(scoring, list): + if isinstance(scoring, (tuple, list)): scorers = [check_scoring(estimator, scoring=s) for s in scoring] ret_1d = False else: From 0346fa3f4ae4e86b80a6e9b4d3080cff4dd0f3c4 Mon Sep 17 00:00:00 2001 From: Mathieu Blondel Date: Fri, 7 Feb 2014 17:07:16 +0900 Subject: [PATCH 44/51] Accept callables in _evaluate_scorers. 
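A hedged sketch of what this enables, using the private helpers as they exist
on this branch (`_evaluate_scorers`, `check_scoring` and `_Scorer` live in
sklearn/metrics/scorer.py here and may differ elsewhere); `accuracy_callable`
is just an illustrative name:

    import numpy as np
    from sklearn.datasets import make_classification
    from sklearn.linear_model import Perceptron
    from sklearn.metrics.scorer import _evaluate_scorers, check_scoring

    def accuracy_callable(estimator, X, y):
        # Plain callable(estimator, X, y) returning a number.
        return np.mean(estimator.predict(X) == y)

    X, y = make_classification(n_classes=2, random_state=0)
    est = Perceptron(random_state=0).fit(X, y)
    # _Scorer objects and bare callables can now be mixed in one list;
    # callables are evaluated directly, without the prediction caching.
    scores = _evaluate_scorers(est, X, y,
                               [check_scoring(est, scoring="f1"),
                                accuracy_callable])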
--- sklearn/cross_validation.py | 8 +------- sklearn/metrics/scorer.py | 19 +++++++++++++++++-- 2 files changed, 18 insertions(+), 9 deletions(-) diff --git a/sklearn/cross_validation.py b/sklearn/cross_validation.py index 60e40d3348a3d..5216c9f0980e5 100644 --- a/sklearn/cross_validation.py +++ b/sklearn/cross_validation.py @@ -1271,13 +1271,7 @@ def _fit_and_score(estimator, X, y, scorers, train, test, verbose, parameters, train_time = time.time() - start_time - if len(scorers) == 1: - # We cannot use _evaluate_scorers here because the scorer might be - # estimator.score. - test_scores = np.array([scorers[0](estimator, X_test, y_test)]) - else: - test_scores = _evaluate_scorers(estimator, X_test, y_test, scorers) - + test_scores = _evaluate_scorers(estimator, X_test, y_test, scorers) if return_train_scores: if len(scorers) == 1: diff --git a/sklearn/metrics/scorer.py b/sklearn/metrics/scorer.py index 7b21ddcb75fdb..72460cd316704 100644 --- a/sklearn/metrics/scorer.py +++ b/sklearn/metrics/scorer.py @@ -56,16 +56,27 @@ def __repr__(self): def _evaluate_scorers(estimator, X, y, scorers): + """Evaluate a list of scorers. `scorers` may contain _Scorer objects or + callables of the form callable(estimator, X, y).""" + + if len(scorers) == 1 and not isinstance(scorers[0], _Scorer): + # We won't need any predictions if there is only one callable in the + # list. + return np.array([scorers[0](estimator, X, y)]) + has_pb = hasattr(estimator, "predict_proba") has_df = hasattr(estimator, "decision_function") _is_classifier = is_classifier(estimator) - _type_of_y = type_of_target(y) + _type_of_y = type_of_target(y) if y is not None else None # Make a first pass through scorers to determine if we need # predict_proba or decision_function. needs_proba = False needs_df = False for scorer in scorers: + if not isinstance(scorer, _Scorer): + continue # assumed to be a callable + if scorer.needs_proba: if not has_pb: raise ValueError("%s needs probabilities but predict_proba is" @@ -122,6 +133,10 @@ def _evaluate_scorers(estimator, X, y, scorers): # Compute scores. scores = [] for scorer in scorers: + if not isinstance(scorer, _Scorer): + scores.append(scorer(estimator, X, y)) + continue + if scorer.needs_proba: score = scorer.score_func(y, y_proba, **scorer.kwargs) @@ -189,7 +204,7 @@ def check_scoring(estimator, scoring=None, allow_none=False, loss_func=None, Returns ------- - scoring : callable + scorer : callable A scorer callable object / function with signature ``scorer(estimator, X, y)``. """ From 015e01e7906f6e8275cd5132618d8fdf6628e81e Mon Sep 17 00:00:00 2001 From: Mathieu Blondel Date: Fri, 7 Feb 2014 17:19:10 +0900 Subject: [PATCH 45/51] Unused imports. --- sklearn/metrics/scorer.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sklearn/metrics/scorer.py b/sklearn/metrics/scorer.py index 72460cd316704..2447307a9d9f5 100644 --- a/sklearn/metrics/scorer.py +++ b/sklearn/metrics/scorer.py @@ -18,7 +18,6 @@ # Arnaud Joly # License: Simplified BSD -from abc import ABCMeta, abstractmethod from warnings import warn import numbers From eaa3aebf20152d9b62a933527e0778ffa9904107 Mon Sep 17 00:00:00 2001 From: Mathieu Blondel Date: Fri, 7 Feb 2014 17:26:15 +0900 Subject: [PATCH 46/51] Clone early. 
--- sklearn/grid_search.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/sklearn/grid_search.py b/sklearn/grid_search.py index 15930f62b1065..00de861164f42 100644 --- a/sklearn/grid_search.py +++ b/sklearn/grid_search.py @@ -185,6 +185,9 @@ def __len__(self): def _fit_param_iter(estimator, X, y, scoring, parameter_iterable, refit, cv, pre_dispatch, fit_params, iid, n_jobs, verbose): """Actual fitting, performing the search over parameters.""" + + estimator = clone(estimator) + if isinstance(scoring, (tuple, list)): scorers = [check_scoring(estimator, scoring=s) for s in scoring] ret_1d = False @@ -210,11 +213,9 @@ def _fit_param_iter(estimator, X, y, scoring, parameter_iterable, refit, " {2} fits".format(len(cv), n_candidates, n_candidates * len(cv))) - base_estimator = clone(estimator) - out = Parallel(n_jobs=n_jobs, verbose=verbose, pre_dispatch=pre_dispatch)( delayed(_fit_and_score)( - clone(base_estimator), X, y, scorers, train, test, + clone(estimator), X, y, scorers, train, test, verbose, parameters, fit_params) for parameters in parameter_iterable for train, test in cv) @@ -254,8 +255,8 @@ def _fit_param_iter(estimator, X, y, scoring, parameter_iterable, refit, best_estimators = [] if refit: for i in xrange(len(scorers)): - base_estimator = clone(estimator) - best_estimator = base_estimator.set_params(**best_params[i]) + estimator = clone(estimator) + best_estimator = estimator.set_params(**best_params[i]) best_estimators.append(best_estimator) if y is not None: best_estimator.fit(X, y, **fit_params) From 96c36c77b6b22e1f32702b0f8b8ffba26afc1d1f Mon Sep 17 00:00:00 2001 From: Mathieu Blondel Date: Sun, 9 Feb 2014 23:55:53 +0900 Subject: [PATCH 47/51] Multiple scorer support in validation_curve. --- sklearn/learning_curve.py | 32 +++++++++++++++++++--------- sklearn/tests/test_learning_curve.py | 18 ++++++++++++++++ 2 files changed, 40 insertions(+), 10 deletions(-) diff --git a/sklearn/learning_curve.py b/sklearn/learning_curve.py index 7ed1736fffe77..fac55b0d9ba18 100644 --- a/sklearn/learning_curve.py +++ b/sklearn/learning_curve.py @@ -252,7 +252,7 @@ def validation_curve(estimator, X, y, param_name, param_range, cv=None, param_name : string Name of the parameter that will be varied. - param_range : array-like, shape (n_values,) + param_range : array-like, shape (n_params,) The values of the parameter that will be evaluated. cv : integer, cross-validation generator, optional @@ -278,10 +278,12 @@ def validation_curve(estimator, X, y, param_name, param_range, cv=None, Returns ------- - train_scores : array, shape (n_ticks, n_cv_folds) + train_scores : array, shape (n_params, n_cv_folds) or + (n_scorers, n_params, n_cv_folds) Scores on training sets. - test_scores : array, shape (n_ticks, n_cv_folds) + test_scores : array, shape (n_params, n_cv_folds) or + (n_scorers, n_params, n_cv_folds) Scores on test set. 
Notes @@ -291,22 +293,32 @@ def validation_curve(estimator, X, y, param_name, param_range, cv=None, """ X, y = check_arrays(X, y, sparse_format='csr', allow_lists=True) cv = _check_cv(cv, X, y, classifier=is_classifier(estimator)) - scorer = check_scoring(estimator, scoring=scoring) + + if isinstance(scoring, (tuple, list)): + scorer = [check_scoring(estimator, scoring=s) for s in scoring] + one_scorer = False + else: + scorer = [check_scoring(estimator, scoring=scoring)] + one_scorer = True parallel = Parallel(n_jobs=n_jobs, pre_dispatch=pre_dispatch, verbose=verbose) out = parallel(delayed(_fit_and_score)( - estimator, X, y, [scorer], train, test, verbose, + estimator, X, y, scorer, train, test, verbose, parameters={param_name : v}, fit_params=None, return_train_scores=True) for train, test in cv for v in param_range) - out = np.asarray(out) n_params = len(param_range) - n_cv_folds = out.shape[0] / n_params + n_folds = len(out) / n_params - out = np.array(out).reshape(n_cv_folds, n_params, -1) + shape = (n_folds, n_params, -1) + train_scores = np.array([o[0] for o in out]).reshape(shape).T + test_scores = np.array([o[1] for o in out]).reshape(shape).T - train_scores = out[:, :, 0].T - test_scores = out[:, :, 1].T + #train_times = np.array([o[3] for o in out]).reshape(n_folds, n_params) + + if one_scorer: + train_scores = train_scores[0] + test_scores = test_scores[0] return train_scores, test_scores diff --git a/sklearn/tests/test_learning_curve.py b/sklearn/tests/test_learning_curve.py index 42985823345ed..9d05d79aed28f 100644 --- a/sklearn/tests/test_learning_curve.py +++ b/sklearn/tests/test_learning_curve.py @@ -15,6 +15,7 @@ from sklearn.datasets import make_classification from sklearn.cross_validation import KFold from sklearn.linear_model import PassiveAggressiveClassifier +from sklearn.svm import LinearSVC class MockImprovingEstimator(BaseEstimator): @@ -242,3 +243,20 @@ def test_validation_curve(): param_range=param_range, cv=2) assert_array_almost_equal(train_scores.mean(axis=1), param_range) assert_array_almost_equal(test_scores.mean(axis=1), 1 - param_range) + + +def test_validation_curve_multiple_scorers(): + X, y = make_classification(n_classes=2, random_state=0) + clf = LinearSVC(random_state=0) + C = [0.1, 1, 10, 100] + train_scores, test_scores = validation_curve(clf, X, y, param_name="C", + param_range=C, cv=3, + scoring=["f1", "roc_auc"]) + assert_equal(train_scores.shape, (2, 4, 3)) + assert_equal(test_scores.shape, (2, 4, 3)) + + for i, scoring in enumerate(("f1", "roc_auc")): + tr, te = validation_curve(clf, X, y, param_name="C", param_range=C, + cv=3, scoring=scoring) + assert_array_almost_equal(train_scores[i], tr) + assert_array_almost_equal(test_scores[i], te) From d4ffc1f360b2c5faf29ade922da677a003f3a752 Mon Sep 17 00:00:00 2001 From: Mathieu Blondel Date: Mon, 10 Feb 2014 00:42:47 +0900 Subject: [PATCH 48/51] Add rudimentary validation with contours example. 
--- examples/plot_validation_contours.py | 41 ++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 examples/plot_validation_contours.py diff --git a/examples/plot_validation_contours.py b/examples/plot_validation_contours.py new file mode 100644 index 0000000000000..a7ff13f9d66b2 --- /dev/null +++ b/examples/plot_validation_contours.py @@ -0,0 +1,41 @@ +import matplotlib.pyplot as plt +import numpy as np +from sklearn.datasets import load_digits +from sklearn.svm import SVC +from sklearn.learning_curve import validation_curve +from sklearn.externals.joblib import Memory + +memory = Memory(cachedir=".", verbose=0) + +@memory.cache +def grid(X, y, Cs, gammas): + scores = np.zeros((len(Cs), len(gammas))) + + for i, C in enumerate(Cs): + tr, te = validation_curve(SVC(kernel="rbf", C=C), X, y, + param_name="gamma", param_range=gammas, cv=3) + scores[i] = te.mean(axis=1) + + return scores + +digits = load_digits() +X, y = digits.data, digits.target + +gammas = np.logspace(-6, -1, 5) +Cs = np.logspace(-3, 3, 5) + +scores = grid(X, y, Cs, gammas) + + +plt.xlabel("C") +plt.xscale("log") + +plt.ylabel("gamma") +plt.yscale("log") + +X1, X2 = np.meshgrid(Cs, gammas) +cs = plt.contour(X1, X2, scores) + +plt.colorbar(cs) + +plt.show() From c33f0f9d6804343028c04a9503562fb0cd5b2e18 Mon Sep 17 00:00:00 2001 From: Mathieu Blondel Date: Mon, 10 Feb 2014 01:15:20 +0900 Subject: [PATCH 49/51] Support param_grid in validation_curve. --- examples/plot_validation_contours.py | 9 +++------ sklearn/learning_curve.py | 19 ++++++++++--------- sklearn/tests/test_learning_curve.py | 21 ++++++++++++++------- 3 files changed, 27 insertions(+), 22 deletions(-) diff --git a/examples/plot_validation_contours.py b/examples/plot_validation_contours.py index a7ff13f9d66b2..1e3b3edceaaa6 100644 --- a/examples/plot_validation_contours.py +++ b/examples/plot_validation_contours.py @@ -9,14 +9,11 @@ @memory.cache def grid(X, y, Cs, gammas): - scores = np.zeros((len(Cs), len(gammas))) + param_grid = {"C": Cs, "gamma": gammas} - for i, C in enumerate(Cs): - tr, te = validation_curve(SVC(kernel="rbf", C=C), X, y, - param_name="gamma", param_range=gammas, cv=3) - scores[i] = te.mean(axis=1) + tr, te = validation_curve(SVC(kernel="rbf"), X, y, param_grid, cv=3) - return scores + return te.mean(axis=1).reshape(len(Cs), len(gammas)) digits = load_digits() X, y = digits.data, digits.target diff --git a/sklearn/learning_curve.py b/sklearn/learning_curve.py index fac55b0d9ba18..dbd0bbaf7e3d4 100644 --- a/sklearn/learning_curve.py +++ b/sklearn/learning_curve.py @@ -11,6 +11,7 @@ from .utils import check_arrays from .externals.joblib import Parallel, delayed from .cross_validation import _safe_split, _fit_and_score +from .grid_search import ParameterGrid from .metrics.scorer import check_scoring @@ -225,7 +226,7 @@ def _incremental_fit_estimator(estimator, X, y, classes, train, test, return np.array((train_scores, test_scores)).T -def validation_curve(estimator, X, y, param_name, param_range, cv=None, +def validation_curve(estimator, X, y, param_grid, cv=None, scoring=None, n_jobs=1, pre_dispatch="all", verbose=0): """Validation curve. @@ -249,11 +250,9 @@ def validation_curve(estimator, X, y, param_name, param_range, cv=None, Target relative to X for classification or regression; None for unsupervised learning. - param_name : string - Name of the parameter that will be varied. - - param_range : array-like, shape (n_params,) - The values of the parameter that will be evaluated. 
+ param_grid : dict or list of dictionaries + Dictionary with parameters names (string) as keys and lists of + parameter settings to try as values. cv : integer, cross-validation generator, optional If an integer is passed, it is the number of folds (defaults to 3). @@ -301,14 +300,16 @@ def validation_curve(estimator, X, y, param_name, param_range, cv=None, scorer = [check_scoring(estimator, scoring=scoring)] one_scorer = True + param_grid = ParameterGrid(param_grid) + n_params = len(param_grid) + parallel = Parallel(n_jobs=n_jobs, pre_dispatch=pre_dispatch, verbose=verbose) out = parallel(delayed(_fit_and_score)( estimator, X, y, scorer, train, test, verbose, - parameters={param_name : v}, fit_params=None, return_train_scores=True) - for train, test in cv for v in param_range) + parameters=params, fit_params=None, return_train_scores=True) + for train, test in cv for params in param_grid) - n_params = len(param_range) n_folds = len(out) / n_params shape = (n_folds, n_params, -1) diff --git a/sklearn/tests/test_learning_curve.py b/sklearn/tests/test_learning_curve.py index 9d05d79aed28f..b3a908bf44699 100644 --- a/sklearn/tests/test_learning_curve.py +++ b/sklearn/tests/test_learning_curve.py @@ -238,25 +238,32 @@ def test_validation_curve(): n_redundant=0, n_classes=2, n_clusters_per_class=1, random_state=0) param_range = np.linspace(0, 1, 10) + param_grid = {"param": param_range} train_scores, test_scores = validation_curve(MockEstimatorWithParameter(), - X, y, param_name="param", - param_range=param_range, cv=2) + X, y, param_grid, cv=2) assert_array_almost_equal(train_scores.mean(axis=1), param_range) assert_array_almost_equal(test_scores.mean(axis=1), 1 - param_range) +def test_validation_curve_2d(): + X, y = make_classification(n_classes=2, random_state=0) + param_grid = {"C": [1, 10, 100], "fit_intercept": [True, False]} + clf = LinearSVC(random_state=0) + train_scores, test_scores = validation_curve(clf, X, y, param_grid, cv=2) + assert_equal(train_scores.shape, (6, 2)) + assert_equal(test_scores.shape, (6, 2)) + + def test_validation_curve_multiple_scorers(): X, y = make_classification(n_classes=2, random_state=0) clf = LinearSVC(random_state=0) - C = [0.1, 1, 10, 100] - train_scores, test_scores = validation_curve(clf, X, y, param_name="C", - param_range=C, cv=3, + param_grid = {"C": [0.1, 1, 10, 100]} + train_scores, test_scores = validation_curve(clf, X, y, param_grid, cv=3, scoring=["f1", "roc_auc"]) assert_equal(train_scores.shape, (2, 4, 3)) assert_equal(test_scores.shape, (2, 4, 3)) for i, scoring in enumerate(("f1", "roc_auc")): - tr, te = validation_curve(clf, X, y, param_name="C", param_range=C, - cv=3, scoring=scoring) + tr, te = validation_curve(clf, X, y, param_grid, cv=3, scoring=scoring) assert_array_almost_equal(train_scores[i], tr) assert_array_almost_equal(test_scores[i], te) From 34ba9062e3868f6a646d8c56a8013e2d10cfbf88 Mon Sep 17 00:00:00 2001 From: Mathieu Blondel Date: Mon, 10 Feb 2014 01:32:24 +0900 Subject: [PATCH 50/51] Return training times. 
--- examples/plot_validation_contours.py | 32 +++++++++++++++++++--------- examples/plot_validation_curve.py | 6 +++--- sklearn/learning_curve.py | 7 ++++-- sklearn/tests/test_learning_curve.py | 22 +++++++++++++------ 4 files changed, 46 insertions(+), 21 deletions(-) diff --git a/examples/plot_validation_contours.py b/examples/plot_validation_contours.py index 1e3b3edceaaa6..ac8c308f53a2b 100644 --- a/examples/plot_validation_contours.py +++ b/examples/plot_validation_contours.py @@ -11,9 +11,14 @@ def grid(X, y, Cs, gammas): param_grid = {"C": Cs, "gamma": gammas} - tr, te = validation_curve(SVC(kernel="rbf"), X, y, param_grid, cv=3) + tr, te, times = validation_curve(SVC(kernel="rbf"), X, y, param_grid, cv=3) - return te.mean(axis=1).reshape(len(Cs), len(gammas)) + shape = (len(Cs), len(gammas)) + tr = tr.mean(axis=1).reshape(shape) + te = te.mean(axis=1).reshape(shape) + times = times.mean(axis=1).reshape(shape) + + return tr, te, times digits = load_digits() X, y = digits.data, digits.target @@ -21,18 +26,25 @@ def grid(X, y, Cs, gammas): gammas = np.logspace(-6, -1, 5) Cs = np.logspace(-3, 3, 5) -scores = grid(X, y, Cs, gammas) +tr, te, times = grid(X, y, Cs, gammas) + + +for title, values in (("Training accuracy", tr), + ("Test accuracy", te), + ("Training time", times)): + plt.figure() -plt.xlabel("C") -plt.xscale("log") + plt.title(title) + plt.xlabel("C") + plt.xscale("log") -plt.ylabel("gamma") -plt.yscale("log") + plt.ylabel("gamma") + plt.yscale("log") -X1, X2 = np.meshgrid(Cs, gammas) -cs = plt.contour(X1, X2, scores) + X1, X2 = np.meshgrid(Cs, gammas) + cs = plt.contour(X1, X2, values) -plt.colorbar(cs) + plt.colorbar(cs) plt.show() diff --git a/examples/plot_validation_curve.py b/examples/plot_validation_curve.py index 7b5f05050183a..0c6a056089c0b 100644 --- a/examples/plot_validation_curve.py +++ b/examples/plot_validation_curve.py @@ -23,9 +23,9 @@ X, y = digits.data, digits.target param_range = np.logspace(-6, -1, 5) -train_scores, test_scores = validation_curve( - SVC(), X, y, param_name="gamma", param_range=param_range, - cv=10, scoring="accuracy", n_jobs=1) +param_grid = {"gamma": param_range} +train_scores, test_scores, train_times = validation_curve( + SVC(), X, y, param_grid, cv=10, scoring="accuracy", n_jobs=1) train_scores_mean = np.mean(train_scores, axis=1) train_scores_std = np.std(train_scores, axis=1) test_scores_mean = np.mean(test_scores, axis=1) diff --git a/sklearn/learning_curve.py b/sklearn/learning_curve.py index dbd0bbaf7e3d4..5185eb915ff0a 100644 --- a/sklearn/learning_curve.py +++ b/sklearn/learning_curve.py @@ -285,6 +285,9 @@ def validation_curve(estimator, X, y, param_grid, cv=None, (n_scorers, n_params, n_cv_folds) Scores on test set. + train_times : array, shape (n_params, n_cv_folds) + Training times. 
+ Notes ----- See @@ -316,10 +319,10 @@ def validation_curve(estimator, X, y, param_grid, cv=None, train_scores = np.array([o[0] for o in out]).reshape(shape).T test_scores = np.array([o[1] for o in out]).reshape(shape).T - #train_times = np.array([o[3] for o in out]).reshape(n_folds, n_params) + train_times = np.array([o[3] for o in out]).reshape(n_folds, n_params).T if one_scorer: train_scores = train_scores[0] test_scores = test_scores[0] - return train_scores, test_scores + return train_scores, test_scores, train_times diff --git a/sklearn/tests/test_learning_curve.py b/sklearn/tests/test_learning_curve.py index b3a908bf44699..72e8bda244e2c 100644 --- a/sklearn/tests/test_learning_curve.py +++ b/sklearn/tests/test_learning_curve.py @@ -239,8 +239,12 @@ def test_validation_curve(): n_clusters_per_class=1, random_state=0) param_range = np.linspace(0, 1, 10) param_grid = {"param": param_range} - train_scores, test_scores = validation_curve(MockEstimatorWithParameter(), - X, y, param_grid, cv=2) + est = MockEstimatorWithParameter() + train_scores, test_scores, train_times = validation_curve(est, X, y, + param_grid, cv=2) + assert_equal(train_scores.shape, (10, 2)) + assert_equal(test_scores.shape, (10, 2)) + assert_equal(train_times.shape, (10, 2)) assert_array_almost_equal(train_scores.mean(axis=1), param_range) assert_array_almost_equal(test_scores.mean(axis=1), 1 - param_range) @@ -249,21 +253,27 @@ def test_validation_curve_2d(): X, y = make_classification(n_classes=2, random_state=0) param_grid = {"C": [1, 10, 100], "fit_intercept": [True, False]} clf = LinearSVC(random_state=0) - train_scores, test_scores = validation_curve(clf, X, y, param_grid, cv=2) + train_scores, test_scores, train_times = validation_curve(clf, X, y, + param_grid, cv=2) assert_equal(train_scores.shape, (6, 2)) assert_equal(test_scores.shape, (6, 2)) + assert_equal(train_times.shape, (6, 2)) def test_validation_curve_multiple_scorers(): X, y = make_classification(n_classes=2, random_state=0) clf = LinearSVC(random_state=0) param_grid = {"C": [0.1, 1, 10, 100]} - train_scores, test_scores = validation_curve(clf, X, y, param_grid, cv=3, - scoring=["f1", "roc_auc"]) + scoring = ["f1", "roc_auc"] + train_scores, test_scores, train_times = validation_curve(clf, X, y, + param_grid, cv=3, + scoring=scoring) assert_equal(train_scores.shape, (2, 4, 3)) assert_equal(test_scores.shape, (2, 4, 3)) + assert_equal(train_times.shape, (4, 3)) for i, scoring in enumerate(("f1", "roc_auc")): - tr, te = validation_curve(clf, X, y, param_grid, cv=3, scoring=scoring) + tr, te, ti = validation_curve(clf, X, y, param_grid, cv=3, + scoring=scoring) assert_array_almost_equal(train_scores[i], tr) assert_array_almost_equal(test_scores[i], te) From 7317c3196317fc7c13e468da93ac7f5abb5c38bf Mon Sep 17 00:00:00 2001 From: Mathieu Blondel Date: Mon, 10 Feb 2014 01:35:33 +0900 Subject: [PATCH 51/51] Remove cross_val_report. 
--- sklearn/cross_validation.py | 101 ------------------------- sklearn/tests/test_cross_validation.py | 37 --------- 2 files changed, 138 deletions(-) diff --git a/sklearn/cross_validation.py b/sklearn/cross_validation.py index 0ea9acf867bde..46cf2e926333d 100644 --- a/sklearn/cross_validation.py +++ b/sklearn/cross_validation.py @@ -1131,107 +1131,6 @@ def cross_val_score(estimator, X, y=None, scoring=None, cv=None, n_jobs=1, return test_scores -def cross_val_report(estimator, X, y=None, scoring=None, cv=None, n_jobs=1, - verbose=0, fit_params=None, pre_dispatch='2*n_jobs'): - """Evaluate a score by cross-validation - - Parameters - ---------- - estimator : estimator object implementing 'fit' - The object to use to fit the data. - - X : array-like of shape at least 2D - The data to fit. - - y : array-like, optional, default: None - The target variable to try to predict in the case of - supervised learning. - - scoring : string, callable, list of strings/callables or None, optional, - default: None - A string (see model evaluation documentation) or - a scorer callable object / function with signature - ``scorer(estimator, X, y)``. - Lists can be used for randomized search of multiple metrics. - - cv : cross-validation generator, optional, default: None - A cross-validation generator. If None, a 3-fold cross - validation is used or 3-fold stratified cross-validation - when y is supplied and estimator is a classifier. - - n_jobs : integer, optional - The number of CPUs to use to do the computation. -1 means - 'all CPUs'. - - verbose : integer, optional - The verbosity level. - - fit_params : dict, optional - Parameters to pass to the fit method of the estimator. - - pre_dispatch : int, or string, optional - Controls the number of jobs that get dispatched during parallel - execution. Reducing this number can be useful to avoid an - explosion of memory consumption when more jobs get dispatched - than CPUs can process. This parameter can be: - - - None, in which case all the jobs are immediately - created and spawned. Use this for lightweight and - fast-running jobs, to avoid delays due to on-demand - spawning of the jobs - - - An int, giving the exact number of total jobs that are - spawned - - - A string, giving an expression as a function of n_jobs, - as in '2*n_jobs' - - Returns - ------- - train_scores : array of float, shape=(n_folds,) or (n_scoring, n_folds) - Array of trainng scores of the estimator for each run of the cross - validation. The returned array is 2d if `scoring` is a list. - - test_scores : array of float, shape=(n_folds,) or (n_scoring, n_folds) - Array of test scores of the estimator for each run of the cross - validation. The returned array is 2d if `scoring` is a list. - - train_times : array of float, shape=(n_folds,) - Array of training times of the estimator for each run of the cross - validation. - """ - X, y = check_arrays(X, y, sparse_format='csr', allow_lists=True) - cv = _check_cv(cv, X, y, classifier=is_classifier(estimator)) - - if isinstance(scoring, (tuple, list)): - scorers = [check_scoring(estimator, scoring=s) for s in scoring] - ret_1d = False - else: - scorers = [check_scoring(estimator, scoring=scoring)] - ret_1d = True - - parallel = Parallel(n_jobs=n_jobs, verbose=verbose, - pre_dispatch=pre_dispatch) - - # `out` is a list of size n_folds. 
Each element of the list is a tuple - # (train_scores, test_scores, n_test, train_time) - out = parallel(delayed(_fit_and_score)(clone(estimator), X, y, scorers, - train, test, verbose, None, - fit_params, - return_train_scores=True) - for train, test in cv) - - # Retrieve n_scorers x n_folds 2d-array. - train_scores = np.array([o[0] for o in out]).T - test_scores = np.array([o[1] for o in out]).T - train_times = np.array([o[3] for o in out]) - - if ret_1d: - return train_scores[0], test_scores[0], train_times - else: - return train_scores, test_scores, train_times - - def _fit_and_score(estimator, X, y, scorers, train, test, verbose, parameters, fit_params, return_train_scores=False): """Fit estimator and compute scores for a given dataset split. diff --git a/sklearn/tests/test_cross_validation.py b/sklearn/tests/test_cross_validation.py index 10c29fa4882a3..f1e5cc22731ce 100644 --- a/sklearn/tests/test_cross_validation.py +++ b/sklearn/tests/test_cross_validation.py @@ -558,43 +558,6 @@ class BrokenEstimator: assert_raises(TypeError, cval.cross_val_score, BrokenEstimator(), X) -def test_cross_val_report(): - X, y = make_classification(n_classes=2, random_state=0) - clf = Perceptron(random_state=0) - - tr_scores, te_scores, tr_times = cval.cross_val_report(clf, X, y, cv=3, - scoring="f1") - assert_equal(tr_scores.shape, (3,)) - assert_equal(te_scores.shape, (3,)) - assert_equal(tr_times.shape, (3,)) - - assert_greater(tr_scores.mean(), te_scores.mean()) - - -def test_cross_val_report_multiple_scorers(): - X, y = make_classification(n_classes=2, random_state=0) - clf = Perceptron(random_state=0) - - tr_scores, te_scores, tr_times = cval.cross_val_report(clf, X, y, cv=3, - scoring=["f1", - "roc_auc"]) - assert_equal(tr_scores.shape, (2, 3)) - assert_equal(te_scores.shape, (2, 3)) - assert_equal(tr_times.shape, (3,)) - - # Check that the results are the same as when cross_val_report is called - # individually. - f1_tr, f1_te, _ = cval.cross_val_report(clf, X, y, cv=3, scoring="f1") - auc_tr, auc_te, _ = cval.cross_val_report(clf, X, y, cv=3, - scoring="roc_auc") - - assert_array_almost_equal(tr_scores[0], f1_tr) - assert_array_almost_equal(te_scores[0], f1_te) - assert_array_almost_equal(tr_scores[1], auc_tr) - assert_array_almost_equal(te_scores[1], auc_te) - - - def test_train_test_split_errors(): assert_raises(ValueError, cval.train_test_split) assert_raises(ValueError, cval.train_test_split, range(3), train_size=1.1)