WIP new, simpler scorer API #2123

Closed
wants to merge 2 commits into from
4 changes: 2 additions & 2 deletions doc/modules/classes.rst
@@ -671,9 +671,9 @@ Model Selection Interface
-------------------------
.. autosummary::
:toctree: generated/
:template: class_with_call.rst
:template: function.rst

metrics.Scorer
metrics.make_scorer

Classification metrics
----------------------
28 changes: 12 additions & 16 deletions doc/modules/model_evaluation.rst
@@ -943,16 +943,16 @@ Creating scoring objects from score functions
If you want to use a scoring function that takes additional parameters, such as
:func:`fbeta_score`, you need to generate an appropriate scoring object. The
simplest way to generate a callable object for scoring is by using
:class:`Scorer`.
:class:`Scorer` converts score functions as above into callables that can be
:func:`make_scorer`.
That function converts score functions as above into callables that can be
used for model evaluation.

One typical use case is to wrap an existing scoring function from the library
with a non-default value for its parameters, such as the ``beta`` parameter for the
:func:`fbeta_score` function::

>>> from sklearn.metrics import fbeta_score, Scorer
>>> ftwo_scorer = Scorer(fbeta_score, beta=2)
>>> from sklearn.metrics import fbeta_score, make_scorer
>>> ftwo_scorer = make_scorer(fbeta_score, beta=2)
>>> from sklearn.grid_search import GridSearchCV
>>> from sklearn.svm import LinearSVC
>>> grid = GridSearchCV(LinearSVC(), param_grid={'C': [1, 10]}, scoring=ftwo_scorer)
@@ -964,10 +964,10 @@ from a simple python function::
... diff = np.abs(ground_truth - predictions).max()
... return np.log(1 + diff)
...
>>> my_custom_scorer = Scorer(my_custom_loss_func, greater_is_better=False)
>>> my_custom_scorer = make_scorer(my_custom_loss_func, greater_is_better=False)
>>> grid = GridSearchCV(LinearSVC(), param_grid={'C': [1, 10]}, scoring=my_custom_scorer)

:class:`Scorer` takes as parameters the function you want to use, whether it is
:func:`make_scorer` takes as parameters the function you want to use, whether it is
a score (``greater_is_better=True``) or a loss (``greater_is_better=False``),
whether the function you provided takes predictions as input
(``needs_threshold=False``) or needs confidence scores
@@ -978,22 +978,18 @@ the previous example.
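
A minimal sketch of the ``needs_threshold=True`` case (not taken from this
diff; ``my_threshold_metric`` is a hypothetical metric that consumes
confidence scores rather than predicted labels)::

>>> import numpy as np
>>> from sklearn.metrics import make_scorer
>>> def my_threshold_metric(ground_truth, scores):
...     # ``scores`` comes from decision_function (or predict_proba), so the
...     # wrapped function sees continuous confidences, not class labels
...     return np.mean(scores[ground_truth == 1]) - np.mean(scores[ground_truth == 0])
...
>>> confidence_scorer = make_scorer(my_threshold_metric, needs_threshold=True)

Because ``greater_is_better`` defaults to ``True``, larger values of this
hypothetical metric count as better during model selection.
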
Implementing your own scoring object
------------------------------------
You can generate even more flexible model scores by constructing your own
scoring object from scratch, without using the :class:`Scorer` helper class.
The requirements that a callable can be used for model selection are as
follows:
scoring object from scratch, without using the :func:`make_scorer` factory.
For a callable to be a scorer, it needs to meet the protocol specified by
the following two rules:

- It can be called with parameters ``(estimator, X, y)``, where ``estimator``
is the model that should be evaluated, ``X`` is validation data and ``y`` is
the ground truth target for ``X`` (in the supervised case) or ``None`` in the
unsupervised case.

- The call returns a number indicating the quality of estimator.

- The callable has a boolean attribute ``greater_is_better`` which indicates whether
high or low values correspond to a better estimator.

Objects that meet those conditions as said to implement the sklearn Scorer
protocol.
- It returns either a floating point number (the score), or a tuple, the
first element of which is a float. The additional values are used by the
``report`` method on ``GridSearchCV`` and ``RandomizedSearchCV``.
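
A minimal sketch of such a hand-rolled scorer (hypothetical, not part of this
diff), following the two rules above::

>>> import numpy as np
>>> from sklearn.grid_search import GridSearchCV
>>> from sklearn.svm import LinearSVC
>>> def my_scorer(estimator, X, y):
...     pred = estimator.predict(X)
...     accuracy = float(np.mean(pred == y))
...     # the first element drives model selection; the remaining values are
...     # only surfaced by the ``report`` method
...     return accuracy, 1.0 - accuracy
...
>>> grid = GridSearchCV(LinearSVC(), param_grid={'C': [1, 10]}, scoring=my_scorer)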


.. _dummy_estimators:
23 changes: 14 additions & 9 deletions examples/grid_search_text_feature_extraction.py
@@ -29,18 +29,16 @@
'vect__max_features': (None, 5000, 10000, 50000)}
done in 1737.030s

Best score: 0.940
Best score: 0.923

Member

Why did that change?

Member Author

I changed the scoring from accuracy (the default) to F1 score to demo and test the structured return values from f_scorer. F1 score ≤ accuracy, which is also why the best parameter set changed.

Best parameters set:
clf__alpha: 9.9999999999999995e-07
clf__n_iter: 50
clf__penalty: 'elasticnet'
tfidf__use_idf: True
vect__max_n: 2
vect__max_df: 0.75
vect__max_features: 50000
clf__alpha: 1e-06
clf__penalty: 'l2'
vect__max_df: 1.0
vect__ngram_range: (1, 2)

"""


# Author: Olivier Grisel <olivier.grisel@ensta.org>
# Peter Prettenhofer <peter.prettenhofer@gmail.com>
# Mathieu Blondel <mathieu@mblondel.org>
@@ -49,6 +47,7 @@
from __future__ import print_function

from pprint import pprint
import sys
from time import time
import logging

@@ -111,7 +110,8 @@

# find the best parameters for both the feature extraction and the
# classifier
grid_search = GridSearchCV(pipeline, parameters, n_jobs=-1, verbose=1)
grid_search = GridSearchCV(pipeline, parameters, n_jobs=-1, verbose=1,
scoring="f1")

print("Performing grid search...")
print("pipeline:", [name for name, _ in pipeline.steps])
@@ -127,3 +127,8 @@
best_parameters = grid_search.best_estimator_.get_params()
for param_name in sorted(parameters.keys()):
print("\t%s: %r" % (param_name, best_parameters[param_name]))

# Uncomment the following line to get a detailed (and long!) report
# about the cross-validation results, including precision and recall
# per fold for all settings.
#grid_search.report(sys.stdout)
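
A hedged sketch of the alternative usage (not part of this diff): per the
``report`` docstring added in ``grid_search.py``, calling it with no file
argument returns the per-fold report as a string instead of writing it to a
stream::

# assuming grid_search has already been fit, as in the example above
report_text = grid_search.report()
print(report_text[:500])  # peek at the first few per-fold entries
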
6 changes: 3 additions & 3 deletions sklearn/cross_validation.py
@@ -24,7 +24,7 @@
from .utils.fixes import unique
from .externals.joblib import Parallel, delayed
from .externals.six import string_types, with_metaclass
from .metrics import SCORERS, Scorer
from .metrics import make_scorer, SCORERS

__all__ = ['Bootstrap',
'KFold',
@@ -1136,7 +1136,7 @@ def cross_val_score(estimator, X, y=None, scoring=None, cv=None, n_jobs=1,
warnings.warn("Passing function as ``score_func`` is "
"deprecated and will be removed in 0.15. "
"Either use strings or score objects.", stacklevel=2)
scorer = Scorer(score_func)
scorer = make_scorer(score_func)
elif isinstance(scoring, string_types):
scorer = SCORERS[scoring]
else:
@@ -1299,7 +1299,7 @@ def permutation_test_score(estimator, X, y, scoring=None, cv=None,
warnings.warn("Passing function as ``score_func`` is "
"deprecated and will be removed in 0.15. "
"Either use strings or score objects.")
scorer = Scorer(score_func)
scorer = make_scorer(score_func)
elif isinstance(scoring, string_types):
scorer = SCORERS[scoring]
else:
70 changes: 53 additions & 17 deletions sklearn/grid_search.py
@@ -11,7 +11,7 @@
# License: BSD 3 clause

from abc import ABCMeta, abstractmethod
from collections import Mapping, namedtuple, Sized
from collections import Mapping, namedtuple, Sequence, Sized
from functools import partial, reduce
from itertools import product
import numbers
@@ -28,7 +28,7 @@
from .externals import six
from .utils import safe_mask, check_random_state
from .utils.validation import _num_samples, check_arrays
from .metrics import SCORERS, Scorer
from .metrics import make_scorer, SCORERS


__all__ = ['GridSearchCV', 'ParameterGrid', 'fit_grid_point',
@@ -316,8 +316,10 @@ def fit_grid_point(X, y, base_estimator, parameters, train, test, scorer,
else:
this_score = clf.score(X_test)

if not isinstance(this_score, numbers.Number):
raise ValueError("scoring must return a number, got %s (%s)"
if not isinstance(this_score, numbers.Number) \
and not (isinstance(this_score, Sequence)
and isinstance(this_score[0], numbers.Number)):
raise ValueError("scoring must return a number or tuple, got %s (%s)"
" instead." % (str(this_score), type(this_score)))

if verbose > 2:
@@ -364,10 +366,17 @@ class _CVScoreTuple (namedtuple('_CVScoreTuple',

def __repr__(self):
"""Simple custom repr to summarize the main info"""
std = np.std([sc if isinstance(sc, numbers.Number) else sc[0]
for sc in self.cv_validation_scores])

Member

Two remarks independent from this PR but that I think should be addressed now (i.e. before merge):

  • cv_validation_scores should have an underscore after
  • The repr should not be modified from the default. The str should (guidelines for repr is that it is the information required to recreate the object, see numpy arrays for instance)

Member Author

The current state of affairs in master is that repr is overloaded. Btw., the user is not supposed to recreate objects of this class.

Member

> The current state of affairs in master is that repr is overloaded.

I know, and I think that it is wrong.

Member

> cv_validation_scores should have an underscore after

I don't think so. It's not an attrib of an estimator, but an attrib of an object returned by an underscored attrib of an estimator.

Member

> I don't think so. It's not an attrib of an estimator, but an attrib of an object returned by an underscored attrib of an estimator.

Fair enough. But I still think that it would be good (not mandatory,
though).


return "mean: {0:.5f}, std: {1:.5f}, params: {2}".format(
self.mean_validation_score,
np.std(self.cv_validation_scores),
self.parameters)
self.mean_validation_score, std, self.parameters)

def __str__(self):
"""More extensive reporting than from repr."""
per_fold = ("\n fold {0}: {1}".format(i, sc)
for i, sc in enumerate(self.cv_validation_scores))
return repr(self) + "".join(per_fold)


class BaseSearchCV(six.with_metaclass(ABCMeta, BaseEstimator,
@@ -392,6 +401,33 @@ def __init__(self, estimator, scoring=None, loss_func=None,
self.pre_dispatch = pre_dispatch
self._check_estimator()

def report(self, file=None):

Member

I'm not convinced by the format of this. Do we really need a report function that's little different from pprint(search.cv_scores_)?

Member

Or, indeed which is identical to print(*search.cv_scores_, file=file, sep='\n')?

Member

I think it would be much more useful to output something like a CSV, but that requires interpreting the data more.

Member Author

It's a proof of concept. I wanted to make clear in some way that just print(cv_scores_) doesn't give all the information. If you know a better solution (e.g. document the pprint trick?), I'm up for suggestions.

Member

> I'm not convinced by the format of this. Do we really need a report function that's little different from pprint(search.cv_scores_)?

In the long run, we might want such features, but in the short run, I'd
rather avoid.

Member

> e.g. document the pprint trick?)

I think that teaching people to use pprint is a good idea.

Member

I don't think pprint is wonderful either. Afaik it only knows about the basic standard collections (list, tuple, dict) and reprs everything else, including namedtuples, defaultdicts, arrays, etc, on the basis that its output should be evalable (except that most repr implementations don't support that).

"""Generate a report of the scores achieved.

Reports on the scores achieved across the folds for the various
parameter settings tried. This also prints the additional information
reported by some scorers, such as "f1", which tracks precision and
recall as well.

Parameters
----------
file : file-like, optional
File to which the report is written. If None or not given, the
report is returned as a string.
"""
if not hasattr(self, "cv_scores_"):
raise AttributeError("no cv_scores_ found; run fit first")

return_string = (file is None)
if return_string:
file = six.StringIO()

for cvs in self.cv_scores_:
print(cvs, file=file)

if return_string:
return file.getvalue()

def score(self, X, y=None):
"""Returns the score on the given test data and labels, if the search
estimator has been refit. The ``score`` function of the best estimator
@@ -465,13 +501,13 @@ def _fit(self, X, y, parameter_iterable):
"deprecated and will be removed in 0.15. "
"Either use strings or score objects."
"The relevant new parameter is called ''scoring''. ")
scorer = Scorer(self.loss_func, greater_is_better=False)
scorer = make_scorer(self.loss_func, greater_is_better=False)
elif self.score_func is not None:
warnings.warn("Passing function as ``score_func`` is "
"deprecated and will be removed in 0.15. "
"Either use strings or score objects."
"The relevant new parameter is called ''scoring''.")
scorer = Scorer(self.score_func)
scorer = make_scorer(self.score_func)
elif isinstance(self.scoring, six.string_types):
scorer = SCORERS[self.scoring]
else:
@@ -507,7 +543,7 @@ def _fit(self, X, y, parameter_iterable):
for parameters in parameter_iterable
for train, test in cv)

# Out is a list of triplet: score, estimator, n_test_samples
# Out is a list of triples: score, parameters, n_test_samples
n_fits = len(out)
n_folds = len(cv)

@@ -519,7 +555,11 @@
all_scores = []
for this_score, parameters, this_n_test_samples in \
out[grid_start:grid_start + n_folds]:
all_scores.append(this_score)
full_info = this_score
if isinstance(this_score, Sequence):
# Structured score.
this_score = this_score[0]
all_scores.append(full_info)
if self.iid:
this_score *= this_n_test_samples
n_test_samples += this_n_test_samples
Expand All @@ -530,18 +570,14 @@ def _fit(self, X, y, parameter_iterable):
score /= float(n_folds)
scores.append((score, parameters))
# TODO: shall we also store the test_fold_sizes?
cv_scores.append(_CVScoreTuple(
parameters,
score,
np.array(all_scores)))
cv_scores.append(_CVScoreTuple(parameters, score, all_scores))

Member

Hmm... Sticking all the scores in one field has its advantages, but it's not clear how we fit training scores or times in here without changing the length of the namedtuple (breaking forwards compatibility), or without somehow modifying and restructuring the namedtuple returned by the scorer. I still think _CVScoreTuple has to go. But that may not be within the scope of this PR (but is one reason I think the multiple metrics thing isn't within the scope of this PR, either).

Member

+1 on both accounts.

# Store the computed scores
self.cv_scores_ = cv_scores

# Find the best parameters by comparing on the mean validation score:
# note that `sorted` is deterministic in the way it breaks ties
greater_is_better = getattr(self.scorer_, 'greater_is_better', True)
best = sorted(cv_scores, key=lambda x: x.mean_validation_score,
reverse=greater_is_better)[0]
reverse=True)[0]
self.best_params_ = best.parameters
self.best_score_ = best.mean_validation_score

4 changes: 2 additions & 2 deletions sklearn/metrics/__init__.py
@@ -30,7 +30,7 @@
from .metrics import zero_one
from .metrics import zero_one_score

from .scorer import Scorer, SCORERS
from .scorer import make_scorer, SCORERS

from . import cluster
from .cluster import (adjusted_rand_score,
Expand Down Expand Up @@ -85,5 +85,5 @@
'silhouette_samples',
'v_measure_score',
'zero_one_loss',
'Scorer',
'make_scorer',
'SCORERS']