From b217697e4c3b6e2cf5f01cb87fe7a094b2168669 Mon Sep 17 00:00:00 2001 From: Alexander Fabisch Date: Thu, 9 Jan 2014 09:18:02 +0100 Subject: [PATCH 01/51] Refactor cv code --- sklearn/cross_validation.py | 153 ++++++++++++++++++++++++------------ sklearn/grid_search.py | 75 +----------------- sklearn/learning_curve.py | 2 +- 3 files changed, 103 insertions(+), 127 deletions(-) diff --git a/sklearn/cross_validation.py b/sklearn/cross_validation.py index 84045105d56a9..06d909ce35dfa 100644 --- a/sklearn/cross_validation.py +++ b/sklearn/cross_validation.py @@ -22,6 +22,7 @@ from .base import is_classifier, clone from .utils import check_arrays, check_random_state, safe_mask +from .utils.validation import _num_samples from .utils.fixes import unique from .externals.joblib import Parallel, delayed from .externals.six import string_types, with_metaclass @@ -1023,48 +1024,6 @@ def __len__(self): ############################################################################## -def _cross_val_score(estimator, X, y, scorer, train, test, verbose, - fit_params): - """Inner loop for cross validation""" - n_samples = X.shape[0] if sp.issparse(X) else len(X) - fit_params = dict([(k, np.asarray(v)[train] - if hasattr(v, '__len__') and len(v) == n_samples else v) - for k, v in fit_params.items()]) - if not hasattr(X, "shape"): - if getattr(estimator, "_pairwise", False): - raise ValueError("Precomputed kernels or affinity matrices have " - "to be passed as arrays or sparse matrices.") - X_train = [X[idx] for idx in train] - X_test = [X[idx] for idx in test] - else: - if getattr(estimator, "_pairwise", False): - # X is a precomputed square kernel matrix - if X.shape[0] != X.shape[1]: - raise ValueError("X should be a square kernel matrix") - X_train = X[np.ix_(train, train)] - X_test = X[np.ix_(test, train)] - else: - X_train = X[safe_mask(X, train)] - X_test = X[safe_mask(X, test)] - - if y is None: - y_train = None - y_test = None - else: - y_train = y[train] - y_test = y[test] - estimator.fit(X_train, y_train, **fit_params) - if scorer is None: - score = estimator.score(X_test, y_test) - else: - score = scorer(estimator, X_test, y_test) - if not isinstance(score, numbers.Number): - raise ValueError("scoring must return a number, got %s (%s)" - " instead." % (str(score), type(score))) - if verbose > 1: - print("score: %f" % score) - return score - def cross_val_score(estimator, X, y=None, scoring=None, cv=None, n_jobs=1, verbose=0, fit_params=None, score_func=None, @@ -1127,16 +1086,9 @@ def cross_val_score(estimator, X, y=None, scoring=None, cv=None, n_jobs=1, """ X, y = check_arrays(X, y, sparse_format='csr', allow_lists=True) cv = _check_cv(cv, X, y, classifier=is_classifier(estimator)) - scorer = _deprecate_loss_and_score_funcs( - loss_func=None, - score_func=score_func, - scoring=scoring - ) - if scorer is None and not hasattr(estimator, 'score'): - raise TypeError( - "If no scoring is specified, the estimator passed " - "should have a 'score' method. The estimator %s " - "does not." % estimator) + _check_scorable(estimator, score_func=score_func, scoring=scoring) + scorer = _deprecate_loss_and_score_funcs(score_func=score_func, + scoring=scoring) # We clone the estimator to make sure that all the folds are # independent, and that it is pickle-able. 
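# ---------------------------------------------------------------------------
# [Editorial sketch -- not part of the patch above.] For orientation, the
# public entry point whose internals this patch series reshuffles is
# cross_val_score; its observable behaviour is meant to stay the same. A
# minimal, hedged usage example (iris and SVC are illustrative stand-ins,
# assuming a scikit-learn checkout of this vintage):
from sklearn import datasets, svm
from sklearn.cross_validation import cross_val_score

iris = datasets.load_iris()
clf = svm.SVC(kernel='linear', C=1)
# One score per CV fold; scorer resolution and per-fold fit/score are exactly
# the pieces being factored into helpers in this commit.
scores = cross_val_score(clf, iris.data, iris.target, cv=5, scoring='accuracy')
# ---------------------------------------------------------------------------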
fit_params = fit_params if fit_params is not None else {} @@ -1149,6 +1101,85 @@ def cross_val_score(estimator, X, y=None, scoring=None, cv=None, n_jobs=1, return np.array(scores) +def _cross_val_score(estimator, X, y, scorer, train, test, verbose, + fit_params): + """Inner loop for cross validation""" + # TODO replace with grid_search.fit_grid_point() + n_samples = _num_samples(X) + fit_params = dict([(k, np.asarray(v)[train] # TODO why is this necessary? + if hasattr(v, '__len__') and len(v) == n_samples else v) + for k, v in fit_params.items()]) + + X_train, y_train = _split(estimator, X, y, train) + X_test, y_test = _split(estimator, X, y, test, train) + estimator.fit(X_train, y_train, **fit_params) + score = _score(estimator, X_test, y_test, scorer) + + if verbose > 1: + print("score: %f" % score) + return score + + +def _split(estimator, X, y, indices, train_indices=None): + """Create subset of dataset.""" + if hasattr(estimator, 'kernel') and callable(estimator.kernel): + # cannot compute the kernel values with custom function + raise ValueError("Cannot use a custom kernel function. " + "Precompute the kernel matrix instead.") + + if not hasattr(X, "shape"): + if getattr(estimator, "_pairwise", False): + raise ValueError("Precomputed kernels or affinity matrices have " + "to be passed as arrays or sparse matrices.") + X_subset = [X[idx] for idx in indices] + else: + if getattr(estimator, "_pairwise", False): + # X is a precomputed square kernel matrix + if X.shape[0] != X.shape[1]: + raise ValueError("X should be a square kernel matrix") + if train_indices is None: + X_subset = X[np.ix_(indices, indices)] + else: + X_subset = X[np.ix_(indices, train_indices)] + else: + X_subset = X[safe_mask(X, indices)] + + if y is not None: + y_subset = y[safe_mask(y, indices)] + else: + y_subset = None + + return X_subset, y_subset + + +def _fit(fit_function, X_train, y_train, **fit_params): + """Fit and estimator on a given training set.""" + if y_train is None: + fit_function(X_train, **fit_params) + else: + fit_function(X_train, y_train, **fit_params) + + +def _score(estimator, X_test, y_test, scorer): + """Compute the score of an estimator on a given test set.""" + if y_test is None: + if scorer is None: + score = estimator.score(X_test) + else: + score = scorer(estimator, X_test) + else: + if scorer is None: + score = estimator.score(X_test, y_test) + else: + score = scorer(estimator, X_test, y_test) + + if not isinstance(score, numbers.Number): + raise ValueError("scoring must return a number, got %s (%s) instead." + % (str(score), type(score))) + + return score + + def _permutation_test_score(estimator, X, y, cv, scorer): """Auxiliary function for permutation_test_score""" avg_score = [] @@ -1226,6 +1257,24 @@ def _check_cv(cv, X=None, y=None, classifier=False, warn_mask=False): return cv +def _check_scorable(estimator, scoring=None, loss_func=None, score_func=None): + """Check that estimator can be fitted and score can be computed.""" + if (not hasattr(estimator, 'fit') or + not (hasattr(estimator, 'predict') + or hasattr(estimator, 'score'))): + raise TypeError("estimator should a be an estimator implementing" + " 'fit' and 'predict' or 'score' methods," + " %s (type %s) was passed" % + (estimator, type(estimator))) + if (scoring is None and loss_func is None and score_func + is None): + if not hasattr(estimator, 'score'): + raise TypeError( + "If no scoring is specified, the estimator passed " + "should have a 'score' method. The estimator %s " + "does not." 
% estimator) + + def permutation_test_score(estimator, X, y, score_func=None, cv=None, n_permutations=100, n_jobs=1, labels=None, random_state=0, verbose=0, scoring=None): diff --git a/sklearn/grid_search.py b/sklearn/grid_search.py index 108d320139c2e..b3fa04dc4bc8a 100644 --- a/sklearn/grid_search.py +++ b/sklearn/grid_search.py @@ -24,6 +24,7 @@ from .base import BaseEstimator, is_classifier, clone from .base import MetaEstimatorMixin from .cross_validation import _check_cv as check_cv +from .cross_validation import _check_scorable, _split, _fit, _score from .externals.joblib import Parallel, delayed, logger from .externals import six from .utils import safe_mask, check_random_state @@ -255,62 +256,6 @@ def fit_grid_point(X, y, base_estimator, parameters, train, test, scorer, return this_score, parameters, _num_samples(X_test) -def _split(estimator, X, y, indices, train_indices=None): - """Create subset of dataset.""" - if hasattr(estimator, 'kernel') and callable(estimator.kernel): - # cannot compute the kernel values with custom function - raise ValueError("Cannot use a custom kernel function. " - "Precompute the kernel matrix instead.") - - if not hasattr(X, "shape"): - if getattr(estimator, "_pairwise", False): - raise ValueError("Precomputed kernels or affinity matrices have " - "to be passed as arrays or sparse matrices.") - X_subset = [X[idx] for idx in indices] - else: - if getattr(estimator, "_pairwise", False): - # X is a precomputed square kernel matrix - if X.shape[0] != X.shape[1]: - raise ValueError("X should be a square kernel matrix") - if train_indices is None: - X_subset = X[np.ix_(indices, indices)] - else: - X_subset = X[np.ix_(indices, train_indices)] - else: - X_subset = X[safe_mask(X, indices)] - - if y is not None: - y_subset = y[safe_mask(y, indices)] - else: - y_subset = None - - return X_subset, y_subset - - -def _fit(fit_function, X_train, y_train, **fit_params): - """Fit and estimator on a given training set.""" - if y_train is None: - fit_function(X_train, **fit_params) - else: - fit_function(X_train, y_train, **fit_params) - - -def _score(estimator, X_test, y_test, scorer): - """Compute the score of an estimator on a given test set.""" - if y_test is None: - if scorer is None: - this_score = estimator.score(X_test) - else: - this_score = scorer(estimator, X_test) - else: - if scorer is None: - this_score = estimator.score(X_test, y_test) - else: - this_score = scorer(estimator, X_test, y_test) - - return this_score - - def _check_param_grid(param_grid): if hasattr(param_grid, 'items'): param_grid = [param_grid] @@ -351,24 +296,6 @@ def __repr__(self): self.parameters) -def _check_scorable(estimator, scoring=None, loss_func=None, score_func=None): - """Check that estimator can be fitted and score can be computed.""" - if (not hasattr(estimator, 'fit') or - not (hasattr(estimator, 'predict') - or hasattr(estimator, 'score'))): - raise TypeError("estimator should a be an estimator implementing" - " 'fit' and 'predict' or 'score' methods," - " %s (type %s) was passed" % - (estimator, type(estimator))) - if (scoring is None and loss_func is None and score_func - is None): - if not hasattr(estimator, 'score'): - raise TypeError( - "If no scoring is specified, the estimator passed " - "should have a 'score' method. The estimator %s " - "does not." 
% estimator) - - class BaseSearchCV(six.with_metaclass(ABCMeta, BaseEstimator, MetaEstimatorMixin)): """Base class for hyper parameter search with cross-validation.""" diff --git a/sklearn/learning_curve.py b/sklearn/learning_curve.py index 406f28b12c280..f17c9a5a9fe30 100644 --- a/sklearn/learning_curve.py +++ b/sklearn/learning_curve.py @@ -11,7 +11,7 @@ from .utils import check_arrays from .externals.joblib import Parallel, delayed from .metrics.scorer import get_scorer -from .grid_search import _check_scorable, _split, _fit, _score +from .cross_validation import _check_scorable, _split, _fit, _score def learning_curve(estimator, X, y, train_sizes=np.linspace(0.1, 1.0, 10), From c4d6278c83859de73bb6c53a62dc98973d6a9c79 Mon Sep 17 00:00:00 2001 From: Alexander Fabisch Date: Thu, 9 Jan 2014 23:37:01 +0100 Subject: [PATCH 02/51] Clean up --- sklearn/cross_validation.py | 5 ++--- sklearn/grid_search.py | 11 +++++------ 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/sklearn/cross_validation.py b/sklearn/cross_validation.py index 06d909ce35dfa..4d42a626c24eb 100644 --- a/sklearn/cross_validation.py +++ b/sklearn/cross_validation.py @@ -1091,7 +1091,6 @@ def cross_val_score(estimator, X, y=None, scoring=None, cv=None, n_jobs=1, scoring=scoring) # We clone the estimator to make sure that all the folds are # independent, and that it is pickle-able. - fit_params = fit_params if fit_params is not None else {} parallel = Parallel(n_jobs=n_jobs, verbose=verbose, pre_dispatch=pre_dispatch) scores = parallel( @@ -1104,15 +1103,15 @@ def cross_val_score(estimator, X, y=None, scoring=None, cv=None, n_jobs=1, def _cross_val_score(estimator, X, y, scorer, train, test, verbose, fit_params): """Inner loop for cross validation""" - # TODO replace with grid_search.fit_grid_point() n_samples = _num_samples(X) + fit_params = fit_params if fit_params is not None else {} fit_params = dict([(k, np.asarray(v)[train] # TODO why is this necessary? 
if hasattr(v, '__len__') and len(v) == n_samples else v) for k, v in fit_params.items()]) X_train, y_train = _split(estimator, X, y, train) X_test, y_test = _split(estimator, X, y, test, train) - estimator.fit(X_train, y_train, **fit_params) + _fit(estimator.fit, X_train, y_train, **fit_params) score = _score(estimator, X_test, y_test, scorer) if verbose > 1: diff --git a/sklearn/grid_search.py b/sklearn/grid_search.py index b3fa04dc4bc8a..bdbc26c9436c6 100644 --- a/sklearn/grid_search.py +++ b/sklearn/grid_search.py @@ -243,17 +243,16 @@ def fit_grid_point(X, y, base_estimator, parameters, train, test, scorer, X_train, y_train = _split(estimator, X, y, train) X_test, y_test = _split(estimator, X, y, test, train) _fit(estimator.fit, X_train, y_train, **fit_params) - this_score = _score(estimator, X_test, y_test, scorer) + score = _score(estimator, X_test, y_test, scorer) if verbose > 2: - msg += ", score=%f" % this_score + msg += ", score=%f" % score if verbose > 1: - end_msg = "%s -%s" % (msg, - logger.short_format_time(time.time() - - start_time)) + end_msg = "%s -%s" % (msg, logger.short_format_time(time.time() - + start_time)) print("[GridSearchCV] %s %s" % ((64 - len(end_msg)) * '.', end_msg)) - return this_score, parameters, _num_samples(X_test) + return score, parameters, _num_samples(X_test) def _check_param_grid(param_grid): From 1599952d022fee81fc043a712dee4eae5a2dae5a Mon Sep 17 00:00:00 2001 From: Alexander Fabisch Date: Fri, 10 Jan 2014 09:27:43 +0100 Subject: [PATCH 03/51] Refactor RFE and add _check_scorable --- sklearn/feature_selection/rfe.py | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/sklearn/feature_selection/rfe.py b/sklearn/feature_selection/rfe.py index cc80d7ffdcbda..49820742289f8 100644 --- a/sklearn/feature_selection/rfe.py +++ b/sklearn/feature_selection/rfe.py @@ -13,6 +13,7 @@ from ..base import clone from ..base import is_classifier from ..cross_validation import _check_cv as check_cv +from ..cross_validation import _check_scorable, _split, _score from .base import SelectorMixin from ..metrics.scorer import _deprecate_loss_and_score_funcs @@ -325,29 +326,31 @@ def fit(self, X, y): verbose=self.verbose - 1) cv = check_cv(self.cv, X, y, is_classifier(self.estimator)) + _check_scorable(self.estimator, scoring=self.scoring, + loss_func=self.loss_func) scores = np.zeros(X.shape[1]) # Cross-validation for n, (train, test) in enumerate(cv): - X_train, X_test = X[train], X[test] - y_train, y_test = y[train], y[test] + X_train, y_train = _split(self.estimator, X, y, train) + X_test, y_test = _split(self.estimator, X, y, test, train) # Compute a full ranking of the features ranking_ = rfe.fit(X_train, y_train).ranking_ # Score each subset of features for k in range(0, max(ranking_)): mask = np.where(ranking_ <= k + 1)[0] + X_train_subset = X_train[:, mask] + X_test_subset = X_test[:, mask] + estimator = clone(self.estimator) - estimator.fit(X_train[:, mask], y_train) - - if self.loss_func is None and self.scoring is None: - score = estimator.score(X_test[:, mask], y_test) - else: - scorer = _deprecate_loss_and_score_funcs( - loss_func=self.loss_func, - scoring=self.scoring - ) - score = scorer(estimator, X_test[:, mask], y_test) + estimator.fit(X_train_subset, y_train) + + scorer = _deprecate_loss_and_score_funcs( + loss_func=self.loss_func, + scoring=self.scoring + ) + score = _score(estimator, X_test_subset, y_test, scorer) if self.verbose > 0: print("Finished fold with %d / %d feature ranks, score=%f" From 
5e520318c508d7fa151495e637ecbdb23264dc6c Mon Sep 17 00:00:00 2001 From: Alexander Fabisch Date: Fri, 10 Jan 2014 09:41:08 +0100 Subject: [PATCH 04/51] FIX typo in docstring --- sklearn/cross_validation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/cross_validation.py b/sklearn/cross_validation.py index 4d42a626c24eb..ca818076c3dcb 100644 --- a/sklearn/cross_validation.py +++ b/sklearn/cross_validation.py @@ -1152,7 +1152,7 @@ def _split(estimator, X, y, indices, train_indices=None): def _fit(fit_function, X_train, y_train, **fit_params): - """Fit and estimator on a given training set.""" + """Fit an estimator on a given training set.""" if y_train is None: fit_function(X_train, **fit_params) else: From 4b5f468c05814efa9baf8d8d3c34a54f5ae61f1e Mon Sep 17 00:00:00 2001 From: Alexander Fabisch Date: Fri, 10 Jan 2014 10:53:54 +0100 Subject: [PATCH 05/51] Merge `fit_grid_point` into `_cross_val_score` --- sklearn/cross_validation.py | 35 +++++++++++++++++++++++-------- sklearn/grid_search.py | 41 ++++++++++--------------------------- 2 files changed, 38 insertions(+), 38 deletions(-) diff --git a/sklearn/cross_validation.py b/sklearn/cross_validation.py index ca818076c3dcb..050e70c2d8089 100644 --- a/sklearn/cross_validation.py +++ b/sklearn/cross_validation.py @@ -15,6 +15,7 @@ from itertools import chain, combinations from math import ceil, floor, factorial import numbers +import time from abc import ABCMeta, abstractmethod import numpy as np @@ -24,7 +25,7 @@ from .utils import check_arrays, check_random_state, safe_mask from .utils.validation import _num_samples from .utils.fixes import unique -from .externals.joblib import Parallel, delayed +from .externals.joblib import Parallel, delayed, logger from .externals.six import string_types, with_metaclass from .metrics.scorer import _deprecate_loss_and_score_funcs @@ -1095,17 +1096,30 @@ def cross_val_score(estimator, X, y=None, scoring=None, cv=None, n_jobs=1, pre_dispatch=pre_dispatch) scores = parallel( delayed(_cross_val_score)(clone(estimator), X, y, scorer, train, test, - verbose, fit_params) + parameters=None, verbose=verbose, + fit_params=fit_params, + log_label="cross_val_score") for train, test in cv) - return np.array(scores) + return np.array(scores)[:, 0] -def _cross_val_score(estimator, X, y, scorer, train, test, verbose, - fit_params): +def _cross_val_score(estimator, X, y, scorer, train, test, parameters, verbose, + fit_params, log_label): """Inner loop for cross validation""" + if parameters is not None: + estimator.set_params(**parameters) + if verbose > 1: + start_time = time.time() + if parameters is None: + msg = "Evaluating..." + else: + msg = '%s' % (', '.join('%s=%s' % (k, v) + for k, v in parameters.items())) + print("[%s] %s %s" % (log_label, msg, (64 - len(msg)) * '.')) + n_samples = _num_samples(X) fit_params = fit_params if fit_params is not None else {} - fit_params = dict([(k, np.asarray(v)[train] # TODO why is this necessary? 
+ fit_params = dict([(k, np.asarray(v)[train] if hasattr(v, '__len__') and len(v) == n_samples else v) for k, v in fit_params.items()]) @@ -1114,9 +1128,14 @@ def _cross_val_score(estimator, X, y, scorer, train, test, verbose, _fit(estimator.fit, X_train, y_train, **fit_params) score = _score(estimator, X_test, y_test, scorer) + if verbose > 2: + msg += ", score=%f" % score if verbose > 1: - print("score: %f" % score) - return score + end_msg = "%s -%s" % (msg, logger.short_format_time(time.time() - + start_time)) + print("[%s] %s %s" % (log_label, (64 - len(end_msg)) * '.', end_msg)) + + return score, _num_samples(X_test) def _split(estimator, X, y, indices, train_indices=None): diff --git a/sklearn/grid_search.py b/sklearn/grid_search.py index bdbc26c9436c6..87ff9dedbdb1b 100644 --- a/sklearn/grid_search.py +++ b/sklearn/grid_search.py @@ -16,7 +16,6 @@ from itertools import product import numbers import operator -import time import warnings import numpy as np @@ -24,8 +23,8 @@ from .base import BaseEstimator, is_classifier, clone from .base import MetaEstimatorMixin from .cross_validation import _check_cv as check_cv -from .cross_validation import _check_scorable, _split, _fit, _score -from .externals.joblib import Parallel, delayed, logger +from .cross_validation import _check_scorable, _cross_val_score +from .externals.joblib import Parallel, delayed from .externals import six from .utils import safe_mask, check_random_state from .utils.validation import _num_samples, check_arrays @@ -184,7 +183,7 @@ def __len__(self): return self.n_iter -def fit_grid_point(X, y, base_estimator, parameters, train, test, scorer, +def fit_grid_point(X, y, estimator, parameters, train, test, scorer, verbose, loss_func=None, **fit_params): """Run fit on one set of parameters. @@ -196,11 +195,11 @@ def fit_grid_point(X, y, base_estimator, parameters, train, test, scorer, y : array-like or None Targets for input data. - base_estimator : estimator object + estimator : estimator object This estimator will be cloned and then fitted. parameters : dict - Parameters to be set on base_estimator clone for this grid point. + Parameters to be set on estimator for this grid point. train : ndarray, dtype int or bool Boolean mask or indices for training set. @@ -230,29 +229,11 @@ def fit_grid_point(X, y, base_estimator, parameters, train, test, scorer, n_samples_test : int Number of test samples in this split. 
""" - if verbose > 1: - start_time = time.time() - msg = '%s' % (', '.join('%s=%s' % (k, v) - for k, v in parameters.items())) - print("[GridSearchCV] %s %s" % (msg, (64 - len(msg)) * '.')) - - # update parameters of the classifier after a copy of its base structure - estimator = clone(base_estimator) - estimator.set_params(**parameters) - - X_train, y_train = _split(estimator, X, y, train) - X_test, y_test = _split(estimator, X, y, test, train) - _fit(estimator.fit, X_train, y_train, **fit_params) - score = _score(estimator, X_test, y_test, scorer) - - if verbose > 2: - msg += ", score=%f" % score - if verbose > 1: - end_msg = "%s -%s" % (msg, logger.short_format_time(time.time() - - start_time)) - print("[GridSearchCV] %s %s" % ((64 - len(end_msg)) * '.', end_msg)) - - return score, parameters, _num_samples(X_test) + score, n_samples_test = _cross_val_score(estimator, X, y, scorer, train, + test, parameters, verbose, + fit_params, + log_label="GridSearchCV") + return score, parameters, n_samples_test def _check_param_grid(param_grid): @@ -397,7 +378,7 @@ def _fit(self, X, y, parameter_iterable): n_jobs=self.n_jobs, verbose=self.verbose, pre_dispatch=pre_dispatch)( delayed(fit_grid_point)( - X, y, base_estimator, parameters, train, test, + X, y, clone(base_estimator), parameters, train, test, self.scorer_, self.verbose, **self.fit_params) for parameters in parameter_iterable for train, test in cv) From 38081fdd56b6372a3ef6e768f134d46fd6a187ec Mon Sep 17 00:00:00 2001 From: Alexander Fabisch Date: Sat, 11 Jan 2014 00:21:23 +0100 Subject: [PATCH 06/51] Return time --- sklearn/cross_validation.py | 29 +++++++++++++++-------------- sklearn/grid_search.py | 10 +++++----- 2 files changed, 20 insertions(+), 19 deletions(-) diff --git a/sklearn/cross_validation.py b/sklearn/cross_validation.py index 050e70c2d8089..25efa841fcf35 100644 --- a/sklearn/cross_validation.py +++ b/sklearn/cross_validation.py @@ -1103,39 +1103,40 @@ def cross_val_score(estimator, X, y=None, scoring=None, cv=None, n_jobs=1, return np.array(scores)[:, 0] -def _cross_val_score(estimator, X, y, scorer, train, test, parameters, verbose, - fit_params, log_label): +def _cross_val_score(estimator, X, y, scorer, train, test, parameters, + verbose, fit_params, log_label): """Inner loop for cross validation""" if parameters is not None: estimator.set_params(**parameters) - if verbose > 1: - start_time = time.time() - if parameters is None: - msg = "Evaluating..." 
- else: - msg = '%s' % (', '.join('%s=%s' % (k, v) - for k, v in parameters.items())) - print("[%s] %s %s" % (log_label, msg, (64 - len(msg)) * '.')) - n_samples = _num_samples(X) fit_params = fit_params if fit_params is not None else {} fit_params = dict([(k, np.asarray(v)[train] if hasattr(v, '__len__') and len(v) == n_samples else v) for k, v in fit_params.items()]) + start_time = time.time() + + if verbose > 1: + if parameters is None: + msg = "" + else: + msg = '%s' % (', '.join('%s=%s' % (k, v) + for k, v in parameters.items())) + print("[%s] %s %s" % (log_label, msg, (64 - len(msg)) * '.')) + X_train, y_train = _split(estimator, X, y, train) X_test, y_test = _split(estimator, X, y, test, train) _fit(estimator.fit, X_train, y_train, **fit_params) score = _score(estimator, X_test, y_test, scorer) + scoring_time = time.time() - start_time if verbose > 2: msg += ", score=%f" % score if verbose > 1: - end_msg = "%s -%s" % (msg, logger.short_format_time(time.time() - - start_time)) + end_msg = "%s -%s" % (msg, logger.short_format_time(scoring_time)) print("[%s] %s %s" % (log_label, (64 - len(end_msg)) * '.', end_msg)) - return score, _num_samples(X_test) + return score, _num_samples(X_test), scoring_time def _split(estimator, X, y, indices, train_indices=None): diff --git a/sklearn/grid_search.py b/sklearn/grid_search.py index 87ff9dedbdb1b..c7824f2e8b63f 100644 --- a/sklearn/grid_search.py +++ b/sklearn/grid_search.py @@ -184,7 +184,7 @@ def __len__(self): def fit_grid_point(X, y, estimator, parameters, train, test, scorer, - verbose, loss_func=None, **fit_params): + verbose, **fit_params): """Run fit on one set of parameters. Parameters @@ -229,10 +229,10 @@ def fit_grid_point(X, y, estimator, parameters, train, test, scorer, n_samples_test : int Number of test samples in this split. 
""" - score, n_samples_test = _cross_val_score(estimator, X, y, scorer, train, - test, parameters, verbose, - fit_params, - log_label="GridSearchCV") + score, n_samples_test, _ = _cross_val_score(estimator, X, y, scorer, + train, test, parameters, + verbose, fit_params, + log_label="GridSearchCV") return score, parameters, n_samples_test From 30c86ea2ddccab0610944a83bffea1719cc810fd Mon Sep 17 00:00:00 2001 From: Alexander Fabisch Date: Sat, 11 Jan 2014 00:32:19 +0100 Subject: [PATCH 07/51] Move set_params back to fit_grid_point --- sklearn/cross_validation.py | 25 ++++--------------------- sklearn/grid_search.py | 21 ++++++++++++++++----- 2 files changed, 20 insertions(+), 26 deletions(-) diff --git a/sklearn/cross_validation.py b/sklearn/cross_validation.py index 25efa841fcf35..377dbcced6572 100644 --- a/sklearn/cross_validation.py +++ b/sklearn/cross_validation.py @@ -25,7 +25,7 @@ from .utils import check_arrays, check_random_state, safe_mask from .utils.validation import _num_samples from .utils.fixes import unique -from .externals.joblib import Parallel, delayed, logger +from .externals.joblib import Parallel, delayed from .externals.six import string_types, with_metaclass from .metrics.scorer import _deprecate_loss_and_score_funcs @@ -1096,18 +1096,14 @@ def cross_val_score(estimator, X, y=None, scoring=None, cv=None, n_jobs=1, pre_dispatch=pre_dispatch) scores = parallel( delayed(_cross_val_score)(clone(estimator), X, y, scorer, train, test, - parameters=None, verbose=verbose, - fit_params=fit_params, - log_label="cross_val_score") + verbose=verbose, fit_params=fit_params) for train, test in cv) return np.array(scores)[:, 0] -def _cross_val_score(estimator, X, y, scorer, train, test, parameters, - verbose, fit_params, log_label): +def _cross_val_score(estimator, X, y, scorer, train, test, + verbose, fit_params): """Inner loop for cross validation""" - if parameters is not None: - estimator.set_params(**parameters) n_samples = _num_samples(X) fit_params = fit_params if fit_params is not None else {} fit_params = dict([(k, np.asarray(v)[train] @@ -1116,25 +1112,12 @@ def _cross_val_score(estimator, X, y, scorer, train, test, parameters, start_time = time.time() - if verbose > 1: - if parameters is None: - msg = "" - else: - msg = '%s' % (', '.join('%s=%s' % (k, v) - for k, v in parameters.items())) - print("[%s] %s %s" % (log_label, msg, (64 - len(msg)) * '.')) - X_train, y_train = _split(estimator, X, y, train) X_test, y_test = _split(estimator, X, y, test, train) _fit(estimator.fit, X_train, y_train, **fit_params) score = _score(estimator, X_test, y_test, scorer) scoring_time = time.time() - start_time - if verbose > 2: - msg += ", score=%f" % score - if verbose > 1: - end_msg = "%s -%s" % (msg, logger.short_format_time(scoring_time)) - print("[%s] %s %s" % (log_label, (64 - len(end_msg)) * '.', end_msg)) return score, _num_samples(X_test), scoring_time diff --git a/sklearn/grid_search.py b/sklearn/grid_search.py index c7824f2e8b63f..4db7d435256a8 100644 --- a/sklearn/grid_search.py +++ b/sklearn/grid_search.py @@ -24,7 +24,7 @@ from .base import MetaEstimatorMixin from .cross_validation import _check_cv as check_cv from .cross_validation import _check_scorable, _cross_val_score -from .externals.joblib import Parallel, delayed +from .externals.joblib import Parallel, delayed, logger from .externals import six from .utils import safe_mask, check_random_state from .utils.validation import _num_samples, check_arrays @@ -229,10 +229,21 @@ def fit_grid_point(X, y, estimator, parameters, 
train, test, scorer, n_samples_test : int Number of test samples in this split. """ - score, n_samples_test, _ = _cross_val_score(estimator, X, y, scorer, - train, test, parameters, - verbose, fit_params, - log_label="GridSearchCV") + if verbose > 1: + msg = '%s' % (', '.join('%s=%s' % (k, v) + for k, v in parameters.items())) + print("[GridSearchCV] %s %s" % (msg, (64 - len(msg)) * '.')) + + estimator.set_params(**parameters) + score, n_samples_test, scoring_time = _cross_val_score( + estimator, X, y, scorer, train, test, verbose, fit_params) + + if verbose > 2: + msg += ", score=%f" % score + if verbose > 1: + end_msg = "%s -%s" % (msg, logger.short_format_time(scoring_time)) + print("[GridSearchCV] %s %s" % ((64 - len(end_msg)) * '.', end_msg)) + return score, parameters, n_samples_test From 389ed8dbfd018c5ebf6bbf510cb739b1133aca71 Mon Sep 17 00:00:00 2001 From: Alexander Fabisch Date: Sat, 11 Jan 2014 16:34:17 +0100 Subject: [PATCH 08/51] Log score and time in 'cross_val_score' --- sklearn/cross_validation.py | 3 +++ sklearn/grid_search.py | 3 ++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/sklearn/cross_validation.py b/sklearn/cross_validation.py index 377dbcced6572..080206f03441c 100644 --- a/sklearn/cross_validation.py +++ b/sklearn/cross_validation.py @@ -1119,6 +1119,9 @@ def _cross_val_score(estimator, X, y, scorer, train, test, scoring_time = time.time() - start_time + if verbose > 1: + print("score %f in %f s" % (score, scoring_time)) + return score, _num_samples(X_test), scoring_time diff --git a/sklearn/grid_search.py b/sklearn/grid_search.py index 4db7d435256a8..eac8823656439 100644 --- a/sklearn/grid_search.py +++ b/sklearn/grid_search.py @@ -236,7 +236,8 @@ def fit_grid_point(X, y, estimator, parameters, train, test, scorer, estimator.set_params(**parameters) score, n_samples_test, scoring_time = _cross_val_score( - estimator, X, y, scorer, train, test, verbose, fit_params) + estimator, X, y, scorer, train, test, verbose=0, + fit_params=fit_params) if verbose > 2: msg += ", score=%f" % score From 1fa3ec363e3fb261b87b5c0ccc681e4fa5df70b3 Mon Sep 17 00:00:00 2001 From: Alexander Fabisch Date: Sun, 12 Jan 2014 00:40:19 +0100 Subject: [PATCH 09/51] check_scorable returns scorer --- sklearn/cross_validation.py | 42 +++------------------ sklearn/feature_selection/rfe.py | 13 ++----- sklearn/grid_search.py | 12 +++--- sklearn/learning_curve.py | 7 ++-- sklearn/metrics/scorer.py | 63 +++++++++++++++++++++++++++++++ sklearn/tests/test_grid_search.py | 11 +++--- 6 files changed, 86 insertions(+), 62 deletions(-) diff --git a/sklearn/cross_validation.py b/sklearn/cross_validation.py index 080206f03441c..c03cc4076c6b7 100644 --- a/sklearn/cross_validation.py +++ b/sklearn/cross_validation.py @@ -27,7 +27,7 @@ from .utils.fixes import unique from .externals.joblib import Parallel, delayed from .externals.six import string_types, with_metaclass -from .metrics.scorer import _deprecate_loss_and_score_funcs +from .metrics.scorer import check_scorable __all__ = ['Bootstrap', 'KFold', @@ -1087,9 +1087,7 @@ def cross_val_score(estimator, X, y=None, scoring=None, cv=None, n_jobs=1, """ X, y = check_arrays(X, y, sparse_format='csr', allow_lists=True) cv = _check_cv(cv, X, y, classifier=is_classifier(estimator)) - _check_scorable(estimator, score_func=score_func, scoring=scoring) - scorer = _deprecate_loss_and_score_funcs(score_func=score_func, - scoring=scoring) + scorer = check_scorable(estimator, score_func=score_func, scoring=scoring) # We clone the estimator to make sure 
that all the folds are # independent, and that it is pickle-able. parallel = Parallel(n_jobs=n_jobs, verbose=verbose, @@ -1168,20 +1166,12 @@ def _fit(fit_function, X_train, y_train, **fit_params): def _score(estimator, X_test, y_test, scorer): """Compute the score of an estimator on a given test set.""" if y_test is None: - if scorer is None: - score = estimator.score(X_test) - else: - score = scorer(estimator, X_test) + score = scorer(estimator, X_test) else: - if scorer is None: - score = estimator.score(X_test, y_test) - else: - score = scorer(estimator, X_test, y_test) - + score = scorer(estimator, X_test, y_test) if not isinstance(score, numbers.Number): raise ValueError("scoring must return a number, got %s (%s) instead." % (str(score), type(score))) - return score @@ -1262,24 +1252,6 @@ def _check_cv(cv, X=None, y=None, classifier=False, warn_mask=False): return cv -def _check_scorable(estimator, scoring=None, loss_func=None, score_func=None): - """Check that estimator can be fitted and score can be computed.""" - if (not hasattr(estimator, 'fit') or - not (hasattr(estimator, 'predict') - or hasattr(estimator, 'score'))): - raise TypeError("estimator should a be an estimator implementing" - " 'fit' and 'predict' or 'score' methods," - " %s (type %s) was passed" % - (estimator, type(estimator))) - if (scoring is None and loss_func is None and score_func - is None): - if not hasattr(estimator, 'score'): - raise TypeError( - "If no scoring is specified, the estimator passed " - "should have a 'score' method. The estimator %s " - "does not." % estimator) - - def permutation_test_score(estimator, X, y, score_func=None, cv=None, n_permutations=100, n_jobs=1, labels=None, random_state=0, verbose=0, scoring=None): @@ -1351,11 +1323,7 @@ def permutation_test_score(estimator, X, y, score_func=None, cv=None, """ X, y = check_arrays(X, y, sparse_format='csr') cv = _check_cv(cv, X, y, classifier=is_classifier(estimator)) - scorer = _deprecate_loss_and_score_funcs( - loss_func=None, - score_func=score_func, - scoring=scoring - ) + scorer = check_scorable(estimator, scoring=scoring, score_func=score_func) random_state = check_random_state(random_state) # We clone the estimator to make sure that all the folds are diff --git a/sklearn/feature_selection/rfe.py b/sklearn/feature_selection/rfe.py index 49820742289f8..54941036e044a 100644 --- a/sklearn/feature_selection/rfe.py +++ b/sklearn/feature_selection/rfe.py @@ -13,9 +13,9 @@ from ..base import clone from ..base import is_classifier from ..cross_validation import _check_cv as check_cv -from ..cross_validation import _check_scorable, _split, _score +from ..cross_validation import _split, _score from .base import SelectorMixin -from ..metrics.scorer import _deprecate_loss_and_score_funcs +from ..metrics.scorer import check_scorable class RFE(BaseEstimator, MetaEstimatorMixin, SelectorMixin): @@ -326,8 +326,8 @@ def fit(self, X, y): verbose=self.verbose - 1) cv = check_cv(self.cv, X, y, is_classifier(self.estimator)) - _check_scorable(self.estimator, scoring=self.scoring, - loss_func=self.loss_func) + scorer = check_scorable(self.estimator, scoring=self.scoring, + loss_func=self.loss_func) scores = np.zeros(X.shape[1]) # Cross-validation @@ -345,11 +345,6 @@ def fit(self, X, y): estimator = clone(self.estimator) estimator.fit(X_train_subset, y_train) - - scorer = _deprecate_loss_and_score_funcs( - loss_func=self.loss_func, - scoring=self.scoring - ) score = _score(estimator, X_test_subset, y_test, scorer) if self.verbose > 0: diff --git 
a/sklearn/grid_search.py b/sklearn/grid_search.py index eac8823656439..d26f2a74fdd89 100644 --- a/sklearn/grid_search.py +++ b/sklearn/grid_search.py @@ -23,12 +23,12 @@ from .base import BaseEstimator, is_classifier, clone from .base import MetaEstimatorMixin from .cross_validation import _check_cv as check_cv -from .cross_validation import _check_scorable, _cross_val_score +from .cross_validation import _cross_val_score from .externals.joblib import Parallel, delayed, logger from .externals import six from .utils import safe_mask, check_random_state from .utils.validation import _num_samples, check_arrays -from .metrics.scorer import _deprecate_loss_and_score_funcs +from .metrics.scorer import check_scorable __all__ = ['GridSearchCV', 'ParameterGrid', 'fit_grid_point', @@ -308,8 +308,6 @@ def __init__(self, estimator, scoring=None, loss_func=None, self.cv = cv self.verbose = verbose self.pre_dispatch = pre_dispatch - _check_scorable(self.estimator, scoring=self.scoring, - loss_func=self.loss_func, score_func=self.score_func) def score(self, X, y=None): """Returns the score on the given test data and labels, if the search @@ -360,13 +358,13 @@ def _fit(self, X, y, parameter_iterable): estimator = self.estimator cv = self.cv + self.scorer_ = check_scorable(self.estimator, scoring=self.scoring, + loss_func=self.loss_func, + score_func=self.score_func) n_samples = _num_samples(X) X, y = check_arrays(X, y, allow_lists=True, sparse_format='csr') - self.scorer_ = _deprecate_loss_and_score_funcs( - self.loss_func, self.score_func, self.scoring) - if y is not None: if len(y) != n_samples: raise ValueError('Target variable (y) has a different number ' diff --git a/sklearn/learning_curve.py b/sklearn/learning_curve.py index f17c9a5a9fe30..a7fa2c28ce4e0 100644 --- a/sklearn/learning_curve.py +++ b/sklearn/learning_curve.py @@ -11,7 +11,8 @@ from .utils import check_arrays from .externals.joblib import Parallel, delayed from .metrics.scorer import get_scorer -from .cross_validation import _check_scorable, _split, _fit, _score +from .cross_validation import _split, _fit, _score +from .metrics.scorer import check_scorable def learning_curve(estimator, X, y, train_sizes=np.linspace(0.1, 1.0, 10), @@ -101,6 +102,7 @@ def learning_curve(estimator, X, y, train_sizes=np.linspace(0.1, 1.0, 10), X, y = check_arrays(X, y, sparse_format='csr', allow_lists=True) # Make a list since we will be iterating multiple times over the folds cv = list(_check_cv(cv, X, y, classifier=is_classifier(estimator))) + scorer = check_scorable(estimator, scoring=scoring) # HACK as long as boolean indices are allowed in cv generators if cv[0][0].dtype == bool: @@ -119,9 +121,6 @@ def learning_curve(estimator, X, y, train_sizes=np.linspace(0.1, 1.0, 10), if verbose > 0: print("[learning_curve] Training set sizes: " + str(train_sizes_abs)) - _check_scorable(estimator, scoring=scoring) - scorer = get_scorer(scoring) - parallel = Parallel(n_jobs=n_jobs, pre_dispatch=pre_dispatch, verbose=verbose) if exploit_incremental_learning: diff --git a/sklearn/metrics/scorer.py b/sklearn/metrics/scorer.py index 2a28495890ba2..73c32d60836cf 100644 --- a/sklearn/metrics/scorer.py +++ b/sklearn/metrics/scorer.py @@ -198,6 +198,69 @@ def get_scorer(scoring): return scorer +class _passthrough_scorer(object): + """Callable that wraps estimator.score""" + def __call__(self, estimator, *args, **kwargs): + return estimator.score(*args, **kwargs) + + +def check_scorable(estimator, scoring=None, loss_func=None, score_func=None): + """Check if estimator can 
be scored. + + A TypeError will be thrown if the estimator cannot be scored. + + Parameters + ---------- + estimator : estimator object implementing 'fit' + The object to use to fit the data. + + scoring : string, callable or None, optional, default: None + A string (see model evaluation documentation) or + a scorer callable object / function with signature + ``scorer(estimator, X, y)``. + + loss_func : callable or None, optional, default: None + A loss function callable object / function with signature + ``loss_func(estimator, X, y)``. + + score_func : callable or None, optional, default: None + A scoring function with signature + ``score_func(estimator, X, y)``. + + Returns + ------- + scoring : callable + A scorer callable object / function with signature + ``scorer(estimator, X, y)``. + """ + if not hasattr(estimator, 'fit'): + raise TypeError("estimator should a be an estimator implementing " + "'fit' method, %s (type %s) was passed" % + (estimator, type(estimator))) + + if scoring is None and loss_func is None and score_func is None: + if hasattr(estimator, 'score'): + return _passthrough_scorer() + else: + raise TypeError( + "If no scoring is specified, the estimator passed should " + "have a 'score' method. The estimator %s (type %s) " + "does not." % (estimator, type(estimator))) + else: + if hasattr(estimator, 'predict'): + scorer = _deprecate_loss_and_score_funcs(scoring=scoring, + loss_func=loss_func, score_func=score_func) + if scorer is None: + return ValueError("no scoring") + else: + return scorer + else: + raise TypeError( + "If a scoring is specified, the estimator passed should " + "have a 'predict' method. The estimator %s (type %s) " + "does not." % (estimator, type(estimator))) + + def make_scorer(score_func, greater_is_better=True, needs_proba=False, needs_threshold=False, **kwargs): """Make a scorer from a performance metric or loss function. 
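# ---------------------------------------------------------------------------
# [Editorial sketch -- not part of the patch above.] How the check_scorable
# helper added to sklearn/metrics/scorer.py in this commit is meant to be
# used once the patch is applied (a later commit in the series renames it to
# check_scoring); SVC is just a stand-in estimator:
from sklearn.metrics.scorer import check_scorable
from sklearn.svm import SVC

clf = SVC()
scorer = check_scorable(clf, scoring='accuracy')  # explicit scoring -> accuracy scorer
default = check_scorable(clf)                     # no scoring given -> wraps clf.score
# Both results are callables invoked as scorer(fitted_estimator, X_test, y_test).
# ---------------------------------------------------------------------------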
diff --git a/sklearn/tests/test_grid_search.py b/sklearn/tests/test_grid_search.py index ff2510bce79fa..8d7107d10d9db 100644 --- a/sklearn/tests/test_grid_search.py +++ b/sklearn/tests/test_grid_search.py @@ -190,8 +190,9 @@ def test_grid_search_no_score(): assert_equal(grid_search.score(X, y), grid_search_no_score.score(X, y)) # giving no scoring function raises an error - assert_raise_message(TypeError, "no scoring", - GridSearchCV, clf_no_score, {'C': Cs}) + grid_search_no_score = GridSearchCV(clf_no_score, {'C': Cs}) + assert_raise_message(TypeError, "no scoring", grid_search_no_score.fit, + [[1]]) def test_trivial_grid_scores(): @@ -494,9 +495,9 @@ def test_bad_estimator(): # test grid-search with clustering algorithm which doesn't support # "predict" sc = SpectralClustering() - assert_raises(TypeError, GridSearchCV, sc, - param_grid=dict(gamma=[.1, 1, 10]), - scoring='ari') + grid_search = GridSearchCV(sc, param_grid=dict(gamma=[.1, 1, 10]), + scoring='ari') + assert_raises(TypeError, grid_search.fit, [[1]]) def test_param_sampler(): From 5b8933d3b2257390eca62df98bcffe36e1b04c07 Mon Sep 17 00:00:00 2001 From: Alexander Fabisch Date: Sun, 12 Jan 2014 11:35:27 +0100 Subject: [PATCH 10/51] Clean up --- sklearn/cross_validation.py | 6 +-- sklearn/feature_selection/rfe.py | 6 +-- sklearn/grid_search.py | 12 ++--- sklearn/learning_curve.py | 5 +-- sklearn/metrics/scorer.py | 49 ++++++++------------ sklearn/metrics/tests/test_score_objects.py | 50 +++++++++++++++++++++ 6 files changed, 82 insertions(+), 46 deletions(-) diff --git a/sklearn/cross_validation.py b/sklearn/cross_validation.py index c03cc4076c6b7..43c27904b73aa 100644 --- a/sklearn/cross_validation.py +++ b/sklearn/cross_validation.py @@ -27,7 +27,7 @@ from .utils.fixes import unique from .externals.joblib import Parallel, delayed from .externals.six import string_types, with_metaclass -from .metrics.scorer import check_scorable +from .metrics.scorer import check_scoring __all__ = ['Bootstrap', 'KFold', @@ -1087,7 +1087,7 @@ def cross_val_score(estimator, X, y=None, scoring=None, cv=None, n_jobs=1, """ X, y = check_arrays(X, y, sparse_format='csr', allow_lists=True) cv = _check_cv(cv, X, y, classifier=is_classifier(estimator)) - scorer = check_scorable(estimator, score_func=score_func, scoring=scoring) + scorer = check_scoring(estimator, score_func=score_func, scoring=scoring) # We clone the estimator to make sure that all the folds are # independent, and that it is pickle-able. 
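# ---------------------------------------------------------------------------
# [Editorial note -- not part of the patch above.] As of this commit, each
# parallel _cross_val_score call returns a (score, n_test_samples,
# scoring_time) tuple, and cross_val_score keeps only the score column via
# np.array(...)[:, 0]. A tiny illustration with made-up per-fold values:
import numpy as np

fold_results = [(0.97, 50, 0.012), (0.93, 50, 0.011), (1.00, 50, 0.010)]
scores = np.array(fold_results)[:, 0]  # -> array([0.97, 0.93, 1.0])
# ---------------------------------------------------------------------------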
parallel = Parallel(n_jobs=n_jobs, verbose=verbose, @@ -1323,7 +1323,7 @@ def permutation_test_score(estimator, X, y, score_func=None, cv=None, """ X, y = check_arrays(X, y, sparse_format='csr') cv = _check_cv(cv, X, y, classifier=is_classifier(estimator)) - scorer = check_scorable(estimator, scoring=scoring, score_func=score_func) + scorer = check_scoring(estimator, scoring=scoring, score_func=score_func) random_state = check_random_state(random_state) # We clone the estimator to make sure that all the folds are diff --git a/sklearn/feature_selection/rfe.py b/sklearn/feature_selection/rfe.py index 54941036e044a..a58fd33d61f47 100644 --- a/sklearn/feature_selection/rfe.py +++ b/sklearn/feature_selection/rfe.py @@ -15,7 +15,7 @@ from ..cross_validation import _check_cv as check_cv from ..cross_validation import _split, _score from .base import SelectorMixin -from ..metrics.scorer import check_scorable +from ..metrics.scorer import check_scoring class RFE(BaseEstimator, MetaEstimatorMixin, SelectorMixin): @@ -326,8 +326,8 @@ def fit(self, X, y): verbose=self.verbose - 1) cv = check_cv(self.cv, X, y, is_classifier(self.estimator)) - scorer = check_scorable(self.estimator, scoring=self.scoring, - loss_func=self.loss_func) + scorer = check_scoring(self.estimator, scoring=self.scoring, + loss_func=self.loss_func) scores = np.zeros(X.shape[1]) # Cross-validation diff --git a/sklearn/grid_search.py b/sklearn/grid_search.py index d26f2a74fdd89..4b440d23381e0 100644 --- a/sklearn/grid_search.py +++ b/sklearn/grid_search.py @@ -28,7 +28,7 @@ from .externals import six from .utils import safe_mask, check_random_state from .utils.validation import _num_samples, check_arrays -from .metrics.scorer import check_scorable +from .metrics.scorer import check_scoring __all__ = ['GridSearchCV', 'ParameterGrid', 'fit_grid_point', @@ -232,7 +232,7 @@ def fit_grid_point(X, y, estimator, parameters, train, test, scorer, if verbose > 1: msg = '%s' % (', '.join('%s=%s' % (k, v) for k, v in parameters.items())) - print("[GridSearchCV] %s %s" % (msg, (64 - len(msg)) * '.')) + print("[CV] %s %s" % (msg, (64 - len(msg)) * '.')) estimator.set_params(**parameters) score, n_samples_test, scoring_time = _cross_val_score( @@ -243,7 +243,7 @@ def fit_grid_point(X, y, estimator, parameters, train, test, scorer, msg += ", score=%f" % score if verbose > 1: end_msg = "%s -%s" % (msg, logger.short_format_time(scoring_time)) - print("[GridSearchCV] %s %s" % ((64 - len(end_msg)) * '.', end_msg)) + print("[CV] %s %s" % ((64 - len(end_msg)) * '.', end_msg)) return score, parameters, n_samples_test @@ -358,9 +358,9 @@ def _fit(self, X, y, parameter_iterable): estimator = self.estimator cv = self.cv - self.scorer_ = check_scorable(self.estimator, scoring=self.scoring, - loss_func=self.loss_func, - score_func=self.score_func) + self.scorer_ = check_scoring(self.estimator, scoring=self.scoring, + loss_func=self.loss_func, + score_func=self.score_func) n_samples = _num_samples(X) X, y = check_arrays(X, y, allow_lists=True, sparse_format='csr') diff --git a/sklearn/learning_curve.py b/sklearn/learning_curve.py index a7fa2c28ce4e0..26a742a22ed2f 100644 --- a/sklearn/learning_curve.py +++ b/sklearn/learning_curve.py @@ -12,7 +12,7 @@ from .externals.joblib import Parallel, delayed from .metrics.scorer import get_scorer from .cross_validation import _split, _fit, _score -from .metrics.scorer import check_scorable +from .metrics.scorer import check_scoring def learning_curve(estimator, X, y, train_sizes=np.linspace(0.1, 1.0, 10), @@ -94,7 +94,6 
@@ def learning_curve(estimator, X, y, train_sizes=np.linspace(0.1, 1.0, 10), ----- See :ref:`examples/plot_learning_curve.py ` """ - if exploit_incremental_learning and not hasattr(estimator, "partial_fit"): raise ValueError("An estimator must support the partial_fit interface " "to exploit incremental learning") @@ -102,7 +101,7 @@ def learning_curve(estimator, X, y, train_sizes=np.linspace(0.1, 1.0, 10), X, y = check_arrays(X, y, sparse_format='csr', allow_lists=True) # Make a list since we will be iterating multiple times over the folds cv = list(_check_cv(cv, X, y, classifier=is_classifier(estimator))) - scorer = check_scorable(estimator, scoring=scoring) + scorer = check_scoring(estimator, scoring=scoring) # HACK as long as boolean indices are allowed in cv generators if cv[0][0].dtype == bool: diff --git a/sklearn/metrics/scorer.py b/sklearn/metrics/scorer.py index 73c32d60836cf..c4b89d2dfe1bb 100644 --- a/sklearn/metrics/scorer.py +++ b/sklearn/metrics/scorer.py @@ -198,13 +198,13 @@ def get_scorer(scoring): return scorer -class _passthrough_scorer(object): +class _PassthroughScorer(object): """Callable that wraps estimator.score""" def __call__(self, estimator, *args, **kwargs): return estimator.score(*args, **kwargs) -def check_scorable(estimator, scoring=None, loss_func=None, score_func=None): +def check_scoring(estimator, scoring=None, loss_func=None, score_func=None): """Check if estimator can be scored. A TypeError will be thrown if the estimator cannot be scored. @@ -219,46 +219,33 @@ def check_scorable(estimator, scoring=None, loss_func=None, score_func=None): a scorer callable object / function with signature ``scorer(estimator, X, y)``. - loss_func : callable or None, optional, default: None - A loss function callable object / function with signature - ``loss_func(estimator, X, y)``. - - score_func : callable or None, optional, default: None - A scoring function with signature - ``score_func(estimator, X, y)``. - Returns ------- scoring : callable A scorer callable object / function with signature ``scorer(estimator, X, y)``. """ + has_scoring = not (scoring is None and loss_func is None and + score_func is None) if not hasattr(estimator, 'fit'): raise TypeError("estimator should a be an estimator implementing " "'fit' method, %s (type %s) was passed" % (estimator, type(estimator))) - - if scoring is None and loss_func is None and score_func is None: - if hasattr(estimator, 'score'): - return _passthrough_scorer() - else: - raise TypeError( - "If no scoring is specified, the estimator passed should " - "have a 'score' method. The estimator %s (type %s) " - "does not." % (estimator, type(estimator))) + elif hasattr(estimator, 'predict') and has_scoring: + return _deprecate_loss_and_score_funcs(scoring=scoring, + loss_func=loss_func, score_func=score_func) + elif hasattr(estimator, 'score'): + return _PassthroughScorer() + elif not has_scoring: + raise TypeError( + "If no scoring is specified, the estimator passed should " + "have a 'score' method. The estimator %s (type %s) " + "does not." % (estimator, type(estimator))) else: - if hasattr(estimator, 'predict'): - scorer = _deprecate_loss_and_score_funcs(scoring=scoring, - loss_func=loss_func, score_func=score_func) - if scorer is None: - return ValueError("no scoring") - else: - return scorer - else: - raise TypeError( - "If a scoring is specified, the estimator passed should " - "have a 'predict' method. The estimator %s (type %s) " - "does not." 
% (estimator, type(estimator))) + raise TypeError( + "The estimator passed should have a 'score' or a 'predict' " + "method. The estimator %s (type %s) does not." + % (estimator, type(estimator))) def make_scorer(score_func, greater_is_better=True, needs_proba=False, diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py index d7ea9f427074d..265e35b9b5034 100644 --- a/sklearn/metrics/tests/test_score_objects.py +++ b/sklearn/metrics/tests/test_score_objects.py @@ -9,6 +9,7 @@ from sklearn.metrics import (f1_score, r2_score, roc_auc_score, fbeta_score, log_loss) from sklearn.metrics.cluster import adjusted_rand_score +from sklearn.metrics.scorer import check_scoring from sklearn.metrics import make_scorer, SCORERS from sklearn.svm import LinearSVC from sklearn.cluster import KMeans @@ -22,6 +23,55 @@ from sklearn.multiclass import OneVsRestClassifier +class EstimatorWithoutFit(object): + """Dummy estimator to test check_scoring""" + pass + + +class EstimatorWithFit(object): + """Dummy estimator to test check_scoring""" + def fit(self, X, y): + return self + + +class EstimatorWithFitAndScore(object): + """Dummy estimator to test check_scoring""" + def fit(self, X, y): + return self + def score(self, X, y): + return 1.0 + + +class EstimatorWithFitAndPredict(object): + """Dummy estimator to test check_scoring""" + def fit(self, X, y): + self.y = y + return self + def predict(self, X): + return self.y + + +def test_check_scoring(): + """Test all branches of check_scoring""" + estimator = EstimatorWithoutFit() + assert_raises(TypeError, check_scoring, estimator) + + estimator = EstimatorWithFitAndScore() + estimator.fit([[1]], [1]) + scorer = check_scoring(estimator) + assert_almost_equal(scorer(estimator, [[1]], [1]), 1.0) + + estimator = EstimatorWithFitAndPredict() + estimator.fit([[1]], [1]) + assert_raises(TypeError, check_scoring, estimator) + + scorer = check_scoring(estimator, "accuracy") + assert_almost_equal(scorer(estimator, [[1]], [1]), 1.0) + + estimator = EstimatorWithFit() + assert_raises(TypeError, check_scoring, estimator) + + def test_make_scorer(): """Sanity check on the make_scorer factory function.""" f = lambda *args: 0 From 70aaef24e8f6dd2eb8921fd0e0f6a9504c7c9358 Mon Sep 17 00:00:00 2001 From: Alexander Fabisch Date: Sun, 12 Jan 2014 12:29:31 +0100 Subject: [PATCH 11/51] Replace '_fit_estimator' by '_cross_val_score' --- sklearn/cross_validation.py | 8 ++++++-- sklearn/learning_curve.py | 30 +++++++++------------------- sklearn/tests/test_learning_curve.py | 2 +- 3 files changed, 16 insertions(+), 24 deletions(-) diff --git a/sklearn/cross_validation.py b/sklearn/cross_validation.py index 43c27904b73aa..8de3e1474c082 100644 --- a/sklearn/cross_validation.py +++ b/sklearn/cross_validation.py @@ -1100,7 +1100,7 @@ def cross_val_score(estimator, X, y=None, scoring=None, cv=None, n_jobs=1, def _cross_val_score(estimator, X, y, scorer, train, test, - verbose, fit_params): + verbose, fit_params, return_train_score=False): """Inner loop for cross validation""" n_samples = _num_samples(X) fit_params = fit_params if fit_params is not None else {} @@ -1120,7 +1120,11 @@ def _cross_val_score(estimator, X, y, scorer, train, test, if verbose > 1: print("score %f in %f s" % (score, scoring_time)) - return score, _num_samples(X_test), scoring_time + if return_train_score: + return (_score(estimator, X_train, y_train, scorer), score, + _num_samples(X_test), scoring_time) + else: + return score, _num_samples(X_test), scoring_time def 
_split(estimator, X, y, indices, train_indices=None): diff --git a/sklearn/learning_curve.py b/sklearn/learning_curve.py index 26a742a22ed2f..c803ac242649d 100644 --- a/sklearn/learning_curve.py +++ b/sklearn/learning_curve.py @@ -11,7 +11,7 @@ from .utils import check_arrays from .externals.joblib import Parallel, delayed from .metrics.scorer import get_scorer -from .cross_validation import _split, _fit, _score +from .cross_validation import _split, _fit, _score, _cross_val_score from .metrics.scorer import check_scoring @@ -127,14 +127,16 @@ def learning_curve(estimator, X, y, train_sizes=np.linspace(0.1, 1.0, 10), classes = np.unique(y) else: classes = None + out = parallel(delayed(_incremental_fit_estimator)( - estimator, X, y, classes, train, test, train_sizes_abs, scorer, - verbose) for train, test in cv) + clone(estimator), X, y, classes, train, test, train_sizes_abs, + scorer, verbose) for train, test in cv) else: - out = parallel(delayed(_fit_estimator)( - estimator, X, y, train, test, n_train_samples, scorer, verbose) + out = parallel(delayed(_cross_val_score)( + clone(estimator), X, y, scorer, train[:n_train_samples], test, + verbose, fit_params=None, return_train_score=True) for train, test in cv for n_train_samples in train_sizes_abs) - out = np.array(out) + out = np.array(out)[:, :2] n_cv_folds = out.shape[0]/n_unique_ticks out = out.reshape(n_cv_folds, n_unique_ticks, 2) @@ -202,23 +204,9 @@ def _translate_train_sizes(train_sizes, n_max_training_samples): return train_sizes_abs -def _fit_estimator(base_estimator, X, y, train, test, - n_train_samples, scorer, verbose): - """Train estimator on a training subset and compute scores.""" - train_subset = train[:n_train_samples] - estimator = clone(base_estimator) - X_train, y_train = _split(estimator, X, y, train_subset) - X_test, y_test = _split(estimator, X, y, test, train_subset) - _fit(estimator.fit, X_train, y_train) - train_score = _score(estimator, X_train, y_train, scorer) - test_score = _score(estimator, X_test, y_test, scorer) - return train_score, test_score - - -def _incremental_fit_estimator(base_estimator, X, y, classes, train, test, +def _incremental_fit_estimator(estimator, X, y, classes, train, test, train_sizes, scorer, verbose): """Train estimator on training subsets incrementally and compute scores.""" - estimator = clone(base_estimator) train_scores, test_scores = [], [] partitions = zip(train_sizes, np.split(train, train_sizes)[:-1]) for n_train_samples, partial_train in partitions: diff --git a/sklearn/tests/test_learning_curve.py b/sklearn/tests/test_learning_curve.py index 66b8f36279b97..1d43fdfb0eb4a 100644 --- a/sklearn/tests/test_learning_curve.py +++ b/sklearn/tests/test_learning_curve.py @@ -170,7 +170,7 @@ def test_learning_curve_with_boolean_indices(): estimator = MockImprovingClassifier(20) cv = KFold(n=30, n_folds=3, indices=False) train_sizes, train_scores, test_scores = learning_curve(estimator, X, y, - cv=cv) + cv=cv) assert_array_equal(train_sizes, np.linspace(2, 20, 10)) assert_array_almost_equal(train_scores, np.linspace(1.9, 1.0, 10)) assert_array_almost_equal(test_scores, np.linspace(0.1, 1.0, 10)) From 13c791595ca59456f861a6f1a88ecfb4fc7c1ade Mon Sep 17 00:00:00 2001 From: Alexander Fabisch Date: Sun, 12 Jan 2014 13:09:22 +0100 Subject: [PATCH 12/51] Fix PEP8, style and documentation --- sklearn/cross_validation.py | 2 +- sklearn/grid_search.py | 3 +-- sklearn/learning_curve.py | 7 +------ sklearn/metrics/scorer.py | 14 ++++++-------- sklearn/metrics/tests/test_score_objects.py | 2 ++ 5 
files changed, 11 insertions(+), 17 deletions(-) diff --git a/sklearn/cross_validation.py b/sklearn/cross_validation.py index 8de3e1474c082..8eb08f9a95deb 100644 --- a/sklearn/cross_validation.py +++ b/sklearn/cross_validation.py @@ -26,7 +26,7 @@ from .utils.validation import _num_samples from .utils.fixes import unique from .externals.joblib import Parallel, delayed -from .externals.six import string_types, with_metaclass +from .externals.six import with_metaclass from .metrics.scorer import check_scoring __all__ = ['Bootstrap', diff --git a/sklearn/grid_search.py b/sklearn/grid_search.py index 4b440d23381e0..d6a29273f7aa6 100644 --- a/sklearn/grid_search.py +++ b/sklearn/grid_search.py @@ -14,7 +14,6 @@ from collections import Mapping, namedtuple, Sized from functools import partial, reduce from itertools import product -import numbers import operator import warnings @@ -26,7 +25,7 @@ from .cross_validation import _cross_val_score from .externals.joblib import Parallel, delayed, logger from .externals import six -from .utils import safe_mask, check_random_state +from .utils import check_random_state from .utils.validation import _num_samples, check_arrays from .metrics.scorer import check_scoring diff --git a/sklearn/learning_curve.py b/sklearn/learning_curve.py index c803ac242649d..5ca556a99417f 100644 --- a/sklearn/learning_curve.py +++ b/sklearn/learning_curve.py @@ -10,7 +10,6 @@ from .cross_validation import _check_cv from .utils import check_arrays from .externals.joblib import Parallel, delayed -from .metrics.scorer import get_scorer from .cross_validation import _split, _fit, _score, _cross_val_score from .metrics.scorer import check_scoring @@ -123,11 +122,7 @@ def learning_curve(estimator, X, y, train_sizes=np.linspace(0.1, 1.0, 10), parallel = Parallel(n_jobs=n_jobs, pre_dispatch=pre_dispatch, verbose=verbose) if exploit_incremental_learning: - if is_classifier(estimator): - classes = np.unique(y) - else: - classes = None - + classes = np.unique(y) if is_classifier(estimator) else None out = parallel(delayed(_incremental_fit_estimator)( clone(estimator), X, y, classes, train, test, train_sizes_abs, scorer, verbose) for train, test in cv) diff --git a/sklearn/metrics/scorer.py b/sklearn/metrics/scorer.py index c4b89d2dfe1bb..9fbf4893652c6 100644 --- a/sklearn/metrics/scorer.py +++ b/sklearn/metrics/scorer.py @@ -205,7 +205,7 @@ def __call__(self, estimator, *args, **kwargs): def check_scoring(estimator, scoring=None, loss_func=None, score_func=None): - """Check if estimator can be scored. + """Determine scorer from user options. A TypeError will be thrown if the estimator cannot be scored. @@ -229,23 +229,21 @@ def check_scoring(estimator, scoring=None, loss_func=None, score_func=None): score_func is None) if not hasattr(estimator, 'fit'): raise TypeError("estimator should a be an estimator implementing " - "'fit' method, %s (type %s) was passed" % - (estimator, type(estimator))) + "'fit' method, %r was passed" % estimator) elif hasattr(estimator, 'predict') and has_scoring: return _deprecate_loss_and_score_funcs(scoring=scoring, - loss_func=loss_func, score_func=score_func) + loss_func=loss_func, + score_func=score_func) elif hasattr(estimator, 'score'): return _PassthroughScorer() elif not has_scoring: raise TypeError( "If no scoring is specified, the estimator passed should " - "have a 'score' method. The estimator %s (type %s) " - "does not." % (estimator, type(estimator))) + "have a 'score' method. The estimator %r does not." 
% estimator) else: raise TypeError( "The estimator passed should have a 'score' or a 'predict' " - "method. The estimator %s (type %s) does not." - % (estimator, type(estimator))) + "method. The estimator %r does not." % estimator) def make_scorer(score_func, greater_is_better=True, needs_proba=False, diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py index 265e35b9b5034..3cda7aadece52 100644 --- a/sklearn/metrics/tests/test_score_objects.py +++ b/sklearn/metrics/tests/test_score_objects.py @@ -38,6 +38,7 @@ class EstimatorWithFitAndScore(object): """Dummy estimator to test check_scoring""" def fit(self, X, y): return self + def score(self, X, y): return 1.0 @@ -47,6 +48,7 @@ class EstimatorWithFitAndPredict(object): def fit(self, X, y): self.y = y return self + def predict(self, X): return self.y From 7b951d8c63220c556c9d90a65720dad6cbb78174 Mon Sep 17 00:00:00 2001 From: Alexander Fabisch Date: Sun, 12 Jan 2014 13:41:37 +0100 Subject: [PATCH 13/51] Remove wrong variable names --- sklearn/feature_selection/rfe.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/sklearn/feature_selection/rfe.py b/sklearn/feature_selection/rfe.py index a58fd33d61f47..ca6ede9a7c760 100644 --- a/sklearn/feature_selection/rfe.py +++ b/sklearn/feature_selection/rfe.py @@ -340,12 +340,9 @@ def fit(self, X, y): # Score each subset of features for k in range(0, max(ranking_)): mask = np.where(ranking_ <= k + 1)[0] - X_train_subset = X_train[:, mask] - X_test_subset = X_test[:, mask] - estimator = clone(self.estimator) - estimator.fit(X_train_subset, y_train) - score = _score(estimator, X_test_subset, y_test, scorer) + estimator.fit(X_train[:, mask], y_train) + score = _score(estimator, X_test[:, mask], y_test, scorer) if self.verbose > 0: print("Finished fold with %d / %d feature ranks, score=%f" From 5b211cd96543090ac5a3684b85e4fcae620003c9 Mon Sep 17 00:00:00 2001 From: Alexander Fabisch Date: Tue, 14 Jan 2014 11:12:02 +0100 Subject: [PATCH 14/51] Remove helper function '_fit' --- sklearn/cross_validation.py | 13 ++++--------- sklearn/learning_curve.py | 10 +++++++--- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/sklearn/cross_validation.py b/sklearn/cross_validation.py index 8eb08f9a95deb..7eaadafa2dfec 100644 --- a/sklearn/cross_validation.py +++ b/sklearn/cross_validation.py @@ -1112,7 +1112,10 @@ def _cross_val_score(estimator, X, y, scorer, train, test, X_train, y_train = _split(estimator, X, y, train) X_test, y_test = _split(estimator, X, y, test, train) - _fit(estimator.fit, X_train, y_train, **fit_params) + if y_train is None: + estimator.fit(X_train, **fit_params) + else: + estimator.fit(X_train, y_train, **fit_params) score = _score(estimator, X_test, y_test, scorer) scoring_time = time.time() - start_time @@ -1159,14 +1162,6 @@ def _split(estimator, X, y, indices, train_indices=None): return X_subset, y_subset -def _fit(fit_function, X_train, y_train, **fit_params): - """Fit an estimator on a given training set.""" - if y_train is None: - fit_function(X_train, **fit_params) - else: - fit_function(X_train, y_train, **fit_params) - - def _score(estimator, X_test, y_test, scorer): """Compute the score of an estimator on a given test set.""" if y_test is None: diff --git a/sklearn/learning_curve.py b/sklearn/learning_curve.py index 5ca556a99417f..6ba706ed7442f 100644 --- a/sklearn/learning_curve.py +++ b/sklearn/learning_curve.py @@ -10,7 +10,7 @@ from .cross_validation import _check_cv from .utils import 
check_arrays from .externals.joblib import Parallel, delayed -from .cross_validation import _split, _fit, _score, _cross_val_score +from .cross_validation import _split, _score, _cross_val_score from .metrics.scorer import check_scoring @@ -209,8 +209,12 @@ def _incremental_fit_estimator(estimator, X, y, classes, train, test, X_partial_train, y_partial_train = _split(estimator, X, y, partial_train) X_test, y_test = _split(estimator, X, y, test, train[:n_train_samples]) - _fit(estimator.partial_fit, X_partial_train, y_partial_train, - classes=classes) + if y_partial_train is None: + estimator.partial_fit(X_partial_train, classes=classes) + else: + estimator.partial_fit(X_partial_train, y_partial_train, + classes=classes) train_scores.append(_score(estimator, X_train, y_train, scorer)) test_scores.append(_score(estimator, X_test, y_test, scorer)) return np.array((train_scores, test_scores)).T + From 13bc90e35cb37cc4e054413057d8d7f0b29ef8a5 Mon Sep 17 00:00:00 2001 From: Mathieu Blondel Date: Wed, 15 Jan 2014 02:20:56 +0900 Subject: [PATCH 15/51] Add evaluate_scorers function. --- sklearn/metrics/scorer.py | 210 ++++++++------------ sklearn/metrics/tests/test_score_objects.py | 20 +- 2 files changed, 105 insertions(+), 125 deletions(-) diff --git a/sklearn/metrics/scorer.py b/sklearn/metrics/scorer.py index 9fbf4893652c6..e3e5a45f793a6 100644 --- a/sklearn/metrics/scorer.py +++ b/sklearn/metrics/scorer.py @@ -31,130 +31,94 @@ from ..externals import six -class _BaseScorer(six.with_metaclass(ABCMeta, object)): - def __init__(self, score_func, sign, kwargs): - self._kwargs = kwargs - self._score_func = score_func - self._sign = sign +class _Scorer(object): + + def __init__(self, score_func, greater_is_better=True, needs_proba=False, + needs_threshold=False, kwargs={}): + self.score_func = score_func + self.greater_is_better = greater_is_better + self.needs_proba = needs_proba + self.needs_threshold = needs_threshold + self.kwargs = kwargs - @abstractmethod def __call__(self, estimator, X, y): - pass - - def __repr__(self): - kwargs_string = "".join([", %s=%s" % (str(k), str(v)) - for k, v in self._kwargs.items()]) - return ("make_scorer(%s%s%s%s)" - % (self._score_func.__name__, - "" if self._sign > 0 else ", greater_is_better=False", - self._factory_args(), kwargs_string)) - - def _factory_args(self): - """Return non-default make_scorer arguments for repr.""" - return "" - - -class _PredictScorer(_BaseScorer): - def __call__(self, estimator, X, y_true): - """Evaluate predicted target values for X relative to y_true. - - Parameters - ---------- - estimator : object - Trained estimator to use for scoring. Must have a predict_proba - method; the output of that is used to compute the score. - - X : array-like or sparse matrix - Test data that will be fed to estimator.predict. - - y_true : array-like - Gold standard target values for X. - - Returns - ------- - score : float - Score function applied to prediction of estimator on X. - """ + return evaluate_scorers(estimator, X, y, [self])[0] + + +def evaluate_scorers(estimator, X, y, scorers): + has_pb = hasattr(estimator, "predict_proba") + has_df = hasattr(estimator, "decision_function") + + # Make a first pass through scorers to determine if we need + # predict_proba or decision_function. + compute_proba = False + compute_df = False + for scorer in scorers: + if scorer.needs_proba: + if not has_pb: + raise ValueError("%s needs probabilities but predict_proba is" + "not available in %s." 
% (scorer, estimator)) + compute_proba = True + + elif scorer.needs_threshold: + if has_pb: + # We choose predict_proba first because its interface + # is more consistent across the project. + compute_proba = True + elif has_df: + compute_df = True + else: + raise ValueError("%s needs continuous outputs but neither" + "predict_proba nor decision_function " + "are available in %s." % (scorer, estimator)) + + # Compute predict_proba or decision_function if needed. + y_pred = None + if compute_proba: + y_proba = estimator.predict_proba(X) + + # For multi-output multi-class estimator + #if isinstance(y_proba, list): + #y_proba = np.vstack([p[:, -1] for p in y_proba]).T + + y_pred = estimator.classes_[y_proba.argmax(axis=1)] + + + elif compute_df: + df = estimator.decision_function(X) + + # For multi-output multi-class estimator + #if isinstance(df, list): + #df = np.vstack(p for p in df).T + + if len(df.shape) == 2 and df.shape[1] >= 2: + y_pred = estimator.classes_[df.argmax(axis=1)] + else: + y_pred = estimator.classes_[(df >= 0).astype(int)] + + # Compute y_pred if needed + if y_pred is None: y_pred = estimator.predict(X) - return self._sign * self._score_func(y_true, y_pred, **self._kwargs) - - -class _ProbaScorer(_BaseScorer): - def __call__(self, clf, X, y): - """Evaluate predicted probabilities for X relative to y_true. - - Parameters - ---------- - clf : object - Trained classifier to use for scoring. Must have a predict_proba - method; the output of that is used to compute the score. - - X : array-like or sparse matrix - Test data that will be fed to clf.predict_proba. - - y : array-like - Gold standard target values for X. These must be class labels, - not probabilities. - - Returns - ------- - score : float - Score function applied to prediction of estimator on X. - """ - y_pred = clf.predict_proba(X) - return self._sign * self._score_func(y, y_pred, **self._kwargs) - - def _factory_args(self): - return ", needs_proba=True" - - -class _ThresholdScorer(_BaseScorer): - def __call__(self, clf, X, y): - """Evaluate decision function output for X relative to y_true. - - Parameters - ---------- - clf : object - Trained classifier to use for scoring. Must have either a - decision_function method or a predict_proba method; the output of - that is used to compute the score. - - X : array-like or sparse matrix - Test data that will be fed to clf.decision_function or - clf.predict_proba. - - y : array-like - Gold standard target values for X. These must be class labels, - not decision function values. - - Returns - ------- - score : float - Score function applied to prediction of estimator on X. - """ - y_type = type_of_target(y) - if y_type not in ("binary", "multilabel-indicator"): - raise ValueError("{0} format is not supported".format(y_type)) - - try: - y_pred = clf.decision_function(X) - # For multi-output multi-class estimator - if isinstance(y_pred, list): - y_pred = np.vstack(p for p in y_pred).T + # Compute scores. 
+ scores = [] + for scorer in scorers: + if scorer.needs_proba: + score = scorer.score_func(y, y_proba, **scorer.kwargs) - except (NotImplementedError, AttributeError): - y_pred = clf.predict_proba(X) + elif scorer.needs_threshold: + if compute_proba: + score = scorer.score_func(y, y_proba[:, 1], **scorer.kwargs) + else: + score = scorer.score_func(y, df.ravel(), **scorer.kwargs) - if y_type == "binary": - y_pred = y_pred[:, 1] - elif isinstance(y_pred, list): - y_pred = np.vstack([p[:, -1] for p in y_pred]).T + else: + score = scorer.score_func(y, y_pred, **scorer.kwargs) - return self._sign * self._score_func(y, y_pred, **self._kwargs) + sign = 1 if scorer.greater_is_better else -1 + scores.append(sign * score) - def _factory_args(self): - return ", needs_threshold=True" + return np.array(scores) def _deprecate_loss_and_score_funcs( @@ -297,17 +261,15 @@ def make_scorer(score_func, greater_is_better=True, needs_proba=False, >>> grid = GridSearchCV(LinearSVC(), param_grid={'C': [1, 10]}, ... scoring=ftwo_scorer) """ - sign = 1 if greater_is_better else -1 if needs_proba and needs_threshold: raise ValueError("Set either needs_proba or needs_threshold to True," " but not both.") - if needs_proba: - cls = _ProbaScorer - elif needs_threshold: - cls = _ThresholdScorer - else: - cls = _PredictScorer - return cls(score_func, sign, kwargs) + + return _Scorer(score_func, + greater_is_better=greater_is_better, + needs_proba=needs_proba, + needs_threshold=needs_threshold, + kwargs=kwargs) # Standard regression scores diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py index 3cda7aadece52..b7d698ca58163 100644 --- a/sklearn/metrics/tests/test_score_objects.py +++ b/sklearn/metrics/tests/test_score_objects.py @@ -5,17 +5,19 @@ from sklearn.utils.testing import assert_almost_equal from sklearn.utils.testing import assert_raises from sklearn.utils.testing import ignore_warnings +from sklearn.utils.testing import SkipTest from sklearn.metrics import (f1_score, r2_score, roc_auc_score, fbeta_score, log_loss) from sklearn.metrics.cluster import adjusted_rand_score -from sklearn.metrics.scorer import check_scoring +from sklearn.metrics.scorer import check_scoring, evaluate_scorers from sklearn.metrics import make_scorer, SCORERS from sklearn.svm import LinearSVC from sklearn.cluster import KMeans from sklearn.linear_model import Ridge, LogisticRegression from sklearn.tree import DecisionTreeClassifier from sklearn.datasets import make_blobs +from sklearn.datasets import make_classification from sklearn.datasets import make_multilabel_classification from sklearn.datasets import load_diabetes from sklearn.cross_validation import train_test_split, cross_val_score @@ -152,6 +154,7 @@ def test_thresholded_scorers_multilabel_indicator_data(): """Test that the scorer work with multilabel-indicator format for multilabel and multi-output multi-class classifier """ + raise SkipTest X, y = make_multilabel_classification(return_indicator=True, allow_unlabeled=False, random_state=0) @@ -205,6 +208,21 @@ def test_unsupervised_scorers(): assert_almost_equal(score1, score2) +def test_evaluate_scorers(): + X, y = make_classification(n_classes=2, random_state=0) + clf = LinearSVC() + clf.fit(X, y) + s1, s2 = evaluate_scorers(clf, X, y, [SCORERS["f1"], + SCORERS["roc_auc"]]) + df = clf.decision_function(X) + y_pred = clf.predict(X) + f1 = f1_score(y, y_pred) + roc = roc_auc_score(y, df.ravel()) + + assert_almost_equal(s1, f1) + assert_almost_equal(s2, roc) + + 
@ignore_warnings def test_raises_on_score_list(): """Test that when a list of scores is returned, we raise proper errors.""" From 4b2cd18a3e58536acba3fd41eb6e35b7e86cc168 Mon Sep 17 00:00:00 2001 From: Mathieu Blondel Date: Wed, 15 Jan 2014 23:33:03 +0900 Subject: [PATCH 16/51] Add more tests for evaluate_scorers. --- sklearn/metrics/scorer.py | 2 +- sklearn/metrics/tests/test_score_objects.py | 80 +++++++++++++++++++-- 2 files changed, 74 insertions(+), 8 deletions(-) diff --git a/sklearn/metrics/scorer.py b/sklearn/metrics/scorer.py index e3e5a45f793a6..e03130d411ccf 100644 --- a/sklearn/metrics/scorer.py +++ b/sklearn/metrics/scorer.py @@ -96,7 +96,7 @@ def evaluate_scorers(estimator, X, y, scorers): else: y_pred = estimator.classes_[(df >= 0).astype(int)] - # Compute y_pred if needed + # Compute y_pred if needed. if y_pred is None: y_pred = estimator.predict(X) diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py index b7d698ca58163..8d2f7b202cab1 100644 --- a/sklearn/metrics/tests/test_score_objects.py +++ b/sklearn/metrics/tests/test_score_objects.py @@ -7,8 +7,8 @@ from sklearn.utils.testing import ignore_warnings from sklearn.utils.testing import SkipTest -from sklearn.metrics import (f1_score, r2_score, roc_auc_score, fbeta_score, - log_loss) +from sklearn.metrics import (accuracy_score, f1_score, r2_score, roc_auc_score, + fbeta_score, log_loss, mean_squared_error) from sklearn.metrics.cluster import adjusted_rand_score from sklearn.metrics.scorer import check_scoring, evaluate_scorers from sklearn.metrics import make_scorer, SCORERS @@ -17,6 +17,7 @@ from sklearn.linear_model import Ridge, LogisticRegression from sklearn.tree import DecisionTreeClassifier from sklearn.datasets import make_blobs +from sklearn.datasets import load_iris from sklearn.datasets import make_classification from sklearn.datasets import make_multilabel_classification from sklearn.datasets import load_diabetes @@ -208,19 +209,84 @@ def test_unsupervised_scorers(): assert_almost_equal(score1, score2) -def test_evaluate_scorers(): +def test_evaluate_scorers_binary(): X, y = make_classification(n_classes=2, random_state=0) + + # Test a classifier with decision_function. clf = LinearSVC() clf.fit(X, y) + s1, s2 = evaluate_scorers(clf, X, y, [SCORERS["f1"], SCORERS["roc_auc"]]) df = clf.decision_function(X) y_pred = clf.predict(X) - f1 = f1_score(y, y_pred) - roc = roc_auc_score(y, df.ravel()) - assert_almost_equal(s1, f1) - assert_almost_equal(s2, roc) + assert_almost_equal(s1, f1_score(y, y_pred)) + assert_almost_equal(s2, roc_auc_score(y, df)) + + # Test a classifier with predict_proba. + clf = LogisticRegression() + clf.fit(X, y) + + s1, s2 = evaluate_scorers(clf, X, y, [SCORERS["f1"], + SCORERS["roc_auc"]]) + y_proba = clf.predict_proba(X)[:, 1] + y_pred = clf.predict(X) + + assert_almost_equal(s1, f1_score(y, y_pred)) + assert_almost_equal(s2, roc_auc_score(y, y_proba)) + + +def test_evaluate_scorers_multiclass(): + iris = load_iris() + X, y = iris.data, iris.target + + # Test a classifier with decision_function. + clf = LinearSVC() + clf.fit(X, y) + + s1, s2 = evaluate_scorers(clf, X, y, [SCORERS["f1"], + SCORERS["accuracy"]]) + y_pred = clf.predict(X) + + assert_almost_equal(s1, f1_score(y, y_pred)) + assert_almost_equal(s2, accuracy_score(y, y_pred)) + + # Test a classifier with predict_proba. 
+ clf = LogisticRegression() + clf.fit(X, y) + + s1, s2, s3 = evaluate_scorers(clf, X, y, [SCORERS["f1"], + SCORERS["accuracy"], + SCORERS["log_loss"]]) + y_proba = clf.predict_proba(X) + y_pred = clf.predict(X) + + assert_almost_equal(s1, f1_score(y, y_pred)) + assert_almost_equal(s2, accuracy_score(y, y_pred)) + assert_almost_equal(s3, -log_loss(y, y_proba)) + + +def test_evaluate_scorers_regression(): + diabetes = load_diabetes() + X, y = diabetes.data, diabetes.target + + reg = Ridge() + reg.fit(X, y) + + s1, s2 = evaluate_scorers(reg, X, y, [SCORERS["r2"], + SCORERS["mean_squared_error"]]) + y_pred = reg.predict(X) + + assert_almost_equal(s1, r2_score(y, y_pred)) + assert_almost_equal(s2, -mean_squared_error(y, y_pred)) + + +def test_evaluate_scorers_exceptions(): + clf = LinearSVC() + # log_loss needs probabilities but LinearSVC does not have predict_proba. + assert_raises(ValueError, evaluate_scorers, clf, [], [], + [SCORERS["log_loss"]]) @ignore_warnings From 91ff4981dc8def2c952946c026221b4bf75f69a9 Mon Sep 17 00:00:00 2001 From: Mathieu Blondel Date: Thu, 16 Jan 2014 00:08:36 +0900 Subject: [PATCH 17/51] Support ranking by regression. --- sklearn/metrics/scorer.py | 16 +++++++++++----- sklearn/metrics/tests/test_score_objects.py | 19 +++++++++++++++++-- 2 files changed, 28 insertions(+), 7 deletions(-) diff --git a/sklearn/metrics/scorer.py b/sklearn/metrics/scorer.py index e03130d411ccf..e98b666a366e8 100644 --- a/sklearn/metrics/scorer.py +++ b/sklearn/metrics/scorer.py @@ -29,6 +29,7 @@ from .cluster import adjusted_rand_score from ..utils.multiclass import type_of_target from ..externals import six +from ..base import is_classifier class _Scorer(object): @@ -65,13 +66,16 @@ def evaluate_scorers(estimator, X, y, scorers): # We choose predict_proba first because its interface # is more consistent across the project. compute_proba = True - elif has_df: - compute_df = True - else: + continue + + if is_classifier(estimator) and not has_df: raise ValueError("%s needs continuous outputs but neither" "predict_proba nor decision_function " "are available in %s." % (scorer, estimator)) + if is_classifier(estimator): + compute_df = True + # Compute predict_proba or decision_function if needed. 
y_pred = None if compute_proba: @@ -84,7 +88,7 @@ def evaluate_scorers(estimator, X, y, scorers): y_pred = estimator.classes_[y_proba.argmax(axis=1)] - elif compute_df: + if compute_df: df = estimator.decision_function(X) # For multi-output multi-class estimator @@ -109,8 +113,10 @@ def evaluate_scorers(estimator, X, y, scorers): elif scorer.needs_threshold: if compute_proba: score = scorer.score_func(y, y_proba[:, 1], **scorer.kwargs) - else: + elif is_classifier(estimator): score = scorer.score_func(y, df.ravel(), **scorer.kwargs) + else: + score = scorer.score_func(y, y_pred, **scorer.kwargs) else: score = scorer.score_func(y, y_pred, **scorer.kwargs) diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py index 8d2f7b202cab1..b1ca52bf55477 100644 --- a/sklearn/metrics/tests/test_score_objects.py +++ b/sklearn/metrics/tests/test_score_objects.py @@ -8,14 +8,15 @@ from sklearn.utils.testing import SkipTest from sklearn.metrics import (accuracy_score, f1_score, r2_score, roc_auc_score, - fbeta_score, log_loss, mean_squared_error) + fbeta_score, log_loss, mean_squared_error, + average_precision_score) from sklearn.metrics.cluster import adjusted_rand_score from sklearn.metrics.scorer import check_scoring, evaluate_scorers from sklearn.metrics import make_scorer, SCORERS from sklearn.svm import LinearSVC from sklearn.cluster import KMeans from sklearn.linear_model import Ridge, LogisticRegression -from sklearn.tree import DecisionTreeClassifier +from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor from sklearn.datasets import make_blobs from sklearn.datasets import load_iris from sklearn.datasets import make_classification @@ -282,6 +283,20 @@ def test_evaluate_scorers_regression(): assert_almost_equal(s2, -mean_squared_error(y, y_pred)) +def test_evaluate_scorers_ranking_by_regression(): + X, y = make_classification(n_classes=2, random_state=0) + + reg = DecisionTreeRegressor() + reg.fit(X, y) + + s1, s2 = evaluate_scorers(reg, X, y, [SCORERS["roc_auc"], + SCORERS["average_precision"]]) + y_pred = reg.predict(X) + + assert_almost_equal(s1, roc_auc_score(y, y_pred)) + assert_almost_equal(s2, average_precision_score(y, y_pred)) + + def test_evaluate_scorers_exceptions(): clf = LinearSVC() # log_loss needs probabilities but LinearSVC does not have predict_proba. From 4a934f092487d7941ae01c074ec0294807ff2121 Mon Sep 17 00:00:00 2001 From: Mathieu Blondel Date: Thu, 16 Jan 2014 00:09:24 +0900 Subject: [PATCH 18/51] Support SVC. --- sklearn/metrics/scorer.py | 16 +++++++++++----- sklearn/metrics/tests/test_score_objects.py | 18 +++++++++--------- 2 files changed, 20 insertions(+), 14 deletions(-) diff --git a/sklearn/metrics/scorer.py b/sklearn/metrics/scorer.py index e98b666a366e8..758715df9072f 100644 --- a/sklearn/metrics/scorer.py +++ b/sklearn/metrics/scorer.py @@ -79,14 +79,20 @@ def evaluate_scorers(estimator, X, y, scorers): # Compute predict_proba or decision_function if needed. 
y_pred = None if compute_proba: - y_proba = estimator.predict_proba(X) + try: + y_proba = estimator.predict_proba(X) - # For multi-output multi-class estimator - #if isinstance(y_proba, list): - #y_proba = np.vstack([p[:, -1] for p in y_proba]).T + # For multi-output multi-class estimator + #if isinstance(y_proba, list): + #y_proba = np.vstack([p[:, -1] for p in y_proba]).T - y_pred = estimator.classes_[y_proba.argmax(axis=1)] + y_pred = estimator.classes_[y_proba.argmax(axis=1)] + except NotImplementedError: + # SVC has predict_proba but it may raise NotImplementedError + # if probabilities are not enabled. + compute_proba = False + compute_df = True if compute_df: df = estimator.decision_function(X) diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py index b1ca52bf55477..8f35064621a2a 100644 --- a/sklearn/metrics/tests/test_score_objects.py +++ b/sklearn/metrics/tests/test_score_objects.py @@ -13,7 +13,7 @@ from sklearn.metrics.cluster import adjusted_rand_score from sklearn.metrics.scorer import check_scoring, evaluate_scorers from sklearn.metrics import make_scorer, SCORERS -from sklearn.svm import LinearSVC +from sklearn.svm import LinearSVC, SVC from sklearn.cluster import KMeans from sklearn.linear_model import Ridge, LogisticRegression from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor @@ -214,16 +214,16 @@ def test_evaluate_scorers_binary(): X, y = make_classification(n_classes=2, random_state=0) # Test a classifier with decision_function. - clf = LinearSVC() - clf.fit(X, y) + for clf in (SVC(), LinearSVC()): + clf.fit(X, y) - s1, s2 = evaluate_scorers(clf, X, y, [SCORERS["f1"], - SCORERS["roc_auc"]]) - df = clf.decision_function(X) - y_pred = clf.predict(X) + s1, s2 = evaluate_scorers(clf, X, y, [SCORERS["f1"], + SCORERS["roc_auc"]]) + df = clf.decision_function(X) + y_pred = clf.predict(X) - assert_almost_equal(s1, f1_score(y, y_pred)) - assert_almost_equal(s2, roc_auc_score(y, df)) + assert_almost_equal(s1, f1_score(y, y_pred)) + assert_almost_equal(s2, roc_auc_score(y, df)) # Test a classifier with predict_proba. clf = LogisticRegression() From 314497a623af152265760a01e1cdd29d2ba4e3c3 Mon Sep 17 00:00:00 2001 From: Mathieu Blondel Date: Thu, 16 Jan 2014 01:24:07 +0900 Subject: [PATCH 19/51] Handle multi-label case. --- sklearn/metrics/scorer.py | 50 ++++++++++----------- sklearn/metrics/tests/test_score_objects.py | 34 +++++++------- 2 files changed, 41 insertions(+), 43 deletions(-) diff --git a/sklearn/metrics/scorer.py b/sklearn/metrics/scorer.py index 758715df9072f..96b6005ad7ad7 100644 --- a/sklearn/metrics/scorer.py +++ b/sklearn/metrics/scorer.py @@ -49,58 +49,58 @@ def __call__(self, estimator, X, y): def evaluate_scorers(estimator, X, y, scorers): has_pb = hasattr(estimator, "predict_proba") has_df = hasattr(estimator, "decision_function") + _is_classifier = is_classifier(estimator) + _type_of_y = type_of_target(y) # Make a first pass through scorers to determine if we need # predict_proba or decision_function. - compute_proba = False - compute_df = False + needs_proba = False + needs_df = False for scorer in scorers: if scorer.needs_proba: if not has_pb: raise ValueError("%s needs probabilities but predict_proba is" "not available in %s." % (scorer, estimator)) - compute_proba = True + needs_proba = True elif scorer.needs_threshold: if has_pb: # We choose predict_proba first because its interface # is more consistent across the project. 
- compute_proba = True + needs_proba = True continue - if is_classifier(estimator) and not has_df: + if _is_classifier and not has_df: raise ValueError("%s needs continuous outputs but neither" "predict_proba nor decision_function " "are available in %s." % (scorer, estimator)) - if is_classifier(estimator): - compute_df = True + if _is_classifier: + needs_df = True - # Compute predict_proba or decision_function if needed. + # Compute predict_proba if needed. + y_proba = None y_pred = None - if compute_proba: + if needs_proba: try: y_proba = estimator.predict_proba(X) - # For multi-output multi-class estimator - #if isinstance(y_proba, list): - #y_proba = np.vstack([p[:, -1] for p in y_proba]).T - y_pred = estimator.classes_[y_proba.argmax(axis=1)] - except NotImplementedError: + if _type_of_y == "binary": + y_proba = y_proba[:, 1] + + except (NotImplementedError, AttributeError): # SVC has predict_proba but it may raise NotImplementedError # if probabilities are not enabled. - compute_proba = False - compute_df = True + needs_proba = False + needs_df = True - if compute_df: + # Compute decision_function. + df = None + if needs_df: df = estimator.decision_function(X) - # For multi-output multi-class estimator - #if isinstance(df, list): - #df = np.vstack(p for p in df).T - if len(df.shape) == 2 and df.shape[1] >= 2: y_pred = estimator.classes_[df.argmax(axis=1)] else: @@ -117,10 +117,10 @@ def evaluate_scorers(estimator, X, y, scorers): score = scorer.score_func(y, y_proba, **scorer.kwargs) elif scorer.needs_threshold: - if compute_proba: - score = scorer.score_func(y, y_proba[:, 1], **scorer.kwargs) - elif is_classifier(estimator): - score = scorer.score_func(y, df.ravel(), **scorer.kwargs) + if y_proba is not None: + score = scorer.score_func(y, y_proba, **scorer.kwargs) + elif df is not None: + score = scorer.score_func(y, df, **scorer.kwargs) else: score = scorer.score_func(y, y_pred, **scorer.kwargs) diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py index 8f35064621a2a..1dbb1ffa32970 100644 --- a/sklearn/metrics/tests/test_score_objects.py +++ b/sklearn/metrics/tests/test_score_objects.py @@ -5,7 +5,6 @@ from sklearn.utils.testing import assert_almost_equal from sklearn.utils.testing import assert_raises from sklearn.utils.testing import ignore_warnings -from sklearn.utils.testing import SkipTest from sklearn.metrics import (accuracy_score, f1_score, r2_score, roc_auc_score, fbeta_score, log_loss, mean_squared_error, @@ -156,32 +155,31 @@ def test_thresholded_scorers_multilabel_indicator_data(): """Test that the scorer work with multilabel-indicator format for multilabel and multi-output multi-class classifier """ - raise SkipTest X, y = make_multilabel_classification(return_indicator=True, allow_unlabeled=False, random_state=0) X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0) # Multi-output multi-class predict_proba - clf = DecisionTreeClassifier() - clf.fit(X_train, y_train) - y_proba = clf.predict_proba(X_test) - score1 = SCORERS['roc_auc'](clf, X_test, y_test) - score2 = roc_auc_score(y_test, np.vstack(p[:, -1] for p in y_proba).T) - assert_almost_equal(score1, score2) + #clf = DecisionTreeClassifier() + #clf.fit(X_train, y_train) + #y_proba = clf.predict_proba(X_test) + #score1 = SCORERS['roc_auc'](clf, X_test, y_test) + #score2 = roc_auc_score(y_test, np.vstack(p[:, -1] for p in y_proba).T) + #assert_almost_equal(score1, score2) # Multi-output multi-class decision_function # TODO Is there any yet? 
- clf = DecisionTreeClassifier() - clf.fit(X_train, y_train) - clf._predict_proba = clf.predict_proba - clf.predict_proba = None - clf.decision_function = lambda X: [p[:, 1] for p in clf._predict_proba(X)] - - y_proba = clf.decision_function(X_test) - score1 = SCORERS['roc_auc'](clf, X_test, y_test) - score2 = roc_auc_score(y_test, np.vstack(p for p in y_proba).T) - assert_almost_equal(score1, score2) + #clf = DecisionTreeClassifier() + #clf.fit(X_train, y_train) + #clf._predict_proba = clf.predict_proba + #clf.predict_proba = None + #clf.decision_function = lambda X: [p[:, 1] for p in clf._predict_proba(X)] + + #y_proba = clf.decision_function(X_test) + #score1 = SCORERS['roc_auc'](clf, X_test, y_test) + #score2 = roc_auc_score(y_test, np.vstack(p for p in y_proba).T) + #assert_almost_equal(score1, score2) # Multilabel predict_proba clf = OneVsRestClassifier(DecisionTreeClassifier()) From 754c72d2b6a40acc42e432eb3cc2e918270cedf7 Mon Sep 17 00:00:00 2001 From: Mathieu Blondel Date: Thu, 16 Jan 2014 01:34:37 +0900 Subject: [PATCH 20/51] Test ranking with more than two relevance levels. --- sklearn/metrics/tests/test_score_objects.py | 30 +++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py index 1dbb1ffa32970..f99e1a2badcb7 100644 --- a/sklearn/metrics/tests/test_score_objects.py +++ b/sklearn/metrics/tests/test_score_objects.py @@ -25,6 +25,24 @@ from sklearn.grid_search import GridSearchCV from sklearn.multiclass import OneVsRestClassifier +# FIXME: temporary, to demonstrate ranking with several relevance levels. +def dcg_score(y_true, y_score, k=10, gains="exponential"): + order = np.argsort(y_score)[::-1] + y_true = np.take(y_true, order[:k]) + + if gains == "exponential": + gains = 2 ** y_true - 1 + elif gains == "linear": + gains = y_true + else: + raise ValueError("Invalid gains option.") + + # highest rank is 1 so +2 instead of +1 + discounts = np.log2(np.arange(len(y_true)) + 2) + return np.sum(gains / discounts) + +dcg_scorer = make_scorer(dcg_score, needs_threshold=True) + class EstimatorWithoutFit(object): """Dummy estimator to test check_scoring""" @@ -294,6 +312,18 @@ def test_evaluate_scorers_ranking_by_regression(): assert_almost_equal(s1, roc_auc_score(y, y_pred)) assert_almost_equal(s2, average_precision_score(y, y_pred)) + diabetes = load_diabetes() + X, y = diabetes.data, diabetes.target + + reg.fit(X, y) + + s1, s2 = evaluate_scorers(reg, X, y, [SCORERS["r2"], + dcg_scorer]) + y_pred = reg.predict(X) + + assert_almost_equal(s1, r2_score(y, y_pred)) + assert_almost_equal(s2, dcg_score(y, y_pred)) + def test_evaluate_scorers_exceptions(): clf = LinearSVC() From 7f4d7ad130085d6907050a6969fb38a597770218 Mon Sep 17 00:00:00 2001 From: Mathieu Blondel Date: Thu, 16 Jan 2014 13:38:38 +0900 Subject: [PATCH 21/51] Rename evaluate_scorers to _evaluate_scorers. 
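Batch evaluation of several scorers is an implementation detail of the metrics module, so the helper gets a leading underscore; make_scorer, check_scoring and the SCORERS registry remain the public entry points. For reference, a minimal sketch of the internal call pattern exercised by the tests in this series (the _evaluate_scorers helper and its batched return value exist only on this branch, not in a released scikit-learn):

    from sklearn.datasets import make_classification
    from sklearn.svm import LinearSVC
    from sklearn.metrics import SCORERS
    from sklearn.metrics.scorer import _evaluate_scorers

    X, y = make_classification(n_classes=2, random_state=0)
    clf = LinearSVC().fit(X, y)
    # decision_function is computed once and shared by both scorers;
    # the helper returns one score per scorer, in the order given.
    f1, auc = _evaluate_scorers(clf, X, y, [SCORERS["f1"], SCORERS["roc_auc"]])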
--- sklearn/metrics/scorer.py | 4 ++-- sklearn/metrics/tests/test_score_objects.py | 18 +++++++++--------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/sklearn/metrics/scorer.py b/sklearn/metrics/scorer.py index cd40c2a09ed71..18d629b04908a 100644 --- a/sklearn/metrics/scorer.py +++ b/sklearn/metrics/scorer.py @@ -43,10 +43,10 @@ def __init__(self, score_func, greater_is_better=True, needs_proba=False, self.kwargs = kwargs def __call__(self, estimator, X, y): - return evaluate_scorers(estimator, X, y, [self])[0] + return _evaluate_scorers(estimator, X, y, [self])[0] -def evaluate_scorers(estimator, X, y, scorers): +def _evaluate_scorers(estimator, X, y, scorers): has_pb = hasattr(estimator, "predict_proba") has_df = hasattr(estimator, "decision_function") _is_classifier = is_classifier(estimator) diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py index 9f23fdbe7f23b..83fce651bc693 100644 --- a/sklearn/metrics/tests/test_score_objects.py +++ b/sklearn/metrics/tests/test_score_objects.py @@ -12,7 +12,7 @@ fbeta_score, log_loss, mean_squared_error, average_precision_score) from sklearn.metrics.cluster import adjusted_rand_score -from sklearn.metrics.scorer import check_scoring, evaluate_scorers +from sklearn.metrics.scorer import check_scoring, _evaluate_scorers from sklearn.metrics import make_scorer, SCORERS from sklearn.svm import LinearSVC, SVC from sklearn.cluster import KMeans @@ -291,7 +291,7 @@ def test_evaluate_scorers_binary(): for clf in (SVC(), LinearSVC()): clf.fit(X, y) - s1, s2 = evaluate_scorers(clf, X, y, [SCORERS["f1"], + s1, s2 = _evaluate_scorers(clf, X, y, [SCORERS["f1"], SCORERS["roc_auc"]]) df = clf.decision_function(X) y_pred = clf.predict(X) @@ -303,7 +303,7 @@ def test_evaluate_scorers_binary(): clf = LogisticRegression() clf.fit(X, y) - s1, s2 = evaluate_scorers(clf, X, y, [SCORERS["f1"], + s1, s2 = _evaluate_scorers(clf, X, y, [SCORERS["f1"], SCORERS["roc_auc"]]) y_proba = clf.predict_proba(X)[:, 1] y_pred = clf.predict(X) @@ -320,7 +320,7 @@ def test_evaluate_scorers_multiclass(): clf = LinearSVC() clf.fit(X, y) - s1, s2 = evaluate_scorers(clf, X, y, [SCORERS["f1"], + s1, s2 = _evaluate_scorers(clf, X, y, [SCORERS["f1"], SCORERS["accuracy"]]) y_pred = clf.predict(X) @@ -331,7 +331,7 @@ def test_evaluate_scorers_multiclass(): clf = LogisticRegression() clf.fit(X, y) - s1, s2, s3 = evaluate_scorers(clf, X, y, [SCORERS["f1"], + s1, s2, s3 = _evaluate_scorers(clf, X, y, [SCORERS["f1"], SCORERS["accuracy"], SCORERS["log_loss"]]) y_proba = clf.predict_proba(X) @@ -349,7 +349,7 @@ def test_evaluate_scorers_regression(): reg = Ridge() reg.fit(X, y) - s1, s2 = evaluate_scorers(reg, X, y, [SCORERS["r2"], + s1, s2 = _evaluate_scorers(reg, X, y, [SCORERS["r2"], SCORERS["mean_squared_error"]]) y_pred = reg.predict(X) @@ -363,7 +363,7 @@ def test_evaluate_scorers_ranking_by_regression(): reg = DecisionTreeRegressor() reg.fit(X, y) - s1, s2 = evaluate_scorers(reg, X, y, [SCORERS["roc_auc"], + s1, s2 = _evaluate_scorers(reg, X, y, [SCORERS["roc_auc"], SCORERS["average_precision"]]) y_pred = reg.predict(X) @@ -375,7 +375,7 @@ def test_evaluate_scorers_ranking_by_regression(): reg.fit(X, y) - s1, s2 = evaluate_scorers(reg, X, y, [SCORERS["r2"], + s1, s2 = _evaluate_scorers(reg, X, y, [SCORERS["r2"], dcg_scorer]) y_pred = reg.predict(X) @@ -386,7 +386,7 @@ def test_evaluate_scorers_ranking_by_regression(): def test_evaluate_scorers_exceptions(): clf = LinearSVC() # log_loss needs probabilities but LinearSVC does 
not have predict_proba. - assert_raises(ValueError, evaluate_scorers, clf, [], [], + assert_raises(ValueError, _evaluate_scorers, clf, [], [], [SCORERS["log_loss"]]) From a756083e7767eff1862559d13ffb075a7807c3e5 Mon Sep 17 00:00:00 2001 From: Mathieu Blondel Date: Thu, 16 Jan 2014 14:11:44 +0900 Subject: [PATCH 22/51] Remove _score utility function. --- sklearn/cross_validation.py | 16 ++-------------- sklearn/feature_selection/rfe.py | 4 ++-- sklearn/learning_curve.py | 10 +++++++--- sklearn/metrics/scorer.py | 14 +++++++++++--- 4 files changed, 22 insertions(+), 22 deletions(-) diff --git a/sklearn/cross_validation.py b/sklearn/cross_validation.py index df042e2b6a5ac..01251a072e8ea 100644 --- a/sklearn/cross_validation.py +++ b/sklearn/cross_validation.py @@ -1184,9 +1184,9 @@ def _fit_and_score(estimator, X, y, scorer, train, test, verbose, parameters, estimator.fit(X_train, **fit_params) else: estimator.fit(X_train, y_train, **fit_params) - test_score = _score(estimator, X_test, y_test, scorer) + test_score = scorer(estimator, X_test, y_test) if return_train_score: - train_score = _score(estimator, X_train, y_train, scorer) + train_score = scorer(estimator, X_train, y_train) scoring_time = time.time() - start_time @@ -1235,18 +1235,6 @@ def _safe_split(estimator, X, y, indices, train_indices=None): return X_subset, y_subset -def _score(estimator, X_test, y_test, scorer): - """Compute the score of an estimator on a given test set.""" - if y_test is None: - score = scorer(estimator, X_test) - else: - score = scorer(estimator, X_test, y_test) - if not isinstance(score, numbers.Number): - raise ValueError("scoring must return a number, got %s (%s) instead." - % (str(score), type(score))) - return score - - def _permutation_test_score(estimator, X, y, cv, scorer): """Auxiliary function for permutation_test_score""" avg_score = [] diff --git a/sklearn/feature_selection/rfe.py b/sklearn/feature_selection/rfe.py index 01c99ceb526f4..05f376250c024 100644 --- a/sklearn/feature_selection/rfe.py +++ b/sklearn/feature_selection/rfe.py @@ -13,7 +13,7 @@ from ..base import clone from ..base import is_classifier from ..cross_validation import _check_cv as check_cv -from ..cross_validation import _safe_split, _score +from ..cross_validation import _safe_split from .base import SelectorMixin from ..metrics.scorer import check_scoring @@ -342,7 +342,7 @@ def fit(self, X, y): mask = np.where(ranking_ <= k + 1)[0] estimator = clone(self.estimator) estimator.fit(X_train[:, mask], y_train) - score = _score(estimator, X_test[:, mask], y_test, scorer) + score = scorer(estimator, X_test[:, mask], y_test) if self.verbose > 0: print("Finished fold with %d / %d feature ranks, score=%f" diff --git a/sklearn/learning_curve.py b/sklearn/learning_curve.py index 7989bc3534658..a650a11f75af3 100644 --- a/sklearn/learning_curve.py +++ b/sklearn/learning_curve.py @@ -10,7 +10,7 @@ from .cross_validation import _check_cv from .utils import check_arrays from .externals.joblib import Parallel, delayed -from .cross_validation import _safe_split, _score, _fit_and_score +from .cross_validation import _safe_split, _fit_and_score from .metrics.scorer import check_scoring @@ -204,18 +204,22 @@ def _incremental_fit_estimator(estimator, X, y, classes, train, test, """Train estimator on training subsets incrementally and compute scores.""" train_scores, test_scores = [], [] partitions = zip(train_sizes, np.split(train, train_sizes)[:-1]) + for n_train_samples, partial_train in partitions: train_subset = train[:n_train_samples] 
X_train, y_train = _safe_split(estimator, X, y, train_subset) X_partial_train, y_partial_train = _safe_split(estimator, X, y, partial_train) X_test, y_test = _safe_split(estimator, X, y, test, train_subset) + if y_partial_train is None: estimator.partial_fit(X_partial_train, classes=classes) else: estimator.partial_fit(X_partial_train, y_partial_train, classes=classes) - train_scores.append(_score(estimator, X_train, y_train, scorer)) - test_scores.append(_score(estimator, X_test, y_test, scorer)) + + train_scores.append(scorer(estimator, X_train, y_train)) + test_scores.append(scorer(estimator, X_test, y_test)) + return np.array((train_scores, test_scores)).T diff --git a/sklearn/metrics/scorer.py b/sklearn/metrics/scorer.py index 18d629b04908a..beae0ee27c56d 100644 --- a/sklearn/metrics/scorer.py +++ b/sklearn/metrics/scorer.py @@ -20,6 +20,7 @@ from abc import ABCMeta, abstractmethod from warnings import warn +import numbers import numpy as np @@ -127,6 +128,10 @@ def _evaluate_scorers(estimator, X, y, scorers): else: score = scorer.score_func(y, y_pred, **scorer.kwargs) + if not isinstance(score, numbers.Number): + raise ValueError("scoring must return a number, got %s (%s)" + " instead." % (str(score), type(score))) + sign = 1 if scorer.greater_is_better else -1 scores.append(sign * score) @@ -146,9 +151,12 @@ def get_scorer(scoring): return scorer -def _passthrough_scorer(estimator, *args, **kwargs): +def _default_scorer(estimator, X, y, *args, **kwargs): """Function that wraps estimator.score""" - return estimator.score(*args, **kwargs) + if y is None: + return estimator.score(X, *args, **kwargs) + else: + return estimator.score(X, y, *args, **kwargs) def check_scoring(estimator, scoring=None, allow_none=False, loss_func=None, @@ -204,7 +212,7 @@ def check_scoring(estimator, scoring=None, allow_none=False, loss_func=None, scorer = get_scorer(scoring) return scorer elif hasattr(estimator, 'score'): - return _passthrough_scorer + return _default_scorer elif not has_scoring: if allow_none: return None From b4255d8699ce8792ff04ee8075081f5c6086cefd Mon Sep 17 00:00:00 2001 From: Mathieu Blondel Date: Thu, 16 Jan 2014 17:11:38 +0900 Subject: [PATCH 23/51] Support for multiple scorers in cross_val_score. 
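With this change, scoring may also be a list of scorer specifications (e.g. metric names): each fold is fit once, every scorer is evaluated on that fold, and the return value becomes a 2-d array of shape (n_scorers, n_folds) instead of a 1-d array of fold scores. A usage sketch mirroring the test added in this patch (the list form of scoring is specific to this branch):

    from sklearn.datasets import make_classification
    from sklearn.linear_model import Perceptron
    from sklearn.cross_validation import cross_val_score

    X, y = make_classification(n_classes=2, random_state=0)
    clf = Perceptron(random_state=0)

    # One row per scorer, one column per fold.
    scores = cross_val_score(clf, X, y, cv=3, scoring=["f1", "roc_auc"])
    print(scores.shape)        # (2, 3)

    # A single scoring string keeps the old 1-d return value.
    f1_scores = cross_val_score(clf, X, y, cv=3, scoring="f1")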
--- sklearn/cross_validation.py | 77 ++++++++++++++++++-------- sklearn/grid_search.py | 9 ++- sklearn/learning_curve.py | 12 +++- sklearn/tests/test_cross_validation.py | 18 +++++- 4 files changed, 86 insertions(+), 30 deletions(-) diff --git a/sklearn/cross_validation.py b/sklearn/cross_validation.py index 01251a072e8ea..6e0fe40d330d2 100644 --- a/sklearn/cross_validation.py +++ b/sklearn/cross_validation.py @@ -27,7 +27,8 @@ from .utils.fixes import unique from .externals.joblib import Parallel, delayed, logger from .externals.six import with_metaclass -from .metrics.scorer import check_scoring +from .metrics.scorer import check_scoring, _evaluate_scorers + __all__ = ['Bootstrap', 'KFold', @@ -1087,20 +1088,38 @@ def cross_val_score(estimator, X, y=None, scoring=None, cv=None, n_jobs=1, """ X, y = check_arrays(X, y, sparse_format='csr', allow_lists=True) cv = _check_cv(cv, X, y, classifier=is_classifier(estimator)) - scorer = check_scoring(estimator, score_func=score_func, scoring=scoring) + + if isinstance(scoring, list): + scorers = [check_scoring(estimator, scoring=s) for s in scoring] + ret_1d = False + else: + scorers = [check_scoring(estimator, score_func=score_func, + scoring=scoring)] + ret_1d = True + # We clone the estimator to make sure that all the folds are # independent, and that it is pickle-able. parallel = Parallel(n_jobs=n_jobs, verbose=verbose, pre_dispatch=pre_dispatch) - scores = parallel(delayed(_fit_and_score)(clone(estimator), X, y, scorer, - train, test, verbose, None, - fit_params) + + # ret is a list of size n_folds. Each element of the list contains the tuple + # returned by _fit_and_score. + ret = parallel(delayed(_fit_and_score)(clone(estimator), X, y, scorers, + train, test, verbose, None, + fit_params) for train, test in cv) - return np.array(scores)[:, 0] + + # Retrieve n_scorers x n_folds 2d-array. + scores = np.array([r[0] for r in ret]).T + + if ret_1d: + return scores[0] + else: + return scores -def _fit_and_score(estimator, X, y, scorer, train, test, verbose, parameters, - fit_params, return_train_score=False, +def _fit_and_score(estimator, X, y, scorers, train, test, verbose, parameters, + fit_params, return_train_scores=False, return_parameters=False): """Fit estimator and compute scores for a given dataset split. @@ -1116,9 +1135,8 @@ def _fit_and_score(estimator, X, y, scorer, train, test, verbose, parameters, The target variable to try to predict in the case of supervised learning. - scoring : callable - A scorer callable object / function with signature - ``scorer(estimator, X, y)``. + scorers : list + A list of scorer objects train : array-like, shape = (n_train_samples,) Indices of training samples. @@ -1135,19 +1153,19 @@ def _fit_and_score(estimator, X, y, scorer, train, test, verbose, parameters, fit_params : dict or None Parameters that will be passed to ``estimator.fit``. - return_train_score : boolean, optional, default: False - Compute and return score on training set. + return_train_scores : boolean, optional, default: False + Compute and return scores on training set. return_parameters : boolean, optional, default: False Return parameters that has been used for the estimator. Returns ------- - test_score : float - Score on test set. + test_score : array of floats + Scores on test set. - train_score : float, optional - Score on training set. + train_score : array of floats, optional + Scores on training set. n_test_samples : int Number of test samples. 
@@ -1180,24 +1198,37 @@ def _fit_and_score(estimator, X, y, scorer, train, test, verbose, parameters, X_train, y_train = _safe_split(estimator, X, y, train) X_test, y_test = _safe_split(estimator, X, y, test, train) + if y_train is None: estimator.fit(X_train, **fit_params) else: estimator.fit(X_train, y_train, **fit_params) - test_score = scorer(estimator, X_test, y_test) - if return_train_score: - train_score = scorer(estimator, X_train, y_train) + + if len(scorers) == 1: + # We cannot use _evaluate_scorers here because the scorer might be + # estimator.score. + test_scores = np.array([scorers[0](estimator, X_test, y_test)]) + else: + test_scores = _evaluate_scorers(estimator, X_test, y_test, scorers) + + + if return_train_scores: + if len(scorers) == 1: + train_scores = np.array([scorers[0](estimator, X_train, y_train)]) + else: + train_scores = _evaluate_scorers(estimator, X_train, y_train, + scorers) scoring_time = time.time() - start_time if verbose > 2: - msg += ", score=%f" % test_score + msg += ", score=%s" % test_scores if verbose > 1: end_msg = "%s -%s" % (msg, logger.short_format_time(scoring_time)) print("[CV] %s %s" % ((64 - len(end_msg)) * '.', end_msg)) - ret = [train_score] if return_train_score else [] - ret.extend([test_score, _num_samples(X_test), scoring_time]) + ret = [train_scores] if return_train_scores else [] + ret.extend([test_scores, _num_samples(X_test), scoring_time]) if return_parameters: ret.append(parameters) return ret diff --git a/sklearn/grid_search.py b/sklearn/grid_search.py index 2dff365c568d2..e54116fdaa476 100644 --- a/sklearn/grid_search.py +++ b/sklearn/grid_search.py @@ -228,10 +228,10 @@ def fit_grid_point(X, y, estimator, parameters, train, test, scorer, n_samples_test : int Number of test samples in this split. """ - score, n_samples_test, _ = _fit_and_score(estimator, X, y, scorer, train, + scores, n_samples_test, _ = _fit_and_score(estimator, X, y, [scorer], train, test, verbose, parameters, fit_params) - return score, parameters, n_samples_test + return scores[0], parameters, n_samples_test def _check_param_grid(param_grid): @@ -374,7 +374,7 @@ def _fit(self, X, y, parameter_iterable): n_jobs=self.n_jobs, verbose=self.verbose, pre_dispatch=pre_dispatch)( delayed(_fit_and_score)( - clone(base_estimator), X, y, self.scorer_, train, test, + clone(base_estimator), X, y, [self.scorer_], train, test, self.verbose, parameters, self.fit_params, return_parameters=True) for parameters in parameter_iterable @@ -392,6 +392,9 @@ def _fit(self, X, y, parameter_iterable): all_scores = [] for this_score, this_n_test_samples, _, parameters in \ out[grid_start:grid_start + n_folds]: + # _fit_and_score returns a list even if there is only one + # scorer in the list. + this_score = this_score[0] all_scores.append(this_score) if self.iid: this_score *= this_n_test_samples diff --git a/sklearn/learning_curve.py b/sklearn/learning_curve.py index a650a11f75af3..91ca179ba34a5 100644 --- a/sklearn/learning_curve.py +++ b/sklearn/learning_curve.py @@ -127,11 +127,17 @@ def learning_curve(estimator, X, y, train_sizes=np.linspace(0.1, 1.0, 10), clone(estimator), X, y, classes, train, test, train_sizes_abs, scorer, verbose) for train, test in cv) else: + # ret is a list of size n_folds. Each element of the list contains the + # tuple returned by _fit_and_score. 
out = parallel(delayed(_fit_and_score)( - clone(estimator), X, y, scorer, train[:n_train_samples], test, - verbose, parameters=None, fit_params=None, return_train_score=True) + clone(estimator), X, y, [scorer], train[:n_train_samples], test, + verbose, parameters=None, fit_params=None, return_train_scores=True) for train, test in cv for n_train_samples in train_sizes_abs) - out = np.array(out)[:, :2] + + test_scores = [r[0][0] for r in out] + train_scores = [r[1][0] for r in out] + out = np.array([test_scores, train_scores]).T + n_cv_folds = out.shape[0]/n_unique_ticks out = out.reshape(n_cv_folds, n_unique_ticks, 2) diff --git a/sklearn/tests/test_cross_validation.py b/sklearn/tests/test_cross_validation.py index 73d82da7437c3..e2191565e8ba6 100644 --- a/sklearn/tests/test_cross_validation.py +++ b/sklearn/tests/test_cross_validation.py @@ -21,6 +21,7 @@ from sklearn import cross_validation as cval from sklearn.base import BaseEstimator +from sklearn.datasets import make_classification from sklearn.datasets import make_regression from sklearn.datasets import load_digits from sklearn.datasets import load_iris @@ -31,7 +32,7 @@ from sklearn.metrics import make_scorer from sklearn.externals import six -from sklearn.linear_model import Ridge +from sklearn.linear_model import Ridge, Perceptron from sklearn.svm import SVC @@ -460,6 +461,21 @@ def test_cross_val_score_precomputed(): linear_kernel.tolist(), y) +def test_cross_val_score_multiple_scorers(): + X, y = make_classification(n_classes=2) + clf = Perceptron(random_state=0) + + scores = cval.cross_val_score(clf, X, y, cv=3, scoring=["f1", "roc_auc"]) + assert_equal(scores.shape, (2, 3)) + + f1_scores = cval.cross_val_score(clf, X, y, cv=3, scoring="f1") + auc_scores = cval.cross_val_score(clf, X, y, cv=3, scoring="roc_auc") + scores2 = np.array([f1_scores, auc_scores]) + assert_equal(scores2.shape, (2, 3)) + + assert_array_almost_equal(scores, scores2) + + def test_cross_val_score_fit_params(): clf = MockClassifier() n_samples = X.shape[0] From 264013f31ce151806dd71991bdcbb6b64a4dd975 Mon Sep 17 00:00:00 2001 From: Mathieu Blondel Date: Thu, 16 Jan 2014 18:39:24 +0900 Subject: [PATCH 24/51] Refactoring for allowing mutiple scorers. 
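The search loop moves out of BaseSearchCV._fit into a module-level helper that fits every (parameter, fold) pair once and then aggregates results per scorer, returning parallel lists: one grid_scores list, one best_params entry and one best_score entry per scorer. The calling convention, as used by GridSearchCV in the diff below (single-scorer callers simply take element 0; the helper is private to this branch and is renamed again two patches later):

    # Inside BaseSearchCV._fit; names as in this patch, not a standalone snippet.
    grid_scores, best_params, best_scores = _fit(
        self.estimator, X, y, [self.scorer_], parameter_iterable,
        self.cv, self.pre_dispatch, self.fit_params, self.iid,
        self.n_jobs, self.verbose)

    self.grid_scores_ = grid_scores[0]   # one _CVScoreTuple per grid point
    self.best_params_ = best_params[0]
    self.best_score_ = best_scores[0]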
--- sklearn/grid_search.py | 167 ++++++++++++++++++++++------------------- 1 file changed, 90 insertions(+), 77 deletions(-) diff --git a/sklearn/grid_search.py b/sklearn/grid_search.py index e54116fdaa476..b16989df73dd8 100644 --- a/sklearn/grid_search.py +++ b/sklearn/grid_search.py @@ -182,6 +182,83 @@ def __len__(self): return self.n_iter +def _fit(estimator, X, y, scorers, parameter_iterable, cv, pre_dispatch, + fit_params, iid, n_jobs, verbose): + """Actual fitting, performing the search over parameters.""" + + n_samples = _num_samples(X) + X, y = check_arrays(X, y, allow_lists=True, sparse_format='csr') + + if y is not None: + if len(y) != n_samples: + raise ValueError('Target variable (y) has a different number ' + 'of samples (%i) than data (X: %i samples)' + % (len(y), n_samples)) + y = np.asarray(y) + cv = check_cv(cv, X, y, classifier=is_classifier(estimator)) + + if verbose > 0: + if isinstance(parameter_iterable, Sized): + n_candidates = len(parameter_iterable) + print("Fitting {0} folds for each of {1} candidates, totalling" + " {2} fits".format(len(cv), n_candidates, + n_candidates * len(cv))) + + base_estimator = clone(estimator) + + out = Parallel(n_jobs=n_jobs, verbose=verbose, pre_dispatch=pre_dispatch)( + delayed(_fit_and_score)( + clone(base_estimator), X, y, scorers, train, test, + verbose, parameters, fit_params, + return_parameters=True) + for parameters in parameter_iterable + for train, test in cv) + + # Out is a list of triplet: score, estimator, n_test_samples + n_fits = len(out) + n_folds = len(cv) + n_scorers = len(scorers) + + grid_scores = [] + for i in xrange(n_scorers): + grid_scores.append([]) + + for grid_start in range(0, n_fits, n_folds): + n_test_samples = 0 + scores = np.zeros(n_scorers) + all_scores = np.zeros((n_scorers, n_folds)) + + for j, (curr_scores, curr_n_test_samples, _, parameters) in \ + enumerate(out[grid_start:grid_start + n_folds]): + + all_scores[:, j] = curr_scores + + if iid: + curr_scores *= curr_n_test_samples + n_test_samples += curr_n_test_samples + + scores += curr_scores + + if iid: + scores /= float(n_test_samples) + else: + scores /= float(n_folds) + + for i in xrange(n_scorers): + # TODO: shall we also store the test_fold_sizes? + tup = _CVScoreTuple(parameters, scores[i], all_scores[i]) + grid_scores[i].append(tup) + + # Find the best parameters by comparing on the mean validation score: + # note that `sorted` is deterministic in the way it breaks ties + bests = [sorted(grid_scores[i], key=lambda x: x.mean_validation_score, + reverse=True)[0] for i in xrange(n_scorers)] + best_params = [best.parameters for best in bests] + best_scores = [best.mean_validation_score for best in bests] + + return grid_scores, best_params, best_scores + + def fit_grid_point(X, y, estimator, parameters, train, test, scorer, verbose, **fit_params): """Run fit on one set of parameters. 
@@ -340,96 +417,32 @@ def transform(self): return self.best_estimator_.transform def _fit(self, X, y, parameter_iterable): - """Actual fitting, performing the search over parameters.""" - - estimator = self.estimator - cv = self.cv self.scorer_ = check_scoring(self.estimator, scoring=self.scoring, loss_func=self.loss_func, score_func=self.score_func) - n_samples = _num_samples(X) - X, y = check_arrays(X, y, allow_lists=True, sparse_format='csr') - - if y is not None: - if len(y) != n_samples: - raise ValueError('Target variable (y) has a different number ' - 'of samples (%i) than data (X: %i samples)' - % (len(y), n_samples)) - y = np.asarray(y) - cv = check_cv(cv, X, y, classifier=is_classifier(estimator)) - - if self.verbose > 0: - if isinstance(parameter_iterable, Sized): - n_candidates = len(parameter_iterable) - print("Fitting {0} folds for each of {1} candidates, totalling" - " {2} fits".format(len(cv), n_candidates, - n_candidates * len(cv))) - - base_estimator = clone(self.estimator) - - pre_dispatch = self.pre_dispatch - - out = Parallel( - n_jobs=self.n_jobs, verbose=self.verbose, - pre_dispatch=pre_dispatch)( - delayed(_fit_and_score)( - clone(base_estimator), X, y, [self.scorer_], train, test, - self.verbose, parameters, self.fit_params, - return_parameters=True) - for parameters in parameter_iterable - for train, test in cv) - - # Out is a list of triplet: score, estimator, n_test_samples - n_fits = len(out) - n_folds = len(cv) - - scores = list() - grid_scores = list() - for grid_start in range(0, n_fits, n_folds): - n_test_samples = 0 - score = 0 - all_scores = [] - for this_score, this_n_test_samples, _, parameters in \ - out[grid_start:grid_start + n_folds]: - # _fit_and_score returns a list even if there is only one - # scorer in the list. - this_score = this_score[0] - all_scores.append(this_score) - if self.iid: - this_score *= this_n_test_samples - n_test_samples += this_n_test_samples - score += this_score - if self.iid: - score /= float(n_test_samples) - else: - score /= float(n_folds) - scores.append((score, parameters)) - # TODO: shall we also store the test_fold_sizes? - grid_scores.append(_CVScoreTuple( - parameters, - score, - np.array(all_scores))) - # Store the computed scores - self.grid_scores_ = grid_scores - - # Find the best parameters by comparing on the mean validation score: - # note that `sorted` is deterministic in the way it breaks ties - best = sorted(grid_scores, key=lambda x: x.mean_validation_score, - reverse=True)[0] - self.best_params_ = best.parameters - self.best_score_ = best.mean_validation_score + grid_scores, best_params, best_scores = _fit(self.estimator, X, y, + [self.scorer_], + parameter_iterable, + self.cv, self.pre_dispatch, + self.fit_params, self.iid, + self.n_jobs, self.verbose) + + self.grid_scores_ = grid_scores[0] + self.best_params_ = best_params[0] + self.best_score_ = best_scores[0] if self.refit: # fit the best estimator using the entire dataset # clone first to work around broken estimators - best_estimator = clone(base_estimator).set_params( - **best.parameters) + base_estimator = clone(self.estimator) + best_estimator = base_estimator.set_params(**self.best_params_) if y is not None: best_estimator.fit(X, y, **self.fit_params) else: best_estimator.fit(X, **self.fit_params) self.best_estimator_ = best_estimator + return self From 0feed968ef4aabd14921877895f9642868e180ee Mon Sep 17 00:00:00 2001 From: Mathieu Blondel Date: Thu, 16 Jan 2014 18:39:52 +0900 Subject: [PATCH 25/51] Define `parameters` upfront. 
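A minimal sketch of the pattern this commit removes (hypothetical values, not the actual grid-search results):

    # The loop variable stays bound after the loop ends, so code placed
    # below the loop silently depends on the *last* iteration.
    results = [([0.8], 25, {"C": 1}), ([0.9], 25, {"C": 1})]
    for scores, n_test, parameters in results:
        pass
    print(parameters)  # still the last iteration's value

    # Clearer: bind it once, explicitly, before iterating.
    parameters = results[0][-1]
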
We are not supposed to use `parameters` outside of the loop. And this makes the code very difficult to read. --- sklearn/grid_search.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sklearn/grid_search.py b/sklearn/grid_search.py index b16989df73dd8..0d8a32dc99f96 100644 --- a/sklearn/grid_search.py +++ b/sklearn/grid_search.py @@ -228,7 +228,10 @@ def _fit(estimator, X, y, scorers, parameter_iterable, cv, pre_dispatch, scores = np.zeros(n_scorers) all_scores = np.zeros((n_scorers, n_folds)) - for j, (curr_scores, curr_n_test_samples, _, parameters) in \ + # Parameters for this part of the grid. + parameters = out[grid_start][3] + + for j, (curr_scores, curr_n_test_samples, _, _) in \ enumerate(out[grid_start:grid_start + n_folds]): all_scores[:, j] = curr_scores From 0a667489918c0267e7438a4456562ccc1fb3e6e3 Mon Sep 17 00:00:00 2001 From: Mathieu Blondel Date: Thu, 16 Jan 2014 18:42:54 +0900 Subject: [PATCH 26/51] Use more informative name. --- sklearn/grid_search.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/sklearn/grid_search.py b/sklearn/grid_search.py index 0d8a32dc99f96..25c469604db44 100644 --- a/sklearn/grid_search.py +++ b/sklearn/grid_search.py @@ -182,8 +182,8 @@ def __len__(self): return self.n_iter -def _fit(estimator, X, y, scorers, parameter_iterable, cv, pre_dispatch, - fit_params, iid, n_jobs, verbose): +def _fit_parameter_iterable(estimator, X, y, scorers, parameter_iterable, cv, + pre_dispatch, fit_params, iid, n_jobs, verbose): """Actual fitting, performing the search over parameters.""" n_samples = _num_samples(X) @@ -424,12 +424,11 @@ def _fit(self, X, y, parameter_iterable): loss_func=self.loss_func, score_func=self.score_func) - grid_scores, best_params, best_scores = _fit(self.estimator, X, y, - [self.scorer_], - parameter_iterable, - self.cv, self.pre_dispatch, - self.fit_params, self.iid, - self.n_jobs, self.verbose) + grid_scores, best_params, best_scores = \ + _fit_parameter_iterable(self.estimator, X, y, [self.scorer_], + parameter_iterable, self.cv, + self.pre_dispatch, self.fit_params, + self.iid, self.n_jobs, self.verbose) self.grid_scores_ = grid_scores[0] self.best_params_ = best_params[0] From 6f68bfb9ea1e9f08e74a0af661d0c6eed580501e Mon Sep 17 00:00:00 2001 From: Mathieu Blondel Date: Thu, 16 Jan 2014 19:02:11 +0900 Subject: [PATCH 27/51] Put __repr__ back. --- sklearn/metrics/scorer.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sklearn/metrics/scorer.py b/sklearn/metrics/scorer.py index beae0ee27c56d..7f51320b62656 100644 --- a/sklearn/metrics/scorer.py +++ b/sklearn/metrics/scorer.py @@ -46,6 +46,14 @@ def __init__(self, score_func, greater_is_better=True, needs_proba=False, def __call__(self, estimator, X, y): return _evaluate_scorers(estimator, X, y, [self])[0] + def __repr__(self): + kwargs_string = "".join([", %s=%s" % (str(k), str(v)) + for k, v in self.kwargs.items()]) + return ("make_scorer(%s%s%s)" + % (self.score_func.__name__, + "" if self.greater_is_better else ", greater_is_better=False", + kwargs_string)) + def _evaluate_scorers(estimator, X, y, scorers): has_pb = hasattr(estimator, "predict_proba") From 114bec6c97cb0aa0bf45640854ff9c080adedd14 Mon Sep 17 00:00:00 2001 From: Mathieu Blondel Date: Thu, 16 Jan 2014 22:39:51 +0900 Subject: [PATCH 28/51] Deprecate fit_grid_point. 
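Callers of the public fit_grid_point helper should now see a DeprecationWarning, as added in the diff below. An illustrative sketch of how that surfaces (not part of the patch; it assumes the fit_grid_point signature shown earlier in this series and the branch's check_scoring helper):

    import warnings
    import numpy as np
    from sklearn.datasets import make_classification
    from sklearn.grid_search import fit_grid_point
    from sklearn.metrics.scorer import check_scoring
    from sklearn.svm import LinearSVC

    X, y = make_classification(random_state=0)      # 100 samples, 2 classes
    train, test = np.arange(80), np.arange(80, 100)
    clf = LinearSVC(random_state=0)
    scorer = check_scoring(clf, scoring="accuracy")

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        # Positional arguments follow fit_grid_point's signature; verbose=0.
        fit_grid_point(X, y, clf, {"C": 1.0}, train, test, scorer, 0)
    assert any(issubclass(w.category, DeprecationWarning) for w in caught)
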
--- sklearn/grid_search.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/sklearn/grid_search.py b/sklearn/grid_search.py index 25c469604db44..e5c6dd6c0f7c6 100644 --- a/sklearn/grid_search.py +++ b/sklearn/grid_search.py @@ -30,8 +30,8 @@ from .metrics.scorer import check_scoring -__all__ = ['GridSearchCV', 'ParameterGrid', 'fit_grid_point', - 'ParameterSampler', 'RandomizedSearchCV'] +__all__ = ['GridSearchCV', 'ParameterGrid', 'ParameterSampler', + 'RandomizedSearchCV'] class ParameterGrid(object): @@ -308,6 +308,9 @@ def fit_grid_point(X, y, estimator, parameters, train, test, scorer, n_samples_test : int Number of test samples in this split. """ + warnings.warn("fit_grid_point is deprecated and will be " + "removed in 0.17", DeprecationWarning, stacklevel=1) + scores, n_samples_test, _ = _fit_and_score(estimator, X, y, [scorer], train, test, verbose, parameters, fit_params) From aff769d2738bf3fcceb6189d6e41b9c3f00ffb12 Mon Sep 17 00:00:00 2001 From: Mathieu Blondel Date: Thu, 16 Jan 2014 23:46:40 +0900 Subject: [PATCH 29/51] Add grid_search_cv. --- sklearn/grid_search.py | 108 ++++++++++++++++++++++++++++++ sklearn/tests/test_grid_search.py | 21 +++++- 2 files changed, 127 insertions(+), 2 deletions(-) diff --git a/sklearn/grid_search.py b/sklearn/grid_search.py index e5c6dd6c0f7c6..66bba0df4caf3 100644 --- a/sklearn/grid_search.py +++ b/sklearn/grid_search.py @@ -316,6 +316,114 @@ def fit_grid_point(X, y, estimator, parameters, train, test, scorer, fit_params) return scores[0], parameters, n_samples_test +def grid_search_cv(estimator, param_grid, X, y=None, scoring=None, + fit_params=None, n_jobs=1, iid=True, + refit=True, cv=None, verbose=0, pre_dispatch='2*n_jobs'): + """Exhaustive search over specified parameter values for an estimator. + + Parameters + ---------- + estimator : object type that implements the "fit" and "predict" methods + A object of that type is instantiated for each grid point. + + param_grid : dict or list of dictionaries + Dictionary with parameters names (string) as keys and lists of + parameter settings to try as values, or a list of such + dictionaries, in which case the grids spanned by each dictionary + in the list are explored. This enables searching over any sequence + of parameter settings. + + X : array-like, shape = [n_samples, n_features] + Training vector, where n_samples is the number of samples and + n_features is the number of features. + + y : array-like, shape = [n_samples] or [n_samples, n_output], optional + Target relative to X for classification or regression; + None for unsupervised learning. + + scoring : string, callable or None, optional, default: None + A string (see model evaluation documentation) or + a scorer callable object / function with signature + ``scorer(estimator, X, y)``. + + fit_params : dict, optional + Parameters to pass to the fit method. + + n_jobs : int, optional + Number of jobs to run in parallel (default 1). + + iid : boolean, optional + If True, the data is assumed to be identically distributed across + the folds, and the loss minimized is the total loss per sample, + and not the mean loss across the folds. + + refit : boolean + Refit the best estimator with the entire dataset. + If "False", it is impossible to make predictions using + this GridSearchCV instance after fitting. + + cv : integer or cross-validation generator, optional + If an integer is passed, it is the number of folds (default 3). 
+ Specific cross-validation objects can be passed, see + sklearn.cross_validation module for the list of possible objects + + verbose : integer + Controls the verbosity: the higher, the more messages. + + pre_dispatch : int, or string, optional + Controls the number of jobs that get dispatched during parallel + execution. Reducing this number can be useful to avoid an + explosion of memory consumption when more jobs get dispatched + than CPUs can process. This parameter can be: + + - None, in which case all the jobs are immediately + created and spawned. Use this for lightweight and + fast-running jobs, to avoid delays due to on-demand + spawning of the jobs + + - An int, giving the exact number of total jobs that are + spawned + + - A string, giving an expression as a function of n_jobs, + as in '2*n_jobs' + """ + fit_params = fit_params if fit_params is not None else {} + if isinstance(scoring, list): + scorers = [check_scoring(estimator, scoring=s) for s in scoring] + ret_1d = False + else: + scorers = [check_scoring(estimator, scoring=scoring)] + ret_1d = True + + grid_scores, best_params, best_scores = \ + _fit_parameter_iterable(estimator, X, y, scorers, + ParameterGrid(param_grid), cv, pre_dispatch, + fit_params, iid, n_jobs, verbose) + best_estimators = [] + if refit: + for i in xrange(len(scorers)): + base_estimator = clone(estimator) + best_estimator = base_estimator.set_params(**best_params[i]) + best_estimators.append(best_estimator) + if y is not None: + best_estimator.fit(X, y, **fit_params) + else: + best_estimator.fit(X, **fit_params) + + if ret_1d: + grid_scores = grid_scores[0] + best_params = best_params[0] + best_scores = best_scores[0] + if refit: + best_estimators = best_estimators[0] + + ret = [best_params, best_scores, grid_scores] + + if refit: + ret.append(best_estimators) + + return ret + def _check_param_grid(param_grid): if hasattr(param_grid, 'items'): diff --git a/sklearn/tests/test_grid_search.py b/sklearn/tests/test_grid_search.py index 6d1a0d4f2ccc6..da80b7f8e26d9 100644 --- a/sklearn/tests/test_grid_search.py +++ b/sklearn/tests/test_grid_search.py @@ -28,8 +28,9 @@ from sklearn.datasets import make_classification from sklearn.datasets import make_blobs from sklearn.datasets import make_multilabel_classification -from sklearn.grid_search import (GridSearchCV, RandomizedSearchCV, - ParameterGrid, ParameterSampler) +from sklearn.grid_search import (grid_search_cv, GridSearchCV, + RandomizedSearchCV, ParameterGrid, + ParameterSampler) from sklearn.svm import LinearSVC, SVC from sklearn.tree import DecisionTreeRegressor from sklearn.tree import DecisionTreeClassifier @@ -644,3 +645,19 @@ def test_grid_search_with_multioutput_data(): correct_score = est.score(X[test], y[test]) assert_almost_equal(correct_score, cv_validation_scores[i]) + +def test_multiple_grid_search(): + clf = LinearSVC(random_state=0) + X, y = make_blobs(random_state=0, centers=2) + param_grid = {"C": [0.1, 1, 10]} + + ret = grid_search_cv(clf, param_grid, X, y, scoring=["f1", "roc_auc"]) + ret_f1 = grid_search_cv(clf, param_grid, X, y, scoring="f1") + ret_auc = grid_search_cv(clf, param_grid, X, y, scoring="roc_auc") + + for i in xrange(len(ret)): + assert_equal(len(ret[i]), 2) + + for i in (0, 1): + assert_equal(ret[i][0], ret_f1[i]) + assert_equal(ret[i][1], ret_auc[i]) From 55f41266f493905399438e8ca0e2736728e20e5e Mon Sep 17 00:00:00 2001 From: Mathieu Blondel Date: Fri, 17 Jan 2014 00:47:30 +0900 Subject: [PATCH 30/51] Refactor code. 
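This commit only moves code around; the behaviour of the grid_search_cv helper added in the previous commit is meant to stay the same. For reference, a usage sketch (illustrative only, mirroring test_multiple_grid_search above; with refit left at its default, a fitted best estimator per scorer is appended to the returned list):

    from sklearn.datasets import make_blobs
    from sklearn.grid_search import grid_search_cv
    from sklearn.svm import LinearSVC

    X, y = make_blobs(random_state=0, centers=2)
    clf = LinearSVC(random_state=0)
    param_grid = {"C": [0.1, 1, 10]}

    # With a list of scorers, each returned element is itself a list with
    # one entry per scorer, in the order
    # [best_params, best_scores, grid_scores, best_estimators].
    best_params, best_scores, grid_scores, best_estimators = \
        grid_search_cv(clf, param_grid, X, y, scoring=["f1", "roc_auc"])
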
--- sklearn/grid_search.py | 104 ++++++++++++++++++-------------------- sklearn/metrics/scorer.py | 3 ++ 2 files changed, 51 insertions(+), 56 deletions(-) diff --git a/sklearn/grid_search.py b/sklearn/grid_search.py index 66bba0df4caf3..09c64574daff6 100644 --- a/sklearn/grid_search.py +++ b/sklearn/grid_search.py @@ -182,10 +182,19 @@ def __len__(self): return self.n_iter -def _fit_parameter_iterable(estimator, X, y, scorers, parameter_iterable, cv, - pre_dispatch, fit_params, iid, n_jobs, verbose): +def _fit_param_iter(estimator, X, y, scoring, parameter_iterable, refit, + cv, pre_dispatch, fit_params, iid, n_jobs, verbose): """Actual fitting, performing the search over parameters.""" + fit_params = fit_params if fit_params is not None else {} + + if isinstance(scoring, list): + scorers = [check_scoring(estimator, scoring=s) for s in scoring] + ret_1d = False + else: + scorers = [check_scoring(estimator, scoring=scoring)] + ret_1d = True + n_samples = _num_samples(X) X, y = check_arrays(X, y, allow_lists=True, sparse_format='csr') @@ -259,7 +268,30 @@ def _fit_parameter_iterable(estimator, X, y, scorers, parameter_iterable, cv, best_params = [best.parameters for best in bests] best_scores = [best.mean_validation_score for best in bests] - return grid_scores, best_params, best_scores + best_estimators = [] + if refit: + for i in xrange(len(scorers)): + base_estimator = clone(estimator) + best_estimator = base_estimator.set_params(**best_params[i]) + best_estimators.append(best_estimator) + if y is not None: + best_estimator.fit(X, y, **fit_params) + else: + best_estimator.fit(X, **fit_params) + + if ret_1d: + grid_scores = grid_scores[0] + best_params = best_params[0] + best_scores = best_scores[0] + if refit: + best_estimators = best_estimators[0] + + ret = [best_params, best_scores, grid_scores] + + if refit: + ret.append(best_estimators) + + return ret def fit_grid_point(X, y, estimator, parameters, train, test, scorer, @@ -387,42 +419,9 @@ def grid_search_cv(estimator, param_grid, X, y=None, scoring=None, - A string, giving an expression as a function of n_jobs, as in '2*n_jobs' """ - fit_params = fit_params if fit_params is not None else {} - if isinstance(scoring, list): - scorers = [check_scoring(estimator, scoring=s) for s in scoring] - ret_1d = False - else: - scorers = [check_scoring(estimator, scoring=scoring)] - ret_1d = True - - grid_scores, best_params, best_scores = \ - _fit_parameter_iterable(estimator, X, y, scorers, - ParameterGrid(param_grid), cv, pre_dispatch, - fit_params, iid, n_jobs, verbose) - best_estimators = [] - if refit: - for i in xrange(len(scorers)): - base_estimator = clone(estimator) - best_estimator = base_estimator.set_params(**best_params[i]) - best_estimators.append(best_estimator) - if y is not None: - best_estimator.fit(X, y, **fit_params) - else: - best_estimator.fit(X, **fit_params) - - if ret_1d: - grid_scores = grid_scores[0] - best_params = best_params[0] - best_scores = best_scores[0] - if refit: - best_estimators = best_estimators[0] - - ret = [best_params, best_scores, grid_scores] - - if refit: - ret.append(best_estimators) - - return ret + param_grid = ParameterGrid(param_grid) + return _fit_param_iter(estimator, X, y, scoring, param_grid, refit, cv, + pre_dispatch, fit_params, iid, n_jobs, verbose) def _check_param_grid(param_grid): @@ -535,26 +534,18 @@ def _fit(self, X, y, parameter_iterable): loss_func=self.loss_func, score_func=self.score_func) - grid_scores, best_params, best_scores = \ - _fit_parameter_iterable(self.estimator, X, 
y, [self.scorer_], - parameter_iterable, self.cv, - self.pre_dispatch, self.fit_params, - self.iid, self.n_jobs, self.verbose) + ret = _fit_param_iter(self.estimator, X, y, self.scorer_, + parameter_iterable, self.refit, self.cv, + self.pre_dispatch, self.fit_params, self.iid, + self.n_jobs, self.verbose) - self.grid_scores_ = grid_scores[0] - self.best_params_ = best_params[0] - self.best_score_ = best_scores[0] + + self.best_params_ = ret[0] + self.best_score_ = ret[1] + self.grid_scores_ = ret[2] if self.refit: - # fit the best estimator using the entire dataset - # clone first to work around broken estimators - base_estimator = clone(self.estimator) - best_estimator = base_estimator.set_params(**self.best_params_) - if y is not None: - best_estimator.fit(X, y, **self.fit_params) - else: - best_estimator.fit(X, **self.fit_params) - self.best_estimator_ = best_estimator + self.best_estimator_ = ret[3] return self @@ -728,6 +719,7 @@ def fit(self, X, y=None, **params): warnings.warn("Additional parameters to GridSearchCV are ignored!" " The params argument will be removed in 0.15.", DeprecationWarning) + return self._fit(X, y, ParameterGrid(self.param_grid)) diff --git a/sklearn/metrics/scorer.py b/sklearn/metrics/scorer.py index 7f51320b62656..7b21ddcb75fdb 100644 --- a/sklearn/metrics/scorer.py +++ b/sklearn/metrics/scorer.py @@ -193,6 +193,9 @@ def check_scoring(estimator, scoring=None, allow_none=False, loss_func=None, A scorer callable object / function with signature ``scorer(estimator, X, y)``. """ + if isinstance(scoring, _Scorer): + return scoring + has_scoring = not (scoring is None and loss_func is None and score_func is None) if not hasattr(estimator, 'fit'): From 4bd6c9129cec932800c2f48959cbcf369a6c75ed Mon Sep 17 00:00:00 2001 From: Mathieu Blondel Date: Fri, 17 Jan 2014 00:57:16 +0900 Subject: [PATCH 31/51] Add randomized_search_cv. --- sklearn/grid_search.py | 82 +++++++++++++++++++++++++++++++ sklearn/tests/test_grid_search.py | 36 ++++++++------ 2 files changed, 104 insertions(+), 14 deletions(-) diff --git a/sklearn/grid_search.py b/sklearn/grid_search.py index 09c64574daff6..e876de8d86799 100644 --- a/sklearn/grid_search.py +++ b/sklearn/grid_search.py @@ -424,6 +424,88 @@ def grid_search_cv(estimator, param_grid, X, y=None, scoring=None, pre_dispatch, fit_params, iid, n_jobs, verbose) +def randomized_search_cv(estimator, param_distributions, X, y, n_iter=10, + scoring=None, fit_params=None, n_jobs=1, iid=True, + refit=True, cv=None, verbose=0, + pre_dispatch='2*n_jobs', random_state=None): + """Randomized search on hyper parameters. + + Parameters + ---------- + estimator : object type that implements the "fit" and "predict" methods + A object of that type is instantiated for each parameter setting. + + param_distributions : dict + Dictionary with parameters names (string) as keys and distributions + or lists of parameters to try. Distributions must provide a ``rvs`` + method for sampling (such as those from scipy.stats.distributions). + If a list is given, it is sampled uniformly. + + X : array-like, shape = [n_samples, n_features] + Training vector, where n_samples is the number of samples and + n_features is the number of features. + + y : array-like, shape = [n_samples] or [n_samples, n_output], optional + Target relative to X for classification or regression; + None for unsupervised learning. + + n_iter : int, default=10 + Number of parameter settings that are sampled. n_iter trades + off runtime vs quality of the solution. 
+ + scoring : string, callable or None, optional, default: None + A string (see model evaluation documentation) or + a scorer callable object / function with signature + ``scorer(estimator, X, y)``. + + fit_params : dict, optional + Parameters to pass to the fit method. + + n_jobs : int, optional + Number of jobs to run in parallel (default 1). + + pre_dispatch : int, or string, optional + Controls the number of jobs that get dispatched during parallel + execution. Reducing this number can be useful to avoid an + explosion of memory consumption when more jobs get dispatched + than CPUs can process. This parameter can be: + + - None, in which case all the jobs are immediately + created and spawned. Use this for lightweight and + fast-running jobs, to avoid delays due to on-demand + spawning of the jobs + + - An int, giving the exact number of total jobs that are + spawned + + - A string, giving an expression as a function of n_jobs, + as in '2*n_jobs' + + iid : boolean, optional + If True, the data is assumed to be identically distributed across + the folds, and the loss minimized is the total loss per sample, + and not the mean loss across the folds. + + cv : integer or cross-validation generator, optional + If an integer is passed, it is the number of folds (default 3). + Specific cross-validation objects can be passed, see + sklearn.cross_validation module for the list of possible objects + + refit : boolean + Refit the best estimator with the entire dataset. + If "False", it is impossible to make predictions using + this RandomizedSearchCV instance after fitting. + + verbose : integer + Controls the verbosity: the higher, the more messages. + """ + sampled_params = ParameterSampler(param_distributions, + n_iter, + random_state=random_state) + return _fit_param_iter(estimator, X, y, scoring, sampled_params, refit, cv, + pre_dispatch, fit_params, iid, n_jobs, verbose) + + def _check_param_grid(param_grid): if hasattr(param_grid, 'items'): param_grid = [param_grid] diff --git a/sklearn/tests/test_grid_search.py b/sklearn/tests/test_grid_search.py index da80b7f8e26d9..6d57e89855a8e 100644 --- a/sklearn/tests/test_grid_search.py +++ b/sklearn/tests/test_grid_search.py @@ -29,8 +29,8 @@ from sklearn.datasets import make_blobs from sklearn.datasets import make_multilabel_classification from sklearn.grid_search import (grid_search_cv, GridSearchCV, - RandomizedSearchCV, ParameterGrid, - ParameterSampler) + randomized_search_cv, RandomizedSearchCV, + ParameterGrid, ParameterSampler) from sklearn.svm import LinearSVC, SVC from sklearn.tree import DecisionTreeRegressor from sklearn.tree import DecisionTreeClassifier @@ -646,18 +646,26 @@ def test_grid_search_with_multioutput_data(): assert_almost_equal(correct_score, cv_validation_scores[i]) -def test_multiple_grid_search(): - clf = LinearSVC(random_state=0) - X, y = make_blobs(random_state=0, centers=2) - param_grid = {"C": [0.1, 1, 10]} - ret = grid_search_cv(clf, param_grid, X, y, scoring=["f1", "roc_auc"]) - ret_f1 = grid_search_cv(clf, param_grid, X, y, scoring="f1") - ret_auc = grid_search_cv(clf, param_grid, X, y, scoring="roc_auc") +def test_multiple_grid_search_cv(): + for n, func in enumerate((grid_search_cv, randomized_search_cv)): + clf = LinearSVC(random_state=0) + X, y = make_blobs(random_state=0, centers=2) + param_grid = {"C": [0.1, 1, 10]} + + if n == 0: + kwargs = dict() + else: + kwargs = dict(random_state=0) + + ret = func(clf, param_grid, X, y, scoring=["f1", "roc_auc"], **kwargs) + ret_f1 = func(clf, param_grid, X, y, 
scoring="f1", **kwargs) + ret_auc = func(clf, param_grid, X, y, scoring="roc_auc", **kwargs) + + for i in xrange(len(ret)): - for i in xrange(len(ret)): - assert_equal(len(ret[i]), 2) + assert_equal(len(ret[i]), 2) - for i in (0, 1): - assert_equal(ret[i][0], ret_f1[i]) - assert_equal(ret[i][1], ret_auc[i]) + for i in (0, 1): + assert_equal(ret[i][0], ret_f1[i]) + assert_equal(ret[i][1], ret_auc[i]) From b02a7e80a3de71067849e4782eb461b1fe8a170f Mon Sep 17 00:00:00 2001 From: Mathieu Blondel Date: Fri, 17 Jan 2014 00:59:46 +0900 Subject: [PATCH 32/51] Remove multi-output multiclass support from scorers for now. As per discussion with @arjoly and @jnothman. --- sklearn/metrics/tests/test_score_objects.py | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py index 83fce651bc693..2d2a39176a0ea 100644 --- a/sklearn/metrics/tests/test_score_objects.py +++ b/sklearn/metrics/tests/test_score_objects.py @@ -236,27 +236,6 @@ def test_thresholded_scorers_multilabel_indicator_data(): random_state=0) X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0) - # Multi-output multi-class predict_proba - #clf = DecisionTreeClassifier() - #clf.fit(X_train, y_train) - #y_proba = clf.predict_proba(X_test) - #score1 = SCORERS['roc_auc'](clf, X_test, y_test) - #score2 = roc_auc_score(y_test, np.vstack(p[:, -1] for p in y_proba).T) - #assert_almost_equal(score1, score2) - - # Multi-output multi-class decision_function - # TODO Is there any yet? - #clf = DecisionTreeClassifier() - #clf.fit(X_train, y_train) - #clf._predict_proba = clf.predict_proba - #clf.predict_proba = None - #clf.decision_function = lambda X: [p[:, 1] for p in clf._predict_proba(X)] - - #y_proba = clf.decision_function(X_test) - #score1 = SCORERS['roc_auc'](clf, X_test, y_test) - #score2 = roc_auc_score(y_test, np.vstack(p for p in y_proba).T) - #assert_almost_equal(score1, score2) - # Multilabel predict_proba clf = OneVsRestClassifier(DecisionTreeClassifier()) clf.fit(X_train, y_train) From 47dd41c625ad316e38a205fb0615d3aefb57b317 Mon Sep 17 00:00:00 2001 From: Mathieu Blondel Date: Fri, 17 Jan 2014 01:11:30 +0900 Subject: [PATCH 33/51] Update docstrings. --- sklearn/cross_validation.py | 7 +++-- sklearn/grid_search.py | 58 +++++++++++++++++++++++++++++++++++-- 2 files changed, 61 insertions(+), 4 deletions(-) diff --git a/sklearn/cross_validation.py b/sklearn/cross_validation.py index 6e0fe40d330d2..19daf190fa093 100644 --- a/sklearn/cross_validation.py +++ b/sklearn/cross_validation.py @@ -1044,10 +1044,12 @@ def cross_val_score(estimator, X, y=None, scoring=None, cv=None, n_jobs=1, The target variable to try to predict in the case of supervised learning. - scoring : string, callable or None, optional, default: None + scoring : string, callable, list of strings/callables or None, optional, + default: None A string (see model evaluation documentation) or a scorer callable object / function with signature ``scorer(estimator, X, y)``. + Lists can be used for randomized search of multiple metrics. cv : cross-validation generator, optional, default: None A cross-validation generator. If None, a 3-fold cross @@ -1083,8 +1085,9 @@ def cross_val_score(estimator, X, y=None, scoring=None, cv=None, n_jobs=1, Returns ------- - scores : array of float, shape=(len(list(cv)),) + scores : array of float, shape=(n_folds,) or (n_scoring, n_folds) Array of scores of the estimator for each run of the cross validation. 
+ The returned array is 2d is `scoring` is a list. """ X, y = check_arrays(X, y, sparse_format='csr', allow_lists=True) cv = _check_cv(cv, X, y, classifier=is_classifier(estimator)) diff --git a/sklearn/grid_search.py b/sklearn/grid_search.py index e876de8d86799..2f58c20ce9c55 100644 --- a/sklearn/grid_search.py +++ b/sklearn/grid_search.py @@ -373,10 +373,12 @@ def grid_search_cv(estimator, param_grid, X, y=None, scoring=None, Target relative to X for classification or regression; None for unsupervised learning. - scoring : string, callable or None, optional, default: None + scoring : string, callable, list of strings/callables or None, optional, + default: None A string (see model evaluation documentation) or a scorer callable object / function with signature ``scorer(estimator, X, y)``. + Lists can be used for randomized search of multiple metrics. fit_params : dict, optional Parameters to pass to the fit method. @@ -418,6 +420,31 @@ def grid_search_cv(estimator, param_grid, X, y=None, scoring=None, - A string, giving an expression as a function of n_jobs, as in '2*n_jobs' + + Returns + ------- + `best_params` : dict or list of dicts + Parameter setting that gave the best results on the hold out data. + + `best_score` : float or list of floats + Score of best_estimator on the left out data. + + `grid_scores` : list of named tuples or list of lists of named tuples + Contains scores for all parameter combinations in param_grid. + Each entry corresponds to one parameter setting. + Each named tuple has the attributes: + + * ``parameters``, a dict of parameter settings + * ``mean_validation_score``, the mean score over the + cross-validation folds + * ``cv_validation_scores``, the list of scores for each fold + + `best_estimator` : estimator or list of estimators (only if refit=True) + Estimator that was chosen by the search, i.e. estimator + which gave highest score (or smallest loss if specified) + on the left out data. + + Lists are returned when `scoring` is a list. """ param_grid = ParameterGrid(param_grid) return _fit_param_iter(estimator, X, y, scoring, param_grid, refit, cv, @@ -453,10 +480,12 @@ def randomized_search_cv(estimator, param_distributions, X, y, n_iter=10, Number of parameter settings that are sampled. n_iter trades off runtime vs quality of the solution. - scoring : string, callable or None, optional, default: None + scoring : string, callable, list of strings/callables or None, optional, + default: None A string (see model evaluation documentation) or a scorer callable object / function with signature ``scorer(estimator, X, y)``. + Lists can be used for randomized search of multiple metrics. fit_params : dict, optional Parameters to pass to the fit method. @@ -498,6 +527,31 @@ def randomized_search_cv(estimator, param_distributions, X, y, n_iter=10, verbose : integer Controls the verbosity: the higher, the more messages. + + Returns + ------- + `best_params` : dict or list of dicts + Parameter setting that gave the best results on the hold out data. + + `best_score` : float or list of floats + Score of best_estimator on the left out data. + + `grid_scores` : list of named tuples or list of lists of named tuples + Contains scores for all parameter combinations in param_grid. + Each entry corresponds to one parameter setting. 
+ Each named tuple has the attributes: + + * ``parameters``, a dict of parameter settings + * ``mean_validation_score``, the mean score over the + cross-validation folds + * ``cv_validation_scores``, the list of scores for each fold + + `best_estimator` : estimator or list of estimators (only if refit=True) + Estimator that was chosen by the search, i.e. estimator + which gave highest score (or smallest loss if specified) + on the left out data. + + Lists are returned when `scoring` is a list. """ sampled_params = ParameterSampler(param_distributions, n_iter, From c4905c3176a07519dc3da2cb4694c65e4c95b71b Mon Sep 17 00:00:00 2001 From: Mathieu Blondel Date: Fri, 17 Jan 2014 17:23:18 +0900 Subject: [PATCH 34/51] Support multiple metrics directly in GridSearchCV and RandomizedSearchCV. --- sklearn/grid_search.py | 240 ++---------------------------- sklearn/tests/test_grid_search.py | 44 +++--- 2 files changed, 40 insertions(+), 244 deletions(-) diff --git a/sklearn/grid_search.py b/sklearn/grid_search.py index 2f58c20ce9c55..abe6beae1ab53 100644 --- a/sklearn/grid_search.py +++ b/sklearn/grid_search.py @@ -185,9 +185,6 @@ def __len__(self): def _fit_param_iter(estimator, X, y, scoring, parameter_iterable, refit, cv, pre_dispatch, fit_params, iid, n_jobs, verbose): """Actual fitting, performing the search over parameters.""" - - fit_params = fit_params if fit_params is not None else {} - if isinstance(scoring, list): scorers = [check_scoring(estimator, scoring=s) for s in scoring] ret_1d = False @@ -280,13 +277,14 @@ def _fit_param_iter(estimator, X, y, scoring, parameter_iterable, refit, best_estimator.fit(X, **fit_params) if ret_1d: - grid_scores = grid_scores[0] + scorers = scorers[0] best_params = best_params[0] best_scores = best_scores[0] + grid_scores = grid_scores[0] if refit: best_estimators = best_estimators[0] - ret = [best_params, best_scores, grid_scores] + ret = [scorers, best_params, best_scores, grid_scores] if refit: ret.append(best_estimators) @@ -348,217 +346,6 @@ def fit_grid_point(X, y, estimator, parameters, train, test, scorer, fit_params) return scores[0], parameters, n_samples_test -def grid_search_cv(estimator, param_grid, X, y=None, scoring=None, - fit_params=None, n_jobs=1, iid=True, - refit=True, cv=None, verbose=0, pre_dispatch='2*n_jobs'): - """Exhaustive search over specified parameter values for an estimator. - - Parameters - ---------- - estimator : object type that implements the "fit" and "predict" methods - A object of that type is instantiated for each grid point. - - param_grid : dict or list of dictionaries - Dictionary with parameters names (string) as keys and lists of - parameter settings to try as values, or a list of such - dictionaries, in which case the grids spanned by each dictionary - in the list are explored. This enables searching over any sequence - of parameter settings. - - X : array-like, shape = [n_samples, n_features] - Training vector, where n_samples is the number of samples and - n_features is the number of features. - - y : array-like, shape = [n_samples] or [n_samples, n_output], optional - Target relative to X for classification or regression; - None for unsupervised learning. - - scoring : string, callable, list of strings/callables or None, optional, - default: None - A string (see model evaluation documentation) or - a scorer callable object / function with signature - ``scorer(estimator, X, y)``. - Lists can be used for randomized search of multiple metrics. 
- - fit_params : dict, optional - Parameters to pass to the fit method. - - n_jobs : int, optional - Number of jobs to run in parallel (default 1). - - iid : boolean, optional - If True, the data is assumed to be identically distributed across - the folds, and the loss minimized is the total loss per sample, - and not the mean loss across the folds. - - refit : boolean - Refit the best estimator with the entire dataset. - If "False", it is impossible to make predictions using - this GridSearchCV instance after fitting. - - cv : integer or cross-validation generator, optional - If an integer is passed, it is the number of folds (default 3). - Specific cross-validation objects can be passed, see - sklearn.cross_validation module for the list of possible objects - - verbose : integer - Controls the verbosity: the higher, the more messages. - - pre_dispatch : int, or string, optional - Controls the number of jobs that get dispatched during parallel - execution. Reducing this number can be useful to avoid an - explosion of memory consumption when more jobs get dispatched - than CPUs can process. This parameter can be: - - - None, in which case all the jobs are immediately - created and spawned. Use this for lightweight and - fast-running jobs, to avoid delays due to on-demand - spawning of the jobs - - - An int, giving the exact number of total jobs that are - spawned - - - A string, giving an expression as a function of n_jobs, - as in '2*n_jobs' - - Returns - ------- - `best_params` : dict or list of dicts - Parameter setting that gave the best results on the hold out data. - - `best_score` : float or list of floats - Score of best_estimator on the left out data. - - `grid_scores` : list of named tuples or list of lists of named tuples - Contains scores for all parameter combinations in param_grid. - Each entry corresponds to one parameter setting. - Each named tuple has the attributes: - - * ``parameters``, a dict of parameter settings - * ``mean_validation_score``, the mean score over the - cross-validation folds - * ``cv_validation_scores``, the list of scores for each fold - - `best_estimator` : estimator or list of estimators (only if refit=True) - Estimator that was chosen by the search, i.e. estimator - which gave highest score (or smallest loss if specified) - on the left out data. - - Lists are returned when `scoring` is a list. - """ - param_grid = ParameterGrid(param_grid) - return _fit_param_iter(estimator, X, y, scoring, param_grid, refit, cv, - pre_dispatch, fit_params, iid, n_jobs, verbose) - - -def randomized_search_cv(estimator, param_distributions, X, y, n_iter=10, - scoring=None, fit_params=None, n_jobs=1, iid=True, - refit=True, cv=None, verbose=0, - pre_dispatch='2*n_jobs', random_state=None): - """Randomized search on hyper parameters. - - Parameters - ---------- - estimator : object type that implements the "fit" and "predict" methods - A object of that type is instantiated for each parameter setting. - - param_distributions : dict - Dictionary with parameters names (string) as keys and distributions - or lists of parameters to try. Distributions must provide a ``rvs`` - method for sampling (such as those from scipy.stats.distributions). - If a list is given, it is sampled uniformly. - - X : array-like, shape = [n_samples, n_features] - Training vector, where n_samples is the number of samples and - n_features is the number of features. 
- - y : array-like, shape = [n_samples] or [n_samples, n_output], optional - Target relative to X for classification or regression; - None for unsupervised learning. - - n_iter : int, default=10 - Number of parameter settings that are sampled. n_iter trades - off runtime vs quality of the solution. - - scoring : string, callable, list of strings/callables or None, optional, - default: None - A string (see model evaluation documentation) or - a scorer callable object / function with signature - ``scorer(estimator, X, y)``. - Lists can be used for randomized search of multiple metrics. - - fit_params : dict, optional - Parameters to pass to the fit method. - - n_jobs : int, optional - Number of jobs to run in parallel (default 1). - - pre_dispatch : int, or string, optional - Controls the number of jobs that get dispatched during parallel - execution. Reducing this number can be useful to avoid an - explosion of memory consumption when more jobs get dispatched - than CPUs can process. This parameter can be: - - - None, in which case all the jobs are immediately - created and spawned. Use this for lightweight and - fast-running jobs, to avoid delays due to on-demand - spawning of the jobs - - - An int, giving the exact number of total jobs that are - spawned - - - A string, giving an expression as a function of n_jobs, - as in '2*n_jobs' - - iid : boolean, optional - If True, the data is assumed to be identically distributed across - the folds, and the loss minimized is the total loss per sample, - and not the mean loss across the folds. - - cv : integer or cross-validation generator, optional - If an integer is passed, it is the number of folds (default 3). - Specific cross-validation objects can be passed, see - sklearn.cross_validation module for the list of possible objects - - refit : boolean - Refit the best estimator with the entire dataset. - If "False", it is impossible to make predictions using - this RandomizedSearchCV instance after fitting. - - verbose : integer - Controls the verbosity: the higher, the more messages. - - Returns - ------- - `best_params` : dict or list of dicts - Parameter setting that gave the best results on the hold out data. - - `best_score` : float or list of floats - Score of best_estimator on the left out data. - - `grid_scores` : list of named tuples or list of lists of named tuples - Contains scores for all parameter combinations in param_grid. - Each entry corresponds to one parameter setting. - Each named tuple has the attributes: - - * ``parameters``, a dict of parameter settings - * ``mean_validation_score``, the mean score over the - cross-validation folds - * ``cv_validation_scores``, the list of scores for each fold - - `best_estimator` : estimator or list of estimators (only if refit=True) - Estimator that was chosen by the search, i.e. estimator - which gave highest score (or smallest loss if specified) - on the left out data. - - Lists are returned when `scoring` is a list. 
- """ - sampled_params = ParameterSampler(param_distributions, - n_iter, - random_state=random_state) - return _fit_param_iter(estimator, X, y, scoring, sampled_params, refit, cv, - pre_dispatch, fit_params, iid, n_jobs, verbose) - def _check_param_grid(param_grid): if hasattr(param_grid, 'items'): @@ -666,22 +453,25 @@ def transform(self): return self.best_estimator_.transform def _fit(self, X, y, parameter_iterable): - self.scorer_ = check_scoring(self.estimator, scoring=self.scoring, - loss_func=self.loss_func, - score_func=self.score_func) - - ret = _fit_param_iter(self.estimator, X, y, self.scorer_, + ret = _fit_param_iter(self.estimator, X, y, self.scoring, parameter_iterable, self.refit, self.cv, self.pre_dispatch, self.fit_params, self.iid, self.n_jobs, self.verbose) + self.scorer_ = ret[0] + self.best_params_ = ret[1] + self.best_score_ = ret[2] + self.grid_scores_ = ret[3] - self.best_params_ = ret[0] - self.best_score_ = ret[1] - self.grid_scores_ = ret[2] if self.refit: - self.best_estimator_ = ret[3] + if isinstance(ret[4], list): + self.best_estimators_ = ret[4] + # By default, select the best estimator corresponding to the + # first scorer. + self.best_estimator_ = ret[4][0] + else: + self.best_estimator_ = ret[4] return self diff --git a/sklearn/tests/test_grid_search.py b/sklearn/tests/test_grid_search.py index 6d57e89855a8e..ea188b66ecfe4 100644 --- a/sklearn/tests/test_grid_search.py +++ b/sklearn/tests/test_grid_search.py @@ -24,12 +24,11 @@ from scipy.stats import distributions -from sklearn.base import BaseEstimator +from sklearn.base import BaseEstimator, clone from sklearn.datasets import make_classification from sklearn.datasets import make_blobs from sklearn.datasets import make_multilabel_classification -from sklearn.grid_search import (grid_search_cv, GridSearchCV, - randomized_search_cv, RandomizedSearchCV, +from sklearn.grid_search import (GridSearchCV, RandomizedSearchCV, ParameterGrid, ParameterSampler) from sklearn.svm import LinearSVC, SVC from sklearn.tree import DecisionTreeRegressor @@ -648,24 +647,31 @@ def test_grid_search_with_multioutput_data(): def test_multiple_grid_search_cv(): - for n, func in enumerate((grid_search_cv, randomized_search_cv)): - clf = LinearSVC(random_state=0) - X, y = make_blobs(random_state=0, centers=2) - param_grid = {"C": [0.1, 1, 10]} + clf = LinearSVC(random_state=0) + X, y = make_blobs(random_state=0, centers=2) + param_grid = {"C": [0.1, 1, 10]} + scoring = ["f1", "roc_auc"] - if n == 0: - kwargs = dict() - else: - kwargs = dict(random_state=0) + gs = GridSearchCV(clf, param_grid, scoring=scoring) + rs = RandomizedSearchCV(clf, param_grid, scoring=scoring, random_state=0) + + for n, est in enumerate((gs, rs)): + est.fit(X, y) - ret = func(clf, param_grid, X, y, scoring=["f1", "roc_auc"], **kwargs) - ret_f1 = func(clf, param_grid, X, y, scoring="f1", **kwargs) - ret_auc = func(clf, param_grid, X, y, scoring="roc_auc", **kwargs) + for attr in ("scorer_", "best_score_", "grid_scores_", "best_params_"): + attr = getattr(est, attr) + assert_equal(len(attr), 2) - for i in xrange(len(ret)): + est_f1 = clone(est) + est_f1.scoring = "f1" + est_f1.fit(X, y) - assert_equal(len(ret[i]), 2) + est_auc = clone(est) + est_auc.scoring = "roc_auc" + est_auc.fit(X, y) - for i in (0, 1): - assert_equal(ret[i][0], ret_f1[i]) - assert_equal(ret[i][1], ret_auc[i]) + for attr in ("best_score_", "best_params_"): + assert_equal(getattr(est, attr)[0], + getattr(est_f1, attr)) + assert_equal(getattr(est, attr)[1], + getattr(est_auc, attr)) From 
aad77c87e37a5d459ed6b67ed0b2ea7712e0db71 Mon Sep 17 00:00:00 2001 From: Mathieu Blondel Date: Mon, 3 Feb 2014 17:08:28 +0900 Subject: [PATCH 35/51] Simplify inner loop. Code snippet by @jnothman. --- sklearn/grid_search.py | 31 +++++++++---------------------- 1 file changed, 9 insertions(+), 22 deletions(-) diff --git a/sklearn/grid_search.py b/sklearn/grid_search.py index abe6beae1ab53..1692689c1c2c2 100644 --- a/sklearn/grid_search.py +++ b/sklearn/grid_search.py @@ -230,32 +230,19 @@ def _fit_param_iter(estimator, X, y, scoring, parameter_iterable, refit, grid_scores.append([]) for grid_start in range(0, n_fits, n_folds): - n_test_samples = 0 - scores = np.zeros(n_scorers) - all_scores = np.zeros((n_scorers, n_folds)) - # Parameters for this part of the grid. - parameters = out[grid_start][3] - - for j, (curr_scores, curr_n_test_samples, _, _) in \ - enumerate(out[grid_start:grid_start + n_folds]): - - all_scores[:, j] = curr_scores - - if iid: - curr_scores *= curr_n_test_samples - n_test_samples += curr_n_test_samples - - scores += curr_scores - - if iid: - scores /= float(n_test_samples) - else: - scores /= float(n_folds) + grid_stop = grid_start + n_folds + fold_scores, n_test, _, parameters = zip(*out[grid_start:grid_stop]) + # `params` contains the same parameters n_fold times. + parameters = parameters[0] + # `fold_scores` is an n_folds x n_scorers 2-d array. + fold_scores = np.array(fold_scores) + weights = n_test if iid else None + mean_scores = np.average(fold_scores, axis=0, weights=weights) for i in xrange(n_scorers): # TODO: shall we also store the test_fold_sizes? - tup = _CVScoreTuple(parameters, scores[i], all_scores[i]) + tup = _CVScoreTuple(parameters, mean_scores[i], fold_scores[:, i]) grid_scores[i].append(tup) # Find the best parameters by comparing on the mean validation score: From a7e79f3c4ad818cbc7cb41add8fa476049fd4480 Mon Sep 17 00:00:00 2001 From: Mathieu Blondel Date: Mon, 3 Feb 2014 17:15:24 +0900 Subject: [PATCH 36/51] Fix incorrect comment. --- sklearn/grid_search.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/grid_search.py b/sklearn/grid_search.py index 1692689c1c2c2..1904fffa67871 100644 --- a/sklearn/grid_search.py +++ b/sklearn/grid_search.py @@ -220,7 +220,7 @@ def _fit_param_iter(estimator, X, y, scoring, parameter_iterable, refit, for parameters in parameter_iterable for train, test in cv) - # Out is a list of triplet: score, estimator, n_test_samples + # `out` is a list of tuples (fold_score, n_test, scoring_time, params). n_fits = len(out) n_folds = len(cv) n_scorers = len(scorers) From 8040432c17d982935368efa73fb223b966847911 Mon Sep 17 00:00:00 2001 From: Mathieu Blondel Date: Mon, 3 Feb 2014 17:24:51 +0900 Subject: [PATCH 37/51] Fix comments. --- sklearn/cross_validation.py | 10 ++++------ sklearn/grid_search.py | 2 +- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/sklearn/cross_validation.py b/sklearn/cross_validation.py index 19daf190fa093..1ef0290602b69 100644 --- a/sklearn/cross_validation.py +++ b/sklearn/cross_validation.py @@ -1100,20 +1100,18 @@ def cross_val_score(estimator, X, y=None, scoring=None, cv=None, n_jobs=1, scoring=scoring)] ret_1d = True - # We clone the estimator to make sure that all the folds are - # independent, and that it is pickle-able. parallel = Parallel(n_jobs=n_jobs, verbose=verbose, pre_dispatch=pre_dispatch) - # ret is a list of size n_folds. Each element of the list contains the tuple - # returned by _fit_and_score. 
- ret = parallel(delayed(_fit_and_score)(clone(estimator), X, y, scorers, + # `out` is a list of size n_folds. Each element of the list is a tuple + # (fold_scores, n_test, scoring_time) + out = parallel(delayed(_fit_and_score)(clone(estimator), X, y, scorers, train, test, verbose, None, fit_params) for train, test in cv) # Retrieve n_scorers x n_folds 2d-array. - scores = np.array([r[0] for r in ret]).T + scores = np.array([o[0] for o in out]).T if ret_1d: return scores[0] diff --git a/sklearn/grid_search.py b/sklearn/grid_search.py index 1904fffa67871..cbab51a175d5e 100644 --- a/sklearn/grid_search.py +++ b/sklearn/grid_search.py @@ -220,7 +220,7 @@ def _fit_param_iter(estimator, X, y, scoring, parameter_iterable, refit, for parameters in parameter_iterable for train, test in cv) - # `out` is a list of tuples (fold_score, n_test, scoring_time, params). + # `out` is a list of tuples (fold_scores, n_test, scoring_time, params). n_fits = len(out) n_folds = len(cv) n_scorers = len(scorers) From 2a9638477efd8795183d6925b04e6ea4549802c0 Mon Sep 17 00:00:00 2001 From: Mathieu Blondel Date: Mon, 3 Feb 2014 18:01:39 +0900 Subject: [PATCH 38/51] Return training time only. --- sklearn/cross_validation.py | 18 +++++++++--------- sklearn/grid_search.py | 2 +- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/sklearn/cross_validation.py b/sklearn/cross_validation.py index 1ef0290602b69..9ee8482b47718 100644 --- a/sklearn/cross_validation.py +++ b/sklearn/cross_validation.py @@ -1104,7 +1104,7 @@ def cross_val_score(estimator, X, y=None, scoring=None, cv=None, n_jobs=1, pre_dispatch=pre_dispatch) # `out` is a list of size n_folds. Each element of the list is a tuple - # (fold_scores, n_test, scoring_time) + # (fold_scores, n_test, train_time) out = parallel(delayed(_fit_and_score)(clone(estimator), X, y, scorers, train, test, verbose, None, fit_params) @@ -1171,8 +1171,8 @@ def _fit_and_score(estimator, X, y, scorers, train, test, verbose, parameters, n_test_samples : int Number of test samples. - scoring_time : float - Time spent for fitting and scoring in seconds. + train_time : float + Time spent for fitting in seconds. parameters : dict or None, optional The parameters that have been evaluated. @@ -1195,16 +1195,18 @@ def _fit_and_score(estimator, X, y, scorers, train, test, verbose, parameters, if parameters is not None: estimator.set_params(**parameters) - start_time = time.time() - X_train, y_train = _safe_split(estimator, X, y, train) X_test, y_test = _safe_split(estimator, X, y, test, train) + start_time = time.time() + if y_train is None: estimator.fit(X_train, **fit_params) else: estimator.fit(X_train, y_train, **fit_params) + train_time = time.time() - start_time + if len(scorers) == 1: # We cannot use _evaluate_scorers here because the scorer might be # estimator.score. 
@@ -1220,16 +1222,14 @@ def _fit_and_score(estimator, X, y, scorers, train, test, verbose, parameters, train_scores = _evaluate_scorers(estimator, X_train, y_train, scorers) - scoring_time = time.time() - start_time - if verbose > 2: msg += ", score=%s" % test_scores if verbose > 1: - end_msg = "%s -%s" % (msg, logger.short_format_time(scoring_time)) + end_msg = "%s -%s" % (msg, logger.short_format_time(train_time)) print("[CV] %s %s" % ((64 - len(end_msg)) * '.', end_msg)) ret = [train_scores] if return_train_scores else [] - ret.extend([test_scores, _num_samples(X_test), scoring_time]) + ret.extend([test_scores, _num_samples(X_test), train_time]) if return_parameters: ret.append(parameters) return ret diff --git a/sklearn/grid_search.py b/sklearn/grid_search.py index cbab51a175d5e..0272fb0975c4b 100644 --- a/sklearn/grid_search.py +++ b/sklearn/grid_search.py @@ -220,7 +220,7 @@ def _fit_param_iter(estimator, X, y, scoring, parameter_iterable, refit, for parameters in parameter_iterable for train, test in cv) - # `out` is a list of tuples (fold_scores, n_test, scoring_time, params). + # `out` is a list of tuples (fold_scores, n_test, train_time, params). n_fits = len(out) n_folds = len(cv) n_scorers = len(scorers) From 5933d981151da594a2ffbf4badb1c9edeeed73d3 Mon Sep 17 00:00:00 2001 From: Mathieu Blondel Date: Mon, 3 Feb 2014 18:14:47 +0900 Subject: [PATCH 39/51] Remove return_parameters. --- sklearn/cross_validation.py | 49 ++++--------------------------------- sklearn/grid_search.py | 3 +-- 2 files changed, 6 insertions(+), 46 deletions(-) diff --git a/sklearn/cross_validation.py b/sklearn/cross_validation.py index 9ee8482b47718..ad0d3f0cccbfe 100644 --- a/sklearn/cross_validation.py +++ b/sklearn/cross_validation.py @@ -1120,54 +1120,17 @@ def cross_val_score(estimator, X, y=None, scoring=None, cv=None, n_jobs=1, def _fit_and_score(estimator, X, y, scorers, train, test, verbose, parameters, - fit_params, return_train_scores=False, - return_parameters=False): + fit_params, return_train_scores=False): """Fit estimator and compute scores for a given dataset split. - Parameters - ---------- - estimator : estimator object implementing 'fit' - The object to use to fit the data. - - X : array-like of shape at least 2D - The data to fit. - - y : array-like, optional, default: None - The target variable to try to predict in the case of - supervised learning. - - scorers : list - A list of scorer objects - - train : array-like, shape = (n_train_samples,) - Indices of training samples. - - test : array-like, shape = (n_test_samples,) - Indices of test samples. - - verbose : integer - The verbosity level. - - parameters : dict or None - Parameters to be set on the estimator. - - fit_params : dict or None - Parameters that will be passed to ``estimator.fit``. - - return_train_scores : boolean, optional, default: False - Compute and return scores on training set. - - return_parameters : boolean, optional, default: False - Return parameters that has been used for the estimator. - Returns ------- - test_score : array of floats - Scores on test set. - train_score : array of floats, optional Scores on training set. + test_score : array of floats + Scores on test set. + n_test_samples : int Number of test samples. 
@@ -1229,9 +1192,7 @@ def _fit_and_score(estimator, X, y, scorers, train, test, verbose, parameters, print("[CV] %s %s" % ((64 - len(end_msg)) * '.', end_msg)) ret = [train_scores] if return_train_scores else [] - ret.extend([test_scores, _num_samples(X_test), train_time]) - if return_parameters: - ret.append(parameters) + ret.extend([test_scores, _num_samples(X_test), train_time, parameters]) return ret diff --git a/sklearn/grid_search.py b/sklearn/grid_search.py index 0272fb0975c4b..5aae5a77427a2 100644 --- a/sklearn/grid_search.py +++ b/sklearn/grid_search.py @@ -215,8 +215,7 @@ def _fit_param_iter(estimator, X, y, scoring, parameter_iterable, refit, out = Parallel(n_jobs=n_jobs, verbose=verbose, pre_dispatch=pre_dispatch)( delayed(_fit_and_score)( clone(base_estimator), X, y, scorers, train, test, - verbose, parameters, fit_params, - return_parameters=True) + verbose, parameters, fit_params) for parameters in parameter_iterable for train, test in cv) From 4ee0a8e83f28d9052870561eb9cb255a379d9d2b Mon Sep 17 00:00:00 2001 From: Mathieu Blondel Date: Mon, 3 Feb 2014 18:16:44 +0900 Subject: [PATCH 40/51] Cosmit: used += instead of extend. --- sklearn/cross_validation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/cross_validation.py b/sklearn/cross_validation.py index ad0d3f0cccbfe..a3a86e281e409 100644 --- a/sklearn/cross_validation.py +++ b/sklearn/cross_validation.py @@ -1192,7 +1192,7 @@ def _fit_and_score(estimator, X, y, scorers, train, test, verbose, parameters, print("[CV] %s %s" % ((64 - len(end_msg)) * '.', end_msg)) ret = [train_scores] if return_train_scores else [] - ret.extend([test_scores, _num_samples(X_test), train_time, parameters]) + ret += [test_scores, _num_samples(X_test), train_time, parameters] return ret From c08bdd8960f3da4ff7b343ef6f787e09e82040e5 Mon Sep 17 00:00:00 2001 From: Mathieu Blondel Date: Mon, 3 Feb 2014 18:48:56 +0900 Subject: [PATCH 41/51] Add cross_val_report. --- sklearn/cross_validation.py | 115 +++++++++++++++++++++++-- sklearn/tests/test_cross_validation.py | 41 ++++++++- 2 files changed, 149 insertions(+), 7 deletions(-) diff --git a/sklearn/cross_validation.py b/sklearn/cross_validation.py index a3a86e281e409..1cc8ce7cdfe29 100644 --- a/sklearn/cross_validation.py +++ b/sklearn/cross_validation.py @@ -1030,7 +1030,7 @@ def __len__(self): def cross_val_score(estimator, X, y=None, scoring=None, cv=None, n_jobs=1, verbose=0, fit_params=None, score_func=None, pre_dispatch='2*n_jobs'): - """Evaluate a score by cross-validation + """Evaluate test score by cross-validation Parameters ---------- @@ -1087,7 +1087,7 @@ def cross_val_score(estimator, X, y=None, scoring=None, cv=None, n_jobs=1, ------- scores : array of float, shape=(n_folds,) or (n_scoring, n_folds) Array of scores of the estimator for each run of the cross validation. - The returned array is 2d is `scoring` is a list. + The returned array is 2d if `scoring` is a list. """ X, y = check_arrays(X, y, sparse_format='csr', allow_lists=True) cv = _check_cv(cv, X, y, classifier=is_classifier(estimator)) @@ -1104,19 +1104,122 @@ def cross_val_score(estimator, X, y=None, scoring=None, cv=None, n_jobs=1, pre_dispatch=pre_dispatch) # `out` is a list of size n_folds. Each element of the list is a tuple - # (fold_scores, n_test, train_time) + # (test_scores, n_test, train_time) out = parallel(delayed(_fit_and_score)(clone(estimator), X, y, scorers, train, test, verbose, None, fit_params) for train, test in cv) # Retrieve n_scorers x n_folds 2d-array. 
- scores = np.array([o[0] for o in out]).T + test_scores = np.array([o[0] for o in out]).T + + if ret_1d: + return test_scores[0] + else: + return test_scores + + +def cross_val_report(estimator, X, y=None, scoring=None, cv=None, n_jobs=1, + verbose=0, fit_params=None, score_func=None, + pre_dispatch='2*n_jobs'): + """Evaluate a score by cross-validation + + Parameters + ---------- + estimator : estimator object implementing 'fit' + The object to use to fit the data. + + X : array-like of shape at least 2D + The data to fit. + + y : array-like, optional, default: None + The target variable to try to predict in the case of + supervised learning. + + scoring : string, callable, list of strings/callables or None, optional, + default: None + A string (see model evaluation documentation) or + a scorer callable object / function with signature + ``scorer(estimator, X, y)``. + Lists can be used for randomized search of multiple metrics. + + cv : cross-validation generator, optional, default: None + A cross-validation generator. If None, a 3-fold cross + validation is used or 3-fold stratified cross-validation + when y is supplied and estimator is a classifier. + + n_jobs : integer, optional + The number of CPUs to use to do the computation. -1 means + 'all CPUs'. + + verbose : integer, optional + The verbosity level. + + fit_params : dict, optional + Parameters to pass to the fit method of the estimator. + + pre_dispatch : int, or string, optional + Controls the number of jobs that get dispatched during parallel + execution. Reducing this number can be useful to avoid an + explosion of memory consumption when more jobs get dispatched + than CPUs can process. This parameter can be: + + - None, in which case all the jobs are immediately + created and spawned. Use this for lightweight and + fast-running jobs, to avoid delays due to on-demand + spawning of the jobs + + - An int, giving the exact number of total jobs that are + spawned + + - A string, giving an expression as a function of n_jobs, + as in '2*n_jobs' + + Returns + ------- + train_scores : array of float, shape=(n_folds,) or (n_scoring, n_folds) + Array of trainng scores of the estimator for each run of the cross + validation. The returned array is 2d if `scoring` is a list. + + test_scores : array of float, shape=(n_folds,) or (n_scoring, n_folds) + Array of test scores of the estimator for each run of the cross + validation. The returned array is 2d if `scoring` is a list. + + train_times : array of float, shape=(n_folds,) + Array of training times of the estimator for each run of the cross + validation. + """ + X, y = check_arrays(X, y, sparse_format='csr', allow_lists=True) + cv = _check_cv(cv, X, y, classifier=is_classifier(estimator)) + + if isinstance(scoring, list): + scorers = [check_scoring(estimator, scoring=s) for s in scoring] + ret_1d = False + else: + scorers = [check_scoring(estimator, score_func=score_func, + scoring=scoring)] + ret_1d = True + + parallel = Parallel(n_jobs=n_jobs, verbose=verbose, + pre_dispatch=pre_dispatch) + + # `out` is a list of size n_folds. Each element of the list is a tuple + # (train_scores, test_scores, n_test, train_time) + out = parallel(delayed(_fit_and_score)(clone(estimator), X, y, scorers, + train, test, verbose, None, + fit_params, + return_train_scores=True) + for train, test in cv) + + # Retrieve n_scorers x n_folds 2d-array. 
+ train_scores = np.array([o[0] for o in out]).T + test_scores = np.array([o[1] for o in out]).T + train_times = np.array([o[3] for o in out]) if ret_1d: - return scores[0] + return train_scores[0], test_scores[0], train_times else: - return scores + return train_scores, test_scores, train_times def _fit_and_score(estimator, X, y, scorers, train, test, verbose, parameters, diff --git a/sklearn/tests/test_cross_validation.py b/sklearn/tests/test_cross_validation.py index e2191565e8ba6..744045f51b97c 100644 --- a/sklearn/tests/test_cross_validation.py +++ b/sklearn/tests/test_cross_validation.py @@ -462,12 +462,14 @@ def test_cross_val_score_precomputed(): def test_cross_val_score_multiple_scorers(): - X, y = make_classification(n_classes=2) + X, y = make_classification(n_classes=2, random_state=0) clf = Perceptron(random_state=0) scores = cval.cross_val_score(clf, X, y, cv=3, scoring=["f1", "roc_auc"]) assert_equal(scores.shape, (2, 3)) + # Check that the results are the same as when cross_val_score is called + # individually. f1_scores = cval.cross_val_score(clf, X, y, cv=3, scoring="f1") auc_scores = cval.cross_val_score(clf, X, y, cv=3, scoring="roc_auc") scores2 = np.array([f1_scores, auc_scores]) @@ -506,6 +508,43 @@ class BrokenEstimator: assert_raises(TypeError, cval.cross_val_score, BrokenEstimator(), X) +def test_cross_val_report(): + X, y = make_classification(n_classes=2, random_state=0) + clf = Perceptron(random_state=0) + + tr_scores, te_scores, tr_times = cval.cross_val_report(clf, X, y, cv=3, + scoring="f1") + assert_equal(tr_scores.shape, (3,)) + assert_equal(te_scores.shape, (3,)) + assert_equal(tr_times.shape, (3,)) + + assert_greater(tr_scores.mean(), te_scores.mean()) + + +def test_cross_val_report_multiple_scorers(): + X, y = make_classification(n_classes=2, random_state=0) + clf = Perceptron(random_state=0) + + tr_scores, te_scores, tr_times = cval.cross_val_report(clf, X, y, cv=3, + scoring=["f1", + "roc_auc"]) + assert_equal(tr_scores.shape, (2, 3)) + assert_equal(te_scores.shape, (2, 3)) + assert_equal(tr_times.shape, (3,)) + + # Check that the results are the same as when cross_val_report is called + # individually. + f1_tr, f1_te, _ = cval.cross_val_report(clf, X, y, cv=3, scoring="f1") + auc_tr, auc_te, _ = cval.cross_val_report(clf, X, y, cv=3, + scoring="roc_auc") + + assert_array_almost_equal(tr_scores[0], f1_tr) + assert_array_almost_equal(te_scores[0], f1_te) + assert_array_almost_equal(tr_scores[1], auc_tr) + assert_array_almost_equal(te_scores[1], auc_te) + + + def test_train_test_split_errors(): assert_raises(ValueError, cval.train_test_split) assert_raises(ValueError, cval.train_test_split, range(3), train_size=1.1) From e0dfe238d08497c492b10d9bcf87cb99ea51af98 Mon Sep 17 00:00:00 2001 From: Mathieu Blondel Date: Tue, 4 Feb 2014 14:26:57 +0900 Subject: [PATCH 42/51] Remove score_func from cross_val_report. This function is deprecated, no need to add it to a new function. 
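For reference, a minimal usage sketch of the function after this change. The
`cross_val_report` name and signature are specific to this branch (they never
shipped in a scikit-learn release); the call below mirrors the tests added in
the previous patch:

    from sklearn.cross_validation import cross_val_report
    from sklearn.datasets import make_classification
    from sklearn.linear_model import Perceptron

    X, y = make_classification(n_classes=2, random_state=0)
    clf = Perceptron(random_state=0)
    # Per-fold training scores, test scores and fit times in one call;
    # metrics are now selected with `scoring` only, `score_func` is gone.
    tr_scores, te_scores, tr_times = cross_val_report(clf, X, y, cv=3,
                                                      scoring="f1")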
--- sklearn/cross_validation.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/sklearn/cross_validation.py b/sklearn/cross_validation.py index 1cc8ce7cdfe29..35051a97a3a91 100644 --- a/sklearn/cross_validation.py +++ b/sklearn/cross_validation.py @@ -1120,8 +1120,7 @@ def cross_val_score(estimator, X, y=None, scoring=None, cv=None, n_jobs=1, def cross_val_report(estimator, X, y=None, scoring=None, cv=None, n_jobs=1, - verbose=0, fit_params=None, score_func=None, - pre_dispatch='2*n_jobs'): + verbose=0, fit_params=None, pre_dispatch='2*n_jobs'): """Evaluate a score by cross-validation Parameters @@ -1196,8 +1195,7 @@ def cross_val_report(estimator, X, y=None, scoring=None, cv=None, n_jobs=1, scorers = [check_scoring(estimator, scoring=s) for s in scoring] ret_1d = False else: - scorers = [check_scoring(estimator, score_func=score_func, - scoring=scoring)] + scorers = [check_scoring(estimator, scoring=scoring)] ret_1d = True parallel = Parallel(n_jobs=n_jobs, verbose=verbose, From 9b5fe9a709d92a953dc9cd1444814477da1e1ac6 Mon Sep 17 00:00:00 2001 From: Mathieu Blondel Date: Fri, 7 Feb 2014 16:33:07 +0900 Subject: [PATCH 43/51] Accept tuples too. --- sklearn/cross_validation.py | 4 ++-- sklearn/grid_search.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/sklearn/cross_validation.py b/sklearn/cross_validation.py index 35051a97a3a91..60e40d3348a3d 100644 --- a/sklearn/cross_validation.py +++ b/sklearn/cross_validation.py @@ -1092,7 +1092,7 @@ def cross_val_score(estimator, X, y=None, scoring=None, cv=None, n_jobs=1, X, y = check_arrays(X, y, sparse_format='csr', allow_lists=True) cv = _check_cv(cv, X, y, classifier=is_classifier(estimator)) - if isinstance(scoring, list): + if isinstance(scoring, (tuple, list)): scorers = [check_scoring(estimator, scoring=s) for s in scoring] ret_1d = False else: @@ -1191,7 +1191,7 @@ def cross_val_report(estimator, X, y=None, scoring=None, cv=None, n_jobs=1, X, y = check_arrays(X, y, sparse_format='csr', allow_lists=True) cv = _check_cv(cv, X, y, classifier=is_classifier(estimator)) - if isinstance(scoring, list): + if isinstance(scoring, (tuple, list)): scorers = [check_scoring(estimator, scoring=s) for s in scoring] ret_1d = False else: diff --git a/sklearn/grid_search.py b/sklearn/grid_search.py index 5aae5a77427a2..15930f62b1065 100644 --- a/sklearn/grid_search.py +++ b/sklearn/grid_search.py @@ -185,7 +185,7 @@ def __len__(self): def _fit_param_iter(estimator, X, y, scoring, parameter_iterable, refit, cv, pre_dispatch, fit_params, iid, n_jobs, verbose): """Actual fitting, performing the search over parameters.""" - if isinstance(scoring, list): + if isinstance(scoring, (tuple, list)): scorers = [check_scoring(estimator, scoring=s) for s in scoring] ret_1d = False else: From 0346fa3f4ae4e86b80a6e9b4d3080cff4dd0f3c4 Mon Sep 17 00:00:00 2001 From: Mathieu Blondel Date: Fri, 7 Feb 2014 17:07:16 +0900 Subject: [PATCH 44/51] Accept callables in _evaluate_scorers. 
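A hedged sketch of what this enables, using the private helpers as they exist
on this branch (`_evaluate_scorers`, `check_scoring` and `_Scorer` live in
sklearn/metrics/scorer.py here and may differ elsewhere); `accuracy_callable`
is just an illustrative name:

    import numpy as np
    from sklearn.datasets import make_classification
    from sklearn.linear_model import Perceptron
    from sklearn.metrics.scorer import _evaluate_scorers, check_scoring

    def accuracy_callable(estimator, X, y):
        # Plain callable(estimator, X, y) returning a number.
        return np.mean(estimator.predict(X) == y)

    X, y = make_classification(n_classes=2, random_state=0)
    est = Perceptron(random_state=0).fit(X, y)
    # _Scorer objects and bare callables can now be mixed in one list;
    # callables are evaluated directly, without the prediction caching.
    scores = _evaluate_scorers(est, X, y,
                               [check_scoring(est, scoring="f1"),
                                accuracy_callable])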
--- sklearn/cross_validation.py | 8 +------- sklearn/metrics/scorer.py | 19 +++++++++++++++++-- 2 files changed, 18 insertions(+), 9 deletions(-) diff --git a/sklearn/cross_validation.py b/sklearn/cross_validation.py index 60e40d3348a3d..5216c9f0980e5 100644 --- a/sklearn/cross_validation.py +++ b/sklearn/cross_validation.py @@ -1271,13 +1271,7 @@ def _fit_and_score(estimator, X, y, scorers, train, test, verbose, parameters, train_time = time.time() - start_time - if len(scorers) == 1: - # We cannot use _evaluate_scorers here because the scorer might be - # estimator.score. - test_scores = np.array([scorers[0](estimator, X_test, y_test)]) - else: - test_scores = _evaluate_scorers(estimator, X_test, y_test, scorers) - + test_scores = _evaluate_scorers(estimator, X_test, y_test, scorers) if return_train_scores: if len(scorers) == 1: diff --git a/sklearn/metrics/scorer.py b/sklearn/metrics/scorer.py index 7b21ddcb75fdb..72460cd316704 100644 --- a/sklearn/metrics/scorer.py +++ b/sklearn/metrics/scorer.py @@ -56,16 +56,27 @@ def __repr__(self): def _evaluate_scorers(estimator, X, y, scorers): + """Evaluate a list of scorers. `scorers` may contain _Scorer objects or + callables of the form callable(estimator, X, y).""" + + if len(scorers) == 1 and not isinstance(scorers[0], _Scorer): + # We won't need any predictions if there is only one callable in the + # list. + return np.array([scorers[0](estimator, X, y)]) + has_pb = hasattr(estimator, "predict_proba") has_df = hasattr(estimator, "decision_function") _is_classifier = is_classifier(estimator) - _type_of_y = type_of_target(y) + _type_of_y = type_of_target(y) if y is not None else None # Make a first pass through scorers to determine if we need # predict_proba or decision_function. needs_proba = False needs_df = False for scorer in scorers: + if not isinstance(scorer, _Scorer): + continue # assumed to be a callable + if scorer.needs_proba: if not has_pb: raise ValueError("%s needs probabilities but predict_proba is" @@ -122,6 +133,10 @@ def _evaluate_scorers(estimator, X, y, scorers): # Compute scores. scores = [] for scorer in scorers: + if not isinstance(scorer, _Scorer): + scores.append(scorer(estimator, X, y)) + continue + if scorer.needs_proba: score = scorer.score_func(y, y_proba, **scorer.kwargs) @@ -189,7 +204,7 @@ def check_scoring(estimator, scoring=None, allow_none=False, loss_func=None, Returns ------- - scoring : callable + scorer : callable A scorer callable object / function with signature ``scorer(estimator, X, y)``. """ From 015e01e7906f6e8275cd5132618d8fdf6628e81e Mon Sep 17 00:00:00 2001 From: Mathieu Blondel Date: Fri, 7 Feb 2014 17:19:10 +0900 Subject: [PATCH 45/51] Unused imports. --- sklearn/metrics/scorer.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sklearn/metrics/scorer.py b/sklearn/metrics/scorer.py index 72460cd316704..2447307a9d9f5 100644 --- a/sklearn/metrics/scorer.py +++ b/sklearn/metrics/scorer.py @@ -18,7 +18,6 @@ # Arnaud Joly # License: Simplified BSD -from abc import ABCMeta, abstractmethod from warnings import warn import numbers From eaa3aebf20152d9b62a933527e0778ffa9904107 Mon Sep 17 00:00:00 2001 From: Mathieu Blondel Date: Fri, 7 Feb 2014 17:26:15 +0900 Subject: [PATCH 46/51] Clone early. 
--- sklearn/grid_search.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/sklearn/grid_search.py b/sklearn/grid_search.py index 15930f62b1065..00de861164f42 100644 --- a/sklearn/grid_search.py +++ b/sklearn/grid_search.py @@ -185,6 +185,9 @@ def __len__(self): def _fit_param_iter(estimator, X, y, scoring, parameter_iterable, refit, cv, pre_dispatch, fit_params, iid, n_jobs, verbose): """Actual fitting, performing the search over parameters.""" + + estimator = clone(estimator) + if isinstance(scoring, (tuple, list)): scorers = [check_scoring(estimator, scoring=s) for s in scoring] ret_1d = False @@ -210,11 +213,9 @@ def _fit_param_iter(estimator, X, y, scoring, parameter_iterable, refit, " {2} fits".format(len(cv), n_candidates, n_candidates * len(cv))) - base_estimator = clone(estimator) - out = Parallel(n_jobs=n_jobs, verbose=verbose, pre_dispatch=pre_dispatch)( delayed(_fit_and_score)( - clone(base_estimator), X, y, scorers, train, test, + clone(estimator), X, y, scorers, train, test, verbose, parameters, fit_params) for parameters in parameter_iterable for train, test in cv) @@ -254,8 +255,8 @@ def _fit_param_iter(estimator, X, y, scoring, parameter_iterable, refit, best_estimators = [] if refit: for i in xrange(len(scorers)): - base_estimator = clone(estimator) - best_estimator = base_estimator.set_params(**best_params[i]) + estimator = clone(estimator) + best_estimator = estimator.set_params(**best_params[i]) best_estimators.append(best_estimator) if y is not None: best_estimator.fit(X, y, **fit_params) From 96c36c77b6b22e1f32702b0f8b8ffba26afc1d1f Mon Sep 17 00:00:00 2001 From: Mathieu Blondel Date: Sun, 9 Feb 2014 23:55:53 +0900 Subject: [PATCH 47/51] Multiple scorer support in validation_curve. --- sklearn/learning_curve.py | 32 +++++++++++++++++++--------- sklearn/tests/test_learning_curve.py | 18 ++++++++++++++++ 2 files changed, 40 insertions(+), 10 deletions(-) diff --git a/sklearn/learning_curve.py b/sklearn/learning_curve.py index 7ed1736fffe77..fac55b0d9ba18 100644 --- a/sklearn/learning_curve.py +++ b/sklearn/learning_curve.py @@ -252,7 +252,7 @@ def validation_curve(estimator, X, y, param_name, param_range, cv=None, param_name : string Name of the parameter that will be varied. - param_range : array-like, shape (n_values,) + param_range : array-like, shape (n_params,) The values of the parameter that will be evaluated. cv : integer, cross-validation generator, optional @@ -278,10 +278,12 @@ def validation_curve(estimator, X, y, param_name, param_range, cv=None, Returns ------- - train_scores : array, shape (n_ticks, n_cv_folds) + train_scores : array, shape (n_params, n_cv_folds) or + (n_scorers, n_params, n_cv_folds) Scores on training sets. - test_scores : array, shape (n_ticks, n_cv_folds) + test_scores : array, shape (n_params, n_cv_folds) or + (n_scorers, n_params, n_cv_folds) Scores on test set. 
Notes @@ -291,22 +293,32 @@ def validation_curve(estimator, X, y, param_name, param_range, cv=None, """ X, y = check_arrays(X, y, sparse_format='csr', allow_lists=True) cv = _check_cv(cv, X, y, classifier=is_classifier(estimator)) - scorer = check_scoring(estimator, scoring=scoring) + + if isinstance(scoring, (tuple, list)): + scorer = [check_scoring(estimator, scoring=s) for s in scoring] + one_scorer = False + else: + scorer = [check_scoring(estimator, scoring=scoring)] + one_scorer = True parallel = Parallel(n_jobs=n_jobs, pre_dispatch=pre_dispatch, verbose=verbose) out = parallel(delayed(_fit_and_score)( - estimator, X, y, [scorer], train, test, verbose, + estimator, X, y, scorer, train, test, verbose, parameters={param_name : v}, fit_params=None, return_train_scores=True) for train, test in cv for v in param_range) - out = np.asarray(out) n_params = len(param_range) - n_cv_folds = out.shape[0] / n_params + n_folds = len(out) / n_params - out = np.array(out).reshape(n_cv_folds, n_params, -1) + shape = (n_folds, n_params, -1) + train_scores = np.array([o[0] for o in out]).reshape(shape).T + test_scores = np.array([o[1] for o in out]).reshape(shape).T - train_scores = out[:, :, 0].T - test_scores = out[:, :, 1].T + #train_times = np.array([o[3] for o in out]).reshape(n_folds, n_params) + + if one_scorer: + train_scores = train_scores[0] + test_scores = test_scores[0] return train_scores, test_scores diff --git a/sklearn/tests/test_learning_curve.py b/sklearn/tests/test_learning_curve.py index 42985823345ed..9d05d79aed28f 100644 --- a/sklearn/tests/test_learning_curve.py +++ b/sklearn/tests/test_learning_curve.py @@ -15,6 +15,7 @@ from sklearn.datasets import make_classification from sklearn.cross_validation import KFold from sklearn.linear_model import PassiveAggressiveClassifier +from sklearn.svm import LinearSVC class MockImprovingEstimator(BaseEstimator): @@ -242,3 +243,20 @@ def test_validation_curve(): param_range=param_range, cv=2) assert_array_almost_equal(train_scores.mean(axis=1), param_range) assert_array_almost_equal(test_scores.mean(axis=1), 1 - param_range) + + +def test_validation_curve_multiple_scorers(): + X, y = make_classification(n_classes=2, random_state=0) + clf = LinearSVC(random_state=0) + C = [0.1, 1, 10, 100] + train_scores, test_scores = validation_curve(clf, X, y, param_name="C", + param_range=C, cv=3, + scoring=["f1", "roc_auc"]) + assert_equal(train_scores.shape, (2, 4, 3)) + assert_equal(test_scores.shape, (2, 4, 3)) + + for i, scoring in enumerate(("f1", "roc_auc")): + tr, te = validation_curve(clf, X, y, param_name="C", param_range=C, + cv=3, scoring=scoring) + assert_array_almost_equal(train_scores[i], tr) + assert_array_almost_equal(test_scores[i], te) From d4ffc1f360b2c5faf29ade922da677a003f3a752 Mon Sep 17 00:00:00 2001 From: Mathieu Blondel Date: Mon, 10 Feb 2014 00:42:47 +0900 Subject: [PATCH 48/51] Add rudimentary validation with contours example. 
--- examples/plot_validation_contours.py | 41 ++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 examples/plot_validation_contours.py diff --git a/examples/plot_validation_contours.py b/examples/plot_validation_contours.py new file mode 100644 index 0000000000000..a7ff13f9d66b2 --- /dev/null +++ b/examples/plot_validation_contours.py @@ -0,0 +1,41 @@ +import matplotlib.pyplot as plt +import numpy as np +from sklearn.datasets import load_digits +from sklearn.svm import SVC +from sklearn.learning_curve import validation_curve +from sklearn.externals.joblib import Memory + +memory = Memory(cachedir=".", verbose=0) + +@memory.cache +def grid(X, y, Cs, gammas): + scores = np.zeros((len(Cs), len(gammas))) + + for i, C in enumerate(Cs): + tr, te = validation_curve(SVC(kernel="rbf", C=C), X, y, + param_name="gamma", param_range=gammas, cv=3) + scores[i] = te.mean(axis=1) + + return scores + +digits = load_digits() +X, y = digits.data, digits.target + +gammas = np.logspace(-6, -1, 5) +Cs = np.logspace(-3, 3, 5) + +scores = grid(X, y, Cs, gammas) + + +plt.xlabel("C") +plt.xscale("log") + +plt.ylabel("gamma") +plt.yscale("log") + +X1, X2 = np.meshgrid(Cs, gammas) +cs = plt.contour(X1, X2, scores) + +plt.colorbar(cs) + +plt.show() From c33f0f9d6804343028c04a9503562fb0cd5b2e18 Mon Sep 17 00:00:00 2001 From: Mathieu Blondel Date: Mon, 10 Feb 2014 01:15:20 +0900 Subject: [PATCH 49/51] Support param_grid in validation_curve. --- examples/plot_validation_contours.py | 9 +++------ sklearn/learning_curve.py | 19 ++++++++++--------- sklearn/tests/test_learning_curve.py | 21 ++++++++++++++------- 3 files changed, 27 insertions(+), 22 deletions(-) diff --git a/examples/plot_validation_contours.py b/examples/plot_validation_contours.py index a7ff13f9d66b2..1e3b3edceaaa6 100644 --- a/examples/plot_validation_contours.py +++ b/examples/plot_validation_contours.py @@ -9,14 +9,11 @@ @memory.cache def grid(X, y, Cs, gammas): - scores = np.zeros((len(Cs), len(gammas))) + param_grid = {"C": Cs, "gamma": gammas} - for i, C in enumerate(Cs): - tr, te = validation_curve(SVC(kernel="rbf", C=C), X, y, - param_name="gamma", param_range=gammas, cv=3) - scores[i] = te.mean(axis=1) + tr, te = validation_curve(SVC(kernel="rbf"), X, y, param_grid, cv=3) - return scores + return te.mean(axis=1).reshape(len(Cs), len(gammas)) digits = load_digits() X, y = digits.data, digits.target diff --git a/sklearn/learning_curve.py b/sklearn/learning_curve.py index fac55b0d9ba18..dbd0bbaf7e3d4 100644 --- a/sklearn/learning_curve.py +++ b/sklearn/learning_curve.py @@ -11,6 +11,7 @@ from .utils import check_arrays from .externals.joblib import Parallel, delayed from .cross_validation import _safe_split, _fit_and_score +from .grid_search import ParameterGrid from .metrics.scorer import check_scoring @@ -225,7 +226,7 @@ def _incremental_fit_estimator(estimator, X, y, classes, train, test, return np.array((train_scores, test_scores)).T -def validation_curve(estimator, X, y, param_name, param_range, cv=None, +def validation_curve(estimator, X, y, param_grid, cv=None, scoring=None, n_jobs=1, pre_dispatch="all", verbose=0): """Validation curve. @@ -249,11 +250,9 @@ def validation_curve(estimator, X, y, param_name, param_range, cv=None, Target relative to X for classification or regression; None for unsupervised learning. - param_name : string - Name of the parameter that will be varied. - - param_range : array-like, shape (n_params,) - The values of the parameter that will be evaluated. 
+ param_grid : dict or list of dictionaries + Dictionary with parameters names (string) as keys and lists of + parameter settings to try as values. cv : integer, cross-validation generator, optional If an integer is passed, it is the number of folds (defaults to 3). @@ -301,14 +300,16 @@ def validation_curve(estimator, X, y, param_name, param_range, cv=None, scorer = [check_scoring(estimator, scoring=scoring)] one_scorer = True + param_grid = ParameterGrid(param_grid) + n_params = len(param_grid) + parallel = Parallel(n_jobs=n_jobs, pre_dispatch=pre_dispatch, verbose=verbose) out = parallel(delayed(_fit_and_score)( estimator, X, y, scorer, train, test, verbose, - parameters={param_name : v}, fit_params=None, return_train_scores=True) - for train, test in cv for v in param_range) + parameters=params, fit_params=None, return_train_scores=True) + for train, test in cv for params in param_grid) - n_params = len(param_range) n_folds = len(out) / n_params shape = (n_folds, n_params, -1) diff --git a/sklearn/tests/test_learning_curve.py b/sklearn/tests/test_learning_curve.py index 9d05d79aed28f..b3a908bf44699 100644 --- a/sklearn/tests/test_learning_curve.py +++ b/sklearn/tests/test_learning_curve.py @@ -238,25 +238,32 @@ def test_validation_curve(): n_redundant=0, n_classes=2, n_clusters_per_class=1, random_state=0) param_range = np.linspace(0, 1, 10) + param_grid = {"param": param_range} train_scores, test_scores = validation_curve(MockEstimatorWithParameter(), - X, y, param_name="param", - param_range=param_range, cv=2) + X, y, param_grid, cv=2) assert_array_almost_equal(train_scores.mean(axis=1), param_range) assert_array_almost_equal(test_scores.mean(axis=1), 1 - param_range) +def test_validation_curve_2d(): + X, y = make_classification(n_classes=2, random_state=0) + param_grid = {"C": [1, 10, 100], "fit_intercept": [True, False]} + clf = LinearSVC(random_state=0) + train_scores, test_scores = validation_curve(clf, X, y, param_grid, cv=2) + assert_equal(train_scores.shape, (6, 2)) + assert_equal(test_scores.shape, (6, 2)) + + def test_validation_curve_multiple_scorers(): X, y = make_classification(n_classes=2, random_state=0) clf = LinearSVC(random_state=0) - C = [0.1, 1, 10, 100] - train_scores, test_scores = validation_curve(clf, X, y, param_name="C", - param_range=C, cv=3, + param_grid = {"C": [0.1, 1, 10, 100]} + train_scores, test_scores = validation_curve(clf, X, y, param_grid, cv=3, scoring=["f1", "roc_auc"]) assert_equal(train_scores.shape, (2, 4, 3)) assert_equal(test_scores.shape, (2, 4, 3)) for i, scoring in enumerate(("f1", "roc_auc")): - tr, te = validation_curve(clf, X, y, param_name="C", param_range=C, - cv=3, scoring=scoring) + tr, te = validation_curve(clf, X, y, param_grid, cv=3, scoring=scoring) assert_array_almost_equal(train_scores[i], tr) assert_array_almost_equal(test_scores[i], te) From 34ba9062e3868f6a646d8c56a8013e2d10cfbf88 Mon Sep 17 00:00:00 2001 From: Mathieu Blondel Date: Mon, 10 Feb 2014 01:32:24 +0900 Subject: [PATCH 50/51] Return training times. 
--- examples/plot_validation_contours.py | 32 +++++++++++++++++++--------- examples/plot_validation_curve.py | 6 +++--- sklearn/learning_curve.py | 7 ++++-- sklearn/tests/test_learning_curve.py | 22 +++++++++++++------ 4 files changed, 46 insertions(+), 21 deletions(-) diff --git a/examples/plot_validation_contours.py b/examples/plot_validation_contours.py index 1e3b3edceaaa6..ac8c308f53a2b 100644 --- a/examples/plot_validation_contours.py +++ b/examples/plot_validation_contours.py @@ -11,9 +11,14 @@ def grid(X, y, Cs, gammas): param_grid = {"C": Cs, "gamma": gammas} - tr, te = validation_curve(SVC(kernel="rbf"), X, y, param_grid, cv=3) + tr, te, times = validation_curve(SVC(kernel="rbf"), X, y, param_grid, cv=3) - return te.mean(axis=1).reshape(len(Cs), len(gammas)) + shape = (len(Cs), len(gammas)) + tr = tr.mean(axis=1).reshape(shape) + te = te.mean(axis=1).reshape(shape) + times = times.mean(axis=1).reshape(shape) + + return tr, te, times digits = load_digits() X, y = digits.data, digits.target @@ -21,18 +26,25 @@ def grid(X, y, Cs, gammas): gammas = np.logspace(-6, -1, 5) Cs = np.logspace(-3, 3, 5) -scores = grid(X, y, Cs, gammas) +tr, te, times = grid(X, y, Cs, gammas) + + +for title, values in (("Training accuracy", tr), + ("Test accuracy", te), + ("Training time", times)): + plt.figure() -plt.xlabel("C") -plt.xscale("log") + plt.title(title) + plt.xlabel("C") + plt.xscale("log") -plt.ylabel("gamma") -plt.yscale("log") + plt.ylabel("gamma") + plt.yscale("log") -X1, X2 = np.meshgrid(Cs, gammas) -cs = plt.contour(X1, X2, scores) + X1, X2 = np.meshgrid(Cs, gammas) + cs = plt.contour(X1, X2, values) -plt.colorbar(cs) + plt.colorbar(cs) plt.show() diff --git a/examples/plot_validation_curve.py b/examples/plot_validation_curve.py index 7b5f05050183a..0c6a056089c0b 100644 --- a/examples/plot_validation_curve.py +++ b/examples/plot_validation_curve.py @@ -23,9 +23,9 @@ X, y = digits.data, digits.target param_range = np.logspace(-6, -1, 5) -train_scores, test_scores = validation_curve( - SVC(), X, y, param_name="gamma", param_range=param_range, - cv=10, scoring="accuracy", n_jobs=1) +param_grid = {"gamma": param_range} +train_scores, test_scores, train_times = validation_curve( + SVC(), X, y, param_grid, cv=10, scoring="accuracy", n_jobs=1) train_scores_mean = np.mean(train_scores, axis=1) train_scores_std = np.std(train_scores, axis=1) test_scores_mean = np.mean(test_scores, axis=1) diff --git a/sklearn/learning_curve.py b/sklearn/learning_curve.py index dbd0bbaf7e3d4..5185eb915ff0a 100644 --- a/sklearn/learning_curve.py +++ b/sklearn/learning_curve.py @@ -285,6 +285,9 @@ def validation_curve(estimator, X, y, param_grid, cv=None, (n_scorers, n_params, n_cv_folds) Scores on test set. + train_times : array, shape (n_params, n_cv_folds) + Training times. 
+ Notes ----- See @@ -316,10 +319,10 @@ def validation_curve(estimator, X, y, param_grid, cv=None, train_scores = np.array([o[0] for o in out]).reshape(shape).T test_scores = np.array([o[1] for o in out]).reshape(shape).T - #train_times = np.array([o[3] for o in out]).reshape(n_folds, n_params) + train_times = np.array([o[3] for o in out]).reshape(n_folds, n_params).T if one_scorer: train_scores = train_scores[0] test_scores = test_scores[0] - return train_scores, test_scores + return train_scores, test_scores, train_times diff --git a/sklearn/tests/test_learning_curve.py b/sklearn/tests/test_learning_curve.py index b3a908bf44699..72e8bda244e2c 100644 --- a/sklearn/tests/test_learning_curve.py +++ b/sklearn/tests/test_learning_curve.py @@ -239,8 +239,12 @@ def test_validation_curve(): n_clusters_per_class=1, random_state=0) param_range = np.linspace(0, 1, 10) param_grid = {"param": param_range} - train_scores, test_scores = validation_curve(MockEstimatorWithParameter(), - X, y, param_grid, cv=2) + est = MockEstimatorWithParameter() + train_scores, test_scores, train_times = validation_curve(est, X, y, + param_grid, cv=2) + assert_equal(train_scores.shape, (10, 2)) + assert_equal(test_scores.shape, (10, 2)) + assert_equal(train_times.shape, (10, 2)) assert_array_almost_equal(train_scores.mean(axis=1), param_range) assert_array_almost_equal(test_scores.mean(axis=1), 1 - param_range) @@ -249,21 +253,27 @@ def test_validation_curve_2d(): X, y = make_classification(n_classes=2, random_state=0) param_grid = {"C": [1, 10, 100], "fit_intercept": [True, False]} clf = LinearSVC(random_state=0) - train_scores, test_scores = validation_curve(clf, X, y, param_grid, cv=2) + train_scores, test_scores, train_times = validation_curve(clf, X, y, + param_grid, cv=2) assert_equal(train_scores.shape, (6, 2)) assert_equal(test_scores.shape, (6, 2)) + assert_equal(train_times.shape, (6, 2)) def test_validation_curve_multiple_scorers(): X, y = make_classification(n_classes=2, random_state=0) clf = LinearSVC(random_state=0) param_grid = {"C": [0.1, 1, 10, 100]} - train_scores, test_scores = validation_curve(clf, X, y, param_grid, cv=3, - scoring=["f1", "roc_auc"]) + scoring = ["f1", "roc_auc"] + train_scores, test_scores, train_times = validation_curve(clf, X, y, + param_grid, cv=3, + scoring=scoring) assert_equal(train_scores.shape, (2, 4, 3)) assert_equal(test_scores.shape, (2, 4, 3)) + assert_equal(train_times.shape, (4, 3)) for i, scoring in enumerate(("f1", "roc_auc")): - tr, te = validation_curve(clf, X, y, param_grid, cv=3, scoring=scoring) + tr, te, ti = validation_curve(clf, X, y, param_grid, cv=3, + scoring=scoring) assert_array_almost_equal(train_scores[i], tr) assert_array_almost_equal(test_scores[i], te) From 7317c3196317fc7c13e468da93ac7f5abb5c38bf Mon Sep 17 00:00:00 2001 From: Mathieu Blondel Date: Mon, 10 Feb 2014 01:35:33 +0900 Subject: [PATCH 51/51] Remove cross_val_report. 
--- sklearn/cross_validation.py | 101 ------------------------- sklearn/tests/test_cross_validation.py | 37 --------- 2 files changed, 138 deletions(-) diff --git a/sklearn/cross_validation.py b/sklearn/cross_validation.py index 0ea9acf867bde..46cf2e926333d 100644 --- a/sklearn/cross_validation.py +++ b/sklearn/cross_validation.py @@ -1131,107 +1131,6 @@ def cross_val_score(estimator, X, y=None, scoring=None, cv=None, n_jobs=1, return test_scores -def cross_val_report(estimator, X, y=None, scoring=None, cv=None, n_jobs=1, - verbose=0, fit_params=None, pre_dispatch='2*n_jobs'): - """Evaluate a score by cross-validation - - Parameters - ---------- - estimator : estimator object implementing 'fit' - The object to use to fit the data. - - X : array-like of shape at least 2D - The data to fit. - - y : array-like, optional, default: None - The target variable to try to predict in the case of - supervised learning. - - scoring : string, callable, list of strings/callables or None, optional, - default: None - A string (see model evaluation documentation) or - a scorer callable object / function with signature - ``scorer(estimator, X, y)``. - Lists can be used for randomized search of multiple metrics. - - cv : cross-validation generator, optional, default: None - A cross-validation generator. If None, a 3-fold cross - validation is used or 3-fold stratified cross-validation - when y is supplied and estimator is a classifier. - - n_jobs : integer, optional - The number of CPUs to use to do the computation. -1 means - 'all CPUs'. - - verbose : integer, optional - The verbosity level. - - fit_params : dict, optional - Parameters to pass to the fit method of the estimator. - - pre_dispatch : int, or string, optional - Controls the number of jobs that get dispatched during parallel - execution. Reducing this number can be useful to avoid an - explosion of memory consumption when more jobs get dispatched - than CPUs can process. This parameter can be: - - - None, in which case all the jobs are immediately - created and spawned. Use this for lightweight and - fast-running jobs, to avoid delays due to on-demand - spawning of the jobs - - - An int, giving the exact number of total jobs that are - spawned - - - A string, giving an expression as a function of n_jobs, - as in '2*n_jobs' - - Returns - ------- - train_scores : array of float, shape=(n_folds,) or (n_scoring, n_folds) - Array of trainng scores of the estimator for each run of the cross - validation. The returned array is 2d if `scoring` is a list. - - test_scores : array of float, shape=(n_folds,) or (n_scoring, n_folds) - Array of test scores of the estimator for each run of the cross - validation. The returned array is 2d if `scoring` is a list. - - train_times : array of float, shape=(n_folds,) - Array of training times of the estimator for each run of the cross - validation. - """ - X, y = check_arrays(X, y, sparse_format='csr', allow_lists=True) - cv = _check_cv(cv, X, y, classifier=is_classifier(estimator)) - - if isinstance(scoring, (tuple, list)): - scorers = [check_scoring(estimator, scoring=s) for s in scoring] - ret_1d = False - else: - scorers = [check_scoring(estimator, scoring=scoring)] - ret_1d = True - - parallel = Parallel(n_jobs=n_jobs, verbose=verbose, - pre_dispatch=pre_dispatch) - - # `out` is a list of size n_folds. 
Each element of the list is a tuple - # (train_scores, test_scores, n_test, train_time) - out = parallel(delayed(_fit_and_score)(clone(estimator), X, y, scorers, - train, test, verbose, None, - fit_params, - return_train_scores=True) - for train, test in cv) - - # Retrieve n_scorers x n_folds 2d-array. - train_scores = np.array([o[0] for o in out]).T - test_scores = np.array([o[1] for o in out]).T - train_times = np.array([o[3] for o in out]) - - if ret_1d: - return train_scores[0], test_scores[0], train_times - else: - return train_scores, test_scores, train_times - - def _fit_and_score(estimator, X, y, scorers, train, test, verbose, parameters, fit_params, return_train_scores=False): """Fit estimator and compute scores for a given dataset split. diff --git a/sklearn/tests/test_cross_validation.py b/sklearn/tests/test_cross_validation.py index 10c29fa4882a3..f1e5cc22731ce 100644 --- a/sklearn/tests/test_cross_validation.py +++ b/sklearn/tests/test_cross_validation.py @@ -558,43 +558,6 @@ class BrokenEstimator: assert_raises(TypeError, cval.cross_val_score, BrokenEstimator(), X) -def test_cross_val_report(): - X, y = make_classification(n_classes=2, random_state=0) - clf = Perceptron(random_state=0) - - tr_scores, te_scores, tr_times = cval.cross_val_report(clf, X, y, cv=3, - scoring="f1") - assert_equal(tr_scores.shape, (3,)) - assert_equal(te_scores.shape, (3,)) - assert_equal(tr_times.shape, (3,)) - - assert_greater(tr_scores.mean(), te_scores.mean()) - - -def test_cross_val_report_multiple_scorers(): - X, y = make_classification(n_classes=2, random_state=0) - clf = Perceptron(random_state=0) - - tr_scores, te_scores, tr_times = cval.cross_val_report(clf, X, y, cv=3, - scoring=["f1", - "roc_auc"]) - assert_equal(tr_scores.shape, (2, 3)) - assert_equal(te_scores.shape, (2, 3)) - assert_equal(tr_times.shape, (3,)) - - # Check that the results are the same as when cross_val_report is called - # individually. - f1_tr, f1_te, _ = cval.cross_val_report(clf, X, y, cv=3, scoring="f1") - auc_tr, auc_te, _ = cval.cross_val_report(clf, X, y, cv=3, - scoring="roc_auc") - - assert_array_almost_equal(tr_scores[0], f1_tr) - assert_array_almost_equal(te_scores[0], f1_te) - assert_array_almost_equal(tr_scores[1], auc_tr) - assert_array_almost_equal(te_scores[1], auc_te) - - - def test_train_test_split_errors(): assert_raises(ValueError, cval.train_test_split) assert_raises(ValueError, cval.train_test_split, range(3), train_size=1.1)