From da0f6732e63a915fdd24529188d543d45448dc17 Mon Sep 17 00:00:00 2001 From: Hanmin Qin Date: Fri, 11 Aug 2017 08:46:27 +0800 Subject: [PATCH 01/10] add scorer based on brier_score_loss --- doc/modules/model_evaluation.rst | 4 ++-- sklearn/metrics/scorer.py | 4 +++- sklearn/metrics/tests/test_score_objects.py | 2 +- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/doc/modules/model_evaluation.rst b/doc/modules/model_evaluation.rst index 4800569556758..fa618e7ee62af 100644 --- a/doc/modules/model_evaluation.rst +++ b/doc/modules/model_evaluation.rst @@ -60,6 +60,7 @@ Scoring Function **Classification** 'accuracy' :func:`metrics.accuracy_score` 'average_precision' :func:`metrics.average_precision_score` +'brier_score_loss' :func:`metrics.brier_score_loss` 'f1' :func:`metrics.f1_score` for binary targets 'f1_micro' :func:`metrics.f1_score` micro-averaged 'f1_macro' :func:`metrics.f1_score` macro-averaged @@ -102,8 +103,7 @@ Usage examples: >>> model = svm.SVC() >>> cross_val_score(model, X, y, scoring='wrong_choice') Traceback (most recent call last): - ValueError: 'wrong_choice' is not a valid scoring value. Valid options are ['accuracy', 'adjusted_mutual_info_score', 'adjusted_rand_score', 'average_precision', 'completeness_score', 'explained_variance', 'f1', 'f1_macro', 'f1_micro', 'f1_samples', 'f1_weighted', 'fowlkes_mallows_score', 'homogeneity_score', 'mutual_info_score', 'neg_log_loss', 'neg_mean_absolute_error', 'neg_mean_squared_error', 'neg_mean_squared_log_error', 'neg_median_absolute_error', 'normalized_mutual_info_score', 'precision', 'precision_macro', 'precision_micro', 'precision_samples', 'precision_weighted', 'r2', 'recall', 'recall_macro', 'recall_micro', 'recall_samples', 'recall_weighted', 'roc_auc', 'v_measure_score'] - + ValueError: 'wrong_choice' is not a valid scoring value. Valid options are ['accuracy', 'adjusted_mutual_info_score', 'adjusted_rand_score', 'average_precision', 'brier_score_loss', 'completeness_score', 'explained_variance', 'f1', 'f1_macro', 'f1_micro', 'f1_samples', 'f1_weighted', 'fowlkes_mallows_score', 'homogeneity_score', 'mutual_info_score', 'neg_log_loss', 'neg_mean_absolute_error', 'neg_mean_squared_error', 'neg_mean_squared_log_error', 'neg_median_absolute_error', 'normalized_mutual_info_score', 'precision', 'precision_macro', 'precision_micro', 'precision_samples', 'precision_weighted', 'r2', 'recall', 'recall_macro', 'recall_micro', 'recall_samples', 'recall_weighted', 'roc_auc', 'v_measure_score'] .. note:: The values listed by the ValueError exception correspond to the functions measuring diff --git a/sklearn/metrics/scorer.py b/sklearn/metrics/scorer.py index b1f01c1a18e1b..b910ebc612c87 100644 --- a/sklearn/metrics/scorer.py +++ b/sklearn/metrics/scorer.py @@ -27,7 +27,7 @@ mean_squared_error, mean_squared_log_error, accuracy_score, f1_score, roc_auc_score, average_precision_score, precision_score, recall_score, log_loss, - explained_variance_score) + explained_variance_score, brier_score_loss) from .cluster import adjusted_rand_score from .cluster import homogeneity_score @@ -514,6 +514,7 @@ def make_scorer(score_func, greater_is_better=True, needs_proba=False, log_loss_scorer = make_scorer(log_loss, greater_is_better=False, needs_proba=True) log_loss_scorer._deprecation_msg = deprecation_msg +brier_score_loss_scorer = make_scorer(brier_score_loss) # Clustering scores @@ -540,6 +541,7 @@ def make_scorer(score_func, greater_is_better=True, needs_proba=False, average_precision=average_precision_scorer, log_loss=log_loss_scorer, neg_log_loss=neg_log_loss_scorer, + brier_score_loss=brier_score_loss_scorer, # Cluster metrics that use supervised evaluation adjusted_rand_score=adjusted_rand_scorer, homogeneity_score=homogeneity_scorer, diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py index fc5ba91401eab..552c0afac5f5b 100644 --- a/sklearn/metrics/tests/test_score_objects.py +++ b/sklearn/metrics/tests/test_score_objects.py @@ -51,7 +51,7 @@ 'roc_auc', 'average_precision', 'precision', 'precision_weighted', 'precision_macro', 'precision_micro', 'recall', 'recall_weighted', 'recall_macro', 'recall_micro', - 'neg_log_loss', 'log_loss'] + 'neg_log_loss', 'log_loss', 'brier_score_loss'] # All supervised cluster scorers (They behave like classification metric) CLUSTER_SCORERS = ["adjusted_rand_score", From 844f08e24b0624ea63a5842d8b78353066e777d1 Mon Sep 17 00:00:00 2001 From: Hanmin Qin Date: Fri, 11 Aug 2017 09:08:15 +0800 Subject: [PATCH 02/10] fix test --- doc/modules/model_evaluation.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/modules/model_evaluation.rst b/doc/modules/model_evaluation.rst index fa618e7ee62af..9bf3c9fa68660 100644 --- a/doc/modules/model_evaluation.rst +++ b/doc/modules/model_evaluation.rst @@ -104,6 +104,7 @@ Usage examples: >>> cross_val_score(model, X, y, scoring='wrong_choice') Traceback (most recent call last): ValueError: 'wrong_choice' is not a valid scoring value. Valid options are ['accuracy', 'adjusted_mutual_info_score', 'adjusted_rand_score', 'average_precision', 'brier_score_loss', 'completeness_score', 'explained_variance', 'f1', 'f1_macro', 'f1_micro', 'f1_samples', 'f1_weighted', 'fowlkes_mallows_score', 'homogeneity_score', 'mutual_info_score', 'neg_log_loss', 'neg_mean_absolute_error', 'neg_mean_squared_error', 'neg_mean_squared_log_error', 'neg_median_absolute_error', 'normalized_mutual_info_score', 'precision', 'precision_macro', 'precision_micro', 'precision_samples', 'precision_weighted', 'r2', 'recall', 'recall_macro', 'recall_micro', 'recall_samples', 'recall_weighted', 'roc_auc', 'v_measure_score'] + .. note:: The values listed by the ValueError exception correspond to the functions measuring From 063c9f3eb9c792f7095e50a6235ab0cd54dd466e Mon Sep 17 00:00:00 2001 From: Hanmin Qin Date: Fri, 11 Aug 2017 11:22:58 +0800 Subject: [PATCH 03/10] update parameter --- sklearn/metrics/scorer.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sklearn/metrics/scorer.py b/sklearn/metrics/scorer.py index b910ebc612c87..6bc6a546fcd39 100644 --- a/sklearn/metrics/scorer.py +++ b/sklearn/metrics/scorer.py @@ -514,7 +514,9 @@ def make_scorer(score_func, greater_is_better=True, needs_proba=False, log_loss_scorer = make_scorer(log_loss, greater_is_better=False, needs_proba=True) log_loss_scorer._deprecation_msg = deprecation_msg -brier_score_loss_scorer = make_scorer(brier_score_loss) +brier_score_loss_scorer = make_scorer(brier_score_loss, + greater_is_better=False, + needs_proba=True) # Clustering scores From 995b1a3712874c289f4952e5cb928c6e7cc8ca1f Mon Sep 17 00:00:00 2001 From: Hanmin Qin Date: Fri, 11 Aug 2017 11:59:57 +0800 Subject: [PATCH 04/10] update parameter --- sklearn/metrics/scorer.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sklearn/metrics/scorer.py b/sklearn/metrics/scorer.py index 6bc6a546fcd39..db208a793ffe9 100644 --- a/sklearn/metrics/scorer.py +++ b/sklearn/metrics/scorer.py @@ -515,8 +515,7 @@ def make_scorer(score_func, greater_is_better=True, needs_proba=False, needs_proba=True) log_loss_scorer._deprecation_msg = deprecation_msg brier_score_loss_scorer = make_scorer(brier_score_loss, - greater_is_better=False, - needs_proba=True) + greater_is_better=False) # Clustering scores From c8773ab23281a1dc0f9197821cc029a1d880016f Mon Sep 17 00:00:00 2001 From: Hanmin Qin Date: Sat, 12 Aug 2017 11:00:05 +0800 Subject: [PATCH 05/10] update parameter --- sklearn/metrics/scorer.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/sklearn/metrics/scorer.py b/sklearn/metrics/scorer.py index db208a793ffe9..2603ec169b25e 100644 --- a/sklearn/metrics/scorer.py +++ b/sklearn/metrics/scorer.py @@ -514,8 +514,13 @@ def make_scorer(score_func, greater_is_better=True, needs_proba=False, log_loss_scorer = make_scorer(log_loss, greater_is_better=False, needs_proba=True) log_loss_scorer._deprecation_msg = deprecation_msg -brier_score_loss_scorer = make_scorer(brier_score_loss, - greater_is_better=False) +# currently brier_score_loss don't support the result +# returned by predict_proba(shape = (n_samples, n_classes)), +# so we just pass the second column(probabilities of the positive class) +brier_score_loss_scorer = make_scorer(lambda y_true, y_pred: + brier_score_loss(y_true, y_pred[:, 1]), + greater_is_better=False, + needs_proba=True) # Clustering scores From 1be7c7e3282bb86e3b616d61b2e300fad634d6c9 Mon Sep 17 00:00:00 2001 From: Hanmin Qin Date: Sat, 12 Aug 2017 16:38:37 +0800 Subject: [PATCH 06/10] better format --- sklearn/metrics/scorer.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sklearn/metrics/scorer.py b/sklearn/metrics/scorer.py index 2603ec169b25e..233ddabeb0b67 100644 --- a/sklearn/metrics/scorer.py +++ b/sklearn/metrics/scorer.py @@ -514,9 +514,9 @@ def make_scorer(score_func, greater_is_better=True, needs_proba=False, log_loss_scorer = make_scorer(log_loss, greater_is_better=False, needs_proba=True) log_loss_scorer._deprecation_msg = deprecation_msg -# currently brier_score_loss don't support the result -# returned by predict_proba(shape = (n_samples, n_classes)), -# so we just pass the second column(probabilities of the positive class) +# currently brier_score_loss don't support the shape of result +# returned by predict_proba (shape = (n_samples, n_classes)), +# so we just pass the second column (probabilities of the positive class) brier_score_loss_scorer = make_scorer(lambda y_true, y_pred: brier_score_loss(y_true, y_pred[:, 1]), greater_is_better=False, From 6d27fca3d07980f79bc686c5532342ca1dd38fd8 Mon Sep 17 00:00:00 2001 From: Hanmin Qin Date: Sat, 12 Aug 2017 17:37:28 +0800 Subject: [PATCH 07/10] make test pass? --- sklearn/metrics/scorer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/metrics/scorer.py b/sklearn/metrics/scorer.py index 233ddabeb0b67..6f07c31ab7766 100644 --- a/sklearn/metrics/scorer.py +++ b/sklearn/metrics/scorer.py @@ -514,7 +514,7 @@ def make_scorer(score_func, greater_is_better=True, needs_proba=False, log_loss_scorer = make_scorer(log_loss, greater_is_better=False, needs_proba=True) log_loss_scorer._deprecation_msg = deprecation_msg -# currently brier_score_loss don't support the shape of result +# Currently brier_score_loss don't support the shape of result # returned by predict_proba (shape = (n_samples, n_classes)), # so we just pass the second column (probabilities of the positive class) brier_score_loss_scorer = make_scorer(lambda y_true, y_pred: From 9239c2bd3c618d268340ce747b2c1ac36b604889 Mon Sep 17 00:00:00 2001 From: Hanmin Qin Date: Tue, 15 Aug 2017 18:22:17 +0800 Subject: [PATCH 08/10] new idea, kindly review if CIs green --- sklearn/metrics/scorer.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/sklearn/metrics/scorer.py b/sklearn/metrics/scorer.py index 6f07c31ab7766..3fb35994c351f 100644 --- a/sklearn/metrics/scorer.py +++ b/sklearn/metrics/scorer.py @@ -135,7 +135,10 @@ def __call__(self, clf, X, y, sample_weight=None): """ super(_ProbaScorer, self).__call__(clf, X, y, sample_weight=sample_weight) + y_type = type_of_target(y) y_pred = clf.predict_proba(X) + if y_type == "binary": + y_pred = y_pred[:, 1] if sample_weight is not None: return self._sign * self._score_func(y, y_pred, sample_weight=sample_weight, @@ -514,11 +517,7 @@ def make_scorer(score_func, greater_is_better=True, needs_proba=False, log_loss_scorer = make_scorer(log_loss, greater_is_better=False, needs_proba=True) log_loss_scorer._deprecation_msg = deprecation_msg -# Currently brier_score_loss don't support the shape of result -# returned by predict_proba (shape = (n_samples, n_classes)), -# so we just pass the second column (probabilities of the positive class) -brier_score_loss_scorer = make_scorer(lambda y_true, y_pred: - brier_score_loss(y_true, y_pred[:, 1]), +brier_score_loss_scorer = make_scorer(brier_score_loss, greater_is_better=False, needs_proba=True) From cb40c5c675a1f659ba094124b84d87c54c0cd2ec Mon Sep 17 00:00:00 2001 From: Hanmin Qin Date: Thu, 17 Aug 2017 08:41:53 +0800 Subject: [PATCH 09/10] update what's new --- doc/whats_new.rst | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/doc/whats_new.rst b/doc/whats_new.rst index 258dfe19b33cb..913891b32cca3 100644 --- a/doc/whats_new.rst +++ b/doc/whats_new.rst @@ -36,6 +36,14 @@ Classifiers and regressors via ``n_iter_no_change``, ``validation_fraction`` and ``tol``. :issue:`7071` by `Raghav RV`_ +Enhancements +............ + +Model evaluation and meta-estimators + +- A scorer based on :func:`metrics.brier_score_loss` is also available. + :issue:`9521` by `Hanmin Qin `. + Bug fixes ......... @@ -185,9 +193,6 @@ Model selection and evaluation :class:`model_selection.RepeatedStratifiedKFold`. :issue:`8120` by `Neeraj Gangwar`_. -- Added a scorer based on :class:`metrics.explained_variance_score`. - :issue:`9259` by `Hanmin Qin `_. - Miscellaneous - Validation that input data contains no NaN or inf can now be suppressed @@ -380,6 +385,9 @@ Model evaluation and meta-estimators - More clustering metrics are now available through :func:`metrics.get_scorer` and ``scoring`` parameters. :issue:`8117` by `Raghav RV`_. +- A scorer based on :func:`metrics.explained_variance_score` is also available. + :issue:`9259` by `Hanmin Qin `. + Metrics - :func:`metrics.matthews_corrcoef` now support multiclass classification. From 3b096e8445af0e89c56ba110934e9a2240824a46 Mon Sep 17 00:00:00 2001 From: Hanmin Qin Date: Thu, 17 Aug 2017 08:49:57 +0800 Subject: [PATCH 10/10] update what's new --- doc/whats_new.rst | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/doc/whats_new.rst b/doc/whats_new.rst index 913891b32cca3..2bc793bfbd459 100644 --- a/doc/whats_new.rst +++ b/doc/whats_new.rst @@ -42,7 +42,7 @@ Enhancements Model evaluation and meta-estimators - A scorer based on :func:`metrics.brier_score_loss` is also available. - :issue:`9521` by `Hanmin Qin `. + :issue:`9521` by :user:`Hanmin Qin `. Bug fixes ......... @@ -292,9 +292,6 @@ Decomposition, manifold learning and clustering ``singular_values_``, like in :class:`decomposition.IncrementalPCA`. :issue:`7685` by :user:`Tommy Löfstedt ` -- Fixed the implementation of noise_variance_ in :class:`decomposition.PCA`. - :issue:`9108` by `Hanmin Qin `_. - - :class:`decomposition.NMF` now faster when ``beta_loss=0``. :issue:`9277` by :user:`hongkahjun`. @@ -386,7 +383,7 @@ Model evaluation and meta-estimators and ``scoring`` parameters. :issue:`8117` by `Raghav RV`_. - A scorer based on :func:`metrics.explained_variance_score` is also available. - :issue:`9259` by `Hanmin Qin `. + :issue:`9259` by :user:`Hanmin Qin `. Metrics