From 20ff083c1557caa660df54c42ec392c7f64e7172 Mon Sep 17 00:00:00 2001 From: Moussa Taifi Date: Sun, 29 May 2016 13:43:53 -0700 Subject: [PATCH 1/3] Add Brier_score_loss to model validation doc --- doc/modules/model_evaluation.rst | 41 ++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/doc/modules/model_evaluation.rst b/doc/modules/model_evaluation.rst index 4d693b68cfb77..3f0bb51f2c16f 100644 --- a/doc/modules/model_evaluation.rst +++ b/doc/modules/model_evaluation.rst @@ -1033,6 +1033,47 @@ set [0,1] has an error: :: for an example of zero one loss usage to perform recursive feature elimination with cross-validation. +.. _brier_score_loss: + +Brier score loss +---------------- + +The :func:`brier_score_loss` function returns a score of the mean square difference +between the actual outcome and the predicted probability of the possible outcome. +The actual outcome has to be 1 or 0 (true or false), while the predicted probability +of the actual outcome happens can be value between 0 and 1. The brier score loss is +also between 0 to 1 and the lower the score (the mean square difference is smaller), +the more accurate the prediction is. + +.. 
math:: + + BS = \frac{1}{N} \sum_{t=1}^{N}(f_t - o_t)^2 + +where: :math:`N` is the total number of predictions, :math:`f_t` is the predicted +probability of the actual outcome :math:`o_t` + + >>> import numpy as np + >>> from sklearn.metrics import brier_score_loss + >>> from sklearn.metrics import zero_one_loss + >>> y_true = np.array([0, 1, 1, 0]) + >>> y_true_categorical = np.array(["spam", "ham", "ham", "spam"]) + >>> y_prob = np.array([0.1, 0.9, 0.8, 0.4]) + >>> y_pred = np.array([0, 1, 1, 0]) + >>> brier_score_loss(y_true, y_prob) # doctest: +ELLIPSIS + 0.055 + >>> zero_one_loss(y_true, y_pred, normalize=False) + 0 + >>> brier_score_loss(y_true, 1-y_prob, pos_label=0) # doctest: +ELLIPSIS + 0.055 + >>> brier_score_loss(y_true_categorical, y_prob, pos_label="ham") # doctest: +ELLIPSIS + 0.055 + >>> brier_score_loss(y_true, np.array(y_prob) > 0.5) + 0.055 + +.. topic:: Example: + + * See :ref:`example_calibration_plot_calibration.py` + for an example of Brier score loss usage to perform probability calibration of classifiers. .. _multilabel_ranking_metrics: From 46f8baa022337ef6ba9b8138ebeef809c7520409 Mon Sep 17 00:00:00 2001 From: Moussa Taifi Date: Fri, 3 Jun 2016 23:05:39 -0700 Subject: [PATCH 2/3] Add more precise explanation of the metric, add two references and fix typo in doctest result --- doc/modules/model_evaluation.rst | 41 +++++++++++++++++++++----------- 1 file changed, 27 insertions(+), 14 deletions(-) diff --git a/doc/modules/model_evaluation.rst b/doc/modules/model_evaluation.rst index 3f0bb51f2c16f..3ff78c2db65a3 100644 --- a/doc/modules/model_evaluation.rst +++ b/doc/modules/model_evaluation.rst @@ -1038,12 +1038,27 @@ set [0,1] has an error: :: Brier score loss ---------------- -The :func:`brier_score_loss` function returns a score of the mean square difference -between the actual outcome and the predicted probability of the possible outcome. 
-The actual outcome has to be 1 or 0 (true or false), while the predicted probability -of the actual outcome happens can be value between 0 and 1. The brier score loss is -also between 0 to 1 and the lower the score (the mean square difference is smaller), -the more accurate the prediction is. +The :func:`brier_score_loss` function computes the +`Brier score `_ +for binary classes. Quoting Wikipedia: + + "The Brier score is a proper score function that measures the accuracy of + probabilistic predictions. It is applicable to tasks in which predictions + must assign probabilities to a set of mutually exclusive discrete outcomes. + The set of possible outcomes can be either binary or categorical in nature, + and the probabilities assigned to this set of outcomes must sum to one + (where each individual probability is in the range of 0 to 1). It was proposed + by Glenn W. Brier in 1950." + +This function returns a score of the mean square difference between the actual outcome and +the predicted probability of the possible outcome. The actual outcome has to be 1 or 0 +(true or false), while the predicted probability of the actual outcome can be +a value between 0 and 1. +The brier score loss is also between 0 to 1 and the lower the score +(the mean square difference is smaller), the more accurate the prediction is. +It can be thought of as a measure of the "calibration" of a set of probabilistic predictions. +`Here `_ is +the paper describing it. .. 
math:: @@ -1054,21 +1069,19 @@ probablity of the actual outcome :math:`o_t` >>> import numpy as np >>> from sklearn.metrics import brier_score_loss - >>> from sklearn.metrics import zero_one_loss >>> y_true = np.array([0, 1, 1, 0]) >>> y_true_categorical = np.array(["spam", "ham", "ham", "spam"]) >>> y_prob = np.array([0.1, 0.9, 0.8, 0.4]) >>> y_pred = np.array([0, 1, 1, 0]) - >>> brier_score_loss(y_true, y_prob) # doctest: +ELLIPSIS - 0.055 - >>> zero_one_loss(y_true, y_pred, normalize=False) - 0 - >>> brier_score_loss(y_true, 1-y_prob, pos_label=0) # doctest: +ELLIPSIS + >>> brier_score_loss(y_true, y_prob) 0.055 - >>> brier_score_loss(y_true_categorical, y_prob, pos_label="ham") # doctest: +ELLIPSIS + >>> brier_score_loss(y_true, 1-y_prob, pos_label=0) 0.055 - >>> brier_score_loss(y_true, np.array(y_prob) > 0.5) + >>> brier_score_loss(y_true_categorical, y_prob, pos_label="ham") 0.055 + >>> brier_score_loss(y_true, y_prob>0.5) + 0.0 + .. topic:: Example: From 9a5eb748039cb3bc1f08df9772fb395f5aba7e35 Mon Sep 17 00:00:00 2001 From: Moussa Taifi Date: Sat, 11 Jun 2016 17:52:16 -0400 Subject: [PATCH 3/3] Remove mention of multi-class to match the supported implementation, fix references section and pep8 formatting --- doc/modules/model_evaluation.rst | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/doc/modules/model_evaluation.rst b/doc/modules/model_evaluation.rst index 3ff78c2db65a3..7d1722d5efaaf 100644 --- a/doc/modules/model_evaluation.rst +++ b/doc/modules/model_evaluation.rst @@ -1042,23 +1042,18 @@ The :func:`brier_score_loss` function computes the `Brier score `_ for binary classes. Quoting Wikipedia: - "The Brier score is a proper score function that measures the accuracy of + + "The Brier score is a proper score function that measures the accuracy of probabilistic predictions. It is applicable to tasks in which predictions - must assign probabilities to a set of mutually exclusive discrete outcomes. 
- The set of possible outcomes can be either binary or categorical in nature, - and the probabilities assigned to this set of outcomes must sum to one - (where each individual probability is in the range of 0 to 1). It was proposed - by Glenn W. Brier in 1950." + must assign probabilities to a set of mutually exclusive discrete outcomes." This function returns a score of the mean square difference between the actual outcome and the predicted probability of the possible outcome. The actual outcome has to be 1 or 0 (true or false), while the predicted probability of the actual outcome can be a value between 0 and 1. -The brier score loss is also between 0 to 1 and the lower the score -(the mean square difference is smaller), the more accurate the prediction is. -It can be thought of as a measure of the "calibration" of a set of probabilistic predictions. -`Here `_ is -the paper describing it. +The Brier score loss is also between 0 and 1 and the lower the score (the mean square difference is smaller), +the more accurate the prediction is. It can be thought of as a measure of the "calibration" +of a set of probabilistic predictions. .. math:: @@ -1079,7 +1074,7 @@ probablity of the actual outcome :math:`o_t` 0.055 >>> brier_score_loss(y_true_categorical, y_prob, pos_label="ham") 0.055 - >>> brier_score_loss(y_true, y_prob>0.5) + >>> brier_score_loss(y_true, y_prob > 0.5) 0.0 @@ -1088,6 +1083,11 @@ probablity of the actual outcome :math:`o_t` * See :ref:`example_calibration_plot_calibration.py` for an example of Brier score loss usage to perform probability calibration of classifiers. +.. topic:: References: + + * G. Brier, `Verification of forecasts expressed in terms of probability `_, Monthly weather review 78.1 (1950) + .. _multilabel_ranking_metrics: Multilabel ranking metrics