scikit-learn · gnsiva · Jul 8, 2017 · Jul 8, 2017 · Jul 8, 2017 · Jul 8, 2017
diff --git a/doc/whats_new/v0.20.rst b/doc/whats_new/v0.20.rst
@@ -540,6 +540,11 @@ Metrics
   :issue:`9515` by :user:`Alan Liddell <aliddell>` and
   :user:`Manh Dao <manhdao>`.
 
+- Fixed a bug in :func:`metrics.brier_score_loss` when all ``y_true`` values
+  are 1. The function previously reported a loss of 1 when given a ``y_prob``
+  of 1, whereas the correct loss is 0. :issue:`9300` by
+  :user:`Ganesh N. Sivalingam <gnsiva>`.
+
 Neighbors
 
 - Fixed a bug so ``predict`` in :class:`neighbors.RadiusNeighborsRegressor` can

diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py
@@ -18,6 +18,7 @@
 #          Jatin Shah <jatindshah@gmail.com>
 #          Saurabh Jha <saurabh.jhaa@gmail.com>
 #          Bernardo Stein <bernardovstein@gmail.com>
+#          Ganesh N. Sivalingam <g.n.sivalingam@gmail.com>
 # License: BSD 3 clause
 
 from __future__ import division
@@ -1977,7 +1978,8 @@ def brier_score_loss(y_true, y_prob, sample_weight=None, pos_label=None):
 
     pos_label : int or str, default=None
         Label of the positive class. If None, the maximum label is used as
-        positive class
+        positive class. If all ``y_true`` values are 0 or False, 1 is used
+        as ``pos_label`` instead.
 
     Returns
     -------
@@ -2013,7 +2015,12 @@ def brier_score_loss(y_true, y_prob, sample_weight=None, pos_label=None):
     check_consistent_length(y_true, y_prob, sample_weight)
 
     if pos_label is None:
-        pos_label = y_true.max()
+        y_true_max = y_true.max()
+        if y_true_max != 0:
+            pos_label = y_true_max
+        else:
+            pos_label = 1
+
     y_true = np.array(y_true == pos_label, int)
-    y_true = _check_binary_probabilistic_predictions(y_true, y_prob)
+    _check_binary_probabilistic_predictions(y_true, y_prob)
     return np.average((y_true - y_prob) ** 2, weights=sample_weight)
diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py
@@ -52,6 +52,7 @@
 
 from scipy.spatial.distance import hamming as sp_hamming
 
+
 ###############################################################################
 # Utilities for testing
 
@@ -885,7 +886,7 @@ def test_classification_report_multiclass_with_unicode_label():
 def test_classification_report_multiclass_with_long_string_label():
     y_true, y_pred, _ = make_prediction(binary=False)
 
-    labels = np.array(["blue", "green"*5, "red"])
+    labels = np.array(["blue", "green" * 5, "red"])
     y_true = labels[y_true]
     y_pred = labels[y_pred]
 
@@ -986,7 +987,7 @@ def test_multilabel_hamming_loss():
     assert_equal(hamming_loss(y1, np.zeros(y1.shape)), 4 / 6)
     assert_equal(hamming_loss(y2, np.zeros(y1.shape)), 0.5)
     assert_equal(hamming_loss(y1, y2, sample_weight=w), 1. / 12)
-    assert_equal(hamming_loss(y1, 1-y2, sample_weight=w), 11. / 12)
+    assert_equal(hamming_loss(y1, 1 - y2, sample_weight=w), 11. / 12)
     assert_equal(hamming_loss(y1, np.zeros_like(y1), sample_weight=w), 2. / 3)
     # sp_hamming only works with 1-D arrays
     assert_equal(hamming_loss(y1[0], y2[0]), sp_hamming(y1[0], y2[0]))
@@ -1675,3 +1676,30 @@ def test_brier_score_loss():
     # calculate even if only single class in y_true (#6980)
     assert_almost_equal(brier_score_loss([0], [0.5]), 0.25)
     assert_almost_equal(brier_score_loss([1], [0.5]), 0.25)
+
+    # brier_score_loss should work when all inputs are the same
+    assert_almost_equal(
+        brier_score_loss(np.array([0, 0, 0]), np.array([0, 0, 0])), 0)
+    assert_almost_equal(
+        brier_score_loss(np.array([0, 0, 0]), np.array([1, 1, 1])), 1)
+    assert_almost_equal(
+        brier_score_loss(np.array([1, 1, 1]), np.array([1, 1, 1])), 0)
+    assert_almost_equal(
+        brier_score_loss(np.array([1, 1, 1]), np.array([0, 0, 0])), 1)
+
+    # test for when y_true is not 0s and 1s
+    assert_almost_equal(
+        brier_score_loss(np.array([3, 0, 3]), np.array([1, 0, 1])), 0)
+    assert_almost_equal(
+        brier_score_loss(np.array([3, 2, 3]), np.array([1, 0, 1])), 0)
+
+    # categorical test
+    assert_almost_equal(brier_score_loss(
+        np.array(["foo", "foo", "foo"]), np.array([1, 1, 1]),
+        pos_label="foo"), 0)
+    assert_almost_equal(brier_score_loss(
+        np.array(["foo", "bar", "foo"]), np.array([1, 0, 1]),
+        pos_label="foo"), 0)
+    assert_almost_equal(brier_score_loss(
+        np.array(["foo", "bar", "foo"]), np.array([0, 1, 0]),
+        pos_label="foo"), 1)
diff --git a/sklearn/metrics/tests/test_common.py b/sklearn/metrics/tests/test_common.py
@@ -267,6 +267,23 @@
     "macro_precision_score", "macro_recall_score",
 }
 
+# Metrics with a "pos_label" argument where the second argument is labels
+POSITIVE_SCORE_MEANS_POSITIVE_CLASS = [
+    "brier_score_loss",
+
+    "precision_score", "recall_score", "f1_score", "f2_score", "f0.5_score",
+
+    # pos_label support deprecated; to be removed in 0.18:
+    "weighted_f0.5_score", "weighted_f1_score", "weighted_f2_score",
+    "weighted_precision_score", "weighted_recall_score",
+
+    "micro_f0.5_score", "micro_f1_score", "micro_f2_score",
+    "micro_precision_score", "micro_recall_score",
+
+    "macro_f0.5_score", "macro_f1_score", "macro_f2_score",
+    "macro_precision_score", "macro_recall_score",
+]
+
 # Metrics with a "labels" argument
 # TODO: Handle multi_class metrics that has a labels argument as well as a
 # decision function argument. e.g hinge_loss
@@ -1150,3 +1167,17 @@ def test_no_averaging_labels():
             score_labels = metric(y_true, y_pred, labels=labels, average=None)
             score = metric(y_true, y_pred, average=None)
             assert_array_equal(score_labels, score[inverse_labels])
+
+
+@ignore_warnings
+def test_all_true_pos_label():
+    # A perfect prediction must score the same when y_true holds only the
+    # positive class (all 1s) as when both classes are present
+    examples = np.array([0, 1, 1, 0, 1, 1])
+    all_ones = np.array([1, 1, 1])
+
+    for name in POSITIVE_SCORE_MEANS_POSITIVE_CLASS:
+        metric = ALL_METRICS[name]
+        perfect_score = metric(examples, examples, pos_label=1)
+        assert_almost_equal(
+            perfect_score, metric(all_ones, all_ones, pos_label=1))