
[MRG] FIX Correct brier_score_loss when there's only one class in y_true #13628


Merged: 9 commits merged on Apr 26, 2019
6 changes: 5 additions & 1 deletion doc/whats_new/v0.21.rst
@@ -480,7 +480,11 @@ Support for Python 3.4 and below has been officially dropped.
and now it returns NaN and raises :class:`exceptions.UndefinedMetricWarning`.
:pr:`12855` by :user:`Pawel Sendyk <psendyk>`.

- |Fix| Fixed a bug in :func:`metrics.label_ranking_average_precision_score`
- |Fix| Fixed a bug where :func:`metrics.brier_score_loss` would sometimes
return an incorrect result when there's only one class in ``y_true``.
:pr:`13628` by :user:`Hanmin Qin <qinhanmin2014>`.

- |Fix| Fixed a bug in :func:`metrics.label_ranking_average_precision_score`
where sample_weight wasn't taken into account for samples with degenerate
labels.
:pr:`13447` by :user:`Dan Ellis <dpwe>`.
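For context, a quick illustration of the behaviour this changelog entry describes; the expected values are taken from the tests added in this PR and assume a scikit-learn build with this patch applied:

```python
from sklearn.metrics import brier_score_loss

# Only class 0 is present: pos_label defaults to 1, so a predicted probability
# of 0.4 for the positive class gives a loss of (0 - 0.4) ** 2 = 0.16.
print(brier_score_loss([0], [0.4]))  # 0.16

# Only class 1 is present: the greater label (1) is the positive class,
# so the loss is (1 - 0.4) ** 2 = 0.36.
print(brier_score_loss([1], [0.4]))  # 0.36
```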
8 changes: 6 additions & 2 deletions sklearn/calibration.py
@@ -25,7 +25,6 @@
from .isotonic import IsotonicRegression
from .svm import LinearSVC
from .model_selection import check_cv
from .metrics.classification import _check_binary_probabilistic_predictions


class CalibratedClassifierCV(BaseEstimator, ClassifierMixin):
@@ -572,14 +571,19 @@ def calibration_curve(y_true, y_prob, normalize=False, n_bins=5,
"""
y_true = column_or_1d(y_true)
y_prob = column_or_1d(y_prob)
check_consistent_length(y_true, y_prob)

if normalize: # Normalize predicted values into interval [0, 1]
y_prob = (y_prob - y_prob.min()) / (y_prob.max() - y_prob.min())
elif y_prob.min() < 0 or y_prob.max() > 1:
raise ValueError("y_prob has values outside [0, 1] and normalize is "
"set to False.")

y_true = _check_binary_probabilistic_predictions(y_true, y_prob)
Member (review comment): why are we no longer validating y_prob?

Member Author (reply): y_prob is already validated above.

labels = np.unique(y_true)
if len(labels) > 2:
raise ValueError("Only binary classification is supported. "
"Provided labels %s." % labels)
y_true = label_binarize(y_true, labels)[:, 0]

if strategy == 'quantile': # Determine bin edges by distribution of data
quantiles = np.linspace(0, 1, n_bins + 1)
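As a usage sketch of the function touched above, a minimal `calibration_curve` call; the input arrays here are made up purely for illustration:

```python
import numpy as np
from sklearn.calibration import calibration_curve

y_true = np.array([0, 0, 0, 1, 1, 1])
y_prob = np.array([0.1, 0.2, 0.3, 0.7, 0.8, 0.9])

# y_prob is grouped into n_bins intervals; for each bin the function returns
# the fraction of positives (prob_true) and the mean predicted probability
# (prob_pred), which together form one point of the calibration curve.
prob_true, prob_pred = calibration_curve(y_true, y_prob, n_bins=2)
print(prob_true)  # fraction of positives per bin
print(prob_pred)  # mean predicted probability per bin
```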
47 changes: 23 additions & 24 deletions sklearn/metrics/classification.py
@@ -28,7 +28,7 @@
from scipy.sparse import coo_matrix
from scipy.sparse import csr_matrix

from ..preprocessing import LabelBinarizer, label_binarize
from ..preprocessing import LabelBinarizer
from ..preprocessing import LabelEncoder
from ..utils import assert_all_finite
from ..utils import check_array
@@ -2301,25 +2301,6 @@ def hinge_loss(y_true, pred_decision, labels=None, sample_weight=None):
return np.average(losses, weights=sample_weight)


def _check_binary_probabilistic_predictions(y_true, y_prob):
"""Check that y_true is binary and y_prob contains valid probabilities"""
check_consistent_length(y_true, y_prob)

labels = np.unique(y_true)

if len(labels) > 2:
raise ValueError("Only binary classification is supported. "
"Provided labels %s." % labels)

if y_prob.max() > 1:
raise ValueError("y_prob contains values greater than 1.")

if y_prob.min() < 0:
raise ValueError("y_prob contains values less than 0.")

return label_binarize(y_true, labels)[:, 0]


def brier_score_loss(y_true, y_prob, sample_weight=None, pos_label=None):
"""Compute the Brier score.
The smaller the Brier score, the better, hence the naming with "loss".
@@ -2353,8 +2334,9 @@ def brier_score_loss(y_true, y_prob, sample_weight=None, pos_label=None):
Sample weights.

pos_label : int or str, default=None
Label of the positive class. If None, the maximum label is used as
positive class
Label of the positive class.
Defaults to the greater label unless y_true is all 0 or all -1
in which case pos_label defaults to 1.

Returns
-------
@@ -2389,8 +2371,25 @@ def brier_score_loss(y_true, y_prob, sample_weight=None, pos_label=None):
assert_all_finite(y_prob)
check_consistent_length(y_true, y_prob, sample_weight)

labels = np.unique(y_true)
if len(labels) > 2:
raise ValueError("Only binary classification is supported. "
"Labels in y_true: %s." % labels)
if y_prob.max() > 1:
raise ValueError("y_prob contains values greater than 1.")
if y_prob.min() < 0:
raise ValueError("y_prob contains values less than 0.")

# If pos_label=None and y_true is in {-1, 1} or {0, 1},
# pos_label is set to 1 (consistent with precision_recall_curve/roc_curve);
# otherwise pos_label is set to the greater label
# (unlike precision_recall_curve/roc_curve,
# in order to keep backward compatibility).
if pos_label is None:
pos_label = y_true.max()
if (np.array_equal(labels, [0]) or
np.array_equal(labels, [-1])):
pos_label = 1
else:
pos_label = y_true.max()
y_true = np.array(y_true == pos_label, int)
y_true = _check_binary_probabilistic_predictions(y_true, y_prob)
return np.average((y_true - y_prob) ** 2, weights=sample_weight)
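A small sketch of the new `pos_label` handling with string labels, mirroring the tests added below; the labels `'foo'`/`'bar'` are purely illustrative:

```python
from sklearn.metrics import brier_score_loss

# With a single non-numeric label, pos_label has to be given explicitly.
# 'bar' as positive class: y_true == pos_label is False (0), so the loss
# is (0 - 0.4) ** 2 = 0.16.
print(brier_score_loss(['foo'], [0.4], pos_label='bar'))  # 0.16

# 'foo' as positive class: y_true == pos_label is True (1), so the loss
# is (1 - 0.4) ** 2 = 0.36.
print(brier_score_loss(['foo'], [0.4], pos_label='foo'))  # 0.36
```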
11 changes: 8 additions & 3 deletions sklearn/metrics/ranking.py
@@ -469,13 +469,16 @@ def precision_recall_curve(y_true, probas_pred, pos_label=None,
Parameters
----------
y_true : array, shape = [n_samples]
True targets of binary classification in range {-1, 1} or {0, 1}.
True binary labels. If labels are not either {-1, 1} or {0, 1}, then
pos_label should be explicitly given.

probas_pred : array, shape = [n_samples]
Estimated probabilities or decision function.

pos_label : int or str, default=None
The label of the positive class
The label of the positive class.
When ``pos_label=None``, if y_true is in {-1, 1} or {0, 1},
``pos_label`` is set to 1, otherwise an error will be raised.

sample_weight : array-like of shape = [n_samples], optional
Sample weights.
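To make the documented default concrete, a minimal `precision_recall_curve` call with {0, 1} labels; the score values are arbitrary:

```python
import numpy as np
from sklearn.metrics import precision_recall_curve

y_true = np.array([0, 0, 1, 1])
y_scores = np.array([0.1, 0.4, 0.35, 0.8])

# Labels are in {0, 1}, so pos_label=None is interpreted as pos_label=1.
precision, recall, thresholds = precision_recall_curve(y_true, y_scores)
print(precision, recall, thresholds)
```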
@@ -552,7 +555,9 @@ def roc_curve(y_true, y_score, pos_label=None, sample_weight=None,
(as returned by "decision_function" on some classifiers).

pos_label : int or str, default=None
Label considered as positive and others are considered negative.
The label of the positive class.
When ``pos_label=None``, if y_true is in {-1, 1} or {0, 1},
``pos_label`` is set to 1, otherwise an error will be raised.

sample_weight : array-like of shape = [n_samples], optional
Sample weights.
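Similarly for `roc_curve`: when the labels are not {-1, 1} or {0, 1}, the positive class has to be named explicitly. A minimal sketch with labels 1 and 2, chosen only for illustration:

```python
import numpy as np
from sklearn.metrics import roc_curve

y_true = np.array([1, 1, 2, 2])
y_scores = np.array([0.1, 0.4, 0.35, 0.8])

# y_true uses labels {1, 2}, so pos_label must be passed explicitly.
fpr, tpr, thresholds = roc_curve(y_true, y_scores, pos_label=2)
print(fpr, tpr, thresholds)
```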
20 changes: 17 additions & 3 deletions sklearn/metrics/tests/test_classification.py
@@ -1997,9 +1997,23 @@ def test_brier_score_loss():
assert_raises(ValueError, brier_score_loss, y_true, y_pred[1:])
assert_raises(ValueError, brier_score_loss, y_true, y_pred + 1.)
assert_raises(ValueError, brier_score_loss, y_true, y_pred - 1.)
# calculate even if only single class in y_true (#6980)
assert_almost_equal(brier_score_loss([0], [0.5]), 0.25)
assert_almost_equal(brier_score_loss([1], [0.5]), 0.25)

# ensure to raise an error for multiclass y_true
y_true = np.array([0, 1, 2, 0])
y_pred = np.array([0.8, 0.6, 0.4, 0.2])
error_message = ("Only binary classification is supported. Labels "
"in y_true: {}".format(np.array([0, 1, 2])))
assert_raise_message(ValueError, error_message, brier_score_loss,
y_true, y_pred)

# calculate correctly when there's only one class in y_true
assert_almost_equal(brier_score_loss([-1], [0.4]), 0.16)
assert_almost_equal(brier_score_loss([0], [0.4]), 0.16)
assert_almost_equal(brier_score_loss([1], [0.4]), 0.36)
assert_almost_equal(
brier_score_loss(['foo'], [0.4], pos_label='bar'), 0.16)
assert_almost_equal(
brier_score_loss(['foo'], [0.4], pos_label='foo'), 0.36)


def test_balanced_accuracy_score_unseen():