
MNT make error message consistent in brier_score_loss #18183


Merged: 17 commits, Sep 29, 2020
49 changes: 49 additions & 0 deletions sklearn/metrics/_base.py
@@ -200,3 +200,52 @@ def _average_multiclass_ovo_score(binary_metric, y_true, y_score,
pair_scores[ix] = (a_true_score + b_true_score) / 2

return np.average(pair_scores, weights=prevalence)


def _check_pos_label_consistency(pos_label, y_true):
"""Check if `pos_label` need to be specified or not.

In binary classification, we fix `pos_label=1` if the labels are in the set
{-1, 1} or {0, 1}. Otherwise, we raise an error asking to specify the
`pos_label` parameter.

Parameters
----------
pos_label : int, str or None
The positive label.
y_true : ndarray of shape (n_samples,)
The target vector.

Returns
-------
pos_label : int
If `pos_label` can be inferred, it will be returned.

Raises
------
ValueError
If `y_true` does not have labels in {-1, 1} or {0, 1} and `pos_label`
is not specified, a `ValueError` will be raised.
"""
# ensure binary classification if pos_label is not specified
# classes.dtype.kind in ('O', 'U', 'S') is required to avoid
# triggering a FutureWarning by calling np.array_equal(a, b)
# when elements in the two arrays are not comparable.
classes = np.unique(y_true)
if (pos_label is None and (
classes.dtype.kind in 'OUS' or
not (np.array_equal(classes, [0, 1]) or
np.array_equal(classes, [-1, 1]) or
np.array_equal(classes, [0]) or
np.array_equal(classes, [-1]) or
np.array_equal(classes, [1])))):
classes_repr = ", ".join(repr(c) for c in classes)
raise ValueError(
f"y_true takes value in {{{classes_repr}}} and pos_label is not "
f"specified: either make y_true take value in {{0, 1}} or "
f"{{-1, 1}} or pass pos_label explicitly."
)
elif pos_label is None:
pos_label = 1.0

return pos_label
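
A minimal usage sketch of the new helper (assuming the private import path sklearn.metrics._base from this diff; it is a private API and may move):

import numpy as np
from sklearn.metrics._base import _check_pos_label_consistency

# Labels in {0, 1}: pos_label can be inferred and defaults to 1.
print(_check_pos_label_consistency(None, np.array([0, 1, 1, 0])))  # 1.0

# String labels are ambiguous without pos_label, so a ValueError is raised.
try:
    _check_pos_label_consistency(None, np.array(["a", "b", "b", "a"]))
except ValueError as exc:
    print(exc)  # y_true takes value in {'a', 'b'} and pos_label is not ...

# An explicitly given pos_label is returned unchanged.
print(_check_pos_label_consistency("b", np.array(["a", "b", "b", "a"])))  # 'b'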
43 changes: 26 additions & 17 deletions sklearn/metrics/_classification.py
Expand Up @@ -41,6 +41,8 @@
from ..utils.sparsefuncs import count_nonzero
from ..exceptions import UndefinedMetricWarning

from ._base import _check_pos_label_consistency


def _check_zero_division(zero_division):
if isinstance(zero_division, str) and zero_division == "warn":
@@ -2437,9 +2439,14 @@ def brier_score_loss(y_true, y_prob, *, sample_weight=None, pos_label=None):
Sample weights.

pos_label : int or str, default=None
Label of the positive class.
Defaults to the greater label unless y_true is all 0 or all -1
in which case pos_label defaults to 1.
Label of the positive class. `pos_label` will be inferred in the
following manner:

* if `y_true` in {-1, 1} or {0, 1}, `pos_label` defaults to 1;
* else if `y_true` contains string labels, an error will be raised and
`pos_label` should be explicitly specified;
* otherwise, `pos_label` defaults to the greater label,
i.e. `np.unique(y_true)[-1]`.

Returns
-------
@@ -2473,25 +2480,27 @@ def brier_score_loss(y_true, y_prob, *, sample_weight=None, pos_label=None):
assert_all_finite(y_prob)
check_consistent_length(y_true, y_prob, sample_weight)

labels = np.unique(y_true)
if len(labels) > 2:
raise ValueError("Only binary classification is supported. "
"Labels in y_true: %s." % labels)
y_type = type_of_target(y_true)
if y_type != "binary":
raise ValueError(
f"Only binary classification is supported. The type of the target "
f"is {y_type}."
)

if y_prob.max() > 1:
raise ValueError("y_prob contains values greater than 1.")
if y_prob.min() < 0:
raise ValueError("y_prob contains values less than 0.")

# if pos_label=None, when y_true is in {-1, 1} or {0, 1},
# pos_label is set to 1 (consistent with precision_recall_curve/roc_curve),
# otherwise pos_label is set to the greater label
# (different from precision_recall_curve/roc_curve,
# the purpose is to keep backward compatibility).
if pos_label is None:
if (np.array_equal(labels, [0]) or
np.array_equal(labels, [-1])):
pos_label = 1
try:
pos_label = _check_pos_label_consistency(pos_label, y_true)
except ValueError:
classes = np.unique(y_true)
if classes.dtype.kind not in ('O', 'U', 'S'):
# for backward compatibility, if classes are not string then
# `pos_label` will correspond to the greater label
pos_label = classes[-1]
Member Author:

It is not super clear why we accept this use case. @jnothman Do you have the keys for this one?

Member:

Should we open an issue about this?

Member Author:

I think we should and see if this is legit. I would not block this PR for this, however :)

Member:

I think we should at least open an issue to discuss the possibility of deprecating this behavior.

Member:

Personally I don't think it's legit. It's likely to cause hard-to-detect usage issues.

Member:

Opened issue here: #18381

else:
pos_label = y_true.max()
raise
y_true = np.array(y_true == pos_label, int)
return np.average((y_true - y_prob) ** 2, weights=sample_weight)
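
A hedged sketch of the inference rules from the docstring above, including the backward-compatibility fallback kept in the except branch (values shown assume the code in this diff):

import numpy as np
from sklearn.metrics import brier_score_loss

y_prob = np.array([0.1, 0.9, 0.8, 0.3])

# {0, 1} labels: pos_label is inferred as 1.
print(brier_score_loss(np.array([0, 1, 1, 0]), y_prob))  # 0.0375

# Integer labels outside {0, 1}/{-1, 1}: pos_label falls back to the
# greater label (here 2) for backward compatibility.
print(brier_score_loss(np.array([1, 2, 2, 1]), y_prob))  # also 0.0375

# String labels without pos_label re-raise the new consistent error.
try:
    brier_score_loss(np.array(["ham", "spam", "spam", "ham"]), y_prob)
except ValueError as exc:
    print(exc)  # y_true takes value in {'ham', 'spam'} and pos_label is ...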
27 changes: 6 additions & 21 deletions sklearn/metrics/_ranking.py
@@ -36,7 +36,11 @@
from ..preprocessing import label_binarize
from ..utils._encode import _encode, _unique

from ._base import _average_binary_score, _average_multiclass_ovo_score
from ._base import (
_average_binary_score,
_average_multiclass_ovo_score,
_check_pos_label_consistency,
)


def auc(x, y):
@@ -694,26 +698,7 @@ def _binary_clf_curve(y_true, y_score, pos_label=None, sample_weight=None):
if sample_weight is not None:
sample_weight = column_or_1d(sample_weight)

# ensure binary classification if pos_label is not specified
# classes.dtype.kind in ('O', 'U', 'S') is required to avoid
# triggering a FutureWarning by calling np.array_equal(a, b)
# when elements in the two arrays are not comparable.
classes = np.unique(y_true)
if (pos_label is None and (
classes.dtype.kind in ('O', 'U', 'S') or
not (np.array_equal(classes, [0, 1]) or
np.array_equal(classes, [-1, 1]) or
np.array_equal(classes, [0]) or
np.array_equal(classes, [-1]) or
np.array_equal(classes, [1])))):
classes_repr = ", ".join(repr(c) for c in classes)
raise ValueError("y_true takes value in {{{classes_repr}}} and "
"pos_label is not specified: either make y_true "
"take value in {{0, 1}} or {{-1, 1}} or "
"pass pos_label explicitly.".format(
classes_repr=classes_repr))
elif pos_label is None:
pos_label = 1.
pos_label = _check_pos_label_consistency(pos_label, y_true)

# make y_true a boolean vector
y_true = (y_true == pos_label)
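
With `_binary_clf_curve` now delegating to the shared helper, the ranking metrics raise the same message as `brier_score_loss` for ambiguous labels (a sketch, assuming the refactored code above):

import numpy as np
from sklearn.metrics import roc_curve

y_score = np.array([0.2, 0.8, 0.6, 0.4])
y_true = np.array(["neg", "pos", "pos", "neg"])

# No pos_label with string labels: the shared ValueError is raised.
try:
    roc_curve(y_true, y_score)
except ValueError as exc:
    print(exc)  # y_true takes value in {'neg', 'pos'} and pos_label is ...

# Passing pos_label explicitly resolves the ambiguity.
fpr, tpr, thresholds = roc_curve(y_true, y_score, pos_label="pos")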
10 changes: 6 additions & 4 deletions sklearn/metrics/tests/test_classification.py
@@ -4,7 +4,6 @@
from itertools import chain
from itertools import permutations
import warnings
import re

import numpy as np
from scipy import linalg
@@ -2320,9 +2319,12 @@ def test_brier_score_loss():
# ensure an error is raised for multiclass y_true
y_true = np.array([0, 1, 2, 0])
y_pred = np.array([0.8, 0.6, 0.4, 0.2])
error_message = ("Only binary classification is supported. Labels "
"in y_true: {}".format(np.array([0, 1, 2])))
with pytest.raises(ValueError, match=re.escape(error_message)):
error_message = (
"Only binary classification is supported. The type of the target is "
"multiclass"
)

with pytest.raises(ValueError, match=error_message):
brier_score_loss(y_true, y_pred)

# calculate correctly when there's only one class in y_true
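
A small sketch of the single-class case mentioned in the comment above: when `y_true` contains only 0 (or only -1), `pos_label` is still inferred as 1 and the loss is computed against an all-negative target:

import numpy as np
from sklearn.metrics import brier_score_loss

y_true = np.array([0, 0, 0, 0])
y_prob = np.array([0.1, 0.2, 0.3, 0.4])

# All-negative target: the loss is the mean of y_prob ** 2.
print(brier_score_loss(y_true, y_prob))  # 0.075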
7 changes: 1 addition & 6 deletions sklearn/metrics/tests/test_common.py
@@ -1432,12 +1432,7 @@ def test_metrics_consistent_type_error(metric_name):
"metric, y_pred_threshold",
[
(average_precision_score, True),
# FIXME: `brier_score_loss` does not follow this convention.
# See discussion in:
# https://github.com/scikit-learn/scikit-learn/issues/18307
pytest.param(
brier_score_loss, True, marks=pytest.mark.xfail(reason="#18307")
),
(brier_score_loss, True),
(f1_score, False),
(partial(fbeta_score, beta=1), False),
(jaccard_score, False),