diff --git a/doc/whats_new/v0.21.rst b/doc/whats_new/v0.21.rst
index 92794b8854e64..ba095eedf1331 100644
--- a/doc/whats_new/v0.21.rst
+++ b/doc/whats_new/v0.21.rst
@@ -480,7 +480,11 @@ Support for Python 3.4 and below has been officially dropped.
   and now it returns NaN and raises :class:`exceptions.UndefinedMetricWarning`.
   :pr:`12855` by :user:`Pawel Sendyk `.
 
-- |Fix| Fixed a bug in :func:`metrics.label_ranking_average_precision_score`
+- |Fix| Fixed a bug where :func:`metrics.brier_score_loss` would sometimes
+  return an incorrect result when there is only one class in ``y_true``.
+  :pr:`13628` by :user:`Hanmin Qin `.
+
+- |Fix| Fixed a bug in :func:`metrics.label_ranking_average_precision_score`
   where sample_weight wasn't taken into account for samples with degenerate
   labels.
   :pr:`13447` by :user:`Dan Ellis `.
diff --git a/sklearn/calibration.py b/sklearn/calibration.py
index 434d30a557540..1502875145e51 100644
--- a/sklearn/calibration.py
+++ b/sklearn/calibration.py
@@ -25,7 +25,6 @@
 from .isotonic import IsotonicRegression
 from .svm import LinearSVC
 from .model_selection import check_cv
-from .metrics.classification import _check_binary_probabilistic_predictions
 
 
 class CalibratedClassifierCV(BaseEstimator, ClassifierMixin):
@@ -572,6 +571,7 @@ def calibration_curve(y_true, y_prob, normalize=False, n_bins=5,
     """
     y_true = column_or_1d(y_true)
     y_prob = column_or_1d(y_prob)
+    check_consistent_length(y_true, y_prob)
 
     if normalize:  # Normalize predicted values into interval [0, 1]
         y_prob = (y_prob - y_prob.min()) / (y_prob.max() - y_prob.min())
@@ -579,7 +579,11 @@ def calibration_curve(y_true, y_prob, normalize=False, n_bins=5,
         raise ValueError("y_prob has values outside [0, 1] and normalize is "
                          "set to False.")
 
-    y_true = _check_binary_probabilistic_predictions(y_true, y_prob)
+    labels = np.unique(y_true)
+    if len(labels) > 2:
+        raise ValueError("Only binary classification is supported. "
+                         "Provided labels %s." % labels)
+    y_true = label_binarize(y_true, labels)[:, 0]
 
     if strategy == 'quantile':  # Determine bin edges by distribution of data
         quantiles = np.linspace(0, 1, n_bins + 1)
diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py
index fad416a7fc0ff..7f3347716d3fa 100644
--- a/sklearn/metrics/classification.py
+++ b/sklearn/metrics/classification.py
@@ -28,7 +28,7 @@
 from scipy.sparse import coo_matrix
 from scipy.sparse import csr_matrix
 
-from ..preprocessing import LabelBinarizer, label_binarize
+from ..preprocessing import LabelBinarizer
 from ..preprocessing import LabelEncoder
 from ..utils import assert_all_finite
 from ..utils import check_array
@@ -2301,25 +2301,6 @@ def hinge_loss(y_true, pred_decision, labels=None, sample_weight=None):
     return np.average(losses, weights=sample_weight)
 
-def _check_binary_probabilistic_predictions(y_true, y_prob):
-    """Check that y_true is binary and y_prob contains valid probabilities"""
-    check_consistent_length(y_true, y_prob)
-
-    labels = np.unique(y_true)
-
-    if len(labels) > 2:
-        raise ValueError("Only binary classification is supported. "
-                         "Provided labels %s." % labels)
-
-    if y_prob.max() > 1:
-        raise ValueError("y_prob contains values greater than 1.")
-
-    if y_prob.min() < 0:
-        raise ValueError("y_prob contains values less than 0.")
-
-    return label_binarize(y_true, labels)[:, 0]
-
-
 def brier_score_loss(y_true, y_prob, sample_weight=None, pos_label=None):
     """Compute the Brier score.
 
     The smaller the Brier score, the better, hence the naming with "loss".
@@ -2353,8 +2334,9 @@ def brier_score_loss(y_true, y_prob, sample_weight=None, pos_label=None):
         Sample weights.
 
     pos_label : int or str, default=None
-        Label of the positive class. If None, the maximum label is used as
-        positive class
+        Label of the positive class.
+        Defaults to the greater label unless y_true is all 0 or all -1,
+        in which case pos_label defaults to 1.
 
     Returns
     -------
@@ -2389,8 +2371,25 @@ def brier_score_loss(y_true, y_prob, sample_weight=None, pos_label=None):
     assert_all_finite(y_prob)
     check_consistent_length(y_true, y_prob, sample_weight)
 
+    labels = np.unique(y_true)
+    if len(labels) > 2:
+        raise ValueError("Only binary classification is supported. "
+                         "Labels in y_true: %s." % labels)
+    if y_prob.max() > 1:
+        raise ValueError("y_prob contains values greater than 1.")
+    if y_prob.min() < 0:
+        raise ValueError("y_prob contains values less than 0.")
+
+    # If pos_label=None, when y_true is in {-1, 1} or {0, 1},
+    # pos_label is set to 1 (consistent with
+    # precision_recall_curve/roc_curve); otherwise pos_label is set to the
+    # greater label (different from precision_recall_curve/roc_curve, the
+    # purpose being to keep backward compatibility).
     if pos_label is None:
-        pos_label = y_true.max()
+        if (np.array_equal(labels, [0]) or
+                np.array_equal(labels, [-1])):
+            pos_label = 1
+        else:
+            pos_label = y_true.max()
     y_true = np.array(y_true == pos_label, int)
-    y_true = _check_binary_probabilistic_predictions(y_true, y_prob)
     return np.average((y_true - y_prob) ** 2, weights=sample_weight)
diff --git a/sklearn/metrics/ranking.py b/sklearn/metrics/ranking.py
index cc06db01a6acc..9d72be290de9a 100644
--- a/sklearn/metrics/ranking.py
+++ b/sklearn/metrics/ranking.py
@@ -469,13 +469,16 @@ def precision_recall_curve(y_true, probas_pred, pos_label=None,
     Parameters
     ----------
     y_true : array, shape = [n_samples]
-        True targets of binary classification in range {-1, 1} or {0, 1}.
+        True binary labels. If labels are not either {-1, 1} or {0, 1}, then
+        pos_label should be explicitly given.
 
     probas_pred : array, shape = [n_samples]
         Estimated probabilities or decision function.
 
     pos_label : int or str, default=None
-        The label of the positive class
+        The label of the positive class.
+        When ``pos_label=None``, if y_true is in {-1, 1} or {0, 1},
+        ``pos_label`` is set to 1; otherwise an error will be raised.
 
     sample_weight : array-like of shape = [n_samples], optional
         Sample weights.
@@ -552,7 +555,9 @@ def roc_curve(y_true, y_score, pos_label=None, sample_weight=None,
         (as returned by "decision_function" on some classifiers).
 
     pos_label : int or str, default=None
-        Label considered as positive and others are considered negative.
+        The label of the positive class.
+        When ``pos_label=None``, if y_true is in {-1, 1} or {0, 1},
+        ``pos_label`` is set to 1; otherwise an error will be raised.
 
     sample_weight : array-like of shape = [n_samples], optional
         Sample weights.
diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py
index df192ef19b146..1b45c974e7e30 100644
--- a/sklearn/metrics/tests/test_classification.py
+++ b/sklearn/metrics/tests/test_classification.py
@@ -1997,9 +1997,23 @@ def test_brier_score_loss():
     assert_raises(ValueError, brier_score_loss, y_true, y_pred[1:])
     assert_raises(ValueError, brier_score_loss, y_true, y_pred + 1.)
     assert_raises(ValueError, brier_score_loss, y_true, y_pred - 1.)
-    # calculate even if only single class in y_true (#6980)
-    assert_almost_equal(brier_score_loss([0], [0.5]), 0.25)
-    assert_almost_equal(brier_score_loss([1], [0.5]), 0.25)
+
+    # ensure that an error is raised for multiclass y_true
+    y_true = np.array([0, 1, 2, 0])
+    y_pred = np.array([0.8, 0.6, 0.4, 0.2])
+    error_message = ("Only binary classification is supported. Labels "
+                     "in y_true: {}".format(np.array([0, 1, 2])))
+    assert_raise_message(ValueError, error_message, brier_score_loss,
+                         y_true, y_pred)
+
+    # calculate correctly when there's only one class in y_true
+    assert_almost_equal(brier_score_loss([-1], [0.4]), 0.16)
+    assert_almost_equal(brier_score_loss([0], [0.4]), 0.16)
+    assert_almost_equal(brier_score_loss([1], [0.4]), 0.36)
+    assert_almost_equal(
+        brier_score_loss(['foo'], [0.4], pos_label='bar'), 0.16)
+    assert_almost_equal(
+        brier_score_loss(['foo'], [0.4], pos_label='foo'), 0.36)
 
 
 def test_balanced_accuracy_score_unseen():
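
Note on the behaviour introduced above: the snippet below is a minimal,
NumPy-only sketch of the new label validation and pos_label resolution in
brier_score_loss, useful as a quick sanity check of the values asserted in
test_brier_score_loss. The helper name brier_score_sketch is made up for
illustration; it mirrors the patched logic but is not the library function
itself.

    import numpy as np

    def brier_score_sketch(y_true, y_prob, sample_weight=None,
                           pos_label=None):
        y_true = np.asarray(y_true)
        y_prob = np.asarray(y_prob, dtype=float)

        # Same validation as the patch: binary labels, probabilities in [0, 1].
        labels = np.unique(y_true)
        if len(labels) > 2:
            raise ValueError("Only binary classification is supported. "
                             "Labels in y_true: %s." % labels)
        if y_prob.max() > 1 or y_prob.min() < 0:
            raise ValueError("y_prob must be within [0, 1].")

        if pos_label is None:
            # An all-zero or all-negative-one y_true defaults to pos_label=1,
            # consistent with precision_recall_curve/roc_curve; otherwise the
            # greater label is used, preserving backward compatibility.
            if np.array_equal(labels, [0]) or np.array_equal(labels, [-1]):
                pos_label = 1
            else:
                pos_label = y_true.max()

        y_true = (y_true == pos_label).astype(int)
        return np.average((y_true - y_prob) ** 2, weights=sample_weight)

    print(brier_score_sketch([0], [0.4]))                       # 0.16
    print(brier_score_sketch([1], [0.4]))                       # 0.36
    print(brier_score_sketch(['foo'], [0.4], pos_label='bar'))  # 0.16

With this resolution rule, a degenerate y_true containing only the negative
label no longer scores as if it were the positive class, which is the bug the
changelog entry describes.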