From 132ab73e2fe14eec9bb3c29084f16b2570565781 Mon Sep 17 00:00:00 2001
From: Hanmin Qin
Date: Mon, 23 Oct 2017 16:42:00 +0800
Subject: [PATCH 1/8] average precision score pos_label

---
 sklearn/metrics/ranking.py            | 29 ++++++++++++++++++++++++-------
 sklearn/metrics/tests/test_common.py  | 21 +++++++++++++--------
 sklearn/metrics/tests/test_ranking.py | 14 ++++++++++++++
 3 files changed, 49 insertions(+), 15 deletions(-)

diff --git a/sklearn/metrics/ranking.py b/sklearn/metrics/ranking.py
index bc18b248cadfe..4fad02b9dfdfb 100644
--- a/sklearn/metrics/ranking.py
+++ b/sklearn/metrics/ranking.py
@@ -23,6 +23,7 @@
 import numpy as np
 from scipy.sparse import csr_matrix
 from scipy.stats import rankdata
+from functools import partial

 from ..utils import assert_all_finite
 from ..utils import check_consistent_length
@@ -125,7 +126,7 @@ def auc(x, y, reorder='deprecated'):
     return area


-def average_precision_score(y_true, y_score, average="macro",
+def average_precision_score(y_true, y_score, average="macro", pos_label=None,
                             sample_weight=None):
     """Compute average precision (AP) from prediction scores

@@ -204,18 +205,31 @@ def average_precision_score(y_true, y_score, average="macro",
     """
     def _binary_uninterpolated_average_precision(
-            y_true, y_score, sample_weight=None):
+            y_true, y_score, pos_label=None, sample_weight=None):
         precision, recall, thresholds = precision_recall_curve(
-            y_true, y_score, sample_weight=sample_weight)
+            y_true, y_score, pos_label=pos_label, sample_weight=sample_weight)
         # Return the step function integral
         # The following works because the last entry of precision is
         # guaranteed to be 1, as returned by precision_recall_curve
         return -np.sum(np.diff(recall) * np.array(precision)[:-1])

-    return _average_binary_score(_binary_uninterpolated_average_precision,
-                                 y_true, y_score, average,
-                                 sample_weight=sample_weight)
-
+    y_type = type_of_target(y_true)
+    if y_type == "binary":
+        _partial_binary_uninterpolated_average_precision = partial(
+            _binary_uninterpolated_average_precision,
+            pos_label=pos_label)
+        return _average_binary_score(
+            _partial_binary_uninterpolated_average_precision, y_true,
+            y_score, average, sample_weight=sample_weight)
+    else:
+        if pos_label is not None and pos_label != 1:
+            raise ValueError("Parameter pos_label is fixed to 1 for "
+                             "multilabel-indicator y_true. Do not set "
+                             "pos_label or set pos_label to either None "
+                             "or 1.")
+        return _average_binary_score(
+            _binary_uninterpolated_average_precision, y_true, y_score,
+            average, sample_weight=sample_weight)

 def roc_auc_score(y_true, y_score, average="macro", sample_weight=None):
@@ -468,6 +482,7 @@ def precision_recall_curve(y_true, probas_pred, pos_label=None,
                                              sample_weight=sample_weight)

     precision = tps / (tps + fps)
+    precision[np.isnan(precision)] = 0
     recall = tps / tps[-1]

     # stop when full recall attained
diff --git a/sklearn/metrics/tests/test_common.py b/sklearn/metrics/tests/test_common.py
index e68f4024b24af..b60d0642d4ed2 100644
--- a/sklearn/metrics/tests/test_common.py
+++ b/sklearn/metrics/tests/test_common.py
@@ -200,13 +200,6 @@
     "samples_precision_score",
     "samples_recall_score",
     "coverage_error",
-
-    "average_precision_score",
-    "weighted_average_precision_score",
-    "micro_average_precision_score",
-    "macro_average_precision_score",
-    "samples_average_precision_score",
-
     "label_ranking_loss",
     "label_ranking_average_precision_score",
 ]
@@ -222,6 +215,12 @@
     "macro_roc_auc",
     "samples_roc_auc",

+    "average_precision_score",
+    "weighted_average_precision_score",
+    "micro_average_precision_score",
+    "macro_average_precision_score",
+    "samples_average_precision_score",
+
     # with default average='binary', multiclass is prohibited
     "precision_score", "recall_score",
@@ -252,6 +251,12 @@
     "precision_score", "recall_score",
     "f1_score", "f2_score", "f0.5_score",

+    "average_precision_score",
+    "weighted_average_precision_score",
+    "micro_average_precision_score",
+    "macro_average_precision_score",
+    "samples_average_precision_score",
+
     # pos_label support deprecated; to be removed in 0.18:
     "weighted_f0.5_score", "weighted_f1_score", "weighted_f2_score",
     "weighted_precision_score", "weighted_recall_score",
@@ -611,7 +616,7 @@ def test_invariance_string_vs_numbers_labels():
                           err_msg="{0} failed string vs number "
                                   "invariance test".format(name))

-        measure_with_strobj = metric(y1_str.astype('O'), y2)
+        measure_with_strobj = metric_str(y1_str.astype('O'), y2)
         assert_array_equal(measure_with_number, measure_with_strobj,
                            err_msg="{0} failed string object vs number "
                                    "invariance test".format(name))
diff --git a/sklearn/metrics/tests/test_ranking.py b/sklearn/metrics/tests/test_ranking.py
index acdec8932211c..75d4349f1dacc 100644
--- a/sklearn/metrics/tests/test_ranking.py
+++ b/sklearn/metrics/tests/test_ranking.py
@@ -668,6 +668,20 @@ def test_average_precision_constant_values():
     assert_equal(average_precision_score(y_true, y_score), .25)


+def test_roc_auc_score_pos_label_multilabel_indicator():
+    # Raise an error for multilabel-indicator y_true with
+    # pos_label other than None or 1
+    y_true = np.array([[1, 0], [0, 1], [0, 1], [1, 0]])
+    y_pred = np.array([[0.9, 0.1], [0.1, 0.9], [0.8, 0.2], [0.2, 0.8]])
+    average_precision_score_1 = average_precision_score(y_true, y_pred,
+                                                        pos_label=None)
+    average_precision_score_2 = average_precision_score(y_true, y_pred,
+                                                        pos_label=1)
+    assert_almost_equal(average_precision_score_1, average_precision_score_2)
+    assert_raises(ValueError, average_precision_score, y_true, y_pred,
+                  pos_label=0)
+
+
 def test_score_scale_invariance():
     # Test that average_precision_score and roc_auc_score are invariant by
     # the scaling or shifting of probabilities

From 7123d4da9193bef340154c932c0fb5a8629ee57c Mon Sep 17 00:00:00 2001
From: Hanmin Qin
Date: Mon, 23 Oct 2017 18:53:52 +0800
Subject: [PATCH 2/8] minor improve

---
 sklearn/metrics/ranking.py            | 13 ++++++++-----
 sklearn/metrics/tests/test_ranking.py |  9 ++-------
 2 files changed, 10 insertions(+), 12 deletions(-)

diff --git a/sklearn/metrics/ranking.py b/sklearn/metrics/ranking.py
index 4fad02b9dfdfb..6fc80b4a0a8a4 100644
--- a/sklearn/metrics/ranking.py
+++ b/sklearn/metrics/ranking.py
@@ -126,7 +126,7 @@ def auc(x, y, reorder='deprecated'):
     return area


-def average_precision_score(y_true, y_score, average="macro", pos_label=None,
+def average_precision_score(y_true, y_score, average="macro", pos_label=1,
                             sample_weight=None):
     """Compute average precision (AP) from prediction scores

@@ -151,7 +151,7 @@ def average_precision_score(y_true, y_score, average="macro", pos_label=None,
     Parameters
     ----------
     y_true : array, shape = [n_samples] or [n_samples, n_classes]
-        True binary labels (either {0, 1} or {-1, 1}).
+        True binary labels or binary label indicators.

     y_score : array, shape = [n_samples] or [n_samples, n_classes]
         Target scores, can either be probability estimates of the positive
@@ -174,6 +174,10 @@ def average_precision_score(y_true, y_score, average="macro", pos_label=None,
         ``'samples'``:
             Calculate metrics for each instance, and find their average.

+    pos_label : int or str (default=1)
+        The label of the positive class. For multilabel-indicator y_true,
+        pos_label is fixed to 1.
+
     sample_weight : array-like of shape = [n_samples], optional
         Sample weights.

@@ -222,11 +226,10 @@ def _binary_uninterpolated_average_precision(
             _partial_binary_uninterpolated_average_precision, y_true,
             y_score, average, sample_weight=sample_weight)
     else:
-        if pos_label is not None and pos_label != 1:
+        if pos_label != 1:
             raise ValueError("Parameter pos_label is fixed to 1 for "
                              "multilabel-indicator y_true. Do not set "
-                             "pos_label or set pos_label to either None "
-                             "or 1.")
+                             "pos_label or set pos_label to 1.")
         return _average_binary_score(
             _binary_uninterpolated_average_precision, y_true, y_score,
             average, sample_weight=sample_weight)
diff --git a/sklearn/metrics/tests/test_ranking.py b/sklearn/metrics/tests/test_ranking.py
index 75d4349f1dacc..8720a59f23ca7 100644
--- a/sklearn/metrics/tests/test_ranking.py
+++ b/sklearn/metrics/tests/test_ranking.py
@@ -668,16 +668,11 @@ def test_average_precision_constant_values():
     assert_equal(average_precision_score(y_true, y_score), .25)


-def test_roc_auc_score_pos_label_multilabel_indicator():
+def test_average_precision_score_pos_label_multilabel_indicator():
     # Raise an error for multilabel-indicator y_true with
-    # pos_label other than None or 1
+    # pos_label other than 1
     y_true = np.array([[1, 0], [0, 1], [0, 1], [1, 0]])
     y_pred = np.array([[0.9, 0.1], [0.1, 0.9], [0.8, 0.2], [0.2, 0.8]])
-    average_precision_score_1 = average_precision_score(y_true, y_pred,
-                                                        pos_label=None)
-    average_precision_score_2 = average_precision_score(y_true, y_pred,
-                                                        pos_label=1)
-    assert_almost_equal(average_precision_score_1, average_precision_score_2)
     assert_raises(ValueError, average_precision_score, y_true, y_pred,
                   pos_label=0)

From 846dada2ea9a62cafee1bbfc0c28b8ebcfe76212 Mon Sep 17 00:00:00 2001
From: Hanmin Qin
Date: Mon, 23 Oct 2017 18:59:33 +0800
Subject: [PATCH 3/8] minor improve

---
 sklearn/metrics/tests/test_ranking.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/sklearn/metrics/tests/test_ranking.py b/sklearn/metrics/tests/test_ranking.py
index 8720a59f23ca7..c8c737bf6b244 100644
--- a/sklearn/metrics/tests/test_ranking.py
+++ b/sklearn/metrics/tests/test_ranking.py
@@ -673,8 +673,11 @@ def test_average_precision_score_pos_label_multilabel_indicator():
     # pos_label other than 1
     y_true = np.array([[1, 0], [0, 1], [0, 1], [1, 0]])
     y_pred = np.array([[0.9, 0.1], [0.1, 0.9], [0.8, 0.2], [0.2, 0.8]])
-    assert_raises(ValueError, average_precision_score, y_true, y_pred,
-                  pos_label=0)
+    error_message = ("Parameter pos_label is fixed to 1 for multilabel"
+                     "-indicator y_true. Do not set pos_label or set "
+                     "pos_label to 1.")
+    assert_raise_message(ValueError, error_message, average_precision_score,
+                         y_true, y_pred, pos_label=0)


 def test_score_scale_invariance():

From d61a4ae962854a92e507f5f43a55e2a7022c6d2e Mon Sep 17 00:00:00 2001
From: Hanmin Qin
Date: Mon, 23 Oct 2017 19:42:18 +0800
Subject: [PATCH 4/8] minor improve

---
 sklearn/metrics/ranking.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/metrics/ranking.py b/sklearn/metrics/ranking.py
index 6fc80b4a0a8a4..3f950074eecfd 100644
--- a/sklearn/metrics/ranking.py
+++ b/sklearn/metrics/ranking.py
@@ -209,7 +209,7 @@ def average_precision_score(y_true, y_score, average="macro", pos_label=1,
     """
     def _binary_uninterpolated_average_precision(
-            y_true, y_score, pos_label=None, sample_weight=None):
+            y_true, y_score, pos_label=1, sample_weight=None):
         precision, recall, thresholds = precision_recall_curve(
             y_true, y_score, pos_label=pos_label, sample_weight=sample_weight)
         # Return the step function integral

From 7b92714d6fcab997c4e83acf1c15a7c7bf86be1b Mon Sep 17 00:00:00 2001
From: Hanmin Qin
Date: Sun, 10 Dec 2017 09:52:41 +0800
Subject: [PATCH 5/8] jnothman's comment

---
 sklearn/metrics/ranking.py | 28 +++++++++++-----------------
 1 file changed, 11 insertions(+), 17 deletions(-)

diff --git a/sklearn/metrics/ranking.py b/sklearn/metrics/ranking.py
index a4f8e7173c1d1..e8db78ccf787d 100644
--- a/sklearn/metrics/ranking.py
+++ b/sklearn/metrics/ranking.py
@@ -175,8 +175,8 @@ def average_precision_score(y_true, y_score, average="macro", pos_label=1,
             Calculate metrics for each instance, and find their average.

     pos_label : int or str (default=1)
-        The label of the positive class. For multilabel-indicator y_true,
-        pos_label is fixed to 1.
+        The label of the positive class. Only applied to binary ``y_true``.
+        For multilabel-indicator ``y_true``, ``pos_label`` is fixed to 1.

     sample_weight : array-like of shape = [n_samples], optional
         Sample weights.
@@ -218,21 +218,15 @@ def _binary_uninterpolated_average_precision(
         return -np.sum(np.diff(recall) * np.array(precision)[:-1])

     y_type = type_of_target(y_true)
-    if y_type == "binary":
-        _partial_binary_uninterpolated_average_precision = partial(
-            _binary_uninterpolated_average_precision,
-            pos_label=pos_label)
-        return _average_binary_score(
-            _partial_binary_uninterpolated_average_precision, y_true,
-            y_score, average, sample_weight=sample_weight)
-    else:
-        if pos_label != 1:
-            raise ValueError("Parameter pos_label is fixed to 1 for "
-                             "multilabel-indicator y_true. Do not set "
-                             "pos_label or set pos_label to 1.")
-        return _average_binary_score(
-            _binary_uninterpolated_average_precision, y_true, y_score,
-            average, sample_weight=sample_weight)
+    if y_type == "multilabel-indicator" and pos_label != 1:
+        raise ValueError("Parameter pos_label is fixed to 1 for "
+                         "multilabel-indicator y_true. Do not set "
+                         "pos_label or set pos_label to 1.")
+    average_precision = partial(_binary_uninterpolated_average_precision,
+                                pos_label=pos_label)
+    return _average_binary_score(average_precision, y_true, y_score,
+                                 average, sample_weight=sample_weight)
+

 def roc_auc_score(y_true, y_score, average="macro", sample_weight=None):
     """Compute Area Under the Receiver Operating Characteristic Curve (ROC AUC)

From 652d296f4de22de91ccfdab85a413d469b52b441 Mon Sep 17 00:00:00 2001
From: Hanmin Qin
Date: Tue, 16 Jan 2018 21:58:24 +0800
Subject: [PATCH 6/8] what's new

---
 doc/whats_new/v0.20.rst | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/doc/whats_new/v0.20.rst b/doc/whats_new/v0.20.rst
index 7e7d39dbf1759..a9721c624b928 100644
--- a/doc/whats_new/v0.20.rst
+++ b/doc/whats_new/v0.20.rst
@@ -149,6 +149,10 @@ Metrics
 - :func:`metrics.roc_auc_score` now supports binary ``y_true`` other than
   ``{0, 1}`` or ``{-1, 1}``. :issue:`9828` by :user:`Hanmin Qin `.

+- :func:`metrics.average_precision_score` now supports binary ``y_true``
+  other than ``{0, 1}`` or ``{-1, 1}`` through ``pos_label`` parameter.
+  :issue:`9980` by :user:`Hanmin Qin `.
+
 Linear, kernelized and related models

 - Deprecate ``random_state`` parameter in :class:`svm.OneClassSVM` as the
@@ -283,6 +287,10 @@ Metrics
   :func:`mutual_info_score`.
   :issue:`9772` by :user:`Kumar Ashutosh `.

+- Fixed a bug where :func:`metrics.average_precision_score` will sometimes return
+  ``nan`` when ``sample_weight`` contains 0.
+  :issue:`9980` by :user:`Hanmin Qin `.
+
 Neighbors

 - Fixed a bug so ``predict`` in :class:`neighbors.RadiusNeighborsRegressor` can

From 2e325652e1590b81546f0c2f3b20002e39587d1d Mon Sep 17 00:00:00 2001
From: Gael Varoquaux
Date: Mon, 16 Jul 2018 16:43:38 +0200
Subject: [PATCH 7/8] COSMIT: stdlib import come first

---
 sklearn/metrics/ranking.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/sklearn/metrics/ranking.py b/sklearn/metrics/ranking.py
index 21feaefb48c0b..11fbd330f52b6 100644
--- a/sklearn/metrics/ranking.py
+++ b/sklearn/metrics/ranking.py
@@ -20,10 +20,11 @@
 from __future__ import division

 import warnings
+from functools import partial
+
 import numpy as np
 from scipy.sparse import csr_matrix
 from scipy.stats import rankdata
-from functools import partial

 from ..utils import assert_all_finite
 from ..utils import check_consistent_length

From 9f45954d588c96ce1c060d390160233f8f5ea65b Mon Sep 17 00:00:00 2001
From: Hanmin Qin
Date: Mon, 16 Jul 2018 22:58:08 +0800
Subject: [PATCH 8/8] remove unused variable

---
 sklearn/metrics/ranking.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/metrics/ranking.py b/sklearn/metrics/ranking.py
index 11fbd330f52b6..fd6e28a20ae0c 100644
--- a/sklearn/metrics/ranking.py
+++ b/sklearn/metrics/ranking.py
@@ -216,7 +216,7 @@ def average_precision_score(y_true, y_score, average="macro", pos_label=1,
     """
     def _binary_uninterpolated_average_precision(
             y_true, y_score, pos_label=1, sample_weight=None):
-        precision, recall, thresholds = precision_recall_curve(
+        precision, recall, _ = precision_recall_curve(
             y_true, y_score, pos_label=pos_label, sample_weight=sample_weight)
         # Return the step function integral
         # The following works because the last entry of precision is