From 00e38aa19aa8733eac57edac21e41fe9d075cd5f Mon Sep 17 00:00:00 2001
From: Jeremy Karnowski
Date: Wed, 15 Jul 2015 09:08:25 -0700
Subject: [PATCH 01/10] Added DET curve to classification metrics.

---
 sklearn/metrics/ranking.py | 78 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 78 insertions(+)

diff --git a/sklearn/metrics/ranking.py b/sklearn/metrics/ranking.py
index 793c5a5b3d0ca..a19ec0f2654a5 100644
--- a/sklearn/metrics/ranking.py
+++ b/sklearn/metrics/ranking.py
@@ -15,6 +15,7 @@
 # Lars Buitinck
 # Joel Nothman
 # Noel Dawe
+# Jeremy Karnowski
 # License: BSD 3 clause
 
 from __future__ import division
@@ -183,6 +184,83 @@ def _binary_average_precision(y_true, y_score, sample_weight=None):
     return _average_binary_score(_binary_average_precision, y_true, y_score,
                                  average, sample_weight=sample_weight)
 
+def detection_error_tradeoff(y_true, probas_pred, pos_label=None,
+                             sample_weight=None):
+    """Compute error rates for different probability thresholds
+
+    Note: this implementation is restricted to the binary classification task.
+
+    Parameters
+    ----------
+    y_true : array, shape = [n_samples]
+        True targets of binary classification in range {-1, 1} or {0, 1}.
+
+    probas_pred : array, shape = [n_samples]
+        Estimated probabilities or decision function.
+
+    pos_label : int, optional (default=None)
+        The label of the positive class
+
+    sample_weight : array-like of shape = [n_samples], optional
+        Sample weights.
+
+    Returns
+    -------
+    fps : array, shape = [n_thresholds]
+        False positive rate (false accept rate), at index i being the
+        fraction of negative samples assigned a score >= thresholds[i].
+        The values are ordered so that the false positive rate decreases
+        along the array.
+
+    fns : array, shape = [n_thresholds]
+        False negative rate (false reject or miss rate), at index i being
+        the fraction of positive samples assigned a score < thresholds[i].
+        The values are ordered so that the false negative rate increases
+        along the array.
+
+    thresholds : array, shape = [n_thresholds]
+        Increasing score values used as decision thresholds.
+
+    References
+    ----------
+    .. [1] `Wikipedia entry for Detection error tradeoff `_
+    .. [2] `The DET Curve in Assessment of Detection Task Performance `_
+    .. [3] `2008 NIST Speaker Recognition Evaluation Results `_
+    .. [4] `DET-Curve Plotting software for use with MATLAB `_
+
+    Examples
+    --------
+    >>> import numpy as np
+    >>> from sklearn.metrics import detection_error_tradeoff
+    >>> y_true = np.array([0, 0, 1, 1])
+    >>> y_scores = np.array([0.1, 0.4, 0.35, 0.8])
+    >>> fps, fns, thresholds = detection_error_tradeoff(y_true, y_scores)
+    >>> fps
+    array([ 1., 1., 0.])
+    >>> fns
+    array([ 0., 1., 1.])
+    >>> thresholds
+    array([ 0.35, 0.4 , 0.8 ])
+
+    """
+    fps, tps, thresholds = _binary_clf_curve(y_true, probas_pred,
+                                             pos_label=pos_label,
+                                             sample_weight=sample_weight)
+    fns = tps[-1] - tps
+    tpN = tps[-1]
+    tnN = (fps[-1]-fps)[0]
+
+    # start with false positives is zero and stop with false negatives zero
+    # and reverse the outputs so list of false positives is decreasing
+    last_ind = tps.searchsorted(tps[-1])+1
+    first_ind = fps[::-1].searchsorted(fps[0])
+    sl = range(first_ind,last_ind)[::-1]
+    return fps[sl]/tpN, fns[sl]/tnN, thresholds[sl]
+
 
 def roc_auc_score(y_true, y_score, average="macro", sample_weight=None):
     """Compute Area Under the Curve (AUC) from prediction scores

From dce415acde56b8957d38648a68b2c5a61f96e922 Mon Sep 17 00:00:00 2001
From: Jeremy Karnowski
Date: Wed, 15 Jul 2015 09:27:56 -0700
Subject: [PATCH 02/10] Fixed example output.
---
 sklearn/metrics/ranking.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sklearn/metrics/ranking.py b/sklearn/metrics/ranking.py
index a19ec0f2654a5..22eb38377392c 100644
--- a/sklearn/metrics/ranking.py
+++ b/sklearn/metrics/ranking.py
@@ -240,9 +240,9 @@ def detection_error_tradeoff(y_true, probas_pred, pos_label=None,
     >>> y_scores = np.array([0.1, 0.4, 0.35, 0.8])
     >>> fps, fns, thresholds = detection_error_tradeoff(y_true, y_scores)
     >>> fps
-    array([ 1., 1., 0.])
+    array([ 0.5, 0.5, 0. ])
     >>> fns
-    array([ 0., 1., 1.])
+    array([ 0. , 0.5, 0.5])
     >>> thresholds
     array([ 0.35, 0.4 , 0.8 ])

From fe22cb6c99d4384fc7f88819ef4af9a499d2d1ed Mon Sep 17 00:00:00 2001
From: Jeremy Karnowski
Date: Wed, 15 Jul 2015 09:44:08 -0700
Subject: [PATCH 03/10] Adding necessary import in metrics.py

---
 sklearn/metrics/__init__.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sklearn/metrics/__init__.py b/sklearn/metrics/__init__.py
index cae8f9b6c7d03..958b2d275da87 100644
--- a/sklearn/metrics/__init__.py
+++ b/sklearn/metrics/__init__.py
@@ -12,6 +12,7 @@
 from .ranking import precision_recall_curve
 from .ranking import roc_auc_score
 from .ranking import roc_curve
+from .ranking import detection_error_tradeoff
 
 from .classification import accuracy_score
 from .classification import classification_report

From 3461f9494116dec34c26851b0cde1b87a99f3d71 Mon Sep 17 00:00:00 2001
From: Jeremy Karnowski
Date: Wed, 15 Jul 2015 09:54:59 -0700
Subject: [PATCH 04/10] Added metric to init

---
 sklearn/metrics/__init__.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sklearn/metrics/__init__.py b/sklearn/metrics/__init__.py
index 958b2d275da87..afbd5aedb0f80 100644
--- a/sklearn/metrics/__init__.py
+++ b/sklearn/metrics/__init__.py
@@ -7,6 +7,7 @@
 from .ranking import auc
 from .ranking import average_precision_score
 from .ranking import coverage_error
+from .ranking import detection_error_tradeoff
 from .ranking import label_ranking_average_precision_score
 from .ranking import label_ranking_loss
 from .ranking import precision_recall_curve

From 77fc3ce68f0e763467db06eff9a08ad6b948498c Mon Sep 17 00:00:00 2001
From: Jeremy Karnowski
Date: Wed, 15 Jul 2015 09:55:25 -0700
Subject: [PATCH 05/10] Put metric in alphabetical position.

---
 sklearn/metrics/__init__.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/sklearn/metrics/__init__.py b/sklearn/metrics/__init__.py
index afbd5aedb0f80..296578d63942d 100644
--- a/sklearn/metrics/__init__.py
+++ b/sklearn/metrics/__init__.py
@@ -13,7 +13,6 @@
 from .ranking import precision_recall_curve
 from .ranking import roc_auc_score
 from .ranking import roc_curve
-from .ranking import detection_error_tradeoff
 
 from .classification import accuracy_score
 from .classification import classification_report

From 6592d6ccd675b6e58413cd4e983c0b199279763d Mon Sep 17 00:00:00 2001
From: Jeremy Karnowski
Date: Fri, 24 Jul 2015 21:07:08 -0700
Subject: [PATCH 06/10] Added white space as per PEP8 and modified variable names for counts.
---
 sklearn/metrics/ranking.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/sklearn/metrics/ranking.py b/sklearn/metrics/ranking.py
index 22eb38377392c..26c25d6bdf506 100644
--- a/sklearn/metrics/ranking.py
+++ b/sklearn/metrics/ranking.py
@@ -251,15 +251,15 @@ def detection_error_tradeoff(y_true, probas_pred, pos_label=None,
                                              pos_label=pos_label,
                                              sample_weight=sample_weight)
     fns = tps[-1] - tps
-    tpN = tps[-1]
-    tnN = (fps[-1]-fps)[0]
+    pos_count = tps[-1]
+    neg_count = fps[-1]
 
     # start with false positives is zero and stop with false negatives zero
     # and reverse the outputs so list of false positives is decreasing
-    last_ind = tps.searchsorted(tps[-1])+1
+    last_ind = tps.searchsorted(tps[-1]) + 1
     first_ind = fps[::-1].searchsorted(fps[0])
-    sl = range(first_ind,last_ind)[::-1]
-    return fps[sl]/tpN, fns[sl]/tnN, thresholds[sl]
+    sl = range(first_ind, last_ind)[::-1]
+    return fps[sl] / neg_count, fns[sl] / pos_count, thresholds[sl]

From fb31e57548285dcd8286060cb655679ddddecf73 Mon Sep 17 00:00:00 2001
From: Jeremy Karnowski
Date: Fri, 24 Jul 2015 21:08:34 -0700
Subject: [PATCH 07/10] Added another blank line above function as per PEP8.

---
 sklearn/metrics/ranking.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sklearn/metrics/ranking.py b/sklearn/metrics/ranking.py
index 26c25d6bdf506..f548553a75c2d 100644
--- a/sklearn/metrics/ranking.py
+++ b/sklearn/metrics/ranking.py
@@ -184,6 +184,7 @@ def _binary_average_precision(y_true, y_score, sample_weight=None):
     return _average_binary_score(_binary_average_precision, y_true, y_score,
                                  average, sample_weight=sample_weight)
 
+
 def detection_error_tradeoff(y_true, probas_pred, pos_label=None,
                              sample_weight=None):
     """Compute error rates for different probability thresholds

From 9f3e7fc9dc2c93f4eee464994d5864a49303877f Mon Sep 17 00:00:00 2001
From: Jeremy Karnowski
Date: Fri, 7 Aug 2015 15:26:46 -0700
Subject: [PATCH 08/10] Added barebones information about DET into documentation.

---
 doc/modules/model_evaluation.rst | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/doc/modules/model_evaluation.rst b/doc/modules/model_evaluation.rst
index db7b59d6c1d3a..37a720a3d130b 100644
--- a/doc/modules/model_evaluation.rst
+++ b/doc/modules/model_evaluation.rst
@@ -1001,6 +1001,34 @@ if the predicted outputs have been binarized.
   for an example of using ROC to model species distribution.
 
+.. _det_curve:
+
+Detection error tradeoff (DET)
+---------------------------------------
+
+The function :func:`detection_error_tradeoff` computes the
+`detection error tradeoff curve, or DET curve `_.
+Quoting Wikipedia:
+
+  "A detection error tradeoff (DET) graph is a graphical plot of error rates
+  for binary classification systems, plotting false reject rate vs. false
+  accept rate. The x- and y-axes are scaled non-linearly by their standard
+  normal deviates (or just by logarithmic transformation), yielding tradeoff
+  curves that are more linear than ROC curves, and use most of the image
+  area to highlight the differences of importance in the critical operating
+  region."
+
+This function requires the true binary value and the target scores, which can
+either be probability estimates of the positive class, confidence values, or
+binary decisions.
+
+Here is a small example of how to use the :func:`detection_error_tradeoff`
+function::
+
+    >>> import numpy as np
+    >>> from sklearn.metrics import detection_error_tradeoff
+    >>> y = np.array([1, 1, 2, 2])
+    >>> scores = np.array([0.1, 0.4, 0.35, 0.8])
+    >>> fpr, fnr, thresholds = detection_error_tradeoff(y, scores, pos_label=2)
+    >>> fpr
+    array([ 0.5, 0.5, 0. ])
+    >>> fnr
+    array([ 0. , 0.5, 0.5])
+    >>> thresholds
+    array([ 0.35, 0.4 , 0.8 ])
+
 .. _zero_one_loss:
 
 Zero one loss

From 2e2f9994cae02126dc2a4ba13cf2e247725e9bdc Mon Sep 17 00:00:00 2001
From: Julien Cornebise
Date: Sun, 9 Apr 2017 17:11:08 +0100
Subject: [PATCH 09/10] Add DET to exports

---
 sklearn/metrics/__init__.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sklearn/metrics/__init__.py b/sklearn/metrics/__init__.py
index 296578d63942d..e49cc112a6f7d 100644
--- a/sklearn/metrics/__init__.py
+++ b/sklearn/metrics/__init__.py
@@ -75,6 +75,7 @@
     'confusion_matrix',
     'consensus_score',
     'coverage_error',
+    'detection_error_tradeoff',
     'euclidean_distances',
     'explained_variance_score',
     'f1_score',

From 3fc7c30daf9956784b1ebf5f64ebbbe873fb3ec6 Mon Sep 17 00:00:00 2001
From: Julien Cornebise
Date: Sun, 9 Apr 2017 17:17:01 +0100
Subject: [PATCH 10/10] Rename argument probas_pred -> y_score

---
 sklearn/metrics/ranking.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/sklearn/metrics/ranking.py b/sklearn/metrics/ranking.py
index f548553a75c2d..abec26ea90755 100644
--- a/sklearn/metrics/ranking.py
+++ b/sklearn/metrics/ranking.py
@@ -185,7 +185,7 @@ def _binary_average_precision(y_true, y_score, sample_weight=None):
                                  average, sample_weight=sample_weight)
 
 
-def detection_error_tradeoff(y_true, probas_pred, pos_label=None,
+def detection_error_tradeoff(y_true, y_score, pos_label=None,
                              sample_weight=None):
     """Compute error rates for different probability thresholds
 
@@ -196,7 +196,7 @@ def detection_error_tradeoff(y_true, probas_pred, pos_label=None,
     y_true : array, shape = [n_samples]
         True targets of binary classification in range {-1, 1} or {0, 1}.
 
-    probas_pred : array, shape = [n_samples]
+    y_score : array, shape = [n_samples]
        Estimated probabilities or decision function.
 
     pos_label : int, optional (default=None)
        The label of the positive class
@@ -248,7 +248,7 @@ def detection_error_tradeoff(y_true, probas_pred, pos_label=None,
     array([ 0.35, 0.4 , 0.8 ])
 
     """
-    fps, tps, thresholds = _binary_clf_curve(y_true, probas_pred,
+    fps, tps, thresholds = _binary_clf_curve(y_true, y_score,
                                              pos_label=pos_label,
                                              sample_weight=sample_weight)
     fns = tps[-1] - tps
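
A minimal plotting sketch, not part of the patches above and only an assumption
about how the new function would typically be used: it feeds the error rates
returned by detection_error_tradeoff through scipy.stats.norm.ppf to obtain the
standard normal deviate axes mentioned in the model_evaluation.rst text. The
toy labels and scores are taken from the docstring example; scipy and
matplotlib are external dependencies, not requirements of the patch itself.

    import matplotlib.pyplot as plt
    import numpy as np
    from scipy.stats import norm

    from sklearn.metrics import detection_error_tradeoff

    # Toy data from the docstring example.
    y_true = np.array([0, 0, 1, 1])
    y_scores = np.array([0.1, 0.4, 0.35, 0.8])

    # fpr/fnr are per-threshold error rates, as returned by the new function.
    fpr, fnr, thresholds = detection_error_tradeoff(y_true, y_scores)

    # DET curves are conventionally drawn on normal-deviate axes, which tends
    # to make the curve approximately linear when the underlying score
    # distributions are roughly Gaussian.  Clip the rates away from 0 and 1
    # so that norm.ppf stays finite for this tiny example.
    eps = 1e-3
    x = norm.ppf(np.clip(fpr, eps, 1 - eps))
    y = norm.ppf(np.clip(fnr, eps, 1 - eps))

    plt.plot(x, y, marker="o")
    plt.xlabel("False positive rate (normal deviate scale)")
    plt.ylabel("False negative rate (normal deviate scale)")
    plt.title("DET curve")
    plt.show()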