From b122492625065d8b0c083bc9e0755fb4b7d7dcc1 Mon Sep 17 00:00:00 2001 From: Maximilian Soelch Date: Wed, 20 Apr 2016 22:47:34 +0200 Subject: [PATCH 1/2] fix ROC for low variance scores --- sklearn/metrics/ranking.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sklearn/metrics/ranking.py b/sklearn/metrics/ranking.py index da75465597f69..4f101dd06778b 100644 --- a/sklearn/metrics/ranking.py +++ b/sklearn/metrics/ranking.py @@ -329,7 +329,8 @@ def _binary_clf_curve(y_true, y_score, pos_label=None, sample_weight=None): # We need to use isclose to avoid spurious repeated thresholds # stemming from floating point roundoff errors. distinct_value_indices = np.where(np.logical_not(isclose( - np.diff(y_score), 0)))[0] + np.diff(y_score), 0, atol=0.0, rtol=0.0)))[0] + threshold_idxs = np.r_[distinct_value_indices, y_true.size - 1] # accumulate the true positives with decreasing threshold From dd23495f702782ddd408e3944d07ada10b4111e4 Mon Sep 17 00:00:00 2001 From: Maximilian Soelch Date: Thu, 21 Apr 2016 09:50:41 +0200 Subject: [PATCH 2/2] adjust docu after changes in ROC --- sklearn/metrics/ranking.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sklearn/metrics/ranking.py b/sklearn/metrics/ranking.py index 4f101dd06778b..17ffd53442143 100644 --- a/sklearn/metrics/ranking.py +++ b/sklearn/metrics/ranking.py @@ -15,6 +15,7 @@ # Lars Buitinck # Joel Nothman # Noel Dawe +# Maximilian Soelch # License: BSD 3 clause from __future__ import division @@ -328,6 +329,8 @@ def _binary_clf_curve(y_true, y_score, pos_label=None, sample_weight=None): # concatenate a value for the end of the curve. # We need to use isclose to avoid spurious repeated thresholds # stemming from floating point roundoff errors. + # Tolerances must stay at 0.0: a nonzero tolerance merges distinct but + # nearly equal scores into one threshold for low variance y_score arrays. distinct_value_indices = np.where(np.logical_not(isclose( np.diff(y_score), 0, atol=0.0, rtol=0.0)))[0]