From 090037f13fe9b7af6496a37efb17a44e7f459da7 Mon Sep 17 00:00:00 2001 From: Satrajit Ghosh Date: Mon, 14 Nov 2011 21:13:34 -0500 Subject: [PATCH 01/15] added avg_f1_score --- sklearn/metrics/__init__.py | 2 +- sklearn/metrics/metrics.py | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/sklearn/metrics/__init__.py b/sklearn/metrics/__init__.py index c8123b2343573..bff0e64cc5123 100644 --- a/sklearn/metrics/__init__.py +++ b/sklearn/metrics/__init__.py @@ -7,7 +7,7 @@ recall_score, fbeta_score, f1_score, zero_one_score, \ precision_recall_fscore_support, classification_report, \ precision_recall_curve, explained_variance_score, r2_score, \ - zero_one, mean_square_error, hinge_loss + zero_one, mean_square_error, hinge_loss, avg_f1_score from . import cluster from .cluster import adjusted_rand_score diff --git a/sklearn/metrics/metrics.py b/sklearn/metrics/metrics.py index e6f7c7ad12262..e69017f4c0b05 100644 --- a/sklearn/metrics/metrics.py +++ b/sklearn/metrics/metrics.py @@ -458,6 +458,11 @@ def precision_recall_fscore_support(y_true, y_pred, beta=1.0, labels=None): return precision, recall, fscore, support +def avg_f1_score(y_true, y_pred): + """Return the average f1 score + """ + p, r, f1, support = precision_recall_fscore_support(y_true, y_pred) + return np.average(f1, weights=support) def classification_report(y_true, y_pred, labels=None, target_names=None): """Build a text report showing the main classification metrics From e546621f38a7e29f11770df8b27382849b77ba9c Mon Sep 17 00:00:00 2001 From: Satrajit Ghosh Date: Mon, 14 Nov 2011 21:29:24 -0500 Subject: [PATCH 02/15] tst: added tests --- sklearn/metrics/metrics.py | 20 ++++++++++++++++++-- sklearn/metrics/tests/test_metrics.py | 4 ++++ 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/sklearn/metrics/metrics.py b/sklearn/metrics/metrics.py index e69017f4c0b05..dce6b3f4513b2 100644 --- a/sklearn/metrics/metrics.py +++ b/sklearn/metrics/metrics.py @@ -361,7 +361,7 @@ def f1_score(y_true, y_pred, pos_label=1): ------- f1_score : float f1_score of the positive class in binary classification or weighted - avergage of the f1_scores of each class for the multiclass task + average of the f1_scores of each class for the multiclass task References ---------- @@ -458,12 +458,28 @@ def precision_recall_fscore_support(y_true, y_pred, beta=1.0, labels=None): return precision, recall, fscore, support + def avg_f1_score(y_true, y_pred): """Return the average f1 score + + Parameters + ---------- + y_true : array, shape = [n_samples] + true targets + + y_pred : array, shape = [n_samples] + estimated targets + + Returns + ------- + avg_f1_score : float + average of the f1_scores of each class for the multiclass task + """ - p, r, f1, support = precision_recall_fscore_support(y_true, y_pred) + _, _, f1, support = precision_recall_fscore_support(y_true, y_pred) return np.average(f1, weights=support) + def classification_report(y_true, y_pred, labels=None, target_names=None): """Build a text report showing the main classification metrics diff --git a/sklearn/metrics/tests/test_metrics.py b/sklearn/metrics/tests/test_metrics.py index 0fe5e6eae4942..ceef914bf1034 100644 --- a/sklearn/metrics/tests/test_metrics.py +++ b/sklearn/metrics/tests/test_metrics.py @@ -15,6 +15,7 @@ from ..metrics import explained_variance_score from ..metrics import r2_score from ..metrics import f1_score +from ..metrics import avg_f1_score from ..metrics import mean_square_error from ..metrics import precision_recall_curve from ..metrics import 
precision_recall_fscore_support @@ -140,6 +141,9 @@ def test_precision_recall_f1_score_binary(): fs = f1_score(y_true, y_pred) assert_array_almost_equal(fs, 0.74, 2) + afs = avg_f1_score(y_true, y_pred) + assert_array_almost_equal(afs, 0.74, decimal=2) + def test_confusion_matrix_binary(): """Test confusion matrix - binary classification case""" From df6010bb341b811d89befd3f48113ed517574cb5 Mon Sep 17 00:00:00 2001 From: Satrajit Ghosh Date: Mon, 14 Nov 2011 22:30:28 -0500 Subject: [PATCH 03/15] enh: added matthew's correlation coefficient --- sklearn/metrics/__init__.py | 3 ++- sklearn/metrics/metrics.py | 33 +++++++++++++++++++++++++++ sklearn/metrics/tests/test_metrics.py | 15 ++++++++++++ 3 files changed, 50 insertions(+), 1 deletion(-) diff --git a/sklearn/metrics/__init__.py b/sklearn/metrics/__init__.py index bff0e64cc5123..ff5baef9e87f9 100644 --- a/sklearn/metrics/__init__.py +++ b/sklearn/metrics/__init__.py @@ -7,7 +7,8 @@ recall_score, fbeta_score, f1_score, zero_one_score, \ precision_recall_fscore_support, classification_report, \ precision_recall_curve, explained_variance_score, r2_score, \ - zero_one, mean_square_error, hinge_loss, avg_f1_score + zero_one, mean_square_error, hinge_loss, avg_f1_score, \ + matthews_corrcoef from . import cluster from .cluster import adjusted_rand_score diff --git a/sklearn/metrics/metrics.py b/sklearn/metrics/metrics.py index dce6b3f4513b2..14551c3b6b5d7 100644 --- a/sklearn/metrics/metrics.py +++ b/sklearn/metrics/metrics.py @@ -480,6 +480,39 @@ def avg_f1_score(y_true, y_pred): return np.average(f1, weights=support) +def matthews_corrcoef(y_true, y_pred): + """Returns matthew's correlation coefficient for binary classes + + Only in the binary case does this relate to information about true and false + positives and negatives. See references below. + + Parameters + ---------- + y_true : array, shape = [n_samples] + true targets + + y_pred : array, shape = [n_samples] + estimated targets + + Returns + ------- + mcc : float + matthew's correlation coefficient (+1 represents a perfect prediction, + 0 an average random prediction and -1 and inverse prediction). + + References + ---------- + http://en.wikipedia.org/wiki/Matthews_correlation_coefficient + doi: 10.1093/bioinformatics/16.5.412 + + """ + mcc = np.corrcoef(y_true, y_pred)[0,1] + if np.isnan(mcc): + return 0. 
+ else: + return mcc + + def classification_report(y_true, y_pred, labels=None, target_names=None): """Build a text report showing the main classification metrics diff --git a/sklearn/metrics/tests/test_metrics.py b/sklearn/metrics/tests/test_metrics.py index ceef914bf1034..ada5daf77e7c2 100644 --- a/sklearn/metrics/tests/test_metrics.py +++ b/sklearn/metrics/tests/test_metrics.py @@ -16,6 +16,7 @@ from ..metrics import r2_score from ..metrics import f1_score from ..metrics import avg_f1_score +from ..metrics import matthews_corrcoef from ..metrics import mean_square_error from ..metrics import precision_recall_curve from ..metrics import precision_recall_fscore_support @@ -152,6 +153,20 @@ def test_confusion_matrix_binary(): cm = confusion_matrix(y_true, y_pred) assert_array_equal(cm, [[19, 6], [7, 18]]) + tp = cm[0,0] + tn = cm[1,1] + fp = cm[0,1] + fn = cm[1,0] + num = (tp*tn-fp*fn) + den = np.sqrt((tp+fp)*(tp+fn)*(tn+fp)*(tn+fn)) + if den == 0.: + true_mcc = 0 + else: + true_mcc = num/den + mcc = matthews_corrcoef(y_true, y_pred) + assert_array_almost_equal(mcc, true_mcc, decimal=2) + assert_array_almost_equal(mcc, 0.48, decimal=2) + def test_precision_recall_f1_score_multiclass(): """Test Precision Recall and F1 Score for multiclass classification task""" From ee5451e5b7654404ca62ebca530ae38ae5e00fd5 Mon Sep 17 00:00:00 2001 From: Satrajit Ghosh Date: Tue, 15 Nov 2011 08:20:26 -0500 Subject: [PATCH 04/15] sty: pep8 + doc --- sklearn/metrics/metrics.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sklearn/metrics/metrics.py b/sklearn/metrics/metrics.py index 14551c3b6b5d7..786ddc18071dc 100644 --- a/sklearn/metrics/metrics.py +++ b/sklearn/metrics/metrics.py @@ -473,7 +473,7 @@ def avg_f1_score(y_true, y_pred): Returns ------- avg_f1_score : float - average of the f1_scores of each class for the multiclass task + average of the f1_scores of all classes """ _, _, f1, support = precision_recall_fscore_support(y_true, y_pred) @@ -483,8 +483,8 @@ def avg_f1_score(y_true, y_pred): def matthews_corrcoef(y_true, y_pred): """Returns matthew's correlation coefficient for binary classes - Only in the binary case does this relate to information about true and false - positives and negatives. See references below. + Only in the binary case does this relate to information about true and + false positives and negatives. See references below. Parameters ---------- @@ -506,7 +506,7 @@ def matthews_corrcoef(y_true, y_pred): doi: 10.1093/bioinformatics/16.5.412 """ - mcc = np.corrcoef(y_true, y_pred)[0,1] + mcc = np.corrcoef(y_true, y_pred)[0, 1] if np.isnan(mcc): return 0. 
else: From 940f34ac512999189f06718ea6ee58313f67816e Mon Sep 17 00:00:00 2001 From: Satrajit Ghosh Date: Thu, 15 Dec 2011 14:26:00 +0100 Subject: [PATCH 05/15] enh: added support for weighted metrics closes #83 removed avg_f1_score --- sklearn/metrics/__init__.py | 3 +- sklearn/metrics/metrics.py | 159 +++++++++++++++++--------- sklearn/metrics/tests/test_metrics.py | 42 +++++-- 3 files changed, 138 insertions(+), 66 deletions(-) diff --git a/sklearn/metrics/__init__.py b/sklearn/metrics/__init__.py index 5ecdd62980eb1..8486c0e9686bc 100644 --- a/sklearn/metrics/__init__.py +++ b/sklearn/metrics/__init__.py @@ -7,8 +7,7 @@ recall_score, fbeta_score, f1_score, zero_one_score, \ precision_recall_fscore_support, classification_report, \ precision_recall_curve, explained_variance_score, r2_score, \ - zero_one, mean_square_error, hinge_loss, avg_f1_score, \ - matthews_corrcoef + zero_one, mean_square_error, hinge_loss, matthews_corrcoef from . import cluster from .cluster import adjusted_rand_score diff --git a/sklearn/metrics/metrics.py b/sklearn/metrics/metrics.py index 786ddc18071dc..c973a93447eb1 100644 --- a/sklearn/metrics/metrics.py +++ b/sklearn/metrics/metrics.py @@ -205,7 +205,7 @@ def auc(x, y): return area -def precision_score(y_true, y_pred, pos_label=1): +def precision_score(y_true, y_pred, labels=None, pos_label=1, average='micro'): """Compute the precision The precision is the ratio :math:`tp / (tp + fp)` where tp is the @@ -223,11 +223,18 @@ def precision_score(y_true, y_pred, pos_label=1): y_pred : array, shape = [n_samples] predicted targets + labels : array + integer array of labels + pos_label : int - in the binary classification case, give the label of the - positive class (default is 1). Everything else but 'pos_label' + in the binary classification case, give the label of the positive + class (default is 1). Everything else but 'pos_label' is considered to belong to the negative class. - Not used in the case of multiclass classification. + Set to None in the case of multiclass classification. + + average : string, ['micro', 'macro', 'weighted'] + in the multiclass classification case, this determines the + type of averaging performed on the data. Returns ------- @@ -237,14 +244,14 @@ def precision_score(y_true, y_pred, pos_label=1): multiclass task """ - p, _, _, s = precision_recall_fscore_support(y_true, y_pred) - if p.shape[0] == 2: - return p[pos_label] - else: - return np.average(p, weights=s) + p, _, _, _ = precision_recall_fscore_support(y_true, y_pred, + labels=labels, + pos_label=pos_label, + average=average) + return p -def recall_score(y_true, y_pred, pos_label=1): +def recall_score(y_true, y_pred, labels=None, pos_label=1, average='micro'): """Compute the recall The recall is the ratio :math:`tp / (tp + fn)` where tp is the number of @@ -261,11 +268,18 @@ def recall_score(y_true, y_pred, pos_label=1): y_pred : array, shape = [n_samples] predicted targets + labels : array + integer array of labels + pos_label : int in the binary classification case, give the label of the positive class (default is 1). Everything else but 'pos_label' is considered to belong to the negative class. - Not used in the case of multiclass classification. + Set to None in the case of multiclass classification. + + average : string, ['micro', 'macro', 'weighted'] + in the multiclass classification case, this determines the + type of averaging performed on the data. Returns ------- @@ -274,14 +288,14 @@ class (default is 1). 
Everything else but 'pos_label' avergage of the recall of each class for the multiclass task. """ - _, r, _, s = precision_recall_fscore_support(y_true, y_pred) - if r.shape[0] == 2: - return r[pos_label] - else: - return np.average(r, weights=s) + _, r, _, _ = precision_recall_fscore_support(y_true, y_pred, + labels=labels, + pos_label=pos_label, + average=average) + return r -def fbeta_score(y_true, y_pred, beta, pos_label=1): +def fbeta_score(y_true, y_pred, beta, labels=None, pos_label=1, average='micro'): """Compute fbeta score The F_beta score is the weighted harmonic mean of precision and recall, @@ -301,11 +315,18 @@ def fbeta_score(y_true, y_pred, beta, pos_label=1): beta: float + labels : array + integer array of labels + pos_label : int in the binary classification case, give the label of the positive class (default is 1). Everything else but 'pos_label' is considered to belong to the negative class. - Not used in the case of multiclass classification. + Set to None in the case of multiclass classification. + + average : string, ['micro', 'macro', 'weighted'] + in the multiclass classification case, this determines the + type of averaging performed on the data. Returns ------- @@ -321,14 +342,15 @@ class (default is 1). Everything else but 'pos_label' http://en.wikipedia.org/wiki/F1_score """ - _, _, f, s = precision_recall_fscore_support(y_true, y_pred, beta=beta) - if f.shape[0] == 2: - return f[pos_label] - else: - return np.average(f, weights=s) + _, _, f, _ = precision_recall_fscore_support(y_true, y_pred, + beta=beta, + labels=labels, + pos_label=pos_label, + average=average) + return f -def f1_score(y_true, y_pred, pos_label=1): +def f1_score(y_true, y_pred, labels=None, pos_label=1, average='micro'): """Compute f1 score The F1 score can be interpreted as a weighted average of the precision @@ -351,11 +373,18 @@ def f1_score(y_true, y_pred, pos_label=1): y_pred : array, shape = [n_samples] predicted targets + labels : array + integer array of labels + pos_label : int - in the binary classification case, give the label of the positive class - (default is 1). Everything else but 'pos_label' + in the binary classification case, give the label of the positive + class (default is 1). Everything else but 'pos_label' is considered to belong to the negative class. - Not used in the case of multiclass classification. + Set to None in the case of multiclass classification. + + average : string, ['micro', 'macro', 'weighted'] + in the multiclass classification case, this determines the + type of averaging performed on the data. Returns ------- @@ -368,10 +397,12 @@ def f1_score(y_true, y_pred, pos_label=1): http://en.wikipedia.org/wiki/F1_score """ - return fbeta_score(y_true, y_pred, 1, pos_label=pos_label) + return fbeta_score(y_true, y_pred, 1, labels=labels, + pos_label=pos_label, average=average) -def precision_recall_fscore_support(y_true, y_pred, beta=1.0, labels=None): +def precision_recall_fscore_support(y_true, y_pred, beta=1.0, labels=None, + pos_label=None, average=None): """Compute precisions, recalls, f-measures and support for each class The precision is the ratio :math:`tp / (tp + fp)` where tp is the number of @@ -392,6 +423,9 @@ def precision_recall_fscore_support(y_true, y_pred, beta=1.0, labels=None): The support is the number of occurrences of each class in y_true. + If pos_label is None, this function returns the average precision, recall + and f-measure. The averaging is either 'micro', 'macro', 'weighted'. 
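
(Aside: a minimal sketch of what the three averaging modes boil down to, computed from the per-class values that precision_recall_fscore_support returns with average=None. The toy labels are invented for illustration, and this is not the exact code path the patch uses internally.)

    import numpy as np
    from sklearn.metrics import precision_recall_fscore_support

    # Toy multiclass labels, invented for illustration only.
    y_true = [0, 0, 0, 1, 1, 2, 2, 2, 2]
    y_pred = [0, 1, 0, 1, 2, 2, 2, 0, 2]

    # Per-class values: one entry per label.
    p, r, f, s = precision_recall_fscore_support(y_true, y_pred, average=None)

    # 'macro': unweighted mean over classes; ignores class imbalance.
    macro_p = np.mean(p)

    # 'weighted': mean over classes weighted by support; accounts for imbalance.
    weighted_p = np.average(p, weights=s)

    # 'micro': pool every individual decision before dividing. With exactly one
    # predicted label per sample this makes precision, recall and f1 all equal
    # to plain accuracy.
    micro_p = np.mean(np.asarray(y_true) == np.asarray(y_pred))

    print(macro_p, weighted_p, micro_p)
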
+ Parameters ---------- y_true : array, shape = [n_samples] @@ -403,6 +437,19 @@ def precision_recall_fscore_support(y_true, y_pred, beta=1.0, labels=None): beta : float, 1.0 by default the strength of recall versus precision in the f-score + labels : array + integer array of labels + + pos_label : int + in the binary classification case, give the label of the positive + class (default is 1). Everything else but 'pos_label' + is considered to belong to the negative class. + Set to None in the case of multiclass classification. + + average : string, ['micro', 'macro', 'weighted'] + in the multiclass classification case, this determines the + type of averaging performed on the data. + Returns ------- precision: array, shape = [n_unique_labels], dtype = np.double @@ -456,28 +503,36 @@ def precision_recall_fscore_support(y_true, y_pred, beta=1.0, labels=None): finally: np.seterr(**old_err_settings) - return precision, recall, fscore, support - - -def avg_f1_score(y_true, y_pred): - """Return the average f1 score - - Parameters - ---------- - y_true : array, shape = [n_samples] - true targets - - y_pred : array, shape = [n_samples] - estimated targets - - Returns - ------- - avg_f1_score : float - average of the f1_scores of all classes - - """ - _, _, f1, support = precision_recall_fscore_support(y_true, y_pred) - return np.average(f1, weights=support) + if pos_label is not None: + if precision.shape[0] != 2: + raise ValueError(("pos_label should be set to None for multiclass " + "tasks got %d") % pos_label) + if pos_label not in labels: + raise ValueError("pos_label=%d is not a valid label: %r" % (pos_label, + labels)) + pos_label_idx = list(labels).index(pos_label) + return (precision[pos_label_idx], recall[pos_label_idx], + fscore[pos_label_idx], support[pos_label_idx]) + else: + average_options = (None, 'micro', 'macro', 'weighted') + if average not in average_options: + raise ValueError('average has to be one of ' + + str(average_options)) + if average is None: + return precision, recall, fscore, support + if average == 'micro': + avg_precision = true_pos.sum() / (true_pos.sum() + false_pos.sum()) + avg_recall = true_pos.sum() / (true_pos.sum() + false_neg.sum()) + avg_fscore = (1 + beta2) * (avg_precision * avg_recall) / (beta2 * avg_precision + avg_recall) + if average == 'macro': + avg_precision = np.mean(precision) + avg_recall = np.mean(recall) + avg_fscore = np.mean(fscore) + if average == 'weighted': + avg_precision = np.average(precision, weights=support) + avg_recall = np.average(recall, weights=support) + avg_fscore = np.average(fscore, weights=support) + return avg_precision, avg_recall, avg_fscore, None def matthews_corrcoef(y_true, y_pred): @@ -503,7 +558,7 @@ def matthews_corrcoef(y_true, y_pred): References ---------- http://en.wikipedia.org/wiki/Matthews_correlation_coefficient - doi: 10.1093/bioinformatics/16.5.412 + http://dx.doi.org/10.1093/bioinformatics/16.5.412 """ mcc = np.corrcoef(y_true, y_pred)[0, 1] diff --git a/sklearn/metrics/tests/test_metrics.py b/sklearn/metrics/tests/test_metrics.py index 8ac6bc309e854..573644e7eb2aa 100644 --- a/sklearn/metrics/tests/test_metrics.py +++ b/sklearn/metrics/tests/test_metrics.py @@ -4,7 +4,7 @@ from nose.tools import raises from nose.tools import assert_true from numpy.testing import assert_array_almost_equal -from numpy.testing import assert_array_equal +from numpy.testing import assert_array_equal, assert_raises from numpy.testing import assert_equal, assert_almost_equal from ... 
import datasets @@ -15,7 +15,6 @@ from ..metrics import explained_variance_score from ..metrics import r2_score from ..metrics import f1_score -from ..metrics import avg_f1_score from ..metrics import matthews_corrcoef from ..metrics import mean_square_error from ..metrics import precision_recall_curve @@ -142,9 +141,6 @@ def test_precision_recall_f1_score_binary(): fs = f1_score(y_true, y_pred) assert_array_almost_equal(fs, 0.74, 2) - afs = avg_f1_score(y_true, y_pred) - assert_array_almost_equal(afs, 0.74, decimal=2) - def test_confusion_matrix_binary(): """Test confusion matrix - binary classification case""" @@ -183,14 +179,36 @@ def test_precision_recall_f1_score_multiclass(): # multiclass case the score is the wieghthed average of the individual # class values hence f1_score is not necessary between precision_score and # recall_score - ps = precision_score(y_true, y_pred) + func = lambda : precision_recall_fscore_support(y_true, y_pred, pos_label=1) + assert_raises(ValueError, func) + + # averaging tests + ps = precision_score(y_true, y_pred, pos_label=None, average='micro') assert_array_almost_equal(ps, 0.62, 2) - rs = recall_score(y_true, y_pred) + rs = recall_score(y_true, y_pred, pos_label=None, average='micro') assert_array_almost_equal(rs, 0.61, 2) - fs = f1_score(y_true, y_pred) - assert_array_almost_equal(fs, 0.56, 2) + fs = f1_score(y_true, y_pred, pos_label=None, average='micro') + assert_array_almost_equal(fs, 0.61, 2) + + ps = precision_score(y_true, y_pred, pos_label=None, average='macro') + assert_array_almost_equal(ps, 0.62, 2) + + rs = recall_score(y_true, y_pred, pos_label=None, average='macro') + assert_array_almost_equal(rs, 0.66, 2) + + fs = f1_score(y_true, y_pred, pos_label=None, average='macro') + assert_array_almost_equal(fs, 0.58, 2) + + ps = precision_score(y_true, y_pred, pos_label=None, average='weighted') + assert_array_almost_equal(ps, 0.62, 2) + + rs = recall_score(y_true, y_pred, pos_label=None, average='weighted') + assert_array_almost_equal(rs, 0.61, 2) + + fs = f1_score(y_true, y_pred, pos_label=None, average='weighted') + assert_array_almost_equal(fs, 0.55, 2) # same prediction but with and explicit label ordering p, r, f, s = precision_recall_fscore_support( @@ -210,9 +228,9 @@ def test_zero_precision_recall(): y_true = np.array([0, 1, 2, 0, 1, 2]) y_pred = np.array([2, 0, 1, 1, 2, 0]) - assert_almost_equal(precision_score(y_true, y_pred), 0.0, 2) - assert_almost_equal(recall_score(y_true, y_pred), 0.0, 2) - assert_almost_equal(f1_score(y_true, y_pred), 0.0, 2) + assert_almost_equal(precision_score(y_true, y_pred, pos_label=None, average='weighted'), 0.0, 2) + assert_almost_equal(recall_score(y_true, y_pred, pos_label=None, average='weighted'), 0.0, 2) + assert_almost_equal(f1_score(y_true, y_pred, pos_label=None, average='weighted'), 0.0, 2) finally: np.seterr(**old_error_settings) From bbd8b71fdd18f85e3029bd335038110f38bc49e9 Mon Sep 17 00:00:00 2001 From: Satrajit Ghosh Date: Thu, 15 Dec 2011 14:34:36 +0100 Subject: [PATCH 06/15] doc: added description for matthew's corrcoef from wikipedia --- sklearn/metrics/metrics.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/sklearn/metrics/metrics.py b/sklearn/metrics/metrics.py index c973a93447eb1..731574f3bc58e 100644 --- a/sklearn/metrics/metrics.py +++ b/sklearn/metrics/metrics.py @@ -538,6 +538,16 @@ class (default is 1). 
Everything else but 'pos_label' def matthews_corrcoef(y_true, y_pred): """Returns matthew's correlation coefficient for binary classes + The Matthews correlation coefficient is used in machine learning as a + measure of the quality of binary (two-class) classifications. It takes + into account true and false positives and negatives and is generally + regarded as a balanced measure which can be used even if the classes are + of very different sizes. The MCC is in essence a correlation coefficient + between the observed and predicted binary classifications; it returns a + value between −1 and +1. A coefficient of +1 represents a perfect + prediction, 0 an average random prediction and −1 an inverse prediction. + The statistic is also known as the phi coefficient. [source: Wikipedia] + Only in the binary case does this relate to information about true and false positives and negatives. See references below. From 493e20e142e8def62a65aee819cfb4ae0c917a4a Mon Sep 17 00:00:00 2001 From: Satrajit Ghosh Date: Thu, 15 Dec 2011 14:36:24 +0100 Subject: [PATCH 07/15] sty: pep8 fixes --- sklearn/metrics/metrics.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/sklearn/metrics/metrics.py b/sklearn/metrics/metrics.py index 731574f3bc58e..390e80dba42d3 100644 --- a/sklearn/metrics/metrics.py +++ b/sklearn/metrics/metrics.py @@ -295,7 +295,8 @@ class (default is 1). Everything else but 'pos_label' return r -def fbeta_score(y_true, y_pred, beta, labels=None, pos_label=1, average='micro'): +def fbeta_score(y_true, y_pred, beta, labels=None, pos_label=1, + average='micro'): """Compute fbeta score The F_beta score is the weighted harmonic mean of precision and recall, @@ -508,8 +509,8 @@ class (default is 1). Everything else but 'pos_label' raise ValueError(("pos_label should be set to None for multiclass " "tasks got %d") % pos_label) if pos_label not in labels: - raise ValueError("pos_label=%d is not a valid label: %r" % (pos_label, - labels)) + raise ValueError("pos_label=%d is not a valid label: %r" % + (pos_label, labels)) pos_label_idx = list(labels).index(pos_label) return (precision[pos_label_idx], recall[pos_label_idx], fscore[pos_label_idx], support[pos_label_idx]) @@ -521,9 +522,11 @@ class (default is 1). 
Everything else but 'pos_label' if average is None: return precision, recall, fscore, support if average == 'micro': - avg_precision = true_pos.sum() / (true_pos.sum() + false_pos.sum()) + avg_precision = true_pos.sum() / (true_pos.sum() + + false_pos.sum()) avg_recall = true_pos.sum() / (true_pos.sum() + false_neg.sum()) - avg_fscore = (1 + beta2) * (avg_precision * avg_recall) / (beta2 * avg_precision + avg_recall) + avg_fscore = (1 + beta2) * (avg_precision * avg_recall) / \ + (beta2 * avg_precision + avg_recall) if average == 'macro': avg_precision = np.mean(precision) avg_recall = np.mean(recall) From e69c24d56c0297df8cb5f9d6a71ad04465903dcd Mon Sep 17 00:00:00 2001 From: Satrajit Ghosh Date: Thu, 15 Dec 2011 15:09:11 +0100 Subject: [PATCH 08/15] sty: pep8 on test file --- sklearn/metrics/tests/test_metrics.py | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/sklearn/metrics/tests/test_metrics.py b/sklearn/metrics/tests/test_metrics.py index 573644e7eb2aa..63d982e23328b 100644 --- a/sklearn/metrics/tests/test_metrics.py +++ b/sklearn/metrics/tests/test_metrics.py @@ -149,16 +149,16 @@ def test_confusion_matrix_binary(): cm = confusion_matrix(y_true, y_pred) assert_array_equal(cm, [[19, 6], [7, 18]]) - tp = cm[0,0] - tn = cm[1,1] - fp = cm[0,1] - fn = cm[1,0] - num = (tp*tn-fp*fn) - den = np.sqrt((tp+fp)*(tp+fn)*(tn+fp)*(tn+fn)) + tp = cm[0, 0] + tn = cm[1, 1] + fp = cm[0, 1] + fn = cm[1, 0] + num = (tp * tn - fp * fn) + den = np.sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn)) if den == 0.: true_mcc = 0 else: - true_mcc = num/den + true_mcc = num / den mcc = matthews_corrcoef(y_true, y_pred) assert_array_almost_equal(mcc, true_mcc, decimal=2) assert_array_almost_equal(mcc, 0.48, decimal=2) @@ -179,7 +179,8 @@ def test_precision_recall_f1_score_multiclass(): # multiclass case the score is the wieghthed average of the individual # class values hence f1_score is not necessary between precision_score and # recall_score - func = lambda : precision_recall_fscore_support(y_true, y_pred, pos_label=1) + func = lambda: precision_recall_fscore_support(y_true, y_pred, + pos_label=1) assert_raises(ValueError, func) # averaging tests @@ -228,9 +229,12 @@ def test_zero_precision_recall(): y_true = np.array([0, 1, 2, 0, 1, 2]) y_pred = np.array([2, 0, 1, 1, 2, 0]) - assert_almost_equal(precision_score(y_true, y_pred, pos_label=None, average='weighted'), 0.0, 2) - assert_almost_equal(recall_score(y_true, y_pred, pos_label=None, average='weighted'), 0.0, 2) - assert_almost_equal(f1_score(y_true, y_pred, pos_label=None, average='weighted'), 0.0, 2) + assert_almost_equal(precision_score(y_true, y_pred, pos_label=None, + average='weighted'), 0.0, 2) + assert_almost_equal(recall_score(y_true, y_pred, pos_label=None, + average='weighted'), 0.0, 2) + assert_almost_equal(f1_score(y_true, y_pred, pos_label=None, + average='weighted'), 0.0, 2) finally: np.seterr(**old_error_settings) From 8c052b4d2476a3148dfa4371b8de2e5130e5be2a Mon Sep 17 00:00:00 2001 From: Satrajit Ghosh Date: Thu, 15 Dec 2011 15:17:10 +0100 Subject: [PATCH 09/15] doc: removed strange character --- sklearn/metrics/metrics.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/sklearn/metrics/metrics.py b/sklearn/metrics/metrics.py index 390e80dba42d3..792472ceedafe 100644 --- a/sklearn/metrics/metrics.py +++ b/sklearn/metrics/metrics.py @@ -546,9 +546,8 @@ def matthews_corrcoef(y_true, y_pred): into account true and false positives and negatives and is generally regarded as a 
balanced measure which can be used even if the classes are of very different sizes. The MCC is in essence a correlation coefficient - between the observed and predicted binary classifications; it returns a - value between −1 and +1. A coefficient of +1 represents a perfect - prediction, 0 an average random prediction and −1 an inverse prediction. + value between -1 and +1. A coefficient of +1 represents a perfect + prediction, 0 an average random prediction and -1 an inverse prediction. The statistic is also known as the phi coefficient. [source: Wikipedia] Only in the binary case does this relate to information about true and From 584a2e667a84d118f3dc60709c6202bc13cb95bb Mon Sep 17 00:00:00 2001 From: Satrajit Ghosh Date: Thu, 15 Dec 2011 15:17:40 +0100 Subject: [PATCH 10/15] fix: updated tests to reflect that micro shows the same precision and recall --- sklearn/metrics/tests/test_metrics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/metrics/tests/test_metrics.py b/sklearn/metrics/tests/test_metrics.py index 63d982e23328b..baf5942db8c49 100644 --- a/sklearn/metrics/tests/test_metrics.py +++ b/sklearn/metrics/tests/test_metrics.py @@ -185,7 +185,7 @@ def test_precision_recall_f1_score_multiclass(): # averaging tests ps = precision_score(y_true, y_pred, pos_label=None, average='micro') - assert_array_almost_equal(ps, 0.62, 2) + assert_array_almost_equal(ps, 0.61, 2) rs = recall_score(y_true, y_pred, pos_label=None, average='micro') assert_array_almost_equal(rs, 0.61, 2) From 3ade680f85d7384d1d7ec94a366f2ad904cea119 Mon Sep 17 00:00:00 2001 From: Satrajit Ghosh Date: Thu, 15 Dec 2011 15:36:11 +0100 Subject: [PATCH 11/15] fix: average with elif --- sklearn/metrics/metrics.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/sklearn/metrics/metrics.py b/sklearn/metrics/metrics.py index 792472ceedafe..e36966cb21a0e 100644 --- a/sklearn/metrics/metrics.py +++ b/sklearn/metrics/metrics.py @@ -425,7 +425,7 @@ def precision_recall_fscore_support(y_true, y_pred, beta=1.0, labels=None, The support is the number of occurrences of each class in y_true. If pos_label is None, this function returns the average precision, recall - and f-measure. The averaging is either 'micro', 'macro', 'weighted'. + and f-measure if `average` is one of 'micro', 'macro', 'weighted'. Parameters ---------- @@ -516,25 +516,26 @@ class (default is 1). 
Everything else but 'pos_label' fscore[pos_label_idx], support[pos_label_idx]) else: average_options = (None, 'micro', 'macro', 'weighted') - if average not in average_options: - raise ValueError('average has to be one of ' + - str(average_options)) if average is None: return precision, recall, fscore, support - if average == 'micro': + elif average == 'micro': avg_precision = true_pos.sum() / (true_pos.sum() + false_pos.sum()) avg_recall = true_pos.sum() / (true_pos.sum() + false_neg.sum()) avg_fscore = (1 + beta2) * (avg_precision * avg_recall) / \ (beta2 * avg_precision + avg_recall) - if average == 'macro': + elif average == 'macro': avg_precision = np.mean(precision) avg_recall = np.mean(recall) avg_fscore = np.mean(fscore) - if average == 'weighted': + elif average == 'weighted': avg_precision = np.average(precision, weights=support) avg_recall = np.average(recall, weights=support) avg_fscore = np.average(fscore, weights=support) + else: + raise ValueError('average has to be one of ' + + str(average_options)) + return avg_precision, avg_recall, avg_fscore, None From 9a6d934b06713e10b144cadbbcab6e38144888b8 Mon Sep 17 00:00:00 2001 From: Satrajit Ghosh Date: Thu, 15 Dec 2011 15:50:24 +0100 Subject: [PATCH 12/15] doc: improved description of average --- sklearn/metrics/metrics.py | 49 ++++++++++++++++++++++++++++---------- 1 file changed, 37 insertions(+), 12 deletions(-) diff --git a/sklearn/metrics/metrics.py b/sklearn/metrics/metrics.py index e36966cb21a0e..647669d444e6c 100644 --- a/sklearn/metrics/metrics.py +++ b/sklearn/metrics/metrics.py @@ -205,7 +205,7 @@ def auc(x, y): return area -def precision_score(y_true, y_pred, labels=None, pos_label=1, average='micro'): +def precision_score(y_true, y_pred, labels=None, pos_label=1, average='weighted'): """Compute the precision The precision is the ratio :math:`tp / (tp + fp)` where tp is the @@ -232,15 +232,20 @@ class (default is 1). Everything else but 'pos_label' is considered to belong to the negative class. Set to None in the case of multiclass classification. - average : string, ['micro', 'macro', 'weighted'] + average : string, ['micro', 'macro', 'weighted'(default)] in the multiclass classification case, this determines the type of averaging performed on the data. + macro: average over classes (does not take imbalance into account) + micro: average over instances (takes imbalance into account) + implies that precision == recall == f1 + weighted: average weighted by support (takes imbalance into account) + can have f1 score that is not between precision and recall Returns ------- precision : float precision of the positive class in binary classification or - weighted avergage of the precision of each class for the + weighted average of the precision of each class for the multiclass task """ @@ -251,7 +256,7 @@ class (default is 1). Everything else but 'pos_label' return p -def recall_score(y_true, y_pred, labels=None, pos_label=1, average='micro'): +def recall_score(y_true, y_pred, labels=None, pos_label=1, average='weighted'): """Compute the recall The recall is the ratio :math:`tp / (tp + fn)` where tp is the number of @@ -277,15 +282,20 @@ class (default is 1). Everything else but 'pos_label' is considered to belong to the negative class. Set to None in the case of multiclass classification. - average : string, ['micro', 'macro', 'weighted'] + average : string, [None, 'micro', 'macro', 'weighted'(default)] in the multiclass classification case, this determines the type of averaging performed on the data. 
+ macro: average over classes (does not take imbalance into account) + micro: average over instances (takes imbalance into account) + implies that precision == recall == f1 + weighted: average weighted by support (takes imbalance into account) + can have f1 score that is not between precision and recall Returns ------- recall : float recall of the positive class in binary classification or weighted - avergage of the recall of each class for the multiclass task. + average of the recall of each class for the multiclass task. """ _, r, _, _ = precision_recall_fscore_support(y_true, y_pred, @@ -296,7 +306,7 @@ class (default is 1). Everything else but 'pos_label' def fbeta_score(y_true, y_pred, beta, labels=None, pos_label=1, - average='micro'): + average='weighted'): """Compute fbeta score The F_beta score is the weighted harmonic mean of precision and recall, @@ -325,15 +335,20 @@ class (default is 1). Everything else but 'pos_label' is considered to belong to the negative class. Set to None in the case of multiclass classification. - average : string, ['micro', 'macro', 'weighted'] + average : string, [None, 'micro', 'macro', 'weighted'(default)] in the multiclass classification case, this determines the type of averaging performed on the data. + macro: average over classes (does not take imbalance into account) + micro: average over instances (takes imbalance into account) + implies that precision == recall == f1 + weighted: average weighted by support (takes imbalance into account) + can have f1 score that is not between precision and recall Returns ------- fbeta_score : float fbeta_score of the positive class in binary classification or weighted - avergage of the fbeta_score of each class for the multiclass task. + average of the fbeta_score of each class for the multiclass task. See also -------- @@ -351,7 +366,7 @@ class (default is 1). Everything else but 'pos_label' return f -def f1_score(y_true, y_pred, labels=None, pos_label=1, average='micro'): +def f1_score(y_true, y_pred, labels=None, pos_label=1, average='weighted'): """Compute f1 score The F1 score can be interpreted as a weighted average of the precision @@ -383,9 +398,14 @@ class (default is 1). Everything else but 'pos_label' is considered to belong to the negative class. Set to None in the case of multiclass classification. - average : string, ['micro', 'macro', 'weighted'] + average : string, [None, 'micro', 'macro', 'weighted'(default)] in the multiclass classification case, this determines the type of averaging performed on the data. + macro: average over classes (does not take imbalance into account) + micro: average over instances (takes imbalance into account) + implies that precision == recall == f1 + weighted: average weighted by support (takes imbalance into account) + can have f1 score that is not between precision and recall Returns ------- @@ -447,9 +467,14 @@ class (default is 1). Everything else but 'pos_label' is considered to belong to the negative class. Set to None in the case of multiclass classification. - average : string, ['micro', 'macro', 'weighted'] + average : string, [None (default), 'micro', 'macro', 'weighted'] in the multiclass classification case, this determines the type of averaging performed on the data. 
+ macro: average over classes (does not take imbalance into account) + micro: average over instances (takes imbalance into account) + implies that precision == recall == f1 + weighted: average weighted by support (takes imbalance into account) + can have f1 score that is not between precision and recall Returns ------- From 0a8b6e51f3afe61ee754c4d894f0a130748c44b1 Mon Sep 17 00:00:00 2001 From: Satrajit Ghosh Date: Wed, 21 Dec 2011 09:45:23 +0100 Subject: [PATCH 13/15] api: changed pos_label to None for metrics --- sklearn/metrics/metrics.py | 8 ++++---- sklearn/metrics/tests/test_metrics.py | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/sklearn/metrics/metrics.py b/sklearn/metrics/metrics.py index 647669d444e6c..5772fd21765ea 100644 --- a/sklearn/metrics/metrics.py +++ b/sklearn/metrics/metrics.py @@ -205,7 +205,7 @@ def auc(x, y): return area -def precision_score(y_true, y_pred, labels=None, pos_label=1, average='weighted'): +def precision_score(y_true, y_pred, labels=None, pos_label=None, average='weighted'): """Compute the precision The precision is the ratio :math:`tp / (tp + fp)` where tp is the @@ -256,7 +256,7 @@ class (default is 1). Everything else but 'pos_label' return p -def recall_score(y_true, y_pred, labels=None, pos_label=1, average='weighted'): +def recall_score(y_true, y_pred, labels=None, pos_label=None, average='weighted'): """Compute the recall The recall is the ratio :math:`tp / (tp + fn)` where tp is the number of @@ -305,7 +305,7 @@ class (default is 1). Everything else but 'pos_label' return r -def fbeta_score(y_true, y_pred, beta, labels=None, pos_label=1, +def fbeta_score(y_true, y_pred, beta, labels=None, pos_label=None, average='weighted'): """Compute fbeta score @@ -366,7 +366,7 @@ class (default is 1). Everything else but 'pos_label' return f -def f1_score(y_true, y_pred, labels=None, pos_label=1, average='weighted'): +def f1_score(y_true, y_pred, labels=None, pos_label=None, average='weighted'): """Compute f1 score The F1 score can be interpreted as a weighted average of the precision diff --git a/sklearn/metrics/tests/test_metrics.py b/sklearn/metrics/tests/test_metrics.py index baf5942db8c49..3716890704567 100644 --- a/sklearn/metrics/tests/test_metrics.py +++ b/sklearn/metrics/tests/test_metrics.py @@ -136,7 +136,7 @@ def test_precision_recall_f1_score_binary(): assert_array_almost_equal(ps, 0.75, 2) rs = recall_score(y_true, y_pred) - assert_array_almost_equal(rs, 0.72, 2) + assert_array_almost_equal(rs, 0.74, 2) fs = f1_score(y_true, y_pred) assert_array_almost_equal(fs, 0.74, 2) From 2fa7a096a87a2e14f50b76015d20f9040cfcd8dc Mon Sep 17 00:00:00 2001 From: Mathieu Blondel Date: Fri, 27 Jan 2012 21:26:36 +0900 Subject: [PATCH 14/15] Backward compatibility in precision, recall and f1-score. --- sklearn/metrics/metrics.py | 24 +++++++------- sklearn/metrics/tests/test_metrics.py | 46 +++++++++++---------------- 2 files changed, 32 insertions(+), 38 deletions(-) diff --git a/sklearn/metrics/metrics.py b/sklearn/metrics/metrics.py index 8af3ac0435328..7da145f145544 100644 --- a/sklearn/metrics/metrics.py +++ b/sklearn/metrics/metrics.py @@ -212,7 +212,7 @@ def auc(x, y): return area -def precision_score(y_true, y_pred, labels=None, pos_label=None, average='weighted'): +def precision_score(y_true, y_pred, labels=None, pos_label=1, average='weighted'): """Compute the precision The precision is the ratio :math:`tp / (tp + fp)` where tp is the @@ -263,7 +263,7 @@ class (default is 1). 
Everything else but 'pos_label' return p -def recall_score(y_true, y_pred, labels=None, pos_label=None, average='weighted'): +def recall_score(y_true, y_pred, labels=None, pos_label=1, average='weighted'): """Compute the recall The recall is the ratio :math:`tp / (tp + fn)` where tp is the number of @@ -312,7 +312,7 @@ class (default is 1). Everything else but 'pos_label' return r -def fbeta_score(y_true, y_pred, beta, labels=None, pos_label=None, +def fbeta_score(y_true, y_pred, beta, labels=None, pos_label=1, average='weighted'): """Compute fbeta score @@ -373,7 +373,7 @@ class (default is 1). Everything else but 'pos_label' return f -def f1_score(y_true, y_pred, labels=None, pos_label=None, average='weighted'): +def f1_score(y_true, y_pred, labels=None, pos_label=1, average='weighted'): """Compute f1 score The F1 score can be interpreted as a weighted average of the precision @@ -431,7 +431,7 @@ class (default is 1). Everything else but 'pos_label' def precision_recall_fscore_support(y_true, y_pred, beta=1.0, labels=None, - pos_label=None, average=None): + pos_label=1, average="weighted"): """Compute precisions, recalls, f-measures and support for each class The precision is the ratio :math:`tp / (tp + fp)` where tp is the number of @@ -540,10 +540,10 @@ class (default is 1). Everything else but 'pos_label' finally: np.seterr(**old_err_settings) - if pos_label is not None: - if precision.shape[0] != 2: - raise ValueError(("pos_label should be set to None for multiclass " - "tasks got %d") % pos_label) + if n_labels == 2: + if not average: + return precision, recall, fscore, support + if pos_label not in labels: raise ValueError("pos_label=%d is not a valid label: %r" % (pos_label, labels)) @@ -552,7 +552,7 @@ class (default is 1). Everything else but 'pos_label' fscore[pos_label_idx], support[pos_label_idx]) else: average_options = (None, 'micro', 'macro', 'weighted') - if average is None: + if not average: return precision, recall, fscore, support elif average == 'micro': avg_precision = true_pos.sum() / (true_pos.sum() + @@ -666,7 +666,9 @@ def classification_report(y_true, y_pred, labels=None, target_names=None): report += '\n' p, r, f1, s = precision_recall_fscore_support(y_true, y_pred, - labels=labels) + labels=labels, + average=None) + for i, label in enumerate(labels): values = [target_names[i]] for v in (p[i], r[i], f1[i]): diff --git a/sklearn/metrics/tests/test_metrics.py b/sklearn/metrics/tests/test_metrics.py index 2ab0f118c7fdd..41af3b9f679a4 100644 --- a/sklearn/metrics/tests/test_metrics.py +++ b/sklearn/metrics/tests/test_metrics.py @@ -139,7 +139,7 @@ def test_precision_recall_f1_score_binary(): y_true, y_pred, _ = make_prediction(binary=True) # detailed measures for each class - p, r, f, s = precision_recall_fscore_support(y_true, y_pred) + p, r, f, s = precision_recall_fscore_support(y_true, y_pred, average=None) assert_array_almost_equal(p, [0.73, 0.75], 2) assert_array_almost_equal(r, [0.76, 0.72], 2) assert_array_almost_equal(f, [0.75, 0.74], 2) @@ -152,7 +152,7 @@ def test_precision_recall_f1_score_binary(): assert_array_almost_equal(ps, 0.75, 2) rs = recall_score(y_true, y_pred) - assert_array_almost_equal(rs, 0.74, 2) + assert_array_almost_equal(rs, 0.72, 2) fs = f1_score(y_true, y_pred) assert_array_almost_equal(fs, 0.74, 2) @@ -185,51 +185,43 @@ def test_precision_recall_f1_score_multiclass(): y_true, y_pred, _ = make_prediction(binary=False) # compute scores with default labels introspection - p, r, f, s = precision_recall_fscore_support(y_true, y_pred) + p, 
r, f, s = precision_recall_fscore_support(y_true, y_pred, average=None) assert_array_almost_equal(p, [0.82, 0.55, 0.47], 2) assert_array_almost_equal(r, [0.92, 0.17, 0.90], 2) assert_array_almost_equal(f, [0.87, 0.26, 0.62], 2) assert_array_equal(s, [25, 30, 20]) - # individual scoring function that can be used for grid search: in the - # multiclass case the score is the wieghthed average of the individual - # class values hence f1_score is not necessary between precision_score and - # recall_score - func = lambda: precision_recall_fscore_support(y_true, y_pred, - pos_label=1) - assert_raises(ValueError, func) - # averaging tests - ps = precision_score(y_true, y_pred, pos_label=None, average='micro') + ps = precision_score(y_true, y_pred, pos_label=1, average='micro') assert_array_almost_equal(ps, 0.61, 2) - rs = recall_score(y_true, y_pred, pos_label=None, average='micro') + rs = recall_score(y_true, y_pred, average='micro') assert_array_almost_equal(rs, 0.61, 2) - fs = f1_score(y_true, y_pred, pos_label=None, average='micro') + fs = f1_score(y_true, y_pred, average='micro') assert_array_almost_equal(fs, 0.61, 2) - ps = precision_score(y_true, y_pred, pos_label=None, average='macro') + ps = precision_score(y_true, y_pred, average='macro') assert_array_almost_equal(ps, 0.62, 2) - rs = recall_score(y_true, y_pred, pos_label=None, average='macro') + rs = recall_score(y_true, y_pred, average='macro') assert_array_almost_equal(rs, 0.66, 2) - fs = f1_score(y_true, y_pred, pos_label=None, average='macro') + fs = f1_score(y_true, y_pred, average='macro') assert_array_almost_equal(fs, 0.58, 2) - ps = precision_score(y_true, y_pred, pos_label=None, average='weighted') + ps = precision_score(y_true, y_pred, average='weighted') assert_array_almost_equal(ps, 0.62, 2) - rs = recall_score(y_true, y_pred, pos_label=None, average='weighted') + rs = recall_score(y_true, y_pred, average='weighted') assert_array_almost_equal(rs, 0.61, 2) - fs = f1_score(y_true, y_pred, pos_label=None, average='weighted') + fs = f1_score(y_true, y_pred, average='weighted') assert_array_almost_equal(fs, 0.55, 2) # same prediction but with and explicit label ordering p, r, f, s = precision_recall_fscore_support( - y_true, y_pred, labels=[0, 2, 1]) + y_true, y_pred, labels=[0, 2, 1], average=None) assert_array_almost_equal(p, [0.82, 0.47, 0.55], 2) assert_array_almost_equal(r, [0.92, 0.90, 0.17], 2) assert_array_almost_equal(f, [0.87, 0.62, 0.26], 2) @@ -245,12 +237,12 @@ def test_zero_precision_recall(): y_true = np.array([0, 1, 2, 0, 1, 2]) y_pred = np.array([2, 0, 1, 1, 2, 0]) - assert_almost_equal(precision_score(y_true, y_pred, pos_label=None, - average='weighted'), 0.0, 2) - assert_almost_equal(recall_score(y_true, y_pred, pos_label=None, - average='weighted'), 0.0, 2) - assert_almost_equal(f1_score(y_true, y_pred, pos_label=None, - average='weighted'), 0.0, 2) + assert_almost_equal(precision_score(y_true, y_pred, average='weighted'), + 0.0, 2) + assert_almost_equal(recall_score(y_true, y_pred, average='weighted'), + 0.0, 2) + assert_almost_equal(f1_score(y_true, y_pred, average='weighted'), + 0.0, 2) finally: np.seterr(**old_error_settings) From f62398b86208979e326e1033871ff6930fba85ef Mon Sep 17 00:00:00 2001 From: Mathieu Blondel Date: Fri, 27 Jan 2012 21:40:32 +0900 Subject: [PATCH 15/15] Factor some code. 
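
(Aside: a rough usage sketch of the calling convention this backward-compatibility change settles on: binary problems keep returning the score of pos_label by default, multiclass problems take an explicit average, and average=None yields per-class arrays. The label arrays are invented for illustration.)

    import numpy as np
    from sklearn.metrics import f1_score, precision_recall_fscore_support

    # Invented binary labels.
    y_true = np.array([0, 1, 1, 0, 1, 1, 0, 0])
    y_pred = np.array([0, 1, 0, 0, 1, 1, 1, 0])

    # Binary case: the plain call returns the F1 of the positive class (pos_label=1).
    print(f1_score(y_true, y_pred))

    # Invented multiclass labels.
    y_true_mc = np.array([0, 1, 2, 0, 1, 2, 2, 1])
    y_pred_mc = np.array([0, 2, 1, 0, 1, 2, 2, 0])

    # Multiclass case: pick an averaging mode explicitly, or ask for the
    # per-class arrays with average=None.
    print(f1_score(y_true_mc, y_pred_mc, average='macro'))
    p, r, f, s = precision_recall_fscore_support(y_true_mc, y_pred_mc, average=None)
    print(f)  # one F1 value per class
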
--- sklearn/metrics/metrics.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/sklearn/metrics/metrics.py b/sklearn/metrics/metrics.py index 7da145f145544..5b64e21c0e5cf 100644 --- a/sklearn/metrics/metrics.py +++ b/sklearn/metrics/metrics.py @@ -540,10 +540,10 @@ class (default is 1). Everything else but 'pos_label' finally: np.seterr(**old_err_settings) - if n_labels == 2: - if not average: - return precision, recall, fscore, support + if not average: + return precision, recall, fscore, support + elif n_labels == 2: if pos_label not in labels: raise ValueError("pos_label=%d is not a valid label: %r" % (pos_label, labels)) @@ -552,9 +552,7 @@ class (default is 1). Everything else but 'pos_label' fscore[pos_label_idx], support[pos_label_idx]) else: average_options = (None, 'micro', 'macro', 'weighted') - if not average: - return precision, recall, fscore, support - elif average == 'micro': + if average == 'micro': avg_precision = true_pos.sum() / (true_pos.sum() + false_pos.sum()) avg_recall = true_pos.sum() / (true_pos.sum() + false_neg.sum())
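
(Aside: a small check of the identity behind matthews_corrcoef as added earlier in this series: for binary labels the Pearson correlation of the two label vectors, which is what np.corrcoef computes, agrees with the closed-form confusion-matrix formula the tests compare against. The label arrays are invented for illustration.)

    import numpy as np
    from sklearn.metrics import confusion_matrix, matthews_corrcoef

    # Invented binary labels.
    y_true = np.array([1, 1, 1, 0, 0, 1, 0, 1, 0, 0])
    y_pred = np.array([1, 0, 1, 0, 0, 1, 1, 1, 0, 0])

    # MCC written out from confusion-matrix counts; the formula is symmetric in
    # the two classes, so the exact index convention does not change the value.
    tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
    num = tp * tn - fp * fn
    den = np.sqrt(float((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn)))
    mcc_closed_form = 0.0 if den == 0 else num / den

    # MCC as the correlation between the observed and predicted labels, which is
    # how the patch implements it via np.corrcoef.
    mcc_corr = np.corrcoef(y_true, y_pred)[0, 1]

    print(mcc_closed_form, mcc_corr, matthews_corrcoef(y_true, y_pred))
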