From 80844ae48c6e059fc439ea6531cb565ff6a5754c Mon Sep 17 00:00:00 2001 From: Arya McCarthy Date: Wed, 23 May 2018 18:12:03 -0400 Subject: [PATCH 01/14] Add averaging option to AMI and NMI Leave current behavior unchanged --- sklearn/metrics/cluster/supervised.py | 52 +++++++++++++++++++++++---- 1 file changed, 46 insertions(+), 6 deletions(-) diff --git a/sklearn/metrics/cluster/supervised.py b/sklearn/metrics/cluster/supervised.py index 19bc461c9e9fd..b8759a1e5695e 100644 --- a/sklearn/metrics/cluster/supervised.py +++ b/sklearn/metrics/cluster/supervised.py @@ -16,6 +16,7 @@ from __future__ import division from math import log +import warnings import numpy as np from scipy import sparse as sp @@ -527,6 +528,22 @@ def v_measure_score(labels_true, labels_pred): return homogeneity_completeness_v_measure(labels_true, labels_pred)[2] + +def _generalized_average(U, V, average_method): + if average_method == "min": + return min(U, V) + elif average_method == "sqrt": + return max(np.sqrt(U * V), 1e-10) # Avoids zero-division error + elif average_method == "sum": + return max(np.mean(U, V), 1e-10) + elif average_method == "max": + return max(U, V) + else: + raise ValueError("'average_method' must be 'min', 'sqrt', 'sum', or " + "'max'") + + + def mutual_info_score(labels_true, labels_pred, contingency=None): r"""Mutual Information between two clusterings. @@ -608,7 +625,7 @@ def mutual_info_score(labels_true, labels_pred, contingency=None): return mi.sum() -def adjusted_mutual_info_score(labels_true, labels_pred): +def adjusted_mutual_info_score(labels_true, labels_pred, average_method=None): """Adjusted Mutual Information between two clusterings. Adjusted Mutual Information (AMI) is an adjustment of the Mutual @@ -617,7 +634,7 @@ def adjusted_mutual_info_score(labels_true, labels_pred): clusters, regardless of whether there is actually more information shared. 
For two clusterings :math:`U` and :math:`V`, the AMI is given as:: - AMI(U, V) = [MI(U, V) - E(MI(U, V))] / [max(H(U), H(V)) - E(MI(U, V))] + AMI(U, V) = [MI(U, V) - E(MI(U, V))] / [avg(H(U), H(V)) - E(MI(U, V))] This metric is independent of the absolute values of the labels: a permutation of the class or cluster label values won't change the @@ -641,6 +658,12 @@ def adjusted_mutual_info_score(labels_true, labels_pred): labels_pred : array, shape = [n_samples] A clustering of the data into disjoint subsets. + average_method : string or None, optional (default: None) + How to compute the normalizer in the denominator. Possible options + are 'min', 'sqrt', 'sum', and 'max'. + If None, 'max' will be used. This is likely to change in a future + version. + Returns ------- ami: float(upperlimited by 1.0) @@ -682,6 +705,12 @@ def adjusted_mutual_info_score(labels_true, labels_pred): `_ """ + if average_method is None: + warnings.warn("The behavior of AMI will change in a future version. " + "To match the behavior of 'v_measure_score', AMI will use " + "sqrt-averaging, i.e. geometric mean, by default." + ) + average_method = 'max' labels_true, labels_pred = check_clusterings(labels_true, labels_pred) n_samples = labels_true.shape[0] classes = np.unique(labels_true) @@ -700,17 +729,19 @@ def adjusted_mutual_info_score(labels_true, labels_pred): emi = expected_mutual_information(contingency, n_samples) # Calculate entropy for each labeling h_true, h_pred = entropy(labels_true), entropy(labels_pred) - ami = (mi - emi) / (max(h_true, h_pred) - emi) + normalizer = _generalized_average(h_true, h_pred, average_method) + ami = (mi - emi) / (normalizer - emi) return ami -def normalized_mutual_info_score(labels_true, labels_pred): +def normalized_mutual_info_score(labels_true, labels_pred, average_method=None): """Normalized Mutual Information between two clusterings. 
Normalized Mutual Information (NMI) is an normalization of the Mutual Information (MI) score to scale the results between 0 (no mutual information) and 1 (perfect correlation). In this function, mutual - information is normalized by ``sqrt(H(labels_true) * H(labels_pred))``. + information is normalized by some generalized mean of ``H(labels_true)`` + and ``H(labels_pred))``. This measure is not adjusted for chance. Therefore :func:`adjusted_mustual_info_score` might be preferred. @@ -734,6 +765,12 @@ def normalized_mutual_info_score(labels_true, labels_pred): labels_pred : array, shape = [n_samples] A clustering of the data into disjoint subsets. + average_method : string or None, optional (default: None) + How to compute the normalizer in the denominator. Possible options + are 'min', 'sqrt', 'sum', and 'max'. + If None, 'sqrt' will be used, matching the behavior of + `v_measure_score`. + Returns ------- nmi : float @@ -764,6 +801,8 @@ def normalized_mutual_info_score(labels_true, labels_pred): 0.0 """ + if average_method is None: + average_method = 'sqrt' labels_true, labels_pred = check_clusterings(labels_true, labels_pred) classes = np.unique(labels_true) clusters = np.unique(labels_pred) @@ -780,7 +819,8 @@ def normalized_mutual_info_score(labels_true, labels_pred): # Calculate the expected value for the mutual information # Calculate entropy for each labeling h_true, h_pred = entropy(labels_true), entropy(labels_pred) - nmi = mi / max(np.sqrt(h_true * h_pred), 1e-10) + normalizer = _generalized_average(h_true, h_pred, average_method) + nmi = mi / normalizer return nmi From 479448683c6023d8a0351fd406eab3ad0ff7ab14 Mon Sep 17 00:00:00 2001 From: Arya McCarthy Date: Thu, 24 May 2018 11:20:33 -0400 Subject: [PATCH 02/14] Flake8 fixes --- sklearn/metrics/cluster/supervised.py | 50 +++++++++++++++------------ 1 file changed, 28 insertions(+), 22 deletions(-) diff --git a/sklearn/metrics/cluster/supervised.py b/sklearn/metrics/cluster/supervised.py index 
b8759a1e5695e..6c2e63d874a6f 100644 --- a/sklearn/metrics/cluster/supervised.py +++ b/sklearn/metrics/cluster/supervised.py @@ -11,6 +11,7 @@ # Thierry Guillemot # Gregory Stupp # Joel Nothman +# Arya McCarthy # License: BSD 3 clause from __future__ import division @@ -51,6 +52,21 @@ def check_clusterings(labels_true, labels_pred): return labels_true, labels_pred +def _generalized_average(U, V, average_method): + """Return a particular mean of two numbers.""" + if average_method == "min": + return max(min(U, V), 1e-10) + elif average_method == "sqrt": + return max(np.sqrt(U * V), 1e-10) # Avoids zero-division error + elif average_method == "sum": + return max(np.mean([U, V]), 1e-10) + elif average_method == "max": + return max(U, V) + else: + raise ValueError("'average_method' must be 'min', 'sqrt', 'sum', or " + "'max'") + + def contingency_matrix(labels_true, labels_pred, eps=None, sparse=False): """Build a contingency matrix describing the relationship between labels. @@ -528,22 +544,6 @@ def v_measure_score(labels_true, labels_pred): return homogeneity_completeness_v_measure(labels_true, labels_pred)[2] - -def _generalized_average(U, V, average_method): - if average_method == "min": - return min(U, V) - elif average_method == "sqrt": - return max(np.sqrt(U * V), 1e-10) # Avoids zero-division error - elif average_method == "sum": - return max(np.mean(U, V), 1e-10) - elif average_method == "max": - return max(U, V) - else: - raise ValueError("'average_method' must be 'min', 'sqrt', 'sum', or " - "'max'") - - - def mutual_info_score(labels_true, labels_pred, contingency=None): r"""Mutual Information between two clusterings. @@ -662,7 +662,7 @@ def adjusted_mutual_info_score(labels_true, labels_pred, average_method=None): How to compute the normalizer in the denominator. Possible options are 'min', 'sqrt', 'sum', and 'max'. If None, 'max' will be used. This is likely to change in a future - version. + version. 
Returns ------- @@ -707,9 +707,9 @@ def adjusted_mutual_info_score(labels_true, labels_pred, average_method=None): """ if average_method is None: warnings.warn("The behavior of AMI will change in a future version. " - "To match the behavior of 'v_measure_score', AMI will use " - "sqrt-averaging, i.e. geometric mean, by default." - ) + "To match the behavior of 'v_measure_score', AMI will " + "use sum-averaging, i.e. arithmetic mean, by default." + ) average_method = 'max' labels_true, labels_pred = check_clusterings(labels_true, labels_pred) n_samples = labels_true.shape[0] @@ -730,11 +730,13 @@ def adjusted_mutual_info_score(labels_true, labels_pred, average_method=None): # Calculate entropy for each labeling h_true, h_pred = entropy(labels_true), entropy(labels_pred) normalizer = _generalized_average(h_true, h_pred, average_method) + print(normalizer) ami = (mi - emi) / (normalizer - emi) return ami -def normalized_mutual_info_score(labels_true, labels_pred, average_method=None): +def normalized_mutual_info_score(labels_true, labels_pred, + average_method=None): """Normalized Mutual Information between two clusterings. Normalized Mutual Information (NMI) is an normalization of the Mutual @@ -769,7 +771,7 @@ def normalized_mutual_info_score(labels_true, labels_pred, average_method=None): How to compute the normalizer in the denominator. Possible options are 'min', 'sqrt', 'sum', and 'max'. If None, 'sqrt' will be used, matching the behavior of - `v_measure_score`. + `v_measure_score`. Returns ------- @@ -802,6 +804,10 @@ def normalized_mutual_info_score(labels_true, labels_pred, average_method=None): """ if average_method is None: + warnings.warn("The behavior of NMI will change in a future version. " + "To match the behavior of 'v_measure_score', NMI will " + "use sum-averaging, i.e. arithmetic mean, by default." 
+ ) average_method = 'sqrt' labels_true, labels_pred = check_clusterings(labels_true, labels_pred) classes = np.unique(labels_true) From 6279c2535a91a031eb92d7ba00cea54be06f7c09 Mon Sep 17 00:00:00 2001 From: Arya McCarthy Date: Thu, 24 May 2018 11:21:07 -0400 Subject: [PATCH 03/14] Incorporate tests of means for AMI and NMI --- .../metrics/cluster/tests/test_supervised.py | 41 ++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) diff --git a/sklearn/metrics/cluster/tests/test_supervised.py b/sklearn/metrics/cluster/tests/test_supervised.py index 8be39cd220d2a..a1a199cae8b56 100644 --- a/sklearn/metrics/cluster/tests/test_supervised.py +++ b/sklearn/metrics/cluster/tests/test_supervised.py @@ -12,6 +12,7 @@ from sklearn.metrics.cluster import mutual_info_score from sklearn.metrics.cluster import normalized_mutual_info_score from sklearn.metrics.cluster import v_measure_score +from sklearn.metrics.cluster.supervised import _generalized_average from sklearn.utils import assert_all_finite from sklearn.utils.testing import ( @@ -46,6 +47,18 @@ def test_error_messages_on_wrong_input(): [0, 1, 0], [[1, 1], [0, 0]]) +def test_generalized_average(): + a, b = 1, 2 + methods = ["min", "sqrt", "sum", "max"] + means = [_generalized_average(a, b, method) for method in methods] + assert means[0] <= means[1] <= means[2] <= means[3] + c, d = 12, 12 + means = [_generalized_average(c, d, method) for method in methods] + assert_equal(means[0], means[1]) + assert_equal(means[1], means[2]) + assert_equal(means[2], means[3]) + + def test_perfect_matches(): for score_func in score_funcs: assert_equal(score_func([], []), 1.0) @@ -55,6 +68,20 @@ def test_perfect_matches(): assert_equal(score_func([0., 1., 0.], [42., 7., 42.]), 1.0) assert_equal(score_func([0., 1., 2.], [42., 7., 2.]), 1.0) assert_equal(score_func([0, 1, 2], [42, 7, 2]), 1.0) + score_funcs_with_changing_means = [ + normalized_mutual_info_score, + adjusted_mutual_info_score, + ] + means = {"min", "sqrt", 
"sum", "max"} + for score_func in score_funcs_with_changing_means: + for mean in means: + assert_equal(score_func([], [], mean), 1.0) + assert_equal(score_func([0], [1], mean), 1.0) + assert_equal(score_func([0, 0, 0], [0, 0, 0], mean), 1.0) + assert_equal(score_func([0, 1, 0], [42, 7, 42], mean), 1.0) + assert_equal(score_func([0., 1., 0.], [42., 7., 42.], mean), 1.0) + assert_equal(score_func([0., 1., 2.], [42., 7., 2.], mean), 1.0) + assert_equal(score_func([0, 1, 2], [42, 7, 2], mean), 1.0) def test_homogeneous_but_not_complete_labeling(): @@ -87,7 +114,7 @@ def test_not_complete_and_not_homogeneous_labeling(): assert_almost_equal(v, 0.52, 2) -def test_non_consicutive_labels(): +def test_non_consecutive_labels(): # regression tests for labels with gaps h, c, v = homogeneity_completeness_v_measure( [0, 0, 0, 2, 2, 2], @@ -224,6 +251,14 @@ def test_exactly_zero_info_score(): assert_equal(v_measure_score(labels_a, labels_b), 0.0) assert_equal(adjusted_mutual_info_score(labels_a, labels_b), 0.0) assert_equal(normalized_mutual_info_score(labels_a, labels_b), 0.0) + for method in ["min", "sqrt", "sum", "max"]: + print(method) + assert_equal(adjusted_mutual_info_score(labels_a, labels_b, + method), + 0.0) + assert_equal(normalized_mutual_info_score(labels_a, labels_b, + method), + 0.0) def test_v_measure_and_mutual_information(seed=36): @@ -235,6 +270,10 @@ def test_v_measure_and_mutual_information(seed=36): assert_almost_equal(v_measure_score(labels_a, labels_b), 2.0 * mutual_info_score(labels_a, labels_b) / (entropy(labels_a) + entropy(labels_b)), 0) + assert_almost_equal(v_measure_score(labels_a, labels_b), + normalized_mutual_info_score(labels_a, labels_b, + average_method='sum') + ) def test_fowlkes_mallows_score(): From ed500d694b0f36f47efa03d8ac1dc53990182d89 Mon Sep 17 00:00:00 2001 From: Arya McCarthy Date: Thu, 24 May 2018 11:33:41 -0400 Subject: [PATCH 04/14] Add note about `average_method` in NMI --- sklearn/metrics/cluster/supervised.py | 3 +-- 1 file 
changed, 1 insertion(+), 2 deletions(-) diff --git a/sklearn/metrics/cluster/supervised.py b/sklearn/metrics/cluster/supervised.py index 6c2e63d874a6f..7e04c31907555 100644 --- a/sklearn/metrics/cluster/supervised.py +++ b/sklearn/metrics/cluster/supervised.py @@ -730,7 +730,6 @@ def adjusted_mutual_info_score(labels_true, labels_pred, average_method=None): # Calculate entropy for each labeling h_true, h_pred = entropy(labels_true), entropy(labels_pred) normalizer = _generalized_average(h_true, h_pred, average_method) - print(normalizer) ami = (mi - emi) / (normalizer - emi) return ami @@ -743,7 +742,7 @@ def normalized_mutual_info_score(labels_true, labels_pred, Information (MI) score to scale the results between 0 (no mutual information) and 1 (perfect correlation). In this function, mutual information is normalized by some generalized mean of ``H(labels_true)`` - and ``H(labels_pred))``. + and ``H(labels_pred))``, defined by the `average_method`. This measure is not adjusted for chance. Therefore :func:`adjusted_mustual_info_score` might be preferred. From 5ed8527a6a5fc72f685bdc55c7e61c03a1630a3d Mon Sep 17 00:00:00 2001 From: Arya McCarthy Date: Thu, 24 May 2018 13:34:09 -0400 Subject: [PATCH 05/14] Update docs from AMI, NMI changes (#1) * Correct the NMI and AMI descriptions in docs * Update docstrings due to averaging changes - V-measure - Homogeneity - Completeness - NMI - AMI --- doc/modules/clustering.rst | 15 +++++---------- sklearn/metrics/cluster/supervised.py | 14 ++++++++++++-- 2 files changed, 17 insertions(+), 12 deletions(-) diff --git a/doc/modules/clustering.rst b/doc/modules/clustering.rst index ce335cef2dd5c..fe16eb4658a73 100644 --- a/doc/modules/clustering.rst +++ b/doc/modules/clustering.rst @@ -1102,17 +1102,11 @@ Advantages for any value of ``n_clusters`` and ``n_samples`` (which is not the case for raw Mutual Information or the V-measure for instance). 
-- **Bounded range [0, 1]**: Values close to zero indicate two label +- **Upper bound of 1**: Values close to zero indicate two label assignments that are largely independent, while values close to one - indicate significant agreement. Further, values of exactly 0 indicate - **purely** independent label assignments and a AMI of exactly 1 indicates + indicate significant agreement. Further, an AMI of exactly 1 indicates that the two label assignments are equal (with or without permutation). -- **No assumption is made on the cluster structure**: can be used - to compare clustering algorithms such as k-means which assumes isotropic - blob shapes with results of spectral clustering algorithms which can - find cluster with "folded" shapes. - Drawbacks ~~~~~~~~~ @@ -1185,7 +1179,7 @@ following equation, from Vinh, Epps, and Bailey, (2009). In this equation, Using the expected value, the adjusted mutual information can then be calculated using a similar form to that of the adjusted Rand index: -.. math:: \text{AMI} = \frac{\text{MI} - E[\text{MI}]}{\max(H(U), H(V)) - E[\text{MI}]} +.. math:: \text{AMI} = \frac{\text{MI} - E[\text{MI}]}{\text{mean}(H(U), H(V)) - E[\text{MI}]} .. topic:: References @@ -1249,7 +1243,8 @@ Their harmonic mean called **V-measure** is computed by 0.51... The V-measure is actually equivalent to the mutual information (NMI) -discussed above normalized by the sum of the label entropies [B2011]_. +discussed above normalized by the arithmetic mean of the label +entropies [B2011]_. 
Homogeneity, completeness and V-measure can be computed at once using :func:`homogeneity_completeness_v_measure` as follows:: diff --git a/sklearn/metrics/cluster/supervised.py b/sklearn/metrics/cluster/supervised.py index 7e04c31907555..9cb52431b6aaa 100644 --- a/sklearn/metrics/cluster/supervised.py +++ b/sklearn/metrics/cluster/supervised.py @@ -253,7 +253,9 @@ def homogeneity_completeness_v_measure(labels_true, labels_pred): V-Measure is furthermore symmetric: swapping ``labels_true`` and ``label_pred`` will give the same score. This does not hold for - homogeneity and completeness. + homogeneity and completeness. V-Measure is identical to + :func:`normalized_mutual_info_score` with the averaging method + ``'sum'``. Read more in the :ref:`User Guide `. @@ -452,7 +454,8 @@ def completeness_score(labels_true, labels_pred): def v_measure_score(labels_true, labels_pred): """V-measure cluster labeling given a ground truth. - This score is identical to :func:`normalized_mutual_info_score`. + This score is identical to :func:`normalized_mutual_info_score` with + the ``'sum'`` option for averaging. The V-measure is the harmonic mean between homogeneity and completeness:: @@ -467,6 +470,7 @@ def v_measure_score(labels_true, labels_pred): measure the agreement of two independent label assignments strategies on the same dataset when the real ground truth is not known. + Read more in the :ref:`User Guide `. Parameters @@ -493,6 +497,7 @@ def v_measure_score(labels_true, labels_pred): -------- homogeneity_score completeness_score + normalized_mutual_info_score Examples -------- @@ -664,6 +669,8 @@ def adjusted_mutual_info_score(labels_true, labels_pred, average_method=None): If None, 'max' will be used. This is likely to change in a future version. + .. versionadded:: 0.20 + Returns ------- ami: float(upperlimited by 1.0) @@ -772,6 +779,8 @@ def normalized_mutual_info_score(labels_true, labels_pred, If None, 'sqrt' will be used, matching the behavior of `v_measure_score`. 
+ .. versionadded:: 0.20 + Returns ------- nmi : float @@ -779,6 +788,7 @@ def normalized_mutual_info_score(labels_true, labels_pred, See also -------- + v_measure_score: V-Measure (NMI with arithmetic mean option.) adjusted_rand_score: Adjusted Rand Index adjusted_mutual_info_score: Adjusted Mutual Information (adjusted against chance) From df60d46282fed85a591b62f9aa3d51e5cb26780f Mon Sep 17 00:00:00 2001 From: Arya McCarthy Date: Sat, 26 May 2018 21:59:41 -0400 Subject: [PATCH 06/14] Update documentation and remove nose tests (#2) * Update v0.20.rst * Update test_supervised.py * Update clustering.rst --- doc/modules/clustering.rst | 21 +++++++++++--- doc/whats_new/v0.20.rst | 6 ++++ .../metrics/cluster/tests/test_supervised.py | 29 ++++++++----------- 3 files changed, 35 insertions(+), 21 deletions(-) diff --git a/doc/modules/clustering.rst b/doc/modules/clustering.rst index fe16eb4658a73..cf994a8dc0a37 100644 --- a/doc/modules/clustering.rst +++ b/doc/modules/clustering.rst @@ -1158,7 +1158,7 @@ It also can be expressed in set cardinality formulation: The normalized mutual information is defined as -.. math:: \text{NMI}(U, V) = \frac{\text{MI}(U, V)}{\sqrt{H(U)H(V)}} +.. math:: \text{NMI}(U, V) = \frac{\text{MI}(U, V)}{\text{mean}(H(U), H(V))} This value of the mutual information and also the normalized variant is not adjusted for chance and will tend to increase as the number of different labels @@ -1181,6 +1181,13 @@ calculated using a similar form to that of the adjusted Rand index: .. math:: \text{AMI} = \frac{\text{MI} - E[\text{MI}]}{\text{mean}(H(U), H(V)) - E[\text{MI}]} +For normalized mutual information and adjusted mutual information, the normalizing +value is typically some mean of the entropies of each clustering. Various means exist, +and no firm rules exist for preferring one over the others. The decision is largely +a field-by-field basis; for instance, in community detection, the arithmetic mean is +most common. Yang et al. 
(2016) found that each normalizing method provided +"qualitatively similar behaviours". + .. topic:: References * Strehl, Alexander, and Joydeep Ghosh (2002). "Cluster ensembles – a @@ -1194,16 +1201,22 @@ calculated using a similar form to that of the adjusted Rand index: `doi:10.1145/1553374.1553511 `_. ISBN 9781605585161. - * Vinh, Epps, and Bailey, (2010). Information Theoretic Measures for + * Vinh, Epps, and Bailey, (2010). "Information Theoretic Measures for Clusterings Comparison: Variants, Properties, Normalization and - Correction for Chance, JMLR - http://jmlr.csail.mit.edu/papers/volume11/vinh10a/vinh10a.pdf + Correction for Chance". JMLR + * `Wikipedia entry for the (normalized) Mutual Information `_ * `Wikipedia entry for the Adjusted Mutual Information `_ + + * Yang, Algesheimer, and Tessone, (2016). "A comparative analysis of community + detection algorithms on artificial networks". Scientific Reports 6: 30750. + `doi:10.1038/srep30750 `_. + + .. _homogeneity_completeness: diff --git a/doc/whats_new/v0.20.rst b/doc/whats_new/v0.20.rst index ef6f065bdd3c2..9e1b158ab93b3 100644 --- a/doc/whats_new/v0.20.rst +++ b/doc/whats_new/v0.20.rst @@ -121,6 +121,12 @@ Metrics - Partial AUC is available via ``max_fpr`` parameter in :func:`metrics.roc_auc_score`. :issue:`3273` by :user:`Alexander Niederbühl `. +- Added control over the normalizer in + :func:`metrics.normalized_mutual_information_score` and + :func:`metrics.adjusted_mutual_information_score` via the ``average_method`` + parameter. In a future version, the default normalizer for each will become + the *arithmetic* mean of the entropies of each clustering. :issue:`11124` by + :user:`Arya McCarthy `. Enhancements ............ 
diff --git a/sklearn/metrics/cluster/tests/test_supervised.py b/sklearn/metrics/cluster/tests/test_supervised.py index a1a199cae8b56..6bcbd2fcc2e60 100644 --- a/sklearn/metrics/cluster/tests/test_supervised.py +++ b/sklearn/metrics/cluster/tests/test_supervised.py @@ -54,9 +54,7 @@ def test_generalized_average(): assert means[0] <= means[1] <= means[2] <= means[3] c, d = 12, 12 means = [_generalized_average(c, d, method) for method in methods] - assert_equal(means[0], means[1]) - assert_equal(means[1], means[2]) - assert_equal(means[2], means[3]) + assert means[0] == means[1] == means[2] == means[3] def test_perfect_matches(): @@ -75,13 +73,13 @@ def test_perfect_matches(): means = {"min", "sqrt", "sum", "max"} for score_func in score_funcs_with_changing_means: for mean in means: - assert_equal(score_func([], [], mean), 1.0) - assert_equal(score_func([0], [1], mean), 1.0) - assert_equal(score_func([0, 0, 0], [0, 0, 0], mean), 1.0) - assert_equal(score_func([0, 1, 0], [42, 7, 42], mean), 1.0) - assert_equal(score_func([0., 1., 0.], [42., 7., 42.], mean), 1.0) - assert_equal(score_func([0., 1., 2.], [42., 7., 2.], mean), 1.0) - assert_equal(score_func([0, 1, 2], [42, 7, 2], mean), 1.0) + assert score_func([], [], mean) == 1.0 + assert score_func([0], [1], mean) == 1.0 + assert score_func([0, 0, 0], [0, 0, 0], mean) == 1.0 + assert score_func([0, 1, 0], [42, 7, 42], mean) == 1.0 + assert score_func([0., 1., 0.], [42., 7., 42.], mean) == 1.0 + assert score_func([0., 1., 2.], [42., 7., 2.], mean) == 1.0 + assert score_func([0, 1, 2], [42, 7, 2], mean) == 1.0 def test_homogeneous_but_not_complete_labeling(): @@ -252,13 +250,10 @@ def test_exactly_zero_info_score(): assert_equal(adjusted_mutual_info_score(labels_a, labels_b), 0.0) assert_equal(normalized_mutual_info_score(labels_a, labels_b), 0.0) for method in ["min", "sqrt", "sum", "max"]: - print(method) - assert_equal(adjusted_mutual_info_score(labels_a, labels_b, - method), - 0.0) - 
assert_equal(normalized_mutual_info_score(labels_a, labels_b, - method), - 0.0) + assert adjusted_mutual_info_score(labels_a, labels_b, + method) == 0.0 + assert normalized_mutual_info_score(labels_a, labels_b, + method) == 0.0 def test_v_measure_and_mutual_information(seed=36): From b449cb924e1927d65f2d0695cfff8847490a26c6 Mon Sep 17 00:00:00 2001 From: Arya McCarthy Date: Sat, 26 May 2018 22:27:19 -0400 Subject: [PATCH 07/14] Fix multiple spaces after operator --- sklearn/metrics/cluster/tests/test_supervised.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/metrics/cluster/tests/test_supervised.py b/sklearn/metrics/cluster/tests/test_supervised.py index 6bcbd2fcc2e60..08d96130d2f3e 100644 --- a/sklearn/metrics/cluster/tests/test_supervised.py +++ b/sklearn/metrics/cluster/tests/test_supervised.py @@ -73,7 +73,7 @@ def test_perfect_matches(): means = {"min", "sqrt", "sum", "max"} for score_func in score_funcs_with_changing_means: for mean in means: - assert score_func([], [], mean) == 1.0 + assert score_func([], [], mean) == 1.0 assert score_func([0], [1], mean) == 1.0 assert score_func([0, 0, 0], [0, 0, 0], mean) == 1.0 assert score_func([0, 1, 0], [42, 7, 42], mean) == 1.0 From 1b36da577f314fbcb11bdaaf9cccf634d2f5143b Mon Sep 17 00:00:00 2001 From: Arya McCarthy Date: Mon, 28 May 2018 11:19:59 -0400 Subject: [PATCH 08/14] Rename all arguments --- doc/modules/clustering.rst | 8 ++++-- sklearn/metrics/cluster/supervised.py | 28 +++++++++---------- .../metrics/cluster/tests/test_supervised.py | 9 +++--- 3 files changed, 25 insertions(+), 20 deletions(-) diff --git a/doc/modules/clustering.rst b/doc/modules/clustering.rst index cf994a8dc0a37..bb85b9c6b6e9c 100644 --- a/doc/modules/clustering.rst +++ b/doc/modules/clustering.rst @@ -1048,8 +1048,8 @@ Given the knowledge of the ground truth class assignments ``labels_true`` and our clustering algorithm assignments of the same samples ``labels_pred``, the **Mutual Information** is a function 
that measures the **agreement** of the two assignments, ignoring permutations. Two different normalized versions of this -measure are available, **Normalized Mutual Information(NMI)** and **Adjusted -Mutual Information(AMI)**. NMI is often used in the literature while AMI was +measure are available, **Normalized Mutual Information (NMI)** and **Adjusted +Mutual Information (AMI)**. NMI is often used in the literature, while AMI was proposed more recently and is **normalized against chance**:: >>> from sklearn import metrics @@ -1188,6 +1188,10 @@ a field-by-field basis; for instance, in community detection, the arithmetic mea most common. Yang et al. (2016) found that each normalizing method provided "qualitatively similar behaviours". +Vinh et al. (2010) named variants of NMI and AMI by their averaging method. Their +'sqrt' and 'sum' averages are the geometric and arithmetic means; we use these +more broadly common names. + .. topic:: References * Strehl, Alexander, and Joydeep Ghosh (2002). 
"Cluster ensembles – a diff --git a/sklearn/metrics/cluster/supervised.py b/sklearn/metrics/cluster/supervised.py index 9cb52431b6aaa..c7de12e61d586 100644 --- a/sklearn/metrics/cluster/supervised.py +++ b/sklearn/metrics/cluster/supervised.py @@ -56,15 +56,15 @@ def _generalized_average(U, V, average_method): """Return a particular mean of two numbers.""" if average_method == "min": return max(min(U, V), 1e-10) - elif average_method == "sqrt": + elif average_method == "geometric": return max(np.sqrt(U * V), 1e-10) # Avoids zero-division error - elif average_method == "sum": + elif average_method == "arithmetic": return max(np.mean([U, V]), 1e-10) elif average_method == "max": return max(U, V) else: - raise ValueError("'average_method' must be 'min', 'sqrt', 'sum', or " - "'max'") + raise ValueError("'average_method' must be 'min', 'geometric', " + "'arithmetic', or 'max'") def contingency_matrix(labels_true, labels_pred, eps=None, sparse=False): @@ -254,8 +254,8 @@ def homogeneity_completeness_v_measure(labels_true, labels_pred): V-Measure is furthermore symmetric: swapping ``labels_true`` and ``label_pred`` will give the same score. This does not hold for homogeneity and completeness. V-Measure is identical to - :func:`normalized_mutual_info_score` with the averaging method - ``'sum'``. + :func:`normalized_mutual_info_score` with the arithmetic averaging + method. Read more in the :ref:`User Guide `. @@ -455,7 +455,7 @@ def v_measure_score(labels_true, labels_pred): """V-measure cluster labeling given a ground truth. This score is identical to :func:`normalized_mutual_info_score` with - the ``'sum'`` option for averaging. + the ``'arithmetic'`` option for averaging. The V-measure is the harmonic mean between homogeneity and completeness:: @@ -665,7 +665,7 @@ def adjusted_mutual_info_score(labels_true, labels_pred, average_method=None): average_method : string or None, optional (default: None) How to compute the normalizer in the denominator. 
Possible options - are 'min', 'sqrt', 'sum', and 'max'. + are 'min', 'geometric', 'arithmetic', and 'max'. If None, 'max' will be used. This is likely to change in a future version. @@ -715,7 +715,7 @@ def adjusted_mutual_info_score(labels_true, labels_pred, average_method=None): if average_method is None: warnings.warn("The behavior of AMI will change in a future version. " "To match the behavior of 'v_measure_score', AMI will " - "use sum-averaging, i.e. arithmetic mean, by default." + "use arithmetic mean by default." ) average_method = 'max' labels_true, labels_pred = check_clusterings(labels_true, labels_pred) @@ -775,9 +775,9 @@ def normalized_mutual_info_score(labels_true, labels_pred, average_method : string or None, optional (default: None) How to compute the normalizer in the denominator. Possible options - are 'min', 'sqrt', 'sum', and 'max'. - If None, 'sqrt' will be used, matching the behavior of - `v_measure_score`. + are 'min', 'geometric', 'arithmetic', and 'max'. + If None, 'geometric' will be used. This is likely to change in a + future version. .. versionadded:: 0.20 @@ -815,9 +815,9 @@ def normalized_mutual_info_score(labels_true, labels_pred, if average_method is None: warnings.warn("The behavior of NMI will change in a future version. " "To match the behavior of 'v_measure_score', NMI will " - "use sum-averaging, i.e. arithmetic mean, by default." + "use arithmetic mean by default." 
) - average_method = 'sqrt' + average_method = 'geometric' labels_true, labels_pred = check_clusterings(labels_true, labels_pred) classes = np.unique(labels_true) clusters = np.unique(labels_pred) diff --git a/sklearn/metrics/cluster/tests/test_supervised.py b/sklearn/metrics/cluster/tests/test_supervised.py index 08d96130d2f3e..bb2f253774b85 100644 --- a/sklearn/metrics/cluster/tests/test_supervised.py +++ b/sklearn/metrics/cluster/tests/test_supervised.py @@ -49,7 +49,7 @@ def test_error_messages_on_wrong_input(): def test_generalized_average(): a, b = 1, 2 - methods = ["min", "sqrt", "sum", "max"] + methods = ["min", "geometric", "arithmetic", "max"] means = [_generalized_average(a, b, method) for method in methods] assert means[0] <= means[1] <= means[2] <= means[3] c, d = 12, 12 @@ -70,7 +70,7 @@ def test_perfect_matches(): normalized_mutual_info_score, adjusted_mutual_info_score, ] - means = {"min", "sqrt", "sum", "max"} + means = {"min", "geometric", "arithmetic", "max"} for score_func in score_funcs_with_changing_means: for mean in means: assert score_func([], [], mean) == 1.0 @@ -249,7 +249,7 @@ def test_exactly_zero_info_score(): assert_equal(v_measure_score(labels_a, labels_b), 0.0) assert_equal(adjusted_mutual_info_score(labels_a, labels_b), 0.0) assert_equal(normalized_mutual_info_score(labels_a, labels_b), 0.0) - for method in ["min", "sqrt", "sum", "max"]: + for method in ["min", "geometric", "arithmetic", "max"]: assert adjusted_mutual_info_score(labels_a, labels_b, method) == 0.0 assert normalized_mutual_info_score(labels_a, labels_b, @@ -265,9 +265,10 @@ def test_v_measure_and_mutual_information(seed=36): assert_almost_equal(v_measure_score(labels_a, labels_b), 2.0 * mutual_info_score(labels_a, labels_b) / (entropy(labels_a) + entropy(labels_b)), 0) + avg = 'arithmetic' assert_almost_equal(v_measure_score(labels_a, labels_b), normalized_mutual_info_score(labels_a, labels_b, - average_method='sum') + average_method=avg) ) From 
3d8bf2c16133730ef3041c4d0816526ffdf277df Mon Sep 17 00:00:00 2001 From: Arya McCarthy Date: Wed, 6 Jun 2018 13:20:07 -0400 Subject: [PATCH 09/14] No more arbitrary values! --- doc/modules/clustering.rst | 3 ++- doc/whats_new/v0.20.rst | 13 ++++++++++++- sklearn/metrics/cluster/supervised.py | 21 +++++++++++++-------- 3 files changed, 27 insertions(+), 10 deletions(-) diff --git a/doc/modules/clustering.rst b/doc/modules/clustering.rst index bb85b9c6b6e9c..c2e40081c49f1 100644 --- a/doc/modules/clustering.rst +++ b/doc/modules/clustering.rst @@ -1186,7 +1186,8 @@ value is typically some mean of the entropies of each clustering. Various means and no firm rules exist for preferring one over the others. The decision is largely a field-by-field basis; for instance, in community detection, the arithmetic mean is most common. Yang et al. (2016) found that each normalizing method provided -"qualitatively similar behaviours". +"qualitatively similar behaviours". In our implementation, this is +controlled by the ``average_method`` parameter. Vinh et al. (2010) named variants of NMI and AMI by their averaging method. Their 'sqrt' and 'sum' averages are the geometric and arithmetic means; we use these diff --git a/doc/whats_new/v0.20.rst b/doc/whats_new/v0.20.rst index 9e1b158ab93b3..a61badd73f929 100644 --- a/doc/whats_new/v0.20.rst +++ b/doc/whats_new/v0.20.rst @@ -124,7 +124,7 @@ Metrics - Added control over the normalizer in :func:`metrics.normalized_mutual_information_score` and :func:`metrics.adjusted_mutual_information_score` via the ``average_method`` - parameter. In a future version, the default normalizer for each will become + parameter. In version 0.22, the default normalizer for each will become the *arithmetic* mean of the entropies of each clustering. :issue:`11124` by :user:`Arya McCarthy `. @@ -537,6 +537,17 @@ Metrics for :func:`metrics.roc_auc_score`. Moreover using ``reorder=True`` can hide bugs due to floating point error in the input. 
:issue:`9851` by :user:`Hanmin Qin `. +- In :func:`metrics.normalized_mutual_information_score` and + :func:`metrics.adjusted_mutual_information_score`, + warn that ``average_method`` + will have a new default value. In version 0.22, the default normalizer for each + will become the *arithmetic* mean of the entropies of each clustering. Currently, + :func:`metrics.normalized_mutual_information_score` uses the default of + ``average_method='geometric'``, and :func:`metrics.adjusted_mutual_information_score` + uses the default of ``average_method='max'`` to match their behaviors in + version 0.19. + :issue:`11124` by :user:`Arya McCarthy `. + Cluster diff --git a/sklearn/metrics/cluster/supervised.py b/sklearn/metrics/cluster/supervised.py index c7de12e61d586..4d984b73b8dde 100644 --- a/sklearn/metrics/cluster/supervised.py +++ b/sklearn/metrics/cluster/supervised.py @@ -55,11 +55,11 @@ def check_clusterings(labels_true, labels_pred): def _generalized_average(U, V, average_method): """Return a particular mean of two numbers.""" if average_method == "min": - return max(min(U, V), 1e-10) + return min(U, V) elif average_method == "geometric": - return max(np.sqrt(U * V), 1e-10) # Avoids zero-division error + return np.sqrt(U * V) elif average_method == "arithmetic": - return max(np.mean([U, V]), 1e-10) + return np.mean([U, V]) elif average_method == "max": return max(U, V) else: @@ -713,9 +713,9 @@ def adjusted_mutual_info_score(labels_true, labels_pred, average_method=None): """ if average_method is None: - warnings.warn("The behavior of AMI will change in a future version. " + warnings.warn("The behavior of AMI will change in version 0.22. " "To match the behavior of 'v_measure_score', AMI will " - "use arithmetic mean by default." + "use average_method='arithmetic' by default." 
) average_method = 'max' labels_true, labels_pred = check_clusterings(labels_true, labels_pred) @@ -737,7 +737,10 @@ def adjusted_mutual_info_score(labels_true, labels_pred, average_method=None): # Calculate entropy for each labeling h_true, h_pred = entropy(labels_true), entropy(labels_pred) normalizer = _generalized_average(h_true, h_pred, average_method) - ami = (mi - emi) / (normalizer - emi) + # Avoid 0.0 / 0.0 when either entropy is zero. + denominator = normalizer - emi + denominator = max(denominator, np.finfo('float64').eps) + ami = (mi - emi) / denominator return ami @@ -813,9 +816,9 @@ def normalized_mutual_info_score(labels_true, labels_pred, """ if average_method is None: - warnings.warn("The behavior of NMI will change in a future version. " + warnings.warn("The behavior of NMI will change in version 0.22. " "To match the behavior of 'v_measure_score', NMI will " - "use arithmetic mean by default." + "use average_method='arithmetic' by default." ) average_method = 'geometric' labels_true, labels_pred = check_clusterings(labels_true, labels_pred) @@ -835,6 +838,8 @@ def normalized_mutual_info_score(labels_true, labels_pred, # Calculate entropy for each labeling h_true, h_pred = entropy(labels_true), entropy(labels_pred) normalizer = _generalized_average(h_true, h_pred, average_method) + # Avoid 0.0 / 0.0 when either entropy is zero. 
+ normalizer = max(normalizer, np.finfo('float64').eps) nmi = mi / normalizer return nmi From 2854014e5ee25fceeb74a34c46c696c65f104107 Mon Sep 17 00:00:00 2001 From: Arya McCarthy Date: Wed, 6 Jun 2018 14:53:13 -0400 Subject: [PATCH 10/14] Improve handling of floating-point imprecision --- sklearn/metrics/cluster/supervised.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/sklearn/metrics/cluster/supervised.py b/sklearn/metrics/cluster/supervised.py index 4d984b73b8dde..52c4ad5454388 100644 --- a/sklearn/metrics/cluster/supervised.py +++ b/sklearn/metrics/cluster/supervised.py @@ -737,9 +737,15 @@ def adjusted_mutual_info_score(labels_true, labels_pred, average_method=None): # Calculate entropy for each labeling h_true, h_pred = entropy(labels_true), entropy(labels_pred) normalizer = _generalized_average(h_true, h_pred, average_method) - # Avoid 0.0 / 0.0 when either entropy is zero. denominator = normalizer - emi - denominator = max(denominator, np.finfo('float64').eps) + # Avoid 0.0 / 0.0 when expectation equals maximum, i.e. a perfect match. + # normalizer should always be >= emi, but because of floating-point + # representation, sometimes emi is slightly larger. Correct this + # by preserving the sign. 
+ if denominator < 0: + denominator = min(denominator, -np.finfo('float64').eps) + else: + denominator = max(denominator, np.finfo('float64').eps) ami = (mi - emi) / denominator return ami From 059bae6c0b4088c8b3a2898e3c2e46e5df883435 Mon Sep 17 00:00:00 2001 From: Arya McCarthy Date: Wed, 6 Jun 2018 20:04:49 -0400 Subject: [PATCH 11/14] Clearly state when the change occurs --- sklearn/metrics/cluster/supervised.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sklearn/metrics/cluster/supervised.py b/sklearn/metrics/cluster/supervised.py index 52c4ad5454388..0dbedf1a9dafd 100644 --- a/sklearn/metrics/cluster/supervised.py +++ b/sklearn/metrics/cluster/supervised.py @@ -666,8 +666,8 @@ def adjusted_mutual_info_score(labels_true, labels_pred, average_method=None): average_method : string or None, optional (default: None) How to compute the normalizer in the denominator. Possible options are 'min', 'geometric', 'arithmetic', and 'max'. - If None, 'max' will be used. This is likely to change in a future - version. + If None, 'max' will be used. The default will change to + 'arithmetic' in version 0.22. .. versionadded:: 0.20 @@ -785,8 +785,8 @@ def normalized_mutual_info_score(labels_true, labels_pred, average_method : string or None, optional (default: None) How to compute the normalizer in the denominator. Possible options are 'min', 'geometric', 'arithmetic', and 'max'. - If None, 'geometric' will be used. This is likely to change in a - future version. + If None, 'geometric' will be used. The default will change to + 'arithmetic' in version 0.22. .. 
versionadded:: 0.20 From e8b957903f808ebaba68680072e653691b373ad3 Mon Sep 17 00:00:00 2001 From: Arya McCarthy Date: Sun, 15 Jul 2018 15:28:27 -0400 Subject: [PATCH 12/14] Update AMI/NMI docs --- doc/modules/clustering.rst | 30 +++++++++++++++--------------- doc/whats_new/v0.20.rst | 2 ++ 2 files changed, 17 insertions(+), 15 deletions(-) diff --git a/doc/modules/clustering.rst b/doc/modules/clustering.rst index c2e40081c49f1..7f55e6ac7830e 100644 --- a/doc/modules/clustering.rst +++ b/doc/modules/clustering.rst @@ -1166,7 +1166,7 @@ adjusted for chance and will tend to increase as the number of different labels between the label assignments. The expected value for the mutual information can be calculated using the -following equation, from Vinh, Epps, and Bailey, (2009). In this equation, +following equation [VEB2009]_. In this equation, :math:`a_i = |U_i|` (the number of elements in :math:`U_i`) and :math:`b_j = |V_j|` (the number of elements in :math:`V_j`). @@ -1181,15 +1181,15 @@ calculated using a similar form to that of the adjusted Rand index: .. math:: \text{AMI} = \frac{\text{MI} - E[\text{MI}]}{\text{mean}(H(U), H(V)) - E[\text{MI}]} -For normalized mutual information and adjusted mutual information, the normalizing -value is typically some mean of the entropies of each clustering. Various means exist, -and no firm rules exist for preferring one over the others. The decision is largely -a field-by-field basis; for instance, in community detection, the arithmetic mean is -most common. Yang et al. (2016) found that each normalizing method provided -"qualitatively similar behaviours". In our implementation, this is -controlled by the ``average_method`` parameter. +For normalized mutual information and adjusted mutual information, the normalizing +value is typically some *generalized* mean of the entropies of each clustering. +Various generalized means exist, and no firm rules exist for preferring one over the +others. 
The decision is largely made on a field-by-field basis; for instance, in community +detection, the arithmetic mean is most common. Each +normalizing method provides "qualitatively similar behaviours" [YAT2016]_. In our +implementation, this is controlled by the ``average_method`` parameter. -Vinh et al. (2010) named variants of NMI and AMI by their averaging method. Their +Vinh et al. (2010) named variants of NMI and AMI by their averaging method [VEB2010]_. Their 'sqrt' and 'sum' averages are the geometric and arithmetic means; we use these more broadly common names. @@ -1200,13 +1200,13 @@ more broadly common names. Machine Learning Research 3: 583–617. `doi:10.1162/153244303321897735 `_. - * Vinh, Epps, and Bailey, (2009). "Information theoretic measures + * [VEB2009] Vinh, Epps, and Bailey, (2009). "Information theoretic measures for clusterings comparison". Proceedings of the 26th Annual International Conference on Machine Learning - ICML '09. `doi:10.1145/1553374.1553511 `_. ISBN 9781605585161. - * Vinh, Epps, and Bailey, (2010). "Information Theoretic Measures for + * [VEB2010] Vinh, Epps, and Bailey, (2010). "Information Theoretic Measures for Clusterings Comparison: Variants, Properties, Normalization and Correction for Chance". JMLR @@ -1217,7 +1217,8 @@ more broadly common names. * `Wikipedia entry for the Adjusted Mutual Information `_ - * Yang, Algesheimer, and Tessone, (2016). "A comparative analysis of community + * [YAT2016] Yang, Algesheimer, and Tessone, (2016). "A comparative analysis of + community detection algorithms on artificial networks". Scientific Reports 6: 30750. `doi:10.1038/srep30750 `_. @@ -1261,8 +1262,7 @@ Their harmonic mean called **V-measure** is computed by 0.51... The V-measure is actually equivalent to the mutual information (NMI) -discussed above normalized by the arithmetic mean of the label -entropies [B2011]_. +discussed above, with the aggregation function being the arithmetic mean [B2011]_. 
Homogeneity, completeness and V-measure can be computed at once using :func:`homogeneity_completeness_v_measure` as follows:: @@ -1437,7 +1437,7 @@ Advantages for any value of ``n_clusters`` and ``n_samples`` (which is not the case for raw Mutual Information or the V-measure for instance). -- **Bounded range [0, 1]**: Values close to zero indicate two label +- **Upper-bounded at 1**: Values close to zero indicate two label assignments that are largely independent, while values close to one indicate significant agreement. Further, values of exactly 0 indicate **purely** independent label assignments and a AMI of exactly 1 indicates diff --git a/doc/whats_new/v0.20.rst b/doc/whats_new/v0.20.rst index a61badd73f929..9001c3b839939 100644 --- a/doc/whats_new/v0.20.rst +++ b/doc/whats_new/v0.20.rst @@ -121,6 +121,7 @@ Metrics - Partial AUC is available via ``max_fpr`` parameter in :func:`metrics.roc_auc_score`. :issue:`3273` by :user:`Alexander Niederbühl `. + - Added control over the normalizer in :func:`metrics.normalized_mutual_information_score` and :func:`metrics.adjusted_mutual_information_score` via the ``average_method`` @@ -537,6 +538,7 @@ Metrics for :func:`metrics.roc_auc_score`. Moreover using ``reorder=True`` can hide bugs due to floating point error in the input. :issue:`9851` by :user:`Hanmin Qin `. + - In :func:`metrics.normalized_mutual_information_score` and :func:`metrics.adjusted_mutual_information_score`, warn that ``average_method`` From c65d2b3e3ea6b55aee32d835849fa285ed6f9aad Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Mon, 16 Jul 2018 15:21:04 -0500 Subject: [PATCH 13/14] Update v0.20.rst --- doc/whats_new/v0.20.rst | 3 --- 1 file changed, 3 deletions(-) diff --git a/doc/whats_new/v0.20.rst b/doc/whats_new/v0.20.rst index 5f1b29e15004c..dca1157ac0368 100644 --- a/doc/whats_new/v0.20.rst +++ b/doc/whats_new/v0.20.rst @@ -774,7 +774,6 @@ Metrics due to floating point error in the input. :issue:`9851` by :user:`Hanmin Qin `. 
-<<<<<<< HEAD - In :func:`metrics.normalized_mutual_information_score` and :func:`metrics.adjusted_mutual_information_score`, warn that ``average_method`` @@ -786,13 +785,11 @@ Metrics version 0.19. :issue:`11124` by :user:`Arya McCarthy `. -======= - The ``batch_size`` parameter to :func:`metrics.pairwise_distances_argmin_min` and :func:`metrics.pairwise_distances_argmin` is deprecated to be removed in v0.22. It no longer has any effect, as batch size is determined by global ``working_memory`` config. See :ref:`working_memory`. :issue:`10280` by `Joel Nothman`_ and :user:`Aman Dalmia `. ->>>>>>> master Cluster From a5b3c0f91cf39f234a181b2ab8ba0aeb69cfad93 Mon Sep 17 00:00:00 2001 From: Arya McCarthy Date: Mon, 16 Jul 2018 18:16:50 -0400 Subject: [PATCH 14/14] Catch FutureWarnings in AMI and NMI --- sklearn/metrics/cluster/supervised.py | 27 ++++++++++--------- .../metrics/cluster/tests/test_supervised.py | 18 +++++++++++++ 2 files changed, 32 insertions(+), 13 deletions(-) diff --git a/sklearn/metrics/cluster/supervised.py b/sklearn/metrics/cluster/supervised.py index 225bb1614fa72..13addf29fdc00 100644 --- a/sklearn/metrics/cluster/supervised.py +++ b/sklearn/metrics/cluster/supervised.py @@ -639,7 +639,8 @@ def mutual_info_score(labels_true, labels_pred, contingency=None): return mi.sum() -def adjusted_mutual_info_score(labels_true, labels_pred, average_method=None): +def adjusted_mutual_info_score(labels_true, labels_pred, + average_method='warn'): """Adjusted Mutual Information between two clusterings. Adjusted Mutual Information (AMI) is an adjustment of the Mutual @@ -672,17 +673,17 @@ def adjusted_mutual_info_score(labels_true, labels_pred, average_method=None): labels_pred : array, shape = [n_samples] A clustering of the data into disjoint subsets. - average_method : string or None, optional (default: None) + average_method : string, optional (default: 'warn') How to compute the normalizer in the denominator. 
Possible options are 'min', 'geometric', 'arithmetic', and 'max'. - If None, 'max' will be used. The default will change to + If 'warn', 'max' will be used. The default will change to 'arithmetic' in version 0.22. .. versionadded:: 0.20 Returns ------- - ami: float(upperlimited by 1.0) + ami: float (upperlimited by 1.0) The AMI returns a value of 1 when the two partitions are identical (ie perfectly matched). Random partitions (independent labellings) have an expected AMI around 0 on average hence can be negative. @@ -721,11 +722,11 @@ def adjusted_mutual_info_score(labels_true, labels_pred, average_method=None): `_ """ - if average_method is None: + if average_method == 'warn': warnings.warn("The behavior of AMI will change in version 0.22. " "To match the behavior of 'v_measure_score', AMI will " - "use average_method='arithmetic' by default." - ) + "use average_method='arithmetic' by default.", + FutureWarning) average_method = 'max' labels_true, labels_pred = check_clusterings(labels_true, labels_pred) n_samples = labels_true.shape[0] @@ -760,7 +761,7 @@ def adjusted_mutual_info_score(labels_true, labels_pred, average_method=None): def normalized_mutual_info_score(labels_true, labels_pred, - average_method=None): + average_method='warn'): """Normalized Mutual Information between two clusterings. Normalized Mutual Information (NMI) is an normalization of the Mutual @@ -791,10 +792,10 @@ def normalized_mutual_info_score(labels_true, labels_pred, labels_pred : array, shape = [n_samples] A clustering of the data into disjoint subsets. - average_method : string or None, optional (default: None) + average_method : string, optional (default: 'warn') How to compute the normalizer in the denominator. Possible options are 'min', 'geometric', 'arithmetic', and 'max'. - If None, 'geometric' will be used. The default will change to + If 'warn', 'geometric' will be used. The default will change to 'arithmetic' in version 0.22. .. 
versionadded:: 0.20 @@ -830,11 +831,11 @@ def normalized_mutual_info_score(labels_true, labels_pred, 0.0 """ - if average_method is None: + if average_method == 'warn': warnings.warn("The behavior of NMI will change in version 0.22. " "To match the behavior of 'v_measure_score', NMI will " - "use average_method='arithmetic' by default." - ) + "use average_method='arithmetic' by default.", + FutureWarning) average_method = 'geometric' labels_true, labels_pred = check_clusterings(labels_true, labels_pred) classes = np.unique(labels_true) diff --git a/sklearn/metrics/cluster/tests/test_supervised.py b/sklearn/metrics/cluster/tests/test_supervised.py index bb2f253774b85..46b95cfd8fda4 100644 --- a/sklearn/metrics/cluster/tests/test_supervised.py +++ b/sklearn/metrics/cluster/tests/test_supervised.py @@ -17,6 +17,7 @@ from sklearn.utils import assert_all_finite from sklearn.utils.testing import ( assert_equal, assert_almost_equal, assert_raise_message, + assert_warns_message, ignore_warnings ) from numpy.testing import assert_array_almost_equal @@ -31,6 +32,18 @@ ] +def test_future_warning(): + score_funcs_with_changing_means = [ + normalized_mutual_info_score, + adjusted_mutual_info_score, + ] + warning_msg = "The behavior of " + args = [0, 0, 0], [0, 0, 0] + for score_func in score_funcs_with_changing_means: + assert_warns_message(FutureWarning, warning_msg, score_func, *args) + + +@ignore_warnings(category=FutureWarning) def test_error_messages_on_wrong_input(): for score_func in score_funcs: expected = ('labels_true and labels_pred must have same size,' @@ -57,6 +70,7 @@ def test_generalized_average(): assert means[0] == means[1] == means[2] == means[3] +@ignore_warnings(category=FutureWarning) def test_perfect_matches(): for score_func in score_funcs: assert_equal(score_func([], []), 1.0) @@ -134,6 +148,7 @@ def test_non_consecutive_labels(): assert_almost_equal(ari_2, 0.24, 2) +@ignore_warnings(category=FutureWarning) def uniform_labelings_scores(score_func, 
n_samples, k_range, n_runs=10, seed=42): # Compute score for random uniform cluster labelings @@ -147,6 +162,7 @@ def uniform_labelings_scores(score_func, n_samples, k_range, n_runs=10, return scores +@ignore_warnings(category=FutureWarning) def test_adjustment_for_chance(): # Check that adjusted scores are almost zero on random labels n_clusters_range = [2, 10, 50, 90] @@ -160,6 +176,7 @@ def test_adjustment_for_chance(): assert_array_almost_equal(max_abs_scores, [0.02, 0.03, 0.03, 0.02], 2) +@ignore_warnings(category=FutureWarning) def test_adjusted_mutual_info_score(): # Compute the Adjusted Mutual Information and test against known values labels_a = np.array([1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3]) @@ -240,6 +257,7 @@ def test_contingency_matrix_sparse(): eps=1e-10, sparse=True) +@ignore_warnings(category=FutureWarning) def test_exactly_zero_info_score(): # Check numerical stability when information is exactly zero for i in np.logspace(1, 4, 4).astype(np.int):