
[MRG]Clusters-Class auto-match #10604

Closed
wants to merge 43 commits into from
Changes from all commits
43 commits
2213018
adding functionality to allow more clustering metrics
LPugens Feb 7, 2018
ef2ec2b
formatting compliant
LPugens Feb 7, 2018
5fab323
formatting and adding an functionality example
LPugens Feb 7, 2018
8402cf2
adding comma to comply with formatting
LPugens Feb 7, 2018
e97ef2b
Fixed doc generator
LPugens Feb 7, 2018
625ced6
more modifications to be compliant with coding guidelines
LPugens Feb 7, 2018
46fd79f
fixing doc bug
LPugens Feb 8, 2018
d1bdae1
fixing doctest and adopting max_assignment_score name as proposed by …
LPugens Feb 8, 2018
07be65c
fixing examle again
LPugens Feb 8, 2018
2b01392
Doc fixing to pass Travis verification
LPugens Feb 8, 2018
b5116b7
Yet another doc fix
LPugens Feb 8, 2018
81804bf
fixing travis version of scipy
LPugens Feb 8, 2018
4ba0cac
allowing for any number of clusters and classes
LPugens Feb 8, 2018
d4aab23
Merge branch 'clustering_match' of https://github.com/LPugens/scikit-…
LPugens Feb 8, 2018
29b7154
allowing for any number of clusters and classes and undoing travis sc…
LPugens Feb 8, 2018
2588538
better implementation
LPugens Feb 10, 2018
6e436d8
fixing test result
LPugens Feb 11, 2018
c3d1ea5
fixing pep8 formatting error
LPugens Feb 11, 2018
10892dc
fixing test error
LPugens Feb 11, 2018
8e12b2a
sorting result set for maintaining interoperability between python 2 …
LPugens Feb 11, 2018
63dd108
adding documentation
LPugens Feb 11, 2018
c50284f
fixing doc
LPugens Feb 11, 2018
5c8c3f3
fixing doc
LPugens Feb 11, 2018
fefe91c
fixing nomenclature
LPugens Jul 1, 2018
c10f69c
Merge branch 'master' into clustering_match
LPugens Jul 1, 2018
0225c32
Merge branch 'master' into clustering_match
LPugens Jul 1, 2018
bd31fb9
fixing nomenclature
LPugens Jul 1, 2018
cd3a09d
Merge remote-tracking branch 'origin/clustering_match' into clusterin…
LPugens Jul 1, 2018
1539a0b
fixing commit bug
LPugens Jul 1, 2018
9d39163
fixing doc title
LPugens Jul 1, 2018
1e08a8b
avoinding messing with imports
LPugens Jul 1, 2018
51a68cb
changing default label nomenclature
LPugens Jul 1, 2018
01e9253
adding negative indices to the test
LPugens Jul 1, 2018
e11cd91
nomenclature fix
LPugens Jul 1, 2018
320858d
comment fix
LPugens Jul 1, 2018
0a65cc2
doc fix
LPugens Jul 1, 2018
4847df3
comment fix
LPugens Jul 1, 2018
42a7a1d
simplifying test
LPugens Jul 1, 2018
cdbe4a3
fixing name style on labels
LPugens Jul 1, 2018
1474d53
fixing name style on labels
LPugens Jul 1, 2018
f582884
fixing line length
LPugens Jul 1, 2018
7227ab5
fixing line length
LPugens Jul 2, 2018
e1686d6
fixing comment code
LPugens Jul 2, 2018
1 change: 1 addition & 0 deletions doc/modules/classes.rst
@@ -904,6 +904,7 @@ details.
metrics.davies_bouldin_score
metrics.completeness_score
metrics.cluster.contingency_matrix
metrics.cluster.map_cluster_labels
metrics.fowlkes_mallows_score
metrics.homogeneity_completeness_v_measure
metrics.homogeneity_score
71 changes: 71 additions & 0 deletions doc/modules/clustering.rst
@@ -1733,3 +1733,74 @@ Drawbacks

* `Wikipedia entry for contingency matrix
<https://en.wikipedia.org/wiki/Contingency_table>`_

.. _map_cluster_labels:

Map cluster labels
-------------------

Map cluster labels
(:func:`sklearn.metrics.cluster.map_cluster_labels`) provides a
convenient way to compute classical classification metrics, such as
:func:`sklearn.metrics.accuracy_score` and
:func:`sklearn.metrics.f1_score`, on the output of a clustering algorithm.

Here is an example::

>>> from sklearn.metrics.cluster import map_cluster_labels, adjusted_rand_score
>>> from sklearn.metrics import confusion_matrix, accuracy_score
>>> y_true = ['a'] * 1 + ['b'] * 2 + ['c'] * 20 + ['d'] * 6 + ['e'] * \
... 13 + ['f'] * 2 + ['g'] * 3 + ['h'] * 3 + ['i'] * 2 + ['j'] * 1
>>> y_pred = [6] * 1 + [2] * 2 + [0] * 6 + [2] * 10 + [8] * 4 + [1] *\
... 4 + [5] * 2 + [0] * 4 + [3] * 5 + [6] * 2 + [9] * 2 + [7] *\
... 2 + [0] * 2 + [8] * 1 + [4] * 3 + [3] * 2 + [8] * 1
>>> y_pred = map_cluster_labels(y_true, y_pred)
>>> confusion_matrix(y_true, y_pred)
array([[ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[ 0, 0, 2, 0, 0, 0, 0, 0, 0, 0],
[ 0, 0, 10, 0, 0, 0, 6, 0, 0, 4],
[ 0, 0, 0, 4, 0, 0, 0, 0, 2, 0],
[ 2, 2, 0, 0, 5, 0, 4, 0, 0, 0],
[ 0, 0, 0, 0, 0, 2, 0, 0, 0, 0],
[ 0, 0, 0, 0, 0, 0, 2, 0, 0, 1],
[ 0, 0, 0, 0, 0, 0, 0, 3, 0, 0],
[ 0, 0, 0, 0, 2, 0, 0, 0, 0, 0],
[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]])
>>> accuracy_score(y_true, y_pred) # doctest: +ELLIPSIS
0.52...
>>> adjusted_rand_score(y_true, y_pred) # doctest: +ELLIPSIS
0.29...

Notice that the confusion matrix above has the sum of its main diagonal
maximized: this matching of true classes to clusters yields the highest
accuracy score achievable by any relabelling of the clusters.
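
As a quick sanity check (reusing ``y_true`` and ``y_pred`` from the example
above), the accuracy equals the normalized trace of that confusion matrix::

>>> import numpy as np
>>> C = confusion_matrix(y_true, y_pred)
>>> int(np.trace(C))
28
>>> np.trace(C) / len(y_true)  # doctest: +ELLIPSIS
0.52...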

Relabelling the clusters in this way is also compatible with the usual
clustering metrics, since renaming cluster labels does not change their
results, as the ARI value above shows.

Another example::

>>> y_true = ['a', 'a', 'a', 'b', 'b', 'b']
>>> y_pred = [3, 0, 1, 1, 2, 2]
>>> map_cluster_labels(y_true, y_pred)
['DEFAULT_LABEL_1', 'a', 'DEFAULT_LABEL_0', 'DEFAULT_LABEL_0', 'b', 'b']

The above example shows what happens when the clustering method identifies
more clusters than there are true classes. *Such results must be treated
carefully*, since not all metrics derived from this mapping remain meaningful.
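
For instance, accuracy is still well defined on this output, but per-class
metrics for the ``DEFAULT_LABEL_*`` placeholders are degenerate, since those
labels never occur in ``y_true`` (a small sketch reusing the example above)::

>>> accuracy_score(y_true, map_cluster_labels(y_true, y_pred))
0.5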

Advantages
~~~~~~~~~~

- Enables calculation of classical classification metrics, such as
accuracy and f1_score.

- Allows for a meaningful and easy-to-read clustering output when classes
are known.

Drawbacks
~~~~~~~~~

- One should use this tool carefully, since metrics computed on the mapped
  labels are not meaningful for every clustering task.
5 changes: 5 additions & 0 deletions examples/cluster/plot_affinity_propagation.py
@@ -27,6 +27,7 @@
labels = af.labels_

n_clusters_ = len(cluster_centers_indices)
translated_labels = metrics.cluster.map_cluster_labels(labels_true, labels)

print('Estimated number of clusters: %d' % n_clusters_)
print("Homogeneity: %0.3f" % metrics.homogeneity_score(labels_true, labels))
@@ -38,6 +39,10 @@
% metrics.adjusted_mutual_info_score(labels_true, labels))
print("Silhouette Coefficient: %0.3f"
% metrics.silhouette_score(X, labels, metric='sqeuclidean'))
print("Accuracy: %0.3f"
% metrics.accuracy_score(labels_true, translated_labels))
print("Confusion Matrix:\n%s"
% str(metrics.confusion_matrix(labels_true, translated_labels)))

# #############################################################################
# Plot result
5 changes: 3 additions & 2 deletions sklearn/metrics/cluster/__init__.py
@@ -17,6 +17,7 @@
from .supervised import v_measure_score
from .supervised import fowlkes_mallows_score
from .supervised import entropy
from .supervised import map_cluster_labels
from .unsupervised import silhouette_samples
from .unsupervised import silhouette_score
from .unsupervised import calinski_harabaz_score
@@ -27,6 +28,6 @@
"adjusted_rand_score", "completeness_score", "contingency_matrix",
"expected_mutual_information", "homogeneity_completeness_v_measure",
"homogeneity_score", "mutual_info_score", "v_measure_score",
"fowlkes_mallows_score", "entropy", "silhouette_samples",
"silhouette_score", "calinski_harabaz_score",
"fowlkes_mallows_score", "entropy", "map_cluster_labels",
"silhouette_samples", "silhouette_score", "calinski_harabaz_score",
"davies_bouldin_score", "consensus_score"]
71 changes: 71 additions & 0 deletions sklearn/metrics/cluster/supervised.py
@@ -11,6 +11,7 @@
# Thierry Guillemot <thierry.guillemot.work@gmail.com>
# Gregory Stupp <stuppie@gmail.com>
# Joel Nothman <joel.nothman@gmail.com>
# Lucas Pugens Fernandes <lpfernandes@gmail.com>
# License: BSD 3 clause

from __future__ import division
@@ -22,7 +23,9 @@

from .expected_mutual_info_fast import expected_mutual_information
from ...utils.validation import check_array
from ...utils.multiclass import unique_labels
from ...utils.fixes import comb
from ...utils.linear_assignment_ import linear_assignment


def comb2(n):
@@ -871,3 +874,71 @@ def entropy(labels):
# log(a / b) should be calculated as log(a) - log(b) for
# possible loss of precision
return -np.sum((pi / pi_sum) * (np.log(pi) - log(pi_sum)))


def map_cluster_labels(labels_true, labels_pred):
"""Translate prediction labels to maximize the accuracy.
Review comment (Member): "Find the best mapping between true and predicted cluster labels"


Translate the prediction labels of a clustering output to those in the
ground truth, to enable the calculation of external metrics (e.g. accuracy,
f1_score, ...). The translation maximizes the sum of the main diagonal of
the confusion matrix :math:`C`, :math:`\sum_{i=0}^{K} C_{i, i}`.

Parameters
Review comment (Member): blank line before this

----------
labels_true : array, shape = [n_samples]
Ground truth (correct) target values.
labels_pred : array, shape = [n_samples]
Estimated clusters as returned by a clustering algorithm.

Returns
Review comment (Member): blank line before this

-------
trans : array, shape = [n_classes, n_classes]
Review comment (Member): This is not what you return

Review comment (Member): I'd be tempted to return three arrays:

  • true_clusters
  • pred_clusters
  • score

Each is of length min(n_true_clusters, n_pred_clusters). This is more than what the user functionally needs, but seeing as we don't have any similar functions in the library, and don't expect to, I think this is most useful to help with subsequent computations.
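
A hypothetical sketch of what that three-array interface could look like (not
part of this PR; the name matched_clusters is a placeholder, and it reuses
contingency_matrix and linear_assignment exactly as the PR code below does):

def matched_clusters(labels_true, labels_pred):
    # Rows of the contingency matrix are true classes, columns are clusters.
    C = contingency_matrix(labels_true, labels_pred)
    # linear_assignment minimizes cost, so negate C to maximize total overlap.
    true_clusters, pred_clusters = linear_assignment(-C).T
    # Samples shared by each matched (class, cluster) pair; all three arrays
    # have length min(n_true_clusters, n_pred_clusters).
    score = C[true_clusters, pred_clusters]
    return true_clusters, pred_clusters, score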

Review comment (Member): Hmm.. I've now realised you're producing something slightly different to what I was thinking. And fair enough. Perhaps you're right that we should produce your more usable/understandable interface, but it is not sufficient to implement something like CEAF :(

Mapping of ``labels_pred`` clusters, such that
:math:`trans \subseteq labels\_true`

References
Review comment (Member): blank line before this

----------

Examples
--------
>>> from sklearn.metrics import confusion_matrix
>>> from sklearn.metrics.cluster import map_cluster_labels
>>> labels_true = ["class1", "class2", "class3", "class1", "class1",
... "class3"]
>>> labels_pred = [0, 0, 2, 2, 0, 2]
>>> y_pred_translated = map_cluster_labels(labels_true, labels_pred)
>>> y_pred_translated
['class1', 'class1', 'class3', 'class3', 'class1', 'class3']
>>> confusion_matrix(labels_true, y_pred_translated)
array([[2, 0, 1],
[1, 0, 0],
[0, 0, 2]])
"""

classes = unique_labels(labels_true).tolist()
n_classes = len(classes)
clusters = unique_labels(labels_pred).tolist()
n_clusters = len(clusters)

if n_clusters > n_classes:
classes += ['DEFAULT_LABEL_'+str(i) for i in
range(n_clusters-n_classes)]
elif n_classes > n_clusters:
clusters += ['DEFAULT_CLUSTER_'+str(i) for i in
range(n_classes-n_clusters)]

C = contingency_matrix(labels_true, labels_pred)
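# linear_assignment solves a minimum-cost matching, so the contingency
# matrix is negated to maximize the total overlap between matched classes
# and clusters (the main-diagonal sum of the confusion matrix).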
true_idx, pred_idx = linear_assignment(-C).T

true_idx = true_idx.tolist()
pred_idx = pred_idx.tolist()

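# Map the matched indices back to label names; unmatched classes and
# clusters are appended in sorted order so both lists keep the same length
# and every cluster ends up paired with some label.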
true_idx = [classes[idx] for idx in true_idx]
true_idx = true_idx + sorted(set(classes) - set(true_idx))
pred_idx = [clusters[idx] for idx in pred_idx]
pred_idx = pred_idx + sorted(set(clusters) - set(pred_idx))

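# Relabel each prediction with the class name paired with its cluster.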
return_list = [true_idx[pred_idx.index(y)] for y in labels_pred]

return return_list
33 changes: 32 additions & 1 deletion sklearn/metrics/cluster/tests/test_supervised.py
@@ -12,10 +12,11 @@
from sklearn.metrics.cluster import mutual_info_score
from sklearn.metrics.cluster import normalized_mutual_info_score
from sklearn.metrics.cluster import v_measure_score
from sklearn.metrics.cluster import map_cluster_labels

from sklearn.utils import assert_all_finite
from sklearn.utils.testing import (
assert_equal, assert_almost_equal, assert_raise_message,
assert_equal, assert_almost_equal, assert_raise_message,
)
from numpy.testing import assert_array_almost_equal

@@ -275,3 +276,33 @@ def test_fowlkes_mallows_score_properties():
# symmetric and permutation(both together)
score_both = fowlkes_mallows_score(labels_b, (labels_a + 2) % 3)
assert_almost_equal(score_both, expected)


def test_map_cluster_labels():
# handcrafted example - same number of clusters and classes
Review comment (Member): Unless there is a reason to use a large example, it is best to use something in tests that a reader can easily reason about, i.e. a small example. Besides, linear_assignment is tested: your job here is to check you've used it correctly.

y_true = ['a', 'b', 'b', 'c', 'c', 'a']
y_pred = [1, 0, 0, 1, 2, 1]

expected = ['a', 'b', 'b', 'a', 'c', 'a']

y_pred_translated = map_cluster_labels(y_true, y_pred)
assert_equal(y_pred_translated, expected)

# handcrafted example - more clusters than classes
y_true = ['a', 'a', 'a', 'b', 'b', 'b']
y_pred = [4, 0, 1, 1, 2, 2]

expected = ['DEFAULT_LABEL_1', 'a', 'DEFAULT_LABEL_0', 'DEFAULT_LABEL_0',
'b', 'b']

y_pred_translated = map_cluster_labels(y_true, y_pred)
assert_equal(y_pred_translated, expected)

# handcrafted example - more classes than clusters
y_true = ['a', 'd', 'e', 'b', 'b', 'b']
y_pred = [0, 0, -1, -1, 2, 2]

expected = ['a', 'a', 'e', 'e', 'b', 'b']

y_pred_translated = map_cluster_labels(y_true, y_pred)
assert_equal(y_pred_translated, expected)