From e40a7f56f0970cb06c3574a379452b9951542c9e Mon Sep 17 00:00:00 2001
From: Arnaud Joly <arnaud.v.joly@gmail.com>
Date: Wed, 29 May 2013 14:14:15 +0200
Subject: [PATCH 01/16] FIX unique_labels in corner case

---
 sklearn/utils/multiclass.py            | 69 ++++++++++++++-----
 sklearn/utils/tests/test_multiclass.py | 95 +++++++++++++++++++++++++-
 2 files changed, 145 insertions(+), 19 deletions(-)

diff --git a/sklearn/utils/multiclass.py b/sklearn/utils/multiclass.py
index be79114e3dfda..cc6106eab0074 100644
--- a/sklearn/utils/multiclass.py
+++ b/sklearn/utils/multiclass.py
@@ -14,16 +14,13 @@
 from ..externals.six import string_types
 
 
-def unique_labels(*lists_of_labels):
+def unique_labels(*ys):
     """Extract an ordered array of unique labels
 
     Parameters
     ----------
-    lists_of_labels : list of labels,
-        The supported "list of labels" are:
-            - a list / tuple / numpy array of int
-            - a list of lists / tuples of int;
-            - a binary indicator matrix (2D numpy array)
+    ys : array-likes,
+        Must be either in binary, multiclass or multilabel format.
 
     Returns
     -------
@@ -45,23 +42,59 @@ def unique_labels(*lists_of_labels):
     array([1, 2, 3])
 
     """
-    def _unique_labels(y):
-        classes = None
-        if is_multilabel(y):
-            if is_label_indicator_matrix(y):
-                classes = np.arange(y.shape[1])
-            else:
-                classes = np.array(sorted(set(chain(*y))))
 
-        else:
-            classes = np.unique(y)
+    def _unique_multiclass(y):
+        return np.unique(y)
 
-        return classes
+    def _unique_sequence_of_sequence(y):
+        return np.array(sorted(set(chain(*y))))
 
-    if not lists_of_labels:
+    def _unique_indicator(y):
+        return np.arange(y.shape[1])
+
+    if not ys:
         raise ValueError('No list of labels has been passed.')
 
-    return np.unique(np.hstack(_unique_labels(y) for y in lists_of_labels))
+    ys_is_multilabels = [is_multilabel(y) for y in ys]
+
+    if len(set(ys_is_multilabels)) != 1:
+        raise ValueError("Mix of binary / mutliclass and multilabel type")
+
+    if all(ys_is_multilabels):
+        ys_is_indicator = [is_label_indicator_matrix(y) for y in ys]
+
+        # Mix of indicator and sequence of sequence multilabel format
+        if len(set(ys_is_indicator)) != 1:
+            labels = unique_labels(*[y for y in ys
+                                     if not is_label_indicator_matrix(y)])
+            labels_idx = unique_labels(*[y for y in ys
+                                         if is_label_indicator_matrix(y)])
+            if labels.size != labels_idx.size:
+                raise ValueError("Unable to infer mix multilabel type")
+
+            return labels
+
+        # Only indicator multilabel format
+        elif all(ys_is_indicator):
+            if len(set(y.shape[1] for y in ys)) > 1:
+                raise ValueError("Multi-label binary indicator input with "
+                                 "different number of labels")
+            else:
+                return _unique_indicator(ys[0])
+        else:
+            # Only indicator sequence of sequence multilabel format
+            _unique_labels = _unique_sequence_of_sequence
+
+    else:
+        _unique_labels = _unique_multiclass
+
+    # Combine every labels
+    ys_labels = [_unique_labels(y) for y in ys]
+
+    if len(set(y_labels.dtype.kind for y_labels in ys_labels)) > 1:
+        raise ValueError("Mix of dtype.kind, can't infered labels set")
+
+    return np.unique(np.hstack(ys_labels))
 
 
 def _is_integral_float(y):
diff --git a/sklearn/utils/tests/test_multiclass.py b/sklearn/utils/tests/test_multiclass.py
index e8ad3b77905d2..1842624213eb6 100644
--- a/sklearn/utils/tests/test_multiclass.py
+++ b/sklearn/utils/tests/test_multiclass.py
@@ -1,5 +1,5 @@
 import numpy as np
-
+from itertools import product
 from sklearn.externals.six.moves import xrange
 from sklearn.externals.six import iteritems
 
@@ -117,6 +117,58 @@
 ]
 
 
+EXAMPLES = {
+    'multilabel-indicator': [
+        np.random.randint(2, size=(10, 10)),
+        np.array([[0, 1], [1, 0]]),
+        np.array([[0, 0], [0, 0]]),
+        np.array([[-1, 1], [1, -1]]),
+        np.array([[-3, 3], [3, -3]]),
+
+        # XXX : not considered as multilabel-indicator at the moment
+        #       see is_label_indicator_matrix
+        # np.array([[0, 1]]),
+    ],
+    'multilabel-sequences': [
+        [[0, 1]],
+        [[0], [1]],
+        [[1, 2, 3]],
+        [[1, 2, 1]],  # duplicate values, why not?
+        [[1], [2], [0, 1]],
+        [[1], [2]],
+        [[]],
+        [()],
+        np.array([[], [1, 2]], dtype='object'),
+    ],
+    'multiclass': [
+        [1, 0, 2, 2, 1, 4, 2, 4, 4, 4],
+        np.array([1, 0, 2]),
+        np.array([[1], [0], [2]]),
+        [0, 1, 2],
+        ['a', 'b', 'c'],
+    ],
+    'multiclass-multioutput': [
+        np.array([[1, 0, 2, 2], [1, 4, 2, 4]]),
+        np.array([['a', 'b'], ['c', 'd']]),
+        np.array([[1, 0, 2]]),
+    ],
+    'binary': [
+        [0, 1],
+        [1, 1],
+        [],
+        [0],
+        np.array([0, 1, 1, 1, 0, 0, 0, 1, 1, 1]),
+        np.array([[0], [1]]),
+        [1, -1],
+        [3, 5],
+        ['a'],
+        ['a', 'b'],
+        ['abc', 'def'],
+    ],
+
+}
+
+
 def test_unique_labels():
     # Empty iterable
     assert_raises(ValueError, unique_labels)
@@ -142,6 +194,47 @@ def test_unique_labels():
     assert_array_equal(unique_labels((0, 1, 2), (0,), (2, 1)),
                        np.arange(3))
 
+    # Border line case with binary indicator matrix
+    assert_raises(ValueError, unique_labels, [4, 0, 2], np.ones((5, 5)))
+    assert_raises(ValueError, unique_labels, np.ones((5, 4)), np.ones((5, 5)))
+    assert_array_equal(unique_labels(np.ones((4, 5)), np.ones((5, 5))),
+                       np.arange(5))
+
+    # Some tests with strings input
+    assert_array_equal(unique_labels(["a", "b", "c"], ["d"]),
+                       ["a", "b", "c", "d"])
+    assert_array_equal(unique_labels([["a", "b"], ["c"]], [["d"]]),
+                       ["a", "b", "c", "d"])
+
+    #Mix of multilabel-indicator and multilabel-sequences
+    assert_array_equal(unique_labels([["a", "b"], ["c"]], np.ones((3, 3))),
+                       ["a", "b", "c"])
+    assert_raises(ValueError, unique_labels, [["a", "b"], ["c"]],
+                  np.ones((3, 4)))
+    assert_raises(ValueError, unique_labels, [["a", "b"], ["c", 'd']],
+                  np.ones((3, 3)))
+
+    assert_array_equal(unique_labels([[1, 2], [3]], np.ones((3, 3))),
+                       [1, 2, 3])
+    assert_raises(ValueError, unique_labels, [[1, 2], [3]],
+                  np.ones((3, 4)))
+    assert_raises(ValueError, unique_labels, [[1, 2], [3, 4]],
+                  np.ones((3, 3)))
+
+    #Mix with binary or multiclass and multilabel
+    pair_multiclass_multilabel = product(EXAMPLES["multilabel-indicator"] +
+                                         EXAMPLES["multilabel-sequences"],
+                                         EXAMPLES["multiclass"] +
+                                         EXAMPLES["binary"])
+
+    for y_multilabel, y_multiclass in pair_multiclass_multilabel:
+        assert_raises(ValueError, unique_labels, y_multiclass, y_multilabel)
+        assert_raises(ValueError, unique_labels, y_multilabel, y_multiclass)
+
+    # Mix input type
+    assert_raises(ValueError, unique_labels, [[1, 2], [3]],
+                  [["a", "d"]])
+
 
 def test_is_multilabel():
     for group, group_examples in iteritems(EXAMPLES):

From e3ca5c13eb2844ff34b3e900346f408bf48062c5 Mon Sep 17 00:00:00 2001
From: Arnaud Joly <arnaud.v.joly@gmail.com>
Date: Wed, 29 May 2013 14:51:54 +0200
Subject: [PATCH 02/16] FIX issue with comparable but different dtype

---
 sklearn/utils/multiclass.py            | 4 +++-
 sklearn/utils/tests/test_multiclass.py | 1 +
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/sklearn/utils/multiclass.py b/sklearn/utils/multiclass.py
index cc6106eab0074..3c55fc24863f1 100644
--- a/sklearn/utils/multiclass.py
+++ b/sklearn/utils/multiclass.py
@@ -90,8 +90,10 @@ def _unique_indicator(y):
 
     # Combine every labels
     ys_labels = [_unique_labels(y) for y in ys]
+    labels_type_set = set(y_labels.dtype.kind for y_labels in ys_labels)
 
-    if len(set(y_labels.dtype.kind for y_labels in ys_labels)) > 1:
+    if (not (labels_type_set <= set(["b", "i", "u","f"])) and
+            not (labels_type_set <= set(["S", "a"," U"]))):
         raise ValueError("Mix of dtype.kind, can't infered labels set")
 
     return np.unique(np.hstack(ys_labels))
diff --git a/sklearn/utils/tests/test_multiclass.py b/sklearn/utils/tests/test_multiclass.py
index 1842624213eb6..0932d77503e72 100644
--- a/sklearn/utils/tests/test_multiclass.py
+++ b/sklearn/utils/tests/test_multiclass.py
@@ -234,6 +234,7 @@ def test_unique_labels():
     # Mix input type
     assert_raises(ValueError, unique_labels, [[1, 2], [3]],
                   [["a", "d"]])
+    assert_array_equal(unique_labels([(2,), (0, 2,)], [(), ()]), [0, 2])
 
 
 def test_is_multilabel():

From adcb0a811ee0596256ebe66524d2f33e1127129a Mon Sep 17 00:00:00 2001
From: Arnaud Joly <arnaud.v.joly@gmail.com>
Date: Mon, 3 Jun 2013 10:26:30 +0200
Subject: [PATCH 03/16] ENH don't allow mix of input multilabel format

---
 sklearn/utils/multiclass.py            | 21 ++++++-------------
 sklearn/utils/tests/test_multiclass.py | 28 +++++++++-----------------
 2 files changed, 16 insertions(+), 33 deletions(-)

diff --git a/sklearn/utils/multiclass.py b/sklearn/utils/multiclass.py
index 3c55fc24863f1..e181c9b82a2ed 100644
--- a/sklearn/utils/multiclass.py
+++ b/sklearn/utils/multiclass.py
@@ -63,27 +63,18 @@ def _unique_indicator(y):
     if all(ys_is_multilabels):
         ys_is_indicator = [is_label_indicator_matrix(y) for y in ys]
 
-        # Mix of indicator and sequence of sequence multilabel format
-        if len(set(ys_is_indicator)) != 1:
-            labels = unique_labels(*[y for y in ys
-                                     if not is_label_indicator_matrix(y)])
-            labels_idx = unique_labels(*[y for y in ys
-                                         if is_label_indicator_matrix(y)])
-            if labels.size != labels_idx.size:
-                raise ValueError("Unable to infer mix multilabel type")
-
-            return labels
-
         # Only indicator multilabel format
-        elif all(ys_is_indicator):
+        if all(ys_is_indicator):
             if len(set(y.shape[1] for y in ys)) > 1:
                 raise ValueError("Multi-label binary indicator input with "
                                  "different number of labels")
             else:
                 return _unique_indicator(ys[0])
-        else:
+        elif all(not ys_is_indicator for ys_is_indicator in ys_is_indicator):
             # Only indicator sequence of sequence multilabel format
             _unique_labels = _unique_sequence_of_sequence
+        else:
+            raise ValueError("Mix multilabel input format")
 
     else:
         _unique_labels = _unique_multiclass
@@ -92,8 +83,8 @@ def _unique_indicator(y):
     ys_labels = [_unique_labels(y) for y in ys]
     labels_type_set = set(y_labels.dtype.kind for y_labels in ys_labels)
 
-    if (not (labels_type_set <= set(["b", "i", "u","f"])) and
-            not (labels_type_set <= set(["S", "a"," U"]))):
+    if (not (labels_type_set <= set(["b", "i", "u", "f"])) and
+            not (labels_type_set <= set(["S", "a", " U"]))):
         raise ValueError("Mix of dtype.kind, can't infered labels set")
 
     return np.unique(np.hstack(ys_labels))
diff --git a/sklearn/utils/tests/test_multiclass.py b/sklearn/utils/tests/test_multiclass.py
index 0932d77503e72..090f2ebc2d468 100644
--- a/sklearn/utils/tests/test_multiclass.py
+++ b/sklearn/utils/tests/test_multiclass.py
@@ -207,27 +207,19 @@ def test_unique_labels():
                        ["a", "b", "c", "d"])
 
     #Mix of multilabel-indicator and multilabel-sequences
-    assert_array_equal(unique_labels([["a", "b"], ["c"]], np.ones((3, 3))),
-                       ["a", "b", "c"])
-    assert_raises(ValueError, unique_labels, [["a", "b"], ["c"]],
-                  np.ones((3, 4)))
-    assert_raises(ValueError, unique_labels, [["a", "b"], ["c", 'd']],
-                  np.ones((3, 3)))
-
-    assert_array_equal(unique_labels([[1, 2], [3]], np.ones((3, 3))),
-                       [1, 2, 3])
-    assert_raises(ValueError, unique_labels, [[1, 2], [3]],
-                  np.ones((3, 4)))
-    assert_raises(ValueError, unique_labels, [[1, 2], [3, 4]],
-                  np.ones((3, 3)))
+    mix_multilabel_format = product(EXAMPLES["multilabel-indicator"],
+                                    EXAMPLES["multilabel-sequences"])
+    for y_multilabel, y_multiclass in mix_multilabel_format:
+        assert_raises(ValueError, unique_labels, y_multiclass, y_multilabel)
+        assert_raises(ValueError, unique_labels, y_multilabel, y_multiclass)
 
     #Mix with binary or multiclass and multilabel
-    pair_multiclass_multilabel = product(EXAMPLES["multilabel-indicator"] +
-                                         EXAMPLES["multilabel-sequences"],
-                                         EXAMPLES["multiclass"] +
-                                         EXAMPLES["binary"])
+    mix_clf_format = product(EXAMPLES["multilabel-indicator"] +
+                             EXAMPLES["multilabel-sequences"],
+                             EXAMPLES["multiclass"] +
+                             EXAMPLES["binary"])
 
-    for y_multilabel, y_multiclass in pair_multiclass_multilabel:
+    for y_multilabel, y_multiclass in mix_clf_format:
         assert_raises(ValueError, unique_labels, y_multiclass, y_multilabel)
         assert_raises(ValueError, unique_labels, y_multilabel, y_multiclass)
 

From 825b2c0cadcb20caff389394bb1b3d15744fcab9 Mon Sep 17 00:00:00 2001
From: Arnaud Joly <arnaud.v.joly@gmail.com>
Date: Mon, 3 Jun 2013 10:29:20 +0200
Subject: [PATCH 04/16] ENH simpler check for mix of string and number input

---
 sklearn/utils/multiclass.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/sklearn/utils/multiclass.py b/sklearn/utils/multiclass.py
index e181c9b82a2ed..214c2ba4b0dee 100644
--- a/sklearn/utils/multiclass.py
+++ b/sklearn/utils/multiclass.py
@@ -81,11 +81,12 @@ def _unique_indicator(y):
 
     # Combine every labels
     ys_labels = [_unique_labels(y) for y in ys]
-    labels_type_set = set(y_labels.dtype.kind for y_labels in ys_labels)
+    y_is_string = [y_labels.dtype.type is np.string_
+                   for y_labels in ys_labels]
 
-    if (not (labels_type_set <= set(["b", "i", "u", "f"])) and
-            not (labels_type_set <= set(["S", "a", " U"]))):
-        raise ValueError("Mix of dtype.kind, can't infered labels set")
+    if len(set(y_is_string)) != 1:
+        raise ValueError("Mix of string and number type: "
+                         "can't infered unique labels set")
 
     return np.unique(np.hstack(ys_labels))
 

From 45847a7b422d8eb37da1125d688c1b817540e8da Mon Sep 17 00:00:00 2001
From: Arnaud Joly <arnaud.v.joly@gmail.com>
Date: Mon, 3 Jun 2013 10:47:18 +0200
Subject: [PATCH 05/16] COSMIT better name

---
 sklearn/utils/multiclass.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/sklearn/utils/multiclass.py b/sklearn/utils/multiclass.py
index 214c2ba4b0dee..455300bcc2834 100644
--- a/sklearn/utils/multiclass.py
+++ b/sklearn/utils/multiclass.py
@@ -81,10 +81,10 @@ def _unique_indicator(y):
 
     # Combine every labels
     ys_labels = [_unique_labels(y) for y in ys]
-    y_is_string = [y_labels.dtype.type is np.string_
-                   for y_labels in ys_labels]
+    ys_is_string = [y_labels.dtype.type is np.string_
+                    for y_labels in ys_labels]
 
-    if len(set(y_is_string)) != 1:
+    if len(set(ys_is_string)) != 1:
         raise ValueError("Mix of string and number type: "
                          "can't infered unique labels set")
 

From 46d4718712c09804e9d492e1fcbbbfdcf121a24f Mon Sep 17 00:00:00 2001
From: Arnaud Joly <arnaud.v.joly@gmail.com>
Date: Mon, 3 Jun 2013 13:06:38 +0200
Subject: [PATCH 06/16] Typo

---
 sklearn/utils/multiclass.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/sklearn/utils/multiclass.py b/sklearn/utils/multiclass.py
index 455300bcc2834..5893a79008832 100644
--- a/sklearn/utils/multiclass.py
+++ b/sklearn/utils/multiclass.py
@@ -58,7 +58,7 @@ def _unique_indicator(y):
     ys_is_multilabels = [is_multilabel(y) for y in ys]
 
     if len(set(ys_is_multilabels)) != 1:
-        raise ValueError("Mix of binary / mutliclass and multilabel type")
+        raise ValueError("Mix of binary / mutliclass and multilabel types")
 
     if all(ys_is_multilabels):
         ys_is_indicator = [is_label_indicator_matrix(y) for y in ys]
@@ -66,7 +66,7 @@ def _unique_indicator(y):
         # Only indicator multilabel format
         if all(ys_is_indicator):
             if len(set(y.shape[1] for y in ys)) > 1:
-                raise ValueError("Multi-label binary indicator input with "
+                raise ValueError("Multi-label binary indicator inputs with "
                                  "different number of labels")
             else:
                 return _unique_indicator(ys[0])
@@ -74,7 +74,7 @@ def _unique_indicator(y):
             # Only indicator sequence of sequence multilabel format
             _unique_labels = _unique_sequence_of_sequence
         else:
-            raise ValueError("Mix multilabel input format")
+            raise ValueError("Mix of multilabel input format")
 
     else:
         _unique_labels = _unique_multiclass
@@ -85,8 +85,7 @@ def _unique_indicator(y):
                     for y_labels in ys_labels]
 
     if len(set(ys_is_string)) != 1:
-        raise ValueError("Mix of string and number type: "
-                         "can't infered unique labels set")
+        raise ValueError("Mix of label input type s(string and number)")
 
     return np.unique(np.hstack(ys_labels))
 

From 91e4b9a91f84eb8e164fc5c0dd501fb99ebef473 Mon Sep 17 00:00:00 2001
From: Arnaud Joly <arnaud.v.joly@gmail.com>
Date: Sun, 23 Jun 2013 00:00:12 +0200
Subject: [PATCH 07/16] ENH use type_of_target within unique_labels

---
 sklearn/utils/multiclass.py            | 72 ++++++++++++++------------
 sklearn/utils/tests/test_multiclass.py | 59 +++------------------
 2 files changed, 48 insertions(+), 83 deletions(-)

diff --git a/sklearn/utils/multiclass.py b/sklearn/utils/multiclass.py
index 5893a79008832..17f7be780a55c 100644
--- a/sklearn/utils/multiclass.py
+++ b/sklearn/utils/multiclass.py
@@ -14,13 +14,32 @@
 from ..externals.six import string_types
 
 
+def _unique_multiclass(y):
+    return np.unique(y)
+
+
+def _unique_sequence_of_sequence(y):
+    return np.array(sorted(set(chain(*y))))
+
+
+def _unique_indicator(y):
+    return np.arange(y.shape[1])
+
+
+_FN_UNIQUE_LABELS = {
+    'binary': _unique_multiclass,
+    'multiclass': _unique_multiclass,
+    'multilabel-sequences': _unique_sequence_of_sequence,
+    'multilabel-indicator': _unique_indicator,
+}
+
+
 def unique_labels(*ys):
     """Extract an ordered array of unique labels
 
     Parameters
     ----------
     ys : array-likes,
-        Must be either in binary, multiclass or multilabel format.
 
     Returns
     -------
@@ -42,42 +61,31 @@ def unique_labels(*ys):
     array([1, 2, 3])
 
     """
+    if not ys:
+        raise ValueError('No argument has been passed.')
 
-    def _unique_multiclass(y):
-        return np.unique(y)
-
-    def _unique_sequence_of_sequence(y):
-        return np.array(sorted(set(chain(*y))))
+    ys_types = [type_of_target(x) for x in ys]
 
-    def _unique_indicator(y):
-        return np.arange(y.shape[1])
+    if len(set(ys_types)) != 1:
+        if set(ys_types) == set(["binary", "multiclass"]):
+            label_type = "multiclass"
 
-    if not ys:
-        raise ValueError('No list of labels has been passed.')
-
-    ys_is_multilabels = [is_multilabel(y) for y in ys]
-
-    if len(set(ys_is_multilabels)) != 1:
-        raise ValueError("Mix of binary / mutliclass and multilabel types")
-
-    if all(ys_is_multilabels):
-        ys_is_indicator = [is_label_indicator_matrix(y) for y in ys]
-
-        # Only indicator multilabel format
-        if all(ys_is_indicator):
-            if len(set(y.shape[1] for y in ys)) > 1:
-                raise ValueError("Multi-label binary indicator inputs with "
-                                 "different number of labels")
-            else:
-                return _unique_indicator(ys[0])
-        elif all(not ys_is_indicator for ys_is_indicator in ys_is_indicator):
-            # Only indicator sequence of sequence multilabel format
-            _unique_labels = _unique_sequence_of_sequence
         else:
-            raise ValueError("Mix of multilabel input format")
-
+            raise ValueError("Mix type of y not allowed, got type %s"
+                             % ys_types)
     else:
-        _unique_labels = _unique_multiclass
+        label_type = ys_types[0]
+
+    # Check consistency for the indicator format
+    if (label_type == "multilabel-indicator" and
+            len(set(y.shape[1] for y in ys)) > 1):
+        raise ValueError("Multi-label binary indicator input with "
+                         "different numbers of labels")
+
+    # Get the proper unique function for the given format
+    _unique_labels = _FN_UNIQUE_LABELS.get(label_type, None)
+    if not _unique_labels:
+        raise ValueError("Unknown label type")
 
     # Combine every labels
     ys_labels = [_unique_labels(y) for y in ys]
diff --git a/sklearn/utils/tests/test_multiclass.py b/sklearn/utils/tests/test_multiclass.py
index 090f2ebc2d468..31ec927001e0a 100644
--- a/sklearn/utils/tests/test_multiclass.py
+++ b/sklearn/utils/tests/test_multiclass.py
@@ -117,57 +117,6 @@
 ]
 
 
-EXAMPLES = {
-    'multilabel-indicator': [
-        np.random.randint(2, size=(10, 10)),
-        np.array([[0, 1], [1, 0]]),
-        np.array([[0, 0], [0, 0]]),
-        np.array([[-1, 1], [1, -1]]),
-        np.array([[-3, 3], [3, -3]]),
-
-        # XXX : not considered as multilabel-indicator at the moment
-        #       see is_label_indicator_matrix
-        # np.array([[0, 1]]),
-    ],
-    'multilabel-sequences': [
-        [[0, 1]],
-        [[0], [1]],
-        [[1, 2, 3]],
-        [[1, 2, 1]],  # duplicate values, why not?
-        [[1], [2], [0, 1]],
-        [[1], [2]],
-        [[]],
-        [()],
-        np.array([[], [1, 2]], dtype='object'),
-    ],
-    'multiclass': [
-        [1, 0, 2, 2, 1, 4, 2, 4, 4, 4],
-        np.array([1, 0, 2]),
-        np.array([[1], [0], [2]]),
-        [0, 1, 2],
-        ['a', 'b', 'c'],
-    ],
-    'multiclass-multioutput': [
-        np.array([[1, 0, 2, 2], [1, 4, 2, 4]]),
-        np.array([['a', 'b'], ['c', 'd']]),
-        np.array([[1, 0, 2]]),
-    ],
-    'binary': [
-        [0, 1],
-        [1, 1],
-        [],
-        [0],
-        np.array([0, 1, 1, 1, 0, 0, 0, 1, 1, 1]),
-        np.array([[0], [1]]),
-        [1, -1],
-        [3, 5],
-        ['a'],
-        ['a', 'b'],
-        ['abc', 'def'],
-    ],
-
-}
-
 
 def test_unique_labels():
     # Empty iterable
@@ -219,6 +168,14 @@ def test_unique_labels():
                              EXAMPLES["multiclass"] +
                              EXAMPLES["binary"])
 
+    for example in NON_ARRAY_LIKE_EXAMPLES:
+        assert_raises(ValueError, unique_labels, example)
+
+    for y_type in ["unknown", "continuous", 'continuous-multioutput',
+                   'multiclass-multioutput']:
+        for example in EXAMPLES[y_type]:
+            assert_raises(ValueError, unique_labels, example)
+
     for y_multilabel, y_multiclass in mix_clf_format:
         assert_raises(ValueError, unique_labels, y_multiclass, y_multilabel)
         assert_raises(ValueError, unique_labels, y_multilabel, y_multiclass)

From 61d9f41d58a746ba1518450127b92c7298b96663 Mon Sep 17 00:00:00 2001
From: Arnaud Joly <arnaud.v.joly@gmail.com>
Date: Mon, 24 Jun 2013 14:01:35 +0200
Subject: [PATCH 08/16] ENH improve documentation with allowed label types

---
 sklearn/utils/multiclass.py | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/sklearn/utils/multiclass.py b/sklearn/utils/multiclass.py
index 17f7be780a55c..a927d9e0a15dc 100644
--- a/sklearn/utils/multiclass.py
+++ b/sklearn/utils/multiclass.py
@@ -37,6 +37,15 @@ def _unique_indicator(y):
 def unique_labels(*ys):
     """Extract an ordered array of unique labels
 
+    We don't allow:
+        - mix of multilabel and multiclass (single label) targets
+        - mix of label indicator matrix and anything else,
+          because there are no explicit labels
+        - mix of label indicator matrices of different sizes
+        - mix of string and integer labels
+
+    At the moment, we also don't allow "multiclass-multioutput" input type.
+
     Parameters
     ----------
     ys : array-likes,
@@ -71,7 +80,7 @@ def unique_labels(*ys):
             label_type = "multiclass"
 
         else:
-            raise ValueError("Mix type of y not allowed, got type %s"
+            raise ValueError("Mix type of y not allowed, got types %s"
                              % ys_types)
     else:
         label_type = ys_types[0]
@@ -89,11 +98,10 @@ def unique_labels(*ys):
 
     # Combine every labels
     ys_labels = [_unique_labels(y) for y in ys]
-    ys_is_string = [y_labels.dtype.type is np.string_
-                    for y_labels in ys_labels]
 
-    if len(set(ys_is_string)) != 1:
-        raise ValueError("Mix of label input type s(string and number)")
+    if (len(set(y_labels.dtype.type is np.string_
+                for y_labels in ys_labels)) != 1):
+        raise ValueError("Mix of label input types (string and number)")
 
     return np.unique(np.hstack(ys_labels))
 

From aef47a751a7d1f3679db5c642038f0a2597e0460 Mon Sep 17 00:00:00 2001
From: Arnaud Joly <arnaud.v.joly@gmail.com>
Date: Mon, 24 Jun 2013 15:28:29 +0200
Subject: [PATCH 09/16] ENH check that we don't mix number and strings

---
 sklearn/utils/multiclass.py            | 20 ++++++++++++--------
 sklearn/utils/tests/test_multiclass.py |  8 +++++++-
 2 files changed, 19 insertions(+), 9 deletions(-)

diff --git a/sklearn/utils/multiclass.py b/sklearn/utils/multiclass.py
index a927d9e0a15dc..ad0fa97b5f17c 100644
--- a/sklearn/utils/multiclass.py
+++ b/sklearn/utils/multiclass.py
@@ -8,6 +8,7 @@
 """
 from collections import Sequence
 from itertools import chain
+from itertools import imap
 
 import numpy as np
 
@@ -73,8 +74,8 @@ def unique_labels(*ys):
     if not ys:
         raise ValueError('No argument has been passed.')
 
+    # Check that we don't mix label format
     ys_types = [type_of_target(x) for x in ys]
-
     if len(set(ys_types)) != 1:
         if set(ys_types) == set(["binary", "multiclass"]):
             label_type = "multiclass"
@@ -91,19 +92,22 @@ def unique_labels(*ys):
         raise ValueError("Multi-label binary indicator input with "
                          "different numbers of labels")
 
+    # Check that we don't mix string and number type
+    if ((label_type in ("binary", "multiclass") and
+            len(set([isinstance(x, basestring)
+                     for y in ys for x in y])) > 1) or
+        (label_type == "multilabel-sequences" and
+            len(set.union(*[set(imap(lambda x: isinstance(x, basestring),
+                                     chain(*y))) for y in ys])) > 1)):
+        raise ValueError("Mix of label input types (string and number)")
+
     # Get the proper unique function for the given format
     _unique_labels = _FN_UNIQUE_LABELS.get(label_type, None)
     if not _unique_labels:
         raise ValueError("Unknown label type")
 
     # Combine every labels
-    ys_labels = [_unique_labels(y) for y in ys]
-
-    if (len(set(y_labels.dtype.type is np.string_
-                for y_labels in ys_labels)) != 1):
-        raise ValueError("Mix of label input types (string and number)")
-
-    return np.unique(np.hstack(ys_labels))
+    return np.unique(np.hstack(_unique_labels(y) for y in ys))
 
 
 def _is_integral_float(y):
diff --git a/sklearn/utils/tests/test_multiclass.py b/sklearn/utils/tests/test_multiclass.py
index 31ec927001e0a..ebe4ff6bcfaed 100644
--- a/sklearn/utils/tests/test_multiclass.py
+++ b/sklearn/utils/tests/test_multiclass.py
@@ -180,10 +180,16 @@ def test_unique_labels():
         assert_raises(ValueError, unique_labels, y_multiclass, y_multilabel)
         assert_raises(ValueError, unique_labels, y_multilabel, y_multiclass)
 
-    # Mix input type
+    # Mix string and number input type
     assert_raises(ValueError, unique_labels, [[1, 2], [3]],
                   [["a", "d"]])
+    assert_raises(ValueError, unique_labels, ["1", 2])
+    assert_raises(ValueError, unique_labels, [["1", 2], [3]])
+    assert_raises(ValueError, unique_labels, [["1", "2"], [3]])
+
     assert_array_equal(unique_labels([(2,), (0, 2,)], [(), ()]), [0, 2])
+    assert_array_equal(unique_labels([("2",), ("0", "2",)], [(), ()]),
+                       ["0", "2"])
 
 
 def test_is_multilabel():

From 7856a72da8b4ba0e24baff56fad54b9343c804b0 Mon Sep 17 00:00:00 2001
From: Arnaud Joly <arnaud.v.joly@gmail.com>
Date: Mon, 24 Jun 2013 15:38:34 +0200
Subject: [PATCH 10/16] Flatten label type checking

---
 sklearn/utils/multiclass.py | 22 ++++++++++------------
 1 file changed, 10 insertions(+), 12 deletions(-)

diff --git a/sklearn/utils/multiclass.py b/sklearn/utils/multiclass.py
index ad0fa97b5f17c..3e004f20655b4 100644
--- a/sklearn/utils/multiclass.py
+++ b/sklearn/utils/multiclass.py
@@ -75,16 +75,14 @@ def unique_labels(*ys):
         raise ValueError('No argument has been passed.')
 
     # Check that we don't mix label format
-    ys_types = [type_of_target(x) for x in ys]
-    if len(set(ys_types)) != 1:
-        if set(ys_types) == set(["binary", "multiclass"]):
-            label_type = "multiclass"
-
-        else:
-            raise ValueError("Mix type of y not allowed, got types %s"
-                             % ys_types)
-    else:
-        label_type = ys_types[0]
+    ys_types = set(type_of_target(x) for x in ys)
+    if ys_types == set(["binary", "multiclass"]):
+        ys_types = set(["multiclass"])
+
+    if len(ys_types) > 1:
+        raise ValueError("Mix type of y not allowed, got types %s" % ys_types)
+
+    label_type = ys_types.pop()
 
     # Check consistency for the indicator format
     if (label_type == "multilabel-indicator" and
@@ -92,7 +90,7 @@ def unique_labels(*ys):
         raise ValueError("Multi-label binary indicator input with "
                          "different numbers of labels")
 
-    # Check that we don't mix string and number type
+    # Check that we don't mix string type with number type
     if ((label_type in ("binary", "multiclass") and
             len(set([isinstance(x, basestring)
                      for y in ys for x in y])) > 1) or
@@ -106,7 +104,7 @@ def unique_labels(*ys):
     if not _unique_labels:
         raise ValueError("Unknown label type")
 
-    # Combine every labels
+    # Combine labels
     return np.unique(np.hstack(_unique_labels(y) for y in ys))
 
 

From 33016ed7b910c42d3855efc82a5ed69ea588a150 Mon Sep 17 00:00:00 2001
From: Arnaud Joly <arnaud.v.joly@gmail.com>
Date: Mon, 24 Jun 2013 15:41:56 +0200
Subject: [PATCH 11/16] TST add smoke test for all supported format

---
 sklearn/utils/tests/test_multiclass.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/sklearn/utils/tests/test_multiclass.py b/sklearn/utils/tests/test_multiclass.py
index ebe4ff6bcfaed..2492b14cb1421 100644
--- a/sklearn/utils/tests/test_multiclass.py
+++ b/sklearn/utils/tests/test_multiclass.py
@@ -155,6 +155,12 @@ def test_unique_labels():
     assert_array_equal(unique_labels([["a", "b"], ["c"]], [["d"]]),
                        ["a", "b", "c", "d"])
 
+    # Smoke test for all supported formats
+    for format in ["binary", "multiclass", "multilabel-sequences",
+                   "multilabel-indicator"]:
+        for y in EXAMPLES[format]:
+            unique_labels(y)
+
     #Mix of multilabel-indicator and multilabel-sequences
     mix_multilabel_format = product(EXAMPLES["multilabel-indicator"],
                                     EXAMPLES["multilabel-sequences"])

From 6c68cac70a8577eb073cc32c043dc73ab58bdd15 Mon Sep 17 00:00:00 2001
From: Arnaud Joly <arnaud.v.joly@gmail.com>
Date: Tue, 25 Jun 2013 11:58:30 +0200
Subject: [PATCH 12/16] COSMIT

---
 sklearn/utils/tests/test_multiclass.py | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/sklearn/utils/tests/test_multiclass.py b/sklearn/utils/tests/test_multiclass.py
index 2492b14cb1421..3cd1f1f197171 100644
--- a/sklearn/utils/tests/test_multiclass.py
+++ b/sklearn/utils/tests/test_multiclass.py
@@ -161,6 +161,15 @@ def test_unique_labels():
         for y in EXAMPLES[format]:
             unique_labels(y)
 
+    # We don't support these formats at the moment
+    for example in NON_ARRAY_LIKE_EXAMPLES:
+        assert_raises(ValueError, unique_labels, example)
+
+    for y_type in ["unknown", "continuous", 'continuous-multioutput',
+                   'multiclass-multioutput']:
+        for example in EXAMPLES[y_type]:
+            assert_raises(ValueError, unique_labels, example)
+
     #Mix of multilabel-indicator and multilabel-sequences
     mix_multilabel_format = product(EXAMPLES["multilabel-indicator"],
                                     EXAMPLES["multilabel-sequences"])
@@ -174,14 +183,6 @@ def test_unique_labels():
                              EXAMPLES["multiclass"] +
                              EXAMPLES["binary"])
 
-    for example in NON_ARRAY_LIKE_EXAMPLES:
-        assert_raises(ValueError, unique_labels, example)
-
-    for y_type in ["unknown", "continuous", 'continuous-multioutput',
-                   'multiclass-multioutput']:
-        for example in EXAMPLES[y_type]:
-            assert_raises(ValueError, unique_labels, example)
-
     for y_multilabel, y_multiclass in mix_clf_format:
         assert_raises(ValueError, unique_labels, y_multiclass, y_multilabel)
         assert_raises(ValueError, unique_labels, y_multilabel, y_multiclass)

From 7699100be87763ef49ac75fa1298029f185e4377 Mon Sep 17 00:00:00 2001
From: Arnaud Joly <arnaud.v.joly@gmail.com>
Date: Tue, 25 Jun 2013 14:09:30 +0200
Subject: [PATCH 13/16] PY3K use six.string_type

---
 sklearn/utils/multiclass.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sklearn/utils/multiclass.py b/sklearn/utils/multiclass.py
index 3e004f20655b4..2b7cc35e4ae11 100644
--- a/sklearn/utils/multiclass.py
+++ b/sklearn/utils/multiclass.py
@@ -92,10 +92,10 @@ def unique_labels(*ys):
 
     # Check that we don't mix string type with number type
     if ((label_type in ("binary", "multiclass") and
-            len(set([isinstance(x, basestring)
+            len(set([isinstance(x, string_types)
                      for y in ys for x in y])) > 1) or
         (label_type == "multilabel-sequences" and
-            len(set.union(*[set(imap(lambda x: isinstance(x, basestring),
+            len(set.union(*[set(imap(lambda x: isinstance(x, string_types),
                                      chain(*y))) for y in ys])) > 1)):
         raise ValueError("Mix of label input types (string and number)")
 

From 109f2fb630e45b3214429ae4c8bdac034066236d Mon Sep 17 00:00:00 2001
From: Arnaud Joly <arnaud.v.joly@gmail.com>
Date: Tue, 25 Jun 2013 14:33:54 +0200
Subject: [PATCH 14/16] OPTIM + ENH simplify mix string and number check

---
 sklearn/utils/multiclass.py | 29 ++++++++++++++---------------
 1 file changed, 14 insertions(+), 15 deletions(-)

diff --git a/sklearn/utils/multiclass.py b/sklearn/utils/multiclass.py
index 2b7cc35e4ae11..4f192191fcf54 100644
--- a/sklearn/utils/multiclass.py
+++ b/sklearn/utils/multiclass.py
@@ -16,15 +16,18 @@
 
 
 def _unique_multiclass(y):
-    return np.unique(y)
+    if isinstance(y, np.ndarray):
+        return np.unique(y)
+    else:
+        return set(y)
 
 
 def _unique_sequence_of_sequence(y):
-    return np.array(sorted(set(chain(*y))))
+    return set(chain.from_iterable(y))
 
 
 def _unique_indicator(y):
-    return np.arange(y.shape[1])
+    return np.arange(y.shape[0])
 
 
 _FN_UNIQUE_LABELS = {
@@ -90,22 +93,18 @@ def unique_labels(*ys):
         raise ValueError("Multi-label binary indicator input with "
                          "different numbers of labels")
 
-    # Check that we don't mix string type with number type
-    if ((label_type in ("binary", "multiclass") and
-            len(set([isinstance(x, string_types)
-                     for y in ys for x in y])) > 1) or
-        (label_type == "multilabel-sequences" and
-            len(set.union(*[set(imap(lambda x: isinstance(x, string_types),
-                                     chain(*y))) for y in ys])) > 1)):
-        raise ValueError("Mix of label input types (string and number)")
-
-    # Get the proper unique function for the given format
+    # Get the unique set of labels
     _unique_labels = _FN_UNIQUE_LABELS.get(label_type, None)
     if not _unique_labels:
         raise ValueError("Unknown label type")
 
-    # Combine labels
-    return np.unique(np.hstack(_unique_labels(y) for y in ys))
+    y_labels = set(chain.from_iterable(imap(_unique_labels, ys)))
+
+    # Check that we don't mix string type with number type
+    if (len(set(isinstance(label, string_types) for label in y_labels)) > 1):
+        raise ValueError("Mix of label input types (string and number)")
+
+    return np.array(sorted(y_labels))
 
 
 def _is_integral_float(y):

From a9197f3dad8b6afb26419fe84d8bee4810be92e8 Mon Sep 17 00:00:00 2001
From: Arnaud Joly <arnaud.v.joly@gmail.com>
Date: Tue, 25 Jun 2013 14:40:51 +0200
Subject: [PATCH 15/16] FIX bug with indicator format

---
 sklearn/utils/multiclass.py            | 8 ++++----
 sklearn/utils/tests/test_multiclass.py | 5 ++++-
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/sklearn/utils/multiclass.py b/sklearn/utils/multiclass.py
index 4f192191fcf54..419b38a241ced 100644
--- a/sklearn/utils/multiclass.py
+++ b/sklearn/utils/multiclass.py
@@ -27,7 +27,7 @@ def _unique_sequence_of_sequence(y):
 
 
 def _unique_indicator(y):
-    return np.arange(y.shape[0])
+    return np.arange(y.shape[1])
 
 
 _FN_UNIQUE_LABELS = {
@@ -98,13 +98,13 @@ def unique_labels(*ys):
     if not _unique_labels:
         raise ValueError("Unknown label type")
 
-    y_labels = set(chain.from_iterable(imap(_unique_labels, ys)))
+    ys_labels = set(chain.from_iterable(imap(_unique_labels, ys)))
 
     # Check that we don't mix string type with number type
-    if (len(set(isinstance(label, string_types) for label in y_labels)) > 1):
+    if (len(set(isinstance(label, string_types) for label in ys_labels)) > 1):
         raise ValueError("Mix of label input types (string and number)")
 
-    return np.array(sorted(y_labels))
+    return np.array(sorted(ys_labels))
 
 
 def _is_integral_float(y):
diff --git a/sklearn/utils/tests/test_multiclass.py b/sklearn/utils/tests/test_multiclass.py
index 3cd1f1f197171..775a3a8a2248e 100644
--- a/sklearn/utils/tests/test_multiclass.py
+++ b/sklearn/utils/tests/test_multiclass.py
@@ -117,7 +117,6 @@
 ]
 
 
-
 def test_unique_labels():
     # Empty iterable
     assert_raises(ValueError, unique_labels)
@@ -137,6 +136,10 @@ def test_unique_labels():
                                                [0, 0, 0]])),
                        np.arange(3))
 
+    assert_array_equal(unique_labels(np.array([[0, 0, 1],
+                                               [0, 0, 0]])),
+                       np.arange(3))
+
     # Several arrays passed
     assert_array_equal(unique_labels([4, 0, 2], xrange(5)),
                        np.arange(5))

From a62abe66ff14fc60ddeea38e3d527baebe654463 Mon Sep 17 00:00:00 2001
From: Arnaud Joly <arnaud.v.joly@gmail.com>
Date: Tue, 25 Jun 2013 14:49:00 +0200
Subject: [PATCH 16/16] ENH use a comprehension over imap

---
 sklearn/utils/multiclass.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/sklearn/utils/multiclass.py b/sklearn/utils/multiclass.py
index 419b38a241ced..11289c433bf2d 100644
--- a/sklearn/utils/multiclass.py
+++ b/sklearn/utils/multiclass.py
@@ -8,7 +8,6 @@
 """
 from collections import Sequence
 from itertools import chain
-from itertools import imap
 
 import numpy as np
 
@@ -98,7 +97,7 @@ def unique_labels(*ys):
     if not _unique_labels:
         raise ValueError("Unknown label type")
 
-    ys_labels = set(chain.from_iterable(imap(_unique_labels, ys)))
+    ys_labels = set(chain.from_iterable(_unique_labels(y) for y in ys))
 
     # Check that we don't mix string type with number type
     if (len(set(isinstance(label, string_types) for label in ys_labels)) > 1):