scikit-learn · lobpcg · Oct 6, 2018 · Oct 6, 2018 · Oct 6, 2018 · Oct 6, 2018
diff --git a/doc/modules/clustering.rst b/doc/modules/clustering.rst
@@ -453,7 +453,6 @@ cluster. This criteria is especially interesting when working on images, where
 graph vertices are pixels, and weights of the edges of the similarity graph are
 computed using a function of a gradient of the image.
 
-
 .. |noisy_img| image:: ../auto_examples/cluster/images/sphx_glr_plot_segmentation_toy_001.png
     :target: ../auto_examples/cluster/plot_segmentation_toy.html
     :scale: 50
@@ -493,22 +492,22 @@ computed using a function of a gradient of the image.
     :target: ../auto_examples/cluster/plot_coin_segmentation.html
     :scale: 65
 
+.. |coin_clusterQR| image:: ../auto_examples/cluster/images/sphx_glr_plot_coin_segmentation_003.png
+    :target: ../auto_examples/cluster/plot_coin_segmentation.html
+    :scale: 65
+
 Different label assignment strategies
 -------------------------------------
 
 Different label assignment strategies can be used, corresponding to the
 ``assign_labels`` parameter of :class:`SpectralClustering`.
-``"kmeans"`` strategy can match finer details, but can be unstable.
+
+``"kmeans"`` strategy can match finer details, but it can be unstable.
 In particular, unless you control the ``random_state``, it may not be
 reproducible from run-to-run, as it depends on random initialization.
-The alternative ``"discretize"`` strategy is 100% reproducible, but tends
-to create parcels of fairly even and geometrical shape.
-
-=====================================  =====================================
- ``assign_labels="kmeans"``              ``assign_labels="discretize"``
-=====================================  =====================================
-|coin_kmeans|                          |coin_discretize|
-=====================================  =====================================
+The alternative ``"discretize"`` strategy is 100% reproducible, but tends
+to create parcels of fairly even and geometrical shape.
+The recently added ``"clusterQR"`` option is also 100% reproducible.
 
 Spectral Clustering Graphs
 --------------------------
@@ -540,6 +539,10 @@ graph, and SpectralClustering is initialized with `affinity='precomputed'`::
    <http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.19.8100>`_
    Andrew Y. Ng, Michael I. Jordan, Yair Weiss, 2001
 
+ * `"Robust and efficient multi-way spectral clustering"
+   <https://github.com/asdamle/QR-spectral-clustering>`_
+   Anil Damle, Victor Minden, Lexing Ying
+
  * `"Preconditioned Spectral Clustering for Stochastic
    Block Partition Streaming Graph Challenge"
    <https://arxiv.org/abs/1708.07481>`_

diff --git a/examples/cluster/plot_coin_segmentation.py b/examples/cluster/plot_coin_segmentation.py
@@ -10,16 +10,20 @@
 This procedure (spectral clustering on an image) is an efficient
 approximate solution for finding normalized graph cuts.
 
-There are two options to assign labels:
+There are three options to assign labels:
 
 * with 'kmeans' spectral clustering will cluster samples in the embedding space
-  using a kmeans algorithm
+  using a kmeans algorithm,
+* with 'clusterQR' spectral clustering will cluster samples in the embedding
+  space using a clusterQR algorithm,
 * whereas 'discrete' will iteratively search for the closest partition
   space to the embedding space.
+
 """
 print(__doc__)
 
 # Author: Gael Varoquaux <gael.varoquaux@normalesup.org>, Brian Cheung
+# Andrew Knyazev added clusterQR
 # License: BSD 3 clause
 
 import time
@@ -62,28 +66,34 @@
 eps = 1e-6
 graph.data = np.exp(-beta * graph.data / graph.data.std()) + eps
 
-# Apply spectral clustering (this step goes much faster if you have pyamg
-# installed)
-N_REGIONS = 25
+# The actual number of regions in this example is 27: background and 26 coins
+N_REGIONS = 26
 
 #############################################################################
-# Visualize the resulting regions
+# Compute and visualize the resulting regions
+
+# Any eigen_solver: 'arpack', 'lobpcg', 'amg' can be used. AMG is usually best
+# It often helps the spectral clustering to compute a few extra eigenvectors
+N_REGIONS_PLUS = 3
 
-for assign_labels in ('kmeans', 'discretize'):
+for assign_labels in ('kmeans', 'discretize', 'clusterQR'):
     t0 = time.time()
-    labels = spectral_clustering(graph, n_clusters=N_REGIONS,
-                                 assign_labels=assign_labels, random_state=42)
+    labels = spectral_clustering(graph,
+                                 n_clusters=(N_REGIONS + N_REGIONS_PLUS),
+                                 assign_labels=assign_labels, random_state=42,
+                                 eigen_solver='arpack')
     t1 = time.time()
     labels = labels.reshape(rescaled_coins.shape)
 
     plt.figure(figsize=(5, 5))
-    plt.imshow(rescaled_coins, cmap=plt.cm.gray)
-    for l in range(N_REGIONS):
-        plt.contour(labels == l,
-                    colors=[plt.cm.nipy_spectral(l / float(N_REGIONS))])
+    plt.imshow(rescaled_coins, cmap=plt.get_cmap('gray'))
     plt.xticks(())
     plt.yticks(())
     title = 'Spectral clustering: %s, %.2fs' % (assign_labels, (t1 - t0))
     print(title)
     plt.title(title)
+    for l in range(N_REGIONS):
+        plt.contour(labels == l,
+                    colors=[plt.cm.nipy_spectral((l+3) / float(N_REGIONS+3))])
+        plt.pause(0.5)
 plt.show()
diff --git a/sklearn/cluster/_spectral.py b/sklearn/cluster/_spectral.py
@@ -9,6 +9,8 @@
 
 import numpy as np
 
+from scipy.linalg import qr, svd
+
 from ..base import BaseEstimator, ClusterMixin
 from ..utils import check_random_state, as_float_array
 from ..utils.validation import check_array
@@ -18,6 +20,36 @@
 from ._k_means import k_means
 
 
+def clusterQR(vectors):
+    """Assign cluster labels directly from the eigenvector embedding
+    using a pivoted QR factorization.
+
+    Parameters
+    ----------
+    vectors : array-like, shape: (n_samples, n_clusters)
+        The embedding space of the samples.
+
+    Returns
+    -------
+    labels : array of integers, shape: n_samples
+        The labels of the clusters.
+
+    References
+    ----------
+    https://github.com/asdamle/QR-spectral-clustering
+    https://arxiv.org/abs/1708.07481
+    """
+
+    k = vectors.shape[1]
+    piv = qr(vectors.T, pivoting=True)[2]
+    piv = piv[0:k]
+    UtSV = svd(vectors[piv, :].T)
+    Ut = UtSV[0]
+    Vt = UtSV[2].T.conj()
+    vectors = abs(np.dot(vectors, np.dot(Ut, Vt.T)))
+    return vectors.argmax(axis=1).T
+
+
 def discretize(vectors, copy=True, max_svd_restarts=30, n_iter_max=20,
                random_state=None):
     """Search for a partition matrix (clustering) which is closest to the
@@ -210,14 +242,18 @@ def spectral_clustering(affinity, n_clusters=8, n_components=None,
         Stopping criterion for eigendecomposition of the Laplacian matrix
         when using arpack eigen_solver.
 
-    assign_labels : {'kmeans', 'discretize'}, default: 'kmeans'
+    assign_labels : {'kmeans', 'discretize', 'clusterQR'}, default: 'kmeans'
         The strategy to use to assign labels in the embedding
-        space.  There are two ways to assign labels after the laplacian
-        embedding.  k-means can be applied and is a popular choice. But it can
+        space. There are three ways to assign labels after the laplacian
+        embedding. k-means can be applied and is a popular choice. But it can
         also be sensitive to initialization. Discretization is another
         approach which is less sensitive to random initialization. See
         the 'Multiclass spectral clustering' paper referenced below for
-        more details on the discretization approach.
+        more details on the discretization approach. The clusterQR method
+        directly extracts clusters from eigenvectors in spectral clustering.
+        In contrast to k-means and discretization, clusterQR has no tuning
+        parameters and runs no iterations, yet may outperform k-means and
+        discretization in terms of both quality and speed.
 
     Returns
     -------
@@ -247,9 +283,10 @@ def spectral_clustering(affinity, n_clusters=8, n_components=None,
     This algorithm solves the normalized cut for k=2: it is a
     normalized spectral clustering.
     """
-    if assign_labels not in ('kmeans', 'discretize'):
-        raise ValueError("The 'assign_labels' parameter should be "
-                         "'kmeans' or 'discretize', but '%s' was given"
+    if assign_labels not in ('kmeans', 'discretize', 'clusterQR'):
+        raise ValueError(
+            "The 'assign_labels' parameter should be "
+            "'kmeans', 'discretize', or  'clusterQR' but '%s' was given"
                          % assign_labels)
 
     random_state = check_random_state(random_state)
@@ -266,6 +303,8 @@ def spectral_clustering(affinity, n_clusters=8, n_components=None,
     if assign_labels == 'kmeans':
         _, labels, _ = k_means(maps, n_clusters, random_state=random_state,
                                n_init=n_init)
+    elif assign_labels == 'clusterQR':
+        labels = clusterQR(maps)
     else:
         labels = discretize(maps, random_state=random_state)
 
@@ -351,7 +390,7 @@ class SpectralClustering(ClusterMixin, BaseEstimator):
         Stopping criterion for eigendecomposition of the Laplacian matrix
         when ``eigen_solver='arpack'``.
 
-    assign_labels : {'kmeans', 'discretize'}, default: 'kmeans'
+    assign_labels : {'kmeans', 'discretize', 'clusterQR'}, default: 'kmeans'
         The strategy to use to assign labels in the embedding
         space. There are two ways to assign labels after the laplacian
         embedding. k-means can be applied and is a popular choice. But it can

diff --git a/sklearn/cluster/tests/test_spectral.py b/sklearn/cluster/tests/test_spectral.py
@@ -28,7 +28,11 @@
 
 
 @pytest.mark.parametrize('eigen_solver', ('arpack', 'lobpcg'))
-@pytest.mark.parametrize('assign_labels', ('kmeans', 'discretize'))
+@pytest.mark.parametrize(
+    'assign_labels',
+    ('kmeans',
+     'discretize',
+     'clusterQR'))
 def test_spectral_clustering(eigen_solver, assign_labels):
     S = np.array([[1.0, 1.0, 1.0, 0.2, 0.0, 0.0, 0.0],
                   [1.0, 1.0, 1.0, 0.2, 0.0, 0.0, 0.0],