
[MRG] add lobpcg svd_solver to PCA and TruncatedSVD #12319


Closed
wants to merge 119 commits
Commits
4555c09
added lobpcg_svd solver to pca and truncated_svd
lobpcg Oct 7, 2018
ee76629
format editing added local lobpcg temporarily
lobpcg Oct 7, 2018
469fb78
lobpcg.py formatting
lobpcg Oct 8, 2018
4a2c802
formatting lobpcg.py
lobpcg Oct 8, 2018
d429211
errors fixed
lobpcg Oct 8, 2018
9d05e3b
formatting/error fixes
lobpcg Oct 8, 2018
5867469
multiple editing
lobpcg Oct 8, 2018
f8b23c1
"fake" lobpcg edits, auto transpose in extmath, many test changes
lobpcg Oct 9, 2018
0d8b85d
STY: small fixes
rc Oct 9, 2018
2078cbe
FIX: move making operators after system reporting to fix the report
rc Oct 10, 2018
896b38d
ENH: port fixes from scipy/scipy #9352
rc Oct 10, 2018
7166dc6
FIX: preserve input matrix dtypes
rc Oct 10, 2018
b9c5bc9
FIX: typo in test_singular_values()
rc Oct 10, 2018
0a1fb0d
STY: fix long lines
rc Oct 10, 2018
de7d195
STY: add extra lines (PEP8)
rc Oct 10, 2018
6f27f4b
added test_pca_lobpcg_solver to test_pca
lobpcg Oct 10, 2018
ec46500
lobpcg added to test_explained_variance in test_pca
lobpcg Oct 10, 2018
73c5baf
typos fixed
lobpcg Oct 10, 2018
4b3e760
lobpcg added to test_singular_values in test_pca
lobpcg Oct 10, 2018
b12921d
doctest matrix output formatting fixed in lobpcg
lobpcg Oct 10, 2018
732453b
lobpcg.py overwrite residualTolerance =< 0.0
lobpcg Oct 10, 2018
dd53ee3
typo fixed in lobpcg.py
lobpcg Oct 10, 2018
e62c6bc
3 test_lobpcg_pca_* added to test_pca
lobpcg Oct 10, 2018
690fd32
no need to set tol in test_n_components_mle
lobpcg Oct 10, 2018
08ff007
doctest errors fixed in lobpcg.py
lobpcg Oct 10, 2018
7fe4bfa
FIX: make save(), as2d() private
rc Oct 11, 2018
45f3ec0
DOC: merge Other Parameters with Parameters, document all return values
rc Oct 11, 2018
b449d78
FIX: correct tol in _report_nonhermitian()
rc Oct 12, 2018
2146c81
Merge pull request #1 from scikit-learn/master
lobpcg Oct 12, 2018
6c9aa2d
default verbosityLevel=0
lobpcg Oct 12, 2018
2a50f38
cumulative first round of editing requested by @rth
lobpcg Oct 14, 2018
e4dff55
trailing whitespaces removed
lobpcg Oct 14, 2018
d156df4
DOC: fix typo
rc Oct 14, 2018
a2e1578
unrelated comment removed
lobpcg Oct 19, 2018
3518e94
Merge branch 'lobpcg_svd' of https://github.com/lobpcg/scikit-learn i…
lobpcg Oct 19, 2018
d94bb93
extmath lobpcg_svd changes:
lobpcg Oct 21, 2018
09e0443
sklearn\utils\extmath.py lobpcg_svd
lobpcg Oct 22, 2018
6d3eda4
trailing whitespace removed
lobpcg Oct 22, 2018
f21849e
PEP8 formatting
lobpcg Oct 22, 2018
6826f0b
comment edit
lobpcg Oct 22, 2018
2cf4322
PEP8 line edit
lobpcg Oct 22, 2018
8cb8101
trying to fix LGTM error Keyword argument 'matvec' is not a supported…
lobpcg Oct 23, 2018
f96faaa
trailing whitespace removed
lobpcg Oct 23, 2018
2fe5eb3
default changed to explicitNormalMatrix=True
lobpcg Nov 1, 2018
47c1406
indentation (comment) fix?
lobpcg Nov 1, 2018
e8f6b48
ENH: support properly B=None branch in lobpcg() - do not compute unne…
rc Nov 4, 2018
2aeb959
STY: remove unused import
rc Nov 5, 2018
39cbe8b
ENH: speed-up LinearOperator in lobpcg_svd() by providing matmat()
rc Nov 5, 2018
3081252
STY: fix PEP8 errors
rc Nov 5, 2018
923ea6f
explicitNormalMatrix=None auto-choice added, comments edited
lobpcg Nov 5, 2018
0fcc8cc
typos fixed
lobpcg Nov 5, 2018
783b60e
FIX: update LinearOperator construction in lobpcg_svd() for old SciPy…
rc Nov 5, 2018
8e88087
FIX: remove precision argument in _save() (does not exist in recent n…
rc Jan 6, 2019
1a8a552
Merge pull request #2 from scikit-learn/master
lobpcg Jan 26, 2019
35d09a7
Merge pull request #4 from lobpcg/master
lobpcg Jan 26, 2019
ba17069
Update lobpcg.py
lobpcg Jan 28, 2019
19e7c24
sync with scipy version
rc Jan 28, 2019
3d72f93
fix escape sequence problem
rc Jan 28, 2019
421fa8f
fix docstring line lengths
rc Jan 28, 2019
4b3f74b
Merge pull request #5 from scikit-learn/master
lobpcg Feb 19, 2019
c874474
Merge pull request #7 from lobpcg/master
lobpcg Feb 19, 2019
9426b4b
Merge pull request #8 from scikit-learn/master
lobpcg Mar 5, 2019
19cbe70
Merge pull request #10 from lobpcg/master
lobpcg Mar 5, 2019
abdb873
Merge pull request #11 from scikit-learn/master
lobpcg Apr 11, 2019
95fc261
Merge pull request #13 from lobpcg/master
lobpcg Apr 11, 2019
32b015f
lobpcg moved from utils to externals
lobpcg Apr 16, 2019
d4d5d32
_parse_version error fixes
lobpcg Apr 17, 2019
34b9598
comment on normal matrix clarified
lobpcg Apr 17, 2019
70cf7ca
consistent tol setup everywhere in lobpcg
lobpcg Apr 18, 2019
445d06d
combined arpack randomized and lobpcg tests into 1
lobpcg Apr 18, 2019
ed79fb8
scikit-learn/pull/12319#discussion_r276985457
lobpcg Apr 19, 2019
5e9beb1
import scipy.__version__ fix
lobpcg Apr 19, 2019
b75bd34
Merge pull request #14 from scikit-learn/master
lobpcg May 25, 2019
99a0a8b
Merge pull request #15 from lobpcg/master
lobpcg May 25, 2019
879c8a1
MAINT refactor randomized and lobpcg SVD
glemaitre Jun 20, 2019
f6dd5cd
iter
glemaitre Jun 20, 2019
8d86223
xxx
glemaitre Jun 21, 2019
ed8473e
xxx
glemaitre Jun 21, 2019
7956cec
fix
glemaitre Jun 21, 2019
450ff0e
Merge remote-tracking branch 'origin/master' into pr/lobpcg/12319
glemaitre Jun 26, 2019
79aea6b
TST add lobpcg in the test for PCA
glemaitre Jun 26, 2019
7ce7a3e
TST add lobpcg to TruncatedSVD
glemaitre Jun 26, 2019
031fd67
FIX: vendor lobpcg for scipy < 1.3
glemaitre Jun 26, 2019
b1cacfe
PEP8
glemaitre Jun 26, 2019
c0565fa
update lobpcg
glemaitre Jun 26, 2019
28e3fe3
DOC add whats new
glemaitre Jun 26, 2019
a6ddc1f
fixes
glemaitre Jun 26, 2019
7312469
update really with the master version
glemaitre Jun 26, 2019
467f250
Merge branch 'is/upstream_lobpcg' into pr/lobpcg/12319
glemaitre Jun 26, 2019
d3749ed
DOC add whats new
glemaitre Jun 26, 2019
2e7e510
PEP8
glemaitre Jun 26, 2019
169c462
port the bmat utils from scipy
glemaitre Jun 26, 2019
711882b
port the bmat utils from scipy
glemaitre Jun 26, 2019
e8081e4
iter
glemaitre Jun 26, 2019
1c26637
Merge branch 'is/upstream_lobpcg' into pr/lobpcg/12319
glemaitre Jun 26, 2019
70263d3
iter
glemaitre Jun 26, 2019
60536f5
Merge remote-tracking branch 'origin/master' into pr/lobpcg/12319
glemaitre Jun 26, 2019
39327a6
DOC: improve documentation PCA
glemaitre Jun 26, 2019
95a81c7
iter
glemaitre Jun 27, 2019
26b3cdb
remove debug
glemaitre Jun 27, 2019
bbedba3
PEP8
glemaitre Jun 27, 2019
3cc6da1
additional assert
glemaitre Jun 27, 2019
2cc4798
iter on TruncatedSVD
glemaitre Jun 27, 2019
1c9cff8
remove useless code for supported scipy version
glemaitre Jun 27, 2019
cde9184
TST validation preconditioner in randomized_svd
glemaitre Jun 27, 2019
6ef8dad
TST refactor some test
glemaitre Jun 27, 2019
30e4497
Merge remote-tracking branch 'origin/master' into pr/lobpcg/12319
glemaitre Jul 16, 2019
b2709b9
do not support integer with lobpcg
glemaitre Jul 16, 2019
d133ae9
PEP8
glemaitre Jul 16, 2019
6d616ee
Merge branch 'master' into lobpcg_svd
lobpcg Aug 30, 2019
aec250f
Update sklearn/utils/extmath.py
lobpcg Aug 30, 2019
c20d809
Merge branch 'master' into lobpcg_svd
lobpcg Sep 10, 2019
bbf902a
Merge branch 'master' into lobpcg_svd
lobpcg Sep 26, 2019
6a03d77
Merge remote-tracking branch 'origin/master' into pr/lobpcg/12319
glemaitre Oct 24, 2019
fb1dbb7
Merge remote-tracking branch 'lobpcg/lobpcg_svd' into pr/lobpcg/12319
glemaitre Oct 24, 2019
4da2e77
Merge remote-tracking branch 'upstream/master' into pr/12319
thomasjpfan Nov 29, 2019
bd1755e
remove duplicated References section
lobpcg Nov 30, 2019
1ef74b1
Notes fixed
lobpcg Dec 1, 2019
c3b612b
fixing the docstring format
lobpcg Dec 1, 2019
6 changes: 6 additions & 0 deletions doc/whats_new/v0.22.rst
@@ -270,6 +270,12 @@ Changelog
underlying :class:`linear_model.LassoLars` when `algorithm='lasso_lars'`.
:issue:`12650` by `Adrin Jalali`_.

- |Enhancement| :class:`decomposition.PCA` and
:class:`decomposition.TruncatedSVD` have a new solver ``'lobpcg'``, which
can accelerate computation and improve accuracy.
Review comment (Member): Add ``randomized_svd`` here.

:pr:`12319` by :user:`Andrew Knyazev <lobpcg>` and
:user:`Guillaume Lemaitre <glemaitre>`.

:mod:`sklearn.dummy`
....................

3 changes: 2 additions & 1 deletion sklearn/cluster/_spectral.py
@@ -349,7 +349,8 @@ class SpectralClustering(ClusterMixin, BaseEstimator):

eigen_tol : float, optional, default: 0.0
Stopping criterion for eigendecomposition of the Laplacian matrix
when ``eigen_solver='arpack'``.
when ``eigen_solver='arpack'`` or ``eigen_solver='lobpcg'``. With
'lobpcg', ``tol=0.0`` is ignored and replaced by LOBPCG's internal default.

assign_labels : {'kmeans', 'discretize'}, default: 'kmeans'
The strategy to use to assign labels in the embedding
147 changes: 95 additions & 52 deletions sklearn/decomposition/_pca.py
@@ -7,7 +7,7 @@
# Denis A. Engemann <denis-alexander.engemann@inria.fr>
# Michael Eickenberg <michael.eickenberg@inria.fr>
# Giorgio Patrini <giorgio.patrini@anu.edu.au>
#
# Andrew Knyazev <andrew.knyazev@ucdenver.edu>
# License: BSD 3 clause

from math import log, sqrt
@@ -108,12 +108,14 @@ class PCA(_BasePCA):
data to project it to a lower dimensional space. The input data is centered
but not scaled for each feature before applying the SVD.

It uses the LAPACK implementation of the full SVD or a randomized truncated
SVD by the method of Halko et al. 2009, depending on the shape of the input
data and the number of components to extract.
By default, it uses the LAPACK implementation of the full SVD or a
randomized truncated SVD [3], depending on the shape of the input data and
the number of components to extract.

It can also use the scipy.sparse.linalg ARPACK implementation of the
truncated SVD.
One can also use the ARPACK implementation of the truncated SVD
(refer to :func:`scipy.sparse.linalg.svds`) or the randomized truncated
SVD with additional LOBPCG [5] preconditioning (refer to
:func:`scipy.sparse.linalg.lobpcg`).

Notice that this class does not support sparse input. See
:class:`TruncatedSVD` for an alternative with sparse data.
@@ -122,7 +124,7 @@

Parameters
----------
n_components : int, float, None or str
n_components : int, float, None or string, default=None
Number of components to keep.
if n_components is not set all components are kept::

@@ -148,7 +150,7 @@
fit(X).transform(X) will not yield the expected results,
use fit_transform(X) instead.

whiten : bool, optional (default False)
whiten : bool, default=False
When True (False by default) the `components_` vectors are multiplied
by the square root of n_samples and then divided by the singular values
to ensure uncorrelated outputs with unit component-wise variances.
@@ -158,42 +160,53 @@
improve the predictive accuracy of the downstream estimators by
making their data respect some hard-wired assumptions.

svd_solver : str {'auto', 'full', 'arpack', 'randomized'}
If auto :
The solver is selected by a default policy based on `X.shape` and
svd_solver : string {'auto', 'full', 'arpack', 'randomized', 'lobpcg'}
auto (default) :
the solver is selected by a default policy based on `X.shape` and
`n_components`: if the input data is larger than 500x500 and the
number of components to extract is lower than 80% of the smallest
dimension of the data, then the more efficient 'randomized'
method is enabled. Otherwise the exact full SVD is computed and
optionally truncated afterwards.
If full :
run exact full SVD calling the standard LAPACK solver via
`scipy.linalg.svd` and select the components by postprocessing
:func:`scipy.linalg.svd` and select the components by
postprocessing.
If arpack :
run SVD truncated to n_components calling ARPACK solver via
`scipy.sparse.linalg.svds`. It requires strictly
0 < n_components < min(X.shape)
:func:`scipy.sparse.linalg.svds`. It requires strictly
0 < n_components < min(X.shape).
If randomized :
run randomized SVD by the method of Halko et al.
run randomized SVD as in [2].
If lobpcg :
run Locally Optimal Block Preconditioned Conjugate Gradient [5]
on the normal matrix X'*X or X*X', whichever of the two is smaller.
See :func:`scipy.sparse.linalg.lobpcg`.

.. versionadded:: 0.18.0

tol : float >= 0, optional (default .0)
Tolerance for singular values computed by svd_solver == 'arpack'.
tol : None or float, default=None
Tolerance for singular values computed by the 'randomized' and 'lobpcg'
SVD solvers. If None, then:
* `tol = 2 * eps` for svd_solver = 'randomized'.
Refer to :func:`scipy.sparse.linalg.svds`.
* `tol = n * sqrt(eps)`, where `n = min(n_samples, n_features)`,
for svd_solver = 'lobpcg'. Refer to :func:`scipy.sparse.linalg.lobpcg`.

.. versionadded:: 0.18.0
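The default tolerances above work out as follows (a sketch assuming float64 data; the `2 * eps` and `n * sqrt(eps)` formulas are taken directly from the docstring text):

```python
import numpy as np

n_samples, n_features = 1000, 50
eps = np.finfo(np.float64).eps        # machine epsilon, ~2.22e-16

tol_randomized = 2 * eps              # ~4.4e-16
n = min(n_samples, n_features)
tol_lobpcg = n * np.sqrt(eps)         # 50 * ~1.49e-8, ~7.5e-7
```

The 'lobpcg' default is much looser than the 'randomized' one, reflecting that LOBPCG's tol bounds an iterative residual norm rather than a backward error of a direct factorization.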

iterated_power : int >= 0, or 'auto', (default 'auto')
Number of iterations for the power method computed by
svd_solver == 'randomized'.
iterated_power : int or 'auto', default='auto'
Number of iterations for svd_solver = 'lobpcg', or of the power method
for svd_solver = 'randomized'.

.. versionadded:: 0.18.0

random_state : int, RandomState instance or None, optional (default None)
If int, random_state is the seed used by the random number generator;
If RandomState instance, random_state is the random number generator;
If None, the random number generator is the RandomState instance used
by `np.random`. Used when ``svd_solver`` == 'arpack' or 'randomized'.
by `np.random` in ``svd_solver`` == 'arpack', 'randomized',
or 'lobpcg'.

.. versionadded:: 0.18.0

@@ -244,15 +257,24 @@ class PCA(_BasePCA):
Number of samples in the training data.

noise_variance_ : float
The estimated noise covariance following the Probabilistic PCA model
from Tipping and Bishop 1999. See "Pattern Recognition and
Machine Learning" by C. Bishop, 12.2.1 p. 574 or
http://www.miketipping.com/papers/met-mppca.pdf. It is required to
The estimated noise covariance following the Probabilistic PCA model
of [2]. See Sect. 12.2.1, p. 574 of [6], or [2]. It is required to
compute the estimated data covariance and score samples.

Equal to the average of (min(n_features, n_samples) - n_components)
smallest eigenvalues of the covariance matrix of X.

Notes
-----
For n_components == 'mle', this class uses the method of [1].
`score` and `score_samples` implement the probabilistic
PCA model from [2].
For the other solvers:
* svd_solver == 'arpack' uses :func:`scipy.sparse.linalg.svds`.
* svd_solver == 'randomized' uses the implementation of [3] and [4].
* svd_solver == 'lobpcg' uses the implementation of [5].

See Also
--------
KernelPCA : Kernel Principal Component Analysis.
@@ -262,26 +284,33 @@

References
----------
For n_components == 'mle', this class uses the method of *Minka, T. P.
"Automatic choice of dimensionality for PCA". In NIPS, pp. 598-604*

Implements the probabilistic PCA model from:
Tipping, M. E., and Bishop, C. M. (1999). "Probabilistic principal
component analysis". Journal of the Royal Statistical Society:
Series B (Statistical Methodology), 61(3), 611-622.
via the score and score_samples methods.
See http://www.miketipping.com/papers/met-mppca.pdf

For svd_solver == 'arpack', refer to `scipy.sparse.linalg.svds`.

For svd_solver == 'randomized', see:
*Halko, N., Martinsson, P. G., and Tropp, J. A. (2011).
"Finding structure with randomness: Probabilistic algorithms for
constructing approximate matrix decompositions".
SIAM review, 53(2), 217-288.* and also
*Martinsson, P. G., Rokhlin, V., and Tygert, M. (2011).
"A randomized algorithm for the decomposition of matrices".
Applied and Computational Harmonic Analysis, 30(1), 47-68.*
.. [1] Minka, Thomas P. "Automatic choice of dimensionality for PCA."
In Advances in neural information processing systems,
pp. 598-604. 2001.

.. [2] Tipping, Michael E., and Christopher M. Bishop.
"Probabilistic principal component analysis."
Journal of the Royal Statistical Society:
Series B (Statistical Methodology) 61, no. 3 (1999): 611-622.
http://www.miketipping.com/papers/met-mppca.pdf

.. [3] Halko, Nathan, Per-Gunnar Martinsson, and Joel A. Tropp.
"Finding structure with randomness: Probabilistic algorithms for
constructing approximate matrix decompositions."
SIAM review 53, no. 2 (2011): 217-288.

.. [4] Martinsson, Per-Gunnar, Vladimir Rokhlin, and Mark Tygert.
"A randomized algorithm for the decomposition of matrices."
Applied and Computational Harmonic Analysis 30, no. 1 (2011): 47-68.

.. [5] Knyazev, Andrew V.
"Toward the optimal preconditioned eigensolver:
Locally optimal block preconditioned conjugate gradient method."
SIAM journal on scientific computing 23, no. 2 (2001): 517-541.
https://doi.org/10.1137/S1064827500366124

.. [6] Bishop, Christopher M.
"Pattern recognition and machine learning." Springer, 2006.

Examples
--------
@@ -366,7 +395,7 @@ def fit_transform(self, X, y=None):
This method returns a Fortran-ordered array. To convert it to a
C-ordered array, use 'np.ascontiguousarray'.
"""
U, S, V = self._fit(X)
U, S, _ = self._fit(X)
U = U[:, :self.n_components_]

if self.whiten:
@@ -407,15 +436,21 @@ def _fit(self, X):
self._fit_svd_solver = 'full'
elif n_components >= 1 and n_components < .8 * min(X.shape):
self._fit_svd_solver = 'randomized'
# need to add 'lobpcg' here
# This is also the case of n_components in (0,1)
else:
self._fit_svd_solver = 'full'

# Set the default tolerance if necessary
tol = (0. if self.tol is None and self._fit_svd_solver == 'arpack'
else self.tol)

# Call different fits for either full or truncated SVD
if self._fit_svd_solver == 'full':
return self._fit_full(X, n_components)
elif self._fit_svd_solver in ['arpack', 'randomized']:
return self._fit_truncated(X, n_components, self._fit_svd_solver)
elif self._fit_svd_solver in ['arpack', 'randomized', 'lobpcg']:
return self._fit_truncated(X, n_components, self._fit_svd_solver,
tol)
else:
raise ValueError("Unrecognized svd_solver='{0}'"
"".format(self._fit_svd_solver))
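The 'auto' branch of `_fit` above can be summarised as a standalone function (a sketch mirroring the dispatch logic in the diff; `choose_svd_solver` is a hypothetical name, and the diff's own TODO comment notes that 'lobpcg' is not yet part of the automatic choice):

```python
def choose_svd_solver(shape, n_components):
    # Mirrors PCA's 'auto' dispatch: exact LAPACK SVD for small data or
    # n_components == 'mle'; randomized SVD when few components are
    # requested from large data; exact SVD otherwise (including a float
    # n_components in (0, 1), which selects by explained variance).
    if max(shape) <= 500 or n_components == 'mle':
        return 'full'
    if 1 <= n_components < 0.8 * min(shape):
        return 'randomized'
    return 'full'
```

For example, a 1000 x 100 matrix with 10 components selects 'randomized', while the same matrix with 90 components (90 >= 0.8 * 100) falls back to 'full'.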
@@ -483,9 +518,9 @@ def _fit_full(self, X, n_components):

return U, S, V

def _fit_truncated(self, X, n_components, svd_solver):
"""Fit the model by computing truncated SVD (by ARPACK or randomized)
on X
def _fit_truncated(self, X, n_components, svd_solver, tol):
"""Fit the model by computing truncated SVD
(by ARPACK, randomized, or LOBPCG) on X
"""
n_samples, n_features = X.shape

@@ -520,7 +555,7 @@ def _fit_truncated(self, X, n_components, svd_solver):
if svd_solver == 'arpack':
# random init solution, as ARPACK does it internally
v0 = random_state.uniform(-1, 1, size=min(X.shape))
U, S, V = svds(X, k=n_components, tol=self.tol, v0=v0)
U, S, V = svds(X, k=n_components, tol=tol, v0=v0)
# svds doesn't abide by scipy.linalg.svd/randomized_svd
# conventions, so reverse its outputs.
S = S[::-1]
@@ -534,6 +569,14 @@ def _fit_truncated(self, X, n_components, svd_solver):
flip_sign=True,
random_state=random_state)

elif svd_solver == 'lobpcg':
# sign flipping is done inside
U, S, V = randomized_svd(
X, n_components=n_components, n_iter=self.iterated_power,
flip_sign=True, random_state=random_state,
preconditioner='lobpcg', tol=tol
)

self.n_samples_, self.n_features_ = n_samples, n_features
self.components_ = V
self.n_components_ = n_components