From f45f260ff7a272b35f19f7bc93070d1f731ec6ff Mon Sep 17 00:00:00 2001 From: giorgiop Date: Wed, 21 Oct 2015 16:29:02 +0200 Subject: [PATCH] DOC versionadded randomized_svd --- doc/whats_new.rst | 7 +++---- sklearn/decomposition/pca.py | 8 +++++--- sklearn/utils/extmath.py | 14 +++++++++++++- 3 files changed, 21 insertions(+), 8 deletions(-) diff --git a/doc/whats_new.rst b/doc/whats_new.rst index 9ae61a3f434b5..266f53f3f1f4a 100644 --- a/doc/whats_new.rst +++ b/doc/whats_new.rst @@ -29,17 +29,16 @@ Bug fixes ......... - :class:`RandomizedPCA` default number of `iterated_power` is 2 instead of 3. - This is a speed up with a minor precision decrease. By `Giorgio Patrini`_. + This is a speed up with a minor precision decrease. (`#5141 <https://github.com/scikit-learn/scikit-learn/pull/5141>`_) by `Giorgio Patrini`_. - :func:`randomized_svd` performs 2 power iterations by default, instead or 0. In practice this is often enough for obtaining a good approximation of the - true eigenvalues/vectors in the presence of noise. By `Giorgio Patrini`_. + true eigenvalues/vectors in the presence of noise. (`#5141 <https://github.com/scikit-learn/scikit-learn/pull/5141>`_) by `Giorgio Patrini`_. - :func:`randomized_range_finder` is more numerically stable when many power iterations are requested, since it applies LU normalization by default. If `n_iter<2` numerical issues are unlikely, thus no normalization is applied. - Other normalization options are available: 'none', 'LU' and 'QR'. By - `Giorgio Patrini`_. + Other normalization options are available: 'none', 'LU' and 'QR'. (`#5141 <https://github.com/scikit-learn/scikit-learn/pull/5141>`_) by `Giorgio Patrini`_. - Fixed bug in :func:`manifold.spectral_embedding` where diagonal of unnormalized Laplacian matrix was incorrectly set to 1. By `Peter Fischer`_. 
diff --git a/sklearn/decomposition/pca.py b/sklearn/decomposition/pca.py index 84aa4874dff46..06f102cc550da 100644 --- a/sklearn/decomposition/pca.py +++ b/sklearn/decomposition/pca.py @@ -488,7 +488,9 @@ class RandomizedPCA(BaseEstimator, TransformerMixin): use fit_transform(X) instead. iterated_power : int, optional - Number of iterations for the power method. 3 by default. + Number of iterations for the power method. 2 by default. + + .. versionchanged:: 0.18 whiten : bool, optional When True (False by default) the `components_` vectors are divided @@ -510,8 +512,8 @@ class RandomizedPCA(BaseEstimator, TransformerMixin): Components with maximum variance. explained_variance_ratio_ : array, [n_components] - Percentage of variance explained by each of the selected components. \ - k is not set then all components are stored and the sum of explained \ + Percentage of variance explained by each of the selected components. + If k is not set then all components are stored and the sum of explained variances is equal to 1.0 mean_ : array, [n_features] diff --git a/sklearn/utils/extmath.py b/sklearn/utils/extmath.py index e71a2e5f1241b..fddae700b02ca 100644 --- a/sklearn/utils/extmath.py +++ b/sklearn/utils/extmath.py @@ -195,11 +195,14 @@ def randomized_range_finder(A, size, n_iter=2, Parameters ---------- A: 2D array - The input data matrix + The input data matrix. + size: integer Size of the return array + n_iter: integer Number of power iterations used to stabilize the result + power_iteration_normalizer: 'auto' (default), 'QR', 'LU', 'none' Whether the power iterations are normalized with step-by-step QR factorization (the slowest but most accurate), 'none' @@ -207,6 +210,9 @@ def randomized_range_finder(A, size, n_iter=2, typically 5 or larger), or 'LU' factorization (numerically stable but can lose slightly in accuracy). The 'auto' mode applies no normalization if `n_iter`<=2 and switches to LU otherwise. + + .. 
versionadded:: 0.18 + random_state: RandomState or an int seed (0 by default) A random number generator instance @@ -283,6 +289,8 @@ def randomized_svd(M, n_components, n_oversamples=10, n_iter=2, Number of power iterations (can be used to deal with very noisy problems). + .. versionchanged:: 0.18 + power_iteration_normalizer: 'auto' (default), 'QR', 'LU', 'none' Whether the power iterations are normalized with step-by-step QR factorization (the slowest but most accurate), 'none' @@ -291,6 +299,8 @@ def randomized_svd(M, n_components, n_oversamples=10, n_iter=2, but can lose slightly in accuracy). The 'auto' mode applies no normalization if `n_iter`<=2 and switches to LU otherwise. + .. versionadded:: 0.18 + transpose: True, False or 'auto' (default) Whether the algorithm should be applied to M.T instead of M. The result should approximately be the same. The 'auto' mode will @@ -298,6 +308,8 @@ def randomized_svd(M, n_components, n_oversamples=10, n_iter=2, implementation of randomized SVD tend to be a little faster in that case. + .. versionchanged:: 0.18 + flip_sign: boolean, (True by default) The output of a singular value decomposition is only unique up to a permutation of the signs of the singular vectors. If `flip_sign` is