@jakevdp's version of pinvh

jakevdp · amueller · commit 12f4eb9047a1 · 2012-09-04T20:54:07.000+01:00
speed up symmetric_pinv
diff --git a/sklearn/covariance/empirical_covariance_.py b/sklearn/covariance/empirical_covariance_.py
@@ -17,7 +17,7 @@
 
 from ..base import BaseEstimator
 from ..utils import array2d
-from ..utils.extmath import fast_logdet, symmetric_pinv
+from ..utils.extmath import fast_logdet, pinvh
 
 
 def log_likelihood(emp_cov, precision):
@@ -113,7 +113,7 @@ def _set_covariance(self, covariance):
         self.covariance_ = covariance
         # set precision
         if self.store_precision:
-            self.precision_ = symmetric_pinv(covariance)
+            self.precision_ = pinvh(covariance)
         else:
             self.precision_ = None
 
@@ -129,7 +129,7 @@ def get_precision(self):
         if self.store_precision:
             precision = self.precision_
         else:
-            precision = symmetric_pinv(self.covariance_)
+            precision = pinvh(self.covariance_)
         return precision
 
     def fit(self, X):
diff --git a/sklearn/covariance/graph_lasso_.py b/sklearn/covariance/graph_lasso_.py
@@ -17,7 +17,7 @@
                 EmpiricalCovariance, log_likelihood
 
 from ..utils import ConvergenceWarning
-from ..utils.extmath import symmetric_pinv
+from ..utils.extmath import pinvh
 from ..linear_model import lars_path
 from ..linear_model import cd_fast
 from ..cross_validation import check_cv, cross_val_score
@@ -144,7 +144,7 @@ def graph_lasso(emp_cov, alpha, cov_init=None, mode='cd', tol=1e-4,
     covariance_ *= 0.95
     diagonal = emp_cov.flat[::n_features + 1]
     covariance_.flat[::n_features + 1] = diagonal
-    precision_ = symmetric_pinv(covariance_)
+    precision_ = pinvh(covariance_)
 
     indices = np.arange(n_features)
     costs = list()
diff --git a/sklearn/covariance/robust_covariance.py b/sklearn/covariance/robust_covariance.py
@@ -13,7 +13,7 @@
 from scipy.stats import chi2
 
 from . import empirical_covariance, EmpiricalCovariance
-from ..utils.extmath import fast_logdet, symmetric_pinv
+from ..utils.extmath import fast_logdet, pinvh
 from ..utils import check_random_state
 
 
@@ -85,7 +85,7 @@ def c_step(X, n_support, remaining_iterations=30, initial_estimates=None,
         location = initial_estimates[0]
         covariance = initial_estimates[1]
         # run a special iteration for that case (to get an initial support)
-        precision = symmetric_pinv(covariance)
+        precision = pinvh(covariance)
         X_centered = X - location
         dist = (np.dot(X_centered, precision) * X_centered).sum(1)
         # compute new estimates
@@ -104,7 +104,7 @@ def c_step(X, n_support, remaining_iterations=30, initial_estimates=None,
         previous_det = det
         previous_support = support
         # compute a new support from the full data set mahalanobis distances
-        precision = symmetric_pinv(covariance)
+        precision = pinvh(covariance)
         X_centered = X - location
         dist = (np.dot(X_centered, precision) * X_centered).sum(axis=1)
         # compute new estimates
@@ -344,7 +344,7 @@ def fast_mcd(X, support_fraction=None,
         covariance = np.asarray([[np.var(X[support])]])
         location = np.array([location])
         # get precision matrix in an optimized way
-        precision = symmetric_pinv(covariance)
+        precision = pinvh(covariance)
         dist = (np.dot(X_centered, precision) \
                     * (X_centered)).sum(axis=1)
 
@@ -545,7 +545,7 @@ def fit(self, X):
             raw_covariance = self._nonrobust_covariance(
                     X[raw_support], assume_centered=True)
             # get precision matrix in an optimized way
-            precision = symmetric_pinv(raw_covariance)
+            precision = pinvh(raw_covariance)
             raw_dist = np.sum(np.dot(X, precision) * X, 1)
         self.raw_location_ = raw_location
         self.raw_covariance_ = raw_covariance
diff --git a/sklearn/utils/extmath.py b/sklearn/utils/extmath.py
@@ -299,61 +299,60 @@ def weighted_mode(a, w, axis=0):
     return mostfrequent, oldcounts
 
 
-def symmetric_pinv(a, cond=None, rcond=None):
-    """Compute the (Moore-Penrose) pseudo-inverse of a matrix.
+def pinvh(a, cond=None, rcond=None, lower=True):
+    """Compute the (Moore-Penrose) pseudo-inverse of a Hermitian matrix.
 
     Calculate a generalized inverse of a symmetric matrix using its
     eigenvalue decomposition and including all 'large' eigenvalues.
 
-    Inspired by ``scipy.linalg.pinv2``, credited to Pearu Peterson and Travis
-    Oliphant.
-
     Parameters
     ----------
     a : array, shape (N, N)
-        Symmetric matrix to be pseudo-inverted
+        Real symmetric or complex Hermitian matrix to be pseudo-inverted
     cond, rcond : float or None
         Cutoff for 'small' eigenvalues.
         Singular values smaller than rcond * largest_eigenvalue are considered
         zero.
 
         If None or -1, suitable machine precision is used.
+    lower : boolean
+        If True (the default), the pertinent array data is taken from the
+        lower triangle of a; if False, from the upper triangle.
 
     Returns
     -------
     B : array, shape (N, N)
 
-    Raises LinAlgError if eigenvalue does not converge
+    Raises
+    ------
+    LinAlgError
+        If the eigenvalue computation does not converge
 
     Examples
     --------
     >>> from numpy import *
     >>> a = random.randn(9, 6)
     >>> a = np.dot(a, a.T)
-    >>> B = symmetric_pinv(a)
+    >>> B = pinvh(a)
     >>> allclose(a, dot(a, dot(B, a)))
     True
     >>> allclose(B, dot(B, dot(a, B)))
     True
 
     """
     a = np.asarray_chkfinite(a)
-    s, u = linalg.eigh(a)
-    # eigh returns eigvals in reverse order, but this doesn't affect anything.
+    s, u = linalg.eigh(a, lower=lower)
 
-    t = u.dtype.char
     if rcond is not None:
         cond = rcond
     if cond in [None, -1]:
-        eps = np.finfo(np.float).eps
-        feps = np.finfo(np.single).eps
-        _array_precision = {'f': 0, 'd': 1, 'F': 0, 'D': 1}
-        cond = {0: feps * 1e3, 1: eps * 1e6}[_array_precision[t]]
-    n = a.shape[0]
-    cutoff = cond * np.maximum.reduce(s)
-    psigma = np.zeros(n, t)
-    above_cutoff = np.where(s > cutoff)
-    psigma[above_cutoff] = 1.0 / np.conjugate(s[above_cutoff])
-    #XXX: use lapack/blas routines for dot
-    #XXX: above comment is from scipy, but I (@vene)'ll take a look
-    return np.transpose(np.conjugate(np.dot(u * psigma, u.T.conjugate())))
+        t = u.dtype.char.lower()
+        factor = {'f': 1E3, 'd': 1E6}
+        cond = factor[t] * np.finfo(t).eps
+
+    # unlike the SVD case, eigh can return negative eigenvalues: compare |s|
+    above_cutoff = (abs(s) > cond * np.max(abs(s)))
+    psigma_diag = np.zeros_like(s)
+    psigma_diag[above_cutoff] = 1.0 / s[above_cutoff]
+
+    return np.dot(u * psigma_diag, np.conjugate(u).T)
diff --git a/sklearn/utils/tests/test_utils.py b/sklearn/utils/tests/test_utils.py
@@ -11,7 +11,7 @@
 from sklearn.utils import deprecated
 from sklearn.utils import resample
 from sklearn.utils import safe_mask
-from sklearn.utils.extmath import symmetric_pinv
+from sklearn.utils.extmath import pinvh
 
 
 def test_make_rng():
@@ -93,7 +93,7 @@ def test_safe_mask():
     assert_equal(X_csr[mask].shape[0], 3)
 
 
-def test_symmetric_pinv():
+def test_pinvh():
     a = np.random.randn(5, 3)
     a = np.dot(a, a.T)  # symmetric singular matrix
-    assert_almost_equal(pinv2(a), symmetric_pinv(a))
+    assert_almost_equal(pinv2(a), pinvh(a))