Implemented shrinkage LDA classifier. #3105

Closed · wants to merge 39 commits

Commits
a9ab33d
Implemented shrinkage LDA classifier.
Apr 24, 2014
db786bc
sLDA example
Apr 24, 2014
e20dc6b
lda.score() instead of manual accuracy calculation
Apr 24, 2014
9e700ba
Changed class name from LDA to SLDA
Apr 26, 2014
03653c1
Replaced lambda with a named function
Apr 26, 2014
dedb370
Added more tests for improved coverage
Apr 26, 2014
06f0494
Merge pull request #1 from kazemakase/sldac
cbrnr Apr 28, 2014
02c0ae8
Updated example and style.
Apr 28, 2014
2edb383
Fixed Python 2 division issue.
Apr 28, 2014
52fbac2
Implemented shrinkage LDA classifier.
Apr 24, 2014
0efc4df
sLDA example
Apr 24, 2014
657edd7
lda.score() instead of manual accuracy calculation
Apr 24, 2014
db6f4df
Changed class name from LDA to SLDA
Apr 26, 2014
ab272ff
Replaced lambda with a named function
Apr 26, 2014
7d94c7f
Added more tests for improved coverage
Apr 26, 2014
38849d6
Updated example and style.
Apr 28, 2014
7681662
Fixed Python 2 division issue.
Apr 28, 2014
14a3e58
Moved input validation to fit method. Now use proper warnings.
Apr 29, 2014
0668b21
Merge branch 'sldac' of https://github.com/cle1109/scikit-learn into …
Apr 29, 2014
c4f4763
Updated tests to reflect changes in input validation.
Apr 29, 2014
5e67d52
Replaced np.linalg.solve() with np.linalg.lstsq(); fixed Python 2 com…
Apr 29, 2014
e08eb09
Renamed SLDA to ShrinkageLDA and moved it to lda.py.
Apr 30, 2014
86384b5
Implemented shrinkage LDA classifier.
Apr 24, 2014
ce3b119
sLDA example
Apr 24, 2014
6353b0a
lda.score() instead of manual accuracy calculation
Apr 24, 2014
176470f
Changed class name from LDA to SLDA
Apr 26, 2014
b9753b0
Replaced lambda with a named function
Apr 26, 2014
f88231e
Added more tests for improved coverage
Apr 26, 2014
507e47e
Updated example and style.
Apr 28, 2014
b096de5
Fixed Python 2 division issue.
Apr 28, 2014
a3d7303
Implemented shrinkage LDA classifier.
Apr 24, 2014
1285ac0
sLDA example
Apr 24, 2014
1ccb555
Replaced lambda with a named function
Apr 26, 2014
79fb3a6
Added more tests for improved coverage
Apr 26, 2014
7f3d22b
Updated tests to reflect changes in input validation.
Apr 29, 2014
dd2244d
Renamed SLDA to ShrinkageLDA and moved it to lda.py.
Apr 30, 2014
a9ad7be
Merge branch 'sldac' of https://github.com/cle1109/scikit-learn into …
Jun 3, 2014
10b1d6f
Corrected doctest.
Jun 3, 2014
73999e2
Corrected errors introduced by manual merge.
Jun 3, 2014
67 changes: 67 additions & 0 deletions examples/plot_lda.py
@@ -0,0 +1,67 @@
"""
====================================================================
Normal and Shrinkage Linear Discriminant Analysis for classification
====================================================================

Shows how shrinkage improves classification accuracy as the number of
features grows relative to the number of training samples.
"""

from __future__ import division

import sys
import numpy as np
import matplotlib.pyplot as plt

from sklearn.lda import LDA, ShrinkageLDA


n_train = 10 # samples per class for training
n_test = 100  # samples per class for testing
n_averages = 50


def generate_data(n_samples, n_features):
""" generate `n_samples` samples of data with 1 discriminative features
and n_`features` non-discriminative features."""
X = np.vstack([np.random.randn(n_samples, 1) - 2,
np.random.randn(n_samples, 1) + 2])
y = np.hstack([[-1] * n_samples, [1] * n_samples])
# add non-discriminative features
X = np.hstack([X, np.random.randn(2 * n_samples, n_features)])
return X, y

acc_lda, acc_slda, acc_nlda = [], [], []
m_range = range(50)
for m in m_range:
sys.stdout.write('\r{:.0%}'.format(m/max(m_range)))
tmp_lda, tmp_slda, tmp_nlda = 0, 0, 0
for i in range(n_averages):
X, y = generate_data(n_train, m)

lda = LDA().fit(X, y)
slda = ShrinkageLDA().fit(X, y)
nlda = ShrinkageLDA(shrinkage=None).fit(X, y)

X, y = generate_data(n_test, m)
tmp_lda += lda.score(X, y) / n_averages
tmp_slda += slda.score(X, y) / n_averages
tmp_nlda += nlda.score(X, y) / n_averages

acc_lda.append(tmp_lda)
acc_slda.append(tmp_slda)
acc_nlda.append(tmp_nlda)

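# x-axis: ratio of feature count (m + 1) to training-set size (2 * n_train)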
m_range = (1 + np.array(m_range)) / (2 * n_train)

plt.plot(m_range, acc_slda, linewidth=2, label='ShrinkageLDA()', color='r')
plt.plot(m_range, acc_nlda, linewidth=2, label='ShrinkageLDA(shrinkage=None)', color='g')
plt.plot(m_range, acc_lda, linewidth=2, label='LDA()', color='b')

plt.xlabel('n_features / n_samples')
plt.ylabel('Classification accuracy')

plt.legend(loc=4, prop={'size': 8})

plt.suptitle('LDA vs sLDA (1 discriminative feature)')

plt.show()
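
The gain from shrinkage in the example above comes from the Ledoit-Wolf
estimate of the shrinkage intensity. As a minimal sketch (not part of this
PR), the intensity that :func:`sklearn.covariance.ledoit_wolf` selects can be
inspected directly; `generate_data` is the helper defined in the example:

import numpy as np
from sklearn.covariance import ledoit_wolf

X, y = generate_data(10, 40)            # 10 samples per class, 41 features
Xc = X[y == 1] - X[y == 1].mean(0)      # center one class, as fit() does
shrunk_cov, intensity = ledoit_wolf(Xc)
print('Ledoit-Wolf shrinkage intensity: {:.3f}'.format(intensity))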
225 changes: 222 additions & 3 deletions sklearn/lda.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,25 @@
"""
The :mod:`sklearn.lda` module implements Linear Discriminant Analysis (LDA).
"""
from __future__ import print_function

# Authors: Matthieu Perrot
# Mathieu Blondel
# Martin Billinger
# Clemens Brunner

from __future__ import division, print_function

import warnings

import numpy as np
from scipy import linalg

from .base import BaseEstimator, ClassifierMixin, TransformerMixin
from .covariance import ledoit_wolf, empirical_covariance
from .utils.extmath import logsumexp
from .utils import check_arrays, array2d, column_or_1d

__all__ = ['LDA']
__all__ = ['LDA', 'ShrinkageLDA']


class LDA(BaseEstimator, ClassifierMixin, TransformerMixin):
@@ -90,7 +95,7 @@ def __init__(self, n_components=None, priors=None):
if (self.priors < 0).any():
raise ValueError('priors must be non-negative')
if self.priors.sum() != 1:
print('warning: the priors do not sum to 1. Renormalizing')
warnings.warn('priors do not sum to 1; renormalizing')
self.priors = self.priors / self.priors.sum()

def fit(self, X, y, store_covariance=False, tol=1.0e-4):
@@ -285,3 +290,217 @@ def predict_log_proba(self, X):
loglikelihood = (values - values.max(axis=1)[:, np.newaxis])
normalization = logsumexp(loglikelihood, axis=1)
return loglikelihood - normalization[:, np.newaxis]


class ShrinkageLDA(BaseEstimator, ClassifierMixin):
"""
Shrinkage Linear Discriminant Analysis (sLDA)

A classifier with a linear decision boundary, generated by fitting class
conditional densities to the data and using Bayes' rule.

The model fits a Gaussian density to each class, assuming that all classes
share the same covariance matrix. By default, the covariance matrix is
estimated with :func:`ledoit_wolf`.

Parameters
----------
`priors` : array, optional, shape = [n_classes]
Priors on classes

`shrinkage` : str, optional
Shrinkage method, set to 'auto' for Ledoit-Wolf shrinkage
or None for empirical covariance estimation (no shrinkage).

Attributes
----------
`coef_` : array-like, shape = [n_classes, n_features]
Coefficients of the features in the linear decision function.

`intercept_` : array, shape = [n_classes]
Intercept (bias) added to the decision function.

`covariance_` : array-like, shape = [n_features, n_features]
Covariance matrix (shared by all classes).

`means_` : array-like, shape = [n_classes, n_features]
Class means.

`priors_` : array-like, shape = [n_classes]
Class priors (sum to 1).

`xbar_` : array-like, shape = [n_features]
Weighted average of class means (weighted with priors).

Examples
--------
>>> import numpy as np
>>> from sklearn.lda import ShrinkageLDA
>>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
>>> y = np.array([1, 1, 1, 2, 2, 2])
>>> clf = ShrinkageLDA()
>>> clf.fit(X, y)
ShrinkageLDA(priors=None, shrinkage='auto')
>>> print(clf.predict([[-0.8, -1]]))
[1]

"""

def __init__(self, priors=None, shrinkage='auto'):
self.priors = np.asarray(priors) if priors is not None else None
self.shrinkage = shrinkage

def fit(self, X, y):
"""
Fit the LDA model according to the given training data and parameters.

Parameters
----------
X : array-like, shape = [n_samples, n_features]
Training vector, where n_samples is the number of samples and
n_features is the number of features.

y : array, shape = [n_samples]
Target values (integers).

"""
X, y = check_arrays(X, y, sparse_format='dense')
X = array2d(X.T).T  # ensure X is 2D with one sample per row if X was 1D
y = column_or_1d(y, warn=True)
self.classes_, y = np.unique(y, return_inverse=True)
n_samples, n_features = X.shape
n_classes = len(self.classes_)
if n_classes < 2:
raise ValueError('y has fewer than 2 classes')

# TODO: support equal priors (with priors=='equal')
if self.priors is not None:
if (self.priors < 0).any():
raise ValueError('priors must be non-negative')
if self.priors.sum() != 1:
warnings.warn('priors do not sum to 1; renormalizing')
self.priors = self.priors / self.priors.sum()
self.priors_ = self.priors
else:
self.priors_ = np.bincount(y) / float(n_samples)

if self.shrinkage is not None:
if self.shrinkage == 'auto':
self._cov_estimator = _ledoit_wolf
else:
warnings.warn('unknown shrinkage method, using no shrinkage')
self._cov_estimator = empirical_covariance
else:
self._cov_estimator = empirical_covariance

means = []
covs = []
Xc = []
for ind in range(n_classes):
Xg = X[y == ind, :]
meang = Xg.mean(0)
means.append(meang)
Xgc = Xg - meang
Xc.append(Xgc)
covg = self._cov_estimator(Xgc)
covg = np.atleast_2d(covg)
covs.append(covg)

self.means_ = np.asarray(means)
self.xbar_ = np.dot(self.priors_, self.means_)

# TODO: weight covariances with priors?
Sw = np.mean(covs, 0) # Within-class scatter
means = self.means_ - self.xbar_

self.coef_ = np.linalg.lstsq(Sw, means.T, rcond=1e-11)[0].T
self.intercept_ = (-0.5 * np.diag(np.dot(means, self.coef_.T))
+ np.log(self.priors_))
return self

def _decision_function(self, X):
X = array2d(np.asarray(X).T).T  # ensure X is 2D with one sample per row
return np.dot(X - self.xbar_, self.coef_.T) + self.intercept_

def decision_function(self, X):
"""
This function returns the decision function values related to each
class on an array of test vectors X.

Parameters
----------
X : array-like, shape = [n_samples, n_features]

Returns
-------
C : array, shape = [n_samples, n_classes] or [n_samples,]
Decision function values related to each class, per sample.
In the two-class case, the shape is [n_samples,], giving the
log likelihood ratio of the positive class.
"""
dec_func = self._decision_function(X)
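# binary case: collapse to one column, the score of classes_[1]
# minus classes_[0] (the log likelihood ratio of the positive class)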
if len(self.classes_) == 2:
return dec_func[:, 1] - dec_func[:, 0]
return dec_func

def predict(self, X):
"""
This function classifies an array of test vectors X. The predicted
class C for each sample in X is returned.

Parameters
----------
X : array-like, shape = [n_samples, n_features]

Returns
-------
C : array, shape = [n_samples]
"""
d = self._decision_function(X)
y_pred = self.classes_.take(d.argmax(1))
return y_pred

def predict_proba(self, X):
"""
This function returns posterior probabilities of classification
according to each class on an array of test vectors X.

Parameters
----------
X : array-like, shape = [n_samples, n_features]

Returns
-------
C : array, shape = [n_samples, n_classes]
"""
values = self._decision_function(X)
# compute the likelihood of the underlying gaussian models
# up to a multiplicative constant.
likelihood = np.exp(values - values.max(axis=1)[:, np.newaxis])
# compute posterior probabilities
return likelihood / likelihood.sum(axis=1)[:, np.newaxis]

def predict_log_proba(self, X):
"""
This function returns posterior log-probabilities of classification
according to each class on an array of test vectors X.

Parameters
----------
X : array-like, shape = [n_samples, n_features]

Returns
-------
C : array, shape = [n_samples, n_classes]
"""
values = self._decision_function(X)
loglikelihood = (values - values.max(axis=1)[:, np.newaxis])
normalization = logsumexp(loglikelihood, axis=1)
return loglikelihood - normalization[:, np.newaxis]


def _ledoit_wolf(*args, **kwargs):
"""
This function returns the covariance estimated with :func:`ledoit_wolf`.
"""
return ledoit_wolf(*args, **kwargs)[0]
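
For reference, the estimator wrapped by `_ledoit_wolf` blends the empirical
covariance with a scaled identity target; only the blending intensity is
estimated from the data. A minimal numpy sketch of that convex combination
(assuming pre-centered data, with `shrinkage` playing the role of the
estimated intensity):

import numpy as np

def shrunk_covariance_sketch(Xc, shrinkage):
    # empirical covariance of the centered data
    emp_cov = np.dot(Xc.T, Xc) / Xc.shape[0]
    # scaled identity target with the same average variance
    mu = np.trace(emp_cov) / emp_cov.shape[1]
    return (1 - shrinkage) * emp_cov + shrinkage * mu * np.eye(emp_cov.shape[1])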