Implemented shrinkage LDA classifier. #3105

Closed · wants to merge 39 commits

Commits
a9ab33d
Implemented shrinkage LDA classifier.
Apr 24, 2014
db786bc
sLDA example
Apr 24, 2014
e20dc6b
lda.score() instead of manual accuracy calculation
Apr 24, 2014
9e700ba
Changed class name from LDA to SLDA
Apr 26, 2014
03653c1
Replaced lambda with a named function
Apr 26, 2014
dedb370
Added more tests for improved coverage
Apr 26, 2014
06f0494
Merge pull request #1 from kazemakase/sldac
cbrnr Apr 28, 2014
02c0ae8
Updated example and style.
Apr 28, 2014
2edb383
Fixed Python 2 division issue.
Apr 28, 2014
52fbac2
Implemented shrinkage LDA classifier.
Apr 24, 2014
0efc4df
sLDA example
Apr 24, 2014
657edd7
lda.score() instead of manual accuracy calculation
Apr 24, 2014
db6f4df
Changed class name from LDA to SLDA
Apr 26, 2014
ab272ff
Replaced lambda with a named function
Apr 26, 2014
7d94c7f
Added more tests for improved coverage
Apr 26, 2014
38849d6
Updated example and style.
Apr 28, 2014
7681662
Fixed Python 2 division issue.
Apr 28, 2014
14a3e58
Moved input validation to fit method. Now use proper warnings.
Apr 29, 2014
0668b21
Merge branch 'sldac' of https://github.com/cle1109/scikit-learn into …
Apr 29, 2014
c4f4763
Updated tests to reflect changes in input validation.
Apr 29, 2014
5e67d52
Replaced np.linalg.solve() with np.linalg.lstsq(); fixed Python 2 com…
Apr 29, 2014
e08eb09
Renamed SLDA to ShrinkageLDA and moved it to lda.py.
Apr 30, 2014
86384b5
Implemented shrinkage LDA classifier.
Apr 24, 2014
ce3b119
sLDA example
Apr 24, 2014
6353b0a
lda.score() instead of manual accuracy calculation
Apr 24, 2014
176470f
Changed class name from LDA to SLDA
Apr 26, 2014
b9753b0
Replaced lambda with a named function
Apr 26, 2014
f88231e
Added more tests for improved coverage
Apr 26, 2014
507e47e
Updated example and style.
Apr 28, 2014
b096de5
Fixed Python 2 division issue.
Apr 28, 2014
a3d7303
Implemented shrinkage LDA classifier.
Apr 24, 2014
1285ac0
sLDA example
Apr 24, 2014
1ccb555
Replaced lambda with a named function
Apr 26, 2014
79fb3a6
Added more tests for improved coverage
Apr 26, 2014
7f3d22b
Updated tests to reflect changes in input validation.
Apr 29, 2014
dd2244d
Renamed SLDA to ShrinkageLDA and moved it to lda.py.
Apr 30, 2014
a9ad7be
Merge branch 'sldac' of https://github.com/cle1109/scikit-learn into …
Jun 3, 2014
10b1d6f
Corrected doctest.
Jun 3, 2014
73999e2
Corrected errors introduced by manual merge.
Jun 3, 2014
67 changes: 67 additions & 0 deletions examples/plot_lda.py
@@ -0,0 +1,67 @@
"""
====================================================================
Normal and Shrinkage Linear Discriminant Analysis for classification
====================================================================

Shows how shrinkage improves classification accuracy as the number of
features grows relative to the number of training samples.
"""

from __future__ import division

import sys
import numpy as np
import matplotlib.pyplot as plt

from sklearn.lda import LDA, ShrinkageLDA


n_train = 10 # samples per class for training
n_test = 100  # samples per class for testing
n_averages = 50


def generate_data(n_samples, n_features):
""" generate `n_samples` samples of data with 1 discriminative features
and n_`features` non-discriminative features."""
X = np.vstack([np.random.randn(n_samples, 1) - 2,
np.random.randn(n_samples, 1) + 2])
y = np.hstack([[-1] * n_samples, [1] * n_samples])
# add non-discriminative features
X = np.hstack([X, np.random.randn(2 * n_samples, n_features)])
return X, y

acc_lda, acc_slda, acc_nlda = [], [], []
m_range = range(50)
for m in m_range:
sys.stdout.write('\r{:.0%}'.format(m/max(m_range)))
tmp_lda, tmp_slda, tmp_nlda = 0, 0, 0
for i in range(n_averages):
X, y = generate_data(n_train, m)

lda = LDA().fit(X, y)
slda = ShrinkageLDA().fit(X, y)
nlda = ShrinkageLDA(shrinkage=None).fit(X, y)

X, y = generate_data(n_test, m)
tmp_lda += lda.score(X, y) / n_averages
tmp_slda += slda.score(X, y) / n_averages
tmp_nlda += nlda.score(X, y) / n_averages

acc_lda.append(tmp_lda)
acc_slda.append(tmp_slda)
acc_nlda.append(tmp_nlda)

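# x-axis: ratio of feature count (m + 1) to training-set size (2 * n_train)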
m_range = (1 + np.array(m_range)) / (2 * n_train)

plt.plot(m_range, acc_slda, linewidth=2, label='ShrinkageLDA()', color='r')
plt.plot(m_range, acc_nlda, linewidth=2, label='ShrinkageLDA(shrinkage=None)', color='g')
plt.plot(m_range, acc_lda, linewidth=2, label='LDA()', color='b')

plt.xlabel('n_features / n_samples')
plt.ylabel('Classification accuracy')

plt.legend(loc=4, prop={'size': 8})

plt.suptitle('LDA vs sLDA (1 discriminative feature)')

plt.show()
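
The gain from shrinkage in the example above comes from the Ledoit-Wolf
estimate of the shrinkage intensity. As a minimal sketch (not part of this
PR), the intensity that :func:`sklearn.covariance.ledoit_wolf` selects can be
inspected directly; `generate_data` is the helper defined in the example:

import numpy as np
from sklearn.covariance import ledoit_wolf

X, y = generate_data(10, 40)            # 10 samples per class, 41 features
Xc = X[y == 1] - X[y == 1].mean(0)      # center one class, as fit() does
shrunk_cov, intensity = ledoit_wolf(Xc)
print('Ledoit-Wolf shrinkage intensity: {:.3f}'.format(intensity))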
225 changes: 222 additions & 3 deletions sklearn/lda.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,25 @@
"""
The :mod:`sklearn.lda` module implements Linear Discriminant Analysis (LDA).
"""
from __future__ import print_function

# Authors: Matthieu Perrot
# Mathieu Blondel
# Martin Billinger
# Clemens Brunner

from __future__ import division, print_function

import warnings

import numpy as np
from scipy import linalg

from .base import BaseEstimator, ClassifierMixin, TransformerMixin
from .covariance import ledoit_wolf, empirical_covariance
from .utils.extmath import logsumexp
from .utils import check_arrays, array2d, column_or_1d

__all__ = ['LDA']
__all__ = ['LDA', 'ShrinkageLDA']


class LDA(BaseEstimator, ClassifierMixin, TransformerMixin):
@@ -90,7 +95,7 @@ def __init__(self, n_components=None, priors=None):
if (self.priors < 0).any():
raise ValueError('priors must be non-negative')
if self.priors.sum() != 1:
print('warning: the priors do not sum to 1. Renormalizing')
warnings.warn('priors do not sum to 1; renormalizing')
self.priors = self.priors / self.priors.sum()

def fit(self, X, y, store_covariance=False, tol=1.0e-4):
@@ -285,3 +290,217 @@ def predict_log_proba(self, X):
loglikelihood = (values - values.max(axis=1)[:, np.newaxis])
normalization = logsumexp(loglikelihood, axis=1)
return loglikelihood - normalization[:, np.newaxis]


class ShrinkageLDA(BaseEstimator, ClassifierMixin):
"""
Shrinkage Linear Discriminant Analysis (sLDA)

A classifier with a linear decision boundary, generated by fitting class
conditional densities to the data and using Bayes' rule.

The model fits a Gaussian density to each class, assuming that all classes
share the same covariance matrix. By default, the covariance matrix is
estimated with :func:`ledoit_wolf`.

Parameters
----------
`priors` : array, optional, shape = [n_classes]
Priors on classes

`shrinkage` : str, optional
Shrinkage method, set to 'auto' for Ledoit-Wolf shrinkage
or None for empirical covariance estimation (no shrinkage).

Attributes
----------
`coef_` : array-like, shape = [n_classes, n_features]
Coefficients of the features in the linear decision function.

`intercept_` : array, shape = [n_classes]
Intercept (bias) added to the decision function.

`covariance_` : array-like, shape = [n_features, n_features]
Covariance matrix (shared by all classes).

`means_` : array-like, shape = [n_classes, n_features]
Class means.

`priors_` : array-like, shape = [n_classes]
Class priors (sum to 1).

`xbar_` : array-like, shape = [n_features]
Weighted average of class means (weighted with priors).

Examples
--------
>>> import numpy as np
>>> from sklearn.lda import ShrinkageLDA
>>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
>>> y = np.array([1, 1, 1, 2, 2, 2])
>>> clf = ShrinkageLDA()
>>> clf.fit(X, y)
ShrinkageLDA(priors=None, shrinkage='auto')
>>> print(clf.predict([[-0.8, -1]]))
[1]

"""

def __init__(self, priors=None, shrinkage='auto'):
self.priors = np.asarray(priors) if priors is not None else None
self.shrinkage = shrinkage

def fit(self, X, y):
"""
Fit the LDA model according to the given training data and parameters.

Parameters
----------
X : array-like, shape = [n_samples, n_features]
Training vector, where n_samples is the number of samples and
n_features is the number of features.

y : array, shape = [n_samples]
Target values (integers).

"""
X, y = check_arrays(X, y, sparse_format='dense')
X = array2d(X.T).T  # ensure X is 2D with one sample per row if X was 1D
y = column_or_1d(y, warn=True)
self.classes_, y = np.unique(y, return_inverse=True)
n_samples, n_features = X.shape
n_classes = len(self.classes_)
if n_classes < 2:
raise ValueError('y has fewer than 2 classes')

# TODO: support equal priors (with priors=='equal')
if self.priors is not None:
if (self.priors < 0).any():
raise ValueError('priors must be non-negative')
if self.priors.sum() != 1:
warnings.warn('priors do not sum to 1; renormalizing')
self.priors = self.priors / self.priors.sum()
self.priors_ = self.priors
else:
self.priors_ = np.bincount(y) / float(n_samples)

if self.shrinkage is not None:
if self.shrinkage == 'auto':
self._cov_estimator = _ledoit_wolf
else:
warnings.warn('unknown shrinkage method, using no shrinkage')
self._cov_estimator = empirical_covariance
else:
self._cov_estimator = empirical_covariance

means = []
covs = []
Xc = []
for ind in range(n_classes):
Xg = X[y == ind, :]
meang = Xg.mean(0)
means.append(meang)
Xgc = Xg - meang
Xc.append(Xgc)
covg = self._cov_estimator(Xgc)
covg = np.atleast_2d(covg)
covs.append(covg)

self.means_ = np.asarray(means)
self.xbar_ = np.dot(self.priors_, self.means_)

# TODO: weight covariances with priors?
Sw = np.mean(covs, 0) # Within-class scatter
means = self.means_ - self.xbar_

self.coef_ = np.linalg.lstsq(Sw, means.T, rcond=1e-11)[0].T
self.intercept_ = (-0.5 * np.diag(np.dot(means, self.coef_.T))
+ np.log(self.priors_))
return self

def _decision_function(self, X):
X = array2d(np.asarray(X).T).T  # ensure X is 2D with one sample per row
return np.dot(X - self.xbar_, self.coef_.T) + self.intercept_

def decision_function(self, X):
"""
This function returns the decision function values related to each
class on an array of test vectors X.

Parameters
----------
X : array-like, shape = [n_samples, n_features]

Returns
-------
C : array, shape = [n_samples, n_classes] or [n_samples,]
Decision function values related to each class, per sample.
In the two-class case, the shape is [n_samples,], giving the
log likelihood ratio of the positive class.
"""
dec_func = self._decision_function(X)
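# binary case: collapse to one column, the score of classes_[1]
# minus classes_[0] (the log likelihood ratio of the positive class)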
if len(self.classes_) == 2:
return dec_func[:, 1] - dec_func[:, 0]
return dec_func

def predict(self, X):
"""
This function classifies an array of test vectors X. The predicted
class C for each sample in X is returned.

Parameters
----------
X : array-like, shape = [n_samples, n_features]

Returns
-------
C : array, shape = [n_samples]
"""
d = self._decision_function(X)
y_pred = self.classes_.take(d.argmax(1))
return y_pred

def predict_proba(self, X):
"""
This function returns posterior probabilities of classification
according to each class on an array of test vectors X.

Parameters
----------
X : array-like, shape = [n_samples, n_features]

Returns
-------
C : array, shape = [n_samples, n_classes]
"""
values = self._decision_function(X)
# compute the likelihood of the underlying gaussian models
# up to a multiplicative constant.
likelihood = np.exp(values - values.max(axis=1)[:, np.newaxis])
# compute posterior probabilities
return likelihood / likelihood.sum(axis=1)[:, np.newaxis]

def predict_log_proba(self, X):
"""
This function returns posterior log-probabilities of classification
according to each class on an array of test vectors X.

Parameters
----------
X : array-like, shape = [n_samples, n_features]

Returns
-------
C : array, shape = [n_samples, n_classes]
"""
values = self._decision_function(X)
loglikelihood = (values - values.max(axis=1)[:, np.newaxis])
normalization = logsumexp(loglikelihood, axis=1)
return loglikelihood - normalization[:, np.newaxis]


def _ledoit_wolf(*args, **kwargs):
"""
This function returns the covariance estimated with :func:`ledoit_wolf`.
"""
return ledoit_wolf(*args, **kwargs)[0]
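
For reference, the estimator wrapped by `_ledoit_wolf` blends the empirical
covariance with a scaled identity target; only the blending intensity is
estimated from the data. A minimal numpy sketch of that convex combination
(assuming pre-centered data, with `shrinkage` playing the role of the
estimated intensity):

import numpy as np

def shrunk_covariance_sketch(Xc, shrinkage):
    # empirical covariance of the centered data
    emp_cov = np.dot(Xc.T, Xc) / Xc.shape[0]
    # scaled identity target with the same average variance
    mu = np.trace(emp_cov) / emp_cov.shape[1]
    return (1 - shrinkage) * emp_cov + shrinkage * mu * np.eye(emp_cov.shape[1])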