New Feature: Added NCA as first algorithm in metric_learning module #5276

Closed. Wants to merge 5 commits.

Changes from all commits

2 changes: 1 addition & 1 deletion sklearn/__init__.py
@@ -66,7 +66,7 @@
'linear_model', 'manifold', 'metrics', 'mixture', 'multiclass',
'naive_bayes', 'neighbors', 'neural_network', 'pipeline',
'preprocessing', 'qda', 'random_projection', 'semi_supervised',
'svm', 'tree', 'discriminant_analysis',
'svm', 'tree', 'discriminant_analysis', 'metric_learning',
# Non-modules:
'clone']

270 changes: 270 additions & 0 deletions sklearn/metric_learning/NCA.py
@@ -0,0 +1,270 @@
# -*- coding: utf-8 -*-
"""
@author: thiolliere and Yuan Tang (terrytangyuan)
"""
import numpy as np
import scipy.optimize as opt
from sklearn.base import BaseEstimator

class NCAcost(object):

@staticmethod
def cost(A, X, y, threshold=None):
"""Compute the cost function and the gradient
This is the objective function to be minimized

Parameters:
-----------
A : array-like
Projection matrix, shape = [dim, n_features] with dim <= n_features
X : array-like
Training data, shape = [n_features, n_samples]
y : array-like
Target values, shape = [n_samples]

Returns:
--------
f : float
The value of the objective function
gradf : array-like
The gradient of the objective function, shape = [dim * n_features]
"""

(D, N) = np.shape(X)
A = np.reshape(A, (np.size(A) // np.size(X, axis=0), np.size(X, axis=0)))
(d, aux) = np.shape(A)
assert D == aux

AX = np.dot(A, X)
normAX = np.linalg.norm(AX[:, :, None] - AX[:, None, :], axis=0)

denomSum = np.sum(np.exp(-normAX[:, :]), axis=0)
Pij = np.exp(- normAX) / denomSum[:, None]
if threshold is not None:
Pij[Pij < threshold] = 0
Pij[Pij > 1-threshold] = 1

mask = (y != y[:, None])
Pijmask = np.ma.masked_array(Pij, mask)
P = np.array(np.sum(Pijmask, axis=1))
mask = np.logical_not(mask)  # now True where labels match

f = np.sum(P)

Xi = X[:, :, None] - X[:, None, :]
Xi = np.swapaxes(Xi, 0, 2)

Xi = Pij[:, :, None] * Xi

Xij = Xi[:, :, :, None] * Xi[:, :, None, :]

gradf = np.sum(P[:, None, None] * np.sum(Xij[:], axis=1), axis=0)

# To optimize (use mask ?)
for i in range(N):
aux = np.sum(Xij[i, mask[i]], axis=0)
gradf -= aux

gradf = 2 * np.dot(A, gradf)
gradf = -np.reshape(gradf, np.size(gradf))
f = np.size(X, 1) - f

return [f, gradf]

@staticmethod
def f(A, X, y):
return NCAcost.cost(A, X, y)[0]

@staticmethod
def grad(A, X, y):
return NCAcost.cost(A, X, y)[1]

@staticmethod
def cost_g(A, X, y, threshold=None):
"""Compute the cost function and the gradient for the K-L divergence

Parameters:
-----------
A : array-like
Projection matrix, shape = [dim, n_features] with dim <= n_features
X : array-like
Training data, shape = [n_features, n_samples]
y : array-like
Target values, shape = [n_samples]

Returns:
--------
g : float
The value of the objective function
gradg : array-like
The gradient of the objective function, shape = [dim * n_features]
"""

(D, N) = np.shape(X)
A = np.reshape(A, (np.size(A) // np.size(X, axis=0), np.size(X, axis=0)))
(d, aux) = np.shape(A)
assert D == aux

AX = np.dot(A, X)
normAX = np.linalg.norm(AX[:, :, None] - AX[:, None, :], axis=0)

denomSum = np.sum(np.exp(-normAX[:, :]), axis=0)
Pij = np.exp(- normAX) / denomSum[:, None]
if threshold is not None:
Pij[Pij < threshold] = 0
Pij[Pij > 1-threshold] = 1

mask = (y != y[:, None])
Pijmask = np.ma.masked_array(Pij, mask)
P = np.array(np.sum(Pijmask, axis=1))
mask = np.logical_not(mask)  # now True where labels match

g = np.sum(np.log(P))

Xi = X[:, :, None] - X[:, None, :]
Xi = np.swapaxes(Xi, 0, 2)

Xi = Pij[:, :, None] * Xi

Xij = Xi[:, :, :, None] * Xi[:, :, None, :]

gradg = np.sum(np.sum(Xij[:], axis=1), axis=0)

# To optimize (use mask ?)
for i in range(N):
aux = np.sum(Xij[i, mask[i]], axis=0) / P[i]
gradg -= aux

gradg = 2 * np.dot(A, gradg)
gradg = -np.reshape(gradg, np.size(gradg))
g = -g

return [g, gradg]


class NCA(BaseEstimator):

def __init__(self, metric=None, dim=None,
threshold=None, objective='mahalanobis', **kwargs):
"""Classification and/or dimensionality reduction with the neighborhood
component analysis.

The algorithm apply the softmax function on the transformed space and
tries to maximise the leave-one-out classification.

Parameters:
-----------
metric : array-like, optional
The initial distance metric (projection matrix). If not specified,
the algorithm starts from an identity projection (plain Euclidean
distance). shape = [dim, n_features] with dim <= n_features being
the dimension of the output space.
dim : int, optional
The number of dimensions to keep for dimensionality reduction. If
not specified, the algorithm won't perform dimensionality reduction.
threshold : float, optional
Threshold applied to the softmax probabilities; set it higher to
discard more distant neighbors.
objective : string, optional
The objective function to optimize. The two implemented cost
functions are for Mahalanobis distance and KL-divergence.
**kwargs : keyword arguments, optional
See scipy.optimize.minimize for the list of additional arguments.
Those arguments include:

method : string
The algorithm to use for optimization.
options : dict
a dictionary of solver options
hess, hessp : callable
Hessian matrix
bounds : sequence
Bounds for variables
constraints : dict or sequence of dict
Constraints definition
tol : float
Tolerance for termination

Attributes:
-----------
metric : array-like
The learned distance metric (projection matrix), shape = [dim, n_features]
"""
self.metric = metric
self.dim = dim
self.threshold = threshold
if objective == 'mahalanobis':
self.objective = NCAcost.cost
elif objective == 'kl-divergence':
self.objective = NCAcost.cost_g
else:
raise ValueError("objective must be 'mahalanobis' or 'kl-divergence'")
self.kwargs = kwargs

def fit(self, X, y):
"""Fit the model using X as training data and y as target values.

Parameters:
-----------
X : array-like
Training data, shape = [n_features, n_samples]
y : array-like
Target values, shape = [n_samples]
"""
if self.metric is None:
if self.dim is None:
# X has shape [n_features, n_samples], so the metric is sized from axis 0.
self.metric = np.eye(np.size(X, 0))
self.dim = np.size(X, 0)
else:
self.metric = np.eye(self.dim, np.size(X, 0))

res = opt.minimize(fun=self.objective,
x0=self.metric,
args=(X, y, self.threshold),
jac=True,
**self.kwargs
)

# Reshape the flat solution back into a [dim, n_features] projection matrix.
self.metric = np.reshape(res.x,
(np.size(res.x) // np.size(X, 0),
np.size(X, 0)))

def fit_transform(self, X, y):
"""Fit the model with X and apply the dimensionality reduction on X.

Parameters:
-----------
X : array-like
Training data, shape = [n_features, n_samples]
y : array-like
Target values, shape = [n_samples]

Returns:
--------
X_new : array-like
shape = [dim, n_samples]
"""
self.fit(X, y)
return np.dot(self.metric, X)

def score(self, X, y):
"""Returns the proportion of X correctly classified by the leave-one-
out classification

Parameters:
-----------
X : array-like
Training data, shape = [n_features, n_samples]
y : array-like
Target values, shape = [n_samples]

Returns:
--------
score : float
The proportion of X correctly classified
"""
return 1 - NCAcost.cost(self.metric, X, y)[0]/np.size(X, 1)

def getParameters(self):
"""Returns a dictionary of the parameters
"""
return dict(metric=self.metric, dim=self.dim, objective=self.objective,
threshold=self.threshold, **self.kwargs)
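
For context, a minimal usage sketch of the estimator added in this PR (not part
of the diff; the toy data, solver choice and option values are illustrative
assumptions, and X follows the [n_features, n_samples] convention from the
docstrings above):

import numpy as np
from sklearn.metric_learning import NCA

# Toy data: 3 features along the rows, 100 samples along the columns.
X = np.random.rand(3, 100)
y = (X[0] > 0.5).astype(int)

# Learn a 2 x 3 projection starting from a simple initial metric.
nca = NCA.NCA(metric=np.eye(2, 3), objective='mahalanobis',
              method='CG', options={'maxiter': 20})
X_new = nca.fit_transform(X, y)   # projected data, shape (2, 100)
print(nca.score(X, y))            # leave-one-out accuracy, in [0, 1]
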
8 changes: 8 additions & 0 deletions sklearn/metric_learning/__init__.py
@@ -0,0 +1,8 @@
"""
The :mod:`sklearn.metric_learning` module implements metric learning models.

The algorithms that have been implemented are:
Relevant Components Analysis (RCA)
Contributor:

RCA?

Author:

Typo! I was reading something else at the same time...thanks!
On Sep 16, 2015 4:19 AM, "B@rmaley.exe" notifications@github.com wrote:

In sklearn/metric_learning/__init__.py
#5276 (comment):

@@ -0,0 +1,8 @@
+"""
+The :mod:sklearn.metric_learning module implements metric learning models.
+
+The algorithms that have been implemented are:
+Relevant Components Analysis (RCA)

RCA?


Reply to this email directly or view it on GitHub
https://github.com/scikit-learn/scikit-learn/pull/5276/files#r39603872.

"""

__all__ = ['NCA']
24 changes: 24 additions & 0 deletions sklearn/tests/test_nca.py
@@ -0,0 +1,24 @@
import numpy as np
from sklearn.metric_learning import NCA
from sklearn.utils.testing import ignore_warnings
from sklearn.utils.testing import assert_array_almost_equal

N = 300
aux = (np.concatenate([0.5*np.ones((N/2, 1)),
np.zeros((N/2, 1)), 1.1*np.ones((N/2, 1))], axis=1))
X = np.concatenate([np.random.rand(N/2, 3),
np.random.rand(N/2, 3) + aux])

y = np.concatenate([np.concatenate([np.ones((N/2, 1)), np.zeros((N/2, 1))]),
np.concatenate([np.zeros((N/2, 1)), np.ones((N/2, 1))])],
axis=1)
X = X.T
y = y[:, 0]
A = np.array([[1, 0, 0], [0, 1, 0]])

def test_NCA():
# Training
nca = NCA.NCA(metric=A, method='BFGS', objective='kl-divergence', options={'maxiter': 10, 'disp': True})
print nca.score(X, y)
Contributor:

sklearn aims for Py3 compliance, so you should use the function-like print.

Also, how can this test fail other than by NCA throwing an exception? You may want to add some assertions.

Author:

Thanks. Yes, I will add assertions later. Just wanted to pass Travis first.

This single test is not actually failing; it passed on my local PC. It's
just not being included in __all__, which caused other tests in the
package to fail. I'll update soon.
On Sep 16, 2015 4:27 AM, "B@rmaley.exe" notifications@github.com wrote:

In sklearn/tests/test_nca.py
#5276 (comment)
:

+aux = (np.concatenate([0.5*np.ones((N/2, 1)),
+                       np.zeros((N/2, 1)), 1.1*np.ones((N/2, 1))], axis=1))
+X = np.concatenate([np.random.rand(N/2, 3),
+                    np.random.rand(N/2, 3) + aux])
+y = np.concatenate([np.concatenate([np.ones((N/2, 1)), np.zeros((N/2, 1))]),
+                    np.concatenate([np.zeros((N/2, 1)), np.ones((N/2, 1))])],
+                   axis=1)
+X = X.T
+y = y[:, 0]
+A = np.array([[1, 0, 0], [0, 1, 0]])
+
+def test_NCA():
+    # Training
+    nca = NCA.NCA(metric=A, method='BFGS', objective='KL-divergence', options={'maxiter': 10, 'disp': True})
+    print nca.score(X, y)

sklearn aims for Py3 compliance, so you should use the function-like print.

Also, how can this test fail other than by NCA throwing an exception? You
may want to add some assertions.


Reply to this email directly or view it on GitHub
https://github.com/scikit-learn/scikit-learn/pull/5276/files#r39604544.

nca.fit(X, y)
print nca.score(X, y)
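
Following up on the assertions point, one possible shape for such a test (a
hedged sketch only; the test name and the exact checks are illustrative and not
part of this PR):

def test_nca_basic():
    nca = NCA.NCA(metric=A, method='BFGS', objective='kl-divergence',
                  options={'maxiter': 10})
    nca.fit(X, y)
    # The learned projection keeps the requested shape [dim, n_features].
    assert nca.metric.shape == (2, 3)
    # The leave-one-out score is a proportion, so it must lie in [0, 1].
    score = nca.score(X, y)
    assert 0.0 <= score <= 1.0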