-
Notifications
You must be signed in to change notification settings - Fork 228
[MRG] Create new Mahalanobis mixin #96
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
7b3d739
f21cc85
6f8a115
f9e3c82
1a32c11
d0f5019
eba2a60
b5d966f
ee0d1bd
6b5a3b5
6eb65ac
35ece36
dca6838
3254ce3
e209b21
abea7de
65e794a
12b5429
eff278e
810d191
585b5d2
af0a3ac
f2b0163
3c37fd7
912c1db
0e0ebf1
31350e8
d1f811b
4dd8990
779a93a
75d4ad2
657cdcd
131ccbb
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,62 +1,148 @@ | ||
from numpy.linalg import cholesky | ||
from sklearn.base import BaseEstimator, TransformerMixin | ||
from sklearn.base import BaseEstimator | ||
from sklearn.utils.validation import check_array | ||
from sklearn.metrics import roc_auc_score | ||
import numpy as np | ||
from abc import ABCMeta, abstractmethod | ||
import six | ||
from ._util import check_tuples | ||
|
||
|
||
class BaseMetricLearner(BaseEstimator): | ||
def __init__(self): | ||
raise NotImplementedError('BaseMetricLearner should not be instantiated') | ||
class BaseMetricLearner(six.with_metaclass(ABCMeta, BaseEstimator)): | ||
|
||
def metric(self): | ||
"""Computes the Mahalanobis matrix from the transformation matrix. | ||
@abstractmethod | ||
def score_pairs(self, pairs): | ||
"""Returns the score between pairs | ||
(can be a similarity, or a distance/metric depending on the algorithm) | ||
|
||
.. math:: M = L^{\\top} L | ||
Parameters | ||
---------- | ||
pairs : `numpy.ndarray`, shape=(n_samples, 2, n_features) | ||
3D array of pairs. | ||
|
||
Returns | ||
------- | ||
M : (d x d) matrix | ||
scores: `numpy.ndarray` of shape=(n_pairs,) | ||
The score of every pair. | ||
""" | ||
L = self.transformer() | ||
return L.T.dot(L) | ||
|
||
def transformer(self): | ||
"""Computes the transformation matrix from the Mahalanobis matrix. | ||
|
||
L = cholesky(M).T | ||
class MetricTransformer(six.with_metaclass(ABCMeta)): | ||
|
||
@abstractmethod | ||
def transform(self, X): | ||
"""Applies the metric transformation. | ||
|
||
Parameters | ||
---------- | ||
X : (n x d) matrix | ||
Data to transform. | ||
|
||
Returns | ||
------- | ||
L : upper triangular (d x d) matrix | ||
transformed : (n x d) matrix | ||
Input data transformed to the metric space by :math:`XL^{\\top}` | ||
""" | ||
return cholesky(self.metric()).T | ||
|
||
|
||
class MetricTransformer(TransformerMixin): | ||
class MahalanobisMixin(six.with_metaclass(ABCMeta, BaseMetricLearner, | ||
MetricTransformer)): | ||
"""Mahalanobis metric learning algorithms. | ||
|
||
Algorithm that learns a Mahalanobis (pseudo) distance :math:`d_M(x, x')`, | ||
defined between two column vectors :math:`x` and :math:`x'` by: :math:`d_M(x, | ||
x') = \sqrt{(x-x')^T M (x-x')}`, where :math:`M` is a learned symmetric | ||
positive semi-definite (PSD) matrix. The metric between points can then be | ||
expressed as the euclidean distance between points embedded in a new space | ||
through a linear transformation. Indeed, the above matrix can be decomposed | ||
into the product of two transpose matrices (through SVD or Cholesky | ||
decomposition): :math:`d_M(x, x')^2 = (x-x')^T M (x-x') = (x-x')^T L^T L | ||
(x-x') = (L x - L x')^T (L x- L x')` | ||
|
||
Attributes | ||
---------- | ||
transformer_ : `numpy.ndarray`, shape=(num_dims, n_features) | ||
The learned linear transformation ``L``. | ||
""" | ||
|
||
def score_pairs(self, pairs): | ||
"""Returns the learned Mahalanobis distance between pairs. | ||
|
||
This distance is defined as: :math:`d_M(x, x') = \sqrt{(x-x')^T M (x-x')}` | ||
where ``M`` is the learned Mahalanobis matrix, for every pair of points | ||
``x`` and ``x'``. This corresponds to the euclidean distance between | ||
embeddings of the points in a new space, obtained through a linear | ||
transformation. Indeed, we have also: :math:`d_M(x, x') = \sqrt{(x_e - | ||
x_e')^T (x_e- x_e')}`, with :math:`x_e = L x` (See | ||
:class:`MahalanobisMixin`). | ||
|
||
def transform(self, X=None): | ||
"""Applies the metric transformation. | ||
Parameters | ||
---------- | ||
pairs : `numpy.ndarray`, shape=(n_samples, 2, n_features) | ||
3D array of pairs, or 2D array of one pair. | ||
|
||
Returns | ||
------- | ||
scores: `numpy.ndarray` of shape=(n_pairs,) | ||
The learned Mahalanobis distance for every pair. | ||
""" | ||
pairs = check_tuples(pairs) | ||
pairwise_diffs = self.transform(pairs[:, 1, :] - pairs[:, 0, :]) | ||
# (for MahalanobisMixin, the embedding is linear so we can just embed the | ||
# difference) | ||
return np.sqrt(np.sum(pairwise_diffs**2, axis=-1)) | ||
|
||
def transform(self, X): | ||
"""Embeds data points in the learned linear embedding space. | ||
|
||
Transforms samples in ``X`` into ``X_embedded``, samples inside a new | ||
embedding space such that: ``X_embedded = X.dot(L.T)``, where ``L`` is | ||
the learned linear transformation (See :class:`MahalanobisMixin`). | ||
|
||
Parameters | ||
---------- | ||
X : (n x d) matrix, optional | ||
Data to transform. If not supplied, the training data will be used. | ||
X : `numpy.ndarray`, shape=(n_samples, n_features) | ||
The data points to embed. | ||
|
||
Returns | ||
------- | ||
transformed : (n x d) matrix | ||
Input data transformed to the metric space by :math:`XL^{\\top}` | ||
X_embedded : `numpy.ndarray`, shape=(n_samples, num_dims) | ||
The embedded data points. | ||
""" | ||
X_checked = check_array(X, accept_sparse=True) | ||
return X_checked.dot(self.transformer_.T) | ||
|
||
def metric(self): | ||
return self.transformer_.T.dot(self.transformer_) | ||
|
||
def transformer_from_metric(self, metric): | ||
"""Computes the transformation matrix from the Mahalanobis matrix. | ||
|
||
Since by definition the metric `M` is positive semi-definite (PSD), it | ||
admits a Cholesky decomposition: L = cholesky(M).T. However, currently the | ||
computation of the Cholesky decomposition used does not support | ||
non-definite matrices. If the metric is not definite, this method will | ||
return L = V.T w^( -1/2), with M = V*w*V.T being the eigenvector | ||
decomposition of M with the eigenvalues in the diagonal matrix w and the | ||
columns of V being the eigenvectors. If M is diagonal, this method will | ||
just return its elementwise square root (since the diagonalization of | ||
the matrix is itself). | ||
|
||
Returns | ||
------- | ||
L : (d x d) matrix | ||
""" | ||
if X is None: | ||
X = self.X_ | ||
|
||
if np.allclose(metric, np.diag(np.diag(metric))): | ||
return np.sqrt(metric) | ||
elif not np.isclose(np.linalg.det(metric), 0): | ||
return cholesky(metric).T | ||
else: | ||
X = check_array(X, accept_sparse=True) | ||
L = self.transformer() | ||
return X.dot(L.T) | ||
w, V = np.linalg.eigh(metric) | ||
return V.T * np.sqrt(np.maximum(0, w[:, None])) | ||
|
||
|
||
class _PairsClassifierMixin: | ||
class _PairsClassifierMixin(BaseMetricLearner): | ||
|
||
def predict(self, pairs): | ||
"""Predicts the learned metric between input pairs. | ||
|
@@ -74,11 +160,11 @@ def predict(self, pairs): | |
y_predicted : `numpy.ndarray` of floats, shape=(n_constraints,) | ||
The predicted learned metric value between samples in every pair. | ||
""" | ||
pairwise_diffs = pairs[:, 0, :] - pairs[:, 1, :] | ||
return np.sqrt(np.sum(pairwise_diffs.dot(self.metric()) * pairwise_diffs, | ||
axis=1)) | ||
pairs = check_tuples(pairs) | ||
return self.score_pairs(pairs) | ||
|
||
def decision_function(self, pairs): | ||
pairs = check_tuples(pairs) | ||
return self.predict(pairs) | ||
|
||
def score(self, pairs, y): | ||
|
@@ -104,12 +190,32 @@ def score(self, pairs, y): | |
score : float | ||
The ``roc_auc`` score. | ||
""" | ||
pairs = check_tuples(pairs) | ||
return roc_auc_score(y, self.decision_function(pairs)) | ||
|
||
|
||
class _QuadrupletsClassifierMixin: | ||
class _QuadrupletsClassifierMixin(BaseMetricLearner): | ||
|
||
def predict(self, quadruplets): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. it would be more logical if There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I agree that having a There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, this is what I meant, sorry for the confusion |
||
"""Predicts the ordering between sample distances in input quadruplets. | ||
|
||
For each quadruplet, returns 1 if the quadruplet is in the right order ( | ||
first pair is more similar than second pair), and -1 if not. | ||
|
||
Parameters | ||
---------- | ||
quadruplets : array-like, shape=(n_constraints, 4, n_features) | ||
Input quadruplets. | ||
|
||
Returns | ||
------- | ||
prediction : `numpy.ndarray` of floats, shape=(n_constraints,) | ||
Predictions of the ordering of pairs, for each quadruplet. | ||
""" | ||
quadruplets = check_tuples(quadruplets) | ||
return np.sign(self.decision_function(quadruplets)) | ||
|
||
def decision_function(self, quadruplets): | ||
"""Predicts differences between sample distances in input quadruplets. | ||
|
||
For each quadruplet of samples, computes the difference between the learned | ||
|
@@ -122,18 +228,12 @@ def predict(self, quadruplets): | |
|
||
Returns | ||
------- | ||
prediction : `numpy.ndarray` of floats, shape=(n_constraints,) | ||
decision_function : `numpy.ndarray` of floats, shape=(n_constraints,) | ||
Metric differences. | ||
""" | ||
similar_diffs = quadruplets[:, 0, :] - quadruplets[:, 1, :] | ||
dissimilar_diffs = quadruplets[:, 2, :] - quadruplets[:, 3, :] | ||
return (np.sqrt(np.sum(similar_diffs.dot(self.metric()) * | ||
similar_diffs, axis=1)) - | ||
np.sqrt(np.sum(dissimilar_diffs.dot(self.metric()) * | ||
dissimilar_diffs, axis=1))) | ||
|
||
def decision_function(self, quadruplets): | ||
return self.predict(quadruplets) | ||
quadruplets = check_tuples(quadruplets) | ||
return (self.score_pairs(quadruplets[:, :2, :]) - | ||
self.score_pairs(quadruplets[:, 2:, :])) | ||
|
||
def score(self, quadruplets, y=None): | ||
"""Computes score on input quadruplets | ||
|
@@ -154,4 +254,5 @@ def score(self, quadruplets, y=None): | |
score : float | ||
The quadruplets score. | ||
""" | ||
return - np.mean(np.sign(self.decision_function(quadruplets))) | ||
quadruplets = check_tuples(quadruplets) | ||
return -np.mean(self.predict(quadruplets)) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
this should probably be a private method?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Agreed