From 8576941341ec39fd503a5e09bda6b460f56c9f9a Mon Sep 17 00:00:00 2001
From: CJ Carey
Date: Mon, 16 May 2016 14:45:13 -0400
Subject: [PATCH 001/210] Bumping docs version [ci skip]

---
 doc/conf.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/doc/conf.py b/doc/conf.py
index 8c6a79c2..5e3f2cd9 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -17,8 +17,8 @@ project = u'metric-learn'
 copyright = u'2015, CJ Carey and Yuan Tang'
 author = u'CJ Carey and Yuan Tang'
 
-version = '0.1.0'
-release = '0.1.0'
+version = '0.2.1'
+release = '0.2.1'
 
 language = 'en'
 exclude_patterns = ['_build']

From 5475a269502e5d417d2966a47774d79fcfd7c4e8 Mon Sep 17 00:00:00 2001
From: CJ Carey
Date: Fri, 27 May 2016 21:11:46 -0400
Subject: [PATCH 002/210] Attempting to simplify travis config

---
 .travis.yml | 22 +++++-----------------
 1 file changed, 5 insertions(+), 17 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index f31bef69..378cc5f5 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,23 +1,11 @@
 language: python
 sudo: false
+cache: pip
 python:
   - "2.7"
   - "3.4"
-install:
-  - if [[ "$TRAVIS_PYTHON_VERSION" == 2.* ]]; then
-      wget http://repo.continuum.io/miniconda/Miniconda-3.4.2-Linux-x86_64.sh -O miniconda.sh;
-    else
-      wget http://repo.continuum.io/miniconda/Miniconda3-3.4.2-Linux-x86_64.sh -O miniconda.sh;
-    fi
-  - bash miniconda.sh -b -p $HOME/miniconda
-  - export PATH="$HOME/miniconda/bin:$PATH"
-  - hash -r
-  - conda config --set always_yes yes --set changeps1 no
-  - conda update -q conda
-  # Useful for debugging any issues with conda
-  - conda info -a
-
-  - conda create -q -n test-environment python=$TRAVIS_PYTHON_VERSION scikit-learn
-  - source activate test-environment
-  - python setup.py install
+before_install:
+  - pip install --upgrade pip
+  - pip install wheel
+  - pip install numpy scipy scikit-learn
 script: python setup.py test

From c1372af3698ab93a0489b89f23f3868cded5e6e3 Mon Sep 17 00:00:00 2001
From: "Yuan (Terry) Tang"
Date: Thu, 2 Jun 2016 14:02:39 -0500
Subject: [PATCH 003/210] Remove downloads stats for now (#20)

---
 README.rst | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/README.rst b/README.rst
index 3818b68f..d62d32c0 100644
--- a/README.rst
+++ b/README.rst
@@ -1,4 +1,4 @@
-|Travis-CI Build Status| |License| |PyPI version| |PyPI downloads|
+|Travis-CI Build Status| |License| |PyPI version|
 
 metric-learn
 =============
@@ -67,5 +67,4 @@ more complete.
   :target: http://badges.mit-license.org
 .. |PyPI version| image:: https://badge.fury.io/py/metric-learn.svg
   :target: http://badge.fury.io/py/metric-learn
-.. |PyPI downloads| image:: https://img.shields.io/pypi/dm/metric-learn.svg
-   :target: https://pypi.python.org/pypi/metric-learn/
+

From 3c57c64e139ed25454901df2c9611cb14fb40cdd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ond=C5=99ej=20=C5=A0vec?=
Date: Mon, 20 Jun 2016 20:14:24 +0200
Subject: [PATCH 004/210] Fit constraints (#19)

* ITML fit_constraints
* SDML fit_constraints
* LSML fit_constraints
* RCA fit_constraints
* Renamed fit->fit_constraints in test
* Created new supervised classes for methods with semi-supervised constraints
* Comment
* Removed duplicate code
* Super compatible with Python 2
* Code standards
* Fix overriding of params
---
 metric_learn/__init__.py    |  9 ++---
 metric_learn/constraints.py | 62 ++++++++++++++++++++++++++++++++++
 metric_learn/itml.py        | 60 +++++++++++++++++++++++++++------
 metric_learn/lmnn.py        | 13 +++----
 metric_learn/lsml.py        | 67 +++++++++++++++++++++++++++----------
 metric_learn/rca.py         | 62 ++++++++++++++++++++++------------
 metric_learn/sdml.py        | 54 ++++++++++++++++++++----------
 test/metric_learn_test.py   | 30 ++++++-----------
 8 files changed, 262 insertions(+), 95 deletions(-)
 create mode 100644 metric_learn/constraints.py

diff --git a/metric_learn/__init__.py b/metric_learn/__init__.py
index 5efb9f5c..638d6d4d 100644
--- a/metric_learn/__init__.py
+++ b/metric_learn/__init__.py
@@ -1,9 +1,10 @@
 from __future__ import absolute_import
 
-from .itml import ITML
+from .itml import ITML, ITML_Supervised
 from .lmnn import LMNN
-from .lsml import LSML
-from .sdml import SDML
+from .lsml import LSML, LSML_Supervised
+from .sdml import SDML, SDML_Supervised
 from .nca import NCA
 from .lfda import LFDA
-from .rca import RCA
+from .rca import RCA, RCA_Supervised
+from .constraints import adjacencyMatrix, positiveNegativePairs, relativeQuadruplets, chunks

diff --git a/metric_learn/constraints.py b/metric_learn/constraints.py
new file mode 100644
index 00000000..58a2768a
--- /dev/null
+++ b/metric_learn/constraints.py
@@ -0,0 +1,62 @@
+""" Helper class that can generate different types of constraints from supervised data labels."""
+
+import numpy as np
+import random
+from six.moves import xrange
+
+# @TODO: consider creating a stateful class
+# https://github.com/all-umass/metric-learn/pull/19#discussion_r67386226
+
+def adjacencyMatrix(labels, num_points, num_constraints):
+  a, c = np.random.randint(len(labels), size=(2,num_constraints))
+  b, d = np.empty((2, num_constraints), dtype=int)
+  for i,(al,cl) in enumerate(zip(labels[a],labels[c])):
+    b[i] = random.choice(np.nonzero(labels == al)[0])
+    d[i] = random.choice(np.nonzero(labels != cl)[0])
+  W = np.zeros((num_points,num_points))
+  W[a,b] = 1
+  W[c,d] = -1
+  # make W symmetric
+  W[b,a] = 1
+  W[d,c] = -1
+  return W
+
+def positiveNegativePairs(labels, num_points, num_constraints):
+  ac,bd = np.random.randint(num_points, size=(2,num_constraints))
+  pos = labels[ac] == labels[bd]
+  a,c = ac[pos], ac[~pos]
+  b,d = bd[pos], bd[~pos]
+  return a,b,c,d
+
+def relativeQuadruplets(labels, num_constraints):
+  C = np.empty((num_constraints,4), dtype=int)
+  a, c = np.random.randint(len(labels), size=(2,num_constraints))
+  for i,(al,cl) in enumerate(zip(labels[a],labels[c])):
+    C[i,1] = random.choice(np.nonzero(labels == al)[0])
+    C[i,3] = random.choice(np.nonzero(labels != cl)[0])
+  C[:,0] = a
+  C[:,2] = c
+  return C
+
+def chunks(Y, num_chunks=100, chunk_size=2, seed=None):
+  # @TODO: remove seed from params and use numpy RandomState
+  # https://github.com/all-umass/metric-learn/pull/19#discussion_r67386666
+  random.seed(seed)
+  chunks = -np.ones_like(Y, dtype=int)
+  uniq, lookup = np.unique(Y, return_inverse=True)
+  all_inds = [set(np.where(lookup==c)[0]) for c in xrange(len(uniq))]
+  idx = 0
+  while idx < num_chunks and all_inds:
+    c = random.randint(0, len(all_inds)-1)
+    inds = all_inds[c]
+    if len(inds) < chunk_size:
+      del all_inds[c]
+      continue
+    ii = random.sample(inds, chunk_size)
+    inds.difference_update(ii)
+    chunks[ii] = idx
+    idx += 1
+  if idx < num_chunks:
+    raise ValueError('Unable to make %d chunks of %d examples each' %
+                     (num_chunks, chunk_size))
+  return chunks

diff --git a/metric_learn/itml.py b/metric_learn/itml.py
index c6ad7e97..95636a9a 100644
--- a/metric_learn/itml.py
+++ b/metric_learn/itml.py
@@ -16,11 +16,12 @@
 from six.moves import xrange
 from sklearn.metrics import pairwise_distances
 from .base_metric import BaseMetricLearner
+from .constraints import positiveNegativePairs
 
 
 class ITML(BaseMetricLearner):
   """Information Theoretic Metric Learning (ITML)"""
-  def __init__(self, gamma=1., max_iters=1000, convergence_threshold=1e-3):
+  def __init__(self, gamma=1., max_iters=1000, convergence_threshold=1e-3, verbose=False):
     """Initialize the learner.
 
     Parameters
@@ -29,11 +30,14 @@ def __init__(self, gamma=1., max_iters=1000, convergence_threshold=1e-3):
       value for slack variables
     max_iters : int, optional
     convergence_threshold : float, optional
+    verbose : bool, optional
+      if True, prints information while learning
     """
     self.params = {
       'gamma': gamma,
       'max_iters': max_iters,
       'convergence_threshold': convergence_threshold,
+      'verbose': verbose,
     }
 
   def _process_inputs(self, X, constraints, bounds, A0):
@@ -57,7 +61,7 @@ def _process_inputs(self, X, constraints, bounds, A0):
       self.A = A0
     return a,b,c,d
 
-  def fit(self, X, constraints, bounds=None, A0=None, verbose=False):
+  def fit(self, X, constraints, bounds=None, A0=None):
     """Learn the ITML model.
 
     Parameters
     ----------
     X : (n x d) data matrix
       each row corresponds to a single instance
     constraints : tuple of arrays
       (a,b,c,d) indices into X, such that d(X[a],X[b]) < d(X[c],X[d])
     bounds : list (pos,neg) pairs, optional
       bounds on similarity, s.t. d(X[a],X[b]) < pos and d(X[c],X[d]) > neg
     A0 : (d x d) matrix, optional
       initial regularization matrix, defaults to identity
     """
+    verbose = self.params['verbose']
     a,b,c,d = self._process_inputs(X, constraints, bounds, A0)
     gamma = self.params['gamma']
     conv_thresh = self.params['convergence_threshold']
@@ -121,14 +126,6 @@ def fit(self, X, constraints, bounds=None, A0=None, verbose=False):
   def metric(self):
     return self.A
 
-  @classmethod
-  def prepare_constraints(self, labels, num_points, num_constraints):
-    ac,bd = np.random.randint(num_points, size=(2,num_constraints))
-    pos = labels[ac] == labels[bd]
-    a,c = ac[pos], ac[~pos]
-    b,d = bd[pos], bd[~pos]
-    return a,b,c,d
-
 # hack around lack of axis kwarg in older numpy versions
 try:
   np.linalg.norm([[4]], axis=1)
@@ -138,3 +135,46 @@ def _vector_norm(X):
 else:
   def _vector_norm(X):
     return np.linalg.norm(X, axis=1)
+
+
+class ITML_Supervised(ITML):
+  """Information Theoretic Metric Learning (ITML)"""
+  def __init__(self, gamma=1., max_iters=1000, convergence_threshold=1e-3, num_constraints=None,
+               bounds=None, A0=None, verbose=False):
+    """Initialize the learner.
+
+    Parameters
+    ----------
+    gamma : float, optional
+      value for slack variables
+    max_iters : int, optional
+    convergence_threshold : float, optional
+    num_constraints: int, needed for .fit()
+    verbose : bool, optional
+      if True, prints information while learning
+    """
+    ITML.__init__(self, gamma=gamma, max_iters=max_iters,
+                  convergence_threshold=convergence_threshold, verbose=verbose)
+    self.params.update({
+      'num_constraints': num_constraints,
+      'bounds': bounds,
+      'A0': A0,
+    })
+
+  def fit(self, X, labels):
+    """Create constraints from labels and learn the ITML model.
+    Needs num_constraints specified in constructor.
+
+    Parameters
+    ----------
+    X : (n x d) data matrix
+      each row corresponds to a single instance
+    labels : (n) data labels
+    """
+    num_constraints = self.params['num_constraints']
+    if num_constraints is None:
+      num_classes = np.unique(labels)
+      num_constraints = 20*(len(num_classes))**2
+
+    C = positiveNegativePairs(labels, X.shape[0], num_constraints)
+    return ITML.fit(self, X, C, bounds=self.params['bounds'], A0=self.params['A0'])

diff --git a/metric_learn/lmnn.py b/metric_learn/lmnn.py
index 5552a73b..189b4e83 100644
--- a/metric_learn/lmnn.py
+++ b/metric_learn/lmnn.py
@@ -29,7 +29,7 @@ def transformer(self):
 # slower Python version
 class python_LMNN(_base_LMNN):
   def __init__(self, k=3, min_iter=50, max_iter=1000, learn_rate=1e-7,
-               regularization=0.5, convergence_tol=0.001):
+               regularization=0.5, convergence_tol=0.001, verbose=False):
     """Initialize the LMNN object
 
     k: number of neighbors to consider. (does not include self-edges)
     """
     _base_LMNN.__init__(self, k=k, min_iter=min_iter, max_iter=max_iter,
                         learn_rate=learn_rate, regularization=regularization,
-                        convergence_tol=convergence_tol)
+                        convergence_tol=convergence_tol, verbose=verbose)
 
   def _process_inputs(self, X, labels):
     num_pts = X.shape[0]
@@ -51,8 +51,9 @@
           'not enough class labels for specified k'
           ' (smallest class has %d)' % required_k)
 
-  def fit(self, X, labels, verbose=False):
+  def fit(self, X, labels):
     k = self.params['k']
+    verbose = self.params['verbose']
     reg = self.params['regularization']
     learn_rate = self.params['learn_rate']
     convergence_tol = self.params['convergence_tol']
@@ -236,12 +237,12 @@
 
 class LMNN(_base_LMNN):
   def __init__(self, k=3, min_iter=50, max_iter=1000, learn_rate=1e-7,
-               regularization=0.5, convergence_tol=0.001, use_pca=True):
+               regularization=0.5, convergence_tol=0.001, use_pca=True, verbose=False):
     _base_LMNN.__init__(self, k=k, min_iter=min_iter, max_iter=max_iter,
                         learn_rate=learn_rate, regularization=regularization,
-                        convergence_tol=convergence_tol, use_pca=use_pca)
+                        convergence_tol=convergence_tol, use_pca=use_pca, verbose=verbose)
 
-  def fit(self, X, labels, verbose=False):
+  def fit(self, X, labels):
     self.X = X
     self.L = np.eye(X.shape[1])
     labels = MulticlassLabels(labels.astype(np.float64))

diff --git a/metric_learn/lsml.py b/metric_learn/lsml.py
index 108bd064..f3bf9738 100644
--- a/metric_learn/lsml.py
+++ b/metric_learn/lsml.py
@@ -13,20 +13,24 @@
 from random import choice
 from six.moves import xrange
 from .base_metric import BaseMetricLearner
+from .constraints import relativeQuadruplets
 
 
 class LSML(BaseMetricLearner):
-  def __init__(self, tol=1e-3, max_iter=1000):
+  def __init__(self, tol=1e-3, max_iter=1000, verbose=False):
     """Initialize the learner.
 
     Parameters
     ----------
     tol : float, optional
     max_iter : int, optional
+    verbose : bool, optional
+      if True, prints information while learning
     """
     self.params = {
       'tol': tol,
       'max_iter': max_iter,
+      'verbose': verbose,
     }
 
   def _prepare_inputs(self, X, constraints, weights, prior):
@@ -46,7 +50,7 @@ def _prepare_inputs(self, X, constraints, weights, prior):
   def metric(self):
     return self.M
 
-  def fit(self, X, constraints, weights=None, prior=None, verbose=False):
+  def fit(self, X, constraints, weights=None, prior=None):
     """Learn the LSML model.
 
     Parameters
     ----------
     X : (n x d) data matrix
       each row corresponds to a single instance
     constraints : (m x 4) matrix of ints
       (a,b,c,d) indices into X, such that d(X[a],X[b]) < d(X[c],X[d])
     weights : (m,) array of floats, optional
       scale factor for each constraint
     prior : (d x d) matrix, optional
       guess at a metric [default: covariance(X)]
-    verbose : bool, optional
-      if True, prints information while learning
     """
+    verbose = self.params['verbose']
     self._prepare_inputs(X, constraints, weights, prior)
     prior_inv = scipy.linalg.inv(self.M)
     s_best = self._total_loss(self.M, prior_inv)
@@ -93,7 +96,8 @@
         break
       self.M = M_best
     else:
-      print("Didn't converge after", it, "iterations. Final loss:", s_best)
+      if verbose:
+        print("Didn't converge after", it, "iterations. Final loss:", s_best)
     return self
 
   def _comparison_loss(self, metric):
@@ -119,18 +123,47 @@ def _gradient(self, metric, prior_inv):
                (1-np.sqrt(dab/dcd))*np.outer(vcd, vcd))
     return dMetric
 
-  @classmethod
-  def prepare_constraints(cls, labels, num_constraints):
-    C = np.empty((num_constraints,4), dtype=int)
-    a, c = np.random.randint(len(labels), size=(2,num_constraints))
-    for i,(al,cl) in enumerate(zip(labels[a],labels[c])):
-      C[i,1] = choice(np.nonzero(labels == al)[0])
-      C[i,3] = choice(np.nonzero(labels != cl)[0])
-    C[:,0] = a
-    C[:,2] = c
-    return C
-
-
 def _regularization_loss(metric, prior_inv):
   sign, logdet = np.linalg.slogdet(metric)
   return np.sum(metric * prior_inv) - sign * logdet
+
+class LSML_Supervised(LSML):
+  def __init__(self, tol=1e-3, max_iter=1000, prior=None, num_constraints=None, weights=None, verbose=False):
+    """Initialize the learner.
+
+    Parameters
+    ----------
+    tol : float, optional
+    max_iter : int, optional
+    prior : (d x d) matrix, optional
+      guess at a metric [default: covariance(X)]
+    num_constraints: int, needed for .fit()
+    weights : (m,) array of floats, optional
+      scale factor for each constraint
+    verbose : bool, optional
+      if True, prints information while learning
+    """
+    LSML.__init__(self, tol=tol, max_iter=max_iter, verbose=verbose)
+    self.params.update({
+      'prior': prior,
+      'num_constraints': num_constraints,
+      'weights': weights,
+    })
+
+  def fit(self, X, labels):
+    """Create constraints from labels and learn the LSML model.
+    Needs num_constraints specified in constructor.
+
+    Parameters
+    ----------
+    X : (n x d) data matrix
+      each row corresponds to a single instance
+    labels : (n) data labels
+    """
+    num_constraints = self.params['num_constraints']
+    if num_constraints is None:
+      num_classes = np.unique(labels)
+      num_constraints = 20*(len(num_classes))**2
+
+    C = relativeQuadruplets(labels, num_constraints)
+    return LSML.fit(self, X, C, weights=self.params['weights'], prior=self.params['prior'])

diff --git a/metric_learn/rca.py b/metric_learn/rca.py
index 818144fb..d76ef21a 100644
--- a/metric_learn/rca.py
+++ b/metric_learn/rca.py
@@ -15,6 +15,7 @@
 import random
 from six.moves import xrange
 from .base_metric import BaseMetricLearner
+from .constraints import chunks
 
 
 class RCA(BaseMetricLearner):
@@ -26,6 +27,9 @@ def __init__(self, dim=None):
     ----------
     dim : int, optional
       embedding dimension (default: original dimension of data)
+    num_chunks: int, optional
+    chunk_size: int, optional
+    seed: int, optional
     """
     self.params = {
       'dim': dim,
@@ -88,30 +92,44 @@ def fit(self, data, chunks):
     else:
       self._transformer = _inv_sqrtm(inner_cov).T
 
-  @classmethod
-  def prepare_constraints(cls, Y, num_chunks=100, chunk_size=2, seed=None):
-    random.seed(seed)
-    chunks = -np.ones_like(Y, dtype=int)
-    uniq, lookup = np.unique(Y, return_inverse=True)
-    all_inds = [set(np.where(lookup==c)[0]) for c in xrange(len(uniq))]
-    idx = 0
-    while idx < num_chunks and all_inds:
-      c = random.randint(0, len(all_inds)-1)
-      inds = all_inds[c]
-      if len(inds) < chunk_size:
-        del all_inds[c]
-        continue
-      ii = random.sample(inds, chunk_size)
-      inds.difference_update(ii)
-      chunks[ii] = idx
-      idx += 1
-    if idx < num_chunks:
-      raise ValueError('Unable to make %d chunks of %d examples each' %
-                       (num_chunks, chunk_size))
-    return chunks
-
+    return self
 
 def _inv_sqrtm(x):
   '''Computes x^(-1/2)'''
   vals, vecs = np.linalg.eigh(x)
   return (vecs / np.sqrt(vals)).dot(vecs.T)
+
+
+class RCA_Supervised(RCA):
+  """Relevant Components Analysis (RCA)"""
+  def __init__(self, dim=None, num_chunks=None, chunk_size=None, seed=None):
+    """Initialize the learner.
+
+    Parameters
+    ----------
+    dim : int, optional
+      embedding dimension (default: original dimension of data)
+    num_chunks: int, optional
+    chunk_size: int, optional
+    seed: int, optional
+    """
+    # @TODO: remove seed from param. See @TODO in constraints/chunks
+    RCA.__init__(self, dim=dim)
+    self.params.update({
+      'num_chunks': 100 if num_chunks is None else num_chunks,
+      'chunk_size': 2 if chunk_size is None else chunk_size,
+      'seed': seed,
+    })
+
+  def fit(self, X, labels):
+    """Create constraints from labels and learn the RCA model.
+    Needs num_constraints specified in constructor.
+
+    Parameters
+    ----------
+    X : (n x d) data matrix
+      each row corresponds to a single instance
+    labels : (n) data labels
+    """
+    C = chunks(labels, self.params['num_chunks'], self.params['chunk_size'], self.params['seed'])
+    return RCA.fit(self, X, C)

diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py
index 5794bcfc..c99f8214 100644
--- a/metric_learn/sdml.py
+++ b/metric_learn/sdml.py
@@ -15,19 +15,23 @@
 from sklearn.covariance import graph_lasso
 from sklearn.utils.extmath import pinvh
 from .base_metric import BaseMetricLearner
+from .constraints import adjacencyMatrix
 
 
 class SDML(BaseMetricLearner):
-  def __init__(self, balance_param=0.5, sparsity_param=0.01, use_cov=True):
+  def __init__(self, balance_param=0.5, sparsity_param=0.01, use_cov=True, verbose=False):
     '''
     balance_param: trade off between sparsity and M0 prior
     sparsity_param: trade off between optimizer and sparseness (see graph_lasso)
    use_cov: controls prior matrix, will use the identity if use_cov=False
+    verbose : bool, optional
+      if True, prints information while learning
     '''
     self.params = {
       'balance_param': balance_param,
       'sparsity_param': sparsity_param,
       'use_cov': use_cov,
+      'verbose': verbose,
     }
 
   def _prepare_inputs(self, X, W):
@@ -43,7 +47,7 @@ def _prepare_inputs(self, X, W):
   def metric(self):
     return self.M
 
-  def fit(self, X, W, verbose=False):
+  def fit(self, X, W):
     """
     X: data matrix, (n x d)
     W: connectivity graph, (n x n). +1 for positive pairs, -1 for negative.
@@ -54,20 +58,36 @@
     # hack: ensure positive semidefinite
     emp_cov = emp_cov.T.dot(emp_cov)
     self.M, _ = graph_lasso(emp_cov, self.params['sparsity_param'],
-                            verbose=verbose)
+                            verbose=self.params['verbose'])
     return self
 
-  @classmethod
-  def prepare_constraints(self, labels, num_points, num_constraints):
-    a, c = np.random.randint(len(labels), size=(2,num_constraints))
-    b, d = np.empty((2, num_constraints), dtype=int)
-    for i,(al,cl) in enumerate(zip(labels[a],labels[c])):
-      b[i] = choice(np.nonzero(labels == al)[0])
-      d[i] = choice(np.nonzero(labels != cl)[0])
-    W = np.zeros((num_points,num_points))
-    W[a,b] = 1
-    W[c,d] = -1
-    # make W symmetric
-    W[b,a] = 1
-    W[d,c] = -1
-    return W
+
+class SDML_Supervised(SDML):
+  def __init__(self, balance_param=0.5, sparsity_param=0.01, use_cov=True, num_constraints=None, verbose=False):
+    '''
+    balance_param: trade off between sparsity and M0 prior
+    sparsity_param: trade off between optimizer and sparseness (see graph_lasso)
+    use_cov: controls prior matrix, will use the identity if use_cov=False
+    num_constraints: int, needed for .fit()
+    verbose : bool, optional
+      if True, prints information while learning
+    '''
+    SDML.__init__(self, balance_param=balance_param, sparsity_param=sparsity_param, use_cov=use_cov, verbose=verbose)
+    self.params['num_constraints'] = num_constraints
+
+  def fit(self, X, labels):
+    """Create constraints from labels and learn the SDML model.
+    Needs num_constraints specified in constructor.
+
+    Parameters
+    ----------
+    X : (n x d) data matrix
+      each row corresponds to a single instance
+    labels : (n) data labels
+    """
+    num_constraints = self.params['num_constraints']
+    if num_constraints is None:
+      num_classes = np.unique(labels)
+      num_constraints = 20*(len(num_classes))**2
+
+    W = adjacencyMatrix(labels, X.shape[0], num_constraints)
+    return SDML.fit(self, X, W)

diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py
index d431db8d..5cc1e56c 100644
--- a/test/metric_learn_test.py
+++ b/test/metric_learn_test.py
@@ -6,7 +6,8 @@
 from sklearn.datasets import load_iris
 from numpy.testing import assert_array_almost_equal
 
-from metric_learn import LSML, ITML, LMNN, SDML, NCA, LFDA, RCA
+from metric_learn import LSML_Supervised, ITML_Supervised, SDML_Supervised, RCA_Supervised
+from metric_learn import LMNN, NCA, LFDA
 # Import this specially for testing.
 from metric_learn.lmnn import python_LMNN
 
@@ -35,8 +36,7 @@ class TestLSML(MetricTestCase):
   def test_iris(self):
     num_constraints = 200
 
-    C = LSML.prepare_constraints(self.iris_labels, num_constraints)
-    lsml = LSML().fit(self.iris_points, C, verbose=False)
+    lsml = LSML_Supervised(num_constraints=num_constraints).fit(self.iris_points, self.iris_labels)
 
     csep = class_separation(lsml.transform(), self.iris_labels)
     self.assertLess(csep, 0.8)  # it's pretty terrible
@@ -46,9 +46,7 @@ class TestITML(MetricTestCase):
   def test_iris(self):
     num_constraints = 200
 
-    n = self.iris_points.shape[0]
-    C = ITML.prepare_constraints(self.iris_labels, n, num_constraints)
-    itml = ITML().fit(self.iris_points, C, verbose=False)
+    itml = ITML_Supervised(num_constraints=num_constraints).fit(self.iris_points, self.iris_labels)
 
     csep = class_separation(itml.transform(), self.iris_labels)
     self.assertLess(csep, 0.4)  # it's not great
@@ -60,8 +58,8 @@ def test_iris(self):
 
     # Test both impls, if available.
     for LMNN_cls in set((LMNN, python_LMNN)):
-      lmnn = LMNN_cls(k=k, learn_rate=1e-6)
-      lmnn.fit(self.iris_points, self.iris_labels, verbose=False)
+      lmnn = LMNN_cls(k=k, learn_rate=1e-6, verbose=False)
+      lmnn.fit(self.iris_points, self.iris_labels)
 
       csep = class_separation(lmnn.transform(), self.iris_labels)
       self.assertLess(csep, 0.25)
@@ -71,17 +69,13 @@ class TestSDML(MetricTestCase):
   def test_iris(self):
     num_constraints = 1500
 
-    n = self.iris_points.shape[0]
 
     # Note: this is a flaky test, which fails for certain seeds.
     # TODO: un-flake it!
     np.random.seed(5555)
 
-    W = SDML.prepare_constraints(self.iris_labels, n, num_constraints)
-    # Test sparse graph inputs.
-    for graph in ((W, scipy.sparse.csr_matrix(W))):
-      sdml = SDML().fit(self.iris_points, graph)
-      csep = class_separation(sdml.transform(), self.iris_labels)
-      self.assertLess(csep, 0.25)
+    sdml = SDML_Supervised(num_constraints=num_constraints).fit(self.iris_points, self.iris_labels)
+    csep = class_separation(sdml.transform(), self.iris_labels)
+    self.assertLess(csep, 0.25)
 
 
 class TestNCA(MetricTestCase):
@@ -109,10 +103,8 @@ def test_iris(self):
 
 class TestRCA(MetricTestCase):
   def test_iris(self):
-    rca = RCA(dim=2)
-    chunks = RCA.prepare_constraints(self.iris_labels, num_chunks=30,
-                                     chunk_size=2, seed=1234)
-    rca.fit(self.iris_points, chunks)
+    rca = RCA_Supervised(dim=2, num_chunks=30, chunk_size=2, seed=1234)
+    rca.fit(self.iris_points, self.iris_labels)
 
     csep = class_separation(rca.transform(), self.iris_labels)
     self.assertLess(csep, 0.25)

From d48e4e7716f6616f062eeff18998df4cbbdc3ae4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ond=C5=99ej=20=C5=A0vec?=
Date: Tue, 21 Jun 2016 21:27:07 +0200
Subject: [PATCH 005/210] Simple Covariance metric-learner (#21)

* Simple Covariance metric-learner
* Updated docstring
---
 metric_learn/__init__.py   |  1 +
 metric_learn/covariance.py | 29 +++++++++++++++++++++++++++++
 2 files changed, 30 insertions(+)
 create mode 100644 metric_learn/covariance.py

diff --git a/metric_learn/__init__.py b/metric_learn/__init__.py
index 638d6d4d..21b03dc3 100644
--- a/metric_learn/__init__.py
+++ b/metric_learn/__init__.py
@@ -8,3 +8,4 @@
 from .lfda import LFDA
 from .rca import RCA, RCA_Supervised
 from .constraints import adjacencyMatrix, positiveNegativePairs, relativeQuadruplets, chunks
+from .covariance import Covariance

diff --git a/metric_learn/covariance.py b/metric_learn/covariance.py
new file mode 100644
index 00000000..68e6d6ce
--- /dev/null
+++ b/metric_learn/covariance.py
@@ -0,0 +1,29 @@
+"""
+Covariance metric (baseline method)
+
+This method does not "learn" anything, only calculates covariance matrix.
+Inverse of this covariance matrix is then used in Mahalanobis distance.
+
+This is a simple baseline method first introduced in
+On the Generalized Distance in Statistics, P.C.Mahalanobis, 1936
+"""
+
+from __future__ import absolute_import
+import numpy as np
+from .base_metric import BaseMetricLearner
+
+
+class Covariance(BaseMetricLearner):
+  def __init__(self):
+    self.params = {}
+
+  def metric(self):
+    return self.M
+
+  def fit(self, X, y=None):
+    """
+    X: data matrix, (n x d)
+    y: labels (optional)
+    """
+    self.M = np.cov(X.T)
+    return self

From 038e8dd68995ae4c783e59723a567243c6812ce7 Mon Sep 17 00:00:00 2001
From: CJ Carey
Date: Tue, 21 Jun 2016 15:51:35 -0400
Subject: [PATCH 006/210] Style fixes, docstring tweaks, constraint renaming

---
 .gitignore                  |  1 +
 metric_learn/__init__.py    |  3 +--
 metric_learn/constraints.py | 16 ++++++++-----
 metric_learn/covariance.py  |  6 ++---
 metric_learn/itml.py        | 19 +++++++++-------
 metric_learn/lsml.py        | 14 +++++++-----
 metric_learn/rca.py         | 12 +++++-----
 metric_learn/sdml.py        | 45 ++++++++++++++++++++-----------------
 8 files changed, 66 insertions(+), 50 deletions(-)

diff --git a/.gitignore b/.gitignore
index a2049825..32ed7270 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,3 +2,4 @@
 build/
 dist/
 *.egg-info
+.coverage

diff --git a/metric_learn/__init__.py b/metric_learn/__init__.py
index 21b03dc3..fd3205a6 100644
--- a/metric_learn/__init__.py
+++ b/metric_learn/__init__.py
@@ -1,5 +1,6 @@
 from __future__ import absolute_import
 
+from .covariance import Covariance
 from .itml import ITML, ITML_Supervised
 from .lmnn import LMNN
 from .lsml import LSML, LSML_Supervised
@@ -7,5 +8,3 @@
 from .nca import NCA
 from .lfda import LFDA
 from .rca import RCA, RCA_Supervised
-from .constraints import adjacencyMatrix, positiveNegativePairs, relativeQuadruplets, chunks
-from .covariance import Covariance

diff --git a/metric_learn/constraints.py b/metric_learn/constraints.py
index 58a2768a..ad1660c1 100644
--- a/metric_learn/constraints.py
+++ b/metric_learn/constraints.py
@@ -1,5 +1,7 @@
-""" Helper class that can generate different types of constraints from supervised data labels."""
-
+"""
+Helper module for generating different types of constraints
+from supervised data labels.
+""" import numpy as np import random from six.moves import xrange @@ -7,7 +9,8 @@ # @TODO: consider creating a stateful class # https://github.com/all-umass/metric-learn/pull/19#discussion_r67386226 -def adjacencyMatrix(labels, num_points, num_constraints): + +def adjacency_matrix(labels, num_points, num_constraints): a, c = np.random.randint(len(labels), size=(2,num_constraints)) b, d = np.empty((2, num_constraints), dtype=int) for i,(al,cl) in enumerate(zip(labels[a],labels[c])): @@ -21,14 +24,16 @@ def adjacencyMatrix(labels, num_points, num_constraints): W[d,c] = -1 return W -def positiveNegativePairs(labels, num_points, num_constraints): + +def positive_negative_pairs(labels, num_points, num_constraints): ac,bd = np.random.randint(num_points, size=(2,num_constraints)) pos = labels[ac] == labels[bd] a,c = ac[pos], ac[~pos] b,d = bd[pos], bd[~pos] return a,b,c,d -def relativeQuadruplets(labels, num_constraints): + +def relative_quadruplets(labels, num_constraints): C = np.empty((num_constraints,4), dtype=int) a, c = np.random.randint(len(labels), size=(2,num_constraints)) for i,(al,cl) in enumerate(zip(labels[a],labels[c])): @@ -38,6 +43,7 @@ def relativeQuadruplets(labels, num_constraints): C[:,2] = c return C + def chunks(Y, num_chunks=100, chunk_size=2, seed=None): # @TODO: remove seed from params and use numpy RandomState # https://github.com/all-umass/metric-learn/pull/19#discussion_r67386666 diff --git a/metric_learn/covariance.py b/metric_learn/covariance.py index 68e6d6ce..70956caa 100644 --- a/metric_learn/covariance.py +++ b/metric_learn/covariance.py @@ -1,8 +1,8 @@ """ Covariance metric (baseline method) -This method does not "learn" anything, only calculates covariance matrix. -Inverse of this covariance matrix is then used in Mahalanobis distance. +This method does not "learn" anything, rather it calculates +the covariance matrix of the input data. This is a simple baseline method first introduced in On the Generalized Distance in Statistics, P.C.Mahalanobis, 1936 @@ -23,7 +23,7 @@ def metric(self): def fit(self, X, y=None): """ X: data matrix, (n x d) - y: labels (optional) + y: unused, optional """ self.M = np.cov(X.T) return self diff --git a/metric_learn/itml.py b/metric_learn/itml.py index 95636a9a..05155925 100644 --- a/metric_learn/itml.py +++ b/metric_learn/itml.py @@ -15,13 +15,15 @@ import numpy as np from six.moves import xrange from sklearn.metrics import pairwise_distances + +from . import constraints from .base_metric import BaseMetricLearner -from .constraints import positiveNegativePairs class ITML(BaseMetricLearner): """Information Theoretic Metric Learning (ITML)""" - def __init__(self, gamma=1., max_iters=1000, convergence_threshold=1e-3, verbose=False): + def __init__(self, gamma=1., max_iters=1000, convergence_threshold=1e-3, + verbose=False): """Initialize the learner. Parameters @@ -139,8 +141,8 @@ def _vector_norm(X): class ITML_Supervised(ITML): """Information Theoretic Metric Learning (ITML)""" - def __init__(self, gamma=1., max_iters=1000, convergence_threshold=1e-3, num_constraints=None, - bounds=None, A0=None, verbose=False): + def __init__(self, gamma=1., max_iters=1000, convergence_threshold=1e-3, + num_constraints=None, bounds=None, A0=None, verbose=False): """Initialize the learner. 
 
     Parameters
     ----------
     gamma : float, optional
       value for slack variables
@@ -139,8 +141,8 @@ def _vector_norm(X):
 
 class ITML_Supervised(ITML):
   """Information Theoretic Metric Learning (ITML)"""
-  def __init__(self, gamma=1., max_iters=1000, convergence_threshold=1e-3, num_constraints=None,
-               bounds=None, A0=None, verbose=False):
+  def __init__(self, gamma=1., max_iters=1000, convergence_threshold=1e-3,
+               num_constraints=None, bounds=None, A0=None, verbose=False):
     """Initialize the learner.
 
     Parameters
     ----------
     gamma : float, optional
       value for slack variables
     max_iters : int, optional
     convergence_threshold : float, optional
     num_constraints: int, needed for .fit()
     verbose : bool, optional
       if True, prints information while learning
     """
-    ITML.__init__(self, gamma=gamma, max_iters=max_iters,
-        convergence_threshold=convergence_threshold, verbose=verbose)
+    ITML.__init__(self, gamma=gamma, max_iters=max_iters,
+                  convergence_threshold=convergence_threshold, verbose=verbose)
     self.params.update({
       'num_constraints': num_constraints,
       'bounds': bounds,
       'A0': A0,
     })
@@ -176,5 +178,6 @@ def fit(self, X, labels):
       num_classes = np.unique(labels)
       num_constraints = 20*(len(num_classes))**2
 
-    C = positiveNegativePairs(labels, X.shape[0], num_constraints)
-    return ITML.fit(self, X, C, bounds=self.params['bounds'], A0=self.params['A0'])
+    C = constraints.positive_negative_pairs(labels, X.shape[0], num_constraints)
+    return ITML.fit(self, X, C, bounds=self.params['bounds'],
+                    A0=self.params['A0'])

diff --git a/metric_learn/lsml.py b/metric_learn/lsml.py
index f3bf9738..72bdef02 100644
--- a/metric_learn/lsml.py
+++ b/metric_learn/lsml.py
@@ -10,10 +10,10 @@
 from __future__ import print_function, absolute_import
 import numpy as np
 import scipy.linalg
-from random import choice
 from six.moves import xrange
+
+from . import constraints
 from .base_metric import BaseMetricLearner
-from .constraints import relativeQuadruplets
 
 
 class LSML(BaseMetricLearner):
@@ -119,18 +123,15 @@ def _gradient(self, metric, prior_inv):
                (1-np.sqrt(dab/dcd))*np.outer(vcd, vcd))
     return dMetric
 
+
 def _regularization_loss(metric, prior_inv):
   sign, logdet = np.linalg.slogdet(metric)
   return np.sum(metric * prior_inv) - sign * logdet
 
+
 class LSML_Supervised(LSML):
-  def __init__(self, tol=1e-3, max_iter=1000, prior=None, num_constraints=None, weights=None, verbose=False):
+  def __init__(self, tol=1e-3, max_iter=1000, prior=None, num_constraints=None,
+               weights=None, verbose=False):
     """Initialize the learner.
 
     Parameters
     ----------
     tol : float, optional
     max_iter : int, optional
     prior : (d x d) matrix, optional
       guess at a metric [default: covariance(X)]
     num_constraints: int, needed for .fit()
     weights : (m,) array of floats, optional
       scale factor for each constraint
     verbose : bool, optional
       if True, prints information while learning
     """
@@ -165,5 +168,6 @@ def fit(self, X, labels):
       num_classes = np.unique(labels)
       num_constraints = 20*(len(num_classes))**2
 
-    C = relativeQuadruplets(labels, num_constraints)
-    return LSML.fit(self, X, C, weights=self.params['weights'], prior=self.params['prior'])
+    C = constraints.relative_quadruplets(labels, num_constraints)
+    return LSML.fit(self, X, C, weights=self.params['weights'],
+                    prior=self.params['prior'])

diff --git a/metric_learn/rca.py b/metric_learn/rca.py
index d76ef21a..e7010f59 100644
--- a/metric_learn/rca.py
+++ b/metric_learn/rca.py
@@ -12,10 +12,10 @@
 from __future__ import absolute_import
 import numpy as np
-import random
 from six.moves import xrange
+
+from . import constraints
 from .base_metric import BaseMetricLearner
-from .constraints import chunks
 
 
 class RCA(BaseMetricLearner):
@@ -26,6 +27,9 @@ def __init__(self, dim=None):
     ----------
     dim : int, optional
       embedding dimension (default: original dimension of data)
-    num_chunks: int, optional
-    chunk_size: int, optional
-    seed: int, optional
     """
     self.params = {
       'dim': dim,
@@ -88,6 +91,7 @@ def fit(self, data, chunks):
     else:
       self._transformer = _inv_sqrtm(inner_cov).T
 
     return self
 
+
 def _inv_sqrtm(x):
   '''Computes x^(-1/2)'''
   vals, vecs = np.linalg.eigh(x)
   return (vecs / np.sqrt(vals)).dot(vecs.T)
 
 
 class RCA_Supervised(RCA):
-  """Relevant Components Analysis (RCA)"""
   def __init__(self, dim=None, num_chunks=None, chunk_size=None, seed=None):
     """Initialize the learner.
@@ -131,5 +128,6 @@ def fit(self, X, labels): each row corresponds to a single instance labels : (n) data labels """ - C = chunks(labels, self.params['num_chunks'], self.params['chunk_size'], self.params['seed']) + C = constraints.chunks(labels, self.params['num_chunks'], + self.params['chunk_size'], self.params['seed']) return RCA.fit(self, X, C) diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py index c99f8214..efaaf6b2 100644 --- a/metric_learn/sdml.py +++ b/metric_learn/sdml.py @@ -10,20 +10,24 @@ from __future__ import absolute_import import numpy as np -from random import choice from scipy.sparse.csgraph import laplacian from sklearn.covariance import graph_lasso from sklearn.utils.extmath import pinvh + +from . import constraints from .base_metric import BaseMetricLearner -from .constraints import adjacencyMatrix class SDML(BaseMetricLearner): - def __init__(self, balance_param=0.5, sparsity_param=0.01, use_cov=True, verbose=False): + def __init__(self, balance_param=0.5, sparsity_param=0.01, use_cov=True, + verbose=False): ''' - balance_param: trade off between sparsity and M0 prior - sparsity_param: trade off between optimizer and sparseness (see graph_lasso) - use_cov: controls prior matrix, will use the identity if use_cov=False + balance_param: float, optional + trade off between sparsity and M0 prior + sparsity_param: float, optional + trade off between optimizer and sparseness (see graph_lasso) + use_cov: bool, optional + controls prior matrix, will use the identity if use_cov=False verbose : bool, optional if True, prints information while learning ''' @@ -50,6 +54,7 @@ def metric(self): def fit(self, X, W): """ X: data matrix, (n x d) + each row corresponds to a single instance W: connectivity graph, (n x n). +1 for positive pairs, -1 for negative. """ self._prepare_inputs(X, W) @@ -61,33 +66,33 @@ def fit(self, X, W): verbose=self.params['verbose']) return self + class SDML_Supervised(SDML): - def __init__(self, balance_param=0.5, sparsity_param=0.01, use_cov=True, num_constraints=None, verbose=False): - ''' - balance_param: trade off between sparsity and M0 prior - sparsity_param: trade off between optimizer and sparseness (see graph_lasso) - use_cov: controls prior matrix, will use the identity if use_cov=False - num_constraints: int, needed for .fit() - verbose : bool, optional - if True, prints information while learning - ''' - SDML.__init__(self, balance_param=balance_param, sparsity_param=sparsity_param, use_cov=use_cov, verbose=verbose) + def __init__(self, balance_param=0.5, sparsity_param=0.01, use_cov=True, + num_constraints=None, verbose=False): + SDML.__init__(self, balance_param=balance_param, + sparsity_param=sparsity_param, use_cov=use_cov, + verbose=verbose) self.params['num_constraints'] = num_constraints + __init__.__doc__ = ( + SDML.__init__.__doc__ + + 'num_constraints: int, optional\n' + ' number of constraints to generate') + def fit(self, X, labels): """Create constraints from labels and learn the SDML model. - Needs num_constraints specified in constructor. 
 
     Parameters
     ----------
-    X : (n x d) data matrix
+    X: data matrix, (n x d)
       each row corresponds to a single instance
-    labels : (n) data labels
+    labels: data labels, (n,) array-like
     """
     num_constraints = self.params['num_constraints']
     if num_constraints is None:
       num_classes = np.unique(labels)
       num_constraints = 20*(len(num_classes))**2
 
-    W = adjacencyMatrix(labels, X.shape[0], num_constraints)
+    W = constraints.adjacency_matrix(labels, X.shape[0], num_constraints)
     return SDML.fit(self, X, W)

From e3066f3a4a43d5b620375511dce8b0034051bd8e Mon Sep 17 00:00:00 2001
From: CJ Carey
Date: Wed, 13 Jul 2016 13:53:01 -0400
Subject: [PATCH 007/210] Fixing sandwich example

---
 examples/sandwich.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/examples/sandwich.py b/examples/sandwich.py
index 3f12618c..8c987078 100644
--- a/examples/sandwich.py
+++ b/examples/sandwich.py
@@ -8,6 +8,7 @@
 from sklearn.metrics import pairwise_distances
 from sklearn.neighbors import NearestNeighbors
 
+import metric_learn.constraints as C
 from metric_learn import ITML, LMNN, LSML, SDML
 
@@ -25,9 +26,9 @@ def sandwich_demo():
   num_constraints = 60
   mls = [
       (LMNN(), (x, y)),
-      (ITML(), (x, ITML.prepare_constraints(y, len(x), num_constraints))),
-      (SDML(), (x, SDML.prepare_constraints(y, len(x), num_constraints))),
-      (LSML(), (x, LSML.prepare_constraints(y, num_constraints)))
+      (ITML(), (x, C.positive_negative_pairs(y, len(x), num_constraints))),
+      (SDML(), (x, C.adjacency_matrix(y, len(x), num_constraints))),
+      (LSML(), (x, C.relative_quadruplets(y, num_constraints)))
   ]
 
   for ax_num, (ml,args) in zip(xrange(3,7), mls):

From 7fdf150ec0f48c4085f2a2f4ec943f20adcb80be Mon Sep 17 00:00:00 2001
From: CJ Carey
Date: Wed, 13 Jul 2016 13:53:11 -0400
Subject: [PATCH 008/210] Style fixes

---
 test/metric_learn_test.py | 25 ++++++++++---------------
 1 file changed, 10 insertions(+), 15 deletions(-)

diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py
index 5cc1e56c..5cb45892 100644
--- a/test/metric_learn_test.py
+++ b/test/metric_learn_test.py
@@ -1,13 +1,13 @@
 import unittest
 import numpy as np
-import scipy.sparse
 from six.moves import xrange
 from sklearn.metrics import pairwise_distances
 from sklearn.datasets import load_iris
 from numpy.testing import assert_array_almost_equal
 
-from metric_learn import LSML_Supervised, ITML_Supervised, SDML_Supervised, RCA_Supervised
-from metric_learn import LMNN, NCA, LFDA
+from metric_learn import (
+    LMNN, NCA, LFDA,
+    LSML_Supervised, ITML_Supervised, SDML_Supervised, RCA_Supervised)
 # Import this specially for testing.
 from metric_learn.lmnn import python_LMNN
 
@@ -34,9 +34,8 @@ def setUpClass(self):
 
 class TestLSML(MetricTestCase):
   def test_iris(self):
-    num_constraints = 200
-
-    lsml = LSML_Supervised(num_constraints=num_constraints).fit(self.iris_points, self.iris_labels)
+    lsml = LSML_Supervised(num_constraints=200)
+    lsml.fit(self.iris_points, self.iris_labels)
 
     csep = class_separation(lsml.transform(), self.iris_labels)
     self.assertLess(csep, 0.8)  # it's pretty terrible
@@ -46,9 +45,8 @@
 
 class TestITML(MetricTestCase):
   def test_iris(self):
-    num_constraints = 200
-
-    itml = ITML_Supervised(num_constraints=num_constraints).fit(self.iris_points, self.iris_labels)
+    itml = ITML_Supervised(num_constraints=200)
+    itml.fit(self.iris_points, self.iris_labels)
 
     csep = class_separation(itml.transform(), self.iris_labels)
     self.assertLess(csep, 0.4)  # it's not great
@@ -54,11 +52,9 @@
 
 class TestLMNN(MetricTestCase):
   def test_iris(self):
-    k = 5
-
     # Test both impls, if available.
     for LMNN_cls in set((LMNN, python_LMNN)):
-      lmnn = LMNN_cls(k=k, learn_rate=1e-6, verbose=False)
+      lmnn = LMNN_cls(k=5, learn_rate=1e-6, verbose=False)
       lmnn.fit(self.iris_points, self.iris_labels)
 
       csep = class_separation(lmnn.transform(), self.iris_labels)
@@ -67,13 +63,12 @@
 
 class TestSDML(MetricTestCase):
   def test_iris(self):
-    num_constraints = 1500
-
     # Note: this is a flaky test, which fails for certain seeds.
     # TODO: un-flake it!
     np.random.seed(5555)
 
-    sdml = SDML_Supervised(num_constraints=num_constraints).fit(self.iris_points, self.iris_labels)
+    sdml = SDML_Supervised(num_constraints=1500)
+    sdml.fit(self.iris_points, self.iris_labels)
     csep = class_separation(sdml.transform(), self.iris_labels)
     self.assertLess(csep, 0.25)

From e31fb50e9d0af2e42118474f8042751a6c5692f3 Mon Sep 17 00:00:00 2001
From: CJ Carey
Date: Wed, 13 Jul 2016 15:21:13 -0400
Subject: [PATCH 009/210] More example code cleanup

---
 examples/sandwich.py | 53 ++++++++++++++++++++++----------------------
 1 file changed, 27 insertions(+), 26 deletions(-)

diff --git a/examples/sandwich.py b/examples/sandwich.py
index 8c987078..cecf121e 100644
--- a/examples/sandwich.py
+++ b/examples/sandwich.py
@@ -3,8 +3,7 @@
 """
 import numpy as np
-from numpy.random import normal
-import matplotlib.pyplot as pyplot
+from matplotlib import pyplot as plt
 from sklearn.metrics import pairwise_distances
 from sklearn.neighbors import NearestNeighbors
 
@@ -15,7 +14,7 @@ def sandwich_demo():
   x, y = sandwich_data()
   knn = nearest_neighbors(x, k=2)
-  ax = pyplot.subplot(3, 1, 1)  # take the whole top row
+  ax = plt.subplot(3, 1, 1)  # take the whole top row
   plot_sandwich_data(x, y, ax)
   plot_neighborhood_graph(x, knn, y, ax)
   ax.set_title('input space')
@@ -31,27 +30,26 @@
 
-  for ax_num, (ml,args) in zip(xrange(3,7), mls):
+  for ax_num, (ml,args) in zip(range(3,7), mls):
     ml.fit(*args)
     tx = ml.transform()
     ml_knn = nearest_neighbors(tx, k=2)
-    ax = pyplot.subplot(3,2,ax_num)
+    ax = plt.subplot(3,2,ax_num)
     plot_sandwich_data(tx, y, ax)
     plot_neighborhood_graph(tx, ml_knn, y, ax)
     ax.set_title('%s space' % ml.__class__.__name__)
     ax.set_xticks([])
     ax.set_yticks([])
-  pyplot.show()
+  plt.show()
 
 
 # TODO: use this somewhere
 def visualize_class_separation(X, labels):
-  _, (ax1,ax2) = pyplot.subplots(ncols=2)
+  _, (ax1,ax2) = plt.subplots(ncols=2)
   label_order = np.argsort(labels)
   ax1.imshow(pairwise_distances(X[label_order]), interpolation='nearest')
   ax2.imshow(pairwise_distances(labels[label_order,None]),
              interpolation='nearest')
-  pyplot.show()
 
 
 def nearest_neighbors(X, k=5):
@@ -67,27 +65,30 @@ def sandwich_data():
   num_points = 9
   # distance between layers, the points of each class are in a layer
   dist = 0.7
-  # memory pre-allocation
-  x = np.zeros((num_classes*num_points, 2))
-  y = np.zeros(num_classes*num_points, dtype=int)
-  for i,j in zip(xrange(num_classes), xrange(-num_classes//2,num_classes//2+1)):
-    for k,l in zip(xrange(num_points), xrange(-num_points//2,num_points//2+1)):
-      x[i*num_points + k, :] = np.array([normal(l, 0.1), normal(dist*j, 0.1)])
-    y[i*num_points:i*num_points + num_points] = i
-  return x,y
-
-
-def plot_sandwich_data(x, y, axis=pyplot, cols='rbgmky'):
-  for idx,val in enumerate(np.unique(y)):
+
+  data = np.zeros((num_classes, num_points, 2), dtype=float)
+  labels = np.zeros((num_classes, num_points), dtype=int)
+
+  x_centers = np.arange(num_points, dtype=float) - num_points / 2
+  y_centers = dist * (np.arange(num_classes, dtype=float) - num_classes / 2)
+  for i, yc in enumerate(y_centers):
+    for k, xc in enumerate(x_centers):
+      data[i, k, 0] = np.random.normal(xc, 0.1)
+      data[i, k, 1] = np.random.normal(yc, 0.1)
+    labels[i,:] = i
+  return data.reshape((-1, 2)), labels.ravel()
+
+
+def plot_sandwich_data(x, y, axis=plt, colors='rbgmky'):
+  for idx, val in enumerate(np.unique(y)):
     xi = x[y==val]
-    axis.scatter(xi[:,0], xi[:,1], s=50, facecolors='none',edgecolors=cols[idx])
+    axis.scatter(*xi.T, s=50, facecolors='none', edgecolors=colors[idx])
 
-def plot_neighborhood_graph(x, nn, y, axis=pyplot, cols='rbgmky'):
-  for i in xrange(x.shape[0]):
-    xs = [x[i,0], x[nn[i,1], 0]]
-    ys = [x[i,1], x[nn[i,1], 1]]
-    axis.plot(xs, ys, cols[y[i]])
+
+def plot_neighborhood_graph(x, nn, y, axis=plt, colors='rbgmky'):
+  for i, a in enumerate(x):
+    b = x[nn[i,1]]
+    axis.plot((a[0], b[0]), (a[1], b[1]), colors[y[i]])
 
 
 if __name__ == '__main__':

From ce5f2384bbc999511ec9497c8d34ec19aa81d238 Mon Sep 17 00:00:00 2001
From: CJ Carey
Date: Wed, 13 Jul 2016 17:21:36 -0400
Subject: [PATCH 010/210] Converting constraints to a nice object

A big refactor, but I think it makes things cleaner.
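As a rough usage sketch of the new object (illustrative only; the toy
labels below are made up, with -1 marking an unknown label):

    import numpy as np
    from metric_learn.constraints import Constraints

    partial_labels = np.array([0, 0, 1, 1, -1, 2, 2])
    cons = Constraints(partial_labels)
    # (a, b) index pairs share a label; (c, d) pairs have different labels
    a, b, c, d = cons.positive_negative_pairs(num_constraints=5)
    # chunk assignment for each labeled point (RCA-style constraints)
    chunk_ids = cons.chunks(num_chunks=3, chunk_size=2)
    # randomly mask out labels, keeping at least 5 of them known
    sub = Constraints.random_subset(partial_labels, num_preserved=5)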
---
 examples/sandwich.py        |  24 ++++---
 metric_learn/__init__.py    |   1 +
 metric_learn/constraints.py | 127 +++++++++++++++++++++---------------
 metric_learn/covariance.py  |   1 +
 metric_learn/itml.py        |  25 +++----
 metric_learn/lfda.py        |   1 +
 metric_learn/lmnn.py        |   7 +-
 metric_learn/lsml.py        |  33 +++++-----
 metric_learn/nca.py         |   1 +
 metric_learn/rca.py         |  18 ++---
 metric_learn/sdml.py        |  29 +++++---
 test/metric_learn_test.py   |   2 +-
 12 files changed, 151 insertions(+), 118 deletions(-)

diff --git a/examples/sandwich.py b/examples/sandwich.py
index cecf121e..34b48a00 100644
--- a/examples/sandwich.py
+++ b/examples/sandwich.py
@@ -7,8 +7,7 @@
 from sklearn.metrics import pairwise_distances
 from sklearn.neighbors import NearestNeighbors
 
-import metric_learn.constraints as C
-from metric_learn import ITML, LMNN, LSML, SDML
+from metric_learn import LMNN, ITML_Supervised, LSML_Supervised, SDML_Supervised
 
 
 def sandwich_demo():
@@ -22,22 +21,21 @@ def sandwich_demo():
   ax.set_xticks([])
   ax.set_yticks([])
 
-  num_constraints = 60
   mls = [
-      (LMNN(), (x, y)),
-      (ITML(), (x, C.positive_negative_pairs(y, len(x), num_constraints))),
-      (SDML(), (x, C.adjacency_matrix(y, len(x), num_constraints))),
-      (LSML(), (x, C.relative_quadruplets(y, num_constraints)))
+      LMNN(),
+      ITML_Supervised(num_constraints=200),
+      SDML_Supervised(num_constraints=200),
+      LSML_Supervised(num_constraints=200),
   ]
 
-  for ax_num, (ml,args) in zip(range(3,7), mls):
-    ml.fit(*args)
+  for ax_num, ml in enumerate(mls, start=3):
+    ml.fit(x, y)
     tx = ml.transform()
     ml_knn = nearest_neighbors(tx, k=2)
-    ax = plt.subplot(3,2,ax_num)
-    plot_sandwich_data(tx, y, ax)
-    plot_neighborhood_graph(tx, ml_knn, y, ax)
-    ax.set_title('%s space' % ml.__class__.__name__)
+    ax = plt.subplot(3, 2, ax_num)
+    plot_sandwich_data(tx, y, axis=ax)
+    plot_neighborhood_graph(tx, ml_knn, y, axis=ax)
+    ax.set_title(ml.__class__.__name__)
     ax.set_xticks([])
     ax.set_yticks([])
   plt.show()

diff --git a/metric_learn/__init__.py b/metric_learn/__init__.py
index fd3205a6..cc60049d 100644
--- a/metric_learn/__init__.py
+++ b/metric_learn/__init__.py
@@ -1,5 +1,6 @@
 from __future__ import absolute_import
 
+from .constraints import Constraints
 from .covariance import Covariance
 from .itml import ITML, ITML_Supervised
 from .lmnn import LMNN

diff --git a/metric_learn/constraints.py b/metric_learn/constraints.py
index ad1660c1..0f57b3e8 100644
--- a/metric_learn/constraints.py
+++ b/metric_learn/constraints.py
@@ -4,65 +4,86 @@
 """
 import numpy as np
 import random
+import warnings
 from six.moves import xrange
+from scipy.sparse import coo_matrix
 
-# @TODO: consider creating a stateful class
-# https://github.com/all-umass/metric-learn/pull/19#discussion_r67386226
+__all__ = ['Constraints']
 
 
-def adjacency_matrix(labels, num_points, num_constraints):
-  a, c = np.random.randint(len(labels), size=(2,num_constraints))
-  b, d = np.empty((2, num_constraints), dtype=int)
-  for i,(al,cl) in enumerate(zip(labels[a],labels[c])):
-    b[i] = random.choice(np.nonzero(labels == al)[0])
-    d[i] = random.choice(np.nonzero(labels != cl)[0])
-  W = np.zeros((num_points,num_points))
-  W[a,b] = 1
-  W[c,d] = -1
-  # make W symmetric
-  W[b,a] = 1
-  W[d,c] = -1
-  return W
+class Constraints(object):
+  def __init__(self, partial_labels):
+    '''partial_labels : int arraylike, -1 indicating unknown label'''
+    partial_labels = np.asanyarray(partial_labels)
+    self.num_points, = partial_labels.shape
+    self.known_label_idx, = np.where(partial_labels >= 0)
+    self.known_labels = partial_labels[self.known_label_idx]
 
+  def adjacency_matrix(self, num_constraints):
+    a, b, c, d = self.positive_negative_pairs(num_constraints)
+    row = np.concatenate((a, c))
+    col = np.concatenate((b, d))
+    data = np.ones_like(row, dtype=int)
+    data[len(a):] = -1
+    adj = coo_matrix((data, (row, col)), shape=(self.num_points,)*2)
+    # symmetrize
+    return adj + adj.T
 
-def positive_negative_pairs(labels, num_points, num_constraints):
-  ac,bd = np.random.randint(num_points, size=(2,num_constraints))
-  pos = labels[ac] == labels[bd]
-  a,c = ac[pos], ac[~pos]
-  b,d = bd[pos], bd[~pos]
-  return a,b,c,d
+  def positive_negative_pairs(self, num_constraints, same_length=False):
+    a, b = self._pairs(num_constraints, same_label=True)
+    c, d = self._pairs(num_constraints, same_label=False)
+    if same_length and len(a) != len(c):
+      n = min(len(a), len(c))
+      return a[:n], b[:n], c[:n], d[:n]
+    return a, b, c, d
 
+  def _pairs(self, num_constraints, same_label=True, max_iter=10):
+    num_labels = len(self.known_labels)
+    ab = set()
+    it = 0
+    while it < max_iter and len(ab) < num_constraints:
+      nc = num_constraints - len(ab)
+      for aidx in np.random.randint(num_labels, size=nc):
+        if same_label:
+          mask = self.known_labels[aidx] == self.known_labels
+          mask[aidx] = False  # avoid identity pairs
+        else:
+          mask = self.known_labels[aidx] != self.known_labels
+        b_choices, = np.where(mask)
+        if len(b_choices) > 0:
+          ab.add((aidx, np.random.choice(b_choices)))
+      it += 1
+    if len(ab) < num_constraints:
+      warnings.warn("Only generated %d %s constraints (requested %d)" % (
+          len(ab), 'positive' if same_label else 'negative', num_constraints))
+    ab = np.array(list(ab)[:num_constraints], dtype=int)
+    return self.known_label_idx[ab.T]
 
-def relative_quadruplets(labels, num_constraints):
-  C = np.empty((num_constraints,4), dtype=int)
-  a, c = np.random.randint(len(labels), size=(2,num_constraints))
-  for i,(al,cl) in enumerate(zip(labels[a],labels[c])):
-    C[i,1] = random.choice(np.nonzero(labels == al)[0])
-    C[i,3] = random.choice(np.nonzero(labels != cl)[0])
-  C[:,0] = a
-  C[:,2] = c
-  return C
+  def chunks(self, num_chunks=100, chunk_size=2):
+    chunks = -np.ones_like(self.known_label_idx, dtype=int)
+    uniq, lookup = np.unique(self.known_labels, return_inverse=True)
+    all_inds = [set(np.where(lookup==c)[0]) for c in xrange(len(uniq))]
+    idx = 0
+    while idx < num_chunks and all_inds:
+      c = random.randint(0, len(all_inds)-1)
+      inds = all_inds[c]
+      if len(inds) < chunk_size:
+        del all_inds[c]
+        continue
+      ii = random.sample(inds, chunk_size)
+      inds.difference_update(ii)
+      chunks[ii] = idx
+      idx += 1
+    if idx < num_chunks:
+      raise ValueError('Unable to make %d chunks of %d examples each' %
+                       (num_chunks, chunk_size))
+    return chunks
 
-def chunks(Y, num_chunks=100, chunk_size=2, seed=None):
-  # @TODO: remove seed from params and use numpy RandomState
-  # https://github.com/all-umass/metric-learn/pull/19#discussion_r67386666
-  random.seed(seed)
-  chunks = -np.ones_like(Y, dtype=int)
-  uniq, lookup = np.unique(Y, return_inverse=True)
-  all_inds = [set(np.where(lookup==c)[0]) for c in xrange(len(uniq))]
-  idx = 0
-  while idx < num_chunks and all_inds:
-    c = random.randint(0, len(all_inds)-1)
-    inds = all_inds[c]
-    if len(inds) < chunk_size:
-      del all_inds[c]
-      continue
-    ii = random.sample(inds, chunk_size)
-    inds.difference_update(ii)
-    chunks[ii] = idx
-    idx += 1
-  if idx < num_chunks:
-    raise ValueError('Unable to make %d chunks of %d examples each' %
-                     (num_chunks, chunk_size))
-  return chunks
+  @staticmethod
+  def random_subset(all_labels, num_preserved=np.inf):
+    n = len(all_labels)
+    num_ignored = max(0, n - num_preserved)
+    idx = np.random.randint(n, size=num_ignored)
+    partial_labels = np.array(all_labels, copy=True)
+    partial_labels[idx] = -1
+    return Constraints(partial_labels)

diff --git a/metric_learn/covariance.py b/metric_learn/covariance.py
index 70956caa..541cbfa9 100644
--- a/metric_learn/covariance.py
+++ b/metric_learn/covariance.py
@@ -10,6 +10,7 @@
 
 from __future__ import absolute_import
 import numpy as np
+
 from .base_metric import BaseMetricLearner
 
 
diff --git a/metric_learn/itml.py b/metric_learn/itml.py
index 05155925..7f2118bd 100644
--- a/metric_learn/itml.py
+++ b/metric_learn/itml.py
@@ -16,8 +16,8 @@
 from six.moves import xrange
 from sklearn.metrics import pairwise_distances
 
-from . import constraints
 from .base_metric import BaseMetricLearner
+from .constraints import Constraints
 
 
 class ITML(BaseMetricLearner):
@@ -70,7 +70,7 @@
     ----------
     X : (n x d) data matrix
       each row corresponds to a single instance
-    constraints : tuple of arrays
+    constraints : 4-tuple of arrays
       (a,b,c,d) indices into X, such that d(X[a],X[b]) < d(X[c],X[d])
     bounds : list (pos,neg) pairs, optional
       bounds on similarity, s.t. d(X[a],X[b]) < pos and d(X[c],X[d]) > neg
@@ -142,7 +142,8 @@
 class ITML_Supervised(ITML):
   """Information Theoretic Metric Learning (ITML)"""
   def __init__(self, gamma=1., max_iters=1000, convergence_threshold=1e-3,
-               num_constraints=None, bounds=None, A0=None, verbose=False):
+               num_labeled=np.inf, num_constraints=None, bounds=None, A0=None,
+               verbose=False):
     """Initialize the learner.
 
     Parameters
     ----------
     gamma : float, optional
       value for slack variables
     max_iters : int, optional
     convergence_threshold : float, optional
+    num_labeled : int, optional
+      number of labels to preserve for training
-    num_constraints: int, needed for .fit()
+    num_constraints: int, optional
+      number of constraints to generate
     verbose : bool, optional
       if True, prints information while learning
     """
     ITML.__init__(self, gamma=gamma, max_iters=max_iters,
                   convergence_threshold=convergence_threshold, verbose=verbose)
-    self.params.update({
-      'num_constraints': num_constraints,
-      'bounds': bounds,
-      'A0': A0,
-    })
+    self.params.update(num_labeled=num_labeled, num_constraints=num_constraints,
+                       bounds=bounds, A0=A0)
 
   def fit(self, X, labels):
     """Create constraints from labels and learn the ITML model.
@@ -178,6 +179,6 @@ def fit(self, X, labels):
       num_classes = np.unique(labels)
       num_constraints = 20*(len(num_classes))**2
 
-    C = constraints.positive_negative_pairs(labels, X.shape[0], num_constraints)
-    return ITML.fit(self, X, C, bounds=self.params['bounds'],
-                    A0=self.params['A0'])
+    c = Constraints.random_subset(labels, self.params['num_labeled'])
+    return ITML.fit(self, X, c.positive_negative_pairs(num_constraints),
+                    bounds=self.params['bounds'], A0=self.params['A0'])

diff --git a/metric_learn/lfda.py b/metric_learn/lfda.py
index a5aec378..097379de 100644
--- a/metric_learn/lfda.py
+++ b/metric_learn/lfda.py
@@ -15,6 +15,7 @@
 import scipy
 from six.moves import xrange
 from sklearn.metrics import pairwise_distances
+
 from .base_metric import BaseMetricLearner
 
 
diff --git a/metric_learn/lmnn.py b/metric_learn/lmnn.py
index 189b4e83..757d1be5 100644
--- a/metric_learn/lmnn.py
+++ b/metric_learn/lmnn.py
@@ -14,6 +14,7 @@
 from collections import Counter
 from six.moves import xrange
 from sklearn.metrics import pairwise_distances
+
 from .base_metric import BaseMetricLearner
 
 
@@ -237,10 +238,12 @@ def _sum_outer_products(data, a_inds, b_inds, weights=None):
 
 class LMNN(_base_LMNN):
   def __init__(self, k=3, min_iter=50, max_iter=1000, learn_rate=1e-7,
-               regularization=0.5, convergence_tol=0.001, use_pca=True, verbose=False):
+               regularization=0.5, convergence_tol=0.001, use_pca=True,
+               verbose=False):
     _base_LMNN.__init__(self, k=k, min_iter=min_iter, max_iter=max_iter,
                         learn_rate=learn_rate, regularization=regularization,
-                        convergence_tol=convergence_tol, use_pca=use_pca, verbose=verbose)
+                        convergence_tol=convergence_tol, use_pca=use_pca,
+                        verbose=verbose)
 
   def fit(self, X, labels):
     self.X = X

diff --git a/metric_learn/lsml.py b/metric_learn/lsml.py
index 72bdef02..3a576ab8 100644
--- a/metric_learn/lsml.py
+++ b/metric_learn/lsml.py
@@ -12,8 +12,8 @@
 import scipy.linalg
 from six.moves import xrange
 
-from . import constraints
 from .base_metric import BaseMetricLearner
+from .constraints import Constraints
 
 
 class LSML(BaseMetricLearner):
@@ -35,10 +35,12 @@
   def _prepare_inputs(self, X, constraints, weights, prior):
     self.X = X
-    self.vab = np.diff(X[constraints[:,:2]], axis=1)[:,0]
-    self.vcd = np.diff(X[constraints[:,2:]], axis=1)[:,0]
+    a,b,c,d = constraints
+    self.vab = X[a] - X[b]
+    self.vcd = X[c] - X[d]
+    assert self.vab.shape == self.vcd.shape, 'Constraints must have same length'
     if weights is None:
-      self.w = np.ones(constraints.shape[0])
+      self.w = np.ones(self.vab.shape[0])
     else:
       self.w = weights
     self.w /= self.w.sum()  # weights must sum to 1
@@ -57,7 +59,7 @@
     ----------
     X : (n x d) data matrix
       each row corresponds to a single instance
-    constraints : (m x 4) matrix of ints
+    constraints : 4-tuple of arrays
       (a,b,c,d) indices into X, such that d(X[a],X[b]) < d(X[c],X[d])
     weights : (m,) array of floats, optional
       scale factor for each constraint
@@ -130,8 +132,8 @@
 
 class LSML_Supervised(LSML):
-  def __init__(self, tol=1e-3, max_iter=1000, prior=None, num_constraints=None,
-               weights=None, verbose=False):
+  def __init__(self, tol=1e-3, max_iter=1000, prior=None, num_labeled=np.inf,
+               num_constraints=None, weights=None, verbose=False):
     """Initialize the learner.
Parameters @@ -140,18 +142,18 @@ def __init__(self, tol=1e-3, max_iter=1000, prior=None, num_constraints=None, max_iter : int, optional prior : (d x d) matrix, optional guess at a metric [default: covariance(X)] - num_constraints: int, needed for .fit() + num_labeled : int, optional + number of labels to preserve for training + num_constraints: int, optional + number of constraints to generate weights : (m,) array of floats, optional scale factor for each constraint verbose : bool, optional if True, prints information while learning """ LSML.__init__(self, tol=tol, max_iter=max_iter, verbose=verbose) - self.params.update({ - 'prior': prior, - 'num_constraints': num_constraints, - 'weights': weights, - }) + self.params.update(prior=prior, num_labeled=num_labeled, + num_constraints=num_constraints, weights=weights) def fit(self, X, labels): """Create constraints from labels and learn the LSML model. @@ -168,6 +170,7 @@ def fit(self, X, labels): num_classes = np.unique(labels) num_constraints = 20*(len(num_classes))**2 - C = constraints.relative_quadruplets(labels, num_constraints) - return LSML.fit(self, X, C, weights=self.params['weights'], + c = Constraints.random_subset(labels, self.params['num_labeled']) + pairs = c.positive_negative_pairs(num_constraints, same_length=True) + return LSML.fit(self, X, pairs, weights=self.params['weights'], prior=self.params['prior']) diff --git a/metric_learn/nca.py b/metric_learn/nca.py index e2fba0b6..c0616e2f 100644 --- a/metric_learn/nca.py +++ b/metric_learn/nca.py @@ -6,6 +6,7 @@ from __future__ import absolute_import import numpy as np from six.moves import xrange + from .base_metric import BaseMetricLearner diff --git a/metric_learn/rca.py b/metric_learn/rca.py index e7010f59..9e91167e 100644 --- a/metric_learn/rca.py +++ b/metric_learn/rca.py @@ -14,8 +14,8 @@ import numpy as np from six.moves import xrange -from . import constraints from .base_metric import BaseMetricLearner +from .constraints import Constraints class RCA(BaseMetricLearner): @@ -99,7 +99,7 @@ def _inv_sqrtm(x): class RCA_Supervised(RCA): - def __init__(self, dim=None, num_chunks=None, chunk_size=None, seed=None): + def __init__(self, dim=None, num_chunks=100, chunk_size=2): """Initialize the learner. Parameters @@ -108,15 +108,9 @@ def __init__(self, dim=None, num_chunks=None, chunk_size=None, seed=None): embedding dimension (default: original dimension of data) num_chunks: int, optional chunk_size: int, optional - seed: int, optional """ - # @TODO: remove seed from param. See @TODO in constraints/chunks RCA.__init__(self, dim=dim) - self.params.update({ - 'num_chunks': 100 if num_chunks is None else num_chunks, - 'chunk_size': 2 if chunk_size is None else chunk_size, - 'seed': seed, - }) + self.params.update(num_chunks=num_chunks, chunk_size=chunk_size) def fit(self, X, labels): """Create constraints from labels and learn the LSML model. 
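
For reference, the chunk assignment that the hunk below switches RCA_Supervised
onto is a flat integer vector: entry i names the chunklet that point i belongs
to, and -1 marks points left out of every chunklet (the format produced by
Constraints.chunks). A toy sketch with hypothetical values::

    import numpy as np

    # 6 points: points 0 and 1 form chunklet 0; points 2 and 4 form
    # chunklet 1; points 3 and 5 are assigned to no chunklet.
    chunks = np.array([0, 0, 1, -1, 1, -1])
    # RCA.fit(X, chunks) then learns a metric from within-chunklet scatter.
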
@@ -128,6 +122,6 @@ def fit(self, X, labels): each row corresponds to a single instance labels : (n) data labels """ - C = constraints.chunks(labels, self.params['num_chunks'], - self.params['chunk_size'], self.params['seed']) - return RCA.fit(self, X, C) + chunks = Constraints(labels).chunks(num_chunks=self.params['num_chunks'], + chunk_size=self.params['chunk_size']) + return RCA.fit(self, X, chunks) diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py index efaaf6b2..c861d10d 100644 --- a/metric_learn/sdml.py +++ b/metric_learn/sdml.py @@ -14,8 +14,8 @@ from sklearn.covariance import graph_lasso from sklearn.utils.extmath import pinvh -from . import constraints from .base_metric import BaseMetricLearner +from .constraints import Constraints class SDML(BaseMetricLearner): @@ -69,16 +69,25 @@ def fit(self, X, W): class SDML_Supervised(SDML): def __init__(self, balance_param=0.5, sparsity_param=0.01, use_cov=True, - num_constraints=None, verbose=False): + num_labeled=np.inf, num_constraints=None, verbose=False): SDML.__init__(self, balance_param=balance_param, sparsity_param=sparsity_param, use_cov=use_cov, verbose=verbose) - self.params['num_constraints'] = num_constraints - - __init__.__doc__ = ( - SDML.__init__.__doc__ + - 'num_constraints: int, optional\n' - ' number of constraints to generate') + ''' + balance_param: float, optional + trade off between sparsity and M0 prior + sparsity_param: float, optional + trade off between optimizer and sparseness (see graph_lasso) + use_cov: bool, optional + controls prior matrix, will use the identity if use_cov=False + num_labeled : int, optional + number of labels to preserve for training + num_constraints: int, optional + number of constraints to generate + verbose : bool, optional + if True, prints information while learning + ''' + self.params.update(num_labeled=num_labeled, num_constraints=num_constraints) def fit(self, X, labels): """Create constraints from labels and learn the SDML model. 
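
The supervised wrapper ultimately hands SDML.fit a signed connectivity matrix
built from sampled label pairs (see Constraints.adjacency_matrix). A tiny
hand-built illustration of that format, with hypothetical values::

    import numpy as np

    # 4 points: +1 marks a same-label (positive) pair, -1 a different-label
    # (negative) pair, 0 means no constraint; the matrix is kept symmetric.
    W = np.zeros((4, 4))
    W[0, 1] = W[1, 0] = 1
    W[2, 3] = W[3, 2] = -1
    # sdml.fit(X, W), where X is the (4 x d) data matrix
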
@@ -94,5 +103,5 @@ def fit(self, X, labels): num_classes = np.unique(labels) num_constraints = 20*(len(num_classes))**2 - W = constraints.adjacency_matrix(labels, X.shape[0], num_constraints) - return SDML.fit(self, X, W) + c = Constraints.random_subset(labels, self.params['num_labeled']) + return SDML.fit(self, X, c.adjacency_matrix(num_constraints)) diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py index 5cb45892..1a745596 100644 --- a/test/metric_learn_test.py +++ b/test/metric_learn_test.py @@ -98,7 +98,7 @@ def test_iris(self): class TestRCA(MetricTestCase): def test_iris(self): - rca = RCA_Supervised(dim=2, num_chunks=30, chunk_size=2, seed=1234) + rca = RCA_Supervised(dim=2, num_chunks=30, chunk_size=2) rca.fit(self.iris_points, self.iris_labels) csep = class_separation(rca.transform(), self.iris_labels) self.assertLess(csep, 0.25) From 96a8abc7862a9c0bcfb44dc9816b8ac882ef34b5 Mon Sep 17 00:00:00 2001 From: CJ Carey Date: Wed, 13 Jul 2016 17:38:55 -0400 Subject: [PATCH 011/210] Version bump to 0.3, updating docs --- README.rst | 2 +- doc/index.rst | 3 ++- doc/metric_learn.covariance.rst | 21 +++++++++++++++++++++ doc/metric_learn.itml.rst | 10 +++------- doc/metric_learn.lsml.rst | 8 +++----- doc/metric_learn.rca.rst | 8 +++----- doc/metric_learn.sdml.rst | 8 +++----- metric_learn/sdml.py | 3 ++- setup.py | 6 +++--- 9 files changed, 41 insertions(+), 28 deletions(-) create mode 100644 doc/metric_learn.covariance.rst diff --git a/README.rst b/README.rst index d62d32c0..38c088aa 100644 --- a/README.rst +++ b/README.rst @@ -17,7 +17,7 @@ Metric Learning algorithms in Python. **Dependencies** -- Python 2.6+ +- Python 2.7+, 3.4+ - numpy, scipy, scikit-learn - (for running the examples only: matplotlib) diff --git a/doc/index.rst b/doc/index.rst index b9c0c209..df4ed8a6 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -17,6 +17,7 @@ metric learning algorithms. :caption: Algorithms :maxdepth: 1 + metric_learn.covariance metric_learn.lmnn metric_learn.itml metric_learn.sdml @@ -51,7 +52,7 @@ Alternately, download the source repository and run: **Dependencies** -- Python 2.6+ +- Python 2.7+, 3.4+ - numpy, scipy, scikit-learn - (for running the examples only: matplotlib) diff --git a/doc/metric_learn.covariance.rst b/doc/metric_learn.covariance.rst new file mode 100644 index 00000000..d24229a3 --- /dev/null +++ b/doc/metric_learn.covariance.rst @@ -0,0 +1,21 @@ +Covariance metric (baseline method) +=================================== + +.. 
automodule:: metric_learn.covariance + :members: + :undoc-members: + :inherited-members: + :show-inheritance: + +Example Code +------------ + +:: + + from metric_learn import Covariance + from sklearn.datasets import load_iris + + iris_data = load_iris() + + cov = Covariance() + x = cov.fit_transform(iris_data['data']) diff --git a/doc/metric_learn.itml.rst b/doc/metric_learn.itml.rst index 6f4cf740..d6fb2221 100644 --- a/doc/metric_learn.itml.rst +++ b/doc/metric_learn.itml.rst @@ -12,19 +12,15 @@ Example Code :: - import numpy as np - from metric_learn import ITML + from metric_learn import ITML_Supervised from sklearn.datasets import load_iris iris_data = load_iris() X = iris_data['data'] Y = iris_data['target'] - itml = ITML() - - num_constraints = 200 - C = ITML.prepare_constraints(Y, X.shape[0], num_constraints) - itml.fit(X, C, verbose=False) + itml = ITML_Supervised(num_constraints=200) + itml.fit(X, Y) References ---------- diff --git a/doc/metric_learn.lsml.rst b/doc/metric_learn.lsml.rst index 4409e562..12be71b8 100644 --- a/doc/metric_learn.lsml.rst +++ b/doc/metric_learn.lsml.rst @@ -12,17 +12,15 @@ Example Code :: - import numpy as np - from metric_learn import LSML + from metric_learn import LSML_Supervised from sklearn.datasets import load_iris iris_data = load_iris() X = iris_data['data'] Y = iris_data['target'] - lsml = LSML() - C = LSML.prepare_constraints(Y, 200) - isml.fit(X, C, verbose=False) + lsml = LSML_Supervised(num_constraints=200) + isml.fit(X, Y) References ---------- diff --git a/doc/metric_learn.rca.rst b/doc/metric_learn.rca.rst index a73c6e7c..2430cd82 100644 --- a/doc/metric_learn.rca.rst +++ b/doc/metric_learn.rca.rst @@ -12,17 +12,15 @@ Example Code :: - import numpy as np - from metric_learn import RCA + from metric_learn import RCA_Supervised from sklearn.datasets import load_iris iris_data = load_iris() X = iris_data['data'] Y = iris_data['target'] - rca = RCA() - C = RCA.prepare_constraints(Y, num_chunks=30, chunk_size=2) - rca.fit(X, C) + rca = RCA_Supervised(num_chunks=30, chunk_size=2) + rca.fit(X, Y) References ------------------ diff --git a/doc/metric_learn.sdml.rst b/doc/metric_learn.sdml.rst index de48fec5..83570483 100644 --- a/doc/metric_learn.sdml.rst +++ b/doc/metric_learn.sdml.rst @@ -12,17 +12,15 @@ Example Code :: - import numpy as np - from metric_learn import SDML + from metric_learn import SDML_Supervised from sklearn.datasets import load_iris iris_data = load_iris() X = iris_data['data'] Y = iris_data['target'] - sdml = SDML() - W = SDML.prepare_constraints(Y, X.shape[0], 1500) - sdml.fit(X, W) + sdml = SDML_Supervised(num_constraints=200) + sdml.fit(X, Y) References ------------------ diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py index c861d10d..aba1b9be 100644 --- a/metric_learn/sdml.py +++ b/metric_learn/sdml.py @@ -55,7 +55,8 @@ def fit(self, X, W): """ X: data matrix, (n x d) each row corresponds to a single instance - W: connectivity graph, (n x n). +1 for positive pairs, -1 for negative. + W: connectivity graph, (n x n) + +1 for positive pairs, -1 for negative. 
""" self._prepare_inputs(X, W) P = pinvh(self.M) + self.params['balance_param'] * self.loss_matrix diff --git a/setup.py b/setup.py index 9696b78f..2031754a 100755 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- from setuptools import setup -version = "0.2.1" +version = "0.3.0" setup(name='metric-learn', version=version, description='Python implementations of metric learning algorithms', @@ -26,12 +26,12 @@ 'six' ], extras_require=dict( - docs=['sphinx', 'numpydoc'], + docs=['sphinx', 'shinx_rtd_theme', 'numpydoc'], demo=['matplotlib'], ), test_suite='test', keywords=[ - 'Metric Learning', + 'Metric Learning', 'Large Margin Nearest Neighbor', 'Information Theoretic Metric Learning', 'Sparse Determinant Metric Learning', From ee75561fd2555996ff4bda0dddff3b6a4df08d82 Mon Sep 17 00:00:00 2001 From: CJ Carey Date: Wed, 13 Jul 2016 17:57:57 -0400 Subject: [PATCH 012/210] Updating version strings [ci skip] --- doc/conf.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/conf.py b/doc/conf.py index 5e3f2cd9..467691ab 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -15,10 +15,10 @@ # General information about the project. project = u'metric-learn' -copyright = u'2015, CJ Carey and Yuan Tang' +copyright = u'2015-2016, CJ Carey and Yuan Tang' author = u'CJ Carey and Yuan Tang' -version = '0.2.1' -release = '0.2.1' +version = '0.3.0' +release = '0.3.0' language = 'en' exclude_patterns = ['_build'] From bb0a70a9dba97262fb29ed80f57f7625ee30eb91 Mon Sep 17 00:00:00 2001 From: CJ Carey Date: Thu, 14 Jul 2016 16:10:23 -0400 Subject: [PATCH 013/210] Avoid subtle bugs with in-place ops on views --- metric_learn/lfda.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/metric_learn/lfda.py b/metric_learn/lfda.py index 097379de..5e8c590a 100644 --- a/metric_learn/lfda.py +++ b/metric_learn/lfda.py @@ -93,10 +93,8 @@ def fit(self, X, Y): tSb -= _sum_outer(X)/n - tSw # symmetrize - tSb += tSb.T - tSb /= 2 - tSw += tSw.T - tSw /= 2 + tSb = (tSb + tSb.T) / 2 + tSw = (tSw + tSw.T) / 2 if self.params['dim'] == d: vals, vecs = scipy.linalg.eigh(tSb, tSw) From 45c0d16aece0290ebaed3a2ef03d934526a3240f Mon Sep 17 00:00:00 2001 From: CJ Carey Date: Tue, 2 Aug 2016 16:32:13 -0500 Subject: [PATCH 014/210] adding attribution for original source code --- metric_learn/itml.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/metric_learn/itml.py b/metric_learn/itml.py index 7f2118bd..d0b22239 100644 --- a/metric_learn/itml.py +++ b/metric_learn/itml.py @@ -9,6 +9,8 @@ incorporate a prior on the distance function. Unlike some other methods, ITML does not rely on an eigenvalue computation or semi-definite programming. + +Adapted from Matlab code at http://www.cs.utexas.edu/users/pjain/itml/ """ from __future__ import print_function, absolute_import From ac63da544ea41540b4663fb6795342ea8eea6af8 Mon Sep 17 00:00:00 2001 From: CJ Carey Date: Wed, 7 Sep 2016 10:58:16 -0500 Subject: [PATCH 015/210] DOC: fixing example code Fixes #24. 
--- doc/metric_learn.covariance.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/metric_learn.covariance.rst b/doc/metric_learn.covariance.rst index d24229a3..92326cc0 100644 --- a/doc/metric_learn.covariance.rst +++ b/doc/metric_learn.covariance.rst @@ -15,7 +15,7 @@ Example Code from metric_learn import Covariance from sklearn.datasets import load_iris - iris_data = load_iris() + iris = load_iris()['data'] - cov = Covariance() - x = cov.fit_transform(iris_data['data']) + cov = Covariance().fit(iris) + x = cov.transform(iris) From 5e4a2d12591d5551d2552786cf0fdd23bf30bf14 Mon Sep 17 00:00:00 2001 From: CJ Carey Date: Wed, 7 Sep 2016 12:11:21 -0400 Subject: [PATCH 016/210] Adding test coverage for Covariance And storing the input data used for fit() --- .gitignore | 1 + metric_learn/covariance.py | 1 + test/metric_learn_test.py | 12 +++++++++++- 3 files changed, 13 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 32ed7270..3f029036 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,4 @@ build/ dist/ *.egg-info .coverage +htmlcov/ diff --git a/metric_learn/covariance.py b/metric_learn/covariance.py index 541cbfa9..2142b337 100644 --- a/metric_learn/covariance.py +++ b/metric_learn/covariance.py @@ -26,5 +26,6 @@ def fit(self, X, y=None): X: data matrix, (n x d) y: unused, optional """ + self.X = X self.M = np.cov(X.T) return self diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py index 1a745596..2ef97237 100644 --- a/test/metric_learn_test.py +++ b/test/metric_learn_test.py @@ -6,7 +6,7 @@ from numpy.testing import assert_array_almost_equal from metric_learn import ( - LMNN, NCA, LFDA, + LMNN, NCA, LFDA, Covariance, LSML_Supervised, ITML_Supervised, SDML_Supervised, RCA_Supervised) # Import this specially for testing. from metric_learn.lmnn import python_LMNN @@ -32,6 +32,16 @@ def setUpClass(self): np.random.seed(1234) +class TestCovariance(MetricTestCase): + def test_iris(self): + cov = Covariance() + cov.fit(self.iris_points) + + csep = class_separation(cov.transform(), self.iris_labels) + # deterministic result + self.assertAlmostEqual(csep, 0.73068122) + + class TestLSML(MetricTestCase): def test_iris(self): lsml = LSML_Supervised(num_constraints=200) From 9f602e66c55d9ab3627d556019426686d0a8dcaf Mon Sep 17 00:00:00 2001 From: Bhargav Srinivasa Date: Thu, 15 Sep 2016 20:03:10 +0200 Subject: [PATCH 017/210] Fixed typo (#32) --- metric_learn/rca.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metric_learn/rca.py b/metric_learn/rca.py index 9e91167e..f1ab246f 100644 --- a/metric_learn/rca.py +++ b/metric_learn/rca.py @@ -113,7 +113,7 @@ def __init__(self, dim=None, num_chunks=100, chunk_size=2): self.params.update(num_chunks=num_chunks, chunk_size=chunk_size) def fit(self, X, labels): - """Create constraints from labels and learn the LSML model. + """Create constraints from labels and learn the RCA model. Needs num_constraints specified in constructor. 
Parameters From c74a058c54cbfaee13e3e64bd9af319197de07ad Mon Sep 17 00:00:00 2001 From: Anirud Thyagharajan Date: Thu, 29 Sep 2016 08:05:27 +0530 Subject: [PATCH 018/210] adds code for safe return in case of no impostors for lmnn (#36) adds code for safe return in case of no impostors for lmnn, fixes #17 --- metric_learn/lmnn.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/metric_learn/lmnn.py b/metric_learn/lmnn.py index 757d1be5..e580f3ed 100644 --- a/metric_learn/lmnn.py +++ b/metric_learn/lmnn.py @@ -63,6 +63,9 @@ def fit(self, X, labels): target_neighbors = self._select_targets() impostors = self._find_impostors(target_neighbors[:,-1]) + if len(impostors) == 0: + # L has already been initialized to an identity matrix of requisite shape + return # sum outer products dfG = _sum_outer_products(self.X, target_neighbors.flatten(), @@ -203,6 +206,9 @@ def _find_impostors(self, furthest_neighbors): tmp = np.ravel_multi_index((i,j), shape) i,j = np.unravel_index(np.unique(tmp), shape) impostors.append(np.vstack((in_inds[j], out_inds[i]))) + if len(impostors) == 0: + # No impostors detected + return impostors return np.hstack(impostors) From 890b6a893ba9903d541889acae32a14bc97db368 Mon Sep 17 00:00:00 2001 From: Bhargav Srinivasa Date: Thu, 29 Sep 2016 20:44:21 +0200 Subject: [PATCH 019/210] [MRG] Added random_states (#35) --- metric_learn/constraints.py | 32 +++++++++++++++++++------------- metric_learn/itml.py | 5 +++-- metric_learn/lsml.py | 5 +++-- metric_learn/rca.py | 5 +++-- metric_learn/sdml.py | 5 +++-- 5 files changed, 31 insertions(+), 21 deletions(-) diff --git a/metric_learn/constraints.py b/metric_learn/constraints.py index 0f57b3e8..ce71ecd2 100644 --- a/metric_learn/constraints.py +++ b/metric_learn/constraints.py @@ -19,8 +19,8 @@ def __init__(self, partial_labels): self.known_label_idx, = np.where(partial_labels >= 0) self.known_labels = partial_labels[self.known_label_idx] - def adjacency_matrix(self, num_constraints): - a, b, c, d = self.positive_negative_pairs(num_constraints) + def adjacency_matrix(self, num_constraints, random_state=np.random): + a, b, c, d = self.positive_negative_pairs(num_constraints, random_state=random_state) row = np.concatenate((a, c)) col = np.concatenate((b, d)) data = np.ones_like(row, dtype=int) @@ -29,21 +29,21 @@ def adjacency_matrix(self, num_constraints): # symmetrize return adj + adj.T - def positive_negative_pairs(self, num_constraints, same_length=False): - a, b = self._pairs(num_constraints, same_label=True) - c, d = self._pairs(num_constraints, same_label=False) + def positive_negative_pairs(self, num_constraints, same_length=False, random_state=np.random): + a, b = self._pairs(num_constraints, same_label=True, random_state=random_state) + c, d = self._pairs(num_constraints, same_label=False, random_state=random_state) if same_length and len(a) != len(c): n = min(len(a), len(c)) return a[:n], b[:n], c[:n], d[:n] return a, b, c, d - def _pairs(self, num_constraints, same_label=True, max_iter=10): + def _pairs(self, num_constraints, same_label=True, max_iter=10, random_state=np.random): num_labels = len(self.known_labels) ab = set() it = 0 while it < max_iter and len(ab) < num_constraints: nc = num_constraints - len(ab) - for aidx in np.random.randint(num_labels, size=nc): + for aidx in random_state.randint(num_labels, size=nc): if same_label: mask = self.known_labels[aidx] == self.known_labels mask[aidx] = False # avoid identity pairs @@ -51,7 +51,7 @@ def _pairs(self, num_constraints, same_label=True, max_iter=10): mask = 
self.known_labels[aidx] != self.known_labels b_choices, = np.where(mask) if len(b_choices) > 0: - ab.add((aidx, np.random.choice(b_choices))) + ab.add((aidx, random_state.choice(b_choices))) it += 1 if len(ab) < num_constraints: warnings.warn("Only generated %d %s constraints (requested %d)" % ( @@ -59,18 +59,21 @@ def _pairs(self, num_constraints, same_label=True, max_iter=10): ab = np.array(list(ab)[:num_constraints], dtype=int) return self.known_label_idx[ab.T] - def chunks(self, num_chunks=100, chunk_size=2): + def chunks(self, num_chunks=100, chunk_size=2, random_state=np.random): + """ + the random state object to be passed must be a numpy random seed + """ chunks = -np.ones_like(self.known_label_idx, dtype=int) uniq, lookup = np.unique(self.known_labels, return_inverse=True) all_inds = [set(np.where(lookup==c)[0]) for c in xrange(len(uniq))] idx = 0 while idx < num_chunks and all_inds: - c = random.randint(0, len(all_inds)-1) + c = random_state.randint(0, high=len(all_inds)-1) inds = all_inds[c] if len(inds) < chunk_size: del all_inds[c] continue - ii = random.sample(inds, chunk_size) + ii = random_state.choice(list(inds), chunk_size, replace=False) inds.difference_update(ii) chunks[ii] = idx idx += 1 @@ -80,10 +83,13 @@ def chunks(self, num_chunks=100, chunk_size=2): return chunks @staticmethod - def random_subset(all_labels, num_preserved=np.inf): + def random_subset(all_labels, num_preserved=np.inf, random_state=np.random): + """ + the random state object to be passed must be a numpy random seed + """ n = len(all_labels) num_ignored = max(0, n - num_preserved) - idx = np.random.randint(n, size=num_ignored) + idx = random_state.randint(n, size=num_ignored) partial_labels = np.array(all_labels, copy=True) partial_labels[idx] = -1 return Constraints(partial_labels) diff --git a/metric_learn/itml.py b/metric_learn/itml.py index d0b22239..19e5bb71 100644 --- a/metric_learn/itml.py +++ b/metric_learn/itml.py @@ -166,7 +166,7 @@ def __init__(self, gamma=1., max_iters=1000, convergence_threshold=1e-3, self.params.update(num_labeled=num_labeled, num_constraints=num_constraints, bounds=bounds, A0=A0) - def fit(self, X, labels): + def fit(self, X, labels, random_state=np.random): """Create constraints from labels and learn the ITML model. Needs num_constraints specified in constructor. @@ -175,12 +175,13 @@ def fit(self, X, labels): X : (n x d) data matrix each row corresponds to a single instance labels : (n) data labels + random_state : a numpy random.seed object to fix the random_state if needed. """ num_constraints = self.params['num_constraints'] if num_constraints is None: num_classes = np.unique(labels) num_constraints = 20*(len(num_classes))**2 - c = Constraints.random_subset(labels, self.params['num_labeled']) + c = Constraints.random_subset(labels, self.params['num_labeled'], random_state=random_state) return ITML.fit(self, X, c.positive_negative_pairs(num_constraints), bounds=self.params['bounds'], A0=self.params['A0']) diff --git a/metric_learn/lsml.py b/metric_learn/lsml.py index 3a576ab8..077cdd5d 100644 --- a/metric_learn/lsml.py +++ b/metric_learn/lsml.py @@ -155,7 +155,7 @@ def __init__(self, tol=1e-3, max_iter=1000, prior=None, num_labeled=np.inf, self.params.update(prior=prior, num_labeled=num_labeled, num_constraints=num_constraints, weights=weights) - def fit(self, X, labels): + def fit(self, X, labels, random_state=np.random): """Create constraints from labels and learn the LSML model. Needs num_constraints specified in constructor. 
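
Threading an explicit random_state through the supervised wrappers makes the
sampled constraints reproducible: two fits given equally-seeded generators draw
identical constraint sets and therefore learn identical metrics. A minimal
sketch of the intended usage (the fit/fit_transform equivalence tests added in
the next patch rely on exactly this)::

    import numpy as np
    from sklearn.datasets import load_iris
    from metric_learn import LSML_Supervised

    iris_data = load_iris()
    X = iris_data['data']
    y = iris_data['target']

    lsml_a = LSML_Supervised(num_constraints=200)
    lsml_a.fit(X, y, random_state=np.random.RandomState(1234))

    lsml_b = LSML_Supervised(num_constraints=200)
    lsml_b.fit(X, y, random_state=np.random.RandomState(1234))
    # identical seeds -> identical sampled quadruplets -> identical metrics
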
@@ -164,13 +164,14 @@ def fit(self, X, labels): X : (n x d) data matrix each row corresponds to a single instance labels : (n) data labels + random_state : a numpy random.seed object to fix the random_state if needed. """ num_constraints = self.params['num_constraints'] if num_constraints is None: num_classes = np.unique(labels) num_constraints = 20*(len(num_classes))**2 - c = Constraints.random_subset(labels, self.params['num_labeled']) + c = Constraints.random_subset(labels, self.params['num_labeled'], random_state=random_state) pairs = c.positive_negative_pairs(num_constraints, same_length=True) return LSML.fit(self, X, pairs, weights=self.params['weights'], prior=self.params['prior']) diff --git a/metric_learn/rca.py b/metric_learn/rca.py index f1ab246f..420dac65 100644 --- a/metric_learn/rca.py +++ b/metric_learn/rca.py @@ -112,7 +112,7 @@ def __init__(self, dim=None, num_chunks=100, chunk_size=2): RCA.__init__(self, dim=dim) self.params.update(num_chunks=num_chunks, chunk_size=chunk_size) - def fit(self, X, labels): + def fit(self, X, labels, random_state=np.random): """Create constraints from labels and learn the RCA model. Needs num_constraints specified in constructor. @@ -121,7 +121,8 @@ def fit(self, X, labels): X : (n x d) data matrix each row corresponds to a single instance labels : (n) data labels + random_state : a random.seed object to fix the random_state if needed. """ chunks = Constraints(labels).chunks(num_chunks=self.params['num_chunks'], - chunk_size=self.params['chunk_size']) + chunk_size=self.params['chunk_size'], random_state=random_state) return RCA.fit(self, X, chunks) diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py index aba1b9be..474f2502 100644 --- a/metric_learn/sdml.py +++ b/metric_learn/sdml.py @@ -90,7 +90,7 @@ def __init__(self, balance_param=0.5, sparsity_param=0.01, use_cov=True, ''' self.params.update(num_labeled=num_labeled, num_constraints=num_constraints) - def fit(self, X, labels): + def fit(self, X, labels, random_state=np.random): """Create constraints from labels and learn the SDML model. Parameters @@ -98,11 +98,12 @@ def fit(self, X, labels): X: data matrix, (n x d) each row corresponds to a single instance labels: data labels, (n,) array-like + random_state : a numpy random.seed object to fix the random_state if needed. 
""" num_constraints = self.params['num_constraints'] if num_constraints is None: num_classes = np.unique(labels) num_constraints = 20*(len(num_classes))**2 - c = Constraints.random_subset(labels, self.params['num_labeled']) + c = Constraints.random_subset(labels, self.params['num_labeled'], random_state=random_state) return SDML.fit(self, X, c.adjacency_matrix(num_constraints)) From c5087d72618e85d7ff74020a64fbceb531bfb50a Mon Sep 17 00:00:00 2001 From: Bhargav Srinivasa Date: Thu, 6 Oct 2016 19:03:15 +0200 Subject: [PATCH 020/210] [MRG] Adding fit_transform (#26) --- metric_learn/base_metric.py | 22 ++++++ metric_learn/itml.py | 2 +- metric_learn/lsml.py | 2 +- metric_learn/sdml.py | 2 +- test/test_fit_transform.py | 132 ++++++++++++++++++++++++++++++++++++ 5 files changed, 157 insertions(+), 3 deletions(-) create mode 100644 test/test_fit_transform.py diff --git a/metric_learn/base_metric.py b/metric_learn/base_metric.py index 5fe2ca14..0978b17b 100644 --- a/metric_learn/base_metric.py +++ b/metric_learn/base_metric.py @@ -45,6 +45,28 @@ def transform(self, X=None): X = self.X L = self.transformer() return X.dot(L.T) + + def fit_transform(self, *args, **kwargs): + """ + Function calls .fit() and returns the result of .transform() + Essentially, it runs the relevant Metric Learning algorithm with .fit() + and returns the metric-transformed input data. + + Paramters + --------- + + Since all the parameters passed to fit_transform are passed on to + fit(), the parameters to be passed must be noted from the corresponding + Metric Learning algorithm's fit method. + + Returns + ------- + transformed : (n x d) matrix + Input data transformed to the metric space by :math:`XL^{\\top}` + + """ + self.fit(*args, **kwargs) + return self.transform() def get_params(self, deep=False): """Get parameters for this metric learner. 
diff --git a/metric_learn/itml.py b/metric_learn/itml.py index 19e5bb71..6a6fcf04 100644 --- a/metric_learn/itml.py +++ b/metric_learn/itml.py @@ -183,5 +183,5 @@ def fit(self, X, labels, random_state=np.random): num_constraints = 20*(len(num_classes))**2 c = Constraints.random_subset(labels, self.params['num_labeled'], random_state=random_state) - return ITML.fit(self, X, c.positive_negative_pairs(num_constraints), + return ITML.fit(self, X, c.positive_negative_pairs(num_constraints, random_state=random_state), bounds=self.params['bounds'], A0=self.params['A0']) diff --git a/metric_learn/lsml.py b/metric_learn/lsml.py index 077cdd5d..343c0b7f 100644 --- a/metric_learn/lsml.py +++ b/metric_learn/lsml.py @@ -172,6 +172,6 @@ def fit(self, X, labels, random_state=np.random): num_constraints = 20*(len(num_classes))**2 c = Constraints.random_subset(labels, self.params['num_labeled'], random_state=random_state) - pairs = c.positive_negative_pairs(num_constraints, same_length=True) + pairs = c.positive_negative_pairs(num_constraints, same_length=True, random_state=random_state) return LSML.fit(self, X, pairs, weights=self.params['weights'], prior=self.params['prior']) diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py index 474f2502..852b00f3 100644 --- a/metric_learn/sdml.py +++ b/metric_learn/sdml.py @@ -106,4 +106,4 @@ def fit(self, X, labels, random_state=np.random): num_constraints = 20*(len(num_classes))**2 c = Constraints.random_subset(labels, self.params['num_labeled'], random_state=random_state) - return SDML.fit(self, X, c.adjacency_matrix(num_constraints)) + return SDML.fit(self, X, c.adjacency_matrix(num_constraints, random_state=random_state)) diff --git a/test/test_fit_transform.py b/test/test_fit_transform.py new file mode 100644 index 00000000..a25511ce --- /dev/null +++ b/test/test_fit_transform.py @@ -0,0 +1,132 @@ +import unittest +import numpy as np +from sklearn.datasets import load_iris +from numpy.testing import assert_array_almost_equal + +from metric_learn import ( + LMNN, NCA, LFDA, Covariance, + LSML_Supervised, ITML_Supervised, SDML_Supervised, RCA_Supervised) + + + +class MetricTestCase(unittest.TestCase): + @classmethod + def setUpClass(self): + # runs once per test class + iris_data = load_iris() + self.iris_points = iris_data['data'] + self.iris_labels = iris_data['target'] + + +class TestCovariance(MetricTestCase): + def test_cov(self): + cov = Covariance() + cov.fit(self.iris_points) + res_1 = cov.transform() + + cov = Covariance() + res_2 = cov.fit_transform(self.iris_points) + # deterministic result + assert_array_almost_equal(res_1, res_2) + + +class TestLSML(MetricTestCase): + def test_lsml(self): + + seed = np.random.RandomState(1234) + lsml = LSML_Supervised(num_constraints=200) + lsml.fit(self.iris_points, self.iris_labels, random_state=seed) + res_1 = lsml.transform() + + seed = np.random.RandomState(1234) + lsml = LSML_Supervised(num_constraints=200) + res_2 = lsml.fit_transform(self.iris_points, self.iris_labels, random_state=seed) + + assert_array_almost_equal(res_1, res_2) + +class TestITML(MetricTestCase): + def test_itml(self): + + seed = np.random.RandomState(1234) + itml = ITML_Supervised(num_constraints=200) + itml.fit(self.iris_points, self.iris_labels, random_state=seed) + res_1 = itml.transform() + + seed = np.random.RandomState(1234) + itml = ITML_Supervised(num_constraints=200) + res_2 = itml.fit_transform(self.iris_points, self.iris_labels, random_state=seed) + + assert_array_almost_equal(res_1, res_2) + +class 
TestLMNN(MetricTestCase): + def test_lmnn(self): + + lmnn = LMNN(k=5, learn_rate=1e-6, verbose=False) + lmnn.fit(self.iris_points, self.iris_labels) + res_1 = lmnn.transform() + + lmnn = LMNN(k=5, learn_rate=1e-6, verbose=False) + res_2 = lmnn.fit_transform(self.iris_points, self.iris_labels) + + assert_array_almost_equal(res_1, res_2) + +class TestSDML(MetricTestCase): + def test_sdml(self): + + seed = np.random.RandomState(1234) + sdml = SDML_Supervised(num_constraints=1500) + sdml.fit(self.iris_points, self.iris_labels, random_state=seed) + res_1 = sdml.transform() + + seed = np.random.RandomState(1234) + sdml = SDML_Supervised(num_constraints=1500) + res_2 = sdml.fit_transform(self.iris_points, self.iris_labels, random_state=seed) + + assert_array_almost_equal(res_1, res_2) + +class TestNCA(MetricTestCase): + def test_nca(self): + + n = self.iris_points.shape[0] + nca = NCA(max_iter=(100000//n), learning_rate=0.01) + nca.fit(self.iris_points, self.iris_labels) + res_1 = nca.transform() + + nca = NCA(max_iter=(100000//n), learning_rate=0.01) + res_2 = nca.fit_transform(self.iris_points, self.iris_labels) + + assert_array_almost_equal(res_1, res_2) + +class TestLFDA(MetricTestCase): + def test_lfda(self): + + lfda = LFDA(k=2, dim=2) + lfda.fit(self.iris_points, self.iris_labels) + res_1 = lfda.transform() + + lfda = LFDA(k=2, dim=2) + res_2 = lfda.fit_transform(self.iris_points, self.iris_labels) + + res_1 = round(res_1[0][0], 3) + res_2 = round(res_2[0][0], 3) + res = (res_1 == res_2 or res_1 == -res_2) + + self.assertTrue(res) + +class TestRCA(MetricTestCase): + def test_rca(self): + + seed = np.random.RandomState(1234) + rca = RCA_Supervised(dim=2, num_chunks=30, chunk_size=2) + rca.fit(self.iris_points, self.iris_labels, random_state=seed) + res_1 = rca.transform() + + seed = np.random.RandomState(1234) + rca = RCA_Supervised(dim=2, num_chunks=30, chunk_size=2) + res_2 = rca.fit_transform(self.iris_points, self.iris_labels, random_state=seed) + + assert_array_almost_equal(res_1, res_2) + + +if __name__ == '__main__': + unittest.main() From 54b06ae7ec6e7d7ad4974932c51af229f34c13da Mon Sep 17 00:00:00 2001 From: Devashish Deshpande Date: Sat, 29 Oct 2016 00:15:57 +0530 Subject: [PATCH 021/210] Added MLKR algorithm (#28) * Added MLKR algorithm * Addressed initial comments, changed to pdist * addressed 2nd review * Made changes in computeyhat --- metric_learn/__init__.py | 1 + metric_learn/mlkr.py | 132 ++++++++++++++++++++++++++++++++++++++ test/metric_learn_test.py | 9 ++- 3 files changed, 141 insertions(+), 1 deletion(-) create mode 100644 metric_learn/mlkr.py diff --git a/metric_learn/__init__.py b/metric_learn/__init__.py index cc60049d..5a7508c0 100644 --- a/metric_learn/__init__.py +++ b/metric_learn/__init__.py @@ -9,3 +9,4 @@ from .nca import NCA from .lfda import LFDA from .rca import RCA, RCA_Supervised +from .mlkr import MLKR diff --git a/metric_learn/mlkr.py b/metric_learn/mlkr.py new file mode 100644 index 00000000..7c279cc8 --- /dev/null +++ b/metric_learn/mlkr.py @@ -0,0 +1,132 @@ +""" +Metric Learning for Kernel Regression (MLKR), Weinberger et al., + +MLKR is an algorithm for supervised metric learning, which learns a distance +function by directly minimising the leave-one-out regression error. This +algorithm can also be viewed as a supervised variation of PCA and can be used +for dimensionality reduction and high dimensional data visualization. 
+""" +from __future__ import division +import numpy as np +from six.moves import xrange +from scipy.spatial.distance import pdist, squareform + +from .base_metric import BaseMetricLearner + +class MLKR(BaseMetricLearner): + """Metric Learning for Kernel Regression (MLKR)""" + def __init__(self, A0=None, epsilon=0.01, alpha=0.0001): + """ + MLKR initialization + + Parameters + ---------- + A0: Initialization of matrix A. Defaults to the identity matrix. + epsilon: Step size for gradient descent. + alpha: Stopping criterion for loss function in gradient descent. + """ + self.params = { + "A0": A0, + "epsilon": epsilon, + "alpha": alpha + } + + def _process_inputs(self, X, y): + self.X = np.array(X, copy=False) + y = np.array(y, copy=False) + if X.ndim == 1: + X = X[:, np.newaxis] + if y.ndim == 1: + y = y[:, np.newaxis] + n, d = X.shape + if y.shape[0] != n: + raise ValueError('Data and label lengths mismatch: %d != %d' + % (n, y.shape[0])) + return y, n, d + + def fit(self, X, y): + """ + Fit MLKR model + + Parameters: + ---------- + X : (n x d) array of samples + y : (n) data labels + + Returns: + ------- + self: Instance of self + """ + y, n, d = self._process_inputs(X, y) + if self.params['A0'] is None: + A = np.identity(d) # Initialize A as eye matrix + else: + A = self.params['A0'] + if A.shape != (d, d): + raise ValueError('A0 should be a square matrix of dimension' + ' %d. %s shape was provided' % (d, A.shape)) + cost = np.Inf + # Gradient descent procedure + alpha = self.params['alpha'] + epsilon = self.params['epsilon'] + while cost > alpha: + K = self._computeK(X, A) + yhat = self._computeyhat(y, K) + cost = np.sum(np.square(yhat - y)) + # Compute gradient + sum_i = 0 + for i in xrange(n): + sum_j = 0 + for j in xrange(n): + diffK = (yhat[j] - y[j]) * K[i, j] + x_ij = (X[i, :] - X[j, :])[:, np.newaxis] + x_ijT = x_ij.T + sum_j += diffK * x_ij.dot(x_ijT) + sum_i += (yhat[i] - y[i]) * sum_j + gradient = 4 * A.dot(sum_i) + A -= epsilon * gradient + self._transformer = A + return self + + @staticmethod + def _computeK(X, A): + """ + Internal helper function to compute K matrix. + + Parameters: + ---------- + X: (n x d) array of samples + A: (d x d) 'A' matrix + + Returns: + ------- + K: (n x n) K matrix where Kij = exp(-distance(x_i, x_j)) where + distance is defined as squared L2 norm of (x_i - x_j) + """ + dist_mat = pdist(X, metric='mahalanobis', VI=A.T.dot(A)) + return np.exp(squareform(-(dist_mat ** 2))) + + @staticmethod + def _computeyhat(y, K): + """ + Internal helper function to compute yhat matrix. + + Parameters: + ---------- + y: (n) data labels + K: (n x n) K matrix + + Returns: + ------- + yhat: (n x 1) yhat matrix + """ + K_mod = np.copy(K) + np.fill_diagonal(K_mod, 0) + numerator = K_mod.dot(y) + denominator = np.sum(K_mod, 1)[:, np.newaxis] + denominator[denominator == 0] = 2.2204e-16 # eps val in octave + yhat = numerator / denominator + return yhat + + def transformer(self): + return self._transformer diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py index 2ef97237..c56bbb99 100644 --- a/test/metric_learn_test.py +++ b/test/metric_learn_test.py @@ -7,7 +7,7 @@ from metric_learn import ( LMNN, NCA, LFDA, Covariance, - LSML_Supervised, ITML_Supervised, SDML_Supervised, RCA_Supervised) + LSML_Supervised, ITML_Supervised, SDML_Supervised, RCA_Supervised, MLKR) # Import this specially for testing. 
from metric_learn.lmnn import python_LMNN @@ -113,6 +113,13 @@ def test_iris(self): csep = class_separation(rca.transform(), self.iris_labels) self.assertLess(csep, 0.25) +class TestMLKR(MetricTestCase): + def test_iris(self): + mlkr = MLKR(epsilon=10, alpha=10) # for faster testing + mlkr.fit(self.iris_points, self.iris_labels) + csep = class_separation(mlkr.transform(), self.iris_labels) + self.assertLess(csep, 0.25) + if __name__ == '__main__': unittest.main() From cbd5186bab98af9ea16d5fd7624978c92276b04c Mon Sep 17 00:00:00 2001 From: CJ Carey Date: Fri, 28 Oct 2016 14:46:40 -0400 Subject: [PATCH 022/210] Fixing MLKR implementation --- metric_learn/mlkr.py | 139 ++++++++++++++++++------------------------- 1 file changed, 57 insertions(+), 82 deletions(-) diff --git a/metric_learn/mlkr.py b/metric_learn/mlkr.py index 7c279cc8..13a88e23 100644 --- a/metric_learn/mlkr.py +++ b/metric_learn/mlkr.py @@ -6,43 +6,62 @@ algorithm can also be viewed as a supervised variation of PCA and can be used for dimensionality reduction and high dimensional data visualization. """ -from __future__ import division +from __future__ import division, print_function import numpy as np -from six.moves import xrange +from scipy.optimize import minimize from scipy.spatial.distance import pdist, squareform +from sklearn.decomposition import PCA from .base_metric import BaseMetricLearner +EPS = np.finfo(float).eps + + class MLKR(BaseMetricLearner): """Metric Learning for Kernel Regression (MLKR)""" - def __init__(self, A0=None, epsilon=0.01, alpha=0.0001): + def __init__(self, num_dims=None, A0=None, epsilon=0.01, alpha=0.0001, + max_iter=1000): """ MLKR initialization Parameters ---------- - A0: Initialization of matrix A. Defaults to the identity matrix. - epsilon: Step size for gradient descent. - alpha: Stopping criterion for loss function in gradient descent. + num_dims: If given, restrict to a num_dims-dimensional transformation. + A0: Initialization of transformation matrix. Defaults to PCA loadings. + epsilon: Step size for congujate gradient descent. + alpha: Stopping criterion for congujate gradient descent. + max_iter: Cap on number of congugate gradient iterations. """ self.params = { "A0": A0, "epsilon": epsilon, - "alpha": alpha + "alpha": alpha, + "max_iter": max_iter, + "num_dims": num_dims, } def _process_inputs(self, X, y): self.X = np.array(X, copy=False) - y = np.array(y, copy=False) + y = np.array(y, copy=False).ravel() if X.ndim == 1: X = X[:, np.newaxis] - if y.ndim == 1: - y = y[:, np.newaxis] n, d = X.shape if y.shape[0] != n: raise ValueError('Data and label lengths mismatch: %d != %d' % (n, y.shape[0])) - return y, n, d + + A = self.params['A0'] + m = self.params['num_dims'] + if m is None: + m = d + if A is None: + # initialize to PCA transformation matrix + # note: not the same as n_components=m ! + A = PCA().fit(X).components_.T[:m] + elif A.shape != (m, d): + raise ValueError('A0 needs shape (%d,%d) but got %s' % ( + m, d, A.shape)) + return y, A def fit(self, X, y): """ @@ -52,81 +71,37 @@ def fit(self, X, y): ---------- X : (n x d) array of samples y : (n) data labels - - Returns: - ------- - self: Instance of self - """ - y, n, d = self._process_inputs(X, y) - if self.params['A0'] is None: - A = np.identity(d) # Initialize A as eye matrix - else: - A = self.params['A0'] - if A.shape != (d, d): - raise ValueError('A0 should be a square matrix of dimension' - ' %d. 
%s shape was provided' % (d, A.shape)) - cost = np.Inf - # Gradient descent procedure - alpha = self.params['alpha'] - epsilon = self.params['epsilon'] - while cost > alpha: - K = self._computeK(X, A) - yhat = self._computeyhat(y, K) - cost = np.sum(np.square(yhat - y)) - # Compute gradient - sum_i = 0 - for i in xrange(n): - sum_j = 0 - for j in xrange(n): - diffK = (yhat[j] - y[j]) * K[i, j] - x_ij = (X[i, :] - X[j, :])[:, np.newaxis] - x_ijT = x_ij.T - sum_j += diffK * x_ij.dot(x_ijT) - sum_i += (yhat[i] - y[i]) * sum_j - gradient = 4 * A.dot(sum_i) - A -= epsilon * gradient - self._transformer = A - return self - - @staticmethod - def _computeK(X, A): """ - Internal helper function to compute K matrix. + y, A = self._process_inputs(X, y) - Parameters: - ---------- - X: (n x d) array of samples - A: (d x d) 'A' matrix + # note: this line takes (n*n*d) memory! + # for larger datasets, we'll need to compute dX as we go + dX = (X[None] - X[:, None]).reshape((-1, X.shape[1])) - Returns: - ------- - K: (n x n) K matrix where Kij = exp(-distance(x_i, x_j)) where - distance is defined as squared L2 norm of (x_i - x_j) - """ - dist_mat = pdist(X, metric='mahalanobis', VI=A.T.dot(A)) - return np.exp(squareform(-(dist_mat ** 2))) + res = minimize(_loss, A.ravel(), (X, y, dX), method='CG', jac=True, + tol=self.params['alpha'], + options=dict(maxiter=self.params['max_iter'], + eps=self.params['epsilon'])) + self._transformer = res.x.reshape(A.shape) + return self - @staticmethod - def _computeyhat(y, K): - """ - Internal helper function to compute yhat matrix. + def transformer(self): + return self._transformer - Parameters: - ---------- - y: (n) data labels - K: (n x n) K matrix - Returns: - ------- - yhat: (n x 1) yhat matrix - """ - K_mod = np.copy(K) - np.fill_diagonal(K_mod, 0) - numerator = K_mod.dot(y) - denominator = np.sum(K_mod, 1)[:, np.newaxis] - denominator[denominator == 0] = 2.2204e-16 # eps val in octave - yhat = numerator / denominator - return yhat +def _loss(flatA, X, y, dX): + A = flatA.reshape((-1, X.shape[1])) + dist = pdist(X, metric='mahalanobis', VI=A.T.dot(A)) + K = squareform(np.exp(-dist**2)) + denom = np.maximum(K.sum(axis=0), EPS) + yhat = K.dot(y) / denom + ydiff = yhat - y + cost = (ydiff**2).sum() - def transformer(self): - return self._transformer + # also compute the gradient + np.fill_diagonal(K, 1) + W = 2 * K * (np.outer(ydiff, ydiff) / denom) + # note: this is the part that the matlab impl drops to C for + M = (dX.T * W.ravel()).dot(dX) + grad = 2 * A.dot(M) + return cost, grad.ravel() From c595c5a93169e159f5a177c4951a41b1b48e8164 Mon Sep 17 00:00:00 2001 From: CJ Carey Date: Fri, 28 Oct 2016 14:58:42 -0400 Subject: [PATCH 023/210] Refactoring / fixing test cases --- test/metric_learn_test.py | 7 +-- test/test_fit_transform.py | 89 +++++++++++++++++--------------------- 2 files changed, 44 insertions(+), 52 deletions(-) diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py index c56bbb99..5942c840 100644 --- a/test/metric_learn_test.py +++ b/test/metric_learn_test.py @@ -6,8 +6,8 @@ from numpy.testing import assert_array_almost_equal from metric_learn import ( - LMNN, NCA, LFDA, Covariance, - LSML_Supervised, ITML_Supervised, SDML_Supervised, RCA_Supervised, MLKR) + LMNN, NCA, LFDA, Covariance, MLKR, + LSML_Supervised, ITML_Supervised, SDML_Supervised, RCA_Supervised) # Import this specially for testing. 
from metric_learn.lmnn import python_LMNN @@ -113,9 +113,10 @@ def test_iris(self): csep = class_separation(rca.transform(), self.iris_labels) self.assertLess(csep, 0.25) + class TestMLKR(MetricTestCase): def test_iris(self): - mlkr = MLKR(epsilon=10, alpha=10) # for faster testing + mlkr = MLKR() mlkr.fit(self.iris_points, self.iris_labels) csep = class_separation(mlkr.transform(), self.iris_labels) self.assertLess(csep, 0.25) diff --git a/test/test_fit_transform.py b/test/test_fit_transform.py index a25511ce..442a93df 100644 --- a/test/test_fit_transform.py +++ b/test/test_fit_transform.py @@ -4,126 +4,117 @@ from numpy.testing import assert_array_almost_equal from metric_learn import ( - LMNN, NCA, LFDA, Covariance, + LMNN, NCA, LFDA, Covariance, MLKR, LSML_Supervised, ITML_Supervised, SDML_Supervised, RCA_Supervised) - -class MetricTestCase(unittest.TestCase): +class TestFitTransform(unittest.TestCase): @classmethod def setUpClass(self): # runs once per test class iris_data = load_iris() - self.iris_points = iris_data['data'] - self.iris_labels = iris_data['target'] - + self.X = iris_data['data'] + self.y = iris_data['target'] -class TestCovariance(MetricTestCase): def test_cov(self): cov = Covariance() - cov.fit(self.iris_points) + cov.fit(self.X) res_1 = cov.transform() cov = Covariance() - res_2 = cov.fit_transform(self.iris_points) + res_2 = cov.fit_transform(self.X) # deterministic result assert_array_almost_equal(res_1, res_2) - -class TestLSML(MetricTestCase): - def test_lsml(self): - + def test_lsml_supervised(self): seed = np.random.RandomState(1234) lsml = LSML_Supervised(num_constraints=200) - lsml.fit(self.iris_points, self.iris_labels, random_state=seed) + lsml.fit(self.X, self.y, random_state=seed) res_1 = lsml.transform() seed = np.random.RandomState(1234) lsml = LSML_Supervised(num_constraints=200) - res_2 = lsml.fit_transform(self.iris_points, self.iris_labels, random_state=seed) - - assert_array_almost_equal(res_1, res_2) + res_2 = lsml.fit_transform(self.X, self.y, random_state=seed) -class TestITML(MetricTestCase): - def test_itml(self): + assert_array_almost_equal(res_1, res_2) + def test_itml_supervised(self): seed = np.random.RandomState(1234) itml = ITML_Supervised(num_constraints=200) - itml.fit(self.iris_points, self.iris_labels, random_state=seed) + itml.fit(self.X, self.y, random_state=seed) res_1 = itml.transform() seed = np.random.RandomState(1234) itml = ITML_Supervised(num_constraints=200) - res_2 = itml.fit_transform(self.iris_points, self.iris_labels, random_state=seed) + res_2 = itml.fit_transform(self.X, self.y, random_state=seed) assert_array_almost_equal(res_1, res_2) -class TestLMNN(MetricTestCase): def test_lmnn(self): - lmnn = LMNN(k=5, learn_rate=1e-6, verbose=False) - lmnn.fit(self.iris_points, self.iris_labels) + lmnn.fit(self.X, self.y) res_1 = lmnn.transform() lmnn = LMNN(k=5, learn_rate=1e-6, verbose=False) - res_2 = lmnn.fit_transform(self.iris_points, self.iris_labels) + res_2 = lmnn.fit_transform(self.X, self.y) assert_array_almost_equal(res_1, res_2) -class TestSDML(MetricTestCase): - def test_sdml(self): - + def test_sdml_supervised(self): seed = np.random.RandomState(1234) sdml = SDML_Supervised(num_constraints=1500) - sdml.fit(self.iris_points, self.iris_labels, random_state=seed) + sdml.fit(self.X, self.y, random_state=seed) res_1 = sdml.transform() seed = np.random.RandomState(1234) sdml = SDML_Supervised(num_constraints=1500) - res_2 = sdml.fit_transform(self.iris_points, self.iris_labels, random_state=seed) + res_2 = 
sdml.fit_transform(self.X, self.y, random_state=seed) assert_array_almost_equal(res_1, res_2) -class TestNCA(MetricTestCase): def test_nca(self): - - n = self.iris_points.shape[0] + n = self.X.shape[0] nca = NCA(max_iter=(100000//n), learning_rate=0.01) - nca.fit(self.iris_points, self.iris_labels) + nca.fit(self.X, self.y) res_1 = nca.transform() nca = NCA(max_iter=(100000//n), learning_rate=0.01) - res_2 = nca.fit_transform(self.iris_points, self.iris_labels) + res_2 = nca.fit_transform(self.X, self.y) assert_array_almost_equal(res_1, res_2) -class TestLFDA(MetricTestCase): def test_lfda(self): - lfda = LFDA(k=2, dim=2) - lfda.fit(self.iris_points, self.iris_labels) + lfda.fit(self.X, self.y) res_1 = lfda.transform() lfda = LFDA(k=2, dim=2) - res_2 = lfda.fit_transform(self.iris_points, self.iris_labels) + res_2 = lfda.fit_transform(self.X, self.y) - res_1 = round(res_1[0][0], 3) - res_2 = round(res_2[0][0], 3) - res = (res_1 == res_2 or res_1 == -res_2) - - self.assertTrue(res) - -class TestRCA(MetricTestCase): - def test_rca(self): + # signs may be flipped, that's okay + if res_1[0,0] != res_2[0,0]: + res_2 *= -1 + assert_array_almost_equal(res_1, res_2) + def test_rca_supervised(self): seed = np.random.RandomState(1234) rca = RCA_Supervised(dim=2, num_chunks=30, chunk_size=2) - rca.fit(self.iris_points, self.iris_labels, random_state=seed) + rca.fit(self.X, self.y, random_state=seed) res_1 = rca.transform() seed = np.random.RandomState(1234) rca = RCA_Supervised(dim=2, num_chunks=30, chunk_size=2) - res_2 = rca.fit_transform(self.iris_points, self.iris_labels, random_state=seed) + res_2 = rca.fit_transform(self.X, self.y, random_state=seed) + + assert_array_almost_equal(res_1, res_2) + + def test_mlkr(self): + mlkr = MLKR(num_dims=2) + mlkr.fit(self.X, self.y) + res_1 = mlkr.transform() + + mlkr = MLKR(num_dims=2) + res_2 = mlkr.fit_transform(self.X, self.y) assert_array_almost_equal(res_1, res_2) From 3b8ac1b7466531fd3dcfc83cb60cd2523620d7ce Mon Sep 17 00:00:00 2001 From: CJ Carey Date: Fri, 28 Oct 2016 15:02:13 -0400 Subject: [PATCH 024/210] Adding MLKR to README [ci skip] --- README.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/README.rst b/README.rst index 38c088aa..22a81155 100644 --- a/README.rst +++ b/README.rst @@ -14,6 +14,7 @@ Metric Learning algorithms in Python. - Neighborhood Components Analysis (NCA) - Local Fisher Discriminant Analysis (LFDA) - Relative Components Analysis (RCA) +- Metric Learning for Kernel Regression (MLKR) **Dependencies** From a9d51ca2021efbb91782de29ef57d6b504546025 Mon Sep 17 00:00:00 2001 From: CJ Carey Date: Fri, 28 Oct 2016 15:55:03 -0400 Subject: [PATCH 025/210] Fixing tests (again) --- test/metric_learn_test.py | 4 ++-- test/test_fit_transform.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py index 5942c840..50006486 100644 --- a/test/metric_learn_test.py +++ b/test/metric_learn_test.py @@ -75,10 +75,10 @@ class TestSDML(MetricTestCase): def test_iris(self): # Note: this is a flaky test, which fails for certain seeds. # TODO: un-flake it! 
- np.random.seed(5555) + rs = np.random.RandomState(5555) sdml = SDML_Supervised(num_constraints=1500) - sdml.fit(self.iris_points, self.iris_labels) + sdml.fit(self.iris_points, self.iris_labels, random_state=rs) csep = class_separation(sdml.transform(), self.iris_labels) self.assertLess(csep, 0.25) diff --git a/test/test_fit_transform.py b/test/test_fit_transform.py index 442a93df..8f9f32a1 100644 --- a/test/test_fit_transform.py +++ b/test/test_fit_transform.py @@ -92,7 +92,7 @@ def test_lfda(self): res_2 = lfda.fit_transform(self.X, self.y) # signs may be flipped, that's okay - if res_1[0,0] != res_2[0,0]: + if np.sign(res_1[0,0]) != np.sign(res_2[0,0]): res_2 *= -1 assert_array_almost_equal(res_1, res_2) From 2fabd36de5d6740065d6da8e333bd2ea85172252 Mon Sep 17 00:00:00 2001 From: Michael Stewart Date: Fri, 4 Nov 2016 15:30:48 -0700 Subject: [PATCH 026/210] Fixed sample code (#41) --- doc/metric_learn.lsml.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/metric_learn.lsml.rst b/doc/metric_learn.lsml.rst index 12be71b8..c6c8ede9 100644 --- a/doc/metric_learn.lsml.rst +++ b/doc/metric_learn.lsml.rst @@ -20,7 +20,7 @@ Example Code Y = iris_data['target'] lsml = LSML_Supervised(num_constraints=200) - isml.fit(X, Y) + lsml.fit(X, Y) References ---------- From 136f15215851ed59bd5acfc4a297a9345c67b214 Mon Sep 17 00:00:00 2001 From: Bhargav Srinivasa Date: Wed, 9 Nov 2016 19:18:11 +0100 Subject: [PATCH 027/210] Removed random import (#42) --- metric_learn/constraints.py | 1 - 1 file changed, 1 deletion(-) diff --git a/metric_learn/constraints.py b/metric_learn/constraints.py index ce71ecd2..903c5148 100644 --- a/metric_learn/constraints.py +++ b/metric_learn/constraints.py @@ -3,7 +3,6 @@ from supervised data labels. """ import numpy as np -import random import warnings from six.moves import xrange from scipy.sparse import coo_matrix From 3c6b951626d14c347083e1f9c13eefbb368df53d Mon Sep 17 00:00:00 2001 From: Bhargav Srinivasa Date: Wed, 9 Nov 2016 19:32:10 +0100 Subject: [PATCH 028/210] [MRG] Metric Learning Tutorial Notebook (#27) * Added Tutorial Notebook * Added Text * Added links, explanations * Added manual constraints --- examples/metric_plotting.ipynb | 708 +++++++++++++++++++++++++++++++++ 1 file changed, 708 insertions(+) create mode 100644 examples/metric_plotting.ipynb diff --git a/examples/metric_plotting.ipynb b/examples/metric_plotting.ipynb new file mode 100644 index 00000000..f8661181 --- /dev/null +++ b/examples/metric_plotting.ipynb @@ -0,0 +1,708 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "### Metric Learning and Plotting\n", + "\n", + "This is a small walkthrough which illustrates all the Metric Learning algorithms implemented in metric_learn, and also does a quick visualisation which can help understand which algorithm might be best suited for you.\n", + "\n", + "Of course, depending on the data set and the constraints your results will look very different; you can just follow this and change your data and constraints accordingly. 
" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Imports " + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "%matplotlib inline\n", + "\n", + "import metric_learn\n", + "import numpy as np\n", + "from sklearn.datasets import load_iris\n", + "\n", + "# visualisation imports\n", + "import matplotlib.pyplot as plt\n", + "from mpl_toolkits.mplot3d import Axes3D" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Loading our data-set and setting up plotting\n", + "\n", + "We will be using the IRIS data-set to illustrate the plotting. You can read more about the IRIS data-set here: [link](https://en.wikipedia.org/wiki/Iris_flower_data_set). \n", + "\n", + "We would like to point out that only two features - Sepal Width and Sepal Length are being plotted. This is because it is tough to visualise more features than this. The purpose of the plotting is to understand how each of the new learned metrics transform the input space. " + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# loading our dataset\n", + "\n", + "iris_data = load_iris()\n", + "# this is our data\n", + "X = iris_data['data']\n", + "# these are our constraints\n", + "Y = iris_data['target']\n", + "\n", + "# function to plot the results\n", + "def plot(X, Y):\n", + " x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5\n", + " y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5\n", + " plt.figure(2, figsize=(8, 6))\n", + "\n", + " # clean the figure\n", + " plt.clf()\n", + "\n", + " plt.scatter(X[:, 0], X[:, 1], c=Y, cmap=plt.cm.Paired)\n", + " plt.xlabel('Sepal length')\n", + " plt.ylabel('Sepal width')\n", + "\n", + " plt.xlim(x_min, x_max)\n", + " plt.ylim(y_min, y_max)\n", + " plt.xticks(())\n", + " plt.yticks(())\n", + "\n", + " plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAdsAAAFsCAYAAACEtRP5AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzs3XlYVOUXB/DvBQYYYIABhmGRHWRTEBEUF9zXNPctd211\nqczS1OpX2WJmZbm0mJparm2WS4a7uO8obiggJqLs+wAzc35/YBQNKg5cBuF8noenBs+877kzA4d7\n73vPFYgIjDHGGBOPkaETYIwxxho6LraMMcaYyLjYMsYYYyLjYssYY4yJjIstY4wxJjIutowxxpjI\nTMQaWBAEvqaIMcZYo0NEwn+/J1qxvTehmMMzxhhj9Yog6NRZAHwYmTHGGBMdF1vGGGNMZFxsGWOM\nMZFxsWWMMcZExsWWMcYYExkXW8YYY0xkXGwZY4wxkXGxZYwxxkTGxZYxxhgTGRdbxhhjTGRcbBlj\njDGRcbFljDHGRMbFljHGGBMZF1vGGGNMZFxsGWOMMZFxsWWMMcZExsWWMcYYExkXW8YYY0xkXGwZ\nY4wxkXGxZYwxxkTGxZYxxhgTGRdbxhhjTGRcbBljjDGRcbFljDHGRMbFljHGGBMZF1vGGGNMZFxs\nGWOMMZFxsWWMMcZExsWWMcYYExkXW8YYY0xkXGwZY4wxkXGxZYwxxkTGxZYxxhgTGRdbxhhjTGRc\nbBljjDGRcbFljDHGRMbFljHGGBMZF1vGGGNMZFxsGWOMMZFxsWWMMcZExsWWMcYYExkXW8YYY0xk\nXGwZY4wxkXGxZYwxxkRmYugEGHvc7Ny5E6dPn4aXlxeGDRsGIyP+m5Ux9mACEYkzsCCQWGMzZijv\nvPMuvv1uNcI69kDC2eNoHtAUG9evgyAIhk6NMVYPCIIAItL5hcDFlrFqys3NhYtrE3zy6wHY2CtQ\nVlqCOcO74+dNGxAZGWno9Bhj9cD9ii0f/2KsmnJzc2FhaQlrOwcAgMTUDI4uTZCVlWXgzBhj9R0X\nW8aqydXVFfb29vh91VIU5GbjyM7fcPPaFYSHhxs6NcZYPceHkRl7BDdu3MDoseNw9swZeHh6YtWK\nbxEREWHotBhj9QSfs2WMMcZExudsGWOMMQPhYssYY4yJjIstY4wxJjIutowxxpjIuNgyxhhjIuNi\nyxhjjImMiy1jjDEmMi62jDHGmMi42DLGGGMi42LLGGOMiYyLLWOMMSYyLraMMcaYyLjYMsYYYyLj\nYssavEOHDqFbj56IjGqLBR9/DK1Wa+iUGGONDBdb1qDFxcWh35P94dehN3pMeBkr1q7Du/PmGTot\nxlgjw/ezZQ3aG2+8gQupORg+7XUAQErCJXz5+vNITrxu4MwYYw0R38+WNUoSiQQlxUUVj1VFRZCY\nmhowI8ZYY2Ri6AQYE9OECROwNCISUisZ7J1csfW7pXj3f28aOi3GWCPDh5FZg3f9+nV88ulnyC/I\nx+CBAzFgwABDp8QYa6DudxiZiy1jjDFWS/icLWOMMWYgXGwZY4wxkXGxZYwxxkTGxZbVGSLCwk8+\ngbOLKxSOSrz62kxoNBpDp8UYY6LjYsvqzLp167B42dd45Ys1eHPVr9ixZz/mf/SRodNijDHRcbFl\ndeb3rdvQa8xzaOLjD4WLGwY+NwNbt203dFqMMSY6LrasztjZ2eHuX8kVj9NSkmBra2u4hBhjrI7w\ndbaszqSkpKB1mygERnaAmVSK4zFbEfPnTrRs2dLQqTHGWK3gphasXrh9+zY2bNgAtVqNgQMHwtfX\n19ApMcZYreFiyxhjjImMO0gxxhhjBsLFljHGGBMZF1vGRFJSUoL09HTw6RTGGBdbxkSw7MsvIZfb\nwcfXD0HNmiMpKcnQKTHGDIgXSDFWy44dO4a+/Qdg7vIf4ejqjq2rv0LC0d04ceyooVNjjImMF0gx\nVkdOnDiBltHdoGziAUEQ0HvUJJw5dRJardbQqTHGDISLLWO1zM3NDdcvnEVZaQkA4PLp43BydoGR\nEf+4MdZY8WFkxmqZVqvFyFGjceT4Cbh6+uLK2RPYtHEDunXrZujUGGMi46YWjNUhIsLBgweRnp6O\niIgIuLu7Gzolxlgd4GLLGGOMiYwXSDHGGGMGwsWWMcYYExkXW/ZYO3HiBJYsWYKzZ88aOhXGGLsv\nLrbssTVu3Hi0j+6IBZ8vReuoKLz40kuGTokxxqrEC6TYY+nEiRNoH90R8zfshJO7F1KuXsKb4/oh\n4coVeHh4GDo9xlgjxQukWINy7NgxKJt4wMndCwDg3jQQ1nJ7nDhxwsCZMcaYLi627LHUvn173Pkr\nGSkJlwAACXGnkZediTZt2hg4M8YY02Vi6AQY00eLFi3w3LPP4s2x/WAtt0d+dhZenzULTZo0MXRq\njDGmg8/ZssdaUlISTp06hcjISO7SxBgzOO4gxRhjjImMF0gxxhhjBsLFljHGGBMZL5BitUKj0WDi\nxIlISEjAgAEDMHPmTEOnJJqTJ0/i7Nmz8PLyQpcuXSAIOkeMGGvwLly4gGPHjsHZ2Rm9evWq0f2a\n8/PzsXXrVpSVlaFHjx5wcnKqxUzrBz5ny2pMo9FAoXSCRGoJn+BQnI3dg9aREdi7Z4+hU6t1i5cs\nwbz33kfzNtFIOH8afXv1xLKlSwydFmN1av26dZj6wrMId5EhKacELdp0wMafftGr4GZmZiIqIhxy\nFMHcxAiXs0qx7+AhBAYGipC5+HiBFBPN5MmTsfnX37Dw530wNTPHrcQEvD6iB/Lz8iCVSg2dXq3J\nz8+Hk7MLPtz4JxQubiguLMCc4d2xY+tvCAsLM3R6jNUJIoKNzArvdXCEp9wcZRrCrP13seS7dejV\nq9cjjzfz1RmI37YWz4XZAwB+v5qD2w7NsfWPP2s79TrBC6SYaK5fvw43nwCYmpkDAFy8fCEIAhIS\nEgycWe3KysqCpUwGhYsbAEBqaQVXTx+kpaUZODPG6o5KpYKqpAQetmYAAImxAE9bM71/DlL/uglv\na+OKx75yU9xOvVUrudYnXGxZjY0aNQoXTx7GtfNnoNVqsW3tNzCRmCI4ONjQqdUqV1dXSM3MsPeX\n9SAixB8/hKTLF9CiRQtDp8ZYnZFKpQgKaIqfL+dAS4RrWSqcuZ2P1q1b6zVep67d8WdKCXJUaqjU\nWmy5XoROXbrWctb1ABGJ8lU+NGsshgwZQhJTMxKMjEhqaUVr1641dEqiiI+PJ//AIDIxMSGlszPt\n2rXL0CkxVueSk5OpZUgzMjE2Irm1jDZv3qz3WFqtlma9+iqZmUpIYmJCI4cOoeLi4lrMtm7dq306\nNZHP2bJao9FokJWVBYVCYehURFdSUgIzMzNDp8GYQZWUlMDU1LRWVuRrNBpotVpIJJJayMxweIEU\nY4wxJjJeIMUYY4wZCBdbxhhjTGTcQYrVCpVKha+//hopN2+iXdu2GDRoUI3G2717N/7YuRN2cjme\nf/55yOVynRiNRoOVK1fi8pUrCGneHGPGjKlRFxvGGBML
/2ZiNVZWVoYu3brjh1+3IbVUgukzZ+Od\nd9/Ve7xVq1bhqTFjkVII7DxyGq3bRCE3N7dSDBFh2IiRWLx8FVJLJZj/6Rd45tnnaropjDEmCl4g\nxWps+/btmDH7Dbyx8lcYGRkhJ+Mupj/ZDnm5uTA1NX3k8Vzd3DF5/lfwDgoBACyZ9QJGD+yDF154\noSLm/Pnz6NH7CXz0015ITM2gKirE9H5tcf7cWbi5udXatjHG2KPgBVJMNAUFBZArlBWHcK3l9hAE\nI5SUlOg1XlFhAewc/2lEbqtQoqCgQGdOa7kdJKbll9+YSS1gZW2D/Px8PbeCMcbEw8WW1ViHDh2Q\nEHcaB37fjNspSVi94E20iYqCTCbTa7wn+w/A6vlzkZp8Haf2x+DIzi3o3bt3pZiQkBCUFORh29qv\nkZaShF+WL4KVhRR+fn61sUmMMVar+DAyqxWnT5/G5KnTkJqaijZt2uCrZUthZ2en11jFxcWY/soM\n7PjjD8jlcixc8BG6deumE3f9+nU8+/wLuHr1Kpo1C8byr79GkyZNaropjDGmN25qwRhjjImMz9ky\nxhhjBsLFljHGGBMZF1vGGGNMZFxs6wEiwnerV6Nf/wF4avRonD9/3tApVVCr1fjgww/Rp28/PPvc\n83yjdMZqwdmzZzFi6GD0690D369da+h0WB3gYlsPLFmyBG+9+x68onrAWOmNjp074+rVq4ZOCwDw\n7HPPY+OW7fDv9CTSVEDbdu2Rl5dn6LQYe2xdunQJXTp2gFXiIQTkxWPOK1OxbNlSQ6fFRMarkesB\n36YBGP/WQvgEtwAArP/8fYS6OWDevHkGzaukpATWNjb4avc5mFtYAgAWThuDOdOnYvDgwQbNjbHH\n1euzZiFx20qMDnEAAFxKL8LqZGNcTEg0cGasNvBq5HpOEIwq/X99+EOlIod/3Rj63gfJQBkx9vj7\n78+PEf9MNQp815964IXnn8Xit1/BoOdnIOvuHRz8fSM+io01dFowNzfHsOEjsPi1Z9F12HhcP38K\n6TeT0L17d0Onxthja+y4cejw9ZeQm2fDTmqC9VcKMGPuO4ZOi4mMDyPXA0SE5d9+i59+/gUymQxz\nZ7+OsLAwQ6cFoPyOPh988CEOHjoEV1dXfPDePLi6uho6LcYeaydPnsSH895BYUE+hj01BhMmToQg\n6Bx5ZI8h7iDFGGOMiYzP2TLGGGMGwsWWMcYYExkXW8YYY0xkXGzZA6WkpMA/MAgyG1sonJyxadOm\nKuN+/PFHODo5w8rGFn7+gbhx40aVcWvWrEHz0BYICGqGBR9/XCeXPGzfvh0tW0XAzz8Qs16fjbKy\nMtHnZIyxf+Niyx4oMqotHL0D8daKnzDw2Vcwdtx4xMXFVYq5cOECRo8Zi/7PTMf/Vv4M56bNEBnV\nVmes3377DTNnz8WAqXMxavaH+HrFd1i8ZImo+R8/fhxjxo1H1zFTMOndz/HH3oOYPWeuqHMyxth/\n8Wpkdl8ZGRlwcnLGd0cSYGxSfkn2/Klj0KlVCD7//POKuOnTp2P30dOYvewHAIBGrcb4tn5IuXED\nLi4uFXFPjRoNK59QdB44EgBw/ugB7F/3FQ4dPCDaNsyZMwdXMoox5IUZAIBbSdeweMZE3Ejibj2M\nsdrHq5HZI7OwsACBUJCbDaD8euDczHRYW1tXipPJZMjNyqg4JFyQlwMi0omztLRETubdisc5Gemw\nsLAQfRvystIrHudmpkMq8pyMMfZfvGfLHqhjp864fC0R3YeNxeXTx3H9/CncSEqsVEgLCgrg5uEJ\n72YtERjeGrs2r4Wvpzti/7PHevnyZbTr0AHtnhgKM6kFdm/6Dr/+8jOio6NFyz8tLQ3hrSLQrF1X\n2Dm5YNeGlVjyxecYPny4aHMyxhovvZtaCIJgBmAwAE/8q70jEb37kOdxsW0AtFotZsyYgX0HDsDF\nyQmrV6+Gg4ODTlxGRgbGjRuH1LQ0dGjXDosWLYKRke6Bk2vXruHbFSugVqvx1MiRaNmypejbcPv2\nbXz55ZfIzy9A//5PolOnTqLPyRhrnGpSbP8AkAvgFADN398nok8e8jwutowxxhqV+xXb6tyIoAkR\n9RIhJ8YYY6xRqM4CqcOCIDQXPRPGGGOsgbrvYWRBEM4DIJTv/foBSARQAkAAQEQU8sCB+TDyI9No\nNDAyMqqVu39otVqo1WqYmprWQmZAaWlptcbSaDQwNjZ+YAwRgYiqPKcrZm6Pu+q8ttVV3fegNj+T\njDUG+lz60xdAPwC9AfgC6HHv8d/fZ7UkPz8fAwYNhtTCAja2ciz61zWs+njm2WdhbmEBc3MplC6u\nuHLlit5jHTp0CHYOCpibm8PcwgJvv/12lXE7duyAs4srzMzMENG6DZKTk3ViiAiz58yFlZUMUgsL\nTJz0NEpLS/XObcWKFbCUWcPM3BwyG1v8+OOPeo9Vn6WmpqJjVGuYmZrCyd6uRttJRJj37juQWVpA\nam6GUSOGQaVS6cTl5uaib68ekJqbwdZahmVLl9ZkExhjf/+Fe78vAGur870qYohVz5hx4ym67yBa\ndegqffrrAXJ286Bt27bpNdby5cvJUmZN8zf+SauPXqOuQ8aQSxM3vXOztpXT0Mmv0ZpjifTWip/I\nTGpBu3btqhRz/fp1ktvZ01vf/khrjyfRyGmvU7OQUJ2xvv7mG/INak5Ld56kb/fHU1i7TjR7zly9\n8kpMTCQzcym9/PE3tPZ4Ej3/zqdkLrWgzMxMvcarz9pHtqIZbXwpY1p32jOiDSlsZHT+/Hm9xvrh\nhx/IU2FD3z7pQ+uH+FGUlwO9OGWyTtywwQOpe1MFbR7WlJY+4UVOchnFxMTUdFMYa/Du1T6dmlid\n43jB/34gCIIxgPBarvmN2t69e9H/6ZdhJpXCyd0L0f1HYM/evXqN9fPPP6ND36Fw9wuExNQMw6fO\nxN07aXqNlZqaisKCfAyYNA0mEgkCwiLRLLIdfvrpp0pxx44dQ3BEWwS0bA1jExP0HT8Z169dQ25u\nbqW4Xbv3oNvwCZArlLCQWaPv+MnYvWePXrnt2LEDjq7uiOzaG8YmJojuNxSW1rbYv3+/XuPVV2q1\nGkdOnsacSC9IjI0Q7mSDXl6OOHz4sF7j7f7zD/T0MIfCUgILiTEG+1liz64/deL279uH4QHWMDU2\nQhNrM3RyNcO+ffp9JhljDziMLAjCbEEQ8gGECIKQd+8rH8BdAFvqLMNGwNFRiRtX4gGUH2m4mXAR\nTkqlXmO5uLgg8eI5aLVaAMCNKxdhamau11jl19MKSE2+DgBQl5Xi5rUrcHd3/0/+jvgrMQFlpSUA\ngLSUJAiCACsrq0pxTkolUq5erHh848pFKB0d9crNx8cHGXdSUZhfXtBzM9ORn5MFPz8/vcarr4yN\njWErs0J8RgEAQK3V4mJWARz1fN2cXFyRnK+teJyUUwLHKj5rCoUDErPLDy8TEW4UAkqlk15zMsZQ\nrcPIHz4s5j7
Pq5Nd9oYgNjaW5Hb21KX/MAprG00hLcIoPz9fr7Hy8/PJXuFI3kEh1P6JwWQmtaC3\n335b79wmTJxIFlbWFN1vCLl4+pK7lzeVlZVVitFqtTRk2HDyDWpO3QePIgdHJ/p2xQqdsdLS0sjD\n04tad+lJ0X0GksJRSRcvXtQ7t1aRrclO6UzR/YaSjb2CunTtpvdY9dmGDRvI0UZG41v6UCt3JT3R\noxup1Wq9xsrMzCQ/b09q7a2grv5Ksre1oTNnzujE7du3j+TWVtQjQElhHg4UHtqcCgsLa7opjDV4\nuM9h5AetRn5gax8iOv2gf+fVyI/m+vXr2L17N6ysrDBgwIAa9QwuKCjAm2++iTt37uCpp55C3759\na5Tb999/j23btsHb2xvvvPMOTEx0L8/WarXYunUrbt26hcjISISHV32mIScnB1u2bIFarUbv3r0r\n3ahAHwsWLMDp06fRrl07TJs2rUZj1Wfnzp3D4cOHoVQq0b9//xqtSs7Ly8OWLVtQUlKCnj17ws3N\nrcq4hIQE7NmzB9bW1hg4cCDMzfU7QsJYY/LIHaQEQfj7BI05gFYAzqH8sp8QACeJKOohE3KxZYwx\n1qg88qU/RNSZiDoDuA2gJRG1IqJwAGEAbomXKmOMMdawVGc1sj8Rnf/7ARFdABAoXkqMMcZYw1Kd\nYhsnCMK3giB0uve1HECc2Imx+kOlUuHKlSvIzs5+YFx2djYuX75cZZME9vhRqVSIiYnBhQsXDJ2K\njpycHGzfvh03btwwdCqMVUt1iu0EAPEAXrr3dfHe91gjcOrUKXh5+6Brz95w9/DE4iVLqoxbunQZ\n3D080a1XH3h6e+PkyZN1nCmrTceOHYOj3AZD+/VGeIsQtGoRUnE5maGtWLECTgp7jB7cH37eXhg1\ncqShU2Lsofjm8ey+iAgenl4YOHkW2vToh/TUm5g3cRBidu5AixYtKuLi4uLQpVsPvLnyZzi6uuP4\n7u3Y/Pl7uJlyg3vqPqacHeTo5GyMEc0cUFimxcw/b2DY01Pw6aefGjQvtVoNK6kZZkQ5o3UTGW7n\nl+KVnclY/+MvePLJJw2aG2OAHgukBEHYdO+/5wVBiPvvl5jJsvohPz8f6enpaNOjvBW2wsUNQeFt\ndA4rXrhwAYHhreHoWt7sIrJrH2RnZ+t0kGKPj5zcPHTzti1vTmJqjA4eMhw7dszQaSEhIQEgQusm\nMgCAs8wU/g5S7Nu3z7CJMfYQDzqM/NK9//5944H/frEGTiaTwdLKEhdPHgEA5Odk42rcKfj6+laK\n8/HxQULcaeTnlJ/TvXTqKKRSc1hbW9d5zqx2WFlIcTK1vGtVmUaLk6mFCAw0/LpILy8vaAFcSi8C\nAOSq1LiWqbrvdd2M1RtVdbqgyp2gJgHwe1hcFc8ToTcHq2sxMTEkt7en5q3akJ3C8b43Dpgz9w2y\nUzhSSEQUye3s6c8//6zjTFlt2rJlC5lLjMlLbk625ibk6epMJSUlhk6LiIjmzZtHZsYC+diZk1Ri\nRJ2j2xs6JcYq4FE7SP1NEIR3AHQA4AngFIADAA4S0dmHPI8eNjZ7PNy5cwfx8fFwcXFBQEDAfeMu\nX76M1NRUBAcHQ6lnb2dWf6SkpODnn3+GQqHAyJEja+3+w7UhLi4OMTExCAoKQu/evQ2dDmMVHrmD\nVBUDSAE8A+BVAK5E9MB+cVxsGWOMNTZ6F1tBEN4A0A6AFYAzAGJRvmd7+yHP42LLGGOsUalJsT0N\nQA1gG4D9AI4QUUk1JuRiyxhjrFGp0WFkQRCsUb532x7AUAB3iaj9Q57T4IttXFwckpKSEBwcrLNC\n91FlZmbi6NGjsLKyQvv27e97V5edO3di//79iIiIwMCBA2s0Z3WlpKTg7NmzcHFxQatWrepkzsai\nsLAQsbGxEAQB7du3r9Hdngxl+/btiI2NRZs2be57ratWq8WhQ4eQk5OD1q1b630/3kdBRDh58iRu\n376NsLCw+97dSKVS4eDBg9BoNGjfvr3OfZj/dvv2bZw8eRL29vaIioqq0TXkarUaBw8eRFFREaKi\nomBnZ6f3WKx+uV+xrc6q4mYAXgCwAcA1AHsBvFuN59X+Mq965H9vv0MOSieK7NiN5PYOtHrNGr3H\nOn/+PDkqnahl247k7R9IXbp1r3Ll59ix48jcwpKCWkWRhZU1PdG3b002oVp+//13ktvZU0R0V3Jy\ndaOp014Ufc7G4s6dOxTo401RXi7UxsuFgpv6Unp6uqHTeiTDhw4hqcSImistyUJiRAOf1P1MqtVq\n6tu7J3k62lKkt5Ic5DZ04sQJUfPSarX03NOTyMXOmtr4OJHc2oq2b9+uE5eVlUXNAppSUBMHCnFX\nkI+HG6WmpurEHTx4kBS21tQjwIN8lXY0cshg0mg0euWmUqmoY7so8nWSU7iXkpwU9jW6rzOrX1CD\n1chbUb4CORbACSIqq2Z1p4eN/bi6ePEiojt1wXvr/4CNnQNuJSbgnQkDkHY7FZaWlo88XrvojgiK\n7oMug0dBq9Hg0+kTMHHEYEyZMqUi5vr16wgMCsZHm2Lg5O6F7PQ0zBjYCTt3bEd0dHRtbl4FrVYL\newcFpn+2Cn4hLVFUkI//je6DDT+sRfv2DzywwarhuUkTIIk7iA/a+4KIMCv2GkxadcOSr742dGrV\ncuHCBYS3CMGSPl5QWpkivbAMU7Yl4uCRY4iIiKiIW7NmDRbOfQVvt3OAiZGAAzfyEJMjw7n4y6Ll\ntnfvXowfNhALOikhlRjhUnoRPj6Zi/Ss7Ep7pNNfnIYrMRvxXAs7CIKAtReyYdG8M1b/sL7SeE29\nPDAvxB69vR1Rotai55ZzmPvZMgwePPiRc/vss8+w8Yv3Mau1A4yNBGxPyMEVMy/sjT1c4+1mhvfI\nHaT+RkR9iWgBER2ubqFt6FJSUuDu5w8bOwcAgKu3HyxlMty9e1ev8W4kJyMooi0AwMjYGH4tIpGU\nnFwp5sKFC5DZ2sHJ3QsAIFc4wdHVHefOndN/Qx6ioKAAKpUKfiEtAQAWVjJ4B4dy8/daknztGqJd\nyht/CIKADs7WuJF4zcBZVd+5c+dgJ5VAaWUKAFBYSqCwNNX5TCYnJ8PfRoCJUfnvnxBHC9z8S9y7\ndCYnJ8PPXgqppPxXXICDFHkFBSguLq4Ul3Q9AcF2kooC3MzeFEmJ13XGS7l1G9Fu5Yd6zUyM0NpR\npvfPQXLidQTaGsH43uvR3FGKGyn8M9XQ1Z8L5x4jwcHBSLp0AUmXyu88eGr/nyCNBq6urnqNFxER\ngd0/roVWq0V+TjZOxPyOiP+cG42KikJBXjbOHd4HALh67hTSbiahc+fONdmUB5LJZHBxccH+3zYB\nAFKTryP+5BGEhoaKNmdjEtG2HdZcTUeJWguVWoO1V9MR3qatodOqtg4dOiC7WI24tEIAQPzdIqQX\nlul8JiMiInDsThmyi9UgIuxIzEPYv3priyEsLAzn0gpwO78UABCTmAsv
dzedc+KRbdtj718lKFFr\nUaYh7L5ZjIgq3oPw0OZYHvcXiAi38lXYnpypd9eqyDZROJSmRkGpBloi7EwuQHiriIc/kT3eqjq2\nXBtfaODnbH/66SeytrElO4UjKZ2d6ciRI3qPdefOHYpo3YZs5HYktbCkV1+bSVqtVidu8eLFZGpu\nTlIrGUlMzeh///tfDbages6fP08enl4kd1CQpZWMVqxcKfqcjUVxcTEN6vcEWVuYk0xqTkMH9K83\nXZqqa+HChWRqbEQWEiMyNRbogw8+qDLu7bfeJKmZKcmtLCgkKID++usv0XP7+uuvyFJqRnYyS/J0\nc6X4+HidmNLSUhoxdDBZmJuSTGpOT/TsTkVFRTpxSUlJFOznQwprK7I0N6OPP5qvd15arZamvziV\npGamZGMppahWLSkjI0Pv8Vj9An3P2eqrIZ+z/VtJSQnS09Ph5OQEExOTGo1FRLhz5w4sLCwe2FO4\nqKgI8fHxCAwMvO+qydqm0WiQlpYGOzs7SKXSOpmzMcnIyIAgCLC3tzd0Knqp7meyoKAA+fn5UCqV\nddaNqri4GFlZWXBycrrvCn8AyMrKgkajgYODw31XGWu1WqSlpcHGxkavtRn/lZubi+LiYiiVSr47\nVgPyyJeWMVRhAAAgAElEQVT+CILwO4D7VksieuD9rBpDsWWMMcb+7X7F9kG7YwtFzIcxxhhrNPgw\nMmOMMVZL9L70RxAEP0EQfhQE4aIgCIl/f4mTZuMVHx+PTz/9FN988w3y8vJqNFZxcTFWrlyJhQsX\n4vTp0/eN27x5M0JCQtCyZUscOHCgRnOyxomIsHXrVixYsABbtmxBXf2BPXXqVAQEBKBnz546l/M8\nqj179iAsLAwhISH45ZdfailDxv6jqlVTVHlVcSyArgDiAHgAeBvcQapW7d69m+R29tRrxHhq07U3\n+QcEUk5Ojl5jFRUVUXhEJLVs35n6PDWJ7BwU9OOPP+rELVy4kEzNzCm631Bq06MfmZpLacuWLTXd\nFNbIvDxtKnk52tKAYEfyUcpp8nPPij5nSLMgsjEzpn5N5eRla0Y2FmZ6r+LetGkTmRoL1MFdRl28\nrMnUWKDFixfXcsasMUENOkidIqJwQRDOE1Hzf3/vIc+jh43NyrVoGY5uY6cgvGMPAMBXb72EPu0j\nMWvWrEcea8WKFVj23Q+Y8flqCIKAK2dPYMXb03HzRnKlOFs7Bwx49mX0HD4eALD+iw9xbOcvuJOa\nWtPNYY1ESkoKQoMDsbSnK6xMjVFUpsHUP1Nx9NTZGvcKv5/09HQ4Kx3xdT8fKCwlUGsJk7cmYtDY\np7Fs2bJHHs/RzgadnCUYHaoAAGy5nIlfruYjq6Bme8us8dJngdTfSgRBMAKQIAjCVAC3UH67PVZL\nsrKy4OL5zy8nJ3cfZGRm6j2Wk4dPxaUErl6+yMnO1onTEsH1X3O6evuhrEyj15ysccrKyoKdlTms\nTMsvqbGQGMNBJkVWVpZocyYlJcHYSICDRfmvLhMjAc4yU9y6pV9HKk1ZKdxs/rmMx83aDFpNTq3k\nyti/Vedit5cAWAB4EUA4gDEAxomZVGPTq2dPbF76EfKys5B8JR77flmHnj166DVW586dcXTnFlw9\ndwoFeTnY8Pn76Na9u06ck1KBDYvnI+vubaSlJOHnbxYhONC/ppvCGhF/f3+UChL8cS0XhaUa7ErM\nRV4ZEBQUJNqc4eHhMDESsP58BgpLNThxqwCX0ovwzDPP6DWef1AINlzIwO38UqQXlmFtXDqc3Txq\nOWvGUP0OUgCsAcgeIV70Y+MNRWFhIY0aM5asZNbk5OxC3yxfXqPxNm/eTK5u7mRpJaP+AwdRdna2\nTkxRURE5uzYhiakZSczMqam/P6nV6hrNyxqfixcvUljzYJKamVJocCDFxcWJPufvv/9OMjMTMhZA\nUhMjeumll/QeS61Wk4+nB5kaCyQxEqiJk2OVHaQYqy7U4JxtKwCrAMjufSsXwEQiOvWQ59HDxmaM\nMcYaEr1vHi8IQhyAKUR08N7j9gCWEVHIQ57HxZYxxlijovd1tgA0fxdaACCiWADq2kyOMcYYa8iq\ns2e7CIAUwHqU90oeDkAF4HsAIKIquybwni1jjLHGpiZ7tqEAmgL4H8obWgQCCAPwCRpg/2Qiwg8/\n/IDxEydh1uuvIyMjo8q4oqIizHvvPYybMBGLFy+GRlN/LpuJi4tD6zZRCAxuhldmzLhv3OHDh/Hc\n8y9gytRpOH/+fJUxRIQVK1Zg/MRJeOPNN5GbmytW2o+spKQE8z/8EBPHjMKnn3wCtbpmB1wWLFiA\nkAA/tAoNQUxMTJUxGo0GS5YsxsQxozDv3XdQVFRUozlXr16NkKAAhAYH4ocffqgyhoiwZs0aTBgz\nCrNfn3XfS2vu3r2LLp06ItDXCyOGD6vx61Gbtm/fDhcnRyjkMgwcOPC+cQcOHMDzT0/EtMkv4OLF\ni1XGqNVqjB41CoG+XugU3QGp97k2PCcnB3PnzMaEMaOwauXKOuluVVxcjPffm4dxo5/C558vuu/v\nhcTERLw0bSqenjAOO3fuFD2vR3H06FE89/QkTH7+WZw7d67KGCLCqpUrMWHMKMydMxs5OXy51ENV\ntWqqNr7wmK5Gfu/998nD158mzvmAegwbR17ePjqrecvKyqhdh2iK6v4ETZo7n5q1akOjx44zTML/\ncfnyZTK3sKQew8fThNnvk53SmfoPGKATt2vXLrJzUNCo6W/QsMmvkdzOns6ePasT98qMV8kvOIQm\nzfmQOj85jJqFhNaL1ZoajYZ6dulEffyb0KKuQdTF14WGDuhf5X2Aq2PGK6+QrZmEPuoYQK9GepPU\nxJh27dqlEzdx7Ghq6+VEi7oG0cAgN2rfOoJKS0v1mnPZsmVkbmJE40IVNDbUgcxMjOjbb7/ViXvr\njbnk7WhLL0QoqZe/gvy8PCgvL69STH5+PsllFtTWTUaTI5zIR25Owf5+euVV2/bv309mxgL1ayqn\n51spSW5uTOEtw3Titm/fTo62Mvog2p/mRPmSg401XbhwQSeuRbMg8rI1o8kRTtTB3ZpsLM0pNze3\nUkxBQQEF+vlQj6YKeiFCSX5Ocpr16quibSNR+crmju2iqJ23A02OcKIWbvY0ctgQnbikpCRSyG1p\naDMFPRuuJKWtjNatWydqbtW1d+9esrO2pPEtFDQ6VEFyays6deqUTtysV18lPyc5vRChpB5NFRTU\n1JcKCgoMkHH9gxqsRlYC+ACACxH1FgQhCEAUEa14yPPoYWPXN0QEaxtbvL/+Dyhc3AAAX7z6DJ4Z\nNRQTJ06siIuNjcXYSc9i3ro/YGRkBFVxEV7sHYHrCQlwdHQ0VPoAgGHDhuFGdhFeXvAVAOBWYgLe\nGP0EVMWV98C69eiJgE590a53+V7G76u/gmnebXy38p+3tbS0FDJrayzecQIyWzmICPOfG4Z3587C\ngAED6m6jqnDmzBkM6dUNJ0a
Ew8TICCq1Bs3XHMXRs3Hw8vJ65PGU1lb4sqs/unmWdxL638GrOAI7\nHD15siImIyMDPh5uuDS+HaxMTaAlQocfz2Lpus2Ijo5+5Dk9XRzRt4kxevjaAgC2J2TjzzQjJN78\nZ09Nq9XC0kKKZb3cYG8hAQC8fzQTL877DKNGjaqI+/jjj7H4vTexuI8nBEFAUZkGY36+hmuJSfDw\nMOx1o82aNYNDwU280tYFAJCYrcLsXTdQXKatFNelXRtMkKvQ388JAPDx8URkBnfEsm+WV8TcvXsX\nLk5KrBnkBytTYxARXv4jGROmz8Fbb71VEbd582bMf20K3oqygyAIyFWp8fTWZBQWFdf43tP3c/To\nUYzo3xufdVbC2EhAiVqLZ7bfRPyVBLi6ulbEvTF3DuJ/XY4JoQ4AgLi0QmxMNcP5ywmi5PUo+nTv\ngqYFl9HFywYAsOVKFkqbdsLa9RsrYtRqNSwtpPi2rydszE1ARHj3SBZe/3gphg4daqjU642aHEb+\nDsBOAC73Hl8F8HLtpVa/qNVlsJD9c/N2C5k1SktLK8WUlJTAwsqq4gbYpmbmkJia6cQZQklJCSxl\nNhWPLWTW0Gq1OnGlpaWw+FecpcwapSUllWL+PgwpvXejbEEQYCGzqT/baSqByb33wMzYCFJTCUr+\nsw3VpdVqYG0qqXhsa24CdZnu+25qbAypSXnHJCNBgMxMovfroVFrYGn6z4+gpcQI2v8c+iUiaDRa\nWEiMK8X9d86ioiJIJUYVncPMjI1gLJTfsN3QSktLITOrnH9Vf4eXqEpgbfbPe2BjaozSElWlmOLi\nYgiCAHOT8tdNEARYmRrr3Iyg/PPxz+shlRjdey3FO91TPqcJjI3K55QYCzA1MdZ5r1QqFaT/uo+9\npalujKGoVCpYSip/JktUld8DjUYDIoJU8s97YGmq+5lklVWn2DoQ0SYAWgAgIjWA+nOCshYJgoDh\nI0biqzdeQkLcaez5eR3OHtqD3r17V4pr3bo1CrMz8cvyRbh24QxWfzgHgQEBlf56NZQXX3wRh3b8\ngv2/bcLVc6fwxeuTERgUqBM3dsxobFg0DxeOxeLMwd3Y8u0ijBk9qlKMhYUFevbqja//Nx3Xzp/B\nH+tWIPlSHDp37lxXm3NfLVq0gMbcEu8eTcSptBzMPnQdCtcm8PPz02u8Nh07YcquCzj0Vxa2JKRh\n4YkkPDN5SqUYFxcXBDdvjpcPXMWptBx8fCIZd0rLPw/6GDRiFJafuosztwtxOrUAK87cxZCnxlSK\nMTY2xpBBA/D5qSxcySjGjms5OJ+uQo//dBibMGECbuaVYnN8Bq5kFOOLY7dhY22NwEDd976uzZgx\nAzHXc7A3KReX0ouw8HAq7OzsdeJGTZyE1w8n4cDNTGy/fhefnEvFyLHjK8V4eHhAYSfHoiO3cSWj\nGL9cysS1rBI8/fTTleK6d++Oy5ml2JaQU/56nMpCvz69YWZmJtp2RkREoMRYig3x2biaWYxvz2bD\n29dP58jC8BEjsfNGMQ6l5CH+bhG+jsvF6PET7zNq3Ro36VmsuZiPs2mFOJlagE1XCzFmYuXX1szM\nDP369MYX9z6T2xJycDmzFN26dTNQ1o+Jqo4tU+Vzr/sA2AM4fe9xGwD7q/E8MQ+Li0alUtGMV1+j\n0LCW1K1HTzpz5kyVcTdu3KABgwZT89AWNG7CRMrKyqrjTO/vu+++I0dnF7K1d6C27dpTYWGhToxW\nq6Uvv/qKWkW2pjZt29HmzZurHKugoIAmT5lKIS3CqPcTT9Dly5fFTr/abt26RcMHDaCwIH8aM2IY\npaen6z2WRqOhJ5/oQ0qZJTnLrWnevHlVxmVnZ9OksWMoLMifBvXtQ8nJyXrPSUQ0aeJEcpBZkIPM\ngp5/7rkqY4qLi+mlqVMoJLAp9ejS8b5dmvbv30/uTgqSW5pTUFNfunnzZo1yq00zZ84ka3MJyUyN\nycu9CeXn5+vEaLVaWrL4C2od2pw6RITTr7/+WuVYt27douYBTUluaU5NlA5VnlsnIoqPj6de3TpT\nSGBTmvrCc1X+HNS2lJQUGtjvCQoJ8KOxT42gzMzMKuNiYmKoQ5sICmsWSB++/z5pNBrRc6uu5d98\nQ61Cm1HrlqG0fv36KmMKCwtp6gvPUUhgU+rVrTPFx8fXcZb1F2pwzrYlgMUAmgG4AEABYAgRxT3k\nefSwsRljjLGGRO8OUveebALAH4AA4AoRlVXjOVxsGWOMNSqPvEBKEIQIQRCcgIrztOEA3gfwiSAI\ndqJlyhhjjDUwD1og9TWAUgAQBCEawHwAa1B+I4JvxE+NMcYYaxgeVGyNiejvVjXDAXxDRD8R0ZsA\nfB/wvEYhNTUVT40ejciotpg8ZSry8/MNnRKrASLCsqVL0LF1BHp17oi9e/dWGZefn49pLzyHduFh\nGD18qN43LX8UpaWlmPv6TLRv1RKD+z2BK1euVBl38+ZNPDV0MNqFh+GlKZPve9nPrl270KtzNDq1\nicTXX31VZWclrVaLBfPno21ES/Tu1gXHjx+vcqysrCw8M2Ec2oWHYeKY0fftuFZdK1esQOeo1ujR\nsUO966xU14gIXyz6DO0iw9GjczQOHjz48CexeuuBxfbeuVoA6Apgz7/+TZyrwh8TRUVFiO7UGSpz\nO/SaNB2Xbt7BkwMG1kk7OCaOLxYtwtL338FLLoQh5tkYPrC/ToEhIgx+si8yDu3EXG8JnFPOoUv7\ndigsLBQ1t+efnojjP6/DbE8TtMpPROf27ZCWllYpJj8/H53btYX7rQuY6y1B6v5tGDawv85n8siR\nIxg1ZBCGmefiRWcNPnvnDXy5bJnOnG+/9SZWfbEAvWUZ8M2/hF7du+LSpUuVYtRqNXp36wI6dxBz\nvSUwu3QUPTp3RFnZQ5d0VOnb5cvx4ZyZmKJUY6RlHsaNGIoDBw7oNVZDsGD+fCye/y56WqYjSHUN\n/Z/ojTNnzhg6LaanBxXN9QD2C4KQAaAYwN+32PNF+aHkRuvo0aMwtZRh6JSZAAD/FpGY1qsVUlNT\n68W1tuzRfbf8K3zWwRttXOQAgJt5KqxbuwaRkZEVMbdv38bpU6dwdUJbmBgZoX0TO8RuOY+jR4+i\na9euouSl0Wjww4aNuP50R1ibmaCDmx1OZqrwxx9/YPz48RVxsbGxcDYlzGld3j2rtbMtfFfGIiMj\nAwqFoiLu++9WYWpzZwwJcAYAmJsY4+1vvsTkKZWvKV61Yjleb2kLN5vy61JvFaixefPmSl2aLl++\njIxbN/HJyFYQBAHtXOVos+k0zp8/j5YtWz7ytn73zZdY0M4LXT3KOyulF5Vi7coVenXnaghWLv8K\nz4fawM9eCgC4U1CG9evWISwszMCZMX3ct9gS0fuCIOwG4Azgz38tLTYCMK0ukquvJBIJSlWq8mun\nBAHqslKoy8pEawPHxCeRSFCs/qfTVrFGCxOJRCdGrdGiTEswMSrf0y1Wa0R93wVBgPG9
dpTWZuXz\nFKu1OnNKJBKo1JqKz2SpVgu1RgtjY+NKcSYSCVSaf21nWdX5m5hIUKL5Z6+4VIsq5yzVaKDWEiTG\nAjREUNXg9ZBIJCj+V9euIrXue9CYlL++/xwlKNECEgn/jnlcVevSH70GbsCX/pSVlaFdh2hIFS4I\njozGkT9+hp+bCzZtWG/o1Jie1q1bh1nTJuO1MFdkqNT4Kv4ODh49Bn9//0pxo4cPQ+qpQxjha4+9\nqXlINLHFwaPHYWpqKlpus2e+hp3r1+C5IEecyyxCTHoZTsadh43NP+02S0pK0C4iHAFCITo4WWHd\ntUx4RXXCd9+vqzTWxYsX0bFtFKY0c4Lc3AQLTt/CZ199g2HDhlWKW7p0CT7831wM8rFAerEWu2+V\n4dTZc3Bzc6uIISL07dkdws0reNJDjm0pOShy9MTOPfsqWpk+ii1btuD58WMxs2UTFJRp8EVcKnbt\nP4jQ0NBHHqsh+G7VKsyZ8RIG+VkiW6XBzpRSHDt5Cj4+PoZOjT1Aja6z1XPCBltsgfKesx98+CES\nriWiVXgYZrzyCu/ZPua2bt2KTd+vgdTSEi/NeA1BQUE6MWq1Gp99+glOHzsKL7+mmD33DchkMlHz\nIiJ88/XX2L/rTyhdXDH7jTervOFFbm4u5r//Pm4kXkN4myi8PP0VnT1bADh//jwWf/YJSoqLMWLs\neJ12pH/buHEjfv1xE2zkcsx8fQ68vb11YkpKSvDxRx8h/twZBDQLwczXX4dUKtV7W2NiYvD9qhUw\nNTPDlJemo0WLFnqP1RBs2bIFG39YCyuZNWbMnKXzxx+rf7jYMsYYYyKryV1/GGOMMVYDXGwZY4wx\nkXGxZYwxxkTGxZYxlK8wnzblBTjay+Hh6oRvly+vMu7YsWNo4mAHqcQYCpkl1q+vegV6TEwM/H28\nYG9rjSEDnkROTo6Y6QMA1qxZAweZBaQSYzRR2OPkyZNVxn311Zdwd3GC0sEO01+cBvV/blgPlN+k\nfeK4MXCQ28LbvQk2btxY5VgxMTFwsJHBzMQI9taW2L59e5Vxv/zyC5p6ukNpJ8f4UU+J3ggEAA4c\nOICQgKZQyG0w8Ik+Ne5uVV8lJyejU/so2NnI0KpFc8TFPfCGbLUiIyMDT/bpBXtbawT7+zbq5iPV\nxQukGAMw89UZ2L15NZ4PtUFeiQYLT2Rh5Q8bK63ULS0thVJug+eau2BSiDv23MjAK3sv4Wz8pUo3\nrb98+TLaRkbgxZa28LA1w8bLeTD2CMW2nTGi5R8fH4/IsFB80TUI0W72+PrsDay6mIY7OXmVVsn/\n/vvveH78aLwaYQcrUyMsO5uLJ0Y9jfc+nF9pvKcnjMOl/dswKcQWdwvL8MmJLGzZvhNt27atiMnJ\nyYGLUoGRwXaI9rDG4Zt5WBuXieS/Uiutlj558iSe6NYFq7oHwNvWAnMPJ0LWoj2++6HyZUm1KTk5\nGREtQrA42hctnWzw6ekUJEidsftgrGhzGoJarUawvx9aWxejq6cMp24XYnNiKS5dvQZbW1vR5u3Y\nLgo2OdcxqKkNEjKL8eW5XJw+dx6enp6izfm44AVSjD3A1i2/YFSgDEorU/jZS9HHU4rft/xSKebU\nqVMgjRqz2/hCaWmGkUGuCLK3wqZNmyrF7dmzB61dLdHSxQr2FhJMCpEjZs9eaLVaiGXDhg0IVVhj\naIALlJZmeLOtH0pLS3HhwoVKcb/98hP6eknhY2cOpZUpngqwwu+//qwz3ratWzG+mQ0cLCQIUlig\ni5s5/vhjR6WYnTt3wkoioH+AHeRSEzzR1A5yc2Ns27ZNJ25kU0e0b2IHFytzfNDW+757wLVl//79\n6OTugD4+jnCyNMOH7XwRe/QYVCqVqPPWtaSkJOTnZGFIoBxyqQm6edvA0cJY1LaOxcXFOHL8BCY0\nt4Od1AStm8jQwtmK924fgostYwBsbW1xp+Cf7kV3iglyO/tKMS4uLihWa5FZXN7Vp1SjRWqBCkql\nUnesIk1FX+I7hWWwtJDq1eihupRKJf4qKEbZve5Q6UWlKNFoda7Hlds74E7RP0U/raAUNlXsAdnY\nWCOt4J/uRXdVgK2tvFKMq6srCko0KCrTAABUai1yVWq4uLhUirO1tUXyv17bpNxi2FiLe22yra0t\nUvKKob33HtzML4aJibGozUcMwcbGBvnFJcgvKX8PSjVaZOSrKjU8qW2mpqYwMTZGRlH550NLhDsF\nZaLO2SAQkShf5UMz9njYt28fyWVWNCBIQV39FOTm4kRpaWk6cR3bRpGbzJxmRHhRqKM1ebk6kUaj\nqRRTXFxMEWGh1NrTgYYEK8jR1oq+Xb5c1PzLysrIw8mRwpTW9EorL3KVmVPXjtE6campqeSiVFC3\npg40IEhBcmsrio2N1Yn77bffyM7akgYHO1BHXwfy8/KgnJwcnbjmQf7kKjOlIUF25GZtSoF+3jox\neXl5FNzUl/oHutHLET7kaCOjzZs3186G30dpaSl1ahdFXf1caUakD7nb29IXixaJOqehzJj+Mnk5\n2tKwYAcKcrWj4YMHklarFXXOzz/7lJzlMhraTEGtPByoQ9s2VFpaKuqcj4t7tU+nJvI5W8buuXDh\nAn777TdIpVKMHj26UgP/f3vzzTdx8OBBNG3aFEuWLKlyb6m4uBirV6/G3bt30alTpzpppq9SqTBt\n2jRcu3YNHTt2xNtvv11l3N27d/H999+jpKQE/fv3r7JTFlB+rnXHjh2wsbHBuHHjqtxz0Wq1eO21\n13Dq1CmEhobis88+q3IPPj8/H6tXr0Z2djZ69uxZ6QYPYiktLcXq1auRmpqK9u3bi3azCEMjImzZ\nsgVnz56Fn58fRo4cKepRlL/t3r0bsbGxcHFxwbhx4xrcUQN9cQcpxhhjTGS8QIoxxhgzEC62jDHG\nmMi42LI6V1RUhPz8fEOnUaW8vDwUFxfXylgZGRnYs2dPlU0jHhURISsrC2VlZQ8PrkU5OTkoKSl5\nYIxWq0VmZqaolzYx9rjjYsvqjEajwQvPPA17uS2UDg4Y1K9vrRW2msrLy0Ovrp3h7KiAna0NZs54\nBTVZc+Dj5QknRwV6de8GmdQUS5cu1XusxMREhAT6w7OJK+TW1ljx7bd6j1Vd6enpaBvZCq7OSthY\ny/D+e/OqjIuNjYWLUgEvtyZQOthj3759oufG2OOIF0ixOvPF54uwftFH2Nw7GGbGRnhm92V4d+uP\nT79YbOjUMGnsaJScO4QvOvkhr0SNAVsv4JUPFmLs2LGPPNbEiROx6fvV+KSnJ5ysJPjxYiZ+vpSF\nwlKNXrm1Cm2GAbYaTAtzx/WcIvTZEoftu/eiZcuWeo1XHU/26QWjG2cwPkSOHJUGb8WmY9mq79G3\nb9+KmPz8fHh7uGFyiAzhLlY4m1aIL07nIiEpGXK5/AGjM9Zw8QIpZnBHDuzH+KYOsDYzgZmJEZ4N\ndsKxQ/Wjfd7Rw4fxQnNnmBgZwU5qiqd87XE
09qBeY/35559o42YFZ5kpBEHAgAB7FJdp9epepFar\ncebCRUxp4Q5BEOArt0QPT4f79j2uLceOH0c/XxmMBAF2UhO0c5Lg6NEjlWISEhJga26CcBcrAEAL\nJ0s4ykxx5coVUXNj7HHExZbVGTcvbxy5W1BxePZoWh6auHsYOKtybu7uOJqaC6D8/Oix9EI00bPP\nq7u7O66kqyq6OV3KKIKpsQBzc/NHHsvExARKOzscv11+IwOVWoMz6flo0qSJXrlVVxNXF1xKLz/E\nr9ESEvIJbm7ulWKcnZ1xN6+oopNQVrEat3OKdDpIMcb4MDKrQ9nZ2YiOag3rskJYSkxwJa8E+w8f\nrRfNyy9evIhuHaMRqrBCVnEpIHfEnoOHYGlp+chjqVQqKGxksJAATazNEH+3CB27dsfOnTv1ym3H\njh0YO2I42rk74HJGHlpFd8HaDRshCDpHqmrNiRMn0LtHNwQ4SJFeVAZXn0D8sWuPTuOCTxcuxPz3\n30WgoyUu3y3E9Ndm4fU5c0XLi7H6jptasHqhqKgIu3fvhlqtRqdOnerVub309HQcOHAAUqkUXbt2\nhZmZmd5jqdVqDB48GDdv3sSkSZMwZcqUGuWWlJSE48ePQ6lUomPHjqIW2r/dvn0bsbGxsLa2Rteu\nXSvdPejfzp07h0uXLsHf3x9hYWGi58VYfcbFljHGGBMZL5BijDHGDISLLWOMMSYyLraswSMiXLx4\nEceOHUNRUVGNx8vIyMDhw4fx119/PTDu2rVrOHLkCHJzc2s8Z3XdvHkThw8fRkZGRp3NyRqnnJwc\nHDlyBElJSYZO5bHAxZY1aBqNBqOGDUX39m3x7ND+aO7fFNevX9d7vO3btyPAxxsvjRqK0KAALP58\nUZVxr73yMtqGh2HqyMEI9PUW/bpYoHxlcPOgADw9fACa+njpvfqZsYc5dOgQ/Lw8MWlYf4SHNMNb\nc+cYOqV6jxdIsQZt5cqVWD5vLn7t2wxSE2N8cfoG9sMBMfsfvWGFSqWCq9IRG3sFIdLFFil5xej6\n0xkcPH4STZs2rYiLiYnBlDEjsGtgC9iaS/DTldtYcCUPl66Ltwdw8eJFREdFYkFnJzhYSHAxvQgL\njmcjLT2D7zPKahURwdXJEU8HmKOVqxVyVWrM2n8XP/6+A23btjV0egbHC6RYo3T50kV0d5FBamIM\nAHtL/C8AABEmSURBVOjno9C7w1FaWhqkJkaIdLEFALhbS9HcSY5r165Virty5QqiXeWwNZcAAJ70\nVeJq8g1RG/UnJCTATyGDg0X5nEEKCxhBi7t374o2J2uciouLkZGVjXCX8mvQbcxNEKSw4M5hD8HF\nljVozZqHYMfNPBSUlt9558erdxEUFKTXWM7OzijREg7ezAIAXM8pRFxaNvz9/SvFBQcHY8/NLGQW\nl96bMw2BPt4wMhLvx83f3x9X0/Nxp6B8znNphYCRMZRKpWhzssZJKpXCyVGBo38VACjvHHbhbiGC\ng4MNnFk9R0SifJUPzZhhaTQamjh2DCmsraipkwP5e3lScnKy3uPFxMSQg83/27vzuCrLvI/j3x8c\nFZBFRMAFTDFNy9RMeyI1HaXS9tK0vawpW2amfeYpW2216TVTmVP5TOs0Zfs8zdRTmWmrWam4ZxI6\nLkioKRxUQOGaPziPUUmJcnmDfN6vFy/l5jrX+R4O+uW+7+vcJ9Ed3CHNtYqPc1Mef2yX42656UaX\nHB/nenRIcxnpqS43N3eP73N3TX5kkktsGeu6tG3tUloluvfff9/7faJpmj17tktLSXZZbVu7xLhY\nd89ddwYdqcGIdN9POpFztmgSVqxYodLSUnXr1m2vrgwlScXFxcrPz1dGRoZSU1NrHbd27Vpt2LBB\nXbt2VVxc3F7d5+4qKipSQUGBsrKylJiYuE/uE01TaWmp8vLylJ6ernbt2gUdp8HgClIAAHjGAikA\nAAJC2QIA4Nmu38YDTcL69es1bdo0hUIhjRgxQgkJCXs8l3NO7733ntasWaP+/furZ8+e9Zh075SX\nl+utt95SOBzWkCFD1LFjx12OKygo0PTp0xUbG6sTTjhBsbGxuxw3b9485ebmqnPnzvvsHXiC8Mkn\nn+iZZ55RSkqKxo8fr/j4+KAj7bRgwQLNmTNHmZmZGjZs2H77HGA/sqtVU/XxIVYjN2h5eXmubWqK\nG9Al3R3ROc116dTRFRUV7dFcVVVV7oKzz3Ld27VxZ/bOcmlJCe5vzz5bz4n3zNatW92Rhx/msju3\nd6MO7exSWyW5zz777Cfj5s+f79JbJ7vTenZyR3fp4Poc0sMVFxf/ZNzkRya51KR4l9O9rctsk+R+\nc/m4ffEw9rnHHnvMtYiOctkZCS4rOcalJMa7TZs2BR3LOefcU08+6VISq5+DTmmt3Njzz3VVVVVB\nxwKcc6xGxo+MOvVkxa/6XKd3r34/2b/O36isoaP14KRH6jzXzJkzddlZo/TBqMMUG4rW0o2lOubV\nudpUElZ0dHR9R6+Thx56SNMefUB/H36wzEyvLFunx9dJs+fN/8G4YwYP1EnNN+vCnhlyzunS6V+p\n5xkX65Zbb905prS0VG3TUvXnnPZKj2+urdsrdfX0Qr39/ofq06fPvn5oXiXHx+rSPq01oGOiqpzT\n7TPWqPuQEzV16tRAc1VUVKh1qyTdP7SdMhJbqHxHla6d8a2m/uNNDRgwINBsgMQCKfzI2jWrdWDy\n95fxOzAxpLWrV+3RXOvWrdPBbRJ3XqWpe+uWqqqqUjgcrpese6Ng7RodlhKz8zBj3/QkFRYW/mTc\nuoJ16ptW/VIZM9PhKXFat2b1D8Zs3LhRLVs0U3p89fctrlm0MpPjtG7dOs+PYt8rK69Qt5Tqw+hR\nZureJkZrfvT9CEJJSYmiTMpIrH75VotQlA5IjlVBQUHAyYCfR9k2UYOH5uhf+VtVtqNK4fJKvbOq\nTIOH5ezRXP369dNHq9ZrTmGxnHN6bP5qdeqYqaSkpHpOXXeDjh6sF/K+05rwNm2vrNKfc1drwMCB\nPxk3cPBgPTR/rcp3VOnbLeV65usNGvSroT8Y06FDB7WIa6n38qsf56Kirfpmwxb17t17Xz2cfaZt\neppeWrxBlVVORVu26938Yg0fPiLoWEpJSVF6WpreXL5Zzjkt27BNSwrD6tevX9DRgJ+3q2PL9fEh\nztk2aGVlZe7sMWe4ZqFo17xZyF31mytdZWXlHs/32muvudaJCa55KOR69TjILV++vB7T7p2J997j\nYls0d81DITd82K92ee4xHA67U08Y4ZqHQi6meTN36/jxuzwPuHDhQte18wGuWSjapbZu5d599919\n8RD2uby8PNc2pZWLMrlokzvlpBODjrTTsmXL3MHdDnTNQtGudVKie+ONN4KOBOwkztliVyoqKhQV\nFaVQaO8XpjvnVFZWVusq3iBVVlZq+/btiomJ+dlx5eXlCoVCv3iueevWrYqNjd3vV8Fu3rxZ8fHx\n9f
LzUd+2bdummJiY/f45QOPCFaQAAPCMBVIAAASEsgUAwDPKtglbvHixbr3lFk2YMEErV64MOo43\nS5cu1fDjjtOAo47SlClTgo4DoAmibJuo2bNna8iAbIXfeV6FbzytIw/vq+XLlwcdq94tXbpUR/Tp\nrdarF2lQ1be6/rdX6sYbbww6FoAmhgVSTdRJx+boOFeo83tmSJImzs7Xhh4D9fgTTwacrH4dd+yx\nSlmzWI8dd6gkacaqjbro7UXauGVbwMkA7I9YIIUfCJeUqEPC9y+DyYhvodKS4gAT+VEaLlHHxJqP\nM0Y7KisDTASgKaJsm6hTzhitCV+s1qL1Yc0p3KwHctfqlDPGBB2r3p1/4Vg9Om+VZq7aqLxNW3TV\n9MXq1r170LEANDEN75Xq2CeuuuZabdmyRRf89X8UHQrphjvu1ujRo4OOVe/GjRun/Px8jX3kYe2o\nrFS37t014+NPg44FoInhnC0AAPWEc7YAAASEsgUAwDPKFgAAzyhbz3Jzc3XOeefp1NNH6qWXXgo6\nTp1VVlbqj/ffp9OOH64rx12qoqKioCPV2ZIlS3ThOWdr5InH62/PPht0nEbBOaenn3pKI088XmPP\nPUfLli0LOhLQqFG2Hi1ZskRDc3IUlZalDn0H6+rrf68nnmxcF424/JKL9c/HH9apVqioue9r4BH9\nFQ6Hg46127755hsNGXCUslbN1QmVa3XnDddo0sMPBR2rwXvwT3/SfTfdoBOq1uqAf3+po7OP1IoV\nK4KOBTRarEb26Lrrr9fKkkqNuvw6SdKSL2fpH4/crYXzcwNOtnvKy8uVlJCgby45WgnNq18ldtqb\ni3XFvQ9q5MiRAafbPbfddqu+e/PvuntQV0nSnMJijZu1Vl+vXBVwsoatS0Z7PTukkw5NTZQk/f7D\nr5Vx2kW6+eabA04GNGysRg6Aq3KKqvEm5NGhkBrTLyDVWZ2ia7w5dyjKGtdjqHKKrvFT3qyR5Q+K\nk374vBvfN2BvcFELj84//zwNHZajpJRUJbZO0SuTJ+oP114ddKzdFhMTozNOP13nvfOJLjskXZ9/\nG9bXpTuUk5MTdLTddvY552jQpIeVGd9CGQkxuvPL1brkd9cGHavB+/Vll+vSRx/W+H6ZWh0u04t5\nG/TpmWcGHQtotDiM7NmsWbN078T7tW3bNp05ZrQuGjtWZj85wtBgVVRU6O4Jd+jTD2aqfWam7pr4\nR2VmZgYdq07mzp2re26/VaUlJTpl9BhddvkVjeo5CIJzTn+Z/IjeePklJbZqpfF33Kk+ffoEHQto\n8Go7jEzZAgBQTzhnCwBAQChbAAA8o2wBAPCMsgUauKKiIvXqcZCSYpsrPTlRzz333F7N9+qrr+qI\n3j3V66Cuuvfuu1RVVVVPSQHUhgVSQAPXOaOd2lqpzurZRt9sKtfjX36rGR99rOzs7DrPNX36dJ07\n6nRNHtJVyTEhXffxCo254ir94cabPCQHmh4WSAGNUEVFhVYVFOra7PbqlByjYVlJ6te+paZMmbJH\n870y9Xld1audcjq10eFtW+m+ozrp5ef3bk8ZwC+jbIEGLBQKyUwqKa+UVP36181llYqPj9+j+WJb\nxmt92Y6dn6/fWqHY2Lh6yQqgdhxGBhq44ccco7mzPtDJ3ZK1/LsyzSsqU97K1UpLS6vzXPn5+Tqq\nfz+NyUpWcotoPbqoUE+/8KJGjBjhITnQ9HAYGWik3p42TeeN+62+rGijUJfDteir5XtUtJKUlZWl\nT7/4UrFDR6q473F6/a23KVpgH2DPFgCAesKeLQAAAaFsAQDwjLIFAMAzyhYAAM8oWwAAPKNsAQDw\njLIFAMAzyhYAAM8oWwAAPKNsAQDwjLIFAMAzyhYAAM8oW/yi8vJy5eXlqbi4OOgoANAoUbb4WfPm\nzVPXTh2Vc9QRymzfVo9Onhx0JABodHiLPdTKOacuHTN086FtNOqgdlpZvFXHvj5f737wkXr16hV0\nPABocHiLPdRZOBzWt+s3aNRB7SRJnZLiNKhjihYsWBBwMgBoXChb1CohIUEt4+L0yZrvJEmbyrbr\ni4LN6tKlS8DJAKBxCQUdAA2Xmem5qS/qnNGj1CM1Scs3lOjCX1+i7OzsoKMBQKPCOVv8osLCQi1c\nuFDt27fXIYccEnQcAGiwajtnS9kCAFBPWCAFAEBAKFsAADyjbAEA8IyyBQDAM8oWAADPKFsAADyj\nbAEA8IyyBQDAM8oWAADPKFsAADyjbAEA8IyyBQDAM8oWAADPKFsAADyjbAEA8IyyBQDAM8oWAADP\nKFsAADyjbAEA8IyyBQDAM8oWAADPKFsAADyjbAEA8IyyBQDAM8oWAADPKFsAADyjbAEA8IyyBQDA\nM8oWAADPKFsAADyjbAEA8IyyBQDAM8oWAADPKFsAADyjbAEA8IyyBQDAM8oWAADPKFsAADyjbAEA\n8IyyBQDAM8oWAADPKFsAADyjbAEA8IyyBQDAM8oWAADPQj4nNzOf0wMA0CiYcy7oDAAA7Nc4jAwA\ngGeULQAAnlG2AAB4RtkCnpjZeDNbZGbzzWyumfWv5/kHm9k/d3d7PdzfKWbWvcbnM8ysb33fD7A/\n8roaGWiqzOxIScdL6uOc22FmrSU193BXta1w9LHy8VRJ/5L0lYe5gf0ae7aAH+0kbXDO7ZAk59x3\nzrlCSTKzvmY208y+MLP/M7P0yPYZZvagmc0zswVm1i+yvb+ZfWpmc8zsYzPrurshzCzOzJ4ws88i\ntz8psv0CM3s1cv/LzGxijdtcHNn2mZlNMbNJZpYt6WRJ90f20rMiw0eb2Wwz+8rMBtTHNw7YH1G2\ngB/vSuoYKaHJZna0JJlZSNIkSSOdc/0lPSXpnhq3i3XOHSbpysjXJGmppIHOucMl3Sbp3jrkGC9p\nunPuSElDJT1gZrGRr/WWdIakXpLGmFkHM2sn6WZJR0gaIKm7JOecmyXpDUk3OOf6OufyI3NEO+f+\nS9I1km6vQy6gSeEwMuCBc25L5HzmIFWX3FQz+29JcyT1lDTNqq/6EiWpoMZNX4jc/iMzSzCzREmJ\nkp6N7NE61e3f7bGSTjKzGyKfN5fUMfL36c65Ukkys8WSDpCUKmmmc644sv1lST+3J/1a5M85kdsD\n2AXKFvDEVV8x5kNJH5rZQknnS5oraZFzrrZDrj8+1+ok3Snpfefc6WZ2gKQZdYhhqt6LXv6DjdXn\nlMtrbKrS9/8f1OXSb/8/R6X4/wSoFYeRAQ/MrJuZHVhjUx9J/5a0TFJqpOxkZiEzO7jGuDGR7QMl\nFTvnwpKSJK2NfH1sHaO8I+l3NXL1+YXxX0g62sySIoe8R9b4WljVe9m14fqsQC0oW8CPeEnPRF76\nkyuph6TbnXPbJY2SNDGyfZ6k7Bq3KzOzuZL+IumiyLb7Jd1nZnNU93+zd0pqFllwtUjShFrGOUly\nzhWo+hzy55I+krRCUnFkzFRJN0QWWmVp13vhAHaBayMDDYS
ZzZB0nXNubsA5WkbOOUdLel3SE865\n/w0yE9DYsWcLNBwN5Tff281snqSFkvIpWmDvsWcLAIBn7NkCAOAZZQsAgGeULQAAnlG2AAB4RtkC\nAOAZZQsAgGf/AckQihPvOEbCAAAAAElFTkSuQmCC\n", + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# plotting the dataset as is.\n", + "plot(X, Y)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Metric Learning\n", + "\n", + "Why is Metric Learning useful? We can, with prior knowledge of which points are supposed to be closer, figure out a better way to understand distances between points. Especially in higher dimensions when Euclidean distances are a poor way to measure distance, this becomes very useful.\n", + "\n", + "Basically, we learn this distance: $D(x,y)=\\sqrt{(x-y)\\,M^{-1}(x-y)}$.\n", + "And we learn this distance by learning a Matrix $M$, based on certain constraints.\n", + "\n", + "Some good reading material for the same can be found [here](https://arxiv.org/pdf/1306.6709.pdf). It serves as a good literature review of Metric Learning. \n", + "\n", + "We will briefly explain the metric-learning algorithms implemented by metric-learn, before providing some examples for it's usage, and also discuss how to go about doing manual constraints.\n", + "\n", + "Metric-learn can be easily integrated with your other machine learning pipelines, and follows (for the most part) scikit-learn conventions." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Large Margin Nearest Neighbour\n", + "\n", + "LMNN is a metric learning algorithm primarily designed for k-nearest neighbor classification. The algorithm is based on semidefinite programming, a sub-class of convex programming (as most Metric Learning algorithms are).\n", + "\n", + "The main intuition behind LMNN is to learn a pseudometric under which all data instances in the training set are surrounded by at least k instances that share the same class label. If this is achieved, the leave-one-out error (a special case of cross validation) is minimized. \n", + "\n", + "You can find the paper [here](http://jmlr.csail.mit.edu/papers/volume10/weinberger09a/weinberger09a.pdf)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Fit and then transform!" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# setting up LMNN\n", + "lmnn = metric_learn.LMNN(k=5, learn_rate=1e-6)\n", + "\n", + "# fit the data!\n", + "lmnn.fit(X, Y)\n", + "\n", + "# transform our input space\n", + "X_lmnn = lmnn.transform()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "So what have we learned? The matrix $M$ we talked about before.\n", + "Let's see what it looks like." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[ 2.49193844, 0.35638993, -0.39984418, -0.77608969],\n", + " [ 0.35638993, 1.68815388, -0.90376817, -0.07406329],\n", + " [-0.39984418, -0.90376817, 2.37468946, 2.18784107],\n", + " [-0.77608969, -0.07406329, 2.18784107, 2.94523937]])" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "lmnn.metric()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now let us plot the transformed space - this tells us what the original space looks like after being transformed with the new learned metric." 
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "image/png": "<base64 PNG data for the scatter plot of the LMNN-transformed dataset omitted>",
+      "text/plain": [
+       ""
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "plot(X_lmnn, Y)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Pretty neat, huh?\n",
+    "\n",
+    "The rest of this notebook will briefly explain the other metric learning algorithms before plotting their results.\n",
+    "Also, while we first ran `fit` and then `transform` to see our data transformed, we can also use `fit_transform` if you are using the bleeding-edge version of the code. The rest of the examples and illustrations will use `fit_transform`."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Information Theoretic Metric Learning\n",
+    "\n",
+    "ITML uses a regularizer, the LogDet divergence, that automatically keeps the learned matrix positive semi-definite. It handles soft must-link and cannot-link constraints, and uses a simple algorithm based on Bregman projections.\n",
+    "\n",
+    "Link to paper: [ITML](http://www.cs.utexas.edu/users/pjain/pubs/metriclearning_icml.pdf). 
" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "itml = metric_learn.ITML_Supervised(num_constraints=200)\n", + "X_itml = itml.fit_transform(X, Y)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAdsAAAFsCAYAAACEtRP5AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzs3Xd0FNX7x/H3pPdKCoEQeif0UAOhgyC9g2ABFLAAfhFR\nOio2FARFpKP0Jl16R3rvJZBAIIH0bArJ7s7vjyDqzyRAkt1NeV7ncGTCnZnPiUme3Dt37lVUVUUI\nIYQQhmNm6gBCCCFEQSfFVgghhDAwKbZCCCGEgUmxFUIIIQxMiq0QQghhYFJshRBCCAOzMNSFFUWR\nd4qEEEIUOqqqKv//YwYrtk9vaMjLCyGEEHmKovynzgIyjCyEEEIYnBRbIYQQwsCk2AohhBAGJsVW\nCCGEMDAptkIIIYSBSbEVQgghDEyKrRBCCGFgUmyFEEIIA5NiK4QQQhiYFFshhBDCwKTYCiGEEAYm\nxVYIIYQwMCm2QgghhIFJsRVCCCEMTIqtEEIIYWBSbIUQQggDk2IrhBBCGJgUWyGEEMLApNgKIYQQ\nBibFVgghhDAwKbZCZEGr1RITE4OqqqaOIoTIx6TYCpGJhYsW4eziim8JPypXrUZwcLCpIwkh8inF\nUL+xK4qiSm9A5Fdnz56lVZt2jJ27Cp+SZdj22y9c3LeVc2dOmzqaECIPUxQFVVWV//9x6dkKkYGT\nJ09SvVEQPiXLANC27yAuXThPWlqaiZMJIfIjKbZCZMDX15c7Vy+Q+iQFgFsXz+Dq7o6lpaWJkwkh\n8iMZRhYiA6qq0u+1ARw5dpziZcpz9dQxfl26hPbt25s6mhAiD8tsGFmKrRCZUFWVgwcPEh4eTt26\ndSldurSpIwkh8jgptkIIIYSByQQpIYQQwkSk2AohhBAGJsVWCCGEMDAptkIIIYSBSbEVQgghDEyK\nrRBCCGFgUmyFEEIIA5NiK4QQQhiYFFshhBDCwKTYigIhNTWVqKgo2eRdCJEnSbEV+d7sH3/ExcUV\nv1KlqF6zFqGhoaaOJIQQ/yJrI4scSUxMZOXKlcTHx9OqVSuqVq1q1PsfOXKErj168skva/Dw8WXj\ngh+4f+E4Rw4dNGoOIYQAWRtZGIBGo6FBo8b8vHQFO46fp0nTIP744w+jZjhx4gS1g9rgWawEiqLQ\nrt8QTp44btQMQgjxPBamDiDyr0WLFmHv4cN7X/2MoijUCGzJyA//R9u2bY2WwdfXl9uLlqJNS8XC\n0orr507gU6y40e4vhBAvQoqtyLbIyEiKliyLoqSPmBQvXZ7oqCijZujatSvLVqxkQv/2FPUrxbUz\nJ1i7ZrVRMwghxPPIM1uRbYcOHaJrj56M/G4hHsV8+e2biRR3tWfZr0uNmkOv13PgwAEiIyOpV68e\nJUqUMOr9hRDiL7J5vDCIJUuWMObjsSTEx/NKhw4snD8PR0dHU8cSQgiTkGIrhBBCGJjMRhZCCCFM\nRIqtEEIIYWBSbIUQQggDk2IrhBBCGJgUWyGEEMLApNgKIYQQBibFVgghhDAwKbZCCCGEgUmxFUII\nIQxMiq0QQghhYFJshRBCCAOTYiuEEEIYmBRbIYQQwsCk2AohhBAGJsVWCCGEMDAptkIIIYSBSbEV\nQgghDEyKrRBCCGFgUmyFEEIIA5NiK4QQQhiYFFshhBDCwKTYCiGEEAYmxVYIIYQwMCm2QgghhIFJ\nsRV50o4dOyhboQI+viXo2q0bWq3W1JGEECLbpNiKDIWGhnLixAl0Op3R733q1Ck6du5CndZd6D1y\nAqcuXKZpUDOj5xBCiNyiqKpqmAsrimqoawvD0el0lC1Xnnv3QjEzt8DS0pKdf2ynUaNGRsvQtWtX\nHqcqDJs6E4DHD+7xv27NSU1JNlqGl3X06FGGDn+X8IcPadS4MfN/mYubm5upYwkhjExRFFRVVf7/\nx6VnK/6lW7duaFJSmf3HSRYduU7jDt1o/2ono2ZI/2L9+1hVVf7zlZuHhISE0KFjR5r1fZvxizaS\naGZH9569TB1LCJGHWJg6gMhbTp46TfMufXB2KwLAK/0Gc2DjaqNmGDt2LI0Dm7BhfhmKlSrHmjnf\nEhAQYNQML+PgwYNUDQikXsv2ALw2ejJvBVYkOTkZW1tbE6cTQuQF0rMV/+Lt5cnF44fQP31We/X0\nMSwsLI2aoU6dOmzdspmzuzez5ofPaVi7Ogf27zNqhpfh5ORE5MP76PV6AKIfhWNuZo6VlZWJkwkh\n8gp5Ziv+JTo6mhIlS+Ho6o6rpze3L55lyuRJjBkzxtTR8qzU1FSatWjJE8USv4r+HNvxO6M+eI8P\nR40ydTQhhJFl9sxWiq34D41Gw8iRI4mOjuaDDz6gSZMmpo6U56WkpLBw4UIePHhA48aNadu2rakj\nCSFMQIptLouLi2Pz5s2kpaXRrl07vL29TR1JCCGEiUmxzUWPHj2ifoOGFPEtjbWtLdfPHOfggf1U\nrFjR1NGEEEKYkBTbXDRq1IdcDY9hwOgpAGxfNp/Ym+fYtPF3EycTQghhSvKebS56GB6OX4Wqz45L\nVqxKeESECRMJIYTIy6TYZkNQUFP2rF5CXNRjkhM1bFs6h2ZBQaaOJYQQIo+SRS2yYcjgwdy5c5eR\nHRuh1+vp1bsPU6dMNnWsXBUeHo5Go6FkyZJYWMiXiRBC5IQ8s80BvV6PqqqYm5ubOkquUVWVd997\nn19/+xU7e0fcXF3YteMPihUrZupoQgiR58kzWwMwMzMrUIUWYMWKFezcd4DvNx3lu81HqdyoJW8N\nHmLqWEIIka9JsRX/cu78eWoFtcXO0QlFUQh8tQfnz583dSwhhMjXpNiKfylfrhxXTx5Gm5YKwPnD\neylbtqyJUwkhRP4mz2zFv2i1Wrr37MXJ02dwLeJBTMRD9uzeJQt2CCHEC5BFLcQLU1WVM2fOoNFo\nqFmzJk5OTqaOJIQQ+YIUW2Fyqqpy6tQpYmJiqF27Nu7u7qaOJIQQuSqzYisvUAqj0Ol09OrTl2PH\nT1KkqA8P7txixx/bqVmzpqmjCSGEwUmxLSSuXbvGt9O/Q6PR0KN7N7p162bU+69atYrLN27zxepd\nWFpZc3jret54axDnzpw2ao6XkZqayvLlywkPDycwMJBGjRqZO
pIQIp+S2ciFwO3bt2kUGEiClQvO\nFWrz7ohRzF+wwKgZgoODqVCrPpZW1gD4N2zK3Tt3jJrhZaSlpdGydRtm/LyAI1fu0qV7D+bNn2/q\nWEKIfEqKbSGwaNEiGr7Sjc6D3ieoUy+GTP6eb7/73qgZatasyZkDO4iPiUJVVfatX0b16tUzbX/w\n4EHqNWhI2fIVee/9D0hJSTFiWti0aRORcRpG/7iMviPH8dGPyxk16kNkHoIQIjtkGLkQSNNqsbK2\neXZsZW2DTqszaob27dvz2p/HGNWxMfaOjri7ubFj+7YM2167do3OXbry2pjP8ClZhrU/fc3Q4e+y\naIHxepYxMTF4lyiNmVn676PeviVJSU5Cq9ViaWlptBxCiIJBZiMXAhcuXCCoWXN6vDcWVw8v1sya\nxpA3BzLmo4+MniUmJob4+HiKFy+e6VKX06dPZ9fJiwwc8xkAcVGPGdO9OXGxMUbLeePGDeo3aMg7\nn/1AqUrV2PDLDJIjQti3Z7fRMggh8h+ZjVyI+fv7s2XzJqZ+/gVXNBo+GP4O7737rkmyuLq64urq\nmmUbOzs7EmKinx3HRUVia2tr6Gj/Ur58eVauWM7Q4e/yKCKCwCaBrFm10qgZhBAFh/RsRZ4TGxtL\n7Tp1KelfB2+/Muxds5QJ4z5h6DvvmDqaEEJkSRa1EPlKVFQUs2fPJio6mrZt2vDKK6+YOpIQQjyX\nFFshhBDCwGQ/WyGEEMJEZIJUNoWEhLB48WLS0tLo1asX1apVM3UkIYQQeZT0bLPh1q1b1AkI4Pj1\nUC49iKVpUDOOHj1q6lhCCCHyKHlmmw1Dhw3nkdaK7kM/BODg5jXcOrKDXTv+MHGyzC1dupTxkyaT\nlqalVfNmLFq08NmCDUIIIXKHPLPNRQmaBNy9fZ4du3v5oNFoTJgoa5s2bWLw2+/Qoteb9B45nu27\n99Kte3dTxxJCiEJDntlmQ9fOnXl3xCiKlS6Pja0dq2d9wZA3Bpg6Vqa+/Oor2vZ5k1Y9BwLg5unN\n1+++ZuJUQghReEixzYauXbsSGRXFN19+jFar5Y2BAxg1cqSpY2VKURRUvf7ZsV6nz6I1aLVaFixY\nQGRkJH379qVUqVKGjiiEEAWaPLMtBLZv307nrl3pMXQ0rp5eLJ/xOa2CmrJy5Yr/tE1KSqJs+Yqk\nanU4F/EgLPgGq1asoFOnTiZILoQQ+YssapFPrF69mvETJ6PVptG3dy+mTp2aK9ddu3YtYz8dx5PU\nVNq3bcOcOXMybNevXz9OXb7BuF9WY2Fpye61v7Jx3kwiH4XnSg4hhCjIpNjmAxs3bqRn7z50f3sU\nDs4urPhhGgP69WH27NlGy1C/QQP8agXSedD7ADwKC+XjXq1JTsy7E8CEECKvkNnI+cDkKVPpMOAd\nOgx8h6DOvRn22UyWr1pt1AwN6tfn4Ja1aOJiUFWV3Wt+xc3dPdP2O3fupFadupQtX5FRH/6P1NRU\nI6YVQoj8QSZI5SE6vR4rm39u8m6NsUcHpk+fzuGjfzK8TR0srWywsDDnwL69GbY9e/Ysvfv2481x\nX+Hh48vKGZ/x4f9GM+uHmUbNLIQQeZ0MI+chixYtYti77/H6x5/h6OzKoi8/pUWTwAwnMhna7du3\niYyMpHbt2lhYZPw72eTJkzlz9xG93x8LQMT9EL56pxcPw+4bM6oQQuQZsnl8PvDGG28QFxfH19O/\nQ6fT0a5lCxYvXmySLGXKlKFMmTJZtrG3tychOvLZcVzUY+zs7AwdTQgh8h3p2Ypsi4yMpFbtOlQI\nCKSIjy97Vi9m+tdf8dprsmCGEKJwkglShdx3332Hl08x3D29aP/qq2i12hxfs0iRIpw6eYJGVcrg\npSSz4rdfpdAKIUQGpGdbCCxbtoxBg4cwaPxXuHp4sejL8VQoVYLdu3aaOpoQQhQo8p5tIVa/QQOK\n+den+zvpuxTdvXaJL97pQ0JcjImTCSFEwVJoJ0jdunWLpUuXotfr6devH5UqVTJ1JKOztLQkKSHh\n2XFyYkKW2+ulpaWxc+dOEhISaNKkCT4+Ppm2FUII8XwFuthevnyZJkFBNHylG2bmFjRqHMiunTuo\nXbu2qaMZ1bQvvqBFq9ZY29rh5uXN2jnT6durR4ZtU1JSaNm6DZGx8bh5FeXd997nj+3bqFOnjpFT\nCyFEwVGgh5H7DxiA6laCDgPfAWDX6iXEXD/DhvXrTJrLFPbu3cuHo0eTnPKE3j26M2nSpAzbzZo1\niyVrNjLy+/TN5Q9v28Dxjcs4deKYcQO/hPj4eObMmcPjyEhatWxJmzZtTB1JCFFIFcrZyAkJGty8\nvJ8du3kVJSEPb/JuSM2bN+fs6dNcu3wp00ILcO/+fUpXq/lsmLl89do8eBBmpJQvT6PRUL9hI7Ye\nPM69ZDNef2swc37+2dSxhBDiXwp0se3RrSsb583g1qWz3Ll6kXVzvqF7t66mjpWnNW7UiGPbNxDz\nOBy9Tsf2X3+hQYMGmbZfs2YNpcuWw8PTi9fffIukpCQjpk2/v0ORogz97Ac6v/Ueo2YsYvyECUbN\nIIQQz1Ogn9n279+f2NhYZn42GlVVGTZkMG8PGWLqWHlax44duXDxIqM6BaKgEFC/PsvWrsmw7bFj\nx3hn+Lu8O+0nPIqVYNn0Sbz7/gcsnD/PaHk1Gg2unl7Pjt08i5JYSEcvhBB5V4F+ZiuyLy0tjZSU\nFBwdHTNtM3HiRC7cj6bnu2MAePzgPl8M6kr4wwfGisn169dp0LARr38yjeJlKrDu528p7mrPqhXG\nX09aCCEK5TNbQzp58iT9BwygV+8+/PHHH6aOk+ssLS2zLLQArq6uRD649+z4UVgoTs7Oho72LxUq\nVGDD+nXsX/ELM0YMpEJxLxbOn2/UDEII8TzSs82GU6dO0bpNWzq8+R42tnZsmDud+b/MpVOnTqaO\nZlRxcXHUrVcfz5LlcC/qy+Eta1g4f16h+zwIIcRfZAWpXPTmoMGkOnnT/rW3ATixZztntq7IdN/X\nvGDjxo1MmDSFxMREunfrymdTp2S6dd7LiIuLY8mSJcTGxtKuXTvq1q2bC2mFECJ/KrQrSBmCTqfD\nwtLq2bGllRU6nS7T9lqtlnXr1pGUlES3bt1wcnIyRsxnjh49ypuDhzB4wre4enjz2/SJqOp4vvpy\nWo6v7ezszPvvv58LKf8rNTWV+Ph4ihQpYpDrCyGEsUjPNhsOHjxIl27d6T1iPNa2tqyc8RlffT41\nwx1vYmNjqVCpCqlaLdY2tmjiYjh6+BD+/v5Gy/u/0aMJTYIugz4AIPTmVX75dDi3b94wWoaX1adv\nX9asWYOqqri4urFvz26jfs6EECI7ZIJULmrSpAkrly/j1uHtnNu+mm+mfZ7p1nK9evfBu1R5Zm07\nzncbD9GsS186d+tm1LwO9vbERT5+dhwb+Qg7O/tM2yclJfHbb78xZ84cbt68aYyI/zJ79my2bPuD\nb9buZenxYBq060Lr
tu2MnkMIIXKL9GwNrHylygT1eINmXfoAcOviWb4b+SYxUY+fc2buefjwIbXr\n1MU/sCXORbzYs3oJ8+bOoUuXLv9pq9FoaBTYBHM7R9y8fDhzYBe/b1hPkyZNjJb3lVdeQXXxYcDo\nSQAkJsTxTouaaNNSjZZBCCGyQ3q2JlKhbBkOb9tAWuoTVFXl0Na1Rn8GWbRoUU6dPEH9iiUpbqPj\n9/VrMyy0APPmzcO+SFH+N+s33hr/Da9/Mo33Row0al4/Pz9unD+F/ulz8FsXz2Jja2vUDEIIkZuk\nZ2tgSUlJVKxchajoaCytrFF1Wk4c+5MKFSqYOlqGxnz8McHxWroOHgFAxP0QvhnWh7B7oUbLkJKS\nQsnSZbCwsaOoXxkuHjvI1CmTGT16tNEyCCFEdshsZBOxs7PjbvBt9u/fT1JSEi1btsTGxsbUsTLV\nonlzlr45iPqtXsXdy4cNv3xHs2bNjJrBxsaG0Lt3+OKLLwgPD+ebiR8TFBRk1AxCCJGbpGcr/uPH\nH3/kk0/HkZycxCvtO/DrksXPXU1KCCGELGohXpKqqqiq+myrPSGEEM+X7WFkRVGsgW5AyX+2V1V1\nSm4GzG/27t3Lt999T1paGm+98Tq9e/c2daRcpSgKivKfrxchhBDZ8CLPbDcCccBp4Ilh47y4yMhI\nAJOsLnTo0CG69+xFrw8+xcbWjpGjx6DT6ejXr5/RswghhMj7njuMrCjKJVVVq770hQ00jPzkyRN6\n9enL7t27AGjVqjUrly/D2to61++VmdfffAs8StKm9xsAnDm4m2PrF3PowH6jZXhZS5YsYcKkySQm\naujWtRs/zJxh1M+ZEEIUBjl5z/aooijVDJApW6Z+9jkPYhL4addZftp1lrDoeKZ+9rlRMyiKglab\n9uxYp9Pm6Webe/fu5aOxnzBoykwmLdnC6Ss3+d/oj0wdSwghCo1Mh5EVRbkIqE/bvKEoSjDpw8gK\noKqqapKFak+cPEmTTr2xtErvlQV27MXJneuMmmHYO2/Tpm07LCyssLa1Zf3P3/LLnJ+MmuFlbN22\njWbdXqNs1ZoA9PrgE+aMeZtZP8w0cTIhhCgcsnpm28FoKV5C6VKluHryKLWatALg6smjlCpZMtP2\nhw8f5sef5qDX63l7yGCaN2+e4wx169Zl29YtzJw1C02alsUL5tO+ffscX9dQXF1cOHfm8rPjiHsh\nOBt5k3chhCjMXuSZ7a+qqr72vI9lcJ5Bntk+fvyYwKZBmNvYAaBLSeLwwQMZTpQ6dOgQnbp0pfOQ\nkZibW7B+7nSW/7qUNm3a5HquvCwqKoo6dQPwrVQdlyKeHN66jlUrltOqVStTRxNCiAIl2+/ZKopy\nRlXVWv84Ngcuqqpa+TnnGew926SkJA4fPgxA48aNsbOzy7Bdz159cCpbnRbd+wNweOt67hzbxbYt\nmw2SKzesWLGCz76YxpMnT+jbuzcTJ07A3Nw8x9eNjo7mt99+Q6PR0L59e6pXr54LaYUQQvzTS79n\nqyjKWOATwFZRlPi/PgykAr8YJOULsrOzo3Xr1s9tp9VpMbe0fHZsYWmZ5SbvprZr1y4+GPUhQybP\nwMHZhSXTPsHSypLx48bl+Npubm4G2+RdCCFE1l6kZztNVdWxL33hPLCC1I4dO+g/4HV6jxyHubkF\nK76fws8/zs50xxtTe2foMJLsPWnXbxAANy+cYc13E7h4/pyJkwkhhHgRL/3qj6IotRRFqQWs+evv\n//xj0LS5pE2bNixeOJ/r+7dwefcG5syelWcLLYCDgz2xkRHPjmMeR+Dg4JBp+xs3btC0aVPq1KnD\nqlWrjBFRCCFENmTas1UUZd/Tv9oAdYDzpA8j+wOnVFVtkOWF80DPNr8JCQkhoF59ajd/BXtnV/as\nWcLK5csyHDK/cOEC9Ro0pGzVmjgX8eDk3u38b9QoPv/cuO8cCyGE+FtOJkitByaqqnrx6XFVYJKq\nqt2fc54U22y4d+8e8+bNIzklhR7duxMQEJBhu9JlyuBTwZ/hn88C4PC2DSz9egIJcTHGjCsMTK/X\ns2nTJkJDQwkICKB+/fqmjiSEyEJO9rOt8FehBVBV9ZKiKJVyNZ14xtfXlylTnr/HQ2JSMiUr/b2w\nV4myFdHr9YaMJoxMVVX69ezBteOHqePlyFeTIvl06mcMG/6uqaMJIV7Si6wxeEFRlPmKogQ9/TMP\nuGDoYCJrjRs2YNuv8wgPvUOSJoHVP32No2Pmz3dF/nPo0CHOHjnAzs7VmR5Ylu2d/Rn9v//x5Eme\n2Q9ECPGCXqRn+wYwFPjg6fFBYI7BEokXsm7dOqr5V+ejHi3R63U4u7hx8fxZU8cSuejx48eUdXPE\n2iL9d2I/J1uszM1ISEiQTSSEyGdk8/gCQKfT5crCFyJvuXfvHrWqVWF+iwo09HHlp/P3WPtIx4Wr\n12WvYSHyqOy8+rP66X8vKopy4f//MWTY3LR161Zat21HqzZtWb9+vanjGIQU2oLJ19eXles2MOJY\nGD5z9vJHog2b/9gphVaIfCirV3+Kqqr6UFEUv4z+XVXVkCwvnAd6tjt27KDfawPpM2p8vljUQojM\nqKoqRVaIfCAnr/68BRxUVfXmS97Q5MW2a/ceeFSpR1Dn3gAc27mZa/s3s2P7NpPmykrffv34feMm\ndDodlStV4sjhQ5mu/SxEdqWmpjJ37lxu37xB7boB9O/fX4q5ELkgJ5vHlwDmKooSrCjKGkVR3lMU\npUbuR8x9FuYW6LTaZ8fatLQ8PeT66aefsn3HLsbPX8u36/aRpJrTqnXh2qFIGJ5Op6ND29YsnT6Z\n2EOr+HzMB7w3fKipYwlRoL3wBClFUWyBwcD/gGKqqmZZtfJCzza/bbFX1b86ddp1p03vNwC4ffkc\n377/OrHRkSZOJgqSo0eP0q9LB75r5om5mYImVceQraGE3A/D3d3d1PGEyNey3bNVFGWcoijbgZ1A\nWdKLbfHcj5j7AgMD2bhhPUl3LhF/8yyrVyzPs4UWwMnRgQd3bj07Dg+9g6WVZabtIyMjmTFjBl98\n8QUXL17MtJ0Q/5SYmIiLrSXmZuk/D+wtzbCxsiApKSnTc1JSUggJCSE1NdVYMYUoUF5oP1tAC2wF\nDgB/qqr63Lfq80LPNr/5a73jmoEtcHJ1Z//vK/lu+rcMGzbsP20fPXpE3YB6lKpWG0dXd45sXce6\ntWto1qyZCZKL/CQuLo7KFcrRvrg5Nbxt2X03kRALT06eOY+Z2X9//96wYQNvDHgNawszVDNz1v2+\nicDAQBMkFwWZTqfj66+msX3zJtyLePDZl19TpUoVU8d6admeIPX0ZCegEdAY6AE8UlW18XPOkWKb\nDVevXuXTTz8lOTmZ4cOH06FDhwzbTZgwgWPX7vLG2C8AOLFnG0fXLeb4n0eNGVfkU9evX2fo4DcJ\nDr5DrVq1+Hn+Qjw9Pf/T7sGDB1SpWJ7xDT0o62bDmQcafrqgISTsAba2tiZIL
gqqD0eOYMeaX+le\nzo6w+DR+D07h1Lnz+Pll+EJMnpXttZGfbjwQCDQlffefe8ChXE8oAKhUqdILvQ8cHRuLR7ESz449\ni/sRGxuX5TlpaWmkpqZib2+f45wif6tQoQJ7Dx55brsrV65Q0t2esm42ANTyccD6sobQ0FAqVKhg\n6JiiEFm4YAHfNvPCw96SWkXhflIUGzZsYMSIEaaOliteZDbyl4Aj8ANQSVXVZqqqTjBsLPE8r7Zv\nz+5Vi7l9+RyRD8NYM2saHdq/kmFbVVWZNGkyjk5OuLsXoWXrNsTGxho5sciP/Pz8CIlKJDo5fVb/\nvbgnxCU/oWjRoiZOJgoac3MztPq/R0PT9AVrwR5ZrjEfW7BwIZOnTCUlOZkePXrw/XfTsbKy+k+7\ndevWMWrMJ3w8ZwWOLm4s/vITPGwUVixbZoLUIr/58ovPmf71l5Qu4sDNRwnMmPUjAwYONHUsUcBM\nmTyJpXNm0qm0LWEaHfsf6jh38RLe3t6mjvZScvTMNps3lGKbR4wcOYoIvS2vvp7+LuWDu7eZ9eGb\n3A2+beJkIr+4cuUKwcHBVK5cmdKlS5s6jiiAVFVlwfz5bN/8O+4ennw6YVK+e14LOdvPVuRzxYsX\n48/te54t+Xfzwml8fIqZOpbIRypXrkzlypVNHUOQXpTi4+NxdHTMcPZ4fqUoCoMGD2bQ4MGmjmIQ\nBef/lMjU0KFD0Wti+HxQV2aPeZt1P37Fj7NmmjqWEOIlXb9+nYplS1PU0wMXJ0dWr15t6kh53pMn\nT3hv2Dv4FSuKf6XybNtmmuV6s9qIYDOQ6Tiwqqods7ywDCPnKampqezcuRONRkPTpk1lgosQ+Yyq\nqlQsW5pmbim8Us6F4JgUph59zNETp2VmeBaGDhnMmV0bGFjFiQhNGrPPxrJz735q165tkPtlZxj5\nW4MkESZhZWWV6Tu7wjT0en2BGgYUhhUXF8f9Bw94pV76M/PSrjZU83bkzJkzUmyz8PuG9Uxp4IKX\ngxXFnaybq8qmAAAgAElEQVRpFvmELVu2GKzYZibT73RVVQ9k9ceYIYUoSLZv346vtydWlpY0qF2T\nkJAsd6sUAgBHR0fMzcy5E5MCQIpWz52YFIoVk/kXWbG3tyMq+e8NaWJTVRwdHY2e40WWaywHTAMq\nAzZ/fVxV1SynJMowshD/FRwcTL1aNVjaqiIBRV2YeTaUzTFmnL10xdTRsnT9+nXOnTtHgwYNKFGi\nxPNPEAaxauVKhr09CH9vR4Jjkmn5SkfmL1oi2yNmYcWKFbw/dAht/Gx5nKJyI9ma0+cu4ObmZpD7\n5WQ/28PAROB74FXgDcDseQtbSLEV4r9WrFjBmi8+ZXHL8kD6c7hicw8QFh6Bs7OzidNlbMigQSxZ\nvBBXWwtikrWMnzSFcePGmTpWoXX9+nVOnz5NsWLFaNKkiRTaF3Dw4EG2bN6Mi6srb7/9tkF3t8pJ\nsT2tqmptRVEuqqpa7Z8fe855UmyF+H/27NnDewN6c7BbTazMzbgVk0iT1aeIS9DkydVy/vzzT5o1\nacx3bfwo7mTN1cdJTNh3j3sPwjNcS1mIwi4n79k+URTFDLipKMq7QBjgkNsBhSgMmjVrRtWAhrTa\ncIyano5sv/OYmT/MypOFFtL3hPZ1sqa4kzUAlTzssLcy5/Tp07Rr187E6YTIP16k2H4A2AHvA1OB\n5oCs1SZENpiZmbFy3QY2b95MWFgYwwICqFOnjqljZap+/fpMjH9CuCYVbwcrbkWnkJiqo2bNmqaO\nJkS+8sLLNT7dZk9VVTXhBdvLMLLINlVVOXLkCNHR0QQEBOS79VELkn69e7Nu7Wq8HKyI0KQy4sPR\nfPnVV6aOJUSelJNntnWARaTv/AMQB7ypqurp55wnxVZki06no3e3rlw4foSSLg6cDY9l47btNGjQ\nwNTRCq2TJ09y6tQpmjRpki839BbCWHJSbC8Aw1VVPfT0uDHwk6qq/s85T4qtyJaVK1cyfcwItnfy\nx8rcjM23Iph2LZ5LN2XjBCFE3pZZsX2R5Wt0fxVaAFVVDwPaLNoLkSMhISHU93LAyjz9yzOwuBsh\nYQ9MnEoIIbLvRYrtAUVR5iqKEqQoSlNFUX4C9iuKUktRlFqGDigKn7p167LlTjQPNCmoqsq8i2HU\nrlHd1LGEECLbXmQYeV8W/6yqqto8k/NkGFlk29fTpjF58mTsrCwoWqwYW3bskpWLhBB5nmweL/Kd\nxMRE4uPj8fLykgX7hRD5Qraf2SqK4qUoygJFUbY/Pa6sKMpbhggpxD/Z29tTtGjRXC20SUlJDB8+\nnC5durBs2bJcu64QQmTlRX6KLQZ2AD5Pj28AIwwVSAhDSUpKooyvD8fWLcP19imGvvk67733nqlj\nCSEKgRcptkVUVV0N6AFUVdUCOoOmEsIAPv30U7wsVPb0rseMFlXY0r0uC36eY+pYwgC+/PJLOnfu\nzKRJk9Dr9aaOI8QLLdeYqCiKO6ACKIpSn/SFLYTIVx4+fEiVIg6YPd0lpZKbA090OrRaLRYWmX8r\n6HS6PLt2sfivJg3rc/ncaQKKOfDz7m1sWLOK85evmjqWKORepGc7CtgElFEU5QiwFJCxN5Hv9OrV\ni003Izj+IIbENC3jD1/H280100K7bt06vNxdsbayIrBeXcLCwoycOP85efIkJX28cLGzpmyJ4ly9\natwid+7cOY6fOMHMdiUZWtebGe1KcvvWDbZt22bUHEL8f8/t2aqqekZRlKZABUABrquqmmbwZELk\nsi5duvDuh6PpMv1bUrRavN3d2H3oSIZtL1++zNC33mBVu8r4ezjx9am79OrSicMnThk5df4RHR1N\ns8BGtC/rRH1/H/beiaNBnVqER8VgY2NjlAx37tzB0docJ+v0H212luYUsbXk7t27Rrm/EJnJtGer\nKEpdRVG84dlz2trA58B0RVEMs8W9EAY2bdo0NKlpaPUq9x9HUalSpQzbHT16lNalPKjt7YKluRkf\n1y3F8TPnSEuT3zMzs3btWlysFPr5e1DGzYZBtTxR9Gns3bvXaBmaNWtGUprKHzdjeKLVczAknvDE\nNNq3b2+0DEJkJKth5LlAKoCiKE2AL0kfQo4DfjF8NCFMx9PTkytRiWifTq65HJmAk71dls92CztH\nR0eStXp0+vT361N1Kk90Kk5OTpmec/ToURYtWsTx48dzJYOLiwsr165n+ZU4eq29wS9nIpm/aAl+\nfn65cn0hsivTRS0URTmvqmr1p3//EXisquqkp8fnVFWtkeWFZVELkY/pdDo6t2/Hw6sXqOpuz/bg\nx8z8+Rd69+5t6mh5llarxbeoJ94WqdQr7sCBu/Gk2rpx825ohu9KTxz3KfPmzKaKhx2XHiUyfMSH\njJsw0QTJhcg9L72ClKIol4AaqqpqFUW5BgxRVfXgX/+mqmrV59xQiq3I13Q6HZs2bSI8PJyGDRtS\nvbqsz/w8Go2GPr17EXzjGpX9a/Lr
b79l+Lz27t271KxWhR9a+eBsY0Fsspb3doVx9cYtfHx8Mriy\nEPlDZsU2qzGxFaRvQhAJJAN/bbFXFnn1RxQC5ubmdOnSxSDXDg8P58aNG9SpUwc7OzuD3MMUHBwc\n2Lxl63PbRURE4O1sh7NN+o8gF1sLPJ3siIiIkGIrCqRMn9mqqvo58CHpK0g1/kc31Qx59UeIbBvQ\nvx++xXx4tXVz3F2cWLFihakjGV3FihWJTErjRFgCqqry570E4lP1lCtXztTRhDAI2YhAFCrJycks\nXLiQiIgImjdvTlBQkFHvv2HDBvr36s53bUpS1NGKg3fj+elUBJqU1EK32cKff/5Jj66diXgcRVEv\nD9Zu2EhAQICpYwmRIznZPF6IAiElJYWmDeuzddaXpOxczmvdOvPL3LlGzbB7926qetpR1NEKgCYl\nndDq9Ny7d8+oOfKCBg0acP9hBPEJCYSGPZRCKwo0Kbai0Fi/fj32mihWtK3MuAZlWde+KmM/Gm3U\nDNWrV+dGVAqa1PTlxa8+TgIFihUrlmH7R48e8c7gQbRr2ZypUyYXyPd8bW1tTR1BCIOTlwZFoREf\nH08JB2uUp2sj+znbkpCUhF6vz3QIV6vVkpCQgIuLy7PzcmLIkCEsmjeXdzafp5iTFXdinzBuwqQM\n399NTEwksEE9KtokUsPNio0LLnDtymWWrVyd4xxCCOOSnq0oNJo3b862O4/ZcecxYQkpjD50i3at\nWmZaaBctXIirkyN+xXyoVrE8wcHBuZLjz5OnWbR8Ff0++ITDfx5nwoQJGbbbt28fdtpE3qruTgNf\nR8bUc2fD778THx+fKzkM4ebNmzRtVJ9iXh60bh5UKIfHhciI9GxFoVG+fHlWrtvAiGHv8DjyDs2a\nBbF0waIM2547d46xH45kf486lHOzZ/bZEHp06sjpi5dyJUu3bt1eqN0/O9Mv0rHes2cP586do0WL\nFtSokeW6M7lOo9HQIqgJrYtC/3pO7A+9RqtmTbl49TqWlpZGzfIyVFUlJiYGFxeXQjdJTRiPfGWJ\nQqVFixZcvH6T8KhoVqxdj7Ozc4btTpw4QauSRSjnZg/AsBolOH/lqlGfmQYFBRGHNUsvRnMqTMP0\nE9G0b/dKpssftmvdklfbtubnL8ZTv04txn36qdGyApw/fx57RUvH8i54OVjRs5ILiXHR3Lp1y6g5\nXsbJkycpXtQLv+I+FHFzYceOHaaOJAooKbZCZMDX15ezjxJI0aZPZDr5MJYirs5G7aE5ODhw+M8T\nONVqzRG1BC37DmLZqoyf127YsIFDB/Yxp0Npprfx47MWJfj6q2lGHXJ2dHQkNukJqbr09aSTtXoS\nklNxcHAwWoaXkZKSwquvtGVAOSuWdS7FR3Vd6dOzOxEREaaOJgogGUYWIgNt27ZlWeMgmqzdS8Ui\nThwJjWTxcuMvPlG0aFEWLvn1ue1OnjxJaVdbXG3Tv6XLu9tiaaZw/fp16tata+iYAFSrVo3GTZsx\n9egRqrkpnHqso0fPXvj6+hrl/i8rJCQES/Q08HUEoLKHHX5uKVy6dAkvLy8TpxMFjRRbITKgKAq/\nrlzFgQMHCA8P57uAAEqXLm3qWJlq1qwZM775irD4VIo5WXH6gQadClWqVDFaBkVRWLl2PUuWLOHa\n1auMr1GDvn37Gu3+L8vLy4vYpBTCNal4O1gR/0TLvehEihcvnmF7rVbLDzNmcPL4n5StUJExH4/N\ns712kffIClIiR1JTU9m6dSvx8fE0bdqUkiVLmjpSofXm66/z269LcbA2JylNz3czZzFs2DBTx8rT\n5vz0IxM++ZjKXg5cf5zIoHeG89kX0zJs2693T64e3UugjyUXorQkO/ty6M/jeXrylzC+l971Jxdu\nKMW2gEtJSaFVUBPSIu5TwsmGfSGRrN+8lcDAQFNHK7RCQ0O5fPky9erVw83NzdRx8oVLly5x6dIl\nypQpk+mQ+6NHjyhb0o8FHUpgbWGGXlUZvf8xC1f/Ll/v4l+ys+uPEFlatGgRdjEPWdWpGmaKwpZb\nzrz/zhDOXr5q6miZiouL44eZM4l4+ICWbdrSuXNnU0fKVSVKlKBEiRKmjpGvVK1alapVs9wxlLS0\nNCwszLAwS/8ZaqYo2Fqak5qamuk5cXFx3Lp1i2LFiuHt7Z2rmUX+I7ORRbY9fPiQGq7WmD19AbSm\nlzPheXgmp0ajoX6dWhz4bRbJxzfw/uCBfPP1V6aOJfIBHx8f/P2rM+dsNFcfJ7H8cgyJijX169fP\nsP2ePXso61eCN7q0p3K5svw0e7aRE4u8RoqtyLbAwEBW3Y7iblwSWr2e6WdCady4saljZWrDhg04\n6xJ4v04ROlV0Y1yDIkydMoW8/LgjNDSUPt270rhOLT784H2SkpJMHSnPS05OJiAgAO8ibtSoUZ24\nuJxvv60oCpu2/UGpwA6sjnBAX7Y+B478ib29/X/apqWl0adHNxa1LM/hbjU40KM2k8d9wrVr13Kc\nQ+RfMowssq1Vq1aM+nQiDT8ZS2paGs0CG7Ni4WJTx8pUUlISTtZ//37pbG3Bk9RUVFXNdN3j0NBQ\ngoODCQgIMPom73FxcQQ1akCfEg70L+nM/N2/0/fWTX7fut2oOfITnU6Hr7cHrhY6Ovg5cTzsJn5F\nvXgUG4+VlVWOru3k5MTc+Quf2y4iIgIzVaWJrzuQvgZ3TR83rl+/TsWKFXOUQeRf0rMVOfL+iBHE\naxJJ0CSyY+/+PD0pp3Xr1px+mMT+O3HcjU1h9plounR8NdMl+rp27kS50qXo/kobvFyd2bhxo1Hz\nHjhwgJK25owJKEWgrxvzWlRk9959udJTK6h+//13kpKS+aJFCTpWdGNSUAkUVcuMGTOMlsHT0xMd\nCkfuRwNwLz6Zsw+iKV++vNEyiLxHerYix8zMzLC2tjZ1jOcqVaoU23fuZuS7w9h85REtWnXku5mz\nMmy7aNEiDuzYzpmBjfF1smXJxXsM7NOL2KQUo+W1sLAgRat71vN+otOj16uYm5tnes6pU6cICQnB\n39+fcuXKGS1rXvHo0SOsLRSszNNHKizNFewtzYmKijJaBisrK5atWk2/nj3wdbYnJCaeiVOmUqlS\nJaNlEHmPvPojRAb69+9P2um9zGvrD4BeVXGbuRNNYqLRhpOTk5OpV6sGtWxSaeTlyNIbkVRo0or5\ni5dk2P7j0R+yfNFC/L1dOHE/ihk/zqFvv35GyZpXaDQaPN2caVvGhWalnDl+P4F1V6O5fvuO0Wdp\nR0dHc/PmTYoXL57pfsWi4Mns1R8ZRhYiA/7+/hwNiyEhVQvA/tAobC0tMi20YWFhDHpjIO1aNmfa\nF5+j1WpznMHW1pYDR4/h0awze+xK03X4h8xdkPEzw7Nnz7Js0QIO9ajF8lYV2PRqNYa+PYSUFOP1\nxF9WamoqLVu2pIRvcdq0aYNOp8vxNR0cHNjyxy7230vi490hbL2dwG8rV5vkdSg3Nzfq1auX6
4U2\nODiYdevWcfz48Vy9rjAs6dkKkQG9Xk/d6tW4e+smpV3suByp4YtvvmXEiBH/aRsbG0v1qpUJcNVR\n1sWS7SHJ1G3ZkXkLM96+zxA2btzInLEfsLrN3xNwyi06yulLVzJdftCUdDodXm4uOChp1C/uwNF7\nCaSY2/AoWp5HZ2X9+vUMen0Alb0duROVRKfuvfjx57mZTvATxic9WyFegpmZGacvXmbOkt/o+v4Y\nTpw9l2GhBdixYwc+1jpeq+ZGA19HPq7nzpJff8tywYPc5u/vz+mwaM4/St/lZ+31h9jY2WW6mMLl\ny5cJrFeX4p4evNqmFQ8ePDBaVoCFCxeSkpzEN6396OvvwTetS6JJ0LBq1Sqj5ngZ4eHhlPItjqud\nNUWLuHH69Gmj3l+n0/HGwAGMa+jBR3VcmN7ci83rVnPkyBGj5hDZIxOkhMhCz549n9smL4zglCpV\nijkLFvLqG69jjoqDoyO/b92OhcV/v8VjYmJo3awpH/l70+LVyiy+EkKHNq04df6i0TZPv3//Po7W\n5lhbpN/P1tIMByszQkJCMj1Hr9cTHR2Nq6trlpPEDEGn01GhdEnKuljQt44Hpx5oCKwfQPC9MKOt\nDpWQkIBWm0ZZNxsA7CzNKetuR2hoqFHuL3JGerZC5FCbNm0ISzFj+aVoToQl8PWJaPr37Z3j9zpf\nVvfu3XkcHcPV23e4c/8BNWvWzLDdyZMnKeNsyxvVilPCyZbx9Urx4P49wsLCjJb1rbfeIiZZy8Zr\n0UQmpbH+ahQJqXoGDBiQYftDhw5RzNODciVL4OPpwf79+42WFdJfw3qSmsongcWpV9yRYXW9cbez\n4LPPPjNaBmdnZ7y9PNkVnD7UHhr3hAvhCZn+fxZ5ixTbfGzr1q0E1KpOtQrl+GzqFPR6vakj5XkR\nERGM+uB9+vfszsIFC3KlV+rq6sqRYyewqd6S40opurz5Lr8sMN7z2n+ytLTE09Mzyx6qo6MjEZpk\n0p5u8h7zJI3EJ6kZroYE6T33DRs28O2337Jr165cyVmiRAl+nr+Q1VeiGbYlmHVXY1iw5NcMe4nx\n8fF079SRHwNLEjK4Cb8ElaFnl87ExMTkSpYXkdHkLQWM+j2nKAqbt+1gywMzBmwKYez+h8yc/ZO8\nUpRPyASpfOrIkSN0eqUt79RwxtnagkWX4unz9vuMnzjJ1NHyrNjYWGr7V6WNpzVV3ez4+XIEnV8f\nzJTPPzd1NKPS6/V0bt+OuBsXaeJlz+8hcbTp2Zdvv//vwg+qqvLmwNc4vGsbVdwtOR3xhDffGc7k\nqcb7nJ05c4aBnV7hSPcazz4WtP4Cc1ZvoF69ekbJoNPpcHW0o4KrJW3LunLqgYYDd+ONOoz8F71e\nT2RkJC4uLkYfPRHPJ1vsFTAj3n+P6IOr6F45fUm4m1HJLAg248rNYBMny7sWL17Muq8nsqxNek/g\ngSaFusuOE5+YVOhmc2q1WhYtWsTtW7eoXacO3bt3z/BzcP78edo0C2RWy6JYW5gRm6Jl+B/3uRN6\njyJFihgla3h4OJXKluFo77oUc7ThoSaFhqtOce7yVXx9ff/TXqPR0KNbV25du0Jxv9KsWb8+V7KG\nh4dTr3Yt4mMisbK1Z/P2HQQEBOT4uqJgkS32ChgbWzsS0v7+ZSbhiQ5b24yHAUW6tLQ07C3/Hl61\nszBHq9NluTZyQWVhYcHgwYOf2y4qKgpvJ9tnE5lcbCxwtrMmJiYmywKWlpaWa5uqe3t7M2HyZJp/\nPpUAHzdOPohmzNhPMiy0er2eyuVK46Ym0b6kE38Gn6VyuTLcj3ic416gt7c3IWHGnbUtCg7p2eZT\nISEh1K1Vk0Bvc5ytFLbcSWbuoqV06dLF1NHyrLCwMGr7V+NDf2+qFnFg+vkwSjZsmemKTCJ9FaSK\n5cowsKIdtX3s2Xsngd2PzLl++06GxfTo0aP069md0IfhlC/px4p1G6hRo0YGV355Fy5c4OrVq1So\nUCHTa+7fv58OrVuwpEs5LM0VdHqVwZtuM+OXRfTv3z9XcgiRFXnPtoDx8/PjxOkzlG43ALv6XVn9\n+2YptM9RrFgx9hw8xGEbX6bcfkK9rv346Zd5po6Va/R6PV9++SUDBw5k2bJluXJNNzc3tu3YxdZH\nNry+6S5ndZ7s2LMvw0IbHR1Nlw7tmVbLm6j3WvFheUdebdua5OTkXMni7+9Pr169sizeycnJmJsp\nmD/9yWamgJW5GU+ePMn0nOjoaE6dOkVEHt6LWeR/0rMVogDQ6/XUqFKR2Af3qVfUmV13I2nfpRvL\nVqw0WoZDhw4xemAfdnaq+uxjdVeeZt2ufVStWjWLM3NPamoqRYu4UdvDgmalnDl2P4F9IYncj3iM\nk5PTf9pv3ryZgf374uFgTURcMt9+P4NBLzC8LkRmpGcrXtiihQsp7uWBs4M9r/XuJRuWv4BvvvkG\nRxtLnKwtcHOy5+7du0a9/5IlS4gIDeX4aw1Z0K46B/o2YM3q1URGRhotg5eXF3ei44lJSQMgIvEJ\nEQmJeHh4GC2DlZUVp85fJMzCg+knHnMt1ZGjJ09nWGg1Gg0D+vVlbD13vmnqwZfNvPnow5FZLqwh\nRHZJsRX/sm/fPsb9byQrWpbjXP96aC78yQfDhpo61nPFxMRw+/Zt0tLSjH7vP/74g3Fjx9C1ogtj\nGvvgZw/VKhp3e7tbt25R3s0ee8v0OY+lnO2wsTA3atEvX748A996ixbrz/Hu/lu0WH+OMWPH4uXl\nZbQMkL6a1uXrN4mMT+J6cEimveqwsDAcrC2oUMQWAB9HK0q6O3Dz5k1jxhWFhBRb8S87d/zB6xU9\nqe7phLutFRPrlWTnjj9MHStLn0+Zgl8xH5rVq0PlsmW4ceOGUe8/evRoqnjY0a1yEap52fNJYHGS\nUrXcv3/faBk6d+7MmfA4Dt+PRq+qzDsfCooZ/v7+GbY/ceIElcqVwc7GmgZ1axMcnDuvjH09/Xvm\nr1pH46EfsXLTVsZ+Oj5XrmsIxYoVQ/NEy42o9GfKDxNSuRulKZT7AAvDk1d//mH//v3MnDUbnU7H\n4Lfe5NVXXzV1JKNzL+LB8fi/F9C/GZOIm5urCRNlbd++fSyYPYPT/evjZW/N3POh9O/ZnRPnLhgt\ng5WVFSm6v1cSStWlz1WwsbExWoa6desybspUekwcT3KaDmc7G1au35Dh6y6PHz/mlTateauKPTVr\n+LEz+AHtWrXgyo1bubLmcFBQEEFBQTm+zv+XlJREWFgYPj4+ma529TIcHBxYumw5A/v3xdPRhvDY\nJL757nv8/PxyIa0Q/yYTpJ46dOgQnbp0pfvwMVhaWrF69jTm/TyHzp07mzqaUcXFxdGobh1KWTyh\nmJ0l624+ZvnadbRq1crU0TL0/fffc33Zj3wdWBaApDQdJebuIzUt5/vJvqgzZ87QKKAOTUs6UdHD\njo3XoolJU4jV5M4s3Jel0WhwcHDI9N937NjBJ0MH
MrH+379EDdp+n5PnLuXZQrN9+3b69+6Fk7Ul\ncSmpLFm2PNd+GY6KiuL27duUKFHC6KtBiYJHFrV4jp9/+YXOg0fQrHNvACytrZn14095utgu++03\nPps4nuTkZLr36sW0r7/N8UICzs7O/Hn6DCtWrECj0bCvdWujzSTNjtKlS7MwPJ7ENC32lhbsDomk\nrJE3Cq9VqxYbtmyjd/eu/BmWiKuHN+E3TPfcL6tCC+mv80TEp5Cq02NlbkZMspbElDRcXFwybK/T\n6ViyZAm3bt6kRs2a9OjRw6iLgMTGxvJa716sbFuJej6unHoYS8/+/bh2OzhXVoZyd3fH3d09F5IK\nkTkptk+pqgr/+AGS11cU2r17Nx+9/y4LW1bAw86KkZvWMMHSimlff5Pjazs6OjJkyJBcSGl4HTt2\nZPOGddRbuZmSro7ciE5g4zbjP2Nu27YtsZr8MWu7Tp06NG3ZmvGH9lDR1YJT4Sl8/PHHODs7/6et\nqqr06NKJ22ePUcVVYc1iHUcOHmDm7B+Nljc4OJiiTnbU80nvidcp6oKfqwO3bt0y2pKRQuSUDCM/\ndfDgQTp37UaP4WOwtLZm9axpzP3pxzy7UMQH7w6nyPndfFCnFADnH8Uz9EQEl27eNnEy41NVlfPn\nzxMZGUmNGjUK1A/g06dPM2zQm9y/H0ZAQABzFy3G09Mzx9fV6/WsX7+e4OBgatWqRcuWLTNsd+bM\nGTq2bs7MFl5YmpuhSdXxzvZ73LoTkis5XsTjx48pX7oke7vVooyrPXfikmi+9gwXr93Ax8fHKBmE\neFEyjPwcTZo0Yd2a1cz4YRY6nY75c3+mY8eOpo6VKScXV+4n/f2ay/2EFBwdHU2YyHQURcm1JQH/\nv4sXLxIREYG/v7/RistfIiIiaN+6FVMDfGlYqwpzLtygS4dXOHz8ZI5HXszMzOjevftz28XFxeFm\nb4Xl0yWZ7C3NsLexIiEhIdPPR2pqKrdv36ZcuXIZbl7/sjw8PPj2+xm0/nAUlb1cuRIRwxdffSOF\nVuQr0rPNpx4+fEj92jUJ8rTB08aCJdce8dvqtbRu3drU0QoEVVUZ8e5w1q5YThl3J65FxrN24yaa\nNGlitAzr169n3rhRrGpTEQC9quI37yB37oXh5uZmlAxxcXFUKl+WTn4W1Clqz94QDWeTHLhw5VqG\nhfTLL79k4rhPUAAUhZmzf+Ltt9/OlSx3797lxo0blC1bltKlS+fKNfOCR48eMej1AZw4cYLixYsx\nd8FiateubepYIptki70CKCIigoULF5KcnETnzl2oVauWqSMVGHv27GFov17s7VoDJ2sLdt+NZMSx\ne4Q+NN76ubt372bU6/040K0G5mYKEYlPqL70KNGxcUZ9rejKlSsMev01bgffobq/PwuX/kbx4sX/\n0+7q1avUrFaFiUG+VPG041SYhq+PPiA07KHRRwXyC1VVaVC3FkVTHtChjCOXHyXx6/UkLl29bvTF\nQETukGHkAsjLy4uxY8eaOkaBdPv2beoXdcHJOv1bpLmfOw82nc3VreOeJygoCK+yFeix7TL1PGxZ\nex8+i+4AABZNSURBVCeGMR+NMWqhBahcuTJHT5x+brvdu3fj7WhFFU87AOoUc8DRypwjR47k2bkP\nphYdHc3lK1f5uFNJzBSFoFLOHHuscvToUfmcFTCygpQQGfD392dvaCRhCSkALL/6gAqlS2ZaaM+f\nP0/LJo2oUrY0Q958nYSEhBxnsLCwYOvO3fT433jUpt35Zu5CJk6ZkuPrGkq1atWI0KQRm5z+jnOE\nJpW4J1oqV66cYfvk5GQmT5xAn25d+GzKlCx35imo7Ozs0Or0xKXoANDpVR4nPslwLee/HDp0iK+/\n/prly5ej0+mMFVXkkAwjiwLh7t27REZGUqlSpVxZXQjgu2+/YdKECbjZ24KVNVt27MrwneOH/9fe\nncdlVeZ9HP9c7JvIJioqmBLKuOSaWC7lMpGlYuVY01TTNtlUM1M9acvUlO3p06Q2aVk2LVa2ZxqZ\nNjqapiY2KrgrIEphgAg3AgL3ef6AmaEnUXl13/eBm+/7n/Jw7nO+L17Iz+tc1/ld331H/969uH9A\nHAPbhzNnaz5V8cl8vCzdJTlakovGjGL92jWcHR3E7sIKxqddxjvvvveT85xOJxeNuoCQIzlcEh/B\nJzlHsbokseyLlc3+tTtXe+QvD7Fw3hzO7+DPnlKLiK6/YPmXq07azev5ObN57OEHSYkLZn9JDfG9\nBrDks89d0vlLXENztuKVLMvinjv/xGsLF9KxbShHq50s+2Jloz2Bm6qkpITCwkLi4+NP2voQ4M03\n3+SDpx/ktTE9ADhR66TTvFUcKyvz+CPf5mDx4sVs3LiRkSNHMnHixJOes337diaMHknGVYPw8/Gh\nutbJOYs28eX6jfTo0cPDie23ZMkSNm7YQHxCAtdff/1Jf9ZqamoIDwtl9i870z4sgFqnxbR/HuGF\n1xc32w5vrZHmbMUrLV++nKWLF5Fx9blEBPnz1o58rpkyma07d7vk+hEREY12Vvq3kJAQCo+fwLIs\njDEcrazGx8d4bG63uZkyZQpTpkw55TnV1dUE+fnhWz+K9fMxBPr5UlPjuTabzcmECRNO+6ph3VaX\nFrGhdT9Xvj6Gjm0CKSoq8kBC+bk0Zyst2s6dO7mwcwQRQXW/gCYltWfnPs829rj44ospD4ng5pW7\nmPdtLmlLM5l2zzQ92juFPn36EBzdjnu/2s+6Q8X8z9p9RMd1bpWj2jMVHh5Or+Rk3so6SvmJWjbn\nO8gscDB06FC7o8kZULGVFi05OZlVh0r+s2H5R3sKSE7s3uj5O3bsIG1cKucN6McD907nxIkTjZ57\npoKDg/nn+g30vfImDvYYzoPPzuXhRx/92ddtLqqqqpg1axa33HQj8+fPx+l0nv5Dp+Hv788Xq/5J\nVXIKj+4/gdX7fD7/cpVLmmB4s0+WpXOkbSI3LTvIWznw4SefNtvNI+THNGcrLZplWUy7+07+/vIr\ndIwI5eiJxudsDx8+zMC+fbirbwd6x4Txv/86TLdhY1nw6t89H7yFqK2tJXXMKMpysugb7cuGghoG\nXJDK399YZHc0kWZJC6TEq+Xk5FBUVETPnj0bXY28YMECVs59ggWj6x5VllRW02PhWsorKvHx0UOe\nk9m0aROTL72I50a1x9fHUFnj5ObP8ti1dz8dO3a0O55Is6MFUq3cDz/8wMKFC3GUlTFh4kQGDx5s\ndySX6tq1K127dj3lOf7+/lQ02OT9eE0tfr6+jb5qYlkWa9eupaCggEGDBnHWWWe5MnKLUFFRQVig\nH74+dd+jQF9DcIAfFRWN79XrcDjIz8+nc+fOhISEeCqqSLOmkW0rcOTIEYYM6Mew6EA6Bvvy2q4j\nvPrWO4wbN87uaB519OhRBp3Th9TYQHpHhTAvq4C0397MjMcf/8m5lmVx7dVXsWbl58RHBJNV4ODN\ntxe3uu+
Zw+Ggd3IPRsbUMqBDMKsOHifPP5ZNGf866QKwD95/nxuuv442Qf4cP+HknffUr1taFz1G\nbsVmzHiE7A//zuwL6x6ffpH9A0/uqyAjc4fNyTyvoKCApx5/jCPf5TPqoou54cYbTzqyTU9P5/br\nf81TI2IJ9PNhxw/HeXZLKUeKjtqQ+sxUV1ezcOFCcrKzGXzuuUyaNMklDSJycnK4ferv2Ld3D+f0\n68/z81+iXbt2Pznv+++/JzkpkYfOa0f3qCCyjhxn1uaj5OQdbtY7UmVlZZGZmUliYqI2AJCfTY+R\nW7HSkmN0Cf3vO59dwoMpc/xgYyL7tG/fnr/OmXva8/Ly8kiMDCLQr24ut2dMMMUlhzzaG7kpamtr\nuTT1lxTu207PtobFr77I5o0beOLpZ372tbt27crSz7847Xl79uyhS2Qo3aPqGnn0ig0hPLCMnJwc\n+vTp87NzuMOL8+fx0H3TGdo5hozvirlh6m088thPn3SI/FxaFdIKjJ84kQU7CvjqUDH7j5Zz//ps\nJqjJ+SkNHjyYLd85OFxa92rQ0r3H6NUzqVkWWoB169axL2srfx4aw5TeMcwYFsNzs2dTWlrqsQwJ\nCQnkFTsocNR9zw4eq6K4vOqkOwQ1ByUlJUy7+26Wp/Xj9TFJrLliAPOfn8vu3a5piCLSkEa2rcDI\nkSN5bv5L/M8D91FeXs5lV0zmiadn2h3LZZxOJ4sXLyY3N5dJkya5pDFC//79eXLWs/zxjjvwNYZO\nneL4NP1TF6R1j9LSUqJDA/6zkKlNgC+B/r6Ul5efsqm9KyUkJPDoE08y/YH76RodRnaRg+dfmEdk\nZKRH7t9UBQUFxISF0C2ibhFXdHAASe3acujQITXXEJfTnK20aE6nkz49kzhyKI/48GB2FjmYM28+\nN910k0uuX11dTWlpKVFRUc26QX5hYSG9eiYxJTGIPrHBfJ7tIMc3ls3fbvN47gMHDnDgwAGSkpKI\nj4/36L2borKyku7xnXlmSBfGJ7ZnQ/5Rrl6+k+279tChQwe743kFp9PJs/87i+XLlhLbvj2PPP4k\niYmJdsdyKy2QEq90//338+682Xx19XmE+PuyZF8Bt6/cSUlFpd3RPG7r1q3ccuNvOXgwjwEDBvDK\na29oA/LT2LRpE5dPGI/DUYavnz9vvrOY1NRUu2O5jNPppLCwkIiIiEY30nCnu+/8E8vfe4OJ3YLJ\nK6theV41/9qe6dXvaKvYildKS0sj5sAWnh1dt2fq8epaOr2wklqnfvbkzDidToqLi4mMjPSqfta7\ndu2qWzRXWEiN02LO889zww03ejRDm9AQ5oyNIzqkbq3DnIxiLv/Tw9x6660ezeFJjRVbLZCSFm3E\niBF8uv8IBeV1G4+/nnmIqDA1UpAz5+PjQ0xMjFcVWoBJ4y9hbGw1r09I4JlRHZl255/Yvn27RzP8\n/ykM6yTHGioqKmLFihVkZGTgbYM1LZCSFu2uu+5iRfoy+ixcTViAHyec8MGnS+2OJWKr8vJyDuQe\n5KlBdZtydA4PpF9cGzIyMjz6GtbUqVOZ+darTOwewsHSarKKa1iUlnbSc7/55hvGXTSWLm2DKCir\n4MKxqby+6G2vaaWqYistXvqKL8nOziY3N5eUlJRWuWG7SEMhISGEhYSwp6iSHjHBVNY42VdU4fEF\na089M5POnTuTvvRT2vfpwNcfP97o4rPrrr6K3yaHMjwhnKqatjz4z5V8+OGHXHHFFR7N7C6asxUR\n8UJLly7l2quvIjk2jNyjx7l4wiReeuXVZruqPjQ4iJcviSc0oO5x/qvbiki55m6mT59uc7KmUQcp\nEZFW5NJLL2XL1u1s2bKFuLg4hgwZ0mwLLUDf3r1YkZ1PWo8ISipq2Fxwglv797c7lstoZCsiIrY7\ncOAAqWNGUVpSTFnFCaZNm8ZfHplhd6wm06s/IiLSrNXU1JCXl0dERESz7Tx2Oiq2IiIibqY5W/Fa\nTqeT1atXU1hYSEpKSrNuESgirZNGttKi1dbW8qtJE9mZsZGzI8NYf6iIdz74iNGjR9sdTURaIY1s\nxSt99NFHHNy6mTWX9yfA14cvcwu55frr2HfwkN3RRET+wztac0irlZeXx8B2YQT41v0oD42LJO/7\nAptTiYj8mIqttGhDhgxhaXYhOceOY1kWz397kCEDvOfdvOZg79699P1FT+KiI0gZNJDCwkK7I4m0\nOJqzlRbvb3PnMm3aPfgaQ2L3bnzy2ed06dLF7lheobS0lC4dYxnaMZhzO4Wx8sAxsiv8OHyk0Gt6\n1oq4knb9Ea912x13UFJaRu7hfDK2ZarQutCiRYsI87W4dXB7BnUK457z4ygtPcbXX39tdzRxoZqa\nGh5/7FHGjBzGNVdNITc31+5IXkfFVryCv78/kZGRzbodXUvV8AHVv/9Xo1rvctvUW3j/pdkMJRfn\nztWcN2QwRUVFdsfyKnqMLCKNcjgcdO7QjkHtAxkcF8aXB45xuDqAvO9/UMH1ErW1tYQEB/HqhLMI\nq98EYOY3xdz84EyuvfZam9O1PHqMLCJNFhYWxtasXZS0iefN3RUEJvQma89+FVov1HBs5LROvcm7\nNJ1GtiIirdxtt97C2qXvc0nXYPYfq+brQsPWzB1ERUXZHa3F0chWREROas7zL3DtH6azNagH4QNT\n+XrTZhVaF9PIVkRExEU0shUREbGJiq2IiIibqdiKiIi4mXb9ERGRZmHnzp2sWbOGqKgo0tLS8Pf3\ntzuSy2iBlIiI2O6zzz7jN1f+isGdwvjOUUNEl0RWrl5DQECA3dGaRAukRKRV2LFjB+f0SibA35/k\ns7uRkZFhdyQ5A7//3U3cNTiK2/pHMmNYDI7D+3j77bftjuUyKratxObNm5k8cTzjRl/AywsWoKcO\n4o0qKytJHTuaYWHHWDSpG5fEVjHuorGUlJTYHU1Oo7D4KN0igwDwMYb4Nr4UFHjP3tQqtq3Ajh07\nuHjMKFLK9vObkGPMeug+Zv/1r3bHEnG5ffv24VNTxUXd2xLo58OIhHDahfiRmZlpdzQ5jeHnn8c7\nO0uorrXIKalk3aHjjBgxwu5YLqNi2wq88fprXNcjlpvPiefSxPbMuyCRl/421+5YIi4XHR1NsaOC\n0qoaAI5X11JQWkF0dLTNyeR03nh7Mcfb9eTKD/byl3WFzHpuLikpKXbHchmtRm4FfHx8cDZ4bKwm\n4+KtOnbsyB/+8Efue3k+A2KD2F5YxeQpV5GcnGx3NDmNmJgYVq5eQ21tLb6+vnbHcTmtRm4Fdu/e\nzbAh53LnOR3pEBrIkxmHuPOhGfz+ttvsjibiFitWrCAzM5OkpCTGjRunf1yKxzS2GlnFtpXYtm0b\nM594jPKyMi678tf85ppr7I4kIuJ1VGxFRETcTO/ZioiI2ETFVkRExM1UbEXklPbv30/qqAs4O6EL\nV0wY71WNBkQ8RcVWpJlbt24dL774Ivv37/f4vR0OB2NGDmd4bQFv
XZBAp4KdjBs7mtraWo9nEWnJ\nVGxFmrFLUn/JmAtG8MT0P9KrZxIzZ8706P03b95MbIDhjgEJ9IgKY8bQ7hzJP0xubq5Hc4i0dCq2\nIs3Uu+++y5pV/+CFS85i7rizeHBEJ/58372cOHHCYxlCQ0MpPl5Fda0TAEd1LeVVJwgJCfFYBhFv\noA5SIs3Uhg0bSIoOJjqkbk/PPu1D8TEWe/fupVevXh7JMHDgQJL69mNKehYXdmzDx7klXDF5Mh06\ndPDI/UW8hUa2Is3U8OHD2V14nB/KqwHYku/AwnD22Wd7LIOPjw8fL0sn7Y/3caTvaH7/8JPMf3mh\nx+4v4i3U1EKkGZt82SSWLPmEiCA/jlXWMuu52dx+++12xxKRRqiDlEgLlZWVRVZWFsOGDSMuLs7u\nOCJyCiq2IvIj69evJzs7m759+9KnTx+744h4BbVrFJH/mHb3nfx64iV89PSDjBl+Pi+9ON/uSCJe\nTSNbkVZm27ZtXHzBCL6eMpCIIH/2l5Qz8t0M8guOEBYWZnc8kRZNI1sRAeDw4cP0jG1LRFDdK0Xd\nI0IJDwqgsLDQ5mQi3kvv2Yq0Mn379mXb90fZkH+UlLhI3tuVj29gEJ06dbI7mrRyK1eu5PP0dGLa\ntWPq1KlERETYHcll9BhZpBVKT0/nmquupLKqitiYaD78dBn9+vWzO5a0Yi8vWMCfp9/N2C6BfHcc\nDjrD2PztVsLDw+2O1iRajSwiP+J0OiktLaVt27YY85PfDSIe1aFdNPcOCqdbZBAAz2ws4pppj3LL\nLbfYnKxpNGcrIj/i4+NDRESECq00C8crKokO/u/MZlSQweFw2JjItVRsRUTEdmkTJvDi1hIOlVax\n8VAZX+WVk5qaancsl1GxFRER2734ykJ6XTiep7+t4LPicN7/eInHNtzwBM3ZioiIuIjmbEVERGyi\nYisiIuJmKrYiIiJupmIrIiLiZiq2IiIibqZiKyIi4mYqtiIiIm6mYisiIuJmKrYiIiJupmIrIiLi\nZiq2IiIibqZiKyIi4mYqtiIiIm6mYisiIuJmKrYiIiJupmIrIiLiZiq2IiIibqZiKyIi4mYqtiIi\nIm6mYisiIuJmKrYiIiJupmIrIiLiZn7uvLgxxp2XFxERaRGMZVl2ZxAREfFqeowsIiLiZiq2IiIi\nbqZiKyIi4mYqtiJuYox5wBiTaYzZaozZYowZ7OLrjzTGfHqmx11wv4nGmJ4N/rzKGDPA1fcR8UZu\nXY0s0loZY1KAcUA/y7JqjDFRQIAbbtXYCkd3rHxMA5YCu9xwbRGvppGtiHt0BAoty6oBsCyr2LKs\n7wGMMQOMMauNMd8YY9KNMe3rj68yxjxnjPnWGLPNGDOo/vhgY8x6Y0yGMeYrY8zZZxrCGBNijHnF\nGLOh/vPj649fZ4z5oP7+u40xTzf4zI31xzYYY14yxsw1xgwFJgDP1I/Su9Wf/itjzEZjzC5jzPmu\n+MaJeCMVWxH3+AKIry9CfzPGjAAwxvgBc4HLLcsaDLwKPNHgc8GWZfUHbqv/GsBOYJhlWQOBvwBP\nNiHHA8CXlmWlAKOAWcaY4PqvnQNMBvoCU4wxnYwxHYE/A+cC5wM9AcuyrK+BJcA9lmUNsCzrQP01\nfC3LGgLcCTzchFwirYoeI4u4gWVZ5fXzmcOpK3LvGGPuBTKA3sAKU9f1xQfIb/DRt+s/v9YY08YY\nEw6EA6/Xj2gtmvb39pfAeGPMPfV/DgDi6///S8uyHADGmCwgAWgHrLYs61j98feAU42kP6z/b0b9\n50XkJFRsRdzEqusYswZYY4zZDlwLbAEyLctq7JHr/59rtYBHgX9YlnWZMSYBWNWEGIa6UfTeHx2s\nm1OuanDIyX9/HzSl9du/r1GLfp+INEqPkUXcwBiTZIxJbHCoH5AL7Aba1Rc7jDF+xphfNDhvSv3x\nYcAxy7LKgLbA4fqvX9/EKMuBPzTI1e80538DjDDGtK1/5H15g6+VUTfKboz6s4o0QsVWxD3CgNfq\nX/35F5AMPGxZVjVwBfB0/fFvgaENPldpjNkCvADcUH/sGeApY0wGTf87+yjgX7/gKhOY0ch5FoBl\nWfnUzSFvAtYC2cCx+nPeAe6pX2jVjZOPwkXkJNQbWaSZMMasAu62LGuLzTlC6+ecfYGPgFcsy/rE\nzkwiLZ1GtiLNR3P5l+/Dxphvge3AARVakZ9PI1sRERE308hWRETEzVRsRURE3EzFVkRExM1UbEVE\nRNxMxVZERMTNVGxFRETc7P8AXsVpWPQcdcQAAAAASUVORK5CYII=\n", + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plot(X_itml, Y)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Sparse Determinant Metric Learning\n", + "\n", + "Implements an efficient sparse metric learning algorithm in high dimensional space via an $l_1$-penalised log-determinant regularization. Compare to the most existing distance metric learning algorithms, the algorithm exploits the sparsity nature underlying the intrinsic high dimensional feature space.\n", + "\n", + "Link to paper here: [SDML](http://lms.comp.nus.edu.sg/sites/default/files/publication-attachments/icml09-guojun.pdf). \n", + "\n", + "One feature which we'd like to show off here is the use of random seeds.\n", + "Some of the algorithms feature randomised algorithms for selecting constraints - to fix these, and get constant results for each run, we pass a numpy random seed as shown in the example below." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/bhargavvader/Open_Source/metric-learn/venv/lib/python2.7/site-packages/sklearn/covariance/graph_lasso_.py:252: ConvergenceWarning: graph_lasso: did not converge after 100 iteration: dual gap: 2.377e-04\n", + " ConvergenceWarning)\n" + ] + } + ], + "source": [ + "sdml = metric_learn.SDML_Supervised(num_constraints=200)\n", + "X_sdml = sdml.fit_transform(X, Y, random_state = np.random.RandomState(1234))" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAdsAAAFsCAYAAACEtRP5AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzs3XdcVfX/wPHXYW8ERDaoqDhQwb1x5h65zVw5MlelmSMr\n00zNbOfKlZozR27JvTV3KuEAt6DI3nLv+f2BX8vfBVMucAXez8eDRx563895H9PefD7nMxRVVRFC\nCCFE3jEydAJCCCFEYSfFVgghhMhjUmyFEEKIPCbFVgghhMhjUmyFEEKIPCbFVgghhMhjJnnVsKIo\nsqZICCFEkaOqqvL/v5dnxfbJDfOyeSGEEOKVoig6dRaQYWQhhBAiz+Vpz7aoS01N5euvvyb06jWq\nBVRl+PDhmJjIb7kQQhQ1Sl4N9SqKohblYWSNRkOzFq+RqpjhX7cxf/6xhQq+Pqxe+auhUxNCCJFH\nFEXJ/3e2Rdnp06cJv3mLL9bsxsjYmIZtO/Nu2zrcvXsXDw8PQ6cnhBAiH8k72ycyMjKYMXMmbdt3\n4O2h7xAREaFXe2lpaVja2GJkbAyAiZk55hYWpKWl5Ua6QgghCpAiUWyvXLnCsWPHiI+Pzzbm7aHv\nsHLDZsoFteNeikq9+g2eG/9fqlevzuPkRNbP/5rrl86xfNYn+Hh7U7JkyRy3KYQQomAq1MVWVVWG\nDR9BvQaNeGvocMr5lef8+fM6cWlpaSxfvoz3Zi+iVrM2vPHeJBzcvAgODs7xva2srDiwby/aqNus\nmTUJZ3PYuX0bRkaF+rdcCCFEFgr1O9tt27axc/devtywH0trGw5tXU/vPn25eOHZgvu/iVzKvwqh\nkZGR3uuEPT092fDbOr3aEEIIUfAV6m5WaGgoFWrUw9LaBoCaTVtx7eoVnTgLCwt69OjJDx++zdlD\ne/htziwib16nRYsW+Z2yEEKIQqhQF9tKlSrx1/EDJMbFAHB0xybKV6iYZeziRQtp3yyIPzevwCo9\njqNHDlOsWLH8TFcIIUQhVejX2X44bjzz58/HwbkEmrRUdu3cQcWKWRdcIYQQQh/ZrbMt9MUW4M6d\nO0RHR1O2bFksLS3z9d779+/n2rVrVK5cmdq1a+frvYUQQuSvIl1sDeX90WNYt2EjfgE1uXjyMGPH\njOaDMWMMnZYQQog8IsU2n12+fJmgJk2ZsW4vVrZ2RD+4z/huzbl5IxxHR0dDpyeEECIPyHaNL+DA\ngQMcO3YMd3d33njjDb0ODYiIiMDdpzRWtnYAOJZww97RiYcPH0qxFUKIIqZQz0Z+Gd//8AM93niT\noyE3mPXDXFq3bYdGo8lxe1WqVOFO+FXOH9mHqqoc2roeNBmyg5QQQhRBMoxM5r7ItnZ2zFi7mxIe\n3mg1Gj7r34HvZs2gdevWOW734MGD9HqjN5ER9ynlW4b169ZSpUqVXMxcCCHEq0SGkZ8jNTUVVatS\n3M0TACNjY1y8ShIdHa1Xu40aNeLundukp6djZmaWG6kKIYQogGQYGbCxsSGgWjXW/jiDhNgYzh7a\nw6WTR6hfv36utC+FVgghijYZRn4iIiKCLt26c/bMGYoXL84vS5fQpEkTQ6clhBCiAMluGFl6tk8c\nOHCAkJAQaga1QDE1Y8kvy/Q+iEAIIYQA6dkCoNVqKebgyIT5ayjpV4n01BQ+6dOWJQvm0bRpU73a\njomJ4e7du/j4+GBra5tLGQshhHgVSc/2OZKSknicno5Pucw9k80sLPEpV5F79+7p1e7yFSsoWaoU\n7Tp1xrtkSXbt2pUb6QohhChgpNgCtra2lCxdmuDVS1BVlZtXLnPx5BFq1KiR4zbv3LnDqFHvMmnR\nBqav28uoWT/Tq3dvkpKScjFzIYQQBYEs/Xli86aNdOj0Omt+mIGpmRkLf15A+fLlc9xeaGgo3mXL\n41m6HADlA2thbWvH7du39WpXCCFEwSPF9omyZcsScukiCQkJWFtbY2SkX6ff19eXW9dCeXjvNs7u\nXtz4+yLxMTF4eHjkUsZCCCEKCim2/09uTWIqWbIkn0+ZwqQ+7XAvWZp7N8JYtPBnmSQlhBBFkLyz\nzUNly5YBVK6HXMLCwkL2RRZCiCJKlv7kkQcPHlC+QkVGzJxHhep1OLlnO6u+nsyNsDAsLCwMnV6e\n27FjBxs2bsLWxob33nsXb29vQ6ckhBB5Tpb+5LOLFy/i5VuOCtXrAFCrWRtMzCy4efOmXu1++eWX\nFHMqjrWtHf5Vquq9f3NeWLZsGf0HDuaxgyfXYlKoWas2d+7cMXRaQghhMFJs84iHhwd3blwnITYG\ngAd3bxEX/YgSJUpkGb9//37efPNNhg0bRlRUVJYxGzdu5JPJnzHo41lMXb4VU3tn6tZvkGfPkFPT\nps9k6Off07LnAHqNmkhg45YsWbLE0GkJIYTByASpPOLn58fQIUP45M02lK1SjZDTJ5gxfToODg46\nsQsWLGDku+9Ro/FrRF++Smnfsvwdcgl3d/dn4hYvXkyj9l2pHtQCgLcnf8WoNnXy5XleRlpaKjZ2\n9k+vrWztSUtLM2BGQghhWPLONo+dPHmSa9eu4e/vn+1Zto7OJXhzzGTqtuyAqqp89+FQnMxUtm/f\n/kxc7969CbkdydjvlgIQHvIXUwd3IyUpMa8f46V8NOljNm7bSY93JxEdeY/lsz5hzx/BBAYGGjo1\nIYTIU3KerYHUqlWLWrVqPTcmLS0NH79KQOZ/qNKVqnL16G6duJkzZ1K+YiW+Hz8MT18/dq5cRM8e\n3
fMkb31M+WwyFhbmrP9+CjY2Nqxft1YKrRCiSJOe7SugQiV/7Nx9eOezb4mNiuSzgV14d/g7TJ48\nWSf25s2bvPPOO0THxNCta1fGjBmTr7lqNBru3r0rs4uFECIL2fVspdi+AiIiIqhdtx53bt3CyNiI\ntm3bsmnjRkOnpWPIkCEsXbYcTcZjTM3MmTZ1Sr4XeyGEeJVJsS0AMjIyMDIy0nuryJcVEhLCosWL\n0WRo6Nu3T5ZDvnv37qV1m7aM/W4pFWvW49C29Sz+YiL3797B0dExX/MVQohXlayzLQBMTEzyvdBe\nuHCB+g0bEhabzp00I5o1b8GRI0d04lasWIFXmfJUqlUfRVFo1K4r5hYWOpO4hBBC6JIJUkXcV19/\nQ5s+Q2nbdygADs6ufDFjJtu2bH4mzt/fn7XrN5KcEI+VrR0P790mJSmJgIAAQ6QthBAFivRsi7ik\npCRsHZyeXts5OJGckqITN3r0aOxsbfigSxO+/XAoE3u1pnLlyvj7++dnukIIUSBJsS3ievXozqaf\nv+XSn0cJPfcn636cQa9slhNt27IZEwVO79+Fc3EnNm5Yn8/ZCiFEwSQTpASfffYZX3/9DVpVy6CB\nA/n6669RlGff70dHR1OhYiW6DB9P1fqN2bdxJWd3b+HSXxcwMZG3EUIIATJBSmTj4MGDTJ85k8r1\nm1C9aRvmzJvP5s2bdeJOnz6Nq09pGrbrgp2DEx0GjCA2Lo7bt28bIGshhChYpEtSxA0dNpzWvQfT\nfdhYALzLlGfU+6Pp2LHjM3EODg5ERdwjPS0VM3MLEmJjSEpIwM7OzhBpCyFEgSLFtoCJiYlh3rx5\nPIqOplXLljRv3lyv9pKTU/AoVebptXtJX1JTU3XiqlevTv26dZgxtAd+1epy9kAwI0eOxMnJSSdW\nCCHEs+SdbQESHx9PjZq1cPerjIt3afZv+JVpUz9j0MCBOW6zX79+BO87yNgflmFiYso3YwZRoUxJ\ntm/dqhOr1WpZvXo1169fJzAwkHbt2unzOEIIUejIDlKFwLx581i2YSsjZ84D4MbfF/nhg0Hcu5vz\ng9m1Wi2NmzThxImTqKhUqlSJE8eOYWZmlltpCyFEkSETpAqBpKQk7Iv/c/i8QwlXkpKS9GozJCSE\n02fOUrK8P2UrV+Pvv0M5ffq0vqkKIYT4F+nZFiCXLl2iYaMgBkyaiZuPL7/9NJNyXq4sW7okx20G\nVKtOibKVGTD+cwDWzZ3NhQPbuRYamltpCyFEkSHn2RYClSpVYumSxQwaMoTk5GTq16vH/Llz9Goz\nJjaWhoE1n177BdTgyNa1+qZqUJGRkWzcuBFVVenYsSPu7u6GTkkIUcTJMHIBEhsbS/+3BuLg6kWt\nZu04eOgwX3zxhV5tVqnsz44VC0lOTCA9NYWty+ZRxtc3y9irV69SJSAQd09v6jdoSHR0tF73zgs3\nbtwgILAaq7b+wdodewmoVo2rV68aOi0hRBEnw8gFyIABAzh+IYSPf16HoiiEnD7OV+8NIDkxIcdt\npqenE1i9OiGXLoMCJUuW4tzZMzrrZ+Pj4/H09qFmszYENmzO3g2/EhEWyp1bN/P9pKLnGTBwEAmm\ndnR5ezQAW5bOQfMgnDWrVhk4MyFEUSATpAqByMhIfMpVerqVolcZPx6np+vVZlpaGra2dniV9qVU\nWT+sbWxIz6LNJUuWYOvgxMCPZlA9qAXvf7WAmJiYV24y1cOHD/EoXe7ptaevH1FRjwyYkRBCSLEt\nULp168ahrb8RdvkCqSnJrPzuC1zd3LKMDQ8Px83dE1t7B4o5OrFs2bIs46Z+Pg1zJ1e+WLObqSt3\n4VWpGuMmTNSJy673+v/3UDa0Vq+1YMfyeTyKvE/Mw0i2Lv2Jlq+1MHRaQogiToaRC5gRI0awcPFi\nHqen4+rmweGD+ylVqpROnF0xR8pXq03bvkP5++wJNiz4lqOHD1G9evVn4jq+3hmf2s2p27IDAH8d\nP8ih1Qs4dGD/M3Hx8fF4eftQo2kbAhs1Y++Gla/kMLJWq2XiR5OYM2cOqqoyaNAgvpr1JcbGxoZO\nTQhRBMimFoWMVqvNtshduXKFipUqseRIKCammZtTTB3cjbIeJfj999+fif108mR2HjzOiBlzMDIy\nZuGUD/Av6c4P33+n0+7ly5dp3uI1klNScHJ05OSJ47JdoxBC/Iu8sy1kntebtLa2RlUhLTXzEHhV\nVUlJSsTCwkIndsL48ThZmfBumzq8164OGXEP+WLa5zpxGRkZvDN8BN7lK9N56Bgs7R0Z/cHY3Hug\n/3D16lVK+pbB2tYOpxIurF2bv8uTNBoNYWFhPHz4MF/vK4QoHGSdbSHk4eGBc4kSTB3cnZY9BxBy\n6hgRt27w9e6dOrFGRkaYmprxOD0NjcYYU1PTLAv5yZMnuXXnHp+v2oWRsTEN23ZhVJtafDljOi4u\nLjnONT09nU2bNhETE0Pjxo3x8/PTidFqtdSp34DAoJa8++ZgQs6coG+//lSqVIlKlSrl+N4v6u7d\nu7zWqjVRjx6RlJjAoIGD+Obr2a/c+2ohxKtLeraFVNi1q6gpiaz6bhoXju4leOd2PDw8dOJmzJzJ\nvegEfvrjDD8FnyHD3JaPJn2sE5eamoq1nR1GT959mllYYm5umeUJQS8qNTWVoCZNmTrrG9bt2k/d\nevUJDg7WiQsPDychPo4B4z/H1bsUTTr1xNc/MNtJX7ntrUGDqVCvGd9sPc43W46yZWcwv/32W77c\nWwhROEixfUmqqjJ9xgycS7jgWLw4o8d8gEajMXRaOoaPGImDmxdvTZxO/dadGThoMImJiTpxp8+c\npW7r1zE1M8fYxIQG7bpy+swZnbhatWqRGB3F74t/IDzkL5bP+gRf39J4eXnpxCYnJ/Nm337Y2RfD\nzd2DRYsXZ5njihUrSFdMmTB/LQM/nsXbU79j+MhROnH29vZoNRoS42IA0Go0xDyMzLf3xefPnSOo\nY08URcHGrhjVGrfi7Nmz+XJvIUThIMX2Jf3yyy8sWLyUCQvW8dmybQTvP8SMmTMNndYzkpOTWbVq\nJe99vYg6LdrRZ+xnWDkUZ8+ePTqxZXx9uXj8IKqqoqoqfx07QJkyZXTibGxs2Ld3D9dP7OfHD98m\nPeouO7ZtzXLIeeS77xEWEcWsjQcYNXsREyd9zN69e3XiIiMj8Shb4elwrI9fJR4+eKATV7x4cerU\nqcsnfTvw++IfmTH8TR6nJDJqlG5hzgulSpXm/JF9AGQ8TufvU0ey/D0SQojsSLF9Sdt27KJVn6G4\n+ZSmuJsHHQa/x/Yduu9CDUlVVVCfnURlbGKKVqvVif3k40nE3rnOJ71bM6VfB66eOszM6bpbQKqq\nyofjJ5CYrqF2y46E3bjFl7O+yvL+wcHBdB0+HjsHJ0qW96dRp17symJ4OCgoiOM7N3En7AqP09PY\nOP8bgho3zrLNgwf282aProSd3IeflwvXr17JcsJXXlj483
y2L/2R6UO6MaFbc3y93OnXr1++3FsI\nUTjIBKmX5OToQMTN60+vI25cx9HR0YAZ6bK2tqZ9x478NGEYTbv149r5U0TduUHTpk11Yu3t7Tl+\n9AgnTpxAo9FQu3ZtLC0tdeJOnz7N0eMnmL52N6Zm5rTqPZgxHRsw9oMxOsO5Do6O3L9xHTfvzPW/\nD26FUyuojk6bDRo04IvPp/LBoK4kJSbQtHkLlvy6IstnMjIyYvbs2Tn57dBbpUqVCLl8idOnT2Nn\nZ0f16tVlcpQQ4qXIOtuXdOvWLWrXqUv5GvUwNbfg9P6d7NuzhypVqhg6tWekpaXx2ZSpHD56FG8v\nL2Z8MQ1PT88ct/fHH3/wwaTJjJ+3Bsjs6b7frg7HjxzW2VRj9+7ddO/RkzotOxLz4D4xd29w4vgx\n7O3ts2xbVVW0Wq1sPCGEKPBkU4tcFBkZyZo1a9BoNLz++uuULFnS0CnluejoaCpUrETn4eOoWq8x\n+zet4tyerVy8cB4Tk2cHSDIyMnhn2DC2bd+BhYUlX305g86dOxsocyGEyD9SbIXezp07x1uDBhMe\ndp2qAQH8smQJPj4+OnFjPxzHzv2HeOP9T3gUeZ8l08axc/s2atasmUWrQghReEixFfnGp1RpRn61\nCI/SZQH4be5syjtbMW3atBy3qdFo+P777zly7Dg+3t5M+mgiDg4OuZWyEELkCtmuUWRJVVW+nDUL\nd08vXN3c+fjjT7KctfwyLK2siI+JenodHxOFlZWVXm0OeXsoi1etw6VKfc7fuEfDoMakpKTo1aYQ\nQuQX6dkWcUuWLuWzaTMYPv0njE3NmP/xKIb078OY0aNz3Obq1asZ+d77tOg5kJjIe5w/GMyZ06dw\ndXXNUXuJiYk4O5dgzu6zWFhZo6oqXwzuypdTP6VNmzY5zlMIIXKb9GxFljZv2Urb/sPx9PXDzbsU\nnYaMZsvWbXq12bNnT9as/BVnNZHqvu6c+vNkjgstZA4hK0ZGmJiaApl/mM3MLcjIyNArTyGEyC+y\nzraIc3Rw4OGdm0+vI2/fpFg2S3ReRtOmTbNc15sT9vb2NGvenLmTRtG0a19Cz57g0b1bBAUF5Ur7\nQgiR12QYuYi7fv06devXp2qD5piYmnJy91b27t5N1apVDZ3aM5KTk/lo0sccP3ECb28vZs2cibe3\nt6HTEkKIZ8hsZJGtO3fusGrVKjQaDd26dcPX19fQKQkhRIEkxVYIIYTIYzJBSgghhDAQKbZCCCFE\nHpNiK4QQQuQxKbZCCCFEHpNiK4QQQuQxKbZCCCFEHpNiK4QQQuQx2a5RFBh79+5lx44dVKxYkQED\nBhg6HSGEeGGyqYUoEN5//33mzl9AuarVuRl6mbJlfDl18oSh0xJCiGfIDlJFUHp6Onfu3MHZ2Rlb\nW1tDp5NjycnJ2BdzYOryLfiUq0hyQjyjX2/Et1/Nkh6uEOKVIjtIFTFnzpyhpJcH9WsE4uZSgp9/\nXmDolHIsPDwcYxMTfMpVBMDK1o6SfpW4ePGigTMTQogXI8W2EFJVlY7t2tC7jBnzW7nzVTN3Jnww\nhkuXLhk6tRzx8/PDSFE4tHU9ALeuhHDl/OlsD44/dOgQPXu9QY+evdi7d29+piqEEFmSYlsIxcTE\nEBsbS0MfOwDcbc2o5GrDhQsXDJxZzpiYmLDsl6UsnfERA+r7MalPOwYPGkizZs10Yg8ePEjH1ztj\nXboydmUD6dazF8HBwfmW68qVK/H08sbWzp4evXqRmJiYb/cWQry65J1tIaTRaCjuWIwJtZ0oX9yS\nxHQNH+yLZOP2YGrXrm3o9HIsIyODv//+m9KlS2NlZZVlTLcevbAvG0CzLr0BOLR1PTdP7mHbls16\n3fvMmTPs3LkTOzs7+vTpg729vU7MsWPHaN/xdd796mdKePqw4qtP8XK0ZuWKFXrdWwhRcMg72yLE\n2NiYFStXM+PEI6Ycj+G93ffp89bgAl1oIbOH6+/vn22hBdBoMjA2+WdFm4mpKRqNRq/7btu2jeav\nteR46C1WbdlFzdp1iIuL04kLDg6mQfuulKkciJ2DIz3fnZivvWohxKtL1tkWUuXLl8fdzY2zV65R\nzNaGhkGNDZ1Svnh78CDe7NsfMwsLjIyMWfP9NObP+UmvNsd+OJ63p3xLlbpBAMz5aCSLFi1i9OjR\nz8Q5OTkRefgkqqqiKAr3b4Th4OCo172FEIWDFNtCSFVVOrZpxRtuJgxr1YJT9+Po1fsNjp85i6+v\nr6HTy1MtW7Zk6eKFfP/jT2i1Wub99COdOnXSq82Y2BhcvUs9vS7h6UNMTIxOXP/+/Zm34Ge+GzOQ\n4h4+HNu5iWVLl+h1byFE4SDvbAuhmJgYfDzcufN20NPv9d19hZ4ffUHPnj0NmFnBNHDwEC6F36Hv\nh1N4eO8uP457m43rf6NBgwY6sUlJSfz666/ExcXRokULAgICDJCxEMJQsntnKz3bQsjW1hYVuBKd\nSDlHG1IzNIQ8jMfV1dXQqRVIP37/He8MH8HHvVtjY2PL999+k2WhBbC2tmbIkCH5nKEQ4lUnPdtC\naumSJYx7/11alHLm7IMEqjVswrJVq1EUnR+4hBBC5BLZrrEIOnfuHKdOncLT05OWLVtKoRVCiDwm\nxVYIIYTIY7LOVgghhDAQKbZCCCFEHpNiK4QQQuQxKbYiz8TFxZGWlmboNIQQwuCk2Ipc9+jRI4Lq\n1cHT1YVidrZM/vhjQ6ckhBAGJZtaiBeWnJzM17O/IuxKKAE1azF8+AiMjY114t4Z+Bbl0x7y+9tB\nRCWn027hXKpWq8brr79ugKyFEMLwpGcrUFWVW7ducePGDbJbrpWRkUHr5k05tWohVe+fY923M3mr\n75tZxp44cZxhVT0wUhRKWJvTrbQDx48d1SvHhw8f0qV9W7xcnKlbPZCzZ8/q1Z4QQuQnKbZFXFpa\nGq+3a0ONypWoE1iF5kENszzw/MSJEzy8GcbS1yowoLIXa9tUZPPvvxMREaET6+npybG7sQBoVZWT\nD1Pw8vbJcY6qqtKpbWtcI0LZ3rYi/Z01tG7ejAcPHuS4TSGEyE9SbIu4GV9MI/36RS73q0tI37o4\nx9xh0vhxOnFpaWnYmZti9GQXKksTY8xMTEhPT9eJ/XHBQiafukOv4FCabjhPeglvBg8enOMco6Oj\nuXjpEtPq++Jjb0mvCu4EuNhx5MiRHLcphBD5Sd7ZFnEXTp+iW2lHzIwzf+7qWbY4350+pRNXu3Zt\nHmmMmXkynObeDiwPfUC58uXx9PTUiQ0MDOTcpcscOXIEGxsbmjVrholJzv+oWVlZ8Vij5VFKOs5W\n5mi0KvcTUrC1tc1xm0IIkZ+kZ1vE+fqV5487cWhVFVVVCb4VS5ny5XXirK2t2Xv4CFeK+zHmQiyK\nfz0279iFkVHWf4RcXV3p0qULLVu21KvQAlhaWjL2g7G03fwXX564Tvcdl3AuXY7GjRvr1a4QQuQX\n2Ru5iIuPj+e1J
kEkRNzF1NgIrbU9ew4extnZ2dCp6di0aRMnjh/D26ckAwcOxMzMzNApCSHEM+Qg\nApGtx48fc+rUKTQaDTVq1MDCwsLQKQkhRIEkxVYIIYTIY3LqjxBCCGEgUmyFEEKIPCbFVgghhMhj\nUmyFEEKIPCbFVgghhMhjUmyFEEKIPCbFVgghhMhjUmyFEEKIPCbFVrB82TJqVqlEdf8KzPnpp2zP\ntBVCCJEzcupPEbdhwwYmjX6X74PKYGak8O7UTzA3N2fgoEGGTi1f/PDDD1y4cIEWLVrQvXt3Q6cj\nhCikZLvGIq5n5040TrrOGxU9ANgR9oBFMVYEHzxs4MzyXs2AKty5doVabsXYf/sR7bt0ZcWvqwyd\nlhCiAMtuu0bp2RZxFpZWPHr0+On1o5THWFpZGTCj/LFq1SrCQv/m/IBG2JmbEBabTO1la/hq9je4\nuroaOj0hRCEjxbaIe//DcTQPakRiugZTI4W5FyPYuG2uodPKc1euXKGcow125pl/BUoXs8LazITr\n169LsRVC5DqZIFXEVa1alf1HjpJWuw1xgS3YuXcfDRo0MHRaea5t27acfxDP0bsxqKrKr5fu8Fir\nUr16dUOnJoQohOSdrSiypk2bxueTPyFDo2JpZsKSX1fRpUsXQ6clhCjA5Ig9If4fM2MjrC0sqObj\ngpm5OUZG8tdBCJE3pGcriqSQkBAa163NwW7VcLOx4GxkHJ22/MW9Bw+xtLQ0dHpCiAJKerZC/EtY\nWBhVXB1ws7EAINDFHiszEyIjIw2cmRCiMJJiK4qkChUqcO5+NKHRiQDsvhFFBgru7u5Zxm/dupXB\n/fsx5v33uH37dn6mKoQoBGQYWRRZy375hZHD3sHOwow0Lfz2+2YaNWqkE7do4UKmjB/LqCpu3EpM\n47cb8fx57ny2hVkIUXTJMLIQ/4+iKKiAFhUtKoqi8/cDgJmfT2Fxcz8GV/Vmav2ytPSwYdmyZfmb\nrBCiQJNiKwqd/fv30+P1jnTv2J7g4OAsY8LCwnh3xDCmN3Zlbkt3RlS1o0unDqSnp+vEpqenU8zc\n9Om1vakxqakpeZa/EKLwkWIrCpX9+/fTo1MH6idcIyglnL49urFz506duJCQEMo62+Jlbw5ANXcb\nFK2Ge/fu6cS+0acvIw5e48S9GH77+z4rQh/QubOsxxVCvDjZrlEUKvO+/5aPanjRv7IXAGZGRsz5\nZjatWrV6Jq506dJcj0ogOsUGR0sTrkenkpqhwcXFRafNqV9MZ3hMLP1/34SVlRVLV66iSpUq+fI8\nQojCQXq2olBRtVqMjf5592pspGR5Pm+FChUYO34io/fc59Oj0Uw5+pAlvyzPco3ttm3b2Lh2NT1K\n2hFgrTKUTV2EAAAgAElEQVRm5AhiYmLy9Dn+be3atTStV4cm9WqzZs2afLuvECL3SM9WFCqDho+k\nT/eumBkZYawoTDpxg/m/fJ5lbEC16ihGRly6E4WnmxvlypXLMm7iB6NZ0MyPJt5OAAzeHcLixYsZ\nM2aMXrmGhYVx4cIFfHx8CAwMzDJm06ZNjHlnCLPql0ZR4INhb2NiYiLbSgpRwEjPVhQqLVq04LMZ\nX/LN1Xi+Co1l3KdTaN++vU7c3bt3ebNHN1a0KM/DEc0ZW9GB9q1eIyMjQyc2Pj6ekvb/9HhLWpsS\nG6tfz3bN6tXUDqzK/Inv0b55EyaMHZtl3C8L5jG5ljdtfEvQunQJPqvlw/KFC/S6txAi/0mxFYXK\n2bNnmTRuLB2cTXnd1YzPP/2Y48eP68SdP3+eqq4O1Pd0RFEUelf0IC05ibt37+rEtmnXjolHw7kd\nn8LRuzH8EvqQ1q3b5DjHtLQ03h48kE3tK7OmZXkOd6/G8sU/c/bsWZ1YUzMzkh5rnl4nZWgwMTXV\niRNCvNr+cxhZURRzoAtQ8t/xqqpOybu0hMiZr2dOZ0yAB8MCfQAoYWXGrGlTWb9l2zNxbm5uhEbF\nEZ+WgZ25CeFxySSkpuPk5KTT5rc/zqFHl87UXrkXC3Nzvvr2O+rVq5fjHB89eoS5sRGVne0AcLQw\no7JLMW7evKkznDzqgw/p3K4NyY81KIrC7HN3Wb+l8J83LERh8yI929+BjkAGkPSvLyFeOcmJiRS3\n/KfnV9zSjJRk3T+ugYGBdO7xBo3Xn2XIvqu02nSeWbNnY2NjoxO7ePFi/ggOpoVXMbwtYOy7I4iO\njs5xji4uLphbWrE+9D4AFx8mcOruoyxnODdo0IDNO4O5VrIWV7yr8/uOXTRs2DDH9xZCGMZ/bteo\nKMpFVVX9X7ph2a5RGMCaNWuYMPIdfmhUBmMjhVEHrzFx+lf0HzBAJ1ZVVQ4ePEh4eDgBAQEEBARk\n2WZxWyu+bexHhzIuqKpK542ncazVhHXr1uU4zzNnzvB6uzakJCWRptGyYOEievTsmeP2hBCvhuy2\na3yR2chHFUWprKrqX3mQlxC5qkePHlz66y8Gz/kRFZU+A4fQr3//LGMVRSEoKIigoKDntpma9piA\nEnZPP1PLrRiHstj84mVUq1aNsNt3efDgAY6Ojpibm+vVnhDi1ZbtMLKiKH8pinIBaACcURQlVFGU\nC//6vhCvnL179/LVlzMIcrGimZstP377Ddu2bfvvDz6Hu5sL049fI12j5UZcMkv+ukPrNjmfIPU/\nxsbGuLm5SaEVogjIdhhZURSf531QVdWbz21YhpGFAVQq40snZyPG1fEF4PvT4Sy7mcSVm3dy3ObN\nmzdpWLM6d6MeYawodOjQgd82bsqtlIUQhchLDyP/r5gqirJcVdU+/6+x5UCfLD8ohAGlJidS1tHt\n6XU5B2tSQx7q1aaPjw+3HkSRnJyMhYUFRkayYk4I8XJe5P8alf59oSiKMVA9b9IRQj91gpryxbFr\nXI9N4kZcMlOOXqN6vfpZxoaFhdGh1Wv4l/Wl3xs9/3OGsZWVVa4W2vT0dC5cuEBiYmKutSmEeDU9\n753tBEVREoAqiqLEP/lKAB6QuRxIiFfO8l9/xaNSAPVXHKPO8iPYl/Zj3fqNOnHx8fE0bVifGsm3\nmVfLBZO/T9Khdcss91HOC6tWraKYjRV1qgfiaG/H+++9my/3FUIYxoss/ZmuquqEl25Y3tkKA4iI\niKBh7Vr4WICxAlcSMzh0/CSenp7PxAUHBzNl2Ftsb5+5qk2rqpRbcpSzl//Gw8Mjx/e/desWX82c\nQfSjKDp07kr37t11YtLT0ylmY8Wwmi408rEjLCaVCbtvseOPPTRu3DjH9xZCGF5272yf17OtpihK\nNWDd/3797688zVaIHJryyce0cjFjQ5uKrGtdke7eNkwa96FOnIWFBbGp6Wif/ECY9FhD6uMMLCws\nsmz38ePHXL58mTt3sp9odf/+fWpVr8b9g79RLOwQY4YN5ofvvtOJu3jxIgoqjXwylxOVdrCgnJMl\nu3btyskjCyEKgOets5395J8WQA3gPKAAVYBTQN28TU2Il3fv9k06l/hnF6
iaJWxZcPuWTly9evUo\n7uNL310hBLnZsDYsml69emW5XePNmzdp3awJaQlxxCan0vON3vw4bz6K8uwPr7/++isBjgp9Kme2\nUcbJglkzv2Dku88OEZcpU4Z0jcqNmFRKOliQmK7hRmwq/v4vvXeMEKKAyLZnq6pqE1VVmwD3gWqq\nqtZQVbU6EAjo7tYuxCugXlATFoY8ICE9g6THGcy7HEG9oCY6cSYmJuzYvZd6/d7hSqnavDXhM+b+\nvDDLNof070tXNzPOvlGT833qcnT771meK5uWloaF8T8F2NLEiPT0xzpxdnZ2DH1nGON23+KTfbd5\nZ2sYFStXpXfv3no8uRDiVfYi72wvqar6/2ck63wvi8/JO1uR7zIyMhg2ZBC/LP8VgJ7du7Jw6TJM\n9Tgpx8O5OMEdKuFll3nM3szj11Abd+OL6dOfiQsNDaVurRq8Wd4GV1szVv6dQJueA/jyq9lZNcue\nPXsIDg6mcuXKvPnmmznOLyfi4uI4d+4cdnZ2BAQE6PTShRA5k9072xcptqvIPHhgxZNv9QZsVFXt\n9R+fk2IrDCYtLQ1VVbN9B/sygurWpr1lIkOqepGaoaHj1osMnTyDfv366cTOnz+f998dharNoFz5\nipw5ew5jY2O9c8hNly5dokXTxhS3MOJRYioNGjdj1brfZP2wELlAn2JrAbwDNHryrYPAXFVVU//j\nc1JsRaEQGhpKy6aNcTBWiUpKpWHTZqxYs06nOB0+fJjmjRtR08MGd1sztl2JoUwFf86dP2+YxLNR\nu3oANU0e8JqvPekaLZOPRDFu+rf06SP71AihrxwfRPCkqH7z5EuIIsfJyQkPT09Onz2HCvhXDciy\nF9i9e3equVsztn7m0qFqbtZ8tv+i3vdPTExk3759qKpKkyZNsLW11au962E3GBpUHAAzYyMqORhx\n9epVvfMUQmQv22KrKMpaVVW7K4ryF6DTRVVVVffwTSEKoUH9++KUeJPVXXyJScng0+9mUzWwGm3b\ntn0mLiUlBVdXs6fXxa1M0eg5uhMZGUn92jWxIxVFURitmnP4+ElcXV1z3GZl/0rsvXmdbhUcSEzX\ncPphBt2rVtUrTyHE8z3vJc3/1iu0A9pn8SVEkXD8xAk6lrHDSFFwsjKlgZspR48c0Ynr2bMnO6/F\ncvZ+EvcT0vnpZAQWZjmfmAXw8cTxVLFJY3I9Jz6t60hVmzQmTRinV5tLV6zkzwQr3gm+z9Adt2nf\n/U06d+6sV5tCiOd73kEE95/8sjlwUFVVGWcSRZKHuxshD2NwtjZFq6pci1dp6OWlEzd37lwuXLjA\nl0eOo1VVTE1MuRASqte9b4aHUcPxn4Jd3tGUP2+E69Wmj48Pf4WEEh4ejp2dnV69ZCHEi3mR6Yfe\nwHxFUcIURVmnKMpIRVEC8joxIV4V8xYuYWlIIrNOxTLh4EOsPMvx1ltv6cSlpKSgTUuhupcr3fxL\nYW5uzvXr1/W6d72GQfxxK5W0DC1pGVr+uJVKvYbPP+z+RZiamlKuXDkptELkk/+cjfw0UFEsgcHA\nB4CHqqrPXc8gs5FFYXLv3j0OHz6Mra0tLVq0wMREd1Bo3rx5bPpmGmtaV0RRFP4If8inl2K5eDXn\nBffx48f07d2LjZsyz/7o1KEDy1auwszM7D8+KYQwhBzPRlYUZRJQH7ABzpJZbA/leoZCvMLc3d2z\nPFTg3yIiIvAvZv50g4jKJex4cEC/nm1KSgo3w8PxLGaHoijcvBFOampqvhXbuLg4li5dSlxcHG3a\ntKFGjRr5cl8hCpsXGUbuDDgBu4ENwO//ep8rhHiiSZMmrL4Wxd+PEknN0DDt5A0aN9ZvyHfyxx9R\nOj2aU2/U4M9e1SmbEcOnH03MpYyfLzY2lprVAtj44+dcWPsjLZs1ZvPmzflybyEKm/8stqqqViNz\nktRJoAXwl6Ioh/M6MSEKmqCgID6d/iWtNp3Hc94+ol3LsmDJL3q1GXrxIm19HDBSFIwUhbY+Dvx9\n8a9cyvj5Fi9ejIdRIqNrFqdvFSfeq+7I2PdH5cu9hShs/rPYKoriT+YWjf2AHmQeQrA3j/MSokAa\nPGQIj+LiSU1LZ8vOYIoVK6ZXe5UCAtkYHo1Gq6LRqmwIi6ZSQGCWsdHR0bRt04aqlSowaNAgtFqt\nXveOiYmmxL92u3S1MSU+PkGvNoUoql5ku8atZG7ReBj4U1VV3WNMsv6cTJASQk+JiYl0aN2SqyGX\nUFDwLV+BLTuDsbGxeSYuOTkZH3cXSlqrBLhaE3w9DifvMpy5kPMdrI4dO0aH1q8xuoYjLjamLLoQ\nR4WgNizUs7cuRGGW472R9bihFFshcoFWq+Xq1auoqkq5cuWy3Cry66+/ZvbkCfzUthRGikJiuoZ+\nG69xPfwG3t7eOb73smXLGD1qBKlpaTRu3Jh1GzZhaWmpz+MIUahlV2zlmA8hXnFGRkb4+flRvnz5\nbE/miY+Px87cGKMnM6EtTYwwMcr8fk5FR0fz/ohhOBg/JsDZnN1/7GbJkiU5bk+Iokx6tkIUAuHh\n4VQsV5belZ2o4mrF9iuxnHqYwYOYuBwfnde9e3euHtrO5MaeKIrCsdsJzD0TRWzScw/8EqJIk56t\nEIVYqVKl2LB5C79fT+KjPXcISTLlxJlzep1Re+/eXfycLJ6uGy7jaEFa+gtN2RBC/D/Z/k1UFGWL\noiibs/vKzySFKOq0Wu1zZxerqsri+fMo5WDLgKo+GGsyWLdmtV73bNu2HbuuxxGZmE6GVmXNxSjc\nXF30alOIoirbYWRFUZ67Gl9V1QPPbViGkYXQm0ajYfSokSxYuBCAQW+9xbc//oSx8bO7pR4+fJiB\n3TpxpFs1zIyNiEhKI3DZMSKjonRmLr+Mzp06sXnL76gqlHB04OifpylVqpRezyREYfbS2zX+VzEV\nQuS9b77+ij+3byRkQANQoPfO35k9y4cPx094Ji4mJgafYtaYGWcOVrlYmWFtbkp8fLxexXbDpk1k\nZGSQmpqqVztCFHUvsqlFWUVRflMU5fKTk3/CFEUJy4/khCjq9u/axQh/VxwtzXC0MGNkZTf2/7FL\nJ65mzZqcj4zn96sRxKQ+5qtTN3F1d8+VU31MTEyk0AqhpxeZPbEEmAtkAE2AZcCKvExKCJHJxd2D\nC4+Snl5fiErCxc1DJ87V1ZWNW7fx4ck7lP35ABsfqmzd9YdeE6SEELnnRXaQOq2qanVFUf5SVbXy\nv7/3H5+Td7ZC6OnmzZs0rFOLao6WKAqcikrh8ImT+Pj46MTWqFqZu9evUt3VnoO3o+ne+00WLpZ1\nsULkpxzvIKUoylGgAfAbmXsi3wVmqKrq9x+fk2IrRC6Iiopi69atALRr147ixYvrxCxbtowxQwdz\ntn9D7MxNuBqdRL0VR7j/MApHR8f8TlmIIivH59kC7wJWwChgKtCUzEMJhBD5oHjx4vTv3/+5MaGh\noZR3ssHOPPOvdFlHa8xNjAkLC
5NiK8Qr4EWO2PtTVdVEIB4YpapqZ1VVj+d9akKIF9WuXTvORMZx\n6n4sqqqy4tIdVBSqVKmiE6uqKvPmzqFdi6b07taVy5cvGyBjIYqW/+zZKopSg8xJUrZPruOAt1RV\nPZ3HuQkhXlDdunUZM34ibb6YhlZVsTA1ZdnqNZiZmenETp82jVVzv2N8NU9uPLhP4/r1OHHmrKyf\nFSIPvcg72wvAcFVVDz25bgDMUVVV90fmZz8n72yFyGcZGRlERETg7u6e7UxkHzcX1rYoSwWnzOU8\nYw9eoWTXwUyYMCHL+Beh1WoZP348hw8dpEzZcixYsAALC4v//qAQhYw+72w1/yu0AKqqHlYUJSNX\nsxNC5AoTExM8PT2fG/O/vY7/63svo2Hd2oRdPk/TUvac2nEBX+/t3LwXgYnJi/wvRojC70UW4R1Q\nFGW+oiiNFUUJUhRlDrBfUZRqiqJUy+sEhRC56+3hIxm4O5Tfr0bw3ekbbAyLpkePHjlu79atW5w8\ndZpZr5Wkh39xPm/mzePkeH7++edczFqIgu1Ffuys+uSfn/6/7wcCKpmzk4UQBcT4iRO5e+8ekzdv\nxNLGlrUbN+n1vvbBgweYGSvYmmX+7G5ipOBoacKjR49yK2UhCjw5z1aIQiQhIYHIyEi8vLwwNzfP\nMubnBQv4dPwHtCttyYNklT+j4cz5v3BxydmJPhkZGTgXs6WptxVtyjpwLiKJhWcecOHy35QtW1af\nxxGiwMnxebaKorgoirJIUZQdT64rKooyMC+SFELk3LJffsHd1YVGtavj7e7GsWPHsoybNmUyY2o6\n0q6cI28FOOFvD8uXL8/xfU1MTDhw9ASH7mcwdGsYv/wVw7KVq6XQCvEvL/LOdimwC3B/cn0FeC+v\nEhJCvLxr167x/qjhzGjsyryW7gz2t+L1Du3JyNCdy5iWloaN2T9H9FmbqKSkpOh1/717dmOESgu/\nEjjbWbFz2xZkZEuIf7xIsS2uqupaQAugqmoGoMnTrIQQL+XSpUuUc7bFyz5z6LiWhy0Z6alERETo\nxPZ6ozdzz8USGpXCwRvx7L+Tyuuvv55lu8uXLcPDxRlbayve6N6VpKQknZjExEQ+mjiRaY1KMDTA\ngRmNnNm55XdOn5al+EL8z4sU2yRFUZzInAyFoih1gLg8zUoI8VJKlSrF9UeJxKZm9mSvRafyWKvi\n7OysE/vl7K9p9+Zglt0y5YTWnU1btuHv768Td+jQIcaMGs7oAGvmtvLg3pkDDBsySCcuJiYGK3NT\nnK1NATA3McKjmCWRkZG5/JRCFFwvsqlFNeAHwB+4CDgDXVVVvfAfn5MJUkLko6mffcp3X8/Gx8mG\nsKhEFv+yPNse64v4+ONJXNk0n96VMw8+eJD0mElHoomIin4mTqPRUK50SZqXyKCVrz0XIpP57kwM\nl/6+gpubm067a9asYe7336AoCqPGfKhXjkK8anK8qYWqqmcURQkC/AAFCFVV9XEe5CiE0MPHn35G\ntx69uHXrFhUrVvzPzS3+i5NTce7/61Xunfg0HByK6cQZGxuz4489dO3UgYXrruDuUoL1mzZnWWjX\nr1/Pe+8MZoC/HaoKQ9/qi6npKtq1a6dXrkK86rLt2SqKUhO4rapqxJPrvkAX4CYwWVXV6Cw/+M/n\npWcrRAGWkJBAnRrVsM+Io4SFwqE7yfy6Zh2tWrXK9jMajQZjY+Ns/33r5k3xTwmloY8dAPvC47jh\nVJWNW3fkev5CGEJOlv7MB9KffLgRMANYRub72gV5kaQQ4tVha2vL9FmzeZBhzskHj+nY6XWaNWv2\n3M88r9ACmJqakK7RPr1O02gxMdU9LEGIwuZ5PdvzqqpWffLrn4CHqqpOfnJ9TlXVgOc2LD1bIQq0\ns2fP0rxxI96uak8Ja1OWXYqnUcdefPvDjzluc9++fXTt2J6u5WzQqirrryaxeftOGjRokIuZC2E4\nOenZGiuK8r93us2Avf/6d7K7uBCF3KZNG2niZUEdT1tKO1jwdtVirFu7Rq82mzRpwqZtO0gu15j0\n8k3ZtusPKbSiSHhe0VxF5iEEUUAK8L8j9sogS3+EKPSsrW1I+NdUyNjUDCwtsz82Ly4ujrCwMDw8\nPChRokS2cQ0bNqRhw4a5maoQr7xse7aqqk4DxpC5g1SDf40JGwEj8z41IQTAqVOn6Nq1K126dOHU\nqVP5dt/+/ftzOcGYBWej2BjyiG9OxfDplGlZxu7Zs4cyPt70ad8KP99SzJszJ9/yFKIgkIMIhHiF\n/fHHH7Rv04q6njYoKBy9k8CW7Ttp0aJFvtx/9+7djBw2lOSkJNp3ep0ffvxJ5+zb9PR0PF1LsKRp\nORp6ORIel0yL9ec4cuq07I8sipwcH0QghDCcEW8PomsFR96v6857dd3oXtGR4UPy5xyQq1ev0qNL\nZxrZJdK3jAnBG1bx+dQpOnERERGYoNLQyxGAUvZWBLg7EBoami95ClEQSLEV4hWWnJj4dL9jAE97\nc5ITE/Pl3qtXr6ahhzltyzlQ08OGUdUc+Hme7vCwi4sL6Vo4fi8GgNvxKZy/H5Ntr3bu3Ll4e3ni\n7eXFokWL8vQZhHhVyKxiIV5hdRoEsXLvdko7mKMoCisvRFGrcfabSuQmRVHQ8s9oWIZWRUFndAxz\nc3NWrF5Dr+7dcLW15F5cIlOmfYGfn59O7Oeff87UyZ/QtpwDqqoy7O3BxMXFMXr06Dx9FiEMTd7Z\nCvEKy8jIoEmjhpw8eQKAmjVrsf/QYUxM8v7n5Bs3blCzWgCtvc0pYW3C+qtJjPzwI0Z/MFYn9tdf\nf2XQgH6oWi1aFUZ/MJYZM2fqxDnaWNKtvB1ty2UOOW8KecTm60lExeueJvQyTpw4wYoVK3B2dubD\nDz/EwiL7WdNC5KXs3tlKsRVCZCs0NJQvpkwmLjaGTl170K9/f50JUqmpqTjY2TCipgsNfey4+iiF\nj/beZt/Bw9StW/eZWAcrM96pUYI6nrYAHLoZz6Jzj4hOSs1xjnPmzGH0qBFUdrEmIjGdxyZWhN2+\ni5WVVY7bFCKncnwQgRCi6PLz8+OXX1c9N+bs2bOYKDzd77iskyW+Dhbs2rVLp9hWqBzI4jNncbI0\nQQWWnntA1Zr19Mpx/AfvM7qeO3U8bdFoVSbsucWIESNYvHixXu0KkZuk2Aoh9FK2bFnSMrTcjkvD\ny96cxHQNt+LSqFixok7soaNHCahSmU/2Zc5U9qvgz779B/S6f1r6Y8oXtwTA2EihkrMlt27d0qtN\nIXKbFFshhF6KFy/OgIEDGbtkMeWdLbkenUrlgAC6d++uE2tsbMxfly7n6v2dHBxYf/kRAwJL8Cg5\ng73hcUzo/1qu3kMIfck7WyGE3q5fv07r11pwPfwGTo7F2LBpc77teXzx4kUa169DbELmJKt27dqx\nafOWfLm3EP+fTJASQry08PBw3nvvPWJiounXrz8DB+puqKHRaKhYrgwNiqXSyteO85FJzD
+      [... remainder of base64-encoded PNG data for the SDML scatter plot omitted ...]",
+      "text/plain": [
+       ""
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "plot(X_sdml, Y)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Least Squares Metric Learning\n",
+    "\n",
+    "LSML is a simple yet effective algorithm that learns a Mahalanobis metric from a given set of relative comparisons. It does so by formulating and minimizing a convex loss function that corresponds to the sum of squared hinge losses over violated constraints.\n",
+    "\n",
+    "Link to paper: [LSML](http://web.cs.ucla.edu/~weiwang/paper/ICDM12.pdf)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "lsml = metric_learn.LSML_Supervised(num_constraints=200)\n",
+    "X_lsml = lsml.fit_transform(X, Y)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "image/png": "[... base64-encoded PNG data for the LSML scatter plot omitted ...]",
+      "text/plain": [
+       ""
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "plot(X_lsml, Y)"
+   ]
+  },
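+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "To make the loss above concrete, here is a minimal NumPy sketch of a squared hinge loss over relative quadruplets `(a, b, c, d)`, each read as *a is closer to b than c is to d*. The helper name and the exact form (square roots of the Mahalanobis distances) are illustrative assumptions, not metric-learn's internal implementation:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "\n",
+    "def squared_hinge_loss(M, X, quadruplets):\n",
+    "    # Sum of squared hinge penalties over violated relative\n",
+    "    # comparisons, in the spirit of the LSML objective.\n",
+    "    loss = 0.0\n",
+    "    for a, b, c, d in quadruplets:\n",
+    "        d_ab = (X[a] - X[b]).dot(M).dot(X[a] - X[b])\n",
+    "        d_cd = (X[c] - X[d]).dot(M).dot(X[c] - X[d])\n",
+    "        # a constraint is violated when pair (a, b) is not closer\n",
+    "        loss += max(0.0, np.sqrt(d_ab) - np.sqrt(d_cd)) ** 2\n",
+    "    return loss"
+   ]
+  },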
"iVBORw0KGgoAAAANSUhEUgAAAdsAAAFsCAYAAACEtRP5AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzs3Xd4jXcbwPHvk5O9JJGIRIQYiVnE3mIXtXftvWmrrdVB\naUurqLYULVW1N7U3sRWJGUSQkEhEZM/zvH9E9U1PgkZOTsT9uS7X65z3Pr/nfnpF7vP8pqKqKkII\nIYTQHyNDJyCEEELkd1JshRBCCD2TYiuEEELomRRbIYQQQs+k2AohhBB6JsVWCCGE0DNjfTWsKIqs\nKRJCCPHGUVVV+fd7eiu2Ty+oz+aFEEKIPEVRdOosIN3IQgghhN5JsRVCCCH0TIqtEEIIoWdSbIUQ\nQgg9k2IrhBBC6JkUWyGEEELPpNgKIYQQeibFVgghhNAzKbZCCCGEnkmxFUIIIfRMiq0QQgihZ1Js\nhRBCCD2TYiuEEELomRRbIYQQQs+k2AohhBB6JsVWCCGE0DMptkIIIYSeSbEVQggh9EyKrRBCCKFn\nUmyFEEIIPZNiK8RLOnHiBD179aJLtx7s2bPH0OkIIV4jUmyFeAknT56kdZt3MHMrg32Zqrzbuy/b\nt283dFpCiNeEoqqqfhpWFFVfbQuR23r37YuRc0la9hgAwMk927i8fzP79uw2cGZCiLxEURRUVVX+\n/b482QrxEtLS0jA2MXn2WmNiQlpamgEzEkK8TowNnYAQr4PBAwfSuVt3LKysMTE1Z/XcL5jz7SxD\npyWEeE1IN7IQL2n37t18N3ceaWlpDB44gG7duhk6JSFEHpNVN7IUWyGEECKHyJitEEIIYSBSbIUQ\nQgg9k2IrhBBC6JkUWyGEEELPpNgKIYQQeibFVgghhNAzKbZCCCGEnkmxFUIIIfRMiq0QQgihZ1Js\nhRBCCD2TYiuEEELomRRbIYQQQs+k2AohhBB6JsVWCCGE0DMptkIIIYSeSbEVQggh9EyKrRBCCKFn\nUmyFEEIIPZNiK/KlpKQkBg8Zip29Ay6uRViwcKGhUxJCvMGMDZ2AEPrw8YSJnL92iy/X7CXmcSTT\nxg/CvWhRWrdubejUhBBvIHmyFfnSzl276DTiQ+ydnHH3LEuTrn3ZsWuXodMSQryhpNiKfMne3p7Q\nO4HPXofdCcTRwcGAGQkh3mSKqqr6aVhRVH21LcSLHD16lHbtO1CzRVtiHz8i+Polzpw+haOjo6FT\nE0LkY4qioKqqovO+FFuRX127do3t27djaWlJjx49sLe3N3RKQoh8ToqtEEIIoWdZFVsZsxVCCCH0\nTIqtEEIIoWdSbIUQQgg9k2IrhBBC6JkUWyGEEELPpNgKIYQQeibFVuhdXFwcly9fJjIy0tCpCCGE\nQUixFXp17NgxipcoQau27SnuUYJFixcbOiUhhMh1sqmF0JvU1FRc3dwYMOUbKtX1IfTubaYP6sQJ\n32N4enoaOj0hhMhxsqmFyHUPHz4kLU1Lpbo+ABR296BUhcpcuXLFwJkJIUTukmIr9MbR0RFtWioB\nF88B8Dg8jMAr/pQqVcrAmQkhRO6Sw+OF3piamvLHihX06t2HIsVLEhx0i48/+pAKFSoYOjUhhMhV\nMmYr9O7hw4dcvXqVokWLUqJECUOnI4QQeiOn/gghhBB6JhOkhMglvr6+vN26DQ18GrNg4ULkS6cQ\nQoqtEDno/PnzvNO2He7VfajVoS+zvpvH3HnzDJ2WEMLApBtZiBz0wfjx3I2DjkPGAXDD7y9WfzOF\nK5f8DJyZECI3SDeyELlAo9GQmpz07HVKchIajfwzE+JNJ0t/hMhBAwcMoHbdulhY22Dr4MiWJfP4\n6ouphk5LCGFg0o0sRA67dOkS3343h7i4eHp060LHjh0NnZIQIpfI0h8hhBBCz2TMVgghhDAQGbMV\nQgiRZyUlJXH69GkURaFGjRqYmpoaOqVskWIrhBAiT3r06BGN6tUhJfoRWlXFqmBhDh71xc7OztCp\n/WfSjSyEECJHJCcn88G4MZQs5oZ3xXLs3LnzldqbPOFjiitRzGzgyDcNnSicEs6nUyblULa5S4qt\nEEKIHDH+/XEc3rSSceVNaeMQQ6/uXTh79my227tx/SpVnExRFAVFUajsZMqNa9dyMOPcI8VWCCFE\njtiwbh2D3ypAMTszqhWxpom7JVu3bsl2e1Wq1eBwSCKpWpWUNJUj95OoWqNmDmace6TYCiGEyBGW\nlhZEJqQ+ex2VDNbWNtlub9r0GZgXq8CgHfcYvOMedqUqM+XTz3Ii1Vwn62zFa+fOnTusX7+ewoUL\n06NHD4yM5DujEHnBqlWrGDt8CC2LWRCepHI5xphzF/xwcnLKdpuqqhIcHIyiKBQpUgRF0VnCmqfI\nphYiX1i/fj29+vTFpVgJHj8MpaCDPdevXsHYWCbWC5EXHDx4kO3btmJboADDh4+gUKFChk4pV0mx\nFfmCXUFHuo2eRKN2XUlOSuTTPm1p08yHH3/80dCpCSGE7CAl8of4uFiq1G8MgKmZOZXq+nDtNZ2d\nKIR4c0ixFa+VAgXsObR5NQAxUY85uWcbtWrVMnBWQgjxfNKNLF4rx44do2Wr1miMTUiIi6Vqtar4\nHj0qk6SEEHmCjNmKfCMxMRFfX19cXV0pW7asodMRQohnpNgKIYQQeiYTpIQQQggDkWIrhBBC6JkU\nWyFe0r59+2j9TltatmrNhg0bDJ2OEOI1IsVWiJdw+PBhuvXoiXuNxng1bMOI0WNZt26dodMSQrwm\nZIKUEC/h3d69MStalmZd+gBw5sBOzu9cy8F9ew2cmRAiL5EJUkK8AkVR0KZpn73WarUY5fEN0YUQ\neYfs3i6yJS0tjU6dOuHn50fZsmXZunUrGo3G0GnpzYhhw2jTth0aYw0mpmZsWPANvyz62dBpCSFe\nE9KNLLLFxdUN1diEGk1ace7QHpLiYwi7H5KvC+6xY8f4fv6PpKalMnjgAN5++21DpySEyGNkUwuR\nYzZt2kSPd3vx464zWNrYkpgQz6iW1Zk7+1sGDx5s6PSEEMJgZMxW5JgbN25gXcAOSxtbAMwtLLG1\nL0hQUJBhExNCiDxKiq34z/r06UPskyh2/LGEJ5ER7Fv/O48fhjJgwABDpyaEEHmSdCOLbFm2bBmj\nxo4jOTERUzMzvpn5NcOHDzd0WkIIYVAyZiuEEELomYzZCiGEEAYixVYIIfKItLQ0Q6cg9ESKrRBC\nGNiuXbso7FQQUxMTvN+qQGBgoKFTEjlMiq3IMyIjIzl69CgBAQFZxsTExNC1ew9sbAvgVtSdVatW\n5WKGQuS8oKAgenbrwthK1qzv6kllk0e0adkcmfOSv0ixFXnC8ePH8SxThmFj36d2vfp8MP7DTOMG\nDx1KWFwys7ccY+iMHxk99j1OnjyZy9kKkXNOnz5NeWdryheyRGOk0M6zAMEhIURGRho6NZGDpNiK\nbHnw4AEdO3fBs2w52nXoSHBw8Cu116Pnu/SbNJMpv2xi
5voDrN2wkYMHD+rE7dmzh26jJ2FjZ0+p\nilWo27oT+/fvf6VrC2FIzs7O3ItKJPnpQRcPYlNI06rY2toaODORk6TYiv8sJSWFps1boC3gzMDP\n52HqXByfJk1JTEzMdnvB9+5SpX4TAKxsClDGu2am3cn29g48uJM+nqWqKmF3b+Hg4JD9mxHCwBo0\naECdRk2ZeCScBRei+PToQ+Z+/z0mJiaGTk3kIDn1R/xn165dIzo2jq4jP0ZRFNw9y3L+8B78/f2p\nXr36f27PxMSEkqU9Ob5zM/VadyQq4iGXTh3j8/dH6sTOmf0t/QcOolbztoSH3CUxKpw+ffrkxG0J\nYRCKovDHmrXs2LGD4OBgplerRtWqVQ2dlshhUmzFf2ZhYUFCXBwpyUmYmpmTmpJMXEw0FhYW2W5z\n3ZrVtGrdhu1L5xMZ8ZCJEyZQt25dnbi2bduyf+8e9u3bh51PLXr06IGVldWr3I4QBqcoCq1btzZ0\nGkKPZAcp8Z+pqkrX7j0IuBNMlYYt8Du2nyKOdmzdvAnlFQ5UT0xM5NatWzg5OVGoUKEczFgIIXKH\nbNcoclRqaioLFy7E/9JlypUtw4gRI2SMSQjxxpNiK4QQQuiZ7I0shBBCGIgUWyGEEELPpNgKIYQQ\neibFVgghhNAzKbZCCCGEnsmmFiIDrVbLhg0bCAwMpEqVKjRv3tzQKQkhxGtPlv6IZ1RV5d3efTh3\n8RKeVWpy/sgeBvbry9TPPzN0akII8VqQdbbihc6dO8c7HTrx1Zq9mJpb8CQygg/a1Sf43l3s7e0N\nnZ4QQuR5ss42j7h48SKDBg+hT79+ee5ouMjISAq5FMHUPH2P4wIOjljb2hIVFWXgzIQQ4vUmxTYX\n+fn54dO4CXGWjhi7lKZbj3fZtm2bodN6xtvbm5CgmxzfvYW4mCds/20htjY2FC1a1NCpCSHEa026\nkXPR0GHDiTa1o23/9KPjzhzYyZmtf3D08CHDJvZ/zp49S78BA7kdGEilypX54/fleHh4GDotIYR4\nLWTVjSyzkXNRckoKZgUsn702s7AkNTXVgBnpqlatGpf8Lho6DSGEyFek2Oaivr170alrNwoUdMLC\nypo/Zn/OlAkfGTotIYQQeibdyLlsx44dzPp2NikpKfTr24dBAwe+0hmwImvh4eEcOXIES0tLmjRp\ngqmpqaFTEkLkc7L0R7xRLl++TOMmTSlWpgIxUZHYmJty6MB+LC0tX/xhIYTIJim2wmAePXpEQEAA\nbm5uuTazuXHTZnjU8KFplz6oqsoPE4bTtnE9JkyYkCvXF0K8mWSdrchRWq2W0aNHU7duPYYNG5bl\nRK+9e/dS2suLAcNGUvGtSnw3Z06u5HfvXjCelasD6T/8pd6qxt17wblybSGE+DcptiIDX19f3It7\nYOfgiFfZcty+fTvTuLcqV2HTjt24VKzJroNHKVe+IlqtNkNMSkoK3Xv0YNTXC/l02Vamr9rNl1/N\n5MqVK3q/j9q1a7Fn1a+kpaYS/TiS4zs2UKd2Lb1fVwghMiPFVjwTGhpKsxYtqfF2Rz6c/xtFylSm\navWaOk+tx44d41ZgIF8s30anoe/x+dLNPAgNZceOHRniwsPDQTGibNX0IlfQ2YWS5d8iICBA7/cy\nf95ciH3EkEYVGNu6Jh3fac27776r9+sKIURmZOmPeOaPP/7AqUhROg15D4DiUyoyuFEFzp07R82a\nNZ/FhYaGYmVti7mlFQBmFhZY29kTFhaWoT0nJycUBfxPHaVizfqE37/HzUsXKFOmjN7vpUCBAuzf\nu4fo6GhMTU0xNzfX+zXzo6tXr+Lv74+HhwfVq1c3dDpCvLak2IpnrK2tSYiNRZuWhpFGQ3JSImkp\nKVhbW2eIa968OQlxMWxd9hN1W7bn9IGdREU85J133skQZ2Jiwro1a+jctSt2BQsRHhrClzNm5Eqx\n/ZutrW2uXet1kZSUxOxvv+Gy30UqVq7C+x+Mz3RZ1C+/LOGj98dR3tmGGxFx9Bs8jK9mzjJAxkK8\n/mQ2sngmMTERN/diuJepSJX6TTi0ZQ2a1CQCrl3ViT1y5Aidu3YjOjoaGxtbVq5YTrNmzTJtNzo6\nmlu3buHq6oqzs7O+byNfCgoK4uHDh5QpU+aVvkBotVpaNm1MbNBlqjkZc+phCk5e3mzbuTvDeu/Y\n2FhcnJ2Y5eNKEVtTYpLSGLf/AQePnaBChQo5cUtC5EsyG1m8kLm5OQHXruJkocF38x9UK++V5daN\nDRo04GHoAxLj4wgPe5BloYX0p8sqVapIoc2mTyZNoHqligzt0g6vEh6cPn06221duXKFyxfP82GN\ngjQtacdHNRw5c+oEN27cyBD38OFDrM1MKWKb/sRrY6ahuIMVwcEyo1uI7JBuZJGBg4MDu3fvNnQa\n4qmjR4+yYvEiTveoTkELU7beDKNn507cvHsvW+0lJydjaqzB6On3bmMjMDM2Jjk5OUOcm5sbGJvg\nezeauu62XI9I4GZErDzVCpFNUmyFyMOuXbtGPTd7ClqkP2G2KVmIfjv8SElJwcTE5D+3V6FCBawd\nnPjN/zE1Xcw5fj+RgoVddcbRTU1N2b5zN+3atGLB+SCMNBpWrFydXoSFEP/ZC4utoihmQCeg+P/H\nq6o6TX9pCSEAypYty4zgSMLjk3CyNGPLjTA83Ipkq9BCehHdf/go748ZxepL/lSoVJu93/+AsbHu\nr4KqVatyN+QBjx49wt7ePtMYIcTLeeEEKUVRdgFPgHNA2t/vq6o6+wWfkwlSIsfdvXuXug0acj/4\nHsYmpvTt3YtFixYZOi29mvrpJ3w/Zw6udtZEJqWyZccuqlWrZui0hBCZyPbeyIqiXFJV9T8P1Eix\nFQCqqrJy5UpOnj6NR7FijBgx4pXWvBYvUYqiZSsxYNKXhN4LYsbQbsyeNZOhQ4fmYNZ5T3BwMA8f\nPsTT01NnKZYQIu94ldnIxxVFqaiHnEQeFRwczM6dO4mMjHxuTJt32uJRshQtW7UmKCgo07j3PxjP\n51/O5LHGltXbdtGsRcss91F+Gffvh9Bz3GTMLa0o7lUenw49WbduXbbbe124ubnh7e0thfZfduzY\nQdOmTWnVqhV//fWXodMRIktZFltFUfwVRfED6gF/KYpyXVEUv/97X+RDI0eNokTJkvTs0w+XIkWY\nN2+eTkxycjJNm7fAokgpRn6zBPuSFWnStBkJCQkZ4mJiYli4cCEf/bSS1r2HMvbbJTx4GMHRo0ez\nnZ+pqSn3bl0H0p+a71y/JEuK3lC//PILHdu2wfLeX6RcP06dmtXx9fU1dFpCZOp5Mx7a5FoWItu2\nb9/O6LHjSEhIpEqVSmzZtCnbh6T7+vryy69Lmb7iT4qWKoP/ySN89P5A+vbti52d3bO469evE5+Y\nRIch76EoCkU8RnFm/3b8/f2pUaPGs7iEhARMTU2xtE7fhMHIyIgCDo7ExcVl+34/Gv8BX30wiFrN\n2/LgTiChQTc
59OfmbLcnXl+fThjPkGrONC2R/rNpa6Zh1LAhnPe/bODMhNCV5ZOtqqp3VFW9A0z/\n++///17upZi/JCYmsmvXLrZt20ZUVNQrtXXq1Ck6d+lKnbY96DfpK64F3qVe/YbZbu/QoUMULeVF\n0VLpy0Aq1mqAmYWlTveclZUVsTHRJCcmApCSnETskyisrKwyxDk5OVG+QgV+/+ZTggMD2Lv2N+7f\nvkGdOnWyneOnn37K6pUrKahJoVGNKgQF3sLBwSHb7YmsxcbG0tSnEcVdClGjahXu3Llj6JQySE5K\nxtnqn1nZLjamxMXGGDAjIbL2MhOk/lJV1fv/XmsAf1VVy73gczJB6l+io6Np0MiHZFXB3NKK8HtB\nHD1ymOLFi2ervc6dOxOWqDLii/Su3siHD3ivbT2Sk5Ky1d7u3bvp0Kkz32w4gEMhF4KuX+azvu0I\nvneXQoUKPYtTVZXeffpy/sp1Kjdohv/xg5R2L8L6tWsybPkHEBkZyfCRIzl79hxFixZlwY8/ULZs\n2WzlJ3KPVqulRFFX7LSxNC9ZgHP34zgXlsTdB2F5Zr/pFk2bcPOv43xY15XEFC3TjwTTd9hoZn/3\nnaFTE2+w/zwbWVGUicAkwAKI//ttIBlYpKrqxBdcUIrtv0yeMoXj/gEM/mw2iqKw5df5JN2/xaYN\n67PVXvfu3bnzOJ4xX/8EwMOQu3zYuQnJiQkv+GTW3mnXnn379uFavBQhgQGMGDGc72brrvJKS0tj\n6dKl+Ptfoly5sgwaNAiNRpPt64q8xc/PjxrelfmjU2lMNEaoqsrIHbf5cNosxo4da+j0gPS5Az4N\n6nH+r3MoikKbtu1Zsy57/5aEyClZFdssx2xVVf0K+EpRlK9eVFjFy7l9OwjPKjWfPf2VrVqbLScO\nZLu9KVOmUK16Ddb+9A1uJT3Z8PMc6tSp/Uo5btuymf3793Pu3Dl8fHyyPFZNo9EwaNCgV7qWyLuS\nk5NRFCVDT4WxorzSTPKcZmpqiu/J7O8TLURuet6TrXem/8dTqqo+d569PNnq+uGHH1iw9Hfen7sM\nUzNzFk/9gArFXfnpxx+y3aavry+Dhw4jLi6eenVr8/vy5RgZyfkS4tVotVrcnB0pbp5Ki1J2nLsf\ny8G7sdwJCZUxciGeIzvdyAef/tUcqAZcJL0b+S3grKqqz32EkmKrS6vVMnTYcFb8/jsaY2Pq1a/P\n+rVrZO2kyJMiIiJo1aIpdwMDcXB0Yt3mrZQvX97QaeVLqqqyf/9+7t27R7Vq1ahYUbY2eF29yg5S\nG4HPVFX1f/q6AvC5qqqdX/A5KbZZiI2NJSUlBXt7e0OnIoQwMFVVGdC3N4d2/0kpB3PO34/lu+9/\noE/fvoZOTWTDq+wg5fV3oQVQVfUSINNJX4G1tbUUWiHyoLS0NM6cOcPRo0eJj4/PMm7Z0qW4OhXE\n2sKC7p06EBsbm+1r+vr6sn/ndmY1LMToKnZMrefEiOHDSElJyXabIu95mWM8/BRFWQKsePr6XUB2\nkBI8evSIoKAgihUrhqOjo6HTESJL169fZ/lvy1BVlV69+1CunO7KxcTERFq1aEbg1UtYmhqTpDHn\n0LHjFC1aNEPc4cOHmfzBe6x9uxzuthaMP/oXI4cM4reVq7OV2/379ylmb4GZcfqzT9ECZhiRvlSw\nYMGC2WpT5D0v82TbH7gMjH3658rT90Q+lJaWxooVK5g2bRrbtm3LMm7Dhg2ULF2a7r37UrJ0aVau\nWpWLWQrx8vz8/KhTozrXty7h5rZfqFe7JmfPntWJmzd3Lon3rjGnsTNfN3Ckln0Ko4frHnCxb99e\nenk68lYhW+zMTfisZnH27d2X7fyqVavG5dAYrkckoKoqfwZEUbiws0xEy2de+GSrqmoiMOfpH5GP\nqapKtx49uXIjEC/vWvy6fDzHT5zkqy9nZIh7/PgxAwcP5qMf/sCjbEXu3bzGiKFdaezjQ+HChQ2U\nvXgd+Pv7c+jQIRwcHOjcuTNmZmZ6v+bMGV/QvqQF7cqkFy8Hi8d8Oe0zNm79M0NcwLUrVCqoQWOU\nPtxWtbAFv94I0GnP0dGJg9HJqKqKoihcj4zFwSH7w0IlSpRg2YqV9OvTi9i4eEqX8GD7rj91NogR\nr7fnHUSw9un/+j89gCDDn9xLUeSWs2fPcvL0GSYsXE3XUR8zcdE65s+fr3P6T1BQEAULueBRNn3G\nZNFSZXAp6kFgYKAh0hYGFh8fT+OGDXAtWACvEsU4ceJEpnFbt26lUb3a7Fowg28njaNxg3okZXO3\ns/8iJiYae/N/niscLIyJiY7WiatctTonQlNIStWiVVUO3YunchXdFZADBw7kNpZ023WV8UdvMPRA\nALPn//hKObZt25ZHj58Q9SSay9dv4Onp+UrtibzneU+2f28TIwcSvCGioqIo6OyCiWn604aNnT2W\n1tZER0dn6NIqVqwYEWH3uRNwhWKe5QgJvMGDe7fx8PAwVOrCgKq+VQHjmDAGVXQgICKOJg3rc/n6\nDZ2fh9HDhzK+ekHKF7JEVVW+OBHEqlWr6Nevn17z69qjF5PeH0UhaxM0Cqy6FsukGb114kaMGMFJ\n36MM+fNPzIw1FC3uwdIfF+jEWVtbc/zMOdasWUN0dDRjmzXLkSVRiqJgaWn5yu2IvOl5O0g9ePrX\npsARVVVv5E5KwlCqVq1K6J1AjmxbR6U6jTi4aSUFHRx0Jog4ODiw+OefGTKsO85F3AkLucv38+bh\n4uJioMyFocTGxhIQeJuVnTyxMDGimqs11yISmTt3rs7xjI8eR1HMLv1nSVEU3KyNiIiI0HuOvXr3\n5kn0E+Z/NxtV1TJ2wicMGjRYJ06j0fDH6rWEhISQlJREsWLFstyC1MrKigEDBug7dZGPvMxsZHfg\nZ0VRigPngCPAUVVVL+gxL2EADg4O7N61kwGDBrN63nQqVa7M7p07Mv2F07VrV3x8fAgMDKR48eJy\npmw23blzh6ioKLy8vDA3Nzd0Otmm8s+aei1qpruYNWpQnz8uX6BfRXvuRSfjGxzP5IbZP6Xqvxg5\nchQjR456qdgiRYroORvxJnrhphbPAhXFAhgMjAeKqKr63F3nZVMLIbKmqipjR45g1YoVONlYkGhk\nwq79B1/LsbqKZb1ICb9H+7L2XI9IYE9gDFdv3MLd3T1DXGRkJO9268L+Q0ews7Vh7vwf6Nmzp4Gy\nFkI/XmUHqSlAXcAaOA8cI/3J9sELPifFVogsbNy4kc/GDGNH24oUMDPh54t32Rxjge+Zc4ZO7T9L\nTk6mY/t2+P11Bjv7gqxYs4633nory/i/Z/EKkR/951N//k9HIBX4EzgMnFBVVf9TCIXIx65cuUKz\nIrYUMEs//LxT6cJ8ufL1PMHG1NSU7Tt2vnS8FFrxJnrhphZPD45vCpwGmgH+iqIc03diQuRnXl5e\nHLgfQ2xy+pF1W289pIxnaQNnJYTQlxcW26cHD7wL9AW6ASFA9g9hFULQ
uXNnajZvhffK0zTceJE5\nVyL4dcVKQ6f1xlu2bBmuhZxwLmjP6NGjDZ2OyEdeZsx2O+kzkI8BZ1RVfandsWXMVogXCwgIICoq\ninLlyslRi9kQGxvLsmXLsLa2pnfv3lku1XkZy5YtY+igAbTzcsDGTMPqSxG06dCZNWvW5GDGIr/L\n9gSpV7igFNvXUGBgICNGjuZWYCDe3pX5cf58OWRA5Ennzp2jQe2amGogVaui0ZgQGHw/23sKuxZy\nop6TSq9KTgCcCo5h/ukwohPl9B3x8l7liD3xhoiJiaFhIx8KelVh0BffE6NY8nbrNmi1WkOnJvI4\nVVV5/PgxqampuXbNt5v40KCYDcval2JZ+9IUt9VQp2aNbLeXlpaKjdk/T8Y2ZhqQBwaRQ6TYGoCq\nqqSlpT03Zs2aNXiUKk0R92L07dsvVwre6dOnKeBUmDZ9h+Feuiy9xn/O7dtBBAcH6/3a4vV148YN\nypYuSVFXF+wL2LDi999z5bpJifE0LG6LoiiYaBQaFrflUdhzVyQ+V9eevVh9KYJTwTFcCY/nh1Oh\nFC1RKgczFm8yKba5SFVVJk2egpWVNZZWVvTu2y/Tjdh37NhBn379adCxN13HTGHn/gN07NRJ7/lZ\nWloS+ySCFT3zAAAgAElEQVQK7dMvAonxcSQlJWBhYaH3a4vXV/s2rWhgl8DKDh581ciFsaOGc/ny\nZb1f19TMnBN3Y9K/vGpVjt+Lwc6xULbbmz9/Pm06dGb+6VC+PBKCvVsJ/C7p/z7EmyHLMVtFUbYB\nWfahqKra9rkN55Mx26NHj7Jy1WoszM0ZMWI4pUpl/5vur7/+ypffzuH9ucswt7RiweTRNKrpzTez\nZmaIq1e/PvYlKtBz3GQAAq9c5OsRvYh58viV7uVF0tLSaNaiJbGpUKZ6Pc7s3Ubd6t4sWbxIr9cV\nr6+4uDgc7O1Y26nks/Wz8/6Kou+kmfTp00ev1z5y5Agtm/pgY6ohJU1LKkYE3L4rxzwa2M6dO9my\ncQMF7O0ZM3bcG7f9ZXbGbL8FZj/nT773559/0r5TZ6LN7Ln9JJladeoQEKB7vuXL2n/wII279MXe\nyRkLK2ta9xvBgYMHdeIURcmw12z6lxb9f3HRaDTs/HM73d5piWVsGO+PHMqinxfq/bri9WVpaYmF\nuTk3IxMBSErVEvg4ETc3N71fu0GDBgSHhjPiw8lMnj6TiCexUmj1JCYmht49u1PE2YlK5cpwMJPf\nWwDLli6lf89upJ7bxvU/f6O6d2UePMh+135+IrORn6N23XrU7tSP6j4tAVj30zcUtVSYN29uttr7\nYPyHXA4Op+/H0wHYvXop4VdO8+e2bRnidu/eTdv2Heg6fDwOzi78MXcGjerWYv369a92Q0LowaZN\nmxjYtzcVXWwIepyAT4vWLF2+QnaKykc6tXuHx1dO0qOsLUFRSSy4+IQTp8/i5eWVIa5ksaIM9dJQ\nxjF96OmncxE06v8BEyZMMETaBpHt7RoVRSkNfAWUA54dS6KqaokczTAPSkxMxKaA/bPX1gXsSYgJ\nzXZ7Ez7+iFq16zDnvQFYWFlx5exxDmfyDbFFixb88ftyJkyaTFJyMu1bt2TRIunKFXlThw4dqFjx\nAmfPnsXV1ZX69etLoc2GtLQ0YmJiKFCgQJ7676eqKtt37mJ5uxJYmBjhbG3KX+Gp7N+/X6fYJiUl\nYWNq8+y1tQkkJMTndsp50svsjbwU+AyYA/gA/XlDJlb1ercnC2Z/Tq8PpxEX/YQdyxeybs2qbLfn\n5OTEX+fOsm3bNpKTk2m5+EdcXV0zje3cuTOdO3fO9rWEyE2lSpV6pfkM+ZVWq2X//v2Eh4dTq1Yt\nSpTI/Bll/fr1DOzfj9TUFFwLF2brjl2ULVs2l7PNnKIoWFlYEB6fgnsBM1RV5VGiFhsbG53YHu/2\nYuHa3+hdzobwuBQO3E3gk476n9z5OniZHaTOqapaVVEUf1VVK/7/ey/43GvfjayqKt/Ons2KP1Zi\nZmbGpAkf0759e0OnJYR4DWi1Wjq3b4vfmeO42ZrhFxrLqrXradGiRYa4mzdvUsO7Mp/WcaKEgzl7\nbkWxK8yEG7fv5Jkn3IULFzB10sf4uJlxLw6izR05ceYclpaWGeJSU1OZ+tknbN6wHltbW6bP/BYf\nHx8DZW0Yr3LE3nGgHrCe9D2RQ4CvVVX1esHnXvtiK4TIH1JTU7lw4QKqqlK5cmVMTEz0fs2NGzcy\nefRgptdzwkSj4B8Wx4IrSQQ/CMsQt3btWn6YMpbx1eyevddrcxC37wVTsGBBvef5svbt28eB/fso\n5FyYQYMGyfaiWXiVI/bGApbAGOALoDHphxIIIYRBbdiwgblz5mBiasqMGTOoXbu2Tkx0dDTNGzci\n7F4QigL2zkXYd+gI9vb2ug3moJCQEEramWCiSf+9W8bRgtCHITrn+bq5uXE7MoGEFFssTIwIikpE\nVaBAgQJ6ze+/atq0KU2bNjV0Gq+tlzli74yqqrFANDBGVdWOqqqe1H9qIic9efKERo0a4enpydix\nYw2djnhDJScns3379ucuoUtLS2Pqp59Suawndat5s3v37kzjFixYwLvdu1Io8irm987j06Aehw4d\n0omb+uknWEcHM6dxIeb4FMI5OYzJEz7OqVvKUq1atTgVEkdIdDKqqrLpehTVvSvrdA3Xrl2bVu07\nMv5QGN+di2Kqbzg/L1qCsfHLPAuJ18XLdCNXI32S1N+j4U+AAaqqnnvB56QbORf4+/szafIUIh49\n4u0WLZg0aaLOP9LY2FhcirjhXNQDz8rVOfbnekqXKsm5M2cMlLV4E61bt46+PbujKJCcpqVkCQ+u\n3QjUiftk0kT2rlzGV7WL8SA2iXFHb/Hn3v1Ur149Q5yroz1dS1vQ2CP9CXDFxXACFGcu/GvXp1bN\nmlAp8Tq1i6b/Cjt7P5ajqW4cPHZCT3f6j8WLFzFuzBhUVYtn6VJs/XMX7u7uOnGqqnLs2DGCg4Op\nUqUKZcqU0XtuQj9epRv5V2CEqqpHnzZUj/Ti+1bOpvhmiI+PZ+/evaSkpODj4/NKYzJ37tyhUePG\ntBs4lkoepVn3yzwiHj3i+3+tAx4yZAi2BZ2YumwzRhoNzbv1ZXyHRiQkyFaMIvcM6NWTzuUd6FzO\nkciEVN7fdZshQ4boLGtbs/IPfqtfgvKO6cXxUkQs69et1Sm2KSnJOFr+MyO2kJUJfhGxOtet5O2N\n7wY/ahRJH2P0vZ9EpRbPnd+ZYwYPHsKAAQOJj4/PdPbu3xRFoX79+rmSkzCMlym2aX8XWgBVVY8p\nipJ7R3vkI1FRUdSt3wCNuRXmVtaMGTuOo0cOU7JkyWy1t3XrVrwbNKdZt34AuHqUYmLXpjrFNiws\njMLuHhg9PevTybUoKiqhoaF4eHi80j2JvGXTpk1cvHiRFi1aZDp+aUgJKam0Kp0+TupgYUxdd1uO\nHDmiE2duZkZkwj/H2j1KSsP
lX7NeAWrUqc+SE4f4oI4riala/vCPYMjo93TiPvlsKm1PnWLY7vMo\nioJnmXJM/+rrHLyz59NoNM8ttOLN8DLF9rCiKD8Dq0jfM7AbcEhRFG8AVVX/0mN++crMWbNwKV2e\nAVNmoSgK239byPvjP2TLpo3Zas/Y2JjkpMRnr5MTE9AY6x6ePWTIEPr068f5YwcoXbEKm3+Zj7mF\nlRTa18STJ0+YN3cO94ODadysOV26dMl0SYhPg3qcOXWCYgXMmTnjC0aOGce3s/POzqqmxkZcDI2n\ndlEbUtK0+IXFUbGe7trciZ9PY/DIYYyq6MKD+BS2B8dwatBgnbgt27bTrLEPE/efxMhIoX3nbsyc\nNUsnztLSkr0HD3Pz5k1UVaVUqVIYGb0RWwWIPORlxmwz3wQznaqqauMsPidjtv/ybu/eWJeoRKP2\n3QG4fuEM2376inNnTmervfDwcCpX8aZ6s7a4eJRi1x+LGNinF1MmT9aJHTFiBEt/W05qSjIWltZs\n3byRRo0avcrtiFc0Y8YMvvnyC1JSUylZoiSHfE/oHHweFxdHDe/KuKhReNgYse9eEgNGvccnn36W\nIW7t2rUM7N2TBW08sDUzJjAykY/23iHicRS2tra5eVtZmjhxIt99M5MS9uY8jEtBa2RCWGQUpqam\nOrH79u1j8/p1WNnYMnL06EzHOYXIi7K9zvYVLijF9l9+WrCA7xcuYfz85ZiamfPzp+Pw9irB/O/n\nZbvN4OBgvvp6JuEREbRq2YK+ffvmmYXw+U1YWBhr164lLS2N9u3bU7x48Wy3tXbtWvq+24MP67ri\namPKknNhJFgX5nLAzQxxa9as4ZsJo/mklgOKohARn8LIXfeIi0/I8HQ2adIktv/6PdMbF332Xo/1\nARzyPakz1mlIR44c4eeff8bd3Z3p06ej0ej2xIjnS0pK4uHDhxQuXDhX1guL/+ZVNrVwBr4EXFVV\nfVtRlHJAbVVVf3nB56TY/otWq2XM2HEsXrwIBYUWb7/Nqj9W6OzCInLXqJEj2bJ6JYqRQr/ho5g2\nbZpOzN27d6lTvRoNnK0w0yjsuPOYfYePULFixWxds2XLlpgFnWagtzMAj+JTGLY9kKRUbYa4pUuX\n8vvXExlXNX2sMylVy7ubAolPSMgw6/zAgQO0btGMb5oXw72AGSeDY5hz4gGPY+IwNzdH5A/btm2j\nz7s9MTYCRWPM+k1baNCggaHTEv/nVYrtTtJnH09WVbWSoijGwPm/t258zuek2GYhMTGRtLQ0rKys\nDJ3KaykkJARfX1/s7Oxo0qTJKz0djR41irVLFzOvSXmS0rSM2X+ZCZ9NY+LEiRniRgwZjJX/IT6p\nlT6ZbdHFexy3LMbG7Tuydd0uXboQfHIXE+unH0V3PSKBzw8HE5eUce5hcHAwlStWoIeXJSXtzdh0\nIxbH8rXYsGWbTpsjhw9n8aKfMTM2IkWrsmDREvr375+t/ETeExoaSlnPUkyq5YiXowV/PYjlx4sx\nBN0Lkd8leUh2zrP9m6OqqmsBLYCqqqlAWg7n90YxNzeXfxzZdPz4capUKMeKaR/z4cDetG7elJSU\nlBd/MAtbVv/BnMblaFWyEB08C/NFPU9++/knnbjIiHBKF/hnmVQpOwsiIyKyfd1vv/0W//AkZh+/\nz2r/cL44HEzvfgN14tzc3Nh74CD+mmIsDFAp69OW5StXZ9rmjwsWcD/sIbsOHCbySYwU2nzm2rVr\nuDtY4fX0+DpvF2ssTYy4c+eOgTMTL+Nlim2coigFeXp6uaIotUjf2EKIXDdsQD++q1eCFc28ONSx\nMvFB11mxYkW221OMjEj8v67bxFQtipHuk3KLNm2Z5/eAW4/jCI5JYNb5EFq80zbb1y1WrBj+V69j\nWaY2t61KMG3mtyz8+edMY6tUqcIh3xNcuxXEwsW/PPeLmqOjI3Xr1pWhiXzI3d2de5FxPIpP/3J5\nPyaZx7GJuLi4GDgz8TJeZunP+8BWoKSiKL6AEyBnv+VTqamp/Pbbb9y6dQtvb286deqUpyZchTwI\npXbD9A0JNEYK1R0tCQ4OznZ7g8e8x7gvphKVlEJympYvTtzk+wW6Ra9f//48uH+flt/NJk2bRr/+\nA/howsRMWnx5Hh4e7Ny165XayK9SU1O5desW1tbWFClSxNDp5AklSpRgwqTJfPT1l5RytCbgYQzf\nzZun9z2eRc54qdnIT8dpvQAFuK6q6gv77WTMNvc8fPiQ+/fvU6FChSz3U71y5QrvvT+ekPsh1Ktb\nl9nffqPzhKTVamnfsRNB98PwqlqHcwd30KFNa2Z/+01u3MZLad2sCaWj7zK1dglCYhNps9WfRSvX\nvtIG6bNnz2bJj/MxMjJi/ORPpPvVwEJCQni7aWNiHkUQnZBIpy5dWLjkV1kb+9Tly5e5desWZcuW\npXTp0oZOR/zLf54gpShKdeCeqqqhT1/3AToBd4DPVVWNfMEFpdjmgtZt2rB79x5MzMww1mjYteNP\n6tatmyEmLCyMtypVpnW/kZSsWIVdKxZjb6awdfOmDHGnT5+mU7cefLlmL8YmpsQ+ecx779TlTtDt\nPHPUV1hYGB1at8Lv8mW0qsq0adMY/5H+N5V/U6mqSlxcHFZWVln2cCQlJTH10yn4HjqEi5sbX34z\nO8tD0l9G25bNKRMdxOQaHsSlpNFu2yVGfTGTvn3lsDGR92VngtTPQPLTDzcAvgaWkz5eu+g5nxO5\n5LvvvsP3xCnmbffllyNXaNV7KO06dNSJO3DgACUrVqFZt36UKFeJIVO/Y/eunSQkJGSIi46Oxt7J\nGWOT9E0GrGztsLSyIjZWd79ZQ3F2dub42XOEhIbxJCZWCm02XbhwgTVr1nDx4sUsY/z8/ChZzB1H\nB3ucHOzZs2dPpnGD+vbhr82r+MANPB9epWGd2jx69Cjbufn7+9PD0xlFUbA2NaaNuy0Xz8tGdeL1\n9rxiq/m/p9duwCJVVTeoqvoJoLvHWj4VGxvL/v378fX1JTU1b20JfeDAAWo2a4NDIRcURaF5t35E\nPdbtcDA3Nycu+gl/9zTEx8YA6HQ5V6tWjfDgOxzYuJKIByFsXDibQoUK4ebmpv+b+Y8KFCggC/r/\nJSYmhimTJtK7WxfmzZ1LWlrmiwZmfjmDVo0bsmr6RFr6NGDO7G91YlJSUmjdsjnt3LSs6VSSD6oW\noHuXTty/fz9DXFJSEms3bGRZs7I0ci/I+GrFecvBgr1792b7PkqXLs3OoPSZ3slpWg48iMWrbLls\ntydy1+PHj9m7dy+nTp1Cq9W++ANviOdNkNIoimL8dKlPE2DIS37utbBv3z62bttGAVtbRo4cSeHC\nhXVi7ty5Q8NGPlg7OBEfG4OrsxN7d+/KMyfleHp6sm7rDlKSkzAxNePK2eOYW+rOVG3ZsiWffj6V\nRZ+/T4nylTm6dQ3jxr2nU6zs7OzYu2c3g4cMY9uSuVSuUoXdO3fILj8Gtn//fkYPHczD
iAjq16vH\nkt9+1+nWT0pKokmDepRMe0L9wjas/N4Xv/N/8ctvyzPE3bt3j1lffcWJHtUpbGVGcEwCdT/7jB7v\n9srwbyA4OJjUxAR8irsCUL6QJSULJuHn54erq+uzOCMjI3h6ZJ6lSfrPSVJa2iv9zPy05FeaN2rI\npjt+RMQlUqVmbQYO1F0WJfKeS5cu0dSnIS5WxjyKS6JKjdps2LJNzubl+WO2k4FWQATgDnirqqoq\nilIK+E1V1bqZfvCfz+fZMdsVK1bwwUcf06RrfyJDg7nke4BzZ8/g7OycIa5dh46Yu5ak/aCxaLVa\nfpgwnDaN6jA5k72HDSE1NZXSXmWIjounUBF3Aq/48f3cOQwdOlQnNjo6mtnffUdwyH0a1KtLnz59\n8tQsY5G5mzdvUruaNwsblaZSIVtmnbvLHVs3dh88nCFu3759fDywNwc6vIWiKMQmp+L16zHu3n+Q\nYbbqyZMnGdm9Iwc7/HNCZp31F/h9606qVKny7L2YmBhcnAvxXVNXClubEpecxrj9D9h72Je33sp4\nuuaYEcM5vWMzg8s6cTY8jgOP0jjr5/9KJ93ExsZy4cIFrK2tqVSpkvysZkNcXBx3796lSJEiubY/\ndu3q3lQ1CqV5yQKkpKl8cSKC0Z/NfKO+LP3n82xVVZ2hKMp+wAXY83+V0wgYrZ80c8fUL6YzfMYP\neFVO3zN2yRcf8ttvv/HRRx9liLt16xY9Oqf/kBgZGVGuRj1u3Lqp056hGBsbc+tGAIsXLyY4OJgu\nSxfp/CL8m62tLVM//zx3E3xNqKpKZGQkGo0GOzu7LOPWrFnDvFlfkZaaRv9hIxg6bJjei8Dhw4dp\nVtyJZh5OAHxVtyQuCw6QkpKSoWciJSUFK1PjZ/mYGxthrDHS2fDDy8uLe0/iOXj3ET7uBdkXFE54\nXBKlSmUcGbKxsWHWN98wefJE3ipsTUBEAj1798v052vuDz/yw3wvdh/Yj2vtYhz79LNXPlLO2tqa\nevXqvVIbb7K9e/fSvUsnrE01RMUns2jJL3Tr3l3v1w28HcSguumHaZhoFMrZKdy6mXd+ZxrSc5/t\nVVU9mcl7AfpLJ3fEx8dj6+D47LWtvSNxcXE6cd7e3hzevJriZSqSnJTIqd1bGNavV26m+kJGRkaZ\nPsmKlxMfH0/3Th05fOQwWq1Khw7t+XX5Cp1urz///JMPRgxlXv2SmBmb8P7nUzAxMWHgoEF6zc/O\nzo7b0QloVRUjReFOdALmpqY6+dWtW5eQRJVZZ25T39WOZdfCqFGjBk5OThni7O3tWbtpM907dyQx\n4TIWlhas37I10+I4YuQo6tarz8WLFylRokSWxc/IyIgxY8cxZuy4nLtxkW1xcXF079KJ8dXsKV/I\nkqDHiQwbMoh69evrfc1y5UqV2Bt0lR7l7IhN1nL6YSodq1bV6zVfF2/kqT/vvf8BB46fpvu4KYTf\nv8eyLyeyb89uvL29M8Q9fvyY1u+05XpAAMlJiXTo0JGlvyyRMcx8ZPx7Y7m1ZxOLm5QhVavSc9cV\nWgwayccTJmWIe7dLJ2pHXadPhfTJYrsCH7LokTn7jh3Xa37Jyck0bVgf00chvGVvzrqbEUyZ8TVD\nhw3Tib179y4fjh3DnduBVK1Zi6+/nZ3lE6ZWq+Xx48fY29vL+tV85urVq7RsWIf5Tf8Zg//0eCRz\nlq7Gx8dHr9cOCQmhZdPGhIXeJz4xhaHDhvHtd3PeqGGA/9yNnJ/Nmvk1Uz75lGVT38fW1pY1q1bq\nFFpIfwo4duQwISEhmJmZUahQIQNkK/TpzPHjfFDGGVONEaYa6FXakZ3HdQuouYUFUWH/zEZ/kpSK\nWS6cpmNqasreQ0f4/fffCQ0NZUX9+jRs2DDTWHd3d9Zs2vxS7RoZGeWZtdMiZxUpUoSo+GSCHidS\n3N6csNhk7kbG4eHhkSvXvnDpCsHBwVhbW8vP2P95I59shfhbnx7dcA46zye1SqCqKmMPB1CwYVtm\nz814xvDFixdp2rA+Q8s5Y6YxYr7/A1Zv3Ezjxo0NlLl4Waqq8vWXX/LT93NRVRg8fDiffj41Xz9t\nrV61iuFDB1PMwYq7kXFMm/Elo0aPMXRabwQ5PF6ITISEhNCoTm0Km2hJTtOSaGHLId8Tme436+/v\nz6IFP5GWmkrv/gOoXbu2ATIW/9XiRYuYN3UyS5t6oVEUBu4PYMD4SYweO9bQqelVSEgIAQEBeHh4\nULx4cUOn88aQYitEFmJiYjh69CgajYaGDRvKYevZkJqamn4EnLt7lstMUlJS+GzKJPbs+BOHggWZ\n8c13VK9eXe+5tX+7Be2NwujomT6GuTPwIUufWLPr0BG9X1u8eV7lPFsh8jUbGxtatWpFixYtpND+\ny9mzZ1myZAkHDhwgqy/P27dvp4CVBdWrVKKgvR39+2W+h/G4USM5tWElX5ezpb1pFK2aNeXWrVv6\nTB8AO4eC3I7+Z2vSwCcJ2DnIWKLIXfJkK8QbJjY2li+mfs4V/4tUrFyFKZ9+nun5t9/Pm8v0zz6h\nios11yISeKdzN37I5PhBa3NT+lR0oGVpe4Kjk/hwzx2Wr1pLp06dMsTZ21hzukd1nK3MAHjvcAAV\n+4xh3Dj9LhkKCAigQe1atHa3Q2MEmwMjOXjMl/Lly+v1uuLNJE+22bR//34GDBrMiJGjuHr1qqHT\nESJLP/zwA7bmpmiMFAoVsObIEd1u0rS0NN5u1oQzm5dTLvYKvuuX0qZlc509bKOjo5k4YQJfNnRm\nZBU7ZjYqxPrVq7hw4UKGuIcPH5KYnEKLUumbgbjZmlGpsBW7Mjmn19TEhCdJ/2yy8SRFi6mpaU7c\n+nN5enpy+vwFvHoMo2TXoZz667wUWpHrpNg+x5YtW+jesxfagu6Ea82p16CBFFyRJ124cIGP3hvH\nkhYVCBnRlJFvudK2ZXOdwzMuXbpE0M3rjK7qQF13W8ZVK8jVS34EBGTcq+bRo0fYWphRyCp9lypL\nEw1u9paEhoZmiHN0dMTISCHgUSIACSlabjxKyLSYfTxpMj13X2XJxbt8fPQG5x4n061bt5z8z5Al\nd3d3PvroIz7++ONcWQIjxL9JsX2Or2bOou/EL2nRvT8dh4yjUYdeLFi40NBpCQPRarWcOXMGX1/f\n555msnHjRiqV86J0cXemTJqQ5ek7OWndunV4O9vSskQhLE00jKvqQVpaGn5+fhni1Kc7URk97eRS\nFNAYGencj5ubG6aWVuwLTD8t6mJoHIGP4qhUqVKGOCMjIyZMmsInB+/xyYG7DN8eSGH3EowZo7vM\n5P3x45n+/QKuFK2KrU8HTp47L+swxRvjjdzU4mUlJydjaW397LWFjQ1JceEGzEjoQ3BwMFu3bkWj\n0dCxY0edLQ4hfZzzLa/ShIeHozFSsLC04tylKxlOwAE4cuQIQ/v3ZZS3PXbmGpYsX4yiKHwx4yu9\n3oO7uzu3n8STmJqGubGGkNhEklLTdI5HrFC
hAoWKuPPzhVBqFDblxP0kinqUxMvLK0OciYkJO3bv\npVO7d1hw9gaO9vas37QFFxcXnWtPmzaN5s2bs23bNjw9Penfv3+Wu1J17dqVrl275tyNC/GakAlS\nzzHv+++Z++MC3n3/c+JinrB81idsWr+OBg0aGDo1kUOuXLlCo3p1qVzIlBQtBESrnDxzjqJFi2aI\na9G0CUkBF1jXrirGRgpDd/tzy8Sec/6XM8SNHT2KqKNr6VQu/YntVmQiP99QuXYrSK/3odVqKVOi\nOEpMJPWK2LP15kPqNmnG5m3bdWKjoqKY+NGHXLnkR8VKlfly5jfPPRUmKSkJMzMzfaYvRL4h2zVm\nw5jRozFSFH5fOhczMzN+X7ZUCm0+M/njD3nHw4x2XumbWKzwf8T0qZ/z85JfMsQFXr/KhApFMDNO\nf2LrXaEIQ/df12nP2saGoOR/vmQ+TkjFykr/x5sZGRlx5WYgkydPJiAggM+GNGHUqFGZxtrZ2bFg\n0eKXblsKrRCvTp5sxRutXs1qNLd8SGUXKwAOBT3hjqM3G7ZmfCKsV6smzo/v8svbb6EAE49c53C0\nEZdvBmaICw4OplqVytRwVLA1hZ1BCfz2x2ratGmTW7ckhDAgebIVIhMtWrVh/ZL5FLMzI0Wrsv12\nAh/01S2M6zdv4S2v0lRedhQzjRFhCakcO31WJ87NzY2z5y+weNEi4uJi2dapM3Xq1MmNWxFC5GHy\nZCveaKmpqbw3djTLli7DSGPE2LHjmPrF9Ew3qY+Pj+f3338nJSWFXr16PfegeSHEm0n2RhZC6M2p\nU6c4efIkLi4udOrUSc58Fm8sKbZCiP8kPj6eTyZP4tzpk5Ty9OKrWd9muixqyZLFTBz/PrWKWBH4\nJBWPCt5s+XOnFFzxRpJiK4R4aaqq8nbzpiTc9sPHzZwLEclcT7LmL79LWFhYPIvTarXYWlsxq7EL\nbrZmpGlVJhwJZ+4vK3j77bcNeAdCGIbsjSzEayoyMpLJEycwsE8vli9fnuXpOznpwYMHnDp5grHV\nCuLtak3/ivYYJUZz8uTJDHGJiYmkpKbiapO+x7HGSKGIrSkRERF6z1HkPbGxsXTt1AErC3OcHR1Y\n+uuvhk4pz5BiK0QOS01N5euvv2bcuHE6ewn/V7GxsdSrUZ2QnWuoEHKebyZ+wGdTpuRQpllL/3ZO\nhn5rZasAABXSSURBVMKeplV1Jo5ZWlpSqUJ5Vl9+TFKqFv+wOC7cj6V27dp6z1HkPSOGDCbc7xiL\nW7szsXoBJnwwlkOHDhk6rTxBupGFyEGxsbEULmgPqhYLEyNik9P4+Zdl9OnTJ1vtrVq1il8//4gN\nrdI39g+NS6Ly8uPExidkuSViTlBVlY5t2xB66TQNipjiF5HCA2NHTp07r7PJxf379+neuSMnz5zD\n2dGBxUt/o2XLlnrLTeRdLk4FmVbbDmfr9J6OVf4RlGw3hOnTZxg4s9wj3chCZGHpr79Sv3pVGtWq\nwcaNG1+prfLly1HExphlHUqxpG1J2ng6MHLwgGy3l5SURAGzf5bD25oak6bVPvcghJygKAqr12+k\nRe9hXLEuR5nm3Th07Himu0m5urpy5H/t3Wl4FFWixvH/SSchJIQsgCAIhD0kQRNAEMOmwogIERCR\nGRF1vCOCC26ICiMgzqg412XGDYHxARRGUUG4iCiIFxhkC4IEBTEsCQTCEkII2ejuuh/oizABhZCi\nkvT7+6JdqT79to+dN+dUddWq1RSXlJC5L1tF68eio6PIzCsBTv7BtrcQatcufVKdP9LMVvzajOnT\nmTDqUV5ObkqJx8tjK9OZ+v5sevfuXabxIkJDuC02gpTYaAAyjhYz+qvdHC8p251/srKyaNsmgScT\nLyfpsnBe3ZhFjYQOzJrzcZnGE7HTl19+yeCBA0huWIODhR6OV4tm1dr1hIeHOx3tktHZyCJn0bNL\nMvdGFdK72WUAzEjbw6qIlsz6uGwz3EYNGxJWdJjnr29IkCuAWd8fZOH2XI4Vu3/7yeeQlpbGk488\nzP59WXS7vgd/nfTyGWcEi1QkW7Zs4auvviI8PJzBgwcTFhbmdKRLSpdrFEfs3r2bO2+/jfUbN9G4\nQX2mznif5ORkp2OdEhQcTP6JY6ceHzvhIfgiLrz//ebNXFG3NvfM+5nqQS7yit288c75X/T/bBIS\nEvh8ydcXNYbIpRIfH098fLzTMSoczWzFNl6vl6viYulfJ4BhV17B8swcRq7YwaYffjzrfVGdsGTJ\nEu4YOIBHr6pPsdfijc37WLTka9q3b1/mMd1uN8899xwHDx5k9OjRxMTElF9gEanQtIwsl9z+/ftp\n07IFP//x2lNfGbnti6088NLfSUlJcTjdL1auXMn0qVNwBQYy7IEHSUpKcjqSiFRSWkaWS65mzZoU\nlJwgK7+YBuEhFLu97MrNp1atWk5HO0Pnzp3p3Lmz0zFEpApT2YptQkNDmfDcBG56+UVujqnF2gP5\nJHbqolvOiYjf0TKy2G7ZsmWkpqbSqFEjBg4caOvFGPxZbm4uzz83gZ0/b6fDtck8/sQoAgP197TI\npaRjtiJVWFFRER3bJVHPfZiEWoEs21tC3LU38P7sD52OJuJXdMxW5BLZt28fU6dOpeB4Pv0H3EqH\nDh1sf80VK1bgzjvEiM61MMZwzRVe7p43j5ycHKKjo21/fRH5dVrPEzlPM2fMoH2bOBJbt+Tvr712\n1rvvZGVl0SEpkd1z34P//ZS+v+vBokWLbM/m8XgICjCnzvp2BRhcJgCPp2xXrhKR8qVlZKmydu7c\nyeLFiwkNDWXAgAHUqFGjzGN99tlnPHTvXbzZvQUhrgAeXp7OyGcncv/w4WfsN3bMMxz64l9M6toS\ngEU7DvDKbjdrNn5/Ue/lt+Tn53NlXGs6RJ4gvnYwSzMLCW1yFQsXf1XqTj0iYh/diED8ytq1a+mQ\nlMjKdyYx669juaZtIrm5uWUe78OZ0xnd9gq6NaxFx/pRTOzYmDnvzyi1X35eHvVDg049rl8jhPz8\n/DK/7vmqUaMGK75djat1FxYdq01ir9v5eN58Fa1IBaFjtlIlPTnyIf5yTWMGt64PwIivt/L311/n\n2XHjyjReaFgNDmWfOPX4UGEJ1UMjS+3X79aB/OGDmbStW5PLQqvx9Lc76X/7XWV7ExeoQYMGfPDh\nnEvyWiJyYVS2UiUdOHCA+Ga/XBIyPjKErOyy38j9kVFPcl3nZI6f8FDNZXgnLZtPFrxZar/u3bvz\n6tvv8syzYyksLGTg4CGMn/h8mV9Xfl1RUREbN24kKCiIxMREXC6X05FEzkrHbKVKemDYfexd8QVv\nXteSQ4UlDFy4hUnvTKV///5lHnPr1q1MfXcyHo+HIUPvol27duWYWC5UdnY213VJxp2fS5HbTbNW\n8Xz+5RLdEUkcpe/Zil8pKCjgT3cP5ZN586kWHMSfxz3LE6NGOx1LytEdt99G0Q/LGZoQhdeCV9Yf\nps
eQEYwbP8HpaOLH9D1b8SuhoaF88NHHzPR6McboRKEymjx5MsuWLSMhIYFnnnmmQl39a9vWHxlY\nLwRjDC4DbWsHsnVLmtOxRM6q4nxyRGwQEBCgoi2jW/r05smRD3Bo3Re88dJzXBnXCq/X63SsU65M\nTGLFniK8lsUJj5dv958gUUv7UkFpGVlESsnIyKBZkxgm921K7dAgitxe7pufzpvTpnPHHXc4HQ+A\nnJwcbup5Axk7d3DC46Fzl6589Ok8goODnY4mfkzLyCJy3nbt2kX1wABq+74zHBIYQL3wYHbu3Olw\nsl9ER0ezau16fv75Z4KDg4mJidEqhlRYWkYWOU+WZZGens5PP/1UoZZT7dChQwc8GD7/6Qger8X6\nrHx25RbTr18/p6OdweVy0apVK5o0aaKilQpNM1vxe7t27WLu3Lm4XC4GDRpEvXr1Su1TXFzMgJQ+\nrF2zmsCAAJo0b8Gir5YSERHhQGL7hYSE8PG8+fx+YH/eTc2merCLv736OgkJCU5HE6mUdMxWqiTL\nsnj7rTf5aOYMqoeG8tS4CXTr1q3Ufps3b+aGrl3o0ySaEo/Fsn35/HvtOmJiYs7Yb8L4cSye8RZP\ndKhFgIG3v8uhSZe+vDNl2iV6R85xu926L67IedK1kcWvvPbKK7z5lwk8fLmH/sE5DEzpw/r160vt\nN+7p0YxKrM9r3Vry1vWtGNIsihefn1hqv+83pNKpXhCBAYYAY+hSP4TvN353Kd6K41S0IhdPZStV\n0vSpk3m9SzN6xNRhcOv63J9Qj1nvzyy135HDh2keFXrqcYvIEHIOHii1X2x8AqkHT+C1LCzLYn12\nMbFx8ba+BxGpOlS2Uibbtm3j2vZtiQwPo0PilWzZssXpSGcIDAyk8LR7uRa6LQIDg0rt16vvLby4\nYQ8ZeYX8fOQ4r27aR6+U0icBPTP2z3hqN2Xk0mwe/+YgO6woJv33q7a+BxGpOnTM1k8UFRUxbdo0\n9u7JJLlzF26++eaLGiuuRTNGtIhkYMu6zE8/yN/SDvDD9vSLumdseXp/5kyeefRhRiU14GDhCd7Z\nks2K1Wto1arVGft5PB6efnIU7/1zGq4AFw8/+hhPjxlz1jNbPR4PGzduxOPxkJiYqO9zikgpujay\nHyspKeG6Lsl4DuyiaTisyCrhwcefYvTTT5dpvE2bNvH73j1ZPajtqW3dP/2etz+aS8eOHcsr9kVb\nsGABH70/g+phYTzyxJPExcU5HUlEqjiVrR9bsGABTw+/h+e71CbAGA4VnGDEogzyjxeU6eSXjIwM\n2ibE8d2QjkRUCyK/xE27Wev4ZvXaUjNHERF/oitI+bG8vDxqhQUR4FsajQoJxLIsSkpKylS2jRo1\nYsjQu+g19yN6NohgWVYeKQMGqGhFRM5BM1s/kJmZSWKbeO6JD6dVrRDmbT9GYZ2WfL18ZZnHtCyL\n+fPnk5aWRmxsLAMGDNAVfETE72kZ2c+tWbOGEffdy759+0lOTmbytPeIjo52OpaISJWishUREbGZ\nriAlfsfr9ZKWlsaePXucjiIifk5lK1VSeno6l0dH0jEpkWaNG9H+qjZV/k49IlJxqWylSrq5x/Xc\n1DCCvQ/cwPb7rqNg7y6GDRvmdCwR8VMqW6mSDmRnMyyxEQHGEBkSxND4BqxfvcrpWCLip1S2UiWF\nhYWxPPMwAF7LYlnGYRo0inE2lIj4LZ2N7CdSU1MZOXwY+7Ky6JSczBuTpxAZGel0LNssWrSI225J\nIa52DXIKSzhGINt27q7S71lEnKev/vixvXv30rZNAhM7NKR9vQhe37SH7OgYvli6zOlottq5cycz\nZswgPDycESNGEBIS4nSkSsftdnPgwAFq166tGy+InAeVrR+bNWsWc14Yw/QeJy+n6PZ6qf/2MnJy\njxIaGvobzxZ/tWbNGm5N6cuJ4kJKPBb/nDGT/v37Ox1LpELTtZH9WFhYGPvzi7EsC2MMBwtKMMZo\npiLnVFJSwoC+fXi54xX0aV6X77KPcuvdQ2nf/gcaNmzodDyRSkcnSPmBXr16Qa163Ln4R15dt4OU\nBWmMGTOmTDchEP+wZ88eXF43fZrXBSCpbgRX1otmy5YtDicTqZz029YPVKtWjaXLVzJ58mT2ZOzm\nxce7aTlQftVll13G0aJituXk0yq6BocLS/jhwBHNakXKSMdsReSsZkyfzhMjH6J9g1ps2n+EP414\nkPETn3c6lkiFphOkxO9s376dzz//nNDQUAYNGkRERITTkSqd7du3k5aWRpMmTUhMTHQ6jkiFp7IV\nv7Jq1Spu6d2LlKZ1OFjkZluh4dvUDbqtoIjYSmUrfqVrx6u5O7qEgbGXA/Dg11tp2u8uxk+Y4HAy\nEanKdIs98SuHDx+mVa2wU49bR4Zw+OABBxOJiD9T2coZNm3aRMekq7gsKpLfde9KZmam05HKpMeN\nvfjL+kxyCkvYlpPPlB8P0LPXTU7HEhE/pWVkOeXIkSPEt2zOn9vW54bGtZmxJYt5h7xs+mErLpfL\n6XgXpKioiOF/upePP/mU6iHVGDtuPA+PfMTpWCJSxemYrfymJUuWMGH4PSzsEw+AZVnEzVzDv1M3\nEhMT42w4EZFKQMdsbZaTk8OsWbOYPXs2ubm5Tscpk4iICLLyCih2ewE4UnSCY4XF1KxZ0+FkUp5y\nc3N5YNh93JDciYeG38/Ro0edjiRS5WlmWw4yMjJI7tyFBs1b47W8ZO/czrer/k39+vWdjnZBLMti\nUP9+7Nm0li51w1iQcZR+Q+7mhUkvOx3Nr3m9XqZMmUJ6ejr9+/enU6dOZR7L7XZz7dXtiOcYfWOi\nmbfzMOnBtVj+7ZpKd6hApCLSMrKN7rzrbgpDorj1/scB+PAfL1In8ART3p3scLIL5/F4+OCDD0hP\nT6dt27akpKRgTKn/b+QS8Xq9xLdszsGsTK6IqMbWQ4WMHTeBsWPHlmm8jRs3MqhXD9YNbocxBq9l\nkTRrHQu/WUlcXFw5pxfxP7rrj4327dtH0s3dTz1uEncV276Z71ygi+ByuRg6dKjTMcTnhRde4Ej2\nHt7u05RqgQFsOVDAhAnjyly2xhg8loXXApcBr2Xh8VoEBOiIkoid9AkrB926duHL2dMoyD9GwbE8\nlnz0Ht27dXU6llQB27ZtI7Z2daoFnvyotq5TnRK3l4KCgjKNl5CQQMPmLblv6Vbm/rSf/1qyjVYJ\nbWjZsmV5xhaR/6CyLQdPP/UUV7dpzfAeiQzvmURyu0Qef+wxp2NJFXDjjTeyPiuffcdKAFj40xHC\nQ6sRGhpapvFcLhcLv1xC896DmGfVIy7lD8xftFgzWxGb6ZhtOXK73QC6T6yUqzuH3MHs2bMJCjC4\nXC7mLlhIz549nY4lImehE6REKrG8vDwyMjKIjY3VH3MiFZjKVkRExGa6qIWIiIhDVLYiIiI2U9mK\niIjYTGUrIiJiM5WtiIiIzVS2IiIiNlPZioiI2ExlKyIiYjO
VrYiIiM1UtiIiIjZT2YqIiNhMZSsi\nImIzla2IiIjNVLYiIiI2U9mKiIjYTGUrIiJiM5WtiIiIzVS2IiIiNlPZioiI2ExlKyIiYjOVrYiI\niM1UtiIiIjZT2YqIiNhMZStnSE1NJTEulogaYXTr1JFdu3Y5HUlEpNJT2cophw8fps+NPXmocTCb\n7uzI9UF59O5xPW632+loIiKVmspWTtmwYQMtosK4LbY+0SHBPNquMXlHcsjMzHQ6mohIpaaylVOi\noqLIPHqcIrcHgIMFxeQVFhMZGelwMhGRyi3Q6QBScbRr145ru99A7/krSK5bg0UZR3j0sceIiopy\nOpqISKVmLMuyZ2BjLLvGFvt4vV7mzJnDjh07SEpKolevXk5HEhGpNIwxWJZlSm1X2YqIiJSPc5Wt\njtmKiIjYTGUrIiJiM5WtiIiIzVS2IiIiNlPZioiI2ExlKyIiYjOVrYiIiM1UtiIiIjZT2YqIiNhM\nZSsiImIzla2IiIjNVLYiIiI2U9mKiIjYTGUrIiJiM5WtiIiIzVS2IiIiNlPZioiI2ExlKyIiYjOV\nrYiIiM1UtiIiIjZT2YqIiNgs0M7BjTF2Di8iIlIpGMuynM4gIiJSpWkZWURExGYqWxEREZupbEVE\nRGymshWxiTFmjDEmzRizyRizwRhzdTmP380Ys+B8t5fD691ijIk97fEyY0zb8n4dkarI1rORRfyV\nMeYaoDeQaFmW2xgTDQTb8FLnOsPRjjMf+wH/A2y1YWyRKk0zWxF7XA4csizLDWBZVo5lWfsBjDFt\njTHfGGPWGWMWGWPq+rYvM8a8Zoz5zhjzvTGmvW/71caYVcaYVGPMSmNMi/MNYYwJNcZMM8as9j2/\nr2/7XcaYT3yvv80Y89Jpz7nXt221MeZdY8w/jDGdgBRgkm+W3tS3+yBjzBpjzFZjTHJ5/IcTqYpU\ntiL2+BJo5CuhN40xXQGMMYHAP4BbLcu6GngP+Otpz6tuWVYS8IDvZwA/Ap0ty2oHjANeuIAcY4Cl\nlmVdA1wP/M0YU933s6uA24ArgduNMQ2MMZcDY4EOQDIQC1iWZX0LzAdGWZbV1rKsHb4xXJZldQQe\nBcZfQC4Rv6JlZBEbWJZ13Hc8swsnS+5fxpingFQgAfjKnLzqSwCQddpTZ/uev8IYE26MqQnUBGb4\nZrQWF/a5/R3Q1xgzyvc4GGjk+/ellmXlAxhjtgCNgTrAN5ZlHfVtnwP82kz6U98/U33PF5GzUNmK\n2MQ6ecWY5cByY8xmYCiwAUizLOtcS67/eazVAiYCX1uWNcAY0xhYdgExDCdn0dvP2HjymHLxaZu8\n/PL74EIu/fb/Y3jQ7xORc9IysogNjDEtjTHNT9uUCOwGtgF1fGWHMSbQGBN32n63+7Z3Bo5alnUM\niAD2+n5+zwVGWQw8fFquxN/Yfx3Q1RgT4VvyvvW0nx3j5Cz7XHR9VpFzUNmK2KMGMN331Z+NQGtg\nvGVZJ4CBwEu+7d8BnU57XpExZgPwFvBH37ZJwIvGmFQu/DM7EQjynXCVBjx3jv0sAMuysjh5DHkt\nsALYCRz17fMvYJTvRKumnH0WLiJnoWsji1QQxphlwOOWZW1wOEeY75izC5gLTLMs6zMnM4lUdprZ\nilQcFeUv3/HGmO+AzcAOFa3IxdPMVkRExGaa2YqIiNhMZSsiImIzla2IiIjNVLYiIiI2U9mKiIjY\nTGUrIiJis/8Ddjbvid3BBjsAAAAASUVORK5CYII=\n", + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plot(X_lsml, Y)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Neighborhood Components Analysis\n", + "\n", + "NCA is an extrememly popular metric-learning algorithm, and one of the first few (published back in 2005).\n", + "\n", + "Neighbourhood components analysis aims at \"learning\" a distance metric by finding a linear transformation of input data such that the average leave-one-out (LOO) classification performance is maximized in the transformed space. The key insight to the algorithm is that a matrix $A$ corresponding to the transformation can be found by defining a differentiable objective function for $A$, followed by use of an iterative solver such as conjugate gradient descent. One of the benefits of this algorithm is that the number of classes $k$ can be determined as a function of $A$, up to a scalar constant. This use of the algorithm therefore addresses the issue of model selection.\n", + "\n", + "You can read more about it in the paper here: [NCA](https://papers.nips.cc/paper/2566-neighbourhood-components-analysis.pdf). 
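+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "As a rough sketch of the objective described above (an assumed form for illustration, not the library's optimized implementation): each point picks a neighbour with softmax probability over squared distances in the transformed space, and NCA maximizes the total probability of picking a same-class neighbour."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "\n",
+    "def nca_objective(A, X, Y):\n",
+    "    # Expected leave-one-out accuracy under the stochastic\n",
+    "    # nearest-neighbour rule; NCA maximizes this over A.\n",
+    "    AX = X.dot(A.T)  # transformed data\n",
+    "    D = ((AX[:, None] - AX[None]) ** 2).sum(-1)  # pairwise sq. distances\n",
+    "    np.fill_diagonal(D, np.inf)  # a point never selects itself\n",
+    "    P = np.exp(-D)\n",
+    "    P /= P.sum(axis=1, keepdims=True)  # softmax neighbour probabilities\n",
+    "    same_class = Y[:, None] == Y[None]\n",
+    "    return (P * same_class).sum()"
+   ]
+  },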
" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "nca = metric_learn.NCA(max_iter=1000, learning_rate=0.01)\n", + "X_nca = nca.fit_transform(X, Y)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAdsAAAFsCAYAAACEtRP5AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3Xd4VVXi9fHvvje9QiCEICX0FkB6lSJIk6qC6FiGURDF\nMo69t59j17E7FixYEFEQBBVQOkrvHUJvCSWV1Hv3+0cyjq8jEpDDSVmf5+EZcpJ7s5wZs7LL2cdY\naxERERHneNwOICIiUtapbEVERBymshUREXGYylZERMRhKlsRERGHqWxFREQcFuDUGxtjdE+RiIiU\nO9Za89trjpVt0Td08u1FRERKFGP+p2cBTSOLiIg4TmUrIiLiMJWtiIiIw1S2IiIiDlPZioiIOExl\nKyIi4jCVrYiIiMNUtiIiIg5T2YqIiDhMZSsiIuIwla2IiIjDVLYiIiIOU9mKiIg4TGUrIiLiMJWt\niIiIw1S2xZCVlcWoG8ZQr0EjOl/QjeXLl7sdSUREShGVbTFc89eRbNpzkNFPvkaL3pfQp18/du/e\n7XYsEREpJYy11pk3NsY69d7nks/nIzQsjLfnrCc4NBSAtx+5nasG9+W6665zOZ2IiJQkxhistea3\n1zWyPQWPx0NgYBDpx48CYK0l/VgKYWFhLicTEZHSIsDtACWdMYaHHnqQ5265im5Dr2Tvlg3kpB1j\n0KBBbkcTEZFSQtPIxfTVV1/x49y5xMfFccsttxAVFeV2JBERKWFONo2ssj0Ltm7dyldffUVQUBBX\nXnklVatWdTuSiIi4QGu2Dlm6dCkdOnZi/tptTF+wlJatWrN37163Y4mISAmike2fdFGfvtTp0Ivu\nQ0YAMOGVp6gZFcCrL7/scjIRETnXNLJ1SGpqKnE1En75OK5GAsePH3cvkIiIlDgq2z9p0MABfPXm\ncyTv38OerZv4dvxbDBowwO1YIiJSgujWnz/p/vvuIy0tnSevu4TAoCDuvvMOhg8fDhSOet966y2+\n/X4mUVFRPPLQg7Rp08blxCIicq5pzdYBPp+P0TeM4ZNPPsZiqFGvEZ37DmbauFdZtHABjRs3djui\niIg4QGu259Drr7/OktXreHP2at6dt4GqNRI4tHcXXQYN57PPPnM7noiInGMqWwf8vGQpnQcMIyQs\nnIDAQHpedhVJG9fiy8/D6/W6HU9ERM4xla0DatdOYMuKn/jPNPrGZYspyM9jycypXH311a5mExGR\nc09rtg5IT0+nW48Lyc734Q0MYt+OrVzUuzcPPXA/kyZ9yeq1a2nYoAGPPPwQkZGRbscVEZGzRMc1\nnmO5ubnMnz+fvLw8unTpQlRUFP0uHkBanqVd70GsXfgDJ1L2s2jBfAICtClcRKQsUNm6LCkpifYd\nO/HitJ/Iy83h63GvsfCbSQy7dCiXDx9ObGwsiYmJGPM//xuJiEgpod3ILrPWYjwefAX5/PPGK0g7\nmsIVtz3A3J9XMPyKv3BR335cc+1f8fv9bkcVEZGzTCPbc8Tv99P9wp6k5fpIPXaUpz77DmMMudnZ\n3NSnNc9OnMWr94zhsfvvYcSIEW7HFRGRM6CRrcs8Hg/Tp02lTtXKeD2eX6aLAwID8Xg8BAQG0bht\nZ7Zt2+ZyUhEROdtUtudQZGQk48d/hMnP4Ys3nmPj8p94/cFbqZfYEk9AAEtmTWfa9OksXLjQ7agi\nInIWqWzPsYiICObPm0tI9lG++fezbF25hANJW7mlX3via9UhoEJVLurdmzfeeMPtqCIicpZozdZl\neXl5tO/QkbYDr+CHLz+mYmwc1es2ZO7kz3jumacYdf31bkcUEZFiOtmarW7wdFlQUBBh4eHsT9pG\neGQU/3jhXYwxdO47mDtHXUb7du1o1qyZbgkSESnFNI1cAtx8040smDaRmLhqv5Rq5Wo1yMzI4MKL\nejNw8BB8Pp/LKUVE5EypbEuAK664giefeJyls6ezauGPHD18kHH/vJ/mHbsRU7U6c+fNJyq6Ak88\n8YRKV0SkFNKabQkye/Zsbv377STt2EGzjl3ZtHIpjVu148JL/sLK+bNYOP0rwsJC2b93LyEhIW7H\nFRGR39BxjaVIl67dyPWGsn7pIt6es5aAwECstdx/RV9ysk9QtXJFVq9c6XZMERH5DR1qUYp89MH7\nrPtpHmCx9r/HNxqPl2vueowN6zcwe/Zs8vPz3QspIiLFprItgerUqcOMGdMBePEf17P0hxm89+R9\nADRu1R6Px8PIUTfQpWs3MjMz3YwqIiLFoGnkEmzChAmMvO56PF4vNeo14pJRf2fhjK/4eeY0atZv\nzLHkg/Tp1ZNPP/lYtwaJiJQAWrMtpbKysmjTth1HjqdhPB4yUo8RGh7BsBvvJPnAXmaMf5u83Bwa\nNm7CpImfk5iY6HZkEZFyS2u2pVR4eDirVq7gsYfuJ7ZiNMbj4Y6XxtF9yAiG33QXXQcOIyAwiL17\n99K2fQeWLVvmdmQREfkNnSBVCoSEhHDTTTcRExPDyOtGEfSr236CQ8OIrhTLqzN+ZuGMyQweegn7\n9uzG49HvUSIiJYV+Ipci7dq1w1o/r9xzE+uXLGTOlAnMnjSeTn0HA9Cl/1DS0tJYvXo1BQUFLqcV\nEZH/0JptKTN+/HiuHz2awMBgrPXj8/t4dfoSZn3xEfO+/pzkA3uJrhhDdGQkM6Z/Q9OmTd2OLCJS\nbmiDVBmSlZXFI488yr9eeZmAgEA8Hi8hYWGc36UnK+fPwvr91GrYlMyU/SRt3+52XBGRckMbpMqQ\n8PBwnn/+Od575x2shfy8HB565wvWL13IhZdcSad+gzlycB/79u6j/4CBZGdnux1ZRKRcU9mWYtde\ney2DBvQnKCSU/Tu3ExtfndQjyezatJ6Lr76B5h27MWfuXG4ae7PbUUVEyjVNI5dyBw4coE69+rTv\n2Z+927dwcO9O3vh+OSFh4fj9fu4e1pPU5MNkZaa7HVVEpMzTNHIZVa1aNV58/jmWzfmO/bu2Y4Cg\n4MJbgzweD+GR0eQX5DFhwgSOHDniblgRkXJKI9syJDU1lVoJtWnWuQd9Lh/JuiULmPr+61RLqEtc\nfDV2bVrH/HlzqV+/vttRRUTKJO1GLic2bdpE23YdsAas309i
+wu4/fm3AZg+/t9kJK3n6ymTXU4p\nIlI2aRq5nGjcuDHbt21hzKjrqVmzJi27XPjL5+o0PZ+Zs2YREV2RSpVjGTL0Enbt2uVeWBGRckIj\n2zLszbfe4uU33+aOlz8kKCSUB/7Sn7ycHIZcfyt7tm1izlef4vf7+NdLLzF27Fi344qIlHonG9nq\nbOQybMwNN7Bly1bG9mmDz+cjIDCIpyZ8T3zN2gCkHztK1Rq1eODhR+jYsSOtWrVyObGISNmkaeQy\nzBjDv156kazMTGIqVQZrCQkN++XzoeERRFSIoeUFvVi8eLGLSUVEyjaVbTkQGBjI7bfdisfr5dX7\nxrJ55RJ++PJjls/5jvM792D7+tX88OMct2OKiJRZWrMtR7p168aSZcuJqliJrIx0Eho2JSP1GIHB\nwezfsZWxY2/izjvuID4+3u2oIiKlknYjC/PmzePJJx4nLDiQgIAAqtWuR6NW7Ug/fpQrbrufzYdS\nad2mLYOGDCWmcmXqN2zEd99953ZsEZFSTyPbcujw4cM0a96CAX+7hSnjXuOulz8goWHho/j+MaQr\nDZq35vJb7mXfji289eCtLJg/T4/qExEpBo1s5RdxcXHM+fEH9q5cSHZmOuGR0QBYa0k5sJe/3vt/\nVIyNo1mHrrS5sB9TpkxxObGISOmmsi2nmjZtyqzvv2XU9aN47/E72bZ2JYtmTMYbEMjhfbuBwvLd\nvW0z//fkk/zjjjvRTIWIyJnRNHI5V1BQwCOPPsY306dToUIFLujciX+/+x4d+13C7i0byMpI4x8v\nvMvzt1zNi8/8kyFDhrgdWUSkxNLZyFJsCxcu5KLevRk66nb6jPgrQSGhTHztGVrWiuXhhx92O56I\nSImlNVspti5dutA0sRkRFSoSFBJKXk42m5Yv0tOCRETOkEa28rvWrVtH7z59ialajaOHD9Hzwh6M\n//ADPB79fiYicjKaRpbTlpaWxpo1a6hQoQLNmjXDmP/5/4+IiPyKylbOmuTkZFJSUqhTpw6hoaFu\nxxERKTG0ZitnxVNPP03d+vXpP2gIdevVZ/Xq1W5HEhEp8TSylWL76aefGHrZcB5+fwoVY+NYOGMy\n33/4Gtu3bnE7mohIiaCRrfxp69atI7F9FyrGxgHQqe9gdiXtID8/3+VkIiIlm8pWiq1BgwZsXvEz\nmempAKxe+CPVzqtOYGCgy8lEREq2ALcDSOnRvXt3Rgwfxr3DelK1ek0O79vN15Mnux1LRKTE05qt\nnLatW7dy6NAhEhMTiYmJcTuOiEiJoVt/REREHKYNUiIiIi5R2YqIiDhMZSsiIuIwla2IiIjDVLYi\nIiIOU9mKiIg4TGUrIiLiMJWtiIiIw1S2IiIiDlPZioiIOExlKyIi4jCVrYiIiMNUtiIiIg5T2YqI\niDhMZSsiIuIwla2IiIjDVLYiIiIOU9mKiIg4TGUrIiLiMJWtiIiIw1S2IiIiDlPZioiIOExlKyIi\n4jCVrYiIiMNUtiIiIg5T2YqIiDhMZSsiIuIwla2IiIjDVLYiIiIOU9mKiIg4TGUrIiLiMJWtiIiI\nw1S2IiIiDlPZioiIOExlKyIi4jCVrYiIiMNUtiIiIg5T2YqIiDhMZSsiIuIwla2IiIjDVLYiIiIO\nU9mKiIg4TGUrIiLiMJWtiIiIw1S2IiIiDlPZioiIOExlKyIi4jCVrYiIiMNUtiIiIg5T2YqIiDhM\nZSsiIuIwla2IiIjDVLYiIiIOU9mKiIg4TGUrIiLisIBTfYExJhi4FEj49ddbax93LpaIiEjZccqy\nBb4G0oAVQK6zcURERMqe4pRtdWttX8eTiIiIlFHFWbNdbIxp5ngSERGRMspYa3//E8asAyyFo9/6\nQBKF08gGsNba5n/4xsbYk723iIhIWWSMwVprfnv9j6aRBziYR0REpNw46cj2ly8wZry19upTXfud\n12lkKyIi5crJRrbFWbNt+ps38gKtz1YwERGRsu6kZWuMuc8YkwE0N8akF/3JAJIpvB1IREREiqE4\n08hPWWvvO+031jSyiIiUMyebRv6j3cit/ugNrbUrT/ENVbYiIlKunEnZzin6awjQBlhD4W0/zYHl\n1tqOp/iGKlsRESlXTnuDlLW2h7W2B3AQaGWtbWOtbQ20BPY7F1VERKRsKc5u5IbW2nX/+cBaux5o\n7FwkERGRsqU4ZyOvNca8C3xc9PFfgLXORRIRESlbirMbOQS4EehadGk+8Ka1NucUr9OarYiIlCun\nvUHqLHxDla2IiJQrp302sjFmorV2+K8eSPD/OdWDCERERKTQH936E2+tPWiMqfV7n7fW7v7DN9bI\nVuSsS01NZcuWLZx33nlUr17d7Tgi8htncuvPwaK/9gKCrLW7f/3HqaAi8vvmzp1Lg9oJjBk+hBZN\nGvHc00+7HUlEiqk4G6QeAy4AEoAVFG6QWmCtXX2K12lkK3KW+Hw+qlWJ5e3udelRsxIHM3Po8eUq\nvp0znxYtWrgdT0SKnPFTf6y1j1hrL6Tw6T8LgLsoLF0ROUeOHz9OXm4OPWpWAiA+IoR251Vi8+bN\nLicTkeI4ZdkaYx40xnwLzATqAXcCWiwSOYdiYmIIDgnhh91HANifkcOS/Udp3Fjny4iUBsWZRl4J\nFADTgXnAT9ba3FO+saaRRc6q+fPnM2zIYOIigtl3PIO/33EnG9auYd2aNdRv0IBX//0ONWvWdDum\nSLn2p+6zNcZEAZ2BLsAwINla2+UUr1HZipxlaWlpbN++ndjYWIZc3I8uoXmMaFCFb3cd5fN92aze\nuImwsDC3Y4qUW2e8ZmuMSaTwiMZrgcspfAjBj2c9oYicUnR0NK1bt+bEiRMcO3SQJzrVITE2krva\nJhBh81i9+g/3LYqIS4pzNvLTFO5AfgVYZq3NdzaSiJxKWFgYWbl5ZBf4CQv0ku/zk5qdS2hoqNvR\nROR36LhGkVLIWss1V4xg59IFDKoZzawDGQQlNGLadzPxeIrzMC8RcYLORhYpY3w+H2+//TZrV66g\nQZOmjB07lqCgILdjiZRrKlsRERGHnfEGKREpnWbOnEnzRg2oXqUyf/3LlWRmZrodSaTc+qMHEUzj\nd5728x/W2kF/+MYa2Yq4ZsOGDXTv1IFRjeOYtPUgh0/kUTU+nhXrNhIREeF2PJEy67QfsQc872Ae\nEXHQzJkz6VotmnfW7uWdvs2oXzGce+ZtZsx1I/n48y/cjidS7py0bK21885lEBE5e6Kjo1l/NJPL\nGlblwlqVAXi1V1NafjKDo0eP8uTjj7F3ZxIdu3bntttvx+v1upxYpGwrzqEW9Y0xk4wxG40xSf/5\ncy7CiciZufzyy8nyhrDt+Ilfru1KyyYqPIKuHdqRsfAb+ubv4as3XuSm0aNcTCpSPhTnbOSFwCPA\nS8BAYCTgsdY+fIrXac1WxEWHDh2ifcvzaRbpoXHFMD7emsIVI69j+dSJfDMwEYD03ALqvTuP42np\nOhBD5Cz4M7uRQ62
1P1BYzLuttY8CF5/tgCJydlWtWpX1W7fR++Z7Cev7FyZ9M4MuXboQEvDfKeMg\nr8EYg9/vdzGpSNlXnJHtYgofQDCJwjOR9wNPW2sbnuJ1GtmKlDCpqamc37Qxw2tEsDEljUUHUgkJ\nCWXcpxPo16+f2/FESr0zPtTCGNMW2ARUAJ4AooFnrbU/n+J1KluREmjPnj306dGV6v4TPNu9ITtT\nTzBmzja+nzOPli1buh1PpFT70ydIFT1mz1prM4r59SpbkRKqSsUKzL/sfKpFhADw4KLtxA+9jvvu\nu8/lZCKl2595xF4bY8w6YC2wzhizxhjT2omQInJuREaEsz8j55eP95/I12EXIg4qzgapccBN1toE\na20CMBZ439FUIuKox596hqtnbuLpn3cwevYmNpzw0LJlS3p07kiDhJr89S9Xkpqa6nZMkTKjOGu2\nq6y1LX9zbaW1ttUpXqdpZJESbP78+Xz37bdUjIlh0KBBdOvUkQdaVeP8KlGMnrmeHceziAgL494H\n7ufuezS9LFIcZ3Jc43/MM8b8G/iMwrOSLwfmGmNaAVhrV57VpCJyTnTt2pWuXbsCMGHCBNrFR3Nt\nYnUeXLCFWhHBfHdZW1Jz8xn24nPk5OZz7bXXUqtWLZdTi5ROxZlGbgE0oPBgi0eBxkBL4AV0frJI\nmRAWFkZKVi7WWubuOco9HepRMSSQ2tFh3NC0Kq8/9xStEpvwrxdfcDuqSKmk59mKCLm5uXTt2J5q\nOcfYknycm1slcFXT8wC4ZdZ6Zu8+QlxYMPty/CxYupyGDf/wNnuRcuvP7EaOM8a8Z4z5tujjJsaY\n65wIKSLuCA4O5scFi+hw7Via9+jLvQu2cf23axn+9UoW7T/Ogis7khAdRpjxs337drfjipQ6xdkg\n9S2Fu48fsNa2MMYEAKustc1O8TqNbEVKqRHDLuWbr6cwtlUCN7dKIDo4kHfW7OHBeZvp3LkTwcHB\n3H7PffTq1cvtqCIlyp85G7mytXYi4Aew1hYAvrOcT0RKkNbt2hMc4GV3WjYRgQGk5uTz8vIkAgM8\nDAo6Ti//IYb070tMhWhuGj2KnJycU7+pSDlWnJHtXOBSYJa1tpUxpgPwjLW22ylep5GtSCmVm5tL\np/bt2LZpIwV+Pz6/JTTQy3PdG3N542oAjFu7l0cXbiXX56d9h47MXbjQ5dQi7vszI9t/AFOBusaY\nRcBHwC1nOZ+IlCDBwcH8vGw5jZo0oWmVClzT9DwCPB4CPP/9GRLk9RDk9TD10jYsXfITu3btci+w\nSAlXrN3IReu0DQEDbLHW5hfjNRrZipRyOTk5vPLKyyRt3cKWHTtZu2QRL/Zogs9vuWvuJs6LCKFy\nWBCL9x+nQlQk1466gbvvvptKlSq5HV3EFaf9IIKip/3stdYeKvr4Ggqnk3cDj1prj53iG6psRcoQ\nn89Hz27dWLtiKR4g0GOICw+mV61KhAYG8OnGAxzNycMTHMLaDZuoXr2625FFzrkzmUb+N5BX9OKu\nwNMUTiGnAW87EVJESi6v18ucBQuYv2wllavXICOvgM3HsqgaEcLkbYd4u28zPh5wPt78PBrWrcui\nRYvcjixSYvzRyHaNtbZF0d9fB1KstY8WfbzaWnv+H76xRrYiZdqiRYvo2aM7LSqFc2/HevSsVRmA\nD9fv4/8WbyPXGjbt2El8fLzLSUXOnTMZ2XqL1moBegI//upzxTlTWUTKsM6dO/Pss8+xPS2bIyfy\nfrmenJVLn9qxeKyfyZMnu5hQpOT4o9L8jMKHEBwBsoEFAMaYehROJYtIOXfr3/+Oz1puv/dudqdn\ncyLfx/gN+5kxrC2LD6SSm5tLeno6ubm5hIWFER4e7nZkEVf84W7kontq44GZ1tqsomsNgIhTPe1H\n08gi5ceiRYvod1FP2lQO56HO9VmTksGDC7aS77cY68da8FtLp44d+XH+Arxer9uRRRxx2ruRz8I3\nVNmKlCNbt27lskEDSNq1m6jwMCoFWJKOZdKndmX2Z+YQ4DEcyMihfd+BTPh8ottxRRyhshWRc6Z7\nh3Zs27CWV3slkhgbyT9+3MiifceIDAogywRwNC3D7YgijvgzJ0iJiJyWGgkJpOYU0Dw2kmFTVtCg\nYjg/XtGBG1vWIjcnh/T0dLcjipxTGtmKyFm3b98+zm/cgJ7nRbNw/3E2XtcNYwp/2e/55Sqe+eAz\nWrVqxdSpUykoKKBfv366RUjKBE0ji8g5lZSUxMC+fUhKSmLb6B5EBQeQ7/PT7P0FHMzMITIkiA7V\nYwnwGlamZDFv8U96KL2UeipbEXHFTaOvZ8l30xhUM5oZSSlYazm/ShRrUtLZeCSTyKAAPMbQvH0n\nZsz+we24In+KylZEXGGt5ZNPPuGbqV+zdM4sll3RjkFfLeNgZi6zL++Az+/n8cXb+G5vOinp2jgl\npZs2SImIK4wxXHXVVTz86GPk+CzZBX68xjC4fhzz9x6l7fhFbD6aRU5ONs/880m344o4QiNbETln\nbrlxDLO//pKU42lUCg1kT3oO0y5tS7tqFTiUlUunT3/m2znzadu2rdtRRc6IRrYi4rpX3niTl8aN\nxwSHkJnvIzTQS7tqFQCoGh5Mw+hQevboQZsWzbmwc0c+/OAD9Eu7lAUqWxE5Z4wx9O3bl04dO+I1\nBq+BWTtTANiRmsX6Ixn4crPZsGED2bs2c88tN/LqKy+7nFrkz9M0soiccwcPHqRN80Tqhli2HMsi\nMiiA/Zk5DKgTy9TtyVQKDaJL9RhWHU4j1QfJaZm/3KcrUpJpGllESoz4+Hg2bk8ip3INUnPy2Zue\nzWUNqhIfEUJwgIdZl7fn3X7NmXdlR2x+PnPnznU7ssiforIVEVdER0ezdNUannzqaYzHw6ebDvDj\n7qOEBXipERUKQERQALUrhDGkXx8G9O7F1q1bXU4tcmZUtiLiqjvvvpvsvHymfD2VPSfyOVHg4701\ne/D5LT/sPsLmo5m82bsp7XP20aNLJ5KTk92OLHLatGYrIiVGcnIyzz//PG+9/BJZ+QUEez18POB8\neiXEYq2l/5Q1tOh/CWPHjqVp06ZuxxX5H1qzFZESr0qVKjz77LOMGnMjXWvH47fQKi4agLvnbmbX\n0TQOzP2GCzt35IP333c5rUjxaWQrIiVOXl4ef795LJ9+/DHVwgIYWLcKH23Yz/JruxAZFMDWY5lc\nOGklKceOExwc7HZckV9oZCsipUZQUBBvvP0Ox7NOcOsTz/JzQBUaValAZFAAAA1iIgj2ejl+/LjL\nSUWKRyNbESnxdu3aRdsWzZnYvwmt4qIZv/EA/9qcytZdu/F4NGaQkuNkI9sAN8KIiJyOhIQExn38\nCZdd9Reyc3KoeV41pn73vYpWSg2NbEWk1PD7/WRlZREREaETpaRE0vNsRUREHKZpZBEp0/Lz85k8\neTJHjhyha9euJCYmuh1J5Bca2YpIqZefn0/fnj04sTeJRhVDmZ6Uwjsfjmfw
4MFuR5NyRiNbESmz\nvvjiC3L37+Tbwc3wGMMV9Stz/ZjRdOvWjQ0bNlClShXq16/vdkwpx1S2IlLqJScn07RiKJ6iTVOJ\nlSM5lHKURnXrUDMqlN3HM7hm5N947qV/uZxUyivtmxeRUu+CCy5gyvZk1iSnk1vg58klO4kICeL5\nTrWYPaQZy65oy5RPx/PNN99w8OBB/H6/25GlnFHZikip17p1a156/U0u/XYj1d78ka3h1UjLzuHi\nOlUAqBASSPu4CC4ZOpTmDetzftPG7Nmzx+XUUp5og5SIlCl+vx+Px0PT+nW5rW44IxpXIzkrl06f\nLOalCxszoG4czy/fzUJbkTmLfnI7rpQxus9WRMqVNWvWMKDPRYR7YP+xNILwk5HvIyYkkDva1uGJ\nZbtJzzrhdkwpY1S2IlLuZGdns2nTJvpf1Is4bwG5/gLqVAxm3u40vN4AgkJCSahVk3EffULz5s3d\njitlgG79EZFyJzQ0lIiICPAVkOHP57netfAYWJd8gs41IulbvyIrDx6ld88ebNq6nYoVK7odWcoo\nbZASkTItJiaG9OwczosMItBrOHIin+x8P1c0q0xMaAC96kQTVpBF3Tq1+OSTT9yOK2WUylZEyrTK\nlSszevRolh/MZPWhLLzGkJnnIyOv8PaffJ+fjFwfbWM8jBp5DZMmTXI5sZRFWrMVkXLhmWee4bmn\n/0laRibxVWIxeSdoHxfEqkNZxIUHclfnaiw7kMnkg8Fs2LrD7bhSSmmDlIgI8J+fS++//z43jLqe\nDtUj+EfHang9hu3HcnhjUwHbd+91OaWUVipbEZHfGHbpUL77Zhp3dapGdIiX15enMOKG2+jdtx87\nduygefPmtGzZ0u2YUoqobEVEfsNayz13382nH30AWK7923Xk5OQw4aMPaBwbxppDmTz02BPcetvf\n3Y4qpYTKVkTkFDZu3Ei3Tu15uVc8EUFekrPyuW3mPoYPG0ZmehoDh17K1ddcgzH/87NUBNB9tiIi\np3Tw4EEnM8X1AAAPeElEQVSqVwwjIsgLQJXwQAJtAYd+mk6jyiE8ctd8DhzYz7333e9yUiltdOuP\niEiRxMRE9hzPZu3hLKy1zN+VToHfcn2rWHrUjuaudjG89MLzbseUUkgjWxGRInFxcUz8cjKDLu5H\nfn4BXg+0ig8n0Fs4Lgn0GAoKfC6nlNJIa7YiIr/Rp9sFVErewYETmaw+fIJLmsTg81smbzpGVr4l\npkIUb73zHpdeeqnbUaWE0QYpEZFiGv/RR9x7283k5mbTr14FJm06iscYLq5fgSuaxZJ0PId//nyE\nBT8tpUmTJm7HlRLkZGWrNVsRkd+4+ppreOL5l4iMiWXixqOMaFqp6DzlWLweQ/1KobSoEsrixYvd\njiqlhMpWROR3/O266/hxwSIqRkWwNz2PkADD7rRcAAr8lu0pmcTFxbmcUkoLTSOLiJxEdnY28VVi\naVzBEOw1rD50grbnRbDjWA4nPKEcSDmKx+Phow8/ZNLnnxIVFc29Dz5MYmKi29HFJZpGFhE5TaGh\noXz48SesP1bAkv1ZXFg7mny/5Wie4fW338Xj8fDaK6/w0J230SBtPUFb59O9S2cWL17MjBkzWL58\nORp0CGhkKyJySsnJyUyZMoXvp08jICCAkaNuoG/fvgA0qF2LGxoUruMCjFt1mO93ZJJYI4YDqdn0\nHTiYd9//UKdOlRPajSwi4oCa8XH8vUUY9WJCAPhwdTLTtx7j3i7VaVoljHvnJ/PquE/o37+/y0nl\nXNA0soiIA86rmcBzi/bz874Mpm89xuykNEa3rso7Kw8THOChTqRh9uzZbscUl6lsRUTO0OrVq1mz\nehU5+X7Gr0lhQ0o2j3avQfvqkaTm+EjJymfFvnTefO0Vpk2b5nZccZHKVkTkDH0x8XP6140iKsRL\nWk4BIxIrU6tCMB+vTcEAN8/YydDGlbi1XRy3jx3jdlxxkc5GFhE5Q0HBweRZwwt9EnhnxWHu/H4X\neT5LaIChb/2KDGtaibBAL/N3p5GScpR9+/ZRvXp1t2OLCzSyFRE5QyNH/o2fk31M3HicOhVDiAwL\n4corr6Rtx87MTkpjQ/IJXvppP68vPUSI10+9hJqMHDmSvXv3uh1dzjHtRhYR+RN27tzJS88/R0Z6\nGpcMH8HAgQM5cuQILRKbcvRICpHBXl7uV5uIIC+zd6Qyfm0K3pAIZv04hypVqlCxYkXCwsLc/seQ\ns0S3/oiInENJSUk0ql+P3vUqMLp14bGOuQV+rvxyG5c1qcSMpEwCA7zk5BXwzLPPMvaWW11OLGfD\nycpWa7YiIg6Ij4/HAqsOZpGZ5yMiyMuivRlUjwpiwe50RjSuwMUNYjicmcf9Dz1A+46daNOmjdux\nxSFasxURcUBoaCjPPf8Cx7MLGDV1B2Om7WD8mhRGtarCgYw8+tWvCEBcRBCNK3h45513mDFjBllZ\nWS4nFydoGllExEErVqxg3LhxTJ0yhcOHDgAGY+ChrtVpXjWcnAI/N09PIivfT+WocIKjYlj481Kq\nVKnidnQ5A1qzFRFx0VNPPcX8D15gVMtKDJ+4lbAgL7Wig9ifnkfTKmH0q1eBpxbsJzzYS3iFyrzx\n7vv06dPH7dhymlS2IiIuOnjwIK1aNKNDrIfNyRlYC7tScxnTtio9EqJYuCeDcauSGd06jjyfnw83\nZPDFlKn06NHD7ehyGnQ2soiIi+Lj41m2cjX1+l1Np4uHkdDqAgK9hrBAD8YYZu1IZUybODrWiKRb\nQjTDGoTzzpuvux1bzhLtRhYROUeqV6/Oc8+/ABQ+mL5yTEVeW3KQ7cdySM7Kp8D/39nAAr/FG6Af\n0WWFppFFRFwyefJkrrnqSsIDPRzNyiE8MICrmsWQ77N8sTWTGTNn06FDB7djymnQmq2ISAl04MAB\nNm/eTK1atdi+fTvj3n4Tb0AAt95+p4q2FFLZioiUYtZajh49SnBwMJGRkW7HkZPQBikRkVLI5/Ox\nc+dOunfpRJ2aNYiLrczNN41Bg5nSRWUrIlJC3XPPPYSHBNOofl1WLV/G/3WryrsDajF78ueMGzfO\n7XhyGlS2IiIl0FtvvcUrLz7P071q8MXwhlzTojLPLNpPeKCHrucFsfSnRW5HlNOgNVsRkRKoeZPG\nhKTt5cFuNX65dsWkrXRLiOSnvZlUjI0jMbE527dtoUbNmrzyxr9p2LChi4kFtGYrIlKqhIQEsys1\nl+x8PwC7UnPI81n2pubxULcamIyjpG1czOi6lvOOb6Jjuzbs2bPH5dRyMhrZioiUQF9//TVXjxhG\niMdSu2IIaw9n4fNZ3h1SjwCP4fqvd/DJpfWZse04H61Jwee3hAUHsXLdBurVq+d2/HJLI1sRkVJk\n8ODBfPrFVwRGRLMrNYcb2sQRHuwlI9dHgMfgs5ZlBzKZuuUYr/WvzaTLG9K1ZhiD+unhBSWRRrYi\nIiVYUlISHdq2oVVsAPtST3AwPZd
LGscwZ3cGKZl5XFQnmmvOL3wcX2p2ATdM30V2Xr7Lqcuvk41s\ndfCmiEgJVqdOHVatXcfnn3+Oz+cjMjKSVcuXMbx3Fb6Y+DnrUw7h81u8HsO324/jtT4qhIfgDQik\nTp3avPnOONq0aeP2P0a5p5GtiEgplZqaSs34KlQO8RAe5GHHsVyqhAfSJDaUoY1j2HQkm/fXpzPz\nhzm0bdvW7bjlgtZsRUTKmAoVKtCmXTtaVA2jwG8Z1jSGQ5l5jG4TR1xEEN0TokkIh+5du/Dee++6\nHbdcU9mKiJRiDz7yOPMP5JGV56fAZ7FAao4PAL+1ZOT6GNkshjtuu43k5GR3w5ZjmkYWESnllixZ\nQvcLLsD6C0isEsr+jHx61o5mzeEsAB7rUZN7Fxzhs6nf07p1a5fTlm2aRhYRKaPat2/PspUrMR4v\nm4/kcCLPx+frjxAS4OHR7jXYcSyHI5m51KlTx+2o5ZZGtiIiZYTP52PixIlkZmYCcPcd/yAiOIDM\n3ALGf/oZAwYMcDlh2afn2YqIlDOZmZns37+f6tWrEx4e7nacckFlKyIi4jCt2YqIiLhEZSsiIuIw\nla2ISDl14sQJcnJy3I5RLqhsRUTKmZycHIYNHUxMhWiioyK5cfQo/H6/27HKNJWtiEg58+D993Jg\n7WI+HlqHDwbVZuGMr3jtlVfcjlWmqWxFRMqZhfPm0j8hlCCvhwCPoUEUTJr4OdnZ2W5HK7NUtiIi\n5UyNmrXYfCyP9NwC7pq5m1UHsziwbT2tWzTjyJEjABw4cIAJEybwzTffkJeX53Li0k/32YqIlDM7\nd+7kgk4dIDuT5rFB3NAmDoBxa48T32kgo8bcSJ9ePWkSG8qx7Hyi4hP4Yd4CQkNDXU5e8ulQCxER\n+cWxY8e4qNsF9K2YRvvqkQAs2ZfBUk8d0tPT6RScTI/a0fit5dklR7n8tge57bbbXE5d8ulQCxER\n+UVMTAz9Bg5kzr5c8n1+8n1+5uzLpX2nThw8eJBGlQtHsR5jqBsJe/fscTlx6aayFREppx565DHi\nmrZj5LQ9jJy2hypN2vLQI4/RsXNnvt6Wgc9vOZZdwPwD+XTu0sXtuKWappFFRMq5I0eOYK0lNjYW\ngOPHj3PpkIEs/mkJGMMD99/PQ4886m7IUkJrtiIiclqysrIICgoiMDDQ7SilhspWRETEYdogJSIi\n4hKVrYiIiMNUtiIiIg5T2YqIiDhMZSsiIuIwla2IiIjDVLYiIiIOU9mKiIg4TGUrIiLiMJWtiIiI\nw1S2IiIiDlPZioiIOExlKyIi4jCVrYiIiMNUtiIiIg5T2YqIyDkza9YsenbtTKe2rXjrrTcpL889\nV9mKiMg5sWjRIkZcOpSWvl30jkjhmYfv443XXnM71jlhnPqtwhhjy8tvLCIicmpjRl9P7rKpDG1c\nCYB1h7P4KiWSFWs3uJzs7DHGYK01v72uka2IiJwTQYHB5Pr++3FugSUgMMC9QOdQ+finFBER140Z\nO5YuH48nwHOM8EDDl9uyePO9f7kd65zQNLKIiJwz69ev5+UXXyAn5wRXXfs3+vTp43aks+pk08gq\nWxERkbNEa7YiIiIuUdmKiIg4TGUrIiLiMJWtiIiIw1S2IiIiDlPZioiIOExlKyIi4jCVrYiIiMNU\ntiIiIg5T2YqIiDhMZSsiIuIwla2IiIjDVLYiIiIOU9mKiIg4TGUrIiLiMJWtiIiIwwKcfHNj/uf5\nuSIiIuWOsda6nUFERKRM0zSyiIiIw1S2IiIiDlPZioiIOExlK+IQY8wDxpj1xpg1xpiVxpi2Z/n9\nuxljphX3+ln4foONMY1+9fEcY0yrs/19RMoiR3cji5RXxpgOQH/gfGttgTEmBghy4FudbIejEzsf\nhwDfAJsdeG+RMk0jWxFnxANHrLUFANbaY9baQwDGmFbGmLnGmGXGmG+NMXFF1+cYY/5ljFlljFlr\njGlTdL2tMWaxMWaFMWahMaZ+cUMYY8KMMe8ZY34uev3AouvXGmO+LPr+W4wxz/zqNdcVXfvZGPO2\nMeZVY0xHYBDwbNEovU7Rlw83xiwxxmw2xnQ+G//FiZRFKlsRZ8wEahaV0OvGmK4AxpgA4FXgUmtt\nW+B94J+/el2otbYlMLbocwCbgC7W2tbAI8BTp5HjAeAHa20H4ELgeWNMaNHnWgDDgObA5caY84wx\n8cCDQDugM9AIsNban4CpwF3W2lbW2qSi9/Baa9sDtwOPnkYukXJF08giDrDWZhWtZ15AYclNMMbc\nC6wAEoFZpvDUFw9w4Fcv/azo9QuMMZHGmCggCvioaERrOb1/b3sDA40xdxV9HATULPr7D9baTABj\nzAagFhALzLXWphVd/wL4o5H0V0X/uaLo9SLyO1S2Ig6xhSfGzAfmG2PWAdcAK4H11tqTTbn+dq3V\nAk8AP1prLzHG1ALmnEYMQ+Eoetv/d7FwTTn3V5f8/Pfnwekc/faf9/ChnyciJ6VpZBEHGGMaGGPq\n/erS+cBuYAsQW1R2GGMCjDFNfvV1lxdd7wKkWWszgGhgf9HnR55mlO+BW3+V6/xTfP0yoKsxJrpo\nyvvSX30ug8JR9snofFaRk1DZijgjAviw6Naf1UBj4FFrbT5wGfBM0fVVQMdfvS7HGLMSeAP4W9G1\nZ4GnjTErOP1/Z58AAos2XK0HHj/J11kAa+0BCteQlwILgJ1AWtHXTADuKtpoVYffH4WLyO/Q2cgi\nJYQxZg5wh7V2pcs5wovWnL3AZOA9a+3XbmYSKe00shUpOUrKb76PGmNWAeuAJBWtyJ+nka2IiIjD\nNLIVERFxmMpWRETEYSpbERERh6lsRUREHKayFRERcZjKVkRExGH/D8EdZOhfkDUfAAAAAElFTkSu\nQmCC\n", + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plot(X_nca, Y)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Local Fischer Discriminant Analysis\n", + "\n", + "LFDA is a linear supervised dimensionality reduction method. It is particularly useful when dealing with multimodality, where one ore more classes consist of separate clusters in input space. 
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "lfda = metric_learn.LFDA(k=2, dim=2)\n",
+    "X_lfda = lfda.fit_transform(X, Y)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "image/png":
EmstFnMbuNH\nCy8nPuoUQHx0FJGRkaWYtvzY8ONPrL+ay/TdN5i6I5aGzVvxcC07egc407yaHdOaOfH1sqUlOkf7\n9u356XJeDzU5U8uBhFzad8ibvDb1mSfp723GG21d+LCLO9yM5IsvvgBg0qRJDBw4iCAvZ1rUdGDx\n6TRmznwJZ2fnEl93VVHpiq2iKHw6fx6XL13Cw82V9Qs/JPryBbYs/xxdThZNmjQxdkQhRBFYWlqS\nkZ2LTp830TJbpycrR1uhdnp5EI0bNyYi8hpbd+/n8tUounV/mFz9X5NMc3XqP3r0ERER9H64G3X8\nfBk9YhjJycmFnmPJ19+S7urP6B+v8syv13hi6vP3Jo5ei7pGQ4+8zomJolDPyYTIK5eBvJ+rq9dv\n5K1PF9FyzPMsXbmWt94pfg+7Irt8+TJfffUVP/zwA5mZmUVuV+lmI/9dQkICk6dOIyzsLHWCAvnq\ny4X4+PgYNZMQomh0Oh29u3fDMiGSHjUd2BKVgmeTtqzZuKlKPDMfHR1Ny2ZN6VHTHHcbU36M0PDC\na28y4/nnSU1NpUHdIHpVN6GJhxU7ou6QbO/DoaMhRfp3k56ejpWV1T8mjI59dAQpp4N5ookzmVo9\n7xy+xavvf8pjjz1myMusUIKDg3lkYH/a1LDlVqaOXFt3Dh45hp2d3b3PVPmNCKKiooiPj6du3bq4\nuLgYO44QogiysrKYP+8TLl84T+PmLZk2fXq53OnFUCIiIvjwvbncTk5i8LARjBk7FoDt27fzytPj\nebt93s8yvaoycVsMZ8MvU7169WKdKyUlhR4PdeHc+Qvo9fDEExNZ+NXiKvHFpqiaNKhLP9cM2ta0\nR1VVPjmWxNBpr/Pcc8/d+0yVefTnfl566SUWLV6CjZ0D2ZkZ/PzTT3Tt2tXYsYQQhbCysuK1Krx4\nQkBAAMu++fY/79vY2JB+d4jd1EQhS6snJ1dXomUqDx8+zNUrV+lY25XY9FwiLl1Cp9NVqS83hbl1\nK4la/nkTzBRFwdsWEm8kFKltoT1bRVEsgaFALf5WnFVVfaeQduWiZ7t161aGDR/BwyPG4+JRjV++\n+wptThZpt2/LNzYhRIWk1Wrp3qUTudcjaOBswqGEXDr3HcLiZcsLb5yPmtU8mdzAioYeNuj0Kq/v\nv4G5SzXcXJyZ9MwURo8eXYpXUDGNGzWS+ON7eLqZC7c0ucw5fIsV6zbRo0ePe58p9jCyoii/A6nA\nCUD35/uqqs4rpF25KLZDhw4l3dSOx1+dC8D5438w/4VJxEZfk5l0QpQBnU7Hxo0biYmJoXXr1nTu\n3NnYkSqFrKwsvvjiC65cukjrdu2ZMGFCiXY7sjA3Z9UjfvdW//ry2HWydSrtatqz4nwaH332JWPH\njSut+BVSeno648eMYttv27GxtuS9Dz5k8uQp//hMSYrtWVVVGz5oqPJSbB+fOJFUM0eGP/siANGX\nLvDOE0PJSEuVnq0QBqbX6xk+eCDRp4/T2sOOn6/eokuvPvQbMJABAwb8Y2KJMK5unTrgdjuC0Q2c\niUvP4fXd0bzRpSaBrtaExGWwL9uLA0dCjB2zXFBVNd/6UZJ7tocVRWmkqmpYSQMaw5TJk3moew98\nAuvi7O7FN++9yojhw6XQClEG9u/fz/njRzkwrBkWpiZMblyD5ivWcz3kAHPemM3BY8dlhKmcWLNh\nE0MHDWDkxlBMTBRaV7Mh0NUayNvEvrItJFISxakf+Y45KIoSpijKGaAjcFJRlIuKopz52/sVQsuW\nLdm0cT2HNn3H6g9fY9zIoSwv4YPh97Nnzx4eHTWa0WPHcujQoVI/vhAVUVJSEv4udliY5v2oqWlv\nhbWpCcu7B9HSVs+8jz82csL7O3HiBOvWrePs2bPGjlJmvLy8OHQ0hPQ7d9h/8BBnb6v8cjGZnVdu\nszwsledfftXYESu0ggb4+wMDgD5AANDz7us/368wevToQciRP7h44Rxz3n03329oer2exMTEe2uC\nFtWOHTsY/ugo7PybYOVdjwGDBnPgwIHSiC5EhdamTRuOxiazPfIm6TlaPjp6BV9Ha1yszGnqasP1\n2Oh82+7du5dhA/sxpF8ftm3bVmaZ33nzDQb17M6aOa/SvWN7vlq4sMzOXR5YWlrSunVrftuxi8zA\nLiTWbMPKtRsYNGiQsaNVaEW5Z7tSVdVxhb13n3bl4p5tUYWFhTFg4CBSUlLQ6/UsX7aUkSNHFqlt\n3/4DqN22Bx37DQFg54bvuXM1jHVr1xgyshAVwv79+3lywniiYmJxt7bgp0daYG9pxtBtZ5n5/jzG\njx//nzbBwcGMGDSAN1r7YGFiwpvHrrH4u5UG3yYzIiKCDi2bc3hkC9xtLIlK1dBp3XEiY2ILfT4/\nJyeH3NxcbG1tDZpRlG/53bMtytS1Bv86kCnQorSClQd6vZ7+AwbS5/FpLN4bxuvLNvDslKlERETc\n9/MajYbHJz2BV7Xq1K3fgMSbNzH720os5uYWaHW6+7YVoqrp3LkzF69GcTstnU49+9B+zRGafn+Y\nRyY8xbh8Zrcu+/ILXmnhzbgGNRlZrzrvtavN4s/nF3quuLg4Jo4bS6+unXj7jTceeJQqNjYWfzdH\n3G0sAajlaIOHvQ03btzIt42qqrwx+3Uc7O1wdXaiR7cupKamPtB5SyIzM5Nnn3qCAF9v2rZoJrex\nyql8J0gpivIq8BpgrShK2p9vAzlA6d/0NKJbt25x+/ZtOg8YDoBvUH3qNm3FqVOnCAgI+M/nJz3x\nJBHXb/Ha8s0kxESyYObTXLv2Diampui0WjYt+oRVK1eU9WUIUa5ZW1uzZuMmtFotJiYmBT6moqoq\n/+4aFDZSlpqaSud2bXikhjV9PB1YsvZrnoi4zIrVRR9hql+/PpdvpXEoNpkONV349UoiGVqVWrVq\n5dtmw4YN/LDsK5b29cXe0pTFpy4w9ZmnWLlmXZHPWxJPTpxA1LE9PNfQnmu3ExnQtzdHj58kMDCw\nTM4viibfYquq6vvA+4qivK+qaqW+M+7s7Ixer+fapfP4BtVHk5FO1MVzeHt7/+ezWVlZ/LjlR+Zv\nOYCzuyfu1WvSZfCjOOnvELYjb83WFd9+Ta9evYxwJUKUf/mtSKTVatFoNDg4OPDklGmMHDwQC1MT\nLEwV3jh6jUXffl/gcXft2oWftQlvtPUHoGNNZ2ov3cjir7/B2tq6SNk8PDxYtX4DY0aOIDcnBzs7\nO37cuq3A9gf3B9O1ujlO1nnXNcDfjk/LsHe5+cctLO/vi52FKT6OlpxL0bF9+/YHKraqqhIdHY1G\noyEwMFBWjTKAgnq2ze/+dsPffn+PqqonDZaqjJmbm7N82VKemTyaes1aE3XxHMOHDqFNmzb/+Wxw\ncDBmZubcjI/F2d0TgOtRV+g2qA8vvfRSWUcXolL4cuEXvDxzJqgqDevX48etv7Jm8xa+nP8JOp2O\npd+/V+gm9Iqi8Pe+b3GnjPTs2ZOEW0mk
pKTg4uJS6EIRNX18+eU3/b1nLy8mZVGjxn+/qJemO3fu\ncPHiRdzd3bG2suR2lhY7i7yJn7ez9djY2BT5WDqdjsfHj2XrLz9jY2GOk5sHO/cGU61aNUPFr5Ly\nnSClKMreu7+1AloCp8kbRm4MHFdVtV2BB64gE6RUVWXXrl3ExMTg7u5OdnY23t7e9y20ANu2bWPy\ncy+Qdvs2XQc/SkJ0JKcO7ObC+XMFDjUJIe7vwIEDjBo8gG2DGlPLwZq5xyI5YerO7gMP1jtMS0uj\nRaOG9PG0pLWnHV9fSMSvQ3e+XlFwj7ikNBoNXTu2R3MzFmdrM8JvZbFrb7DBtvM8efIkfXv1wN5c\n4WaahnbtO3DmxDF6+lgSe0clWmdHSOhpHBwcinS8ZcuWseDtV3ijvRsWpgqrzqWQ49OCLVt/NUj+\nyu6BF7VQVbXb3YabgeZ/LmqhKEpD4C0D5SxTqqoyYeIkgg8epna9Rpw5sp95H3+Ub6EF6NSpE4o2\nl6YdupGUEM+Na1fp2KkTvr6+ZZhciMrjyJEjDPJzo7ZjXm9selMf6nx78IGP4+DgwIGjx3j7f7NZ\nHx1Fz8eHMXPWrNKO+x82NjYc+OMoO3bsQKPR0KVLF7y8vAx2vlHDhzIm0JIutRxJy9byanAIU196\nhYT4ODp7eDBt2vQiF1qAM6Enae1hdm+Zxs7etnwaVmGWUqgwijIwX+fvq0epqnpWUZR6BsxUZg4c\nOMDe4P28u+o3LKysibt6mSmPDWTsmDH53rNwcHDgwP5gXnr5Za5di2dA7x68/95cWZFKiGKqUaMG\nm27dQavXY2ZiwrHrKVT39CjWsby8vFhUgsX4i8vS0pIBAwy//IBeryci6hodWwUB4GBpRiMPaxwd\nHXn11eJNralTvwHfbttAX52KualCSLyGoKD6pRlbULRie0ZRlOXAD3dfjwEqxdeehIQEvAPqYGGV\nN/mhhl8giolCWlpagc/UeXt7s26NPEMrxIMIDw/n8uXL1KlTh6CgoHvvjxgxgjXff0fXTafxd7bl\nYMwt1m3eYsSk5ZeJiQl+Pt4cjkmnk68D6dk6wm5mMrPe/fs/8fHxXLt2jYCAANzd3e/7maeffprt\nv25l+q4/sLe2IMfEkj0/fmPIy6iaVFUt8Bd592yfB368++t5wKoI7dTy7sqVK6qzi6v6zoqf1VUn\notXHZr5m76dWAAAgAElEQVStBtapq+r1emNHE6JS+Wz+PNXD0V7tWddXdXewUxcv+uoff67VatXt\n27erq1evViMjI40TsoI4fvy46uHqrNap4aY629mos2a+dN/PLVm8WHW0s1Hr1XRXnext1c2bN+d7\nTL1er546dUo9fPiweufOHUNFrxLu1r7/1MRCV5AqrvIyQWrHjh289PIr3L6dQp/evZk+bSq//fYb\npqamPProo4SEhPDYhAmkp6dTt159fty0UZ5PE6IUxcTE0LRBPQ6MaEFNe2uu3tbQbeMJLl2Nyre3\nJQqWnp7OhQsX8PDwuO/EzGvXrtG0YX0+6OpFNXsLIpKzeOfQTWLir2Nvb1/2gauQB54gpSjKelVV\nRyiKEgb8p2qqqtq4lDOWujNnzvDo6DFM+t/HVPP1Y+2CubRt34H2vQejzclm7vvvY29nT65Wh6KY\ncDMxkR+3bGHmSy/JPVghSklMTAx+ro7UtM+7XePnZEM1Bzvi4uKk2BaBqqpcu3aN7Oxsdu/axZHD\nB6nl58/Ml2flWzivXLlCLVc7qtlbABDgYoWjtTmxsbHUy2fIWRhWQfdsZ9z9Z8EPt5Vjv//+O+37\nPELzzg8D8Pir7/PiI12Y8MocAKb0aknXkRPpOeIx4qOu8M6koSz4chF2dnZMfvZZY0YXokJSVZXN\nmzcTFhZGnTp1GDlyJEFBQUQmp3MkPoW21Z0JjknipiYLPz8/Y8ct93JychgxZDAHD+xHr9Wi1+sY\nVs+FQyG72frTFv4IOYGlpeV/2gUGBhKVfIfYNFtqOlhy8VYmaVna+y7UI8pGQY/+XL/724eB/aqq\nXi6bSKXHzs6OlMSEe6+TE69jaZO3SHh2ZiZpKcn0GJ63CHr1Wv40atsZeycXNmzaLMVWiGJ4buoU\n9mzZSG9vB+Z/m8Hvv/zMd6tWs3LtOkY9OhILEwWdorBu048P9HhKVfXp/E+IP3uMJb1rYGqisDgk\ngdi0HKa38uS1Awns3buX3r17/6edt7c3ny74ghlTp+DhYEPSnWxWrl6DnZ2dEa5CQNFmI/sASxRF\nqQWcAPYDB1RVPWXAXKVizJgxfPb5Aha/8RyePrXZuW4F1rZ2JMZFk5OVhampKRFhoQQ2bk5WpobI\nC2E0ad8Ve/kLKcQDi42N5Yfvv+fUuDY4WprzklZHi9W/cf78efr06UN84k1u3LiBl5cXFhYWxo5b\nIZw6cZx2XuaY390PuGttR74LTURRFGwtTMnJycm37YQJj9O//wCio6Px8/PDycmprGKL+yi02Kqq\n+iaAoijWwJPATOAz4P6bwpYjjo6OHD3yB0uWLCE5JYUNa1ezY+cu5kx8BFNTU0aNepT5z0+gZkA9\nEqKjcHR144/fNrNj++/Gji5EhZOamoqLrRWOlnk7YFmbmeJlb3NvBxxLS0t8fHyMGbFciY2NZeYL\nM4i8coXW7drz/ocf/2d7vjr1G7In9ABda6mYKPBHTDr2lqb8GH6bOI1Kp06dCjyHm5sbbm5uhrwM\nUURF2c92NtABsANCgYPk9WyvF9KuXMxG/reoqChGjRnLieMh1PT24f25c7h58ybHjx+nevXqjBkz\nhgYNGhR+ICHEP+Tk5NAwKJDxvjaMrOPFtqs3mX/uJucuRcgM2H/JyMigcf16tHbOpZGbBTujM7H0\nbcTvO3f/Y3KmRqOhT4/uRF0Ox9LMhNuZWtzc3QgICODTL77C39/fiFch7ie/2chFKbYnAS2wDQgG\n/lBVNbsIJyx3xVZVVRo0akyTbv3oNWoS548fZvk7LxJ2+jQ1atQwdjwhKryrV68yadwYzp4/T6C/\nP8u//4H69Q27GpGqqkRFRZGbm4u/vz+mpuV+0I3t27cz6+lxvNPeFQCtXmX8lkjCLoQTFRWFlZUV\nrVq1wszMDJ1Ox6lTp8jJyaFZs2ZYWVkZOb0oyAM/+vMnVVWbK4riQF7vtgewVFGURFVVOxogp0El\nJiYSHxfH7MenoCgKzTp1J7BRc0JCQv5RbHfu3MmChV+iqirPPv0U/fr1M2JqISoOPz8/9h76o8zO\nl5OTw8ghgzl88CCWZqbUrO3Hrzt3l/v7k2ZmZmTlaO/tFKTVq+Roc2nRtAk1HK24k6PFJ6Auv+3c\njbW1NS1atDB2ZFFCBe8dxb2NB8YAjwEjgThgj4FzGYSDgwM52dkkJcQDkJuTTUJ0JK6urvc+s3v3\nbh4dM5aaLbvi2+ZhJkx6gq1btxorshCiAPPnfYLm0hnOjW/LmbGtqaNN4eUXnjN2rEJ17NiRDL0Z\
nnx25zr7IVObuj6WJpw1KbiZzO7oyr6sH2uuX+ezT+caOKkpJUWYjf0DeDOQFQIiqqrmGjWQ41tbW\nzJk7h7lPDqN5lx5cCQuldcsWdOz4Vyf9q8VLGPrsTDr3HwaAiYnCwq8WFbqXphCi7J05cZwhtZ2x\nuDtbd2SgO2+Hhho5VeEsLS0Z89gEfv5mIcfjM2hRzZZGHja8fzAOAFMThUYuplwOv2DkpA8uJSWF\n27dv4+3tLZvQ/02hPVtVVfurqvqRqqqHK3Kh/dMLzz/PhrWrebhlQ957azbr1qz+x4QERTH5x67T\nfw7zlIZdu3axYMECduzYUSrHE6KqC6rfgN9jU9Hp89af3RaVRFAFWSFpwoTHuZFjSkNPG7wdLfn0\nWCKutpboVZUsrZ7DCbm0aFPgtuHlzpx33qZm9Wq0a9GEeoH+XLlyxdiRyo1Kvzbyg9q7dy/DRoxk\n6OSXMTU1ZeOXH7Hi26/p27dviY4765VXWb1uPQ3adOL8sYOMGDqETz7+qJRSC1E1aTQa+vV8mNiI\nS1iZm4KtI7v3H8TDo3hb9JW1kJAQ3vnf66Snp9Gr3wC2bNrI1YjLZGt1DB48mG+//6FCTPiCvFtw\nj40cwtxOHjhbm/Hzpduc0Xty7GS5X5KhVBV7NnIJTlghiy3Anj17+OLLr1BVlWeeevK+K7Q8iOjo\naBo3bcbHm/dh5+hMRtptZg3txomQY9SuXbuUUgtRNf05W1er1dK0adP7Ll9YUej1emJjY7G0tMTT\n09OoWVRVJT4+Hmtr6wK3HP3TvHnzOPDNR0xskvfZzFw9j/0cSVZ2/gtvVEb5FdtCh5Grok6dOvFQ\n1y5U8/LiypUraLXaEh3v1q1buHlWw87RGQA7ByfcPKtx69at0ogrRLkTHx/P4cOHSUxMNPi5TE1N\nadGiBW3atKnQhRby9qv18fExeqFNSkqifeuWNKoXhE+N6jz71JMU1nny8/PjfEou2Vo9AKEJGdTy\nlkcq/5RvsVUU5RdFUX7O71dZhixLqqoybMRIvlmzkTu2HixZsZpRY8YW+hetIHXq1EGTdpsDWzeR\nm5PNwV9/JC35FnXr1i3F5EKUD998vZxGdYOYMWY49QL82bB+vbEjiQc05ZmncLsTwzf9vFnWz4fg\nrZv47rvvCmwzePBg2nbtxXO7E3j7jxSWn73DilVryyZwBZDvMLKiKF0KaqiqanCBB66gw8jnz5/n\noR49+fjH/ZiZW5CTncWLAzty5PBBAgICinSM7OxsEhIS8PT0vPcA+unTpxk1ZiyXL4bjHxjEmlU/\n0KxZM0NeihBlLjY2lib167JrSDP8nW05k5jGwF/OcDU6ttw/+yr+ElTbl6n1TKjllPfz65eLyZi1\nHMiiJcsKbKeqKidPniQpKYlmzZpVyS0UH3hRi8KKaWWl0WiwsXPAzDxvoXRzC0ts7OzIzMwsUvu8\nCVYjMDUzJycri5Xfr2DAgAE0adKE82fDSnV2sxDlzdWrVwlyd8LfOW+N38YeDnjYWRMTEyPFtgKp\n7Veb0zcuUsvJCp1e5WyyjqGBdQptpyiKLMCRj6Is1xgIvA/UB+6tE6aqaoGbUVbUnm1WVhaNmzaj\ncdc+tHqoD0d3/EL4kb2cOnmi0J1KMjIy8K1dm2fe/YKGbToSERbK/OcncPHChQozO1KIkoiPj6dR\n3SB+HdyUeq52nEi4zbBfzxEZE/dAW+qFh4dz+fJl6tSpQ1BQkAETi/uJiIiga6cOuFtCWlYu3gF1\n+X3XHlkqsghKMkHqW2AReesjdwO+B34o3Xj3d+3aNbZv386lS5fK4nQAWFlZsWnDemJPH2HhS0+i\nT45j984dRdoSLDIyEnsnFxq2yVskI6BRM6r7+hEeHl5o299++42GjZvi7VuLZ56dTFZWVomvRYiy\nVr16dRZ8tZjeP56i3YZQhv16jm9XrnqgQvv5p/Pp0rY1C1+eRsdWLViyeJEBE4v7CQgI4Fz4JT5c\nupLvNvzErn37pdCWUFF6tidUVW2hKEqYqqqN/v5eIe1K1LNdvWYNU6ZMpXbdBly7HM4rr8xi5osv\nFvt4RXXq1Cl69e5DtVr+3EqIp3XLFmxYt7ZIz7olJSVR28+ft77/hWo+tUm6cZ3Zo3tz6uQJatWq\nlW+70NBQuvfoyZNvzcezpi9rP59Do4BafL1saSlemRBlJykpiejoaGrVqoWzs3OR28XExNC0QT32\nD2+Bt4M1V29r6LbxBBevRMrokKgQir0RAZCtKIoJcFlRlKnkrY1s0N3V09PTeeaZZ5m9fCPeAXVJ\nTrzOG2P7MXjgQAIDAw15aiY9+RRDpsyic/9haHNz+ODZUaxevZpx48YV2tbV1ZX58+fx8qQh+NVr\nRGT4Wd743+wCCy3k9Wo79h9G0w7dABg/ay5vje8nxVZUWK6urv9Yc7yoYmJi8HN1xNvBGgA/Jxuq\nO9oRFxcnxVZUaEUptjMAG2A68C7wEHmbEhjM9evXsXdyxjsg79EYF49q+PgHERUVZfBiGxl5lSfa\ndgbAzNyCoGZtHmjJsScmTaJb166Eh4fj7+9fpMd77OzsSLnx1/bASQnx2Noa9PuMEOVSUFAQkcnp\nHIlPoW11Z4Jjkki8k1Wq+7ZGR0ej0WgICAiQtXtFmSnKFnshAHd7t9NVVU03dChvb2+yM+8QdmQ/\njdp25tql80RdukC9Ul7zVFVVLl68SGpqKg0bNsTW1pamTZuxd/NqBj85g/TbKYTu287ETz95oOP6\n+/s/0A+H8ePHs+CLhSx583nca/iy78dVfDbvwc4pRFm4evUqV65coU6dOvj4+JT68d3c3Fi5dh2j\nHh2JhYmCFoW1Gzc/0D3f/Oh0OiaOH8uvv/yCnZUFju6e/L57L15eXqWQXIiCFeWebUvyJknZ330r\nFZioquqJQtqV6J5tcHAwQ4cPx9zCCk1GOsuWLmHEiBHFPt6/6fV6JkycxG+//46LmweatNvs2rkD\nGxsbevftR2JiIpo7d5g+fTrvvze31M6bn5SUFJYuXUrK7dv06d2bLl0KfMxZiDK3cMHnvP2/2dT3\ndOb8jRTmf/El48aPN8i5srOzuXHjBl5eXkWanFgUS5cu5bv332Rzv4ZYm5nw1pGrxHjUZePPvxTY\nLiMjg/j4eGrWrImNjU2pZBGVV7HXRlYU5QwwRVXVA3dfdwS+UlW1cSHtSvzoT1ZWFnFxcXh5eWFr\na1uiY/3b6tWrefuDj3l18Xosra3Zue47Luz/nT8OH0Sn0xEbG4uDg8MDTe4QorK6du0aLRo1YN/w\nFvg4WHMxOYOem09xNTqmwvw/MvXZp6l5PpjJzWsBcP5WOo8djOVi5LV822xYv54nJj6OvbU5mbl6\n1m/6ke7du5dRYlERleTRH92fhRZAVdWD5D0GZHBWVlb4+/uXeqGFvOf4GrbtgqV13kSMlg/14dLl\ni0DeWqu+vr4V5oeIEIZ27do1At2d8Lk7camOix2e9jbE
xcUV2C45OZn169ezadMmMjIySi2Pqqqs\nXr2al2e+xLJly4q0fnlg3frsjEsnV5e3du+vUUkFPsMbHx/PU09M5O2OHnzQ2Z1AOz1D+vfhhRnT\ni7zIjRB/KsrsgGBFUZYAawAVGAnsUxSlOYCqqicNmM9gGjZsyOqN79J33NPY2Nlz+Pct1K/foNB2\nWq2W9PR0nJycZCUoUWUEBQVx+VYqpxLTaOrhwKHYZJIys/H19c23TVRUFF3at6WeoyU5Oj2vzzTj\nwNFjpbKE37Rnn+HQti0M8HFk5eYMfvv5Jzb9/EuB/09OnjyZ3b//Rsu1x3C2tiQdc3YG57/84KVL\nl/BxsaWmowUvbb9GI08bevo7s/uXVQy9cJ5t23ca7WdAVlYWp0+fxtLSksaNG2NiInvKlHdFGUbe\nW8Afq6qqPpRPu3K9gpSqqkyeMpU1a9fi5OKKourZtWN7gRObvvn2W6ZNm46iKPj4+rL155/w8ytw\nIS0hKo1Nmzbx5OMTcLK2ID1Hy+r1G+nRo0e+nx89fCj+188xs1UtAF4+cBmrdn347IuFJcqRkJBA\nvQA/zoxvh6OlOTk6Pa3XHmfDbzsLXSpQr9cTFhaGRqOhSZMmBd6DjYqKonnjhkxq5MhP4cl83NMX\nRVHQ6lUmbYvm3MUIqlevXqJrKY74+Hge6twRvSYNTY6WBk2a8fOvv1f4HY8qi2I/Z6uqajfDRDIu\nRVFY9NWXvPrKLFJTUwkKCirwL2toaCgvz3qVd1ZupZqvH7/+sJShw0cQeuJ4GaYWwniGDh1Kr169\nijxZKD4mhpE17O+9buVuy/bo/O+PFlVGRgb2VpY4WOT9+LIwNcHT3qZIw9QmJiY0adIk3z/XarWE\nhoai0+lo1qwZb77zLrNfexUn878+o6p5v4zVq50++Rma2GgY09odnV7lw6NhfPbpp8x65ZViH1On\n02FiYiKjdQZU6NiDoiieiqJ8rSjKb3df11cUZZLho5UNHx8fGjVqVOi3wpCQEJp27Eb1Wv4oikKf\n0U8QdvoUubm5ZZRUCOOzs7MjKCioSLNy23fpwuKz18nU6kjNzuXr8Ju071r87+7h4eHs3bsXW1tb\nnNw9eO9YJFGpGpaejiE+U1viXbQyMjLo3L4tw/v3ZNwj/WjVrAljxo7j6PGTWDh7sCQ0mT9i0vnw\njwTq1a9vtD1nL4ZfoHW1vKUTTU0UmrubEX4urFjHunPnDsMGD8Taygp7Wxs+/OB9AG7cuMF3333H\nqlWrSE1NLbXsVVlRBvq/A7YDf46XXAKeM1Sg8qpmzZpEnj9DTnbemsWXw07i4uaGubl5IS3zpKSk\n3Jsokp5u8EeVhTC6N9+Zg3Pjtvgu2UfAsv007dGfadNnPPBxbt26xfjRo+jSthWznxxPk/p1+d87\nczhn50P/X8PZlu3Ajr3BJX4Wd847b2OVEs1nD3nySVd3aispzHrpBerXr0/IyVPEmbmx9ORNMrJy\niY+8zMsvGX752H/LycnB0cWVlWHJxKZmk6PTc/SGlsbNi7fTznPTpnDr7B+sesSPz3rU4Kt5H7Jw\n4UIaN6jPN3Nn8fnrM2jSsD6JiYmlfCVVT1Hu2YaoqtpKUZRQVVWb3X3vlKqqTQtpV67v2T4oVVUZ\nPXYch48eo6Z/EBeOH2Hl9yvo169foW2vXbtGh46dqO5fh9ycHNJvJfDHoYNVcq9HUfVkZmZiYmJS\nrHuK27ZtY/SI4dgoeo6M64CzlTl7rt1i8oEo4hJvluqw5yP9++CfcobOvnlF+1TCHXbc8eDg0eNc\nvHiRjq1b8EXP6tiYm5KRo+PZ32M5f/EyNWrUuHeM9evX89Gct8nOyeGxSU/y4kszSy1jdnY23bt0\nIjX2Ci7mOo7FpmNuYU6PHj1Zt3FzsVbDCvD15rmG5vg45v232XIhiUOpNnR0ymJQ3bynMRaFJIBf\nK37bvqNUrqOyK8mjP3cURXElbyYyiqK0JW9hiypFURRW/7CS779exvSJ4zgecqxIhRZg1quv0r7/\nCJ7/9Fte/nIVQS078vY77xo4sRDlg7W1dbEKbWZmJuNHj+KpBtXo6uOKs1XeKFI3H1eSbqeW+uM3\nTVq04vD1HHJ1Kjq9ysG4LJre7TEmJyfj7mCDjXnehiR2Fqa42FqRnJx8r/327duZ9vQk+rumMdY7\nh8Xz3mfBZ5+WWr5Vq1aRlRDJu53cebFdNV7tWAMXZxc2/vhTsZed9PT05Gpy3midqqpEZajk5mTj\n5/zXQiJ13Kw5tH8vsbGxpXIdVVVRiu0LwM+Av6Ioh8jbYm+aQVOVU4qi0KVLF0aOHPlAs5Dj4uIJ\naNz83mv/Rs2ILeT5RCGquoSEBGzMTekf4MH+mCRi0/OK66aLCfhUr1bqqzm9+trrOAc25ZnfY3l2\nezyZLrV578OPgbxHBVOy9eyJTEWTq+O3iNvozCz/sVb7mpUreCTAlubV7KjnbsOE+vas+v67Ust3\n48YNfGwVTO72lP1cLEm+nVqinvOnC79ixYUMvjiZwpwjySSaOtP5oYdZdzYJTa6O21latl5KwdHG\nmsuXL5fWpVRJRZmNfFJRlC5AHUABLqqqKrOCHkDHDh3YufZb6jRthVarZe+mH3h8dOktPSlEZVSt\nWjWydHru5OqY3qI2bVcewtrMDFMbO37duavUz2dpacnW33cQFRWFTqfDz8/v3vOr9vb2bN+1h7GP\njmBpaCT16gSxY/eGf+zxam1rR2q2/t7rtBxdqX4h6Ny5M59++B4P1cqmmp0F6y6k0qVTxxIds3Xr\n1pw8HcauXbuwsbFh4MCBZGVl4VtjLeM2X8ZEUehWy4EjCTmluhlElaSq6n1/Aa0Ar7+9Hg/8BCwA\nXPJr97fPqyJPVlaWOnLUaNXC0lI1t7BQn3r6GVWr1Ro7lhDl3o4dO1Q3Rwe1QU1P1cHWWn33nbfV\nrKwsY8e6r/DwcNXVyUEd3tBdHdfEXXVxsFN37NhRquf4evly1cHOVjUzNVG7d+mk3rp1q1SP/6eN\nGzaojnY2av2abqqjnY26eNEig5ynMrpb+/5TE/OdIKUoykngYVVVkxVF6QysJW/4uClQT1XVYQUV\n8co2Qao0ZGVloSiKPHwuKpTExESeGD+WP44epZqnJ18t/4aOHUvWo3oQqampXL16lZo1a5b7SYWX\nLl1i6ZLF5GRlM2rsWNq1a1fq51BVFb1ej6mpaakf++9u3LhBREQEtWrV+sckMFGwB96IQFGU06qq\nNrn7+y+Bm6qqvnX3dZWYjZyTk8OHH31E6KnT1AkM5PXXX8POTvaZFVVL57ZtaKxLYkYzb0Ku32bG\ngaucOBNmkC32hKjoijMb2VRRlD/v6XYH9vztzyr9jsuqqjJsxEh+3hlM9eadORx2ke49epKbm0tO\nTg4DBgygTp06TJw40dhRhTCYjIwMQkJDmdPBH09bS/oHeNLR25VDhw4ZO1q5k5mZydq1a/n666+J\niooydhxRzhR
UNNeQtwnBLSAT+HOLvQCqwKM/MTExHDx4kM+2HcHcwpJ2vQYxe1Qvjh49yqBHhmJt\n70jjdp3Zsm0b+wICuRohM/VE5WNlZYWiKMSlZ+HtYI1Or3ItVYOTk5Oxo5UrGRkZdGrXBpP0RFys\nTJn5wnP8un0nbdu2NXY0UU7kW2xVVZ2rKMpuoBqw429jwiZUgUd/dDodpqammJrm/StSFAUzc3NW\nrFiBVq9n7qptWFhZM3DiVKb1acOJEycKXQRdVVXOnTtHVlYWDRs2/MdMRiHKIzMzM+bOnUu/D+Yy\n1M+Vk0mZuPoFFbgBQVW0aNEi7DMTebGdK4qisP+aKdOffZpjoaeNHU2UEwUOB6uqeuQ+710yXJzy\nw9fXl7p167L83Zl06DuU04f2YIYerVaLq2c1LKzy9vV0dHHDysaWixcvFlhsc3NzeWToMI6fOImN\nnT3mJrBn106ZeCAMJisrC51OV+L9oJ9/8SXqN2zEiu++o2aQGTNmzCj2IgqVVcL1eGrZKfeeefV3\ntmJDlCxxKP4imyDeh1arZeHChQQGBKCm3WL39wtxMc0leO8enn32WeIiL3N4+09o0tPY+v0ScnOy\nGThwYIHHXLhwIfFJqXz8YzBz1+6gYcceTC3GOrFCFEav1zNjymScHBxwc3FmyID+aDSaYh9Pp9Px\n5WfzuXR4L6bnDtO/x0OsWb26FBNXLNevX2fWzJk8PelxfvnlFwC6PdSdvbHZ3MjIIUenZ+PFNLp2\n7WrcoKJckWL7L39OjPp27SZ0rj6kZOZSvXo1vvvmazw8PGjdujWvvfIKX895hae7N+GnrxfwzfJl\nhc5SvhB+kcYdH8bMPG8ZtJbd+3LhQnhZXJKoYpYsXszhrZu5NKkT0U91Rb1yhtdenlns423bto3Y\nc6fZMbgxC7oEsqlfQ6Y88zR/f9pAVVWWLF5Mp1Yt6N6xHb/++mtpXEq5k5iYSNsWzUjfu5mAq0eY\nNnE8SxYvpn///jz3ymye3xXP6M1XsA1qycLFS40dV5QjUmz/JTw8nCNHj/L8p9/Qe9REXvjsW/bv\n309ERMS9z7z55ptoMtLR5uaSkZbK6NGj2bdvHy+8+CJvv/02N27c+M9xGzaoT2jwdnKys1BVlaM7\nfqZhwwZleWmiijgcvJcJddxwsjLH0syEZxtV448D+4t9vMTEROq52GJ2dzWl+q72pGsy/7G95NIl\nS5j/1mxe9FaY4JTJ46MfZd++fSW9lHLnhx9+oJO7FR90CuTppr6s6FGXD959G4AXX5pJ+h0Nmsws\nft72G/b29oUcTVQlUmz/RaPRYGNrj7lF3sIT5haW2NjZF7jo+dq1axn+6Cjic8w5dO4KLVu1/s+W\nVJMnTybAuxovDuzIrKFduRxygIULPjfotYiqqWat2hxNvHOv53n0eho1vB/8mVhVVYmMjMTLy4ud\nUTc5fv02uTo9HxyLpHWzJlhY/LVY/ffLlvBRh9o85OvGoEAvXmxag1XffVtq11ReZGZm4mzx12IS\nLlYWZGVn33utKIrczxb3JX8r/qVhw4aYmypsWvQJrbr34+jOX7C1tqJu3br5tnnjrbd5ds4X1GvR\nlqO7thGydzsNGjXmsfHjGTd2DL///js2NjYsXbyIlJQUMjMzqVu3bpH3whXiQcx69TU6//wT/X45\ni52FGedSMtl78JcHOkZubi6jhg3lQPA+rM3NcHR2YdSOiySlptGuZXPWb9nyj8+bW5hzJ+evnm56\nrjbTrVQAAB93SURBVA6LSrhS2uDBg+n68Yc0c7OjtpM1bx2L5tHRo0v9PKGhoUwcP4Zr0TE0bdKE\nFavW4O3tXernEWWn0P1si33gCryCVFxcHJOnTuNCeDgN6tfnyy8WUL169Xw/X8PbhxcXruL2zRt8\n8eoUJr/7OU7uHiyaPZ3EuBi6DBxBekoSMRdOE3LsKG5ubmV4NaIq0mg07Ny5k9zcXLp164arq+sD\ntZ/z7jtsX7aAzYOaY/H/9u4zvopqffv4b6WHQCAJJLQA0qQjHaRKkyLCQUCxUxQFRUApikpTVLoH\nwQoesIBiFAgIBBAIHZRilC7SlFACIb3sZP4vkgfxoYZksglc3ze6J2tm39uP2VfWzCquhqHhB0mp\n3IBZc7+84jKBoaGhPPPk4wytVYKYlDRm/hbJT+s3UL169Wx9jtTUVHbu3IllWdSqVetfvWln2bBh\nA68PfZkLF6Lp8GAXRo97K0f/cI6KiqJyxfI8dnc+agXlY8WfsexO8uXXPfsubowgt64sL9eYA2+Y\nZ8M2q14aNJjwbTvwLRxEsdJl6dz7BQBGPtaBB3sNoEHrjH1vZ789gnurlmPMmDHOLFfkusqVLMag\nygE8Va0kAD+fjGbQjrP8euDQFdvHxcXx2muvsXF9OGVKl2b0uLeyHbQXLlzg/pYtiDn5F8ZAvsJB\nhK0Nx8/P77K2lmUx+o03+GjmDACe6z+A0ePG5ejm8rll+fLlvN7/ad5smPE5Lcui749/seO3Perd\n5gHZ2TxeMiUlJbFjxw72799PWloayZnPaiZNnEDLe+vz26afOPP3Pxssx12Ipmipuy6+Dip1F+ej\no3O9bpGsSEtL49jJ0yw/fBpHejqWZbH40Cny+xa8YvvY2FjurVeHIysX0sQ9lg3r1uTI3qdjR71B\nBUc0m3vUYlP3WtRwjeeNV0dcse2MD6YTOudTVnSuzorO1Vk85xNmfDA92zU4Q6FChTgdm0RqWkZn\nJSY5jYSUVA24yuPUs71BR48epVXrNqS7uHE+6gzJiYmkpCTTvMV9LPhmPv7+/pw8eZK69epTpVEL\nCgYEsmLeLMpXu4e+b04k5lwU77/yDJ9/9gnt27d39scRuSrLsvDzzU8VXw8i45PxcXfjr7hk3n1/\nOs8+++xl7WfOnMny6e/yZbsqAGw8cY6B205y8OjxbNXRqW0rHvE8R6fyQQCE/XmGT855Exa+4bK2\nD7RpSU+v8xfbhh46xbwkP5as/OmytnazLItjx46RnJxMuXLlsrw7j2VZPNS5Ewd3bqZyQRe2nU7l\niWeeZ9zb79hUseQk9Wyzqd/z/anb7j+8PT+MaaGbKFWxCk8PG4e7f3H6PJPxBVSsWDF++Xk7zWpU\npFwhdz6c8QEXTv3N8B5teG/AY7w2YpiCVm55xhgmTJrC0cR07i0ZQD5vLypVq8bTTz99xfbnz5+n\nbIF/nqWW8/PhQkxstuuoUbsu3x2OwpGeTlq6xYI/oqheu/YV2/oFFOZQ9D8LdxyKTsS/cO5vx+dw\nOHjkoa7Uq1GN1vc2oFHd2kRFRWXpGsYYFvywiKFvT6VKtwFMn/WFgvY2oJ7tDSp9V1lemvo/ipUu\nC0DonI+IPnuKjk88y+gnHuDM6X/PrT179izVqtfg/sf7UfGeuoTNn413WhLLf1zqjPLlNrNp0yZm\nTptKWpqDXv2ep23btjn+HuHh4YSHhxMYGMhTTz111X2Yt2/fzgNtWzGn
TWXKFsrHa5sOk696Q+bO\n+yZb75+YmEiXju2J2LUTg6FStWos+nH5FReQOXDgAM0aNaRdqYwNEpYfiyZ88xYqVqyYrRqyasrk\nySz5cArz21fB09WF4RsOkXR3ff731bxcrUOcRwOksqldh44UvKsKXfq+REpSIu/0f4xmnbqTv5Af\nK+d8QMTuXf9qHxISwnvTP2Lw1Iy5ho7UVJ5tUY3TpyL17EWyZfPmzTzYri0j6gTj6erCWz8fY9aX\n8+jYsWOu15KQkMDChQtZHx7O8tBFxCUk0L5dO2Z+Oitbez+vXr2aFcuW4RcQQLt27ShQoABly5a9\n5mjcEydOsGDBAgC6d+9OyZIlb/r9b1avxx+lVuRunq6eMZDpl8hoXv41mh2/a7W4O8XVwlbzbG/Q\nJx99SKs2bdkaFkrUmVN4enmz/5dNRGxZz6KFP1zW3tPTk4TYGCzLwhhDUkIc6elpmlsr2fbR9PcZ\nXrskz9TMWKjCx92VDyZPyPWwjY2NpVmjBvinxFI0nwdx8QksXracRo0aZeu6s2fN4s1hL9OrUhF2\nx6TwxazP2LJj53WnvZQsWZLBgwdn672zq0LlKoT9vJ7Hq6bj5uLCj0fOUeHuKk6tSW4N6tlmQXJy\nMnv27MHDw4MDBw4QExND8+bNKVOmzGVtk5KSaHhvYwoWL0O5GnXYGPotHdq0YuqUyblfuNxWHu/R\njbpRe+ldI6P3FHroFHMu+Fxx4JCdJkyYwNY5HzC7TWWMMYTsP8knkYbNO3Zd/+RrCA4K5OvW5akZ\n6AvAEyv20uGl1+jXr19OlG2rpKQkOrVry5F9e/DxcCPFw4dV4euvOU9fbi/q2eYAT09PatWqBUDV\nqtde19jLy4v169YyecoUjh3/k2GDXqR37965Uabc5no/15+eXbuQ38MVT1cXRm45yqSZH+d6HadO\n/k11P6+Lc1lrFPHl9K9/ZPu6cQkJFM//z/Ph4vnciY3N/oCr3ODl5cXy1WvYtWsXKSkp3HPPPXh7\ne9/UtUJCQnhv3BhSUlPo1fdZBg4anCfnDUsG9Wxz2JEjR1iyZAkeHh5069YNf39/Z5ckt6EVK1Yw\nfdKEjAFSzw2gR48euV7D0qVLGdjrCb5/oBpFfTwZuPYAPvc0ZfYXX2bruk8/9ijndmxgdIPSHDgX\nz4vrDrF20+br/oF7OwkLC+PxHg/x3D2F8HZz4bOIGAaPHMMLAwc6uzS5Dg2QygU7d+6kddu21Gra\nhsT4OE7sj2Db1i0EBQU5uzQRW0ydPJlRb75BUkoKndq3439fzcv2AMDExEQGvziAFcuW4e/vx3tT\n/0vr1q1zqOK84anHH8V7/090qJCxitTuyHiWRPuxdcduJ1cm16OwzQVt7m9Pmfr30bJrxsLkX0wc\nRZXi/kyaNNHJlYnYx7Is0tPTs7x4g1zdc8/0JW7bYnpUzVjTesOxGLZZwazduMXJlcn1aFGLm+Bw\nONi3bx9HjhzhRv5wOHv2DCXL/TOvr3jZipw+e8bOEkWczhhjW9A6HA4G9n8eX598+BXIz5hRb97Q\n72JeN3DwEJYfTebriCi+3xvFrIgLvDZqrLPLkmxQ2F7FmTNnqNegIS3b3E/tevV5uOejOByOa57T\ntk0bFn06jZjz5zh1/Agr58+mnQ2LDYjcKcaPG8euFYvY8VgD1veow3effcjsWbOcXZbtqlSpwoYt\nWyl638N41+/M4h9XZGvhkrS0NGbNmsUrLw9hzpw5pKen52C1ciN0G/kqHnn0UWJdfHhsyJukJicx\nZXBv+jzanYHXGKCQkpLCCy8O5KuvvsLDw51hw4YxYvhwjSAUyRQaGsryJaEEBAYy8KVB191usln9\nurwSbGhRKuN26vy9f7PGpxzzQi6f2y5XZlkWDz/UlX3bw7nH35VfzqZRv1V7Pp+bvYFscmW6jZxF\nERG/cW/7/2CMwcPLmzot27Pr14hrnuPh4cEnH39EfFws58+d49URIxS0IplmfDCdgb2fJHhfOCeW\nfEXDOrU4f/78Nc8pHBjIvnPxF1/vPZ9IQGCg3aWSmJjIwP7PUePuCrRt0ZRdu7I3d9iZ9u7dS/ia\n1bx5b2G6VQ1gVOPCLFr4A0eOHHF2aXcUhe1VVKxYkZ3hK4GMpRYjNq6h8t25u86qyO1k/NgxfN2u\nCv1rl2Fai7upWcCF+fPnX/Occe9NZPLukwxYc4Deq/bx/fFYXn39Tdtr7fPk4xxd+yPT6xSmk/t5\n7m95H8ePZ28Xo6xISkri2LFjOXKtuLg4CubzwMM14+vey82FAl6exMXF5cj15cZoUYurmP7+NO5r\n1Zrd61eREB9H1cqVrnkLWUSuLSExicLe/+wOFODpRmJi4jXPqVq1Kj/v/pXFixfj5ubGRw89dN1b\nz9mVlpZGyKLFHHm2OT7ubtQKKsiG0wmEhYXRp08fW98boOfDPVjw3QIMBh9vT1asXkuDBg1u+nrV\nqlUjxcWThfujaVA8H+tPxONVoGCub9Jwp1PYXkXJkiX5dddOdu/ejaenJzVr1rzu2qwiAunp6bz3\nznhCQ77Dt2BBRo1/l0aNGtGjR3cGrF3JG/VKcfBcPD/8cYYNN7Cec3BwMAMGDMiFyjO4uLjg7upG\ndJIDH/eMr8joJAdeXl62v/fHH3/MkoXfM6NDWYrmd2dexFkeuL81Z6JvfgWtfPnysXrdep7p9RQr\nf95PlSpVWbV4Lh4eHtc/WXKMBkiJSI56/dURhH39P0bXL8WxmETe2HqUtRs3U758eV4bNpQVPy7F\n39+fd6ZMo3Hjxs4u94reGjuGrz6cTt/Kgew6l8iOeFe27tyVrZ2MbkSXLl1I/n0dz9crCkCyI51H\nvjtAWrq+S/MKLWohIrmiTPGifNemPBX9M4LpjQ0H8X/gSUaNGuXkym6cZVnMnz+fdatXElisOIOH\nvIyfn1+OXT8yMpLTp09Trlw5fHx8Lh4fMmQIP3w+k8n3l8HNxbDrZDzvbfqb+ORrTzuUW4c2IhCR\nXOHu5kZcatrF13GOdIpdZeP5W5Uxhp49e9KzZ88cv/Y7b7/Fu+PHE1DAm4Q0WLJsBXXr1gXg3Xff\n5bt5XzJg6WFK+noScTqBsW+/k+M1SO5Tz1ZEbkhqairffvstkZGRNG7cmIYNG16x3UcffsiEUSN5\nqUYxjsel8PUf59m2c5dTNnO/1WzZsoUuHdrybvMg/L3d2Hgshm+Pwp/H/7rYxuFwMGXKFE6cOEGP\nHj1o0qSJEyuWrNJtZBG5aQ6Hg/atW5J47BA1AvKx8I8zvD1pKr2usm1kSEhIxgApPz+GDB12xT2f\n70SzZ89m3sSRvFirEJBxu7rbgkPExMbe9FZ8cmtR2IrITVu4cCHjBz3Pis41cHUx7D8XR+uQnUTH\nxmnhlizYuHEj3R/swIQWQfh6uvLzX3HMPpDCiZOnnF2a5BA9s7VJXFwc0dHRFCtWTLueyG0rKiqK\n8oXy4eqS8R1SvpAPicnJpKa
magpJFjRu3Jhezz7PwBkfUKxQPk7FprBoyVJnlyW5QD3bbJg4aRKj\nRo3G28eHAH9/lv+4lLJlyzq7LJEcd+DAARrXq8vsNndTK7Ag7/18hN/ci7Bm42Znl5YnHT58mMjI\nSCpXrpyjo5zF+XQbOYeFh4fz8KOPM/KzEAKCirF07kcc2LyabVv05SO3p2XLlvHCs32JPBtF00YN\nmTv/WwJzYZ1ikbxEYXsdcXFxuLu743mDUxSmTp3Kyu0RPDE0Y4/J5MREnr2vGinJyXaWKXJHSElJ\nweFwkC9fPmeXIpIl2vXnKmJjY2nf8QEKFy6Cb8GCDBs+4oY2py5TpgyHfv2FlOQkAH7btoHgUqXt\nLlfktmZZFkOHDMY3f378CxWkS8f2xMfHX/9EkVvcHT9AatCQl0l08eLT8D0kxMUy8YXHqFa1Ck8+\n+eQ1z+vcuTMLQr5n5CNtKRpchj/3RbDoB+2xKZIdn3/+OasWfM2+3k3I7+HG8z/tYdiQQcz4+FNn\nlyaSLXd8z3bjxk3c/9gzuLm74+vnT5NOD7Nh46brnufi4sJXX8wl5Jt5jBnxMr9HRGjyuchVpKam\nMuzlIdx9V2nqVKvCkiVLrthuc/hanqgQgL93xpZw/asXZ9P68FyuViTn3fFhW7JECQ7u/hnIuIX1\nR8QvlAq+sZVujDE0aNCAjh07UrRoUTvLFLHNvHnzqF6xPBVKB/P6qyNIS0u7/klZ9OqwV9i2cD5z\nmpTktfLe9Hn8UbZu3XpZu+KlSvPz2YSLj3K2R16geAmtPCV53x0/QGrPnj3c17IVZavWJDb6PO6k\nsX7d2hzb3cOyLMLDwzl58iR16tShQoUKOXJdkZywatUqnurRjU9aViTA253BGw7T7om+jBo77ort\n09LSsCwLN7esPYG6q0QxFrQud3Fzgne2/IFp0Y3x7/x73d+YmBiaNWpA/qQYCnm5s/NMHKvDN1Cp\nUqWb+4C3uLi4OMLCwnA4HLRq1YqAgABnlyTZpEUtrqJKlSpE/LqbtWvX4u3tTZs2bXJs30rLsni6\ndx/Whq8nuHwl9gx4gc8+/YSuXbvmyPVFsmthyAIGVC9K02B/AMY3LMOgb7+5LGwty2LkiOFMnTaN\ndMuiW5cuzJr7xQ3/ruT38SEyPvli2J5MdFDhCn/Q+vr6svnnHYSFhZGcnMzn991HkSJFsvkpb01R\nUVHcW78uBdLj8XB1YVBsGhs2b9Vc/dvUHd+ztdPq1avp068/o+cuwdPbm8N7djPxhSc4fy5KS9zJ\nLWH40KEkrPmet5tm3HEJPXSKmScNG3/e8a92s2fN4r+jX+W79lXJ7+FG31X7qNSuK5OmvX/ZNSMj\nI3E4HJQoUeLi/+cLFixgYL++PFM5iL8SUll1KpFtO3cTFBRk/4e8RQ15aSAHwubzbK2M3mzI3vPE\nlapHyKLQ656blJTE3LlzOXXqFM2aNaN58+Z2lys3SD1bJzhx4gRlKlXDM3OB8bsq1yAxMYGEhIR/\n7WEp4iwvDBxIwzn/I3ndfgI8XZm19zRffLPgsnbhq1fRp1IggT4Z89Bfqlmc135a9a82qampPP5w\nD1atDMPVxYXqNWuycOkyChQoQPfu3QkKCiJ00UJK+fqy7bnn7+igBThx/CgVCv3zFVzR34PQE8ev\ne15ycjL3NW1M2pmjlM7vwowpExnz7gT69XvOznIlm+74AVJ2qlu3LhFb13Pij/0ArPxmDuXKV1DQ\nyi0jODiYbTt3UaLzU1jNurJ05Wruv//+y9oVLVmSXVEJF1/vOh1L0WLF/9Vm8qSJRP22jb1P38u+\npxsRGH2CkcOHXvx5s2bNmDh5CqNGjb7jgxagWYtWrDyeTFxKGsmOdJb+mUiT5vdd97xFixaReOoY\nIxsV5okaAbzZuDDDXnnlhtYHEOdRz9ZGVatW5b/TptKv138AKFGyJEsWL3JyVSL/VqJECUaNGn3N\nNkOHj6BJSAjdftxDfg9XNp+M4af1If9qs3PrFh4pF4CXW8aGHI9VLMK727bZVXae1/+FF9i3bw9P\nfzoLY6BLp068Nf76G8VHR0cT6ON68RZ9kI87CYmJpKenazOUW5ie2eYCh8NBbGwshQoV0rNaybNi\nY2NZunQpKSkptG3b9rLpbq8OG8aR5d/yUcu7McYwavMfnCtbhzlfz3dSxXlDSkoK6enpNzzY7ODB\ngzSoW5sXahWinJ8X3+y7QFrxqqxYvcbmSuVGaG1kEbFVbGwsbVo0I+H033i6uhLn6sWajZs0B90G\nq1evZkC/Zzhz9izNmjZl9twvtXvQLUJhKyK2S01NZevWrTgcDurXr6+NBOSOo40IRG5R58+f55GH\n/kOJIoWpVbUy69evd3ZJN83d3Z0mTZrQokULBa3IJRS2Ik72WI9ueB3eRdiDVXmlrBddO3Xkzz//\ndHZZIpKDFLYiTpSamsqqteuY1LQCwb7edCofRKvShVmz5s4b7LJx40ZaN21MvRpVGTtqlC1rNIs4\ni8JWxInc3NzwdHfnr7iMfZEty+J4bDK+vr5Orix3/f7773Tp2J6HfWJ5u3J+ls/9hNeGD3N2WSI5\nRgOkRJzsv9OmMeXtMfQsX5hd5xK54BvEmg2b8PT0dHZpuWbs2LGcDZ3DW00ylo3843w8nZfv51jk\naSdXJpI1Wq5R5BY1cNAgKletyrp1a3mgWHF69+59RwUtgJeXFzGp6RdfRyc78HD3cGJFIjlLPVsR\ncbqTJ09S756aPFS6AKXzezI9IpJX33qHZ/v1c3ZpIlmiebYicks7fvw406ZM5sK5KDp17Ubnzp2d\nXZJIlilsRUREbKZFLURERJxEYSsiImIzha2IiIjNFLYiIiI2U9iKiIjYTGErIiJiM4WtiIiIzRS2\nIiIiNlPYioiI2ExhKyIiYjOFrYiIiM0UtiIiIjZT2IrcBtLT06/fSEScRmErkoetWrWKUsWCcHd3\no17NGqxatYrBA1/k+Wf6sm7dOmeXJyKZFLYiedTx48fp2a0rMxqX4swLrWnqGU/nDu3x2LaMUgc2\n0KNzJ0JDQ51dpoigsL0tfP3115S+qywBRYrQq09fEhMTnV2S5ILt27dTv0QAzYMDcHNxIcWRxvO1\nghnZqDwDapdhWrPyTBg32tlliggK2zxv/fr1vDR4CL1HTWXslz+y98hfDBo8xNllSS4IDAzkYFQs\nSY40AM4kphDg5XHx535e7iQnJzurPBG5hJuzC5DsWbZsGc3/8xgVa9YB4JGXRjJxwKNOrkpyQ+PG\njWnQohWtf1hD3SBfVp24wOoT0dxVMB+FvNwZuvEwvQYPd3aZIoLCNs/z8/PjTMSWi68jjx+hYKFC\nTqxIcosxhrnz5rN06VKOHj3Kc/XqcerUKSaMG01SUhy9hwxn0JCXnV2miADGsix7LmyMZde15R/R\n0dHUq9+AoLJ34x9UnA1LQ/hy7hw6dOjg7NJERO44xhgsyzKXHVfY5n0XLlzgiy++IDY2
lnbt2lGr\nVi1nlyQickdS2IqIiNjsamGr0cgiIiI2U9iKiIjYTGErIiJiM039ERG5jvT0dJYsWcLx48epV68e\n9evXd3ZJksdogJSIyDVYlsWjPbrxy4Y1VPDzYPvf8YwZ/x7P9+/v7NLkFqTRyCIiNyE8PJwnu3dm\nUotAPFxdOBmbwpBVf3H+QgweHh7Xv4DcUTQaWUTkJpw+fZqSBb3wcM34uiya3x1XY4iJiXFyZZKX\nKGxFRK6hfv367DkVx6+R8TjSLRbujyY4uCQBAQHOLk3yEIWtiMg1lCpVim+++56P9ibTY8FBItID\nWbI8DGMuu1MoclV6ZisicoPS09NxcVEfRa5Oz2xFRLJJQSs3S//niIiI2ExhKyIiYjOtICUi15Wa\nmspnn33GHwcPUrtuXXr27KkBQiJZoAFSInJN6enpdO7QjrhDv9G8aH4WHommeeduvD9jprNLE7nl\naAUpEbkpW7du5YnOHdncozburi5EJ6VSfe4m/jh6nMKFCzu7PJFbikYji8hNiYuLI7CAN+6ZKygV\n9HTDx9OD+Ph4J1cmknfoma2IXFPdunU5FpfCp78ep1WpAObsPUmxksEEBwc7uzSRPEM9WxG5poIF\nCxK2Zh1LkwvSZcUB/vQvx9KwVZpzKpIFemYrIiKSQ/TMVkRExEkUtiIiIjZT2IqIiNhMYSsiImIz\nha2IiIjNFLYiIiI2U9iKiIjYTGErIiJiM4WtiIiIzRS2IiIiNlPYioiI2ExhKyIiYjOFrYiIiM0U\ntiIiIjZT2IqIiNhMYSsiImIzha2IiIjNFLYiIiI2U9iKiIjYTGErIiJiM4WtiIiIzRS2IiIiNlPY\nioiI2ExhKyIiYjOFrYiIiM0UtiIiIjZT2IqIiNhMYSsiImIzha2IiIjNFLYiIiI2U9iKiIjYTGEr\nIiJiM4WtiIiIzRS2IiIiNlPYioiI2ExhKyIiYjOFrYiIiM3c7Ly4McbOy4uIiOQJxrIsZ9cgIiJy\nW9NtZBEREZspbEVERGymsBUREbGZwlbEJsaYkcaY34wxu40xO4wx9XL4+s2NMaE3ejwH3q+zMabS\nJa/XGGNq5/T7iNyObB2NLHKnMsY0BDoA91iW5TDG+AMeNrzV1UY42jHysQuwBNhnw7VFbmvq2YrY\noxhw1rIsB4BlWecsy4oEMMbUNsasNcZsN8YsM8YEZR5fY4yZZozZaYz51RhTN/N4PWPMJmPML8aY\nDcaYCjdahDEmnzFmljFmS+b5nTKPP2WMCcl8//3GmPcuOadP5rEtxphPjDHTjTGNgAeBCZm99LKZ\nzXsYY7YaY/YZYxrnxH84kduRwlbEHmFAqcwQmmGMaQZgjHEDpgMPWZZVD/gcGH/Jed6WZdUCBmT+\nDGAv0MSyrDrAKOCdLNQxElhtWVZDoCUwyRjjnfmzmkB3oAbwsDGmhDGmGPA6UB9oDFQCLMuyNgOL\ngaGWZdW2LOtw5jVcLctqAAwGRmehLpE7im4ji9jAsqz4zOeZTckIufnGmBHAL0A1YKXJWPXFBfj7\nklPnZZ6/3hhTwBjjC/gCczN7tBZZ+71tC3QyxgzNfO0BlMr899WWZcUBGGN+B0oDRYC1lmVdyDy+\nALhWT/r7zH/+knm+iFyBwlbEJlbGijHhQLgxJgJ4EtgB/GZZ1tVuuf7/z1otYBzwk2VZXY0xpYE1\nWSjDkNGLPvivgxnPlJMvOZTOP98HWVn67f9dIw19n4hclW4ji9jAGFPRGFP+kkP3AEeB/UCRzLDD\nGONmjKlySbuHM483AS5YlhULFAT+yvx5ryyWsgIYeEld91yn/XagmTGmYOYt74cu+VksGb3sq9H6\nrCJXobAVsUd+YE7m1J9dQGVgtGVZqUA34L3M4zuBRpecl2SM2QHMBHpnHpsAvGuM+YWs/86OA9wz\nB1z9Boy9SjsLwLKsv8l4hrwNWA/8CVzIbDMfGJo50KosV+6Fi8gVaG1kkVuEMWYN8LJlWTucXIdP\n5jNnV+AHYJZlWYucWZNIXqeercit41b5y3e0MWYnEAEcVtCKZJ96tiIiIjZTz1ZERMRmClsRERGb\nKWxFRERsprAVERGxmcJWRETEZgpbERERm/0fblQhEznlSdgAAAAASUVORK5CYII=\n", + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plot(X_lfda, Y)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Relative Components Analysis\n", + "\n", + "RCA is another one of the older algorithms.\n", + "It learns a full rank Mahalanobis distance metric based on a weighted sum of in-class covariance matrices. It applies a global linear transformation to assign large weights to relevant dimensions and low weights to irrelevant dimensions. 
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Relevant Components Analysis\n",
+    "\n",
+    "RCA is another of the older algorithms.\n",
+    "It learns a full-rank Mahalanobis distance metric based on a weighted sum of in-class covariance matrices. It applies a global linear transformation to assign large weights to relevant dimensions and low weights to irrelevant dimensions. Those relevant dimensions are estimated using “chunklets”, subsets of points that are known to belong to the same class.\n",
+    "\n",
+    "Link to paper: [RCA](https://www.aaai.org/Papers/ICML/2003/ICML03-005.pdf)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "rca = metric_learn.RCA_Supervised(num_chunks=30, chunk_size=2)\n",
+    "X_rca = rca.fit_transform(X, Y)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "image/png": "
EuJzykDpESlEh4RQY96heutWlsYCKpZlSuXL+uc\nqmJ4f/FirGLCODjEj98GtaJ/LWsmTxivdyxhZklJScTFxVGRGlG7d++mZU1bujVwoq6TDeP9XNi2\nfSe5ubkmv5YUW1EhtfL1ZeW5GDRNIyk7jx+vpuDXtq3esSqEy+fPcr+HI5aGwo+PXvVcuXzpgs6p\nhLnk5+cz8vFheLrXomH9uvS6vzsZGRl6xzIJOzs7UnKMt75ApOQYsbC0wNLS9MOZpNiKCumzr1ey\nNUnRfOVBfL/ZT89HhvLwww/rHatCaOnXlo0RSWTlF35Irboci2/rNnrHEmbyweL3OL1vB1/2rc9X\nfeuRG3mGaVOn6B3LJPr06YPm4MbiwwlsOJ/AnJB4Zs58DYPB9KVR+mxFhWU0GomMjMTR0ZHq1avr\nHafCyM/PZ8TQIezcvg1baytq1anLzzt34+rqqnc0YQaPDuqPe8xhujVwAuBMbCabEqtx8NhJnZOZ\nRnp6OkuXLiXm+jWCu3a756U9ZQYpIYTJaJpGVFQUOTk5eHl5YWFhoXckYSavTH2J45u/YYKfC0op\nvj+bREGjDqxat0HvaGWSFFshhBDFlpKSQpdO95GTdBMbSwPJBdbs2/8Hnp6eekcrk6TYCiGEKJGc\nnBz27dtHfn4+HTt2xMHBQe9IZZYUWyGEEMLM5DlbIYTZyOxcorguXLjA9u3biY6O1jtKqZBiK4Qo\nsejoaDoG+mNjbU11ZyfWrFmjdyRRDsyZNZNOge14dexIWjRtwqZNm/SOZHZyG1kIUWIdA/3xzLrK\nkGbOhCdls+BAPHtCDtCyZUu9o4ky6uTJk9wf3Il3u9XCqYollxKymH8gnriEJKysrPSOd8/kNrIQ\nwqTy8/M5eOQYQ5s5Y2lQeLva4u/pwP79+/WOJsqw0NBQvGvY41SlcJamxq62GNCIj4/XOZl5SbEV\nQpSIhYUFTg72hCfnAGAs0LiakkvNmjVNcv6EhAQOHDjA9evXTXI+UTa0aNGCCzfTuJZa+Pfm4LU0\nbGxscXNz0zmZecltZCFEia1evZpxY0YR4OFAZGoe9Zu15seft93zJBdbt25lxGNDqeVoS3RyBvMX\nvMlzEyeZKLXQ2xdfrOD5ic/haGtDPhZs3voTgYGBescyCXn0R9xWSEgIH7yzkLzcXIY/PUbmEBbF\ncurUKfbv30/NmjXp16/fPRfarKws3Gu6Mb29K02q23IzPZdpe27yx9HjeHt7myi10FtqaiqxsbHU\nqVMHGxsbveOYzO2KremXNhDlysGDBxnQuxev+dfF3sqCyc+OJi8vj6FDh+odTZQTvr6++Pr6mux8\nMTExVLEy0KS6LQA17a3xqmHPlStXpNhWII6Ojjg6Ouodo9RIn20lt2L5Mia39uCplnV4xMedRR0b\nsGzxu3rHEpWYu7s7OfkaZ2MzAYhJyyUsPp3GjRvrnEyIkpOWbSWnaRp/vd+h1P/c/RCiVFWpUoUf\n1qxl2KODcbHLIDY1k0XvvU/Dhg31jiZEiUmxreSefnYcfR9YT1UrC+ytLZl9KJKFS5bpHUtUcj17\n9iQsMorQ0FA8PT0r/EhVUfHJACnB77//zgfvLCQ3J5sRo59l8ODBekcSQohySUYjCyGEEGYmM0gJ\nIYQQOpFiK4QwiQ0bNvDEY0MZ/8wYrly5onccIcoUKbZCiHv2xYoVTH5mFG1jT+N0eg+dAgOIiIjQ\nO5YQZYb02Qqhk9TUVGJiYqhbty62trZ6x7knzRt5sbhtDdq7OwPw6r5LVH9oOHPmzNU5mRClS/ps\nhShDVn7zDZ7utejRqT11PWqzd+9evSPdk7z8fOws/5ym0c7SQH5eXqlmOH/+PPcHd6Zx/bo8/uhg\nEhMTS/X6QtyJtGyFKGURERH4+bZgbmc36jrZcDwmg6UnU7kWc7PczhH7xry5rP10KXMC6hGTns1r\nByPZ+ds+WrVqVSrXT0hIwLdpE15sWZPOHs58eiaay1Xc2BNyQCZqEaVKWrZClBHnzp2jUQ176joV\nFtY2tatiiUZ0dLTOyUru1RmvMeL5qbxztYCNea5s2PJTqRVagP3799PMpSpjfOvg42rPoiBvTp06\nRUJCQqllEOJOZAYpIUqZl5cXofHpJGTa42pnRWhiNpl5+SZbB1YPBoOBF1+ayosvTdXl+nZ2dsRl\n5lCgaRiUIjknj9x8I1WqVNEljxD/nxRbIUqZj48Pr7w6gykL3qCea1UiEjL44qtvsLOz0ztauRUU\nFISTZ32G/3KO+2pWZXVYImPHjsXe3l7vaEIA0mcrhG6uXLlCREQETZs2xcPDQ+845V52djZLVLTb\nFgAABWRJREFUly7langYAR3u4/HHH5f+WlHqZLpGIYQQwsxkgJQQQgihEym2QgghhJlJsRVCCCHM\nTIqtEEIIYWZSbIUQQggzk2IrhBBCmJkUWyFEmZGWlsYff/wh6+GKCkeKrRCiTDhx4gQ+DRswfsgg\nOrbz44XnJiDP6ouKQia1EEKUCb4+TXiuvg1Dm7qTkpNHz42nePfzr+ndu7fe0YS4azKphRCiTLsY\nFka/RoWLMTjZWNHF3YkLFy7onEqUhtOnTxPcsT2N6tXhiceHkZqaqnckk5NiK4QoE5p5e7P20g0A\nErNz2X0tmebNm+ucSpjbzZs36R4cRMv8SF5oYUXMkV08MrCf3rFMTlb9EUKUCStXreahnj34+Fws\nN1MyGDN2LA888IDesYSZ7dmzh8auNvRsWA2AcW2sGbY+hIyMDKpWrapzOtORYiuEKBNatGjBhdBw\nLl26hKurK56ennpHEqXAzs6OlOx8NE1DKUVarhENsLa21juaSckAKSGEELrJycmhU/sAbNOi8XY0\nsCc6l6FPPcMbby7UO1qJyBJ7QgghyqSMjAyWLl1KVEQ4nYK7MGTIkHK7FrEUWyGEEMLM5NEfIYQQ\nQidSbIUQQggzk2IrhBBCmJkUWyGEEMLMpNgKIYQQZibFVgghhDAzKbZCCCGEmUmxFUIIIcxMiq0Q\nQghhZlJshRBCCDMz66o/5XVuSyGEEMKUzDY3shBCCCEKyW1kIYQQwsyk2AohhBBmJsVWCCGEMDMp\ntkKYiVJqhlLqjFLqpFLqmFLK38TnD1ZK/Xi3201wvf5KKZ+//PyrUsrP1NcRoiIy62hkISorpVR7\noDfQWtO0fKWUC2BthkvdboSjOUY+DgC2ABfMcG4hKjRp2QphHrWBeE3T8gE0TUvUNO0GgFLKTym1\nRyl1WCn1s1KqZtH2X5VS7yuljiulTiml2hVt91dK7VdKHVVK/a6U8r7bEEopO6XUCqXUH0XH9y3a\n/oRSal3R9S8qpRb+5Zini7b9oZT6VCm1RCnVAegHvF3USvcq2v1RpdRBpdQFpVRHU/zihKiIpNgK\nYR7bgbpFRegjpVQQgFLKElgCPKxpmj/wJbDgL8fZaprWBphQ9BrAeaCTpmltgVnAm8XIMQPYpWla\ne6Ab
[... remainder of base64-encoded PNG image data omitted ...]\n",
+      "text/plain": [
+       ""
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "plot(X_rca, Y)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Manual Constraints\n",
+    "\n",
+    "Some of the algorithms we've mentioned have alternate ways to pass constraints.\n",
+    "So far we've been passing them as just class labels - and letting the internals of metric-learn deal with creating our constraints.\n",
+    "\n",
+    "We'll be looking at one other way to do this - passing (a,b,c,d) indices into X, such that $d(X[a],X[b]) < d(X[c],X[d])$.\n",
+    "\n",
+    "This kind of input is possible for ITML and LSML.\n",
+    "\n",
+    "We're going to create these constraints through the labels we have, i.e. $Y$.\n",
+    "\n",
+    "This is done internally by metric-learn anyway (do check out the `constraints` class!) - but we'll try our own version of this. I'm going to go ahead and assume that two points labelled the same will be closer than two points with different labels.\n",
+    "\n",
+    "Do keep in mind that we are using this method because we know the labels - we can actually create the constraints any way we want to, depending on the data!"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "def create_constraints(labels):\n",
+    "    import itertools\n",
+    "    import random\n",
+    "    \n",
+    "    # aggregate indices of points in each class\n",
+    "    zeros = np.where(Y==0)[0]\n",
+    "    ones = np.where(Y==1)[0]\n",
+    "    twos = np.where(Y==2)[0]\n",
+    "    # make combinations of all those points in the same class\n",
+    "    zeros_ = list(itertools.combinations(zeros, 2))\n",
+    "    ones_ = list(itertools.combinations(ones, 2))\n",
+    "    twos_ = list(itertools.combinations(twos, 2))\n",
+    "    # put them together!\n",
+    "    sim = np.array(zeros_ + ones_ + twos_)\n",
+    "    \n",
+    "    # similarly, put together indices of points in different classes\n",
+    "    dis = []\n",
+    "    for zero in zeros:\n",
+    "        for one in ones:\n",
+    "            dis.append((zero, one))\n",
+    "        for two in twos:\n",
+    "            dis.append((zero, two))\n",
+    "    for one in ones:\n",
+    "        for two in twos:\n",
+    "            dis.append((one, two))\n",
+    "    \n",
+    "    # pick up just enough dissimilar examples as we have similar examples\n",
+    "    dis = np.array(random.sample(dis, len(sim)))\n",
+    "    \n",
+    "    # return a four-tuple of arrays with d(X[a],X[b])"
+   ]
+  },
[... intervening cells and their base64-encoded PNG image data omitted ...]
+   "metadata": {},
+   "output_type": "display_data"
+  }
+ ],
+ "source": [
+  "plot(X_itml, Y)"
+ ]
+ },
+ {
+  "cell_type": "markdown",
+  "metadata": {},
+  "source": [
+   "And that's the result of ITML after being trained on our manual constraints! A bit different from our old result but not too different. 
We can also notice that it might be better to rely on the randomised algorithms under the hood to make our constraints if we are not very sure how we want our transformed space to be.\n", + "\n", + "RCA and SDML also have their own specific ways of taking in inputs - it's worth one's while to poke around in the constraints.py file to see how exactly this is going on.\n", + "\n", + "This brings us to the end of this tutorial!\n", + "Have fun Metric Learning :)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 2", + "language": "python", + "name": "python2" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.12" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} From f4b593cd662be6fdee8e9a1be2358061e9e324cf Mon Sep 17 00:00:00 2001 From: CJ Carey Date: Wed, 14 Dec 2016 13:57:49 -0600 Subject: [PATCH 029/210] typo fix [ci skip] --- doc/index.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/index.rst b/doc/index.rst index df4ed8a6..f50781fe 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -63,7 +63,7 @@ is available, the LMNN implementation will use the fast C++ version from there. The two implementations differ slightly, and the C++ version is more complete. -Naviagtion +Navigation ---------- :ref:`genindex` | :ref:`modindex` | :ref:`search` From 2e6748e91131d4c6e026a3bbb3f23e009936045e Mon Sep 17 00:00:00 2001 From: ab-anssi Date: Wed, 21 Dec 2016 18:23:57 +0100 Subject: [PATCH 030/210] Nca (#44) * debug NCA: error if a feature has a null variance Make sure A is initialized with non zero values in the diagonal. 
* add dimension reduction feature to NCA * add test for NCA with dimension reduction --- metric_learn/nca.py | 13 +++++++++---- test/metric_learn_test.py | 9 ++++++++- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/metric_learn/nca.py b/metric_learn/nca.py index c0616e2f..51a3dcfc 100644 --- a/metric_learn/nca.py +++ b/metric_learn/nca.py @@ -9,10 +9,12 @@ from .base_metric import BaseMetricLearner +EPS = np.finfo(float).eps class NCA(BaseMetricLearner): - def __init__(self, max_iter=100, learning_rate=0.01): + def __init__(self, num_dims=None, max_iter=100, learning_rate=0.01): self.params = { + 'num_dims': num_dims, 'max_iter': max_iter, 'learning_rate': learning_rate, } @@ -27,9 +29,12 @@ def fit(self, X, labels): labels: scalar labels, (n) """ n, d = X.shape + num_dims = self.params['num_dims'] + if num_dims is None: + num_dims = d # Initialize A to a scaling matrix - A = np.zeros((d, d)) - np.fill_diagonal(A, 1./(X.max(axis=0)-X.min(axis=0))) + A = np.zeros((num_dims, d)) + np.fill_diagonal(A, 1./(np.maximum(X.max(axis=0)-X.min(axis=0), EPS))) # Run NCA dX = X[:,None] - X[None] # shape (n, n, d) @@ -39,7 +44,7 @@ def fit(self, X, labels): for it in xrange(self.params['max_iter']): for i, label in enumerate(labels): mask = masks[i] - Ax = A.dot(X.T).T # shape (n, d) + Ax = A.dot(X.T).T # shape (n, num_dims) softmax = np.exp(-((Ax[i] - Ax)**2).sum(axis=1)) # shape (n) softmax[i] = 0 diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py index 50006486..aa989bbf 100644 --- a/test/metric_learn_test.py +++ b/test/metric_learn_test.py @@ -86,9 +86,10 @@ def test_iris(self): class TestNCA(MetricTestCase): def test_iris(self): n = self.iris_points.shape[0] + + # Without dimension reduction nca = NCA(max_iter=(100000//n), learning_rate=0.01) nca.fit(self.iris_points, self.iris_labels) - # Result copied from Iris example at # https://github.com/vomjom/nca/blob/master/README.mkd expected = [[-0.09935, -0.2215, 0.3383, 0.443], @@ -97,6 +98,12 @@ def test_iris(self): [-0.9405, -0.8461, 2.281, 2.794]] assert_array_almost_equal(expected, nca.transformer(), decimal=3) + # With dimension reduction + nca = NCA(max_iter=(100000//n), learning_rate=0.01, num_dims=2) + nca.fit(self.iris_points, self.iris_labels) + csep = class_separation(nca.transform(), self.iris_labels) + self.assertLess(csep, 0.15) + class TestLFDA(MetricTestCase): def test_iris(self): From 11e95d62b03a25170199dd0788a79885ec301129 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ond=C5=99ej=20=C5=A0vec?= Date: Wed, 11 Jan 2017 18:08:12 +0100 Subject: [PATCH 031/210] Added __repr__ to BaseMetricLearner (#47) * Added __repr__ to BaseMetricLearner * Added test for __repr__ --- metric_learn/base_metric.py | 58 +++++++++++++++++++++++++++++++++++++ test/test_base_metric.py | 20 +++++++++++++ 2 files changed, 78 insertions(+) create mode 100644 test/test_base_metric.py diff --git a/metric_learn/base_metric.py b/metric_learn/base_metric.py index 0978b17b..c8dff4aa 100644 --- a/metric_learn/base_metric.py +++ b/metric_learn/base_metric.py @@ -1,4 +1,6 @@ +import numpy as np from numpy.linalg import inv,cholesky +from six import iteritems class BaseMetricLearner(object): @@ -95,3 +97,59 @@ def set_params(self, **kwarg): """ self.params.update(kwarg) return self + + # https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/base.py#L287 + def __repr__(self): + class_name = self.__class__.__name__ + params = self.get_params(deep=False) + return '%s(%s)' % (class_name, _pprint(params, offset=len(class_name))) + 
+############################################################################### + +# __pprint taken from: +# https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/base.py#L124 +def _pprint(params, offset=0, printer=repr): + """Pretty print the dictionary 'params' + Parameters + ---------- + params : dict + The dictionary to pretty print + offset : int + The offset in characters to add at the begin of each line. + printer : callable + The function to convert entries to strings, typically + the builtin str or repr + """ + # Do a multi-line justified repr: + options = np.get_printoptions() + np.set_printoptions(precision=5, threshold=64, edgeitems=2) + params_list = list() + this_line_length = offset + line_sep = ',\n' + (1 + offset // 2) * ' ' + for i, (k, v) in enumerate(sorted(iteritems(params))): + if type(v) is float: + # use str for representing floating point numbers + # this way we get consistent representation across + # architectures and versions. + this_repr = '%s=%s' % (k, str(v)) + else: + # use repr of the rest + this_repr = '%s=%s' % (k, printer(v)) + if len(this_repr) > 500: + this_repr = this_repr[:300] + '...' + this_repr[-100:] + if i > 0: + if (this_line_length + len(this_repr) >= 75 or '\n' in this_repr): + params_list.append(line_sep) + this_line_length = len(line_sep) + else: + params_list.append(', ') + this_line_length += 2 + params_list.append(this_repr) + this_line_length += len(this_repr) + + np.set_printoptions(**options) + lines = ''.join(params_list) + # Strip trailing space to avoid nightmare in doctests + lines = '\n'.join(l.rstrip(' ') for l in lines.split('\n')) + return lines +############################################################################### \ No newline at end of file diff --git a/test/test_base_metric.py b/test/test_base_metric.py new file mode 100644 index 00000000..b63f1e2d --- /dev/null +++ b/test/test_base_metric.py @@ -0,0 +1,20 @@ +import unittest +import numpy as np + +from metric_learn import ( + LMNN, NCA, LFDA, Covariance, MLKR, + LSML_Supervised, ITML_Supervised, SDML_Supervised, RCA_Supervised) + + +class TestBaseMetric(unittest.TestCase): + + def test_string_repr(self): + ''' + Test string representation of some of the learning methods. + ''' + self.assertEqual(str(Covariance()), "Covariance()") + self.assertEqual(str(NCA()), "NCA(learning_rate=0.01, max_iter=100, num_dims=None)") + self.assertEqual(str(LFDA()), "LFDA(dim=None, k=7, metric='weighted')") + +if __name__ == '__main__': + unittest.main() From 7ae6b11e70bdcf69cbfee7e286de95773977f80c Mon Sep 17 00:00:00 2001 From: CJ Carey Date: Wed, 11 Jan 2017 12:22:30 -0600 Subject: [PATCH 032/210] rewriting _pprint a bit, adding more tests --- metric_learn/base_metric.py | 123 ++++++++++++++++++------------------ test/test_base_metric.py | 53 ++++++++++++---- 2 files changed, 103 insertions(+), 73 deletions(-) diff --git a/metric_learn/base_metric.py b/metric_learn/base_metric.py index c8dff4aa..23b19564 100644 --- a/metric_learn/base_metric.py +++ b/metric_learn/base_metric.py @@ -47,16 +47,12 @@ def transform(self, X=None): X = self.X L = self.transformer() return X.dot(L.T) - + def fit_transform(self, *args, **kwargs): - """ - Function calls .fit() and returns the result of .transform() + """Calls .fit() then returns the result of .transform() + Essentially, it runs the relevant Metric Learning algorithm with .fit() and returns the metric-transformed input data. 
- - Paramters - --------- - Since all the parameters passed to fit_transform are passed on to fit(), the parameters to be passed must be noted from the corresponding Metric Learning algorithm's fit method. @@ -65,7 +61,6 @@ def fit_transform(self, *args, **kwargs): ------- transformed : (n x d) matrix Input data transformed to the metric space by :math:`XL^{\\top}` - """ self.fit(*args, **kwargs) return self.transform() @@ -97,59 +92,63 @@ def set_params(self, **kwarg): """ self.params.update(kwarg) return self - - # https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/base.py#L287 + + # Mimics sklearn's BaseEstimator.__repr__ def __repr__(self): class_name = self.__class__.__name__ - params = self.get_params(deep=False) - return '%s(%s)' % (class_name, _pprint(params, offset=len(class_name))) - -############################################################################### - -# __pprint taken from: -# https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/base.py#L124 -def _pprint(params, offset=0, printer=repr): - """Pretty print the dictionary 'params' - Parameters - ---------- - params : dict - The dictionary to pretty print - offset : int - The offset in characters to add at the begin of each line. - printer : callable - The function to convert entries to strings, typically - the builtin str or repr - """ - # Do a multi-line justified repr: - options = np.get_printoptions() - np.set_printoptions(precision=5, threshold=64, edgeitems=2) - params_list = list() - this_line_length = offset - line_sep = ',\n' + (1 + offset // 2) * ' ' - for i, (k, v) in enumerate(sorted(iteritems(params))): - if type(v) is float: - # use str for representing floating point numbers - # this way we get consistent representation across - # architectures and versions. - this_repr = '%s=%s' % (k, str(v)) - else: - # use repr of the rest - this_repr = '%s=%s' % (k, printer(v)) - if len(this_repr) > 500: - this_repr = this_repr[:300] + '...' + this_repr[-100:] - if i > 0: - if (this_line_length + len(this_repr) >= 75 or '\n' in this_repr): - params_list.append(line_sep) - this_line_length = len(line_sep) - else: - params_list.append(', ') - this_line_length += 2 - params_list.append(this_repr) - this_line_length += len(this_repr) - - np.set_printoptions(**options) - lines = ''.join(params_list) - # Strip trailing space to avoid nightmare in doctests - lines = '\n'.join(l.rstrip(' ') for l in lines.split('\n')) - return lines -############################################################################### \ No newline at end of file + params = self.get_params() + offset = min(len(class_name) + 1, 40) + return '%s(%s)' % (class_name, _pprint(params, offset=offset)) + + +def _pprint(params, offset=0): + """Make a pretty-printable representation of a dictionary. + + Parameters + ---------- + params : dict + The dictionary to pretty print + offset : int + The offset in characters to add at the begin of each line. + """ + repr_chunks = [] + linewidth = 79 - offset + stored_printoptions = np.get_printoptions() + try: + np.set_printoptions(precision=5, threshold=64, edgeitems=2, + linewidth=linewidth) + for k, v in sorted(iteritems(params)): + # use str for representing floating point numbers + # this way we get consistent representation across + # architectures and versions. + if isinstance(v, float): + this_repr = '%s=%s' % (k, v) + else: + this_repr = '%s=%r' % (k, v) + + if len(this_repr) > 500: + this_repr = this_repr[:300] + '...' 
+ this_repr[-100:] + repr_chunks.append(this_repr) + finally: + np.set_printoptions(**stored_printoptions) + + if not repr_chunks: + return '' + + params_list = [repr_chunks[0]] + this_line_length = offset + len(repr_chunks[0]) + line_sep = ',\n' + ' ' * offset + for this_repr in repr_chunks[1:]: + if (this_line_length + len(this_repr) >= 75 or '\n' in this_repr): + params_list.append(line_sep) + this_line_length = len(line_sep) + else: + params_list.append(', ') + this_line_length += 2 + params_list.append(this_repr) + this_line_length += len(this_repr) + + lines = ''.join(params_list) + # Strip trailing space to avoid nightmare in doctests + lines = '\n'.join(l.rstrip(' ') for l in lines.split('\n')) + return lines diff --git a/test/test_base_metric.py b/test/test_base_metric.py index b63f1e2d..69a250c0 100644 --- a/test/test_base_metric.py +++ b/test/test_base_metric.py @@ -1,20 +1,51 @@ import unittest -import numpy as np - -from metric_learn import ( - LMNN, NCA, LFDA, Covariance, MLKR, - LSML_Supervised, ITML_Supervised, SDML_Supervised, RCA_Supervised) +import metric_learn class TestBaseMetric(unittest.TestCase): def test_string_repr(self): - ''' - Test string representation of some of the learning methods. - ''' - self.assertEqual(str(Covariance()), "Covariance()") - self.assertEqual(str(NCA()), "NCA(learning_rate=0.01, max_iter=100, num_dims=None)") - self.assertEqual(str(LFDA()), "LFDA(dim=None, k=7, metric='weighted')") + # we don't test LMNN here because it could be python_LMNN + + self.assertEqual(str(metric_learn.Covariance()), "Covariance()") + + self.assertEqual(str(metric_learn.NCA()), + "NCA(learning_rate=0.01, max_iter=100, num_dims=None)") + + self.assertEqual(str(metric_learn.LFDA()), + "LFDA(dim=None, k=7, metric='weighted')") + + self.assertEqual(str(metric_learn.ITML()), """ +ITML(convergence_threshold=0.001, gamma=1.0, max_iters=1000, verbose=False) +""".strip('\n')) + self.assertEqual(str(metric_learn.ITML_Supervised()), """ +ITML_Supervised(A0=None, bounds=None, convergence_threshold=0.001, gamma=1.0, + max_iters=1000, num_constraints=None, num_labeled=inf, + verbose=False) +""".strip('\n')) + + self.assertEqual(str(metric_learn.LSML()), + "LSML(max_iter=1000, tol=0.001, verbose=False)") + self.assertEqual(str(metric_learn.LSML_Supervised()), """ +LSML_Supervised(max_iter=1000, num_constraints=None, num_labeled=inf, + prior=None, tol=0.001, verbose=False, weights=None) +""".strip('\n')) + + self.assertEqual(str(metric_learn.SDML()), """ +SDML(balance_param=0.5, sparsity_param=0.01, use_cov=True, verbose=False) +""".strip('\n')) + self.assertEqual(str(metric_learn.SDML_Supervised()), """ +SDML_Supervised(balance_param=0.5, num_constraints=None, num_labeled=inf, + sparsity_param=0.01, use_cov=True, verbose=False) +""".strip('\n')) + + self.assertEqual(str(metric_learn.RCA()), "RCA(dim=None)") + self.assertEqual(str(metric_learn.RCA_Supervised()), + "RCA_Supervised(chunk_size=2, dim=None, num_chunks=100)") + + self.assertEqual(str(metric_learn.MLKR()), """ +MLKR(A0=None, alpha=0.0001, epsilon=0.01, max_iter=1000, num_dims=None) +""".strip('\n')) if __name__ == '__main__': unittest.main() From c01ce1b1e22864ff534f1949ecda50f16fe00572 Mon Sep 17 00:00:00 2001 From: CJ Carey Date: Wed, 11 Jan 2017 12:31:17 -0600 Subject: [PATCH 033/210] misc pep8 fixes --- metric_learn/constraints.py | 15 ++++++++++----- metric_learn/itml.py | 9 ++++++--- metric_learn/lmnn.py | 2 +- metric_learn/lsml.py | 6 ++++-- metric_learn/nca.py | 1 + metric_learn/rca.py | 3 ++- 
metric_learn/sdml.py | 6 ++++-- 7 files changed, 28 insertions(+), 14 deletions(-) diff --git a/metric_learn/constraints.py b/metric_learn/constraints.py index 903c5148..afa16351 100644 --- a/metric_learn/constraints.py +++ b/metric_learn/constraints.py @@ -19,7 +19,8 @@ def __init__(self, partial_labels): self.known_labels = partial_labels[self.known_label_idx] def adjacency_matrix(self, num_constraints, random_state=np.random): - a, b, c, d = self.positive_negative_pairs(num_constraints, random_state=random_state) + a, b, c, d = self.positive_negative_pairs(num_constraints, + random_state=random_state) row = np.concatenate((a, c)) col = np.concatenate((b, d)) data = np.ones_like(row, dtype=int) @@ -28,15 +29,19 @@ def adjacency_matrix(self, num_constraints, random_state=np.random): # symmetrize return adj + adj.T - def positive_negative_pairs(self, num_constraints, same_length=False, random_state=np.random): - a, b = self._pairs(num_constraints, same_label=True, random_state=random_state) - c, d = self._pairs(num_constraints, same_label=False, random_state=random_state) + def positive_negative_pairs(self, num_constraints, same_length=False, + random_state=np.random): + a, b = self._pairs(num_constraints, same_label=True, + random_state=random_state) + c, d = self._pairs(num_constraints, same_label=False, + random_state=random_state) if same_length and len(a) != len(c): n = min(len(a), len(c)) return a[:n], b[:n], c[:n], d[:n] return a, b, c, d - def _pairs(self, num_constraints, same_label=True, max_iter=10, random_state=np.random): + def _pairs(self, num_constraints, same_label=True, max_iter=10, + random_state=np.random): num_labels = len(self.known_labels) ab = set() it = 0 diff --git a/metric_learn/itml.py b/metric_learn/itml.py index 6a6fcf04..c9b594f8 100644 --- a/metric_learn/itml.py +++ b/metric_learn/itml.py @@ -182,6 +182,9 @@ def fit(self, X, labels, random_state=np.random): num_classes = np.unique(labels) num_constraints = 20*(len(num_classes))**2 - c = Constraints.random_subset(labels, self.params['num_labeled'], random_state=random_state) - return ITML.fit(self, X, c.positive_negative_pairs(num_constraints, random_state=random_state), - bounds=self.params['bounds'], A0=self.params['A0']) + c = Constraints.random_subset(labels, self.params['num_labeled'], + random_state=random_state) + pos_neg = c.positive_negative_pairs(num_constraints, + random_state=random_state) + return ITML.fit(self, X, pos_neg, bounds=self.params['bounds'], + A0=self.params['A0']) diff --git a/metric_learn/lmnn.py b/metric_learn/lmnn.py index e580f3ed..436ffb6b 100644 --- a/metric_learn/lmnn.py +++ b/metric_learn/lmnn.py @@ -64,7 +64,7 @@ def fit(self, X, labels): target_neighbors = self._select_targets() impostors = self._find_impostors(target_neighbors[:,-1]) if len(impostors) == 0: - # L has already been initialized to an identity matrix of requisite shape + # L has already been initialized to an identity matrix return # sum outer products diff --git a/metric_learn/lsml.py b/metric_learn/lsml.py index 343c0b7f..93f3bafd 100644 --- a/metric_learn/lsml.py +++ b/metric_learn/lsml.py @@ -171,7 +171,9 @@ def fit(self, X, labels, random_state=np.random): num_classes = np.unique(labels) num_constraints = 20*(len(num_classes))**2 - c = Constraints.random_subset(labels, self.params['num_labeled'], random_state=random_state) - pairs = c.positive_negative_pairs(num_constraints, same_length=True, random_state=random_state) + c = Constraints.random_subset(labels, self.params['num_labeled'], + 
random_state=random_state) + pairs = c.positive_negative_pairs(num_constraints, same_length=True, + random_state=random_state) return LSML.fit(self, X, pairs, weights=self.params['weights'], prior=self.params['prior']) diff --git a/metric_learn/nca.py b/metric_learn/nca.py index 51a3dcfc..9651617e 100644 --- a/metric_learn/nca.py +++ b/metric_learn/nca.py @@ -11,6 +11,7 @@ EPS = np.finfo(float).eps + class NCA(BaseMetricLearner): def __init__(self, num_dims=None, max_iter=100, learning_rate=0.01): self.params = { diff --git a/metric_learn/rca.py b/metric_learn/rca.py index 420dac65..ddd73380 100644 --- a/metric_learn/rca.py +++ b/metric_learn/rca.py @@ -124,5 +124,6 @@ def fit(self, X, labels, random_state=np.random): random_state : a random.seed object to fix the random_state if needed. """ chunks = Constraints(labels).chunks(num_chunks=self.params['num_chunks'], - chunk_size=self.params['chunk_size'], random_state=random_state) + chunk_size=self.params['chunk_size'], + random_state=random_state) return RCA.fit(self, X, chunks) diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py index 852b00f3..774085d5 100644 --- a/metric_learn/sdml.py +++ b/metric_learn/sdml.py @@ -105,5 +105,7 @@ def fit(self, X, labels, random_state=np.random): num_classes = np.unique(labels) num_constraints = 20*(len(num_classes))**2 - c = Constraints.random_subset(labels, self.params['num_labeled'], random_state=random_state) - return SDML.fit(self, X, c.adjacency_matrix(num_constraints, random_state=random_state)) + c = Constraints.random_subset(labels, self.params['num_labeled'], + random_state=random_state) + adj = c.adjacency_matrix(num_constraints, random_state=random_state) + return SDML.fit(self, X, adj) From 8cef69efb71c8f26931f6e448057942ea2533676 Mon Sep 17 00:00:00 2001 From: Arik Poznanski Date: Tue, 31 Jan 2017 21:20:42 +0200 Subject: [PATCH 034/210] replaced A*(v*v)*A with (A*v)*(A*v), which improves the performance from O(n^3) to O(n^2), as described in the original ITML paper. 
(#48)

note that A=A' in all stages of the algorithm since A is a Mahalanobis
metric, so A*(v*v')*A = (A*v)*(A*v)' and a single matrix-vector product
suffices
---
 metric_learn/itml.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/metric_learn/itml.py b/metric_learn/itml.py
index c9b594f8..f13018dc 100644
--- a/metric_learn/itml.py
+++ b/metric_learn/itml.py
@@ -101,7 +101,8 @@ def fit(self, X, constraints, bounds=None, A0=None):
       _lambda[i] -= alpha
       beta = alpha/(1 - alpha*wtw)
       pos_bhat[i] = 1./((1 / pos_bhat[i]) + (alpha / gamma))
-      A += beta * A.dot(np.outer(v,v)).dot(A)
+      Av = A.dot(v)
+      A += beta * np.outer(Av, Av)
 
       # update negatives
       vv = self.X[c] - self.X[d]
@@ -111,7 +112,8 @@ def fit(self, X, constraints, bounds=None, A0=None):
       _lambda[i+num_pos] -= alpha
       beta = -alpha/(1 + alpha*wtw)
       neg_bhat[i] = 1./((1 / neg_bhat[i]) - (alpha / gamma))
-      A += beta * A.dot(np.outer(v,v)).dot(A)
+      Av = A.dot(v)
+      A += beta * np.outer(Av, Av)
 
     normsum = np.linalg.norm(_lambda) + np.linalg.norm(lambdaold)
     if normsum == 0:

From 98bbd30244fbd8671592eb8090964473ed7a85f3 Mon Sep 17 00:00:00 2001
From: CJ Carey 
Date: Thu, 9 Feb 2017 11:57:30 -0500
Subject: [PATCH 035/210] Adding simple ASV benchmark suite

---
 bench/.gitignore             |  4 ++
 bench/asv.conf.json          | 74 ++++++++++++++++++++++++++++++++++++
 bench/benchmarks/__init__.py |  0
 bench/benchmarks/iris.py     | 38 ++++++++++++++++++
 4 files changed, 116 insertions(+)
 create mode 100644 bench/.gitignore
 create mode 100644 bench/asv.conf.json
 create mode 100644 bench/benchmarks/__init__.py
 create mode 100644 bench/benchmarks/iris.py

diff --git a/bench/.gitignore b/bench/.gitignore
new file mode 100644
index 00000000..824e23ac
--- /dev/null
+++ b/bench/.gitignore
@@ -0,0 +1,4 @@
+results
+env
+metric-learn
+html
diff --git a/bench/asv.conf.json b/bench/asv.conf.json
new file mode 100644
index 00000000..782d3ab2
--- /dev/null
+++ b/bench/asv.conf.json
@@ -0,0 +1,74 @@
+{
+    // The version of the config file format. Do not change, unless
+    // you know what you are doing.
+    "version": 1,
+
+    // The name of the project being benchmarked
+    "project": "metric-learn",
+
+    // The project's homepage
+    "project_url": "https://github.com/all-umass/metric-learn",
+
+    // The URL or local path of the source code repository for the
+    // project being benchmarked
+    "repo": "..",
+
+    // List of branches to benchmark. If not provided, defaults to "master"
+    // (for git) or "tip" (for mercurial).
+    "branches": ["master"], // for git
+    // "branches": ["tip"], // for mercurial
+
+    // The DVCS being used. If not set, it will be automatically
+    // determined from "repo" by looking at the protocol in the URL
+    // (if remote), or by looking for special directories, such as
+    // ".git" (if local).
+    "dvcs": "git",
+
+    // The tool to use to create environments. May be "conda",
+    // "virtualenv" or other value depending on the plugins in use.
+    // If missing or the empty string, the tool will be automatically
+    // determined by looking for tools on the PATH environment
+    // variable.
+    "environment_type": "virtualenv",
+
+    // the base URL to show a commit for the project.
+    "show_commit_url": "http://github.com/all-umass/metric-learn/commit/",
+
+    // The Pythons you'd like to test against. If not provided, defaults
+    // to the current version of Python used to run `asv`.
+    // "pythons": ["2.7", "3.3"],
+
+    // The matrix of dependencies to test. Each key is the name of a
+    // package (in PyPI) and the values are version numbers. An empty
+    // list indicates to just test against the default (latest)
+    // version. 
+ "matrix": { + "numpy": ["1.12"], + "scipy": ["0.18"], + "scikit-learn": ["0.18"] + }, + + // The directory (relative to the current directory) that benchmarks are + // stored in. If not provided, defaults to "benchmarks" + // "benchmark_dir": "benchmarks", + + // The directory (relative to the current directory) to cache the Python + // environments in. If not provided, defaults to "env" + // "env_dir": "env", + + // The directory (relative to the current directory) that raw benchmark + // results are stored in. If not provided, defaults to "results". + // "results_dir": "results", + + // The directory (relative to the current directory) that the html tree + // should be written to. If not provided, defaults to "html". + // "html_dir": "html", + + // The number of characters to retain in the commit hashes. + // "hash_length": 8, + + // `asv` will cache wheels of the recent builds in each + // environment, making them faster to install next time. This is + // number of builds to keep, per environment. + "wheel_cache_size": 4 +} diff --git a/bench/benchmarks/__init__.py b/bench/benchmarks/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/bench/benchmarks/iris.py b/bench/benchmarks/iris.py new file mode 100644 index 00000000..d0b76895 --- /dev/null +++ b/bench/benchmarks/iris.py @@ -0,0 +1,38 @@ +import numpy as np +from sklearn.datasets import load_iris + +import metric_learn + +CLASSES = { + 'Covariance': metric_learn.Covariance(), + 'ITML_Supervised': metric_learn.ITML_Supervised(num_constraints=200), + 'LFDA': metric_learn.LFDA(k=2, dim=2), + 'LMNN': metric_learn.LMNN(k=5, learn_rate=1e-6, verbose=False), + 'LSML_Supervised': metric_learn.LSML_Supervised(num_constraints=200), + 'MLKR': metric_learn.MLKR(), + 'NCA': metric_learn.NCA(max_iter=700, learning_rate=0.01, num_dims=2), + 'RCA_Supervised': metric_learn.RCA_Supervised(dim=2, num_chunks=30, + chunk_size=2), + 'SDML_Supervised': metric_learn.SDML_Supervised(num_constraints=1500), +} + +try: + from metric_learn.lmnn import python_LMNN + if python_LMNN is not metric_learn.LMNN: + CLASSES['python_LMNN'] = python_LMNN(k=5, learn_rate=1e-6, verbose=False) +except ImportError: + pass + + +class IrisDataset(object): + params = [sorted(CLASSES)] + param_names = ['alg'] + + def setup(self, alg): + iris_data = load_iris() + self.iris_points = iris_data['data'] + self.iris_labels = iris_data['target'] + + def time_fit(self, alg): + np.random.seed(5555) + CLASSES[alg].fit(self.iris_points, self.iris_labels) From b9ac998b561adbe9ebeeb3185a45e979305443aa Mon Sep 17 00:00:00 2001 From: CJ Carey Date: Thu, 9 Feb 2017 11:58:08 -0500 Subject: [PATCH 036/210] Fixing docstrings for SDML --- metric_learn/sdml.py | 70 ++++++++++++++++++++++++++++---------------- 1 file changed, 45 insertions(+), 25 deletions(-) diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py index 774085d5..be4e16f7 100644 --- a/metric_learn/sdml.py +++ b/metric_learn/sdml.py @@ -21,16 +21,18 @@ class SDML(BaseMetricLearner): def __init__(self, balance_param=0.5, sparsity_param=0.01, use_cov=True, verbose=False): - ''' - balance_param: float, optional + """ + Parameters + ---------- + balance_param : float, optional trade off between sparsity and M0 prior - sparsity_param: float, optional + sparsity_param : float, optional trade off between optimizer and sparseness (see graph_lasso) - use_cov: bool, optional + use_cov : bool, optional controls prior matrix, will use the identity if use_cov=False verbose : bool, optional if True, prints information while 
learning - ''' + """ self.params = { 'balance_param': balance_param, 'sparsity_param': sparsity_param, @@ -52,11 +54,19 @@ def metric(self): return self.M def fit(self, X, W): - """ - X: data matrix, (n x d) - each row corresponds to a single instance - W: connectivity graph, (n x n) - +1 for positive pairs, -1 for negative. + """Learn the SDML model. + + Parameters + ---------- + X : array-like, shape (n, d) + data matrix, where each row corresponds to a single instance + W : array-like, shape (n, n) + connectivity graph, with +1 for positive pairs and -1 for negative + + Returns + ------- + self : object + Returns the instance. """ self._prepare_inputs(X, W) P = pinvh(self.M) + self.params['balance_param'] * self.loss_matrix @@ -71,23 +81,25 @@ def fit(self, X, W): class SDML_Supervised(SDML): def __init__(self, balance_param=0.5, sparsity_param=0.01, use_cov=True, num_labeled=np.inf, num_constraints=None, verbose=False): - SDML.__init__(self, balance_param=balance_param, - sparsity_param=sparsity_param, use_cov=use_cov, - verbose=verbose) - ''' - balance_param: float, optional + """ + Parameters + ---------- + balance_param : float, optional trade off between sparsity and M0 prior - sparsity_param: float, optional + sparsity_param : float, optional trade off between optimizer and sparseness (see graph_lasso) - use_cov: bool, optional + use_cov : bool, optional controls prior matrix, will use the identity if use_cov=False num_labeled : int, optional number of labels to preserve for training - num_constraints: int, optional + num_constraints : int, optional number of constraints to generate verbose : bool, optional if True, prints information while learning - ''' + """ + SDML.__init__(self, balance_param=balance_param, + sparsity_param=sparsity_param, use_cov=use_cov, + verbose=verbose) self.params.update(num_labeled=num_labeled, num_constraints=num_constraints) def fit(self, X, labels, random_state=np.random): @@ -95,15 +107,23 @@ def fit(self, X, labels, random_state=np.random): Parameters ---------- - X: data matrix, (n x d) - each row corresponds to a single instance - labels: data labels, (n,) array-like - random_state : a numpy random.seed object to fix the random_state if needed. + X : array-like, shape (n, d) + data matrix, where each row corresponds to a single instance + labels : array-like, shape (n,) + data labels, one for each instance + random_state : {numpy.random.RandomState, int}, optional + Random number generator or random seed. If not given, the singleton + numpy.random will be used. + + Returns + ------- + self : object + Returns the instance. """ num_constraints = self.params['num_constraints'] if num_constraints is None: - num_classes = np.unique(labels) - num_constraints = 20*(len(num_classes))**2 + num_classes = len(np.unique(labels)) + num_constraints = 20 * num_classes**2 c = Constraints.random_subset(labels, self.params['num_labeled'], random_state=random_state) From c5c2c687db3575769417689b1ce7a198bf6b745b Mon Sep 17 00:00:00 2001 From: ab-anssi Date: Mon, 13 Feb 2017 15:16:05 +0100 Subject: [PATCH 037/210] Handle rank-deficient inner covariance in RCA (#50) * Add citation to JMLR 2005 Our RCA implementation is highly inspired by the matlab open source implementation provided by Tomer Hertz (http://www.scharp.org/thertz/code.html) and cited in this paper. * Apply PCA to get a full rank inner_cov matrix As advised in the paper JMLR 2005, the input data are first projected with a PCA if the inner_cov matrix is not full rank. 
* User warning if the embedding dimension is changed * new tests for RCA - data with a null variance features - dataset digits * debug travis errors * remove self.pca * rm _compute_inner_cov function * use warnings library * remove test on digits * address pull request remarks + pep8 fixes * add parameter 'pca_comps' * _process_inputs - code refactoring * update test_base_metric with pca_comps * address perimosocordiae comments * fix indent --- metric_learn/rca.py | 99 +++++++++++++++++++++++++++------------ test/metric_learn_test.py | 15 ++++++ test/test_base_metric.py | 4 +- 3 files changed, 85 insertions(+), 33 deletions(-) diff --git a/metric_learn/rca.py b/metric_learn/rca.py index ddd73380..70a87ebb 100644 --- a/metric_learn/rca.py +++ b/metric_learn/rca.py @@ -8,47 +8,72 @@ subsets of points that are known to belong to the same class. 'Learning distance functions using equivalence relations', ICML 2003 +'Learning a Mahalanobis metric from equivalence constraints', JMLR 2005 """ from __future__ import absolute_import import numpy as np from six.moves import xrange +from sklearn import decomposition +import warnings from .base_metric import BaseMetricLearner from .constraints import Constraints +# mean center each chunklet separately +def _chunk_mean_centering(data, chunks, num_chunks): + chunk_mask = chunks != -1 + chunk_data = data[chunk_mask] + chunk_labels = chunks[chunk_mask] + for c in xrange(num_chunks): + mask = chunk_labels == c + chunk_data[mask] -= chunk_data[mask].mean(axis=0) + + return chunk_mask, chunk_data + + class RCA(BaseMetricLearner): """Relevant Components Analysis (RCA)""" - def __init__(self, dim=None): + def __init__(self, dim=None, pca_comps=None): """Initialize the learner. Parameters ---------- dim : int, optional embedding dimension (default: original dimension of data) + pca_comps : int, float, None or string + Number of components to keep during PCA preprocessing. + If None (default), does not perform PCA. + If ``0 < pca_comps < 1``, it is used as the minimum explained variance ratio. + See sklearn.decomposition.PCA for more details. """ - self.params = { - 'dim': dim, - } + self.params = {'dim': dim, 'pca_comps': pca_comps} def transformer(self): return self._transformer - def _process_inputs(self, X, Y): - X = np.asanyarray(X) - self.X = X - n, d = X.shape + def _process_data(self, data): + data = np.asanyarray(data) + self.X = data + n, d = data.shape + return data, d + def _process_chunks(self, data, chunks): + chunks = np.asanyarray(chunks) + num_chunks = chunks.max() + 1 + return _chunk_mean_centering(data, chunks, num_chunks) + + def _process_parameters(self, d): if self.params['dim'] is None: self.params['dim'] = d - elif not 0 < self.params['dim'] <= d: - raise ValueError('Invalid embedding dimension, must be in [1,%d]' % d) - - Y = np.asanyarray(Y) - num_chunks = Y.max() + 1 - - return X, Y, num_chunks, d + elif not self.params['dim'] > 0: + raise ValueError('Invalid embedding dimension, dim must be greater than 0.') + elif self.params['dim'] > d: + self.params['dim'] = d + warnings.warn('dim must be smaller than the data dimension. ' + + 'dim is set to %d.' % (d)) + return self.params['dim'] def fit(self, data, chunks): """Learn the RCA model. @@ -61,34 +86,46 @@ def fit(self, data, chunks): when ``chunks[i] == -1``, point i doesn't belong to any chunklet, when ``chunks[i] == j``, point i belongs to chunklet j. 
""" - data, chunks, num_chunks, d = self._process_inputs(data, chunks) - # mean center - data -= data.mean(axis=0) + data, d = self._process_data(data) - # mean center each chunklet separately - chunk_mask = chunks != -1 - chunk_data = data[chunk_mask] - chunk_labels = chunks[chunk_mask] - for c in xrange(num_chunks): - mask = chunk_labels == c - chunk_data[mask] -= chunk_data[mask].mean(axis=0) + # PCA projection to remove noise and redundant information. + M_pca = None + if self.params['pca_comps'] is not None: + pca = decomposition.PCA(n_components=self.params['pca_comps'], + svd_solver='full') + data = pca.fit_transform(data) + d = data.shape[1] + M_pca = pca.components_ + else: + data -= data.mean(axis=0) - # "inner" covariance of chunk deviations + chunk_mask, chunk_data = self._process_chunks(data, chunks) inner_cov = np.cov(chunk_data, rowvar=0, bias=1) + rank = np.linalg.matrix_rank(inner_cov) + + if rank < d: + warnings.warn('The inner covariance matrix is not invertible, ' + 'so the transformation matrix may contain Nan values. ' + 'You should adjust pca_comps to remove noise and ' + 'redundant information.') # Fisher Linear Discriminant projection - if self.params['dim'] < d: + dim = self._process_parameters(d) + if dim < d: total_cov = np.cov(data[chunk_mask], rowvar=0) tmp = np.linalg.lstsq(total_cov, inner_cov)[0] vals, vecs = np.linalg.eig(tmp) - inds = np.argsort(vals)[:self.params['dim']] - A = vecs[:,inds] + inds = np.argsort(vals)[:dim] + A = vecs[:, inds] inner_cov = A.T.dot(inner_cov).dot(A) self._transformer = _inv_sqrtm(inner_cov).dot(A.T) else: self._transformer = _inv_sqrtm(inner_cov).T + if M_pca is not None: + self._transformer = self._transformer.dot(M_pca) + return self @@ -99,7 +136,7 @@ def _inv_sqrtm(x): class RCA_Supervised(RCA): - def __init__(self, dim=None, num_chunks=100, chunk_size=2): + def __init__(self, dim=None, pca_comps=None, num_chunks=100, chunk_size=2): """Initialize the learner. 
Parameters @@ -109,7 +146,7 @@ def __init__(self, dim=None, num_chunks=100, chunk_size=2): num_chunks: int, optional chunk_size: int, optional """ - RCA.__init__(self, dim=dim) + RCA.__init__(self, dim=dim, pca_comps=pca_comps) self.params.update(num_chunks=num_chunks, chunk_size=chunk_size) def fit(self, X, labels, random_state=np.random): diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py index aa989bbf..66bb75d8 100644 --- a/test/metric_learn_test.py +++ b/test/metric_learn_test.py @@ -120,6 +120,21 @@ def test_iris(self): csep = class_separation(rca.transform(), self.iris_labels) self.assertLess(csep, 0.25) + def test_feature_null_variance(self): + X = np.hstack((self.iris_points, np.eye(len(self.iris_points), M = 1))) + + # Apply PCA with the number of components + rca = RCA_Supervised(dim=2, pca_comps=3, num_chunks=30, chunk_size=2) + rca.fit(X, self.iris_labels) + csep = class_separation(rca.transform(), self.iris_labels) + self.assertLess(csep, 0.30) + + # Apply PCA with the minimum variance ratio + rca = RCA_Supervised(dim=2, pca_comps=0.95, num_chunks=30, chunk_size=2) + rca.fit(X, self.iris_labels) + csep = class_separation(rca.transform(), self.iris_labels) + self.assertLess(csep, 0.30) + class TestMLKR(MetricTestCase): def test_iris(self): diff --git a/test/test_base_metric.py b/test/test_base_metric.py index 69a250c0..ad242b86 100644 --- a/test/test_base_metric.py +++ b/test/test_base_metric.py @@ -39,9 +39,9 @@ def test_string_repr(self): sparsity_param=0.01, use_cov=True, verbose=False) """.strip('\n')) - self.assertEqual(str(metric_learn.RCA()), "RCA(dim=None)") + self.assertEqual(str(metric_learn.RCA()), "RCA(dim=None, pca_comps=None)") self.assertEqual(str(metric_learn.RCA_Supervised()), - "RCA_Supervised(chunk_size=2, dim=None, num_chunks=100)") + "RCA_Supervised(chunk_size=2, dim=None, num_chunks=100, pca_comps=None)") self.assertEqual(str(metric_learn.MLKR()), """ MLKR(A0=None, alpha=0.0001, epsilon=0.01, max_iter=1000, num_dims=None) From 20198e774c6e851eb8ec934b0d83144844b08e5a Mon Sep 17 00:00:00 2001 From: CJ Carey Date: Sat, 18 Feb 2017 11:39:36 -0500 Subject: [PATCH 038/210] Adding Landscape.io config --- .landscape.yml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 .landscape.yml diff --git a/.landscape.yml b/.landscape.yml new file mode 100644 index 00000000..ae342735 --- /dev/null +++ b/.landscape.yml @@ -0,0 +1,16 @@ +strictness: medium +pep8: + disable: + - E111 + - E114 + - E231 + - E225 + - E402 + - W503 +pylint: + disable: + - bad-indentation + - invalid-name + - too-many-arguments +ignore-paths: + - bench/ From 3e38fe09a87e6ef05289f3cbe6ffa03e2dc716e8 Mon Sep 17 00:00:00 2001 From: CJ Carey Date: Thu, 2 Mar 2017 16:05:20 -0500 Subject: [PATCH 039/210] Overhauling code to improve sklearn compat Fixes issue #34. Also changed some parameter names, so this commit breaks backwards compatibility! 
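A minimal sketch of the estimator contract this overhaul moves toward (illustrative only: `ToyMetricLearner` below is a hypothetical example for this note, not a class in metric-learn). Subclassing scikit-learn's `BaseEstimator` and `TransformerMixin`, and storing each constructor argument under an attribute of the same name, is what makes the inherited `get_params`, `set_params`, `fit_transform`, and estimator cloning work without custom plumbing:

    import numpy as np
    from sklearn.base import BaseEstimator, TransformerMixin

    class ToyMetricLearner(BaseEstimator, TransformerMixin):
        # hypothetical learner following the same conventions as this patch
        def __init__(self, num_dims=2):
            self.num_dims = num_dims  # plain attribute, no self.params dict

        def fit(self, X, y=None):
            # trivial stand-in for real metric learning: keep the first
            # num_dims rows of X as a linear transformation matrix
            self.transformer_ = np.asarray(X)[:self.num_dims]
            return self

        def transform(self, X):
            return np.asarray(X).dot(self.transformer_.T)

    learner = ToyMetricLearner(num_dims=3)
    print(learner.get_params())  # inherited from BaseEstimator: {'num_dims': 3}

Estimators written this way drop into sklearn pipelines and grid search directly, which is what the diff below buys by deleting the hand-rolled get_params/set_params/fit_transform machinery.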
--- metric_learn/base_metric.py | 117 ++---------------------- metric_learn/constraints.py | 7 +- metric_learn/covariance.py | 13 +-- metric_learn/itml.py | 127 ++++++++++++++------------ metric_learn/lfda.py | 116 +++++++++++++++--------- metric_learn/lmnn.py | 167 +++++++++++++++++----------------- metric_learn/lsml.py | 123 +++++++++++++------------ metric_learn/mlkr.py | 173 +++++++++++++++++++----------------- metric_learn/nca.py | 29 +++--- metric_learn/rca.py | 134 ++++++++++++++-------------- metric_learn/sdml.py | 48 +++++----- test/metric_learn_test.py | 11 +-- test/test_base_metric.py | 56 +++++++----- test/test_fit_transform.py | 8 +- test/test_sklearn_compat.py | 65 ++++++++++++++ 15 files changed, 614 insertions(+), 580 deletions(-) create mode 100644 test/test_sklearn_compat.py diff --git a/metric_learn/base_metric.py b/metric_learn/base_metric.py index 23b19564..abd2d0f7 100644 --- a/metric_learn/base_metric.py +++ b/metric_learn/base_metric.py @@ -1,9 +1,9 @@ -import numpy as np -from numpy.linalg import inv,cholesky -from six import iteritems +from numpy.linalg import inv, cholesky +from sklearn.base import BaseEstimator, TransformerMixin +from sklearn.utils.validation import check_array -class BaseMetricLearner(object): +class BaseMetricLearner(BaseEstimator, TransformerMixin): def __init__(self): raise NotImplementedError('BaseMetricLearner should not be instantiated') @@ -44,111 +44,8 @@ def transform(self, X=None): Input data transformed to the metric space by :math:`XL^{\\top}` """ if X is None: - X = self.X + X = self.X_ + else: + X = check_array(X, accept_sparse=True) L = self.transformer() return X.dot(L.T) - - def fit_transform(self, *args, **kwargs): - """Calls .fit() then returns the result of .transform() - - Essentially, it runs the relevant Metric Learning algorithm with .fit() - and returns the metric-transformed input data. - Since all the parameters passed to fit_transform are passed on to - fit(), the parameters to be passed must be noted from the corresponding - Metric Learning algorithm's fit method. - - Returns - ------- - transformed : (n x d) matrix - Input data transformed to the metric space by :math:`XL^{\\top}` - """ - self.fit(*args, **kwargs) - return self.transform() - - def get_params(self, deep=False): - """Get parameters for this metric learner. - - Parameters - ---------- - deep: boolean, optional - @WARNING doesn't do anything, only exists because - scikit-learn has this on BaseEstimator. - - Returns - ------- - params : mapping of string to any - Parameter names mapped to their values. - """ - return self.params - - def set_params(self, **kwarg): - """Set the parameters of this metric learner. - - Overwrites any default parameters or parameters specified in constructor. - - Returns - ------- - self - """ - self.params.update(kwarg) - return self - - # Mimics sklearn's BaseEstimator.__repr__ - def __repr__(self): - class_name = self.__class__.__name__ - params = self.get_params() - offset = min(len(class_name) + 1, 40) - return '%s(%s)' % (class_name, _pprint(params, offset=offset)) - - -def _pprint(params, offset=0): - """Make a pretty-printable representation of a dictionary. - - Parameters - ---------- - params : dict - The dictionary to pretty print - offset : int - The offset in characters to add at the begin of each line. 
- """ - repr_chunks = [] - linewidth = 79 - offset - stored_printoptions = np.get_printoptions() - try: - np.set_printoptions(precision=5, threshold=64, edgeitems=2, - linewidth=linewidth) - for k, v in sorted(iteritems(params)): - # use str for representing floating point numbers - # this way we get consistent representation across - # architectures and versions. - if isinstance(v, float): - this_repr = '%s=%s' % (k, v) - else: - this_repr = '%s=%r' % (k, v) - - if len(this_repr) > 500: - this_repr = this_repr[:300] + '...' + this_repr[-100:] - repr_chunks.append(this_repr) - finally: - np.set_printoptions(**stored_printoptions) - - if not repr_chunks: - return '' - - params_list = [repr_chunks[0]] - this_line_length = offset + len(repr_chunks[0]) - line_sep = ',\n' + ' ' * offset - for this_repr in repr_chunks[1:]: - if (this_line_length + len(this_repr) >= 75 or '\n' in this_repr): - params_list.append(line_sep) - this_line_length = len(line_sep) - else: - params_list.append(', ') - this_line_length += 2 - params_list.append(this_repr) - this_line_length += len(this_repr) - - lines = ''.join(params_list) - # Strip trailing space to avoid nightmare in doctests - lines = '\n'.join(l.rstrip(' ') for l in lines.split('\n')) - return lines diff --git a/metric_learn/constraints.py b/metric_learn/constraints.py index afa16351..8824450a 100644 --- a/metric_learn/constraints.py +++ b/metric_learn/constraints.py @@ -13,7 +13,7 @@ class Constraints(object): def __init__(self, partial_labels): '''partial_labels : int arraylike, -1 indicating unknown label''' - partial_labels = np.asanyarray(partial_labels) + partial_labels = np.asanyarray(partial_labels, dtype=int) self.num_points, = partial_labels.shape self.known_label_idx, = np.where(partial_labels >= 0) self.known_labels = partial_labels[self.known_label_idx] @@ -72,7 +72,10 @@ def chunks(self, num_chunks=100, chunk_size=2, random_state=np.random): all_inds = [set(np.where(lookup==c)[0]) for c in xrange(len(uniq))] idx = 0 while idx < num_chunks and all_inds: - c = random_state.randint(0, high=len(all_inds)-1) + if len(all_inds) == 1: + c = 0 + else: + c = random_state.randint(0, high=len(all_inds)-1) inds = all_inds[c] if len(inds) < chunk_size: del all_inds[c] diff --git a/metric_learn/covariance.py b/metric_learn/covariance.py index 2142b337..0e230d43 100644 --- a/metric_learn/covariance.py +++ b/metric_learn/covariance.py @@ -10,22 +10,23 @@ from __future__ import absolute_import import numpy as np +from sklearn.utils.validation import check_array from .base_metric import BaseMetricLearner class Covariance(BaseMetricLearner): def __init__(self): - self.params = {} + pass def metric(self): - return self.M + return self.M_ def fit(self, X, y=None): """ - X: data matrix, (n x d) - y: unused, optional + X : data matrix, (n x d) + y : unused """ - self.X = X - self.M = np.cov(X.T) + self.X_ = check_array(X, ensure_min_samples=2) + self.M_ = np.cov(self.X_.T) return self diff --git a/metric_learn/itml.py b/metric_learn/itml.py index f13018dc..7f20cc3e 100644 --- a/metric_learn/itml.py +++ b/metric_learn/itml.py @@ -17,6 +17,7 @@ import numpy as np from six.moves import xrange from sklearn.metrics import pairwise_distances +from sklearn.utils.validation import check_array from .base_metric import BaseMetricLearner from .constraints import Constraints @@ -24,48 +25,53 @@ class ITML(BaseMetricLearner): """Information Theoretic Metric Learning (ITML)""" - def __init__(self, gamma=1., max_iters=1000, convergence_threshold=1e-3, - verbose=False): - 
"""Initialize the learner. + def __init__(self, gamma=1., max_iter=1000, convergence_threshold=1e-3, + A0=None, verbose=False): + """Initialize ITML. Parameters ---------- gamma : float, optional value for slack variables - max_iters : int, optional + + max_iter : int, optional + convergence_threshold : float, optional + + A0 : (d x d) matrix, optional + initial regularization matrix, defaults to identity + verbose : bool, optional if True, prints information while learning """ - self.params = { - 'gamma': gamma, - 'max_iters': max_iters, - 'convergence_threshold': convergence_threshold, - 'verbose': verbose, - } - - def _process_inputs(self, X, constraints, bounds, A0): - self.X = X + self.gamma = gamma + self.max_iter = max_iter + self.convergence_threshold = convergence_threshold + self.A0 = A0 + self.verbose = verbose + + def _process_inputs(self, X, constraints, bounds): + self.X_ = X = check_array(X) # check to make sure that no two constrained vectors are identical a,b,c,d = constraints - ident = _vector_norm(self.X[a] - self.X[b]) > 1e-9 + ident = _vector_norm(X[a] - X[b]) > 1e-9 a, b = a[ident], b[ident] - ident = _vector_norm(self.X[c] - self.X[d]) > 1e-9 + ident = _vector_norm(X[c] - X[d]) > 1e-9 c, d = c[ident], d[ident] # init bounds if bounds is None: - self.bounds = np.percentile(pairwise_distances(X), (5, 95)) + self.bounds_ = np.percentile(pairwise_distances(X), (5, 95)) else: assert len(bounds) == 2 - self.bounds = bounds + self.bounds_ = bounds # init metric - if A0 is None: - self.A = np.identity(X.shape[1]) + if self.A0 is None: + self.A_ = np.identity(X.shape[1]) else: - self.A = A0 + self.A_ = check_array(self.A0) return a,b,c,d - def fit(self, X, constraints, bounds=None, A0=None): + def fit(self, X, constraints, bounds=None): """Learn the ITML model. Parameters @@ -76,25 +82,21 @@ def fit(self, X, constraints, bounds=None, A0=None): (a,b,c,d) indices into X, such that d(X[a],X[b]) < d(X[c],X[d]) bounds : list (pos,neg) pairs, optional bounds on similarity, s.t. d(X[a],X[b]) < pos and d(X[c],X[d]) > neg - A0 : (d x d) matrix, optional - initial regularization matrix, defaults to identity """ - verbose = self.params['verbose'] - a,b,c,d = self._process_inputs(X, constraints, bounds, A0) - gamma = self.params['gamma'] - conv_thresh = self.params['convergence_threshold'] + a,b,c,d = self._process_inputs(X, constraints, bounds) + gamma = self.gamma num_pos = len(a) num_neg = len(c) _lambda = np.zeros(num_pos + num_neg) lambdaold = np.zeros_like(_lambda) gamma_proj = 1. if gamma is np.inf else gamma/(gamma+1.) 
- pos_bhat = np.zeros(num_pos) + self.bounds[0] - neg_bhat = np.zeros(num_neg) + self.bounds[1] - A = self.A + pos_bhat = np.zeros(num_pos) + self.bounds_[0] + neg_bhat = np.zeros(num_neg) + self.bounds_[1] + A = self.A_ - for it in xrange(self.params['max_iters']): + for it in xrange(self.max_iter): # update positives - vv = self.X[a] - self.X[b] + vv = self.X_[a] - self.X_[b] for i,v in enumerate(vv): wtw = v.dot(A).dot(v) # scalar alpha = min(_lambda[i], gamma_proj*(1./wtw - 1./pos_bhat[i])) @@ -105,10 +107,10 @@ def fit(self, X, constraints, bounds=None, A0=None): A += beta * np.outer(Av, Av) # update negatives - vv = self.X[c] - self.X[d] + vv = self.X_[c] - self.X_[d] for i,v in enumerate(vv): wtw = v.dot(A).dot(v) # scalar - alpha = min(_lambda[i+num_pos],gamma_proj*(1./neg_bhat[i] - 1./wtw)) + alpha = min(_lambda[i+num_pos], gamma_proj*(1./neg_bhat[i] - 1./wtw)) _lambda[i+num_pos] -= alpha beta = -alpha/(1 + alpha*wtw) neg_bhat[i] = 1./((1 / neg_bhat[i]) - (alpha / gamma)) @@ -120,17 +122,19 @@ def fit(self, X, constraints, bounds=None, A0=None): conv = np.inf break conv = np.abs(lambdaold - _lambda).sum() / normsum - if conv < conv_thresh: + if conv < self.convergence_threshold: break lambdaold = _lambda.copy() - if verbose: + if self.verbose: print('itml iter: %d, conv = %f' % (it, conv)) - if verbose: + + if self.verbose: print('itml converged at iter: %d, conv = %f' % (it, conv)) + self.n_iter_ = it return self def metric(self): - return self.A + return self.A_ # hack around lack of axis kwarg in older numpy versions try: @@ -145,7 +149,7 @@ def _vector_norm(X): class ITML_Supervised(ITML): """Information Theoretic Metric Learning (ITML)""" - def __init__(self, gamma=1., max_iters=1000, convergence_threshold=1e-3, + def __init__(self, gamma=1., max_iter=1000, convergence_threshold=1e-3, num_labeled=np.inf, num_constraints=None, bounds=None, A0=None, verbose=False): """Initialize the learner. @@ -154,39 +158,48 @@ def __init__(self, gamma=1., max_iters=1000, convergence_threshold=1e-3, ---------- gamma : float, optional value for slack variables - max_iters : int, optional + max_iter : int, optional convergence_threshold : float, optional num_labeled : int, optional number of labels to preserve for training num_constraints: int, optional number of constraints to generate + bounds : list (pos,neg) pairs, optional + bounds on similarity, s.t. d(X[a],X[b]) < pos and d(X[c],X[d]) > neg + A0 : (d x d) matrix, optional + initial regularization matrix, defaults to identity verbose : bool, optional if True, prints information while learning """ - ITML.__init__(self, gamma=gamma, max_iters=max_iters, - convergence_threshold=convergence_threshold, verbose=verbose) - self.params.update(num_labeled=num_labeled, num_constraints=num_constraints, - bounds=bounds, A0=A0) - - def fit(self, X, labels, random_state=np.random): + ITML.__init__(self, gamma=gamma, max_iter=max_iter, + convergence_threshold=convergence_threshold, + A0=A0, verbose=verbose) + self.num_labeled = num_labeled + self.num_constraints = num_constraints + self.bounds = bounds + + def fit(self, X, y, random_state=np.random): """Create constraints from labels and learn the ITML model. - Needs num_constraints specified in constructor. Parameters ---------- - X : (n x d) data matrix - each row corresponds to a single instance - labels : (n) data labels - random_state : a numpy random.seed object to fix the random_state if needed. + X : (n x d) matrix + Input data, where each row corresponds to a single instance. 
+ + y : (n) array-like + Data labels. + + random_state : numpy.random.RandomState, optional + If provided, controls random number generation. """ - num_constraints = self.params['num_constraints'] + y = check_array(y, ensure_2d=False) + num_constraints = self.num_constraints if num_constraints is None: - num_classes = np.unique(labels) - num_constraints = 20*(len(num_classes))**2 + num_classes = len(np.unique(y)) + num_constraints = 20 * num_classes**2 - c = Constraints.random_subset(labels, self.params['num_labeled'], + c = Constraints.random_subset(y, self.num_labeled, random_state=random_state) pos_neg = c.positive_negative_pairs(num_constraints, random_state=random_state) - return ITML.fit(self, X, pos_neg, bounds=self.params['bounds'], - A0=self.params['A0']) + return ITML.fit(self, X, pos_neg, bounds=self.bounds) diff --git a/metric_learn/lfda.py b/metric_learn/lfda.py index 5e8c590a..d36273a7 100644 --- a/metric_learn/lfda.py +++ b/metric_learn/lfda.py @@ -13,8 +13,10 @@ from __future__ import division, absolute_import import numpy as np import scipy +import warnings from six.moves import xrange from sklearn.metrics import pairwise_distances +from sklearn.utils.validation import check_array from .base_metric import BaseMetricLearner @@ -24,61 +26,80 @@ class LFDA(BaseMetricLearner): Local Fisher Discriminant Analysis for Supervised Dimensionality Reduction Sugiyama, ICML 2006 ''' - def __init__(self, dim=None, k=7, metric='weighted'): + def __init__(self, num_dims=None, k=None, metric='weighted'): ''' - dim : dimensionality of reduced space (defaults to dimension of X) - k : nearest neighbor used in local scaling method (default: 7) - metric : type of metric in the embedding space (default: 'weighted') - 'weighted' - weighted eigenvectors - 'orthonormalized' - orthonormalized - 'plain' - raw eigenvectors + Initialize LFDA. + + Parameters + ---------- + num_dims : int, optional + Dimensionality of reduced space (defaults to dimension of X) + + k : int, optional + Number of nearest neighbors used in local scaling method. + Defaults to min(7, num_dims - 1). + + metric : str, optional + Type of metric in the embedding space (default: 'weighted') + 'weighted' - weighted eigenvectors + 'orthonormalized' - orthonormalized + 'plain' - raw eigenvectors ''' if metric not in ('weighted', 'orthonormalized', 'plain'): raise ValueError('Invalid metric: %r' % metric) - - self.params = { - 'dim': dim, - 'metric': metric, - 'k': k, - } + self.num_dims = num_dims + self.metric = metric + self.k = k def transformer(self): - return self._transformer + return self.transformer_ - def _process_inputs(self, X, Y): - X = np.asanyarray(X) - self.X = X - n, d = X.shape - unique_classes, Y = np.unique(Y, return_inverse=True) + def _process_inputs(self, X, y): + self.X_ = check_array(X) + n, d = self.X_.shape + unique_classes, y = np.unique(y, return_inverse=True) num_classes = len(unique_classes) - if self.params['dim'] is None: - self.params['dim'] = d - elif not 0 < self.params['dim'] <= d: - raise ValueError('Invalid embedding dimension, must be in [1,%d]' % d) + if self.num_dims is None: + dim = d + else: + if not 0 < self.num_dims <= d: + raise ValueError('Invalid num_dims, must be in [1,%d]' % d) + dim = self.num_dims + + if self.k is None: + k = min(7, d - 1) + elif self.k >= d: + warnings.warn('Chosen k (%d) too large, using %d instead.' 
% (self.k,d-1)) + k = d - 1 + else: + k = int(self.k) - if not 0 < self.params['k'] < d: - raise ValueError('Invalid k, must be in [0,%d]' % (d-1)) + return self.X_, y, num_classes, n, d, dim, k - return X, Y, num_classes, n, d + def fit(self, X, y): + '''Fit the LFDA model. - def fit(self, X, Y): - ''' - X: (n, d) array-like of samples - Y: (n,) array-like of class labels + Parameters + ---------- + X : (n, d) array-like + Input data. + + y : (n,) array-like + Class labels, one per point of data. ''' - X, Y, num_classes, n, d = self._process_inputs(X, Y) + X, y, num_classes, n, d, dim, k_ = self._process_inputs(X, y) tSb = np.zeros((d,d)) tSw = np.zeros((d,d)) for c in xrange(num_classes): - Xc = X[Y==c] + Xc = X[y==c] nc = Xc.shape[0] # classwise affinity matrix dist = pairwise_distances(Xc, metric='l2', squared=True) # distances to k-th nearest neighbor - k = min(self.params['k'], nc-1) + k = min(k_, nc-1) sigma = np.sqrt(np.partition(dist, k, axis=0)[:,k]) local_scale = np.outer(sigma, sigma) @@ -96,25 +117,32 @@ def fit(self, X, Y): tSb = (tSb + tSb.T) / 2 tSw = (tSw + tSw.T) / 2 - if self.params['dim'] == d: - vals, vecs = scipy.linalg.eigh(tSb, tSw) - else: - vals, vecs = scipy.sparse.linalg.eigsh(tSb, k=self.params['dim'], M=tSw, - which='LA') - - order = np.argsort(-vals)[:self.params['dim']] - vals = vals[order] + vals, vecs = _eigh(tSb, tSw, dim) + order = np.argsort(-vals)[:dim] + vals = vals[order].real vecs = vecs[:,order] - if self.params['metric'] == 'weighted': + if self.metric == 'weighted': vecs *= np.sqrt(vals) - elif self.params['metric'] == 'orthonormalized': + elif self.metric == 'orthonormalized': vecs, _ = np.linalg.qr(vecs) - self._transformer = vecs.T + self.transformer_ = vecs.T return self def _sum_outer(x): s = x.sum(axis=0) return np.outer(s, s) + + +def _eigh(a, b, dim): + try: + return scipy.sparse.linalg.eigsh(a, k=dim, M=b, which='LA') + except (ValueError, scipy.sparse.linalg.ArpackNoConvergence): + pass + try: + return scipy.linalg.eigh(a, b) + except np.linalg.LinAlgError: + pass + return scipy.linalg.eig(a, b) diff --git a/metric_learn/lmnn.py b/metric_learn/lmnn.py index 436ffb6b..3682f3f6 100644 --- a/metric_learn/lmnn.py +++ b/metric_learn/lmnn.py @@ -11,55 +11,66 @@ from __future__ import print_function, absolute_import import numpy as np +import warnings from collections import Counter from six.moves import xrange from sklearn.metrics import pairwise_distances +from sklearn.utils.validation import check_X_y, check_array from .base_metric import BaseMetricLearner # commonality between LMNN implementations class _base_LMNN(BaseMetricLearner): - def __init__(self, **kwargs): - self.params = kwargs + def __init__(self, k=3, min_iter=50, max_iter=1000, learn_rate=1e-7, + regularization=0.5, convergence_tol=0.001, use_pca=True, + verbose=False): + """Initialize the LMNN object. + + Parameters + ---------- + k : int, optional + Number of neighbors to consider, not including self-edges. + + regularization: float, optional + Weighting of pull and push terms, with 0.5 meaning equal weight. 
+ """ + self.k = k + self.min_iter = min_iter + self.max_iter = max_iter + self.learn_rate = learn_rate + self.regularization = regularization + self.convergence_tol = convergence_tol + self.use_pca = use_pca + self.verbose = verbose def transformer(self): - return self.L + return self.L_ # slower Python version class python_LMNN(_base_LMNN): - def __init__(self, k=3, min_iter=50, max_iter=1000, learn_rate=1e-7, - regularization=0.5, convergence_tol=0.001, verbose=False): - """Initialize the LMNN object - - k: number of neighbors to consider. (does not include self-edges) - regularization: weighting of pull and push terms - """ - _base_LMNN.__init__(self, k=k, min_iter=min_iter, max_iter=max_iter, - learn_rate=learn_rate, regularization=regularization, - convergence_tol=convergence_tol, verbose=verbose) def _process_inputs(self, X, labels): - num_pts = X.shape[0] - assert len(labels) == num_pts - unique_labels, self.label_inds = np.unique(labels, return_inverse=True) - self.labels = np.arange(len(unique_labels)) - self.X = X - self.L = np.eye(X.shape[1]) - required_k = np.bincount(self.label_inds).min() - assert self.params['k'] <= required_k, ( - 'not enough class labels for specified k' - ' (smallest class has %d)' % required_k) - - def fit(self, X, labels): - k = self.params['k'] - verbose = self.params['verbose'] - reg = self.params['regularization'] - learn_rate = self.params['learn_rate'] - convergence_tol = self.params['convergence_tol'] - min_iter = self.params['min_iter'] - self._process_inputs(X, labels) + self.X_ = check_array(X, dtype=float) + num_pts, num_dims = self.X_.shape + unique_labels, self.label_inds_ = np.unique(labels, return_inverse=True) + if len(self.label_inds_) != num_pts: + raise ValueError('Must have one label per point.') + self.labels_ = np.arange(len(unique_labels)) + if self.use_pca: + warnings.warn('use_pca does nothing for the python_LMNN implementation') + self.L_ = np.eye(num_dims) + required_k = np.bincount(self.label_inds_).min() + if self.k > required_k: + raise ValueError('not enough class labels for specified k' + ' (smallest class has %d)' % required_k) + + def fit(self, X, y): + k = self.k + reg = self.regularization + learn_rate = self.learn_rate + self._process_inputs(X, y) target_neighbors = self._select_targets() impostors = self._find_impostors(target_neighbors[:,-1]) @@ -68,8 +79,8 @@ def fit(self, X, labels): return # sum outer products - dfG = _sum_outer_products(self.X, target_neighbors.flatten(), - np.repeat(np.arange(self.X.shape[0]), k)) + dfG = _sum_outer_products(self.X_, target_neighbors.flatten(), + np.repeat(np.arange(self.X_.shape[0]), k)) df = np.zeros_like(dfG) # storage @@ -81,17 +92,17 @@ def fit(self, X, labels): # initialize gradient and L G = dfG * reg + df * (1-reg) - L = self.L + L = self.L_ objective = np.inf # main loop - for it in xrange(1, self.params['max_iter']): + for it in xrange(1, self.max_iter): df_old = df.copy() a1_old = [a.copy() for a in a1] a2_old = [a.copy() for a in a2] objective_old = objective # Compute pairwise distances under current metric - Lx = L.dot(self.X.T).T + Lx = L.dot(self.X_.T).T g0 = _inplace_paired_L2(*Lx[impostors]) Ni = 1 + _inplace_paired_L2(Lx[target_neighbors], Lx[:,None,:]) g1,g2 = Ni[impostors] @@ -116,16 +127,16 @@ def fit(self, X, labels): targets = target_neighbors[:,nn_idx] PLUS, pweight = _count_edges(plus1, plus2, impostors, targets) - df += _sum_outer_products(self.X, PLUS[:,0], PLUS[:,1], pweight) + df += _sum_outer_products(self.X_, PLUS[:,0], PLUS[:,1], pweight) 
MINUS, mweight = _count_edges(minus1, minus2, impostors, targets) - df -= _sum_outer_products(self.X, MINUS[:,0], MINUS[:,1], mweight) + df -= _sum_outer_products(self.X_, MINUS[:,0], MINUS[:,1], mweight) in_imp, out_imp = impostors - df += _sum_outer_products(self.X, in_imp[minus1], out_imp[minus1]) - df += _sum_outer_products(self.X, in_imp[minus2], out_imp[minus2]) + df += _sum_outer_products(self.X_, in_imp[minus1], out_imp[minus1]) + df += _sum_outer_products(self.X_, in_imp[minus2], out_imp[minus2]) - df -= _sum_outer_products(self.X, in_imp[plus1], out_imp[plus1]) - df -= _sum_outer_products(self.X, in_imp[plus2], out_imp[plus2]) + df -= _sum_outer_products(self.X_, in_imp[plus1], out_imp[plus1]) + df -= _sum_outer_products(self.X_, in_imp[plus2], out_imp[plus2]) a1[nn_idx] = act1 a2[nn_idx] = act2 @@ -140,7 +151,7 @@ def fit(self, X, labels): assert not np.isnan(objective) delta_obj = objective - objective_old - if verbose: + if self.verbose: print(it, objective, delta_obj, total_active, learn_rate) # update step size @@ -157,34 +168,26 @@ def fit(self, X, labels): learn_rate *= 1.01 # check for convergence - if it > min_iter and abs(delta_obj) < convergence_tol: - if verbose: + if it > self.min_iter and abs(delta_obj) < self.convergence_tol: + if self.verbose: print("LMNN converged with objective", objective) break else: - if verbose: - print("LMNN didn't converge in %(max_iter)d steps." % self.params) + if self.verbose: + print("LMNN didn't converge in %d steps." % self.max_iter) # store the last L - self.L = L + self.L_ = L + self.n_iter_ = it return self - def metric(self): - return self.L.T.dot(self.L) - - def transform(self, X=None): - if X is None: - X = self.X - return self.L.dot(X.T).T - def _select_targets(self): - k = self.params['k'] - target_neighbors = np.empty((self.X.shape[0], k), dtype=int) - for label in self.labels: - inds, = np.nonzero(self.label_inds == label) - dd = pairwise_distances(self.X[inds]) + target_neighbors = np.empty((self.X_.shape[0], self.k), dtype=int) + for label in self.labels_: + inds, = np.nonzero(self.label_inds_ == label) + dd = pairwise_distances(self.X_[inds]) np.fill_diagonal(dd, np.inf) - nn = np.argsort(dd)[...,:k] + nn = np.argsort(dd)[..., :self.k] target_neighbors[inds] = inds[nn] return target_neighbors @@ -192,9 +195,9 @@ def _find_impostors(self, furthest_neighbors): Lx = self.transform() margin_radii = 1 + _inplace_paired_L2(Lx[furthest_neighbors], Lx) impostors = [] - for label in self.labels[:-1]: - in_inds, = np.nonzero(self.label_inds == label) - out_inds, = np.nonzero(self.label_inds > label) + for label in self.labels_[:-1]: + in_inds, = np.nonzero(self.label_inds_ == label) + out_inds, = np.nonzero(self.label_inds_ > label) dist = pairwise_distances(Lx[out_inds], Lx[in_inds]) i1,j1 = np.nonzero(dist < margin_radii[out_inds][:,None]) i2,j2 = np.nonzero(dist < margin_radii[in_inds]) @@ -243,28 +246,20 @@ def _sum_outer_products(data, a_inds, b_inds, weights=None): from modshogun import RealFeatures, MulticlassLabels class LMNN(_base_LMNN): - def __init__(self, k=3, min_iter=50, max_iter=1000, learn_rate=1e-7, - regularization=0.5, convergence_tol=0.001, use_pca=True, - verbose=False): - _base_LMNN.__init__(self, k=k, min_iter=min_iter, max_iter=max_iter, - learn_rate=learn_rate, regularization=regularization, - convergence_tol=convergence_tol, use_pca=use_pca, - verbose=verbose) - - def fit(self, X, labels): - self.X = X - self.L = np.eye(X.shape[1]) - labels = MulticlassLabels(labels.astype(np.float64)) - self._lmnn = 
shogun_LMNN(RealFeatures(X.T), labels, self.params['k']) - self._lmnn.set_maxiter(self.params['max_iter']) - self._lmnn.set_obj_threshold(self.params['convergence_tol']) - self._lmnn.set_regularization(self.params['regularization']) - self._lmnn.set_stepsize(self.params['learn_rate']) - if self.params['use_pca']: + + def fit(self, X, y): + self.X_, y = check_X_y(X, y, dtype=float) + labels = MulticlassLabels(y) + self._lmnn = shogun_LMNN(RealFeatures(self.X_.T), labels, self.k) + self._lmnn.set_maxiter(self.max_iter) + self._lmnn.set_obj_threshold(self.convergence_tol) + self._lmnn.set_regularization(self.regularization) + self._lmnn.set_stepsize(self.learn_rate) + if self.use_pca: self._lmnn.train() else: - self._lmnn.train(self.L) - self.L = self._lmnn.get_linear_transform() + self._lmnn.train(np.eye(X.shape[1])) + self.L_ = self._lmnn.get_linear_transform() return self except ImportError: diff --git a/metric_learn/lsml.py b/metric_learn/lsml.py index 93f3bafd..049cf901 100644 --- a/metric_learn/lsml.py +++ b/metric_learn/lsml.py @@ -11,48 +11,51 @@ import numpy as np import scipy.linalg from six.moves import xrange +from sklearn.utils.validation import check_array from .base_metric import BaseMetricLearner from .constraints import Constraints class LSML(BaseMetricLearner): - def __init__(self, tol=1e-3, max_iter=1000, verbose=False): - """Initialize the learner. + def __init__(self, tol=1e-3, max_iter=1000, prior=None, verbose=False): + """Initialize LSML. Parameters ---------- tol : float, optional max_iter : int, optional + prior : (d x d) matrix, optional + guess at a metric [default: covariance(X)] verbose : bool, optional if True, prints information while learning """ - self.params = { - 'tol': tol, - 'max_iter': max_iter, - 'verbose': verbose, - } - - def _prepare_inputs(self, X, constraints, weights, prior): - self.X = X + self.prior = prior + self.tol = tol + self.max_iter = max_iter + self.verbose = verbose + + def _prepare_inputs(self, X, constraints, weights): + self.X_ = X = check_array(X) a,b,c,d = constraints - self.vab = X[a] - X[b] - self.vcd = X[c] - X[d] - assert self.vab.shape == self.vcd.shape, 'Constraints must have same length' + self.vab_ = X[a] - X[b] + self.vcd_ = X[c] - X[d] + if self.vab_.shape != self.vcd_.shape: + raise ValueError('Constraints must have same length') if weights is None: - self.w = np.ones(self.vab.shape[0]) + self.w_ = np.ones(self.vab_.shape[0]) else: - self.w = weights - self.w /= self.w.sum() # weights must sum to 1 - if prior is None: - self.M = np.cov(X.T) + self.w_ = weights + self.w_ /= self.w_.sum() # weights must sum to 1 + if self.prior is None: + self.M_ = np.cov(X.T) else: - self.M = prior + self.M_ = self.prior def metric(self): - return self.M + return self.M_ - def fit(self, X, constraints, weights=None, prior=None): + def fit(self, X, constraints, weights=None): """Learn the LSML model. 
Parameters @@ -63,28 +66,24 @@ def fit(self, X, constraints, weights=None, prior=None): (a,b,c,d) indices into X, such that d(X[a],X[b]) < d(X[c],X[d]) weights : (m,) array of floats, optional scale factor for each constraint - prior : (d x d) matrix, optional - guess at a metric [default: covariance(X)] """ - verbose = self.params['verbose'] - self._prepare_inputs(X, constraints, weights, prior) - prior_inv = scipy.linalg.inv(self.M) - s_best = self._total_loss(self.M, prior_inv) + self._prepare_inputs(X, constraints, weights) + prior_inv = scipy.linalg.inv(self.M_) + s_best = self._total_loss(self.M_, prior_inv) step_sizes = np.logspace(-10, 0, 10) - if verbose: + if self.verbose: print('initial loss', s_best) - tol = self.params['tol'] - for it in xrange(1, self.params['max_iter']+1): - grad = self._gradient(self.M, prior_inv) + for it in xrange(1, self.max_iter+1): + grad = self._gradient(self.M_, prior_inv) grad_norm = scipy.linalg.norm(grad) - if grad_norm < tol: + if grad_norm < self.tol: break - if verbose: + if self.verbose: print('gradient norm', grad_norm) M_best = None for step_size in step_sizes: step_size /= grad_norm - new_metric = self.M - step_size * grad + new_metric = self.M_ - step_size * grad w, v = scipy.linalg.eigh(new_metric) new_metric = v.dot((np.maximum(w, 1e-8) * v).T) cur_s = self._total_loss(new_metric, prior_inv) @@ -92,21 +91,22 @@ def fit(self, X, constraints, weights=None, prior=None): l_best = step_size s_best = cur_s M_best = new_metric - if verbose: + if self.verbose: print('iter', it, 'cost', s_best, 'best step', l_best * grad_norm) if M_best is None: break - self.M = M_best + self.M_ = M_best else: - if verbose: + if self.verbose: print("Didn't converge after", it, "iterations. Final loss:", s_best) + self.n_iter_ = it return self def _comparison_loss(self, metric): - dab = np.sum(self.vab.dot(metric) * self.vab, axis=1) - dcd = np.sum(self.vcd.dot(metric) * self.vcd, axis=1) + dab = np.sum(self.vab_.dot(metric) * self.vab_, axis=1) + dcd = np.sum(self.vcd_.dot(metric) * self.vcd_, axis=1) violations = dab > dcd - return self.w[violations].dot((np.sqrt(dab[violations]) - + return self.w_[violations].dot((np.sqrt(dab[violations]) - np.sqrt(dcd[violations]))**2) def _total_loss(self, metric, prior_inv): @@ -115,12 +115,12 @@ def _total_loss(self, metric, prior_inv): def _gradient(self, metric, prior_inv): dMetric = prior_inv - scipy.linalg.inv(metric) - dabs = np.sum(self.vab.dot(metric) * self.vab, axis=1) - dcds = np.sum(self.vcd.dot(metric) * self.vcd, axis=1) + dabs = np.sum(self.vab_.dot(metric) * self.vab_, axis=1) + dcds = np.sum(self.vcd_.dot(metric) * self.vcd_, axis=1) violations = dabs > dcds # TODO: vectorize - for vab, dab, vcd, dcd in zip(self.vab[violations], dabs[violations], - self.vcd[violations], dcds[violations]): + for vab, dab, vcd, dcd in zip(self.vab_[violations], dabs[violations], + self.vcd_[violations], dcds[violations]): dMetric += ((1-np.sqrt(dcd/dab))*np.outer(vab, vab) + (1-np.sqrt(dab/dcd))*np.outer(vcd, vcd)) return dMetric @@ -151,29 +151,34 @@ def __init__(self, tol=1e-3, max_iter=1000, prior=None, num_labeled=np.inf, verbose : bool, optional if True, prints information while learning """ - LSML.__init__(self, tol=tol, max_iter=max_iter, verbose=verbose) - self.params.update(prior=prior, num_labeled=num_labeled, - num_constraints=num_constraints, weights=weights) + LSML.__init__(self, tol=tol, max_iter=max_iter, prior=prior, + verbose=verbose) + self.num_labeled = num_labeled + self.num_constraints = num_constraints + 
self.weights = weights - def fit(self, X, labels, random_state=np.random): + def fit(self, X, y, random_state=np.random): """Create constraints from labels and learn the LSML model. - Needs num_constraints specified in constructor. Parameters ---------- - X : (n x d) data matrix - each row corresponds to a single instance - labels : (n) data labels - random_state : a numpy random.seed object to fix the random_state if needed. + X : (n x d) matrix + Input data, where each row corresponds to a single instance. + + y : (n) array-like + Data labels. + + random_state : numpy.random.RandomState, optional + If provided, controls random number generation. """ - num_constraints = self.params['num_constraints'] + y = check_array(y, ensure_2d=False) + num_constraints = self.num_constraints if num_constraints is None: - num_classes = np.unique(labels) - num_constraints = 20*(len(num_classes))**2 + num_classes = len(np.unique(y)) + num_constraints = 20 * num_classes**2 - c = Constraints.random_subset(labels, self.params['num_labeled'], + c = Constraints.random_subset(y, self.num_labeled, random_state=random_state) pairs = c.positive_negative_pairs(num_constraints, same_length=True, random_state=random_state) - return LSML.fit(self, X, pairs, weights=self.params['weights'], - prior=self.params['prior']) + return LSML.fit(self, X, pairs, weights=self.weights) diff --git a/metric_learn/mlkr.py b/metric_learn/mlkr.py index 13a88e23..d0007685 100644 --- a/metric_learn/mlkr.py +++ b/metric_learn/mlkr.py @@ -11,6 +11,7 @@ from scipy.optimize import minimize from scipy.spatial.distance import pdist, squareform from sklearn.decomposition import PCA +from sklearn.utils.validation import check_X_y from .base_metric import BaseMetricLearner @@ -18,90 +19,94 @@ class MLKR(BaseMetricLearner): - """Metric Learning for Kernel Regression (MLKR)""" - def __init__(self, num_dims=None, A0=None, epsilon=0.01, alpha=0.0001, - max_iter=1000): - """ - MLKR initialization - - Parameters - ---------- - num_dims: If given, restrict to a num_dims-dimensional transformation. - A0: Initialization of transformation matrix. Defaults to PCA loadings. - epsilon: Step size for congujate gradient descent. - alpha: Stopping criterion for congujate gradient descent. - max_iter: Cap on number of congugate gradient iterations. - """ - self.params = { - "A0": A0, - "epsilon": epsilon, - "alpha": alpha, - "max_iter": max_iter, - "num_dims": num_dims, - } - - def _process_inputs(self, X, y): - self.X = np.array(X, copy=False) - y = np.array(y, copy=False).ravel() - if X.ndim == 1: - X = X[:, np.newaxis] - n, d = X.shape - if y.shape[0] != n: - raise ValueError('Data and label lengths mismatch: %d != %d' - % (n, y.shape[0])) - - A = self.params['A0'] - m = self.params['num_dims'] - if m is None: - m = d - if A is None: - # initialize to PCA transformation matrix - # note: not the same as n_components=m ! - A = PCA().fit(X).components_.T[:m] - elif A.shape != (m, d): - raise ValueError('A0 needs shape (%d,%d) but got %s' % ( - m, d, A.shape)) - return y, A - - def fit(self, X, y): - """ - Fit MLKR model - - Parameters: - ---------- - X : (n x d) array of samples - y : (n) data labels - """ - y, A = self._process_inputs(X, y) - - # note: this line takes (n*n*d) memory! 
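One more note on the LSML refactor above before the MLKR rewrite: a short sketch of the quadruplet constraints the new LSML.fit consumes (synthetic data; index arrays invented). Each quadruplet encodes the relative judgment d(X[a], X[b]) < d(X[c], X[d]).

    import numpy as np
    from metric_learn import LSML

    X = np.random.RandomState(0).randn(10, 3)
    a, b = np.array([0, 1]), np.array([2, 3])
    c, d = np.array([4, 5]), np.array([6, 7])
    lsml = LSML(tol=1e-3, max_iter=100)
    lsml.fit(X, (a, b, c, d))
    M = lsml.metric()   # starts from cov(X) when no prior is given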
-        # for larger datasets, we'll need to compute dX as we go
-        dX = (X[None] - X[:, None]).reshape((-1, X.shape[1]))
-
-        res = minimize(_loss, A.ravel(), (X, y, dX), method='CG', jac=True,
-                       tol=self.params['alpha'],
-                       options=dict(maxiter=self.params['max_iter'],
-                                    eps=self.params['epsilon']))
-        self._transformer = res.x.reshape(A.shape)
-        return self
-
-    def transformer(self):
-        return self._transformer
+  """Metric Learning for Kernel Regression (MLKR)"""
+  def __init__(self, num_dims=None, A0=None, epsilon=0.01, alpha=0.0001,
+               max_iter=1000):
+    """
+    Initialize MLKR.
+
+    Parameters
+    ----------
+    num_dims : int, optional
+        Dimensionality of reduced space (defaults to dimension of X)
+
+    A0: array-like, optional
+        Initialization of transformation matrix. Defaults to PCA loadings.
+
+    epsilon: float, optional
+        Step size for conjugate gradient descent.
+
+    alpha: float, optional
+        Stopping criterion for conjugate gradient descent.
+
+    max_iter: int, optional
+        Cap on number of conjugate gradient iterations.
+    """
+    self.num_dims = num_dims
+    self.A0 = A0
+    self.epsilon = epsilon
+    self.alpha = alpha
+    self.max_iter = max_iter
+
+  def _process_inputs(self, X, y):
+    self.X_, y = check_X_y(X, y)
+    n, d = self.X_.shape
+    if y.shape[0] != n:
+      raise ValueError('Data and label lengths mismatch: %d != %d'
+                       % (n, y.shape[0]))
+
+    A = self.A0
+    m = self.num_dims
+    if m is None:
+      m = d
+    if A is None:
+      # initialize to PCA transformation matrix
+      # note: not the same as n_components=m !
+      A = PCA().fit(X).components_.T[:m]
+    elif A.shape != (m, d):
+      raise ValueError('A0 needs shape (%d,%d) but got %s' % (
+          m, d, A.shape))
+    return self.X_, y, A
+
+  def fit(self, X, y):
+    """
+    Fit MLKR model
+
+    Parameters
+    ----------
+    X : (n x d) array of samples
+    y : (n) data labels
+    """
+    X, y, A = self._process_inputs(X, y)
+
+    # note: this line takes (n*n*d) memory!
+ # for larger datasets, we'll need to compute dX as we go + dX = (X[None] - X[:, None]).reshape((-1, X.shape[1])) + + res = minimize(_loss, A.ravel(), (X, y, dX), method='CG', jac=True, + tol=self.alpha, + options=dict(maxiter=self.max_iter, eps=self.epsilon)) + self.transformer_ = res.x.reshape(A.shape) + self.n_iter_ = res.nit + return self + + def transformer(self): + return self.transformer_ def _loss(flatA, X, y, dX): - A = flatA.reshape((-1, X.shape[1])) - dist = pdist(X, metric='mahalanobis', VI=A.T.dot(A)) - K = squareform(np.exp(-dist**2)) - denom = np.maximum(K.sum(axis=0), EPS) - yhat = K.dot(y) / denom - ydiff = yhat - y - cost = (ydiff**2).sum() - - # also compute the gradient - np.fill_diagonal(K, 1) - W = 2 * K * (np.outer(ydiff, ydiff) / denom) - # note: this is the part that the matlab impl drops to C for - M = (dX.T * W.ravel()).dot(dX) - grad = 2 * A.dot(M) - return cost, grad.ravel() + A = flatA.reshape((-1, X.shape[1])) + dist = pdist(X, metric='mahalanobis', VI=A.T.dot(A)) + K = squareform(np.exp(-dist**2)) + denom = np.maximum(K.sum(axis=0), EPS) + yhat = K.dot(y) / denom + ydiff = yhat - y + cost = (ydiff**2).sum() + + # also compute the gradient + np.fill_diagonal(K, 1) + W = 2 * K * (np.outer(ydiff, ydiff) / denom) + # note: this is the part that the matlab impl drops to C for + M = (dX.T * W.ravel()).dot(dX) + grad = 2 * A.dot(M) + return cost, grad.ravel() diff --git a/metric_learn/nca.py b/metric_learn/nca.py index 9651617e..40757d23 100644 --- a/metric_learn/nca.py +++ b/metric_learn/nca.py @@ -6,6 +6,7 @@ from __future__ import absolute_import import numpy as np from six.moves import xrange +from sklearn.utils.validation import check_X_y from .base_metric import BaseMetricLearner @@ -14,23 +15,21 @@ class NCA(BaseMetricLearner): def __init__(self, num_dims=None, max_iter=100, learning_rate=0.01): - self.params = { - 'num_dims': num_dims, - 'max_iter': max_iter, - 'learning_rate': learning_rate, - } - self.A = None + self.num_dims = num_dims + self.max_iter = max_iter + self.learning_rate = learning_rate def transformer(self): - return self.A + return self.A_ - def fit(self, X, labels): + def fit(self, X, y): """ X: data matrix, (n x d) - labels: scalar labels, (n) + y: scalar labels, (n) """ + X, labels = check_X_y(X, y) n, d = X.shape - num_dims = self.params['num_dims'] + num_dims = self.num_dims if num_dims is None: num_dims = d # Initialize A to a scaling matrix @@ -41,8 +40,7 @@ def fit(self, X, labels): dX = X[:,None] - X[None] # shape (n, n, d) tmp = np.einsum('...i,...j->...ij', dX, dX) # shape (n, n, d, d) masks = labels[:,None] == labels[None] - learning_rate = self.params['learning_rate'] - for it in xrange(self.params['max_iter']): + for it in xrange(self.max_iter): for i, label in enumerate(labels): mask = masks[i] Ax = A.dot(X.T).T # shape (n, num_dims) @@ -53,8 +51,9 @@ def fit(self, X, labels): t = softmax[:, None, None] * tmp[i] # shape (n, d, d) d = softmax[mask].sum() * t.sum(axis=0) - t[mask].sum(axis=0) - A += learning_rate * A.dot(d) + A += self.learning_rate * A.dot(d) - self.X = X - self.A = A + self.X_ = X + self.A_ = A + self.n_iter_ = it return self diff --git a/metric_learn/rca.py b/metric_learn/rca.py index 70a87ebb..78964ba0 100644 --- a/metric_learn/rca.py +++ b/metric_learn/rca.py @@ -13,16 +13,18 @@ from __future__ import absolute_import import numpy as np +import warnings from six.moves import xrange from sklearn import decomposition -import warnings +from sklearn.utils.validation import check_array from .base_metric import 
BaseMetricLearner from .constraints import Constraints # mean center each chunklet separately -def _chunk_mean_centering(data, chunks, num_chunks): +def _chunk_mean_centering(data, chunks): + num_chunks = chunks.max() + 1 chunk_mask = chunks != -1 chunk_data = data[chunk_mask] chunk_labels = chunks[chunk_mask] @@ -35,96 +37,94 @@ def _chunk_mean_centering(data, chunks, num_chunks): class RCA(BaseMetricLearner): """Relevant Components Analysis (RCA)""" - def __init__(self, dim=None, pca_comps=None): + def __init__(self, num_dims=None, pca_comps=None): """Initialize the learner. Parameters ---------- - dim : int, optional + num_dims : int, optional embedding dimension (default: original dimension of data) + pca_comps : int, float, None or string Number of components to keep during PCA preprocessing. If None (default), does not perform PCA. - If ``0 < pca_comps < 1``, it is used as the minimum explained variance ratio. + If ``0 < pca_comps < 1``, it is used as + the minimum explained variance ratio. See sklearn.decomposition.PCA for more details. """ - self.params = {'dim': dim, 'pca_comps': pca_comps} + self.num_dims = num_dims + self.pca_comps = pca_comps def transformer(self): - return self._transformer - - def _process_data(self, data): - data = np.asanyarray(data) - self.X = data - n, d = data.shape - return data, d - - def _process_chunks(self, data, chunks): - chunks = np.asanyarray(chunks) - num_chunks = chunks.max() + 1 - return _chunk_mean_centering(data, chunks, num_chunks) - - def _process_parameters(self, d): - if self.params['dim'] is None: - self.params['dim'] = d - elif not self.params['dim'] > 0: - raise ValueError('Invalid embedding dimension, dim must be greater than 0.') - elif self.params['dim'] > d: - self.params['dim'] = d - warnings.warn('dim must be smaller than the data dimension. ' + - 'dim is set to %d.' % (d)) - return self.params['dim'] - - def fit(self, data, chunks): - """Learn the RCA model. - - Parameters - ---------- - X : (n x d) data matrix - each row corresponds to a single instance - chunks : (n,) array of ints - when ``chunks[i] == -1``, point i doesn't belong to any chunklet, - when ``chunks[i] == j``, point i belongs to chunklet j. - """ + return self.transformer_ - data, d = self._process_data(data) + def _process_data(self, X): + self.X_ = X = check_array(X) # PCA projection to remove noise and redundant information. - M_pca = None - if self.params['pca_comps'] is not None: - pca = decomposition.PCA(n_components=self.params['pca_comps'], - svd_solver='full') - data = pca.fit_transform(data) - d = data.shape[1] + if self.pca_comps is not None: + pca = decomposition.PCA(n_components=self.pca_comps, svd_solver='full') + X = pca.fit_transform(X) M_pca = pca.components_ else: - data -= data.mean(axis=0) + X -= X.mean(axis=0) + M_pca = None - chunk_mask, chunk_data = self._process_chunks(data, chunks) - inner_cov = np.cov(chunk_data, rowvar=0, bias=1) - rank = np.linalg.matrix_rank(inner_cov) + return X, M_pca + def _check_dimension(self, rank): + d = self.X_.shape[1] if rank < d: warnings.warn('The inner covariance matrix is not invertible, ' 'so the transformation matrix may contain Nan values. 
' 'You should adjust pca_comps to remove noise and ' 'redundant information.') + if self.num_dims is None: + dim = d + elif self.num_dims <= 0: + raise ValueError('Invalid embedding dimension: must be greater than 0.') + elif self.num_dims > d: + dim = d + warnings.warn('num_dims (%d) must be smaller than ' + 'the data dimension (%d)' % (self.num_dims, d)) + else: + dim = self.num_dims + return dim + + def fit(self, data, chunks): + """Learn the RCA model. + + Parameters + ---------- + X : (n x d) data matrix + Each row corresponds to a single instance + chunks : (n,) array of ints + When ``chunks[i] == -1``, point i doesn't belong to any chunklet. + When ``chunks[i] == j``, point i belongs to chunklet j. + """ + data, M_pca = self._process_data(data) + + chunks = np.asanyarray(chunks, dtype=int) + chunk_mask, chunked_data = _chunk_mean_centering(data, chunks) + + inner_cov = np.cov(chunked_data, rowvar=0, bias=1) + dim = self._check_dimension(np.linalg.matrix_rank(inner_cov)) + # Fisher Linear Discriminant projection - dim = self._process_parameters(d) - if dim < d: + if dim < data.shape[1]: total_cov = np.cov(data[chunk_mask], rowvar=0) tmp = np.linalg.lstsq(total_cov, inner_cov)[0] vals, vecs = np.linalg.eig(tmp) inds = np.argsort(vals)[:dim] A = vecs[:, inds] inner_cov = A.T.dot(inner_cov).dot(A) - self._transformer = _inv_sqrtm(inner_cov).dot(A.T) + self.transformer_ = _inv_sqrtm(inner_cov).dot(A.T) else: - self._transformer = _inv_sqrtm(inner_cov).T + self.transformer_ = _inv_sqrtm(inner_cov).T if M_pca is not None: - self._transformer = self._transformer.dot(M_pca) + self.transformer_ = self.transformer_.dot(M_pca) return self @@ -136,20 +136,22 @@ def _inv_sqrtm(x): class RCA_Supervised(RCA): - def __init__(self, dim=None, pca_comps=None, num_chunks=100, chunk_size=2): + def __init__(self, num_dims=None, pca_comps=None, num_chunks=100, + chunk_size=2): """Initialize the learner. Parameters ---------- - dim : int, optional + num_dims : int, optional embedding dimension (default: original dimension of data) num_chunks: int, optional chunk_size: int, optional """ - RCA.__init__(self, dim=dim, pca_comps=pca_comps) - self.params.update(num_chunks=num_chunks, chunk_size=chunk_size) + RCA.__init__(self, num_dims=num_dims, pca_comps=pca_comps) + self.num_chunks = num_chunks + self.chunk_size = chunk_size - def fit(self, X, labels, random_state=np.random): + def fit(self, X, y, random_state=np.random): """Create constraints from labels and learn the RCA model. Needs num_constraints specified in constructor. @@ -157,10 +159,10 @@ def fit(self, X, labels, random_state=np.random): ---------- X : (n x d) data matrix each row corresponds to a single instance - labels : (n) data labels + y : (n) data labels random_state : a random.seed object to fix the random_state if needed. 
""" - chunks = Constraints(labels).chunks(num_chunks=self.params['num_chunks'], - chunk_size=self.params['chunk_size'], - random_state=random_state) + chunks = Constraints(y).chunks(num_chunks=self.num_chunks, + chunk_size=self.chunk_size, + random_state=random_state) return RCA.fit(self, X, chunks) diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py index be4e16f7..d353f524 100644 --- a/metric_learn/sdml.py +++ b/metric_learn/sdml.py @@ -13,6 +13,7 @@ from scipy.sparse.csgraph import laplacian from sklearn.covariance import graph_lasso from sklearn.utils.extmath import pinvh +from sklearn.utils.validation import check_array from .base_metric import BaseMetricLearner from .constraints import Constraints @@ -26,32 +27,34 @@ def __init__(self, balance_param=0.5, sparsity_param=0.01, use_cov=True, ---------- balance_param : float, optional trade off between sparsity and M0 prior + sparsity_param : float, optional trade off between optimizer and sparseness (see graph_lasso) + use_cov : bool, optional controls prior matrix, will use the identity if use_cov=False + verbose : bool, optional if True, prints information while learning """ - self.params = { - 'balance_param': balance_param, - 'sparsity_param': sparsity_param, - 'use_cov': use_cov, - 'verbose': verbose, - } + self.balance_param = balance_param + self.sparsity_param = sparsity_param + self.use_cov = use_cov + self.verbose = verbose def _prepare_inputs(self, X, W): - self.X = X + self.X_ = X = check_array(X) + W = check_array(W, accept_sparse=True) # set up prior M - if self.params['use_cov']: - self.M = np.cov(X.T) + if self.use_cov: + self.M_ = np.cov(X.T) else: - self.M = np.identity(X.shape[1]) + self.M_ = np.identity(X.shape[1]) L = laplacian(W, normed=False) - self.loss_matrix = self.X.T.dot(L.dot(self.X)) + return X.T.dot(L.dot(X)) def metric(self): - return self.M + return self.M_ def fit(self, X, W): """Learn the SDML model. @@ -68,13 +71,12 @@ def fit(self, X, W): self : object Returns the instance. """ - self._prepare_inputs(X, W) - P = pinvh(self.M) + self.params['balance_param'] * self.loss_matrix + loss_matrix = self._prepare_inputs(X, W) + P = pinvh(self.M_) + self.balance_param * loss_matrix emp_cov = pinvh(P) # hack: ensure positive semidefinite emp_cov = emp_cov.T.dot(emp_cov) - self.M, _ = graph_lasso(emp_cov, self.params['sparsity_param'], - verbose=self.params['verbose']) + self.M_, _ = graph_lasso(emp_cov, self.sparsity_param, verbose=self.verbose) return self @@ -100,16 +102,17 @@ def __init__(self, balance_param=0.5, sparsity_param=0.01, use_cov=True, SDML.__init__(self, balance_param=balance_param, sparsity_param=sparsity_param, use_cov=use_cov, verbose=verbose) - self.params.update(num_labeled=num_labeled, num_constraints=num_constraints) + self.num_labeled = num_labeled + self.num_constraints = num_constraints - def fit(self, X, labels, random_state=np.random): + def fit(self, X, y, random_state=np.random): """Create constraints from labels and learn the SDML model. Parameters ---------- X : array-like, shape (n, d) data matrix, where each row corresponds to a single instance - labels : array-like, shape (n,) + y : array-like, shape (n,) data labels, one for each instance random_state : {numpy.random.RandomState, int}, optional Random number generator or random seed. If not given, the singleton @@ -120,12 +123,13 @@ def fit(self, X, labels, random_state=np.random): self : object Returns the instance. 
""" - num_constraints = self.params['num_constraints'] + y = check_array(y, ensure_2d=False) + num_constraints = self.num_constraints if num_constraints is None: - num_classes = len(np.unique(labels)) + num_classes = len(np.unique(y)) num_constraints = 20 * num_classes**2 - c = Constraints.random_subset(labels, self.params['num_labeled'], + c = Constraints.random_subset(y, self.num_labeled, random_state=random_state) adj = c.adjacency_matrix(num_constraints, random_state=random_state) return SDML.fit(self, X, adj) diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py index 66bb75d8..e67dad7a 100644 --- a/test/metric_learn_test.py +++ b/test/metric_learn_test.py @@ -107,7 +107,7 @@ def test_iris(self): class TestLFDA(MetricTestCase): def test_iris(self): - lfda = LFDA(k=2, dim=2) + lfda = LFDA(k=2, num_dims=2) lfda.fit(self.iris_points, self.iris_labels) csep = class_separation(lfda.transform(), self.iris_labels) self.assertLess(csep, 0.15) @@ -115,22 +115,23 @@ def test_iris(self): class TestRCA(MetricTestCase): def test_iris(self): - rca = RCA_Supervised(dim=2, num_chunks=30, chunk_size=2) + rca = RCA_Supervised(num_dims=2, num_chunks=30, chunk_size=2) rca.fit(self.iris_points, self.iris_labels) csep = class_separation(rca.transform(), self.iris_labels) self.assertLess(csep, 0.25) def test_feature_null_variance(self): - X = np.hstack((self.iris_points, np.eye(len(self.iris_points), M = 1))) + X = np.hstack((self.iris_points, np.eye(len(self.iris_points), M=1))) # Apply PCA with the number of components - rca = RCA_Supervised(dim=2, pca_comps=3, num_chunks=30, chunk_size=2) + rca = RCA_Supervised(num_dims=2, pca_comps=3, num_chunks=30, chunk_size=2) rca.fit(X, self.iris_labels) csep = class_separation(rca.transform(), self.iris_labels) self.assertLess(csep, 0.30) # Apply PCA with the minimum variance ratio - rca = RCA_Supervised(dim=2, pca_comps=0.95, num_chunks=30, chunk_size=2) + rca = RCA_Supervised(num_dims=2, pca_comps=0.95, num_chunks=30, + chunk_size=2) rca.fit(X, self.iris_labels) csep = class_separation(rca.transform(), self.iris_labels) self.assertLess(csep, 0.30) diff --git a/test/test_base_metric.py b/test/test_base_metric.py index ad242b86..10b47254 100644 --- a/test/test_base_metric.py +++ b/test/test_base_metric.py @@ -2,50 +2,66 @@ import metric_learn -class TestBaseMetric(unittest.TestCase): - - def test_string_repr(self): - # we don't test LMNN here because it could be python_LMNN +class TestStringRepr(unittest.TestCase): + def test_covariance(self): self.assertEqual(str(metric_learn.Covariance()), "Covariance()") + def test_lmnn(self): + self.assertRegexpMatches( + str(metric_learn.LMNN()), + r"(python_)?LMNN\(convergence_tol=0.001, k=3, learn_rate=1e-07, " + r"max_iter=1000,\n min_iter=50, regularization=0.5, " + r"use_pca=True, verbose=False\)") + + def test_nca(self): self.assertEqual(str(metric_learn.NCA()), "NCA(learning_rate=0.01, max_iter=100, num_dims=None)") + def test_lfda(self): self.assertEqual(str(metric_learn.LFDA()), - "LFDA(dim=None, k=7, metric='weighted')") + "LFDA(k=None, metric='weighted', num_dims=None)") + def test_itml(self): self.assertEqual(str(metric_learn.ITML()), """ -ITML(convergence_threshold=0.001, gamma=1.0, max_iters=1000, verbose=False) +ITML(A0=None, convergence_threshold=0.001, gamma=1.0, max_iter=1000, + verbose=False) """.strip('\n')) self.assertEqual(str(metric_learn.ITML_Supervised()), """ ITML_Supervised(A0=None, bounds=None, convergence_threshold=0.001, gamma=1.0, - max_iters=1000, num_constraints=None, 
num_labeled=inf, - verbose=False) + max_iter=1000, num_constraints=None, num_labeled=inf, + verbose=False) """.strip('\n')) - self.assertEqual(str(metric_learn.LSML()), - "LSML(max_iter=1000, tol=0.001, verbose=False)") + def test_lsml(self): + self.assertEqual( + str(metric_learn.LSML()), + "LSML(max_iter=1000, prior=None, tol=0.001, verbose=False)") self.assertEqual(str(metric_learn.LSML_Supervised()), """ LSML_Supervised(max_iter=1000, num_constraints=None, num_labeled=inf, - prior=None, tol=0.001, verbose=False, weights=None) + prior=None, tol=0.001, verbose=False, weights=None) """.strip('\n')) - self.assertEqual(str(metric_learn.SDML()), """ -SDML(balance_param=0.5, sparsity_param=0.01, use_cov=True, verbose=False) -""".strip('\n')) + def test_sdml(self): + self.assertEqual(str(metric_learn.SDML()), + "SDML(balance_param=0.5, sparsity_param=0.01, " + "use_cov=True, verbose=False)") self.assertEqual(str(metric_learn.SDML_Supervised()), """ SDML_Supervised(balance_param=0.5, num_constraints=None, num_labeled=inf, - sparsity_param=0.01, use_cov=True, verbose=False) + sparsity_param=0.01, use_cov=True, verbose=False) """.strip('\n')) - self.assertEqual(str(metric_learn.RCA()), "RCA(dim=None, pca_comps=None)") + def test_rca(self): + self.assertEqual(str(metric_learn.RCA()), + "RCA(num_dims=None, pca_comps=None)") self.assertEqual(str(metric_learn.RCA_Supervised()), - "RCA_Supervised(chunk_size=2, dim=None, num_chunks=100, pca_comps=None)") + "RCA_Supervised(chunk_size=2, num_chunks=100, " + "num_dims=None, pca_comps=None)") - self.assertEqual(str(metric_learn.MLKR()), """ -MLKR(A0=None, alpha=0.0001, epsilon=0.01, max_iter=1000, num_dims=None) -""".strip('\n')) + def test_mlkr(self): + self.assertEqual(str(metric_learn.MLKR()), + "MLKR(A0=None, alpha=0.0001, epsilon=0.01, " + "max_iter=1000, num_dims=None)") if __name__ == '__main__': unittest.main() diff --git a/test/test_fit_transform.py b/test/test_fit_transform.py index 8f9f32a1..eff8fa01 100644 --- a/test/test_fit_transform.py +++ b/test/test_fit_transform.py @@ -84,11 +84,11 @@ def test_nca(self): assert_array_almost_equal(res_1, res_2) def test_lfda(self): - lfda = LFDA(k=2, dim=2) + lfda = LFDA(k=2, num_dims=2) lfda.fit(self.X, self.y) res_1 = lfda.transform() - lfda = LFDA(k=2, dim=2) + lfda = LFDA(k=2, num_dims=2) res_2 = lfda.fit_transform(self.X, self.y) # signs may be flipped, that's okay @@ -98,12 +98,12 @@ def test_lfda(self): def test_rca_supervised(self): seed = np.random.RandomState(1234) - rca = RCA_Supervised(dim=2, num_chunks=30, chunk_size=2) + rca = RCA_Supervised(num_dims=2, num_chunks=30, chunk_size=2) rca.fit(self.X, self.y, random_state=seed) res_1 = rca.transform() seed = np.random.RandomState(1234) - rca = RCA_Supervised(dim=2, num_chunks=30, chunk_size=2) + rca = RCA_Supervised(num_dims=2, num_chunks=30, chunk_size=2) res_2 = rca.fit_transform(self.X, self.y, random_state=seed) assert_array_almost_equal(res_1, res_2) diff --git a/test/test_sklearn_compat.py b/test/test_sklearn_compat.py new file mode 100644 index 00000000..58c7cd05 --- /dev/null +++ b/test/test_sklearn_compat.py @@ -0,0 +1,65 @@ +import numpy as np +import unittest +from sklearn.utils.estimator_checks import check_estimator + +from metric_learn import ( + LMNN, NCA, LFDA, Covariance, MLKR, + LSML_Supervised, ITML_Supervised, SDML_Supervised, RCA_Supervised) + + +# Wrap the _Supervised methods with a deterministic wrapper for testing. 
+class deterministic_mixin(object): + def fit(self, X, y): + rs = np.random.RandomState(1234) + return super(deterministic_mixin, self).fit(X, y, random_state=rs) + + +class dLSML(deterministic_mixin, LSML_Supervised): + pass + + +class dITML(deterministic_mixin, ITML_Supervised): + pass + + +class dSDML(deterministic_mixin, SDML_Supervised): + pass + + +class dRCA(deterministic_mixin, RCA_Supervised): + pass + + +class TestSklearnCompat(unittest.TestCase): + def test_covariance(self): + check_estimator(Covariance) + + def test_lmnn(self): + check_estimator(LMNN) + + def test_lfda(self): + check_estimator(LFDA) + + def test_mlkr(self): + check_estimator(MLKR) + + def test_nca(self): + check_estimator(NCA) + + def test_lsml(self): + check_estimator(dLSML) + + def test_itml(self): + check_estimator(dITML) + + # This fails due to a FloatingPointError + # def test_sdml(self): + # check_estimator(dSDML) + + # This fails because the default num_chunks isn't data-dependent. + # def test_rca(self): + # check_estimator(RCA_Supervised) + + +if __name__ == '__main__': + unittest.main() From cdd8ba605f35c75dcd57d2450710e96b2ee7d3fe Mon Sep 17 00:00:00 2001 From: CJ Carey Date: Thu, 2 Mar 2017 16:09:08 -0500 Subject: [PATCH 040/210] Bumping version [ci skip] --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 2031754a..c661dd10 100755 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- from setuptools import setup -version = "0.3.0" +version = "0.4.0" setup(name='metric-learn', version=version, description='Python implementations of metric learning algorithms', From efb05df4c6a21d575fbee7bfd0034e61577d971c Mon Sep 17 00:00:00 2001 From: CJ Carey Date: Thu, 2 Mar 2017 16:10:02 -0500 Subject: [PATCH 041/210] Ignore .cache dir [ci skip] --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 3f029036..4c81e9fa 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,4 @@ dist/ *.egg-info .coverage htmlcov/ +.cache/ From 471cddc4617cf1252a56e24d79482eb14508d292 Mon Sep 17 00:00:00 2001 From: CJ Carey Date: Thu, 2 Mar 2017 17:11:32 -0500 Subject: [PATCH 042/210] Removing unneeded param --- metric_learn/rca.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metric_learn/rca.py b/metric_learn/rca.py index 78964ba0..7f9b7273 100644 --- a/metric_learn/rca.py +++ b/metric_learn/rca.py @@ -63,7 +63,7 @@ def _process_data(self, X): # PCA projection to remove noise and redundant information. 
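The one-line change just below drops svd_solver='full'. A plausible motivation (an assumption on my part, since the commit message only calls the param unneeded) is that the keyword first appeared in scikit-learn 0.18, so passing it unconditionally breaks older installs. A version-guard sketch of the alternative the patch did not take:

    # Illustrative only; the patch instead omits the kwarg entirely.
    from distutils.version import LooseVersion
    import sklearn
    from sklearn.decomposition import PCA

    if LooseVersion(sklearn.__version__) >= LooseVersion('0.18'):
        pca = PCA(n_components=2, svd_solver='full')
    else:
        pca = PCA(n_components=2)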
if self.pca_comps is not None: - pca = decomposition.PCA(n_components=self.pca_comps, svd_solver='full') + pca = decomposition.PCA(n_components=self.pca_comps) X = pca.fit_transform(X) M_pca = pca.components_ else: From 68a5b237178b865408d1ec33d9570fefdc238297 Mon Sep 17 00:00:00 2001 From: CJ Carey Date: Thu, 2 Mar 2017 17:37:13 -0500 Subject: [PATCH 043/210] Fixing TravisCI errors --- metric_learn/itml.py | 5 +++-- metric_learn/lfda.py | 6 +++--- metric_learn/lsml.py | 4 ++-- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/metric_learn/itml.py b/metric_learn/itml.py index 7f20cc3e..b40145b6 100644 --- a/metric_learn/itml.py +++ b/metric_learn/itml.py @@ -17,7 +17,7 @@ import numpy as np from six.moves import xrange from sklearn.metrics import pairwise_distances -from sklearn.utils.validation import check_array +from sklearn.utils.validation import check_array, check_X_y from .base_metric import BaseMetricLearner from .constraints import Constraints @@ -64,6 +64,7 @@ def _process_inputs(self, X, constraints, bounds): else: assert len(bounds) == 2 self.bounds_ = bounds + self.bounds_[self.bounds_==0] = 1e-9 # init metric if self.A0 is None: self.A_ = np.identity(X.shape[1]) @@ -192,7 +193,7 @@ def fit(self, X, y, random_state=np.random): random_state : numpy.random.RandomState, optional If provided, controls random number generation. """ - y = check_array(y, ensure_2d=False) + X, y = check_X_y(X, y) num_constraints = self.num_constraints if num_constraints is None: num_classes = len(np.unique(y)) diff --git a/metric_learn/lfda.py b/metric_learn/lfda.py index d36273a7..cd699d17 100644 --- a/metric_learn/lfda.py +++ b/metric_learn/lfda.py @@ -16,7 +16,7 @@ import warnings from six.moves import xrange from sklearn.metrics import pairwise_distances -from sklearn.utils.validation import check_array +from sklearn.utils.validation import check_X_y from .base_metric import BaseMetricLearner @@ -55,9 +55,9 @@ def transformer(self): return self.transformer_ def _process_inputs(self, X, y): - self.X_ = check_array(X) - n, d = self.X_.shape unique_classes, y = np.unique(y, return_inverse=True) + self.X_, y = check_X_y(X, y) + n, d = self.X_.shape num_classes = len(unique_classes) if self.num_dims is None: diff --git a/metric_learn/lsml.py b/metric_learn/lsml.py index 049cf901..f329fe5e 100644 --- a/metric_learn/lsml.py +++ b/metric_learn/lsml.py @@ -11,7 +11,7 @@ import numpy as np import scipy.linalg from six.moves import xrange -from sklearn.utils.validation import check_array +from sklearn.utils.validation import check_array, check_X_y from .base_metric import BaseMetricLearner from .constraints import Constraints @@ -171,7 +171,7 @@ def fit(self, X, y, random_state=np.random): random_state : numpy.random.RandomState, optional If provided, controls random number generation. """ - y = check_array(y, ensure_2d=False) + X, y = check_X_y(X, y) num_constraints = self.num_constraints if num_constraints is None: num_classes = len(np.unique(y)) From 238be72c00fde4f4351601ef1ce8c2fcd710c9e3 Mon Sep 17 00:00:00 2001 From: CJ Carey Date: Mon, 20 Mar 2017 12:19:35 -0400 Subject: [PATCH 044/210] Adding pdist example, fixing bad distance expr --- README.rst | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/README.rst b/README.rst index 22a81155..9bb762b4 100644 --- a/README.rst +++ b/README.rst @@ -40,11 +40,13 @@ default implementations for the methods ``metric``, ``transformer``, and ``metric`` or ``transformer``. 
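A runnable sketch of these access patterns (including the pdist idiom shown in the following paragraph), using Covariance as the simplest learner; the data is synthetic:

    import numpy as np
    from numpy.linalg import inv
    from scipy.spatial.distance import pdist
    from metric_learn import Covariance

    X = np.random.RandomState(0).randn(20, 3)
    foo = Covariance().fit(X)
    dists = pdist(X, metric='mahalanobis', VI=inv(foo.metric()))
    X_embedded = foo.transform()   # == X.dot(foo.transformer().T)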
For an instance of a metric learner named ``foo`` learning from a set of -``d``-dimensional points, ``foo.metric()`` returns a ``d`` by ``d`` -matrix ``M`` such that a distance between vectors ``x`` and ``y`` is -expressed ``(x-y).dot(M).dot(x-y)``. +``d``-dimensional points, ``foo.metric()`` returns a ``d x d`` +matrix ``M`` such that the distance between vectors ``x`` and ``y`` is +expressed ``sqrt((x-y).dot(inv(M)).dot(x-y))``. +Using scipy's ``pdist`` function, this would look like +``pdist(X, metric='mahalanobis', VI=inv(foo.metric()))``. -In the same scenario, ``foo.transformer()`` returns a ``d`` by ``d`` +In the same scenario, ``foo.transformer()`` returns a ``d x d`` matrix ``L`` such that a vector ``x`` can be represented in the learned space as the vector ``x.dot(L.T)``. From 70c48fd5f5bf2c80c97b502f172dd3abe0e1e926 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Barz?= Date: Wed, 17 May 2017 02:06:17 +0200 Subject: [PATCH 045/210] Clarified documentation of ITML (resolves #58) (#59) --- metric_learn/itml.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/metric_learn/itml.py b/metric_learn/itml.py index b40145b6..4c154ad4 100644 --- a/metric_learn/itml.py +++ b/metric_learn/itml.py @@ -80,7 +80,8 @@ def fit(self, X, constraints, bounds=None): X : (n x d) data matrix each row corresponds to a single instance constraints : 4-tuple of arrays - (a,b,c,d) indices into X, such that d(X[a],X[b]) < d(X[c],X[d]) + (a,b,c,d) indices into X, with (a,b) specifying positive and (c,d) + negative pairs bounds : list (pos,neg) pairs, optional bounds on similarity, s.t. d(X[a],X[b]) < pos and d(X[c],X[d]) > neg """ From cc88cbe8b56ef9c4fda8847836367c53f74293a7 Mon Sep 17 00:00:00 2001 From: CJ Carey Date: Tue, 16 May 2017 20:29:06 -0400 Subject: [PATCH 046/210] Replace 'metric' parameter with a better name Fixes #54. --- metric_learn/lfda.py | 14 +++++++------- test/metric_learn_test.py | 4 ++++ test/test_base_metric.py | 2 +- 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/metric_learn/lfda.py b/metric_learn/lfda.py index cd699d17..dbe5aa4f 100644 --- a/metric_learn/lfda.py +++ b/metric_learn/lfda.py @@ -26,7 +26,7 @@ class LFDA(BaseMetricLearner): Local Fisher Discriminant Analysis for Supervised Dimensionality Reduction Sugiyama, ICML 2006 ''' - def __init__(self, num_dims=None, k=None, metric='weighted'): + def __init__(self, num_dims=None, k=None, embedding_type='weighted'): ''' Initialize LFDA. @@ -39,16 +39,16 @@ def __init__(self, num_dims=None, k=None, metric='weighted'): Number of nearest neighbors used in local scaling method. Defaults to min(7, num_dims - 1). 
- metric : str, optional + embedding_type : str, optional Type of metric in the embedding space (default: 'weighted') 'weighted' - weighted eigenvectors 'orthonormalized' - orthonormalized 'plain' - raw eigenvectors ''' - if metric not in ('weighted', 'orthonormalized', 'plain'): - raise ValueError('Invalid metric: %r' % metric) + if embedding_type not in ('weighted', 'orthonormalized', 'plain'): + raise ValueError('Invalid embedding_type: %r' % embedding_type) self.num_dims = num_dims - self.metric = metric + self.embedding_type = embedding_type self.k = k def transformer(self): @@ -122,9 +122,9 @@ def fit(self, X, y): vals = vals[order].real vecs = vecs[:,order] - if self.metric == 'weighted': + if self.embedding_type == 'weighted': vecs *= np.sqrt(vals) - elif self.metric == 'orthonormalized': + elif self.embedding_type == 'orthonormalized': vecs, _ = np.linalg.qr(vecs) self.transformer_ = vecs.T diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py index e67dad7a..1e7f31fe 100644 --- a/test/metric_learn_test.py +++ b/test/metric_learn_test.py @@ -112,6 +112,10 @@ def test_iris(self): csep = class_separation(lfda.transform(), self.iris_labels) self.assertLess(csep, 0.15) + # Sanity checks for learned matrices. + self.assertEqual(lfda.metric().shape, (4, 4)) + self.assertEqual(lfda.transformer().shape, (2, 4)) + class TestRCA(MetricTestCase): def test_iris(self): diff --git a/test/test_base_metric.py b/test/test_base_metric.py index 10b47254..d73138cd 100644 --- a/test/test_base_metric.py +++ b/test/test_base_metric.py @@ -20,7 +20,7 @@ def test_nca(self): def test_lfda(self): self.assertEqual(str(metric_learn.LFDA()), - "LFDA(k=None, metric='weighted', num_dims=None)") + "LFDA(embedding_type='weighted', k=None, num_dims=None)") def test_itml(self): self.assertEqual(str(metric_learn.ITML()), """ From db23d51126cf45d2f0215ad89faa2b463e4dfead Mon Sep 17 00:00:00 2001 From: CJ Carey Date: Wed, 24 May 2017 09:31:43 -0400 Subject: [PATCH 047/210] Prevent use-before-init bug in LSML As identified in gh-62. --- metric_learn/lsml.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/metric_learn/lsml.py b/metric_learn/lsml.py index f329fe5e..0a251a87 100644 --- a/metric_learn/lsml.py +++ b/metric_learn/lsml.py @@ -68,9 +68,11 @@ def fit(self, X, constraints, weights=None): scale factor for each constraint """ self._prepare_inputs(X, constraints, weights) + step_sizes = np.logspace(-10, 0, 10) prior_inv = scipy.linalg.inv(self.M_) + # Keep track of the best step size and the loss at that step. + l_best = 0 s_best = self._total_loss(self.M_, prior_inv) - step_sizes = np.logspace(-10, 0, 10) if self.verbose: print('initial loss', s_best) for it in xrange(1, self.max_iter+1): From 5c1cc6672bf8c7ef1aa1cbd392f0671feef126a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Barz?= Date: Wed, 24 May 2017 15:33:17 +0200 Subject: [PATCH 048/210] Fixed parameter name in docstring of RCA.fit() (#64) --- metric_learn/rca.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metric_learn/rca.py b/metric_learn/rca.py index 7f9b7273..0d9b3620 100644 --- a/metric_learn/rca.py +++ b/metric_learn/rca.py @@ -97,7 +97,7 @@ def fit(self, data, chunks): Parameters ---------- - X : (n x d) data matrix + data : (n x d) data matrix Each row corresponds to a single instance chunks : (n,) array of ints When ``chunks[i] == -1``, point i doesn't belong to any chunklet. 
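
A usage sketch for the ``fit`` API documented above (illustrative only;
the parameter values mirror this repo's test suite)::

    import numpy as np
    from sklearn.datasets import load_iris
    from metric_learn import RCA_Supervised

    X = load_iris()['data']
    y = load_iris()['target']

    # RCA_Supervised builds the chunklet assignment internally from y;
    # in the raw RCA API, chunks[i] == -1 marks a point in no chunklet.
    rca = RCA_Supervised(num_dims=2, num_chunks=30, chunk_size=2)
    rca.fit(X, y, random_state=np.random.RandomState(1234))
    X_embedded = rca.transform()
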
From 932de85ce38ac86f46d0559f6cf1b93829d56037 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Barz?= Date: Fri, 26 May 2017 16:03:36 +0200 Subject: [PATCH 049/210] Implementation of MMC (#61) * Implementation of PGDM * Python2 compatibility * Speed up PGDM on high-dimensional data * Optimized some summations using `np.einsum` * Addressed requests from review by perimosocordiae * Renamed PGDM to MMC * Addressed 2nd review by perimosocordiae --- README.rst | 1 + metric_learn/__init__.py | 1 + metric_learn/_util.py | 12 + metric_learn/itml.py | 19 +- metric_learn/mmc.py | 436 ++++++++++++++++++++++++++++++++++++ test/metric_learn_test.py | 41 +++- test/test_base_metric.py | 11 + test/test_fit_transform.py | 14 +- test/test_sklearn_compat.py | 9 +- 9 files changed, 526 insertions(+), 18 deletions(-) create mode 100644 metric_learn/_util.py create mode 100644 metric_learn/mmc.py diff --git a/README.rst b/README.rst index 9bb762b4..1e8adbe7 100644 --- a/README.rst +++ b/README.rst @@ -15,6 +15,7 @@ Metric Learning algorithms in Python. - Local Fisher Discriminant Analysis (LFDA) - Relative Components Analysis (RCA) - Metric Learning for Kernel Regression (MLKR) +- Mahalanobis Metric for Clustering (MMC) **Dependencies** diff --git a/metric_learn/__init__.py b/metric_learn/__init__.py index 5a7508c0..b86c10e1 100644 --- a/metric_learn/__init__.py +++ b/metric_learn/__init__.py @@ -10,3 +10,4 @@ from .lfda import LFDA from .rca import RCA, RCA_Supervised from .mlkr import MLKR +from .mmc import MMC, MMC_Supervised diff --git a/metric_learn/_util.py b/metric_learn/_util.py new file mode 100644 index 00000000..b34860d6 --- /dev/null +++ b/metric_learn/_util.py @@ -0,0 +1,12 @@ +import numpy as np + + +# hack around lack of axis kwarg in older numpy versions +try: + np.linalg.norm([[4]], axis=1) +except TypeError: + def vector_norm(X): + return np.apply_along_axis(np.linalg.norm, 1, X) +else: + def vector_norm(X): + return np.linalg.norm(X, axis=1) \ No newline at end of file diff --git a/metric_learn/itml.py b/metric_learn/itml.py index 4c154ad4..7169fb36 100644 --- a/metric_learn/itml.py +++ b/metric_learn/itml.py @@ -21,6 +21,7 @@ from .base_metric import BaseMetricLearner from .constraints import Constraints +from ._util import vector_norm class ITML(BaseMetricLearner): @@ -54,10 +55,10 @@ def _process_inputs(self, X, constraints, bounds): self.X_ = X = check_array(X) # check to make sure that no two constrained vectors are identical a,b,c,d = constraints - ident = _vector_norm(X[a] - X[b]) > 1e-9 - a, b = a[ident], b[ident] - ident = _vector_norm(X[c] - X[d]) > 1e-9 - c, d = c[ident], d[ident] + no_ident = vector_norm(X[a] - X[b]) > 1e-9 + a, b = a[no_ident], b[no_ident] + no_ident = vector_norm(X[c] - X[d]) > 1e-9 + c, d = c[no_ident], d[no_ident] # init bounds if bounds is None: self.bounds_ = np.percentile(pairwise_distances(X), (5, 95)) @@ -138,16 +139,6 @@ def fit(self, X, constraints, bounds=None): def metric(self): return self.A_ -# hack around lack of axis kwarg in older numpy versions -try: - np.linalg.norm([[4]], axis=1) -except TypeError: - def _vector_norm(X): - return np.apply_along_axis(np.linalg.norm, 1, X) -else: - def _vector_norm(X): - return np.linalg.norm(X, axis=1) - class ITML_Supervised(ITML): """Information Theoretic Metric Learning (ITML)""" diff --git a/metric_learn/mmc.py b/metric_learn/mmc.py new file mode 100644 index 00000000..7760e1b1 --- /dev/null +++ b/metric_learn/mmc.py @@ -0,0 +1,436 @@ +""" +Mahalanobis Metric Learning with Application for Clustering 
with Side-Information, Xing et al., NIPS 2002
+
+MMC minimizes the sum of squared distances between similar examples,
+while enforcing the sum of distances between dissimilar examples to be
+greater than a certain margin.
+This leads to a convex and, thus, local-minima-free optimization problem
+that can be solved efficiently.
+However, the algorithm involves the computation of eigenvalues, which is the
+main speed bottleneck.
+Since it was initially designed for clustering applications, one of the
+implicit assumptions of MMC is that all classes form a compact set, i.e.,
+follow a unimodal distribution, which restricts the possible use-cases of
+this method. However, it is one of the earliest methods and is still often cited.
+
+Adapted from Matlab code at http://www.cs.cmu.edu/%7Eepxing/papers/Old_papers/code_Metric_online.tar.gz
+"""
+
+from __future__ import print_function, absolute_import, division
+import numpy as np
+from six.moves import xrange
+from sklearn.metrics import pairwise_distances
+from sklearn.utils.validation import check_array, check_X_y
+
+from .base_metric import BaseMetricLearner
+from .constraints import Constraints
+from ._util import vector_norm
+
+
+
+class MMC(BaseMetricLearner):
+  """Mahalanobis Metric for Clustering (MMC)"""
+  def __init__(self, max_iter=100, max_proj=10000, convergence_threshold=1e-3,
+               A0=None, diagonal=False, diagonal_c=1.0, verbose=False):
+    """Initialize MMC.
+    Parameters
+    ----------
+    max_iter : int, optional
+    max_proj : int, optional
+    convergence_threshold : float, optional
+    A0 : (d x d) matrix, optional
+      initial metric, defaults to identity
+      only the main diagonal is taken if `diagonal == True`
+    diagonal : bool, optional
+      if True, a diagonal metric will be learned,
+      i.e., a simple scaling of dimensions
+    diagonal_c : float, optional
+      weight of the dissimilarity constraint for diagonal
+      metric learning
+    verbose : bool, optional
+      if True, prints information while learning
+    """
+    self.max_iter = max_iter
+    self.max_proj = max_proj
+    self.convergence_threshold = convergence_threshold
+    self.A0 = A0
+    self.diagonal = diagonal
+    self.diagonal_c = diagonal_c
+    self.verbose = verbose
+
+  def fit(self, X, constraints):
+    """Learn the MMC model.
+    Parameters
+    ----------
+    X : (n x d) data matrix
+      each row corresponds to a single instance
+    constraints : 4-tuple of arrays
+      (a,b,c,d) indices into X, with (a,b) specifying similar and (c,d)
+      dissimilar pairs
+    """
+    constraints = self._process_inputs(X, constraints)
+    if self.diagonal:
+      return self._fit_diag(X, constraints)
+    else:
+      return self._fit_full(X, constraints)
+
+  def _process_inputs(self, X, constraints):
+
+    self.X_ = X = check_array(X)
+
+    # check to make sure that no two constrained vectors are identical
+    a,b,c,d = constraints
+    no_ident = vector_norm(X[a] - X[b]) > 1e-9
+    a, b = a[no_ident], b[no_ident]
+    no_ident = vector_norm(X[c] - X[d]) > 1e-9
+    c, d = c[no_ident], d[no_ident]
+    if len(a) == 0:
+      raise ValueError('No non-trivial similarity constraints given for MMC.')
+    if len(c) == 0:
+      raise ValueError('No non-trivial dissimilarity constraints given for MMC.')
+
+    # init metric
+    if self.A0 is None:
+      self.A_ = np.identity(X.shape[1])
+      if not self.diagonal:
+        # Don't know why division by 10... it's in the original code
+        # and seems to affect the overall scale of the learned metric.
+        self.A_ /= 10.0
+    else:
+      self.A_ = check_array(self.A0)
+
+    return a,b,c,d
+
+  def _fit_full(self, X, constraints):
+    """Learn full metric using MMC. 
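+
+    (Overview: the loop below alternately projects A onto the linear
+    similarity constraint and onto the PSD cone, then takes a gradient
+    ascent step on the dissimilarity objective.)
+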
+ Parameters + ---------- + X : (n x d) data matrix + each row corresponds to a single instance + constraints : 4-tuple of arrays + (a,b,c,d) indices into X, with (a,b) specifying similar and (c,d) + dissimilar pairs + """ + a,b,c,d = constraints + num_pos = len(a) + num_neg = len(c) + num_samples, num_dim = X.shape + + error1 = error2 = 1e10 + eps = 0.01 # error-bound of iterative projection on C1 and C2 + A = self.A_ + + # Create weight vector from similar samples + pos_diff = X[a] - X[b] + w = np.einsum('ij,ik->jk', pos_diff, pos_diff).ravel() + # `w` is the sum of all outer products of the rows in `pos_diff`. + # The above `einsum` is equivalent to the much more inefficient: + # w = np.apply_along_axis( + # lambda x: np.outer(x,x).ravel(), + # 1, + # X[a] - X[b] + # ).sum(axis = 0) + t = w.dot(A.ravel()) / 100.0 + + w_norm = np.linalg.norm(w) + w1 = w / w_norm # make `w` a unit vector + t1 = t / w_norm # distance from origin to `w^T*x=t` plane + + cycle = 1 + alpha = 0.1 # initial step size along gradient + + grad1 = self._fS1(X, a, b, A) # gradient of similarity constraint function + grad2 = self._fD1(X, c, d, A) # gradient of dissimilarity constraint function + M = self._grad_projection(grad1, grad2) # gradient of fD1 orthogonal to fS1 + + A_old = A.copy() + + for cycle in xrange(self.max_iter): + + # projection of constraints C1 and C2 + satisfy = False + + for it in xrange(self.max_proj): + + # First constraint: + # f(A) = \sum_{i,j \in S} d_ij' A d_ij <= t (1) + # (1) can be rewritten as a linear constraint: w^T x = t, + # where x is the unrolled matrix of A, + # w is also an unrolled matrix of W where + # W_{kl}= \sum_{i,j \in S}d_ij^k * d_ij^l + x0 = A.ravel() + if w.dot(x0) <= t: + x = x0 + else: + x = x0 + (t1 - w1.dot(x0)) * w1 + A[:] = x.reshape(num_dim, num_dim) + + # Second constraint: + # PSD constraint A >= 0 + # project A onto domain A>0 + l, V = np.linalg.eigh((A + A.T) / 2) + A[:] = np.dot(V * np.maximum(0, l[None,:]), V.T) + + fDC2 = w.dot(A.ravel()) + error2 = (fDC2 - t) / t + if error2 < eps: + satisfy = True + break + + # third constraint: gradient ascent + # max: g(A) >= 1 + # here we suppose g(A) = fD(A) = \sum_{I,J \in D} sqrt(d_ij' A d_ij) + + obj_previous = self._fD(X, c, d, A_old) # g(A_old) + obj = self._fD(X, c, d, A) # g(A) + + if satisfy and (obj > obj_previous or cycle == 0): + + # If projection of 1 and 2 is successful, and such projection + # improves objective function, slightly increase learning rate + # and update from the current A. + alpha *= 1.05 + A_old[:] = A + grad2 = self._fS1(X, a, b, A) + grad1 = self._fD1(X, c, d, A) + M = self._grad_projection(grad1, grad2) + A += alpha * M + + else: + + # If projection of 1 and 2 failed, or obj <= obj_previous due + # to projection of 1 and 2, shrink learning rate and re-update + # from the previous A. + alpha /= 2 + A[:] = A_old + alpha * M + + delta = np.linalg.norm(alpha * M) / np.linalg.norm(A_old) + if delta < self.convergence_threshold: + break + if self.verbose: + print('mmc iter: %d, conv = %f, projections = %d' % (cycle, delta, it+1)) + + if delta > self.convergence_threshold: + self.converged_ = False + if self.verbose: + print('mmc did not converge, conv = %f' % (delta,)) + else: + self.converged_ = True + if self.verbose: + print('mmc converged at iter %d, conv = %f' % (cycle, delta)) + self.A_[:] = A_old + self.n_iter_ = cycle + return self + + def _fit_diag(self, X, constraints): + """Learn diagonal metric using MMC. 
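+
+    (Overview: the loop below takes Newton-Raphson steps on the diagonal
+    w = diag(A), with a simple search over the step size, clipping at
+    zero to keep the learned metric positive semi-definite.)
+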
+    Parameters
+    ----------
+    X : (n x d) data matrix
+      each row corresponds to a single instance
+    constraints : 4-tuple of arrays
+      (a,b,c,d) indices into X, with (a,b) specifying similar and (c,d)
+      dissimilar pairs
+    """
+    a,b,c,d = constraints
+    num_pos = len(a)
+    num_neg = len(c)
+    num_samples, num_dim = X.shape
+
+    s_sum = np.sum((X[a] - X[b]) ** 2, axis=0)
+
+    it = 0
+    error = 1.0
+    eps = 1e-6
+    reduction = 2.0
+    w = np.diag(self.A_).copy()
+
+    while error > self.convergence_threshold:
+
+      fD0, fD_1st_d, fD_2nd_d = self._D_constraint(X, c, d, w)
+      obj_initial = np.dot(s_sum, w) + self.diagonal_c * fD0
+      fS_1st_d = s_sum  # first derivative of the similarity constraints
+
+      gradient = fS_1st_d - self.diagonal_c * fD_1st_d               # gradient of the objective
+      hessian = -self.diagonal_c * fD_2nd_d + eps * np.eye(num_dim)  # Hessian of the objective
+      step = np.dot(np.linalg.inv(hessian), gradient)
+
+      # Newton-Raphson update
+      # search over optimal lambda
+      lambd = 1  # initial step-size
+      w_tmp = np.maximum(0, w - lambd * step)
+
+      obj = np.dot(s_sum, w_tmp) + self.diagonal_c * self._D_objective(X, c, d, w_tmp)
+      obj_previous = obj * 1.1  # just to get the while-loop started
+
+      inner_it = 0
+      while obj < obj_previous:
+        obj_previous = obj
+        w_previous = w_tmp.copy()
+        lambd /= reduction
+        w_tmp = np.maximum(0, w - lambd * step)
+        obj = np.dot(s_sum, w_tmp) + self.diagonal_c * self._D_objective(X, c, d, w_tmp)
+        inner_it += 1
+
+      w[:] = w_previous
+      error = np.abs((obj_previous - obj_initial) / obj_previous)
+      if self.verbose:
+        print('mmc iter: %d, conv = %f' % (it, error))
+      it += 1
+
+    self.A_ = np.diag(w)
+    return self
+
+  def _fD(self, X, c, d, A):
+    """The value of the dissimilarity constraint function.
+
+    f = f(\sum_{ij \in D} distance(x_i, x_j))
+    i.e. distance can be L1: \sqrt{(x_i-x_j)A(x_i-x_j)'}
+    """
+    diff = X[c] - X[d]
+    return np.log(np.sum(np.sqrt(np.sum(np.dot(diff, A) * diff, axis=1))) + 1e-6)
+
+  def _fD1(self, X, c, d, A):
+    """The gradient of the dissimilarity constraint function w.r.t. A.
+
+    For example, let distance be the L1 norm:
+    f = f(\sum_{ij \in D} \sqrt{(x_i-x_j)A(x_i-x_j)'})
+    df/dA_{kl} = f'* d(\sum_{ij \in D} \sqrt{(x_i-x_j)^k*(x_i-x_j)^l})/dA_{kl}
+
+    Note that d_ij*A*d_ij' = tr(d_ij*A*d_ij') = tr(d_ij'*d_ij*A)
+    so, d(d_ij*A*d_ij')/dA = d_ij'*d_ij
+    df/dA = f'(\sum_{ij \in D} \sqrt{tr(d_ij'*d_ij*A)})
+            * 0.5*(\sum_{ij \in D} (1/sqrt{tr(d_ij'*d_ij*A)})*(d_ij'*d_ij))
+    """
+    dim = X.shape[1]
+    diff = X[c] - X[d]
+    # outer products of all rows in `diff`
+    M = np.einsum('ij,ik->ijk', diff, diff)
+    # faster version of: dist = np.sqrt(np.sum(M * A[None,:,:], axis=(1,2)))
+    dist = np.sqrt(np.einsum('ijk,jk', M, A))
+    # faster version of: sum_deri = np.sum(M / (2 * (dist[:,None,None] + 1e-6)), axis=0)
+    sum_deri = np.einsum('ijk,i->jk', M, 0.5 / (dist + 1e-6))
+    sum_dist = dist.sum()
+    return sum_deri / (sum_dist + 1e-6)
+
+  def _fS1(self, X, a, b, A):
+    """The gradient of the similarity constraint function w.r.t. A. 
+ + f = \sum_{ij}(x_i-x_j)A(x_i-x_j)' = \sum_{ij}d_ij*A*d_ij' + df/dA = d(d_ij*A*d_ij')/dA + + Note that d_ij*A*d_ij' = tr(d_ij*A*d_ij') = tr(d_ij'*d_ij*A) + so, d(d_ij*A*d_ij')/dA = d_ij'*d_ij + """ + dim = X.shape[1] + diff = X[a] - X[b] + return np.einsum('ij,ik->jk', diff, diff) # sum of outer products of all rows in `diff` + + def _grad_projection(self, grad1, grad2): + grad2 = grad2 / np.linalg.norm(grad2) + gtemp = grad1 - np.sum(grad1 * grad2) * grad2 + gtemp /= np.linalg.norm(gtemp) + return gtemp + + def _D_objective(self, X, c, d, w): + return np.log(np.sum(np.sqrt(np.sum(((X[c] - X[d]) ** 2) * w[None,:], axis=1) + 1e-6))) + + def _D_constraint(self, X, c, d, w): + """Compute the value, 1st derivative, second derivative (Hessian) of + a dissimilarity constraint function gF(sum_ij distance(d_ij A d_ij)) + where A is a diagonal matrix (in the form of a column vector 'w'). + """ + diff = X[c] - X[d] + diff_sq = diff * diff + dist = np.sqrt(diff_sq.dot(w)) + sum_deri1 = np.einsum('ij,i', diff_sq, 0.5 / np.maximum(dist, 1e-6)) + sum_deri2 = np.einsum( + 'ij,ik->jk', + diff_sq, + diff_sq / (-4 * np.maximum(1e-6, dist**3))[:,None] + ) + sum_dist = dist.sum() + return ( + np.log(sum_dist), + sum_deri1 / sum_dist, + sum_deri2 / sum_dist - np.outer(sum_deri1, sum_deri1) / (sum_dist * sum_dist) + ) + + def metric(self): + return self.A_ + + def transformer(self): + """Computes the transformation matrix from the Mahalanobis matrix. + L = V.T * w^(-1/2), with A = V*w*V.T being the eigenvector decomposition of A with + the eigenvalues in the diagonal matrix w and the columns of V being the eigenvectors. + + The Cholesky decomposition cannot be applied here, since MMC learns only a positive + *semi*-definite Mahalanobis matrix. + + Returns + ------- + L : (d x d) matrix + """ + if self.diagonal: + return np.sqrt(self.A_) + else: + w, V = np.linalg.eigh(self.A_) + return V.T * np.sqrt(np.maximum(0, w[:,None])) + + +class MMC_Supervised(MMC): + """Mahalanobis Metric for Clustering (MMC)""" + def __init__(self, max_iter=100, max_proj=10000, convergence_threshold=1e-6, + num_labeled=np.inf, num_constraints=None, + A0=None, diagonal=False, diagonal_c=1.0, verbose=False): + """Initialize the learner. + Parameters + ---------- + max_iter : int, optional + max_proj : int, optional + convergence_threshold : float, optional + num_labeled : int, optional + number of labels to preserve for training + num_constraints: int, optional + number of constraints to generate + A0 : (d x d) matrix, optional + initial metric, defaults to identity + only the main diagonal is taken if `diagonal == True` + diagonal : bool, optional + if True, a diagonal metric will be learned, + i.e., a simple scaling of dimensions + diagonal_c : float, optional + weight of the dissimilarity constraint for diagonal + metric learning + verbose : bool, optional + if True, prints information while learning + """ + MMC.__init__(self, max_iter=max_iter, max_proj=max_proj, + convergence_threshold=convergence_threshold, + A0=A0, diagonal=diagonal, diagonal_c=diagonal_c, + verbose=verbose) + self.num_labeled = num_labeled + self.num_constraints = num_constraints + + def fit(self, X, y, random_state=np.random): + """Create constraints from labels and learn the MMC model. + Parameters + ---------- + X : (n x d) matrix + Input data, where each row corresponds to a single instance. + y : (n) array-like + Data labels. + random_state : numpy.random.RandomState, optional + If provided, controls random number generation. 
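+
+    Example (an illustrative sketch; mirrors test/test_fit_transform.py)::
+
+      rng = np.random.RandomState(1234)
+      mmc = MMC_Supervised(num_constraints=200)
+      mmc.fit(X, y, random_state=rng)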
+ """ + X, y = check_X_y(X, y) + num_constraints = self.num_constraints + if num_constraints is None: + num_classes = len(np.unique(y)) + num_constraints = 20 * num_classes**2 + + c = Constraints.random_subset(y, self.num_labeled, + random_state=random_state) + pos_neg = c.positive_negative_pairs(num_constraints, + random_state=random_state) + return MMC.fit(self, X, pos_neg) diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py index 1e7f31fe..351b6298 100644 --- a/test/metric_learn_test.py +++ b/test/metric_learn_test.py @@ -6,8 +6,8 @@ from numpy.testing import assert_array_almost_equal from metric_learn import ( - LMNN, NCA, LFDA, Covariance, MLKR, - LSML_Supervised, ITML_Supervised, SDML_Supervised, RCA_Supervised) + LMNN, NCA, LFDA, Covariance, MLKR, MMC, + LSML_Supervised, ITML_Supervised, SDML_Supervised, RCA_Supervised, MMC_Supervised) # Import this specially for testing. from metric_learn.lmnn import python_LMNN @@ -149,5 +149,42 @@ def test_iris(self): self.assertLess(csep, 0.25) +class TestMMC(MetricTestCase): + def test_iris(self): + + # Generate full set of constraints for comparison with reference implementation + n = self.iris_points.shape[0] + mask = (self.iris_labels[None] == self.iris_labels[:,None]) + a, b = np.nonzero(np.triu(mask, k=1)) + c, d = np.nonzero(np.triu(~mask, k=1)) + + # Full metric + mmc = MMC(convergence_threshold=0.01) + mmc.fit(self.iris_points, [a,b,c,d]) + expected = [[+0.00046504, +0.00083371, -0.00111959, -0.00165265], + [+0.00083371, +0.00149466, -0.00200719, -0.00296284], + [-0.00111959, -0.00200719, +0.00269546, +0.00397881], + [-0.00165265, -0.00296284, +0.00397881, +0.00587320]] + assert_array_almost_equal(expected, mmc.metric(), decimal=6) + + # Diagonal metric + mmc = MMC(diagonal=True) + mmc.fit(self.iris_points, [a,b,c,d]) + expected = [0, 0, 1.21045968, 1.22552608] + assert_array_almost_equal(np.diag(expected), mmc.metric(), decimal=6) + + # Supervised Full + mmc = MMC_Supervised() + mmc.fit(self.iris_points, self.iris_labels) + csep = class_separation(mmc.transform(), self.iris_labels) + self.assertLess(csep, 0.15) + + # Supervised Diagonal + mmc = MMC_Supervised(diagonal=True) + mmc.fit(self.iris_points, self.iris_labels) + csep = class_separation(mmc.transform(), self.iris_labels) + self.assertLess(csep, 0.2) + + if __name__ == '__main__': unittest.main() diff --git a/test/test_base_metric.py b/test/test_base_metric.py index d73138cd..31db4e6f 100644 --- a/test/test_base_metric.py +++ b/test/test_base_metric.py @@ -63,5 +63,16 @@ def test_mlkr(self): "MLKR(A0=None, alpha=0.0001, epsilon=0.01, " "max_iter=1000, num_dims=None)") + def test_mmc(self): + self.assertEqual(str(metric_learn.MMC()), """ +MMC(A0=None, convergence_threshold=0.001, diagonal=False, diagonal_c=1.0, + max_iter=100, max_proj=10000, verbose=False) +""".strip('\n')) + self.assertEqual(str(metric_learn.MMC_Supervised()), """ +MMC_Supervised(A0=None, convergence_threshold=1e-06, diagonal=False, + diagonal_c=1.0, max_iter=100, max_proj=10000, num_constraints=None, + num_labeled=inf, verbose=False) +""".strip('\n')) + if __name__ == '__main__': unittest.main() diff --git a/test/test_fit_transform.py b/test/test_fit_transform.py index eff8fa01..707815ec 100644 --- a/test/test_fit_transform.py +++ b/test/test_fit_transform.py @@ -5,7 +5,7 @@ from metric_learn import ( LMNN, NCA, LFDA, Covariance, MLKR, - LSML_Supervised, ITML_Supervised, SDML_Supervised, RCA_Supervised) + LSML_Supervised, ITML_Supervised, SDML_Supervised, RCA_Supervised, MMC_Supervised) class 
TestFitTransform(unittest.TestCase): @@ -118,6 +118,18 @@ def test_mlkr(self): assert_array_almost_equal(res_1, res_2) + def test_mmc_supervised(self): + seed = np.random.RandomState(1234) + mmc = MMC_Supervised(num_constraints=200) + mmc.fit(self.X, self.y, random_state=seed) + res_1 = mmc.transform() + + seed = np.random.RandomState(1234) + mmc = MMC_Supervised(num_constraints=200) + res_2 = mmc.fit_transform(self.X, self.y, random_state=seed) + + assert_array_almost_equal(res_1, res_2) + if __name__ == '__main__': unittest.main() diff --git a/test/test_sklearn_compat.py b/test/test_sklearn_compat.py index 58c7cd05..f1e1a09d 100644 --- a/test/test_sklearn_compat.py +++ b/test/test_sklearn_compat.py @@ -4,7 +4,7 @@ from metric_learn import ( LMNN, NCA, LFDA, Covariance, MLKR, - LSML_Supervised, ITML_Supervised, SDML_Supervised, RCA_Supervised) + LSML_Supervised, ITML_Supervised, SDML_Supervised, RCA_Supervised, MMC_Supervised) # Wrap the _Supervised methods with a deterministic wrapper for testing. @@ -22,6 +22,10 @@ class dITML(deterministic_mixin, ITML_Supervised): pass +class dMMC(deterministic_mixin, MMC_Supervised): + pass + + class dSDML(deterministic_mixin, SDML_Supervised): pass @@ -52,6 +56,9 @@ def test_lsml(self): def test_itml(self): check_estimator(dITML) + def test_mmc(self): + check_estimator(dMMC) + # This fails due to a FloatingPointError # def test_sdml(self): # check_estimator(dSDML) From 9c68f7dd377c3e524807ad3b121f840f5623c028 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Barz?= Date: Wed, 2 Aug 2017 16:56:20 +0200 Subject: [PATCH 050/210] [ITML] Avoid unnecessary computations (2x speedup) --- metric_learn/itml.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/metric_learn/itml.py b/metric_learn/itml.py index 7169fb36..4d27c412 100644 --- a/metric_learn/itml.py +++ b/metric_learn/itml.py @@ -95,30 +95,30 @@ def fit(self, X, constraints, bounds=None): gamma_proj = 1. if gamma is np.inf else gamma/(gamma+1.) pos_bhat = np.zeros(num_pos) + self.bounds_[0] neg_bhat = np.zeros(num_neg) + self.bounds_[1] + pos_vv = self.X_[a] - self.X_[b] + neg_vv = self.X_[c] - self.X_[d] A = self.A_ for it in xrange(self.max_iter): # update positives - vv = self.X_[a] - self.X_[b] - for i,v in enumerate(vv): + for i,v in enumerate(pos_vv): wtw = v.dot(A).dot(v) # scalar alpha = min(_lambda[i], gamma_proj*(1./wtw - 1./pos_bhat[i])) _lambda[i] -= alpha beta = alpha/(1 - alpha*wtw) pos_bhat[i] = 1./((1 / pos_bhat[i]) + (alpha / gamma)) Av = A.dot(v) - A += beta * np.outer(Av, Av) + A += np.outer(Av, Av * beta) # update negatives - vv = self.X_[c] - self.X_[d] - for i,v in enumerate(vv): + for i,v in enumerate(neg_vv): wtw = v.dot(A).dot(v) # scalar alpha = min(_lambda[i+num_pos], gamma_proj*(1./neg_bhat[i] - 1./wtw)) _lambda[i+num_pos] -= alpha beta = -alpha/(1 + alpha*wtw) neg_bhat[i] = 1./((1 / neg_bhat[i]) - (alpha / gamma)) Av = A.dot(v) - A += beta * np.outer(Av, Av) + A += np.outer(Av, Av * beta) normsum = np.linalg.norm(_lambda) + np.linalg.norm(lambdaold) if normsum == 0: From 12b781e81bced0618f9c647849f4fd669efbea42 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Barz?= Date: Tue, 16 May 2017 15:46:33 +0200 Subject: [PATCH 051/210] Define distance consistently as `(x-y)^T*M*(x-y)` Fixes the transformes returned by ITML and LSML. 
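
Concretely, the convention is now

    d(x, y)^2 = (x - y).dot(M).dot(x - y),   with   M = L.T.dot(L)

for a learned metric M and its transformer L.
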
The following now holds also for ITML, LSML, SDML and the covariance method: learner.transformer().T.dot(learner.transformer()) == learner.metric() --- README.rst | 4 ++-- metric_learn/base_metric.py | 6 +++--- metric_learn/covariance.py | 6 +++++- metric_learn/lsml.py | 8 ++++++-- metric_learn/sdml.py | 6 +++--- test/metric_learn_test.py | 2 +- 6 files changed, 20 insertions(+), 12 deletions(-) diff --git a/README.rst b/README.rst index 1e8adbe7..ddfca2d2 100644 --- a/README.rst +++ b/README.rst @@ -43,9 +43,9 @@ default implementations for the methods ``metric``, ``transformer``, and For an instance of a metric learner named ``foo`` learning from a set of ``d``-dimensional points, ``foo.metric()`` returns a ``d x d`` matrix ``M`` such that the distance between vectors ``x`` and ``y`` is -expressed ``sqrt((x-y).dot(inv(M)).dot(x-y))``. +expressed ``sqrt((x-y).dot(M).dot(x-y))``. Using scipy's ``pdist`` function, this would look like -``pdist(X, metric='mahalanobis', VI=inv(foo.metric()))``. +``pdist(X, metric='mahalanobis', VI=foo.metric())``. In the same scenario, ``foo.transformer()`` returns a ``d x d`` matrix ``L`` such that a vector ``x`` can be represented in the learned diff --git a/metric_learn/base_metric.py b/metric_learn/base_metric.py index abd2d0f7..02519de1 100644 --- a/metric_learn/base_metric.py +++ b/metric_learn/base_metric.py @@ -22,13 +22,13 @@ def metric(self): def transformer(self): """Computes the transformation matrix from the Mahalanobis matrix. - L = inv(cholesky(M)) + L = cholesky(M).T Returns ------- - L : (d x d) matrix + L : upper triangular (d x d) matrix """ - return inv(cholesky(self.metric())) + return cholesky(self.metric()).T def transform(self, X=None): """Applies the metric transformation. diff --git a/metric_learn/covariance.py b/metric_learn/covariance.py index 0e230d43..8fc07873 100644 --- a/metric_learn/covariance.py +++ b/metric_learn/covariance.py @@ -28,5 +28,9 @@ def fit(self, X, y=None): y : unused """ self.X_ = check_array(X, ensure_min_samples=2) - self.M_ = np.cov(self.X_.T) + self.M_ = np.cov(self.X_, rowvar = False) + if self.M_.ndim == 0: + self.M_ = 1./self.M_ + else: + self.M_ = np.linalg.inv(self.M_) return self diff --git a/metric_learn/lsml.py b/metric_learn/lsml.py index 0a251a87..bc02e6f2 100644 --- a/metric_learn/lsml.py +++ b/metric_learn/lsml.py @@ -26,7 +26,7 @@ def __init__(self, tol=1e-3, max_iter=1000, prior=None, verbose=False): tol : float, optional max_iter : int, optional prior : (d x d) matrix, optional - guess at a metric [default: covariance(X)] + guess at a metric [default: inv(covariance(X))] verbose : bool, optional if True, prints information while learning """ @@ -48,7 +48,11 @@ def _prepare_inputs(self, X, constraints, weights): self.w_ = weights self.w_ /= self.w_.sum() # weights must sum to 1 if self.prior is None: - self.M_ = np.cov(X.T) + self.M_ = np.cov(X, rowvar = False) + if self.M_.ndim == 0: + self.M_ = 1./self.M_ + else: + self.M_ = np.linalg.inv(self.M_) else: self.M_ = self.prior diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py index d353f524..93280334 100644 --- a/metric_learn/sdml.py +++ b/metric_learn/sdml.py @@ -47,7 +47,7 @@ def _prepare_inputs(self, X, W): W = check_array(W, accept_sparse=True) # set up prior M if self.use_cov: - self.M_ = np.cov(X.T) + self.M_ = pinvh(np.cov(X, rowvar = False)) else: self.M_ = np.identity(X.shape[1]) L = laplacian(W, normed=False) @@ -72,11 +72,11 @@ def fit(self, X, W): Returns the instance. 
""" loss_matrix = self._prepare_inputs(X, W) - P = pinvh(self.M_) + self.balance_param * loss_matrix + P = self.M_ + self.balance_param * loss_matrix emp_cov = pinvh(P) # hack: ensure positive semidefinite emp_cov = emp_cov.T.dot(emp_cov) - self.M_, _ = graph_lasso(emp_cov, self.sparsity_param, verbose=self.verbose) + _, self.M_ = graph_lasso(emp_cov, self.sparsity_param, verbose=self.verbose) return self diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py index 351b6298..6d78c657 100644 --- a/test/metric_learn_test.py +++ b/test/metric_learn_test.py @@ -57,7 +57,7 @@ def test_iris(self): itml.fit(self.iris_points, self.iris_labels) csep = class_separation(itml.transform(), self.iris_labels) - self.assertLess(csep, 0.4) # it's not great + self.assertLess(csep, 0.2) class TestLMNN(MetricTestCase): From ddfac991c0d32f3931831d649133dfbd15c61eff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Barz?= Date: Tue, 23 May 2017 11:28:12 +0200 Subject: [PATCH 052/210] Added unit test for transformer-metric conversion --- test/test_transformer_metric_conversion.py | 80 ++++++++++++++++++++++ 1 file changed, 80 insertions(+) create mode 100644 test/test_transformer_metric_conversion.py diff --git a/test/test_transformer_metric_conversion.py b/test/test_transformer_metric_conversion.py new file mode 100644 index 00000000..e027d176 --- /dev/null +++ b/test/test_transformer_metric_conversion.py @@ -0,0 +1,80 @@ +import unittest +import numpy as np +from sklearn.datasets import load_iris +from numpy.testing import assert_array_almost_equal + +from metric_learn import ( + LMNN, NCA, LFDA, Covariance, MLKR, + LSML_Supervised, ITML_Supervised, SDML_Supervised, RCA_Supervised) + + +class TestTransformerMetricConversion(unittest.TestCase): + @classmethod + def setUpClass(self): + # runs once per test class + iris_data = load_iris() + self.X = iris_data['data'] + self.y = iris_data['target'] + + def test_cov(self): + cov = Covariance() + cov.fit(self.X) + L = cov.transformer() + assert_array_almost_equal(L.T.dot(L), cov.metric()) + + def test_lsml_supervised(self): + seed = np.random.RandomState(1234) + lsml = LSML_Supervised(num_constraints=200) + lsml.fit(self.X, self.y, random_state=seed) + L = lsml.transformer() + assert_array_almost_equal(L.T.dot(L), lsml.metric()) + + def test_itml_supervised(self): + seed = np.random.RandomState(1234) + itml = ITML_Supervised(num_constraints=200) + itml.fit(self.X, self.y, random_state=seed) + L = itml.transformer() + assert_array_almost_equal(L.T.dot(L), itml.metric()) + + def test_lmnn(self): + lmnn = LMNN(k=5, learn_rate=1e-6, verbose=False) + lmnn.fit(self.X, self.y) + L = lmnn.transformer() + assert_array_almost_equal(L.T.dot(L), lmnn.metric()) + + def test_sdml_supervised(self): + seed = np.random.RandomState(1234) + sdml = SDML_Supervised(num_constraints=1500) + sdml.fit(self.X, self.y, random_state=seed) + L = sdml.transformer() + assert_array_almost_equal(L.T.dot(L), sdml.metric()) + + def test_nca(self): + n = self.X.shape[0] + nca = NCA(max_iter=(100000//n), learning_rate=0.01) + nca.fit(self.X, self.y) + L = nca.transformer() + assert_array_almost_equal(L.T.dot(L), nca.metric()) + + def test_lfda(self): + lfda = LFDA(k=2, num_dims=2) + lfda.fit(self.X, self.y) + L = lfda.transformer() + assert_array_almost_equal(L.T.dot(L), lfda.metric()) + + def test_rca_supervised(self): + seed = np.random.RandomState(1234) + rca = RCA_Supervised(num_dims=2, num_chunks=30, chunk_size=2) + rca.fit(self.X, self.y, random_state=seed) + L = rca.transformer() 
+ assert_array_almost_equal(L.T.dot(L), rca.metric()) + + def test_mlkr(self): + mlkr = MLKR(num_dims=2) + mlkr.fit(self.X, self.y) + L = mlkr.transformer() + assert_array_almost_equal(L.T.dot(L), mlkr.metric()) + + +if __name__ == '__main__': + unittest.main() From c1c2b1408098c92d8c8839692720ffc59563ff10 Mon Sep 17 00:00:00 2001 From: CJ Carey Date: Thu, 3 Aug 2017 10:04:46 -0400 Subject: [PATCH 053/210] LSML: Don't recompute prior_inv for default args Also: - use `np.linalg.inv` for small matrix inverses - clean up some minor style issues --- metric_learn/lsml.py | 36 +++++++++++++++--------------------- 1 file changed, 15 insertions(+), 21 deletions(-) diff --git a/metric_learn/lsml.py b/metric_learn/lsml.py index bc02e6f2..404fe286 100644 --- a/metric_learn/lsml.py +++ b/metric_learn/lsml.py @@ -7,7 +7,7 @@ Paper: http://www.cs.ucla.edu/~weiwang/paper/ICDM12.pdf """ -from __future__ import print_function, absolute_import +from __future__ import print_function, absolute_import, division import numpy as np import scipy.linalg from six.moves import xrange @@ -48,13 +48,11 @@ def _prepare_inputs(self, X, constraints, weights): self.w_ = weights self.w_ /= self.w_.sum() # weights must sum to 1 if self.prior is None: - self.M_ = np.cov(X, rowvar = False) - if self.M_.ndim == 0: - self.M_ = 1./self.M_ - else: - self.M_ = np.linalg.inv(self.M_) + self.prior_inv_ = np.atleast_2d(np.cov(X, rowvar=False)) + self.M_ = np.linalg.inv(self.prior_inv_) else: self.M_ = self.prior + self.prior_inv_ = np.linalg.inv(self.prior) def metric(self): return self.M_ @@ -73,14 +71,13 @@ def fit(self, X, constraints, weights=None): """ self._prepare_inputs(X, constraints, weights) step_sizes = np.logspace(-10, 0, 10) - prior_inv = scipy.linalg.inv(self.M_) # Keep track of the best step size and the loss at that step. 
l_best = 0
-    s_best = self._total_loss(self.M_, prior_inv)
+    s_best = self._total_loss(self.M_)
     if self.verbose:
       print('initial loss', s_best)
     for it in xrange(1, self.max_iter+1):
-      grad = self._gradient(self.M_, prior_inv)
+      grad = self._gradient(self.M_)
       grad_norm = scipy.linalg.norm(grad)
       if grad_norm < self.tol:
         break
@@ -92,7 +89,7 @@ def fit(self, X, constraints, weights=None):
       new_metric = self.M_ - step_size * grad
       w, v = scipy.linalg.eigh(new_metric)
       new_metric = v.dot((np.maximum(w, 1e-8) * v).T)
-      cur_s = self._total_loss(new_metric, prior_inv)
+      cur_s = self._total_loss(new_metric)
       if cur_s < s_best:
         l_best = step_size
         s_best = cur_s
@@ -113,14 +110,16 @@ def _comparison_loss(self, metric):
       dcd = np.sum(self.vcd_.dot(metric) * self.vcd_, axis=1)
       violations = dab > dcd
       return self.w_[violations].dot((np.sqrt(dab[violations]) -
-                                     np.sqrt(dcd[violations]))**2)
+                                      np.sqrt(dcd[violations]))**2)
 
-  def _total_loss(self, metric, prior_inv):
-    return (self._comparison_loss(metric) +
-            _regularization_loss(metric, prior_inv))
+  def _total_loss(self, metric):
+    # Regularization loss
+    sign, logdet = np.linalg.slogdet(metric)
+    reg_loss = np.sum(metric * self.prior_inv_) - sign * logdet
+    return self._comparison_loss(metric) + reg_loss
 
-  def _gradient(self, metric, prior_inv):
-    dMetric = prior_inv - scipy.linalg.inv(metric)
+  def _gradient(self, metric):
+    dMetric = self.prior_inv_ - np.linalg.inv(metric)
     dabs = np.sum(self.vab_.dot(metric) * self.vab_, axis=1)
     dcds = np.sum(self.vcd_.dot(metric) * self.vcd_, axis=1)
     violations = dabs > dcds
@@ -132,11 +131,6 @@ def _gradient(self, metric):
     return dMetric
 
-def _regularization_loss(metric, prior_inv):
-  sign, logdet = np.linalg.slogdet(metric)
-  return np.sum(metric * prior_inv) - sign * logdet
-
-
 class LSML_Supervised(LSML):
   def __init__(self, tol=1e-3, max_iter=1000, prior=None, num_labeled=np.inf,
                num_constraints=None, weights=None, verbose=False):
From 4da95f2d148d6827dd56fdd9a482b79fab35d1e4 Mon Sep 17 00:00:00 2001
From: CJ Carey
Date: Sun, 15 Oct 2017 15:49:10 -0400
Subject: [PATCH 054/210] Updating docs

---
 doc/metric_learn.mlkr.rst | 27 +++++++++++++++++++++++++++
 doc/metric_learn.mmc.rst  | 27 +++++++++++++++++++++++++++
 doc/metric_learn.rst      |  2 ++
 metric_learn/mlkr.py      |  2 +-
 4 files changed, 57 insertions(+), 1 deletion(-)
 create mode 100644 doc/metric_learn.mlkr.rst
 create mode 100644 doc/metric_learn.mmc.rst

diff --git a/doc/metric_learn.mlkr.rst b/doc/metric_learn.mlkr.rst
new file mode 100644
index 00000000..a2f36c4f
--- /dev/null
+++ b/doc/metric_learn.mlkr.rst
@@ -0,0 +1,27 @@
+Metric Learning for Kernel Regression (MLKR)
+============================================
+
+.. automodule:: metric_learn.mlkr
+   :members:
+   :undoc-members:
+   :inherited-members:
+   :show-inheritance:
+
+Example Code
+------------
+
+::
+
+    from metric_learn import MLKR
+    from sklearn.datasets import load_iris
+
+    iris_data = load_iris()
+    X = iris_data['data']
+    Y = iris_data['target']
+
+    mlkr = MLKR()
+    mlkr.fit(X, Y)
+
+References
+----------
+`Metric Learning for Kernel Regression `_ Kilian Q. Weinberger and Gerald Tesauro, AISTATS 2007.
diff --git a/doc/metric_learn.mmc.rst b/doc/metric_learn.mmc.rst
new file mode 100644
index 00000000..f3ddaa9e
--- /dev/null
+++ b/doc/metric_learn.mmc.rst
@@ -0,0 +1,27 @@
+Mahalanobis Metric Learning for Clustering (MMC)
+================================================
+
+.. 
automodule:: metric_learn.mmc + :members: + :undoc-members: + :inherited-members: + :show-inheritance: + +Example Code +------------ + +:: + + from metric_learn import MMC_Supervised + from sklearn.datasets import load_iris + + iris_data = load_iris() + X = iris_data['data'] + Y = iris_data['target'] + + mmc = MMC_Supervised(num_constraints=200) + mmc.fit(X, Y) + +References +---------- +`Distance metric learning with application to clustering with side-information `_ Xing, Jordan, Russell, Ng. diff --git a/doc/metric_learn.rst b/doc/metric_learn.rst index 226fd324..70a99a04 100644 --- a/doc/metric_learn.rst +++ b/doc/metric_learn.rst @@ -11,6 +11,8 @@ Submodules metric_learn.lfda metric_learn.lmnn metric_learn.lsml + metric_learn.mlkr + metric_learn.mmc metric_learn.nca metric_learn.rca metric_learn.sdml diff --git a/metric_learn/mlkr.py b/metric_learn/mlkr.py index d0007685..35b80495 100644 --- a/metric_learn/mlkr.py +++ b/metric_learn/mlkr.py @@ -72,7 +72,7 @@ def fit(self, X, y): """ Fit MLKR model - Parameters: + Parameters ---------- X : (n x d) array of samples y : (n) data labels From 84dbcbef3896e62b664dbbdf83fa673d4976ec40 Mon Sep 17 00:00:00 2001 From: CJ Carey Date: Sun, 15 Oct 2017 15:51:57 -0400 Subject: [PATCH 055/210] Bumping docs version, fixing MMC docstrings --- doc/conf.py | 6 +-- metric_learn/mmc.py | 104 +++++++++++++++++++++++--------------------- 2 files changed, 57 insertions(+), 53 deletions(-) diff --git a/doc/conf.py b/doc/conf.py index 467691ab..1c8beeab 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -15,10 +15,10 @@ # General information about the project. project = u'metric-learn' -copyright = u'2015-2016, CJ Carey and Yuan Tang' +copyright = u'2015-2017, CJ Carey and Yuan Tang' author = u'CJ Carey and Yuan Tang' -version = '0.3.0' -release = '0.3.0' +version = '0.4.0' +release = '0.4.0' language = 'en' exclude_patterns = ['_build'] diff --git a/metric_learn/mmc.py b/metric_learn/mmc.py index 7760e1b1..a37f8098 100644 --- a/metric_learn/mmc.py +++ b/metric_learn/mmc.py @@ -57,9 +57,10 @@ def __init__(self, max_iter=100, max_proj=10000, convergence_threshold=1e-3, self.diagonal = diagonal self.diagonal_c = diagonal_c self.verbose = verbose - + def fit(self, X, constraints): """Learn the MMC model. + Parameters ---------- X : (n x d) data matrix @@ -73,11 +74,11 @@ def fit(self, X, constraints): return self._fit_diag(X, constraints) else: return self._fit_full(X, constraints) - + def _process_inputs(self, X, constraints): - + self.X_ = X = check_array(X) - + # check to make sure that no two constrained vectors are identical a,b,c,d = constraints no_ident = vector_norm(X[a] - X[b]) > 1e-9 @@ -88,7 +89,7 @@ def _process_inputs(self, X, constraints): raise ValueError('No non-trivial similarity constraints given for MMC.') if len(c) == 0: raise ValueError('No non-trivial dissimilarity constraints given for MMC.') - + # init metric if self.A0 is None: self.A_ = np.identity(X.shape[1]) @@ -98,11 +99,12 @@ def _process_inputs(self, X, constraints): self.A_ /= 10.0 else: self.A_ = check_array(self.A0) - + return a,b,c,d def _fit_full(self, X, constraints): """Learn full metric using MMC. 
+ Parameters ---------- X : (n x d) data matrix @@ -115,11 +117,11 @@ def _fit_full(self, X, constraints): num_pos = len(a) num_neg = len(c) num_samples, num_dim = X.shape - + error1 = error2 = 1e10 eps = 0.01 # error-bound of iterative projection on C1 and C2 A = self.A_ - + # Create weight vector from similar samples pos_diff = X[a] - X[b] w = np.einsum('ij,ik->jk', pos_diff, pos_diff).ravel() @@ -131,27 +133,27 @@ def _fit_full(self, X, constraints): # X[a] - X[b] # ).sum(axis = 0) t = w.dot(A.ravel()) / 100.0 - + w_norm = np.linalg.norm(w) w1 = w / w_norm # make `w` a unit vector t1 = t / w_norm # distance from origin to `w^T*x=t` plane - + cycle = 1 alpha = 0.1 # initial step size along gradient - + grad1 = self._fS1(X, a, b, A) # gradient of similarity constraint function grad2 = self._fD1(X, c, d, A) # gradient of dissimilarity constraint function M = self._grad_projection(grad1, grad2) # gradient of fD1 orthogonal to fS1 - + A_old = A.copy() for cycle in xrange(self.max_iter): - + # projection of constraints C1 and C2 satisfy = False - + for it in xrange(self.max_proj): - + # First constraint: # f(A) = \sum_{i,j \in S} d_ij' A d_ij <= t (1) # (1) can be rewritten as a linear constraint: w^T x = t, @@ -164,28 +166,28 @@ def _fit_full(self, X, constraints): else: x = x0 + (t1 - w1.dot(x0)) * w1 A[:] = x.reshape(num_dim, num_dim) - + # Second constraint: # PSD constraint A >= 0 # project A onto domain A>0 l, V = np.linalg.eigh((A + A.T) / 2) A[:] = np.dot(V * np.maximum(0, l[None,:]), V.T) - + fDC2 = w.dot(A.ravel()) error2 = (fDC2 - t) / t if error2 < eps: satisfy = True break - + # third constraint: gradient ascent # max: g(A) >= 1 # here we suppose g(A) = fD(A) = \sum_{I,J \in D} sqrt(d_ij' A d_ij) - + obj_previous = self._fD(X, c, d, A_old) # g(A_old) obj = self._fD(X, c, d, A) # g(A) - + if satisfy and (obj > obj_previous or cycle == 0): - + # If projection of 1 and 2 is successful, and such projection # improves objective function, slightly increase learning rate # and update from the current A. @@ -195,15 +197,15 @@ def _fit_full(self, X, constraints): grad1 = self._fD1(X, c, d, A) M = self._grad_projection(grad1, grad2) A += alpha * M - + else: - + # If projection of 1 and 2 failed, or obj <= obj_previous due # to projection of 1 and 2, shrink learning rate and re-update # from the previous A. alpha /= 2 A[:] = A_old + alpha * M - + delta = np.linalg.norm(alpha * M) / np.linalg.norm(A_old) if delta < self.convergence_threshold: break @@ -221,7 +223,7 @@ def _fit_full(self, X, constraints): self.A_[:] = A_old self.n_iter_ = cycle return self - + def _fit_diag(self, X, constraints): """Learn diagonal metric using MMC. 
Parameters @@ -236,33 +238,33 @@ def _fit_diag(self, X, constraints): num_pos = len(a) num_neg = len(c) num_samples, num_dim = X.shape - + s_sum = np.sum((X[a] - X[b]) ** 2, axis=0) - + it = 0 error = 1.0 eps = 1e-6 reduction = 2.0 w = np.diag(self.A_).copy() - + while error > self.convergence_threshold: - + fD0, fD_1st_d, fD_2nd_d = self._D_constraint(X, c, d, w) obj_initial = np.dot(s_sum, w) + self.diagonal_c * fD0 fS_1st_d = s_sum # first derivative of the similarity constraints - + gradient = fS_1st_d - self.diagonal_c * fD_1st_d # gradient of the objective hessian = -self.diagonal_c * fD_2nd_d + eps * np.eye(num_dim) # Hessian of the objective step = np.dot(np.linalg.inv(hessian), gradient); - + # Newton-Rapshon update # search over optimal lambda lambd = 1 # initial step-size w_tmp = np.maximum(0, w - lambd * step) - + obj = np.dot(s_sum, w_tmp) + self.diagonal_c * self._D_objective(X, c, d, w_tmp) obj_previous = obj * 1.1 # just to get the while-loop started - + inner_it = 0 while obj < obj_previous: obj_previous = obj @@ -271,32 +273,32 @@ def _fit_diag(self, X, constraints): w_tmp = np.maximum(0, w - lambd * step) obj = np.dot(s_sum, w_tmp) + self.diagonal_c * self._D_objective(X, c, d, w_tmp) inner_it += 1 - + w[:] = w_previous error = np.abs((obj_previous - obj_initial) / obj_previous) if self.verbose: print('mmc iter: %d, conv = %f' % (it, error)) it += 1 - + self.A_ = np.diag(w) return self def _fD(self, X, c, d, A): """The value of the dissimilarity constraint function. - + f = f(\sum_{ij \in D} distance(x_i, x_j)) i.e. distance can be L1: \sqrt{(x_i-x_j)A(x_i-x_j)'} """ diff = X[c] - X[d] return np.log(np.sum(np.sqrt(np.sum(np.dot(diff, A) * diff, axis=1))) + 1e-6) - + def _fD1(self, X, c, d, A): """The gradient of the dissimilarity constraint function w.r.t. A. - + For example, let distance by L1 norm: f = f(\sum_{ij \in D} \sqrt{(x_i-x_j)A(x_i-x_j)'}) df/dA_{kl} = f'* d(\sum_{ij \in D} \sqrt{(x_i-x_j)^k*(x_i-x_j)^l})/dA_{kl} - + Note that d_ij*A*d_ij' = tr(d_ij*A*d_ij') = tr(d_ij'*d_ij*A) so, d(d_ij*A*d_ij')/dA = d_ij'*d_ij df/dA = f'(\sum_{ij \in D} \sqrt{tr(d_ij'*d_ij*A)}) @@ -312,31 +314,31 @@ def _fD1(self, X, c, d, A): sum_deri = np.einsum('ijk,i->jk', M, 0.5 / (dist + 1e-6)) sum_dist = dist.sum() return sum_deri / (sum_dist + 1e-6) - + def _fS1(self, X, a, b, A): """The gradient of the similarity constraint function w.r.t. A. - + f = \sum_{ij}(x_i-x_j)A(x_i-x_j)' = \sum_{ij}d_ij*A*d_ij' df/dA = d(d_ij*A*d_ij')/dA - + Note that d_ij*A*d_ij' = tr(d_ij*A*d_ij') = tr(d_ij'*d_ij*A) so, d(d_ij*A*d_ij')/dA = d_ij'*d_ij """ dim = X.shape[1] diff = X[a] - X[b] return np.einsum('ij,ik->jk', diff, diff) # sum of outer products of all rows in `diff` - + def _grad_projection(self, grad1, grad2): grad2 = grad2 / np.linalg.norm(grad2) gtemp = grad1 - np.sum(grad1 * grad2) * grad2 gtemp /= np.linalg.norm(gtemp) return gtemp - + def _D_objective(self, X, c, d, w): return np.log(np.sum(np.sqrt(np.sum(((X[c] - X[d]) ** 2) * w[None,:], axis=1) + 1e-6))) - + def _D_constraint(self, X, c, d, w): - """Compute the value, 1st derivative, second derivative (Hessian) of + """Compute the value, 1st derivative, second derivative (Hessian) of a dissimilarity constraint function gF(sum_ij distance(d_ij A d_ij)) where A is a diagonal matrix (in the form of a column vector 'w'). 
""" @@ -355,18 +357,18 @@ def _D_constraint(self, X, c, d, w): sum_deri1 / sum_dist, sum_deri2 / sum_dist - np.outer(sum_deri1, sum_deri1) / (sum_dist * sum_dist) ) - + def metric(self): return self.A_ - + def transformer(self): """Computes the transformation matrix from the Mahalanobis matrix. L = V.T * w^(-1/2), with A = V*w*V.T being the eigenvector decomposition of A with the eigenvalues in the diagonal matrix w and the columns of V being the eigenvectors. - + The Cholesky decomposition cannot be applied here, since MMC learns only a positive *semi*-definite Mahalanobis matrix. - + Returns ------- L : (d x d) matrix @@ -384,6 +386,7 @@ def __init__(self, max_iter=100, max_proj=10000, convergence_threshold=1e-6, num_labeled=np.inf, num_constraints=None, A0=None, diagonal=False, diagonal_c=1.0, verbose=False): """Initialize the learner. + Parameters ---------- max_iter : int, optional @@ -414,6 +417,7 @@ def __init__(self, max_iter=100, max_proj=10000, convergence_threshold=1e-6, def fit(self, X, y, random_state=np.random): """Create constraints from labels and learn the MMC model. + Parameters ---------- X : (n x d) matrix From 4b889d472db21351bb811bf3658689d49f79e7ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A9o=20Gautheron?= Date: Mon, 27 Nov 2017 15:08:54 +0100 Subject: [PATCH 056/210] LMNN: fix mistake and improve performances (#78) Fix mistake in LMNN Issue in function _find_impostors: - the squared euclidean distance is used to compute the margins in variable "margin_radii" - the euclidean distance is used (through the function sklearn.metrics.pairwise.pairwise_distances) to compute distances between samples of different labels in variable "dist" - the issue is that the impostors are found by testing "dist < margin_radii" which is wrong because "dist" represent distances, and "margin_radii" represent squared distances. I propose to solve this problem by computing always the squared distances. 
--- metric_learn/lmnn.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/metric_learn/lmnn.py b/metric_learn/lmnn.py index 3682f3f6..dea12f0c 100644 --- a/metric_learn/lmnn.py +++ b/metric_learn/lmnn.py @@ -14,8 +14,8 @@ import warnings from collections import Counter from six.moves import xrange -from sklearn.metrics import pairwise_distances from sklearn.utils.validation import check_X_y, check_array +from sklearn.metrics import euclidean_distances from .base_metric import BaseMetricLearner @@ -185,7 +185,7 @@ def _select_targets(self): target_neighbors = np.empty((self.X_.shape[0], self.k), dtype=int) for label in self.labels_: inds, = np.nonzero(self.label_inds_ == label) - dd = pairwise_distances(self.X_[inds]) + dd = euclidean_distances(self.X_[inds], squared=True) np.fill_diagonal(dd, np.inf) nn = np.argsort(dd)[..., :self.k] target_neighbors[inds] = inds[nn] @@ -198,7 +198,7 @@ def _find_impostors(self, furthest_neighbors): for label in self.labels_[:-1]: in_inds, = np.nonzero(self.label_inds_ == label) out_inds, = np.nonzero(self.label_inds_ > label) - dist = pairwise_distances(Lx[out_inds], Lx[in_inds]) + dist = euclidean_distances(Lx[out_inds], Lx[in_inds], squared=True) i1,j1 = np.nonzero(dist < margin_radii[out_inds][:,None]) i2,j2 = np.nonzero(dist < margin_radii[in_inds]) i = np.hstack((i1,i2)) From 66deb6b62971a92990920598dd27cac56914d99a Mon Sep 17 00:00:00 2001 From: Syed Shalan Naqvi Date: Tue, 12 Dec 2017 09:22:57 -0600 Subject: [PATCH 057/210] fixed infinite looping issue in fit_diag (#80) --- metric_learn/mmc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metric_learn/mmc.py b/metric_learn/mmc.py index a37f8098..efe33c38 100644 --- a/metric_learn/mmc.py +++ b/metric_learn/mmc.py @@ -247,7 +247,7 @@ def _fit_diag(self, X, constraints): reduction = 2.0 w = np.diag(self.A_).copy() - while error > self.convergence_threshold: + while error > self.convergence_threshold and it < self.max_iter: fD0, fD_1st_d, fD_2nd_d = self._D_constraint(X, c, d, w) obj_initial = np.dot(s_sum, w) + self.diagonal_c * fD0 From 1193c7b774ada526636d6550eb3bda4fd6dfcba8 Mon Sep 17 00:00:00 2001 From: CJ Carey Date: Mon, 29 Jan 2018 16:36:46 -0500 Subject: [PATCH 058/210] Updating old URLs --- README.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index ddfca2d2..07a1a497 100644 --- a/README.rst +++ b/README.rst @@ -63,10 +63,10 @@ there. The two implementations differ slightly, and the C++ version is more complete. -.. _sphinx documentation: http://all-umass.github.io/metric-learn/ +.. _sphinx documentation: http://metric-learn.github.io/metric-learn/ .. |Travis-CI Build Status| image:: https://api.travis-ci.org/all-umass/metric-learn.svg?branch=master - :target: https://travis-ci.org/all-umass/metric-learn + :target: https://travis-ci.org/metric-learn/metric-learn .. |License| image:: http://img.shields.io/:license-mit-blue.svg?style=flat :target: http://badges.mit-license.org .. |PyPI version| image:: https://badge.fury.io/py/metric-learn.svg From fb6733c190911d2c408bd7f0b8c9b54ff005fa8d Mon Sep 17 00:00:00 2001 From: "Yuan (Terry) Tang" Date: Tue, 27 Feb 2018 13:14:30 -0500 Subject: [PATCH 059/210] Update Travis build status badge to reflect org change (#86) --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 07a1a497..a7f8cf88 100644 --- a/README.rst +++ b/README.rst @@ -65,7 +65,7 @@ more complete. .. 
_sphinx documentation: http://metric-learn.github.io/metric-learn/ -.. |Travis-CI Build Status| image:: https://api.travis-ci.org/all-umass/metric-learn.svg?branch=master +.. |Travis-CI Build Status| image:: https://api.travis-ci.org/metric-learn/metric-learn.svg?branch=master :target: https://travis-ci.org/metric-learn/metric-learn .. |License| image:: http://img.shields.io/:license-mit-blue.svg?style=flat :target: http://badges.mit-license.org From c79875c3d8587feba64577a83152e489b0e60386 Mon Sep 17 00:00:00 2001 From: William de Vazelhes <31916524+wdevazelhes@users.noreply.github.com> Date: Wed, 2 May 2018 15:27:11 +0200 Subject: [PATCH 060/210] [MRG] move from unittest to pytest (#90) * use pytest rather than unittest * update README.txt --- .travis.yml | 2 +- README.rst | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 378cc5f5..5daa20b3 100644 --- a/.travis.yml +++ b/.travis.yml @@ -8,4 +8,4 @@ before_install: - pip install --upgrade pip - pip install wheel - pip install numpy scipy scikit-learn -script: python setup.py test +script: pytest test diff --git a/README.rst b/README.rst index a7f8cf88..22b3e7e3 100644 --- a/README.rst +++ b/README.rst @@ -29,7 +29,8 @@ Run ``pip install metric-learn`` to download and install from PyPI. Run ``python setup.py install`` for default installation. -Run ``python setup.py test`` to run all tests. +Run ``pytest test`` to run all tests (you will need to have the ``pytest`` +package installed). **Usage** From 4c887d7d6486760d919642b1cd741086dbbbb007 Mon Sep 17 00:00:00 2001 From: William de Vazelhes <31916524+wdevazelhes@users.noreply.github.com> Date: Fri, 18 May 2018 17:48:58 +0200 Subject: [PATCH 061/210] Deals with scipy's new version, where eigsh can call eigh. 
(#94) --- metric_learn/lfda.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/metric_learn/lfda.py b/metric_learn/lfda.py index dbe5aa4f..809f092b 100644 --- a/metric_learn/lfda.py +++ b/metric_learn/lfda.py @@ -139,10 +139,11 @@ def _sum_outer(x): def _eigh(a, b, dim): try: return scipy.sparse.linalg.eigsh(a, k=dim, M=b, which='LA') - except (ValueError, scipy.sparse.linalg.ArpackNoConvergence): - pass - try: - return scipy.linalg.eigh(a, b) except np.linalg.LinAlgError: - pass + pass # scipy already tried eigh for us + except (ValueError, scipy.sparse.linalg.ArpackNoConvergence): + try: + return scipy.linalg.eigh(a, b) + except np.linalg.LinAlgError: + pass return scipy.linalg.eig(a, b) From faa240fd7469176036a91430ae6a0a45e627c94a Mon Sep 17 00:00:00 2001 From: William de Vazelhes <31916524+wdevazelhes@users.noreply.github.com> Date: Mon, 9 Jul 2018 16:13:12 +0200 Subject: [PATCH 062/210] [MRG] Add memory efficient implementation of NCA (#99) * FIX Fixes #45 Add memory efficient implementation of NCA - Make gradient computation more memory efficient - Remove the hard-coded test but adds others * ENH Add scipy optimizer lbfgs-b - Add deprecation for learning rate (not needed anymore) - TST: test deprecation - TST: force the algorithm to converge to pass test_iris using tol * FIX fix tests: - use checked labels instead of raw y - update string representation with new arguments * FIX: remove init parameter in NCA * FIX: remove random_state as well as unused imports * DOC: add docstring for NCA * TST: add more tests for edge cases and toy examples * STY: replace np.sum(array) by array.sum() --- metric_learn/nca.py | 95 ++++++++++++++++++++++------ test/metric_learn_test.py | 128 +++++++++++++++++++++++++++++++++----- test/test_base_metric.py | 3 +- 3 files changed, 190 insertions(+), 36 deletions(-) diff --git a/metric_learn/nca.py b/metric_learn/nca.py index 40757d23..6ee0845a 100644 --- a/metric_learn/nca.py +++ b/metric_learn/nca.py @@ -4,20 +4,51 @@ """ from __future__ import absolute_import + +import warnings import numpy as np -from six.moves import xrange +from scipy.optimize import minimize +from sklearn.metrics import pairwise_distances from sklearn.utils.validation import check_X_y +try: # scipy.misc.logsumexp is deprecated in scipy 1.0.0 + from scipy.special import logsumexp +except ImportError: + from scipy.misc import logsumexp + from .base_metric import BaseMetricLearner EPS = np.finfo(float).eps class NCA(BaseMetricLearner): - def __init__(self, num_dims=None, max_iter=100, learning_rate=0.01): + def __init__(self, num_dims=None, max_iter=100, learning_rate='deprecated', + tol=None): + """Neighborhood Components Analysis + + Parameters + ---------- + num_dims : int, optional (default=None) + Embedding dimensionality. If None, will be set to ``n_features`` + (``d``) at fit time. + + max_iter : int, optional (default=100) + Maximum number of iterations done by the optimization algorithm. + + learning_rate : Not used + + .. deprecated:: 0.4.0 + `learning_rate` was deprecated in version 0.4.0 and will + be removed in 0.5.0. The current optimization algorithm does not need + to fix a learning rate. + + tol : float, optional (default=None) + Convergence tolerance for the optimization. 
+ """ self.num_dims = num_dims self.max_iter = max_iter - self.learning_rate = learning_rate + self.learning_rate = learning_rate # TODO: remove in v.0.5.0 + self.tol = tol def transformer(self): return self.A_ @@ -27,33 +58,57 @@ def fit(self, X, y): X: data matrix, (n x d) y: scalar labels, (n) """ + if self.learning_rate != 'deprecated': + warnings.warn('"learning_rate" parameter is not used.' + ' It has been deprecated in version 0.4 and will be' + 'removed in 0.5', DeprecationWarning) + X, labels = check_X_y(X, y) n, d = X.shape num_dims = self.num_dims if num_dims is None: num_dims = d + # Initialize A to a scaling matrix A = np.zeros((num_dims, d)) np.fill_diagonal(A, 1./(np.maximum(X.max(axis=0)-X.min(axis=0), EPS))) # Run NCA - dX = X[:,None] - X[None] # shape (n, n, d) - tmp = np.einsum('...i,...j->...ij', dX, dX) # shape (n, n, d, d) - masks = labels[:,None] == labels[None] - for it in xrange(self.max_iter): - for i, label in enumerate(labels): - mask = masks[i] - Ax = A.dot(X.T).T # shape (n, num_dims) - - softmax = np.exp(-((Ax[i] - Ax)**2).sum(axis=1)) # shape (n) - softmax[i] = 0 - softmax /= softmax.sum() - - t = softmax[:, None, None] * tmp[i] # shape (n, d, d) - d = softmax[mask].sum() * t.sum(axis=0) - t[mask].sum(axis=0) - A += self.learning_rate * A.dot(d) + mask = labels[:, np.newaxis] == labels[np.newaxis, :] + optimizer_params = {'method': 'L-BFGS-B', + 'fun': self._loss_grad_lbfgs, + 'args': (X, mask, -1.0), + 'jac': True, + 'x0': A.ravel(), + 'options': dict(maxiter=self.max_iter), + 'tol': self.tol + } + + # Call the optimizer + opt_result = minimize(**optimizer_params) self.X_ = X - self.A_ = A - self.n_iter_ = it + self.A_ = opt_result.x.reshape(-1, X.shape[1]) + self.n_iter_ = opt_result.nit return self + + @staticmethod + def _loss_grad_lbfgs(A, X, mask, sign=1.0): + A = A.reshape(-1, X.shape[1]) + X_embedded = np.dot(X, A.T) # (n_samples, num_dims) + # Compute softmax distances + p_ij = pairwise_distances(X_embedded, squared=True) + np.fill_diagonal(p_ij, np.inf) + p_ij = np.exp(-p_ij - logsumexp(-p_ij, axis=1)[:, np.newaxis]) + # (n_samples, n_samples) + + # Compute loss + masked_p_ij = p_ij * mask + p = masked_p_ij.sum(axis=1, keepdims=True) # (n_samples, 1) + loss = p.sum() + + # Compute gradient of loss w.r.t. `transform` + weighted_p_ij = masked_p_ij - p_ij * p + gradient = 2 * (X_embedded.T.dot(weighted_p_ij + weighted_p_ij.T) - + X_embedded.T * weighted_p_ij.sum(axis=0)).dot(X) + return sign * loss, sign * gradient.ravel() diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py index 6d78c657..729b00a8 100644 --- a/test/metric_learn_test.py +++ b/test/metric_learn_test.py @@ -1,14 +1,15 @@ import unittest import numpy as np +from scipy.optimize import check_grad from six.moves import xrange from sklearn.metrics import pairwise_distances -from sklearn.datasets import load_iris -from numpy.testing import assert_array_almost_equal +from sklearn.datasets import load_iris, make_classification +from numpy.testing import assert_array_almost_equal, assert_array_equal +from sklearn.utils.testing import assert_warns_message -from metric_learn import ( - LMNN, NCA, LFDA, Covariance, MLKR, MMC, - LSML_Supervised, ITML_Supervised, SDML_Supervised, RCA_Supervised, MMC_Supervised) -# Import this specially for testing. 
+from metric_learn import (LMNN, NCA, LFDA, Covariance, MLKR, MMC, + LSML_Supervised, ITML_Supervised, SDML_Supervised, + RCA_Supervised, MMC_Supervised) from metric_learn.lmnn import python_LMNN @@ -88,22 +89,119 @@ def test_iris(self): n = self.iris_points.shape[0] # Without dimension reduction - nca = NCA(max_iter=(100000//n), learning_rate=0.01) + nca = NCA(max_iter=(100000//n)) nca.fit(self.iris_points, self.iris_labels) - # Result copied from Iris example at - # https://github.com/vomjom/nca/blob/master/README.mkd - expected = [[-0.09935, -0.2215, 0.3383, 0.443], - [+0.2532, 0.5835, -0.8461, -0.8915], - [-0.729, -0.6386, 1.767, 1.832], - [-0.9405, -0.8461, 2.281, 2.794]] - assert_array_almost_equal(expected, nca.transformer(), decimal=3) + csep = class_separation(nca.transform(), self.iris_labels) + self.assertLess(csep, 0.15) # With dimension reduction - nca = NCA(max_iter=(100000//n), learning_rate=0.01, num_dims=2) + nca = NCA(max_iter=(100000//n), num_dims=2, tol=1e-9) nca.fit(self.iris_points, self.iris_labels) csep = class_separation(nca.transform(), self.iris_labels) self.assertLess(csep, 0.15) + def test_finite_differences(self): + """Test gradient of loss function + + Assert that the gradient is almost equal to its finite differences + approximation. + """ + # Initialize the transformation `M`, as well as `X` and `y` and `NCA` + X, y = make_classification() + M = np.random.randn(np.random.randint(1, X.shape[1] + 1), X.shape[1]) + mask = y[:, np.newaxis] == y[np.newaxis, :] + + def fun(M): + return NCA._loss_grad_lbfgs(M, X, mask)[0] + + def grad(M): + return NCA._loss_grad_lbfgs(M, X, mask)[1].ravel() + + # compute relative error + rel_diff = check_grad(fun, grad, M.ravel()) / np.linalg.norm(grad(M)) + np.testing.assert_almost_equal(rel_diff, 0., decimal=6) + + def test_simple_example(self): + """Test on a simple example. + + Puts four points in the input space where the opposite labels points are + next to each other. After transform the same labels points should be next + to each other. + + """ + X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) + y = np.array([1, 0, 1, 0]) + nca = NCA(num_dims=2,) + nca.fit(X, y) + Xansformed = nca.transform(X) + np.testing.assert_equal(pairwise_distances(Xansformed).argsort()[:, 1], + np.array([2, 3, 0, 1])) + + def test_deprecation(self): + # test that the right deprecation message is thrown. + # TODO: remove in v.0.5 + X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) + y = np.array([1, 0, 1, 0]) + nca = NCA(num_dims=2, learning_rate=0.01) + msg = ('"learning_rate" parameter is not used.' 
+ ' It has been deprecated in version 0.4 and will be' + 'removed in 0.5') + assert_warns_message(DeprecationWarning, msg, nca.fit, X, y) + + def test_singleton_class(self): + X = self.iris_points + y = self.iris_labels + + # one singleton class: test fitting works + singleton_class = 1 + ind_singleton, = np.where(y == singleton_class) + y[ind_singleton] = 2 + y[ind_singleton[0]] = singleton_class + + nca = NCA(max_iter=30) + nca.fit(X, y) + + # One non-singleton class: test fitting works + ind_1, = np.where(y == 1) + ind_2, = np.where(y == 2) + y[ind_1] = 0 + y[ind_1[0]] = 1 + y[ind_2] = 0 + y[ind_2[0]] = 2 + + nca = NCA(max_iter=30) + nca.fit(X, y) + + # Only singleton classes: test fitting does nothing (the gradient + # must be null in this case, so the final matrix must stay like + # the initialization) + ind_0, = np.where(y == 0) + ind_1, = np.where(y == 1) + ind_2, = np.where(y == 2) + X = X[[ind_0[0], ind_1[0], ind_2[0]]] + y = y[[ind_0[0], ind_1[0], ind_2[0]]] + + EPS = np.finfo(float).eps + A = np.zeros((X.shape[1], X.shape[1])) + np.fill_diagonal(A, + 1. / (np.maximum(X.max(axis=0) - X.min(axis=0), EPS))) + nca = NCA(max_iter=30, num_dims=X.shape[1]) + nca.fit(X, y) + assert_array_equal(nca.A_, A) + + def test_one_class(self): + # if there is only one class the gradient is null, so the final matrix + # must stay like the initialization + X = self.iris_points[self.iris_labels == 0] + y = self.iris_labels[self.iris_labels == 0] + EPS = np.finfo(float).eps + A = np.zeros((X.shape[1], X.shape[1])) + np.fill_diagonal(A, + 1. / (np.maximum(X.max(axis=0) - X.min(axis=0), EPS))) + nca = NCA(max_iter=30, num_dims=X.shape[1]) + nca.fit(X, y) + assert_array_equal(nca.A_, A) + class TestLFDA(MetricTestCase): def test_iris(self): diff --git a/test/test_base_metric.py b/test/test_base_metric.py index 31db4e6f..81a0fe6e 100644 --- a/test/test_base_metric.py +++ b/test/test_base_metric.py @@ -16,7 +16,8 @@ def test_lmnn(self): def test_nca(self): self.assertEqual(str(metric_learn.NCA()), - "NCA(learning_rate=0.01, max_iter=100, num_dims=None)") + ("NCA(learning_rate='deprecated', max_iter=100, " + "num_dims=None, tol=None)")) def test_lfda(self): self.assertEqual(str(metric_learn.LFDA()), From c5ca3a3376357cd253ff6b2322ca3d8b05be6efd Mon Sep 17 00:00:00 2001 From: William de Vazelhes <31916524+wdevazelhes@users.noreply.github.com> Date: Tue, 7 Aug 2018 15:07:43 +0200 Subject: [PATCH 063/210] [MRG] FIX: use addition for starting while loop and checks NaN (#103) * FIX: fixes #74: use addition rather than multiplication for getting the loop started, and throw error when NaN * MAINT: make loop initialization clearer by just adding 1 --- metric_learn/mmc.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/metric_learn/mmc.py b/metric_learn/mmc.py index efe33c38..ef08aeef 100644 --- a/metric_learn/mmc.py +++ b/metric_learn/mmc.py @@ -20,7 +20,7 @@ import numpy as np from six.moves import xrange from sklearn.metrics import pairwise_distances -from sklearn.utils.validation import check_array, check_X_y +from sklearn.utils.validation import check_array, check_X_y, assert_all_finite from .base_metric import BaseMetricLearner from .constraints import Constraints @@ -261,9 +261,9 @@ def _fit_diag(self, X, constraints): # search over optimal lambda lambd = 1 # initial step-size w_tmp = np.maximum(0, w - lambd * step) - obj = np.dot(s_sum, w_tmp) + self.diagonal_c * self._D_objective(X, c, d, w_tmp) - obj_previous = obj * 1.1 # just to get the while-loop started + 
assert_all_finite(obj) + obj_previous = obj + 1 # just to get the while-loop started inner_it = 0 while obj < obj_previous: @@ -273,6 +273,7 @@ def _fit_diag(self, X, constraints): w_tmp = np.maximum(0, w - lambd * step) obj = np.dot(s_sum, w_tmp) + self.diagonal_c * self._D_objective(X, c, d, w_tmp) inner_it += 1 + assert_all_finite(obj) w[:] = w_previous error = np.abs((obj_previous - obj_initial) / obj_previous) From b4debe39b5d4f471119edf340f91be3c7dd7c8f5 Mon Sep 17 00:00:00 2001 From: William de Vazelhes <31916524+wdevazelhes@users.noreply.github.com> Date: Tue, 7 Aug 2018 16:29:47 +0200 Subject: [PATCH 064/210] [MRG] Update __init__.py to allow printing version (#108) * Update __init__.py * MAINT: put version number only in init --- metric_learn/__init__.py | 2 ++ setup.py | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/metric_learn/__init__.py b/metric_learn/__init__.py index b86c10e1..119db584 100644 --- a/metric_learn/__init__.py +++ b/metric_learn/__init__.py @@ -11,3 +11,5 @@ from .rca import RCA, RCA_Supervised from .mlkr import MLKR from .mmc import MMC, MMC_Supervised + +__version__ = '0.4.0' diff --git a/setup.py b/setup.py index c661dd10..32a21246 100755 --- a/setup.py +++ b/setup.py @@ -1,8 +1,9 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- from setuptools import setup +import metric_learn -version = "0.4.0" +version = metric_learn.__version__ setup(name='metric-learn', version=version, description='Python implementations of metric learning algorithms', From 90205d0d3b53505c6fa30e3480632747004bcbdc Mon Sep 17 00:00:00 2001 From: William de Vazelhes <31916524+wdevazelhes@users.noreply.github.com> Date: Thu, 9 Aug 2018 09:20:57 +0200 Subject: [PATCH 065/210] Revert "[MRG] Update __init__.py to allow printing version (#108)" (#109) This reverts commit b4debe39b5d4f471119edf340f91be3c7dd7c8f5. 
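A plausible reason for the revert (an inference from the follow-up patch,
not stated in this message): importing metric_learn from setup.py pulls in
the package's dependencies at build time, which can break installation in a
fresh environment. The next patch instead reads the version without
importing the package; a minimal sketch of that pattern, with illustrative
paths:

    import io
    import os

    # Read __version__ from a tiny file instead of importing the package,
    # so setup.py works even before numpy/scipy/scikit-learn are installed.
    version = {}
    with io.open(os.path.join('metric_learn', '_version.py')) as fp:
        exec(fp.read(), version)
    print(version['__version__'])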
--- metric_learn/__init__.py | 2 -- setup.py | 3 +-- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/metric_learn/__init__.py b/metric_learn/__init__.py index 119db584..b86c10e1 100644 --- a/metric_learn/__init__.py +++ b/metric_learn/__init__.py @@ -11,5 +11,3 @@ from .rca import RCA, RCA_Supervised from .mlkr import MLKR from .mmc import MMC, MMC_Supervised - -__version__ = '0.4.0' diff --git a/setup.py b/setup.py index 32a21246..c661dd10 100755 --- a/setup.py +++ b/setup.py @@ -1,9 +1,8 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- from setuptools import setup -import metric_learn -version = metric_learn.__version__ +version = "0.4.0" setup(name='metric-learn', version=version, description='Python implementations of metric learning algorithms', From 67828051f5ff8322d374033d670b36b3e771074b Mon Sep 17 00:00:00 2001 From: William de Vazelhes <31916524+wdevazelhes@users.noreply.github.com> Date: Thu, 9 Aug 2018 20:55:43 +0200 Subject: [PATCH 066/210] Add version number in __init__.py, new PR (#110) * Add version number in __init__.py, new PR * FIX: other proposal for single source version * FIX: add newline at end of file --- metric_learn/__init__.py | 2 ++ metric_learn/_version.py | 1 + setup.py | 8 ++++++-- 3 files changed, 9 insertions(+), 2 deletions(-) create mode 100644 metric_learn/_version.py diff --git a/metric_learn/__init__.py b/metric_learn/__init__.py index b86c10e1..b2b84559 100644 --- a/metric_learn/__init__.py +++ b/metric_learn/__init__.py @@ -11,3 +11,5 @@ from .rca import RCA, RCA_Supervised from .mlkr import MLKR from .mmc import MMC, MMC_Supervised + +from ._version import __version__ diff --git a/metric_learn/_version.py b/metric_learn/_version.py new file mode 100644 index 00000000..abeeedbf --- /dev/null +++ b/metric_learn/_version.py @@ -0,0 +1 @@ +__version__ = '0.4.0' diff --git a/setup.py b/setup.py index c661dd10..34fedd76 100755 --- a/setup.py +++ b/setup.py @@ -1,10 +1,14 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- from setuptools import setup +import os + +version = {} +with open(os.path.join('metric_learn', '_version.py')) as fp: + exec(fp.read(), version) -version = "0.4.0" setup(name='metric-learn', - version=version, + version=version['__version__'], description='Python implementations of metric learning algorithms', author=['CJ Carey', 'Yuan Tang'], author_email='ccarey@cs.umass.edu', From b60b72b1b71b21656a597834de6b813357d3c991 Mon Sep 17 00:00:00 2001 From: William de Vazelhes <31916524+wdevazelhes@users.noreply.github.com> Date: Thu, 16 Aug 2018 18:55:28 +0200 Subject: [PATCH 067/210] [MRG] FIX: fix MLKR cost and gradient (#111) * FIX: fix mlkr cost and gradient * FIX: fix numerical problems in MLKR with logsumexp * STY: remove useless parenthesis * ENH: improve MLKR memory performance --- metric_learn/mlkr.py | 24 ++++++++++-------------- test/metric_learn_test.py | 25 ++++++++++++++++++++++++- 2 files changed, 34 insertions(+), 15 deletions(-) diff --git a/metric_learn/mlkr.py b/metric_learn/mlkr.py index 35b80495..af772664 100644 --- a/metric_learn/mlkr.py +++ b/metric_learn/mlkr.py @@ -8,6 +8,7 @@ """ from __future__ import division, print_function import numpy as np +from sklearn.utils.fixes import logsumexp from scipy.optimize import minimize from scipy.spatial.distance import pdist, squareform from sklearn.decomposition import PCA @@ -79,11 +80,7 @@ def fit(self, X, y): """ X, y, A = self._process_inputs(X, y) - # note: this line takes (n*n*d) memory! 
- # for larger datasets, we'll need to compute dX as we go - dX = (X[None] - X[:, None]).reshape((-1, X.shape[1])) - - res = minimize(_loss, A.ravel(), (X, y, dX), method='CG', jac=True, + res = minimize(_loss, A.ravel(), (X, y), method='CG', jac=True, tol=self.alpha, options=dict(maxiter=self.max_iter, eps=self.epsilon)) self.transformer_ = res.x.reshape(A.shape) @@ -94,19 +91,18 @@ def transformer(self): return self.transformer_ -def _loss(flatA, X, y, dX): +def _loss(flatA, X, y): A = flatA.reshape((-1, X.shape[1])) dist = pdist(X, metric='mahalanobis', VI=A.T.dot(A)) - K = squareform(np.exp(-dist**2)) - denom = np.maximum(K.sum(axis=0), EPS) - yhat = K.dot(y) / denom + dist = squareform(dist ** 2) + np.fill_diagonal(dist, np.inf) + softmax = np.exp(- dist - logsumexp(- dist, axis=1)[:, np.newaxis]) + yhat = softmax.dot(y) ydiff = yhat - y cost = (ydiff**2).sum() # also compute the gradient - np.fill_diagonal(K, 1) - W = 2 * K * (np.outer(ydiff, ydiff) / denom) - # note: this is the part that the matlab impl drops to C for - M = (dX.T * W.ravel()).dot(dX) - grad = 2 * A.dot(M) + W = softmax * ydiff[:, np.newaxis] * (yhat[:, np.newaxis] - y) + X_emb_t = A.dot(X.T) + grad = 4 * (X_emb_t * W.sum(axis=0) - X_emb_t.dot(W + W.T)).dot(X) return cost, grad.ravel() diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py index 729b00a8..e367b0db 100644 --- a/test/metric_learn_test.py +++ b/test/metric_learn_test.py @@ -3,9 +3,10 @@ from scipy.optimize import check_grad from six.moves import xrange from sklearn.metrics import pairwise_distances -from sklearn.datasets import load_iris, make_classification +from sklearn.datasets import load_iris, make_classification, make_regression from numpy.testing import assert_array_almost_equal, assert_array_equal from sklearn.utils.testing import assert_warns_message +from sklearn.utils.validation import check_X_y from metric_learn import (LMNN, NCA, LFDA, Covariance, MLKR, MMC, LSML_Supervised, ITML_Supervised, SDML_Supervised, @@ -246,6 +247,28 @@ def test_iris(self): csep = class_separation(mlkr.transform(), self.iris_labels) self.assertLess(csep, 0.25) + def test_finite_differences(self): + """Test gradient of loss function + + Assert that the gradient is almost equal to its finite differences + approximation. + """ + # Initialize the transformation `M`, as well as `X`, and `y` and `MLKR` + X, y = make_regression(n_features=4, random_state=1, n_samples=20) + X, y = check_X_y(X, y) + M = np.random.randn(2, X.shape[1]) + from metric_learn.mlkr import _loss + + def fun(M): + return _loss(M, X, y)[0] + + def grad_fn(M): + return _loss(M, X, y)[1].ravel() + + # compute relative error + rel_diff = check_grad(fun, grad_fn, M.ravel()) / np.linalg.norm(grad_fn(M)) + np.testing.assert_almost_equal(rel_diff, 0.) 
+ class TestMMC(MetricTestCase): def test_iris(self): From acca56781a462fea67a908ffda96d26dafd7ef52 Mon Sep 17 00:00:00 2001 From: William de Vazelhes <31916524+wdevazelhes@users.noreply.github.com> Date: Fri, 17 Aug 2018 15:54:57 +0200 Subject: [PATCH 068/210] [MRG] Improve gradient computation in NCA and MLKR (#113) * ENH: Use multidot in NCA and MLKR * ENH: Reuse X_embedded already computed --- metric_learn/mlkr.py | 13 +++++++------ metric_learn/nca.py | 6 ++++-- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/metric_learn/mlkr.py b/metric_learn/mlkr.py index af772664..38e83511 100644 --- a/metric_learn/mlkr.py +++ b/metric_learn/mlkr.py @@ -10,8 +10,8 @@ import numpy as np from sklearn.utils.fixes import logsumexp from scipy.optimize import minimize -from scipy.spatial.distance import pdist, squareform from sklearn.decomposition import PCA +from sklearn.metrics import pairwise_distances from sklearn.utils.validation import check_X_y from .base_metric import BaseMetricLearner @@ -93,8 +93,8 @@ def transformer(self): def _loss(flatA, X, y): A = flatA.reshape((-1, X.shape[1])) - dist = pdist(X, metric='mahalanobis', VI=A.T.dot(A)) - dist = squareform(dist ** 2) + X_embedded = np.dot(X, A.T) + dist = pairwise_distances(X_embedded, squared=True) np.fill_diagonal(dist, np.inf) softmax = np.exp(- dist - logsumexp(- dist, axis=1)[:, np.newaxis]) yhat = softmax.dot(y) @@ -102,7 +102,8 @@ def _loss(flatA, X, y): cost = (ydiff**2).sum() # also compute the gradient - W = softmax * ydiff[:, np.newaxis] * (yhat[:, np.newaxis] - y) - X_emb_t = A.dot(X.T) - grad = 4 * (X_emb_t * W.sum(axis=0) - X_emb_t.dot(W + W.T)).dot(X) + W = softmax * ydiff[:, np.newaxis] * (y - yhat[:, np.newaxis]) + W_sym = W + W.T + np.fill_diagonal(W_sym, - W.sum(axis=0)) + grad = 4 * (X_embedded.T.dot(W_sym)).dot(X) return cost, grad.ravel() diff --git a/metric_learn/nca.py b/metric_learn/nca.py index 6ee0845a..adb1a991 100644 --- a/metric_learn/nca.py +++ b/metric_learn/nca.py @@ -10,6 +10,7 @@ from scipy.optimize import minimize from sklearn.metrics import pairwise_distances from sklearn.utils.validation import check_X_y +from numpy.linalg import multi_dot try: # scipy.misc.logsumexp is deprecated in scipy 1.0.0 from scipy.special import logsumexp @@ -109,6 +110,7 @@ def _loss_grad_lbfgs(A, X, mask, sign=1.0): # Compute gradient of loss w.r.t. `transform` weighted_p_ij = masked_p_ij - p_ij * p - gradient = 2 * (X_embedded.T.dot(weighted_p_ij + weighted_p_ij.T) - - X_embedded.T * weighted_p_ij.sum(axis=0)).dot(X) + weighted_p_ij_sym = weighted_p_ij + weighted_p_ij.T + np.fill_diagonal(weighted_p_ij_sym, - weighted_p_ij.sum(axis=0)) + gradient = 2 * (X_embedded.T.dot(weighted_p_ij_sym)).dot(X) return sign * loss, sign * gradient.ravel() From e8eb1a600e0ed7e0e162d4b5493577527e93f832 Mon Sep 17 00:00:00 2001 From: William de Vazelhes <31916524+wdevazelhes@users.noreply.github.com> Date: Sat, 18 Aug 2018 01:16:28 +0200 Subject: [PATCH 069/210] FIX: add MLKR and MMC to doc (#114) --- doc/index.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/index.rst b/doc/index.rst index f50781fe..38ed730a 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -25,6 +25,8 @@ metric learning algorithms. 
   metric_learn.nca
   metric_learn.lfda
   metric_learn.rca
+  metric_learn.mmc
+  metric_learn.mlkr

 Each metric supports the following methods:

From 7441357b859960db181ebe29ddbdddd72a901691 Mon Sep 17 00:00:00 2001
From: William de Vazelhes <31916524+wdevazelhes@users.noreply.github.com>
Date: Sat, 18 Aug 2018 01:18:35 +0200
Subject: [PATCH 070/210] [WIP] Add verbose to NCA and MLKR (#105)

* ENH: Add verbose to NCA

* ENH: add verbose to MLKR

* ENH: Add test for convergence warning, and fix datasets (use
classification for classification and regression for regression)

* STY: update code according to review
https://github.com/metric-learn/metric-learn/pull/105#pullrequestreview-142488961

* FIX: return the real training time

* FIX: fix forgotten function call in test_no_verbose

* MAINT: Add L-BFGS-B to MLKR, and improve the tests with fewer features
for regression

* FIX: remove MLKR previous arguments for conjugate gradient and put
arguments for L-BFGS-B

* FIX: fix test string representation for mlkr

* FIX: convert y to numeric in MLKR (since it is a regression algorithm).

* FIX: fix MLKR test using the method mlkr._loss
---
 metric_learn/mlkr.py      | 103 ++++++++++++++++++++++++++------------
 metric_learn/nca.py       |  53 ++++++++++++++++++--
 test/metric_learn_test.py |  68 +++++++++++++++++++++++--
 test/test_base_metric.py  |   6 +--
 4 files changed, 187 insertions(+), 43 deletions(-)

diff --git a/metric_learn/mlkr.py b/metric_learn/mlkr.py
index 38e83511..ddcb698a 100644
--- a/metric_learn/mlkr.py
+++ b/metric_learn/mlkr.py
@@ -7,12 +7,16 @@ for dimensionality reduction and high dimensional data visualization.
 """
 from __future__ import division, print_function
+import time
+import sys
+import warnings
 import numpy as np
 from sklearn.utils.fixes import logsumexp
 from scipy.optimize import minimize
 from sklearn.decomposition import PCA
 from sklearn.metrics import pairwise_distances
 from sklearn.utils.validation import check_X_y
+from sklearn.exceptions import ConvergenceWarning

 from .base_metric import BaseMetricLearner

@@ -21,8 +25,8 @@ class MLKR(BaseMetricLearner):
   """Metric Learning for Kernel Regression (MLKR)"""

-  def __init__(self, num_dims=None, A0=None, epsilon=0.01, alpha=0.0001,
-               max_iter=1000):
+  def __init__(self, num_dims=None, A0=None, tol=None, max_iter=1000,
+               verbose=False):
     """
     Initialize MLKR.

     Parameters
     ----------
     num_dims : int, optional
         Dimensionality of reduced space (defaults to dimension of X)

     A0: array-like, optional
         Initialization of transformation matrix. Defaults to PCA loadings.

-    epsilon: float, optional
-        Step size for congujate gradient descent.
-
-    alpha: float, optional
-        Stopping criterion for congujate gradient descent.
+    tol: float, optional (default=None)
+        Convergence tolerance for the optimization.

     max_iter: int, optional
         Cap on number of conjugate gradient iterations.
+
+    verbose : bool, optional (default=False)
+        Whether to print progress messages or not.
""" self.num_dims = num_dims self.A0 = A0 - self.epsilon = epsilon - self.alpha = alpha + self.tol = tol self.max_iter = max_iter + self.verbose = verbose def _process_inputs(self, X, y): - self.X_, y = check_X_y(X, y) + self.X_, y = check_X_y(X, y, y_numeric=True) n, d = self.X_.shape if y.shape[0] != n: raise ValueError('Data and label lengths mismatch: %d != %d' @@ -80,30 +84,67 @@ def fit(self, X, y): """ X, y, A = self._process_inputs(X, y) - res = minimize(_loss, A.ravel(), (X, y), method='CG', jac=True, - tol=self.alpha, - options=dict(maxiter=self.max_iter, eps=self.epsilon)) + # Measure the total training time + train_time = time.time() + + self.n_iter_ = 0 + res = minimize(self._loss, A.ravel(), (X, y), method='L-BFGS-B', + jac=True, tol=self.tol, + options=dict(maxiter=self.max_iter)) self.transformer_ = res.x.reshape(A.shape) - self.n_iter_ = res.nit + + # Stop timer + train_time = time.time() - train_time + if self.verbose: + cls_name = self.__class__.__name__ + # Warn the user if the algorithm did not converge + if not res.success: + warnings.warn('[{}] MLKR did not converge: {}' + .format(cls_name, res.message), ConvergenceWarning) + print('[{}] Training took {:8.2f}s.'.format(cls_name, train_time)) + return self def transformer(self): return self.transformer_ - -def _loss(flatA, X, y): - A = flatA.reshape((-1, X.shape[1])) - X_embedded = np.dot(X, A.T) - dist = pairwise_distances(X_embedded, squared=True) - np.fill_diagonal(dist, np.inf) - softmax = np.exp(- dist - logsumexp(- dist, axis=1)[:, np.newaxis]) - yhat = softmax.dot(y) - ydiff = yhat - y - cost = (ydiff**2).sum() - - # also compute the gradient - W = softmax * ydiff[:, np.newaxis] * (y - yhat[:, np.newaxis]) - W_sym = W + W.T - np.fill_diagonal(W_sym, - W.sum(axis=0)) - grad = 4 * (X_embedded.T.dot(W_sym)).dot(X) - return cost, grad.ravel() + def _loss(self, flatA, X, y): + + if self.n_iter_ == 0 and self.verbose: + header_fields = ['Iteration', 'Objective Value', 'Time(s)'] + header_fmt = '{:>10} {:>20} {:>10}' + header = header_fmt.format(*header_fields) + cls_name = self.__class__.__name__ + print('[{cls}]'.format(cls=cls_name)) + print('[{cls}] {header}\n[{cls}] {sep}'.format(cls=cls_name, + header=header, + sep='-' * len(header))) + + start_time = time.time() + + A = flatA.reshape((-1, X.shape[1])) + X_embedded = np.dot(X, A.T) + dist = pairwise_distances(X_embedded, squared=True) + np.fill_diagonal(dist, np.inf) + softmax = np.exp(- dist - logsumexp(- dist, axis=1)[:, np.newaxis]) + yhat = softmax.dot(y) + ydiff = yhat - y + cost = (ydiff ** 2).sum() + + # also compute the gradient + W = softmax * ydiff[:, np.newaxis] * (y - yhat[:, np.newaxis]) + W_sym = W + W.T + np.fill_diagonal(W_sym, - W.sum(axis=0)) + grad = 4 * (X_embedded.T.dot(W_sym)).dot(X) + + if self.verbose: + start_time = time.time() - start_time + values_fmt = '[{cls}] {n_iter:>10} {loss:>20.6e} {start_time:>10.2f}' + print(values_fmt.format(cls=self.__class__.__name__, + n_iter=self.n_iter_, loss=cost, + start_time=start_time)) + sys.stdout.flush() + + self.n_iter_ += 1 + + return cost, grad.ravel() diff --git a/metric_learn/nca.py b/metric_learn/nca.py index adb1a991..2f15c7af 100644 --- a/metric_learn/nca.py +++ b/metric_learn/nca.py @@ -6,11 +6,13 @@ from __future__ import absolute_import import warnings +import time +import sys import numpy as np from scipy.optimize import minimize from sklearn.metrics import pairwise_distances from sklearn.utils.validation import check_X_y -from numpy.linalg import multi_dot +from sklearn.exceptions 
import ConvergenceWarning try: # scipy.misc.logsumexp is deprecated in scipy 1.0.0 from scipy.special import logsumexp @@ -24,7 +26,7 @@ class NCA(BaseMetricLearner): def __init__(self, num_dims=None, max_iter=100, learning_rate='deprecated', - tol=None): + tol=None, verbose=False): """Neighborhood Components Analysis Parameters @@ -45,11 +47,15 @@ def __init__(self, num_dims=None, max_iter=100, learning_rate='deprecated', tol : float, optional (default=None) Convergence tolerance for the optimization. + + verbose : bool, optional (default=False) + Whether to print progress messages or not. """ self.num_dims = num_dims self.max_iter = max_iter self.learning_rate = learning_rate # TODO: remove in v.0.5.0 self.tol = tol + self.verbose = verbose def transformer(self): return self.A_ @@ -70,6 +76,9 @@ def fit(self, X, y): if num_dims is None: num_dims = d + # Measure the total training time + train_time = time.time() + # Initialize A to a scaling matrix A = np.zeros((num_dims, d)) np.fill_diagonal(A, 1./(np.maximum(X.max(axis=0)-X.min(axis=0), EPS))) @@ -86,15 +95,41 @@ def fit(self, X, y): } # Call the optimizer + self.n_iter_ = 0 opt_result = minimize(**optimizer_params) self.X_ = X self.A_ = opt_result.x.reshape(-1, X.shape[1]) self.n_iter_ = opt_result.nit + + # Stop timer + train_time = time.time() - train_time + if self.verbose: + cls_name = self.__class__.__name__ + + # Warn the user if the algorithm did not converge + if not opt_result.success: + warnings.warn('[{}] NCA did not converge: {}'.format( + cls_name, opt_result.message), ConvergenceWarning) + + print('[{}] Training took {:8.2f}s.'.format(cls_name, train_time)) + return self - @staticmethod - def _loss_grad_lbfgs(A, X, mask, sign=1.0): + def _loss_grad_lbfgs(self, A, X, mask, sign=1.0): + + if self.n_iter_ == 0 and self.verbose: + header_fields = ['Iteration', 'Objective Value', 'Time(s)'] + header_fmt = '{:>10} {:>20} {:>10}' + header = header_fmt.format(*header_fields) + cls_name = self.__class__.__name__ + print('[{cls}]'.format(cls=cls_name)) + print('[{cls}] {header}\n[{cls}] {sep}'.format(cls=cls_name, + header=header, + sep='-' * len(header))) + + start_time = time.time() + A = A.reshape(-1, X.shape[1]) X_embedded = np.dot(X, A.T) # (n_samples, num_dims) # Compute softmax distances @@ -113,4 +148,14 @@ def _loss_grad_lbfgs(A, X, mask, sign=1.0): weighted_p_ij_sym = weighted_p_ij + weighted_p_ij.T np.fill_diagonal(weighted_p_ij_sym, - weighted_p_ij.sum(axis=0)) gradient = 2 * (X_embedded.T.dot(weighted_p_ij_sym)).dot(X) + + if self.verbose: + start_time = time.time() - start_time + values_fmt = '[{cls}] {n_iter:>10} {loss:>20.6e} {start_time:>10.2f}' + print(values_fmt.format(cls=self.__class__.__name__, + n_iter=self.n_iter_, loss=loss, + start_time=start_time)) + sys.stdout.flush() + + self.n_iter_ += 1 return sign * loss, sign * gradient.ravel() diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py index e367b0db..1f2af2f7 100644 --- a/test/metric_learn_test.py +++ b/test/metric_learn_test.py @@ -1,4 +1,6 @@ +import re import unittest +import pytest import numpy as np from scipy.optimize import check_grad from six.moves import xrange @@ -6,6 +8,7 @@ from sklearn.datasets import load_iris, make_classification, make_regression from numpy.testing import assert_array_almost_equal, assert_array_equal from sklearn.utils.testing import assert_warns_message +from sklearn.exceptions import ConvergenceWarning from sklearn.utils.validation import check_X_y from metric_learn import (LMNN, NCA, LFDA, Covariance, 
MLKR, MMC, @@ -111,12 +114,14 @@ def test_finite_differences(self): X, y = make_classification() M = np.random.randn(np.random.randint(1, X.shape[1] + 1), X.shape[1]) mask = y[:, np.newaxis] == y[np.newaxis, :] + nca = NCA() + nca.n_iter_ = 0 def fun(M): - return NCA._loss_grad_lbfgs(M, X, mask)[0] + return nca._loss_grad_lbfgs(M, X, mask)[0] def grad(M): - return NCA._loss_grad_lbfgs(M, X, mask)[1].ravel() + return nca._loss_grad_lbfgs(M, X, mask)[1].ravel() # compute relative error rel_diff = check_grad(fun, grad, M.ravel()) / np.linalg.norm(grad(M)) @@ -257,13 +262,14 @@ def test_finite_differences(self): X, y = make_regression(n_features=4, random_state=1, n_samples=20) X, y = check_X_y(X, y) M = np.random.randn(2, X.shape[1]) - from metric_learn.mlkr import _loss + mlkr = MLKR() + mlkr.n_iter_ = 0 def fun(M): - return _loss(M, X, y)[0] + return mlkr._loss(M, X, y)[0] def grad_fn(M): - return _loss(M, X, y)[1].ravel() + return mlkr._loss(M, X, y)[1].ravel() # compute relative error rel_diff = check_grad(fun, grad_fn, M.ravel()) / np.linalg.norm(grad_fn(M)) @@ -307,5 +313,57 @@ def test_iris(self): self.assertLess(csep, 0.2) +@pytest.mark.parametrize(('algo_class', 'dataset'), + [(NCA, make_classification()), + (MLKR, make_regression())]) +def test_verbose(algo_class, dataset, capsys): + # assert there is proper output when verbose = True + X, y = dataset + model = algo_class(verbose=True) + model.fit(X, y) + out, _ = capsys.readouterr() + + # check output + lines = re.split('\n+', out) + header = '{:>10} {:>20} {:>10}'.format('Iteration', 'Objective Value', + 'Time(s)') + assert lines[0] == '[{}]'.format(algo_class.__name__) + assert lines[1] == '[{}] {}'.format(algo_class.__name__, header) + assert lines[2] == '[{}] {}'.format(algo_class.__name__, '-' * len(header)) + for line in lines[3:-2]: + # The following regex will match for instance: + # '[NCA] 0 6.988936e+01 0.01' + assert re.match("\[" + algo_class.__name__ + "\]\ *\d+\ *\d\.\d{6}e[+|-]" + "\d+\ *\d+\.\d{2}", line) + assert re.match("\[" + algo_class.__name__ + "\] Training took\ *" + "\d+\.\d{2}s\.", lines[-2]) + assert lines[-1] == '' + + +@pytest.mark.parametrize(('algo_class', 'dataset'), + [(NCA, make_classification()), + (MLKR, make_regression(n_features=10))]) +def test_no_verbose(dataset, algo_class, capsys): + # assert by default there is no output (verbose=False) + X, y = dataset + model = algo_class() + model.fit(X, y) + out, _ = capsys.readouterr() + # check output + assert (out == '') + + +@pytest.mark.parametrize(('algo_class', 'dataset'), + [(NCA, make_classification()), + (MLKR, make_regression(n_features=10))]) +def test_convergence_warning(dataset, algo_class): + X, y = dataset + model = algo_class(max_iter=2, verbose=True) + cls_name = model.__class__.__name__ + assert_warns_message(ConvergenceWarning, + '[{}] {} did not converge'.format(cls_name, cls_name), + model.fit, X, y) + + if __name__ == '__main__': unittest.main() diff --git a/test/test_base_metric.py b/test/test_base_metric.py index 81a0fe6e..4b132af4 100644 --- a/test/test_base_metric.py +++ b/test/test_base_metric.py @@ -17,7 +17,7 @@ def test_lmnn(self): def test_nca(self): self.assertEqual(str(metric_learn.NCA()), ("NCA(learning_rate='deprecated', max_iter=100, " - "num_dims=None, tol=None)")) + "num_dims=None, tol=None,\n verbose=False)")) def test_lfda(self): self.assertEqual(str(metric_learn.LFDA()), @@ -61,8 +61,8 @@ def test_rca(self): def test_mlkr(self): self.assertEqual(str(metric_learn.MLKR()), - "MLKR(A0=None, alpha=0.0001, 
epsilon=0.01, " - "max_iter=1000, num_dims=None)") + "MLKR(A0=None, max_iter=1000, num_dims=None, tol=None, " + "verbose=False)") def test_mmc(self): self.assertEqual(str(metric_learn.MMC()), """ From efeab88f7f0a84c90c0beced5978e6f5da37fb9a Mon Sep 17 00:00:00 2001 From: William de Vazelhes <31916524+wdevazelhes@users.noreply.github.com> Date: Sat, 18 Aug 2018 02:49:35 +0200 Subject: [PATCH 071/210] [MRG] FIX Fix LMNN rollback (#101) * FIX fixes #88 Stores L and G in addition to what was stored before * TST: non regression test for this PR - test that LMNN converges on a simple example where it should converge - test that the objective function never has twice the same value * MAINT: Invert the order of algorithm: Try forward updates rather than doing rollback after wrong updates * MAINT: update code according to comments https://github.com/metric-learn/metric-learn/pull/101#pullrequestreview-134072011 * FIX: update also test_convergence_simple_example * FIX: remove \xc2 character * FIX: use list to copy list for python2 compatibility * MAINT: make code more readable with while break (see https://github.com/metric-learn/metric-learn/pull/101#discussion_r206998425) * FIX: remove non ascii character * FIX: remove keyring.deb * STY: remove unused imports --- metric_learn/lmnn.py | 156 +++++++++++++++++++++----------------- test/metric_learn_test.py | 33 +++++++- 2 files changed, 117 insertions(+), 72 deletions(-) diff --git a/metric_learn/lmnn.py b/metric_learn/lmnn.py index dea12f0c..f58bc00a 100644 --- a/metric_learn/lmnn.py +++ b/metric_learn/lmnn.py @@ -90,83 +90,49 @@ def fit(self, X, y): a1[nn_idx] = np.array([]) a2[nn_idx] = np.array([]) - # initialize gradient and L - G = dfG * reg + df * (1-reg) + # initialize L L = self.L_ - objective = np.inf - - # main loop - for it in xrange(1, self.max_iter): - df_old = df.copy() - a1_old = [a.copy() for a in a1] - a2_old = [a.copy() for a in a2] - objective_old = objective - # Compute pairwise distances under current metric - Lx = L.dot(self.X_.T).T - g0 = _inplace_paired_L2(*Lx[impostors]) - Ni = 1 + _inplace_paired_L2(Lx[target_neighbors], Lx[:,None,:]) - g1,g2 = Ni[impostors] - - # compute the gradient - total_active = 0 - for nn_idx in reversed(xrange(k)): - act1 = g0 < g1[:,nn_idx] - act2 = g0 < g2[:,nn_idx] - total_active += act1.sum() + act2.sum() - - if it > 1: - plus1 = act1 & ~a1[nn_idx] - minus1 = a1[nn_idx] & ~act1 - plus2 = act2 & ~a2[nn_idx] - minus2 = a2[nn_idx] & ~act2 - else: - plus1 = act1 - plus2 = act2 - minus1 = np.zeros(0, dtype=int) - minus2 = np.zeros(0, dtype=int) - - targets = target_neighbors[:,nn_idx] - PLUS, pweight = _count_edges(plus1, plus2, impostors, targets) - df += _sum_outer_products(self.X_, PLUS[:,0], PLUS[:,1], pweight) - MINUS, mweight = _count_edges(minus1, minus2, impostors, targets) - df -= _sum_outer_products(self.X_, MINUS[:,0], MINUS[:,1], mweight) - - in_imp, out_imp = impostors - df += _sum_outer_products(self.X_, in_imp[minus1], out_imp[minus1]) - df += _sum_outer_products(self.X_, in_imp[minus2], out_imp[minus2]) - - df -= _sum_outer_products(self.X_, in_imp[plus1], out_imp[plus1]) - df -= _sum_outer_products(self.X_, in_imp[plus2], out_imp[plus2]) - - a1[nn_idx] = act1 - a2[nn_idx] = act2 - - # do the gradient update - assert not np.isnan(df).any() - G = dfG * reg + df * (1-reg) - # compute the objective function - objective = total_active * (1-reg) - objective += G.flatten().dot(L.T.dot(L).flatten()) - assert not np.isnan(objective) - delta_obj = objective - objective_old + # first 
iteration: we compute variables (including objective and gradient) + # at initialization point + G, objective, total_active, df, a1, a2 = ( + self._loss_grad(L, dfG, impostors, 1, k, reg, target_neighbors, df, a1, + a2)) + + for it in xrange(2, self.max_iter): + # then at each iteration, we try to find a value of L that has better + # objective than the previous L, following the gradient: + while True: + # the next point next_L to try out is found by a gradient step + L_next = L - 2 * learn_rate * G + # we compute the objective at next point + # we copy variables that can be modified by _loss_grad, because if we + # retry we don t want to modify them several times + (G_next, objective_next, total_active_next, df_next, a1_next, + a2_next) = ( + self._loss_grad(L_next, dfG, impostors, it, k, reg, + target_neighbors, df.copy(), list(a1), list(a2))) + assert not np.isnan(objective) + delta_obj = objective_next - objective + if delta_obj > 0: + # if we did not find a better objective, we retry with an L closer to + # the starting point, by decreasing the learning rate (making the + # gradient step smaller) + learn_rate /= 2 + else: + # otherwise, if we indeed found a better obj, we get out of the loop + break + # when the better L is found (and the related variables), we set the + # old variables to these new ones before next iteration and we + # slightly increase the learning rate + L = L_next + G, df, objective, total_active, a1, a2 = ( + G_next, df_next, objective_next, total_active_next, a1_next, a2_next) + learn_rate *= 1.01 if self.verbose: print(it, objective, delta_obj, total_active, learn_rate) - # update step size - if delta_obj > 0: - # we're getting worse... roll back! - learn_rate /= 2.0 - df = df_old - a1 = a1_old - a2 = a2_old - objective = objective_old - else: - # update L - L -= learn_rate * 2 * L.dot(G) - learn_rate *= 1.01 - # check for convergence if it > self.min_iter and abs(delta_obj) < self.convergence_tol: if self.verbose: @@ -181,6 +147,54 @@ def fit(self, X, y): self.n_iter_ = it return self + def _loss_grad(self, L, dfG, impostors, it, k, reg, target_neighbors, df, a1, + a2): + # Compute pairwise distances under current metric + Lx = L.dot(self.X_.T).T + g0 = _inplace_paired_L2(*Lx[impostors]) + Ni = 1 + _inplace_paired_L2(Lx[target_neighbors], Lx[:, None, :]) + g1, g2 = Ni[impostors] + # compute the gradient + total_active = 0 + for nn_idx in reversed(xrange(k)): + act1 = g0 < g1[:, nn_idx] + act2 = g0 < g2[:, nn_idx] + total_active += act1.sum() + act2.sum() + + if it > 1: + plus1 = act1 & ~a1[nn_idx] + minus1 = a1[nn_idx] & ~act1 + plus2 = act2 & ~a2[nn_idx] + minus2 = a2[nn_idx] & ~act2 + else: + plus1 = act1 + plus2 = act2 + minus1 = np.zeros(0, dtype=int) + minus2 = np.zeros(0, dtype=int) + + targets = target_neighbors[:, nn_idx] + PLUS, pweight = _count_edges(plus1, plus2, impostors, targets) + df += _sum_outer_products(self.X_, PLUS[:, 0], PLUS[:, 1], pweight) + MINUS, mweight = _count_edges(minus1, minus2, impostors, targets) + df -= _sum_outer_products(self.X_, MINUS[:, 0], MINUS[:, 1], mweight) + + in_imp, out_imp = impostors + df += _sum_outer_products(self.X_, in_imp[minus1], out_imp[minus1]) + df += _sum_outer_products(self.X_, in_imp[minus2], out_imp[minus2]) + + df -= _sum_outer_products(self.X_, in_imp[plus1], out_imp[plus1]) + df -= _sum_outer_products(self.X_, in_imp[plus2], out_imp[plus2]) + + a1[nn_idx] = act1 + a2[nn_idx] = act2 + # do the gradient update + assert not np.isnan(df).any() + G = dfG * reg + df * (1 - reg) + # compute the objective 
function + objective = total_active * (1 - reg) + objective += G.flatten().dot(L.T.dot(L).flatten()) + return G, objective, total_active, df, a1, a2 + def _select_targets(self): target_neighbors = np.empty((self.X_.shape[0], self.k), dtype=int) for label in self.labels_: diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py index 1f2af2f7..1d0a5d02 100644 --- a/test/metric_learn_test.py +++ b/test/metric_learn_test.py @@ -1,5 +1,5 @@ -import re import unittest +import re import pytest import numpy as np from scipy.optimize import check_grad @@ -76,6 +76,37 @@ def test_iris(self): self.assertLess(csep, 0.25) +def test_convergence_simple_example(capsys): + # LMNN should converge on this simple example, which it did not with + # this issue: https://github.com/metric-learn/metric-learn/issues/88 + X, y = make_classification(random_state=0) + lmnn = python_LMNN(verbose=True) + lmnn.fit(X, y) + out, _ = capsys.readouterr() + assert "LMNN converged with objective" in out + + +def test_no_twice_same_objective(capsys): + # test that the objective function never has twice the same value + # see https://github.com/metric-learn/metric-learn/issues/88 + X, y = make_classification(random_state=0) + lmnn = python_LMNN(verbose=True) + lmnn.fit(X, y) + out, _ = capsys.readouterr() + lines = re.split("\n+", out) + # we get only objectives from each line: + # the regexp matches a float that follows an integer (the iteration + # number), and which is followed by a (signed) float (delta obj). It + # matches for instance: + # 3 **1113.7665747189938** -3.182774197440267 46431.0200999999999998e-06 + objectives = [re.search("\d* (?:(\d*.\d*))[ | -]\d*.\d*", s) + for s in lines] + objectives = [match.group(1) for match in objectives if match is not None] + # we remove the last element because it can be equal to the penultimate + # if the last gradient update is null + assert len(objectives[:-1]) == len(set(objectives[:-1])) + + class TestSDML(MetricTestCase): def test_iris(self): # Note: this is a flaky test, which fails for certain seeds. 
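The step-size policy introduced in the LMNN rollback patch above, as a
standalone sketch (generic names and a toy objective; the real fit loop
also carries the gradient, active sets, and convergence checks, which are
omitted here):

    import numpy as np

    def try_forward_step(L, G, objective_fn, learn_rate):
        # Try a gradient step; if the objective got worse, halve the
        # learning rate and retry from the same point; otherwise accept
        # the step and grow the rate slightly, as in the loop above.
        obj = objective_fn(L)
        while True:
            L_next = L - 2 * learn_rate * G
            if objective_fn(L_next) - obj > 0:
                learn_rate /= 2
            else:
                return L_next, learn_rate * 1.01

    # Toy usage on f(L) = ||L||^2, whose gradient is 2 * L:
    L, rate = np.array([3.0, -2.0]), 0.1
    for _ in range(10):
        L, rate = try_forward_step(L, 2 * L, lambda M: float(np.dot(M, M)), rate)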
From c5f3175b73a8a0f060eaec02de93b3d3f6a03ab5 Mon Sep 17 00:00:00 2001 From: William de Vazelhes <31916524+wdevazelhes@users.noreply.github.com> Date: Tue, 21 Aug 2018 15:12:54 +0200 Subject: [PATCH 072/210] [MRG] Add github issue template message (#116) * ENH: add github issue_template message * FIX: update message according to comment https://github.com/terrytangyuan * STY: style knitpicks --- .github/issue_template.md | 44 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 .github/issue_template.md diff --git a/.github/issue_template.md b/.github/issue_template.md new file mode 100644 index 00000000..d4fb0abe --- /dev/null +++ b/.github/issue_template.md @@ -0,0 +1,44 @@ +#### Description + + +#### Steps/Code to Reproduce + + +#### Expected Results + + +#### Actual Results + + +#### Versions + + From 22f60dde29f4754712e72308621f2ea9390f4b57 Mon Sep 17 00:00:00 2001 From: William de Vazelhes <31916524+wdevazelhes@users.noreply.github.com> Date: Fri, 31 Aug 2018 16:02:43 +0200 Subject: [PATCH 073/210] [MRG] Add documentation for supervised classes and add __init__ docstrings to doc (#115) * FEAT: Add documentation for supervised classes * Add doc for ITML, and put mlkr to the right place in index * FIX: update doc * FIX: add __init__ docstrings in doc * FIX: Update doc to suit regression too * STY: add points at end of docstrings * MAINT: address https://github.com/metric-learn/metric-learn/pull/115#pullrequestreview-150575623 * DOC: fix semi to weakly supervised --- doc/index.rst | 35 +++++++++++++++++++++++++++------ doc/metric_learn.covariance.rst | 1 + doc/metric_learn.itml.rst | 1 + doc/metric_learn.lfda.rst | 1 + doc/metric_learn.lmnn.rst | 1 + doc/metric_learn.lsml.rst | 1 + doc/metric_learn.mlkr.rst | 1 + doc/metric_learn.mmc.rst | 1 + doc/metric_learn.nca.rst | 1 + doc/metric_learn.rca.rst | 1 + doc/metric_learn.sdml.rst | 1 + metric_learn/itml.py | 12 ++++++++--- metric_learn/lsml.py | 13 +++++++++--- metric_learn/mmc.py | 12 ++++++++--- metric_learn/rca.py | 6 +++++- metric_learn/sdml.py | 13 +++++++++--- 16 files changed, 82 insertions(+), 19 deletions(-) diff --git a/doc/index.rst b/doc/index.rst index 38ed730a..36a6e80c 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -13,22 +13,45 @@ metrics. This package contains efficient Python implementations of several popular metric learning algorithms. +Supervised Algorithms +--------------------- +Supervised metric learning algorithms take as inputs points `X` and target +labels `y`, and learn a distance matrix that make points from the same class +(for classification) or with close target value (for regression) close to +each other, and points from different classes or with distant target values +far away from each other. + .. toctree:: - :caption: Algorithms :maxdepth: 1 metric_learn.covariance metric_learn.lmnn - metric_learn.itml - metric_learn.sdml - metric_learn.lsml metric_learn.nca metric_learn.lfda + metric_learn.mlkr + +Weakly-Supervised Algorithms +-------------------------- +Weakly supervised algorithms work on weaker information about the data points +than supervised algorithms. Rather than labeled points, they take as input +similarity judgments on tuples of data points, for instance pairs of similar +and dissimilar points. Refer to the documentation of each algorithm for its +particular form of input data. + +.. 
toctree:: + :maxdepth: 1 + + metric_learn.itml + metric_learn.lsml + metric_learn.sdml metric_learn.rca metric_learn.mmc - metric_learn.mlkr -Each metric supports the following methods: +Note that each weakly-supervised algorithm has a supervised version of the form +`*_Supervised` where similarity constraints are generated from +the labels information and passed to the underlying algorithm. + +Each metric learning algorithm supports the following methods: - ``fit(...)``, which learns the model. - ``transformer()``, which returns a transformation matrix diff --git a/doc/metric_learn.covariance.rst b/doc/metric_learn.covariance.rst index 92326cc0..493878c1 100644 --- a/doc/metric_learn.covariance.rst +++ b/doc/metric_learn.covariance.rst @@ -6,6 +6,7 @@ Covariance metric (baseline method) :undoc-members: :inherited-members: :show-inheritance: + :special-members: __init__ Example Code ------------ diff --git a/doc/metric_learn.itml.rst b/doc/metric_learn.itml.rst index d6fb2221..addb4c76 100644 --- a/doc/metric_learn.itml.rst +++ b/doc/metric_learn.itml.rst @@ -6,6 +6,7 @@ Information Theoretic Metric Learning (ITML) :undoc-members: :inherited-members: :show-inheritance: + :special-members: __init__ Example Code ------------ diff --git a/doc/metric_learn.lfda.rst b/doc/metric_learn.lfda.rst index 95cde90d..41088a68 100644 --- a/doc/metric_learn.lfda.rst +++ b/doc/metric_learn.lfda.rst @@ -6,6 +6,7 @@ Local Fisher Discriminant Analysis (LFDA) :undoc-members: :inherited-members: :show-inheritance: + :special-members: __init__ Example Code ------------ diff --git a/doc/metric_learn.lmnn.rst b/doc/metric_learn.lmnn.rst index 4062bfa0..bc65161e 100644 --- a/doc/metric_learn.lmnn.rst +++ b/doc/metric_learn.lmnn.rst @@ -6,6 +6,7 @@ Large Margin Nearest Neighbor (LMNN) :undoc-members: :inherited-members: :show-inheritance: + :special-members: __init__ Example Code ------------ diff --git a/doc/metric_learn.lsml.rst b/doc/metric_learn.lsml.rst index c6c8ede9..0deae4e6 100644 --- a/doc/metric_learn.lsml.rst +++ b/doc/metric_learn.lsml.rst @@ -6,6 +6,7 @@ Least Squares Metric Learning (LSML) :undoc-members: :inherited-members: :show-inheritance: + :special-members: __init__ Example Code ------------ diff --git a/doc/metric_learn.mlkr.rst b/doc/metric_learn.mlkr.rst index a2f36c4f..f71697de 100644 --- a/doc/metric_learn.mlkr.rst +++ b/doc/metric_learn.mlkr.rst @@ -6,6 +6,7 @@ Metric Learning for Kernel Regression (MLKR) :undoc-members: :inherited-members: :show-inheritance: + :special-members: __init__ Example Code ------------ diff --git a/doc/metric_learn.mmc.rst b/doc/metric_learn.mmc.rst index f3ddaa9e..bb9031ba 100644 --- a/doc/metric_learn.mmc.rst +++ b/doc/metric_learn.mmc.rst @@ -6,6 +6,7 @@ Mahalanobis Metric Learning for Clustering (MMC) :undoc-members: :inherited-members: :show-inheritance: + :special-members: __init__ Example Code ------------ diff --git a/doc/metric_learn.nca.rst b/doc/metric_learn.nca.rst index 6a2675e5..7a4ee2c4 100644 --- a/doc/metric_learn.nca.rst +++ b/doc/metric_learn.nca.rst @@ -6,6 +6,7 @@ Neighborhood Components Analysis (NCA) :undoc-members: :inherited-members: :show-inheritance: + :special-members: __init__ Example Code ------------ diff --git a/doc/metric_learn.rca.rst b/doc/metric_learn.rca.rst index 2430cd82..027d583b 100644 --- a/doc/metric_learn.rca.rst +++ b/doc/metric_learn.rca.rst @@ -6,6 +6,7 @@ Relative Components Analysis (RCA) :undoc-members: :inherited-members: :show-inheritance: + :special-members: __init__ Example Code ------------ diff --git 
a/doc/metric_learn.sdml.rst b/doc/metric_learn.sdml.rst index 83570483..3e350a70 100644 --- a/doc/metric_learn.sdml.rst +++ b/doc/metric_learn.sdml.rst @@ -6,6 +6,7 @@ Sparse Determinant Metric Learning (SDML) :undoc-members: :inherited-members: :show-inheritance: + :special-members: __init__ Example Code ------------ diff --git a/metric_learn/itml.py b/metric_learn/itml.py index 4d27c412..7a9bc2d9 100644 --- a/metric_learn/itml.py +++ b/metric_learn/itml.py @@ -145,7 +145,11 @@ class ITML_Supervised(ITML): def __init__(self, gamma=1., max_iter=1000, convergence_threshold=1e-3, num_labeled=np.inf, num_constraints=None, bounds=None, A0=None, verbose=False): - """Initialize the learner. + """Initialize the supervised version of `ITML`. + + `ITML_Supervised` creates pairs of similar sample by taking same class + samples, and pairs of dissimilar samples by taking different class + samples. It then passes these pairs to `ITML` for training. Parameters ---------- @@ -153,8 +157,10 @@ def __init__(self, gamma=1., max_iter=1000, convergence_threshold=1e-3, value for slack variables max_iter : int, optional convergence_threshold : float, optional - num_labeled : int, optional - number of labels to preserve for training + num_labeled : int, optional (default=np.inf) + number of labeled points to keep for building pairs. Extra + labeled points will be considered unlabeled, and ignored as such. + Use np.inf (default) to use all labeled points. num_constraints: int, optional number of constraints to generate bounds : list (pos,neg) pairs, optional diff --git a/metric_learn/lsml.py b/metric_learn/lsml.py index 404fe286..c0bca855 100644 --- a/metric_learn/lsml.py +++ b/metric_learn/lsml.py @@ -134,7 +134,12 @@ def _gradient(self, metric): class LSML_Supervised(LSML): def __init__(self, tol=1e-3, max_iter=1000, prior=None, num_labeled=np.inf, num_constraints=None, weights=None, verbose=False): - """Initialize the learner. + """Initialize the supervised version of `LSML`. + + `LSML_Supervised` creates quadruplets from labeled samples by taking two + samples from the same class, and two samples from different classes. + This way it builds quadruplets where the two first points must be more + similar than the two last points. Parameters ---------- @@ -142,8 +147,10 @@ def __init__(self, tol=1e-3, max_iter=1000, prior=None, num_labeled=np.inf, max_iter : int, optional prior : (d x d) matrix, optional guess at a metric [default: covariance(X)] - num_labeled : int, optional - number of labels to preserve for training + num_labeled : int, optional (default=np.inf) + number of labeled points to keep for building quadruplets. Extra + labeled points will be considered unlabeled, and ignored as such. + Use np.inf (default) to use all labeled points. num_constraints: int, optional number of constraints to generate weights : (m,) array of floats, optional diff --git a/metric_learn/mmc.py b/metric_learn/mmc.py index ef08aeef..b98c31e0 100644 --- a/metric_learn/mmc.py +++ b/metric_learn/mmc.py @@ -386,15 +386,21 @@ class MMC_Supervised(MMC): def __init__(self, max_iter=100, max_proj=10000, convergence_threshold=1e-6, num_labeled=np.inf, num_constraints=None, A0=None, diagonal=False, diagonal_c=1.0, verbose=False): - """Initialize the learner. + """Initialize the supervised version of `MMC`. + + `MMC_Supervised` creates pairs of similar sample by taking same class + samples, and pairs of dissimilar samples by taking different class + samples. It then passes these pairs to `MMC` for training. 
Parameters
    ----------
    max_iter : int, optional
    max_proj : int, optional
    convergence_threshold : float, optional
-    num_labeled : int, optional
-      number of labels to preserve for training
+    num_labeled : int, optional (default=np.inf)
+      number of labeled points to keep for building pairs. Extra
+      labeled points will be considered unlabeled, and ignored as such.
+      Use np.inf (default) to use all labeled points.
    num_constraints: int, optional
      number of constraints to generate
    A0 : (d x d) matrix, optional
diff --git a/metric_learn/rca.py b/metric_learn/rca.py
index 0d9b3620..327c5002 100644
--- a/metric_learn/rca.py
+++ b/metric_learn/rca.py
@@ -138,7 +138,11 @@ def _inv_sqrtm(x):
class RCA_Supervised(RCA):
  def __init__(self, num_dims=None, pca_comps=None, num_chunks=100,
               chunk_size=2):
-    """Initialize the learner.
+    """Initialize the supervised version of `RCA`.
+
+    `RCA_Supervised` creates chunks of similar points by first sampling a
+    class, taking `chunk_size` elements in it, and repeating the process
+    `num_chunks` times.

    Parameters
    ----------
diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py
index 93280334..1746ec7d 100644
--- a/metric_learn/sdml.py
+++ b/metric_learn/sdml.py
@@ -83,7 +83,12 @@ def fit(self, X, W):
class SDML_Supervised(SDML):
  def __init__(self, balance_param=0.5, sparsity_param=0.01, use_cov=True,
               num_labeled=np.inf, num_constraints=None, verbose=False):
-    """
+    """Initialize the supervised version of `SDML`.
+
+    `SDML_Supervised` creates pairs of similar samples by taking same class
+    samples, and pairs of dissimilar samples by taking different class
+    samples. It then passes these pairs to `SDML` for training.
+
    Parameters
    ----------
    balance_param : float, optional
@@ -92,8 +97,10 @@ def __init__(self, balance_param=0.5, sparsity_param=0.01, use_cov=True,
      trade off between optimizer and sparseness (see graph_lasso)
    use_cov : bool, optional
      controls prior matrix, will use the identity if use_cov=False
-    num_labeled : int, optional
-      number of labels to preserve for training
+    num_labeled : int, optional (default=np.inf)
+      number of labeled points to keep for building pairs. Extra
+      labeled points will be considered unlabeled, and ignored as such.
+      Use np.inf (default) to use all labeled points.
    num_constraints : int, optional
      number of constraints to generate
    verbose : bool, optional
From 63ba00378d38a6c2e54a1003e54b1aee933c0f0f Mon Sep 17 00:00:00 2001
From: William de Vazelhes <31916524+wdevazelhes@users.noreply.github.com>
Date: Tue, 4 Sep 2018 11:11:52 +0200
Subject: [PATCH 074/210] REL: update copyright (#120)

---
 doc/conf.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/conf.py b/doc/conf.py
index 1c8beeab..dff9ce47 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -15,7 +15,7 @@
# General information about the project.
project = u'metric-learn' -copyright = u'2015-2017, CJ Carey and Yuan Tang' +copyright = u'2015-2018, CJ Carey and Yuan Tang' author = u'CJ Carey and Yuan Tang' version = '0.4.0' release = '0.4.0' From 5c80f501d1cc575a96dfd0e4839d2496522e9281 Mon Sep 17 00:00:00 2001 From: William de Vazelhes <31916524+wdevazelhes@users.noreply.github.com> Date: Wed, 5 Sep 2018 14:59:26 +0200 Subject: [PATCH 075/210] FIX: Update setup.py (#121) - new algorithms - new website --- setup.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 34fedd76..273587e9 100755 --- a/setup.py +++ b/setup.py @@ -12,7 +12,7 @@ description='Python implementations of metric learning algorithms', author=['CJ Carey', 'Yuan Tang'], author_email='ccarey@cs.umass.edu', - url='http://github.com/all-umass/metric-learn', + url='http://github.com/metric-learn/metric-learn', license='MIT', classifiers=[ 'Development Status :: 4 - Beta', @@ -40,5 +40,9 @@ 'Information Theoretic Metric Learning', 'Sparse Determinant Metric Learning', 'Least Squares Metric Learning', - 'Neighborhood Components Analysis' + 'Neighborhood Components Analysis', + 'Local Fisher Discriminant Analysis', + 'Relative Components Analysis', + 'Mahalanobis Metric for Clustering', + 'Metric Learning for Kernel Regression' ]) From 617f7e5e5d202422f8957dafecea7d361f4110cd Mon Sep 17 00:00:00 2001 From: William de Vazelhes <31916524+wdevazelhes@users.noreply.github.com> Date: Wed, 5 Sep 2018 17:22:14 +0200 Subject: [PATCH 076/210] FIX: add long_description in setup.py (#122) --- setup.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/setup.py b/setup.py index 273587e9..96210994 100755 --- a/setup.py +++ b/setup.py @@ -7,9 +7,14 @@ with open(os.path.join('metric_learn', '_version.py')) as fp: exec(fp.read(), version) +# Get the long description from README.md +with open('README.rst', encoding='utf-8') as f: + long_description = f.read() + setup(name='metric-learn', version=version['__version__'], description='Python implementations of metric learning algorithms', + long_description=long_description, author=['CJ Carey', 'Yuan Tang'], author_email='ccarey@cs.umass.edu', url='http://github.com/metric-learn/metric-learn', From 8e607d11546c2d2a287b5d2705a5885a9480ebf6 Mon Sep 17 00:00:00 2001 From: "Yuan (Terry) Tang" Date: Wed, 10 Oct 2018 09:12:56 -0400 Subject: [PATCH 077/210] Fix python2.7 compatibility in setup.py (#126) --- setup.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 96210994..168fbcb6 100755 --- a/setup.py +++ b/setup.py @@ -2,13 +2,14 @@ # -*- coding: utf-8 -*- from setuptools import setup import os +import io version = {} -with open(os.path.join('metric_learn', '_version.py')) as fp: +with io.open(os.path.join('metric_learn', '_version.py')) as fp: exec(fp.read(), version) # Get the long description from README.md -with open('README.rst', encoding='utf-8') as f: +with io.open('README.rst', encoding='utf-8') as f: long_description = f.read() setup(name='metric-learn', From ac0e230000556b7c413e08b77d893bbaccbdfbcf Mon Sep 17 00:00:00 2001 From: William de Vazelhes <31916524+wdevazelhes@users.noreply.github.com> Date: Thu, 11 Oct 2018 17:12:00 +0200 Subject: [PATCH 078/210] [MRG] Quick fix of failed tests due to new scikit-learn version (0.20.0) (#130) * TST: Quick fix of failed tests due to new scikit-learn version (0.20.0) * FIX update values to pass test --- metric_learn/itml.py | 2 +- metric_learn/lmnn.py | 2 +- metric_learn/lsml.py | 2 +- 
metric_learn/mmc.py | 2 +- test/metric_learn_test.py | 15 ++++++++------- 5 files changed, 12 insertions(+), 11 deletions(-) diff --git a/metric_learn/itml.py b/metric_learn/itml.py index 7a9bc2d9..7b218895 100644 --- a/metric_learn/itml.py +++ b/metric_learn/itml.py @@ -191,7 +191,7 @@ def fit(self, X, y, random_state=np.random): random_state : numpy.random.RandomState, optional If provided, controls random number generation. """ - X, y = check_X_y(X, y) + X, y = check_X_y(X, y, ensure_min_samples=2) num_constraints = self.num_constraints if num_constraints is None: num_classes = len(np.unique(y)) diff --git a/metric_learn/lmnn.py b/metric_learn/lmnn.py index f58bc00a..d1a41a33 100644 --- a/metric_learn/lmnn.py +++ b/metric_learn/lmnn.py @@ -52,7 +52,7 @@ def transformer(self): class python_LMNN(_base_LMNN): def _process_inputs(self, X, labels): - self.X_ = check_array(X, dtype=float) + self.X_ = check_array(X, dtype=float, ensure_min_samples=2) num_pts, num_dims = self.X_.shape unique_labels, self.label_inds_ = np.unique(labels, return_inverse=True) if len(self.label_inds_) != num_pts: diff --git a/metric_learn/lsml.py b/metric_learn/lsml.py index c0bca855..4e315b0b 100644 --- a/metric_learn/lsml.py +++ b/metric_learn/lsml.py @@ -178,7 +178,7 @@ def fit(self, X, y, random_state=np.random): random_state : numpy.random.RandomState, optional If provided, controls random number generation. """ - X, y = check_X_y(X, y) + X, y = check_X_y(X, y, ensure_min_samples=2) num_constraints = self.num_constraints if num_constraints is None: num_classes = len(np.unique(y)) diff --git a/metric_learn/mmc.py b/metric_learn/mmc.py index b98c31e0..02974f7e 100644 --- a/metric_learn/mmc.py +++ b/metric_learn/mmc.py @@ -434,7 +434,7 @@ def fit(self, X, y, random_state=np.random): random_state : numpy.random.RandomState, optional If provided, controls random number generation. 
""" - X, y = check_X_y(X, y) + X, y = check_X_y(X, y, ensure_min_samples=2) num_constraints = self.num_constraints if num_constraints is None: num_classes = len(np.unique(y)) diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py index 1d0a5d02..e5bd071c 100644 --- a/test/metric_learn_test.py +++ b/test/metric_learn_test.py @@ -44,7 +44,7 @@ def test_iris(self): csep = class_separation(cov.transform(), self.iris_labels) # deterministic result - self.assertAlmostEqual(csep, 0.73068122) + self.assertAlmostEqual(csep, 0.72981476) class TestLSML(MetricTestCase): @@ -133,7 +133,7 @@ def test_iris(self): nca = NCA(max_iter=(100000//n), num_dims=2, tol=1e-9) nca.fit(self.iris_points, self.iris_labels) csep = class_separation(nca.transform(), self.iris_labels) - self.assertLess(csep, 0.15) + self.assertLess(csep, 0.20) def test_finite_differences(self): """Test gradient of loss function @@ -319,16 +319,17 @@ def test_iris(self): # Full metric mmc = MMC(convergence_threshold=0.01) mmc.fit(self.iris_points, [a,b,c,d]) - expected = [[+0.00046504, +0.00083371, -0.00111959, -0.00165265], - [+0.00083371, +0.00149466, -0.00200719, -0.00296284], - [-0.00111959, -0.00200719, +0.00269546, +0.00397881], - [-0.00165265, -0.00296284, +0.00397881, +0.00587320]] + expected = [[ 0.000514, 0.000868, -0.001195, -0.001703], + [ 0.000868, 0.001468, -0.002021, -0.002879], + [-0.001195, -0.002021, 0.002782, 0.003964], + [-0.001703, -0.002879, 0.003964, 0.005648]] assert_array_almost_equal(expected, mmc.metric(), decimal=6) # Diagonal metric mmc = MMC(diagonal=True) mmc.fit(self.iris_points, [a,b,c,d]) - expected = [0, 0, 1.21045968, 1.22552608] + expected = [0, 0, 1.210220, 1.228596] + assert_array_almost_equal(np.diag(expected), mmc.metric(), decimal=6) # Supervised Full From 23d07466961fa7a72aa8692bc42d6d569b80c5c9 Mon Sep 17 00:00:00 2001 From: William de Vazelhes <31916524+wdevazelhes@users.noreply.github.com> Date: Wed, 2 Jan 2019 15:19:52 +0100 Subject: [PATCH 079/210] [MRG] New api design (#139) [MRG] New api design --- .gitignore | 1 + README.rst | 23 +- bench/benchmarks/iris.py | 2 +- doc/conf.py | 4 + doc/getting_started.rst | 42 + doc/index.rst | 96 +- doc/introduction.rst | 38 + doc/metric_learn.nca.rst | 2 +- doc/metric_learn.rst | 12 +- doc/preprocessor.rst | 111 +++ doc/supervised.rst | 209 ++++ doc/user_guide.rst | 15 + doc/weakly_supervised.rst | 345 +++++++ examples/README.txt | 4 + examples/{sandwich.py => plot_sandwich.py} | 6 +- metric_learn/_util.py | 316 +++++- metric_learn/base_metric.py | 358 ++++++- metric_learn/constraints.py | 10 + metric_learn/covariance.py | 32 +- metric_learn/exceptions.py | 12 + metric_learn/itml.py | 124 ++- metric_learn/lfda.py | 57 +- metric_learn/lmnn.py | 99 +- metric_learn/lsml.py | 111 ++- metric_learn/mlkr.py | 58 +- metric_learn/mmc.py | 208 ++-- metric_learn/nca.py | 47 +- metric_learn/rca.py | 81 +- metric_learn/sdml.py | 106 +- test/metric_learn_test.py | 160 +--- test/test_base_metric.py | 44 +- test/test_fit_transform.py | 24 +- test/test_mahalanobis_mixin.py | 169 ++++ test/test_sklearn_compat.py | 279 +++++- test/test_transformer_metric_conversion.py | 20 +- test/test_utils.py | 1013 ++++++++++++++++++++ 36 files changed, 3534 insertions(+), 704 deletions(-) create mode 100644 doc/getting_started.rst create mode 100644 doc/introduction.rst create mode 100644 doc/preprocessor.rst create mode 100644 doc/supervised.rst create mode 100644 doc/user_guide.rst create mode 100644 doc/weakly_supervised.rst create mode 100644 examples/README.txt rename 
examples/{sandwich.py => plot_sandwich.py} (97%) create mode 100644 metric_learn/exceptions.py create mode 100644 test/test_mahalanobis_mixin.py create mode 100644 test/test_utils.py diff --git a/.gitignore b/.gitignore index 4c81e9fa..c532a6cb 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,4 @@ dist/ .coverage htmlcov/ .cache/ +doc/auto_examples/* diff --git a/README.rst b/README.rst index 22b3e7e3..b1893cc6 100644 --- a/README.rst +++ b/README.rst @@ -34,27 +34,8 @@ package installed). **Usage** -For full usage examples, see the `sphinx documentation`_. - -Each metric is a subclass of ``BaseMetricLearner``, which provides -default implementations for the methods ``metric``, ``transformer``, and -``transform``. Subclasses must provide an implementation for either -``metric`` or ``transformer``. - -For an instance of a metric learner named ``foo`` learning from a set of -``d``-dimensional points, ``foo.metric()`` returns a ``d x d`` -matrix ``M`` such that the distance between vectors ``x`` and ``y`` is -expressed ``sqrt((x-y).dot(M).dot(x-y))``. -Using scipy's ``pdist`` function, this would look like -``pdist(X, metric='mahalanobis', VI=foo.metric())``. - -In the same scenario, ``foo.transformer()`` returns a ``d x d`` -matrix ``L`` such that a vector ``x`` can be represented in the learned -space as the vector ``x.dot(L.T)``. - -For convenience, the function ``foo.transform(X)`` is provided for -converting a matrix of points (``X``) into the learned space, in which -standard Euclidean distance can be used. +See the `sphinx documentation`_ for full documentation about installation, API, + usage, and examples. **Notes** diff --git a/bench/benchmarks/iris.py b/bench/benchmarks/iris.py index d0b76895..305c3a0f 100644 --- a/bench/benchmarks/iris.py +++ b/bench/benchmarks/iris.py @@ -10,7 +10,7 @@ 'LMNN': metric_learn.LMNN(k=5, learn_rate=1e-6, verbose=False), 'LSML_Supervised': metric_learn.LSML_Supervised(num_constraints=200), 'MLKR': metric_learn.MLKR(), - 'NCA': metric_learn.NCA(max_iter=700, learning_rate=0.01, num_dims=2), + 'NCA': metric_learn.NCA(max_iter=700, num_dims=2), 'RCA_Supervised': metric_learn.RCA_Supervised(dim=2, num_chunks=30, chunk_size=2), 'SDML_Supervised': metric_learn.SDML_Supervised(num_constraints=1500), diff --git a/doc/conf.py b/doc/conf.py index dff9ce47..ed476edd 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -7,6 +7,7 @@ 'sphinx.ext.viewcode', 'sphinx.ext.mathjax', 'numpydoc', + 'sphinx_gallery.gen_gallery' ] templates_path = ['_templates'] @@ -31,3 +32,6 @@ html_static_path = ['_static'] htmlhelp_basename = 'metric-learndoc' +# Option to only need single backticks to refer to symbols +default_role = 'any' + diff --git a/doc/getting_started.rst b/doc/getting_started.rst new file mode 100644 index 00000000..040adedc --- /dev/null +++ b/doc/getting_started.rst @@ -0,0 +1,42 @@ +############### +Getting started +############### + +Installation and Setup +====================== + +Run ``pip install metric-learn`` to download and install from PyPI. + +Alternately, download the source repository and run: + +- ``python setup.py install`` for default installation. +- ``python setup.py test`` to run all tests. + +**Dependencies** + +- Python 2.7+, 3.4+ +- numpy, scipy, scikit-learn +- (for running the examples only: matplotlib) + +**Notes** + +If a recent version of the Shogun Python modular (``modshogun``) library +is available, the LMNN implementation will use the fast C++ version from +there. 
The two implementations differ slightly, and the C++ version is
+more complete.
+
+
+Quick start
+===========
+
+This example loads the iris dataset, and evaluates a k-nearest neighbors
+algorithm on an embedding space learned with `NCA`.
+
+>>> from metric_learn import NCA
+>>> from sklearn.datasets import load_iris
+>>> from sklearn.model_selection import cross_val_score
+>>> from sklearn.neighbors import KNeighborsClassifier
+>>> from sklearn.pipeline import make_pipeline
+>>>
+>>> X, y = load_iris(return_X_y=True)
+>>> clf = make_pipeline(NCA(), KNeighborsClassifier())
+>>> cross_val_score(clf, X, y)
diff --git a/doc/index.rst b/doc/index.rst
index 36a6e80c..9dbcd9b0 100644
--- a/doc/index.rst
+++ b/doc/index.rst
@@ -2,103 +2,31 @@ metric-learn: Metric Learning in Python
=======================================
|License| |PyPI version|

-Distance metrics are widely used in the machine learning literature.
-Traditionally, practicioners would choose a standard distance metric
-(Euclidean, City-Block, Cosine, etc.) using a priori knowledge of
-the domain.
-Distance metric learning (or simply, metric learning) is the sub-field of
-machine learning dedicated to automatically constructing optimal distance
-metrics.
-
-This package contains efficient Python implementations of several popular
-metric learning algorithms.
-
-Supervised Algorithms
----------------------
-Supervised metric learning algorithms take as inputs points `X` and target
-labels `y`, and learn a distance matrix that make points from the same class
-(for classification) or with close target value (for regression) close to
-each other, and points from different classes or with distant target values
-far away from each other.
+Welcome to metric-learn's documentation !
+-----------------------------------------

.. toctree::
-   :maxdepth: 1
-
-   metric_learn.covariance
-   metric_learn.lmnn
-   metric_learn.nca
-   metric_learn.lfda
-   metric_learn.mlkr

-Weakly-Supervised Algorithms
---------------------------
-Weakly supervised algorithms work on weaker information about the data points
-than supervised algorithms. Rather than labeled points, they take as input
-similarity judgments on tuples of data points, for instance pairs of similar
-and dissimilar points. Refer to the documentation of each algorithm for its
-particular form of input data.
+   getting_started

.. toctree::
-   :maxdepth: 1
-
-   metric_learn.itml
-   metric_learn.lsml
-   metric_learn.sdml
-   metric_learn.rca
-   metric_learn.mmc
-
-Note that each weakly-supervised algorithm has a supervised version of the form
-`*_Supervised` where similarity constraints are generated from
-the labels information and passed to the underlying algorithm.
-
-Each metric learning algorithm supports the following methods:
-
-- ``fit(...)``, which learns the model.
-- ``transformer()``, which returns a transformation matrix
-  :math:`L \in \mathbb{R}^{D \times d}`, which can be used to convert a
-  data matrix :math:`X \in \mathbb{R}^{n \times d}` to the
-  :math:`D`-dimensional learned metric space :math:`X L^{\top}`,
-  in which standard Euclidean distances may be used.
-- ``transform(X)``, which applies the aforementioned transformation.
-- ``metric()``, which returns a Mahalanobis matrix
-  :math:`M = L^{\top}L` such that distance between vectors ``x`` and
-  ``y`` can be computed as :math:`\left(x-y\right)M\left(x-y\right)`.
-
-
-Installation and Setup
-======================
-
-Run ``pip install metric-learn`` to download and install from PyPI.
+   :maxdepth: 2

-Alternately, download the source repository and run:
+   user_guide

-- ``python setup.py install`` for default installation.
-- ``python setup.py test`` to run all tests.
.. toctree::
-   :maxdepth: 2

-**Dependencies**
+   Package Overview

-- Python 2.7+, 3.4+
-- numpy, scipy, scikit-learn
-- (for running the examples only: matplotlib)
.. toctree::
+   :maxdepth: 2

-**Notes**
+   auto_examples/index

-If a recent version of the Shogun Python modular (``modshogun``) library
-is available, the LMNN implementation will use the fast C++ version from
-there. The two implementations differ slightly, and the C++ version is
-more complete.

-Navigation
-----------
:ref:`genindex` | :ref:`modindex` | :ref:`search`

-.. toctree::
-   :maxdepth: 4
-   :hidden:
-
-   Package Overview
-
.. |PyPI version| image:: https://badge.fury.io/py/metric-learn.svg
   :target: http://badge.fury.io/py/metric-learn
.. |License| image:: http://img.shields.io/:license-mit-blue.svg?style=flat
diff --git a/doc/introduction.rst b/doc/introduction.rst
new file mode 100644
index 00000000..9f2b4165
--- /dev/null
+++ b/doc/introduction.rst
@@ -0,0 +1,38 @@
+============
+Introduction
+============
+
+Distance metrics are widely used in the machine learning literature.
+Traditionally, practitioners would choose a standard distance metric
+(Euclidean, City-Block, Cosine, etc.) using a priori knowledge of
+the domain.
+Distance metric learning (or simply, metric learning) is the sub-field of
+machine learning dedicated to automatically constructing task-specific distance
+metrics from (weakly) supervised data.
+The learned distance metric often corresponds to a Euclidean distance in a new
+embedding space, hence distance metric learning can be seen as a form of
+representation learning.
+
+This package contains efficient Python implementations of several popular
+metric learning algorithms, compatible with scikit-learn. This makes it
+possible to use all the scikit-learn routines for pipelining and model
+selection with metric learning algorithms.
+
+
+Currently, each metric learning algorithm supports the following methods:
+
+- ``fit(...)``, which learns the model.
+- ``metric()``, which returns a Mahalanobis matrix
+  :math:`M = L^{\top}L` such that distance between vectors ``x`` and
+  ``y`` can be computed as :math:`\sqrt{\left(x-y\right)^{\top}M\left(x-y\right)}`.
+- ``transformer_from_metric(metric)``, which returns a transformation matrix
+  :math:`L \in \mathbb{R}^{D \times d}`, which can be used to convert a
+  data matrix :math:`X \in \mathbb{R}^{n \times d}` to the
+  :math:`D`-dimensional learned metric space :math:`X L^{\top}`,
+  in which standard Euclidean distances may be used.
+- ``transform(X)``, which applies the aforementioned transformation.
+- ``score_pairs(pairs)``, which returns the distance between pairs of
+  points. ``pairs`` should be a 3D array-like of pairs of shape ``(n_pairs,
+  2, n_features)``, or it can be a 2D array-like of pairs indicators of
+  shape ``(n_pairs, 2)`` (see section :ref:`preprocessor_section` for more
+  details).
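+
+As a rough sketch of how these methods fit together (shown here with `NCA`
+and the iris dataset purely as an illustration; any estimator of the package
+should expose the same interface):
+
+>>> from metric_learn import NCA
+>>> from sklearn.datasets import load_iris
+>>>
+>>> X, y = load_iris(return_X_y=True)
+>>> nca = NCA().fit(X, y)
+>>> M = nca.metric()                     # Mahalanobis matrix M = L^T L
+>>> X_embedded = nca.transform(X)        # embeds the points as X L^T
+>>> d = nca.score_pairs([[X[0], X[1]]])  # distance between one pair of points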
diff --git a/doc/metric_learn.nca.rst b/doc/metric_learn.nca.rst index 7a4ee2c4..00bc4eac 100644 --- a/doc/metric_learn.nca.rst +++ b/doc/metric_learn.nca.rst @@ -21,7 +21,7 @@ Example Code X = iris_data['data'] Y = iris_data['target'] - nca = NCA(max_iter=1000, learning_rate=0.01) + nca = NCA(max_iter=1000) nca.fit(X, Y) References diff --git a/doc/metric_learn.rst b/doc/metric_learn.rst index 70a99a04..c2472408 100644 --- a/doc/metric_learn.rst +++ b/doc/metric_learn.rst @@ -1,8 +1,8 @@ metric_learn package ==================== -Submodules ----------- +Module Contents +--------------- .. toctree:: @@ -16,11 +16,3 @@ Submodules metric_learn.nca metric_learn.rca metric_learn.sdml - -Module contents ---------------- - -.. automodule:: metric_learn - :members: - :undoc-members: - :show-inheritance: diff --git a/doc/preprocessor.rst b/doc/preprocessor.rst new file mode 100644 index 00000000..ad1ffd8f --- /dev/null +++ b/doc/preprocessor.rst @@ -0,0 +1,111 @@ +.. _preprocessor_section: + +============ +Preprocessor +============ + +Estimators in metric-learn all have a ``preprocessor`` option at instantiation. +Filling this argument allows them to take more compact input representation +when fitting, predicting etc... + +If ``preprocessor=None``, no preprocessor will be used and the user must +provide the classical representation to the fit/predict/score/etc... methods of +the estimators (see the documentation of the particular estimator to know the +type of input it accepts). Otherwise, two types of objects can be put in this +argument: + +Array-like +---------- +You can specify ``preprocessor=X`` where ``X`` is an array-like containing the +dataset of points. In this case, the fit/predict/score/etc... methods of the +estimator will be able to take as inputs an array-like of indices, replacing +under the hood each index by the corresponding sample. + + +Example with a supervised metric learner: + +>>> from metric_learn import NCA +>>> +>>> X = np.array([[-0.7 , -0.23], +>>> [-0.43, -0.49], +>>> [ 0.14, -0.37]]) # array of 3 samples of 2 features +>>> points_indices = np.array([2, 0, 1, 0]) +>>> y = np.array([1, 0, 1, 1]) +>>> +>>> nca = NCA(preprocessor=X) +>>> nca.fit(points_indices, y) +>>> # under the hood the algorithm will create +>>> # points = np.array([[ 0.14, -0.37], +>>> # [-0.7 , -0.23], +>>> # [-0.43, -0.49], +>>> # [ 0.14, -0.37]]) and fit on it + + +Example with a weakly supervised metric learner: + +>>> from metric_learn import MMC +>>> X = np.array([[-0.7 , -0.23], +>>> [-0.43, -0.49], +>>> [ 0.14, -0.37]]) # array of 3 samples of 2 features +>>> pairs_indices = np.array([[2, 0], [1, 0]]) +>>> y_pairs = np.array([1, -1]) +>>> +>>> mmc = MMC(preprocessor=X) +>>> mmc.fit(pairs_indices, y_pairs) +>>> # under the hood the algorithm will create +>>> # pairs = np.array([[[ 0.14, -0.37], [-0.7 , -0.23]], +>>> # [[-0.43, -0.49], [-0.7 , -0.23]]]) and fit on it + +Callable +-------- +Alternatively, you can provide a callable as ``preprocessor``. Then the +estimator will accept indicators of points instead of points. Under the hood, +the estimator will call this callable on the indicators you provide as input +when fitting, predicting etc... Using a callable can be really useful to +represent lazily a dataset of images stored on the file system for instance. +The callable should take as an input a 1D array-like, and return a 2D +array-like. 
For supervised learners it will be applied on the whole 1D array of
+indicators at once, and for weakly supervised learners it will be applied on
+each column of the 2D array of tuples.
+
+Example with a supervised metric learner:
+
+>>> def find_images(file_paths):
+>>>   # each file contains a small image to use as an input datapoint
+>>>   return np.row_stack([imread(f).ravel() for f in file_paths])
+>>>
+>>> nca = NCA(preprocessor=find_images)
+>>> nca.fit(['img01.png', 'img00.png', 'img02.png'], [1, 0, 1])
+>>> # under the hood preprocessor(indicators) will be called
+
+
+Example with a weakly supervised metric learner:
+
+>>> pairs_images_paths = [['img02.png', 'img00.png'],
+>>>                       ['img01.png', 'img00.png']]
+>>> y_pairs = np.array([1, -1])
+>>>
+>>> mmc = MMC(preprocessor=find_images)
+>>> mmc.fit(pairs_images_paths, y_pairs)
+>>> # under the hood preprocessor(pairs_indicators[i]) will be called for each
+>>> # i in [0, 1]
+
+
+.. note:: Note that when you fill the ``preprocessor`` option, it allows you
+  to give more compact inputs, but the classical way of providing inputs
+  stays valid (2D array-like for supervised learners and 3D array-like of
+  tuples for weakly supervised learners). If a classical input
+  is provided, the metric learner will not use the preprocessor.
+
+  Example: This will work:
+
+  >>> from metric_learn import MMC
+  >>> def preprocessor_wip(array):
+  >>>   raise NotImplementedError("This preprocessor does nothing yet.")
+  >>>
+  >>> pairs = np.array([[[ 0.14, -0.37], [-0.7 , -0.23]],
+  >>>                   [[-0.43, -0.49], [-0.7 , -0.23]]])
+  >>> y_pairs = np.array([1, -1])
+  >>>
+  >>> mmc = MMC(preprocessor=preprocessor_wip)
+  >>> mmc.fit(pairs, y_pairs)  # preprocessor_wip will not be called here
diff --git a/doc/supervised.rst b/doc/supervised.rst
new file mode 100644
index 00000000..26934a47
--- /dev/null
+++ b/doc/supervised.rst
@@ -0,0 +1,209 @@
+==========================
+Supervised Metric Learning
+==========================
+
+Supervised metric learning algorithms take as inputs points `X` and target
+labels `y`, and learn a distance matrix that makes points from the same class
+(for classification) or with close target value (for regression) close to each
+other, and points from different classes or with distant target values far away
+from each other.
+
+Scikit-learn compatibility
+==========================
+
+All supervised algorithms are scikit-learn `Estimators`, so they are
+compatible with pipelining and scikit-learn model selection routines.
+
+Algorithms
+==========
+
+Covariance
+----------
+
+.. todo:: Covariance is unsupervised, so its doc should not be here.
+
+`Covariance` does not "learn" anything, rather it calculates
+the covariance matrix of the input data. This is a simple baseline method.
+
+.. topic:: Example Code:
+
+::
+
+    from metric_learn import Covariance
+    from sklearn.datasets import load_iris
+
+    iris = load_iris()['data']
+
+    cov = Covariance().fit(iris)
+    x = cov.transform(iris)
+
+.. topic:: References:
+
+    .. [1] On the Generalized Distance in Statistics, P.C.Mahalanobis, 1936
+
+LMNN
+-----
+
+Large-margin nearest neighbor metric learning.
+
+`LMNN` learns a Mahalanobis distance metric in the kNN classification
+setting using semidefinite programming. The learned metric attempts to keep
+k-nearest neighbors in the same class, while keeping examples from different
+classes separated by a large margin. This algorithm makes no assumptions about
+the distribution of the data.
+
+..
topic:: Example Code:
+
+::
+
+    import numpy as np
+    from metric_learn import LMNN
+    from sklearn.datasets import load_iris
+
+    iris_data = load_iris()
+    X = iris_data['data']
+    Y = iris_data['target']
+
+    lmnn = LMNN(k=5, learn_rate=1e-6, verbose=False)
+    lmnn.fit(X, Y)
+
+If a recent version of the Shogun Python modular (``modshogun``) library
+is available, the LMNN implementation will use the fast C++ version from
+there. Otherwise, the included pure-Python version will be used.
+The two implementations differ slightly, and the C++ version is more complete.
+
+.. topic:: References:
+
+    .. [1] `Distance Metric Learning for Large Margin Nearest Neighbor
+       Classification
+       `_ Kilian Q. Weinberger, John
+       Blitzer, Lawrence K. Saul
+
+NCA
+---
+
+Neighborhood Components Analysis (`NCA`) is a distance metric learning
+algorithm which aims to improve the accuracy of nearest neighbors
+classification compared to the standard Euclidean distance. The algorithm
+directly maximizes a stochastic variant of the leave-one-out k-nearest
+neighbors (KNN) score on the training set. It can also learn a low-dimensional
+linear embedding of data that can be used for data visualization and fast
+classification.
+
+.. topic:: Example Code:
+
+::
+
+    import numpy as np
+    from metric_learn import NCA
+    from sklearn.datasets import load_iris
+
+    iris_data = load_iris()
+    X = iris_data['data']
+    Y = iris_data['target']
+
+    nca = NCA(max_iter=1000)
+    nca.fit(X, Y)
+
+.. topic:: References:
+
+    .. [1] J. Goldberger, G. Hinton, S. Roweis, R. Salakhutdinov.
+       "Neighbourhood Components Analysis". Advances in Neural Information
+       Processing Systems. 17, 513-520, 2005.
+       http://www.cs.nyu.edu/~roweis/papers/ncanips.pdf
+
+    .. [2] Wikipedia entry on Neighborhood Components Analysis
+       https://en.wikipedia.org/wiki/Neighbourhood_components_analysis
+
+LFDA
+----
+
+Local Fisher Discriminant Analysis (LFDA)
+
+`LFDA` is a linear supervised dimensionality reduction method. It is
+particularly useful when dealing with multimodality, where one or more classes
+consist of separate clusters in input space. The core optimization problem of
+LFDA is solved as a generalized eigenvalue problem.
+
+.. topic:: Example Code:
+
+::
+
+    import numpy as np
+    from metric_learn import LFDA
+    from sklearn.datasets import load_iris
+
+    iris_data = load_iris()
+    X = iris_data['data']
+    Y = iris_data['target']
+
+    lfda = LFDA(k=2, dim=2)
+    lfda.fit(X, Y)
+
+.. topic:: References:
+
+    .. [1] `Dimensionality Reduction of Multimodal Labeled Data by Local
+       Fisher Discriminant Analysis `_ Masashi Sugiyama.
+
+    .. [2] `Local Fisher Discriminant Analysis on Beer Style Clustering
+       `_ Yuan Tang.
+
+
+MLKR
+----
+
+Metric Learning for Kernel Regression.
+
+`MLKR` is an algorithm for supervised metric learning, which learns a
+distance function by directly minimising the leave-one-out regression error.
+This algorithm can also be viewed as a supervised variation of PCA and can be
+used for dimensionality reduction and high dimensional data visualization.
+
+.. topic:: Example Code:
+
+::
+
+    from metric_learn import MLKR
+    from sklearn.datasets import load_iris
+
+    iris_data = load_iris()
+    X = iris_data['data']
+    Y = iris_data['target']
+
+    mlkr = MLKR()
+    mlkr.fit(X, Y)
+
+.. topic:: References:
+
+    .. [1] `Metric Learning for Kernel Regression `_ Kilian Q.
Weinberger,
+       Gerald Tesauro
+
+
+Supervised versions of weakly-supervised algorithms
+---------------------------------------------------
+
+Note that each :ref:`weakly-supervised algorithm <weakly_supervised_section>`
+has a supervised version of the form `*_Supervised` where similarity tuples are
+generated from the label information and passed to the underlying algorithm.
+
+.. todo:: add more details about that (see issue ``_)
+
+
+.. topic:: Example Code:
+
+::
+
+    from metric_learn import MMC_Supervised
+    from sklearn.datasets import load_iris
+
+    iris_data = load_iris()
+    X = iris_data['data']
+    Y = iris_data['target']
+
+    mmc = MMC_Supervised(num_constraints=200)
+    mmc.fit(X, Y)
diff --git a/doc/user_guide.rst b/doc/user_guide.rst
new file mode 100644
index 00000000..fb7060ce
--- /dev/null
+++ b/doc/user_guide.rst
@@ -0,0 +1,15 @@
+.. title:: User guide: contents
+
+.. _user_guide:
+
+==========
+User Guide
+==========
+
+.. toctree::
+   :numbered:
+
+   introduction.rst
+   supervised.rst
+   weakly_supervised.rst
+   preprocessor.rst
\ No newline at end of file
diff --git a/doc/weakly_supervised.rst b/doc/weakly_supervised.rst
new file mode 100644
index 00000000..deae9b40
--- /dev/null
+++ b/doc/weakly_supervised.rst
@@ -0,0 +1,345 @@
+.. _weakly_supervised_section:
+
+=================================
+Weakly Supervised Metric Learning
+=================================
+
+Weakly supervised algorithms work on weaker information about the data points
+than supervised algorithms. Rather than labeled points, they take as input
+similarity judgments on tuples of data points, for instance pairs of similar
+and dissimilar points. Refer to the documentation of each algorithm for its
+particular form of input data.
+
+
+Input data
+==========
+
+In the following paragraph we talk about tuples for the sake of generality.
+These can be pairs, triplets, quadruplets, etc., depending on the particular
+metric learning algorithm we use.
+
+Basic form
+----------
+Every weakly supervised algorithm will take as input tuples of points, and if
+needed, labels for these tuples.
+
+
+The `tuples` argument is the first argument of every method (like the X
+argument for classical algorithms in scikit-learn). The second argument is the
+label of the tuple: its semantics depend on the algorithm used. For instance
+for pairs learners ``y`` is a label indicating whether the pair is of similar
+samples or dissimilar samples.
+
+Then one can fit a Weakly Supervised Metric Learner on these tuples, like this:
+
+>>> my_algo.fit(tuples, y)
+
+Just as in a classical setting we would split the points ``X`` between train
+and test, here we split the ``tuples`` between train and test.
+
+>>> from sklearn.model_selection import train_test_split
+>>> pairs_train, pairs_test, y_train, y_test = train_test_split(pairs, y)
+
+There are two data structures that can be used to represent tuples in
+metric-learn:
+
+3D array of tuples
+------------------
+
+The most intuitive way to represent tuples is to provide the algorithm with a
+3D array-like of tuples of shape ``(n_tuples, tuple_size, n_features)``, where
+``n_tuples`` is the number of tuples, ``tuple_size`` is the number of elements
+in a tuple (2 for pairs, 3 for triplets for instance), and ``n_features`` is
+the number of features of each point.
+
+..
topic:: Example:
+  Here is an artificial dataset of 4 pairs of 2 points of 3 features each:
+
+>>> import numpy as np
+>>> tuples = np.array([[[-0.12, -1.21, -0.20],
+>>>                     [+0.05, -0.19, -0.05]],
+>>>
+>>>                    [[-2.16, +0.11, -0.02],
+>>>                     [+1.58, +0.16, +0.93]],
+>>>
+>>>                    [[+1.58, +0.16, +0.93 ],  # same as tuples[1, 1, :]
+>>>                     [+0.89, -0.34, +2.41]],
+>>>
+>>>                    [[-0.12, -1.21, -0.20 ],  # same as tuples[0, 0, :]
+>>>                     [-2.16, +0.11, -0.02]]])  # same as tuples[1, 0, :]
+>>> y = np.array([-1, 1, 1, -1])
+
+.. warning:: This way of specifying pairs is not recommended for a large number
+  of tuples, as it is redundant (see the comments in the example) and hence
+  takes a lot of memory. Indeed each feature vector of a point will be
+  replicated as many times as the point is involved in a tuple. The second way
+  of specifying pairs, described below, is more efficient.
+
+
+2D array of indicators + preprocessor
+-------------------------------------
+
+Instead of forming each point in each tuple, a more efficient representation
+would be to keep the dataset of points ``X`` aside, and just represent tuples
+as a collection of tuples of *indices* from the points in ``X``. Since we lose
+the feature dimension there, the resulting array is 2D.
+
+.. topic:: Example: An equivalent representation of the above pairs would be:
+
+>>> X = np.array([[-0.12, -1.21, -0.20],
+>>>               [+0.05, -0.19, -0.05],
+>>>               [-2.16, +0.11, -0.02],
+>>>               [+1.58, +0.16, +0.93],
+>>>               [+0.89, -0.34, +2.41]])
+>>>
+>>> tuples_indices = np.array([[0, 1],
+>>>                            [2, 3],
+>>>                            [3, 4],
+>>>                            [0, 2]])
+>>> y = np.array([-1, 1, 1, -1])
+
+In order to fit metric learning algorithms with this type of input, we need to
+give the original dataset of points ``X`` to the estimator so that it knows
+the points the indices refer to. We do this when initializing the estimator,
+through the argument `preprocessor`.
+
+.. topic:: Example:
+
+>>> from metric_learn import MMC
+>>> mmc = MMC(preprocessor=X)
+>>> mmc.fit(pairs_indices, y)
+
+
+.. note::
+
+   Instead of an array-like, you can give a callable in the argument
+   ``preprocessor``, which will go fetch and form the tuples. This makes it
+   possible to give more general indicators than just indices from an array
+   (for instance paths in the filesystem, names of records in a database,
+   etc.) See section :ref:`preprocessor_section` for more details on how to
+   use the preprocessor.
+
+
+Scikit-learn compatibility
+==========================
+
+Weakly supervised estimators are compatible with scikit-learn routines for
+model selection (grid-search, cross-validation, etc.). See the scoring section
+for more details on the scoring used in the case of Weakly Supervised
+Metric Learning.
+
+.. topic:: Example
+
+>>> from metric_learn import MMC
+>>> from sklearn.datasets import load_iris
+>>> from sklearn.model_selection import cross_val_score
+>>> rng = np.random.RandomState(42)
+>>> X, _ = load_iris(return_X_y=True)
+>>> # let's sample 30 random pairs and labels of pairs
+>>> pairs_indices = rng.randint(X.shape[0], size=(30, 2))
+>>> y = rng.randint(2, size=30)
+>>> mmc = MMC(preprocessor=X)
+>>> cross_val_score(mmc, pairs_indices, y)
+
+Scoring
+=======
+
+Some default scoring methods are implemented in metric-learn, depending on the
+kind of tuples you're working with (pairs, triplets...). See the docstring of
+the `score` method of the estimator you use.
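+
+For instance, continuing the cross-validation example above, a held-out
+evaluation of a pairs learner might look like the following sketch (this
+assumes the default `score` of pairs learners summarizes how well the
+predicted pair similarities match ``y_test``; see its docstring for the exact
+measure returned):
+
+>>> from sklearn.model_selection import train_test_split
+>>> pairs_train, pairs_test, y_train, y_test = train_test_split(
+...     pairs_indices, y)
+>>> mmc = MMC(preprocessor=X)
+>>> mmc.fit(pairs_train, y_train)
+>>> mmc.score(pairs_test, y_test)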
+ + +Algorithms +================== + +ITML +---- + +Information Theoretic Metric Learning, Davis et al., ICML 2007 + +`ITML` minimizes the differential relative entropy between two multivariate +Gaussians under constraints on the distance function, which can be formulated +into a Bregman optimization problem by minimizing the LogDet divergence subject +to linear constraints. This algorithm can handle a wide variety of constraints +and can optionally incorporate a prior on the distance function. Unlike some +other methods, ITML does not rely on an eigenvalue computation or semi-definite +programming. + +.. topic:: Example Code: + +:: + + from metric_learn import ITML + + pairs = [[[1.2, 7.5], [1.3, 1.5]], + [[6.4, 2.6], [6.2, 9.7]], + [[1.3, 4.5], [3.2, 4.6]], + [[6.2, 5.5], [5.4, 5.4]]] + y = [1, 1, -1, -1] + + # in this task we want points where the first feature is close to be closer + # to each other, no matter how close the second feature is + + + itml = ITML() + itml.fit(pairs, y) + +.. topic:: References: + + .. [1] `Information-theoretic Metric Learning `_ Jason V. Davis, + et al. + + .. [2] Adapted from Matlab code at http://www.cs.utexas.edu/users/pjain/ + itml/ + + +LSML +---- + +`LSML`: Metric Learning from Relative Comparisons by Minimizing Squared +Residual + +.. topic:: Example Code: + +:: + + from metric_learn import LSML + + quadruplets = [[[1.2, 7.5], [1.3, 1.5], [6.4, 2.6], [6.2, 9.7]], + [[1.3, 4.5], [3.2, 4.6], [6.2, 5.5], [5.4, 5.4]], + [[3.2, 7.5], [3.3, 1.5], [8.4, 2.6], [8.2, 9.7]], + [[3.3, 4.5], [5.2, 4.6], [8.2, 5.5], [7.4, 5.4]]] + + # we want to make closer points where the first feature is close, and + # further if the second feature is close + + lsml = LSML() + lsml.fit(quadruplets) + +.. topic:: References: + + .. [1] Liu et al. + "Metric Learning from Relative Comparisons by Minimizing Squared + Residual". ICDM 2012. http://www.cs.ucla.edu/~weiwang/paper/ICDM12.pdf + + .. [2] Adapted from https://gist.github.com/kcarnold/5439917 + + +SDML +---- + +`SDML`: An efficient sparse metric learning in high-dimensional space via +L1-penalized log-determinant regularization + +.. topic:: Example Code: + +:: + + from metric_learn import SDML + + pairs = [[[1.2, 7.5], [1.3, 1.5]], + [[6.4, 2.6], [6.2, 9.7]], + [[1.3, 4.5], [3.2, 4.6]], + [[6.2, 5.5], [5.4, 5.4]]] + y = [1, 1, -1, -1] + + # in this task we want points where the first feature is close to be closer + # to each other, no matter how close the second feature is + + sdml = SDML() + sdml.fit(pairs, y) + +.. topic:: References: + + .. [1] Qi et al. + An efficient sparse metric learning in high-dimensional space via + L1-penalized log-determinant regularization. ICML 2009. + http://lms.comp.nus.edu.sg/sites/default/files/publication-attachments/ + icml09-guojun.pdf + + .. [2] Adapted from https://gist.github.com/kcarnold/5439945 + + +RCA +--- + +Relative Components Analysis (RCA) + +`RCA` learns a full rank Mahalanobis distance metric based on a weighted sum of +in-class covariance matrices. It applies a global linear transformation to +assign large weights to relevant dimensions and low weights to irrelevant +dimensions. Those relevant dimensions are estimated using "chunklets", subsets +of points that are known to belong to the same class. + +.. 
topic:: Example Code:
+
+::
+
+    from metric_learn import RCA
+
+    X = [[-0.05,  3.0], [0.05, -3.0],
+         [ 0.1, -3.55], [-0.1, 3.55],
+         [-0.95, -0.05], [0.95, 0.05],
+         [ 0.4,  0.05], [-0.4, -0.05]]
+
+    # chunks[i] gives the chunklet index of point i: points sharing a
+    # chunklet index are known to belong to the same class
+    chunks = [0, 0, 1, 1, 2, 2, 3, 3]
+
+    rca = RCA()
+    rca.fit(X, chunks)
+
+
+.. topic:: References:
+
+    .. [1] `Adjustment learning and relevant component analysis
+       `_ Noam Shental, et al.
+
+    .. [2] 'Learning distance functions using equivalence relations', ICML 2003
+
+    .. [3] 'Learning a Mahalanobis metric from equivalence constraints', JMLR
+       2005
+
+MMC
+---
+
+Mahalanobis Metric Learning with Application for Clustering with
+Side-Information, Xing et al., NIPS 2002
+
+`MMC` minimizes the sum of squared distances between similar examples, while
+enforcing the sum of distances between dissimilar examples to be greater than a
+certain margin. This leads to a convex and, thus, local-minima-free
+optimization problem that can be solved efficiently. However, the algorithm
+involves the computation of eigenvalues, which is the main speed-bottleneck.
+Since it has initially been designed for clustering applications, one of the
+implicit assumptions of MMC is that all classes form a compact set, i.e.,
+follow a unimodal distribution, which restricts the possible use-cases of this
+method. However, it is one of the earliest techniques and is still often cited.
+
+.. topic:: Example Code:
+
+::
+
+    from metric_learn import MMC
+
+    pairs = [[[1.2, 7.5], [1.3, 1.5]],
+             [[6.4, 2.6], [6.2, 9.7]],
+             [[1.3, 4.5], [3.2, 4.6]],
+             [[6.2, 5.5], [5.4, 5.4]]]
+    y = [1, 1, -1, -1]
+
+    # in this task we want points where the first feature is close to be closer
+    # to each other, no matter how close the second feature is
+
+    mmc = MMC()
+    mmc.fit(pairs, y)
+
+.. topic:: References:
+
+    .. [1] `Distance metric learning with application to clustering with
+       side-information `_ Xing, Jordan, Russell, Ng.
+    .. [2] Adapted from Matlab code `here `_.
diff --git a/examples/README.txt b/examples/README.txt
new file mode 100644
index 00000000..10dbe0d5
--- /dev/null
+++ b/examples/README.txt
@@ -0,0 +1,4 @@
+Examples
+========
+
+Below is a gallery of example metric-learn use cases.
\ No newline at end of file diff --git a/examples/sandwich.py b/examples/plot_sandwich.py similarity index 97% rename from examples/sandwich.py rename to examples/plot_sandwich.py index 34b48a00..0e7658d3 100644 --- a/examples/sandwich.py +++ b/examples/plot_sandwich.py @@ -1,4 +1,8 @@ +# -*- coding: utf-8 -*- """ +Sandwich demo +============= + Sandwich demo based on code from http://nbviewer.ipython.org/6576096 """ @@ -30,7 +34,7 @@ def sandwich_demo(): for ax_num, ml in enumerate(mls, start=3): ml.fit(x, y) - tx = ml.transform() + tx = ml.transform(x) ml_knn = nearest_neighbors(tx, k=2) ax = plt.subplot(3, 2, ax_num) plot_sandwich_data(tx, y, axis=ax) diff --git a/metric_learn/_util.py b/metric_learn/_util.py index b34860d6..27707be9 100644 --- a/metric_learn/_util.py +++ b/metric_learn/_util.py @@ -1,5 +1,8 @@ import numpy as np - +import six +from sklearn.utils import check_array +from sklearn.utils.validation import check_X_y +from metric_learn.exceptions import PreprocessorError # hack around lack of axis kwarg in older numpy versions try: @@ -9,4 +12,313 @@ def vector_norm(X): return np.apply_along_axis(np.linalg.norm, 1, X) else: def vector_norm(X): - return np.linalg.norm(X, axis=1) \ No newline at end of file + return np.linalg.norm(X, axis=1) + + +def check_input(input_data, y=None, preprocessor=None, + type_of_inputs='classic', tuple_size=None, accept_sparse=False, + dtype='numeric', order=None, + copy=False, force_all_finite=True, + multi_output=False, ensure_min_samples=1, + ensure_min_features=1, y_numeric=False, + warn_on_dtype=False, estimator=None): + """Checks that the input format is valid, and converts it if specified + (this is the equivalent of scikit-learn's `check_array` or `check_X_y`). + All arguments following tuple_size are scikit-learn's `check_X_y` + arguments that will be enforced on the data and labels array. If + indicators are given as an input data array, the returned data array + will be the formed points/tuples, using the given preprocessor. + + Parameters + ---------- + input : array-like + The input data array to check. + + y : array-like + The input labels array to check. + + preprocessor : callable (default=`None`) + The preprocessor to use. If None, no preprocessor is used. + + type_of_inputs : `str` {'classic', 'tuples'} + The type of inputs to check. If 'classic', the input should be + a 2D array-like of points or a 1D array like of indicators of points. If + 'tuples', the input should be a 3D array-like of tuples or a 2D + array-like of indicators of tuples. + + accept_sparse : `bool` + Set to true to allow sparse inputs (only works for sparse inputs with + dim < 3). + + tuple_size : int + The number of elements in a tuple (e.g. 2 for pairs). + + dtype : string, type, list of types or None (default='numeric') + Data type of result. If None, the dtype of the input is preserved. + If 'numeric', dtype is preserved unless array.dtype is object. + If dtype is a list of types, conversion on the first type is only + performed if the dtype of the input is not in the list. + + order : 'F', 'C' or None (default=`None`) + Whether an array will be forced to be fortran or c-style. + + copy : boolean (default=False) + Whether a forced copy will be triggered. If copy=False, a copy might + be triggered by a conversion. + + force_all_finite : boolean or 'allow-nan', (default=True) + Whether to raise an error on np.inf and np.nan in X. This parameter + does not influence whether y can have np.inf or np.nan values. 
+ The possibilities are: + - True: Force all values of X to be finite. + - False: accept both np.inf and np.nan in X. + - 'allow-nan': accept only np.nan values in X. Values cannot be + infinite. + + ensure_min_samples : int (default=1) + Make sure that X has a minimum number of samples in its first + axis (rows for a 2D array). + + ensure_min_features : int (default=1) + Make sure that the 2D array has some minimum number of features + (columns). The default value of 1 rejects empty datasets. + This check is only enforced when X has effectively 2 dimensions or + is originally 1D and ``ensure_2d`` is True. Setting to 0 disables + this check. + + warn_on_dtype : boolean (default=False) + Raise DataConversionWarning if the dtype of the input data structure + does not match the requested dtype, causing a memory copy. + + estimator : str or estimator instance (default=`None`) + If passed, include the name of the estimator in warning messages. + + Returns + ------- + X : `numpy.ndarray` + The checked input data array. + + y: `numpy.ndarray` (optional) + The checked input labels array. + """ + + context = make_context(estimator) + + args_for_sk_checks = dict(accept_sparse=accept_sparse, + dtype=dtype, order=order, + copy=copy, force_all_finite=force_all_finite, + ensure_min_samples=ensure_min_samples, + ensure_min_features=ensure_min_features, + warn_on_dtype=warn_on_dtype, estimator=estimator) + + # We need to convert input_data into a numpy.ndarray if possible, before + # any further checks or conversions, and deal with y if needed. Therefore + # we use check_array/check_X_y with fixed permissive arguments. + if y is None: + input_data = check_array(input_data, ensure_2d=False, allow_nd=True, + copy=False, force_all_finite=False, + accept_sparse=True, dtype=None, + ensure_min_features=0, ensure_min_samples=0) + else: + input_data, y = check_X_y(input_data, y, ensure_2d=False, allow_nd=True, + copy=False, force_all_finite=False, + accept_sparse=True, dtype=None, + ensure_min_features=0, ensure_min_samples=0, + multi_output=multi_output, + y_numeric=y_numeric) + + if type_of_inputs == 'classic': + input_data = check_input_classic(input_data, context, preprocessor, + args_for_sk_checks) + + elif type_of_inputs == 'tuples': + input_data = check_input_tuples(input_data, context, preprocessor, + args_for_sk_checks, tuple_size) + + else: + raise ValueError("Unknown value {} for type_of_inputs. Valid values are " + "'classic' or 'tuples'.".format(type_of_inputs)) + + return input_data if y is None else (input_data, y) + + +def check_input_tuples(input_data, context, preprocessor, args_for_sk_checks, + tuple_size): + preprocessor_has_been_applied = False + if input_data.ndim == 2: + if preprocessor is not None: + input_data = preprocess_tuples(input_data, preprocessor) + preprocessor_has_been_applied = True + else: + make_error_input(201, input_data, context) + elif input_data.ndim == 3: + pass + else: + if preprocessor is not None: + make_error_input(420, input_data, context) + else: + make_error_input(200, input_data, context) + input_data = check_array(input_data, allow_nd=True, ensure_2d=False, + **args_for_sk_checks) + # we need to check num_features because check_array does not check it + # for 3D inputs: + if args_for_sk_checks['ensure_min_features'] > 0: + n_features = input_data.shape[2] + if n_features < args_for_sk_checks['ensure_min_features']: + raise ValueError("Found array with {} feature(s) (shape={}) while" + " a minimum of {} is required{}." 
+ .format(n_features, input_data.shape, + args_for_sk_checks['ensure_min_features'], + context)) + # normally we don't need to check_tuple_size too because tuple_size + # shouldn't be able to be modified by any preprocessor + if input_data.ndim != 3: + # we have to ensure this because check_array above does not + if preprocessor_has_been_applied: + make_error_input(211, input_data, context) + else: + make_error_input(201, input_data, context) + check_tuple_size(input_data, tuple_size, context) + return input_data + + +def check_input_classic(input_data, context, preprocessor, args_for_sk_checks): + preprocessor_has_been_applied = False + if input_data.ndim == 1: + if preprocessor is not None: + input_data = preprocess_points(input_data, preprocessor) + preprocessor_has_been_applied = True + else: + make_error_input(101, input_data, context) + elif input_data.ndim == 2: + pass # OK + else: + if preprocessor is not None: + make_error_input(320, input_data, context) + else: + make_error_input(100, input_data, context) + + input_data = check_array(input_data, allow_nd=True, ensure_2d=False, + **args_for_sk_checks) + if input_data.ndim != 2: + # we have to ensure this because check_array above does not + if preprocessor_has_been_applied: + make_error_input(111, input_data, context) + else: + make_error_input(101, input_data, context) + return input_data + + +def make_error_input(code, input_data, context): + code_str = {'expected_input': {'1': '2D array of formed points', + '2': '3D array of formed tuples', + '3': ('1D array of indicators or 2D array of ' + 'formed points'), + '4': ('2D array of indicators or 3D array ' + 'of formed tuples')}, + 'additional_context': {'0': '', + '2': ' when using a preprocessor', + '1': (' after the preprocessor has been ' + 'applied')}, + 'possible_preprocessor': {'0': '', + '1': ' and/or use a preprocessor' + }} + code_list = str(code) + err_args = dict(expected_input=code_str['expected_input'][code_list[0]], + additional_context=code_str['additional_context'] + [code_list[1]], + possible_preprocessor=code_str['possible_preprocessor'] + [code_list[2]], + input_data=input_data, context=context, + found_size=input_data.ndim) + err_msg = ('{expected_input} expected' + '{context}{additional_context}. Found {found_size}D array ' + 'instead:\ninput={input_data}. Reshape your data' + '{possible_preprocessor}.\n') + raise ValueError(err_msg.format(**err_args)) + + +def preprocess_tuples(tuples, preprocessor): + try: + tuples = np.column_stack([preprocessor(tuples[:, i])[:, np.newaxis] for + i in range(tuples.shape[1])]) + except Exception as e: + raise PreprocessorError(e) + return tuples + + +def preprocess_points(points, preprocessor): + """form points if there is a preprocessor else keep them as such (assumes + that check_points has already been called)""" + try: + points = preprocessor(points) + except Exception as e: + raise PreprocessorError(e) + return points + + +def make_context(estimator): + """Helper function to create a string with the estimator name. + Taken from check_array function in scikit-learn. 
+ Will return the following for instance: + NCA: ' by NCA' + 'NCA': ' by NCA' + None: '' + """ + estimator_name = make_name(estimator) + context = (' by ' + estimator_name) if estimator_name is not None else '' + return context + + +def make_name(estimator): + """Helper function that returns the name of estimator or the given string + if a string is given + """ + if estimator is not None: + if isinstance(estimator, six.string_types): + estimator_name = estimator + else: + estimator_name = estimator.__class__.__name__ + else: + estimator_name = None + return estimator_name + + +def check_tuple_size(tuples, tuple_size, context): + """Helper function to check that the number of points in each tuple is + equal to tuple_size (e.g. 2 for pairs), and raise a `ValueError` otherwise""" + if tuple_size is not None and tuples.shape[1] != tuple_size: + msg_t = (("Tuples of {} element(s) expected{}. Got tuples of {} " + "element(s) instead (shape={}):\ninput={}.\n") + .format(tuple_size, context, tuples.shape[1], tuples.shape, + tuples)) + raise ValueError(msg_t) + + +class ArrayIndexer: + + def __init__(self, X): + # we check the array-like preprocessor here, and we as much permissive + # as possible (because the user will check for the desired + # format with arguments in check_input, and only this latter function + # should return the appropriate errors). We do this only to have a numpy + # array object which can be indexed by another numpy array object. + X = check_array(X, + accept_sparse=True, dtype=None, + force_all_finite=False, + ensure_2d=False, allow_nd=True, + ensure_min_samples=0, + ensure_min_features=0, + warn_on_dtype=False, estimator=None) + self.X = X + + def __call__(self, indices): + return self.X[indices] + + +def check_collapsed_pairs(pairs): + num_ident = (vector_norm(pairs[:, 0] - pairs[:, 1]) < 1e-9).sum() + if num_ident: + raise ValueError("{} collapsed pairs found (where the left element is " + "the same as the right element), out of {} pairs " + "in total.".format(num_ident, pairs.shape[0])) diff --git a/metric_learn/base_metric.py b/metric_learn/base_metric.py index 02519de1..9af79ecc 100644 --- a/metric_learn/base_metric.py +++ b/metric_learn/base_metric.py @@ -1,51 +1,359 @@ -from numpy.linalg import inv, cholesky -from sklearn.base import BaseEstimator, TransformerMixin -from sklearn.utils.validation import check_array +from numpy.linalg import cholesky +from sklearn.base import BaseEstimator +from sklearn.utils.validation import _is_arraylike +from sklearn.metrics import roc_auc_score +import numpy as np +from abc import ABCMeta, abstractmethod +import six +from ._util import ArrayIndexer, check_input -class BaseMetricLearner(BaseEstimator, TransformerMixin): - def __init__(self): - raise NotImplementedError('BaseMetricLearner should not be instantiated') +class BaseMetricLearner(six.with_metaclass(ABCMeta, BaseEstimator)): - def metric(self): - """Computes the Mahalanobis matrix from the transformation matrix. + def __init__(self, preprocessor=None): + """ + + Parameters + ---------- + preprocessor : array-like, shape=(n_samples, n_features) or callable + The preprocessor to call to get tuples from indices. If array-like, + tuples will be gotten like this: X[indices]. + """ + self.preprocessor = preprocessor - .. 
math:: M = L^{\\top} L + @abstractmethod + def score_pairs(self, pairs): + """Returns the score between pairs + (can be a similarity, or a distance/metric depending on the algorithm) + + Parameters + ---------- + pairs : `numpy.ndarray`, shape=(n_samples, 2, n_features) + 3D array of pairs. Returns ------- - M : (d x d) matrix + scores: `numpy.ndarray` of shape=(n_pairs,) + The score of every pair. """ - L = self.transformer() - return L.T.dot(L) - def transformer(self): - """Computes the transformation matrix from the Mahalanobis matrix. + def check_preprocessor(self): + """Initializes the preprocessor""" + if _is_arraylike(self.preprocessor): + self.preprocessor_ = ArrayIndexer(self.preprocessor) + elif callable(self.preprocessor) or self.preprocessor is None: + self.preprocessor_ = self.preprocessor + else: + raise ValueError("Invalid type for the preprocessor: {}. You should " + "provide either None, an array-like object, " + "or a callable.".format(type(self.preprocessor))) + + def _prepare_inputs(self, X, y=None, type_of_inputs='classic', + **kwargs): + """Initializes the preprocessor and processes inputs. See `check_input` + for more details. + + Parameters + ---------- + input: array-like + The input data array to check. - L = cholesky(M).T + y : array-like + The input labels array to check. + + type_of_inputs: `str` {'classic', 'tuples'} + The type of inputs to check. If 'classic', the input should be + a 2D array-like of points or a 1D array like of indicators of points. If + 'tuples', the input should be a 3D array-like of tuples or a 2D + array-like of indicators of tuples. + + **kwargs: dict + Arguments to pass to check_input. Returns ------- - L : upper triangular (d x d) matrix + X : `numpy.ndarray` + The checked input data array. + + y: `numpy.ndarray` (optional) + The checked input labels array. """ - return cholesky(self.metric()).T + self.check_preprocessor() + return check_input(X, y, + type_of_inputs=type_of_inputs, + preprocessor=self.preprocessor_, + estimator=self, + tuple_size=getattr(self, '_tuple_size', None), + **kwargs) + - def transform(self, X=None): +class MetricTransformer(six.with_metaclass(ABCMeta)): + + @abstractmethod + def transform(self, X): """Applies the metric transformation. Parameters ---------- - X : (n x d) matrix, optional - Data to transform. If not supplied, the training data will be used. + X : (n x d) matrix + Data to transform. Returns ------- transformed : (n x d) matrix Input data transformed to the metric space by :math:`XL^{\\top}` """ - if X is None: - X = self.X_ + + +class MahalanobisMixin(six.with_metaclass(ABCMeta, BaseMetricLearner, + MetricTransformer)): + """Mahalanobis metric learning algorithms. + + Algorithm that learns a Mahalanobis (pseudo) distance :math:`d_M(x, x')`, + defined between two column vectors :math:`x` and :math:`x'` by: :math:`d_M(x, + x') = \sqrt{(x-x')^T M (x-x')}`, where :math:`M` is a learned symmetric + positive semi-definite (PSD) matrix. The metric between points can then be + expressed as the euclidean distance between points embedded in a new space + through a linear transformation. Indeed, the above matrix can be decomposed + into the product of two transpose matrices (through SVD or Cholesky + decomposition): :math:`d_M(x, x')^2 = (x-x')^T M (x-x') = (x-x')^T L^T L + (x-x') = (L x - L x')^T (L x- L x')` + + Attributes + ---------- + transformer_ : `numpy.ndarray`, shape=(num_dims, n_features) + The learned linear transformation ``L``. 
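+
+  A minimal numerical sketch of the decomposition above (illustrative only):
+
+  >>> import numpy as np
+  >>> L = np.array([[1., 0.], [0., 2.]])  # some transformation matrix
+  >>> M = L.T.dot(L)                      # the induced Mahalanobis matrix
+  >>> x, x_prime = np.array([1., 1.]), np.array([0., 0.])
+  >>> d_M = np.sqrt((x - x_prime).dot(M).dot(x - x_prime))
+  >>> d_embedded = np.linalg.norm(L.dot(x) - L.dot(x_prime))
+  >>> np.isclose(d_M, d_embedded)
+  True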
+ """ + + def score_pairs(self, pairs): + """Returns the learned Mahalanobis distance between pairs. + + This distance is defined as: :math:`d_M(x, x') = \sqrt{(x-x')^T M (x-x')}` + where ``M`` is the learned Mahalanobis matrix, for every pair of points + ``x`` and ``x'``. This corresponds to the euclidean distance between + embeddings of the points in a new space, obtained through a linear + transformation. Indeed, we have also: :math:`d_M(x, x') = \sqrt{(x_e - + x_e')^T (x_e- x_e')}`, with :math:`x_e = L x` (See + :class:`MahalanobisMixin`). + + Parameters + ---------- + pairs : array-like, shape=(n_pairs, 2, n_features) or (n_pairs, 2) + 3D Array of pairs to score, with each row corresponding to two points, + for 2D array of indices of pairs if the metric learner uses a + preprocessor. + + Returns + ------- + scores: `numpy.ndarray` of shape=(n_pairs,) + The learned Mahalanobis distance for every pair. + """ + pairs = check_input(pairs, type_of_inputs='tuples', + preprocessor=self.preprocessor_, + estimator=self, tuple_size=2) + pairwise_diffs = self.transform(pairs[:, 1, :] - pairs[:, 0, :]) + # (for MahalanobisMixin, the embedding is linear so we can just embed the + # difference) + return np.sqrt(np.sum(pairwise_diffs**2, axis=-1)) + + def transform(self, X): + """Embeds data points in the learned linear embedding space. + + Transforms samples in ``X`` into ``X_embedded``, samples inside a new + embedding space such that: ``X_embedded = X.dot(L.T)``, where ``L`` is + the learned linear transformation (See :class:`MahalanobisMixin`). + + Parameters + ---------- + X : `numpy.ndarray`, shape=(n_samples, n_features) + The data points to embed. + + Returns + ------- + X_embedded : `numpy.ndarray`, shape=(n_samples, num_dims) + The embedded data points. + """ + X_checked = check_input(X, type_of_inputs='classic', estimator=self, + preprocessor=self.preprocessor_, + accept_sparse=True) + return X_checked.dot(self.transformer_.T) + + def metric(self): + return self.transformer_.T.dot(self.transformer_) + + def transformer_from_metric(self, metric): + """Computes the transformation matrix from the Mahalanobis matrix. + + Since by definition the metric `M` is positive semi-definite (PSD), it + admits a Cholesky decomposition: L = cholesky(M).T. However, currently the + computation of the Cholesky decomposition used does not support + non-definite matrices. If the metric is not definite, this method will + return L = V.T w^( -1/2), with M = V*w*V.T being the eigenvector + decomposition of M with the eigenvalues in the diagonal matrix w and the + columns of V being the eigenvectors. If M is diagonal, this method will + just return its elementwise square root (since the diagonalization of + the matrix is itself). + + Returns + ------- + L : (d x d) matrix + """ + + if np.allclose(metric, np.diag(np.diag(metric))): + return np.sqrt(metric) + elif not np.isclose(np.linalg.det(metric), 0): + return cholesky(metric).T else: - X = check_array(X, accept_sparse=True) - L = self.transformer() - return X.dot(L.T) + w, V = np.linalg.eigh(metric) + return V.T * np.sqrt(np.maximum(0, w[:, None])) + + +class _PairsClassifierMixin(BaseMetricLearner): + + _tuple_size = 2 # number of points in a tuple, 2 for pairs + + def predict(self, pairs): + """Predicts the learned metric between input pairs. (For now it just + calls decision function). + + Returns the learned metric value between samples in every pair. It should + ideally be low for similar samples and high for dissimilar samples. 
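+
+    A usage sketch (illustrative; ``model`` stands for any fitted pairs
+    learner, e.g. an ITML instance):
+
+    >>> pairs = [[[1., 2.], [1., 5.]], [[3., 4.], [10., 2.]]]
+    >>> scores = model.predict(pairs)  # doctest: +SKIP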
+ + Parameters + ---------- + pairs : array-like, shape=(n_pairs, 2, n_features) or (n_pairs, 2) + 3D Array of pairs to predict, with each row corresponding to two + points, or 2D array of indices of pairs if the metric learner uses a + preprocessor. + + Returns + ------- + y_predicted : `numpy.ndarray` of floats, shape=(n_constraints,) + The predicted learned metric value between samples in every pair. + """ + return self.decision_function(pairs) + + def decision_function(self, pairs): + """Returns the learned metric between input pairs. + + Returns the learned metric value between samples in every pair. It should + ideally be low for similar samples and high for dissimilar samples. + + Parameters + ---------- + pairs : array-like, shape=(n_pairs, 2, n_features) or (n_pairs, 2) + 3D Array of pairs to predict, with each row corresponding to two + points, or 2D array of indices of pairs if the metric learner uses a + preprocessor. + + Returns + ------- + y_predicted : `numpy.ndarray` of floats, shape=(n_constraints,) + The predicted learned metric value between samples in every pair. + """ + pairs = check_input(pairs, type_of_inputs='tuples', + preprocessor=self.preprocessor_, + estimator=self, tuple_size=self._tuple_size) + return self.score_pairs(pairs) + + def score(self, pairs, y): + """Computes score of pairs similarity prediction. + + Returns the ``roc_auc`` score of the fitted metric learner. It is + computed in the following way: for every value of a threshold + ``t`` we classify all pairs of samples where the predicted distance is + inferior to ``t`` as belonging to the "similar" class, and the other as + belonging to the "dissimilar" class, and we count false positive and + true positives as in a classical ``roc_auc`` curve. + + Parameters + ---------- + pairs : array-like, shape=(n_pairs, 2, n_features) or (n_pairs, 2) + 3D Array of pairs, with each row corresponding to two points, + or 2D array of indices of pairs if the metric learner uses a + preprocessor. + + y : array-like, shape=(n_constraints,) + The corresponding labels. + + Returns + ------- + score : float + The ``roc_auc`` score. + """ + return roc_auc_score(y, self.decision_function(pairs)) + + +class _QuadrupletsClassifierMixin(BaseMetricLearner): + + _tuple_size = 4 # number of points in a tuple, 4 for quadruplets + + def predict(self, quadruplets): + """Predicts the ordering between sample distances in input quadruplets. + + For each quadruplet, returns 1 if the quadruplet is in the right order ( + first pair is more similar than second pair), and -1 if not. + + Parameters + ---------- + quadruplets : array-like, shape=(n_quadruplets, 4, n_features) or + (n_quadruplets, 4) + 3D Array of quadruplets to predict, with each row corresponding to four + points, or 2D array of indices of quadruplets if the metric learner + uses a preprocessor. + + Returns + ------- + prediction : `numpy.ndarray` of floats, shape=(n_constraints,) + Predictions of the ordering of pairs, for each quadruplet. + """ + quadruplets = check_input(quadruplets, type_of_inputs='tuples', + preprocessor=self.preprocessor_, + estimator=self, tuple_size=self._tuple_size) + return np.sign(self.decision_function(quadruplets)) + + def decision_function(self, quadruplets): + """Predicts differences between sample distances in input quadruplets. + + For each quadruplet of samples, computes the difference between the learned + metric of the first pair minus the learned metric of the second pair. 
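+
+    That is, for a quadruplet ``(a, b, c, d)`` it computes
+    ``score_pairs([[a, b]]) - score_pairs([[c, d]])``, which is negative
+    when the first pair is the closer one under the learned metric.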
+ + Parameters + ---------- + quadruplets : array-like, shape=(n_quadruplets, 4, n_features) or + (n_quadruplets, 4) + 3D Array of quadruplets to predict, with each row corresponding to four + points, or 2D array of indices of quadruplets if the metric learner + uses a preprocessor. + + Returns + ------- + decision_function : `numpy.ndarray` of floats, shape=(n_constraints,) + Metric differences. + """ + return (self.score_pairs(quadruplets[:, :2]) - + self.score_pairs(quadruplets[:, 2:])) + + def score(self, quadruplets, y=None): + """Computes score on input quadruplets + + Returns the accuracy score of the following classification task: a record + is correctly classified if the predicted similarity between the first two + samples is higher than that of the last two. + + Parameters + ---------- + quadruplets : array-like, shape=(n_quadruplets, 4, n_features) or + (n_quadruplets, 4) + 3D Array of quadruplets to score, with each row corresponding to four + points, or 2D array of indices of quadruplets if the metric learner + uses a preprocessor. + + y : Ignored, for scikit-learn compatibility. + + Returns + ------- + score : float + The quadruplets score. + """ + return -np.mean(self.predict(quadruplets)) diff --git a/metric_learn/constraints.py b/metric_learn/constraints.py index 8824450a..17523a46 100644 --- a/metric_learn/constraints.py +++ b/metric_learn/constraints.py @@ -100,3 +100,13 @@ def random_subset(all_labels, num_preserved=np.inf, random_state=np.random): partial_labels = np.array(all_labels, copy=True) partial_labels[idx] = -1 return Constraints(partial_labels) + +def wrap_pairs(X, constraints): + a = np.array(constraints[0]) + b = np.array(constraints[1]) + c = np.array(constraints[2]) + d = np.array(constraints[3]) + constraints = np.vstack((np.column_stack((a, b)), np.column_stack((c, d)))) + y = np.vstack([np.ones((len(a), 1)), - np.ones((len(c), 1))]) + pairs = X[constraints] + return pairs, y \ No newline at end of file diff --git a/metric_learn/covariance.py b/metric_learn/covariance.py index 8fc07873..10bc9582 100644 --- a/metric_learn/covariance.py +++ b/metric_learn/covariance.py @@ -10,27 +10,35 @@ from __future__ import absolute_import import numpy as np -from sklearn.utils.validation import check_array +from sklearn.base import TransformerMixin -from .base_metric import BaseMetricLearner +from .base_metric import MahalanobisMixin -class Covariance(BaseMetricLearner): - def __init__(self): - pass +class Covariance(MahalanobisMixin, TransformerMixin): + """Covariance metric (baseline method) - def metric(self): - return self.M_ + Attributes + ---------- + transformer_ : `numpy.ndarray`, shape=(num_dims, n_features) + The linear transformation ``L`` deduced from the learned Mahalanobis + metric (See :meth:`transformer_from_metric`.) 
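+
+  A minimal usage sketch (illustrative only):
+
+  >>> import numpy as np
+  >>> from metric_learn import Covariance
+  >>> X = np.array([[0., 0.], [1., 0.], [0., 1.], [1., 1.]])
+  >>> cov = Covariance().fit(X)
+  >>> M = cov.metric()  # here, the inverse covariance of X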
+ """ + + def __init__(self, preprocessor=None): + super(Covariance, self).__init__(preprocessor) def fit(self, X, y=None): """ X : data matrix, (n x d) y : unused """ - self.X_ = check_array(X, ensure_min_samples=2) - self.M_ = np.cov(self.X_, rowvar = False) - if self.M_.ndim == 0: - self.M_ = 1./self.M_ + X = self._prepare_inputs(X, ensure_min_samples=2) + M = np.cov(X, rowvar = False) + if M.ndim == 0: + M = 1./M else: - self.M_ = np.linalg.inv(self.M_) + M = np.linalg.inv(M) + + self.transformer_ = self.transformer_from_metric(np.atleast_2d(M)) return self diff --git a/metric_learn/exceptions.py b/metric_learn/exceptions.py new file mode 100644 index 00000000..424d2c4f --- /dev/null +++ b/metric_learn/exceptions.py @@ -0,0 +1,12 @@ +""" +The :mod:`metric_learn.exceptions` module includes all custom warnings and +error classes used across metric-learn. +""" + + +class PreprocessorError(Exception): + + def __init__(self, original_error): + err_msg = ("An error occurred when trying to use the " + "preprocessor: {}").format(repr(original_error)) + super(PreprocessorError, self).__init__(err_msg) diff --git a/metric_learn/itml.py b/metric_learn/itml.py index 7b218895..48e71f56 100644 --- a/metric_learn/itml.py +++ b/metric_learn/itml.py @@ -17,17 +17,20 @@ import numpy as np from six.moves import xrange from sklearn.metrics import pairwise_distances -from sklearn.utils.validation import check_array, check_X_y - -from .base_metric import BaseMetricLearner -from .constraints import Constraints +from sklearn.utils.validation import check_array +from sklearn.base import TransformerMixin +from .base_metric import _PairsClassifierMixin, MahalanobisMixin +from .constraints import Constraints, wrap_pairs from ._util import vector_norm -class ITML(BaseMetricLearner): +class _BaseITML(MahalanobisMixin): """Information Theoretic Metric Learning (ITML)""" + + _tuple_size = 2 # constraints are pairs + def __init__(self, gamma=1., max_iter=1000, convergence_threshold=1e-3, - A0=None, verbose=False): + A0=None, verbose=False, preprocessor=None): """Initialize ITML. Parameters @@ -44,23 +47,24 @@ def __init__(self, gamma=1., max_iter=1000, convergence_threshold=1e-3, verbose : bool, optional if True, prints information while learning + + preprocessor : array-like, shape=(n_samples, n_features) or callable + The preprocessor to call to get tuples from indices. If array-like, + tuples will be formed like this: X[indices]. 
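+
+      For instance (an illustrative sketch), if ``X`` holds the points,
+      passing ``preprocessor=X`` lets ``fit`` accept 2D tuples of
+      indices::
+
+        pairs = np.array([[0, 1], [2, 3]])  # each row indexes into X
+        # internally resolved to X[pairs], a (2, 2, n_features) array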
""" self.gamma = gamma self.max_iter = max_iter self.convergence_threshold = convergence_threshold self.A0 = A0 self.verbose = verbose + super(_BaseITML, self).__init__(preprocessor) - def _process_inputs(self, X, constraints, bounds): - self.X_ = X = check_array(X) - # check to make sure that no two constrained vectors are identical - a,b,c,d = constraints - no_ident = vector_norm(X[a] - X[b]) > 1e-9 - a, b = a[no_ident], b[no_ident] - no_ident = vector_norm(X[c] - X[d]) > 1e-9 - c, d = c[no_ident], d[no_ident] + def _fit(self, pairs, y, bounds=None): + pairs, y = self._prepare_inputs(pairs, y, + type_of_inputs='tuples') # init bounds if bounds is None: + X = np.vstack({tuple(row) for row in pairs.reshape(-1, pairs.shape[2])}) self.bounds_ = np.percentile(pairwise_distances(X), (5, 95)) else: assert len(bounds) == 2 @@ -68,35 +72,20 @@ def _process_inputs(self, X, constraints, bounds): self.bounds_[self.bounds_==0] = 1e-9 # init metric if self.A0 is None: - self.A_ = np.identity(X.shape[1]) + self.A_ = np.identity(pairs.shape[2]) else: self.A_ = check_array(self.A0) - return a,b,c,d - - def fit(self, X, constraints, bounds=None): - """Learn the ITML model. - - Parameters - ---------- - X : (n x d) data matrix - each row corresponds to a single instance - constraints : 4-tuple of arrays - (a,b,c,d) indices into X, with (a,b) specifying positive and (c,d) - negative pairs - bounds : list (pos,neg) pairs, optional - bounds on similarity, s.t. d(X[a],X[b]) < pos and d(X[c],X[d]) > neg - """ - a,b,c,d = self._process_inputs(X, constraints, bounds) gamma = self.gamma - num_pos = len(a) - num_neg = len(c) + pos_pairs, neg_pairs = pairs[y == 1], pairs[y == -1] + num_pos = len(pos_pairs) + num_neg = len(neg_pairs) _lambda = np.zeros(num_pos + num_neg) lambdaold = np.zeros_like(_lambda) gamma_proj = 1. if gamma is np.inf else gamma/(gamma+1.) pos_bhat = np.zeros(num_pos) + self.bounds_[0] neg_bhat = np.zeros(num_neg) + self.bounds_[1] - pos_vv = self.X_[a] - self.X_[b] - neg_vv = self.X_[c] - self.X_[d] + pos_vv = pos_pairs[:, 0, :] - pos_pairs[:, 1, :] + neg_vv = neg_pairs[:, 0, :] - neg_pairs[:, 1, :] A = self.A_ for it in xrange(self.max_iter): @@ -134,17 +123,57 @@ def fit(self, X, constraints, bounds=None): if self.verbose: print('itml converged at iter: %d, conv = %f' % (it, conv)) self.n_iter_ = it + + self.transformer_ = self.transformer_from_metric(self.A_) return self - def metric(self): - return self.A_ +class ITML(_BaseITML, _PairsClassifierMixin): + """Information Theoretic Metric Learning (ITML) + + Attributes + ---------- + transformer_ : `numpy.ndarray`, shape=(num_dims, n_features) + The linear transformation ``L`` deduced from the learned Mahalanobis + metric (See :meth:`transformer_from_metric`.) + """ + + def fit(self, pairs, y, bounds=None): + """Learn the ITML model. + + Parameters + ---------- + pairs: array-like, shape=(n_constraints, 2, n_features) or + (n_constraints, 2) + 3D Array of pairs with each row corresponding to two points, + or 2D array of indices of pairs if the metric learner uses a + preprocessor. + y: array-like, of shape (n_constraints,) + Labels of constraints. Should be -1 for dissimilar pair, 1 for similar. + bounds : list (pos,neg) pairs, optional + bounds on similarity, s.t. d(X[a],X[b]) < pos and d(X[c],X[d]) > neg + + Returns + ------- + self : object + Returns the instance. 
+ """ + return self._fit(pairs, y, bounds=bounds) + + +class ITML_Supervised(_BaseITML, TransformerMixin): + """Supervised version of Information Theoretic Metric Learning (ITML) + + Attributes + ---------- + transformer_ : `numpy.ndarray`, shape=(num_dims, n_features) + The linear transformation ``L`` deduced from the learned Mahalanobis + metric (See `transformer_from_metric`.) + """ -class ITML_Supervised(ITML): - """Information Theoretic Metric Learning (ITML)""" def __init__(self, gamma=1., max_iter=1000, convergence_threshold=1e-3, num_labeled=np.inf, num_constraints=None, bounds=None, A0=None, - verbose=False): + verbose=False, preprocessor=None): """Initialize the supervised version of `ITML`. `ITML_Supervised` creates pairs of similar sample by taking same class @@ -169,10 +198,13 @@ def __init__(self, gamma=1., max_iter=1000, convergence_threshold=1e-3, initial regularization matrix, defaults to identity verbose : bool, optional if True, prints information while learning + preprocessor : array-like, shape=(n_samples, n_features) or callable + The preprocessor to call to get tuples from indices. If array-like, + tuples will be formed like this: X[indices]. """ - ITML.__init__(self, gamma=gamma, max_iter=max_iter, - convergence_threshold=convergence_threshold, - A0=A0, verbose=verbose) + _BaseITML.__init__(self, gamma=gamma, max_iter=max_iter, + convergence_threshold=convergence_threshold, + A0=A0, verbose=verbose, preprocessor=preprocessor) self.num_labeled = num_labeled self.num_constraints = num_constraints self.bounds = bounds @@ -180,6 +212,7 @@ def __init__(self, gamma=1., max_iter=1000, convergence_threshold=1e-3, def fit(self, X, y, random_state=np.random): """Create constraints from labels and learn the ITML model. + Parameters ---------- X : (n x d) matrix @@ -191,7 +224,7 @@ def fit(self, X, y, random_state=np.random): random_state : numpy.random.RandomState, optional If provided, controls random number generation. """ - X, y = check_X_y(X, y, ensure_min_samples=2) + X, y = self._prepare_inputs(X, y, ensure_min_samples=2) num_constraints = self.num_constraints if num_constraints is None: num_classes = len(np.unique(y)) @@ -201,4 +234,5 @@ def fit(self, X, y, random_state=np.random): random_state=random_state) pos_neg = c.positive_negative_pairs(num_constraints, random_state=random_state) - return ITML.fit(self, X, pos_neg, bounds=self.bounds) + pairs, y = wrap_pairs(X, pos_neg) + return _BaseITML._fit(self, pairs, y, bounds=self.bounds) diff --git a/metric_learn/lfda.py b/metric_learn/lfda.py index 809f092b..2feff211 100644 --- a/metric_learn/lfda.py +++ b/metric_learn/lfda.py @@ -16,17 +16,23 @@ import warnings from six.moves import xrange from sklearn.metrics import pairwise_distances -from sklearn.utils.validation import check_X_y +from sklearn.base import TransformerMixin +from .base_metric import MahalanobisMixin -from .base_metric import BaseMetricLearner - -class LFDA(BaseMetricLearner): +class LFDA(MahalanobisMixin, TransformerMixin): ''' Local Fisher Discriminant Analysis for Supervised Dimensionality Reduction Sugiyama, ICML 2006 + + Attributes + ---------- + transformer_ : `numpy.ndarray`, shape=(num_dims, n_features) + The learned linear transformation ``L``. ''' - def __init__(self, num_dims=None, k=None, embedding_type='weighted'): + + def __init__(self, num_dims=None, k=None, embedding_type='weighted', + preprocessor=None): ''' Initialize LFDA. 
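+
+    A self-contained sketch of typical usage (illustrative only):
+
+    >>> import numpy as np
+    >>> from metric_learn import LFDA
+    >>> X = np.random.rand(40, 5)
+    >>> y = np.random.randint(0, 2, 40)
+    >>> lfda = LFDA(k=2, num_dims=2)
+    >>> _ = lfda.fit(X, y)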
@@ -44,20 +50,32 @@ def __init__(self, num_dims=None, k=None, embedding_type='weighted'): 'weighted' - weighted eigenvectors 'orthonormalized' - orthonormalized 'plain' - raw eigenvectors + + preprocessor : array-like, shape=(n_samples, n_features) or callable + The preprocessor to call to get tuples from indices. If array-like, + tuples will be formed like this: X[indices]. ''' if embedding_type not in ('weighted', 'orthonormalized', 'plain'): raise ValueError('Invalid embedding_type: %r' % embedding_type) self.num_dims = num_dims self.embedding_type = embedding_type self.k = k + super(LFDA, self).__init__(preprocessor) + + def fit(self, X, y): + '''Fit the LFDA model. - def transformer(self): - return self.transformer_ + Parameters + ---------- + X : (n, d) array-like + Input data. - def _process_inputs(self, X, y): + y : (n,) array-like + Class labels, one per point of data. + ''' + X, y = self._prepare_inputs(X, y, ensure_min_samples=2) unique_classes, y = np.unique(y, return_inverse=True) - self.X_, y = check_X_y(X, y) - n, d = self.X_.shape + n, d = X.shape num_classes = len(unique_classes) if self.num_dims is None: @@ -74,21 +92,6 @@ def _process_inputs(self, X, y): k = d - 1 else: k = int(self.k) - - return self.X_, y, num_classes, n, d, dim, k - - def fit(self, X, y): - '''Fit the LFDA model. - - Parameters - ---------- - X : (n, d) array-like - Input data. - - y : (n,) array-like - Class labels, one per point of data. - ''' - X, y, num_classes, n, d, dim, k_ = self._process_inputs(X, y) tSb = np.zeros((d,d)) tSw = np.zeros((d,d)) @@ -99,8 +102,8 @@ def fit(self, X, y): # classwise affinity matrix dist = pairwise_distances(Xc, metric='l2', squared=True) # distances to k-th nearest neighbor - k = min(k_, nc-1) - sigma = np.sqrt(np.partition(dist, k, axis=0)[:,k]) + k = min(k, nc - 1) + sigma = np.sqrt(np.partition(dist, k, axis=0)[:, k]) local_scale = np.outer(sigma, sigma) with np.errstate(divide='ignore', invalid='ignore'): diff --git a/metric_learn/lmnn.py b/metric_learn/lmnn.py index d1a41a33..1d7ddf2a 100644 --- a/metric_learn/lmnn.py +++ b/metric_learn/lmnn.py @@ -14,17 +14,16 @@ import warnings from collections import Counter from six.moves import xrange -from sklearn.utils.validation import check_X_y, check_array from sklearn.metrics import euclidean_distances - -from .base_metric import BaseMetricLearner +from sklearn.base import TransformerMixin +from .base_metric import MahalanobisMixin # commonality between LMNN implementations -class _base_LMNN(BaseMetricLearner): +class _base_LMNN(MahalanobisMixin, TransformerMixin): def __init__(self, k=3, min_iter=50, max_iter=1000, learn_rate=1e-7, regularization=0.5, convergence_tol=0.001, use_pca=True, - verbose=False): + verbose=False, preprocessor=None): """Initialize the LMNN object. Parameters @@ -34,6 +33,10 @@ def __init__(self, k=3, min_iter=50, max_iter=1000, learn_rate=1e-7, regularization: float, optional Weighting of pull and push terms, with 0.5 meaning equal weight. + + preprocessor : array-like, shape=(n_samples, n_features) or callable + The preprocessor to call to get tuples from indices. If array-like, + tuples will be formed like this: X[indices]. 
""" self.k = k self.min_iter = min_iter @@ -43,44 +46,41 @@ def __init__(self, k=3, min_iter=50, max_iter=1000, learn_rate=1e-7, self.convergence_tol = convergence_tol self.use_pca = use_pca self.verbose = verbose - - def transformer(self): - return self.L_ + super(_base_LMNN, self).__init__(preprocessor) # slower Python version class python_LMNN(_base_LMNN): - def _process_inputs(self, X, labels): - self.X_ = check_array(X, dtype=float, ensure_min_samples=2) - num_pts, num_dims = self.X_.shape - unique_labels, self.label_inds_ = np.unique(labels, return_inverse=True) + def fit(self, X, y): + k = self.k + reg = self.regularization + learn_rate = self.learn_rate + + X, y = self._prepare_inputs(X, y, dtype=float, + ensure_min_samples=2) + num_pts, num_dims = X.shape + unique_labels, self.label_inds_ = np.unique(y, return_inverse=True) if len(self.label_inds_) != num_pts: raise ValueError('Must have one label per point.') self.labels_ = np.arange(len(unique_labels)) if self.use_pca: warnings.warn('use_pca does nothing for the python_LMNN implementation') - self.L_ = np.eye(num_dims) + self.transformer_ = np.eye(num_dims) required_k = np.bincount(self.label_inds_).min() if self.k > required_k: raise ValueError('not enough class labels for specified k' ' (smallest class has %d)' % required_k) - def fit(self, X, y): - k = self.k - reg = self.regularization - learn_rate = self.learn_rate - self._process_inputs(X, y) - - target_neighbors = self._select_targets() - impostors = self._find_impostors(target_neighbors[:,-1]) + target_neighbors = self._select_targets(X) + impostors = self._find_impostors(target_neighbors[:, -1], X) if len(impostors) == 0: # L has already been initialized to an identity matrix return # sum outer products - dfG = _sum_outer_products(self.X_, target_neighbors.flatten(), - np.repeat(np.arange(self.X_.shape[0]), k)) + dfG = _sum_outer_products(X, target_neighbors.flatten(), + np.repeat(np.arange(X.shape[0]), k)) df = np.zeros_like(dfG) # storage @@ -91,14 +91,15 @@ def fit(self, X, y): a2[nn_idx] = np.array([]) # initialize L - L = self.L_ + L = self.transformer_ # first iteration: we compute variables (including objective and gradient) # at initialization point G, objective, total_active, df, a1, a2 = ( - self._loss_grad(L, dfG, impostors, 1, k, reg, target_neighbors, df, a1, - a2)) + self._loss_grad(X, L, dfG, impostors, 1, k, reg, target_neighbors, df, + a1, a2)) + # main loop for it in xrange(2, self.max_iter): # then at each iteration, we try to find a value of L that has better # objective than the previous L, following the gradient: @@ -110,7 +111,7 @@ def fit(self, X, y): # retry we don t want to modify them several times (G_next, objective_next, total_active_next, df_next, a1_next, a2_next) = ( - self._loss_grad(L_next, dfG, impostors, it, k, reg, + self._loss_grad(X, L_next, dfG, impostors, it, k, reg, target_neighbors, df.copy(), list(a1), list(a2))) assert not np.isnan(objective) delta_obj = objective_next - objective @@ -143,14 +144,14 @@ def fit(self, X, y): print("LMNN didn't converge in %d steps." 
% self.max_iter) # store the last L - self.L_ = L + self.transformer_ = L self.n_iter_ = it return self - def _loss_grad(self, L, dfG, impostors, it, k, reg, target_neighbors, df, a1, - a2): + def _loss_grad(self, X, L, dfG, impostors, it, k, reg, target_neighbors, df, + a1, a2): # Compute pairwise distances under current metric - Lx = L.dot(self.X_.T).T + Lx = L.dot(X.T).T g0 = _inplace_paired_L2(*Lx[impostors]) Ni = 1 + _inplace_paired_L2(Lx[target_neighbors], Lx[:, None, :]) g1, g2 = Ni[impostors] @@ -174,16 +175,16 @@ def _loss_grad(self, L, dfG, impostors, it, k, reg, target_neighbors, df, a1, targets = target_neighbors[:, nn_idx] PLUS, pweight = _count_edges(plus1, plus2, impostors, targets) - df += _sum_outer_products(self.X_, PLUS[:, 0], PLUS[:, 1], pweight) + df += _sum_outer_products(X, PLUS[:, 0], PLUS[:, 1], pweight) MINUS, mweight = _count_edges(minus1, minus2, impostors, targets) - df -= _sum_outer_products(self.X_, MINUS[:, 0], MINUS[:, 1], mweight) + df -= _sum_outer_products(X, MINUS[:, 0], MINUS[:, 1], mweight) in_imp, out_imp = impostors - df += _sum_outer_products(self.X_, in_imp[minus1], out_imp[minus1]) - df += _sum_outer_products(self.X_, in_imp[minus2], out_imp[minus2]) + df += _sum_outer_products(X, in_imp[minus1], out_imp[minus1]) + df += _sum_outer_products(X, in_imp[minus2], out_imp[minus2]) - df -= _sum_outer_products(self.X_, in_imp[plus1], out_imp[plus1]) - df -= _sum_outer_products(self.X_, in_imp[plus2], out_imp[plus2]) + df -= _sum_outer_products(X, in_imp[plus1], out_imp[plus1]) + df -= _sum_outer_products(X, in_imp[plus2], out_imp[plus2]) a1[nn_idx] = act1 a2[nn_idx] = act2 @@ -195,18 +196,18 @@ def _loss_grad(self, L, dfG, impostors, it, k, reg, target_neighbors, df, a1, objective += G.flatten().dot(L.T.dot(L).flatten()) return G, objective, total_active, df, a1, a2 - def _select_targets(self): - target_neighbors = np.empty((self.X_.shape[0], self.k), dtype=int) + def _select_targets(self, X): + target_neighbors = np.empty((X.shape[0], self.k), dtype=int) for label in self.labels_: inds, = np.nonzero(self.label_inds_ == label) - dd = euclidean_distances(self.X_[inds], squared=True) + dd = euclidean_distances(X[inds], squared=True) np.fill_diagonal(dd, np.inf) nn = np.argsort(dd)[..., :self.k] target_neighbors[inds] = inds[nn] return target_neighbors - def _find_impostors(self, furthest_neighbors): - Lx = self.transform() + def _find_impostors(self, furthest_neighbors, X): + Lx = self.transform(X) margin_radii = 1 + _inplace_paired_L2(Lx[furthest_neighbors], Lx) impostors = [] for label in self.labels_[:-1]: @@ -260,11 +261,19 @@ def _sum_outer_products(data, a_inds, b_inds, weights=None): from modshogun import RealFeatures, MulticlassLabels class LMNN(_base_LMNN): + """Large Margin Nearest Neighbor (LMNN) + + Attributes + ---------- + transformer_ : `numpy.ndarray`, shape=(num_dims, n_features) + The learned linear transformation ``L``. 
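+
+  A usage sketch (illustrative; assumes the optional ``modshogun``
+  bindings are installed and that ``X``, ``y`` form a labeled dataset):
+
+  >>> from metric_learn import LMNN
+  >>> lmnn = LMNN(k=3).fit(X, y)  # doctest: +SKIP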
+ """ def fit(self, X, y): - self.X_, y = check_X_y(X, y, dtype=float) + X, y = self._prepare_inputs(X, y, dtype=float, + ensure_min_samples=2) labels = MulticlassLabels(y) - self._lmnn = shogun_LMNN(RealFeatures(self.X_.T), labels, self.k) + self._lmnn = shogun_LMNN(RealFeatures(X.T), labels, self.k) self._lmnn.set_maxiter(self.max_iter) self._lmnn.set_obj_threshold(self.convergence_tol) self._lmnn.set_regularization(self.regularization) @@ -273,7 +282,7 @@ def fit(self, X, y): self._lmnn.train() else: self._lmnn.train(np.eye(X.shape[1])) - self.L_ = self._lmnn.get_linear_transform() + self.transformer_ = self._lmnn.get_linear_transform(X) return self except ImportError: diff --git a/metric_learn/lsml.py b/metric_learn/lsml.py index 4e315b0b..73296b46 100644 --- a/metric_learn/lsml.py +++ b/metric_learn/lsml.py @@ -11,14 +11,18 @@ import numpy as np import scipy.linalg from six.moves import xrange -from sklearn.utils.validation import check_array, check_X_y +from sklearn.base import TransformerMixin -from .base_metric import BaseMetricLearner +from .base_metric import _QuadrupletsClassifierMixin, MahalanobisMixin from .constraints import Constraints -class LSML(BaseMetricLearner): - def __init__(self, tol=1e-3, max_iter=1000, prior=None, verbose=False): +class _BaseLSML(MahalanobisMixin): + + _tuple_size = 4 # constraints are quadruplets + + def __init__(self, tol=1e-3, max_iter=1000, prior=None, verbose=False, + preprocessor=None): """Initialize LSML. Parameters @@ -29,17 +33,23 @@ def __init__(self, tol=1e-3, max_iter=1000, prior=None, verbose=False): guess at a metric [default: inv(covariance(X))] verbose : bool, optional if True, prints information while learning + preprocessor : array-like, shape=(n_samples, n_features) or callable + The preprocessor to call to get tuples from indices. If array-like, + tuples will be formed like this: X[indices]. """ self.prior = prior self.tol = tol self.max_iter = max_iter self.verbose = verbose + super(_BaseLSML, self).__init__(preprocessor) - def _prepare_inputs(self, X, constraints, weights): - self.X_ = X = check_array(X) - a,b,c,d = constraints - self.vab_ = X[a] - X[b] - self.vcd_ = X[c] - X[d] + def _fit(self, quadruplets, y=None, weights=None): + quadruplets = self._prepare_inputs(quadruplets, + type_of_inputs='tuples') + + # check to make sure that no two constrained vectors are identical + self.vab_ = quadruplets[:, 0, :] - quadruplets[:, 1, :] + self.vcd_ = quadruplets[:, 2, :] - quadruplets[:, 3, :] if self.vab_.shape != self.vcd_.shape: raise ValueError('Constraints must have same length') if weights is None: @@ -48,28 +58,14 @@ def _prepare_inputs(self, X, constraints, weights): self.w_ = weights self.w_ /= self.w_.sum() # weights must sum to 1 if self.prior is None: + X = np.vstack({tuple(row) for row in + quadruplets.reshape(-1, quadruplets.shape[2])}) self.prior_inv_ = np.atleast_2d(np.cov(X, rowvar=False)) self.M_ = np.linalg.inv(self.prior_inv_) else: self.M_ = self.prior self.prior_inv_ = np.linalg.inv(self.prior) - def metric(self): - return self.M_ - - def fit(self, X, constraints, weights=None): - """Learn the LSML model. 
- - Parameters - ---------- - X : (n x d) data matrix - each row corresponds to a single instance - constraints : 4-tuple of arrays - (a,b,c,d) indices into X, such that d(X[a],X[b]) < d(X[c],X[d]) - weights : (m,) array of floats, optional - scale factor for each constraint - """ - self._prepare_inputs(X, constraints, weights) step_sizes = np.logspace(-10, 0, 10) # Keep track of the best step size and the loss at that step. l_best = 0 @@ -103,6 +99,8 @@ def fit(self, X, constraints, weights=None): if self.verbose: print("Didn't converge after", it, "iterations. Final loss:", s_best) self.n_iter_ = it + + self.transformer_ = self.transformer_from_metric(self.M_) return self def _comparison_loss(self, metric): @@ -131,9 +129,52 @@ def _gradient(self, metric): return dMetric -class LSML_Supervised(LSML): +class LSML(_BaseLSML, _QuadrupletsClassifierMixin): + """Least Squared-residual Metric Learning (LSML) + + Attributes + ---------- + transformer_ : `numpy.ndarray`, shape=(num_dims, n_features) + The linear transformation ``L`` deduced from the learned Mahalanobis + metric (See :meth:`transformer_from_metric`.) + """ + + def fit(self, quadruplets, weights=None): + """Learn the LSML model. + + Parameters + ---------- + quadruplets : array-like, shape=(n_constraints, 4, n_features) or + (n_constraints, 4) + 3D array-like of quadruplets of points or 2D array of quadruplets of + indicators. In order to supervise the algorithm in the right way, we + should have the four samples ordered in a way such that: + d(pairs[i, 0],X[i, 1]) < d(X[i, 2], X[i, 3]) for all 0 <= i < + n_constraints. + weights : (n_constraints,) array of floats, optional + scale factor for each constraint + + Returns + ------- + self : object + Returns the instance. + """ + return self._fit(quadruplets, weights=weights) + + +class LSML_Supervised(_BaseLSML, TransformerMixin): + """Supervised version of Least Squared-residual Metric Learning (LSML) + + Attributes + ---------- + transformer_ : `numpy.ndarray`, shape=(num_dims, n_features) + The linear transformation ``L`` deduced from the learned Mahalanobis + metric (See :meth:`transformer_from_metric`.) + """ + def __init__(self, tol=1e-3, max_iter=1000, prior=None, num_labeled=np.inf, - num_constraints=None, weights=None, verbose=False): + num_constraints=None, weights=None, verbose=False, + preprocessor=None): """Initialize the supervised version of `LSML`. `LSML_Supervised` creates quadruplets from labeled samples by taking two @@ -157,9 +198,12 @@ def __init__(self, tol=1e-3, max_iter=1000, prior=None, num_labeled=np.inf, scale factor for each constraint verbose : bool, optional if True, prints information while learning + preprocessor : array-like, shape=(n_samples, n_features) or callable + The preprocessor to call to get tuples from indices. If array-like, + tuples will be formed like this: X[indices]. """ - LSML.__init__(self, tol=tol, max_iter=max_iter, prior=prior, - verbose=verbose) + _BaseLSML.__init__(self, tol=tol, max_iter=max_iter, prior=prior, + verbose=verbose, preprocessor=preprocessor) self.num_labeled = num_labeled self.num_constraints = num_constraints self.weights = weights @@ -178,7 +222,7 @@ def fit(self, X, y, random_state=np.random): random_state : numpy.random.RandomState, optional If provided, controls random number generation. 
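+
+    A self-contained sketch (illustrative only):
+
+    >>> import numpy as np
+    >>> from metric_learn import LSML_Supervised
+    >>> X = np.random.rand(40, 3)
+    >>> y = np.random.randint(0, 2, 40)
+    >>> lsml = LSML_Supervised(num_constraints=20)
+    >>> _ = lsml.fit(X, y)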
""" - X, y = check_X_y(X, y, ensure_min_samples=2) + X, y = self._prepare_inputs(X, y, ensure_min_samples=2) num_constraints = self.num_constraints if num_constraints is None: num_classes = len(np.unique(y)) @@ -186,6 +230,7 @@ def fit(self, X, y, random_state=np.random): c = Constraints.random_subset(y, self.num_labeled, random_state=random_state) - pairs = c.positive_negative_pairs(num_constraints, same_length=True, - random_state=random_state) - return LSML.fit(self, X, pairs, weights=self.weights) + pos_neg = c.positive_negative_pairs(num_constraints, same_length=True, + random_state=random_state) + return _BaseLSML._fit(self, X[np.column_stack(pos_neg)], + weights=self.weights) diff --git a/metric_learn/mlkr.py b/metric_learn/mlkr.py index ddcb698a..6b79638e 100644 --- a/metric_learn/mlkr.py +++ b/metric_learn/mlkr.py @@ -11,22 +11,31 @@ import sys import warnings import numpy as np +from sklearn.exceptions import ConvergenceWarning from sklearn.utils.fixes import logsumexp from scipy.optimize import minimize +from scipy.spatial.distance import pdist, squareform +from sklearn.base import TransformerMixin from sklearn.decomposition import PCA -from sklearn.metrics import pairwise_distances -from sklearn.utils.validation import check_X_y -from sklearn.exceptions import ConvergenceWarning -from .base_metric import BaseMetricLearner + +from sklearn.metrics import pairwise_distances +from .base_metric import MahalanobisMixin EPS = np.finfo(float).eps -class MLKR(BaseMetricLearner): - """Metric Learning for Kernel Regression (MLKR)""" +class MLKR(MahalanobisMixin, TransformerMixin): + """Metric Learning for Kernel Regression (MLKR) + + Attributes + ---------- + transformer_ : `numpy.ndarray`, shape=(num_dims, n_features) + The learned linear transformation ``L``. + """ + def __init__(self, num_dims=None, A0=None, tol=None, max_iter=1000, - verbose=False): + verbose=False, preprocessor=None): """ Initialize MLKR. @@ -46,16 +55,30 @@ def __init__(self, num_dims=None, A0=None, tol=None, max_iter=1000, verbose : bool, optional (default=False) Whether to print progress messages or not. + + preprocessor : array-like, shape=(n_samples, n_features) or callable + The preprocessor to call to get tuples from indices. If array-like, + tuples will be formed like this: X[indices]. 
""" self.num_dims = num_dims self.A0 = A0 self.tol = tol self.max_iter = max_iter self.verbose = verbose + super(MLKR, self).__init__(preprocessor) - def _process_inputs(self, X, y): - self.X_, y = check_X_y(X, y, y_numeric=True) - n, d = self.X_.shape + def fit(self, X, y): + """ + Fit MLKR model + + Parameters + ---------- + X : (n x d) array of samples + y : (n) data labels + """ + X, y = self._prepare_inputs(X, y, y_numeric=True, + ensure_min_samples=2) + n, d = X.shape if y.shape[0] != n: raise ValueError('Data and label lengths mismatch: %d != %d' % (n, y.shape[0])) @@ -71,18 +94,6 @@ def _process_inputs(self, X, y): elif A.shape != (m, d): raise ValueError('A0 needs shape (%d,%d) but got %s' % ( m, d, A.shape)) - return self.X_, y, A - - def fit(self, X, y): - """ - Fit MLKR model - - Parameters - ---------- - X : (n x d) array of samples - y : (n) data labels - """ - X, y, A = self._process_inputs(X, y) # Measure the total training time train_time = time.time() @@ -105,9 +116,6 @@ def fit(self, X, y): return self - def transformer(self): - return self.transformer_ - def _loss(self, flatA, X, y): if self.n_iter_ == 0 and self.verbose: diff --git a/metric_learn/mmc.py b/metric_learn/mmc.py index 02974f7e..596f085f 100644 --- a/metric_learn/mmc.py +++ b/metric_learn/mmc.py @@ -19,19 +19,22 @@ from __future__ import print_function, absolute_import, division import numpy as np from six.moves import xrange -from sklearn.metrics import pairwise_distances -from sklearn.utils.validation import check_array, check_X_y, assert_all_finite +from sklearn.base import TransformerMixin +from sklearn.utils.validation import check_array, assert_all_finite -from .base_metric import BaseMetricLearner -from .constraints import Constraints +from .base_metric import _PairsClassifierMixin, MahalanobisMixin +from .constraints import Constraints, wrap_pairs from ._util import vector_norm - -class MMC(BaseMetricLearner): +class _BaseMMC(MahalanobisMixin): """Mahalanobis Metric for Clustering (MMC)""" + + _tuple_size = 2 # constraints are pairs + def __init__(self, max_iter=100, max_proj=10000, convergence_threshold=1e-3, - A0=None, diagonal=False, diagonal_c=1.0, verbose=False): + A0=None, diagonal=False, diagonal_c=1.0, verbose=False, + preprocessor=None): """Initialize MMC. Parameters ---------- @@ -49,6 +52,9 @@ def __init__(self, max_iter=100, max_proj=10000, convergence_threshold=1e-3, metric learning verbose : bool, optional if True, prints information while learning + preprocessor : array-like, shape=(n_samples, n_features) or callable + The preprocessor to call to get tuples from indices. If array-like, + tuples will be gotten like this: X[indices]. """ self.max_iter = max_iter self.max_proj = max_proj @@ -57,42 +63,15 @@ def __init__(self, max_iter=100, max_proj=10000, convergence_threshold=1e-3, self.diagonal = diagonal self.diagonal_c = diagonal_c self.verbose = verbose + super(_BaseMMC, self).__init__(preprocessor) - def fit(self, X, constraints): - """Learn the MMC model. 
- - Parameters - ---------- - X : (n x d) data matrix - each row corresponds to a single instance - constraints : 4-tuple of arrays - (a,b,c,d) indices into X, with (a,b) specifying similar and (c,d) - dissimilar pairs - """ - constraints = self._process_inputs(X, constraints) - if self.diagonal: - return self._fit_diag(X, constraints) - else: - return self._fit_full(X, constraints) - - def _process_inputs(self, X, constraints): - - self.X_ = X = check_array(X) - - # check to make sure that no two constrained vectors are identical - a,b,c,d = constraints - no_ident = vector_norm(X[a] - X[b]) > 1e-9 - a, b = a[no_ident], b[no_ident] - no_ident = vector_norm(X[c] - X[d]) > 1e-9 - c, d = c[no_ident], d[no_ident] - if len(a) == 0: - raise ValueError('No non-trivial similarity constraints given for MMC.') - if len(c) == 0: - raise ValueError('No non-trivial dissimilarity constraints given for MMC.') + def _fit(self, pairs, y): + pairs, y = self._prepare_inputs(pairs, y, + type_of_inputs='tuples') # init metric if self.A0 is None: - self.A_ = np.identity(X.shape[1]) + self.A_ = np.identity(pairs.shape[2]) if not self.diagonal: # Don't know why division by 10... it's in the original code # and seems to affect the overall scale of the learned metric. @@ -100,9 +79,12 @@ def _process_inputs(self, X, constraints): else: self.A_ = check_array(self.A0) - return a,b,c,d + if self.diagonal: + return self._fit_diag(pairs, y) + else: + return self._fit_full(pairs, y) - def _fit_full(self, X, constraints): + def _fit_full(self, pairs, y): """Learn full metric using MMC. Parameters @@ -113,17 +95,16 @@ def _fit_full(self, X, constraints): (a,b,c,d) indices into X, with (a,b) specifying similar and (c,d) dissimilar pairs """ - a,b,c,d = constraints - num_pos = len(a) - num_neg = len(c) - num_samples, num_dim = X.shape + num_dim = pairs.shape[2] error1 = error2 = 1e10 eps = 0.01 # error-bound of iterative projection on C1 and C2 A = self.A_ + pos_pairs, neg_pairs = pairs[y == 1], pairs[y == -1] + # Create weight vector from similar samples - pos_diff = X[a] - X[b] + pos_diff = pos_pairs[:, 0, :] - pos_pairs[:, 1, :] w = np.einsum('ij,ik->jk', pos_diff, pos_diff).ravel() # `w` is the sum of all outer products of the rows in `pos_diff`. # The above `einsum` is equivalent to the much more inefficient: @@ -140,9 +121,10 @@ def _fit_full(self, X, constraints): cycle = 1 alpha = 0.1 # initial step size along gradient - - grad1 = self._fS1(X, a, b, A) # gradient of similarity constraint function - grad2 = self._fD1(X, c, d, A) # gradient of dissimilarity constraint function + grad1 = self._fS1(pos_pairs, A) # gradient of similarity + # constraint function + grad2 = self._fD1(neg_pairs, A) # gradient of dissimilarity + # constraint function M = self._grad_projection(grad1, grad2) # gradient of fD1 orthogonal to fS1 A_old = A.copy() @@ -183,8 +165,8 @@ def _fit_full(self, X, constraints): # max: g(A) >= 1 # here we suppose g(A) = fD(A) = \sum_{I,J \in D} sqrt(d_ij' A d_ij) - obj_previous = self._fD(X, c, d, A_old) # g(A_old) - obj = self._fD(X, c, d, A) # g(A) + obj_previous = self._fD(neg_pairs, A_old) # g(A_old) + obj = self._fD(neg_pairs, A) # g(A) if satisfy and (obj > obj_previous or cycle == 0): @@ -193,8 +175,8 @@ def _fit_full(self, X, constraints): # and update from the current A. 
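+        # (the 1.05 factor below implements the "slightly increase
+        # learning rate" step mentioned above: a simple multiplicative
+        # step-size adaptation)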
alpha *= 1.05 A_old[:] = A - grad2 = self._fS1(X, a, b, A) - grad1 = self._fD1(X, c, d, A) + grad2 = self._fS1(pos_pairs, A) + grad1 = self._fD1(neg_pairs, A) M = self._grad_projection(grad1, grad2) A += alpha * M @@ -222,9 +204,11 @@ def _fit_full(self, X, constraints): print('mmc converged at iter %d, conv = %f' % (cycle, delta)) self.A_[:] = A_old self.n_iter_ = cycle + + self.transformer_ = self.transformer_from_metric(self.A_) return self - def _fit_diag(self, X, constraints): + def _fit_diag(self, pairs, y): """Learn diagonal metric using MMC. Parameters ---------- @@ -234,12 +218,9 @@ def _fit_diag(self, X, constraints): (a,b,c,d) indices into X, with (a,b) specifying similar and (c,d) dissimilar pairs """ - a,b,c,d = constraints - num_pos = len(a) - num_neg = len(c) - num_samples, num_dim = X.shape - - s_sum = np.sum((X[a] - X[b]) ** 2, axis=0) + num_dim = pairs.shape[2] + pos_pairs, neg_pairs = pairs[y == 1], pairs[y == -1] + s_sum = np.sum((pos_pairs[:, 0, :] - pos_pairs[:, 1, :]) ** 2, axis=0) it = 0 error = 1.0 @@ -249,19 +230,20 @@ def _fit_diag(self, X, constraints): while error > self.convergence_threshold and it < self.max_iter: - fD0, fD_1st_d, fD_2nd_d = self._D_constraint(X, c, d, w) + fD0, fD_1st_d, fD_2nd_d = self._D_constraint(neg_pairs, w) obj_initial = np.dot(s_sum, w) + self.diagonal_c * fD0 fS_1st_d = s_sum # first derivative of the similarity constraints gradient = fS_1st_d - self.diagonal_c * fD_1st_d # gradient of the objective hessian = -self.diagonal_c * fD_2nd_d + eps * np.eye(num_dim) # Hessian of the objective - step = np.dot(np.linalg.inv(hessian), gradient); + step = np.dot(np.linalg.inv(hessian), gradient) # Newton-Rapshon update # search over optimal lambda lambd = 1 # initial step-size w_tmp = np.maximum(0, w - lambd * step) - obj = np.dot(s_sum, w_tmp) + self.diagonal_c * self._D_objective(X, c, d, w_tmp) + obj = (np.dot(s_sum, w_tmp) + self.diagonal_c * + self._D_objective(neg_pairs, w_tmp)) assert_all_finite(obj) obj_previous = obj + 1 # just to get the while-loop started @@ -271,7 +253,8 @@ def _fit_diag(self, X, constraints): w_previous = w_tmp.copy() lambd /= reduction w_tmp = np.maximum(0, w - lambd * step) - obj = np.dot(s_sum, w_tmp) + self.diagonal_c * self._D_objective(X, c, d, w_tmp) + obj = (np.dot(s_sum, w_tmp) + self.diagonal_c * + self._D_objective(neg_pairs, w_tmp)) inner_it += 1 assert_all_finite(obj) @@ -282,18 +265,20 @@ def _fit_diag(self, X, constraints): it += 1 self.A_ = np.diag(w) + + self.transformer_ = self.transformer_from_metric(self.A_) return self - def _fD(self, X, c, d, A): + def _fD(self, neg_pairs, A): """The value of the dissimilarity constraint function. f = f(\sum_{ij \in D} distance(x_i, x_j)) i.e. distance can be L1: \sqrt{(x_i-x_j)A(x_i-x_j)'} """ - diff = X[c] - X[d] + diff = neg_pairs[:, 0, :] - neg_pairs[:, 1, :] return np.log(np.sum(np.sqrt(np.sum(np.dot(diff, A) * diff, axis=1))) + 1e-6) - def _fD1(self, X, c, d, A): + def _fD1(self, neg_pairs, A): """The gradient of the dissimilarity constraint function w.r.t. A. 
For example, let distance by L1 norm: @@ -305,8 +290,8 @@ def _fD1(self, X, c, d, A): df/dA = f'(\sum_{ij \in D} \sqrt{tr(d_ij'*d_ij*A)}) * 0.5*(\sum_{ij \in D} (1/sqrt{tr(d_ij'*d_ij*A)})*(d_ij'*d_ij)) """ - dim = X.shape[1] - diff = X[c] - X[d] + dim = neg_pairs.shape[2] + diff = neg_pairs[:, 0, :] - neg_pairs[:, 1, :] # outer products of all rows in `diff` M = np.einsum('ij,ik->ijk', diff, diff) # faster version of: dist = np.sqrt(np.sum(M * A[None,:,:], axis=(1,2))) @@ -316,7 +301,7 @@ def _fD1(self, X, c, d, A): sum_dist = dist.sum() return sum_deri / (sum_dist + 1e-6) - def _fS1(self, X, a, b, A): + def _fS1(self, pos_pairs, A): """The gradient of the similarity constraint function w.r.t. A. f = \sum_{ij}(x_i-x_j)A(x_i-x_j)' = \sum_{ij}d_ij*A*d_ij' @@ -325,8 +310,8 @@ def _fS1(self, X, a, b, A): Note that d_ij*A*d_ij' = tr(d_ij*A*d_ij') = tr(d_ij'*d_ij*A) so, d(d_ij*A*d_ij')/dA = d_ij'*d_ij """ - dim = X.shape[1] - diff = X[a] - X[b] + dim = pos_pairs.shape[2] + diff = pos_pairs[:, 0, :] - pos_pairs[:, 1, :] return np.einsum('ij,ik->jk', diff, diff) # sum of outer products of all rows in `diff` def _grad_projection(self, grad1, grad2): @@ -335,15 +320,17 @@ def _grad_projection(self, grad1, grad2): gtemp /= np.linalg.norm(gtemp) return gtemp - def _D_objective(self, X, c, d, w): - return np.log(np.sum(np.sqrt(np.sum(((X[c] - X[d]) ** 2) * w[None,:], axis=1) + 1e-6))) + def _D_objective(self, neg_pairs, w): + return np.log(np.sum(np.sqrt(np.sum(((neg_pairs[:, 0, :] - + neg_pairs[:, 1, :]) ** 2) * + w[None,:], axis=1) + 1e-6))) - def _D_constraint(self, X, c, d, w): + def _D_constraint(self, neg_pairs, w): """Compute the value, 1st derivative, second derivative (Hessian) of a dissimilarity constraint function gF(sum_ij distance(d_ij A d_ij)) where A is a diagonal matrix (in the form of a column vector 'w'). """ - diff = X[c] - X[d] + diff = neg_pairs[:, 0, :] - neg_pairs[:, 1, :] diff_sq = diff * diff dist = np.sqrt(diff_sq.dot(w)) sum_deri1 = np.einsum('ij,i', diff_sq, 0.5 / np.maximum(dist, 1e-6)) @@ -359,33 +346,52 @@ def _D_constraint(self, X, c, d, w): sum_deri2 / sum_dist - np.outer(sum_deri1, sum_deri1) / (sum_dist * sum_dist) ) - def metric(self): - return self.A_ - def transformer(self): - """Computes the transformation matrix from the Mahalanobis matrix. - L = V.T * w^(-1/2), with A = V*w*V.T being the eigenvector decomposition of A with - the eigenvalues in the diagonal matrix w and the columns of V being the eigenvectors. +class MMC(_BaseMMC, _PairsClassifierMixin): + """Mahalanobis Metric for Clustering (MMC) - The Cholesky decomposition cannot be applied here, since MMC learns only a positive - *semi*-definite Mahalanobis matrix. + Attributes + ---------- + transformer_ : `numpy.ndarray`, shape=(num_dims, n_features) + The linear transformation ``L`` deduced from the learned Mahalanobis + metric (See :meth:`transformer_from_metric`.) + """ + + def fit(self, pairs, y): + """Learn the MMC model. + + Parameters + ---------- + pairs: array-like, shape=(n_constraints, 2, n_features) or + (n_constraints, 2) + 3D Array of pairs with each row corresponding to two points, + or 2D array of indices of pairs if the metric learner uses a + preprocessor. + y: array-like, of shape (n_constraints,) + Labels of constraints. Should be -1 for dissimilar pair, 1 for similar. Returns ------- - L : (d x d) matrix + self : object + Returns the instance. 
""" - if self.diagonal: - return np.sqrt(self.A_) - else: - w, V = np.linalg.eigh(self.A_) - return V.T * np.sqrt(np.maximum(0, w[:,None])) + return self._fit(pairs, y) -class MMC_Supervised(MMC): - """Mahalanobis Metric for Clustering (MMC)""" +class MMC_Supervised(_BaseMMC, TransformerMixin): + """Supervised version of Mahalanobis Metric for Clustering (MMC) + + Attributes + ---------- + transformer_ : `numpy.ndarray`, shape=(num_dims, n_features) + The linear transformation ``L`` deduced from the learned Mahalanobis + metric (See :meth:`transformer_from_metric`.) + """ + def __init__(self, max_iter=100, max_proj=10000, convergence_threshold=1e-6, num_labeled=np.inf, num_constraints=None, - A0=None, diagonal=False, diagonal_c=1.0, verbose=False): + A0=None, diagonal=False, diagonal_c=1.0, verbose=False, + preprocessor=None): """Initialize the supervised version of `MMC`. `MMC_Supervised` creates pairs of similar sample by taking same class @@ -414,11 +420,14 @@ def __init__(self, max_iter=100, max_proj=10000, convergence_threshold=1e-6, metric learning verbose : bool, optional if True, prints information while learning + preprocessor : array-like, shape=(n_samples, n_features) or callable + The preprocessor to call to get tuples from indices. If array-like, + tuples will be formed like this: X[indices]. """ - MMC.__init__(self, max_iter=max_iter, max_proj=max_proj, - convergence_threshold=convergence_threshold, - A0=A0, diagonal=diagonal, diagonal_c=diagonal_c, - verbose=verbose) + _BaseMMC.__init__(self, max_iter=max_iter, max_proj=max_proj, + convergence_threshold=convergence_threshold, + A0=A0, diagonal=diagonal, diagonal_c=diagonal_c, + verbose=verbose, preprocessor=preprocessor) self.num_labeled = num_labeled self.num_constraints = num_constraints @@ -434,7 +443,7 @@ def fit(self, X, y, random_state=np.random): random_state : numpy.random.RandomState, optional If provided, controls random number generation. 
""" - X, y = check_X_y(X, y, ensure_min_samples=2) + X, y = self._prepare_inputs(X, y, ensure_min_samples=2) num_constraints = self.num_constraints if num_constraints is None: num_classes = len(np.unique(y)) @@ -444,4 +453,5 @@ def fit(self, X, y, random_state=np.random): random_state=random_state) pos_neg = c.positive_negative_pairs(num_constraints, random_state=random_state) - return MMC.fit(self, X, pos_neg) + pairs, y = wrap_pairs(X, pos_neg) + return _BaseMMC._fit(self, pairs, y) diff --git a/metric_learn/nca.py b/metric_learn/nca.py index 2f15c7af..81045287 100644 --- a/metric_learn/nca.py +++ b/metric_learn/nca.py @@ -4,29 +4,32 @@ """ from __future__ import absolute_import - import warnings import time import sys import numpy as np from scipy.optimize import minimize from sklearn.metrics import pairwise_distances -from sklearn.utils.validation import check_X_y from sklearn.exceptions import ConvergenceWarning +from sklearn.utils.fixes import logsumexp +from sklearn.base import TransformerMixin -try: # scipy.misc.logsumexp is deprecated in scipy 1.0.0 - from scipy.special import logsumexp -except ImportError: - from scipy.misc import logsumexp - -from .base_metric import BaseMetricLearner +from .base_metric import MahalanobisMixin EPS = np.finfo(float).eps -class NCA(BaseMetricLearner): - def __init__(self, num_dims=None, max_iter=100, learning_rate='deprecated', - tol=None, verbose=False): +class NCA(MahalanobisMixin, TransformerMixin): + """Neighborhood Components Analysis (NCA) + + Attributes + ---------- + transformer_ : `numpy.ndarray`, shape=(num_dims, n_features) + The learned linear transformation ``L``. + """ + + def __init__(self, num_dims=None, max_iter=100, tol=None, verbose=False, + preprocessor=None): """Neighborhood Components Analysis Parameters @@ -38,13 +41,6 @@ def __init__(self, num_dims=None, max_iter=100, learning_rate='deprecated', max_iter : int, optional (default=100) Maximum number of iterations done by the optimization algorithm. - learning_rate : Not used - - .. deprecated:: 0.4.0 - `learning_rate` was deprecated in version 0.4.0 and will - be removed in 0.5.0. The current optimization algorithm does not need - to fix a learning rate. - tol : float, optional (default=None) Convergence tolerance for the optimization. @@ -53,24 +49,16 @@ def __init__(self, num_dims=None, max_iter=100, learning_rate='deprecated', """ self.num_dims = num_dims self.max_iter = max_iter - self.learning_rate = learning_rate # TODO: remove in v.0.5.0 self.tol = tol self.verbose = verbose - - def transformer(self): - return self.A_ + super(NCA, self).__init__(preprocessor) def fit(self, X, y): """ X: data matrix, (n x d) y: scalar labels, (n) """ - if self.learning_rate != 'deprecated': - warnings.warn('"learning_rate" parameter is not used.' 
- ' It has been deprecated in version 0.4 and will be' - 'removed in 0.5', DeprecationWarning) - - X, labels = check_X_y(X, y) + X, labels = self._prepare_inputs(X, y, ensure_min_samples=2) n, d = X.shape num_dims = self.num_dims if num_dims is None: @@ -98,8 +86,7 @@ def fit(self, X, y): self.n_iter_ = 0 opt_result = minimize(**optimizer_params) - self.X_ = X - self.A_ = opt_result.x.reshape(-1, X.shape[1]) + self.transformer_ = opt_result.x.reshape(-1, X.shape[1]) self.n_iter_ = opt_result.nit # Stop timer diff --git a/metric_learn/rca.py b/metric_learn/rca.py index 327c5002..3380f4c9 100644 --- a/metric_learn/rca.py +++ b/metric_learn/rca.py @@ -16,9 +16,9 @@ import warnings from six.moves import xrange from sklearn import decomposition -from sklearn.utils.validation import check_array +from sklearn.base import TransformerMixin -from .base_metric import BaseMetricLearner +from .base_metric import MahalanobisMixin from .constraints import Constraints @@ -35,9 +35,16 @@ def _chunk_mean_centering(data, chunks): return chunk_mask, chunk_data -class RCA(BaseMetricLearner): - """Relevant Components Analysis (RCA)""" - def __init__(self, num_dims=None, pca_comps=None): +class RCA(MahalanobisMixin, TransformerMixin): + """Relevant Components Analysis (RCA) + + Attributes + ---------- + transformer_ : `numpy.ndarray`, shape=(num_dims, n_features) + The learned linear transformation ``L``. + """ + + def __init__(self, num_dims=None, pca_comps=None, preprocessor=None): """Initialize the learner. Parameters @@ -51,29 +58,17 @@ def __init__(self, num_dims=None, pca_comps=None): If ``0 < pca_comps < 1``, it is used as the minimum explained variance ratio. See sklearn.decomposition.PCA for more details. + + preprocessor : array-like, shape=(n_samples, n_features) or callable + The preprocessor to call to get tuples from indices. If array-like, + tuples will be formed like this: X[indices]. """ self.num_dims = num_dims self.pca_comps = pca_comps + super(RCA, self).__init__(preprocessor) - def transformer(self): - return self.transformer_ - - def _process_data(self, X): - self.X_ = X = check_array(X) - - # PCA projection to remove noise and redundant information. - if self.pca_comps is not None: - pca = decomposition.PCA(n_components=self.pca_comps) - X = pca.fit_transform(X) - M_pca = pca.components_ - else: - X -= X.mean(axis=0) - M_pca = None - - return X, M_pca - - def _check_dimension(self, rank): - d = self.X_.shape[1] + def _check_dimension(self, rank, X): + d = X.shape[1] if rank < d: warnings.warn('The inner covariance matrix is not invertible, ' 'so the transformation matrix may contain Nan values. ' @@ -92,7 +87,7 @@ def _check_dimension(self, rank): dim = self.num_dims return dim - def fit(self, data, chunks): + def fit(self, X, chunks): """Learn the RCA model. Parameters @@ -103,17 +98,26 @@ def fit(self, data, chunks): When ``chunks[i] == -1``, point i doesn't belong to any chunklet. When ``chunks[i] == j``, point i belongs to chunklet j. """ - data, M_pca = self._process_data(data) + X = self._prepare_inputs(X, ensure_min_samples=2) + + # PCA projection to remove noise and redundant information. 
+ if self.pca_comps is not None: + pca = decomposition.PCA(n_components=self.pca_comps) + X_t = pca.fit_transform(X) + M_pca = pca.components_ + else: + X_t = X - X.mean(axis=0) + M_pca = None chunks = np.asanyarray(chunks, dtype=int) - chunk_mask, chunked_data = _chunk_mean_centering(data, chunks) + chunk_mask, chunked_data = _chunk_mean_centering(X_t, chunks) inner_cov = np.cov(chunked_data, rowvar=0, bias=1) - dim = self._check_dimension(np.linalg.matrix_rank(inner_cov)) + dim = self._check_dimension(np.linalg.matrix_rank(inner_cov), X_t) # Fisher Linear Discriminant projection - if dim < data.shape[1]: - total_cov = np.cov(data[chunk_mask], rowvar=0) + if dim < X_t.shape[1]: + total_cov = np.cov(X_t[chunk_mask], rowvar=0) tmp = np.linalg.lstsq(total_cov, inner_cov)[0] vals, vecs = np.linalg.eig(tmp) inds = np.argsort(vals)[:dim] @@ -136,8 +140,16 @@ def _inv_sqrtm(x): class RCA_Supervised(RCA): + """Supervised version of Relevant Components Analysis (RCA) + + Attributes + ---------- + transformer_ : `numpy.ndarray`, shape=(num_dims, n_features) + The learned linear transformation ``L``. + """ + def __init__(self, num_dims=None, pca_comps=None, num_chunks=100, - chunk_size=2): + chunk_size=2, preprocessor=None): """Initialize the supervised version of `RCA`. `RCA_Supervised` creates chunks of similar points by first sampling a @@ -150,8 +162,12 @@ def __init__(self, num_dims=None, pca_comps=None, num_chunks=100, embedding dimension (default: original dimension of data) num_chunks: int, optional chunk_size: int, optional + preprocessor : array-like, shape=(n_samples, n_features) or callable + The preprocessor to call to get tuples from indices. If array-like, + tuples will be formed like this: X[indices]. """ - RCA.__init__(self, num_dims=num_dims, pca_comps=pca_comps) + RCA.__init__(self, num_dims=num_dims, pca_comps=pca_comps, + preprocessor=preprocessor) self.num_chunks = num_chunks self.chunk_size = chunk_size @@ -166,6 +182,7 @@ def fit(self, X, y, random_state=np.random): y : (n) data labels random_state : a random.seed object to fix the random_state if needed. """ + X, y = self._prepare_inputs(X, y, ensure_min_samples=2) chunks = Constraints(y).chunks(num_chunks=self.num_chunks, chunk_size=self.chunk_size, random_state=random_state) diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py index 1746ec7d..1892d176 100644 --- a/metric_learn/sdml.py +++ b/metric_learn/sdml.py @@ -10,18 +10,20 @@ from __future__ import absolute_import import numpy as np -from scipy.sparse.csgraph import laplacian +from sklearn.base import TransformerMixin from sklearn.covariance import graph_lasso from sklearn.utils.extmath import pinvh -from sklearn.utils.validation import check_array -from .base_metric import BaseMetricLearner -from .constraints import Constraints +from .base_metric import MahalanobisMixin, _PairsClassifierMixin +from .constraints import Constraints, wrap_pairs -class SDML(BaseMetricLearner): +class _BaseSDML(MahalanobisMixin): + + _tuple_size = 2 # constraints are pairs + def __init__(self, balance_param=0.5, sparsity_param=0.01, use_cov=True, - verbose=False): + verbose=False, preprocessor=None): """ Parameters ---------- @@ -36,59 +38,88 @@ def __init__(self, balance_param=0.5, sparsity_param=0.01, use_cov=True, verbose : bool, optional if True, prints information while learning + + preprocessor : array-like, shape=(n_samples, n_features) or callable + The preprocessor to call to get tuples from indices. If array-like, + tuples will be gotten like this: X[indices]. 
""" self.balance_param = balance_param self.sparsity_param = sparsity_param self.use_cov = use_cov self.verbose = verbose + super(_BaseSDML, self).__init__(preprocessor) + + def _fit(self, pairs, y): + pairs, y = self._prepare_inputs(pairs, y, + type_of_inputs='tuples') - def _prepare_inputs(self, X, W): - self.X_ = X = check_array(X) - W = check_array(W, accept_sparse=True) # set up prior M if self.use_cov: + X = np.vstack({tuple(row) for row in pairs.reshape(-1, pairs.shape[2])}) self.M_ = pinvh(np.cov(X, rowvar = False)) else: - self.M_ = np.identity(X.shape[1]) - L = laplacian(W, normed=False) - return X.T.dot(L.dot(X)) + self.M_ = np.identity(pairs.shape[2]) + diff = pairs[:, 0] - pairs[:, 1] + loss_matrix = (diff.T * y).dot(diff) + P = self.M_ + self.balance_param * loss_matrix + emp_cov = pinvh(P) + # hack: ensure positive semidefinite + emp_cov = emp_cov.T.dot(emp_cov) + _, self.M_ = graph_lasso(emp_cov, self.sparsity_param, verbose=self.verbose) + + self.transformer_ = self.transformer_from_metric(self.M_) + return self + - def metric(self): - return self.M_ +class SDML(_BaseSDML, _PairsClassifierMixin): + """Sparse Distance Metric Learning (SDML) - def fit(self, X, W): + Attributes + ---------- + transformer_ : `numpy.ndarray`, shape=(num_dims, n_features) + The linear transformation ``L`` deduced from the learned Mahalanobis + metric (See :meth:`transformer_from_metric`.) + """ + + def fit(self, pairs, y): """Learn the SDML model. Parameters ---------- - X : array-like, shape (n, d) - data matrix, where each row corresponds to a single instance - W : array-like, shape (n, n) - connectivity graph, with +1 for positive pairs and -1 for negative + pairs: array-like, shape=(n_constraints, 2, n_features) or + (n_constraints, 2) + 3D Array of pairs with each row corresponding to two points, + or 2D array of indices of pairs if the metric learner uses a + preprocessor. + y: array-like, of shape (n_constraints,) + Labels of constraints. Should be -1 for dissimilar pair, 1 for similar. Returns ------- self : object Returns the instance. """ - loss_matrix = self._prepare_inputs(X, W) - P = self.M_ + self.balance_param * loss_matrix - emp_cov = pinvh(P) - # hack: ensure positive semidefinite - emp_cov = emp_cov.T.dot(emp_cov) - _, self.M_ = graph_lasso(emp_cov, self.sparsity_param, verbose=self.verbose) - return self + return self._fit(pairs, y) -class SDML_Supervised(SDML): +class SDML_Supervised(_BaseSDML, TransformerMixin): + """Supervised version of Sparse Distance Metric Learning (SDML) + + Attributes + ---------- + transformer_ : `numpy.ndarray`, shape=(num_dims, n_features) + The linear transformation ``L`` deduced from the learned Mahalanobis + metric (See :meth:`transformer_from_metric`.) + """ + def __init__(self, balance_param=0.5, sparsity_param=0.01, use_cov=True, - num_labeled=np.inf, num_constraints=None, verbose=False): + num_labeled=np.inf, num_constraints=None, verbose=False, + preprocessor=None): """Initialize the supervised version of `SDML`. `SDML_Supervised` creates pairs of similar sample by taking same class samples, and pairs of dissimilar samples by taking different class samples. It then passes these pairs to `SDML` for training. 
- Parameters ---------- balance_param : float, optional @@ -105,10 +136,13 @@ def __init__(self, balance_param=0.5, sparsity_param=0.01, use_cov=True, number of constraints to generate verbose : bool, optional if True, prints information while learning + preprocessor : array-like, shape=(n_samples, n_features) or callable + The preprocessor to call to get tuples from indices. If array-like, + tuples will be formed like this: X[indices]. """ - SDML.__init__(self, balance_param=balance_param, - sparsity_param=sparsity_param, use_cov=use_cov, - verbose=verbose) + _BaseSDML.__init__(self, balance_param=balance_param, + sparsity_param=sparsity_param, use_cov=use_cov, + verbose=verbose, preprocessor=preprocessor) self.num_labeled = num_labeled self.num_constraints = num_constraints @@ -130,7 +164,7 @@ def fit(self, X, y, random_state=np.random): self : object Returns the instance. """ - y = check_array(y, ensure_2d=False) + X, y = self._prepare_inputs(X, y, ensure_min_samples=2) num_constraints = self.num_constraints if num_constraints is None: num_classes = len(np.unique(y)) @@ -138,5 +172,7 @@ def fit(self, X, y, random_state=np.random): c = Constraints.random_subset(y, self.num_labeled, random_state=random_state) - adj = c.adjacency_matrix(num_constraints, random_state=random_state) - return SDML.fit(self, X, adj) + pos_neg = c.positive_negative_pairs(num_constraints, + random_state=random_state) + pairs, y = wrap_pairs(X, pos_neg) + return _BaseSDML._fit(self, pairs, y) diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py index e5bd071c..74bc25de 100644 --- a/test/metric_learn_test.py +++ b/test/metric_learn_test.py @@ -6,14 +6,16 @@ from six.moves import xrange from sklearn.metrics import pairwise_distances from sklearn.datasets import load_iris, make_classification, make_regression -from numpy.testing import assert_array_almost_equal, assert_array_equal +from numpy.testing import assert_array_almost_equal from sklearn.utils.testing import assert_warns_message from sklearn.exceptions import ConvergenceWarning from sklearn.utils.validation import check_X_y -from metric_learn import (LMNN, NCA, LFDA, Covariance, MLKR, MMC, - LSML_Supervised, ITML_Supervised, SDML_Supervised, - RCA_Supervised, MMC_Supervised) +from metric_learn import ( + LMNN, NCA, LFDA, Covariance, MLKR, MMC, + LSML_Supervised, ITML_Supervised, SDML_Supervised, RCA_Supervised, MMC_Supervised) +# Import this specially for testing. 
+from metric_learn.constraints import wrap_pairs from metric_learn.lmnn import python_LMNN @@ -42,7 +44,7 @@ def test_iris(self): cov = Covariance() cov.fit(self.iris_points) - csep = class_separation(cov.transform(), self.iris_labels) + csep = class_separation(cov.transform(self.iris_points), self.iris_labels) # deterministic result self.assertAlmostEqual(csep, 0.72981476) @@ -52,7 +54,7 @@ def test_iris(self): lsml = LSML_Supervised(num_constraints=200) lsml.fit(self.iris_points, self.iris_labels) - csep = class_separation(lsml.transform(), self.iris_labels) + csep = class_separation(lsml.transform(self.iris_points), self.iris_labels) self.assertLess(csep, 0.8) # it's pretty terrible @@ -61,7 +63,7 @@ def test_iris(self): itml = ITML_Supervised(num_constraints=200) itml.fit(self.iris_points, self.iris_labels) - csep = class_separation(itml.transform(), self.iris_labels) + csep = class_separation(itml.transform(self.iris_points), self.iris_labels) self.assertLess(csep, 0.2) @@ -72,7 +74,8 @@ def test_iris(self): lmnn = LMNN_cls(k=5, learn_rate=1e-6, verbose=False) lmnn.fit(self.iris_points, self.iris_labels) - csep = class_separation(lmnn.transform(), self.iris_labels) + csep = class_separation(lmnn.transform(self.iris_points), + self.iris_labels) self.assertLess(csep, 0.25) @@ -115,7 +118,7 @@ def test_iris(self): sdml = SDML_Supervised(num_constraints=1500) sdml.fit(self.iris_points, self.iris_labels, random_state=rs) - csep = class_separation(sdml.transform(), self.iris_labels) + csep = class_separation(sdml.transform(self.iris_points), self.iris_labels) self.assertLess(csep, 0.25) @@ -126,137 +129,33 @@ def test_iris(self): # Without dimension reduction nca = NCA(max_iter=(100000//n)) nca.fit(self.iris_points, self.iris_labels) - csep = class_separation(nca.transform(), self.iris_labels) + csep = class_separation(nca.transform(self.iris_points), self.iris_labels) self.assertLess(csep, 0.15) # With dimension reduction - nca = NCA(max_iter=(100000//n), num_dims=2, tol=1e-9) + nca = NCA(max_iter=(100000//n), num_dims=2) nca.fit(self.iris_points, self.iris_labels) - csep = class_separation(nca.transform(), self.iris_labels) + csep = class_separation(nca.transform(self.iris_points), self.iris_labels) self.assertLess(csep, 0.20) - def test_finite_differences(self): - """Test gradient of loss function - - Assert that the gradient is almost equal to its finite differences - approximation. - """ - # Initialize the transformation `M`, as well as `X` and `y` and `NCA` - X, y = make_classification() - M = np.random.randn(np.random.randint(1, X.shape[1] + 1), X.shape[1]) - mask = y[:, np.newaxis] == y[np.newaxis, :] - nca = NCA() - nca.n_iter_ = 0 - - def fun(M): - return nca._loss_grad_lbfgs(M, X, mask)[0] - - def grad(M): - return nca._loss_grad_lbfgs(M, X, mask)[1].ravel() - - # compute relative error - rel_diff = check_grad(fun, grad, M.ravel()) / np.linalg.norm(grad(M)) - np.testing.assert_almost_equal(rel_diff, 0., decimal=6) - - def test_simple_example(self): - """Test on a simple example. - - Puts four points in the input space where the opposite labels points are - next to each other. After transform the same labels points should be next - to each other. 
- - """ - X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) - y = np.array([1, 0, 1, 0]) - nca = NCA(num_dims=2,) - nca.fit(X, y) - Xansformed = nca.transform(X) - np.testing.assert_equal(pairwise_distances(Xansformed).argsort()[:, 1], - np.array([2, 3, 0, 1])) - - def test_deprecation(self): - # test that the right deprecation message is thrown. - # TODO: remove in v.0.5 - X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) - y = np.array([1, 0, 1, 0]) - nca = NCA(num_dims=2, learning_rate=0.01) - msg = ('"learning_rate" parameter is not used.' - ' It has been deprecated in version 0.4 and will be' - 'removed in 0.5') - assert_warns_message(DeprecationWarning, msg, nca.fit, X, y) - - def test_singleton_class(self): - X = self.iris_points - y = self.iris_labels - - # one singleton class: test fitting works - singleton_class = 1 - ind_singleton, = np.where(y == singleton_class) - y[ind_singleton] = 2 - y[ind_singleton[0]] = singleton_class - - nca = NCA(max_iter=30) - nca.fit(X, y) - - # One non-singleton class: test fitting works - ind_1, = np.where(y == 1) - ind_2, = np.where(y == 2) - y[ind_1] = 0 - y[ind_1[0]] = 1 - y[ind_2] = 0 - y[ind_2[0]] = 2 - - nca = NCA(max_iter=30) - nca.fit(X, y) - - # Only singleton classes: test fitting does nothing (the gradient - # must be null in this case, so the final matrix must stay like - # the initialization) - ind_0, = np.where(y == 0) - ind_1, = np.where(y == 1) - ind_2, = np.where(y == 2) - X = X[[ind_0[0], ind_1[0], ind_2[0]]] - y = y[[ind_0[0], ind_1[0], ind_2[0]]] - - EPS = np.finfo(float).eps - A = np.zeros((X.shape[1], X.shape[1])) - np.fill_diagonal(A, - 1. / (np.maximum(X.max(axis=0) - X.min(axis=0), EPS))) - nca = NCA(max_iter=30, num_dims=X.shape[1]) - nca.fit(X, y) - assert_array_equal(nca.A_, A) - - def test_one_class(self): - # if there is only one class the gradient is null, so the final matrix - # must stay like the initialization - X = self.iris_points[self.iris_labels == 0] - y = self.iris_labels[self.iris_labels == 0] - EPS = np.finfo(float).eps - A = np.zeros((X.shape[1], X.shape[1])) - np.fill_diagonal(A, - 1. / (np.maximum(X.max(axis=0) - X.min(axis=0), EPS))) - nca = NCA(max_iter=30, num_dims=X.shape[1]) - nca.fit(X, y) - assert_array_equal(nca.A_, A) - class TestLFDA(MetricTestCase): def test_iris(self): lfda = LFDA(k=2, num_dims=2) lfda.fit(self.iris_points, self.iris_labels) - csep = class_separation(lfda.transform(), self.iris_labels) + csep = class_separation(lfda.transform(self.iris_points), self.iris_labels) self.assertLess(csep, 0.15) # Sanity checks for learned matrices. 
self.assertEqual(lfda.metric().shape, (4, 4)) - self.assertEqual(lfda.transformer().shape, (2, 4)) + self.assertEqual(lfda.transformer_.shape, (2, 4)) class TestRCA(MetricTestCase): def test_iris(self): rca = RCA_Supervised(num_dims=2, num_chunks=30, chunk_size=2) rca.fit(self.iris_points, self.iris_labels) - csep = class_separation(rca.transform(), self.iris_labels) + csep = class_separation(rca.transform(self.iris_points), self.iris_labels) self.assertLess(csep, 0.25) def test_feature_null_variance(self): @@ -265,14 +164,14 @@ def test_feature_null_variance(self): # Apply PCA with the number of components rca = RCA_Supervised(num_dims=2, pca_comps=3, num_chunks=30, chunk_size=2) rca.fit(X, self.iris_labels) - csep = class_separation(rca.transform(), self.iris_labels) + csep = class_separation(rca.transform(X), self.iris_labels) self.assertLess(csep, 0.30) # Apply PCA with the minimum variance ratio rca = RCA_Supervised(num_dims=2, pca_comps=0.95, num_chunks=30, chunk_size=2) rca.fit(X, self.iris_labels) - csep = class_separation(rca.transform(), self.iris_labels) + csep = class_separation(rca.transform(X), self.iris_labels) self.assertLess(csep, 0.30) @@ -280,7 +179,7 @@ class TestMLKR(MetricTestCase): def test_iris(self): mlkr = MLKR() mlkr.fit(self.iris_points, self.iris_labels) - csep = class_separation(mlkr.transform(), self.iris_labels) + csep = class_separation(mlkr.transform(self.iris_points), self.iris_labels) self.assertLess(csep, 0.25) def test_finite_differences(self): @@ -318,30 +217,29 @@ def test_iris(self): # Full metric mmc = MMC(convergence_threshold=0.01) - mmc.fit(self.iris_points, [a,b,c,d]) - expected = [[ 0.000514, 0.000868, -0.001195, -0.001703], - [ 0.000868, 0.001468, -0.002021, -0.002879], - [-0.001195, -0.002021, 0.002782, 0.003964], - [-0.001703, -0.002879, 0.003964, 0.005648]] + mmc.fit(*wrap_pairs(self.iris_points, [a,b,c,d])) + expected = [[+0.000514, +0.000868, -0.001195, -0.001703], + [+0.000868, +0.001468, -0.002021, -0.002879], + [-0.001195, -0.002021, +0.002782, +0.003964], + [-0.001703, -0.002879, +0.003964, +0.005648]] assert_array_almost_equal(expected, mmc.metric(), decimal=6) # Diagonal metric mmc = MMC(diagonal=True) - mmc.fit(self.iris_points, [a,b,c,d]) + mmc.fit(*wrap_pairs(self.iris_points, [a,b,c,d])) expected = [0, 0, 1.210220, 1.228596] - assert_array_almost_equal(np.diag(expected), mmc.metric(), decimal=6) # Supervised Full mmc = MMC_Supervised() mmc.fit(self.iris_points, self.iris_labels) - csep = class_separation(mmc.transform(), self.iris_labels) + csep = class_separation(mmc.transform(self.iris_points), self.iris_labels) self.assertLess(csep, 0.15) # Supervised Diagonal mmc = MMC_Supervised(diagonal=True) mmc.fit(self.iris_points, self.iris_labels) - csep = class_separation(mmc.transform(), self.iris_labels) + csep = class_separation(mmc.transform(self.iris_points), self.iris_labels) self.assertLess(csep, 0.2) diff --git a/test/test_base_metric.py b/test/test_base_metric.py index 4b132af4..c9c8fb57 100644 --- a/test/test_base_metric.py +++ b/test/test_base_metric.py @@ -5,74 +5,80 @@ class TestStringRepr(unittest.TestCase): def test_covariance(self): - self.assertEqual(str(metric_learn.Covariance()), "Covariance()") + self.assertEqual(str(metric_learn.Covariance()), + "Covariance(preprocessor=None)") def test_lmnn(self): self.assertRegexpMatches( str(metric_learn.LMNN()), r"(python_)?LMNN\(convergence_tol=0.001, k=3, learn_rate=1e-07, " - r"max_iter=1000,\n min_iter=50, regularization=0.5, " - r"use_pca=True, verbose=False\)") + 
r"max_iter=1000,\n min_iter=50, preprocessor=None, " + r"regularization=0.5, use_pca=True,\n verbose=False\)") def test_nca(self): self.assertEqual(str(metric_learn.NCA()), - ("NCA(learning_rate='deprecated', max_iter=100, " - "num_dims=None, tol=None,\n verbose=False)")) + "NCA(max_iter=100, num_dims=None, preprocessor=None, " + "tol=None, verbose=False)") def test_lfda(self): self.assertEqual(str(metric_learn.LFDA()), - "LFDA(embedding_type='weighted', k=None, num_dims=None)") + "LFDA(embedding_type='weighted', k=None, num_dims=None, " + "preprocessor=None)") def test_itml(self): self.assertEqual(str(metric_learn.ITML()), """ ITML(A0=None, convergence_threshold=0.001, gamma=1.0, max_iter=1000, - verbose=False) + preprocessor=None, verbose=False) """.strip('\n')) self.assertEqual(str(metric_learn.ITML_Supervised()), """ ITML_Supervised(A0=None, bounds=None, convergence_threshold=0.001, gamma=1.0, max_iter=1000, num_constraints=None, num_labeled=inf, - verbose=False) + preprocessor=None, verbose=False) """.strip('\n')) def test_lsml(self): self.assertEqual( str(metric_learn.LSML()), - "LSML(max_iter=1000, prior=None, tol=0.001, verbose=False)") + "LSML(max_iter=1000, preprocessor=None, prior=None, tol=0.001, " + "verbose=False)") self.assertEqual(str(metric_learn.LSML_Supervised()), """ LSML_Supervised(max_iter=1000, num_constraints=None, num_labeled=inf, - prior=None, tol=0.001, verbose=False, weights=None) + preprocessor=None, prior=None, tol=0.001, verbose=False, + weights=None) """.strip('\n')) def test_sdml(self): self.assertEqual(str(metric_learn.SDML()), - "SDML(balance_param=0.5, sparsity_param=0.01, " - "use_cov=True, verbose=False)") + "SDML(balance_param=0.5, preprocessor=None, " + "sparsity_param=0.01, use_cov=True,\n verbose=False)") self.assertEqual(str(metric_learn.SDML_Supervised()), """ SDML_Supervised(balance_param=0.5, num_constraints=None, num_labeled=inf, - sparsity_param=0.01, use_cov=True, verbose=False) + preprocessor=None, sparsity_param=0.01, use_cov=True, + verbose=False) """.strip('\n')) def test_rca(self): self.assertEqual(str(metric_learn.RCA()), - "RCA(num_dims=None, pca_comps=None)") + "RCA(num_dims=None, pca_comps=None, preprocessor=None)") self.assertEqual(str(metric_learn.RCA_Supervised()), "RCA_Supervised(chunk_size=2, num_chunks=100, " - "num_dims=None, pca_comps=None)") + "num_dims=None, pca_comps=None,\n " + "preprocessor=None)") def test_mlkr(self): self.assertEqual(str(metric_learn.MLKR()), - "MLKR(A0=None, max_iter=1000, num_dims=None, tol=None, " - "verbose=False)") + "MLKR(A0=None, max_iter=1000, num_dims=None, " + "preprocessor=None, tol=None,\n verbose=False)") def test_mmc(self): self.assertEqual(str(metric_learn.MMC()), """ MMC(A0=None, convergence_threshold=0.001, diagonal=False, diagonal_c=1.0, - max_iter=100, max_proj=10000, verbose=False) + max_iter=100, max_proj=10000, preprocessor=None, verbose=False) """.strip('\n')) self.assertEqual(str(metric_learn.MMC_Supervised()), """ MMC_Supervised(A0=None, convergence_threshold=1e-06, diagonal=False, diagonal_c=1.0, max_iter=100, max_proj=10000, num_constraints=None, - num_labeled=inf, verbose=False) + num_labeled=inf, preprocessor=None, verbose=False) """.strip('\n')) if __name__ == '__main__': diff --git a/test/test_fit_transform.py b/test/test_fit_transform.py index 707815ec..118f6b90 100644 --- a/test/test_fit_transform.py +++ b/test/test_fit_transform.py @@ -19,7 +19,7 @@ def setUpClass(self): def test_cov(self): cov = Covariance() cov.fit(self.X) - res_1 = cov.transform() + res_1 = 
cov.transform(self.X) cov = Covariance() res_2 = cov.fit_transform(self.X) @@ -30,7 +30,7 @@ def test_lsml_supervised(self): seed = np.random.RandomState(1234) lsml = LSML_Supervised(num_constraints=200) lsml.fit(self.X, self.y, random_state=seed) - res_1 = lsml.transform() + res_1 = lsml.transform(self.X) seed = np.random.RandomState(1234) lsml = LSML_Supervised(num_constraints=200) @@ -42,7 +42,7 @@ def test_itml_supervised(self): seed = np.random.RandomState(1234) itml = ITML_Supervised(num_constraints=200) itml.fit(self.X, self.y, random_state=seed) - res_1 = itml.transform() + res_1 = itml.transform(self.X) seed = np.random.RandomState(1234) itml = ITML_Supervised(num_constraints=200) @@ -53,7 +53,7 @@ def test_itml_supervised(self): def test_lmnn(self): lmnn = LMNN(k=5, learn_rate=1e-6, verbose=False) lmnn.fit(self.X, self.y) - res_1 = lmnn.transform() + res_1 = lmnn.transform(self.X) lmnn = LMNN(k=5, learn_rate=1e-6, verbose=False) res_2 = lmnn.fit_transform(self.X, self.y) @@ -64,7 +64,7 @@ def test_sdml_supervised(self): seed = np.random.RandomState(1234) sdml = SDML_Supervised(num_constraints=1500) sdml.fit(self.X, self.y, random_state=seed) - res_1 = sdml.transform() + res_1 = sdml.transform(self.X) seed = np.random.RandomState(1234) sdml = SDML_Supervised(num_constraints=1500) @@ -74,11 +74,11 @@ def test_sdml_supervised(self): def test_nca(self): n = self.X.shape[0] - nca = NCA(max_iter=(100000//n), learning_rate=0.01) + nca = NCA(max_iter=(100000//n)) nca.fit(self.X, self.y) - res_1 = nca.transform() + res_1 = nca.transform(self.X) - nca = NCA(max_iter=(100000//n), learning_rate=0.01) + nca = NCA(max_iter=(100000//n)) res_2 = nca.fit_transform(self.X, self.y) assert_array_almost_equal(res_1, res_2) @@ -86,7 +86,7 @@ def test_nca(self): def test_lfda(self): lfda = LFDA(k=2, num_dims=2) lfda.fit(self.X, self.y) - res_1 = lfda.transform() + res_1 = lfda.transform(self.X) lfda = LFDA(k=2, num_dims=2) res_2 = lfda.fit_transform(self.X, self.y) @@ -100,7 +100,7 @@ def test_rca_supervised(self): seed = np.random.RandomState(1234) rca = RCA_Supervised(num_dims=2, num_chunks=30, chunk_size=2) rca.fit(self.X, self.y, random_state=seed) - res_1 = rca.transform() + res_1 = rca.transform(self.X) seed = np.random.RandomState(1234) rca = RCA_Supervised(num_dims=2, num_chunks=30, chunk_size=2) @@ -111,7 +111,7 @@ def test_rca_supervised(self): def test_mlkr(self): mlkr = MLKR(num_dims=2) mlkr.fit(self.X, self.y) - res_1 = mlkr.transform() + res_1 = mlkr.transform(self.X) mlkr = MLKR(num_dims=2) res_2 = mlkr.fit_transform(self.X, self.y) @@ -122,7 +122,7 @@ def test_mmc_supervised(self): seed = np.random.RandomState(1234) mmc = MMC_Supervised(num_constraints=200) mmc.fit(self.X, self.y, random_state=seed) - res_1 = mmc.transform() + res_1 = mmc.transform(self.X) seed = np.random.RandomState(1234) mmc = MMC_Supervised(num_constraints=200) diff --git a/test/test_mahalanobis_mixin.py b/test/test_mahalanobis_mixin.py new file mode 100644 index 00000000..0d834f10 --- /dev/null +++ b/test/test_mahalanobis_mixin.py @@ -0,0 +1,169 @@ +from itertools import product + +import pytest +import numpy as np +from numpy.testing import assert_array_almost_equal +from scipy.spatial.distance import pdist, squareform +from sklearn import clone +from sklearn.utils import check_random_state +from sklearn.utils.testing import set_random_state + +from metric_learn._util import make_context + +from test.test_utils import ids_metric_learners, metric_learners + +RNG = check_random_state(0) + + 
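The tests below exercise the two user-visible pieces of the refactored
Mahalanobis API: `score_pairs`, which scores a 3D array of stacked pairs, and
`transform`, which now takes the data to embed as an explicit argument. As a
minimal sketch of that API (an illustration only, not part of the patch,
using the same names that appear in the diffs above):

import numpy as np
from sklearn.datasets import load_iris
from metric_learn import NCA

X, y = load_iris(return_X_y=True)
nca = NCA(num_dims=2)
nca.fit(X, y)
embedded = nca.transform(X)                   # data is now passed explicitly
pairs = np.stack([X[:10], X[10:20]], axis=1)  # shape (10, 2, n_features)
dists = nca.score_pairs(pairs)                # learned Mahalanobis distances
assert dists.shape == (10,)                   # one score per pair

Since `score_pairs` comes from `MahalanobisMixin`, the same call works for
every learner parametrized in these tests, supervised or not.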
+@pytest.mark.parametrize('estimator, build_dataset', metric_learners, + ids=ids_metric_learners) +def test_score_pairs_pairwise(estimator, build_dataset): + # Computing pairwise scores should return a euclidean distance matrix. + input_data, labels, _, X = build_dataset() + n_samples = 20 + X = X[:n_samples] + model = clone(estimator) + set_random_state(model) + model.fit(input_data, labels) + + pairwise = model.score_pairs(np.array(list(product(X, X))))\ + .reshape(n_samples, n_samples) + + check_is_distance_matrix(pairwise) + + # a necessary condition for euclidean distance matrices: (see + # https://en.wikipedia.org/wiki/Euclidean_distance_matrix) + assert np.linalg.matrix_rank(pairwise**2) <= min(X.shape) + 2 + + # assert that this distance is coherent with pdist on embeddings + assert_array_almost_equal(squareform(pairwise), pdist(model.transform(X))) + + +@pytest.mark.parametrize('estimator, build_dataset', metric_learners, + ids=ids_metric_learners) +def test_score_pairs_toy_example(estimator, build_dataset): + # Checks that score_pairs works on a toy example + input_data, labels, _, X = build_dataset() + n_samples = 20 + X = X[:n_samples] + model = clone(estimator) + set_random_state(model) + model.fit(input_data, labels) + pairs = np.stack([X[:10], X[10:20]], axis=1) + embedded_pairs = pairs.dot(model.transformer_.T) + distances = np.sqrt(np.sum((embedded_pairs[:, 1] - + embedded_pairs[:, 0])**2, + axis=-1)) + assert_array_almost_equal(model.score_pairs(pairs), distances) + + +@pytest.mark.parametrize('estimator, build_dataset', metric_learners, + ids=ids_metric_learners) +def test_score_pairs_finite(estimator, build_dataset): + # tests that the score is finite + input_data, labels, _, X = build_dataset() + model = clone(estimator) + set_random_state(model) + model.fit(input_data, labels) + pairs = np.array(list(product(X, X))) + assert np.isfinite(model.score_pairs(pairs)).all() + + +@pytest.mark.parametrize('estimator, build_dataset', metric_learners, + ids=ids_metric_learners) +def test_score_pairs_dim(estimator, build_dataset): + # scoring of 3D arrays should return 1D array (several tuples), + # and scoring of 2D arrays (one tuple) should return an error (like + # scikit-learn's error when scoring 1D arrays) + input_data, labels, _, X = build_dataset() + model = clone(estimator) + set_random_state(model) + model.fit(input_data, labels) + tuples = np.array(list(product(X, X))) + assert model.score_pairs(tuples).shape == (tuples.shape[0],) + context = make_context(estimator) + msg = ("3D array of formed tuples expected{}. Found 2D array " + "instead:\ninput={}. 
Reshape your data and/or use a preprocessor.\n" + .format(context, tuples[1])) + with pytest.raises(ValueError) as raised_error: + model.score_pairs(tuples[1]) + assert str(raised_error.value) == msg + + +def check_is_distance_matrix(pairwise): + assert (pairwise >= 0).all() # positivity + assert np.array_equal(pairwise, pairwise.T) # symmetry + assert (pairwise.diagonal() == 0).all() # identity + # triangular inequality + tol = 1e-15 + assert (pairwise <= pairwise[:, :, np.newaxis] + + pairwise[:, np.newaxis, :] + tol).all() + + +@pytest.mark.parametrize('estimator, build_dataset', metric_learners, + ids=ids_metric_learners) +def test_embed_toy_example(estimator, build_dataset): + # Checks that embed works on a toy example + input_data, labels, _, X = build_dataset() + n_samples = 20 + X = X[:n_samples] + model = clone(estimator) + set_random_state(model) + model.fit(input_data, labels) + embedded_points = X.dot(model.transformer_.T) + assert_array_almost_equal(model.transform(X), embedded_points) + + +@pytest.mark.parametrize('estimator, build_dataset', metric_learners, + ids=ids_metric_learners) +def test_embed_dim(estimator, build_dataset): + # Checks that the the dimension of the output space is as expected + input_data, labels, _, X = build_dataset() + model = clone(estimator) + set_random_state(model) + model.fit(input_data, labels) + assert model.transform(X).shape == X.shape + + # assert that ValueError is thrown if input shape is 1D + context = make_context(estimator) + err_msg = ("2D array of formed points expected{}. Found 1D array " + "instead:\ninput={}. Reshape your data and/or use a " + "preprocessor.\n".format(context, X[0])) + with pytest.raises(ValueError) as raised_error: + model.score_pairs(model.transform(X[0, :])) + assert str(raised_error.value) == err_msg + # we test that the shape is also OK when doing dimensionality reduction + if type(model).__name__ in {'LFDA', 'MLKR', 'NCA', 'RCA'}: + model.set_params(num_dims=2) + model.fit(input_data, labels) + assert model.transform(X).shape == (X.shape[0], 2) + # assert that ValueError is thrown if input shape is 1D + with pytest.raises(ValueError) as raised_error: + model.transform(model.transform(X[0, :])) + assert str(raised_error.value) == err_msg + + +@pytest.mark.parametrize('estimator, build_dataset', metric_learners, + ids=ids_metric_learners) +def test_embed_finite(estimator, build_dataset): + # Checks that embed returns vectors with finite values + input_data, labels, _, X = build_dataset() + model = clone(estimator) + set_random_state(model) + model.fit(input_data, labels) + assert np.isfinite(model.transform(X)).all() + + +@pytest.mark.parametrize('estimator, build_dataset', metric_learners, + ids=ids_metric_learners) +def test_embed_is_linear(estimator, build_dataset): + # Checks that the embedding is linear + input_data, labels, _, X = build_dataset() + model = clone(estimator) + set_random_state(model) + model.fit(input_data, labels) + assert_array_almost_equal(model.transform(X[:10] + X[10:20]), + model.transform(X[:10]) + + model.transform(X[10:20])) + assert_array_almost_equal(model.transform(5 * X[:10]), + 5 * model.transform(X[:10])) diff --git a/test/test_sklearn_compat.py b/test/test_sklearn_compat.py index f1e1a09d..d9dce685 100644 --- a/test/test_sklearn_compat.py +++ b/test/test_sklearn_compat.py @@ -1,10 +1,23 @@ -import numpy as np +import pytest import unittest from sklearn.utils.estimator_checks import check_estimator +from sklearn.base import TransformerMixin +from sklearn.pipeline import 
make_pipeline +from sklearn.utils import check_random_state +from sklearn.utils.estimator_checks import is_public_parameter +from sklearn.utils.testing import (assert_allclose_dense_sparse, + set_random_state) -from metric_learn import ( - LMNN, NCA, LFDA, Covariance, MLKR, - LSML_Supervised, ITML_Supervised, SDML_Supervised, RCA_Supervised, MMC_Supervised) +from metric_learn import (Covariance, LFDA, LMNN, MLKR, NCA, + ITML_Supervised, LSML_Supervised, + MMC_Supervised, RCA_Supervised, SDML_Supervised) +from sklearn import clone +import numpy as np +from sklearn.model_selection import (cross_val_score, cross_val_predict, + train_test_split, KFold) +from sklearn.utils.testing import _get_args +from test.test_utils import (metric_learners, ids_metric_learners, + mock_preprocessor) # Wrap the _Supervised methods with a deterministic wrapper for testing. @@ -68,5 +81,263 @@ def test_mmc(self): # check_estimator(RCA_Supervised) +RNG = check_random_state(0) + + +# ---------------------- Test scikit-learn compatibility ---------------------- + + +@pytest.mark.parametrize('with_preprocessor', [True, False]) +@pytest.mark.parametrize('estimator, build_dataset', metric_learners, + ids=ids_metric_learners) +def test_cross_validation_is_finite(estimator, build_dataset, + with_preprocessor): + """Tests that validation on metric-learn estimators returns something finite + """ + if any(hasattr(estimator, method) for method in ["predict", "score"]): + input_data, labels, preprocessor, _ = build_dataset(with_preprocessor) + estimator = clone(estimator) + estimator.set_params(preprocessor=preprocessor) + set_random_state(estimator) + if hasattr(estimator, "score"): + assert np.isfinite(cross_val_score(estimator, input_data, labels)).all() + if hasattr(estimator, "predict"): + assert np.isfinite(cross_val_predict(estimator, + input_data, labels)).all() + + +@pytest.mark.parametrize('with_preprocessor', [True, False]) +@pytest.mark.parametrize('estimator, build_dataset', metric_learners, + ids=ids_metric_learners) +def test_cross_validation_manual_vs_scikit(estimator, build_dataset, + with_preprocessor): + """Tests that if we make a manual cross-validation, the result will be the + same as scikit-learn's cross-validation (some code for generating the + folds is taken from scikit-learn). 
+ """ + if any(hasattr(estimator, method) for method in ["predict", "score"]): + input_data, labels, preprocessor, _ = build_dataset(with_preprocessor) + estimator = clone(estimator) + estimator.set_params(preprocessor=preprocessor) + set_random_state(estimator) + n_splits = 3 + kfold = KFold(shuffle=False, n_splits=n_splits) + n_samples = input_data.shape[0] + fold_sizes = (n_samples // n_splits) * np.ones(n_splits, dtype=np.int) + fold_sizes[:n_samples % n_splits] += 1 + current = 0 + scores, predictions = [], np.zeros(input_data.shape[0]) + for fold_size in fold_sizes: + start, stop = current, current + fold_size + current = stop + test_slice = slice(start, stop) + train_mask = np.ones(input_data.shape[0], bool) + train_mask[test_slice] = False + y_train, y_test = labels[train_mask], labels[test_slice] + estimator.fit(input_data[train_mask], y_train) + if hasattr(estimator, "score"): + scores.append(estimator.score(input_data[test_slice], y_test)) + if hasattr(estimator, "predict"): + predictions[test_slice] = estimator.predict(input_data[test_slice]) + if hasattr(estimator, "score"): + assert all(scores == cross_val_score(estimator, input_data, labels, + cv=kfold)) + if hasattr(estimator, "predict"): + assert all(predictions == cross_val_predict(estimator, input_data, + labels, + cv=kfold)) + + +def check_score(estimator, tuples, y): + if hasattr(estimator, "score"): + score = estimator.score(tuples, y) + assert np.isfinite(score) + + +def check_predict(estimator, tuples): + if hasattr(estimator, "predict"): + y_predicted = estimator.predict(tuples) + assert len(y_predicted), len(tuples) + + +@pytest.mark.parametrize('with_preprocessor', [True, False]) +@pytest.mark.parametrize('estimator, build_dataset', metric_learners, + ids=ids_metric_learners) +def test_simple_estimator(estimator, build_dataset, with_preprocessor): + """Tests that fit, predict and scoring works. + """ + if any(hasattr(estimator, method) for method in ["predict", "score"]): + input_data, labels, preprocessor, _ = build_dataset(with_preprocessor) + (tuples_train, tuples_test, y_train, + y_test) = train_test_split(input_data, labels, random_state=RNG) + estimator = clone(estimator) + estimator.set_params(preprocessor=preprocessor) + set_random_state(estimator) + + estimator.fit(tuples_train, y_train) + check_score(estimator, tuples_test, y_test) + check_predict(estimator, tuples_test) + + +@pytest.mark.parametrize('estimator', [est[0] for est in metric_learners], + ids=ids_metric_learners) +@pytest.mark.parametrize('preprocessor', [None, mock_preprocessor]) +def test_no_attributes_set_in_init(estimator, preprocessor): + """Check setting during init. Adapted from scikit-learn.""" + estimator = clone(estimator) + estimator.set_params(preprocessor=preprocessor) + if hasattr(type(estimator).__init__, "deprecated_original"): + return + + init_params = _get_args(type(estimator).__init__) + parents_init_params = [param for params_parent in + (_get_args(parent) for parent in + type(estimator).__mro__) + for param in params_parent] + + # Test for no setting apart from parameters during init + invalid_attr = (set(vars(estimator)) - set(init_params) - + set(parents_init_params)) + assert not invalid_attr, \ + ("Estimator %s should not set any attribute apart" + " from parameters during init. Found attributes %s." 
+ % (type(estimator).__name__, sorted(invalid_attr))) + # Ensure that each parameter is set in init + invalid_attr = (set(init_params) - set(vars(estimator)) - + set(["self"])) + assert not invalid_attr, \ + ("Estimator %s should store all parameters" + " as an attribute during init. Did not find " + "attributes %s." % (type(estimator).__name__, sorted(invalid_attr))) + + +@pytest.mark.parametrize('with_preprocessor', [True, False]) +@pytest.mark.parametrize('estimator, build_dataset', metric_learners, + ids=ids_metric_learners) +def test_estimators_fit_returns_self(estimator, build_dataset, + with_preprocessor): + """Check if self is returned when calling fit""" + # Adapted from scikit-learn + input_data, labels, preprocessor, _ = build_dataset(with_preprocessor) + estimator = clone(estimator) + estimator.set_params(preprocessor=preprocessor) + assert estimator.fit(input_data, labels) is estimator + + +@pytest.mark.parametrize('with_preprocessor', [True, False]) +@pytest.mark.parametrize('estimator, build_dataset', metric_learners, + ids=ids_metric_learners) +def test_pipeline_consistency(estimator, build_dataset, + with_preprocessor): + # Adapted from scikit learn + # check that make_pipeline(est) gives same score as est + input_data, y, preprocessor, _ = build_dataset(with_preprocessor) + + def make_random_state(estimator, in_pipeline): + rs = {} + name_estimator = estimator.__class__.__name__ + if name_estimator[-11:] == '_Supervised': + name_param = 'random_state' + if in_pipeline: + name_param = name_estimator.lower() + '__' + name_param + rs[name_param] = check_random_state(0) + return rs + + estimator = clone(estimator) + estimator.set_params(preprocessor=preprocessor) + pipeline = make_pipeline(estimator) + estimator.fit(input_data, y, **make_random_state(estimator, False)) + pipeline.fit(input_data, y, **make_random_state(estimator, True)) + + if hasattr(estimator, 'score'): + result = estimator.score(input_data, y) + result_pipe = pipeline.score(input_data, y) + assert_allclose_dense_sparse(result, result_pipe) + + if hasattr(estimator, 'predict'): + result = estimator.predict(input_data) + result_pipe = pipeline.predict(input_data) + assert_allclose_dense_sparse(result, result_pipe) + + if issubclass(estimator.__class__, TransformerMixin): + if hasattr(estimator, 'transform'): + result = estimator.transform(input_data) + result_pipe = pipeline.transform(input_data) + assert_allclose_dense_sparse(result, result_pipe) + + +@pytest.mark.parametrize('with_preprocessor',[True, False]) +@pytest.mark.parametrize('estimator, build_dataset', metric_learners, + ids=ids_metric_learners) +def test_dict_unchanged(estimator, build_dataset, with_preprocessor): + # Adapted from scikit-learn + (input_data, labels, preprocessor, + to_transform) = build_dataset(with_preprocessor) + estimator = clone(estimator) + estimator.set_params(preprocessor=preprocessor) + if hasattr(estimator, "num_dims"): + estimator.num_dims = 1 + estimator.fit(input_data, labels) + + def check_dict(): + assert estimator.__dict__ == dict_before, ( + "Estimator changes __dict__ during %s" % method) + for method in ["predict", "decision_function", "predict_proba"]: + if hasattr(estimator, method): + dict_before = estimator.__dict__.copy() + getattr(estimator, method)(input_data) + check_dict() + if hasattr(estimator, "transform"): + dict_before = estimator.__dict__.copy() + # we transform only dataset of points + estimator.transform(to_transform) + check_dict() + + +@pytest.mark.parametrize('with_preprocessor',[True, 
False]) +@pytest.mark.parametrize('estimator, build_dataset', metric_learners, + ids=ids_metric_learners) +def test_dont_overwrite_parameters(estimator, build_dataset, + with_preprocessor): + # Adapted from scikit-learn + # check that fit method only changes or sets private attributes + input_data, labels, preprocessor, _ = build_dataset(with_preprocessor) + estimator = clone(estimator) + estimator.set_params(preprocessor=preprocessor) + if hasattr(estimator, "num_dims"): + estimator.num_dims = 1 + dict_before_fit = estimator.__dict__.copy() + + estimator.fit(input_data, labels) + dict_after_fit = estimator.__dict__ + + public_keys_after_fit = [key for key in dict_after_fit.keys() + if is_public_parameter(key)] + + attrs_added_by_fit = [key for key in public_keys_after_fit + if key not in dict_before_fit.keys()] + + # check that fit doesn't add any public attribute + assert not attrs_added_by_fit, ( + "Estimator adds public attribute(s) during" + " the fit method." + " Estimators are only allowed to add private " + "attributes" + " either started with _ or ended" + " with _ but %s added" % ', '.join(attrs_added_by_fit)) + + # check that fit doesn't change any public attribute + attrs_changed_by_fit = [key for key in public_keys_after_fit + if (dict_before_fit[key] + is not dict_after_fit[key])] + + assert not attrs_changed_by_fit, ( + "Estimator changes public attribute(s) during" + " the fit method. Estimators are only allowed" + " to change attributes started" + " or ended with _, but" + " %s changed" % ', '.join(attrs_changed_by_fit)) + + if __name__ == '__main__': unittest.main() diff --git a/test/test_transformer_metric_conversion.py b/test/test_transformer_metric_conversion.py index e027d176..ab38d65e 100644 --- a/test/test_transformer_metric_conversion.py +++ b/test/test_transformer_metric_conversion.py @@ -19,60 +19,60 @@ def setUpClass(self): def test_cov(self): cov = Covariance() cov.fit(self.X) - L = cov.transformer() + L = cov.transformer_ assert_array_almost_equal(L.T.dot(L), cov.metric()) def test_lsml_supervised(self): seed = np.random.RandomState(1234) lsml = LSML_Supervised(num_constraints=200) lsml.fit(self.X, self.y, random_state=seed) - L = lsml.transformer() + L = lsml.transformer_ assert_array_almost_equal(L.T.dot(L), lsml.metric()) def test_itml_supervised(self): seed = np.random.RandomState(1234) itml = ITML_Supervised(num_constraints=200) itml.fit(self.X, self.y, random_state=seed) - L = itml.transformer() + L = itml.transformer_ assert_array_almost_equal(L.T.dot(L), itml.metric()) def test_lmnn(self): lmnn = LMNN(k=5, learn_rate=1e-6, verbose=False) lmnn.fit(self.X, self.y) - L = lmnn.transformer() + L = lmnn.transformer_ assert_array_almost_equal(L.T.dot(L), lmnn.metric()) def test_sdml_supervised(self): seed = np.random.RandomState(1234) sdml = SDML_Supervised(num_constraints=1500) sdml.fit(self.X, self.y, random_state=seed) - L = sdml.transformer() + L = sdml.transformer_ assert_array_almost_equal(L.T.dot(L), sdml.metric()) def test_nca(self): n = self.X.shape[0] - nca = NCA(max_iter=(100000//n), learning_rate=0.01) + nca = NCA(max_iter=(100000//n)) nca.fit(self.X, self.y) - L = nca.transformer() + L = nca.transformer_ assert_array_almost_equal(L.T.dot(L), nca.metric()) def test_lfda(self): lfda = LFDA(k=2, num_dims=2) lfda.fit(self.X, self.y) - L = lfda.transformer() + L = lfda.transformer_ assert_array_almost_equal(L.T.dot(L), lfda.metric()) def test_rca_supervised(self): seed = np.random.RandomState(1234) rca = RCA_Supervised(num_dims=2, num_chunks=30, 
chunk_size=2) rca.fit(self.X, self.y, random_state=seed) - L = rca.transformer() + L = rca.transformer_ assert_array_almost_equal(L.T.dot(L), rca.metric()) def test_mlkr(self): mlkr = MLKR(num_dims=2) mlkr.fit(self.X, self.y) - L = mlkr.transformer() + L = mlkr.transformer_ assert_array_almost_equal(L.T.dot(L), mlkr.metric()) diff --git a/test/test_utils.py b/test/test_utils.py new file mode 100644 index 00000000..de59e9ff --- /dev/null +++ b/test/test_utils.py @@ -0,0 +1,1013 @@ +import pytest +from collections import namedtuple +import numpy as np +from sklearn.model_selection import train_test_split +from sklearn.exceptions import DataConversionWarning +from sklearn.utils import check_random_state, shuffle +from sklearn.utils.testing import set_random_state +from sklearn.base import clone +from metric_learn._util import (check_input, make_context, preprocess_tuples, + make_name, preprocess_points, + check_collapsed_pairs) +from metric_learn import (ITML, LSML, MMC, RCA, SDML, Covariance, LFDA, + LMNN, MLKR, NCA, ITML_Supervised, LSML_Supervised, + MMC_Supervised, RCA_Supervised, SDML_Supervised, + Constraints) +from metric_learn.base_metric import (ArrayIndexer, MahalanobisMixin, + _PairsClassifierMixin, + _QuadrupletsClassifierMixin) +from metric_learn.exceptions import PreprocessorError +from sklearn.datasets import make_regression, make_blobs, load_iris + + +SEED = 42 +RNG = check_random_state(SEED) + +Dataset = namedtuple('Dataset', ('data target preprocessor to_transform')) +# Data and target are what we will fit on. Preprocessor is the additional +# data if we use a preprocessor (which should be the default ArrayIndexer), +# and to_transform is some additional data that we would want to transform + + +@pytest.fixture +def build_classification(with_preprocessor=False): + """Basic array for testing when using a preprocessor""" + X, y = shuffle(*make_blobs(random_state=SEED), + random_state=SEED) + indices = shuffle(np.arange(X.shape[0]), random_state=SEED).astype(int) + if with_preprocessor: + return Dataset(indices, y[indices], X, indices) + else: + return Dataset(X[indices], y[indices], None, X[indices]) + + +@pytest.fixture +def build_regression(with_preprocessor=False): + """Basic array for testing when using a preprocessor""" + X, y = shuffle(*make_regression(n_samples=100, n_features=5, + random_state=SEED), + random_state=SEED) + indices = shuffle(np.arange(X.shape[0]), random_state=SEED).astype(int) + if with_preprocessor: + return Dataset(indices, y[indices], X, indices) + else: + return Dataset(X[indices], y[indices], None, X[indices]) + + +def build_data(): + input_data, labels = load_iris(return_X_y=True) + X, y = shuffle(input_data, labels, random_state=SEED) + num_constraints = 50 + constraints = ( + Constraints.random_subset(y, random_state=check_random_state(SEED))) + pairs = ( + constraints + .positive_negative_pairs(num_constraints, same_length=True, + random_state=check_random_state(SEED))) + return X, pairs + + +def build_pairs(with_preprocessor=False): + # builds a toy pairs problem + X, indices = build_data() + c = np.vstack([np.column_stack(indices[:2]), np.column_stack(indices[2:])]) + target = np.concatenate([np.ones(indices[0].shape[0]), + - np.ones(indices[0].shape[0])]) + c, target = shuffle(c, target, random_state=SEED) + if with_preprocessor: + # if preprocessor, we build a 2D array of pairs of indices + return Dataset(c, target, X, c[:, 0]) + else: + # if not, we build a 3D array of pairs of samples + return Dataset(X[c], target, None, X[c[:, 0]]) + + 
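`build_pairs` above (and `build_quadruplets` below) encode the preprocessor
convention used throughout the new tests: with a preprocessor, tuples are 2D
arrays of indices and the points are fetched as `X[indices]`; without one,
tuples are 3D arrays of formed points. A minimal sketch of the two equivalent
calling styles (an illustration only, not part of the patch, using ITML, one
of the pairs learners listed below, on toy data):

import numpy as np
from metric_learn import ITML

rng = np.random.RandomState(42)
X = rng.randn(20, 5)                          # array the indices refer to
pairs_idx = np.array([[0, 1], [2, 3], [4, 5], [6, 7]])  # 2D pairs of indices
y = np.array([1, 1, -1, -1])                  # +1 similar, -1 dissimilar

itml = ITML(preprocessor=X)                   # array-like preprocessor
itml.fit(pairs_idx, y)                        # pairs formed as X[pairs_idx]

itml = ITML()                                 # no preprocessor
itml.fit(X[pairs_idx], y)                     # same pairs, formed up front

Both styles fit the same metric; the index form simply defers forming the
tuples to the estimator via `_prepare_inputs`.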
+def build_quadruplets(with_preprocessor=False): + # builds a toy quadruplets problem + X, indices = build_data() + c = np.column_stack(indices) + target = np.ones(c.shape[0]) # quadruplets targets are not used + # anyways + c, target = shuffle(c, target, random_state=SEED) + if with_preprocessor: + # if preprocessor, we build a 2D array of quadruplets of indices + return Dataset(c, target, X, c[:, 0]) + else: + # if not, we build a 3D array of quadruplets of samples + return Dataset(X[c], target, None, X[c[:, 0]]) + + +quadruplets_learners = [(LSML(), build_quadruplets)] +ids_quadruplets_learners = list(map(lambda x: x.__class__.__name__, + [learner for (learner, _) in + quadruplets_learners])) + +pairs_learners = [(ITML(), build_pairs), + (MMC(max_iter=2), build_pairs), # max_iter=2 for faster + (SDML(), build_pairs), + ] +ids_pairs_learners = list(map(lambda x: x.__class__.__name__, + [learner for (learner, _) in + pairs_learners])) + +classifiers = [(Covariance(), build_classification), + (LFDA(), build_classification), + (LMNN(), build_classification), + (NCA(), build_classification), + (RCA(), build_classification), + (ITML_Supervised(max_iter=5), build_classification), + (LSML_Supervised(), build_classification), + (MMC_Supervised(max_iter=5), build_classification), + (RCA_Supervised(num_chunks=10), build_classification), + (SDML_Supervised(), build_classification) + ] +ids_classifiers = list(map(lambda x: x.__class__.__name__, + [learner for (learner, _) in + classifiers])) + +regressors = [(MLKR(), build_regression)] +ids_regressors = list(map(lambda x: x.__class__.__name__, + [learner for (learner, _) in regressors])) + +WeaklySupervisedClasses = (_PairsClassifierMixin, + _QuadrupletsClassifierMixin) + +tuples_learners = pairs_learners + quadruplets_learners +ids_tuples_learners = ids_pairs_learners + ids_quadruplets_learners + +supervised_learners = classifiers + regressors +ids_supervised_learners = ids_classifiers + ids_regressors + +metric_learners = tuples_learners + supervised_learners +ids_metric_learners = ids_tuples_learners + ids_supervised_learners + + +def mock_preprocessor(indices): + """A preprocessor for testing purposes that returns an all ones 3D array + """ + return np.ones((indices.shape[0], 3)) + + +@pytest.mark.parametrize('type_of_inputs', ['other', 'tuple', 'classics', 2, + int, NCA()]) +def test_check_input_invalid_type_of_inputs(type_of_inputs): + """Tests that an invalid type of inputs in check_inputs raises an error.""" + with pytest.raises(ValueError) as e: + check_input([[0.2, 2.1], [0.2, .8]], type_of_inputs=type_of_inputs) + msg = ("Unknown value {} for type_of_inputs. 
Valid values are " + "'classic' or 'tuples'.".format(type_of_inputs)) + assert str(e.value) == msg + + +# ---------------- test check_input with 'tuples' type_of_input' ------------ + + +@pytest.fixture +def tuples_prep(): + """Basic array for testing when using a preprocessor""" + tuples = np.array([[1, 2], + [2, 3]]) + return tuples + + +@pytest.fixture +def tuples_no_prep(): + """Basic array for testing when using no preprocessor""" + tuples = np.array([[[1., 2.3], [2.3, 5.3]], + [[2.3, 4.3], [0.2, 0.4]]]) + return tuples + + +@pytest.mark.parametrize('estimator, expected', + [(NCA(), " by NCA"), ('NCA', " by NCA"), (None, "")]) +def test_make_context(estimator, expected): + """test the make_name function""" + assert make_context(estimator) == expected + + +@pytest.mark.parametrize('estimator, expected', + [(NCA(), "NCA"), ('NCA', "NCA"), (None, None)]) +def test_make_name(estimator, expected): + """test the make_name function""" + assert make_name(estimator) == expected + + +@pytest.mark.parametrize('estimator, context', + [(NCA(), " by NCA"), ('NCA', " by NCA"), (None, "")]) +@pytest.mark.parametrize('load_tuples, preprocessor', + [(tuples_prep, mock_preprocessor), + (tuples_no_prep, None), + (tuples_no_prep, mock_preprocessor)]) +def test_check_tuples_invalid_tuple_size(estimator, context, load_tuples, + preprocessor): + """Checks that the exception are raised if tuple_size is not the one + expected""" + tuples = load_tuples() + preprocessed_tuples = (preprocess_tuples(tuples, preprocessor) + if (preprocessor is not None and + tuples.ndim == 2) else tuples) + expected_msg = ("Tuples of 3 element(s) expected{}. Got tuples of 2 " + "element(s) instead (shape={}):\ninput={}.\n" + .format(context, preprocessed_tuples.shape, + preprocessed_tuples)) + with pytest.raises(ValueError) as raised_error: + check_input(tuples, type_of_inputs='tuples', tuple_size=3, + preprocessor=preprocessor, estimator=estimator) + assert str(raised_error.value) == expected_msg + + +@pytest.mark.parametrize('estimator, context', + [(NCA(), " by NCA"), ('NCA', " by NCA"), (None, "")]) +@pytest.mark.parametrize('tuples, found, expected, preprocessor', + [(5, '0', '2D array of indicators or 3D array of ' + 'formed tuples', mock_preprocessor), + (5, '0', '3D array of formed tuples', None), + ([1, 2], '1', '2D array of indicators or 3D array ' + 'of formed tuples', mock_preprocessor), + ([1, 2], '1', '3D array of formed tuples', None), + ([[[[5]]]], '4', '2D array of indicators or 3D array' + ' of formed tuples', + mock_preprocessor), + ([[[[5]]]], '4', '3D array of formed tuples', None), + ([[1], [3]], '2', '3D array of formed ' + 'tuples', None)]) +def test_check_tuples_invalid_shape(estimator, context, tuples, found, + expected, preprocessor): + """Checks that a value error with the appropriate message is raised if + shape is invalid (not 2D with preprocessor or 3D with no preprocessor) + """ + tuples = np.array(tuples) + msg = ("{} expected{}{}. Found {}D array instead:\ninput={}. 
Reshape your " + "data{}.\n" + .format(expected, context, ' when using a preprocessor' + if preprocessor else '', found, tuples, + ' and/or use a preprocessor' if + (not preprocessor and tuples.ndim == 2) else '')) + with pytest.raises(ValueError) as raised_error: + check_input(tuples, type_of_inputs='tuples', + preprocessor=preprocessor, ensure_min_samples=0, + estimator=estimator) + assert str(raised_error.value) == msg + + +@pytest.mark.parametrize('estimator, context', + [(NCA(), " by NCA"), ('NCA', " by NCA"), (None, "")]) +def test_check_tuples_invalid_n_features(estimator, context, tuples_no_prep): + """Checks that the right warning is printed if not enough features + Here we only test if no preprocessor (otherwise we don't ensure this) + """ + msg = ("Found array with 2 feature(s) (shape={}) while" + " a minimum of 3 is required{}.".format(tuples_no_prep.shape, + context)) + with pytest.raises(ValueError) as raised_error: + check_input(tuples_no_prep, type_of_inputs='tuples', + preprocessor=None, ensure_min_features=3, + estimator=estimator) + assert str(raised_error.value) == msg + + +@pytest.mark.parametrize('estimator, context', + [(NCA(), " by NCA"), ('NCA', " by NCA"), (None, "")]) +@pytest.mark.parametrize('load_tuples, preprocessor', + [(tuples_prep, mock_preprocessor), + (tuples_no_prep, None), + (tuples_no_prep, mock_preprocessor)]) +def test_check_tuples_invalid_n_samples(estimator, context, load_tuples, + preprocessor): + """Checks that the right warning is printed if n_samples is too small""" + tuples = load_tuples() + msg = ("Found array with 2 sample(s) (shape={}) while a minimum of 3 " + "is required{}.".format((preprocess_tuples(tuples, preprocessor) + if (preprocessor is not None and + tuples.ndim == 2) else tuples).shape, + context)) + with pytest.raises(ValueError) as raised_error: + check_input(tuples, type_of_inputs='tuples', + preprocessor=preprocessor, + ensure_min_samples=3, estimator=estimator) + assert str(raised_error.value) == msg + + +@pytest.mark.parametrize('estimator, context', + [(NCA(), " by NCA"), ('NCA', " by NCA"), (None, "")]) +@pytest.mark.parametrize('load_tuples, preprocessor', + [(tuples_prep, mock_preprocessor), + (tuples_no_prep, None), + (tuples_no_prep, mock_preprocessor)]) +def test_check_tuples_invalid_dtype_convertible(estimator, context, + load_tuples, preprocessor): + """Checks that a warning is raised if a convertible input is converted to + float""" + tuples = load_tuples().astype(object) # here the object conversion is + # useless for the tuples_prep case, but this allows to test the + # tuples_prep case + + if preprocessor is not None: # if the preprocessor is not None we + # overwrite it to have a preprocessor that returns objects + def preprocessor(indices): # + # preprocessor that returns objects + return np.ones((indices.shape[0], 3)).astype(object) + + msg = ("Data with input dtype object was converted to float64{}." 
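
The warning asserted in these tests is scikit-learn's own. A standalone
sketch of the same conversion warning, assuming a scikit-learn release
contemporary with this patch (``warn_on_dtype`` was later removed from
``check_array``)::

    import warnings

    import numpy as np
    from sklearn.utils import check_array

    data = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=object)
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        check_array(data, dtype=np.float64, warn_on_dtype=True)
    print(caught[0].message)
    # Data with input dtype object was converted to float64.
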
+ .format(context)) + with pytest.warns(DataConversionWarning) as raised_warning: + check_input(tuples, type_of_inputs='tuples', + preprocessor=preprocessor, dtype=np.float64, + warn_on_dtype=True, estimator=estimator) + assert str(raised_warning[0].message) == msg + + +def test_check_tuples_invalid_dtype_not_convertible_with_preprocessor( + tuples_prep): + """Checks that a value error is thrown if attempting to convert an + input not convertible to float, when using a preprocessor + """ + + def preprocessor(indices): + # preprocessor that returns objects + return np.full((indices.shape[0], 3), 'a') + + with pytest.raises(ValueError): + check_input(tuples_prep, type_of_inputs='tuples', + preprocessor=preprocessor, dtype=np.float64) + + +def test_check_tuples_invalid_dtype_not_convertible_without_preprocessor( + tuples_no_prep): + """Checks that a value error is thrown if attempting to convert an + input not convertible to float, when using no preprocessor + """ + tuples = np.full_like(tuples_no_prep, 'a', dtype=object) + with pytest.raises(ValueError): + check_input(tuples, type_of_inputs='tuples', + preprocessor=None, dtype=np.float64) + + +@pytest.mark.parametrize('tuple_size', [2, None]) +def test_check_tuples_valid_tuple_size(tuple_size, tuples_prep, tuples_no_prep): + """For inputs that have the right matrix dimension (2D or 3D for instance), + checks that checking the number of tuples (pairs, quadruplets, etc) raises + no warning if there is the right number of points in a tuple. + """ + with pytest.warns(None) as record: + check_input(tuples_prep, type_of_inputs='tuples', + preprocessor=mock_preprocessor, tuple_size=tuple_size) + check_input(tuples_no_prep, type_of_inputs='tuples', preprocessor=None, + tuple_size=tuple_size) + assert len(record) == 0 + + +@pytest.mark.parametrize('tuples', + [np.array([[2.5, 0.1, 2.6], + [1.6, 4.8, 9.1]]), + np.array([[2, 0, 2], + [1, 4, 9]]), + np.array([["img1.png", "img3.png"], + ["img2.png", "img4.png"]]), + [[2, 0, 2], + [1, 4, 9]], + [np.array([2, 0, 2]), + np.array([1, 4, 9])], + ((2, 0, 2), + (1, 4, 9)), + np.array([[[1.2, 2.2], [1.4, 3.3]], + [[2.6, 2.3], [3.4, 5.0]]])]) +def test_check_tuples_valid_with_preprocessor(tuples): + """Test that valid inputs when using a preprocessor raises no warning""" + with pytest.warns(None) as record: + check_input(tuples, type_of_inputs='tuples', + preprocessor=mock_preprocessor) + assert len(record) == 0 + + +@pytest.mark.parametrize('tuples', + [np.array([[[2.5], [0.1], [2.6]], + [[1.6], [4.8], [9.1]], + [[5.6], [2.8], [6.1]]]), + np.array([[[2], [0], [2]], + [[1], [4], [9]], + [[1], [5], [3]]]), + [[[2], [0], [2]], + [[1], [4], [9]], + [[3], [4], [29]]], + (((2, 1), (0, 2), (2, 3)), + ((1, 2), (4, 4), (9, 3)), + ((3, 1), (4, 4), (29, 4)))]) +def test_check_tuples_valid_without_preprocessor(tuples): + """Test that valid inputs when using no preprocessor raises no warning""" + with pytest.warns(None) as record: + check_input(tuples, type_of_inputs='tuples', preprocessor=None) + assert len(record) == 0 + + +def test_check_tuples_behaviour_auto_dtype(tuples_no_prep): + """Checks that check_tuples allows by default every type if using a + preprocessor, and numeric types if using no preprocessor""" + tuples_prep = [['img1.png', 'img2.png'], ['img3.png', 'img5.png']] + with pytest.warns(None) as record: + check_input(tuples_prep, type_of_inputs='tuples', + preprocessor=mock_preprocessor) + assert len(record) == 0 + + with pytest.warns(None) as record: + check_input(tuples_no_prep, type_of_inputs='tuples') # 
numeric type + assert len(record) == 0 + + # not numeric type + tuples_no_prep = np.array([[['img1.png'], ['img2.png']], + [['img3.png'], ['img5.png']]]) + tuples_no_prep = tuples_no_prep.astype(object) + with pytest.raises(ValueError): + check_input(tuples_no_prep, type_of_inputs='tuples') + + +def test_check_tuples_invalid_complex_data(): + """Checks that the right error message is thrown if given complex data ( + this comes from sklearn's check_array's message)""" + tuples = np.array([[[1 + 2j, 3 + 4j], [5 + 7j, 5 + 7j]], + [[1 + 3j, 2 + 4j], [5 + 8j, 1 + 7j]]]) + msg = ("Complex data not supported\n" + "{}\n".format(tuples)) + with pytest.raises(ValueError) as raised_error: + check_input(tuples, type_of_inputs='tuples') + assert str(raised_error.value) == msg + + +# ------------- test check_input with 'classic' type_of_inputs ---------------- + + +@pytest.fixture +def points_prep(): + """Basic array for testing when using a preprocessor""" + points = np.array([1, 2]) + return points + + +@pytest.fixture +def points_no_prep(): + """Basic array for testing when using no preprocessor""" + points = np.array([[1., 2.3], + [2.3, 4.3]]) + return points + + +@pytest.mark.parametrize('estimator, context', + [(NCA(), " by NCA"), ('NCA', " by NCA"), (None, "")]) +@pytest.mark.parametrize('points, found, expected, preprocessor', + [(5, '0', '1D array of indicators or 2D array of ' + 'formed points', mock_preprocessor), + (5, '0', '2D array of formed points', None), + ([1, 2], '1', '2D array of formed points', None), + ([[[5]]], '3', '1D array of indicators or 2D ' + 'array of formed points', + mock_preprocessor), + ([[[5]]], '3', '2D array of formed points', None)]) +def test_check_classic_invalid_shape(estimator, context, points, found, + expected, preprocessor): + """Checks that a value error with the appropriate message is raised if + shape is invalid (valid being 1D or 2D with preprocessor or 2D with no + preprocessor) + """ + points = np.array(points) + msg = ("{} expected{}{}. Found {}D array instead:\ninput={}. 
Reshape your " + "data{}.\n" + .format(expected, context, ' when using a preprocessor' + if preprocessor else '', found, points, + ' and/or use a preprocessor' if + (not preprocessor and points.ndim == 1) else '')) + with pytest.raises(ValueError) as raised_error: + check_input(points, type_of_inputs='classic', preprocessor=preprocessor, + ensure_min_samples=0, + estimator=estimator) + assert str(raised_error.value) == msg + + +@pytest.mark.parametrize('estimator, context', + [(NCA(), " by NCA"), ('NCA', " by NCA"), (None, "")]) +def test_check_classic_invalid_n_features(estimator, context, + points_no_prep): + """Checks that the right warning is printed if not enough features + Here we only test if no preprocessor (otherwise we don't ensure this) + """ + msg = ("Found array with 2 feature(s) (shape={}) while" + " a minimum of 3 is required{}.".format(points_no_prep.shape, + context)) + with pytest.raises(ValueError) as raised_error: + check_input(points_no_prep, type_of_inputs='classic', preprocessor=None, + ensure_min_features=3, + estimator=estimator) + assert str(raised_error.value) == msg + + +@pytest.mark.parametrize('estimator, context', + [(NCA(), " by NCA"), ('NCA', " by NCA"), (None, "")]) +@pytest.mark.parametrize('load_points, preprocessor', + [(points_prep, mock_preprocessor), + (points_no_prep, None), + (points_no_prep, mock_preprocessor)]) +def test_check_classic_invalid_n_samples(estimator, context, load_points, + preprocessor): + """Checks that the right warning is printed if n_samples is too small""" + points = load_points() + msg = ("Found array with 2 sample(s) (shape={}) while a minimum of 3 " + "is required{}.".format((preprocess_points(points, + preprocessor) + if preprocessor is not None and + points.ndim == 1 else + points).shape, + context)) + with pytest.raises(ValueError) as raised_error: + check_input(points, type_of_inputs='classic', preprocessor=preprocessor, + ensure_min_samples=3, + estimator=estimator) + assert str(raised_error.value) == msg + + +@pytest.mark.parametrize('estimator, context', + [(NCA(), " by NCA"), ('NCA', " by NCA"), (None, "")]) +@pytest.mark.parametrize('load_points, preprocessor', + [(points_prep, mock_preprocessor), + (points_no_prep, None), + (points_no_prep, mock_preprocessor)]) +def test_check_classic_invalid_dtype_convertible(estimator, context, + load_points, + preprocessor): + """Checks that a warning is raised if a convertible input is converted to + float""" + points = load_points().astype(object) # here the object conversion is + # useless for the points_prep case, but this allows to test the + # points_prep case + + if preprocessor is not None: # if the preprocessor is not None we + # overwrite it to have a preprocessor that returns objects + def preprocessor(indices): + # preprocessor that returns objects + return np.ones((indices.shape[0], 3)).astype(object) + + msg = ("Data with input dtype object was converted to float64{}." 
+ .format(context)) + with pytest.warns(DataConversionWarning) as raised_warning: + check_input(points, type_of_inputs='classic', + preprocessor=preprocessor, dtype=np.float64, + warn_on_dtype=True, estimator=estimator) + assert str(raised_warning[0].message) == msg + + +@pytest.mark.parametrize('preprocessor, points', + [(mock_preprocessor, np.array([['a', 'b'], + ['e', 'b']])), + (None, np.array([[['b', 'v'], ['a', 'd']], + [['x', 'u'], ['c', 'a']]]))]) +def test_check_classic_invalid_dtype_not_convertible(preprocessor, points): + """Checks that a value error is thrown if attempting to convert an + input not convertible to float + """ + with pytest.raises(ValueError): + check_input(points, type_of_inputs='classic', + preprocessor=preprocessor, dtype=np.float64) + + +@pytest.mark.parametrize('points', + [["img1.png", "img3.png", "img2.png"], + np.array(["img1.png", "img3.png", "img2.png"]), + [2, 0, 2, 1, 4, 9], + range(10), + np.array([2, 0, 2]), + (2, 0, 2), + np.array([[1.2, 2.2], + [2.6, 2.3]])]) +def test_check_classic_valid_with_preprocessor(points): + """Test that valid inputs when using a preprocessor raises no warning""" + with pytest.warns(None) as record: + check_input(points, type_of_inputs='classic', + preprocessor=mock_preprocessor) + assert len(record) == 0 + + +@pytest.mark.parametrize('points', + [np.array([[2.5, 0.1, 2.6], + [1.6, 4.8, 9.1], + [5.6, 2.8, 6.1]]), + np.array([[2, 0, 2], + [1, 4, 9], + [1, 5, 3]]), + [[2, 0, 2], + [1, 4, 9], + [3, 4, 29]], + ((2, 1, 0, 2, 2, 3), + (1, 2, 4, 4, 9, 3), + (3, 1, 4, 4, 29, 4))]) +def test_check_classic_valid_without_preprocessor(points): + """Test that valid inputs when using no preprocessor raises no warning""" + with pytest.warns(None) as record: + check_input(points, type_of_inputs='classic', preprocessor=None) + assert len(record) == 0 + + +def test_check_classic_by_default(): + """Checks that 'classic' is the default behaviour of check_input""" + assert (check_input([[2, 3], [3, 2]]) == + check_input([[2, 3], [3, 2]], type_of_inputs='classic')).all() + + +def test_check_classic_behaviour_auto_dtype(points_no_prep): + """Checks that check_input (for points) allows by default every type if + using a preprocessor, and numeric types if using no preprocessor""" + points_prep = ['img1.png', 'img2.png', 'img3.png', 'img5.png'] + with pytest.warns(None) as record: + check_input(points_prep, type_of_inputs='classic', + preprocessor=mock_preprocessor) + assert len(record) == 0 + + with pytest.warns(None) as record: + check_input(points_no_prep, type_of_inputs='classic') # numeric type + assert len(record) == 0 + + # not numeric type + points_no_prep = np.array(['img1.png', 'img2.png', 'img3.png', + 'img5.png']) + points_no_prep = points_no_prep.astype(object) + with pytest.raises(ValueError): + check_input(points_no_prep, type_of_inputs='classic') + + +def test_check_classic_invalid_complex_data(): + """Checks that the right error message is thrown if given complex data ( + this comes from sklearn's check_array's message)""" + points = np.array([[[1 + 2j, 3 + 4j], [5 + 7j, 5 + 7j]], + [[1 + 3j, 2 + 4j], [5 + 8j, 1 + 7j]]]) + msg = ("Complex data not supported\n" + "{}\n".format(points)) + with pytest.raises(ValueError) as raised_error: + check_input(points, type_of_inputs='classic') + assert str(raised_error.value) == msg + + +# ----------------------------- Test preprocessor ----------------------------- + + +X = np.array([[0.89, 0.11, 1.48, 0.12], + [2.63, 1.08, 1.68, 0.46], + [1.00, 0.59, 0.62, 1.15]]) + + +class MockFileLoader: 
+ """Preprocessor that takes a root file path at construction and simulates + fetching the file in the specific root folder when given the name of the + file""" + + def __init__(self, root): + self.root = root + self.folders = {'fake_root': {'img0.png': X[0], + 'img1.png': X[1], + 'img2.png': X[2] + }, + 'other_folder': {} # empty folder + } + + def __call__(self, path_list): + images = list() + for path in path_list: + images.append(self.folders[self.root][path]) + return np.array(images) + + +def mock_id_loader(list_of_indicators): + """A preprocessor as a function that takes indicators (strings) and + returns the corresponding samples""" + points = [] + for indicator in list_of_indicators: + points.append(X[int(indicator[2:])]) + return np.array(points) + + +tuples_list = [np.array([[0, 1], + [2, 1]]), + + np.array([['img0.png', 'img1.png'], + ['img2.png', 'img1.png']]), + + np.array([['id0', 'id1'], + ['id2', 'id1']]) + ] + +points_list = [np.array([0, 1, 2, 1]), + + np.array(['img0.png', 'img1.png', 'img2.png', 'img1.png']), + + np.array(['id0', 'id1', 'id2', 'id1']) + ] + +preprocessors = [X, MockFileLoader('fake_root'), mock_id_loader] + + +@pytest.fixture +def y_tuples(): + y = [-1, 1] + return y + + +@pytest.fixture +def y_points(): + y = [0, 1, 0, 0] + return y + + +@pytest.mark.parametrize('preprocessor, tuples', zip(preprocessors, + tuples_list)) +def test_preprocessor_weakly_supervised(preprocessor, tuples, y_tuples): + """Tests different ways to use the preprocessor argument: an array, + a class callable, and a function callable, with a weakly supervised + algorithm + """ + nca = ITML(preprocessor=preprocessor) + nca.fit(tuples, y_tuples) + + +@pytest.mark.parametrize('preprocessor, points', zip(preprocessors, + points_list)) +def test_preprocessor_supervised(preprocessor, points, y_points): + """Tests different ways to use the preprocessor argument: an array, + a class callable, and a function callable, with a supervised algorithm + """ + lfda = LFDA(preprocessor=preprocessor) + lfda.fit(points, y_points) + + +@pytest.mark.parametrize('estimator', ['NCA', NCA(), None]) +def test_preprocess_tuples_invalid_message(estimator): + """Checks that if the preprocessor does some weird stuff, the preprocessed + input is detected as weird. Checks this for preprocess_tuples.""" + + context = make_context(estimator) + (' after the preprocessor ' + 'has been applied') + + def preprocessor(sequence): + return np.ones((len(sequence), 2, 2)) # returns a 3D array instead of 2D + + with pytest.raises(ValueError) as raised_error: + check_input(np.ones((3, 2)), type_of_inputs='tuples', + preprocessor=preprocessor, estimator=estimator) + expected_msg = ("3D array of formed tuples expected{}. Found 4D " + "array instead:\ninput={}. 
Reshape your data{}.\n" + .format(context, np.ones((3, 2, 2, 2)), + ' and/or use a preprocessor' if preprocessor + is not None else '')) + assert str(raised_error.value) == expected_msg + + +@pytest.mark.parametrize('estimator', ['NCA', NCA(), None]) +def test_preprocess_points_invalid_message(estimator): + """Checks that if the preprocessor does some weird stuff, the preprocessed + input is detected as weird.""" + + context = make_context(estimator) + (' after the preprocessor ' + 'has been applied') + + def preprocessor(sequence): + return np.ones((len(sequence), 2, 2)) # returns a 3D array instead of 2D + + with pytest.raises(ValueError) as raised_error: + check_input(np.ones((3,)), type_of_inputs='classic', + preprocessor=preprocessor, estimator=estimator) + expected_msg = ("2D array of formed points expected{}. " + "Found 3D array instead:\ninput={}. Reshape your data{}.\n" + .format(context, np.ones((3, 2, 2)), + ' and/or use a preprocessor' if preprocessor + is not None else '')) + assert str(raised_error.value) == expected_msg + + +def test_preprocessor_error_message(): + """Tests whether the preprocessor returns a preprocessor error when there + is a problem using the preprocessor + """ + preprocessor = ArrayIndexer(np.array([[1.2, 3.3], [3.1, 3.2]])) + + # with tuples + X = np.array([[[2, 3], [3, 3]], [[2, 3], [3, 2]]]) + # There are less samples than the max index we want to preprocess + with pytest.raises(PreprocessorError): + preprocess_tuples(X, preprocessor) + + # with points + X = np.array([[1], [2], [3], [3]]) + with pytest.raises(PreprocessorError): + preprocess_points(X, preprocessor) + + +@pytest.mark.parametrize('input_data', [[[5, 3], [3, 2]], + ((5, 3), (3, 2)) + ]) +@pytest.mark.parametrize('indices', [[0, 1], (1, 0)]) +def test_array_like_indexer_array_like_valid_classic(input_data, indices): + """Checks that any array-like is valid in the 'preprocessor' argument, + and in the indices, for a classic input""" + class MockMetricLearner(MahalanobisMixin): + pass + + mock_algo = MockMetricLearner(preprocessor=input_data) + mock_algo._prepare_inputs(indices, type_of_inputs='classic') + + +@pytest.mark.parametrize('input_data', [[[5, 3], [3, 2]], + ((5, 3), (3, 2)) + ]) +@pytest.mark.parametrize('indices', [[[0, 1], [1, 0]], ((1, 0), (1, 0))]) +def test_array_like_indexer_array_like_valid_tuples(input_data, indices): + """Checks that any array-like is valid in the 'preprocessor' argument, + and in the indices, for a classic input""" + class MockMetricLearner(MahalanobisMixin): + pass + + mock_algo = MockMetricLearner(preprocessor=input_data) + mock_algo._prepare_inputs(indices, type_of_inputs='tuples') + + +@pytest.mark.parametrize('preprocessor', [4, NCA()]) +def test_error_message_check_preprocessor(preprocessor): + """Checks that if the preprocessor given is not an array-like or a + callable, the right error message is returned""" + class MockMetricLearner(MahalanobisMixin): + pass + + mock_algo = MockMetricLearner(preprocessor=preprocessor) + with pytest.raises(ValueError) as e: + mock_algo.check_preprocessor() + assert str(e.value) == ("Invalid type for the preprocessor: {}. 
You should "
+                          "provide either None, an array-like object, "
+                          "or a callable.".format(type(preprocessor)))
+
+
+@pytest.mark.parametrize('estimator', [ITML(), LSML(), MMC(), SDML()],
+                         ids=['ITML', 'LSML', 'MMC', 'SDML'])
+def test_error_message_tuple_size(estimator):
+  """Tests that if a tuples learner is not given the right number of points
+  per tuple, it raises an error with the right message"""
+  estimator = clone(estimator)
+  set_random_state(estimator)
+  invalid_pairs = np.array([[[1.3, 6.3], [3., 6.8], [6.5, 4.4]],
+                            [[1.9, 5.3], [1., 7.8], [3.2, 1.2]]])
+  y = [1, 1]
+  with pytest.raises(ValueError) as raised_err:
+    estimator.fit(invalid_pairs, y)
+  expected_msg = ("Tuples of {} element(s) expected{}. Got tuples of 3 "
+                  "element(s) instead (shape=(2, 3, 2)):\ninput={}.\n"
+                  .format(estimator._tuple_size, make_context(estimator),
+                          invalid_pairs))
+  assert str(raised_err.value) == expected_msg
+
+
+@pytest.mark.parametrize('estimator, _', metric_learners,
+                         ids=ids_metric_learners)
+def test_error_message_t_score_pairs(estimator, _):
+  """Tests that calling score_pairs on tuples of the wrong size (e.g.
+  triplets) raises the right error message
+  """
+  estimator = clone(estimator)
+  set_random_state(estimator)
+  estimator.check_preprocessor()
+  triplets = np.array([[[1.3, 6.3], [3., 6.8], [6.5, 4.4]],
+                       [[1.9, 5.3], [1., 7.8], [3.2, 1.2]]])
+  with pytest.raises(ValueError) as raised_err:
+    estimator.score_pairs(triplets)
+  expected_msg = ("Tuples of 2 element(s) expected{}. Got tuples of 3 "
+                  "element(s) instead (shape=(2, 3, 2)):\ninput={}.\n"
+                  .format(make_context(estimator), triplets))
+  assert str(raised_err.value) == expected_msg
+
+
+def test_preprocess_tuples_simple_example():
+  """Test the preprocessor on a very simple example of tuples to ensure the
+  result is as expected"""
+  array = np.array([[1, 2],
+                    [2, 3],
+                    [4, 5]])
+
+  def fun(row):
+    return np.array([[1, 1], [3, 3], [4, 4]])
+
+  expected_result = np.array([[[1, 1], [1, 1]],
+                              [[3, 3], [3, 3]],
+                              [[4, 4], [4, 4]]])
+
+  assert (preprocess_tuples(array, fun) == expected_result).all()
+
+
+def test_preprocess_points_simple_example():
+  """Test the preprocessor on very simple examples of points to ensure the
+  result is as expected"""
+  array = np.array([1, 2, 4])
+
+  def fun(row):
+    return [[1, 1], [3, 3], [4, 4]]
+
+  expected_result = np.array([[1, 1],
+                              [3, 3],
+                              [4, 4]])
+
+  assert (preprocess_points(array, fun) == expected_result).all()
+
+
+@pytest.mark.parametrize('estimator, build_dataset', metric_learners,
+                         ids=ids_metric_learners)
+def test_same_with_or_without_preprocessor(estimator, build_dataset):
+  """Test that algorithms using a preprocessor behave consistently
+  with their no-preprocessor equivalent
+  """
+  dataset_indices = build_dataset(with_preprocessor=True)
+  dataset_formed = build_dataset(with_preprocessor=False)
+  X = dataset_indices.preprocessor
+  indicators_to_transform = dataset_indices.to_transform
+  formed_points_to_transform = dataset_formed.to_transform
+  (indices_train, indices_test, y_train, y_test, formed_train,
+   formed_test) = train_test_split(dataset_indices.data,
+                                   dataset_indices.target,
+                                   dataset_formed.data,
+                                   random_state=SEED)
+
+  def make_random_state(estimator):
+    rs = {}
+    if estimator.__class__.__name__[-11:] == '_Supervised':
+      rs['random_state'] = check_random_state(SEED)
+    return rs
+
+  estimator_with_preprocessor = clone(estimator)
+  set_random_state(estimator_with_preprocessor)
+  estimator_with_preprocessor.set_params(preprocessor=X)
+  estimator_with_preprocessor.fit(indices_train, y_train,
+                                  **make_random_state(estimator))
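
The invariance enforced by this test can be reproduced in isolation. Below is
a minimal sketch with a deterministic learner, assuming only the
array-preprocessor semantics exercised earlier in this file (integer indices
resolve to rows of the array given as ``preprocessor``)::

    import numpy as np
    from metric_learn import NCA

    rng = np.random.RandomState(0)
    X = rng.randn(20, 3)          # the "preprocessor" array
    y = np.arange(20) % 2
    indices = np.arange(20)       # the same data, given as indices into X

    nca_indices = NCA(preprocessor=X).fit(indices, y)
    nca_formed = NCA().fit(X, y)  # and given directly as formed points

    # both routes must agree on the learned transformation
    assert np.allclose(nca_indices.transform(indices),
                       nca_formed.transform(X))
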
+
+  estimator_without_preprocessor = clone(estimator)
+  set_random_state(estimator_without_preprocessor)
+  estimator_without_preprocessor.set_params(preprocessor=None)
+  estimator_without_preprocessor.fit(formed_train, y_train,
+                                     **make_random_state(estimator))
+
+  estimator_with_prep_formed = clone(estimator)
+  set_random_state(estimator_with_prep_formed)
+  estimator_with_prep_formed.set_params(preprocessor=X)
+  estimator_with_prep_formed.fit(indices_train, y_train,
+                                 **make_random_state(estimator))
+
+  # test prediction methods
+  for method in ["predict", "decision_function"]:
+    if hasattr(estimator, method):
+      output_with_prep = getattr(estimator_with_preprocessor,
+                                 method)(indices_test)
+      output_without_prep = getattr(estimator_without_preprocessor,
+                                    method)(formed_test)
+      assert np.array(output_with_prep == output_without_prep).all()
+      output_with_prep = getattr(estimator_with_preprocessor,
+                                 method)(indices_test)
+      output_with_prep_formed = getattr(estimator_with_prep_formed,
+                                        method)(formed_test)
+      assert np.array(output_with_prep == output_with_prep_formed).all()
+
+  # test score_pairs
+  output_with_prep = estimator_with_preprocessor.score_pairs(
+      indicators_to_transform[[[[0, 2], [5, 3]]]])
+  output_without_prep = estimator_without_preprocessor.score_pairs(
+      formed_points_to_transform[[[[0, 2], [5, 3]]]])
+  assert np.array(output_with_prep == output_without_prep).all()
+
+  output_with_prep = estimator_with_preprocessor.score_pairs(
+      indicators_to_transform[[[[0, 2], [5, 3]]]])
+  output_with_prep_formed = estimator_with_prep_formed.score_pairs(
+      formed_points_to_transform[[[[0, 2], [5, 3]]]])
+  assert np.array(output_with_prep == output_with_prep_formed).all()
+
+  # test transform
+  output_with_prep = estimator_with_preprocessor.transform(
+      indicators_to_transform)
+  output_without_prep = estimator_without_preprocessor.transform(
+      formed_points_to_transform)
+  assert np.array(output_with_prep == output_without_prep).all()
+
+  output_with_prep = estimator_with_preprocessor.transform(
+      indicators_to_transform)
+  output_with_prep_formed = estimator_with_prep_formed.transform(
+      formed_points_to_transform)
+  assert np.array(output_with_prep == output_with_prep_formed).all()
+
+
+def test_check_collapsed_pairs_raises_no_error():
+  """Checks that check_collapsed_pairs raises no error if no collapsed pairs
+  are present"""
+  pairs_ok = np.array([[[0.1, 3.3], [3.3, 0.1]],
+                       [[0.1, 3.3], [3.3, 0.1]],
+                       [[2.5, 8.1], [0.1, 3.3]]])
+  check_collapsed_pairs(pairs_ok)
+
+
+def test_check_collapsed_pairs_raises_error():
+  """Checks that check_collapsed_pairs raises an error if some collapsed
+  pairs are present"""
+  pairs_not_ok = np.array([[[0.1, 3.3], [0.1, 3.3]],
+                           [[0.1, 3.3], [3.3, 0.1]],
+                           [[2.5, 8.1], [2.5, 8.1]]])
+  with pytest.raises(ValueError) as e:
+    check_collapsed_pairs(pairs_not_ok)
+  assert str(e.value) == ("2 collapsed pairs found (where the left element is "
+                          "the same as the right element), out of 3 pairs in"
+                          " total.")

From 8658e06fd2e4151e4a307d1a235d88e7d5586273 Mon Sep 17 00:00:00 2001
From: William de Vazelhes <31916524+wdevazelhes@users.noreply.github.com>
Date: Wed, 2 Jan 2019 18:14:25 +0100
Subject: [PATCH 080/210] [MRG] FIX put back nca's tests (#143)

* FIX put back nca's tests

* Remove deprecation test for learning rate because there is no more
  learning rate

* FIX: update tests with new terminology ('transformer_')
---
 test/metric_learn_test.py | 95 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 94 insertions(+), 1 deletion(-)

diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py
index 74bc25de..b6ed89d9 100644
--- a/test/metric_learn_test.py
+++ b/test/metric_learn_test.py
@@ -6,7 +6,7 @@
 from six.moves import xrange
 from sklearn.metrics import pairwise_distances
 from sklearn.datasets import load_iris, make_classification, make_regression
-from numpy.testing import assert_array_almost_equal
+from numpy.testing import assert_array_almost_equal, assert_array_equal
 from sklearn.utils.testing import assert_warns_message
 from sklearn.exceptions import ConvergenceWarning
 from sklearn.utils.validation import check_X_y
@@ -138,6 +138,99 @@ def test_iris(self):
     csep = class_separation(nca.transform(self.iris_points), self.iris_labels)
     self.assertLess(csep, 0.20)

+  def test_finite_differences(self):
+    """Test gradient of loss function
+
+    Assert that the gradient is almost equal to its finite differences
+    approximation.
+    """
+    # Initialize the transformation `M`, as well as `X` and `y` and `NCA`
+    X, y = make_classification()
+    M = np.random.randn(np.random.randint(1, X.shape[1] + 1), X.shape[1])
+    mask = y[:, np.newaxis] == y[np.newaxis, :]
+    nca = NCA()
+    nca.n_iter_ = 0
+
+    def fun(M):
+      return nca._loss_grad_lbfgs(M, X, mask)[0]
+
+    def grad(M):
+      return nca._loss_grad_lbfgs(M, X, mask)[1].ravel()
+
+    # compute relative error
+    rel_diff = check_grad(fun, grad, M.ravel()) / np.linalg.norm(grad(M))
+    np.testing.assert_almost_equal(rel_diff, 0., decimal=6)
+
+  def test_simple_example(self):
+    """Test on a simple example.
+
+    Puts four points in the input space where the opposite-label points are
+    next to each other. After the transform, the same-label points should be
+    next to each other.
+
+    """
+    X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]])
+    y = np.array([1, 0, 1, 0])
+    nca = NCA(num_dims=2,)
+    nca.fit(X, y)
+    X_transformed = nca.transform(X)
+    np.testing.assert_equal(pairwise_distances(X_transformed).argsort()[:, 1],
+                            np.array([2, 3, 0, 1]))
+
+  def test_singleton_class(self):
+      X = self.iris_points
+      y = self.iris_labels
+
+      # one singleton class: test fitting works
+      singleton_class = 1
+      ind_singleton, = np.where(y == singleton_class)
+      y[ind_singleton] = 2
+      y[ind_singleton[0]] = singleton_class
+
+      nca = NCA(max_iter=30)
+      nca.fit(X, y)
+
+      # One non-singleton class: test fitting works
+      ind_1, = np.where(y == 1)
+      ind_2, = np.where(y == 2)
+      y[ind_1] = 0
+      y[ind_1[0]] = 1
+      y[ind_2] = 0
+      y[ind_2[0]] = 2
+
+      nca = NCA(max_iter=30)
+      nca.fit(X, y)
+
+      # Only singleton classes: test fitting does nothing (the gradient
+      # must be null in this case, so the final matrix must stay like
+      # the initialization)
+      ind_0, = np.where(y == 0)
+      ind_1, = np.where(y == 1)
+      ind_2, = np.where(y == 2)
+      X = X[[ind_0[0], ind_1[0], ind_2[0]]]
+      y = y[[ind_0[0], ind_1[0], ind_2[0]]]
+
+      EPS = np.finfo(float).eps
+      A = np.zeros((X.shape[1], X.shape[1]))
+      np.fill_diagonal(A,
+                       1. / (np.maximum(X.max(axis=0) - X.min(axis=0), EPS)))
+      nca = NCA(max_iter=30, num_dims=X.shape[1])
+      nca.fit(X, y)
+      assert_array_equal(nca.transformer_, A)
+
+  def test_one_class(self):
+    # if there is only one class the gradient is null, so the final matrix
+    # must stay like the initialization
+    X = self.iris_points[self.iris_labels == 0]
+    y = self.iris_labels[self.iris_labels == 0]
+    EPS = np.finfo(float).eps
+    A = np.zeros((X.shape[1], X.shape[1]))
+    np.fill_diagonal(A,
+                     1. 
/ (np.maximum(X.max(axis=0) - X.min(axis=0), EPS))) + nca = NCA(max_iter=30, num_dims=X.shape[1]) + nca.fit(X, y) + assert_array_equal(nca.transformer_, A) + class TestLFDA(MetricTestCase): def test_iris(self): From a9979a8a54bae31a710eb4117cde5b8af4fbdb28 Mon Sep 17 00:00:00 2001 From: William de Vazelhes <31916524+wdevazelhes@users.noreply.github.com> Date: Wed, 2 Jan 2019 18:14:47 +0100 Subject: [PATCH 081/210] [MRG] API: remove num_labeled parameter (#119) * API: remove num_labeled parameter * DEP: Add deprecation warnings for num_labels * MAINT: put deprecation for version 0.5.0 * Revert "MAINT: put deprecation for version 0.5.0" This reverts commit 8727c44213b2c25c0aebfa6fa12b25c322abada7. * Revert "Merge remote-tracking branch 'origin/master' into fix/remove_num_labeled_parameter" This reverts commit 944bb3ec80a6b27ae247465a9bdfa042ce411127, reversing changes made to 8727c44213b2c25c0aebfa6fa12b25c322abada7. * Revert "Revert "MAINT: put deprecation for version 0.5.0"" This reverts commit bc1eb32f95d0aea84ba84d60e6d320d5e809e4fe. * FIX string representation test wrongly merged * git revert d6bd0d4 * STY fix pep8 errors * STY: fix docstring indentation * FIX remove tests from NCA that are dealt with in #143 * FIX remove nca deprecation test because we remove totally learning rate in the merge #139 * FIX update version * Remove the use of random_subset --- metric_learn/constraints.py | 13 +---------- metric_learn/itml.py | 20 ++++++++++------- metric_learn/lsml.py | 21 +++++++++++------- metric_learn/mmc.py | 20 ++++++++++------- metric_learn/sdml.py | 18 +++++++++------ test/metric_learn_test.py | 44 +++++++++++++++++++++++++++++++++++++ test/test_base_metric.py | 12 +++++----- test/test_utils.py | 3 +-- 8 files changed, 100 insertions(+), 51 deletions(-) diff --git a/metric_learn/constraints.py b/metric_learn/constraints.py index 17523a46..c4ddcae8 100644 --- a/metric_learn/constraints.py +++ b/metric_learn/constraints.py @@ -89,17 +89,6 @@ def chunks(self, num_chunks=100, chunk_size=2, random_state=np.random): (num_chunks, chunk_size)) return chunks - @staticmethod - def random_subset(all_labels, num_preserved=np.inf, random_state=np.random): - """ - the random state object to be passed must be a numpy random seed - """ - n = len(all_labels) - num_ignored = max(0, n - num_preserved) - idx = random_state.randint(n, size=num_ignored) - partial_labels = np.array(all_labels, copy=True) - partial_labels[idx] = -1 - return Constraints(partial_labels) def wrap_pairs(X, constraints): a = np.array(constraints[0]) @@ -109,4 +98,4 @@ def wrap_pairs(X, constraints): constraints = np.vstack((np.column_stack((a, b)), np.column_stack((c, d)))) y = np.vstack([np.ones((len(a), 1)), - np.ones((len(c), 1))]) pairs = X[constraints] - return pairs, y \ No newline at end of file + return pairs, y diff --git a/metric_learn/itml.py b/metric_learn/itml.py index 48e71f56..8a251fe0 100644 --- a/metric_learn/itml.py +++ b/metric_learn/itml.py @@ -14,6 +14,7 @@ """ from __future__ import print_function, absolute_import +import warnings import numpy as np from six.moves import xrange from sklearn.metrics import pairwise_distances @@ -172,8 +173,8 @@ class ITML_Supervised(_BaseITML, TransformerMixin): """ def __init__(self, gamma=1., max_iter=1000, convergence_threshold=1e-3, - num_labeled=np.inf, num_constraints=None, bounds=None, A0=None, - verbose=False, preprocessor=None): + num_labeled='deprecated', num_constraints=None, bounds=None, + A0=None, verbose=False, preprocessor=None): """Initialize the supervised 
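
Throughout this patch, ``Constraints.random_subset(y, ...)`` calls give way
to a plain ``Constraints(y)``. A short sketch of the resulting usage,
assuming the ``Constraints`` helper behaves as in the surrounding diffs::

    import numpy as np
    from metric_learn.constraints import Constraints

    y = np.arange(30) % 3        # three classes, all labels are used now
    c = Constraints(y)           # no random subsetting anymore
    a, b, c_, d = c.positive_negative_pairs(
        20, random_state=np.random.RandomState(42))
    # (a[i], b[i]) share a label; (c_[i], d[i]) do not
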
version of `ITML`. `ITML_Supervised` creates pairs of similar sample by taking same class @@ -186,10 +187,10 @@ def __init__(self, gamma=1., max_iter=1000, convergence_threshold=1e-3, value for slack variables max_iter : int, optional convergence_threshold : float, optional - num_labeled : int, optional (default=np.inf) - number of labeled points to keep for building pairs. Extra - labeled points will be considered unlabeled, and ignored as such. - Use np.inf (default) to use all labeled points. + num_labeled : Not used + .. deprecated:: 0.5.0 + `num_labeled` was deprecated in version 0.5.0 and will + be removed in 0.6.0. num_constraints: int, optional number of constraints to generate bounds : list (pos,neg) pairs, optional @@ -224,14 +225,17 @@ def fit(self, X, y, random_state=np.random): random_state : numpy.random.RandomState, optional If provided, controls random number generation. """ + if self.num_labeled != 'deprecated': + warnings.warn('"num_labeled" parameter is not used.' + ' It has been deprecated in version 0.5.0 and will be' + 'removed in 0.6.0', DeprecationWarning) X, y = self._prepare_inputs(X, y, ensure_min_samples=2) num_constraints = self.num_constraints if num_constraints is None: num_classes = len(np.unique(y)) num_constraints = 20 * num_classes**2 - c = Constraints.random_subset(y, self.num_labeled, - random_state=random_state) + c = Constraints(y) pos_neg = c.positive_negative_pairs(num_constraints, random_state=random_state) pairs, y = wrap_pairs(X, pos_neg) diff --git a/metric_learn/lsml.py b/metric_learn/lsml.py index 73296b46..9090a431 100644 --- a/metric_learn/lsml.py +++ b/metric_learn/lsml.py @@ -8,6 +8,7 @@ """ from __future__ import print_function, absolute_import, division +import warnings import numpy as np import scipy.linalg from six.moves import xrange @@ -172,8 +173,9 @@ class LSML_Supervised(_BaseLSML, TransformerMixin): metric (See :meth:`transformer_from_metric`.) """ - def __init__(self, tol=1e-3, max_iter=1000, prior=None, num_labeled=np.inf, - num_constraints=None, weights=None, verbose=False, + def __init__(self, tol=1e-3, max_iter=1000, prior=None, + num_labeled='deprecated', num_constraints=None, weights=None, + verbose=False, preprocessor=None): """Initialize the supervised version of `LSML`. @@ -188,10 +190,10 @@ def __init__(self, tol=1e-3, max_iter=1000, prior=None, num_labeled=np.inf, max_iter : int, optional prior : (d x d) matrix, optional guess at a metric [default: covariance(X)] - num_labeled : int, optional (default=np.inf) - number of labeled points to keep for building quadruplets. Extra - labeled points will be considered unlabeled, and ignored as such. - Use np.inf (default) to use all labeled points. + num_labeled : Not used + .. deprecated:: 0.5.0 + `num_labeled` was deprecated in version 0.5.0 and will + be removed in 0.6.0. num_constraints: int, optional number of constraints to generate weights : (m,) array of floats, optional @@ -222,14 +224,17 @@ def fit(self, X, y, random_state=np.random): random_state : numpy.random.RandomState, optional If provided, controls random number generation. """ + if self.num_labeled != 'deprecated': + warnings.warn('"num_labeled" parameter is not used.' 
+ ' It has been deprecated in version 0.5.0 and will be' + 'removed in 0.6.0', DeprecationWarning) X, y = self._prepare_inputs(X, y, ensure_min_samples=2) num_constraints = self.num_constraints if num_constraints is None: num_classes = len(np.unique(y)) num_constraints = 20 * num_classes**2 - c = Constraints.random_subset(y, self.num_labeled, - random_state=random_state) + c = Constraints(y) pos_neg = c.positive_negative_pairs(num_constraints, same_length=True, random_state=random_state) return _BaseLSML._fit(self, X[np.column_stack(pos_neg)], diff --git a/metric_learn/mmc.py b/metric_learn/mmc.py index 596f085f..6d929d6e 100644 --- a/metric_learn/mmc.py +++ b/metric_learn/mmc.py @@ -17,6 +17,7 @@ """ from __future__ import print_function, absolute_import, division +import warnings import numpy as np from six.moves import xrange from sklearn.base import TransformerMixin @@ -389,8 +390,8 @@ class MMC_Supervised(_BaseMMC, TransformerMixin): """ def __init__(self, max_iter=100, max_proj=10000, convergence_threshold=1e-6, - num_labeled=np.inf, num_constraints=None, - A0=None, diagonal=False, diagonal_c=1.0, verbose=False, + num_labeled='deprecated', num_constraints=None, A0=None, + diagonal=False, diagonal_c=1.0, verbose=False, preprocessor=None): """Initialize the supervised version of `MMC`. @@ -403,10 +404,10 @@ def __init__(self, max_iter=100, max_proj=10000, convergence_threshold=1e-6, max_iter : int, optional max_proj : int, optional convergence_threshold : float, optional - num_labeled : int, optional (default=np.inf) - number of labeled points to keep for building pairs. Extra - labeled points will be considered unlabeled, and ignored as such. - Use np.inf (default) to use all labeled points. + num_labeled : Not used + .. deprecated:: 0.5.0 + `num_labeled` was deprecated in version 0.5.0 and will + be removed in 0.6.0. num_constraints: int, optional number of constraints to generate A0 : (d x d) matrix, optional @@ -443,14 +444,17 @@ def fit(self, X, y, random_state=np.random): random_state : numpy.random.RandomState, optional If provided, controls random number generation. """ + if self.num_labeled != 'deprecated': + warnings.warn('"num_labeled" parameter is not used.' + ' It has been deprecated in version 0.5.0 and will be' + 'removed in 0.6.0', DeprecationWarning) X, y = self._prepare_inputs(X, y, ensure_min_samples=2) num_constraints = self.num_constraints if num_constraints is None: num_classes = len(np.unique(y)) num_constraints = 20 * num_classes**2 - c = Constraints.random_subset(y, self.num_labeled, - random_state=random_state) + c = Constraints(y) pos_neg = c.positive_negative_pairs(num_constraints, random_state=random_state) pairs, y = wrap_pairs(X, pos_neg) diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py index 1892d176..b1421736 100644 --- a/metric_learn/sdml.py +++ b/metric_learn/sdml.py @@ -9,6 +9,7 @@ """ from __future__ import absolute_import +import warnings import numpy as np from sklearn.base import TransformerMixin from sklearn.covariance import graph_lasso @@ -113,7 +114,7 @@ class SDML_Supervised(_BaseSDML, TransformerMixin): """ def __init__(self, balance_param=0.5, sparsity_param=0.01, use_cov=True, - num_labeled=np.inf, num_constraints=None, verbose=False, + num_labeled='deprecated', num_constraints=None, verbose=False, preprocessor=None): """Initialize the supervised version of `SDML`. 
@@ -128,10 +129,10 @@ def __init__(self, balance_param=0.5, sparsity_param=0.01, use_cov=True, trade off between optimizer and sparseness (see graph_lasso) use_cov : bool, optional controls prior matrix, will use the identity if use_cov=False - num_labeled : int, optional (default=np.inf) - number of labeled points to keep for building pairs. Extra - labeled points will be considered unlabeled, and ignored as such. - Use np.inf (default) to use all labeled points. + num_labeled : Not used + .. deprecated:: 0.5.0 + `num_labeled` was deprecated in version 0.5.0 and will + be removed in 0.6.0. num_constraints : int, optional number of constraints to generate verbose : bool, optional @@ -164,14 +165,17 @@ def fit(self, X, y, random_state=np.random): self : object Returns the instance. """ + if self.num_labeled != 'deprecated': + warnings.warn('"num_labeled" parameter is not used.' + ' It has been deprecated in version 0.5.0 and will be' + 'removed in 0.6.0', DeprecationWarning) X, y = self._prepare_inputs(X, y, ensure_min_samples=2) num_constraints = self.num_constraints if num_constraints is None: num_classes = len(np.unique(y)) num_constraints = 20 * num_classes**2 - c = Constraints.random_subset(y, self.num_labeled, - random_state=random_state) + c = Constraints(y) pos_neg = c.positive_negative_pairs(num_constraints, random_state=random_state) pairs, y = wrap_pairs(X, pos_neg) diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py index b6ed89d9..eebce1f9 100644 --- a/test/metric_learn_test.py +++ b/test/metric_learn_test.py @@ -57,6 +57,17 @@ def test_iris(self): csep = class_separation(lsml.transform(self.iris_points), self.iris_labels) self.assertLess(csep, 0.8) # it's pretty terrible + def test_deprecation(self): + # test that the right deprecation message is thrown. + # TODO: remove in v.0.5 + X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) + y = np.array([1, 0, 1, 0]) + lsml_supervised = LSML_Supervised(num_labeled=np.inf) + msg = ('"num_labeled" parameter is not used.' + ' It has been deprecated in version 0.5.0 and will be' + 'removed in 0.6.0') + assert_warns_message(DeprecationWarning, msg, lsml_supervised.fit, X, y) + class TestITML(MetricTestCase): def test_iris(self): @@ -66,6 +77,17 @@ def test_iris(self): csep = class_separation(itml.transform(self.iris_points), self.iris_labels) self.assertLess(csep, 0.2) + def test_deprecation(self): + # test that the right deprecation message is thrown. + # TODO: remove in v.0.5 + X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) + y = np.array([1, 0, 1, 0]) + itml_supervised = ITML_Supervised(num_labeled=np.inf) + msg = ('"num_labeled" parameter is not used.' + ' It has been deprecated in version 0.5.0 and will be' + 'removed in 0.6.0') + assert_warns_message(DeprecationWarning, msg, itml_supervised.fit, X, y) + class TestLMNN(MetricTestCase): def test_iris(self): @@ -121,6 +143,17 @@ def test_iris(self): csep = class_separation(sdml.transform(self.iris_points), self.iris_labels) self.assertLess(csep, 0.25) + def test_deprecation(self): + # test that the right deprecation message is thrown. + # TODO: remove in v.0.5 + X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) + y = np.array([1, 0, 1, 0]) + sdml_supervised = SDML_Supervised(num_labeled=np.inf) + msg = ('"num_labeled" parameter is not used.' 
+ ' It has been deprecated in version 0.5.0 and will be' + 'removed in 0.6.0') + assert_warns_message(DeprecationWarning, msg, sdml_supervised.fit, X, y) + class TestNCA(MetricTestCase): def test_iris(self): @@ -335,6 +368,17 @@ def test_iris(self): csep = class_separation(mmc.transform(self.iris_points), self.iris_labels) self.assertLess(csep, 0.2) + def test_deprecation(self): + # test that the right deprecation message is thrown. + # TODO: remove in v.0.5 + X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) + y = np.array([1, 0, 1, 0]) + mmc_supervised = MMC_Supervised(num_labeled=np.inf) + msg = ('"num_labeled" parameter is not used.' + ' It has been deprecated in version 0.5.0 and will be' + 'removed in 0.6.0') + assert_warns_message(DeprecationWarning, msg, mmc_supervised.fit, X, y) + @pytest.mark.parametrize(('algo_class', 'dataset'), [(NCA, make_classification()), diff --git a/test/test_base_metric.py b/test/test_base_metric.py index c9c8fb57..fdea2949 100644 --- a/test/test_base_metric.py +++ b/test/test_base_metric.py @@ -32,7 +32,7 @@ def test_itml(self): """.strip('\n')) self.assertEqual(str(metric_learn.ITML_Supervised()), """ ITML_Supervised(A0=None, bounds=None, convergence_threshold=0.001, gamma=1.0, - max_iter=1000, num_constraints=None, num_labeled=inf, + max_iter=1000, num_constraints=None, num_labeled='deprecated', preprocessor=None, verbose=False) """.strip('\n')) @@ -42,7 +42,7 @@ def test_lsml(self): "LSML(max_iter=1000, preprocessor=None, prior=None, tol=0.001, " "verbose=False)") self.assertEqual(str(metric_learn.LSML_Supervised()), """ -LSML_Supervised(max_iter=1000, num_constraints=None, num_labeled=inf, +LSML_Supervised(max_iter=1000, num_constraints=None, num_labeled='deprecated', preprocessor=None, prior=None, tol=0.001, verbose=False, weights=None) """.strip('\n')) @@ -52,9 +52,9 @@ def test_sdml(self): "SDML(balance_param=0.5, preprocessor=None, " "sparsity_param=0.01, use_cov=True,\n verbose=False)") self.assertEqual(str(metric_learn.SDML_Supervised()), """ -SDML_Supervised(balance_param=0.5, num_constraints=None, num_labeled=inf, - preprocessor=None, sparsity_param=0.01, use_cov=True, - verbose=False) +SDML_Supervised(balance_param=0.5, num_constraints=None, + num_labeled='deprecated', preprocessor=None, sparsity_param=0.01, + use_cov=True, verbose=False) """.strip('\n')) def test_rca(self): @@ -78,7 +78,7 @@ def test_mmc(self): self.assertEqual(str(metric_learn.MMC_Supervised()), """ MMC_Supervised(A0=None, convergence_threshold=1e-06, diagonal=False, diagonal_c=1.0, max_iter=100, max_proj=10000, num_constraints=None, - num_labeled=inf, preprocessor=None, verbose=False) + num_labeled='deprecated', preprocessor=None, verbose=False) """.strip('\n')) if __name__ == '__main__': diff --git a/test/test_utils.py b/test/test_utils.py index de59e9ff..39c718ac 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -58,8 +58,7 @@ def build_data(): input_data, labels = load_iris(return_X_y=True) X, y = shuffle(input_data, labels, random_state=SEED) num_constraints = 50 - constraints = ( - Constraints.random_subset(y, random_state=check_random_state(SEED))) + constraints = Constraints(y) pairs = ( constraints .positive_negative_pairs(num_constraints, same_length=True, From b38605753efac4af622ccc92c094155ab94dbe22 Mon Sep 17 00:00:00 2001 From: William de Vazelhes <31916524+wdevazelhes@users.noreply.github.com> Date: Fri, 4 Jan 2019 11:00:40 +0100 Subject: [PATCH 082/210] FIX README file (#148) --- README.rst | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff 
--git a/README.rst b/README.rst index b1893cc6..0db1f87a 100644 --- a/README.rst +++ b/README.rst @@ -34,8 +34,7 @@ package installed). **Usage** -See the `sphinx documentation`_ for full documentation about installation, API, - usage, and examples. +See the `sphinx documentation`_ for full documentation about installation, API, usage, and examples. **Notes** From d00196d806d9761d7a2cd440571bc6e9f93fbeae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20Bellet?= Date: Mon, 7 Jan 2019 15:49:13 +0100 Subject: [PATCH 083/210] [MRG] Documentation: introduction to metric learning (#145) * modified index, intro * cosmit * cosmit * add use-cases and a few nitpicks * cosmit --- doc/index.rst | 11 ++- doc/introduction.rst | 178 ++++++++++++++++++++++++++++++++++--------- 2 files changed, 149 insertions(+), 40 deletions(-) diff --git a/doc/index.rst b/doc/index.rst index 9dbcd9b0..ed3f6ccb 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -2,8 +2,15 @@ metric-learn: Metric Learning in Python ======================================= |License| |PyPI version| -Welcome to metric-learn's documentation ! ------------------------------------------ +Metric-learn contains efficient Python implementations of several +popular supervised and weakly-supervised metric learning algorithms. The API +of metric-learn is compatible with `scikit-learn +`_, the leading library for machine learning in +Python. This allows to use of all the scikit-learn routines (for pipelining, +model selection, etc) with metric learning algorithms. + +Documentation outline +--------------------- .. toctree:: :maxdepth: 2 diff --git a/doc/introduction.rst b/doc/introduction.rst index 9f2b4165..f0195c83 100644 --- a/doc/introduction.rst +++ b/doc/introduction.rst @@ -1,38 +1,140 @@ -============ -Introduction -============ - -Distance metrics are widely used in the machine learning literature. -Traditionally, practitioners would choose a standard distance metric -(Euclidean, City-Block, Cosine, etc.) using a priori knowledge of -the domain. -Distance metric learning (or simply, metric learning) is the sub-field of -machine learning dedicated to automatically construct task-specific distance -metrics from (weakly) supervised data. -The learned distance metric often corresponds to a Euclidean distance in a new -embedding space, hence distance metric learning can be seen as a form of -representation learning. - -This package contains a efficient Python implementations of several popular -metric learning algorithms, compatible with scikit-learn. This allows to use -all the scikit-learn routines for pipelining and model selection for -metric learning algorithms. - - -Currently, each metric learning algorithm supports the following methods: - -- ``fit(...)``, which learns the model. -- ``metric()``, which returns a Mahalanobis matrix - :math:`M = L^{\top}L` such that distance between vectors ``x`` and - ``y`` can be computed as :math:`\sqrt{\left(x-y\right)M\left(x-y\right)}`. -- ``transformer_from_metric(metric)``, which returns a transformation matrix - :math:`L \in \mathbb{R}^{D \times d}`, which can be used to convert a - data matrix :math:`X \in \mathbb{R}^{n \times d}` to the - :math:`D`-dimensional learned metric space :math:`X L^{\top}`, - in which standard Euclidean distances may be used. -- ``transform(X)``, which applies the aforementioned transformation. -- ``score_pairs(pairs)`` which returns the distance between pairs of - points. 
``pairs`` should be a 3D array-like of pairs of shape ``(n_pairs, - 2, n_features)``, or it can be a 2D array-like of pairs indicators of - shape ``(n_pairs, 2)`` (see section :ref:`preprocessor_section` for more - details). +======================== +What is Metric Learning? +======================== + +Many approaches in machine learning require a measure of distance between data +points. Traditionally, practitioners would choose a standard distance metric +(Euclidean, City-Block, Cosine, etc.) using a priori knowledge of the +domain. However, it is often difficult to design metrics that are well-suited +to the particular data and task of interest. + +Distance metric learning (or simply, metric learning) aims at +automatically constructing task-specific distance metrics from (weakly) +supervised data, in a machine learning manner. The learned distance metric can +then be used to perform various tasks (e.g., k-NN classification, clustering, +information retrieval). + +Problem Setting +=============== + +Metric learning problems fall into two main categories depending on the type +of supervision available about the training data: + +- :doc:`Supervised learning `: the algorithm has access to + a set of data points, each of them belonging to a class (label) as in a + standard classification problem. + Broadly speaking, the goal in this setting is to learn a distance metric + that puts points with the same label close together while pushing away + points with different labels. +- :doc:`Weakly supervised learning `: the + algorithm has access to a set of data points with supervision only + at the tuple level (typically pairs, triplets, or quadruplets of + data points). A classic example of such weaker supervision is a set of + positive and negative pairs: in this case, the goal is to learn a distance + metric that puts positive pairs close together and negative pairs far away. + +Based on the above (weakly) supervised data, the metric learning problem is +generally formulated as an optimization problem where one seeks to find the +parameters of a distance function that optimize some objective function +measuring the agreement with the training data. + +Mahalanobis Distances +===================== + +In the metric-learn package, all algorithms currently implemented learn +so-called Mahalanobis distances. Given a real-valued parameter matrix +:math:`L` of shape ``(num_dims, n_features)`` where ``n_features`` is the +number features describing the data, the Mahalanobis distance associated with +:math:`L` is defined as follows: + +.. math:: D(x, x') = \sqrt{(Lx-Lx')^\top(Lx-Lx')} + +In other words, a Mahalanobis distance is a Euclidean distance after a +linear transformation of the feature space defined by :math:`L` (taking +:math:`L` to be the identity matrix recovers the standard Euclidean distance). +Mahalanobis distance metric learning can thus be seen as learning a new +embedding space of dimension ``num_dims``. Note that when ``num_dims`` is +smaller than ``n_features``, this achieves dimensionality reduction. + +Strictly speaking, Mahalanobis distances are "pseudo-metrics": they satisfy +three of the `properties of a metric `_ (non-negativity, symmetry, triangle inequality) but not +necessarily the identity of indiscernibles. + +.. note:: + + Mahalanobis distances can also be parameterized by a `positive semi-definite + (PSD) matrix + `_ + :math:`M`: + + .. 
math:: D(x, x') = \sqrt{(x-x')^\top M(x-x')} + + Using the fact that a PSD matrix :math:`M` can always be decomposed as + :math:`M=L^\top L` for some :math:`L`, one can show that both + parameterizations are equivalent. In practice, an algorithm may thus solve + the metric learning problem with respect to either :math:`M` or :math:`L`. + +Use-cases +========= + +There are many use-cases for metric learning. We list here a few popular +examples (for code illustrating some of these use-cases, see the +:doc:`examples ` section of the documentation): + +- `Nearest neighbors models + `_: the learned + metric can be used to improve nearest neighbors learning models for + classification, regression, anomaly detection... +- `Clustering `_: + metric learning provides a way to bias the clusters found by algorithms like + K-Means towards the intended semantics. +- Information retrieval: the learned metric can be used to retrieve the + elements of a database that are semantically closer to a query element. +- Dimensionality reduction: metric learning may be seen as a way to reduce the + data dimension in a (weakly) supervised setting. +- More generally, the learned transformation :math:`L` can be used to project + the data into a new embedding space before feeding it into another machine + learning algorithm. + +The API of metric-learn is compatible with `scikit-learn +`_, the leading library for machine +learning in Python. This allows to easily pipeline metric learners with other +scikit-learn estimators to realize the above use-cases, to perform joint +hyperparameter tuning, etc. + +Further reading +=============== + +For more information about metric learning and its applications, one can refer +to the following resources: + +- **Tutorial:** `Similarity and Distance Metric Learning with Applications to + Computer Vision + `_ (2015) +- **Surveys:** `A Survey on Metric Learning for Feature Vectors and Structured + Data `_ (2013), `Metric Learning: A + Survey `_ (2012) +- **Book:** `Metric Learning + `_ (2015) + +.. Methods [TO MOVE TO SUPERVISED/WEAK SECTIONS] +.. ============================================= + +.. Currently, each metric learning algorithm supports the following methods: + +.. - ``fit(...)``, which learns the model. +.. - ``metric()``, which returns a Mahalanobis matrix +.. :math:`M = L^{\top}L` such that distance between vectors ``x`` and +.. ``y`` can be computed as :math:`\sqrt{\left(x-y\right)M\left(x-y\right)}`. +.. - ``transformer_from_metric(metric)``, which returns a transformation matrix +.. :math:`L \in \mathbb{R}^{D \times d}`, which can be used to convert a +.. data matrix :math:`X \in \mathbb{R}^{n \times d}` to the +.. :math:`D`-dimensional learned metric space :math:`X L^{\top}`, +.. in which standard Euclidean distances may be used. +.. - ``transform(X)``, which applies the aforementioned transformation. +.. - ``score_pairs(pairs)`` which returns the distance between pairs of +.. points. ``pairs`` should be a 3D array-like of pairs of shape ``(n_pairs, +.. 2, n_features)``, or it can be a 2D array-like of pairs indicators of +.. shape ``(n_pairs, 2)`` (see section :ref:`preprocessor_section` for more +.. details). 
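
The equivalence of the two parameterizations above is easy to verify
numerically; a standalone sketch::

    import numpy as np

    rng = np.random.RandomState(0)
    L = rng.randn(2, 5)                      # num_dims=2, n_features=5
    M = L.T.dot(L)                           # PSD by construction
    x, x_prime = rng.randn(5), rng.randn(5)

    d_from_L = np.linalg.norm(L.dot(x) - L.dot(x_prime))
    d_from_M = np.sqrt((x - x_prime).dot(M).dot(x - x_prime))
    assert np.isclose(d_from_L, d_from_M)
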
\ No newline at end of file

From 8ffd998971b70ba9b4f1b06ae9b9ef47d4c27f45 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Aur=C3=A9lien=20Bellet?=
Date: Mon, 7 Jan 2019 19:34:06 +0100
Subject: [PATCH 084/210] [MRG] Move transformer_from_metric to util (#151)

* move method to util and update classes accordingly

* remove forgotten self

* typo
---
 metric_learn/_util.py       | 27 +++++++++++++++++++++++++++
 metric_learn/base_metric.py | 27 ---------------------------
 metric_learn/covariance.py  |  5 +++--
 metric_learn/itml.py        |  8 ++++----
 metric_learn/lsml.py        |  7 ++++---
 metric_learn/mmc.py         | 10 +++++-----
 metric_learn/sdml.py        |  7 ++++---
 7 files changed, 47 insertions(+), 44 deletions(-)

diff --git a/metric_learn/_util.py b/metric_learn/_util.py
index 27707be9..3bc303f9 100644
--- a/metric_learn/_util.py
+++ b/metric_learn/_util.py
@@ -322,3 +322,30 @@ def check_collapsed_pairs(pairs):
     raise ValueError("{} collapsed pairs found (where the left element is "
                      "the same as the right element), out of {} pairs "
                      "in total.".format(num_ident, pairs.shape[0]))
+
+
+def transformer_from_metric(metric):
+  """Computes the transformation matrix from the Mahalanobis matrix.
+
+  Since by definition the metric `M` is positive semi-definite (PSD), it
+  admits a Cholesky decomposition: L = cholesky(M).T. However, currently the
+  computation of the Cholesky decomposition used does not support
+  non-definite matrices. If the metric is not definite, this method will
+  return L = sqrt(w) * V.T (the rows of V.T scaled by the square roots of
+  the eigenvalues, clipped at zero), with M = V*w*V.T being the eigenvector
+  decomposition of M with the eigenvalues in the diagonal matrix w and the
+  columns of V being the eigenvectors. If M is diagonal, this method will
+  just return its elementwise square root (since the matrix is its own
+  diagonalization).
+
+  Returns
+  -------
+  L : (d x d) matrix
+  """
+
+  if np.allclose(metric, np.diag(np.diag(metric))):
+    return np.sqrt(metric)
+  elif not np.isclose(np.linalg.det(metric), 0):
+    return np.linalg.cholesky(metric).T
+  else:
+    w, V = np.linalg.eigh(metric)
+    return V.T * np.sqrt(np.maximum(0, w[:, None]))
diff --git a/metric_learn/base_metric.py b/metric_learn/base_metric.py
index 9af79ecc..bfec1264 100644
--- a/metric_learn/base_metric.py
+++ b/metric_learn/base_metric.py
@@ -1,4 +1,3 @@
-from numpy.linalg import cholesky
 from sklearn.base import BaseEstimator
 from sklearn.utils.validation import _is_arraylike
 from sklearn.metrics import roc_auc_score
 import numpy as np
 from abc import ABCMeta, abstractmethod
 import six
 from ._util import ArrayIndexer, check_input
@@ -181,32 +180,6 @@ def transform(self, X):
   def metric(self):
     return self.transformer_.T.dot(self.transformer_)

-  def transformer_from_metric(self, metric):
-    """Computes the transformation matrix from the Mahalanobis matrix.
-
-    Since by definition the metric `M` is positive semi-definite (PSD), it
-    admits a Cholesky decomposition: L = cholesky(M).T. However, currently the
-    computation of the Cholesky decomposition used does not support
-    non-definite matrices. If the metric is not definite, this method will
-    return L = V.T w^( -1/2), with M = V*w*V.T being the eigenvector
-    decomposition of M with the eigenvalues in the diagonal matrix w and the
-    columns of V being the eigenvectors. If M is diagonal, this method will
-    just return its elementwise square root (since the diagonalization of
-    the matrix is itself).
- - Returns - ------- - L : (d x d) matrix - """ - - if np.allclose(metric, np.diag(np.diag(metric))): - return np.sqrt(metric) - elif not np.isclose(np.linalg.det(metric), 0): - return cholesky(metric).T - else: - w, V = np.linalg.eigh(metric) - return V.T * np.sqrt(np.maximum(0, w[:, None])) - class _PairsClassifierMixin(BaseMetricLearner): diff --git a/metric_learn/covariance.py b/metric_learn/covariance.py index 10bc9582..7a04923d 100644 --- a/metric_learn/covariance.py +++ b/metric_learn/covariance.py @@ -13,6 +13,7 @@ from sklearn.base import TransformerMixin from .base_metric import MahalanobisMixin +from ._util import transformer_from_metric class Covariance(MahalanobisMixin, TransformerMixin): @@ -22,7 +23,7 @@ class Covariance(MahalanobisMixin, TransformerMixin): ---------- transformer_ : `numpy.ndarray`, shape=(num_dims, n_features) The linear transformation ``L`` deduced from the learned Mahalanobis - metric (See :meth:`transformer_from_metric`.) + metric (See function `transformer_from_metric`.) """ def __init__(self, preprocessor=None): @@ -40,5 +41,5 @@ def fit(self, X, y=None): else: M = np.linalg.inv(M) - self.transformer_ = self.transformer_from_metric(np.atleast_2d(M)) + self.transformer_ = transformer_from_metric(np.atleast_2d(M)) return self diff --git a/metric_learn/itml.py b/metric_learn/itml.py index 8a251fe0..158ec4d3 100644 --- a/metric_learn/itml.py +++ b/metric_learn/itml.py @@ -22,7 +22,7 @@ from sklearn.base import TransformerMixin from .base_metric import _PairsClassifierMixin, MahalanobisMixin from .constraints import Constraints, wrap_pairs -from ._util import vector_norm +from ._util import vector_norm, transformer_from_metric class _BaseITML(MahalanobisMixin): @@ -125,7 +125,7 @@ def _fit(self, pairs, y, bounds=None): print('itml converged at iter: %d, conv = %f' % (it, conv)) self.n_iter_ = it - self.transformer_ = self.transformer_from_metric(self.A_) + self.transformer_ = transformer_from_metric(self.A_) return self @@ -136,7 +136,7 @@ class ITML(_BaseITML, _PairsClassifierMixin): ---------- transformer_ : `numpy.ndarray`, shape=(num_dims, n_features) The linear transformation ``L`` deduced from the learned Mahalanobis - metric (See :meth:`transformer_from_metric`.) + metric (See function `transformer_from_metric`.) """ def fit(self, pairs, y, bounds=None): @@ -169,7 +169,7 @@ class ITML_Supervised(_BaseITML, TransformerMixin): ---------- transformer_ : `numpy.ndarray`, shape=(num_dims, n_features) The linear transformation ``L`` deduced from the learned Mahalanobis - metric (See `transformer_from_metric`.) + metric (See function `transformer_from_metric`.) """ def __init__(self, gamma=1., max_iter=1000, convergence_threshold=1e-3, diff --git a/metric_learn/lsml.py b/metric_learn/lsml.py index 9090a431..50fcfa3e 100644 --- a/metric_learn/lsml.py +++ b/metric_learn/lsml.py @@ -16,6 +16,7 @@ from .base_metric import _QuadrupletsClassifierMixin, MahalanobisMixin from .constraints import Constraints +from ._util import transformer_from_metric class _BaseLSML(MahalanobisMixin): @@ -101,7 +102,7 @@ def _fit(self, quadruplets, y=None, weights=None): print("Didn't converge after", it, "iterations. 
Final loss:", s_best) self.n_iter_ = it - self.transformer_ = self.transformer_from_metric(self.M_) + self.transformer_ = transformer_from_metric(self.M_) return self def _comparison_loss(self, metric): @@ -137,7 +138,7 @@ class LSML(_BaseLSML, _QuadrupletsClassifierMixin): ---------- transformer_ : `numpy.ndarray`, shape=(num_dims, n_features) The linear transformation ``L`` deduced from the learned Mahalanobis - metric (See :meth:`transformer_from_metric`.) + metric (See function `transformer_from_metric`.) """ def fit(self, quadruplets, weights=None): @@ -170,7 +171,7 @@ class LSML_Supervised(_BaseLSML, TransformerMixin): ---------- transformer_ : `numpy.ndarray`, shape=(num_dims, n_features) The linear transformation ``L`` deduced from the learned Mahalanobis - metric (See :meth:`transformer_from_metric`.) + metric (See function `transformer_from_metric`.) """ def __init__(self, tol=1e-3, max_iter=1000, prior=None, diff --git a/metric_learn/mmc.py b/metric_learn/mmc.py index 6d929d6e..b806a97e 100644 --- a/metric_learn/mmc.py +++ b/metric_learn/mmc.py @@ -25,7 +25,7 @@ from .base_metric import _PairsClassifierMixin, MahalanobisMixin from .constraints import Constraints, wrap_pairs -from ._util import vector_norm +from ._util import vector_norm, transformer_from_metric class _BaseMMC(MahalanobisMixin): @@ -206,7 +206,7 @@ def _fit_full(self, pairs, y): self.A_[:] = A_old self.n_iter_ = cycle - self.transformer_ = self.transformer_from_metric(self.A_) + self.transformer_ = transformer_from_metric(self.A_) return self def _fit_diag(self, pairs, y): @@ -267,7 +267,7 @@ def _fit_diag(self, pairs, y): self.A_ = np.diag(w) - self.transformer_ = self.transformer_from_metric(self.A_) + self.transformer_ = transformer_from_metric(self.A_) return self def _fD(self, neg_pairs, A): @@ -355,7 +355,7 @@ class MMC(_BaseMMC, _PairsClassifierMixin): ---------- transformer_ : `numpy.ndarray`, shape=(num_dims, n_features) The linear transformation ``L`` deduced from the learned Mahalanobis - metric (See :meth:`transformer_from_metric`.) + metric (See function `transformer_from_metric`.) """ def fit(self, pairs, y): @@ -386,7 +386,7 @@ class MMC_Supervised(_BaseMMC, TransformerMixin): ---------- transformer_ : `numpy.ndarray`, shape=(num_dims, n_features) The linear transformation ``L`` deduced from the learned Mahalanobis - metric (See :meth:`transformer_from_metric`.) + metric (See function `transformer_from_metric`.) """ def __init__(self, max_iter=100, max_proj=10000, convergence_threshold=1e-6, diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py index b1421736..40fd5727 100644 --- a/metric_learn/sdml.py +++ b/metric_learn/sdml.py @@ -17,6 +17,7 @@ from .base_metric import MahalanobisMixin, _PairsClassifierMixin from .constraints import Constraints, wrap_pairs +from ._util import transformer_from_metric class _BaseSDML(MahalanobisMixin): @@ -68,7 +69,7 @@ def _fit(self, pairs, y): emp_cov = emp_cov.T.dot(emp_cov) _, self.M_ = graph_lasso(emp_cov, self.sparsity_param, verbose=self.verbose) - self.transformer_ = self.transformer_from_metric(self.M_) + self.transformer_ = transformer_from_metric(self.M_) return self @@ -79,7 +80,7 @@ class SDML(_BaseSDML, _PairsClassifierMixin): ---------- transformer_ : `numpy.ndarray`, shape=(num_dims, n_features) The linear transformation ``L`` deduced from the learned Mahalanobis - metric (See :meth:`transformer_from_metric`.) + metric (See function `transformer_from_metric`.) 
""" def fit(self, pairs, y): @@ -110,7 +111,7 @@ class SDML_Supervised(_BaseSDML, TransformerMixin): ---------- transformer_ : `numpy.ndarray`, shape=(num_dims, n_features) The linear transformation ``L`` deduced from the learned Mahalanobis - metric (See :meth:`transformer_from_metric`.) + metric (See function `transformer_from_metric`.) """ def __init__(self, balance_param=0.5, sparsity_param=0.01, use_cov=True, From d3620bbb13620338cc8aaf39d78cead58ac5d410 Mon Sep 17 00:00:00 2001 From: William de Vazelhes <31916524+wdevazelhes@users.noreply.github.com> Date: Tue, 29 Jan 2019 13:32:48 +0100 Subject: [PATCH 085/210] [MRG] Refactor the metric() method (#152) * MAINT Rename metric() into get_mahalanobis_matrix() * ENH: refactor methods to get the metric * DOC: change description of distance into pseudo-metric * MAINT: make description clearer * ENH: enhance description * MAINT: remove the 1D part in case we allow 2D * FIX: fix expression for mahalanobis distance * TST: Add tests * ENH: deal with the 1D case * Rename forgotten point 1 and point 2 to u and v * STY: Fix PEP8 errors * Address all comments * Revert changes in metric_plotting included by mistake * FIX: use custom validate_vector * TST: fix syntax error for assert in test * Add tolerance for triangular inequality because MMC probably projected onto a line * MAINT: address comments from review https://github.com/metric-learn/metric-learn/pull/152#pullrequestreview-194754318 * ENH: add squared option * FIX fix test that was failing du to a non 2D transformer: - ensure that the transformer_ fitted is always 2D: - in the result returned from transformer_from_metric - in the code of metric learners, for metric learners that don't call transformer_from_metric - for metric learners that cannot work on 1 feature, ensure it when checking the input - add a test to check this behaviour * FIX: remove message that is not supported anymore by python newer versions and replace it by str * TST: make shape testing more precise * TST: enforce the 2d transformer test for everyone, and make it pass for RCA and SDML * TST: fix typo in removing * Remove unnecessary calls of np.atleast2d * Add functions to commented doc --- doc/conf.py | 6 +- doc/introduction.rst | 6 +- metric_learn/_util.py | 11 ++ metric_learn/base_metric.py | 115 +++++++++++++++++++- metric_learn/rca.py | 6 +- metric_learn/sdml.py | 2 +- test/metric_learn_test.py | 10 +- test/test_base_metric.py | 82 ++++++++++++++ test/test_mahalanobis_mixin.py | 120 ++++++++++++++++++++- test/test_transformer_metric_conversion.py | 18 ++-- test/test_utils.py | 32 +++++- 11 files changed, 385 insertions(+), 23 deletions(-) diff --git a/doc/conf.py b/doc/conf.py index ed476edd..f0faa2f8 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -7,7 +7,8 @@ 'sphinx.ext.viewcode', 'sphinx.ext.mathjax', 'numpydoc', - 'sphinx_gallery.gen_gallery' + 'sphinx_gallery.gen_gallery', + 'sphinx.ext.doctest' ] templates_path = ['_templates'] @@ -35,3 +36,6 @@ # Option to only need single backticks to refer to symbols default_role = 'any' +# Option to hide doctests comments in the documentation (like # doctest: +# +NORMALIZE_WHITESPACE for instance) +trim_doctest_flags = True diff --git a/doc/introduction.rst b/doc/introduction.rst index f0195c83..dad530b3 100644 --- a/doc/introduction.rst +++ b/doc/introduction.rst @@ -38,6 +38,8 @@ generally formulated as an optimization problem where one seeks to find the parameters of a distance function that optimize some objective function measuring the agreement with the training 
data. +.. _mahalanobis_distances: + Mahalanobis Distances ===================== @@ -124,7 +126,9 @@ to the following resources: .. Currently, each metric learning algorithm supports the following methods: .. - ``fit(...)``, which learns the model. -.. - ``metric()``, which returns a Mahalanobis matrix +.. - ``get_mahalanobis_matrix()``, which returns a Mahalanobis matrix +.. - ``get_metric()``, which returns a function that takes as input two 1D + arrays and outputs the learned metric score on these two points .. :math:`M = L^{\top}L` such that distance between vectors ``x`` and .. ``y`` can be computed as :math:`\sqrt{\left(x-y\right)M\left(x-y\right)}`. .. - ``transformer_from_metric(metric)``, which returns a transformation matrix diff --git a/metric_learn/_util.py b/metric_learn/_util.py index 3bc303f9..bd57fd5f 100644 --- a/metric_learn/_util.py +++ b/metric_learn/_util.py @@ -349,3 +349,14 @@ def transformer_from_metric(metric): else: w, V = np.linalg.eigh(metric) return V.T * np.sqrt(np.maximum(0, w[:, None])) + + +def validate_vector(u, dtype=None): + # replica of scipy.spatial.distance._validate_vector, for making scipy + # compatible functions on vectors (such as distances computations) + u = np.asarray(u, dtype=dtype, order='c').squeeze() + # Ensure values such as u=1 and u=[1] still return 1-D arrays. + u = np.atleast_1d(u) + if u.ndim > 1: + raise ValueError("Input vector should be 1-D.") + return u diff --git a/metric_learn/base_metric.py b/metric_learn/base_metric.py index bfec1264..58b8cc5d 100644 --- a/metric_learn/base_metric.py +++ b/metric_learn/base_metric.py @@ -1,10 +1,13 @@ +from numpy.linalg import cholesky +from scipy.spatial.distance import euclidean from sklearn.base import BaseEstimator from sklearn.utils.validation import _is_arraylike from sklearn.metrics import roc_auc_score import numpy as np from abc import ABCMeta, abstractmethod import six -from ._util import ArrayIndexer, check_input +from ._util import ArrayIndexer, check_input, validate_vector +import warnings class BaseMetricLearner(six.with_metaclass(ABCMeta, BaseEstimator)): @@ -34,6 +37,14 @@ def score_pairs(self, pairs): ------- scores: `numpy.ndarray` of shape=(n_pairs,) The score of every pair. + + See Also + -------- + get_metric : a method that returns a function to compute the metric between + two points. The difference with `score_pairs` is that it works on two 1D + arrays and cannot use a preprocessor. Besides, the returned function is + independent of the metric learner and hence is not modified if the metric + learner is. """ def check_preprocessor(self): @@ -85,6 +96,47 @@ def _prepare_inputs(self, X, y=None, type_of_inputs='classic', tuple_size=getattr(self, '_tuple_size', None), **kwargs) + @abstractmethod + def get_metric(self): + """Returns a function that takes as input two 1D arrays and outputs the + learned metric score on these two points. + + This function will be independent from the metric learner that learned it + (it will not be modified if the initial metric learner is modified), + and it can be directly plugged into the `metric` argument of + scikit-learn's estimators. + + Returns + ------- + metric_fun : function + The function described above. + + + Examples + -------- + .. 
doctest:: + + >>> from metric_learn import NCA + >>> from sklearn.datasets import make_classification + >>> from sklearn.neighbors import KNeighborsClassifier + >>> nca = NCA() + >>> X, y = make_classification() + >>> nca.fit(X, y) + >>> knn = KNeighborsClassifier(metric=nca.get_metric()) + >>> knn.fit(X, y) # doctest: +NORMALIZE_WHITESPACE + KNeighborsClassifier(algorithm='auto', leaf_size=30, + metric=.metric_fun + at 0x...>, + metric_params=None, n_jobs=None, n_neighbors=5, p=2, + weights='uniform') + + See Also + -------- + score_pairs : a method that returns the metric score between several pairs + of points. Unlike `get_metric`, this is a method of the metric learner + and therefore can change if the metric learner changes. Besides, it can + use the metric learner's preprocessor, and works on concatenated arrays. + """ class MetricTransformer(six.with_metaclass(ABCMeta)): @@ -146,6 +198,17 @@ def score_pairs(self, pairs): ------- scores: `numpy.ndarray` of shape=(n_pairs,) The learned Mahalanobis distance for every pair. + + See Also + -------- + get_metric : a method that returns a function to compute the metric between + two points. The difference with `score_pairs` is that it works on two 1D + arrays and cannot use a preprocessor. Besides, the returned function is + independent of the metric learner and hence is not modified if the metric + learner is. + + :ref:`mahalanobis_distances` : The section of the project documentation + that describes Mahalanobis Distances. """ pairs = check_input(pairs, type_of_inputs='tuples', preprocessor=self.preprocessor_, @@ -177,7 +240,57 @@ def transform(self, X): accept_sparse=True) return X_checked.dot(self.transformer_.T) + def get_metric(self): + transformer_T = self.transformer_.T.copy() + + def metric_fun(u, v, squared=False): + """This function computes the metric between u and v, according to the + previously learned metric. + + Parameters + ---------- + u : array-like, shape=(n_features,) + The first point involved in the distance computation. + + v : array-like, shape=(n_features,) + The second point involved in the distance computation. + + squared : `bool` + If True, the function will return the squared metric between u and + v, which is faster to compute. + + Returns + ------- + distance: float + The distance between u and v according to the new metric. + """ + u = validate_vector(u) + v = validate_vector(v) + transformed_diff = (u - v).dot(transformer_T) + dist = np.dot(transformed_diff, transformed_diff.T) + if not squared: + dist = np.sqrt(dist) + return dist + + return metric_fun + + get_metric.__doc__ = BaseMetricLearner.get_metric.__doc__ + def metric(self): + # TODO: remove this method in version 0.6.0 + warnings.warn(("`metric` is deprecated since version 0.5.0 and will be " + "removed in 0.6.0. Use `get_mahalanobis_matrix` instead."), + DeprecationWarning) + return self.get_mahalanobis_matrix() + + def get_mahalanobis_matrix(self): + """Returns a copy of the Mahalanobis matrix learned by the metric learner. + + Returns + ------- + M : `numpy.ndarray`, shape=(n_components, n_features) + The copy of the learned Mahalanobis matrix. 
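+
+    Examples
+    --------
+    A minimal sketch (NCA and the iris data are only placeholders here; any
+    fitted Mahalanobis learner would do)::
+
+      >>> import numpy as np
+      >>> from metric_learn import NCA
+      >>> from sklearn.datasets import load_iris
+      >>> X, y = load_iris(return_X_y=True)
+      >>> nca = NCA().fit(X, y)
+      >>> M = nca.get_mahalanobis_matrix()
+      >>> np.allclose(M, nca.transformer_.T.dot(nca.transformer_))
+      True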
+ """ return self.transformer_.T.dot(self.transformer_) diff --git a/metric_learn/rca.py b/metric_learn/rca.py index 3380f4c9..c9fedd59 100644 --- a/metric_learn/rca.py +++ b/metric_learn/rca.py @@ -112,7 +112,7 @@ def fit(self, X, chunks): chunks = np.asanyarray(chunks, dtype=int) chunk_mask, chunked_data = _chunk_mean_centering(X_t, chunks) - inner_cov = np.cov(chunked_data, rowvar=0, bias=1) + inner_cov = np.atleast_2d(np.cov(chunked_data, rowvar=0, bias=1)) dim = self._check_dimension(np.linalg.matrix_rank(inner_cov), X_t) # Fisher Linear Discriminant projection @@ -122,13 +122,13 @@ def fit(self, X, chunks): vals, vecs = np.linalg.eig(tmp) inds = np.argsort(vals)[:dim] A = vecs[:, inds] - inner_cov = A.T.dot(inner_cov).dot(A) + inner_cov = np.atleast_2d(A.T.dot(inner_cov).dot(A)) self.transformer_ = _inv_sqrtm(inner_cov).dot(A.T) else: self.transformer_ = _inv_sqrtm(inner_cov).T if M_pca is not None: - self.transformer_ = self.transformer_.dot(M_pca) + self.transformer_ = np.atleast_2d(self.transformer_.dot(M_pca)) return self diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py index 40fd5727..be45d3a3 100644 --- a/metric_learn/sdml.py +++ b/metric_learn/sdml.py @@ -58,7 +58,7 @@ def _fit(self, pairs, y): # set up prior M if self.use_cov: X = np.vstack({tuple(row) for row in pairs.reshape(-1, pairs.shape[2])}) - self.M_ = pinvh(np.cov(X, rowvar = False)) + self.M_ = pinvh(np.atleast_2d(np.cov(X, rowvar = False))) else: self.M_ = np.identity(pairs.shape[2]) diff = pairs[:, 0] - pairs[:, 1] diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py index eebce1f9..e4ce8cef 100644 --- a/test/metric_learn_test.py +++ b/test/metric_learn_test.py @@ -273,7 +273,7 @@ def test_iris(self): self.assertLess(csep, 0.15) # Sanity checks for learned matrices. 
- self.assertEqual(lfda.metric().shape, (4, 4)) + self.assertEqual(lfda.get_mahalanobis_matrix().shape, (4, 4)) self.assertEqual(lfda.transformer_.shape, (2, 4)) @@ -348,14 +348,16 @@ def test_iris(self): [+0.000868, +0.001468, -0.002021, -0.002879], [-0.001195, -0.002021, +0.002782, +0.003964], [-0.001703, -0.002879, +0.003964, +0.005648]] - assert_array_almost_equal(expected, mmc.metric(), decimal=6) + assert_array_almost_equal(expected, mmc.get_mahalanobis_matrix(), + decimal=6) # Diagonal metric mmc = MMC(diagonal=True) mmc.fit(*wrap_pairs(self.iris_points, [a,b,c,d])) expected = [0, 0, 1.210220, 1.228596] - assert_array_almost_equal(np.diag(expected), mmc.metric(), decimal=6) - + assert_array_almost_equal(np.diag(expected), mmc.get_mahalanobis_matrix(), + decimal=6) + # Supervised Full mmc = MMC_Supervised() mmc.fit(self.iris_points, self.iris_labels) diff --git a/test/test_base_metric.py b/test/test_base_metric.py index fdea2949..09718c29 100644 --- a/test/test_base_metric.py +++ b/test/test_base_metric.py @@ -1,5 +1,10 @@ +import pytest import unittest import metric_learn +import numpy as np +from sklearn import clone +from sklearn.utils.testing import set_random_state +from test.test_utils import ids_metric_learners, metric_learners class TestStringRepr(unittest.TestCase): @@ -81,5 +86,82 @@ def test_mmc(self): num_labeled='deprecated', preprocessor=None, verbose=False) """.strip('\n')) + +@pytest.mark.parametrize('estimator, build_dataset', metric_learners, + ids=ids_metric_learners) +def test_get_metric_is_independent_from_metric_learner(estimator, + build_dataset): + """Tests that the get_metric method returns a function that is independent + from the original metric learner""" + input_data, labels, _, X = build_dataset() + model = clone(estimator) + set_random_state(model) + + # we fit the metric learner on it and then we compute the metric on some + # points + model.fit(input_data, labels) + metric = model.get_metric() + score = metric(X[0], X[1]) + + # then we refit the estimator on another dataset + model.fit(np.sin(input_data), labels) + + # we recompute the distance between the two points: it should be the same + score_bis = metric(X[0], X[1]) + assert score_bis == score + + +@pytest.mark.parametrize('estimator, build_dataset', metric_learners, + ids=ids_metric_learners) +def test_get_metric_raises_error(estimator, build_dataset): + """Tests that the metric returned by get_metric raises errors similar to + the distance functions in scipy.spatial.distance""" + input_data, labels, _, X = build_dataset() + model = clone(estimator) + set_random_state(model) + model.fit(input_data, labels) + metric = model.get_metric() + + list_test_get_metric_raises = [(X[0].tolist() + [5.2], X[1]), # vectors with + # different dimensions + (X[0:4], X[1:5]), # 2D vectors + (X[0].tolist() + [5.2], X[1] + [7.2])] + # vectors of same dimension but incompatible with what the metric learner + # was trained on + + for u, v in list_test_get_metric_raises: + with pytest.raises(ValueError): + metric(u, v) + + +@pytest.mark.parametrize('estimator, build_dataset', metric_learners, + ids=ids_metric_learners) +def test_get_metric_works_does_not_raise(estimator, build_dataset): + """Tests that the metric returned by get_metric does not raise errors (or + warnings) similarly to the distance functions in scipy.spatial.distance""" + input_data, labels, _, X = build_dataset() + model = clone(estimator) + set_random_state(model) + model.fit(input_data, labels) + metric = model.get_metric() + + 
list_test_get_metric_doesnt_raise = [(X[0], X[1]), + (X[0].tolist(), X[1].tolist()), + (X[0][None], X[1][None])] + + for u, v in list_test_get_metric_doesnt_raise: + with pytest.warns(None) as record: + metric(u, v) + assert len(record) == 0 + + # Test that the scalar case works + model.transformer_ = np.array([3.1]) + metric = model.get_metric() + for u, v in [(5, 6.7), ([5], [6.7]), ([[5]], [[6.7]])]: + with pytest.warns(None) as record: + metric(u, v) + assert len(record) == 0 + + if __name__ == '__main__': unittest.main() diff --git a/test/test_mahalanobis_mixin.py b/test/test_mahalanobis_mixin.py index 0d834f10..1e555e73 100644 --- a/test/test_mahalanobis_mixin.py +++ b/test/test_mahalanobis_mixin.py @@ -2,9 +2,10 @@ import pytest import numpy as np -from numpy.testing import assert_array_almost_equal -from scipy.spatial.distance import pdist, squareform +from numpy.testing import assert_array_almost_equal, assert_allclose +from scipy.spatial.distance import pdist, squareform, mahalanobis from sklearn import clone +from sklearn.cluster import DBSCAN from sklearn.utils import check_random_state from sklearn.utils.testing import set_random_state @@ -167,3 +168,118 @@ def test_embed_is_linear(estimator, build_dataset): model.transform(X[10:20])) assert_array_almost_equal(model.transform(5 * X[:10]), 5 * model.transform(X[:10])) + + +@pytest.mark.parametrize('estimator, build_dataset', metric_learners, + ids=ids_metric_learners) +def test_get_metric_equivalent_to_explicit_mahalanobis(estimator, + build_dataset): + """Tests that using the get_metric method of mahalanobis metric learners is + equivalent to explicitely calling scipy's mahalanobis metric + """ + rng = np.random.RandomState(42) + input_data, labels, _, X = build_dataset() + model = clone(estimator) + set_random_state(model) + model.fit(input_data, labels) + metric = model.get_metric() + n_features = X.shape[1] + a, b = (rng.randn(n_features), rng.randn(n_features)) + expected_dist = mahalanobis(a[None], b[None], + VI=model.get_mahalanobis_matrix()) + assert_allclose(metric(a, b), expected_dist, rtol=1e-15) + + +@pytest.mark.parametrize('estimator, build_dataset', metric_learners, + ids=ids_metric_learners) +def test_get_metric_is_pseudo_metric(estimator, build_dataset): + """Tests that the get_metric method of mahalanobis metric learners returns a + pseudo-metric (metric but without one side of the equivalence of + the identity of indiscernables property) + """ + input_data, labels, _, X = build_dataset() + model = clone(estimator) + set_random_state(model) + model.fit(input_data, labels) + metric = model.get_metric() + + n_features = X.shape[1] + for seed in range(10): + rng = np.random.RandomState(seed) + a, b, c = (rng.randn(n_features) for _ in range(3)) + assert metric(a, b) >= 0 # positivity + assert metric(a, b) == metric(b, a) # symmetry + # one side of identity indiscernables: x == y => d(x, y) == 0. The other + # side of the equivalence is not always true for Mahalanobis distances. 
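+    # (e.g. a rank-deficient L can map two distinct points onto the same
+    # embedding, so d(x, y) == 0 does not imply x == y)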
+ assert metric(a, a) == 0 + # triangular inequality + assert (metric(a, c) < metric(a, b) + metric(b, c) or + np.isclose(metric(a, c), metric(a, b) + metric(b, c), rtol=1e-20)) + + +@pytest.mark.parametrize('estimator, build_dataset', metric_learners, + ids=ids_metric_learners) +def test_metric_raises_deprecation_warning(estimator, build_dataset): + """assert that a deprecation warning is raised if someones wants to call + the `metric` function""" + # TODO: remove this method in version 0.6.0 + input_data, labels, _, X = build_dataset() + model = clone(estimator) + set_random_state(model) + model.fit(input_data, labels) + + with pytest.warns(DeprecationWarning) as raised_warning: + model.metric() + assert (str(raised_warning[0].message) == + ("`metric` is deprecated since version 0.5.0 and will be removed " + "in 0.6.0. Use `get_mahalanobis_matrix` instead.")) + + +@pytest.mark.parametrize('estimator, build_dataset', metric_learners, + ids=ids_metric_learners) +def test_get_metric_compatible_with_scikit_learn(estimator, build_dataset): + """Check that the metric returned by get_metric is compatible with + scikit-learn's algorithms using a custom metric, DBSCAN for instance""" + input_data, labels, _, X = build_dataset() + model = clone(estimator) + set_random_state(model) + model.fit(input_data, labels) + clustering = DBSCAN(metric=model.get_metric()) + clustering.fit(X) + + +@pytest.mark.parametrize('estimator, build_dataset', metric_learners, + ids=ids_metric_learners) +def test_get_squared_metric(estimator, build_dataset): + """Test that the squared metric returned is indeed the square of the + metric""" + input_data, labels, _, X = build_dataset() + model = clone(estimator) + set_random_state(model) + model.fit(input_data, labels) + metric = model.get_metric() + + n_features = X.shape[1] + for seed in range(10): + rng = np.random.RandomState(seed) + a, b = (rng.randn(n_features) for _ in range(2)) + assert_allclose(metric(a, b, squared=True), + metric(a, b, squared=False)**2, + rtol=1e-15) + + +@pytest.mark.parametrize('estimator, build_dataset', metric_learners, + ids=ids_metric_learners) +def test_transformer_is_2D(estimator, build_dataset): + """Tests that the transformer of metric learners is 2D""" + input_data, labels, _, X = build_dataset() + model = clone(estimator) + set_random_state(model) + # test that it works for X.shape[1] features + model.fit(input_data, labels) + assert model.transformer_.shape == (X.shape[1], X.shape[1]) + + # test that it works for 1 feature + trunc_data = input_data[..., :1] + model.fit(trunc_data, labels) + assert model.transformer_.shape == (1, 1) # the transformer must be 2D diff --git a/test/test_transformer_metric_conversion.py b/test/test_transformer_metric_conversion.py index ab38d65e..59986011 100644 --- a/test/test_transformer_metric_conversion.py +++ b/test/test_transformer_metric_conversion.py @@ -20,60 +20,60 @@ def test_cov(self): cov = Covariance() cov.fit(self.X) L = cov.transformer_ - assert_array_almost_equal(L.T.dot(L), cov.metric()) + assert_array_almost_equal(L.T.dot(L), cov.get_mahalanobis_matrix()) def test_lsml_supervised(self): seed = np.random.RandomState(1234) lsml = LSML_Supervised(num_constraints=200) lsml.fit(self.X, self.y, random_state=seed) L = lsml.transformer_ - assert_array_almost_equal(L.T.dot(L), lsml.metric()) + assert_array_almost_equal(L.T.dot(L), lsml.get_mahalanobis_matrix()) def test_itml_supervised(self): seed = np.random.RandomState(1234) itml = ITML_Supervised(num_constraints=200) itml.fit(self.X, self.y, 
random_state=seed) L = itml.transformer_ - assert_array_almost_equal(L.T.dot(L), itml.metric()) + assert_array_almost_equal(L.T.dot(L), itml.get_mahalanobis_matrix()) def test_lmnn(self): lmnn = LMNN(k=5, learn_rate=1e-6, verbose=False) lmnn.fit(self.X, self.y) L = lmnn.transformer_ - assert_array_almost_equal(L.T.dot(L), lmnn.metric()) + assert_array_almost_equal(L.T.dot(L), lmnn.get_mahalanobis_matrix()) def test_sdml_supervised(self): seed = np.random.RandomState(1234) sdml = SDML_Supervised(num_constraints=1500) sdml.fit(self.X, self.y, random_state=seed) L = sdml.transformer_ - assert_array_almost_equal(L.T.dot(L), sdml.metric()) + assert_array_almost_equal(L.T.dot(L), sdml.get_mahalanobis_matrix()) def test_nca(self): n = self.X.shape[0] nca = NCA(max_iter=(100000//n)) nca.fit(self.X, self.y) L = nca.transformer_ - assert_array_almost_equal(L.T.dot(L), nca.metric()) + assert_array_almost_equal(L.T.dot(L), nca.get_mahalanobis_matrix()) def test_lfda(self): lfda = LFDA(k=2, num_dims=2) lfda.fit(self.X, self.y) L = lfda.transformer_ - assert_array_almost_equal(L.T.dot(L), lfda.metric()) + assert_array_almost_equal(L.T.dot(L), lfda.get_mahalanobis_matrix()) def test_rca_supervised(self): seed = np.random.RandomState(1234) rca = RCA_Supervised(num_dims=2, num_chunks=30, chunk_size=2) rca.fit(self.X, self.y, random_state=seed) L = rca.transformer_ - assert_array_almost_equal(L.T.dot(L), rca.metric()) + assert_array_almost_equal(L.T.dot(L), rca.get_mahalanobis_matrix()) def test_mlkr(self): mlkr = MLKR(num_dims=2) mlkr.fit(self.X, self.y) L = mlkr.transformer_ - assert_array_almost_equal(L.T.dot(L), mlkr.metric()) + assert_array_almost_equal(L.T.dot(L), mlkr.get_mahalanobis_matrix()) if __name__ == '__main__': diff --git a/test/test_utils.py b/test/test_utils.py index 39c718ac..5e640dbc 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -1,6 +1,7 @@ import pytest from collections import namedtuple import numpy as np +from numpy.testing import assert_array_equal, assert_equal from sklearn.model_selection import train_test_split from sklearn.exceptions import DataConversionWarning from sklearn.utils import check_random_state, shuffle @@ -8,7 +9,7 @@ from sklearn.base import clone from metric_learn._util import (check_input, make_context, preprocess_tuples, make_name, preprocess_points, - check_collapsed_pairs) + check_collapsed_pairs, validate_vector) from metric_learn import (ITML, LSML, MMC, RCA, SDML, Covariance, LFDA, LMNN, MLKR, NCA, ITML_Supervised, LSML_Supervised, MMC_Supervised, RCA_Supervised, SDML_Supervised, @@ -1010,3 +1011,32 @@ def test_check_collapsed_pairs_raises_error(): assert str(e.value) == ("2 collapsed pairs found (where the left element is " "the same as the right element), out of 3 pairs in" " total.") + +def test__validate_vector(): + """Replica of scipy.spatial.tests.test_distance.test__validate_vector""" + x = [1, 2, 3] + y = validate_vector(x) + assert_array_equal(y, x) + + y = validate_vector(x, dtype=np.float64) + assert_array_equal(y, x) + assert_equal(y.dtype, np.float64) + + x = [1] + y = validate_vector(x) + assert_equal(y.ndim, 1) + assert_equal(y, x) + + x = 1 + y = validate_vector(x) + assert_equal(y.ndim, 1) + assert_equal(y, [x]) + + x = np.arange(5).reshape(1, -1, 1) + y = validate_vector(x) + assert_equal(y.ndim, 1) + assert_array_equal(y, x[0, :, 0]) + + x = [[1, 2], [3, 4]] + with pytest.raises(ValueError): + validate_vector(x) From b336eba29773aa11be84efed559b2aae7c01087c Mon Sep 17 00:00:00 2001 From: William de Vazelhes 
<31916524+wdevazelhes@users.noreply.github.com> Date: Tue, 29 Jan 2019 15:19:07 +0100 Subject: [PATCH 086/210] [MRG] MAINT: remove variables not needed to store (#159) * MAINT: remove variables not needed to store * Address review https://github.com/metric-learn/metric-learn/pull/159#pullrequestreview-195570695 * DOC: add more precise docstring * DOC: make description clearer --- metric_learn/itml.py | 51 ++++++++++++++++++++++++++++++++------ metric_learn/lmnn.py | 23 ++++++++++-------- metric_learn/lsml.py | 58 ++++++++++++++++++++++++-------------------- metric_learn/mlkr.py | 3 +++ metric_learn/mmc.py | 6 +++++ metric_learn/nca.py | 3 +++ metric_learn/sdml.py | 10 ++++---- 7 files changed, 105 insertions(+), 49 deletions(-) diff --git a/metric_learn/itml.py b/metric_learn/itml.py index 158ec4d3..4316802c 100644 --- a/metric_learn/itml.py +++ b/metric_learn/itml.py @@ -73,9 +73,9 @@ def _fit(self, pairs, y, bounds=None): self.bounds_[self.bounds_==0] = 1e-9 # init metric if self.A0 is None: - self.A_ = np.identity(pairs.shape[2]) + A = np.identity(pairs.shape[2]) else: - self.A_ = check_array(self.A0) + A = check_array(self.A0, copy=True) gamma = self.gamma pos_pairs, neg_pairs = pairs[y == 1], pairs[y == -1] num_pos = len(pos_pairs) @@ -87,7 +87,6 @@ def _fit(self, pairs, y, bounds=None): neg_bhat = np.zeros(num_neg) + self.bounds_[1] pos_vv = pos_pairs[:, 0, :] - pos_pairs[:, 1, :] neg_vv = neg_pairs[:, 0, :] - neg_pairs[:, 1, :] - A = self.A_ for it in xrange(self.max_iter): # update positives @@ -125,7 +124,7 @@ def _fit(self, pairs, y, bounds=None): print('itml converged at iter: %d, conv = %f' % (it, conv)) self.n_iter_ = it - self.transformer_ = transformer_from_metric(self.A_) + self.transformer_ = transformer_from_metric(A) return self @@ -134,6 +133,18 @@ class ITML(_BaseITML, _PairsClassifierMixin): Attributes ---------- + bounds_ : array-like, shape=(2,) + Bounds on similarity, aside slack variables, s.t. + ``d(a, b) < bounds_[0]`` for all given pairs of similar points ``a`` + and ``b``, and ``d(c, d) > bounds_[1]`` for all given pairs of + dissimilar points ``c`` and ``d``, with ``d`` the learned distance. If + not provided at initialization, bounds_[0] and bounds_[1] are set at + train time to the 5th and 95th percentile of the pairwise distances among + all points present in the input `pairs`. + + n_iter_ : `int` + The number of iterations the solver has run. + transformer_ : `numpy.ndarray`, shape=(num_dims, n_features) The linear transformation ``L`` deduced from the learned Mahalanobis metric (See function `transformer_from_metric`.) @@ -151,8 +162,14 @@ def fit(self, pairs, y, bounds=None): preprocessor. y: array-like, of shape (n_constraints,) Labels of constraints. Should be -1 for dissimilar pair, 1 for similar. - bounds : list (pos,neg) pairs, optional - bounds on similarity, s.t. d(X[a],X[b]) < pos and d(X[c],X[d]) > neg + bounds : `list` of two numbers + Bounds on similarity, aside slack variables, s.t. + ``d(a, b) < bounds_[0]`` for all given pairs of similar points ``a`` + and ``b``, and ``d(c, d) > bounds_[1]`` for all given pairs of + dissimilar points ``c`` and ``d``, with ``d`` the learned distance. + If not provided at initialization, bounds_[0] and bounds_[1] will be + set to the 5th and 95th percentile of the pairwise distances among all + points present in the input `pairs`. 
Returns ------- @@ -167,6 +184,18 @@ class ITML_Supervised(_BaseITML, TransformerMixin): Attributes ---------- + bounds_ : array-like, shape=(2,) + Bounds on similarity, aside slack variables, s.t. + ``d(a, b) < bounds_[0]`` for all given pairs of similar points ``a`` + and ``b``, and ``d(c, d) > bounds_[1]`` for all given pairs of + dissimilar points ``c`` and ``d``, with ``d`` the learned distance. + If not provided at initialization, bounds_[0] and bounds_[1] are set at + train time to the 5th and 95th percentile of the pairwise distances + among all points in the training data `X`. + + n_iter_ : `int` + The number of iterations the solver has run. + transformer_ : `numpy.ndarray`, shape=(num_dims, n_features) The linear transformation ``L`` deduced from the learned Mahalanobis metric (See function `transformer_from_metric`.) @@ -193,8 +222,14 @@ def __init__(self, gamma=1., max_iter=1000, convergence_threshold=1e-3, be removed in 0.6.0. num_constraints: int, optional number of constraints to generate - bounds : list (pos,neg) pairs, optional - bounds on similarity, s.t. d(X[a],X[b]) < pos and d(X[c],X[d]) > neg + bounds : `list` of two numbers + Bounds on similarity, aside slack variables, s.t. + ``d(a, b) < bounds_[0]`` for all given pairs of similar points ``a`` + and ``b``, and ``d(c, d) > bounds_[1]`` for all given pairs of + dissimilar points ``c`` and ``d``, with ``d`` the learned distance. + If not provided at initialization, bounds_[0] and bounds_[1] will be + set to the 5th and 95th percentile of the pairwise distances among all + points in the training data `X`. A0 : (d x d) matrix, optional initial regularization matrix, defaults to identity verbose : bool, optional diff --git a/metric_learn/lmnn.py b/metric_learn/lmnn.py index 1d7ddf2a..f9cd0e91 100644 --- a/metric_learn/lmnn.py +++ b/metric_learn/lmnn.py @@ -60,20 +60,20 @@ def fit(self, X, y): X, y = self._prepare_inputs(X, y, dtype=float, ensure_min_samples=2) num_pts, num_dims = X.shape - unique_labels, self.label_inds_ = np.unique(y, return_inverse=True) - if len(self.label_inds_) != num_pts: + unique_labels, label_inds = np.unique(y, return_inverse=True) + if len(label_inds) != num_pts: raise ValueError('Must have one label per point.') self.labels_ = np.arange(len(unique_labels)) if self.use_pca: warnings.warn('use_pca does nothing for the python_LMNN implementation') self.transformer_ = np.eye(num_dims) - required_k = np.bincount(self.label_inds_).min() + required_k = np.bincount(label_inds).min() if self.k > required_k: raise ValueError('not enough class labels for specified k' ' (smallest class has %d)' % required_k) - target_neighbors = self._select_targets(X) - impostors = self._find_impostors(target_neighbors[:, -1], X) + target_neighbors = self._select_targets(X, label_inds) + impostors = self._find_impostors(target_neighbors[:, -1], X, label_inds) if len(impostors) == 0: # L has already been initialized to an identity matrix return @@ -196,23 +196,23 @@ def _loss_grad(self, X, L, dfG, impostors, it, k, reg, target_neighbors, df, objective += G.flatten().dot(L.T.dot(L).flatten()) return G, objective, total_active, df, a1, a2 - def _select_targets(self, X): + def _select_targets(self, X, label_inds): target_neighbors = np.empty((X.shape[0], self.k), dtype=int) for label in self.labels_: - inds, = np.nonzero(self.label_inds_ == label) + inds, = np.nonzero(label_inds == label) dd = euclidean_distances(X[inds], squared=True) np.fill_diagonal(dd, np.inf) nn = np.argsort(dd)[..., :self.k] target_neighbors[inds] = 
inds[nn] return target_neighbors - def _find_impostors(self, furthest_neighbors, X): + def _find_impostors(self, furthest_neighbors, X, label_inds): Lx = self.transform(X) margin_radii = 1 + _inplace_paired_L2(Lx[furthest_neighbors], Lx) impostors = [] for label in self.labels_[:-1]: - in_inds, = np.nonzero(self.label_inds_ == label) - out_inds, = np.nonzero(self.label_inds_ > label) + in_inds, = np.nonzero(label_inds == label) + out_inds, = np.nonzero(label_inds > label) dist = euclidean_distances(Lx[out_inds], Lx[in_inds], squared=True) i1,j1 = np.nonzero(dist < margin_radii[out_inds][:,None]) i2,j2 = np.nonzero(dist < margin_radii[in_inds]) @@ -265,6 +265,9 @@ class LMNN(_base_LMNN): Attributes ---------- + n_iter_ : `int` + The number of iterations the solver has run. + transformer_ : `numpy.ndarray`, shape=(num_dims, n_features) The learned linear transformation ``L``. """ diff --git a/metric_learn/lsml.py b/metric_learn/lsml.py index 50fcfa3e..312990ab 100644 --- a/metric_learn/lsml.py +++ b/metric_learn/lsml.py @@ -50,32 +50,32 @@ def _fit(self, quadruplets, y=None, weights=None): type_of_inputs='tuples') # check to make sure that no two constrained vectors are identical - self.vab_ = quadruplets[:, 0, :] - quadruplets[:, 1, :] - self.vcd_ = quadruplets[:, 2, :] - quadruplets[:, 3, :] - if self.vab_.shape != self.vcd_.shape: + vab = quadruplets[:, 0, :] - quadruplets[:, 1, :] + vcd = quadruplets[:, 2, :] - quadruplets[:, 3, :] + if vab.shape != vcd.shape: raise ValueError('Constraints must have same length') if weights is None: - self.w_ = np.ones(self.vab_.shape[0]) + self.w_ = np.ones(vab.shape[0]) else: self.w_ = weights self.w_ /= self.w_.sum() # weights must sum to 1 if self.prior is None: X = np.vstack({tuple(row) for row in quadruplets.reshape(-1, quadruplets.shape[2])}) - self.prior_inv_ = np.atleast_2d(np.cov(X, rowvar=False)) - self.M_ = np.linalg.inv(self.prior_inv_) + prior_inv = np.atleast_2d(np.cov(X, rowvar=False)) + M = np.linalg.inv(prior_inv) else: - self.M_ = self.prior - self.prior_inv_ = np.linalg.inv(self.prior) + M = self.prior + prior_inv = np.linalg.inv(self.prior) step_sizes = np.logspace(-10, 0, 10) # Keep track of the best step size and the loss at that step. l_best = 0 - s_best = self._total_loss(self.M_) + s_best = self._total_loss(M, vab, vcd, prior_inv) if self.verbose: print('initial loss', s_best) for it in xrange(1, self.max_iter+1): - grad = self._gradient(self.M_) + grad = self._gradient(M, vab, vcd, prior_inv) grad_norm = scipy.linalg.norm(grad) if grad_norm < self.tol: break @@ -84,10 +84,10 @@ def _fit(self, quadruplets, y=None, weights=None): M_best = None for step_size in step_sizes: step_size /= grad_norm - new_metric = self.M_ - step_size * grad + new_metric = M - step_size * grad w, v = scipy.linalg.eigh(new_metric) new_metric = v.dot((np.maximum(w, 1e-8) * v).T) - cur_s = self._total_loss(new_metric) + cur_s = self._total_loss(new_metric, vab, vcd, prior_inv) if cur_s < s_best: l_best = step_size s_best = cur_s @@ -96,36 +96,36 @@ def _fit(self, quadruplets, y=None, weights=None): print('iter', it, 'cost', s_best, 'best step', l_best * grad_norm) if M_best is None: break - self.M_ = M_best + M = M_best else: if self.verbose: print("Didn't converge after", it, "iterations. 
Final loss:", s_best) self.n_iter_ = it - self.transformer_ = transformer_from_metric(self.M_) + self.transformer_ = transformer_from_metric(M) return self - def _comparison_loss(self, metric): - dab = np.sum(self.vab_.dot(metric) * self.vab_, axis=1) - dcd = np.sum(self.vcd_.dot(metric) * self.vcd_, axis=1) + def _comparison_loss(self, metric, vab, vcd): + dab = np.sum(vab.dot(metric) * vab, axis=1) + dcd = np.sum(vcd.dot(metric) * vcd, axis=1) violations = dab > dcd return self.w_[violations].dot((np.sqrt(dab[violations]) - np.sqrt(dcd[violations]))**2) - def _total_loss(self, metric): + def _total_loss(self, metric, vab, vcd, prior_inv): # Regularization loss sign, logdet = np.linalg.slogdet(metric) - reg_loss = np.sum(metric * self.prior_inv_) - sign * logdet - return self._comparison_loss(metric) + reg_loss + reg_loss = np.sum(metric * prior_inv) - sign * logdet + return self._comparison_loss(metric, vab, vcd) + reg_loss - def _gradient(self, metric): - dMetric = self.prior_inv_ - np.linalg.inv(metric) - dabs = np.sum(self.vab_.dot(metric) * self.vab_, axis=1) - dcds = np.sum(self.vcd_.dot(metric) * self.vcd_, axis=1) + def _gradient(self, metric, vab, vcd, prior_inv): + dMetric = prior_inv - np.linalg.inv(metric) + dabs = np.sum(vab.dot(metric) * vab, axis=1) + dcds = np.sum(vcd.dot(metric) * vcd, axis=1) violations = dabs > dcds # TODO: vectorize - for vab, dab, vcd, dcd in zip(self.vab_[violations], dabs[violations], - self.vcd_[violations], dcds[violations]): + for vab, dab, vcd, dcd in zip(vab[violations], dabs[violations], + vcd[violations], dcds[violations]): dMetric += ((1-np.sqrt(dcd/dab))*np.outer(vab, vab) + (1-np.sqrt(dab/dcd))*np.outer(vcd, vcd)) return dMetric @@ -136,6 +136,9 @@ class LSML(_BaseLSML, _QuadrupletsClassifierMixin): Attributes ---------- + n_iter_ : `int` + The number of iterations the solver has run. + transformer_ : `numpy.ndarray`, shape=(num_dims, n_features) The linear transformation ``L`` deduced from the learned Mahalanobis metric (See function `transformer_from_metric`.) @@ -169,6 +172,9 @@ class LSML_Supervised(_BaseLSML, TransformerMixin): Attributes ---------- + n_iter_ : `int` + The number of iterations the solver has run. + transformer_ : `numpy.ndarray`, shape=(num_dims, n_features) The linear transformation ``L`` deduced from the learned Mahalanobis metric (See function `transformer_from_metric`.) diff --git a/metric_learn/mlkr.py b/metric_learn/mlkr.py index 6b79638e..74a21a82 100644 --- a/metric_learn/mlkr.py +++ b/metric_learn/mlkr.py @@ -30,6 +30,9 @@ class MLKR(MahalanobisMixin, TransformerMixin): Attributes ---------- + n_iter_ : `int` + The number of iterations the solver has run. + transformer_ : `numpy.ndarray`, shape=(num_dims, n_features) The learned linear transformation ``L``. """ diff --git a/metric_learn/mmc.py b/metric_learn/mmc.py index b806a97e..f9d3690b 100644 --- a/metric_learn/mmc.py +++ b/metric_learn/mmc.py @@ -353,6 +353,9 @@ class MMC(_BaseMMC, _PairsClassifierMixin): Attributes ---------- + n_iter_ : `int` + The number of iterations the solver has run. + transformer_ : `numpy.ndarray`, shape=(num_dims, n_features) The linear transformation ``L`` deduced from the learned Mahalanobis metric (See function `transformer_from_metric`.) @@ -384,6 +387,9 @@ class MMC_Supervised(_BaseMMC, TransformerMixin): Attributes ---------- + n_iter_ : `int` + The number of iterations the solver has run. 
+ transformer_ : `numpy.ndarray`, shape=(num_dims, n_features) The linear transformation ``L`` deduced from the learned Mahalanobis metric (See function `transformer_from_metric`.) diff --git a/metric_learn/nca.py b/metric_learn/nca.py index 81045287..5abe52e3 100644 --- a/metric_learn/nca.py +++ b/metric_learn/nca.py @@ -24,6 +24,9 @@ class NCA(MahalanobisMixin, TransformerMixin): Attributes ---------- + n_iter_ : `int` + The number of iterations the solver has run. + transformer_ : `numpy.ndarray`, shape=(num_dims, n_features) The learned linear transformation ``L``. """ diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py index be45d3a3..78fc4ebc 100644 --- a/metric_learn/sdml.py +++ b/metric_learn/sdml.py @@ -58,18 +58,18 @@ def _fit(self, pairs, y): # set up prior M if self.use_cov: X = np.vstack({tuple(row) for row in pairs.reshape(-1, pairs.shape[2])}) - self.M_ = pinvh(np.atleast_2d(np.cov(X, rowvar = False))) + M = pinvh(np.atleast_2d(np.cov(X, rowvar = False))) else: - self.M_ = np.identity(pairs.shape[2]) + M = np.identity(pairs.shape[2]) diff = pairs[:, 0] - pairs[:, 1] loss_matrix = (diff.T * y).dot(diff) - P = self.M_ + self.balance_param * loss_matrix + P = M + self.balance_param * loss_matrix emp_cov = pinvh(P) # hack: ensure positive semidefinite emp_cov = emp_cov.T.dot(emp_cov) - _, self.M_ = graph_lasso(emp_cov, self.sparsity_param, verbose=self.verbose) + _, M = graph_lasso(emp_cov, self.sparsity_param, verbose=self.verbose) - self.transformer_ = transformer_from_metric(self.M_) + self.transformer_ = transformer_from_metric(M) return self From 297ad021d3cf33123b5eeac43b0c418f1a79630b Mon Sep 17 00:00:00 2001 From: William de Vazelhes <31916524+wdevazelhes@users.noreply.github.com> Date: Tue, 29 Jan 2019 17:32:59 +0100 Subject: [PATCH 087/210] [MRG] change bounds parameter of ITML_Supervised from init to fit (#163) * MAINT: remove variables not needed to store * Address review https://github.com/metric-learn/metric-learn/pull/159#pullrequestreview-195570695 * DOC: add more precise docstring * API: put parameter in fit, deprecate it in init, and also change previous deprecation tests names * Change remaining test names --- metric_learn/itml.py | 36 ++++++++++++++++++++++------------ test/metric_learn_test.py | 41 +++++++++++++++++++++++++++------------ test/test_base_metric.py | 6 +++--- 3 files changed, 56 insertions(+), 27 deletions(-) diff --git a/metric_learn/itml.py b/metric_learn/itml.py index 4316802c..a0ff05f9 100644 --- a/metric_learn/itml.py +++ b/metric_learn/itml.py @@ -202,8 +202,8 @@ class ITML_Supervised(_BaseITML, TransformerMixin): """ def __init__(self, gamma=1., max_iter=1000, convergence_threshold=1e-3, - num_labeled='deprecated', num_constraints=None, bounds=None, - A0=None, verbose=False, preprocessor=None): + num_labeled='deprecated', num_constraints=None, + bounds='deprecated', A0=None, verbose=False, preprocessor=None): """Initialize the supervised version of `ITML`. `ITML_Supervised` creates pairs of similar sample by taking same class @@ -222,14 +222,11 @@ def __init__(self, gamma=1., max_iter=1000, convergence_threshold=1e-3, be removed in 0.6.0. num_constraints: int, optional number of constraints to generate - bounds : `list` of two numbers - Bounds on similarity, aside slack variables, s.t. - ``d(a, b) < bounds_[0]`` for all given pairs of similar points ``a`` - and ``b``, and ``d(c, d) > bounds_[1]`` for all given pairs of - dissimilar points ``c`` and ``d``, with ``d`` the learned distance. 
- If not provided at initialization, bounds_[0] and bounds_[1] will be - set to the 5th and 95th percentile of the pairwise distances among all - points in the training data `X`. + bounds : Not used + .. deprecated:: 0.5.0 + `bounds` was deprecated in version 0.5.0 and will + be removed in 0.6.0. Set `bounds` at fit time instead : + `itml_supervised.fit(X, y, bounds=...)` A0 : (d x d) matrix, optional initial regularization matrix, defaults to identity verbose : bool, optional @@ -245,7 +242,7 @@ def __init__(self, gamma=1., max_iter=1000, convergence_threshold=1e-3, self.num_constraints = num_constraints self.bounds = bounds - def fit(self, X, y, random_state=np.random): + def fit(self, X, y, random_state=np.random, bounds=None): """Create constraints from labels and learn the ITML model. @@ -259,11 +256,26 @@ def fit(self, X, y, random_state=np.random): random_state : numpy.random.RandomState, optional If provided, controls random number generation. + + bounds : `list` of two numbers + Bounds on similarity, aside slack variables, s.t. + ``d(a, b) < bounds_[0]`` for all given pairs of similar points ``a`` + and ``b``, and ``d(c, d) > bounds_[1]`` for all given pairs of + dissimilar points ``c`` and ``d``, with ``d`` the learned distance. + If not provided at initialization, bounds_[0] and bounds_[1] will be + set to the 5th and 95th percentile of the pairwise distances among all + points in the training data `X`. """ + # TODO: remove these in v0.6.0 if self.num_labeled != 'deprecated': warnings.warn('"num_labeled" parameter is not used.' ' It has been deprecated in version 0.5.0 and will be' 'removed in 0.6.0', DeprecationWarning) + if self.bounds != 'deprecated': + warnings.warn('"bounds" parameter from initialization is not used.' + ' It has been deprecated in version 0.5.0 and will be' + 'removed in 0.6.0. Use the "bounds" parameter of this ' + 'fit method instead.', DeprecationWarning) X, y = self._prepare_inputs(X, y, ensure_min_samples=2) num_constraints = self.num_constraints if num_constraints is None: @@ -274,4 +286,4 @@ def fit(self, X, y, random_state=np.random): pos_neg = c.positive_negative_pairs(num_constraints, random_state=random_state) pairs, y = wrap_pairs(X, pos_neg) - return _BaseITML._fit(self, pairs, y, bounds=self.bounds) + return _BaseITML._fit(self, pairs, y, bounds=bounds) diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py index e4ce8cef..e1eace90 100644 --- a/test/metric_learn_test.py +++ b/test/metric_learn_test.py @@ -57,9 +57,10 @@ def test_iris(self): csep = class_separation(lsml.transform(self.iris_points), self.iris_labels) self.assertLess(csep, 0.8) # it's pretty terrible - def test_deprecation(self): - # test that the right deprecation message is thrown. - # TODO: remove in v.0.5 + def test_deprecation_num_labeled(self): + # test that a deprecation message is thrown if num_labeled is set at + # initialization + # TODO: remove in v.0.6 X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) y = np.array([1, 0, 1, 0]) lsml_supervised = LSML_Supervised(num_labeled=np.inf) @@ -77,9 +78,10 @@ def test_iris(self): csep = class_separation(itml.transform(self.iris_points), self.iris_labels) self.assertLess(csep, 0.2) - def test_deprecation(self): - # test that the right deprecation message is thrown. 
- # TODO: remove in v.0.5 + def test_deprecation_num_labeled(self): + # test that a deprecation message is thrown if num_labeled is set at + # initialization + # TODO: remove in v.0.6 X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) y = np.array([1, 0, 1, 0]) itml_supervised = ITML_Supervised(num_labeled=np.inf) @@ -88,6 +90,19 @@ def test_deprecation(self): 'removed in 0.6.0') assert_warns_message(DeprecationWarning, msg, itml_supervised.fit, X, y) + def test_deprecation_bounds(self): + # test that a deprecation message is thrown if bounds is set at + # initialization + # TODO: remove in v.0.6 + X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) + y = np.array([1, 0, 1, 0]) + itml_supervised = ITML_Supervised(bounds=None) + msg = ('"bounds" parameter from initialization is not used.' + ' It has been deprecated in version 0.5.0 and will be' + 'removed in 0.6.0. Use the "bounds" parameter of this ' + 'fit method instead.') + assert_warns_message(DeprecationWarning, msg, itml_supervised.fit, X, y) + class TestLMNN(MetricTestCase): def test_iris(self): @@ -143,9 +158,10 @@ def test_iris(self): csep = class_separation(sdml.transform(self.iris_points), self.iris_labels) self.assertLess(csep, 0.25) - def test_deprecation(self): - # test that the right deprecation message is thrown. - # TODO: remove in v.0.5 + def test_deprecation_num_labeled(self): + # test that a deprecation message is thrown if num_labeled is set at + # initialization + # TODO: remove in v.0.6 X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) y = np.array([1, 0, 1, 0]) sdml_supervised = SDML_Supervised(num_labeled=np.inf) @@ -370,9 +386,10 @@ def test_iris(self): csep = class_separation(mmc.transform(self.iris_points), self.iris_labels) self.assertLess(csep, 0.2) - def test_deprecation(self): - # test that the right deprecation message is thrown. 
- # TODO: remove in v.0.5 + def test_deprecation_num_labeled(self): + # test that a deprecation message is thrown if num_labeled is set at + # initialization + # TODO: remove in v.0.6 X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) y = np.array([1, 0, 1, 0]) mmc_supervised = MMC_Supervised(num_labeled=np.inf) diff --git a/test/test_base_metric.py b/test/test_base_metric.py index 09718c29..6c9a6dc5 100644 --- a/test/test_base_metric.py +++ b/test/test_base_metric.py @@ -36,9 +36,9 @@ def test_itml(self): preprocessor=None, verbose=False) """.strip('\n')) self.assertEqual(str(metric_learn.ITML_Supervised()), """ -ITML_Supervised(A0=None, bounds=None, convergence_threshold=0.001, gamma=1.0, - max_iter=1000, num_constraints=None, num_labeled='deprecated', - preprocessor=None, verbose=False) +ITML_Supervised(A0=None, bounds='deprecated', convergence_threshold=0.001, + gamma=1.0, max_iter=1000, num_constraints=None, + num_labeled='deprecated', preprocessor=None, verbose=False) """.strip('\n')) def test_lsml(self): From bf5c7224cc7ad4c025e15b247a80e076b7f75062 Mon Sep 17 00:00:00 2001 From: CJ Carey Date: Sat, 23 Feb 2019 13:57:29 -0500 Subject: [PATCH 088/210] Ignore .pytest_cache/ dir --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index c532a6cb..449f70ea 100644 --- a/.gitignore +++ b/.gitignore @@ -5,4 +5,5 @@ dist/ .coverage htmlcov/ .cache/ +.pytest_cache/ doc/auto_examples/* From 0edd6136513f02cea4c6f18fefc7432566ae1b4e Mon Sep 17 00:00:00 2001 From: William de Vazelhes <31916524+wdevazelhes@users.noreply.github.com> Date: Tue, 12 Mar 2019 14:18:12 +0100 Subject: [PATCH 089/210] [MRG] Add test coverage (#181) * Add coverage config * Try to add codecov command * Try to add badge * Add badge and run pytest --cov * install pytest-cov * Install pytest 3.6 because pytest 4 raises a lot of errors * Other test for code coverage * Do coverage only in python3.6 * Install pytets-cov only for python 3.6 * Try another syntax for codecov from https://hackernoon.com/integrating-travis-ci-and-codecov-into-a-python-based-project-6f658074ff63 * Go back to previous syntax codecov and add right badge link * select test for tests * add codecov token * Change repo name * Update branch name * Add reference to scikit-learn --- .codecov.yml | 23 +++++++++++++++++++++++ .gitignore | 3 +++ .travis.yml | 12 +++++++++++- README.rst | 5 +++-- 4 files changed, 40 insertions(+), 3 deletions(-) create mode 100644 .codecov.yml diff --git a/.codecov.yml b/.codecov.yml new file mode 100644 index 00000000..d22b2821 --- /dev/null +++ b/.codecov.yml @@ -0,0 +1,23 @@ +# taken from scikit-learn: +# https://github.com/scikit-learn/scikit-learn/blob/a7e17117bb15eb3f51ebccc1bd53e42fcb4e6cd8/.codecov.yml +comment: false + +coverage: + status: + project: + default: + # Commits pushed to master should not make the overall + # project coverage decrease by more than 1%: + target: auto + threshold: 1% + patch: + default: + # Be tolerant on slight code coverage diff on PRs to limit + # noisy red coverage status on github PRs. 
+ # Note The coverage stats are still uploaded + # to codecov so that PR reviewers can see uncovered lines + # in the github diff if they install the codecov browser + # extension: + # https://github.com/codecov/browser-extension + target: auto +threshold: 1% diff --git a/.gitignore b/.gitignore index 449f70ea..a51c1a82 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,6 @@ htmlcov/ .cache/ .pytest_cache/ doc/auto_examples/* +coverage +.coverage +.coverage* diff --git a/.travis.yml b/.travis.yml index 5daa20b3..2b1ac188 100644 --- a/.travis.yml +++ b/.travis.yml @@ -7,5 +7,15 @@ python: before_install: - pip install --upgrade pip - pip install wheel + - pip install codecov + - if [[ $TRAVIS_PYTHON_VERSION == "3.6" ]]; + then pip install pytest-cov pytest==3.6; + fi - pip install numpy scipy scikit-learn -script: pytest test +script: + - if [[ $TRAVIS_PYTHON_VERSION == "3.6" ]]; + then pytest test --cov; + else pytest test; + fi +after_success: + - codecov diff --git a/README.rst b/README.rst index 0db1f87a..7ce0f6dd 100644 --- a/README.rst +++ b/README.rst @@ -1,4 +1,4 @@ -|Travis-CI Build Status| |License| |PyPI version| +|Travis-CI Build Status| |License| |PyPI version| |Code coverage| metric-learn ============= @@ -52,4 +52,5 @@ more complete. :target: http://badges.mit-license.org .. |PyPI version| image:: https://badge.fury.io/py/metric-learn.svg :target: http://badge.fury.io/py/metric-learn - +.. |Code coverage| image:: https://codecov.io/gh/metric-learn/metric-learn/branch/master/graph/badge.svg + :target: https://codecov.io/gh/metric-learn/metric-learn?branch=master From a381eae25dac03bbea9163019cb0907ee2674c79 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Thu, 14 Mar 2019 11:18:19 +0100 Subject: [PATCH 090/210] Fix indent in .codecov.yml --- .codecov.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.codecov.yml b/.codecov.yml index d22b2821..421ca55c 100644 --- a/.codecov.yml +++ b/.codecov.yml @@ -20,4 +20,5 @@ coverage: # extension: # https://github.com/codecov/browser-extension target: auto -threshold: 1% + threshold: 1% + From 1d2f1e995468ded31e6713ff91769c363810aadd Mon Sep 17 00:00:00 2001 From: William de Vazelhes <31916524+wdevazelhes@users.noreply.github.com> Date: Thu, 14 Mar 2019 11:19:56 +0100 Subject: [PATCH 091/210] Fix indent in .codecov.yml (#182) --- .codecov.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.codecov.yml b/.codecov.yml index d22b2821..421ca55c 100644 --- a/.codecov.yml +++ b/.codecov.yml @@ -20,4 +20,5 @@ coverage: # extension: # https://github.com/codecov/browser-extension target: auto -threshold: 1% + threshold: 1% + From fc0ee466e21e6e54ea3589013ea343ee737bad3b Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Thu, 14 Mar 2019 14:17:08 +0100 Subject: [PATCH 092/210] Allow comments --- .codecov.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.codecov.yml b/.codecov.yml index 421ca55c..fb9982b0 100644 --- a/.codecov.yml +++ b/.codecov.yml @@ -1,6 +1,6 @@ # taken from scikit-learn: # https://github.com/scikit-learn/scikit-learn/blob/a7e17117bb15eb3f51ebccc1bd53e42fcb4e6cd8/.codecov.yml -comment: false +comment: true coverage: status: From 4e668d9ebed862987303de350ca62b70cbdb7ccb Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Thu, 14 Mar 2019 14:28:55 +0100 Subject: [PATCH 093/210] Add badges to documentation --- README.rst | 2 +- doc/index.rst | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index 
7ce0f6dd..c2a0a205 100644 --- a/README.rst +++ b/README.rst @@ -53,4 +53,4 @@ more complete. .. |PyPI version| image:: https://badge.fury.io/py/metric-learn.svg :target: http://badge.fury.io/py/metric-learn .. |Code coverage| image:: https://codecov.io/gh/metric-learn/metric-learn/branch/master/graph/badge.svg - :target: https://codecov.io/gh/metric-learn/metric-learn?branch=master + :target: https://codecov.io/gh/metric-learn/metric-learn diff --git a/doc/index.rst b/doc/index.rst index ed3f6ccb..3e4d0ce3 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -1,6 +1,6 @@ metric-learn: Metric Learning in Python ======================================= -|License| |PyPI version| +|Travis-CI Build Status| |License| |PyPI version| |Code coverage| Metric-learn contains efficient Python implementations of several popular supervised and weakly-supervised metric learning algorithms. The API @@ -34,7 +34,11 @@ Documentation outline :ref:`genindex` | :ref:`modindex` | :ref:`search` +.. |Travis-CI Build Status| image:: https://api.travis-ci.org/metric-learn/metric-learn.svg?branch=master + :target: https://travis-ci.org/metric-learn/metric-learn .. |PyPI version| image:: https://badge.fury.io/py/metric-learn.svg :target: http://badge.fury.io/py/metric-learn .. |License| image:: http://img.shields.io/:license-mit-blue.svg?style=flat :target: http://badges.mit-license.org +.. |Code coverage| image:: https://codecov.io/gh/metric-learn/metric-learn/branch/master/graph/badge.svg + :target: https://codecov.io/gh/metric-learn/metric-learn From 406552d1eaa90bfa0c7c13e3bb6e1925a07f8d89 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Thu, 14 Mar 2019 14:33:21 +0100 Subject: [PATCH 094/210] Use coverage syntax for uploading from https://docs.codecov.io/v4.3.0/docs/about-the-codecov-bash-uploader --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 2b1ac188..971a6ab7 100644 --- a/.travis.yml +++ b/.travis.yml @@ -18,4 +18,4 @@ script: else pytest test; fi after_success: - - codecov + - bash <(curl -s https://codecov.io/bash) From cd1861114889f59ed2a8d2290cae06c46b489601 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Thu, 14 Mar 2019 15:06:26 +0100 Subject: [PATCH 095/210] Launch pytest if version 3.4 not 3.6 --- .travis.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 971a6ab7..1ee51c7b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -8,12 +8,12 @@ before_install: - pip install --upgrade pip - pip install wheel - pip install codecov - - if [[ $TRAVIS_PYTHON_VERSION == "3.6" ]]; + - if [[ $TRAVIS_PYTHON_VERSION == "3.4" ]]; then pip install pytest-cov pytest==3.6; fi - pip install numpy scipy scikit-learn script: - - if [[ $TRAVIS_PYTHON_VERSION == "3.6" ]]; + - if [[ $TRAVIS_PYTHON_VERSION == "3.4" ]]; then pytest test --cov; else pytest test; fi From 0183d76c3761ac5244893aa8c94147942e2fc352 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Thu, 14 Mar 2019 15:51:14 +0100 Subject: [PATCH 096/210] Remove bot comments --- .codecov.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.codecov.yml b/.codecov.yml index fb9982b0..421ca55c 100644 --- a/.codecov.yml +++ b/.codecov.yml @@ -1,6 +1,6 @@ # taken from scikit-learn: # https://github.com/scikit-learn/scikit-learn/blob/a7e17117bb15eb3f51ebccc1bd53e42fcb4e6cd8/.codecov.yml -comment: true +comment: false coverage: status: From 3d6f22989cb0144933ff8ff6e6568c073988cd16 Mon Sep 17 00:00:00 2001 From: William de 
Vazelhes <31916524+wdevazelhes@users.noreply.github.com> Date: Thu, 14 Mar 2019 16:17:56 +0100 Subject: [PATCH 097/210] Add intersphinx (#184) --- doc/conf.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/doc/conf.py b/doc/conf.py index f0faa2f8..a11f8bba 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +import sys extensions = [ 'sphinx.ext.autodoc', @@ -8,7 +9,8 @@ 'sphinx.ext.mathjax', 'numpydoc', 'sphinx_gallery.gen_gallery', - 'sphinx.ext.doctest' + 'sphinx.ext.doctest', + 'sphinx.ext.intersphinx' ] templates_path = ['_templates'] @@ -39,3 +41,12 @@ # Option to hide doctests comments in the documentation (like # doctest: # +NORMALIZE_WHITESPACE for instance) trim_doctest_flags = True + +# intersphinx configuration +intersphinx_mapping = { + 'python': ('https://docs.python.org/{.major}'.format( + sys.version_info), None), + 'numpy': ('https://docs.scipy.org/doc/numpy/', None), + 'scipy': ('https://docs.scipy.org/doc/scipy/reference', None), + 'scikit-learn': ('https://scikit-learn.org/stable/', None) +} From 3490349a3610168439daa491965ed01cef7fad12 Mon Sep 17 00:00:00 2001 From: William de Vazelhes <31916524+wdevazelhes@users.noreply.github.com> Date: Thu, 21 Mar 2019 01:57:35 +0100 Subject: [PATCH 098/210] [MRG] Update pytest (#186) * TST: Remove useless fixture that made pytest 4 fail * CI: update travis to use latest pytest version * TST: remove fixtures that are not real fixtures --- .travis.yml | 4 +-- test/test_utils.py | 61 ++++++++++++++++++---------------------------- 2 files changed, 28 insertions(+), 37 deletions(-) diff --git a/.travis.yml b/.travis.yml index 1ee51c7b..cda5b00f 100644 --- a/.travis.yml +++ b/.travis.yml @@ -5,11 +5,11 @@ python: - "2.7" - "3.4" before_install: - - pip install --upgrade pip + - pip install --upgrade pip pytest - pip install wheel - pip install codecov - if [[ $TRAVIS_PYTHON_VERSION == "3.4" ]]; - then pip install pytest-cov pytest==3.6; + then pip install pytest-cov; fi - pip install numpy scipy scikit-learn script: diff --git a/test/test_utils.py b/test/test_utils.py index 5e640dbc..9099e12d 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -30,7 +30,6 @@ # and to_transform is some additional data that we would want to transform -@pytest.fixture def build_classification(with_preprocessor=False): """Basic array for testing when using a preprocessor""" X, y = shuffle(*make_blobs(random_state=SEED), @@ -42,7 +41,6 @@ def build_classification(with_preprocessor=False): return Dataset(X[indices], y[indices], None, X[indices]) -@pytest.fixture def build_regression(with_preprocessor=False): """Basic array for testing when using a preprocessor""" X, y = shuffle(*make_regression(n_samples=100, n_features=5, @@ -162,7 +160,6 @@ def test_check_input_invalid_type_of_inputs(type_of_inputs): # ---------------- test check_input with 'tuples' type_of_input' ------------ -@pytest.fixture def tuples_prep(): """Basic array for testing when using a preprocessor""" tuples = np.array([[1, 2], [2, 3]]) return tuples -@pytest.fixture def tuples_no_prep(): """Basic array for testing when using no preprocessor""" tuples = np.array([[[1., 2.3], [2.3, 5.3]], @@ -252,15 +248,15 @@ def test_check_tuples_invalid_shape(estimator, context, tuples, found, @pytest.mark.parametrize('estimator, context', [(NCA(), " by NCA"), ('NCA', " by NCA"), (None, "")]) -def test_check_tuples_invalid_n_features(estimator, context, tuples_no_prep): +def
test_check_tuples_invalid_n_features(estimator, context): """Checks that the right warning is printed if not enough features Here we only test if no preprocessor (otherwise we don't ensure this) """ msg = ("Found array with 2 feature(s) (shape={}) while" - " a minimum of 3 is required{}.".format(tuples_no_prep.shape, + " a minimum of 3 is required{}.".format(tuples_no_prep().shape, context)) with pytest.raises(ValueError) as raised_error: - check_input(tuples_no_prep, type_of_inputs='tuples', + check_input(tuples_no_prep(), type_of_inputs='tuples', preprocessor=None, ensure_min_features=3, estimator=estimator) assert str(raised_error.value) == msg @@ -317,8 +313,7 @@ def preprocessor(indices): # assert str(raised_warning[0].message) == msg -def test_check_tuples_invalid_dtype_not_convertible_with_preprocessor( - tuples_prep): +def test_check_tuples_invalid_dtype_not_convertible_with_preprocessor(): """Checks that a value error is thrown if attempting to convert an input not convertible to float, when using a preprocessor """ @@ -328,31 +323,30 @@ def preprocessor(indices): return np.full((indices.shape[0], 3), 'a') with pytest.raises(ValueError): - check_input(tuples_prep, type_of_inputs='tuples', + check_input(tuples_prep(), type_of_inputs='tuples', preprocessor=preprocessor, dtype=np.float64) -def test_check_tuples_invalid_dtype_not_convertible_without_preprocessor( - tuples_no_prep): +def test_check_tuples_invalid_dtype_not_convertible_without_preprocessor(): """Checks that a value error is thrown if attempting to convert an input not convertible to float, when using no preprocessor """ - tuples = np.full_like(tuples_no_prep, 'a', dtype=object) + tuples = np.full_like(tuples_no_prep(), 'a', dtype=object) with pytest.raises(ValueError): check_input(tuples, type_of_inputs='tuples', preprocessor=None, dtype=np.float64) @pytest.mark.parametrize('tuple_size', [2, None]) -def test_check_tuples_valid_tuple_size(tuple_size, tuples_prep, tuples_no_prep): +def test_check_tuples_valid_tuple_size(tuple_size): """For inputs that have the right matrix dimension (2D or 3D for instance), checks that checking the number of tuples (pairs, quadruplets, etc) raises no warning if there is the right number of points in a tuple. 
""" with pytest.warns(None) as record: - check_input(tuples_prep, type_of_inputs='tuples', + check_input(tuples_prep(), type_of_inputs='tuples', preprocessor=mock_preprocessor, tuple_size=tuple_size) - check_input(tuples_no_prep, type_of_inputs='tuples', preprocessor=None, + check_input(tuples_no_prep(), type_of_inputs='tuples', preprocessor=None, tuple_size=tuple_size) assert len(record) == 0 @@ -400,7 +394,7 @@ def test_check_tuples_valid_without_preprocessor(tuples): assert len(record) == 0 -def test_check_tuples_behaviour_auto_dtype(tuples_no_prep): +def test_check_tuples_behaviour_auto_dtype(): """Checks that check_tuples allows by default every type if using a preprocessor, and numeric types if using no preprocessor""" tuples_prep = [['img1.png', 'img2.png'], ['img3.png', 'img5.png']] @@ -410,15 +404,15 @@ def test_check_tuples_behaviour_auto_dtype(tuples_no_prep): assert len(record) == 0 with pytest.warns(None) as record: - check_input(tuples_no_prep, type_of_inputs='tuples') # numeric type + check_input(tuples_no_prep(), type_of_inputs='tuples') # numeric type assert len(record) == 0 # not numeric type - tuples_no_prep = np.array([[['img1.png'], ['img2.png']], - [['img3.png'], ['img5.png']]]) - tuples_no_prep = tuples_no_prep.astype(object) + tuples_no_prep_bis = np.array([[['img1.png'], ['img2.png']], + [['img3.png'], ['img5.png']]]) + tuples_no_prep_bis = tuples_no_prep_bis.astype(object) with pytest.raises(ValueError): - check_input(tuples_no_prep, type_of_inputs='tuples') + check_input(tuples_no_prep_bis, type_of_inputs='tuples') def test_check_tuples_invalid_complex_data(): @@ -436,14 +430,12 @@ def test_check_tuples_invalid_complex_data(): # ------------- test check_input with 'classic' type_of_inputs ---------------- -@pytest.fixture def points_prep(): """Basic array for testing when using a preprocessor""" points = np.array([1, 2]) return points -@pytest.fixture def points_no_prep(): """Basic array for testing when using no preprocessor""" points = np.array([[1., 2.3], @@ -484,17 +476,16 @@ def test_check_classic_invalid_shape(estimator, context, points, found, @pytest.mark.parametrize('estimator, context', [(NCA(), " by NCA"), ('NCA', " by NCA"), (None, "")]) -def test_check_classic_invalid_n_features(estimator, context, - points_no_prep): +def test_check_classic_invalid_n_features(estimator, context): """Checks that the right warning is printed if not enough features Here we only test if no preprocessor (otherwise we don't ensure this) """ msg = ("Found array with 2 feature(s) (shape={}) while" - " a minimum of 3 is required{}.".format(points_no_prep.shape, + " a minimum of 3 is required{}.".format(points_no_prep().shape, context)) with pytest.raises(ValueError) as raised_error: - check_input(points_no_prep, type_of_inputs='classic', preprocessor=None, - ensure_min_features=3, + check_input(points_no_prep(), type_of_inputs='classic', + preprocessor=None, ensure_min_features=3, estimator=estimator) assert str(raised_error.value) == msg @@ -610,7 +601,7 @@ def test_check_classic_by_default(): check_input([[2, 3], [3, 2]], type_of_inputs='classic')).all() -def test_check_classic_behaviour_auto_dtype(points_no_prep): +def test_check_classic_behaviour_auto_dtype(): """Checks that check_input (for points) allows by default every type if using a preprocessor, and numeric types if using no preprocessor""" points_prep = ['img1.png', 'img2.png', 'img3.png', 'img5.png'] @@ -620,15 +611,15 @@ def test_check_classic_behaviour_auto_dtype(points_no_prep): assert len(record) == 0 with 
pytest.warns(None) as record: - check_input(points_no_prep, type_of_inputs='classic') # numeric type + check_input(points_no_prep(), type_of_inputs='classic') # numeric type assert len(record) == 0 # not numeric type - points_no_prep = np.array(['img1.png', 'img2.png', 'img3.png', - 'img5.png']) - points_no_prep = points_no_prep.astype(object) + points_no_prep_bis = np.array(['img1.png', 'img2.png', 'img3.png', + 'img5.png']) + points_no_prep_bis = points_no_prep_bis.astype(object) with pytest.raises(ValueError): - check_input(points_no_prep, type_of_inputs='classic') + check_input(points_no_prep_bis, type_of_inputs='classic') def test_check_classic_invalid_complex_data(): From 4e37d7c5de4bbbd762856e13e745b9ac667141b3 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Thu, 21 Mar 2019 15:46:08 +0100 Subject: [PATCH 099/210] Ignore test folder in test coverage --- .codecov.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.codecov.yml b/.codecov.yml index 421ca55c..f01db0a4 100644 --- a/.codecov.yml +++ b/.codecov.yml @@ -1,3 +1,6 @@ +ignore: + - "test" + # taken from scikit-learn: # https://github.com/scikit-learn/scikit-learn/blob/a7e17117bb15eb3f51ebccc1bd53e42fcb4e6cd8/.codecov.yml comment: false From e8c74d0598a12f08f8575832de06ea184de7c1d5 Mon Sep 17 00:00:00 2001 From: William de Vazelhes <31916524+wdevazelhes@users.noreply.github.com> Date: Fri, 22 Mar 2019 10:49:45 +0100 Subject: [PATCH 100/210] [MRG] FIX: sdml formulation and solvers (#162) * FIX: make proposal for sdml formulation * MAINT clearer formulation to make the prior appear * MAINT call the prior prior * Use skggm instead of graphical lasso * Be more severe for the class separation * Put back verbose param * MAINT: make more explicit the fact that we use identity (i.e. an SPD matrix) as initialization * Add skggm as a requirement for SDML * Add skggm to required packages for travis * Also add cython as a dependency * FIX: install all except skggm and then skggm * Remove cython dependency * Install skggm only if we have at least python 3.6 * Should work if we want other versions superior to 3.6 * Fix bash >= which should be written -ge * Deal with tests when skggm is not installed and fix some PEP8 warnings * replace manual calls of algorithms with tuples_learners * Remove another call of SDML if skggm is not installed * FIX fix the test_error_message_tuple_size * FIX fix test_sdml_supervised * FIX: fix another sdml test * FIX quic call for python 2.7 * Fix quic import * Add Sigma0 initialization (both sigma zero and theta zero should be specified, otherwise an error is returned) * Deal with SDML making some tests fail * Remove epsilon that was unnecessary * FIX: use latest commit of skggm that fixes the non-deterministic problem * MAINT: add message for SDML when not SPD * MAINT: add test for error message if skggm not installed * Try other syntax for installing the right commit of skggm * MAINT: make sklearn compat sdml test be run only if skggm is installed * Try another syntax for running travis * Better bash syntax * Fix tests by removing duplicates * FIX: fix for sdml by reducing balance parameter * FIX: update code to work with old version of numpy that does not have axis for unique * Remove the need for skggm * Update travis not to use skggm * Add a stable init for sklearn checks * FIX test_sdml_supervised * Revert "Update travis not to use skggm" This reverts commit 57b0567e7abbdd1a87560b4f81cad895b61a995f.
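The Sigma0 initialization mentioned above needs a positive definite starting point, obtained by translating the eigenvalues of the (possibly indefinite) input matrix. A toy numpy sketch of that idea (illustrative values, not the patched code verbatim)::

    import numpy as np

    emp_cov = np.array([[1., 0.], [0., -2.]])  # indefinite: one eigenvalue is -2
    w, V = np.linalg.eigh(emp_cov)
    w = w - min(w.min(), 0.) + 1e-10           # shift the spectrum above zero
    sigma0 = (V * w).dot(V.T)                  # rebuilt matrix is positive definite
    assert np.all(np.linalg.eigvalsh(sigma0) > 0)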
* Add fallback on skggm * FIX: fix versions comparison and tests * MAINT: improve test of no warning * FIX: fix wrap pairs that was returning column y (we need line y), and fix the example for SDML to not raise another warning * FIX: force travis to do the right check * TST: add non SPD test that works with skggm's quic but not sklearn's graphical_lasso * Try again travis this time installing cython * Try to make travis work with build_essential * Try with installing liblapack * TST: fix tests for when skggm is not installed * TST: use better pytest skipif syntax * FIX: fix broken link in README.md * use rst syntax for link * use rst syntax for link * use rst syntax for link * MAINT: remove test_sdml that was remaining from drafts tests * TST: remove skipping SDML in test_cross_validation_manual_vs_scikit * FIX link also in getting started * Put back right indent * Remove unnecessary changes * Nitpick for concatenation and refactor HAS_SKGGM * ENH: Deal better with errors and skggm/scikit-learn * Better creation of prior * Simplification for init of sdml * Put skggm as optional * Specify skggm version * TST: make test about 1 feature arrays more readable * DOC: fix rst formatting * DOC: reformulated skggm optional dependency * TST: give an example for sdml_supervised with skggm where it indeed fails * TST: fix test that fails weirdly when executing the whole test file and not just the test * Revert "TST: fix test that fails weirdly when executing the whole test file and not just the test" This reverts commit 6f5666b587219895e3e0c516a8796bd0cfe1979e. * Add coverage for all versions of python * Install pytest-cov for all versions --- .travis.yml | 19 +- README.rst | 7 +- doc/getting_started.rst | 7 +- metric_learn/constraints.py | 2 +- metric_learn/sdml.py | 81 +++++-- setup.py | 1 + test/metric_learn_test.py | 234 ++++++++++++++++++++- test/test_fit_transform.py | 10 +- test/test_mahalanobis_mixin.py | 18 +- test/test_sklearn_compat.py | 15 +- test/test_transformer_metric_conversion.py | 3 +- test/test_utils.py | 41 ++-- 12 files changed, 375 insertions(+), 63 deletions(-) diff --git a/.travis.yml b/.travis.yml index cda5b00f..f5527089 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,18 +4,19 @@ cache: pip python: - "2.7" - "3.4" + - "3.6" before_install: + - sudo apt-get install liblapack-dev - pip install --upgrade pip pytest - - pip install wheel - - pip install codecov - - if [[ $TRAVIS_PYTHON_VERSION == "3.4" ]]; - then pip install pytest-cov; + - pip install wheel cython numpy scipy scikit-learn codecov pytest-cov + - if [[ ($TRAVIS_PYTHON_VERSION == "3.6") || + ($TRAVIS_PYTHON_VERSION == "2.7")]]; then + pip install git+https://github.com/skggm/skggm.git@a0ed406586c4364ea3297a658f415e13b5cbdaf8; fi - - pip install numpy scipy scikit-learn script: - - if [[ $TRAVIS_PYTHON_VERSION == "3.4" ]]; - then pytest test --cov; - else pytest test; - fi + # we do coverage for all versions so that codecov will merge them: this + # way we will see that both paths (with or without skggm) are tested + - pytest test --cov; after_success: - bash <(curl -s https://codecov.io/bash) + diff --git a/README.rst b/README.rst index c2a0a205..e1bfca51 100644 --- a/README.rst +++ b/README.rst @@ -21,7 +21,12 @@ Metric Learning algorithms in Python. - Python 2.7+, 3.4+ - numpy, scipy, scikit-learn -- (for running the examples only: matplotlib) + +**Optional dependencies** + +- For SDML, using skggm will allow the algorithm to solve problematic cases + (install from commit `a0ed406 `_). 
+- For running the examples only: matplotlib **Installation/Setup** diff --git a/doc/getting_started.rst b/doc/getting_started.rst index 040adedc..2d2df25e 100644 --- a/doc/getting_started.rst +++ b/doc/getting_started.rst @@ -16,7 +16,12 @@ Alternately, download the source repository and run: - Python 2.7+, 3.4+ - numpy, scipy, scikit-learn -- (for running the examples only: matplotlib) + +**Optional dependencies** + +- For SDML, using skggm will allow the algorithm to solve problematic cases + (install from commit `a0ed406 `_). +- For running the examples only: matplotlib **Notes** diff --git a/metric_learn/constraints.py b/metric_learn/constraints.py index c4ddcae8..e591830b 100644 --- a/metric_learn/constraints.py +++ b/metric_learn/constraints.py @@ -96,6 +96,6 @@ def wrap_pairs(X, constraints): c = np.array(constraints[2]) d = np.array(constraints[3]) constraints = np.vstack((np.column_stack((a, b)), np.column_stack((c, d)))) - y = np.vstack([np.ones((len(a), 1)), - np.ones((len(c), 1))]) + y = np.concatenate([np.ones_like(a), -np.ones_like(c)]) pairs = X[constraints] return pairs, y diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py index 78fc4ebc..590fbfb2 100644 --- a/metric_learn/sdml.py +++ b/metric_learn/sdml.py @@ -12,12 +12,19 @@ import warnings import numpy as np from sklearn.base import TransformerMixin -from sklearn.covariance import graph_lasso -from sklearn.utils.extmath import pinvh +from scipy.linalg import pinvh +from sklearn.covariance import graphical_lasso +from sklearn.exceptions import ConvergenceWarning from .base_metric import MahalanobisMixin, _PairsClassifierMixin from .constraints import Constraints, wrap_pairs from ._util import transformer_from_metric +try: + from inverse_covariance import quic +except ImportError: + HAS_SKGGM = False +else: + HAS_SKGGM = True class _BaseSDML(MahalanobisMixin): @@ -52,24 +59,74 @@ def __init__(self, balance_param=0.5, sparsity_param=0.01, use_cov=True, super(_BaseSDML, self).__init__(preprocessor) def _fit(self, pairs, y): + if not HAS_SKGGM: + if self.verbose: + print("SDML will use scikit-learn's graphical lasso solver.") + else: + if self.verbose: + print("SDML will use skggm's graphical lasso solver.") pairs, y = self._prepare_inputs(pairs, y, type_of_inputs='tuples') - # set up prior M + # set up (the inverse of) the prior M if self.use_cov: X = np.vstack({tuple(row) for row in pairs.reshape(-1, pairs.shape[2])}) - M = pinvh(np.atleast_2d(np.cov(X, rowvar = False))) + prior_inv = np.atleast_2d(np.cov(X, rowvar=False)) else: - M = np.identity(pairs.shape[2]) + prior_inv = np.identity(pairs.shape[2]) diff = pairs[:, 0] - pairs[:, 1] loss_matrix = (diff.T * y).dot(diff) - P = M + self.balance_param * loss_matrix - emp_cov = pinvh(P) - # hack: ensure positive semidefinite - emp_cov = emp_cov.T.dot(emp_cov) - _, M = graph_lasso(emp_cov, self.sparsity_param, verbose=self.verbose) - - self.transformer_ = transformer_from_metric(M) + emp_cov = prior_inv + self.balance_param * loss_matrix + + # our initialization will be the matrix with emp_cov's eigenvalues, + # with a constant added so that they are all positive (plus an epsilon + # to ensure definiteness). This is empirical. + w, V = np.linalg.eigh(emp_cov) + min_eigval = np.min(w) + if min_eigval < 0.: + warnings.warn("Warning, the input matrix of graphical lasso is not " + "positive semi-definite (PSD). The algorithm may diverge, " + "and lead to degenerate solutions. 
" + "To prevent that, try to decrease the balance parameter " + "`balance_param` and/or to set use_covariance=False.", + ConvergenceWarning) + w -= min_eigval # we translate the eigenvalues to make them all positive + w += 1e-10 # we add a small offset to avoid definiteness problems + sigma0 = (V * w).dot(V.T) + try: + if HAS_SKGGM: + theta0 = pinvh(sigma0) + M, _, _, _, _, _ = quic(emp_cov, lam=self.sparsity_param, + msg=self.verbose, + Theta0=theta0, Sigma0=sigma0) + else: + _, M = graphical_lasso(emp_cov, alpha=self.sparsity_param, + verbose=self.verbose, + cov_init=sigma0) + raised_error = None + w_mahalanobis, _ = np.linalg.eigh(M) + not_spd = any(w_mahalanobis < 0.) + not_finite = not np.isfinite(M).all() + except Exception as e: + raised_error = e + not_spd = False # not_spd not applicable here so we set to False + not_finite = False # not_finite not applicable here so we set to False + if raised_error is not None or not_spd or not_finite: + msg = ("There was a problem in SDML when using {}'s graphical " + "lasso solver.").format("skggm" if HAS_SKGGM else "scikit-learn") + if not HAS_SKGGM: + skggm_advice = (" skggm's graphical lasso can sometimes converge " + "on non SPD cases where scikit-learn's graphical " + "lasso fails to converge. Try to install skggm and " + "rerun the algorithm (see the README.md for the " + "right version of skggm).") + msg += skggm_advice + if raised_error is not None: + msg += " The following error message was thrown: {}.".format( + raised_error) + raise RuntimeError(msg) + + self.transformer_ = transformer_from_metric(np.atleast_2d(M)) return self diff --git a/setup.py b/setup.py index 168fbcb6..dfb20fc0 100755 --- a/setup.py +++ b/setup.py @@ -38,6 +38,7 @@ extras_require=dict( docs=['sphinx', 'shinx_rtd_theme', 'numpydoc'], demo=['matplotlib'], + sdml=['skggm>=0.2.9'] ), test_suite='test', keywords=[ diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py index e1eace90..ae9a8657 100644 --- a/test/metric_learn_test.py +++ b/test/metric_learn_test.py @@ -10,10 +10,15 @@ from sklearn.utils.testing import assert_warns_message from sklearn.exceptions import ConvergenceWarning from sklearn.utils.validation import check_X_y - -from metric_learn import ( - LMNN, NCA, LFDA, Covariance, MLKR, MMC, - LSML_Supervised, ITML_Supervised, SDML_Supervised, RCA_Supervised, MMC_Supervised) +try: + from inverse_covariance import quic +except ImportError: + HAS_SKGGM = False +else: + HAS_SKGGM = True +from metric_learn import (LMNN, NCA, LFDA, Covariance, MLKR, MMC, + LSML_Supervised, ITML_Supervised, SDML_Supervised, + RCA_Supervised, MMC_Supervised, SDML) # Import this specially for testing. 
from metric_learn.constraints import wrap_pairs from metric_learn.lmnn import python_LMNN @@ -148,28 +153,237 @@ def test_no_twice_same_objective(capsys): class TestSDML(MetricTestCase): + + @pytest.mark.skipif(HAS_SKGGM, + reason="The warning can be thrown only if skggm is " + "not installed.") + def test_sdml_supervised_raises_warning_msg_not_installed_skggm(self): + """Tests that the right warning message is raised if someone tries to + use SDML_Supervised but has not installed skggm, and that the algorithm + fails to converge""" + # TODO: remove if we don't need skggm anymore + # load_iris: dataset where we know scikit-learn's graphical lasso fails + # with a Floating Point error + X, y = load_iris(return_X_y=True) + sdml_supervised = SDML_Supervised(balance_param=0.5, use_cov=True, + sparsity_param=0.01) + msg = ("There was a problem in SDML when using scikit-learn's graphical " + "lasso solver. skggm's graphical lasso can sometimes converge on " + "non SPD cases where scikit-learn's graphical lasso fails to " + "converge. Try to install skggm and rerun the algorithm (see " + "the README.md for the right version of skggm). The following " + "error message was thrown:") + with pytest.raises(RuntimeError) as raised_error: + sdml_supervised.fit(X, y) + assert str(raised_error.value).startswith(msg) + + @pytest.mark.skipif(HAS_SKGGM, + reason="The warning can be thrown only if skggm is " + "not installed.") + def test_sdml_raises_warning_msg_not_installed_skggm(self): + """Tests that the right warning message is raised if someone tries to + use SDML but has not installed skggm, and that the algorithm fails to + converge""" + # TODO: remove if we don't need skggm anymore + # case on which we know that scikit-learn's graphical lasso fails + # because it will return a non SPD matrix + pairs = np.array([[[-10., 0.], [10., 0.]], [[0., 50.], [0., -60]]]) + y_pairs = [1, -1] + sdml = SDML(use_cov=False, balance_param=100, verbose=True) + + msg = ("There was a problem in SDML when using scikit-learn's graphical " + "lasso solver. skggm's graphical lasso can sometimes converge on " + "non SPD cases where scikit-learn's graphical lasso fails to " + "converge. 
Try to install skggm and rerun the algorithm (see " + "the README.md for the right version of skggm).") + with pytest.raises(RuntimeError) as raised_error: + sdml.fit(pairs, y_pairs) + assert msg == str(raised_error.value) + + @pytest.mark.skipif(not HAS_SKGGM, + reason="The warning can be thrown only if skggm is " + "installed.") + def test_sdml_raises_warning_msg_installed_skggm(self): + """Tests that the right error message is raised if someone uses SDML + with skggm installed but skggm's graphical lasso solver fails to + converge""" + # TODO: remove if we don't need skggm anymore + # case on which we know that skggm's graphical lasso fails + # because it will return non finite values + pairs = np.array([[[-10., 0.], [10., 0.]], [[0., 50.], [0., -60]]]) + y_pairs = [1, -1] + sdml = SDML(use_cov=False, balance_param=100, verbose=True) + + msg = ("There was a problem in SDML when using skggm's graphical " + "lasso solver.") + with pytest.raises(RuntimeError) as raised_error: + sdml.fit(pairs, y_pairs) + assert msg == str(raised_error.value) + + @pytest.mark.skipif(not HAS_SKGGM, + reason="The warning can be thrown only if skggm is " + "installed.") + def test_sdml_supervised_raises_warning_msg_installed_skggm(self): + """Tests that the right error message is raised if someone uses + SDML_Supervised with skggm installed but skggm's graphical lasso solver + fails to converge""" + # TODO: remove if we don't need skggm anymore + # case on which we know that skggm's graphical lasso fails + # because it will return non finite values + rng = np.random.RandomState(42) + # This example will create a diagonal em_cov with a negative coeff ( + # pathological case) + X = np.array([[-10., 0.], [10., 0.], [5., 0.], [3., 0.]]) + y = [0, 0, 1, 1] + sdml_supervised = SDML_Supervised(balance_param=0.5, use_cov=False, + sparsity_param=0.01) + msg = ("There was a problem in SDML when using skggm's graphical " + "lasso solver.") + with pytest.raises(RuntimeError) as raised_error: + sdml_supervised.fit(X, y, random_state=rng) + assert msg == str(raised_error.value) + + @pytest.mark.skipif(not HAS_SKGGM, + reason="It's only in the case where skggm is installed " + "that no warning should be thrown.") + def test_raises_no_warning_installed_skggm(self): + # otherwise we should be able to instantiate and fit SDML and it + # should raise no warning + pairs = np.array([[[-10., 0.], [10., 0.]], [[0., -55.], [0., -60]]]) + y_pairs = [1, -1] + X, y = make_classification(random_state=42) + with pytest.warns(None) as record: + sdml = SDML() + sdml.fit(pairs, y_pairs) + assert len(record) == 0 + with pytest.warns(None) as record: + sdml = SDML_Supervised(use_cov=False, balance_param=1e-5) + sdml.fit(X, y) + assert len(record) == 0 + def test_iris(self): # Note: this is a flaky test, which fails for certain seeds. # TODO: un-flake it!
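Throughout these tests, use_cov=False combined with a small balance_param keeps the input of the graphical lasso well conditioned: the matrix handed to the solver is identity + balance_param * loss_matrix, which stays close to identity (hence SPD) when balance_param is tiny. A self-contained sketch with toy pairs (illustrative values only)::

    import numpy as np
    from metric_learn import SDML

    pairs = np.array([[[-10., 0.], [10., 0.]],
                      [[0., -55.], [0., -60.]]])
    y_pairs = [1, -1]
    sdml = SDML(use_cov=False, balance_param=1e-5)
    sdml.fit(pairs, y_pairs)
    assert np.isfinite(sdml.get_mahalanobis_matrix()).all()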
rs = np.random.RandomState(5555) - sdml = SDML_Supervised(num_constraints=1500) + sdml = SDML_Supervised(num_constraints=1500, use_cov=False, + balance_param=5e-5) sdml.fit(self.iris_points, self.iris_labels, random_state=rs) - csep = class_separation(sdml.transform(self.iris_points), self.iris_labels) - self.assertLess(csep, 0.25) + csep = class_separation(sdml.transform(self.iris_points), + self.iris_labels) + self.assertLess(csep, 0.22) def test_deprecation_num_labeled(self): # test that a deprecation message is thrown if num_labeled is set at # initialization # TODO: remove in v.0.6 - X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) - y = np.array([1, 0, 1, 0]) - sdml_supervised = SDML_Supervised(num_labeled=np.inf) + X, y = make_classification(random_state=42) + sdml_supervised = SDML_Supervised(num_labeled=np.inf, use_cov=False, + balance_param=5e-5) msg = ('"num_labeled" parameter is not used.' ' It has been deprecated in version 0.5.0 and will be' 'removed in 0.6.0') assert_warns_message(DeprecationWarning, msg, sdml_supervised.fit, X, y) + def test_sdml_raises_warning_non_psd(self): + """Tests that SDML raises a warning on a toy example where we know the + pseudo-covariance matrix is not PSD""" + pairs = np.array([[[-10., 0.], [10., 0.]], [[0., 50.], [0., -60]]]) + y = [1, -1] + sdml = SDML(use_cov=True, sparsity_param=0.01, balance_param=0.5) + msg = ("Warning, the input matrix of graphical lasso is not " + "positive semi-definite (PSD). The algorithm may diverge, " + "and lead to degenerate solutions. " + "To prevent that, try to decrease the balance parameter " + "`balance_param` and/or to set use_covariance=False.") + with pytest.warns(ConvergenceWarning) as raised_warning: + try: + sdml.fit(pairs, y) + except Exception: + pass + # we assert that this warning is in one of the warning raised by the + # estimator + assert msg in list(map(lambda w: str(w.message), raised_warning)) + + def test_sdml_converges_if_psd(self): + """Tests that sdml converges on a simple problem where we know the + pseudo-covariance matrix is PSD""" + pairs = np.array([[[-10., 0.], [10., 0.]], [[0., -55.], [0., -60]]]) + y = [1, -1] + sdml = SDML(use_cov=True, sparsity_param=0.01, balance_param=0.5) + sdml.fit(pairs, y) + assert np.isfinite(sdml.get_mahalanobis_matrix()).all() + + @pytest.mark.skipif(not HAS_SKGGM, + reason="sklearn's graphical_lasso can sometimes not " + "work on some non SPD problems. We test that " + "is works only if skggm is installed.") + def test_sdml_works_on_non_spd_pb_with_skggm(self): + """Test that SDML works on a certain non SPD problem on which we know + it should work, but scikit-learn's graphical_lasso does not work""" + X, y = load_iris(return_X_y=True) + sdml = SDML_Supervised(balance_param=0.5, sparsity_param=0.01, + use_cov=True) + sdml.fit(X, y) + + +@pytest.mark.skipif(not HAS_SKGGM, + reason='The message should be printed only if skggm is ' + 'installed.') +def test_verbose_has_installed_skggm_sdml(capsys): + # Test that if users have installed skggm, a message is printed telling them + # skggm's solver is used (when they use SDML) + # TODO: remove if we don't need skggm anymore + pairs = np.array([[[-10., 0.], [10., 0.]], [[0., -55.], [0., -60]]]) + y_pairs = [1, -1] + sdml = SDML(verbose=True) + sdml.fit(pairs, y_pairs) + out, _ = capsys.readouterr() + assert "SDML will use skggm's graphical lasso solver." 
in out + + +@pytest.mark.skipif(not HAS_SKGGM, + reason='The message should be printed only if skggm is ' + 'installed.') +def test_verbose_has_installed_skggm_sdml_supervised(capsys): + # Test that if users have installed skggm, a message is printed telling them + # skggm's solver is used (when they use SDML_Supervised) + # TODO: remove if we don't need skggm anymore + X, y = make_classification(random_state=42) + sdml = SDML_Supervised(verbose=True) + sdml.fit(X, y) + out, _ = capsys.readouterr() + assert "SDML will use skggm's graphical lasso solver." in out + + +@pytest.mark.skipif(HAS_SKGGM, + reason='The message should be printed only if skggm is ' + 'not installed.') +def test_verbose_has_not_installed_skggm_sdml(capsys): + # Test that if users have not installed skggm, a message is printed telling + # them scikit-learn's solver is used (when they use SDML) + # TODO: remove if we don't need skggm anymore + pairs = np.array([[[-10., 0.], [10., 0.]], [[0., -55.], [0., -60]]]) + y_pairs = [1, -1] + sdml = SDML(verbose=True) + sdml.fit(pairs, y_pairs) + out, _ = capsys.readouterr() + assert "SDML will use scikit-learn's graphical lasso solver." in out + + +@pytest.mark.skipif(HAS_SKGGM, + reason='The message should be printed only if skggm is ' + 'not installed.') +def test_verbose_has_not_installed_skggm_sdml_supervised(capsys): + # Test that if users have not installed skggm, a message is printed telling + # them scikit-learn's solver is used (when they use SDML_Supervised) + # TODO: remove if we don't need skggm anymore + X, y = make_classification(random_state=42) + sdml = SDML_Supervised(verbose=True, balance_param=1e-5, use_cov=False) + sdml.fit(X, y) + out, _ = capsys.readouterr() + assert "SDML will use scikit-learn's graphical lasso solver." in out + class TestNCA(MetricTestCase): def test_iris(self): diff --git a/test/test_fit_transform.py b/test/test_fit_transform.py index 118f6b90..b85e9273 100644 --- a/test/test_fit_transform.py +++ b/test/test_fit_transform.py @@ -1,3 +1,4 @@ +import pytest import unittest import numpy as np from sklearn.datasets import load_iris @@ -5,7 +6,8 @@ from metric_learn import ( LMNN, NCA, LFDA, Covariance, MLKR, - LSML_Supervised, ITML_Supervised, SDML_Supervised, RCA_Supervised, MMC_Supervised) + LSML_Supervised, ITML_Supervised, SDML_Supervised, RCA_Supervised, + MMC_Supervised) class TestFitTransform(unittest.TestCase): @@ -62,12 +64,14 @@ def test_lmnn(self): def test_sdml_supervised(self): seed = np.random.RandomState(1234) - sdml = SDML_Supervised(num_constraints=1500) + sdml = SDML_Supervised(num_constraints=1500, balance_param=1e-5, + use_cov=False) sdml.fit(self.X, self.y, random_state=seed) res_1 = sdml.transform(self.X) seed = np.random.RandomState(1234) - sdml = SDML_Supervised(num_constraints=1500) + sdml = SDML_Supervised(num_constraints=1500, balance_param=1e-5, + use_cov=False) res_2 = sdml.fit_transform(self.X, self.y, random_state=seed) assert_array_almost_equal(res_1, res_2) diff --git a/test/test_mahalanobis_mixin.py b/test/test_mahalanobis_mixin.py index 1e555e73..a0bf3b9d 100644 --- a/test/test_mahalanobis_mixin.py +++ b/test/test_mahalanobis_mixin.py @@ -10,6 +10,8 @@ from sklearn.utils.testing import set_random_state from metric_learn._util import make_context +from metric_learn.base_metric import (_QuadrupletsClassifierMixin, + _PairsClassifierMixin) from test.test_utils import ids_metric_learners, metric_learners @@ -96,7 +98,7 @@ def check_is_distance_matrix(pairwise): assert np.array_equal(pairwise, pairwise.T) # symmetry assert
(pairwise.diagonal() == 0).all() # identity # triangular inequality - tol = 1e-15 + tol = 1e-12 assert (pairwise <= pairwise[:, :, np.newaxis] + pairwise[:, np.newaxis, :] + tol).all() @@ -281,5 +283,19 @@ def test_transformer_is_2D(estimator, build_dataset): # test that it works for 1 feature trunc_data = input_data[..., :1] + # we drop duplicates that might have been formed, i.e. of the form + # aabc or abcc or aabb for quadruplets, and aa for pairs. + if isinstance(estimator, _QuadrupletsClassifierMixin): + for slice_idx in [slice(0, 2), slice(2, 4)]: + pairs = trunc_data[:, slice_idx, :] + diffs = pairs[:, 1, :] - pairs[:, 0, :] + to_keep = np.where(np.abs(diffs.ravel()) > 1e-9) + trunc_data = trunc_data[to_keep] + labels = labels[to_keep] + elif isinstance(estimator, _PairsClassifierMixin): + diffs = trunc_data[:, 1, :] - trunc_data[:, 0, :] + to_keep = np.where(np.abs(diffs.ravel()) > 1e-9) + trunc_data = trunc_data[to_keep] + labels = labels[to_keep] model.fit(trunc_data, labels) assert model.transformer_.shape == (1, 1) # the transformer must be 2D diff --git a/test/test_sklearn_compat.py b/test/test_sklearn_compat.py index d9dce685..f1248c9a 100644 --- a/test/test_sklearn_compat.py +++ b/test/test_sklearn_compat.py @@ -72,9 +72,18 @@ def test_itml(self): def test_mmc(self): check_estimator(dMMC) - # This fails due to a FloatingPointError - # def test_sdml(self): - # check_estimator(dSDML) + def test_sdml(self): + def stable_init(self, sparsity_param=0.01, num_labeled='deprecated', + num_constraints=None, verbose=False, preprocessor=None): + # this init makes SDML stable for scikit-learn examples. + SDML_Supervised.__init__(self, sparsity_param=sparsity_param, + num_labeled=num_labeled, + num_constraints=num_constraints, + verbose=verbose, + preprocessor=preprocessor, + balance_param=1e-5, use_cov=False) + dSDML.__init__ = stable_init + check_estimator(dSDML) # This fails because the default num_chunks isn't data-dependent. 
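The broadcast in check_is_distance_matrix above verifies d(j, k) <= d(i, j) + d(i, k) for every triple of points at once. A toy check of the same expression on plain Euclidean distances, which are known to satisfy the triangle inequality (sketch only)::

    import numpy as np
    from scipy.spatial.distance import cdist

    X = np.random.RandomState(0).randn(5, 2)
    pairwise = cdist(X, X)
    tol = 1e-12
    assert (pairwise <= pairwise[:, :, np.newaxis] +
            pairwise[:, np.newaxis, :] + tol).all()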
# def test_rca(self): diff --git a/test/test_transformer_metric_conversion.py b/test/test_transformer_metric_conversion.py index 59986011..6cfe8281 100644 --- a/test/test_transformer_metric_conversion.py +++ b/test/test_transformer_metric_conversion.py @@ -44,7 +44,8 @@ def test_lmnn(self): def test_sdml_supervised(self): seed = np.random.RandomState(1234) - sdml = SDML_Supervised(num_constraints=1500) + sdml = SDML_Supervised(num_constraints=1500, use_cov=False, + balance_param=1e-5) sdml.fit(self.X, self.y, random_state=seed) L = sdml.transformer_ assert_array_almost_equal(L.T.dot(L), sdml.get_mahalanobis_matrix()) diff --git a/test/test_utils.py b/test/test_utils.py index 9099e12d..f1df4098 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -102,26 +102,25 @@ def build_quadruplets(with_preprocessor=False): pairs_learners = [(ITML(), build_pairs), (MMC(max_iter=2), build_pairs), # max_iter=2 for faster - (SDML(), build_pairs), - ] + (SDML(use_cov=False, balance_param=1e-5), build_pairs)] ids_pairs_learners = list(map(lambda x: x.__class__.__name__, - [learner for (learner, _) in - pairs_learners])) - -classifiers = [(Covariance(), build_classification), - (LFDA(), build_classification), - (LMNN(), build_classification), - (NCA(), build_classification), - (RCA(), build_classification), - (ITML_Supervised(max_iter=5), build_classification), - (LSML_Supervised(), build_classification), - (MMC_Supervised(max_iter=5), build_classification), - (RCA_Supervised(num_chunks=10), build_classification), - (SDML_Supervised(), build_classification) - ] + [learner for (learner, _) in + pairs_learners])) + +classifiers = [(Covariance(), build_classification), + (LFDA(), build_classification), + (LMNN(), build_classification), + (NCA(), build_classification), + (RCA(), build_classification), + (ITML_Supervised(max_iter=5), build_classification), + (LSML_Supervised(), build_classification), + (MMC_Supervised(max_iter=5), build_classification), + (RCA_Supervised(num_chunks=10), build_classification), + (SDML_Supervised(use_cov=False, balance_param=1e-5), + build_classification)] ids_classifiers = list(map(lambda x: x.__class__.__name__, - [learner for (learner, _) in - classifiers])) + [learner for (learner, _) in + classifiers])) regressors = [(MLKR(), build_regression)] ids_regressors = list(map(lambda x: x.__class__.__name__, @@ -830,9 +829,9 @@ class MockMetricLearner(MahalanobisMixin): "or a callable.".format(type(preprocessor))) -@pytest.mark.parametrize('estimator', [ITML(), LSML(), MMC(), SDML()], - ids=['ITML', 'LSML', 'MMC', 'SDML']) -def test_error_message_tuple_size(estimator): +@pytest.mark.parametrize('estimator, _', tuples_learners, + ids=ids_tuples_learners) +def test_error_message_tuple_size(estimator, _): """Tests that if a tuples learner is not given the good number of points per tuple, it throws an error message""" estimator = clone(estimator) From 121a247b37ab5e2464e491e01e3803904ca00ba0 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Tue, 26 Mar 2019 14:37:54 +0100 Subject: [PATCH 101/210] FIX: fix error message for SDML --- metric_learn/sdml.py | 2 +- test/metric_learn_test.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py index 590fbfb2..6fd29d38 100644 --- a/metric_learn/sdml.py +++ b/metric_learn/sdml.py @@ -88,7 +88,7 @@ def _fit(self, pairs, y): "positive semi-definite (PSD). The algorithm may diverge, " "and lead to degenerate solutions. 
" "To prevent that, try to decrease the balance parameter " - "`balance_param` and/or to set use_covariance=False.", + "`balance_param` and/or to set use_cov=False.", ConvergenceWarning) w -= min_eigval # we translate the eigenvalues to make them all positive w += 1e-10 # we add a small offset to avoid definiteness problems diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py index ae9a8657..a785d60d 100644 --- a/test/metric_learn_test.py +++ b/test/metric_learn_test.py @@ -295,7 +295,7 @@ def test_sdml_raises_warning_non_psd(self): "positive semi-definite (PSD). The algorithm may diverge, " "and lead to degenerate solutions. " "To prevent that, try to decrease the balance parameter " - "`balance_param` and/or to set use_covariance=False.") + "`balance_param` and/or to set use_cov=False.") with pytest.warns(ConvergenceWarning) as raised_warning: try: sdml.fit(pairs, y) From b28933cfe076a0789fbb61cd0637d76e1143eb53 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20Bellet?= Date: Tue, 26 Mar 2019 14:02:43 +0000 Subject: [PATCH 102/210] change balance param (#188) --- examples/plot_sandwich.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/plot_sandwich.py b/examples/plot_sandwich.py index 0e7658d3..84e53d07 100644 --- a/examples/plot_sandwich.py +++ b/examples/plot_sandwich.py @@ -28,7 +28,7 @@ def sandwich_demo(): mls = [ LMNN(), ITML_Supervised(num_constraints=200), - SDML_Supervised(num_constraints=200), + SDML_Supervised(num_constraints=200, balance_param=0.001), LSML_Supervised(num_constraints=200), ] From edad55d731f9da14b1b931a6c884b8d3c3c77c67 Mon Sep 17 00:00:00 2001 From: William de Vazelhes <31916524+wdevazelhes@users.noreply.github.com> Date: Mon, 15 Apr 2019 11:04:41 +0200 Subject: [PATCH 103/210] [MRG+1] Threshold for pairs learners (#168) * add some tests for testing that different scores work using the scoring function * ENH: Add tests and basic threshold implementation * Add support for LSML and more generally quadruplets * Make CalibratedClassifierCV work (for preprocessor case) thanks to classes_ * Fix some tests and PEP8 errors * change the sign in decision function * Add docstring for threshold_ and classes_ in the base _PairsClassifier class * remove quadruplets from the test with scikit learn custom scorings * Remove argument y in quadruplets learners and lsml * FIX fix docstrings of decision functions * FIX the threshold by taking the opposite (to be adapted to the decision function) * Fix tests to have no y for quadruplets' estimator fit * Remove isin to be compatible with old numpy versions * Fix threshold so that it has a positive value and add small test * Fix threshold for itml * FEAT: Add calibrate_threshold and tests * MAINT: remove starred syntax for compatibility with older versions of python * Remove debugging prints and make tests for ITML pass, while waiting for #175 to be solved * FIX: from __future__ import division to pass tests for python 2.7 * Add some documentation for calibration * DOC: fix style * Address most comments from aurelien's reviews * Remove classes_ attribute and test for CalibratedClassifierCV * Rename make_args_inc_quadruplets into remove_y_quadruplets * TST: Fix remaining threshold into min_rate * Remove default_threshold and put calibrate_threshold instead * Use calibrate_threshold for ITML, and remove description * ENH: use calibrate_threshold by default and display its parameters from the fit method * Add a small test to test automatic calibration * Update documentation of the 
default threshold * Inverse sense for threshold comparison to be more intuitive * Address remaining review comments * MAINT: Rename threshold_params into calibration_params * TST: Add test for extreme cases * MAINT: rename threshold_params into calibration_params * MAINT: rename threshold_params into calibration_params * FIX: Make tests work, and add the right threshold (mean between lowest accepted value and highest rejected value), and max + 1 or min - 1 for extreme points * Go back to previous version of finding the threshold * Extract method for validating calibration parameters * Validate calibration params before fit * Address https://github.com/metric-learn/metric-learn/pull/168#discussion_r268109180 --- doc/weakly_supervised.rst | 117 +++++-- metric_learn/base_metric.py | 235 ++++++++++++- metric_learn/itml.py | 21 +- metric_learn/lsml.py | 2 +- metric_learn/mmc.py | 26 +- metric_learn/sdml.py | 25 +- test/test_mahalanobis_mixin.py | 38 ++- test/test_pairs_classifiers.py | 491 +++++++++++++++++++++++++++ test/test_quadruplets_classifiers.py | 42 +++ test/test_sklearn_compat.py | 182 ++++++---- test/test_utils.py | 35 +- 11 files changed, 1066 insertions(+), 148 deletions(-) create mode 100644 test/test_pairs_classifiers.py create mode 100644 test/test_quadruplets_classifiers.py diff --git a/doc/weakly_supervised.rst b/doc/weakly_supervised.rst index deae9b40..6bf6f993 100644 --- a/doc/weakly_supervised.rst +++ b/doc/weakly_supervised.rst @@ -148,8 +148,47 @@ tuples you're working with (pairs, triplets...). See the docstring of the `score` method of the estimator you use. +Learning on pairs +================= + +Some metric learning algorithms learn on pairs of samples. In this case, one +should provide the algorithm with ``n_samples`` pairs of points, with a +corresponding target containing ``n_samples`` values being either +1 or -1. +These values indicate whether the given pairs are similar points or +dissimilar points. + + +.. _calibration: + +Thresholding +------------ +In order to predict whether a new pair represents similar or dissimilar +samples, we need to set a distance threshold, so that points closer (in the +learned space) than this threshold are predicted as similar, and points further +away are predicted as dissimilar. Several methods are possible for this +thresholding. + +- **At fit time**: The threshold is set with `calibrate_threshold` (see + below) on the trainset. You can specify the calibration parameters directly + in the `fit` method with the `threshold_params` parameter (see the + documentation of the `fit` method of any metric learner that learns on pairs + of points for more information). This method can cause a little bit of + overfitting. If you want to avoid that, calibrate the threshold after + fitting, on a validation set. + +- **Manual**: calling `set_threshold` will set the threshold to a + particular value. + +- **Calibration**: calling `calibrate_threshold` will calibrate the + threshold to achieve a particular score on a validation set, the score + being among the classical scores for classification (accuracy, f1 score...). + + +See also: `sklearn.calibration`. + + Algorithms -================== +========== ITML ---- @@ -192,39 +231,6 @@ programming. .. [2] Adapted from Matlab code at http://www.cs.utexas.edu/users/pjain/ itml/ - -LSML ----- - -`LSML`: Metric Learning from Relative Comparisons by Minimizing Squared -Residual - -.. 
topic:: Example Code: - -:: - - from metric_learn import LSML - - quadruplets = [[[1.2, 7.5], [1.3, 1.5], [6.4, 2.6], [6.2, 9.7]], - [[1.3, 4.5], [3.2, 4.6], [6.2, 5.5], [5.4, 5.4]], - [[3.2, 7.5], [3.3, 1.5], [8.4, 2.6], [8.2, 9.7]], - [[3.3, 4.5], [5.2, 4.6], [8.2, 5.5], [7.4, 5.4]]] - - # we want to make closer points where the first feature is close, and - # further if the second feature is close - - lsml = LSML() - lsml.fit(quadruplets) - -.. topic:: References: - - .. [1] Liu et al. - "Metric Learning from Relative Comparisons by Minimizing Squared - Residual". ICDM 2012. http://www.cs.ucla.edu/~weiwang/paper/ICDM12.pdf - - .. [2] Adapted from https://gist.github.com/kcarnold/5439917 - - SDML ---- @@ -343,3 +349,46 @@ method. However, it is one of the earliest and a still often cited technique. -with-side-information.pdf>`_ Xing, Jordan, Russell, Ng. .. [2] Adapted from Matlab code `here `_. + +Learning on quadruplets +======================= + +A type of information even weaker than pairs is information about relative +comparisons between pairs. The user should provide the algorithm with a +quadruplet of points, where the two first points are closer than the two +last points. No target vector (``y``) is needed, since the supervision is +already in the order that points are given in the quadruplet. + +Algorithms +========== + +LSML +---- + +`LSML`: Metric Learning from Relative Comparisons by Minimizing Squared +Residual + +.. topic:: Example Code: + +:: + + from metric_learn import LSML + + quadruplets = [[[1.2, 7.5], [1.3, 1.5], [6.4, 2.6], [6.2, 9.7]], + [[1.3, 4.5], [3.2, 4.6], [6.2, 5.5], [5.4, 5.4]], + [[3.2, 7.5], [3.3, 1.5], [8.4, 2.6], [8.2, 9.7]], + [[3.3, 4.5], [5.2, 4.6], [8.2, 5.5], [7.4, 5.4]]] + + # we want to make closer points where the first feature is close, and + # further if the second feature is close + + lsml = LSML() + lsml.fit(quadruplets) + +.. topic:: References: + + .. [1] Liu et al. + "Metric Learning from Relative Comparisons by Minimizing Squared + Residual". ICDM 2012. http://www.cs.ucla.edu/~weiwang/paper/ICDM12.pdf + + .. [2] Adapted from https://gist.github.com/kcarnold/5439917 diff --git a/metric_learn/base_metric.py b/metric_learn/base_metric.py index 58b8cc5d..9f127f58 100644 --- a/metric_learn/base_metric.py +++ b/metric_learn/base_metric.py @@ -1,8 +1,7 @@ -from numpy.linalg import cholesky -from scipy.spatial.distance import euclidean from sklearn.base import BaseEstimator -from sklearn.utils.validation import _is_arraylike -from sklearn.metrics import roc_auc_score +from sklearn.utils.extmath import stable_cumsum +from sklearn.utils.validation import _is_arraylike, check_is_fitted +from sklearn.metrics import roc_auc_score, roc_curve, precision_recall_curve import numpy as np from abc import ABCMeta, abstractmethod import six @@ -138,6 +137,7 @@ def get_metric(self): use the metric learner's preprocessor, and works on concatenated arrays. """ + class MetricTransformer(six.with_metaclass(ABCMeta)): @abstractmethod @@ -295,6 +295,14 @@ def get_mahalanobis_matrix(self): class _PairsClassifierMixin(BaseMetricLearner): + """ + Attributes + ---------- + threshold_ : `float` + If the distance metric between two points is lower than this threshold, + points will be classified as similar, otherwise they will be + classified as dissimilar. 
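+
+  For instance, as an illustrative sketch (assuming a hypothetical fitted
+  pairs learner ``model`` whose learned metric amounts to the Euclidean
+  distance, and whose ``threshold_`` is 0.5), a pair at distance 0.3 is
+  predicted similar and a pair at distance 0.7 dissimilar:
+
+  >>> pairs = np.array([[[0.], [0.3]], [[0.], [0.7]]])
+  >>> model.predict(pairs)
+  array([ 1, -1])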
+  """
 
   _tuple_size = 2  # number of points in a tuple, 2 for pairs
 
@@ -317,13 +325,17 @@ def predict(self, pairs):
     y_predicted : `numpy.ndarray` of floats, shape=(n_constraints,)
       The predicted learned metric value between samples in every pair.
     """
-    return self.decision_function(pairs)
+    check_is_fitted(self, ['threshold_', 'transformer_'])
+    return 2 * (- self.decision_function(pairs) <= self.threshold_) - 1
 
   def decision_function(self, pairs):
-    """Returns the learned metric between input pairs.
+    """Returns the decision function used to classify the pairs.
 
-    Returns the learned metric value between samples in every pair. It should
-    ideally be low for similar samples and high for dissimilar samples.
+    Returns the opposite of the learned metric value between samples in every
+    pair, to be consistent with scikit-learn conventions. Hence it should
+    ideally be low for dissimilar samples and high for similar samples.
+    This is the decision function that is used to classify pairs as similar
+    (+1), or dissimilar (-1).
 
     Parameters
     ----------
@@ -335,12 +347,12 @@ def decision_function(self, pairs):
     Returns
     -------
     y_predicted : `numpy.ndarray` of floats, shape=(n_constraints,)
-      The predicted learned metric value between samples in every pair.
+      The predicted decision function value for each pair.
     """
     pairs = check_input(pairs, type_of_inputs='tuples',
                         preprocessor=self.preprocessor_,
                         estimator=self, tuple_size=self._tuple_size)
-    return self.score_pairs(pairs)
+    return - self.score_pairs(pairs)
 
   def score(self, pairs, y):
     """Computes score of pairs similarity prediction.
@@ -369,6 +381,190 @@ def score(self, pairs, y):
     """
     return roc_auc_score(y, self.decision_function(pairs))
 
+  def set_threshold(self, threshold):
+    """Sets the threshold of the metric learner to the given value `threshold`.
+
+    See more in the :ref:`User Guide <calibration>`.
+
+    Parameters
+    ----------
+    threshold : float
+      The threshold value we want to set. It is the value to which the
+      predicted distance for test pairs will be compared. Pairs whose
+      predicted distance is lower than or equal to the threshold will be
+      classified as similar (+1), and as dissimilar (-1) otherwise.
+
+    Returns
+    -------
+    self : `_PairsClassifier`
+      The pairs classifier with the new threshold set.
+    """
+    self.threshold_ = threshold
+    return self
+
+  def calibrate_threshold(self, pairs_valid, y_valid, strategy='accuracy',
+                          min_rate=None, beta=1.):
+    """Decision threshold calibration for pairwise binary classification
+
+    Method that calibrates the decision threshold (cutoff point) of the metric
+    learner. This threshold will then be used when calling the method
+    `predict`. The methods for picking cutoff points make use of traditional
+    binary classification evaluation statistics such as the true positive and
+    true negative rates and F-scores. The threshold will be found to maximize
+    the chosen score on the validation set ``(pairs_valid, y_valid)``.
+
+    See more in the :ref:`User Guide <calibration>`.
+
+    Parameters
+    ----------
+    strategy : str, optional (default='accuracy')
+      The strategy to use for choosing the cutoff threshold.
+
+      'accuracy'
+          Selects a decision threshold that maximizes the accuracy.
+      'f_beta'
+          Selects a decision threshold that maximizes the f_beta score,
+          with beta given by the parameter `beta`.
+      'max_tpr'
+          Selects a decision threshold that yields the highest true positive
+          rate with true negative rate at least equal to the value of the
+          parameter `min_rate`.
+      'max_tnr'
+          Selects a decision threshold that yields the highest true negative
+          rate with true positive rate at least equal to the value of the
+          parameter `min_rate`.
+
+    beta : float, optional (default=1.)
+      Beta value to be used in case strategy == 'f_beta'.
+
+    min_rate : float in [0, 1] or None, optional (default=None)
+      In case strategy is 'max_tpr' or 'max_tnr' this parameter must be set
+      to specify the minimal value for the true negative rate or true positive
+      rate respectively that needs to be achieved.
+
+    pairs_valid : array-like, shape=(n_pairs_valid, 2, n_features)
+      The validation set of pairs to use to set the threshold.
+
+    y_valid : array-like, shape=(n_pairs_valid,)
+      The labels of the pairs of the validation set to use to set the
+      threshold. They must be +1 for positive pairs and -1 for negative pairs.
+
+    References
+    ----------
+    .. [1] Receiver-operating characteristic (ROC) plots: a fundamental
+           evaluation tool in clinical medicine, MH Zweig, G Campbell -
+           Clinical chemistry, 1993
+
+    .. [2] Most of the code of this function is from scikit-learn's PR #10117
+
+    See Also
+    --------
+    sklearn.calibration : scikit-learn's module for calibrating classifiers
+    """
+
+    self._validate_calibration_params(strategy, min_rate, beta)
+
+    pairs_valid, y_valid = self._prepare_inputs(pairs_valid, y_valid,
+                                                type_of_inputs='tuples')
+
+    n_samples = pairs_valid.shape[0]
+    if strategy == 'accuracy':
+      scores = self.decision_function(pairs_valid)
+      scores_sorted_idces = np.argsort(scores)[::-1]
+      scores_sorted = scores[scores_sorted_idces]
+      # true labels ordered by decision_function value (higher first)
+      y_ordered = y_valid[scores_sorted_idces]
+      # we need to add a threshold that will reject all points
+      scores_sorted = np.concatenate([[scores_sorted[0] + 1], scores_sorted])
+
+      # finds the threshold that maximizes the accuracy:
+      cum_tp = stable_cumsum(y_ordered == 1)  # cumulative number of true
+      # positives
+      # we need to add the point where all samples are rejected:
+      cum_tp = np.concatenate([[0.], cum_tp])
+      cum_tn_inverted = stable_cumsum(y_ordered[::-1] == -1)
+      cum_tn = np.concatenate([[0.], cum_tn_inverted])[::-1]
+      cum_accuracy = (cum_tp + cum_tn) / n_samples
+      imax = np.argmax(cum_accuracy)
+      # we set the threshold to the lowest accepted score
+      # note: we are working with negative distances but we want the threshold
+      # to be with respect to the actual distances so we take minus sign
+      self.threshold_ = - scores_sorted[imax]
+      # note: if the best is to reject all points it's already one of the
+      # thresholds (scores_sorted[0])
+      return self
+
+    if strategy == 'f_beta':
+      precision, recall, thresholds = precision_recall_curve(
+          y_valid, self.decision_function(pairs_valid), pos_label=1)
+
+      # here the thresholds are decreasing
+      # We ignore the warnings here, in the same spirit as
+      # https://github.com/scikit-learn/scikit-learn/blob/62d205980446a1abc1065
+      # f4332fd74eee57fcf73/sklearn/metrics/classification.py#L1284
+      with np.errstate(divide='ignore', invalid='ignore'):
+        f_beta = ((1 + beta**2) * (precision * recall) /
                  (beta**2 * precision + recall))
+      # We need to set nans to zero otherwise they will be considered higher
+      # than the others (also discussed in https://github.com/scikit-learn/
+      # scikit-learn/pull/10117/files#r262115773)
+      f_beta[np.isnan(f_beta)] = 0.
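+      # e.g. with beta = 1., precision = 0.5 and recall = 1., the f_beta
+      # formula above gives 2 * (0.5 * 1.) / (0.5 + 1.) = 2 / 3, i.e. the
+      # usual F1 score (harmonic mean of precision and recall)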
+ imax = np.argmax(f_beta) + # we set the threshold to the lowest accepted score + # note: we are working with negative distances but we want the threshold + # to be with respect to the actual distances so we take minus sign + self.threshold_ = - thresholds[imax] + # Note: we don't need to deal with rejecting all points (i.e. threshold = + # max_scores + 1), since this can never happen to be optimal + # (see a more detailed discussion in test_calibrate_threshold_extreme) + return self + + fpr, tpr, thresholds = roc_curve(y_valid, + self.decision_function(pairs_valid), + pos_label=1) + # here the thresholds are decreasing + fpr, tpr, thresholds = fpr, tpr, thresholds + + if strategy in ['max_tpr', 'max_tnr']: + if strategy == 'max_tpr': + indices = np.where(1 - fpr >= min_rate)[0] + imax = np.argmax(tpr[indices]) + + if strategy == 'max_tnr': + indices = np.where(tpr >= min_rate)[0] + imax = np.argmax(1 - fpr[indices]) + + imax_valid = indices[imax] + # note: we are working with negative distances but we want the threshold + # to be with respect to the actual distances so we take minus sign + if indices[imax] == len(thresholds): # we want to accept everything + self.threshold_ = - (thresholds[imax_valid] - 1) + else: + # thanks to roc_curve, the first point will always be max_scores + # + 1, see: https://github.com/scikit-learn/scikit-learn/pull/13523 + self.threshold_ = - thresholds[imax_valid] + return self + + @staticmethod + def _validate_calibration_params(strategy='accuracy', min_rate=None, + beta=1.): + """Ensure that calibration parameters have allowed values""" + if strategy not in ('accuracy', 'f_beta', 'max_tpr', + 'max_tnr'): + raise ValueError('Strategy can either be "accuracy", "f_beta" or ' + '"max_tpr" or "max_tnr". Got "{}" instead.' + .format(strategy)) + if strategy == 'max_tpr' or strategy == 'max_tnr': + if (min_rate is None or not isinstance(min_rate, (int, float)) or + not min_rate >= 0 or not min_rate <= 1): + raise ValueError('Parameter min_rate must be a number in' + '[0, 1]. ' + 'Got {} instead.'.format(min_rate)) + if strategy == 'f_beta': + if beta is None or not isinstance(beta, (int, float)): + raise ValueError('Parameter beta must be a real number. ' + 'Got {} instead.'.format(type(beta))) + class _QuadrupletsClassifierMixin(BaseMetricLearner): @@ -393,6 +589,7 @@ def predict(self, quadruplets): prediction : `numpy.ndarray` of floats, shape=(n_constraints,) Predictions of the ordering of pairs, for each quadruplet. """ + check_is_fitted(self, 'transformer_') quadruplets = check_input(quadruplets, type_of_inputs='tuples', preprocessor=self.preprocessor_, estimator=self, tuple_size=self._tuple_size) @@ -401,8 +598,12 @@ def predict(self, quadruplets): def decision_function(self, quadruplets): """Predicts differences between sample distances in input quadruplets. - For each quadruplet of samples, computes the difference between the learned - metric of the first pair minus the learned metric of the second pair. + For each quadruplet in the samples, computes the difference between the + learned metric of the second pair minus the learned metric of the first + pair. The higher it is, the more probable it is that the pairs in the + quadruplet are presented in the right order, i.e. that the label of the + quadruplet is 1. The lower it is, the more probable it is that the label of + the quadruplet is -1. 
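+
+    As an illustrative sketch (with ``d`` the learned distance between two
+    points, and assuming a hypothetical fitted quadruplets learner
+    ``lsml``), the value returned for a quadruplet ``(a, b, c, e)`` is
+    ``d(c, e) - d(a, b)``::
+
+      quadruplets = np.array([[[0.], [0.1], [0.], [5.]]])
+      lsml.decision_function(quadruplets)  # = d([0.], [5.]) - d([0.], [0.1]),
+                                           # positive if the learned metric
+                                           # keeps 0. and 0.1 close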
Parameters ---------- @@ -417,10 +618,10 @@ def decision_function(self, quadruplets): decision_function : `numpy.ndarray` of floats, shape=(n_constraints,) Metric differences. """ - return (self.score_pairs(quadruplets[:, :2]) - - self.score_pairs(quadruplets[:, 2:])) + return (self.score_pairs(quadruplets[:, 2:]) - + self.score_pairs(quadruplets[:, :2])) - def score(self, quadruplets, y=None): + def score(self, quadruplets): """Computes score on input quadruplets Returns the accuracy score of the following classification task: a record @@ -435,11 +636,9 @@ def score(self, quadruplets, y=None): points, or 2D array of indices of quadruplets if the metric learner uses a preprocessor. - y : Ignored, for scikit-learn compatibility. - Returns ------- score : float The quadruplets score. """ - return -np.mean(self.predict(quadruplets)) + return - np.mean(self.predict(quadruplets)) diff --git a/metric_learn/itml.py b/metric_learn/itml.py index a0ff05f9..9b6dccb2 100644 --- a/metric_learn/itml.py +++ b/metric_learn/itml.py @@ -148,11 +148,19 @@ class ITML(_BaseITML, _PairsClassifierMixin): transformer_ : `numpy.ndarray`, shape=(num_dims, n_features) The linear transformation ``L`` deduced from the learned Mahalanobis metric (See function `transformer_from_metric`.) + + threshold_ : `float` + If the distance metric between two points is lower than this threshold, + points will be classified as similar, otherwise they will be + classified as dissimilar. """ - def fit(self, pairs, y, bounds=None): + def fit(self, pairs, y, bounds=None, calibration_params=None): """Learn the ITML model. + The threshold will be calibrated on the trainset using the parameters + `calibration_params`. + Parameters ---------- pairs: array-like, shape=(n_constraints, 2, n_features) or @@ -170,13 +178,22 @@ def fit(self, pairs, y, bounds=None): If not provided at initialization, bounds_[0] and bounds_[1] will be set to the 5th and 95th percentile of the pairwise distances among all points present in the input `pairs`. + calibration_params : `dict` or `None` + Dictionary of parameters to give to `calibrate_threshold` for the + threshold calibration step done at the end of `fit`. If `None` is + given, `calibrate_threshold` will use the default parameters. Returns ------- self : object Returns the instance. """ - return self._fit(pairs, y, bounds=bounds) + calibration_params = (calibration_params if calibration_params is not + None else dict()) + self._validate_calibration_params(**calibration_params) + self._fit(pairs, y) + self.calibrate_threshold(pairs, y, **calibration_params) + return self class ITML_Supervised(_BaseITML, TransformerMixin): diff --git a/metric_learn/lsml.py b/metric_learn/lsml.py index 312990ab..536719ba 100644 --- a/metric_learn/lsml.py +++ b/metric_learn/lsml.py @@ -45,7 +45,7 @@ def __init__(self, tol=1e-3, max_iter=1000, prior=None, verbose=False, self.verbose = verbose super(_BaseLSML, self).__init__(preprocessor) - def _fit(self, quadruplets, y=None, weights=None): + def _fit(self, quadruplets, weights=None): quadruplets = self._prepare_inputs(quadruplets, type_of_inputs='tuples') diff --git a/metric_learn/mmc.py b/metric_learn/mmc.py index f9d3690b..346db2f8 100644 --- a/metric_learn/mmc.py +++ b/metric_learn/mmc.py @@ -359,27 +359,43 @@ class MMC(_BaseMMC, _PairsClassifierMixin): transformer_ : `numpy.ndarray`, shape=(num_dims, n_features) The linear transformation ``L`` deduced from the learned Mahalanobis metric (See function `transformer_from_metric`.) 
+ + threshold_ : `float` + If the distance metric between two points is lower than this threshold, + points will be classified as similar, otherwise they will be + classified as dissimilar. """ - def fit(self, pairs, y): + def fit(self, pairs, y, calibration_params=None): """Learn the MMC model. + The threshold will be calibrated on the trainset using the parameters + `calibration_params`. + Parameters ---------- - pairs: array-like, shape=(n_constraints, 2, n_features) or + pairs : array-like, shape=(n_constraints, 2, n_features) or (n_constraints, 2) 3D Array of pairs with each row corresponding to two points, or 2D array of indices of pairs if the metric learner uses a preprocessor. - y: array-like, of shape (n_constraints,) + y : array-like, of shape (n_constraints,) Labels of constraints. Should be -1 for dissimilar pair, 1 for similar. - + calibration_params : `dict` or `None` + Dictionary of parameters to give to `calibrate_threshold` for the + threshold calibration step done at the end of `fit`. If `None` is + given, `calibrate_threshold` will use the default parameters. Returns ------- self : object Returns the instance. """ - return self._fit(pairs, y) + calibration_params = (calibration_params if calibration_params is not + None else dict()) + self._validate_calibration_params(**calibration_params) + self._fit(pairs, y) + self.calibrate_threshold(pairs, y, **calibration_params) + return self class MMC_Supervised(_BaseMMC, TransformerMixin): diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py index 6fd29d38..e9828d07 100644 --- a/metric_learn/sdml.py +++ b/metric_learn/sdml.py @@ -138,27 +138,44 @@ class SDML(_BaseSDML, _PairsClassifierMixin): transformer_ : `numpy.ndarray`, shape=(num_dims, n_features) The linear transformation ``L`` deduced from the learned Mahalanobis metric (See function `transformer_from_metric`.) + + threshold_ : `float` + If the distance metric between two points is lower than this threshold, + points will be classified as similar, otherwise they will be + classified as dissimilar. """ - def fit(self, pairs, y): + def fit(self, pairs, y, calibration_params=None): """Learn the SDML model. + The threshold will be calibrated on the trainset using the parameters + `calibration_params`. + Parameters ---------- - pairs: array-like, shape=(n_constraints, 2, n_features) or + pairs : array-like, shape=(n_constraints, 2, n_features) or (n_constraints, 2) 3D Array of pairs with each row corresponding to two points, or 2D array of indices of pairs if the metric learner uses a preprocessor. - y: array-like, of shape (n_constraints,) + y : array-like, of shape (n_constraints,) Labels of constraints. Should be -1 for dissimilar pair, 1 for similar. + calibration_params : `dict` or `None` + Dictionary of parameters to give to `calibrate_threshold` for the + threshold calibration step done at the end of `fit`. If `None` is + given, `calibrate_threshold` will use the default parameters. Returns ------- self : object Returns the instance. 
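+
+    Examples
+    --------
+    An illustrative sketch only (made-up toy pairs; any strategy accepted
+    by `calibrate_threshold` can be passed through ``calibration_params``)::
+
+      pairs = [[[1.2, 7.5], [1.3, 1.5]], [[6.4, 2.6], [6.2, 9.7]]]
+      y = [1, -1]
+      sdml = SDML()
+      sdml.fit(pairs, y,
+               calibration_params={'strategy': 'f_beta', 'beta': 2.})
+      sdml.predict(pairs)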
""" - return self._fit(pairs, y) + calibration_params = (calibration_params if calibration_params is not + None else dict()) + self._validate_calibration_params(**calibration_params) + self._fit(pairs, y) + self.calibrate_threshold(pairs, y, **calibration_params) + return self class SDML_Supervised(_BaseSDML, TransformerMixin): diff --git a/test/test_mahalanobis_mixin.py b/test/test_mahalanobis_mixin.py index a0bf3b9d..15bf1aed 100644 --- a/test/test_mahalanobis_mixin.py +++ b/test/test_mahalanobis_mixin.py @@ -13,7 +13,8 @@ from metric_learn.base_metric import (_QuadrupletsClassifierMixin, _PairsClassifierMixin) -from test.test_utils import ids_metric_learners, metric_learners +from test.test_utils import (ids_metric_learners, metric_learners, + remove_y_quadruplets) RNG = check_random_state(0) @@ -27,7 +28,7 @@ def test_score_pairs_pairwise(estimator, build_dataset): X = X[:n_samples] model = clone(estimator) set_random_state(model) - model.fit(input_data, labels) + model.fit(*remove_y_quadruplets(estimator, input_data, labels)) pairwise = model.score_pairs(np.array(list(product(X, X))))\ .reshape(n_samples, n_samples) @@ -51,7 +52,7 @@ def test_score_pairs_toy_example(estimator, build_dataset): X = X[:n_samples] model = clone(estimator) set_random_state(model) - model.fit(input_data, labels) + model.fit(*remove_y_quadruplets(estimator, input_data, labels)) pairs = np.stack([X[:10], X[10:20]], axis=1) embedded_pairs = pairs.dot(model.transformer_.T) distances = np.sqrt(np.sum((embedded_pairs[:, 1] - @@ -67,7 +68,7 @@ def test_score_pairs_finite(estimator, build_dataset): input_data, labels, _, X = build_dataset() model = clone(estimator) set_random_state(model) - model.fit(input_data, labels) + model.fit(*remove_y_quadruplets(estimator, input_data, labels)) pairs = np.array(list(product(X, X))) assert np.isfinite(model.score_pairs(pairs)).all() @@ -81,7 +82,7 @@ def test_score_pairs_dim(estimator, build_dataset): input_data, labels, _, X = build_dataset() model = clone(estimator) set_random_state(model) - model.fit(input_data, labels) + model.fit(*remove_y_quadruplets(estimator, input_data, labels)) tuples = np.array(list(product(X, X))) assert model.score_pairs(tuples).shape == (tuples.shape[0],) context = make_context(estimator) @@ -112,7 +113,7 @@ def test_embed_toy_example(estimator, build_dataset): X = X[:n_samples] model = clone(estimator) set_random_state(model) - model.fit(input_data, labels) + model.fit(*remove_y_quadruplets(estimator, input_data, labels)) embedded_points = X.dot(model.transformer_.T) assert_array_almost_equal(model.transform(X), embedded_points) @@ -124,7 +125,7 @@ def test_embed_dim(estimator, build_dataset): input_data, labels, _, X = build_dataset() model = clone(estimator) set_random_state(model) - model.fit(input_data, labels) + model.fit(*remove_y_quadruplets(estimator, input_data, labels)) assert model.transform(X).shape == X.shape # assert that ValueError is thrown if input shape is 1D @@ -137,8 +138,11 @@ def test_embed_dim(estimator, build_dataset): assert str(raised_error.value) == err_msg # we test that the shape is also OK when doing dimensionality reduction if type(model).__name__ in {'LFDA', 'MLKR', 'NCA', 'RCA'}: + # TODO: + # avoid this enumeration and rather test if hasattr n_components + # as soon as we have made the arguments names as such (issue #167) model.set_params(num_dims=2) - model.fit(input_data, labels) + model.fit(*remove_y_quadruplets(estimator, input_data, labels)) assert model.transform(X).shape == (X.shape[0], 2) # assert that 
ValueError is thrown if input shape is 1D with pytest.raises(ValueError) as raised_error: @@ -153,7 +157,7 @@ def test_embed_finite(estimator, build_dataset): input_data, labels, _, X = build_dataset() model = clone(estimator) set_random_state(model) - model.fit(input_data, labels) + model.fit(*remove_y_quadruplets(estimator, input_data, labels)) assert np.isfinite(model.transform(X)).all() @@ -164,7 +168,7 @@ def test_embed_is_linear(estimator, build_dataset): input_data, labels, _, X = build_dataset() model = clone(estimator) set_random_state(model) - model.fit(input_data, labels) + model.fit(*remove_y_quadruplets(estimator, input_data, labels)) assert_array_almost_equal(model.transform(X[:10] + X[10:20]), model.transform(X[:10]) + model.transform(X[10:20])) @@ -183,7 +187,7 @@ def test_get_metric_equivalent_to_explicit_mahalanobis(estimator, input_data, labels, _, X = build_dataset() model = clone(estimator) set_random_state(model) - model.fit(input_data, labels) + model.fit(*remove_y_quadruplets(estimator, input_data, labels)) metric = model.get_metric() n_features = X.shape[1] a, b = (rng.randn(n_features), rng.randn(n_features)) @@ -202,7 +206,7 @@ def test_get_metric_is_pseudo_metric(estimator, build_dataset): input_data, labels, _, X = build_dataset() model = clone(estimator) set_random_state(model) - model.fit(input_data, labels) + model.fit(*remove_y_quadruplets(estimator, input_data, labels)) metric = model.get_metric() n_features = X.shape[1] @@ -228,7 +232,7 @@ def test_metric_raises_deprecation_warning(estimator, build_dataset): input_data, labels, _, X = build_dataset() model = clone(estimator) set_random_state(model) - model.fit(input_data, labels) + model.fit(*remove_y_quadruplets(estimator, input_data, labels)) with pytest.warns(DeprecationWarning) as raised_warning: model.metric() @@ -245,7 +249,7 @@ def test_get_metric_compatible_with_scikit_learn(estimator, build_dataset): input_data, labels, _, X = build_dataset() model = clone(estimator) set_random_state(model) - model.fit(input_data, labels) + model.fit(*remove_y_quadruplets(estimator, input_data, labels)) clustering = DBSCAN(metric=model.get_metric()) clustering.fit(X) @@ -258,7 +262,7 @@ def test_get_squared_metric(estimator, build_dataset): input_data, labels, _, X = build_dataset() model = clone(estimator) set_random_state(model) - model.fit(input_data, labels) + model.fit(*remove_y_quadruplets(estimator, input_data, labels)) metric = model.get_metric() n_features = X.shape[1] @@ -278,7 +282,7 @@ def test_transformer_is_2D(estimator, build_dataset): model = clone(estimator) set_random_state(model) # test that it works for X.shape[1] features - model.fit(input_data, labels) + model.fit(*remove_y_quadruplets(estimator, input_data, labels)) assert model.transformer_.shape == (X.shape[1], X.shape[1]) # test that it works for 1 feature @@ -297,5 +301,5 @@ def test_transformer_is_2D(estimator, build_dataset): to_keep = np.where(np.abs(diffs.ravel()) > 1e-9) trunc_data = trunc_data[to_keep] labels = labels[to_keep] - model.fit(trunc_data, labels) + model.fit(*remove_y_quadruplets(estimator, trunc_data, labels)) assert model.transformer_.shape == (1, 1) # the transformer must be 2D diff --git a/test/test_pairs_classifiers.py b/test/test_pairs_classifiers.py new file mode 100644 index 00000000..828181cb --- /dev/null +++ b/test/test_pairs_classifiers.py @@ -0,0 +1,491 @@ +from __future__ import division + +from functools import partial + +import pytest +from numpy.testing import assert_array_equal + +from 
metric_learn.base_metric import _PairsClassifierMixin, MahalanobisMixin +from sklearn.exceptions import NotFittedError +from sklearn.metrics import (f1_score, accuracy_score, fbeta_score, + precision_score) +from sklearn.model_selection import train_test_split + +from test.test_utils import pairs_learners, ids_pairs_learners +from sklearn.utils.testing import set_random_state +from sklearn import clone +import numpy as np +from itertools import product + + +@pytest.mark.parametrize('with_preprocessor', [True, False]) +@pytest.mark.parametrize('estimator, build_dataset', pairs_learners, + ids=ids_pairs_learners) +def test_predict_only_one_or_minus_one(estimator, build_dataset, + with_preprocessor): + """Test that all predicted values are either +1 or -1""" + input_data, labels, preprocessor, _ = build_dataset(with_preprocessor) + estimator = clone(estimator) + estimator.set_params(preprocessor=preprocessor) + set_random_state(estimator) + pairs_train, pairs_test, y_train, y_test = train_test_split(input_data, + labels) + estimator.fit(pairs_train, y_train) + predictions = estimator.predict(pairs_test) + not_valid = [e for e in predictions if e not in [-1, 1]] + assert len(not_valid) == 0 + + +@pytest.mark.parametrize('with_preprocessor', [True, False]) +@pytest.mark.parametrize('estimator, build_dataset', pairs_learners, + ids=ids_pairs_learners) +def test_predict_monotonous(estimator, build_dataset, + with_preprocessor): + """Test that there is a threshold distance separating points labeled as + similar and points labeled as dissimilar """ + input_data, labels, preprocessor, _ = build_dataset(with_preprocessor) + estimator = clone(estimator) + estimator.set_params(preprocessor=preprocessor) + set_random_state(estimator) + pairs_train, pairs_test, y_train, y_test = train_test_split(input_data, + labels) + estimator.fit(pairs_train, y_train) + distances = estimator.score_pairs(pairs_test) + predictions = estimator.predict(pairs_test) + min_dissimilar = np.min(distances[predictions == -1]) + max_similar = np.max(distances[predictions == 1]) + assert max_similar <= min_dissimilar + separator = np.mean([min_dissimilar, max_similar]) + assert (predictions[distances > separator] == -1).all() + assert (predictions[distances < separator] == 1).all() + + +@pytest.mark.parametrize('with_preprocessor', [True, False]) +@pytest.mark.parametrize('estimator, build_dataset', pairs_learners, + ids=ids_pairs_learners) +def test_raise_not_fitted_error_if_not_fitted(estimator, build_dataset, + with_preprocessor): + """Test that a NotFittedError is raised if someone tries to predict and + the metric learner has not been fitted.""" + input_data, labels, preprocessor, _ = build_dataset(with_preprocessor) + estimator = clone(estimator) + estimator.set_params(preprocessor=preprocessor) + set_random_state(estimator) + with pytest.raises(NotFittedError): + estimator.predict(input_data) + + +@pytest.mark.parametrize('calibration_params', + [None, {}, dict(), {'strategy': 'accuracy'}] + + [{'strategy': strategy, 'min_rate': min_rate} + for (strategy, min_rate) in product( + ['max_tpr', 'max_tnr'], [0., 0.2, 0.8, 1.])] + + [{'strategy': 'f_beta', 'beta': beta} + for beta in [0., 0.1, 0.2, 1., 5.]] + ) +@pytest.mark.parametrize('with_preprocessor', [True, False]) +@pytest.mark.parametrize('estimator, build_dataset', pairs_learners, + ids=ids_pairs_learners) +def test_fit_with_valid_threshold_params(estimator, build_dataset, + with_preprocessor, + calibration_params): + """Tests that fitting `calibration_params` with 
appropriate parameters works + as expected""" + pairs, y, preprocessor, _ = build_dataset(with_preprocessor) + estimator = clone(estimator) + estimator.set_params(preprocessor=preprocessor) + set_random_state(estimator) + estimator.fit(pairs, y, calibration_params=calibration_params) + estimator.predict(pairs) + + +@pytest.mark.parametrize('kwargs', + [{'strategy': 'accuracy'}] + + [{'strategy': strategy, 'min_rate': min_rate} + for (strategy, min_rate) in product( + ['max_tpr', 'max_tnr'], [0., 0.2, 0.8, 1.])] + + [{'strategy': 'f_beta', 'beta': beta} + for beta in [0., 0.1, 0.2, 1., 5.]] + ) +@pytest.mark.parametrize('with_preprocessor', [True, False]) +@pytest.mark.parametrize('estimator, build_dataset', pairs_learners, + ids=ids_pairs_learners) +def test_threshold_different_scores_is_finite(estimator, build_dataset, + with_preprocessor, kwargs): + # test that calibrating the threshold works for every metric learner + input_data, labels, preprocessor, _ = build_dataset(with_preprocessor) + estimator = clone(estimator) + estimator.set_params(preprocessor=preprocessor) + set_random_state(estimator) + estimator.fit(input_data, labels) + with pytest.warns(None) as record: + estimator.calibrate_threshold(input_data, labels, **kwargs) + assert len(record) == 0 + + +class IdentityPairsClassifier(MahalanobisMixin, _PairsClassifierMixin): + """A simple pairs classifier for testing purposes, that will just have + identity as transformer_, and a string threshold so that it returns an + error if not explicitely set. + """ + def fit(self, pairs, y): + pairs, y = self._prepare_inputs(pairs, y, + type_of_inputs='tuples') + self.transformer_ = np.atleast_2d(np.identity(pairs.shape[2])) + self.threshold_ = 'I am not set.' + return self + + +def test_set_threshold(): + # test that set_threshold indeed sets the threshold + identity_pairs_classifier = IdentityPairsClassifier() + pairs = np.array([[[0.], [1.]], [[1.], [3.]], [[2.], [5.]], [[3.], [7.]]]) + y = np.array([1, 1, -1, -1]) + identity_pairs_classifier.fit(pairs, y) + identity_pairs_classifier.set_threshold(0.5) + assert identity_pairs_classifier.threshold_ == 0.5 + + +def test_f_beta_1_is_f_1(): + # test that putting beta to 1 indeed finds the best threshold to optimize + # the f1_score + rng = np.random.RandomState(42) + n_samples = 100 + pairs, y = rng.randn(n_samples, 2, 5), rng.choice([-1, 1], size=n_samples) + pairs_learner = IdentityPairsClassifier() + pairs_learner.fit(pairs, y) + pairs_learner.calibrate_threshold(pairs, y, strategy='f_beta', beta=1) + best_f1_score = f1_score(y, pairs_learner.predict(pairs)) + for threshold in - pairs_learner.decision_function(pairs): + pairs_learner.set_threshold(threshold) + assert f1_score(y, pairs_learner.predict(pairs)) <= best_f1_score + + +def true_pos_true_neg_rates(y_true, y_pred): + """A function that returns the true positive rates and the true negatives + rate. For testing purposes (optimized for readability not performance).""" + assert y_pred.shape[0] == y_true.shape[0] + tp = np.sum((y_pred == 1) * (y_true == 1)) + tn = np.sum((y_pred == -1) * (y_true == -1)) + fn = np.sum((y_pred == -1) * (y_true == 1)) + fp = np.sum((y_pred == 1) * (y_true == -1)) + tpr = tp / (tp + fn) + tnr = tn / (tn + fp) + tpr = tpr if not np.isnan(tpr) else 0. + tnr = tnr if not np.isnan(tnr) else 0. + return tpr, tnr + + +def tpr_threshold(y_true, y_pred, tnr_threshold=0.): + """A function that returns the true positive rate if the true negative + rate is higher or equal than `threshold`, and -1 otherwise. 
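+  (For instance, with y_true = [1, 1, -1] and y_pred = [1, -1, -1]:
+  tpr = 1 / 2 = 0.5 and tnr = 1. >= 0., so 0.5 is returned.)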
For testing + purposes""" + tpr, tnr = true_pos_true_neg_rates(y_true, y_pred) + if tnr < tnr_threshold: + return -1 + else: + return tpr + + +def tnr_threshold(y_true, y_pred, tpr_threshold=0.): + """A function that returns the true negative rate if the true positive + rate is higher or equal than `threshold`, and -1 otherwise. For testing + purposes""" + tpr, tnr = true_pos_true_neg_rates(y_true, y_pred) + if tpr < tpr_threshold: + return -1 + else: + return tnr + + +@pytest.mark.parametrize('kwargs, scoring', + [({'strategy': 'accuracy'}, accuracy_score)] + + [({'strategy': 'f_beta', 'beta': b}, + partial(fbeta_score, beta=b)) + for b in [0.1, 0.5, 1.]] + + [({'strategy': 'f_beta', 'beta': 0}, + precision_score)] + + [({'strategy': 'max_tpr', 'min_rate': t}, + partial(tpr_threshold, tnr_threshold=t)) + for t in [0., 0.1, 0.5, 0.8, 1.]] + + [({'strategy': 'max_tnr', 'min_rate': t}, + partial(tnr_threshold, tpr_threshold=t)) + for t in [0., 0.1, 0.5, 0.8, 1.]], + ) +def test_found_score_is_best_score(kwargs, scoring): + # test that when we use calibrate threshold, it will indeed be the + # threshold that have the best score + rng = np.random.RandomState(42) + n_samples = 50 + pairs, y = rng.randn(n_samples, 2, 5), rng.choice([-1, 1], size=n_samples) + pairs_learner = IdentityPairsClassifier() + pairs_learner.fit(pairs, y) + pairs_learner.calibrate_threshold(pairs, y, **kwargs) + best_score = scoring(y, pairs_learner.predict(pairs)) + scores = [] + predicted_scores = pairs_learner.decision_function(pairs) + predicted_scores = np.hstack([[np.min(predicted_scores) - 1], + predicted_scores, + [np.max(predicted_scores) + 1]]) + for threshold in - predicted_scores: + pairs_learner.set_threshold(threshold) + score = scoring(y, pairs_learner.predict(pairs)) + assert score <= best_score + scores.append(score) + assert len(set(scores)) > 1 # assert that we didn't always have the same + # value for the score (which could be a hint for some bug, but would still + # silently pass the test)) + + +@pytest.mark.parametrize('kwargs, scoring', + [({'strategy': 'accuracy'}, accuracy_score)] + + [({'strategy': 'f_beta', 'beta': b}, + partial(fbeta_score, beta=b)) + for b in [0.1, 0.5, 1.]] + + [({'strategy': 'f_beta', 'beta': 0}, + precision_score)] + + [({'strategy': 'max_tpr', 'min_rate': t}, + partial(tpr_threshold, tnr_threshold=t)) + for t in [0., 0.1, 0.5, 0.8, 1.]] + + [({'strategy': 'max_tnr', 'min_rate': t}, + partial(tnr_threshold, tpr_threshold=t)) + for t in [0., 0.1, 0.5, 0.8, 1.]] + ) +def test_found_score_is_best_score_duplicates(kwargs, scoring): + # test that when we use calibrate threshold, it will indeed be the + # threshold that have the best score. It's the same as the previous test + # except this time we test that the scores are coherent even if there are + # duplicates (i.e. points that have the same score returned by + # `decision_function`). 
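+  # (the duplicated scores come from copying pairs[10:14] onto pairs[6:10]
+  # below, together with their labels)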
+ rng = np.random.RandomState(42) + n_samples = 50 + pairs, y = rng.randn(n_samples, 2, 5), rng.choice([-1, 1], size=n_samples) + # we create some duplicates points, which will also have the same score + # predicted + pairs[6:10] = pairs[10:14] + y[6:10] = y[10:14] + pairs_learner = IdentityPairsClassifier() + pairs_learner.fit(pairs, y) + pairs_learner.calibrate_threshold(pairs, y, **kwargs) + best_score = scoring(y, pairs_learner.predict(pairs)) + scores = [] + predicted_scores = pairs_learner.decision_function(pairs) + predicted_scores = np.hstack([[np.min(predicted_scores) - 1], + predicted_scores, + [np.max(predicted_scores) + 1]]) + for threshold in - predicted_scores: + pairs_learner.set_threshold(threshold) + score = scoring(y, pairs_learner.predict(pairs)) + assert score <= best_score + scores.append(score) + assert len(set(scores)) > 1 # assert that we didn't always have the same + # value for the score (which could be a hint for some bug, but would still + # silently pass the test)) + + +@pytest.mark.parametrize('invalid_args, expected_msg', + [({'strategy': 'weird'}, + ('Strategy can either be "accuracy", "f_beta" or ' + '"max_tpr" or "max_tnr". Got "weird" instead.'))] + + [({'strategy': strategy, 'min_rate': min_rate}, + 'Parameter min_rate must be a number in' + '[0, 1]. Got {} instead.'.format(min_rate)) + for (strategy, min_rate) in product( + ['max_tpr', 'max_tnr'], + [None, 'weird', -0.2, 1.2, 3 + 2j])] + + [({'strategy': 'f_beta', 'beta': beta}, + 'Parameter beta must be a real number. ' + 'Got {} instead.'.format(type(beta))) + for beta in [None, 'weird', 3 + 2j]] + ) +def test_calibrate_threshold_invalid_parameters_right_error(invalid_args, + expected_msg): + # test that the right error message is returned if invalid arguments are + # given to calibrate_threshold + rng = np.random.RandomState(42) + pairs, y = rng.randn(20, 2, 5), rng.choice([-1, 1], size=20) + pairs_learner = IdentityPairsClassifier() + pairs_learner.fit(pairs, y) + with pytest.raises(ValueError) as raised_error: + pairs_learner.calibrate_threshold(pairs, y, **invalid_args) + assert str(raised_error.value) == expected_msg + + +@pytest.mark.parametrize('valid_args', + [{'strategy': 'accuracy'}] + + [{'strategy': strategy, 'min_rate': min_rate} + for (strategy, min_rate) in product( + ['max_tpr', 'max_tnr'], + [0., 0.2, 0.8, 1.])] + + [{'strategy': 'f_beta', 'beta': beta} + for beta in [-5., -1., 0., 0.1, 0.2, 1., 5.]] + # Note that we authorize beta < 0 (even if + # in fact it will be squared, so it would be useless + # to do that) + ) +def test_calibrate_threshold_valid_parameters(valid_args): + # test that no warning message is returned if valid arguments are given to + # calibrate threshold + rng = np.random.RandomState(42) + pairs, y = rng.randn(20, 2, 5), rng.choice([-1, 1], size=20) + pairs_learner = IdentityPairsClassifier() + pairs_learner.fit(pairs, y) + with pytest.warns(None) as record: + pairs_learner.calibrate_threshold(pairs, y, **valid_args) + assert len(record) == 0 + + +def test_calibrate_threshold_extreme(): + """Test that in the (rare) case where we should accept all points or + reject all points, this is effectively what + is done""" + + class MockBadPairsClassifier(MahalanobisMixin, _PairsClassifierMixin): + """A pairs classifier that returns bad scores (i.e. 
in the inverse order + of what we would expect from a good pairs classifier + """ + + def fit(self, pairs, y, calibration_params=None): + self.transformer_ = 'not used' + self.calibrate_threshold(pairs, y, **(calibration_params if + calibration_params is not None else + dict())) + return self + + def decision_function(self, pairs): + return np.arange(pairs.shape[0], dtype=float) + + rng = np.random.RandomState(42) + pairs = rng.randn(7, 2, 5) # the info in X is not used, it's just for the + # API + + y = [1., 1., 1., -1., -1., -1., -1.] + mock_clf = MockBadPairsClassifier() + # case of bad scoring with more negative than positives. In + # this case, when: + # optimizing for accuracy we should reject all points + mock_clf.fit(pairs, y, calibration_params={'strategy': 'accuracy'}) + assert_array_equal(mock_clf.predict(pairs), - np.ones(7)) + + # optimizing for max_tpr we should accept all points if min_rate == 0. ( + # because by convention then tnr=0/0=0) + mock_clf.fit(pairs, y, calibration_params={'strategy': 'max_tpr', + 'min_rate': 0.}) + assert_array_equal(mock_clf.predict(pairs), np.ones(7)) + # optimizing for max_tnr we should reject all points if min_rate = 0. ( + # because by convention then tpr=0/0=0) + mock_clf.fit(pairs, y, calibration_params={'strategy': 'max_tnr', + 'min_rate': 0.}) + assert_array_equal(mock_clf.predict(pairs), - np.ones(7)) + + y = [1., 1., 1., 1., -1., -1., -1.] + # case of bad scoring with more positives than negatives. In + # this case, when: + # optimizing for accuracy we should accept all points + mock_clf.fit(pairs, y, calibration_params={'strategy': 'accuracy'}) + assert_array_equal(mock_clf.predict(pairs), np.ones(7)) + # optimizing for max_tpr we should accept all points if min_rate == 0. ( + # because by convention then tnr=0/0=0) + mock_clf.fit(pairs, y, calibration_params={'strategy': 'max_tpr', + 'min_rate': 0.}) + assert_array_equal(mock_clf.predict(pairs), np.ones(7)) + # optimizing for max_tnr we should reject all points if min_rate = 0. ( + # because by convention then tpr=0/0=0) + mock_clf.fit(pairs, y, calibration_params={'strategy': 'max_tnr', + 'min_rate': 0.}) + assert_array_equal(mock_clf.predict(pairs), - np.ones(7)) + + # Note: we'll never find a case where we would reject all points for + # maximizing tpr (we can always accept more points), and accept all + # points for maximizing tnr (we can always reject more points) + + # case of alternated scores: for optimizing the f_1 score we should accept + # all points (because this way we have max recall (1) and max precision ( + # here: 0.5)) + y = [1., -1., 1., -1., 1., -1.] + mock_clf.fit(pairs[:6], y, calibration_params={'strategy': 'f_beta', + 'beta': 1.}) + assert_array_equal(mock_clf.predict(pairs[:6]), np.ones(6)) + + # Note: for optimizing f_1 score, we will never find an optimal case where we + # reject all points because in this case we would have 0 precision (by + # convention, because it's 0/0), and 0 recall (and we could always decrease + # the threshold to increase the recall, and we couldn't do worse for + # precision so it would be better) + + +@pytest.mark.parametrize('estimator, _', + pairs_learners + [(IdentityPairsClassifier(), None), + (_PairsClassifierMixin, None)], + ids=ids_pairs_learners + ['mock', 'class']) +@pytest.mark.parametrize('invalid_args, expected_msg', + [({'strategy': 'weird'}, + ('Strategy can either be "accuracy", "f_beta" or ' + '"max_tpr" or "max_tnr". 
Got "weird" instead.'))] + + [({'strategy': strategy, 'min_rate': min_rate}, + 'Parameter min_rate must be a number in' + '[0, 1]. Got {} instead.'.format(min_rate)) + for (strategy, min_rate) in product( + ['max_tpr', 'max_tnr'], + [None, 'weird', -0.2, 1.2, 3 + 2j])] + + [({'strategy': 'f_beta', 'beta': beta}, + 'Parameter beta must be a real number. ' + 'Got {} instead.'.format(type(beta))) + for beta in [None, 'weird', 3 + 2j]] + ) +def test_validate_calibration_params_invalid_parameters_right_error( + estimator, _, invalid_args, expected_msg): + # test that the right error message is returned if invalid arguments are + # given to _validate_calibration_params, for all pairs metric learners as + # well as a mocking general identity pairs classifier and the class itself + with pytest.raises(ValueError) as raised_error: + estimator._validate_calibration_params(**invalid_args) + assert str(raised_error.value) == expected_msg + + +@pytest.mark.parametrize('estimator, _', + pairs_learners + [(IdentityPairsClassifier(), None), + (_PairsClassifierMixin, None)], + ids=ids_pairs_learners + ['mock', 'class']) +@pytest.mark.parametrize('valid_args', + [{}, {'strategy': 'accuracy'}] + + [{'strategy': strategy, 'min_rate': min_rate} + for (strategy, min_rate) in product( + ['max_tpr', 'max_tnr'], + [0., 0.2, 0.8, 1.])] + + [{'strategy': 'f_beta', 'beta': beta} + for beta in [-5., -1., 0., 0.1, 0.2, 1., 5.]] + # Note that we authorize beta < 0 (even if + # in fact it will be squared, so it would be useless + # to do that) + ) +def test_validate_calibration_params_valid_parameters( + estimator, _, valid_args): + # test that no warning message is returned if valid arguments are given to + # _validate_calibration_params for all pairs metric learners, as well as + # a mocking example, and the class itself + with pytest.warns(None) as record: + estimator._validate_calibration_params(**valid_args) + assert len(record) == 0 + + +@pytest.mark.parametrize('estimator, build_dataset', + pairs_learners, + ids=ids_pairs_learners) +def test_validate_calibration_params_invalid_parameters_error_before__fit( + estimator, build_dataset): + """For all pairs metric learners (which currently all have a _fit method), + make sure that calibration parameters are validated before fitting""" + estimator = clone(estimator) + input_data, labels, _, _ = build_dataset() + + def breaking_fun(**args): # a function that fails so that we will miss + # the calibration at the end and therefore the right error message from + # validating params should be thrown before + raise RuntimeError('Game over.') + estimator._fit = breaking_fun + expected_msg = ('Strategy can either be "accuracy", "f_beta" or ' + '"max_tpr" or "max_tnr". 
Got "weird" instead.') + with pytest.raises(ValueError) as raised_error: + estimator.fit(input_data, labels, calibration_params={'strategy': 'weird'}) + assert str(raised_error.value) == expected_msg diff --git a/test/test_quadruplets_classifiers.py b/test/test_quadruplets_classifiers.py new file mode 100644 index 00000000..2bf36b3f --- /dev/null +++ b/test/test_quadruplets_classifiers.py @@ -0,0 +1,42 @@ +import pytest +from sklearn.exceptions import NotFittedError +from sklearn.model_selection import train_test_split + +from test.test_utils import quadruplets_learners, ids_quadruplets_learners +from sklearn.utils.testing import set_random_state +from sklearn import clone +import numpy as np + + +@pytest.mark.parametrize('with_preprocessor', [True, False]) +@pytest.mark.parametrize('estimator, build_dataset', quadruplets_learners, + ids=ids_quadruplets_learners) +def test_predict_only_one_or_minus_one(estimator, build_dataset, + with_preprocessor): + """Test that all predicted values are either +1 or -1""" + input_data, labels, preprocessor, _ = build_dataset(with_preprocessor) + estimator = clone(estimator) + estimator.set_params(preprocessor=preprocessor) + set_random_state(estimator) + (quadruplets_train, + quadruplets_test, y_train, y_test) = train_test_split(input_data, labels) + estimator.fit(quadruplets_train) + predictions = estimator.predict(quadruplets_test) + not_valid = [e for e in predictions if e not in [-1, 1]] + assert len(not_valid) == 0 + + +@pytest.mark.parametrize('with_preprocessor', [True, False]) +@pytest.mark.parametrize('estimator, build_dataset', quadruplets_learners, + ids=ids_quadruplets_learners) +def test_raise_not_fitted_error_if_not_fitted(estimator, build_dataset, + with_preprocessor): + """Test that a NotFittedError is raised if someone tries to predict and + the metric learner has not been fitted.""" + input_data, labels, preprocessor, _ = build_dataset(with_preprocessor) + estimator = clone(estimator) + estimator.set_params(preprocessor=preprocessor) + set_random_state(estimator) + with pytest.raises(NotFittedError): + estimator.predict(input_data) + diff --git a/test/test_sklearn_compat.py b/test/test_sklearn_compat.py index f1248c9a..5d6c5d77 100644 --- a/test/test_sklearn_compat.py +++ b/test/test_sklearn_compat.py @@ -15,9 +15,13 @@ import numpy as np from sklearn.model_selection import (cross_val_score, cross_val_predict, train_test_split, KFold) +from sklearn.metrics.scorer import get_scorer from sklearn.utils.testing import _get_args from test.test_utils import (metric_learners, ids_metric_learners, - mock_preprocessor) + mock_preprocessor, tuples_learners, + ids_tuples_learners, pairs_learners, + ids_pairs_learners, remove_y_quadruplets, + quadruplets_learners) # Wrap the _Supervised methods with a deterministic wrapper for testing. @@ -97,22 +101,62 @@ def stable_init(self, sparsity_param=0.01, num_labeled='deprecated', @pytest.mark.parametrize('with_preprocessor', [True, False]) -@pytest.mark.parametrize('estimator, build_dataset', metric_learners, - ids=ids_metric_learners) -def test_cross_validation_is_finite(estimator, build_dataset, - with_preprocessor): +@pytest.mark.parametrize('estimator, build_dataset', pairs_learners, + ids=ids_pairs_learners) +def test_various_scoring_on_tuples_learners(estimator, build_dataset, + with_preprocessor): + """Tests that scikit-learn's scoring returns something finite, + for other scoring than default scoring. (List of scikit-learn's scores can be + found in sklearn.metrics.scorer). 
For each type of output (predict, + predict_proba, decision_function), we test a bunch of scores. + We only test on pairs learners because quadruplets don't have a y argument. + """ + input_data, labels, preprocessor, _ = build_dataset(with_preprocessor) + estimator = clone(estimator) + estimator.set_params(preprocessor=preprocessor) + set_random_state(estimator) + + # scores that need a predict function: every tuples learner should have a + # predict function (whether the pair is of positive samples or negative + # samples) + for scoring in ['accuracy', 'f1']: + check_score_is_finite(scoring, estimator, input_data, labels) + # scores that need a predict_proba: + if hasattr(estimator, "predict_proba"): + for scoring in ['neg_log_loss', 'brier_score']: + check_score_is_finite(scoring, estimator, input_data, labels) + # scores that need a decision_function: every tuples learner should have a + # decision function (the metric between points) + for scoring in ['roc_auc', 'average_precision', 'precision', 'recall']: + check_score_is_finite(scoring, estimator, input_data, labels) + + +def check_score_is_finite(scoring, estimator, input_data, labels): + estimator = clone(estimator) + assert np.isfinite(cross_val_score(estimator, input_data, labels, + scoring=scoring)).all() + estimator.fit(input_data, labels) + assert np.isfinite(get_scorer(scoring)(estimator, input_data, labels)) + + +@pytest.mark.parametrize('estimator, build_dataset', tuples_learners, + ids=ids_tuples_learners) +def test_cross_validation_is_finite(estimator, build_dataset): """Tests that validation on metric-learn estimators returns something finite """ - if any(hasattr(estimator, method) for method in ["predict", "score"]): - input_data, labels, preprocessor, _ = build_dataset(with_preprocessor) - estimator = clone(estimator) - estimator.set_params(preprocessor=preprocessor) - set_random_state(estimator) - if hasattr(estimator, "score"): - assert np.isfinite(cross_val_score(estimator, input_data, labels)).all() - if hasattr(estimator, "predict"): - assert np.isfinite(cross_val_predict(estimator, - input_data, labels)).all() + input_data, labels, preprocessor, _ = build_dataset() + estimator = clone(estimator) + estimator.set_params(preprocessor=preprocessor) + set_random_state(estimator) + assert np.isfinite(cross_val_score(estimator, + *remove_y_quadruplets(estimator, + input_data, + labels))).all() + assert np.isfinite(cross_val_predict(estimator, + *remove_y_quadruplets(estimator, + input_data, + labels) + )).all() @pytest.mark.parametrize('with_preprocessor', [True, False]) @@ -143,23 +187,28 @@ def test_cross_validation_manual_vs_scikit(estimator, build_dataset, train_mask = np.ones(input_data.shape[0], bool) train_mask[test_slice] = False y_train, y_test = labels[train_mask], labels[test_slice] - estimator.fit(input_data[train_mask], y_train) + estimator.fit(*remove_y_quadruplets(estimator, + input_data[train_mask], + y_train)) if hasattr(estimator, "score"): - scores.append(estimator.score(input_data[test_slice], y_test)) + scores.append(estimator.score(*remove_y_quadruplets( + estimator, input_data[test_slice], y_test))) if hasattr(estimator, "predict"): predictions[test_slice] = estimator.predict(input_data[test_slice]) if hasattr(estimator, "score"): - assert all(scores == cross_val_score(estimator, input_data, labels, - cv=kfold)) + assert all(scores == cross_val_score( + estimator, *remove_y_quadruplets(estimator, input_data, labels), + cv=kfold)) if hasattr(estimator, "predict"): - assert all(predictions == 
cross_val_predict(estimator, input_data, - labels, - cv=kfold)) + assert all(predictions == cross_val_predict( + estimator, + *remove_y_quadruplets(estimator, input_data, labels), + cv=kfold)) def check_score(estimator, tuples, y): if hasattr(estimator, "score"): - score = estimator.score(tuples, y) + score = estimator.score(*remove_y_quadruplets(estimator, tuples, y)) assert np.isfinite(score) @@ -183,7 +232,7 @@ def test_simple_estimator(estimator, build_dataset, with_preprocessor): estimator.set_params(preprocessor=preprocessor) set_random_state(estimator) - estimator.fit(tuples_train, y_train) + estimator.fit(*remove_y_quadruplets(estimator, tuples_train, y_train)) check_score(estimator, tuples_test, y_test) check_predict(estimator, tuples_test) @@ -230,7 +279,9 @@ def test_estimators_fit_returns_self(estimator, build_dataset, input_data, labels, preprocessor, _ = build_dataset(with_preprocessor) estimator = clone(estimator) estimator.set_params(preprocessor=preprocessor) - assert estimator.fit(input_data, labels) is estimator + assert estimator.fit(*remove_y_quadruplets(estimator, + input_data, + labels)) is estimator @pytest.mark.parametrize('with_preprocessor', [True, False]) @@ -240,42 +291,53 @@ def test_pipeline_consistency(estimator, build_dataset, with_preprocessor): # Adapted from scikit learn # check that make_pipeline(est) gives same score as est - input_data, y, preprocessor, _ = build_dataset(with_preprocessor) - - def make_random_state(estimator, in_pipeline): - rs = {} - name_estimator = estimator.__class__.__name__ - if name_estimator[-11:] == '_Supervised': - name_param = 'random_state' - if in_pipeline: - name_param = name_estimator.lower() + '__' + name_param - rs[name_param] = check_random_state(0) - return rs + # we do this test on all except quadruplets (since they don't have a y + # in fit): + if estimator.__class__.__name__ not in [e.__class__.__name__ + for (e, _) in + quadruplets_learners]: + input_data, y, preprocessor, _ = build_dataset(with_preprocessor) + + def make_random_state(estimator, in_pipeline): + rs = {} + name_estimator = estimator.__class__.__name__ + if name_estimator[-11:] == '_Supervised': + name_param = 'random_state' + if in_pipeline: + name_param = name_estimator.lower() + '__' + name_param + rs[name_param] = check_random_state(0) + return rs - estimator = clone(estimator) - estimator.set_params(preprocessor=preprocessor) - pipeline = make_pipeline(estimator) - estimator.fit(input_data, y, **make_random_state(estimator, False)) - pipeline.fit(input_data, y, **make_random_state(estimator, True)) - - if hasattr(estimator, 'score'): - result = estimator.score(input_data, y) - result_pipe = pipeline.score(input_data, y) - assert_allclose_dense_sparse(result, result_pipe) - - if hasattr(estimator, 'predict'): - result = estimator.predict(input_data) - result_pipe = pipeline.predict(input_data) - assert_allclose_dense_sparse(result, result_pipe) - - if issubclass(estimator.__class__, TransformerMixin): - if hasattr(estimator, 'transform'): - result = estimator.transform(input_data) - result_pipe = pipeline.transform(input_data) + estimator = clone(estimator) + estimator.set_params(preprocessor=preprocessor) + pipeline = make_pipeline(estimator) + estimator.fit(*remove_y_quadruplets(estimator, input_data, y), + **make_random_state(estimator, False)) + pipeline.fit(*remove_y_quadruplets(estimator, input_data, y), + **make_random_state(estimator, True)) + + if hasattr(estimator, 'score'): + result = estimator.score(*remove_y_quadruplets(estimator, 
+ input_data, + y)) + result_pipe = pipeline.score(*remove_y_quadruplets(estimator, + input_data, + y)) assert_allclose_dense_sparse(result, result_pipe) + if hasattr(estimator, 'predict'): + result = estimator.predict(input_data) + result_pipe = pipeline.predict(input_data) + assert_allclose_dense_sparse(result, result_pipe) + + if issubclass(estimator.__class__, TransformerMixin): + if hasattr(estimator, 'transform'): + result = estimator.transform(input_data) + result_pipe = pipeline.transform(input_data) + assert_allclose_dense_sparse(result, result_pipe) -@pytest.mark.parametrize('with_preprocessor',[True, False]) + +@pytest.mark.parametrize('with_preprocessor', [True, False]) @pytest.mark.parametrize('estimator, build_dataset', metric_learners, ids=ids_metric_learners) def test_dict_unchanged(estimator, build_dataset, with_preprocessor): @@ -286,7 +348,7 @@ def test_dict_unchanged(estimator, build_dataset, with_preprocessor): estimator.set_params(preprocessor=preprocessor) if hasattr(estimator, "num_dims"): estimator.num_dims = 1 - estimator.fit(input_data, labels) + estimator.fit(*remove_y_quadruplets(estimator, input_data, labels)) def check_dict(): assert estimator.__dict__ == dict_before, ( @@ -303,7 +365,7 @@ def check_dict(): check_dict() -@pytest.mark.parametrize('with_preprocessor',[True, False]) +@pytest.mark.parametrize('with_preprocessor', [True, False]) @pytest.mark.parametrize('estimator, build_dataset', metric_learners, ids=ids_metric_learners) def test_dont_overwrite_parameters(estimator, build_dataset, @@ -317,7 +379,7 @@ def test_dont_overwrite_parameters(estimator, build_dataset, estimator.num_dims = 1 dict_before_fit = estimator.__dict__.copy() - estimator.fit(input_data, labels) + estimator.fit(*remove_y_quadruplets(estimator, input_data, labels)) dict_after_fit = estimator.__dict__ public_keys_after_fit = [key for key in dict_after_fit.keys() diff --git a/test/test_utils.py b/test/test_utils.py index f1df4098..cfadfd32 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -100,8 +100,11 @@ def build_quadruplets(with_preprocessor=False): [learner for (learner, _) in quadruplets_learners])) -pairs_learners = [(ITML(), build_pairs), - (MMC(max_iter=2), build_pairs), # max_iter=2 for faster +pairs_learners = [(ITML(max_iter=2), build_pairs), # max_iter=2 to be + # faster, also make tests pass while waiting for #175 to + # be solved + # TODO: remove this comment when #175 is solved + (MMC(max_iter=2), build_pairs), # max_iter=2 to be faster (SDML(use_cov=False, balance_param=1e-5), build_pairs)] ids_pairs_learners = list(map(lambda x: x.__class__.__name__, [learner for (learner, _) in @@ -117,7 +120,7 @@ def build_quadruplets(with_preprocessor=False): (MMC_Supervised(max_iter=5), build_classification), (RCA_Supervised(num_chunks=10), build_classification), (SDML_Supervised(use_cov=False, balance_param=1e-5), - build_classification)] + build_classification)] ids_classifiers = list(map(lambda x: x.__class__.__name__, [learner for (learner, _) in classifiers])) @@ -139,6 +142,18 @@ def build_quadruplets(with_preprocessor=False): ids_metric_learners = ids_tuples_learners + ids_supervised_learners +def remove_y_quadruplets(estimator, X, y): + """Quadruplets learners have no y in fit, but to write test for all + estimators, it is convenient to have this function, that will return X and y + if the estimator needs a y to fit on, and just X otherwise.""" + if estimator.__class__.__name__ in [e.__class__.__name__ + for (e, _) in + quadruplets_learners]: + return (X,) + 
else: + return (X, y) + + def mock_preprocessor(indices): """A preprocessor for testing purposes that returns an all ones 3D array """ @@ -840,7 +855,7 @@ def test_error_message_tuple_size(estimator, _): [[1.9, 5.3], [1., 7.8], [3.2, 1.2]]]) y = [1, 1] with pytest.raises(ValueError) as raised_err: - estimator.fit(invalid_pairs, y) + estimator.fit(*remove_y_quadruplets(estimator, invalid_pairs, y)) expected_msg = ("Tuples of {} element(s) expected{}. Got tuples of 3 " "element(s) instead (shape=(2, 3, 2)):\ninput={}.\n" .format(estimator._tuple_size, make_context(estimator), @@ -925,19 +940,25 @@ def make_random_state(estimator): estimator_with_preprocessor = clone(estimator) set_random_state(estimator_with_preprocessor) estimator_with_preprocessor.set_params(preprocessor=X) - estimator_with_preprocessor.fit(indices_train, y_train, + estimator_with_preprocessor.fit(*remove_y_quadruplets(estimator, + indices_train, + y_train), **make_random_state(estimator)) estimator_without_preprocessor = clone(estimator) set_random_state(estimator_without_preprocessor) estimator_without_preprocessor.set_params(preprocessor=None) - estimator_without_preprocessor.fit(formed_train, y_train, + estimator_without_preprocessor.fit(*remove_y_quadruplets(estimator, + formed_train, + y_train), **make_random_state(estimator)) estimator_with_prep_formed = clone(estimator) set_random_state(estimator_with_prep_formed) estimator_with_prep_formed.set_params(preprocessor=X) - estimator_with_prep_formed.fit(indices_train, y_train, + estimator_with_prep_formed.fit(*remove_y_quadruplets(estimator, + indices_train, + y_train), **make_random_state(estimator)) # test prediction methods From 99b03225bcdc70b89e1c054a60601d4277c4cf61 Mon Sep 17 00:00:00 2001 From: William de Vazelhes <31916524+wdevazelhes@users.noreply.github.com> Date: Wed, 17 Apr 2019 11:50:42 +0200 Subject: [PATCH 104/210] [MRG] make transformer_from_metric more robust (#191) * ENH: make transformer_from_metric more robust * FIX: enhance test on an undefinite matrix with high computed determinant * FIX: only look at the value of slogdet, not the sign * MAINT: improve transformer_from_metric * Address https://github.com/metric-learn/metric-learn/pull/191#pullrequestreview-227267960 --- metric_learn/_util.py | 82 ++++++++++++---- test/test_transformer_metric_conversion.py | 106 ++++++++++++++++++++- test/test_utils.py | 21 +++- 3 files changed, 188 insertions(+), 21 deletions(-) diff --git a/metric_learn/_util.py b/metric_learn/_util.py index bd57fd5f..7c70e4bf 100644 --- a/metric_learn/_util.py +++ b/metric_learn/_util.py @@ -1,5 +1,7 @@ +import warnings import numpy as np import six +from numpy.linalg import LinAlgError from sklearn.utils import check_array from sklearn.utils.validation import check_X_y from metric_learn.exceptions import PreprocessorError @@ -324,31 +326,73 @@ def check_collapsed_pairs(pairs): "in total.".format(num_ident, pairs.shape[0])) -def transformer_from_metric(metric): - """Computes the transformation matrix from the Mahalanobis matrix. +def _check_sdp_from_eigen(w, tol=None): + """Checks if some of the eigenvalues given are negative, up to a tolerance + level, with a default value of the tolerance depending on the eigenvalues. - Since by definition the metric `M` is positive semi-definite (PSD), it - admits a Cholesky decomposition: L = cholesky(M).T. However, currently the - computation of the Cholesky decomposition used does not support - non-definite matrices. 
If the metric is not definite, this method will - return L = V.T w^( -1/2), with M = V*w*V.T being the eigenvector - decomposition of M with the eigenvalues in the diagonal matrix w and the - columns of V being the eigenvectors. If M is diagonal, this method will - just return its elementwise square root (since the diagonalization of - the matrix is itself). + Parameters + ---------- + w : array-like, shape=(n_eigenvalues,) + Eigenvalues to check for non semidefinite positiveness. + + tol : positive `float`, optional + Negative eigenvalues above - tol are considered zero. If + tol is None, and eps is the epsilon value for datatype of w, then tol + is set to w.max() * len(w) * eps. + + See Also + -------- + np.linalg.matrix_rank for more details on the choice of tolerance (the same + strategy is applied here) + """ + if tol is None: + tol = w.max() * len(w) * np.finfo(w.dtype).eps + assert tol >= 0, ValueError("tol should be positive.") + if any(w < - tol): + raise ValueError("Matrix is not positive semidefinite (PSD).") + + +def transformer_from_metric(metric, tol=None): + """Returns the transformation matrix from the Mahalanobis matrix. + + Returns the transformation matrix from the Mahalanobis matrix, i.e. the + matrix L such that metric=L.T.dot(L). + + Parameters + ---------- + metric : symmetric `np.ndarray`, shape=(d x d) + The input metric, from which we want to extract a transformation matrix. + + tol : positive `float`, optional + Eigenvalues of `metric` between 0 and - tol are considered zero. If tol is + None, and w_max is `metric`'s largest eigenvalue, and eps is the epsilon + value for datatype of w, then tol is set to w_max * metric.shape[0] * eps. Returns ------- - L : (d x d) matrix + L : np.ndarray, shape=(d x d) + The transformation matrix, such that L.T.dot(L) == metric. """ - - if np.allclose(metric, np.diag(np.diag(metric))): - return np.sqrt(metric) - elif not np.isclose(np.linalg.det(metric), 0): - return np.linalg.cholesky(metric).T + if not np.allclose(metric, metric.T): + raise ValueError("The input metric should be symmetric.") + # If M is diagonal, we will just return the elementwise square root: + if np.array_equal(metric, np.diag(np.diag(metric))): + _check_sdp_from_eigen(np.diag(metric), tol) + return np.diag(np.sqrt(np.maximum(0, np.diag(metric)))) else: - w, V = np.linalg.eigh(metric) - return V.T * np.sqrt(np.maximum(0, w[:, None])) + try: + # if `M` is positive semi-definite, it will admit a Cholesky + # decomposition: L = cholesky(M).T + return np.linalg.cholesky(metric).T + except LinAlgError: + # However, currently np.linalg.cholesky does not support indefinite + # matrices. So if the latter does not work we will return L = V.T w^( + # -1/2), with M = V*w*V.T being the eigenvector decomposition of M with + # the eigenvalues in the diagonal matrix w and the columns of V being the + # eigenvectors. 
+ w, V = np.linalg.eigh(metric) + _check_sdp_from_eigen(w, tol) + return V.T * np.sqrt(np.maximum(0, w[:, None])) def validate_vector(u, dtype=None): diff --git a/test/test_transformer_metric_conversion.py b/test/test_transformer_metric_conversion.py index 6cfe8281..4328320d 100644 --- a/test/test_transformer_metric_conversion.py +++ b/test/test_transformer_metric_conversion.py @@ -1,11 +1,16 @@ import unittest import numpy as np +import pytest +from numpy.linalg import LinAlgError +from scipy.stats import ortho_group from sklearn.datasets import load_iris -from numpy.testing import assert_array_almost_equal +from numpy.testing import assert_array_almost_equal, assert_allclose +from sklearn.utils.testing import ignore_warnings from metric_learn import ( LMNN, NCA, LFDA, Covariance, MLKR, LSML_Supervised, ITML_Supervised, SDML_Supervised, RCA_Supervised) +from metric_learn._util import transformer_from_metric class TestTransformerMetricConversion(unittest.TestCase): @@ -76,6 +81,105 @@ def test_mlkr(self): L = mlkr.transformer_ assert_array_almost_equal(L.T.dot(L), mlkr.get_mahalanobis_matrix()) + @ignore_warnings + def test_transformer_from_metric_edge_cases(self): + """Test that transformer_from_metric returns the right result in various + edge cases""" + rng = np.random.RandomState(42) + + # an orthonormal matrix useful for creating matrices with given + # eigenvalues: + P = ortho_group.rvs(7, random_state=rng) + + # matrix with all its coefficients very low (to check that the algorithm + # does not consider it as a diagonal matrix)(non regression test for + # https://github.com/metric-learn/metric-learn/issues/175) + M = np.diag([1e-15, 2e-16, 3e-15, 4e-16, 5e-15, 6e-16, 7e-15]) + M = P.dot(M).dot(P.T) + L = transformer_from_metric(M) + assert_allclose(L.T.dot(L), M) + + # diagonal matrix + M = np.diag(np.abs(rng.randn(5))) + L = transformer_from_metric(M) + assert_allclose(L.T.dot(L), M) + + # low-rank matrix (with zeros) + M = np.zeros((7, 7)) + small_random = rng.randn(3, 3) + M[:3, :3] = small_random.T.dot(small_random) + L = transformer_from_metric(M) + assert_allclose(L.T.dot(L), M) + + # low-rank matrix (without necessarily zeros) + R = np.abs(rng.randn(7, 7)) + M = R.dot(np.diag([1, 5, 3, 2, 0, 0, 0])).dot(R.T) + L = transformer_from_metric(M) + assert_allclose(L.T.dot(L), M) + + # matrix with a determinant still high but which should be considered as a + # non-definite matrix (to check we don't test the definiteness with the + # determinant which is a bad strategy) + M = np.diag([1e5, 1e5, 1e5, 1e5, 1e5, 1e5, 1e-20]) + M = P.dot(M).dot(P.T) + assert np.abs(np.linalg.det(M)) > 10 + assert np.linalg.slogdet(M)[1] > 1 # (just to show that the computed + # determinant is far from null) + with pytest.raises(LinAlgError) as err_msg: + np.linalg.cholesky(M) + assert str(err_msg.value) == 'Matrix is not positive definite' + # (just to show that this case is indeed considered by numpy as an + # indefinite case) + L = transformer_from_metric(M) + assert_allclose(L.T.dot(L), M) + + # matrix with lots of small nonzeros that make a big zero when multiplied + M = np.diag([1e-3, 1e-3, 1e-3, 1e-3, 1e-3, 1e-3, 1e-3]) + L = transformer_from_metric(M) + assert_allclose(L.T.dot(L), M) + + # full rank matrix + M = rng.randn(10, 10) + M = M.T.dot(M) + assert np.linalg.matrix_rank(M) == 10 + L = transformer_from_metric(M) + assert_allclose(L.T.dot(L), M) + + def test_non_symmetric_matrix_raises(self): + """Checks that if a non symmetric matrix is given to + transformer_from_metric, an error is 
thrown""" + rng = np.random.RandomState(42) + M = rng.randn(10, 10) + with pytest.raises(ValueError) as raised_error: + transformer_from_metric(M) + assert str(raised_error.value) == "The input metric should be symmetric." + + def test_non_psd_raises(self): + """Checks that a non PSD matrix (i.e. with negative eigenvalues) will + raise an error when passed to transformer_from_metric""" + rng = np.random.RandomState(42) + D = np.diag([1, 5, 3, 4.2, -4, -2, 1]) + P = ortho_group.rvs(7, random_state=rng) + M = P.dot(D).dot(P.T) + msg = ("Matrix is not positive semidefinite (PSD).") + with pytest.raises(ValueError) as raised_error: + transformer_from_metric(M) + assert str(raised_error.value) == msg + with pytest.raises(ValueError) as raised_error: + transformer_from_metric(D) + assert str(raised_error.value) == msg + + def test_almost_psd_dont_raise(self): + """Checks that if the metric is almost PSD (i.e. it has some negative + eigenvalues very close to zero), then transformer_from_metric will still + work""" + rng = np.random.RandomState(42) + D = np.diag([1, 5, 3, 4.2, -1e-20, -2e-20, -1e-20]) + P = ortho_group.rvs(7, random_state=rng) + M = P.dot(D).dot(P.T) + L = transformer_from_metric(M) + assert_allclose(L.T.dot(L), M) + if __name__ == '__main__': unittest.main() diff --git a/test/test_utils.py b/test/test_utils.py index cfadfd32..c86aa018 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -9,7 +9,8 @@ from sklearn.base import clone from metric_learn._util import (check_input, make_context, preprocess_tuples, make_name, preprocess_points, - check_collapsed_pairs, validate_vector) + check_collapsed_pairs, validate_vector, + _check_sdp_from_eigen) from metric_learn import (ITML, LSML, MMC, RCA, SDML, Covariance, LFDA, LMNN, MLKR, NCA, ITML_Supervised, LSML_Supervised, MMC_Supervised, RCA_Supervised, SDML_Supervised, @@ -1051,3 +1052,21 @@ def test__validate_vector(): x = [[1, 2], [3, 4]] with pytest.raises(ValueError): validate_vector(x) + + +def _check_sdp_from_eigen_positive_err_messages(): + """Tests that if _check_sdp_from_eigen is given a negative tol it returns + an error, and if positive it does not""" + w = np.random.RandomState(42).randn(10) + with pytest.raises(ValueError) as raised_error: + _check_sdp_from_eigen(w, -5.) + assert str(raised_error.value) == "tol should be positive." + with pytest.raises(ValueError) as raised_error: + _check_sdp_from_eigen(w, -1e-10) + assert str(raised_error.value) == "tol should be positive." + with pytest.raises(ValueError) as raised_error: + _check_sdp_from_eigen(w, 1.) + assert len(raised_error.value) == 0 + with pytest.raises(ValueError) as raised_error: + _check_sdp_from_eigen(w, 0.) + assert str(raised_error.value) == 0 From d945df1342c69012608bb70b92520392a0853de6 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Thu, 18 Apr 2019 10:47:13 +0200 Subject: [PATCH 105/210] FIX: fix test that was not tested because didn't start with test_... 
--- metric_learn/_util.py | 3 ++- test/test_utils.py | 15 ++++++--------- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/metric_learn/_util.py b/metric_learn/_util.py index 7c70e4bf..33311620 100644 --- a/metric_learn/_util.py +++ b/metric_learn/_util.py @@ -347,7 +347,8 @@ def _check_sdp_from_eigen(w, tol=None): """ if tol is None: tol = w.max() * len(w) * np.finfo(w.dtype).eps - assert tol >= 0, ValueError("tol should be positive.") + if tol < 0: + raise ValueError("tol should be positive.") if any(w < - tol): raise ValueError("Matrix is not positive semidefinite (PSD).") diff --git a/test/test_utils.py b/test/test_utils.py index c86aa018..52ebc7a6 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -1054,19 +1054,16 @@ def test__validate_vector(): validate_vector(x) -def _check_sdp_from_eigen_positive_err_messages(): +def test_check_sdp_from_eigen_positive_err_messages(): """Tests that if _check_sdp_from_eigen is given a negative tol it returns - an error, and if positive it does not""" - w = np.random.RandomState(42).randn(10) + an error, and if positive (or None) it does not""" + w = np.abs(np.random.RandomState(42).randn(10)) + 1 with pytest.raises(ValueError) as raised_error: _check_sdp_from_eigen(w, -5.) assert str(raised_error.value) == "tol should be positive." with pytest.raises(ValueError) as raised_error: _check_sdp_from_eigen(w, -1e-10) assert str(raised_error.value) == "tol should be positive." - with pytest.raises(ValueError) as raised_error: - _check_sdp_from_eigen(w, 1.) - assert len(raised_error.value) == 0 - with pytest.raises(ValueError) as raised_error: - _check_sdp_from_eigen(w, 0.) - assert str(raised_error.value) == 0 + _check_sdp_from_eigen(w, 1.) + _check_sdp_from_eigen(w, 0.) + _check_sdp_from_eigen(w, None) From 8f57db262f9f193aa69d48120d5c3a684cacafcf Mon Sep 17 00:00:00 2001 From: William de Vazelhes <31916524+wdevazelhes@users.noreply.github.com> Date: Fri, 3 May 2019 15:37:40 +0200 Subject: [PATCH 106/210] Add checks for labels when having pairs (#197) --- metric_learn/_util.py | 12 +++++++ test/test_utils.py | 73 ++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 84 insertions(+), 1 deletion(-) diff --git a/metric_learn/_util.py b/metric_learn/_util.py index 33311620..ff9c021c 100644 --- a/metric_learn/_util.py +++ b/metric_learn/_util.py @@ -137,6 +137,11 @@ def check_input(input_data, y=None, preprocessor=None, input_data = check_input_tuples(input_data, context, preprocessor, args_for_sk_checks, tuple_size) + # if we have y and the input data are pairs, we need to ensure + # the labels are in [-1, 1]: + if y is not None and input_data.shape[1] == 2: + check_y_valid_values_for_pairs(y) + else: raise ValueError("Unknown value {} for type_of_inputs. Valid values are " "'classic' or 'tuples'.".format(type_of_inputs)) @@ -297,6 +302,13 @@ def check_tuple_size(tuples, tuple_size, context): raise ValueError(msg_t) +def check_y_valid_values_for_pairs(y): + """Checks that y values are in [-1, 1]""" + if not np.array_equal(np.abs(y), np.ones_like(y)): + raise ValueError("When training on pairs, the labels (y) should contain " + "only values in [-1, 1]. 
Found an incorrect value.") + + class ArrayIndexer: def __init__(self, X): diff --git a/test/test_utils.py b/test/test_utils.py index 52ebc7a6..4cec7444 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -10,7 +10,8 @@ from metric_learn._util import (check_input, make_context, preprocess_tuples, make_name, preprocess_points, check_collapsed_pairs, validate_vector, - _check_sdp_from_eigen) + _check_sdp_from_eigen, + check_y_valid_values_for_pairs) from metric_learn import (ITML, LSML, MMC, RCA, SDML, Covariance, LFDA, LMNN, MLKR, NCA, ITML_Supervised, LSML_Supervised, MMC_Supervised, RCA_Supervised, SDML_Supervised, @@ -1067,3 +1068,73 @@ def test_check_sdp_from_eigen_positive_err_messages(): _check_sdp_from_eigen(w, 1.) _check_sdp_from_eigen(w, 0.) _check_sdp_from_eigen(w, None) + + +@pytest.mark.unit +@pytest.mark.parametrize('wrong_labels', + [[0.5, 0.6, 0.7, 0.8, 0.9], + np.random.RandomState(42).randn(5), + np.random.RandomState(42).choice([0, 1], size=5)]) +def test_check_y_valid_values_for_pairs(wrong_labels): + expected_msg = ("When training on pairs, the labels (y) should contain " + "only values in [-1, 1]. Found an incorrect value.") + with pytest.raises(ValueError) as raised_error: + check_y_valid_values_for_pairs(wrong_labels) + assert str(raised_error.value) == expected_msg + + +@pytest.mark.integration +@pytest.mark.parametrize('wrong_labels', + [[0.5, 0.6, 0.7, 0.8, 0.9], + np.random.RandomState(42).randn(5), + np.random.RandomState(42).choice([0, 1], size=5)]) +def test_check_input_invalid_tuples_without_preprocessor(wrong_labels): + pairs = np.random.RandomState(42).randn(5, 2, 3) + expected_msg = ("When training on pairs, the labels (y) should contain " + "only values in [-1, 1]. Found an incorrect value.") + with pytest.raises(ValueError) as raised_error: + check_input(pairs, wrong_labels, preprocessor=None, + type_of_inputs='tuples') + assert str(raised_error.value) == expected_msg + + +@pytest.mark.integration +@pytest.mark.parametrize('wrong_labels', + [[0.5, 0.6, 0.7, 0.8, 0.9], + np.random.RandomState(42).randn(5), + np.random.RandomState(42).choice([0, 1], size=5)]) +def test_check_input_invalid_tuples_with_preprocessor(wrong_labels): + n_samples, n_features, n_pairs = 10, 4, 5 + rng = np.random.RandomState(42) + pairs = rng.randint(10, size=(n_pairs, 2)) + preprocessor = rng.randn(n_samples, n_features) + expected_msg = ("When training on pairs, the labels (y) should contain " + "only values in [-1, 1]. Found an incorrect value.") + with pytest.raises(ValueError) as raised_error: + check_input(pairs, wrong_labels, preprocessor=ArrayIndexer(preprocessor), + type_of_inputs='tuples') + assert str(raised_error.value) == expected_msg + + +@pytest.mark.integration +@pytest.mark.parametrize('with_preprocessor', [True, False]) +@pytest.mark.parametrize('estimator, build_dataset', pairs_learners, + ids=ids_pairs_learners) +def test_check_input_pairs_learners_invalid_y(estimator, build_dataset, + with_preprocessor): + """checks that the only allowed labels for learning pairs are +1 and -1""" + input_data, labels, _, X = build_dataset() + wrong_labels_list = [labels + 0.5, + np.random.RandomState(42).randn(len(labels)), + np.random.RandomState(42).choice([0, 1], + size=len(labels))] + model = clone(estimator) + set_random_state(model) + + expected_msg = ("When training on pairs, the labels (y) should contain " + "only values in [-1, 1]. 
Found an incorrect value.")
+
+  for wrong_labels in wrong_labels_list:
+    with pytest.raises(ValueError) as raised_error:
+      model.fit(input_data, wrong_labels)
+    assert str(raised_error.value) == expected_msg

From d4badc8adc0a8ce6689dd9a95e89181efaf5ee24 Mon Sep 17 00:00:00 2001
From: Hanchen Wang
Date: Fri, 3 May 2019 14:48:22 +0100
Subject: [PATCH 107/210] add description of the algorithms in doc (#178)

* Update_Doc_hc

* Update lmnn.py, test if the issue was raised up by the unicode encode

* Update .gitignore

* Update with misspelled notations, wrong interpretations, and add more
  details of sdml

* Test: fix nca's reference

* Update Doc and Docstring

* Update Doc and Docstring

* Minor Equation Update and Reference

* fix bad hyphen
---
 .gitignore                |   3 -
 doc/supervised.rst        | 153 ++++++++++++++++++++++---
 doc/weakly_supervised.rst | 229 +++++++++++++++++++++++++++++++++-----
 metric_learn/itml.py      |  27 ++---
 metric_learn/lfda.py      |  17 ++-
 metric_learn/lmnn.py      |  17 +--
 metric_learn/lsml.py      |  19 +++-
 metric_learn/mlkr.py      |  15 ++-
 metric_learn/mmc.py       |  26 ++---
 metric_learn/nca.py       |  15 ++-
 metric_learn/rca.py       |  18 +--
 metric_learn/sdml.py      |  18 +--
 12 files changed, 438 insertions(+), 119 deletions(-)

diff --git a/.gitignore b/.gitignore
index a51c1a82..449f70ea 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,6 +7,3 @@ htmlcov/
 .cache/
 .pytest_cache/
 doc/auto_examples/*
-coverage
-.coverage
-.coverage*

diff --git a/doc/supervised.rst b/doc/supervised.rst
index 26934a47..83bf4449 100644
--- a/doc/supervised.rst
+++ b/doc/supervised.rst
@@ -41,17 +41,37 @@ the covariance matrix of the input data. This is a simple baseline method.

 .. [1] On the Generalized Distance in Statistics, P.C.Mahalanobis, 1936

+.. _lmnn:
+
 LMNN
 -----
-Large-margin nearest neighbor metric learning.
+Large Margin Nearest Neighbor Metric Learning
+(:py:class:`LMNN <metric_learn.lmnn.LMNN>`)

-`LMNN` learns a Mahanalobis distance metric in the kNN classification
-setting using semidefinite programming. The learned metric attempts to keep
-k-nearest neighbors in the same class, while keeping examples from different
-classes separated by a large margin. This algorithm makes no assumptions about
+`LMNN` learns a Mahalanobis distance metric in the kNN classification
+setting. The learned metric attempts to keep close k-nearest neighbors
+from the same class, while keeping examples from different classes
+separated by a large margin. This algorithm makes no assumptions about
 the distribution of the data.

+The distance is learned by solving the following optimization problem:
+
+.. math::
+
+      \min_\mathbf{L}\sum_{i, j}\eta_{ij}||\mathbf{L(x_i-x_j)}||^2 +
+      c\sum_{i, j, l}\eta_{ij}(1-y_{ij})[1+||\mathbf{L(x_i-x_j)}||^2-||
+      \mathbf{L(x_i-x_l)}||^2]_+
+
+where :math:`\mathbf{x}_i` is a data point, :math:`\mathbf{x}_j` is one
+of its k nearest neighbors sharing the same label, and :math:`\mathbf{x}_l`
+are all the other instances within that region with different labels.
+:math:`\eta_{ij}, y_{ij} \in \{0, 1\}` are both indicators:
+:math:`\eta_{ij}=1` means that :math:`\mathbf{x}_{j}` is among the k nearest
+neighbors (with the same label) of :math:`\mathbf{x}_{i}`, while
+:math:`y_{ij}=0` indicates that :math:`\mathbf{x}_{i}, \mathbf{x}_{j}`
+belong to different classes; :math:`[\cdot]_+=\max(0, \cdot)` is the Hinge
+loss.
+
 .. topic:: Example Code:

 ::
@@ -80,16 +100,44 @@ The two implementations differ slightly, and the C++ version is more
 complete.
        -margin
        -nearest-neighbor-classification>`_ Kilian Q. Weinberger, John
        Blitzer, Lawrence K. Saul

+.. _nca:
+
 NCA
 ---

-Neighborhood Components Analysis (`NCA`) is a distance metric learning
-algorithm which aims to improve the accuracy of nearest neighbors
-classification compared to the standard Euclidean distance. The algorithm
-directly maximizes a stochastic variant of the leave-one-out k-nearest
-neighbors (KNN) score on the training set. It can also learn a low-dimensional
-linear embedding of data that can be used for data visualization and fast
-classification.
+Neighborhood Components Analysis (:py:class:`NCA <metric_learn.nca.NCA>`)
+
+`NCA` is a distance metric learning algorithm which aims to improve the
+accuracy of nearest neighbors classification compared to the standard
+Euclidean distance. The algorithm directly maximizes a stochastic variant
+of the leave-one-out k-nearest neighbors (KNN) score on the training set.
+It can also learn a low-dimensional linear transformation of data that can
+be used for data visualization and fast classification.
+
+NCA uses the decomposition :math:`\mathbf{M} = \mathbf{L}^T\mathbf{L}` and
+defines the probability :math:`p_{ij}` that :math:`\mathbf{x}_i` is the
+neighbor of :math:`\mathbf{x}_j` by calculating the softmax likelihood of
+the Mahalanobis distance:
+
+.. math::
+
+      p_{ij} = \frac{\exp(-|| \mathbf{Lx}_i - \mathbf{Lx}_j ||_2^2)}
+               {\sum_{l\neq i}\exp(-||\mathbf{Lx}_i - \mathbf{Lx}_l||_2^2)},
+      \qquad p_{ii}=0
+
+Then the probability that :math:`\mathbf{x}_i` will be correctly classified
+by the stochastic nearest neighbors rule is:
+
+.. math::
+
+      p_{i} = \sum_{j:j\neq i, y_j=y_i}p_{ij}
+
+The optimization problem is to find the matrix :math:`\mathbf{L}` that
+maximizes the sum of the probabilities of being correctly classified:
+
+.. math::
+
+      \mathbf{L} = \text{argmax}\sum_i p_i

 .. topic:: Example Code:

@@ -116,16 +164,55 @@ classification.
 .. [2] Wikipedia entry on Neighborhood Components Analysis
        https://en.wikipedia.org/wiki/Neighbourhood_components_analysis

+.. _lfda:
+
 LFDA
 ----

-Local Fisher Discriminant Analysis (LFDA)
+Local Fisher Discriminant Analysis (:py:class:`LFDA <metric_learn.lfda.LFDA>`)

 `LFDA` is a linear supervised dimensionality reduction method. It is
-particularly useful when dealing with multimodality, where one ore more classes
+particularly useful when dealing with multi-modality, where one or more classes
 consist of separate clusters in input space. The core optimization problem of
 LFDA is solved as a generalized eigenvalue problem.
+
+The algorithm defines the Fisher local within-/between-class scatter matrices
+:math:`\mathbf{S}^{(w)}/ \mathbf{S}^{(b)}` in a pairwise fashion:
+
+.. math::
+
+    \mathbf{S}^{(w)} = \frac{1}{2}\sum_{i,j=1}^nW_{ij}^{(w)}(\mathbf{x}_i -
+    \mathbf{x}_j)(\mathbf{x}_i - \mathbf{x}_j)^T,\\
+    \mathbf{S}^{(b)} = \frac{1}{2}\sum_{i,j=1}^nW_{ij}^{(b)}(\mathbf{x}_i -
+    \mathbf{x}_j)(\mathbf{x}_i - \mathbf{x}_j)^T,\\
+
+where
+
+.. math::
+
+    W_{ij}^{(w)} = \left\{\begin{aligned}0 \qquad y_i\neq y_j \\
+    \,\,\mathbf{A}_{i,j}/n_l \qquad y_i = y_j\end{aligned}\right.\\
+    W_{ij}^{(b)} = \left\{\begin{aligned}1/n \qquad y_i\neq y_j \\
+    \,\,\mathbf{A}_{i,j}(1/n-1/n_l) \qquad y_i = y_j\end{aligned}\right.\\
+
+here :math:`\mathbf{A}_{i,j}` is the :math:`(i,j)`-th entry of the affinity
+matrix :math:`\mathbf{A}`, which can be calculated with local scaling methods.
+
+The learning problem then becomes deriving the LFDA transformation matrix
+:math:`\mathbf{T}_{LFDA}`:
+
+.. math::
+
+    \mathbf{T}_{LFDA} = \arg\max_\mathbf{T}
+    [\text{tr}((\mathbf{T}^T\mathbf{S}^{(w)}
+    \mathbf{T})^{-1}\mathbf{T}^T\mathbf{S}^{(b)}\mathbf{T})]
+
+That is, it is looking for a transformation matrix :math:`\mathbf{T}` such that
+nearby data pairs in the same class are made close and the data pairs in
+different classes are separated from each other; far apart data pairs in the
+same class are not imposed to be close.
+
 .. topic:: Example Code:

 ::
@@ -151,17 +238,50 @@ LFDA is solved as a generalized eigenvalue problem.
        `_ Yuan Tang.

+.. _mlkr:

 MLKR
 ----

-Metric Learning for Kernel Regression.
+Metric Learning for Kernel Regression (:py:class:`MLKR <metric_learn.mlkr.MLKR>`)

 `MLKR` is an algorithm for supervised metric learning, which learns a
-distance function by directly minimising the leave-one-out regression error.
+distance function by directly minimizing the leave-one-out regression error.
 This algorithm can also be viewed as a supervised variation of PCA and can be
 used for dimensionality reduction and high dimensional data visualization.
+
+Theoretically, `MLKR` can be applied with many types of kernel functions and
+distance metrics; we hereafter focus the exposition on a particular instance
+of the Gaussian kernel and Mahalanobis metric, as these are used in our
+empirical development. The Gaussian kernel is denoted as:
+
+.. math::
+
+    k_{ij} = \frac{1}{\sqrt{2\pi}\sigma}\exp(-\frac{d(\mathbf{x}_i,
+    \mathbf{x}_j)}{\sigma^2})
+
+where :math:`d(\cdot, \cdot)` is the squared distance under some metric;
+here, in the fashion of Mahalanobis, it is :math:`d(\mathbf{x}_i,
+\mathbf{x}_j) = ||\mathbf{A}(\mathbf{x}_i - \mathbf{x}_j)||^2`, where the
+transformation matrix :math:`\mathbf{A}` is derived from the decomposition
+of the Mahalanobis matrix :math:`\mathbf{M=A^TA}`.
+
+Since :math:`\sigma^2` can be integrated into :math:`d(\cdot)`, we can set
+:math:`\sigma^2=1` for the sake of simplicity. Here we use the cumulative
+leave-one-out quadratic regression error of the training samples as the
+loss function:
+
+.. math::
+
+    \mathcal{L} = \sum_i(y_i - \hat{y}_i)^2
+
+where the prediction :math:`\hat{y}_i` is derived from kernel regression by
+calculating a weighted average of all the training samples:
+
+.. math::
+
+    \hat{y}_i = \frac{\sum_{j\neq i}y_jk_{ij}}{\sum_{j\neq i}k_{ij}}
+
 .. topic:: Example Code:

 ::
@@ -193,7 +313,6 @@ generated from the labels information and passed to the underlying algorithm.

 .. todo:: add more details about that (see issue ``_)
-
 .. topic:: Example Code:

 ::

diff --git a/doc/weakly_supervised.rst b/doc/weakly_supervised.rst
index 6bf6f993..93720ffc 100644
--- a/doc/weakly_supervised.rst
+++ b/doc/weakly_supervised.rst
@@ -190,18 +190,55 @@ See also: `sklearn.calibration`.

 Algorithms
 ==========

+.. _itml:
+
 ITML
 ----

-Information Theoretic Metric Learning, Davis et al., ICML 2007
+Information Theoretic Metric Learning
+(:py:class:`ITML <metric_learn.itml.ITML>`)

-`ITML` minimizes the differential relative entropy between two multivariate
-Gaussians under constraints on the distance function, which can be formulated
-into a Bregman optimization problem by minimizing the LogDet divergence subject
-to linear constraints. This algorithm can handle a wide variety of constraints
+`ITML` minimizes the (differential) relative entropy, aka Kullback-Leibler
+divergence, between two multivariate Gaussians subject to constraints on the
+associated Mahalanobis distance, which can be formulated into a Bregman
+optimization problem by minimizing the LogDet divergence subject to
+linear constraints. This algorithm can handle a wide variety of constraints
 and can optionally incorporate a prior on the distance function. Unlike some
-other methods, ITML does not rely on an eigenvalue computation or semi-definite
-programming.
+other methods, `ITML` does not rely on an eigenvalue computation or
+semi-definite programming.
+
+Given a Mahalanobis distance parameterized by :math:`A`, its corresponding
+multivariate Gaussian is denoted as:
+
+.. math::
+
+    p(\mathbf{x}; \mathbf{A}) = \frac{1}{Z}\exp(-\frac{1}{2}d_\mathbf{A}
+    (\mathbf{x}, \mu))
+    = \frac{1}{Z}\exp(-\frac{1}{2}(\mathbf{x} - \mu)^T\mathbf{A}
+    (\mathbf{x} - \mu))
+
+where :math:`Z` is the normalization constant and the inverse of the
+Mahalanobis matrix, :math:`\mathbf{A}^{-1}`, is the covariance of the
+Gaussian.
+
+Given pairs of similar points :math:`S` and pairs of dissimilar points
+:math:`D`, the distance metric learning problem is to minimize the LogDet
+divergence, which is equivalent to minimizing :math:`\textbf{KL}(p(\mathbf{x};
+\mathbf{A}_0) || p(\mathbf{x}; \mathbf{A}))`:
+
+.. math::
+
+    \min_\mathbf{A} D_{\ell \mathrm{d}}\left(A, A_{0}\right) =
+    \operatorname{tr}\left(A A_{0}^{-1}\right)-\log \operatorname{det}
+    \left(A A_{0}^{-1}\right)-n\\
+    \text{subject to } \quad d_\mathbf{A}(\mathbf{x}_i, \mathbf{x}_j)
+    \leq u \qquad (\mathbf{x}_i, \mathbf{x}_j)\in S \\
+    d_\mathbf{A}(\mathbf{x}_i, \mathbf{x}_j) \geq l \qquad (\mathbf{x}_i,
+    \mathbf{x}_j)\in D
+
+where :math:`u` and :math:`l` are the upper and lower bounds on the distance
+for similar and dissimilar pairs respectively, :math:`\mathbf{A}_0` is the
+prior distance metric, set to the identity matrix by default, and
+:math:`D_{\ell \mathrm{d}}(\cdot)` is the LogDet divergence.

 .. topic:: Example Code:

 ::
@@ -231,11 +268,124 @@ programming.
 .. [2] Adapted from Matlab code at http://www.cs.utexas.edu/users/pjain/
        itml/

+
+.. _lsml:
+
+LSML
+----
+
+Metric Learning from Relative Comparisons by Minimizing Squared Residual
+(:py:class:`LSML <metric_learn.lsml.LSML>`)
+
+`LSML` proposes a simple, yet effective, algorithm that minimizes a convex
+objective function corresponding to the sum of squared residuals of
+constraints. This algorithm uses the constraints in the form of relative
+distance comparisons; such a method is especially useful where pairwise
+constraints are not natural to obtain, so that pairwise-constraint-based
+algorithms become infeasible to deploy. Furthermore, its sparsity
+extension leads to more stable estimation when the dimension is high and
+only a small number of constraints is given.
+
+The loss function of each constraint
+:math:`d(\mathbf{x}_a, \mathbf{x}_b) < d(\mathbf{x}_c, \mathbf{x}_d)` is
+denoted as:
+
+.. math::
+
+    H(d_\mathbf{M}(\mathbf{x}_a, \mathbf{x}_b)
+    - d_\mathbf{M}(\mathbf{x}_c, \mathbf{x}_d))
+
+where :math:`H(\cdot)` is the squared Hinge loss function defined as:
+
+.. math::
+
+    H(x) = \left\{\begin{aligned}0 \qquad x\leq 0 \\
+    \,\,x^2 \qquad x>0\end{aligned}\right.\\
+
+The summed loss function :math:`L(C)` is the simple sum over all constraints
+:math:`C = \{(\mathbf{x}_a , \mathbf{x}_b , \mathbf{x}_c , \mathbf{x}_d)
+: d(\mathbf{x}_a , \mathbf{x}_b) < d(\mathbf{x}_c , \mathbf{x}_d)\}`. The
+original paper suggests a weighted sum, since the confidence or probability
+of each constraint might differ. However, for the sake of simplicity, and
+assuming no extra knowledge is provided, we deploy the simple sum here,
+as the authors did in their experiments.
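+
+As an illustration only (a NumPy sketch under the definitions above, not
+part of the library's API), the summed loss can be computed directly from
+a batch of quadruplets; here ``M`` is a hypothetical (d, d) Mahalanobis
+matrix and ``quads`` an array of shape (n, 4, d) holding
+:math:`(\mathbf{x}_a, \mathbf{x}_b, \mathbf{x}_c, \mathbf{x}_d)` rows::
+
+    import numpy as np
+
+    def lsml_loss(M, quads):
+        diff_ab = quads[:, 0] - quads[:, 1]
+        diff_cd = quads[:, 2] - quads[:, 3]
+        # squared Mahalanobis distances d_M(x_a, x_b) and d_M(x_c, x_d)
+        d_ab = np.einsum('ij,jk,ik->i', diff_ab, M, diff_ab)
+        d_cd = np.einsum('ij,jk,ik->i', diff_cd, M, diff_cd)
+        # squared hinge H applied to the residual of each constraint
+        return np.sum(np.maximum(0, d_ab - d_cd) ** 2)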
+
+The distance metric learning problem then becomes minimizing the summed loss
+function of all constraints plus a regularization term w.r.t. the prior
+knowledge:
+
+.. math::
+
+    \min_\mathbf{M}(D_{ld}(\mathbf{M, M_0}) + \sum_{(\mathbf{x}_a,
+    \mathbf{x}_b, \mathbf{x}_c, \mathbf{x}_d)\in C}H(d_\mathbf{M}(
+    \mathbf{x}_a, \mathbf{x}_b) - d_\mathbf{M}(\mathbf{x}_c, \mathbf{x}_d))\\
+
+where :math:`\mathbf{M}_0` is the prior metric matrix, set to the identity
+by default, and :math:`D_{ld}(\mathbf{\cdot, \cdot})` is the LogDet
+divergence:
+
+.. math::
+
+    D_{ld}(\mathbf{M, M_0}) = \text{tr}(\mathbf{MM_0}) - \text{logdet}
+    (\mathbf{M})
+
+.. topic:: Example Code:
+
+::
+
+    from metric_learn import LSML
+
+    quadruplets = [[[1.2, 7.5], [1.3, 1.5], [6.4, 2.6], [6.2, 9.7]],
+                   [[1.3, 4.5], [3.2, 4.6], [6.2, 5.5], [5.4, 5.4]],
+                   [[3.2, 7.5], [3.3, 1.5], [8.4, 2.6], [8.2, 9.7]],
+                   [[3.3, 4.5], [5.2, 4.6], [8.2, 5.5], [7.4, 5.4]]]
+
+    # we want to make closer points where the first feature is close, and
+    # further if the second feature is close
+
+    lsml = LSML()
+    lsml.fit(quadruplets)
+
+.. topic:: References:
+
+    .. [1] Liu et al.
+       "Metric Learning from Relative Comparisons by Minimizing Squared
+       Residual". ICDM 2012. http://www.cs.ucla.edu/~weiwang/paper/ICDM12.pdf
+
+    .. [2] Adapted from https://gist.github.com/kcarnold/5439917
+
+.. _sdml:
+
 SDML
 ----

-`SDML`: An efficient sparse metric learning in high-dimensional space via
-L1-penalized log-determinant regularization
+Sparse High-Dimensional Metric Learning
+(:py:class:`SDML <metric_learn.sdml.SDML>`)
+
+`SDML` is an efficient sparse metric learning algorithm in high-dimensional
+space via double regularization: an L1-penalization on the off-diagonal
+elements of the Mahalanobis matrix :math:`\mathbf{M}`, and a log-determinant
+divergence between :math:`\mathbf{M}` and :math:`\mathbf{M_0}` (set as either
+:math:`\mathbf{I}` or :math:`\mathbf{\Omega}^{-1}`, where
+:math:`\mathbf{\Omega}` is the covariance matrix).
+
+The formulated optimization on the semidefinite matrix :math:`\mathbf{M}`
+is convex:
+
+.. math::
+
+    \min_{\mathbf{M}} \text{tr}((\mathbf{M}_0 + \eta \mathbf{XLX}^{T})
+    \cdot \mathbf{M}) - \log\det \mathbf{M} + \lambda ||\mathbf{M}||_{1, off}
+
+where :math:`\mathbf{X}=[\mathbf{x}_1, \mathbf{x}_2, ..., \mathbf{x}_n]` is
+the training data, and the incidence matrix :math:`\mathbf{K}_{ij} = 1` if
+:math:`(\mathbf{x}_i, \mathbf{x}_j)` is a similar pair, otherwise -1. The
+Laplacian matrix :math:`\mathbf{L}=\mathbf{D}-\mathbf{K}` is calculated from
+:math:`\mathbf{K}` and :math:`\mathbf{D}`, a diagonal matrix whose entries are
+the sums of the row elements of :math:`\mathbf{K}`; :math:`||\cdot||_{1, off}`
+is the off-diagonal L1 norm.
+

 .. topic:: Example Code:

 ::
@@ -265,18 +415,33 @@ L1-penalized log-determinant regularization

 .. [2] Adapted from https://gist.github.com/kcarnold/5439945

+.. _rca:

 RCA
 ---

-Relative Components Analysis (RCA)
+Relative Components Analysis (:py:class:`RCA <metric_learn.rca.RCA>`)

-`RCA` learns a full rank Mahalanobis distance metric based on a weighted sum of
-in-class covariance matrices. It applies a global linear transformation to
-assign large weights to relevant dimensions and low weights to irrelevant
-dimensions. Those relevant dimensions are estimated using "chunklets", subsets
+`RCA` learns a full rank Mahalanobis distance metric based on a weighted sum of
+in-chunklets covariance matrices. It applies a global linear transformation to
+assign large weights to relevant dimensions and low weights to irrelevant
+dimensions. Those relevant dimensions are estimated using "chunklets", subsets
 of points that are known to belong to the same class.

+For a training set with :math:`n` training points in :math:`k` chunklets, the
+algorithm is efficient since it simply amounts to computing
+
+.. math::
+
+      \mathbf{C} = \frac{1}{n}\sum_{j=1}^k\sum_{i=1}^{n_j}
+      (\mathbf{x}_{ji}-\hat{\mathbf{m}}_j)
+      (\mathbf{x}_{ji}-\hat{\mathbf{m}}_j)^T
+
+where chunklet :math:`j` consists of :math:`\{\mathbf{x}_{ji}\}_{i=1}^{n_j}`
+with a mean :math:`\hat{m}_j`. The inverse :math:`\mathbf{C}^{-1}` is used
+as the Mahalanobis matrix.
+
 .. topic:: Example Code:

 ::
@@ -295,7 +460,6 @@ of points that are known to belong to the same class.
     rca = RCA()
     rca.fit(pairs, y)

-
 .. topic:: References:

 .. [1] `Adjustment learning and relevant component analysis
@@ -307,21 +471,34 @@ of points that are known to belong to the same class.
 .. [3] 'Learning a Mahalanobis metric from equivalence constraints', JMLR 2005

+.. _mmc:
+
 MMC
 ---

-Mahalanobis Metric Learning with Application for Clustering with
-Side-Information, Xing et al., NIPS 2002
-
-`MMC` minimizes the sum of squared distances between similar examples, while
-enforcing the sum of distances between dissimilar examples to be greater than a
-certain margin. This leads to a convex and, thus, local-minima-free
-optimization problem that can be solved efficiently. However, the algorithm
-involves the computation of eigenvalues, which is the main speed-bottleneck.
-Since it has initially been designed for clustering applications, one of the
-implicit assumptions of MMC is that all classes form a compact set, i.e.,
-follow a unimodal distribution, which restricts the possible use-cases of this
-method. However, it is one of the earliest and a still often cited technique.
+Metric Learning with Application for Clustering with Side Information
+(:py:class:`MMC <metric_learn.mmc.MMC>`)
+
+`MMC` minimizes the sum of squared distances between similar points, while
+enforcing the sum of distances between dissimilar ones to be greater than one.
+This leads to a convex and, thus, local-minima-free optimization problem that
+can be solved efficiently.
+However, the algorithm involves the computation of eigenvalues, which is the
+main speed-bottleneck. Since it has initially been designed for clustering
+applications, one of the implicit assumptions of MMC is that all classes form
+a compact set, i.e., follow a unimodal distribution, which restricts the
+possible use-cases of this method. However, it is one of the earliest and a
+still often cited technique.
+
+The algorithm aims at minimizing the sum of squared distances between all the
+similar points, while constraining the sum of distances between dissimilar
+points to be larger than one:
+
+.. math::
+
+      \min_{\mathbf{M}\in\mathbb{S}_+^d}\sum_{(\mathbf{x}_i,
+      \mathbf{x}_j)\in S} d^2_{\mathbf{M}}(\mathbf{x}_i, \mathbf{x}_j)
+      \qquad \qquad \text{s.t.} \qquad \sum_{(\mathbf{x}_i, \mathbf{x}_j)
+      \in D} d_{\mathbf{M}}(\mathbf{x}_i, \mathbf{x}_j) \geq 1

 .. topic:: Example Code:

diff --git a/metric_learn/itml.py b/metric_learn/itml.py
index 9b6dccb2..6cb34313 100644
--- a/metric_learn/itml.py
+++ b/metric_learn/itml.py
@@ -1,16 +1,17 @@
-"""
-Information Theoretic Metric Learning, Kulis et al., ICML 2007
-
-ITML minimizes the differential relative entropy between two multivariate
-Gaussians under constraints on the distance function,
-which can be formulated into a Bregman optimization problem by minimizing the
-LogDet divergence subject to linear constraints.
-This algorithm can handle a wide variety of constraints and can optionally
-incorporate a prior on the distance function.
-Unlike some other methods, ITML does not rely on an eigenvalue computation
-or semi-definite programming.
-
-Adapted from Matlab code at http://www.cs.utexas.edu/users/pjain/itml/
+r"""
+Information Theoretic Metric Learning (ITML)
+
+`ITML` minimizes the (differential) relative entropy, aka Kullback-Leibler
+divergence, between two multivariate Gaussians subject to constraints on the
+associated Mahalanobis distance, which can be formulated into a Bregman
+optimization problem by minimizing the LogDet divergence subject to
+linear constraints.
-This algorithm can handle a wide variety of constraints and can optionally -incorporate a prior on the distance function. -Unlike some other methods, ITML does not rely on an eigenvalue computation -or semi-definite programming. - -Adapted from Matlab code at http://www.cs.utexas.edu/users/pjain/itml/ +r""" +Information Theoretic Metric Learning(ITML) + +`ITML` minimizes the (differential) relative entropy, aka Kullback-Leibler +divergence, between two multivariate Gaussians subject to constraints on the +associated Mahalanobis distance, which can be formulated into a Bregman +optimization problem by minimizing the LogDet divergence subject to +linear constraints. This algorithm can handle a wide variety of constraints +and can optionally incorporate a prior on the distance function. Unlike some +other methods, `ITML` does not rely on an eigenvalue computation or +semi-definite programming. + +Read more in the :ref:`User Guide `. + """ from __future__ import print_function, absolute_import diff --git a/metric_learn/lfda.py b/metric_learn/lfda.py index 2feff211..2ca085d4 100644 --- a/metric_learn/lfda.py +++ b/metric_learn/lfda.py @@ -1,14 +1,13 @@ -""" -Local Fisher Discriminant Analysis (LFDA) +r""" +Local Fisher Discriminant Analysis(LFDA) + +LFDA is a linear supervised dimensionality reduction method. It is +particularly useful when dealing with multimodality, where one ore more classes +consist of separate clusters in input space. The core optimization problem of +LFDA is solved as a generalized eigenvalue problem. -Local Fisher Discriminant Analysis for Supervised Dimensionality Reduction -Sugiyama, ICML 2006 +Read more in the :ref:`User Guide `. -LFDA is a linear supervised dimensionality reduction method. -It is particularly useful when dealing with multimodality, -where one ore more classes consist of separate clusters in input space. -The core optimization problem of LFDA is solved as a generalized -eigenvalue problem. """ from __future__ import division, absolute_import import numpy as np diff --git a/metric_learn/lmnn.py b/metric_learn/lmnn.py index f9cd0e91..9e606c56 100644 --- a/metric_learn/lmnn.py +++ b/metric_learn/lmnn.py @@ -1,11 +1,14 @@ -""" -Large-margin nearest neighbor metric learning. (Weinberger 2005) +r""" +Large Margin Nearest Neighbor Metric learning(LMNN) + +LMNN learns a Mahalanobis distance metric in the kNN classification +setting. The learned metric attempts to keep close k-nearest neighbors +from the same class, while keeping examples from different classes +separated by a large margin. This algorithm makes no assumptions about +the distribution of the data. + +Read more in the :ref:`User Guide `. -LMNN learns a Mahanalobis distance metric in the kNN classification setting -using semidefinite programming. -The learned metric attempts to keep k-nearest neighbors in the same class, -while keeping examples from different classes separated by a large margin. -This algorithm makes no assumptions about the distribution of the data. """ #TODO: periodic recalculation of impostors, PCA initialization diff --git a/metric_learn/lsml.py b/metric_learn/lsml.py index 536719ba..1d66cbc0 100644 --- a/metric_learn/lsml.py +++ b/metric_learn/lsml.py @@ -1,10 +1,17 @@ -""" -Liu et al. -"Metric Learning from Relative Comparisons by Minimizing Squared Residual". -ICDM 2012. 
+r""" +Metric Learning from Relative Comparisons by Minimizing Squared Residual(LSML) + +`LSML` proposes a simple, yet effective, algorithm that minimizes a convex +objective function corresponding to the sum of squared residuals of +constraints. This algorithm uses the constraints in the form of the +relative distance comparisons, such method is especially useful where +pairwise constraints are not natural to obtain, thus pairwise constraints +based algorithms become infeasible to be deployed. Furthermore, its sparsity +extension leads to more stable estimation when the dimension is high and +only a small amount of constraints is given. + +Read more in the :ref:`User Guide `. -Adapted from https://gist.github.com/kcarnold/5439917 -Paper: http://www.cs.ucla.edu/~weiwang/paper/ICDM12.pdf """ from __future__ import print_function, absolute_import, division diff --git a/metric_learn/mlkr.py b/metric_learn/mlkr.py index 74a21a82..927c64e3 100644 --- a/metric_learn/mlkr.py +++ b/metric_learn/mlkr.py @@ -1,10 +1,13 @@ -""" -Metric Learning for Kernel Regression (MLKR), Weinberger et al., +r""" +Metric Learning for Kernel Regression(MLKR) + +MLKR is an algorithm for supervised metric learning, which learns a +distance function by directly minimizing the leave-one-out regression error. +This algorithm can also be viewed as a supervised variation of PCA and can be +used for dimensionality reduction and high dimensional data visualization. + +Read more in the :ref:`User Guide `. -MLKR is an algorithm for supervised metric learning, which learns a distance -function by directly minimising the leave-one-out regression error. This -algorithm can also be viewed as a supervised variation of PCA and can be used -for dimensionality reduction and high dimensional data visualization. """ from __future__ import division, print_function import time diff --git a/metric_learn/mmc.py b/metric_learn/mmc.py index 346db2f8..eb7dc529 100644 --- a/metric_learn/mmc.py +++ b/metric_learn/mmc.py @@ -1,19 +1,19 @@ -""" -Mahalanobis Metric Learning with Application for Clustering with Side-Information, Xing et al., NIPS 2002 +r""" +Metric Learning with Application for Clustering with Side Information(MMC) -MMC minimizes the sum of squared distances between similar examples, -while enforcing the sum of distances between dissimilar examples to be -greater than a certain margin. -This leads to a convex and, thus, local-minima-free optimization problem -that can be solved efficiently. +MMC minimizes the sum of squared distances between similar points, while +enforcing the sum of distances between dissimilar ones to be greater than one. +This leads to a convex and, thus, local-minima-free optimization problem that +can be solved efficiently. However, the algorithm involves the computation of eigenvalues, which is the -main speed-bottleneck. -Since it has initially been designed for clustering applications, one of the -implicit assumptions of MMC is that all classes form a compact set, i.e., -follow a unimodal distribution, which restricts the possible use-cases of -this method. However, it is one of the earliest and a still often cited technique. +main speed-bottleneck. Since it has initially been designed for clustering +applications, one of the implicit assumptions of MMC is that all classes form +a compact set, i.e., follow a unimodal distribution, which restricts the +possible use-cases of this method. However, it is one of the earliest and a +still often cited technique. + +Read more in the :ref:`User Guide `. 
-Adapted from Matlab code at http://www.cs.cmu.edu/%7Eepxing/papers/Old_papers/code_Metric_online.tar.gz """ from __future__ import print_function, absolute_import, division diff --git a/metric_learn/nca.py b/metric_learn/nca.py index 5abe52e3..7139f0ff 100644 --- a/metric_learn/nca.py +++ b/metric_learn/nca.py @@ -1,6 +1,15 @@ -""" -Neighborhood Components Analysis (NCA) -Ported to Python from https://github.com/vomjom/nca +r""" +Neighborhood Components Analysis(NCA) + +NCA is a distance metric learning algorithm which aims to improve the +accuracy of nearest neighbors classification compared to the standard +Euclidean distance. The algorithm directly maximizes a stochastic variant +of the leave-one-out k-nearest neighbors(KNN) score on the training set. +It can also learn a low-dimensional linear transformation of data that can +be used for data visualization and fast classification. + +Read more in the :ref:`User Guide `. + """ from __future__ import absolute_import diff --git a/metric_learn/rca.py b/metric_learn/rca.py index c9fedd59..88538e8b 100644 --- a/metric_learn/rca.py +++ b/metric_learn/rca.py @@ -1,14 +1,14 @@ -"""Relative Components Analysis (RCA) +r""" +Relative Components Analysis(RCA) -RCA learns a full rank Mahalanobis distance metric based on a -weighted sum of in-class covariance matrices. -It applies a global linear transformation to assign large weights to -relevant dimensions and low weights to irrelevant dimensions. -Those relevant dimensions are estimated using "chunklets", -subsets of points that are known to belong to the same class. +RCA learns a full rank Mahalanobis distance metric based on a weighted sum of +in-chunklets covariance matrices. It applies a global linear transformation to +assign large weights to relevant dimensions and low weights to irrelevant +dimensions. Those relevant dimensions are estimated using "chunklets", subsets +of points that are known to belong to the same class. + +Read more in the :ref:`User Guide `. -'Learning distance functions using equivalence relations', ICML 2003 -'Learning a Mahalanobis metric from equivalence constraints', JMLR 2005 """ from __future__ import absolute_import diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py index e9828d07..b300b9ac 100644 --- a/metric_learn/sdml.py +++ b/metric_learn/sdml.py @@ -1,11 +1,15 @@ -""" -Qi et al. -An efficient sparse metric learning in high-dimensional space via -L1-penalized log-determinant regularization. -ICML 2009 +r""" +Sparse High-Dimensional Metric Learning(SDML) + +SDML is an efficient sparse metric learning in high-dimensional space via +double regularization: an L1-penalization on the off-diagonal elements of the +Mahalanobis matrix :math:`\mathbf{M}`, and a log-determinant divergence between +:math:`\mathbf{M}` and :math:`\mathbf{M_0}` (set as either :math:`\mathbf{I}` +or :math:`\mathbf{\Omega}^{-1}`, where :math:`\mathbf{\Omega}` is the +covariance matrix). + +Read more in the :ref:`User Guide `. 
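+
+For reference only (a NumPy sketch of the objective described above, not
+part of the library; ``M`` and ``M0`` are hypothetical (d, d) matrices,
+``X`` the (n, d) data with samples as rows, and ``L`` the (n, n) Laplacian)::
+
+    import numpy as np
+
+    def sdml_objective(M, M0, X, L, eta, lam):
+        # off-diagonal L1 norm ||M||_{1, off}
+        off_diag_l1 = np.abs(M).sum() - np.abs(np.diag(M)).sum()
+        # tr((M0 + eta * X L X^T) M) - log det M + lam * ||M||_{1, off}
+        logdet = np.linalg.slogdet(M)[1]
+        return (np.trace((M0 + eta * X.T.dot(L).dot(X)).dot(M))
+                - logdet + lam * off_diag_l1)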
-Adapted from https://gist.github.com/kcarnold/5439945 -Paper: http://lms.comp.nus.edu.sg/sites/default/files/publication-attachments/icml09-guojun.pdf """ from __future__ import absolute_import From 05a8d411c25f4a12a3a51fe5a2a29b9083050261 Mon Sep 17 00:00:00 2001 From: William de Vazelhes <31916524+wdevazelhes@users.noreply.github.com> Date: Mon, 13 May 2019 10:47:30 +0200 Subject: [PATCH 108/210] [MRG] Be compatible with newer scikit-learn (#199) * Update travis to use previous scikit-learn's versions for older pythons * Update code to work with both versions * Install scikit-learn before skggm * Simpler replacement of spaces and newlines that is compatible with python 2.7 * Address https://github.com/metric-learn/metric-learn/pull/199#pullrequestreview-236120427 * Address https://github.com/metric-learn/metric-learn/pull/199#pullrequestreview-236146764 --- .travis.yml | 7 ++- README.rst | 2 +- doc/getting_started.rst | 2 +- metric_learn/_util.py | 14 ++---- test/test_base_metric.py | 96 ++++++++++++++++++++++++---------------- test/test_utils.py | 59 ------------------------ 6 files changed, 70 insertions(+), 110 deletions(-) diff --git a/.travis.yml b/.travis.yml index f5527089..0e510a9f 100644 --- a/.travis.yml +++ b/.travis.yml @@ -8,7 +8,12 @@ python: before_install: - sudo apt-get install liblapack-dev - pip install --upgrade pip pytest - - pip install wheel cython numpy scipy scikit-learn codecov pytest-cov + - pip install wheel cython numpy scipy codecov pytest-cov + - if $TRAVIS_PYTHON_VERSION == "3.6"; then + pip install scikit-learn; + else + pip install scikit-learn==0.20.3; + fi - if [[ ($TRAVIS_PYTHON_VERSION == "3.6") || ($TRAVIS_PYTHON_VERSION == "2.7")]]; then pip install git+https://github.com/skggm/skggm.git@a0ed406586c4364ea3297a658f415e13b5cbdaf8; diff --git a/README.rst b/README.rst index e1bfca51..32a9bb90 100644 --- a/README.rst +++ b/README.rst @@ -20,7 +20,7 @@ Metric Learning algorithms in Python. **Dependencies** - Python 2.7+, 3.4+ -- numpy, scipy, scikit-learn +- numpy, scipy, scikit-learn>=0.20.3 **Optional dependencies** diff --git a/doc/getting_started.rst b/doc/getting_started.rst index 2d2df25e..d620e401 100644 --- a/doc/getting_started.rst +++ b/doc/getting_started.rst @@ -15,7 +15,7 @@ Alternately, download the source repository and run: **Dependencies** - Python 2.7+, 3.4+ -- numpy, scipy, scikit-learn +- numpy, scipy, scikit-learn>=0.20.3 **Optional dependencies** diff --git a/metric_learn/_util.py b/metric_learn/_util.py index ff9c021c..105d89b5 100644 --- a/metric_learn/_util.py +++ b/metric_learn/_util.py @@ -22,8 +22,7 @@ def check_input(input_data, y=None, preprocessor=None, dtype='numeric', order=None, copy=False, force_all_finite=True, multi_output=False, ensure_min_samples=1, - ensure_min_features=1, y_numeric=False, - warn_on_dtype=False, estimator=None): + ensure_min_features=1, y_numeric=False, estimator=None): """Checks that the input format is valid, and converts it if specified (this is the equivalent of scikit-learn's `check_array` or `check_X_y`). All arguments following tuple_size are scikit-learn's `check_X_y` @@ -88,10 +87,6 @@ def check_input(input_data, y=None, preprocessor=None, is originally 1D and ``ensure_2d`` is True. Setting to 0 disables this check. - warn_on_dtype : boolean (default=False) - Raise DataConversionWarning if the dtype of the input data structure - does not match the requested dtype, causing a memory copy. 
- estimator : str or estimator instance (default=`None`) If passed, include the name of the estimator in warning messages. @@ -111,7 +106,7 @@ def check_input(input_data, y=None, preprocessor=None, copy=copy, force_all_finite=force_all_finite, ensure_min_samples=ensure_min_samples, ensure_min_features=ensure_min_features, - warn_on_dtype=warn_on_dtype, estimator=estimator) + estimator=estimator) # We need to convert input_data into a numpy.ndarray if possible, before # any further checks or conversions, and deal with y if needed. Therefore @@ -321,9 +316,8 @@ def __init__(self, X): accept_sparse=True, dtype=None, force_all_finite=False, ensure_2d=False, allow_nd=True, - ensure_min_samples=0, - ensure_min_features=0, - warn_on_dtype=False, estimator=None) + ensure_min_samples=0, ensure_min_features=0, + estimator=None) self.X = X def __call__(self, indices): diff --git a/test/test_base_metric.py b/test/test_base_metric.py index 6c9a6dc5..e5f2e17b 100644 --- a/test/test_base_metric.py +++ b/test/test_base_metric.py @@ -1,4 +1,5 @@ import pytest +import re import unittest import metric_learn import numpy as np @@ -7,84 +8,103 @@ from test.test_utils import ids_metric_learners, metric_learners +def remove_spaces(s): + return re.sub('\s+', '', s) + + class TestStringRepr(unittest.TestCase): def test_covariance(self): - self.assertEqual(str(metric_learn.Covariance()), - "Covariance(preprocessor=None)") + self.assertEqual(remove_spaces(str(metric_learn.Covariance())), + remove_spaces("Covariance(preprocessor=None)")) def test_lmnn(self): self.assertRegexpMatches( - str(metric_learn.LMNN()), - r"(python_)?LMNN\(convergence_tol=0.001, k=3, learn_rate=1e-07, " - r"max_iter=1000,\n min_iter=50, preprocessor=None, " - r"regularization=0.5, use_pca=True,\n verbose=False\)") + str(metric_learn.LMNN()), + r"(python_)?LMNN\(convergence_tol=0.001, k=3, learn_rate=1e-07, " + r"max_iter=1000,\s+min_iter=50, preprocessor=None, " + r"regularization=0.5, use_pca=True,\s+verbose=False\)") def test_nca(self): - self.assertEqual(str(metric_learn.NCA()), - "NCA(max_iter=100, num_dims=None, preprocessor=None, " - "tol=None, verbose=False)") + self.assertEqual(remove_spaces(str(metric_learn.NCA())), + remove_spaces( + "NCA(max_iter=100, num_dims=None, preprocessor=None, " + "tol=None, verbose=False)")) def test_lfda(self): - self.assertEqual(str(metric_learn.LFDA()), - "LFDA(embedding_type='weighted', k=None, num_dims=None, " - "preprocessor=None)") + self.assertEqual(remove_spaces(str(metric_learn.LFDA())), + remove_spaces( + "LFDA(embedding_type='weighted', k=None, " + "num_dims=None, " + "preprocessor=None)")) def test_itml(self): - self.assertEqual(str(metric_learn.ITML()), """ + self.assertEqual(remove_spaces(str(metric_learn.ITML())), + remove_spaces(""" ITML(A0=None, convergence_threshold=0.001, gamma=1.0, max_iter=1000, preprocessor=None, verbose=False) -""".strip('\n')) - self.assertEqual(str(metric_learn.ITML_Supervised()), """ +""")) + self.assertEqual(remove_spaces(str(metric_learn.ITML_Supervised())), + remove_spaces(""" ITML_Supervised(A0=None, bounds='deprecated', convergence_threshold=0.001, gamma=1.0, max_iter=1000, num_constraints=None, num_labeled='deprecated', preprocessor=None, verbose=False) -""".strip('\n')) +""")) def test_lsml(self): self.assertEqual( - str(metric_learn.LSML()), + remove_spaces(str(metric_learn.LSML())), + remove_spaces( "LSML(max_iter=1000, preprocessor=None, prior=None, tol=0.001, " - "verbose=False)") - self.assertEqual(str(metric_learn.LSML_Supervised()), """ + 
"verbose=False)")) + self.assertEqual(remove_spaces(str(metric_learn.LSML_Supervised())), + remove_spaces(""" LSML_Supervised(max_iter=1000, num_constraints=None, num_labeled='deprecated', preprocessor=None, prior=None, tol=0.001, verbose=False, weights=None) -""".strip('\n')) +""")) def test_sdml(self): - self.assertEqual(str(metric_learn.SDML()), - "SDML(balance_param=0.5, preprocessor=None, " - "sparsity_param=0.01, use_cov=True,\n verbose=False)") - self.assertEqual(str(metric_learn.SDML_Supervised()), """ + self.assertEqual(remove_spaces(str(metric_learn.SDML())), + remove_spaces( + "SDML(balance_param=0.5, preprocessor=None, " + "sparsity_param=0.01, use_cov=True," + "\n verbose=False)")) + self.assertEqual(remove_spaces(str(metric_learn.SDML_Supervised())), + remove_spaces(""" SDML_Supervised(balance_param=0.5, num_constraints=None, num_labeled='deprecated', preprocessor=None, sparsity_param=0.01, use_cov=True, verbose=False) -""".strip('\n')) +""")) def test_rca(self): - self.assertEqual(str(metric_learn.RCA()), - "RCA(num_dims=None, pca_comps=None, preprocessor=None)") - self.assertEqual(str(metric_learn.RCA_Supervised()), - "RCA_Supervised(chunk_size=2, num_chunks=100, " - "num_dims=None, pca_comps=None,\n " - "preprocessor=None)") + self.assertEqual(remove_spaces(str(metric_learn.RCA())), + remove_spaces("RCA(num_dims=None, pca_comps=None, " + "preprocessor=None)")) + self.assertEqual(remove_spaces(str(metric_learn.RCA_Supervised())), + remove_spaces( + "RCA_Supervised(chunk_size=2, num_chunks=100, " + "num_dims=None, pca_comps=None,\n " + "preprocessor=None)")) def test_mlkr(self): - self.assertEqual(str(metric_learn.MLKR()), - "MLKR(A0=None, max_iter=1000, num_dims=None, " - "preprocessor=None, tol=None,\n verbose=False)") + self.assertEqual(remove_spaces(str(metric_learn.MLKR())), + remove_spaces( + "MLKR(A0=None, max_iter=1000, num_dims=None, " + "preprocessor=None, tol=None,\n verbose=False)")) def test_mmc(self): - self.assertEqual(str(metric_learn.MMC()), """ + self.assertEqual(remove_spaces(str(metric_learn.MMC())), + remove_spaces(""" MMC(A0=None, convergence_threshold=0.001, diagonal=False, diagonal_c=1.0, max_iter=100, max_proj=10000, preprocessor=None, verbose=False) -""".strip('\n')) - self.assertEqual(str(metric_learn.MMC_Supervised()), """ +""")) + self.assertEqual(remove_spaces(str(metric_learn.MMC_Supervised())), + remove_spaces(""" MMC_Supervised(A0=None, convergence_threshold=1e-06, diagonal=False, diagonal_c=1.0, max_iter=100, max_proj=10000, num_constraints=None, num_labeled='deprecated', preprocessor=None, verbose=False) -""".strip('\n')) +""")) @pytest.mark.parametrize('estimator, build_dataset', metric_learners, diff --git a/test/test_utils.py b/test/test_utils.py index 4cec7444..e4368791 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -300,35 +300,6 @@ def test_check_tuples_invalid_n_samples(estimator, context, load_tuples, assert str(raised_error.value) == msg -@pytest.mark.parametrize('estimator, context', - [(NCA(), " by NCA"), ('NCA', " by NCA"), (None, "")]) -@pytest.mark.parametrize('load_tuples, preprocessor', - [(tuples_prep, mock_preprocessor), - (tuples_no_prep, None), - (tuples_no_prep, mock_preprocessor)]) -def test_check_tuples_invalid_dtype_convertible(estimator, context, - load_tuples, preprocessor): - """Checks that a warning is raised if a convertible input is converted to - float""" - tuples = load_tuples().astype(object) # here the object conversion is - # useless for the tuples_prep case, but this allows to test the - # 
tuples_prep case
-
-  if preprocessor is not None:  # if the preprocessor is not None we
-    # overwrite it to have a preprocessor that returns objects
-    def preprocessor(indices):  #
-      # preprocessor that returns objects
-      return np.ones((indices.shape[0], 3)).astype(object)
-
-  msg = ("Data with input dtype object was converted to float64{}."
-         .format(context))
-  with pytest.warns(DataConversionWarning) as raised_warning:
-    check_input(tuples, type_of_inputs='tuples',
-                preprocessor=preprocessor, dtype=np.float64,
-                warn_on_dtype=True, estimator=estimator)
-  assert str(raised_warning[0].message) == msg
-
-
 def test_check_tuples_invalid_dtype_not_convertible_with_preprocessor():
   """Checks that a value error is thrown if attempting to convert an
   input not convertible to float, when using a preprocessor
@@ -530,36 +501,6 @@ def test_check_classic_invalid_n_samples(estimator, context, load_points,
   assert str(raised_error.value) == msg
 
 
-@pytest.mark.parametrize('estimator, context',
-                         [(NCA(), " by NCA"), ('NCA', " by NCA"), (None, "")])
-@pytest.mark.parametrize('load_points, preprocessor',
-                         [(points_prep, mock_preprocessor),
-                          (points_no_prep, None),
-                          (points_no_prep, mock_preprocessor)])
-def test_check_classic_invalid_dtype_convertible(estimator, context,
-                                                 load_points,
-                                                 preprocessor):
-  """Checks that a warning is raised if a convertible input is converted to
-  float"""
-  points = load_points().astype(object)  # here the object conversion is
-  # useless for the points_prep case, but this allows to test the
-  # points_prep case
-
-  if preprocessor is not None:  # if the preprocessor is not None we
-    # overwrite it to have a preprocessor that returns objects
-    def preprocessor(indices):
-      # preprocessor that returns objects
-      return np.ones((indices.shape[0], 3)).astype(object)
-
-  msg = ("Data with input dtype object was converted to float64{}."
-         .format(context))
-  with pytest.warns(DataConversionWarning) as raised_warning:
-    check_input(points, type_of_inputs='classic',
-                preprocessor=preprocessor, dtype=np.float64,
-                warn_on_dtype=True, estimator=estimator)
-  assert str(raised_warning[0].message) == msg
-
-
 @pytest.mark.parametrize('preprocessor, points',
                          [(mock_preprocessor, np.array([['a', 'b'],
                                                         ['e', 'b']])),

From 9f732502dbf778c8c579cffc3927f619ed8f4089 Mon Sep 17 00:00:00 2001
From: William de Vazelhes <31916524+wdevazelhes@users.noreply.github.com>
Date: Mon, 13 May 2019 10:49:56 +0200
Subject: [PATCH 109/210] [MRG] Fix RCA_Supervised sklearn compat test (#198)

* FIX fix RCA_Supervised sklearn compat test

* Address https://github.com/metric-learn/metric-learn/pull/198#pullrequestreview-234140017

* Refactor comment
---
 metric_learn/rca.py         |  7 ++++---
 test/test_sklearn_compat.py | 12 +++++++++---
 2 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/metric_learn/rca.py b/metric_learn/rca.py
index 88538e8b..7d0bb21f 100644
--- a/metric_learn/rca.py
+++ b/metric_learn/rca.py
@@ -26,7 +26,9 @@ def _chunk_mean_centering(data, chunks):
   num_chunks = chunks.max() + 1
   chunk_mask = chunks != -1
-  chunk_data = data[chunk_mask]
+  # We need to ensure the data is float so that we can subtract the
+  # mean from it
+  chunk_data = data[chunk_mask].astype(float, copy=False)
   chunk_labels = chunks[chunk_mask]
   for c in xrange(num_chunks):
     mask = chunk_labels == c
@@ -98,7 +100,7 @@ def fit(self, X, chunks):
         When ``chunks[i] == -1``, point i doesn't belong to any chunklet.
         When ``chunks[i] == j``, point i belongs to chunklet j.
""" - X = self._prepare_inputs(X, ensure_min_samples=2) + X, chunks = self._prepare_inputs(X, chunks, ensure_min_samples=2) # PCA projection to remove noise and redundant information. if self.pca_comps is not None: @@ -109,7 +111,6 @@ def fit(self, X, chunks): X_t = X - X.mean(axis=0) M_pca = None - chunks = np.asanyarray(chunks, dtype=int) chunk_mask, chunked_data = _chunk_mean_centering(X_t, chunks) inner_cov = np.atleast_2d(np.cov(chunked_data, rowvar=0, bias=1)) diff --git a/test/test_sklearn_compat.py b/test/test_sklearn_compat.py index 5d6c5d77..091c56e2 100644 --- a/test/test_sklearn_compat.py +++ b/test/test_sklearn_compat.py @@ -89,9 +89,15 @@ def stable_init(self, sparsity_param=0.01, num_labeled='deprecated', dSDML.__init__ = stable_init check_estimator(dSDML) - # This fails because the default num_chunks isn't data-dependent. - # def test_rca(self): - # check_estimator(RCA_Supervised) + def test_rca(self): + def stable_init(self, num_dims=None, pca_comps=None, + chunk_size=2, preprocessor=None): + # this init makes RCA stable for scikit-learn examples. + RCA_Supervised.__init__(self, num_chunks=2, num_dims=num_dims, + pca_comps=pca_comps, chunk_size=chunk_size, + preprocessor=preprocessor) + dRCA.__init__ = stable_init + check_estimator(dRCA) RNG = check_random_state(0) From aa5b274dbb26e71bc22e2316b08512e1d30b53c9 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Fri, 17 May 2019 09:45:12 +0200 Subject: [PATCH 110/210] TST: remove comment in test since #175 is fixed --- test/test_utils.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index e4368791..6441fac6 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -102,10 +102,7 @@ def build_quadruplets(with_preprocessor=False): [learner for (learner, _) in quadruplets_learners])) -pairs_learners = [(ITML(max_iter=2), build_pairs), # max_iter=2 to be - # faster, also make tests pass while waiting for #175 to - # be solved - # TODO: remove this comment when #175 is solved +pairs_learners = [(ITML(max_iter=2), build_pairs), # max_iter=2 to be faster (MMC(max_iter=2), build_pairs), # max_iter=2 to be faster (SDML(use_cov=False, balance_param=1e-5), build_pairs)] ids_pairs_learners = list(map(lambda x: x.__class__.__name__, From f407bacc796320196269a5254443bd705acca166 Mon Sep 17 00:00:00 2001 From: William de Vazelhes <31916524+wdevazelhes@users.noreply.github.com> Date: Fri, 24 May 2019 19:49:03 +0200 Subject: [PATCH 111/210] Add checks for bounds argument of ITML (#207) --- metric_learn/itml.py | 18 +++++++++++------- test/metric_learn_test.py | 39 ++++++++++++++++++++++++++++++++++++++- 2 files changed, 49 insertions(+), 8 deletions(-) diff --git a/metric_learn/itml.py b/metric_learn/itml.py index 6cb34313..e3ff515a 100644 --- a/metric_learn/itml.py +++ b/metric_learn/itml.py @@ -69,9 +69,13 @@ def _fit(self, pairs, y, bounds=None): X = np.vstack({tuple(row) for row in pairs.reshape(-1, pairs.shape[2])}) self.bounds_ = np.percentile(pairwise_distances(X), (5, 95)) else: - assert len(bounds) == 2 + bounds = check_array(bounds, allow_nd=False, ensure_min_samples=0, + ensure_2d=False) + bounds = bounds.ravel() + if bounds.size != 2: + raise ValueError("`bounds` should be an array-like of two elements.") self.bounds_ = bounds - self.bounds_[self.bounds_==0] = 1e-9 + self.bounds_[self.bounds_ == 0] = 1e-9 # init metric if self.A0 is None: A = np.identity(pairs.shape[2]) @@ -134,7 +138,7 @@ class ITML(_BaseITML, _PairsClassifierMixin): Attributes ---------- - 
bounds_ : array-like, shape=(2,) + bounds_ : `numpy.ndarray`, shape=(2,) Bounds on similarity, aside slack variables, s.t. ``d(a, b) < bounds_[0]`` for all given pairs of similar points ``a`` and ``b``, and ``d(c, d) > bounds_[1]`` for all given pairs of @@ -171,7 +175,7 @@ def fit(self, pairs, y, bounds=None, calibration_params=None): preprocessor. y: array-like, of shape (n_constraints,) Labels of constraints. Should be -1 for dissimilar pair, 1 for similar. - bounds : `list` of two numbers + bounds : array-like of two numbers Bounds on similarity, aside slack variables, s.t. ``d(a, b) < bounds_[0]`` for all given pairs of similar points ``a`` and ``b``, and ``d(c, d) > bounds_[1]`` for all given pairs of @@ -192,7 +196,7 @@ def fit(self, pairs, y, bounds=None, calibration_params=None): calibration_params = (calibration_params if calibration_params is not None else dict()) self._validate_calibration_params(**calibration_params) - self._fit(pairs, y) + self._fit(pairs, y, bounds=bounds) self.calibrate_threshold(pairs, y, **calibration_params) return self @@ -202,7 +206,7 @@ class ITML_Supervised(_BaseITML, TransformerMixin): Attributes ---------- - bounds_ : array-like, shape=(2,) + bounds_ : `numpy.ndarray`, shape=(2,) Bounds on similarity, aside slack variables, s.t. ``d(a, b) < bounds_[0]`` for all given pairs of similar points ``a`` and ``b``, and ``d(c, d) > bounds_[1]`` for all given pairs of @@ -275,7 +279,7 @@ def fit(self, X, y, random_state=np.random, bounds=None): random_state : numpy.random.RandomState, optional If provided, controls random number generation. - bounds : `list` of two numbers + bounds : array-like of two numbers Bounds on similarity, aside slack variables, s.t. ``d(a, b) < bounds_[0]`` for all given pairs of similar points ``a`` and ``b``, and ``d(c, d) > bounds_[1]`` for all given pairs of diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py index a785d60d..c3efd9a3 100644 --- a/test/metric_learn_test.py +++ b/test/metric_learn_test.py @@ -18,7 +18,7 @@ HAS_SKGGM = True from metric_learn import (LMNN, NCA, LFDA, Covariance, MLKR, MMC, LSML_Supervised, ITML_Supervised, SDML_Supervised, - RCA_Supervised, MMC_Supervised, SDML) + RCA_Supervised, MMC_Supervised, SDML, ITML) # Import this specially for testing. 
from metric_learn.constraints import wrap_pairs
 from metric_learn.lmnn import python_LMNN
 
@@ -109,6 +109,43 @@ def test_deprecation_bounds(self):
     assert_warns_message(DeprecationWarning, msg, itml_supervised.fit, X, y)
 
 
+@pytest.mark.parametrize('bounds', [None, (20., 100.), [20., 100.],
+                                    np.array([20., 100.]),
+                                    np.array([[20., 100.]]),
+                                    np.array([[20], [100]])])
+def test_bounds_parameters_valid(bounds):
+  """Asserts that we can provide any array-like of two elements as bounds,
+  and that the attribute bounds_ is a numpy array"""
+
+  pairs = np.array([[[-10., 0.], [10., 0.]], [[0., 50.], [0., -60]]])
+  y_pairs = [1, -1]
+  itml = ITML()
+  itml.fit(pairs, y_pairs, bounds=bounds)
+
+  X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]])
+  y = np.array([1, 0, 1, 0])
+  itml_supervised = ITML_Supervised()
+  itml_supervised.fit(X, y, bounds=bounds)
+
+
+@pytest.mark.parametrize('bounds', ['weird', ['weird1', 'weird2'],
+                                    np.array([1, 2, 3])])
+def test_bounds_parameters_invalid(bounds):
+  """Asserts that if a non-array-like, or an array-like of length other
+  than 2, is passed as bounds, an error is raised"""
+  pairs = np.array([[[-10., 0.], [10., 0.]], [[0., 50.], [0., -60]]])
+  y_pairs = [1, -1]
+  itml = ITML()
+  with pytest.raises(Exception):
+    itml.fit(pairs, y_pairs, bounds=bounds)
+
+  X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]])
+  y = np.array([1, 0, 1, 0])
+  itml_supervised = ITML_Supervised()
+  with pytest.raises(Exception):
+    itml_supervised.fit(X, y, bounds=bounds)
+
+

From 187b59e9bdd9b365835dd40efaa8ee167590a8a4 Mon Sep 17 00:00:00 2001
From: William de Vazelhes <31916524+wdevazelhes@users.noreply.github.com>
Date: Wed, 29 May 2019 16:17:02 +0200
Subject: [PATCH 112/210] [MRG] FIX LMNN gradient and cost function (#201)

* TST: make tests for LMNN gradient

* FIX: fix gradient computation

* Simplify expression

* Be more tolerant for checking NCA

* Address https://github.com/metric-learn/metric-learn/pull/201#discussion_r286032898

* Add checks for bounds argument

* Revert "Add checks for bounds argument"

This reverts commit 562f33bfcc7d5fe6a8fc6f65145f4a0d909224d6.
* Add missing return --- metric_learn/lmnn.py | 7 +-- test/metric_learn_test.py | 100 ++++++++++++++++++++++++++++++++++++-- 2 files changed, 101 insertions(+), 6 deletions(-) diff --git a/metric_learn/lmnn.py b/metric_learn/lmnn.py index 9e606c56..d70ca3d0 100644 --- a/metric_learn/lmnn.py +++ b/metric_learn/lmnn.py @@ -108,7 +108,7 @@ def fit(self, X, y): # objective than the previous L, following the gradient: while True: # the next point next_L to try out is found by a gradient step - L_next = L - 2 * learn_rate * G + L_next = L - learn_rate * G # we compute the objective at next point # we copy variables that can be modified by _loss_grad, because if we # retry we don t want to modify them several times @@ -194,10 +194,11 @@ def _loss_grad(self, X, L, dfG, impostors, it, k, reg, target_neighbors, df, # do the gradient update assert not np.isnan(df).any() G = dfG * reg + df * (1 - reg) + G = L.dot(G) # compute the objective function objective = total_active * (1 - reg) - objective += G.flatten().dot(L.T.dot(L).flatten()) - return G, objective, total_active, df, a1, a2 + objective += G.flatten().dot(L.flatten()) + return 2 * G, objective, total_active, df, a1, a2 def _select_targets(self, X, label_inds): target_neighbors = np.empty((X.shape[0], self.k), dtype=int) diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py index c3efd9a3..06da087a 100644 --- a/test/metric_learn_test.py +++ b/test/metric_learn_test.py @@ -2,7 +2,7 @@ import re import pytest import numpy as np -from scipy.optimize import check_grad +from scipy.optimize import check_grad, approx_fprime from six.moves import xrange from sklearn.metrics import pairwise_distances from sklearn.datasets import load_iris, make_classification, make_regression @@ -21,7 +21,7 @@ RCA_Supervised, MMC_Supervised, SDML, ITML) # Import this specially for testing. from metric_learn.constraints import wrap_pairs -from metric_learn.lmnn import python_LMNN +from metric_learn.lmnn import python_LMNN, _sum_outer_products def class_separation(X, labels): @@ -157,6 +157,98 @@ def test_iris(self): self.iris_labels) self.assertLess(csep, 0.25) + def test_loss_grad_lbfgs(self): + """Test gradient of loss function + Assert that the gradient is almost equal to its finite differences + approximation. 
+ """ + rng = np.random.RandomState(42) + X, y = make_classification(random_state=rng) + L = rng.randn(rng.randint(1, X.shape[1] + 1), X.shape[1]) + lmnn = LMNN() + + k = lmnn.k + reg = lmnn.regularization + + X, y = lmnn._prepare_inputs(X, y, dtype=float, + ensure_min_samples=2) + num_pts, num_dims = X.shape + unique_labels, label_inds = np.unique(y, return_inverse=True) + lmnn.labels_ = np.arange(len(unique_labels)) + lmnn.transformer_ = np.eye(num_dims) + + target_neighbors = lmnn._select_targets(X, label_inds) + impostors = lmnn._find_impostors(target_neighbors[:, -1], X, label_inds) + + # sum outer products + dfG = _sum_outer_products(X, target_neighbors.flatten(), + np.repeat(np.arange(X.shape[0]), k)) + df = np.zeros_like(dfG) + + # storage + a1 = [None]*k + a2 = [None]*k + for nn_idx in xrange(k): + a1[nn_idx] = np.array([]) + a2[nn_idx] = np.array([]) + + # initialize L + def loss_grad(flat_L): + return lmnn._loss_grad(X, flat_L.reshape(-1, X.shape[1]), dfG, impostors, + 1, k, reg, target_neighbors, df.copy(), + list(a1), list(a2)) + + def fun(x): + return loss_grad(x)[1] + + def grad(x): + return loss_grad(x)[0].ravel() + + # compute relative error + epsilon = np.sqrt(np.finfo(float).eps) + rel_diff = (check_grad(fun, grad, L.ravel()) / + np.linalg.norm(approx_fprime(L.ravel(), fun, epsilon))) + np.testing.assert_almost_equal(rel_diff, 0., decimal=5) + + +@pytest.mark.parametrize('X, y, loss', [(np.array([[0], [1], [2], [3]]), + [1, 1, 0, 0], 3.0), + (np.array([[0], [1], [2], [3]]), + [1, 0, 0, 1], 26.)]) +def test_toy_ex_lmnn(X, y, loss): + """Test that the loss give the right result on a toy example""" + L = np.array([[1]]) + lmnn = LMNN(k=1, regularization=0.5) + + k = lmnn.k + reg = lmnn.regularization + + X, y = lmnn._prepare_inputs(X, y, dtype=float, + ensure_min_samples=2) + num_pts, num_dims = X.shape + unique_labels, label_inds = np.unique(y, return_inverse=True) + lmnn.labels_ = np.arange(len(unique_labels)) + lmnn.transformer_ = np.eye(num_dims) + + target_neighbors = lmnn._select_targets(X, label_inds) + impostors = lmnn._find_impostors(target_neighbors[:, -1], X, label_inds) + + # sum outer products + dfG = _sum_outer_products(X, target_neighbors.flatten(), + np.repeat(np.arange(X.shape[0]), k)) + df = np.zeros_like(dfG) + + # storage + a1 = [None]*k + a2 = [None]*k + for nn_idx in xrange(k): + a1[nn_idx] = np.array([]) + a2[nn_idx] = np.array([]) + + # assert that the loss equals the one computed by hand + assert lmnn._loss_grad(X, L.reshape(-1, X.shape[1]), dfG, impostors, 1, k, + reg, target_neighbors, df, a1, a2)[1] == loss + def test_convergence_simple_example(capsys): # LMNN should converge on this simple example, which it did not with @@ -458,7 +550,9 @@ def grad(M): return nca._loss_grad_lbfgs(M, X, mask)[1].ravel() # compute relative error - rel_diff = check_grad(fun, grad, M.ravel()) / np.linalg.norm(grad(M)) + epsilon = np.sqrt(np.finfo(float).eps) + rel_diff = (check_grad(fun, grad, M.ravel()) / + np.linalg.norm(approx_fprime(M.ravel(), fun, epsilon))) np.testing.assert_almost_equal(rel_diff, 0., decimal=6) def test_simple_example(self): From fbd92ff910042c1a3f8329a8920777afe9d72842 Mon Sep 17 00:00:00 2001 From: William de Vazelhes <31916524+wdevazelhes@users.noreply.github.com> Date: Wed, 5 Jun 2019 11:44:24 +0200 Subject: [PATCH 113/210] [MRG] Export notebook to gallery (#180) * Export notebook to gallery * Fix the figure number in order to get the image printed in the logo of the example * wip replace dataset by faces * Finalize notebook * change 
dataset for make_classification * Add comments on the properties of the algorithms * Address https://github.com/metric-learn/metric-learn/pull/180#pullrequestreview-242334192 * Address https://github.com/metric-learn/metric-learn/pull/180#pullrequestreview-243366233 * a few updates and minor corrections --- doc/introduction.rst | 4 + doc/metric_learn.constraints.rst | 7 + doc/metric_learn.rst | 1 + doc/weakly_supervised.rst | 1 + examples/metric_plotting.ipynb | 708 ---------------------- examples/plot_metric_learning_examples.py | 485 +++++++++++++++ 6 files changed, 498 insertions(+), 708 deletions(-) create mode 100644 doc/metric_learn.constraints.rst delete mode 100644 examples/metric_plotting.ipynb create mode 100644 examples/plot_metric_learning_examples.py diff --git a/doc/introduction.rst b/doc/introduction.rst index dad530b3..ef221971 100644 --- a/doc/introduction.rst +++ b/doc/introduction.rst @@ -1,3 +1,5 @@ +.. _intro_metric_learning: + ======================== What is Metric Learning? ======================== @@ -77,6 +79,8 @@ necessarily the identity of indiscernibles. parameterizations are equivalent. In practice, an algorithm may thus solve the metric learning problem with respect to either :math:`M` or :math:`L`. +.. _use_cases: + Use-cases ========= diff --git a/doc/metric_learn.constraints.rst b/doc/metric_learn.constraints.rst new file mode 100644 index 00000000..97d79002 --- /dev/null +++ b/doc/metric_learn.constraints.rst @@ -0,0 +1,7 @@ +metric_learn.constraints module +=============================== + +.. automodule:: metric_learn.constraints + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/metric_learn.rst b/doc/metric_learn.rst index c2472408..eb606542 100644 --- a/doc/metric_learn.rst +++ b/doc/metric_learn.rst @@ -6,6 +6,7 @@ Module Contents .. toctree:: + metric_learn.constraints metric_learn.base_metric metric_learn.itml metric_learn.lfda diff --git a/doc/weakly_supervised.rst b/doc/weakly_supervised.rst index 93720ffc..351c4e3b 100644 --- a/doc/weakly_supervised.rst +++ b/doc/weakly_supervised.rst @@ -118,6 +118,7 @@ through the argument `preprocessor`. paths in the filesystem, name of records in a database etc...) See section :ref:`preprocessor_section` for more details on how to use the preprocessor. +.. _sklearn_compat_ws: Scikit-learn compatibility ========================== diff --git a/examples/metric_plotting.ipynb b/examples/metric_plotting.ipynb deleted file mode 100644 index f8661181..00000000 --- a/examples/metric_plotting.ipynb +++ /dev/null @@ -1,708 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "collapsed": false - }, - "source": [ - "### Metric Learning and Plotting\n", - "\n", - "This is a small walkthrough which illustrates all the Metric Learning algorithms implemented in metric_learn, and also does a quick visualisation which can help understand which algorithm might be best suited for you.\n", - "\n", - "Of course, depending on the data set and the constraints your results will look very different; you can just follow this and change your data and constraints accordingly. 
" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Imports " - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "%matplotlib inline\n", - "\n", - "import metric_learn\n", - "import numpy as np\n", - "from sklearn.datasets import load_iris\n", - "\n", - "# visualisation imports\n", - "import matplotlib.pyplot as plt\n", - "from mpl_toolkits.mplot3d import Axes3D" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Loading our data-set and setting up plotting\n", - "\n", - "We will be using the IRIS data-set to illustrate the plotting. You can read more about the IRIS data-set here: [link](https://en.wikipedia.org/wiki/Iris_flower_data_set). \n", - "\n", - "We would like to point out that only two features - Sepal Width and Sepal Length are being plotted. This is because it is tough to visualise more features than this. The purpose of the plotting is to understand how each of the new learned metrics transform the input space. " - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "# loading our dataset\n", - "\n", - "iris_data = load_iris()\n", - "# this is our data\n", - "X = iris_data['data']\n", - "# these are our constraints\n", - "Y = iris_data['target']\n", - "\n", - "# function to plot the results\n", - "def plot(X, Y):\n", - " x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5\n", - " y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5\n", - " plt.figure(2, figsize=(8, 6))\n", - "\n", - " # clean the figure\n", - " plt.clf()\n", - "\n", - " plt.scatter(X[:, 0], X[:, 1], c=Y, cmap=plt.cm.Paired)\n", - " plt.xlabel('Sepal length')\n", - " plt.ylabel('Sepal width')\n", - "\n", - " plt.xlim(x_min, x_max)\n", - " plt.ylim(y_min, y_max)\n", - " plt.xticks(())\n", - " plt.yticks(())\n", - "\n", - " plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "data": { - "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAdsAAAFsCAYAAACEtRP5AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzs3XlYVOUXB/DvBQYYYIABhmGRHWRTEBEUF9zXNPctd211\nqczS1OpX2WJmZbm0mJparm2WS4a7uO8obiggJqLs+wAzc35/YBQNKg5cBuF8noenBs+877kzA4d7\n73vPFYgIjDHGGBOPkaETYIwxxho6LraMMcaYyLjYMsYYYyLjYssYY4yJjIstY4wxJjIutowxxpjI\nTMQaWBAEvqaIMcZYo0NEwn+/J1qxvTehmMMzxhhj9Yog6NRZAHwYmTHGGBMdF1vGGGNMZFxsGWOM\nMZFxsWWMMcZExsWWMcYYExkXW8YYY0xkXGwZY4wxkXGxZYwxxkTGxZYxxhgTGRdbxhhjTGRcbBlj\njDGRcbFljDHGRMbFljHGGBMZF1vGGGNMZFxsGWOMMZFxsWWMMcZExsWWMcYYExkXW8YYY0xkXGwZ\nY4wxkXGxZYwxxkTGxZYxxhgTGRdbxhhjTGRcbBljjDGRcbFljDHGRMbFljHGGBMZF1vGGGNMZFxs\nGWOMMZFxsWWMMcZExsWWMcYYExkXW8YYY0xkXGwZY4wxkXGxZYwxxkTGxZYxxhgTGRdbxhhjTGRc\nbBljjDGRcbFljDHGRMbFljHGGBMZF1vGGGNMZFxsGWOMMZFxsWWMMcZExsWWMcYYExkXW8YYY0xk\nXGwZY4wxkXGxZYwxxkRmYugEGHvc7Ny5E6dPn4aXlxeGDRsGIyP+m5Ux9mACEYkzsCCQWGMzZijv\nvPMuvv1uNcI69kDC2eNoHtAUG9evgyAIhk6NMVYPCIIAItL5hcDFlrFqys3NhYtrE3zy6wHY2CtQ\nVlqCOcO74+dNGxAZGWno9Bhj9cD9ii0f/2KsmnJzc2FhaQlrOwcAgMTUDI4uTZCVlWXgzBhj9R0X\nW8aqydXVFfb29vh91VIU5GbjyM7fcPPaFYSHhxs6NcZYPceHkRl7BDdu3MDoseNw9swZeHh6YtWK\nbxEREWHotBhj9QSfs2WMMcZExudsGWOMMQPhYssYY4yJjIstY4wxJjIutowxxpjIuNgyxhhjIuNi\nyxhjjImMiy1jjDEmMi62jDHGmMi42DLGGGMi42LLGGOMiYyLLWOMMSYyLraMMcaYyLjYMsYYYyLj\nYssavEOHDqFbj56IjGqLBR9/DK1Wa+iUGGONDBdb1qDFxcWh35P94dehN3pMeBkr1q7Du/PmGTot\nxlgjw/ezZQ3aG2+8gQupORg+7XUAQErCJXz5+vNITrxu4MwYYw0R38+WNUoSiQQlxUUVj1VFRZCY\nmhowI8ZYY2Ri6AQYE9OECROwNCISUisZ7J1csfW7pXj3f28aOi3GWCPDh5FZg3f9+nV88ulnyC/I\nx+CBAzFgwABDp8QYa6DudxiZiy1jjDFWS/icLWOMMWYgXGwZY4wxkXGxZYwxxkTGxZbVGSLCwk8+\ngbOLKxSOSrz62kxoNBpDp8UYY6LjYsvqzLp167B42dd45Ys1eHPVr9ixZz/mf/SRodNijDHRcbFl\ndeb3rdvQa8xzaOLjD4WLGwY+NwNbt203dFqMMSY6LrasztjZ2eHuX8kVj9NSkmBra2u4hBhjrI7w\ndbaszqSkpKB1mygERnaAmVSK4zFbEfPnTrRs2dLQqTHGWK3gphasXrh9+zY2bNgAtVqNgQMHwtfX\n19ApMcZYreFiyxhjjImMO0gxxhhjBsLFljHGGBMZF1vGRFJSUoL09HTw6RTGGBdbxkSw7MsvIZfb\nwcfXD0HNmiMpKcnQKTHGDIgXSDFWy44dO4a+/Qdg7vIf4ejqjq2rv0LC0d04ceyooVNjjImMF0gx\nVkdOnDiBltHdoGziAUEQ0HvUJJw5dRJardbQqTHGDISLLWO1zM3NDdcvnEVZaQkA4PLp43BydoGR\nEf+4MdZY8WFkxmqZVqvFyFGjceT4Cbh6+uLK2RPYtHEDunXrZujUGGMi46YWjNUhIsLBgweRnp6O\niIgIuLu7Gzolxlgd4GLLGGOMiYwXSDHGGGMGwsWWMcYYExkXW/ZYO3HiBJYsWYKzZ88aOhXGGLsv\nLrbssTVu3Hi0j+6IBZ8vReuoKLz40kuGTokxxqrEC6TYY+nEiRNoH90R8zfshJO7F1KuXsKb4/oh\n4coVeHh4GDo9xlgjxQukWINy7NgxKJt4wMndCwDg3jQQ1nJ7nDhxwsCZMcaYLi627LHUvn173Pkr\nGSkJlwAACXGnkZediTZt2hg4M8YY02Vi6AQY00eLFi3w3LPP4s2x/WAtt0d+dhZenzULTZo0MXRq\njDGmg8/ZssdaUlISTp06hcjISO7SxBgzOO4gxRhjjImMF0gxxhhjBsLFljHGGBMZL5BitUKj0WDi\nxIlISEjAgAEDMHPmTEOnJJqTJ0/i7Nmz8PLyQpcuXSAIOkeMGGvwLly4gGPHjsHZ2Rm9evWq0f2a\n8/PzsXXrVpSVlaFHjx5wcnKqxUzrBz5ny2pMo9FAoXSCRGoJn+BQnI3dg9aREdi7Z4+hU6t1i5cs\nwbz33kfzNtFIOH8afXv1xLKlSwydFmN1av26dZj6wrMId5EhKacELdp0wMafftGr4GZmZiIqIhxy\nFMHcxAiXs0qx7+AhBAYGipC5+HiBFBPN5MmTsfnX37Dw530wNTPHrcQEvD6iB/Lz8iCVSg2dXq3J\nz8+Hk7MLPtz4JxQubiguLMCc4d2xY+tvCAsLM3R6jNUJIoKNzArvdXCEp9wcZRrCrP13seS7dejV\nq9cjjzfz1RmI37YWz4XZAwB+v5qD2w7NsfWPP2s79TrBC6SYaK5fvw43nwCYmpkDAFy8fCEIAhIS\nEgycWe3KysqCpUwGhYsbAEBqaQVXTx+kpaUZODPG6o5KpYKqpAQetmYAAImxAE9bM71/DlL/uglv\na+OKx75yU9xOvVUrudYnXGxZjY0aNQoXTx7GtfNnoNVqsW3tNzCRmCI4ONjQqdUqV1dXSM3MsPeX\n9SAixB8/hKTLF9CiRQtDp8ZYnZFKpQgKaIqfL+dAS4RrWSqcuZ2P1q1b6zVep67d8WdKCXJUaqjU\nWmy5XoROXbrWctb1ABGJ8lU+NGsshgwZQhJTMxKMjEhqaUVr1641dEqiiI+PJ//AIDIxMSGlszPt\n2rXL0CkxVueSk5OpZUgzMjE2Irm1jDZv3qz3WFqtlma9+iqZmUpIYmJCI4cOoeLi4lrMtm7dq306\nNZHP2bJao9FokJWVBYVCYehURFdSUgIzMzNDp8GYQZWUlMDU1LRWVuRrNBpotVpIJJJayMxweIEU\nY4wxJjJeIMUYY4wZCBdbxhhjTGTcQYrVCpVKha+//hopN2+iXdu2GDRoUI3G2717N/7YuRN2cjme\nf/55yOVynRiNRoOVK1fi8pUrCGneHGPGjKlRFxvGGBML
/2ZiNVZWVoYu3brjh1+3IbVUgukzZ+Od\nd9/Ve7xVq1bhqTFjkVII7DxyGq3bRCE3N7dSDBFh2IiRWLx8FVJLJZj/6Rd45tnnaropjDEmCl4g\nxWps+/btmDH7Dbyx8lcYGRkhJ+Mupj/ZDnm5uTA1NX3k8Vzd3DF5/lfwDgoBACyZ9QJGD+yDF154\noSLm/Pnz6NH7CXz0015ITM2gKirE9H5tcf7cWbi5udXatjHG2KPgBVJMNAUFBZArlBWHcK3l9hAE\nI5SUlOg1XlFhAewc/2lEbqtQoqCgQGdOa7kdJKbll9+YSS1gZW2D/Px8PbeCMcbEw8WW1ViHDh2Q\nEHcaB37fjNspSVi94E20iYqCTCbTa7wn+w/A6vlzkZp8Haf2x+DIzi3o3bt3pZiQkBCUFORh29qv\nkZaShF+WL4KVhRR+fn61sUmMMVar+DAyqxWnT5/G5KnTkJqaijZt2uCrZUthZ2en11jFxcWY/soM\n7PjjD8jlcixc8BG6deumE3f9+nU8+/wLuHr1Kpo1C8byr79GkyZNaropjDGmN25qwRhjjImMz9ky\nxhhjBsLFljHGGBMZF1vGGGNMZFxs6wEiwnerV6Nf/wF4avRonD9/3tApVVCr1fjgww/Rp28/PPvc\n83yjdMZqwdmzZzFi6GD0690D369da+h0WB3gYlsPLFmyBG+9+x68onrAWOmNjp074+rVq4ZOCwDw\n7HPPY+OW7fDv9CTSVEDbdu2Rl5dn6LQYe2xdunQJXTp2gFXiIQTkxWPOK1OxbNlSQ6fFRMarkesB\n36YBGP/WQvgEtwAArP/8fYS6OWDevHkGzaukpATWNjb4avc5mFtYAgAWThuDOdOnYvDgwQbNjbHH\n1euzZiFx20qMDnEAAFxKL8LqZGNcTEg0cGasNvBq5HpOEIwq/X99+EOlIod/3Rj63gfJQBkx9vj7\n78+PEf9MNQp815964IXnn8Xit1/BoOdnIOvuHRz8fSM+io01dFowNzfHsOEjsPi1Z9F12HhcP38K\n6TeT0L17d0Onxthja+y4cejw9ZeQm2fDTmqC9VcKMGPuO4ZOi4mMDyPXA0SE5d9+i59+/gUymQxz\nZ7+OsLAwQ6cFoPyOPh988CEOHjoEV1dXfPDePLi6uho6LcYeaydPnsSH895BYUE+hj01BhMmToQg\n6Bx5ZI8h7iDFGGOMiYzP2TLGGGMGwsWWMcYYExkXW8YYY0xkXGzZA6WkpMA/MAgyG1sonJyxadOm\nKuN+/PFHODo5w8rGFn7+gbhx40aVcWvWrEHz0BYICGqGBR9/XCeXPGzfvh0tW0XAzz8Qs16fjbKy\nMtHnZIyxf+Niyx4oMqotHL0D8daKnzDw2Vcwdtx4xMXFVYq5cOECRo8Zi/7PTMf/Vv4M56bNEBnV\nVmes3377DTNnz8WAqXMxavaH+HrFd1i8ZImo+R8/fhxjxo1H1zFTMOndz/HH3oOYPWeuqHMyxth/\n8Wpkdl8ZGRlwcnLGd0cSYGxSfkn2/Klj0KlVCD7//POKuOnTp2P30dOYvewHAIBGrcb4tn5IuXED\nLi4uFXFPjRoNK59QdB44EgBw/ugB7F/3FQ4dPCDaNsyZMwdXMoox5IUZAIBbSdeweMZE3Ejibj2M\nsdrHq5HZI7OwsACBUJCbDaD8euDczHRYW1tXipPJZMjNyqg4JFyQlwMi0omztLRETubdisc5Gemw\nsLAQfRvystIrHudmpkMq8pyMMfZfvGfLHqhjp864fC0R3YeNxeXTx3H9/CncSEqsVEgLCgrg5uEJ\n72YtERjeGrs2r4Wvpzti/7PHevnyZbTr0AHtnhgKM6kFdm/6Dr/+8jOio6NFyz8tLQ3hrSLQrF1X\n2Dm5YNeGlVjyxecYPny4aHMyxhovvZtaCIJgBmAwAE/8q70jEb37kOdxsW0AtFotZsyYgX0HDsDF\nyQmrV6+Gg4ODTlxGRgbGjRuH1LQ0dGjXDosWLYKRke6Bk2vXruHbFSugVqvx1MiRaNmypejbcPv2\nbXz55ZfIzy9A//5PolOnTqLPyRhrnGpSbP8AkAvgFADN398nok8e8jwutowxxhqV+xXb6tyIoAkR\n9RIhJ8YYY6xRqM4CqcOCIDQXPRPGGGOsgbrvYWRBEM4DIJTv/foBSARQAkAAQEQU8sCB+TDyI9No\nNDAyMqqVu39otVqo1WqYmprWQmZAaWlptcbSaDQwNjZ+YAwRgYiqPKcrZm6Pu+q8ttVV3fegNj+T\njDUG+lz60xdAPwC9AfgC6HHv8d/fZ7UkPz8fAwYNhtTCAja2ciz61zWs+njm2WdhbmEBc3MplC6u\nuHLlit5jHTp0CHYOCpibm8PcwgJvv/12lXE7duyAs4srzMzMENG6DZKTk3ViiAiz58yFlZUMUgsL\nTJz0NEpLS/XObcWKFbCUWcPM3BwyG1v8+OOPeo9Vn6WmpqJjVGuYmZrCyd6uRttJRJj37juQWVpA\nam6GUSOGQaVS6cTl5uaib68ekJqbwdZahmVLl9ZkExhjf/+Fe78vAGur870qYohVz5hx4ym67yBa\ndegqffrrAXJ286Bt27bpNdby5cvJUmZN8zf+SauPXqOuQ8aQSxM3vXOztpXT0Mmv0ZpjifTWip/I\nTGpBu3btqhRz/fp1ktvZ01vf/khrjyfRyGmvU7OQUJ2xvv7mG/INak5Ld56kb/fHU1i7TjR7zly9\n8kpMTCQzcym9/PE3tPZ4Ej3/zqdkLrWgzMxMvcarz9pHtqIZbXwpY1p32jOiDSlsZHT+/Hm9xvrh\nhx/IU2FD3z7pQ+uH+FGUlwO9OGWyTtywwQOpe1MFbR7WlJY+4UVOchnFxMTUdFMYa/Du1T6dmlid\n43jB/34gCIIxgPBarvmN2t69e9H/6ZdhJpXCyd0L0f1HYM/evXqN9fPPP6ND36Fw9wuExNQMw6fO\nxN07aXqNlZqaisKCfAyYNA0mEgkCwiLRLLIdfvrpp0pxx44dQ3BEWwS0bA1jExP0HT8Z169dQ25u\nbqW4Xbv3oNvwCZArlLCQWaPv+MnYvWePXrnt2LEDjq7uiOzaG8YmJojuNxSW1rbYv3+/XuPVV2q1\nGkdOnsacSC9IjI0Q7mSDXl6OOHz4sF7j7f7zD/T0MIfCUgILiTEG+1liz64/deL279uH4QHWMDU2\nQhNrM3RyNcO+ffp9JhljDziMLAjCbEEQ8gGECIKQd+8rH8BdAFvqLMNGwNFRiRtX4gGUH2m4mXAR\nTkqlXmO5uLgg8eI5aLVaAMCNKxdhamau11jl19MKSE2+DgBQl5Xi5rUrcHd3/0/+jvgrMQFlpSUA\ngLSUJAiCACsrq0pxTkolUq5erHh848pFKB0d9crNx8cHGXdSUZhfXtBzM9ORn5MFPz8/vcarr4yN\njWErs0J8RgEAQK3V4mJWARz1fN2cXFyRnK+teJyUUwLHKj5rCoUDErPLDy8TEW4UAkqlk15zMsZQ\nrcPIHz4s5j7
Pq5Nd9oYgNjaW5Hb21KX/MAprG00hLcIoPz9fr7Hy8/PJXuFI3kEh1P6JwWQmtaC3\n335b79wmTJxIFlbWFN1vCLl4+pK7lzeVlZVVitFqtTRk2HDyDWpO3QePIgdHJ/p2xQqdsdLS0sjD\n04tad+lJ0X0GksJRSRcvXtQ7t1aRrclO6UzR/YaSjb2CunTtpvdY9dmGDRvI0UZG41v6UCt3JT3R\noxup1Wq9xsrMzCQ/b09q7a2grv5Ksre1oTNnzujE7du3j+TWVtQjQElhHg4UHtqcCgsLa7opjDV4\nuM9h5AetRn5gax8iOv2gf+fVyI/m+vXr2L17N6ysrDBgwIAa9QwuKCjAm2++iTt37uCpp55C3759\na5Tb999/j23btsHb2xvvvPMOTEx0L8/WarXYunUrbt26hcjISISHV32mIScnB1u2bIFarUbv3r0r\n3ahAHwsWLMDp06fRrl07TJs2rUZj1Wfnzp3D4cOHoVQq0b9//xqtSs7Ly8OWLVtQUlKCnj17ws3N\nrcq4hIQE7NmzB9bW1hg4cCDMzfU7QsJYY/LIHaQEQfj7BI05gFYAzqH8sp8QACeJKOohE3KxZYwx\n1qg88qU/RNSZiDoDuA2gJRG1IqJwAGEAbomXKmOMMdawVGc1sj8Rnf/7ARFdABAoXkqMMcZYw1Kd\nYhsnCMK3giB0uve1HECc2Imx+kOlUuHKlSvIzs5+YFx2djYuX75cZZME9vhRqVSIiYnBhQsXDJ2K\njpycHGzfvh03btwwdCqMVUt1iu0EAPEAXrr3dfHe91gjcOrUKXh5+6Brz95w9/DE4iVLqoxbunQZ\n3D080a1XH3h6e+PkyZN1nCmrTceOHYOj3AZD+/VGeIsQtGoRUnE5maGtWLECTgp7jB7cH37eXhg1\ncqShU2Lsofjm8ey+iAgenl4YOHkW2vToh/TUm5g3cRBidu5AixYtKuLi4uLQpVsPvLnyZzi6uuP4\n7u3Y/Pl7uJlyg3vqPqacHeTo5GyMEc0cUFimxcw/b2DY01Pw6aefGjQvtVoNK6kZZkQ5o3UTGW7n\nl+KVnclY/+MvePLJJw2aG2OAHgukBEHYdO+/5wVBiPvvl5jJsvohPz8f6enpaNOjvBW2wsUNQeFt\ndA4rXrhwAYHhreHoWt7sIrJrH2RnZ+t0kGKPj5zcPHTzti1vTmJqjA4eMhw7dszQaSEhIQEgQusm\nMgCAs8wU/g5S7Nu3z7CJMfYQDzqM/NK9//5944H/frEGTiaTwdLKEhdPHgEA5Odk42rcKfj6+laK\n8/HxQULcaeTnlJ/TvXTqKKRSc1hbW9d5zqx2WFlIcTK1vGtVmUaLk6mFCAw0/LpILy8vaAFcSi8C\nAOSq1LiWqbrvdd2M1RtVdbqgyp2gJgHwe1hcFc8ToTcHq2sxMTEkt7en5q3akJ3C8b43Dpgz9w2y\nUzhSSEQUye3s6c8//6zjTFlt2rJlC5lLjMlLbk625ibk6epMJSUlhk6LiIjmzZtHZsYC+diZk1Ri\nRJ2j2xs6JcYq4FE7SP1NEIR3AHQA4AngFIADAA4S0dmHPI8eNjZ7PNy5cwfx8fFwcXFBQEDAfeMu\nX76M1NRUBAcHQ6lnb2dWf6SkpODnn3+GQqHAyJEja+3+w7UhLi4OMTExCAoKQu/evQ2dDmMVHrmD\nVBUDSAE8A+BVAK5E9MB+cVxsGWOMNTZ6F1tBEN4A0A6AFYAzAGJRvmd7+yHP42LLGGOsUalJsT0N\nQA1gG4D9AI4QUUk1JuRiyxhjrFGp0WFkQRCsUb532x7AUAB3iaj9Q57T4IttXFwckpKSEBwcrLNC\n91FlZmbi6NGjsLKyQvv27e97V5edO3di//79iIiIwMCBA2s0Z3WlpKTg7NmzcHFxQatWrepkzsai\nsLAQsbGxEAQB7du3r9Hdngxl+/btiI2NRZs2be57ratWq8WhQ4eQk5OD1q1b630/3kdBRDh58iRu\n376NsLCw+97dSKVS4eDBg9BoNGjfvr3OfZj/dvv2bZw8eRL29vaIioqq0TXkarUaBw8eRFFREaKi\nomBnZ6f3WKx+uV+xrc6q4mYAXgCwAcA1AHsBvFuN59X+Mq965H9vv0MOSieK7NiN5PYOtHrNGr3H\nOn/+PDkqnahl247k7R9IXbp1r3Ll59ix48jcwpKCWkWRhZU1PdG3b002oVp+//13ktvZU0R0V3Jy\ndaOp014Ufc7G4s6dOxTo401RXi7UxsuFgpv6Unp6uqHTeiTDhw4hqcSImistyUJiRAOf1P1MqtVq\n6tu7J3k62lKkt5Ic5DZ04sQJUfPSarX03NOTyMXOmtr4OJHc2oq2b9+uE5eVlUXNAppSUBMHCnFX\nkI+HG6WmpurEHTx4kBS21tQjwIN8lXY0cshg0mg0euWmUqmoY7so8nWSU7iXkpwU9jW6rzOrX1CD\n1chbUb4CORbACSIqq2Z1p4eN/bi6ePEiojt1wXvr/4CNnQNuJSbgnQkDkHY7FZaWlo88XrvojgiK\n7oMug0dBq9Hg0+kTMHHEYEyZMqUi5vr16wgMCsZHm2Lg5O6F7PQ0zBjYCTt3bEd0dHRtbl4FrVYL\newcFpn+2Cn4hLVFUkI//je6DDT+sRfv2DzywwarhuUkTIIk7iA/a+4KIMCv2GkxadcOSr742dGrV\ncuHCBYS3CMGSPl5QWpkivbAMU7Yl4uCRY4iIiKiIW7NmDRbOfQVvt3OAiZGAAzfyEJMjw7n4y6Ll\ntnfvXowfNhALOikhlRjhUnoRPj6Zi/Ss7Ep7pNNfnIYrMRvxXAs7CIKAtReyYdG8M1b/sL7SeE29\nPDAvxB69vR1Rotai55ZzmPvZMgwePPiRc/vss8+w8Yv3Mau1A4yNBGxPyMEVMy/sjT1c4+1mhvfI\nHaT+RkR9iWgBER2ubqFt6FJSUuDu5w8bOwcAgKu3HyxlMty9e1ev8W4kJyMooi0AwMjYGH4tIpGU\nnFwp5sKFC5DZ2sHJ3QsAIFc4wdHVHefOndN/Qx6ioKAAKpUKfiEtAQAWVjJ4B4dy8/daknztGqJd\nyht/CIKADs7WuJF4zcBZVd+5c+dgJ5VAaWUKAFBYSqCwNNX5TCYnJ8PfRoCJUfnvnxBHC9z8S9y7\ndCYnJ8PPXgqppPxXXICDFHkFBSguLq4Ul3Q9AcF2kooC3MzeFEmJ13XGS7l1G9Fu5Yd6zUyM0NpR\npvfPQXLidQTaGsH43uvR3FGKGyn8M9XQ1Z8L5x4jwcHBSLp0AUmXyu88eGr/nyCNBq6urnqNFxER\ngd0/roVWq0V+TjZOxPyOiP+cG42KikJBXjbOHd4HALh67hTSbiahc+fONdmUB5LJZHBxccH+3zYB\nAFKTryP+5BGEhoaKNmdjEtG2HdZcTUeJWguVWoO1V9MR3qatodOqtg4dOiC7WI24tEIAQPzdIqQX\nlul8JiMiInDsThmyi9UgIuxIzEPYv3priyEsLAzn0gpwO78UABCTmAsv
dzedc+KRbdtj718lKFFr\nUaYh7L5ZjIgq3oPw0OZYHvcXiAi38lXYnpypd9eqyDZROJSmRkGpBloi7EwuQHiriIc/kT3eqjq2\nXBtfaODnbH/66SeytrElO4UjKZ2d6ciRI3qPdefOHYpo3YZs5HYktbCkV1+bSVqtVidu8eLFZGpu\nTlIrGUlMzeh///tfDbages6fP08enl4kd1CQpZWMVqxcKfqcjUVxcTEN6vcEWVuYk0xqTkMH9K83\nXZqqa+HChWRqbEQWEiMyNRbogw8+qDLu7bfeJKmZKcmtLCgkKID++usv0XP7+uuvyFJqRnYyS/J0\nc6X4+HidmNLSUhoxdDBZmJuSTGpOT/TsTkVFRTpxSUlJFOznQwprK7I0N6OPP5qvd15arZamvziV\npGamZGMppahWLSkjI0Pv8Vj9An3P2eqrIZ+z/VtJSQnS09Ph5OQEExOTGo1FRLhz5w4sLCwe2FO4\nqKgI8fHxCAwMvO+qydqm0WiQlpYGOzs7SKXSOpmzMcnIyIAgCLC3tzd0Knqp7meyoKAA+fn5UCqV\nddaNqri4GFlZWXBycrrvCn8AyMrKgkajgYODw31XGWu1WqSlpcHGxkavtRn/lZubi+LiYiiVSr47\nVgPyyJeWMVRhAAAgAElEQVT+CILwO4D7VksieuD9rBpDsWWMMcb+7X7F9kG7YwtFzIcxxhhrNPgw\nMmOMMVZL9L70RxAEP0EQfhQE4aIgCIl/f4mTZuMVHx+PTz/9FN988w3y8vJqNFZxcTFWrlyJhQsX\n4vTp0/eN27x5M0JCQtCyZUscOHCgRnOyxomIsHXrVixYsABbtmxBXf2BPXXqVAQEBKBnz546l/M8\nqj179iAsLAwhISH45ZdfailDxv6jqlVTVHlVcSyArgDiAHgAeBvcQapW7d69m+R29tRrxHhq07U3\n+QcEUk5Ojl5jFRUVUXhEJLVs35n6PDWJ7BwU9OOPP+rELVy4kEzNzCm631Bq06MfmZpLacuWLTXd\nFNbIvDxtKnk52tKAYEfyUcpp8nPPij5nSLMgsjEzpn5N5eRla0Y2FmZ6r+LetGkTmRoL1MFdRl28\nrMnUWKDFixfXcsasMUENOkidIqJwQRDOE1Hzf3/vIc+jh43NyrVoGY5uY6cgvGMPAMBXb72EPu0j\nMWvWrEcea8WKFVj23Q+Y8flqCIKAK2dPYMXb03HzRnKlOFs7Bwx49mX0HD4eALD+iw9xbOcvuJOa\nWtPNYY1ESkoKQoMDsbSnK6xMjVFUpsHUP1Nx9NTZGvcKv5/09HQ4Kx3xdT8fKCwlUGsJk7cmYtDY\np7Fs2bJHHs/RzgadnCUYHaoAAGy5nIlfruYjq6Bme8us8dJngdTfSgRBMAKQIAjCVAC3UH67PVZL\nsrKy4OL5zy8nJ3cfZGRm6j2Wk4dPxaUErl6+yMnO1onTEsH1X3O6evuhrEyj15ysccrKyoKdlTms\nTMsvqbGQGMNBJkVWVpZocyYlJcHYSICDRfmvLhMjAc4yU9y6pV9HKk1ZKdxs/rmMx83aDFpNTq3k\nyti/Vedit5cAWAB4EUA4gDEAxomZVGPTq2dPbF76EfKys5B8JR77flmHnj166DVW586dcXTnFlw9\ndwoFeTnY8Pn76Na9u06ck1KBDYvnI+vubaSlJOHnbxYhONC/ppvCGhF/f3+UChL8cS0XhaUa7ErM\nRV4ZEBQUJNqc4eHhMDESsP58BgpLNThxqwCX0ovwzDPP6DWef1AINlzIwO38UqQXlmFtXDqc3Txq\nOWvGUP0OUgCsAcgeIV70Y+MNRWFhIY0aM5asZNbk5OxC3yxfXqPxNm/eTK5u7mRpJaP+AwdRdna2\nTkxRURE5uzYhiakZSczMqam/P6nV6hrNyxqfixcvUljzYJKamVJocCDFxcWJPufvv/9OMjMTMhZA\nUhMjeumll/QeS61Wk4+nB5kaCyQxEqiJk2OVHaQYqy7U4JxtKwCrAMjufSsXwEQiOvWQ59HDxmaM\nMcYaEr1vHi8IQhyAKUR08N7j9gCWEVHIQ57HxZYxxlijovd1tgA0fxdaACCiWADq2kyOMcYYa8iq\ns2e7CIAUwHqU90oeDkAF4HsAIKIquybwni1jjLHGpiZ7tqEAmgL4H8obWgQCCAPwCRpg/2Qiwg8/\n/IDxEydh1uuvIyMjo8q4oqIizHvvPYybMBGLFy+GRlN/LpuJi4tD6zZRCAxuhldmzLhv3OHDh/Hc\n8y9gytRpOH/+fJUxRIQVK1Zg/MRJeOPNN5GbmytW2o+spKQE8z/8EBPHjMKnn3wCtbpmB1wWLFiA\nkAA/tAoNQUxMTJUxGo0GS5YsxsQxozDv3XdQVFRUozlXr16NkKAAhAYH4ocffqgyhoiwZs0aTBgz\nCrNfn3XfS2vu3r2LLp06ItDXCyOGD6vx61Gbtm/fDhcnRyjkMgwcOPC+cQcOHMDzT0/EtMkv4OLF\ni1XGqNVqjB41CoG+XugU3QGp97k2PCcnB3PnzMaEMaOwauXKOuluVVxcjPffm4dxo5/C558vuu/v\nhcTERLw0bSqenjAOO3fuFD2vR3H06FE89/QkTH7+WZw7d67KGCLCqpUrMWHMKMydMxs5OXy51ENV\ntWqqNr7wmK5Gfu/998nD158mzvmAegwbR17ePjqrecvKyqhdh2iK6v4ETZo7n5q1akOjx44zTML/\ncfnyZTK3sKQew8fThNnvk53SmfoPGKATt2vXLrJzUNCo6W/QsMmvkdzOns6ePasT98qMV8kvOIQm\nzfmQOj85jJqFhNaL1ZoajYZ6dulEffyb0KKuQdTF14WGDuhf5X2Aq2PGK6+QrZmEPuoYQK9GepPU\nxJh27dqlEzdx7Ghq6+VEi7oG0cAgN2rfOoJKS0v1mnPZsmVkbmJE40IVNDbUgcxMjOjbb7/ViXvr\njbnk7WhLL0QoqZe/gvy8PCgvL69STH5+PsllFtTWTUaTI5zIR25Owf5+euVV2/bv309mxgL1ayqn\n51spSW5uTOEtw3Titm/fTo62Mvog2p/mRPmSg401XbhwQSeuRbMg8rI1o8kRTtTB3ZpsLM0pNze3\nUkxBQQEF+vlQj6YKeiFCSX5Ocpr16quibSNR+crmju2iqJ23A02OcKIWbvY0ctgQnbikpCRSyG1p\naDMFPRuuJKWtjNatWydqbtW1d+9esrO2pPEtFDQ6VEFyays6deqUTtysV18lPyc5vRChpB5NFRTU\n1JcKCgoMkHH9gxqsRlYC+ACACxH1FgQhCEAUEa14yPPoYWPXN0QEaxtbvL/+Dyhc3AAAX7z6DJ4Z\nNRQTJ06siIuNjcXYSc9i3ro/YGRkBFVxEV7sHYHrCQlwdHQ0VPoAgGHDhuFGdhFeXvAVAOBWYgLe\nGP0EVMWV98C69eiJgE590a53+V7G76u/gmnebXy38p+3tbS0FDJrayzecQIyWzmICPOfG4Z3587C\ngAED6m6jqnDmzBkM6dUNJ0a
Ew8TICCq1Bs3XHMXRs3Hw8vJ65PGU1lb4sqs/unmWdxL638GrOAI7\nHD15siImIyMDPh5uuDS+HaxMTaAlQocfz2Lpus2Ijo5+5Dk9XRzRt4kxevjaAgC2J2TjzzQjJN78\nZ09Nq9XC0kKKZb3cYG8hAQC8fzQTL877DKNGjaqI+/jjj7H4vTexuI8nBEFAUZkGY36+hmuJSfDw\nMOx1o82aNYNDwU280tYFAJCYrcLsXTdQXKatFNelXRtMkKvQ388JAPDx8URkBnfEsm+WV8TcvXsX\nLk5KrBnkBytTYxARXv4jGROmz8Fbb71VEbd582bMf20K3oqygyAIyFWp8fTWZBQWFdf43tP3c/To\nUYzo3xufdVbC2EhAiVqLZ7bfRPyVBLi6ulbEvTF3DuJ/XY4JoQ4AgLi0QmxMNcP5ywmi5PUo+nTv\ngqYFl9HFywYAsOVKFkqbdsLa9RsrYtRqNSwtpPi2rydszE1ARHj3SBZe/3gphg4daqjU642aHEb+\nDsBOAC73Hl8F8HLtpVa/qNVlsJD9c/N2C5k1SktLK8WUlJTAwsqq4gbYpmbmkJia6cQZQklJCSxl\nNhWPLWTW0Gq1OnGlpaWw+FecpcwapSUllWL+PgwpvXejbEEQYCGzqT/baSqByb33wMzYCFJTCUr+\nsw3VpdVqYG0qqXhsa24CdZnu+25qbAypSXnHJCNBgMxMovfroVFrYGn6z4+gpcQI2v8c+iUiaDRa\nWEiMK8X9d86ioiJIJUYVncPMjI1gLJTfsN3QSktLITOrnH9Vf4eXqEpgbfbPe2BjaozSElWlmOLi\nYgiCAHOT8tdNEARYmRrr3Iyg/PPxz+shlRjdey3FO91TPqcJjI3K55QYCzA1MdZ5r1QqFaT/uo+9\npalujKGoVCpYSip/JktUld8DjUYDIoJU8s97YGmq+5lklVWn2DoQ0SYAWgAgIjWA+nOCshYJgoDh\nI0biqzdeQkLcaez5eR3OHtqD3r17V4pr3bo1CrMz8cvyRbh24QxWfzgHgQEBlf56NZQXX3wRh3b8\ngv2/bcLVc6fwxeuTERgUqBM3dsxobFg0DxeOxeLMwd3Y8u0ijBk9qlKMhYUFevbqja//Nx3Xzp/B\nH+tWIPlSHDp37lxXm3NfLVq0gMbcEu8eTcSptBzMPnQdCtcm8PPz02u8Nh07YcquCzj0Vxa2JKRh\n4YkkPDN5SqUYFxcXBDdvjpcPXMWptBx8fCIZd0rLPw/6GDRiFJafuosztwtxOrUAK87cxZCnxlSK\nMTY2xpBBA/D5qSxcySjGjms5OJ+uQo//dBibMGECbuaVYnN8Bq5kFOOLY7dhY22NwEDd976uzZgx\nAzHXc7A3KReX0ouw8HAq7OzsdeJGTZyE1w8n4cDNTGy/fhefnEvFyLHjK8V4eHhAYSfHoiO3cSWj\nGL9cysS1rBI8/fTTleK6d++Oy5ml2JaQU/56nMpCvz69YWZmJtp2RkREoMRYig3x2biaWYxvz2bD\n29dP58jC8BEjsfNGMQ6l5CH+bhG+jsvF6PET7zNq3Ro36VmsuZiPs2mFOJlagE1XCzFmYuXX1szM\nDP369MYX9z6T2xJycDmzFN26dTNQ1o+Jqo4tU+Vzr/sA2AM4fe9xGwD7q/E8MQ+Li0alUtGMV1+j\n0LCW1K1HTzpz5kyVcTdu3KABgwZT89AWNG7CRMrKyqrjTO/vu+++I0dnF7K1d6C27dpTYWGhToxW\nq6Uvv/qKWkW2pjZt29HmzZurHKugoIAmT5lKIS3CqPcTT9Dly5fFTr/abt26RcMHDaCwIH8aM2IY\npaen6z2WRqOhJ5/oQ0qZJTnLrWnevHlVxmVnZ9OksWMoLMifBvXtQ8nJyXrPSUQ0aeJEcpBZkIPM\ngp5/7rkqY4qLi+mlqVMoJLAp9ejS8b5dmvbv30/uTgqSW5pTUFNfunnzZo1yq00zZ84ka3MJyUyN\nycu9CeXn5+vEaLVaWrL4C2od2pw6RITTr7/+WuVYt27douYBTUluaU5NlA5VnlsnIoqPj6de3TpT\nSGBTmvrCc1X+HNS2lJQUGtjvCQoJ8KOxT42gzMzMKuNiYmKoQ5sICmsWSB++/z5pNBrRc6uu5d98\nQ61Cm1HrlqG0fv36KmMKCwtp6gvPUUhgU+rVrTPFx8fXcZb1F2pwzrYlgMUAmgG4AEABYAgRxT3k\nefSwsRljjLGGRO8OUveebALAH4AA4AoRlVXjOVxsGWOMNSqPvEBKEIQIQRCcgIrztOEA3gfwiSAI\ndqJlyhhjjDUwD1og9TWAUgAQBCEawHwAa1B+I4JvxE+NMcYYaxgeVGyNiejvVjXDAXxDRD8R0ZsA\nfB/wvEYhNTUVT40ejciotpg8ZSry8/MNnRKrASLCsqVL0LF1BHp17oi9e/dWGZefn49pLzyHduFh\nGD18qN43LX8UpaWlmPv6TLRv1RKD+z2BK1euVBl38+ZNPDV0MNqFh+GlKZPve9nPrl270KtzNDq1\nicTXX31VZWclrVaLBfPno21ES/Tu1gXHjx+vcqysrCw8M2Ec2oWHYeKY0fftuFZdK1esQOeo1ujR\nsUO966xU14gIXyz6DO0iw9GjczQOHjz48CexeuuBxfbeuVoA6Apgz7/+TZyrwh8TRUVFiO7UGSpz\nO/SaNB2Xbt7BkwMG1kk7OCaOLxYtwtL338FLLoQh5tkYPrC/ToEhIgx+si8yDu3EXG8JnFPOoUv7\ndigsLBQ1t+efnojjP6/DbE8TtMpPROf27ZCWllYpJj8/H53btYX7rQuY6y1B6v5tGDawv85n8siR\nIxg1ZBCGmefiRWcNPnvnDXy5bJnOnG+/9SZWfbEAvWUZ8M2/hF7du+LSpUuVYtRqNXp36wI6dxBz\nvSUwu3QUPTp3RFnZQ5d0VOnb5cvx4ZyZmKJUY6RlHsaNGIoDBw7oNVZDsGD+fCye/y56WqYjSHUN\n/Z/ojTNnzhg6LaanBxXN9QD2C4KQAaAYwN+32PNF+aHkRuvo0aMwtZRh6JSZAAD/FpGY1qsVUlNT\n68W1tuzRfbf8K3zWwRttXOQAgJt5KqxbuwaRkZEVMbdv38bpU6dwdUJbmBgZoX0TO8RuOY+jR4+i\na9euouSl0Wjww4aNuP50R1ibmaCDmx1OZqrwxx9/YPz48RVxsbGxcDYlzGld3j2rtbMtfFfGIiMj\nAwqFoiLu++9WYWpzZwwJcAYAmJsY4+1vvsTkKZWvKV61Yjleb2kLN5vy61JvFaixefPmSl2aLl++\njIxbN/HJyFYQBAHtXOVos+k0zp8/j5YtWz7ytn73zZdY0M4LXT3KOyulF5Vi7coVenXnaghWLv8K\nz4fawM9eCgC4U1CG9evWISwszMCZMX3ct9gS0fuCIOwG4Azgz38tLTYCMK0ukquvJBIJSlWq8mun\nBAHqslKoy8pEawPHxCeRSFCs/qfTVrFGCxOJRCdGrdGiTEswMSrf0y1Wa0R93wVBgPG9
dpTWZuXz\nFKu1OnNKJBKo1JqKz2SpVgu1RgtjY+NKcSYSCVSaf21nWdX5m5hIUKL5Z6+4VIsq5yzVaKDWEiTG\nAjREUNXg9ZBIJCj+V9euIrXue9CYlL++/xwlKNECEgn/jnlcVevSH70GbsCX/pSVlaFdh2hIFS4I\njozGkT9+hp+bCzZtWG/o1Jie1q1bh1nTJuO1MFdkqNT4Kv4ODh49Bn9//0pxo4cPQ+qpQxjha4+9\nqXlINLHFwaPHYWpqKlpus2e+hp3r1+C5IEecyyxCTHoZTsadh43NP+02S0pK0C4iHAFCITo4WWHd\ntUx4RXXCd9+vqzTWxYsX0bFtFKY0c4Lc3AQLTt/CZ199g2HDhlWKW7p0CT7831wM8rFAerEWu2+V\n4dTZc3Bzc6uIISL07dkdws0reNJDjm0pOShy9MTOPfsqWpk+ii1btuD58WMxs2UTFJRp8EVcKnbt\nP4jQ0NBHHqsh+G7VKsyZ8RIG+VkiW6XBzpRSHDt5Cj4+PoZOjT1Aja6z1XPCBltsgfKesx98+CES\nriWiVXgYZrzyCu/ZPua2bt2KTd+vgdTSEi/NeA1BQUE6MWq1Gp99+glOHzsKL7+mmD33DchkMlHz\nIiJ88/XX2L/rTyhdXDH7jTervOFFbm4u5r//Pm4kXkN4myi8PP0VnT1bADh//jwWf/YJSoqLMWLs\neJ12pH/buHEjfv1xE2zkcsx8fQ68vb11YkpKSvDxRx8h/twZBDQLwczXX4dUKtV7W2NiYvD9qhUw\nNTPDlJemo0WLFnqP1RBs2bIFG39YCyuZNWbMnKXzxx+rf7jYMsYYYyKryV1/GGOMMVYDXGwZY4wx\nkXGxZYwxxkTGxZYxlK8wnzblBTjay+Hh6oRvly+vMu7YsWNo4mAHqcQYCpkl1q+vegV6TEwM/H28\nYG9rjSEDnkROTo6Y6QMA1qxZAweZBaQSYzRR2OPkyZNVxn311Zdwd3GC0sEO01+cBvV/blgPlN+k\nfeK4MXCQ28LbvQk2btxY5VgxMTFwsJHBzMQI9taW2L59e5Vxv/zyC5p6ukNpJ8f4UU+J3ggEAA4c\nOICQgKZQyG0w8Ik+Ne5uVV8lJyejU/so2NnI0KpFc8TFPfCGbLUiIyMDT/bpBXtbawT7+zbq5iPV\nxQukGAMw89UZ2L15NZ4PtUFeiQYLT2Rh5Q8bK63ULS0thVJug+eau2BSiDv23MjAK3sv4Wz8pUo3\nrb98+TLaRkbgxZa28LA1w8bLeTD2CMW2nTGi5R8fH4/IsFB80TUI0W72+PrsDay6mIY7OXmVVsn/\n/vvveH78aLwaYQcrUyMsO5uLJ0Y9jfc+nF9pvKcnjMOl/dswKcQWdwvL8MmJLGzZvhNt27atiMnJ\nyYGLUoGRwXaI9rDG4Zt5WBuXieS/Uiutlj558iSe6NYFq7oHwNvWAnMPJ0LWoj2++6HyZUm1KTk5\nGREtQrA42hctnWzw6ekUJEidsftgrGhzGoJarUawvx9aWxejq6cMp24XYnNiKS5dvQZbW1vR5u3Y\nLgo2OdcxqKkNEjKL8eW5XJw+dx6enp6izfm44AVSjD3A1i2/YFSgDEorU/jZS9HHU4rft/xSKebU\nqVMgjRqz2/hCaWmGkUGuCLK3wqZNmyrF7dmzB61dLdHSxQr2FhJMCpEjZs9eaLVaiGXDhg0IVVhj\naIALlJZmeLOtH0pLS3HhwoVKcb/98hP6eknhY2cOpZUpngqwwu+//qwz3ratWzG+mQ0cLCQIUlig\ni5s5/vhjR6WYnTt3wkoioH+AHeRSEzzR1A5yc2Ns27ZNJ25kU0e0b2IHFytzfNDW+757wLVl//79\n6OTugD4+jnCyNMOH7XwRe/QYVCqVqPPWtaSkJOTnZGFIoBxyqQm6edvA0cJY1LaOxcXFOHL8BCY0\nt4Od1AStm8jQwtmK924fgostYwBsbW1xp+Cf7kV3iglyO/tKMS4uLihWa5FZXN7Vp1SjRWqBCkql\nUnesIk1FX+I7hWWwtJDq1eihupRKJf4qKEbZve5Q6UWlKNFoda7Hlds74E7RP0U/raAUNlXsAdnY\nWCOt4J/uRXdVgK2tvFKMq6srCko0KCrTAABUai1yVWq4uLhUirO1tUXyv17bpNxi2FiLe22yra0t\nUvKKob33HtzML4aJibGozUcMwcbGBvnFJcgvKX8PSjVaZOSrKjU8qW2mpqYwMTZGRlH550NLhDsF\nZaLO2SAQkShf5UMz9njYt28fyWVWNCBIQV39FOTm4kRpaWk6cR3bRpGbzJxmRHhRqKM1ebk6kUaj\nqRRTXFxMEWGh1NrTgYYEK8jR1oq+Xb5c1PzLysrIw8mRwpTW9EorL3KVmVPXjtE6campqeSiVFC3\npg40IEhBcmsrio2N1Yn77bffyM7akgYHO1BHXwfy8/KgnJwcnbjmQf7kKjOlIUF25GZtSoF+3jox\neXl5FNzUl/oHutHLET7kaCOjzZs3186G30dpaSl1ahdFXf1caUakD7nb29IXixaJOqehzJj+Mnk5\n2tKwYAcKcrWj4YMHklarFXXOzz/7lJzlMhraTEGtPByoQ9s2VFpaKuqcj4t7tU+nJvI5W8buuXDh\nAn777TdIpVKMHj26UgP/f3vzzTdx8OBBNG3aFEuWLKlyb6m4uBirV6/G3bt30alTpzpppq9SqTBt\n2jRcu3YNHTt2xNtvv11l3N27d/H999+jpKQE/fv3r7JTFlB+rnXHjh2wsbHBuHHjqtxz0Wq1eO21\n13Dq1CmEhobis88+q3IPPj8/H6tXr0Z2djZ69uxZ6QYPYiktLcXq1auRmpqK9u3bi3azCEMjImzZ\nsgVnz56Fn58fRo4cKepRlL/t3r0bsbGxcHFxwbhx4xrcUQN9cQcpxhhjTGS8QIoxxhgzEC62jDHG\nmMi42LI6V1RUhPz8fEOnUaW8vDwUFxfXylgZGRnYs2dPlU0jHhURISsrC2VlZQ8PrkU5OTkoKSl5\nYIxWq0VmZqaolzYx9rjjYsvqjEajwQvPPA17uS2UDg4Y1K9vrRW2msrLy0Ovrp3h7KiAna0NZs54\nBTVZc+Dj5QknRwV6de8GmdQUS5cu1XusxMREhAT6w7OJK+TW1ljx7bd6j1Vd6enpaBvZCq7OSthY\ny/D+e/OqjIuNjYWLUgEvtyZQOthj3759oufG2OOIF0ixOvPF54uwftFH2Nw7GGbGRnhm92V4d+uP\nT79YbOjUMGnsaJScO4QvOvkhr0SNAVsv4JUPFmLs2LGPPNbEiROx6fvV+KSnJ5ysJPjxYiZ+vpSF\nwlKNXrm1Cm2GAbYaTAtzx/WcIvTZEoftu/eiZcuWeo1XHU/26QWjG2cwPkSOHJUGb8WmY9mq79G3\nb9+KmPz8fHh7uGFyiAzhLlY4m1aIL07nIiEpGXK5/AGjM9Zw8QIpZnBHDuzH+KYOsDYzgZmJEZ4N\ndsKxQ/Wjfd7Rw4fxQnNnmBgZwU5qiqd87XE
09qBeY/35559o42YFZ5kpBEHAgAB7FJdp9epepFar\ncebCRUxp4Q5BEOArt0QPT4f79j2uLceOH0c/XxmMBAF2UhO0c5Lg6NEjlWISEhJga26CcBcrAEAL\nJ0s4ykxx5coVUXNj7HHExZbVGTcvbxy5W1BxePZoWh6auHsYOKtybu7uOJqaC6D8/Oix9EI00bPP\nq7u7O66kqyq6OV3KKIKpsQBzc/NHHsvExARKOzscv11+IwOVWoMz6flo0qSJXrlVVxNXF1xKLz/E\nr9ESEvIJbm7ulWKcnZ1xN6+oopNQVrEat3OKdDpIMcb4MDKrQ9nZ2YiOag3rskJYSkxwJa8E+w8f\nrRfNyy9evIhuHaMRqrBCVnEpIHfEnoOHYGlp+chjqVQqKGxksJAATazNEH+3CB27dsfOnTv1ym3H\njh0YO2I42rk74HJGHlpFd8HaDRshCDpHqmrNiRMn0LtHNwQ4SJFeVAZXn0D8sWuPTuOCTxcuxPz3\n30WgoyUu3y3E9Ndm4fU5c0XLi7H6jptasHqhqKgIu3fvhlqtRqdOnerVub309HQcOHAAUqkUXbt2\nhZmZmd5jqdVqDB48GDdv3sSkSZMwZcqUGuWWlJSE48ePQ6lUomPHjqIW2r/dvn0bsbGxsLa2Rteu\nXSvdPejfzp07h0uXLsHf3x9hYWGi58VYfcbFljHGGBMZL5BijDHGDISLLWOMMSYyLraswSMiXLx4\nEceOHUNRUVGNx8vIyMDhw4fx119/PTDu2rVrOHLkCHJzc2s8Z3XdvHkThw8fRkZGRp3NyRqnnJwc\nHDlyBElJSYZO5bHAxZY1aBqNBqOGDUX39m3x7ND+aO7fFNevX9d7vO3btyPAxxsvjRqK0KAALP58\nUZVxr73yMtqGh2HqyMEI9PUW/bpYoHxlcPOgADw9fACa+njpvfqZsYc5dOgQ/Lw8MWlYf4SHNMNb\nc+cYOqV6jxdIsQZt5cqVWD5vLn7t2wxSE2N8cfoG9sMBMfsfvWGFSqWCq9IRG3sFIdLFFil5xej6\n0xkcPH4STZs2rYiLiYnBlDEjsGtgC9iaS/DTldtYcCUPl66Ltwdw8eJFREdFYkFnJzhYSHAxvQgL\njmcjLT2D7zPKahURwdXJEU8HmKOVqxVyVWrM2n8XP/6+A23btjV0egbHC6RYo3T50kV0d5FBamIM\nAHtL/C8AABEmSURBVOjno9C7w1FaWhqkJkaIdLEFALhbS9HcSY5r165Virty5QqiXeWwNZcAAJ70\nVeJq8g1RG/UnJCTATyGDg0X5nEEKCxhBi7t374o2J2uciouLkZGVjXCX8mvQbcxNEKSw4M5hD8HF\nljVozZqHYMfNPBSUlt9558erdxEUFKTXWM7OzijREg7ezAIAXM8pRFxaNvz9/SvFBQcHY8/NLGQW\nl96bMw2BPt4wMhLvx83f3x9X0/Nxp6B8znNphYCRMZRKpWhzssZJKpXCyVGBo38VACjvHHbhbiGC\ng4MNnFk9R0SifJUPzZhhaTQamjh2DCmsraipkwP5e3lScnKy3uPFxMSQg83/27vzuCrLvI/j3x8c\nFZBFRMAFTDFNy9RMeyI1HaXS9tK0vawpW2amfeYpW2216TVTmVP5TOs0Zfs8zdRTmWmrWam4ZxI6\nLkioKRxUQOGaPziPUUmJcnmDfN6vFy/l5jrX+R4O+uW+7+vcJ9Ed3CHNtYqPc1Mef2yX42656UaX\nHB/nenRIcxnpqS43N3eP73N3TX5kkktsGeu6tG3tUloluvfff9/7faJpmj17tktLSXZZbVu7xLhY\nd89ddwYdqcGIdN9POpFztmgSVqxYodLSUnXr1m2vrgwlScXFxcrPz1dGRoZSU1NrHbd27Vpt2LBB\nXbt2VVxc3F7d5+4qKipSQUGBsrKylJiYuE/uE01TaWmp8vLylJ6ernbt2gUdp8HgClIAAHjGAikA\nAAJC2QIA4Nmu38YDTcL69es1bdo0hUIhjRgxQgkJCXs8l3NO7733ntasWaP+/furZ8+e9Zh075SX\nl+utt95SOBzWkCFD1LFjx12OKygo0PTp0xUbG6sTTjhBsbGxuxw3b9485ebmqnPnzvvsHXiC8Mkn\nn+iZZ55RSkqKxo8fr/j4+KAj7bRgwQLNmTNHmZmZGjZs2H77HGA/sqtVU/XxIVYjN2h5eXmubWqK\nG9Al3R3ROc116dTRFRUV7dFcVVVV7oKzz3Ld27VxZ/bOcmlJCe5vzz5bz4n3zNatW92Rhx/msju3\nd6MO7exSWyW5zz777Cfj5s+f79JbJ7vTenZyR3fp4Poc0sMVFxf/ZNzkRya51KR4l9O9rctsk+R+\nc/m4ffEw9rnHHnvMtYiOctkZCS4rOcalJMa7TZs2BR3LOefcU08+6VISq5+DTmmt3Njzz3VVVVVB\nxwKcc6xGxo+MOvVkxa/6XKd3r34/2b/O36isoaP14KRH6jzXzJkzddlZo/TBqMMUG4rW0o2lOubV\nudpUElZ0dHR9R6+Thx56SNMefUB/H36wzEyvLFunx9dJs+fN/8G4YwYP1EnNN+vCnhlyzunS6V+p\n5xkX65Zbb905prS0VG3TUvXnnPZKj2+urdsrdfX0Qr39/ofq06fPvn5oXiXHx+rSPq01oGOiqpzT\n7TPWqPuQEzV16tRAc1VUVKh1qyTdP7SdMhJbqHxHla6d8a2m/uNNDRgwINBsgMQCKfzI2jWrdWDy\n95fxOzAxpLWrV+3RXOvWrdPBbRJ3XqWpe+uWqqqqUjgcrpese6Ng7RodlhKz8zBj3/QkFRYW/mTc\nuoJ16ptW/VIZM9PhKXFat2b1D8Zs3LhRLVs0U3p89fctrlm0MpPjtG7dOs+PYt8rK69Qt5Tqw+hR\nZureJkZrfvT9CEJJSYmiTMpIrH75VotQlA5IjlVBQUHAyYCfR9k2UYOH5uhf+VtVtqNK4fJKvbOq\nTIOH5ezRXP369dNHq9ZrTmGxnHN6bP5qdeqYqaSkpHpOXXeDjh6sF/K+05rwNm2vrNKfc1drwMCB\nPxk3cPBgPTR/rcp3VOnbLeV65usNGvSroT8Y06FDB7WIa6n38qsf56Kirfpmwxb17t17Xz2cfaZt\neppeWrxBlVVORVu26938Yg0fPiLoWEpJSVF6WpreXL5Zzjkt27BNSwrD6tevX9DRgJ+3q2PL9fEh\nztk2aGVlZe7sMWe4ZqFo17xZyF31mytdZWXlHs/32muvudaJCa55KOR69TjILV++vB7T7p2J997j\nYls0d81DITd82K92ee4xHA67U08Y4ZqHQi6meTN36/jxuzwPuHDhQte18wGuWSjapbZu5d599919\n8RD2uby8PNc2pZWLMrlokzvlpBODjrTTsmXL3MHdDnTNQtGudVKie+ONN4KOBOwkztliVyoqKhQV\nFaVQaO8XpjvnVFZWVusq3iBVVlZq+/btiomJ+dlx5eXlCoVCv3iueevWrYqNjd3vV8Fu3rxZ8fHx\n9f
...<remaining base64 PNG data omitted: scatter plot of the original dataset>",
-      "text/plain": [
-       ""
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
-   "source": [
-    "# plotting the dataset as is.\n",
-    "plot(X, Y)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Metric Learning\n",
-    "\n",
-    "Why is metric learning useful? With prior knowledge of which points are supposed to be closer together, we can learn a more informative way to measure distances between points. This becomes especially useful in higher dimensions, where Euclidean distance is a poor measure of similarity.\n",
-    "\n",
-    "Concretely, we learn the distance $D(x, y) = \\sqrt{(x - y)^{\\top} M^{-1} (x - y)}$ by learning a matrix $M$, subject to certain constraints.\n",
-    "\n",
-    "Some good reading material can be found [here](https://arxiv.org/pdf/1306.6709.pdf); it serves as a good literature review of metric learning.\n",
-    "\n",
-    "We will briefly explain the metric-learning algorithms implemented by metric-learn before providing some examples of its usage, and also discuss how to specify constraints manually.\n",
-    "\n",
-    "Metric-learn can be easily integrated with your other machine learning pipelines, and follows (for the most part) scikit-learn conventions."
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Large Margin Nearest Neighbour\n",
-    "\n",
-    "LMNN is a metric learning algorithm primarily designed for k-nearest neighbor classification. The algorithm is based on semidefinite programming, a subclass of convex programming (as most metric learning algorithms are).\n",
-    "\n",
-    "The main intuition behind LMNN is to learn a pseudometric under which all data instances in the training set are surrounded by at least k instances that share the same class label. If this is achieved, the leave-one-out error (a special case of cross-validation) is minimized.\n",
-    "\n",
-    "You can find the paper [here](http://jmlr.csail.mit.edu/papers/volume10/weinberger09a/weinberger09a.pdf)."
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "#### Fit and then transform!"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [],
-   "source": [
-    "# setting up LMNN\n",
-    "lmnn = metric_learn.LMNN(k=5, learn_rate=1e-6)\n",
-    "\n",
-    "# fit the data!\n",
-    "lmnn.fit(X, Y)\n",
-    "\n",
-    "# transform our input space\n",
-    "X_lmnn = lmnn.transform()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "So what have we learned? The matrix $M$ we talked about before.\n",
-    "Let's see what it looks like."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "array([[ 2.49193844,  0.35638993, -0.39984418, -0.77608969],\n",
-       "       [ 0.35638993,  1.68815388, -0.90376817, -0.07406329],\n",
-       "       [-0.39984418, -0.90376817,  2.37468946,  2.18784107],\n",
-       "       [-0.77608969, -0.07406329,  2.18784107,  2.94523937]])"
-      ]
-     },
-     "execution_count": 5,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "lmnn.metric()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Now let us plot the transformed space - this tells us what the original space looks like after being transformed with the new learned metric."
-   ]
-  },
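The learned matrix $M$ is what turns the formula above into a usable distance. A minimal sketch of that computation, assuming NumPy; the name `learned_distance` and the argument `A` are illustrative, and whether `A` plays the role of $M$ or $M^{-1}$ depends on the library's convention (the notebook writes the formula with $M^{-1}$):

    import numpy as np

    def learned_distance(x, y, A):
        # Mahalanobis-style distance induced by a PSD matrix A:
        # D(x, y) = sqrt((x - y)^T A (x - y))
        d = np.asarray(x, dtype=float) - np.asarray(y, dtype=float)
        return float(np.sqrt(d @ A @ d))

For example, `learned_distance(X[0], X[1], lmnn.metric())` would compare the first two samples under the learned metric, assuming `lmnn.metric()` returns the matrix to be applied directly.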
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [
-    {
-     "data": {
-      "image/png": "<base64 PNG data omitted: scatter plot of the LMNN-transformed data>",
-      "text/plain": [
-       ""
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
-   "source": [
-    "plot(X_lmnn, Y)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Pretty neat, huh?\n",
-    "\n",
-    "The rest of this notebook will briefly explain the other metric learning algorithms before plotting them.\n",
-    "Also, while we first ran `fit` and then `transform` to see our data transformed, we can also use `fit_transform` if you are using the bleeding-edge version of the code. The rest of the examples and illustrations will use `fit_transform`."
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Information Theoretic Metric Learning\n",
-    "\n",
-    "ITML uses a regularizer that automatically enforces a positive semi-definite matrix condition - the LogDet divergence. It uses soft must-link and cannot-link constraints, and a simple algorithm based on Bregman projections.\n",
-    "\n",
-    "Link to paper: [ITML](http://www.cs.utexas.edu/users/pjain/pubs/metriclearning_icml.pdf)."
-   ]
-  },
" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "itml = metric_learn.ITML_Supervised(num_constraints=200)\n", - "X_itml = itml.fit_transform(X, Y)" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAdsAAAFsCAYAAACEtRP5AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzs3Xd0FNX7x/H3pPdKCoEQeif0UAOhgyC9g2ABFLAAfhFR\nOio2FARFpKP0Jl16R3rvJZBAIIH0bArJ7s7vjyDqzyRAkt1NeV7ncGTCnZnPiUme3Dt37lVUVUUI\nIYQQhmNm6gBCCCFEQSfFVgghhDAwKbZCCCGEgUmxFUIIIQxMiq0QQghhYFJshRBCCAOzMNSFFUWR\nd4qEEEIUOqqqKv//YwYrtk9vaMjLCyGEEHmKovynzgIyjCyEEEIYnBRbIYQQwsCk2AohhBAGJsVW\nCCGEMDAptkIIIYSBSbEVQgghDEyKrRBCCGFgUmyFEEIIA5NiK4QQQhiYFFshhBDCwKTYCiGEEAYm\nxVYIIYQwMCm2QgghhIFJsRVCCCEMTIqtEEIIYWBSbIUQQggDk2IrhBBCGJgUWyGEEMLApNgKIYQQ\nBibFVgghhDAwKbZCZEGr1RITE4OqqqaOIoTIx6TYCpGJhYsW4eziim8JPypXrUZwcLCpIwkh8inF\nUL+xK4qiSm9A5Fdnz56lVZt2jJ27Cp+SZdj22y9c3LeVc2dOmzqaECIPUxQFVVWV//9x6dkKkYGT\nJ09SvVEQPiXLANC27yAuXThPWlqaiZMJIfIjKbZCZMDX15c7Vy+Q+iQFgFsXz+Dq7o6lpaWJkwkh\n8iMZRhYiA6qq0u+1ARw5dpziZcpz9dQxfl26hPbt25s6mhAiD8tsGFmKrRCZUFWVgwcPEh4eTt26\ndSldurSpIwkh8jgptkIIIYSByQQpIYQQwkSk2AohhBAGJsVWCCGEMDAptkIIIYSBSbEVQgghDEyK\nrRBCCGFgUmyFEEIIA5NiK4QQQhiYFFshhBDCwKTYigIhNTWVqKgo2eRdCJEnSbEV+d7sH3/ExcUV\nv1KlqF6zFqGhoaaOJIQQ/yJrI4scSUxMZOXKlcTHx9OqVSuqVq1q1PsfOXKErj168skva/Dw8WXj\ngh+4f+E4Rw4dNGoOIYQAWRtZGIBGo6FBo8b8vHQFO46fp0nTIP744w+jZjhx4gS1g9rgWawEiqLQ\nrt8QTp44btQMQgjxPBamDiDyr0WLFmHv4cN7X/2MoijUCGzJyA//R9u2bY2WwdfXl9uLlqJNS8XC\n0orr507gU6y40e4vhBAvQoqtyLbIyEiKliyLoqSPmBQvXZ7oqCijZujatSvLVqxkQv/2FPUrxbUz\nJ1i7ZrVRMwghxPPIM1uRbYcOHaJrj56M/G4hHsV8+e2biRR3tWfZr0uNmkOv13PgwAEiIyOpV68e\nJUqUMOr9hRDiL7J5vDCIJUuWMObjsSTEx/NKhw4snD8PR0dHU8cSQgiTkGIrhBBCGJjMRhZCCCFM\nRIqtEEIIYWBSbIUQQggDk2IrhBBCGJgUWyGEEMLApNgKIYQQBibFVgghhDAwKbZCCCGEgUmxFUII\nIQxMiq0QQghhYFJshRBCCAOTYiuEEEIYmBRbIYQQwsCk2AohhBAGJsVWCCGEMDAptkIIIYSBSbEV\nQgghDEyKrRBCCGFgUmyFEEIIA5NiK4QQQhiYFFshhBDCwKTYCiGEEAYmxVYIIYQwMCm2QgghhIFJ\nsRV50o4dOyhboQI+viXo2q0bWq3W1JGEECLbpNiKDIWGhnLixAl0Op3R733q1Ck6du5CndZd6D1y\nAqcuXKZpUDOj5xBCiNyiqKpqmAsrimqoawvD0el0lC1Xnnv3QjEzt8DS0pKdf2ynUaNGRsvQtWtX\nHqcqDJs6E4DHD+7xv27NSU1JNlqGl3X06FGGDn+X8IcPadS4MfN/mYubm5upYwkhjExRFFRVVf7/\nx6VnK/6lW7duaFJSmf3HSRYduU7jDt1o/2ono2ZI/2L9+1hVVf7zlZuHhISE0KFjR5r1fZvxizaS\naGZH9569TB1LCJGHWJg6gMhbTp46TfMufXB2KwLAK/0Gc2DjaqNmGDt2LI0Dm7BhfhmKlSrHmjnf\nEhAQYNQML+PgwYNUDQikXsv2ALw2ejJvBVYkOTkZW1tbE6cTQuQF0rMV/+Lt5cnF44fQP31We/X0\nMSwsLI2aoU6dOmzdspmzuzez5ofPaVi7Ogf27zNqhpfh5ORE5MP76PV6AKIfhWNuZo6VlZWJkwkh\n8gp5Ziv+JTo6mhIlS+Ho6o6rpze3L55lyuRJjBkzxtTR8qzU1FSatWjJE8USv4r+HNvxO6M+eI8P\nR40ydTQhhJFl9sxWiq34D41Gw8iRI4mOjuaDDz6gSZMmpo6U56WkpLBw4UIePHhA48aNadu2rakj\nCSFMQIptLouLi2Pz5s2kpaXRrl07vL29TR1JCCGEiUmxzUWPHj2ifoOGFPEtjbWtLdfPHOfggf1U\nrFjR1NGEEEKYkBTbXDRq1IdcDY9hwOgpAGxfNp/Ym+fYtPF3EycTQghhSvKebS56GB6OX4Wqz45L\nVqxKeESECRMJIYTIy6TYZkNQUFP2rF5CXNRjkhM1bFs6h2ZBQaaOJYQQIo+SRS2yYcjgwdy5c5eR\nHRuh1+vp1bsPU6dMNnWsXBUeHo5Go6FkyZJYWMiXiRBC5IQ8s80BvV6PqqqYm5ubOkquUVWVd997\nn19/+xU7e0fcXF3YteMPihUrZupoQgiR58kzWwMwMzMrUIUWYMWKFezcd4DvNx3lu81HqdyoJW8N\nHmLqWEIIka9JsRX/cu78eWoFtcXO0QlFUQh8tQfnz583dSwhhMjXpNiKfylfrhxXTx5Gm5YKwPnD\neylbtqyJUwkhRP4mz2zFv2i1Wrr37MXJ02dwLeJBTMRD9uzeJQt2CCHEC5BFLcQLU1WVM2fOoNFo\nqFmzJk5OTqaOJIQQ+YIUW2Fyqqpy6tQpYmJiqF27Nu7u7qaOJIQQuSqzYisvUAqj0Ol09OrTl2PH\nT1KkqA8P7txixx/bqVmzpqmjCSGEwUmxLSSuXbvGt9O/Q6PR0KN7N7p162bU+69atYrLN27zxepd\nWFpZc3jret54axDnzpw2ao6XkZqayvLlywkPDycwMJBGjRqZO
pIQIp+S2ciFwO3bt2kUGEiClQvO\nFWrz7ohRzF+wwKgZgoODqVCrPpZW1gD4N2zK3Tt3jJrhZaSlpdGydRtm/LyAI1fu0qV7D+bNn2/q\nWEKIfEqKbSGwaNEiGr7Sjc6D3ieoUy+GTP6eb7/73qgZatasyZkDO4iPiUJVVfatX0b16tUzbX/w\n4EHqNWhI2fIVee/9D0hJSTFiWti0aRORcRpG/7iMviPH8dGPyxk16kNkHoIQIjtkGLkQSNNqsbK2\neXZsZW2DTqszaob27dvz2p/HGNWxMfaOjri7ubFj+7YM2167do3OXbry2pjP8ClZhrU/fc3Q4e+y\naIHxepYxMTF4lyiNmVn676PeviVJSU5Cq9ViaWlptBxCiIJBZiMXAhcuXCCoWXN6vDcWVw8v1sya\nxpA3BzLmo4+MniUmJob4+HiKFy+e6VKX06dPZ9fJiwwc8xkAcVGPGdO9OXGxMUbLeePGDeo3aMg7\nn/1AqUrV2PDLDJIjQti3Z7fRMggh8h+ZjVyI+fv7s2XzJqZ+/gVXNBo+GP4O7737rkmyuLq64urq\nmmUbOzs7EmKinx3HRUVia2tr6Gj/Ur58eVauWM7Q4e/yKCKCwCaBrFm10qgZhBAFh/RsRZ4TGxtL\n7Tp1KelfB2+/Muxds5QJ4z5h6DvvmDqaEEJkSRa1EPlKVFQUs2fPJio6mrZt2vDKK6+YOpIQQjyX\nFFshhBDCwGQ/WyGEEMJEZIJUNoWEhLB48WLS0tLo1asX1apVM3UkIYQQeZT0bLPh1q1b1AkI4Pj1\nUC49iKVpUDOOHj1q6lhCCCHyKHlmmw1Dhw3nkdaK7kM/BODg5jXcOrKDXTv+MHGyzC1dupTxkyaT\nlqalVfNmLFq08NmCDUIIIXKHPLPNRQmaBNy9fZ4du3v5oNFoTJgoa5s2bWLw2+/Qoteb9B45nu27\n99Kte3dTxxJCiEJDntlmQ9fOnXl3xCiKlS6Pja0dq2d9wZA3Bpg6Vqa+/Oor2vZ5k1Y9BwLg5unN\n1+++ZuJUQghReEixzYauXbsSGRXFN19+jFar5Y2BAxg1cqSpY2VKURRUvf7ZsV6nz6I1aLVaFixY\nQGRkJH379qVUqVKGjiiEEAWaPLMtBLZv307nrl3pMXQ0rp5eLJ/xOa2CmrJy5Yr/tE1KSqJs+Yqk\nanU4F/EgLPgGq1asoFOnTiZILoQQ+YssapFPrF69mvETJ6PVptG3dy+mTp2aK9ddu3YtYz8dx5PU\nVNq3bcOcOXMybNevXz9OXb7BuF9WY2Fpye61v7Jx3kwiH4XnSg4hhCjIpNjmAxs3bqRn7z50f3sU\nDs4urPhhGgP69WH27NlGy1C/QQP8agXSedD7ADwKC+XjXq1JTsy7E8CEECKvkNnI+cDkKVPpMOAd\nOgx8h6DOvRn22UyWr1pt1AwN6tfn4Ja1aOJiUFWV3Wt+xc3dPdP2O3fupFadupQtX5FRH/6P1NRU\nI6YVQoj8QSZI5SE6vR4rm39u8m6NsUcHpk+fzuGjfzK8TR0srWywsDDnwL69GbY9e/Ysvfv2481x\nX+Hh48vKGZ/x4f9GM+uHmUbNLIQQeZ0MI+chixYtYti77/H6x5/h6OzKoi8/pUWTwAwnMhna7du3\niYyMpHbt2lhYZPw72eTJkzlz9xG93x8LQMT9EL56pxcPw+4bM6oQQuQZsnl8PvDGG28QFxfH19O/\nQ6fT0a5lCxYvXmySLGXKlKFMmTJZtrG3tychOvLZcVzUY+zs7AwdTQgh8h3p2Ypsi4yMpFbtOlQI\nCKSIjy97Vi9m+tdf8dprsmCGEKJwkglShdx3332Hl08x3D29aP/qq2i12hxfs0iRIpw6eYJGVcrg\npSSz4rdfpdAKIUQGpGdbCCxbtoxBg4cwaPxXuHp4sejL8VQoVYLdu3aaOpoQQhQo8p5tIVa/QQOK\n+den+zvpuxTdvXaJL97pQ0JcjImTCSFEwVJoJ0jdunWLpUuXotfr6devH5UqVTJ1JKOztLQkKSHh\n2XFyYkKW2+ulpaWxc+dOEhISaNKkCT4+Ppm2FUII8XwFuthevnyZJkFBNHylG2bmFjRqHMiunTuo\nXbu2qaMZ1bQvvqBFq9ZY29rh5uXN2jnT6durR4ZtU1JSaNm6DZGx8bh5FeXd997nj+3bqFOnjpFT\nCyFEwVGgh5H7DxiA6laCDgPfAWDX6iXEXD/DhvXrTJrLFPbu3cuHo0eTnPKE3j26M2nSpAzbzZo1\niyVrNjLy+/TN5Q9v28Dxjcs4deKYcQO/hPj4eObMmcPjyEhatWxJmzZtTB1JCFFIFcrZyAkJGty8\nvJ8du3kVJSEPb/JuSM2bN+fs6dNcu3wp00ILcO/+fUpXq/lsmLl89do8eBBmpJQvT6PRUL9hI7Ye\nPM69ZDNef2swc37+2dSxhBDiXwp0se3RrSsb583g1qWz3Ll6kXVzvqF7t66mjpWnNW7UiGPbNxDz\nOBy9Tsf2X3+hQYMGmbZfs2YNpcuWw8PTi9fffIukpCQjpk2/v0ORogz97Ac6v/Ueo2YsYvyECUbN\nIIQQz1Ogn9n279+f2NhYZn42GlVVGTZkMG8PGWLqWHlax44duXDxIqM6BaKgEFC/PsvWrsmw7bFj\nx3hn+Lu8O+0nPIqVYNn0Sbz7/gcsnD/PaHk1Gg2unl7Pjt08i5JYSEcvhBB5V4F+ZiuyLy0tjZSU\nFBwdHTNtM3HiRC7cj6bnu2MAePzgPl8M6kr4wwfGisn169dp0LARr38yjeJlKrDu528p7mrPqhXG\nX09aCCEK5TNbQzp58iT9BwygV+8+/PHHH6aOk+ssLS2zLLQArq6uRD649+z4UVgoTs7Oho72LxUq\nVGDD+nXsX/ELM0YMpEJxLxbOn2/UDEII8TzSs82GU6dO0bpNWzq8+R42tnZsmDud+b/MpVOnTqaO\nZlRxcXHUrVcfz5LlcC/qy+Eta1g4f16h+zwIIcRfZAWpXPTmoMGkOnnT/rW3ATixZztntq7IdN/X\nvGDjxo1MmDSFxMREunfrymdTp2S6dd7LiIuLY8mSJcTGxtKuXTvq1q2bC2mFECJ/KrQrSBmCTqfD\nwtLq2bGllRU6nS7T9lqtlnXr1pGUlES3bt1wcnIyRsxnjh49ypuDhzB4wre4enjz2/SJqOp4vvpy\nWo6v7ezszPvvv58LKf8rNTWV+Ph4ihQpYpDrCyGEsUjPNhsOHjxIl27d6T1iPNa2tqyc8RlffT41\nwx1vYmNjqVCpCqlaLdY2tmjiYjh6+BD+/v5Gy/u/0aMJTYIugz4AIPTmVX75dDi3b94wWoaX1adv\nX9asWYOqqri4urFvz26jfs6EECI7ZIJULmrSpAkrly/j1uHtnNu+mm+mfZ7p1nK9evfBu1R5Zm07\nzncbD9GsS186d+tm1LwO9vbERT5+dhwb+Qg7O/tM2yclJfHbb78xZ84cbt68aYyI/zJ79my2bPuD\nb9buZenxYBq060Lr
tu2MnkMIIXKL9GwNrHylygT1eINmXfoAcOviWb4b+SYxUY+fc2buefjwIbXr\n1MU/sCXORbzYs3oJ8+bOoUuXLv9pq9FoaBTYBHM7R9y8fDhzYBe/b1hPkyZNjJb3lVdeQXXxYcDo\nSQAkJsTxTouaaNNSjZZBCCGyQ3q2JlKhbBkOb9tAWuoTVFXl0Na1Rn8GWbRoUU6dPEH9iiUpbqPj\n9/VrMyy0APPmzcO+SFH+N+s33hr/Da9/Mo33Row0al4/Pz9unD+F/ulz8FsXz2Jja2vUDEIIkZuk\nZ2tgSUlJVKxchajoaCytrFF1Wk4c+5MKFSqYOlqGxnz8McHxWroOHgFAxP0QvhnWh7B7oUbLkJKS\nQsnSZbCwsaOoXxkuHjvI1CmTGT16tNEyCCFEdshsZBOxs7PjbvBt9u/fT1JSEi1btsTGxsbUsTLV\nonlzlr45iPqtXsXdy4cNv3xHs2bNjJrBxsaG0Lt3+OKLLwgPD+ebiR8TFBRk1AxCCJGbpGcr/uPH\nH3/kk0/HkZycxCvtO/DrksXPXU1KCCGELGohXpKqqqiq+myrPSGEEM+X7WFkRVGsgW5AyX+2V1V1\nSm4GzG/27t3Lt999T1paGm+98Tq9e/c2daRcpSgKivKfrxchhBDZ8CLPbDcCccBp4Ilh47y4yMhI\nAJOsLnTo0CG69+xFrw8+xcbWjpGjx6DT6ejXr5/RswghhMj7njuMrCjKJVVVq770hQ00jPzkyRN6\n9enL7t27AGjVqjUrly/D2to61++VmdfffAs8StKm9xsAnDm4m2PrF3PowH6jZXhZS5YsYcKkySQm\naujWtRs/zJxh1M+ZEEIUBjl5z/aooijVDJApW6Z+9jkPYhL4addZftp1lrDoeKZ+9rlRMyiKglab\n9uxYp9Pm6Webe/fu5aOxnzBoykwmLdnC6Ss3+d/oj0wdSwghCo1Mh5EVRbkIqE/bvKEoSjDpw8gK\noKqqapKFak+cPEmTTr2xtErvlQV27MXJneuMmmHYO2/Tpm07LCyssLa1Zf3P3/LLnJ+MmuFlbN22\njWbdXqNs1ZoA9PrgE+aMeZtZP8w0cTIhhCgcsnpm28FoKV5C6VKluHryKLWatALg6smjlCpZMtP2\nhw8f5sef5qDX63l7yGCaN2+e4wx169Zl29YtzJw1C02alsUL5tO+ffscX9dQXF1cOHfm8rPjiHsh\nOBt5k3chhCjMXuSZ7a+qqr72vI9lcJ5Bntk+fvyYwKZBmNvYAaBLSeLwwQMZTpQ6dOgQnbp0pfOQ\nkZibW7B+7nSW/7qUNm3a5HquvCwqKoo6dQPwrVQdlyKeHN66jlUrltOqVStTRxNCiAIl2+/ZKopy\nRlXVWv84Ngcuqqpa+TnnGew926SkJA4fPgxA48aNsbOzy7Bdz159cCpbnRbd+wNweOt67hzbxbYt\nmw2SKzesWLGCz76YxpMnT+jbuzcTJ07A3Nw8x9eNjo7mt99+Q6PR0L59e6pXr54LaYUQQvzTS79n\nqyjKWOATwFZRlPi/PgykAr8YJOULsrOzo3Xr1s9tp9VpMbe0fHZsYWmZ5SbvprZr1y4+GPUhQybP\nwMHZhSXTPsHSypLx48bl+Npubm4G2+RdCCFE1l6kZztNVdWxL33hPLCC1I4dO+g/4HV6jxyHubkF\nK76fws8/zs50xxtTe2foMJLsPWnXbxAANy+cYc13E7h4/pyJkwkhhHgRL/3qj6IotRRFqQWs+evv\n//xj0LS5pE2bNixeOJ/r+7dwefcG5syelWcLLYCDgz2xkRHPjmMeR+Dg4JBp+xs3btC0aVPq1KnD\nqlWrjBFRCCFENmTas1UUZd/Tv9oAdYDzpA8j+wOnVFVtkOWF80DPNr8JCQkhoF59ajd/BXtnV/as\nWcLK5csyHDK/cOEC9Ro0pGzVmjgX8eDk3u38b9QoPv/cuO8cCyGE+FtOJkitByaqqnrx6XFVYJKq\nqt2fc54U22y4d+8e8+bNIzklhR7duxMQEJBhu9JlyuBTwZ/hn88C4PC2DSz9egIJcTHGjCsMTK/X\ns2nTJkJDQwkICKB+/fqmjiSEyEJO9rOt8FehBVBV9ZKiKJVyNZ14xtfXlylTnr/HQ2JSMiUr/b2w\nV4myFdHr9YaMJoxMVVX69ezBteOHqePlyFeTIvl06mcMG/6uqaMJIV7Si6wxeEFRlPmKogQ9/TMP\nuGDoYCJrjRs2YNuv8wgPvUOSJoHVP32No2Pmz3dF/nPo0CHOHjnAzs7VmR5Ylu2d/Rn9v//x5Eme\n2Q9ECPGCXqRn+wYwFPjg6fFBYI7BEokXsm7dOqr5V+ejHi3R63U4u7hx8fxZU8cSuejx48eUdXPE\n2iL9d2I/J1uszM1ISEiQTSSEyGdk8/gCQKfT5crCFyJvuXfvHrWqVWF+iwo09HHlp/P3WPtIx4Wr\n12WvYSHyqOy8+rP66X8vKopy4f//MWTY3LR161Zat21HqzZtWb9+vanjGIQU2oLJ19eXles2MOJY\nGD5z9vJHog2b/9gphVaIfCirV3+Kqqr6UFEUv4z+XVXVkCwvnAd6tjt27KDfawPpM2p8vljUQojM\nqKoqRVaIfCAnr/68BRxUVfXmS97Q5MW2a/ceeFSpR1Dn3gAc27mZa/s3s2P7NpPmykrffv34feMm\ndDodlStV4sjhQ5mu/SxEdqWmpjJ37lxu37xB7boB9O/fX4q5ELkgJ5vHlwDmKooSrCjKGkVR3lMU\npUbuR8x9FuYW6LTaZ8fatLQ8PeT66aefsn3HLsbPX8u36/aRpJrTqnXh2qFIGJ5Op6ND29YsnT6Z\n2EOr+HzMB7w3fKipYwlRoL3wBClFUWyBwcD/gGKqqmZZtfJCzza/bbFX1b86ddp1p03vNwC4ffkc\n377/OrHRkSZOJgqSo0eP0q9LB75r5om5mYImVceQraGE3A/D3d3d1PGEyNey3bNVFGWcoijbgZ1A\nWdKLbfHcj5j7AgMD2bhhPUl3LhF/8yyrVyzPs4UWwMnRgQd3bj07Dg+9g6WVZabtIyMjmTFjBl98\n8QUXL17MtJ0Q/5SYmIiLrSXmZuk/D+wtzbCxsiApKSnTc1JSUggJCSE1NdVYMYUoUF5oP1tAC2wF\nDgB/qqr63Lfq80LPNr/5a73jmoEtcHJ1Z//vK/lu+rcMGzbsP20fPXpE3YB6lKpWG0dXd45sXce6\ntWto1qyZCZKL/CQuLo7KFcrRvrg5Nbxt2X03kRALT06eOY+Z2X9//96wYQNvDHgNawszVDNz1v2+\nicDAQBMkFwWZTqfj66+msX3zJtyLePDZl19TpUoVU8d6admeIPX0ZCegEdAY6AE8UlW18XPOkWKb\nDVevXuXTTz8lOTmZ4cOH06FDhwzbTZgwgWPX7vLG2C8AOLFnG0fXLeb4n0eNGVfkU9evX2fo4DcJ\nDr5DrVq1+Hn+Qjw9Pf/T7sGDB1SpWJ7xDT0o62bDmQcafrqgISTsAba2tiZIL
gqqD0eOYMeaX+le\nzo6w+DR+D07h1Lnz+Pll+EJMnpXttZGfbjwQCDQlffefe8ChXE8oAKhUqdILvQ8cHRuLR7ESz449\ni/sRGxuX5TlpaWmkpqZib2+f45wif6tQoQJ7Dx55brsrV65Q0t2esm42ANTyccD6sobQ0FAqVKhg\n6JiiEFm4YAHfNvPCw96SWkXhflIUGzZsYMSIEaaOliteZDbyl4Aj8ANQSVXVZqqqTjBsLPE8r7Zv\nz+5Vi7l9+RyRD8NYM2saHdq/kmFbVVWZNGkyjk5OuLsXoWXrNsTGxho5sciP/Pz8CIlKJDo5fVb/\nvbgnxCU/oWjRoiZOJgoac3MztPq/R0PT9AVrwR5ZrjEfW7BwIZOnTCUlOZkePXrw/XfTsbKy+k+7\ndevWMWrMJ3w8ZwWOLm4s/vITPGwUVixbZoLUIr/58ovPmf71l5Qu4sDNRwnMmPUjAwYONHUsUcBM\nmTyJpXNm0qm0LWEaHfsf6jh38RLe3t6mjvZScvTMNps3lGKbR4wcOYoIvS2vvp7+LuWDu7eZ9eGb\n3A2+beJkIr+4cuUKwcHBVK5cmdKlS5s6jiiAVFVlwfz5bN/8O+4ennw6YVK+e14LOdvPVuRzxYsX\n48/te54t+Xfzwml8fIqZOpbIRypXrkzlypVNHUOQXpTi4+NxdHTMcPZ4fqUoCoMGD2bQ4MGmjmIQ\nBef/lMjU0KFD0Wti+HxQV2aPeZt1P37Fj7NmmjqWEOIlXb9+nYplS1PU0wMXJ0dWr15t6kh53pMn\nT3hv2Dv4FSuKf6XybNtmmuV6s9qIYDOQ6Tiwqqods7ywDCPnKampqezcuRONRkPTpk1lgosQ+Yyq\nqlQsW5pmbim8Us6F4JgUph59zNETp2VmeBaGDhnMmV0bGFjFiQhNGrPPxrJz735q165tkPtlZxj5\nW4MkESZhZWWV6Tu7wjT0en2BGgYUhhUXF8f9Bw94pV76M/PSrjZU83bkzJkzUmyz8PuG9Uxp4IKX\ngxXFnaybq8qmAAAgAElEQVRpFvmELVu2GKzYZibT73RVVQ9k9ceYIYUoSLZv346vtydWlpY0qF2T\nkJAsd6sUAgBHR0fMzcy5E5MCQIpWz52YFIoVk/kXWbG3tyMq+e8NaWJTVRwdHY2e40WWaywHTAMq\nAzZ/fVxV1SynJMowshD/FRwcTL1aNVjaqiIBRV2YeTaUzTFmnL10xdTRsnT9+nXOnTtHgwYNKFGi\nxPNPEAaxauVKhr09CH9vR4Jjkmn5SkfmL1oi2yNmYcWKFbw/dAht/Gx5nKJyI9ma0+cu4ObmZpD7\n5WQ/28PAROB74FXgDcDseQtbSLEV4r9WrFjBmi8+ZXHL8kD6c7hicw8QFh6Bs7OzidNlbMigQSxZ\nvBBXWwtikrWMnzSFcePGmTpWoXX9+nVOnz5NsWLFaNKkiRTaF3Dw4EG2bN6Mi6srb7/9tkF3t8pJ\nsT2tqmptRVEuqqpa7Z8fe855UmyF+H/27NnDewN6c7BbTazMzbgVk0iT1aeIS9DkydVy/vzzT5o1\nacx3bfwo7mTN1cdJTNh3j3sPwjNcS1mIwi4n79k+URTFDLipKMq7QBjgkNsBhSgMmjVrRtWAhrTa\ncIyano5sv/OYmT/MypOFFtL3hPZ1sqa4kzUAlTzssLcy5/Tp07Rr187E6YTIP16k2H4A2AHvA1OB\n5oCs1SZENpiZmbFy3QY2b95MWFgYwwICqFOnjqljZap+/fpMjH9CuCYVbwcrbkWnkJiqo2bNmqaO\nJkS+8sLLNT7dZk9VVTXhBdvLMLLINlVVOXLkCNHR0QQEBOS79VELkn69e7Nu7Wq8HKyI0KQy4sPR\nfPnVV6aOJUSelJNntnWARaTv/AMQB7ypqurp55wnxVZki06no3e3rlw4foSSLg6cDY9l47btNGjQ\nwNTRCq2TJ09y6tQpmjRpki839BbCWHJSbC8Aw1VVPfT0uDHwk6qq/s85T4qtyJaVK1cyfcwItnfy\nx8rcjM23Iph2LZ5LN2XjBCFE3pZZsX2R5Wt0fxVaAFVVDwPaLNoLkSMhISHU93LAyjz9yzOwuBsh\nYQ9MnEoIIbLvRYrtAUVR5iqKEqQoSlNFUX4C9iuKUktRlFqGDigKn7p167LlTjQPNCmoqsq8i2HU\nrlHd1LGEECLbXmQYeV8W/6yqqto8k/NkGFlk29fTpjF58mTsrCwoWqwYW3bskpWLhBB5nmweL/Kd\nxMRE4uPj8fLykgX7hRD5Qraf2SqK4qUoygJFUbY/Pa6sKMpbhggpxD/Z29tTtGjRXC20SUlJDB8+\nnC5durBs2bJcu64QQmTlRX6KLQZ2AD5Pj28AIwwVSAhDSUpKooyvD8fWLcP19imGvvk67733nqlj\nCSEKgRcptkVUVV0N6AFUVdUCOoOmEsIAPv30U7wsVPb0rseMFlXY0r0uC36eY+pYwgC+/PJLOnfu\nzKRJk9Dr9aaOI8QLLdeYqCiKO6ACKIpSn/SFLYTIVx4+fEiVIg6YPd0lpZKbA090OrRaLRYWmX8r\n6HS6PLt2sfivJg3rc/ncaQKKOfDz7m1sWLOK85evmjqWKORepGc7CtgElFEU5QiwFJCxN5Hv9OrV\ni003Izj+IIbENC3jD1/H280100K7bt06vNxdsbayIrBeXcLCwoycOP85efIkJX28cLGzpmyJ4ly9\natwid+7cOY6fOMHMdiUZWtebGe1KcvvWDbZt22bUHEL8f8/t2aqqekZRlKZABUABrquqmmbwZELk\nsi5duvDuh6PpMv1bUrRavN3d2H3oSIZtL1++zNC33mBVu8r4ezjx9am79OrSicMnThk5df4RHR1N\ns8BGtC/rRH1/H/beiaNBnVqER8VgY2NjlAx37tzB0docJ+v0H212luYUsbXk7t27Rrm/EJnJtGer\nKEpdRVG84dlz2trA58B0RVEMs8W9EAY2bdo0NKlpaPUq9x9HUalSpQzbHT16lNalPKjt7YKluRkf\n1y3F8TPnSEuT3zMzs3btWlysFPr5e1DGzYZBtTxR9Gns3bvXaBmaNWtGUprKHzdjeKLVczAknvDE\nNNq3b2+0DEJkJKth5LlAKoCiKE2AL0kfQo4DfjF8NCFMx9PTkytRiWifTq65HJmAk71dls92CztH\nR0eStXp0+vT361N1Kk90Kk5OTpmec/ToURYtWsTx48dzJYOLiwsr165n+ZU4eq29wS9nIpm/aAl+\nfn65cn0hsivTRS0URTmvqmr1p3//EXisquqkp8fnVFWtkeWFZVELkY/pdDo6t2/Hw6sXqOpuz/bg\nx8z8+Rd69+5t6mh5llarxbeoJ94WqdQr7sCBu/Gk2rpx825ohu9KTxz3KfPmzKaKhx2XHiUyfMSH\njJsw0QTJhcg9L72ClKIol4AaqqpqFUW5BgxRVfXgX/+mqmrV59xQiq3I13Q6HZs2bSI8PJyGDRtS\nvbqsz/w8Go2GPr17EXzjGpX9a/Lr
b79l+Lz27t271KxWhR9a+eBsY0Fsspb3doVx9cYtfHx8Mriy\nEPlDZsU2qzGxFaRvQhAJJAN/bbFXFnn1RxQC5ubmdOnSxSDXDg8P58aNG9SpUwc7OzuD3MMUHBwc\n2Lxl63PbRURE4O1sh7NN+o8gF1sLPJ3siIiIkGIrCqRMn9mqqvo58CHpK0g1/kc31Qx59UeIbBvQ\nvx++xXx4tXVz3F2cWLFihakjGV3FihWJTErjRFgCqqry570E4lP1lCtXztTRhDAI2YhAFCrJycks\nXLiQiIgImjdvTlBQkFHvv2HDBvr36s53bUpS1NGKg3fj+elUBJqU1EK32cKff/5Jj66diXgcRVEv\nD9Zu2EhAQICpYwmRIznZPF6IAiElJYWmDeuzddaXpOxczmvdOvPL3LlGzbB7926qetpR1NEKgCYl\nndDq9Ny7d8+oOfKCBg0acP9hBPEJCYSGPZRCKwo0Kbai0Fi/fj32mihWtK3MuAZlWde+KmM/Gm3U\nDNWrV+dGVAqa1PTlxa8+TgIFihUrlmH7R48e8c7gQbRr2ZypUyYXyPd8bW1tTR1BCIOTlwZFoREf\nH08JB2uUp2sj+znbkpCUhF6vz3QIV6vVkpCQgIuLy7PzcmLIkCEsmjeXdzafp5iTFXdinzBuwqQM\n399NTEwksEE9KtokUsPNio0LLnDtymWWrVyd4xxCCOOSnq0oNJo3b862O4/ZcecxYQkpjD50i3at\nWmZaaBctXIirkyN+xXyoVrE8wcHBuZLjz5OnWbR8Ff0++ITDfx5nwoQJGbbbt28fdtpE3qruTgNf\nR8bUc2fD778THx+fKzkM4ebNmzRtVJ9iXh60bh5UKIfHhciI9GxFoVG+fHlWrtvAiGHv8DjyDs2a\nBbF0waIM2547d46xH45kf486lHOzZ/bZEHp06sjpi5dyJUu3bt1eqN0/O9Mv0rHes2cP586do0WL\nFtSokeW6M7lOo9HQIqgJrYtC/3pO7A+9RqtmTbl49TqWlpZGzfIyVFUlJiYGFxeXQjdJTRiPfGWJ\nQqVFixZcvH6T8KhoVqxdj7Ozc4btTpw4QauSRSjnZg/AsBolOH/lqlGfmQYFBRGHNUsvRnMqTMP0\nE9G0b/dKpssftmvdklfbtubnL8ZTv04txn36qdGyApw/fx57RUvH8i54OVjRs5ILiXHR3Lp1y6g5\nXsbJkycpXtQLv+I+FHFzYceOHaaOJAooKbZCZMDX15ezjxJI0aZPZDr5MJYirs5G7aE5ODhw+M8T\nONVqzRG1BC37DmLZqoyf127YsIFDB/Yxp0Npprfx47MWJfj6q2lGHXJ2dHQkNukJqbr09aSTtXoS\nklNxcHAwWoaXkZKSwquvtGVAOSuWdS7FR3Vd6dOzOxEREaaOJgogGUYWIgNt27ZlWeMgmqzdS8Ui\nThwJjWTxcuMvPlG0aFEWLvn1ue1OnjxJaVdbXG3Tv6XLu9tiaaZw/fp16tata+iYAFSrVo3GTZsx\n9egRqrkpnHqso0fPXvj6+hrl/i8rJCQES/Q08HUEoLKHHX5uKVy6dAkvLy8TpxMFjRRbITKgKAq/\nrlzFgQMHCA8P57uAAEqXLm3qWJlq1qwZM775irD4VIo5WXH6gQadClWqVDFaBkVRWLl2PUuWLOHa\n1auMr1GDvn37Gu3+L8vLy4vYpBTCNal4O1gR/0TLvehEihcvnmF7rVbLDzNmcPL4n5StUJExH4/N\ns712kffIClIiR1JTU9m6dSvx8fE0bdqUkiVLmjpSofXm66/z269LcbA2JylNz3czZzFs2DBTx8rT\n5vz0IxM++ZjKXg5cf5zIoHeG89kX0zJs2693T64e3UugjyUXorQkO/ty6M/jeXrylzC+l971Jxdu\nKMW2gEtJSaFVUBPSIu5TwsmGfSGRrN+8lcDAQFNHK7RCQ0O5fPky9erVw83NzdRx8oVLly5x6dIl\nypQpk+mQ+6NHjyhb0o8FHUpgbWGGXlUZvf8xC1f/Ll/v4l+ys+uPEFlatGgRdjEPWdWpGmaKwpZb\nzrz/zhDOXr5q6miZiouL44eZM4l4+ICWbdrSuXNnU0fKVSVKlKBEiRKmjpGvVK1alapVs9wxlLS0\nNCwszLAwS/8ZaqYo2Fqak5qamuk5cXFx3Lp1i2LFiuHt7Z2rmUX+I7ORRbY9fPiQGq7WmD19AbSm\nlzPheXgmp0ajoX6dWhz4bRbJxzfw/uCBfPP1V6aOJfIBHx8f/P2rM+dsNFcfJ7H8cgyJijX169fP\nsP2ePXso61eCN7q0p3K5svw0e7aRE4u8RoqtyLbAwEBW3Y7iblwSWr2e6WdCady4saljZWrDhg04\n6xJ4v04ROlV0Y1yDIkydMoW8/LgjNDSUPt270rhOLT784H2SkpJMHSnPS05OJiAgAO8ibtSoUZ24\nuJxvv60oCpu2/UGpwA6sjnBAX7Y+B478ib29/X/apqWl0adHNxa1LM/hbjU40KM2k8d9wrVr13Kc\nQ+RfMowssq1Vq1aM+nQiDT8ZS2paGs0CG7Ni4WJTx8pUUlISTtZ//37pbG3Bk9RUVFXNdN3j0NBQ\ngoODCQgIMPom73FxcQQ1akCfEg70L+nM/N2/0/fWTX7fut2oOfITnU6Hr7cHrhY6Ovg5cTzsJn5F\nvXgUG4+VlVWOru3k5MTc+Quf2y4iIgIzVaWJrzuQvgZ3TR83rl+/TsWKFXOUQeRf0rMVOfL+iBHE\naxJJ0CSyY+/+PD0pp3Xr1px+mMT+O3HcjU1h9plounR8NdMl+rp27kS50qXo/kobvFyd2bhxo1Hz\nHjhwgJK25owJKEWgrxvzWlRk9959udJTK6h+//13kpKS+aJFCTpWdGNSUAkUVcuMGTOMlsHT0xMd\nCkfuRwNwLz6Zsw+iKV++vNEyiLxHerYix8zMzLC2tjZ1jOcqVaoU23fuZuS7w9h85REtWnXku5mz\nMmy7aNEiDuzYzpmBjfF1smXJxXsM7NOL2KQUo+W1sLAgRat71vN+otOj16uYm5tnes6pU6cICQnB\n39+fcuXKGS1rXvHo0SOsLRSszNNHKizNFewtzYmKijJaBisrK5atWk2/nj3wdbYnJCaeiVOmUqlS\nJaNlEHmPvPojRAb69+9P2um9zGvrD4BeVXGbuRNNYqLRhpOTk5OpV6sGtWxSaeTlyNIbkVRo0or5\ni5dk2P7j0R+yfNFC/L1dOHE/ihk/zqFvv35GyZpXaDQaPN2caVvGhWalnDl+P4F1V6O5fvuO0Wdp\nR0dHc/PmTYoXL57pfsWi4Mns1R8ZRhYiA/7+/hwNiyEhVQvA/tAobC0tMi20YWFhDHpjIO1aNmfa\nF5+j1WpznMHW1pYDR4/h0awze+xK03X4h8xdkPEzw7Nnz7Js0QIO9ajF8lYV2PRqNYa+PYSUFOP1\nxF9WamoqLVu2pIRvcdq0aYNOp8vxNR0cHNjyxy7230vi490hbL2dwG8rV5vkdSg3Nzfq1auX6
[... remainder of base64-encoded PNG data omitted (scatter plot of the ITML-transformed data) ...]\n",
-      "text/plain": [
-       "<matplotlib.figure.Figure at 0x...>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
-   "source": [
-    "plot(X_itml, Y)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Sparse Determinant Metric Learning\n",
-    "\n",
-    "Implements an efficient sparse metric learning algorithm for high-dimensional spaces via $l_1$-penalised log-determinant regularization. Compared to most existing distance metric learning algorithms, it exploits the sparsity of the intrinsic high-dimensional feature space.\n",
-    "\n",
-    "Link to paper here: [SDML](http://lms.comp.nus.edu.sg/sites/default/files/publication-attachments/icml09-guojun.pdf).\n",
-    "\n",
-    "One feature which we'd like to show off here is the use of random seeds.\n",
-    "Some of the algorithms use randomised procedures to select constraints; to fix these, and get identical results on every run, we pass a numpy random seed, as shown in the example below."
-   ]
-  },
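For reference, the $l_1$-penalised log-determinant program behind SDML can be written schematically as

$$\min_{M \succ 0} \; \mathrm{tr}(M\,\Sigma) - \log\det M + \lambda \lVert M \rVert_1,$$

where $M$ is the learned Mahalanobis matrix, $\Sigma$ is assembled from the sample covariance together with the sampled similarity and dissimilarity pairs (the exact construction is given in the paper linked above), and $\lambda$ controls how sparse $M$ becomes. This is a schematic form, not the precise objective from the paper.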
-  {
-   "cell_type": "code",
-   "execution_count": 9,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/Users/bhargavvader/Open_Source/metric-learn/venv/lib/python2.7/site-packages/sklearn/covariance/graph_lasso_.py:252: ConvergenceWarning: graph_lasso: did not converge after 100 iteration: dual gap: 2.377e-04\n",
-      " ConvergenceWarning)\n"
-     ]
-    }
-   ],
-   "source": [
-    "sdml = metric_learn.SDML_Supervised(num_constraints=200)\n",
-    "X_sdml = sdml.fit_transform(X, Y, random_state=np.random.RandomState(1234))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 10,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [
-    {
-     "data": {
-      "image/png": "[... base64-encoded PNG data omitted (scatter plot of the SDML-transformed data) ...]",
-      "text/plain": [
-       "<matplotlib.figure.Figure at 0x...>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
-   "source": [
-    "plot(X_sdml, Y)"
-   ]
-  },
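A minimal sketch of the seeding pattern used above, assuming the X, Y arrays and the metric_learn import from the earlier cells: fitting twice with a fresh RandomState seeded identically draws identical constraints and therefore produces identical embeddings.

import numpy as np

sdml = metric_learn.SDML_Supervised(num_constraints=200)
# A fresh RandomState with the same seed reproduces the same sampled
# constraints, and hence the same learned transformation.
X_a = sdml.fit_transform(X, Y, random_state=np.random.RandomState(1234))
X_b = sdml.fit_transform(X, Y, random_state=np.random.RandomState(1234))
assert np.allclose(X_a, X_b)

Passing a fresh RandomState(1234) each time, rather than reusing one mutated instance, is what makes the two runs draw the same constraints.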
fMkb31M+WwyFhbmrP9+CjY2Nqxft1YKrRCiSJOe7SugQiV/7Nx9eOezb4mNiuSzgV14d/g7TJ48\nWSf25s2bvPPOO0THxNCta1fGjBmTr7lqNBru3r0rs4uFECIL2fVspdi+AiIiIqhdtx53bt3CyNiI\ntm3bsmnjRkOnpWPIkCEsXbYcTcZjTM3MmTZ1Sr4XeyGEeJVJsS0AMjIyMDIy0nuryJcVEhLCosWL\n0WRo6Nu3T5ZDvnv37qV1m7aM/W4pFWvW49C29Sz+YiL3797B0dExX/MVQohXlayzLQBMTEzyvdBe\nuHCB+g0bEhabzp00I5o1b8GRI0d04lasWIFXmfJUqlUfRVFo1K4r5hYWOpO4hBBC6JIJUkXcV19/\nQ5s+Q2nbdygADs6ufDFjJtu2bH4mzt/fn7XrN5KcEI+VrR0P790mJSmJgIAAQ6QthBAFivRsi7ik\npCRsHZyeXts5OJGckqITN3r0aOxsbfigSxO+/XAoE3u1pnLlyvj7++dnukIIUSBJsS3ievXozqaf\nv+XSn0cJPfcn636cQa9slhNt27IZEwVO79+Fc3EnNm5Yn8/ZCiFEwSQTpASfffYZX3/9DVpVy6CB\nA/n6669RlGff70dHR1OhYiW6DB9P1fqN2bdxJWd3b+HSXxcwMZG3EUIIATJBSmTj4MGDTJ85k8r1\nm1C9aRvmzJvP5s2bdeJOnz6Nq09pGrbrgp2DEx0GjCA2Lo7bt28bIGshhChYpEtSxA0dNpzWvQfT\nfdhYALzLlGfU+6Pp2LHjM3EODg5ERdwjPS0VM3MLEmJjSEpIwM7OzhBpCyFEgSLFtoCJiYlh3rx5\nPIqOplXLljRv3lyv9pKTU/AoVebptXtJX1JTU3XiqlevTv26dZgxtAd+1epy9kAwI0eOxMnJSSdW\nCCHEs+SdbQESHx9PjZq1cPerjIt3afZv+JVpUz9j0MCBOW6zX79+BO87yNgflmFiYso3YwZRoUxJ\ntm/dqhOr1WpZvXo1169fJzAwkHbt2unzOEIIUejIDlKFwLx581i2YSsjZ84D4MbfF/nhg0Hcu5vz\ng9m1Wi2NmzThxImTqKhUqlSJE8eOYWZmlltpCyFEkSETpAqBpKQk7Iv/c/i8QwlXkpKS9GozJCSE\n02fOUrK8P2UrV+Pvv0M5ffq0vqkKIYT4F+nZFiCXLl2iYaMgBkyaiZuPL7/9NJNyXq4sW7okx20G\nVKtOibKVGTD+cwDWzZ3NhQPbuRYamltpCyFEkSHn2RYClSpVYumSxQwaMoTk5GTq16vH/Llz9Goz\nJjaWhoE1n177BdTgyNa1+qZqUJGRkWzcuBFVVenYsSPu7u6GTkkIUcTJMHIBEhsbS/+3BuLg6kWt\nZu04eOgwX3zxhV5tVqnsz44VC0lOTCA9NYWty+ZRxtc3y9irV69SJSAQd09v6jdoSHR0tF73zgs3\nbtwgILAaq7b+wdodewmoVo2rV68aOi0hRBEnw8gFyIABAzh+IYSPf16HoiiEnD7OV+8NIDkxIcdt\npqenE1i9OiGXLoMCJUuW4tzZMzrrZ+Pj4/H09qFmszYENmzO3g2/EhEWyp1bN/P9pKLnGTBwEAmm\ndnR5ezQAW5bOQfMgnDWrVhk4MyFEUSATpAqByMhIfMpVerqVolcZPx6np+vVZlpaGra2dniV9qVU\nWT+sbWxIz6LNJUuWYOvgxMCPZlA9qAXvf7WAmJiYV24y1cOHD/EoXe7ptaevH1FRjwyYkRBCSLEt\nULp168ahrb8RdvkCqSnJrPzuC1zd3LKMDQ8Px83dE1t7B4o5OrFs2bIs46Z+Pg1zJ1e+WLObqSt3\n4VWpGuMmTNSJy673+v/3UDa0Vq+1YMfyeTyKvE/Mw0i2Lv2Jlq+1MHRaQogiToaRC5gRI0awcPFi\nHqen4+rmweGD+ylVqpROnF0xR8pXq03bvkP5++wJNiz4lqOHD1G9evVn4jq+3hmf2s2p27IDAH8d\nP8ih1Qs4dGD/M3Hx8fF4eftQo2kbAhs1Y++Gla/kMLJWq2XiR5OYM2cOqqoyaNAgvpr1JcbGxoZO\nTQhRBMimFoWMVqvNtshduXKFipUqseRIKCammZtTTB3cjbIeJfj999+fif108mR2HjzOiBlzMDIy\nZuGUD/Av6c4P33+n0+7ly5dp3uI1klNScHJ05OSJ47JdoxBC/Iu8sy1kntebtLa2RlUhLTXzEHhV\nVUlJSsTCwkIndsL48ThZmfBumzq8164OGXEP+WLa5zpxGRkZvDN8BN7lK9N56Bgs7R0Z/cHY3Hug\n/3D16lVK+pbB2tYOpxIurF2bv8uTNBoNYWFhPHz4MF/vK4QoHGSdbSHk4eGBc4kSTB3cnZY9BxBy\n6hgRt27w9e6dOrFGRkaYmprxOD0NjcYYU1PTLAv5yZMnuXXnHp+v2oWRsTEN23ZhVJtafDljOi4u\nLjnONT09nU2bNhETE0Pjxo3x8/PTidFqtdSp34DAoJa8++ZgQs6coG+//lSqVIlKlSrl+N4v6u7d\nu7zWqjVRjx6RlJjAoIGD+Obr2a/c+2ohxKtLeraFVNi1q6gpiaz6bhoXju4leOd2PDw8dOJmzJzJ\nvegEfvrjDD8FnyHD3JaPJn2sE5eamoq1nR1GT959mllYYm5umeUJQS8qNTWVoCZNmTrrG9bt2k/d\nevUJDg7WiQsPDychPo4B4z/H1bsUTTr1xNc/MNtJX7ntrUGDqVCvGd9sPc43W46yZWcwv/32W77c\nWwhROEixfUmqqjJ9xgycS7jgWLw4o8d8gEajMXRaOoaPGImDmxdvTZxO/dadGThoMImJiTpxp8+c\npW7r1zE1M8fYxIQG7bpy+swZnbhatWqRGB3F74t/IDzkL5bP+gRf39J4eXnpxCYnJ/Nm337Y2RfD\nzd2DRYsXZ5njihUrSFdMmTB/LQM/nsXbU79j+MhROnH29vZoNRoS42IA0Go0xDyMzLf3xefPnSOo\nY08URcHGrhjVGrfi7Nmz+XJvIUThIMX2Jf3yyy8sWLyUCQvW8dmybQTvP8SMmTMNndYzkpOTWbVq\nJe99vYg6LdrRZ+xnWDkUZ8+ePTqxZXx9uXj8IKqqoqoqfx07QJkyZXTibGxs2Ld3D9dP7OfHD98m\nPeouO7ZtzXLIeeS77xEWEcWsjQcYNXsREyd9zN69e3XiIiMj8Shb4elwrI9fJR4+eKATV7x4cerU\nqcsnfTvw++IfmTH8TR6nJDJqlG5hzgulSpXm/JF9AGQ8TufvU0ey/D0SQojsSLF9Sdt27KJVn6G4\n+ZSmuJsHHQa/x/Yduu9CDUlVVVCfnURlbGKKVqvVif3k40nE3rnOJ71bM6VfB66eOszM6bpbQKqq\nyofjJ5CYrqF2y46E3bjFl7O+yvL+wcHBdB0+HjsHJ0qW96dRp17symJ4OCgoiOM7N3En7AqP09PY\nOP8bgho3zrLNgwf282aProSd3IeflwvXr17JcsJXXlj483
y2L/2R6UO6MaFbc3y93OnXr1++3FsI\nUTjIBKmX5OToQMTN60+vI25cx9HR0YAZ6bK2tqZ9x478NGEYTbv149r5U0TduUHTpk11Yu3t7Tl+\n9AgnTpxAo9FQu3ZtLC0tdeJOnz7N0eMnmL52N6Zm5rTqPZgxHRsw9oMxOsO5Do6O3L9xHTfvzPW/\nD26FUyuojk6bDRo04IvPp/LBoK4kJSbQtHkLlvy6IstnMjIyYvbs2Tn57dBbpUqVCLl8idOnT2Nn\nZ0f16tVlcpQQ4qXIOtuXdOvWLWrXqUv5GvUwNbfg9P6d7NuzhypVqhg6tWekpaXx2ZSpHD56FG8v\nL2Z8MQ1PT88ct/fHH3/wwaTJjJ+3Bsjs6b7frg7HjxzW2VRj9+7ddO/RkzotOxLz4D4xd29w4vgx\n7O3ts2xbVVW0Wq1sPCGEKPBkU4tcFBkZyZo1a9BoNLz++uuULFnS0CnluejoaCpUrETn4eOoWq8x\n+zet4tyerVy8cB4Tk2cHSDIyMnhn2DC2bd+BhYUlX305g86dOxsocyGEyD9SbIXezp07x1uDBhMe\ndp2qAQH8smQJPj4+OnFjPxzHzv2HeOP9T3gUeZ8l08axc/s2atasmUWrQghReEixFfnGp1RpRn61\nCI/SZQH4be5syjtbMW3atBy3qdFo+P777zly7Dg+3t5M+mgiDg4OuZWyEELkCtmuUWRJVVW+nDUL\nd08vXN3c+fjjT7KctfwyLK2siI+JenodHxOFlZWVXm0OeXsoi1etw6VKfc7fuEfDoMakpKTo1aYQ\nQuQX6dkWcUuWLuWzaTMYPv0njE3NmP/xKIb078OY0aNz3Obq1asZ+d77tOg5kJjIe5w/GMyZ06dw\ndXXNUXuJiYk4O5dgzu6zWFhZo6oqXwzuypdTP6VNmzY5zlMIIXKb9GxFljZv2Urb/sPx9PXDzbsU\nnYaMZsvWbXq12bNnT9as/BVnNZHqvu6c+vNkjgstZA4hK0ZGmJiaApl/mM3MLcjIyNArTyGEyC+y\nzraIc3Rw4OGdm0+vI2/fpFg2S3ReRtOmTbNc15sT9vb2NGvenLmTRtG0a19Cz57g0b1bBAUF5Ur7\nQgiR12QYuYi7fv06devXp2qD5piYmnJy91b27t5N1apVDZ3aM5KTk/lo0sccP3ECb28vZs2cibe3\nt6HTEkKIZ8hsZJGtO3fusGrVKjQaDd26dcPX19fQKQkhRIEkxVYIIYTIYzJBSgghhDAQKbZCCCFE\nHpNiK4QQQuQxKbZCCCFEHpNiK4QQQuQxKbZCCCFEHpNiK4QQQuQx2a5RFBh79+5lx44dVKxYkQED\nBhg6HSGEeGGyqYUoEN5//33mzl9AuarVuRl6mbJlfDl18oSh0xJCiGfIDlJFUHp6Onfu3MHZ2Rlb\nW1tDp5NjycnJ2BdzYOryLfiUq0hyQjyjX2/Et1/Nkh6uEOKVIjtIFTFnzpyhpJcH9WsE4uZSgp9/\nXmDolHIsPDwcYxMTfMpVBMDK1o6SfpW4ePGigTMTQogXI8W2EFJVlY7t2tC7jBnzW7nzVTN3Jnww\nhkuXLhk6tRzx8/PDSFE4tHU9ALeuhHDl/OlsD44/dOgQPXu9QY+evdi7d29+piqEEFmSYlsIxcTE\nEBsbS0MfOwDcbc2o5GrDhQsXDJxZzpiYmLDsl6UsnfERA+r7MalPOwYPGkizZs10Yg8ePEjH1ztj\nXboydmUD6dazF8HBwfmW68qVK/H08sbWzp4evXqRmJiYb/cWQry65J1tIaTRaCjuWIwJtZ0oX9yS\nxHQNH+yLZOP2YGrXrm3o9HIsIyODv//+m9KlS2NlZZVlTLcevbAvG0CzLr0BOLR1PTdP7mHbls16\n3fvMmTPs3LkTOzs7+vTpg729vU7MsWPHaN/xdd796mdKePqw4qtP8XK0ZuWKFXrdWwhRcMg72yLE\n2NiYFStXM+PEI6Ycj+G93ffp89bgAl1oIbOH6+/vn22hBdBoMjA2+WdFm4mpKRqNRq/7btu2jeav\nteR46C1WbdlFzdp1iIuL04kLDg6mQfuulKkciJ2DIz3fnZivvWohxKtL1tkWUuXLl8fdzY2zV65R\nzNaGhkGNDZ1Svnh78CDe7NsfMwsLjIyMWfP9NObP+UmvNsd+OJ63p3xLlbpBAMz5aCSLFi1i9OjR\nz8Q5OTkRefgkqqqiKAr3b4Th4OCo172FEIWDFNtCSFVVOrZpxRtuJgxr1YJT9+Po1fsNjp85i6+v\nr6HTy1MtW7Zk6eKFfP/jT2i1Wub99COdOnXSq82Y2BhcvUs9vS7h6UNMTIxOXP/+/Zm34Ge+GzOQ\n4h4+HNu5iWVLl+h1byFE4SDvbAuhmJgYfDzcufN20NPv9d19hZ4ffUHPnj0NmFnBNHDwEC6F36Hv\nh1N4eO8uP457m43rf6NBgwY6sUlJSfz666/ExcXRokULAgICDJCxEMJQsntnKz3bQsjW1hYVuBKd\nSDlHG1IzNIQ8jMfV1dXQqRVIP37/He8MH8HHvVtjY2PL999+k2WhBbC2tmbIkCH5nKEQ4lUnPdtC\naumSJYx7/11alHLm7IMEqjVswrJVq1EUnR+4hBBC5BLZrrEIOnfuHKdOncLT05OWLVtKoRVCiDwm\nxVYIIYTIY7LOVgghhDAQKbZCCCFEHpNiK4QQQuQxKbYiz8TFxZGWlmboNIQQwuCk2Ipc9+jRI4Lq\n1cHT1YVidrZM/vhjQ6ckhBAGJZtaiBeWnJzM17O/IuxKKAE1azF8+AiMjY114t4Z+Bbl0x7y+9tB\nRCWn027hXKpWq8brr79ugKyFEMLwpGcrUFWVW7ducePGDbJbrpWRkUHr5k05tWohVe+fY923M3mr\n75tZxp44cZxhVT0wUhRKWJvTrbQDx48d1SvHhw8f0qV9W7xcnKlbPZCzZ8/q1Z4QQuQnKbZFXFpa\nGq+3a0ONypWoE1iF5kENszzw/MSJEzy8GcbS1yowoLIXa9tUZPPvvxMREaET6+npybG7sQBoVZWT\nD1Pw8vbJcY6qqtKpbWtcI0LZ3rYi/Z01tG7ejAcPHuS4TSGEyE9SbIu4GV9MI/36RS73q0tI37o4\nx9xh0vhxOnFpaWnYmZti9GQXKksTY8xMTEhPT9eJ/XHBQiafukOv4FCabjhPeglvBg8enOMco6Oj\nuXjpEtPq++Jjb0mvCu4EuNhx5MiRHLcphBD5Sd7ZFnEXTp+iW2lHzIwzf+7qWbY4350+pRNXu3Zt\nHmmMmXkynObeDiwPfUC58uXx9PTUiQ0MDOTcpcscOXIEGxsbmjVrholJzv+oWVlZ8Vij5VFKOs5W\n5mi0KvcTUrC1tc1xm0IIkZ+kZ1vE+fqV5487cWhVFVVVCb4VS5ny5XXirK2t2Xv4CFeK+zHmQiyK\nfz0279iFkVHWf4RcXV3p0qULLVu21KvQAlhaWjL2g7G03fwXX564Tvcdl3AuXY7GjRvr1a4QQuQX\n2Ru5iIuPj+e1J
kEkRNzF1NgIrbU9ew4extnZ2dCp6di0aRMnjh/D26ckAwcOxMzMzNApCSHEM+Qg\nApGtx48fc+rUKTQaDTVq1MDCwsLQKQkhRIEkxVYIIYTIY3LqjxBCCGEgUmyFEEKIPCbFVgghhMhj\nUmyFEEKIPCbFVgghhMhjUmyFEEKIPCbFVgghhMhjUmyFEEKIPCbFVrB82TJqVqlEdf8KzPnpp2zP\ntBVCCJEzcupPEbdhwwYmjX6X74PKYGak8O7UTzA3N2fgoEGGTi1f/PDDD1y4cIEWLVrQvXt3Q6cj\nhCikZLvGIq5n5040TrrOGxU9ANgR9oBFMVYEHzxs4MzyXs2AKty5doVabsXYf/sR7bt0ZcWvqwyd\nlhCiAMtuu0bp2RZxFpZWPHr0+On1o5THWFpZGTCj/LFq1SrCQv/m/IBG2JmbEBabTO1la/hq9je4\nuroaOj0hRCEjxbaIe//DcTQPakRiugZTI4W5FyPYuG2uodPKc1euXKGcow125pl/BUoXs8LazITr\n169LsRVC5DqZIFXEVa1alf1HjpJWuw1xgS3YuXcfDRo0MHRaea5t27acfxDP0bsxqKrKr5fu8Fir\nUr16dUOnJoQohOSdrSiypk2bxueTPyFDo2JpZsKSX1fRpUsXQ6clhCjA5Ig9If4fM2MjrC0sqObj\ngpm5OUZG8tdBCJE3pGcriqSQkBAa163NwW7VcLOx4GxkHJ22/MW9Bw+xtLQ0dHpCiAJKerZC/EtY\nWBhVXB1ws7EAINDFHiszEyIjIw2cmRCiMJJiK4qkChUqcO5+NKHRiQDsvhFFBgru7u5Zxm/dupXB\n/fsx5v33uH37dn6mKoQoBGQYWRRZy375hZHD3sHOwow0Lfz2+2YaNWqkE7do4UKmjB/LqCpu3EpM\n47cb8fx57ny2hVkIUXTJMLIQ/4+iKKiAFhUtKoqi8/cDgJmfT2Fxcz8GV/Vmav2ytPSwYdmyZfmb\nrBCiQJNiKwqd/fv30+P1jnTv2J7g4OAsY8LCwnh3xDCmN3Zlbkt3RlS1o0unDqSnp+vEpqenU8zc\n9Om1vakxqakpeZa/EKLwkWIrCpX9+/fTo1MH6idcIyglnL49urFz506duJCQEMo62+Jlbw5ANXcb\nFK2Ge/fu6cS+0acvIw5e48S9GH77+z4rQh/QubOsxxVCvDjZrlEUKvO+/5aPanjRv7IXAGZGRsz5\nZjatWrV6Jq506dJcj0ogOsUGR0sTrkenkpqhwcXFRafNqV9MZ3hMLP1/34SVlRVLV66iSpUq+fI8\nQojCQXq2olBRtVqMjf5592pspGR5Pm+FChUYO34io/fc59Oj0Uw5+pAlvyzPco3ttm3b2Lh2NT1K\n2hFgrTKUTV2EAAAgAElEQVRm5AhiYmLy9Dn+be3atTStV4cm9WqzZs2afLuvECL3SM9WFCqDho+k\nT/eumBkZYawoTDpxg/m/fJ5lbEC16ihGRly6E4WnmxvlypXLMm7iB6NZ0MyPJt5OAAzeHcLixYsZ\nM2aMXrmGhYVx4cIFfHx8CAwMzDJm06ZNjHlnCLPql0ZR4INhb2NiYiLbSgpRwEjPVhQqLVq04LMZ\nX/LN1Xi+Co1l3KdTaN++vU7c3bt3ebNHN1a0KM/DEc0ZW9GB9q1eIyMjQyc2Pj6ekvb/9HhLWpsS\nG6tfz3bN6tXUDqzK/Inv0b55EyaMHZtl3C8L5jG5ljdtfEvQunQJPqvlw/KFC/S6txAi/0mxFYXK\n2bNnmTRuLB2cTXnd1YzPP/2Y48eP68SdP3+eqq4O1Pd0RFEUelf0IC05ibt37+rEtmnXjolHw7kd\nn8LRuzH8EvqQ1q3b5DjHtLQ03h48kE3tK7OmZXkOd6/G8sU/c/bsWZ1YUzMzkh5rnl4nZWgwMTXV\niRNCvNr+cxhZURRzoAtQ8t/xqqpOybu0hMiZr2dOZ0yAB8MCfQAoYWXGrGlTWb9l2zNxbm5uhEbF\nEZ+WgZ25CeFxySSkpuPk5KTT5rc/zqFHl87UXrkXC3Nzvvr2O+rVq5fjHB89eoS5sRGVne0AcLQw\no7JLMW7evKkznDzqgw/p3K4NyY81KIrC7HN3Wb+l8J83LERh8yI929+BjkAGkPSvLyFeOcmJiRS3\n/KfnV9zSjJRk3T+ugYGBdO7xBo3Xn2XIvqu02nSeWbNnY2NjoxO7ePFi/ggOpoVXMbwtYOy7I4iO\njs5xji4uLphbWrE+9D4AFx8mcOruoyxnODdo0IDNO4O5VrIWV7yr8/uOXTRs2DDH9xZCGMZ/bteo\nKMpFVVX9X7ph2a5RGMCaNWuYMPIdfmhUBmMjhVEHrzFx+lf0HzBAJ1ZVVQ4ePEh4eDgBAQEEBARk\n2WZxWyu+bexHhzIuqKpK542ncazVhHXr1uU4zzNnzvB6uzakJCWRptGyYOEievTsmeP2hBCvhuy2\na3yR2chHFUWprKrqX3mQlxC5qkePHlz66y8Gz/kRFZU+A4fQr3//LGMVRSEoKIigoKDntpma9piA\nEnZPP1PLrRiHstj84mVUq1aNsNt3efDgAY6Ojpibm+vVnhDi1ZbtMLKiKH8pinIBaACcURQlVFGU\nC//6vhCvnL179/LVlzMIcrGimZstP377Ddu2bfvvDz6Hu5sL049fI12j5UZcMkv+ukPrNjmfIPU/\nxsbGuLm5SaEVogjIdhhZURSf531QVdWbz21YhpGFAVQq40snZyPG1fEF4PvT4Sy7mcSVm3dy3ObN\nmzdpWLM6d6MeYawodOjQgd82bsqtlIUQhchLDyP/r5gqirJcVdU+/6+x5UCfLD8ohAGlJidS1tHt\n6XU5B2tSQx7q1aaPjw+3HkSRnJyMhYUFRkayYk4I8XJe5P8alf59oSiKMVA9b9IRQj91gpryxbFr\nXI9N4kZcMlOOXqN6vfpZxoaFhdGh1Wv4l/Wl3xs9/3OGsZWVVa4W2vT0dC5cuEBiYmKutSmEeDU9\n753tBEVREoAqiqLEP/lKAB6QuRxIiFfO8l9/xaNSAPVXHKPO8iPYl/Zj3fqNOnHx8fE0bVifGsm3\nmVfLBZO/T9Khdcss91HOC6tWraKYjRV1qgfiaG/H+++9my/3FUIYxoss/ZmuquqEl25Y3tkKA4iI\niKBh7Vr4WICxAlcSMzh0/CSenp7PxAUHBzNl2Ftsb5+5qk2rqpRbcpSzl//Gw8Mjx/e/desWX82c\nQfSjKDp07kr37t11YtLT0ylmY8Wwmi408rEjLCaVCbtvseOPPTRu3DjH9xZCGF5272yf17OtpihK\nNWDd/3797688zVaIHJryyce0cjFjQ5uKrGtdke7eNkwa96FOnIWFBbGp6Wif/ECY9FhD6uMMLCws\nsmz38ePHXL58mTt3sp9odf/+fWpVr8b9g79RLOwQY4YN5ofvvtOJu3jxIgoqjXwylxOVdrCgnJMl\nu3btyskjCyEKgOets5395J8WQA3gPKAAVYBTQN28TU2Il3fv9k06l/hnF6
iaJWxZcPuWTly9evUo\n7uNL310hBLnZsDYsml69emW5XePNmzdp3awJaQlxxCan0vON3vw4bz6K8uwPr7/++isBjgp9Kme2\nUcbJglkzv2Dku88OEZcpU4Z0jcqNmFRKOliQmK7hRmwq/v4vvXeMEKKAyLZnq6pqE1VVmwD3gWqq\nqtZQVbU6EAjo7tYuxCugXlATFoY8ICE9g6THGcy7HEG9oCY6cSYmJuzYvZd6/d7hSqnavDXhM+b+\nvDDLNof070tXNzPOvlGT833qcnT771meK5uWloaF8T8F2NLEiPT0xzpxdnZ2DH1nGON23+KTfbd5\nZ2sYFStXpXfv3no8uRDiVfYi72wvqar6/2ck63wvi8/JO1uR7zIyMhg2ZBC/LP8VgJ7du7Jw6TJM\n9Tgpx8O5OMEdKuFll3nM3szj11Abd+OL6dOfiQsNDaVurRq8Wd4GV1szVv6dQJueA/jyq9lZNcue\nPXsIDg6mcuXKvPnmmznOLyfi4uI4d+4cdnZ2BAQE6PTShRA5k9072xcptqvIPHhgxZNv9QZsVFXt\n9R+fk2IrDCYtLQ1VVbN9B/sygurWpr1lIkOqepGaoaHj1osMnTyDfv366cTOnz+f998dharNoFz5\nipw5ew5jY2O9c8hNly5dokXTxhS3MOJRYioNGjdj1brfZP2wELlAn2JrAbwDNHryrYPAXFVVU//j\nc1JsRaEQGhpKy6aNcTBWiUpKpWHTZqxYs06nOB0+fJjmjRtR08MGd1sztl2JoUwFf86dP2+YxLNR\nu3oANU0e8JqvPekaLZOPRDFu+rf06SP71AihrxwfRPCkqH7z5EuIIsfJyQkPT09Onz2HCvhXDciy\nF9i9e3equVsztn7m0qFqbtZ8tv+i3vdPTExk3759qKpKkyZNsLW11au962E3GBpUHAAzYyMqORhx\n9epVvfMUQmQv22KrKMpaVVW7K4ryF6DTRVVVVffwTSEKoUH9++KUeJPVXXyJScng0+9mUzWwGm3b\ntn0mLiUlBVdXs6fXxa1M0eg5uhMZGUn92jWxIxVFURitmnP4+ElcXV1z3GZl/0rsvXmdbhUcSEzX\ncPphBt2rVtUrTyHE8z3vJc3/1iu0A9pn8SVEkXD8xAk6lrHDSFFwsjKlgZspR48c0Ynr2bMnO6/F\ncvZ+EvcT0vnpZAQWZjmfmAXw8cTxVLFJY3I9Jz6t60hVmzQmTRinV5tLV6zkzwQr3gm+z9Adt2nf\n/U06d+6sV5tCiOd73kEE95/8sjlwUFVVGWcSRZKHuxshD2NwtjZFq6pci1dp6OWlEzd37lwuXLjA\nl0eOo1VVTE1MuRASqte9b4aHUcPxn4Jd3tGUP2+E69Wmj48Pf4WEEh4ejp2dnV69ZCHEi3mR6Yfe\nwHxFUcIURVmnKMpIRVEC8joxIV4V8xYuYWlIIrNOxTLh4EOsPMvx1ltv6cSlpKSgTUuhupcr3fxL\nYW5uzvXr1/W6d72GQfxxK5W0DC1pGVr+uJVKvYbPP+z+RZiamlKuXDkptELkk/+cjfw0UFEsgcHA\nB4CHqqrPXc8gs5FFYXLv3j0OHz6Mra0tLVq0wMREd1Bo3rx5bPpmGmtaV0RRFP4If8inl2K5eDXn\nBffx48f07d2LjZsyz/7o1KEDy1auwszM7D8+KYQwhBzPRlYUZRJQH7ABzpJZbA/leoZCvMLc3d2z\nPFTg3yIiIvAvZv50g4jKJex4cEC/nm1KSgo3w8PxLGaHoijcvBFOampqvhXbuLg4li5dSlxcHG3a\ntKFGjRr5cl8hCpsXGUbuDDgBu4ENwO//ep8rhHiiSZMmrL4Wxd+PEknN0DDt5A0aN9ZvyHfyxx9R\nOj2aU2/U4M9e1SmbEcOnH03MpYyfLzY2lprVAtj44+dcWPsjLZs1ZvPmzflybyEKm/8stqqqViNz\nktRJoAXwl6Ioh/M6MSEKmqCgID6d/iWtNp3Hc94+ol3LsmDJL3q1GXrxIm19HDBSFIwUhbY+Dvx9\n8a9cyvj5Fi9ejIdRIqNrFqdvFSfeq+7I2PdH5cu9hShs/rPYKoriT+YWjf2AHmQeQrA3j/MSokAa\nPGQIj+LiSU1LZ8vOYIoVK6ZXe5UCAtkYHo1Gq6LRqmwIi6ZSQGCWsdHR0bRt04aqlSowaNAgtFqt\nXveOiYmmxL92u3S1MSU+PkGvNoUoql5ku8atZG7ReBj4U1VV3WNMsv6cTJASQk+JiYl0aN2SqyGX\nUFDwLV+BLTuDsbGxeSYuOTkZH3cXSlqrBLhaE3w9DifvMpy5kPMdrI4dO0aH1q8xuoYjLjamLLoQ\nR4WgNizUs7cuRGGW472R9bihFFshcoFWq+Xq1auoqkq5cuWy3Cry66+/ZvbkCfzUthRGikJiuoZ+\nG69xPfwG3t7eOb73smXLGD1qBKlpaTRu3Jh1GzZhaWmpz+MIUahlV2zlmA8hXnFGRkb4+flRvnz5\nbE/miY+Px87cGKMnM6EtTYwwMcr8fk5FR0fz/ohhOBg/JsDZnN1/7GbJkiU5bk+Iokx6tkIUAuHh\n4VQsV5belZ2o4mrF9iuxnHqYwYOYuBwfnde9e3euHtrO5MaeKIrCsdsJzD0TRWzScw/8EqJIk56t\nEIVYqVKl2LB5C79fT+KjPXcISTLlxJlzep1Re+/eXfycLJ6uGy7jaEFa+gtN2RBC/D/Z/k1UFGWL\noiibs/vKzySFKOq0Wu1zZxerqsri+fMo5WDLgKo+GGsyWLdmtV73bNu2HbuuxxGZmE6GVmXNxSjc\nXF30alOIoirbYWRFUZ67Gl9V1QPPbViGkYXQm0ajYfSokSxYuBCAQW+9xbc//oSx8bO7pR4+fJiB\n3TpxpFs1zIyNiEhKI3DZMSKjonRmLr+Mzp06sXnL76gqlHB04OifpylVqpRezyREYfbS2zX+VzEV\nQuS9b77+ij+3byRkQANQoPfO35k9y4cPx094Ji4mJgafYtaYGWcOVrlYmWFtbkp8fLxexXbDpk1k\nZGSQmpqqVztCFHUvsqlFWUVRflMU5fKTk3/CFEUJy4/khCjq9u/axQh/VxwtzXC0MGNkZTf2/7FL\nJ65mzZqcj4zn96sRxKQ+5qtTN3F1d8+VU31MTEyk0AqhpxeZPbEEmAtkAE2AZcCKvExKCJHJxd2D\nC4+Snl5fiErCxc1DJ87V1ZWNW7fx4ck7lP35ABsfqmzd9YdeE6SEELnnRXaQOq2qanVFUf5SVbXy\nv7/3H5+Td7ZC6OnmzZs0rFOLao6WKAqcikrh8ImT+Pj46MTWqFqZu9evUt3VnoO3o+ne+00WLpZ1\nsULkpxzvIKUoylGgAfAbmXsi3wVmqKrq9x+fk2IrRC6Iiopi69atALRr147ixYvrxCxbtowxQwdz\ntn9D7MxNuBqdRL0VR7j/MApHR8f8TlmIIivH59kC7wJWwChgKtCUzEMJhBD5oHjx4vTv3/+5MaGh\noZR3ssHOPPOvdFlHa8xNjAkLC
5NiK8Qr4EWO2PtTVdVEIB4YpapqZ1VVj+d9akKIF9WuXTvORMZx\n6n4sqqqy4tIdVBSqVKmiE6uqKvPmzqFdi6b07taVy5cvGyBjIYqW/+zZKopSg8xJUrZPruOAt1RV\nPZ3HuQkhXlDdunUZM34ibb6YhlZVsTA1ZdnqNZiZmenETp82jVVzv2N8NU9uPLhP4/r1OHHmrKyf\nFSIPvcg72wvAcFVVDz25bgDMUVVV90fmZz8n72yFyGcZGRlERETg7u6e7UxkHzcX1rYoSwWnzOU8\nYw9eoWTXwUyYMCHL+Beh1WoZP348hw8dpEzZcixYsAALC4v//qAQhYw+72w1/yu0AKqqHlYUJSNX\nsxNC5AoTExM8PT2fG/O/vY7/63svo2Hd2oRdPk/TUvac2nEBX+/t3LwXgYnJi/wvRojC70UW4R1Q\nFGW+oiiNFUUJUhRlDrBfUZRqiqJUy+sEhRC56+3hIxm4O5Tfr0bw3ekbbAyLpkePHjlu79atW5w8\ndZpZr5Wkh39xPm/mzePkeH7++edczFqIgu1Ffuys+uSfn/6/7wcCKpmzk4UQBcT4iRO5e+8ekzdv\nxNLGlrUbN+n1vvbBgweYGSvYmmX+7G5ipOBoacKjR49yK2UhCjw5z1aIQiQhIYHIyEi8vLwwNzfP\nMubnBQv4dPwHtCttyYNklT+j4cz5v3BxydmJPhkZGTgXs6WptxVtyjpwLiKJhWcecOHy35QtW1af\nxxGiwMnxebaKorgoirJIUZQdT64rKooyMC+SFELk3LJffsHd1YVGtavj7e7GsWPHsoybNmUyY2o6\n0q6cI28FOOFvD8uXL8/xfU1MTDhw9ASH7mcwdGsYv/wVw7KVq6XQCvEvL/LOdimwC3B/cn0FeC+v\nEhJCvLxr167x/qjhzGjsyryW7gz2t+L1Du3JyNCdy5iWloaN2T9H9FmbqKSkpOh1/717dmOESgu/\nEjjbWbFz2xZkZEuIf7xIsS2uqupaQAugqmoGoMnTrIQQL+XSpUuUc7bFyz5z6LiWhy0Z6alERETo\nxPZ6ozdzz8USGpXCwRvx7L+Tyuuvv55lu8uXLcPDxRlbayve6N6VpKQknZjExEQ+mjiRaY1KMDTA\ngRmNnNm55XdOn5al+EL8z4sU2yRFUZzInAyFoih1gLg8zUoI8VJKlSrF9UeJxKZm9mSvRafyWKvi\n7OysE/vl7K9p9+Zglt0y5YTWnU1btuHv768Td+jQIcaMGs7oAGvmtvLg3pkDDBsySCcuJiYGK3NT\nnK1NATA3McKjmCWRkZG5/JRCFFwvsqlFNeAHwB+4CDgDXVVVvfAfn5MJUkLko6mffcp3X8/Gx8mG\nsKhEFv+yPNse64v4+ONJXNk0n96VMw8+eJD0mElHoomIin4mTqPRUK50SZqXyKCVrz0XIpP57kwM\nl/6+gpubm067a9asYe7336AoCqPGfKhXjkK8anK8qYWqqmcURQkC/AAFCFVV9XEe5CiE0MPHn35G\ntx69uHXrFhUrVvzPzS3+i5NTce7/61Xunfg0HByK6cQZGxuz4489dO3UgYXrruDuUoL1mzZnWWjX\nr1/Pe+8MZoC/HaoKQ9/qi6npKtq1a6dXrkK86rLt2SqKUhO4rapqxJPrvkAX4CYwWVXV6Cw/+M/n\npWcrRAGWkJBAnRrVsM+Io4SFwqE7yfy6Zh2tWrXK9jMajQZjY+Ns/33r5k3xTwmloY8dAPvC47jh\nVJWNW3fkev5CGEJOlv7MB9KffLgRMANYRub72gV5kaQQ4tVha2vL9FmzeZBhzskHj+nY6XWaNWv2\n3M88r9ACmJqakK7RPr1O02gxMdU9LEGIwuZ5PdvzqqpWffLrn4CHqqpOfnJ9TlXVgOc2LD1bIQq0\ns2fP0rxxI96uak8Ja1OWXYqnUcdefPvDjzluc9++fXTt2J6u5WzQqirrryaxeftOGjRokIuZC2E4\nOenZGiuK8r93us2Avf/6d7K7uBCF3KZNG2niZUEdT1tKO1jwdtVirFu7Rq82mzRpwqZtO0gu15j0\n8k3ZtusPKbSiSHhe0VxF5iEEUUAK8L8j9sogS3+EKPSsrW1I+NdUyNjUDCwtsz82Ly4ujrCwMDw8\nPChRokS2cQ0bNqRhw4a5maoQr7xse7aqqk4DxpC5g1SDf40JGwEj8z41IQTAqVOn6Nq1K126dOHU\nqVP5dt/+/ftzOcGYBWej2BjyiG9OxfDplGlZxu7Zs4cyPt70ad8KP99SzJszJ9/yFKIgkIMIhHiF\n/fHHH7Rv04q6njYoKBy9k8CW7Ttp0aJFvtx/9+7djBw2lOSkJNp3ep0ffvxJ5+zb9PR0PF1LsKRp\nORp6ORIel0yL9ec4cuq07I8sipwcH0QghDCcEW8PomsFR96v6857dd3oXtGR4UPy5xyQq1ev0qNL\nZxrZJdK3jAnBG1bx+dQpOnERERGYoNLQyxGAUvZWBLg7EBoami95ClEQSLEV4hWWnJj4dL9jAE97\nc5ITE/Pl3qtXr6ahhzltyzlQ08OGUdUc+Hme7vCwi4sL6Vo4fi8GgNvxKZy/H5Ntr3bu3Ll4e3ni\n7eXFokWL8vQZhHhVyKxiIV5hdRoEsXLvdko7mKMoCisvRFGrcfabSuQmRVHQ8s9oWIZWRUFndAxz\nc3NWrF5Dr+7dcLW15F5cIlOmfYGfn59O7Oeff87UyZ/QtpwDqqoy7O3BxMXFMXr06Dx9FiEMTd7Z\nCvEKy8jIoEmjhpw8eQKAmjVrsf/QYUxM8v7n5Bs3blCzWgCtvc0pYW3C+qtJjPzwI0Z/MFYn9tdf\nf2XQgH6oWi1aFUZ/MJYZM2fqxDnaWNKtvB1ty2UOOW8KecTm60lExeueJvQyTpw4wYoVK3B2dubD\nDz/EwiL7WdNC5KXs3tlKsRVCZCs0NJQvpkwmLjaGTl170K9/f50JUqmpqTjY2TCipgsNfey4+iiF\nj/beZt/Bw9StW/eZWAcrM96pUYI6nrYAHLoZz6Jzj4hOSs1xjnPmzGH0qBFUdrEmIjGdxyZWhN2+\ni5WVVY7bFCKncnwQgRCi6PLz8+OXX1c9N+bs2bOYKDzd77iskyW+Dhbs2rVLp9hWqBzI4jNncbI0\nQQWWnntA1Zr19Mpx/AfvM7qeO3U8bdFoVSbsucWIESNYvHixXu0KkZuk2Aoh9FK2bFnSMrTcjkvD\ny96cxHQNt+LSqFixok7soaNHCahSmU/2Zc5U9qvgz779B/S6f1r6Y8oXtwTA2EihkrMlt27d0qtN\nIXKbFFshhF6KFy/OgIEDGbtkMeWdLbkenUrlgAC6d++uE2tsbMxfly7n6v2dHBxYf/kRAwJL8Cg5\ng73hcUzo/1qu3kMIfck7WyGE3q5fv07r11pwPfwGTo7F2LBpc77teXzx4kUa169DbELmJKt27dqx\nafOWfLm3EP+fTJASQry08PBw3nvvPWJiounXrz8DB+puqKHRaKhYrgwNiqXSyteO85FJzD
ufwKXQ\nK7i4uORbrvfu3cPR0VFmIguDkh2khBAvJTw8nMrl/Yg+tw+XR5cZ9c4Qxo0bpxN37949oqOi6OhX\nDHMTI2p52FLayZLTp0/rff+uHdpRq6o/I4YOIfE/NvNwd3eXQiteWVJshRBZGjVqFIGuFoyt70Gf\nqiWY0NCDud9/oxNXrFgxktPTeZSceURQWoaW+/GpODk5ZdluYmIi27ZtY/v27SQlZb2+NjY2liYN\n6lEp5hrTylvz4PAuenTulHsPJ0Q+kwlSQogsxcXF4mZj+vTaxdqUDI1GJ87W1pZPP53MR7OmU9PV\nipCYdIKat6RWrVo6sREREQTVrUMJ4wy0qspYIwv2Hz2Os7PzM3GHDh2itLUpY2qUBKCaiz2lFh4i\nJiYGBweH3H1QIfKB9GyFEFnq27cf267GciEyicjEdOaeisTX1zfL2A/HT2DVhi00HjSOL+csZvnK\n1TqbXwB8PH4crUqYsK29Pzs6VKaRvcLkSR/pxJmampL0OIP/zftIydCi0Wqz3TkrJSWFNWvW6D10\nLURekQlSQohsjRs3jrnff0OGRoOvry+Hjp2kWLFiOW7vtUYNGOKYwmulMnuyW65FsjrNiS3Bu5+J\nS0tLo37N6pQjkfquNqy4+oiqzdowb6HuwQXr16+nT8/umBhBaoaKh7sb4bfv5jhHIfQhE6SEEC9t\n5syZxKekk5yu4a+QK3oVWoDaDRqyKCSS1AwNyY81LAl9QK36ukuEzM3N+W3zVsLNi/NdSAwu/jX4\nfs7cLNsc8GYvOldwZHnnsizu5EtS9AN69uypV55C5DYptkIUMYmJibzZoxsOtjaUdHdl5cqV+Xbv\nSZ9OxrZiDUovPITvwoO4BNZj/ETdYeTk5GTatWyBvzaaiVWciQ85w+AB/bJsMzU9g9fKFENRFOzM\nTWjkY8fJkyfz+lGEeCkyQUqIImb4kMGkXjrJ6TdqcSMumd4j3qFkyZLUq6ffHsUvwtzcnLUbNxEX\nF5dZHO3ssow7cOAAdhnJfNXQH0VRaFmqOGUWbuCHufE6nzEzMeLMvSSalrbnsUbLmftJlK4emGW7\nsbGxLF++nISEBFq3bk1gYNZxQuQ2KbZCFDHBwbvY06kyxa3MKG5lRu9yzgQHB+dLsf0fe3v75/57\njUaDqZHR00lWJooRRoqCVqvVif1o8lQ++/gjtl6J5lHyY4xMzdm+fbtOXExMDLWqB+JulIyjOcye\n+QUrVq+jdevWufNQQjyHDCMLUcQUs7fnWkzy0+vrCY+zXROr1Wo5evQoO3fu5NGjR/mVIkFBQdxL\nV5hyPIw9N6MYuPtvXmvRPMt3xp07d8bG2prw2DQS0rW0aN0WMzMznbiFCxfibZzEmFpODKjqxMhA\nB8a+Nyo/HkcIKbZCFDWzvvuBwXtCmXj4Gm/svMx1rQX9+/fXicvIyKBT29YM6NKBmaOGULl8Oc6f\nP58vOdra2nLg2HGiStfghwgTKrbrzq9rf8sytkVQQ2qVMOG37n781LY0OzZv5Ntvv9WJi34UhYvl\nP9dutmbExcfl1SMI8QxZ+iNEEXTu3DmCg4Oxt7end+/e2NjY6MQsWrSIX774mI1t/TE1NmLF5bss\njzLh2OmzBsg4e+YmRvzc3pdilplvxX4594A490B27352OdHBgwfp2qEtY2s5UdzKhJ8vxOLfpD0L\nFi0xRNqikJLD44UQTwUEBBAQEPDcmLCw6zQoYY2pceYAWGMvR6ae/is/0nsp5qYmXItOpYaHDVpV\nJTQqleo1PXTiGjVqxDc/zuWj8WNJTIqhQ4cOfPfjHANkLIoi6dkKIbK0ceNGPho+hG0dKuNoYcrn\nJ8K5bOPFtj/2GDq1Z8yePZuPxn9IDXcb7iWkk6A15fqtO9nOdBYiL8kRe0KIl6KqKh+N/5Dvv/8B\na01Bxz0AABFjSURBVHMzPLy82LrrD9zd3Q2dmo79+/ezbNkynJ2d+fTTT7GysjJ0SqKIkmIrhMiR\nuLg4EhIS/q+9O4+OqszTOP59K3vIHgxLAihgBEF2EAUDskjbNtqCAspAo+LuuKD0mdNOn3HUQVGP\n2gd1Wm3HfdpeFDAuLAqYABIh7BoEmzUkSAgQUqlAUpV3/ki1jUOwxeTmJlXP5x+oW7duPXAO+fH+\n3ve+l44dO+LxaE2lyA/Rdo0i8pMkJyeTlZUVVoXW5/Nx/333MnLYUGbe8CvKysrcjiStnEa2IhI2\n6urqKCwspLKykoEDBza4uYa1lp+NHc3x3Vu4NCuWjYdq+PpEAus3byUuLq6Bq4r8g0a2IhLW/H4/\nV4+/ggmXj+XeG6bQM7s7RUVFp5xXWlrKFwVruGdQOgM6JnDDBamY6goKCgpcSC2hQrf+iEhYePXV\nV9mz+QueHd2eqAjDx99UcPMN01m5Zu33zqsfmRB8lm79AKXO0uDzeUV+LBVbEQkL3+zYTu80D1ER\n9UVzQPs4Fq7Zdcp57du3Z+TIkTy1di0jMqPZfMhPbGoGQ4cObe7IEkLURhaRsNCv/wC+OBjAWxPA\nWsunu7307dv3lPOMMbz21v+yu8rwwtqDrCmp5q13/kxMTIwLqSVUqNiKSFiYMmUK4yZM5taP9nLr\n4hK21iTzh9feaPDcbl2ySApUMr1vW85NtAwd2A+v19ssOX0+HzfNmE5mu7PodV53Fi1a1CzfK87S\namQRCSsHDx7E6/XSpUsXIiIiTnk/Ly+PsaNG8uaEc4mN9FBnLbfl7uT6W+7i6aefdjzf9OuvY1fB\nJ0zrlUTJsRrmbTjC8vzV9OnTx/HvlsbTamQRESAjI4OuXbs2WGih/gHzEcYQ5an/eekxhrgoDz6f\nr8Hzm9r7ubnc3CeF9gnRDOiYwCVZbTS6DQEqtiIiJ7niiivweAzPf3GArw9V88ctZZR6a5g9e/Yp\n5wYCAWbd868kJ7QhNTGB3/7mNzS2o5eY0IYyX+13rw8dt9rnOQRoNbKIyEkiIiJYvXY9o3OGU/BZ\nMVFRUfz53QV069btlHOfnPs4a97/K2uvH4K/ro6pb75Cx6wsbr/jjp/8/XPmPsn9d9/J6E6xHDgO\n5Z4kpk6d2pg/krQAmrMVkbDi9/upqalpkocVjBl+Mbe3PcHYc84C4N2vS/nAk8m7H3zUqOvm5eWx\neNEi0tu25aabbmpwpytpmTRnKyJh7/E5c0hsE09aSjJjR+Zw+PDhRl2vbUY7io5Uffd625Fq0jMy\nGhuTnJwc/mvOHGbNmqVCGyI0shWRsJCbm8usmTPIvfICMuKjmZ3/DVXn9OWdd+f/5Gtu27aNkcMu\nZnRWMn4Lqw9WseqLdXTu3LkJk0trcrqRreZsRSQsrFqZz5RuaXRMiAXg7n5ZjP9oVaOu2aNHDwo3\nb2HhwoV4PB7mTZhARhOMbCX0qNiKSFjIzOrER+XV1FmLxxgKSyvo0KFD46+bmckdjVgQJeFBbWQR\nCQvHjx9nzIhLqP22mI6JsawqPsIHi5cwZMgQt
6NJCDldG1nFVkTCRk1NDYsWLcLr9ZKTk0NWVpbb\nkSTEqNiKSMhat24dS5cuJSUlhWnTppGQkOB2JAlTKrYiEpLmz5/PzBnTGJEVz4FqS2VMGp+vLVTB\nFVeo2IpISOrWOYuZ2RH0yqjfpGJuwSGm3v+fWrQkrtCmFiISko4eO0aHxOjvXneIMxw9etTFRCKn\nUrEVkVZt3GWX8frWoxyt9lNU5mPFvmrGjBnjdiyR71EbWURaNa/Xyy03zuDjRYtJTkrkyWd+x7XX\nXut2LAlTmrMVERFxmOZsRUREXKLtGkVEWpiNGzeybNkyUlNTmTJlCnFxcW5HkkZSG1lEpBlZazHm\nlC7jdxYuXMiN0/+FYVnxHPDVYVM6krd6jQpuK6E2soiIi5YsWUJWh3ZERUYyZEA/du/e3eB59951\nOw8MTmNm3zQeHJqO52gJb7/9dvOGlSanYisi4rA9e/Yw5dqJ3NYzhj9dcy497QHGXz6Ohrp/h49W\n0Cm5/r5hYwyZbTyUl5c3d2RpYiq2IiIOKygooFe7BPq0b0NUhGFCjxR279nDkSNHTjl39KhRvLm1\nAm9NgO3l1eQX+xg1apQLqaUpqdiKiDgsIyOD4orj1AbqADjgraXOQmJi4inn/s/rbxJ37iBu/nAv\nz2zyMe/3LzN48ODmjixNTAukREQcZq1lyjUT2Ph5Ht1TolhbUsXDjz3Bbbff7nY0aWLa1EJExEV1\ndXXk5uZSXFzM4MGD9dD6EKViKyIi4jDd+iMiIuISFVsRkRamsrKS1atXU1RU5HYUaSLarlFEpAXZ\nsmUL48aMIiXacKiymqsmTOSlV179wV2npOXTnK2ISAvS/4JeXNLmCGO6JlNdW8dvV5bx2HMvM3Hi\nRLejyY+gOVsRkVbgm527uDAzAYC4KA8XpEeyfft2l1M1TklJCQsWLCA/P7/BXbPCgdrIIiItyPk9\nziNv7wGuODcFb02ADWW1zOjd2+1YP9nKlSu56hc/J7ttPKWVJxg4dDh/mb8Qjye8xnpqI4uItCDb\nt2/nstGXQk01R6qOM3PmTJ565netds42u2sXrs0KcGFWIrUBy29XlvHQM//NpEmT3I7miNO1kTWy\nFRFpQbKzs/n6m53s2LGD1NRUMjMz3Y7UKPv2H+CCgV0AiIowZKdGsnfvXpdTNb/wGseLiLQCMTEx\n9O7du9UXWoCB/fvy4TfHsNZyyFfL2tLjDBo0yO1YzU5tZBERcczevXu5YtxYiouLOVHr5+GHH+GB\nX//a7ViO0XaNIiLiCmstBw8eJCkpibi4OLfjOErFVkRExGG6z1ZERMQlKrYiIiIOU7EVERFxmO6z\nFRFppU6cOMGiRYuoqqpixIgRIXGrUKjSAikRkVbI5/Nx6SXD8B3cR1pcFF+W+Vj8yTIGDhzodrSw\nph2kRERCyIsvvkjkkX08OrwtxhiW7zLcectM1hRucDuaNEBztiIirVDxvr10S/J8t2fyeW3jKCkt\ndTmVnI6KrYhIKzT8khw+219Dua+WQJ1lwY5Khg8f7nYsOQ21kUVEWqGrr76aL7du4fZHH8Vay4jh\nw3jhpT+4HUtOQwukRERaMb/fT01NDfHx8W5HEbSDlIhISIqMjAy7QlteXs606ybTt2c21/zySvbv\n3+92pH9KI1sREWk1AoEAQwcNoP2JA4zoFMe60mrWe+PY9GVRi3jIgUa2IiLS6u3cuZN9e3Yxs28q\n2elxXN87Dc8JLxs2tOxbnlRsRUSk1YiJiaHGH6C2rr5zGqiz+Gr9REdHu5zsh6mNLCIirYa1lskT\nJ/C3wnyGtotiwyE/sZ16sHTZCiIiItyOp+fZiohIaPD7/Tz33Dw2rFtLj/N7cd+s+4mNjXU7FqBi\nKyIi4jjtjSwiEqastSxYsICNGzfSvXt3pk6disejJTvNSSNbEZEQ98Cs+3jv7dcYnBHJl0fqOH9I\nDn96973v9lWWpqM2sohIGCorK6Nrl078/vLOJMZEUBOo455PD5C7ZDkDBgxwO17I0X22IiJhqKKi\ngoS4GBKi63/cR0d4aJsQS0VFhcvJwouKrYhICDv77LNJTk3nr9uOcrjaz9KdFXxb5ad///5uRwsr\nKrYiIiEsMjKSxZ8upzihG/cv/5bPj6ezdNkKUlJS3I4WVjRnKyIi0kQ0ZysiIk2qpKSEwsJCjh07\n5naUFk/FVkREztjcx+bQM7s71105jm5nd2bVqlVuR2rR1EYWEZEzsn79ei4fPZLHR2SQHh/Fuv1e\nXi46Tsm3B8P+3l21kUVEpEkUFRVxfrs2pMdHATAoM4GKY8fUTv4BKrYiInJGzjvvPIq+reJItR+A\n9SVekhITSEpKcjlZy6W9kUVE5IwMGjSIu2c9wD1zH6d9ShsOVdUy//3csG8h/xDN2YqIyE+yb98+\nSktLyc7O1n27QdobWURExGFaICUiIuISzdmKiIijNm3axIoVK0hLS2PSpEnExMS4HanZqY0sIiKO\nee+997h5xnQu7pTAfm+AmIzOrFi5OmQLruZsRUSk2XXq0I67esfS86x4rLU88nk5dz30JDNmzHA7\nmiM0ZysiIs3u8NEKOifXj2KNMWS18VBeXu5yquanYisiIo65dEQOb315FF9tgO3l1aws9jFy5Ei3\nYzU7tZFFRMQxhw8fZtp1k/l0xWekJifx7LznmTx5stuxHKM5WxEREYdpzlZERMQlKrYiIiIOU7EV\nERFxmIqtiIg4qrKykoKCAnbs2OF2FNeo2IqIiGM2b95MdrdzmDHxF1w0qD933nYr4bh4VquRRUTE\nMX3O78GlyccYfU4yvtoAD+aX8ezLbzB+/Hi3ozlCq5FFRKTZ7fjbLi7KSgAgPiqCC9pGs23bNpdT\nNT8VWxERcUyP7O7k7/UC4K0JsKmshl69ermcqvmpjSwiIo756quvGDdmFFF1tRz2VnPTzTfz1NPP\nYswpndaQoB2kRETEFdXV1Wzfvp20tDQ6derkdhxHqdiKiIg4TAukREREXKJiKyIi4jAVWxERcVwg\nEAjLzSz+TsVWREQcU1ZWxqic4cTGRJOekswbr7/udiRXaIGUiIg45mdjRhFTupXpvdPYX3mCR1Yf\n4oPFn3DhhRe6Hc0RWiAlIiLNLm/lKqb0TCEqwnB2SizDMuPJz893O1azU7EVERHHnJWexq6jJwCo\ns5Y9lQHatWvncqrmpzayiIg45sMPP2TadZMZkplAiddPWpdslixbQXR0tNvRHKFNLURExBVFRUXk\n5eWRnp7OVVddRVRUlNuRHKNiKyIi4jAtkBIREXGJiq2IiIjDIp28eKg+QklERORMODZnKyIiIvXU\nRhYREXGYiq2IiIjDVGxFREQcpmIr4hBjzIPGmK3GmE3GmPXGmMFNfP0RxpjcH3u8Cb7vKmNMj5Ne\nLzfGDGjq7xEJRY6uRhYJV8aYocDPgX7WWr8xJg1wYn+6061wdGLl4y+BD4BtDlxbJKRpZCvijA7A\nIWut
H8Bae9haewDAGDPAGLPCGLPWGPOxMaZd8PhyY8yzxpgNxpjNxphBweODjTGrjTGFxpiVxphz\nf2wIY0y8MeYVY8ya4OfHB4//yhjzbvD7vzbGzD3pMzcFj60xxrxkjJlnjLkIuBJ4IjhK7xo8fZIx\npsAYs80YM6wp/uJEQpGKrYgzlgCdg0XoeWNMDoAxJhKYB0y01g4GXgXmnPS5OGttf+DO4HsARcBw\na+1A4D+Ax84gx4PAp9baocAo4CljTFzwvb7AtUAfYLIxJtMY0wH4d2AIMAzoAVhr7efA+8Bsa+0A\na+3O4DUirLUXAvcBD51BLpGwojayiAOstVXB+cxLqC9y7xhj/g0oBHoDS039ri8eoOSkj/4x+Pl8\nY0yiMSYJSALeCI5oLWf27/YyYLwxZnbwdTTQOfj7T621XgBjzJdAF+AsYIW1tiJ4/C/AD42k3wv+\nWhj8vIg0QMVWxCHBJ3HkAXnGmC3AdGA9sNVae7qW6/+fa7XAI8Aya+0EY0wXYPkZxDDUj6J3fO9g\n/ZzyiZMO1fGPnwdnsvXb368RQD9PRE5LbWQRBxhjso0x3U861A/YA3wNnBUsdhhjIo0x55903uTg\n8eFAhbW2EkgG9gffv+EMoywG7j4pV79/cv5aIMcYkxxseU886b1K6kfZp6P9WUVOQ8VWxBkJwOvB\nW382Aj2Bh6y1tcA1wNzg8Q3ARSd97rgxZj3wAnBj8NgTwOPGmELO/N/sI0BUcMHVVuDh05xnAay1\nJdTPIX8B5AO7gIrgOe8As4MLrbrS8ChcRBqgvZFFWghjzHLgfmvtepdztAnOOUcA84FXrLUL3cwk\n0tppZCvScrSU//k+ZIzZAGwBdqrQijSeRrYiIiIO08hWRETEYSq2IiIiDlOxFRERcZiKrYiIiMNU\nbEVERBymYisiIuKw/wO7E19JBWZHrgAAAABJRU5ErkJggg==\n", - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "plot(X_sdml, Y)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Least Squares Metric Learning\n", - "\n", - "LSML is a simple, yet effective, algorithm that learns a Mahalanobis metric from a given set of relative comparisons. This is done by formulating and minimizing a convex loss function that corresponds to the sum of squared hinge loss of violated constraints. \n", - "\n", - "Link to paper: [LSML](http://web.cs.ucla.edu/~weiwang/paper/ICDM12.pdf)" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "lsml = metric_learn.LSML_Supervised(num_constraints=200)\n", - "X_lsml = lsml.fit_transform(X, Y)" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "data": { - "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAdsAAAFsCAYAAACEtRP5AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzs3Xd4jXcbwPHvk5O9JJGIRIQYiVnE3mIXtXftvWmrrdVB\naUurqLYULVW1N7U3sRWJGUSQkEhEZM/zvH9E9U1PgkZOTsT9uS7X65z3Pr/nfnpF7vP8pqKqKkII\nIYTQHyNDJyCEEELkd1JshRBCCD2TYiuEEELomRRbIYQQQs+k2AohhBB6JsVWCCGE0DNjfTWsKIqs\nKRJCCPHGUVVV+fd7eiu2Ty+oz+aFEEKIPEVRdOosIN3IQgghhN5JsRVCCCH0TIqtEEIIoWdSbIUQ\nQgg9k2IrhBBC6JkUWyGEEELPpNgKIYQQeibFVgghhNAzKbZCCCGEnkmxFUIIIfRMiq0QQgihZ1Js\nhRBCCD2TYiuEEELomRRbIYQQQs+k2AohhBB6JsVWCCGE0DMptkIIIYSeSbEVQggh9EyKrRBCCKFn\nUmyFEEIIPZNiK8RLOnHiBD179aJLtx7s2bPH0OkIIV4jUmyFeAknT56kdZt3MHMrg32Zqrzbuy/b\nt283dFpCiNeEoqqqfhpWFFVfbQuR23r37YuRc0la9hgAwMk927i8fzP79uw2cGZCiLxEURRUVVX+\n/b482QrxEtLS0jA2MXn2WmNiQlpamgEzEkK8TowNnYAQr4PBAwfSuVt3LKysMTE1Z/XcL5jz7SxD\npyWEeE1IN7IQL2n37t18N3ceaWlpDB44gG7duhk6JSFEHpNVN7IUWyGEECKHyJitEEIIYSBSbIUQ\nQgg9k2IrhBBC6JkUWyGEEELPpNgKIYQQeibFVgghhNAzKbZCCCGEnkmxFUIIIfRMiq0QQgihZ1Js\nhRBCCD2TYiuEEELomRRbIYQQQs+k2AohhBB6JsVWCCGE0DMptkIIIYSeSbEVQggh9EyKrRBCCKFn\nUmyFEEIIPZNiK/KlpKQkBg8Zip29Ay6uRViwcKGhUxJCvMGMDZ2AEPrw8YSJnL92iy/X7CXmcSTT\nxg/CvWhRWrdubejUhBBvIHmyFfnSzl276DTiQ+ydnHH3LEuTrn3ZsWuXodMSQryhpNiKfMne3p7Q\nO4HPXofdCcTRwcGAGQkh3mSKqqr6aVhRVH21LcSLHD16lHbtO1CzRVtiHz8i+Polzpw+haOjo6FT\nE0LkY4qioKqqovO+FFuRX127do3t27djaWlJjx49sLe3N3RKQoh8ToqtEEIIoWdZFVsZsxVCCCH0\nTIqtEEIIoWdSbIUQQgg9k2IrhBBC6JkUWyGEEELPpNgKIYQQeibFVuhdXFwcly9fJjIy0tCpCCGE\nQUixFXp17NgxipcoQau27SnuUYJFixcbOiUhhMh1sqmF0JvU1FRc3dwYMOUbKtX1IfTubaYP6sQJ\n32N4enoaOj0hhMhxsqmFyHUPHz4kLU1Lpbo+ABR296BUhcpcuXLFwJkJIUTukmIr9MbR0RFtWioB\nF88B8Dg8jMAr/pQqVcrAmQkhRO6Sw+OF3piamvLHihX06t2HIsVLEhx0i48/+pAKFSoYOjUhhMhV\nMmYr9O7hw4dcvXqVokWLUqJECUOnI4QQeiOn/gghhBB6JhOkhMglvr6+vN26DQ18GrNg4ULkS6cQ\nQoqtEDno/PnzvNO2He7VfajVoS+zvpvH3HnzDJ2WEMLApBtZiBz0wfjx3I2DjkPGAXDD7y9WfzOF\nK5f8DJyZECI3SDeyELlAo9GQmpz07HVKchIajfwzE+JNJ0t/hMhBAwcMoHbdulhY22Dr4MiWJfP4\n6ouphk5LCGFg0o0sRA67dOkS3343h7i4eHp060LHjh0NnZIQIpfI0h8hhBBCz2TMVgghhDAQGbMV\nQgiRZyUlJXH69GkURaFGjRqYmpoaOqVskWIrhBAiT3r06BGN6tUhJfoRWlXFqmBhDh71xc7OztCp\n/WfSjSyEECJHJCcn88G4MZQs5oZ3xXLs3LnzldqbPOFjiitRzGzgyDcNnSicEs6nUyblULa5S4qt\nEEKIHDH+/XEc3rSSceVNaeMQQ6/uXTh79my227tx/SpVnExRFAVFUajsZMqNa9dyMOPcI8VWCCFE\njtiwbh2D3ypAMTszqhWxpom7JVu3bsl2e1Wq1eBwSCKpWpWUNJUj95OoWqNmDmace6TYCiGEyBGW\nlhZEJqQ+ex2VDNbWNtlub9r0GZgXq8CgHfcYvOMedqUqM+XTz3Ii1Vwn62zFa+fOnTusX7+ewoUL\n06NHD4yM5DujEHnBqlWrGDt8CC2LWRCepHI5xphzF/xwcnLKdpuqqhIcHIyiKBQpUgRF0VnCmqfI\nphYiX1i/fj29+vTFpVgJHj8MpaCDPdevXsHYWCbWC5EXHDx4kO3btmJboADDh4+gUKFChk4pV0mx\nFfmCXUFHuo2eRKN2XUlOSuTTPm1p08yHH3/80dCpCSGE7CAl8of4uFiq1G8MgKmZOZXq+nDtNZ2d\nKIR4c0ixFa+VAgXsObR5NQAxUY85uWcbtWrVMnBWQgjxfNKNLF4rx44do2Wr1miMTUiIi6Vqtar4\nHj0qk6SEEHmCjNmKfCMxMRFfX19cXV0pW7asodMRQohnpNgKIYQQeiYTpIQQQggDkWIrhBBC6JkU\nWyFe0r59+2j9TltatmrNhg0bDJ2OEOI1IsVWiJdw+PBhuvXoiXuNxng1bMOI0WNZt26dodMSQrwm\nZIKUEC/h3d69MStalmZd+gBw5sBOzu9cy8F9ew2cmRAiL5EJUkK8AkVR0KZpn73WarUY5fEN0YUQ\neYfs3i6yJS0tjU6dOuHn50fZsmXZunUrGo3G0GnpzYhhw2jTth0aYw0mpmZsWPANvyz62dBpCSFe\nE9KNLLLFxdUN1diEGk1ace7QHpLiYwi7H5KvC+6xY8f4fv6PpKalMnjgAN5++21DpySEyGNkUwuR\nYzZt2kSPd3vx464zWNrYkpgQz6iW1Zk7+1sGDx5s6PSEEMJgZMxW5JgbN25gXcAOSxtbAMwtLLG1\nL0hQUJBhExNCiDxKiq34z/r06UPskyh2/LGEJ5ER7Fv/O48fhjJgwABDpyaEEHmSdCOLbFm2bBmj\nxo4jOTERUzMzvpn5NcOHDzd0WkIIYVAyZiuEEELomYzZCiGEEAYixVYIIfKItLQ0Q6cg9ESKrRBC\nGNiuXbso7FQQUxMTvN+qQGBgoKFTEjlMiq3IMyIjIzl69CgBAQFZxsTExNC1ew9sbAvgVtSdVatW\n5WKGQuS8oKAgenbrwthK1qzv6kllk0e0adkcmfOSv0ixFXnC8ePH8SxThmFj36d2vfp8MP7DTOMG\nDx1KWFwys7ccY+iMHxk99j1OnjyZy9kKkXNOnz5NeWdryheyRGOk0M6zAMEhIURGRho6NZGDpNiK\nbHnw4AEdO3fBs2w52nXoSHBw8Cu116Pnu/SbNJMpv2xi
5voDrN2wkYMHD+rE7dmzh26jJ2FjZ0+p\nilWo27oT+/fvf6VrC2FIzs7O3ItKJPnpQRcPYlNI06rY2toaODORk6TYiv8sJSWFps1boC3gzMDP\n52HqXByfJk1JTEzMdnvB9+5SpX4TAKxsClDGu2am3cn29g48uJM+nqWqKmF3b+Hg4JD9mxHCwBo0\naECdRk2ZeCScBRei+PToQ+Z+/z0mJiaGTk3kIDn1R/xn165dIzo2jq4jP0ZRFNw9y3L+8B78/f2p\nXr36f27PxMSEkqU9Ob5zM/VadyQq4iGXTh3j8/dH6sTOmf0t/QcOolbztoSH3CUxKpw+ffrkxG0J\nYRCKovDHmrXs2LGD4OBgplerRtWqVQ2dlshhUmzFf2ZhYUFCXBwpyUmYmpmTmpJMXEw0FhYW2W5z\n3ZrVtGrdhu1L5xMZ8ZCJEyZQt25dnbi2bduyf+8e9u3bh51PLXr06IGVldWr3I4QBqcoCq1btzZ0\nGkKPZAcp8Z+pqkrX7j0IuBNMlYYt8Du2nyKOdmzdvAnlFQ5UT0xM5NatWzg5OVGoUKEczFgIIXKH\nbNcoclRqaioLFy7E/9JlypUtw4gRI2SMSQjxxpNiK4QQQuiZ7I0shBBCGIgUWyGEEELPpNgKIYQQ\neibFVgghhNAzKbZCCCGEnsmmFiIDrVbLhg0bCAwMpEqVKjRv3tzQKQkhxGtPlv6IZ1RV5d3efTh3\n8RKeVWpy/sgeBvbry9TPPzN0akII8VqQdbbihc6dO8c7HTrx1Zq9mJpb8CQygg/a1Sf43l3s7e0N\nnZ4QQuR5ss42j7h48SKDBg+hT79+ee5ouMjISAq5FMHUPH2P4wIOjljb2hIVFWXgzIQQ4vUmxTYX\n+fn54dO4CXGWjhi7lKZbj3fZtm2bodN6xtvbm5CgmxzfvYW4mCds/20htjY2FC1a1NCpCSHEa026\nkXPR0GHDiTa1o23/9KPjzhzYyZmtf3D08CHDJvZ/zp49S78BA7kdGEilypX54/fleHh4GDotIYR4\nLWTVjSyzkXNRckoKZgUsn702s7AkNTXVgBnpqlatGpf8Lho6DSGEyFek2Oaivr170alrNwoUdMLC\nypo/Zn/OlAkfGTotIYQQeibdyLlsx44dzPp2NikpKfTr24dBAwe+0hmwImvh4eEcOXIES0tLmjRp\ngqmpqaFTEkLkc7L0R7xRLl++TOMmTSlWpgIxUZHYmJty6MB+LC0tX/xhIYTIJim2wmAePXpEQEAA\nbm5uuTazuXHTZnjU8KFplz6oqsoPE4bTtnE9JkyYkCvXF0K8mWSdrchRWq2W0aNHU7duPYYNG5bl\nRK+9e/dS2suLAcNGUvGtSnw3Z06u5HfvXjCelasD6T/8pd6qxt17wblybSGE+DcptiIDX19f3It7\nYOfgiFfZcty+fTvTuLcqV2HTjt24VKzJroNHKVe+IlqtNkNMSkoK3Xv0YNTXC/l02Vamr9rNl1/N\n5MqVK3q/j9q1a7Fn1a+kpaYS/TiS4zs2UKd2Lb1fVwghMiPFVjwTGhpKsxYtqfF2Rz6c/xtFylSm\navWaOk+tx44d41ZgIF8s30anoe/x+dLNPAgNZceOHRniwsPDQTGibNX0IlfQ2YWS5d8iICBA7/cy\nf95ciH3EkEYVGNu6Jh3fac27776r9+sKIURmZOmPeOaPP/7AqUhROg15D4DiUyoyuFEFzp07R82a\nNZ/FhYaGYmVti7mlFQBmFhZY29kTFhaWoT0nJycUBfxPHaVizfqE37/HzUsXKFOmjN7vpUCBAuzf\nu4fo6GhMTU0xNzfX+zXzo6tXr+Lv74+HhwfVq1c3dDpCvLak2IpnrK2tSYiNRZuWhpFGQ3JSImkp\nKVhbW2eIa968OQlxMWxd9hN1W7bn9IGdREU85J133skQZ2Jiwro1a+jctSt2BQsRHhrClzNm5Eqx\n/ZutrW2uXet1kZSUxOxvv+Gy30UqVq7C+x+Mz3RZ1C+/LOGj98dR3tmGGxFx9Bs8jK9mzjJAxkK8\n/mQ2sngmMTERN/diuJepSJX6TTi0ZQ2a1CQCrl3ViT1y5Aidu3YjOjoaGxtbVq5YTrNmzTJtNzo6\nmlu3buHq6oqzs7O+byNfCgoK4uHDh5QpU+aVvkBotVpaNm1MbNBlqjkZc+phCk5e3mzbuTvDeu/Y\n2FhcnJ2Y5eNKEVtTYpLSGLf/AQePnaBChQo5cUtC5EsyG1m8kLm5OQHXruJkocF38x9UK++V5daN\nDRo04GHoAxLj4wgPe5BloYX0p8sqVapIoc2mTyZNoHqligzt0g6vEh6cPn06221duXKFyxfP82GN\ngjQtacdHNRw5c+oEN27cyBD38OFDrM1MKWKb/sRrY6ahuIMVwcEyo1uI7JBuZJGBg4MDu3fvNnQa\n4qmjR4+yYvEiTveoTkELU7beDKNn507cvHsvW+0lJydjaqzB6On3bmMjMDM2Jjk5OUOcm5sbGJvg\nezeauu62XI9I4GZErDzVCpFNUmyFyMOuXbtGPTd7ClqkP2G2KVmIfjv8SElJwcTE5D+3V6FCBawd\nnPjN/zE1Xcw5fj+RgoVddcbRTU1N2b5zN+3atGLB+SCMNBpWrFydXoSFEP/ZC4utoihmQCeg+P/H\nq6o6TX9pCSEAypYty4zgSMLjk3CyNGPLjTA83Ipkq9BCehHdf/go748ZxepL/lSoVJu93/+AsbHu\nr4KqVatyN+QBjx49wt7ePtMYIcTLeeEEKUVRdgFPgHNA2t/vq6o6+wWfkwlSIsfdvXuXug0acj/4\nHsYmpvTt3YtFixYZOi29mvrpJ3w/Zw6udtZEJqWyZccuqlWrZui0hBCZyPbeyIqiXFJV9T8P1Eix\nFQCqqrJy5UpOnj6NR7FijBgx4pXWvBYvUYqiZSsxYNKXhN4LYsbQbsyeNZOhQ4fmYNZ5T3BwMA8f\nPsTT01NnKZYQIu94ldnIxxVFqaiHnEQeFRwczM6dO4mMjHxuTJt32uJRshQtW7UmKCgo07j3PxjP\n51/O5LHGltXbdtGsRcss91F+Gffvh9Bz3GTMLa0o7lUenw49WbduXbbbe124ubnh7e0thfZfduzY\nQdOmTWnVqhV//fWXodMRIktZFltFUfwVRfED6gF/KYpyXVEUv/97X+RDI0eNokTJkvTs0w+XIkWY\nN2+eTkxycjJNm7fAokgpRn6zBPuSFWnStBkJCQkZ4mJiYli4cCEf/bSS1r2HMvbbJTx4GMHRo0ez\nnZ+pqSn3bl0H0p+a71y/JEuK3lC//PILHdu2wfLeX6RcP06dmtXx9fU1dFpCZOp5Mx7a5FoWItu2\nb9/O6LHjSEhIpEqVSmzZtCnbh6T7+vryy69Lmb7iT4qWKoP/ySN89P5A+vbti52d3bO469evE5+Y\nRIch76EoCkU8RnFm/3b8/f2pUaPGs7iEhARMTU2xtE7fhMHIyIgCDo7ExcVl+34/Gv8BX30wiFrN\n2/LgTiChQTc
59OfmbLcnXl+fThjPkGrONC2R/rNpa6Zh1LAhnPe/bODMhNCV5ZOtqqp3VFW9A0z/\n++///17upZi/JCYmsmvXLrZt20ZUVNQrtXXq1Ck6d+lKnbY96DfpK64F3qVe/YbZbu/QoUMULeVF\n0VLpy0Aq1mqAmYWlTveclZUVsTHRJCcmApCSnETskyisrKwyxDk5OVG+QgV+/+ZTggMD2Lv2N+7f\nvkGdOnWyneOnn37K6pUrKahJoVGNKgQF3sLBwSHb7YmsxcbG0tSnEcVdClGjahXu3Llj6JQySE5K\nxtnqn1nZLjamxMXGGDAjIbL2MhOk/lJV1fv/XmsAf1VVy73gczJB6l+io6Np0MiHZFXB3NKK8HtB\nHD1ymOLFi2ervc6dOxOWqDLii/Su3siHD3ivbT2Sk5Ky1d7u3bvp0Kkz32w4gEMhF4KuX+azvu0I\nvneXQoUKPYtTVZXeffpy/sp1Kjdohv/xg5R2L8L6tWsybPkHEBkZyfCRIzl79hxFixZlwY8/ULZs\n2WzlJ3KPVqulRFFX7LSxNC9ZgHP34zgXlsTdB2F5Zr/pFk2bcPOv43xY15XEFC3TjwTTd9hoZn/3\nnaFTE2+w/zwbWVGUicAkwAKI//ttIBlYpKrqxBdcUIrtv0yeMoXj/gEM/mw2iqKw5df5JN2/xaYN\n67PVXvfu3bnzOJ4xX/8EwMOQu3zYuQnJiQkv+GTW3mnXnn379uFavBQhgQGMGDGc72brrvJKS0tj\n6dKl+Ptfoly5sgwaNAiNRpPt64q8xc/PjxrelfmjU2lMNEaoqsrIHbf5cNosxo4da+j0gPS5Az4N\n6nH+r3MoikKbtu1Zsy57/5aEyClZFdssx2xVVf0K+EpRlK9eVFjFy7l9OwjPKjWfPf2VrVqbLScO\nZLu9KVOmUK16Ddb+9A1uJT3Z8PMc6tSp/Uo5btuymf3793Pu3Dl8fHyyPFZNo9EwaNCgV7qWyLuS\nk5NRFCVDT4WxorzSTPKcZmpqiu/J7O8TLURuet6TrXem/8dTqqo+d569PNnq+uGHH1iw9Hfen7sM\nUzNzFk/9gArFXfnpxx+y3aavry+Dhw4jLi6eenVr8/vy5RgZyfkS4tVotVrcnB0pbp5Ki1J2nLsf\ny8G7sdwJCZUxciGeIzvdyAef/tUcqAZcJL0b+S3grKqqz32EkmKrS6vVMnTYcFb8/jsaY2Pq1a/P\n+rVrZO2kyJMiIiJo1aIpdwMDcXB0Yt3mrZQvX97QaeVLqqqyf/9+7t27R7Vq1ahYUbY2eF29yg5S\nG4HPVFX1f/q6AvC5qqqdX/A5KbZZiI2NJSUlBXt7e0OnIoQwMFVVGdC3N4d2/0kpB3PO34/lu+9/\noE/fvoZOTWTDq+wg5fV3oQVQVfUSINNJX4G1tbUUWiHyoLS0NM6cOcPRo0eJj4/PMm7Z0qW4OhXE\n2sKC7p06EBsbm+1r+vr6sn/ndmY1LMToKnZMrefEiOHDSElJyXabIu95mWM8/BRFWQKsePr6XUB2\nkBI8evSIoKAgihUrhqOjo6HTESJL169fZ/lvy1BVlV69+1CunO7KxcTERFq1aEbg1UtYmhqTpDHn\n0LHjFC1aNEPc4cOHmfzBe6x9uxzuthaMP/oXI4cM4reVq7OV2/379ylmb4GZcfqzT9ECZhiRvlSw\nYMGC2WpT5D0v82TbH7gMjH3658rT90Q+lJaWxooVK5g2bRrbtm3LMm7Dhg2ULF2a7r37UrJ0aVau\nWpWLWQrx8vz8/KhTozrXty7h5rZfqFe7JmfPntWJmzd3Lon3rjGnsTNfN3Ckln0Ko4frHnCxb99e\nenk68lYhW+zMTfisZnH27d2X7fyqVavG5dAYrkckoKoqfwZEUbiws0xEy2de+GSrqmoiMOfpH5GP\nqapKtx49uXIjEC/vWvy6fDzHT5zkqy9nZIh7/PgxAwcP5qMf/sCjbEXu3bzGiKFdaezjQ+HChQ2U\nvXgd+Pv7c+jQIRwcHOjcuTNmZmZ6v+bMGV/QvqQF7cqkFy8Hi8d8Oe0zNm79M0NcwLUrVCqoQWOU\nPtxWtbAFv94I0GnP0dGJg9HJqKqKoihcj4zFwSH7w0IlSpRg2YqV9OvTi9i4eEqX8GD7rj91NogR\nr7fnHUSw9un/+j89gCDDn9xLUeSWs2fPcvL0GSYsXE3XUR8zcdE65s+fr3P6T1BQEAULueBRNn3G\nZNFSZXAp6kFgYKAh0hYGFh8fT+OGDXAtWACvEsU4ceJEpnFbt26lUb3a7Fowg28njaNxg3okZXO3\ns/8iJiYae/N/niscLIyJiY7WiatctTonQlNIStWiVVUO3YunchXdFZADBw7kNpZ023WV8UdvMPRA\nALPn//hKObZt25ZHj58Q9SSay9dv4Onp+UrtibzneU+2f28TIwcSvCGioqIo6OyCiWn604aNnT2W\n1tZER0dn6NIqVqwYEWH3uRNwhWKe5QgJvMGDe7fx8PAwVOrCgKq+VQHjmDAGVXQgICKOJg3rc/n6\nDZ2fh9HDhzK+ekHKF7JEVVW+OBHEqlWr6Nevn17z69qjF5PeH0UhaxM0Cqy6FsukGb114kaMGMFJ\n36MM+fNPzIw1FC3uwdIfF+jEWVtbc/zMOdasWUN0dDRjmzXLkSVRiqJgaWn5yu2IvOl5O0g9ePrX\npsARVVVv5E5KwlCqVq1K6J1AjmxbR6U6jTi4aSUFHRx0Jog4ODiw+OefGTKsO85F3AkLucv38+bh\n4uJioMyFocTGxhIQeJuVnTyxMDGimqs11yISmTt3rs7xjI8eR1HMLv1nSVEU3KyNiIiI0HuOvXr3\n5kn0E+Z/NxtV1TJ2wicMGjRYJ06j0fDH6rWEhISQlJREsWLFstyC1MrKigEDBug7dZGPvMxsZHfg\nZ0VRigPngCPAUVVVL+gxL2EADg4O7N61kwGDBrN63nQqVa7M7p07Mv2F07VrV3x8fAgMDKR48eJy\npmw23blzh6ioKLy8vDA3Nzd0Otmm8s+aei1qpruYNWpQnz8uX6BfRXvuRSfjGxzP5IbZP6Xqvxg5\nchQjR456qdgiRYroORvxJnrhphbPAhXFAhgMjAeKqKr63F3nZVMLIbKmqipjR45g1YoVONlYkGhk\nwq79B1/LsbqKZb1ICb9H+7L2XI9IYE9gDFdv3MLd3T1DXGRkJO9268L+Q0ews7Vh7vwf6Nmzp4Gy\nFkI/XmUHqSlAXcAaOA8cI/3J9sELPifFVogsbNy4kc/GDGNH24oUMDPh54t32Rxjge+Zc4ZO7T9L\nTk6mY/t2+P11Bjv7gqxYs4633nory/i/Z/EKkR/951N//k9HIBX4EzgMnFBVVf9TCIXIx65cuUKz\nIrYUMEs//LxT6cJ8ufL1PMHG1NSU7Tt2vnS8FFrxJnrhphZPD45vCpwGmgH+iqIc03diQuRnXl5e\nHLgfQ2xy+pF1W289pIxnaQNnJYTQlxcW26cHD7wL9AW6ASFA9g9hFULQ
uXNnajZvhffK0zTceJE5\nVyL4dcVKQ6f1xlu2bBmuhZxwLmjP6NGjDZ2OyEdeZsx2O+kzkI8BZ1RVfandsWXMVogXCwgIICoq\ninLlyslRi9kQGxvLsmXLsLa2pnfv3lku1XkZy5YtY+igAbTzcsDGTMPqSxG06dCZNWvW5GDGIr/L\n9gSpV7igFNvXUGBgICNGjuZWYCDe3pX5cf58OWRA5Ennzp2jQe2amGogVaui0ZgQGHw/23sKuxZy\nop6TSq9KTgCcCo5h/ukwohPl9B3x8l7liD3xhoiJiaFhIx8KelVh0BffE6NY8nbrNmi1WkOnJvI4\nVVV5/PgxqampuXbNt5v40KCYDcval2JZ+9IUt9VQp2aNbLeXlpaKjdk/T8Y2ZhqQBwaRQ6TYGoCq\nqqSlpT03Zs2aNXiUKk0R92L07dsvVwre6dOnKeBUmDZ9h+Feuiy9xn/O7dtBBAcH6/3a4vV148YN\nypYuSVFXF+wL2LDi999z5bpJifE0LG6LoiiYaBQaFrflUdhzVyQ+V9eevVh9KYJTwTFcCY/nh1Oh\nFC1RKgczFm8yKba5SFVVJk2egpWVNZZWVvTu2y/Tjdh37NhBn379adCxN13HTGHn/gN07NRJ7/lZ\nWloS+ySCFT3zAAAgAElEQVQK7dMvAonxcSQlJWBhYaH3a4vXV/s2rWhgl8DKDh581ciFsaOGc/ny\nZb1f19TMnBN3Y9K/vGpVjt+Lwc6xULbbmz9/Pm06dGb+6VC+PBKCvVsJ/C7p/z7EmyHLMVtFUbYB\nWfahqKra9rkN55Mx26NHj7Jy1WoszM0ZMWI4pUpl/5vur7/+ypffzuH9ucswt7RiweTRNKrpzTez\nZmaIq1e/PvYlKtBz3GQAAq9c5OsRvYh58viV7uVF0tLSaNaiJbGpUKZ6Pc7s3Ubd6t4sWbxIr9cV\nr6+4uDgc7O1Y26nks/Wz8/6Kou+kmfTp00ev1z5y5Agtm/pgY6ohJU1LKkYE3L4rxzwa2M6dO9my\ncQMF7O0ZM3bcG7f9ZXbGbL8FZj/nT773559/0r5TZ6LN7Ln9JJladeoQEKB7vuXL2n/wII279MXe\nyRkLK2ta9xvBgYMHdeIURcmw12z6lxb9f3HRaDTs/HM73d5piWVsGO+PHMqinxfq/bri9WVpaYmF\nuTk3IxMBSErVEvg4ETc3N71fu0GDBgSHhjPiw8lMnj6TiCexUmj1JCYmht49u1PE2YlK5cpwMJPf\nWwDLli6lf89upJ7bxvU/f6O6d2UePMh+135+IrORn6N23XrU7tSP6j4tAVj30zcUtVSYN29uttr7\nYPyHXA4Op+/H0wHYvXop4VdO8+e2bRnidu/eTdv2Heg6fDwOzi78MXcGjerWYv369a92Q0LowaZN\nmxjYtzcVXWwIepyAT4vWLF2+QnaKykc6tXuHx1dO0qOsLUFRSSy4+IQTp8/i5eWVIa5ksaIM9dJQ\nxjF96OmncxE06v8BEyZMMETaBpHt7RoVRSkNfAWUA54dS6KqaokczTAPSkxMxKaA/bPX1gXsSYgJ\nzXZ7Ez7+iFq16zDnvQFYWFlx5exxDmfyDbFFixb88ftyJkyaTFJyMu1bt2TRIunKFXlThw4dqFjx\nAmfPnsXV1ZX69etLoc2GtLQ0YmJiKFCgQJ7676eqKtt37mJ5uxJYmBjhbG3KX+Gp7N+/X6fYJiUl\nYWNq8+y1tQkkJMTndsp50svsjbwU+AyYA/gA/XlDJlb1ercnC2Z/Tq8PpxEX/YQdyxeybs2qbLfn\n5OTEX+fOsm3bNpKTk2m5+EdcXV0zje3cuTOdO3fO9rWEyE2lSpV6pfkM+ZVWq2X//v2Eh4dTq1Yt\nSpTI/Bll/fr1DOzfj9TUFFwLF2brjl2ULVs2l7PNnKIoWFlYEB6fgnsBM1RV5VGiFhsbG53YHu/2\nYuHa3+hdzobwuBQO3E3gk476n9z5OniZHaTOqapaVVEUf1VVK/7/ey/43GvfjayqKt/Ons2KP1Zi\nZmbGpAkf0759e0OnJYR4DWi1Wjq3b4vfmeO42ZrhFxrLqrXradGiRYa4mzdvUsO7Mp/WcaKEgzl7\nbkWxK8yEG7fv5Jkn3IULFzB10sf4uJlxLw6izR05ceYclpaWGeJSU1OZ+tknbN6wHltbW6bP/BYf\nHx8DZW0Yr3LE3nGgHrCe9D2RQ4CvVVX1esHnXvtiK4TIH1JTU7lw4QKqqlK5cmVMTEz0fs2NGzcy\nefRgptdzwkSj4B8Wx4IrSQQ/CMsQt3btWn6YMpbx1eyevddrcxC37wVTsGBBvef5svbt28eB/fso\n5FyYQYMGyfaiWXiVI/bGApbAGOALoDHphxIIIYRBbdiwgblz5mBiasqMGTOoXbu2Tkx0dDTNGzci\n7F4QigL2zkXYd+gI9vb2ug3moJCQEEramWCiSf+9W8bRgtCHITrn+bq5uXE7MoGEFFssTIwIikpE\nVaBAgQJ6ze+/atq0KU2bNjV0Gq+tlzli74yqqrFANDBGVdWOqqqe1H9qIic9efKERo0a4enpydix\nYw2djnhDJScns3379ucuoUtLS2Pqp59Suawndat5s3v37kzjFixYwLvdu1Io8irm987j06Aehw4d\n0omb+uknWEcHM6dxIeb4FMI5OYzJEz7OqVvKUq1atTgVEkdIdDKqqrLpehTVvSvrdA3Xrl2bVu07\nMv5QGN+di2Kqbzg/L1qCsfHLPAuJ18XLdCNXI32S1N+j4U+AAaqqnnvB56QbORf4+/szafIUIh49\n4u0WLZg0aaLOP9LY2FhcirjhXNQDz8rVOfbnekqXKsm5M2cMlLV4E61bt46+PbujKJCcpqVkCQ+u\n3QjUiftk0kT2rlzGV7WL8SA2iXFHb/Hn3v1Ur149Q5yroz1dS1vQ2CP9CXDFxXACFGcu/GvXp1bN\nmlAp8Tq1i6b/Cjt7P5ajqW4cPHZCT3f6j8WLFzFuzBhUVYtn6VJs/XMX7u7uOnGqqnLs2DGCg4Op\nUqUKZcqU0XtuQj9epRv5V2CEqqpHnzZUj/Ti+1bOpvhmiI+PZ+/evaSkpODj4/NKYzJ37tyhUePG\ntBs4lkoepVn3yzwiHj3i+3+tAx4yZAi2BZ2YumwzRhoNzbv1ZXyHRiQkyFaMIvcM6NWTzuUd6FzO\nkciEVN7fdZshQ4boLGtbs/IPfqtfgvKO6cXxUkQs69et1Sm2KSnJOFr+MyO2kJUJfhGxOtet5O2N\n7wY/ahRJH2P0vZ9EpRbPnd+ZYwYPHsKAAQOJj4/PdPbu3xRFoX79+rmSkzCMlym2aX8XWgBVVY8p\nipJ7R3vkI1FRUdSt3wCNuRXmVtaMGTuOo0cOU7JkyWy1t3XrVrwbNKdZt34AuHqUYmLXpjrFNiws\njMLuHhg9PevTybUoKiqhoaF4eHi80j2JvGXTpk1cvHiRFi1aZDp+aUgJKam0Kp0+TupgYUxdd1uO\nHDmiE2duZkZkwj/H2j1KSsP
lX7NeAWrUqc+SE4f4oI4riala/vCPYMjo93TiPvlsKm1PnWLY7vMo\nioJnmXJM/+rrHLyz59NoNM8ttOLN8DLF9rCiKD8Dq0jfM7AbcEhRFG8AVVX/0mN++crMWbNwKV2e\nAVNmoSgK239byPvjP2TLpo3Zas/Y2JjkpMRnr5MTE9AY6x6ePWTIEPr068f5YwcoXbEKm3+Zj7mF\nlRTa18STJ0+YN3cO94ODadysOV26dMl0SYhPg3qcOXWCYgXMmTnjC0aOGce3s/POzqqmxkZcDI2n\ndlEbUtK0+IXFUbGe7trciZ9PY/DIYYyq6MKD+BS2B8dwatBgnbgt27bTrLEPE/efxMhIoX3nbsyc\nNUsnztLSkr0HD3Pz5k1UVaVUqVIYGb0RWwWIPORlxmwz3wQznaqqauMsPidjtv/ybu/eWJeoRKP2\n3QG4fuEM2376inNnTmervfDwcCpX8aZ6s7a4eJRi1x+LGNinF1MmT9aJHTFiBEt/W05qSjIWltZs\n3byRRo0avcrtiFc0Y8YMvvnyC1JSUylZoiSHfE/oHHweFxdHDe/KuKhReNgYse9eEgNGvccnn36W\nIW7t2rUM7N2TBW08sDUzJjAykY/23iHicRS2tra5eVtZmjhxIt99M5MS9uY8jEtBa2RCWGQUpqam\nOrH79u1j8/p1WNnYMnL06EzHOYXIi7K9zvYVLijF9l9+WrCA7xcuYfz85ZiamfPzp+Pw9irB/O/n\nZbvN4OBgvvp6JuEREbRq2YK+ffvmmYXw+U1YWBhr164lLS2N9u3bU7x48Wy3tXbtWvq+24MP67ri\namPKknNhJFgX5nLAzQxxa9as4ZsJo/mklgOKohARn8LIXfeIi0/I8HQ2adIktv/6PdMbF332Xo/1\nARzyPakz1mlIR44c4eeff8bd3Z3p06ej0ej2xIjnS0pK4uHDhxQuXDhX1guL/+ZVNrVwBr4EXFVV\nfVtRlHJAbVVVf3nB56TY/otWq2XM2HEsXrwIBYUWb7/Nqj9W6OzCInLXqJEj2bJ6JYqRQr/ho5g2\nbZpOzN27d6lTvRoNnK0w0yjsuPOYfYePULFixWxds2XLlpgFnWagtzMAj+JTGLY9kKRUbYa4pUuX\n8vvXExlXNX2sMylVy7ubAolPSMgw6/zAgQO0btGMb5oXw72AGSeDY5hz4gGPY+IwNzdH5A/btm2j\nz7s9MTYCRWPM+k1baNCggaHTEv/nVYrtTtJnH09WVbWSoijGwPm/t258zuek2GYhMTGRtLQ0rKys\nDJ3KaykkJARfX1/s7Oxo0qTJKz0djR41irVLFzOvSXmS0rSM2X+ZCZ9NY+LEiRniRgwZjJX/IT6p\nlT6ZbdHFexy3LMbG7Tuydd0uXboQfHIXE+unH0V3PSKBzw8HE5eUce5hcHAwlStWoIeXJSXtzdh0\nIxbH8rXYsGWbTpsjhw9n8aKfMTM2IkWrsmDREvr375+t/ETeExoaSlnPUkyq5YiXowV/PYjlx4sx\nBN0Lkd8leUh2zrP9m6OqqmsBLYCqqqlAWg7n90YxNzeXfxzZdPz4capUKMeKaR/z4cDetG7elJSU\nlBd/MAtbVv/BnMblaFWyEB08C/NFPU9++/knnbjIiHBKF/hnmVQpOwsiIyKyfd1vv/0W//AkZh+/\nz2r/cL44HEzvfgN14tzc3Nh74CD+mmIsDFAp69OW5StXZ9rmjwsWcD/sIbsOHCbySYwU2nzm2rVr\nuDtY4fX0+DpvF2ssTYy4c+eOgTMTL+Nlim2coigFeXp6uaIotUjf2EKIXDdsQD++q1eCFc28ONSx\nMvFB11mxYkW221OMjEj8v67bxFQtipHuk3KLNm2Z5/eAW4/jCI5JYNb5EFq80zbb1y1WrBj+V69j\nWaY2t61KMG3mtyz8+edMY6tUqcIh3xNcuxXEwsW/PPeLmqOjI3Xr1pWhiXzI3d2de5FxPIpP/3J5\nPyaZx7GJuLi4GDgz8TJeZunP+8BWoKSiKL6AEyBnv+VTqamp/Pbbb9y6dQtvb286deqUpyZchTwI\npXbD9A0JNEYK1R0tCQ4OznZ7g8e8x7gvphKVlEJympYvTtzk+wW6Ra9f//48uH+flt/NJk2bRr/+\nA/howsRMWnx5Hh4e7Ny165XayK9SU1O5desW1tbWFClSxNDp5AklSpRgwqTJfPT1l5RytCbgYQzf\nzZun9z2eRc54qdnIT8dpvQAFuK6q6gv77WTMNvc8fPiQ+/fvU6FChSz3U71y5QrvvT+ekPsh1Ktb\nl9nffqPzhKTVamnfsRNB98PwqlqHcwd30KFNa2Z/+01u3MZLad2sCaWj7zK1dglCYhNps9WfRSvX\nvtIG6bNnz2bJj/MxMjJi/ORPpPvVwEJCQni7aWNiHkUQnZBIpy5dWLjkV1kb+9Tly5e5desWZcuW\npXTp0oZOR/zLf54gpShKdeCeqqqhT1/3AToBd4DPVVWNfMEFpdjmgtZt2rB79x5MzMww1mjYteNP\n6tatmyEmLCyMtypVpnW/kZSsWIVdKxZjb6awdfOmDHGnT5+mU7cefLlmL8YmpsQ+ecx779TlTtDt\nPHPUV1hYGB1at8Lv8mW0qsq0adMY/5H+N5V/U6mqSlxcHFZWVln2cCQlJTH10yn4HjqEi5sbX34z\nO8tD0l9G25bNKRMdxOQaHsSlpNFu2yVGfTGTvn3lsDGR92VngtTPQPLTDzcAvgaWkz5eu+g5nxO5\n5LvvvsP3xCnmbffllyNXaNV7KO06dNSJO3DgACUrVqFZt36UKFeJIVO/Y/eunSQkJGSIi46Oxt7J\nGWOT9E0GrGztsLSyIjZWd79ZQ3F2dub42XOEhIbxJCZWCm02XbhwgTVr1nDx4sUsY/z8/ChZzB1H\nB3ucHOzZs2dPpnGD+vbhr82r+MANPB9epWGd2jx69Cjbufn7+9PD0xlFUbA2NaaNuy0Xz8tGdeL1\n9rxiq/m/p9duwCJVVTeoqvoJoLvHWj4VGxvL/v378fX1JTU1b20JfeDAAWo2a4NDIRcURaF5t35E\nPdbtcDA3Nycu+gl/9zTEx8YA6HQ5V6tWjfDgOxzYuJKIByFsXDibQoUK4ebmpv+b+Y8KFCggC/r/\nJSYmhimTJtK7WxfmzZ1LWlrmiwZmfjmDVo0bsmr6RFr6NGDO7G91YlJSUmjdsjnt3LSs6VSSD6oW\noHuXTty/fz9DXFJSEms3bGRZs7I0ci/I+GrFecvBgr1792b7PkqXLs3OoPSZ3slpWg48iMWrbLls\ntydy1+PHj9m7dy+nTp1Cq9W++ANviOdNkNIoimL8dKlPE2DIS37utbBv3z62bttGAVtbRo4cSeHC\nhXVi7ty5Q8NGPlg7OBEfG4OrsxN7d+/KMyfleHp6sm7rDlKSkzAxNePK2eOYW+rOVG3ZsiWffj6V\nRZ+/T4nylTm6dQ3jxr2nU6zs7OzYu2c3g4cMY9uSuVSuUoXdO3fILj8Gtn//fkYPHczD
iAjq16vH\nkt9+1+nWT0pKokmDepRMe0L9wjas/N4Xv/N/8ctvyzPE3bt3j1lffcWJHtUpbGVGcEwCdT/7jB7v\n9srwbyA4OJjUxAR8irsCUL6QJSULJuHn54erq+uzOCMjI3h6ZJ6lSfrPSVJa2iv9zPy05FeaN2rI\npjt+RMQlUqVmbQYO1F0WJfKeS5cu0dSnIS5WxjyKS6JKjdps2LJNzubl+WO2k4FWQATgDnirqqoq\nilIK+E1V1bqZfvCfz+fZMdsVK1bwwUcf06RrfyJDg7nke4BzZ8/g7OycIa5dh46Yu5ak/aCxaLVa\nfpgwnDaN6jA5k72HDSE1NZXSXmWIjounUBF3Aq/48f3cOQwdOlQnNjo6mtnffUdwyH0a1KtLnz59\n8tQsY5G5mzdvUruaNwsblaZSIVtmnbvLHVs3dh88nCFu3759fDywNwc6vIWiKMQmp+L16zHu3n+Q\nYbbqyZMnGdm9Iwc7/HNCZp31F/h9606qVKny7L2YmBhcnAvxXVNXClubEpecxrj9D9h72Je33sp4\nuuaYEcM5vWMzg8s6cTY8jgOP0jjr5/9KJ93ExsZy4cIFrK2tqVSpkvysZkNcXBx3796lSJEiubY/\ndu3q3lQ1CqV5yQKkpKl8cSKC0Z/NfKO+LP3n82xVVZ2hKMp+wAXY83+V0wgYrZ80c8fUL6YzfMYP\neFVO3zN2yRcf8ttvv/HRRx9liLt16xY9Oqf/kBgZGVGuRj1u3Lqp056hGBsbc+tGAIsXLyY4OJgu\nSxfp/CL8m62tLVM//zx3E3xNqKpKZGQkGo0GOzu7LOPWrFnDvFlfkZaaRv9hIxg6bJjei8Dhw4dp\nVtyJZh5OAHxVtyQuCw6QkpKSoWciJSUFK1PjZ/mYGxthrDHS2fDDy8uLe0/iOXj3ET7uBdkXFE54\nXBKlSmUcGbKxsWHWN98wefJE3ipsTUBEAj1798v052vuDz/yw3wvdh/Yj2vtYhz79LNXPlLO2tqa\nevXqvVIbb7K9e/fSvUsnrE01RMUns2jJL3Tr3l3v1w28HcSguumHaZhoFMrZKdy6mXd+ZxrSc5/t\nVVU9mcl7AfpLJ3fEx8dj6+D47LWtvSNxcXE6cd7e3hzevJriZSqSnJTIqd1bGNavV26m+kJGRkaZ\nPsmKlxMfH0/3Th05fOQwWq1Khw7t+XX5Cp1urz///JMPRgxlXv2SmBmb8P7nUzAxMWHgoEF6zc/O\nzo7b0QloVRUjReFOdALmpqY6+dWtW5eQRJVZZ25T39WOZdfCqFGjBk5OThni7O3tWbtpM907dyQx\n4TIWlhas37I10+I4YuQo6tarz8WLFylRokSWxc/IyIgxY8cxZuy4nLtxkW1xcXF079KJ8dXsKV/I\nkqDHiQwbMoh69evrfc1y5UqV2Bt0lR7l7IhN1nL6YSodq1bV6zVfF2/kqT/vvf8BB46fpvu4KYTf\nv8eyLyeyb89uvL29M8Q9fvyY1u+05XpAAMlJiXTo0JGlvyyRMcx8ZPx7Y7m1ZxOLm5QhVavSc9cV\nWgwayccTJmWIe7dLJ2pHXadPhfTJYrsCH7LokTn7jh3Xa37Jyck0bVgf00chvGVvzrqbEUyZ8TVD\nhw3Tib179y4fjh3DnduBVK1Zi6+/nZ3lE6ZWq+Xx48fY29vL+tV85urVq7RsWIf5Tf8Zg//0eCRz\nlq7Gx8dHr9cOCQmhZdPGhIXeJz4xhaHDhvHtd3PeqGGA/9yNnJ/Nmvk1Uz75lGVT38fW1pY1q1bq\nFFpIfwo4duQwISEhmJmZUahQIQNkK/TpzPHjfFDGGVONEaYa6FXakZ3HdQuouYUFUWH/zEZ/kpSK\nWS6cpmNqasreQ0f4/fffCQ0NZUX9+jRs2DDTWHd3d9Zs2vxS7RoZGeWZtdMiZxUpUoSo+GSCHidS\n3N6csNhk7kbG4eHhkSvXvnDpCsHBwVhbW8vP2P95I59shfhbnx7dcA46zye1SqCqKmMPB1CwYVtm\nz814xvDFixdp2rA+Q8s5Y6YxYr7/A1Zv3Ezjxo0NlLl4Waqq8vWXX/LT93NRVRg8fDiffj41Xz9t\nrV61iuFDB1PMwYq7kXFMm/Elo0aPMXRabwQ5PF6ITISEhNCoTm0Km2hJTtOSaGHLId8Tme436+/v\nz6IFP5GWmkrv/gOoXbu2ATIW/9XiRYuYN3UyS5t6oVEUBu4PYMD4SYweO9bQqelVSEgIAQEBeHh4\nULx4cUOn88aQYitEFmJiYjh69CgajYaGDRvKYevZkJqamn4EnLt7lstMUlJS+GzKJPbs+BOHggWZ\n8c13VK9eXe+5tX+7Be2NwujomT6GuTPwIUufWLPr0BG9X1u8eV7lPFsh8jUbGxtatWpFixYtpND+\ny9mzZ1myZAkHDhwgqy/P27dvp4CVBdWrVKKgvR39+2W+h/G4USM5tWElX5ezpb1pFK2aNeXWrVv6\nTB8AO4eC3I7+Z2vSwCcJ2DnIWKLIXfJkK8QbJjY2li+mfs4V/4tUrFyFKZ9+nun5t9/Pm8v0zz6h\nios11yISeKdzN37I5PhBa3NT+lR0oGVpe4Kjk/hwzx2Wr1pLp06dMsTZ21hzukd1nK3MAHjvcAAV\n+4xh3Dj9LhkKCAigQe1atHa3Q2MEmwMjOXjMl/Lly+v1uuLNJE+22bR//34GDBrMiJGjuHr1qqHT\nESJLP/zwA7bmpmiMFAoVsObIEd1u0rS0NN5u1oQzm5dTLvYKvuuX0qZlc509bKOjo5k4YQJfNnRm\nZBU7ZjYqxPrVq7hw4UKGuIcPH5KYnEKLUumbgbjZmlGpsBW7Mjmn19TEhCdJ/2yy8SRFi6mpaU7c\n+nN5enpy+vwFvHoMo2TXoZz667wUWpHrpNg+x5YtW+jesxfagu6Ea82p16CBFFyRJ124cIGP3hvH\nkhYVCBnRlJFvudK2ZXOdwzMuXbpE0M3rjK7qQF13W8ZVK8jVS34EBGTcq+bRo0fYWphRyCp9lypL\nEw1u9paEhoZmiHN0dMTISCHgUSIACSlabjxKyLSYfTxpMj13X2XJxbt8fPQG5x4n061bt5z8z5Al\nd3d3PvroIz7++ONcWQIjxL9JsX2Or2bOou/EL2nRvT8dh4yjUYdeLFi40NBpCQPRarWcOXMGX1/f\n555msnHjRiqV86J0cXemTJqQ5ek7OWndunV4O9vSskQhLE00jKvqQVpaGn5+fhni1Kc7URk97eRS\nFNAYGencj5ubG6aWVuwLTD8t6mJoHIGP4qhUqVKGOCMjIyZMmsInB+/xyYG7DN8eSGH3EowZo7vM\n5P3x45n+/QKuFK2KrU8HTp47L+swxRvjjdzU4mUlJydjaW397LWFjQ1JceEGzEjoQ3BwMFu3bkWj\n0dCxY0edLQ4hfZzzLa/ShIeHozFSsLC04tylKxlOwAE4cuQIQ/v3ZZS3PXbmGpYsX4yiKHwx4yu9\n3oO7uzu3n8STmJqGubGGkNhEklLTdI5HrFC
hAoWKuPPzhVBqFDblxP0kinqUxMvLK0OciYkJO3bv\npVO7d1hw9gaO9vas37QFFxcXnWtPmzaN5s2bs23bNjw9Penfv3+Wu1J17dqVrl275tyNC/GakAlS\nzzHv+++Z++MC3n3/c+JinrB81idsWr+OBg0aGDo1kUOuXLlCo3p1qVzIlBQtBESrnDxzjqJFi2aI\na9G0CUkBF1jXrirGRgpDd/tzy8Sec/6XM8SNHT2KqKNr6VQu/YntVmQiP99QuXYrSK/3odVqKVOi\nOEpMJPWK2LP15kPqNmnG5m3bdWKjoqKY+NGHXLnkR8VKlfly5jfPPRUmKSkJMzMzfaYvRL4h2zVm\nw5jRozFSFH5fOhczMzN+X7ZUCm0+M/njD3nHw4x2XumbWKzwf8T0qZ/z85JfMsQFXr/KhApFMDNO\nf2LrXaEIQ/df12nP2saGoOR/vmQ+TkjFykr/x5sZGRlx5WYgkydPJiAggM+GNGHUqFGZxtrZ2bFg\n0eKXblsKrRCvTp5sxRutXs1qNLd8SGUXKwAOBT3hjqM3G7ZmfCKsV6smzo/v8svbb6EAE49c53C0\nEZdvBmaICw4OplqVytRwVLA1hZ1BCfz2x2ratGmTW7ckhDAgebIVIhMtWrVh/ZL5FLMzI0Wrsv12\nAh/01S2M6zdv4S2v0lRedhQzjRFhCakcO31WJ87NzY2z5y+weNEi4uJi2dapM3Xq1MmNWxFC5GHy\nZCveaKmpqbw3djTLli7DSGPE2LHjmPrF9Ew3qY+Pj+f3338nJSWFXr16PfegeSHEm0n2RhZC6M2p\nU6c4efIkLi4udOrUSc58Fm8sKbZCiP8kPj6eTyZP4tzpk5Ty9OKrWd9muixqyZLFTBz/PrWKWBH4\nJBWPCt5s+XOnFFzxRpJiK4R4aaqq8nbzpiTc9sPHzZwLEclcT7LmL79LWFhYPIvTarXYWlsxq7EL\nbrZmpGlVJhwJZ+4vK3j77bcNeAdCGIbsjSzEayoyMpLJEycwsE8vli9fnuXpOznpwYMHnDp5grHV\nCuLtak3/ivYYJUZz8uTJDHGJiYmkpKbiapO+x7HGSKGIrSkRERF6z1HkPbGxsXTt1AErC3OcHR1Y\n+uuvhk4pz5BiK0QOS01N5euvv2bcuHE6ewn/V7GxsdSrUZ2QnWuoEHKebyZ+wGdTpuRQpllL/3ZO\nhn5rZasAABXSSURBVMKeplV1Jo5ZWlpSqUJ5Vl9+TFKqFv+wOC7cj6V27dp6z1HkPSOGDCbc7xiL\nW7szsXoBJnwwlkOHDhk6rTxBupGFyEGxsbEULmgPqhYLEyNik9P4+Zdl9OnTJ1vtrVq1il8//4gN\nrdI39g+NS6Ly8uPExidkuSViTlBVlY5t2xB66TQNipjiF5HCA2NHTp07r7PJxf379+neuSMnz5zD\n2dGBxUt/o2XLlnrLTeRdLk4FmVbbDmfr9J6OVf4RlGw3hOnTZxg4s9wj3chCZGHpr79Sv3pVGtWq\nwcaNG1+prfLly1HExphlHUqxpG1J2ng6MHLwgGy3l5SURAGzf5bD25oak6bVPvcghJygKAqr12+k\nRe9hXLEuR5nm3Th07Himu0m5urpy5H/t3Wl4FFWixvH/SSchJIQsgCAIhD0kQRNAEMOmwogIERCR\nGRF1vCOCC26ICiMgzqg412XGDYHxARRGUUG4iCiIFxhkC4IEBTEsCQTCEkII2ejuuh/oizABhZCi\nkvT7+6JdqT79to+dN+dUddWq1RSXlJC5L1tF68eio6PIzCsBTv7BtrcQatcufVKdP9LMVvzajOnT\nmTDqUV5ObkqJx8tjK9OZ+v5sevfuXabxIkJDuC02gpTYaAAyjhYz+qvdHC8p251/srKyaNsmgScT\nLyfpsnBe3ZhFjYQOzJrzcZnGE7HTl19+yeCBA0huWIODhR6OV4tm1dr1hIeHOx3tktHZyCJn0bNL\nMvdGFdK72WUAzEjbw6qIlsz6uGwz3EYNGxJWdJjnr29IkCuAWd8fZOH2XI4Vu3/7yeeQlpbGk488\nzP59WXS7vgd/nfTyGWcEi1QkW7Zs4auvviI8PJzBgwcTFhbmdKRLSpdrFEfs3r2bO2+/jfUbN9G4\nQX2mznif5ORkp2OdEhQcTP6JY6ceHzvhIfgiLrz//ebNXFG3NvfM+5nqQS7yit288c75X/T/bBIS\nEvh8ydcXNYbIpRIfH098fLzTMSoczWzFNl6vl6viYulfJ4BhV17B8swcRq7YwaYffjzrfVGdsGTJ\nEu4YOIBHr6pPsdfijc37WLTka9q3b1/mMd1uN8899xwHDx5k9OjRxMTElF9gEanQtIwsl9z+/ftp\n07IFP//x2lNfGbnti6088NLfSUlJcTjdL1auXMn0qVNwBQYy7IEHSUpKcjqSiFRSWkaWS65mzZoU\nlJwgK7+YBuEhFLu97MrNp1atWk5HO0Pnzp3p3Lmz0zFEpApT2YptQkNDmfDcBG56+UVujqnF2gP5\nJHbqolvOiYjf0TKy2G7ZsmWkpqbSqFEjBg4caOvFGPxZbm4uzz83gZ0/b6fDtck8/sQoAgP197TI\npaRjtiJVWFFRER3bJVHPfZiEWoEs21tC3LU38P7sD52OJuJXdMxW5BLZt28fU6dOpeB4Pv0H3EqH\nDh1sf80VK1bgzjvEiM61MMZwzRVe7p43j5ycHKKjo21/fRH5dVrPEzlPM2fMoH2bOBJbt+Tvr712\n1rvvZGVl0SEpkd1z34P//ZS+v+vBokWLbM/m8XgICjCnzvp2BRhcJgCPp2xXrhKR8qVlZKmydu7c\nyeLFiwkNDWXAgAHUqFGjzGN99tlnPHTvXbzZvQUhrgAeXp7OyGcncv/w4WfsN3bMMxz64l9M6toS\ngEU7DvDKbjdrNn5/Ue/lt+Tn53NlXGs6RJ4gvnYwSzMLCW1yFQsXf1XqTj0iYh/diED8ytq1a+mQ\nlMjKdyYx669juaZtIrm5uWUe78OZ0xnd9gq6NaxFx/pRTOzYmDnvzyi1X35eHvVDg049rl8jhPz8\n/DK/7vmqUaMGK75djat1FxYdq01ir9v5eN58Fa1IBaFjtlIlPTnyIf5yTWMGt64PwIivt/L311/n\n2XHjyjReaFgNDmWfOPX4UGEJ1UMjS+3X79aB/OGDmbStW5PLQqvx9Lc76X/7XWV7ExeoQYMGfPDh\nnEvyWiJyYVS2UiUdOHCA+Ga/XBIyPjKErOyy38j9kVFPcl3nZI6f8FDNZXgnLZtPFrxZar/u3bvz\n6tvv8syzYyksLGTg4CGMn/h8mV9Xfl1RUREbN24kKCiIxMREXC6X05FEzkrHbKVKemDYfexd8QVv\nXteSQ4UlDFy4hUnvTKV///5lHnPr1q1MfXcyHo+HIUPvol27duWYWC5UdnY213VJxp2fS5HbTbNW\n8Xz+5RLdEUkcpe/Zil8pKCjgT3cP5ZN586kWHMSfxz3LE6NGOx1LytEdt99G0Q/LGZoQhdeCV9Yf\nps
eQEYwbP8HpaOLH9D1b8SuhoaF88NHHzPR6McboRKEymjx5MsuWLSMhIYFnnnmmQl39a9vWHxlY\nLwRjDC4DbWsHsnVLmtOxRM6q4nxyRGwQEBCgoi2jW/r05smRD3Bo3Re88dJzXBnXCq/X63SsU65M\nTGLFniK8lsUJj5dv958gUUv7UkFpGVlESsnIyKBZkxgm921K7dAgitxe7pufzpvTpnPHHXc4HQ+A\nnJwcbup5Axk7d3DC46Fzl6589Ok8goODnY4mfkzLyCJy3nbt2kX1wABq+74zHBIYQL3wYHbu3Olw\nsl9ER0ezau16fv75Z4KDg4mJidEqhlRYWkYWOU+WZZGens5PP/1UoZZT7dChQwc8GD7/6Qger8X6\nrHx25RbTr18/p6OdweVy0apVK5o0aaKilQpNM1vxe7t27WLu3Lm4XC4GDRpEvXr1Su1TXFzMgJQ+\nrF2zmsCAAJo0b8Gir5YSERHhQGL7hYSE8PG8+fx+YH/eTc2merCLv736OgkJCU5HE6mUdMxWqiTL\nsnj7rTf5aOYMqoeG8tS4CXTr1q3Ufps3b+aGrl3o0ySaEo/Fsn35/HvtOmJiYs7Yb8L4cSye8RZP\ndKhFgIG3v8uhSZe+vDNl2iV6R85xu926L67IedK1kcWvvPbKK7z5lwk8fLmH/sE5DEzpw/r160vt\nN+7p0YxKrM9r3Vry1vWtGNIsihefn1hqv+83pNKpXhCBAYYAY+hSP4TvN353Kd6K41S0IhdPZStV\n0vSpk3m9SzN6xNRhcOv63J9Qj1nvzyy135HDh2keFXrqcYvIEHIOHii1X2x8AqkHT+C1LCzLYn12\nMbFx8ba+BxGpOlS2Uibbtm3j2vZtiQwPo0PilWzZssXpSGcIDAyk8LR7uRa6LQIDg0rt16vvLby4\nYQ8ZeYX8fOQ4r27aR6+U0icBPTP2z3hqN2Xk0mwe/+YgO6woJv33q7a+BxGpOnTM1k8UFRUxbdo0\n9u7JJLlzF26++eaLGiuuRTNGtIhkYMu6zE8/yN/SDvDD9vSLumdseXp/5kyeefRhRiU14GDhCd7Z\nks2K1Wto1arVGft5PB6efnIU7/1zGq4AFw8/+hhPjxlz1jNbPR4PGzduxOPxkJiYqO9zikgpujay\nHyspKeG6Lsl4DuyiaTisyCrhwcefYvTTT5dpvE2bNvH73j1ZPajtqW3dP/2etz+aS8eOHcsr9kVb\nsGABH70/g+phYTzyxJPExcU5HUlEqjiVrR9bsGABTw+/h+e71CbAGA4VnGDEogzyjxeU6eSXjIwM\n2ibE8d2QjkRUCyK/xE27Wev4ZvXaUjNHERF/oitI+bG8vDxqhQUR4FsajQoJxLIsSkpKylS2jRo1\nYsjQu+g19yN6NohgWVYeKQMGqGhFRM5BM1s/kJmZSWKbeO6JD6dVrRDmbT9GYZ2WfL18ZZnHtCyL\n+fPnk5aWRmxsLAMGDNAVfETE72kZ2c+tWbOGEffdy759+0lOTmbytPeIjo52OpaISJWishUREbGZ\nriAlfsfr9ZKWlsaePXucjiIifk5lK1VSeno6l0dH0jEpkWaNG9H+qjZV/k49IlJxqWylSrq5x/Xc\n1DCCvQ/cwPb7rqNg7y6GDRvmdCwR8VMqW6mSDmRnMyyxEQHGEBkSxND4BqxfvcrpWCLip1S2UiWF\nhYWxPPMwAF7LYlnGYRo0inE2lIj4LZ2N7CdSU1MZOXwY+7Ky6JSczBuTpxAZGel0LNssWrSI225J\nIa52DXIKSzhGINt27q7S71lEnKev/vixvXv30rZNAhM7NKR9vQhe37SH7OgYvli6zOlottq5cycz\nZswgPDycESNGEBIS4nSkSsftdnPgwAFq166tGy+InAeVrR+bNWsWc14Yw/QeJy+n6PZ6qf/2MnJy\njxIaGvobzxZ/tWbNGm5N6cuJ4kJKPBb/nDGT/v37Ox1LpELTtZH9WFhYGPvzi7EsC2MMBwtKMMZo\npiLnVFJSwoC+fXi54xX0aV6X77KPcuvdQ2nf/gcaNmzodDyRSkcnSPmBXr16Qa163Ln4R15dt4OU\nBWmMGTOmTDchEP+wZ88eXF43fZrXBSCpbgRX1otmy5YtDicTqZz029YPVKtWjaXLVzJ58mT2ZOzm\nxce7aTlQftVll13G0aJituXk0yq6BocLS/jhwBHNakXKSMdsReSsZkyfzhMjH6J9g1ps2n+EP414\nkPETn3c6lkiFphOkxO9s376dzz//nNDQUAYNGkRERITTkSqd7du3k5aWRpMmTUhMTHQ6jkiFp7IV\nv7Jq1Spu6d2LlKZ1OFjkZluh4dvUDbqtoIjYSmUrfqVrx6u5O7qEgbGXA/Dg11tp2u8uxk+Y4HAy\nEanKdIs98SuHDx+mVa2wU49bR4Zw+OABBxOJiD9T2coZNm3aRMekq7gsKpLfde9KZmam05HKpMeN\nvfjL+kxyCkvYlpPPlB8P0LPXTU7HEhE/pWVkOeXIkSPEt2zOn9vW54bGtZmxJYt5h7xs+mErLpfL\n6XgXpKioiOF/upePP/mU6iHVGDtuPA+PfMTpWCJSxemYrfymJUuWMGH4PSzsEw+AZVnEzVzDv1M3\nEhMT42w4EZFKQMdsbZaTk8OsWbOYPXs2ubm5Tscpk4iICLLyCih2ewE4UnSCY4XF1KxZ0+FkUp5y\nc3N5YNh93JDciYeG38/Ro0edjiRS5WlmWw4yMjJI7tyFBs1b47W8ZO/czrer/k39+vWdjnZBLMti\nUP9+7Nm0li51w1iQcZR+Q+7mhUkvOx3Nr3m9XqZMmUJ6ejr9+/enU6dOZR7L7XZz7dXtiOcYfWOi\nmbfzMOnBtVj+7ZpKd6hApCLSMrKN7rzrbgpDorj1/scB+PAfL1In8ART3p3scLIL5/F4+OCDD0hP\nT6dt27akpKRgTKn/b+QS8Xq9xLdszsGsTK6IqMbWQ4WMHTeBsWPHlmm8jRs3MqhXD9YNbocxBq9l\nkTRrHQu/WUlcXFw5pxfxP7rrj4327dtH0s3dTz1uEncV276Z71ygi+ByuRg6dKjTMcTnhRde4Ej2\nHt7u05RqgQFsOVDAhAnjyly2xhg8loXXApcBr2Xh8VoEBOiIkoid9AkrB926duHL2dMoyD9GwbE8\nlnz0Ht27dXU6llQB27ZtI7Z2daoFnvyotq5TnRK3l4KCgjKNl5CQQMPmLblv6Vbm/rSf/1qyjVYJ\nbWjZsmV5xhaR/6CyLQdPP/UUV7dpzfAeiQzvmURyu0Qef+wxp2NJFXDjjTeyPiuffcdKAFj40xHC\nQ6sRGhpapvFcLhcLv1xC896DmGfVIy7lD8xftFgzWxGb6ZhtOXK73QC6T6yUqzuH3MHs2bMJCjC4\nXC7mLlhIz549nY4lImehE6REKrG8vDwyMjKIjY3VH3MiFZjKVkRExGa6qIWIiIhDVLYiIiI2U9mK\niIjYTGUrIiJiM5WtiIiIzVS2IiIiNlPZioiI2ExlKyIiYjO
VrYiIiM1UtiIiIjZT2YqIiNhMZSsi\nImIzla2IiIjNVLYiIiI2U9mKiIjYTGUrIiJiM5WtiIiIzVS2IiIiNlPZioiI2ExlKyIiYjOVrYiI\niM1UtiIiIjZT2YqIiNhMZStnSE1NJTEulogaYXTr1JFdu3Y5HUlEpNJT2cophw8fps+NPXmocTCb\n7uzI9UF59O5xPW632+loIiKVmspWTtmwYQMtosK4LbY+0SHBPNquMXlHcsjMzHQ6mohIpaaylVOi\noqLIPHqcIrcHgIMFxeQVFhMZGelwMhGRyi3Q6QBScbRr145ru99A7/krSK5bg0UZR3j0sceIiopy\nOpqISKVmLMuyZ2BjLLvGFvt4vV7mzJnDjh07SEpKolevXk5HEhGpNIwxWJZlSm1X2YqIiJSPc5Wt\njtmKiIjYTGUrIiJiM5WtiIiIzVS2IiIiNlPZioiI2ExlKyIiYjOVrYiIiM1UtiIiIjZT2YqIiNhM\nZSsiImIzla2IiIjNVLYiIiI2U9mKiIjYTGUrIiJiM5WtiIiIzVS2IiIiNlPZioiI2ExlKyIiYjOV\nrYiIiM1UtiIiIjZT2YqIiNgs0M7BjTF2Di8iIlIpGMuynM4gIiJSpWkZWURExGYqWxEREZupbEVE\nRGymshWxiTFmjDEmzRizyRizwRhzdTmP380Ys+B8t5fD691ijIk97fEyY0zb8n4dkarI1rORRfyV\nMeYaoDeQaFmW2xgTDQTb8FLnOsPRjjMf+wH/A2y1YWyRKk0zWxF7XA4csizLDWBZVo5lWfsBjDFt\njTHfGGPWGWMWGWPq+rYvM8a8Zoz5zhjzvTGmvW/71caYVcaYVGPMSmNMi/MNYYwJNcZMM8as9j2/\nr2/7XcaYT3yvv80Y89Jpz7nXt221MeZdY8w/jDGdgBRgkm+W3tS3+yBjzBpjzFZjTHJ5/IcTqYpU\ntiL2+BJo5CuhN40xXQGMMYHAP4BbLcu6GngP+Otpz6tuWVYS8IDvZwA/Ap0ty2oHjANeuIAcY4Cl\nlmVdA1wP/M0YU933s6uA24ArgduNMQ2MMZcDY4EOQDIQC1iWZX0LzAdGWZbV1rKsHb4xXJZldQQe\nBcZfQC4Rv6JlZBEbWJZ13Hc8swsnS+5fxpingFQgAfjKnLzqSwCQddpTZ/uev8IYE26MqQnUBGb4\nZrQWF/a5/R3Q1xgzyvc4GGjk+/ellmXlAxhjtgCNgTrAN5ZlHfVtnwP82kz6U98/U33PF5GzUNmK\n2MQ6ecWY5cByY8xmYCiwAUizLOtcS67/eazVAiYCX1uWNcAY0xhYdgExDCdn0dvP2HjymHLxaZu8\n/PL74EIu/fb/Y3jQ7xORc9IysogNjDEtjTHNT9uUCOwGtgF1fGWHMSbQGBN32n63+7Z3Bo5alnUM\niAD2+n5+zwVGWQw8fFquxN/Yfx3Q1RgT4VvyvvW0nx3j5Cz7XHR9VpFzUNmK2KMGMN331Z+NQGtg\nvGVZJ4CBwEu+7d8BnU57XpExZgPwFvBH37ZJwIvGmFQu/DM7EQjynXCVBjx3jv0sAMuysjh5DHkt\nsALYCRz17fMvYJTvRKumnH0WLiJnoWsji1QQxphlwOOWZW1wOEeY75izC5gLTLMs6zMnM4lUdprZ\nilQcFeUv3/HGmO+AzcAOFa3IxdPMVkRExGaa2YqIiNhMZSsiImIzla2IiIjNVLYiIiI2U9mKiIjY\nTGUrIiJis/8Ddjbvid3BBjsAAAAASUVORK5CYII=\n", - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "plot(X_lsml, Y)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Neighborhood Components Analysis\n", - "\n", - "NCA is an extrememly popular metric-learning algorithm, and one of the first few (published back in 2005).\n", - "\n", - "Neighbourhood components analysis aims at \"learning\" a distance metric by finding a linear transformation of input data such that the average leave-one-out (LOO) classification performance is maximized in the transformed space. The key insight to the algorithm is that a matrix $A$ corresponding to the transformation can be found by defining a differentiable objective function for $A$, followed by use of an iterative solver such as conjugate gradient descent. One of the benefits of this algorithm is that the number of classes $k$ can be determined as a function of $A$, up to a scalar constant. This use of the algorithm therefore addresses the issue of model selection.\n", - "\n", - "You can read more about it in the paper here: [NCA](https://papers.nips.cc/paper/2566-neighbourhood-components-analysis.pdf). 
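The differentiable objective referred to in the markdown cell above can be written out as follows (a sketch in the notation of the NCA paper, with $c_i$ denoting the class label of point $x_i$):

    p_{ij} = \frac{\exp(-\|Ax_i - Ax_j\|^2)}{\sum_{k \neq i} \exp(-\|Ax_i - Ax_k\|^2)}, \qquad p_{ii} = 0

    f(A) = \sum_i \sum_{j : c_j = c_i} p_{ij}

Here $f(A)$ is the expected number of points correctly classified under a stochastic leave-one-out nearest-neighbor rule, and it is maximized over $A$ with a gradient-based solver.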
" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "nca = metric_learn.NCA(max_iter=1000, learning_rate=0.01)\n", - "X_nca = nca.fit_transform(X, Y)" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAdsAAAFsCAYAAACEtRP5AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3Xd4VVXi9fHvvje9QiCEICX0FkB6lSJIk6qC6FiGURDF\nMo69t59j17E7FixYEFEQBBVQOkrvHUJvCSWV1Hv3+0cyjq8jEpDDSVmf5+EZcpJ7s5wZs7LL2cdY\naxERERHneNwOICIiUtapbEVERBymshUREXGYylZERMRhKlsRERGHqWxFREQcFuDUGxtjdE+RiIiU\nO9Za89trjpVt0Td08u1FRERKFGP+p2cBTSOLiIg4TmUrIiLiMJWtiIiIw1S2IiIiDlPZioiIOExl\nKyIi4jCVrYiIiMNUtiIiIg5T2YqIiDhMZSsiIuIwla2IiIjDVLYiIiIOU9mKiIg4TGUrIiLiMJWt\niIiIw1S2xZCVlcWoG8ZQr0EjOl/QjeXLl7sdSUREShGVbTFc89eRbNpzkNFPvkaL3pfQp18/du/e\n7XYsEREpJYy11pk3NsY69d7nks/nIzQsjLfnrCc4NBSAtx+5nasG9+W6665zOZ2IiJQkxhistea3\n1zWyPQWPx0NgYBDpx48CYK0l/VgKYWFhLicTEZHSIsDtACWdMYaHHnqQ5265im5Dr2Tvlg3kpB1j\n0KBBbkcTEZFSQtPIxfTVV1/x49y5xMfFccsttxAVFeV2JBERKWFONo2ssj0Ltm7dyldffUVQUBBX\nXnklVatWdTuSiIi4QGu2Dlm6dCkdOnZi/tptTF+wlJatWrN37163Y4mISAmike2fdFGfvtTp0Ivu\nQ0YAMOGVp6gZFcCrL7/scjIRETnXNLJ1SGpqKnE1En75OK5GAsePH3cvkIiIlDgq2z9p0MABfPXm\ncyTv38OerZv4dvxbDBowwO1YIiJSgujWnz/p/vvuIy0tnSevu4TAoCDuvvMOhg8fDhSOet966y2+\n/X4mUVFRPPLQg7Rp08blxCIicq5pzdYBPp+P0TeM4ZNPPsZiqFGvEZ37DmbauFdZtHABjRs3djui\niIg4QGu259Drr7/OktXreHP2at6dt4GqNRI4tHcXXQYN57PPPnM7noiInGMqWwf8vGQpnQcMIyQs\nnIDAQHpedhVJG9fiy8/D6/W6HU9ERM4xla0DatdOYMuKn/jPNPrGZYspyM9jycypXH311a5mExGR\nc09rtg5IT0+nW48Lyc734Q0MYt+OrVzUuzcPPXA/kyZ9yeq1a2nYoAGPPPwQkZGRbscVEZGzRMc1\nnmO5ubnMnz+fvLw8unTpQlRUFP0uHkBanqVd70GsXfgDJ1L2s2jBfAICtClcRKQsUNm6LCkpifYd\nO/HitJ/Iy83h63GvsfCbSQy7dCiXDx9ObGwsiYmJGPM//xuJiEgpod3ILrPWYjwefAX5/PPGK0g7\nmsIVtz3A3J9XMPyKv3BR335cc+1f8fv9bkcVEZGzTCPbc8Tv99P9wp6k5fpIPXaUpz77DmMMudnZ\n3NSnNc9OnMWr94zhsfvvYcSIEW7HFRGRM6CRrcs8Hg/Tp02lTtXKeD2eX6aLAwID8Xg8BAQG0bht\nZ7Zt2+ZyUhEROdtUtudQZGQk48d/hMnP4Ys3nmPj8p94/cFbqZfYEk9AAEtmTWfa9OksXLjQ7agi\nInIWqWzPsYiICObPm0tI9lG++fezbF25hANJW7mlX3via9UhoEJVLurdmzfeeMPtqCIicpZozdZl\neXl5tO/QkbYDr+CHLz+mYmwc1es2ZO7kz3jumacYdf31bkcUEZFiOtmarW7wdFlQUBBh4eHsT9pG\neGQU/3jhXYwxdO47mDtHXUb7du1o1qyZbgkSESnFNI1cAtx8040smDaRmLhqv5Rq5Wo1yMzI4MKL\nejNw8BB8Pp/LKUVE5EypbEuAK664giefeJyls6ezauGPHD18kHH/vJ/mHbsRU7U6c+fNJyq6Ak88\n8YRKV0SkFNKabQkye/Zsbv377STt2EGzjl3ZtHIpjVu148JL/sLK+bNYOP0rwsJC2b93LyEhIW7H\nFRGR39BxjaVIl67dyPWGsn7pIt6es5aAwECstdx/RV9ysk9QtXJFVq9c6XZMERH5DR1qUYp89MH7\nrPtpHmCx9r/HNxqPl2vueowN6zcwe/Zs8vPz3QspIiLFprItgerUqcOMGdMBePEf17P0hxm89+R9\nADRu1R6Px8PIUTfQpWs3MjMz3YwqIiLFoGnkEmzChAmMvO56PF4vNeo14pJRf2fhjK/4eeY0atZv\nzLHkg/Tp1ZNPP/lYtwaJiJQAWrMtpbKysmjTth1HjqdhPB4yUo8RGh7BsBvvJPnAXmaMf5u83Bwa\nNm7CpImfk5iY6HZkEZFyS2u2pVR4eDirVq7gsYfuJ7ZiNMbj4Y6XxtF9yAiG33QXXQcOIyAwiL17\n99K2fQeWLVvmdmQREfkNnSBVCoSEhHDTTTcRExPDyOtGEfSr236CQ8OIrhTLqzN+ZuGMyQweegn7\n9uzG49HvUSIiJYV+Ipci7dq1w1o/r9xzE+uXLGTOlAnMnjSeTn0HA9Cl/1DS0tJYvXo1BQUFLqcV\nEZH/0JptKTN+/HiuHz2awMBgrPXj8/t4dfoSZn3xEfO+/pzkA3uJrhhDdGQkM6Z/Q9OmTd2OLCJS\nbmiDVBmSlZXFI488yr9eeZmAgEA8Hi8hYWGc36UnK+fPwvr91GrYlMyU/SRt3+52XBGRckMbpMqQ\n8PBwnn/+Od575x2shfy8HB565wvWL13IhZdcSad+gzlycB/79u6j/4CBZGdnux1ZRKRcU9mWYtde\ney2DBvQnKCSU/Tu3ExtfndQjyezatJ6Lr76B5h27MWfuXG4ae7PbUUVEyjVNI5dyBw4coE69+rTv\n2Z+927dwcO9O3vh+OSFh4fj9fu4e1pPU5MNkZaa7HVVEpMzTNHIZVa1aNV58/jmWzfmO/bu2Y4Cg\n4MJbgzweD+GR0eQX5DFhwgSOHDniblgRkXJKI9syJDU1lVoJtWnWuQd9Lh/JuiULmPr+61RLqEtc\nfDV2bVrH/HlzqV+/vttRRUTKJO1GLic2bdpE23YdsAas309i
+wu4/fm3AZg+/t9kJK3n6ymTXU4p\nIlI2aRq5nGjcuDHbt21hzKjrqVmzJi27XPjL5+o0PZ+Zs2YREV2RSpVjGTL0Enbt2uVeWBGRckIj\n2zLszbfe4uU33+aOlz8kKCSUB/7Sn7ycHIZcfyt7tm1izlef4vf7+NdLLzF27Fi344qIlHonG9nq\nbOQybMwNN7Bly1bG9mmDz+cjIDCIpyZ8T3zN2gCkHztK1Rq1eODhR+jYsSOtWrVyObGISNmkaeQy\nzBjDv156kazMTGIqVQZrCQkN++XzoeERRFSIoeUFvVi8eLGLSUVEyjaVbTkQGBjI7bfdisfr5dX7\nxrJ55RJ++PJjls/5jvM792D7+tX88OMct2OKiJRZWrMtR7p168aSZcuJqliJrIx0Eho2JSP1GIHB\nwezfsZWxY2/izjvuID4+3u2oIiKlknYjC/PmzePJJx4nLDiQgIAAqtWuR6NW7Ug/fpQrbrufzYdS\nad2mLYOGDCWmcmXqN2zEd99953ZsEZFSTyPbcujw4cM0a96CAX+7hSnjXuOulz8goWHho/j+MaQr\nDZq35vJb7mXfji289eCtLJg/T4/qExEpBo1s5RdxcXHM+fEH9q5cSHZmOuGR0QBYa0k5sJe/3vt/\nVIyNo1mHrrS5sB9TpkxxObGISOmmsi2nmjZtyqzvv2XU9aN47/E72bZ2JYtmTMYbEMjhfbuBwvLd\nvW0z//fkk/zjjjvRTIWIyJnRNHI5V1BQwCOPPsY306dToUIFLujciX+/+x4d+13C7i0byMpI4x8v\nvMvzt1zNi8/8kyFDhrgdWUSkxNLZyFJsCxcu5KLevRk66nb6jPgrQSGhTHztGVrWiuXhhx92O56I\nSImlNVspti5dutA0sRkRFSoSFBJKXk42m5Yv0tOCRETOkEa28rvWrVtH7z59ialajaOHD9Hzwh6M\n//ADPB79fiYicjKaRpbTlpaWxpo1a6hQoQLNmjXDmP/5/4+IiPyKylbOmuTkZFJSUqhTpw6hoaFu\nxxERKTG0ZitnxVNPP03d+vXpP2gIdevVZ/Xq1W5HEhEp8TSylWL76aefGHrZcB5+fwoVY+NYOGMy\n33/4Gtu3bnE7mohIiaCRrfxp69atI7F9FyrGxgHQqe9gdiXtID8/3+VkIiIlm8pWiq1BgwZsXvEz\nmempAKxe+CPVzqtOYGCgy8lEREq2ALcDSOnRvXt3Rgwfxr3DelK1ek0O79vN15Mnux1LRKTE05qt\nnLatW7dy6NAhEhMTiYmJcTuOiEiJoVt/REREHKYNUiIiIi5R2YqIiDhMZSsiIuIwla2IiIjDVLYi\nIiIOU9mKiIg4TGUrIiLiMJWtiIiIw1S2IiIiDlPZioiIOExlKyIi4jCVrYiIiMNUtiIiIg5T2YqI\niDhMZSsiIuIwla2IiIjDVLYiIiIOU9mKiIg4TGUrIiLiMJWtiIiIw1S2IiIiDlPZioiIOExlKyIi\n4jCVrYiIiMNUtiIiIg5T2YqIiDhMZSsiIuIwla2IiIjDVLYiIiIOU9mKiIg4TGUrIiLiMJWtiIiI\nw1S2IiIiDlPZioiIOExlKyIi4jCVrYiIiMNUtiIiIg5T2YqIiDhMZSsiIuIwla2IiIjDVLYiIiIO\nU9mKiIg4TGUrIiLiMJWtiIiIw1S2IiIiDlPZioiIOExlKyIi4jCVrYiIiMNUtiIiIg5T2YqIiDhM\nZSsiIuIwla2IiIjDVLYiIiIOU9mKiIg4TGUrIiLisIBTfYExJhi4FEj49ddbax93LpaIiEjZccqy\nBb4G0oAVQK6zcURERMqe4pRtdWttX8eTiIiIlFHFWbNdbIxp5ngSERGRMspYa3//E8asAyyFo9/6\nQBKF08gGsNba5n/4xsbYk723iIhIWWSMwVprfnv9j6aRBziYR0REpNw46cj2ly8wZry19upTXfud\n12lkKyIi5crJRrbFWbNt+ps38gKtz1YwERGRsu6kZWuMuc8YkwE0N8akF/3JAJIpvB1IREREiqE4\n08hPWWvvO+031jSyiIiUMyebRv6j3cit/ugNrbUrT/ENVbYiIlKunEnZzin6awjQBlhD4W0/zYHl\n1tqOp/iGKlsRESlXTnuDlLW2h7W2B3AQaGWtbWOtbQ20BPY7F1VERKRsKc5u5IbW2nX/+cBaux5o\n7FwkERGRsqU4ZyOvNca8C3xc9PFfgLXORRIRESlbirMbOQS4EehadGk+8Ka1NucUr9OarYiIlCun\nvUHqLHxDla2IiJQrp302sjFmorV2+K8eSPD/OdWDCERERKTQH936E2+tPWiMqfV7n7fW7v7DN9bI\nVuSsS01NZcuWLZx33nlUr17d7Tgi8htncuvPwaK/9gKCrLW7f/3HqaAi8vvmzp1Lg9oJjBk+hBZN\nGvHc00+7HUlEiqk4G6QeAy4AEoAVFG6QWmCtXX2K12lkK3KW+Hw+qlWJ5e3udelRsxIHM3Po8eUq\nvp0znxYtWrgdT0SKnPFTf6y1j1hrL6Tw6T8LgLsoLF0ROUeOHz9OXm4OPWpWAiA+IoR251Vi8+bN\nLicTkeI4ZdkaYx40xnwLzATqAXcCWiwSOYdiYmIIDgnhh91HANifkcOS/Udp3Fjny4iUBsWZRl4J\nFADTgXnAT9ba3FO+saaRRc6q+fPnM2zIYOIigtl3PIO/33EnG9auYd2aNdRv0IBX//0ONWvWdDum\nSLn2p+6zNcZEAZ2BLsAwINla2+UUr1HZipxlaWlpbN++ndjYWIZc3I8uoXmMaFCFb3cd5fN92aze\nuImwsDC3Y4qUW2e8ZmuMSaTwiMZrgcspfAjBj2c9oYicUnR0NK1bt+bEiRMcO3SQJzrVITE2krva\nJhBh81i9+g/3LYqIS4pzNvLTFO5AfgVYZq3NdzaSiJxKWFgYWbl5ZBf4CQv0ku/zk5qdS2hoqNvR\nROR36LhGkVLIWss1V4xg59IFDKoZzawDGQQlNGLadzPxeIrzMC8RcYLORhYpY3w+H2+//TZrV66g\nQZOmjB07lqCgILdjiZRrKlsRERGHnfEGKREpnWbOnEnzRg2oXqUyf/3LlWRmZrodSaTc+qMHEUzj\nd5728x/W2kF/+MYa2Yq4ZsOGDXTv1IFRjeOYtPUgh0/kUTU+nhXrNhIREeF2PJEy67QfsQc872Ae\nEXHQzJkz6VotmnfW7uWdvs2oXzGce+ZtZsx1I/n48y/cjidS7py0bK21885lEBE5e6Kjo1l/NJPL\nGlblwlqVAXi1V1NafjKDo0eP8uTjj7F3ZxIdu3bntttvx+v1upxYpGwrzqEW9Y0xk4wxG40xSf/5\ncy7CiciZufzyy8nyhrDt+Ilfru1KyyYqPIKuHdqRsfAb+ubv4as3XuSm0aNcTCpSPhTnbOSFwCPA\nS8BAYCTgsdY+fIrXac1WxEWHDh2ifcvzaRbpoXHFMD7emsIVI69j+dSJfDMwEYD03ALqvTuP42np\nOhBD5Cz4M7uRQ62
1P1BYzLuttY8CF5/tgCJydlWtWpX1W7fR++Z7Cev7FyZ9M4MuXboQEvDfKeMg\nr8EYg9/vdzGpSNlXnJHtYgofQDCJwjOR9wNPW2sbnuJ1GtmKlDCpqamc37Qxw2tEsDEljUUHUgkJ\nCWXcpxPo16+f2/FESr0zPtTCGNMW2ARUAJ4AooFnrbU/n+J1KluREmjPnj306dGV6v4TPNu9ITtT\nTzBmzja+nzOPli1buh1PpFT70ydIFT1mz1prM4r59SpbkRKqSsUKzL/sfKpFhADw4KLtxA+9jvvu\nu8/lZCKl2595xF4bY8w6YC2wzhizxhjT2omQInJuREaEsz8j55eP95/I12EXIg4qzgapccBN1toE\na20CMBZ439FUIuKox596hqtnbuLpn3cwevYmNpzw0LJlS3p07kiDhJr89S9Xkpqa6nZMkTKjOGu2\nq6y1LX9zbaW1ttUpXqdpZJESbP78+Xz37bdUjIlh0KBBdOvUkQdaVeP8KlGMnrmeHceziAgL494H\n7ufuezS9LFIcZ3Jc43/MM8b8G/iMwrOSLwfmGmNaAVhrV57VpCJyTnTt2pWuXbsCMGHCBNrFR3Nt\nYnUeXLCFWhHBfHdZW1Jz8xn24nPk5OZz7bXXUqtWLZdTi5ROxZlGbgE0oPBgi0eBxkBL4AV0frJI\nmRAWFkZKVi7WWubuOco9HepRMSSQ2tFh3NC0Kq8/9xStEpvwrxdfcDuqSKmk59mKCLm5uXTt2J5q\nOcfYknycm1slcFXT8wC4ZdZ6Zu8+QlxYMPty/CxYupyGDf/wNnuRcuvP7EaOM8a8Z4z5tujjJsaY\n65wIKSLuCA4O5scFi+hw7Via9+jLvQu2cf23axn+9UoW7T/Ogis7khAdRpjxs337drfjipQ6xdkg\n9S2Fu48fsNa2MMYEAKustc1O8TqNbEVKqRHDLuWbr6cwtlUCN7dKIDo4kHfW7OHBeZvp3LkTwcHB\n3H7PffTq1cvtqCIlyp85G7mytXYi4Aew1hYAvrOcT0RKkNbt2hMc4GV3WjYRgQGk5uTz8vIkAgM8\nDAo6Ti//IYb070tMhWhuGj2KnJycU7+pSDlWnJHtXOBSYJa1tpUxpgPwjLW22ylep5GtSCmVm5tL\np/bt2LZpIwV+Pz6/JTTQy3PdG3N542oAjFu7l0cXbiXX56d9h47MXbjQ5dQi7vszI9t/AFOBusaY\nRcBHwC1nOZ+IlCDBwcH8vGw5jZo0oWmVClzT9DwCPB4CPP/9GRLk9RDk9TD10jYsXfITu3btci+w\nSAlXrN3IReu0DQEDbLHW5hfjNRrZipRyOTk5vPLKyyRt3cKWHTtZu2QRL/Zogs9vuWvuJs6LCKFy\nWBCL9x+nQlQk1466gbvvvptKlSq5HV3EFaf9IIKip/3stdYeKvr4Ggqnk3cDj1prj53iG6psRcoQ\nn89Hz27dWLtiKR4g0GOICw+mV61KhAYG8OnGAxzNycMTHMLaDZuoXr2625FFzrkzmUb+N5BX9OKu\nwNMUTiGnAW87EVJESi6v18ucBQuYv2wllavXICOvgM3HsqgaEcLkbYd4u28zPh5wPt78PBrWrcui\nRYvcjixSYvzRyHaNtbZF0d9fB1KstY8WfbzaWnv+H76xRrYiZdqiRYvo2aM7LSqFc2/HevSsVRmA\nD9fv4/8WbyPXGjbt2El8fLzLSUXOnTMZ2XqL1moBegI//upzxTlTWUTKsM6dO/Pss8+xPS2bIyfy\nfrmenJVLn9qxeKyfyZMnu5hQpOT4o9L8jMKHEBwBsoEFAMaYehROJYtIOXfr3/+Oz1puv/dudqdn\ncyLfx/gN+5kxrC2LD6SSm5tLeno6ubm5hIWFER4e7nZkEVf84W7kontq44GZ1tqsomsNgIhTPe1H\n08gi5ceiRYvod1FP2lQO56HO9VmTksGDC7aS77cY68da8FtLp44d+XH+Arxer9uRRRxx2ruRz8I3\nVNmKlCNbt27lskEDSNq1m6jwMCoFWJKOZdKndmX2Z+YQ4DEcyMihfd+BTPh8ottxRRyhshWRc6Z7\nh3Zs27CWV3slkhgbyT9+3MiifceIDAogywRwNC3D7YgijvgzJ0iJiJyWGgkJpOYU0Dw2kmFTVtCg\nYjg/XtGBG1vWIjcnh/T0dLcjipxTGtmKyFm3b98+zm/cgJ7nRbNw/3E2XtcNYwp/2e/55Sqe+eAz\nWrVqxdSpUykoKKBfv366RUjKBE0ji8g5lZSUxMC+fUhKSmLb6B5EBQeQ7/PT7P0FHMzMITIkiA7V\nYwnwGlamZDFv8U96KL2UeipbEXHFTaOvZ8l30xhUM5oZSSlYazm/ShRrUtLZeCSTyKAAPMbQvH0n\nZsz+we24In+KylZEXGGt5ZNPPuGbqV+zdM4sll3RjkFfLeNgZi6zL++Az+/n8cXb+G5vOinp2jgl\npZs2SImIK4wxXHXVVTz86GPk+CzZBX68xjC4fhzz9x6l7fhFbD6aRU5ONs/880m344o4QiNbETln\nbrlxDLO//pKU42lUCg1kT3oO0y5tS7tqFTiUlUunT3/m2znzadu2rdtRRc6IRrYi4rpX3niTl8aN\nxwSHkJnvIzTQS7tqFQCoGh5Mw+hQevboQZsWzbmwc0c+/OAD9Eu7lAUqWxE5Z4wx9O3bl04dO+I1\nBq+BWTtTANiRmsX6Ixn4crPZsGED2bs2c88tN/LqKy+7nFrkz9M0soiccwcPHqRN80Tqhli2HMsi\nMiiA/Zk5DKgTy9TtyVQKDaJL9RhWHU4j1QfJaZm/3KcrUpJpGllESoz4+Hg2bk8ip3INUnPy2Zue\nzWUNqhIfEUJwgIdZl7fn3X7NmXdlR2x+PnPnznU7ssiforIVEVdER0ezdNUannzqaYzHw6ebDvDj\n7qOEBXipERUKQERQALUrhDGkXx8G9O7F1q1bXU4tcmZUtiLiqjvvvpvsvHymfD2VPSfyOVHg4701\ne/D5LT/sPsLmo5m82bsp7XP20aNLJ5KTk92OLHLatGYrIiVGcnIyzz//PG+9/BJZ+QUEez18POB8\neiXEYq2l/5Q1tOh/CWPHjqVp06ZuxxX5H1qzFZESr0qVKjz77LOMGnMjXWvH47fQKi4agLvnbmbX\n0TQOzP2GCzt35IP333c5rUjxaWQrIiVOXl4ef795LJ9+/DHVwgIYWLcKH23Yz/JruxAZFMDWY5lc\nOGklKceOExwc7HZckV9oZCsipUZQUBBvvP0Ox7NOcOsTz/JzQBUaValAZFAAAA1iIgj2ejl+/LjL\nSUWKRyNbESnxdu3aRdsWzZnYvwmt4qIZv/EA/9qcytZdu/F4NGaQkuNkI9sAN8KIiJyOhIQExn38\nCZdd9Reyc3KoeV41pn73vYpWSg2NbEWk1PD7/WRlZREREaETpaRE0vNsRUREHKZpZBEp0/Lz85k8\neTJHjhyha9euJCYmuh1J5Bca2YpIqZefn0/fnj04sTeJRhVDmZ6Uwjsfjmfw
4MFuR5NyRiNbESmz\nvvjiC3L37+Tbwc3wGMMV9Stz/ZjRdOvWjQ0bNlClShXq16/vdkwpx1S2IlLqJScn07RiKJ6iTVOJ\nlSM5lHKURnXrUDMqlN3HM7hm5N947qV/uZxUyivtmxeRUu+CCy5gyvZk1iSnk1vg58klO4kICeL5\nTrWYPaQZy65oy5RPx/PNN99w8OBB/H6/25GlnFHZikip17p1a156/U0u/XYj1d78ka3h1UjLzuHi\nOlUAqBASSPu4CC4ZOpTmDetzftPG7Nmzx+XUUp5og5SIlCl+vx+Px0PT+nW5rW44IxpXIzkrl06f\nLOalCxszoG4czy/fzUJbkTmLfnI7rpQxus9WRMqVNWvWMKDPRYR7YP+xNILwk5HvIyYkkDva1uGJ\nZbtJzzrhdkwpY1S2IlLuZGdns2nTJvpf1Is4bwG5/gLqVAxm3u40vN4AgkJCSahVk3EffULz5s3d\njitlgG79EZFyJzQ0lIiICPAVkOHP57netfAYWJd8gs41IulbvyIrDx6ld88ebNq6nYoVK7odWcoo\nbZASkTItJiaG9OwczosMItBrOHIin+x8P1c0q0xMaAC96kQTVpBF3Tq1+OSTT9yOK2WUylZEyrTK\nlSszevRolh/MZPWhLLzGkJnnIyOv8PaffJ+fjFwfbWM8jBp5DZMmTXI5sZRFWrMVkXLhmWee4bmn\n/0laRibxVWIxeSdoHxfEqkNZxIUHclfnaiw7kMnkg8Fs2LrD7bhSSmmDlIgI8J+fS++//z43jLqe\nDtUj+EfHang9hu3HcnhjUwHbd+91OaWUVipbEZHfGHbpUL77Zhp3dapGdIiX15enMOKG2+jdtx87\nduygefPmtGzZ0u2YUoqobEVEfsNayz13382nH30AWK7923Xk5OQw4aMPaBwbxppDmTz02BPcetvf\n3Y4qpYTKVkTkFDZu3Ei3Tu15uVc8EUFekrPyuW3mPoYPG0ZmehoDh17K1ddcgzH/87NUBNB9tiIi\np3Tw4EEnM8X1AAAPeElEQVSqVwwjIsgLQJXwQAJtAYd+mk6jyiE8ctd8DhzYz7333e9yUiltdOuP\niEiRxMRE9hzPZu3hLKy1zN+VToHfcn2rWHrUjuaudjG89MLzbseUUkgjWxGRInFxcUz8cjKDLu5H\nfn4BXg+0ig8n0Fs4Lgn0GAoKfC6nlNJIa7YiIr/Rp9sFVErewYETmaw+fIJLmsTg81smbzpGVr4l\npkIUb73zHpdeeqnbUaWE0QYpEZFiGv/RR9x7283k5mbTr14FJm06iscYLq5fgSuaxZJ0PId//nyE\nBT8tpUmTJm7HlRLkZGWrNVsRkd+4+ppreOL5l4iMiWXixqOMaFqp6DzlWLweQ/1KobSoEsrixYvd\njiqlhMpWROR3/O266/hxwSIqRkWwNz2PkADD7rRcAAr8lu0pmcTFxbmcUkoLTSOLiJxEdnY28VVi\naVzBEOw1rD50grbnRbDjWA4nPKEcSDmKx+Phow8/ZNLnnxIVFc29Dz5MYmKi29HFJZpGFhE5TaGh\noXz48SesP1bAkv1ZXFg7mny/5Wie4fW338Xj8fDaK6/w0J230SBtPUFb59O9S2cWL17MjBkzWL58\nORp0CGhkKyJySsnJyUyZMoXvp08jICCAkaNuoG/fvgA0qF2LGxoUruMCjFt1mO93ZJJYI4YDqdn0\nHTiYd9//UKdOlRPajSwi4oCa8XH8vUUY9WJCAPhwdTLTtx7j3i7VaVoljHvnJ/PquE/o37+/y0nl\nXNA0soiIA86rmcBzi/bz874Mpm89xuykNEa3rso7Kw8THOChTqRh9uzZbscUl6lsRUTO0OrVq1mz\nehU5+X7Gr0lhQ0o2j3avQfvqkaTm+EjJymfFvnTefO0Vpk2b5nZccZHKVkTkDH0x8XP6140iKsRL\nWk4BIxIrU6tCMB+vTcEAN8/YydDGlbi1XRy3jx3jdlxxkc5GFhE5Q0HBweRZwwt9EnhnxWHu/H4X\neT5LaIChb/2KDGtaibBAL/N3p5GScpR9+/ZRvXp1t2OLCzSyFRE5QyNH/o2fk31M3HicOhVDiAwL\n4corr6Rtx87MTkpjQ/IJXvppP68vPUSI10+9hJqMHDmSvXv3uh1dzjHtRhYR+RN27tzJS88/R0Z6\nGpcMH8HAgQM5cuQILRKbcvRICpHBXl7uV5uIIC+zd6Qyfm0K3pAIZv04hypVqlCxYkXCwsLc/seQ\ns0S3/oiInENJSUk0ql+P3vUqMLp14bGOuQV+rvxyG5c1qcSMpEwCA7zk5BXwzLPPMvaWW11OLGfD\nycpWa7YiIg6Ij4/HAqsOZpGZ5yMiyMuivRlUjwpiwe50RjSuwMUNYjicmcf9Dz1A+46daNOmjdux\nxSFasxURcUBoaCjPPf8Cx7MLGDV1B2Om7WD8mhRGtarCgYw8+tWvCEBcRBCNK3h45513mDFjBllZ\nWS4nFydoGllExEErVqxg3LhxTJ0yhcOHDgAGY+ChrtVpXjWcnAI/N09PIivfT+WocIKjYlj481Kq\nVKnidnQ5A1qzFRFx0VNPPcX8D15gVMtKDJ+4lbAgL7Wig9ifnkfTKmH0q1eBpxbsJzzYS3iFyrzx\n7vv06dPH7dhymlS2IiIuOnjwIK1aNKNDrIfNyRlYC7tScxnTtio9EqJYuCeDcauSGd06jjyfnw83\nZPDFlKn06NHD7ehyGnQ2soiIi+Lj41m2cjX1+l1Np4uHkdDqAgK9hrBAD8YYZu1IZUybODrWiKRb\nQjTDGoTzzpuvux1bzhLtRhYROUeqV6/Oc8+/ABQ+mL5yTEVeW3KQ7cdySM7Kp8D/39nAAr/FG6Af\n0WWFppFFRFwyefJkrrnqSsIDPRzNyiE8MICrmsWQ77N8sTWTGTNn06FDB7djymnQmq2ISAl04MAB\nNm/eTK1atdi+fTvj3n4Tb0AAt95+p4q2FFLZioiUYtZajh49SnBwMJGRkW7HkZPQBikRkVLI5/Ox\nc+dOunfpRJ2aNYiLrczNN41Bg5nSRWUrIlJC3XPPPYSHBNOofl1WLV/G/3WryrsDajF78ueMGzfO\n7XhyGlS2IiIl0FtvvcUrLz7P071q8MXwhlzTojLPLNpPeKCHrucFsfSnRW5HlNOgNVsRkRKoeZPG\nhKTt5cFuNX65dsWkrXRLiOSnvZlUjI0jMbE527dtoUbNmrzyxr9p2LChi4kFtGYrIlKqhIQEsys1\nl+x8PwC7UnPI81n2pubxULcamIyjpG1czOi6lvOOb6Jjuzbs2bPH5dRyMhrZioiUQF9//TVXjxhG\niMdSu2IIaw9n4fNZ3h1SjwCP4fqvd/DJpfWZse04H61Jwee3hAUHsXLdBurVq+d2/HJLI1sRkVJk\n8ODBfPrFVwRGRLMrNYcb2sQRHuwlI9dHgMfgs5ZlBzKZuuUYr/WvzaTLG9K1ZhiD+unhBSWRRrYi\nIiVYUlISHdq2oVVsAPtST3AwPZd
LGscwZ3cGKZl5XFQnmmvOL3wcX2p2ATdM30V2Xr7Lqcuvk41s\ndfCmiEgJVqdOHVatXcfnn3+Oz+cjMjKSVcuXMbx3Fb6Y+DnrUw7h81u8HsO324/jtT4qhIfgDQik\nTp3avPnOONq0aeP2P0a5p5GtiEgplZqaSs34KlQO8RAe5GHHsVyqhAfSJDaUoY1j2HQkm/fXpzPz\nhzm0bdvW7bjlgtZsRUTKmAoVKtCmXTtaVA2jwG8Z1jSGQ5l5jG4TR1xEEN0TokkIh+5du/Dee++6\nHbdcU9mKiJRiDz7yOPMP5JGV56fAZ7FAao4PAL+1ZOT6GNkshjtuu43k5GR3w5ZjmkYWESnllixZ\nQvcLLsD6C0isEsr+jHx61o5mzeEsAB7rUZN7Fxzhs6nf07p1a5fTlm2aRhYRKaPat2/PspUrMR4v\nm4/kcCLPx+frjxAS4OHR7jXYcSyHI5m51KlTx+2o5ZZGtiIiZYTP52PixIlkZmYCcPcd/yAiOIDM\n3ALGf/oZAwYMcDlh2afn2YqIlDOZmZns37+f6tWrEx4e7nacckFlKyIi4jCt2YqIiLhEZSsiIuIw\nla2ISDl14sQJcnJy3I5RLqhsRUTKmZycHIYNHUxMhWiioyK5cfQo/H6/27HKNJWtiEg58+D993Jg\n7WI+HlqHDwbVZuGMr3jtlVfcjlWmqWxFRMqZhfPm0j8hlCCvhwCPoUEUTJr4OdnZ2W5HK7NUtiIi\n5UyNmrXYfCyP9NwC7pq5m1UHsziwbT2tWzTjyJEjABw4cIAJEybwzTffkJeX53Li0k/32YqIlDM7\nd+7kgk4dIDuT5rFB3NAmDoBxa48T32kgo8bcSJ9ePWkSG8qx7Hyi4hP4Yd4CQkNDXU5e8ulQCxER\n+cWxY8e4qNsF9K2YRvvqkQAs2ZfBUk8d0tPT6RScTI/a0fit5dklR7n8tge57bbbXE5d8ulQCxER\n+UVMTAz9Bg5kzr5c8n1+8n1+5uzLpX2nThw8eJBGlQtHsR5jqBsJe/fscTlx6aayFREppx565DHi\nmrZj5LQ9jJy2hypN2vLQI4/RsXNnvt6Wgc9vOZZdwPwD+XTu0sXtuKWappFFRMq5I0eOYK0lNjYW\ngOPHj3PpkIEs/mkJGMMD99/PQ4886m7IUkJrtiIiclqysrIICgoiMDDQ7SilhspWRETEYdogJSIi\n4hKVrYiIiMNUtiIiIg5T2YqIiDhMZSsiIuIwla2IiIjDVLYiIiIOU9mKiIg4TGUrIiLiMJWtiIiI\nw1S2IiIiDlPZioiIOExlKyIi4jCVrYiIiMNUtiIiIg5T2YqIyDkza9YsenbtTKe2rXjrrTcpL889\nV9mKiMg5sWjRIkZcOpSWvl30jkjhmYfv443XXnM71jlhnPqtwhhjy8tvLCIicmpjRl9P7rKpDG1c\nCYB1h7P4KiWSFWs3uJzs7DHGYK01v72uka2IiJwTQYHB5Pr++3FugSUgMMC9QOdQ+finFBER140Z\nO5YuH48nwHOM8EDDl9uyePO9f7kd65zQNLKIiJwz69ev5+UXXyAn5wRXXfs3+vTp43aks+pk08gq\nWxERkbNEa7YiIiIuUdmKiIg4TGUrIiLiMJWtiIiIw1S2IiIiDlPZioiIOExlKyIi4jCVrYiIiMNU\ntiIiIg5T2YqIiDhMZSsiIuIwla2IiIjDVLYiIiIOU9mKiIg4TGUrIiLiMJWtiIiIwwKcfHNj/uf5\nuSIiIuWOsda6nUFERKRM0zSyiIiIw1S2IiIiDlPZioiIOExlK+IQY8wDxpj1xpg1xpiVxpi2Z/n9\nuxljphX3+ln4foONMY1+9fEcY0yrs/19RMoiR3cji5RXxpgOQH/gfGttgTEmBghy4FudbIejEzsf\nhwDfAJsdeG+RMk0jWxFnxANHrLUFANbaY9baQwDGmFbGmLnGmGXGmG+NMXFF1+cYY/5ljFlljFlr\njGlTdL2tMWaxMWaFMWahMaZ+cUMYY8KMMe8ZY34uev3AouvXGmO+LPr+W4wxz/zqNdcVXfvZGPO2\nMeZVY0xHYBDwbNEovU7Rlw83xiwxxmw2xnQ+G//FiZRFKlsRZ8wEahaV0OvGmK4AxpgA4FXgUmtt\nW+B94J+/el2otbYlMLbocwCbgC7W2tbAI8BTp5HjAeAHa20H4ELgeWNMaNHnWgDDgObA5caY84wx\n8cCDQDugM9AIsNban4CpwF3W2lbW2qSi9/Baa9sDtwOPnkYukXJF08giDrDWZhWtZ15AYclNMMbc\nC6wAEoFZpvDUFw9w4Fcv/azo9QuMMZHGmCggCvioaERrOb1/b3sDA40xdxV9HATULPr7D9baTABj\nzAagFhALzLXWphVd/wL4o5H0V0X/uaLo9SLyO1S2Ig6xhSfGzAfmG2PWAdcAK4H11tqTTbn+dq3V\nAk8AP1prLzHG1ALmnEYMQ+Eoetv/d7FwTTn3V5f8/Pfnwekc/faf9/ChnyciJ6VpZBEHGGMaGGPq\n/erS+cBuYAsQW1R2GGMCjDFNfvV1lxdd7wKkWWszgGhgf9HnR55mlO+BW3+V6/xTfP0yoKsxJrpo\nyvvSX30ug8JR9snofFaRk1DZijgjAviw6Naf1UBj4FFrbT5wGfBM0fVVQMdfvS7HGLMSeAP4W9G1\nZ4GnjTErOP1/Z58AAos2XK0HHj/J11kAa+0BCteQlwILgJ1AWtHXTADuKtpoVYffH4WLyO/Q2cgi\nJYQxZg5wh7V2pcs5wovWnL3AZOA9a+3XbmYSKe00shUpOUrKb76PGmNWAeuAJBWtyJ+nka2IiIjD\nNLIVERFxmMpWRETEYSpbERERh6lsRUREHKayFRERcZjKVkRExGH/D8EdZOhfkDUfAAAAAElFTkSu\nQmCC\n", - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "plot(X_nca, Y)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Local Fischer Discriminant Analysis\n", - "\n", - "LFDA is a linear supervised dimensionality reduction method. It is particularly useful when dealing with multimodality, where one ore more classes consist of separate clusters in input space. 
-  {
-   "cell_type": "code",
-   "execution_count": 15,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [],
-   "source": [
-    "lfda = metric_learn.LFDA(k=2, dim=2)\n",
-    "X_lfda = lfda.fit_transform(X, Y)"
-   ]
-  },
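Internally, the fit above reduces to the generalized eigenvalue problem mentioned in the markdown cell: solve $S^{(lb)} v = \lambda S^{(lw)} v$, where $S^{(lb)}$ and $S^{(lw)}$ are LFDA's local between-class and within-class scatter matrices. A minimal sketch of that final step in Python, assuming the scatter matrices have already been built (Slb, Slw, and lfda_embedding are illustrative names, not metric-learn API):

import numpy as np
from scipy.linalg import eigh

def lfda_embedding(Slb, Slw, dim):
    # eigh(a, b) solves the generalized symmetric problem a v = lam * b v;
    # eigenvalues are returned in ascending order, so reverse to take the top `dim`.
    vals, vecs = eigh(Slb, Slw)
    order = np.argsort(vals)[::-1]
    T = vecs[:, order[:dim]]  # (n_features, dim) transform; X_lfda = X.dot(T)
    return T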
wsLI6YSovGJjYzl06BBOTk48/PDDmJqaGjuSKEVVscgWpkr3bGNjY2na\nvDnDpryCX/3GbFuxCAdTHb/8tMXY0YSotP744w8G9e1N2xouRKdq8Aioy7YduzA3Nzd2NCFKTIaR\n83Hy5EmmP/8CCdev07lzZz7/dD729vbGjiVEpdW0fl1e9LNiUKAXOr3KkG1nGfvqO0yaNMnY0YQo\nMRlGzkfz5s05GLzP2DGEqDLirl+nbcfmAJiaKLRwtSY2NtbIqYQwrHI1CygmJoYLFy6Qm5tb5DY6\nne6BPi+EMK62rVuz4FQMelUlLj2LzZHJtGvXztixRDkQHR3N+vXr2bNnD3q93thxSlW5KLaqqvLk\nU0/TqElTevTpR8PGTYiJiSm0zaxXXsXWzg5bOztGjhpNVlZWGSUWQhTX1ytXEYozNRbvo/nKwzz7\n4sv07NnT2LGEke3evZumjRrwxewZPDFqKMMfGVSpCm65uGe7cuVK5nw8n1lfrcHKxpYtyz4n7doF\nfv91W75tli9fzoeffsGLC1ZgZW3Loten0qF5I+bP+6S0LkEIYUBpaWlYW1vLxKgS0ul03LhxA2dn\nZ6ytrY0dp9hq1azOxEAzmlazJVenMvvgTeYsWMrQoUONHe2BlOvlGs+EhdG0c0+sbe1QFIX2fR8h\nLCyswDb79u+n27BxOLq4YWltTZ/xz7AvOLiMEgshSsrBwUEKbQmFh4cTWLsWjeoG4e7qzJLFi40d\nqdjibyRSzz3vy4K5qUKAk3mlupdfLoptnaAgzh0JJic7bxj45L4dBAYGFtimulc1oi6cufc68vwZ\nqlerZtCcQghRngwZ2J/eXjq+7e/NvO41+N8rMwkNDTV2rGJp2awpP19ORVVVEjJyCInX0KpVK2PH\nKjXlYhhZp9MxctRoDh46jJOrG5lpt9m9aycBAQH5tklKSqJd+w7Ye1TD2taeS6eOEbx3L/Xq1St2\nZr1eT1JSEk5OTvKNWwhRrmVlZWFvZ8vG4YH3FpH4IvQ2o2bOZeLEiUZO9+Cio6MZ0KcXVyMj0elV\nPv5kHlOmTjV2rAdW7p+zVVWVsLAwMjIyaNKkCba2toW2SU9PZ9u2beTk5NCzZ0+8vLyKnTcsLIwB\nAweRkpKCqqosX7aUESNGFPt4QghhSKqq4unmwvPNHGjgYUNmrp5ZwYl8vWYj3bt3N3a8YlFVlZSU\nFOzt7Stsh6fcF1tj0uv11Pbzp++kGXTuP4yoi+f4eMoYQo4dxd/f39jxhBDivrZv386oEcOo425H\n9G0N/QYNYcnyb2S5RCOSRS0KcPPmTVLT0ujcfxgAteo0oE7TVpw6dUqKrRCi3OrVqxenz54nNDQU\nLy8vWrVqVWkLbVxcHM9Nm0LEpYs0a9GS+Z9/gZOTk7FjFZkUW8DZ2RmdVkv05Qv4BNZDk5FO1MVz\neHt7GzuaEEIUyNvbu9L/rNJoNHTt2J4WDtmMqm7F3qPb6durBwf/OFphdmiTYgtYWFiwdMlipkwe\nTd1mrYgKP8eIYUNp3bq1saMJIUSVFxISgoVWw+gGbgAEuVrx5G/hREdHU6tWLeOGKyIptneNGjWK\nli1bEhoaio+PD23btjV2JCGEEOR1iDJzdOhVFRNFIVenkqvVVaitUGWClBCiQktOTubT+fNIvB7P\nQz17M2LEiAp93/LQoUO8/85baDR3GPPYRCZOmlShr6c0aLVaunXqgHLzKk1czTickEtg6y6s3bDJ\n2NH+Q2YjCyEqnfT0dFo3bUJre5XGLtYsu5DI+CkzeG32/4wdrVhOnDhBj25dGFvPHntLU364kMHL\nb81hypSK97xpadNoNHz80UdcDj9Pi9ZtmDZ9BmZm5W9wVoqtEKLSWblyJavmvs76PvUBiEnLpN3a\nEFIz7lTI3uCMaVNJ2r+OEXfvTZ5L1LA2zorTFy4aOZkoqnK9NrKhBAcHM+iRIfTtP4BNm8rfcIMQ\nomSysrJwtvyrd+NkZU5OrpaK+kXf1NQU3d82utHqVUxMTY0XSJSaSltsDx06xCNDh+HVuD3+HXsz\nedoM1q1bZ+xYQohS1KtXL/bEpPD9uThCb6Ty9O6LDB/ySIV5HOTfJj35FNujs9gSnsyeyFQWnbrN\n8zNnGTuWKAWVdhj5sccngnsteo/KWyP0RPAOjm1Zyf59e42WSYiqQKfTER8fj5OTE/b29gY/X2ho\nKC8/N43EG4l0e7gHH3wyDysrK4Of11BOnz7NJx++T+adO4x+7HGGDBli7EjiAVTJFaT+Xuz1en2F\nvIcjREUSERFB/54Pk5qSQkZ2Dm+9/TYvznzZoOds1qwZO4MPGvQcZalJkyasXL3W2DH+QVVVdu3a\nRUREBI0aNaJjx47GjlThVMpiq6oq3bp0ZtqMGaCCvZMzG7/6kIWff2bsaEJUaqOGDuFxXxueHVSX\nuPQsen34Pm3atZcfzkWUkpLCuXPn8PDwICgoyNhx7pkxbQo/rV9NAzcr3r2RybQXZvLq67ONHatC\nqXTDyDqdjhGPjuKPo8dwdHElNvIKLVu04PnnZjBw4MAyzyNEVaGqKuZmZlyf3B1Ls7x7pi8eiKDR\n+OlMnz7dyOnKvyNHjjCgb2887SxISNUwfsJE5n++oNTPEx8fz4EDB7C3t6dHjx6F7q5z/vx5urZv\nw4Ie1bAxNyU5U8szW69y+GgIzZs3L/V8FV2VGUZeuXIl4Vev8cGG3ZiamrHof89x6ewJIiMjyc3N\nrbDbNglR3imKQq0a1dkXk0Sv2u5ocnUcTUhnYO3axo5WIYwaPpQnG9rRtqY9GTlOvLL6e/oNHFSq\n2+WdOHGCXg8/RD13G5I0ubh6+7Nzb3CB97hv3LiBl4M1NuZ5s6JdrM2wtzBh2CODuHw1ClOZLV0k\nFXPKXgEiIiKo27IDZuYWLH7zBW7diKf7o5NYsf5HHhk6DL1eX/hBhBDF8u2q1UwJjmDIbxdov/4E\nLbo9TP/+/Yt9vLi4OEY+MpjmDery2OhHSUpKKsW05YdWqyU67jqtqtsBYGdhSgN3K3bt2kX/3j3o\n1LYV8z75uMQ/v559ciLj69oys6UT73VyIzfhCsuXL7/35ykpKWzdupU9e/aQm5sLQOPGjYlJ0XAs\nLh2dXmXX1dsoQEbqbWJiYkqUpyqpdD3bpk2bsmb2m7Ts1oszR/azYOthLKys6TF8HLOGduPs2bM0\nbtzY2DGFqJQ6derEmQsXOXnyJO7u7rRs2bLYExMzMzN5uHNHBnia80xjF9ZcDKFvj+4cDjlR6XpT\nZmZmBNT2ZX90Ot1qOXA7U8up63c4uOBzxjVwwMPWnGXz3yctNZW3351T7PPExcVTr40DACaKQoC9\nQmxMNAAXL16kW+eOVLc1IT1Li7tPXq/X1dWVjz/9jOmTnyZbq8fbwZLn2lVj7qFEHB0dS+X6q4JK\n17MdOnQoA/v2Zs4TQzExMcHcMm94xMzcAjsHRzQajZETClG5eXl50bdv3xLvrXry5
EmstFnMbuNH\nCy8nPuoUQHx0FJGRkaWYtvzY8ONPrL+ay/TdN5i6I5aGzVvxcC07egc407yaHdOaOfH1sqUlOkf7\n9u356XJeDzU5U8uBhFzad8ibvDb1mSfp723GG21d+LCLO9yM5IsvvgBg0qRJDBw4iCAvZ1rUdGDx\n6TRmznwJZ2fnEl93VVHpiq2iKHw6fx6XL13Cw82V9Qs/JPryBbYs/xxdThZNmjQxdkQhRBFYWlqS\nkZ2LTp830TJbpycrR1uhdnp5EI0bNyYi8hpbd+/n8tUounV/mFz9X5NMc3XqP3r0ERER9H64G3X8\nfBk9YhjJycmFnmPJ19+S7urP6B+v8syv13hi6vP3Jo5ei7pGQ4+8zomJolDPyYTIK5eBvJ+rq9dv\n5K1PF9FyzPMsXbmWt94pfg+7Irt8+TJfffUVP/zwA5mZmUVuV+lmI/9dQkICk6dOIyzsLHWCAvnq\ny4X4+PgYNZMQomh0Oh29u3fDMiGSHjUd2BKVgmeTtqzZuKlKPDMfHR1Ny2ZN6VHTHHcbU36M0PDC\na28y4/nnSU1NpUHdIHpVN6GJhxU7ou6QbO/DoaMhRfp3k56ejpWV1T8mjI59dAQpp4N5ookzmVo9\n7xy+xavvf8pjjz1myMusUIKDg3lkYH/a1LDlVqaOXFt3Dh45hp2d3b3PVPmNCKKiooiPj6du3bq4\nuLgYO44QogiysrKYP+8TLl84T+PmLZk2fXq53OnFUCIiIvjwvbncTk5i8LARjBk7FoDt27fzytPj\nebt93s8yvaoycVsMZ8MvU7169WKdKyUlhR4PdeHc+Qvo9fDEExNZ+NXiKvHFpqiaNKhLP9cM2ta0\nR1VVPjmWxNBpr/Pcc8/d+0yVefTnfl566SUWLV6CjZ0D2ZkZ/PzTT3Tt2tXYsYQQhbCysuK1Krx4\nQkBAAMu++fY/79vY2JB+d4jd1EQhS6snJ1dXomUqDx8+zNUrV+lY25XY9FwiLl1Cp9NVqS83hbl1\nK4la/nkTzBRFwdsWEm8kFKltoT1bRVEsgaFALf5WnFVVfaeQduWiZ7t161aGDR/BwyPG4+JRjV++\n+wptThZpt2/LNzYhRIWk1Wrp3qUTudcjaOBswqGEXDr3HcLiZcsLb5yPmtU8mdzAioYeNuj0Kq/v\nv4G5SzXcXJyZ9MwURo8eXYpXUDGNGzWS+ON7eLqZC7c0ucw5fIsV6zbRo0ePe58p9jCyoii/A6nA\nCUD35/uqqs4rpF25KLZDhw4l3dSOx1+dC8D5438w/4VJxEZfk5l0QpQBnU7Hxo0biYmJoXXr1nTu\n3NnYkSqFrKwsvvjiC65cukjrdu2ZMGFCiXY7sjA3Z9UjfvdW//ry2HWydSrtatqz4nwaH332JWPH\njSut+BVSeno648eMYttv27GxtuS9Dz5k8uQp//hMSYrtWVVVGz5oqPJSbB+fOJFUM0eGP/siANGX\nLvDOE0PJSEuVnq0QBqbX6xk+eCDRp4/T2sOOn6/eokuvPvQbMJABAwb8Y2KJMK5unTrgdjuC0Q2c\niUvP4fXd0bzRpSaBrtaExGWwL9uLA0dCjB2zXFBVNd/6UZJ7tocVRWmkqmpYSQMaw5TJk3moew98\nAuvi7O7FN++9yojhw6XQClEG9u/fz/njRzkwrBkWpiZMblyD5ivWcz3kAHPemM3BY8dlhKmcWLNh\nE0MHDWDkxlBMTBRaV7Mh0NUayNvEvrItJFISxakf+Y45KIoSpijKGaAjcFJRlIuKopz52/sVQsuW\nLdm0cT2HNn3H6g9fY9zIoSwv4YPh97Nnzx4eHTWa0WPHcujQoVI/vhAVUVJSEv4udliY5v2oqWlv\nhbWpCcu7B9HSVs+8jz82csL7O3HiBOvWrePs2bPGjlJmvLy8OHQ0hPQ7d9h/8BBnb6v8cjGZnVdu\nszwsledfftXYESu0ggb4+wMDgD5AANDz7us/368wevToQciRP7h44Rxz3n03329oer2exMTEe2uC\nFtWOHTsY/ugo7PybYOVdjwGDBnPgwIHSiC5EhdamTRuOxiazPfIm6TlaPjp6BV9Ha1yszGnqasP1\n2Oh82+7du5dhA/sxpF8ftm3bVmaZ33nzDQb17M6aOa/SvWN7vlq4sMzOXR5YWlrSunVrftuxi8zA\nLiTWbMPKtRsYNGiQsaNVaEW5Z7tSVdVxhb13n3bl4p5tUYWFhTFg4CBSUlLQ6/UsX7aUkSNHFqlt\n3/4DqN22Bx37DQFg54bvuXM1jHVr1xgyshAVwv79+3lywniiYmJxt7bgp0daYG9pxtBtZ5n5/jzG\njx//nzbBwcGMGDSAN1r7YGFiwpvHrrH4u5UG3yYzIiKCDi2bc3hkC9xtLIlK1dBp3XEiY2ILfT4/\nJyeH3NxcbG1tDZpRlG/53bMtytS1Bv86kCnQorSClQd6vZ7+AwbS5/FpLN4bxuvLNvDslKlERETc\n9/MajYbHJz2BV7Xq1K3fgMSbNzH720os5uYWaHW6+7YVoqrp3LkzF69GcTstnU49+9B+zRGafn+Y\nRyY8xbh8Zrcu+/ILXmnhzbgGNRlZrzrvtavN4s/nF3quuLg4Jo4bS6+unXj7jTceeJQqNjYWfzdH\n3G0sAajlaIOHvQ03btzIt42qqrwx+3Uc7O1wdXaiR7cupKamPtB5SyIzM5Nnn3qCAF9v2rZoJrex\nyql8J0gpivIq8BpgrShK2p9vAzlA6d/0NKJbt25x+/ZtOg8YDoBvUH3qNm3FqVOnCAgI+M/nJz3x\nJBHXb/Ha8s0kxESyYObTXLv2Diampui0WjYt+oRVK1eU9WUIUa5ZW1uzZuMmtFotJiYmBT6moqoq\n/+4aFDZSlpqaSud2bXikhjV9PB1YsvZrnoi4zIrVRR9hql+/PpdvpXEoNpkONV349UoiGVqVWrVq\n5dtmw4YN/LDsK5b29cXe0pTFpy4w9ZmnWLlmXZHPWxJPTpxA1LE9PNfQnmu3ExnQtzdHj58kMDCw\nTM4viibfYquq6vvA+4qivK+qaqW+M+7s7Ixer+fapfP4BtVHk5FO1MVzeHt7/+ezWVlZ/LjlR+Zv\nOYCzuyfu1WvSZfCjOOnvELYjb83WFd9+Ta9evYxwJUKUf/mtSKTVatFoNDg4OPDklGmMHDwQC1MT\nLEwV3jh6jUXffl/gcXft2oWftQlvtPUHoGNNZ2ov3cjir7/B2tq6SNk8PDxYtX4DY0aOIDcnBzs7\nO37cuq3A9gf3B9O1ujlO1nnXNcDfjk/LsHe5+cctLO/vi52FKT6OlpxL0bF9+/YHKraqqhIdHY1G\noyEwMFBWjTKAgnq2ze/+dsPffn+PqqonDZaqjJmbm7N82VKemTyaes1aE3XxHMOHDqFNmzb/+Wxw\ncDBmZubcjI/F2d0TgOtRV+g2qA8vvfRSWUcXolL4cuEXvDxzJqgqDevX48etv7Jm8xa+nP8JOp2O\npd+/V+gm9Iqi8Pe+b3GnjPTs2ZOEW0mk
pKTg4uJS6EIRNX18+eU3/b1nLy8mZVGjxn+/qJemO3fu\ncPHiRdzd3bG2suR2lhY7i7yJn7ez9djY2BT5WDqdjsfHj2XrLz9jY2GOk5sHO/cGU61aNUPFr5Ly\nnSClKMreu7+1AloCp8kbRm4MHFdVtV2BB64gE6RUVWXXrl3ExMTg7u5OdnY23t7e9y20ANu2bWPy\ncy+Qdvs2XQc/SkJ0JKcO7ObC+XMFDjUJIe7vwIEDjBo8gG2DGlPLwZq5xyI5YerO7gMP1jtMS0uj\nRaOG9PG0pLWnHV9fSMSvQ3e+XlFwj7ikNBoNXTu2R3MzFmdrM8JvZbFrb7DBtvM8efIkfXv1wN5c\n4WaahnbtO3DmxDF6+lgSe0clWmdHSOhpHBwcinS8ZcuWseDtV3ijvRsWpgqrzqWQ49OCLVt/NUj+\nyu6BF7VQVbXb3YabgeZ/LmqhKEpD4C0D5SxTqqoyYeIkgg8epna9Rpw5sp95H3+Ub6EF6NSpE4o2\nl6YdupGUEM+Na1fp2KkTvr6+ZZhciMrjyJEjDPJzo7ZjXm9selMf6nx78IGP4+DgwIGjx3j7f7NZ\nHx1Fz8eHMXPWrNKO+x82NjYc+OMoO3bsQKPR0KVLF7y8vAx2vlHDhzIm0JIutRxJy9byanAIU196\nhYT4ODp7eDBt2vQiF1qAM6Enae1hdm+Zxs7etnwaVmGWUqgwijIwX+fvq0epqnpWUZR6BsxUZg4c\nOMDe4P28u+o3LKysibt6mSmPDWTsmDH53rNwcHDgwP5gXnr5Za5di2dA7x68/95cWZFKiGKqUaMG\nm27dQavXY2ZiwrHrKVT39CjWsby8vFhUgsX4i8vS0pIBAwy//IBeryci6hodWwUB4GBpRiMPaxwd\nHXn11eJNralTvwHfbttAX52KualCSLyGoKD6pRlbULRie0ZRlOXAD3dfjwEqxdeehIQEvAPqYGGV\nN/mhhl8giolCWlpagc/UeXt7s26NPEMrxIMIDw/n8uXL1KlTh6CgoHvvjxgxgjXff0fXTafxd7bl\nYMwt1m3eYsSk5ZeJiQl+Pt4cjkmnk68D6dk6wm5mMrPe/fs/8fHxXLt2jYCAANzd3e/7maeffprt\nv25l+q4/sLe2IMfEkj0/fmPIy6iaVFUt8Bd592yfB368++t5wKoI7dTy7sqVK6qzi6v6zoqf1VUn\notXHZr5m76dWAAAgAElEQVStBtapq+r1emNHE6JS+Wz+PNXD0V7tWddXdXewUxcv+uoff67VatXt\n27erq1evViMjI40TsoI4fvy46uHqrNap4aY629mos2a+dN/PLVm8WHW0s1Hr1XRXnext1c2bN+d7\nTL1er546dUo9fPiweufOHUNFrxLu1r7/1MRCV5AqrvIyQWrHjh289PIr3L6dQp/evZk+bSq//fYb\npqamPProo4SEhPDYhAmkp6dTt159fty0UZ5PE6IUxcTE0LRBPQ6MaEFNe2uu3tbQbeMJLl2Nyre3\nJQqWnp7OhQsX8PDwuO/EzGvXrtG0YX0+6OpFNXsLIpKzeOfQTWLir2Nvb1/2gauQB54gpSjKelVV\nRyiKEgb8p2qqqtq4lDOWujNnzvDo6DFM+t/HVPP1Y+2CubRt34H2vQejzclm7vvvY29nT65Wh6KY\ncDMxkR+3bGHmSy/JPVghSklMTAx+ro7UtM+7XePnZEM1Bzvi4uKk2BaBqqpcu3aN7Oxsdu/axZHD\nB6nl58/Ml2flWzivXLlCLVc7qtlbABDgYoWjtTmxsbHUy2fIWRhWQfdsZ9z9Z8EPt5Vjv//+O+37\nPELzzg8D8Pir7/PiI12Y8MocAKb0aknXkRPpOeIx4qOu8M6koSz4chF2dnZMfvZZY0YXokJSVZXN\nmzcTFhZGnTp1GDlyJEFBQUQmp3MkPoW21Z0JjknipiYLPz8/Y8ct93JychgxZDAHD+xHr9Wi1+sY\nVs+FQyG72frTFv4IOYGlpeV/2gUGBhKVfIfYNFtqOlhy8VYmaVna+y7UI8pGQY/+XL/724eB/aqq\nXi6bSKXHzs6OlMSEe6+TE69jaZO3SHh2ZiZpKcn0GJ63CHr1Wv40atsZeycXNmzaLMVWiGJ4buoU\n9mzZSG9vB+Z/m8Hvv/zMd6tWs3LtOkY9OhILEwWdorBu048P9HhKVfXp/E+IP3uMJb1rYGqisDgk\ngdi0HKa38uS1Awns3buX3r17/6edt7c3ny74ghlTp+DhYEPSnWxWrl6DnZ2dEa5CQNFmI/sASxRF\nqQWcAPYDB1RVPWXAXKVizJgxfPb5Aha/8RyePrXZuW4F1rZ2JMZFk5OVhampKRFhoQQ2bk5WpobI\nC2E0ad8Ve/kLKcQDi42N5Yfvv+fUuDY4WprzklZHi9W/cf78efr06UN84k1u3LiBl5cXFhYWxo5b\nIZw6cZx2XuaY390PuGttR74LTURRFGwtTMnJycm37YQJj9O//wCio6Px8/PDycmprGKL+yi02Kqq\n+iaAoijWwJPATOAz4P6bwpYjjo6OHD3yB0uWLCE5JYUNa1ezY+cu5kx8BFNTU0aNepT5z0+gZkA9\nEqKjcHR144/fNrNj++/Gji5EhZOamoqLrRWOlnk7YFmbmeJlb3NvBxxLS0t8fHyMGbFciY2NZeYL\nM4i8coXW7drz/ocf/2d7vjr1G7In9ABda6mYKPBHTDr2lqb8GH6bOI1Kp06dCjyHm5sbbm5uhrwM\nUURF2c92NtABsANCgYPk9WyvF9KuXMxG/reoqChGjRnLieMh1PT24f25c7h58ybHjx+nevXqjBkz\nhgYNGhR+ICHEP+Tk5NAwKJDxvjaMrOPFtqs3mX/uJucuRcgM2H/JyMigcf16tHbOpZGbBTujM7H0\nbcTvO3f/Y3KmRqOhT4/uRF0Ox9LMhNuZWtzc3QgICODTL77C39/fiFch7ie/2chFKbYnAS2wDQgG\n/lBVNbsIJyx3xVZVVRo0akyTbv3oNWoS548fZvk7LxJ2+jQ1atQwdjwhKryrV68yadwYzp4/T6C/\nP8u//4H69Q27GpGqqkRFRZGbm4u/vz+mpuV+0I3t27cz6+lxvNPeFQCtXmX8lkjCLoQTFRWFlZUV\nrVq1wszMDJ1Ox6lTp8jJyaFZs2ZYWVkZOb0oyAM/+vMnVVWbK4riQF7vtgewVFGURFVVOxogp0El\nJiYSHxfH7MenoCgKzTp1J7BRc0JCQv5RbHfu3MmChV+iqirPPv0U/fr1M2JqISoOPz8/9h76o8zO\nl5OTw8ghgzl88CCWZqbUrO3Hrzt3l/v7k2ZmZmTlaO/tFKTVq+Roc2nRtAk1HK24k6PFJ6Auv+3c\njbW1NS1atDB2ZFFCBe8dxb2NB8YAjwEjgThgj4FzGYSDgwM52dkkJcQDkJuTTUJ0JK6urvc+s3v3\nbh4dM5aaLbvi2+ZhJkx6gq1btxorshCiAPPnfYLm0hnOjW/LmbGtqaNN4eUXnjN2rEJ17NiRDL0Z\
nnx25zr7IVObuj6WJpw1KbiZzO7oyr6sH2uuX+ezT+caOKkpJUWYjf0DeDOQFQIiqqrmGjWQ41tbW\nzJk7h7lPDqN5lx5cCQuldcsWdOz4Vyf9q8VLGPrsTDr3HwaAiYnCwq8WFbqXphCi7J05cZwhtZ2x\nuDtbd2SgO2+Hhho5VeEsLS0Z89gEfv5mIcfjM2hRzZZGHja8fzAOAFMThUYuplwOv2DkpA8uJSWF\n27dv4+3tLZvQ/02hPVtVVfurqvqRqqqHK3Kh/dMLzz/PhrWrebhlQ957azbr1qz+x4QERTH5x67T\nfw7zlIZdu3axYMECduzYUSrHE6KqC6rfgN9jU9Hp89af3RaVRFAFWSFpwoTHuZFjSkNPG7wdLfn0\nWCKutpboVZUsrZ7DCbm0aFPgtuHlzpx33qZm9Wq0a9GEeoH+XLlyxdiRyo1Kvzbyg9q7dy/DRoxk\n6OSXMTU1ZeOXH7Hi26/p27dviY4765VXWb1uPQ3adOL8sYOMGDqETz7+qJRSC1E1aTQa+vV8mNiI\nS1iZm4KtI7v3H8TDo3hb9JW1kJAQ3vnf66Snp9Gr3wC2bNrI1YjLZGt1DB48mG+//6FCTPiCvFtw\nj40cwtxOHjhbm/Hzpduc0Xty7GS5X5KhVBV7NnIJTlghiy3Anj17+OLLr1BVlWeeevK+K7Q8iOjo\naBo3bcbHm/dh5+hMRtptZg3txomQY9SuXbuUUgtRNf05W1er1dK0adP7Ll9YUej1emJjY7G0tMTT\n09OoWVRVJT4+Hmtr6wK3HP3TvHnzOPDNR0xskvfZzFw9j/0cSVZ2/gtvVEb5FdtCh5Grok6dOvFQ\n1y5U8/LiypUraLXaEh3v1q1buHlWw87RGQA7ByfcPKtx69at0ogrRLkTHx/P4cOHSUxMNPi5TE1N\nadGiBW3atKnQhRby9qv18fExeqFNSkqifeuWNKoXhE+N6jz71JMU1nny8/PjfEou2Vo9AKEJGdTy\nlkcq/5RvsVUU5RdFUX7O71dZhixLqqoybMRIvlmzkTu2HixZsZpRY8YW+hetIHXq1EGTdpsDWzeR\nm5PNwV9/JC35FnXr1i3F5EKUD998vZxGdYOYMWY49QL82bB+vbEjiQc05ZmncLsTwzf9vFnWz4fg\nrZv47rvvCmwzePBg2nbtxXO7E3j7jxSWn73DilVryyZwBZDvMLKiKF0KaqiqanCBB66gw8jnz5/n\noR49+fjH/ZiZW5CTncWLAzty5PBBAgICinSM7OxsEhIS8PT0vPcA+unTpxk1ZiyXL4bjHxjEmlU/\n0KxZM0NeihBlLjY2lib167JrSDP8nW05k5jGwF/OcDU6ttw/+yr+ElTbl6n1TKjllPfz65eLyZi1\nHMiiJcsKbKeqKidPniQpKYlmzZpVyS0UH3hRi8KKaWWl0WiwsXPAzDxvoXRzC0ts7OzIzMwsUvu8\nCVYjMDUzJycri5Xfr2DAgAE0adKE82fDSnV2sxDlzdWrVwlyd8LfOW+N38YeDnjYWRMTEyPFtgKp\n7Veb0zcuUsvJCp1e5WyyjqGBdQptpyiKLMCRj6Is1xgIvA/UB+6tE6aqaoGbUVbUnm1WVhaNmzaj\ncdc+tHqoD0d3/EL4kb2cOnmi0J1KMjIy8K1dm2fe/YKGbToSERbK/OcncPHChQozO1KIkoiPj6dR\n3SB+HdyUeq52nEi4zbBfzxEZE/dAW+qFh4dz+fJl6tSpQ1BQkAETi/uJiIiga6cOuFtCWlYu3gF1\n+X3XHlkqsghKMkHqW2AReesjdwO+B34o3Xj3d+3aNbZv386lS5fK4nQAWFlZsWnDemJPH2HhS0+i\nT45j984dRdoSLDIyEnsnFxq2yVskI6BRM6r7+hEeHl5o299++42GjZvi7VuLZ56dTFZWVomvRYiy\nVr16dRZ8tZjeP56i3YZQhv16jm9XrnqgQvv5p/Pp0rY1C1+eRsdWLViyeJEBE4v7CQgI4Fz4JT5c\nupLvNvzErn37pdCWUFF6tidUVW2hKEqYqqqN/v5eIe1K1LNdvWYNU6ZMpXbdBly7HM4rr8xi5osv\nFvt4RXXq1Cl69e5DtVr+3EqIp3XLFmxYt7ZIz7olJSVR28+ft77/hWo+tUm6cZ3Zo3tz6uQJatWq\nlW+70NBQuvfoyZNvzcezpi9rP59Do4BafL1saSlemRBlJykpiejoaGrVqoWzs3OR28XExNC0QT32\nD2+Bt4M1V29r6LbxBBevRMrokKgQir0RAZCtKIoJcFlRlKnkrY1s0N3V09PTeeaZZ5m9fCPeAXVJ\nTrzOG2P7MXjgQAIDAw15aiY9+RRDpsyic/9haHNz+ODZUaxevZpx48YV2tbV1ZX58+fx8qQh+NVr\nRGT4Wd743+wCCy3k9Wo79h9G0w7dABg/ay5vje8nxVZUWK6urv9Yc7yoYmJi8HN1xNvBGgA/Jxuq\nO9oRFxcnxVZUaEUptjMAG2A68C7wEHmbEhjM9evXsXdyxjsg79EYF49q+PgHERUVZfBiGxl5lSfa\ndgbAzNyCoGZtHmjJsScmTaJb166Eh4fj7+9fpMd77OzsSLnx1/bASQnx2Noa9PuMEOVSUFAQkcnp\nHIlPoW11Z4Jjkki8k1Wq+7ZGR0ej0WgICAiQtXtFmSnKFnshAHd7t9NVVU03dChvb2+yM+8QdmQ/\njdp25tql80RdukC9Ul7zVFVVLl68SGpqKg0bNsTW1pamTZuxd/NqBj85g/TbKYTu287ETz95oOP6\n+/s/0A+H8ePHs+CLhSx583nca/iy78dVfDbvwc4pRFm4evUqV65coU6dOvj4+JT68d3c3Fi5dh2j\nHh2JhYmCFoW1Gzc/0D3f/Oh0OiaOH8uvv/yCnZUFju6e/L57L15eXqWQXIiCFeWebUvyJknZ330r\nFZioquqJQtqV6J5tcHAwQ4cPx9zCCk1GOsuWLmHEiBHFPt6/6fV6JkycxG+//46LmweatNvs2rkD\nGxsbevftR2JiIpo7d5g+fTrvvze31M6bn5SUFJYuXUrK7dv06d2bLl0KfMxZiDK3cMHnvP2/2dT3\ndOb8jRTmf/El48aPN8i5srOzuXHjBl5eXkWanFgUS5cu5bv332Rzv4ZYm5nw1pGrxHjUZePPvxTY\nLiMjg/j4eGrWrImNjU2pZBGVV7HXRlYU5QwwRVXVA3dfdwS+UlW1cSHtSvzoT1ZWFnFxcXh5eWFr\na1uiY/3b6tWrefuDj3l18Xosra3Zue47Luz/nT8OH0Sn0xEbG4uDg8MDTe4QorK6du0aLRo1YN/w\nFvg4WHMxOYOem09xNTqmwvw/MvXZp6l5PpjJzWsBcP5WOo8djOVi5LV822xYv54nJj6OvbU5mbl6\n1m/6ke7du5dRYlERleTRH92fhRZAVdWD5D0GZHBWVlb4+/uXeqGFvOf4GrbtgqV13kSMlg/14dLl\ni0DeWqu+vr4V5oeIEIZ27do1At2d8Lk7camOix2e9jbE
xcUV2C45OZn169ezadMmMjIySi2Pqqqs\nXr2al2e+xLJly4q0fnlg3frsjEsnV5e3du+vUUkFPsMbHx/PU09M5O2OHnzQ2Z1AOz1D+vfhhRnT\ni7zIjRB/KsrsgGBFUZYAawAVGAnsUxSlOYCqqicNmM9gGjZsyOqN79J33NPY2Nlz+Pct1K/foNB2\nWq2W9PR0nJycZCUoUWUEBQVx+VYqpxLTaOrhwKHYZJIys/H19c23TVRUFF3at6WeoyU5Oj2vzzTj\nwNFjpbKE37Rnn+HQti0M8HFk5eYMfvv5Jzb9/EuB/09OnjyZ3b//Rsu1x3C2tiQdc3YG57/84KVL\nl/BxsaWmowUvbb9GI08bevo7s/uXVQy9cJ5t23ca7WdAVlYWp0+fxtLSksaNG2NiInvKlHdFGUbe\nW8Afq6qqPpRPu3K9gpSqqkyeMpU1a9fi5OKKourZtWN7gRObvvn2W6ZNm46iKPj4+rL155/w8ytw\nIS0hKo1Nmzbx5OMTcLK2ID1Hy+r1G+nRo0e+nx89fCj+188xs1UtAF4+cBmrdn347IuFJcqRkJBA\nvQA/zoxvh6OlOTk6Pa3XHmfDbzsLXSpQr9cTFhaGRqOhSZMmBd6DjYqKonnjhkxq5MhP4cl83NMX\nRVHQ6lUmbYvm3MUIqlevXqJrKY74+Hge6twRvSYNTY6WBk2a8fOvv1f4HY8qi2I/Z6uqajfDRDIu\nRVFY9NWXvPrKLFJTUwkKCirwL2toaCgvz3qVd1ZupZqvH7/+sJShw0cQeuJ4GaYWwniGDh1Kr169\nijxZKD4mhpE17O+9buVuy/bo/O+PFlVGRgb2VpY4WOT9+LIwNcHT3qZIw9QmJiY0adIk3z/XarWE\nhoai0+lo1qwZb77zLrNfexUn878+o6p5v4zVq50++Rma2GgY09odnV7lw6NhfPbpp8x65ZViH1On\n02FiYiKjdQZU6NiDoiieiqJ8rSjKb3df11cUZZLho5UNHx8fGjVqVOi3wpCQEJp27Eb1Wv4oikKf\n0U8QdvoUubm5ZZRUCOOzs7MjKCioSLNy23fpwuKz18nU6kjNzuXr8Ju071r87+7h4eHs3bsXW1tb\nnNw9eO9YJFGpGpaejiE+U1viXbQyMjLo3L4tw/v3ZNwj/WjVrAljxo7j6PGTWDh7sCQ0mT9i0vnw\njwTq1a9vtD1nL4ZfoHW1vKUTTU0UmrubEX4urFjHunPnDsMGD8Taygp7Wxs+/OB9AG7cuMF3333H\nqlWrSE1NLbXsVVlRBvq/A7YDf46XXAKeM1Sg8qpmzZpEnj9DTnbemsWXw07i4uaGubl5IS3zpKSk\n3Jsokp5u8EeVhTC6N9+Zg3Pjtvgu2UfAsv007dGfadNnPPBxbt26xfjRo+jSthWznxxPk/p1+d87\nczhn50P/X8PZlu3Ajr3BJX4Wd847b2OVEs1nD3nySVd3aispzHrpBerXr0/IyVPEmbmx9ORNMrJy\niY+8zMsvGX752H/LycnB0cWVlWHJxKZmk6PTc/SGlsbNi7fTznPTpnDr7B+sesSPz3rU4Kt5H7Jw\n4UIaN6jPN3Nn8fnrM2jSsD6JiYmlfCVVT1Hu2YaoqtpKUZRQVVWb3X3vlKqqTQtpV67v2T4oVVUZ\nPXYch48eo6Z/EBeOH2Hl9yvo169foW2vXbtGh46dqO5fh9ycHNJvJfDHoYNVcq9HUfVkZmZiYmJS\nrHuK27ZtY/SI4dgoeo6M64CzlTl7rt1i8oEo4hJvluqw5yP9++CfcobOvnlF+1TCHXbc8eDg0eNc\nvHiRjq1b8EXP6tiYm5KRo+PZ32M5f/EyNWrUuHeM9evX89Gct8nOyeGxSU/y4kszSy1jdnY23bt0\nIjX2Ci7mOo7FpmNuYU6PHj1Zt3FzsVbDCvD15rmG5vg45v232XIhiUOpNnR0ymJQ3bynMRaFJIBf\nK37bvqNUrqOyK8mjP3cURXElbyYyiqK0JW9hiypFURRW/7CS779exvSJ4zgecqxIhRZg1quv0r7/\nCJ7/9Fte/nIVQS078vY77xo4sRDlg7W1dbEKbWZmJuNHj+KpBtXo6uOKs1XeKFI3H1eSbqeW+uM3\nTVq04vD1HHJ1Kjq9ysG4LJre7TEmJyfj7mCDjXnehiR2Fqa42FqRnJx8r/327duZ9vQk+rumMdY7\nh8Xz3mfBZ5+WWr5Vq1aRlRDJu53cebFdNV7tWAMXZxc2/vhTsZed9PT05Gpy3midqqpEZajk5mTj\n5/zXQiJ13Kw5tH8vsbGxpXIdVVVRiu0LwM+Av6Ioh8jbYm+aQVOVU4qi0KVLF0aOHPlAs5Dj4uIJ\naNz83mv/Rs2ILeT5RCGquoSEBGzMTekf4MH+mCRi0/OK66aLCfhUr1bqqzm9+trrOAc25ZnfY3l2\nezyZLrV578OPgbxHBVOy9eyJTEWTq+O3iNvozCz/sVb7mpUreCTAlubV7KjnbsOE+vas+v67Ust3\n48YNfGwVTO72lP1cLEm+nVqinvOnC79ixYUMvjiZwpwjySSaOtP5oYdZdzYJTa6O21latl5KwdHG\nmsuXL5fWpVRJRZmNfFJRlC5AHUABLqqqKrOCHkDHDh3YufZb6jRthVarZe+mH3h8dOktPSlEZVSt\nWjWydHru5OqY3qI2bVcewtrMDFMbO37duavUz2dpacnW33cQFRWFTqfDz8/v3vOr9vb2bN+1h7GP\njmBpaCT16gSxY/eGf+zxam1rR2q2/t7rtBxdqX4h6Ny5M59++B4P1cqmmp0F6y6k0qVTxxIds3Xr\n1pw8HcauXbuwsbFh4MCBZGVl4VtjLeM2X8ZEUehWy4EjCTmluhlElaSq6n1/Aa0Ar7+9Hg/8BCwA\nXPJr97fPqyJPVlaWOnLUaNXC0lI1t7BQn3r6GVWr1Ro7lhDl3o4dO1Q3Rwe1QU1P1cHWWn33nbfV\nrKwsY8e6r/DwcNXVyUEd3tBdHdfEXXVxsFN37NhRquf4evly1cHOVjUzNVG7d+mk3rp1q1SP/6eN\nGzaojnY2av2abqqjnY26eNEig5ynMrpb+/5TE/OdIKUoykngYVVVkxVF6QysJW/4uClQT1XVYQUV\n8co2Qao0ZGVloSiKPHwuKpTExESeGD+WP44epZqnJ18t/4aOHUvWo3oQqampXL16lZo1a5b7SYWX\nLl1i6ZLF5GRlM2rsWNq1a1fq51BVFb1ej6mpaakf++9u3LhBREQEtWrV+sckMFGwB96IQFGU06qq\nNrn7+y+Bm6qqvnX3dZWYjZyTk8OHH31E6KnT1AkM5PXXX8POTvaZFVVL57ZtaKxLYkYzb0Ku32bG\ngaucOBNmkC32hKjoijMb2VRRlD/v6XYH9vztzyr9jsuqqjJsxEh+3hlM9eadORx2ke49epKbm0tO\nTg4DBgygTp06TJw40dhRhTCYjIwMQkJDmdPBH09bS/oHeNLR25VDhw4ZO1q5k5mZydq1a/n666+J\niooydhxRzhR
UNNeQtwnBLSAT+HOLvQCqwKM/MTExHDx4kM+2HcHcwpJ2vQYxe1Qvjh49yqBHhmJt\n70jjdp3Zsm0b+wICuRohM/VE5WNlZYWiKMSlZ+HtYI1Or3ItVYOTk5Oxo5UrGRkZdGrXBpP0RFys\nTJn5wnP8un0nbdu2NXY0UU7kW2xVVZ2rKMpuoBqw429jwiZUgUd/dDodpqammJrm/StSFAUzc3NW\nrFiBVq9n7qptWFhZM3DiVKb1acOJEycKXQRdVVXOnTtHVlYWDRs2/MdMRiHKIzMzM+bOnUu/D+Yy\n1M+Vk0mZuPoFFbgBQVW0aNEi7DMTebGdK4qisP+aKdOffZpjoaeNHU2UEwUOB6uqeuQ+710yXJzy\nw9fXl7p167L83Zl06DuU04f2YIYerVaLq2c1LKzy9vV0dHHDysaWixcvFlhsc3NzeWToMI6fOImN\nnT3mJrBn106ZeCAMJisrC51OV+L9oJ9/8SXqN2zEiu++o2aQGTNmzCj2IgqVVcL1eGrZKfeeefV3\ntmJDlCxxKP4imyDeh1arZeHChQQGBKCm3WL39wtxMc0leO8enn32WeIiL3N4+09o0tPY+v0ScnOy\nGThwYIHHXLhwIfFJqXz8YzBz1+6gYcceTC3GOrFCFEav1zNjymScHBxwc3FmyID+aDSaYh9Pp9Px\n5WfzuXR4L6bnDtO/x0OsWb26FBNXLNevX2fWzJk8PelxfvnlFwC6PdSdvbHZ3MjIIUenZ+PFNLp2\n7WrcoKJckWL7L39OjPp27SZ0rj6kZOZSvXo1vvvmazw8PGjdujWvvfIKX895hae7N+GnrxfwzfJl\nhc5SvhB+kcYdH8bMPG8ZtJbd+3LhQnhZXJKoYpYsXszhrZu5NKkT0U91Rb1yhtdenlns423bto3Y\nc6fZMbgxC7oEsqlfQ6Y88zR/f9pAVVWWLF5Mp1Yt6N6xHb/++mtpXEq5k5iYSNsWzUjfu5mAq0eY\nNnE8SxYvpn///jz3ymye3xXP6M1XsA1qycLFS40dV5QjUmz/JTw8nCNHj/L8p9/Qe9REXvjsW/bv\n309ERMS9z7z55ptoMtLR5uaSkZbK6NGj2bdvHy+8+CJvv/02N27c+M9xGzaoT2jwdnKys1BVlaM7\nfqZhwwZleWmiijgcvJcJddxwsjLH0syEZxtV448D+4t9vMTEROq52GJ2dzWl+q72pGsy/7G95NIl\nS5j/1mxe9FaY4JTJ46MfZd++fSW9lHLnhx9+oJO7FR90CuTppr6s6FGXD959G4AXX5pJ+h0Nmsws\nft72G/b29oUcTVQlUmz/RaPRYGNrj7lF3sIT5haW2NjZF7jo+dq1axn+6Cjic8w5dO4KLVu1/s+W\nVJMnTybAuxovDuzIrKFduRxygIULPjfotYiqqWat2hxNvHOv53n0eho1vB/8mVhVVYmMjMTLy4ud\nUTc5fv02uTo9HxyLpHWzJlhY/LVY/ffLlvBRh9o85OvGoEAvXmxag1XffVtq11ReZGZm4mzx12IS\nLlYWZGVn33utKIrczxb3JX8r/qVhw4aYmypsWvQJrbr34+jOX7C1tqJu3br5tnnjrbd5ds4X1GvR\nlqO7thGydzsNGjXmsfHjGTd2DL///js2NjYsXbyIlJQUMjMzqVu3bpH3whXiQcx69TU6//wT/X45\ni52FGedSMtl78JcHOkZubi6jhg3lQPA+rM3NcHR2YdSOiySlptGuZXPWb9nyj8+bW5hzJ+evnm56\nrjbTrVQAAB93SURBVA6LSrhS2uDBg+n68Yc0c7OjtpM1bx2L5tHRo0v9PKGhoUwcP4Zr0TE0bdKE\nFavW4O3tXernEWWn0P1si33gCryCVFxcHJOnTuNCeDgN6tfnyy8WUL169Xw/X8PbhxcXruL2zRt8\n8eoUJr/7OU7uHiyaPZ3EuBi6DBxBekoSMRdOE3LsKG5ubmV4NaIq0mg07Ny5k9zcXLp164arq+sD\ntZ/z7jtsX7aAzYOaY/H/9u4zvopqffv4b6WHQCAJJLQA0qQjHaRKkyLCQUCxUxQFRUApikpTVLoH\nwQoesIBiFAgIBBAIHZRilC7SlFACIb3sZP4vkgfxoYZksglc3ze6J2tm39uP2VfWzCquhqHhB0mp\n3IBZc7+84jKBoaGhPPPk4wytVYKYlDRm/hbJT+s3UL169Wx9jtTUVHbu3IllWdSqVetfvWln2bBh\nA68PfZkLF6Lp8GAXRo97K0f/cI6KiqJyxfI8dnc+agXlY8WfsexO8uXXPfsubowgt64sL9eYA2+Y\nZ8M2q14aNJjwbTvwLRxEsdJl6dz7BQBGPtaBB3sNoEHrjH1vZ789gnurlmPMmDHOLFfkusqVLMag\nygE8Va0kAD+fjGbQjrP8euDQFdvHxcXx2muvsXF9OGVKl2b0uLeyHbQXLlzg/pYtiDn5F8ZAvsJB\nhK0Nx8/P77K2lmUx+o03+GjmDACe6z+A0ePG5ejm8rll+fLlvN7/ad5smPE5Lcui749/seO3Perd\n5gHZ2TxeMiUlJbFjxw72799PWloayZnPaiZNnEDLe+vz26afOPP3Pxssx12Ipmipuy6+Dip1F+ej\no3O9bpGsSEtL49jJ0yw/fBpHejqWZbH40Cny+xa8YvvY2FjurVeHIysX0sQ9lg3r1uTI3qdjR71B\nBUc0m3vUYlP3WtRwjeeNV0dcse2MD6YTOudTVnSuzorO1Vk85xNmfDA92zU4Q6FChTgdm0RqWkZn\nJSY5jYSUVA24yuPUs71BR48epVXrNqS7uHE+6gzJiYmkpCTTvMV9LPhmPv7+/pw8eZK69epTpVEL\nCgYEsmLeLMpXu4e+b04k5lwU77/yDJ9/9gnt27d39scRuSrLsvDzzU8VXw8i45PxcXfjr7hk3n1/\nOs8+++xl7WfOnMny6e/yZbsqAGw8cY6B205y8OjxbNXRqW0rHvE8R6fyQQCE/XmGT855Exa+4bK2\nD7RpSU+v8xfbhh46xbwkP5as/OmytnazLItjx46RnJxMuXLlsrw7j2VZPNS5Ewd3bqZyQRe2nU7l\niWeeZ9zb79hUseQk9Wyzqd/z/anb7j+8PT+MaaGbKFWxCk8PG4e7f3H6PJPxBVSsWDF++Xk7zWpU\npFwhdz6c8QEXTv3N8B5teG/AY7w2YpiCVm55xhgmTJrC0cR07i0ZQD5vLypVq8bTTz99xfbnz5+n\nbIF/nqWW8/PhQkxstuuoUbsu3x2OwpGeTlq6xYI/oqheu/YV2/oFFOZQ9D8LdxyKTsS/cO5vx+dw\nOHjkoa7Uq1GN1vc2oFHd2kRFRWXpGsYYFvywiKFvT6VKtwFMn/WFgvY2oJ7tDSp9V1lemvo/ipUu\nC0DonI+IPnuKjk88y+gnHuDM6X/PrT179izVqtfg/sf7UfGeuoTNn413WhLLf1zqjPLlNrNp0yZm\nTptKWpqDXv2ep23btjn+HuHh4YSHhxMYGMhTTz111X2Yt2/fzgNtWzGn
TWXKFsrHa5sOk696Q+bO\n+yZb75+YmEiXju2J2LUTg6FStWos+nH5FReQOXDgAM0aNaRdqYwNEpYfiyZ88xYqVqyYrRqyasrk\nySz5cArz21fB09WF4RsOkXR3ff731bxcrUOcRwOksqldh44UvKsKXfq+REpSIu/0f4xmnbqTv5Af\nK+d8QMTuXf9qHxISwnvTP2Lw1Iy5ho7UVJ5tUY3TpyL17EWyZfPmzTzYri0j6gTj6erCWz8fY9aX\n8+jYsWOu15KQkMDChQtZHx7O8tBFxCUk0L5dO2Z+Oitbez+vXr2aFcuW4RcQQLt27ShQoABly5a9\n5mjcEydOsGDBAgC6d+9OyZIlb/r9b1avxx+lVuRunq6eMZDpl8hoXv41mh2/a7W4O8XVwlbzbG/Q\nJx99SKs2bdkaFkrUmVN4enmz/5dNRGxZz6KFP1zW3tPTk4TYGCzLwhhDUkIc6elpmlsr2fbR9PcZ\nXrskz9TMWKjCx92VDyZPyPWwjY2NpVmjBvinxFI0nwdx8QksXracRo0aZeu6s2fN4s1hL9OrUhF2\nx6TwxazP2LJj53WnvZQsWZLBgwdn672zq0LlKoT9vJ7Hq6bj5uLCj0fOUeHuKk6tSW4N6tlmQXJy\nMnv27MHDw4MDBw4QExND8+bNKVOmzGVtk5KSaHhvYwoWL0O5GnXYGPotHdq0YuqUyblfuNxWHu/R\njbpRe+ldI6P3FHroFHMu+Fxx4JCdJkyYwNY5HzC7TWWMMYTsP8knkYbNO3Zd/+RrCA4K5OvW5akZ\n6AvAEyv20uGl1+jXr19OlG2rpKQkOrVry5F9e/DxcCPFw4dV4euvOU9fbi/q2eYAT09PatWqBUDV\nqtde19jLy4v169YyecoUjh3/k2GDXqR37965Uabc5no/15+eXbuQ38MVT1cXRm45yqSZH+d6HadO\n/k11P6+Lc1lrFPHl9K9/ZPu6cQkJFM//z/Ph4vnciY3N/oCr3ODl5cXy1WvYtWsXKSkp3HPPPXh7\ne9/UtUJCQnhv3BhSUlPo1fdZBg4anCfnDUsG9Wxz2JEjR1iyZAkeHh5069YNf39/Z5ckt6EVK1Yw\nfdKEjAFSzw2gR48euV7D0qVLGdjrCb5/oBpFfTwZuPYAPvc0ZfYXX2bruk8/9ijndmxgdIPSHDgX\nz4vrDrF20+br/oF7OwkLC+PxHg/x3D2F8HZz4bOIGAaPHMMLAwc6uzS5Dg2QygU7d+6kddu21Gra\nhsT4OE7sj2Db1i0EBQU5uzQRW0ydPJlRb75BUkoKndq3439fzcv2AMDExEQGvziAFcuW4e/vx3tT\n/0vr1q1zqOK84anHH8V7/090qJCxitTuyHiWRPuxdcduJ1cm16OwzQVt7m9Pmfr30bJrxsLkX0wc\nRZXi/kyaNNHJlYnYx7Is0tPTs7x4g1zdc8/0JW7bYnpUzVjTesOxGLZZwazduMXJlcn1aFGLm+Bw\nONi3bx9HjhzhRv5wOHv2DCXL/TOvr3jZipw+e8bOEkWczhhjW9A6HA4G9n8eX598+BXIz5hRb97Q\n72JeN3DwEJYfTebriCi+3xvFrIgLvDZqrLPLkmxQ2F7FmTNnqNegIS3b3E/tevV5uOejOByOa57T\ntk0bFn06jZjz5zh1/Agr58+mnQ2LDYjcKcaPG8euFYvY8VgD1veow3effcjsWbOcXZbtqlSpwoYt\nWyl638N41+/M4h9XZGvhkrS0NGbNmsUrLw9hzpw5pKen52C1ciN0G/kqHnn0UWJdfHhsyJukJicx\nZXBv+jzanYHXGKCQkpLCCy8O5KuvvsLDw51hw4YxYvhwjSAUyRQaGsryJaEEBAYy8KVB191usln9\nurwSbGhRKuN26vy9f7PGpxzzQi6f2y5XZlkWDz/UlX3bw7nH35VfzqZRv1V7Pp+bvYFscmW6jZxF\nERG/cW/7/2CMwcPLmzot27Pr14hrnuPh4cEnH39EfFws58+d49URIxS0IplmfDCdgb2fJHhfOCeW\nfEXDOrU4f/78Nc8pHBjIvnPxF1/vPZ9IQGCg3aWSmJjIwP7PUePuCrRt0ZRdu7I3d9iZ9u7dS/ia\n1bx5b2G6VQ1gVOPCLFr4A0eOHHF2aXcUhe1VVKxYkZ3hK4GMpRYjNq6h8t25u86qyO1k/NgxfN2u\nCv1rl2Fai7upWcCF+fPnX/Occe9NZPLukwxYc4Deq/bx/fFYXn39Tdtr7fPk4xxd+yPT6xSmk/t5\n7m95H8ePZ28Xo6xISkri2LFjOXKtuLg4CubzwMM14+vey82FAl6exMXF5cj15cZoUYurmP7+NO5r\n1Zrd61eREB9H1cqVrnkLWUSuLSExicLe/+wOFODpRmJi4jXPqVq1Kj/v/pXFixfj5ubGRw89dN1b\nz9mVlpZGyKLFHHm2OT7ubtQKKsiG0wmEhYXRp08fW98boOfDPVjw3QIMBh9vT1asXkuDBg1u+nrV\nqlUjxcWThfujaVA8H+tPxONVoGCub9Jwp1PYXkXJkiX5dddOdu/ejaenJzVr1rzu2qwiAunp6bz3\nznhCQ77Dt2BBRo1/l0aNGtGjR3cGrF3JG/VKcfBcPD/8cYYNN7Cec3BwMAMGDMiFyjO4uLjg7upG\ndJIDH/eMr8joJAdeXl62v/fHH3/MkoXfM6NDWYrmd2dexFkeuL81Z6JvfgWtfPnysXrdep7p9RQr\nf95PlSpVWbV4Lh4eHtc/WXKMBkiJSI56/dURhH39P0bXL8WxmETe2HqUtRs3U758eV4bNpQVPy7F\n39+fd6ZMo3Hjxs4u94reGjuGrz6cTt/Kgew6l8iOeFe27tyVrZ2MbkSXLl1I/n0dz9crCkCyI51H\nvjtAWrq+S/MKLWohIrmiTPGifNemPBX9M4LpjQ0H8X/gSUaNGuXkym6cZVnMnz+fdatXElisOIOH\nvIyfn1+OXT8yMpLTp09Trlw5fHx8Lh4fMmQIP3w+k8n3l8HNxbDrZDzvbfqb+ORrTzuUW4c2IhCR\nXOHu5kZcatrF13GOdIpdZeP5W5Uxhp49e9KzZ88cv/Y7b7/Fu+PHE1DAm4Q0WLJsBXXr1gXg3Xff\n5bt5XzJg6WFK+noScTqBsW+/k+M1SO5Tz1ZEbkhqairffvstkZGRNG7cmIYNG16x3UcffsiEUSN5\nqUYxjsel8PUf59m2c5dTNnO/1WzZsoUuHdrybvMg/L3d2Hgshm+Pwp/H/7rYxuFwMGXKFE6cOEGP\nHj1o0qSJEyuWrNJtZBG5aQ6Hg/atW5J47BA1AvKx8I8zvD1pKr2usm1kSEhIxgApPz+GDB12xT2f\n70SzZ89m3sSRvFirEJBxu7rbgkPExMbe9FZ8cmtR2IrITVu4cCHjBz3Pis41cHUx7D8XR+uQnUTH\nxmnhlizYuHEj3R/swIQWQfh6uvLzX3HMPpDCiZOnnF2a5BA9s7VJXFwc0dHRFCtWTLueyG0rKiqK\n8oXy4eqS8R1SvpAPicnJpKa
magpJFjRu3Jhezz7PwBkfUKxQPk7FprBoyVJnlyW5QD3bbJg4aRKj\nRo3G28eHAH9/lv+4lLJlyzq7LJEcd+DAARrXq8vsNndTK7Ag7/18hN/ci7Bm42Znl5YnHT58mMjI\nSCpXrpyjo5zF+XQbOYeFh4fz8KOPM/KzEAKCirF07kcc2LyabVv05SO3p2XLlvHCs32JPBtF00YN\nmTv/WwJzYZ1ikbxEYXsdcXFxuLu743mDUxSmTp3Kyu0RPDE0Y4/J5MREnr2vGinJyXaWKXJHSElJ\nweFwkC9fPmeXIpIl2vXnKmJjY2nf8QEKFy6Cb8GCDBs+4oY2py5TpgyHfv2FlOQkAH7btoHgUqXt\nLlfktmZZFkOHDMY3f378CxWkS8f2xMfHX/9EkVvcHT9AatCQl0l08eLT8D0kxMUy8YXHqFa1Ck8+\n+eQ1z+vcuTMLQr5n5CNtKRpchj/3RbDoB+2xKZIdn3/+OasWfM2+3k3I7+HG8z/tYdiQQcz4+FNn\nlyaSLXd8z3bjxk3c/9gzuLm74+vnT5NOD7Nh46brnufi4sJXX8wl5Jt5jBnxMr9HRGjyuchVpKam\nMuzlIdx9V2nqVKvCkiVLrthuc/hanqgQgL93xpZw/asXZ9P68FyuViTn3fFhW7JECQ7u/hnIuIX1\nR8QvlAq+sZVujDE0aNCAjh07UrRoUTvLFLHNvHnzqF6xPBVKB/P6qyNIS0u7/klZ9OqwV9i2cD5z\nmpTktfLe9Hn8UbZu3XpZu+KlSvPz2YSLj3K2R16geAmtPCV53x0/QGrPnj3c17IVZavWJDb6PO6k\nsX7d2hzb3cOyLMLDwzl58iR16tShQoUKOXJdkZywatUqnurRjU9aViTA253BGw7T7om+jBo77ort\n09LSsCwLN7esPYG6q0QxFrQud3Fzgne2/IFp0Y3x7/x73d+YmBiaNWpA/qQYCnm5s/NMHKvDN1Cp\nUqWb+4C3uLi4OMLCwnA4HLRq1YqAgABnlyTZpEUtrqJKlSpE/LqbtWvX4u3tTZs2bXJs30rLsni6\ndx/Whq8nuHwl9gx4gc8+/YSuXbvmyPVFsmthyAIGVC9K02B/AMY3LMOgb7+5LGwty2LkiOFMnTaN\ndMuiW5cuzJr7xQ3/ruT38SEyPvli2J5MdFDhCn/Q+vr6svnnHYSFhZGcnMzn991HkSJFsvkpb01R\nUVHcW78uBdLj8XB1YVBsGhs2b9Vc/dvUHd+ztdPq1avp068/o+cuwdPbm8N7djPxhSc4fy5KS9zJ\nLWH40KEkrPmet5tm3HEJPXSKmScNG3/e8a92s2fN4r+jX+W79lXJ7+FG31X7qNSuK5OmvX/ZNSMj\nI3E4HJQoUeLi/+cLFixgYL++PFM5iL8SUll1KpFtO3cTFBRk/4e8RQ15aSAHwubzbK2M3mzI3vPE\nlapHyKLQ656blJTE3LlzOXXqFM2aNaN58+Z2lys3SD1bJzhx4gRlKlXDM3OB8bsq1yAxMYGEhIR/\n7WEp4iwvDBxIwzn/I3ndfgI8XZm19zRffLPgsnbhq1fRp1IggT4Z89Bfqlmc135a9a82qampPP5w\nD1atDMPVxYXqNWuycOkyChQoQPfu3QkKCiJ00UJK+fqy7bnn7+igBThx/CgVCv3zFVzR34PQE8ev\ne15ycjL3NW1M2pmjlM7vwowpExnz7gT69XvOznIlm+74AVJ2qlu3LhFb13Pij/0ArPxmDuXKV1DQ\nyi0jODiYbTt3UaLzU1jNurJ05Wruv//+y9oVLVmSXVEJF1/vOh1L0WLF/9Vm8qSJRP22jb1P38u+\npxsRGH2CkcOHXvx5s2bNmDh5CqNGjb7jgxagWYtWrDyeTFxKGsmOdJb+mUiT5vdd97xFixaReOoY\nIxsV5okaAbzZuDDDXnnlhtYHEOdRz9ZGVatW5b/TptKv138AKFGyJEsWL3JyVSL/VqJECUaNGn3N\nNkOHj6BJSAjdftxDfg9XNp+M4af1If9qs3PrFh4pF4CXW8aGHI9VLMK727bZVXae1/+FF9i3bw9P\nfzoLY6BLp068Nf76G8VHR0cT6ON68RZ9kI87CYmJpKenazOUW5ie2eYCh8NBbGwshQoV0rNaybNi\nY2NZunQpKSkptG3b9rLpbq8OG8aR5d/yUcu7McYwavMfnCtbhzlfz3dSxXlDSkoK6enpNzzY7ODB\ngzSoW5sXahWinJ8X3+y7QFrxqqxYvcbmSuVGaG1kEbFVbGwsbVo0I+H033i6uhLn6sWajZs0B90G\nq1evZkC/Zzhz9izNmjZl9twvtXvQLUJhKyK2S01NZevWrTgcDurXr6+NBOSOo40IRG5R58+f55GH\n/kOJIoWpVbUy69evd3ZJN83d3Z0mTZrQokULBa3IJRS2Ik72WI9ueB3eRdiDVXmlrBddO3Xkzz//\ndHZZIpKDFLYiTpSamsqqteuY1LQCwb7edCofRKvShVmz5s4b7LJx40ZaN21MvRpVGTtqlC1rNIs4\ni8JWxInc3NzwdHfnr7iMfZEty+J4bDK+vr5Orix3/f7773Tp2J6HfWJ5u3J+ls/9hNeGD3N2WSI5\nRgOkRJzsv9OmMeXtMfQsX5hd5xK54BvEmg2b8PT0dHZpuWbs2LGcDZ3DW00ylo3843w8nZfv51jk\naSdXJpI1Wq5R5BY1cNAgKletyrp1a3mgWHF69+59RwUtgJeXFzGp6RdfRyc78HD3cGJFIjlLPVsR\ncbqTJ09S756aPFS6AKXzezI9IpJX33qHZ/v1c3ZpIlmiebYicks7fvw406ZM5sK5KDp17Ubnzp2d\nXZJIlilsRUREbKZFLURERJxEYSsiImIzha2IiIjNFLYiIiI2U9iKiIjYTGErIiJiM4WtiIiIzRS2\nIiIiNlPYioiI2ExhKyIiYjOFrYiIiM0UtiIiIjZT2IrcBtLT06/fSEScRmErkoetWrWKUsWCcHd3\no17NGqxatYrBA1/k+Wf6sm7dOmeXJyKZFLYiedTx48fp2a0rMxqX4swLrWnqGU/nDu3x2LaMUgc2\n0KNzJ0JDQ51dpoigsL0tfP3115S+qywBRYrQq09fEhMTnV2S5ILt27dTv0QAzYMDcHNxIcWRxvO1\nghnZqDwDapdhWrPyTBg32tlliggK2zxv/fr1vDR4CL1HTWXslz+y98hfDBo8xNllSS4IDAzkYFQs\nSY40AM4kphDg5XHx535e7iQnJzurPBG5hJuzC5DsWbZsGc3/8xgVa9YB4JGXRjJxwKNOrkpyQ+PG\njWnQohWtf1hD3SBfVp24wOoT0dxVMB+FvNwZuvEwvQYPd3aZIoLCNs/z8/PjTMSWi68jjx+hYKFC\nTqxIcosxhrnz5rN06VKOHj3Kc/XqcerUKSaMG01SUhy9hwxn0JCXnV2miADGsix7LmyMZde15R/R\n0dHUq9+AoLJ34x9UnA1LQ/hy7hw6dOjg7NJERO44xhgsyzKXHVfY5n0XLlzgiy++IDY2
lnbt2lGr\nVi1nlyQickdS2IqIiNjsamGr0cgiIiI2U9iKiIjYTGErIiJiM039ERG5jvT0dJYsWcLx48epV68e\n9evXd3ZJksdogJSIyDVYlsWjPbrxy4Y1VPDzYPvf8YwZ/x7P9+/v7NLkFqTRyCIiNyE8PJwnu3dm\nUotAPFxdOBmbwpBVf3H+QgweHh7Xv4DcUTQaWUTkJpw+fZqSBb3wcM34uiya3x1XY4iJiXFyZZKX\nKGxFRK6hfv367DkVx6+R8TjSLRbujyY4uCQBAQHOLk3yEIWtiMg1lCpVim+++56P9ibTY8FBItID\nWbI8DGMuu1MoclV6ZisicoPS09NxcVEfRa5Oz2xFRLJJQSs3S//niIiI2ExhKyIiYjOtICUi15Wa\nmspnn33GHwcPUrtuXXr27KkBQiJZoAFSInJN6enpdO7QjrhDv9G8aH4WHommeeduvD9jprNLE7nl\naAUpEbkpW7du5YnOHdncozburi5EJ6VSfe4m/jh6nMKFCzu7PJFbikYji8hNiYuLI7CAN+6ZKygV\n9HTDx9OD+Ph4J1cmknfoma2IXFPdunU5FpfCp78ep1WpAObsPUmxksEEBwc7uzSRPEM9WxG5poIF\nCxK2Zh1LkwvSZcUB/vQvx9KwVZpzKpIFemYrIiKSQ/TMVkRExEkUtiIiIjZT2IqIiNhMYSsiImIz\nha2IiIjNFLYiIiI2U9iKiIjYTGErIiJiM4WtiIiIzRS2IiIiNlPYioiI2ExhKyIiYjOFrYiIiM0U\ntiIiIjZT2IqIiNhMYSsiImIzha2IiIjNFLYiIiI2U9iKiIjYTGErIiJiM4WtiIiIzRS2IiIiNlPY\nioiI2ExhKyIiYjOFrYiIiM0UtiIiIjZT2IqIiNhMYSsiImIzha2IiIjNFLYiIiI2U9iKiIjYTGEr\nIiJiM4WtiIiIzRS2IiIiNlPYioiI2ExhKyIiYjOFrYiIiM3c7Ly4McbOy4uIiOQJxrIsZ9cgIiJy\nW9NtZBEREZspbEVERGymsBUREbGZwlbEJsaYkcaY34wxu40xO4wx9XL4+s2NMaE3ejwH3q+zMabS\nJa/XGGNq5/T7iNyObB2NLHKnMsY0BDoA91iW5TDG+AMeNrzV1UY42jHysQuwBNhnw7VFbmvq2YrY\noxhw1rIsB4BlWecsy4oEMMbUNsasNcZsN8YsM8YEZR5fY4yZZozZaYz51RhTN/N4PWPMJmPML8aY\nDcaYCjdahDEmnzFmljFmS+b5nTKPP2WMCcl8//3GmPcuOadP5rEtxphPjDHTjTGNgAeBCZm99LKZ\nzXsYY7YaY/YZYxrnxH84kduRwlbEHmFAqcwQmmGMaQZgjHEDpgMPWZZVD/gcGH/Jed6WZdUCBmT+\nDGAv0MSyrDrAKOCdLNQxElhtWVZDoCUwyRjjnfmzmkB3oAbwsDGmhDGmGPA6UB9oDFQCLMuyNgOL\ngaGWZdW2LOtw5jVcLctqAAwGRmehLpE7im4ji9jAsqz4zOeZTckIufnGmBHAL0A1YKXJWPXFBfj7\nklPnZZ6/3hhTwBjjC/gCczN7tBZZ+71tC3QyxgzNfO0BlMr899WWZcUBGGN+B0oDRYC1lmVdyDy+\nALhWT/r7zH/+knm+iFyBwlbEJlbGijHhQLgxJgJ4EtgB/GZZ1tVuuf7/z1otYBzwk2VZXY0xpYE1\nWSjDkNGLPvivgxnPlJMvOZTOP98HWVn67f9dIw19n4hclW4ji9jAGFPRGFP+kkP3AEeB/UCRzLDD\nGONmjKlySbuHM483AS5YlhULFAT+yvx5ryyWsgIYeEld91yn/XagmTGmYOYt74cu+VksGb3sq9H6\nrCJXobAVsUd+YE7m1J9dQGVgtGVZqUA34L3M4zuBRpecl2SM2QHMBHpnHpsAvGuM+YWs/86OA9wz\nB1z9Boy9SjsLwLKsv8l4hrwNWA/8CVzIbDMfGJo50KosV+6Fi8gVaG1kkVuEMWYN8LJlWTucXIdP\n5jNnV+AHYJZlWYucWZNIXqeercit41b5y3e0MWYnEAEcVtCKZJ96tiIiIjZTz1ZERMRmClsRERGb\nKWxFRERsprAVERGxmcJWRETEZgpbERERm/0fblQhEznlSdgAAAAASUVORK5CYII=\n", - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "plot(X_lfda, Y)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Relative Components Analysis\n", - "\n", - "RCA is another one of the older algorithms.\n", - "It learns a full rank Mahalanobis distance metric based on a weighted sum of in-class covariance matrices. It applies a global linear transformation to assign large weights to relevant dimensions and low weights to irrelevant dimensions. 
Those relevant dimensions are estimated using “chunklets”, subsets of points that are known to belong to the same class.\n", - "\n", - "Link to paper: [RCA](https://www.aaai.org/Papers/ICML/2003/ICML03-005.pdf)" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "rca = metric_learn.RCA_Supervised(num_chunks=30, chunk_size=2)\n", - "X_rca = rca.fit_transform(X, Y)" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAdsAAAFsCAYAAACEtRP5AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzs3XdcVuX/x/HXYe8p4EARt7hyb9ziKPfObTa0LEu/ZZkz\nNbcNM83UHKVlmVsT3BsniDMDRWTI3vv8/sCsfi4Ebs4NfJ6PB4+HN5xzXe+7lM99nXOd61JUVUUI\nIYQQumOgdQAhhBCiuJNiK4QQQuiYFFshhBBCx6TYCiGEEDomxVYIIYTQMSm2QgghhI4Z6aphRVHk\nmSIhhBAljqqqyv//ns6K7cMOddm8EEIIoVcU5bE6C8hlZCGEEELnpNgKIYQQOibFVgghhNAxKbZC\nCCGEjkmxFUIIIXRMiq0QQgihY1JshRBCCB2TYiuEEELomBRbIYQQQsek2ApRAiUkJPDmuPE0btqM\ngYMHExISonUkIYo1KbZClDCqqtKzdx+uB4fT7fXJZFo549mmLUlJSVpHE6LY0unayEII/RMaGsqF\nCxf4ev95DI2MqNGgKdcvnOL06dN06NBB63hCFEsyshWihDE2NiYrK5PMjAwgZ6SbnpKCkZF89hZC\nVxRd7cyjKIoqu/4IoZ8GvzqUa3/dpXnX3lz1PU5yRAgnjh3FxMRE62hCFGmKojxxiz0Z2QpRAm34\nYR1D+/Uk6upZWtWrySEfbym0QuiQjGyFEEKIAiIjWyGEEEIjUmyFEEIIHZNiK4QQQuiYFFtRJMTF\nxREdHa11DCGEyBMptkKvZWZmMnTYcMqULUcFt4q80rMXKSkpWscSQogXIsVW6LVFixcTcPsO3xy4\nyAqfS0SlZPLptOlaxxJCiBcixVbotVOnz9C6x0BMzc0xMjahbe8hnDl7VutYOqWqKqGhoURGRmod\nRQhRQKTYCr3mXrEi18+f4u9ntq+fP42bm5vGqXQnPj6edh06UrNWbSpWqsTwkaPIysrSOpYQIp9k\nUQuh16Kjo2ndpi0Ym2JkbEJCZDjHjh6hXLlyWkfTibGvv8HtiDjGTJ1PenoaS94bydihg5gwYYLW\n0YQQufC0RS1k5XGh1xwcHDh39gxHjhwhOzub1q1bY21trXUsnTl3/jy93vkUA0NDzMwtaN61D2fP\nndM6lhAin6TYCr1nbm5Oly5dtI5RKNzd3Qk4e4xq9RqSnZ3Ndd8TtGtST+tYQoh8ksvIQuiRu3fv\n4tmmLdalXEhNSsTexoqD3gewsrLSOpoQIheedhlZiq0QeiYhIYFTp05hYmJCy5YtMTY21jqSECKX\npNgKIYQQOia7/gghhBAakWIrhBBC6JgUWyGEEELHpNgKIYQQOibFVgghhNAxKbZCCCGEjkmxFUII\nIXRMiq0QQgihY1JshRBCCB2TYiuEEELomBRbIYQQQsek2AohhBA6JsVWCCGE0DEptkIIIYSOSbEV\nQgghdEyKrRBCCKFjUmyFEEIIHZNiK4QQQuiYFFshhBBCx6TYCiGEEDomxVYIIYTQMSm2QgghhI5J\nsRVCCCF0TIqtEEIIoWNSbIUQQggdk2IrhBBC6JgUW6EXYmNjCQ8PR1VVraMIIUSBk2IrNJWdnc2b\nb42jnGt5qteoSZt27YmNjdU6lhBCFCgptkJTq1ev5sips3y59yzLD1zExLEM7058X+tYQghRoKTY\nigJx48YNmjRrjq2dPY2bNuPatWu5Ou/0WV+ad+2NhZU1BoaGtOszlHPnzuk4rRBCFC4ptiLfUlJS\n6NTZi9rtXmbBb0eo27Ennb26kJyc/NxzK7tX5Pr5U2RnZwNw1fckFSu6PXacqqpER0eTkZFR0PEL\nRFpaGnPmzmXIq0P5bM4c0tLStI4khNAjUmxFvl27dg1jc0s6DRiBjb0DHfsNw8zKOlej24kTJ6Im\nxTJj+MssGDeYI7+u58tly/5zTFBQELXr1qNCxYrY2dnz7cqVunoreaKqKr369GX7gcNYV63PTp9j\nvNKz16MPEEIIYaR1AFH02dvbExMZQXJiAhZW1qQkJRL1IAI7O7vnnmthYcHRw4c4fvw4qamptGjR\nAltb2/8c03/gIOq1f5lPRrxFeHAQ094YQMMGDWjcuLGu3tILuXHjBhcvXWLhtmMYGRvTuntfJvf2\n5Nq1a9SqVUvreEIIPSDFVuSbu7s7gwYOZO7YftRu3pYrp48woF8/KleunKvzjY2Nadeu3RN/lp2d\nzcXz55jw1U8oikLpCu681LoDvr6+elNs09PTMTYxxdAo55+TgaEhJmZmpKena5ys5Lly5Qo7duzA\nwsKCYcOG4ejoqHUkIQC5jCwKyPKvv2Lh3Nk0qVKWhXNmseKb5QXSroGBAS6ly3Djki8AGelpBF71\nw9XVtUDaLwgeHh442tuxcdEMblzyZdOSWdhaWcqotpAdOnQIzzZtOXktiG0+x2jQsBERERFaxxIC\nAEVXiwgoiqLKAgWiIPzxxx8MGjyEGg2acD/wNk0aNWDzj5swMNCfz4qRkZG8P2kyAQEBeHh4sGTR\nQpycnAq0j+zsbKZ+Oo01a9diaGjIxPfe5YP330dRlALtp6hq0qw5LfuNokmHbgCsmTuFFh6VmDlz\npsbJREmiKAqqqj72j1IuIwu917lzZy6cP8fZs2dxdnamTZs2eldgSpUqxfp1a3Xax6LFi/lt117+\nt2IzGWmpfDVlPKVdXBg6dKhO+y0qYuPicHb9Zya7s6sbMbJAitATMrIVooho6dkGz0GvU7d5GwCO\n7fqVCP+TbP15i8bJ9MP7H0zi8OlzjPpkPvExkXz1vzf5Yc1qvLy8tI4mShAZ2QpRxDnY2RERcvfR\n64iQOzjY22uYSL98Pm8u7018n1kje2Bmbs5ns2ZIoRV6Q0a2QhQRFy9epEOnTjTr3JO
M9DT8jvtw\n+tRJ3N3dtY4mhHjoaSNb/ZlhIvLtxo0btGjtiZOzC23atiMwMFDrSKIA1a9fn7OnT+NZtyqdm9bj\n/DlfKbRCFBEysi0mkpKSqFHTg46Dx9KovRen9m3n1K6fuXrFH1NTU63jCSFEiSAj22LO398fC1t7\nOg8aiYNzGboPf5PMbJVbt25pHU0IIUo8KbbFhI2NDTGREaSnpgCQnJhAXEw0NjY2GicTQgghl5GL\nCVVVGTZ8BOf9A/Bo4onfCR86tmldYCs5CSGEeL6nXUaWYluMZGdn8+OPP3L9+nXq1KnDgAED9G7x\nByGEKM6k2IpiS1VVHjx4gLGxMfby3KkQQkMyQUoUS4mJiXTu0pUqVavhWr4Cw0eOIisrS+tYQgjx\nH1JsRZH24UdTSDe25BvvS3z9x3kuXbvJl19+qXUsIYT4Dym2okg76+tL295DMDQywszcghbd+nLW\n91yh51izdi3NW7ailWcbfvvtt0LvXwih36TYiiLNvaI7V8+dBHLu3d44fwp394qFmmH9+vV8OmMW\nbQa/QbM+I3hj3Hj27NlTqBmEEPpNJkgJvfPnn3+ybt06MrOyeHXIEOrUqfPUY4ODg2nt2QYbp9Kk\npSRjaWrMkUMHsba2LrS87Tp0pOErQ2jYpjMAh37fTOyN8/y8+adCyyCE0A+y648oEq5du0YrT09a\ndOuLkbEJbdq2Y8/uXTRr1uyJx5cvXx5/v8ucPHkSIyMjWrVqVejLUxobG5OSlPTodUpiQrFfIvPK\nlStcv36datWqUbduXa3jCKH3ZGQr9MqoMa+RYlGKnqPfBnJGiSEXjrJ75w6Nkz2dt7c3AwcPofuI\ncWRkpLN/4yr+2L+PRo0avXBbGRkZzJg5iwM+Prg4OzN/3lw8PDx0kDrvln3xBZ/NmUe1ug246XeB\nDz+czOQPPtA6lhB6QR79EUVCYmIi9k4uj17bO7mQkJioYaLn69ixI9u3/YZRbAjWqdH4eB/IU6EF\neGvcePYcOorX6Ik4VKtPm3btuH//fgEnzrvQ0FCmz5jB9HXbeWfhKmas38mcz+Zw7949raMJodfk\nMrLQK/379mHi5A8pXcEdI2MTfvlqHh9MGK91rOdq1aoVrVq1ylcb2dnZbNy4ga/2+WJlY4dHo+bc\nuebH7t27GTt2bAElzZ/79+/jXKYcpcqUA8DRpQwu5coTEhKCq6urxumE0F9SbIVeGTBgADGxsSyZ\n/wlZWVm8OXYM48aN0zpWoVAUBUMDQzLSUh99Ly01BSMj/flnWqVKFaIfhON/+ih1mnkScPYEkeH3\nqVatmtbRhNBrcs9WCD0ydeqnbP7tdzoNGk3wrWv4HzvApYsX9GoZyiNHjtC3f38yM7MwMjTg5y1b\naN++vdaxhNALsjayEEWAqqqs/v57fHwO4uzszMdTPqJ06dJax3pMVlYWkZGRlCpVCkNDQ63jCKE3\npNgKIYQQOiazkYUQQgiNSLEVQgghdEyKrRBCCKFjUmyLiMjISA4dOsTVq1e1jiKEEOIF6c8DfOI/\nYmJiWLFiBdExMbiWK8fsOXMoV7EKoXcDeXXIEJYtXaJ1RCGEELkkxVYPxcXF0bhpM1xr1MWlQiWW\nf/wJExZ8y0st25GcEM+MEa/Q45WX5dlGIYQoIuQysh7atGkTLu7VeWPmUl4e/gbpaWnUbd4GAAtr\nG6q91Jg///xT45RCCCFyS4qtHkpMTMTu4WL8RsYmOLtW4OiurQBER4QScPZ4nrc1y87OZtkXX9Cx\nsxcDBg6We8BCCFEI5DKyHurWrRufz29LzUYtKONWiVIuZdi8dDa7135NXHQU06Z9+tT9XZ9n+vQZ\nbNm2nR6vvUd4cCCebdty3tcXNze3An4Xhe/8+fP4+vpSvnx5unXrhqI89lw5kLNn7qFDh7C3t6dP\nnz7Ffu9ZIYT2ZAUpPeXt7c0H//uQ2JgYunbpwufz5hIeHo6TkxMODg55btfZpTRTVv1C6QruAKyd\n9zGdm9Rl0qRJBRVdE6u++45Ppk7jpVbt+SvgEs0aN2Tj+h8eK7j79+9n8JBXadSuC+H3gjBTsjly\n6CBmZmYaJRdCFCdPW0FKRrZ6qmPHjly+cP4/37Ozs8t3uw//Ijx6nZ2d9dQRYFGRnp7OxPcmMvvH\nvZSp4E56WirTXu3KsWPH8PT0/M+xb094lzc/+5I6zTxRVZXF745gw4YNerOFnRCieJJ7tiXM22+P\nZ/mUcZw+sIvt33/F5WPeDBw4UOtY+RIfH4+BkSFlHo7WTUzNKOdelYiIiMeOjXwQQYWqNQEICbxF\nfHwcGzduxN/f/6nth4SE8NGUKYx/+x18fHx08yaEEMWaFNsSZuonnzD5vXf468Q+TBIjOHn8eJHf\n9NvR0ZFy5VzZs3EV2VlZXDt/musXz9KoUaPHjm3brj1bv13En/4XmDWmLx6NmlOqen3atG3H2bNn\nHzv+/v37NGrcBP/gB8SZ2DF46DB+/PHHwnhbQohiRO7ZCr2UlZXFvn37iIyMpGXLllSpUuWZx9++\nfZt+Awbid+kiTi4urFuzhi5dujx2XExMDK8OG8ahQ4fp+8b7dB/2OgDeWzcQde0cv//263+Onz17\nNsf9/2TklDkAXD13iq3LZnEt4OkjYSFEySX3bEWRkZmZycs9evJXcAhl3Sox8f0P+OnHTXh5eT31\nnMqVK3Px/DkyMzMxMnr6X2t7e3v27NpFj169sXdyfvR9W0cngpOTHzs+KTkZK7t/JqTZODiSkvL4\ncUII8SxSbIXe2bp1K8FhD5i25ncMjYwI8D3J2Dfe5G5Q4HPPfVah/bfBAwcw6aOPcXAug4GhEb8u\nX8C0Tz567Li+ffrg1bUbbjVq4ehShp+WzGJQEb/HLYQofFJshd4JDQ3FrUZtDB8Wzsq16hF6P4RV\nq1YxcuRITExM8t3H4MGDiU9I4MtlM1FVmDxxAqNHjXrsuMaNG/PTpo18On0GiYlJ9OndixnTp+W7\nfyFEySL3bIXeOX36NK/07MVH326hdAV3Nn85j/NH/qB0ufLYmBri/cf+XI9ghRCiMD3tnq0UW6GX\nVn//Pe+++x6pqSm416jD+0tWY+tQilmjevLFws+fOPlJCCG09rRiK4/+CL302pgx3A+5h7GxCTN/\n2I69kwsGhoaUKlOOuLg4reMJIcQLkZFtPqWmpmJsbIyhoaHWUYqllq09sXOrTtehr3Pr8jk2LpqG\n36VLlCtX7oXbyszMZNOmTdy5c4dGjRrRrVs3HSQWQpRkMrItYNHR0XTo1BkbW1usrK1ZtHix1pGK\nlZiYGF7u0ZNzZ89yePtmPhnYiSNbVrNn1648Fdrs7Gx69enLwq++5eKdB7wx/h1mzZ6tg+RCCPE4\nGdnmUd/+A0hQzBg+eRbREWHMHzeY1d9+Q9euXbWOViz07N2HJENzXp04jftBt1k6cTS7d26nadOm\neWrv2LFjDBv1GrN/3IeRsTGxkRG837M1kQ
8isLS0LOD0QoiSSka2BezE8eO8MnI8hkZGOJV1pVmX\n3hw/flzrWMXGQR8fBoz/CDMLSyp51KV5114cPnw4z+3FxsbiVLYcRsbGQM4iFmbm5iQkJBRQYiGE\neDoptnnkUqYMtwMuATmXKO9e96ds2bIapyo+HBwduXf7OgCqqnL/9g1KlSqV5/aaNm1K0PUATu7f\nTlzUA377djEVKlTAxcWloCILIcRTyWXkPDp58iQv9+hB7SatiQy9h62FKYd8vGVf1AKyc+dORowa\nTZMO3Qi7G4gpmfned9bX15fXXn+Du3fv0LBBI35YtyZP93+f59atW9y4cYOqVatSvXr1Am9fXwQH\nB5OYmEiVKlUwfnjFQIiSTp6z1YE7d+5w9OhRrK2t6datW4GsbCT+4e/vz6FDh3BwcKB///6Ymppq\nHem5vv12JR9PnUpljzr8de0K06d9yoR33tE6VoHKzs5m9Gtj2b59OxZW1thaW3Fg/z6dfHARoqiR\nYlvMZGVlcevWLYyMjKhcuXKR3wC+OAgPD6dq9erM2rAbF1c3Hty/x7Sh3Qi44l+sCtHatWtZ+OU3\nTF6+CVMzc35dsYj08CB27tiudTQhNCcTpIqRuLg4Wrb2pF3HzjRv1Zqu3V8mNTVV61glXkhICM5l\nXHFxdQPAqawrpcu7ERwcrHGygnX5sh8vtfHCzNwCRVFo0a0P/leuaB1LCL0mxbYI+vCjKViVdmPR\n9uMs2XGS2LRsPp8/H8i5Xzhr1ixmz57NX3/9pXHSkqVy5cpEPwjj2vlTANy45EvE/WCqVq2qcbKC\nVb16NQJOHSYzIx2Ai0cPUK1qNY1TCaHfZDX3IsjP35/2w97GwMAAAwMDGnfszuVzh7l8+TLtOnSg\nRdc+ZGdns6xpU44dOYKHh4fWkQtNZmYmVx6OsurUqVOoK3vZ2try8+bNDBg0CCMjYzLS0/hx0yYc\nHR1fuC1VVfl25UrWrF2HsbEx/5v0Ab169dJB6hc3duxY9h/w5sO+7bCxdyA1IY6DPt5axxJCr8k9\n2yJo9GtjCUvOYuikmajZ2Xz76bt4NqjNjVu3sHKvjdegnK3idq5bgUFMMBvXr9c4ceGIi4ujY2cv\nwh9Ekp2dTQXXcuzfuwdra+tCzZGWlkZYWBilS5fO86SulatWMXf+IoZOnkVaajI/zPuYHzduoFOn\nTgWcNm9UVcXPz4+kpCTq1asnC4MI8ZBMkCpGoqKiaNehI4nJqWRkpONW3pX9e/fQq09f6nbpT6O2\nXgCc2r+DwFN/sHP773nu6+//h0VhAtbb70zgekgkoz75HIDvZrxPw+ruLF60UONkL65Fq9Z4Dn6D\nei3aAnDgl/Vk3r/J+nXrNM0lhHg2mSBVjDg6OnLu7Bm2bFrP9l9/4cihg1haWtK7V09+X7mEuzev\nEXQjgO2rv6B3r5556kNVVWbOmoWNrR0WlpaMff0NMjIyCvidFKyr16/ToK3Xo8vrDdp6cfXaNa1j\n5YmpqSkpif+sbpWcEI+pif4/+iSEeDK5Z1tEmZiY0Lhx4/9876033yQuLo5vPnoDRVGY8PZ4Ro0c\nmaf2f/jhB37YtJm5m//AzMKCFZ+8w/QZM5k757MCSK8bdWrVwtdnN/VatgNV5ZzPHprVqa11rDz5\ncPIkXh02nJgH4aSmJOG9ZS2HDx7UOpYQIo/kMrJ4oiGvDsW6yku07TUIgGvnT7Nv9WLOnj6lcbKn\nS0hIoEu37gQG3UFVVapVrcKeXTuL7P3EY8eOsX7DRoyNjRn31pvUrl00PzgIUZI87TKyjGzFE7k4\nO3Pz9o1Hr+/9eR0nJycNEz2ftbU1Rw8f4saNGyiKQvXq1TEwKLp3Slq3bk3r1q21jiGEKAAyshVP\nFB4eTpOmzShXrRZm5hb4nTrM4YMHZXQlhBDPILORxQuLiYlh27ZtpKen0717d8qXL691JPGCzpw5\nw9zP55OcnMzAAf0ZM3p0kZhZLkRRJcW2GAkNDeXGjRu4ubnh7u6udZwiIyoqitDQUNzd3YvsfdwX\n4efnR9t27enz5iRsSzmx9ev5THrvHd4pZhsjCKFP5NGfYmLbtm141KrNO5M+okGjxixZulTrSEXC\n8uXf4F6pEi/36kPFSpU4ceKE1pF0bv2GDbTrO4wO/YbSqK0Xoz9dyIqV32kdS4gSSSZIFSHJycmM\nGj2GyV9vpJJHXaLCQ5k+rDsvd+9OtWqyNu3TBAQEMG3mTD77cR9OZctz8ZgPffr14/69e4W6nGNh\nUxSF7OysR6+zszKRK8hCaEOKbQHw8/Pj+vXr1KhRg7p16+qsn7CwMMwtLankkdOHo0sZ3KrV5Pbt\n21Jsn+Hq1atUr9cQp7I595zrt+7AqulpREVF4ezsrHE63Rk9ahQtW7fG0tYOO0cntn27hOmffqx1\nLCFKJLmMnE+LliyhQycvvlj9Ax06dWbh4sU666ts2bKkp6UScDbnEmhI4J8EXg+gZs2aOuuzOKha\ntSq3/C8SGxkB5DwzbGhokKcNAoqSmjVrcsjHBzUikHvnDrN4wTxeHztW61hClEgyQSof7t+/T02P\nWszZ/AeOLmWIjgjlk0FeOt0s/NChQ/QbMABLa1tioyL56qsvGTF8uE76Kk7mzp3HwsWLKevmTujd\nQDb/+COdO3fWOpYQopiRRS104P79+ziXdcXRpQwADs5lcC7ryv3793VWbNu1a8fdoCDu3LlD2bJl\nsbOz00k/xc3HH09h0KCBhISEUKNGDb1foEMIUbzIZeR8qFq1KjGR4fifPgqA/5ljREWE6nyzcEtL\nSzw8PDQptNeuXWP16tVs376drKys55+gRypVqkTr1q2fW2gzMjKY+uk0Wrb2pG//Ady6dauQEgoh\niiu5jJxPR44cod+AAaSnpWNsYszWn3+mbdu2j37+9y/u3Xv2YGdnz/x5c2jZsqV2gfNhx44djBw9\nhpdatud+4E0qupZl984dxW5G7+gxr3Hp+p90HfYmQdev4LNlDX6XLxXryVRCiIKR50UtFEUxBfoC\nFfnXZWdVVWc957wSUWwBsrKyiI6OxsHB4bHCM/7tdzh5wY++4yYTdjeQHxfP5MTxY0VyUlOZsuV4\nc+5yqtVrRFZmJnPH9mPmJx/Sv3//Amk/KyuLmzdvUrlyZUxMTAqkzbxkMLewYIX3JSyscjadXz5l\nHK8N6sPIPO6gJIQoOfKzqMV2oCeQCST960s8ZGhoiJOT0xNHeJu3bGbMpwupXOslWnbtTTOvnuzc\nuVODlPmjqiqRDyJwr1kHAEMjI8pX8yAsLKxA2l+yZAmW1jbUqVsXS2trhgwZUiDtvihFUTAwMCAz\nI/3R9zLS0ord6F0IUbhyU2xdVVUdqKrqAlVVF//9pfNkxYSpqRmJ8bGPXicnxGJmZqZhorxRFIXm\nLVuxbdVSsrOyuHvrGucO7y+QS+KJiYl8/MlURn44mw1nA5m68md+3fY7O3bsKIDkL8bAwIDx499m\n6
cRRHN+zjR+XziY86BavvPJKoWcRQhQfuZmNfFJRlDqqqvrrPE0xNPWTj/ls8ut0HjyG8OBA/vI7\nz5C1K7WOlSdbfvqRvv0HMLJ5FSwsrVj+9Vc0aNAg3+0ePnwYQyMj2vQYAEC1eg1xr1mHTZs20aNH\nj3y3/6IWLphPpW+/5eChw7iVLs13p07KrG8hRL489Z6toij+gEpOQa4K/AWkAQqgqqr6zKWSStI9\n2+fZsWMHu/fsxd7ejonvvYeLi4vWkfIlPT0dY2PjAts95u7du1SuUoX5v/hQpoI7yYkJTOzRiv99\nMJGpU6cWSB9CCFEYXniClKIobs9qUFXVO8/pUIqtyLVWrVpz4dIlPBq14HbAJcxNTLgfEqx1LCGE\neCEvPEFKVdU7DwvqZ3//+d/f02VYUfIcP36Mz2bNxNYwkxGvDpZCK4QoVnLz6M8FVVUb/Ou1IeCv\nqqrHc86Tka0QQogS5YVHtoqiTFEUJQGoqyhK/MOvBCCCnMeBhBBCCJELuRnZzlNVdcoLNywjWyGE\nECVMXiZIPfOZDlVVLzynQym2QgghSpS8rCC1+OHXcuAMsAr47uGfl+siZHFz+fJlmrZoQZlyrvTo\n1ZuIiAitI2nqxIkTdOzsRZNmzZn3+edkZ2drHUkIIQrFs2Yjt1NVtR0QCjRQVbWRqqoNgfpASGEF\nLKoiIyPp1NmLlzr3ZcqqX1BtXej+Sg9K6mj/ypUrvNKjJ1Vbd8VrzPv88NMvTJ8+Q+tYQghRKHKz\nXGP1f68eparqFaDoraJfyE6dOkWFah607TkQp7LlGfzuJ9y6eZMHDx5oHU0Tv/zyC617DMTz5X54\nNGrOmGkLWbd+vdaxHpOZmcncz2bTpa0nI4YM4s6dZz5OLoQQuZKbYuunKMpqRVHaPvz6DvDTdbCi\nztramuiIMLIf7vmaEBtNWloaFhYWGifThrGxMWkp/+xfkZqUpNnOPs8yYdxb7FuzgtG2iZQLukjr\nZk2IjIzUOpYQoojLzWxkM+AtwPPht44CK1RVTX3OeSV6glRWVhZdunUnMjGFKvUac857N4P792Xu\nnJK5Hsi9e/do2KgxLV7uh2NpV/asX8H0qR/zxuuvax3tkaysLCzNzbk5pjV2ZsYADPvjOn0/nMXw\n4cM1TieEKAqeNkHquRsRPCyqSx9+iVwyNDRk984drFmzhjt37jD48zn07t1b61iacXV15fSpkyxe\nupT4e9dY+O0yAAAgAElEQVT4etkS+vTpg6qqxMbGYm1tjZFRbvbF0L1s/vmQmK2qBbYGtBCi5HrW\noz8/q6o64F8bEvyHbEQg8uvmzZu83KMn90Ny5tut+GY5w4YN0zTTO+Pe5Pze7YyrXZrLkUn8cjeB\nC/4BODg4aJpLCFE05OU52zKqqoY+bUMC2YhA5JdH7To07T6QzoNGcu+vm8x/azCHD/pQu3btXLex\nZcsWPp76KYkJCfTs2ZMvv1iWr/2Cs7KyWLZkMUe8/8ClbDmmz56Dq6trntsTQpQsL1xs/3XiGOCo\nqqq3XrBDKbbiqZKTk7F3cGDtyVuPLtOu/PRdRvXvwYgRI3LVxvHjx+ndtz/j5i3H0aUsGxZ+SiOP\naqz4ZjmqquLv709cXBz16tXDxsZGl29HCCGAvC1q8bcKwEpFUf5SFOUXRVHeURTlpYKPKEoSc3Nz\nLCws+evqZQDSUlIIun7lhUaRe/fupU2vwdSo3wSnsq4MmTiNnbt2kZ2dzZChw+jctTtvTJhIjZoe\nBAQE6OqtCKEpX19fRg4dwqsD++Pt7a11HPEUuZkgNR1AURRzYCwwGVgGGOo2mijOFEVhzferGfPa\nSGo2akbwreu0b9uG9u3b57oNW1tbIv1vPnodEXIXGxtbfvrpJy4FXGP+1oOYmJlz8NdNjHrtNc6e\nOqWLtyKEZnx9ffHq2J4+lS2wMFIY3G8fP/y4hW7dumkdTfw/ubmMPBVoCVgBF4HjwDFVVUOfc55c\nRhbP9eeff3L+/HnKli1Lq1atXmjmb3R0NI2bNMW1Rl3snctwbOfPrFvzPefOncP/fiz9x00GIOZB\nOFOHeBEtz8uKYmbk0CEYXvWhZ42cCXwn7sZzzqAiPkdPaJys5Mrzoz9AHyAT2A0cAU6pqppWwPlE\nCVWlShWqVKmSp3MdHBw453uWdevWER8fz9S9e2jUqBHp6els2PIp3Ya9jqW1Lcd3/0qd2nUKOLkQ\n2svMzMTc8J/f68aGCpkZmRomEk/z3JEtgKIoNuSMblsB/YEIVVVbPeccGdkKTaiqysT3P2DN2jXY\n2DlgZmLMgf37cHd31zqaEAXq4MGDDOzTk5G1bDAxVFgXkMD8ZV8zVONH6Eqy/MxGrg20BtoAjYBg\nci4jT3vOeVJshaZCQ0OJi4ujcuXKGBsbax1HCJ3Yu3cviz+fS2ZmBq+9OV4KrcbyU2x3kbNE43HA\nV1XVjFx2KMVWCCHECzl8+DAzPvmIhIQE+g4cxEdTPsHAIDcPzuiHPBfbfHQoxVYIIUSuXbx4kQ5t\nWjO6ji2O5kZsuJrAgNfGM2PWbK2j5Vp+nrMVQgghdO7nLZvp5GaOp5sNtZwteLOeLevXrdE6VoGQ\nYiuEEEIvmJqZkfKvydRJGdmYmphqF6gAyWVkUeLduXOHy5cvU758eerXr691HCFKrLt379Ko/ku0\nLWuEvakB228nM3/pl4wYOVLraLmWl40IdvKE3X7+pqpqj+d0KMVW6L1t27YxZsQwqjtbExiVxLDR\nY1i4WHaTFEIrgYGBfLFkMQkJ8fTpP5Du3btrHemF5KXYtnlWg6qqHnlOh1JshV7LzMzE0d6O6S2d\nqOJgRmJ6FpMOhbN9nzeNGzfWOp4Qogh64RWknldMhSjqYmJiQM2mikPOlnxWJoZUdrQgKChIiq0Q\nokA9d4KUoihVFUXZqijK1Yc7//ylKMpfhRFOCF1ydHTExsaGo0HxANyNS+NqWAJ169bVOJkQorjJ\nzWzktcAKctZHbgesBzbqMpQQhcHAwIAdu/eyOTCTMbvv8dGhUJZ9/Q3Vq1d/dIyqqnz1xRc0qV8X\nz+ZN2bdvn4aJhRBFVW6Krbmqqj7k3N+9o6rqDKBo3bEW4inq169PUHAIFwOu8SAqmmHDh//n518u\nW8qyuTPoYR9LC6N7vDqwH8ePH9cobe5s27aN6u5ulHZ0YMzwYSQnJ2sdSYgSLzfLNZ4kZwOCrcBB\nIAT4XFXV6s85TyZICZ1QVZUjR44QHh5O06ZNqVixos76ql+7JgNKJ1PL2QKA369FYdKoB99+t1pn\nfebH2bNnecWrI2s71qCirTlTTgZSqpEn36+Xi1FCFIb8bLH3LmABTABmA+2BEQUbT/x/ycnJXLp0\nCUtLS+rWrftC+7wWZ9nZ2Qwd2J+LJ45SzcGKt+9FsWHzz3Tp0kUn/ZmYmJCSkfjodUoWWJuZ6aSv\ngrBv3z5ereZMK9ec/U3ntXCn/bbdGqcSQjy32Kqq6gugKI
oBMEFV1QSdpyrhAgMD6dTGExslk+jk\nNOo3bcYvv+/AyCg3n42Ktz179hBw+jhH+9bH1MiA4/eieW3EcO6FR+ikv48+ncHro4bTJymDxIxs\nDtxN4/j4t3XSV0Gws7PjcuI/e4UExaVgY22tYSIhBORuNnIjRVH8AT/AX1GUy4qiNNR9tJJr/Ngx\nDHWz5HCfepwb3IjY65dYtWqV1rH0QnBwMA2crDA1yvmr26ysHWGRUWRlZemkv969e/PT1m1keHTE\npllPjp06TY0aNXTSV0EYMWIE19ONGfXHNWae/JNR3teZJ4t0CKG53Nyz9QPGq6p67OHrVsA3qqo+\n8/kIuWebd1XdyvNjWzeqO1gB8NX5IMLrtOfL5d9onEx758+f5+VO7dnToy6V7Cz44sIddsWbcvbS\nZa2j6Y34+HjWrVtHTEwMXbt2pUmTJlpHEqLEyM+uP1l/F1oAVVWPk/MYkNCR2rXrsOVmBKqqkpSR\nya7gOOo1kIsJAA0bNmT2/EW03nKO8quOsvWBypZtv2sdS6/Y2NgwYcIEpk+fLoW2gGzevJnqlStS\noYwL77/7DhkZudrWW4hHcjOyXQaYAz+Rs1byQCCVh8/aqqp64Snnycg2j0JDQ/Fq35bE6EgSUtLo\n3qMHa9ZvLFIbKOtaRkYGCQkJ2Nvby+QxoVOHDx9mQK9XmNjIAVszQ1b7xdGh/wgWLl6idTShh/K8\nebyiKIee8WNVVdX2TzlPim0+ZGRkcPv2bSwsLKhQoYLWcYQosSa+O4GoI5vp5+EIQFBMKl9fz+Zm\n4B2Nkwl9lOdHf1RVbaebSOJZjI2N9XoiTkEKCQkhLCyMatWqYf2MmbNhYWEEBQVRqVIlnJ2dCzGh\nKMlsbO24mfrPwOFBciZWVjLDW7yY3MxGdlEU5XtFUfY+fO2hKMoY3UcT+igkJARfX1/i4uIKpL05\ns2ZRt0Z1RvXuTjX3ipw5c+aJx63/4QdqVavC+EF9qFmlMj9v2fLctnfu3MnkSZNYtmyZrKIk8mzc\n+PFcjjPgm/ORbPKP5JtLscxZsFjrWKKIyc1l5L3krI/8iaqq9RRFMQIuqqpa5znnyWXkYmbR/PnM\n/WwW5e1tCEtMZev2HbRu3TrP7Z0+fZoBL3flYJ+XcLY0Zdef4Xx8IYygkND/HBcaGkrt6lXZ1/sl\nqjtY4f8gnld2+PFn0F0cHBye2Pbnc+eyfMl82pYz4a8EyLR35ejJ05iamuY5ryi5wsPDWbNmDUmJ\nifTs1Ut2hRJPlZ8VpEqpqvqzoihTAFRVzVQURTcPNQq9dfHiRZbMn8vJQY0pa2WGd9ADBvTpRUj4\ngzxP3Lp27RotXR1wtswpgN0rOzNq3xWSk5OxsLB4dFxgYCCVHG0fPQpVx8mGMjaW3L375GKblZXF\nzFkz+dqrPE6Wxqiqyqcn7rFnzx569+6dp6yiZHNxcWHKlClaxxBFWG5+SyYpiuJIzkxkFEVpBhTM\nNURRZNy4cYOm5Rwpa5WzVGHHik4kJycTGxub5zZr1KjByZBoHiSnAbD3rweUdnLE3Nz8P8dVqlSJ\nv6LiuRqZs3jZxfA4whKScXNze2K7mZmZZGVlYW+e81lSURQczY1ITEx84vFCCKFruRnZvg/sACor\ninICcAL66TSV0DvVq1fnTEgUoYmplLEyw+dOJBYWFtjZ2eW5zebNmzNm3Ds0XbqE8vbWhCamsW3X\n7sce5SldujRfrfiWbm+9QTlbK+7HJfHduh+wt7d/Yrumpqa082zFqktX6V3VmltRqfiFJ9G2bds8\nZxVCiPx47j1bgIf3aasDCnBDVdXnPtEt92yLn4Wff87ncz+jvJ01oYmp/PL7djw9PfPd7t27dwkL\nC6N69erY2to+9bjIyEju3LmDu7v7U+/V/i02NpY3x47hxPFjlHYpzfJVq2WBByGEzr3wc7aKojQG\nglVVDXv4ejjQF7gDzFBVNfo5HUqxLYbu3btHaGgo1apVe2ZhFAJy7ssHBQXh4eHx1Mv+QhQneSm2\nF4COqqpGK4riCWwG3gFeAmqqqvrMS8lSbIUo2T6f+xlLFyygtos9fmHRfPXtKgYNHqx1LCF0Ki/F\n9rKqqvUe/nk58EBV1RkPX19SVfWl53QoxVaIYiYkJIRjx45ha2tLp06dnrrt4/Xr12nTrAnHBzTE\nxdKUgMgEum67REh4BJaWloWcWojCk5dHfwwVRTFSVTUT6AC8nsvzhBDF0JkzZ+ju1RkPZwseJKXj\n4l6dPw4exsTE5LFjg4KCqOVij8vDx7pqlbLGxsyEsLAwKleuXNjRhdDcsx79+Qk4oijKdiAF+HuL\nvSrIoz9ClDhvjR3NqFpWTGpkxzxPJ5Lu3WTNmjVPPNbDwwO/sGj8H8QDcCDwAemqgqura2FGFkJv\nPHWEqqrqHEVRfIAywB//uiZsQM69WyFECRJyP5QaLXNmgRsoClWsFe7dC37isRUqVGD5yu/o/toY\nrM1MyFAVtm7fISt4iRIrV4/+5KlhuWcrRLHS8+WuEHiBUXXtiUnJZNrxB6xc/xPdunV76jlJSUmE\nh4dTrlw5KbSiRMjzFnv56FCKrRDFSGRkJL1e6cb5C5dQgRnTZ/DRxx9rHUsIvSLFVhQJN2/eZMG8\nucTHxdBv0KsMGDBA60ji/0lISMDMzAxjY2Otowihd/KzEYEQhSIwMJCWzZrQubwpZcwN+GDcYSIj\nHzBu3PjnnhsVFYW/vz/Ozs54eHgUQtqS61l7DgshnkxGtkJvzJw5gwtbljPmpVIA3IxK4dsbWfwZ\n9ORJOH87efIkvV/uTmV7SwKjExg0dBhLvvzqsTWWC5OqqmzcuJFDf+zDuUxZJv3vQ0qVKvXEY5OS\nkrh37x5ly5bFz8+Pq1evUqNGjXxtXyiE0IaMbIXey8zIxPhfD6OZGChkZT1/N8dhAwfwRSt3ulV2\nJi4tg46/bKZ7z1507NhRh2mf7bNZM9m8cjmvezjjH3CKlr/8wtlLlx9b4nL//v28OnAAtqbGRMQn\nYGFqxEtlbLgSkczI199izrzPNXoHuZORkUFqaqqMdoV4jrxtRCqEDgwaPJiDwan8cTuWi6FJfH0p\nljGvv/nMc7Kysgi6fx8vdycAbE2NaVHWllu3buU5h6qq3Lx5k8uXL5Oenp6n8xfMn88v3TwYVac8\nS9pUo7JZNr///vt/jouPj+fVgf3Z1LkGu3vUhuwsFrYvy/j6dsxv68KKr78iKCgoz+9D1xYtXICN\nlRUuTo60aNyQiIgIrSMJobek2Aq9UatWLfb+4c1fNh54p7gwbvJUPpn66TPPMTQ0pGblSmy+fh+A\nsKQ0DgZHU6dOnTxlyMzMZGCf3rRt1pgBXTrSsG5tQkNDX6gNVVXJyMzCxuSfCUS2JkaPFe6goCCc\nLc1oXs6eB8npuFgaY2eWc7HJxtQQF1sLwsPD8/Q+dO3AgQMs/XwOX3cpz4+9K1E65R4jhw7ROpYQ\nekuKrdArTZs2Z
c8BH46d9uW99z/I1X3Xn37dxvwrkTT46RyNN53hrYkf0KpVqzz1v2LFCiL8znL5\n1aacHdiAzvYw4c03XqgNAwMDBg3ox1if6/iGxvK9XzAHg2Po0qXLf45zdXUlND6JG9GJVLazIC41\ni+N348lWVU4FJxCVkknNmjXz9D7+/PNP2nu2pEIZF7p17khISEie2nmaEydO0LKsKU6WxhgoCj2r\n2nDm7FkSEhIY3L8v9jbWuJcvx2+//Vag/QpRVMk9W1Hk1alThxt/BfHXX39RqlQpnJyc8tzWVb/L\ndK9gi6lRzufQPpVL8frpKwBER0ejqiqOjo7PbWfl92uZOuVDPjzwB84upTlw+CfKly//n2McHBz4\ncvkKur4znprOdmQaGPHDtWSWnArDtYwLO/fsw8bG5oXfQ2JiIu3btKZzGRjYyIpDd6/QuX1bLgdc\ne+rGAS+qfPny7IjPJitbxdBA4XpkCmVKl2bsqBFEXD7GFx1KE5KQzthRw3Fzc6Nhw4YF0q8QRZXM\nRhbiX7744gt2fL2AzV1qYWKoMOdsILfsK2NkYsrefftQFOjQvj2bt/6GmZlZgfR59+5dbt68SeXK\nlXF3dyc9Pf2Ji/vn1vHjx3ljcG/mtc6Z/ayqKuMPhOFz4gzVq1cvkMwZGRl07dSB4JsBuFiZEBCR\nxI7de+nexYsvO5V5dDl83eUomgx7n48++qhA+hVC38lsZCFyYdy4cRzx/oMGP53A2tQEA0sbvJpW\n5/jvG1nXww0FWOzry6zp05g7f0GB9FmhQgUqVKjw6HV+Ci2ApaUlsSnpZGSpGBsqpGRmk5iajpWV\nVX6jPmJsbMw+74N4e3sTGxtLq1atcHV1xc7WhtCEdOzMjFBVlbAUFXt7+wLrV4iiSka2Qvw/qqpy\n7do1UlJSqF27Nr26d+WltJs0L5/zeMu5kEROqBXwOXrime3cvHmTu3fv4uHhQdmyZQsjOpCTv/cr\n3QnyO0NdB0POPcikpVdPvluzVud9b926lTfHjKJNeXNCk1USzRw55Xte9rAVJYaMbIXey8jIYMHn\n8zh36iRulaswbeYsHBwcCj2Hoij/WYXKvUoVrhwNoJmriqIo+Eel4964yjPbmDt7FosXLcDNwYqg\n6CTW/LCBXr166To6kJN/6+87WLNmDdevXeWj+g0YNmxYofTdr18/3Nzc8Pb2xs7OjuHDh0uh1Vhk\nZCT79u1DURS6desmVxo0IiNboTcG9+tLpN8ZhlYtxdHQeM6lmnDmwqUCuzeaV5GRkXi2aIZhShwG\nBgophhYcO3UGFxeXJx4fEBBA2xbNWNS+NPbmRtyKSmH2qUjCH0TJzjeiUAUFBdG6WVPqO5qTpapc\njc/g+BlfypUrp3W0YutpI1t59EfohaioKPbs3cNGr5r0rlaaJZ5VMU2O59ixY1pHo1SpUpy/7M/i\n7zexYNUGLvoHPLXQAty+fZsqTlbYm+dcOKrqaI6JgSKLPohCN23KhwyvbMfGzjX4yasmvV0tmTVt\nqtaxSiQptsVIQEAAA3v3pLNnK5YuXkR2drbWkXJNVVUUFAwfPlerKApGhorevAdzc3M6d+6Ml5fX\ncy+L1qpVixsRCdyLTwPg3P1EFCNjSpcu/Z/jVFXl6tWr+Pr6kpqaqrPsouQKCwmhvtM/E+Pql7Ik\nLOSeholKLim2xURQUBDtW7eifsxNxjqksOmLhUyfWnQ+wTo6OuLp6clr3tfxDopk2snbRKsmeV6c\nQkuVK1dmyZdf8dHhMMb9Ecq3/on8tn0nxsbGqKpKWlrao5WqOrduweg+r1CvZnXu3LmjdXSdOnny\nJI1fqkuFsqUZ8epgEhIStI70XLdv32bRokUsW7aMsLCwXJ0TExPD4cOH8fPzQ+tbaZ4dO/G1fyhx\naRnEpGawIiAczw6dNM1UUsk922Ji8eLFXN20nKVtqgHwV2wyXXdeITQyWuNkuZeSksKMT6fmTJCq\nVJm5Cxc9NhrML1VVmT1zBl8sXUpWdhajRo1i0dIvMDQ0LNB+IGft4/DwcMqXL4+ZmRk7duzgtZEj\niIlPoHwZFxyVDPb0fAlTIwMW+gZx0cKVXX94F3gOfRAYGEjDl+ryWh0bKtmbsfVGPJbVGrN91x6t\noz3VxYsX8Wrfjp7uDqRmqRwMTeSk7znc3Nyees758+fp2rkjZaxNCI9PoevLPahQwY09O37Hzs6e\nmZ8voEWLFoX2HjIzMxn/xuus27ABBYWxr41m2VfLdfL3XeSQzeOLuSVLluC34Su+bJuzaMGt6CRe\n2XOV+w+iNE6mX75fvZpl06ewyasmZkaGjPa+QddRb/LJp9N02u/t27dp1qA+m7t60LC0LV9fuMPK\nS3e4MtoTRVG4GZ3I4ENB3LpbPC/xrV69mi2LpvJOg5yZsGmZ2by67TYpqWl6+4u/Z1cv2qTf47W6\nOSt/zTp1m5R67Vi+ctVTz/GoVoVupVLwrGhDamY2k73vYWFswpdtqxEYl8Inp4M4eupMnpfhzKvM\nzEwURdHb/9bFiUyQKuYGDBjA/uB4Fp4N5NcboYw4cJ3xE97TOpbeObB7JxPqlKGirQWlLU2ZXL8c\nB3bv0nm/vr6+tKpQisZl7DBQFN5p4EZkcjrhyWmoqsqvfz7Ao1YtnefQipWVFdGpWY8uq8akZmJq\nYoyBgf7+CoqJiqKKncWj11VszYiNfvaH18A7wTQql3NP38zIgNqlTOhSwZ6mZe0ZVLMsA6s4abJe\ntJGRkRRajenv33TxQlxdXTl2+gz3Kzdmp1KW92bO5eMidM+2sDg6u3A9NuXR6xvRSTjmYy3l3Cpd\nujTXIuNJzczZn/dmTBKqotBqy3mabLnAjohMln/3vU4zqKrKxYsX8fHxITq6cG8v9OzZkywrJxb7\nRrE1IIpZJyOZPfuzXG00oZUuPXoy93wwd+JSuBGdyDK/ULq80vOZ59SqWZ1DQTn3ouNSMzkTkoSr\n9T+PrsVlZmv+KJvQhlxGFiVKcHAwLRo3ormzOWYGBuy/G8PBY8eppeNRpaqqDB88iAvHDvGSszXe\nQQ9YsOxLWrVqTVJSEjVq1Mj3Mo3P63/k0CEc2LcHF2szQhLS2bP/AI0aNXruuampqfj4+JCamkqb\nNm0oVapUnjIkJiaycuVKQu/fp32HDnTr1i1P7RSWrKwspvxvMj+sXYuRkSHvTZrEpMkfPvMDws2b\nN+nSsT3pyYnEJafh6enJtQu+vF27NIEJafwenMi5y37PfHRMFG1yz1aIhyIiIvj111/JzMykV69e\nj+3GoyuqqrJ//35CQkJo1KgR9erVK5R+AX799Vc+fud1PmtVClMjA47eiWdvpAUBN24987yEhATa\nt2qBUUIUtqbGXIlOxufo8QLb0KAwnTlzhv3792Nvb8/IkSOxtrbWST8ZGRkEBgZib2+Pk5MT27Zt\nY/fvv2Hr4MjEDybh6ur66Ni0tDQ+/XgKPvv34uTszNxFS2nQoIFOconCIc
VWvLDY2Fi2bdtGeno6\n3bp1K7SiVFIkJCQw6b0JnDx2jHKuriz5+pv/LBNZkBYsWMCp9YsZVTdn+cuk9Cxe232XpJRnP987\na+YMrvyylu861kBRFL69dJcjRmXYfcBHJzl15eeff2bc2NG0LW9BRKpKpIEtZ85f1FnBza3XRg7n\n7smDfNjAlatRicw+dw/fS5efOeNZ6DeZICVeSEREBPXr1ub7OR+xdek06tetg5+fn2Z5UlJSSE9P\n16z/3FJVFR8fH3744QcCAgKeeezgfn2IO3uIbxo704kHdGzTWmerTNWrV4/z4WnEpWYC4BOUQG2P\n58+IDQ4MpKmz5aNLp03L2HIvOFgnGXXpww/eY3ITR4bXdWRSk1I4ZMWxYcMGTTOpqsqPm7ewqkM1\nGpexY0RtV7pUdGD37t2a5hK6IcVWPNGCz+dR2yqd/zVx4J0GDvSvas7/3n+30HMkJyfTt8fL2Nva\nYGNlxaSJ72m+UABAXFwcGzduZO3atY8WO1BVlVHDh/LakH5smPcRni2asmnjxieen5iYiM/hIyxv\nV416zjaMrVee+k7WHD58WCd5vby8GPXmeMbtu8eb++9z6IERm7b88tzzmnu2YdOtKKJT08nIyubb\nK6E0a9lSJxl1KS4+ARerf+6Ju5grxMXFaZgoh6mxMfFpmY9ex6Vn6fTevdCO7Pojnig87D5uVv88\nKlDR1oQzYeGFnmPK5Emot/0IfqMdSRmZ9P31J1bX9GDs668Xepa/RURE0KJxQ6pbGmBmaMDHkydx\n+MRJwsLCOLx/DwvbOmNqZMDdOHPefGMsgwYPfuyxi5zVpCA+PRNHcxNUVSUmNUOnM1Vnzp7DhPfe\nJyYmBjc3N4yNjZ97zqhRowjwu0yNb1ZgaKDQpnUrVi37UmcZdaVbt26s9T3IyNq23E9I53BwMh93\n7vxCbXh7e7Ni2RJUVWXs2xPo2rVrvjIpisKHUz5mwFdLeMPDhasxKQQkZvNDv375alfoJxnZiifq\n5NWNvXdSeZCUQWJ6Fr/eSqKjV/5+ueTFiaOHGV+nLKZGBjiYmzCiWilOHj1c6Dn+bd5ns+nsZMJP\nXjVZ27E679R25uNJ7xMaGoqbvRmmRjn/rCrYmpKdnf3EZQlNTU15d8I79N59hVWX7jDW+zoZ1g50\nfsEC8KIcHR2pUqVKrgot5BSExcu+IDY+nrAHkew54KP5fc68WLl6DW5NOvK/o5GsvQ1r1m+iYcOG\nuT7/4MGDvNq/Dx3Tg+mSFcLoIYMK5HLvh1OmMH3JVwS4NsCxY19OnbuAnZ1dvtsV+kdGtuKJhg0f\nTlBQIO8tmE9GZhaDBw7gs7nzALh37x4JCQkv9Es7r8qWc8U3PJjGZexQVRXfyGQqNK6g0z6fJ/x+\nCG0c/1nsoF4pa/YEhdKwYUP8QxO4FWVKFQczdt+Ko3y5ctja2j6xnc8XLsKjTl1OHTtCnXYV+X7i\nxDyNbJOTk9mzZw/Jycl07NhRJxvVm5mZFennQy0tLVn/4095Pn/lV1/waaMKvForZ2s6QwVWfrmM\n7t275yuXoigMHjyYwYMH56sdof9kNrJ4pr//Hz6cYce418fyy5bN2JqbYmZjx76Dh3U6S/nGjRu0\nb92S+k5WxKdlEmtsydFTZzT99L/y229ZOWc6P3f1wMzIkFHe12nS51XmfD6fbdu2MWrEcFJSU6ni\nXpHfd+2hatWqOssSHx9Pq2ZNMEqKwsbUkIDIVLwPHSnUx4r+LSMjAyMjo0JZrCIwMBA/Pz8qVKhA\n/Z0hCBkAACAASURBVPr1ddrXwN49aZ14m+G1cx7b+eX6fX7PcmZnMV3LWuTd02Yjo6qqTr5yms67\nrKwsNSoqSs3KyspXO6LgbNy4UW1Q3lm9N66DGvtuZ/WTltXUrh3a6bzfiIgIdfPmzepvv/2mJiUl\n6by/58nOzlY/mjxJNTc1UU2NjdVRw15V09LS/vPzwso5a+ZMtX2VUurvg6qr2wfXUMc3Ka22bdm8\nUPr+t5iYGNWrYzvV2NBQtTQ3U5cuXqTT/n7++We1lI2V6lXTTS3nYKt+OOl9nfbn7e2tOttZq990\nqq2u9KqjlrazUXft2qXTPkXR9LD2PVYT9XJke+jQIQb07fN/7d13QNXV/8fx57ksQYaAogIuFMWF\nigKaCq7MzJ2lllqZliMtM8s0c2ZZlpWWWdmyvuUeaeXMNDT33rJEQdl7Xz6/P+Br9fumCd7Lh/F+\n/GN87me8Lul93/M553MO2dlZ2Nvbs27j5lJdKUP8s1denop1yEamBjQEICIlkz4/XeDqjcq5KHpB\nQQGapuk65+y4Z8aQf3QzfZsUPj8bnpTNsksaF0IjSjXHow8PIPXsfsa0ciEhK485IfGs+G4VvXr1\nuutzREVFsWvXLuzs7Ojbty+2trb/uF9ubi5uri5s6eeLr5sjSdl5dFpzlI3bdhWrH/b69evExsbi\n7e2Nvb39v+7/3wFSBQUFPDPx+XseICUqpnLznG1iYiKPDBzApNYOfDegAWOa2jKgz0Okp6frHa3S\na9zEh90x6eTkFy7o/nN4PI0aNdI5lX4MBoPuk7sHd+vOrmu5JGblk2csYOOVDDoHdyn1HPv27uPh\nxg5YWShq2VsT7GHN3t9+u+vjjxw5QpuWLfjmzWm8NXUC9wW0u+2/+cTERKyUwtfNEQDnKlb41nQu\n1nrAb8ydS0ufxozo9yCNG9Tn8OHD/3pMjx49WLflJzb89IsUWlFsZa7YXrhwgZoONrSqVbhyRjsP\nexxsDISGhuqcTDz55JN4tm5Pux8O023DKT65lMwnX3yld6xyq6CggF9//ZU1a9YQVcKJIoYMGcLI\nZ59j7E+RDFsfimOTdiz+cKmJk/67mm5uhCYVzkalaRoR6VC7GAO1nh8/luFNq/JCW2dm3eeKU1Ys\nS5f+8/uoUaMGdvZVWXcxBoBz8Wkcvp6Ar6/vXV3rwIEDLP9wMYeGBbB/cGsWBnoydNDAu84KhZOs\nLFq0iEkTJrBq1aoy8ey3KNvK3GhkDw8PopMzSM5yopqtJXEZecSnZZl8EXFRfBYWFvywbj2nT58m\nLS2NVq1a3dXtN/G/CgoKeHTgAM4dPkBDF3vGX0tg9YZNdO3atVjnUUoxe+48Zs6ajdFYehMixMfH\nM/PVaYRfvkSbgEDe/XApjwwawJG4AuIz87Gp4cno0aPv+nw3btygUcvC0c5KKRo4KGKu//PavhYW\nFmzY8hMD+/Rm2v4wsvMLWLb807u+y3Lu3Dk613HBraoNAP0a1eTJn07h79ucMRMm8cyzz97x+Nzc\nXLoFdcIiIZLGTgamr/0Pp04c4403F971+xWVT5krtvXq1eOll19m6nvv4uNWlXM305k9Z56sklFG\nKKXuugUhbm/9+vVEnDjE3sFtsLYwsDMijjFPjOBKCRePP3nyJE+NeIyIq1G0atmSb/7zA/Xr1zdt\n6CLZ2dl07XQfHe2NjPJw4rstq
zhz6iRHjp9kz549ODo60rdvX2xsbO76nEHBwWw4uIPxbaxIyTGy\n+1ou73frftv9/fz8CIu6zo0bN3B1dS3WY0k+Pj7Mu5ZIQlYurrbW/BwWh5udFa83tuPF16djbW3N\nk089ddvjd+7cSUpMJAs6V8egFF0b5DPmvcW8Pntusd6zqFzKXLEFmDFzFr169+HSpUs0a9ZMt8cY\nhDCXq1ev0q6GPdYWhT05HTycubb1dInOlZiYSK/7uzO8iS1+Pd3ZERZBrx7dOHvxsln6lA8cOIBN\ndhoLe/milKJbveo0+TIEW1tbRo0aVaJzfvjxJzw25BGGrduJhaUFM2fOZODAO9/atbCwwMPDo9jX\n6tixIyOfGYf/kg+pbmNBbFoWq/v7EeBejXmBRr786os7FtvMzEyqVbHEUPR4k721BQZVuIKPFFtx\nO2Wuz/a/2rZty7Bhw6TQigopMDCQLeHxRKZkoWkaH5+4RoBf6xKd6+jRo3g62tClvhOONpYM8qlG\ncmJ8ifuB/03haMs/f9Y00Iq2l5SDgwM//vQL6ZmZZGRmMX3Ga/ce9A7mvrGAI6fO0NAvgInt6hPg\nXvjcdlJ2Hjb/0koOCgriSlIO20OTuZqSw6cnEmkfEICjo6NZM4vyrcwWWwE///wzQYH++Ps25913\n3i6zgzAyMjI4ffo0cXFxekcpMU3TSnVVoY4dO/LK63No//1B6n22jy3JBr5dvbZE53J2diY2LZtc\nY+Eo8dQcI+nZubeduepetW/fHqN9NabsvcyWKzd5aud5goKCTNLVY21tjcFQOh9L9evXZ/5bb/Px\nmRu8cyiM94+EM+tQJFOm37nQu7m5sWvPXk5b1GPx6VxcW3dh/eYtpZJZlF9l8jlbASEhIQx66EHe\n7dwQ1ypWTDsQwfBJLzL15Wl6R/ubAwcO0H/AQOwcnUiIvcH8efN4ftKkOx4THh7O2rVrMRgMDB06\n9NatwLlz57Jnzx4aNmzIkiVLSm16wG3btvHk448Rl5RMs8aNWLNxc6ktjp6bm0taWhouLi4lbhlq\nmsbQwQ9z5uBvNK1mweHYXEaOHse8ouk1zSExMZHZr80g7PJF/AI6MOP118vtLdRTp07x+fJPMBrz\neWLUaAICAvSOBBQueLFt2zYsLCx46KGHzPblSZiWLB5fzkwcP44ap3fzQrsGAByKTmbq6SSOn7uo\nc7I/aZqGu6cnj0+dT9vg+4mPuc7cpwawc/svt739f/r0aboHdaZ/AxfyNI1tUan8fvAQz455mhMH\nQ+hS35GTNzLJsrIn4loMlpaWxMXFsX//fuzt7QkODsbS0nRDDa5evUpb3xZ8c78PHTycWXH6Gp+G\nZXA+NKzUWlimUFBQwKpVqwgLC6NNmzb07t1b70jiHly5coVOHQJpXM2S3AK4mWfFH4ePykDRcuB2\nxbZMDpASYFOlCim5xls/p+TkYW1dtloOycnJpKWm0Tb4fgCq1/bAp40/586du22xnfvadF5q7c7Y\n1oWLCSw8FM7M6dPY89tvrOjXCGdbS/JbaozbEsayZcsICgri/m5daOhsQ0JmHp6NmvLLzt0me8Tl\n2LFjtHN3oaNn4QxMY3zr8NbhEGJjY3V/3CwqKoqIiAi8vb3/NYvBYJDJ7O/g6tWrvLVgPonx8fTp\nP5DHhw8vlfmbS2raSy/Sq44Vg3ycAfjiZCJvzJ3Dhx99rHMyUVLl56t7JTN2/AS+vZzAGwdC+eR4\nJM/9doVXXp+td6y/cXJyooptFc4e3g9ASmI8l04dpXHjxrc9JikhAS+nP6fha+hYhdiYG1gZDFSr\nUjhy1tKgqFHVihs3bjB29FMM9bbl1QBnFgbVIP3qBT777DOTvYdatWpxMT6VzLzCLzahyRlk5eXp\nvszZ8mXLaN28KVOfGEqLJt6sXrVK1zzl2Y0bNwhs50d8yEZqXjvAzCkTWfTO23rHuqOoqKvk5xs5\nHpNBnrEALycLoq+ZZ8CbKB1SbMuoRo0a8fvBQ+T4P0Bow/Z8u3Y9gwYN0jvW3xgMBlb/8APLZkxg\n/qgBTB9yPxPGjbvj/LS9BwzkrWPXCE3O4FJiOu+ejGbI8BFYW1vx7al4krPy2RuZypXEbIYNG0ZU\n1DVauBUWZwuDoomTIiI83GTvITAwkO4P9aXr+hOM33OZ3ptO8d77H+i6nFxkZCQzXnmZ3YP92N6/\nJZv6tuTZ0U+TkpJS4vM9eH93vOp60PfBB7h+/bqJE5dtP/zwAy2dDQxv6Up3r2q85O/Cu2W42EZG\nRnLlSiiHozP47lQcU7dHsjU8i673m3etY2Fechu5DPP29ub9JaU/9V5xdOvWjUsXLnD+/Hnc3d3x\n8vK64/4vvDiFpMQken/6CRYGC557YTKjx4yhbbt2PNSzO5svhmJrY83yz7+gRYsW+Pv7szX0ME/5\nupCWYyTkRj7vdOhwx2scO3aMrVu34uDgwMiRI3FxcbntvkopPv3iK3bs2MHVq1d50c8PPz+/Ev0u\nTCU8PJzGbtVo4FS4Zm7LGo7UsLfl2rVrxR4kk5WVRffgznRwzqVPSzv2XTtJjy5BnDx7vtRmm9Jb\nfn4+1n9pVthYGsjPN97+AJ1NeX4ivRrYMaSZC5qmsWh/NHluDRk3foLe0cQ9kAFSokyLj4+n34MP\ncObsOfKMRl58cTLzF7x12/62n3/+meFDH6FrHVsScxQRuTYcPnYCV1fXUk5+Z1FRUWzfvh1bW1v6\n9ev3t2kvr1+/TqtmPmzp50uz6g4cjE5i2LbzhEddx8HBoVjXOXz4MI/1f5B3u9QACge1TdoVy9bd\ne2nRooVJ31NZFRYWhr9fax5tXBUPeytWXcrggUdHsui9xXpH+0ftWrVgsFsazWoUftnaFZZMbJ37\n+H7NOp2TibshA6REuVS9enVCDh0hMTERW1tb7Ozs7rj/tCkvMKGNM+3cC4vX0iPxLF++nOnTp5dG\n3Lty/PhxenXvRpc6ziRl57Fg9ix+P3T4Vj+xh4cHS5Yt58Fnn6GWox1xGdms/H5VsQstgJ2dHWnZ\nueQZC7CyMJBr1MjIyf3X32NF4uXlxc5ff+O1aVM5mZjI0GdHM236DBITE9m4cSNGo5E+ffpQu3Zt\nvaMCENihI9t2raexqy15Ro0913MZNaST3rHEPZKWrahQ6nvW5uXWtng6Fo7cXn02AfeeT/D2O+/o\nnOxPPYI6MqBKKiObFz5fPGH3Bbz6j2DO3Hl/2y8pKYlr165Rr169Es9OpGkaD/fvS/iJP2jjauBI\nvJEW93Xjux9Wl+nRuOYWHR1Ne/+21LctwNJCcSY+l337D5Ta89V3kp6ezsMD+rF//36MBRpDhwzh\nsy++1H05R3F3pGUrKoWH+vTjm23rGONbjfjMPLZHZrGmT5+7OtZoNJbKB9rNGzdp5e9262dfF1tC\no6P/Zz9nZ2ecnZ3v6VpKKVav38hnn33GudOneK51G55++ulKVWiNRiMGg+Fv73n+3NkEuB
QwsmVh\nf/7Gi0lMf/kl1m360eTXz87OZu/evRiNRjp37vyvK2XZ29vzy45dJCYmYmlpKZNZVBAyGlmUOydP\nnmTBggV88MEHJCYm/u219z74kNY9+vPq7wksv2hk8UefEBwcfMfzHTp0CO96dbC2tqJpwwacOHHC\nnPEJ7taNd49fIzPPyPW0bFZciDPrSFNLS0vGjRvHko+X8cwzz1SaFlJcXBw9gjpRxcaG6tWc+Obr\nr2+9djM6mnoOf/4e6jtaE3vzhskzJCUlEeDXmslPP8arY5+gVfOmRP/DF6v/TymFq6urFNoKRG4j\ni3Jlx44dPP7Iwwxt7MaNrHyOpxbwx7HjJR4AlZKSgk9DL95uX4c+DWuy7lIMs4/FcCk80mz9mpmZ\nmTw9cgTrN2/GysKSaa9OY8bM1++ptalpGt+uXMn61d/jVM2Zaa+9jo+PjwlTlz+9e3SnQUokczp4\ncTkpg4FbzrB52w4CAgL4+OOP+HD+TKYFumJlULx7OJH+T41n1py5Js0w5YXnOffL94xtUzgd53dn\nkrBuHszK7+W56YrqdreRpWUrypXpUyazJLgR8zs24vMePnSoZmDZsmUlPt+5c+fwsLehv3ctLAyK\nR33ccbRUXL582YSp/87Ozo7v164jMyubtMxMXnt91j3f1v3w/fd5bcokvBJPop3dRecO7Qk34fPI\n5dFvISG86l8fawsDzas7MLBhDfbt2wfAuHHjGTj8aSb8EsXoLZH49+zPjJmvmzxD2JXLNHexuvX/\nt3l1a8LDQk1+HVH2SbEV5UpySsqt508BGthbk5yYUOLzubm5cTUpjcTswhV/4jJziEnJoEaNGvec\n9d9YWFiYrO/0g8WLeKGtM8H1nXi4qQv31bZi5cqVJjl3eVXT1YVTcakAFGgaZ5Kybs0trJTizbff\nIT0zi8zsbJZ/vsKkc27/V2DHTvx6PYec/ALyjBo7r2YR2OE+k19HlH0yQEqUK7379GXmjs0sDmrI\njfQcVlyI5eu5dzcA6p80bNiQp8aMocfKr+nkUY3friUxecoU3N3dTZja/AoKNIrWoWdfZCoHr6Zw\n7KMlODs58dykSZVqQNR/ffjJpzzx+DAe9HLjUnIWVet4MWTIkL/tY+7fy0tTX+b0yRM8uWkTBqUI\n6tyZeQveMus1RdkkfbaiXMnJyeGF58azYf16qtrZMWv+AkY+8cQ9n3f37t1cvHiR5s2bExQUZIKk\npeuN+fP4aul7tKtuwS+hybzQ3h0bS8Xykym8PGcB48aN1zuiLs6fP8/evXtxdXWlf//+WFlZ6ZIj\nKSkJo9GIq6trpfziU5nIEnvif6xatYp1//kWO3sHXnp1eqWZUagi0jSNj5cu5a35c+hfz4qejQon\nyDgWk87OzJr8fvCIzgnF7WiaRkREBEajES8vr3K1tKP4XzJASvzN5599xqsTx9EjJ5KGUcfo1rkT\nFy+WnbVyRfEopZgwcSIP9O5Dam7Bre2pOcZKNVtUeZOTk0O/Bx+gfZtWBPm3pYVPExYsWEBISIje\n0YSJScu2kvJt4s07rVzo4FE4acLskMvYdBvMm28t1DlZ6cjOzmbDhg2kpqbSvXt3GjVqpHckkzh7\n9ixB93WgR10bbCwUW8MzWbvxR7p27ap3NPEP5sx6nUOrvuTL+3148qeTXEpKo0kNO47G5jFz7htM\nmDhR74iimGQGKfE3BQUFWBn+/PtgbTBgLMMroZhSZmYmXTp2wC49gTr2Nrz2ylTWbNxMly5d9I52\nz5o3b87vfxzk00+WkZ+Xx0/LnyQwMFDvWOI2Th87ysNeLhyITuZiUhrv9KyHpUFxMz2X56e+xDNj\nx+rWzyxMS4ptJTV63ASeW/Qms/zrcDMjhxUXbrLr05F6xzKb3bt38/7CN8nLzaVmvQa4ZSfy3UPN\nUUrxUx0nXhg/lhPnLugd0ySaNm3K4g8+NOk58/LymDL5edauXo2trS1z5i9g+IgRJr1GZeTdtBnb\nfj7LA3Wc8HCwxrLoC7BbVSuUgoyMjFsLVIjyTYptJfX85MnY2tmx4j/fUtWhJj/+sgJfX1+9Y5nF\n77//ztBBA5gXWI+q9pZMXLOKZ1t63BoV2qKGA3EHruqcsmyb9vJL7P9xNbMCnUjOzmfKpPG4e3jQ\nrVs3vaOVazNen8UDe37ljSNhRCelcywmnabV7dh0KZkm3o3KTKHNzs7mu+++Iy4uji5dutC+fXu9\nI5U70mcrKrzRT4ygceQRxrapB8Cig6F8ciqKXwb74+lQhSl7r2Bs7Md/ZL3QW2JiYnh5ymTCQ68Q\n2OE+Nm3YwKTmltSvVgWAdecScA4awvsfLtE5afmXn5/PsWPHOHjwIO+9/RYxsXH4+7Xh+zXr8PT0\n1DseOTk5BHfsgDE+ijpVFfuuZ/Hehx8xYmTFvRN2L6TPVlRaymAgr+DPL35NqzvgWrM23dYdJzM7\nmwd79OCbz7/QMWHZkp6eTucOgbRxzKWnqzU7t3xPako2sRlOt4ptXLZGA2cXnZNWDJaWlgQEBBAQ\nEMDEMjggas2aNeTGRTHrvsJnhIPqZDNl8vNSbItJiq2o8J6dMJEHu2/AxkJR1cqS+UeiWPL5Fwwc\nOJCCgoJKswrO3QoJCaGqls2IFoWLOzR3s2PEpjA+Pp7MxYRcknM1LmZY8tWECTonFaUhKSmJ2lX/\nnFrU3dGa5NR0NE2TCTqKQZ6zFRVeu3bt2LJ9BydrtmRPVS8+XfkdgwYNQiklhfYfWFhYkGcs4L/d\nQAWahkEZWLtxM80Hj6PnmJc5euIUbm5u/3ImURF07dqVP66nc+pmBqk5+Xx1KokeXYOl0BaT9NmK\nMiMsLIyIiAiaNm1K7dq19Y5TaWVnZxPYtg3uxgSau1qx51oO3oFd+H71Wr2jCZ1s3bqVieOeISEp\nmS7BwXy18jucnZ31jlUmyXSNokx7Z+FbvPnGPOq52BORmMGKr75h0KBBeseqtJKSkpg3exbhoZfx\n79CRqS+/Is97CnEXpNiKMuvChQt0DGzHoq61cLWz4kpiNnNCYrkRG4+tra3e8YQQ4q7J3MiizAoL\nC6NhdXtc7QpbTo1cqmBrZcGNGzd0TiaEEKYhxVbormnTplyJSycqJQeAEzEZGDGUuzVlhRDiduTR\nH6G7Bg0a8P6Sj3huwjic7WzIzNdYu2ETNjY2ekcTQgiTkD5bUWakpKQQHR1NvXr1ZFk4IUS5JAOk\nhBBCCDOTAVJCCCGETqTYCiGEEGYmA6REhZWcnMzRo0dxdHSkXbt2Mr2cEEI30rIVFdLZs2dp0cSb\nWWOfZFifXgzu3xej0ah3rArDaDRy5MgRQkJCyMrK0juOECV29OhROga0o2FdT54a8ThpaWlmuY4M\nkBIVUkf/tgyplsuTLTzINRYwYMsZRr02n1GjRukdrdzLzs6m34MPEH7+LPY2VmRZ2LBrXwgeHh56\nRxOiWKKiomjTsgXDm1alkUsVNlxKw867LT/+9
EuJzykDpESlEh4RQY96heutWlsYCKpZlSuXL+uc\nqmJ4f/FirGLCODjEj98GtaJ/LWsmTxivdyxhZklJScTFxVGRGlG7d++mZU1bujVwoq6TDeP9XNi2\nfSe5ubkmv5YUW1EhtfL1ZeW5GDRNIyk7jx+vpuDXtq3esSqEy+fPcr+HI5aGwo+PXvVcuXzpgs6p\nhLnk5+cz8vFheLrXomH9uvS6vzsZGRl6xzIJOzs7UnKMt75ApOQYsbC0wNLS9MOZpNiKCumzr1ey\nNUnRfOVBfL/ZT89HhvLwww/rHatCaOnXlo0RSWTlF35Irboci2/rNnrHEmbyweL3OL1vB1/2rc9X\nfeuRG3mGaVOn6B3LJPr06YPm4MbiwwlsOJ/AnJB4Zs58DYPB9KVR+mxFhWU0GomMjMTR0ZHq1avr\nHafCyM/PZ8TQIezcvg1baytq1anLzzt34+rqqnc0YQaPDuqPe8xhujVwAuBMbCabEqtx8NhJnZOZ\nRnp6OkuXLiXm+jWCu3a756U9ZQYpIYTJaJpGVFQUOTk5eHl5YWFhoXckYSavTH2J45u/YYKfC0op\nvj+bREGjDqxat0HvaGWSFFshhBDFlpKSQpdO95GTdBMbSwPJBdbs2/8Hnp6eekcrk6TYCiGEKJGc\nnBz27dtHfn4+HTt2xMHBQe9IZZYUWyGEEMLM5DlbIYTZyOxcorguXLjA9u3biY6O1jtKqZBiK4Qo\nsejoaDoG+mNjbU11ZyfWrFmjdyRRDsyZNZNOge14dexIWjRtwqZNm/SOZHZyG1kIUWIdA/3xzLrK\nkGbOhCdls+BAPHtCDtCyZUu9o4ky6uTJk9wf3Il3u9XCqYollxKymH8gnriEJKysrPSOd8/kNrIQ\nwqTy8/M5eOQYQ5s5Y2lQeLva4u/pwP79+/WOJsqw0NBQvGvY41SlcJamxq62GNCIj4/XOZl5SbEV\nQpSIhYUFTg72hCfnAGAs0LiakkvNmjVNcv6EhAQOHDjA9evXTXI+UTa0aNGCCzfTuJZa+Pfm4LU0\nbGxscXNz0zmZecltZCFEia1evZpxY0YR4OFAZGoe9Zu15seft93zJBdbt25lxGNDqeVoS3RyBvMX\nvMlzEyeZKLXQ2xdfrOD5ic/haGtDPhZs3voTgYGBescyCXn0R9xWSEgIH7yzkLzcXIY/PUbmEBbF\ncurUKfbv30/NmjXp16/fPRfarKws3Gu6Mb29K02q23IzPZdpe27yx9HjeHt7myi10FtqaiqxsbHU\nqVMHGxsbveOYzO2KremXNhDlysGDBxnQuxev+dfF3sqCyc+OJi8vj6FDh+odTZQTvr6++Pr6mux8\nMTExVLEy0KS6LQA17a3xqmHPlStXpNhWII6Ojjg6Ouodo9RIn20lt2L5Mia39uCplnV4xMedRR0b\nsGzxu3rHEpWYu7s7OfkaZ2MzAYhJyyUsPp3GjRvrnEyIkpOWbSWnaRp/vd+h1P/c/RCiVFWpUoUf\n1qxl2KODcbHLIDY1k0XvvU/Dhg31jiZEiUmxreSefnYcfR9YT1UrC+ytLZl9KJKFS5bpHUtUcj17\n9iQsMorQ0FA8PT0r/EhVUfHJACnB77//zgfvLCQ3J5sRo59l8ODBekcSQohySUYjCyGEEGYmM0gJ\nIYQQOpFiK4QwiQ0bNvDEY0MZ/8wYrly5onccIcoUKbZCiHv2xYoVTH5mFG1jT+N0eg+dAgOIiIjQ\nO5YQZYb02Qqhk9TUVGJiYqhbty62trZ6x7knzRt5sbhtDdq7OwPw6r5LVH9oOHPmzNU5mRClS/ps\nhShDVn7zDZ7utejRqT11PWqzd+9evSPdk7z8fOws/5ym0c7SQH5eXqlmOH/+PPcHd6Zx/bo8/uhg\nEhMTS/X6QtyJtGyFKGURERH4+bZgbmc36jrZcDwmg6UnU7kWc7PczhH7xry5rP10KXMC6hGTns1r\nByPZ+ds+WrVqVSrXT0hIwLdpE15sWZPOHs58eiaay1Xc2BNyQCZqEaVKWrZClBHnzp2jUQ176joV\nFtY2tatiiUZ0dLTOyUru1RmvMeL5qbxztYCNea5s2PJTqRVagP3799PMpSpjfOvg42rPoiBvTp06\nRUJCQqllEOJOZAYpIUqZl5cXofHpJGTa42pnRWhiNpl5+SZbB1YPBoOBF1+ayosvTdXl+nZ2dsRl\n5lCgaRiUIjknj9x8I1WqVNEljxD/nxRbIUqZj48Pr7w6gykL3qCea1UiEjL44qtvsLOz0ztauRUU\nFISTZ32G/3KO+2pWZXVYImPHjsXe3l7vaEIA0mcrhG6uXLlCREQETZs2xcPDQ+845V52djZLVLTb\nFgAABWRJREFUly7langYAR3u4/HHH5f+WlHqZLpGIYQQwsxkgJQQQgihEym2QgghhJlJsRVCCCHM\nTIqtEEIIYWZSbIUQQggzk2IrhBBCmJkUWyFEmZGWlsYff/wh6+GKCkeKrRCiTDhx4gQ+DRswfsgg\nOrbz44XnJiDP6ouKQia1EEKUCb4+TXiuvg1Dm7qTkpNHz42nePfzr+ndu7fe0YS4azKphRCiTLsY\nFka/RoWLMTjZWNHF3YkLFy7onEqUhtOnTxPcsT2N6tXhiceHkZqaqnckk5NiK4QoE5p5e7P20g0A\nErNz2X0tmebNm+ucSpjbzZs36R4cRMv8SF5oYUXMkV08MrCf3rFMTlb9EUKUCStXreahnj34+Fws\nN1MyGDN2LA888IDesYSZ7dmzh8auNvRsWA2AcW2sGbY+hIyMDKpWrapzOtORYiuEKBNatGjBhdBw\nLl26hKurK56ennpHEqXAzs6OlOx8NE1DKUVarhENsLa21juaSckAKSGEELrJycmhU/sAbNOi8XY0\nsCc6l6FPPcMbby7UO1qJyBJ7QgghyqSMjAyWLl1KVEQ4nYK7MGTIkHK7FrEUWyGEEMLM5NEfIYQQ\nQidSbIUQQggzk2IrhBBCmJkUWyGEEMLMpNgKIYQQZibFVgghhDAzKbZCCCGEmUmxFUIIIcxMiq0Q\nQghhZlJshRBCCDMz66o/5XVuSyGEEMKUzDY3shBCCCEKyW1kIYQQwsyk2AohhBBmJsVWCCGEMDMp\ntkKYiVJqhlLqjFLqpFLqmFLK38TnD1ZK/Xi3201wvf5KKZ+//PyrUsrP1NcRoiIy62hkISorpVR7\noDfQWtO0fKWUC2BthkvdboSjOUY+DgC2ABfMcG4hKjRp2QphHrWBeE3T8gE0TUvUNO0GgFLKTym1\nRyl1WCn1s1KqZtH2X5VS7yuljiulTiml2hVt91dK7VdKHVVK/a6U8r7bEEopO6XUCqXUH0XH9y3a\n/oRSal3R9S8qpRb+5Zini7b9oZT6VCm1RCnVAegHvF3USvcq2v1RpdRBpdQFpVRHU/zihKiIpNgK\nYR7bgbpFRegjpVQQgFLKElgCPKxpmj/wJbDgL8fZaprWBphQ9BrAeaCTpmltgVnAm8XIMQPYpWla\ne6Ab
sEgpZVv0WivgEcAXGKKU8lBK1QZeAwKAjoAPoGmadgDYDEzVNM1P07SwonNYaJoWCEwGZhcj\nlxCVitxGFsIMNE3LKOrP7ExhkftBKTUNOAq0AHaowllfDED0Xw79vuj4fUopB6WUI+AIfFPUotUo\n3r/bnkBfpdTUop+tgbpF/71L07R0AKXUWaAeUAPYo2laStH2NcCdWtLri/48WnS8EOIfSLEVwky0\nwhlj9gJ7lVKngZHAMeCMpmm3u+X6//taNWAesFvTtEFKqXrAr8WIoShsRV/+28bCPuWcv2wq4M/P\ng+JM/fbfcxiRzxMhbktuIwthBkqpxkqpRn/Z1BqIBC4CNYqKHUopS6VUs7/sN6RoeycgRdO0NMAJ\nuF70+lPFjLINmPSXXK3/Zf/DQJBSyqnolvfDf3ktjcJW9u3I/KxC3IYUWyHMwx74uujRnxNAU2C2\npml5wGBgYdH240CHvxyXrZQ6BnwMjCra9jbwllLqKMX/NzsPsCoacHUGmHub/TQATdOiKexDPgTs\nA8KBlKJ9fgCmFg208uKfW+FCiH8gcyMLUUYopX4FpmiadkznHFWL+pwtgA3ACk3TNumZSYjyTlq2\nQpQdZeWb72yl1HHgNBAmhVaIeyctWyGEEMLMpGUrhBBCmJkUWyGEEMLMpNgKIYQQZibFVgghhDAz\nKbZCCCGEmUmxFUIIIczs/wCB5ulx+0dRCQAAAABJRU5ErkJggg==\n", - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "plot(X_rca, Y)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Manual Constraints\n", - "\n", - "Some of the algorithms we've mentioned have alternate ways to pass constraints.\n", - "So far we've been passing them as just class labels - and letting the internals of metric-learn deal with creating our constrints.\n", - "\n", - "We'll be looking at one other way to do this - which is to pass a Matrix X such that - (a,b,c,d) indices into X, such that $d(X[a],X[b]) < d(X[c],X[d])$. \n", - "\n", - "This kind of input is possible for ITML and LSML.\n", - "\n", - "We're going to create these constraints through the labels we have, i.e $Y$.\n", - "\n", - "This is done internally through metric learn anyway (do check out the `constraints` class!) - but we'll try our own version of this. I'm going to go ahead and assume that two points labelled the same will be closer than two points in different labels.\n", - "\n", - "Do keep in mind that we are doing this method because we know the labels - we can actually create the constraints any way we want to depending on the data!" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "def create_constraints(labels):\n", - " import itertools\n", - " import random\n", - " \n", - " # aggregate indices of same class\n", - " zeros = np.where(Y==0)[0]\n", - " ones = np.where(Y==1)[0]\n", - " twos = np.where(Y==2)[0]\n", - " # make permutations of all those points in the same class\n", - " zeros_ = list(itertools.combinations(zeros, 2))\n", - " ones_ = list(itertools.combinations(ones, 2))\n", - " twos_ = list(itertools.combinations(twos, 2))\n", - " # put them together!\n", - " sim = np.array(zeros_ + ones_ + twos_)\n", - " \n", - " # similarily, put together indices in different classes\n", - " dis = []\n", - " for zero in zeros:\n", - " for one in ones:\n", - " dis.append((zero, one))\n", - " for two in twos:\n", - " dis.append((zero, two))\n", - " for one in ones:\n", - " for two in twos:\n", - " dis.append((one, two))\n", - " \n", - " # pick up just enough dissimilar examples as we have similar examples\n", - " dis = np.array(random.sample(dis, len(sim)))\n", - " \n", - " # return a four-tuple of arrays with d(X[a],X[b])" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "plot(X_itml, Y)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "And that's the result of ITML after being trained on our manual constraints! A bit different from our old result but not too different. 
We can also notice that it might be better to rely on the randomised algorithms under the hood to make our constraints if we are not very sure how we want our transformed space to be.\n", - "\n", - "RCA and SDML also have their own specific ways of taking in inputs - it's worth one's while to poke around in the constraints.py file to see how exactly this is going on.\n", - "\n", - "This brings us to the end of this tutorial!\n", - "Have fun Metric Learning :)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 2", - "language": "python", - "name": "python2" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 2 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.12" - } - }, - "nbformat": 4, - "nbformat_minor": 1 -} diff --git a/examples/plot_metric_learning_examples.py b/examples/plot_metric_learning_examples.py new file mode 100644 index 00000000..fd6cff20 --- /dev/null +++ b/examples/plot_metric_learning_examples.py @@ -0,0 +1,485 @@ +""" +Algorithms walkthrough +~~~~~~~~~~~~~~~~~~~~~~ + +This is a small walkthrough which illustrates most of the Metric Learning +algorithms implemented in metric-learn by using them on synthetic data, +with some visualizations to provide intuitions into what they are designed +to achieve. +""" + +# License: BSD 3 clause +# Authors: Bhargav Srinivasa Desikan +# William de Vazelhes + +###################################################################### +# Imports +# ^^^^^^^ +# + +from sklearn.manifold import TSNE + +import metric_learn +import numpy as np +from sklearn.datasets import make_classification, make_regression + +# visualisation imports +import matplotlib.pyplot as plt +np.random.seed(42) + + +###################################################################### +# Loading our dataset and setting up plotting +# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +# +# We will be using a synthetic dataset to illustrate the plotting, +# using the function `sklearn.datasets.make_classification` from +# scikit-learn. The dataset will contain: +# - 100 points in 3 classes with 2 clusters per class +# - 5 features, among which 3 are informative (correlated with the class +# labels) and two are random noise with large magnitude + +X, y = make_classification(n_samples=100, n_classes=3, n_clusters_per_class=2, + n_informative=3, class_sep=4., n_features=5, + n_redundant=0, shuffle=True, + scale=[1, 1, 20, 20, 20]) + +########################################################################### +# Note that the dimensionality of the data is 5, so to plot the +# transformed data in 2D, we will use the t-sne algorithm. (See +# `sklearn.manifold.TSNE`). + + +def plot_tsne(X, y, colormap=plt.cm.Paired): + plt.figure(figsize=(8, 6)) + + # clean the figure + plt.clf() + + tsne = TSNE() + X_embedded = tsne.fit_transform(X) + plt.scatter(X_embedded[:, 0], X_embedded[:, 1], c=y, cmap=colormap) + + plt.xticks(()) + plt.yticks(()) + + plt.show() + +################################### +# Let's now plot the dataset as is. 
+
+
+plot_tsne(X, y)
+
+#########################################################################
+# We can see that the classes appear mixed up: this is because t-sne
+# is based on preserving the original neighborhood of points in the embedding
+# space, but this original neighborhood is based on the Euclidean
+# distance in the input space, in which the contribution of the noisy
+# features is high. So even if points from the same class are close to each
+# other in some subspace of the input space, this is not the case when
+# considering all dimensions of the input space.
+#
+# Metric Learning
+# ^^^^^^^^^^^^^^^
+#
+# Why is Metric Learning useful? We can, with prior knowledge of which
+# points are supposed to be closer, figure out a better way to compute
+# distances between points for the task at hand. Especially in higher
+# dimensions when Euclidean distances are a poor way to measure distance, this
+# becomes very useful.
+#
+# Basically, we learn this distance:
+# :math:`D(x, x') = \sqrt{(x-x')^\top M(x-x')}`. And we learn the parameters
+# :math:`M` of this distance to satisfy certain constraints on the distance
+# between points, for example requiring that points of the same class are
+# close together and points of different classes are far away.
+#
+# For more information, check the :ref:`intro_metric_learning` section
+# from the documentation. Some good reading material can also be found
+# `here `__. It serves as a
+# good literature review of Metric Learning.
+#
+# We will briefly explain the metric learning algorithms implemented by
+# metric-learn, before providing some examples for its usage, and also
+# discuss how to perform metric learning with weaker supervision than class
+# labels.
+#
+# Metric-learn can be easily integrated with your other machine learning
+# pipelines, and follows scikit-learn conventions.
+#
+
+
+######################################################################
+# Large Margin Nearest Neighbour
+# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+#
+# LMNN is a metric learning algorithm primarily designed for k-nearest
+# neighbor classification. The algorithm is based on semidefinite
+# programming, a sub-class of convex programming (as most Metric Learning
+# algorithms are).
+#
+# The main intuition behind LMNN is to learn a pseudometric under which
+# all data instances in the training set are surrounded by at least k
+# instances that share the same class label. If this is achieved, the
+# leave-one-out error (a special case of cross-validation) is minimized.
+# You'll notice that the points from the same labels are closer together,
+# but they are not necessarily in the same cluster. This is particular to
+# LMNN, and we'll see that some other algorithms implicitly enforce points
+# from the same class to cluster together.
+#
+# - See more in the :ref:`User Guide <lmnn>`
+# - See more in the documentation of the class :py:class:`LMNN
+#   <metric_learn.LMNN>`
+
+
+######################################################################
+# Fit and then transform!
+# -----------------------
+#
+
+# setting up LMNN
+lmnn = metric_learn.LMNN(k=5, learn_rate=1e-6)
+
+# fit the data!
+lmnn.fit(X, y)
+
+# transform our input space
+X_lmnn = lmnn.transform(X)
+
+
+######################################################################
+# So what have we learned? The matrix :math:`M` we talked about before.
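+
+######################################################################
+# As a quick sanity check, we can ask the estimator for that matrix and for
+# the learned distance itself. (A sketch only: it assumes a metric-learn
+# version that exposes `get_mahalanobis_matrix` and `get_metric`, as recent
+# releases do; nothing below is needed for the rest of the example.)
+
+M = lmnn.get_mahalanobis_matrix()  # the (5, 5) symmetric PSD matrix M
+print(M.shape)
+
+# get_metric() returns a callable computing
+# sqrt((x - x')^T M (x - x')) between two points
+metric_fun = lmnn.get_metric()
+print(metric_fun(X[0], X[1]))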
+
+
+######################################################################
+# Now let us plot the transformed space - this tells us what the original
+# space looks like after being transformed with the new learned metric.
+#
+
+plot_tsne(X_lmnn, y)
+
+
+######################################################################
+# Pretty neat, huh?
+#
+# The rest of this example will briefly explain the other Metric Learning
+# algorithms before plotting them. Also, while we have first run ``fit``
+# and then ``transform`` to see our data transformed, we can also use
+# ``fit_transform``. The rest of the examples and illustrations will use
+# ``fit_transform``.
+
+######################################################################
+# Information Theoretic Metric Learning
+# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+#
+# ITML uses a regularizer that automatically enforces a positive
+# semi-definite matrix - the LogDet divergence. It uses soft
+# must-link and cannot-link constraints, and a simple algorithm based on
+# Bregman projections. Unlike LMNN, ITML will implicitly enforce points from
+# the same class to belong to the same cluster, as you can see below.
+#
+# - See more in the :ref:`User Guide <itml>`
+# - See more in the documentation of the class :py:class:`ITML
+#   <metric_learn.ITML>`
+
+itml = metric_learn.ITML_Supervised()
+X_itml = itml.fit_transform(X, y)
+
+plot_tsne(X_itml, y)
+
+
+######################################################################
+# Mahalanobis Metric for Clustering
+# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+#
+# MMC is an algorithm that will try to minimize the distance between similar
+# points, while ensuring that the sum of distances between dissimilar points
+# is higher than a threshold. This is done by optimizing a cost function
+# subject to an inequality constraint.
+#
+# - See more in the :ref:`User Guide <mmc>`
+# - See more in the documentation of the class :py:class:`MMC
+#   <metric_learn.MMC>`
+
+mmc = metric_learn.MMC_Supervised()
+X_mmc = mmc.fit_transform(X, y)
+
+plot_tsne(X_mmc, y)
+
+######################################################################
+# Sparse Determinant Metric Learning
+# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+#
+# SDML implements an efficient sparse metric learning algorithm in
+# high-dimensional space via an :math:`l_1`-penalized log-determinant
+# regularization. Compared to most existing distance metric learning
+# algorithms, it exploits the sparsity underlying the intrinsic
+# high-dimensional feature space.
+#
+# - See more in the :ref:`User Guide <sdml>`
+# - See more in the documentation of the class :py:class:`SDML
+#   <metric_learn.SDML>`
+
+sdml = metric_learn.SDML_Supervised(sparsity_param=0.1, balance_param=0.0015)
+X_sdml = sdml.fit_transform(X, y)
+
+plot_tsne(X_sdml, y)
+
+
+######################################################################
+# Least Squares Metric Learning
+# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+#
+# LSML is a simple, yet effective, algorithm that learns a Mahalanobis
+# metric from a given set of relative comparisons. This is done by
+# formulating and minimizing a convex loss function that corresponds to
+# the sum of squared hinge losses of the violated constraints.
+#
+# - See more in the :ref:`User Guide <lsml>`
+# - See more in the documentation of the class :py:class:`LSML
+#   <metric_learn.LSML>`
+
+lsml = metric_learn.LSML_Supervised(tol=0.0001, max_iter=10000)
+X_lsml = lsml.fit_transform(X, y)
+
+plot_tsne(X_lsml, y)
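+
+######################################################################
+# The "relative comparisons" above are quadruplets (a, b, c, d) meaning
+# "a and b should end up closer together than c and d", and LSML's native,
+# weakly-supervised form consumes them directly. A minimal sketch, built
+# here from the class labels, and assuming the quadruplets-of-indices plus
+# `preprocessor` API of recent metric-learn releases:
+
+zeros_idx, ones_idx = np.where(y == 0)[0], np.where(y == 1)[0]
+n_quads = min(len(zeros_idx), len(ones_idx)) // 2
+# each row is (a, b, c, d): a, b share class 0; c, d come from classes 0 and 1
+quads = np.column_stack([zeros_idx[:n_quads], zeros_idx[n_quads:2 * n_quads],
+                         zeros_idx[:n_quads], ones_idx[:n_quads]])
+
+lsml_weak = metric_learn.LSML(preprocessor=X)
+lsml_weak.fit(quads)  # enforces d(X[a], X[b]) < d(X[c], X[d]) for each row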
+
+
+######################################################################
+# Neighborhood Components Analysis
+# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+#
+# NCA is an extremely popular metric learning algorithm.
+#
+# Neighborhood components analysis aims at "learning" a distance metric
+# by finding a linear transformation of input data such that the average
+# leave-one-out (LOO) classification performance of a soft-nearest
+# neighbors rule is maximized in the transformed space. The key insight to
+# the algorithm is that a matrix :math:`A` corresponding to the
+# transformation can be found by defining a differentiable objective function
+# for :math:`A`, followed by use of an iterative solver such as
+# `scipy.optimize.fmin_l_bfgs_b`. Like LMNN, this algorithm does not try to
+# cluster points from the same class in a unique cluster, because it
+# enforces conditions at a local neighborhood scale.
+#
+# - See more in the :ref:`User Guide <nca>`
+# - See more in the documentation of the class :py:class:`NCA
+#   <metric_learn.NCA>`
+
+nca = metric_learn.NCA(max_iter=1000)
+X_nca = nca.fit_transform(X, y)
+
+plot_tsne(X_nca, y)
+
+######################################################################
+# Local Fisher Discriminant Analysis
+# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+#
+# LFDA is a linear supervised dimensionality reduction method. It is
+# particularly useful when dealing with multimodality, where one or more
+# classes consist of separate clusters in input space. The core
+# optimization problem of LFDA is solved as a generalized eigenvalue
+# problem. Like LMNN and NCA, this algorithm does not try to cluster points
+# from the same class in a unique cluster.
+#
+# - See more in the :ref:`User Guide <lfda>`
+# - See more in the documentation of the class :py:class:`LFDA
+#   <metric_learn.LFDA>`
+
+lfda = metric_learn.LFDA(k=2, num_dims=2)
+X_lfda = lfda.fit_transform(X, y)
+
+plot_tsne(X_lfda, y)
+
+
+######################################################################
+# Relative Components Analysis
+# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+#
+# RCA is another one of the older algorithms. It learns a full rank
+# Mahalanobis distance metric based on a weighted sum of in-class
+# covariance matrices. It applies a global linear transformation to assign
+# large weights to relevant dimensions and low weights to irrelevant
+# dimensions. Those relevant dimensions are estimated using "chunklets",
+# subsets of points that are known to belong to the same class.
+#
+# - See more in the :ref:`User Guide <rca>`
+# - See more in the documentation of the class :py:class:`RCA
+#   <metric_learn.RCA>`
+
+rca = metric_learn.RCA_Supervised(num_chunks=30, chunk_size=2)
+X_rca = rca.fit_transform(X, y)
+
+plot_tsne(X_rca, y)
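+
+######################################################################
+# To make "chunklets" concrete: in the weakly-supervised form of RCA, each
+# point is given a chunklet index (or -1 when it belongs to no chunklet),
+# and only points sharing an index are assumed to come from the same class.
+# A minimal sketch of that format (assuming the `RCA().fit(X, chunks)`
+# signature of recent metric-learn releases; the fit call is left commented
+# since a handful of tiny chunklets is too little data to learn from):
+
+chunks = -np.ones(len(X), dtype=int)  # -1 marks "not in any chunklet"
+for chunk_idx, klass in enumerate(np.unique(y)):
+  # the first two points of each class form one chunklet
+  chunks[np.where(y == klass)[0][:2]] = chunk_idx
+
+# metric_learn.RCA().fit(X, chunks)  # would learn a metric from the chunklets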
+
+
+######################################################################
+# Metric Learning from Weaker Supervision
+# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+#
+# To learn the metric, so far we have always given the labels of the
+# data to supervise the algorithms. However, in many applications,
+# it is easier to obtain information about whether two samples are
+# similar or dissimilar. For instance, when annotating a dataset of face
+# images, it is easier for an annotator to tell if two faces belong to the same
+# person or not, rather than finding the ID of the face among a huge database
+# of every person's faces.
+# Note that for some problems (e.g., in information
+# retrieval where the goal is to rank documents by similarity to a query
+# document), there is no notion of individual label but one can gather
+# information on which pairs of points are similar or dissimilar.
+# Fortunately, one of the strengths of metric learning is the ability to
+# learn from such weaker supervision. Indeed, some of the algorithms we've
+# used above have alternate ways to pass some supervision about the metric
+# we want to learn. The way to do this is to pass a 2D array `pairs` of pairs,
+# as well as an array of labels `pairs_labels` such that for each `i` between
+# `0` and `n_pairs - 1` we want `X[pairs[i, 0], :]` and `X[pairs[i, 1], :]` to
+# be similar if `pairs_labels[i] == 1`, and we want them to be dissimilar if
+# `pairs_labels[i] == -1`. In other words, we
+# want to enforce a metric that projects similar points closer together and
+# dissimilar points further away from each other. This kind of input is
+# possible for ITML, SDML, and MMC. See :ref:`weakly_supervised_section` for
+# details on other kinds of weak supervision that some algorithms can work
+# with.
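+
+
+######################################################################
+# As a quick illustration of this format (a toy sketch added here, not
+# part of the original tutorial): suppose we believed that points 0 and 1
+# are similar while points 0 and 2 are dissimilar. We would encode this as
+# follows.
+
+toy_pairs = np.array([[0, 1],
+                      [0, 2]])  # indices of points in X
+toy_pairs_labels = np.array([1, -1])  # +1: similar pair, -1: dissimilar pair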
+
+
+######################################################################
+# For the purpose of this example, we're going to explicitly create these
+# pairwise constraints through the labels we have, i.e. `y`.
+# Keep in mind that we only take this approach because we know the labels
+# - we can actually create the constraints any way we want to, depending on
+# the data!
+#
+# Note that this is what metric-learn did under the hood in the previous
+# examples (do check out the
+# `constraints` module!) - but we'll try our own version of this. We're
+# going to go ahead and assume that two points with the same label should
+# be closer to each other than two points with different labels.
+
+
+def create_constraints(labels):
+  import itertools
+  import random
+
+  # aggregate indices of same class
+  zeros = np.where(labels == 0)[0]
+  ones = np.where(labels == 1)[0]
+  twos = np.where(labels == 2)[0]
+  # make permutations of all those points in the same class
+  zeros_ = list(itertools.combinations(zeros, 2))
+  ones_ = list(itertools.combinations(ones, 2))
+  twos_ = list(itertools.combinations(twos, 2))
+  # put them together!
+  sim = np.array(zeros_ + ones_ + twos_)
+
+  # similarly, put together indices of points in different classes
+  dis = []
+  for zero in zeros:
+    for one in ones:
+      dis.append((zero, one))
+    for two in twos:
+      dis.append((zero, two))
+  for one in ones:
+    for two in twos:
+      dis.append((one, two))
+
+  # sample as many dissimilar pairs as we have similar pairs
+  dis = np.array(random.sample(dis, len(sim)))
+
+  # return an array of pairs of indices, of shape (2 * len(sim), 2), and
+  # the corresponding labels, of shape (2 * len(sim),): each similar pair
+  # has a label of +1 and each dissimilar pair has a label of -1
+  return (np.vstack([sim, dis]),
+          np.concatenate([np.ones(len(sim)), -np.ones(len(sim))]))
+
+
+pairs, pairs_labels = create_constraints(y)
+
+
+######################################################################
+# Now that we've created our constraints, let's see what they look like!
+#
+
+print(pairs)
+print(pairs_labels)
+
+
+######################################################################
+# Using our constraints, let's now train ITML again. Note that we are no
+# longer calling the supervised class :py:class:`ITML_Supervised
+# <metric_learn.ITML_Supervised>` but the more generic
+# (weakly-supervised) :py:class:`ITML <metric_learn.ITML>`, which
+# takes the dataset `X` through the `preprocessor` argument (see
+# :ref:`this section <preprocessor_section>` of the documentation to learn
+# about more advanced uses of `preprocessor`) and the pair information `pairs`
+# and `pairs_labels` in the fit method.
+
+itml = metric_learn.ITML(preprocessor=X)
+itml.fit(pairs, pairs_labels)
+
+X_itml = itml.transform(X)
+
+plot_tsne(X_itml, y)
+
+
+######################################################################
+# And that's the result of ITML after being trained on our manually
+# constructed constraints! A bit different from our old result, but not too
+# different.
+#
+# RCA and LSML also have their own specific ways of taking in inputs -
+# it's worth poking around in the `constraints` module to see how
+# exactly this is done.
+#
+# Finally, one of the main advantages of metric-learn is its out-of-the-box
+# compatibility with scikit-learn, for doing `model selection
+# <https://scikit-learn.org/stable/model_selection.html>`__,
+# cross-validation, and scoring for instance. Indeed, supervised algorithms are
+# regular `sklearn.base.TransformerMixin` transformers that can be plugged into
+# any pipeline or cross-validation procedure. And weakly-supervised estimators
+# are also compatible with scikit-learn, since their input dataset format
+# described above allows them to be sliced along the first dimension when doing
+# cross-validations (see also this :ref:`section <sklearn_compat_ws>`). You
+# can also look at some :ref:`use cases <use_cases>` where you could combine
+# metric-learn with scikit-learn estimators.
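+
+########################################################################
+# As a last illustrative sketch of that compatibility (added here, not
+# part of the original walkthrough), we can drop a supervised metric
+# learner straight into a scikit-learn pipeline and cross-validate it
+# like any other estimator:
+
+from sklearn.model_selection import cross_val_score
+from sklearn.neighbors import KNeighborsClassifier
+from sklearn.pipeline import make_pipeline
+
+lmnn_knn = make_pipeline(metric_learn.LMNN(k=5), KNeighborsClassifier())
+print(cross_val_score(lmnn_knn, X, y, cv=3).mean())
+
+########################################################################
+# This brings us to the end of this tutorial!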
Have fun Metric Learning :) From efba316bbd9b6c3fc8c4bfafee564a87bdf1128a Mon Sep 17 00:00:00 2001 From: William de Vazelhes <31916524+wdevazelhes@users.noreply.github.com> Date: Wed, 5 Jun 2019 12:40:36 +0200 Subject: [PATCH 114/210] [MRG] Use pseudo-inverse in Covariance (#206) * FIX: fix covariance algo * some fixes and add non regression test * Use size instead of len * Address https://github.com/metric-learn/metric-learn/pull/206#pullrequestreview-240810281 --- metric_learn/covariance.py | 9 +++++---- test/metric_learn_test.py | 20 +++++++++++++++++++- 2 files changed, 24 insertions(+), 5 deletions(-) diff --git a/metric_learn/covariance.py b/metric_learn/covariance.py index 7a04923d..83d2f9d8 100644 --- a/metric_learn/covariance.py +++ b/metric_learn/covariance.py @@ -10,6 +10,7 @@ from __future__ import absolute_import import numpy as np +import scipy from sklearn.base import TransformerMixin from .base_metric import MahalanobisMixin @@ -35,11 +36,11 @@ def fit(self, X, y=None): y : unused """ X = self._prepare_inputs(X, ensure_min_samples=2) - M = np.cov(X, rowvar = False) - if M.ndim == 0: - M = 1./M + M = np.atleast_2d(np.cov(X, rowvar=False)) + if M.size == 1: + M = 1. / M else: - M = np.linalg.inv(M) + M = scipy.linalg.pinvh(M) self.transformer_ = transformer_from_metric(np.atleast_2d(M)) return self diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py index 06da087a..0ba1fdbe 100644 --- a/test/metric_learn_test.py +++ b/test/metric_learn_test.py @@ -6,7 +6,8 @@ from six.moves import xrange from sklearn.metrics import pairwise_distances from sklearn.datasets import load_iris, make_classification, make_regression -from numpy.testing import assert_array_almost_equal, assert_array_equal +from numpy.testing import (assert_array_almost_equal, assert_array_equal, + assert_allclose) from sklearn.utils.testing import assert_warns_message from sklearn.exceptions import ConvergenceWarning from sklearn.utils.validation import check_X_y @@ -53,6 +54,23 @@ def test_iris(self): # deterministic result self.assertAlmostEqual(csep, 0.72981476) + def test_singular_returns_pseudo_inverse(self): + """Checks that if the input covariance matrix is singular, we return + the pseudo inverse""" + X, y = load_iris(return_X_y=True) + # We add a virtual column that is a linear combination of the other + # columns so that the covariance matrix will be singular + X = np.concatenate([X, X[:, :2].dot([[2], [3]])], axis=1) + cov_matrix = np.cov(X, rowvar=False) + covariance = Covariance() + covariance.fit(X) + pseudo_inverse = covariance.get_mahalanobis_matrix() + # here is the definition of a pseudo inverse according to wikipedia: + assert_allclose(cov_matrix.dot(pseudo_inverse).dot(cov_matrix), + cov_matrix) + assert_allclose(pseudo_inverse.dot(cov_matrix).dot(pseudo_inverse), + pseudo_inverse) + class TestLSML(MetricTestCase): def test_iris(self): From 3899653be835598a9d2b03e3edb82c11ecddb2ff Mon Sep 17 00:00:00 2001 From: William de Vazelhes <31916524+wdevazelhes@users.noreply.github.com> Date: Fri, 7 Jun 2019 11:50:13 +0200 Subject: [PATCH 115/210] [MRG] Uniformize num_dims to n_components and add it for LMNN (#193) * Uniformize num_dims and add it for LMNN * MAINT: fix imports * Fix: fix test_num_dims * MAINT: Address https://github.com/metric-learn/metric-learn/pull/193#pullrequestreview-228280763 * Refactor num_dims in n_components and add deprecation * FIX make some tests work * FIX Make tests work (fix deprecation messages and fix RCA example) * Remove unused import * Revert "Remove 
unused import" This reverts commit 81c9a8d1a7b4aca1db58153d6dd2b85dceffe0c7. * Fix import * FIX fix some tests * Allow more general sign switching in test_lfda --- metric_learn/_util.py | 10 ++ metric_learn/base_metric.py | 6 +- metric_learn/covariance.py | 2 +- metric_learn/itml.py | 8 +- metric_learn/lfda.py | 33 ++++--- metric_learn/lmnn.py | 28 +++++- metric_learn/lsml.py | 6 +- metric_learn/mlkr.py | 27 ++++-- metric_learn/mmc.py | 6 +- metric_learn/nca.py | 34 ++++--- metric_learn/rca.py | 55 ++++++----- metric_learn/sdml.py | 6 +- test/metric_learn_test.py | 103 ++++++++++++++++++--- test/test_base_metric.py | 61 ++++++++++-- test/test_fit_transform.py | 16 ++-- test/test_mahalanobis_mixin.py | 7 +- test/test_sklearn_compat.py | 8 +- test/test_transformer_metric_conversion.py | 6 +- test/test_utils.py | 26 +++++- 19 files changed, 327 insertions(+), 121 deletions(-) diff --git a/metric_learn/_util.py b/metric_learn/_util.py index 105d89b5..583f1105 100644 --- a/metric_learn/_util.py +++ b/metric_learn/_util.py @@ -411,3 +411,13 @@ def validate_vector(u, dtype=None): if u.ndim > 1: raise ValueError("Input vector should be 1-D.") return u + + +def _check_n_components(n_features, n_components): + """Checks that n_components is less than n_features and deal with the None + case""" + if n_components is None: + return n_features + if 0 < n_components <= n_features: + return n_components + raise ValueError('Invalid n_components, must be in [1, %d]' % n_features) diff --git a/metric_learn/base_metric.py b/metric_learn/base_metric.py index 9f127f58..856591cb 100644 --- a/metric_learn/base_metric.py +++ b/metric_learn/base_metric.py @@ -172,7 +172,7 @@ class MahalanobisMixin(six.with_metaclass(ABCMeta, BaseMetricLearner, Attributes ---------- - transformer_ : `numpy.ndarray`, shape=(num_dims, n_features) + transformer_ : `numpy.ndarray`, shape=(n_components, n_features) The learned linear transformation ``L``. """ @@ -232,7 +232,7 @@ def transform(self, X): Returns ------- - X_embedded : `numpy.ndarray`, shape=(n_samples, num_dims) + X_embedded : `numpy.ndarray`, shape=(n_samples, n_components) The embedded data points. """ X_checked = check_input(X, type_of_inputs='classic', estimator=self, @@ -288,7 +288,7 @@ def get_mahalanobis_matrix(self): Returns ------- - M : `numpy.ndarray`, shape=(n_components, n_features) + M : `numpy.ndarray`, shape=(n_features, n_features) The copy of the learned Mahalanobis matrix. """ return self.transformer_.T.dot(self.transformer_) diff --git a/metric_learn/covariance.py b/metric_learn/covariance.py index 83d2f9d8..7f606921 100644 --- a/metric_learn/covariance.py +++ b/metric_learn/covariance.py @@ -22,7 +22,7 @@ class Covariance(MahalanobisMixin, TransformerMixin): Attributes ---------- - transformer_ : `numpy.ndarray`, shape=(num_dims, n_features) + transformer_ : `numpy.ndarray`, shape=(n_components, n_features) The linear transformation ``L`` deduced from the learned Mahalanobis metric (See function `transformer_from_metric`.) """ diff --git a/metric_learn/itml.py b/metric_learn/itml.py index e3ff515a..25518bf6 100644 --- a/metric_learn/itml.py +++ b/metric_learn/itml.py @@ -150,7 +150,7 @@ class ITML(_BaseITML, _PairsClassifierMixin): n_iter_ : `int` The number of iterations the solver has run. - transformer_ : `numpy.ndarray`, shape=(num_dims, n_features) + transformer_ : `numpy.ndarray`, shape=(n_components, n_features) The linear transformation ``L`` deduced from the learned Mahalanobis metric (See function `transformer_from_metric`.) 
@@ -218,7 +218,7 @@ class ITML_Supervised(_BaseITML, TransformerMixin): n_iter_ : `int` The number of iterations the solver has run. - transformer_ : `numpy.ndarray`, shape=(num_dims, n_features) + transformer_ : `numpy.ndarray`, shape=(n_components, n_features) The linear transformation ``L`` deduced from the learned Mahalanobis metric (See function `transformer_from_metric`.) """ @@ -292,11 +292,11 @@ def fit(self, X, y, random_state=np.random, bounds=None): if self.num_labeled != 'deprecated': warnings.warn('"num_labeled" parameter is not used.' ' It has been deprecated in version 0.5.0 and will be' - 'removed in 0.6.0', DeprecationWarning) + ' removed in 0.6.0', DeprecationWarning) if self.bounds != 'deprecated': warnings.warn('"bounds" parameter from initialization is not used.' ' It has been deprecated in version 0.5.0 and will be' - 'removed in 0.6.0. Use the "bounds" parameter of this ' + ' removed in 0.6.0. Use the "bounds" parameter of this ' 'fit method instead.', DeprecationWarning) X, y = self._prepare_inputs(X, y, ensure_min_samples=2) num_constraints = self.num_constraints diff --git a/metric_learn/lfda.py b/metric_learn/lfda.py index 2ca085d4..1851a734 100644 --- a/metric_learn/lfda.py +++ b/metric_learn/lfda.py @@ -16,6 +16,8 @@ from six.moves import xrange from sklearn.metrics import pairwise_distances from sklearn.base import TransformerMixin + +from ._util import _check_n_components from .base_metric import MahalanobisMixin @@ -26,23 +28,29 @@ class LFDA(MahalanobisMixin, TransformerMixin): Attributes ---------- - transformer_ : `numpy.ndarray`, shape=(num_dims, n_features) + transformer_ : `numpy.ndarray`, shape=(n_components, n_features) The learned linear transformation ``L``. ''' - def __init__(self, num_dims=None, k=None, embedding_type='weighted', - preprocessor=None): + def __init__(self, n_components=None, num_dims='deprecated', + k=None, embedding_type='weighted', preprocessor=None): ''' Initialize LFDA. Parameters ---------- - num_dims : int, optional - Dimensionality of reduced space (defaults to dimension of X) + n_components : int or None, optional (default=None) + Dimensionality of reduced space (if None, defaults to dimension of X). + + num_dims : Not used + + .. deprecated:: 0.5.0 + `num_dims` was deprecated in version 0.5.0 and will + be removed in 0.6.0. Use `n_components` instead. k : int, optional Number of nearest neighbors used in local scaling method. - Defaults to min(7, num_dims - 1). + Defaults to min(7, n_components - 1). embedding_type : str, optional Type of metric in the embedding space (default: 'weighted') @@ -56,6 +64,7 @@ def __init__(self, num_dims=None, k=None, embedding_type='weighted', ''' if embedding_type not in ('weighted', 'orthonormalized', 'plain'): raise ValueError('Invalid embedding_type: %r' % embedding_type) + self.n_components = n_components self.num_dims = num_dims self.embedding_type = embedding_type self.k = k @@ -72,17 +81,17 @@ def fit(self, X, y): y : (n,) array-like Class labels, one per point of data. ''' + if self.num_dims != 'deprecated': + warnings.warn('"num_dims" parameter is not used.' + ' It has been deprecated in version 0.5.0 and will be' + ' removed in 0.6.0. 
Use "n_components" instead', + DeprecationWarning) X, y = self._prepare_inputs(X, y, ensure_min_samples=2) unique_classes, y = np.unique(y, return_inverse=True) n, d = X.shape num_classes = len(unique_classes) - if self.num_dims is None: - dim = d - else: - if not 0 < self.num_dims <= d: - raise ValueError('Invalid num_dims, must be in [1,%d]' % d) - dim = self.num_dims + dim = _check_n_components(d, self.n_components) if self.k is None: k = min(7, d - 1) diff --git a/metric_learn/lmnn.py b/metric_learn/lmnn.py index d70ca3d0..1ba87684 100644 --- a/metric_learn/lmnn.py +++ b/metric_learn/lmnn.py @@ -19,6 +19,8 @@ from six.moves import xrange from sklearn.metrics import euclidean_distances from sklearn.base import TransformerMixin + +from ._util import _check_n_components from .base_metric import MahalanobisMixin @@ -26,7 +28,8 @@ class _base_LMNN(MahalanobisMixin, TransformerMixin): def __init__(self, k=3, min_iter=50, max_iter=1000, learn_rate=1e-7, regularization=0.5, convergence_tol=0.001, use_pca=True, - verbose=False, preprocessor=None): + verbose=False, preprocessor=None, n_components=None, + num_dims='deprecated'): """Initialize the LMNN object. Parameters @@ -40,6 +43,15 @@ def __init__(self, k=3, min_iter=50, max_iter=1000, learn_rate=1e-7, preprocessor : array-like, shape=(n_samples, n_features) or callable The preprocessor to call to get tuples from indices. If array-like, tuples will be formed like this: X[indices]. + + n_components : int or None, optional (default=None) + Dimensionality of reduced space (if None, defaults to dimension of X). + + num_dims : Not used + + .. deprecated:: 0.5.0 + `num_dims` was deprecated in version 0.5.0 and will + be removed in 0.6.0. Use `n_components` instead. """ self.k = k self.min_iter = min_iter @@ -49,6 +61,8 @@ def __init__(self, k=3, min_iter=50, max_iter=1000, learn_rate=1e-7, self.convergence_tol = convergence_tol self.use_pca = use_pca self.verbose = verbose + self.n_components = n_components + self.num_dims = num_dims super(_base_LMNN, self).__init__(preprocessor) @@ -56,20 +70,26 @@ def __init__(self, k=3, min_iter=50, max_iter=1000, learn_rate=1e-7, class python_LMNN(_base_LMNN): def fit(self, X, y): + if self.num_dims != 'deprecated': + warnings.warn('"num_dims" parameter is not used.' + ' It has been deprecated in version 0.5.0 and will be' + ' removed in 0.6.0. Use "n_components" instead', + DeprecationWarning) k = self.k reg = self.regularization learn_rate = self.learn_rate X, y = self._prepare_inputs(X, y, dtype=float, ensure_min_samples=2) - num_pts, num_dims = X.shape + num_pts, d = X.shape + output_dim = _check_n_components(d, self.n_components) unique_labels, label_inds = np.unique(y, return_inverse=True) if len(label_inds) != num_pts: raise ValueError('Must have one label per point.') self.labels_ = np.arange(len(unique_labels)) if self.use_pca: warnings.warn('use_pca does nothing for the python_LMNN implementation') - self.transformer_ = np.eye(num_dims) + self.transformer_ = np.eye(output_dim, d) required_k = np.bincount(label_inds).min() if self.k > required_k: raise ValueError('not enough class labels for specified k' @@ -272,7 +292,7 @@ class LMNN(_base_LMNN): n_iter_ : `int` The number of iterations the solver has run. - transformer_ : `numpy.ndarray`, shape=(num_dims, n_features) + transformer_ : `numpy.ndarray`, shape=(n_components, n_features) The learned linear transformation ``L``. 
""" diff --git a/metric_learn/lsml.py b/metric_learn/lsml.py index 1d66cbc0..94366b88 100644 --- a/metric_learn/lsml.py +++ b/metric_learn/lsml.py @@ -146,7 +146,7 @@ class LSML(_BaseLSML, _QuadrupletsClassifierMixin): n_iter_ : `int` The number of iterations the solver has run. - transformer_ : `numpy.ndarray`, shape=(num_dims, n_features) + transformer_ : `numpy.ndarray`, shape=(n_components, n_features) The linear transformation ``L`` deduced from the learned Mahalanobis metric (See function `transformer_from_metric`.) """ @@ -182,7 +182,7 @@ class LSML_Supervised(_BaseLSML, TransformerMixin): n_iter_ : `int` The number of iterations the solver has run. - transformer_ : `numpy.ndarray`, shape=(num_dims, n_features) + transformer_ : `numpy.ndarray`, shape=(n_components, n_features) The linear transformation ``L`` deduced from the learned Mahalanobis metric (See function `transformer_from_metric`.) """ @@ -241,7 +241,7 @@ def fit(self, X, y, random_state=np.random): if self.num_labeled != 'deprecated': warnings.warn('"num_labeled" parameter is not used.' ' It has been deprecated in version 0.5.0 and will be' - 'removed in 0.6.0', DeprecationWarning) + ' removed in 0.6.0', DeprecationWarning) X, y = self._prepare_inputs(X, y, ensure_min_samples=2) num_constraints = self.num_constraints if num_constraints is None: diff --git a/metric_learn/mlkr.py b/metric_learn/mlkr.py index 927c64e3..762317b9 100644 --- a/metric_learn/mlkr.py +++ b/metric_learn/mlkr.py @@ -23,6 +23,8 @@ from sklearn.metrics import pairwise_distances + +from metric_learn._util import _check_n_components from .base_metric import MahalanobisMixin EPS = np.finfo(float).eps @@ -36,19 +38,25 @@ class MLKR(MahalanobisMixin, TransformerMixin): n_iter_ : `int` The number of iterations the solver has run. - transformer_ : `numpy.ndarray`, shape=(num_dims, n_features) + transformer_ : `numpy.ndarray`, shape=(n_components, n_features) The learned linear transformation ``L``. """ - def __init__(self, num_dims=None, A0=None, tol=None, max_iter=1000, - verbose=False, preprocessor=None): + def __init__(self, n_components=None, num_dims='deprecated', A0=None, + tol=None, max_iter=1000, verbose=False, preprocessor=None): """ Initialize MLKR. Parameters ---------- - num_dims : int, optional - Dimensionality of reduced space (defaults to dimension of X) + n_components : int or None, optional (default=None) + Dimensionality of reduced space (if None, defaults to dimension of X). + + num_dims : Not used + + .. deprecated:: 0.5.0 + `num_dims` was deprecated in version 0.5.0 and will + be removed in 0.6.0. Use `n_components` instead. A0: array-like, optional Initialization of transformation matrix. Defaults to PCA loadings. @@ -66,6 +74,7 @@ def __init__(self, num_dims=None, A0=None, tol=None, max_iter=1000, The preprocessor to call to get tuples from indices. If array-like, tuples will be formed like this: X[indices]. """ + self.n_components = n_components self.num_dims = num_dims self.A0 = A0 self.tol = tol @@ -82,6 +91,11 @@ def fit(self, X, y): X : (n x d) array of samples y : (n) data labels """ + if self.num_dims != 'deprecated': + warnings.warn('"num_dims" parameter is not used.' + ' It has been deprecated in version 0.5.0 and will be' + ' removed in 0.6.0. 
Use "n_components" instead', + DeprecationWarning) X, y = self._prepare_inputs(X, y, y_numeric=True, ensure_min_samples=2) n, d = X.shape @@ -90,7 +104,8 @@ def fit(self, X, y): % (n, y.shape[0])) A = self.A0 - m = self.num_dims + m = _check_n_components(d, self.n_components) + m = self.n_components if m is None: m = d if A is None: diff --git a/metric_learn/mmc.py b/metric_learn/mmc.py index eb7dc529..0e6cd5cb 100644 --- a/metric_learn/mmc.py +++ b/metric_learn/mmc.py @@ -356,7 +356,7 @@ class MMC(_BaseMMC, _PairsClassifierMixin): n_iter_ : `int` The number of iterations the solver has run. - transformer_ : `numpy.ndarray`, shape=(num_dims, n_features) + transformer_ : `numpy.ndarray`, shape=(n_components, n_features) The linear transformation ``L`` deduced from the learned Mahalanobis metric (See function `transformer_from_metric`.) @@ -406,7 +406,7 @@ class MMC_Supervised(_BaseMMC, TransformerMixin): n_iter_ : `int` The number of iterations the solver has run. - transformer_ : `numpy.ndarray`, shape=(num_dims, n_features) + transformer_ : `numpy.ndarray`, shape=(n_components, n_features) The linear transformation ``L`` deduced from the learned Mahalanobis metric (See function `transformer_from_metric`.) """ @@ -469,7 +469,7 @@ def fit(self, X, y, random_state=np.random): if self.num_labeled != 'deprecated': warnings.warn('"num_labeled" parameter is not used.' ' It has been deprecated in version 0.5.0 and will be' - 'removed in 0.6.0', DeprecationWarning) + ' removed in 0.6.0', DeprecationWarning) X, y = self._prepare_inputs(X, y, ensure_min_samples=2) num_constraints = self.num_constraints if num_constraints is None: diff --git a/metric_learn/nca.py b/metric_learn/nca.py index 7139f0ff..3545aa89 100644 --- a/metric_learn/nca.py +++ b/metric_learn/nca.py @@ -23,6 +23,7 @@ from sklearn.utils.fixes import logsumexp from sklearn.base import TransformerMixin +from ._util import _check_n_components from .base_metric import MahalanobisMixin EPS = np.finfo(float).eps @@ -36,19 +37,24 @@ class NCA(MahalanobisMixin, TransformerMixin): n_iter_ : `int` The number of iterations the solver has run. - transformer_ : `numpy.ndarray`, shape=(num_dims, n_features) + transformer_ : `numpy.ndarray`, shape=(n_components, n_features) The learned linear transformation ``L``. """ - def __init__(self, num_dims=None, max_iter=100, tol=None, verbose=False, - preprocessor=None): + def __init__(self, n_components=None, num_dims='deprecated', max_iter=100, + tol=None, verbose=False, preprocessor=None): """Neighborhood Components Analysis Parameters ---------- - num_dims : int, optional (default=None) - Embedding dimensionality. If None, will be set to ``n_features`` - (``d``) at fit time. + n_components : int or None, optional (default=None) + Dimensionality of reduced space (if None, defaults to dimension of X). + + num_dims : Not used + + .. deprecated:: 0.5.0 + `num_dims` was deprecated in version 0.5.0 and will + be removed in 0.6.0. Use `n_components` instead. max_iter : int, optional (default=100) Maximum number of iterations done by the optimization algorithm. @@ -59,6 +65,7 @@ def __init__(self, num_dims=None, max_iter=100, tol=None, verbose=False, verbose : bool, optional (default=False) Whether to print progress messages or not. """ + self.n_components = n_components self.num_dims = num_dims self.max_iter = max_iter self.tol = tol @@ -70,18 +77,21 @@ def fit(self, X, y): X: data matrix, (n x d) y: scalar labels, (n) """ + if self.num_dims != 'deprecated': + warnings.warn('"num_dims" parameter is not used.' 
+ ' It has been deprecated in version 0.5.0 and will be' + ' removed in 0.6.0. Use "n_components" instead', + DeprecationWarning) X, labels = self._prepare_inputs(X, y, ensure_min_samples=2) n, d = X.shape - num_dims = self.num_dims - if num_dims is None: - num_dims = d + n_components = _check_n_components(d, self.n_components) # Measure the total training time train_time = time.time() # Initialize A to a scaling matrix - A = np.zeros((num_dims, d)) - np.fill_diagonal(A, 1./(np.maximum(X.max(axis=0)-X.min(axis=0), EPS))) + A = np.zeros((n_components, d)) + np.fill_diagonal(A, 1. / (np.maximum(X.max(axis=0) - X.min(axis=0), EPS))) # Run NCA mask = labels[:, np.newaxis] == labels[np.newaxis, :] @@ -130,7 +140,7 @@ def _loss_grad_lbfgs(self, A, X, mask, sign=1.0): start_time = time.time() A = A.reshape(-1, X.shape[1]) - X_embedded = np.dot(X, A.T) # (n_samples, num_dims) + X_embedded = np.dot(X, A.T) # (n_samples, n_components) # Compute softmax distances p_ij = pairwise_distances(X_embedded, squared=True) np.fill_diagonal(p_ij, np.inf) diff --git a/metric_learn/rca.py b/metric_learn/rca.py index 7d0bb21f..45c9bbf2 100644 --- a/metric_learn/rca.py +++ b/metric_learn/rca.py @@ -18,6 +18,7 @@ from sklearn import decomposition from sklearn.base import TransformerMixin +from ._util import _check_n_components from .base_metric import MahalanobisMixin from .constraints import Constraints @@ -42,17 +43,24 @@ class RCA(MahalanobisMixin, TransformerMixin): Attributes ---------- - transformer_ : `numpy.ndarray`, shape=(num_dims, n_features) + transformer_ : `numpy.ndarray`, shape=(n_components, n_features) The learned linear transformation ``L``. """ - def __init__(self, num_dims=None, pca_comps=None, preprocessor=None): + def __init__(self, n_components=None, num_dims='deprecated', + pca_comps=None, preprocessor=None): """Initialize the learner. Parameters ---------- - num_dims : int, optional - embedding dimension (default: original dimension of data) + n_components : int or None, optional (default=None) + Dimensionality of reduced space (if None, defaults to dimension of X). + + num_dims : Not used + + .. deprecated:: 0.5.0 + `num_dims` was deprecated in version 0.5.0 and will + be removed in 0.6.0. Use `n_components` instead. pca_comps : int, float, None or string Number of components to keep during PCA preprocessing. @@ -65,6 +73,7 @@ def __init__(self, num_dims=None, pca_comps=None, preprocessor=None): The preprocessor to call to get tuples from indices. If array-like, tuples will be formed like this: X[indices]. """ + self.n_components = n_components self.num_dims = num_dims self.pca_comps = pca_comps super(RCA, self).__init__(preprocessor) @@ -77,16 +86,7 @@ def _check_dimension(self, rank, X): 'You should adjust pca_comps to remove noise and ' 'redundant information.') - if self.num_dims is None: - dim = d - elif self.num_dims <= 0: - raise ValueError('Invalid embedding dimension: must be greater than 0.') - elif self.num_dims > d: - dim = d - warnings.warn('num_dims (%d) must be smaller than ' - 'the data dimension (%d)' % (self.num_dims, d)) - else: - dim = self.num_dims + dim = _check_n_components(d, self.n_components) return dim def fit(self, X, chunks): @@ -100,6 +100,11 @@ def fit(self, X, chunks): When ``chunks[i] == -1``, point i doesn't belong to any chunklet. When ``chunks[i] == j``, point i belongs to chunklet j. """ + if self.num_dims != 'deprecated': + warnings.warn('"num_dims" parameter is not used.' + ' It has been deprecated in version 0.5.0 and will be' + ' removed in 0.6.0. 
Use "n_components" instead', + DeprecationWarning) X, chunks = self._prepare_inputs(X, chunks, ensure_min_samples=2) # PCA projection to remove noise and redundant information. @@ -145,12 +150,13 @@ class RCA_Supervised(RCA): Attributes ---------- - transformer_ : `numpy.ndarray`, shape=(num_dims, n_features) + transformer_ : `numpy.ndarray`, shape=(n_components, n_features) The learned linear transformation ``L``. """ - def __init__(self, num_dims=None, pca_comps=None, num_chunks=100, - chunk_size=2, preprocessor=None): + def __init__(self, num_dims='deprecated', n_components=None, + pca_comps=None, num_chunks=100, chunk_size=2, + preprocessor=None): """Initialize the supervised version of `RCA`. `RCA_Supervised` creates chunks of similar points by first sampling a @@ -159,16 +165,23 @@ def __init__(self, num_dims=None, pca_comps=None, num_chunks=100, Parameters ---------- - num_dims : int, optional - embedding dimension (default: original dimension of data) + n_components : int or None, optional (default=None) + Dimensionality of reduced space (if None, defaults to dimension of X). + + num_dims : Not used + + .. deprecated:: 0.5.0 + `num_dims` was deprecated in version 0.5.0 and will + be removed in 0.6.0. Use `n_components` instead. + num_chunks: int, optional chunk_size: int, optional preprocessor : array-like, shape=(n_samples, n_features) or callable The preprocessor to call to get tuples from indices. If array-like, tuples will be formed like this: X[indices]. """ - RCA.__init__(self, num_dims=num_dims, pca_comps=pca_comps, - preprocessor=preprocessor) + RCA.__init__(self, num_dims=num_dims, n_components=n_components, + pca_comps=pca_comps, preprocessor=preprocessor) self.num_chunks = num_chunks self.chunk_size = chunk_size diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py index b300b9ac..73eeefb7 100644 --- a/metric_learn/sdml.py +++ b/metric_learn/sdml.py @@ -139,7 +139,7 @@ class SDML(_BaseSDML, _PairsClassifierMixin): Attributes ---------- - transformer_ : `numpy.ndarray`, shape=(num_dims, n_features) + transformer_ : `numpy.ndarray`, shape=(n_components, n_features) The linear transformation ``L`` deduced from the learned Mahalanobis metric (See function `transformer_from_metric`.) @@ -187,7 +187,7 @@ class SDML_Supervised(_BaseSDML, TransformerMixin): Attributes ---------- - transformer_ : `numpy.ndarray`, shape=(num_dims, n_features) + transformer_ : `numpy.ndarray`, shape=(n_components, n_features) The linear transformation ``L`` deduced from the learned Mahalanobis metric (See function `transformer_from_metric`.) """ @@ -247,7 +247,7 @@ def fit(self, X, y, random_state=np.random): if self.num_labeled != 'deprecated': warnings.warn('"num_labeled" parameter is not used.' ' It has been deprecated in version 0.5.0 and will be' - 'removed in 0.6.0', DeprecationWarning) + ' removed in 0.6.0', DeprecationWarning) X, y = self._prepare_inputs(X, y, ensure_min_samples=2) num_constraints = self.num_constraints if num_constraints is None: diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py index 0ba1fdbe..969bd7e5 100644 --- a/test/metric_learn_test.py +++ b/test/metric_learn_test.py @@ -17,7 +17,7 @@ HAS_SKGGM = False else: HAS_SKGGM = True -from metric_learn import (LMNN, NCA, LFDA, Covariance, MLKR, MMC, +from metric_learn import (LMNN, NCA, LFDA, Covariance, MLKR, MMC, RCA, LSML_Supervised, ITML_Supervised, SDML_Supervised, RCA_Supervised, MMC_Supervised, SDML, ITML) # Import this specially for testing. 
@@ -89,7 +89,7 @@ def test_deprecation_num_labeled(self): lsml_supervised = LSML_Supervised(num_labeled=np.inf) msg = ('"num_labeled" parameter is not used.' ' It has been deprecated in version 0.5.0 and will be' - 'removed in 0.6.0') + ' removed in 0.6.0') assert_warns_message(DeprecationWarning, msg, lsml_supervised.fit, X, y) @@ -110,7 +110,7 @@ def test_deprecation_num_labeled(self): itml_supervised = ITML_Supervised(num_labeled=np.inf) msg = ('"num_labeled" parameter is not used.' ' It has been deprecated in version 0.5.0 and will be' - 'removed in 0.6.0') + ' removed in 0.6.0') assert_warns_message(DeprecationWarning, msg, itml_supervised.fit, X, y) def test_deprecation_bounds(self): @@ -122,7 +122,7 @@ def test_deprecation_bounds(self): itml_supervised = ITML_Supervised(bounds=None) msg = ('"bounds" parameter from initialization is not used.' ' It has been deprecated in version 0.5.0 and will be' - 'removed in 0.6.0. Use the "bounds" parameter of this ' + ' removed in 0.6.0. Use the "bounds" parameter of this ' 'fit method instead.') assert_warns_message(DeprecationWarning, msg, itml_supervised.fit, X, y) @@ -429,7 +429,7 @@ def test_deprecation_num_labeled(self): balance_param=5e-5) msg = ('"num_labeled" parameter is not used.' ' It has been deprecated in version 0.5.0 and will be' - 'removed in 0.6.0') + ' removed in 0.6.0') assert_warns_message(DeprecationWarning, msg, sdml_supervised.fit, X, y) def test_sdml_raises_warning_non_psd(self): @@ -537,13 +537,13 @@ def test_iris(self): n = self.iris_points.shape[0] # Without dimension reduction - nca = NCA(max_iter=(100000//n)) + nca = NCA(max_iter=(100000 // n)) nca.fit(self.iris_points, self.iris_labels) csep = class_separation(nca.transform(self.iris_points), self.iris_labels) self.assertLess(csep, 0.15) # With dimension reduction - nca = NCA(max_iter=(100000//n), num_dims=2) + nca = NCA(max_iter=(100000 // n), n_components=2) nca.fit(self.iris_points, self.iris_labels) csep = class_separation(nca.transform(self.iris_points), self.iris_labels) self.assertLess(csep, 0.20) @@ -583,7 +583,7 @@ def test_simple_example(self): """ X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) y = np.array([1, 0, 1, 0]) - nca = NCA(num_dims=2,) + nca = NCA(n_components=2,) nca.fit(X, y) Xansformed = nca.transform(X) np.testing.assert_equal(pairwise_distances(Xansformed).argsort()[:, 1], @@ -626,7 +626,7 @@ def test_singleton_class(self): A = np.zeros((X.shape[1], X.shape[1])) np.fill_diagonal(A, 1. / (np.maximum(X.max(axis=0) - X.min(axis=0), EPS))) - nca = NCA(max_iter=30, num_dims=X.shape[1]) + nca = NCA(max_iter=30, n_components=X.shape[1]) nca.fit(X, y) assert_array_equal(nca.transformer_, A) @@ -639,14 +639,30 @@ def test_one_class(self): A = np.zeros((X.shape[1], X.shape[1])) np.fill_diagonal(A, 1. / (np.maximum(X.max(axis=0) - X.min(axis=0), EPS))) - nca = NCA(max_iter=30, num_dims=X.shape[1]) + nca = NCA(max_iter=30, n_components=X.shape[1]) nca.fit(X, y) assert_array_equal(nca.transformer_, A) +@pytest.mark.parametrize('num_dims', [None, 2]) +def test_deprecation_num_dims_nca(num_dims): + # test that a deprecation message is thrown if num_labeled is set at + # initialization + # TODO: remove in v.0.6 + X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) + y = np.array([1, 0, 1, 0]) + nca = NCA(num_dims=num_dims) + msg = ('"num_dims" parameter is not used.' + ' It has been deprecated in version 0.5.0 and will be' + ' removed in 0.6.0. 
Use "n_components" instead') + with pytest.warns(DeprecationWarning) as raised_warning: + nca.fit(X, y) + assert (str(raised_warning[0].message) == msg) + + class TestLFDA(MetricTestCase): def test_iris(self): - lfda = LFDA(k=2, num_dims=2) + lfda = LFDA(k=2, n_components=2) lfda.fit(self.iris_points, self.iris_labels) csep = class_separation(lfda.transform(self.iris_points), self.iris_labels) self.assertLess(csep, 0.15) @@ -656,9 +672,25 @@ def test_iris(self): self.assertEqual(lfda.transformer_.shape, (2, 4)) +@pytest.mark.parametrize('num_dims', [None, 2]) +def test_deprecation_num_dims_lfda(num_dims): + # test that a deprecation message is thrown if num_labeled is set at + # initialization + # TODO: remove in v.0.6 + X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) + y = np.array([1, 0, 1, 0]) + lfda = LFDA(num_dims=num_dims) + msg = ('"num_dims" parameter is not used.' + ' It has been deprecated in version 0.5.0 and will be' + ' removed in 0.6.0. Use "n_components" instead') + with pytest.warns(DeprecationWarning) as raised_warning: + lfda.fit(X, y) + assert (str(raised_warning[0].message) == msg) + + class TestRCA(MetricTestCase): def test_iris(self): - rca = RCA_Supervised(num_dims=2, num_chunks=30, chunk_size=2) + rca = RCA_Supervised(n_components=2, num_chunks=30, chunk_size=2) rca.fit(self.iris_points, self.iris_labels) csep = class_separation(rca.transform(self.iris_points), self.iris_labels) self.assertLess(csep, 0.25) @@ -667,19 +699,44 @@ def test_feature_null_variance(self): X = np.hstack((self.iris_points, np.eye(len(self.iris_points), M=1))) # Apply PCA with the number of components - rca = RCA_Supervised(num_dims=2, pca_comps=3, num_chunks=30, chunk_size=2) + rca = RCA_Supervised(n_components=2, pca_comps=3, num_chunks=30, + chunk_size=2) rca.fit(X, self.iris_labels) csep = class_separation(rca.transform(X), self.iris_labels) self.assertLess(csep, 0.30) # Apply PCA with the minimum variance ratio - rca = RCA_Supervised(num_dims=2, pca_comps=0.95, num_chunks=30, + rca = RCA_Supervised(n_components=2, pca_comps=0.95, num_chunks=30, chunk_size=2) rca.fit(X, self.iris_labels) csep = class_separation(rca.transform(X), self.iris_labels) self.assertLess(csep, 0.30) +@pytest.mark.parametrize('num_dims', [None, 2]) +def test_deprecation_num_dims_rca(num_dims): + # test that a deprecation message is thrown if num_labeled is set at + # initialization + # TODO: remove in v.0.6 + X, y = load_iris(return_X_y=True) + rca = RCA(num_dims=num_dims) + msg = ('"num_dims" parameter is not used.' + ' It has been deprecated in version 0.5.0 and will be' + ' removed in 0.6.0. Use "n_components" instead') + with pytest.warns(DeprecationWarning) as raised_warning: + rca.fit(X, y) + assert (str(raised_warning[0].message) == msg) + + # we take a small number of chunks so that RCA works on iris + rca_supervised = RCA_Supervised(num_dims=num_dims, num_chunks=10) + msg = ('"num_dims" parameter is not used.' + ' It has been deprecated in version 0.5.0 and will be' + ' removed in 0.6.0. Use "n_components" instead') + with pytest.warns(DeprecationWarning) as raised_warning: + rca_supervised.fit(X, y) + assert (str(raised_warning[0].message) == msg) + + class TestMLKR(MetricTestCase): def test_iris(self): mlkr = MLKR() @@ -711,6 +768,22 @@ def grad_fn(M): np.testing.assert_almost_equal(rel_diff, 0.) 
+@pytest.mark.parametrize('num_dims', [None, 2]) +def test_deprecation_num_dims_mlkr(num_dims): + # test that a deprecation message is thrown if num_labeled is set at + # initialization + # TODO: remove in v.0.6 + X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) + y = np.array([1, 0, 1, 0]) + mlkr = MLKR(num_dims=num_dims) + msg = ('"num_dims" parameter is not used.' + ' It has been deprecated in version 0.5.0 and will be' + ' removed in 0.6.0. Use "n_components" instead') + with pytest.warns(DeprecationWarning) as raised_warning: + mlkr.fit(X, y) + assert (str(raised_warning[0].message) == msg) + + class TestMMC(MetricTestCase): def test_iris(self): @@ -758,7 +831,7 @@ def test_deprecation_num_labeled(self): mmc_supervised = MMC_Supervised(num_labeled=np.inf) msg = ('"num_labeled" parameter is not used.' ' It has been deprecated in version 0.5.0 and will be' - 'removed in 0.6.0') + ' removed in 0.6.0') assert_warns_message(DeprecationWarning, msg, mmc_supervised.fit, X, y) diff --git a/test/test_base_metric.py b/test/test_base_metric.py index e5f2e17b..7706b1e4 100644 --- a/test/test_base_metric.py +++ b/test/test_base_metric.py @@ -22,20 +22,22 @@ def test_lmnn(self): self.assertRegexpMatches( str(metric_learn.LMNN()), r"(python_)?LMNN\(convergence_tol=0.001, k=3, learn_rate=1e-07, " - r"max_iter=1000,\s+min_iter=50, preprocessor=None, " - r"regularization=0.5, use_pca=True,\s+verbose=False\)") + r"max_iter=1000,\s+min_iter=50, n_components=None, " + r"num_dims='deprecated',\s+preprocessor=None, " + r"regularization=0.5, use_pca=True, verbose=False\)") def test_nca(self): self.assertEqual(remove_spaces(str(metric_learn.NCA())), remove_spaces( - "NCA(max_iter=100, num_dims=None, preprocessor=None, " + "NCA(max_iter=100, n_components=None, " + "num_dims='deprecated', preprocessor=None, " "tol=None, verbose=False)")) def test_lfda(self): self.assertEqual(remove_spaces(str(metric_learn.LFDA())), remove_spaces( "LFDA(embedding_type='weighted', k=None, " - "num_dims=None, " + "n_components=None, num_dims='deprecated'," "preprocessor=None)")) def test_itml(self): @@ -79,19 +81,23 @@ def test_sdml(self): def test_rca(self): self.assertEqual(remove_spaces(str(metric_learn.RCA())), - remove_spaces("RCA(num_dims=None, pca_comps=None, " + remove_spaces("RCA(n_components=None, " + "num_dims='deprecated', " + "pca_comps=None, " "preprocessor=None)")) self.assertEqual(remove_spaces(str(metric_learn.RCA_Supervised())), remove_spaces( - "RCA_Supervised(chunk_size=2, num_chunks=100, " - "num_dims=None, pca_comps=None,\n " + "RCA_Supervised(chunk_size=2, " + "n_components=None, num_chunks=100, " + "num_dims='deprecated', pca_comps=None, " "preprocessor=None)")) def test_mlkr(self): self.assertEqual(remove_spaces(str(metric_learn.MLKR())), remove_spaces( - "MLKR(A0=None, max_iter=1000, num_dims=None, " - "preprocessor=None, tol=None,\n verbose=False)")) + "MLKR(A0=None, max_iter=1000, n_components=None, " + "num_dims='deprecated', " + "preprocessor=None, tol=None, verbose=False)")) def test_mmc(self): self.assertEqual(remove_spaces(str(metric_learn.MMC())), @@ -183,5 +189,42 @@ def test_get_metric_works_does_not_raise(estimator, build_dataset): assert len(record) == 0 +@pytest.mark.parametrize('estimator, build_dataset', metric_learners, + ids=ids_metric_learners) +def test_n_components(estimator, build_dataset): + """Check that estimators that have a n_components parameters can use it + and that it actually works as expected""" + input_data, labels, _, X = build_dataset() + model = clone(estimator) + + if 
hasattr(model, 'n_components'): + set_random_state(model) + model.set_params(n_components=None) + model.fit(input_data, labels) + assert model.transformer_.shape == (X.shape[1], X.shape[1]) + + model = clone(estimator) + set_random_state(model) + model.set_params(n_components=X.shape[1] - 1) + model.fit(input_data, labels) + assert model.transformer_.shape == (X.shape[1] - 1, X.shape[1]) + + model = clone(estimator) + set_random_state(model) + model.set_params(n_components=X.shape[1] + 1) + with pytest.raises(ValueError) as expected_err: + model.fit(input_data, labels) + assert (str(expected_err.value) == + 'Invalid n_components, must be in [1, {}]'.format(X.shape[1])) + + model = clone(estimator) + set_random_state(model) + model.set_params(n_components=0) + with pytest.raises(ValueError) as expected_err: + model.fit(input_data, labels) + assert (str(expected_err.value) == + 'Invalid n_components, must be in [1, {}]'.format(X.shape[1])) + + if __name__ == '__main__': unittest.main() diff --git a/test/test_fit_transform.py b/test/test_fit_transform.py index b85e9273..5e8a87f4 100644 --- a/test/test_fit_transform.py +++ b/test/test_fit_transform.py @@ -88,36 +88,34 @@ def test_nca(self): assert_array_almost_equal(res_1, res_2) def test_lfda(self): - lfda = LFDA(k=2, num_dims=2) + lfda = LFDA(k=2, n_components=2) lfda.fit(self.X, self.y) res_1 = lfda.transform(self.X) - lfda = LFDA(k=2, num_dims=2) + lfda = LFDA(k=2, n_components=2) res_2 = lfda.fit_transform(self.X, self.y) # signs may be flipped, that's okay - if np.sign(res_1[0,0]) != np.sign(res_2[0,0]): - res_2 *= -1 - assert_array_almost_equal(res_1, res_2) + assert_array_almost_equal(abs(res_1), abs(res_2)) def test_rca_supervised(self): seed = np.random.RandomState(1234) - rca = RCA_Supervised(num_dims=2, num_chunks=30, chunk_size=2) + rca = RCA_Supervised(n_components=2, num_chunks=30, chunk_size=2) rca.fit(self.X, self.y, random_state=seed) res_1 = rca.transform(self.X) seed = np.random.RandomState(1234) - rca = RCA_Supervised(num_dims=2, num_chunks=30, chunk_size=2) + rca = RCA_Supervised(n_components=2, num_chunks=30, chunk_size=2) res_2 = rca.fit_transform(self.X, self.y, random_state=seed) assert_array_almost_equal(res_1, res_2) def test_mlkr(self): - mlkr = MLKR(num_dims=2) + mlkr = MLKR(n_components=2) mlkr.fit(self.X, self.y) res_1 = mlkr.transform(self.X) - mlkr = MLKR(num_dims=2) + mlkr = MLKR(n_components=2) res_2 = mlkr.fit_transform(self.X, self.y) assert_array_almost_equal(res_1, res_2) diff --git a/test/test_mahalanobis_mixin.py b/test/test_mahalanobis_mixin.py index 15bf1aed..e7fa5b17 100644 --- a/test/test_mahalanobis_mixin.py +++ b/test/test_mahalanobis_mixin.py @@ -137,11 +137,8 @@ def test_embed_dim(estimator, build_dataset): model.score_pairs(model.transform(X[0, :])) assert str(raised_error.value) == err_msg # we test that the shape is also OK when doing dimensionality reduction - if type(model).__name__ in {'LFDA', 'MLKR', 'NCA', 'RCA'}: - # TODO: - # avoid this enumeration and rather test if hasattr n_components - # as soon as we have made the arguments names as such (issue #167) - model.set_params(num_dims=2) + if hasattr(model, 'n_components'): + model.set_params(n_components=2) model.fit(*remove_y_quadruplets(estimator, input_data, labels)) assert model.transform(X).shape == (X.shape[0], 2) # assert that ValueError is thrown if input shape is 1D diff --git a/test/test_sklearn_compat.py b/test/test_sklearn_compat.py index 091c56e2..6b451aee 100644 --- a/test/test_sklearn_compat.py +++ 
b/test/test_sklearn_compat.py @@ -352,8 +352,8 @@ def test_dict_unchanged(estimator, build_dataset, with_preprocessor): to_transform) = build_dataset(with_preprocessor) estimator = clone(estimator) estimator.set_params(preprocessor=preprocessor) - if hasattr(estimator, "num_dims"): - estimator.num_dims = 1 + if hasattr(estimator, "n_components"): + estimator.n_components = 1 estimator.fit(*remove_y_quadruplets(estimator, input_data, labels)) def check_dict(): @@ -381,8 +381,8 @@ def test_dont_overwrite_parameters(estimator, build_dataset, input_data, labels, preprocessor, _ = build_dataset(with_preprocessor) estimator = clone(estimator) estimator.set_params(preprocessor=preprocessor) - if hasattr(estimator, "num_dims"): - estimator.num_dims = 1 + if hasattr(estimator, "n_components"): + estimator.n_components = 1 dict_before_fit = estimator.__dict__.copy() estimator.fit(*remove_y_quadruplets(estimator, input_data, labels)) diff --git a/test/test_transformer_metric_conversion.py b/test/test_transformer_metric_conversion.py index 4328320d..0139f632 100644 --- a/test/test_transformer_metric_conversion.py +++ b/test/test_transformer_metric_conversion.py @@ -63,20 +63,20 @@ def test_nca(self): assert_array_almost_equal(L.T.dot(L), nca.get_mahalanobis_matrix()) def test_lfda(self): - lfda = LFDA(k=2, num_dims=2) + lfda = LFDA(k=2, n_components=2) lfda.fit(self.X, self.y) L = lfda.transformer_ assert_array_almost_equal(L.T.dot(L), lfda.get_mahalanobis_matrix()) def test_rca_supervised(self): seed = np.random.RandomState(1234) - rca = RCA_Supervised(num_dims=2, num_chunks=30, chunk_size=2) + rca = RCA_Supervised(n_components=2, num_chunks=30, chunk_size=2) rca.fit(self.X, self.y, random_state=seed) L = rca.transformer_ assert_array_almost_equal(L.T.dot(L), rca.get_mahalanobis_matrix()) def test_mlkr(self): - mlkr = MLKR(num_dims=2) + mlkr = MLKR(n_components=2) mlkr.fit(self.X, self.y) L = mlkr.transformer_ assert_array_almost_equal(L.T.dot(L), mlkr.get_mahalanobis_matrix()) diff --git a/test/test_utils.py b/test/test_utils.py index 6441fac6..08415a76 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -10,7 +10,7 @@ from metric_learn._util import (check_input, make_context, preprocess_tuples, make_name, preprocess_points, check_collapsed_pairs, validate_vector, - _check_sdp_from_eigen, + _check_sdp_from_eigen, _check_n_components, check_y_valid_values_for_pairs) from metric_learn import (ITML, LSML, MMC, RCA, SDML, Covariance, LFDA, LMNN, MLKR, NCA, ITML_Supervised, LSML_Supervised, @@ -867,9 +867,9 @@ def test_same_with_or_without_preprocessor(estimator, build_dataset): formed_points_to_transform = dataset_formed.to_transform (indices_train, indices_test, y_train, y_test, formed_train, formed_test) = train_test_split(dataset_indices.data, - dataset_indices.target, - dataset_formed.data, - random_state=SEED) + dataset_indices.target, + dataset_formed.data, + random_state=SEED) def make_random_state(estimator): rs = {} @@ -1008,6 +1008,24 @@ def test_check_sdp_from_eigen_positive_err_messages(): _check_sdp_from_eigen(w, None) +def test__check_n_components(): + """Checks that n_components returns what is expected + (including the errors)""" + dim = _check_n_components(5, None) + assert dim == 5 + + dim = _check_n_components(5, 3) + assert dim == 3 + + with pytest.raises(ValueError) as expected_err: + _check_n_components(5, 10) + assert str(expected_err.value) == 'Invalid n_components, must be in [1, 5]' + + with pytest.raises(ValueError) as expected_err: + _check_n_components(5, 0) + assert 
str(expected_err.value) == 'Invalid n_components, must be in [1, 5]' + + @pytest.mark.unit @pytest.mark.parametrize('wrong_labels', [[0.5, 0.6, 0.7, 0.8, 0.9], From 130cbadff294b686e466d430f26b2d069f6bbf59 Mon Sep 17 00:00:00 2001 From: William de Vazelhes <31916524+wdevazelhes@users.noreply.github.com> Date: Fri, 7 Jun 2019 17:26:57 +0200 Subject: [PATCH 116/210] [MRG] Uniformize initialization for all algorithms (#195) * initiate PR * Revert "initiate PR" This reverts commit a2ae9e10932e58448292e7e3412958649ec3c2d0. * FEAT: uniformize init for NCA and RCA * Let the check of num_dims be done in the other PR * Add metric initialization for algorithms that learn a mahalanobis matrix * Add initialization for MLKR * FIX: fix error message for dimension * FIX fix StringRepr for MLKR * FIX tests by reshaping to the right dataset size * Remove lda in docstring of MLKR * MAINT: Add deprecation for previous initializations * Update tests with new initialization * Make random init for mahalanobis metric generate an SPD matrix * Ensure the input mahalanobis metric initialization is symmetric, and say it should be SPD * various fixes * MAINT: various refactoring - MLKR: update default test init - SDML: refactor prior_inv * FIX fix default covariance for SDML in tests * Enhance docstring * Set random state for SDML * Fix merge remove_spaces that was forgotten * Fix indent * XP: try to change the way we choose n_components to see if it fixes the test * Revert "XP: try to change the way we choose n_components to see if it fixes the test" This reverts commit e86b61b7db2f432d291f8fd85e90ae80b55adf5e. * Be more tolerant in test * Add test for singular covariance matrix * Fix test_singular_covariance_init * DOC: update docstring saying pseudo-inverse * Revert "Fix test_singular_covariance_init" This reverts commit d2cc7cec3099edb1cd1bfaf761e24ec7473e110f. 
* Ensure definiteness before returning the inverse * wip deal with non definiteness * Rename init to prior for SDML and LSML * Update error messages with either prior or init * Remove message * A few nitpicks * PEP8 errors + change init in test * STY: PEP8 fixes * Address and remove TODOs * Replace init by prior for ITML * TST: fix ITML test with init changed into prior * Add precision for MMC * Add ChangedBehaviorWarning for the algorithms that changed * Address https://github.com/metric-learn/metric-learn/pull/195#pullrequestreview-245440568 * Remove the warnings check since we now have a ChangedBehaviorWarning * Be more precise: it should not raise any ConvergenceWarningError * Address https://github.com/metric-learn/metric-learn/pull/195#pullrequestreview-245911227 * FIX remaining comment * TST: update test error message * Improve readability * Address https://github.com/metric-learn/metric-learn/pull/195#pullrequestreview-246553439 * TST: Fix docsting lmnn * Fix warning messages * Fix warnings messages changed --- bench/benchmarks/iris.py | 2 +- metric_learn/_util.py | 335 ++++++++++++++++++- metric_learn/covariance.py | 2 +- metric_learn/exceptions.py | 8 + metric_learn/itml.py | 96 +++++- metric_learn/lmnn.py | 66 +++- metric_learn/lsml.py | 98 ++++-- metric_learn/mlkr.py | 92 +++++- metric_learn/mmc.py | 138 ++++++-- metric_learn/nca.py | 78 ++++- metric_learn/sdml.py | 127 ++++++-- test/metric_learn_test.py | 282 +++++++++++++--- test/test_base_metric.py | 87 ++--- test/test_fit_transform.py | 4 +- test/test_mahalanobis_mixin.py | 357 ++++++++++++++++++++- test/test_sklearn_compat.py | 6 +- test/test_transformer_metric_conversion.py | 7 +- test/test_utils.py | 64 +++- 18 files changed, 1626 insertions(+), 223 deletions(-) diff --git a/bench/benchmarks/iris.py b/bench/benchmarks/iris.py index 305c3a0f..e3390930 100644 --- a/bench/benchmarks/iris.py +++ b/bench/benchmarks/iris.py @@ -10,7 +10,7 @@ 'LMNN': metric_learn.LMNN(k=5, learn_rate=1e-6, verbose=False), 'LSML_Supervised': metric_learn.LSML_Supervised(num_constraints=200), 'MLKR': metric_learn.MLKR(), - 'NCA': metric_learn.NCA(max_iter=700, num_dims=2), + 'NCA': metric_learn.NCA(max_iter=700, n_components=2), 'RCA_Supervised': metric_learn.RCA_Supervised(dim=2, num_chunks=30, chunk_size=2), 'SDML_Supervised': metric_learn.SDML_Supervised(num_constraints=1500), diff --git a/metric_learn/_util.py b/metric_learn/_util.py index 583f1105..9cf6d7c6 100644 --- a/metric_learn/_util.py +++ b/metric_learn/_util.py @@ -1,10 +1,16 @@ -import warnings import numpy as np +import scipy import six from numpy.linalg import LinAlgError +from sklearn.datasets import make_spd_matrix +from sklearn.decomposition import PCA from sklearn.utils import check_array -from sklearn.utils.validation import check_X_y -from metric_learn.exceptions import PreprocessorError +from sklearn.utils.validation import check_X_y, check_random_state +from .exceptions import PreprocessorError, NonPSDError +from sklearn.discriminant_analysis import LinearDiscriminantAnalysis +from scipy.linalg import pinvh +import sys +import time # hack around lack of axis kwarg in older numpy versions try: @@ -335,6 +341,8 @@ def check_collapsed_pairs(pairs): def _check_sdp_from_eigen(w, tol=None): """Checks if some of the eigenvalues given are negative, up to a tolerance level, with a default value of the tolerance depending on the eigenvalues. + It also returns whether the matrix is positive definite, up to the above + tolerance. 
Parameters ---------- @@ -342,9 +350,14 @@ def _check_sdp_from_eigen(w, tol=None): Eigenvalues to check for non semidefinite positiveness. tol : positive `float`, optional - Negative eigenvalues above - tol are considered zero. If + Absolute eigenvalues below tol are considered zero. If tol is None, and eps is the epsilon value for datatype of w, then tol - is set to w.max() * len(w) * eps. + is set to abs(w).max() * len(w) * eps. + + Returns + ------- + is_definite : bool + Whether the matrix is positive definite or not. See Also -------- @@ -352,11 +365,14 @@ def _check_sdp_from_eigen(w, tol=None): strategy is applied here) """ if tol is None: - tol = w.max() * len(w) * np.finfo(w.dtype).eps + tol = np.abs(w).max() * len(w) * np.finfo(w.dtype).eps if tol < 0: raise ValueError("tol should be positive.") if any(w < - tol): - raise ValueError("Matrix is not positive semidefinite (PSD).") + raise NonPSDError() + if any(abs(w) < tol): + return False + return True def transformer_from_metric(metric, tol=None): @@ -413,6 +429,311 @@ def validate_vector(u, dtype=None): return u +def _initialize_transformer(n_components, input, y=None, init='auto', + verbose=False, random_state=None, + has_classes=True): + """Returns the initial transformer to be used depending on the arguments. + + Parameters + ---------- + n_components : int + The number of components to take. (Note: it should have been checked + before, meaning it should not be None and it should be a value in + [1, X.shape[1]]) + + input : array-like + The input samples (can be tuples or regular samples). + + y : array-like or None + The input labels (or not if there are no labels). + + init : string or numpy array, optional (default='auto') + Initialization of the linear transformation. Possible options are + 'auto', 'pca', 'lda', 'identity', 'random', and a numpy array of shape + (n_features_a, n_features_b). + + 'auto' + Depending on ``n_components``, the most reasonable initialization + will be chosen. If ``n_components <= n_classes`` we use 'lda' (see + the description of 'lda' init), as it uses labels information. If + not, but ``n_components < min(n_features, n_samples)``, we use 'pca', + as it projects data onto meaningful directions (those of higher + variance). Otherwise, we just use 'identity'. + + 'pca' + ``n_components`` principal components of the inputs passed + to :meth:`fit` will be used to initialize the transformation. + (See `sklearn.decomposition.PCA`) + + 'lda' + ``min(n_components, n_classes)`` most discriminative + components of the inputs passed to :meth:`fit` will be used to + initialize the transformation. (If ``n_components > n_classes``, + the rest of the components will be zero.) (See + `sklearn.discriminant_analysis.LinearDiscriminantAnalysis`). + This initialization is possible only if `has_classes == True`. + + 'identity' + The identity matrix. If ``n_components`` is strictly smaller than the + dimensionality of the inputs passed to :meth:`fit`, the identity + matrix will be truncated to the first ``n_components`` rows. + + 'random' + The initial transformation will be a random array of shape + `(n_components, n_features)`. Each value is sampled from the + standard normal distribution. + + numpy array + n_features_b must match the dimensionality of the inputs passed to + :meth:`fit` and n_features_a must be less than or equal to that. + If ``n_components`` is not None, n_features_a must match it. + + verbose : bool + Whether to print the details of the initialization or not. 
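# Hedged illustration of the init options above, calling the helper introduced
# in this patch directly (signature as in the diff; hypothetical toy data):
import numpy as np
from metric_learn._util import _initialize_transformer

rng = np.random.RandomState(0)
X = rng.randn(20, 5)
y = rng.randint(2, size=20)
L = _initialize_transformer(3, X, y, init='identity')
print(L.shape)  # (3, 5): the identity truncated to its first 3 rows
L = _initialize_transformer(3, X, y, init='pca', random_state=0)
print(L.shape)  # (3, 5): the top 3 principal components of X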
+ + random_state : int or `numpy.RandomState` or None, optional (default=None) + A pseudo random number generator object or a seed for it if int. If + ``init='random'``, ``random_state`` is used to initialize the random + transformation. If ``init='pca'``, ``random_state`` is passed as an + argument to PCA when initializing the transformation. + + has_classes : bool (default=True) + Whether the labels are in fact classes. If true, this will allow to use + the 'lda' initialization. + + Returns + ------- + init_transformer : `numpy.ndarray` + The initial transformer to use. + """ + # if we are doing a regression we cannot use lda: + n_features = input.shape[-1] + authorized_inits = ['auto', 'pca', 'identity', 'random'] + if has_classes: + authorized_inits.append('lda') + + if isinstance(init, np.ndarray): + # we copy the array, so that if we update the metric, we don't want to + # update the init + init = check_array(init, copy=True) + + # Assert that init.shape[1] = X.shape[1] + if init.shape[1] != n_features: + raise ValueError('The input dimensionality ({}) of the given ' + 'linear transformation `init` must match the ' + 'dimensionality of the given inputs `X` ({}).' + .format(init.shape[1], n_features)) + + # Assert that init.shape[0] <= init.shape[1] + if init.shape[0] > init.shape[1]: + raise ValueError('The output dimensionality ({}) of the given ' + 'linear transformation `init` cannot be ' + 'greater than its input dimensionality ({}).' + .format(init.shape[0], init.shape[1])) + + # Assert that self.n_components = init.shape[0] + if n_components != init.shape[0]: + raise ValueError('The preferred dimensionality of the ' + 'projected space `n_components` ({}) does' + ' not match the output dimensionality of ' + 'the given linear transformation ' + '`init` ({})!' + .format(n_components, + init.shape[0])) + elif init not in authorized_inits: + raise ValueError( + "`init` must be '{}' " + "or a numpy array of shape (n_components, n_features)." + .format("', '".join(authorized_inits))) + + random_state = check_random_state(random_state) + if isinstance(init, np.ndarray): + return init + n_samples = input.shape[0] + if init == 'auto': + if has_classes: + n_classes = len(np.unique(y)) + else: + n_classes = -1 + init = _auto_select_init(has_classes, n_features, n_samples, n_components, + n_classes) + if init == 'identity': + return np.eye(n_components, input.shape[-1]) + elif init == 'random': + return random_state.randn(n_components, input.shape[-1]) + elif init in {'pca', 'lda'}: + init_time = time.time() + if init == 'pca': + pca = PCA(n_components=n_components, + random_state=random_state) + if verbose: + print('Finding principal components... ') + sys.stdout.flush() + pca.fit(input) + transformation = pca.components_ + elif init == 'lda': + lda = LinearDiscriminantAnalysis(n_components=n_components) + if verbose: + print('Finding most discriminative components... 
') + sys.stdout.flush() + lda.fit(input, y) + transformation = lda.scalings_.T[:n_components] + if verbose: + print('done in {:5.2f}s'.format(time.time() - init_time)) + return transformation + + +def _auto_select_init(has_classes, n_features, n_samples, n_components, + n_classes): + if has_classes and n_components <= min(n_features, n_classes - 1): + init = 'lda' + elif n_components < min(n_features, n_samples): + init = 'pca' + else: + init = 'identity' + return init + + +def _initialize_metric_mahalanobis(input, init='identity', random_state=None, + return_inverse=False, strict_pd=False, + matrix_name='matrix'): + """Returns a PSD matrix that can be used as a prior or an initialization + for the Mahalanobis distance + + Parameters + ---------- + input : array-like + The input samples (can be tuples or regular samples). + + init : string or numpy array, optional (default='identity') + Specification for the matrix to initialize. Possible options are + 'identity', 'covariance', 'random', and a numpy array of shape + (n_features, n_features). + + 'identity' + An identity matrix of shape (n_features, n_features). + + 'covariance' + The (pseudo-)inverse covariance matrix (raises an error if the + covariance matrix is not definite and `strict_pd == True`) + + 'random' + A random positive definite (PD) matrix of shape + `(n_features, n_features)`, generated using + `sklearn.datasets.make_spd_matrix`. + + numpy array + A PSD matrix (or strictly PD if strict_pd==True) of + shape (n_features, n_features), that will be used as such to + initialize the metric, or set the prior. + + random_state : int or `numpy.RandomState` or None, optional (default=None) + A pseudo random number generator object or a seed for it if int. If + ``init='random'``, ``random_state`` is used to set the random Mahalanobis + matrix. If ``init='pca'``, ``random_state`` is passed as an + argument to PCA when initializing the matrix. + + return_inverse : bool, optional (default=False) + Whether to return the inverse of the specified matrix. This + can be sometimes useful. It will return the pseudo-inverse (which is the + same as the inverse if the matrix is definite (i.e. invertible)). If + `strict_pd == True` and the matrix is not definite, it will return an + error. + + strict_pd : bool, optional (default=False) + Whether to enforce that the provided matrix is definite (in addition to + being PSD). + + param_name : str, optional (default='matrix') + The name of the matrix used (example: 'init', 'prior'). Will be used in + error messages. + + Returns + ------- + M, or (M, M_inv) : `numpy.ndarray` + The initial matrix to use M, and its inverse if `return_inverse=True`. + """ + n_features = input.shape[-1] + if isinstance(init, np.ndarray): + # we copy the array, so that if we update the metric, we don't want to + # update the init + init = check_array(init, copy=True) + + # Assert that init.shape[1] = n_features + if init.shape != (n_features,) * 2: + raise ValueError('The input dimensionality {} of the given ' + 'mahalanobis matrix `{}` must match the ' + 'dimensionality of the given inputs ({}).' + .format(init.shape, matrix_name, n_features)) + + # Assert that the matrix is symmetric + if not np.allclose(init, init.T): + raise ValueError("`{}` is not symmetric.".format(matrix_name)) + + elif init not in ['identity', 'covariance', 'random']: + raise ValueError( + "`{}` must be 'identity', 'covariance', 'random' " + "or a numpy array of shape (n_features, n_features)." 
+ .format(matrix_name)) + + random_state = check_random_state(random_state) + M = init + if isinstance(init, np.ndarray): + s, u = scipy.linalg.eigh(init) + init_is_definite = _check_sdp_from_eigen(s) + if strict_pd and not init_is_definite: + raise LinAlgError("You should provide a strictly positive definite " + "matrix as `{}`. This one is not definite. Try another" + " {}, or an algorithm that does not " + "require the {} to be strictly positive definite." + .format(*((matrix_name,) * 3))) + if return_inverse: + M_inv = np.dot(u / s, u.T) + return M, M_inv + else: + return M + elif init == 'identity': + M = np.eye(n_features, n_features) + if return_inverse: + M_inv = M.copy() + return M, M_inv + else: + return M + elif init == 'covariance': + if input.ndim == 3: + # if the input are tuples, we need to form an X by deduplication + X = np.vstack({tuple(row) for row in input.reshape(-1, n_features)}) + else: + X = input + # atleast2d is necessary to deal with scalar covariance matrices + M_inv = np.atleast_2d(np.cov(X, rowvar=False)) + s, u = scipy.linalg.eigh(M_inv) + cov_is_definite = _check_sdp_from_eigen(s) + if strict_pd and not cov_is_definite: + raise LinAlgError("Unable to get a true inverse of the covariance " + "matrix since it is not definite. Try another " + "`{}`, or an algorithm that does not " + "require the `{}` to be strictly positive definite." + .format(*((matrix_name,) * 2))) + M = np.dot(u / s, u.T) + if return_inverse: + return M, M_inv + else: + return M + elif init == 'random': + # we need to create a random symmetric matrix + M = make_spd_matrix(n_features, random_state=random_state) + if return_inverse: + # we use pinvh even if we know the matrix is definite, just because + # we need the returned matrix to be symmetric (and sometimes + # np.linalg.inv returns not symmetric inverses of symmetric matrices) + # TODO: there might be a more efficient method to do so + M_inv = pinvh(M) + return M, M_inv + else: + return M + + def _check_n_components(n_features, n_components): """Checks that n_components is less than n_features and deal with the None case""" diff --git a/metric_learn/covariance.py b/metric_learn/covariance.py index 7f606921..19dad5d8 100644 --- a/metric_learn/covariance.py +++ b/metric_learn/covariance.py @@ -22,7 +22,7 @@ class Covariance(MahalanobisMixin, TransformerMixin): Attributes ---------- - transformer_ : `numpy.ndarray`, shape=(n_components, n_features) + transformer_ : `numpy.ndarray`, shape=(n_features, n_features) The linear transformation ``L`` deduced from the learned Mahalanobis metric (See function `transformer_from_metric`.) """ diff --git a/metric_learn/exceptions.py b/metric_learn/exceptions.py index 424d2c4f..76f09778 100644 --- a/metric_learn/exceptions.py +++ b/metric_learn/exceptions.py @@ -2,6 +2,7 @@ The :mod:`metric_learn.exceptions` module includes all custom warnings and error classes used across metric-learn. """ +from numpy.linalg import LinAlgError class PreprocessorError(Exception): @@ -10,3 +11,10 @@ def __init__(self, original_error): err_msg = ("An error occurred when trying to use the " "preprocessor: {}").format(repr(original_error)) super(PreprocessorError, self).__init__(err_msg) + + +class NonPSDError(LinAlgError): + + def __init__(self): + err_msg = "Matrix is not positive semidefinite (PSD)." 
+ super(LinAlgError, self).__init__(err_msg) diff --git a/metric_learn/itml.py b/metric_learn/itml.py index 25518bf6..21303c18 100644 --- a/metric_learn/itml.py +++ b/metric_learn/itml.py @@ -23,7 +23,7 @@ from sklearn.base import TransformerMixin from .base_metric import _PairsClassifierMixin, MahalanobisMixin from .constraints import Constraints, wrap_pairs -from ._util import vector_norm, transformer_from_metric +from ._util import transformer_from_metric, _initialize_metric_mahalanobis class _BaseITML(MahalanobisMixin): @@ -32,7 +32,8 @@ class _BaseITML(MahalanobisMixin): _tuple_size = 2 # constraints are pairs def __init__(self, gamma=1., max_iter=1000, convergence_threshold=1e-3, - A0=None, verbose=False, preprocessor=None): + prior='identity', A0='deprecated', verbose=False, + preprocessor=None, random_state=None): """Initialize ITML. Parameters @@ -44,8 +45,32 @@ def __init__(self, gamma=1., max_iter=1000, convergence_threshold=1e-3, convergence_threshold : float, optional - A0 : (d x d) matrix, optional - initial regularization matrix, defaults to identity + prior : string or numpy array, optional (default='identity') + The Mahalanobis matrix to use as a prior. Possible options are + 'identity', 'covariance', 'random', and a numpy array of shape + (n_features, n_features). For ITML, the prior should be strictly + positive definite (PD). + + 'identity' + An identity matrix of shape (n_features, n_features). + + 'covariance' + The inverse covariance matrix. + + 'random' + The prior will be a random SPD matrix of shape + `(n_features, n_features)`, generated using + `sklearn.datasets.make_spd_matrix`. + + numpy array + A positive definite (PD) matrix of shape + (n_features, n_features), that will be used as such to set the + prior. + + A0 : Not used + .. deprecated:: 0.5.0 + `A0` was deprecated in version 0.5.0 and will + be removed in 0.6.0. Use 'prior' instead. verbose : bool, optional if True, prints information while learning @@ -53,15 +78,26 @@ def __init__(self, gamma=1., max_iter=1000, convergence_threshold=1e-3, preprocessor : array-like, shape=(n_samples, n_features) or callable The preprocessor to call to get tuples from indices. If array-like, tuples will be formed like this: X[indices]. + + random_state : int or numpy.RandomState or None, optional (default=None) + A pseudo random number generator object or a seed for it if int. If + ``prior='random'``, ``random_state`` is used to set the prior. """ self.gamma = gamma self.max_iter = max_iter self.convergence_threshold = convergence_threshold + self.prior = prior self.A0 = A0 self.verbose = verbose + self.random_state = random_state super(_BaseITML, self).__init__(preprocessor) def _fit(self, pairs, y, bounds=None): + if self.A0 != 'deprecated': + warnings.warn('"A0" parameter is not used.' + ' It has been deprecated in version 0.5.0 and will be' + 'removed in 0.6.0. 
Use "prior" instead.', + DeprecationWarning) pairs, y = self._prepare_inputs(pairs, y, type_of_inputs='tuples') # init bounds @@ -76,11 +112,11 @@ def _fit(self, pairs, y, bounds=None): raise ValueError("`bounds` should be an array-like of two elements.") self.bounds_ = bounds self.bounds_[self.bounds_ == 0] = 1e-9 - # init metric - if self.A0 is None: - A = np.identity(pairs.shape[2]) - else: - A = check_array(self.A0, copy=True) + # set the prior + # pairs will be deduplicated into X two times, TODO: avoid that + A = _initialize_metric_mahalanobis(pairs, self.prior, self.random_state, + strict_pd=True, + matrix_name='prior') gamma = self.gamma pos_pairs, neg_pairs = pairs[y == 1], pairs[y == -1] num_pos = len(pos_pairs) @@ -150,7 +186,7 @@ class ITML(_BaseITML, _PairsClassifierMixin): n_iter_ : `int` The number of iterations the solver has run. - transformer_ : `numpy.ndarray`, shape=(n_components, n_features) + transformer_ : `numpy.ndarray`, shape=(n_features, n_features) The linear transformation ``L`` deduced from the learned Mahalanobis metric (See function `transformer_from_metric`.) @@ -218,14 +254,15 @@ class ITML_Supervised(_BaseITML, TransformerMixin): n_iter_ : `int` The number of iterations the solver has run. - transformer_ : `numpy.ndarray`, shape=(n_components, n_features) + transformer_ : `numpy.ndarray`, shape=(n_features, n_features) The linear transformation ``L`` deduced from the learned Mahalanobis metric (See function `transformer_from_metric`.) """ def __init__(self, gamma=1., max_iter=1000, convergence_threshold=1e-3, num_labeled='deprecated', num_constraints=None, - bounds='deprecated', A0=None, verbose=False, preprocessor=None): + bounds='deprecated', prior='identity', A0='deprecated', + verbose=False, preprocessor=None, random_state=None): """Initialize the supervised version of `ITML`. `ITML_Supervised` creates pairs of similar sample by taking same class @@ -249,17 +286,46 @@ def __init__(self, gamma=1., max_iter=1000, convergence_threshold=1e-3, `bounds` was deprecated in version 0.5.0 and will be removed in 0.6.0. Set `bounds` at fit time instead : `itml_supervised.fit(X, y, bounds=...)` - A0 : (d x d) matrix, optional - initial regularization matrix, defaults to identity + + prior : string or numpy array, optional (default='identity') + Initialization of the Mahalanobis matrix. Possible options are + 'identity', 'covariance', 'random', and a numpy array of shape + (n_features, n_features). For ITML, the prior should be strictly + positive definite (PD). + + 'identity' + An identity matrix of shape (n_features, n_features). + + 'covariance' + The inverse covariance matrix. + + 'random' + The prior will be a random SPD matrix of shape + `(n_features, n_features)`, generated using + `sklearn.datasets.make_spd_matrix`. + + numpy array + A positive definite (PD) matrix of shape + (n_features, n_features), that will be used as such to set the + prior. + + A0 : Not used + .. deprecated:: 0.5.0 + `A0` was deprecated in version 0.5.0 and will + be removed in 0.6.0. Use 'prior' instead. verbose : bool, optional if True, prints information while learning preprocessor : array-like, shape=(n_samples, n_features) or callable The preprocessor to call to get tuples from indices. If array-like, tuples will be formed like this: X[indices]. + random_state : int or numpy.RandomState or None, optional (default=None) + A pseudo random number generator object or a seed for it if int. If + ``prior='random'``, ``random_state`` is used to set the prior. 
""" _BaseITML.__init__(self, gamma=gamma, max_iter=max_iter, convergence_threshold=convergence_threshold, - A0=A0, verbose=verbose, preprocessor=preprocessor) + A0=A0, prior=prior, verbose=verbose, + preprocessor=preprocessor, random_state=random_state) self.num_labeled = num_labeled self.num_constraints = num_constraints self.bounds = bounds diff --git a/metric_learn/lmnn.py b/metric_learn/lmnn.py index 1ba87684..c2437b86 100644 --- a/metric_learn/lmnn.py +++ b/metric_learn/lmnn.py @@ -20,20 +20,60 @@ from sklearn.metrics import euclidean_distances from sklearn.base import TransformerMixin -from ._util import _check_n_components +from ._util import _initialize_transformer, _check_n_components from .base_metric import MahalanobisMixin # commonality between LMNN implementations class _base_LMNN(MahalanobisMixin, TransformerMixin): - def __init__(self, k=3, min_iter=50, max_iter=1000, learn_rate=1e-7, - regularization=0.5, convergence_tol=0.001, use_pca=True, - verbose=False, preprocessor=None, n_components=None, - num_dims='deprecated'): + def __init__(self, init='auto', k=3, min_iter=50, max_iter=1000, + learn_rate=1e-7, regularization=0.5, convergence_tol=0.001, + use_pca=True, verbose=False, preprocessor=None, + n_components=None, num_dims='deprecated', random_state=None): """Initialize the LMNN object. Parameters ---------- + init : string or numpy array, optional (default='auto') + Initialization of the linear transformation. Possible options are + 'auto', 'pca', 'lda', 'identity', 'random', and a numpy array of shape + (n_features_a, n_features_b). + + 'auto' + Depending on ``n_components``, the most reasonable initialization + will be chosen. If ``n_components <= n_classes`` we use 'lda', as + it uses labels information. If not, but + ``n_components < min(n_features, n_samples)``, we use 'pca', as + it projects data in meaningful directions (those of higher + variance). Otherwise, we just use 'identity'. + + 'pca' + ``n_components`` principal components of the inputs passed + to :meth:`fit` will be used to initialize the transformation. + (See `sklearn.decomposition.PCA`) + + 'lda' + ``min(n_components, n_classes)`` most discriminative + components of the inputs passed to :meth:`fit` will be used to + initialize the transformation. (If ``n_components > n_classes``, + the rest of the components will be zero.) (See + `sklearn.discriminant_analysis.LinearDiscriminantAnalysis`) + + 'identity' + If ``n_components`` is strictly smaller than the + dimensionality of the inputs passed to :meth:`fit`, the identity + matrix will be truncated to the first ``n_components`` rows. + + 'random' + The initial transformation will be a random array of shape + `(n_components, n_features)`. Each value is sampled from the + standard normal distribution. + + numpy array + n_features_b must match the dimensionality of the inputs passed to + :meth:`fit` and n_features_a must be less than or equal to that. + If ``n_components`` is not None, n_features_a must match it. + k : int, optional Number of neighbors to consider, not including self-edges. @@ -52,7 +92,14 @@ def __init__(self, k=3, min_iter=50, max_iter=1000, learn_rate=1e-7, .. deprecated:: 0.5.0 `num_dims` was deprecated in version 0.5.0 and will be removed in 0.6.0. Use `n_components` instead. + + random_state : int or numpy.RandomState or None, optional (default=None) + A pseudo random number generator object or a seed for it if int. If + ``init='random'``, ``random_state`` is used to initialize the random + transformation. 
If ``init='pca'``, ``random_state`` is passed as an + argument to PCA when initializing the transformation. """ + self.init = init self.k = k self.min_iter = min_iter self.max_iter = max_iter @@ -63,6 +110,7 @@ def __init__(self, k=3, min_iter=50, max_iter=1000, learn_rate=1e-7, self.verbose = verbose self.n_components = n_components self.num_dims = num_dims + self.random_state = random_state super(_base_LMNN, self).__init__(preprocessor) @@ -87,9 +135,9 @@ def fit(self, X, y): if len(label_inds) != num_pts: raise ValueError('Must have one label per point.') self.labels_ = np.arange(len(unique_labels)) - if self.use_pca: - warnings.warn('use_pca does nothing for the python_LMNN implementation') - self.transformer_ = np.eye(output_dim, d) + self.transformer_ = _initialize_transformer(output_dim, X, y, self.init, + self.verbose, + self.random_state) required_k = np.bincount(label_inds).min() if self.k > required_k: raise ValueError('not enough class labels for specified k' @@ -122,6 +170,8 @@ def fit(self, X, y): self._loss_grad(X, L, dfG, impostors, 1, k, reg, target_neighbors, df, a1, a2)) + it = 1 # we already made one iteration + # main loop for it in xrange(2, self.max_iter): # then at each iteration, we try to find a value of L that has better diff --git a/metric_learn/lsml.py b/metric_learn/lsml.py index 94366b88..4350b003 100644 --- a/metric_learn/lsml.py +++ b/metric_learn/lsml.py @@ -20,36 +20,64 @@ import scipy.linalg from six.moves import xrange from sklearn.base import TransformerMixin +from sklearn.exceptions import ChangedBehaviorWarning from .base_metric import _QuadrupletsClassifierMixin, MahalanobisMixin from .constraints import Constraints -from ._util import transformer_from_metric +from ._util import transformer_from_metric, _initialize_metric_mahalanobis class _BaseLSML(MahalanobisMixin): _tuple_size = 4 # constraints are quadruplets - def __init__(self, tol=1e-3, max_iter=1000, prior=None, verbose=False, - preprocessor=None): + def __init__(self, tol=1e-3, max_iter=1000, prior=None, + verbose=False, preprocessor=None, random_state=None): """Initialize LSML. Parameters ---------- + prior : None, string or numpy array, optional (default=None) + Prior to set for the metric. Possible options are + 'identity', 'covariance', 'random', and a numpy array of + shape (n_features, n_features). For LSML, the prior should be strictly + positive definite (PD). If `None`, will be set + automatically to 'identity' (this is to raise a warning if + `prior` is not set, and stays to its default value (None), in v0.5.0). + + 'identity' + An identity matrix of shape (n_features, n_features). + + 'covariance' + The inverse covariance matrix. + + 'random' + The initial Mahalanobis matrix will be a random positive definite + (PD) matrix of shape `(n_features, n_features)`, generated using + `sklearn.datasets.make_spd_matrix`. + + numpy array + A positive definite (PD) matrix of shape + (n_features, n_features), that will be used as such to set the + prior. + tol : float, optional max_iter : int, optional - prior : (d x d) matrix, optional - guess at a metric [default: inv(covariance(X))] verbose : bool, optional if True, prints information while learning preprocessor : array-like, shape=(n_samples, n_features) or callable The preprocessor to call to get tuples from indices. If array-like, tuples will be formed like this: X[indices]. + random_state : int or numpy.RandomState or None, optional (default=None) + A pseudo random number generator object or a seed for it if int. 
If + ``init='random'``, ``random_state`` is used to set the random + prior. """ self.prior = prior self.tol = tol self.max_iter = max_iter self.verbose = verbose + self.random_state = random_state super(_BaseLSML, self).__init__(preprocessor) def _fit(self, quadruplets, weights=None): @@ -66,14 +94,23 @@ def _fit(self, quadruplets, weights=None): else: self.w_ = weights self.w_ /= self.w_.sum() # weights must sum to 1 + # if the prior is the default (identity), we raise a warning just in case if self.prior is None: - X = np.vstack({tuple(row) for row in - quadruplets.reshape(-1, quadruplets.shape[2])}) - prior_inv = np.atleast_2d(np.cov(X, rowvar=False)) - M = np.linalg.inv(prior_inv) + msg = ("Warning, no prior was set (`prior=None`). As of version 0.5.0, " + "the default prior will now be set to " + "'identity', instead of 'covariance'. If you still want to use " + "the inverse of the covariance matrix as a prior, " + "set prior='covariance'. This warning will disappear in " + "v0.6.0, and `prior` parameter's default value will be set to " + "'identity'.") + warnings.warn(msg, ChangedBehaviorWarning) + prior = 'identity' else: - M = self.prior - prior_inv = np.linalg.inv(self.prior) + prior = self.prior + M, prior_inv = _initialize_metric_mahalanobis(quadruplets, prior, + return_inverse=True, + strict_pd=True, + matrix_name='prior') step_sizes = np.logspace(-10, 0, 10) # Keep track of the best step size and the loss at that step. @@ -146,7 +183,7 @@ class LSML(_BaseLSML, _QuadrupletsClassifierMixin): n_iter_ : `int` The number of iterations the solver has run. - transformer_ : `numpy.ndarray`, shape=(n_components, n_features) + transformer_ : `numpy.ndarray`, shape=(n_features, n_features) The linear transformation ``L`` deduced from the learned Mahalanobis metric (See function `transformer_from_metric`.) """ @@ -182,15 +219,14 @@ class LSML_Supervised(_BaseLSML, TransformerMixin): n_iter_ : `int` The number of iterations the solver has run. - transformer_ : `numpy.ndarray`, shape=(n_components, n_features) + transformer_ : `numpy.ndarray`, shape=(n_features, n_features) The linear transformation ``L`` deduced from the learned Mahalanobis metric (See function `transformer_from_metric`.) """ def __init__(self, tol=1e-3, max_iter=1000, prior=None, num_labeled='deprecated', num_constraints=None, weights=None, - verbose=False, - preprocessor=None): + verbose=False, preprocessor=None, random_state=None): """Initialize the supervised version of `LSML`. `LSML_Supervised` creates quadruplets from labeled samples by taking two @@ -202,8 +238,29 @@ def __init__(self, tol=1e-3, max_iter=1000, prior=None, ---------- tol : float, optional max_iter : int, optional - prior : (d x d) matrix, optional - guess at a metric [default: covariance(X)] + prior : None, string or numpy array, optional (default=None) + Prior to set for the metric. Possible options are + 'identity', 'covariance', 'random', and a numpy array of + shape (n_features, n_features). For LSML, the prior should be strictly + positive definite (PD). If `None`, will be set + automatically to 'identity' (this is to raise a warning if + `prior` is not set, and stays to its default value (None), in v0.5.0). + + 'identity' + An identity matrix of shape (n_features, n_features). + + 'covariance' + The inverse covariance matrix. + + 'random' + The initial Mahalanobis matrix will be a random positive definite + (PD) matrix of shape `(n_features, n_features)`, generated using + `sklearn.datasets.make_spd_matrix`. 
+ + numpy array + A positive definite (PD) matrix of shape + (n_features, n_features), that will be used as such to set the + prior. num_labeled : Not used .. deprecated:: 0.5.0 `num_labeled` was deprecated in version 0.5.0 and will @@ -217,9 +274,14 @@ def __init__(self, tol=1e-3, max_iter=1000, prior=None, preprocessor : array-like, shape=(n_samples, n_features) or callable The preprocessor to call to get tuples from indices. If array-like, tuples will be formed like this: X[indices]. + random_state : int or numpy.RandomState or None, optional (default=None) + A pseudo random number generator object or a seed for it if int. If + ``init='random'``, ``random_state`` is used to set the random + prior. """ _BaseLSML.__init__(self, tol=tol, max_iter=max_iter, prior=prior, - verbose=verbose, preprocessor=preprocessor) + verbose=verbose, preprocessor=preprocessor, + random_state=random_state) self.num_labeled = num_labeled self.num_constraints = num_constraints self.weights = weights diff --git a/metric_learn/mlkr.py b/metric_learn/mlkr.py index 762317b9..9e9cf433 100644 --- a/metric_learn/mlkr.py +++ b/metric_learn/mlkr.py @@ -14,18 +14,16 @@ import sys import warnings import numpy as np -from sklearn.exceptions import ConvergenceWarning +from sklearn.exceptions import ConvergenceWarning, ChangedBehaviorWarning from sklearn.utils.fixes import logsumexp from scipy.optimize import minimize -from scipy.spatial.distance import pdist, squareform from sklearn.base import TransformerMixin -from sklearn.decomposition import PCA - from sklearn.metrics import pairwise_distances from metric_learn._util import _check_n_components from .base_metric import MahalanobisMixin +from ._util import _initialize_transformer EPS = np.finfo(float).eps @@ -42,8 +40,9 @@ class MLKR(MahalanobisMixin, TransformerMixin): The learned linear transformation ``L``. """ - def __init__(self, n_components=None, num_dims='deprecated', A0=None, - tol=None, max_iter=1000, verbose=False, preprocessor=None): + def __init__(self, n_components=None, num_dims='deprecated', init=None, + A0='deprecated', tol=None, max_iter=1000, verbose=False, + preprocessor=None, random_state=None): """ Initialize MLKR. @@ -58,14 +57,49 @@ def __init__(self, n_components=None, num_dims='deprecated', A0=None, `num_dims` was deprecated in version 0.5.0 and will be removed in 0.6.0. Use `n_components` instead. - A0: array-like, optional - Initialization of transformation matrix. Defaults to PCA loadings. + init : None, string or numpy array, optional (default=None) + Initialization of the linear transformation. Possible options are + 'auto', 'pca', 'identity', 'random', and a numpy array of shape + (n_features_a, n_features_b). If None, will be set automatically to + 'auto' (this option is to raise a warning if 'init' is not set, + and stays to its default value None, in v0.5.0). + + 'auto' + Depending on ``n_components``, the most reasonable initialization + will be chosen. If ``n_components < min(n_features, n_samples)``, + we use 'pca', as it projects data in meaningful directions (those + of higher variance). Otherwise, we just use 'identity'. + + 'pca' + ``n_components`` principal components of the inputs passed + to :meth:`fit` will be used to initialize the transformation. + (See `sklearn.decomposition.PCA`) + + 'identity' + If ``n_components`` is strictly smaller than the + dimensionality of the inputs passed to :meth:`fit`, the identity + matrix will be truncated to the first ``n_components`` rows. 
+ + 'random' + The initial transformation will be a random array of shape + `(n_components, n_features)`. Each value is sampled from the + standard normal distribution. + + numpy array + n_features_b must match the dimensionality of the inputs passed to + :meth:`fit` and n_features_a must be less than or equal to that. + If ``num_dims`` is not None, n_features_a must match it. + + A0: Not used. + .. deprecated:: 0.5.0 + `A0` was deprecated in version 0.5.0 and will + be removed in 0.6.0. Use 'init' instead. tol: float, optional (default=None) Convergence tolerance for the optimization. max_iter: int, optional - Cap on number of congugate gradient iterations. + Cap on number of conjugate gradient iterations. verbose : bool, optional (default=False) Whether to print progress messages or not. @@ -73,13 +107,21 @@ def __init__(self, n_components=None, num_dims='deprecated', A0=None, preprocessor : array-like, shape=(n_samples, n_features) or callable The preprocessor to call to get tuples from indices. If array-like, tuples will be formed like this: X[indices]. + + random_state : int or numpy.RandomState or None, optional (default=None) + A pseudo random number generator object or a seed for it if int. If + ``init='random'``, ``random_state`` is used to initialize the random + transformation. If ``init='pca'``, ``random_state`` is passed as an + argument to PCA when initializing the transformation. """ self.n_components = n_components self.num_dims = num_dims + self.init = init self.A0 = A0 self.tol = tol self.max_iter = max_iter self.verbose = verbose + self.random_state = random_state super(MLKR, self).__init__(preprocessor) def fit(self, X, y): @@ -91,11 +133,18 @@ def fit(self, X, y): X : (n x d) array of samples y : (n) data labels """ + if self.A0 != 'deprecated': + warnings.warn('"A0" parameter is not used.' + ' It has been deprecated in version 0.5.0 and will be' + 'removed in 0.6.0. Use "init" instead.', + DeprecationWarning) + if self.num_dims != 'deprecated': warnings.warn('"num_dims" parameter is not used.' ' It has been deprecated in version 0.5.0 and will be' ' removed in 0.6.0. Use "n_components" instead', DeprecationWarning) + X, y = self._prepare_inputs(X, y, y_numeric=True, ensure_min_samples=2) n, d = X.shape @@ -103,18 +152,27 @@ def fit(self, X, y): raise ValueError('Data and label lengths mismatch: %d != %d' % (n, y.shape[0])) - A = self.A0 m = _check_n_components(d, self.n_components) m = self.n_components if m is None: m = d - if A is None: - # initialize to PCA transformation matrix - # note: not the same as n_components=m ! - A = PCA().fit(X).components_.T[:m] - elif A.shape != (m, d): - raise ValueError('A0 needs shape (%d,%d) but got %s' % ( - m, d, A.shape)) + # if the init is the default (identity), we raise a warning just in case + if self.init is None: + # TODO: + # replace init=None by init='auto' in v0.6.0 and remove the warning + msg = ("Warning, no init was set (`init=None`). As of version 0.5.0, " + "the default init will now be set to 'auto', instead of 'pca'. " + "If you still want to use PCA as an init, set init='pca'. 
" + "This warning will disappear in v0.6.0, and `init` parameter's" + " default value will be set to 'auto'.") + warnings.warn(msg, ChangedBehaviorWarning) + init = 'auto' + else: + init = self.init + A = _initialize_transformer(m, X, y, init=init, + random_state=self.random_state, + # MLKR works on regression targets: + has_classes=False) # Measure the total training time train_time = time.time() diff --git a/metric_learn/mmc.py b/metric_learn/mmc.py index 0e6cd5cb..b3e6c203 100644 --- a/metric_learn/mmc.py +++ b/metric_learn/mmc.py @@ -21,11 +21,12 @@ import numpy as np from six.moves import xrange from sklearn.base import TransformerMixin -from sklearn.utils.validation import check_array, assert_all_finite +from sklearn.utils.validation import assert_all_finite +from sklearn.exceptions import ChangedBehaviorWarning from .base_metric import _PairsClassifierMixin, MahalanobisMixin from .constraints import Constraints, wrap_pairs -from ._util import vector_norm, transformer_from_metric +from ._util import transformer_from_metric, _initialize_metric_mahalanobis class _BaseMMC(MahalanobisMixin): @@ -34,20 +35,51 @@ class _BaseMMC(MahalanobisMixin): _tuple_size = 2 # constraints are pairs def __init__(self, max_iter=100, max_proj=10000, convergence_threshold=1e-3, - A0=None, diagonal=False, diagonal_c=1.0, verbose=False, - preprocessor=None): + init=None, A0='deprecated', diagonal=False, + diagonal_c=1.0, verbose=False, preprocessor=None, + random_state=None): """Initialize MMC. Parameters ---------- max_iter : int, optional max_proj : int, optional convergence_threshold : float, optional - A0 : (d x d) matrix, optional - initial metric, defaults to identity - only the main diagonal is taken if `diagonal == True` + init : None, string or numpy array, optional (default=None) + Initialization of the Mahalanobis matrix. Possible options are + 'identity', 'covariance', 'random', and a numpy array of + shape (n_features, n_features). If None, will be set + automatically to 'identity' (this is to raise a warning if + 'init' is not set, and stays to its default value (None), in v0.5.0). + + 'identity' + An identity matrix of shape (n_features, n_features). + + 'covariance' + The (pseudo-)inverse of the covariance matrix. + + 'random' + The initial Mahalanobis matrix will be a random SPD matrix of shape + `(n_features, n_features)`, generated using + `sklearn.datasets.make_spd_matrix`. + + numpy array + An SPD matrix of shape (n_features, n_features), that will + be used as such to initialize the metric. + + verbose : bool, optional + if True, prints information while learning + + preprocessor : array-like, shape=(n_samples, n_features) or callable + The preprocessor to call to get tuples from indices. If array-like, + tuples will be gotten like this: X[indices]. + A0 : Not used. + .. deprecated:: 0.5.0 + `A0` was deprecated in version 0.5.0 and will + be removed in 0.6.0. Use 'init' instead. diagonal : bool, optional if True, a diagonal metric will be learned, - i.e., a simple scaling of dimensions + i.e., a simple scaling of dimensions. The initialization will then + be the diagonal coefficients of the matrix given as 'init'. diagonal_c : float, optional weight of the dissimilarity constraint for diagonal metric learning @@ -56,29 +88,49 @@ def __init__(self, max_iter=100, max_proj=10000, convergence_threshold=1e-3, preprocessor : array-like, shape=(n_samples, n_features) or callable The preprocessor to call to get tuples from indices. If array-like, tuples will be gotten like this: X[indices]. 
+ random_state : int or numpy.RandomState or None, optional (default=None) + A pseudo random number generator object or a seed for it if int. If + ``init='random'``, ``random_state`` is used to initialize the random + transformation. """ self.max_iter = max_iter self.max_proj = max_proj self.convergence_threshold = convergence_threshold + self.init = init self.A0 = A0 self.diagonal = diagonal self.diagonal_c = diagonal_c self.verbose = verbose + self.random_state = random_state super(_BaseMMC, self).__init__(preprocessor) def _fit(self, pairs, y): + if self.A0 != 'deprecated': + warnings.warn('"A0" parameter is not used.' + ' It has been deprecated in version 0.5.0 and will be' + 'removed in 0.6.0. Use "init" instead.', + DeprecationWarning) pairs, y = self._prepare_inputs(pairs, y, type_of_inputs='tuples') - # init metric - if self.A0 is None: - self.A_ = np.identity(pairs.shape[2]) - if not self.diagonal: - # Don't know why division by 10... it's in the original code - # and seems to affect the overall scale of the learned metric. - self.A_ /= 10.0 + if self.init is None: + # TODO: replace init=None by init='auto' in v0.6.0 and remove the warning + msg = ("Warning, no init was set (`init=None`). As of version 0.5.0, " + "the default init will now be set to 'identity', instead of the " + "identity divided by a scaling factor of 10. " + "If you still want to use the same init as in previous " + "versions, set init=np.eye(d)/10, where d is the dimension " + "of your input space (d=pairs.shape[1]). " + "This warning will disappear in v0.6.0, and `init` parameter's" + " default value will be set to 'auto'.") + warnings.warn(msg, ChangedBehaviorWarning) + init = 'identity' else: - self.A_ = check_array(self.A0) + init = self.init + + self.A_ = _initialize_metric_mahalanobis(pairs, init, + random_state=self.random_state, + matrix_name='init') if self.diagonal: return self._fit_diag(pairs, y) @@ -356,7 +408,7 @@ class MMC(_BaseMMC, _PairsClassifierMixin): n_iter_ : `int` The number of iterations the solver has run. - transformer_ : `numpy.ndarray`, shape=(n_components, n_features) + transformer_ : `numpy.ndarray`, shape=(n_features, n_features) The linear transformation ``L`` deduced from the learned Mahalanobis metric (See function `transformer_from_metric`.) @@ -406,15 +458,15 @@ class MMC_Supervised(_BaseMMC, TransformerMixin): n_iter_ : `int` The number of iterations the solver has run. - transformer_ : `numpy.ndarray`, shape=(n_components, n_features) + transformer_ : `numpy.ndarray`, shape=(n_features, n_features) The linear transformation ``L`` deduced from the learned Mahalanobis metric (See function `transformer_from_metric`.) """ def __init__(self, max_iter=100, max_proj=10000, convergence_threshold=1e-6, - num_labeled='deprecated', num_constraints=None, A0=None, - diagonal=False, diagonal_c=1.0, verbose=False, - preprocessor=None): + num_labeled='deprecated', num_constraints=None, init=None, + A0='deprecated', diagonal=False, diagonal_c=1.0, verbose=False, + preprocessor=None, random_state=None): """Initialize the supervised version of `MMC`. `MMC_Supervised` creates pairs of similar sample by taking same class @@ -432,9 +484,38 @@ def __init__(self, max_iter=100, max_proj=10000, convergence_threshold=1e-6, be removed in 0.6.0. 
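# And the supervised counterpart, a sketch on hypothetical labeled data
# using the pseudo-inverse covariance as init:
import numpy as np
from metric_learn import MMC_Supervised

rng = np.random.RandomState(0)
X = rng.randn(40, 3)
y = np.repeat([0, 1], 20)
mmc = MMC_Supervised(num_constraints=50, init='covariance', random_state=0)
mmc.fit(X, y)
print(mmc.transformer_.shape)  # (3, 3)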
num_constraints: int, optional number of constraints to generate - A0 : (d x d) matrix, optional - initial metric, defaults to identity - only the main diagonal is taken if `diagonal == True` + init : None, string or numpy array, optional (default=None) + Initialization of the Mahalanobis matrix. Possible options are + 'identity', 'covariance', 'random', and a numpy array of + shape (n_features, n_features). If None, will be set + automatically to 'identity' (this is to raise a warning if + 'init' is not set, and stays to its default value (None), in v0.5.0). + + 'identity' + An identity matrix of shape (n_features, n_features). + + 'covariance' + The (pseudo-)inverse of the covariance matrix. + + 'random' + The initial Mahalanobis matrix will be a random SPD matrix of + shape `(n_features, n_features)`, generated using + `sklearn.datasets.make_spd_matrix`. + + numpy array + A numpy array of shape (n_features, n_features), that will + be used as such to initialize the metric. + + verbose : bool, optional + if True, prints information while learning + + preprocessor : array-like, shape=(n_samples, n_features) or callable + The preprocessor to call to get tuples from indices. If array-like, + tuples will be gotten like this: X[indices]. + A0 : Not used. + .. deprecated:: 0.5.0 + `A0` was deprecated in version 0.5.0 and will + be removed in 0.6.0. Use 'init' instead. diagonal : bool, optional if True, a diagonal metric will be learned, i.e., a simple scaling of dimensions @@ -446,11 +527,16 @@ def __init__(self, max_iter=100, max_proj=10000, convergence_threshold=1e-6, preprocessor : array-like, shape=(n_samples, n_features) or callable The preprocessor to call to get tuples from indices. If array-like, tuples will be formed like this: X[indices]. + random_state : int or numpy.RandomState or None, optional (default=None) + A pseudo random number generator object or a seed for it if int. If + ``init='random'``, ``random_state`` is used to initialize the random + Mahalanobis matrix. """ _BaseMMC.__init__(self, max_iter=max_iter, max_proj=max_proj, convergence_threshold=convergence_threshold, - A0=A0, diagonal=diagonal, diagonal_c=diagonal_c, - verbose=verbose, preprocessor=preprocessor) + init=init, A0=A0, diagonal=diagonal, + diagonal_c=diagonal_c, verbose=verbose, + preprocessor=preprocessor, random_state=random_state) self.num_labeled = num_labeled self.num_constraints = num_constraints diff --git a/metric_learn/nca.py b/metric_learn/nca.py index 3545aa89..1626e02f 100644 --- a/metric_learn/nca.py +++ b/metric_learn/nca.py @@ -19,11 +19,11 @@ import numpy as np from scipy.optimize import minimize from sklearn.metrics import pairwise_distances -from sklearn.exceptions import ConvergenceWarning +from sklearn.exceptions import ConvergenceWarning, ChangedBehaviorWarning from sklearn.utils.fixes import logsumexp from sklearn.base import TransformerMixin -from ._util import _check_n_components +from ._util import _initialize_transformer, _check_n_components from .base_metric import MahalanobisMixin EPS = np.finfo(float).eps @@ -41,12 +41,55 @@ class NCA(MahalanobisMixin, TransformerMixin): The learned linear transformation ``L``. 
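# Usage sketch for the init options documented below, with iris as a
# convenient toy dataset (assuming the patched release):
from sklearn.datasets import load_iris
from metric_learn import NCA

X, y = load_iris(return_X_y=True)
nca = NCA(init='pca', n_components=2, max_iter=100, random_state=42)
nca.fit(X, y)
print(nca.transform(X).shape)  # (150, 2)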
""" - def __init__(self, n_components=None, num_dims='deprecated', max_iter=100, - tol=None, verbose=False, preprocessor=None): + def __init__(self, init=None, n_components=None, num_dims='deprecated', + max_iter=100, tol=None, verbose=False, preprocessor=None, + random_state=None): """Neighborhood Components Analysis Parameters ---------- + init : None, string or numpy array, optional (default=None) + Initialization of the linear transformation. Possible options are + 'auto', 'pca', 'identity', 'random', and a numpy array of shape + (n_features_a, n_features_b). If None, will be set automatically to + 'auto' (this option is to raise a warning if 'init' is not set, + and stays to its default value None, in v0.5.0). + + 'auto' + Depending on ``n_components``, the most reasonable initialization + will be chosen. If ``n_components <= n_classes`` we use 'lda', as + it uses labels information. If not, but + ``n_components < min(n_features, n_samples)``, we use 'pca', as + it projects data in meaningful directions (those of higher + variance). Otherwise, we just use 'identity'. + + 'pca' + ``n_components`` principal components of the inputs passed + to :meth:`fit` will be used to initialize the transformation. + (See `sklearn.decomposition.PCA`) + + 'lda' + ``min(n_components, n_classes)`` most discriminative + components of the inputs passed to :meth:`fit` will be used to + initialize the transformation. (If ``n_components > n_classes``, + the rest of the components will be zero.) (See + `sklearn.discriminant_analysis.LinearDiscriminantAnalysis`) + + 'identity' + If ``n_components`` is strictly smaller than the + dimensionality of the inputs passed to :meth:`fit`, the identity + matrix will be truncated to the first ``n_components`` rows. + + 'random' + The initial transformation will be a random array of shape + `(n_components, n_features)`. Each value is sampled from the + standard normal distribution. + + numpy array + n_features_b must match the dimensionality of the inputs passed to + :meth:`fit` and n_features_a must be less than or equal to that. + If ``n_components`` is not None, n_features_a must match it. + n_components : int or None, optional (default=None) Dimensionality of reduced space (if None, defaults to dimension of X). @@ -64,12 +107,20 @@ def __init__(self, n_components=None, num_dims='deprecated', max_iter=100, verbose : bool, optional (default=False) Whether to print progress messages or not. + + random_state : int or numpy.RandomState or None, optional (default=None) + A pseudo random number generator object or a seed for it if int. If + ``init='random'``, ``random_state`` is used to initialize the random + transformation. If ``init='pca'``, ``random_state`` is passed as an + argument to PCA when initializing the transformation. """ self.n_components = n_components + self.init = init self.num_dims = num_dims self.max_iter = max_iter self.tol = tol self.verbose = verbose + self.random_state = random_state super(NCA, self).__init__(preprocessor) def fit(self, X, y): @@ -89,9 +140,22 @@ def fit(self, X, y): # Measure the total training time train_time = time.time() - # Initialize A to a scaling matrix - A = np.zeros((n_components, d)) - np.fill_diagonal(A, 1. / (np.maximum(X.max(axis=0) - X.min(axis=0), EPS))) + # Initialize A + # if the init is the default (auto), we raise a warning just in case + if self.init is None: + # TODO: replace init=None by init='auto' in v0.6.0 and remove the warning + msg = ("Warning, no init was set (`init=None`). 
As of version 0.5.0, " + "the default init will now be set to 'auto', instead of the " + "previous scaling matrix. same scaling matrix as before as an " + "init, set init=np.eye(X.shape[1])/" + "(np.maximum(X.max(axis=0)-X.min(axis=0), EPS))). This warning " + "will disappear in v0.6.0, and `init` parameter's default value " + "will be set to 'auto'.") + warnings.warn(msg, ChangedBehaviorWarning) + init = 'auto' + else: + init = self.init + A = _initialize_transformer(n_components, X, labels, init, self.verbose) # Run NCA mask = labels[:, np.newaxis] == labels[np.newaxis, :] diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py index 73eeefb7..b83c553d 100644 --- a/metric_learn/sdml.py +++ b/metric_learn/sdml.py @@ -18,11 +18,11 @@ from sklearn.base import TransformerMixin from scipy.linalg import pinvh from sklearn.covariance import graphical_lasso -from sklearn.exceptions import ConvergenceWarning +from sklearn.exceptions import ConvergenceWarning, ChangedBehaviorWarning from .base_metric import MahalanobisMixin, _PairsClassifierMixin from .constraints import Constraints, wrap_pairs -from ._util import transformer_from_metric +from ._util import transformer_from_metric, _initialize_metric_mahalanobis try: from inverse_covariance import quic except ImportError: @@ -35,8 +35,9 @@ class _BaseSDML(MahalanobisMixin): _tuple_size = 2 # constraints are pairs - def __init__(self, balance_param=0.5, sparsity_param=0.01, use_cov=True, - verbose=False, preprocessor=None): + def __init__(self, balance_param=0.5, sparsity_param=0.01, prior=None, + use_cov='deprecated', verbose=False, preprocessor=None, + random_state=None): """ Parameters ---------- @@ -46,8 +47,34 @@ def __init__(self, balance_param=0.5, sparsity_param=0.01, use_cov=True, sparsity_param : float, optional trade off between optimizer and sparseness (see graph_lasso) - use_cov : bool, optional - controls prior matrix, will use the identity if use_cov=False + prior : None, string or numpy array, optional (default=None) + Prior to set for the metric. Possible options are + 'identity', 'covariance', 'random', and a numpy array of + shape (n_features, n_features). For SDML, the prior should be strictly + positive definite (PD). If `None`, will be set + automatically to 'identity' (this is to raise a warning if + `prior` is not set, and stays to its default value (None), in v0.5.0). + + 'identity' + An identity matrix of shape (n_features, n_features). + + 'covariance' + The inverse covariance matrix. + + 'random' + The prior will be a random positive definite (PD) matrix of shape + `(n_features, n_features)`, generated using + `sklearn.datasets.make_spd_matrix`. + + numpy array + A positive definite (PD) matrix of shape + (n_features, n_features), that will be used as such to set the + prior. + + use_cov : Not used. + .. deprecated:: 0.5.0 + `A0` was deprecated in version 0.5.0 and will + be removed in 0.6.0. Use 'prior' instead. verbose : bool, optional if True, prints information while learning @@ -55,14 +82,25 @@ def __init__(self, balance_param=0.5, sparsity_param=0.01, use_cov=True, preprocessor : array-like, shape=(n_samples, n_features) or callable The preprocessor to call to get tuples from indices. If array-like, tuples will be gotten like this: X[indices]. + + random_state : int or numpy.RandomState or None, optional (default=None) + A pseudo random number generator object or a seed for it if int. If + ``prior='random'``, ``random_state`` is used to set the prior. 
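# A hedged peek at what `prior` resolves to internally, via the shared helper
# this patch adds (signature taken from the diff above; hypothetical data):
import numpy as np
from metric_learn._util import _initialize_metric_mahalanobis

rng = np.random.RandomState(0)
pairs = rng.randn(10, 2, 3)  # 10 pairs of 3-dimensional points
M, M_inv = _initialize_metric_mahalanobis(pairs, 'covariance',
                                          return_inverse=True,
                                          strict_pd=True,
                                          matrix_name='prior')
print(M.shape)                               # (3, 3)
print(np.allclose(M.dot(M_inv), np.eye(3)))  # True: covariance is PD here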
""" self.balance_param = balance_param self.sparsity_param = sparsity_param + self.prior = prior self.use_cov = use_cov self.verbose = verbose + self.random_state = random_state super(_BaseSDML, self).__init__(preprocessor) def _fit(self, pairs, y): + if self.use_cov != 'deprecated': + warnings.warn('"use_cov" parameter is not used.' + ' It has been deprecated in version 0.5.0 and will be' + 'removed in 0.6.0. Use "prior" instead.', + DeprecationWarning) if not HAS_SKGGM: if self.verbose: print("SDML will use scikit-learn's graphical lasso solver.") @@ -73,11 +111,26 @@ def _fit(self, pairs, y): type_of_inputs='tuples') # set up (the inverse of) the prior M - if self.use_cov: - X = np.vstack({tuple(row) for row in pairs.reshape(-1, pairs.shape[2])}) - prior_inv = np.atleast_2d(np.cov(X, rowvar=False)) + # if the prior is the default (identity), we raise a warning just in case + if self.prior is None: + # TODO: + # replace prior=None by prior='identity' in v0.6.0 and remove the + # warning + msg = ("Warning, no prior was set (`prior=None`). As of version 0.5.0, " + "the default prior will now be set to " + "'identity', instead of 'covariance'. If you still want to use " + "the inverse of the covariance matrix as a prior, " + "set prior='covariance'. This warning will disappear in " + "v0.6.0, and `prior` parameter's default value will be set to " + "'identity'.") + warnings.warn(msg, ChangedBehaviorWarning) + prior = 'identity' else: - prior_inv = np.identity(pairs.shape[2]) + prior = self.prior + _, prior_inv = _initialize_metric_mahalanobis(pairs, prior, + return_inverse=True, + strict_pd=True, + matrix_name='prior') diff = pairs[:, 0] - pairs[:, 1] loss_matrix = (diff.T * y).dot(diff) emp_cov = prior_inv + self.balance_param * loss_matrix @@ -92,7 +145,7 @@ def _fit(self, pairs, y): "positive semi-definite (PSD). The algorithm may diverge, " "and lead to degenerate solutions. " "To prevent that, try to decrease the balance parameter " - "`balance_param` and/or to set use_cov=False.", + "`balance_param` and/or to set prior='identity'.", ConvergenceWarning) w -= min_eigval # we translate the eigenvalues to make them all positive w += 1e-10 # we add a small offset to avoid definiteness problems @@ -139,7 +192,7 @@ class SDML(_BaseSDML, _PairsClassifierMixin): Attributes ---------- - transformer_ : `numpy.ndarray`, shape=(n_components, n_features) + transformer_ : `numpy.ndarray`, shape=(n_features, n_features) The linear transformation ``L`` deduced from the learned Mahalanobis metric (See function `transformer_from_metric`.) @@ -187,27 +240,56 @@ class SDML_Supervised(_BaseSDML, TransformerMixin): Attributes ---------- - transformer_ : `numpy.ndarray`, shape=(n_components, n_features) + transformer_ : `numpy.ndarray`, shape=(n_features, n_features) The linear transformation ``L`` deduced from the learned Mahalanobis metric (See function `transformer_from_metric`.) """ - def __init__(self, balance_param=0.5, sparsity_param=0.01, use_cov=True, - num_labeled='deprecated', num_constraints=None, verbose=False, - preprocessor=None): + def __init__(self, balance_param=0.5, sparsity_param=0.01, prior=None, + use_cov='deprecated', num_labeled='deprecated', + num_constraints=None, verbose=False, preprocessor=None, + random_state=None): """Initialize the supervised version of `SDML`. `SDML_Supervised` creates pairs of similar sample by taking same class samples, and pairs of dissimilar samples by taking different class samples. It then passes these pairs to `SDML` for training. 
+ Parameters ---------- balance_param : float, optional trade off between sparsity and M0 prior sparsity_param : float, optional trade off between optimizer and sparseness (see graph_lasso) - use_cov : bool, optional - controls prior matrix, will use the identity if use_cov=False + prior : None, string or numpy array, optional (default=None) + Prior to set for the metric. Possible options are + 'identity', 'covariance', 'random', and a numpy array of + shape (n_features, n_features). For SDML, the prior should be strictly + positive definite (PD). If `None`, will be set + automatically to 'identity' (this is to raise a warning if + `prior` is not set, and stays to its default value (None), in v0.5.0). + + 'identity' + An identity matrix of shape (n_features, n_features). + + 'covariance' + The inverse covariance matrix. + + 'random' + The prior will be a random SPD matrix of shape + `(n_features, n_features)`, generated using + `sklearn.datasets.make_spd_matrix`. + + numpy array + A positive definite (PD) matrix of shape + (n_features, n_features), that will be used as such to set the + prior. + + use_cov : Not used. + .. deprecated:: 0.5.0 + `A0` was deprecated in version 0.5.0 and will + be removed in 0.6.0. Use 'prior' instead. + num_labeled : Not used .. deprecated:: 0.5.0 `num_labeled` was deprecated in version 0.5.0 and will @@ -219,10 +301,15 @@ def __init__(self, balance_param=0.5, sparsity_param=0.01, use_cov=True, preprocessor : array-like, shape=(n_samples, n_features) or callable The preprocessor to call to get tuples from indices. If array-like, tuples will be formed like this: X[indices]. + random_state : int or numpy.RandomState or None, optional (default=None) + A pseudo random number generator object or a seed for it if int. If + ``init='random'``, ``random_state`` is used to set the random + prior. """ _BaseSDML.__init__(self, balance_param=balance_param, - sparsity_param=sparsity_param, use_cov=use_cov, - verbose=verbose, preprocessor=preprocessor) + sparsity_param=sparsity_param, prior=prior, + use_cov=use_cov, verbose=verbose, + preprocessor=preprocessor, random_state=random_state) self.num_labeled = num_labeled self.num_constraints = num_constraints diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py index 969bd7e5..18643363 100644 --- a/test/metric_learn_test.py +++ b/test/metric_learn_test.py @@ -5,11 +5,12 @@ from scipy.optimize import check_grad, approx_fprime from six.moves import xrange from sklearn.metrics import pairwise_distances -from sklearn.datasets import load_iris, make_classification, make_regression +from sklearn.datasets import (load_iris, make_classification, make_regression, + make_spd_matrix) from numpy.testing import (assert_array_almost_equal, assert_array_equal, assert_allclose) from sklearn.utils.testing import assert_warns_message -from sklearn.exceptions import ConvergenceWarning +from sklearn.exceptions import ConvergenceWarning, ChangedBehaviorWarning from sklearn.utils.validation import check_X_y try: from inverse_covariance import quic @@ -19,7 +20,7 @@ HAS_SKGGM = True from metric_learn import (LMNN, NCA, LFDA, Covariance, MLKR, MMC, RCA, LSML_Supervised, ITML_Supervised, SDML_Supervised, - RCA_Supervised, MMC_Supervised, SDML, ITML) + RCA_Supervised, MMC_Supervised, SDML, ITML, LSML) # Import this specially for testing. 
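# A brief sketch of how `wrap_pairs` is used in the tests below; the exact
# return layout is inferred from its call sites (e.g. the MMC iris test):
# given points X and index arrays (a, b) for similar pairs and (c, d) for
# dissimilar ones, it yields a (n_pairs, 2, n_features) pairs array plus
# +1/-1 labels, so that one can write:
#
#     pairs, y = wrap_pairs(X, [a, b, c, d])
#     MMC(convergence_threshold=0.01).fit(pairs, y)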
from metric_learn.constraints import wrap_pairs from metric_learn.lmnn import python_LMNN, _sum_outer_products @@ -92,6 +93,31 @@ def test_deprecation_num_labeled(self): ' removed in 0.6.0') assert_warns_message(DeprecationWarning, msg, lsml_supervised.fit, X, y) + def test_changed_behaviour_warning(self): + # test that a ChangedBehavior warning is thrown about the init, if the + # default parameters are used. + # TODO: remove in v.0.6 + X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) + y = np.array([1, 0, 1, 0]) + lsml_supervised = LSML_Supervised() + msg = ("Warning, no prior was set (`prior=None`). As of version 0.5.0, " + "the default prior will now be set to " + "'identity', instead of 'covariance'. If you still want to use " + "the inverse of the covariance matrix as a prior, " + "set prior='covariance'. This warning will disappear in " + "v0.6.0, and `prior` parameter's default value will be set to " + "'identity'.") + with pytest.warns(ChangedBehaviorWarning) as raised_warning: + lsml_supervised.fit(X, y) + assert any(msg == str(wrn.message) for wrn in raised_warning) + + pairs = np.array([[[-10., 0.], [10., 0.], [-5., 3.], [5., 0.]], + [[0., 50.], [0., -60], [-10., 0.], [10., 0.]]]) + lsml = LSML() + with pytest.warns(ChangedBehaviorWarning) as raised_warning: + lsml.fit(pairs) + assert any(msg == str(wrn.message) for wrn in raised_warning) + class TestITML(MetricTestCase): def test_iris(self): @@ -126,6 +152,27 @@ def test_deprecation_bounds(self): 'fit method instead.') assert_warns_message(DeprecationWarning, msg, itml_supervised.fit, X, y) + def test_deprecation_A0(self): + # test that a deprecation message is thrown if A0 is set at + # initialization + # TODO: remove in v.0.6 + X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) + y = np.array([1, 0, 1, 0]) + itml_supervised = ITML_Supervised(A0=np.ones_like(X)) + msg = ('"A0" parameter is not used.' + ' It has been deprecated in version 0.5.0 and will be' + 'removed in 0.6.0. 
Use "prior" instead.') + with pytest.warns(DeprecationWarning) as raised_warning: + itml_supervised.fit(X, y) + assert any(msg == str(wrn.message) for wrn in raised_warning) + + pairs = np.array([[[-10., 0.], [10., 0.]], [[0., 50.], [0., -60]]]) + y_pairs = [1, -1] + itml = ITML(A0=np.ones_like(X)) + with pytest.warns(DeprecationWarning) as raised_warning: + itml.fit(pairs, y_pairs) + assert any(msg == str(wrn.message) for wrn in raised_warning) + @pytest.mark.parametrize('bounds', [None, (20., 100.), [20., 100.], np.array([20., 100.]), @@ -190,10 +237,10 @@ def test_loss_grad_lbfgs(self): X, y = lmnn._prepare_inputs(X, y, dtype=float, ensure_min_samples=2) - num_pts, num_dims = X.shape + num_pts, n_components = X.shape unique_labels, label_inds = np.unique(y, return_inverse=True) lmnn.labels_ = np.arange(len(unique_labels)) - lmnn.transformer_ = np.eye(num_dims) + lmnn.transformer_ = np.eye(n_components) target_neighbors = lmnn._select_targets(X, label_inds) impostors = lmnn._find_impostors(target_neighbors[:, -1], X, label_inds) @@ -243,10 +290,10 @@ def test_toy_ex_lmnn(X, y, loss): X, y = lmnn._prepare_inputs(X, y, dtype=float, ensure_min_samples=2) - num_pts, num_dims = X.shape + num_pts, n_components = X.shape unique_labels, label_inds = np.unique(y, return_inverse=True) lmnn.labels_ = np.arange(len(unique_labels)) - lmnn.transformer_ = np.eye(num_dims) + lmnn.transformer_ = np.eye(n_components) target_neighbors = lmnn._select_targets(X, label_inds) impostors = lmnn._find_impostors(target_neighbors[:, -1], X, label_inds) @@ -336,7 +383,7 @@ def test_sdml_raises_warning_msg_not_installed_skggm(self): # because it will return a non SPD matrix pairs = np.array([[[-10., 0.], [10., 0.]], [[0., 50.], [0., -60]]]) y_pairs = [1, -1] - sdml = SDML(use_cov=False, balance_param=100, verbose=True) + sdml = SDML(prior='identity', balance_param=100, verbose=True) msg = ("There was a problem in SDML when using scikit-learn's graphical " "lasso solver. 
skggm's graphical lasso can sometimes converge on " @@ -352,14 +399,14 @@ def test_sdml_raises_warning_msg_not_installed_skggm(self): "installed.") def test_sdml_raises_warning_msg_installed_skggm(self): """Tests that the right warning message is raised if someone tries to - use SDML but has not installed skggm, and that the algorithm fails to + use SDML and has installed skggm, and that the algorithm fails to converge""" # TODO: remove if we don't need skggm anymore # case on which we know that skggm's graphical lasso fails # because it will return non finite values pairs = np.array([[[-10., 0.], [10., 0.]], [[0., 50.], [0., -60]]]) y_pairs = [1, -1] - sdml = SDML(use_cov=False, balance_param=100, verbose=True) + sdml = SDML(prior='identity', balance_param=100, verbose=True) msg = ("There was a problem in SDML when using skggm's graphical " "lasso solver.") @@ -382,7 +429,7 @@ def test_sdml_supervised_raises_warning_msg_installed_skggm(self): # pathological case) X = np.array([[-10., 0.], [10., 0.], [5., 0.], [3., 0.]]) y = [0, 0, 1, 1] - sdml_supervised = SDML_Supervised(balance_param=0.5, use_cov=False, + sdml_supervised = SDML_Supervised(balance_param=0.5, prior='identity', sparsity_param=0.01) msg = ("There was a problem in SDML when using skggm's graphical " "lasso solver.") @@ -395,25 +442,27 @@ def test_sdml_supervised_raises_warning_msg_installed_skggm(self): "that no warning should be thrown.") def test_raises_no_warning_installed_skggm(self): # otherwise we should be able to instantiate and fit SDML and it - # should raise no warning + # should raise no error and no ConvergenceWarning pairs = np.array([[[-10., 0.], [10., 0.]], [[0., -55.], [0., -60]]]) y_pairs = [1, -1] X, y = make_classification(random_state=42) - with pytest.warns(None) as record: - sdml = SDML() + with pytest.warns(None) as records: + sdml = SDML(prior='covariance') sdml.fit(pairs, y_pairs) - assert len(record) == 0 - with pytest.warns(None) as record: - sdml = SDML_Supervised(use_cov=False, balance_param=1e-5) - sdml.fit(X, y) - assert len(record) == 0 + for record in records: + assert record.category is not ConvergenceWarning + with pytest.warns(None) as records: + sdml_supervised = SDML_Supervised(prior='identity', balance_param=1e-5) + sdml_supervised.fit(X, y) + for record in records: + assert record.category is not ConvergenceWarning def test_iris(self): # Note: this is a flaky test, which fails for certain seeds. # TODO: un-flake it! rs = np.random.RandomState(5555) - sdml = SDML_Supervised(num_constraints=1500, use_cov=False, + sdml = SDML_Supervised(num_constraints=1500, prior='identity', balance_param=5e-5) sdml.fit(self.iris_points, self.iris_labels, random_state=rs) csep = class_separation(sdml.transform(self.iris_points), @@ -425,7 +474,7 @@ def test_deprecation_num_labeled(self): # initialization # TODO: remove in v.0.6 X, y = make_classification(random_state=42) - sdml_supervised = SDML_Supervised(num_labeled=np.inf, use_cov=False, + sdml_supervised = SDML_Supervised(num_labeled=np.inf, prior='identity', balance_param=5e-5) msg = ('"num_labeled" parameter is not used.' 
' It has been deprecated in version 0.5.0 and will be' @@ -437,12 +486,12 @@ def test_sdml_raises_warning_non_psd(self): pseudo-covariance matrix is not PSD""" pairs = np.array([[[-10., 0.], [10., 0.]], [[0., 50.], [0., -60]]]) y = [1, -1] - sdml = SDML(use_cov=True, sparsity_param=0.01, balance_param=0.5) + sdml = SDML(prior='covariance', sparsity_param=0.01, balance_param=0.5) msg = ("Warning, the input matrix of graphical lasso is not " "positive semi-definite (PSD). The algorithm may diverge, " "and lead to degenerate solutions. " "To prevent that, try to decrease the balance parameter " - "`balance_param` and/or to set use_cov=False.") + "`balance_param` and/or to set prior='identity'.") with pytest.warns(ConvergenceWarning) as raised_warning: try: sdml.fit(pairs, y) @@ -457,7 +506,7 @@ def test_sdml_converges_if_psd(self): pseudo-covariance matrix is PSD""" pairs = np.array([[[-10., 0.], [10., 0.]], [[0., -55.], [0., -60]]]) y = [1, -1] - sdml = SDML(use_cov=True, sparsity_param=0.01, balance_param=0.5) + sdml = SDML(prior='covariance', sparsity_param=0.01, balance_param=0.5) sdml.fit(pairs, y) assert np.isfinite(sdml.get_mahalanobis_matrix()).all() @@ -470,8 +519,56 @@ def test_sdml_works_on_non_spd_pb_with_skggm(self): it should work, but scikit-learn's graphical_lasso does not work""" X, y = load_iris(return_X_y=True) sdml = SDML_Supervised(balance_param=0.5, sparsity_param=0.01, - use_cov=True) - sdml.fit(X, y) + prior='covariance') + sdml.fit(X, y, random_state=np.random.RandomState(42)) + + def test_deprecation_use_cov(self): + # test that a deprecation message is thrown if use_cov is set at + # initialization + # TODO: remove in v.0.6 + X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) + y = np.array([1, 0, 1, 0]) + sdml_supervised = SDML_Supervised(use_cov=np.ones_like(X), + balance_param=1e-5) + msg = ('"use_cov" parameter is not used.' + ' It has been deprecated in version 0.5.0 and will be' + 'removed in 0.6.0. Use "prior" instead.') + with pytest.warns(DeprecationWarning) as raised_warning: + sdml_supervised.fit(X, y) + assert any(msg == str(wrn.message) for wrn in raised_warning) + + pairs = np.array([[[-10., 0.], [10., 0.]], [[0., 50.], [0., -60]]]) + y_pairs = [1, -1] + sdml = SDML(use_cov=np.ones_like(X), balance_param=1e-5) + with pytest.warns(DeprecationWarning) as raised_warning: + sdml.fit(pairs, y_pairs) + assert any(msg == str(wrn.message) for wrn in raised_warning) + + def test_changed_behaviour_warning(self): + # test that a ChangedBehavior warning is thrown about the init, if the + # default parameters are used (except for the balance_param that we need + # to set for the algorithm to not diverge) + # TODO: remove in v.0.6 + X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) + y = np.array([1, 0, 1, 0]) + sdml_supervised = SDML_Supervised(balance_param=1e-5) + msg = ("Warning, no prior was set (`prior=None`). As of version 0.5.0, " + "the default prior will now be set to " + "'identity', instead of 'covariance'. If you still want to use " + "the inverse of the covariance matrix as a prior, " + "set prior='covariance'. 
This warning will disappear in " + "v0.6.0, and `prior` parameter's default value will be set to " + "'identity'.") + with pytest.warns(ChangedBehaviorWarning) as raised_warning: + sdml_supervised.fit(X, y) + assert any(msg == str(wrn.message) for wrn in raised_warning) + + pairs = np.array([[[-10., 0.], [10., 0.]], [[0., 50.], [0., -60]]]) + y_pairs = [1, -1] + sdml = SDML(balance_param=1e-5) + with pytest.warns(ChangedBehaviorWarning) as raised_warning: + sdml.fit(pairs, y_pairs) + assert any(msg == str(wrn.message) for wrn in raised_warning) @pytest.mark.skipif(not HAS_SKGGM, @@ -483,7 +580,7 @@ def test_verbose_has_installed_skggm_sdml(capsys): # TODO: remove if we don't need skggm anymore pairs = np.array([[[-10., 0.], [10., 0.]], [[0., -55.], [0., -60]]]) y_pairs = [1, -1] - sdml = SDML(verbose=True) + sdml = SDML(verbose=True, prior='covariance') sdml.fit(pairs, y_pairs) out, _ = capsys.readouterr() assert "SDML will use skggm's graphical lasso solver." in out @@ -496,8 +593,8 @@ def test_verbose_has_installed_skggm_sdml_supervised(capsys): # Test that if users have installed skggm, a message is printed telling them # skggm's solver is used (when they use SDML_Supervised) # TODO: remove if we don't need skggm anymore - X, y = make_classification(random_state=42) - sdml = SDML_Supervised(verbose=True) + X, y = load_iris(return_X_y=True) + sdml = SDML_Supervised(verbose=True, prior='identity', balance_param=1e-5) sdml.fit(X, y) out, _ = capsys.readouterr() assert "SDML will use skggm's graphical lasso solver." in out @@ -512,7 +609,7 @@ def test_verbose_has_not_installed_skggm_sdml(capsys): # TODO: remove if we don't need skggm anymore pairs = np.array([[[-10., 0.], [10., 0.]], [[0., -55.], [0., -60]]]) y_pairs = [1, -1] - sdml = SDML(verbose=True) + sdml = SDML(verbose=True, prior='covariance') sdml.fit(pairs, y_pairs) out, _ = capsys.readouterr() assert "SDML will use scikit-learn's graphical lasso solver." in out @@ -526,7 +623,7 @@ def test_verbose_has_not_installed_skggm_sdml_supervised(capsys): # skggm's solver is used (when they use SDML_Supervised) # TODO: remove if we don't need skggm anymore X, y = make_classification(random_state=42) - sdml = SDML_Supervised(verbose=True, balance_param=1e-5, use_cov=False) + sdml = SDML_Supervised(verbose=True, balance_param=1e-5, prior='identity') sdml.fit(X, y) out, _ = capsys.readouterr() assert "SDML will use scikit-learn's graphical lasso solver." in out @@ -622,11 +719,8 @@ def test_singleton_class(self): X = X[[ind_0[0], ind_1[0], ind_2[0]]] y = y[[ind_0[0], ind_1[0], ind_2[0]]] - EPS = np.finfo(float).eps - A = np.zeros((X.shape[1], X.shape[1])) - np.fill_diagonal(A, - 1. / (np.maximum(X.max(axis=0) - X.min(axis=0), EPS))) - nca = NCA(max_iter=30, n_components=X.shape[1]) + A = make_spd_matrix(X.shape[1], X.shape[1]) + nca = NCA(init=A, max_iter=30, n_components=X.shape[1]) nca.fit(X, y) assert_array_equal(nca.transformer_, A) @@ -635,18 +729,34 @@ def test_one_class(self): # must stay like the initialization X = self.iris_points[self.iris_labels == 0] y = self.iris_labels[self.iris_labels == 0] - EPS = np.finfo(float).eps - A = np.zeros((X.shape[1], X.shape[1])) - np.fill_diagonal(A, - 1. 
/ (np.maximum(X.max(axis=0) - X.min(axis=0), EPS))) - nca = NCA(max_iter=30, n_components=X.shape[1]) + + A = make_spd_matrix(X.shape[1], X.shape[1]) + nca = NCA(init=A, max_iter=30, n_components=X.shape[1]) nca.fit(X, y) assert_array_equal(nca.transformer_, A) + def test_changed_behaviour_warning(self): + # test that a ChangedBehavior warning is thrown about the init, if the + # default parameters are used. + # TODO: remove in v.0.6 + X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) + y = np.array([1, 0, 1, 0]) + nca = NCA() + msg = ("Warning, no init was set (`init=None`). As of version 0.5.0, " + "the default init will now be set to 'auto', instead of the " + "previous scaling matrix. same scaling matrix as before as an " + "init, set init=np.eye(X.shape[1])/" + "(np.maximum(X.max(axis=0)-X.min(axis=0), EPS))). This warning will" + " disappear in v0.6.0, and `init` parameter's default value will " + "be set to 'auto'.") + with pytest.warns(ChangedBehaviorWarning) as raised_warning: + nca.fit(X, y) + assert any(msg == str(wrn.message) for wrn in raised_warning) + @pytest.mark.parametrize('num_dims', [None, 2]) def test_deprecation_num_dims_nca(num_dims): - # test that a deprecation message is thrown if num_labeled is set at + # test that a deprecation message is thrown if num_dims is set at # initialization # TODO: remove in v.0.6 X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) @@ -674,7 +784,7 @@ def test_iris(self): @pytest.mark.parametrize('num_dims', [None, 2]) def test_deprecation_num_dims_lfda(num_dims): - # test that a deprecation message is thrown if num_labeled is set at + # test that a deprecation message is thrown if num_dims is set at # initialization # TODO: remove in v.0.6 X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) @@ -715,7 +825,7 @@ def test_feature_null_variance(self): @pytest.mark.parametrize('num_dims', [None, 2]) def test_deprecation_num_dims_rca(num_dims): - # test that a deprecation message is thrown if num_labeled is set at + # test that a deprecation message is thrown if num_dims is set at # initialization # TODO: remove in v.0.6 X, y = load_iris(return_X_y=True) @@ -767,10 +877,40 @@ def grad_fn(M): rel_diff = check_grad(fun, grad_fn, M.ravel()) / np.linalg.norm(grad_fn(M)) np.testing.assert_almost_equal(rel_diff, 0.) + def test_deprecation_A0(self): + # test that a deprecation message is thrown if A0 is set at + # initialization + # TODO: remove in v.0.6 + X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) + y = np.array([1, 0, 1, 0]) + mlkr = MLKR(A0=np.ones_like(X)) + msg = ('"A0" parameter is not used.' + ' It has been deprecated in version 0.5.0 and will be' + 'removed in 0.6.0. Use "init" instead.') + with pytest.warns(DeprecationWarning) as raised_warning: + mlkr.fit(X, y) + assert any(msg == str(wrn.message) for wrn in raised_warning) + + def test_changed_behaviour_warning(self): + # test that a ChangedBehavior warning is thrown about the init, if the + # default parameters are used. + # TODO: remove in v.0.6 + X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) + y = np.array([0.1, 0.2, 0.3, 0.4]) + mlkr = MLKR() + msg = ("Warning, no init was set (`init=None`). As of version 0.5.0, " + "the default init will now be set to 'auto', instead of 'pca'. " + "If you still want to use PCA as an init, set init='pca'. 
" + "This warning will disappear in v0.6.0, and `init` parameter's" + " default value will be set to 'auto'.") + with pytest.warns(ChangedBehaviorWarning) as raised_warning: + mlkr.fit(X, y) + assert any(msg == str(wrn.message) for wrn in raised_warning) + @pytest.mark.parametrize('num_dims', [None, 2]) def test_deprecation_num_dims_mlkr(num_dims): - # test that a deprecation message is thrown if num_labeled is set at + # test that a deprecation message is thrown if num_dims is set at # initialization # TODO: remove in v.0.6 X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) @@ -794,8 +934,9 @@ def test_iris(self): c, d = np.nonzero(np.triu(~mask, k=1)) # Full metric - mmc = MMC(convergence_threshold=0.01) - mmc.fit(*wrap_pairs(self.iris_points, [a,b,c,d])) + n_features = self.iris_points.shape[1] + mmc = MMC(convergence_threshold=0.01, init=np.eye(n_features) / 10) + mmc.fit(*wrap_pairs(self.iris_points, [a, b, c, d])) expected = [[+0.000514, +0.000868, -0.001195, -0.001703], [+0.000868, +0.001468, -0.002021, -0.002879], [-0.001195, -0.002021, +0.002782, +0.003964], @@ -834,6 +975,53 @@ def test_deprecation_num_labeled(self): ' removed in 0.6.0') assert_warns_message(DeprecationWarning, msg, mmc_supervised.fit, X, y) + def test_deprecation_A0(self): + # test that a deprecation message is thrown if A0 is set at + # initialization + # TODO: remove in v.0.6 + X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) + y = np.array([1, 0, 1, 0]) + mmc_supervised = MMC_Supervised(A0=np.ones_like(X)) + msg = ('"A0" parameter is not used.' + ' It has been deprecated in version 0.5.0 and will be' + 'removed in 0.6.0. Use "init" instead.') + with pytest.warns(DeprecationWarning) as raised_warning: + mmc_supervised.fit(X, y) + assert any(msg == str(wrn.message) for wrn in raised_warning) + + pairs = np.array([[[-10., 0.], [10., 0.]], [[0., 50.], [0., -60]]]) + y_pairs = [1, -1] + mmc = MMC(A0=np.ones_like(X)) + with pytest.warns(DeprecationWarning) as raised_warning: + mmc.fit(pairs, y_pairs) + assert any(msg == str(wrn.message) for wrn in raised_warning) + + def test_changed_behaviour_warning(self): + # test that a ChangedBehavior warning is thrown about the init, if the + # default parameters are used. + # TODO: remove in v.0.6 + X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) + y = np.array([1, 0, 1, 0]) + mmc_supervised = MMC_Supervised() + msg = ("Warning, no init was set (`init=None`). As of version 0.5.0, " + "the default init will now be set to 'identity', instead of the " + "identity divided by a scaling factor of 10. " + "If you still want to use the same init as in previous " + "versions, set init=np.eye(d)/10, where d is the dimension " + "of your input space (d=pairs.shape[1]). 
" + "This warning will disappear in v0.6.0, and `init` parameter's" + " default value will be set to 'auto'.") + with pytest.warns(ChangedBehaviorWarning) as raised_warning: + mmc_supervised.fit(X, y) + assert any(msg == str(wrn.message) for wrn in raised_warning) + + pairs = np.array([[[-10., 0.], [10., 0.]], [[0., 50.], [0., -60]]]) + y_pairs = [1, -1] + mmc = MMC() + with pytest.warns(ChangedBehaviorWarning) as raised_warning: + mmc.fit(pairs, y_pairs) + assert any(msg == str(wrn.message) for wrn in raised_warning) + @pytest.mark.parametrize(('algo_class', 'dataset'), [(NCA, make_classification()), diff --git a/test/test_base_metric.py b/test/test_base_metric.py index 7706b1e4..1b312b35 100644 --- a/test/test_base_metric.py +++ b/test/test_base_metric.py @@ -21,62 +21,67 @@ def test_covariance(self): def test_lmnn(self): self.assertRegexpMatches( str(metric_learn.LMNN()), - r"(python_)?LMNN\(convergence_tol=0.001, k=3, learn_rate=1e-07, " - r"max_iter=1000,\s+min_iter=50, n_components=None, " - r"num_dims='deprecated',\s+preprocessor=None, " - r"regularization=0.5, use_pca=True, verbose=False\)") + r"(python_)?LMNN\(convergence_tol=0.001, init='auto', k=3, " + r"learn_rate=1e-07,\s+" + r"max_iter=1000, min_iter=50, n_components=None, " + r"num_dims='deprecated',\s+preprocessor=None, random_state=None, " + r"regularization=0.5,\s+use_pca=True, verbose=False\)") def test_nca(self): self.assertEqual(remove_spaces(str(metric_learn.NCA())), - remove_spaces( - "NCA(max_iter=100, n_components=None, " - "num_dims='deprecated', preprocessor=None, " - "tol=None, verbose=False)")) + remove_spaces("NCA(init=None, max_iter=100," + "n_components=None, " + "num_dims='deprecated', " + "preprocessor=None, random_state=None, " + "tol=None, verbose=False)")) def test_lfda(self): self.assertEqual(remove_spaces(str(metric_learn.LFDA())), remove_spaces( - "LFDA(embedding_type='weighted', k=None, " - "n_components=None, num_dims='deprecated'," - "preprocessor=None)")) + "LFDA(embedding_type='weighted', k=None, " + "n_components=None, num_dims='deprecated'," + "preprocessor=None)")) def test_itml(self): self.assertEqual(remove_spaces(str(metric_learn.ITML())), remove_spaces(""" -ITML(A0=None, convergence_threshold=0.001, gamma=1.0, max_iter=1000, - preprocessor=None, verbose=False) +ITML(A0='deprecated', convergence_threshold=0.001, gamma=1.0, + max_iter=1000, preprocessor=None, prior='identity', random_state=None, + verbose=False) """)) self.assertEqual(remove_spaces(str(metric_learn.ITML_Supervised())), remove_spaces(""" -ITML_Supervised(A0=None, bounds='deprecated', convergence_threshold=0.001, - gamma=1.0, max_iter=1000, num_constraints=None, - num_labeled='deprecated', preprocessor=None, verbose=False) +ITML_Supervised(A0='deprecated', bounds='deprecated', + convergence_threshold=0.001, gamma=1.0, + max_iter=1000, num_constraints=None, num_labeled='deprecated', + preprocessor=None, prior='identity', random_state=None, verbose=False) """)) def test_lsml(self): - self.assertEqual( - remove_spaces(str(metric_learn.LSML())), - remove_spaces( - "LSML(max_iter=1000, preprocessor=None, prior=None, tol=0.001, " - "verbose=False)")) + self.assertEqual(remove_spaces(str(metric_learn.LSML())), + remove_spaces(""" +LSML(max_iter=1000, preprocessor=None, prior=None, + random_state=None, tol=0.001, verbose=False) +""")) self.assertEqual(remove_spaces(str(metric_learn.LSML_Supervised())), remove_spaces(""" -LSML_Supervised(max_iter=1000, num_constraints=None, num_labeled='deprecated', - preprocessor=None, prior=None, 
tol=0.001, verbose=False, - weights=None) +LSML_Supervised(max_iter=1000, num_constraints=None, + num_labeled='deprecated', preprocessor=None, prior=None, + random_state=None, tol=0.001, verbose=False, weights=None) """)) def test_sdml(self): self.assertEqual(remove_spaces(str(metric_learn.SDML())), - remove_spaces( - "SDML(balance_param=0.5, preprocessor=None, " - "sparsity_param=0.01, use_cov=True," - "\n verbose=False)")) + remove_spaces(""" +SDML(balance_param=0.5, preprocessor=None, prior=None, random_state=None, + sparsity_param=0.01, use_cov='deprecated', verbose=False) +""")) self.assertEqual(remove_spaces(str(metric_learn.SDML_Supervised())), remove_spaces(""" SDML_Supervised(balance_param=0.5, num_constraints=None, - num_labeled='deprecated', preprocessor=None, sparsity_param=0.01, - use_cov=True, verbose=False) + num_labeled='deprecated', preprocessor=None, prior=None, + random_state=None, sparsity_param=0.01, use_cov='deprecated', + verbose=False) """)) def test_rca(self): @@ -94,22 +99,26 @@ def test_rca(self): def test_mlkr(self): self.assertEqual(remove_spaces(str(metric_learn.MLKR())), - remove_spaces( - "MLKR(A0=None, max_iter=1000, n_components=None, " - "num_dims='deprecated', " - "preprocessor=None, tol=None, verbose=False)")) + remove_spaces("MLKR(A0='deprecated', init=None," + "max_iter=1000, n_components=None," + "num_dims='deprecated', preprocessor=None," + "random_state=None, tol=None, " + "verbose=False)" + )) def test_mmc(self): self.assertEqual(remove_spaces(str(metric_learn.MMC())), remove_spaces(""" -MMC(A0=None, convergence_threshold=0.001, diagonal=False, diagonal_c=1.0, - max_iter=100, max_proj=10000, preprocessor=None, verbose=False) +MMC(A0='deprecated', convergence_threshold=0.001, diagonal=False, + diagonal_c=1.0, init=None, max_iter=100, max_proj=10000, + preprocessor=None, random_state=None, verbose=False) """)) self.assertEqual(remove_spaces(str(metric_learn.MMC_Supervised())), remove_spaces(""" -MMC_Supervised(A0=None, convergence_threshold=1e-06, diagonal=False, - diagonal_c=1.0, max_iter=100, max_proj=10000, num_constraints=None, - num_labeled='deprecated', preprocessor=None, verbose=False) +MMC_Supervised(A0='deprecated', convergence_threshold=1e-06, diagonal=False, + diagonal_c=1.0, init=None, max_iter=100, max_proj=10000, + num_constraints=None, num_labeled='deprecated', preprocessor=None, + random_state=None, verbose=False) """)) diff --git a/test/test_fit_transform.py b/test/test_fit_transform.py index 5e8a87f4..b7255ea9 100644 --- a/test/test_fit_transform.py +++ b/test/test_fit_transform.py @@ -65,13 +65,13 @@ def test_lmnn(self): def test_sdml_supervised(self): seed = np.random.RandomState(1234) sdml = SDML_Supervised(num_constraints=1500, balance_param=1e-5, - use_cov=False) + prior='identity') sdml.fit(self.X, self.y, random_state=seed) res_1 = sdml.transform(self.X) seed = np.random.RandomState(1234) sdml = SDML_Supervised(num_constraints=1500, balance_param=1e-5, - use_cov=False) + prior='identity') res_2 = sdml.fit_transform(self.X, self.y, random_state=seed) assert_array_almost_equal(res_1, res_2) diff --git a/test/test_mahalanobis_mixin.py b/test/test_mahalanobis_mixin.py index e7fa5b17..54c37936 100644 --- a/test/test_mahalanobis_mixin.py +++ b/test/test_mahalanobis_mixin.py @@ -2,19 +2,24 @@ import pytest import numpy as np +from numpy.linalg import LinAlgError from numpy.testing import assert_array_almost_equal, assert_allclose from scipy.spatial.distance import pdist, squareform, mahalanobis +from scipy.stats import ortho_group 
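# Sketch of the eigendecomposition trick the tests below rely on to craft
# matrices of controlled definiteness: draw a random orthogonal basis P,
# choose eigenvalues w (set one negative or zero to break strict PD-ness),
# and reassemble M = P @ diag(w) @ P.T:
#
#     rng = np.random.RandomState(42)
#     P = ortho_group.rvs(5, random_state=rng)
#     w = np.abs(rng.randn(5))
#     w[0] = -10.  # one negative eigenvalue, so M is not PSD
#     M = P.dot(np.diag(w)).dot(P.T)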
from sklearn import clone from sklearn.cluster import DBSCAN +from sklearn.datasets import make_spd_matrix from sklearn.utils import check_random_state +from sklearn.utils.multiclass import type_of_target from sklearn.utils.testing import set_random_state from metric_learn._util import make_context from metric_learn.base_metric import (_QuadrupletsClassifierMixin, _PairsClassifierMixin) +from metric_learn.exceptions import NonPSDError from test.test_utils import (ids_metric_learners, metric_learners, - remove_y_quadruplets) + remove_y_quadruplets, ids_classifiers) RNG = check_random_state(0) @@ -56,7 +61,7 @@ def test_score_pairs_toy_example(estimator, build_dataset): pairs = np.stack([X[:10], X[10:20]], axis=1) embedded_pairs = pairs.dot(model.transformer_.T) distances = np.sqrt(np.sum((embedded_pairs[:, 1] - - embedded_pairs[:, 0])**2, + embedded_pairs[:, 0])**2, axis=-1)) assert_array_almost_equal(model.score_pairs(pairs), distances) @@ -190,7 +195,7 @@ def test_get_metric_equivalent_to_explicit_mahalanobis(estimator, a, b = (rng.randn(n_features), rng.randn(n_features)) expected_dist = mahalanobis(a[None], b[None], VI=model.get_mahalanobis_matrix()) - assert_allclose(metric(a, b), expected_dist, rtol=1e-15) + assert_allclose(metric(a, b), expected_dist, rtol=1e-13) @pytest.mark.parametrize('estimator, build_dataset', metric_learners, @@ -300,3 +305,349 @@ def test_transformer_is_2D(estimator, build_dataset): labels = labels[to_keep] model.fit(*remove_y_quadruplets(estimator, trunc_data, labels)) assert model.transformer_.shape == (1, 1) # the transformer must be 2D + + +@pytest.mark.parametrize('estimator, build_dataset', + [(ml, bd) for idml, (ml, bd) + in zip(ids_metric_learners, + metric_learners) + if hasattr(ml, 'n_components') and + hasattr(ml, 'init')], + ids=[idml for idml, (ml, _) + in zip(ids_metric_learners, + metric_learners) + if hasattr(ml, 'n_components') and + hasattr(ml, 'init')]) +def test_init_transformation(estimator, build_dataset): + input_data, labels, _, X = build_dataset() + is_classification = (type_of_target(labels) in ['multiclass', 'binary']) + model = clone(estimator) + rng = np.random.RandomState(42) + + # Start learning from scratch + model.set_params(init='identity') + model.fit(input_data, labels) + + # Initialize with random + model.set_params(init='random') + model.fit(input_data, labels) + + # Initialize with auto + model.set_params(init='auto') + model.fit(input_data, labels) + + # Initialize with PCA + model.set_params(init='pca') + model.fit(input_data, labels) + + # Initialize with LDA + if is_classification: + model.set_params(init='lda') + model.fit(input_data, labels) + + # Initialize with a numpy array + init = rng.rand(X.shape[1], X.shape[1]) + model.set_params(init=init) + model.fit(input_data, labels) + + # init.shape[1] must match X.shape[1] + init = rng.rand(X.shape[1], X.shape[1] + 1) + model.set_params(init=init) + msg = ('The input dimensionality ({}) of the given ' + 'linear transformation `init` must match the ' + 'dimensionality of the given inputs `X` ({}).' + .format(init.shape[1], X.shape[1])) + with pytest.raises(ValueError) as raised_error: + model.fit(input_data, labels) + assert str(raised_error.value) == msg + + # init.shape[0] must be <= init.shape[1] + init = rng.rand(X.shape[1] + 1, X.shape[1]) + model.set_params(init=init) + msg = ('The output dimensionality ({}) of the given ' + 'linear transformation `init` cannot be ' + 'greater than its input dimensionality ({}).' 
+ .format(init.shape[0], init.shape[1])) + with pytest.raises(ValueError) as raised_error: + model.fit(input_data, labels) + assert str(raised_error.value) == msg + + # init.shape[0] must match n_components + init = rng.rand(X.shape[1], X.shape[1]) + n_components = X.shape[1] - 1 + model.set_params(init=init, n_components=n_components) + msg = ('The preferred dimensionality of the ' + 'projected space `n_components` ({}) does not match ' + 'the output dimensionality of the given ' + 'linear transformation `init` ({})!' + .format(n_components, init.shape[0])) + with pytest.raises(ValueError) as raised_error: + model.fit(input_data, labels) + assert str(raised_error.value) == msg + + # init must be as specified in the docstring + model.set_params(init=1) + msg = ("`init` must be 'auto', 'pca', 'identity', " + "'random'{} or a numpy array of shape " + "(n_components, n_features)." + .format(", 'lda'" if is_classification else '')) + with pytest.raises(ValueError) as raised_error: + model.fit(input_data, labels) + assert str(raised_error.value) == msg + + +@pytest.mark.parametrize('n_samples', [3, 5, 7, 11]) +@pytest.mark.parametrize('n_features', [3, 5, 7, 11]) +@pytest.mark.parametrize('n_classes', [5, 7, 11]) +@pytest.mark.parametrize('n_components', [3, 5, 7, 11]) +@pytest.mark.parametrize('estimator, build_dataset', + [(ml, bd) for idml, (ml, bd) + in zip(ids_metric_learners, + metric_learners) + if hasattr(ml, 'n_components') and + hasattr(ml, 'init')], + ids=[idml for idml, (ml, _) + in zip(ids_metric_learners, + metric_learners) + if hasattr(ml, 'n_components') and + hasattr(ml, 'init')]) +def test_auto_init_transformation(n_samples, n_features, n_classes, + n_components, estimator, build_dataset): + # Test that auto choose the init transformation as expected with every + # configuration of order of n_samples, n_features, n_classes and + # n_components, for all metric learners that learn a transformation. 
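  # The selection rule exercised here (mirroring the expected-model branch
  # at the end of this test, and test__auto_select_init further down) is:
  #
  #     if has_classes and n_components <= min(n_classes - 1, n_features):
  #         init = 'lda'
  #     elif n_components < min(n_features, n_samples):
  #         init = 'pca'
  #     else:
  #         init = 'identity'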
+ if n_classes >= n_samples: + pass + # n_classes > n_samples is impossible, and n_classes == n_samples + # throws an error from lda but is an absurd case + else: + input_data, labels, _, X = build_dataset() + model_base = clone(estimator) + rng = np.random.RandomState(42) + model_base.set_params(init='auto', + n_components=n_components, + random_state=rng) + # To make the test work for LMNN: + if 'LMNN' in model_base.__class__.__name__: + model_base.set_params(k=1) + # To make the test faster for estimators that have a max_iter: + if hasattr(model_base, 'max_iter'): + model_base.set_params(max_iter=1) + if n_components > n_features: + # this would return a ValueError, which is tested in + # test_init_transformation + pass + else: + # We need to build a dataset of the right shape: + num_to_pad_n_samples = ((n_samples // input_data.shape[0] + 1)) + num_to_pad_n_features = ((n_features // input_data.shape[-1] + 1)) + if input_data.ndim == 3: + input_data = np.tile(input_data, + (num_to_pad_n_samples, input_data.shape[1], + num_to_pad_n_features)) + else: + input_data = np.tile(input_data, + (num_to_pad_n_samples, num_to_pad_n_features)) + input_data = input_data[:n_samples, ..., :n_features] + assert input_data.shape[0] == n_samples + assert input_data.shape[-1] == n_features + has_classes = model_base.__class__.__name__ in ids_classifiers + if has_classes: + labels = np.tile(range(n_classes), n_samples // + n_classes + 1)[:n_samples] + else: + labels = np.tile(labels, n_samples // labels.shape[0] + 1)[:n_samples] + model = clone(model_base) + model.fit(input_data, labels) + if n_components <= min(n_classes - 1, n_features) and has_classes: + model_other = clone(model_base).set_params(init='lda') + elif n_components < min(n_features, n_samples): + model_other = clone(model_base).set_params(init='pca') + else: + model_other = clone(model_base).set_params(init='identity') + model_other.fit(input_data, labels) + assert_array_almost_equal(model.transformer_, + model_other.transformer_) + + +@pytest.mark.parametrize('estimator, build_dataset', + [(ml, bd) for idml, (ml, bd) + in zip(ids_metric_learners, + metric_learners) + if not hasattr(ml, 'n_components') and + hasattr(ml, 'init')], + ids=[idml for idml, (ml, _) + in zip(ids_metric_learners, + metric_learners) + if not hasattr(ml, 'n_components') and + hasattr(ml, 'init')]) +def test_init_mahalanobis(estimator, build_dataset): + """Tests that for estimators that learn a mahalanobis matrix + instead of a transformer, i.e. those that are mahalanobis metric learners + where we can change the init, but not choose the n_components, + (TODO: be more explicit on this characterization, for instance with + safe_flags like in scikit-learn) that the init has an expected behaviour. 
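  Concretely, as exercised below, `init`/`prior` accept 'identity',
  'covariance', 'random', or a symmetric PSD array such as
  make_spd_matrix(n_features, random_state=rng); a non-symmetric or
  non-PSD array is expected to raise an error.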
+ """ + input_data, labels, _, X = build_dataset() + + matrices_to_set = [] + if hasattr(estimator, 'init'): + matrices_to_set.append('init') + if hasattr(estimator, 'prior'): + matrices_to_set.append('prior') + + for param in matrices_to_set: + model = clone(estimator) + set_random_state(model) + rng = np.random.RandomState(42) + + # Start learning from scratch + model.set_params(**{param: 'identity'}) + model.fit(input_data, labels) + + # Initialize with random + model.set_params(**{param: 'random'}) + model.fit(input_data, labels) + + # Initialize with covariance + model.set_params(**{param: 'covariance'}) + model.fit(input_data, labels) + + # Initialize with a random spd matrix + init = make_spd_matrix(X.shape[1], random_state=rng) + model.set_params(**{param: init}) + model.fit(input_data, labels) + + # init.shape[1] must match X.shape[1] + init = make_spd_matrix(X.shape[1] + 1, X.shape[1] + 1) + model.set_params(**{param: init}) + msg = ('The input dimensionality {} of the given ' + 'mahalanobis matrix `{}` must match the ' + 'dimensionality of the given inputs ({}).' + .format(init.shape, param, input_data.shape[-1])) + + with pytest.raises(ValueError) as raised_error: + model.fit(input_data, labels) + assert str(raised_error.value) == msg + + # The input matrix must be symmetric + init = rng.rand(X.shape[1], X.shape[1]) + model.set_params(**{param: init}) + msg = ("`{}` is not symmetric.".format(param)) + with pytest.raises(ValueError) as raised_error: + model.fit(input_data, labels) + assert str(raised_error.value) == msg + + # The input matrix must be SPD + P = ortho_group.rvs(X.shape[1], random_state=rng) + w = np.abs(rng.randn(X.shape[1])) + w[0] = -10. + M = P.dot(np.diag(w)).dot(P.T) + model.set_params(**{param: M}) + msg = ("Matrix is not positive semidefinite (PSD).") + with pytest.raises(NonPSDError) as raised_err: + model.fit(input_data, labels) + assert str(raised_err.value) == msg + + # init must be as specified in the docstring + model.set_params(**{param: 1}) + msg = ("`{}` must be 'identity', 'covariance', " + "'random' or a numpy array of shape " + "(n_features, n_features).".format(param)) + with pytest.raises(ValueError) as raised_error: + model.fit(input_data, labels) + assert str(raised_error.value) == msg + + +@pytest.mark.parametrize('estimator, build_dataset', + [(ml, bd) for idml, (ml, bd) + in zip(ids_metric_learners, + metric_learners) + if idml[:4] in ['ITML', 'SDML', 'LSML']], + ids=[idml for idml, (ml, _) + in zip(ids_metric_learners, + metric_learners) + if idml[:4] in ['ITML', 'SDML', 'LSML']]) +def test_singular_covariance_init_or_prior(estimator, build_dataset): + """Tests that when using the 'covariance' init or prior, it returns the + appropriate error if the covariance matrix is singular, for algorithms + that need a strictly PD prior or init (see + https://github.com/metric-learn/metric-learn/issues/202 and + https://github.com/metric-learn/metric-learn/pull/195#issuecomment + -492332451) + """ + matrices_to_set = [] + if hasattr(estimator, 'init'): + matrices_to_set.append('init') + if hasattr(estimator, 'prior'): + matrices_to_set.append('prior') + + input_data, labels, _, X = build_dataset() + for param in matrices_to_set: + model = clone(estimator) + set_random_state(model) + # We create a feature that is a linear combination of the first two + # features: + input_data = np.concatenate([input_data, input_data[:, ..., :2] + .dot([[2], [3]])], + axis=-1) + model.set_params(**{param: 'covariance'}) + msg = ("Unable to get a true inverse of the 
covariance " + "matrix since it is not definite. Try another " + "`{}`, or an algorithm that does not " + "require the `{}` to be strictly positive definite." + .format(param, param)) + with pytest.raises(LinAlgError) as raised_err: + model.fit(input_data, labels) + assert str(raised_err.value) == msg + + +@pytest.mark.integration +@pytest.mark.parametrize('estimator, build_dataset', + [(ml, bd) for idml, (ml, bd) + in zip(ids_metric_learners, + metric_learners) + if idml[:4] in ['ITML', 'SDML', 'LSML']], + ids=[idml for idml, (ml, _) + in zip(ids_metric_learners, + metric_learners) + if idml[:4] in ['ITML', 'SDML', 'LSML']]) +@pytest.mark.parametrize('w0', [1e-20, 0., -1e-20]) +def test_singular_array_init_or_prior(estimator, build_dataset, w0): + """Tests that when using a custom array init (or prior), it returns the + appropriate error if it is singular, for algorithms + that need a strictly PD prior or init (see + https://github.com/metric-learn/metric-learn/issues/202 and + https://github.com/metric-learn/metric-learn/pull/195#issuecomment + -492332451) + """ + matrices_to_set = [] + if hasattr(estimator, 'init'): + matrices_to_set.append('init') + if hasattr(estimator, 'prior'): + matrices_to_set.append('prior') + + rng = np.random.RandomState(42) + input_data, labels, _, X = build_dataset() + for param in matrices_to_set: + model = clone(estimator) + set_random_state(model) + + P = ortho_group.rvs(X.shape[1], random_state=rng) + w = np.abs(rng.randn(X.shape[1])) + w[0] = w0 + M = P.dot(np.diag(w)).dot(P.T) + if hasattr(model, 'init'): + model.set_params(init=M) + if hasattr(model, 'prior'): + model.set_params(prior=M) + if not hasattr(model, 'prior') and not hasattr(model, 'init'): + raise RuntimeError("Neither prior or init could be set in the model.") + msg = ("You should provide a strictly positive definite " + "matrix as `{}`. This one is not definite. Try another" + " {}, or an algorithm that does not " + "require the {} to be strictly positive definite." + .format(*(param,) * 3)) + with pytest.raises(LinAlgError) as raised_err: + model.fit(input_data, labels) + assert str(raised_err.value) == msg diff --git a/test/test_sklearn_compat.py b/test/test_sklearn_compat.py index 6b451aee..0c0f098d 100644 --- a/test/test_sklearn_compat.py +++ b/test/test_sklearn_compat.py @@ -85,15 +85,15 @@ def stable_init(self, sparsity_param=0.01, num_labeled='deprecated', num_constraints=num_constraints, verbose=verbose, preprocessor=preprocessor, - balance_param=1e-5, use_cov=False) + balance_param=1e-5, prior='identity') dSDML.__init__ = stable_init check_estimator(dSDML) def test_rca(self): - def stable_init(self, num_dims=None, pca_comps=None, + def stable_init(self, n_components=None, pca_comps=None, chunk_size=2, preprocessor=None): # this init makes RCA stable for scikit-learn examples. 
- RCA_Supervised.__init__(self, num_chunks=2, num_dims=num_dims, + RCA_Supervised.__init__(self, num_chunks=2, n_components=n_components, pca_comps=pca_comps, chunk_size=chunk_size, preprocessor=preprocessor) dRCA.__init__ = stable_init diff --git a/test/test_transformer_metric_conversion.py b/test/test_transformer_metric_conversion.py index 0139f632..651f60ea 100644 --- a/test/test_transformer_metric_conversion.py +++ b/test/test_transformer_metric_conversion.py @@ -11,6 +11,7 @@ LMNN, NCA, LFDA, Covariance, MLKR, LSML_Supervised, ITML_Supervised, SDML_Supervised, RCA_Supervised) from metric_learn._util import transformer_from_metric +from metric_learn.exceptions import NonPSDError class TestTransformerMetricConversion(unittest.TestCase): @@ -49,7 +50,7 @@ def test_lmnn(self): def test_sdml_supervised(self): seed = np.random.RandomState(1234) - sdml = SDML_Supervised(num_constraints=1500, use_cov=False, + sdml = SDML_Supervised(num_constraints=1500, prior='identity', balance_param=1e-5) sdml.fit(self.X, self.y, random_state=seed) L = sdml.transformer_ @@ -162,10 +163,10 @@ def test_non_psd_raises(self): P = ortho_group.rvs(7, random_state=rng) M = P.dot(D).dot(P.T) msg = ("Matrix is not positive semidefinite (PSD).") - with pytest.raises(ValueError) as raised_error: + with pytest.raises(NonPSDError) as raised_error: transformer_from_metric(M) assert str(raised_error.value) == msg - with pytest.raises(ValueError) as raised_error: + with pytest.raises(NonPSDError) as raised_error: transformer_from_metric(D) assert str(raised_error.value) == msg diff --git a/test/test_utils.py b/test/test_utils.py index 08415a76..2e57f489 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -11,7 +11,8 @@ make_name, preprocess_points, check_collapsed_pairs, validate_vector, _check_sdp_from_eigen, _check_n_components, - check_y_valid_values_for_pairs) + check_y_valid_values_for_pairs, + _auto_select_init) from metric_learn import (ITML, LSML, MMC, RCA, SDML, Covariance, LFDA, LMNN, MLKR, NCA, ITML_Supervised, LSML_Supervised, MMC_Supervised, RCA_Supervised, SDML_Supervised, @@ -19,7 +20,7 @@ from metric_learn.base_metric import (ArrayIndexer, MahalanobisMixin, _PairsClassifierMixin, _QuadrupletsClassifierMixin) -from metric_learn.exceptions import PreprocessorError +from metric_learn.exceptions import PreprocessorError, NonPSDError from sklearn.datasets import make_regression, make_blobs, load_iris @@ -104,7 +105,7 @@ def build_quadruplets(with_preprocessor=False): pairs_learners = [(ITML(max_iter=2), build_pairs), # max_iter=2 to be faster (MMC(max_iter=2), build_pairs), # max_iter=2 to be faster - (SDML(use_cov=False, balance_param=1e-5), build_pairs)] + (SDML(prior='identity', balance_param=1e-5), build_pairs)] ids_pairs_learners = list(map(lambda x: x.__class__.__name__, [learner for (learner, _) in pairs_learners])) @@ -118,13 +119,13 @@ def build_quadruplets(with_preprocessor=False): (LSML_Supervised(), build_classification), (MMC_Supervised(max_iter=5), build_classification), (RCA_Supervised(num_chunks=10), build_classification), - (SDML_Supervised(use_cov=False, balance_param=1e-5), + (SDML_Supervised(prior='identity', balance_param=1e-5), build_classification)] ids_classifiers = list(map(lambda x: x.__class__.__name__, [learner for (learner, _) in classifiers])) -regressors = [(MLKR(), build_regression)] +regressors = [(MLKR(init='pca'), build_regression)] ids_regressors = list(map(lambda x: x.__class__.__name__, [learner for (learner, _) in regressors])) @@ -993,7 +994,7 @@ def 
test__validate_vector(): validate_vector(x) -def test_check_sdp_from_eigen_positive_err_messages(): +def test__check_sdp_from_eigen_positive_err_messages(): """Tests that if _check_sdp_from_eigen is given a negative tol it returns an error, and if positive (or None) it does not""" w = np.abs(np.random.RandomState(42).randn(10)) + 1 @@ -1008,6 +1009,37 @@ def test_check_sdp_from_eigen_positive_err_messages(): _check_sdp_from_eigen(w, None) +@pytest.mark.unit +@pytest.mark.parametrize('w', [np.array([-1.2, 5.5, 6.6]), + np.array([-1.2, -5.6])]) +def test__check_sdp_from_eigen_positive_eigenvalues(w): + """Tests that _check_sdp_from_eigen, returns a NonPSDError when + the eigenvalues are negatives or null.""" + with pytest.raises(NonPSDError): + _check_sdp_from_eigen(w) + + +@pytest.mark.unit +@pytest.mark.parametrize('w', [np.array([0., 2.3, 5.3]), + np.array([1e-20, 3.5]), + np.array([1.5, 2.4, 4.6])]) +def test__check_sdp_from_eigen_negative_eigenvalues(w): + """Tests that _check_sdp_from_eigen, returns no error when the + eigenvalues are positive.""" + _check_sdp_from_eigen(w) + + +@pytest.mark.unit +@pytest.mark.parametrize('w, is_definite', [(np.array([1e-15, 5.6]), False), + (np.array([-1e-15, 5.6]), False), + (np.array([3.2, 5.6, 0.01]), True), + ]) +def test__check_sdp_from_eigen_returns_definiteness(w, is_definite): + """Tests that _check_sdp_from_eigen returns the definiteness of the + matrix (when it is PSD), based on the given eigenvalues""" + assert _check_sdp_from_eigen(w) == is_definite + + def test__check_n_components(): """Checks that n_components returns what is expected (including the errors)""" @@ -1094,3 +1126,23 @@ def test_check_input_pairs_learners_invalid_y(estimator, build_dataset, with pytest.raises(ValueError) as raised_error: model.fit(input_data, wrong_labels) assert str(raised_error.value) == expected_msg + + +@pytest.mark.parametrize('has_classes, n_features, n_samples, n_components, ' + 'n_classes, result', + [(False, 3, 20, 3, 0, 'identity'), + (False, 3, 2, 3, 0, 'identity'), + (False, 5, 3, 4, 0, 'identity'), + (False, 4, 5, 3, 0, 'pca'), + (True, 5, 6, 3, 4, 'lda'), + (True, 6, 3, 3, 3, 'identity'), + (True, 5, 6, 4, 2, 'pca'), + (True, 2, 6, 2, 10, 'lda'), + (True, 4, 6, 2, 3, 'lda') + ]) +def test__auto_select_init(has_classes, n_features, n_samples, n_components, + n_classes, + result): + """Checks that the auto selection of the init works as expected""" + assert (_auto_select_init(has_classes, n_features, + n_samples, n_components, n_classes) == result) From 999cb5b6514729bdf6f1f6867d11098a1f69cab9 Mon Sep 17 00:00:00 2001 From: William de Vazelhes <31916524+wdevazelhes@users.noreply.github.com> Date: Wed, 12 Jun 2019 15:27:45 +0200 Subject: [PATCH 117/210] [MRG] Add ChangedBehaviorWarning message for LMNN too (#214) * Add ChangedBehaviorWarning message for LMNN too * Remove useless 'as an init' --- metric_learn/lmnn.py | 27 ++++++++++++++++++++++----- metric_learn/lsml.py | 2 +- metric_learn/mlkr.py | 2 +- metric_learn/nca.py | 12 ++++++------ metric_learn/sdml.py | 2 +- test/metric_learn_test.py | 27 ++++++++++++++++++++++----- test/test_base_metric.py | 2 +- 7 files changed, 54 insertions(+), 20 deletions(-) diff --git a/metric_learn/lmnn.py b/metric_learn/lmnn.py index c2437b86..bbef122d 100644 --- a/metric_learn/lmnn.py +++ b/metric_learn/lmnn.py @@ -17,6 +17,7 @@ import warnings from collections import Counter from six.moves import xrange +from sklearn.exceptions import ChangedBehaviorWarning from sklearn.metrics import euclidean_distances from 
sklearn.base import TransformerMixin @@ -26,7 +27,7 @@ # commonality between LMNN implementations class _base_LMNN(MahalanobisMixin, TransformerMixin): - def __init__(self, init='auto', k=3, min_iter=50, max_iter=1000, + def __init__(self, init=None, k=3, min_iter=50, max_iter=1000, learn_rate=1e-7, regularization=0.5, convergence_tol=0.001, use_pca=True, verbose=False, preprocessor=None, n_components=None, num_dims='deprecated', random_state=None): @@ -34,10 +35,12 @@ def __init__(self, init='auto', k=3, min_iter=50, max_iter=1000, Parameters ---------- - init : string or numpy array, optional (default='auto') + init : None, string or numpy array, optional (default=None) Initialization of the linear transformation. Possible options are - 'auto', 'pca', 'lda', 'identity', 'random', and a numpy array of shape - (n_features_a, n_features_b). + 'auto', 'pca', 'identity', 'random', and a numpy array of shape + (n_features_a, n_features_b). If None, will be set automatically to + 'auto' (this option is to raise a warning if 'init' is not set, + and stays to its default value None, in v0.5.0). 'auto' Depending on ``n_components``, the most reasonable initialization @@ -135,7 +138,21 @@ def fit(self, X, y): if len(label_inds) != num_pts: raise ValueError('Must have one label per point.') self.labels_ = np.arange(len(unique_labels)) - self.transformer_ = _initialize_transformer(output_dim, X, y, self.init, + + # if the init is the default (None), we raise a warning + if self.init is None: + # TODO: replace init=None by init='auto' in v0.6.0 and remove the warning + msg = ("Warning, no init was set (`init=None`). As of version 0.5.0, " + "the default init will now be set to 'auto', instead of the " + "previous identity matrix. If you still want to use the identity " + "matrix as before, set init='identity'. This warning " + "will disappear in v0.6.0, and `init` parameter's default value " + "will be set to 'auto'.") + warnings.warn(msg, ChangedBehaviorWarning) + init = 'auto' + else: + init = self.init + self.transformer_ = _initialize_transformer(output_dim, X, y, init, self.verbose, self.random_state) required_k = np.bincount(label_inds).min() diff --git a/metric_learn/lsml.py b/metric_learn/lsml.py index 4350b003..f59392c1 100644 --- a/metric_learn/lsml.py +++ b/metric_learn/lsml.py @@ -94,7 +94,7 @@ def _fit(self, quadruplets, weights=None): else: self.w_ = weights self.w_ /= self.w_.sum() # weights must sum to 1 - # if the prior is the default (identity), we raise a warning just in case + # if the prior is the default (None), we raise a warning if self.prior is None: msg = ("Warning, no prior was set (`prior=None`). 
As of version 0.5.0, " "the default prior will now be set to " diff --git a/metric_learn/mlkr.py b/metric_learn/mlkr.py index 9e9cf433..c625b67c 100644 --- a/metric_learn/mlkr.py +++ b/metric_learn/mlkr.py @@ -156,7 +156,7 @@ def fit(self, X, y): m = self.n_components if m is None: m = d - # if the init is the default (identity), we raise a warning just in case + # if the init is the default (None), we raise a warning if self.init is None: # TODO: # replace init=None by init='auto' in v0.6.0 and remove the warning diff --git a/metric_learn/nca.py b/metric_learn/nca.py index 1626e02f..2b541a64 100644 --- a/metric_learn/nca.py +++ b/metric_learn/nca.py @@ -141,16 +141,16 @@ def fit(self, X, y): train_time = time.time() # Initialize A - # if the init is the default (auto), we raise a warning just in case + # if the init is the default (None), we raise a warning if self.init is None: # TODO: replace init=None by init='auto' in v0.6.0 and remove the warning msg = ("Warning, no init was set (`init=None`). As of version 0.5.0, " "the default init will now be set to 'auto', instead of the " - "previous scaling matrix. same scaling matrix as before as an " - "init, set init=np.eye(X.shape[1])/" - "(np.maximum(X.max(axis=0)-X.min(axis=0), EPS))). This warning " - "will disappear in v0.6.0, and `init` parameter's default value " - "will be set to 'auto'.") + "previous scaling matrix. If you still want to use the same " + "scaling matrix as before, set " + "init=np.eye(X.shape[1])/(np.maximum(X.max(axis=0)-X.min(axis=0)" + ", EPS))). This warning will disappear in v0.6.0, and `init` " + "parameter's default value will be set to 'auto'.") warnings.warn(msg, ChangedBehaviorWarning) init = 'auto' else: diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py index b83c553d..c5e63fa8 100644 --- a/metric_learn/sdml.py +++ b/metric_learn/sdml.py @@ -111,7 +111,7 @@ def _fit(self, pairs, y): type_of_inputs='tuples') # set up (the inverse of) the prior M - # if the prior is the default (identity), we raise a warning just in case + # if the prior is the default (None), we raise a warning if self.prior is None: # TODO: # replace prior=None by prior='identity' in v0.6.0 and remove the diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py index 18643363..1ec7fe4c 100644 --- a/test/metric_learn_test.py +++ b/test/metric_learn_test.py @@ -275,6 +275,23 @@ def grad(x): np.linalg.norm(approx_fprime(L.ravel(), fun, epsilon))) np.testing.assert_almost_equal(rel_diff, 0., decimal=5) + def test_changed_behaviour_warning(self): + # test that a ChangedBehavior warning is thrown about the init, if the + # default parameters are used. + # TODO: remove in v.0.6 + X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) + y = np.array([1, 0, 1, 0]) + lmnn = LMNN(k=2) + msg = ("Warning, no init was set (`init=None`). As of version 0.5.0, " + "the default init will now be set to 'auto', instead of the " + "previous identity matrix. If you still want to use the identity " + "matrix as before, set init='identity'. This warning " + "will disappear in v0.6.0, and `init` parameter's default value " + "will be set to 'auto'.") + with pytest.warns(ChangedBehaviorWarning) as raised_warning: + lmnn.fit(X, y) + assert any(msg == str(wrn.message) for wrn in raised_warning) + @pytest.mark.parametrize('X, y, loss', [(np.array([[0], [1], [2], [3]]), [1, 1, 0, 0], 3.0), @@ -744,11 +761,11 @@ def test_changed_behaviour_warning(self): nca = NCA() msg = ("Warning, no init was set (`init=None`). 
As of version 0.5.0, " "the default init will now be set to 'auto', instead of the " - "previous scaling matrix. same scaling matrix as before as an " - "init, set init=np.eye(X.shape[1])/" - "(np.maximum(X.max(axis=0)-X.min(axis=0), EPS))). This warning will" - " disappear in v0.6.0, and `init` parameter's default value will " - "be set to 'auto'.") + "previous scaling matrix. If you still want to use the same " + "scaling matrix as before, set " + "init=np.eye(X.shape[1])/(np.maximum(X.max(axis=0)-X.min(axis=0)" + ", EPS))). This warning will disappear in v0.6.0, and `init` " + "parameter's default value will be set to 'auto'.") with pytest.warns(ChangedBehaviorWarning) as raised_warning: nca.fit(X, y) assert any(msg == str(wrn.message) for wrn in raised_warning) diff --git a/test/test_base_metric.py b/test/test_base_metric.py index 1b312b35..5c258f2e 100644 --- a/test/test_base_metric.py +++ b/test/test_base_metric.py @@ -21,7 +21,7 @@ def test_covariance(self): def test_lmnn(self): self.assertRegexpMatches( str(metric_learn.LMNN()), - r"(python_)?LMNN\(convergence_tol=0.001, init='auto', k=3, " + r"(python_)?LMNN\(convergence_tol=0.001, init=None, k=3, " r"learn_rate=1e-07,\s+" r"max_iter=1000, min_iter=50, n_components=None, " r"num_dims='deprecated',\s+preprocessor=None, random_state=None, " From a22c2e60e71a7d033f6fd642ba9b32778cb6c043 Mon Sep 17 00:00:00 2001 From: William de Vazelhes <31916524+wdevazelhes@users.noreply.github.com> Date: Wed, 12 Jun 2019 16:19:47 +0200 Subject: [PATCH 118/210] [MRG] Remove shogun dependency (#216) * Remove shogun dependency * Finalize removing of shogun LMNN * Remove LMNN useless base class --- README.rst | 7 ------ bench/benchmarks/iris.py | 9 +------- doc/getting_started.rst | 8 ------- doc/supervised.rst | 5 ----- metric_learn/lmnn.py | 46 ++------------------------------------- test/metric_learn_test.py | 18 +++++++-------- test/test_base_metric.py | 15 +++++++------ 7 files changed, 19 insertions(+), 89 deletions(-) diff --git a/README.rst b/README.rst index 32a9bb90..027e5498 100644 --- a/README.rst +++ b/README.rst @@ -41,13 +41,6 @@ package installed). See the `sphinx documentation`_ for full documentation about installation, API, usage, and examples. -**Notes** - -If a recent version of the Shogun Python modular (``modshogun``) library -is available, the LMNN implementation will use the fast C++ version from -there. The two implementations differ slightly, and the C++ version is -more complete. - .. _sphinx documentation: http://metric-learn.github.io/metric-learn/ diff --git a/bench/benchmarks/iris.py b/bench/benchmarks/iris.py index e3390930..5973f7b8 100644 --- a/bench/benchmarks/iris.py +++ b/bench/benchmarks/iris.py @@ -13,16 +13,9 @@ 'NCA': metric_learn.NCA(max_iter=700, n_components=2), 'RCA_Supervised': metric_learn.RCA_Supervised(dim=2, num_chunks=30, chunk_size=2), - 'SDML_Supervised': metric_learn.SDML_Supervised(num_constraints=1500), + 'SDML_Supervised': metric_learn.SDML_Supervised(num_constraints=1500) } -try: - from metric_learn.lmnn import python_LMNN - if python_LMNN is not metric_learn.LMNN: - CLASSES['python_LMNN'] = python_LMNN(k=5, learn_rate=1e-6, verbose=False) -except ImportError: - pass - class IrisDataset(object): params = [sorted(CLASSES)] diff --git a/doc/getting_started.rst b/doc/getting_started.rst index d620e401..5a671d86 100644 --- a/doc/getting_started.rst +++ b/doc/getting_started.rst @@ -23,14 +23,6 @@ Alternately, download the source repository and run: (install from commit `a0ed406 `_). 
- For running the examples only: matplotlib -**Notes** - -If a recent version of the Shogun Python modular (``modshogun``) library -is available, the LMNN implementation will use the fast C++ version from -there. The two implementations differ slightly, and the C++ version is -more complete. - - Quick start =========== diff --git a/doc/supervised.rst b/doc/supervised.rst index 83bf4449..c438294f 100644 --- a/doc/supervised.rst +++ b/doc/supervised.rst @@ -87,11 +87,6 @@ indicates :math:`\mathbf{x}_{i}, \mathbf{x}_{j}` belong to different class, lmnn = LMNN(k=5, learn_rate=1e-6) lmnn.fit(X, Y, verbose=False) -If a recent version of the Shogun Python modular (``modshogun``) library -is available, the LMNN implementation will use the fast C++ version from -there. Otherwise, the included pure-Python version will be used. -The two implementations differ slightly, and the C++ version is more complete. - .. topic:: References: .. [1] `Distance Metric Learning for Large Margin Nearest Neighbor diff --git a/metric_learn/lmnn.py b/metric_learn/lmnn.py index bbef122d..20eeea3b 100644 --- a/metric_learn/lmnn.py +++ b/metric_learn/lmnn.py @@ -25,8 +25,7 @@ from .base_metric import MahalanobisMixin -# commonality between LMNN implementations -class _base_LMNN(MahalanobisMixin, TransformerMixin): +class LMNN(MahalanobisMixin, TransformerMixin): def __init__(self, init=None, k=3, min_iter=50, max_iter=1000, learn_rate=1e-7, regularization=0.5, convergence_tol=0.001, use_pca=True, verbose=False, preprocessor=None, @@ -114,11 +113,7 @@ def __init__(self, init=None, k=3, min_iter=50, max_iter=1000, self.n_components = n_components self.num_dims = num_dims self.random_state = random_state - super(_base_LMNN, self).__init__(preprocessor) - - -# slower Python version -class python_LMNN(_base_LMNN): + super(LMNN, self).__init__(preprocessor) def fit(self, X, y): if self.num_dims != 'deprecated': @@ -344,40 +339,3 @@ def _sum_outer_products(data, a_inds, b_inds, weights=None): if weights is not None: return np.dot(Xab.T, Xab * weights[:,None]) return np.dot(Xab.T, Xab) - - -try: - # use the fast C++ version, if available - from modshogun import LMNN as shogun_LMNN - from modshogun import RealFeatures, MulticlassLabels - - class LMNN(_base_LMNN): - """Large Margin Nearest Neighbor (LMNN) - - Attributes - ---------- - n_iter_ : `int` - The number of iterations the solver has run. - - transformer_ : `numpy.ndarray`, shape=(n_components, n_features) - The learned linear transformation ``L``. - """ - - def fit(self, X, y): - X, y = self._prepare_inputs(X, y, dtype=float, - ensure_min_samples=2) - labels = MulticlassLabels(y) - self._lmnn = shogun_LMNN(RealFeatures(X.T), labels, self.k) - self._lmnn.set_maxiter(self.max_iter) - self._lmnn.set_obj_threshold(self.convergence_tol) - self._lmnn.set_regularization(self.regularization) - self._lmnn.set_stepsize(self.learn_rate) - if self.use_pca: - self._lmnn.train() - else: - self._lmnn.train(np.eye(X.shape[1])) - self.transformer_ = self._lmnn.get_linear_transform(X) - return self - -except ImportError: - LMNN = python_LMNN diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py index 1ec7fe4c..a58b8a99 100644 --- a/test/metric_learn_test.py +++ b/test/metric_learn_test.py @@ -23,7 +23,7 @@ RCA_Supervised, MMC_Supervised, SDML, ITML, LSML) # Import this specially for testing. 
from metric_learn.constraints import wrap_pairs -from metric_learn.lmnn import python_LMNN, _sum_outer_products +from metric_learn.lmnn import _sum_outer_products def class_separation(X, labels): @@ -213,14 +213,12 @@ def test_bounds_parameters_invalid(bounds): class TestLMNN(MetricTestCase): def test_iris(self): - # Test both impls, if available. - for LMNN_cls in set((LMNN, python_LMNN)): - lmnn = LMNN_cls(k=5, learn_rate=1e-6, verbose=False) - lmnn.fit(self.iris_points, self.iris_labels) + lmnn = LMNN(k=5, learn_rate=1e-6, verbose=False) + lmnn.fit(self.iris_points, self.iris_labels) - csep = class_separation(lmnn.transform(self.iris_points), - self.iris_labels) - self.assertLess(csep, 0.25) + csep = class_separation(lmnn.transform(self.iris_points), + self.iris_labels) + self.assertLess(csep, 0.25) def test_loss_grad_lbfgs(self): """Test gradient of loss function @@ -336,7 +334,7 @@ def test_convergence_simple_example(capsys): # LMNN should converge on this simple example, which it did not with # this issue: https://github.com/metric-learn/metric-learn/issues/88 X, y = make_classification(random_state=0) - lmnn = python_LMNN(verbose=True) + lmnn = LMNN(verbose=True) lmnn.fit(X, y) out, _ = capsys.readouterr() assert "LMNN converged with objective" in out @@ -346,7 +344,7 @@ def test_no_twice_same_objective(capsys): # test that the objective function never has twice the same value # see https://github.com/metric-learn/metric-learn/issues/88 X, y = make_classification(random_state=0) - lmnn = python_LMNN(verbose=True) + lmnn = LMNN(verbose=True) lmnn.fit(X, y) out, _ = capsys.readouterr() lines = re.split("\n+", out) diff --git a/test/test_base_metric.py b/test/test_base_metric.py index 5c258f2e..7d0c11e7 100644 --- a/test/test_base_metric.py +++ b/test/test_base_metric.py @@ -19,13 +19,14 @@ def test_covariance(self): remove_spaces("Covariance(preprocessor=None)")) def test_lmnn(self): - self.assertRegexpMatches( - str(metric_learn.LMNN()), - r"(python_)?LMNN\(convergence_tol=0.001, init=None, k=3, " - r"learn_rate=1e-07,\s+" - r"max_iter=1000, min_iter=50, n_components=None, " - r"num_dims='deprecated',\s+preprocessor=None, random_state=None, " - r"regularization=0.5,\s+use_pca=True, verbose=False\)") + self.assertEqual( + remove_spaces(str(metric_learn.LMNN())), + remove_spaces( + "LMNN(convergence_tol=0.001, init=None, k=3, " + "learn_rate=1e-07, " + "max_iter=1000, min_iter=50, n_components=None, " + "num_dims='deprecated', preprocessor=None, random_state=None, " + "regularization=0.5, use_pca=True, verbose=False)")) def test_nca(self): self.assertEqual(remove_spaces(str(metric_learn.NCA())), From 85185175f356697f4a91feacaed2d3a9d70af95f Mon Sep 17 00:00:00 2001 From: William de Vazelhes <31916524+wdevazelhes@users.noreply.github.com> Date: Wed, 12 Jun 2019 17:19:04 +0200 Subject: [PATCH 119/210] [MRG] Remove preprocessing the data for RCA (#194) * Remove initialization of the data for RCA * Add deprecated flag for supervised version too * Remove comment saying we'll do PCA * Add ChangedBehaviorWarning and do tests * improve change behavior warning * Update message in case covariance matrix is not invertible * FIX: still ignore testing RCA while fixed in #198 * Some reformatting * Fix test string * TST: add test for warning message when covariance is not definite * Address https://github.com/metric-learn/metric-learn/pull/194#discussion_r292387277 --- metric_learn/rca.py | 55 ++++++++++++++++++---------------- test/metric_learn_test.py | 62 +++++++++++++++++++++++++++++++++++++-- 
test/test_base_metric.py | 4 +-- 3 files changed, 92 insertions(+), 29 deletions(-) diff --git a/metric_learn/rca.py b/metric_learn/rca.py index 45c9bbf2..1dbffdd6 100644 --- a/metric_learn/rca.py +++ b/metric_learn/rca.py @@ -17,6 +17,7 @@ from six.moves import xrange from sklearn import decomposition from sklearn.base import TransformerMixin +from sklearn.exceptions import ChangedBehaviorWarning from ._util import _check_n_components from .base_metric import MahalanobisMixin @@ -48,7 +49,7 @@ class RCA(MahalanobisMixin, TransformerMixin): """ def __init__(self, n_components=None, num_dims='deprecated', - pca_comps=None, preprocessor=None): + pca_comps='deprecated', preprocessor=None): """Initialize the learner. Parameters @@ -62,12 +63,10 @@ def __init__(self, n_components=None, num_dims='deprecated', `num_dims` was deprecated in version 0.5.0 and will be removed in 0.6.0. Use `n_components` instead. - pca_comps : int, float, None or string - Number of components to keep during PCA preprocessing. - If None (default), does not perform PCA. - If ``0 < pca_comps < 1``, it is used as - the minimum explained variance ratio. - See sklearn.decomposition.PCA for more details. + pca_comps : Not used + .. deprecated:: 0.5.0 + `pca_comps` was deprecated in version 0.5.0 and will + be removed in 0.6.0. preprocessor : array-like, shape=(n_samples, n_features) or callable The preprocessor to call to get tuples from indices. If array-like, @@ -83,8 +82,9 @@ def _check_dimension(self, rank, X): if rank < d: warnings.warn('The inner covariance matrix is not invertible, ' 'so the transformation matrix may contain Nan values. ' - 'You should adjust pca_comps to remove noise and ' - 'redundant information.') + 'You should reduce the dimensionality of your input,' + 'for instance using `sklearn.decomposition.PCA` as a ' + 'preprocessing step.') dim = _check_n_components(d, self.n_components) return dim @@ -105,25 +105,33 @@ def fit(self, X, chunks): ' It has been deprecated in version 0.5.0 and will be' ' removed in 0.6.0. Use "n_components" instead', DeprecationWarning) + + if self.pca_comps != 'deprecated': + warnings.warn( + '"pca_comps" parameter is not used. ' + 'It has been deprecated in version 0.5.0 and will be' + 'removed in 0.6.0. RCA will not do PCA preprocessing anymore. If ' + 'you still want to do it, you could use ' + '`sklearn.decomposition.PCA` and an `sklearn.pipeline.Pipeline`.', + DeprecationWarning) + X, chunks = self._prepare_inputs(X, chunks, ensure_min_samples=2) - # PCA projection to remove noise and redundant information. - if self.pca_comps is not None: - pca = decomposition.PCA(n_components=self.pca_comps) - X_t = pca.fit_transform(X) - M_pca = pca.components_ - else: - X_t = X - X.mean(axis=0) - M_pca = None + warnings.warn( + "RCA will no longer center the data before training. If you want " + "to do some preprocessing, you should do it manually (you can also " + "use an `sklearn.pipeline.Pipeline` for instance). 
This warning " + "will disappear in version 0.6.0.", ChangedBehaviorWarning) - chunk_mask, chunked_data = _chunk_mean_centering(X_t, chunks) + chunks = np.asanyarray(chunks, dtype=int) + chunk_mask, chunked_data = _chunk_mean_centering(X, chunks) inner_cov = np.atleast_2d(np.cov(chunked_data, rowvar=0, bias=1)) - dim = self._check_dimension(np.linalg.matrix_rank(inner_cov), X_t) + dim = self._check_dimension(np.linalg.matrix_rank(inner_cov), X) # Fisher Linear Discriminant projection - if dim < X_t.shape[1]: - total_cov = np.cov(X_t[chunk_mask], rowvar=0) + if dim < X.shape[1]: + total_cov = np.cov(X[chunk_mask], rowvar=0) tmp = np.linalg.lstsq(total_cov, inner_cov)[0] vals, vecs = np.linalg.eig(tmp) inds = np.argsort(vals)[:dim] @@ -133,9 +141,6 @@ def fit(self, X, chunks): else: self.transformer_ = _inv_sqrtm(inner_cov).T - if M_pca is not None: - self.transformer_ = np.atleast_2d(self.transformer_.dot(M_pca)) - return self @@ -155,7 +160,7 @@ class RCA_Supervised(RCA): """ def __init__(self, num_dims='deprecated', n_components=None, - pca_comps=None, num_chunks=100, chunk_size=2, + pca_comps='deprecated', num_chunks=100, chunk_size=2, preprocessor=None): """Initialize the supervised version of `RCA`. diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py index a58b8a99..c49c9ef5 100644 --- a/test/metric_learn_test.py +++ b/test/metric_learn_test.py @@ -18,9 +18,10 @@ HAS_SKGGM = False else: HAS_SKGGM = True -from metric_learn import (LMNN, NCA, LFDA, Covariance, MLKR, MMC, RCA, +from metric_learn import (LMNN, NCA, LFDA, Covariance, MLKR, MMC, LSML_Supervised, ITML_Supervised, SDML_Supervised, - RCA_Supervised, MMC_Supervised, SDML, ITML, LSML) + RCA_Supervised, MMC_Supervised, SDML, RCA, ITML, + LSML) # Import this specially for testing. from metric_learn.constraints import wrap_pairs from metric_learn.lmnn import _sum_outer_products @@ -837,6 +838,63 @@ def test_feature_null_variance(self): csep = class_separation(rca.transform(X), self.iris_labels) self.assertLess(csep, 0.30) + def test_deprecation_pca_comps(self): + # test that a deprecation message is thrown if pca_comps is set at + # initialization + # TODO: remove in v.0.6 + X, y = make_classification(random_state=42, n_samples=100) + rca_supervised = RCA_Supervised(pca_comps=X.shape[1], num_chunks=20) + msg = ('"pca_comps" parameter is not used. ' + 'It has been deprecated in version 0.5.0 and will be' + 'removed in 0.6.0. RCA will not do PCA preprocessing anymore. If ' + 'you still want to do it, you could use ' + '`sklearn.decomposition.PCA` and an `sklearn.pipeline.Pipeline`.') + with pytest.warns(ChangedBehaviorWarning) as expected_msg: + rca_supervised.fit(X, y) + assert str(expected_msg[0].message) == msg + + rca = RCA(pca_comps=X.shape[1]) + with pytest.warns(ChangedBehaviorWarning) as expected_msg: + rca.fit(X, y) + assert str(expected_msg[0].message) == msg + + def test_changedbehaviorwarning_preprocessing(self): + # test that a ChangedBehaviorWarning is thrown when using RCA + # TODO: remove in v.0.6 + + msg = ("RCA will no longer center the data before training. If you want " + "to do some preprocessing, you should do it manually (you can also " + "use an `sklearn.pipeline.Pipeline` for instance). 
This warning " + "will disappear in version 0.6.0.") + + X, y = make_classification(random_state=42, n_samples=100) + rca_supervised = RCA_Supervised(num_chunks=20) + with pytest.warns(ChangedBehaviorWarning) as expected_msg: + rca_supervised.fit(X, y) + assert str(expected_msg[0].message) == msg + + rca = RCA() + with pytest.warns(ChangedBehaviorWarning) as expected_msg: + rca.fit(X, y) + assert str(expected_msg[0].message) == msg + + def test_rank_deficient_returns_warning(self): + """Checks that if the covariance matrix is not invertible, we raise a + warning message advising to use PCA""" + X, y = load_iris(return_X_y=True) + # we make the fourth column a linear combination of the two first, + # so that the covariance matrix will not be invertible: + X[:, 3] = X[:, 0] + 3 * X[:, 1] + rca = RCA() + msg = ('The inner covariance matrix is not invertible, ' + 'so the transformation matrix may contain Nan values. ' + 'You should reduce the dimensionality of your input,' + 'for instance using `sklearn.decomposition.PCA` as a ' + 'preprocessing step.') + with pytest.warns(None) as raised_warnings: + rca.fit(X, y) + assert any(str(w.message) == msg for w in raised_warnings) + @pytest.mark.parametrize('num_dims', [None, 2]) def test_deprecation_num_dims_rca(num_dims): diff --git a/test/test_base_metric.py b/test/test_base_metric.py index 7d0c11e7..313948ec 100644 --- a/test/test_base_metric.py +++ b/test/test_base_metric.py @@ -89,13 +89,13 @@ def test_rca(self): self.assertEqual(remove_spaces(str(metric_learn.RCA())), remove_spaces("RCA(n_components=None, " "num_dims='deprecated', " - "pca_comps=None, " + "pca_comps='deprecated', " "preprocessor=None)")) self.assertEqual(remove_spaces(str(metric_learn.RCA_Supervised())), remove_spaces( "RCA_Supervised(chunk_size=2, " "n_components=None, num_chunks=100, " - "num_dims='deprecated', pca_comps=None, " + "num_dims='deprecated', pca_comps='deprecated', " "preprocessor=None)")) def test_mlkr(self): From 8c3cb3e348f38b3a0234017cf5fa10e56d2671b1 Mon Sep 17 00:00:00 2001 From: William de Vazelhes <31916524+wdevazelhes@users.noreply.github.com> Date: Tue, 18 Jun 2019 17:07:53 +0200 Subject: [PATCH 120/210] [MRG] Fix quadruplets scoring (#220) * FIX: fix lsml scoring * Address https://github.com/metric-learn/metric-learn/pull/220#pullrequestreview-249487025 --- metric_learn/base_metric.py | 7 ++++++- test/test_pairs_classifiers.py | 29 ++++++++++++++++++++++++++++ test/test_quadruplets_classifiers.py | 23 ++++++++++++++++++++++ 3 files changed, 58 insertions(+), 1 deletion(-) diff --git a/metric_learn/base_metric.py b/metric_learn/base_metric.py index 856591cb..aa7d66dd 100644 --- a/metric_learn/base_metric.py +++ b/metric_learn/base_metric.py @@ -641,4 +641,9 @@ def score(self, quadruplets): score : float The quadruplets score. """ - return - np.mean(self.predict(quadruplets)) + # Since the prediction is a vector of values in {-1, +1}, we need to + # rescale them to {0, 1} to compute the accuracy using the mean (because + # then 1 means a correctly classified result (pairs are in the right + # order), and a 0 an incorrectly classified result (pairs are in the + # wrong order). 
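+    # (Illustrative values, not from the test suite: predictions of
+    # [1, 1, -1, 1] rescale to [1, 1, 0, 1], i.e. an accuracy of 0.75.)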
+ return self.predict(quadruplets).mean() / 2 + 0.5 diff --git a/test/test_pairs_classifiers.py b/test/test_pairs_classifiers.py index 828181cb..decc0590 100644 --- a/test/test_pairs_classifiers.py +++ b/test/test_pairs_classifiers.py @@ -4,6 +4,7 @@ import pytest from numpy.testing import assert_array_equal +from scipy.spatial.distance import euclidean from metric_learn.base_metric import _PairsClassifierMixin, MahalanobisMixin from sklearn.exceptions import NotFittedError @@ -489,3 +490,31 @@ def breaking_fun(**args): # a function that fails so that we will miss with pytest.raises(ValueError) as raised_error: estimator.fit(input_data, labels, calibration_params={'strategy': 'weird'}) assert str(raised_error.value) == expected_msg + + +@pytest.mark.parametrize('estimator, build_dataset', pairs_learners, + ids=ids_pairs_learners) +def test_accuracy_toy_example(estimator, build_dataset): + """Test that the accuracy works on some toy example (hence that the + prediction is OK)""" + input_data, labels, preprocessor, X = build_dataset(with_preprocessor=False) + estimator = clone(estimator) + estimator.set_params(preprocessor=preprocessor) + set_random_state(estimator) + estimator.fit(input_data, labels) + # we force the transformation to be identity so that we control what it does + estimator.transformer_ = np.eye(X.shape[1]) + # the threshold for similar or dissimilar pairs is half of the distance + # between X[0] and X[1] + estimator.set_threshold(euclidean(X[0], X[1]) / 2) + # We take the two first points and we build 4 regularly spaced points on the + # line they define, so that it's easy to build quadruplets of different + # similarities. + X_test = X[0] + np.arange(4)[:, np.newaxis] * (X[0] - X[1]) / 4 + pairs_test = np.array( + [[X_test[0], X_test[1]], # similar + [X_test[0], X_test[3]], # dissimilar + [X_test[1], X_test[2]], # similar + [X_test[2], X_test[3]]]) # similar + y = np.array([-1, 1, 1, -1]) # [F, F, T, F] + assert accuracy_score(estimator.predict(pairs_test), y) == 0.25 diff --git a/test/test_quadruplets_classifiers.py b/test/test_quadruplets_classifiers.py index 2bf36b3f..d342b45d 100644 --- a/test/test_quadruplets_classifiers.py +++ b/test/test_quadruplets_classifiers.py @@ -40,3 +40,26 @@ def test_raise_not_fitted_error_if_not_fitted(estimator, build_dataset, with pytest.raises(NotFittedError): estimator.predict(input_data) + +@pytest.mark.parametrize('estimator, build_dataset', quadruplets_learners, + ids=ids_quadruplets_learners) +def test_accuracy_toy_example(estimator, build_dataset): + """Test that the default scoring for quadruplets (accuracy) works on some + toy example""" + input_data, labels, preprocessor, X = build_dataset(with_preprocessor=False) + estimator = clone(estimator) + estimator.set_params(preprocessor=preprocessor) + set_random_state(estimator) + estimator.fit(input_data) + # We take the two first points and we build 4 regularly spaced points on the + # line they define, so that it's easy to build quadruplets of different + # similarities. 
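+  # (Illustrative values: with X[0] = [0, 0] and X[1] = [4, 0], X_test is
+  # [[0, 0], [-1, 0], [-2, 0], [-3, 0]], i.e. points spaced by
+  # ||X[0] - X[1]|| / 4 along that line.)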
+ X_test = X[0] + np.arange(4)[:, np.newaxis] * (X[0] - X[1]) / 4 + quadruplets_test = np.array( + [[X_test[0], X_test[2], X_test[0], X_test[1]], + [X_test[1], X_test[3], X_test[1], X_test[0]], + [X_test[1], X_test[2], X_test[0], X_test[3]], + [X_test[3], X_test[0], X_test[2], X_test[1]]]) + # we force the transformation to be identity so that we control what it does + estimator.transformer_ = np.eye(X.shape[1]) + assert estimator.score(quadruplets_test) == 0.25 From 580d38d12d01af755dc2cb9a3cf0d81d1f633cf9 Mon Sep 17 00:00:00 2001 From: William de Vazelhes <31916524+wdevazelhes@users.noreply.github.com> Date: Tue, 25 Jun 2019 16:16:37 +0200 Subject: [PATCH 121/210] [MRG] fix quadruplets decision_function (#217) * fix quadruplets decision_function * Address https://github.com/metric-learn/metric-learn/pull/217#issuecomment-501792220 * fix: I put the column at the wrong side, now it does some subsampling * Fix number of samples * let's try again with 30 samples * Use less chunks --- metric_learn/base_metric.py | 3 ++ test/test_sklearn_compat.py | 64 +++++++++++++++++++++++++++++++++++++ test/test_utils.py | 2 +- 3 files changed, 68 insertions(+), 1 deletion(-) diff --git a/metric_learn/base_metric.py b/metric_learn/base_metric.py index aa7d66dd..6e5ffb13 100644 --- a/metric_learn/base_metric.py +++ b/metric_learn/base_metric.py @@ -618,6 +618,9 @@ def decision_function(self, quadruplets): decision_function : `numpy.ndarray` of floats, shape=(n_constraints,) Metric differences. """ + quadruplets = check_input(quadruplets, type_of_inputs='tuples', + preprocessor=self.preprocessor_, + estimator=self, tuple_size=self._tuple_size) return (self.score_pairs(quadruplets[:, 2:]) - self.score_pairs(quadruplets[:, :2])) diff --git a/test/test_sklearn_compat.py b/test/test_sklearn_compat.py index 0c0f098d..4c511263 100644 --- a/test/test_sklearn_compat.py +++ b/test/test_sklearn_compat.py @@ -105,6 +105,70 @@ def stable_init(self, n_components=None, pca_comps=None, # ---------------------- Test scikit-learn compatibility ---------------------- +def generate_array_like(input_data, labels=None): + """Helper function to generate array-like variants of numpy datasets, + for testing purposes.""" + list_data = input_data.tolist() + input_data_changed = [input_data, list_data, tuple(list_data)] + if input_data.ndim >= 2: + input_data_changed.append(tuple(tuple(x) for x in list_data)) + if input_data.ndim >= 3: + input_data_changed.append(tuple(tuple(tuple(x) for x in y) for y in + list_data)) + if input_data.ndim == 2: + pd = pytest.importorskip('pandas') + input_data_changed.append(pd.DataFrame(input_data)) + if labels is not None: + labels_changed = [labels, list(labels), tuple(labels)] + else: + labels_changed = [labels] + return input_data_changed, labels_changed + + +@pytest.mark.integration +@pytest.mark.parametrize('with_preprocessor', [True, False]) +@pytest.mark.parametrize('estimator, build_dataset', metric_learners, + ids=ids_metric_learners) +def test_array_like_inputs(estimator, build_dataset, with_preprocessor): + """Test that metric-learners can have as input (of all functions that are + applied on data) any array-like object.""" + input_data, labels, preprocessor, X = build_dataset(with_preprocessor) + + # we subsample the data for the test to be more efficient + input_data, _, labels, _ = train_test_split(input_data, labels, + train_size=20) + X = X[:10] + + estimator = clone(estimator) + estimator.set_params(preprocessor=preprocessor) + set_random_state(estimator) + input_variants, 
label_variants = generate_array_like(input_data, labels) + for input_variant in input_variants: + for label_variant in label_variants: + estimator.fit(*remove_y_quadruplets(estimator, input_variant, + label_variant)) + if hasattr(estimator, "predict"): + estimator.predict(input_variant) + if hasattr(estimator, "predict_proba"): + estimator.predict_proba(input_variant) # anticipation in case some + # time we have that, or if ppl want to contribute with new algorithms + # it will be checked automatically + if hasattr(estimator, "decision_function"): + estimator.decision_function(input_variant) + if hasattr(estimator, "score"): + for label_variant in label_variants: + estimator.score(*remove_y_quadruplets(estimator, input_variant, + label_variant)) + + X_variants, _ = generate_array_like(X) + for X_variant in X_variants: + estimator.transform(X_variant) + + pairs = np.array([[X[0], X[1]], [X[0], X[2]]]) + pairs_variants, _ = generate_array_like(pairs) + for pairs_variant in pairs_variants: + estimator.score_pairs(pairs_variant) + @pytest.mark.parametrize('with_preprocessor', [True, False]) @pytest.mark.parametrize('estimator, build_dataset', pairs_learners, diff --git a/test/test_utils.py b/test/test_utils.py index 2e57f489..970b40a1 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -118,7 +118,7 @@ def build_quadruplets(with_preprocessor=False): (ITML_Supervised(max_iter=5), build_classification), (LSML_Supervised(), build_classification), (MMC_Supervised(max_iter=5), build_classification), - (RCA_Supervised(num_chunks=10), build_classification), + (RCA_Supervised(num_chunks=5), build_classification), (SDML_Supervised(prior='identity', balance_param=1e-5), build_classification)] ids_classifiers = list(map(lambda x: x.__class__.__name__, From 2dc9b90d0d2aad38d62e799ee76360b548d1a094 Mon Sep 17 00:00:00 2001 From: William de Vazelhes <31916524+wdevazelhes@users.noreply.github.com> Date: Wed, 3 Jul 2019 11:06:44 +0200 Subject: [PATCH 122/210] [MRG] Enhance documentation (#208) * Add link to algorithm in the title of sections * Separate supervised and weakly supervised algorithms in Package Overview * Improve doc * Fix plot_metric_example * Update links with the new pages * Use random init for LMNN * Update remaining num_dims * Add description of the API for Weakly Supervised and Supervised algorithms * Remove to have the list of methods in the description of the class * Fix: Update wronly merged lmnn with None instead of 'auto' in init * Add some documentation about generating pairs and quadruplets * Add base module quick description * Put classes rather than modules * Add docstrings at top of algos * Update name of API details to Package Contents * Update name of API details to Package Contents --- .gitignore | 1 + doc/_templates/class.rst | 16 + doc/conf.py | 15 +- doc/index.rst | 2 +- doc/metric_learn.base_metric.rst | 7 - doc/metric_learn.constraints.rst | 7 - doc/metric_learn.covariance.rst | 22 - doc/metric_learn.itml.rst | 28 -- doc/metric_learn.lfda.rst | 31 -- doc/metric_learn.lmnn.rst | 34 -- doc/metric_learn.lsml.rst | 28 -- doc/metric_learn.mlkr.rst | 28 -- doc/metric_learn.mmc.rst | 28 -- doc/metric_learn.nca.rst | 29 -- doc/metric_learn.rca.rst | 28 -- doc/metric_learn.rst | 62 ++- doc/metric_learn.sdml.rst | 27 -- doc/supervised.rst | 152 +++++-- doc/unsupervised.rst | 37 ++ doc/weakly_supervised.rst | 512 ++++++++++++++++------ examples/plot_metric_learning_examples.py | 36 +- metric_learn/base_metric.py | 34 +- metric_learn/constraints.py | 5 + 
metric_learn/covariance.py | 23 +- metric_learn/itml.py | 266 +++++------ metric_learn/lfda.py | 95 ++-- metric_learn/lmnn.py | 218 +++++---- metric_learn/lsml.py | 236 +++++----- metric_learn/mlkr.py | 175 ++++---- metric_learn/mmc.py | 306 +++++++------ metric_learn/nca.py | 183 ++++---- metric_learn/rca.py | 125 +++--- metric_learn/sdml.py | 261 ++++++----- 33 files changed, 1708 insertions(+), 1349 deletions(-) create mode 100644 doc/_templates/class.rst delete mode 100644 doc/metric_learn.base_metric.rst delete mode 100644 doc/metric_learn.constraints.rst delete mode 100644 doc/metric_learn.covariance.rst delete mode 100644 doc/metric_learn.itml.rst delete mode 100644 doc/metric_learn.lfda.rst delete mode 100644 doc/metric_learn.lmnn.rst delete mode 100644 doc/metric_learn.lsml.rst delete mode 100644 doc/metric_learn.mlkr.rst delete mode 100644 doc/metric_learn.mmc.rst delete mode 100644 doc/metric_learn.nca.rst delete mode 100644 doc/metric_learn.rca.rst delete mode 100644 doc/metric_learn.sdml.rst create mode 100644 doc/unsupervised.rst diff --git a/.gitignore b/.gitignore index 449f70ea..8321c7d2 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,4 @@ htmlcov/ .cache/ .pytest_cache/ doc/auto_examples/* +doc/generated/* \ No newline at end of file diff --git a/doc/_templates/class.rst b/doc/_templates/class.rst new file mode 100644 index 00000000..f0c1b5bc --- /dev/null +++ b/doc/_templates/class.rst @@ -0,0 +1,16 @@ +:mod:`{{module}}`.{{objname}} +{{ underline }}============== + +.. currentmodule:: {{ module }} + +.. autoclass:: {{ objname }} + :members: + :undoc-members: + :inherited-members: + :special-members: __init__ + +.. include:: {{module}}.{{objname}}.examples + +.. raw:: html + +
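The `class.rst` template above is standard Sphinx autosummary boilerplate: for every class listed in an `autosummary` directive, Sphinx renders this template with the class's module and name substituted for the Jinja2 placeholders. A minimal sketch of that substitution outside of Sphinx; the `jinja2` import and the example values are assumptions for illustration only::

    from jinja2 import Template

    # A fragment of the class.rst template above, rendered by hand.
    template = Template(
        ":mod:`{{module}}`.{{objname}}\n"
        "{{ underline }}==============\n")
    print(template.render(module="metric_learn",
                          objname="LMNN",
                          underline="=" * len("metric_learn.LMNN")))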
diff --git a/doc/conf.py b/doc/conf.py index a11f8bba..e7e6a108 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- import sys +import os extensions = [ 'sphinx.ext.autodoc', @@ -28,7 +29,6 @@ exclude_patterns = ['_build'] pygments_style = 'sphinx' todo_include_todos = True -numpydoc_show_class_members = False # Options for HTML output html_theme = 'sphinx_rtd_theme' @@ -50,3 +50,16 @@ 'scipy': ('https://docs.scipy.org/doc/scipy/reference', None), 'scikit-learn': ('https://scikit-learn.org/stable/', None) } + + +# sphinx-gallery configuration +sphinx_gallery_conf = { + # to generate mini-galleries at the end of each docstring in the API + # section: (see https://sphinx-gallery.github.io/configuration.html + # #references-to-examples) + 'doc_module': 'metric_learn', + 'backreferences_dir': os.path.join('generated'), +} + +# generate autosummary even if no references +autosummary_generate = True diff --git a/doc/index.rst b/doc/index.rst index 3e4d0ce3..9d303bee 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -25,7 +25,7 @@ Documentation outline .. toctree:: :maxdepth: 2 - Package Overview + Package Contents .. toctree:: :maxdepth: 2 diff --git a/doc/metric_learn.base_metric.rst b/doc/metric_learn.base_metric.rst deleted file mode 100644 index 050a360b..00000000 --- a/doc/metric_learn.base_metric.rst +++ /dev/null @@ -1,7 +0,0 @@ -metric_learn.base_metric module -=============================== - -.. automodule:: metric_learn.base_metric - :members: - :undoc-members: - :show-inheritance: diff --git a/doc/metric_learn.constraints.rst b/doc/metric_learn.constraints.rst deleted file mode 100644 index 97d79002..00000000 --- a/doc/metric_learn.constraints.rst +++ /dev/null @@ -1,7 +0,0 @@ -metric_learn.constraints module -=============================== - -.. automodule:: metric_learn.constraints - :members: - :undoc-members: - :show-inheritance: diff --git a/doc/metric_learn.covariance.rst b/doc/metric_learn.covariance.rst deleted file mode 100644 index 493878c1..00000000 --- a/doc/metric_learn.covariance.rst +++ /dev/null @@ -1,22 +0,0 @@ -Covariance metric (baseline method) -=================================== - -.. automodule:: metric_learn.covariance - :members: - :undoc-members: - :inherited-members: - :show-inheritance: - :special-members: __init__ - -Example Code ------------- - -:: - - from metric_learn import Covariance - from sklearn.datasets import load_iris - - iris = load_iris()['data'] - - cov = Covariance().fit(iris) - x = cov.transform(iris) diff --git a/doc/metric_learn.itml.rst b/doc/metric_learn.itml.rst deleted file mode 100644 index addb4c76..00000000 --- a/doc/metric_learn.itml.rst +++ /dev/null @@ -1,28 +0,0 @@ -Information Theoretic Metric Learning (ITML) -============================================ - -.. automodule:: metric_learn.itml - :members: - :undoc-members: - :inherited-members: - :show-inheritance: - :special-members: __init__ - -Example Code ------------- - -:: - - from metric_learn import ITML_Supervised - from sklearn.datasets import load_iris - - iris_data = load_iris() - X = iris_data['data'] - Y = iris_data['target'] - - itml = ITML_Supervised(num_constraints=200) - itml.fit(X, Y) - -References ----------- -`Information-theoretic Metric Learning `_ Jason V. Davis, et al. 
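The per-algorithm pages deleted above are superseded by the consolidated guides (`doc/supervised.rst`, `doc/weakly_supervised.rst`) rewritten later in this patch. For reference, a minimal sketch of the pairs-based `ITML` API that those guides document; the data values and labels below are illustrative assumptions, not part of the original patch::

    import numpy as np
    from metric_learn import ITML

    # Four pairs of 2D points, labeled -1 (dissimilar) or +1 (similar).
    pairs = np.array([[[1.2, 7.5], [1.3, 1.5]],
                      [[6.4, 2.6], [6.2, 9.7]],
                      [[1.3, 4.5], [3.2, 4.6]],
                      [[6.2, 5.5], [5.4, 5.4]]])
    y = np.array([-1, -1, 1, 1])

    itml = ITML()
    itml.fit(pairs, y)
    print(itml.get_mahalanobis_matrix())  # the learned matrix M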
diff --git a/doc/metric_learn.lfda.rst b/doc/metric_learn.lfda.rst deleted file mode 100644 index 41088a68..00000000 --- a/doc/metric_learn.lfda.rst +++ /dev/null @@ -1,31 +0,0 @@ -Local Fisher Discriminant Analysis (LFDA) -========================================= - -.. automodule:: metric_learn.lfda - :members: - :undoc-members: - :inherited-members: - :show-inheritance: - :special-members: __init__ - -Example Code ------------- - -:: - - import numpy as np - from metric_learn import LFDA - from sklearn.datasets import load_iris - - iris_data = load_iris() - X = iris_data['data'] - Y = iris_data['target'] - - lfda = LFDA(k=2, dim=2) - lfda.fit(X, Y) - -References ------------------- -`Dimensionality Reduction of Multimodal Labeled Data by Local Fisher Discriminant Analysis `_ Masashi Sugiyama. - -`Local Fisher Discriminant Analysis on Beer Style Clustering `_ Yuan Tang. diff --git a/doc/metric_learn.lmnn.rst b/doc/metric_learn.lmnn.rst deleted file mode 100644 index bc65161e..00000000 --- a/doc/metric_learn.lmnn.rst +++ /dev/null @@ -1,34 +0,0 @@ -Large Margin Nearest Neighbor (LMNN) -==================================== - -.. automodule:: metric_learn.lmnn - :members: - :undoc-members: - :inherited-members: - :show-inheritance: - :special-members: __init__ - -Example Code ------------- - -:: - - import numpy as np - from metric_learn import LMNN - from sklearn.datasets import load_iris - - iris_data = load_iris() - X = iris_data['data'] - Y = iris_data['target'] - - lmnn = LMNN(k=5, learn_rate=1e-6) - lmnn.fit(X, Y, verbose=False) - -If a recent version of the Shogun Python modular (``modshogun``) library -is available, the LMNN implementation will use the fast C++ version from -there. Otherwise, the included pure-Python version will be used. -The two implementations differ slightly, and the C++ version is more complete. - -References ----------- -`Distance Metric Learning for Large Margin Nearest Neighbor Classification `_ Kilian Q. Weinberger, John Blitzer, Lawrence K. Saul diff --git a/doc/metric_learn.lsml.rst b/doc/metric_learn.lsml.rst deleted file mode 100644 index 0deae4e6..00000000 --- a/doc/metric_learn.lsml.rst +++ /dev/null @@ -1,28 +0,0 @@ -Least Squares Metric Learning (LSML) -==================================== - -.. automodule:: metric_learn.lsml - :members: - :undoc-members: - :inherited-members: - :show-inheritance: - :special-members: __init__ - -Example Code ------------- - -:: - - from metric_learn import LSML_Supervised - from sklearn.datasets import load_iris - - iris_data = load_iris() - X = iris_data['data'] - Y = iris_data['target'] - - lsml = LSML_Supervised(num_constraints=200) - lsml.fit(X, Y) - -References ----------- - diff --git a/doc/metric_learn.mlkr.rst b/doc/metric_learn.mlkr.rst deleted file mode 100644 index f71697de..00000000 --- a/doc/metric_learn.mlkr.rst +++ /dev/null @@ -1,28 +0,0 @@ -Metric Learning for Kernel Regression (MLKR) -============================================ - -.. automodule:: metric_learn.mlkr - :members: - :undoc-members: - :inherited-members: - :show-inheritance: - :special-members: __init__ - -Example Code ------------- - -:: - - from metric_learn import MLKR - from sklearn.datasets import load_iris - - iris_data = load_iris() - X = iris_data['data'] - Y = iris_data['target'] - - mlkr = MLKR() - mlkr.fit(X, Y) - -References ----------- -`Information-theoretic Metric Learning `_ Jason V. Davis, et al. 
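Likewise, the `MLKR` usage from the page removed above now lives in `doc/supervised.rst`. A small sketch of MLKR's regression setting (continuous targets, unlike the classification examples elsewhere in these docs); the synthetic dataset and noise level are illustrative assumptions only::

    import numpy as np
    from metric_learn import MLKR

    rng = np.random.RandomState(42)
    X = rng.randn(50, 3)
    y = X[:, 0] + 0.1 * rng.randn(50)  # target mostly driven by feature 0

    mlkr = MLKR()
    mlkr.fit(X, y)
    X_embedded = mlkr.transform(X)  # distances now reflect closeness in y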
diff --git a/doc/metric_learn.mmc.rst b/doc/metric_learn.mmc.rst deleted file mode 100644 index bb9031ba..00000000 --- a/doc/metric_learn.mmc.rst +++ /dev/null @@ -1,28 +0,0 @@ -Mahalanobis Metric Learning for Clustering (MMC) -================================================ - -.. automodule:: metric_learn.mmc - :members: - :undoc-members: - :inherited-members: - :show-inheritance: - :special-members: __init__ - -Example Code ------------- - -:: - - from metric_learn import MMC_Supervised - from sklearn.datasets import load_iris - - iris_data = load_iris() - X = iris_data['data'] - Y = iris_data['target'] - - mmc = MMC_Supervised(num_constraints=200) - mmc.fit(X, Y) - -References ----------- -`Distance metric learning with application to clustering with side-information `_ Xing, Jordan, Russell, Ng. diff --git a/doc/metric_learn.nca.rst b/doc/metric_learn.nca.rst deleted file mode 100644 index 00bc4eac..00000000 --- a/doc/metric_learn.nca.rst +++ /dev/null @@ -1,29 +0,0 @@ -Neighborhood Components Analysis (NCA) -====================================== - -.. automodule:: metric_learn.nca - :members: - :undoc-members: - :inherited-members: - :show-inheritance: - :special-members: __init__ - -Example Code ------------- - -:: - - import numpy as np - from metric_learn import NCA - from sklearn.datasets import load_iris - - iris_data = load_iris() - X = iris_data['data'] - Y = iris_data['target'] - - nca = NCA(max_iter=1000) - nca.fit(X, Y) - -References ----------- - diff --git a/doc/metric_learn.rca.rst b/doc/metric_learn.rca.rst deleted file mode 100644 index 027d583b..00000000 --- a/doc/metric_learn.rca.rst +++ /dev/null @@ -1,28 +0,0 @@ -Relative Components Analysis (RCA) -================================== - -.. automodule:: metric_learn.rca - :members: - :undoc-members: - :inherited-members: - :show-inheritance: - :special-members: __init__ - -Example Code ------------- - -:: - - from metric_learn import RCA_Supervised - from sklearn.datasets import load_iris - - iris_data = load_iris() - X = iris_data['data'] - Y = iris_data['target'] - - rca = RCA_Supervised(num_chunks=30, chunk_size=2) - rca.fit(X, Y) - -References ------------------- -`Adjustment learning and relevant component analysis `_ Noam Shental, et al. diff --git a/doc/metric_learn.rst b/doc/metric_learn.rst index eb606542..930404d0 100644 --- a/doc/metric_learn.rst +++ b/doc/metric_learn.rst @@ -4,16 +4,52 @@ metric_learn package Module Contents --------------- -.. toctree:: - - metric_learn.constraints - metric_learn.base_metric - metric_learn.itml - metric_learn.lfda - metric_learn.lmnn - metric_learn.lsml - metric_learn.mlkr - metric_learn.mmc - metric_learn.nca - metric_learn.rca - metric_learn.sdml +Base Classes +------------ + +.. autosummary:: + :toctree: generated/ + :template: class.rst + + metric_learn.Constraints + metric_learn.base_metric.BaseMetricLearner + metric_learn.base_metric._PairsClassifierMixin + metric_learn.base_metric._QuadrupletsClassifierMixin + +Supervised Learning Algorithms +------------------------------ +.. autosummary:: + :toctree: generated/ + :template: class.rst + + metric_learn.LFDA + metric_learn.LMNN + metric_learn.MLKR + metric_learn.NCA + metric_learn.RCA + metric_learn.ITML_Supervised + metric_learn.LSML_Supervised + metric_learn.MMC_Supervised + metric_learn.SDML_Supervised + metric_learn.RCA_Supervised + +Weakly Supervised Learning Algorithms +------------------------------------- + +.. 
autosummary::
+   :toctree: generated/
+   :template: class.rst
+
+   metric_learn.ITML
+   metric_learn.LSML
+   metric_learn.MMC
+   metric_learn.SDML
+
+Unsupervised Learning Algorithms
+--------------------------------
+
+.. autosummary::
+   :toctree: generated/
+   :template: class.rst
+
+   metric_learn.Covariance
\ No newline at end of file
diff --git a/doc/metric_learn.sdml.rst b/doc/metric_learn.sdml.rst
deleted file mode 100644
index 3e350a70..00000000
--- a/doc/metric_learn.sdml.rst
+++ /dev/null
@@ -1,27 +0,0 @@
-Sparse Determinant Metric Learning (SDML)
-=========================================
-
-.. automodule:: metric_learn.sdml
-   :members:
-   :undoc-members:
-   :inherited-members:
-   :show-inheritance:
-   :special-members: __init__
-
-Example Code
-------------
-
-::
-
-    from metric_learn import SDML_Supervised
-    from sklearn.datasets import load_iris
-
-    iris_data = load_iris()
-    X = iris_data['data']
-    Y = iris_data['target']
-
-    sdml = SDML_Supervised(num_constraints=200)
-    sdml.fit(X, Y)
-
-References
-------------------
diff --git a/doc/supervised.rst b/doc/supervised.rst
index c438294f..5520ce8e 100644
--- a/doc/supervised.rst
+++ b/doc/supervised.rst
@@ -8,46 +8,108 @@ labels `y`, and learn a distance matrix that make points from the same class
 other, and points from different classes or with distant target values far
 away from each other.
 
-Scikit-learn compatibility
-==========================
+General API
+===========
 
-All supervised algorithms are scikit-learn `Estimators`, so they are
-compatible with Pipelining and scikit-learn model selection routines.
+Supervised Metric Learning Algorithms are the easiest metric-learn algorithms
+to use, since they use the same API as ``scikit-learn``.
 
-Algorithms
-==========
-
-Covariance
+Input data
 ----------
+In order to train a model, you need two `array-like `_ objects, `X` and `y`. `X`
+should be a 2D array-like of shape `(n_samples, n_features)`, where
+`n_samples` is the number of points of your dataset and `n_features` is the
+number of attributes of each of your points. `y` should be a 1D array-like
+of shape `(n_samples,)`, containing for each point in `X` the class it
+belongs to (or the value to regress for this sample, if you use `MLKR` for
+instance).
+
+Here is an example of a dataset of two dogs and one
+cat (the classes are 'dog' and 'cat'), each animal being represented by
+two numbers.
 
-.. todo:: Covariance is unsupervised, so its doc should not be here.
+>>> import numpy as np
+>>> X = np.array([[2.3, 3.6], [0.2, 0.5], [6.7, 2.1]])
+>>> y = np.array(['dog', 'cat', 'dog'])
 
-`Covariance` does not "learn" anything, rather it calculates
-the covariance matrix of the input data. This is a simple baseline method.
+.. note::
 
-.. topic:: Example Code:
+   You can also use a preprocessor instead of directly giving the inputs as
+   2D arrays. See the :ref:`preprocessor_section` section for more details.
 
-::
+Fit, transform, and so on
+-------------------------
+The goal of supervised metric-learning algorithms is to transform
+points in a new space, in which the distance between two points from the
+same class will be small, and the distance between two points from different
+classes will be large. To do so, we fit the metric learner (example:
+`NCA`). 
- iris = load_iris()['data']
+>>> from metric_learn import NCA
+>>> nca = NCA(random_state=42)
+>>> nca.fit(X, y)
+NCA(init=None, max_iter=100, n_components=None, num_dims='deprecated',
+  preprocessor=None, random_state=42, tol=None, verbose=False)
 
- cov = Covariance().fit(iris)
- x = cov.transform(iris)
-.. topic:: References:
+
+Now that the estimator is fitted, you can use it on new data for several
+purposes.
+
+First, you can transform the data in the learned space, using `transform`:
+Here we transform two points in the new embedding space.
+
+>>> X_new = np.array([[9.4, 4.1], [2.1, 4.4]])
+>>> nca.transform(X_new)
+array([[ 5.91884732, 10.25406973],
+       [ 3.1545886 ,  6.80350083]])
+
+Also, as explained before, our metric learner has learned a distance between
+points. You can use this distance in two main ways:
+
+- You can either return the distance between pairs of points using the
+  `score_pairs` function:
+
+>>> nca.score_pairs([[[3.5, 3.6], [5.6, 2.4]], [[1.2, 4.2], [2.1, 6.4]]])
+array([0.49627072, 3.65287282])
 
-   .. [1] On the Generalized Distance in Statistics, P.C.Mahalanobis, 1936
+- Or you can return a function that will return the distance (in the new
+  space) between two 1D arrays (the coordinates of the points in the original
+  space), similarly to distance functions in `scipy.spatial.distance`.
+
+>>> metric_fun = nca.get_metric()
+>>> metric_fun([3.5, 3.6], [5.6, 2.4])
+0.4962707194621285
+
+.. note::
+
+   If the metric learner that you use learns a Mahalanobis Matrix (as is
+   the case for all algorithms currently in metric-learn), you can get the
+   plain learned Mahalanobis matrix using `get_mahalanobis_matrix`.
+
+   >>> nca.get_mahalanobis_matrix()
+   array([[0.43680409, 0.89169412],
+          [0.89169412, 1.9542479 ]])
+
+.. TODO: remove the "as is the case etc..." if it's not the case anymore
+
+Scikit-learn compatibility
+--------------------------
+
+All supervised algorithms are scikit-learn `sklearn.base.Estimators` and
+`sklearn.base.TransformerMixin`, so they are compatible with Pipelining and
+scikit-learn model selection routines.
+
+Algorithms
+==========
 
 .. _lmnn:
 
-LMNN
------
+:py:class:`LMNN `
+-----------------------------------------
 
 Large Margin Nearest Neighbor Metric Learning
-(:py:class:`LMNN `)
+(:py:class:`LMNN `)
 
 `LMNN` learns a Mahalanobis distance metric in the kNN classification
 setting. The learned metric attempts to keep close k-nearest neighbors
@@ -97,10 +159,10 @@ indicates :math:`\mathbf{x}_{i}, \mathbf{x}_{j}` belong to different class,
 
 .. _nca:
 
-NCA
----
+:py:class:`NCA `
+--------------------------------------
 
-Neighborhood Components Analysis(:py:class:`NCA `)
+Neighborhood Components Analysis(:py:class:`NCA `)
 
 `NCA` is a distance metric learning algorithm which aims to improve the
 accuracy of nearest neighbors classification compared to the standard
@@ -161,10 +223,10 @@ the sum of probability of being correctly classified:
 
 .. _lfda:
 
-LFDA
----- 
+:py:class:`LFDA `
+-----------------------------------------
 
-Local Fisher Discriminant Analysis(:py:class:`LFDA `)
+Local Fisher Discriminant Analysis(:py:class:`LFDA `)
 
 `LFDA` is a linear supervised dimensionality reduction method. It is
 particularly useful when dealing with multi-modality, where one ore more classes
@@ -235,10 +297,10 @@ same class are not imposed to be close.
 
.. 
_mlkr:
 
-MLKR
-----
+:py:class:`MLKR `
+-----------------------------------------
 
-Metric Learning for Kernel Regression(:py:class:`MLKR `)
+Metric Learning for Kernel Regression(:py:class:`MLKR `)
 
 `MLKR` is an algorithm for supervised metric learning, which learns a
 distance function by directly minimizing the leave-one-out regression error.
@@ -298,15 +360,35 @@ calculating a weighted average of all the training samples:
 
   Gerald Tesauro
 
+.. _supervised_version:
+
 Supervised versions of weakly-supervised algorithms
 ---------------------------------------------------
 
 Note that each :ref:`weakly-supervised algorithm ` has a
 supervised version of the form `*_Supervised` where similarity tuples are
 generated from the labels information and passed to the underlying algorithm.
-
-.. todo:: add more details about that (see issue ``_)
+These constraints are sampled randomly under the hood.
+
+For pairs learners (see :ref:`learning_on_pairs`), pairs (tuples of two points
+from the dataset) and labels (`int` indicating whether the two points are
+similar (+1) or dissimilar (-1)) are sampled with the function
+`metric_learn.constraints.positive_negative_pairs`. To sample positive pairs
+(of label +1), this method will look at all the samples with the same label and
+randomly sample a pair among them. To sample negative pairs (of label -1), this
+method will look at all the samples from a different class and randomly sample
+a pair among them. The method will try to build `num_constraints` positive
+pairs and `num_constraints` negative pairs, but sometimes it cannot find enough
+of one of those, so forcing `same_length=True` will return both times the
+minimum of the two lengths.
+
+For using quadruplets learners (see :ref:`learning_on_quadruplets`) in a
+supervised way, we will basically sample positive and negative pairs like
+before, but we'll just concatenate them, so that we have a 3D array of
+quadruplets, where for each quadruplet the first two points are in fact points
+from the same class, and the last two points are in fact points from a
+different class (so the last two points should indeed be less similar than the
+first two points).
 
 .. topic:: Example Code:
 
diff --git a/doc/unsupervised.rst b/doc/unsupervised.rst
new file mode 100644
index 00000000..1d5bef43
--- /dev/null
+++ b/doc/unsupervised.rst
@@ -0,0 +1,37 @@
+============================
+Unsupervised Metric Learning
+============================
+
+Unsupervised metric learning algorithms just take as input points `X`. For
+now, in metric-learn, there is only `Covariance`, which is a simple
+baseline algorithm (see below).
+
+
+Algorithms
+==========
+.. _covariance:
+
+Covariance
+----------
+
+`Covariance` does not "learn" anything, rather it calculates
+the covariance matrix of the input data. This is a simple baseline method.
+It can be used for ZCA whitening of the data (see the Wikipedia page on
+`whitening transformation `_).
+
+.. topic:: Example Code:
+
+::
+
+    from metric_learn import Covariance
+    from sklearn.datasets import load_iris
+
+    iris = load_iris()['data']
+
+    cov = Covariance().fit(iris)
+    x = cov.transform(iris)
+
+.. topic:: References:
+
+   .. [1] On the Generalized Distance in Statistics, P.C.Mahalanobis, 1936
\ No newline at end of file
diff --git a/doc/weakly_supervised.rst b/doc/weakly_supervised.rst
index 351c4e3b..7e488ac7 100644
--- a/doc/weakly_supervised.rst
+++ b/doc/weakly_supervised.rst
@@ -11,17 +11,28 @@ and dissimilar points. 
Refer to the documentation of each algorithm for its particular form of input
data.
 
+General API
+===========
+
 Input data
-==========
+----------
 
 In the following paragraph we talk about tuples for sake of generality. These
 can be pairs, triplets, quadruplets etc, depending on the particular metric
 learning algorithm we use.
 
 Basic form
-----------
-Every weakly supervised algorithm will take as input tuples of points, and if
-needed labels for theses tuples.
+^^^^^^^^^^
+
+Every weakly supervised algorithm will take as input tuples of
+points, and if needed labels for these tuples. The tuples of points can
+also be called "constraints". They are a set of points that we consider (e.g.
+two points, three points, etc.). The label is some information we have
+about this set of points (e.g. "these two points are similar"). Note that
+some information can be contained in the ordering of these tuples (see for
+instance the section :ref:`learning_on_quadruplets`). For more details about
+the specifics of each algorithm, refer to the appropriate section: either
+:ref:`learning_on_pairs` or :ref:`learning_on_quadruplets`.
 
 The `tuples` argument is the first argument of every method (like the X
@@ -44,7 +55,7 @@ These are two data structures that can be used to represent tuple in metric
 learn:
 
 3D array of tuples
-------------------
+^^^^^^^^^^^^^^^^^^
 
 The most intuitive way to represent tuples is to provide the algorithm with a
 3D array-like of tuples of shape ``(n_tuples, t, n_features)``, where
 ``n_tuples`` is the number of tuples, ``t`` is the number of elements in a
 tuple (2 for pairs, 3 for triplets for instance), and ``n_features`` is
 the number of features of each point.
@@ -62,10 +73,10 @@ the number of features of each point.
 >>> [[-2.16, +0.11, -0.02],
 >>>  [+1.58, +0.16, +0.93]],
 >>>
->>> [[+1.58, +0.16, +0.93 ],  # same as tuples[1, 1, :]
+>>> [[+1.58, +0.16, +0.93],  # same as tuples[1, 1, :]
 >>>  [+0.89, -0.34, +2.41]],
 >>>
->>> [[-0.12, -1.21, -0.20 ],  # same as tuples[0, 0, :]
+>>> [[-0.12, -1.21, -0.20],  # same as tuples[0, 0, :]
 >>>  [-2.16, +0.11, -0.02]]])  # same as tuples[1, 0, :]
 >>> y = np.array([-1, 1, 1, -1])
 
@@ -77,7 +88,7 @@ the number of features of each point.
 
 2D array of indicators + preprocessor
--------------------------------------
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 Instead of forming each point in each tuple, a more efficient representation
 would be to keep the dataset of points ``X`` aside, and just represent tuples
 as a collection of tuples of *indices* from the points in ``X``.
@@ -101,13 +112,7 @@ the feature dimension there, the resulting array is 2D.
 
 In order to fit metric learning algorithms with this type of input, we need to
 give the original dataset of points ``X`` to the estimator so that it knows
 the points the indices refer to. We do this when initializing the estimator,
-through the argument `preprocessor`.
-
-.. topic:: Example:
-
->>> from metric_learn import MMC
->>> mmc = MMC(preprocessor=X)
->>> mmc.fit(pairs_indice, y)
+through the argument `preprocessor` (see below :ref:`fit_ws`).
 
 .. note::
 
@@ -118,17 +123,85 @@ through the argument `preprocessor`.
    paths in the filesystem, name of records in a database etc...) See section
    :ref:`preprocessor_section` for more details on how to use the
    preprocessor.
 
-.. _sklearn_compat_ws:
+.. _fit_ws:
+
+Fit, transform, and so on
+-------------------------
+
+The goal of weakly-supervised metric-learning algorithms is to transform
+points in a new space, in which the tuple-wise constraints between points
+are respected. 
>>> from metric_learn import MMC
+>>> mmc = MMC(random_state=42)
+>>> mmc.fit(tuples, y)
+MMC(A0='deprecated', convergence_threshold=0.001, diagonal=False,
+  diagonal_c=1.0, init=None, max_iter=100, max_proj=10000,
+  preprocessor=None, random_state=42, verbose=False)
+
+Or alternatively (using a preprocessor):
+
+>>> from metric_learn import MMC
+>>> mmc = MMC(preprocessor=X, random_state=42)
+>>> mmc.fit(pairs_indices, y)
+
+
+Now that the estimator is fitted, you can use it on new data for several
+purposes.
+
+First, you can transform the data in the learned space, using `transform`:
+Here we transform two points in the new embedding space.
+
+>>> X_new = np.array([[9.4, 4.1, 4.2], [2.1, 4.4, 2.3]])
+>>> mmc.transform(X_new)
+array([[-3.24667162e+01,  4.62622348e-07,  3.88325421e-08],
+       [-3.61531114e+01,  4.86778289e-07,  2.12654397e-08]])
+
+Also, as explained before, our metric learner has learned a distance between
+points. You can use this distance in two main ways:
+
+- You can either return the distance between pairs of points using the
+  `score_pairs` function:
+
+>>> mmc.score_pairs([[[3.5, 3.6, 5.2], [5.6, 2.4, 6.7]],
+...                  [[1.2, 4.2, 7.7], [2.1, 6.4, 0.9]]])
+array([7.27607365, 0.88853014])
+
+- Or you can return a function that will return the distance
+  (in the new space) between two 1D arrays (the coordinates of the points in
+  the original space), similarly to distance functions in
+  `scipy.spatial.distance`. To do that, use the `get_metric` method.
+>>> metric_fun = mmc.get_metric()
+>>> metric_fun([3.5, 3.6, 5.2], [5.6, 2.4, 6.7])
+7.276073646278203
+
+.. note::
+
+   If the metric learner that you use learns a Mahalanobis Matrix (as is
+   the case for all algorithms currently in metric-learn), you can get the
+   plain Mahalanobis matrix using `get_mahalanobis_matrix`.
+
+>>> mmc.get_mahalanobis_matrix()
+array([[ 0.58603894, -5.69883982, -1.66614919],
+       [-5.69883982, 55.41743549, 16.20219519],
+       [-1.66614919, 16.20219519,  4.73697721]])
+
+.. TODO: remove the "as is the case etc..." if it's not the case anymore
+
+.. _sklearn_compat_ws:
+
 Scikit-learn compatibility
-==========================
+--------------------------
 
 Weakly supervised estimators are compatible with scikit-learn routines for
 model selection (grid-search, cross-validation etc). See the scoring section
-for more details on the scoring used in the case of Weakly Supervised
-Metric Learning.
+of the appropriate algorithm (:ref:`pairs learners `
+or :ref:`quadruplets learners `)
+for more details on the scoring used in the case of Weakly Supervised Metric
+Learning.
 
-.. topic:: Example
+Example:
 
 >>> from metric_learn import MMC
 >>> from sklearn.datasets import load_iris
 >>> from sklearn.model_selection import cross_val_score
 >>> mmc = MMC(preprocessor=X)
 >>> cross_val_score(mmc, pairs_indices, y)
 
-Scoring
-=======
+Prediction and scoring
+----------------------
+
+Weakly supervised estimators are also able, after being fitted, to predict,
+for a given tuple, its label (for pairs) or ordering (for quadruplets). See
+the appropriate section for more details, either :ref:`this
+one ` for pairs, or :ref:`this one
+` for quadruplets.
 
-Some default scoring are implemented in metric-learn, depending on the kind of
-tuples you're working with (pairs, triplets...). See the docstring of the
-`score` method of the estimator you use.
+They also implement a default scoring method, `score`, that can be
+used to evaluate the performance of a metric-learner on a test dataset. 
+
+.. _learning_on_pairs:

 Learning on pairs
 =================

 Some metric learning algorithms learn on pairs of samples. In this case, one
 should provide the algorithm with ``n_samples`` pairs of points, with a
 corresponding target containing ``n_samples`` values being either +1 or -1.
 These values indicate whether the given pairs are similar points or
 dissimilar points.

+Fitting
+-------
+
+Here is an example for fitting on pairs (see :ref:`fit_ws` for more details on
+the input data format and how to fit, in the general case of learning on
+tuples).
+
+>>> from metric_learn import MMC
+>>> pairs = np.array([[[1.2, 3.2], [2.3, 5.5]],
+>>>                   [[4.5, 2.3], [2.1, 2.3]]])
+>>> y_pairs = np.array([1, -1])
+>>> mmc = MMC(random_state=42)
+>>> mmc.fit(pairs, y_pairs)
+MMC(A0='deprecated', convergence_threshold=0.001, diagonal=False,
+  diagonal_c=1.0, init=None, max_iter=100, max_proj=10000, preprocessor=None,
+  random_state=42, verbose=False)
+
+Here, we learned a metric that puts the first two points closer
+together in the transformed space, and the last two points further away from
+each other.
+
+.. _pairs_predicting:
+
+Predicting
+----------
+
+When a pairs learner is fitted, it is also able to predict, for a new
+pair, whether it is a pair of similar or dissimilar points.
+
+>>> mmc.predict([[[0.6, 1.6], [1.15, 2.75]],
+...              [[3.2, 1.1], [5.4, 6.1]]])
+array([1, -1])

 .. _calibration:

 Thresholding
 ------------
 In order to predict whether a new pair represents similar or dissimilar
-samples, we need to set a distance threshold, so that points closer (in the
-learned space) than this threshold are predicted as similar, and points further
-away are predicted as dissimilar. Several methods are possible for this
+samples, we in fact need to set a distance threshold, so that points closer (in
+the learned space) than this threshold are predicted as similar, and points
+further away are predicted as dissimilar. Several methods are possible for this
 thresholding.

 - **At fit time**: The threshold is set with `calibrate_threshold` (see
@@ -177,26 +290,73 @@ thresholding.
   overfitting. If you want to avoid that, calibrate the threshold after
   fitting, on a validation set.

+  >>> mmc.fit(pairs, y) # will fit the threshold automatically after fitting
+
 - **Manual**: calling `set_threshold` will set the threshold to a particular
   value.

+  >>> mmc.set_threshold(0.4)
+
 - **Calibration**: calling `calibrate_threshold` will calibrate the
   threshold to achieve a particular score on a validation set, the score
   being among the classical scores for classification (accuracy, f1
   score...).

+  >>> mmc.calibrate_threshold(pairs, y)

 See also: `sklearn.calibration`.

+.. _pairs_scoring:
+
+Scoring
+-------
+
+Not only can pairs learners predict the label of given pairs; they can also
+return a `decision_function` for a set of pairs. It is basically the "score"
+that will be thresholded to find the prediction for the pair. This
+"score" is the opposite of the distance in the new space (a higher score
+means the points are more similar, a lower score that they are more
+dissimilar).
+
+>>> mmc.decision_function([[[0.6, 1.6], [1.15, 2.75]],
+...                        [[3.2, 1.1], [5.4, 6.1]]])
+array([-0.12811124, -0.74750256])
+
+This makes it possible to use any of the usual classification scores, like
+`sklearn.metrics.accuracy_score` for instance, including inside
+cross-validation routines:
+
+>>> from sklearn.model_selection import cross_val_score
+>>> pairs_test = np.array([[[0.6, 1.6], [1.15, 2.75]],
+...                        [[3.2, 1.1], [5.4, 6.1]],
+...
[[7.7, 5.6], [1.23, 8.4]]]) +>>> y_test = np.array([-1., 1., -1.]) +>>> cross_val_score(mmc, pairs_test, y_test, scoring='accuracy') +array([1., 0., 1.]) + +Pairs learners also have a default score, which basically +returns the `sklearn.metrics.roc_auc_score` (therefore is not dependent on +the threshold). + +>>> pairs_test = np.array([[[0.6, 1.6], [1.15, 2.75]], +... [[3.2, 1.1], [5.4, 6.1]], +... [[7.7, 5.6], [1.23, 8.4]]]) +>>> y_test = np.array([-1., 1., -1.]) +>>> mmc.score(pairs_test, y_test) +0.5 + +.. note:: + See :ref:`fit_ws` for more details on metric learners functions that are + not specific to learning on pairs, like `transform`, `score_pairs`, + `get_metric` and `get_mahalanobis_matrix`. Algorithms -========== +---------- .. _itml: -ITML ----- +:py:class:`ITML ` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Information Theoretic Metric Learning(:py:class:`ITML `) +Information Theoretic Metric Learning(:py:class:`ITML `) `ITML` minimizes the (differential) relative entropy, aka Kullback–Leibler divergence, between two multivariate Gaussians subject to constraints on the @@ -270,99 +430,13 @@ is the prior distance metric, set to identity matrix by default, itml/ -.. _lsml: - -LSML ----- - -Metric Learning from Relative Comparisons by Minimizing Squared Residual -(:py:class:`LSML `) - -`LSML` proposes a simple, yet effective, algorithm that minimizes a convex -objective function corresponding to the sum of squared residuals of -constraints. This algorithm uses the constraints in the form of the -relative distance comparisons, such method is especially useful where -pairwise constraints are not natural to obtain, thus pairwise constraints -based algorithms become infeasible to be deployed. Furthermore, its sparsity -extension leads to more stable estimation when the dimension is high and -only a small amount of constraints is given. - -The loss function of each constraint -:math:`d(\mathbf{x}_a, \mathbf{x}_b) < d(\mathbf{x}_c, \mathbf{x}_d)` is -denoted as: - -.. math:: - - H(d_\mathbf{M}(\mathbf{x}_a, \mathbf{x}_b) - - d_\mathbf{M}(\mathbf{x}_c, \mathbf{x}_d)) - -where :math:`H(\cdot)` is the squared Hinge loss function defined as: - -.. math:: - - H(x) = \left\{\begin{aligned}0 \qquad x\leq 0 \\ - \,\,x^2 \qquad x>0\end{aligned}\right.\\ - -The summed loss function :math:`L(C)` is the simple sum over all constraints -:math:`C = \{(\mathbf{x}_a , \mathbf{x}_b , \mathbf{x}_c , \mathbf{x}_d) -: d(\mathbf{x}_a , \mathbf{x}_b) < d(\mathbf{x}_c , \mathbf{x}_d)\}`. The -original paper suggested here should be a weighted sum since the confidence -or probability of each constraint might differ. However, for the sake of -simplicity and assumption of no extra knowledge provided, we just deploy -the simple sum here as well as what the authors did in the experiments. - -The distance metric learning problem becomes minimizing the summed loss -function of all constraints plus a regularization term w.r.t. the prior -knowledge: - -.. math:: - - \min_\mathbf{M}(D_{ld}(\mathbf{M, M_0}) + \sum_{(\mathbf{x}_a, - \mathbf{x}_b, \mathbf{x}_c, \mathbf{x}_d)\in C}H(d_\mathbf{M}( - \mathbf{x}_a, \mathbf{x}_b) - d_\mathbf{M}(\mathbf{x}_c, \mathbf{x}_c))\\ - -where :math:`\mathbf{M}_0` is the prior metric matrix, set as identity -by default, :math:`D_{ld}(\mathbf{\cdot, \cdot})` is the LogDet divergence: - -.. math:: - - D_{ld}(\mathbf{M, M_0}) = \text{tr}(\mathbf{MM_0}) − \text{logdet} - (\mathbf{M}) - -.. 
topic:: Example Code: - -:: - - from metric_learn import LSML - - quadruplets = [[[1.2, 7.5], [1.3, 1.5], [6.4, 2.6], [6.2, 9.7]], - [[1.3, 4.5], [3.2, 4.6], [6.2, 5.5], [5.4, 5.4]], - [[3.2, 7.5], [3.3, 1.5], [8.4, 2.6], [8.2, 9.7]], - [[3.3, 4.5], [5.2, 4.6], [8.2, 5.5], [7.4, 5.4]]] - - # we want to make closer points where the first feature is close, and - # further if the second feature is close - - lsml = LSML() - lsml.fit(quadruplets) - -.. topic:: References: - - .. [1] Liu et al. - "Metric Learning from Relative Comparisons by Minimizing Squared - Residual". ICDM 2012. http://www.cs.ucla.edu/~weiwang/paper/ICDM12.pdf - - .. [2] Adapted from https://gist.github.com/kcarnold/5439917 - .. _sdml: -======= - -SDML ----- +:py:class:`SDML ` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Sparse High-Dimensional Metric Learning -(:py:class:`SDML `) +(:py:class:`SDML `) `SDML` is an efficient sparse metric learning in high-dimensional space via double regularization: an L1-penalization on the off-diagonal elements of the @@ -418,10 +492,10 @@ is the off-diagonal L1 norm. .. _rca: -RCA ---- +:py:class:`RCA ` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Relative Components Analysis (:py:class:`RCA `) +Relative Components Analysis (:py:class:`RCA `) `RCA` learns a full rank Mahalanobis distance metric based on a weighted sum of in-chunklets covariance matrices. It applies a global linear transformation to @@ -474,11 +548,11 @@ as the Mahalanobis matrix. .. _mmc: -MMC ---- +:py:class:`MMC ` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Metric Learning with Application for Clustering with Side Information -(:py:class:`MMC `) +(:py:class:`MMC `) `MMC` minimizes the sum of squared distances between similar points, while enforcing the sum of distances between dissimilar ones to be greater than one. @@ -528,23 +602,185 @@ points, while constrains the sum of distances between dissimilar points: .. [2] Adapted from Matlab code `here `_. + +.. _learning_on_quadruplets: + Learning on quadruplets ======================= -A type of information even weaker than pairs is information about relative -comparisons between pairs. The user should provide the algorithm with a -quadruplet of points, where the two first points are closer than the two -last points. No target vector (``y``) is needed, since the supervision is -already in the order that points are given in the quadruplet. + + +The goal of weakly-supervised metric-learning algorithms is to transform +points in a new space, in which the tuple-wise constraints between points +are respected. + +Fitting +------- +Here is an example for fitting on quadruplets (see :ref:`fit_ws` for more +details on the input data format and how to fit, in the general case of +learning on tuples). 
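+
+Note that there is no target vector (``y``) to give here: the supervision is
+already contained in the ordering of each quadruplet (the two first points
+should be closer to each other than the two last points).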
+
+>>> from metric_learn import LSML
+>>> quadruplets = np.array([[[1.2, 3.2], [2.3, 5.5], [2.4, 6.7], [2.1, 0.6]],
+>>>                         [[4.5, 2.3], [2.1, 2.3], [0.6, 1.2], [7.3, 3.4]]])
+>>> lsml = LSML(random_state=42)
+>>> lsml.fit(quadruplets)
+LSML(max_iter=1000, preprocessor=None, prior=None, random_state=42, tol=0.001,
+   verbose=False)
+
+Or alternatively (using a preprocessor):
+
+>>> X = np.array([[1.2, 3.2],
+>>>               [2.3, 5.5],
+>>>               [2.4, 6.7],
+>>>               [2.1, 0.6],
+>>>               [4.5, 2.3],
+>>>               [2.1, 2.3],
+>>>               [0.6, 1.2],
+>>>               [7.3, 3.4]])
+>>> quadruplets_indices = np.array([[0, 1, 2, 3], [4, 5, 6, 7]])
+>>> lsml = LSML(preprocessor=X, random_state=42)
+>>> lsml.fit(quadruplets_indices)
+LSML(max_iter=1000,
+   preprocessor=array([[1.2, 3.2],
+       [2.3, 5.5],
+       [2.4, 6.7],
+       [2.1, 0.6],
+       [4.5, 2.3],
+       [2.1, 2.3],
+       [0.6, 1.2],
+       [7.3, 3.4]]),
+   prior=None, random_state=42, tol=0.001, verbose=False)
+
+
+Here, we want to learn a metric that, for each of the two
+`quadruplets`, will put the two first points closer together than the two
+last points.
+
+.. _quadruplets_predicting:
+
+Predicting
+----------
+
+When a quadruplets learner is fitted, it is also able to predict, for an
+upcoming quadruplet, whether the two first points are more similar than the
+two last points (+1), or not (-1).
+
+>>> quadruplets_test = np.array(
+... [[[5.6, 5.3], [2.2, 2.1], [0.4, 0.6], [1.2, 3.4]],
+...  [[6.0, 4.2], [4.3, 1.2], [4.5, 0.6], [0.1, 7.8]]])
+>>> lsml.predict(quadruplets_test)
+array([-1., 1.])
+
+.. _quadruplets_scoring:
+
+Scoring
+-------
+
+Not only can quadruplets learners predict the ordering of given quadruplets;
+they can also return a `decision_function` for a set of quadruplets. It is
+basically the "score" whose sign will be taken to find the prediction for
+the quadruplet. This "score" is the difference between the distance between
+the two last points and the distance between the two first points of the
+quadruplet (a higher score means the two last points are more likely to be
+more dissimilar than the two first points, i.e. more likely to lead to a +1
+prediction, since this is the expected ordering).
+
+>>> lsml.decision_function(quadruplets_test)
+array([-1.75700306, 4.98982131])
+
+In the above example, for the first quadruplet in `quadruplets_test`, the
+two first points are predicted to be less similar than the two last points
+(they are further away in the transformed space).
+
+Unlike pairs learners, quadruplets learners do not take a `y` when fitting,
+so scikit-learn scoring functions cannot be used with them:
+
+>>> from sklearn.model_selection import cross_val_score
+>>> cross_val_score(lsml, quadruplets, scoring='f1_score') # this won't work
+
+(This is actually intentional; for more details
+about that, see
+`this comment `_
+on GitHub.)
+
+However, quadruplets learners do have a default scoring function, which
+basically returns the accuracy score on a given test set, i.e. the
+proportion of quadruplets whose predicted ordering is right.
+
+>>> lsml.score(quadruplets_test)
+0.5
+
+.. note::
+   See :ref:`fit_ws` for more details on metric learners' functions that are
+   not specific to learning on quadruplets, like `transform`, `score_pairs`,
+   `get_metric` and `get_mahalanobis_matrix`.
+
+
+
 Algorithms
-==========
+----------
+
+.. _lsml:
+
+:py:class:`LSML <metric_learn.LSML>`
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

-LSML
-----
-
-`LSML`: Metric Learning from Relative Comparisons by Minimizing Squared
-Residual
+Metric Learning from Relative Comparisons by Minimizing Squared Residual
+(:py:class:`LSML <metric_learn.LSML>`)
+
+`LSML` proposes a simple, yet effective, algorithm that minimizes a convex
+objective function corresponding to the sum of squared residuals of
+constraints. This algorithm uses constraints in the form of relative
+distance comparisons, which is especially useful where pairwise constraints
+are not natural to obtain, and where pairwise-constraints-based algorithms
+therefore become infeasible to deploy. Furthermore, its sparsity extension
+leads to more stable estimation when the dimension is high and only a small
+number of constraints is given.
+
+The loss function of each constraint
+:math:`d(\mathbf{x}_a, \mathbf{x}_b) < d(\mathbf{x}_c, \mathbf{x}_d)` is
+denoted as:
+
+.. math::
+
+    H(d_\mathbf{M}(\mathbf{x}_a, \mathbf{x}_b)
+    - d_\mathbf{M}(\mathbf{x}_c, \mathbf{x}_d))
+
+where :math:`H(\cdot)` is the squared Hinge loss function defined as:
+
+.. math::
+
+    H(x) = \left\{\begin{aligned}0 \qquad x\leq 0 \\
+    \,\,x^2 \qquad x>0\end{aligned}\right.\\
+
+The summed loss function :math:`L(C)` is the simple sum over all constraints
+:math:`C = \{(\mathbf{x}_a , \mathbf{x}_b , \mathbf{x}_c , \mathbf{x}_d)
+: d(\mathbf{x}_a , \mathbf{x}_b) < d(\mathbf{x}_c , \mathbf{x}_d)\}`. The
+original paper suggests that this should be a weighted sum, since the
+confidence or probability of each constraint might differ. However, for the
+sake of simplicity, and assuming no extra knowledge is provided, we use the
+simple sum here, as the authors did in their experiments.
+
+The distance metric learning problem then becomes minimizing the summed loss
+function of all constraints plus a regularization term w.r.t. the prior
+knowledge:
+
+.. math::
+
+    \min_\mathbf{M}(D_{ld}(\mathbf{M, M_0}) + \sum_{(\mathbf{x}_a,
+    \mathbf{x}_b, \mathbf{x}_c, \mathbf{x}_d)\in C}H(d_\mathbf{M}(
+    \mathbf{x}_a, \mathbf{x}_b) - d_\mathbf{M}(\mathbf{x}_c, \mathbf{x}_d))\\
+
+where :math:`\mathbf{M}_0` is the prior metric matrix, set as identity
+by default, and :math:`D_{ld}(\cdot, \cdot)` is the LogDet divergence:
+
+.. math::
+
+    D_{ld}(\mathbf{M, M_0}) = \text{tr}(\mathbf{MM_0}) - \text{logdet}
+    (\mathbf{M})

 .. topic:: Example Code:

 ::

     from metric_learn import LSML

     quadruplets = [[[1.2, 7.5], [1.3, 1.5], [6.4, 2.6], [6.2, 9.7]],
                    [[1.3, 4.5], [3.2, 4.6], [6.2, 5.5], [5.4, 5.4]],
                    [[3.2, 7.5], [3.3, 1.5], [8.4, 2.6], [8.2, 9.7]],
                    [[3.3, 4.5], [5.2, 4.6], [8.2, 5.5], [7.4, 5.4]]]

     # we want to make closer points where the first feature is close, and
     # further if the second feature is close

     lsml = LSML()
     lsml.fit(quadruplets)

 .. topic:: References:

     .. [1] Liu et al.
        "Metric Learning from Relative Comparisons by Minimizing Squared
        Residual". ICDM 2012. http://www.cs.ucla.edu/~weiwang/paper/ICDM12.pdf

     .. [2] Adapted from https://gist.github.com/kcarnold/5439917
+
+
diff --git a/examples/plot_metric_learning_examples.py b/examples/plot_metric_learning_examples.py
index fd6cff20..b46d1adc 100644
--- a/examples/plot_metric_learning_examples.py
+++ b/examples/plot_metric_learning_examples.py
@@ -130,7 +130,7 @@ def plot_tsne(X, y, colormap=plt.cm.Paired):
 #
 # - See more in the :ref:`User Guide `
 # - See more in the documentation of the class :py:class:`LMNN
-#       `
+#       `


 ######################################################################


 # setting up LMNN
-lmnn = metric_learn.LMNN(k=5, learn_rate=1e-6)
+lmnn = metric_learn.LMNN(k=5, learn_rate=1e-6, init='random')

 # fit the data!
lmnn.fit(X, y) @@ -181,7 +181,7 @@ def plot_tsne(X, y, colormap=plt.cm.Paired): # # - See more in the :ref:`User Guide ` # - See more in the documentation of the class :py:class:`ITML -# ` +# ` itml = metric_learn.ITML_Supervised() X_itml = itml.fit_transform(X, y) @@ -200,12 +200,12 @@ def plot_tsne(X, y, colormap=plt.cm.Paired): # # - See more in the :ref:`User Guide ` # - See more in the documentation of the class :py:class:`MMC -# ` +# ` -itml = metric_learn.ITML_Supervised() -X_itml = itml.fit_transform(X, y) +mmc = metric_learn.MMC_Supervised() +X_mmc = mmc.fit_transform(X, y) -plot_tsne(X_itml, y) +plot_tsne(X_mmc, y) ###################################################################### # Sparse Determinant Metric Learning @@ -219,9 +219,10 @@ def plot_tsne(X, y, colormap=plt.cm.Paired): # # - See more in the :ref:`User Guide ` # - See more in the documentation of the class :py:class:`SDML -# ` +# ` -sdml = metric_learn.SDML_Supervised(sparsity_param=0.1, balance_param=0.0015) +sdml = metric_learn.SDML_Supervised(sparsity_param=0.1, balance_param=0.0015, + prior='covariance') X_sdml = sdml.fit_transform(X, y) plot_tsne(X_sdml, y) @@ -238,9 +239,10 @@ def plot_tsne(X, y, colormap=plt.cm.Paired): # # - See more in the :ref:`User Guide ` # - See more in the documentation of the class :py:class:`LSML -# ` +# ` -lsml = metric_learn.LSML_Supervised(tol=0.0001, max_iter=10000) +lsml = metric_learn.LSML_Supervised(tol=0.0001, max_iter=10000, + prior='covariance') X_lsml = lsml.fit_transform(X, y) plot_tsne(X_lsml, y) @@ -265,7 +267,7 @@ def plot_tsne(X, y, colormap=plt.cm.Paired): # # - See more in the :ref:`User Guide ` # - See more in the documentation of the class :py:class:`NCA -# ` +# ` nca = metric_learn.NCA(max_iter=1000) X_nca = nca.fit_transform(X, y) @@ -285,7 +287,7 @@ def plot_tsne(X, y, colormap=plt.cm.Paired): # # - See more in the :ref:`User Guide ` # - See more in the documentation of the class :py:class:`LFDA -# ` +# ` lfda = metric_learn.LFDA(k=2, num_dims=2) X_lfda = lfda.fit_transform(X, y) @@ -306,7 +308,7 @@ def plot_tsne(X, y, colormap=plt.cm.Paired): # # - See more in the :ref:`User Guide ` # - See more in the documentation of the class :py:class:`RCA -# ` +# ` rca = metric_learn.RCA_Supervised(num_chunks=30, chunk_size=2) X_rca = rca.fit_transform(X, y) @@ -326,7 +328,7 @@ def plot_tsne(X, y, colormap=plt.cm.Paired): # # - See more in the :ref:`User Guide ` # - See more in the documentation of the class :py:class:`MLKR -# ` +# ` # # To illustrate MLKR, let's use the dataset # `sklearn.datasets.make_regression` the same way as we did with the @@ -445,8 +447,8 @@ def create_constraints(labels): ###################################################################### # Using our constraints, let's now train ITML again. Note that we are no # longer calling the supervised class :py:class:`ITML_Supervised -# ` but the more generic -# (weakly-supervised) :py:class:`ITML `, which +# ` but the more generic +# (weakly-supervised) :py:class:`ITML `, which # takes the dataset `X` through the `preprocessor` argument (see # :ref:`this section ` of the documentation to learn # about more advanced uses of `preprocessor`) and the pair information `pairs` diff --git a/metric_learn/base_metric.py b/metric_learn/base_metric.py index 6e5ffb13..55fba54f 100644 --- a/metric_learn/base_metric.py +++ b/metric_learn/base_metric.py @@ -1,3 +1,7 @@ +""" +Base module. 
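+
+It defines the common interface shared by all metric learners
+(`BaseMetricLearner`), together with the mixins for Mahalanobis metrics and
+for classifiers that act on pairs and quadruplets of points.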
+""" + from sklearn.base import BaseEstimator from sklearn.utils.extmath import stable_cumsum from sklearn.utils.validation import _is_arraylike, check_is_fitted @@ -10,16 +14,17 @@ class BaseMetricLearner(six.with_metaclass(ABCMeta, BaseEstimator)): + """ + Base class for all metric-learners. - def __init__(self, preprocessor=None): - """ + Parameters + ---------- + preprocessor : array-like, shape=(n_samples, n_features) or callable + The preprocessor to call to get tuples from indices. If array-like, + tuples will be gotten like this: X[indices]. + """ - Parameters - ---------- - preprocessor : array-like, shape=(n_samples, n_features) or callable - The preprocessor to call to get tuples from indices. If array-like, - tuples will be gotten like this: X[indices]. - """ + def __init__(self, preprocessor=None): self.preprocessor = preprocessor @abstractmethod @@ -277,6 +282,8 @@ def metric_fun(u, v, squared=False): get_metric.__doc__ = BaseMetricLearner.get_metric.__doc__ def metric(self): + """Deprecated. Will be removed in v0.6.0. Use `get_mahalanobis_matrix` + instead""" # TODO: remove this method in version 0.6.0 warnings.warn(("`metric` is deprecated since version 0.5.0 and will be " "removed in 0.6.0. Use `get_mahalanobis_matrix` instead."), @@ -295,7 +302,8 @@ def get_mahalanobis_matrix(self): class _PairsClassifierMixin(BaseMetricLearner): - """ + """Base class for pairs learners. + Attributes ---------- threshold_ : `float` @@ -567,6 +575,8 @@ def _validate_calibration_params(strategy='accuracy', min_rate=None, class _QuadrupletsClassifierMixin(BaseMetricLearner): + """Base class for quadruplets learners. + """ _tuple_size = 4 # number of points in a tuple, 4 for quadruplets @@ -578,7 +588,7 @@ def predict(self, quadruplets): Parameters ---------- - quadruplets : array-like, shape=(n_quadruplets, 4, n_features) or + quadruplets : array-like, shape=(n_quadruplets, 4, n_features) or \ (n_quadruplets, 4) 3D Array of quadruplets to predict, with each row corresponding to four points, or 2D array of indices of quadruplets if the metric learner @@ -607,7 +617,7 @@ def decision_function(self, quadruplets): Parameters ---------- - quadruplets : array-like, shape=(n_quadruplets, 4, n_features) or + quadruplets : array-like, shape=(n_quadruplets, 4, n_features) or \ (n_quadruplets, 4) 3D Array of quadruplets to predict, with each row corresponding to four points, or 2D array of indices of quadruplets if the metric learner @@ -633,7 +643,7 @@ def score(self, quadruplets): Parameters ---------- - quadruplets : array-like, shape=(n_quadruplets, 4, n_features) or + quadruplets : array-like, shape=(n_quadruplets, 4, n_features) or \ (n_quadruplets, 4) 3D Array of quadruplets to score, with each row corresponding to four points, or 2D array of indices of quadruplets if the metric learner diff --git a/metric_learn/constraints.py b/metric_learn/constraints.py index e591830b..069a6564 100644 --- a/metric_learn/constraints.py +++ b/metric_learn/constraints.py @@ -11,6 +11,11 @@ class Constraints(object): + """ + Class to build constraints from labels. 
+ + See more in the :ref:`User Guide ` + """ def __init__(self, partial_labels): '''partial_labels : int arraylike, -1 indicating unknown label''' partial_labels = np.asanyarray(partial_labels, dtype=int) diff --git a/metric_learn/covariance.py b/metric_learn/covariance.py index 19dad5d8..b9666494 100644 --- a/metric_learn/covariance.py +++ b/metric_learn/covariance.py @@ -1,11 +1,5 @@ """ Covariance metric (baseline method) - -This method does not "learn" anything, rather it calculates -the covariance matrix of the input data. - -This is a simple baseline method first introduced in -On the Generalized Distance in Statistics, P.C.Mahalanobis, 1936 """ from __future__ import absolute_import @@ -20,11 +14,28 @@ class Covariance(MahalanobisMixin, TransformerMixin): """Covariance metric (baseline method) + This method does not "learn" anything, rather it calculates + the covariance matrix of the input data. + + This is a simple baseline method first introduced in + On the Generalized Distance in Statistics, P.C.Mahalanobis, 1936 + + Read more in the :ref:`User Guide `. + Attributes ---------- transformer_ : `numpy.ndarray`, shape=(n_features, n_features) The linear transformation ``L`` deduced from the learned Mahalanobis metric (See function `transformer_from_metric`.) + + Examples + -------- + >>> from metric_learn import Covariance + >>> from sklearn.datasets import load_iris + >>> iris = load_iris()['data'] + >>> cov = Covariance().fit(iris) + >>> x = cov.transform(iris) + """ def __init__(self, preprocessor=None): diff --git a/metric_learn/itml.py b/metric_learn/itml.py index 21303c18..16fc21db 100644 --- a/metric_learn/itml.py +++ b/metric_learn/itml.py @@ -1,17 +1,5 @@ -r""" -Information Theoretic Metric Learning(ITML) - -`ITML` minimizes the (differential) relative entropy, aka Kullback-Leibler -divergence, between two multivariate Gaussians subject to constraints on the -associated Mahalanobis distance, which can be formulated into a Bregman -optimization problem by minimizing the LogDet divergence subject to -linear constraints. This algorithm can handle a wide variety of constraints -and can optionally incorporate a prior on the distance function. Unlike some -other methods, `ITML` does not rely on an eigenvalue computation or -semi-definite programming. - -Read more in the :ref:`User Guide `. - +""" +Information Theoretic Metric Learning (ITML) """ from __future__ import print_function, absolute_import @@ -34,55 +22,6 @@ class _BaseITML(MahalanobisMixin): def __init__(self, gamma=1., max_iter=1000, convergence_threshold=1e-3, prior='identity', A0='deprecated', verbose=False, preprocessor=None, random_state=None): - """Initialize ITML. - - Parameters - ---------- - gamma : float, optional - value for slack variables - - max_iter : int, optional - - convergence_threshold : float, optional - - prior : string or numpy array, optional (default='identity') - The Mahalanobis matrix to use as a prior. Possible options are - 'identity', 'covariance', 'random', and a numpy array of shape - (n_features, n_features). For ITML, the prior should be strictly - positive definite (PD). - - 'identity' - An identity matrix of shape (n_features, n_features). - - 'covariance' - The inverse covariance matrix. - - 'random' - The prior will be a random SPD matrix of shape - `(n_features, n_features)`, generated using - `sklearn.datasets.make_spd_matrix`. - - numpy array - A positive definite (PD) matrix of shape - (n_features, n_features), that will be used as such to set the - prior. 
- - A0 : Not used - .. deprecated:: 0.5.0 - `A0` was deprecated in version 0.5.0 and will - be removed in 0.6.0. Use 'prior' instead. - - verbose : bool, optional - if True, prints information while learning - - preprocessor : array-like, shape=(n_samples, n_features) or callable - The preprocessor to call to get tuples from indices. If array-like, - tuples will be formed like this: X[indices]. - - random_state : int or numpy.RandomState or None, optional (default=None) - A pseudo random number generator object or a seed for it if int. If - ``prior='random'``, ``random_state`` is used to set the prior. - """ self.gamma = gamma self.max_iter = max_iter self.convergence_threshold = convergence_threshold @@ -172,6 +111,66 @@ def _fit(self, pairs, y, bounds=None): class ITML(_BaseITML, _PairsClassifierMixin): """Information Theoretic Metric Learning (ITML) + `ITML` minimizes the (differential) relative entropy, aka Kullback-Leibler + divergence, between two multivariate Gaussians subject to constraints on the + associated Mahalanobis distance, which can be formulated into a Bregman + optimization problem by minimizing the LogDet divergence subject to + linear constraints. This algorithm can handle a wide variety of constraints + and can optionally incorporate a prior on the distance function. Unlike some + other methods, `ITML` does not rely on an eigenvalue computation or + semi-definite programming. + + Read more in the :ref:`User Guide `. + + Parameters + ---------- + gamma : float, optional (default=1.) + Value for slack variables + + max_iter : int, optional (default=1000) + Maximum number of iteration of the optimization procedure. + + convergence_threshold : float, optional (default=1e-3) + Convergence tolerance. + + prior : string or numpy array, optional (default='identity') + The Mahalanobis matrix to use as a prior. Possible options are + 'identity', 'covariance', 'random', and a numpy array of shape + (n_features, n_features). For ITML, the prior should be strictly + positive definite (PD). + + 'identity' + An identity matrix of shape (n_features, n_features). + + 'covariance' + The inverse covariance matrix. + + 'random' + The prior will be a random SPD matrix of shape + `(n_features, n_features)`, generated using + `sklearn.datasets.make_spd_matrix`. + + numpy array + A positive definite (PD) matrix of shape + (n_features, n_features), that will be used as such to set the + prior. + + A0 : Not used + .. deprecated:: 0.5.0 + `A0` was deprecated in version 0.5.0 and will + be removed in 0.6.0. Use 'prior' instead. + + verbose : bool, optional (default=False) + If True, prints information while learning + + preprocessor : array-like, shape=(n_samples, n_features) or callable + The preprocessor to call to get tuples from indices. If array-like, + tuples will be formed like this: X[indices]. + + random_state : int or numpy.RandomState or None, optional (default=None) + A pseudo random number generator object or a seed for it if int. If + ``prior='random'``, ``random_state`` is used to set the prior. + Attributes ---------- bounds_ : `numpy.ndarray`, shape=(2,) @@ -194,6 +193,22 @@ class ITML(_BaseITML, _PairsClassifierMixin): If the distance metric between two points is lower than this threshold, points will be classified as similar, otherwise they will be classified as dissimilar. 
+ + Examples + -------- + >>> from metric_learn import ITML_Supervised + >>> from sklearn.datasets import load_iris + >>> iris_data = load_iris() + >>> X = iris_data['data'] + >>> Y = iris_data['target'] + >>> itml = ITML_Supervised(num_constraints=200) + >>> itml.fit(X, Y) + + References + ---------- + .. [1] `Information-theoretic Metric Learning + `_ Jason V. Davis, et al. """ def fit(self, pairs, y, bounds=None, calibration_params=None): @@ -204,7 +219,7 @@ def fit(self, pairs, y, bounds=None, calibration_params=None): Parameters ---------- - pairs: array-like, shape=(n_constraints, 2, n_features) or + pairs: array-like, shape=(n_constraints, 2, n_features) or \ (n_constraints, 2) 3D Array of pairs with each row corresponding to two points, or 2D array of indices of pairs if the metric learner uses a @@ -240,6 +255,64 @@ def fit(self, pairs, y, bounds=None, calibration_params=None): class ITML_Supervised(_BaseITML, TransformerMixin): """Supervised version of Information Theoretic Metric Learning (ITML) + `ITML_Supervised` creates pairs of similar sample by taking same class + samples, and pairs of dissimilar samples by taking different class + samples. It then passes these pairs to `ITML` for training. + + Parameters + ---------- + gamma : float, optional + value for slack variables + max_iter : int, optional + convergence_threshold : float, optional + num_labeled : Not used + .. deprecated:: 0.5.0 + `num_labeled` was deprecated in version 0.5.0 and will + be removed in 0.6.0. + num_constraints: int, optional + number of constraints to generate + bounds : Not used + .. deprecated:: 0.5.0 + `bounds` was deprecated in version 0.5.0 and will + be removed in 0.6.0. Set `bounds` at fit time instead : + `itml_supervised.fit(X, y, bounds=...)` + + prior : string or numpy array, optional (default='identity') + Initialization of the Mahalanobis matrix. Possible options are + 'identity', 'covariance', 'random', and a numpy array of shape + (n_features, n_features). For ITML, the prior should be strictly + positive definite (PD). + + 'identity' + An identity matrix of shape (n_features, n_features). + + 'covariance' + The inverse covariance matrix. + + 'random' + The prior will be a random SPD matrix of shape + `(n_features, n_features)`, generated using + `sklearn.datasets.make_spd_matrix`. + + numpy array + A positive definite (PD) matrix of shape + (n_features, n_features), that will be used as such to set the + prior. + + A0 : Not used + .. deprecated:: 0.5.0 + `A0` was deprecated in version 0.5.0 and will + be removed in 0.6.0. Use 'prior' instead. + verbose : bool, optional + if True, prints information while learning + preprocessor : array-like, shape=(n_samples, n_features) or callable + The preprocessor to call to get tuples from indices. If array-like, + tuples will be formed like this: X[indices]. + random_state : int or numpy.RandomState or None, optional (default=None) + A pseudo random number generator object or a seed for it if int. If + ``prior='random'``, ``random_state`` is used to set the prior. + + Attributes ---------- bounds_ : `numpy.ndarray`, shape=(2,) @@ -257,71 +330,18 @@ class ITML_Supervised(_BaseITML, TransformerMixin): transformer_ : `numpy.ndarray`, shape=(n_features, n_features) The linear transformation ``L`` deduced from the learned Mahalanobis metric (See function `transformer_from_metric`.) 
+ + See Also + -------- + metric_learn.ITML : The original weakly-supervised algorithm + :ref:`supervised_version` : The section of the project documentation + that describes the supervised version of weakly supervised estimators. """ def __init__(self, gamma=1., max_iter=1000, convergence_threshold=1e-3, num_labeled='deprecated', num_constraints=None, bounds='deprecated', prior='identity', A0='deprecated', verbose=False, preprocessor=None, random_state=None): - """Initialize the supervised version of `ITML`. - - `ITML_Supervised` creates pairs of similar sample by taking same class - samples, and pairs of dissimilar samples by taking different class - samples. It then passes these pairs to `ITML` for training. - - Parameters - ---------- - gamma : float, optional - value for slack variables - max_iter : int, optional - convergence_threshold : float, optional - num_labeled : Not used - .. deprecated:: 0.5.0 - `num_labeled` was deprecated in version 0.5.0 and will - be removed in 0.6.0. - num_constraints: int, optional - number of constraints to generate - bounds : Not used - .. deprecated:: 0.5.0 - `bounds` was deprecated in version 0.5.0 and will - be removed in 0.6.0. Set `bounds` at fit time instead : - `itml_supervised.fit(X, y, bounds=...)` - - prior : string or numpy array, optional (default='identity') - Initialization of the Mahalanobis matrix. Possible options are - 'identity', 'covariance', 'random', and a numpy array of shape - (n_features, n_features). For ITML, the prior should be strictly - positive definite (PD). - - 'identity' - An identity matrix of shape (n_features, n_features). - - 'covariance' - The inverse covariance matrix. - - 'random' - The prior will be a random SPD matrix of shape - `(n_features, n_features)`, generated using - `sklearn.datasets.make_spd_matrix`. - - numpy array - A positive definite (PD) matrix of shape - (n_features, n_features), that will be used as such to set the - prior. - - A0 : Not used - .. deprecated:: 0.5.0 - `A0` was deprecated in version 0.5.0 and will - be removed in 0.6.0. Use 'prior' instead. - verbose : bool, optional - if True, prints information while learning - preprocessor : array-like, shape=(n_samples, n_features) or callable - The preprocessor to call to get tuples from indices. If array-like, - tuples will be formed like this: X[indices]. - random_state : int or numpy.RandomState or None, optional (default=None) - A pseudo random number generator object or a seed for it if int. If - ``prior='random'``, ``random_state`` is used to set the prior. - """ _BaseITML.__init__(self, gamma=gamma, max_iter=max_iter, convergence_threshold=convergence_threshold, A0=A0, prior=prior, verbose=verbose, diff --git a/metric_learn/lfda.py b/metric_learn/lfda.py index 1851a734..6c651b7b 100644 --- a/metric_learn/lfda.py +++ b/metric_learn/lfda.py @@ -1,13 +1,5 @@ -r""" -Local Fisher Discriminant Analysis(LFDA) - -LFDA is a linear supervised dimensionality reduction method. It is -particularly useful when dealing with multimodality, where one ore more classes -consist of separate clusters in input space. The core optimization problem of -LFDA is solved as a generalized eigenvalue problem. - -Read more in the :ref:`User Guide `. 
- +""" +Local Fisher Discriminant Analysis (LFDA) """ from __future__ import division, absolute_import import numpy as np @@ -24,44 +16,69 @@ class LFDA(MahalanobisMixin, TransformerMixin): ''' Local Fisher Discriminant Analysis for Supervised Dimensionality Reduction - Sugiyama, ICML 2006 - Attributes + LFDA is a linear supervised dimensionality reduction method. It is + particularly useful when dealing with multimodality, where one ore more + classes consist of separate clusters in input space. The core optimization + problem of LFDA is solved as a generalized eigenvalue problem. + + Read more in the :ref:`User Guide `. + + Parameters ---------- - transformer_ : `numpy.ndarray`, shape=(n_components, n_features) - The learned linear transformation ``L``. - ''' + n_components : int or None, optional (default=None) + Dimensionality of reduced space (if None, defaults to dimension of X). - def __init__(self, n_components=None, num_dims='deprecated', - k=None, embedding_type='weighted', preprocessor=None): - ''' - Initialize LFDA. + num_dims : Not used - Parameters - ---------- - n_components : int or None, optional (default=None) - Dimensionality of reduced space (if None, defaults to dimension of X). + .. deprecated:: 0.5.0 + `num_dims` was deprecated in version 0.5.0 and will + be removed in 0.6.0. Use `n_components` instead. - num_dims : Not used + k : int, optional + Number of nearest neighbors used in local scaling method. + Defaults to min(7, n_components - 1). - .. deprecated:: 0.5.0 - `num_dims` was deprecated in version 0.5.0 and will - be removed in 0.6.0. Use `n_components` instead. + embedding_type : str, optional + Type of metric in the embedding space (default: 'weighted') + 'weighted' - weighted eigenvectors + 'orthonormalized' - orthonormalized + 'plain' - raw eigenvectors - k : int, optional - Number of nearest neighbors used in local scaling method. - Defaults to min(7, n_components - 1). + preprocessor : array-like, shape=(n_samples, n_features) or callable + The preprocessor to call to get tuples from indices. If array-like, + tuples will be formed like this: X[indices]. + + Attributes + ---------- + transformer_ : `numpy.ndarray`, shape=(n_components, n_features) + The learned linear transformation ``L``. - embedding_type : str, optional - Type of metric in the embedding space (default: 'weighted') - 'weighted' - weighted eigenvectors - 'orthonormalized' - orthonormalized - 'plain' - raw eigenvectors + Examples + -------- + + >>> import numpy as np + >>> from metric_learn import LFDA + >>> from sklearn.datasets import load_iris + >>> iris_data = load_iris() + >>> X = iris_data['data'] + >>> Y = iris_data['target'] + >>> lfda = LFDA(k=2, dim=2) + >>> lfda.fit(X, Y) + + References + ------------------ + .. [1] `Dimensionality Reduction of Multimodal Labeled Data by Local Fisher + Discriminant Analysis `_ + Masashi Sugiyama. + + .. [2] `Local Fisher Discriminant Analysis on Beer Style Clustering + `_ Yuan Tang. + ''' - preprocessor : array-like, shape=(n_samples, n_features) or callable - The preprocessor to call to get tuples from indices. If array-like, - tuples will be formed like this: X[indices]. 
- ''' + def __init__(self, n_components=None, num_dims='deprecated', + k=None, embedding_type='weighted', preprocessor=None): if embedding_type not in ('weighted', 'orthonormalized', 'plain'): raise ValueError('Invalid embedding_type: %r' % embedding_type) self.n_components = n_components diff --git a/metric_learn/lmnn.py b/metric_learn/lmnn.py index 20eeea3b..600d55c0 100644 --- a/metric_learn/lmnn.py +++ b/metric_learn/lmnn.py @@ -1,16 +1,7 @@ -r""" -Large Margin Nearest Neighbor Metric learning(LMNN) - -LMNN learns a Mahalanobis distance metric in the kNN classification -setting. The learned metric attempts to keep close k-nearest neighbors -from the same class, while keeping examples from different classes -separated by a large margin. This algorithm makes no assumptions about -the distribution of the data. - -Read more in the :ref:`User Guide `. - """ -#TODO: periodic recalculation of impostors, PCA initialization +Large Margin Nearest Neighbor Metric learning (LMNN) +""" +# TODO: periodic recalculation of impostors, PCA initialization from __future__ import print_function, absolute_import import numpy as np @@ -26,81 +17,142 @@ class LMNN(MahalanobisMixin, TransformerMixin): + """Large Margin Nearest Neighbor (LMNN) + + LMNN learns a Mahalanobis distance metric in the kNN classification + setting. The learned metric attempts to keep close k-nearest neighbors + from the same class, while keeping examples from different classes + separated by a large margin. This algorithm makes no assumptions about + the distribution of the data. + + Read more in the :ref:`User Guide `. + + Parameters + ---------- + init : None, string or numpy array, optional (default=None) + Initialization of the linear transformation. Possible options are + 'auto', 'pca', 'identity', 'random', and a numpy array of shape + (n_features_a, n_features_b). If None, will be set automatically to + 'auto' (this option is to raise a warning if 'init' is not set, + and stays to its default value None, in v0.5.0). + + 'auto' + Depending on ``n_components``, the most reasonable initialization + will be chosen. If ``n_components <= n_classes`` we use 'lda', as + it uses labels information. If not, but + ``n_components < min(n_features, n_samples)``, we use 'pca', as + it projects data in meaningful directions (those of higher + variance). Otherwise, we just use 'identity'. + + 'pca' + ``n_components`` principal components of the inputs passed + to :meth:`fit` will be used to initialize the transformation. + (See `sklearn.decomposition.PCA`) + + 'lda' + ``min(n_components, n_classes)`` most discriminative + components of the inputs passed to :meth:`fit` will be used to + initialize the transformation. (If ``n_components > n_classes``, + the rest of the components will be zero.) (See + `sklearn.discriminant_analysis.LinearDiscriminantAnalysis`) + + 'identity' + If ``n_components`` is strictly smaller than the + dimensionality of the inputs passed to :meth:`fit`, the identity + matrix will be truncated to the first ``n_components`` rows. + + 'random' + The initial transformation will be a random array of shape + `(n_components, n_features)`. Each value is sampled from the + standard normal distribution. + + numpy array + n_features_b must match the dimensionality of the inputs passed to + :meth:`fit` and n_features_a must be less than or equal to that. + If ``n_components`` is not None, n_features_a must match it. + + k : int, optional + Number of neighbors to consider, not including self-edges. 
+ + min_iter : int, optional (default=50) + Minimum number of iterations of the optimization procedure. + + max_iter : int, optional (default=1000) + Maximum number of iterations of the optimization procedure. + + learn_rate : float, optional (default=1e-7) + Learning rate of the optimization procedure + + tol : float, optional (default=0.001) + Tolerance of the optimization procedure. If the objective value varies + less than `tol`, we consider the algorithm has converged and stop it. + + verbose : bool, optional (default=False) + Whether to print the progress of the optimization procedure. + + regularization: float, optional + Weighting of pull and push terms, with 0.5 meaning equal weight. + + preprocessor : array-like, shape=(n_samples, n_features) or callable + The preprocessor to call to get tuples from indices. If array-like, + tuples will be formed like this: X[indices]. + + n_components : int or None, optional (default=None) + Dimensionality of reduced space (if None, defaults to dimension of X). + + num_dims : Not used + + .. deprecated:: 0.5.0 + `num_dims` was deprecated in version 0.5.0 and will + be removed in 0.6.0. Use `n_components` instead. + + random_state : int or numpy.RandomState or None, optional (default=None) + A pseudo random number generator object or a seed for it if int. If + ``init='random'``, ``random_state`` is used to initialize the random + transformation. If ``init='pca'``, ``random_state`` is passed as an + argument to PCA when initializing the transformation. + + Attributes + ---------- + n_iter_ : `int` + The number of iterations the solver has run. + + transformer_ : `numpy.ndarray`, shape=(n_components, n_features) + The learned linear transformation ``L``. + + Examples + -------- + + >>> import numpy as np + >>> from metric_learn import LMNN + >>> from sklearn.datasets import load_iris + >>> iris_data = load_iris() + >>> X = iris_data['data'] + >>> Y = iris_data['target'] + >>> lmnn = LMNN(k=5, learn_rate=1e-6) + >>> lmnn.fit(X, Y, verbose=False) + + Notes + ----- + + If a recent version of the Shogun Python modular (``modshogun``) library + is available, the LMNN implementation will use the fast C++ version from + there. Otherwise, the included pure-Python version will be used. + The two implementations differ slightly, and the C++ version is more + complete. + + References + ---------- + .. [1] `Distance Metric Learning for Large Margin Nearest Neighbor + Classification `_ + Kilian Q. Weinberger, John Blitzer, Lawrence K. Saul + """ + def __init__(self, init=None, k=3, min_iter=50, max_iter=1000, learn_rate=1e-7, regularization=0.5, convergence_tol=0.001, use_pca=True, verbose=False, preprocessor=None, n_components=None, num_dims='deprecated', random_state=None): - """Initialize the LMNN object. - - Parameters - ---------- - init : None, string or numpy array, optional (default=None) - Initialization of the linear transformation. Possible options are - 'auto', 'pca', 'identity', 'random', and a numpy array of shape - (n_features_a, n_features_b). If None, will be set automatically to - 'auto' (this option is to raise a warning if 'init' is not set, - and stays to its default value None, in v0.5.0). - - 'auto' - Depending on ``n_components``, the most reasonable initialization - will be chosen. If ``n_components <= n_classes`` we use 'lda', as - it uses labels information. If not, but - ``n_components < min(n_features, n_samples)``, we use 'pca', as - it projects data in meaningful directions (those of higher - variance). 
Otherwise, we just use 'identity'. - - 'pca' - ``n_components`` principal components of the inputs passed - to :meth:`fit` will be used to initialize the transformation. - (See `sklearn.decomposition.PCA`) - - 'lda' - ``min(n_components, n_classes)`` most discriminative - components of the inputs passed to :meth:`fit` will be used to - initialize the transformation. (If ``n_components > n_classes``, - the rest of the components will be zero.) (See - `sklearn.discriminant_analysis.LinearDiscriminantAnalysis`) - - 'identity' - If ``n_components`` is strictly smaller than the - dimensionality of the inputs passed to :meth:`fit`, the identity - matrix will be truncated to the first ``n_components`` rows. - - 'random' - The initial transformation will be a random array of shape - `(n_components, n_features)`. Each value is sampled from the - standard normal distribution. - - numpy array - n_features_b must match the dimensionality of the inputs passed to - :meth:`fit` and n_features_a must be less than or equal to that. - If ``n_components`` is not None, n_features_a must match it. - - k : int, optional - Number of neighbors to consider, not including self-edges. - - regularization: float, optional - Weighting of pull and push terms, with 0.5 meaning equal weight. - - preprocessor : array-like, shape=(n_samples, n_features) or callable - The preprocessor to call to get tuples from indices. If array-like, - tuples will be formed like this: X[indices]. - - n_components : int or None, optional (default=None) - Dimensionality of reduced space (if None, defaults to dimension of X). - - num_dims : Not used - - .. deprecated:: 0.5.0 - `num_dims` was deprecated in version 0.5.0 and will - be removed in 0.6.0. Use `n_components` instead. - - random_state : int or numpy.RandomState or None, optional (default=None) - A pseudo random number generator object or a seed for it if int. If - ``init='random'``, ``random_state`` is used to initialize the random - transformation. If ``init='pca'``, ``random_state`` is passed as an - argument to PCA when initializing the transformation. - """ self.init = init self.k = k self.min_iter = min_iter diff --git a/metric_learn/lsml.py b/metric_learn/lsml.py index f59392c1..e3b0d323 100644 --- a/metric_learn/lsml.py +++ b/metric_learn/lsml.py @@ -1,17 +1,5 @@ -r""" -Metric Learning from Relative Comparisons by Minimizing Squared Residual(LSML) - -`LSML` proposes a simple, yet effective, algorithm that minimizes a convex -objective function corresponding to the sum of squared residuals of -constraints. This algorithm uses the constraints in the form of the -relative distance comparisons, such method is especially useful where -pairwise constraints are not natural to obtain, thus pairwise constraints -based algorithms become infeasible to be deployed. Furthermore, its sparsity -extension leads to more stable estimation when the dimension is high and -only a small amount of constraints is given. - -Read more in the :ref:`User Guide `. - +""" +Metric Learning from Relative Comparisons by Minimizing Squared Residual (LSML) """ from __future__ import print_function, absolute_import, division @@ -33,46 +21,6 @@ class _BaseLSML(MahalanobisMixin): def __init__(self, tol=1e-3, max_iter=1000, prior=None, verbose=False, preprocessor=None, random_state=None): - """Initialize LSML. - - Parameters - ---------- - prior : None, string or numpy array, optional (default=None) - Prior to set for the metric. 
Possible options are - 'identity', 'covariance', 'random', and a numpy array of - shape (n_features, n_features). For LSML, the prior should be strictly - positive definite (PD). If `None`, will be set - automatically to 'identity' (this is to raise a warning if - `prior` is not set, and stays to its default value (None), in v0.5.0). - - 'identity' - An identity matrix of shape (n_features, n_features). - - 'covariance' - The inverse covariance matrix. - - 'random' - The initial Mahalanobis matrix will be a random positive definite - (PD) matrix of shape `(n_features, n_features)`, generated using - `sklearn.datasets.make_spd_matrix`. - - numpy array - A positive definite (PD) matrix of shape - (n_features, n_features), that will be used as such to set the - prior. - - tol : float, optional - max_iter : int, optional - verbose : bool, optional - if True, prints information while learning - preprocessor : array-like, shape=(n_samples, n_features) or callable - The preprocessor to call to get tuples from indices. If array-like, - tuples will be formed like this: X[indices]. - random_state : int or numpy.RandomState or None, optional (default=None) - A pseudo random number generator object or a seed for it if int. If - ``init='random'``, ``random_state`` is used to set the random - prior. - """ self.prior = prior self.tol = tol self.max_iter = max_iter @@ -178,6 +126,55 @@ def _gradient(self, metric, vab, vcd, prior_inv): class LSML(_BaseLSML, _QuadrupletsClassifierMixin): """Least Squared-residual Metric Learning (LSML) + `LSML` proposes a simple, yet effective, algorithm that minimizes a convex + objective function corresponding to the sum of squared residuals of + constraints. This algorithm uses the constraints in the form of the + relative distance comparisons, such method is especially useful where + pairwise constraints are not natural to obtain, thus pairwise constraints + based algorithms become infeasible to be deployed. Furthermore, its sparsity + extension leads to more stable estimation when the dimension is high and + only a small amount of constraints is given. + + Read more in the :ref:`User Guide `. + + Parameters + ---------- + prior : None, string or numpy array, optional (default=None) + Prior to set for the metric. Possible options are + 'identity', 'covariance', 'random', and a numpy array of + shape (n_features, n_features). For LSML, the prior should be strictly + positive definite (PD). If `None`, will be set + automatically to 'identity' (this is to raise a warning if + `prior` is not set, and stays to its default value (None), in v0.5.0). + + 'identity' + An identity matrix of shape (n_features, n_features). + + 'covariance' + The inverse covariance matrix. + + 'random' + The initial Mahalanobis matrix will be a random positive definite + (PD) matrix of shape `(n_features, n_features)`, generated using + `sklearn.datasets.make_spd_matrix`. + + numpy array + A positive definite (PD) matrix of shape + (n_features, n_features), that will be used as such to set the + prior. + + tol : float, optional + max_iter : int, optional + verbose : bool, optional + if True, prints information while learning + preprocessor : array-like, shape=(n_samples, n_features) or callable + The preprocessor to call to get tuples from indices. If array-like, + tuples will be formed like this: X[indices]. + random_state : int or numpy.RandomState or None, optional (default=None) + A pseudo random number generator object or a seed for it if int. 
If + ``init='random'``, ``random_state`` is used to set the random + prior. + Attributes ---------- n_iter_ : `int` @@ -186,6 +183,31 @@ class LSML(_BaseLSML, _QuadrupletsClassifierMixin): transformer_ : `numpy.ndarray`, shape=(n_features, n_features) The linear transformation ``L`` deduced from the learned Mahalanobis metric (See function `transformer_from_metric`.) + + Examples + -------- + >>> from metric_learn import LSML_Supervised + >>> from sklearn.datasets import load_iris + >>> iris_data = load_iris() + >>> X = iris_data['data'] + >>> Y = iris_data['target'] + >>> lsml = LSML_Supervised(num_constraints=200) + >>> lsml.fit(X, Y) + + References + ---------- + .. [1] Liu et al. `Metric Learning from Relative Comparisons by Minimizing + Squared Residual + `_. ICDM 2012. + + .. [2] Adapted from https://gist.github.com/kcarnold/5439917 + + See Also + -------- + metric_learn.LSML : The original weakly-supervised algorithm + + :ref:`supervised_version` : The section of the project documentation + that describes the supervised version of weakly supervised estimators. """ def fit(self, quadruplets, weights=None): @@ -193,7 +215,7 @@ def fit(self, quadruplets, weights=None): Parameters ---------- - quadruplets : array-like, shape=(n_constraints, 4, n_features) or + quadruplets : array-like, shape=(n_constraints, 4, n_features) or \ (n_constraints, 4) 3D array-like of quadruplets of points or 2D array of quadruplets of indicators. In order to supervise the algorithm in the right way, we @@ -214,6 +236,58 @@ def fit(self, quadruplets, weights=None): class LSML_Supervised(_BaseLSML, TransformerMixin): """Supervised version of Least Squared-residual Metric Learning (LSML) + `LSML_Supervised` creates quadruplets from labeled samples by taking two + samples from the same class, and two samples from different classes. + This way it builds quadruplets where the two first points must be more + similar than the two last points. + + Parameters + ---------- + tol : float, optional (default=1e-3) + Tolerance for the convergence procedure. + max_iter : int, optional (default=1000) + Number of maximum iterations of the convergence procedure. + prior : None, string or numpy array, optional (default=None) + Prior to set for the metric. Possible options are + 'identity', 'covariance', 'random', and a numpy array of + shape (n_features, n_features). For LSML, the prior should be strictly + positive definite (PD). If `None`, will be set + automatically to 'identity' (this is to raise a warning if + `prior` is not set, and stays to its default value (None), in v0.5.0). + + 'identity' + An identity matrix of shape (n_features, n_features). + + 'covariance' + The inverse covariance matrix. + + 'random' + The initial Mahalanobis matrix will be a random positive definite + (PD) matrix of shape `(n_features, n_features)`, generated using + `sklearn.datasets.make_spd_matrix`. + + numpy array + A positive definite (PD) matrix of shape + (n_features, n_features), that will be used as such to set the + prior. + num_labeled : Not used + .. deprecated:: 0.5.0 + `num_labeled` was deprecated in version 0.5.0 and will + be removed in 0.6.0. + num_constraints: int, optional + number of constraints to generate + weights : (m,) array of floats, optional + scale factor for each constraint + verbose : bool, optional + if True, prints information while learning + preprocessor : array-like, shape=(n_samples, n_features) or callable + The preprocessor to call to get tuples from indices. 
If array-like, + tuples will be formed like this: X[indices]. + random_state : int or numpy.RandomState or None, optional (default=None) + A pseudo random number generator object or a seed for it if int. If + ``prior='random'``, ``random_state`` is used to set the random + prior. + Attributes ---------- n_iter_ : `int` @@ -227,58 +301,6 @@ class LSML_Supervised(_BaseLSML, TransformerMixin): def __init__(self, tol=1e-3, max_iter=1000, prior=None, num_labeled='deprecated', num_constraints=None, weights=None, verbose=False, preprocessor=None, random_state=None): - """Initialize the supervised version of `LSML`. - - `LSML_Supervised` creates quadruplets from labeled samples by taking two - samples from the same class, and two samples from different classes. - This way it builds quadruplets where the two first points must be more - similar than the two last points. - - Parameters - ---------- - tol : float, optional - max_iter : int, optional - prior : None, string or numpy array, optional (default=None) - Prior to set for the metric. Possible options are - 'identity', 'covariance', 'random', and a numpy array of - shape (n_features, n_features). For LSML, the prior should be strictly - positive definite (PD). If `None`, will be set - automatically to 'identity' (this is to raise a warning if - `prior` is not set, and stays to its default value (None), in v0.5.0). - - 'identity' - An identity matrix of shape (n_features, n_features). - - 'covariance' - The inverse covariance matrix. - - 'random' - The initial Mahalanobis matrix will be a random positive definite - (PD) matrix of shape `(n_features, n_features)`, generated using - `sklearn.datasets.make_spd_matrix`. - - numpy array - A positive definite (PD) matrix of shape - (n_features, n_features), that will be used as such to set the - prior. - num_labeled : Not used - .. deprecated:: 0.5.0 - `num_labeled` was deprecated in version 0.5.0 and will - be removed in 0.6.0. - num_constraints: int, optional - number of constraints to generate - weights : (m,) array of floats, optional - scale factor for each constraint - verbose : bool, optional - if True, prints information while learning - preprocessor : array-like, shape=(n_samples, n_features) or callable - The preprocessor to call to get tuples from indices. If array-like, - tuples will be formed like this: X[indices]. - random_state : int or numpy.RandomState or None, optional (default=None) - A pseudo random number generator object or a seed for it if int. If - ``init='random'``, ``random_state`` is used to set the random - prior. - """ _BaseLSML.__init__(self, tol=tol, max_iter=max_iter, prior=prior, verbose=verbose, preprocessor=preprocessor, random_state=random_state) diff --git a/metric_learn/mlkr.py b/metric_learn/mlkr.py index c625b67c..ea8748be 100644 --- a/metric_learn/mlkr.py +++ b/metric_learn/mlkr.py @@ -1,13 +1,5 @@ -r""" -Metric Learning for Kernel Regression(MLKR) - -MLKR is an algorithm for supervised metric learning, which learns a -distance function by directly minimizing the leave-one-out regression error. -This algorithm can also be viewed as a supervised variation of PCA and can be -used for dimensionality reduction and high dimensional data visualization. - -Read more in the :ref:`User Guide `.
- +""" +Metric Learning for Kernel Regression (MLKR) """ from __future__ import division, print_function import time @@ -31,6 +23,81 @@ class MLKR(MahalanobisMixin, TransformerMixin): """Metric Learning for Kernel Regression (MLKR) + MLKR is an algorithm for supervised metric learning, which learns a + distance function by directly minimizing the leave-one-out regression error. + This algorithm can also be viewed as a supervised variation of PCA and can be + used for dimensionality reduction and high dimensional data visualization. + + Read more in the :ref:`User Guide `. + + Parameters + ---------- + n_components : int or None, optional (default=None) + Dimensionality of reduced space (if None, defaults to dimension of X). + + num_dims : Not used + + .. deprecated:: 0.5.0 + `num_dims` was deprecated in version 0.5.0 and will + be removed in 0.6.0. Use `n_components` instead. + + init : None, string or numpy array, optional (default=None) + Initialization of the linear transformation. Possible options are + 'auto', 'pca', 'identity', 'random', and a numpy array of shape + (n_features_a, n_features_b). If None, will be set automatically to + 'auto' (this option is to raise a warning if 'init' is not set, + and stays to its default value None, in v0.5.0). + + 'auto' + Depending on ``n_components``, the most reasonable initialization + will be chosen. If ``n_components < min(n_features, n_samples)``, + we use 'pca', as it projects data in meaningful directions (those + of higher variance). Otherwise, we just use 'identity'. + + 'pca' + ``n_components`` principal components of the inputs passed + to :meth:`fit` will be used to initialize the transformation. + (See `sklearn.decomposition.PCA`) + + 'identity' + If ``n_components`` is strictly smaller than the + dimensionality of the inputs passed to :meth:`fit`, the identity + matrix will be truncated to the first ``n_components`` rows. + + 'random' + The initial transformation will be a random array of shape + `(n_components, n_features)`. Each value is sampled from the + standard normal distribution. + + numpy array + n_features_b must match the dimensionality of the inputs passed to + :meth:`fit` and n_features_a must be less than or equal to that. + If ``n_components`` is not None, n_features_a must match it. + + A0: Not used. + .. deprecated:: 0.5.0 + `A0` was deprecated in version 0.5.0 and will + be removed in 0.6.0. Use 'init' instead. + + tol: float, optional (default=None) + Convergence tolerance for the optimization. + + max_iter: int, optional + Cap on number of conjugate gradient iterations. + + verbose : bool, optional (default=False) + Whether to print progress messages or not. + + preprocessor : array-like, shape=(n_samples, n_features) or callable + The preprocessor to call to get tuples from indices. If array-like, + tuples will be formed like this: X[indices]. + + random_state : int or numpy.RandomState or None, optional (default=None) + A pseudo random number generator object or a seed for it if int. If + ``init='random'``, ``random_state`` is used to initialize the random + transformation. If ``init='pca'``, ``random_state`` is passed as an + argument to PCA when initializing the transformation. + Attributes ---------- n_iter_ : `int` @@ -38,82 +105,28 @@ class MLKR(MahalanobisMixin, TransformerMixin): transformer_ : `numpy.ndarray`, shape=(n_components, n_features) The learned linear transformation ``L``. 
+ + Examples + -------- + + >>> from metric_learn import MLKR + >>> from sklearn.datasets import load_iris + >>> iris_data = load_iris() + >>> X = iris_data['data'] + >>> Y = iris_data['target'] + >>> mlkr = MLKR() + >>> mlkr.fit(X, Y) + + References + ---------- + .. [1] Weinberger et al. `Metric Learning for Kernel Regression + `_. AISTATS 2007. """ def __init__(self, n_components=None, num_dims='deprecated', init=None, A0='deprecated', tol=None, max_iter=1000, verbose=False, preprocessor=None, random_state=None): - """ - Initialize MLKR. - - Parameters - ---------- - n_components : int or None, optional (default=None) - Dimensionality of reduced space (if None, defaults to dimension of X). - - num_dims : Not used - - .. deprecated:: 0.5.0 - `num_dims` was deprecated in version 0.5.0 and will - be removed in 0.6.0. Use `n_components` instead. - - init : None, string or numpy array, optional (default=None) - Initialization of the linear transformation. Possible options are - 'auto', 'pca', 'identity', 'random', and a numpy array of shape - (n_features_a, n_features_b). If None, will be set automatically to - 'auto' (this option is to raise a warning if 'init' is not set, - and stays to its default value None, in v0.5.0). - - 'auto' - Depending on ``n_components``, the most reasonable initialization - will be chosen. If ``n_components < min(n_features, n_samples)``, - we use 'pca', as it projects data in meaningful directions (those - of higher variance). Otherwise, we just use 'identity'. - - 'pca' - ``n_components`` principal components of the inputs passed - to :meth:`fit` will be used to initialize the transformation. - (See `sklearn.decomposition.PCA`) - - 'identity' - If ``n_components`` is strictly smaller than the - dimensionality of the inputs passed to :meth:`fit`, the identity - matrix will be truncated to the first ``n_components`` rows. - - 'random' - The initial transformation will be a random array of shape - `(n_components, n_features)`. Each value is sampled from the - standard normal distribution. - - numpy array - n_features_b must match the dimensionality of the inputs passed to - :meth:`fit` and n_features_a must be less than or equal to that. - If ``num_dims`` is not None, n_features_a must match it. - - A0: Not used. - .. deprecated:: 0.5.0 - `A0` was deprecated in version 0.5.0 and will - be removed in 0.6.0. Use 'init' instead. - - tol: float, optional (default=None) - Convergence tolerance for the optimization. - - max_iter: int, optional - Cap on number of conjugate gradient iterations. - - verbose : bool, optional (default=False) - Whether to print progress messages or not. - - preprocessor : array-like, shape=(n_samples, n_features) or callable - The preprocessor to call to get tuples from indices. If array-like, - tuples will be formed like this: X[indices]. - - random_state : int or numpy.RandomState or None, optional (default=None) - A pseudo random number generator object or a seed for it if int. If - ``init='random'``, ``random_state`` is used to initialize the random - transformation. If ``init='pca'``, ``random_state`` is passed as an - argument to PCA when initializing the transformation.
- """ self.n_components = n_components self.num_dims = num_dims self.init = init diff --git a/metric_learn/mmc.py b/metric_learn/mmc.py index b3e6c203..9f02425c 100644 --- a/metric_learn/mmc.py +++ b/metric_learn/mmc.py @@ -1,21 +1,4 @@ -r""" -Metric Learning with Application for Clustering with Side Information(MMC) - -MMC minimizes the sum of squared distances between similar points, while -enforcing the sum of distances between dissimilar ones to be greater than one. -This leads to a convex and, thus, local-minima-free optimization problem that -can be solved efficiently. -However, the algorithm involves the computation of eigenvalues, which is the -main speed-bottleneck. Since it has initially been designed for clustering -applications, one of the implicit assumptions of MMC is that all classes form -a compact set, i.e., follow a unimodal distribution, which restricts the -possible use-cases of this method. However, it is one of the earliest and a -still often cited technique. - -Read more in the :ref:`User Guide `. - -""" - +"""Mahalanobis Metric for Clustering (MMC)""" from __future__ import print_function, absolute_import, division import warnings import numpy as np @@ -30,7 +13,6 @@ class _BaseMMC(MahalanobisMixin): - """Mahalanobis Metric for Clustering (MMC)""" _tuple_size = 2 # constraints are pairs @@ -38,61 +20,6 @@ def __init__(self, max_iter=100, max_proj=10000, convergence_threshold=1e-3, init=None, A0='deprecated', diagonal=False, diagonal_c=1.0, verbose=False, preprocessor=None, random_state=None): - """Initialize MMC. - Parameters - ---------- - max_iter : int, optional - max_proj : int, optional - convergence_threshold : float, optional - init : None, string or numpy array, optional (default=None) - Initialization of the Mahalanobis matrix. Possible options are - 'identity', 'covariance', 'random', and a numpy array of - shape (n_features, n_features). If None, will be set - automatically to 'identity' (this is to raise a warning if - 'init' is not set, and stays to its default value (None), in v0.5.0). - - 'identity' - An identity matrix of shape (n_features, n_features). - - 'covariance' - The (pseudo-)inverse of the covariance matrix. - - 'random' - The initial Mahalanobis matrix will be a random SPD matrix of shape - `(n_features, n_features)`, generated using - `sklearn.datasets.make_spd_matrix`. - - numpy array - An SPD matrix of shape (n_features, n_features), that will - be used as such to initialize the metric. - - verbose : bool, optional - if True, prints information while learning - - preprocessor : array-like, shape=(n_samples, n_features) or callable - The preprocessor to call to get tuples from indices. If array-like, - tuples will be gotten like this: X[indices]. - A0 : Not used. - .. deprecated:: 0.5.0 - `A0` was deprecated in version 0.5.0 and will - be removed in 0.6.0. Use 'init' instead. - diagonal : bool, optional - if True, a diagonal metric will be learned, - i.e., a simple scaling of dimensions. The initialization will then - be the diagonal coefficients of the matrix given as 'init'. - diagonal_c : float, optional - weight of the dissimilarity constraint for diagonal - metric learning - verbose : bool, optional - if True, prints information while learning - preprocessor : array-like, shape=(n_samples, n_features) or callable - The preprocessor to call to get tuples from indices. If array-like, - tuples will be gotten like this: X[indices]. 
- random_state : int or numpy.RandomState or None, optional (default=None) - A pseudo random number generator object or a seed for it if int. If - ``init='random'``, ``random_state`` is used to initialize the random - transformation. - """ self.max_iter = max_iter self.max_proj = max_proj self.convergence_threshold = convergence_threshold @@ -403,6 +330,74 @@ def _D_constraint(self, neg_pairs, w): class MMC(_BaseMMC, _PairsClassifierMixin): """Mahalanobis Metric for Clustering (MMC) + MMC minimizes the sum of squared distances between similar points, while + enforcing the sum of distances between dissimilar ones to be greater than + one. This leads to a convex and, thus, local-minima-free optimization + problem that can be solved efficiently. + However, the algorithm involves the computation of eigenvalues, which is the + main speed-bottleneck. Since it has initially been designed for clustering + applications, one of the implicit assumptions of MMC is that all classes form + a compact set, i.e., follow a unimodal distribution, which restricts the + possible use-cases of this method. However, it is one of the earliest and a + still often cited technique. + + Read more in the :ref:`User Guide `. + + Parameters + ---------- + max_iter : int, optional (default=100) + Maximum number of iterations of the convergence procedure. + + max_proj : int, optional (default=10000) + Maximum number of projection steps. + + convergence_threshold : float, optional (default=1e-3) + Convergence threshold for the convergence procedure. + + init : None, string or numpy array, optional (default=None) + Initialization of the Mahalanobis matrix. Possible options are + 'identity', 'covariance', 'random', and a numpy array of + shape (n_features, n_features). If None, will be set + automatically to 'identity' (this is to raise a warning if + 'init' is not set, and stays to its default value (None), in v0.5.0). + + 'identity' + An identity matrix of shape (n_features, n_features). + + 'covariance' + The (pseudo-)inverse of the covariance matrix. + + 'random' + The initial Mahalanobis matrix will be a random SPD matrix of + shape `(n_features, n_features)`, generated using + `sklearn.datasets.make_spd_matrix`. + + numpy array + An SPD matrix of shape (n_features, n_features), that will + be used as such to initialize the metric. + + verbose : bool, optional + if True, prints information while learning + + preprocessor : array-like, shape=(n_samples, n_features) or callable + The preprocessor to call to get tuples from indices. If array-like, + tuples will be gotten like this: X[indices]. + A0 : Not used. + .. deprecated:: 0.5.0 + `A0` was deprecated in version 0.5.0 and will + be removed in 0.6.0. Use 'init' instead. + diagonal : bool, optional + if True, a diagonal metric will be learned, + i.e., a simple scaling of dimensions. The initialization will then + be the diagonal coefficients of the matrix given as 'init'. + diagonal_c : float, optional + weight of the dissimilarity constraint for diagonal + metric learning + random_state : int or numpy.RandomState or None, optional (default=None) + A pseudo random number generator object or a seed for it if int. If + ``init='random'``, ``random_state`` is used to initialize the random + transformation.
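
A minimal sketch of how the pair-based input and the `diagonal` option documented above combine in practice (the pairs and labels below are toy values, assuming the `(n_constraints, 2, n_features)` pair format described elsewhere in this patch):

>>> import numpy as np
>>> from metric_learn import MMC
>>> pairs = np.array([[[1.2, 7.5], [1.3, 1.5]],
...                   [[6.4, 2.6], [6.2, 9.7]],
...                   [[1.3, 4.5], [3.2, 4.6]],
...                   [[6.2, 5.5], [5.4, 5.4]]])
>>> y = np.array([1, 1, -1, -1])              # +1 marks a similar pair, -1 a dissimilar pair
>>> mmc = MMC(diagonal=True, diagonal_c=1.0)  # learn only a per-dimension scaling
>>> mmc.fit(pairs, y)

With ``diagonal=True`` the learned Mahalanobis matrix is restricted to a diagonal one, trading expressiveness for a much cheaper optimization.
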
+ Attributes ---------- n_iter_ : `int` The number of iterations the solver has run. @@ -416,6 +417,29 @@ class MMC(_BaseMMC, _PairsClassifierMixin): threshold_ : `float` If the distance metric between two points is lower than this threshold, points will be classified as similar, otherwise they will be classified as dissimilar. + + Examples + -------- + >>> from metric_learn import MMC_Supervised + >>> from sklearn.datasets import load_iris + >>> iris_data = load_iris() + >>> X = iris_data['data'] + >>> Y = iris_data['target'] + >>> mmc = MMC_Supervised(num_constraints=200) + >>> mmc.fit(X, Y) + + References + ---------- + .. [1] `Distance metric learning with application to clustering with + side-information `_ + Xing, Jordan, Russell, Ng. + + See Also + -------- + metric_learn.MMC_Supervised : The supervised version of this algorithm. + :ref:`supervised_version` : The section of the project documentation + that describes the supervised version of weakly supervised estimators. """ def fit(self, pairs, y, calibration_params=None): @@ -426,7 +450,7 @@ def fit(self, pairs, y, calibration_params=None): Parameters ---------- - pairs : array-like, shape=(n_constraints, 2, n_features) or + pairs : array-like, shape=(n_constraints, 2, n_features) or \ (n_constraints, 2) 3D Array of pairs with each row corresponding to two points, or 2D array of indices of pairs if the metric learner uses a @@ -453,6 +477,64 @@ class MMC_Supervised(_BaseMMC, TransformerMixin): """Supervised version of Mahalanobis Metric for Clustering (MMC) + `MMC_Supervised` creates pairs of similar samples by taking same class + samples, and pairs of dissimilar samples by taking different class + samples. It then passes these pairs to `MMC` for training. + + Parameters + ---------- + max_iter : int, optional + max_proj : int, optional + convergence_threshold : float, optional + num_labeled : Not used + .. deprecated:: 0.5.0 + `num_labeled` was deprecated in version 0.5.0 and will + be removed in 0.6.0. + num_constraints: int, optional + number of constraints to generate + init : None, string or numpy array, optional (default=None) + Initialization of the Mahalanobis matrix. Possible options are + 'identity', 'covariance', 'random', and a numpy array of + shape (n_features, n_features). If None, will be set + automatically to 'identity' (this is to raise a warning if + 'init' is not set, and stays to its default value (None), in v0.5.0). + + 'identity' + An identity matrix of shape (n_features, n_features). + + 'covariance' + The (pseudo-)inverse of the covariance matrix. + + 'random' + The initial Mahalanobis matrix will be a random SPD matrix of + shape `(n_features, n_features)`, generated using + `sklearn.datasets.make_spd_matrix`. + + numpy array + A numpy array of shape (n_features, n_features), that will + be used as such to initialize the metric. + + verbose : bool, optional + if True, prints information while learning + + preprocessor : array-like, shape=(n_samples, n_features) or callable + The preprocessor to call to get tuples from indices. If array-like, + tuples will be gotten like this: X[indices]. + A0 : Not used. + .. deprecated:: 0.5.0 + `A0` was deprecated in version 0.5.0 and will + be removed in 0.6.0. Use 'init' instead.
+ diagonal : bool, optional + if True, a diagonal metric will be learned, + i.e., a simple scaling of dimensions + diagonal_c : float, optional + weight of the dissimilarity constraint for diagonal + metric learning + random_state : int or numpy.RandomState or None, optional (default=None) + A pseudo random number generator object or a seed for it if int. If + ``init='random'``, ``random_state`` is used to initialize the random + Mahalanobis matrix. + Attributes ---------- n_iter_ : `int` The number of iterations the solver has run. @@ -467,71 +558,6 @@ def __init__(self, max_iter=100, max_proj=10000, convergence_threshold=1e-6, num_labeled='deprecated', num_constraints=None, init=None, A0='deprecated', diagonal=False, diagonal_c=1.0, verbose=False, preprocessor=None, random_state=None): - """Initialize the supervised version of `MMC`. - - `MMC_Supervised` creates pairs of similar sample by taking same class - samples, and pairs of dissimilar samples by taking different class - samples. It then passes these pairs to `MMC` for training. - - Parameters - ---------- - max_iter : int, optional - max_proj : int, optional - convergence_threshold : float, optional - num_labeled : Not used - .. deprecated:: 0.5.0 - `num_labeled` was deprecated in version 0.5.0 and will - be removed in 0.6.0. - num_constraints: int, optional - number of constraints to generate - init : None, string or numpy array, optional (default=None) - Initialization of the Mahalanobis matrix. Possible options are - 'identity', 'covariance', 'random', and a numpy array of - shape (n_features, n_features). If None, will be set - automatically to 'identity' (this is to raise a warning if - 'init' is not set, and stays to its default value (None), in v0.5.0). - - 'identity' - An identity matrix of shape (n_features, n_features). - - 'covariance' - The (pseudo-)inverse of the covariance matrix. - - 'random' - The initial Mahalanobis matrix will be a random SPD matrix of - shape `(n_features, n_features)`, generated using - `sklearn.datasets.make_spd_matrix`. - - numpy array - A numpy array of shape (n_features, n_features), that will - be used as such to initialize the metric. - - verbose : bool, optional - if True, prints information while learning - - preprocessor : array-like, shape=(n_samples, n_features) or callable - The preprocessor to call to get tuples from indices. If array-like, - tuples will be gotten like this: X[indices]. - A0 : Not used. - .. deprecated:: 0.5.0 - `A0` was deprecated in version 0.5.0 and will - be removed in 0.6.0. Use 'init' instead.
- random_state : int or numpy.RandomState or None, optional (default=None) - A pseudo random number generator object or a seed for it if int. If - ``init='random'``, ``random_state`` is used to initialize the random - Mahalanobis matrix. - """ _BaseMMC.__init__(self, max_iter=max_iter, max_proj=max_proj, convergence_threshold=convergence_threshold, init=init, A0=A0, diagonal=diagonal, diff --git a/metric_learn/nca.py b/metric_learn/nca.py index 2b541a64..dcfdac8a 100644 --- a/metric_learn/nca.py +++ b/metric_learn/nca.py @@ -1,15 +1,5 @@ -r""" -Neighborhood Components Analysis(NCA) - -NCA is a distance metric learning algorithm which aims to improve the -accuracy of nearest neighbors classification compared to the standard -Euclidean distance. The algorithm directly maximizes a stochastic variant -of the leave-one-out k-nearest neighbors(KNN) score on the training set. -It can also learn a low-dimensional linear transformation of data that can -be used for data visualization and fast classification. - -Read more in the :ref:`User Guide `. - +""" +Neighborhood Components Analysis (NCA) """ from __future__ import absolute_import @@ -32,6 +22,95 @@ class NCA(MahalanobisMixin, TransformerMixin): """Neighborhood Components Analysis (NCA) + NCA is a distance metric learning algorithm which aims to improve the + accuracy of nearest neighbors classification compared to the standard + Euclidean distance. The algorithm directly maximizes a stochastic variant + of the leave-one-out k-nearest neighbors(KNN) score on the training set. + It can also learn a low-dimensional linear transformation of data that can + be used for data visualization and fast classification. + + Read more in the :ref:`User Guide `. + + Parameters + ---------- + init : None, string or numpy array, optional (default=None) + Initialization of the linear transformation. Possible options are + 'auto', 'pca', 'identity', 'random', and a numpy array of shape + (n_features_a, n_features_b). If None, will be set automatically to + 'auto' (this option is to raise a warning if 'init' is not set, + and stays to its default value None, in v0.5.0). + + 'auto' + Depending on ``n_components``, the most reasonable initialization + will be chosen. If ``n_components <= n_classes`` we use 'lda', as + it uses labels information. If not, but + ``n_components < min(n_features, n_samples)``, we use 'pca', as + it projects data in meaningful directions (those of higher + variance). Otherwise, we just use 'identity'. + + 'pca' + ``n_components`` principal components of the inputs passed + to :meth:`fit` will be used to initialize the transformation. + (See `sklearn.decomposition.PCA`) + + 'lda' + ``min(n_components, n_classes)`` most discriminative + components of the inputs passed to :meth:`fit` will be used to + initialize the transformation. (If ``n_components > n_classes``, + the rest of the components will be zero.) (See + `sklearn.discriminant_analysis.LinearDiscriminantAnalysis`) + + 'identity' + If ``n_components`` is strictly smaller than the + dimensionality of the inputs passed to :meth:`fit`, the identity + matrix will be truncated to the first ``n_components`` rows. + + 'random' + The initial transformation will be a random array of shape + `(n_components, n_features)`. Each value is sampled from the + standard normal distribution. + + numpy array + n_features_b must match the dimensionality of the inputs passed to + :meth:`fit` and n_features_a must be less than or equal to that. 
+ If ``n_components`` is not None, n_features_a must match it. + + n_components : int or None, optional (default=None) + Dimensionality of reduced space (if None, defaults to dimension of X). + + num_dims : Not used + + .. deprecated:: 0.5.0 + `num_dims` was deprecated in version 0.5.0 and will + be removed in 0.6.0. Use `n_components` instead. + + max_iter : int, optional (default=100) + Maximum number of iterations done by the optimization algorithm. + + tol : float, optional (default=None) + Convergence tolerance for the optimization. + + verbose : bool, optional (default=False) + Whether to print progress messages or not. + + random_state : int or numpy.RandomState or None, optional (default=None) + A pseudo random number generator object or a seed for it if int. If + ``init='random'``, ``random_state`` is used to initialize the random + transformation. If ``init='pca'``, ``random_state`` is passed as an + argument to PCA when initializing the transformation. + + Examples + -------- + + >>> import numpy as np + >>> from metric_learn import NCA + >>> from sklearn.datasets import load_iris + >>> iris_data = load_iris() + >>> X = iris_data['data'] + >>> Y = iris_data['target'] + >>> nca = NCA(max_iter=1000) + >>> nca.fit(X, Y) + Attributes ---------- n_iter_ : `int` @@ -39,81 +118,21 @@ class NCA(MahalanobisMixin, TransformerMixin): transformer_ : `numpy.ndarray`, shape=(n_components, n_features) The learned linear transformation ``L``. + + References + ---------- + .. [1] J. Goldberger, G. Hinton, S. Roweis, R. Salakhutdinov. `Neighbourhood + Components Analysis + `_. + Advances in Neural Information Processing Systems. 17, 513-520, 2005. + + .. [2] Wikipedia entry on `Neighborhood Components Analysis + `_ """ def __init__(self, init=None, n_components=None, num_dims='deprecated', max_iter=100, tol=None, verbose=False, preprocessor=None, random_state=None): - """Neighborhood Components Analysis - - Parameters - ---------- - init : None, string or numpy array, optional (default=None) - Initialization of the linear transformation. Possible options are - 'auto', 'pca', 'identity', 'random', and a numpy array of shape - (n_features_a, n_features_b). If None, will be set automatically to - 'auto' (this option is to raise a warning if 'init' is not set, - and stays to its default value None, in v0.5.0). - - 'auto' - Depending on ``n_components``, the most reasonable initialization - will be chosen. If ``n_components <= n_classes`` we use 'lda', as - it uses labels information. If not, but - ``n_components < min(n_features, n_samples)``, we use 'pca', as - it projects data in meaningful directions (those of higher - variance). Otherwise, we just use 'identity'. - - 'pca' - ``n_components`` principal components of the inputs passed - to :meth:`fit` will be used to initialize the transformation. - (See `sklearn.decomposition.PCA`) - - 'lda' - ``min(n_components, n_classes)`` most discriminative - components of the inputs passed to :meth:`fit` will be used to - initialize the transformation. (If ``n_components > n_classes``, - the rest of the components will be zero.) (See - `sklearn.discriminant_analysis.LinearDiscriminantAnalysis`) - - 'identity' - If ``n_components`` is strictly smaller than the - dimensionality of the inputs passed to :meth:`fit`, the identity - matrix will be truncated to the first ``n_components`` rows. - - 'random' - The initial transformation will be a random array of shape - `(n_components, n_features)`. Each value is sampled from the - standard normal distribution. 
- - numpy array - n_features_b must match the dimensionality of the inputs passed to - :meth:`fit` and n_features_a must be less than or equal to that. - If ``n_components`` is not None, n_features_a must match it. - - n_components : int or None, optional (default=None) - Dimensionality of reduced space (if None, defaults to dimension of X). - - num_dims : Not used - - .. deprecated:: 0.5.0 - `num_dims` was deprecated in version 0.5.0 and will - be removed in 0.6.0. Use `n_components` instead. - - max_iter : int, optional (default=100) - Maximum number of iterations done by the optimization algorithm. - - tol : float, optional (default=None) - Convergence tolerance for the optimization. - - verbose : bool, optional (default=False) - Whether to print progress messages or not. - - random_state : int or numpy.RandomState or None, optional (default=None) - A pseudo random number generator object or a seed for it if int. If - ``init='random'``, ``random_state`` is used to initialize the random - transformation. If ``init='pca'``, ``random_state`` is passed as an - argument to PCA when initializing the transformation. - """ self.n_components = n_components self.init = init self.num_dims = num_dims diff --git a/metric_learn/rca.py b/metric_learn/rca.py index 1dbffdd6..503e2408 100644 --- a/metric_learn/rca.py +++ b/metric_learn/rca.py @@ -1,14 +1,5 @@ -r""" -Relative Components Analysis(RCA) - -RCA learns a full rank Mahalanobis distance metric based on a weighted sum of -in-chunklets covariance matrices. It applies a global linear transformation to -assign large weights to relevant dimensions and low weights to irrelevant -dimensions. Those relevant dimensions are estimated using "chunklets", subsets -of points that are known to belong to the same class. - -Read more in the :ref:`User Guide `. - +""" +Relative Components Analysis (RCA) """ from __future__ import absolute_import @@ -42,6 +33,52 @@ def _chunk_mean_centering(data, chunks): class RCA(MahalanobisMixin, TransformerMixin): """Relevant Components Analysis (RCA) + RCA learns a full rank Mahalanobis distance metric based on a weighted sum of + in-chunklets covariance matrices. It applies a global linear transformation + to assign large weights to relevant dimensions and low weights to irrelevant + dimensions. Those relevant dimensions are estimated using "chunklets", + subsets of points that are known to belong to the same class. + + Read more in the :ref:`User Guide `. + + Parameters + ---------- + n_components : int or None, optional (default=None) + Dimensionality of reduced space (if None, defaults to dimension of X). + + num_dims : Not used + + .. deprecated:: 0.5.0 + `num_dims` was deprecated in version 0.5.0 and will + be removed in 0.6.0. Use `n_components` instead. + + pca_comps : Not used + .. deprecated:: 0.5.0 + `pca_comps` was deprecated in version 0.5.0 and will + be removed in 0.6.0. + + preprocessor : array-like, shape=(n_samples, n_features) or callable + The preprocessor to call to get tuples from indices. If array-like, + tuples will be formed like this: X[indices]. + + Examples + -------- + >>> from metric_learn import RCA_Supervised + >>> from sklearn.datasets import load_iris + >>> iris_data = load_iris() + >>> X = iris_data['data'] + >>> Y = iris_data['target'] + >>> rca = RCA_Supervised(num_chunks=30, chunk_size=2) + >>> rca.fit(X, Y) + + References + ------------------ + .. [1] `Adjustment learning and relevant component analysis + `_ Noam + Shental, et al. 
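
Since chunklets are the central ingredient described above, a short sketch of calling `RCA` directly with explicit chunk assignments may help (toy data; this assumes the usual metric-learn convention that `chunks[i]` gives the chunklet index of point `i` and `-1` marks points belonging to no chunklet):

>>> import numpy as np
>>> from metric_learn import RCA
>>> X = np.array([[0.0, 0.1], [0.2, 0.1],   # chunklet 0: two points known to share a class
...               [1.1, 1.0], [1.0, 1.2],   # chunklet 1
...               [5.0, 5.0]])              # point assigned to no chunklet
>>> chunks = np.array([0, 0, 1, 1, -1])
>>> rca = RCA()
>>> rca.fit(X, chunks)
>>> X_t = rca.transform(X)   # whitened with respect to the within-chunklet covariance
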
+ + Attributes ---------- transformer_ : `numpy.ndarray`, shape=(n_components, n_features) @@ -50,28 +87,6 @@ class RCA(MahalanobisMixin, TransformerMixin): def __init__(self, n_components=None, num_dims='deprecated', pca_comps='deprecated', preprocessor=None): - """Initialize the learner. - - Parameters - ---------- - n_components : int or None, optional (default=None) - Dimensionality of reduced space (if None, defaults to dimension of X). - - num_dims : Not used - - .. deprecated:: 0.5.0 - `num_dims` was deprecated in version 0.5.0 and will - be removed in 0.6.0. Use `n_components` instead. - - pca_comps : Not used - .. deprecated:: 0.5.0 - `pca_comps` was deprecated in version 0.5.0 and will - be removed in 0.6.0. - - preprocessor : array-like, shape=(n_samples, n_features) or callable - The preprocessor to call to get tuples from indices. If array-like, - tuples will be formed like this: X[indices]. - """ self.n_components = n_components self.num_dims = num_dims self.pca_comps = pca_comps @@ -153,6 +168,27 @@ def _inv_sqrtm(x): class RCA_Supervised(RCA): """Supervised version of Relevant Components Analysis (RCA) + `RCA_Supervised` creates chunks of similar points by first sampling a + class, taking `chunk_size` elements in it, and repeating the process + `num_chunks` times. + + Parameters + ---------- + n_components : int or None, optional (default=None) + Dimensionality of reduced space (if None, defaults to dimension of X). + + num_dims : Not used + + .. deprecated:: 0.5.0 + `num_dims` was deprecated in version 0.5.0 and will + be removed in 0.6.0. Use `n_components` instead. + + num_chunks: int, optional + chunk_size: int, optional + preprocessor : array-like, shape=(n_samples, n_features) or callable + The preprocessor to call to get tuples from indices. If array-like, + tuples will be formed like this: X[indices]. + Attributes ---------- transformer_ : `numpy.ndarray`, shape=(n_components, n_features) @@ -162,29 +198,6 @@ class RCA_Supervised(RCA): def __init__(self, num_dims='deprecated', n_components=None, pca_comps='deprecated', num_chunks=100, chunk_size=2, preprocessor=None): - """Initialize the supervised version of `RCA`. - - `RCA_Supervised` creates chunks of similar points by first sampling a - class, taking `chunk_size` elements in it, and repeating the process - `num_chunks` times. - - Parameters - ---------- - n_components : int or None, optional (default=None) - Dimensionality of reduced space (if None, defaults to dimension of X). - - num_dims : Not used - - .. deprecated:: 0.5.0 - `num_dims` was deprecated in version 0.5.0 and will - be removed in 0.6.0. Use `n_components` instead. - - num_chunks: int, optional - chunk_size: int, optional - preprocessor : array-like, shape=(n_samples, n_features) or callable - The preprocessor to call to get tuples from indices. If array-like, - tuples will be formed like this: X[indices]. 
- """ RCA.__init__(self, num_dims=num_dims, n_components=n_components, pca_comps=pca_comps, preprocessor=preprocessor) self.num_chunks = num_chunks diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py index c5e63fa8..70e65c86 100644 --- a/metric_learn/sdml.py +++ b/metric_learn/sdml.py @@ -1,15 +1,5 @@ -r""" -Sparse High-Dimensional Metric Learning(SDML) - -SDML is an efficient sparse metric learning in high-dimensional space via -double regularization: an L1-penalization on the off-diagonal elements of the -Mahalanobis matrix :math:`\mathbf{M}`, and a log-determinant divergence between -:math:`\mathbf{M}` and :math:`\mathbf{M_0}` (set as either :math:`\mathbf{I}` -or :math:`\mathbf{\Omega}^{-1}`, where :math:`\mathbf{\Omega}` is the -covariance matrix). - -Read more in the :ref:`User Guide `. - +""" +Sparse High-Dimensional Metric Learning (SDML) """ from __future__ import absolute_import @@ -38,55 +28,6 @@ class _BaseSDML(MahalanobisMixin): def __init__(self, balance_param=0.5, sparsity_param=0.01, prior=None, use_cov='deprecated', verbose=False, preprocessor=None, random_state=None): - """ - Parameters - ---------- - balance_param : float, optional - trade off between sparsity and M0 prior - - sparsity_param : float, optional - trade off between optimizer and sparseness (see graph_lasso) - - prior : None, string or numpy array, optional (default=None) - Prior to set for the metric. Possible options are - 'identity', 'covariance', 'random', and a numpy array of - shape (n_features, n_features). For SDML, the prior should be strictly - positive definite (PD). If `None`, will be set - automatically to 'identity' (this is to raise a warning if - `prior` is not set, and stays to its default value (None), in v0.5.0). - - 'identity' - An identity matrix of shape (n_features, n_features). - - 'covariance' - The inverse covariance matrix. - - 'random' - The prior will be a random positive definite (PD) matrix of shape - `(n_features, n_features)`, generated using - `sklearn.datasets.make_spd_matrix`. - - numpy array - A positive definite (PD) matrix of shape - (n_features, n_features), that will be used as such to set the - prior. - - use_cov : Not used. - .. deprecated:: 0.5.0 - `A0` was deprecated in version 0.5.0 and will - be removed in 0.6.0. Use 'prior' instead. - - verbose : bool, optional - if True, prints information while learning - - preprocessor : array-like, shape=(n_samples, n_features) or callable - The preprocessor to call to get tuples from indices. If array-like, - tuples will be gotten like this: X[indices]. - - random_state : int or numpy.RandomState or None, optional (default=None) - A pseudo random number generator object or a seed for it if int. If - ``prior='random'``, ``random_state`` is used to set the prior. - """ self.balance_param = balance_param self.sparsity_param = sparsity_param self.prior = prior @@ -190,6 +131,63 @@ def _fit(self, pairs, y): class SDML(_BaseSDML, _PairsClassifierMixin): """Sparse Distance Metric Learning (SDML) + SDML is an efficient sparse metric learning in high-dimensional space via + double regularization: an L1-penalization on the off-diagonal elements of the + Mahalanobis matrix :math:`\mathbf{M}`, and a log-determinant divergence + between :math:`\mathbf{M}` and :math:`\mathbf{M_0}` (set as either + :math:`\mathbf{I}` or :math:`\mathbf{\Omega}^{-1}`, where + :math:`\mathbf{\Omega}` is the covariance matrix). + + Read more in the :ref:`User Guide `. 
+ + Parameters + ---------- + balance_param : float, optional + trade-off between sparsity and M0 prior + + sparsity_param : float, optional + trade-off between optimizer and sparseness (see graph_lasso) + + prior : None, string or numpy array, optional (default=None) + Prior to set for the metric. Possible options are + 'identity', 'covariance', 'random', and a numpy array of + shape (n_features, n_features). For SDML, the prior should be strictly + positive definite (PD). If `None`, will be set + automatically to 'identity' (this is to raise a warning if + `prior` is not set, and stays to its default value (None), in v0.5.0). + + 'identity' + An identity matrix of shape (n_features, n_features). + + 'covariance' + The inverse covariance matrix. + + 'random' + The prior will be a random positive definite (PD) matrix of shape + `(n_features, n_features)`, generated using + `sklearn.datasets.make_spd_matrix`. + + numpy array + A positive definite (PD) matrix of shape + (n_features, n_features), that will be used as such to set the + prior. + + use_cov : Not used. + .. deprecated:: 0.5.0 + `use_cov` was deprecated in version 0.5.0 and will + be removed in 0.6.0. Use 'prior' instead. + + verbose : bool, optional (default=False) + if True, prints information while learning + + preprocessor : array-like, shape=(n_samples, n_features) or callable + The preprocessor to call to get tuples from indices. If array-like, + tuples will be gotten like this: X[indices]. + + random_state : int or numpy.RandomState or None, optional (default=None) + A pseudo random number generator object or a seed for it if int. If + ``prior='random'``, ``random_state`` is used to set the prior. + Attributes ---------- transformer_ : `numpy.ndarray`, shape=(n_features, n_features) The linear transformation ``L`` deduced from the learned Mahalanobis metric (See function `transformer_from_metric`.) @@ -200,6 +198,27 @@ class SDML(_BaseSDML, _PairsClassifierMixin): threshold_ : `float` If the distance metric between two points is lower than this threshold, points will be classified as similar, otherwise they will be classified as dissimilar. + + Examples + -------- + >>> from metric_learn import SDML_Supervised + >>> from sklearn.datasets import load_iris + >>> iris_data = load_iris() + >>> X = iris_data['data'] + >>> Y = iris_data['target'] + >>> sdml = SDML_Supervised(num_constraints=200) + >>> sdml.fit(X, Y) + + References + ---------- + + .. [1] Qi et al. + An efficient sparse metric learning in high-dimensional space via + L1-penalized log-determinant regularization. ICML 2009. + http://lms.comp.nus.edu.sg/sites/default/files/publication\ +-attachments/icml09-guojun.pdf + + .. [2] Adapted from https://gist.github.com/kcarnold/5439945 """ def fit(self, pairs, y, calibration_params=None): @@ -210,7 +229,7 @@ def fit(self, pairs, y, calibration_params=None): Parameters ---------- - pairs : array-like, shape=(n_constraints, 2, n_features) or + pairs : array-like, shape=(n_constraints, 2, n_features) or \ (n_constraints, 2) 3D Array of pairs with each row corresponding to two points, or 2D array of indices of pairs if the metric learner uses a @@ -238,74 +257,78 @@ class SDML_Supervised(_BaseSDML, TransformerMixin): """Supervised version of Sparse Distance Metric Learning (SDML) + `SDML_Supervised` creates pairs of similar samples by taking same class + samples, and pairs of dissimilar samples by taking different class + samples. It then passes these pairs to `SDML` for training.
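
The pair generation just described boils down to something like the following numpy sketch (illustrative only: the real sampling lives in `metric_learn.constraints` and handles more corner cases, such as balancing the two pair types):

>>> import numpy as np
>>> rng = np.random.RandomState(42)
>>> labels = np.array([0, 0, 1, 1, 2, 2])
>>> i, j = rng.randint(len(labels), size=(2, 10))  # draw candidate index pairs
>>> mask = i != j                                  # drop degenerate self-pairs
>>> i, j = i[mask], j[mask]
>>> pairs_indices = np.column_stack([i, j])        # one (index, index) pair per row
>>> y = np.where(labels[i] == labels[j], 1, -1)    # +1 similar, -1 dissimilar

Such index pairs can then be fed to `SDML` together with a `preprocessor` set to the array of points.
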
+ + Parameters + ---------- + balance_param : float, optional (default=0.5) + trade-off between sparsity and M0 prior + sparsity_param : float, optional (default=0.01) + trade-off between optimizer and sparseness (see graph_lasso) + prior : None, string or numpy array, optional (default=None) + Prior to set for the metric. Possible options are + 'identity', 'covariance', 'random', and a numpy array of + shape (n_features, n_features). For SDML, the prior should be strictly + positive definite (PD). If `None`, will be set + automatically to 'identity' (this is to raise a warning if + `prior` is not set, and stays to its default value (None), in v0.5.0). + + 'identity' + An identity matrix of shape (n_features, n_features). + + 'covariance' + The inverse covariance matrix. + + 'random' + The prior will be a random SPD matrix of shape + `(n_features, n_features)`, generated using + `sklearn.datasets.make_spd_matrix`. + + numpy array + A positive definite (PD) matrix of shape + (n_features, n_features), that will be used as such to set the + prior. + + use_cov : Not used. + .. deprecated:: 0.5.0 + `use_cov` was deprecated in version 0.5.0 and will + be removed in 0.6.0. Use 'prior' instead. + + num_labeled : Not used + .. deprecated:: 0.5.0 + `num_labeled` was deprecated in version 0.5.0 and will + be removed in 0.6.0. + num_constraints : int, optional (default=None) + number of constraints to generate + verbose : bool, optional (default=False) + if True, prints information while learning + preprocessor : array-like, shape=(n_samples, n_features) or callable + The preprocessor to call to get tuples from indices. If array-like, + tuples will be formed like this: X[indices]. + random_state : int or numpy.RandomState or None, optional (default=None) + A pseudo random number generator object or a seed for it if int. If + ``prior='random'``, ``random_state`` is used to set the random + prior. + Attributes ---------- transformer_ : `numpy.ndarray`, shape=(n_features, n_features) The linear transformation ``L`` deduced from the learned Mahalanobis metric (See function `transformer_from_metric`.) + + See Also + -------- + metric_learn.SDML : The original weakly-supervised algorithm + :ref:`supervised_version` : The section of the project documentation + that describes the supervised version of weakly supervised estimators. """ def __init__(self, balance_param=0.5, sparsity_param=0.01, prior=None, use_cov='deprecated', num_labeled='deprecated', num_constraints=None, verbose=False, preprocessor=None, random_state=None): - """Initialize the supervised version of `SDML`. - - `SDML_Supervised` creates pairs of similar sample by taking same class - samples, and pairs of dissimilar samples by taking different class - samples. It then passes these pairs to `SDML` for training. - - Parameters - ---------- - balance_param : float, optional - trade off between sparsity and M0 prior - sparsity_param : float, optional - trade off between optimizer and sparseness (see graph_lasso) - prior : None, string or numpy array, optional (default=None) - Prior to set for the metric. Possible options are - 'identity', 'covariance', 'random', and a numpy array of - shape (n_features, n_features). For SDML, the prior should be strictly - positive definite (PD). If `None`, will be set - automatically to 'identity' (this is to raise a warning if - `prior` is not set, and stays to its default value (None), in v0.5.0). - - 'identity' - An identity matrix of shape (n_features, n_features). - - 'covariance' - The inverse covariance matrix.
- - 'random' - The prior will be a random SPD matrix of shape - `(n_features, n_features)`, generated using - `sklearn.datasets.make_spd_matrix`. - - numpy array - A positive definite (PD) matrix of shape - (n_features, n_features), that will be used as such to set the - prior. - - use_cov : Not used. - .. deprecated:: 0.5.0 - `A0` was deprecated in version 0.5.0 and will - be removed in 0.6.0. Use 'prior' instead. - - num_labeled : Not used - .. deprecated:: 0.5.0 - `num_labeled` was deprecated in version 0.5.0 and will - be removed in 0.6.0. - num_constraints : int, optional - number of constraints to generate - verbose : bool, optional - if True, prints information while learning - preprocessor : array-like, shape=(n_samples, n_features) or callable - The preprocessor to call to get tuples from indices. If array-like, - tuples will be formed like this: X[indices]. - random_state : int or numpy.RandomState or None, optional (default=None) - A pseudo random number generator object or a seed for it if int. If - ``init='random'``, ``random_state`` is used to set the random - prior. - """ _BaseSDML.__init__(self, balance_param=balance_param, sparsity_param=sparsity_param, prior=prior, use_cov=use_cov, verbose=verbose, From 46a948a76301087191bb7cbdf1051d5795420f38 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20Bellet?= Date: Wed, 3 Jul 2019 14:02:33 +0200 Subject: [PATCH 123/210] [WIP] Small improvements in doc (#229) * get started * supervised.rst done * unsupervised.rst done * finish addressing comments * fix links and uniformize refs --- doc/supervised.rst | 91 +++++++++-------- doc/unsupervised.rst | 6 +- doc/user_guide.rst | 1 + doc/weakly_supervised.rst | 204 ++++++++++++++++++-------------------- 4 files changed, 150 insertions(+), 152 deletions(-) diff --git a/doc/supervised.rst b/doc/supervised.rst index 5520ce8e..3c941b20 100644 --- a/doc/supervised.rst +++ b/doc/supervised.rst @@ -11,8 +11,8 @@ from each other. General API =========== -Supervised Metric Learning Algorithms are the easiest metric-learn algorithms -to use, since they use the same API as ``scikit-learn``. +Supervised metric learning algorithms essentially use the same API as +scikit-learn. Input data ---------- @@ -20,13 +20,14 @@ In order to train a model, you need two `array-like `_ objects, `X` and `y`. `X` should be a 2D array-like of shape `(n_samples, n_features)`, where `n_samples` is the number of points of your dataset and `n_features` is the -number of attributes of each of your points. `y` should be a 1D array-like +number of attributes describing each point. `y` should be a 1D +array-like of shape `(n_samples,)`, containing for each point in `X` the class it belongs to (or the value to regress for this sample, if you use `MLKR` for instance). Here is an example of a dataset of two dogs and one -cat (the classes are 'dog' and 'cat') an animal being being represented by +cat (the classes are 'dog' and 'cat') an animal being represented by two numbers. >>> import numpy as np @@ -83,9 +84,10 @@ array([0.49627072, 3.65287282]) .. note:: - If the metric learner that you use learns a Mahalanobis Matrix (like it is - the case for all algorithms currently in metric-learn), you can get the - plain learned Mahalanobis matrix using `get_mahalanobis_matrix`. + If the metric learner that you use learns a :ref:`Mahalanobis distance + ` (like it is the case for all algorithms + currently in metric-learn), you can get the plain learned Mahalanobis + matrix using `get_mahalanobis_matrix`. 
>>> nca.get_mahalanobis_matrix() array([[0.43680409, 0.89169412], @@ -96,9 +98,13 @@ array([0.49627072, 3.65287282]) Scikit-learn compatibility -------------------------- -All supervised algorithms are scikit-learn `sklearn.base.Estimators`, and -`sklearn.base.TransformerMixin` so they are compatible with Pipelining and -scikit-learn model selection routines. +All supervised algorithms are scikit-learn estimators +(`sklearn.base.BaseEstimator`) and transformers +(`sklearn.base.TransformerMixin`) so they are compatible with pipelines +(`sklearn.pipeline.Pipeline`) and +scikit-learn model selection routines +(`sklearn.model_selection.cross_val_score`, +`sklearn.model_selection.GridSearchCV`, etc). Algorithms ========== @@ -151,18 +157,20 @@ indicates :math:`\mathbf{x}_{i}, \mathbf{x}_{j}` belong to different class, .. topic:: References: - .. [1] `Distance Metric Learning for Large Margin Nearest Neighbor - Classification - `_ Kilian Q. Weinberger, John - Blitzer, Lawrence K. Saul + .. [1] Weinberger et al. `Distance Metric Learning for Large Margin + Nearest Neighbor Classification + `_. + JMLR 2009 + + .. [2] `Wikipedia entry on Large Margin Nearest Neighbor `_ + .. _nca: :py:class:`NCA ` -------------------------------------- -Neighborhood Components Analysis(:py:class:`NCA `) +Neighborhood Components Analysis (:py:class:`NCA `) `NCA` is a distance metric learning algorithm which aims to improve the accuracy of nearest neighbors classification compared to the standard @@ -213,20 +221,19 @@ the sum of probability of being correctly classified: .. topic:: References: - .. [1] J. Goldberger, G. Hinton, S. Roweis, R. Salakhutdinov. - "Neighbourhood Components Analysis". Advances in Neural Information - Processing Systems. 17, 513-520, 2005. - http://www.cs.nyu.edu/~roweis/papers/ncanips.pdf + .. [1] Goldberger et al. + `Neighbourhood Components Analysis `_. + NIPS 2005 - .. [2] Wikipedia entry on Neighborhood Components Analysis - https://en.wikipedia.org/wiki/Neighbourhood_components_analysis + .. [2] `Wikipedia entry on Neighborhood Components Analysis `_ + .. _lfda: :py:class:`LFDA ` ----------------------------------------- -Local Fisher Discriminant Analysis(:py:class:`LFDA `) +Local Fisher Discriminant Analysis (:py:class:`LFDA `) `LFDA` is a linear supervised dimensionality reduction method. It is particularly useful when dealing with multi-modality, where one ore more classes @@ -287,20 +294,20 @@ same class are not imposed to be close. .. topic:: References: - .. [1] `Dimensionality Reduction of Multimodal Labeled Data by Local - Fisher Discriminant Analysis `_ Masashi Sugiyama. + .. [1] Sugiyama. `Dimensionality Reduction of Multimodal Labeled Data by Local + Fisher Discriminant Analysis `_. + JMLR 2007 - .. [2] `Local Fisher Discriminant Analysis on Beer Style Clustering + .. [2] Tang. `Local Fisher Discriminant Analysis on Beer Style Clustering `_ Yuan Tang. + -discriminant-analysis-on-beer-style-clustering.html#>`_. .. _mlkr: :py:class:`MLKR ` ----------------------------------------- -Metric Learning for Kernel Regression(:py:class:`MLKR `) +Metric Learning for Kernel Regression (:py:class:`MLKR `) `MLKR` is an algorithm for supervised metric learning, which learns a distance function by directly minimizing the leave-one-out regression error. @@ -355,9 +362,8 @@ calculating a weighted average of all the training samples: .. topic:: References: - .. [1] `Metric Learning for Kernel Regression `_ Kilian Q. Weinberger, - Gerald Tesauro + .. [1] Weinberger et al. 
`Metric Learning for Kernel Regression `_. AISTATS 2007 .. _supervised_version: @@ -365,14 +371,14 @@ calculating a weighted average of all the training samples: Supervised versions of weakly-supervised algorithms --------------------------------------------------- -Note that each :ref:`weakly-supervised algorithm ` +Each :ref:`weakly-supervised algorithm ` has a supervised version of the form `*_Supervised` where similarity tuples are -generated from the labels information and passed to the underlying algorithm. -These constraints are sampled randomly under the hood. +randomly generated from the labels information and passed to the underlying +algorithm. For pairs learners (see :ref:`learning_on_pairs`), pairs (tuple of two points -from the dataset), and labels (`int` indicating whether the two points are -similar (+1) or dissimilar (-1)), are sampled with the function +from the dataset), and pair labels (`int` indicating whether the two points +are similar (+1) or dissimilar (-1)), are sampled with the function `metric_learn.constraints.positive_negative_pairs`. To sample positive pairs (of label +1), this method will look at all the samples from the same label and sample randomly a pair among them. To sample negative pairs (of label -1), this @@ -383,12 +389,11 @@ of one of those, so forcing `same_length=True` will return both times the minimum of the two lenghts. For using quadruplets learners (see :ref:`learning_on_quadruplets`) in a -supervised way, we will basically sample positive and negative pairs like -before, but we'll just concatenate them, so that we have a 3D array of -quadruplets, where for each quadruplet the two first points are in fact points -from the same class, and the two last points are in fact points from a -different class (so indeed the two last points should be less similar than the -two first points). +supervised way, positive and negative pairs are sampled as above and +concatenated so that we have a 3D array of +quadruplets, where for each quadruplet the two first points are from the same +class, and the two last points are from a different class (so indeed the two +last points should be less similar than the two first points). .. topic:: Example Code: diff --git a/doc/unsupervised.rst b/doc/unsupervised.rst index 1d5bef43..1191e805 100644 --- a/doc/unsupervised.rst +++ b/doc/unsupervised.rst @@ -2,9 +2,9 @@ Unsupervised Metric Learning ============================ -Unsupervised metric learning algorithms just take as input points `X`. For -now, in metric-learn, there only is `Covariance`, which is a simple -baseline algorithm (see below). +Unsupervised metric learning algorithms only take as input an (unlabeled) +dataset `X`. For now, in metric-learn, there only is `Covariance`, which is a +simple baseline algorithm (see below). Algorithms diff --git a/doc/user_guide.rst b/doc/user_guide.rst index fb7060ce..5472107a 100644 --- a/doc/user_guide.rst +++ b/doc/user_guide.rst @@ -12,4 +12,5 @@ User Guide introduction.rst supervised.rst weakly_supervised.rst + unsupervised.rst preprocessor.rst \ No newline at end of file diff --git a/doc/weakly_supervised.rst b/doc/weakly_supervised.rst index 7e488ac7..38f08fbe 100644 --- a/doc/weakly_supervised.rst +++ b/doc/weakly_supervised.rst @@ -31,22 +31,21 @@ two points, three points, etc...). The label is some information we have about this set of points (e.g. "these two points are similar"). 
Note that some information can be contained in the ordering of these tuples
(see for instance the section :ref:`learning_on_quadruplets`). For more details about
-the specific of each algorithms, refer to the appropriate section: either
-:ref:`learning_on_pairs` or :ref:`learning_on_quadruplets`)
+specific forms of tuples, refer to the appropriate sections
+(:ref:`learning_on_pairs` or :ref:`learning_on_quadruplets`).

-
-The `tuples` argument is the first argument of every method (like the X
+The `tuples` argument is the first argument of every method (like the `X`
argument for classical algorithms in scikit-learn). The second argument is
the label of the tuple: its semantic depends on the algorithm used. For instance
-for pairs learners ``y`` is a label indicating whether the pair is of similar
+for pairs learners `y` is a label indicating whether the pair is of similar
samples or dissimilar samples. Then one can fit a Weakly Supervised Metric
Learner on this tuple, like this:

>>> my_algo.fit(tuples, y)

-Like in a classical setting we split the points ``X`` between train and test,
-here we split the ``tuples`` between train and test.
+Like in a classical setting we split the points `X` between train and test,
+here we split the `tuples` between train and test.

>>> from sklearn.model_selection import train_test_split
>>> pairs_train, pairs_test, y_train, y_test = train_test_split(pairs, y)

@@ -58,9 +57,9 @@ learn:
^^^^^^^^^^^^^^^^^^

The most intuitive way to represent tuples is to provide the algorithm with a
-3D array-like of tuples of shape ``(n_tuples, t, n_features)``, where
-``n_tuples`` is the number of tuples, ``tuple_size`` is the number of elements
-in a tuple (2 for pairs, 3 for triplets for instance), and ``n_features`` is
+3D array-like of tuples of shape `(n_tuples, tuple_size, n_features)`, where
+`n_tuples` is the number of tuples, `tuple_size` is the number of elements
+in a tuple (2 for pairs, 3 for triplets for instance), and `n_features` is
the number of features of each point.

.. topic:: Example:
@@ -91,8 +90,8 @@ the number of features of each point.
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Instead of forming each point in each tuple, a more efficient representation
-would be to keep the dataset of points ``X`` aside, and just represent tuples
-as a collection of tuples of *indices* from the points in ``X``. Since we loose
+would be to keep the dataset of points `X` aside, and just represent tuples
+as a collection of tuples of *indices* from the points in `X`. Since we lose
the feature dimension there, the resulting array is 2D.

.. topic:: Example: An equivalent representation of the above pairs would be:
@@ -110,7 +109,7 @@ the feature dimension there, the resulting array is 2D.

>>> y = np.array([-1, 1, 1, -1])

In order to fit metric learning algorithms with this type of input, we need to
-give the original dataset of points ``X`` to the estimator so that it knows
+give the original dataset of points `X` to the estimator so that it knows
the points the indices refer to. We do this when initializing the estimator,
through the argument `preprocessor` (see below :ref:`fit_ws`)

@@ -118,7 +117,7 @@ through the argument `preprocessor` (see below :ref:`fit_ws`)
.. note::

   Instead of an array-like, you can give a callable in the argument
-   ``preprocessor``, which will go fetch and form the tuples. This allows to
+   `preprocessor`, which will go fetch and form the tuples. This allows to
   give more general indicators than just indices from an array (for instance
   paths in the filesystem, name of records in a database etc...) See section
   :ref:`preprocessor_section` for more details on how to use the preprocessor.
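As a minimal sketch of such a callable (`fetch_points` is a hypothetical
name, and a dataset `X` as above is assumed), one could simply index into an
array, though the same mechanism could e.g. resolve filesystem paths instead:

>>> from metric_learn import MMC
>>> def fetch_points(indices):
...     # `indices` is a 1D array-like of identifiers; here they are plain
...     # row indices into `X`, so fetching is just fancy indexing
...     return X[indices]
>>> mmc = MMC(preprocessor=fetch_points)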
@@ -157,7 +156,7 @@ Here we transform two points in the new embedding space.

array([[-3.24667162e+01, 4.62622348e-07, 3.88325421e-08],
       [-3.61531114e+01, 4.86778289e-07, 2.12654397e-08]])

-Also, as explained before, our metric learners has learn a distance between
+Also, as explained before, our metric learner has learned a distance between
points. You can use this distance in two main ways:

- You can either return the distance between pairs of points using the
@@ -178,9 +177,10 @@ array([7.27607365, 0.88853014])

.. note::

-   If the metric learner that you use learns a Mahalanobis Matrix (like it is
-   the case for all algorithms currently in metric-learn), you can get the
-   plain Mahalanobis matrix using `get_mahalanobis_matrix`.
+   If the metric learner that you use learns a :ref:`Mahalanobis distance
+   ` (like it is the case for all algorithms
+   currently in metric-learn), you can get the plain Mahalanobis matrix using
+   `get_mahalanobis_matrix`.

>>> mmc.get_mahalanobis_matrix()
array([[ 0.58603894, -5.69883982, -1.66614919],
@@ -190,53 +190,51 @@ array([[ 0.58603894, -5.69883982, -1.66614919],

.. TODO: remove the "like it is the case etc..." if it's not the case anymore

.. _sklearn_compat_ws:
-
+
+Prediction and scoring
+----------------------
+
+Weakly supervised learners are also able, once fitted, to predict, for a
+given tuple, its label (for pairs) or ordering (for quadruplets). See
+the appropriate section for more details, either :ref:`this
+one ` for pairs, or :ref:`this one
+` for quadruplets.
+
+They also implement a default scoring method, `score`, that can be
+used to evaluate the performance of a metric-learner on a test dataset. See
+the appropriate section for more details, either :ref:`this
+one ` for pairs, or :ref:`this one `
+for quadruplets.
+
Scikit-learn compatibility
--------------------------

Weakly supervised estimators are compatible with scikit-learn routines for
-model selection (grid-search, cross-validation etc). See the scoring section
-of the appropriate algorithm (:ref:`pairs learners `
-or :ref:`quadruplets learners `)
-for more details on the scoring used in the case of Weakly Supervised Metric
-Learning.
+model selection (`sklearn.model_selection.cross_val_score`,
+`sklearn.model_selection.GridSearchCV`, etc).

Example:

>>> from metric_learn import MMC
+>>> import numpy as np
>>> from sklearn.datasets import load_iris
>>> from sklearn.model_selection import cross_val_score
>>> rng = np.random.RandomState(42)
>>> X, _ = load_iris(return_X_y=True)
>>> # let's sample 30 random pairs and labels of pairs
>>> pairs_indices = rng.randint(X.shape[0], size=(30, 2))
->>> y = rng.randint(2, size=30)
+>>> y = 2 * rng.randint(2, size=30) - 1
>>> mmc = MMC(preprocessor=X)
>>> cross_val_score(mmc, pairs_indices, y)
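Continuing this example, a minimal model-selection sketch with
`sklearn.model_selection.GridSearchCV` could look like this (the grid over
`max_iter` is an arbitrary illustration):

>>> from sklearn.model_selection import GridSearchCV
>>> grid = GridSearchCV(MMC(preprocessor=X),
...                     param_grid={'max_iter': [50, 100]}, cv=3)
>>> grid.fit(pairs_indices, y)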
-Prediction and scoring
-----------------------
-
-Since weakly supervised are also able, after being fitted, to predict for a
-given tuple what is its label (for pairs) or ordering (for quadruplets). See
-the appropriate section for more details, either :ref:`this
-one ` for pairs, or :ref:`this one
-` for quadruplets.
-
-They also implement a default scoring method, `score`, that can be
-used to evaluate the performance of a metric-learner on a test dataset. See
-the appropriate section for more details, either :ref:`this
-one ` for pairs, or :ref:`this one `
-for quadruplets.
-

.. _learning_on_pairs:

Learning on pairs
=================

Some metric learning algorithms learn on pairs of samples. In this case, one
-should provide the algorithm with ``n_samples`` pairs of points, with a
-corresponding target containing ``n_samples`` values being either +1 or -1.
+should provide the algorithm with `n_samples` pairs of points, with a
+corresponding target containing `n_samples` values being either +1 or -1.
These values indicate whether the given pairs are similar points or
dissimilar points.

@@ -262,11 +260,11 @@ each other.

.. _pairs_predicting:

-Predicting
+Prediction
----------

-When a pairs learner is fitted, it is also able to predict, for an
-upcoming pair, whether it is a pair of similar or dissimilar points.
+When a pairs learner is fitted, it is also able to predict, for an unseen
+pair, whether it is a pair of similar or dissimilar points.

>>> mmc.predict([[[0.6, 1.6], [1.15, 2.75]],
...              [[3.2, 1.1], [5.4, 6.1]]])
array([1, -1])

.. _calibration:

-Thresholding
-------------
-In order to predict whether a new pair represents similar or dissimilar
-samples, we in fact need to set a distance threshold, so that points closer (in
-the learned space) than this threshold are predicted as similar, and points
-further away are predicted as dissimilar. Several methods are possible for this
-thresholding.
+Prediction threshold
+^^^^^^^^^^^^^^^^^^^^

-- **At fit time**: The threshold is set with `calibrate_threshold` (see
-  below) on the trainset. You can specify the calibration parameters directly
+Predicting whether a new pair represents similar or dissimilar
+samples requires setting a threshold on the learned distance, so that points
+closer (in the learned space) than this threshold are predicted as similar,
+and points further away are predicted as dissimilar. Several methods are
+possible for this thresholding.
+
+- **Calibration at fit time**: The threshold is set with `calibrate_threshold`
+  (see below) on the training set. You can specify the calibration
+  parameters directly
  in the `fit` method with the `threshold_params` parameter (see the
  documentation of the `fit` method of any metric learner that learns on pairs
-  of points for more information). This method can cause a little bit of
-  overfitting. If you want to avoid that, calibrate the threshold after
-  fitting, on a validation set.
+  of points for more information). Note that calibrating on the training set
+  may cause some overfitting. If you want to avoid that, calibrate the
+  threshold after fitting, on a validation set.

  >>> mmc.fit(pairs, y) # will fit the threshold automatically after fitting

-- **Manual**: calling `set_threshold` will set the threshold to a
-  particular value.
+- **Calibration on validation set**: calling `calibrate_threshold` will
+  calibrate the threshold to achieve a particular score on a validation set,
+  the score being among the classical scores for classification (accuracy, f1
+  score...).

-  >>> mmc.set_threshold(0.4)
+- **Manual threshold**: calling `set_threshold` will set the threshold to a
+  particular value.
- >>> mmc.calibrate_threshold(pairs, y) + >>> mmc.set_threshold(0.4) See also: `sklearn.calibration`. @@ -310,18 +311,17 @@ See also: `sklearn.calibration`. Scoring ------- -Not only are they able to predict the label of given pairs, they can also -return a `decision_function` for a set of pairs. It is basically the "score" -that will be thresholded to find the prediction for the pair. In fact this -"score" is the opposite of the distance in the new space (higher score means - points are similar, and lower score dissimilar). +Pair metric learners can also return a `decision_function` for a set of pairs. +It is basically the "score" that will be thresholded to find the prediction +for the pair. This score corresponds to the opposite of the distance in the +new space (higher score means points are similar, and lower score dissimilar). >>> mmc.decision_function([[[0.6, 1.6], [1.15, 2.75]], ... [[3.2, 1.1], [5.4, 6.1]]]) array([-0.12811124, -0.74750256]) -This allows to return all kinds of estimator scoring usually used in classic -classification tasks, like `sklearn.metrics.accuracy` for instance, which +This allows to use common scoring functions for binary classification, like +`sklearn.metrics.accuracy_score` for instance, which can be used inside cross-validation routines: >>> from sklearn.model_selection import cross_val_score @@ -333,15 +333,14 @@ can be used inside cross-validation routines: array([1., 0., 1.]) Pairs learners also have a default score, which basically -returns the `sklearn.metrics.roc_auc_score` (therefore is not dependent on -the threshold). +returns the `sklearn.metrics.roc_auc_score` (which is threshold-independent). >>> pairs_test = np.array([[[0.6, 1.6], [1.15, 2.75]], ... [[3.2, 1.1], [5.4, 6.1]], ... [[7.7, 5.6], [1.23, 8.4]]]) ->>> y_test = np.array([-1., 1., -1.]) +>>> y_test = np.array([1., -1., -1.]) >>> mmc.score(pairs_test, y_test) -0.5 +1.0 .. note:: See :ref:`fit_ws` for more details on metric learners functions that are @@ -356,7 +355,7 @@ Algorithms :py:class:`ITML ` ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Information Theoretic Metric Learning(:py:class:`ITML `) +Information Theoretic Metric Learning (:py:class:`ITML `) `ITML` minimizes the (differential) relative entropy, aka Kullback–Leibler divergence, between two multivariate Gaussians subject to constraints on the @@ -422,12 +421,9 @@ is the prior distance metric, set to identity matrix by default, .. topic:: References: - .. [1] `Information-theoretic Metric Learning `_ Jason V. Davis, - et al. + .. [1] Jason V. Davis, et al. `Information-theoretic Metric Learning `_. ICML 2007 - .. [2] Adapted from Matlab code at http://www.cs.utexas.edu/users/pjain/ - itml/ + .. [2] Adapted from Matlab code at http://www.cs.utexas.edu/users/pjain/itml/ .. _sdml: @@ -483,10 +479,9 @@ is the off-diagonal L1 norm. .. topic:: References: .. [1] Qi et al. - An efficient sparse metric learning in high-dimensional space via - L1-penalized log-determinant regularization. ICML 2009. - http://lms.comp.nus.edu.sg/sites/default/files/publication-attachments/ - icml09-guojun.pdf + `An efficient sparse metric learning in high-dimensional space via + L1-penalized log-determinant regularization `_. + ICML 2009. .. [2] Adapted from https://gist.github.com/kcarnold/5439945 @@ -537,14 +532,13 @@ as the Mahalanobis matrix. .. topic:: References: - .. [1] `Adjustment learning and relevant component analysis + .. [1] Shental et al. `Adjustment learning and relevant component analysis `_ Noam Shental, et al. + &rep=rep1&type=pdf>`_. 
ECCV 2002

-   .. [2] 'Learning distance functions using equivalence relations', ICML 2003
+   .. [2] Bar-Hillel et al. `Learning distance functions using equivalence relations `_. ICML 2003

-   .. [3]'Learning a Mahalanobis metric from equivalence constraints', JMLR
-   2005
+   .. [3] Bar-Hillel et al. `Learning a Mahalanobis metric from equivalence constraints `_. JMLR 2005

.. _mmc:
@@ -595,12 +589,11 @@ points, while constrains the sum of distances between dissimilar points:

.. topic:: References:

-   .. [1] `Distance metric learning with application to clustering with
+   .. [1] Xing et al. `Distance metric learning with application to clustering with
   side-information `_ Xing, Jordan, Russell, Ng.

-   .. [2] Adapted from Matlab code `here `_.
+   -with-side-information.pdf>`_. NIPS 2002
+   .. [2] Adapted from Matlab code http://www.cs.cmu.edu/%7Eepxing/papers/Old_papers/code_Metric_online.tar.gz

.. _learning_on_quadruplets:

Learning on quadruplets
=======================

-
-
-The goal of weakly-supervised metric-learning algorithms is to transform
-points in a new space, in which the tuple-wise constraints between points
-are respected.
+Some metric learning algorithms learn on quadruplets of samples. In this case,
+one should provide the algorithm with `n_samples` quadruplets of points. The
+semantics of each quadruplet is that the first two points should be closer
+together than the last two points.

Fitting
-------
@@ -659,7 +651,7 @@ last points.

.. _quadruplets_predicting:

-Predicting
+Prediction
----------

When a quadruplets learner is fitted, it is also able to predict, for an
@@ -677,10 +669,10 @@ array([-1., 1.])

Scoring
-------

-Not only are they able to predict the label of given pairs, they can also
-return a `decision_function` for a set of pairs. It is basically the "score"
-which sign will be taken to find the prediction for the pair. In fact this
-"score" is the difference between the distance between the two last points,
+Quadruplet metric learners can also
+return a `decision_function` for a set of quadruplets. This is basically the "score"
+whose sign will be taken to find the prediction for the quadruplet, and which
+corresponds to the difference between the distance between the two last points,
and the distance between the two first points of the quadruplet (higher score
means the two last points are more likely to be more dissimilar than the two
first points (i.e. more likely to have a +1 prediction since it's
@@ -802,8 +794,8 @@ by default, :math:`D_{ld}(\mathbf{\cdot, \cdot})` is the LogDet divergence:

.. topic:: References:

   .. [1] Liu et al.
-      "Metric Learning from Relative Comparisons by Minimizing Squared
-      Residual". ICDM 2012. http://www.cs.ucla.edu/~weiwang/paper/ICDM12.pdf
+      `Metric Learning from Relative Comparisons by Minimizing Squared
+      Residual `_. ICDM 2012

   .. 
[2] Adapted from https://gist.github.com/kcarnold/5439917 From 44fd427b7cffc55de2082f630178766554178584 Mon Sep 17 00:00:00 2001 From: William de Vazelhes <31916524+wdevazelhes@users.noreply.github.com> Date: Thu, 4 Jul 2019 00:20:35 +0200 Subject: [PATCH 124/210] [MRG] Remove random_seed in fit and use the one in init (#224) * Remove random_seed in fit and use the one in init * update tests with the new API * Update test_RCA in sklearn_compat * Update test_SDML in sklearn_compat * Remove testing of pca_comps since it's deprecated * Fix sklearn test * Fix random_seed for test_iris in TestRCA * Relaunch CI * Augment tolerance rather than fix random_seed * Add ChangedBehaviorWarning if the random_state is left default * Update the merge * Address https://github.com/metric-learn/metric-learn/pull/224#pullrequestreview-257618709 --- metric_learn/constraints.py | 12 +- metric_learn/itml.py | 29 +++- metric_learn/lsml.py | 26 ++- metric_learn/mmc.py | 26 ++- metric_learn/rca.py | 32 +++- metric_learn/sdml.py | 27 ++- test/metric_learn_test.py | 191 ++++++++++++++++++--- test/test_base_metric.py | 2 +- test/test_fit_transform.py | 42 ++--- test/test_sklearn_compat.py | 66 +++---- test/test_transformer_metric_conversion.py | 8 +- 11 files changed, 335 insertions(+), 126 deletions(-) diff --git a/metric_learn/constraints.py b/metric_learn/constraints.py index 069a6564..e42ef4b8 100644 --- a/metric_learn/constraints.py +++ b/metric_learn/constraints.py @@ -6,6 +6,7 @@ import warnings from six.moves import xrange from scipy.sparse import coo_matrix +from sklearn.utils import check_random_state __all__ = ['Constraints'] @@ -23,7 +24,8 @@ def __init__(self, partial_labels): self.known_label_idx, = np.where(partial_labels >= 0) self.known_labels = partial_labels[self.known_label_idx] - def adjacency_matrix(self, num_constraints, random_state=np.random): + def adjacency_matrix(self, num_constraints, random_state=None): + random_state = check_random_state(random_state) a, b, c, d = self.positive_negative_pairs(num_constraints, random_state=random_state) row = np.concatenate((a, c)) @@ -35,7 +37,8 @@ def adjacency_matrix(self, num_constraints, random_state=np.random): return adj + adj.T def positive_negative_pairs(self, num_constraints, same_length=False, - random_state=np.random): + random_state=None): + random_state = check_random_state(random_state) a, b = self._pairs(num_constraints, same_label=True, random_state=random_state) c, d = self._pairs(num_constraints, same_label=False, @@ -68,13 +71,14 @@ def _pairs(self, num_constraints, same_label=True, max_iter=10, ab = np.array(list(ab)[:num_constraints], dtype=int) return self.known_label_idx[ab.T] - def chunks(self, num_chunks=100, chunk_size=2, random_state=np.random): + def chunks(self, num_chunks=100, chunk_size=2, random_state=None): """ the random state object to be passed must be a numpy random seed """ + random_state = check_random_state(random_state) chunks = -np.ones_like(self.known_label_idx, dtype=int) uniq, lookup = np.unique(self.known_labels, return_inverse=True) - all_inds = [set(np.where(lookup==c)[0]) for c in xrange(len(uniq))] + all_inds = [set(np.where(lookup == c)[0]) for c in xrange(len(uniq))] idx = 0 while idx < num_chunks and all_inds: if len(all_inds) == 1: diff --git a/metric_learn/itml.py b/metric_learn/itml.py index 16fc21db..36f5d715 100644 --- a/metric_learn/itml.py +++ b/metric_learn/itml.py @@ -6,6 +6,7 @@ import warnings import numpy as np from six.moves import xrange +from sklearn.exceptions import 
ChangedBehaviorWarning from sklearn.metrics import pairwise_distances from sklearn.utils.validation import check_array from sklearn.base import TransformerMixin @@ -298,7 +299,6 @@ class ITML_Supervised(_BaseITML, TransformerMixin): A positive definite (PD) matrix of shape (n_features, n_features), that will be used as such to set the prior. - A0 : Not used .. deprecated:: 0.5.0 `A0` was deprecated in version 0.5.0 and will @@ -310,7 +310,9 @@ class ITML_Supervised(_BaseITML, TransformerMixin): tuples will be formed like this: X[indices]. random_state : int or numpy.RandomState or None, optional (default=None) A pseudo random number generator object or a seed for it if int. If - ``prior='random'``, ``random_state`` is used to set the prior. + ``prior='random'``, ``random_state`` is used to set the prior. In any + case, `random_state` is also used to randomly sample constraints from + labels. Attributes @@ -350,7 +352,7 @@ def __init__(self, gamma=1., max_iter=1000, convergence_threshold=1e-3, self.num_constraints = num_constraints self.bounds = bounds - def fit(self, X, y, random_state=np.random, bounds=None): + def fit(self, X, y, random_state='deprecated', bounds=None): """Create constraints from labels and learn the ITML model. @@ -362,8 +364,11 @@ def fit(self, X, y, random_state=np.random, bounds=None): y : (n) array-like Data labels. - random_state : numpy.random.RandomState, optional - If provided, controls random number generation. + random_state : Not used + .. deprecated:: 0.5.0 + `random_state` in the `fit` function was deprecated in version 0.5.0 + and will be removed in 0.6.0. Set `random_state` at initialization + instead (when instantiating a new `ITML_Supervised` object). bounds : array-like of two numbers Bounds on similarity, aside slack variables, s.t. @@ -384,6 +389,18 @@ def fit(self, X, y, random_state=np.random, bounds=None): ' It has been deprecated in version 0.5.0 and will be' ' removed in 0.6.0. Use the "bounds" parameter of this ' 'fit method instead.', DeprecationWarning) + if random_state != 'deprecated': + warnings.warn('"random_state" parameter in the `fit` function is ' + 'deprecated. Set `random_state` at initialization ' + 'instead (when instantiating a new `ITML_Supervised` ' + 'object).', DeprecationWarning) + else: + warnings.warn('As of v0.5.0, `ITML_Supervised` now uses the ' + '`random_state` given at initialization to sample ' + 'constraints, not the default `np.random` from the `fit` ' + 'method, since this argument is now deprecated. ' + 'This warning will disappear in v0.6.0.', + ChangedBehaviorWarning) X, y = self._prepare_inputs(X, y, ensure_min_samples=2) num_constraints = self.num_constraints if num_constraints is None: @@ -392,6 +409,6 @@ def fit(self, X, y, random_state=np.random, bounds=None): c = Constraints(y) pos_neg = c.positive_negative_pairs(num_constraints, - random_state=random_state) + random_state=self.random_state) pairs, y = wrap_pairs(X, pos_neg) return _BaseITML._fit(self, pairs, y, bounds=bounds) diff --git a/metric_learn/lsml.py b/metric_learn/lsml.py index e3b0d323..72a448ec 100644 --- a/metric_learn/lsml.py +++ b/metric_learn/lsml.py @@ -286,7 +286,8 @@ class LSML_Supervised(_BaseLSML, TransformerMixin): random_state : int or numpy.RandomState or None, optional (default=None) A pseudo random number generator object or a seed for it if int. If ``init='random'``, ``random_state`` is used to set the random - prior. + prior. In any case, `random_state` is also used to randomly sample + constraints from labels. 
Attributes ---------- @@ -308,7 +309,7 @@ def __init__(self, tol=1e-3, max_iter=1000, prior=None, self.num_constraints = num_constraints self.weights = weights - def fit(self, X, y, random_state=np.random): + def fit(self, X, y, random_state='deprecated'): """Create constraints from labels and learn the LSML model. Parameters @@ -319,13 +320,28 @@ def fit(self, X, y, random_state=np.random): y : (n) array-like Data labels. - random_state : numpy.random.RandomState, optional - If provided, controls random number generation. + random_state : Not used + .. deprecated:: 0.5.0 + `random_state` in the `fit` function was deprecated in version 0.5.0 + and will be removed in 0.6.0. Set `random_state` at initialization + instead (when instantiating a new `LSML_Supervised` object). """ if self.num_labeled != 'deprecated': warnings.warn('"num_labeled" parameter is not used.' ' It has been deprecated in version 0.5.0 and will be' ' removed in 0.6.0', DeprecationWarning) + if random_state != 'deprecated': + warnings.warn('"random_state" parameter in the `fit` function is ' + 'deprecated. Set `random_state` at initialization ' + 'instead (when instantiating a new `LSML_Supervised` ' + 'object).', DeprecationWarning) + else: + warnings.warn('As of v0.5.0, `LSML_Supervised` now uses the ' + '`random_state` given at initialization to sample ' + 'constraints, not the default `np.random` from the `fit` ' + 'method, since this argument is now deprecated. ' + 'This warning will disappear in v0.6.0.', + ChangedBehaviorWarning) X, y = self._prepare_inputs(X, y, ensure_min_samples=2) num_constraints = self.num_constraints if num_constraints is None: @@ -334,6 +350,6 @@ def fit(self, X, y, random_state=np.random): c = Constraints(y) pos_neg = c.positive_negative_pairs(num_constraints, same_length=True, - random_state=random_state) + random_state=self.random_state) return _BaseLSML._fit(self, X[np.column_stack(pos_neg)], weights=self.weights) diff --git a/metric_learn/mmc.py b/metric_learn/mmc.py index 9f02425c..55337b2e 100644 --- a/metric_learn/mmc.py +++ b/metric_learn/mmc.py @@ -538,7 +538,8 @@ class MMC_Supervised(_BaseMMC, TransformerMixin): random_state : int or numpy.RandomState or None, optional (default=None) A pseudo random number generator object or a seed for it if int. If ``init='random'``, ``random_state`` is used to initialize the random - Mahalanobis matrix. + Mahalanobis matrix. In any case, `random_state` is also used to + randomly sample constraints from labels. `MMC_Supervised` creates pairs of similar sample by taking same class samples, and pairs of dissimilar samples by taking different class @@ -566,7 +567,7 @@ def __init__(self, max_iter=100, max_proj=10000, convergence_threshold=1e-6, self.num_labeled = num_labeled self.num_constraints = num_constraints - def fit(self, X, y, random_state=np.random): + def fit(self, X, y, random_state='deprecated'): """Create constraints from labels and learn the MMC model. Parameters @@ -575,13 +576,28 @@ def fit(self, X, y, random_state=np.random): Input data, where each row corresponds to a single instance. y : (n) array-like Data labels. - random_state : numpy.random.RandomState, optional - If provided, controls random number generation. + random_state : Not used + .. deprecated:: 0.5.0 + `random_state` in the `fit` function was deprecated in version 0.5.0 + and will be removed in 0.6.0. Set `random_state` at initialization + instead (when instantiating a new `MMC_Supervised` object). 
""" if self.num_labeled != 'deprecated': warnings.warn('"num_labeled" parameter is not used.' ' It has been deprecated in version 0.5.0 and will be' ' removed in 0.6.0', DeprecationWarning) + if random_state != 'deprecated': + warnings.warn('"random_state" parameter in the `fit` function is ' + 'deprecated. Set `random_state` at initialization ' + 'instead (when instantiating a new `MMC_Supervised` ' + 'object).', DeprecationWarning) + else: + warnings.warn('As of v0.5.0, `MMC_Supervised` now uses the ' + '`random_state` given at initialization to sample ' + 'constraints, not the default `np.random` from the `fit` ' + 'method, since this argument is now deprecated. ' + 'This warning will disappear in v0.6.0.', + ChangedBehaviorWarning) X, y = self._prepare_inputs(X, y, ensure_min_samples=2) num_constraints = self.num_constraints if num_constraints is None: @@ -590,6 +606,6 @@ def fit(self, X, y, random_state=np.random): c = Constraints(y) pos_neg = c.positive_negative_pairs(num_constraints, - random_state=random_state) + random_state=self.random_state) pairs, y = wrap_pairs(X, pos_neg) return _BaseMMC._fit(self, pairs, y) diff --git a/metric_learn/rca.py b/metric_learn/rca.py index 503e2408..8686f02d 100644 --- a/metric_learn/rca.py +++ b/metric_learn/rca.py @@ -184,11 +184,17 @@ class RCA_Supervised(RCA): be removed in 0.6.0. Use `n_components` instead. num_chunks: int, optional + chunk_size: int, optional + preprocessor : array-like, shape=(n_samples, n_features) or callable The preprocessor to call to get tuples from indices. If array-like, tuples will be formed like this: X[indices]. + random_state : int or numpy.RandomState or None, optional (default=None) + A pseudo random number generator object or a seed for it if int. + It is used to randomly sample constraints from labels. + Attributes ---------- transformer_ : `numpy.ndarray`, shape=(n_components, n_features) @@ -197,13 +203,15 @@ class RCA_Supervised(RCA): def __init__(self, num_dims='deprecated', n_components=None, pca_comps='deprecated', num_chunks=100, chunk_size=2, - preprocessor=None): + preprocessor=None, random_state=None): + """Initialize the supervised version of `RCA`.""" RCA.__init__(self, num_dims=num_dims, n_components=n_components, pca_comps=pca_comps, preprocessor=preprocessor) self.num_chunks = num_chunks self.chunk_size = chunk_size + self.random_state = random_state - def fit(self, X, y, random_state=np.random): + def fit(self, X, y, random_state='deprecated'): """Create constraints from labels and learn the RCA model. Needs num_constraints specified in constructor. @@ -212,10 +220,26 @@ def fit(self, X, y, random_state=np.random): X : (n x d) data matrix each row corresponds to a single instance y : (n) data labels - random_state : a random.seed object to fix the random_state if needed. + random_state : Not used + .. deprecated:: 0.5.0 + `random_state` in the `fit` function was deprecated in version 0.5.0 + and will be removed in 0.6.0. Set `random_state` at initialization + instead (when instantiating a new `RCA_Supervised` object). """ + if random_state != 'deprecated': + warnings.warn('"random_state" parameter in the `fit` function is ' + 'deprecated. 
Set `random_state` at initialization ' + 'instead (when instantiating a new `RCA_Supervised` ' + 'object).', DeprecationWarning) + else: + warnings.warn('As of v0.5.0, `RCA_Supervised` now uses the ' + '`random_state` given at initialization to sample ' + 'constraints, not the default `np.random` from the `fit` ' + 'method, since this argument is now deprecated. ' + 'This warning will disappear in v0.6.0.', + ChangedBehaviorWarning) X, y = self._prepare_inputs(X, y, ensure_min_samples=2) chunks = Constraints(y).chunks(num_chunks=self.num_chunks, chunk_size=self.chunk_size, - random_state=random_state) + random_state=self.random_state) return RCA.fit(self, X, chunks) diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py index 70e65c86..9344ef7c 100644 --- a/metric_learn/sdml.py +++ b/metric_learn/sdml.py @@ -310,7 +310,8 @@ class SDML_Supervised(_BaseSDML, TransformerMixin): random_state : int or numpy.RandomState or None, optional (default=None) A pseudo random number generator object or a seed for it if int. If ``init='random'``, ``random_state`` is used to set the random - prior. + prior. In any case, `random_state` is also used to randomly sample + constraints from labels. Attributes ---------- @@ -336,7 +337,7 @@ def __init__(self, balance_param=0.5, sparsity_param=0.01, prior=None, self.num_labeled = num_labeled self.num_constraints = num_constraints - def fit(self, X, y, random_state=np.random): + def fit(self, X, y, random_state='deprecated'): """Create constraints from labels and learn the SDML model. Parameters @@ -345,9 +346,11 @@ def fit(self, X, y, random_state=np.random): data matrix, where each row corresponds to a single instance y : array-like, shape (n,) data labels, one for each instance - random_state : {numpy.random.RandomState, int}, optional - Random number generator or random seed. If not given, the singleton - numpy.random will be used. + random_state : Not used + .. deprecated:: 0.5.0 + `random_state` in the `fit` function was deprecated in version 0.5.0 + and will be removed in 0.6.0. Set `random_state` at initialization + instead (when instantiating a new `SDML_Supervised` object). Returns ------- @@ -358,6 +361,18 @@ def fit(self, X, y, random_state=np.random): warnings.warn('"num_labeled" parameter is not used.' ' It has been deprecated in version 0.5.0 and will be' ' removed in 0.6.0', DeprecationWarning) + if random_state != 'deprecated': + warnings.warn('"random_state" parameter in the `fit` function is ' + 'deprecated. Set `random_state` at initialization ' + 'instead (when instantiating a new `SDML_Supervised` ' + 'object).', DeprecationWarning) + else: + warnings.warn('As of v0.5.0, `SDML_Supervised` now uses the ' + '`random_state` given at initialization to sample ' + 'constraints, not the default `np.random` from the `fit` ' + 'method, since this argument is now deprecated. 
' + 'This warning will disappear in v0.6.0.', + ChangedBehaviorWarning) X, y = self._prepare_inputs(X, y, ensure_min_samples=2) num_constraints = self.num_constraints if num_constraints is None: @@ -366,6 +381,6 @@ def fit(self, X, y, random_state=np.random): c = Constraints(y) pos_neg = c.positive_negative_pairs(num_constraints, - random_state=random_state) + random_state=self.random_state) pairs, y = wrap_pairs(X, pos_neg) return _BaseSDML._fit(self, pairs, y) diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py index c49c9ef5..0f47a58a 100644 --- a/test/metric_learn_test.py +++ b/test/metric_learn_test.py @@ -119,6 +119,37 @@ def test_changed_behaviour_warning(self): lsml.fit(pairs) assert any(msg == str(wrn.message) for wrn in raised_warning) + def test_deprecation_random_state(self): + # test that a deprecation message is thrown if random_state is set at + # fit time + # TODO: remove in v.0.6 + X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) + y = np.array([1, 0, 1, 0]) + lsml_supervised = LSML_Supervised() + msg = ('"random_state" parameter in the `fit` function is ' + 'deprecated. Set `random_state` at initialization ' + 'instead (when instantiating a new `LSML_Supervised` ' + 'object).') + with pytest.warns(DeprecationWarning) as raised_warning: + lsml_supervised.fit(X, y, random_state=np.random) + assert any(msg == str(wrn.message) for wrn in raised_warning) + + def test_changed_behaviour_warning_random_state(self): + # test that a ChangedBehavior warning is thrown if the random_state is + # not set in fit. + # TODO: remove in v.0.6 + X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) + y = np.array([1, 0, 1, 0]) + lsml_supervised = LSML_Supervised() + msg = ('As of v0.5.0, `LSML_Supervised` now uses the ' + '`random_state` given at initialization to sample ' + 'constraints, not the default `np.random` from the `fit` ' + 'method, since this argument is now deprecated. ' + 'This warning will disappear in v0.6.0.') + with pytest.warns(ChangedBehaviorWarning) as raised_warning: + lsml_supervised.fit(X, y) + assert any(msg == str(wrn.message) for wrn in raised_warning) + class TestITML(MetricTestCase): def test_iris(self): @@ -174,6 +205,37 @@ def test_deprecation_A0(self): itml.fit(pairs, y_pairs) assert any(msg == str(wrn.message) for wrn in raised_warning) + def test_deprecation_random_state(self): + # test that a deprecation message is thrown if random_state is set at + # fit time + # TODO: remove in v.0.6 + X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) + y = np.array([1, 0, 1, 0]) + itml_supervised = ITML_Supervised() + msg = ('"random_state" parameter in the `fit` function is ' + 'deprecated. Set `random_state` at initialization ' + 'instead (when instantiating a new `ITML_Supervised` ' + 'object).') + with pytest.warns(DeprecationWarning) as raised_warning: + itml_supervised.fit(X, y, random_state=np.random) + assert any(msg == str(wrn.message) for wrn in raised_warning) + + def test_changed_behaviour_warning_random_state(self): + # test that a ChangedBehavior warning is thrown if the random_state is + # not set in fit. + # TODO: remove in v.0.6 + X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) + y = np.array([1, 0, 1, 0]) + itml_supervised = ITML_Supervised() + msg = ('As of v0.5.0, `ITML_Supervised` now uses the ' + '`random_state` given at initialization to sample ' + 'constraints, not the default `np.random` from the `fit` ' + 'method, since this argument is now deprecated. 
' + 'This warning will disappear in v0.6.0.') + with pytest.warns(ChangedBehaviorWarning) as raised_warning: + itml_supervised.fit(X, y) + assert any(msg == str(wrn.message) for wrn in raised_warning) + @pytest.mark.parametrize('bounds', [None, (20., 100.), [20., 100.], np.array([20., 100.]), @@ -446,11 +508,11 @@ def test_sdml_supervised_raises_warning_msg_installed_skggm(self): X = np.array([[-10., 0.], [10., 0.], [5., 0.], [3., 0.]]) y = [0, 0, 1, 1] sdml_supervised = SDML_Supervised(balance_param=0.5, prior='identity', - sparsity_param=0.01) + sparsity_param=0.01, random_state=rng) msg = ("There was a problem in SDML when using skggm's graphical " "lasso solver.") with pytest.raises(RuntimeError) as raised_error: - sdml_supervised.fit(X, y, random_state=rng) + sdml_supervised.fit(X, y) assert msg == str(raised_error.value) @pytest.mark.skipif(not HAS_SKGGM, @@ -535,8 +597,9 @@ def test_sdml_works_on_non_spd_pb_with_skggm(self): it should work, but scikit-learn's graphical_lasso does not work""" X, y = load_iris(return_X_y=True) sdml = SDML_Supervised(balance_param=0.5, sparsity_param=0.01, - prior='covariance') - sdml.fit(X, y, random_state=np.random.RandomState(42)) + prior='covariance', + random_state=np.random.RandomState(42)) + sdml.fit(X, y) def test_deprecation_use_cov(self): # test that a deprecation message is thrown if use_cov is set at @@ -586,6 +649,35 @@ def test_changed_behaviour_warning(self): sdml.fit(pairs, y_pairs) assert any(msg == str(wrn.message) for wrn in raised_warning) + def test_deprecation_random_state(self): + # test that a deprecation message is thrown if random_state is set at + # fit time + # TODO: remove in v.0.6 + X, y = load_iris(return_X_y=True) + sdml_supervised = SDML_Supervised(balance_param=5e-5) + msg = ('"random_state" parameter in the `fit` function is ' + 'deprecated. Set `random_state` at initialization ' + 'instead (when instantiating a new `SDML_Supervised` ' + 'object).') + with pytest.warns(DeprecationWarning) as raised_warning: + sdml_supervised.fit(X, y, random_state=np.random) + assert any(msg == str(wrn.message) for wrn in raised_warning) + + def test_changed_behaviour_warning_random_state(self): + # test that a ChangedBehavior warning is thrown if the random_state is + # not set in fit. + # TODO: remove in v.0.6 + X, y = load_iris(return_X_y=True) + sdml_supervised = SDML_Supervised(balance_param=5e-5) + msg = ('As of v0.5.0, `SDML_Supervised` now uses the ' + '`random_state` given at initialization to sample ' + 'constraints, not the default `np.random` from the `fit` ' + 'method, since this argument is now deprecated. 
' + 'This warning will disappear in v0.6.0.') + with pytest.warns(ChangedBehaviorWarning) as raised_warning: + sdml_supervised.fit(X, y) + assert any(msg == str(wrn.message) for wrn in raised_warning) + @pytest.mark.skipif(not HAS_SKGGM, reason='The message should be printed only if skggm is ' @@ -819,24 +911,7 @@ def test_iris(self): rca = RCA_Supervised(n_components=2, num_chunks=30, chunk_size=2) rca.fit(self.iris_points, self.iris_labels) csep = class_separation(rca.transform(self.iris_points), self.iris_labels) - self.assertLess(csep, 0.25) - - def test_feature_null_variance(self): - X = np.hstack((self.iris_points, np.eye(len(self.iris_points), M=1))) - - # Apply PCA with the number of components - rca = RCA_Supervised(n_components=2, pca_comps=3, num_chunks=30, - chunk_size=2) - rca.fit(X, self.iris_labels) - csep = class_separation(rca.transform(X), self.iris_labels) - self.assertLess(csep, 0.30) - - # Apply PCA with the minimum variance ratio - rca = RCA_Supervised(n_components=2, pca_comps=0.95, num_chunks=30, - chunk_size=2) - rca.fit(X, self.iris_labels) - csep = class_separation(rca.transform(X), self.iris_labels) - self.assertLess(csep, 0.30) + self.assertLess(csep, 0.29) def test_deprecation_pca_comps(self): # test that a deprecation message is thrown if pca_comps is set at @@ -851,12 +926,12 @@ def test_deprecation_pca_comps(self): '`sklearn.decomposition.PCA` and an `sklearn.pipeline.Pipeline`.') with pytest.warns(ChangedBehaviorWarning) as expected_msg: rca_supervised.fit(X, y) - assert str(expected_msg[0].message) == msg + assert any(str(w.message) == msg for w in expected_msg) rca = RCA(pca_comps=X.shape[1]) with pytest.warns(ChangedBehaviorWarning) as expected_msg: rca.fit(X, y) - assert str(expected_msg[0].message) == msg + assert any(str(w.message) == msg for w in expected_msg) def test_changedbehaviorwarning_preprocessing(self): # test that a ChangedBehaviorWarning is thrown when using RCA @@ -871,12 +946,12 @@ def test_changedbehaviorwarning_preprocessing(self): rca_supervised = RCA_Supervised(num_chunks=20) with pytest.warns(ChangedBehaviorWarning) as expected_msg: rca_supervised.fit(X, y) - assert str(expected_msg[0].message) == msg + assert any(str(w.message) == msg for w in expected_msg) rca = RCA() with pytest.warns(ChangedBehaviorWarning) as expected_msg: rca.fit(X, y) - assert str(expected_msg[0].message) == msg + assert any(str(w.message) == msg for w in expected_msg) def test_rank_deficient_returns_warning(self): """Checks that if the covariance matrix is not invertible, we raise a @@ -895,6 +970,35 @@ def test_rank_deficient_returns_warning(self): rca.fit(X, y) assert any(str(w.message) == msg for w in raised_warnings) + def test_deprecation_random_state(self): + # test that a deprecation message is thrown if random_state is set at + # fit time + # TODO: remove in v.0.6 + X, y = make_classification(random_state=42, n_samples=100) + rca_supervised = RCA_Supervised(num_chunks=20) + msg = ('"random_state" parameter in the `fit` function is ' + 'deprecated. Set `random_state` at initialization ' + 'instead (when instantiating a new `RCA_Supervised` ' + 'object).') + with pytest.warns(DeprecationWarning) as raised_warning: + rca_supervised.fit(X, y, random_state=np.random) + assert any(msg == str(wrn.message) for wrn in raised_warning) + + def test_changed_behaviour_warning_random_state(self): + # test that a ChangedBehavior warning is thrown if the random_state is + # not set in fit. 
+ # TODO: remove in v.0.6 + X, y = make_classification(random_state=42, n_samples=100) + rca_supervised = RCA_Supervised(num_chunks=20) + msg = ('As of v0.5.0, `RCA_Supervised` now uses the ' + '`random_state` given at initialization to sample ' + 'constraints, not the default `np.random` from the `fit` ' + 'method, since this argument is now deprecated. ' + 'This warning will disappear in v0.6.0.') + with pytest.warns(ChangedBehaviorWarning) as raised_warning: + rca_supervised.fit(X, y) + assert any(msg == str(wrn.message) for wrn in raised_warning) + @pytest.mark.parametrize('num_dims', [None, 2]) def test_deprecation_num_dims_rca(num_dims): @@ -908,7 +1012,7 @@ def test_deprecation_num_dims_rca(num_dims): ' removed in 0.6.0. Use "n_components" instead') with pytest.warns(DeprecationWarning) as raised_warning: rca.fit(X, y) - assert (str(raised_warning[0].message) == msg) + assert any(str(w.message) == msg for w in raised_warning) # we take a small number of chunks so that RCA works on iris rca_supervised = RCA_Supervised(num_dims=num_dims, num_chunks=10) @@ -917,7 +1021,7 @@ def test_deprecation_num_dims_rca(num_dims): ' removed in 0.6.0. Use "n_components" instead') with pytest.warns(DeprecationWarning) as raised_warning: rca_supervised.fit(X, y) - assert (str(raised_warning[0].message) == msg) + assert any(str(w.message) == msg for w in raised_warning) class TestMLKR(MetricTestCase): @@ -1095,6 +1199,37 @@ def test_changed_behaviour_warning(self): mmc.fit(pairs, y_pairs) assert any(msg == str(wrn.message) for wrn in raised_warning) + def test_deprecation_random_state(self): + # test that a deprecation message is thrown if random_state is set at + # fit time + # TODO: remove in v.0.6 + X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) + y = np.array([1, 0, 1, 0]) + mmc_supervised = MMC_Supervised() + msg = ('"random_state" parameter in the `fit` function is ' + 'deprecated. Set `random_state` at initialization ' + 'instead (when instantiating a new `MMC_Supervised` ' + 'object).') + with pytest.warns(DeprecationWarning) as raised_warning: + mmc_supervised.fit(X, y, random_state=np.random) + assert any(msg == str(wrn.message) for wrn in raised_warning) + + def test_changed_behaviour_warning_random_state(self): + # test that a ChangedBehavior warning is thrown if the random_state is + # not set in fit. + # TODO: remove in v.0.6 + X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) + y = np.array([1, 0, 1, 0]) + mmc_supervised = MMC_Supervised() + msg = ('As of v0.5.0, `MMC_Supervised` now uses the ' + '`random_state` given at initialization to sample ' + 'constraints, not the default `np.random` from the `fit` ' + 'method, since this argument is now deprecated. 
' + 'This warning will disappear in v0.6.0.') + with pytest.warns(ChangedBehaviorWarning) as raised_warning: + mmc_supervised.fit(X, y) + assert any(msg == str(wrn.message) for wrn in raised_warning) + @pytest.mark.parametrize(('algo_class', 'dataset'), [(NCA, make_classification()), diff --git a/test/test_base_metric.py b/test/test_base_metric.py index 313948ec..0c1117ed 100644 --- a/test/test_base_metric.py +++ b/test/test_base_metric.py @@ -96,7 +96,7 @@ def test_rca(self): "RCA_Supervised(chunk_size=2, " "n_components=None, num_chunks=100, " "num_dims='deprecated', pca_comps='deprecated', " - "preprocessor=None)")) + "preprocessor=None, random_state=None)")) def test_mlkr(self): self.assertEqual(remove_spaces(str(metric_learn.MLKR())), diff --git a/test/test_fit_transform.py b/test/test_fit_transform.py index b7255ea9..a9b2719e 100644 --- a/test/test_fit_transform.py +++ b/test/test_fit_transform.py @@ -30,25 +30,25 @@ def test_cov(self): def test_lsml_supervised(self): seed = np.random.RandomState(1234) - lsml = LSML_Supervised(num_constraints=200) - lsml.fit(self.X, self.y, random_state=seed) + lsml = LSML_Supervised(num_constraints=200, random_state=seed) + lsml.fit(self.X, self.y) res_1 = lsml.transform(self.X) seed = np.random.RandomState(1234) - lsml = LSML_Supervised(num_constraints=200) - res_2 = lsml.fit_transform(self.X, self.y, random_state=seed) + lsml = LSML_Supervised(num_constraints=200, random_state=seed) + res_2 = lsml.fit_transform(self.X, self.y) assert_array_almost_equal(res_1, res_2) def test_itml_supervised(self): seed = np.random.RandomState(1234) - itml = ITML_Supervised(num_constraints=200) - itml.fit(self.X, self.y, random_state=seed) + itml = ITML_Supervised(num_constraints=200, random_state=seed) + itml.fit(self.X, self.y) res_1 = itml.transform(self.X) seed = np.random.RandomState(1234) - itml = ITML_Supervised(num_constraints=200) - res_2 = itml.fit_transform(self.X, self.y, random_state=seed) + itml = ITML_Supervised(num_constraints=200, random_state=seed) + res_2 = itml.fit_transform(self.X, self.y) assert_array_almost_equal(res_1, res_2) @@ -65,14 +65,14 @@ def test_lmnn(self): def test_sdml_supervised(self): seed = np.random.RandomState(1234) sdml = SDML_Supervised(num_constraints=1500, balance_param=1e-5, - prior='identity') - sdml.fit(self.X, self.y, random_state=seed) + prior='identity', random_state=seed) + sdml.fit(self.X, self.y) res_1 = sdml.transform(self.X) seed = np.random.RandomState(1234) sdml = SDML_Supervised(num_constraints=1500, balance_param=1e-5, - prior='identity') - res_2 = sdml.fit_transform(self.X, self.y, random_state=seed) + prior='identity', random_state=seed) + res_2 = sdml.fit_transform(self.X, self.y) assert_array_almost_equal(res_1, res_2) @@ -100,13 +100,15 @@ def test_lfda(self): def test_rca_supervised(self): seed = np.random.RandomState(1234) - rca = RCA_Supervised(n_components=2, num_chunks=30, chunk_size=2) - rca.fit(self.X, self.y, random_state=seed) + rca = RCA_Supervised(n_components=2, num_chunks=30, chunk_size=2, + random_state=seed) + rca.fit(self.X, self.y) res_1 = rca.transform(self.X) seed = np.random.RandomState(1234) - rca = RCA_Supervised(n_components=2, num_chunks=30, chunk_size=2) - res_2 = rca.fit_transform(self.X, self.y, random_state=seed) + rca = RCA_Supervised(n_components=2, num_chunks=30, chunk_size=2, + random_state=seed) + res_2 = rca.fit_transform(self.X, self.y) assert_array_almost_equal(res_1, res_2) @@ -122,13 +124,13 @@ def test_mlkr(self): def test_mmc_supervised(self): seed = 
np.random.RandomState(1234) - mmc = MMC_Supervised(num_constraints=200) - mmc.fit(self.X, self.y, random_state=seed) + mmc = MMC_Supervised(num_constraints=200, random_state=seed) + mmc.fit(self.X, self.y) res_1 = mmc.transform(self.X) seed = np.random.RandomState(1234) - mmc = MMC_Supervised(num_constraints=200) - res_2 = mmc.fit_transform(self.X, self.y, random_state=seed) + mmc = MMC_Supervised(num_constraints=200, random_state=seed) + res_2 = mmc.fit_transform(self.X, self.y) assert_array_almost_equal(res_1, res_2) diff --git a/test/test_sklearn_compat.py b/test/test_sklearn_compat.py index 4c511263..b2056c09 100644 --- a/test/test_sklearn_compat.py +++ b/test/test_sklearn_compat.py @@ -24,31 +24,28 @@ quadruplets_learners) -# Wrap the _Supervised methods with a deterministic wrapper for testing. -class deterministic_mixin(object): - def fit(self, X, y): - rs = np.random.RandomState(1234) - return super(deterministic_mixin, self).fit(X, y, random_state=rs) +class Stable_RCA_Supervised(RCA_Supervised): + def __init__(self, n_components=None, pca_comps=None, + chunk_size=2, preprocessor=None, random_state=None): + # this init makes RCA stable for scikit-learn examples. + super(Stable_RCA_Supervised, self).__init__( + num_chunks=2, n_components=n_components, pca_comps=pca_comps, + chunk_size=chunk_size, preprocessor=preprocessor, + random_state=random_state) -class dLSML(deterministic_mixin, LSML_Supervised): - pass +class Stable_SDML_Supervised(SDML_Supervised): -class dITML(deterministic_mixin, ITML_Supervised): - pass - - -class dMMC(deterministic_mixin, MMC_Supervised): - pass - - -class dSDML(deterministic_mixin, SDML_Supervised): - pass - - -class dRCA(deterministic_mixin, RCA_Supervised): - pass + def __init__(self, sparsity_param=0.01, num_labeled='deprecated', + num_constraints=None, verbose=False, preprocessor=None, + random_state=None): + # this init makes SDML stable for scikit-learn examples. + super(Stable_SDML_Supervised, self).__init__( + sparsity_param=sparsity_param, num_labeled=num_labeled, + num_constraints=num_constraints, verbose=verbose, + preprocessor=preprocessor, balance_param=1e-5, prior='identity', + random_state=random_state) class TestSklearnCompat(unittest.TestCase): @@ -68,36 +65,19 @@ def test_nca(self): check_estimator(NCA) def test_lsml(self): - check_estimator(dLSML) + check_estimator(LSML_Supervised) def test_itml(self): - check_estimator(dITML) + check_estimator(ITML_Supervised) def test_mmc(self): - check_estimator(dMMC) + check_estimator(MMC_Supervised) def test_sdml(self): - def stable_init(self, sparsity_param=0.01, num_labeled='deprecated', - num_constraints=None, verbose=False, preprocessor=None): - # this init makes SDML stable for scikit-learn examples. - SDML_Supervised.__init__(self, sparsity_param=sparsity_param, - num_labeled=num_labeled, - num_constraints=num_constraints, - verbose=verbose, - preprocessor=preprocessor, - balance_param=1e-5, prior='identity') - dSDML.__init__ = stable_init - check_estimator(dSDML) + check_estimator(Stable_SDML_Supervised) def test_rca(self): - def stable_init(self, n_components=None, pca_comps=None, - chunk_size=2, preprocessor=None): - # this init makes RCA stable for scikit-learn examples. 
- RCA_Supervised.__init__(self, num_chunks=2, n_components=n_components, - pca_comps=pca_comps, chunk_size=chunk_size, - preprocessor=preprocessor) - dRCA.__init__ = stable_init - check_estimator(dRCA) + check_estimator(Stable_RCA_Supervised) RNG = check_random_state(0) diff --git a/test/test_transformer_metric_conversion.py b/test/test_transformer_metric_conversion.py index 651f60ea..62ac8777 100644 --- a/test/test_transformer_metric_conversion.py +++ b/test/test_transformer_metric_conversion.py @@ -30,8 +30,8 @@ def test_cov(self): def test_lsml_supervised(self): seed = np.random.RandomState(1234) - lsml = LSML_Supervised(num_constraints=200) - lsml.fit(self.X, self.y, random_state=seed) + lsml = LSML_Supervised(num_constraints=200, random_state=seed) + lsml.fit(self.X, self.y) L = lsml.transformer_ assert_array_almost_equal(L.T.dot(L), lsml.get_mahalanobis_matrix()) @@ -51,8 +51,8 @@ def test_lmnn(self): def test_sdml_supervised(self): seed = np.random.RandomState(1234) sdml = SDML_Supervised(num_constraints=1500, prior='identity', - balance_param=1e-5) - sdml.fit(self.X, self.y, random_state=seed) + balance_param=1e-5, random_state=seed) + sdml.fit(self.X, self.y) L = sdml.transformer_ assert_array_almost_equal(L.T.dot(L), sdml.get_mahalanobis_matrix()) From a7ede57731f6fb47af08fde0f53f07c3744b8e66 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20Bellet?= Date: Thu, 4 Jul 2019 08:46:51 +0200 Subject: [PATCH 125/210] Rename transformer_ to components_ (#230) --- doc/introduction.rst | 2 +- metric_learn/_util.py | 14 ++--- metric_learn/base_metric.py | 14 ++--- metric_learn/covariance.py | 8 +-- metric_learn/itml.py | 12 ++--- metric_learn/lfda.py | 4 +- metric_learn/lmnn.py | 14 ++--- metric_learn/lsml.py | 12 ++--- metric_learn/mlkr.py | 14 ++--- metric_learn/mmc.py | 14 ++--- metric_learn/nca.py | 8 +-- metric_learn/rca.py | 8 +-- metric_learn/sdml.py | 12 ++--- test/metric_learn_test.py | 10 ++-- test/test_base_metric.py | 6 +-- ...y => test_components_metric_conversion.py} | 52 +++++++++---------- test/test_mahalanobis_mixin.py | 19 +++---- test/test_pairs_classifiers.py | 8 +-- test/test_quadruplets_classifiers.py | 2 +- 19 files changed, 117 insertions(+), 116 deletions(-) rename test/{test_transformer_metric_conversion.py => test_components_metric_conversion.py} (85%) diff --git a/doc/introduction.rst b/doc/introduction.rst index ef221971..04ae1a18 100644 --- a/doc/introduction.rst +++ b/doc/introduction.rst @@ -135,7 +135,7 @@ to the following resources: arrays and outputs the learned metric score on these two points .. :math:`M = L^{\top}L` such that distance between vectors ``x`` and .. ``y`` can be computed as :math:`\sqrt{\left(x-y\right)M\left(x-y\right)}`. -.. - ``transformer_from_metric(metric)``, which returns a transformation matrix +.. - ``components_from_metric(metric)``, which returns a transformation matrix .. :math:`L \in \mathbb{R}^{D \times d}`, which can be used to convert a .. data matrix :math:`X \in \mathbb{R}^{n \times d}` to the .. :math:`D`-dimensional learned metric space :math:`X L^{\top}`, diff --git a/metric_learn/_util.py b/metric_learn/_util.py index 9cf6d7c6..b476e70b 100644 --- a/metric_learn/_util.py +++ b/metric_learn/_util.py @@ -375,7 +375,7 @@ def _check_sdp_from_eigen(w, tol=None): return True -def transformer_from_metric(metric, tol=None): +def components_from_metric(metric, tol=None): """Returns the transformation matrix from the Mahalanobis matrix. Returns the transformation matrix from the Mahalanobis matrix, i.e. 
the @@ -429,10 +429,10 @@ def validate_vector(u, dtype=None): return u -def _initialize_transformer(n_components, input, y=None, init='auto', - verbose=False, random_state=None, - has_classes=True): - """Returns the initial transformer to be used depending on the arguments. +def _initialize_components(n_components, input, y=None, init='auto', + verbose=False, random_state=None, + has_classes=True): + """Returns the initial transformation to be used depending on the arguments. Parameters ---------- @@ -503,8 +503,8 @@ def _initialize_transformer(n_components, input, y=None, init='auto', Returns ------- - init_transformer : `numpy.ndarray` - The initial transformer to use. + init_components : `numpy.ndarray` + The initial transformation to use. """ # if we are doing a regression we cannot use lda: n_features = input.shape[-1] diff --git a/metric_learn/base_metric.py b/metric_learn/base_metric.py index 55fba54f..cd1c3c71 100644 --- a/metric_learn/base_metric.py +++ b/metric_learn/base_metric.py @@ -177,7 +177,7 @@ class MahalanobisMixin(six.with_metaclass(ABCMeta, BaseMetricLearner, Attributes ---------- - transformer_ : `numpy.ndarray`, shape=(n_components, n_features) + components_ : `numpy.ndarray`, shape=(n_components, n_features) The learned linear transformation ``L``. """ @@ -243,10 +243,10 @@ def transform(self, X): X_checked = check_input(X, type_of_inputs='classic', estimator=self, preprocessor=self.preprocessor_, accept_sparse=True) - return X_checked.dot(self.transformer_.T) + return X_checked.dot(self.components_.T) def get_metric(self): - transformer_T = self.transformer_.T.copy() + components_T = self.components_.T.copy() def metric_fun(u, v, squared=False): """This function computes the metric between u and v, according to the @@ -271,7 +271,7 @@ def metric_fun(u, v, squared=False): """ u = validate_vector(u) v = validate_vector(v) - transformed_diff = (u - v).dot(transformer_T) + transformed_diff = (u - v).dot(components_T) dist = np.dot(transformed_diff, transformed_diff.T) if not squared: dist = np.sqrt(dist) @@ -298,7 +298,7 @@ def get_mahalanobis_matrix(self): M : `numpy.ndarray`, shape=(n_features, n_features) The copy of the learned Mahalanobis matrix. """ - return self.transformer_.T.dot(self.transformer_) + return self.components_.T.dot(self.components_) class _PairsClassifierMixin(BaseMetricLearner): @@ -333,7 +333,7 @@ def predict(self, pairs): y_predicted : `numpy.ndarray` of floats, shape=(n_constraints,) The predicted learned metric value between samples in every pair. """ - check_is_fitted(self, ['threshold_', 'transformer_']) + check_is_fitted(self, ['threshold_', 'components_']) return 2 * (- self.decision_function(pairs) <= self.threshold_) - 1 def decision_function(self, pairs): @@ -599,7 +599,7 @@ def predict(self, quadruplets): prediction : `numpy.ndarray` of floats, shape=(n_constraints,) Predictions of the ordering of pairs, for each quadruplet. 
""" - check_is_fitted(self, 'transformer_') + check_is_fitted(self, 'components_') quadruplets = check_input(quadruplets, type_of_inputs='tuples', preprocessor=self.preprocessor_, estimator=self, tuple_size=self._tuple_size) diff --git a/metric_learn/covariance.py b/metric_learn/covariance.py index b9666494..7214dd62 100644 --- a/metric_learn/covariance.py +++ b/metric_learn/covariance.py @@ -8,7 +8,7 @@ from sklearn.base import TransformerMixin from .base_metric import MahalanobisMixin -from ._util import transformer_from_metric +from ._util import components_from_metric class Covariance(MahalanobisMixin, TransformerMixin): @@ -24,9 +24,9 @@ class Covariance(MahalanobisMixin, TransformerMixin): Attributes ---------- - transformer_ : `numpy.ndarray`, shape=(n_features, n_features) + components_ : `numpy.ndarray`, shape=(n_features, n_features) The linear transformation ``L`` deduced from the learned Mahalanobis - metric (See function `transformer_from_metric`.) + metric (See function `components_from_metric`.) Examples -------- @@ -53,5 +53,5 @@ def fit(self, X, y=None): else: M = scipy.linalg.pinvh(M) - self.transformer_ = transformer_from_metric(np.atleast_2d(M)) + self.components_ = components_from_metric(np.atleast_2d(M)) return self diff --git a/metric_learn/itml.py b/metric_learn/itml.py index 36f5d715..c3b91fc4 100644 --- a/metric_learn/itml.py +++ b/metric_learn/itml.py @@ -12,7 +12,7 @@ from sklearn.base import TransformerMixin from .base_metric import _PairsClassifierMixin, MahalanobisMixin from .constraints import Constraints, wrap_pairs -from ._util import transformer_from_metric, _initialize_metric_mahalanobis +from ._util import components_from_metric, _initialize_metric_mahalanobis class _BaseITML(MahalanobisMixin): @@ -105,7 +105,7 @@ def _fit(self, pairs, y, bounds=None): print('itml converged at iter: %d, conv = %f' % (it, conv)) self.n_iter_ = it - self.transformer_ = transformer_from_metric(A) + self.components_ = components_from_metric(A) return self @@ -186,9 +186,9 @@ class ITML(_BaseITML, _PairsClassifierMixin): n_iter_ : `int` The number of iterations the solver has run. - transformer_ : `numpy.ndarray`, shape=(n_features, n_features) + components_ : `numpy.ndarray`, shape=(n_features, n_features) The linear transformation ``L`` deduced from the learned Mahalanobis - metric (See function `transformer_from_metric`.) + metric (See function `components_from_metric`.) threshold_ : `float` If the distance metric between two points is lower than this threshold, @@ -329,9 +329,9 @@ class ITML_Supervised(_BaseITML, TransformerMixin): n_iter_ : `int` The number of iterations the solver has run. - transformer_ : `numpy.ndarray`, shape=(n_features, n_features) + components_ : `numpy.ndarray`, shape=(n_features, n_features) The linear transformation ``L`` deduced from the learned Mahalanobis - metric (See function `transformer_from_metric`.) + metric (See function `components_from_metric`.) See Also -------- diff --git a/metric_learn/lfda.py b/metric_learn/lfda.py index 6c651b7b..ffc4c885 100644 --- a/metric_learn/lfda.py +++ b/metric_learn/lfda.py @@ -51,7 +51,7 @@ class LFDA(MahalanobisMixin, TransformerMixin): Attributes ---------- - transformer_ : `numpy.ndarray`, shape=(n_components, n_features) + components_ : `numpy.ndarray`, shape=(n_components, n_features) The learned linear transformation ``L``. 
Examples @@ -155,7 +155,7 @@ def fit(self, X, y): elif self.embedding_type == 'orthonormalized': vecs, _ = np.linalg.qr(vecs) - self.transformer_ = vecs.T + self.components_ = vecs.T return self diff --git a/metric_learn/lmnn.py b/metric_learn/lmnn.py index 600d55c0..5dc1810d 100644 --- a/metric_learn/lmnn.py +++ b/metric_learn/lmnn.py @@ -12,7 +12,7 @@ from sklearn.metrics import euclidean_distances from sklearn.base import TransformerMixin -from ._util import _initialize_transformer, _check_n_components +from ._util import _initialize_components, _check_n_components from .base_metric import MahalanobisMixin @@ -117,7 +117,7 @@ class LMNN(MahalanobisMixin, TransformerMixin): n_iter_ : `int` The number of iterations the solver has run. - transformer_ : `numpy.ndarray`, shape=(n_components, n_features) + components_ : `numpy.ndarray`, shape=(n_components, n_features) The learned linear transformation ``L``. Examples @@ -199,9 +199,9 @@ def fit(self, X, y): init = 'auto' else: init = self.init - self.transformer_ = _initialize_transformer(output_dim, X, y, init, - self.verbose, - self.random_state) + self.components_ = _initialize_components(output_dim, X, y, init, + self.verbose, + self.random_state) required_k = np.bincount(label_inds).min() if self.k > required_k: raise ValueError('not enough class labels for specified k' @@ -226,7 +226,7 @@ def fit(self, X, y): a2[nn_idx] = np.array([]) # initialize L - L = self.transformer_ + L = self.components_ # first iteration: we compute variables (including objective and gradient) # at initialization point @@ -281,7 +281,7 @@ def fit(self, X, y): print("LMNN didn't converge in %d steps." % self.max_iter) # store the last L - self.transformer_ = L + self.components_ = L self.n_iter_ = it return self diff --git a/metric_learn/lsml.py b/metric_learn/lsml.py index 72a448ec..b1b2fc7f 100644 --- a/metric_learn/lsml.py +++ b/metric_learn/lsml.py @@ -12,7 +12,7 @@ from .base_metric import _QuadrupletsClassifierMixin, MahalanobisMixin from .constraints import Constraints -from ._util import transformer_from_metric, _initialize_metric_mahalanobis +from ._util import components_from_metric, _initialize_metric_mahalanobis class _BaseLSML(MahalanobisMixin): @@ -94,7 +94,7 @@ def _fit(self, quadruplets, weights=None): print("Didn't converge after", it, "iterations. Final loss:", s_best) self.n_iter_ = it - self.transformer_ = transformer_from_metric(M) + self.components_ = components_from_metric(M) return self def _comparison_loss(self, metric, vab, vcd): @@ -180,9 +180,9 @@ class LSML(_BaseLSML, _QuadrupletsClassifierMixin): n_iter_ : `int` The number of iterations the solver has run. - transformer_ : `numpy.ndarray`, shape=(n_features, n_features) + components_ : `numpy.ndarray`, shape=(n_features, n_features) The linear transformation ``L`` deduced from the learned Mahalanobis - metric (See function `transformer_from_metric`.) + metric (See function `components_from_metric`.) Examples -------- @@ -294,9 +294,9 @@ class LSML_Supervised(_BaseLSML, TransformerMixin): n_iter_ : `int` The number of iterations the solver has run. - transformer_ : `numpy.ndarray`, shape=(n_features, n_features) + components_ : `numpy.ndarray`, shape=(n_features, n_features) The linear transformation ``L`` deduced from the learned Mahalanobis - metric (See function `transformer_from_metric`.) + metric (See function `components_from_metric`.) 
""" def __init__(self, tol=1e-3, max_iter=1000, prior=None, diff --git a/metric_learn/mlkr.py b/metric_learn/mlkr.py index ea8748be..471694b6 100644 --- a/metric_learn/mlkr.py +++ b/metric_learn/mlkr.py @@ -15,7 +15,7 @@ from metric_learn._util import _check_n_components from .base_metric import MahalanobisMixin -from ._util import _initialize_transformer +from ._util import _initialize_components EPS = np.finfo(float).eps @@ -103,7 +103,7 @@ class MLKR(MahalanobisMixin, TransformerMixin): n_iter_ : `int` The number of iterations the solver has run. - transformer_ : `numpy.ndarray`, shape=(n_components, n_features) + components_ : `numpy.ndarray`, shape=(n_components, n_features) The learned linear transformation ``L``. Examples @@ -182,10 +182,10 @@ def fit(self, X, y): init = 'auto' else: init = self.init - A = _initialize_transformer(m, X, y, init=init, - random_state=self.random_state, - # MLKR works on regression targets: - has_classes=False) + A = _initialize_components(m, X, y, init=init, + random_state=self.random_state, + # MLKR works on regression targets: + has_classes=False) # Measure the total training time train_time = time.time() @@ -194,7 +194,7 @@ def fit(self, X, y): res = minimize(self._loss, A.ravel(), (X, y), method='L-BFGS-B', jac=True, tol=self.tol, options=dict(maxiter=self.max_iter)) - self.transformer_ = res.x.reshape(A.shape) + self.components_ = res.x.reshape(A.shape) # Stop timer train_time = time.time() - train_time diff --git a/metric_learn/mmc.py b/metric_learn/mmc.py index 55337b2e..c8c52b24 100644 --- a/metric_learn/mmc.py +++ b/metric_learn/mmc.py @@ -9,7 +9,7 @@ from .base_metric import _PairsClassifierMixin, MahalanobisMixin from .constraints import Constraints, wrap_pairs -from ._util import transformer_from_metric, _initialize_metric_mahalanobis +from ._util import components_from_metric, _initialize_metric_mahalanobis class _BaseMMC(MahalanobisMixin): @@ -185,7 +185,7 @@ def _fit_full(self, pairs, y): self.A_[:] = A_old self.n_iter_ = cycle - self.transformer_ = transformer_from_metric(self.A_) + self.components_ = components_from_metric(self.A_) return self def _fit_diag(self, pairs, y): @@ -246,7 +246,7 @@ def _fit_diag(self, pairs, y): self.A_ = np.diag(w) - self.transformer_ = transformer_from_metric(self.A_) + self.components_ = components_from_metric(self.A_) return self def _fD(self, neg_pairs, A): @@ -409,9 +409,9 @@ class MMC(_BaseMMC, _PairsClassifierMixin): n_iter_ : `int` The number of iterations the solver has run. - transformer_ : `numpy.ndarray`, shape=(n_features, n_features) + components_ : `numpy.ndarray`, shape=(n_features, n_features) The linear transformation ``L`` deduced from the learned Mahalanobis - metric (See function `transformer_from_metric`.) + metric (See function `components_from_metric`.) threshold_ : `float` If the distance metric between two points is lower than this threshold, @@ -550,9 +550,9 @@ class MMC_Supervised(_BaseMMC, TransformerMixin): n_iter_ : `int` The number of iterations the solver has run. - transformer_ : `numpy.ndarray`, shape=(n_features, n_features) + components_ : `numpy.ndarray`, shape=(n_features, n_features) The linear transformation ``L`` deduced from the learned Mahalanobis - metric (See function `transformer_from_metric`.) + metric (See function `components_from_metric`.) 
""" def __init__(self, max_iter=100, max_proj=10000, convergence_threshold=1e-6, diff --git a/metric_learn/nca.py b/metric_learn/nca.py index dcfdac8a..37fe0923 100644 --- a/metric_learn/nca.py +++ b/metric_learn/nca.py @@ -13,7 +13,7 @@ from sklearn.utils.fixes import logsumexp from sklearn.base import TransformerMixin -from ._util import _initialize_transformer, _check_n_components +from ._util import _initialize_components, _check_n_components from .base_metric import MahalanobisMixin EPS = np.finfo(float).eps @@ -116,7 +116,7 @@ class NCA(MahalanobisMixin, TransformerMixin): n_iter_ : `int` The number of iterations the solver has run. - transformer_ : `numpy.ndarray`, shape=(n_components, n_features) + components_ : `numpy.ndarray`, shape=(n_components, n_features) The learned linear transformation ``L``. References @@ -174,7 +174,7 @@ def fit(self, X, y): init = 'auto' else: init = self.init - A = _initialize_transformer(n_components, X, labels, init, self.verbose) + A = _initialize_components(n_components, X, labels, init, self.verbose) # Run NCA mask = labels[:, np.newaxis] == labels[np.newaxis, :] @@ -191,7 +191,7 @@ def fit(self, X, y): self.n_iter_ = 0 opt_result = minimize(**optimizer_params) - self.transformer_ = opt_result.x.reshape(-1, X.shape[1]) + self.components_ = opt_result.x.reshape(-1, X.shape[1]) self.n_iter_ = opt_result.nit # Stop timer diff --git a/metric_learn/rca.py b/metric_learn/rca.py index 8686f02d..8471a1b1 100644 --- a/metric_learn/rca.py +++ b/metric_learn/rca.py @@ -81,7 +81,7 @@ class RCA(MahalanobisMixin, TransformerMixin): Attributes ---------- - transformer_ : `numpy.ndarray`, shape=(n_components, n_features) + components_ : `numpy.ndarray`, shape=(n_components, n_features) The learned linear transformation ``L``. """ @@ -152,9 +152,9 @@ def fit(self, X, chunks): inds = np.argsort(vals)[:dim] A = vecs[:, inds] inner_cov = np.atleast_2d(A.T.dot(inner_cov).dot(A)) - self.transformer_ = _inv_sqrtm(inner_cov).dot(A.T) + self.components_ = _inv_sqrtm(inner_cov).dot(A.T) else: - self.transformer_ = _inv_sqrtm(inner_cov).T + self.components_ = _inv_sqrtm(inner_cov).T return self @@ -197,7 +197,7 @@ class RCA_Supervised(RCA): Attributes ---------- - transformer_ : `numpy.ndarray`, shape=(n_components, n_features) + components_ : `numpy.ndarray`, shape=(n_components, n_features) The learned linear transformation ``L``. """ diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py index 9344ef7c..cfd37955 100644 --- a/metric_learn/sdml.py +++ b/metric_learn/sdml.py @@ -12,7 +12,7 @@ from .base_metric import MahalanobisMixin, _PairsClassifierMixin from .constraints import Constraints, wrap_pairs -from ._util import transformer_from_metric, _initialize_metric_mahalanobis +from ._util import components_from_metric, _initialize_metric_mahalanobis try: from inverse_covariance import quic except ImportError: @@ -124,7 +124,7 @@ def _fit(self, pairs, y): raised_error) raise RuntimeError(msg) - self.transformer_ = transformer_from_metric(np.atleast_2d(M)) + self.components_ = components_from_metric(np.atleast_2d(M)) return self @@ -190,9 +190,9 @@ class SDML(_BaseSDML, _PairsClassifierMixin): Attributes ---------- - transformer_ : `numpy.ndarray`, shape=(n_features, n_features) + components_ : `numpy.ndarray`, shape=(n_features, n_features) The linear transformation ``L`` deduced from the learned Mahalanobis - metric (See function `transformer_from_metric`.) + metric (See function `components_from_metric`.) 
threshold_ : `float` If the distance metric between two points is lower than this threshold, @@ -315,9 +315,9 @@ class SDML_Supervised(_BaseSDML, TransformerMixin): Attributes ---------- - transformer_ : `numpy.ndarray`, shape=(n_features, n_features) + components_ : `numpy.ndarray`, shape=(n_features, n_features) The linear transformation ``L`` deduced from the learned Mahalanobis - metric (See function `transformer_from_metric`.) + metric (See function `components_from_metric`.) See Also -------- diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py index 0f47a58a..c2056110 100644 --- a/test/metric_learn_test.py +++ b/test/metric_learn_test.py @@ -301,7 +301,7 @@ def test_loss_grad_lbfgs(self): num_pts, n_components = X.shape unique_labels, label_inds = np.unique(y, return_inverse=True) lmnn.labels_ = np.arange(len(unique_labels)) - lmnn.transformer_ = np.eye(n_components) + lmnn.components_ = np.eye(n_components) target_neighbors = lmnn._select_targets(X, label_inds) impostors = lmnn._find_impostors(target_neighbors[:, -1], X, label_inds) @@ -371,7 +371,7 @@ def test_toy_ex_lmnn(X, y, loss): num_pts, n_components = X.shape unique_labels, label_inds = np.unique(y, return_inverse=True) lmnn.labels_ = np.arange(len(unique_labels)) - lmnn.transformer_ = np.eye(n_components) + lmnn.components_ = np.eye(n_components) target_neighbors = lmnn._select_targets(X, label_inds) impostors = lmnn._find_impostors(target_neighbors[:, -1], X, label_inds) @@ -830,7 +830,7 @@ def test_singleton_class(self): A = make_spd_matrix(X.shape[1], X.shape[1]) nca = NCA(init=A, max_iter=30, n_components=X.shape[1]) nca.fit(X, y) - assert_array_equal(nca.transformer_, A) + assert_array_equal(nca.components_, A) def test_one_class(self): # if there is only one class the gradient is null, so the final matrix @@ -841,7 +841,7 @@ def test_one_class(self): A = make_spd_matrix(X.shape[1], X.shape[1]) nca = NCA(init=A, max_iter=30, n_components=X.shape[1]) nca.fit(X, y) - assert_array_equal(nca.transformer_, A) + assert_array_equal(nca.components_, A) def test_changed_behaviour_warning(self): # test that a ChangedBehavior warning is thrown about the init, if the @@ -887,7 +887,7 @@ def test_iris(self): # Sanity checks for learned matrices. 
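    # note: get_mahalanobis_matrix() returns components_.T.dot(components_)
    # (see base_metric.py above), so M is square (4, 4) here even though
    # the learned components_ matrix is only (2, 4)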
self.assertEqual(lfda.get_mahalanobis_matrix().shape, (4, 4)) - self.assertEqual(lfda.transformer_.shape, (2, 4)) + self.assertEqual(lfda.components_.shape, (2, 4)) @pytest.mark.parametrize('num_dims', [None, 2]) diff --git a/test/test_base_metric.py b/test/test_base_metric.py index 0c1117ed..f36a7e27 100644 --- a/test/test_base_metric.py +++ b/test/test_base_metric.py @@ -191,7 +191,7 @@ def test_get_metric_works_does_not_raise(estimator, build_dataset): assert len(record) == 0 # Test that the scalar case works - model.transformer_ = np.array([3.1]) + model.components_ = np.array([3.1]) metric = model.get_metric() for u, v in [(5, 6.7), ([5], [6.7]), ([[5]], [[6.7]])]: with pytest.warns(None) as record: @@ -211,13 +211,13 @@ def test_n_components(estimator, build_dataset): set_random_state(model) model.set_params(n_components=None) model.fit(input_data, labels) - assert model.transformer_.shape == (X.shape[1], X.shape[1]) + assert model.components_.shape == (X.shape[1], X.shape[1]) model = clone(estimator) set_random_state(model) model.set_params(n_components=X.shape[1] - 1) model.fit(input_data, labels) - assert model.transformer_.shape == (X.shape[1] - 1, X.shape[1]) + assert model.components_.shape == (X.shape[1] - 1, X.shape[1]) model = clone(estimator) set_random_state(model) diff --git a/test/test_transformer_metric_conversion.py b/test/test_components_metric_conversion.py similarity index 85% rename from test/test_transformer_metric_conversion.py rename to test/test_components_metric_conversion.py index 62ac8777..c0a7bbd4 100644 --- a/test/test_transformer_metric_conversion.py +++ b/test/test_components_metric_conversion.py @@ -10,7 +10,7 @@ from metric_learn import ( LMNN, NCA, LFDA, Covariance, MLKR, LSML_Supervised, ITML_Supervised, SDML_Supervised, RCA_Supervised) -from metric_learn._util import transformer_from_metric +from metric_learn._util import components_from_metric from metric_learn.exceptions import NonPSDError @@ -25,27 +25,27 @@ def setUpClass(self): def test_cov(self): cov = Covariance() cov.fit(self.X) - L = cov.transformer_ + L = cov.components_ assert_array_almost_equal(L.T.dot(L), cov.get_mahalanobis_matrix()) def test_lsml_supervised(self): seed = np.random.RandomState(1234) lsml = LSML_Supervised(num_constraints=200, random_state=seed) lsml.fit(self.X, self.y) - L = lsml.transformer_ + L = lsml.components_ assert_array_almost_equal(L.T.dot(L), lsml.get_mahalanobis_matrix()) def test_itml_supervised(self): seed = np.random.RandomState(1234) itml = ITML_Supervised(num_constraints=200) itml.fit(self.X, self.y, random_state=seed) - L = itml.transformer_ + L = itml.components_ assert_array_almost_equal(L.T.dot(L), itml.get_mahalanobis_matrix()) def test_lmnn(self): lmnn = LMNN(k=5, learn_rate=1e-6, verbose=False) lmnn.fit(self.X, self.y) - L = lmnn.transformer_ + L = lmnn.components_ assert_array_almost_equal(L.T.dot(L), lmnn.get_mahalanobis_matrix()) def test_sdml_supervised(self): @@ -53,38 +53,38 @@ def test_sdml_supervised(self): sdml = SDML_Supervised(num_constraints=1500, prior='identity', balance_param=1e-5, random_state=seed) sdml.fit(self.X, self.y) - L = sdml.transformer_ + L = sdml.components_ assert_array_almost_equal(L.T.dot(L), sdml.get_mahalanobis_matrix()) def test_nca(self): n = self.X.shape[0] nca = NCA(max_iter=(100000//n)) nca.fit(self.X, self.y) - L = nca.transformer_ + L = nca.components_ assert_array_almost_equal(L.T.dot(L), nca.get_mahalanobis_matrix()) def test_lfda(self): lfda = LFDA(k=2, n_components=2) lfda.fit(self.X, self.y) - L = 
lfda.transformer_ + L = lfda.components_ assert_array_almost_equal(L.T.dot(L), lfda.get_mahalanobis_matrix()) def test_rca_supervised(self): seed = np.random.RandomState(1234) rca = RCA_Supervised(n_components=2, num_chunks=30, chunk_size=2) rca.fit(self.X, self.y, random_state=seed) - L = rca.transformer_ + L = rca.components_ assert_array_almost_equal(L.T.dot(L), rca.get_mahalanobis_matrix()) def test_mlkr(self): mlkr = MLKR(n_components=2) mlkr.fit(self.X, self.y) - L = mlkr.transformer_ + L = mlkr.components_ assert_array_almost_equal(L.T.dot(L), mlkr.get_mahalanobis_matrix()) @ignore_warnings - def test_transformer_from_metric_edge_cases(self): - """Test that transformer_from_metric returns the right result in various + def test_components_from_metric_edge_cases(self): + """Test that components_from_metric returns the right result in various edge cases""" rng = np.random.RandomState(42) @@ -97,25 +97,25 @@ def test_transformer_from_metric_edge_cases(self): # https://github.com/metric-learn/metric-learn/issues/175) M = np.diag([1e-15, 2e-16, 3e-15, 4e-16, 5e-15, 6e-16, 7e-15]) M = P.dot(M).dot(P.T) - L = transformer_from_metric(M) + L = components_from_metric(M) assert_allclose(L.T.dot(L), M) # diagonal matrix M = np.diag(np.abs(rng.randn(5))) - L = transformer_from_metric(M) + L = components_from_metric(M) assert_allclose(L.T.dot(L), M) # low-rank matrix (with zeros) M = np.zeros((7, 7)) small_random = rng.randn(3, 3) M[:3, :3] = small_random.T.dot(small_random) - L = transformer_from_metric(M) + L = components_from_metric(M) assert_allclose(L.T.dot(L), M) # low-rank matrix (without necessarily zeros) R = np.abs(rng.randn(7, 7)) M = R.dot(np.diag([1, 5, 3, 2, 0, 0, 0])).dot(R.T) - L = transformer_from_metric(M) + L = components_from_metric(M) assert_allclose(L.T.dot(L), M) # matrix with a determinant still high but which should be considered as a @@ -131,54 +131,54 @@ def test_transformer_from_metric_edge_cases(self): assert str(err_msg.value) == 'Matrix is not positive definite' # (just to show that this case is indeed considered by numpy as an # indefinite case) - L = transformer_from_metric(M) + L = components_from_metric(M) assert_allclose(L.T.dot(L), M) # matrix with lots of small nonzeros that make a big zero when multiplied M = np.diag([1e-3, 1e-3, 1e-3, 1e-3, 1e-3, 1e-3, 1e-3]) - L = transformer_from_metric(M) + L = components_from_metric(M) assert_allclose(L.T.dot(L), M) # full rank matrix M = rng.randn(10, 10) M = M.T.dot(M) assert np.linalg.matrix_rank(M) == 10 - L = transformer_from_metric(M) + L = components_from_metric(M) assert_allclose(L.T.dot(L), M) def test_non_symmetric_matrix_raises(self): """Checks that if a non symmetric matrix is given to - transformer_from_metric, an error is thrown""" + components_from_metric, an error is thrown""" rng = np.random.RandomState(42) M = rng.randn(10, 10) with pytest.raises(ValueError) as raised_error: - transformer_from_metric(M) + components_from_metric(M) assert str(raised_error.value) == "The input metric should be symmetric." def test_non_psd_raises(self): """Checks that a non PSD matrix (i.e. 
with negative eigenvalues) will - raise an error when passed to transformer_from_metric""" + raise an error when passed to components_from_metric""" rng = np.random.RandomState(42) D = np.diag([1, 5, 3, 4.2, -4, -2, 1]) P = ortho_group.rvs(7, random_state=rng) M = P.dot(D).dot(P.T) msg = ("Matrix is not positive semidefinite (PSD).") with pytest.raises(NonPSDError) as raised_error: - transformer_from_metric(M) + components_from_metric(M) assert str(raised_error.value) == msg with pytest.raises(NonPSDError) as raised_error: - transformer_from_metric(D) + components_from_metric(D) assert str(raised_error.value) == msg def test_almost_psd_dont_raise(self): """Checks that if the metric is almost PSD (i.e. it has some negative - eigenvalues very close to zero), then transformer_from_metric will still + eigenvalues very close to zero), then components_from_metric will still work""" rng = np.random.RandomState(42) D = np.diag([1, 5, 3, 4.2, -1e-20, -2e-20, -1e-20]) P = ortho_group.rvs(7, random_state=rng) M = P.dot(D).dot(P.T) - L = transformer_from_metric(M) + L = components_from_metric(M) assert_allclose(L.T.dot(L), M) diff --git a/test/test_mahalanobis_mixin.py b/test/test_mahalanobis_mixin.py index 54c37936..737d2341 100644 --- a/test/test_mahalanobis_mixin.py +++ b/test/test_mahalanobis_mixin.py @@ -59,7 +59,7 @@ def test_score_pairs_toy_example(estimator, build_dataset): set_random_state(model) model.fit(*remove_y_quadruplets(estimator, input_data, labels)) pairs = np.stack([X[:10], X[10:20]], axis=1) - embedded_pairs = pairs.dot(model.transformer_.T) + embedded_pairs = pairs.dot(model.components_.T) distances = np.sqrt(np.sum((embedded_pairs[:, 1] - embedded_pairs[:, 0])**2, axis=-1)) @@ -119,7 +119,7 @@ def test_embed_toy_example(estimator, build_dataset): model = clone(estimator) set_random_state(model) model.fit(*remove_y_quadruplets(estimator, input_data, labels)) - embedded_points = X.dot(model.transformer_.T) + embedded_points = X.dot(model.components_.T) assert_array_almost_equal(model.transform(X), embedded_points) @@ -278,14 +278,14 @@ def test_get_squared_metric(estimator, build_dataset): @pytest.mark.parametrize('estimator, build_dataset', metric_learners, ids=ids_metric_learners) -def test_transformer_is_2D(estimator, build_dataset): - """Tests that the transformer of metric learners is 2D""" +def test_components_is_2D(estimator, build_dataset): + """Tests that the transformation matrix of metric learners is 2D""" input_data, labels, _, X = build_dataset() model = clone(estimator) set_random_state(model) # test that it works for X.shape[1] features model.fit(*remove_y_quadruplets(estimator, input_data, labels)) - assert model.transformer_.shape == (X.shape[1], X.shape[1]) + assert model.components_.shape == (X.shape[1], X.shape[1]) # test that it works for 1 feature trunc_data = input_data[..., :1] @@ -304,7 +304,7 @@ def test_transformer_is_2D(estimator, build_dataset): trunc_data = trunc_data[to_keep] labels = labels[to_keep] model.fit(*remove_y_quadruplets(estimator, trunc_data, labels)) - assert model.transformer_.shape == (1, 1) # the transformer must be 2D + assert model.components_.shape == (1, 1) # the components must be 2D @pytest.mark.parametrize('estimator, build_dataset', @@ -466,8 +466,8 @@ def test_auto_init_transformation(n_samples, n_features, n_classes, else: model_other = clone(model_base).set_params(init='identity') model_other.fit(input_data, labels) - assert_array_almost_equal(model.transformer_, - model_other.transformer_) + 
assert_array_almost_equal(model.components_, + model_other.components_) @pytest.mark.parametrize('estimator, build_dataset', @@ -483,7 +483,8 @@ def test_auto_init_transformation(n_samples, n_features, n_classes, hasattr(ml, 'init')]) def test_init_mahalanobis(estimator, build_dataset): """Tests that for estimators that learn a mahalanobis matrix - instead of a transformer, i.e. those that are mahalanobis metric learners + instead of a linear transformation, i.e. those that are mahalanobis metric + learners where we can change the init, but not choose the n_components, (TODO: be more explicit on this characterization, for instance with safe_flags like in scikit-learn) that the init has an expected behaviour. diff --git a/test/test_pairs_classifiers.py b/test/test_pairs_classifiers.py index decc0590..affc70f6 100644 --- a/test/test_pairs_classifiers.py +++ b/test/test_pairs_classifiers.py @@ -126,13 +126,13 @@ def test_threshold_different_scores_is_finite(estimator, build_dataset, class IdentityPairsClassifier(MahalanobisMixin, _PairsClassifierMixin): """A simple pairs classifier for testing purposes, that will just have - identity as transformer_, and a string threshold so that it returns an + identity as components_, and a string threshold so that it returns an error if not explicitely set. """ def fit(self, pairs, y): pairs, y = self._prepare_inputs(pairs, y, type_of_inputs='tuples') - self.transformer_ = np.atleast_2d(np.identity(pairs.shape[2])) + self.components_ = np.atleast_2d(np.identity(pairs.shape[2])) self.threshold_ = 'I am not set.' return self @@ -347,7 +347,7 @@ class MockBadPairsClassifier(MahalanobisMixin, _PairsClassifierMixin): """ def fit(self, pairs, y, calibration_params=None): - self.transformer_ = 'not used' + self.components_ = 'not used' self.calibrate_threshold(pairs, y, **(calibration_params if calibration_params is not None else dict())) @@ -503,7 +503,7 @@ def test_accuracy_toy_example(estimator, build_dataset): set_random_state(estimator) estimator.fit(input_data, labels) # we force the transformation to be identity so that we control what it does - estimator.transformer_ = np.eye(X.shape[1]) + estimator.components_ = np.eye(X.shape[1]) # the threshold for similar or dissimilar pairs is half of the distance # between X[0] and X[1] estimator.set_threshold(euclidean(X[0], X[1]) / 2) diff --git a/test/test_quadruplets_classifiers.py b/test/test_quadruplets_classifiers.py index d342b45d..efe10030 100644 --- a/test/test_quadruplets_classifiers.py +++ b/test/test_quadruplets_classifiers.py @@ -61,5 +61,5 @@ def test_accuracy_toy_example(estimator, build_dataset): [X_test[1], X_test[2], X_test[0], X_test[3]], [X_test[3], X_test[0], X_test[2], X_test[1]]]) # we force the transformation to be identity so that we control what it does - estimator.transformer_ = np.eye(X.shape[1]) + estimator.components_ = np.eye(X.shape[1]) assert estimator.score(quadruplets_test) == 0.25 From 731b32718873d0d40da298b88271375498fc290f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20Bellet?= Date: Thu, 4 Jul 2019 08:48:28 +0200 Subject: [PATCH 126/210] [MRG] Deprecate use_pca parameter of LMNN (#231) * deprecate use_pca * fix failing test --- metric_learn/lmnn.py | 13 ++++++++++++- test/metric_learn_test.py | 12 ++++++++++++ test/test_base_metric.py | 2 +- 3 files changed, 25 insertions(+), 2 deletions(-) diff --git a/metric_learn/lmnn.py b/metric_learn/lmnn.py index 5dc1810d..0c0650b3 100644 --- a/metric_learn/lmnn.py +++ b/metric_learn/lmnn.py @@ -87,6 +87,12 @@ class 
LMNN(MahalanobisMixin, TransformerMixin): Tolerance of the optimization procedure. If the objective value varies less than `tol`, we consider the algorithm has converged and stop it. + use_pca : Not used + + .. deprecated:: 0.5.0 + `use_pca` was deprecated in version 0.5.0 and will + be removed in 0.6.0. + verbose : bool, optional (default=False) Whether to print the progress of the optimization procedure. @@ -151,7 +157,7 @@ class LMNN(MahalanobisMixin, TransformerMixin): def __init__(self, init=None, k=3, min_iter=50, max_iter=1000, learn_rate=1e-7, regularization=0.5, convergence_tol=0.001, - use_pca=True, verbose=False, preprocessor=None, + use_pca='deprecated', verbose=False, preprocessor=None, n_components=None, num_dims='deprecated', random_state=None): self.init = init self.k = k @@ -173,6 +179,11 @@ def fit(self, X, y): ' It has been deprecated in version 0.5.0 and will be' ' removed in 0.6.0. Use "n_components" instead', DeprecationWarning) + if self.use_pca != 'deprecated': + warnings.warn('"use_pca" parameter is not used.' + ' It has been deprecated in version 0.5.0 and will be' + ' removed in 0.6.0.', + DeprecationWarning) k = self.k reg = self.regularization learn_rate = self.learn_rate diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py index c2056110..d7b8ab72 100644 --- a/test/metric_learn_test.py +++ b/test/metric_learn_test.py @@ -353,6 +353,18 @@ def test_changed_behaviour_warning(self): lmnn.fit(X, y) assert any(msg == str(wrn.message) for wrn in raised_warning) + def test_deprecation_use_pca(self): + # test that a DeprecationWarning is thrown about use_pca, if the + # default parameters are used. + # TODO: remove in v.0.6 + X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) + y = np.array([1, 0, 1, 0]) + lmnn = LMNN(k=2, use_pca=True) + msg = ('"use_pca" parameter is not used.' 
+ ' It has been deprecated in version 0.5.0 and will be' + ' removed in 0.6.0.') + assert_warns_message(DeprecationWarning, msg, lmnn.fit, X, y) + @pytest.mark.parametrize('X, y, loss', [(np.array([[0], [1], [2], [3]]), [1, 1, 0, 0], 3.0), diff --git a/test/test_base_metric.py b/test/test_base_metric.py index f36a7e27..725df31a 100644 --- a/test/test_base_metric.py +++ b/test/test_base_metric.py @@ -26,7 +26,7 @@ def test_lmnn(self): "learn_rate=1e-07, " "max_iter=1000, min_iter=50, n_components=None, " "num_dims='deprecated', preprocessor=None, random_state=None, " - "regularization=0.5, use_pca=True, verbose=False)")) + "regularization=0.5, use_pca='deprecated', verbose=False)")) def test_nca(self): self.assertEqual(remove_spaces(str(metric_learn.NCA())), From 09dcd56e5edb544d031f77f4ea0694967add7bdb Mon Sep 17 00:00:00 2001 From: William de Vazelhes <31916524+wdevazelhes@users.noreply.github.com> Date: Thu, 4 Jul 2019 09:11:27 +0200 Subject: [PATCH 127/210] [MRG] update impostors, closer to original implem (#228) * first attempt to change the function * Add test and make it work * Import the right scipy * Add test where the number of impostors varies and tests the gradient * fix little pbs * Fix L_next * Fix LMNN * add forgotten L as argument * add forgotten L as argument * fix cost fn call * fix cost fn call * nitpicks * make example work and fix python2 error --- examples/plot_metric_learning_examples.py | 2 +- metric_learn/lmnn.py | 77 ++++------- test/metric_learn_test.py | 159 +++++++++++++++++++--- 3 files changed, 169 insertions(+), 69 deletions(-) diff --git a/examples/plot_metric_learning_examples.py b/examples/plot_metric_learning_examples.py index b46d1adc..0d602cbb 100644 --- a/examples/plot_metric_learning_examples.py +++ b/examples/plot_metric_learning_examples.py @@ -139,7 +139,7 @@ def plot_tsne(X, y, colormap=plt.cm.Paired): # # setting up LMNN -lmnn = metric_learn.LMNN(k=5, learn_rate=1e-6, init='random') +lmnn = metric_learn.LMNN(k=5, learn_rate=1e-6) # fit the data! 
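# (fit learns the linear map stored in lmnn.components_ after this patch
#  series; transform(X) then maps the data into the learned space)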
lmnn.fit(X, y) diff --git a/metric_learn/lmnn.py b/metric_learn/lmnn.py index 0c0650b3..ed74ffcd 100644 --- a/metric_learn/lmnn.py +++ b/metric_learn/lmnn.py @@ -1,8 +1,6 @@ """ Large Margin Nearest Neighbor Metric learning (LMNN) """ -# TODO: periodic recalculation of impostors, PCA initialization - from __future__ import print_function, absolute_import import numpy as np import warnings @@ -219,31 +217,19 @@ def fit(self, X, y): ' (smallest class has %d)' % required_k) target_neighbors = self._select_targets(X, label_inds) - impostors = self._find_impostors(target_neighbors[:, -1], X, label_inds) - if len(impostors) == 0: - # L has already been initialized to an identity matrix - return # sum outer products dfG = _sum_outer_products(X, target_neighbors.flatten(), np.repeat(np.arange(X.shape[0]), k)) - df = np.zeros_like(dfG) - - # storage - a1 = [None]*k - a2 = [None]*k - for nn_idx in xrange(k): - a1[nn_idx] = np.array([]) - a2[nn_idx] = np.array([]) # initialize L L = self.components_ # first iteration: we compute variables (including objective and gradient) # at initialization point - G, objective, total_active, df, a1, a2 = ( - self._loss_grad(X, L, dfG, impostors, 1, k, reg, target_neighbors, df, - a1, a2)) + G, objective, total_active = self._loss_grad(X, L, dfG, k, + reg, target_neighbors, + label_inds) it = 1 # we already made one iteration @@ -257,10 +243,9 @@ def fit(self, X, y): # we compute the objective at next point # we copy variables that can be modified by _loss_grad, because if we # retry we don t want to modify them several times - (G_next, objective_next, total_active_next, df_next, a1_next, - a2_next) = ( - self._loss_grad(X, L_next, dfG, impostors, it, k, reg, - target_neighbors, df.copy(), list(a1), list(a2))) + (G_next, objective_next, total_active_next) = ( + self._loss_grad(X, L_next, dfG, k, reg, target_neighbors, + label_inds)) assert not np.isnan(objective) delta_obj = objective_next - objective if delta_obj > 0: @@ -275,8 +260,7 @@ def fit(self, X, y): # old variables to these new ones before next iteration and we # slightly increase the learning rate L = L_next - G, df, objective, total_active, a1, a2 = ( - G_next, df_next, objective_next, total_active_next, a1_next, a2_next) + G, objective, total_active = G_next, objective_next, total_active_next learn_rate *= 1.01 if self.verbose: @@ -296,46 +280,37 @@ def fit(self, X, y): self.n_iter_ = it return self - def _loss_grad(self, X, L, dfG, impostors, it, k, reg, target_neighbors, df, - a1, a2): + def _loss_grad(self, X, L, dfG, k, reg, target_neighbors, label_inds): # Compute pairwise distances under current metric Lx = L.dot(X.T).T - g0 = _inplace_paired_L2(*Lx[impostors]) + + # we need to find the furthest neighbor: Ni = 1 + _inplace_paired_L2(Lx[target_neighbors], Lx[:, None, :]) + furthest_neighbors = np.take_along_axis(target_neighbors, + Ni.argmax(axis=1)[:, None], 1) + impostors = self._find_impostors(furthest_neighbors.ravel(), X, + label_inds, L) + + g0 = _inplace_paired_L2(*Lx[impostors]) + + # we reorder the target neighbors g1, g2 = Ni[impostors] # compute the gradient total_active = 0 - for nn_idx in reversed(xrange(k)): + df = np.zeros((X.shape[1], X.shape[1])) + for nn_idx in reversed(xrange(k)): # note: reverse not useful here act1 = g0 < g1[:, nn_idx] act2 = g0 < g2[:, nn_idx] total_active += act1.sum() + act2.sum() - if it > 1: - plus1 = act1 & ~a1[nn_idx] - minus1 = a1[nn_idx] & ~act1 - plus2 = act2 & ~a2[nn_idx] - minus2 = a2[nn_idx] & ~act2 - else: - plus1 = act1 - plus2 = act2 - minus1 = 
np.zeros(0, dtype=int) - minus2 = np.zeros(0, dtype=int) - targets = target_neighbors[:, nn_idx] - PLUS, pweight = _count_edges(plus1, plus2, impostors, targets) + PLUS, pweight = _count_edges(act1, act2, impostors, targets) df += _sum_outer_products(X, PLUS[:, 0], PLUS[:, 1], pweight) - MINUS, mweight = _count_edges(minus1, minus2, impostors, targets) - df -= _sum_outer_products(X, MINUS[:, 0], MINUS[:, 1], mweight) in_imp, out_imp = impostors - df += _sum_outer_products(X, in_imp[minus1], out_imp[minus1]) - df += _sum_outer_products(X, in_imp[minus2], out_imp[minus2]) - - df -= _sum_outer_products(X, in_imp[plus1], out_imp[plus1]) - df -= _sum_outer_products(X, in_imp[plus2], out_imp[plus2]) + df -= _sum_outer_products(X, in_imp[act1], out_imp[act1]) + df -= _sum_outer_products(X, in_imp[act2], out_imp[act2]) - a1[nn_idx] = act1 - a2[nn_idx] = act2 # do the gradient update assert not np.isnan(df).any() G = dfG * reg + df * (1 - reg) @@ -343,7 +318,7 @@ def _loss_grad(self, X, L, dfG, impostors, it, k, reg, target_neighbors, df, # compute the objective function objective = total_active * (1 - reg) objective += G.flatten().dot(L.flatten()) - return 2 * G, objective, total_active, df, a1, a2 + return 2 * G, objective, total_active def _select_targets(self, X, label_inds): target_neighbors = np.empty((X.shape[0], self.k), dtype=int) @@ -355,8 +330,8 @@ def _select_targets(self, X, label_inds): target_neighbors[inds] = inds[nn] return target_neighbors - def _find_impostors(self, furthest_neighbors, X, label_inds): - Lx = self.transform(X) + def _find_impostors(self, furthest_neighbors, X, label_inds, L): + Lx = X.dot(L.T) margin_radii = 1 + _inplace_paired_L2(Lx[furthest_neighbors], Lx) impostors = [] for label in self.labels_[:-1]: diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py index d7b8ab72..00314ad0 100644 --- a/test/metric_learn_test.py +++ b/test/metric_learn_test.py @@ -2,9 +2,10 @@ import re import pytest import numpy as np +import scipy from scipy.optimize import check_grad, approx_fprime from six.moves import xrange -from sklearn.metrics import pairwise_distances +from sklearn.metrics import pairwise_distances, euclidean_distances from sklearn.datasets import (load_iris, make_classification, make_regression, make_spd_matrix) from numpy.testing import (assert_array_almost_equal, assert_array_equal, @@ -304,25 +305,15 @@ def test_loss_grad_lbfgs(self): lmnn.components_ = np.eye(n_components) target_neighbors = lmnn._select_targets(X, label_inds) - impostors = lmnn._find_impostors(target_neighbors[:, -1], X, label_inds) # sum outer products dfG = _sum_outer_products(X, target_neighbors.flatten(), np.repeat(np.arange(X.shape[0]), k)) - df = np.zeros_like(dfG) - - # storage - a1 = [None]*k - a2 = [None]*k - for nn_idx in xrange(k): - a1[nn_idx] = np.array([]) - a2[nn_idx] = np.array([]) # initialize L def loss_grad(flat_L): - return lmnn._loss_grad(X, flat_L.reshape(-1, X.shape[1]), dfG, impostors, - 1, k, reg, target_neighbors, df.copy(), - list(a1), list(a2)) + return lmnn._loss_grad(X, flat_L.reshape(-1, X.shape[1]), dfG, + k, reg, target_neighbors, label_inds) def fun(x): return loss_grad(x)[1] @@ -366,6 +357,141 @@ def test_deprecation_use_pca(self): assert_warns_message(DeprecationWarning, msg, lmnn.fit, X, y) +def test_loss_func(capsys): + """Test the loss function (and its gradient) on a simple example, + by comparing the results with the actual implementation of metric-learn, + with a very simple (but nonperformant) implementation""" + + # toy dataset to use 
+ X, y = make_classification(n_samples=10, n_classes=2, + n_features=6, + n_redundant=0, shuffle=True, + scale=[1, 1, 20, 20, 20, 20], random_state=42) + + def hinge(a): + if a > 0: + return a, 1 + else: + return 0, 0 + + def loss_fn(L, X, y, target_neighbors, reg): + L = L.reshape(-1, X.shape[1]) + Lx = np.dot(X, L.T) + loss = 0 + total_active = 0 + grad = np.zeros_like(L) + for i in range(X.shape[0]): + for j in target_neighbors[i]: + loss += (1 - reg) * np.sum((Lx[i] - Lx[j]) ** 2) + grad += (1 - reg) * np.outer(Lx[i] - Lx[j], X[i] - X[j]) + for l in range(X.shape[0]): + if y[i] != y[l]: + hin, active = hinge(1 + np.sum((Lx[i] - Lx[j])**2) - + np.sum((Lx[i] - Lx[l])**2)) + total_active += active + if active: + loss += reg * hin + grad += (reg * (np.outer(Lx[i] - Lx[j], X[i] - X[j]) - + np.outer(Lx[i] - Lx[l], X[i] - X[l]))) + grad = 2 * grad + return grad, loss, total_active + + # we check that the gradient we have computed in the non-performant implem + # is indeed the true gradient on a toy example: + + def _select_targets(X, y, k): + target_neighbors = np.empty((X.shape[0], k), dtype=int) + for label in np.unique(y): + inds, = np.nonzero(y == label) + dd = euclidean_distances(X[inds], squared=True) + np.fill_diagonal(dd, np.inf) + nn = np.argsort(dd)[..., :k] + target_neighbors[inds] = inds[nn] + return target_neighbors + + target_neighbors = _select_targets(X, y, 2) + regularization = 0.5 + n_features = X.shape[1] + x0 = np.random.randn(1, n_features) + + def loss(x0): + return loss_fn(x0.reshape(-1, X.shape[1]), X, y, target_neighbors, + regularization)[1] + + def grad(x0): + return loss_fn(x0.reshape(-1, X.shape[1]), X, y, target_neighbors, + regularization)[0].ravel() + + scipy.optimize.check_grad(loss, grad, x0.ravel()) + + class LMNN_with_callback(LMNN): + """ We will use a callback to get the gradient (see later) + """ + + def __init__(self, callback, *args, **kwargs): + self.callback = callback + super(LMNN_with_callback, self).__init__(*args, **kwargs) + + def _loss_grad(self, *args, **kwargs): + grad, objective, total_active = ( + super(LMNN_with_callback, self)._loss_grad(*args, **kwargs)) + self.callback.append(grad) + return grad, objective, total_active + + class LMNN_nonperformant(LMNN_with_callback): + + def fit(self, X, y): + self.y = y + return super(LMNN_nonperformant, self).fit(X, y) + + def _loss_grad(self, X, L, dfG, k, reg, target_neighbors, label_inds): + grad, loss, total_active = loss_fn(L.ravel(), X, self.y, + target_neighbors, self.regularization) + self.callback.append(grad) + return grad, loss, total_active + + mem1, mem2 = [], [] + lmnn_perf = LMNN_with_callback(verbose=True, random_state=42, + init='identity', max_iter=30, callback=mem1) + lmnn_nonperf = LMNN_nonperformant(verbose=True, random_state=42, + init='identity', max_iter=30, + callback=mem2) + objectives, obj_diffs, learn_rate, total_active = (dict(), dict(), dict(), + dict()) + for algo, name in zip([lmnn_perf, lmnn_nonperf], ['perf', 'nonperf']): + algo.fit(X, y) + out, _ = capsys.readouterr() + lines = re.split("\n+", out) + # we get every variable that is printed from the algorithm in verbose + num = '(-?\d+.?\d*(e[+|-]\d+)?)' + strings = [re.search("\d+ (?:{}) (?:{}) (?:(\d+)) (?:{})" + .format(num, num, num), s) for s in lines] + objectives[name] = [float(match.group(1)) for match in strings if match is + not None] + obj_diffs[name] = [float(match.group(3)) for match in strings if match is + not None] + total_active[name] = [float(match.group(5)) for match in strings if + match is not + 
None] + learn_rate[name] = [float(match.group(6)) for match in strings if match is + not None] + assert len(strings) >= 10 # we ensure that we actually did more than 10 + # iterations + assert total_active[name][0] >= 2 # we ensure that we have some active + # constraints (that's the case we want to test) + # we remove the last element because it can be equal to the penultimate + # if the last gradient update is null + for i in range(len(mem1)): + np.testing.assert_allclose(lmnn_perf.callback[i], + lmnn_nonperf.callback[i], + err_msg='Gradient different at position ' + '{}'.format(i)) + np.testing.assert_allclose(objectives['perf'], objectives['nonperf']) + np.testing.assert_allclose(obj_diffs['perf'], obj_diffs['nonperf']) + np.testing.assert_allclose(total_active['perf'], total_active['nonperf']) + np.testing.assert_allclose(learn_rate['perf'], learn_rate['nonperf']) + + @pytest.mark.parametrize('X, y, loss', [(np.array([[0], [1], [2], [3]]), [1, 1, 0, 0], 3.0), (np.array([[0], [1], [2], [3]]), @@ -386,7 +512,7 @@ def test_toy_ex_lmnn(X, y, loss): lmnn.components_ = np.eye(n_components) target_neighbors = lmnn._select_targets(X, label_inds) - impostors = lmnn._find_impostors(target_neighbors[:, -1], X, label_inds) + impostors = lmnn._find_impostors(target_neighbors[:, -1], X, label_inds, L) # sum outer products dfG = _sum_outer_products(X, target_neighbors.flatten(), @@ -401,9 +527,8 @@ def test_toy_ex_lmnn(X, y, loss): a2[nn_idx] = np.array([]) # assert that the loss equals the one computed by hand - assert lmnn._loss_grad(X, L.reshape(-1, X.shape[1]), dfG, impostors, 1, k, - reg, target_neighbors, df, a1, a2)[1] == loss - + assert lmnn._loss_grad(X, L.reshape(-1, X.shape[1]), dfG, k, + reg, target_neighbors, label_inds)[1] == loss def test_convergence_simple_example(capsys): # LMNN should converge on this simple example, which it did not with From 54c9d89c428631a39a7c0bca3d17e7d9fc992cd4 Mon Sep 17 00:00:00 2001 From: William de Vazelhes <31916524+wdevazelhes@users.noreply.github.com> Date: Sat, 6 Jul 2019 18:22:49 +0200 Subject: [PATCH 128/210] make check_preprocessor private (#235) --- metric_learn/base_metric.py | 4 ++-- test/test_utils.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/metric_learn/base_metric.py b/metric_learn/base_metric.py index cd1c3c71..570172a9 100644 --- a/metric_learn/base_metric.py +++ b/metric_learn/base_metric.py @@ -51,7 +51,7 @@ def score_pairs(self, pairs): learner is. """ - def check_preprocessor(self): + def _check_preprocessor(self): """Initializes the preprocessor""" if _is_arraylike(self.preprocessor): self.preprocessor_ = ArrayIndexer(self.preprocessor) @@ -92,7 +92,7 @@ def _prepare_inputs(self, X, y=None, type_of_inputs='classic', y: `numpy.ndarray` (optional) The checked input labels array. """ - self.check_preprocessor() + self._check_preprocessor() return check_input(X, y, type_of_inputs=type_of_inputs, preprocessor=self.preprocessor_, diff --git a/test/test_utils.py b/test/test_utils.py index 970b40a1..37abb307 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -779,7 +779,7 @@ class MockMetricLearner(MahalanobisMixin): mock_algo = MockMetricLearner(preprocessor=preprocessor) with pytest.raises(ValueError) as e: - mock_algo.check_preprocessor() + mock_algo._check_preprocessor() assert str(e.value) == ("Invalid type for the preprocessor: {}. 
You should " "provide either None, an array-like object, " "or a callable.".format(type(preprocessor))) @@ -812,7 +812,7 @@ def test_error_message_t_score_pairs(estimator, _): """ estimator = clone(estimator) set_random_state(estimator) - estimator.check_preprocessor() + estimator._check_preprocessor() triplets = np.array([[[1.3, 6.3], [3., 6.8], [6.5, 4.4]], [[1.9, 5.3], [1., 7.8], [3.2, 1.2]]]) with pytest.raises(ValueError) as raised_err: From 7d746717173fd0567c088e578c033967d1ac2408 Mon Sep 17 00:00:00 2001 From: William de Vazelhes <31916524+wdevazelhes@users.noreply.github.com> Date: Tue, 9 Jul 2019 06:28:15 +0200 Subject: [PATCH 129/210] Fix random state in algorithms (#234) --- metric_learn/lmnn.py | 2 +- metric_learn/lsml.py | 5 ++--- metric_learn/nca.py | 3 ++- metric_learn/sdml.py | 5 ++--- test/test_mahalanobis_mixin.py | 22 ++++++++++++++++++++++ 5 files changed, 29 insertions(+), 8 deletions(-) diff --git a/metric_learn/lmnn.py b/metric_learn/lmnn.py index ed74ffcd..2035588f 100644 --- a/metric_learn/lmnn.py +++ b/metric_learn/lmnn.py @@ -210,7 +210,7 @@ def fit(self, X, y): init = self.init self.components_ = _initialize_components(output_dim, X, y, init, self.verbose, - self.random_state) + random_state=self.random_state) required_k = np.bincount(label_inds).min() if self.k > required_k: raise ValueError('not enough class labels for specified k' diff --git a/metric_learn/lsml.py b/metric_learn/lsml.py index b1b2fc7f..340e6bf2 100644 --- a/metric_learn/lsml.py +++ b/metric_learn/lsml.py @@ -56,9 +56,8 @@ def _fit(self, quadruplets, weights=None): else: prior = self.prior M, prior_inv = _initialize_metric_mahalanobis(quadruplets, prior, - return_inverse=True, - strict_pd=True, - matrix_name='prior') + return_inverse=True, strict_pd=True, matrix_name='prior', + random_state=self.random_state) step_sizes = np.logspace(-10, 0, 10) # Keep track of the best step size and the loss at that step. 
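A minimal sketch of the behaviour this change enables, not part of the
commit itself: with `prior='random'` and a fixed `random_state`, two fits
should now learn exactly the same metric, mirroring the
`test_deterministic_initialization` test added at the end of this patch
(assumes the 0.5.0 API names `LSML_Supervised` and
`get_mahalanobis_matrix`):

import numpy as np
from sklearn.datasets import load_iris
from metric_learn import LSML_Supervised

X, y = load_iris(return_X_y=True)
# same seed for the random prior => identical learned Mahalanobis matrices
m1 = LSML_Supervised(prior='random', random_state=42).fit(X, y)
m2 = LSML_Supervised(prior='random', random_state=42).fit(X, y)
np.testing.assert_allclose(m1.get_mahalanobis_matrix(),
                           m2.get_mahalanobis_matrix())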
diff --git a/metric_learn/nca.py b/metric_learn/nca.py index 37fe0923..03abdc41 100644 --- a/metric_learn/nca.py +++ b/metric_learn/nca.py @@ -174,7 +174,8 @@ def fit(self, X, y): init = 'auto' else: init = self.init - A = _initialize_components(n_components, X, labels, init, self.verbose) + A = _initialize_components(n_components, X, labels, init, self.verbose, + self.random_state) # Run NCA mask = labels[:, np.newaxis] == labels[np.newaxis, :] diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py index cfd37955..2d67e0b8 100644 --- a/metric_learn/sdml.py +++ b/metric_learn/sdml.py @@ -69,9 +69,8 @@ def _fit(self, pairs, y): else: prior = self.prior _, prior_inv = _initialize_metric_mahalanobis(pairs, prior, - return_inverse=True, - strict_pd=True, - matrix_name='prior') + return_inverse=True, strict_pd=True, matrix_name='prior', + random_state=self.random_state) diff = pairs[:, 0] - pairs[:, 1] loss_matrix = (diff.T * y).dot(diff) emp_cov = prior_inv + self.balance_param * loss_matrix diff --git a/test/test_mahalanobis_mixin.py b/test/test_mahalanobis_mixin.py index 737d2341..a812d185 100644 --- a/test/test_mahalanobis_mixin.py +++ b/test/test_mahalanobis_mixin.py @@ -652,3 +652,25 @@ def test_singular_array_init_or_prior(estimator, build_dataset, w0): with pytest.raises(LinAlgError) as raised_err: model.fit(input_data, labels) assert str(raised_err.value) == msg + + +@pytest.mark.integration +@pytest.mark.parametrize('estimator, build_dataset', metric_learners, + ids=ids_metric_learners) +def test_deterministic_initialization(estimator, build_dataset): + """Test that estimators that have a prior or an init are deterministic + when it is set to to random and when the random_state is fixed.""" + input_data, labels, _, X = build_dataset() + model = clone(estimator) + if hasattr(estimator, 'init'): + model.set_params(init='random') + if hasattr(estimator, 'prior'): + model.set_params(prior='random') + model1 = clone(model) + set_random_state(model1, 42) + model1 = model1.fit(input_data, labels) + model2 = clone(model) + set_random_state(model2, 42) + model2 = model2.fit(input_data, labels) + np.testing.assert_allclose(model1.get_mahalanobis_matrix(), + model2.get_mahalanobis_matrix()) From 57086e91b65b88a95c89449aa501ff68a61dc39a Mon Sep 17 00:00:00 2001 From: William de Vazelhes <31916524+wdevazelhes@users.noreply.github.com> Date: Thu, 18 Jul 2019 16:24:46 +0200 Subject: [PATCH 130/210] Prepare new version (#232) * Prepare new version * Update authors * reorder authors --- doc/conf.py | 8 ++++---- metric_learn/_version.py | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/doc/conf.py b/doc/conf.py index e7e6a108..5d1baeda 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -20,10 +20,10 @@ # General information about the project. 
project = u'metric-learn' -copyright = u'2015-2018, CJ Carey and Yuan Tang' -author = u'CJ Carey and Yuan Tang' -version = '0.4.0' -release = '0.4.0' +copyright = u'2015-2019, CJ Carey, Yuan Tang, William de Vazelhes, Aurélien Bellet, and Nathalie Vauquier' +author = u'CJ Carey, Yuan Tang, William de Vazelhes, Aurélien Bellet, and Nathalie Vauquier' +version = '0.5.0' +release = '0.5.0' language = 'en' exclude_patterns = ['_build'] diff --git a/metric_learn/_version.py b/metric_learn/_version.py index abeeedbf..2b8877c5 100644 --- a/metric_learn/_version.py +++ b/metric_learn/_version.py @@ -1 +1 @@ -__version__ = '0.4.0' +__version__ = '0.5.0' From e7837a17f8835383acd4dfd775109eb015880570 Mon Sep 17 00:00:00 2001 From: Yuan Tang Date: Fri, 19 Jul 2019 12:01:36 -0400 Subject: [PATCH 131/210] Update author information in setup.py (#236) --- setup.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index dfb20fc0..601b9ba7 100755 --- a/setup.py +++ b/setup.py @@ -16,7 +16,13 @@ version=version['__version__'], description='Python implementations of metric learning algorithms', long_description=long_description, - author=['CJ Carey', 'Yuan Tang'], + author=[ + 'CJ Carey', + 'Yuan Tang', + 'William de Vazelhes', + 'Aurélien Bellet', + 'Nathalie Vauquier' + ], author_email='ccarey@cs.umass.edu', url='http://github.com/metric-learn/metric-learn', license='MIT', From c750ef57e20249b2bdcb5dd1e62851f8a489af6f Mon Sep 17 00:00:00 2001 From: William de Vazelhes <31916524+wdevazelhes@users.noreply.github.com> Date: Mon, 29 Jul 2019 10:54:05 +0200 Subject: [PATCH 132/210] Remove message about shogun in LMNN (#239) --- metric_learn/lmnn.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/metric_learn/lmnn.py b/metric_learn/lmnn.py index 2035588f..cb72aaf7 100644 --- a/metric_learn/lmnn.py +++ b/metric_learn/lmnn.py @@ -136,15 +136,6 @@ class LMNN(MahalanobisMixin, TransformerMixin): >>> lmnn = LMNN(k=5, learn_rate=1e-6) >>> lmnn.fit(X, Y, verbose=False) - Notes - ----- - - If a recent version of the Shogun Python modular (``modshogun``) library - is available, the LMNN implementation will use the fast C++ version from - there. Otherwise, the included pure-Python version will be used. - The two implementations differ slightly, and the C++ version is more - complete. - References ---------- .. 
[1] `Distance Metric Learning for Large Margin Nearest Neighbor From fa339f8892a15ea3193a9604358cd126a24f7054 Mon Sep 17 00:00:00 2001 From: William de Vazelhes <31916524+wdevazelhes@users.noreply.github.com> Date: Thu, 1 Aug 2019 11:19:43 +0200 Subject: [PATCH 133/210] [MRG] Address comments for sklearn-contrib integration (#238) * Fix flake8 errors: Mostly: - blank spaces (missing or to be removed) - lines too long - unused variables or imports - bad indentation * Remove unused pytest import * Update .travis.yml to put also python 3.7 * no more flake8 error * Add flake8 check * Fix flake8 command * Solve W605 errors and update list of ignored errors * Improve flake8 command * Add file to run travis flake8 only on diff * Fix typo * Put good name for flake8 file * Introduce a PEP8 error to see if it works * Introduce another pep8 error, inline this time * Actually call check_files * Be less tolerant on pep8 checks * Test new way to specify travis jobs * Go back to full PEP8 * Remove PEP8 errors introduced for testing * Fix missing entry * Try something with removing 'after_success' * Put an error in test to see what happens * Remove unused flake8 install for pytest runs and rename flake8 job * Add error for SDML, and remove the other error * Remove file * Put again after success * Remove skggm error --- .travis.yml | 78 +++++++++---- build_tools/travis/flake8_diff.sh | 132 ++++++++++++++++++++++ doc/conf.py | 6 +- examples/plot_metric_learning_examples.py | 38 ++++--- examples/plot_sandwich.py | 13 ++- metric_learn/__init__.py | 5 + metric_learn/base_metric.py | 8 +- metric_learn/constraints.py | 4 +- metric_learn/itml.py | 21 ++-- metric_learn/lfda.py | 23 ++-- metric_learn/lmnn.py | 26 ++--- metric_learn/lsml.py | 13 ++- metric_learn/mmc.py | 44 ++++---- metric_learn/rca.py | 21 ++-- metric_learn/sdml.py | 9 +- test/metric_learn_test.py | 44 ++++---- test/test_base_metric.py | 22 ++-- test/test_components_metric_conversion.py | 2 +- test/test_fit_transform.py | 5 +- test/test_utils.py | 2 +- 20 files changed, 349 insertions(+), 167 deletions(-) create mode 100644 build_tools/travis/flake8_diff.sh diff --git a/.travis.yml b/.travis.yml index 0e510a9f..116d8b93 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,27 +1,57 @@ language: python sudo: false cache: pip -python: - - "2.7" - - "3.4" - - "3.6" -before_install: - - sudo apt-get install liblapack-dev - - pip install --upgrade pip pytest - - pip install wheel cython numpy scipy codecov pytest-cov - - if $TRAVIS_PYTHON_VERSION == "3.6"; then - pip install scikit-learn; - else - pip install scikit-learn==0.20.3; - fi - - if [[ ($TRAVIS_PYTHON_VERSION == "3.6") || - ($TRAVIS_PYTHON_VERSION == "2.7")]]; then - pip install git+https://github.com/skggm/skggm.git@a0ed406586c4364ea3297a658f415e13b5cbdaf8; - fi -script: - # we do coverage for all versions so that codecov will merge them: this - # way we will see that both paths (with or without skggm) are tested - - pytest test --cov; -after_success: - - bash <(curl -s https://codecov.io/bash) - +language: python +matrix: + include: + - name: "Pytest python 2.7 with skggm" + python: "2.7" + before_install: + - sudo apt-get install liblapack-dev + - pip install --upgrade pip pytest + - pip install wheel cython numpy scipy codecov pytest-cov scikit-learn + - pip install git+https://github.com/skggm/skggm.git@a0ed406586c4364ea3297a658f415e13b5cbdaf8; + script: + - pytest test --cov; + after_success: + - bash <(curl -s https://codecov.io/bash) + - name: "Pytest python 3.4 without skggm" + python: 
"3.4" + before_install: + - sudo apt-get install liblapack-dev + - pip install --upgrade pip pytest + - pip install wheel cython numpy scipy codecov pytest-cov scikit-learn + script: + - pytest test --cov; + after_success: + - bash <(curl -s https://codecov.io/bash) + - name: "Pytest python 3.6 with skggm" + python: "3.6" + before_install: + - sudo apt-get install liblapack-dev + - pip install --upgrade pip pytest + - pip install wheel cython numpy scipy codecov pytest-cov scikit-learn + - pip install git+https://github.com/skggm/skggm.git@a0ed406586c4364ea3297a658f415e13b5cbdaf8; + script: + - pytest test --cov; + after_success: + - bash <(curl -s https://codecov.io/bash) + - name: "Pytest python 3.7 with skggm" + python: "3.7" + before_install: + - sudo apt-get install liblapack-dev + - pip install --upgrade pip pytest + - pip install wheel cython numpy scipy codecov pytest-cov scikit-learn + - pip install git+https://github.com/skggm/skggm.git@a0ed406586c4364ea3297a658f415e13b5cbdaf8; + script: + - pytest test --cov; + after_success: + - bash <(curl -s https://codecov.io/bash) + - name: "Syntax checking with flake8" + python: "3.7" + before_install: + - pip install flake8 + script: + - flake8 --extend-ignore=E111,E114 --show-source; + # Use this instead to have a syntax check only on the diff: + # - source ./build_tools/travis/flake8_diff.sh; diff --git a/build_tools/travis/flake8_diff.sh b/build_tools/travis/flake8_diff.sh new file mode 100644 index 00000000..ce3cdd24 --- /dev/null +++ b/build_tools/travis/flake8_diff.sh @@ -0,0 +1,132 @@ +# This file is not used yet but we keep it in case we need to check the pep8 difference +# on the diff (see .travis.yml) +# +#!/bin/bash +# copied-pasted and adapted from http://github.com/sklearn-contrib/imbalanced-learn +# (more precisely: https://raw.githubusercontent.com/glemaitre/imbalanced-learn +# /adcb9d8e6210b321dac2c1b06879e5e889d52d77/build_tools/travis/flake8_diff.sh) + +# This script is used in Travis to check that PRs do not add obvious +# flake8 violations. It relies on two things: +# - find common ancestor between branch and +# scikit-learn/scikit-learn remote +# - run flake8 --diff on the diff between the branch and the common +# ancestor +# +# Additional features: +# - the line numbers in Travis match the local branch on the PR +# author machine. +# - ./build_tools/travis/flake8_diff.sh can be run locally for quick +# turn-around + +set -e +# pipefail is necessary to propagate exit codes +set -o pipefail + +PROJECT=metric-learn/metric-learn +PROJECT_URL=https://github.com/$PROJECT.git + +# Find the remote with the project name (upstream in most cases) +REMOTE=$(git remote -v | grep $PROJECT | cut -f1 | head -1 || echo '') + +# Add a temporary remote if needed. For example this is necessary when +# Travis is configured to run in a fork. In this case 'origin' is the +# fork and not the reference repo we want to diff against. +if [[ -z "$REMOTE" ]]; then + TMP_REMOTE=tmp_reference_upstream + REMOTE=$TMP_REMOTE + git remote add $REMOTE $PROJECT_URL +fi + +echo "Remotes:" +echo '--------------------------------------------------------------------------------' +git remote --verbose + +# Travis does the git clone with a limited depth (50 at the time of +# writing). 
This may not be enough to find the common ancestor with +# $REMOTE/master so we unshallow the git checkout +if [[ -a .git/shallow ]]; then + echo -e '\nTrying to unshallow the repo:' + echo '--------------------------------------------------------------------------------' + git fetch --unshallow +fi + +if [[ "$TRAVIS" == "true" ]]; then + if [[ "$TRAVIS_PULL_REQUEST" == "false" ]] + then + # In main repo, using TRAVIS_COMMIT_RANGE to test the commits + # that were pushed into a branch + if [[ "$PROJECT" == "$TRAVIS_REPO_SLUG" ]]; then + if [[ -z "$TRAVIS_COMMIT_RANGE" ]]; then + echo "New branch, no commit range from Travis so passing this test by convention" + exit 0 + fi + COMMIT_RANGE=$TRAVIS_COMMIT_RANGE + fi + else + # We want to fetch the code as it is in the PR branch and not + # the result of the merge into master. This way line numbers + # reported by Travis will match with the local code. + LOCAL_BRANCH_REF=travis_pr_$TRAVIS_PULL_REQUEST + # In Travis the PR target is always origin + git fetch origin pull/$TRAVIS_PULL_REQUEST/head:refs/$LOCAL_BRANCH_REF + fi +fi + +# If not using the commit range from Travis we need to find the common +# ancestor between $LOCAL_BRANCH_REF and $REMOTE/master +if [[ -z "$COMMIT_RANGE" ]]; then + if [[ -z "$LOCAL_BRANCH_REF" ]]; then + LOCAL_BRANCH_REF=$(git rev-parse --abbrev-ref HEAD) + fi + echo -e "\nLast 2 commits in $LOCAL_BRANCH_REF:" + echo '--------------------------------------------------------------------------------' + git log -2 $LOCAL_BRANCH_REF + + REMOTE_MASTER_REF="$REMOTE/master" + # Make sure that $REMOTE_MASTER_REF is a valid reference + echo -e "\nFetching $REMOTE_MASTER_REF" + echo '--------------------------------------------------------------------------------' + git fetch $REMOTE master:refs/remotes/$REMOTE_MASTER_REF + LOCAL_BRANCH_SHORT_HASH=$(git rev-parse --short $LOCAL_BRANCH_REF) + REMOTE_MASTER_SHORT_HASH=$(git rev-parse --short $REMOTE_MASTER_REF) + + COMMIT=$(git merge-base $LOCAL_BRANCH_REF $REMOTE_MASTER_REF) || \ + echo "No common ancestor found for $(git show $LOCAL_BRANCH_REF -q) and $(git show $REMOTE_MASTER_REF -q)" + + if [ -z "$COMMIT" ]; then + exit 1 + fi + + COMMIT_SHORT_HASH=$(git rev-parse --short $COMMIT) + + echo -e "\nCommon ancestor between $LOCAL_BRANCH_REF ($LOCAL_BRANCH_SHORT_HASH)"\ + "and $REMOTE_MASTER_REF ($REMOTE_MASTER_SHORT_HASH) is $COMMIT_SHORT_HASH:" + echo '--------------------------------------------------------------------------------' + git show --no-patch $COMMIT_SHORT_HASH + + COMMIT_RANGE="$COMMIT_SHORT_HASH..$LOCAL_BRANCH_SHORT_HASH" + + if [[ -n "$TMP_REMOTE" ]]; then + git remote remove $TMP_REMOTE + fi + +else + echo "Got the commit range from Travis: $COMMIT_RANGE" +fi + +echo -e '\nRunning flake8 on the diff in the range' "$COMMIT_RANGE" \ + "($(git rev-list $COMMIT_RANGE | wc -l) commit(s)):" +echo '--------------------------------------------------------------------------------' + +# to not include the context (some lines before and after the modified lines), add the +# flag --unified=0 (warning: it will not include some errors like for instance adding too +# much blank lines +check_files() { + git diff $COMMIT_RANGE | flake8 --diff --show-source --extend-ignore=E111,E114 +} + +check_files + +echo -e "No problem detected by flake8\n" + diff --git a/doc/conf.py b/doc/conf.py index 5d1baeda..0f46992d 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -20,8 +20,10 @@ # General information about the project. 
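
An editorial aside on the hunk shown here: the reformatting that follows
relies on Python's implicit concatenation of adjacent string literals, so
wrapping a long value in parentheses lets it span several physical lines
while each line stays within flake8's length limit. A minimal sketch (the
variable name is illustrative only):

    name = (u'Aurélien Bellet and '
            u'Nathalie Vauquier')
    assert name == u'Aurélien Bellet and Nathalie Vauquier'
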
project = u'metric-learn' -copyright = u'2015-2019, CJ Carey, Yuan Tang, William de Vazelhes, Aurélien Bellet, and Nathalie Vauquier' -author = u'CJ Carey, Yuan Tang, William de Vazelhes, Aurélien Bellet, and Nathalie Vauquier' +copyright = (u'2015-2019, CJ Carey, Yuan Tang, William de Vazelhes, Aurélien ' + u'Bellet and Nathalie Vauquier') +author = (u'CJ Carey, Yuan Tang, William de Vazelhes, Aurélien Bellet and ' + u'Nathalie Vauquier') version = '0.5.0' release = '0.5.0' language = 'en' diff --git a/examples/plot_metric_learning_examples.py b/examples/plot_metric_learning_examples.py index 0d602cbb..047fcc1e 100644 --- a/examples/plot_metric_learning_examples.py +++ b/examples/plot_metric_learning_examples.py @@ -88,7 +88,7 @@ def plot_tsne(X, y, colormap=plt.cm.Paired): # distances between points for the task at hand. Especially in higher # dimensions when Euclidean distances are a poor way to measure distance, this # becomes very useful. -# +# # Basically, we learn this distance: # :math:`D(x, x') = \sqrt{(x-x')^\top M(x-x')}`. And we learn the parameters # :math:`M` of this distance to satisfy certain constraints on the distance @@ -113,12 +113,12 @@ def plot_tsne(X, y, colormap=plt.cm.Paired): ###################################################################### # Large Margin Nearest Neighbour # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -# +# # LMNN is a metric learning algorithm primarily designed for k-nearest # neighbor classification. The algorithm is based on semidefinite # programming, a sub-class of convex programming (as most Metric Learning # algorithms are). -# +# # The main intuition behind LMNN is to learn a pseudometric under which # all data instances in the training set are surrounded by at least k # instances that share the same class label. If this is achieved, the @@ -136,7 +136,7 @@ def plot_tsne(X, y, colormap=plt.cm.Paired): ###################################################################### # Fit and then transform! # ----------------------- -# +# # setting up LMNN lmnn = metric_learn.LMNN(k=5, learn_rate=1e-6) @@ -162,7 +162,7 @@ def plot_tsne(X, y, colormap=plt.cm.Paired): ###################################################################### # Pretty neat, huh? -# +# # The rest of this notebook will briefly explain the other Metric Learning # algorithms before plotting them. Also, while we have first run ``fit`` # and then ``transform`` to see our data transformed, we can also use @@ -172,7 +172,7 @@ def plot_tsne(X, y, colormap=plt.cm.Paired): ###################################################################### # Information Theoretic Metric Learning # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -# +# # ITML uses a regularizer that automatically enforces a Semi-Definite # Positive Matrix condition - the LogDet divergence. It uses soft # must-link or cannot like constraints, and a simple algorithm based on @@ -231,7 +231,7 @@ def plot_tsne(X, y, colormap=plt.cm.Paired): ###################################################################### # Least Squares Metric Learning # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -# +# # LSML is a simple, yet effective, algorithm that learns a Mahalanobis # metric from a given set of relative comparisons. 
This is done by # formulating and minimizing a convex loss function that corresponds to @@ -277,7 +277,7 @@ def plot_tsne(X, y, colormap=plt.cm.Paired): ###################################################################### # Local Fisher Discriminant Analysis # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -# +# # LFDA is a linear supervised dimensionality reduction method. It is # particularly useful when dealing with multimodality, where one ore more # classes consist of separate clusters in input space. The core @@ -298,7 +298,7 @@ def plot_tsne(X, y, colormap=plt.cm.Paired): ###################################################################### # Relative Components Analysis # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -# +# # RCA is another one of the older algorithms. It learns a full rank # Mahalanobis distance metric based on a weighted sum of in-class # covariance matrices. It applies a global linear transformation to assign @@ -402,7 +402,7 @@ def plot_tsne(X, y, colormap=plt.cm.Paired): def create_constraints(labels): import itertools import random - + # aggregate indices of same class zeros = np.where(y == 0)[0] ones = np.where(y == 1)[0] @@ -413,7 +413,7 @@ def create_constraints(labels): twos_ = list(itertools.combinations(twos, 2)) # put them together! sim = np.array(zeros_ + ones_ + twos_) - + # similarily, put together indices in different classes dis = [] for zero in zeros: @@ -424,21 +424,25 @@ def create_constraints(labels): for one in ones: for two in twos: dis.append((one, two)) - + # pick up just enough dissimilar examples as we have similar examples dis = np.array(random.sample(dis, len(sim))) - - # return an array of pairs of indices of shape=(2*len(sim), 2), and the corresponding labels, array of shape=(2*len(sim)) - # Each pair of similar points have a label of +1 and each pair of dissimilar points have a label of -1 - return (np.vstack([np.column_stack([sim[:, 0], sim[:, 1]]), np.column_stack([dis[:, 0], dis[:, 1]])]), + + # return an array of pairs of indices of shape=(2*len(sim), 2), and the + # corresponding labels, array of shape=(2*len(sim)) + # Each pair of similar points have a label of +1 and each pair of + # dissimilar points have a label of -1 + return (np.vstack([np.column_stack([sim[:, 0], sim[:, 1]]), + np.column_stack([dis[:, 0], dis[:, 1]])]), np.concatenate([np.ones(len(sim)), -np.ones(len(sim))])) + pairs, pairs_labels = create_constraints(y) ###################################################################### # Now that we've created our constraints, let's see what it looks like! 
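
A brief editorial sketch of how index pairs like those built by
create_constraints above can be consumed (this assumes the example's X,
pairs and pairs_labels are in scope; MMC is just one plausible pairs
learner from this package):

    from metric_learn import MMC

    # pairs holds row indices into X, so X is passed as the preprocessor;
    # pairs_labels is +1 for similar pairs and -1 for dissimilar ones.
    mmc = MMC(preprocessor=X)
    mmc.fit(pairs, pairs_labels)
    X_mmc = mmc.transform(X)  # embed the points with the learned metric
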
-# +# print(pairs) print(pairs_labels) diff --git a/examples/plot_sandwich.py b/examples/plot_sandwich.py index 84e53d07..d5856667 100644 --- a/examples/plot_sandwich.py +++ b/examples/plot_sandwich.py @@ -11,7 +11,8 @@ from sklearn.metrics import pairwise_distances from sklearn.neighbors import NearestNeighbors -from metric_learn import LMNN, ITML_Supervised, LSML_Supervised, SDML_Supervised +from metric_learn import (LMNN, ITML_Supervised, LSML_Supervised, + SDML_Supervised) def sandwich_demo(): @@ -47,10 +48,10 @@ def sandwich_demo(): # TODO: use this somewhere def visualize_class_separation(X, labels): - _, (ax1,ax2) = plt.subplots(ncols=2) + _, (ax1, ax2) = plt.subplots(ncols=2) label_order = np.argsort(labels) ax1.imshow(pairwise_distances(X[label_order]), interpolation='nearest') - ax2.imshow(pairwise_distances(labels[label_order,None]), + ax2.imshow(pairwise_distances(labels[label_order, None]), interpolation='nearest') @@ -77,19 +78,19 @@ def sandwich_data(): for k, xc in enumerate(x_centers): data[i, k, 0] = np.random.normal(xc, 0.1) data[i, k, 1] = np.random.normal(yc, 0.1) - labels[i,:] = i + labels[i, :] = i return data.reshape((-1, 2)), labels.ravel() def plot_sandwich_data(x, y, axis=plt, colors='rbgmky'): for idx, val in enumerate(np.unique(y)): - xi = x[y==val] + xi = x[y == val] axis.scatter(*xi.T, s=50, facecolors='none', edgecolors=colors[idx]) def plot_neighborhood_graph(x, nn, y, axis=plt, colors='rbgmky'): for i, a in enumerate(x): - b = x[nn[i,1]] + b = x[nn[i, 1]] axis.plot((a[0], b[0]), (a[1], b[1]), colors[y[i]]) diff --git a/metric_learn/__init__.py b/metric_learn/__init__.py index b2b84559..b036ccfa 100644 --- a/metric_learn/__init__.py +++ b/metric_learn/__init__.py @@ -13,3 +13,8 @@ from .mmc import MMC, MMC_Supervised from ._version import __version__ + +__all__ = ['Constraints', 'Covariance', 'ITML', 'ITML_Supervised', + 'LMNN', 'LSML', 'LSML_Supervised', 'SDML', + 'SDML_Supervised', 'NCA', 'LFDA', 'RCA', 'RCA_Supervised', + 'MLKR', 'MMC', 'MMC_Supervised', '__version__'] diff --git a/metric_learn/base_metric.py b/metric_learn/base_metric.py index 570172a9..6feccc72 100644 --- a/metric_learn/base_metric.py +++ b/metric_learn/base_metric.py @@ -163,7 +163,7 @@ def transform(self, X): class MahalanobisMixin(six.with_metaclass(ABCMeta, BaseMetricLearner, MetricTransformer)): - """Mahalanobis metric learning algorithms. + r"""Mahalanobis metric learning algorithms. Algorithm that learns a Mahalanobis (pseudo) distance :math:`d_M(x, x')`, defined between two column vectors :math:`x` and :math:`x'` by: :math:`d_M(x, @@ -182,7 +182,7 @@ class MahalanobisMixin(six.with_metaclass(ABCMeta, BaseMetricLearner, """ def score_pairs(self, pairs): - """Returns the learned Mahalanobis distance between pairs. + r"""Returns the learned Mahalanobis distance between pairs. This distance is defined as: :math:`d_M(x, x') = \sqrt{(x-x')^T M (x-x')}` where ``M`` is the learned Mahalanobis matrix, for every pair of points @@ -241,8 +241,8 @@ def transform(self, X): The embedded data points. 
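
An editorial sketch (not text added by this patch) of the guarantee that
ties transform to score_pairs for any fitted Mahalanobis learner: with
L = components_, the learned distance is the Euclidean distance between
embedded points, since M = L.T.dot(L). Here model, u and v are assumed
to be a fitted MahalanobisMixin instance and two feature vectors:

    import numpy as np

    Lu, Lv = model.transform(np.array([u, v]))
    d_embed = np.linalg.norm(Lu - Lv)
    d_score = model.score_pairs(np.array([[u, v]]))[0]
    assert np.isclose(d_embed, d_score)
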
""" X_checked = check_input(X, type_of_inputs='classic', estimator=self, - preprocessor=self.preprocessor_, - accept_sparse=True) + preprocessor=self.preprocessor_, + accept_sparse=True) return X_checked.dot(self.components_.T) def get_metric(self): diff --git a/metric_learn/constraints.py b/metric_learn/constraints.py index e42ef4b8..89a3b48d 100644 --- a/metric_learn/constraints.py +++ b/metric_learn/constraints.py @@ -32,7 +32,7 @@ def adjacency_matrix(self, num_constraints, random_state=None): col = np.concatenate((b, d)) data = np.ones_like(row, dtype=int) data[len(a):] = -1 - adj = coo_matrix((data, (row, col)), shape=(self.num_points,)*2) + adj = coo_matrix((data, (row, col)), shape=(self.num_points,) * 2) # symmetrize return adj + adj.T @@ -84,7 +84,7 @@ def chunks(self, num_chunks=100, chunk_size=2, random_state=None): if len(all_inds) == 1: c = 0 else: - c = random_state.randint(0, high=len(all_inds)-1) + c = random_state.randint(0, high=len(all_inds) - 1) inds = all_inds[c] if len(inds) < chunk_size: del all_inds[c] diff --git a/metric_learn/itml.py b/metric_learn/itml.py index c3b91fc4..1f3f90ee 100644 --- a/metric_learn/itml.py +++ b/metric_learn/itml.py @@ -63,7 +63,7 @@ def _fit(self, pairs, y, bounds=None): num_neg = len(neg_pairs) _lambda = np.zeros(num_pos + num_neg) lambdaold = np.zeros_like(_lambda) - gamma_proj = 1. if gamma is np.inf else gamma/(gamma+1.) + gamma_proj = 1. if gamma is np.inf else gamma / (gamma + 1.) pos_bhat = np.zeros(num_pos) + self.bounds_[0] neg_bhat = np.zeros(num_neg) + self.bounds_[1] pos_vv = pos_pairs[:, 0, :] - pos_pairs[:, 1, :] @@ -71,22 +71,23 @@ def _fit(self, pairs, y, bounds=None): for it in xrange(self.max_iter): # update positives - for i,v in enumerate(pos_vv): + for i, v in enumerate(pos_vv): wtw = v.dot(A).dot(v) # scalar - alpha = min(_lambda[i], gamma_proj*(1./wtw - 1./pos_bhat[i])) + alpha = min(_lambda[i], gamma_proj * (1. / wtw - 1. / pos_bhat[i])) _lambda[i] -= alpha - beta = alpha/(1 - alpha*wtw) - pos_bhat[i] = 1./((1 / pos_bhat[i]) + (alpha / gamma)) + beta = alpha / (1 - alpha * wtw) + pos_bhat[i] = 1. / ((1 / pos_bhat[i]) + (alpha / gamma)) Av = A.dot(v) A += np.outer(Av, Av * beta) # update negatives - for i,v in enumerate(neg_vv): + for i, v in enumerate(neg_vv): wtw = v.dot(A).dot(v) # scalar - alpha = min(_lambda[i+num_pos], gamma_proj*(1./neg_bhat[i] - 1./wtw)) - _lambda[i+num_pos] -= alpha - beta = -alpha/(1 + alpha*wtw) - neg_bhat[i] = 1./((1 / neg_bhat[i]) - (alpha / gamma)) + alpha = min(_lambda[i + num_pos], + gamma_proj * (1. / neg_bhat[i] - 1. / wtw)) + _lambda[i + num_pos] -= alpha + beta = -alpha / (1 + alpha * wtw) + neg_bhat[i] = 1. / ((1 / neg_bhat[i]) - (alpha / gamma)) Av = A.dot(v) A += np.outer(Av, Av * beta) diff --git a/metric_learn/lfda.py b/metric_learn/lfda.py index ffc4c885..99e7c978 100644 --- a/metric_learn/lfda.py +++ b/metric_learn/lfda.py @@ -113,15 +113,16 @@ def fit(self, X, y): if self.k is None: k = min(7, d - 1) elif self.k >= d: - warnings.warn('Chosen k (%d) too large, using %d instead.' % (self.k,d-1)) + warnings.warn('Chosen k (%d) too large, using %d instead.' 
+ % (self.k, d - 1)) k = d - 1 else: k = int(self.k) - tSb = np.zeros((d,d)) - tSw = np.zeros((d,d)) + tSb = np.zeros((d, d)) + tSw = np.zeros((d, d)) for c in xrange(num_classes): - Xc = X[y==c] + Xc = X[y == c] nc = Xc.shape[0] # classwise affinity matrix @@ -132,14 +133,14 @@ def fit(self, X, y): local_scale = np.outer(sigma, sigma) with np.errstate(divide='ignore', invalid='ignore'): - A = np.exp(-dist/local_scale) - A[local_scale==0] = 0 + A = np.exp(-dist / local_scale) + A[local_scale == 0] = 0 - G = Xc.T.dot(A.sum(axis=0)[:,None] * Xc) - Xc.T.dot(A).dot(Xc) - tSb += G/n + (1-nc/n)*Xc.T.dot(Xc) + _sum_outer(Xc)/n - tSw += G/nc + G = Xc.T.dot(A.sum(axis=0)[:, None] * Xc) - Xc.T.dot(A).dot(Xc) + tSb += G / n + (1 - nc / n) * Xc.T.dot(Xc) + _sum_outer(Xc) / n + tSw += G / nc - tSb -= _sum_outer(X)/n - tSw + tSb -= _sum_outer(X) / n - tSw # symmetrize tSb = (tSb + tSb.T) / 2 @@ -148,7 +149,7 @@ def fit(self, X, y): vals, vecs = _eigh(tSb, tSw, dim) order = np.argsort(-vals)[:dim] vals = vals[order].real - vecs = vecs[:,order] + vecs = vecs[:, order] if self.embedding_type == 'weighted': vecs *= np.sqrt(vals) diff --git a/metric_learn/lmnn.py b/metric_learn/lmnn.py index cb72aaf7..15dd9a18 100644 --- a/metric_learn/lmnn.py +++ b/metric_learn/lmnn.py @@ -235,8 +235,8 @@ def fit(self, X, y): # we copy variables that can be modified by _loss_grad, because if we # retry we don t want to modify them several times (G_next, objective_next, total_active_next) = ( - self._loss_grad(X, L_next, dfG, k, reg, target_neighbors, - label_inds)) + self._loss_grad(X, L_next, dfG, k, reg, target_neighbors, + label_inds)) assert not np.isnan(objective) delta_obj = objective_next - objective if delta_obj > 0: @@ -329,15 +329,15 @@ def _find_impostors(self, furthest_neighbors, X, label_inds, L): in_inds, = np.nonzero(label_inds == label) out_inds, = np.nonzero(label_inds > label) dist = euclidean_distances(Lx[out_inds], Lx[in_inds], squared=True) - i1,j1 = np.nonzero(dist < margin_radii[out_inds][:,None]) - i2,j2 = np.nonzero(dist < margin_radii[in_inds]) - i = np.hstack((i1,i2)) - j = np.hstack((j1,j2)) + i1, j1 = np.nonzero(dist < margin_radii[out_inds][:, None]) + i2, j2 = np.nonzero(dist < margin_radii[in_inds]) + i = np.hstack((i1, i2)) + j = np.hstack((j1, j2)) if i.size > 0: # get unique (i,j) pairs using index trickery - shape = (i.max()+1, j.max()+1) - tmp = np.ravel_multi_index((i,j), shape) - i,j = np.unravel_index(np.unique(tmp), shape) + shape = (i.max() + 1, j.max() + 1) + tmp = np.ravel_multi_index((i, j), shape) + i, j = np.unravel_index(np.unique(tmp), shape) impostors.append(np.vstack((in_inds[j], out_inds[i]))) if len(impostors) == 0: # No impostors detected @@ -352,19 +352,19 @@ def _inplace_paired_L2(A, B): def _count_edges(act1, act2, impostors, targets): - imp = impostors[0,act1] + imp = impostors[0, act1] c = Counter(zip(imp, targets[imp])) - imp = impostors[1,act2] + imp = impostors[1, act2] c.update(zip(imp, targets[imp])) if c: active_pairs = np.array(list(c.keys())) else: - active_pairs = np.empty((0,2), dtype=int) + active_pairs = np.empty((0, 2), dtype=int) return active_pairs, np.array(list(c.values())) def _sum_outer_products(data, a_inds, b_inds, weights=None): Xab = data[a_inds] - data[b_inds] if weights is not None: - return np.dot(Xab.T, Xab * weights[:,None]) + return np.dot(Xab.T, Xab * weights[:, None]) return np.dot(Xab.T, Xab) diff --git a/metric_learn/lsml.py b/metric_learn/lsml.py index 340e6bf2..dfb12720 100644 --- a/metric_learn/lsml.py +++ b/metric_learn/lsml.py 
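
The lmnn.py hunk above keeps the "index trickery" used in _find_impostors
to deduplicate (i, j) impostor pairs. A standalone editorial sketch of
that idiom, with made-up toy indices:

    import numpy as np

    i = np.array([0, 2, 0, 1])
    j = np.array([1, 0, 1, 1])                  # the pair (0, 1) occurs twice
    shape = (i.max() + 1, j.max() + 1)
    flat = np.ravel_multi_index((i, j), shape)  # encode each (i, j) as one int
    i, j = np.unravel_index(np.unique(flat), shape)
    # i is now [0, 1, 2] and j is [1, 1, 0]: duplicates dropped, pairs aligned
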
@@ -55,9 +55,10 @@ def _fit(self, quadruplets, weights=None): prior = 'identity' else: prior = self.prior - M, prior_inv = _initialize_metric_mahalanobis(quadruplets, prior, - return_inverse=True, strict_pd=True, matrix_name='prior', - random_state=self.random_state) + M, prior_inv = _initialize_metric_mahalanobis( + quadruplets, prior, + return_inverse=True, strict_pd=True, matrix_name='prior', + random_state=self.random_state) step_sizes = np.logspace(-10, 0, 10) # Keep track of the best step size and the loss at that step. @@ -65,7 +66,7 @@ def _fit(self, quadruplets, weights=None): s_best = self._total_loss(M, vab, vcd, prior_inv) if self.verbose: print('initial loss', s_best) - for it in xrange(1, self.max_iter+1): + for it in xrange(1, self.max_iter + 1): grad = self._gradient(M, vab, vcd, prior_inv) grad_norm = scipy.linalg.norm(grad) if grad_norm < self.tol: @@ -117,8 +118,8 @@ def _gradient(self, metric, vab, vcd, prior_inv): # TODO: vectorize for vab, dab, vcd, dcd in zip(vab[violations], dabs[violations], vcd[violations], dcds[violations]): - dMetric += ((1-np.sqrt(dcd/dab))*np.outer(vab, vab) + - (1-np.sqrt(dab/dcd))*np.outer(vcd, vcd)) + dMetric += ((1 - np.sqrt(dcd / dab)) * np.outer(vab, vab) + + (1 - np.sqrt(dab / dcd)) * np.outer(vcd, vcd)) return dMetric diff --git a/metric_learn/mmc.py b/metric_learn/mmc.py index c8c52b24..0999e417 100644 --- a/metric_learn/mmc.py +++ b/metric_learn/mmc.py @@ -77,7 +77,7 @@ def _fit_full(self, pairs, y): """ num_dim = pairs.shape[2] - error1 = error2 = 1e10 + error2 = 1e10 eps = 0.01 # error-bound of iterative projection on C1 and C2 A = self.A_ @@ -105,7 +105,8 @@ def _fit_full(self, pairs, y): # constraint function grad2 = self._fD1(neg_pairs, A) # gradient of dissimilarity # constraint function - M = self._grad_projection(grad1, grad2) # gradient of fD1 orthogonal to fS1 + # gradient of fD1 orthogonal to fS1: + M = self._grad_projection(grad1, grad2) A_old = A.copy() @@ -133,7 +134,7 @@ def _fit_full(self, pairs, y): # PSD constraint A >= 0 # project A onto domain A>0 l, V = np.linalg.eigh((A + A.T) / 2) - A[:] = np.dot(V * np.maximum(0, l[None,:]), V.T) + A[:] = np.dot(V * np.maximum(0, l[None, :]), V.T) fDC2 = w.dot(A.ravel()) error2 = (fDC2 - t) / t @@ -172,7 +173,8 @@ def _fit_full(self, pairs, y): if delta < self.convergence_threshold: break if self.verbose: - print('mmc iter: %d, conv = %f, projections = %d' % (cycle, delta, it+1)) + print('mmc iter: %d, conv = %f, projections = %d' % + (cycle, delta, it + 1)) if delta > self.convergence_threshold: self.converged_ = False @@ -214,8 +216,10 @@ def _fit_diag(self, pairs, y): obj_initial = np.dot(s_sum, w) + self.diagonal_c * fD0 fS_1st_d = s_sum # first derivative of the similarity constraints - gradient = fS_1st_d - self.diagonal_c * fD_1st_d # gradient of the objective - hessian = -self.diagonal_c * fD_2nd_d + eps * np.eye(num_dim) # Hessian of the objective + # gradient of the objective: + gradient = fS_1st_d - self.diagonal_c * fD_1st_d + # Hessian of the objective: + hessian = -self.diagonal_c * fD_2nd_d + eps * np.eye(num_dim) step = np.dot(np.linalg.inv(hessian), gradient) # Newton-Rapshon update @@ -250,16 +254,17 @@ def _fit_diag(self, pairs, y): return self def _fD(self, neg_pairs, A): - """The value of the dissimilarity constraint function. + r"""The value of the dissimilarity constraint function. f = f(\sum_{ij \in D} distance(x_i, x_j)) i.e. 
distance can be L1: \sqrt{(x_i-x_j)A(x_i-x_j)'} """ diff = neg_pairs[:, 0, :] - neg_pairs[:, 1, :] - return np.log(np.sum(np.sqrt(np.sum(np.dot(diff, A) * diff, axis=1))) + 1e-6) + return np.log(np.sum(np.sqrt(np.sum(np.dot(diff, A) * diff, axis=1))) + + 1e-6) def _fD1(self, neg_pairs, A): - """The gradient of the dissimilarity constraint function w.r.t. A. + r"""The gradient of the dissimilarity constraint function w.r.t. A. For example, let distance by L1 norm: f = f(\sum_{ij \in D} \sqrt{(x_i-x_j)A(x_i-x_j)'}) @@ -270,19 +275,19 @@ def _fD1(self, neg_pairs, A): df/dA = f'(\sum_{ij \in D} \sqrt{tr(d_ij'*d_ij*A)}) * 0.5*(\sum_{ij \in D} (1/sqrt{tr(d_ij'*d_ij*A)})*(d_ij'*d_ij)) """ - dim = neg_pairs.shape[2] diff = neg_pairs[:, 0, :] - neg_pairs[:, 1, :] # outer products of all rows in `diff` M = np.einsum('ij,ik->ijk', diff, diff) # faster version of: dist = np.sqrt(np.sum(M * A[None,:,:], axis=(1,2))) dist = np.sqrt(np.einsum('ijk,jk', M, A)) - # faster version of: sum_deri = np.sum(M / (2 * (dist[:,None,None] + 1e-6)), axis=0) + # faster version of: sum_deri = np.sum(M / + # (2 * (dist[:,None,None] + 1e-6)), axis=0) sum_deri = np.einsum('ijk,i->jk', M, 0.5 / (dist + 1e-6)) sum_dist = dist.sum() return sum_deri / (sum_dist + 1e-6) def _fS1(self, pos_pairs, A): - """The gradient of the similarity constraint function w.r.t. A. + r"""The gradient of the similarity constraint function w.r.t. A. f = \sum_{ij}(x_i-x_j)A(x_i-x_j)' = \sum_{ij}d_ij*A*d_ij' df/dA = d(d_ij*A*d_ij')/dA @@ -290,9 +295,9 @@ def _fS1(self, pos_pairs, A): Note that d_ij*A*d_ij' = tr(d_ij*A*d_ij') = tr(d_ij'*d_ij*A) so, d(d_ij*A*d_ij')/dA = d_ij'*d_ij """ - dim = pos_pairs.shape[2] diff = pos_pairs[:, 0, :] - pos_pairs[:, 1, :] - return np.einsum('ij,ik->jk', diff, diff) # sum of outer products of all rows in `diff` + # sum of outer products of all rows in `diff`: + return np.einsum('ij,ik->jk', diff, diff) def _grad_projection(self, grad1, grad2): grad2 = grad2 / np.linalg.norm(grad2) @@ -303,7 +308,7 @@ def _grad_projection(self, grad1, grad2): def _D_objective(self, neg_pairs, w): return np.log(np.sum(np.sqrt(np.sum(((neg_pairs[:, 0, :] - neg_pairs[:, 1, :]) ** 2) * - w[None,:], axis=1) + 1e-6))) + w[None, :], axis=1) + 1e-6))) def _D_constraint(self, neg_pairs, w): """Compute the value, 1st derivative, second derivative (Hessian) of @@ -317,13 +322,14 @@ def _D_constraint(self, neg_pairs, w): sum_deri2 = np.einsum( 'ij,ik->jk', diff_sq, - diff_sq / (-4 * np.maximum(1e-6, dist**3))[:,None] + diff_sq / (-4 * np.maximum(1e-6, dist**3))[:, None] ) sum_dist = dist.sum() return ( - np.log(sum_dist), - sum_deri1 / sum_dist, - sum_deri2 / sum_dist - np.outer(sum_deri1, sum_deri1) / (sum_dist * sum_dist) + np.log(sum_dist), + sum_deri1 / sum_dist, + sum_deri2 / sum_dist - + np.outer(sum_deri1, sum_deri1) / (sum_dist * sum_dist) ) diff --git a/metric_learn/rca.py b/metric_learn/rca.py index 8471a1b1..2a9ab1e8 100644 --- a/metric_learn/rca.py +++ b/metric_learn/rca.py @@ -6,7 +6,6 @@ import numpy as np import warnings from six.moves import xrange -from sklearn import decomposition from sklearn.base import TransformerMixin from sklearn.exceptions import ChangedBehaviorWarning @@ -123,20 +122,20 @@ def fit(self, X, chunks): if self.pca_comps != 'deprecated': warnings.warn( - '"pca_comps" parameter is not used. ' - 'It has been deprecated in version 0.5.0 and will be' - 'removed in 0.6.0. RCA will not do PCA preprocessing anymore. 
If ' - 'you still want to do it, you could use ' - '`sklearn.decomposition.PCA` and an `sklearn.pipeline.Pipeline`.', - DeprecationWarning) + '"pca_comps" parameter is not used. ' + 'It has been deprecated in version 0.5.0 and will be' + 'removed in 0.6.0. RCA will not do PCA preprocessing anymore. If ' + 'you still want to do it, you could use ' + '`sklearn.decomposition.PCA` and an `sklearn.pipeline.Pipeline`.', + DeprecationWarning) X, chunks = self._prepare_inputs(X, chunks, ensure_min_samples=2) warnings.warn( - "RCA will no longer center the data before training. If you want " - "to do some preprocessing, you should do it manually (you can also " - "use an `sklearn.pipeline.Pipeline` for instance). This warning " - "will disappear in version 0.6.0.", ChangedBehaviorWarning) + "RCA will no longer center the data before training. If you want " + "to do some preprocessing, you should do it manually (you can also " + "use an `sklearn.pipeline.Pipeline` for instance). This warning " + "will disappear in version 0.6.0.", ChangedBehaviorWarning) chunks = np.asanyarray(chunks, dtype=int) chunk_mask, chunked_data = _chunk_mean_centering(X, chunks) diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py index 2d67e0b8..2cdaa164 100644 --- a/metric_learn/sdml.py +++ b/metric_learn/sdml.py @@ -68,9 +68,10 @@ def _fit(self, pairs, y): prior = 'identity' else: prior = self.prior - _, prior_inv = _initialize_metric_mahalanobis(pairs, prior, - return_inverse=True, strict_pd=True, matrix_name='prior', - random_state=self.random_state) + _, prior_inv = _initialize_metric_mahalanobis( + pairs, prior, + return_inverse=True, strict_pd=True, matrix_name='prior', + random_state=self.random_state) diff = pairs[:, 0] - pairs[:, 1] loss_matrix = (diff.T * y).dot(diff) emp_cov = prior_inv + self.balance_param * loss_matrix @@ -128,7 +129,7 @@ def _fit(self, pairs, y): class SDML(_BaseSDML, _PairsClassifierMixin): - """Sparse Distance Metric Learning (SDML) + r"""Sparse Distance Metric Learning (SDML) SDML is an efficient sparse metric learning in high-dimensional space via double regularization: an L1-penalization on the off-diagonal elements of the diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py index 00314ad0..79322124 100644 --- a/test/metric_learn_test.py +++ b/test/metric_learn_test.py @@ -15,6 +15,7 @@ from sklearn.utils.validation import check_X_y try: from inverse_covariance import quic + assert(quic) except ImportError: HAS_SKGGM = False else: @@ -32,9 +33,9 @@ def class_separation(X, labels): unique_labels, label_inds = np.unique(labels, return_inverse=True) ratio = 0 for li in xrange(len(unique_labels)): - Xc = X[label_inds==li] - Xnc = X[label_inds!=li] - ratio += pairwise_distances(Xc).mean() / pairwise_distances(Xc,Xnc).mean() + Xc = X[label_inds == li] + Xnc = X[label_inds != li] + ratio += pairwise_distances(Xc).mean() / pairwise_distances(Xc, Xnc).mean() return ratio / len(unique_labels) @@ -434,7 +435,7 @@ def __init__(self, callback, *args, **kwargs): def _loss_grad(self, *args, **kwargs): grad, objective, total_active = ( - super(LMNN_with_callback, self)._loss_grad(*args, **kwargs)) + super(LMNN_with_callback, self)._loss_grad(*args, **kwargs)) self.callback.append(grad) return grad, objective, total_active @@ -463,18 +464,18 @@ def _loss_grad(self, X, L, dfG, k, reg, target_neighbors, label_inds): out, _ = capsys.readouterr() lines = re.split("\n+", out) # we get every variable that is printed from the algorithm in verbose - num = '(-?\d+.?\d*(e[+|-]\d+)?)' - strings = 
[re.search("\d+ (?:{}) (?:{}) (?:(\d+)) (?:{})" + num = r'(-?\d+.?\d*(e[+|-]\d+)?)' + strings = [re.search(r"\d+ (?:{}) (?:{}) (?:(\d+)) (?:{})" .format(num, num, num), s) for s in lines] objectives[name] = [float(match.group(1)) for match in strings if match is not None] obj_diffs[name] = [float(match.group(3)) for match in strings if match is - not None] + not None] total_active[name] = [float(match.group(5)) for match in strings if match is not None] learn_rate[name] = [float(match.group(6)) for match in strings if match is - not None] + not None] assert len(strings) >= 10 # we ensure that we actually did more than 10 # iterations assert total_active[name][0] >= 2 # we ensure that we have some active @@ -512,16 +513,14 @@ def test_toy_ex_lmnn(X, y, loss): lmnn.components_ = np.eye(n_components) target_neighbors = lmnn._select_targets(X, label_inds) - impostors = lmnn._find_impostors(target_neighbors[:, -1], X, label_inds, L) # sum outer products dfG = _sum_outer_products(X, target_neighbors.flatten(), np.repeat(np.arange(X.shape[0]), k)) - df = np.zeros_like(dfG) # storage - a1 = [None]*k - a2 = [None]*k + a1 = [None] * k + a2 = [None] * k for nn_idx in xrange(k): a1[nn_idx] = np.array([]) a2[nn_idx] = np.array([]) @@ -530,6 +529,7 @@ def test_toy_ex_lmnn(X, y, loss): assert lmnn._loss_grad(X, L.reshape(-1, X.shape[1]), dfG, k, reg, target_neighbors, label_inds)[1] == loss + def test_convergence_simple_example(capsys): # LMNN should converge on this simple example, which it did not with # this issue: https://github.com/metric-learn/metric-learn/issues/88 @@ -553,7 +553,7 @@ def test_no_twice_same_objective(capsys): # number), and which is followed by a (signed) float (delta obj). It # matches for instance: # 3 **1113.7665747189938** -3.182774197440267 46431.0200999999999998e-06 - objectives = [re.search("\d* (?:(\d*.\d*))[ | -]\d*.\d*", s) + objectives = [re.search(r"\d* (?:(\d*.\d*))[ | -]\d*.\d*", s) for s in lines] objectives = [match.group(1) for match in objectives if match is not None] # we remove the last element because it can be equal to the penultimate @@ -1241,9 +1241,9 @@ def test_deprecation_num_dims_mlkr(num_dims): class TestMMC(MetricTestCase): def test_iris(self): - # Generate full set of constraints for comparison with reference implementation - n = self.iris_points.shape[0] - mask = (self.iris_labels[None] == self.iris_labels[:,None]) + # Generate full set of constraints for comparison with reference + # implementation + mask = self.iris_labels[None] == self.iris_labels[:, None] a, b = np.nonzero(np.triu(mask, k=1)) c, d = np.nonzero(np.triu(~mask, k=1)) @@ -1260,7 +1260,7 @@ def test_iris(self): # Diagonal metric mmc = MMC(diagonal=True) - mmc.fit(*wrap_pairs(self.iris_points, [a,b,c,d])) + mmc.fit(*wrap_pairs(self.iris_points, [a, b, c, d])) expected = [0, 0, 1.210220, 1.228596] assert_array_almost_equal(np.diag(expected), mmc.get_mahalanobis_matrix(), decimal=6) @@ -1270,7 +1270,7 @@ def test_iris(self): mmc.fit(self.iris_points, self.iris_labels) csep = class_separation(mmc.transform(self.iris_points), self.iris_labels) self.assertLess(csep, 0.15) - + # Supervised Diagonal mmc = MMC_Supervised(diagonal=True) mmc.fit(self.iris_points, self.iris_labels) @@ -1388,10 +1388,10 @@ def test_verbose(algo_class, dataset, capsys): for line in lines[3:-2]: # The following regex will match for instance: # '[NCA] 0 6.988936e+01 0.01' - assert re.match("\[" + algo_class.__name__ + "\]\ *\d+\ *\d\.\d{6}e[+|-]" - "\d+\ *\d+\.\d{2}", line) - assert re.match("\[" + 
algo_class.__name__ + "\] Training took\ *" - "\d+\.\d{2}s\.", lines[-2]) + assert re.match(r"\[" + algo_class.__name__ + r"\]\ *\d+\ *\d\.\d{6}e[+|-]" + r"\d+\ *\d+\.\d{2}", line) + assert re.match(r"\[" + algo_class.__name__ + r"\] Training took\ *" + r"\d+\.\d{2}s\.", lines[-2]) assert lines[-1] == '' diff --git a/test/test_base_metric.py b/test/test_base_metric.py index 725df31a..b2b1d339 100644 --- a/test/test_base_metric.py +++ b/test/test_base_metric.py @@ -9,7 +9,7 @@ def remove_spaces(s): - return re.sub('\s+', '', s) + return re.sub(r'\s+', '', s) class TestStringRepr(unittest.TestCase): @@ -22,11 +22,11 @@ def test_lmnn(self): self.assertEqual( remove_spaces(str(metric_learn.LMNN())), remove_spaces( - "LMNN(convergence_tol=0.001, init=None, k=3, " - "learn_rate=1e-07, " - "max_iter=1000, min_iter=50, n_components=None, " - "num_dims='deprecated', preprocessor=None, random_state=None, " - "regularization=0.5, use_pca='deprecated', verbose=False)")) + "LMNN(convergence_tol=0.001, init=None, k=3, " + "learn_rate=1e-07, " + "max_iter=1000, min_iter=50, n_components=None, " + "num_dims='deprecated', preprocessor=None, random_state=None, " + "regularization=0.5, use_pca='deprecated', verbose=False)")) def test_nca(self): self.assertEqual(remove_spaces(str(metric_learn.NCA())), @@ -47,7 +47,7 @@ def test_itml(self): self.assertEqual(remove_spaces(str(metric_learn.ITML())), remove_spaces(""" ITML(A0='deprecated', convergence_threshold=0.001, gamma=1.0, - max_iter=1000, preprocessor=None, prior='identity', random_state=None, + max_iter=1000, preprocessor=None, prior='identity', random_state=None, verbose=False) """)) self.assertEqual(remove_spaces(str(metric_learn.ITML_Supervised())), @@ -93,10 +93,10 @@ def test_rca(self): "preprocessor=None)")) self.assertEqual(remove_spaces(str(metric_learn.RCA_Supervised())), remove_spaces( - "RCA_Supervised(chunk_size=2, " - "n_components=None, num_chunks=100, " - "num_dims='deprecated', pca_comps='deprecated', " - "preprocessor=None, random_state=None)")) + "RCA_Supervised(chunk_size=2, " + "n_components=None, num_chunks=100, " + "num_dims='deprecated', pca_comps='deprecated', " + "preprocessor=None, random_state=None)")) def test_mlkr(self): self.assertEqual(remove_spaces(str(metric_learn.MLKR())), diff --git a/test/test_components_metric_conversion.py b/test/test_components_metric_conversion.py index c0a7bbd4..969bd448 100644 --- a/test/test_components_metric_conversion.py +++ b/test/test_components_metric_conversion.py @@ -58,7 +58,7 @@ def test_sdml_supervised(self): def test_nca(self): n = self.X.shape[0] - nca = NCA(max_iter=(100000//n)) + nca = NCA(max_iter=(100000 // n)) nca.fit(self.X, self.y) L = nca.components_ assert_array_almost_equal(L.T.dot(L), nca.get_mahalanobis_matrix()) diff --git a/test/test_fit_transform.py b/test/test_fit_transform.py index a9b2719e..d4d4bfe0 100644 --- a/test/test_fit_transform.py +++ b/test/test_fit_transform.py @@ -1,4 +1,3 @@ -import pytest import unittest import numpy as np from sklearn.datasets import load_iris @@ -78,11 +77,11 @@ def test_sdml_supervised(self): def test_nca(self): n = self.X.shape[0] - nca = NCA(max_iter=(100000//n)) + nca = NCA(max_iter=(100000 // n)) nca.fit(self.X, self.y) res_1 = nca.transform(self.X) - nca = NCA(max_iter=(100000//n)) + nca = NCA(max_iter=(100000 // n)) res_2 = nca.fit_transform(self.X, self.y) assert_array_almost_equal(res_1, res_2) diff --git a/test/test_utils.py b/test/test_utils.py index 37abb307..3092e168 100644 --- a/test/test_utils.py +++ b/test/test_utils.py 
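
Many of the test changes in this patch only add an r prefix to regular
expression strings: that is what flake8's W605 warning flags, because
escapes such as \d are not valid string escapes (Python 3.6+ deprecates
them in ordinary literals). An editorial sketch with a made-up log line
matching the verbose format tested above:

    import re

    # With the r prefix, the regex engine receives the backslashes verbatim.
    pattern = r"\[LMNN\] Training took\ *\d+\.\d{2}s\."
    assert re.match(pattern, "[LMNN] Training took 0.05s.")
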
@@ -3,7 +3,6 @@ import numpy as np from numpy.testing import assert_array_equal, assert_equal from sklearn.model_selection import train_test_split -from sklearn.exceptions import DataConversionWarning from sklearn.utils import check_random_state, shuffle from sklearn.utils.testing import set_random_state from sklearn.base import clone @@ -964,6 +963,7 @@ def test_check_collapsed_pairs_raises_error(): "the same as the right element), out of 3 pairs in" " total.") + def test__validate_vector(): """Replica of scipy.spatial.tests.test_distance.test__validate_vector""" x = [1, 2, 3] From 4e57071135bde29d84f01bb35ba7a658a5d416dd Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Thu, 1 Aug 2019 16:30:38 +0200 Subject: [PATCH 134/210] empty commit to relaunch CI From 966a4189129a15e86faeb1d9cf2eb9d56a4a6381 Mon Sep 17 00:00:00 2001 From: William de Vazelhes <31916524+wdevazelhes@users.noreply.github.com> Date: Thu, 1 Aug 2019 16:50:17 +0200 Subject: [PATCH 135/210] Update some references in README.rst --- README.rst | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/README.rst b/README.rst index 027e5498..80f18a09 100644 --- a/README.rst +++ b/README.rst @@ -3,7 +3,9 @@ metric-learn ============= -Metric Learning algorithms in Python. +Metric Learning algorithms in Python. + +As part of `scikit-learn-contrib `_, the API of metric-learn is compatible with `scikit-learn `_, the leading library for machine learning in Python. This allows to use of all the scikit-learn routines (for pipelining, model selection, etc) with metric learning algorithms. **Algorithms** @@ -42,13 +44,13 @@ package installed). See the `sphinx documentation`_ for full documentation about installation, API, usage, and examples. -.. _sphinx documentation: http://metric-learn.github.io/metric-learn/ +.. _sphinx documentation: http://contrib.scikit-learn.org/metric-learn/ -.. |Travis-CI Build Status| image:: https://api.travis-ci.org/metric-learn/metric-learn.svg?branch=master - :target: https://travis-ci.org/metric-learn/metric-learn +.. |Travis-CI Build Status| image:: https://api.travis-ci.org/scikit-learn-contrib/metric-learn.svg?branch=master + :target: https://travis-ci.org/scikit-learn-contrib/metric-learn .. |License| image:: http://img.shields.io/:license-mit-blue.svg?style=flat :target: http://badges.mit-license.org .. |PyPI version| image:: https://badge.fury.io/py/metric-learn.svg :target: http://badge.fury.io/py/metric-learn -.. |Code coverage| image:: https://codecov.io/gh/metric-learn/metric-learn/branch/master/graph/badge.svg - :target: https://codecov.io/gh/metric-learn/metric-learn +.. |Code coverage| image:: https://codecov.io/gh/scikit-learn-contrib/metric-learn/branch/master/graph/badge.svg + :target: https://codecov.io/gh/scikit-learn-contrib/metric-learn From f67d0e969a6c400330222b7e888170d8c5a65b68 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Thu, 1 Aug 2019 17:11:44 +0200 Subject: [PATCH 136/210] Update links in documentation --- doc/index.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/index.rst b/doc/index.rst index 9d303bee..3eac5fa7 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -34,11 +34,11 @@ Documentation outline :ref:`genindex` | :ref:`modindex` | :ref:`search` -.. |Travis-CI Build Status| image:: https://api.travis-ci.org/metric-learn/metric-learn.svg?branch=master - :target: https://travis-ci.org/metric-learn/metric-learn +.. 
|Travis-CI Build Status| image:: https://api.travis-ci.org/scikit-learn-contrib/metric-learn.svg?branch=master + :target: https://travis-ci.org/scikit-learn-contrib/metric-learn .. |PyPI version| image:: https://badge.fury.io/py/metric-learn.svg :target: http://badge.fury.io/py/metric-learn .. |License| image:: http://img.shields.io/:license-mit-blue.svg?style=flat :target: http://badges.mit-license.org -.. |Code coverage| image:: https://codecov.io/gh/metric-learn/metric-learn/branch/master/graph/badge.svg - :target: https://codecov.io/gh/metric-learn/metric-learn +.. |Code coverage| image:: https://codecov.io/gh/scikit-learn-contrib/metric-learn/branch/master/graph/badge.svg + :target: https://codecov.io/gh/scikit-learn-contrib/metric-learn From 274edea017e265ab3423f00cc4dfd6945e7e55a5 Mon Sep 17 00:00:00 2001 From: William de Vazelhes <31916524+wdevazelhes@users.noreply.github.com> Date: Thu, 1 Aug 2019 17:15:59 +0200 Subject: [PATCH 137/210] Add link to github page in documentation --- doc/index.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/index.rst b/doc/index.rst index 3eac5fa7..0b065d25 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -2,7 +2,7 @@ metric-learn: Metric Learning in Python ======================================= |Travis-CI Build Status| |License| |PyPI version| |Code coverage| -Metric-learn contains efficient Python implementations of several +`Metric-learn `_ contains efficient Python implementations of several popular supervised and weakly-supervised metric learning algorithms. The API of metric-learn is compatible with `scikit-learn `_, the leading library for machine learning in From 66457b5c79b1c0d62fe304cf33ffa7b4d6684b9c Mon Sep 17 00:00:00 2001 From: Yuan Tang Date: Fri, 2 Aug 2019 10:13:04 -0400 Subject: [PATCH 138/210] Only build on master branch (#242) --- .travis.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.travis.yml b/.travis.yml index 116d8b93..a22598e1 100644 --- a/.travis.yml +++ b/.travis.yml @@ -55,3 +55,6 @@ matrix: - flake8 --extend-ignore=E111,E114 --show-source; # Use this instead to have a syntax check only on the diff: # - source ./build_tools/travis/flake8_diff.sh; +branches: + only: + - master From ca1109bd9c567f99bad799b3af6111e1d92fb4af Mon Sep 17 00:00:00 2001 From: Yuan Tang Date: Fri, 2 Aug 2019 10:14:08 -0400 Subject: [PATCH 139/210] Extract skggm commit hash to an environment variable (#241) --- .travis.yml | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index a22598e1..8a7ebf38 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,6 +2,9 @@ language: python sudo: false cache: pip language: python +env: + global: + - SKGGM_VERSION=a0ed406586c4364ea3297a658f415e13b5cbdaf8 matrix: include: - name: "Pytest python 2.7 with skggm" @@ -10,7 +13,7 @@ matrix: - sudo apt-get install liblapack-dev - pip install --upgrade pip pytest - pip install wheel cython numpy scipy codecov pytest-cov scikit-learn - - pip install git+https://github.com/skggm/skggm.git@a0ed406586c4364ea3297a658f415e13b5cbdaf8; + - pip install git+https://github.com/skggm/skggm.git@${SKGGM_VERSION}; script: - pytest test --cov; after_success: @@ -31,7 +34,7 @@ matrix: - sudo apt-get install liblapack-dev - pip install --upgrade pip pytest - pip install wheel cython numpy scipy codecov pytest-cov scikit-learn - - pip install git+https://github.com/skggm/skggm.git@a0ed406586c4364ea3297a658f415e13b5cbdaf8; + - pip install git+https://github.com/skggm/skggm.git@${SKGGM_VERSION}; script: - 
pytest test --cov; after_success: @@ -42,7 +45,7 @@ matrix: - sudo apt-get install liblapack-dev - pip install --upgrade pip pytest - pip install wheel cython numpy scipy codecov pytest-cov scikit-learn - - pip install git+https://github.com/skggm/skggm.git@a0ed406586c4364ea3297a658f415e13b5cbdaf8; + - pip install git+https://github.com/skggm/skggm.git@${SKGGM_VERSION}; script: - pytest test --cov; after_success: From 1e42acb64ddfbc1607b4917511615ae9c2bc93aa Mon Sep 17 00:00:00 2001 From: William de Vazelhes <31916524+wdevazelhes@users.noreply.github.com> Date: Fri, 2 Aug 2019 16:15:26 +0200 Subject: [PATCH 140/210] Rename repo address (#244) --- build_tools/travis/flake8_diff.sh | 2 +- doc/weakly_supervised.rst | 2 +- setup.py | 2 +- test/metric_learn_test.py | 4 ++-- test/test_components_metric_conversion.py | 2 +- test/test_mahalanobis_mixin.py | 8 ++++---- 6 files changed, 10 insertions(+), 10 deletions(-) diff --git a/build_tools/travis/flake8_diff.sh b/build_tools/travis/flake8_diff.sh index ce3cdd24..aea926c8 100644 --- a/build_tools/travis/flake8_diff.sh +++ b/build_tools/travis/flake8_diff.sh @@ -23,7 +23,7 @@ set -e # pipefail is necessary to propagate exit codes set -o pipefail -PROJECT=metric-learn/metric-learn +PROJECT=scikit-learn-contrib/metric-learn PROJECT_URL=https://github.com/$PROJECT.git # Find the remote with the project name (upstream in most cases) diff --git a/doc/weakly_supervised.rst b/doc/weakly_supervised.rst index 38f08fbe..c8ab51f3 100644 --- a/doc/weakly_supervised.rst +++ b/doc/weakly_supervised.rst @@ -694,7 +694,7 @@ like: (This is actually intentional, for more details about that, see -`this comment `_ +`this comment `_ on github.) However, quadruplets learners do have a default scoring function, which will diff --git a/setup.py b/setup.py index 601b9ba7..c8b38b7c 100755 --- a/setup.py +++ b/setup.py @@ -24,7 +24,7 @@ 'Nathalie Vauquier' ], author_email='ccarey@cs.umass.edu', - url='http://github.com/metric-learn/metric-learn', + url='http://github.com/scikit-learn-contrib/metric-learn', license='MIT', classifiers=[ 'Development Status :: 4 - Beta', diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py index 79322124..f713a059 100644 --- a/test/metric_learn_test.py +++ b/test/metric_learn_test.py @@ -532,7 +532,7 @@ def test_toy_ex_lmnn(X, y, loss): def test_convergence_simple_example(capsys): # LMNN should converge on this simple example, which it did not with - # this issue: https://github.com/metric-learn/metric-learn/issues/88 + # this issue: https://github.com/scikit-learn-contrib/metric-learn/issues/88 X, y = make_classification(random_state=0) lmnn = LMNN(verbose=True) lmnn.fit(X, y) @@ -542,7 +542,7 @@ def test_convergence_simple_example(capsys): def test_no_twice_same_objective(capsys): # test that the objective function never has twice the same value - # see https://github.com/metric-learn/metric-learn/issues/88 + # see https://github.com/scikit-learn-contrib/metric-learn/issues/88 X, y = make_classification(random_state=0) lmnn = LMNN(verbose=True) lmnn.fit(X, y) diff --git a/test/test_components_metric_conversion.py b/test/test_components_metric_conversion.py index 969bd448..d1e2acf4 100644 --- a/test/test_components_metric_conversion.py +++ b/test/test_components_metric_conversion.py @@ -94,7 +94,7 @@ def test_components_from_metric_edge_cases(self): # matrix with all its coefficients very low (to check that the algorithm # does not consider it as a diagonal matrix)(non regression test for - # 
https://github.com/metric-learn/metric-learn/issues/175) + # https://github.com/scikit-learn-contrib/metric-learn/issues/175) M = np.diag([1e-15, 2e-16, 3e-15, 4e-16, 5e-15, 6e-16, 7e-15]) M = P.dot(M).dot(P.T) L = components_from_metric(M) diff --git a/test/test_mahalanobis_mixin.py b/test/test_mahalanobis_mixin.py index a812d185..91aa129a 100644 --- a/test/test_mahalanobis_mixin.py +++ b/test/test_mahalanobis_mixin.py @@ -573,8 +573,8 @@ def test_singular_covariance_init_or_prior(estimator, build_dataset): """Tests that when using the 'covariance' init or prior, it returns the appropriate error if the covariance matrix is singular, for algorithms that need a strictly PD prior or init (see - https://github.com/metric-learn/metric-learn/issues/202 and - https://github.com/metric-learn/metric-learn/pull/195#issuecomment + https://github.com/scikit-learn-contrib/metric-learn/issues/202 and + https://github.com/scikit-learn-contrib/metric-learn/pull/195#issuecomment -492332451) """ matrices_to_set = [] @@ -618,8 +618,8 @@ def test_singular_array_init_or_prior(estimator, build_dataset, w0): """Tests that when using a custom array init (or prior), it returns the appropriate error if it is singular, for algorithms that need a strictly PD prior or init (see - https://github.com/metric-learn/metric-learn/issues/202 and - https://github.com/metric-learn/metric-learn/pull/195#issuecomment + https://github.com/scikit-learn-contrib/metric-learn/issues/202 and + https://github.com/scikit-learn-contrib/metric-learn/pull/195#issuecomment -492332451) """ matrices_to_set = [] From 5ad94c3c9d13cb1eefc5dbaa586a3860079424c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20Bellet?= Date: Fri, 2 Aug 2019 17:08:22 +0200 Subject: [PATCH 141/210] [MRG] Consistent readme and doc index (#245) * consistent readme and doc index * lower case m --- README.rst | 8 +++----- doc/index.rst | 10 ++++++---- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/README.rst b/README.rst index 80f18a09..111e3b0a 100644 --- a/README.rst +++ b/README.rst @@ -1,11 +1,9 @@ |Travis-CI Build Status| |License| |PyPI version| |Code coverage| -metric-learn -============= +metric-learn: Metric Learning in Python +======================================= -Metric Learning algorithms in Python. - -As part of `scikit-learn-contrib `_, the API of metric-learn is compatible with `scikit-learn `_, the leading library for machine learning in Python. This allows to use of all the scikit-learn routines (for pipelining, model selection, etc) with metric learning algorithms. +metric-learn contains efficient Python implementations of several popular supervised and weakly-supervised metric learning algorithms. As part of `scikit-learn-contrib `_, the API of metric-learn is compatible with `scikit-learn `_, the leading library for machine learning in Python. This allows to use of all the scikit-learn routines (for pipelining, model selection, etc) with metric learning algorithms through a unified interface. **Algorithms** diff --git a/doc/index.rst b/doc/index.rst index 0b065d25..6c3dee7e 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -2,12 +2,14 @@ metric-learn: Metric Learning in Python ======================================= |Travis-CI Build Status| |License| |PyPI version| |Code coverage| -`Metric-learn `_ contains efficient Python implementations of several -popular supervised and weakly-supervised metric learning algorithms. 
The API -of metric-learn is compatible with `scikit-learn +`metric-learn `_ +contains efficient Python implementations of several popular supervised and +weakly-supervised metric learning algorithms. As part of `scikit-learn-contrib +`_, the API of metric-learn is compatible with `scikit-learn `_, the leading library for machine learning in Python. This allows to use of all the scikit-learn routines (for pipelining, -model selection, etc) with metric learning algorithms. +model selection, etc) with metric learning algorithms through a unified +interface. Documentation outline --------------------- From 90b54f2caf88c13107d0e4a261456fb64644dd48 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20Bellet?= Date: Wed, 14 Aug 2019 10:21:43 +0200 Subject: [PATCH 142/210] [MRG] Update README with paper reference (#248) * update readme with paper ref * add bibtex * fix indent * weird appearance * try no indent --- README.rst | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/README.rst b/README.rst index 111e3b0a..e20f2591 100644 --- a/README.rst +++ b/README.rst @@ -41,6 +41,24 @@ package installed). See the `sphinx documentation`_ for full documentation about installation, API, usage, and examples. +**Citation** + +If you use metric-learn in a scientific publication, we would appreciate +citations to the following paper: + +`metric-learn: Metric Learning Algorithms in Python +`_, de Vazelhes +*et al.*, arXiv:1908.04710, 2019. + +Bibtex entry:: + + @techreport{metric-learn, + title = {metric-learn: {M}etric {L}earning {A}lgorithms in {P}ython}, + author = {{de Vazelhes}, William and {Carey}, CJ and {Tang}, Yuan and + {Vauquier}, Nathalie and {Bellet}, Aur{\'e}lien}, + institution = {arXiv:1908.04710}, + year = {2019} + } .. _sphinx documentation: http://contrib.scikit-learn.org/metric-learn/ From f65bfd622f7d0cee41eab9ba8157198f17c172ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20Bellet?= Date: Wed, 14 Aug 2019 18:49:47 +0200 Subject: [PATCH 143/210] ref in doc + fix small typo (#249) --- README.rst | 2 +- doc/index.rst | 20 +++++++++++++++++++- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index e20f2591..c1dd6bb5 100644 --- a/README.rst +++ b/README.rst @@ -3,7 +3,7 @@ metric-learn: Metric Learning in Python ======================================= -metric-learn contains efficient Python implementations of several popular supervised and weakly-supervised metric learning algorithms. As part of `scikit-learn-contrib `_, the API of metric-learn is compatible with `scikit-learn `_, the leading library for machine learning in Python. This allows to use of all the scikit-learn routines (for pipelining, model selection, etc) with metric learning algorithms through a unified interface. +metric-learn contains efficient Python implementations of several popular supervised and weakly-supervised metric learning algorithms. As part of `scikit-learn-contrib `_, the API of metric-learn is compatible with `scikit-learn `_, the leading library for machine learning in Python. This allows to use all the scikit-learn routines (for pipelining, model selection, etc) with metric learning algorithms through a unified interface. **Algorithms** diff --git a/doc/index.rst b/doc/index.rst index 6c3dee7e..3168ca36 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -7,10 +7,28 @@ contains efficient Python implementations of several popular supervised and weakly-supervised metric learning algorithms. 
As part of `scikit-learn-contrib `_, the API of metric-learn is compatible with `scikit-learn `_, the leading library for machine learning in -Python. This allows to use of all the scikit-learn routines (for pipelining, +Python. This allows to use all the scikit-learn routines (for pipelining, model selection, etc) with metric learning algorithms through a unified interface. +If you use metric-learn in a scientific publication, we would appreciate +citations to the following paper: + +`metric-learn: Metric Learning Algorithms in Python +`_, de Vazelhes +*et al.*, arXiv:1908.04710, 2019. + +Bibtex entry:: + + @techreport{metric-learn, + title = {metric-learn: {M}etric {L}earning {A}lgorithms in {P}ython}, + author = {{de Vazelhes}, William and {Carey}, CJ and {Tang}, Yuan and + {Vauquier}, Nathalie and {Bellet}, Aur{\'e}lien}, + institution = {arXiv:1908.04710}, + year = {2019} + } + + Documentation outline --------------------- From a30471424d35b0ef47582751fa6acea7b3a3bce5 Mon Sep 17 00:00:00 2001 From: LBrummer Date: Sat, 12 Oct 2019 15:04:42 +0200 Subject: [PATCH 144/210] Fixed typo in tuple description. (#252) --- doc/weakly_supervised.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/weakly_supervised.rst b/doc/weakly_supervised.rst index c8ab51f3..1bd096c7 100644 --- a/doc/weakly_supervised.rst +++ b/doc/weakly_supervised.rst @@ -57,7 +57,7 @@ learn: ^^^^^^^^^^^^^^^^^^ The most intuitive way to represent tuples is to provide the algorithm with a -3D array-like of tuples of shape `(n_tuples, t, n_features)`, where +3D array-like of tuples of shape `(n_tuples, tuple_size, n_features)`, where `n_tuples` is the number of tuples, `tuple_size` is the number of elements in a tuple (2 for pairs, 3 for triplets for instance), and `n_features` is the number of features of each point. 
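For concreteness, the `(n_tuples, tuple_size, n_features)` convention fixed above is easiest to see on an actual array. A minimal sketch (the toy pair values and the choice of `MMC` are purely illustrative; any pairs learner consumes the same layout)::

    import numpy as np
    from metric_learn import MMC

    # n_tuples=4, tuple_size=2 (pairs), n_features=2
    pairs = np.array([[[1.2, 7.5], [1.3, 1.5]],
                      [[6.4, 2.6], [6.2, 9.7]],
                      [[1.3, 4.5], [3.2, 4.6]],
                      [[6.2, 5.5], [5.4, 5.4]]])
    print(pairs.shape)  # (4, 2, 2), i.e. (n_tuples, tuple_size, n_features)

    y = np.array([1, 1, -1, -1])  # pair labels: +1 similar, -1 dissimilar
    mmc = MMC().fit(pairs, y)     # any pairs learner accepts this layout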
From b871028ec9228f3d5f6e6adf5efe1144cd010d4f Mon Sep 17 00:00:00 2001 From: Robin Vogel Date: Mon, 28 Oct 2019 17:10:20 +0100 Subject: [PATCH 145/210] Solves #225 : adds first line to lmnn's verbose (#253) * solved issue 225 * maj * maj * Delete test.py --- metric_learn/lmnn.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/metric_learn/lmnn.py b/metric_learn/lmnn.py index 15dd9a18..a1b5a42f 100644 --- a/metric_learn/lmnn.py +++ b/metric_learn/lmnn.py @@ -224,6 +224,10 @@ def fit(self, X, y): it = 1 # we already made one iteration + if self.verbose: + print("iter | objective | objective difference | active constraints", + "| learning rate") + # main loop for it in xrange(2, self.max_iter): # then at each iteration, we try to find a value of L that has better From 77005293f99164c92d1fc12cdf8f86a938425f47 Mon Sep 17 00:00:00 2001 From: Robin Vogel Date: Wed, 13 Nov 2019 13:37:36 +0100 Subject: [PATCH 146/210] Break chunks generation in RCA when not enough possible chunks, fixes issue #200 (#254) * fixes issue 200 * maj * add max_chunks in error message * tests the building of chunks in constraints.py * corrected faulty generation * still small mistake at generation * encapsulate tests, modified message * Testing chunk generation in constraints --- metric_learn/constraints.py | 9 ++++-- test/test_constraints.py | 63 +++++++++++++++++++++++++++++++++++++ 2 files changed, 69 insertions(+), 3 deletions(-) create mode 100644 test/test_constraints.py diff --git a/metric_learn/constraints.py b/metric_learn/constraints.py index 89a3b48d..b71c9b96 100644 --- a/metric_learn/constraints.py +++ b/metric_learn/constraints.py @@ -79,6 +79,12 @@ def chunks(self, num_chunks=100, chunk_size=2, random_state=None): chunks = -np.ones_like(self.known_label_idx, dtype=int) uniq, lookup = np.unique(self.known_labels, return_inverse=True) all_inds = [set(np.where(lookup == c)[0]) for c in xrange(len(uniq))] + max_chunks = int(np.sum([len(s) // chunk_size for s in all_inds])) + if max_chunks < num_chunks: + raise ValueError(('Not enough possible chunks of %d elements in each' + ' class to form expected %d chunks - maximum number' + ' of chunks is %d' + ) % (chunk_size, num_chunks, max_chunks)) idx = 0 while idx < num_chunks and all_inds: if len(all_inds) == 1: @@ -93,9 +99,6 @@ def chunks(self, num_chunks=100, chunk_size=2, random_state=None): inds.difference_update(ii) chunks[ii] = idx idx += 1 - if idx < num_chunks: - raise ValueError('Unable to make %d chunks of %d examples each' % - (num_chunks, chunk_size)) return chunks diff --git a/test/test_constraints.py b/test/test_constraints.py new file mode 100644 index 00000000..a135985e --- /dev/null +++ b/test/test_constraints.py @@ -0,0 +1,63 @@ +import unittest +import pytest +import numpy as np +from sklearn.utils import shuffle +from metric_learn.constraints import Constraints + +SEED = 42 + + +def gen_labels_for_chunks(num_chunks, chunk_size, + n_classes=10, n_unknown_labels=5): + """Generates num_chunks*chunk_size labels that split in num_chunks chunks, + that are homogeneous in the label.""" + assert min(num_chunks, chunk_size) > 0 + classes = shuffle(np.arange(n_classes), random_state=SEED) + n_per_class = chunk_size * (num_chunks // n_classes) + n_maj_class = chunk_size * num_chunks - n_per_class * (n_classes - 1) + + first_labels = classes[0] * np.ones(n_maj_class, dtype=int) + remaining_labels = np.concatenate([k * np.ones(n_per_class, dtype=int) + for k in classes[1:]]) + unknown_labels = -1 * np.ones(n_unknown_labels, dtype=int) + + labels = 
np.concatenate([first_labels, remaining_labels, unknown_labels]) + return shuffle(labels, random_state=SEED) + + +@pytest.mark.parametrize("num_chunks, chunk_size", [(5, 10), (10, 50)]) +def test_exact_num_points_for_chunks(num_chunks, chunk_size): + """Checks that the chunk generation works well with just enough points.""" + labels = gen_labels_for_chunks(num_chunks, chunk_size) + + constraints = Constraints(labels) + chunks = constraints.chunks(num_chunks=num_chunks, chunk_size=chunk_size, + random_state=SEED) + + chunk_no, size_each_chunk = np.unique(chunks, return_counts=True) + + np.testing.assert_array_equal(size_each_chunk, chunk_size) + assert chunk_no.shape[0] == num_chunks + + +@pytest.mark.parametrize("num_chunks, chunk_size", [(5, 10), (10, 50)]) +def test_chunk_case_one_miss_point(num_chunks, chunk_size): + """Checks that the chunk generation breaks when one point is missing.""" + labels = gen_labels_for_chunks(num_chunks, chunk_size) + + assert len(labels) >= 1 + constraints = Constraints(labels[1:]) + with pytest.raises(ValueError) as e: + constraints.chunks(num_chunks=num_chunks, chunk_size=chunk_size, + random_state=SEED) + + expected_message = (('Not enough possible chunks of %d elements in each' + ' class to form expected %d chunks - maximum number' + ' of chunks is %d' + ) % (chunk_size, num_chunks, num_chunks - 1)) + + assert str(e.value) == expected_message + + +if __name__ == '__main__': + unittest.main() From 65a98ccd9b93785c6d36de84d5e844ad79534e37 Mon Sep 17 00:00:00 2001 From: Robin Vogel Date: Wed, 20 Nov 2019 16:10:36 +0100 Subject: [PATCH 147/210] Correct display of references problem in Sphinx 2.1.1 (#265) Reverts to html4 mode because themes haven't updated yet. --- doc/conf.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/doc/conf.py b/doc/conf.py index 0f46992d..66ff3dcd 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -65,3 +65,7 @@ # generate autosummary even if no references autosummary_generate = True + +# Switch to old behavior with html4, for a good display of references, +# as described in https://github.com/sphinx-doc/sphinx/issues/6705 +html4_writer = True From 710379edd7b10a97d31d9a71af5bd7c017da30a2 Mon Sep 17 00:00:00 2001 From: Robin Vogel Date: Thu, 21 Nov 2019 15:01:55 +0100 Subject: [PATCH 148/210] [DOC] solves several small raised issues (#266) * solves #258, #261, and #256 --- doc/getting_started.rst | 19 +++++++++++-------- doc/supervised.rst | 7 ++++++- metric_learn/itml.py | 1 + metric_learn/lsml.py | 1 + metric_learn/mmc.py | 1 + metric_learn/sdml.py | 1 + 6 files changed, 21 insertions(+), 9 deletions(-) diff --git a/doc/getting_started.rst b/doc/getting_started.rst index 5a671d86..5ce4242a 100644 --- a/doc/getting_started.rst +++ b/doc/getting_started.rst @@ -29,11 +29,14 @@ Quick start This example loads the iris dataset, and evaluates a k-nearest neighbors algorithm on an embedding space learned with `NCA`. 
->>> from metric_learn import NCA ->>> from sklearn.datasets import load_iris ->>> from sklearn.model_selection import cross_val_score ->>> from sklearn.pipeline import make_pipeline ->>> ->>> X, y = load_iris(return_X_y=True) ->>> clf = make_pipeline(NCA(), KNeighborsClassifier()) ->>> cross_val_score(clf, X, y) +:: + + from metric_learn import NCA + from sklearn.datasets import load_iris + from sklearn.model_selection import cross_val_score + from sklearn.pipeline import make_pipeline + from sklearn.neighbors import KNeighborsClassifier + + X, y = load_iris(return_X_y=True) + clf = make_pipeline(NCA(), KNeighborsClassifier()) + cross_val_score(clf, X, y) diff --git a/doc/supervised.rst b/doc/supervised.rst index 3c941b20..f221ba4a 100644 --- a/doc/supervised.rst +++ b/doc/supervised.rst @@ -374,7 +374,12 @@ Supervised versions of weakly-supervised algorithms Each :ref:`weakly-supervised algorithm ` has a supervised version of the form `*_Supervised` where similarity tuples are randomly generated from the labels information and passed to the underlying -algorithm. +algorithm. + +.. warning:: + Supervised versions of weakly-supervised algorithms interpret label -1 + (or any negative label) as a point with unknown label. + Those points are discarded in the learning process. For pairs learners (see :ref:`learning_on_pairs`), pairs (tuple of two points from the dataset), and pair labels (`int` indicating whether the two points diff --git a/metric_learn/itml.py b/metric_learn/itml.py index 1f3f90ee..5202c9e1 100644 --- a/metric_learn/itml.py +++ b/metric_learn/itml.py @@ -273,6 +273,7 @@ class ITML_Supervised(_BaseITML, TransformerMixin): be removed in 0.6.0. num_constraints: int, optional number of constraints to generate + (`20 * num_classes**2` constraints by default) bounds : Not used .. deprecated:: 0.5.0 `bounds` was deprecated in version 0.5.0 and will diff --git a/metric_learn/lsml.py b/metric_learn/lsml.py index dfb12720..b9df3825 100644 --- a/metric_learn/lsml.py +++ b/metric_learn/lsml.py @@ -276,6 +276,7 @@ class LSML_Supervised(_BaseLSML, TransformerMixin): be removed in 0.6.0. num_constraints: int, optional number of constraints to generate + (`20 * num_classes**2` constraints by default) weights : (m,) array of floats, optional scale factor for each constraint verbose : bool, optional diff --git a/metric_learn/mmc.py b/metric_learn/mmc.py index 0999e417..622beb25 100644 --- a/metric_learn/mmc.py +++ b/metric_learn/mmc.py @@ -498,6 +498,7 @@ class MMC_Supervised(_BaseMMC, TransformerMixin): be removed in 0.6.0. num_constraints: int, optional number of constraints to generate + (`20 * num_classes**2` constraints by default) init : None, string or numpy array, optional (default=None) Initialization of the Mahalanobis matrix. Possible options are 'identity', 'covariance', 'random', and a numpy array of diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py index 2cdaa164..21fadd74 100644 --- a/metric_learn/sdml.py +++ b/metric_learn/sdml.py @@ -302,6 +302,7 @@ class SDML_Supervised(_BaseSDML, TransformerMixin): be removed in 0.6.0. 
num_constraints : int, optional (default=None) number of constraints to generate + (`20 * num_classes**2` constraints by default) verbose : bool, optional (default=False) if True, prints information while learning preprocessor : array-like, shape=(n_samples, n_features) or callable From 7af910f6d46a89e2adf42535845a5a53834323b5 Mon Sep 17 00:00:00 2001 From: Robin Vogel Date: Wed, 4 Dec 2019 15:03:23 +0100 Subject: [PATCH 149/210] More systematic checks that an estimator was fit before using its parameters (#267) * maj * added fit checks * maj * Added checks that the function was fitted. * check the input before if model is fitted * made more sensible checks. * added a test for a threshold * added a test for the unset threshold --- metric_learn/base_metric.py | 12 ++++++++++-- test/test_pairs_classifiers.py | 19 +++++++++++++++++-- 2 files changed, 27 insertions(+), 4 deletions(-) diff --git a/metric_learn/base_metric.py b/metric_learn/base_metric.py index 6feccc72..427fcf86 100644 --- a/metric_learn/base_metric.py +++ b/metric_learn/base_metric.py @@ -240,12 +240,14 @@ def transform(self, X): X_embedded : `numpy.ndarray`, shape=(n_samples, n_components) The embedded data points. """ + check_is_fitted(self, ['preprocessor_', 'components_']) X_checked = check_input(X, type_of_inputs='classic', estimator=self, preprocessor=self.preprocessor_, accept_sparse=True) return X_checked.dot(self.components_.T) def get_metric(self): + check_is_fitted(self, 'components_') components_T = self.components_.T.copy() def metric_fun(u, v, squared=False): @@ -298,6 +300,7 @@ def get_mahalanobis_matrix(self): M : `numpy.ndarray`, shape=(n_features, n_features) The copy of the learned Mahalanobis matrix. """ + check_is_fitted(self, 'components_') return self.components_.T.dot(self.components_) @@ -333,7 +336,10 @@ def predict(self, pairs): y_predicted : `numpy.ndarray` of floats, shape=(n_constraints,) The predicted learned metric value between samples in every pair. """ - check_is_fitted(self, ['threshold_', 'components_']) + if "threshold_" not in vars(self): + msg = ("A threshold for this estimator has not been set," + "call its set_threshold or calibrate_threshold method.") + raise AttributeError(msg) return 2 * (- self.decision_function(pairs) <= self.threshold_) - 1 def decision_function(self, pairs): @@ -357,6 +363,7 @@ def decision_function(self, pairs): y_predicted : `numpy.ndarray` of floats, shape=(n_constraints,) The predicted decision function value for each pair. """ + check_is_fitted(self, 'preprocessor_') pairs = check_input(pairs, type_of_inputs='tuples', preprocessor=self.preprocessor_, estimator=self, tuple_size=self._tuple_size) @@ -599,7 +606,7 @@ def predict(self, quadruplets): prediction : `numpy.ndarray` of floats, shape=(n_constraints,) Predictions of the ordering of pairs, for each quadruplet. """ - check_is_fitted(self, 'components_') + check_is_fitted(self, 'preprocessor_') quadruplets = check_input(quadruplets, type_of_inputs='tuples', preprocessor=self.preprocessor_, estimator=self, tuple_size=self._tuple_size) @@ -628,6 +635,7 @@ def decision_function(self, quadruplets): decision_function : `numpy.ndarray` of floats, shape=(n_constraints,) Metric differences. 
""" + check_is_fitted(self, 'preprocessor_') quadruplets = check_input(quadruplets, type_of_inputs='tuples', preprocessor=self.preprocessor_, estimator=self, tuple_size=self._tuple_size) diff --git a/test/test_pairs_classifiers.py b/test/test_pairs_classifiers.py index affc70f6..840cd151 100644 --- a/test/test_pairs_classifiers.py +++ b/test/test_pairs_classifiers.py @@ -73,7 +73,7 @@ def test_raise_not_fitted_error_if_not_fitted(estimator, build_dataset, estimator.set_params(preprocessor=preprocessor) set_random_state(estimator) with pytest.raises(NotFittedError): - estimator.predict(input_data) + estimator.decision_function(input_data) @pytest.mark.parametrize('calibration_params', @@ -133,10 +133,25 @@ def fit(self, pairs, y): pairs, y = self._prepare_inputs(pairs, y, type_of_inputs='tuples') self.components_ = np.atleast_2d(np.identity(pairs.shape[2])) - self.threshold_ = 'I am not set.' + # self.threshold_ is not set. return self +def test_unset_threshold(): + # test that set_threshold indeed sets the threshold + identity_pairs_classifier = IdentityPairsClassifier() + pairs = np.array([[[0.], [1.]], [[1.], [3.]], [[2.], [5.]], [[3.], [7.]]]) + y = np.array([1, 1, -1, -1]) + identity_pairs_classifier.fit(pairs, y) + with pytest.raises(AttributeError) as e: + identity_pairs_classifier.predict(pairs) + + expected_msg = ("A threshold for this estimator has not been set," + "call its set_threshold or calibrate_threshold method.") + + assert str(e.value) == expected_msg + + def test_set_threshold(): # test that set_threshold indeed sets the threshold identity_pairs_classifier = IdentityPairsClassifier() From 7819e7ca905a7489fcaec153d43986b9f387b7d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20Bellet?= Date: Fri, 13 Dec 2019 11:49:47 +0100 Subject: [PATCH 150/210] Fix failing tests in last build (#270) * add print to test * fix * fix again * fix again * remove attributes from check_is_fitted * if condition based on python version * add TODO everywhere --- metric_learn/base_metric.py | 37 +++++++++++++++++++++++++++++++------ 1 file changed, 31 insertions(+), 6 deletions(-) diff --git a/metric_learn/base_metric.py b/metric_learn/base_metric.py index 427fcf86..5367a01e 100644 --- a/metric_learn/base_metric.py +++ b/metric_learn/base_metric.py @@ -11,6 +11,7 @@ import six from ._util import ArrayIndexer, check_input, validate_vector import warnings +import sys class BaseMetricLearner(six.with_metaclass(ABCMeta, BaseEstimator)): @@ -240,14 +241,22 @@ def transform(self, X): X_embedded : `numpy.ndarray`, shape=(n_samples, n_components) The embedded data points. """ - check_is_fitted(self, ['preprocessor_', 'components_']) + # TODO: remove when we stop supporting Python < 3.5 + if sys.version_info.major < 3 or sys.version_info.minor < 5: + check_is_fitted(self, ['preprocessor_', 'components_']) + else: + check_is_fitted(self) X_checked = check_input(X, type_of_inputs='classic', estimator=self, preprocessor=self.preprocessor_, accept_sparse=True) return X_checked.dot(self.components_.T) def get_metric(self): - check_is_fitted(self, 'components_') + # TODO: remove when we stop supporting Python < 3.5 + if sys.version_info.major < 3 or sys.version_info.minor < 5: + check_is_fitted(self, 'components_') + else: + check_is_fitted(self) components_T = self.components_.T.copy() def metric_fun(u, v, squared=False): @@ -300,7 +309,11 @@ def get_mahalanobis_matrix(self): M : `numpy.ndarray`, shape=(n_features, n_features) The copy of the learned Mahalanobis matrix. 
""" - check_is_fitted(self, 'components_') + # TODO: remove when we stop supporting Python < 3.5 + if sys.version_info.major < 3 or sys.version_info.minor < 5: + check_is_fitted(self, 'components_') + else: + check_is_fitted(self) return self.components_.T.dot(self.components_) @@ -363,7 +376,11 @@ def decision_function(self, pairs): y_predicted : `numpy.ndarray` of floats, shape=(n_constraints,) The predicted decision function value for each pair. """ - check_is_fitted(self, 'preprocessor_') + # TODO: remove when we stop supporting Python < 3.5 + if sys.version_info.major < 3 or sys.version_info.minor < 5: + check_is_fitted(self, 'preprocessor_') + else: + check_is_fitted(self) pairs = check_input(pairs, type_of_inputs='tuples', preprocessor=self.preprocessor_, estimator=self, tuple_size=self._tuple_size) @@ -606,7 +623,11 @@ def predict(self, quadruplets): prediction : `numpy.ndarray` of floats, shape=(n_constraints,) Predictions of the ordering of pairs, for each quadruplet. """ - check_is_fitted(self, 'preprocessor_') + # TODO: remove when we stop supporting Python < 3.5 + if sys.version_info.major < 3 or sys.version_info.minor < 5: + check_is_fitted(self, 'preprocessor_') + else: + check_is_fitted(self) quadruplets = check_input(quadruplets, type_of_inputs='tuples', preprocessor=self.preprocessor_, estimator=self, tuple_size=self._tuple_size) @@ -635,7 +656,11 @@ def decision_function(self, quadruplets): decision_function : `numpy.ndarray` of floats, shape=(n_constraints,) Metric differences. """ - check_is_fitted(self, 'preprocessor_') + # TODO: remove when we stop supporting Python < 3.5 + if sys.version_info.major < 3 or sys.version_info.minor < 5: + check_is_fitted(self, 'preprocessor_') + else: + check_is_fitted(self) quadruplets = check_input(quadruplets, type_of_inputs='tuples', preprocessor=self.preprocessor_, estimator=self, tuple_size=self._tuple_size) From 0370198f6b6d37ec067b1673a4aebd9315e0917e Mon Sep 17 00:00:00 2001 From: Robin Vogel Date: Fri, 3 Jan 2020 10:20:44 +0100 Subject: [PATCH 151/210] Repairs chunk generation for unknown labels, solves issue #260 (#263) * chunks return a map of index to chunk * maj * maj * remove storing of known labels * typo * no self.num_points * tests for unlabeled, repairs chunk generation * maj * testing diff features * corrected test * diff warning * maj * added parameter bound test --- metric_learn/constraints.py | 35 ++++++++++++--------------------- metric_learn/rca.py | 13 ++++++++++++- test/metric_learn_test.py | 39 ++++++++++++++++++++++++++++++++++++- test/test_constraints.py | 16 +++++++++++---- 4 files changed, 74 insertions(+), 29 deletions(-) diff --git a/metric_learn/constraints.py b/metric_learn/constraints.py index b71c9b96..752ca6e0 100644 --- a/metric_learn/constraints.py +++ b/metric_learn/constraints.py @@ -5,7 +5,6 @@ import numpy as np import warnings from six.moves import xrange -from scipy.sparse import coo_matrix from sklearn.utils import check_random_state __all__ = ['Constraints'] @@ -20,21 +19,7 @@ class Constraints(object): def __init__(self, partial_labels): '''partial_labels : int arraylike, -1 indicating unknown label''' partial_labels = np.asanyarray(partial_labels, dtype=int) - self.num_points, = partial_labels.shape - self.known_label_idx, = np.where(partial_labels >= 0) - self.known_labels = partial_labels[self.known_label_idx] - - def adjacency_matrix(self, num_constraints, random_state=None): - random_state = check_random_state(random_state) - a, b, c, d = 
self.positive_negative_pairs(num_constraints, - random_state=random_state) - row = np.concatenate((a, c)) - col = np.concatenate((b, d)) - data = np.ones_like(row, dtype=int) - data[len(a):] = -1 - adj = coo_matrix((data, (row, col)), shape=(self.num_points,) * 2) - # symmetrize - return adj + adj.T + self.partial_labels = partial_labels def positive_negative_pairs(self, num_constraints, same_length=False, random_state=None): @@ -50,17 +35,19 @@ def positive_negative_pairs(self, num_constraints, same_length=False, def _pairs(self, num_constraints, same_label=True, max_iter=10, random_state=np.random): - num_labels = len(self.known_labels) + known_label_idx, = np.where(self.partial_labels >= 0) + known_labels = self.partial_labels[known_label_idx] + num_labels = len(known_labels) ab = set() it = 0 while it < max_iter and len(ab) < num_constraints: nc = num_constraints - len(ab) for aidx in random_state.randint(num_labels, size=nc): if same_label: - mask = self.known_labels[aidx] == self.known_labels + mask = known_labels[aidx] == known_labels mask[aidx] = False # avoid identity pairs else: - mask = self.known_labels[aidx] != self.known_labels + mask = known_labels[aidx] != known_labels b_choices, = np.where(mask) if len(b_choices) > 0: ab.add((aidx, random_state.choice(b_choices))) @@ -69,16 +56,18 @@ def _pairs(self, num_constraints, same_label=True, max_iter=10, warnings.warn("Only generated %d %s constraints (requested %d)" % ( len(ab), 'positive' if same_label else 'negative', num_constraints)) ab = np.array(list(ab)[:num_constraints], dtype=int) - return self.known_label_idx[ab.T] + return known_label_idx[ab.T] def chunks(self, num_chunks=100, chunk_size=2, random_state=None): """ the random state object to be passed must be a numpy random seed """ random_state = check_random_state(random_state) - chunks = -np.ones_like(self.known_label_idx, dtype=int) - uniq, lookup = np.unique(self.known_labels, return_inverse=True) - all_inds = [set(np.where(lookup == c)[0]) for c in xrange(len(uniq))] + chunks = -np.ones_like(self.partial_labels, dtype=int) + uniq, lookup = np.unique(self.partial_labels, return_inverse=True) + unknown_uniq = np.where(uniq < 0)[0] + all_inds = [set(np.where(lookup == c)[0]) for c in xrange(len(uniq)) + if c not in unknown_uniq] max_chunks = int(np.sum([len(s) // chunk_size for s in all_inds])) if max_chunks < num_chunks: raise ValueError(('Not enough possible chunks of %d elements in each' diff --git a/metric_learn/rca.py b/metric_learn/rca.py index 2a9ab1e8..204bd360 100644 --- a/metric_learn/rca.py +++ b/metric_learn/rca.py @@ -93,10 +93,12 @@ def __init__(self, n_components=None, num_dims='deprecated', def _check_dimension(self, rank, X): d = X.shape[1] + if rank < d: warnings.warn('The inner covariance matrix is not invertible, ' 'so the transformation matrix may contain Nan values. ' - 'You should reduce the dimensionality of your input,' + 'You should remove any linearly dependent features and/or ' + 'reduce the dimensionality of your input, ' 'for instance using `sklearn.decomposition.PCA` as a ' 'preprocessing step.') @@ -241,4 +243,13 @@ def fit(self, X, y, random_state='deprecated'): chunks = Constraints(y).chunks(num_chunks=self.num_chunks, chunk_size=self.chunk_size, random_state=self.random_state) + + if self.num_chunks * (self.chunk_size - 1) < X.shape[1]: + warnings.warn('Due to the parameters of RCA_Supervised, ' + 'the inner covariance matrix is not invertible, ' + 'so the transformation matrix will contain Nan values. 
' + 'Increase the number or size of the chunks to correct ' + 'this problem.' + ) + return RCA.fit(self, X, chunks) diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py index f713a059..5a271890 100644 --- a/test/metric_learn_test.py +++ b/test/metric_learn_test.py @@ -1100,9 +1100,11 @@ def test_rank_deficient_returns_warning(self): rca = RCA() msg = ('The inner covariance matrix is not invertible, ' 'so the transformation matrix may contain Nan values. ' - 'You should reduce the dimensionality of your input,' + 'You should remove any linearly dependent features and/or ' + 'reduce the dimensionality of your input, ' 'for instance using `sklearn.decomposition.PCA` as a ' 'preprocessing step.') + with pytest.warns(None) as raised_warnings: rca.fit(X, y) assert any(str(w.message) == msg for w in raised_warnings) @@ -1136,6 +1138,41 @@ def test_changed_behaviour_warning_random_state(self): rca_supervised.fit(X, y) assert any(msg == str(wrn.message) for wrn in raised_warning) + def test_unknown_labels(self): + n = 200 + num_chunks = 50 + X, y = make_classification(random_state=42, n_samples=2 * n, + n_features=6, n_informative=6, n_redundant=0) + y2 = np.concatenate((y[:n], -np.ones(n))) + + rca = RCA_Supervised(num_chunks=num_chunks, random_state=42) + rca.fit(X[:n], y[:n]) + + rca2 = RCA_Supervised(num_chunks=num_chunks, random_state=42) + rca2.fit(X, y2) + + assert not np.any(np.isnan(rca.components_)) + assert not np.any(np.isnan(rca2.components_)) + + np.testing.assert_array_equal(rca.components_, rca2.components_) + + def test_bad_parameters(self): + n = 200 + num_chunks = 3 + X, y = make_classification(random_state=42, n_samples=n, + n_features=6, n_informative=6, n_redundant=0) + + rca = RCA_Supervised(num_chunks=num_chunks, random_state=42) + msg = ('Due to the parameters of RCA_Supervised, ' + 'the inner covariance matrix is not invertible, ' + 'so the transformation matrix will contain Nan values. ' + 'Increase the number or size of the chunks to correct ' + 'this problem.' 
+ ) + with pytest.warns(None) as raised_warning: + rca.fit(X, y) + assert any(str(w.message) == msg for w in raised_warning) + @pytest.mark.parametrize('num_dims', [None, 2]) def test_deprecation_num_dims_rca(num_dims): diff --git a/test/test_constraints.py b/test/test_constraints.py index a135985e..243028f6 100644 --- a/test/test_constraints.py +++ b/test/test_constraints.py @@ -1,4 +1,3 @@ -import unittest import pytest import numpy as np from sklearn.utils import shuffle @@ -34,7 +33,8 @@ def test_exact_num_points_for_chunks(num_chunks, chunk_size): chunks = constraints.chunks(num_chunks=num_chunks, chunk_size=chunk_size, random_state=SEED) - chunk_no, size_each_chunk = np.unique(chunks, return_counts=True) + chunk_no, size_each_chunk = np.unique(chunks[chunks >= 0], + return_counts=True) np.testing.assert_array_equal(size_each_chunk, chunk_size) assert chunk_no.shape[0] == num_chunks @@ -59,5 +59,13 @@ def test_chunk_case_one_miss_point(num_chunks, chunk_size): assert str(e.value) == expected_message -if __name__ == '__main__': - unittest.main() +@pytest.mark.parametrize("num_chunks, chunk_size", [(5, 10), (10, 50)]) +def test_unknown_labels_not_in_chunks(num_chunks, chunk_size): + """Checks that unknown labels are not assigned to any chunk.""" + labels = gen_labels_for_chunks(num_chunks, chunk_size) + + constraints = Constraints(labels) + chunks = constraints.chunks(num_chunks=num_chunks, chunk_size=chunk_size, + random_state=SEED) + + assert np.all(chunks[labels < 0] < 0) From 7a57b0625f2310df0bd3bc93d658f018421d1de7 Mon Sep 17 00:00:00 2001 From: Yuan Tang Date: Tue, 7 Jan 2020 10:26:56 -0500 Subject: [PATCH 152/210] Fix broken link to ITML paper (#271) * Update itml.py --- metric_learn/itml.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/metric_learn/itml.py b/metric_learn/itml.py index 5202c9e1..0c8087df 100644 --- a/metric_learn/itml.py +++ b/metric_learn/itml.py @@ -209,8 +209,8 @@ class ITML(_BaseITML, _PairsClassifierMixin): References ---------- .. [1] `Information-theoretic Metric Learning - `_ Jason V. Davis, et al. + `_ Jason V. Davis, et al. """ def fit(self, pairs, y, bounds=None, calibration_params=None): From f48a55d62b28e19d9662307ea0659d1b0076e553 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20Bellet?= Date: Mon, 13 Jan 2020 14:35:15 +0100 Subject: [PATCH 153/210] Revert changes in #270 due to revert decision in sklearn (#273) --- metric_learn/base_metric.py | 37 ++++++------------------------------- 1 file changed, 6 insertions(+), 31 deletions(-) diff --git a/metric_learn/base_metric.py b/metric_learn/base_metric.py index 5367a01e..427fcf86 100644 --- a/metric_learn/base_metric.py +++ b/metric_learn/base_metric.py @@ -11,7 +11,6 @@ import six from ._util import ArrayIndexer, check_input, validate_vector import warnings -import sys class BaseMetricLearner(six.with_metaclass(ABCMeta, BaseEstimator)): @@ -241,22 +240,14 @@ def transform(self, X): X_embedded : `numpy.ndarray`, shape=(n_samples, n_components) The embedded data points. 
""" - # TODO: remove when we stop supporting Python < 3.5 - if sys.version_info.major < 3 or sys.version_info.minor < 5: - check_is_fitted(self, ['preprocessor_', 'components_']) - else: - check_is_fitted(self) + check_is_fitted(self, ['preprocessor_', 'components_']) X_checked = check_input(X, type_of_inputs='classic', estimator=self, preprocessor=self.preprocessor_, accept_sparse=True) return X_checked.dot(self.components_.T) def get_metric(self): - # TODO: remove when we stop supporting Python < 3.5 - if sys.version_info.major < 3 or sys.version_info.minor < 5: - check_is_fitted(self, 'components_') - else: - check_is_fitted(self) + check_is_fitted(self, 'components_') components_T = self.components_.T.copy() def metric_fun(u, v, squared=False): @@ -309,11 +300,7 @@ def get_mahalanobis_matrix(self): M : `numpy.ndarray`, shape=(n_features, n_features) The copy of the learned Mahalanobis matrix. """ - # TODO: remove when we stop supporting Python < 3.5 - if sys.version_info.major < 3 or sys.version_info.minor < 5: - check_is_fitted(self, 'components_') - else: - check_is_fitted(self) + check_is_fitted(self, 'components_') return self.components_.T.dot(self.components_) @@ -376,11 +363,7 @@ def decision_function(self, pairs): y_predicted : `numpy.ndarray` of floats, shape=(n_constraints,) The predicted decision function value for each pair. """ - # TODO: remove when we stop supporting Python < 3.5 - if sys.version_info.major < 3 or sys.version_info.minor < 5: - check_is_fitted(self, 'preprocessor_') - else: - check_is_fitted(self) + check_is_fitted(self, 'preprocessor_') pairs = check_input(pairs, type_of_inputs='tuples', preprocessor=self.preprocessor_, estimator=self, tuple_size=self._tuple_size) @@ -623,11 +606,7 @@ def predict(self, quadruplets): prediction : `numpy.ndarray` of floats, shape=(n_constraints,) Predictions of the ordering of pairs, for each quadruplet. """ - # TODO: remove when we stop supporting Python < 3.5 - if sys.version_info.major < 3 or sys.version_info.minor < 5: - check_is_fitted(self, 'preprocessor_') - else: - check_is_fitted(self) + check_is_fitted(self, 'preprocessor_') quadruplets = check_input(quadruplets, type_of_inputs='tuples', preprocessor=self.preprocessor_, estimator=self, tuple_size=self._tuple_size) @@ -656,11 +635,7 @@ def decision_function(self, quadruplets): decision_function : `numpy.ndarray` of floats, shape=(n_constraints,) Metric differences. """ - # TODO: remove when we stop supporting Python < 3.5 - if sys.version_info.major < 3 or sys.version_info.minor < 5: - check_is_fitted(self, 'preprocessor_') - else: - check_is_fitted(self) + check_is_fitted(self, 'preprocessor_') quadruplets = check_input(quadruplets, type_of_inputs='tuples', preprocessor=self.preprocessor_, estimator=self, tuple_size=self._tuple_size) From 1b40c3b6210a80408054f8b68fdca078906a804b Mon Sep 17 00:00:00 2001 From: Gabriel Rudloff Date: Mon, 20 Jan 2020 14:35:48 +0100 Subject: [PATCH 154/210] Changes in documentation. Rephrasing, fixed examples, standarized notation, etc. (#274) * Multiple changes to the documentation. Rephrasing, fixed examples and standarized notation, and others. 
* Forgot to change one A to L * Replaced broken modindex link for module list * fixed compliance with flake8 * Fixed typos, misplaced example, etc * No new bullet and rectification * remove modules index link * add "respectively" * fix rca examples * fix rca examples again --- README.rst | 1 + doc/getting_started.rst | 3 +- doc/index.rst | 2 +- doc/supervised.rst | 30 +++++++++--------- doc/weakly_supervised.rst | 37 +++++++++++------------ examples/plot_metric_learning_examples.py | 2 +- metric_learn/itml.py | 27 ++++++++++++----- metric_learn/lsml.py | 26 +++++++++++----- metric_learn/mmc.py | 27 ++++++++++++----- metric_learn/rca.py | 25 ++++++++++----- 10 files changed, 114 insertions(+), 66 deletions(-) diff --git a/README.rst b/README.rst index c1dd6bb5..8f68df90 100644 --- a/README.rst +++ b/README.rst @@ -26,6 +26,7 @@ metric-learn contains efficient Python implementations of several popular superv - For SDML, using skggm will allow the algorithm to solve problematic cases (install from commit `a0ed406 `_). + ``pip install 'git+https://github.com/skggm/skggm.git@a0ed406586c4364ea3297a658f415e13b5cbdaf8'`` to install the required version of skggm from GitHub. - For running the examples only: matplotlib **Installation/Setup** diff --git a/doc/getting_started.rst b/doc/getting_started.rst index 5ce4242a..97abb9a5 100644 --- a/doc/getting_started.rst +++ b/doc/getting_started.rst @@ -10,7 +10,7 @@ Run ``pip install metric-learn`` to download and install from PyPI. Alternately, download the source repository and run: - ``python setup.py install`` for default installation. -- ``python setup.py test`` to run all tests. +- ``pytest test`` to run all tests. **Dependencies** @@ -21,6 +21,7 @@ Alternately, download the source repository and run: - For SDML, using skggm will allow the algorithm to solve problematic cases (install from commit `a0ed406 `_). + ``pip install 'git+https://github.com/skggm/skggm.git@a0ed406586c4364ea3297a658f415e13b5cbdaf8'`` to install the required version of skggm from GitHub. - For running the examples only: matplotlib Quick start diff --git a/doc/index.rst b/doc/index.rst index 3168ca36..8f000246 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -52,7 +52,7 @@ Documentation outline auto_examples/index -:ref:`genindex` | :ref:`modindex` | :ref:`search` +:ref:`genindex` | :ref:`search` .. 
|Travis-CI Build Status| image:: https://api.travis-ci.org/scikit-learn-contrib/metric-learn.svg?branch=master
   :target: https://travis-ci.org/scikit-learn-contrib/metric-learn

diff --git a/doc/supervised.rst b/doc/supervised.rst
index f221ba4a..fc77287b 100644
--- a/doc/supervised.rst
+++ b/doc/supervised.rst
@@ -131,13 +131,13 @@ The distance is learned by solving the following optimization problem:
        c\sum_{i, j, l}\eta_{ij}(1-y_{ij})[1+||\mathbf{L(x_i-x_j)}||^2-||
        \mathbf{L(x_i-x_l)}||^2]_+)
 
-where :math:`\mathbf{x}_i` is an data point, :math:`\mathbf{x}_j` is one
-of its k nearest neighbors sharing the same label, and :math:`\mathbf{x}_l`
+where :math:`\mathbf{x}_i` is a data point, :math:`\mathbf{x}_j` is one
+of its k-nearest neighbors sharing the same label, and :math:`\mathbf{x}_l`
 are all the other instances within that region with different labels,
 :math:`\eta_{ij}, y_{ij} \in \{0, 1\}` are both the indicators,
-:math:`\eta_{ij}` represents :math:`\mathbf{x}_{j}` is the k nearest
-neighbors(with same labels) of :math:`\mathbf{x}_{i}`, :math:`y_{ij}=0`
-indicates :math:`\mathbf{x}_{i}, \mathbf{x}_{j}` belong to different class,
+:math:`\eta_{ij}` represents :math:`\mathbf{x}_{j}` is the k-nearest
+neighbors (with same labels) of :math:`\mathbf{x}_{i}`, :math:`y_{ij}=0`
+indicates :math:`\mathbf{x}_{i}, \mathbf{x}_{j}` belong to different classes,
 :math:`[\cdot]_+=\max(0, \cdot)` is the Hinge loss.
 
 .. topic:: Example Code:
@@ -235,7 +235,7 @@ the sum of probability of being correctly classified:
 Local Fisher Discriminant Analysis (:py:class:`LFDA `)
 
-`LFDA` is a linear supervised dimensionality reduction method. It is
+`LFDA` is a linear supervised dimensionality reduction method which
 effectively combines the ideas of `Linear Discriminant Analysis ` and
 Locality-Preserving Projection . It is particularly useful when dealing
 with multi-modality, where one or more classes consist of separate clusters
 in input space. The core optimization problem of LFDA is solved as a
 generalized eigenvalue problem.
@@ -261,18 +261,18 @@ where
 \,\,\mathbf{A}_{i,j}(1/n-1/n_l) \qquad y_i = y_j\end{aligned}\right.\\
 
 here :math:`\mathbf{A}_{i,j}` is the :math:`(i,j)`-th entry of the affinity
-matrix :math:`\mathbf{A}`:, which can be calculated with local scaling methods.
+matrix :math:`\mathbf{A}`:, which can be calculated with local scaling
+methods, `n` and `n_l` are the total number of points and the number of
+points per cluster `l` respectively.
 
 Then the learning problem becomes deriving the LFDA transformation matrix
-:math:`\mathbf{T}_{LFDA}`:
+:math:`\mathbf{L}_{LFDA}`:
 
 .. math::
 
-    \mathbf{T}_{LFDA} = \arg\max_\mathbf{T}
-    [\text{tr}((\mathbf{T}^T\mathbf{S}^{(w)}
-    \mathbf{T})^{-1}\mathbf{T}^T\mathbf{S}^{(b)}\mathbf{T})]
+    \mathbf{L}_{LFDA} = \arg\max_\mathbf{L}
+    [\text{tr}((\mathbf{L}^T\mathbf{S}^{(w)}
+    \mathbf{L})^{-1}\mathbf{L}^T\mathbf{S}^{(b)}\mathbf{L})]
 
-That is, it is looking for a transformation matrix :math:`\mathbf{T}` such that
+That is, it is looking for a transformation matrix :math:`\mathbf{L}` such that
 nearby data pairs in the same class are made close and the data pairs in
 different classes are separated from each other; far apart data pairs in the
 same class are not imposed to be close.
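As a quick usage illustration of the LFDA transformation discussed in the hunks above, a minimal sketch (the iris dataset and `n_components=2` are arbitrary choices for the example, not part of the patched documentation)::

    from metric_learn import LFDA
    from sklearn.datasets import load_iris

    X, y = load_iris(return_X_y=True)
    lfda = LFDA(n_components=2)            # learn L projecting to 2 dimensions
    X_embedded = lfda.fit_transform(X, y)  # nearby same-class pairs made close
    print(X_embedded.shape)                # (150, 2)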
@@ -326,9 +326,9 @@ empirical development. The Gaussian kernel is denoted as:
 
 where :math:`d(\cdot, \cdot)` is the squared distance under some metrics,
 here in the fashion of Mahalanobis, it should be :math:`d(\mathbf{x}_i,
-\mathbf{x}_j) = ||\mathbf{A}(\mathbf{x}_i - \mathbf{x}_j)||`, the transition
-matrix :math:`\mathbf{A}` is derived from the decomposition of Mahalanobis
-matrix :math:`\mathbf{M=A^TA}`.
+\mathbf{x}_j) = ||\mathbf{L}(\mathbf{x}_i - \mathbf{x}_j)||`, the transition
+matrix :math:`\mathbf{L}` is derived from the decomposition of Mahalanobis
+matrix :math:`\mathbf{M=L^TL}`.
 
 Since :math:`\sigma^2` can be integrated into :math:`d(\cdot)`, we can set
 :math:`\sigma^2=1` for the sake of simplicity. Here we use the cumulative
diff --git a/doc/weakly_supervised.rst b/doc/weakly_supervised.rst
index 1bd096c7..cf313ba1 100644
--- a/doc/weakly_supervised.rst
+++ b/doc/weakly_supervised.rst
@@ -367,36 +367,36 @@ other methods, `ITML` does not rely on an eigenvalue computation or
 semi-definite programming.
 
-Given a Mahalanobis distance parameterized by :math:`A`, its corresponding
+Given a Mahalanobis distance parameterized by :math:`M`, its corresponding
 multivariate Gaussian is denoted as:
 
 .. math::
-    p(\mathbf{x}; \mathbf{A}) = \frac{1}{Z}\exp(-\frac{1}{2}d_\mathbf{A}
+    p(\mathbf{x}; \mathbf{M}) = \frac{1}{Z}\exp(-\frac{1}{2}d_\mathbf{M}
     (\mathbf{x}, \mu))
-    = \frac{1}{Z}\exp(-\frac{1}{2}((\mathbf{x} - \mu)^T\mathbf{A}
+    = \frac{1}{Z}\exp(-\frac{1}{2}((\mathbf{x} - \mu)^T\mathbf{M}
     (\mathbf{x} - \mu))
 
 where :math:`Z` is the normalization constant, the inverse of Mahalanobis
-matrix :math:`\mathbf{A}^{-1}` is the covariance of the Gaussian.
+matrix :math:`\mathbf{M}^{-1}` is the covariance of the Gaussian.
 
 Given pairs of similar points :math:`S` and pairs of dissimilar points
 :math:`D`, the distance metric learning problem is to minimize the LogDet
 divergence, which is equivalent to minimizing :math:`\textbf{KL}(p(\mathbf{x};
-\mathbf{A}_0) || p(\mathbf{x}; \mathbf{A}))`:
+\mathbf{M}_0) || p(\mathbf{x}; \mathbf{M}))`:
 
 .. math::
 
-    \min_\mathbf{A} D_{\ell \mathrm{d}}\left(A, A_{0}\right) =
-    \operatorname{tr}\left(A A_{0}^{-1}\right)-\log \operatorname{det}
-    \left(A A_{0}^{-1}\right)-n\\
-    \text{subject to } \quad d_\mathbf{A}(\mathbf{x}_i, \mathbf{x}_j)
+    \min_\mathbf{M} D_{\ell \mathrm{d}}\left(M, M_{0}\right) =
+    \operatorname{tr}\left(M M_{0}^{-1}\right)-\log \operatorname{det}
+    \left(M M_{0}^{-1}\right)-n\\
+    \text{subject to } \quad d_\mathbf{M}(\mathbf{x}_i, \mathbf{x}_j)
     \leq u \qquad (\mathbf{x}_i, \mathbf{x}_j)\in S \\
-    d_\mathbf{A}(\mathbf{x}_i, \mathbf{x}_j) \geq l \qquad (\mathbf{x}_i,
+    d_\mathbf{M}(\mathbf{x}_i, \mathbf{x}_j) \geq l \qquad (\mathbf{x}_i,
     \mathbf{x}_j)\in D
 
 where :math:`u` and :math:`l` are the upper and the lower bound of distance
-for similar and dissimilar pairs respectively, and :math:`\mathbf{A}_0`
+for similar and dissimilar pairs respectively, and :math:`\mathbf{M}_0`
 is the prior distance metric, set to identity matrix by default,
 :math:`D_{\ell \mathrm{d}}(\cdot)` is the log determinant.
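The LogDet objective above is straightforward to check numerically; a minimal numpy sketch of the divergence itself (the candidate matrices here are arbitrary examples)::

    import numpy as np

    def logdet_divergence(M, M0):
        # D_ld(M, M0) = tr(M M0^-1) - log det(M M0^-1) - n
        n = M.shape[0]
        P = M.dot(np.linalg.inv(M0))
        return np.trace(P) - np.log(np.linalg.det(P)) - n

    M0 = np.eye(2)                          # prior metric (identity by default)
    M = np.array([[2.0, 0.3], [0.3, 1.0]])  # candidate Mahalanobis matrix
    print(logdet_divergence(M, M0))         # positive whenever M differs from M0
    print(logdet_divergence(M0, M0))        # 0.0: the divergence vanishes at the prior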
@@ -518,17 +518,14 @@ as the Mahalanobis matrix.
 
   from metric_learn import RCA
 
-  pairs = [[[1.2, 7.5], [1.3, 1.5]],
-           [[6.4, 2.6], [6.2, 9.7]],
-           [[1.3, 4.5], [3.2, 4.6]],
-           [[6.2, 5.5], [5.4, 5.4]]]
-  y = [1, 1, -1, -1]
-
-  # in this task we want points where the first feature is close to be closer
-  # to each other, no matter how close the second feature is
+  X = [[-0.05, 3.0],[0.05, -3.0],
+       [0.1, -3.55],[-0.1, 3.55],
+       [-0.95, -0.05],[0.95, 0.05],
+       [0.4, 0.05],[-0.4, -0.05]]
+  chunks = [0, 0, 1, 1, 2, 2, 3, 3]
 
   rca = RCA()
-  rca.fit(pairs, y)
+  rca.fit(X, chunks)
 
 .. topic:: References:

diff --git a/examples/plot_metric_learning_examples.py b/examples/plot_metric_learning_examples.py
index 047fcc1e..014d9af3 100644
--- a/examples/plot_metric_learning_examples.py
+++ b/examples/plot_metric_learning_examples.py
@@ -175,7 +175,7 @@ def plot_tsne(X, y, colormap=plt.cm.Paired):
 #
 # ITML uses a regularizer that automatically enforces a Semi-Definite
 # Positive Matrix condition - the LogDet divergence. It uses soft
-# must-link or cannot like constraints, and a simple algorithm based on
+# must-link or cannot-link constraints, and a simple algorithm based on
 # Bregman projections. Unlike LMNN, ITML will implicitly enforce points from
 # the same class to belong to the same cluster, as you can see below.
 #

diff --git a/metric_learn/itml.py b/metric_learn/itml.py
index 0c8087df..50eb41a4 100644
--- a/metric_learn/itml.py
+++ b/metric_learn/itml.py
@@ -198,13 +198,16 @@ class ITML(_BaseITML, _PairsClassifierMixin):
 
   Examples
   --------
-  >>> from metric_learn import ITML_Supervised
-  >>> from sklearn.datasets import load_iris
-  >>> iris_data = load_iris()
-  >>> X = iris_data['data']
-  >>> Y = iris_data['target']
-  >>> itml = ITML_Supervised(num_constraints=200)
-  >>> itml.fit(X, Y)
+  >>> from metric_learn import ITML
+  >>> pairs = [[[1.2, 7.5], [1.3, 1.5]],
+  >>>          [[6.4, 2.6], [6.2, 9.7]],
+  >>>          [[1.3, 4.5], [3.2, 4.6]],
+  >>>          [[6.2, 5.5], [5.4, 5.4]]]
+  >>> y = [1, 1, -1, -1]
+  >>> # in this task we want points where the first feature is close to be
+  >>> # closer to each other, no matter how close the second feature is
+  >>> itml = ITML()
+  >>> itml.fit(pairs, y)
 
   References
   ----------
   .. [1] `Information-theoretic Metric Learning
          `_ Jason V. Davis, et al.
   """
 
   def fit(self, pairs, y, bounds=None, calibration_params=None):
@@ -335,6 +338,16 @@ class ITML_Supervised(_BaseITML, TransformerMixin):
     The linear transformation ``L`` deduced from the learned Mahalanobis
     metric (See function `components_from_metric`.)
+ Examples + -------- + >>> from metric_learn import ITML_Supervised + >>> from sklearn.datasets import load_iris + >>> iris_data = load_iris() + >>> X = iris_data['data'] + >>> Y = iris_data['target'] + >>> itml = ITML_Supervised(num_constraints=200) + >>> itml.fit(X, Y) + See Also -------- metric_learn.ITML : The original weakly-supervised algorithm diff --git a/metric_learn/lsml.py b/metric_learn/lsml.py index b9df3825..7989d0b9 100644 --- a/metric_learn/lsml.py +++ b/metric_learn/lsml.py @@ -186,13 +186,15 @@ class LSML(_BaseLSML, _QuadrupletsClassifierMixin): Examples -------- - >>> from metric_learn import LSML_Supervised - >>> from sklearn.datasets import load_iris - >>> iris_data = load_iris() - >>> X = iris_data['data'] - >>> Y = iris_data['target'] - >>> lsml = LSML_Supervised(num_constraints=200) - >>> lsml.fit(X, Y) + >>> from metric_learn import LSML + >>> quadruplets = [[[1.2, 7.5], [1.3, 1.5], [6.4, 2.6], [6.2, 9.7]], + >>> [[1.3, 4.5], [3.2, 4.6], [6.2, 5.5], [5.4, 5.4]], + >>> [[3.2, 7.5], [3.3, 1.5], [8.4, 2.6], [8.2, 9.7]], + >>> [[3.3, 4.5], [5.2, 4.6], [8.2, 5.5], [7.4, 5.4]]] + >>> # we want to make closer points where the first feature is close, and + >>> # further if the second feature is close + >>> lsml = LSML() + >>> lsml.fit(quadruplets) References ---------- @@ -290,6 +292,16 @@ class LSML_Supervised(_BaseLSML, TransformerMixin): prior. In any case, `random_state` is also used to randomly sample constraints from labels. + Examples + -------- + >>> from metric_learn import LSML_Supervised + >>> from sklearn.datasets import load_iris + >>> iris_data = load_iris() + >>> X = iris_data['data'] + >>> Y = iris_data['target'] + >>> lsml = LSML_Supervised(num_constraints=200) + >>> lsml.fit(X, Y) + Attributes ---------- n_iter_ : `int` diff --git a/metric_learn/mmc.py b/metric_learn/mmc.py index 622beb25..981bec48 100644 --- a/metric_learn/mmc.py +++ b/metric_learn/mmc.py @@ -426,13 +426,16 @@ class MMC(_BaseMMC, _PairsClassifierMixin): Examples -------- - >>> from metric_learn import MMC_Supervised - >>> from sklearn.datasets import load_iris - >>> iris_data = load_iris() - >>> X = iris_data['data'] - >>> Y = iris_data['target'] - >>> mmc = MMC_Supervised(num_constraints=200) - >>> mmc.fit(X, Y) + >>> from metric_learn import MMC + >>> pairs = [[[1.2, 7.5], [1.3, 1.5]], + >>> [[6.4, 2.6], [6.2, 9.7]], + >>> [[1.3, 4.5], [3.2, 4.6]], + >>> [[6.2, 5.5], [5.4, 5.4]]] + >>> y = [1, 1, -1, -1] + >>> # in this task we want points where the first feature is close to be + >>> # closer to each other, no matter how close the second feature is + >>> mmc = MMC() + >>> mmc.fit(pairs, y) References ---------- @@ -552,6 +555,16 @@ class MMC_Supervised(_BaseMMC, TransformerMixin): samples, and pairs of dissimilar samples by taking different class samples. It then passes these pairs to `MMC` for training. 
+ Examples + -------- + >>> from metric_learn import MMC_Supervised + >>> from sklearn.datasets import load_iris + >>> iris_data = load_iris() + >>> X = iris_data['data'] + >>> Y = iris_data['target'] + >>> mmc = MMC_Supervised(num_constraints=200) + >>> mmc.fit(X, Y) + Attributes ---------- n_iter_ : `int` diff --git a/metric_learn/rca.py b/metric_learn/rca.py index 204bd360..060a797d 100644 --- a/metric_learn/rca.py +++ b/metric_learn/rca.py @@ -62,13 +62,14 @@ class RCA(MahalanobisMixin, TransformerMixin): Examples -------- - >>> from metric_learn import RCA_Supervised - >>> from sklearn.datasets import load_iris - >>> iris_data = load_iris() - >>> X = iris_data['data'] - >>> Y = iris_data['target'] - >>> rca = RCA_Supervised(num_chunks=30, chunk_size=2) - >>> rca.fit(X, Y) + >>> from metric_learn import RCA + >>> X = [[-0.05, 3.0],[0.05, -3.0], + >>> [0.1, -3.55],[-0.1, 3.55], + >>> [-0.95, -0.05],[0.95, 0.05], + >>> [0.4, 0.05],[-0.4, -0.05]] + >>> chunks = [0, 0, 1, 1, 2, 2, 3, 3] + >>> rca = RCA() + >>> rca.fit(X, chunks) References ------------------ @@ -196,6 +197,16 @@ class RCA_Supervised(RCA): A pseudo random number generator object or a seed for it if int. It is used to randomly sample constraints from labels. + Examples + -------- + >>> from metric_learn import RCA_Supervised + >>> from sklearn.datasets import load_iris + >>> iris_data = load_iris() + >>> X = iris_data['data'] + >>> Y = iris_data['target'] + >>> rca = RCA_Supervised(num_chunks=30, chunk_size=2) + >>> rca.fit(X, Y) + Attributes ---------- components_ : `numpy.ndarray`, shape=(n_components, n_features) From 2380f5196b7fba552aa29d77dc45fe7383191025 Mon Sep 17 00:00:00 2001 From: Robin Vogel Date: Fri, 24 Jan 2020 14:11:49 +0100 Subject: [PATCH 155/210] Corrects the forgotten bits of PR #267 (#269) * maj * maj * corrected PR 267 * trailing whitespace * test calibrate_threshold, test predict * maj * Checks estimator is fitted before set threshold * correct failed tests with MockBadClassifier * remove checks * forgot one * missed one check_is_fitted * sklearn changed the assumptions behind check_is_fitted --- metric_learn/base_metric.py | 10 +++++++++- test/test_pairs_classifiers.py | 27 +++++++++++++++++++++++---- test/test_utils.py | 4 ++++ 3 files changed, 36 insertions(+), 5 deletions(-) diff --git a/metric_learn/base_metric.py b/metric_learn/base_metric.py index 427fcf86..ee73c793 100644 --- a/metric_learn/base_metric.py +++ b/metric_learn/base_metric.py @@ -93,6 +93,8 @@ def _prepare_inputs(self, X, y=None, type_of_inputs='classic', The checked input labels array. """ self._check_preprocessor() + + check_is_fitted(self, ['preprocessor_']) return check_input(X, y, type_of_inputs=type_of_inputs, preprocessor=self.preprocessor_, @@ -215,6 +217,7 @@ def score_pairs(self, pairs): :ref:`mahalanobis_distances` : The section of the project documentation that describes Mahalanobis Distances. """ + check_is_fitted(self, ['preprocessor_']) pairs = check_input(pairs, type_of_inputs='tuples', preprocessor=self.preprocessor_, estimator=self, tuple_size=2) @@ -336,8 +339,10 @@ def predict(self, pairs): y_predicted : `numpy.ndarray` of floats, shape=(n_constraints,) The predicted learned metric value between samples in every pair. 
""" + check_is_fitted(self, 'preprocessor_') + if "threshold_" not in vars(self): - msg = ("A threshold for this estimator has not been set," + msg = ("A threshold for this estimator has not been set, " "call its set_threshold or calibrate_threshold method.") raise AttributeError(msg) return 2 * (- self.decision_function(pairs) <= self.threshold_) - 1 @@ -414,6 +419,8 @@ def set_threshold(self, threshold): self : `_PairsClassifier` The pairs classifier with the new threshold set. """ + check_is_fitted(self, 'preprocessor_') + self.threshold_ = threshold return self @@ -476,6 +483,7 @@ def calibrate_threshold(self, pairs_valid, y_valid, strategy='accuracy', -------- sklearn.calibration : scikit-learn's module for calibrating classifiers """ + check_is_fitted(self, 'preprocessor_') self._validate_calibration_params(strategy, min_rate, beta) diff --git a/test/test_pairs_classifiers.py b/test/test_pairs_classifiers.py index 840cd151..6c71abcd 100644 --- a/test/test_pairs_classifiers.py +++ b/test/test_pairs_classifiers.py @@ -66,14 +66,31 @@ def test_predict_monotonous(estimator, build_dataset, ids=ids_pairs_learners) def test_raise_not_fitted_error_if_not_fitted(estimator, build_dataset, with_preprocessor): - """Test that a NotFittedError is raised if someone tries to predict and - the metric learner has not been fitted.""" + """Test that a NotFittedError is raised if someone tries to use + score_pairs, decision_function, get_metric, transform or + get_mahalanobis_matrix on input data and the metric learner + has not been fitted.""" input_data, labels, preprocessor, _ = build_dataset(with_preprocessor) estimator = clone(estimator) estimator.set_params(preprocessor=preprocessor) set_random_state(estimator) + with pytest.raises(NotFittedError): + estimator.score_pairs(input_data) with pytest.raises(NotFittedError): estimator.decision_function(input_data) + with pytest.raises(NotFittedError): + estimator.get_metric() + with pytest.raises(NotFittedError): + estimator.transform(input_data) + with pytest.raises(NotFittedError): + estimator.get_mahalanobis_matrix() + with pytest.raises(NotFittedError): + estimator.calibrate_threshold(input_data, labels) + + with pytest.raises(NotFittedError): + estimator.set_threshold(0.5) + with pytest.raises(NotFittedError): + estimator.predict(input_data) @pytest.mark.parametrize('calibration_params', @@ -138,7 +155,8 @@ def fit(self, pairs, y): def test_unset_threshold(): - # test that set_threshold indeed sets the threshold + """Tests that the "threshold is unset" error is raised when using predict + (performs binary classification on pairs) with an unset threshold.""" identity_pairs_classifier = IdentityPairsClassifier() pairs = np.array([[[0.], [1.]], [[1.], [3.]], [[2.], [5.]], [[3.], [7.]]]) y = np.array([1, 1, -1, -1]) @@ -146,7 +164,7 @@ def test_unset_threshold(): with pytest.raises(AttributeError) as e: identity_pairs_classifier.predict(pairs) - expected_msg = ("A threshold for this estimator has not been set," + expected_msg = ("A threshold for this estimator has not been set, " "call its set_threshold or calibrate_threshold method.") assert str(e.value) == expected_msg @@ -362,6 +380,7 @@ class MockBadPairsClassifier(MahalanobisMixin, _PairsClassifierMixin): """ def fit(self, pairs, y, calibration_params=None): + self.preprocessor_ = 'not used' self.components_ = 'not used' self.calibrate_threshold(pairs, y, **(calibration_params if calibration_params is not None else diff --git a/test/test_utils.py b/test/test_utils.py index 3092e168..76be5817 100644 
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -749,6 +749,8 @@ def test_array_like_indexer_array_like_valid_classic(input_data, indices):
   """Checks that any array-like is valid in the 'preprocessor' argument,
   and in the indices, for a classic input"""
   class MockMetricLearner(MahalanobisMixin):
+    def fit(self):
+      pass
     pass
 
   mock_algo = MockMetricLearner(preprocessor=input_data)
@@ -763,6 +765,8 @@ def test_array_like_indexer_array_like_valid_tuples(input_data, indices):
   """Checks that any array-like is valid in the 'preprocessor' argument,
   and in the indices, for a classic input"""
   class MockMetricLearner(MahalanobisMixin):
+    def fit(self):
+      pass
     pass
 
   mock_algo = MockMetricLearner(preprocessor=input_data)

From e7392399dfa3d64a87eb31dfdfc532ada310ea59 Mon Sep 17 00:00:00 2001
From: Gabriel Rudloff
Date: Tue, 4 Feb 2020 11:10:21 +0100
Subject: [PATCH 156/210] Fix covariance initialization when matrix is not
 invertible (#277)

* Fix covariance init when matrix is not invertible
* replaced import scipy for only required functions
* Change inv for pseudo-inv on custom matrix init
* Change from EVD to SVD
* Roll back to EVD and pseudo inverse of EVD
* Fix non-ASCII char
* rephrasing warnings
* added tests
* more rephrasing
* fix test
* add test
* fixes & adds singular pinv test from eig
* fix tolerance of assert
* fix tolerance of assert
* fix tolerance of assert
* fix random seed
* isolate random seed setting
---
 metric_learn/_util.py          | 62 +++++++++++++++++++++++----
 test/test_mahalanobis_mixin.py | 77 +++++++++++++++++++++++++++++++---
 test/test_utils.py             | 27 +++++++++++-
 3 files changed, 151 insertions(+), 15 deletions(-)

diff --git a/metric_learn/_util.py b/metric_learn/_util.py
index b476e70b..fa196a69 100644
--- a/metric_learn/_util.py
+++ b/metric_learn/_util.py
@@ -1,5 +1,4 @@
 import numpy as np
-import scipy
 import six
 from numpy.linalg import LinAlgError
 from sklearn.datasets import make_spd_matrix
@@ -8,9 +7,10 @@
 from sklearn.utils.validation import check_X_y, check_random_state
 from .exceptions import PreprocessorError, NonPSDError
 from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
-from scipy.linalg import pinvh
+from scipy.linalg import pinvh, eigh
 import sys
 import time
+import warnings
 
 # hack around lack of axis kwarg in older numpy versions
 try:
@@ -678,17 +678,20 @@ def _initialize_metric_mahalanobis(input, init='identity', random_state=None,
   random_state = check_random_state(random_state)
 
   M = init
-  if isinstance(init, np.ndarray):
-    s, u = scipy.linalg.eigh(init)
-    init_is_definite = _check_sdp_from_eigen(s)
+  if isinstance(M, np.ndarray):
+    w, V = eigh(M, check_finite=False)
+    init_is_definite = _check_sdp_from_eigen(w)
     if strict_pd and not init_is_definite:
       raise LinAlgError("You should provide a strictly positive definite "
                         "matrix as `{}`. This one is not definite. Try another"
                         " {}, or an algorithm that does not "
                         "require the {} to be strictly positive definite."
.format(*((matrix_name,) * 3))) + elif return_inverse and not init_is_definite: + warnings.warn('The initialization matrix is not invertible: ' + 'using the pseudo-inverse instead.') if return_inverse: - M_inv = np.dot(u / s, u.T) + M_inv = _pseudo_inverse_from_eig(w, V) return M, M_inv else: return M @@ -707,15 +710,23 @@ def _initialize_metric_mahalanobis(input, init='identity', random_state=None, X = input # atleast2d is necessary to deal with scalar covariance matrices M_inv = np.atleast_2d(np.cov(X, rowvar=False)) - s, u = scipy.linalg.eigh(M_inv) - cov_is_definite = _check_sdp_from_eigen(s) + w, V = eigh(M_inv, check_finite=False) + cov_is_definite = _check_sdp_from_eigen(w) if strict_pd and not cov_is_definite: raise LinAlgError("Unable to get a true inverse of the covariance " "matrix since it is not definite. Try another " "`{}`, or an algorithm that does not " "require the `{}` to be strictly positive definite." .format(*((matrix_name,) * 2))) + elif not cov_is_definite: + warnings.warn('The covariance matrix is not invertible: ' + 'using the pseudo-inverse instead. ' + 'To make the covariance matrix invertible' + ' you can remove any linearly dependent features and/or ' + 'reduce the dimensionality of your input, ' + 'for instance using `sklearn.decomposition.PCA` as a ' + 'preprocessing step.') + M = _pseudo_inverse_from_eig(w, V) if return_inverse: return M, M_inv else: @@ -742,3 +753,36 @@ def _check_n_components(n_features, n_components): if 0 < n_components <= n_features: return n_components raise ValueError('Invalid n_components, must be in [1, %d]' % n_features) + + +def _pseudo_inverse_from_eig(w, V, tol=None): + """Compute the (Moore-Penrose) pseudo-inverse of the EVD of a symmetric + matrix. + + Parameters + ---------- + w : (..., M) ndarray + The eigenvalues in ascending order, each repeated according to + its multiplicity. + + V : (..., M, M) ndarray + The column ``V[:, i]`` is the normalized eigenvector corresponding + to the eigenvalue ``w[i]``. + + tol : positive `float`, optional + Absolute eigenvalues below tol are considered zero. + + Returns + ------- + output : (..., M, M) ndarray + The pseudo-inverse given by the EVD.
+ """ + if tol is None: + tol = np.amax(w) * np.max(w.shape) * np.finfo(w.dtype).eps + # discard small eigenvalues and invert the rest + large = np.abs(w) > tol + w = np.divide(1, w, where=large, out=w) + w[~large] = 0 + + return np.dot(V * w, np.conjugate(V).T) diff --git a/test/test_mahalanobis_mixin.py b/test/test_mahalanobis_mixin.py index 91aa129a..91fb435f 100644 --- a/test/test_mahalanobis_mixin.py +++ b/test/test_mahalanobis_mixin.py @@ -8,12 +8,12 @@ from scipy.stats import ortho_group from sklearn import clone from sklearn.cluster import DBSCAN -from sklearn.datasets import make_spd_matrix -from sklearn.utils import check_random_state +from sklearn.datasets import make_spd_matrix, make_blobs +from sklearn.utils import check_random_state, shuffle from sklearn.utils.multiclass import type_of_target from sklearn.utils.testing import set_random_state -from metric_learn._util import make_context +from metric_learn._util import make_context, _initialize_metric_mahalanobis from metric_learn.base_metric import (_QuadrupletsClassifierMixin, _PairsClassifierMixin) from metric_learn.exceptions import NonPSDError @@ -569,7 +569,7 @@ def test_init_mahalanobis(estimator, build_dataset): in zip(ids_metric_learners, metric_learners) if idml[:4] in ['ITML', 'SDML', 'LSML']]) -def test_singular_covariance_init_or_prior(estimator, build_dataset): +def test_singular_covariance_init_or_prior_strictpd(estimator, build_dataset): """Tests that when using the 'covariance' init or prior, it returns the appropriate error if the covariance matrix is singular, for algorithms that need a strictly PD prior or init (see @@ -603,6 +603,48 @@ def test_singular_covariance_init_or_prior(estimator, build_dataset): assert str(raised_err.value) == msg +@pytest.mark.integration +@pytest.mark.parametrize('estimator, build_dataset', + [(ml, bd) for idml, (ml, bd) + in zip(ids_metric_learners, + metric_learners) + if idml[:3] in ['MMC']], + ids=[idml for idml, (ml, _) + in zip(ids_metric_learners, + metric_learners) + if idml[:3] in ['MMC']]) +def test_singular_covariance_init_of_non_strict_pd(estimator, build_dataset): + """Tests that when using the 'covariance' init or prior, it returns the + appropriate warning if the covariance matrix is singular, for algorithms + that don't need a strictly PD init. Also checks that the returned + inverse matrix has finite values + """ + input_data, labels, _, X = build_dataset() + model = clone(estimator) + set_random_state(model) + # We create a feature that is a linear combination of the first two + # features: + input_data = np.concatenate([input_data, input_data[:, ..., :2].dot([[2], + [3]])], + axis=-1) + model.set_params(init='covariance') + msg = ('The covariance matrix is not invertible: ' + 'using the pseudo-inverse instead.' 
+ 'To make the covariance matrix invertible' + ' you can remove any linearly dependent features and/or ' + 'reduce the dimensionality of your input, ' + 'for instance using `sklearn.decomposition.PCA` as a ' + 'preprocessing step.') + with pytest.warns(UserWarning) as raised_warning: + model.fit(input_data, labels) + assert np.any([str(warning.message) == msg for warning in raised_warning]) + M, _ = _initialize_metric_mahalanobis(X, init='covariance', + random_state=RNG, + return_inverse=True, + strict_pd=False) + assert np.isfinite(M).all() + + @pytest.mark.integration @pytest.mark.parametrize('estimator, build_dataset', [(ml, bd) for idml, (ml, bd) @@ -614,7 +656,7 @@ def test_singular_covariance_init_or_prior(estimator, build_dataset): metric_learners) if idml[:4] in ['ITML', 'SDML', 'LSML']]) @pytest.mark.parametrize('w0', [1e-20, 0., -1e-20]) -def test_singular_array_init_or_prior(estimator, build_dataset, w0): +def test_singular_array_init_or_prior_strictpd(estimator, build_dataset, w0): """Tests that when using a custom array init (or prior), it returns the appropriate error if it is singular, for algorithms that need a strictly PD prior or init (see @@ -654,6 +696,31 @@ def test_singular_array_init_or_prior(estimator, build_dataset, w0): assert str(raised_err.value) == msg + +@pytest.mark.parametrize('w0', [1e-20, 0., -1e-20]) +def test_singular_array_init_of_non_strict_pd(w0): + """Tests that when using a custom array init, it returns the + appropriate warning if it is singular. Also checks if the returned + inverse matrix is finite. This isn't checked for model fitting as no + model currently uses this setting. + """ + rng = np.random.RandomState(42) + X, y = shuffle(*make_blobs(random_state=rng), + random_state=rng) + P = ortho_group.rvs(X.shape[1], random_state=rng) + w = np.abs(rng.randn(X.shape[1])) + w[0] = w0 + M = P.dot(np.diag(w)).dot(P.T) + msg = ('The initialization matrix is not invertible: ' + 'using the pseudo-inverse instead.') + with pytest.warns(UserWarning) as raised_warning: + _, M_inv = _initialize_metric_mahalanobis(X, init=M, + random_state=rng, + return_inverse=True, + strict_pd=False) + assert str(raised_warning[0].message) == msg + assert np.isfinite(M_inv).all() + + @pytest.mark.integration @pytest.mark.parametrize('estimator, build_dataset', metric_learners, ids=ids_metric_learners) diff --git a/test/test_utils.py b/test/test_utils.py index 76be5817..2510ed89 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -1,4 +1,5 @@ import pytest +from scipy.linalg import eigh, pinvh from collections import namedtuple import numpy as np from numpy.testing import assert_array_equal, assert_equal @@ -11,7 +12,7 @@ check_collapsed_pairs, validate_vector, _check_sdp_from_eigen, _check_n_components, check_y_valid_values_for_pairs, - _auto_select_init) + _auto_select_init, _pseudo_inverse_from_eig) from metric_learn import (ITML, LSML, MMC, RCA, SDML, Covariance, LFDA, LMNN, MLKR, NCA, ITML_Supervised, LSML_Supervised, MMC_Supervised, RCA_Supervised, SDML_Supervised, @@ -1150,3 +1151,27 @@ def test__auto_select_init(has_classes, n_features, n_samples, n_components, """Checks that the auto selection of the init works as expected""" assert (_auto_select_init(has_classes, n_features, n_samples, n_components, n_classes) == result) + + +@pytest.mark.parametrize('w0', [1e-20, 0., -1e-20]) +def test_pseudo_inverse_from_eig_and_pinvh_singular(w0): + """Checks that _pseudo_inverse_from_eig returns the same result as + scipy.linalg.pinvh for a singular matrix""" + rng = 
np.random.RandomState(SEED) + A = rng.rand(100, 100) + A = A + A.T + w, V = eigh(A) + w[0] = w0 + A = V.dot(np.diag(w)).dot(V.T) + np.testing.assert_allclose(_pseudo_inverse_from_eig(w, V), pinvh(A), + rtol=1e-05) + + +def test_pseudo_inverse_from_eig_and_pinvh_nonsingular(): + """Checks that _pseudo_inverse_from_eig returns the same result as + scipy.linalg.pinvh for a non singular matrix""" + rng = np.random.RandomState(SEED) + A = rng.rand(100, 100) + A = A + A.T + w, V = eigh(A, check_finite=False) + np.testing.assert_allclose(_pseudo_inverse_from_eig(w, V), pinvh(A)) From 127604081d45c9ad9cef667f8178888add6a92d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20Bellet?= Date: Wed, 4 Mar 2020 15:19:17 +0100 Subject: [PATCH 157/210] [MRG] Improve docstrings: add them for Constraints class and methods and fix minor problems (#280) * add docstrings for constraints class, pairs and chunks methods * fix missing optional values and descriptions, uniformize * fix indentation problems in docstring and uniformize --- metric_learn/_util.py | 118 ++++++++--------- metric_learn/base_metric.py | 12 +- metric_learn/constraints.py | 74 ++++++++++- metric_learn/itml.py | 251 ++++++++++++++++++----------------- metric_learn/lfda.py | 29 ++--- metric_learn/lmnn.py | 135 ++++++++++--------- metric_learn/lsml.py | 188 +++++++++++++++------------ metric_learn/mlkr.py | 101 +++++++-------- metric_learn/mmc.py | 252 +++++++++++++++++++----------------- metric_learn/nca.py | 103 ++++++++------- metric_learn/rca.py | 53 ++++---- metric_learn/sdml.py | 174 +++++++++++++------------ 12 files changed, 804 insertions(+), 686 deletions(-) diff --git a/metric_learn/_util.py b/metric_learn/_util.py index fa196a69..77e8d9fa 100644 --- a/metric_learn/_util.py +++ b/metric_learn/_util.py @@ -448,45 +448,45 @@ def _initialize_components(n_components, input, y=None, init='auto', The input labels (or not if there are no labels). init : string or numpy array, optional (default='auto') - Initialization of the linear transformation. Possible options are - 'auto', 'pca', 'lda', 'identity', 'random', and a numpy array of shape - (n_features_a, n_features_b). - - 'auto' - Depending on ``n_components``, the most reasonable initialization - will be chosen. If ``n_components <= n_classes`` we use 'lda' (see - the description of 'lda' init), as it uses labels information. If - not, but ``n_components < min(n_features, n_samples)``, we use 'pca', - as it projects data onto meaningful directions (those of higher - variance). Otherwise, we just use 'identity'. - - 'pca' - ``n_components`` principal components of the inputs passed - to :meth:`fit` will be used to initialize the transformation. - (See `sklearn.decomposition.PCA`) - - 'lda' - ``min(n_components, n_classes)`` most discriminative - components of the inputs passed to :meth:`fit` will be used to - initialize the transformation. (If ``n_components > n_classes``, - the rest of the components will be zero.) (See - `sklearn.discriminant_analysis.LinearDiscriminantAnalysis`). - This initialization is possible only if `has_classes == True`. - - 'identity' - The identity matrix. If ``n_components`` is strictly smaller than the - dimensionality of the inputs passed to :meth:`fit`, the identity - matrix will be truncated to the first ``n_components`` rows. - - 'random' - The initial transformation will be a random array of shape - `(n_components, n_features)`. Each value is sampled from the - standard normal distribution. 
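Stepping back to patch 156 for a moment: the `_pseudo_inverse_from_eig` helper that the tests above compare against `scipy.linalg.pinvh` rests on one numerical idea. Given the eigendecomposition M = V diag(w) Vᵀ of a symmetric matrix, invert only the eigenvalues whose magnitude exceeds a tolerance and zero out the rest. A self-contained sanity check of that idea, mirroring (but not importing) the helper, using only NumPy/SciPy:

    import numpy as np
    from scipy.linalg import eigh, pinvh

    rng = np.random.RandomState(0)
    A = rng.rand(5, 5)
    A = A + A.T                      # random symmetric matrix
    w, V = eigh(A)
    w[0] = 0.                        # force a zero eigenvalue
    A = V.dot(np.diag(w)).dot(V.T)   # A is now symmetric and singular

    tol = np.amax(w) * np.max(w.shape) * np.finfo(w.dtype).eps
    large = np.abs(w) > tol
    w_inv = np.zeros_like(w)
    w_inv[large] = 1. / w[large]     # invert only the "large" eigenvalues

    A_pinv = np.dot(V * w_inv, V.T)  # V @ diag(w_inv) @ V.T
    np.testing.assert_allclose(A_pinv, pinvh(A), atol=1e-10)

The point of reimplementing this instead of calling `pinvh` directly is that the eigendecomposition is already computed for the definiteness check, so the pseudo-inverse comes essentially for free.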
- - numpy array - n_features_b must match the dimensionality of the inputs passed to - :meth:`fit` and n_features_a must be less than or equal to that. - If ``n_components`` is not None, n_features_a must match it. + Initialization of the linear transformation. Possible options are + 'auto', 'pca', 'lda', 'identity', 'random', and a numpy array of shape + (n_features_a, n_features_b). + + 'auto' + Depending on ``n_components``, the most reasonable initialization + will be chosen. If ``n_components <= n_classes`` we use 'lda' (see + the description of 'lda' init), as it uses labels information. If + not, but ``n_components < min(n_features, n_samples)``, we use 'pca', + as it projects data onto meaningful directions (those of higher + variance). Otherwise, we just use 'identity'. + + 'pca' + ``n_components`` principal components of the inputs passed + to :meth:`fit` will be used to initialize the transformation. + (See `sklearn.decomposition.PCA`) + + 'lda' + ``min(n_components, n_classes)`` most discriminative + components of the inputs passed to :meth:`fit` will be used to + initialize the transformation. (If ``n_components > n_classes``, + the rest of the components will be zero.) (See + `sklearn.discriminant_analysis.LinearDiscriminantAnalysis`). + This initialization is possible only if `has_classes == True`. + + 'identity' + The identity matrix. If ``n_components`` is strictly smaller than the + dimensionality of the inputs passed to :meth:`fit`, the identity + matrix will be truncated to the first ``n_components`` rows. + + 'random' + The initial transformation will be a random array of shape + `(n_components, n_features)`. Each value is sampled from the + standard normal distribution. + + numpy array + n_features_b must match the dimensionality of the inputs passed to + :meth:`fit` and n_features_a must be less than or equal to that. + If ``n_components`` is not None, n_features_a must match it. verbose : bool Whether to print the details of the initialization or not. @@ -606,26 +606,26 @@ def _initialize_metric_mahalanobis(input, init='identity', random_state=None, The input samples (can be tuples or regular samples). init : string or numpy array, optional (default='identity') - Specification for the matrix to initialize. Possible options are - 'identity', 'covariance', 'random', and a numpy array of shape - (n_features, n_features). - - 'identity' - An identity matrix of shape (n_features, n_features). - - 'covariance' - The (pseudo-)inverse covariance matrix (raises an error if the - covariance matrix is not definite and `strict_pd == True`) - - 'random' - A random positive definite (PD) matrix of shape - `(n_features, n_features)`, generated using - `sklearn.datasets.make_spd_matrix`. - - numpy array - A PSD matrix (or strictly PD if strict_pd==True) of - shape (n_features, n_features), that will be used as such to - initialize the metric, or set the prior. + Specification for the matrix to initialize. Possible options are + 'identity', 'covariance', 'random', and a numpy array of shape + (n_features, n_features). + + 'identity' + An identity matrix of shape (n_features, n_features). + + 'covariance' + The (pseudo-)inverse covariance matrix (raises an error if the + covariance matrix is not definite and `strict_pd == True`) + + 'random' + A random positive definite (PD) matrix of shape + `(n_features, n_features)`, generated using + `sklearn.datasets.make_spd_matrix`. 
+ + numpy array + A PSD matrix (or strictly PD if strict_pd==True) of + shape (n_features, n_features), that will be used as such to + initialize the metric, or set the prior. random_state : int or `numpy.RandomState` or None, optional (default=None) A pseudo random number generator object or a seed for it if int. If diff --git a/metric_learn/base_metric.py b/metric_learn/base_metric.py index ee73c793..d19998ff 100644 --- a/metric_learn/base_metric.py +++ b/metric_learn/base_metric.py @@ -154,12 +154,12 @@ def transform(self, X): Parameters ---------- X : (n x d) matrix - Data to transform. + Data to transform. Returns ------- transformed : (n x d) matrix - Input data transformed to the metric space by :math:`XL^{\\top}` + Input data transformed to the metric space by :math:`XL^{\\top}` """ @@ -180,7 +180,7 @@ class MahalanobisMixin(six.with_metaclass(ABCMeta, BaseMetricLearner, Attributes ---------- components_ : `numpy.ndarray`, shape=(n_components, n_features) - The learned linear transformation ``L``. + The learned linear transformation ``L``. """ def score_pairs(self, pairs): @@ -313,9 +313,9 @@ class _PairsClassifierMixin(BaseMetricLearner): Attributes ---------- threshold_ : `float` - If the distance metric between two points is lower than this threshold, - points will be classified as similar, otherwise they will be - classified as dissimilar. + If the distance metric between two points is lower than this threshold, + points will be classified as similar, otherwise they will be + classified as dissimilar. """ _tuple_size = 2 # number of points in a tuple, 2 for pairs diff --git a/metric_learn/constraints.py b/metric_learn/constraints.py index 752ca6e0..36d77194 100644 --- a/metric_learn/constraints.py +++ b/metric_learn/constraints.py @@ -12,17 +12,60 @@ class Constraints(object): """ - Class to build constraints from labels. + Class to build constraints from labeled data. - See more in the :ref:`User Guide ` + See more in the :ref:`User Guide `. + + Parameters + ---------- + partial_labels : `numpy.ndarray` of ints, shape=(n_samples,) + Array of labels, with -1 indicating unknown label. + + Attributes + ---------- + partial_labels : `numpy.ndarray` of ints, shape=(n_samples,) + Array of labels, with -1 indicating unknown label. """ + def __init__(self, partial_labels): - '''partial_labels : int arraylike, -1 indicating unknown label''' partial_labels = np.asanyarray(partial_labels, dtype=int) self.partial_labels = partial_labels def positive_negative_pairs(self, num_constraints, same_length=False, random_state=None): + """ + Generates positive pairs and negative pairs from labeled data. + + Positive pairs are formed by randomly drawing ``num_constraints`` pairs of + points with the same label. Negative pairs are formed by randomly drawing + ``num_constraints`` pairs of points with different label. + + In the case where it is not possible to generate enough positive or + negative pairs, a smaller number of pairs will be returned with a warning. + + Parameters + ---------- + num_constraints : int + Number of positive and negative constraints to generate. + same_length : bool, optional (default=False) + If True, forces the number of positive and negative pairs to be + equal by ignoring some pairs from the larger set. + random_state : int or numpy.RandomState or None, optional (default=None) + A pseudo random number generator object or a seed for it if int. + Returns + ------- + a : array-like, shape=(n_constraints,) + 1D array of indicators for the left elements of positive pairs. 
+ + b : array-like, shape=(n_constraints,) + 1D array of indicators for the right elements of positive pairs. + + c : array-like, shape=(n_constraints,) + 1D array of indicators for the left elements of negative pairs. + + d : array-like, shape=(n_constraints,) + 1D array of indicators for the right elements of negative pairs. + """ random_state = check_random_state(random_state) a, b = self._pairs(num_constraints, same_label=True, random_state=random_state) @@ -60,7 +103,30 @@ def _pairs(self, num_constraints, same_label=True, max_iter=10, def chunks(self, num_chunks=100, chunk_size=2, random_state=None): """ - the random state object to be passed must be a numpy random seed + Generates chunks from labeled data. + + Each of ``num_chunks`` chunks is composed of ``chunk_size`` points from + the same class drawn at random. Each point can belong to at most 1 chunk. + + In the case where there is not enough points to generate ``num_chunks`` + chunks of size ``chunk_size``, a ValueError will be raised. + + Parameters + ---------- + num_chunks : int, optional (default=100) + Number of chunks to generate. + + chunk_size : int, optional (default=2) + Number of points in each chunk. + + random_state : int or numpy.RandomState or None, optional (default=None) + A pseudo random number generator object or a seed for it if int. + + Returns + ------- + chunks : array-like, shape=(n_samples,) + 1D array of chunk indicators, where -1 indicates that the point does not + belong to any chunk. """ random_state = check_random_state(random_state) chunks = -np.ones_like(self.partial_labels, dtype=int) diff --git a/metric_learn/itml.py b/metric_learn/itml.py index 50eb41a4..2094e160 100644 --- a/metric_learn/itml.py +++ b/metric_learn/itml.py @@ -126,75 +126,75 @@ class ITML(_BaseITML, _PairsClassifierMixin): Parameters ---------- - gamma : float, optional (default=1.) - Value for slack variables + gamma : float, optional (default=1.0) + Value for slack variables max_iter : int, optional (default=1000) - Maximum number of iteration of the optimization procedure. + Maximum number of iteration of the optimization procedure. convergence_threshold : float, optional (default=1e-3) - Convergence tolerance. + Convergence tolerance. prior : string or numpy array, optional (default='identity') - The Mahalanobis matrix to use as a prior. Possible options are - 'identity', 'covariance', 'random', and a numpy array of shape - (n_features, n_features). For ITML, the prior should be strictly - positive definite (PD). + The Mahalanobis matrix to use as a prior. Possible options are + 'identity', 'covariance', 'random', and a numpy array of shape + (n_features, n_features). For ITML, the prior should be strictly + positive definite (PD). - 'identity' - An identity matrix of shape (n_features, n_features). + 'identity' + An identity matrix of shape (n_features, n_features). - 'covariance' - The inverse covariance matrix. + 'covariance' + The inverse covariance matrix. - 'random' - The prior will be a random SPD matrix of shape - `(n_features, n_features)`, generated using - `sklearn.datasets.make_spd_matrix`. + 'random' + The prior will be a random SPD matrix of shape + `(n_features, n_features)`, generated using + `sklearn.datasets.make_spd_matrix`. - numpy array - A positive definite (PD) matrix of shape - (n_features, n_features), that will be used as such to set the - prior. + numpy array + A positive definite (PD) matrix of shape + (n_features, n_features), that will be used as such to set the + prior. A0 : Not used - .. 
deprecated:: 0.5.0 - `A0` was deprecated in version 0.5.0 and will - be removed in 0.6.0. Use 'prior' instead. + .. deprecated:: 0.5.0 + `A0` was deprecated in version 0.5.0 and will + be removed in 0.6.0. Use 'prior' instead. verbose : bool, optional (default=False) - If True, prints information while learning + If True, prints information while learning preprocessor : array-like, shape=(n_samples, n_features) or callable - The preprocessor to call to get tuples from indices. If array-like, - tuples will be formed like this: X[indices]. + The preprocessor to call to get tuples from indices. If array-like, + tuples will be formed like this: X[indices]. random_state : int or numpy.RandomState or None, optional (default=None) - A pseudo random number generator object or a seed for it if int. If - ``prior='random'``, ``random_state`` is used to set the prior. + A pseudo random number generator object or a seed for it if int. If + ``prior='random'``, ``random_state`` is used to set the prior. Attributes ---------- bounds_ : `numpy.ndarray`, shape=(2,) - Bounds on similarity, aside slack variables, s.t. - ``d(a, b) < bounds_[0]`` for all given pairs of similar points ``a`` - and ``b``, and ``d(c, d) > bounds_[1]`` for all given pairs of - dissimilar points ``c`` and ``d``, with ``d`` the learned distance. If - not provided at initialization, bounds_[0] and bounds_[1] are set at - train time to the 5th and 95th percentile of the pairwise distances among - all points present in the input `pairs`. + Bounds on similarity, aside slack variables, s.t. + ``d(a, b) < bounds_[0]`` for all given pairs of similar points ``a`` + and ``b``, and ``d(c, d) > bounds_[1]`` for all given pairs of + dissimilar points ``c`` and ``d``, with ``d`` the learned distance. If + not provided at initialization, bounds_[0] and bounds_[1] are set at + train time to the 5th and 95th percentile of the pairwise distances among + all points present in the input `pairs`. n_iter_ : `int` - The number of iterations the solver has run. + The number of iterations the solver has run. components_ : `numpy.ndarray`, shape=(n_features, n_features) - The linear transformation ``L`` deduced from the learned Mahalanobis - metric (See function `components_from_metric`.) + The linear transformation ``L`` deduced from the learned Mahalanobis + metric (See function `components_from_metric`.) threshold_ : `float` - If the distance metric between two points is lower than this threshold, - points will be classified as similar, otherwise they will be - classified as dissimilar. + If the distance metric between two points is lower than this threshold, + points will be classified as similar, otherwise they will be + classified as dissimilar. Examples -------- @@ -226,28 +226,28 @@ def fit(self, pairs, y, bounds=None, calibration_params=None): ---------- pairs: array-like, shape=(n_constraints, 2, n_features) or \ (n_constraints, 2) - 3D Array of pairs with each row corresponding to two points, - or 2D array of indices of pairs if the metric learner uses a - preprocessor. + 3D Array of pairs with each row corresponding to two points, + or 2D array of indices of pairs if the metric learner uses a + preprocessor. y: array-like, of shape (n_constraints,) - Labels of constraints. Should be -1 for dissimilar pair, 1 for similar. + Labels of constraints. Should be -1 for dissimilar pair, 1 for similar. bounds : array-like of two numbers - Bounds on similarity, aside slack variables, s.t. 
- ``d(a, b) < bounds_[0]`` for all given pairs of similar points ``a`` - and ``b``, and ``d(c, d) > bounds_[1]`` for all given pairs of - dissimilar points ``c`` and ``d``, with ``d`` the learned distance. - If not provided at initialization, bounds_[0] and bounds_[1] will be - set to the 5th and 95th percentile of the pairwise distances among all - points present in the input `pairs`. + Bounds on similarity, aside slack variables, s.t. + ``d(a, b) < bounds_[0]`` for all given pairs of similar points ``a`` + and ``b``, and ``d(c, d) > bounds_[1]`` for all given pairs of + dissimilar points ``c`` and ``d``, with ``d`` the learned distance. + If not provided at initialization, bounds_[0] and bounds_[1] will be + set to the 5th and 95th percentile of the pairwise distances among all + points present in the input `pairs`. calibration_params : `dict` or `None` - Dictionary of parameters to give to `calibrate_threshold` for the - threshold calibration step done at the end of `fit`. If `None` is - given, `calibrate_threshold` will use the default parameters. + Dictionary of parameters to give to `calibrate_threshold` for the + threshold calibration step done at the end of `fit`. If `None` is + given, `calibrate_threshold` will use the default parameters. Returns ------- self : object - Returns the instance. + Returns the instance. """ calibration_params = (calibration_params if calibration_params is not None else dict()) @@ -266,77 +266,88 @@ class ITML_Supervised(_BaseITML, TransformerMixin): Parameters ---------- - gamma : float, optional - value for slack variables - max_iter : int, optional - convergence_threshold : float, optional + gamma : float, optional (default=1.0) + Value for slack variables + + max_iter : int, optional (default=1000) + Maximum number of iterations of the optimization procedure. + + convergence_threshold : float, optional (default=1e-3) + Tolerance of the optimization procedure. + num_labeled : Not used - .. deprecated:: 0.5.0 - `num_labeled` was deprecated in version 0.5.0 and will - be removed in 0.6.0. - num_constraints: int, optional - number of constraints to generate - (`20 * num_classes**2` constraints by default) + .. deprecated:: 0.5.0 + `num_labeled` was deprecated in version 0.5.0 and will + be removed in 0.6.0. + + num_constraints: int, optional (default=None) + Number of constraints to generate. If None, defaults to `20 * + num_classes**2`. + bounds : Not used - .. deprecated:: 0.5.0 - `bounds` was deprecated in version 0.5.0 and will - be removed in 0.6.0. Set `bounds` at fit time instead : - `itml_supervised.fit(X, y, bounds=...)` + .. deprecated:: 0.5.0 + `bounds` was deprecated in version 0.5.0 and will + be removed in 0.6.0. Set `bounds` at fit time instead : + `itml_supervised.fit(X, y, bounds=...)` prior : string or numpy array, optional (default='identity') - Initialization of the Mahalanobis matrix. 
Possible options are + 'identity', 'covariance', 'random', and a numpy array of shape + (n_features, n_features). For ITML, the prior should be strictly + positive definite (PD). + + 'identity' + An identity matrix of shape (n_features, n_features). + + 'covariance' + The inverse covariance matrix. + + 'random' + The prior will be a random SPD matrix of shape + `(n_features, n_features)`, generated using + `sklearn.datasets.make_spd_matrix`. + + numpy array + A positive definite (PD) matrix of shape + (n_features, n_features), that will be used as such to set the + prior. + A0 : Not used .. deprecated:: 0.5.0 - `A0` was deprecated in version 0.5.0 and will - be removed in 0.6.0. Use 'prior' instead. - verbose : bool, optional - if True, prints information while learning + `A0` was deprecated in version 0.5.0 and will + be removed in 0.6.0. Use 'prior' instead. + + verbose : bool, optional (default=False) + If True, prints information while learning + preprocessor : array-like, shape=(n_samples, n_features) or callable - The preprocessor to call to get tuples from indices. If array-like, - tuples will be formed like this: X[indices]. + The preprocessor to call to get tuples from indices. If array-like, + tuples will be formed like this: X[indices]. + random_state : int or numpy.RandomState or None, optional (default=None) - A pseudo random number generator object or a seed for it if int. If - ``prior='random'``, ``random_state`` is used to set the prior. In any - case, `random_state` is also used to randomly sample constraints from - labels. + A pseudo random number generator object or a seed for it if int. If + ``prior='random'``, ``random_state`` is used to set the prior. In any + case, `random_state` is also used to randomly sample constraints from + labels. Attributes ---------- bounds_ : `numpy.ndarray`, shape=(2,) - Bounds on similarity, aside slack variables, s.t. - ``d(a, b) < bounds_[0]`` for all given pairs of similar points ``a`` - and ``b``, and ``d(c, d) > bounds_[1]`` for all given pairs of - dissimilar points ``c`` and ``d``, with ``d`` the learned distance. - If not provided at initialization, bounds_[0] and bounds_[1] are set at - train time to the 5th and 95th percentile of the pairwise distances - among all points in the training data `X`. + Bounds on similarity, aside slack variables, s.t. + ``d(a, b) < bounds_[0]`` for all given pairs of similar points ``a`` + and ``b``, and ``d(c, d) > bounds_[1]`` for all given pairs of + dissimilar points ``c`` and ``d``, with ``d`` the learned distance. + If not provided at initialization, bounds_[0] and bounds_[1] are set at + train time to the 5th and 95th percentile of the pairwise distances + among all points in the training data `X`. n_iter_ : `int` - The number of iterations the solver has run. + The number of iterations the solver has run. components_ : `numpy.ndarray`, shape=(n_features, n_features) - The linear transformation ``L`` deduced from the learned Mahalanobis - metric (See function `components_from_metric`.) + The linear transformation ``L`` deduced from the learned Mahalanobis + metric (See function `components_from_metric`.) Examples -------- @@ -355,7 +366,7 @@ class ITML_Supervised(_BaseITML, TransformerMixin): that describes the supervised version of weakly supervised estimators. 
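To make the `bounds` semantics documented above concrete, here is a hedged usage sketch (it assumes a release where `bounds` is passed at fit time, as the deprecation note says; the numeric values are purely illustrative):

    from sklearn.datasets import load_iris
    from metric_learn import ITML_Supervised

    X, y = load_iris(return_X_y=True)
    itml = ITML_Supervised(num_constraints=200, random_state=42)
    itml.fit(X, y, bounds=(0.2, 2.0))  # similar pairs pushed below 0.2,
                                       # dissimilar pairs above 2.0, up to slack
    print(itml.bounds_)                # the bounds actually used in training

Omitting `bounds` lets the estimator fall back to the 5th and 95th percentiles of the pairwise distances, as described in the `bounds_` attribute above.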
""" - def __init__(self, gamma=1., max_iter=1000, convergence_threshold=1e-3, + def __init__(self, gamma=1.0, max_iter=1000, convergence_threshold=1e-3, num_labeled='deprecated', num_constraints=None, bounds='deprecated', prior='identity', A0='deprecated', verbose=False, preprocessor=None, random_state=None): @@ -374,10 +385,10 @@ def fit(self, X, y, random_state='deprecated', bounds=None): Parameters ---------- X : (n x d) matrix - Input data, where each row corresponds to a single instance. + Input data, where each row corresponds to a single instance. y : (n) array-like - Data labels. + Data labels. random_state : Not used .. deprecated:: 0.5.0 @@ -386,13 +397,13 @@ def fit(self, X, y, random_state='deprecated', bounds=None): instead (when instantiating a new `ITML_Supervised` object). bounds : array-like of two numbers - Bounds on similarity, aside slack variables, s.t. - ``d(a, b) < bounds_[0]`` for all given pairs of similar points ``a`` - and ``b``, and ``d(c, d) > bounds_[1]`` for all given pairs of - dissimilar points ``c`` and ``d``, with ``d`` the learned distance. - If not provided at initialization, bounds_[0] and bounds_[1] will be - set to the 5th and 95th percentile of the pairwise distances among all - points in the training data `X`. + Bounds on similarity, aside slack variables, s.t. + ``d(a, b) < bounds_[0]`` for all given pairs of similar points ``a`` + and ``b``, and ``d(c, d) > bounds_[1]`` for all given pairs of + dissimilar points ``c`` and ``d``, with ``d`` the learned distance. + If not provided at initialization, bounds_[0] and bounds_[1] will be + set to the 5th and 95th percentile of the pairwise distances among all + points in the training data `X`. """ # TODO: remove these in v0.6.0 if self.num_labeled != 'deprecated': diff --git a/metric_learn/lfda.py b/metric_learn/lfda.py index 99e7c978..12617a94 100644 --- a/metric_learn/lfda.py +++ b/metric_learn/lfda.py @@ -27,27 +27,26 @@ class LFDA(MahalanobisMixin, TransformerMixin): Parameters ---------- n_components : int or None, optional (default=None) - Dimensionality of reduced space (if None, defaults to dimension of X). + Dimensionality of reduced space (if None, defaults to dimension of X). num_dims : Not used + .. deprecated:: 0.5.0 + `num_dims` was deprecated in version 0.5.0 and will + be removed in 0.6.0. Use `n_components` instead. - .. deprecated:: 0.5.0 - `num_dims` was deprecated in version 0.5.0 and will - be removed in 0.6.0. Use `n_components` instead. + k : int, optional (default=None) + Number of nearest neighbors used in local scaling method. If None, + defaults to min(7, n_features - 1). - k : int, optional - Number of nearest neighbors used in local scaling method. - Defaults to min(7, n_components - 1). - - embedding_type : str, optional - Type of metric in the embedding space (default: 'weighted') - 'weighted' - weighted eigenvectors - 'orthonormalized' - orthonormalized - 'plain' - raw eigenvectors + embedding_type : str, optional (default: 'weighted') + Type of metric in the embedding space + 'weighted' - weighted eigenvectors + 'orthonormalized' - orthonormalized + 'plain' - raw eigenvectors preprocessor : array-like, shape=(n_samples, n_features) or callable - The preprocessor to call to get tuples from indices. If array-like, - tuples will be formed like this: X[indices]. + The preprocessor to call to get tuples from indices. If array-like, + tuples will be formed like this: X[indices]. 
Attributes ---------- diff --git a/metric_learn/lmnn.py b/metric_learn/lmnn.py index a1b5a42f..df8fe649 100644 --- a/metric_learn/lmnn.py +++ b/metric_learn/lmnn.py @@ -28,101 +28,100 @@ class LMNN(MahalanobisMixin, TransformerMixin): Parameters ---------- init : None, string or numpy array, optional (default=None) - Initialization of the linear transformation. Possible options are - 'auto', 'pca', 'identity', 'random', and a numpy array of shape - (n_features_a, n_features_b). If None, will be set automatically to - 'auto' (this option is to raise a warning if 'init' is not set, - and stays to its default value None, in v0.5.0). - - 'auto' - Depending on ``n_components``, the most reasonable initialization - will be chosen. If ``n_components <= n_classes`` we use 'lda', as - it uses labels information. If not, but - ``n_components < min(n_features, n_samples)``, we use 'pca', as - it projects data in meaningful directions (those of higher - variance). Otherwise, we just use 'identity'. - - 'pca' - ``n_components`` principal components of the inputs passed - to :meth:`fit` will be used to initialize the transformation. - (See `sklearn.decomposition.PCA`) - - 'lda' - ``min(n_components, n_classes)`` most discriminative - components of the inputs passed to :meth:`fit` will be used to - initialize the transformation. (If ``n_components > n_classes``, - the rest of the components will be zero.) (See - `sklearn.discriminant_analysis.LinearDiscriminantAnalysis`) - - 'identity' - If ``n_components`` is strictly smaller than the - dimensionality of the inputs passed to :meth:`fit`, the identity - matrix will be truncated to the first ``n_components`` rows. - - 'random' - The initial transformation will be a random array of shape - `(n_components, n_features)`. Each value is sampled from the - standard normal distribution. - - numpy array - n_features_b must match the dimensionality of the inputs passed to - :meth:`fit` and n_features_a must be less than or equal to that. - If ``n_components`` is not None, n_features_a must match it. - - k : int, optional - Number of neighbors to consider, not including self-edges. + Initialization of the linear transformation. Possible options are + 'auto', 'pca', 'identity', 'random', and a numpy array of shape + (n_features_a, n_features_b). If None, will be set automatically to + 'auto' (this option is to raise a warning if 'init' is not set, and + stays to its default value None, in v0.5.0). + + 'auto' + Depending on ``n_components``, the most reasonable initialization + will be chosen. If ``n_components <= n_classes`` we use 'lda', as + it uses labels information. If not, but + ``n_components < min(n_features, n_samples)``, we use 'pca', as + it projects data in meaningful directions (those of higher + variance). Otherwise, we just use 'identity'. + + 'pca' + ``n_components`` principal components of the inputs passed + to :meth:`fit` will be used to initialize the transformation. + (See `sklearn.decomposition.PCA`) + + 'lda' + ``min(n_components, n_classes)`` most discriminative + components of the inputs passed to :meth:`fit` will be used to + initialize the transformation. (If ``n_components > n_classes``, + the rest of the components will be zero.) (See + `sklearn.discriminant_analysis.LinearDiscriminantAnalysis`) + + 'identity' + If ``n_components`` is strictly smaller than the + dimensionality of the inputs passed to :meth:`fit`, the identity + matrix will be truncated to the first ``n_components`` rows. 
+ + 'random' + The initial transformation will be a random array of shape + `(n_components, n_features)`. Each value is sampled from the + standard normal distribution. + + numpy array + n_features_b must match the dimensionality of the inputs passed to + :meth:`fit` and n_features_a must be less than or equal to that. + If ``n_components`` is not None, n_features_a must match it. + + k : int, optional (default=3) + Number of neighbors to consider, not including self-edges. min_iter : int, optional (default=50) - Minimum number of iterations of the optimization procedure. + Minimum number of iterations of the optimization procedure. max_iter : int, optional (default=1000) - Maximum number of iterations of the optimization procedure. + Maximum number of iterations of the optimization procedure. learn_rate : float, optional (default=1e-7) - Learning rate of the optimization procedure + Learning rate of the optimization procedure tol : float, optional (default=0.001) - Tolerance of the optimization procedure. If the objective value varies - less than `tol`, we consider the algorithm has converged and stop it. + Tolerance of the optimization procedure. If the objective value varies + less than `tol`, we consider the algorithm has converged and stop it. use_pca : Not used - - .. deprecated:: 0.5.0 - `use_pca` was deprecated in version 0.5.0 and will - be removed in 0.6.0. + .. deprecated:: 0.5.0 + `use_pca` was deprecated in version 0.5.0 and will + be removed in 0.6.0. verbose : bool, optional (default=False) - Whether to print the progress of the optimization procedure. + Whether to print the progress of the optimization procedure. - regularization: float, optional - Weighting of pull and push terms, with 0.5 meaning equal weight. + regularization: float, optional (default=0.5) + Relative weight between pull and push terms, with 0.5 meaning equal + weight. preprocessor : array-like, shape=(n_samples, n_features) or callable - The preprocessor to call to get tuples from indices. If array-like, - tuples will be formed like this: X[indices]. + The preprocessor to call to get tuples from indices. If array-like, + tuples will be formed like this: X[indices]. n_components : int or None, optional (default=None) - Dimensionality of reduced space (if None, defaults to dimension of X). + Dimensionality of reduced space (if None, defaults to dimension of X). num_dims : Not used - - .. deprecated:: 0.5.0 - `num_dims` was deprecated in version 0.5.0 and will - be removed in 0.6.0. Use `n_components` instead. + .. deprecated:: 0.5.0 + `num_dims` was deprecated in version 0.5.0 and will + be removed in 0.6.0. Use `n_components` instead. random_state : int or numpy.RandomState or None, optional (default=None) - A pseudo random number generator object or a seed for it if int. If - ``init='random'``, ``random_state`` is used to initialize the random - transformation. If ``init='pca'``, ``random_state`` is passed as an - argument to PCA when initializing the transformation. + A pseudo random number generator object or a seed for it if int. If + ``init='random'``, ``random_state`` is used to initialize the random + transformation. If ``init='pca'``, ``random_state`` is passed as an + argument to PCA when initializing the transformation. Attributes ---------- n_iter_ : `int` - The number of iterations the solver has run. + The number of iterations the solver has run. components_ : `numpy.ndarray`, shape=(n_components, n_features) - The learned linear transformation ``L``. 
+ The learned linear transformation ``L``. Examples -------- diff --git a/metric_learn/lsml.py b/metric_learn/lsml.py index 7989d0b9..c4cdca97 100644 --- a/metric_learn/lsml.py +++ b/metric_learn/lsml.py @@ -140,49 +140,55 @@ class LSML(_BaseLSML, _QuadrupletsClassifierMixin): Parameters ---------- prior : None, string or numpy array, optional (default=None) - Prior to set for the metric. Possible options are - 'identity', 'covariance', 'random', and a numpy array of - shape (n_features, n_features). For LSML, the prior should be strictly - positive definite (PD). If `None`, will be set - automatically to 'identity' (this is to raise a warning if - `prior` is not set, and stays to its default value (None), in v0.5.0). - - 'identity' - An identity matrix of shape (n_features, n_features). - - 'covariance' - The inverse covariance matrix. - - 'random' - The initial Mahalanobis matrix will be a random positive definite - (PD) matrix of shape `(n_features, n_features)`, generated using - `sklearn.datasets.make_spd_matrix`. - - numpy array - A positive definite (PD) matrix of shape - (n_features, n_features), that will be used as such to set the - prior. - - tol : float, optional - max_iter : int, optional - verbose : bool, optional - if True, prints information while learning + Prior to set for the metric. Possible options are + 'identity', 'covariance', 'random', and a numpy array of + shape (n_features, n_features). For LSML, the prior should be strictly + positive definite (PD). If `None`, will be set + automatically to 'identity' (this is to raise a warning if + `prior` is not set, and stays to its default value (None), in v0.5.0). + + 'identity' + An identity matrix of shape (n_features, n_features). + + 'covariance' + The inverse covariance matrix. + + 'random' + The initial Mahalanobis matrix will be a random positive definite + (PD) matrix of shape `(n_features, n_features)`, generated using + `sklearn.datasets.make_spd_matrix`. + + numpy array + A positive definite (PD) matrix of shape + (n_features, n_features), that will be used as such to set the + prior. + + tol : float, optional (default=1e-3) + Convergence tolerance of the optimization procedure. + + max_iter : int, optional (default=1000) + Maximum number of iteration of the optimization procedure. + + verbose : bool, optional (default=False) + If True, prints information while learning + preprocessor : array-like, shape=(n_samples, n_features) or callable - The preprocessor to call to get tuples from indices. If array-like, - tuples will be formed like this: X[indices]. + The preprocessor to call to get tuples from indices. If array-like, + tuples will be formed like this: X[indices]. + random_state : int or numpy.RandomState or None, optional (default=None) - A pseudo random number generator object or a seed for it if int. If - ``init='random'``, ``random_state`` is used to set the random - prior. + A pseudo random number generator object or a seed for it if int. If + ``init='random'``, ``random_state`` is used to set the random + prior. Attributes ---------- n_iter_ : `int` - The number of iterations the solver has run. + The number of iterations the solver has run. components_ : `numpy.ndarray`, shape=(n_features, n_features) - The linear transformation ``L`` deduced from the learned Mahalanobis - metric (See function `components_from_metric`.) + The linear transformation ``L`` deduced from the learned Mahalanobis + metric (See function `components_from_metric`.) 
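The `components_from_metric` reference in the attributes above rests on a simple fact: any PSD metric M factors as M = LᵀL, so a valid transformation is a matrix square root of M. A sketch of one such construction (hypothetical helper name; the library's actual implementation may differ, for instance preferring a Cholesky factor when M is strictly PD):

    import numpy as np

    def components_from_metric_sketch(M):
        # symmetric square root: L = V diag(sqrt(w)) V.T, so L.T @ L = M
        w, V = np.linalg.eigh(M)
        w = np.clip(w, 0., None)  # clip tiny negative round-off
        return (V * np.sqrt(w)).dot(V.T)

    M = np.array([[2., 0.5], [0.5, 1.]])
    L = components_from_metric_sketch(M)
    np.testing.assert_allclose(L.T.dot(L), M, atol=1e-12)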
Examples -------- @@ -219,18 +225,19 @@ def fit(self, quadruplets, weights=None): ---------- quadruplets : array-like, shape=(n_constraints, 4, n_features) or \ (n_constraints, 4) - 3D array-like of quadruplets of points or 2D array of quadruplets of - indicators. In order to supervise the algorithm in the right way, we - should have the four samples ordered in a way such that: - d(pairs[i, 0],X[i, 1]) < d(X[i, 2], X[i, 3]) for all 0 <= i < - n_constraints. + 3D array-like of quadruplets of points or 2D array of quadruplets of + indicators. In order to supervise the algorithm in the right way, we + should have the four samples ordered in a way such that: + d(pairs[i, 0],X[i, 1]) < d(X[i, 2], X[i, 3]) for all 0 <= i < + n_constraints. + weights : (n_constraints,) array of floats, optional - scale factor for each constraint + scale factor for each constraint Returns ------- self : object - Returns the instance. + Returns the instance. """ return self._fit(quadruplets, weights=weights) @@ -246,51 +253,60 @@ class LSML_Supervised(_BaseLSML, TransformerMixin): Parameters ---------- tol : float, optional (default=1e-3) - Tolerance for the convergence procedure. + Convergence tolerance of the optimization procedure. + max_iter : int, optional (default=1000) - Number of maximum iterations of the convergence procedure. + Number of maximum iterations of the optimization procedure. + prior : None, string or numpy array, optional (default=None) - Prior to set for the metric. Possible options are - 'identity', 'covariance', 'random', and a numpy array of - shape (n_features, n_features). For LSML, the prior should be strictly - positive definite (PD). If `None`, will be set - automatically to 'identity' (this is to raise a warning if - `prior` is not set, and stays to its default value (None), in v0.5.0). - - 'identity' - An identity matrix of shape (n_features, n_features). - - 'covariance' - The inverse covariance matrix. - - 'random' - The initial Mahalanobis matrix will be a random positive definite - (PD) matrix of shape `(n_features, n_features)`, generated using - `sklearn.datasets.make_spd_matrix`. - - numpy array - A positive definite (PD) matrix of shape - (n_features, n_features), that will be used as such to set the - prior. + Prior to set for the metric. Possible options are + 'identity', 'covariance', 'random', and a numpy array of + shape (n_features, n_features). For LSML, the prior should be strictly + positive definite (PD). If `None`, will be set + automatically to 'identity' (this is to raise a warning if + `prior` is not set, and stays to its default value (None), in v0.5.0). + + 'identity' + An identity matrix of shape (n_features, n_features). + + 'covariance' + The inverse covariance matrix. + + 'random' + The initial Mahalanobis matrix will be a random positive definite + (PD) matrix of shape `(n_features, n_features)`, generated using + `sklearn.datasets.make_spd_matrix`. + + numpy array + A positive definite (PD) matrix of shape + (n_features, n_features), that will be used as such to set the + prior. + num_labeled : Not used .. deprecated:: 0.5.0 - `num_labeled` was deprecated in version 0.5.0 and will - be removed in 0.6.0. - num_constraints: int, optional - number of constraints to generate - (`20 * num_classes**2` constraints by default) - weights : (m,) array of floats, optional - scale factor for each constraint - verbose : bool, optional - if True, prints information while learning + `num_labeled` was deprecated in version 0.5.0 and will + be removed in 0.6.0. 
+ + num_constraints: int, optional (default=None) + Number of constraints to generate. If None, defaults to `20 * + num_classes**2`. + + weights : (num_constraints,) array of floats, optional (default=None) + Relative weight given to each constraint. If None, defaults to uniform + weights. + + verbose : bool, optional (default=False) + If True, prints information while learning + preprocessor : array-like, shape=(n_samples, n_features) or callable - The preprocessor to call to get tuples from indices. If array-like, - tuples will be formed like this: X[indices]. + The preprocessor to call to get tuples from indices. If array-like, + tuples will be formed like this: X[indices]. + random_state : int or numpy.RandomState or None, optional (default=None) - A pseudo random number generator object or a seed for it if int. If - ``init='random'``, ``random_state`` is used to set the random - prior. In any case, `random_state` is also used to randomly sample - constraints from labels. + A pseudo random number generator object or a seed for it if int. If + ``init='random'``, ``random_state`` is used to set the random + prior. In any case, `random_state` is also used to randomly sample + constraints from labels. Examples -------- @@ -305,11 +321,11 @@ class LSML_Supervised(_BaseLSML, TransformerMixin): Attributes ---------- n_iter_ : `int` - The number of iterations the solver has run. + The number of iterations the solver has run. components_ : `numpy.ndarray`, shape=(n_features, n_features) - The linear transformation ``L`` deduced from the learned Mahalanobis - metric (See function `components_from_metric`.) + The linear transformation ``L`` deduced from the learned Mahalanobis + metric (See function `components_from_metric`.) """ def __init__(self, tol=1e-3, max_iter=1000, prior=None, @@ -328,10 +344,10 @@ def fit(self, X, y, random_state='deprecated'): Parameters ---------- X : (n x d) matrix - Input data, where each row corresponds to a single instance. + Input data, where each row corresponds to a single instance. y : (n) array-like - Data labels. + Data labels. random_state : Not used .. deprecated:: 0.5.0 diff --git a/metric_learn/mlkr.py b/metric_learn/mlkr.py index 471694b6..3199b518 100644 --- a/metric_learn/mlkr.py +++ b/metric_learn/mlkr.py @@ -33,78 +33,77 @@ class MLKR(MahalanobisMixin, TransformerMixin): Parameters ---------- n_components : int or None, optional (default=None) - Dimensionality of reduced space (if None, defaults to dimension of X). + Dimensionality of reduced space (if None, defaults to dimension of X). num_dims : Not used - - .. deprecated:: 0.5.0 - `num_dims` was deprecated in version 0.5.0 and will - be removed in 0.6.0. Use `n_components` instead. + .. deprecated:: 0.5.0 + `num_dims` was deprecated in version 0.5.0 and will + be removed in 0.6.0. Use `n_components` instead. init : None, string or numpy array, optional (default=None) - Initialization of the linear transformation. Possible options are - 'auto', 'pca', 'identity', 'random', and a numpy array of shape - (n_features_a, n_features_b). If None, will be set automatically to - 'auto' (this option is to raise a warning if 'init' is not set, - and stays to its default value None, in v0.5.0). - - 'auto' - Depending on ``n_components``, the most reasonable initialization - will be chosen. If ``n_components < min(n_features, n_samples)``, - we use 'pca', as it projects data in meaningful directions (those - of higher variance). Otherwise, we just use 'identity'.
- - 'pca' - ``n_components`` principal components of the inputs passed - to :meth:`fit` will be used to initialize the transformation. - (See `sklearn.decomposition.PCA`) - - 'identity' - If ``n_components`` is strictly smaller than the - dimensionality of the inputs passed to :meth:`fit`, the identity - matrix will be truncated to the first ``n_components`` rows. - - 'random' - The initial transformation will be a random array of shape - `(n_components, n_features)`. Each value is sampled from the - standard normal distribution. - - numpy array - n_features_b must match the dimensionality of the inputs passed to - :meth:`fit` and n_features_a must be less than or equal to that. - If ``n_components`` is not None, n_features_a must match it. + Initialization of the linear transformation. Possible options are + 'auto', 'pca', 'identity', 'random', and a numpy array of shape + (n_features_a, n_features_b). If None, will be set automatically to + 'auto' (this option is to raise a warning if 'init' is not set, + and stays to its default value None, in v0.5.0). + + 'auto' + Depending on ``n_components``, the most reasonable initialization + will be chosen. If ``n_components < min(n_features, n_samples)``, + we use 'pca', as it projects data in meaningful directions (those + of higher variance). Otherwise, we just use 'identity'. + + 'pca' + ``n_components`` principal components of the inputs passed + to :meth:`fit` will be used to initialize the transformation. + (See `sklearn.decomposition.PCA`) + + 'identity' + If ``n_components`` is strictly smaller than the + dimensionality of the inputs passed to :meth:`fit`, the identity + matrix will be truncated to the first ``n_components`` rows. + + 'random' + The initial transformation will be a random array of shape + `(n_components, n_features)`. Each value is sampled from the + standard normal distribution. + + numpy array + n_features_b must match the dimensionality of the inputs passed to + :meth:`fit` and n_features_a must be less than or equal to that. + If ``n_components`` is not None, n_features_a must match it. A0: Not used. - .. deprecated:: 0.5.0 - `A0` was deprecated in version 0.5.0 and will - be removed in 0.6.0. Use 'init' instead. + .. deprecated:: 0.5.0 + `A0` was deprecated in version 0.5.0 and will + be removed in 0.6.0. Use 'init' instead. tol: float, optional (default=None) - Convergence tolerance for the optimization. + Convergence tolerance for the optimization. - max_iter: int, optional - Cap on number of conjugate gradient iterations. + max_iter: int, optional (default=1000) + Cap on number of conjugate gradient iterations. verbose : bool, optional (default=False) - Whether to print progress messages or not. + Whether to print progress messages or not. preprocessor : array-like, shape=(n_samples, n_features) or callable - The preprocessor to call to get tuples from indices. If array-like, - tuples will be formed like this: X[indices]. + The preprocessor to call to get tuples from indices. If array-like, + tuples will be formed like this: X[indices]. random_state : int or numpy.RandomState or None, optional (default=None) - A pseudo random number generator object or a seed for it if int. If - ``init='random'``, ``random_state`` is used to initialize the random - transformation. If ``init='pca'``, ``random_state`` is passed as an - argument to PCA when initializing the transformation. + A pseudo random number generator object or a seed for it if int. 
If + ``init='random'``, ``random_state`` is used to initialize the random + transformation. If ``init='pca'``, ``random_state`` is passed as an + argument to PCA when initializing the transformation. Attributes ---------- n_iter_ : `int` - The number of iterations the solver has run. + The number of iterations the solver has run. components_ : `numpy.ndarray`, shape=(n_components, n_features) - The learned linear transformation ``L``. + The learned linear transformation ``L``. Examples -------- diff --git a/metric_learn/mmc.py b/metric_learn/mmc.py index 981bec48..3769497e 100644 --- a/metric_learn/mmc.py +++ b/metric_learn/mmc.py @@ -70,10 +70,10 @@ def _fit_full(self, pairs, y): Parameters ---------- X : (n x d) data matrix - each row corresponds to a single instance + Each row corresponds to a single instance. constraints : 4-tuple of arrays - (a,b,c,d) indices into X, with (a,b) specifying similar and (c,d) - dissimilar pairs + (a,b,c,d) indices into X, with (a,b) specifying similar and (c,d) + dissimilar pairs. """ num_dim = pairs.shape[2] @@ -195,10 +195,10 @@ def _fit_diag(self, pairs, y): Parameters ---------- X : (n x d) data matrix - each row corresponds to a single instance + Each row corresponds to a single instance. constraints : 4-tuple of arrays - (a,b,c,d) indices into X, with (a,b) specifying similar and (c,d) - dissimilar pairs + (a,b,c,d) indices into X, with (a,b) specifying similar and (c,d) + dissimilar pairs. """ num_dim = pairs.shape[2] pos_pairs, neg_pairs = pairs[y == 1], pairs[y == -1] @@ -352,77 +352,80 @@ class MMC(_BaseMMC, _PairsClassifierMixin): Parameters ---------- max_iter : int, optional (default=100) - Maximum number of iterations of the convergence procedure. + Maximum number of iterations of the optimization procedure. max_proj : int, optional (default=10000) - Maximum number of projection steps. + Maximum number of projection steps. - convergence_threshold : float, optional (default=1e-6) - Convergence threshold for the convergence procedure. + convergence_threshold : float, optional (default=1e-3) + Convergence threshold for the optimization procedure. init : None, string or numpy array, optional (default=None) - Initialization of the Mahalanobis matrix. Possible options are - 'identity', 'covariance', 'random', and a numpy array of - shape (n_features, n_features). If None, will be set - automatically to 'identity' (this is to raise a warning if - 'init' is not set, and stays to its default value (None), in v0.5.0). + Initialization of the Mahalanobis matrix. Possible options are + 'identity', 'covariance', 'random', and a numpy array of + shape (n_features, n_features). If None, will be set + automatically to 'identity' (this is to raise a warning if + 'init' is not set, and stays to its default value (None), in v0.5.0). - 'identity' - An identity matrix of shape (n_features, n_features). + 'identity' + An identity matrix of shape (n_features, n_features). - 'covariance' - The (pseudo-)inverse of the covariance matrix. + 'covariance' + The (pseudo-)inverse of the covariance matrix. - 'random' - The initial Mahalanobis matrix will be a random SPD matrix of - shape - `(n_features, n_features)`, generated using - `sklearn.datasets.make_spd_matrix`. + 'random' + The initial Mahalanobis matrix will be a random SPD matrix of + shape + `(n_features, n_features)`, generated using + `sklearn.datasets.make_spd_matrix`. - numpy array - An SPD matrix of shape (n_features, n_features), that will - be used as such to initialize the metric. 
- - verbose : bool, optional - if True, prints information while learning + numpy array + An SPD matrix of shape (n_features, n_features), that will + be used as such to initialize the metric. preprocessor : array-like, shape=(n_samples, n_features) or callable - The preprocessor to call to get tuples from indices. If array-like, - tuples will be gotten like this: X[indices]. + The preprocessor to call to get tuples from indices. If array-like, + tuples will be gotten like this: X[indices]. + A0 : Not used. - .. deprecated:: 0.5.0 - `A0` was deprecated in version 0.5.0 and will - be removed in 0.6.0. Use 'init' instead. - diagonal : bool, optional - if True, a diagonal metric will be learned, - i.e., a simple scaling of dimensions. The initialization will then - be the diagonal coefficients of the matrix given as 'init'. - diagonal_c : float, optional - weight of the dissimilarity constraint for diagonal - metric learning - verbose : bool, optional - if True, prints information while learning + .. deprecated:: 0.5.0 + `A0` was deprecated in version 0.5.0 and will + be removed in 0.6.0. Use 'init' instead. + + diagonal : bool, optional (default=False) + If True, a diagonal metric will be learned, + i.e., a simple scaling of dimensions. The initialization will then + be the diagonal coefficients of the matrix given as 'init'. + + diagonal_c : float, optional (default=1.0) + Weight of the dissimilarity constraint for diagonal + metric learning. Ignored if ``diagonal=False``. + + verbose : bool, optional (default=False) + If True, prints information while learning + preprocessor : array-like, shape=(n_samples, n_features) or callable - The preprocessor to call to get tuples from indices. If array-like, - tuples will be gotten like this: X[indices]. + The preprocessor to call to get tuples from indices. If array-like, + tuples will be gotten like this: X[indices]. + random_state : int or numpy.RandomState or None, optional (default=None) - A pseudo random number generator object or a seed for it if int. If - ``init='random'``, ``random_state`` is used to initialize the random - transformation. + A pseudo random number generator object or a seed for it if int. If + ``init='random'``, ``random_state`` is used to initialize the random + transformation. Attributes ---------- n_iter_ : `int` - The number of iterations the solver has run. + The number of iterations the solver has run. components_ : `numpy.ndarray`, shape=(n_features, n_features) - The linear transformation ``L`` deduced from the learned Mahalanobis - metric (See function `components_from_metric`.) + The linear transformation ``L`` deduced from the learned Mahalanobis + metric (See function `components_from_metric`.) threshold_ : `float` - If the distance metric between two points is lower than this threshold, - points will be classified as similar, otherwise they will be - classified as dissimilar. + If the distance metric between two points is lower than this threshold, + points will be classified as similar, otherwise they will be + classified as dissimilar. Examples -------- @@ -461,19 +464,22 @@ def fit(self, pairs, y, calibration_params=None): ---------- pairs : array-like, shape=(n_constraints, 2, n_features) or \ (n_constraints, 2) - 3D Array of pairs with each row corresponding to two points, - or 2D array of indices of pairs if the metric learner uses a - preprocessor. + 3D Array of pairs with each row corresponding to two points, + or 2D array of indices of pairs if the metric learner uses a + preprocessor. 
+
     y : array-like, of shape (n_constraints,)
-        Labels of constraints. Should be -1 for dissimilar pair, 1 for similar.
+      Labels of constraints. Should be -1 for dissimilar pair, 1 for similar.
+
     calibration_params : `dict` or `None`
-        Dictionary of parameters to give to `calibrate_threshold` for the
-        threshold calibration step done at the end of `fit`. If `None` is
-        given, `calibrate_threshold` will use the default parameters.
+      Dictionary of parameters to give to `calibrate_threshold` for the
+      threshold calibration step done at the end of `fit`. If `None` is
+      given, `calibrate_threshold` will use the default parameters.
+
     Returns
     -------
     self : object
-        Returns the instance.
+      Returns the instance.
     """
     calibration_params = (calibration_params if calibration_params is not
                           None else dict())
@@ -492,68 +498,76 @@ class MMC_Supervised(_BaseMMC, TransformerMixin):

   Parameters
   ----------
-  max_iter : int, optional
-  max_proj : int, optional
-  convergence_threshold : float, optional
+  max_iter : int, optional (default=100)
+    Maximum number of iterations of the optimization procedure.
+
+  max_proj : int, optional (default=10000)
+    Maximum number of projection steps.
+
+  convergence_threshold : float, optional (default=1e-6)
+    Convergence threshold for the optimization procedure.
+
   num_labeled : Not used
     .. deprecated:: 0.5.0
-       `num_labeled` was deprecated in version 0.5.0 and will
-       be removed in 0.6.0.
-  num_constraints: int, optional
-    number of constraints to generate
-    (`20 * num_classes**2` constraints by default)
-  init : None, string or numpy array, optional (default=None)
-    Initialization of the Mahalanobis matrix. Possible options are
-    'identity', 'covariance', 'random', and a numpy array of
-    shape (n_features, n_features). If None, will be set
-    automatically to 'identity' (this is to raise a warning if
-    'init' is not set, and stays to its default value (None), in v0.5.0).
+      `num_labeled` was deprecated in version 0.5.0 and will
+      be removed in 0.6.0.

-    'identity'
-      An identity matrix of shape (n_features, n_features).
+  num_constraints: int, optional (default=None)
+    Number of constraints to generate. If None, defaults to `20 *
+    num_classes**2`.

-    'covariance'
-      The (pseudo-)inverse of the covariance matrix.
+  init : None, string or numpy array, optional (default=None)
+    Initialization of the Mahalanobis matrix. Possible options are
+    'identity', 'covariance', 'random', and a numpy array of
+    shape (n_features, n_features). If None, will be set
+    automatically to 'identity' (this is to raise a warning if
+    'init' is not set, and stays to its default value (None), in v0.5.0).
+
+    'identity'
+      An identity matrix of shape (n_features, n_features).

-    'random'
-      The initial Mahalanobis matrix will be a random SPD matrix of
-      shape `(n_features, n_features)`, generated using
-      `sklearn.datasets.make_spd_matrix`.
+    'covariance'
+      The (pseudo-)inverse of the covariance matrix.

-    numpy array
-      A numpy array of shape (n_features, n_features), that will
-      be used as such to initialize the metric.
+    'random'
+      The initial Mahalanobis matrix will be a random SPD matrix of
+      shape `(n_features, n_features)`, generated using
+      `sklearn.datasets.make_spd_matrix`.

-  verbose : bool, optional
-    if True, prints information while learning
+    numpy array
+      A numpy array of shape (n_features, n_features), that will
+      be used as such to initialize the metric.

   preprocessor : array-like, shape=(n_samples, n_features) or callable
-    The preprocessor to call to get tuples from indices.
If array-like, - tuples will be gotten like this: X[indices]. + The preprocessor to call to get tuples from indices. If array-like, + tuples will be gotten like this: X[indices]. + A0 : Not used. - .. deprecated:: 0.5.0 - `A0` was deprecated in version 0.5.0 and will - be removed in 0.6.0. Use 'init' instead. - diagonal : bool, optional - if True, a diagonal metric will be learned, - i.e., a simple scaling of dimensions - diagonal_c : float, optional - weight of the dissimilarity constraint for diagonal - metric learning - verbose : bool, optional - if True, prints information while learning + .. deprecated:: 0.5.0 + `A0` was deprecated in version 0.5.0 and will + be removed in 0.6.0. Use 'init' instead. + + diagonal : bool, optional (default=False) + If True, a diagonal metric will be learned, + i.e., a simple scaling of dimensions. The initialization will then + be the diagonal coefficients of the matrix given as 'init'. + + diagonal_c : float, optional (default=1.0) + Weight of the dissimilarity constraint for diagonal + metric learning. Ignored if ``diagonal=False``. + + verbose : bool, optional (default=False) + If True, prints information while learning + preprocessor : array-like, shape=(n_samples, n_features) or callable - The preprocessor to call to get tuples from indices. If array-like, - tuples will be formed like this: X[indices]. - random_state : int or numpy.RandomState or None, optional (default=None) - A pseudo random number generator object or a seed for it if int. If - ``init='random'``, ``random_state`` is used to initialize the random - Mahalanobis matrix. In any case, `random_state` is also used to - randomly sample constraints from labels. + The preprocessor to call to get tuples from indices. If array-like, + tuples will be formed like this: X[indices]. - `MMC_Supervised` creates pairs of similar sample by taking same class - samples, and pairs of dissimilar samples by taking different class - samples. It then passes these pairs to `MMC` for training. + random_state : int or numpy.RandomState or None, optional (default=None) + A pseudo random number generator object or a seed for it if int. If + ``init='random'``, ``random_state`` is used to initialize the random + Mahalanobis matrix. In any case, `random_state` is also used to + randomly sample constraints from labels. Examples -------- @@ -568,11 +582,11 @@ class MMC_Supervised(_BaseMMC, TransformerMixin): Attributes ---------- n_iter_ : `int` - The number of iterations the solver has run. + The number of iterations the solver has run. components_ : `numpy.ndarray`, shape=(n_features, n_features) - The linear transformation ``L`` deduced from the learned Mahalanobis - metric (See function `components_from_metric`.) + The linear transformation ``L`` deduced from the learned Mahalanobis + metric (See function `components_from_metric`.) """ def __init__(self, max_iter=100, max_proj=10000, convergence_threshold=1e-6, @@ -593,9 +607,11 @@ def fit(self, X, y, random_state='deprecated'): Parameters ---------- X : (n x d) matrix - Input data, where each row corresponds to a single instance. + Input data, where each row corresponds to a single instance. + y : (n) array-like - Data labels. + Data labels. + random_state : Not used .. 
deprecated:: 0.5.0 `random_state` in the `fit` function was deprecated in version 0.5.0 diff --git a/metric_learn/nca.py b/metric_learn/nca.py index 03abdc41..983f1120 100644 --- a/metric_learn/nca.py +++ b/metric_learn/nca.py @@ -34,70 +34,69 @@ class NCA(MahalanobisMixin, TransformerMixin): Parameters ---------- init : None, string or numpy array, optional (default=None) - Initialization of the linear transformation. Possible options are - 'auto', 'pca', 'identity', 'random', and a numpy array of shape - (n_features_a, n_features_b). If None, will be set automatically to - 'auto' (this option is to raise a warning if 'init' is not set, - and stays to its default value None, in v0.5.0). - - 'auto' - Depending on ``n_components``, the most reasonable initialization - will be chosen. If ``n_components <= n_classes`` we use 'lda', as - it uses labels information. If not, but - ``n_components < min(n_features, n_samples)``, we use 'pca', as - it projects data in meaningful directions (those of higher - variance). Otherwise, we just use 'identity'. - - 'pca' - ``n_components`` principal components of the inputs passed - to :meth:`fit` will be used to initialize the transformation. - (See `sklearn.decomposition.PCA`) - - 'lda' - ``min(n_components, n_classes)`` most discriminative - components of the inputs passed to :meth:`fit` will be used to - initialize the transformation. (If ``n_components > n_classes``, - the rest of the components will be zero.) (See - `sklearn.discriminant_analysis.LinearDiscriminantAnalysis`) - - 'identity' - If ``n_components`` is strictly smaller than the - dimensionality of the inputs passed to :meth:`fit`, the identity - matrix will be truncated to the first ``n_components`` rows. - - 'random' - The initial transformation will be a random array of shape - `(n_components, n_features)`. Each value is sampled from the - standard normal distribution. - - numpy array - n_features_b must match the dimensionality of the inputs passed to - :meth:`fit` and n_features_a must be less than or equal to that. - If ``n_components`` is not None, n_features_a must match it. + Initialization of the linear transformation. Possible options are + 'auto', 'pca', 'identity', 'random', and a numpy array of shape + (n_features_a, n_features_b). If None, will be set automatically to + 'auto' (this option is to raise a warning if 'init' is not set, + and stays to its default value None, in v0.5.0). + + 'auto' + Depending on ``n_components``, the most reasonable initialization + will be chosen. If ``n_components <= n_classes`` we use 'lda', as + it uses labels information. If not, but + ``n_components < min(n_features, n_samples)``, we use 'pca', as + it projects data in meaningful directions (those of higher + variance). Otherwise, we just use 'identity'. + + 'pca' + ``n_components`` principal components of the inputs passed + to :meth:`fit` will be used to initialize the transformation. + (See `sklearn.decomposition.PCA`) + + 'lda' + ``min(n_components, n_classes)`` most discriminative + components of the inputs passed to :meth:`fit` will be used to + initialize the transformation. (If ``n_components > n_classes``, + the rest of the components will be zero.) (See + `sklearn.discriminant_analysis.LinearDiscriminantAnalysis`) + + 'identity' + If ``n_components`` is strictly smaller than the + dimensionality of the inputs passed to :meth:`fit`, the identity + matrix will be truncated to the first ``n_components`` rows. 
+ + 'random' + The initial transformation will be a random array of shape + `(n_components, n_features)`. Each value is sampled from the + standard normal distribution. + + numpy array + n_features_b must match the dimensionality of the inputs passed to + :meth:`fit` and n_features_a must be less than or equal to that. + If ``n_components`` is not None, n_features_a must match it. n_components : int or None, optional (default=None) - Dimensionality of reduced space (if None, defaults to dimension of X). + Dimensionality of reduced space (if None, defaults to dimension of X). num_dims : Not used - - .. deprecated:: 0.5.0 - `num_dims` was deprecated in version 0.5.0 and will - be removed in 0.6.0. Use `n_components` instead. + .. deprecated:: 0.5.0 + `num_dims` was deprecated in version 0.5.0 and will + be removed in 0.6.0. Use `n_components` instead. max_iter : int, optional (default=100) Maximum number of iterations done by the optimization algorithm. tol : float, optional (default=None) - Convergence tolerance for the optimization. + Convergence tolerance for the optimization. verbose : bool, optional (default=False) Whether to print progress messages or not. random_state : int or numpy.RandomState or None, optional (default=None) - A pseudo random number generator object or a seed for it if int. If - ``init='random'``, ``random_state`` is used to initialize the random - transformation. If ``init='pca'``, ``random_state`` is passed as an - argument to PCA when initializing the transformation. + A pseudo random number generator object or a seed for it if int. If + ``init='random'``, ``random_state`` is used to initialize the random + transformation. If ``init='pca'``, ``random_state`` is passed as an + argument to PCA when initializing the transformation. Examples -------- @@ -114,10 +113,10 @@ class NCA(MahalanobisMixin, TransformerMixin): Attributes ---------- n_iter_ : `int` - The number of iterations the solver has run. + The number of iterations the solver has run. components_ : `numpy.ndarray`, shape=(n_components, n_features) - The learned linear transformation ``L``. + The learned linear transformation ``L``. References ---------- diff --git a/metric_learn/rca.py b/metric_learn/rca.py index 060a797d..f3a2ac89 100644 --- a/metric_learn/rca.py +++ b/metric_learn/rca.py @@ -43,22 +43,21 @@ class RCA(MahalanobisMixin, TransformerMixin): Parameters ---------- n_components : int or None, optional (default=None) - Dimensionality of reduced space (if None, defaults to dimension of X). + Dimensionality of reduced space (if None, defaults to dimension of X). num_dims : Not used - - .. deprecated:: 0.5.0 - `num_dims` was deprecated in version 0.5.0 and will - be removed in 0.6.0. Use `n_components` instead. + .. deprecated:: 0.5.0 + `num_dims` was deprecated in version 0.5.0 and will + be removed in 0.6.0. Use `n_components` instead. pca_comps : Not used - .. deprecated:: 0.5.0 + .. deprecated:: 0.5.0 `pca_comps` was deprecated in version 0.5.0 and will be removed in 0.6.0. preprocessor : array-like, shape=(n_samples, n_features) or callable - The preprocessor to call to get tuples from indices. If array-like, - tuples will be formed like this: X[indices]. + The preprocessor to call to get tuples from indices. If array-like, + tuples will be formed like this: X[indices]. Examples -------- @@ -82,7 +81,7 @@ class RCA(MahalanobisMixin, TransformerMixin): Attributes ---------- components_ : `numpy.ndarray`, shape=(n_components, n_features) - The learned linear transformation ``L``. 
+ The learned linear transformation ``L``. """ def __init__(self, n_components=None, num_dims='deprecated', @@ -112,10 +111,11 @@ def fit(self, X, chunks): Parameters ---------- data : (n x d) data matrix - Each row corresponds to a single instance + Each row corresponds to a single instance + chunks : (n,) array of ints - When ``chunks[i] == -1``, point i doesn't belong to any chunklet. - When ``chunks[i] == j``, point i belongs to chunklet j. + When ``chunks[i] == -1``, point i doesn't belong to any chunklet. + When ``chunks[i] == j``, point i belongs to chunklet j. """ if self.num_dims != 'deprecated': warnings.warn('"num_dims" parameter is not used.' @@ -177,25 +177,26 @@ class RCA_Supervised(RCA): Parameters ---------- n_components : int or None, optional (default=None) - Dimensionality of reduced space (if None, defaults to dimension of X). + Dimensionality of reduced space (if None, defaults to dimension of X). num_dims : Not used + .. deprecated:: 0.5.0 + `num_dims` was deprecated in version 0.5.0 and will + be removed in 0.6.0. Use `n_components` instead. - .. deprecated:: 0.5.0 - `num_dims` was deprecated in version 0.5.0 and will - be removed in 0.6.0. Use `n_components` instead. - - num_chunks: int, optional + num_chunks: int, optional (default=100) + Number of chunks to generate. - chunk_size: int, optional + chunk_size: int, optional (default=2) + Number of points per chunk. preprocessor : array-like, shape=(n_samples, n_features) or callable - The preprocessor to call to get tuples from indices. If array-like, - tuples will be formed like this: X[indices]. + The preprocessor to call to get tuples from indices. If array-like, + tuples will be formed like this: X[indices]. random_state : int or numpy.RandomState or None, optional (default=None) - A pseudo random number generator object or a seed for it if int. - It is used to randomly sample constraints from labels. + A pseudo random number generator object or a seed for it if int. + It is used to randomly sample constraints from labels. Examples -------- @@ -210,7 +211,7 @@ class RCA_Supervised(RCA): Attributes ---------- components_ : `numpy.ndarray`, shape=(n_components, n_features) - The learned linear transformation ``L``. + The learned linear transformation ``L``. """ def __init__(self, num_dims='deprecated', n_components=None, @@ -230,8 +231,10 @@ def fit(self, X, y, random_state='deprecated'): Parameters ---------- X : (n x d) data matrix - each row corresponds to a single instance + each row corresponds to a single instance + y : (n) data labels + random_state : Not used .. deprecated:: 0.5.0 `random_state` in the `fit` function was deprecated in version 0.5.0 diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py index 21fadd74..944739f2 100644 --- a/metric_learn/sdml.py +++ b/metric_learn/sdml.py @@ -142,62 +142,62 @@ class SDML(_BaseSDML, _PairsClassifierMixin): Parameters ---------- - balance_param : float, optional - trade off between sparsity and M0 prior + balance_param : float, optional (default=0.5) + Trade off between sparsity and M0 prior. - sparsity_param : float, optional - trade off between optimizer and sparseness (see graph_lasso) + sparsity_param : float, optional (default=0.01) + Trade off between optimizer and sparseness (see graph_lasso). prior : None, string or numpy array, optional (default=None) - Prior to set for the metric. Possible options are - 'identity', 'covariance', 'random', and a numpy array of - shape (n_features, n_features). 
For SDML, the prior should be strictly - positive definite (PD). If `None`, will be set - automatically to 'identity' (this is to raise a warning if - `prior` is not set, and stays to its default value (None), in v0.5.0). + Prior to set for the metric. Possible options are + 'identity', 'covariance', 'random', and a numpy array of + shape (n_features, n_features). For SDML, the prior should be strictly + positive definite (PD). If `None`, will be set + automatically to 'identity' (this is to raise a warning if + `prior` is not set, and stays to its default value (None), in v0.5.0). - 'identity' - An identity matrix of shape (n_features, n_features). + 'identity' + An identity matrix of shape (n_features, n_features). - 'covariance' - The inverse covariance matrix. + 'covariance' + The inverse covariance matrix. - 'random' - The prior will be a random positive definite (PD) matrix of shape - `(n_features, n_features)`, generated using - `sklearn.datasets.make_spd_matrix`. + 'random' + The prior will be a random positive definite (PD) matrix of shape + `(n_features, n_features)`, generated using + `sklearn.datasets.make_spd_matrix`. - numpy array - A positive definite (PD) matrix of shape - (n_features, n_features), that will be used as such to set the - prior. + numpy array + A positive definite (PD) matrix of shape + (n_features, n_features), that will be used as such to set the + prior. use_cov : Not used. - .. deprecated:: 0.5.0 - `A0` was deprecated in version 0.5.0 and will - be removed in 0.6.0. Use 'prior' instead. + .. deprecated:: 0.5.0 + `A0` was deprecated in version 0.5.0 and will + be removed in 0.6.0. Use 'prior' instead. verbose : bool, optional (default=False) - if True, prints information while learning + If True, prints information while learning. preprocessor : array-like, shape=(n_samples, n_features) or callable - The preprocessor to call to get tuples from indices. If array-like, - tuples will be gotten like this: X[indices]. + The preprocessor to call to get tuples from indices. If array-like, + tuples will be gotten like this: X[indices]. random_state : int or numpy.RandomState or None, optional (default=None) - A pseudo random number generator object or a seed for it if int. If - ``prior='random'``, ``random_state`` is used to set the prior. + A pseudo random number generator object or a seed for it if int. If + ``prior='random'``, ``random_state`` is used to set the prior. Attributes ---------- components_ : `numpy.ndarray`, shape=(n_features, n_features) - The linear transformation ``L`` deduced from the learned Mahalanobis - metric (See function `components_from_metric`.) + The linear transformation ``L`` deduced from the learned Mahalanobis + metric (See function `components_from_metric`.) threshold_ : `float` - If the distance metric between two points is lower than this threshold, - points will be classified as similar, otherwise they will be - classified as dissimilar. + If the distance metric between two points is lower than this threshold, + points will be classified as similar, otherwise they will be + classified as dissimilar. Examples -------- @@ -231,20 +231,22 @@ def fit(self, pairs, y, calibration_params=None): ---------- pairs : array-like, shape=(n_constraints, 2, n_features) or \ (n_constraints, 2) - 3D Array of pairs with each row corresponding to two points, - or 2D array of indices of pairs if the metric learner uses a - preprocessor. 
+ 3D Array of pairs with each row corresponding to two points, + or 2D array of indices of pairs if the metric learner uses a + preprocessor. + y : array-like, of shape (n_constraints,) - Labels of constraints. Should be -1 for dissimilar pair, 1 for similar. + Labels of constraints. Should be -1 for dissimilar pair, 1 for similar. + calibration_params : `dict` or `None` - Dictionary of parameters to give to `calibrate_threshold` for the - threshold calibration step done at the end of `fit`. If `None` is - given, `calibrate_threshold` will use the default parameters. + Dictionary of parameters to give to `calibrate_threshold` for the + threshold calibration step done at the end of `fit`. If `None` is + given, `calibrate_threshold` will use the default parameters. Returns ------- self : object - Returns the instance. + Returns the instance. """ calibration_params = (calibration_params if calibration_params is not None else dict()) @@ -264,61 +266,67 @@ class SDML_Supervised(_BaseSDML, TransformerMixin): Parameters ---------- balance_param : float, optional (default=0.5) - trade off between sparsity and M0 prior + Trade off between sparsity and M0 prior. + sparsity_param : float, optional (default=0.01) - trade off between optimizer and sparseness (see graph_lasso) + Trade off between optimizer and sparseness (see graph_lasso). + prior : None, string or numpy array, optional (default=None) - Prior to set for the metric. Possible options are - 'identity', 'covariance', 'random', and a numpy array of - shape (n_features, n_features). For SDML, the prior should be strictly - positive definite (PD). If `None`, will be set - automatically to 'identity' (this is to raise a warning if - `prior` is not set, and stays to its default value (None), in v0.5.0). + Prior to set for the metric. Possible options are + 'identity', 'covariance', 'random', and a numpy array of + shape (n_features, n_features). For SDML, the prior should be strictly + positive definite (PD). If `None`, will be set + automatically to 'identity' (this is to raise a warning if + `prior` is not set, and stays to its default value (None), in v0.5.0). - 'identity' - An identity matrix of shape (n_features, n_features). + 'identity' + An identity matrix of shape (n_features, n_features). - 'covariance' - The inverse covariance matrix. + 'covariance' + The inverse covariance matrix. - 'random' - The prior will be a random SPD matrix of shape - `(n_features, n_features)`, generated using - `sklearn.datasets.make_spd_matrix`. + 'random' + The prior will be a random SPD matrix of shape + `(n_features, n_features)`, generated using + `sklearn.datasets.make_spd_matrix`. - numpy array - A positive definite (PD) matrix of shape - (n_features, n_features), that will be used as such to set the - prior. + numpy array + A positive definite (PD) matrix of shape + (n_features, n_features), that will be used as such to set the + prior. use_cov : Not used. - .. deprecated:: 0.5.0 - `A0` was deprecated in version 0.5.0 and will - be removed in 0.6.0. Use 'prior' instead. + .. deprecated:: 0.5.0 + `A0` was deprecated in version 0.5.0 and will + be removed in 0.6.0. Use 'prior' instead. num_labeled : Not used .. deprecated:: 0.5.0 - `num_labeled` was deprecated in version 0.5.0 and will - be removed in 0.6.0. + `num_labeled` was deprecated in version 0.5.0 and will + be removed in 0.6.0. 
+ num_constraints : int, optional (default=None) - number of constraints to generate - (`20 * num_classes**2` constraints by default) + Number of constraints to generate. If None, defaults to `20 * + num_classes**2`. + verbose : bool, optional (default=False) - if True, prints information while learning + If True, prints information while learning. + preprocessor : array-like, shape=(n_samples, n_features) or callable - The preprocessor to call to get tuples from indices. If array-like, - tuples will be formed like this: X[indices]. + The preprocessor to call to get tuples from indices. If array-like, + tuples will be formed like this: X[indices]. + random_state : int or numpy.RandomState or None, optional (default=None) - A pseudo random number generator object or a seed for it if int. If - ``init='random'``, ``random_state`` is used to set the random - prior. In any case, `random_state` is also used to randomly sample - constraints from labels. + A pseudo random number generator object or a seed for it if int. If + ``init='random'``, ``random_state`` is used to set the random + prior. In any case, `random_state` is also used to randomly sample + constraints from labels. Attributes ---------- components_ : `numpy.ndarray`, shape=(n_features, n_features) - The linear transformation ``L`` deduced from the learned Mahalanobis - metric (See function `components_from_metric`.) + The linear transformation ``L`` deduced from the learned Mahalanobis + metric (See function `components_from_metric`.) See Also -------- @@ -344,9 +352,11 @@ def fit(self, X, y, random_state='deprecated'): Parameters ---------- X : array-like, shape (n, d) - data matrix, where each row corresponds to a single instance + data matrix, where each row corresponds to a single instance + y : array-like, shape (n,) - data labels, one for each instance + data labels, one for each instance + random_state : Not used .. deprecated:: 0.5.0 `random_state` in the `fit` function was deprecated in version 0.5.0 @@ -356,7 +366,7 @@ def fit(self, X, y, random_state='deprecated'): Returns ------- self : object - Returns the instance. + Returns the instance. """ if self.num_labeled != 'deprecated': warnings.warn('"num_labeled" parameter is not used.' 
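The supervised wrappers documented in this patch all share the same constraint-generation behavior: with `num_constraints=None`, `20 * num_classes**2` constraints are sampled from the labels, and `random_state` seeds both the prior/init and the constraint sampling. A minimal sketch of that behavior, assuming the v0.5.0 API described in these docstrings (the iris dataset and the `prior='identity'`, `balance_param=1e-5` settings are just illustrative choices)::

    import numpy as np
    from sklearn.datasets import load_iris
    from metric_learn import SDML_Supervised

    X, y = load_iris(return_X_y=True)
    n_classes = len(np.unique(y))  # 3 classes for iris

    # num_constraints=None falls back to 20 * n_classes**2 = 180 sampled
    # constraints; random_state seeds both the prior and the sampling.
    sdml = SDML_Supervised(prior='identity', balance_param=1e-5,
                           random_state=42)
    sdml.fit(X, y)
    print(sdml.components_.shape)  # (4, 4), i.e. (n_features, n_features)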
From 833e1861a96e193ab4cbcc2c67b4ff09ac02a8e5 Mon Sep 17 00:00:00 2001 From: Gabriel Rudloff Date: Wed, 4 Mar 2020 17:15:28 +0100 Subject: [PATCH 158/210] [MRG] Learning on Triplets (#279) * add _TripletsClassifierMixin * added doc * remove redundant code * added tests * triplets added to doc autosumary * rephrasing, added docstring and small changes * small rephrasing * small flake8 fix * Handle low number of neighbors for knn triplets * add tests for knn triplet generation * fixed typos and rephrasing * added more tests for knn triplet construction * sorted triplet & fix test_generate_knntriplets_k * added over the edge knn triplets test * multiple small code refactoring * more refactoring * Fix & test unlabeled handling triplet generation * closer unlabeled point * small clarity enhancement & repmat replacement --- doc/metric_learn.rst | 1 + doc/weakly_supervised.rst | 139 +++++++++++++++++++++++++----- metric_learn/base_metric.py | 88 ++++++++++++++++++- metric_learn/constraints.py | 119 +++++++++++++++++++++++++ test/test_constraints.py | 102 ++++++++++++++++++++++ test/test_triplets_classifiers.py | 66 ++++++++++++++ test/test_utils.py | 35 ++++++++ 7 files changed, 526 insertions(+), 24 deletions(-) create mode 100644 test/test_triplets_classifiers.py diff --git a/doc/metric_learn.rst b/doc/metric_learn.rst index 930404d0..76c91f48 100644 --- a/doc/metric_learn.rst +++ b/doc/metric_learn.rst @@ -14,6 +14,7 @@ Base Classes metric_learn.Constraints metric_learn.base_metric.BaseMetricLearner metric_learn.base_metric._PairsClassifierMixin + metric_learn.base_metric._TripletsClassifierMixin metric_learn.base_metric._QuadrupletsClassifierMixin Supervised Learning Algorithms diff --git a/doc/weakly_supervised.rst b/doc/weakly_supervised.rst index cf313ba1..72f68627 100644 --- a/doc/weakly_supervised.rst +++ b/doc/weakly_supervised.rst @@ -592,6 +592,114 @@ points, while constrains the sum of distances between dissimilar points: -with-side-information.pdf>`_. NIPS 2002 .. [2] Adapted from Matlab code http://www.cs.cmu.edu/%7Eepxing/papers/Old_papers/code_Metric_online.tar.gz +.. _learning_on_triplets: + +Learning on triplets +==================== + +Some metric learning algorithms learn on triplets of samples. In this case, +one should provide the algorithm with `n_samples` triplets of points. The +semantic of each triplet is that the first point should be closer to the +second point than to the third one. + +Fitting +------- +Here is an example for fitting on triplets (see :ref:`fit_ws` for more +details on the input data format and how to fit, in the general case of +learning on tuples). 
+
+>>> from metric_learn import SCML
+>>> triplets = np.array([[[1.2, 3.2], [2.3, 5.5], [2.1, 0.6]],
+>>>                      [[4.5, 2.3], [2.1, 2.3], [7.3, 3.4]]])
+>>> scml = SCML(random_state=42)
+>>> scml.fit(triplets)
+SCML(beta=1e-5, B=None, max_iter=100000, verbose=False,
+     preprocessor=None, random_state=42)
+
+Or alternatively (using a preprocessor):
+
+>>> X = np.array([[1.2, 3.2],
+>>>               [2.3, 5.5],
+>>>               [2.1, 0.6],
+>>>               [4.5, 2.3],
+>>>               [2.1, 2.3],
+>>>               [7.3, 3.4]])
+>>> triplets_indices = np.array([[0, 1, 2], [3, 4, 5]])
+>>> scml = SCML(preprocessor=X, random_state=42)
+>>> scml.fit(triplets_indices)
+SCML(beta=1e-5, B=None, max_iter=100000, verbose=False,
+     preprocessor=array([[1.2, 3.2],
+                         [2.3, 5.5],
+                         [2.1, 0.6],
+                         [4.5, 2.3],
+                         [2.1, 2.3],
+                         [7.3, 3.4]]),
+     random_state=42)
+
+
+Here, we want to learn a metric that, for each of the two
+`triplets`, will make the first point closer to the
+second point than to the third one.
+
+.. _triplets_predicting:
+
+Prediction
+----------
+
+When a triplets learner is fitted, it is also able to predict, for an
+upcoming triplet, whether the first point is closer to the second point
+than to the third one (+1), or not (-1).
+
+>>> triplets_test = np.array(
+... [[[5.6, 5.3], [2.2, 2.1], [1.2, 3.4]],
+...  [[6.0, 4.2], [4.3, 1.2], [0.1, 7.8]]])
+>>> scml.predict(triplets_test)
+array([-1.,  1.])
+
+.. _triplets_scoring:
+
+Scoring
+-------
+
+Triplet metric learners can also return a `decision_function` for a set of
+triplets, which corresponds to the distance between the first and last points
+minus the distance between the first two points of the triplet (the higher
+the value, the more similar the first point is to the second point compared
+to the last one). This "score" can be interpreted as a measure of likelihood
+of having a +1 prediction for this triplet.
+
+>>> scml.decision_function(triplets_test)
+array([-1.75700306,  4.98982131])
+
+In the above example, for the first triplet in `triplets_test`, the first
+point is predicted less similar to the second point than to the last point
+(they are further away in the transformed space).
+
+Unlike pairs learners, triplets learners do not allow to give a `y` when
+fitting: we assume that the ordering of points within triplets is such that
+the training triplets are all positive. Therefore, it is not possible to use
+scikit-learn scoring functions (such as 'f1_score') for triplets learners.
+
+However, triplets learners do have a default scoring function, which will
+basically return the accuracy score on a given test set, i.e. the proportion
+of triplets that have the right predicted ordering.
+
+>>> scml.score(triplets_test)
+0.5
+
+.. note::
+   See :ref:`fit_ws` for more details on metric learner functions that are
+   not specific to learning on pairs, like `transform`, `score_pairs`,
+   `get_metric` and `get_mahalanobis_matrix`.
+
+
+
+
+Algorithms
+----------
+
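Triplets do not have to be written by hand: the `Constraints.generate_knntriplets` helper introduced further down in this patch builds them from labeled points. A hedged sketch of that workflow (the iris data and the neighbor counts are illustrative; the learner line is commented out since SCML itself lands in a separate PR)::

    from sklearn.datasets import load_iris
    from metric_learn.constraints import Constraints

    X, y = load_iris(return_X_y=True)

    # For every anchor point, combine each of its 3 nearest same-class
    # neighbors with each of its 4 nearest other-class neighbors.
    triplets = Constraints(y).generate_knntriplets(X, k_genuine=3,
                                                   k_impostor=4)
    print(triplets.shape)  # (1800, 3): 150 anchors * 3 genuine * 4 impostors

    # These index triplets can then feed any triplets learner, e.g.:
    # learner = SCML(preprocessor=X).fit(triplets)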
 .. _learning_on_quadruplets:

@@ -599,7 +707,7 @@ Learning on quadruplets
 =======================
 
 Some metric learning algorithms learn on quadruplets of samples. In this case,
-one should provide the algorithm with `n_samples` quadruplets of points. Th
+one should provide the algorithm with `n_samples` quadruplets of points. The
 semantic of each quadruplet is that the first two points should be closer
 together than the last two points.
 
@@ -666,14 +774,12 @@ array([-1.,  1.])
 Scoring
 -------
 
-Quadruplet metric learners can also
-return a `decision_function` for a set of pairs. This is basically the "score"
-which sign will be taken to find the prediction for the pair, which
-corresponds to the difference between the distance between the two last points,
-and the distance between the two last points of the quadruplet (higher
-score means the two last points are more likely to be more dissimilar than
-the two first points (i.e. more likely to have a +1 prediction since it's
-the right ordering)).
+Quadruplet metric learners can also return a `decision_function` for a set of
+quadruplets, which corresponds to the distance between the second pair of points
+minus the distance between the first pair of points of the quadruplet (the higher
+the value, the more similar the first pair is compared to the last pair).
+This "score" can be interpreted as a measure of likelihood of having a +1
+prediction for this quadruplet.
 
 >>> lsml.decision_function(quadruplets_test)
 array([-1.75700306,  4.98982131])
 
@@ -682,17 +788,10 @@ In the above example, for the first quadruplet in `quadruplets_test`, the
 two first points are predicted less similar than the two last points (they
 are further away in the transformed space).
 
-Unlike for pairs learners, quadruplets learners don't allow to give a `y`
-when fitting, which does not allow to use scikit-learn scoring functions
-like:
-
->>> from sklearn.model_selection import cross_val_score
->>> cross_val_score(lsml, quadruplets, scoring='f1_score') # this won't work
-
-(This is actually intentional, for more details
-about that, see
-`this comment `_
-on github.)
+Like triplets learners, quadruplets learners do not allow to give a `y` when
+fitting: we assume that the ordering of points within quadruplets is such that
+the training quadruplets are all positive. Therefore, it is not possible to use
+scikit-learn scoring functions (such as 'f1_score') for quadruplets learners.
 
 However, quadruplets learners do have a default scoring function, which will
 basically return the accuracy score on a given test set, i.e. the proportion
diff --git a/metric_learn/base_metric.py b/metric_learn/base_metric.py
index d19998ff..9dc38622 100644
--- a/metric_learn/base_metric.py
+++ b/metric_learn/base_metric.py
@@ -589,6 +589,90 @@ def _validate_calibration_params(strategy='accuracy', min_rate=None,
                        'Got {} instead.'.format(type(beta)))
 
 
+class _TripletsClassifierMixin(BaseMetricLearner):
+  """Base class for triplets learners.
+  """
+
+  _tuple_size = 3  # number of points in a tuple, 3 for triplets
+
+  def predict(self, triplets):
+    """Predicts the ordering between sample distances in input triplets.
+
+    For each triplet, returns 1 if the first element is closer to the second
+    than to the last and -1 if not.
+
+    Parameters
+    ----------
+    triplets : array-like, shape=(n_triplets, 3, n_features) or (n_triplets, 3)
+      3D array of triplets to predict, with each row corresponding to three
+      points, or 2D array of indices of triplets if the metric learner
+      uses a preprocessor.
+
+    Returns
+    -------
+    prediction : `numpy.ndarray` of floats, shape=(n_constraints,)
+      Predictions of the ordering of pairs, for each triplet.
+    """
+    return np.sign(self.decision_function(triplets))
+
+  def decision_function(self, triplets):
+    """Predicts differences between sample distances in input triplets.
+
+    For each triplet (X_a, X_b, X_c) in the samples, computes the learned
+    distance of the second pair (X_a, X_c) minus the
+    learned distance of the first pair (X_a, X_b).
The higher it is, the more + probable it is that the pairs in the triplets are presented in the right + order, i.e. that the label of the triplet is 1. The lower it is, the more + probable it is that the label of the triplet is -1. + + Parameters + ---------- + triplet : array-like, shape=(n_triplets, 3, n_features) or \ + (n_triplets, 3) + 3D array of triplets to predict, with each row corresponding to three + points, or 2D array of indices of triplets if the metric learner + uses a preprocessor. + + Returns + ------- + decision_function : `numpy.ndarray` of floats, shape=(n_constraints,) + Metric differences. + """ + check_is_fitted(self, 'preprocessor_') + triplets = check_input(triplets, type_of_inputs='tuples', + preprocessor=self.preprocessor_, + estimator=self, tuple_size=self._tuple_size) + return (self.score_pairs(triplets[:, [0, 2]]) - + self.score_pairs(triplets[:, :2])) + + def score(self, triplets): + """Computes score on input triplets. + + Returns the accuracy score of the following classification task: a triplet + (X_a, X_b, X_c) is correctly classified if the predicted similarity between + the first pair (X_a, X_b) is higher than that of the second pair (X_a, X_c) + + Parameters + ---------- + triplets : array-like, shape=(n_triplets, 3, n_features) or \ + (n_triplets, 3) + 3D array of triplets to score, with each row corresponding to three + points, or 2D array of indices of triplets if the metric learner + uses a preprocessor. + + Returns + ------- + score : float + The triplets score. + """ + # Since the prediction is a vector of values in {-1, +1}, we need to + # rescale them to {0, 1} to compute the accuracy using the mean (because + # then 1 means a correctly classified result (pairs are in the right + # order), and a 0 an incorrectly classified result (pairs are in the + # wrong order). + return self.predict(triplets).mean() / 2 + 0.5 + + class _QuadrupletsClassifierMixin(BaseMetricLearner): """Base class for quadruplets learners. """ @@ -614,10 +698,6 @@ def predict(self, quadruplets): prediction : `numpy.ndarray` of floats, shape=(n_constraints,) Predictions of the ordering of pairs, for each quadruplet. """ - check_is_fitted(self, 'preprocessor_') - quadruplets = check_input(quadruplets, type_of_inputs='tuples', - preprocessor=self.preprocessor_, - estimator=self, tuple_size=self._tuple_size) return np.sign(self.decision_function(quadruplets)) def decision_function(self, quadruplets): diff --git a/metric_learn/constraints.py b/metric_learn/constraints.py index 36d77194..305223cd 100644 --- a/metric_learn/constraints.py +++ b/metric_learn/constraints.py @@ -6,6 +6,7 @@ import warnings from six.moves import xrange from sklearn.utils import check_random_state +from sklearn.neighbors import NearestNeighbors __all__ = ['Constraints'] @@ -76,6 +77,115 @@ def positive_negative_pairs(self, num_constraints, same_length=False, return a[:n], b[:n], c[:n], d[:n] return a, b, c, d + def generate_knntriplets(self, X, k_genuine, k_impostor): + """ + Generates triplets from labeled data. + + For every point (X_a) the triplets (X_a, X_b, X_c) are constructed from all + the combinations of taking one of its `k_genuine`-nearest neighbors of the + same class (X_b) and taking one of its `k_impostor`-nearest neighbors of + other classes (X_c). + + In the case a class doesn't have enough points in the same class (other + classes) to yield `k_genuine` (`k_impostor`) neighbors a warning will be + raised and the maximum value of genuine (impostor) neighbors will be used + for that class. 
+ + Parameters + ---------- + X : (n x d) matrix + Input data, where each row corresponds to a single instance. + k_genuine : int + Number of neighbors of the same class to be taken into account. + k_impostor : int + Number of neighbors of different classes to be taken into account. + + Returns + ------- + triplets : array-like, shape=(n_constraints, 3) + 2D array of triplets of indicators. + """ + # Ignore unlabeled samples + known_labels_mask = self.partial_labels >= 0 + known_labels = self.partial_labels[known_labels_mask] + X = X[known_labels_mask] + + labels, labels_count = np.unique(known_labels, return_counts=True) + len_input = known_labels.shape[0] + + # Handle the case where there are too few elements to yield k_genuine or + # k_impostor neighbors for every class. + + k_genuine_vec = np.full_like(labels, k_genuine) + k_impostor_vec = np.full_like(labels, k_impostor) + + for i, count in enumerate(labels_count): + if k_genuine + 1 > count: + k_genuine_vec[i] = count-1 + warnings.warn("The class {} has {} elements, which is not sufficient " + "to generate {} genuine neighbors as specified by " + "k_genuine. Will generate {} genuine neighbors instead." + "\n" + .format(labels[i], count, k_genuine+1, + k_genuine_vec[i])) + if k_impostor > len_input - count: + k_impostor_vec[i] = len_input - count + warnings.warn("The class {} has {} elements of other classes, which is" + " not sufficient to generate {} impostor neighbors as " + "specified by k_impostor. Will generate {} impostor " + "neighbors instead.\n" + .format(labels[i], k_impostor_vec[i], k_impostor, + k_impostor_vec[i])) + + # The total number of possible triplets combinations per label comes from + # taking one of the k_genuine_vec[i] genuine neighbors and one of the + # k_impostor_vec[i] impostor neighbors for the labels_count[i] elements + comb_per_label = labels_count * k_genuine_vec * k_impostor_vec + + # Get start and finish for later triplet assigning + # append zero at the begining for start and get cumulative sum + start_finish_indices = np.hstack((0, comb_per_label)).cumsum() + + # Total number of triplets is the sum of all possible combinations per + # label + num_triplets = start_finish_indices[-1] + triplets = np.empty((num_triplets, 3), dtype=np.intp) + + neigh = NearestNeighbors() + + for i, label in enumerate(labels): + + # generate mask for current label + gen_mask = known_labels == label + gen_indx = np.where(gen_mask) + + # get k_genuine genuine neighbors + neigh.fit(X=X[gen_indx]) + # Take elements of gen_indx according to the yielded k-neighbors + gen_relative_indx = neigh.kneighbors(n_neighbors=k_genuine_vec[i], + return_distance=False) + gen_neigh = np.take(gen_indx, gen_relative_indx) + + # generate mask for impostors of current label + imp_indx = np.where(~gen_mask) + + # get k_impostor impostor neighbors + neigh.fit(X=X[imp_indx]) + # Take elements of imp_indx according to the yielded k-neighbors + imp_relative_indx = neigh.kneighbors(n_neighbors=k_impostor_vec[i], + X=X[gen_mask], + return_distance=False) + imp_neigh = np.take(imp_indx, imp_relative_indx) + + # length = len_label*k_genuine*k_impostor + start, finish = start_finish_indices[i:i+2] + + triplets[start:finish, :] = comb(gen_indx, gen_neigh, imp_neigh, + k_genuine_vec[i], + k_impostor_vec[i]) + + return triplets + def _pairs(self, num_constraints, same_label=True, max_iter=10, random_state=np.random): known_label_idx, = np.where(self.partial_labels >= 0) @@ -157,6 +267,15 @@ def chunks(self, num_chunks=100, chunk_size=2, random_state=None): 
return chunks +def comb(A, B, C, sizeB, sizeC): + # generate_knntriplets helper function + # generate an array with all combinations of choosing + # an element from A, B and C + return np.vstack((np.tile(A, (sizeB*sizeC, 1)).ravel(order='F'), + np.tile(np.hstack(B), (sizeC, 1)).ravel(order='F'), + np.tile(C, (1, sizeB)).ravel())).T + + def wrap_pairs(X, constraints): a = np.array(constraints[0]) b = np.array(constraints[1]) diff --git a/test/test_constraints.py b/test/test_constraints.py index 243028f6..92876779 100644 --- a/test/test_constraints.py +++ b/test/test_constraints.py @@ -2,6 +2,7 @@ import numpy as np from sklearn.utils import shuffle from metric_learn.constraints import Constraints +from sklearn.datasets import make_blobs SEED = 42 @@ -69,3 +70,104 @@ def test_unknown_labels_not_in_chunks(num_chunks, chunk_size): random_state=SEED) assert np.all(chunks[labels < 0] < 0) + + +@pytest.mark.parametrize("k_genuine, k_impostor, T_test", + [(2, 2, + [[0, 1, 3], [0, 1, 4], [0, 2, 3], [0, 2, 4], + [1, 0, 3], [1, 0, 4], [1, 2, 3], [1, 2, 4], + [2, 0, 3], [2, 0, 4], [2, 1, 3], [2, 1, 4], + [3, 4, 1], [3, 4, 2], [3, 5, 1], [3, 5, 2], + [4, 3, 1], [4, 3, 2], [4, 5, 1], [4, 5, 2], + [5, 3, 1], [5, 3, 2], [5, 4, 1], [5, 4, 2]]), + (1, 3, + [[0, 1, 3], [0, 1, 4], [0, 1, 5], [1, 0, 3], + [1, 0, 4], [1, 0, 5], [2, 1, 3], [2, 1, 4], + [2, 1, 5], [3, 4, 0], [3, 4, 1], [3, 4, 2], + [4, 3, 0], [4, 3, 1], [4, 3, 2], [5, 4, 0], + [5, 4, 1], [5, 4, 2]]), + (1, 2, + [[0, 1, 3], [0, 1, 4], [1, 0, 3], [1, 0, 4], + [2, 1, 3], [2, 1, 4], [3, 4, 1], [3, 4, 2], + [4, 3, 1], [4, 3, 2], [5, 4, 1], [5, 4, 2]])]) +def test_generate_knntriplets_under_edge(k_genuine, k_impostor, T_test): + """Checks under the edge cases of knn triplet construction with enough + neighbors""" + + X = np.array([[0, 0], [2, 2], [4, 4], [8, 8], [16, 16], [32, 32], [33, 33]]) + y = np.array([1, 1, 1, 2, 2, 2, -1]) + + T = Constraints(y).generate_knntriplets(X, k_genuine, k_impostor) + + assert np.array_equal(sorted(T.tolist()), T_test) + + +@pytest.mark.parametrize("k_genuine, k_impostor,", + [(2, 3), (3, 3), (2, 4), (3, 4)]) +def test_generate_knntriplets(k_genuine, k_impostor): + """Checks edge and over the edge cases of knn triplet construction with not + enough neighbors""" + + T_test = [[0, 1, 3], [0, 1, 4], [0, 1, 5], [0, 2, 3], [0, 2, 4], [0, 2, 5], + [1, 0, 3], [1, 0, 4], [1, 0, 5], [1, 2, 3], [1, 2, 4], [1, 2, 5], + [2, 0, 3], [2, 0, 4], [2, 0, 5], [2, 1, 3], [2, 1, 4], [2, 1, 5], + [3, 4, 0], [3, 4, 1], [3, 4, 2], [3, 5, 0], [3, 5, 1], [3, 5, 2], + [4, 3, 0], [4, 3, 1], [4, 3, 2], [4, 5, 0], [4, 5, 1], [4, 5, 2], + [5, 3, 0], [5, 3, 1], [5, 3, 2], [5, 4, 0], [5, 4, 1], [5, 4, 2]] + + X = np.array([[0, 0], [2, 2], [4, 4], [8, 8], [16, 16], [32, 32], [33, 33]]) + y = np.array([1, 1, 1, 2, 2, 2, -1]) + + T = Constraints(y).generate_knntriplets(X, k_genuine, k_impostor) + + assert np.array_equal(sorted(T.tolist()), T_test) + + +def test_generate_knntriplets_k_genuine(): + """Checks the correct error raised when k_genuine is too big """ + X, y = shuffle(*make_blobs(random_state=SEED), + random_state=SEED) + + label, labels_count = np.unique(y, return_counts=True) + labels_count_min = np.min(labels_count) + idx_smallest_label, = np.where(labels_count == labels_count_min) + k_genuine = labels_count_min + + warn_msgs = [] + for idx in idx_smallest_label: + warn_msgs.append("The class {} has {} elements, which is not sufficient " + "to generate {} genuine neighbors as specified by " + "k_genuine. 
Will generate {} genuine neighbors instead." + "\n" + .format(label[idx], k_genuine, k_genuine+1, k_genuine-1)) + + with pytest.warns(UserWarning) as raised_warning: + Constraints(y).generate_knntriplets(X, k_genuine, 1) + for warn in raised_warning: + assert str(warn.message) in warn_msgs + + +def test_generate_knntriplets_k_impostor(): + """Checks the correct error raised when k_impostor is too big """ + X, y = shuffle(*make_blobs(random_state=SEED), + random_state=SEED) + + length = len(y) + label, labels_count = np.unique(y, return_counts=True) + labels_count_max = np.max(labels_count) + idx_biggest_label, = np.where(labels_count == labels_count_max) + k_impostor = length - labels_count_max + 1 + + warn_msgs = [] + for idx in idx_biggest_label: + warn_msgs.append("The class {} has {} elements of other classes, which is" + " not sufficient to generate {} impostor neighbors as " + "specified by k_impostor. Will generate {} impostor " + "neighbors instead.\n" + .format(label[idx], k_impostor-1, k_impostor, + k_impostor-1)) + + with pytest.warns(UserWarning) as raised_warning: + Constraints(y).generate_knntriplets(X, 1, k_impostor) + for warn in raised_warning: + assert str(warn.message) in warn_msgs diff --git a/test/test_triplets_classifiers.py b/test/test_triplets_classifiers.py new file mode 100644 index 00000000..8cedd8cc --- /dev/null +++ b/test/test_triplets_classifiers.py @@ -0,0 +1,66 @@ +import pytest +from sklearn.exceptions import NotFittedError +from sklearn.model_selection import train_test_split + +from test.test_utils import triplets_learners, ids_triplets_learners +from sklearn.utils.testing import set_random_state +from sklearn import clone +import numpy as np + + +@pytest.mark.parametrize('with_preprocessor', [True, False]) +@pytest.mark.parametrize('estimator, build_dataset', triplets_learners, + ids=ids_triplets_learners) +def test_predict_only_one_or_minus_one(estimator, build_dataset, + with_preprocessor): + """Test that all predicted values are either +1 or -1""" + input_data, preprocessor = build_dataset(with_preprocessor) + estimator = clone(estimator) + estimator.set_params(preprocessor=preprocessor) + set_random_state(estimator) + triplets_train, triplets_test = train_test_split(input_data) + estimator.fit(triplets_train) + predictions = estimator.predict(triplets_test) + + not_valid = [e for e in predictions if e not in [-1, 1]] + assert len(not_valid) == 0 + + +@pytest.mark.parametrize('with_preprocessor', [True, False]) +@pytest.mark.parametrize('estimator, build_dataset', triplets_learners, + ids=ids_triplets_learners) +def test_raise_not_fitted_error_if_not_fitted(estimator, build_dataset, + with_preprocessor): + """Test that a NotFittedError is raised if someone tries to predict and + the metric learner has not been fitted.""" + input_data, preprocessor = build_dataset(with_preprocessor) + estimator = clone(estimator) + estimator.set_params(preprocessor=preprocessor) + set_random_state(estimator) + with pytest.raises(NotFittedError): + estimator.predict(input_data) + + +@pytest.mark.parametrize('estimator, build_dataset', triplets_learners, + ids=ids_triplets_learners) +def test_accuracy_toy_example(estimator, build_dataset): + """Test that the default scoring for triplets (accuracy) works on some + toy example""" + triplets, X = build_dataset(with_preprocessor=True) + triplets = X[triplets] + estimator = clone(estimator) + set_random_state(estimator) + estimator.fit(triplets) + # We take the two first points and we build 4 regularly spaced points on the + 
# line they define, so that it's easy to build triplets of different + # similarities. + X_test = X[0] + np.arange(4)[:, np.newaxis] * (X[0] - X[1]) / 4 + + triplets_test = np.array( + [[X_test[0], X_test[2], X_test[1]], + [X_test[1], X_test[3], X_test[0]], + [X_test[1], X_test[2], X_test[3]], + [X_test[3], X_test[0], X_test[2]]]) + # we force the transformation to be identity so that we control what it does + estimator.components_ = np.eye(X.shape[1]) + assert estimator.score(triplets_test) == 0.25 diff --git a/test/test_utils.py b/test/test_utils.py index 2510ed89..a4cf86f4 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -19,9 +19,11 @@ Constraints) from metric_learn.base_metric import (ArrayIndexer, MahalanobisMixin, _PairsClassifierMixin, + _TripletsClassifierMixin, _QuadrupletsClassifierMixin) from metric_learn.exceptions import PreprocessorError, NonPSDError from sklearn.datasets import make_regression, make_blobs, load_iris +from metric_learn.lsml import _BaseLSML SEED = 42 @@ -83,6 +85,34 @@ def build_pairs(with_preprocessor=False): return Dataset(X[c], target, None, X[c[:, 0]]) +def build_triplets(with_preprocessor=False): + input_data, labels = load_iris(return_X_y=True) + X, y = shuffle(input_data, labels, random_state=SEED) + constraints = Constraints(y) + triplets = constraints.generate_knntriplets(X, k_genuine=3, k_impostor=4) + if with_preprocessor: + # if preprocessor, we build a 2D array of triplets of indices + return triplets, X + else: + # if not, we build a 3D array of triplets of samples + return X[triplets], None + + +class mock_triplet_LSML(_BaseLSML, _TripletsClassifierMixin): + # Mock Triplet learner from LSML which is a quadruplets learner + # in order to test TripletClassifierMixin basic methods + + _tuple_size = 4 + + def fit(self, triplets, weights=None): + quadruplets = triplets[:, [0, 1, 0, 2]] + return self._fit(quadruplets, weights=weights) + + def decision_function(self, triplets): + self._tuple_size = 3 + return _TripletsClassifierMixin.decision_function(self, triplets) + + def build_quadruplets(with_preprocessor=False): # builds a toy quadruplets problem X, indices = build_data() @@ -103,6 +133,11 @@ def build_quadruplets(with_preprocessor=False): [learner for (learner, _) in quadruplets_learners])) +triplets_learners = [(mock_triplet_LSML(), build_triplets)] +ids_triplets_learners = list(map(lambda x: x.__class__.__name__, + [learner for (learner, _) in + triplets_learners])) + pairs_learners = [(ITML(max_iter=2), build_pairs), # max_iter=2 to be faster (MMC(max_iter=2), build_pairs), # max_iter=2 to be faster (SDML(prior='identity', balance_param=1e-5), build_pairs)] From c378e4b0e3c594feb1f7d1a6b8af0573c02f1f2d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20Bellet?= Date: Wed, 4 Mar 2020 17:18:31 +0100 Subject: [PATCH 159/210] [MRG] Correct a few small doc issues following #280 (#281) * add docstrings for constraints class, pairs and chunks methods * fix missing optional values and descriptions, uniformize * fix indentation problems in docstring and uniformize * fix more small things * cosmit * remove unnecessary line * missing blank line for pep8 --- doc/conf.py | 11 +++++++++++ doc/weakly_supervised.rst | 4 ++-- metric_learn/base_metric.py | 14 +++++++------- metric_learn/constraints.py | 29 ++++++++++++++++------------- metric_learn/itml.py | 9 ++++++--- metric_learn/lfda.py | 29 ++++++++++++++++++----------- metric_learn/lmnn.py | 9 +++++---- metric_learn/lsml.py | 2 +- metric_learn/mlkr.py | 12 ++++++------ 
metric_learn/mmc.py | 17 +++++------------ metric_learn/nca.py | 2 +- metric_learn/rca.py | 8 ++++---- metric_learn/sdml.py | 12 +++++------- 13 files changed, 87 insertions(+), 71 deletions(-) diff --git a/doc/conf.py b/doc/conf.py index 66ff3dcd..796b7861 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -69,3 +69,14 @@ # Switch to old behavior with html4, for a good display of references, # as described in https://github.com/sphinx-doc/sphinx/issues/6705 html4_writer = True + + +# Temporary work-around for spacing problem between parameter and parameter +# type in the doc, see https://github.com/numpy/numpydoc/issues/215. The bug +# has been fixed in sphinx (https://github.com/sphinx-doc/sphinx/pull/5976) but +# through a change in sphinx basic.css except rtd_theme does not use basic.css. +# In an ideal world, this would get fixed in this PR: +# https://github.com/readthedocs/sphinx_rtd_theme/pull/747/files +def setup(app): + app.add_javascript('js/copybutton.js') + app.add_stylesheet("basic.css") diff --git a/doc/weakly_supervised.rst b/doc/weakly_supervised.rst index 72f68627..174d1a8b 100644 --- a/doc/weakly_supervised.rst +++ b/doc/weakly_supervised.rst @@ -483,7 +483,7 @@ is the off-diagonal L1 norm. L1-penalized log-determinant regularization `_. ICML 2009. - .. [2] Adapted from https://gist.github.com/kcarnold/5439945 + .. [2] Code adapted from https://gist.github.com/kcarnold/5439945 .. _rca: @@ -893,6 +893,6 @@ by default, :math:`D_{ld}(\mathbf{\cdot, \cdot})` is the LogDet divergence: `Metric Learning from Relative Comparisons by Minimizing Squared Residual `_. ICDM 2012 - .. [2] Adapted from https://gist.github.com/kcarnold/5439917 + .. [2] Code adapted from https://gist.github.com/kcarnold/5439917 diff --git a/metric_learn/base_metric.py b/metric_learn/base_metric.py index 9dc38622..65692aed 100644 --- a/metric_learn/base_metric.py +++ b/metric_learn/base_metric.py @@ -39,7 +39,7 @@ def score_pairs(self, pairs): Returns ------- - scores: `numpy.ndarray` of shape=(n_pairs,) + scores : `numpy.ndarray` of shape=(n_pairs,) The score of every pair. See Also @@ -69,19 +69,19 @@ def _prepare_inputs(self, X, y=None, type_of_inputs='classic', Parameters ---------- - input: array-like + X : array-like The input data array to check. y : array-like The input labels array to check. - type_of_inputs: `str` {'classic', 'tuples'} + type_of_inputs : `str` {'classic', 'tuples'} The type of inputs to check. If 'classic', the input should be a 2D array-like of points or a 1D array like of indicators of points. If 'tuples', the input should be a 3D array-like of tuples or a 2D array-like of indicators of tuples. - **kwargs: dict + **kwargs : dict Arguments to pass to check_input. Returns @@ -89,7 +89,7 @@ def _prepare_inputs(self, X, y=None, type_of_inputs='classic', X : `numpy.ndarray` The checked input data array. - y: `numpy.ndarray` (optional) + y : `numpy.ndarray` (optional) The checked input labels array. """ self._check_preprocessor() @@ -203,7 +203,7 @@ def score_pairs(self, pairs): Returns ------- - scores: `numpy.ndarray` of shape=(n_pairs,) + scores : `numpy.ndarray` of shape=(n_pairs,) The learned Mahalanobis distance for every pair. See Also @@ -271,7 +271,7 @@ def metric_fun(u, v, squared=False): Returns ------- - distance: float + distance : float The distance between u and v according to the new metric. 
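+
+      Examples
+      --------
+      A minimal sketch of a call, assuming ``metric_fun`` was obtained from a
+      fitted learner through ``get_metric()`` (the values shown are
+      illustrative only)::
+
+        dist = metric_fun([1., 2.], [3., 4.])
+        sq_dist = metric_fun([1., 2.], [3., 4.], squared=True)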
""" u = validate_vector(u) diff --git a/metric_learn/constraints.py b/metric_learn/constraints.py index 305223cd..b15d0277 100644 --- a/metric_learn/constraints.py +++ b/metric_learn/constraints.py @@ -20,12 +20,12 @@ class Constraints(object): Parameters ---------- partial_labels : `numpy.ndarray` of ints, shape=(n_samples,) - Array of labels, with -1 indicating unknown label. + Array of labels, with -1 indicating unknown label. Attributes ---------- partial_labels : `numpy.ndarray` of ints, shape=(n_samples,) - Array of labels, with -1 indicating unknown label. + Array of labels, with -1 indicating unknown label. """ def __init__(self, partial_labels): @@ -46,26 +46,29 @@ def positive_negative_pairs(self, num_constraints, same_length=False, Parameters ---------- - num_constraints : int - Number of positive and negative constraints to generate. - same_length : bool, optional (default=False) - If True, forces the number of positive and negative pairs to be - equal by ignoring some pairs from the larger set. - random_state : int or numpy.RandomState or None, optional (default=None) - A pseudo random number generator object or a seed for it if int. + num_constraints : int + Number of positive and negative constraints to generate. + + same_length : bool, optional (default=False) + If True, forces the number of positive and negative pairs to be + equal by ignoring some pairs from the larger set. + + random_state : int or numpy.RandomState or None, optional (default=None) + A pseudo random number generator object or a seed for it if int. + Returns ------- a : array-like, shape=(n_constraints,) - 1D array of indicators for the left elements of positive pairs. + 1D array of indicators for the left elements of positive pairs. b : array-like, shape=(n_constraints,) - 1D array of indicators for the right elements of positive pairs. + 1D array of indicators for the right elements of positive pairs. c : array-like, shape=(n_constraints,) - 1D array of indicators for the left elements of negative pairs. + 1D array of indicators for the left elements of negative pairs. d : array-like, shape=(n_constraints,) - 1D array of indicators for the right elements of negative pairs. + 1D array of indicators for the right elements of negative pairs. """ random_state = check_random_state(random_state) a, b = self._pairs(num_constraints, same_label=True, diff --git a/metric_learn/itml.py b/metric_learn/itml.py index 2094e160..5db438d8 100644 --- a/metric_learn/itml.py +++ b/metric_learn/itml.py @@ -211,9 +211,9 @@ class ITML(_BaseITML, _PairsClassifierMixin): References ---------- - .. [1] `Information-theoretic Metric Learning + .. [1] Jason V. Davis, et al. `Information-theoretic Metric Learning `_ Jason V. Davis, et al. + /DavisKJSD07_ICML.pdf>`_. ICML 2007. """ def fit(self, pairs, y, bounds=None, calibration_params=None): @@ -229,8 +229,10 @@ def fit(self, pairs, y, bounds=None, calibration_params=None): 3D Array of pairs with each row corresponding to two points, or 2D array of indices of pairs if the metric learner uses a preprocessor. + y: array-like, of shape (n_constraints,) Labels of constraints. Should be -1 for dissimilar pair, 1 for similar. + bounds : array-like of two numbers Bounds on similarity, aside slack variables, s.t. 
``d(a, b) < bounds_[0]`` for all given pairs of similar points ``a`` @@ -239,6 +241,7 @@ def fit(self, pairs, y, bounds=None, calibration_params=None): If not provided at initialization, bounds_[0] and bounds_[1] will be set to the 5th and 95th percentile of the pairwise distances among all points present in the input `pairs`. + calibration_params : `dict` or `None` Dictionary of parameters to give to `calibrate_threshold` for the threshold calibration step done at the end of `fit`. If `None` is @@ -280,7 +283,7 @@ class ITML_Supervised(_BaseITML, TransformerMixin): `num_labeled` was deprecated in version 0.5.0 and will be removed in 0.6.0. - num_constraints: int, optional (default=None) + num_constraints : int, optional (default=None) Number of constraints to generate. If None, default to `20 * num_classes**2`. diff --git a/metric_learn/lfda.py b/metric_learn/lfda.py index 12617a94..a970e789 100644 --- a/metric_learn/lfda.py +++ b/metric_learn/lfda.py @@ -39,10 +39,16 @@ class LFDA(MahalanobisMixin, TransformerMixin): defaults to min(7, n_features - 1). embedding_type : str, optional (default: 'weighted') - Type of metric in the embedding space - 'weighted' - weighted eigenvectors - 'orthonormalized' - orthonormalized - 'plain' - raw eigenvectors + Type of metric in the embedding space. + + 'weighted' + weighted eigenvectors + + 'orthonormalized' + orthonormalized + + 'plain' + raw eigenvectors preprocessor : array-like, shape=(n_samples, n_features) or callable The preprocessor to call to get tuples from indices. If array-like, @@ -67,13 +73,14 @@ class LFDA(MahalanobisMixin, TransformerMixin): References ------------------ - .. [1] `Dimensionality Reduction of Multimodal Labeled Data by Local Fisher - Discriminant Analysis `_ - Masashi Sugiyama. - - .. [2] `Local Fisher Discriminant Analysis on Beer Style Clustering - `_ Yuan Tang. + .. [1] Masashi Sugiyama. `Dimensionality Reduction of Multimodal Labeled + Data by Local Fisher Discriminant Analysis + `_. JMLR 2007. + + .. [2] Yuan Tang. `Local Fisher Discriminant Analysis on Beer Style + Clustering + `_. ''' def __init__(self, n_components=None, num_dims='deprecated', diff --git a/metric_learn/lmnn.py b/metric_learn/lmnn.py index df8fe649..a026a8f6 100644 --- a/metric_learn/lmnn.py +++ b/metric_learn/lmnn.py @@ -137,10 +137,11 @@ class LMNN(MahalanobisMixin, TransformerMixin): References ---------- - .. [1] `Distance Metric Learning for Large Margin Nearest Neighbor - Classification `_ - Kilian Q. Weinberger, John Blitzer, Lawrence K. Saul + .. [1] K. Q. Weinberger, J. Blitzer, L. K. Saul. `Distance Metric + Learning for Large Margin Nearest Neighbor Classification + `_. NIPS + 2005. """ def __init__(self, init=None, k=3, min_iter=50, max_iter=1000, diff --git a/metric_learn/lsml.py b/metric_learn/lsml.py index c4cdca97..5e84bf86 100644 --- a/metric_learn/lsml.py +++ b/metric_learn/lsml.py @@ -208,7 +208,7 @@ class LSML(_BaseLSML, _QuadrupletsClassifierMixin): Squared Residual `_. ICDM 2012. - .. [2] Adapted from https://gist.github.com/kcarnold/5439917 + .. [2] Code adapted from https://gist.github.com/kcarnold/5439917 See Also -------- diff --git a/metric_learn/mlkr.py b/metric_learn/mlkr.py index 3199b518..5fffee9b 100644 --- a/metric_learn/mlkr.py +++ b/metric_learn/mlkr.py @@ -73,15 +73,15 @@ class MLKR(MahalanobisMixin, TransformerMixin): :meth:`fit` and n_features_a must be less than or equal to that. If ``n_components`` is not None, n_features_a must match it. - A0: Not used. + A0 : Not used. .. 
deprecated:: 0.5.0
      `A0` was deprecated in version 0.5.0 and will be removed in 0.6.0.
      Use 'init' instead.

-  tol: float, optional (default=None)
+  tol : float, optional (default=None)
     Convergence tolerance for the optimization.

-  max_iter: int, optional (default=1000)
+  max_iter : int, optional (default=1000)
     Cap on number of conjugate gradient iterations.

   verbose : bool, optional (default=False)
@@ -118,9 +118,9 @@ class MLKR(MahalanobisMixin, TransformerMixin):

   References
   ----------
-  .. [1] `Information-theoretic Metric Learning
-         `_ Jason V. Davis, et al.
+  .. [1] K.Q. Weinberger and G. Tesauro. `Metric Learning for Kernel
+         Regression `_. AISTATS 2007.
   """

   def __init__(self, n_components=None, num_dims='deprecated', init=None,
diff --git a/metric_learn/mmc.py b/metric_learn/mmc.py
index 3769497e..3ef9c534 100644
--- a/metric_learn/mmc.py
+++ b/metric_learn/mmc.py
@@ -383,10 +383,6 @@ class MMC(_BaseMMC, _PairsClassifierMixin):
     An SPD matrix of shape (n_features, n_features), that will be used as
     such to initialize the metric.

-  preprocessor : array-like, shape=(n_samples, n_features) or callable
-    The preprocessor to call to get tuples from indices. If array-like,
-    tuples will be gotten like this: X[indices].
-
   A0 : Not used.
     .. deprecated:: 0.5.0
       `A0` was deprecated in version 0.5.0 and will
@@ -442,10 +438,11 @@

   References
   ----------
-  .. [1] `Distance metric learning with application to clustering with
-         side-information `_
-         Xing, Jordan, Russell, Ng.
+  .. [1] Xing, Jordan, Russell, Ng. `Distance metric learning with application
+         to clustering with side-information
+         `_.
+         NIPS 2002.

   See Also
   --------
@@ -538,10 +535,6 @@ class MMC_Supervised(_BaseMMC, TransformerMixin):
     A numpy array of shape (n_features, n_features), that will be used as
     such to initialize the metric.

-  preprocessor : array-like, shape=(n_samples, n_features) or callable
-    The preprocessor to call to get tuples from indices. If array-like,
-    tuples will be gotten like this: X[indices].
-
   A0 : Not used.
     .. deprecated:: 0.5.0
       `A0` was deprecated in version 0.5.0 and will
diff --git a/metric_learn/nca.py b/metric_learn/nca.py
index 983f1120..fbce5658 100644
--- a/metric_learn/nca.py
+++ b/metric_learn/nca.py
@@ -123,7 +123,7 @@ class NCA(MahalanobisMixin, TransformerMixin):
   .. [1] J. Goldberger, G. Hinton, S. Roweis, R. Salakhutdinov.
          `Neighbourhood Components Analysis
          `_.
-         Advances in Neural Information Processing Systems. 17, 513-520, 2005.
+         NIPS 2005.

   .. [2] Wikipedia entry on `Neighborhood Components Analysis
          `_
diff --git a/metric_learn/rca.py b/metric_learn/rca.py
index f3a2ac89..32024a43 100644
--- a/metric_learn/rca.py
+++ b/metric_learn/rca.py
@@ -72,10 +72,10 @@

   References
   ------------------
-  .. [1] `Adjustment learning and relevant component analysis
-         `_ Noam
-         Shental, et al.
+  .. [1] Noam Shental, et al. `Adjustment learning and relevant component
+         analysis `_ .
+         ECCV 2002.


   Attributes
diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py
index 944739f2..38c50955 100644
--- a/metric_learn/sdml.py
+++ b/metric_learn/sdml.py
@@ -211,14 +211,12 @@ class SDML(_BaseSDML, _PairsClassifierMixin):

   References
   ----------
+  .. [1] Qi et al. `An efficient sparse metric learning in high-dimensional
+         space via L1-penalized log-determinant regularization
+         `_.
+         ICML 2009.

-  .. [1] Qi et al.
-         An efficient sparse metric learning in high-dimensional space via
-         L1-penalized log-determinant regularization. ICML 2009.
- http://lms.comp.nus.edu.sg/sites/default/files/publication\ --attachments/icml09-guojun.pdf - - .. [2] Adapted from https://gist.github.com/kcarnold/5439945 + .. [2] Code adapted from https://gist.github.com/kcarnold/5439945 """ def fit(self, pairs, y, calibration_params=None): From a748c07d902911e8988fd0ea2cbef85998543d50 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20Bellet?= Date: Thu, 26 Mar 2020 13:17:17 +0100 Subject: [PATCH 160/210] install pytest 4 in travis for python 3.4 (#282) --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 8a7ebf38..321d195a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -22,7 +22,7 @@ matrix: python: "3.4" before_install: - sudo apt-get install liblapack-dev - - pip install --upgrade pip pytest + - pip install --upgrade pip "pytest<5" - pip install wheel cython numpy scipy codecov pytest-cov scikit-learn script: - pytest test --cov; From 8a02af7258a5f249a0b0fe4b8e48fbf354c35c47 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20Bellet?= Date: Tue, 31 Mar 2020 18:34:47 +0200 Subject: [PATCH 161/210] [MRG] Allow installation from conda (#283) * add placeholder sentence * update setup.cfg to include license file to dist * update README * README * README typo * update doc * typo --- README.rst | 7 +++---- doc/getting_started.rst | 11 +++++++---- setup.cfg | 4 +++- 3 files changed, 13 insertions(+), 9 deletions(-) diff --git a/README.rst b/README.rst index 8f68df90..5afe556c 100644 --- a/README.rst +++ b/README.rst @@ -31,12 +31,11 @@ metric-learn contains efficient Python implementations of several popular superv **Installation/Setup** -Run ``pip install metric-learn`` to download and install from PyPI. +- If you use Anaconda: ``conda install -c conda-forge metric-learn``. See more options `here `_. -Run ``python setup.py install`` for default installation. +- To install from PyPI: ``pip install metric-learn``. -Run ``pytest test`` to run all tests (you will need to have the ``pytest`` -package installed). +- For a manual install of the latest code, download the source repository and run ``python setup.py install``. You may then run ``pytest test`` to run all tests (you will need to have the ``pytest`` package installed). **Usage** diff --git a/doc/getting_started.rst b/doc/getting_started.rst index 97abb9a5..f1b35b4f 100644 --- a/doc/getting_started.rst +++ b/doc/getting_started.rst @@ -5,12 +5,15 @@ Getting started Installation and Setup ====================== -Run ``pip install metric-learn`` to download and install from PyPI. +**Installation** -Alternately, download the source repository and run: +metric-learn can be installed in either of the following ways: -- ``python setup.py install`` for default installation. -- ``pytest test`` to run all tests. +- If you use Anaconda: ``conda install -c conda-forge metric-learn``. See more options `here `_. + +- To install from PyPI: ``pip install metric-learn``. + +- For a manual install of the latest code, download the source repository and run ``python setup.py install``. You may then run ``pytest test`` to run all tests (you will need to have the ``pytest`` package installed). 
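+
+A quick sanity check that the installation worked (a minimal sketch; the
+version string printed will depend on the release you installed)::
+
+    import metric_learn
+    print(metric_learn.__version__)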
**Dependencies** diff --git a/setup.cfg b/setup.cfg index 8d95aa1e..bc7695e3 100644 --- a/setup.cfg +++ b/setup.cfg @@ -2,4 +2,6 @@ universal = 1 [metadata] -description-file = README.rst \ No newline at end of file +description-file = README.rst +license_files = + LICENSE.txt From 2d5a942c0f48a093f2e1c0dca2426e632626a9a6 Mon Sep 17 00:00:00 2001 From: CJ Carey Date: Fri, 22 May 2020 14:50:35 -0400 Subject: [PATCH 162/210] Use scipy's logsumexp function (#290) Fixes gh-289 --- metric_learn/mlkr.py | 8 +++----- metric_learn/nca.py | 6 +++--- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/metric_learn/mlkr.py b/metric_learn/mlkr.py index 5fffee9b..c65341be 100644 --- a/metric_learn/mlkr.py +++ b/metric_learn/mlkr.py @@ -6,16 +6,14 @@ import sys import warnings import numpy as np -from sklearn.exceptions import ConvergenceWarning, ChangedBehaviorWarning -from sklearn.utils.fixes import logsumexp from scipy.optimize import minimize +from scipy.special import logsumexp from sklearn.base import TransformerMixin - +from sklearn.exceptions import ConvergenceWarning, ChangedBehaviorWarning from sklearn.metrics import pairwise_distances -from metric_learn._util import _check_n_components from .base_metric import MahalanobisMixin -from ._util import _initialize_components +from ._util import _initialize_components, _check_n_components EPS = np.finfo(float).eps diff --git a/metric_learn/nca.py b/metric_learn/nca.py index fbce5658..d09e7282 100644 --- a/metric_learn/nca.py +++ b/metric_learn/nca.py @@ -8,10 +8,10 @@ import sys import numpy as np from scipy.optimize import minimize -from sklearn.metrics import pairwise_distances -from sklearn.exceptions import ConvergenceWarning, ChangedBehaviorWarning -from sklearn.utils.fixes import logsumexp +from scipy.special import logsumexp from sklearn.base import TransformerMixin +from sklearn.exceptions import ConvergenceWarning, ChangedBehaviorWarning +from sklearn.metrics import pairwise_distances from ._util import _initialize_components, _check_n_components from .base_metric import MahalanobisMixin From c15f1c3e2650593388c5dab0e50147dca88c4e58 Mon Sep 17 00:00:00 2001 From: CJ Carey Date: Wed, 27 May 2020 09:04:31 -0400 Subject: [PATCH 163/210] Drop support for python 2 and python 3.5 (#291) * Drop Python2 support * Fix test failures and pyflakes error * fix pep8 issue * Stop testing py3.5 * Update README --- .travis.yml | 15 +---- README.rst | 2 +- metric_learn/__init__.py | 2 - metric_learn/_util.py | 3 +- metric_learn/base_metric.py | 9 ++- metric_learn/constraints.py | 3 +- metric_learn/covariance.py | 1 - metric_learn/itml.py | 4 +- metric_learn/lfda.py | 4 +- metric_learn/lmnn.py | 6 +- metric_learn/lsml.py | 4 +- metric_learn/mlkr.py | 1 - metric_learn/mmc.py | 6 +- metric_learn/nca.py | 1 - metric_learn/rca.py | 4 +- metric_learn/sdml.py | 1 - setup.py | 3 +- test/metric_learn_test.py | 13 ++-- test/test_base_metric.py | 120 ++++++++++----------------------- test/test_pairs_classifiers.py | 2 - 20 files changed, 58 insertions(+), 146 deletions(-) diff --git a/.travis.yml b/.travis.yml index 321d195a..d294c294 100644 --- a/.travis.yml +++ b/.travis.yml @@ -7,23 +7,12 @@ env: - SKGGM_VERSION=a0ed406586c4364ea3297a658f415e13b5cbdaf8 matrix: include: - - name: "Pytest python 2.7 with skggm" - python: "2.7" + - name: "Pytest python 3.6 without skggm" + python: "3.6" before_install: - sudo apt-get install liblapack-dev - pip install --upgrade pip pytest - pip install wheel cython numpy scipy codecov pytest-cov scikit-learn - - pip install 
git+https://github.com/skggm/skggm.git@${SKGGM_VERSION}; - script: - - pytest test --cov; - after_success: - - bash <(curl -s https://codecov.io/bash) - - name: "Pytest python 3.4 without skggm" - python: "3.4" - before_install: - - sudo apt-get install liblapack-dev - - pip install --upgrade pip "pytest<5" - - pip install wheel cython numpy scipy codecov pytest-cov scikit-learn script: - pytest test --cov; after_success: diff --git a/README.rst b/README.rst index 5afe556c..ceb2eb33 100644 --- a/README.rst +++ b/README.rst @@ -19,7 +19,7 @@ metric-learn contains efficient Python implementations of several popular superv **Dependencies** -- Python 2.7+, 3.4+ +- Python 3.6+ - numpy, scipy, scikit-learn>=0.20.3 **Optional dependencies** diff --git a/metric_learn/__init__.py b/metric_learn/__init__.py index b036ccfa..c9d53883 100644 --- a/metric_learn/__init__.py +++ b/metric_learn/__init__.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import - from .constraints import Constraints from .covariance import Covariance from .itml import ITML, ITML_Supervised diff --git a/metric_learn/_util.py b/metric_learn/_util.py index 77e8d9fa..764a34c8 100644 --- a/metric_learn/_util.py +++ b/metric_learn/_util.py @@ -1,5 +1,4 @@ import numpy as np -import six from numpy.linalg import LinAlgError from sklearn.datasets import make_spd_matrix from sklearn.decomposition import PCA @@ -283,7 +282,7 @@ def make_name(estimator): if a string is given """ if estimator is not None: - if isinstance(estimator, six.string_types): + if isinstance(estimator, str): estimator_name = estimator else: estimator_name = estimator.__class__.__name__ diff --git a/metric_learn/base_metric.py b/metric_learn/base_metric.py index 65692aed..d1af0821 100644 --- a/metric_learn/base_metric.py +++ b/metric_learn/base_metric.py @@ -8,12 +8,11 @@ from sklearn.metrics import roc_auc_score, roc_curve, precision_recall_curve import numpy as np from abc import ABCMeta, abstractmethod -import six from ._util import ArrayIndexer, check_input, validate_vector import warnings -class BaseMetricLearner(six.with_metaclass(ABCMeta, BaseEstimator)): +class BaseMetricLearner(BaseEstimator, metaclass=ABCMeta): """ Base class for all metric-learners. @@ -145,7 +144,7 @@ def get_metric(self): """ -class MetricTransformer(six.with_metaclass(ABCMeta)): +class MetricTransformer(metaclass=ABCMeta): @abstractmethod def transform(self, X): @@ -163,8 +162,8 @@ def transform(self, X): """ -class MahalanobisMixin(six.with_metaclass(ABCMeta, BaseMetricLearner, - MetricTransformer)): +class MahalanobisMixin(BaseMetricLearner, MetricTransformer, + metaclass=ABCMeta): r"""Mahalanobis metric learning algorithms. 
Algorithm that learns a Mahalanobis (pseudo) distance :math:`d_M(x, x')`, diff --git a/metric_learn/constraints.py b/metric_learn/constraints.py index b15d0277..2d86b819 100644 --- a/metric_learn/constraints.py +++ b/metric_learn/constraints.py @@ -4,7 +4,6 @@ """ import numpy as np import warnings -from six.moves import xrange from sklearn.utils import check_random_state from sklearn.neighbors import NearestNeighbors @@ -245,7 +244,7 @@ def chunks(self, num_chunks=100, chunk_size=2, random_state=None): chunks = -np.ones_like(self.partial_labels, dtype=int) uniq, lookup = np.unique(self.partial_labels, return_inverse=True) unknown_uniq = np.where(uniq < 0)[0] - all_inds = [set(np.where(lookup == c)[0]) for c in xrange(len(uniq)) + all_inds = [set(np.where(lookup == c)[0]) for c in range(len(uniq)) if c not in unknown_uniq] max_chunks = int(np.sum([len(s) // chunk_size for s in all_inds])) if max_chunks < num_chunks: diff --git a/metric_learn/covariance.py b/metric_learn/covariance.py index 7214dd62..3b218e6d 100644 --- a/metric_learn/covariance.py +++ b/metric_learn/covariance.py @@ -2,7 +2,6 @@ Covariance metric (baseline method) """ -from __future__ import absolute_import import numpy as np import scipy from sklearn.base import TransformerMixin diff --git a/metric_learn/itml.py b/metric_learn/itml.py index 5db438d8..48d5a222 100644 --- a/metric_learn/itml.py +++ b/metric_learn/itml.py @@ -2,10 +2,8 @@ Information Theoretic Metric Learning (ITML) """ -from __future__ import print_function, absolute_import import warnings import numpy as np -from six.moves import xrange from sklearn.exceptions import ChangedBehaviorWarning from sklearn.metrics import pairwise_distances from sklearn.utils.validation import check_array @@ -69,7 +67,7 @@ def _fit(self, pairs, y, bounds=None): pos_vv = pos_pairs[:, 0, :] - pos_pairs[:, 1, :] neg_vv = neg_pairs[:, 0, :] - neg_pairs[:, 1, :] - for it in xrange(self.max_iter): + for it in range(self.max_iter): # update positives for i, v in enumerate(pos_vv): wtw = v.dot(A).dot(v) # scalar diff --git a/metric_learn/lfda.py b/metric_learn/lfda.py index a970e789..2feed169 100644 --- a/metric_learn/lfda.py +++ b/metric_learn/lfda.py @@ -1,11 +1,9 @@ """ Local Fisher Discriminant Analysis (LFDA) """ -from __future__ import division, absolute_import import numpy as np import scipy import warnings -from six.moves import xrange from sklearn.metrics import pairwise_distances from sklearn.base import TransformerMixin @@ -127,7 +125,7 @@ def fit(self, X, y): tSb = np.zeros((d, d)) tSw = np.zeros((d, d)) - for c in xrange(num_classes): + for c in range(num_classes): Xc = X[y == c] nc = Xc.shape[0] diff --git a/metric_learn/lmnn.py b/metric_learn/lmnn.py index a026a8f6..12eb5ab1 100644 --- a/metric_learn/lmnn.py +++ b/metric_learn/lmnn.py @@ -1,11 +1,9 @@ """ Large Margin Nearest Neighbor Metric learning (LMNN) """ -from __future__ import print_function, absolute_import import numpy as np import warnings from collections import Counter -from six.moves import xrange from sklearn.exceptions import ChangedBehaviorWarning from sklearn.metrics import euclidean_distances from sklearn.base import TransformerMixin @@ -229,7 +227,7 @@ def fit(self, X, y): "| learning rate") # main loop - for it in xrange(2, self.max_iter): + for it in range(2, self.max_iter): # then at each iteration, we try to find a value of L that has better # objective than the previous L, following the gradient: while True: @@ -293,7 +291,7 @@ def _loss_grad(self, X, L, dfG, k, reg, target_neighbors, 
label_inds): # compute the gradient total_active = 0 df = np.zeros((X.shape[1], X.shape[1])) - for nn_idx in reversed(xrange(k)): # note: reverse not useful here + for nn_idx in reversed(range(k)): # note: reverse not useful here act1 = g0 < g1[:, nn_idx] act2 = g0 < g2[:, nn_idx] total_active += act1.sum() + act2.sum() diff --git a/metric_learn/lsml.py b/metric_learn/lsml.py index 5e84bf86..0cf9dc22 100644 --- a/metric_learn/lsml.py +++ b/metric_learn/lsml.py @@ -2,11 +2,9 @@ Metric Learning from Relative Comparisons by Minimizing Squared Residual (LSML) """ -from __future__ import print_function, absolute_import, division import warnings import numpy as np import scipy.linalg -from six.moves import xrange from sklearn.base import TransformerMixin from sklearn.exceptions import ChangedBehaviorWarning @@ -66,7 +64,7 @@ def _fit(self, quadruplets, weights=None): s_best = self._total_loss(M, vab, vcd, prior_inv) if self.verbose: print('initial loss', s_best) - for it in xrange(1, self.max_iter + 1): + for it in range(1, self.max_iter + 1): grad = self._gradient(M, vab, vcd, prior_inv) grad_norm = scipy.linalg.norm(grad) if grad_norm < self.tol: diff --git a/metric_learn/mlkr.py b/metric_learn/mlkr.py index c65341be..9b84dba8 100644 --- a/metric_learn/mlkr.py +++ b/metric_learn/mlkr.py @@ -1,7 +1,6 @@ """ Metric Learning for Kernel Regression (MLKR) """ -from __future__ import division, print_function import time import sys import warnings diff --git a/metric_learn/mmc.py b/metric_learn/mmc.py index 3ef9c534..330e2113 100644 --- a/metric_learn/mmc.py +++ b/metric_learn/mmc.py @@ -1,8 +1,6 @@ """Mahalanobis Metric for Clustering (MMC)""" -from __future__ import print_function, absolute_import, division import warnings import numpy as np -from six.moves import xrange from sklearn.base import TransformerMixin from sklearn.utils.validation import assert_all_finite from sklearn.exceptions import ChangedBehaviorWarning @@ -110,12 +108,12 @@ def _fit_full(self, pairs, y): A_old = A.copy() - for cycle in xrange(self.max_iter): + for cycle in range(self.max_iter): # projection of constraints C1 and C2 satisfy = False - for it in xrange(self.max_proj): + for it in range(self.max_proj): # First constraint: # f(A) = \sum_{i,j \in S} d_ij' A d_ij <= t (1) diff --git a/metric_learn/nca.py b/metric_learn/nca.py index d09e7282..217d7d28 100644 --- a/metric_learn/nca.py +++ b/metric_learn/nca.py @@ -2,7 +2,6 @@ Neighborhood Components Analysis (NCA) """ -from __future__ import absolute_import import warnings import time import sys diff --git a/metric_learn/rca.py b/metric_learn/rca.py index 32024a43..2004b9d4 100644 --- a/metric_learn/rca.py +++ b/metric_learn/rca.py @@ -2,10 +2,8 @@ Relative Components Analysis (RCA) """ -from __future__ import absolute_import import numpy as np import warnings -from six.moves import xrange from sklearn.base import TransformerMixin from sklearn.exceptions import ChangedBehaviorWarning @@ -22,7 +20,7 @@ def _chunk_mean_centering(data, chunks): # mean on it chunk_data = data[chunk_mask].astype(float, copy=False) chunk_labels = chunks[chunk_mask] - for c in xrange(num_chunks): + for c in range(num_chunks): mask = chunk_labels == c chunk_data[mask] -= chunk_data[mask].mean(axis=0) diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py index 38c50955..f7c801e8 100644 --- a/metric_learn/sdml.py +++ b/metric_learn/sdml.py @@ -2,7 +2,6 @@ Sparse High-Dimensional Metric Learning (SDML) """ -from __future__ import absolute_import import warnings import numpy as np from sklearn.base 
import TransformerMixin diff --git a/setup.py b/setup.py index c8b38b7c..1e6f0002 100755 --- a/setup.py +++ b/setup.py @@ -29,7 +29,7 @@ classifiers=[ 'Development Status :: 4 - Beta', 'License :: OSI Approved :: MIT License', - 'Programming Language :: Python', + 'Programming Language :: Python :: 3', 'Operating System :: OS Independent', 'Intended Audience :: Science/Research', 'Topic :: Scientific/Engineering' @@ -39,7 +39,6 @@ 'numpy', 'scipy', 'scikit-learn', - 'six' ], extras_require=dict( docs=['sphinx', 'shinx_rtd_theme', 'numpydoc'], diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py index 5a271890..a97f6437 100644 --- a/test/metric_learn_test.py +++ b/test/metric_learn_test.py @@ -4,7 +4,6 @@ import numpy as np import scipy from scipy.optimize import check_grad, approx_fprime -from six.moves import xrange from sklearn.metrics import pairwise_distances, euclidean_distances from sklearn.datasets import (load_iris, make_classification, make_regression, make_spd_matrix) @@ -32,7 +31,7 @@ def class_separation(X, labels): unique_labels, label_inds = np.unique(labels, return_inverse=True) ratio = 0 - for li in xrange(len(unique_labels)): + for li in range(len(unique_labels)): Xc = X[label_inds == li] Xnc = X[label_inds != li] ratio += pairwise_distances(Xc).mean() / pairwise_distances(Xc, Xnc).mean() @@ -385,15 +384,15 @@ def loss_fn(L, X, y, target_neighbors, reg): for j in target_neighbors[i]: loss += (1 - reg) * np.sum((Lx[i] - Lx[j]) ** 2) grad += (1 - reg) * np.outer(Lx[i] - Lx[j], X[i] - X[j]) - for l in range(X.shape[0]): - if y[i] != y[l]: + for k in range(X.shape[0]): + if y[i] != y[k]: hin, active = hinge(1 + np.sum((Lx[i] - Lx[j])**2) - - np.sum((Lx[i] - Lx[l])**2)) + np.sum((Lx[i] - Lx[k])**2)) total_active += active if active: loss += reg * hin grad += (reg * (np.outer(Lx[i] - Lx[j], X[i] - X[j]) - - np.outer(Lx[i] - Lx[l], X[i] - X[l]))) + np.outer(Lx[i] - Lx[k], X[i] - X[k]))) grad = 2 * grad return grad, loss, total_active @@ -521,7 +520,7 @@ def test_toy_ex_lmnn(X, y, loss): # storage a1 = [None] * k a2 = [None] * k - for nn_idx in xrange(k): + for nn_idx in range(k): a1[nn_idx] = np.array([]) a2[nn_idx] = np.array([]) diff --git a/test/test_base_metric.py b/test/test_base_metric.py index b2b1d339..0b1fbd22 100644 --- a/test/test_base_metric.py +++ b/test/test_base_metric.py @@ -16,111 +16,59 @@ class TestStringRepr(unittest.TestCase): def test_covariance(self): self.assertEqual(remove_spaces(str(metric_learn.Covariance())), - remove_spaces("Covariance(preprocessor=None)")) + remove_spaces("Covariance()")) def test_lmnn(self): self.assertEqual( - remove_spaces(str(metric_learn.LMNN())), - remove_spaces( - "LMNN(convergence_tol=0.001, init=None, k=3, " - "learn_rate=1e-07, " - "max_iter=1000, min_iter=50, n_components=None, " - "num_dims='deprecated', preprocessor=None, random_state=None, " - "regularization=0.5, use_pca='deprecated', verbose=False)")) + remove_spaces(str(metric_learn.LMNN(convergence_tol=0.01, k=6))), + remove_spaces("LMNN(convergence_tol=0.01, k=6)")) def test_nca(self): - self.assertEqual(remove_spaces(str(metric_learn.NCA())), - remove_spaces("NCA(init=None, max_iter=100," - "n_components=None, " - "num_dims='deprecated', " - "preprocessor=None, random_state=None, " - "tol=None, verbose=False)")) + self.assertEqual(remove_spaces(str(metric_learn.NCA(max_iter=42))), + remove_spaces("NCA(max_iter=42)")) def test_lfda(self): - self.assertEqual(remove_spaces(str(metric_learn.LFDA())), - remove_spaces( - "LFDA(embedding_type='weighted', 
k=None, " - "n_components=None, num_dims='deprecated'," - "preprocessor=None)")) + self.assertEqual(remove_spaces(str(metric_learn.LFDA(k=2))), + remove_spaces("LFDA(k=2)")) def test_itml(self): - self.assertEqual(remove_spaces(str(metric_learn.ITML())), - remove_spaces(""" -ITML(A0='deprecated', convergence_threshold=0.001, gamma=1.0, - max_iter=1000, preprocessor=None, prior='identity', random_state=None, - verbose=False) -""")) - self.assertEqual(remove_spaces(str(metric_learn.ITML_Supervised())), - remove_spaces(""" -ITML_Supervised(A0='deprecated', bounds='deprecated', - convergence_threshold=0.001, gamma=1.0, - max_iter=1000, num_constraints=None, num_labeled='deprecated', - preprocessor=None, prior='identity', random_state=None, verbose=False) -""")) + self.assertEqual(remove_spaces(str(metric_learn.ITML(gamma=0.5))), + remove_spaces("ITML(gamma=0.5)")) + self.assertEqual( + remove_spaces(str(metric_learn.ITML_Supervised(num_constraints=7))), + remove_spaces("ITML_Supervised(num_constraints=7)")) def test_lsml(self): - self.assertEqual(remove_spaces(str(metric_learn.LSML())), - remove_spaces(""" -LSML(max_iter=1000, preprocessor=None, prior=None, - random_state=None, tol=0.001, verbose=False) -""")) - self.assertEqual(remove_spaces(str(metric_learn.LSML_Supervised())), - remove_spaces(""" -LSML_Supervised(max_iter=1000, num_constraints=None, - num_labeled='deprecated', preprocessor=None, prior=None, - random_state=None, tol=0.001, verbose=False, weights=None) -""")) + self.assertEqual(remove_spaces(str(metric_learn.LSML(tol=0.1))), + remove_spaces("LSML(tol=0.1)")) + self.assertEqual( + remove_spaces(str(metric_learn.LSML_Supervised(verbose=True))), + remove_spaces("LSML_Supervised(verbose=True)")) def test_sdml(self): - self.assertEqual(remove_spaces(str(metric_learn.SDML())), - remove_spaces(""" -SDML(balance_param=0.5, preprocessor=None, prior=None, random_state=None, - sparsity_param=0.01, use_cov='deprecated', verbose=False) -""")) - self.assertEqual(remove_spaces(str(metric_learn.SDML_Supervised())), - remove_spaces(""" -SDML_Supervised(balance_param=0.5, num_constraints=None, - num_labeled='deprecated', preprocessor=None, prior=None, - random_state=None, sparsity_param=0.01, use_cov='deprecated', - verbose=False) -""")) + self.assertEqual(remove_spaces(str(metric_learn.SDML(verbose=True))), + remove_spaces("SDML(verbose=True)")) + self.assertEqual( + remove_spaces(str(metric_learn.SDML_Supervised(sparsity_param=0.5))), + remove_spaces("SDML_Supervised(sparsity_param=0.5)")) def test_rca(self): - self.assertEqual(remove_spaces(str(metric_learn.RCA())), - remove_spaces("RCA(n_components=None, " - "num_dims='deprecated', " - "pca_comps='deprecated', " - "preprocessor=None)")) - self.assertEqual(remove_spaces(str(metric_learn.RCA_Supervised())), - remove_spaces( - "RCA_Supervised(chunk_size=2, " - "n_components=None, num_chunks=100, " - "num_dims='deprecated', pca_comps='deprecated', " - "preprocessor=None, random_state=None)")) + self.assertEqual(remove_spaces(str(metric_learn.RCA(n_components=3))), + remove_spaces("RCA(n_components=3)")) + self.assertEqual( + remove_spaces(str(metric_learn.RCA_Supervised(num_chunks=5))), + remove_spaces("RCA_Supervised(num_chunks=5)")) def test_mlkr(self): - self.assertEqual(remove_spaces(str(metric_learn.MLKR())), - remove_spaces("MLKR(A0='deprecated', init=None," - "max_iter=1000, n_components=None," - "num_dims='deprecated', preprocessor=None," - "random_state=None, tol=None, " - "verbose=False)" - )) + 
self.assertEqual(remove_spaces(str(metric_learn.MLKR(max_iter=777))),
                     remove_spaces("MLKR(max_iter=777)"))

   def test_mmc(self):
-    self.assertEqual(remove_spaces(str(metric_learn.MMC())),
-                     remove_spaces("""
-MMC(A0='deprecated', convergence_threshold=0.001, diagonal=False,
-  diagonal_c=1.0, init=None, max_iter=100, max_proj=10000,
-  preprocessor=None, random_state=None, verbose=False)
-"""))
-    self.assertEqual(remove_spaces(str(metric_learn.MMC_Supervised())),
-                     remove_spaces("""
-MMC_Supervised(A0='deprecated', convergence_threshold=1e-06, diagonal=False,
-  diagonal_c=1.0, init=None, max_iter=100, max_proj=10000,
-  num_constraints=None, num_labeled='deprecated', preprocessor=None,
-  random_state=None, verbose=False)
-"""))
+    self.assertEqual(remove_spaces(str(metric_learn.MMC(diagonal=True))),
+                     remove_spaces("MMC(diagonal=True)"))
+    self.assertEqual(
+        remove_spaces(str(metric_learn.MMC_Supervised(max_iter=1))),
+        remove_spaces("MMC_Supervised(max_iter=1)"))


 @pytest.mark.parametrize('estimator, build_dataset', metric_learners,
diff --git a/test/test_pairs_classifiers.py b/test/test_pairs_classifiers.py
index 6c71abcd..c5ca27f4 100644
--- a/test/test_pairs_classifiers.py
+++ b/test/test_pairs_classifiers.py
@@ -1,5 +1,3 @@
-from __future__ import division
-
 from functools import partial

 import pytest

From 43a60c9f4380448b9013efa4fb021796e7e1ff3c Mon Sep 17 00:00:00 2001
From: Gabriel Rudloff 
Date: Wed, 17 Jun 2020 16:34:46 -0400
Subject: [PATCH 164/210] SCML : Sparse Compositional Metric Learning (#278)

* scml first commit
* add scml to __init__.py
* fix in components calculation
* remove triplet generator, added in triplets PR
* change init&fit interface, faster compute & others
* added comments & docstrings, small code changes
* typos and added choice of gamma & output_iter
* some small improvements
* lda tail handling rollback
* performance improvement by precomputing rand_ints
* small fix in components computation
* flake8 fix
* SCML_global fit fix & other small changes
* Proper use of init vars and unsup bases generation
* triplet dataset format & remove_y for triplets
* adaptation with dataset format
* remove labels for triplets and quadruplets
* remove labels
* remove labels & old fit random_state assignment
* compliant with older numpy versions
* small typo and fix order
* fix n_basis check
* initialize_basis_supervised and some refactoring
* proper n_basis handling
* scml specific tests
* remove small mistake
* test user input basis
* Changed names and messages and some refactoring
* triplets in features form passed to _fit
* change indices handling and edge case fix
* name change and typos
* improve test_components_is_2D
* Replace triplet_diffs option by better approach
* some comments, docstring and refactoring
* fix bad triplet set
* flake8 fix
* SCML doc first draft
* find neighbors for every class only once
* improve some docstring and warnings
* add sklearn compat test
* changes to doc
* fix and improve tests
* use components_from_metric
* change TestSCML to object and parametrize tests
* fix test_iris
* use model._authorized_basis and other fixes
* verbose test
* revert sum_where
* small n_basis warning instead of error
* add test iris on triplet_diffs
* test lda & triplet_diffs
* improved messages
* remove quadruplets and triplets from pipeline test
* test big n_features
* Correct output iters
* output_iter on supervised and improved verbose
* flake8 fix
* bases generation test comments
* change big_n_basis_lda error msg
* test generated n_basis and basis shape
* add mini
batch optimization * correct iter convention * eliminate n_samples = 1000 * batch grad refactored * adagrad adaptive learning * int input checks and tests * flake8 fix * no double division and smaller triplets arrays * minor grammar fixes * minor formatting tweaks Co-authored-by: CJ Carey --- doc/metric_learn.rst | 2 + doc/weakly_supervised.rst | 73 +++- metric_learn/__init__.py | 4 +- metric_learn/scml.py | 646 ++++++++++++++++++++++++++++++ test/metric_learn_test.py | 238 ++++++++++- test/test_base_metric.py | 18 +- test/test_mahalanobis_mixin.py | 60 +-- test/test_sklearn_compat.py | 127 +++--- test/test_triplets_classifiers.py | 7 +- test/test_utils.py | 79 ++-- 10 files changed, 1080 insertions(+), 174 deletions(-) create mode 100644 metric_learn/scml.py diff --git a/doc/metric_learn.rst b/doc/metric_learn.rst index 76c91f48..8f91d91c 100644 --- a/doc/metric_learn.rst +++ b/doc/metric_learn.rst @@ -33,6 +33,7 @@ Supervised Learning Algorithms metric_learn.MMC_Supervised metric_learn.SDML_Supervised metric_learn.RCA_Supervised + metric_learn.SCML_Supervised Weakly Supervised Learning Algorithms ------------------------------------- @@ -45,6 +46,7 @@ Weakly Supervised Learning Algorithms metric_learn.LSML metric_learn.MMC metric_learn.SDML + metric_learn.SCML Unsupervised Learning Algorithms -------------------------------- diff --git a/doc/weakly_supervised.rst b/doc/weakly_supervised.rst index 174d1a8b..82793b5b 100644 --- a/doc/weakly_supervised.rst +++ b/doc/weakly_supervised.rst @@ -700,6 +700,63 @@ of triplets that have the right predicted ordering. Algorithms ---------- +.. _scml: + +:py:class:`SCML ` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Sparse Compositional Metric Learning +(:py:class:`SCML `) + +`SCML` learns a squared Mahalanobis distance from triplet constraints by +optimizing sparse positive weights assigned to a set of :math:`K` rank-one +PSD bases. This can be formulated as an optimization problem with only +:math:`K` parameters, that can be solved with an efficient stochastic +composite scheme. + +The Mahalanobis matrix :math:`M` is built from a basis set :math:`B = \{b_i\}_{i=\{1,...,K\}}` +weighted by a :math:`K` dimensional vector :math:`w = \{w_i\}_{i=\{1,...,K\}}` as: + +.. math:: + + M = \sum_{i=1}^K w_i b_i b_i^T = B \cdot diag(w) \cdot B^T \quad w_i \geq 0 + +Learning :math:`M` in this form makes it PSD by design, as it is a +nonnegative sum of PSD matrices. The basis set :math:`B` is fixed in advance +and it is possible to construct it from the data. The optimization problem +over :math:`w` is formulated as a classic margin-based hinge loss function +involving the set :math:`C` of triplets. A regularization :math:`\ell_1` +is added to yield a sparse combination. The formulation is the following: + +.. math:: + + \min_{w\geq 0} \sum_{(x_i,x_j,x_k)\in C} [1 + d_w(x_i,x_j)-d_w(x_i,x_k)]_+ + \beta||w||_1 + +where :math:`[\cdot]_+` is the hinge loss. + +.. topic:: Example Code: + +:: + + from metric_learn import SCML + + triplets = [[[1.2, 7.5], [1.3, 1.5], [6.2, 9.7]], + [[1.3, 4.5], [3.2, 4.6], [5.4, 5.4]], + [[3.2, 7.5], [3.3, 1.5], [8.2, 9.7]], + [[3.3, 4.5], [5.2, 4.6], [7.4, 5.4]]] + + scml = SCML() + scml.fit(triplets) + +.. topic:: References: + + .. [1] Y. Shi, A. Bellet and F. Sha. `Sparse Compositional Metric Learning. + `_. \ + (AAAI), 2014. + + .. [2] Adapted from original \ + `Matlab implementation.`_. + .. 
_learning_on_quadruplets: @@ -829,13 +886,13 @@ extension leads to more stable estimation when the dimension is high and only a small amount of constraints is given. The loss function of each constraint -:math:`d(\mathbf{x}_a, \mathbf{x}_b) < d(\mathbf{x}_c, \mathbf{x}_d)` is +:math:`d(\mathbf{x}_i, \mathbf{x}_j) < d(\mathbf{x}_k, \mathbf{x}_l)` is denoted as: .. math:: - H(d_\mathbf{M}(\mathbf{x}_a, \mathbf{x}_b) - - d_\mathbf{M}(\mathbf{x}_c, \mathbf{x}_d)) + H(d_\mathbf{M}(\mathbf{x}_i, \mathbf{x}_j) + - d_\mathbf{M}(\mathbf{x}_k, \mathbf{x}_l)) where :math:`H(\cdot)` is the squared Hinge loss function defined as: @@ -845,8 +902,8 @@ where :math:`H(\cdot)` is the squared Hinge loss function defined as: \,\,x^2 \qquad x>0\end{aligned}\right.\\ The summed loss function :math:`L(C)` is the simple sum over all constraints -:math:`C = \{(\mathbf{x}_a , \mathbf{x}_b , \mathbf{x}_c , \mathbf{x}_d) -: d(\mathbf{x}_a , \mathbf{x}_b) < d(\mathbf{x}_c , \mathbf{x}_d)\}`. The +:math:`C = \{(\mathbf{x}_i , \mathbf{x}_j , \mathbf{x}_k , \mathbf{x}_l) +: d(\mathbf{x}_i , \mathbf{x}_j) < d(\mathbf{x}_k , \mathbf{x}_l)\}`. The original paper suggested here should be a weighted sum since the confidence or probability of each constraint might differ. However, for the sake of simplicity and assumption of no extra knowledge provided, we just deploy @@ -858,9 +915,9 @@ knowledge: .. math:: - \min_\mathbf{M}(D_{ld}(\mathbf{M, M_0}) + \sum_{(\mathbf{x}_a, - \mathbf{x}_b, \mathbf{x}_c, \mathbf{x}_d)\in C}H(d_\mathbf{M}( - \mathbf{x}_a, \mathbf{x}_b) - d_\mathbf{M}(\mathbf{x}_c, \mathbf{x}_c))\\ + \min_\mathbf{M}(D_{ld}(\mathbf{M, M_0}) + \sum_{(\mathbf{x}_i, + \mathbf{x}_j, \mathbf{x}_k, \mathbf{x}_l)\in C}H(d_\mathbf{M}( + \mathbf{x}_i, \mathbf{x}_j) - d_\mathbf{M}(\mathbf{x}_k, \mathbf{x}_l))\\ where :math:`\mathbf{M}_0` is the prior metric matrix, set as identity by default, :math:`D_{ld}(\mathbf{\cdot, \cdot})` is the LogDet divergence: diff --git a/metric_learn/__init__.py b/metric_learn/__init__.py index c9d53883..92823fb1 100644 --- a/metric_learn/__init__.py +++ b/metric_learn/__init__.py @@ -9,10 +9,12 @@ from .rca import RCA, RCA_Supervised from .mlkr import MLKR from .mmc import MMC, MMC_Supervised +from .scml import SCML, SCML_Supervised from ._version import __version__ __all__ = ['Constraints', 'Covariance', 'ITML', 'ITML_Supervised', 'LMNN', 'LSML', 'LSML_Supervised', 'SDML', 'SDML_Supervised', 'NCA', 'LFDA', 'RCA', 'RCA_Supervised', - 'MLKR', 'MMC', 'MMC_Supervised', '__version__'] + 'MLKR', 'MMC', 'MMC_Supervised', 'SCML', + 'SCML_Supervised', '__version__'] diff --git a/metric_learn/scml.py b/metric_learn/scml.py new file mode 100644 index 00000000..7bbd101a --- /dev/null +++ b/metric_learn/scml.py @@ -0,0 +1,646 @@ +""" +Sparse Compositional Metric Learning (SCML) +""" + +from __future__ import print_function, absolute_import, division +import numpy as np +from .base_metric import _TripletsClassifierMixin, MahalanobisMixin +from ._util import components_from_metric +from sklearn.base import TransformerMixin +from .constraints import Constraints +from sklearn.preprocessing import normalize +from sklearn.neighbors import NearestNeighbors +from sklearn.cluster import KMeans +from sklearn.discriminant_analysis import LinearDiscriminantAnalysis +from sklearn.utils import check_array, check_random_state +import warnings + + +class _BaseSCML(MahalanobisMixin): + + _tuple_size = 3 # constraints are triplets + _authorized_basis = ['triplet_diffs'] + + def __init__(self, beta=1e-5, 
basis='triplet_diffs', n_basis=None,
+               gamma=5e-3, max_iter=10000, output_iter=500, batch_size=10,
+               verbose=False, preprocessor=None, random_state=None):
+    self.beta = beta
+    self.basis = basis
+    self.n_basis = n_basis
+    self.gamma = gamma
+    self.max_iter = max_iter
+    self.output_iter = output_iter
+    self.batch_size = batch_size
+    self.verbose = verbose
+    self.preprocessor = preprocessor
+    self.random_state = random_state
+    super(_BaseSCML, self).__init__(preprocessor)
+
+  def _fit(self, triplets, basis=None, n_basis=None):
+    """
+    Optimization procedure to find a sparse vector of weights to
+    construct the metric from the basis set. This is based on the
+    dual averaging method.
+    """
+
+    if not isinstance(self.max_iter, int):
+      raise ValueError("max_iter should be an integer, instead it is of type"
+                       " %s" % type(self.max_iter))
+    if not isinstance(self.output_iter, int):
+      raise ValueError("output_iter should be an integer, instead it is of "
+                       "type %s" % type(self.output_iter))
+    if not isinstance(self.batch_size, int):
+      raise ValueError("batch_size should be an integer, instead it is of type"
+                       " %s" % type(self.batch_size))
+
+    if(self.output_iter > self.max_iter):
+      raise ValueError("The value of output_iter must be equal or smaller than"
+                       " max_iter.")
+
+    # Currently prepare_inputs makes triplets contain points and not indices
+    triplets = self._prepare_inputs(triplets, type_of_inputs='tuples')
+
+    # TODO:
+    # This algorithm is built to work with indices, but in order to be
+    # compliant with the current handling of inputs it is converted
+    # back to indices by the following function. This should be improved
+    # in the future.
+    triplets, X = self._to_index_points(triplets)
+
+    if basis is None:
+      basis, n_basis = self._initialize_basis(triplets, X)
+
+    dist_diff = self._compute_dist_diff(triplets, X, basis)
+
+    n_triplets = triplets.shape[0]
+
+    # weight vector
+    w = np.zeros((1, n_basis))
+    # average obj gradient wrt weights
+    avg_grad_w = np.zeros((1, n_basis))
+
+    # running l2 norm of all obj gradients wrt weights
+    ada_grad_w = np.zeros((1, n_basis))
+    # slack for not dividing by zero
+    delta = 0.001
+
+    best_obj = np.inf
+
+    rng = check_random_state(self.random_state)
+    rand_int = rng.randint(low=0, high=n_triplets,
+                           size=(self.max_iter, self.batch_size))
+    for iter in range(self.max_iter):
+
+      idx = rand_int[iter]
+
+      slack_val = 1 + np.matmul(dist_diff[idx, :], w.T)
+      slack_mask = np.squeeze(slack_val > 0, axis=1)
+
+      grad_w = np.sum(dist_diff[idx[slack_mask], :],
+                      axis=0, keepdims=True)/self.batch_size
+      avg_grad_w = (iter * avg_grad_w + grad_w) / (iter+1)
+
+      ada_grad_w = np.sqrt(np.square(ada_grad_w) + np.square(grad_w))
+
+      scale_f = -(iter+1) / (self.gamma * (delta + ada_grad_w))
+
+      # proximal operator with negative trimming equivalent
+      w = scale_f * np.minimum(avg_grad_w + self.beta, 0)
+
+      if (iter + 1) % self.output_iter == 0:
+        # regularization part of obj function
+        obj1 = np.sum(w)*self.beta
+
+        # Every triplet distance difference in the space given by L
+        # plus a slack of one
+        slack_val = 1 + np.matmul(dist_diff, w.T)
+        # Mask of places with positive slack
+        slack_mask = slack_val > 0
+
+        # loss function of learning task part of obj function
+        obj2 = np.sum(slack_val[slack_mask])/n_triplets
+
+        obj = obj1 + obj2
+        if self.verbose:
+          count = np.sum(slack_mask)
+          print("[%s] iter %d\t obj %.6f\t num_imp %d" %
+                (self.__class__.__name__, (iter+1), obj, count))
+
+        # update the best
+        if obj < best_obj:
+          best_obj = obj
+          best_w = w
+
+    if self.verbose:
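+      # getting here means the full max_iter iterations were run; best_w
+      # holds the best weights seen at any intermediate checkpoint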
+      print("max iteration reached.")
+
+    # return L matrix yielded from best weights
+    self.n_iter_ = iter
+    self.components_ = self._components_from_basis_weights(basis, best_w)
+
+    return self
+
+  def _compute_dist_diff(self, triplets, X, basis):
+    """
+    Helper function to compute the distance difference of every triplet in the
+    space yielded by the basis set.
+    """
+    # Transformation of data by the basis set
+    XB = np.matmul(X, basis.T)
+
+    n_triplets = triplets.shape[0]
+    # get all positive and negative pairs with lowest index first
+    # np.array (2*n_triplets,2)
+    triplets_pairs_sorted = np.sort(np.vstack((triplets[:, [0, 1]],
+                                               triplets[:, [0, 2]])),
+                                    kind='stable')
+    # calculate all unique pairs and their indices
+    uniqPairs, indices = np.unique(triplets_pairs_sorted, return_inverse=True,
+                                   axis=0)
+    # calculate L2 distance according to bases only for unique pairs
+    dist = np.square(XB[uniqPairs[:, 0], :] - XB[uniqPairs[:, 1], :])
+
+    # return the difference of distances between all positive and negative
+    # pairs
+    return dist[indices[:n_triplets]] - dist[indices[n_triplets:]]
+
+  def _components_from_basis_weights(self, basis, w):
+    """
+    Get components matrix (L) from computed Mahalanobis matrix.
+    """
+
+    # get rid of inactive bases
+    # TODO: Maybe have a tolerance over zero?
+    active_idx, = w > 0
+    w = w[..., active_idx]
+    basis = basis[active_idx, :]
+
+    n_basis, n_features = basis.shape
+
+    if n_basis < n_features:  # if metric is low-rank
+      warnings.warn("The number of bases with nonzero weight is less than the "
+                    "number of features of the input, in consequence the "
+                    "learned transformation reduces the dimension to %d."
+                    % n_basis)
+      return np.sqrt(w.T)*basis  # equivalent to np.diag(np.sqrt(w)).dot(basis)
+
+    else:  # if metric is full rank
+      return components_from_metric(np.matmul(basis.T, w.T*basis))
+
+  def _to_index_points(self, triplets):
+    shape = triplets.shape
+    X, triplets = np.unique(np.vstack(triplets), return_inverse=True, axis=0)
+    triplets = triplets.reshape(shape[:2])
+    return triplets, X
+
+  def _initialize_basis(self, triplets, X):
+    """ Checks if the basis array is well constructed or constructs it based
+    on one of the available options.
+    """
+    n_features = X.shape[1]
+
+    if isinstance(self.basis, np.ndarray):
+      # TODO: should copy?
+      basis = check_array(self.basis, copy=True)
+      if basis.shape[1] != n_features:
+        raise ValueError('The dimensionality ({}) of the provided bases must'
+                         ' match the dimensionality of the data '
+                         '({}).'.format(basis.shape[1], n_features))
+    elif self.basis not in self._authorized_basis:
+      raise ValueError(
+          "`basis` must be one of the options '{}' "
+          "or an array of shape (n_basis, n_features)."
+          .format("', '".join(self._authorized_basis)))
+    if self.basis == 'triplet_diffs':
+      basis, n_basis = self._generate_bases_dist_diff(triplets, X)
+
+    return basis, n_basis
+
+  def _generate_bases_dist_diff(self, triplets, X):
+    """ Constructs the basis set from the differences of positive and negative
+    pairs from the triplet constraints.
+
+    The basis set is constructed iteratively by taking n_features triplets,
+    then adding and subtracting respectively all the outer products of the
+    positive and negative pairs, and finally selecting the eigenvectors
+    of this matrix with positive eigenvalue. This is done until n_basis are
+    selected.
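+
+    As a rough sketch of one pass, using the variable names of the
+    implementation below: the sampled positive and negative pair differences
+    ``d_pos`` and ``d_neg`` are combined into
+    ``diff_sum = d_pos.T.dot(d_pos) - d_neg.T.dot(d_neg)``, and the
+    eigenvectors of ``diff_sum.T.dot(diff_sum)`` with positive eigenvalue
+    are appended to the basis set.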
+ """ + n_features = X.shape[1] + n_triplets = triplets.shape[0] + + if self.n_basis is None: + # TODO: Get a good default n_basis directive + n_basis = n_features*80 + warnings.warn('As no value for `n_basis` was selected, the number of ' + 'basis will be set to n_basis= %d' % n_basis) + elif isinstance(self.n_basis, int): + n_basis = self.n_basis + else: + raise ValueError("n_basis should be an integer, instead it is of type %s" + % type(self.n_basis)) + + basis = np.zeros((n_basis, n_features)) + + # get all positive and negative pairs with lowest index first + # np.array (2*n_triplets,2) + triplets_pairs_sorted = np.sort(np.vstack((triplets[:, [0, 1]], + triplets[:, [0, 2]])), + kind='stable') + # calculate all unique pairs and their indices + uniqPairs, indices = np.unique(triplets_pairs_sorted, return_inverse=True, + axis=0) + # calculate differences only for unique pairs + diff = X[uniqPairs[:, 0], :] - X[uniqPairs[:, 1], :] + + diff_pos = diff[indices[:n_triplets], :] + diff_neg = diff[indices[n_triplets:], :] + + rng = check_random_state(self.random_state) + + start = 0 + finish = 0 + + while(finish != n_basis): + + # Select triplets to yield diff + + select_triplet = rng.choice(n_triplets, size=n_features, replace=False) + + # select n_features positive differences + d_pos = diff_pos[select_triplet, :] + + # select n_features negative differences + d_neg = diff_neg[select_triplet, :] + + # Yield matrix + diff_sum = d_pos.T.dot(d_pos) - d_neg.T.dot(d_neg) + + # Calculate eigenvalue and eigenvectors + w, v = np.linalg.eigh(diff_sum.T.dot(diff_sum)) + + # Add eigenvectors with positive eigenvalue to basis set + pos_eig_mask = w > 0 + start = finish + finish += pos_eig_mask.sum() + + try: + basis[start:finish, :] = v[pos_eig_mask] + except ValueError: + # if finish is greater than n_basis + basis[start:, :] = v[pos_eig_mask][:n_basis-start] + break + + # TODO: maybe add a warning in case there are no added bases, this could + # be caused by a bad triplet set. This would cause an infinite loop + + return basis, n_basis + + +class SCML(_BaseSCML, _TripletsClassifierMixin): + """Sparse Compositional Metric Learning (SCML) + + `SCML` learns an squared Mahalanobis distance from triplet constraints by + optimizing sparse positive weights assigned to a set of :math:`K` rank-one + PSD bases. This can be formulated as an optimization problem with only + :math:`K` parameters, that can be solved with an efficient stochastic + composite scheme. + + Read more in the :ref:`User Guide `. + + Parameters + ---------- + beta: float (default=1e-5) + L1 regularization parameter. + + basis : string or array-like, optional (default='triplet_diffs') + Set of bases to construct the metric. Possible options are + 'triplet_diffs', and an array-like of shape (n_basis, n_features). + + 'triplet_diffs' + The basis set is constructed from the differences between points of + `n_basis` positive or negative pairs taken from the triplets + constrains. + + array-like + A matrix of shape (n_basis, n_features), that will be used as + the basis set for the metric construction. + + n_basis : int, optional + Number of basis to be yielded. In case it is not set it will be set based + on `basis`. If no value is selected a default will be computed based on + the input. + + gamma: float (default = 5e-3) + Learning rate for the optimization algorithm. + + max_iter : int (default = 100000) + Number of iterations for the algorithm. 
+class SCML(_BaseSCML, _TripletsClassifierMixin):
+  """Sparse Compositional Metric Learning (SCML)
+
+  `SCML` learns a squared Mahalanobis distance from triplet constraints by
+  optimizing sparse positive weights assigned to a set of :math:`K` rank-one
+  PSD bases. This can be formulated as an optimization problem with only
+  :math:`K` parameters, that can be solved with an efficient stochastic
+  composite scheme.
+
+  Read more in the :ref:`User Guide `.
+
+  Parameters
+  ----------
+  beta: float (default=1e-5)
+    L1 regularization parameter.
+
+  basis : string or array-like, optional (default='triplet_diffs')
+    Set of bases to construct the metric. Possible options are
+    'triplet_diffs', and an array-like of shape (n_basis, n_features).
+
+    'triplet_diffs'
+      The basis set is constructed from the differences between points of
+      `n_basis` positive or negative pairs taken from the triplets
+      constraints.
+
+    array-like
+      A matrix of shape (n_basis, n_features), that will be used as
+      the basis set for the metric construction.
+
+  n_basis : int, optional
+    Number of bases to be yielded. In case it is not set it will be set based
+    on `basis`. If no value is selected a default will be computed based on
+    the input.
+
+  gamma: float (default = 5e-3)
+    Learning rate for the optimization algorithm.
+
+  max_iter : int (default = 10000)
+    Number of iterations for the algorithm.
+
+  output_iter : int (default = 500)
+    Number of iterations to check current weights performance and output this
+    information in case verbose is True.
+
+  verbose : bool, optional
+    If True, prints information while learning.
+
+  preprocessor : array-like, shape=(n_samples, n_features) or callable
+    The preprocessor to call to get triplets from indices. If array-like,
+    triplets will be formed like this: X[indices].
+
+  random_state : int or numpy.RandomState or None, optional (default=None)
+    A pseudo random number generator object or a seed for it if int.
+
+  Attributes
+  ----------
+  components_ : `numpy.ndarray`, shape=(n_features, n_features)
+    The linear transformation ``L`` deduced from the learned Mahalanobis
+    metric (See function `_components_from_basis_weights`.)
+
+  Examples
+  --------
+  >>> from metric_learn import SCML
+  >>> triplets = [[[1.2, 7.5], [1.3, 1.5], [6.2, 9.7]],
+  >>>             [[1.3, 4.5], [3.2, 4.6], [5.4, 5.4]],
+  >>>             [[3.2, 7.5], [3.3, 1.5], [8.2, 9.7]],
+  >>>             [[3.3, 4.5], [5.2, 4.6], [7.4, 5.4]]]
+  >>> scml = SCML()
+  >>> scml.fit(triplets)
+
+  References
+  ----------
+  .. [1] Y. Shi, A. Bellet and F. Sha. `Sparse Compositional Metric Learning.
+         `_. \
+         (AAAI), 2014.
+
+  .. [2] Adapted from original \
+         `Matlab implementation.`_.
+
+  See Also
+  --------
+  metric_learn.SCML_Supervised : The supervised version of the algorithm.
+
+  :ref:`supervised_version` : The section of the project documentation
+    that describes the supervised version of weakly supervised estimators.
+  """
+
+  def fit(self, triplets):
+    """Learn the SCML model.
+
+    Parameters
+    ----------
+    triplets : array-like, shape=(n_constraints, 3, n_features) or \
+          (n_constraints, 3)
+      3D array-like of triplets of points or 2D array of triplets of
+      indicators. Triplets are assumed to be ordered such that:
+      d(triplets[i, 0], triplets[i, 1]) < d(triplets[i, 0], triplets[i, 2]).
+
+    Returns
+    -------
+    self : object
+      Returns the instance.
+    """
+
+    return self._fit(triplets)
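+
+
+# Usage sketch for the class above (toy triplets as in the docstring
+# example; `get_metric` is inherited from the base metric learner):
+#
+#   scml = SCML(random_state=42).fit(triplets)
+#   d = scml.get_metric()
+#   # for a triplet predicted as respected, one expects
+#   d(triplets[0][0], triplets[0][1]) < d(triplets[0][0], triplets[0][2])
+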
+class SCML_Supervised(_BaseSCML, TransformerMixin):
+  """Supervised version of Sparse Compositional Metric Learning (SCML)
+
+  `SCML_Supervised` creates triplets by taking `k_genuine` neighbours
+  of the same class and `k_impostor` neighbours from different classes for
+  each point and then runs the SCML algorithm on these triplets.
+
+  Read more in the :ref:`User Guide `.
+
+  Parameters
+  ----------
+  beta: float (default=1e-5)
+    L1 regularization parameter.
+
+  basis : string or array-like, optional (default='lda')
+    Set of bases to construct the metric. Possible options are
+    'lda', and an array-like of shape (n_basis, n_features).
+
+    'lda'
+      The `n_basis` basis set is constructed from the LDA of significant
+      local regions in the feature space via clustering, for each region
+      center k-nearest neighbors are used to obtain the LDA scalings,
+      which correspond to the locally discriminative basis.
+
+    array-like
+      A matrix of shape (n_basis, n_features), that will be used as
+      the basis set for the metric construction.
+
+  n_basis : int, optional
+    Number of bases to be yielded. In case it is not set it will be set based
+    on `basis`. If no value is selected a default will be computed based on
+    the input.
+
+  gamma: float (default = 5e-3)
+    Learning rate for the optimization algorithm.
+
+  max_iter : int (default = 10000)
+    Number of iterations for the algorithm.
+
+  output_iter : int (default = 500)
+    Number of iterations to check current weights performance and output this
+    information in case verbose is True.
+
+  verbose : bool, optional
+    If True, prints information while learning.
+
+  preprocessor : array-like, shape=(n_samples, n_features) or callable
+    The preprocessor to call to get triplets from indices. If array-like,
+    triplets will be formed like this: X[indices].
+
+  random_state : int or numpy.RandomState or None, optional (default=None)
+    A pseudo random number generator object or a seed for it if int.
+
+  Attributes
+  ----------
+  components_ : `numpy.ndarray`, shape=(n_features, n_features)
+    The linear transformation ``L`` deduced from the learned Mahalanobis
+    metric (See function `_components_from_basis_weights`.)
+
+  Examples
+  --------
+  >>> from metric_learn import SCML_Supervised
+  >>> from sklearn.datasets import load_iris
+  >>> X, y = load_iris(return_X_y=True)
+  >>> scml = SCML_Supervised(random_state=42)
+  >>> scml.fit(X, y)
+
+  References
+  ----------
+  .. [1] Y. Shi, A. Bellet and F. Sha. `Sparse Compositional Metric Learning.
+         `_. \
+         (AAAI), 2014.
+
+  .. [2] Adapted from original \
+         `Matlab implementation.`_.
+
+  See Also
+  --------
+  metric_learn.SCML : The weakly supervised version of this
+    algorithm.
+  """
+  # Add supervised authorized basis construction options
+  _authorized_basis = _BaseSCML._authorized_basis + ['lda']
+
+  def __init__(self, k_genuine=3, k_impostor=10, beta=1e-5, basis='lda',
+               n_basis=None, gamma=5e-3, max_iter=10000, output_iter=500,
+               batch_size=10, verbose=False, preprocessor=None,
+               random_state=None):
+    self.k_genuine = k_genuine
+    self.k_impostor = k_impostor
+    _BaseSCML.__init__(self, beta=beta, basis=basis, n_basis=n_basis,
+                       gamma=gamma, max_iter=max_iter,
+                       output_iter=output_iter, batch_size=batch_size,
+                       verbose=verbose, preprocessor=preprocessor,
+                       random_state=random_state)
+
+  def fit(self, X, y):
+    """Create constraints from labels and learn the SCML model.
+
+    Parameters
+    ----------
+    X : (n x d) matrix
+      Input data, where each row corresponds to a single instance.
+
+    y : (n) array-like
+      Data labels.
+
+    Returns
+    -------
+    self : object
+      Returns the instance.
+    """
+    X, y = self._prepare_inputs(X, y, ensure_min_samples=2)
+
+    basis, n_basis = self._initialize_basis_supervised(X, y)
+
+    if not isinstance(self.k_genuine, int):
+      raise ValueError("k_genuine should be an integer, instead it is of type"
+                       " %s" % type(self.k_genuine))
+    if not isinstance(self.k_impostor, int):
+      raise ValueError("k_impostor should be an integer, instead it is of "
+                       "type %s" % type(self.k_impostor))
+
+    constraints = Constraints(y)
+    triplets = constraints.generate_knntriplets(X, self.k_genuine,
+                                                self.k_impostor)
+
+    triplets = X[triplets]
+
+    return self._fit(triplets, basis, n_basis)
+
+  def _initialize_basis_supervised(self, X, y):
+    """ Constructs the basis set following one of the supervised options in
+    case one is selected.
+    """
+
+    if self.basis == 'lda':
+      basis, n_basis = self._generate_bases_LDA(X, y)
+    else:
+      basis, n_basis = None, None
+
+    return basis, n_basis
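+
+  # Sketch of the constraint generation performed in `fit` above
+  # (`Constraints.generate_knntriplets` is the helper `fit` relies on):
+  #
+  #   constraints = Constraints(y)
+  #   triplets = constraints.generate_knntriplets(X, k_genuine=3,
+  #                                               k_impostor=10)
+  #
+  # each returned row is (anchor, same-class neighbor, other-class
+  # neighbor), so an anchor typically contributes k_genuine * k_impostor
+  # triplets.
+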
+ """ + + labels, class_count = np.unique(y, return_counts=True) + n_class = len(labels) + + n_features = X.shape[1] + # Number of basis yielded from each LDA + num_eig = min(n_class-1, n_features) + + if self.n_basis is None: + # TODO: Get a good default n_basis directive + n_basis = min(20*n_features, X.shape[0]*2*num_eig - 1) + warnings.warn('As no value for `n_basis` was selected, the number of ' + 'basis will be set to n_basis= %d' % n_basis) + + elif isinstance(self.n_basis, int): + n_basis = self.n_basis + else: + raise ValueError("n_basis should be an integer, instead it is of type %s" + % type(self.n_basis)) + + # Number of clusters needed for 2 scales given the number of basis + # yielded by every LDA + n_clusters = int(np.ceil(n_basis/(2 * num_eig))) + + if n_basis < n_class: + warnings.warn("The number of basis is less than the number of classes, " + "which may lead to poor discriminative performance.") + elif n_basis >= X.shape[0]*2*num_eig: + raise ValueError("Not enough samples to generate %d LDA bases, n_basis" + "should be smaller than %d" % + (n_basis, X.shape[0]*2*num_eig)) + + kmeans = KMeans(n_clusters=n_clusters, random_state=self.random_state, + algorithm='elkan').fit(X) + cX = kmeans.cluster_centers_ + + n_scales = 2 + if n_features > 50: + scales = [20, 50] + else: + scales = [10, 20] + + k_class = np.vstack((np.minimum(class_count, scales[0]), + np.minimum(class_count, scales[1]))) + + idx_set = [np.zeros((n_clusters, sum(k_class[0, :])), dtype=np.int), + np.zeros((n_clusters, sum(k_class[1, :])), dtype=np.int)] + + start_finish_indices = np.hstack((np.zeros((2, 1), np.int), + k_class)).cumsum(axis=1) + + neigh = NearestNeighbors() + + for c in range(n_class): + sel_c = np.where(y == labels[c]) + + # get k_class same class neighbors + neigh.fit(X=X[sel_c]) + # Only take the neighbors once for the biggest scale + neighbors = neigh.kneighbors(X=cX, n_neighbors=k_class[-1, c], + return_distance=False) + + # add index set of neighbors for every cluster center for both scales + for s, k in enumerate(k_class[:, c]): + start, finish = start_finish_indices[s, c:c+2] + idx_set[s][:, start:finish] = np.take(sel_c, neighbors[:, :k]) + + # Compute basis for every cluster in both scales + basis = np.zeros((n_basis, n_features)) + lda = LinearDiscriminantAnalysis() + start_finish_indices = np.hstack((np.vstack((0, n_clusters * num_eig)), + np.full((2, n_clusters), + num_eig))).cumsum(axis=1) + + for s in range(n_scales): + for c in range(n_clusters): + lda.fit(X[idx_set[s][c, :]], y[idx_set[s][c, :]]) + start, finish = start_finish_indices[s, c:c+2] + normalized_scalings = normalize(lda.scalings_.T) + try: + basis[start: finish, :] = normalized_scalings + except ValueError: + # handle tail + basis[start:, :] = normalized_scalings[:n_basis-start] + break + + return basis, n_basis diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py index a97f6437..b6b9eea2 100644 --- a/test/metric_learn_test.py +++ b/test/metric_learn_test.py @@ -12,6 +12,7 @@ from sklearn.utils.testing import assert_warns_message from sklearn.exceptions import ConvergenceWarning, ChangedBehaviorWarning from sklearn.utils.validation import check_X_y +from sklearn.preprocessing import StandardScaler try: from inverse_covariance import quic assert(quic) @@ -20,11 +21,11 @@ else: HAS_SKGGM = True from metric_learn import (LMNN, NCA, LFDA, Covariance, MLKR, MMC, - LSML_Supervised, ITML_Supervised, SDML_Supervised, - RCA_Supervised, MMC_Supervised, SDML, RCA, ITML, - LSML) + SCML_Supervised, 
LSML_Supervised, + ITML_Supervised, SDML_Supervised, RCA_Supervised, + MMC_Supervised, SDML, RCA, ITML, LSML, SCML) # Import this specially for testing. -from metric_learn.constraints import wrap_pairs +from metric_learn.constraints import wrap_pairs, Constraints from metric_learn.lmnn import _sum_outer_products @@ -75,6 +76,235 @@ def test_singular_returns_pseudo_inverse(self): pseudo_inverse) +class TestSCML(object): + @pytest.mark.parametrize('basis', ('lda', 'triplet_diffs')) + def test_iris(self, basis): + X, y = load_iris(return_X_y=True) + scml = SCML_Supervised(basis=basis, n_basis=85, k_genuine=7, k_impostor=5, + random_state=42) + scml.fit(X, y) + csep = class_separation(scml.transform(X), y) + assert csep < 0.24 + + def test_big_n_features(self): + X, y = make_classification(n_samples=100, n_classes=3, n_features=60, + n_informative=60, n_redundant=0, n_repeated=0, + random_state=42) + X = StandardScaler().fit_transform(X) + scml = SCML_Supervised(random_state=42) + scml.fit(X, y) + csep = class_separation(scml.transform(X), y) + assert csep < 0.7 + + @pytest.mark.parametrize(('estimator', 'data'), + [(SCML, (np.ones((3, 3, 3)),)), + (SCML_Supervised, (np.array([[0, 0], [0, 1], + [2, 0], [2, 1]]), + np.array([1, 0, 1, 0])))]) + def test_bad_basis(self, estimator, data): + model = estimator(basis='bad_basis') + msg = ("`basis` must be one of the options '{}' or an array of shape " + "(n_basis, n_features)." + .format("', '".join(model._authorized_basis))) + with pytest.raises(ValueError) as raised_error: + model.fit(*data) + assert msg == raised_error.value.args[0] + + def test_dimension_reduction_msg(self): + scml = SCML(n_basis=2) + triplets = np.array([[[0, 1], [2, 1], [0, 0]], + [[2, 1], [0, 1], [2, 0]], + [[0, 0], [2, 0], [0, 1]], + [[2, 0], [0, 0], [2, 1]]]) + msg = ("The number of bases with nonzero weight is less than the " + "number of features of the input, in consequence the " + "learned transformation reduces the dimension to 1.") + with pytest.warns(UserWarning) as raised_warning: + scml.fit(triplets) + assert msg == raised_warning[0].message.args[0] + + @pytest.mark.parametrize(('estimator', 'data'), + [(SCML, (np.array([[[0, 1], [2, 1], [0, 0]], + [[2, 1], [0, 1], [2, 0]], + [[0, 0], [2, 0], [0, 1]], + [[2, 0], [0, 0], [2, 1]]]),)), + (SCML_Supervised, (np.array([[0, 0], [1, 1], + [3, 3]]), + np.array([1, 2, 3])))]) + def test_n_basis_wrong_type(self, estimator, data): + n_basis = 4.0 + model = estimator(n_basis=n_basis) + msg = ("n_basis should be an integer, instead it is of type %s" + % type(n_basis)) + with pytest.raises(ValueError) as raised_error: + model.fit(*data) + assert msg == raised_error.value.args[0] + + def test_small_n_basis_lda(self): + X = np.array([[0, 0], [1, 1], [2, 2], [3, 3]]) + y = np.array([0, 0, 1, 1]) + + n_class = 2 + scml = SCML_Supervised(n_basis=n_class-1) + msg = ("The number of basis is less than the number of classes, which may" + " lead to poor discriminative performance.") + with pytest.warns(UserWarning) as raised_warning: + scml.fit(X, y) + assert msg == raised_warning[0].message.args[0] + + def test_big_n_basis_lda(self): + X = np.array([[0, 0], [1, 1], [3, 3]]) + y = np.array([1, 2, 3]) + + n_class = 3 + num_eig = min(n_class - 1, X.shape[1]) + n_basis = X.shape[0] * 2 * num_eig + + scml = SCML_Supervised(n_basis=n_basis) + msg = ("Not enough samples to generate %d LDA bases, n_basis" + "should be smaller than %d" % + (n_basis, n_basis)) + with pytest.raises(ValueError) as raised_error: + scml.fit(X, y) + assert msg == 
raised_error.value.args[0] + + @pytest.mark.parametrize(('estimator', 'data'), + [(SCML, (np.random.rand(3, 3, 2),)), + (SCML_Supervised, (np.array([[0, 0], [0, 1], + [2, 0], [2, 1]]), + np.array([1, 0, 1, 0])))]) + def test_array_basis(self, estimator, data): + """ Test that the proper error is raised when the shape of the input basis + array is not consistent with the input + """ + basis = np.eye(3) + scml = estimator(n_basis=3, basis=basis) + + msg = ('The dimensionality ({}) of the provided bases must match the ' + 'dimensionality of the data ({}).' + .format(basis.shape[1], data[0].shape[-1])) + with pytest.raises(ValueError) as raised_error: + scml.fit(*data) + assert msg == raised_error.value.args[0] + + @pytest.mark.parametrize(('estimator', 'data'), + [(SCML, (np.array([[0, 1, 2], [0, 1, 3], [1, 0, 2], + [1, 0, 3], [2, 3, 1], [2, 3, 0], + [3, 2, 1], [3, 2, 0]]),)), + (SCML_Supervised, (np.array([0, 1, 2, 3]), + np.array([0, 0, 1, 1])))]) + def test_verbose(self, estimator, data, capsys): + # assert there is proper output when verbose = True + model = estimator(preprocessor=np.array([[0, 0], [1, 1], [2, 2], [3, 3]]), + max_iter=1, output_iter=1, batch_size=1, + basis='triplet_diffs', random_state=42, verbose=True) + model.fit(*data) + out, _ = capsys.readouterr() + expected_out = ('[%s] iter 1\t obj 0.569946\t num_imp 2\n' + 'max iteration reached.\n' % estimator.__name__) + assert out == expected_out + + def test_triplet_diffs_toy(self): + expected_n_basis = 10 + model = SCML_Supervised(n_basis=expected_n_basis) + X = np.array([[0, 0], [1, 1], [2, 2], [3, 3]]) + triplets = np.array([[0, 1, 2], [0, 1, 3], [1, 0, 2], [1, 0, 3], + [2, 3, 1], [2, 3, 0], [3, 2, 1], [3, 2, 0]]) + basis, n_basis = model._generate_bases_dist_diff(triplets, X) + # All points are along the same line, so the only possible basis will be + # the vector along that line normalized. + expected_basis = np.ones((expected_n_basis, 2))/np.sqrt(2) + assert n_basis == expected_n_basis + np.testing.assert_allclose(basis, expected_basis) + + def test_lda_toy(self): + expected_n_basis = 7 + model = SCML_Supervised(n_basis=expected_n_basis) + X = np.array([[0, 0], [1, 1], [2, 2], [3, 3]]) + y = np.array([0, 0, 1, 1]) + basis, n_basis = model._generate_bases_LDA(X, y) + # All points are along the same line, so the only possible basis will be + # the vector along that line normalized. In this case it is possible to + # obtain it with positive or negative orientations. 
+ expected_basis = np.ones((expected_n_basis, 2))/np.sqrt(2) + assert n_basis == expected_n_basis + np.testing.assert_allclose(np.abs(basis), expected_basis) + + @pytest.mark.parametrize('n_samples', [100, 500]) + @pytest.mark.parametrize('n_features', [10, 50, 100]) + @pytest.mark.parametrize('n_classes', [5, 10, 15]) + def test_triplet_diffs(self, n_samples, n_features, n_classes): + X, y = make_classification(n_samples=n_samples, n_classes=n_classes, + n_features=n_features, n_informative=n_features, + n_redundant=0, n_repeated=0) + X = StandardScaler().fit_transform(X) + + model = SCML_Supervised() + constraints = Constraints(y) + triplets = constraints.generate_knntriplets(X, model.k_genuine, + model.k_impostor) + basis, n_basis = model._generate_bases_dist_diff(triplets, X) + + expected_n_basis = n_features * 80 + assert n_basis == expected_n_basis + assert basis.shape == (expected_n_basis, n_features) + + @pytest.mark.parametrize('n_samples', [100, 500]) + @pytest.mark.parametrize('n_features', [10, 50, 100]) + @pytest.mark.parametrize('n_classes', [5, 10, 15]) + def test_lda(self, n_samples, n_features, n_classes): + X, y = make_classification(n_samples=n_samples, n_classes=n_classes, + n_features=n_features, n_informative=n_features, + n_redundant=0, n_repeated=0) + X = StandardScaler().fit_transform(X) + + model = SCML_Supervised() + basis, n_basis = model._generate_bases_LDA(X, y) + + num_eig = min(n_classes - 1, n_features) + expected_n_basis = min(20 * n_features, n_samples * 2 * num_eig - 1) + assert n_basis == expected_n_basis + assert basis.shape == (expected_n_basis, n_features) + + @pytest.mark.parametrize('name', ['max_iter', 'output_iter', 'batch_size', + 'n_basis']) + def test_int_inputs(self, name): + value = 1.0 + d = {name: value} + scml = SCML(**d) + triplets = np.array([[[0, 1], [2, 1], [0, 0]]]) + + msg = ("%s should be an integer, instead it is of type" + " %s" % (name, type(value))) + with pytest.raises(ValueError) as raised_error: + scml.fit(triplets) + assert msg == raised_error.value.args[0] + + @pytest.mark.parametrize('name', ['max_iter', 'output_iter', 'batch_size', + 'k_genuine', 'k_impostor', 'n_basis']) + def test_int_inputs_supervised(self, name): + value = 1.0 + d = {name: value} + scml = SCML_Supervised(**d) + X = np.array([[0, 0], [1, 1], [3, 3], [4, 4]]) + y = np.array([1, 1, 0, 0]) + msg = ("%s should be an integer, instead it is of type" + " %s" % (name, type(value))) + with pytest.raises(ValueError) as raised_error: + scml.fit(X, y) + assert msg == raised_error.value.args[0] + + def test_large_output_iter(self): + scml = SCML(max_iter=1, output_iter=2) + triplets = np.array([[[0, 1], [2, 1], [0, 0]]]) + msg = ("The value of output_iter must be equal or smaller than" + " max_iter.") + + with pytest.raises(ValueError) as raised_error: + scml.fit(triplets) + assert msg == raised_error.value.args[0] + + class TestLSML(MetricTestCase): def test_iris(self): lsml = LSML_Supervised(num_constraints=200) diff --git a/test/test_base_metric.py b/test/test_base_metric.py index 0b1fbd22..fed9018a 100644 --- a/test/test_base_metric.py +++ b/test/test_base_metric.py @@ -5,7 +5,7 @@ import numpy as np from sklearn import clone from sklearn.utils.testing import set_random_state -from test.test_utils import ids_metric_learners, metric_learners +from test.test_utils import ids_metric_learners, metric_learners, remove_y def remove_spaces(s): @@ -83,12 +83,12 @@ def test_get_metric_is_independent_from_metric_learner(estimator, # we fit the metric learner on it and 
then we compute the metric on some # points - model.fit(input_data, labels) + model.fit(*remove_y(model, input_data, labels)) metric = model.get_metric() score = metric(X[0], X[1]) # then we refit the estimator on another dataset - model.fit(np.sin(input_data), labels) + model.fit(*remove_y(model, np.sin(input_data), labels)) # we recompute the distance between the two points: it should be the same score_bis = metric(X[0], X[1]) @@ -103,7 +103,7 @@ def test_get_metric_raises_error(estimator, build_dataset): input_data, labels, _, X = build_dataset() model = clone(estimator) set_random_state(model) - model.fit(input_data, labels) + model.fit(*remove_y(model, input_data, labels)) metric = model.get_metric() list_test_get_metric_raises = [(X[0].tolist() + [5.2], X[1]), # vectors with @@ -126,7 +126,7 @@ def test_get_metric_works_does_not_raise(estimator, build_dataset): input_data, labels, _, X = build_dataset() model = clone(estimator) set_random_state(model) - model.fit(input_data, labels) + model.fit(*remove_y(model, input_data, labels)) metric = model.get_metric() list_test_get_metric_doesnt_raise = [(X[0], X[1]), @@ -158,20 +158,20 @@ def test_n_components(estimator, build_dataset): if hasattr(model, 'n_components'): set_random_state(model) model.set_params(n_components=None) - model.fit(input_data, labels) + model.fit(*remove_y(model, input_data, labels)) assert model.components_.shape == (X.shape[1], X.shape[1]) model = clone(estimator) set_random_state(model) model.set_params(n_components=X.shape[1] - 1) - model.fit(input_data, labels) + model.fit(*remove_y(model, input_data, labels)) assert model.components_.shape == (X.shape[1] - 1, X.shape[1]) model = clone(estimator) set_random_state(model) model.set_params(n_components=X.shape[1] + 1) with pytest.raises(ValueError) as expected_err: - model.fit(input_data, labels) + model.fit(*remove_y(model, input_data, labels)) assert (str(expected_err.value) == 'Invalid n_components, must be in [1, {}]'.format(X.shape[1])) @@ -179,7 +179,7 @@ def test_n_components(estimator, build_dataset): set_random_state(model) model.set_params(n_components=0) with pytest.raises(ValueError) as expected_err: - model.fit(input_data, labels) + model.fit(*remove_y(model, input_data, labels)) assert (str(expected_err.value) == 'Invalid n_components, must be in [1, {}]'.format(X.shape[1])) diff --git a/test/test_mahalanobis_mixin.py b/test/test_mahalanobis_mixin.py index 91fb435f..2e3c3ef4 100644 --- a/test/test_mahalanobis_mixin.py +++ b/test/test_mahalanobis_mixin.py @@ -15,11 +15,12 @@ from metric_learn._util import make_context, _initialize_metric_mahalanobis from metric_learn.base_metric import (_QuadrupletsClassifierMixin, + _TripletsClassifierMixin, _PairsClassifierMixin) from metric_learn.exceptions import NonPSDError from test.test_utils import (ids_metric_learners, metric_learners, - remove_y_quadruplets, ids_classifiers) + remove_y, ids_classifiers) RNG = check_random_state(0) @@ -33,7 +34,7 @@ def test_score_pairs_pairwise(estimator, build_dataset): X = X[:n_samples] model = clone(estimator) set_random_state(model) - model.fit(*remove_y_quadruplets(estimator, input_data, labels)) + model.fit(*remove_y(estimator, input_data, labels)) pairwise = model.score_pairs(np.array(list(product(X, X))))\ .reshape(n_samples, n_samples) @@ -57,7 +58,7 @@ def test_score_pairs_toy_example(estimator, build_dataset): X = X[:n_samples] model = clone(estimator) set_random_state(model) - model.fit(*remove_y_quadruplets(estimator, input_data, labels)) + 
model.fit(*remove_y(estimator, input_data, labels)) pairs = np.stack([X[:10], X[10:20]], axis=1) embedded_pairs = pairs.dot(model.components_.T) distances = np.sqrt(np.sum((embedded_pairs[:, 1] - @@ -73,7 +74,7 @@ def test_score_pairs_finite(estimator, build_dataset): input_data, labels, _, X = build_dataset() model = clone(estimator) set_random_state(model) - model.fit(*remove_y_quadruplets(estimator, input_data, labels)) + model.fit(*remove_y(estimator, input_data, labels)) pairs = np.array(list(product(X, X))) assert np.isfinite(model.score_pairs(pairs)).all() @@ -87,7 +88,7 @@ def test_score_pairs_dim(estimator, build_dataset): input_data, labels, _, X = build_dataset() model = clone(estimator) set_random_state(model) - model.fit(*remove_y_quadruplets(estimator, input_data, labels)) + model.fit(*remove_y(estimator, input_data, labels)) tuples = np.array(list(product(X, X))) assert model.score_pairs(tuples).shape == (tuples.shape[0],) context = make_context(estimator) @@ -118,7 +119,7 @@ def test_embed_toy_example(estimator, build_dataset): X = X[:n_samples] model = clone(estimator) set_random_state(model) - model.fit(*remove_y_quadruplets(estimator, input_data, labels)) + model.fit(*remove_y(estimator, input_data, labels)) embedded_points = X.dot(model.components_.T) assert_array_almost_equal(model.transform(X), embedded_points) @@ -130,7 +131,7 @@ def test_embed_dim(estimator, build_dataset): input_data, labels, _, X = build_dataset() model = clone(estimator) set_random_state(model) - model.fit(*remove_y_quadruplets(estimator, input_data, labels)) + model.fit(*remove_y(estimator, input_data, labels)) assert model.transform(X).shape == X.shape # assert that ValueError is thrown if input shape is 1D @@ -144,7 +145,7 @@ def test_embed_dim(estimator, build_dataset): # we test that the shape is also OK when doing dimensionality reduction if hasattr(model, 'n_components'): model.set_params(n_components=2) - model.fit(*remove_y_quadruplets(estimator, input_data, labels)) + model.fit(*remove_y(estimator, input_data, labels)) assert model.transform(X).shape == (X.shape[0], 2) # assert that ValueError is thrown if input shape is 1D with pytest.raises(ValueError) as raised_error: @@ -159,7 +160,7 @@ def test_embed_finite(estimator, build_dataset): input_data, labels, _, X = build_dataset() model = clone(estimator) set_random_state(model) - model.fit(*remove_y_quadruplets(estimator, input_data, labels)) + model.fit(*remove_y(estimator, input_data, labels)) assert np.isfinite(model.transform(X)).all() @@ -170,7 +171,7 @@ def test_embed_is_linear(estimator, build_dataset): input_data, labels, _, X = build_dataset() model = clone(estimator) set_random_state(model) - model.fit(*remove_y_quadruplets(estimator, input_data, labels)) + model.fit(*remove_y(estimator, input_data, labels)) assert_array_almost_equal(model.transform(X[:10] + X[10:20]), model.transform(X[:10]) + model.transform(X[10:20])) @@ -189,7 +190,7 @@ def test_get_metric_equivalent_to_explicit_mahalanobis(estimator, input_data, labels, _, X = build_dataset() model = clone(estimator) set_random_state(model) - model.fit(*remove_y_quadruplets(estimator, input_data, labels)) + model.fit(*remove_y(estimator, input_data, labels)) metric = model.get_metric() n_features = X.shape[1] a, b = (rng.randn(n_features), rng.randn(n_features)) @@ -208,7 +209,7 @@ def test_get_metric_is_pseudo_metric(estimator, build_dataset): input_data, labels, _, X = build_dataset() model = clone(estimator) set_random_state(model) - 
model.fit(*remove_y_quadruplets(estimator, input_data, labels)) + model.fit(*remove_y(estimator, input_data, labels)) metric = model.get_metric() n_features = X.shape[1] @@ -234,7 +235,7 @@ def test_metric_raises_deprecation_warning(estimator, build_dataset): input_data, labels, _, X = build_dataset() model = clone(estimator) set_random_state(model) - model.fit(*remove_y_quadruplets(estimator, input_data, labels)) + model.fit(*remove_y(estimator, input_data, labels)) with pytest.warns(DeprecationWarning) as raised_warning: model.metric() @@ -251,7 +252,7 @@ def test_get_metric_compatible_with_scikit_learn(estimator, build_dataset): input_data, labels, _, X = build_dataset() model = clone(estimator) set_random_state(model) - model.fit(*remove_y_quadruplets(estimator, input_data, labels)) + model.fit(*remove_y(estimator, input_data, labels)) clustering = DBSCAN(metric=model.get_metric()) clustering.fit(X) @@ -264,7 +265,7 @@ def test_get_squared_metric(estimator, build_dataset): input_data, labels, _, X = build_dataset() model = clone(estimator) set_random_state(model) - model.fit(*remove_y_quadruplets(estimator, input_data, labels)) + model.fit(*remove_y(estimator, input_data, labels)) metric = model.get_metric() n_features = X.shape[1] @@ -284,26 +285,31 @@ def test_components_is_2D(estimator, build_dataset): model = clone(estimator) set_random_state(model) # test that it works for X.shape[1] features - model.fit(*remove_y_quadruplets(estimator, input_data, labels)) + model.fit(*remove_y(estimator, input_data, labels)) assert model.components_.shape == (X.shape[1], X.shape[1]) # test that it works for 1 feature trunc_data = input_data[..., :1] # we drop duplicates that might have been formed, i.e. of the form # aabc or abcc or aabb for quadruplets, and aa for pairs. 
+ if isinstance(estimator, _QuadrupletsClassifierMixin): - for slice_idx in [slice(0, 2), slice(2, 4)]: - pairs = trunc_data[:, slice_idx, :] - diffs = pairs[:, 1, :] - pairs[:, 0, :] - to_keep = np.where(np.abs(diffs.ravel()) > 1e-9) - trunc_data = trunc_data[to_keep] - labels = labels[to_keep] + pairs_idx = [[0, 1], [2, 3]] + elif isinstance(estimator, _TripletsClassifierMixin): + pairs_idx = [[0, 1], [0, 2]] elif isinstance(estimator, _PairsClassifierMixin): - diffs = trunc_data[:, 1, :] - trunc_data[:, 0, :] - to_keep = np.where(np.abs(diffs.ravel()) > 1e-9) + pairs_idx = [[0, 1]] + else: + pairs_idx = [] + + for pair_idx in pairs_idx: + pairs = trunc_data[:, pair_idx, :] + diffs = pairs[:, 1, :] - pairs[:, 0, :] + to_keep = np.abs(diffs.ravel()) > 1e-9 trunc_data = trunc_data[to_keep] labels = labels[to_keep] - model.fit(*remove_y_quadruplets(estimator, trunc_data, labels)) + + model.fit(*remove_y(estimator, trunc_data, labels)) assert model.components_.shape == (1, 1) # the components must be 2D @@ -735,9 +741,9 @@ def test_deterministic_initialization(estimator, build_dataset): model.set_params(prior='random') model1 = clone(model) set_random_state(model1, 42) - model1 = model1.fit(input_data, labels) + model1 = model1.fit(*remove_y(model, input_data, labels)) model2 = clone(model) set_random_state(model2, 42) - model2 = model2.fit(input_data, labels) + model2 = model2.fit(*remove_y(model, input_data, labels)) np.testing.assert_allclose(model1.get_mahalanobis_matrix(), model2.get_mahalanobis_matrix()) diff --git a/test/test_sklearn_compat.py b/test/test_sklearn_compat.py index b2056c09..7f7d7037 100644 --- a/test/test_sklearn_compat.py +++ b/test/test_sklearn_compat.py @@ -10,7 +10,8 @@ from metric_learn import (Covariance, LFDA, LMNN, MLKR, NCA, ITML_Supervised, LSML_Supervised, - MMC_Supervised, RCA_Supervised, SDML_Supervised) + MMC_Supervised, RCA_Supervised, SDML_Supervised, + SCML_Supervised) from sklearn import clone import numpy as np from sklearn.model_selection import (cross_val_score, cross_val_predict, @@ -20,8 +21,9 @@ from test.test_utils import (metric_learners, ids_metric_learners, mock_preprocessor, tuples_learners, ids_tuples_learners, pairs_learners, - ids_pairs_learners, remove_y_quadruplets, - quadruplets_learners) + ids_pairs_learners, remove_y, + metric_learners_pipeline, + ids_metric_learners_pipeline) class Stable_RCA_Supervised(RCA_Supervised): @@ -79,6 +81,9 @@ def test_sdml(self): def test_rca(self): check_estimator(Stable_RCA_Supervised) + def test_scml(self): + check_estimator(SCML_Supervised) + RNG = check_random_state(0) @@ -125,8 +130,7 @@ def test_array_like_inputs(estimator, build_dataset, with_preprocessor): input_variants, label_variants = generate_array_like(input_data, labels) for input_variant in input_variants: for label_variant in label_variants: - estimator.fit(*remove_y_quadruplets(estimator, input_variant, - label_variant)) + estimator.fit(*remove_y(estimator, input_variant, label_variant)) if hasattr(estimator, "predict"): estimator.predict(input_variant) if hasattr(estimator, "predict_proba"): @@ -137,8 +141,7 @@ def test_array_like_inputs(estimator, build_dataset, with_preprocessor): estimator.decision_function(input_variant) if hasattr(estimator, "score"): for label_variant in label_variants: - estimator.score(*remove_y_quadruplets(estimator, input_variant, - label_variant)) + estimator.score(*remove_y(estimator, input_variant, label_variant)) X_variants, _ = generate_array_like(X) for X_variant in X_variants: @@ -199,13 +202,10 @@ 
def test_cross_validation_is_finite(estimator, build_dataset): estimator.set_params(preprocessor=preprocessor) set_random_state(estimator) assert np.isfinite(cross_val_score(estimator, - *remove_y_quadruplets(estimator, - input_data, - labels))).all() + *remove_y(estimator, input_data, labels) + )).all() assert np.isfinite(cross_val_predict(estimator, - *remove_y_quadruplets(estimator, - input_data, - labels) + *remove_y(estimator, input_data, labels) )).all() @@ -237,28 +237,26 @@ def test_cross_validation_manual_vs_scikit(estimator, build_dataset, train_mask = np.ones(input_data.shape[0], bool) train_mask[test_slice] = False y_train, y_test = labels[train_mask], labels[test_slice] - estimator.fit(*remove_y_quadruplets(estimator, - input_data[train_mask], - y_train)) + estimator.fit(*remove_y(estimator, input_data[train_mask], y_train)) if hasattr(estimator, "score"): - scores.append(estimator.score(*remove_y_quadruplets( + scores.append(estimator.score(*remove_y( estimator, input_data[test_slice], y_test))) if hasattr(estimator, "predict"): predictions[test_slice] = estimator.predict(input_data[test_slice]) if hasattr(estimator, "score"): assert all(scores == cross_val_score( - estimator, *remove_y_quadruplets(estimator, input_data, labels), + estimator, *remove_y(estimator, input_data, labels), cv=kfold)) if hasattr(estimator, "predict"): assert all(predictions == cross_val_predict( estimator, - *remove_y_quadruplets(estimator, input_data, labels), + *remove_y(estimator, input_data, labels), cv=kfold)) def check_score(estimator, tuples, y): if hasattr(estimator, "score"): - score = estimator.score(*remove_y_quadruplets(estimator, tuples, y)) + score = estimator.score(*remove_y(estimator, tuples, y)) assert np.isfinite(score) @@ -282,7 +280,7 @@ def test_simple_estimator(estimator, build_dataset, with_preprocessor): estimator.set_params(preprocessor=preprocessor) set_random_state(estimator) - estimator.fit(*remove_y_quadruplets(estimator, tuples_train, y_train)) + estimator.fit(*remove_y(estimator, tuples_train, y_train)) check_score(estimator, tuples_test, y_test) check_predict(estimator, tuples_test) @@ -329,62 +327,53 @@ def test_estimators_fit_returns_self(estimator, build_dataset, input_data, labels, preprocessor, _ = build_dataset(with_preprocessor) estimator = clone(estimator) estimator.set_params(preprocessor=preprocessor) - assert estimator.fit(*remove_y_quadruplets(estimator, - input_data, - labels)) is estimator + assert estimator.fit(*remove_y(estimator, input_data, labels)) is estimator @pytest.mark.parametrize('with_preprocessor', [True, False]) -@pytest.mark.parametrize('estimator, build_dataset', metric_learners, - ids=ids_metric_learners) +@pytest.mark.parametrize('estimator, build_dataset', metric_learners_pipeline, + ids=ids_metric_learners_pipeline) def test_pipeline_consistency(estimator, build_dataset, with_preprocessor): # Adapted from scikit learn # check that make_pipeline(est) gives same score as est - # we do this test on all except quadruplets (since they don't have a y - # in fit): - if estimator.__class__.__name__ not in [e.__class__.__name__ - for (e, _) in - quadruplets_learners]: - input_data, y, preprocessor, _ = build_dataset(with_preprocessor) - - def make_random_state(estimator, in_pipeline): - rs = {} - name_estimator = estimator.__class__.__name__ - if name_estimator[-11:] == '_Supervised': - name_param = 'random_state' - if in_pipeline: - name_param = name_estimator.lower() + '__' + name_param - rs[name_param] = check_random_state(0) - return rs - 
estimator = clone(estimator) - estimator.set_params(preprocessor=preprocessor) - pipeline = make_pipeline(estimator) - estimator.fit(*remove_y_quadruplets(estimator, input_data, y), - **make_random_state(estimator, False)) - pipeline.fit(*remove_y_quadruplets(estimator, input_data, y), - **make_random_state(estimator, True)) - - if hasattr(estimator, 'score'): - result = estimator.score(*remove_y_quadruplets(estimator, - input_data, - y)) - result_pipe = pipeline.score(*remove_y_quadruplets(estimator, - input_data, - y)) - assert_allclose_dense_sparse(result, result_pipe) + input_data, y, preprocessor, _ = build_dataset(with_preprocessor) - if hasattr(estimator, 'predict'): - result = estimator.predict(input_data) - result_pipe = pipeline.predict(input_data) - assert_allclose_dense_sparse(result, result_pipe) + def make_random_state(estimator, in_pipeline): + rs = {} + name_estimator = estimator.__class__.__name__ + if name_estimator[-11:] == '_Supervised': + name_param = 'random_state' + if in_pipeline: + name_param = name_estimator.lower() + '__' + name_param + rs[name_param] = check_random_state(0) + return rs - if issubclass(estimator.__class__, TransformerMixin): - if hasattr(estimator, 'transform'): - result = estimator.transform(input_data) - result_pipe = pipeline.transform(input_data) - assert_allclose_dense_sparse(result, result_pipe) + estimator = clone(estimator) + estimator.set_params(preprocessor=preprocessor, + **make_random_state(estimator, False)) + pipeline = make_pipeline(estimator) + estimator.fit(input_data, y) + estimator.set_params(preprocessor=preprocessor) + pipeline.set_params(**make_random_state(estimator, True)) + pipeline.fit(input_data, y) + + if hasattr(estimator, 'score'): + result = estimator.score(input_data, y) + result_pipe = pipeline.score(input_data, y) + assert_allclose_dense_sparse(result, result_pipe) + + if hasattr(estimator, 'predict'): + result = estimator.predict(input_data) + result_pipe = pipeline.predict(input_data) + assert_allclose_dense_sparse(result, result_pipe) + + if issubclass(estimator.__class__, TransformerMixin): + if hasattr(estimator, 'transform'): + result = estimator.transform(input_data) + result_pipe = pipeline.transform(input_data) + assert_allclose_dense_sparse(result, result_pipe) @pytest.mark.parametrize('with_preprocessor', [True, False]) @@ -398,7 +387,7 @@ def test_dict_unchanged(estimator, build_dataset, with_preprocessor): estimator.set_params(preprocessor=preprocessor) if hasattr(estimator, "n_components"): estimator.n_components = 1 - estimator.fit(*remove_y_quadruplets(estimator, input_data, labels)) + estimator.fit(*remove_y(estimator, input_data, labels)) def check_dict(): assert estimator.__dict__ == dict_before, ( @@ -429,7 +418,7 @@ def test_dont_overwrite_parameters(estimator, build_dataset, estimator.n_components = 1 dict_before_fit = estimator.__dict__.copy() - estimator.fit(*remove_y_quadruplets(estimator, input_data, labels)) + estimator.fit(*remove_y(estimator, input_data, labels)) dict_after_fit = estimator.__dict__ public_keys_after_fit = [key for key in dict_after_fit.keys() diff --git a/test/test_triplets_classifiers.py b/test/test_triplets_classifiers.py index 8cedd8cc..10393919 100644 --- a/test/test_triplets_classifiers.py +++ b/test/test_triplets_classifiers.py @@ -14,7 +14,7 @@ def test_predict_only_one_or_minus_one(estimator, build_dataset, with_preprocessor): """Test that all predicted values are either +1 or -1""" - input_data, preprocessor = build_dataset(with_preprocessor) + input_data, _, 
preprocessor, _ = build_dataset(with_preprocessor) estimator = clone(estimator) estimator.set_params(preprocessor=preprocessor) set_random_state(estimator) @@ -33,7 +33,7 @@ def test_raise_not_fitted_error_if_not_fitted(estimator, build_dataset, with_preprocessor): """Test that a NotFittedError is raised if someone tries to predict and the metric learner has not been fitted.""" - input_data, preprocessor = build_dataset(with_preprocessor) + input_data, _, preprocessor, _ = build_dataset(with_preprocessor) estimator = clone(estimator) estimator.set_params(preprocessor=preprocessor) set_random_state(estimator) @@ -46,8 +46,7 @@ def test_raise_not_fitted_error_if_not_fitted(estimator, build_dataset, def test_accuracy_toy_example(estimator, build_dataset): """Test that the default scoring for triplets (accuracy) works on some toy example""" - triplets, X = build_dataset(with_preprocessor=True) - triplets = X[triplets] + triplets, _, _, X = build_dataset(with_preprocessor=False) estimator = clone(estimator) set_random_state(estimator) estimator.fit(triplets) diff --git a/test/test_utils.py b/test/test_utils.py index a4cf86f4..fdcb864a 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -16,14 +16,13 @@ from metric_learn import (ITML, LSML, MMC, RCA, SDML, Covariance, LFDA, LMNN, MLKR, NCA, ITML_Supervised, LSML_Supervised, MMC_Supervised, RCA_Supervised, SDML_Supervised, - Constraints) + SCML, SCML_Supervised, Constraints) from metric_learn.base_metric import (ArrayIndexer, MahalanobisMixin, _PairsClassifierMixin, _TripletsClassifierMixin, _QuadrupletsClassifierMixin) from metric_learn.exceptions import PreprocessorError, NonPSDError from sklearn.datasets import make_regression, make_blobs, load_iris -from metric_learn.lsml import _BaseLSML SEED = 42 @@ -92,25 +91,10 @@ def build_triplets(with_preprocessor=False): triplets = constraints.generate_knntriplets(X, k_genuine=3, k_impostor=4) if with_preprocessor: # if preprocessor, we build a 2D array of triplets of indices - return triplets, X + return Dataset(triplets, np.ones(len(triplets)), X, np.arange(len(X))) else: # if not, we build a 3D array of triplets of samples - return X[triplets], None - - -class mock_triplet_LSML(_BaseLSML, _TripletsClassifierMixin): - # Mock Triplet learner from LSML which is a quadruplets learner - # in order to test TripletClassifierMixin basic methods - - _tuple_size = 4 - - def fit(self, triplets, weights=None): - quadruplets = triplets[:, [0, 1, 0, 2]] - return self._fit(quadruplets, weights=weights) - - def decision_function(self, triplets): - self._tuple_size = 3 - return _TripletsClassifierMixin.decision_function(self, triplets) + return Dataset(X[triplets], np.ones(len(triplets)), None, X) def build_quadruplets(with_preprocessor=False): @@ -133,7 +117,7 @@ def build_quadruplets(with_preprocessor=False): [learner for (learner, _) in quadruplets_learners])) -triplets_learners = [(mock_triplet_LSML(), build_triplets)] +triplets_learners = [(SCML(), build_triplets)] ids_triplets_learners = list(map(lambda x: x.__class__.__name__, [learner for (learner, _) in triplets_learners])) @@ -155,7 +139,8 @@ def build_quadruplets(with_preprocessor=False): (MMC_Supervised(max_iter=5), build_classification), (RCA_Supervised(num_chunks=5), build_classification), (SDML_Supervised(prior='identity', balance_param=1e-5), - build_classification)] + build_classification), + (SCML_Supervised(), build_classification)] ids_classifiers = list(map(lambda x: x.__class__.__name__, [learner for (learner, _) in classifiers])) @@ 
-165,10 +150,12 @@ def build_quadruplets(with_preprocessor=False): [learner for (learner, _) in regressors])) WeaklySupervisedClasses = (_PairsClassifierMixin, + _TripletsClassifierMixin, _QuadrupletsClassifierMixin) -tuples_learners = pairs_learners + quadruplets_learners -ids_tuples_learners = ids_pairs_learners + ids_quadruplets_learners +tuples_learners = pairs_learners + triplets_learners + quadruplets_learners +ids_tuples_learners = ids_pairs_learners + ids_triplets_learners \ + + ids_quadruplets_learners supervised_learners = classifiers + regressors ids_supervised_learners = ids_classifiers + ids_regressors @@ -176,14 +163,17 @@ def build_quadruplets(with_preprocessor=False): metric_learners = tuples_learners + supervised_learners ids_metric_learners = ids_tuples_learners + ids_supervised_learners +metric_learners_pipeline = pairs_learners + supervised_learners +ids_metric_learners_pipeline = ids_pairs_learners + ids_supervised_learners + -def remove_y_quadruplets(estimator, X, y): - """Quadruplets learners have no y in fit, but to write test for all - estimators, it is convenient to have this function, that will return X and y - if the estimator needs a y to fit on, and just X otherwise.""" +def remove_y(estimator, X, y): + """Quadruplets and triplets learners have no y in fit, but to write test for + all estimators, it is convenient to have this function, that will return X + and y if the estimator needs a y to fit on, and just X otherwise.""" + no_y_fit = quadruplets_learners + triplets_learners if estimator.__class__.__name__ in [e.__class__.__name__ - for (e, _) in - quadruplets_learners]: + for (e, _) in no_y_fit]: return (X,) else: return (X, y) @@ -831,13 +821,12 @@ def test_error_message_tuple_size(estimator, _): per tuple, it throws an error message""" estimator = clone(estimator) set_random_state(estimator) - invalid_pairs = np.array([[[1.3, 6.3], [3., 6.8], [6.5, 4.4]], - [[1.9, 5.3], [1., 7.8], [3.2, 1.2]]]) + invalid_pairs = np.ones((2, 5, 2)) y = [1, 1] with pytest.raises(ValueError) as raised_err: - estimator.fit(*remove_y_quadruplets(estimator, invalid_pairs, y)) - expected_msg = ("Tuples of {} element(s) expected{}. Got tuples of 3 " - "element(s) instead (shape=(2, 3, 2)):\ninput={}.\n" + estimator.fit(*remove_y(estimator, invalid_pairs, y)) + expected_msg = ("Tuples of {} element(s) expected{}. 
Got tuples of 5 " + "element(s) instead (shape=(2, 5, 2)):\ninput={}.\n" .format(estimator._tuple_size, make_context(estimator), invalid_pairs)) assert str(raised_err.value) == expected_msg @@ -911,35 +900,21 @@ def test_same_with_or_without_preprocessor(estimator, build_dataset): dataset_formed.data, random_state=SEED) - def make_random_state(estimator): - rs = {} - if estimator.__class__.__name__[-11:] == '_Supervised': - rs['random_state'] = check_random_state(SEED) - return rs - estimator_with_preprocessor = clone(estimator) set_random_state(estimator_with_preprocessor) estimator_with_preprocessor.set_params(preprocessor=X) - estimator_with_preprocessor.fit(*remove_y_quadruplets(estimator, - indices_train, - y_train), - **make_random_state(estimator)) + estimator_with_preprocessor.fit(*remove_y(estimator, indices_train, y_train)) estimator_without_preprocessor = clone(estimator) set_random_state(estimator_without_preprocessor) estimator_without_preprocessor.set_params(preprocessor=None) - estimator_without_preprocessor.fit(*remove_y_quadruplets(estimator, - formed_train, - y_train), - **make_random_state(estimator)) + estimator_without_preprocessor.fit(*remove_y(estimator, formed_train, + y_train)) estimator_with_prep_formed = clone(estimator) set_random_state(estimator_with_prep_formed) estimator_with_prep_formed.set_params(preprocessor=X) - estimator_with_prep_formed.fit(*remove_y_quadruplets(estimator, - indices_train, - y_train), - **make_random_state(estimator)) + estimator_with_prep_formed.fit(*remove_y(estimator, indices_train, y_train)) # test prediction methods for method in ["predict", "decision_function"]: From 6f783de0254727a144c535056fe9e0022e0ce5bf Mon Sep 17 00:00:00 2001 From: William de Vazelhes <31916524+wdevazelhes@users.noreply.github.com> Date: Fri, 26 Jun 2020 17:34:47 +0200 Subject: [PATCH 165/210] =?UTF-8?q?[MRG+2]=C2=A0Update=20the=20repo=20for?= =?UTF-8?q?=20release=20(#295)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Get rid of deprecations * Some more modifications * more modifs and fixes * remove flake8 error * some fixes * fix * Fixes * Fixes * Add SCML and change version number * add warning for scml, and fix doc generation * small fix * fix --- README.rst | 1 + doc/conf.py | 4 +- doc/modules.rst | 7 + doc/supervised.rst | 2 +- doc/weakly_supervised.rst | 6 +- examples/plot_metric_learning_examples.py | 2 +- metric_learn/_version.py | 2 +- metric_learn/base_metric.py | 10 - metric_learn/itml.py | 68 +-- metric_learn/lfda.py | 13 +- metric_learn/lmnn.py | 51 +- metric_learn/lsml.py | 65 +-- metric_learn/mlkr.py | 50 +- metric_learn/mmc.py | 85 +--- metric_learn/nca.py | 39 +- metric_learn/rca.py | 68 +-- metric_learn/scml.py | 8 + metric_learn/sdml.py | 86 +--- test/metric_learn_test.py | 553 +--------------------- test/test_components_metric_conversion.py | 7 +- test/test_mahalanobis_mixin.py | 18 - test/test_sklearn_compat.py | 8 +- 22 files changed, 93 insertions(+), 1060 deletions(-) create mode 100644 doc/modules.rst diff --git a/README.rst b/README.rst index ceb2eb33..20850964 100644 --- a/README.rst +++ b/README.rst @@ -11,6 +11,7 @@ metric-learn contains efficient Python implementations of several popular superv - Information Theoretic Metric Learning (ITML) - Sparse Determinant Metric Learning (SDML) - Least Squares Metric Learning (LSML) +- Sparse Compositional Metric Learning (SCML) - Neighborhood Components Analysis (NCA) - Local Fisher Discriminant Analysis (LFDA) - Relative Components Analysis 
(RCA) diff --git a/doc/conf.py b/doc/conf.py index 796b7861..b6408d31 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -24,8 +24,8 @@ u'Bellet and Nathalie Vauquier') author = (u'CJ Carey, Yuan Tang, William de Vazelhes, Aurélien Bellet and ' u'Nathalie Vauquier') -version = '0.5.0' -release = '0.5.0' +version = '0.6.0' +release = '0.6.0' language = 'en' exclude_patterns = ['_build'] diff --git a/doc/modules.rst b/doc/modules.rst new file mode 100644 index 00000000..55d5ad40 --- /dev/null +++ b/doc/modules.rst @@ -0,0 +1,7 @@ +metric_learn +============ + +.. toctree:: + :maxdepth: 4 + + metric_learn diff --git a/doc/supervised.rst b/doc/supervised.rst index fc77287b..1b1180e9 100644 --- a/doc/supervised.rst +++ b/doc/supervised.rst @@ -50,7 +50,7 @@ classes will be large. To do so, we fit the metric learner (example: >>> from metric_learn import NCA >>> nca = NCA(random_state=42) >>> nca.fit(X, y) -NCA(init=None, max_iter=100, n_components=None, num_dims='deprecated', +NCA(init='auto', max_iter=100, n_components=None, preprocessor=None, random_state=42, tol=None, verbose=False) diff --git a/doc/weakly_supervised.rst b/doc/weakly_supervised.rst index 82793b5b..174210b8 100644 --- a/doc/weakly_supervised.rst +++ b/doc/weakly_supervised.rst @@ -135,7 +135,7 @@ are respected. >>> mmc = MMC(random_state=42) >>> mmc.fit(tuples, y) MMC(A0='deprecated', convergence_threshold=0.001, diagonal=False, - diagonal_c=1.0, init=None, max_iter=100, max_proj=10000, + diagonal_c=1.0, init='auto', max_iter=100, max_proj=10000, preprocessor=None, random_state=42, verbose=False) Or alternatively (using a preprocessor): @@ -250,8 +250,8 @@ tuples). >>> y_pairs = np.array([1, -1]) >>> mmc = MMC(random_state=42) >>> mmc.fit(pairs, y_pairs) -MMC(A0='deprecated', convergence_threshold=0.001, diagonal=False, - diagonal_c=1.0, init=None, max_iter=100, max_proj=10000, preprocessor=None, +MMC(convergence_threshold=0.001, diagonal=False, + diagonal_c=1.0, init='auto', max_iter=100, max_proj=10000, preprocessor=None, random_state=42, verbose=False) Here, we learned a metric that puts the two first points closer diff --git a/examples/plot_metric_learning_examples.py b/examples/plot_metric_learning_examples.py index 014d9af3..71229554 100644 --- a/examples/plot_metric_learning_examples.py +++ b/examples/plot_metric_learning_examples.py @@ -289,7 +289,7 @@ def plot_tsne(X, y, colormap=plt.cm.Paired): # - See more in the documentation of the class :py:class:`LFDA # ` -lfda = metric_learn.LFDA(k=2, num_dims=2) +lfda = metric_learn.LFDA(k=2, n_components=2) X_lfda = lfda.fit_transform(X, y) plot_tsne(X_lfda, y) diff --git a/metric_learn/_version.py b/metric_learn/_version.py index 2b8877c5..ef7eb44d 100644 --- a/metric_learn/_version.py +++ b/metric_learn/_version.py @@ -1 +1 @@ -__version__ = '0.5.0' +__version__ = '0.6.0' diff --git a/metric_learn/base_metric.py b/metric_learn/base_metric.py index d1af0821..721d7ba0 100644 --- a/metric_learn/base_metric.py +++ b/metric_learn/base_metric.py @@ -9,7 +9,6 @@ import numpy as np from abc import ABCMeta, abstractmethod from ._util import ArrayIndexer, check_input, validate_vector -import warnings class BaseMetricLearner(BaseEstimator, metaclass=ABCMeta): @@ -285,15 +284,6 @@ def metric_fun(u, v, squared=False): get_metric.__doc__ = BaseMetricLearner.get_metric.__doc__ - def metric(self): - """Deprecated. Will be removed in v0.6.0. 
Use `get_mahalanobis_matrix` - instead""" - # TODO: remove this method in version 0.6.0 - warnings.warn(("`metric` is deprecated since version 0.5.0 and will be " - "removed in 0.6.0. Use `get_mahalanobis_matrix` instead."), - DeprecationWarning) - return self.get_mahalanobis_matrix() - def get_mahalanobis_matrix(self): """Returns a copy of the Mahalanobis matrix learned by the metric learner. diff --git a/metric_learn/itml.py b/metric_learn/itml.py index 48d5a222..43872b60 100644 --- a/metric_learn/itml.py +++ b/metric_learn/itml.py @@ -2,9 +2,7 @@ Information Theoretic Metric Learning (ITML) """ -import warnings import numpy as np -from sklearn.exceptions import ChangedBehaviorWarning from sklearn.metrics import pairwise_distances from sklearn.utils.validation import check_array from sklearn.base import TransformerMixin @@ -19,23 +17,17 @@ class _BaseITML(MahalanobisMixin): _tuple_size = 2 # constraints are pairs def __init__(self, gamma=1., max_iter=1000, convergence_threshold=1e-3, - prior='identity', A0='deprecated', verbose=False, + prior='identity', verbose=False, preprocessor=None, random_state=None): self.gamma = gamma self.max_iter = max_iter self.convergence_threshold = convergence_threshold self.prior = prior - self.A0 = A0 self.verbose = verbose self.random_state = random_state super(_BaseITML, self).__init__(preprocessor) def _fit(self, pairs, y, bounds=None): - if self.A0 != 'deprecated': - warnings.warn('"A0" parameter is not used.' - ' It has been deprecated in version 0.5.0 and will be' - 'removed in 0.6.0. Use "prior" instead.', - DeprecationWarning) pairs, y = self._prepare_inputs(pairs, y, type_of_inputs='tuples') # init bounds @@ -155,11 +147,6 @@ class ITML(_BaseITML, _PairsClassifierMixin): (n_features, n_features), that will be used as such to set the prior. - A0 : Not used - .. deprecated:: 0.5.0 - `A0` was deprecated in version 0.5.0 and will - be removed in 0.6.0. Use 'prior' instead. - verbose : bool, optional (default=False) If True, prints information while learning @@ -276,21 +263,10 @@ class ITML_Supervised(_BaseITML, TransformerMixin): convergence_threshold : float, optional (default=1e-3) Tolerance of the optimization procedure. - num_labeled : Not used - .. deprecated:: 0.5.0 - `num_labeled` was deprecated in version 0.5.0 and will - be removed in 0.6.0. - num_constraints : int, optional (default=None) Number of constraints to generate. If None, default to `20 * num_classes**2`. - bounds : Not used - .. deprecated:: 0.5.0 - `bounds` was deprecated in version 0.5.0 and will - be removed in 0.6.0. Set `bounds` at fit time instead : - `itml_supervised.fit(X, y, bounds=...)` - prior : string or numpy array, optional (default='identity') Initialization of the Mahalanobis matrix. Possible options are 'identity', 'covariance', 'random', and a numpy array of shape @@ -313,11 +289,6 @@ class ITML_Supervised(_BaseITML, TransformerMixin): (n_features, n_features), that will be used as such to set the prior. - A0 : Not used - .. deprecated:: 0.5.0 - `A0` was deprecated in version 0.5.0 and will - be removed in 0.6.0. Use 'prior' instead. 
- verbose : bool, optional (default=False) If True, prints information while learning @@ -368,18 +339,15 @@ class ITML_Supervised(_BaseITML, TransformerMixin): """ def __init__(self, gamma=1.0, max_iter=1000, convergence_threshold=1e-3, - num_labeled='deprecated', num_constraints=None, - bounds='deprecated', prior='identity', A0='deprecated', + num_constraints=None, prior='identity', verbose=False, preprocessor=None, random_state=None): _BaseITML.__init__(self, gamma=gamma, max_iter=max_iter, convergence_threshold=convergence_threshold, - A0=A0, prior=prior, verbose=verbose, + prior=prior, verbose=verbose, preprocessor=preprocessor, random_state=random_state) - self.num_labeled = num_labeled self.num_constraints = num_constraints - self.bounds = bounds - def fit(self, X, y, random_state='deprecated', bounds=None): + def fit(self, X, y, bounds=None): """Create constraints from labels and learn the ITML model. @@ -391,12 +359,6 @@ def fit(self, X, y, random_state='deprecated', bounds=None): y : (n) array-like Data labels. - random_state : Not used - .. deprecated:: 0.5.0 - `random_state` in the `fit` function was deprecated in version 0.5.0 - and will be removed in 0.6.0. Set `random_state` at initialization - instead (when instantiating a new `ITML_Supervised` object). - bounds : array-like of two numbers Bounds on similarity, aside slack variables, s.t. ``d(a, b) < bounds_[0]`` for all given pairs of similar points ``a`` @@ -406,28 +368,6 @@ def fit(self, X, y, random_state='deprecated', bounds=None): set to the 5th and 95th percentile of the pairwise distances among all points in the training data `X`. """ - # TODO: remove these in v0.6.0 - if self.num_labeled != 'deprecated': - warnings.warn('"num_labeled" parameter is not used.' - ' It has been deprecated in version 0.5.0 and will be' - ' removed in 0.6.0', DeprecationWarning) - if self.bounds != 'deprecated': - warnings.warn('"bounds" parameter from initialization is not used.' - ' It has been deprecated in version 0.5.0 and will be' - ' removed in 0.6.0. Use the "bounds" parameter of this ' - 'fit method instead.', DeprecationWarning) - if random_state != 'deprecated': - warnings.warn('"random_state" parameter in the `fit` function is ' - 'deprecated. Set `random_state` at initialization ' - 'instead (when instantiating a new `ITML_Supervised` ' - 'object).', DeprecationWarning) - else: - warnings.warn('As of v0.5.0, `ITML_Supervised` now uses the ' - '`random_state` given at initialization to sample ' - 'constraints, not the default `np.random` from the `fit` ' - 'method, since this argument is now deprecated. ' - 'This warning will disappear in v0.6.0.', - ChangedBehaviorWarning) X, y = self._prepare_inputs(X, y, ensure_min_samples=2) num_constraints = self.num_constraints if num_constraints is None: diff --git a/metric_learn/lfda.py b/metric_learn/lfda.py index 2feed169..bfa3275e 100644 --- a/metric_learn/lfda.py +++ b/metric_learn/lfda.py @@ -27,11 +27,6 @@ class LFDA(MahalanobisMixin, TransformerMixin): n_components : int or None, optional (default=None) Dimensionality of reduced space (if None, defaults to dimension of X). - num_dims : Not used - .. deprecated:: 0.5.0 - `num_dims` was deprecated in version 0.5.0 and will - be removed in 0.6.0. Use `n_components` instead. - k : int, optional (default=None) Number of nearest neighbors used in local scaling method. If None, defaults to min(7, n_features - 1). 
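# --------------------------------------------------------------------------
# A minimal usage sketch (editorial, not part of the upstream diff): with
# `num_dims` removed, the output dimension of LFDA is controlled by
# `n_components` alone, mirroring the change made to the plotting example
# earlier in this patch.
from sklearn.datasets import load_iris
from metric_learn import LFDA

X, y = load_iris(return_X_y=True)
lfda = LFDA(k=2, n_components=2)   # was LFDA(k=2, num_dims=2) before 0.6.0
X_2d = lfda.fit_transform(X, y)    # shape (150, 2)
# --------------------------------------------------------------------------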
@@ -81,12 +76,11 @@ class LFDA(MahalanobisMixin, TransformerMixin): -discriminant-analysis-on-beer-style-clustering.html#>`_. ''' - def __init__(self, n_components=None, num_dims='deprecated', + def __init__(self, n_components=None, k=None, embedding_type='weighted', preprocessor=None): if embedding_type not in ('weighted', 'orthonormalized', 'plain'): raise ValueError('Invalid embedding_type: %r' % embedding_type) self.n_components = n_components - self.num_dims = num_dims self.embedding_type = embedding_type self.k = k super(LFDA, self).__init__(preprocessor) @@ -102,11 +96,6 @@ def fit(self, X, y): y : (n,) array-like Class labels, one per point of data. ''' - if self.num_dims != 'deprecated': - warnings.warn('"num_dims" parameter is not used.' - ' It has been deprecated in version 0.5.0 and will be' - ' removed in 0.6.0. Use "n_components" instead', - DeprecationWarning) X, y = self._prepare_inputs(X, y, ensure_min_samples=2) unique_classes, y = np.unique(y, return_inverse=True) n, d = X.shape diff --git a/metric_learn/lmnn.py b/metric_learn/lmnn.py index 12eb5ab1..8bdc4bf0 100644 --- a/metric_learn/lmnn.py +++ b/metric_learn/lmnn.py @@ -2,9 +2,7 @@ Large Margin Nearest Neighbor Metric learning (LMNN) """ import numpy as np -import warnings from collections import Counter -from sklearn.exceptions import ChangedBehaviorWarning from sklearn.metrics import euclidean_distances from sklearn.base import TransformerMixin @@ -25,12 +23,10 @@ class LMNN(MahalanobisMixin, TransformerMixin): Parameters ---------- - init : None, string or numpy array, optional (default=None) + init : string or numpy array, optional (default='auto') Initialization of the linear transformation. Possible options are 'auto', 'pca', 'identity', 'random', and a numpy array of shape - (n_features_a, n_features_b). If None, will be set automatically to - 'auto' (this option is to raise a warning if 'init' is not set, and - stays to its default value None, in v0.5.0). + (n_features_a, n_features_b). 'auto' Depending on ``n_components``, the most reasonable initialization @@ -83,11 +79,6 @@ class LMNN(MahalanobisMixin, TransformerMixin): Tolerance of the optimization procedure. If the objective value varies less than `tol`, we consider the algorithm has converged and stop it. - use_pca : Not used - .. deprecated:: 0.5.0 - `use_pca` was deprecated in version 0.5.0 and will - be removed in 0.6.0. - verbose : bool, optional (default=False) Whether to print the progress of the optimization procedure. @@ -102,11 +93,6 @@ class LMNN(MahalanobisMixin, TransformerMixin): n_components : int or None, optional (default=None) Dimensionality of reduced space (if None, defaults to dimension of X). - num_dims : Not used - .. deprecated:: 0.5.0 - `num_dims` was deprecated in version 0.5.0 and will - be removed in 0.6.0. Use `n_components` instead. - random_state : int or numpy.RandomState or None, optional (default=None) A pseudo random number generator object or a seed for it if int. If ``init='random'``, ``random_state`` is used to initialize the random @@ -142,10 +128,10 @@ class LMNN(MahalanobisMixin, TransformerMixin): 2005. 
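# --------------------------------------------------------------------------
# A minimal usage sketch (editorial, not part of the upstream diff): `init`
# now defaults to 'auto' directly, so no ChangedBehaviorWarning is emitted
# for the default value anymore. The learning rate here is an illustrative
# choice, not a recommendation.
from sklearn.datasets import load_iris
from metric_learn import LMNN

X, y = load_iris(return_X_y=True)
lmnn = LMNN(k=3, learn_rate=1e-6)     # init='auto' is the new default
lmnn.fit(X, y)
M = lmnn.get_mahalanobis_matrix()     # learned (4, 4) Mahalanobis matrix
# --------------------------------------------------------------------------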
""" - def __init__(self, init=None, k=3, min_iter=50, max_iter=1000, + def __init__(self, init='auto', k=3, min_iter=50, max_iter=1000, learn_rate=1e-7, regularization=0.5, convergence_tol=0.001, - use_pca='deprecated', verbose=False, preprocessor=None, - n_components=None, num_dims='deprecated', random_state=None): + verbose=False, preprocessor=None, + n_components=None, random_state=None): self.init = init self.k = k self.min_iter = min_iter @@ -153,24 +139,12 @@ def __init__(self, init=None, k=3, min_iter=50, max_iter=1000, self.learn_rate = learn_rate self.regularization = regularization self.convergence_tol = convergence_tol - self.use_pca = use_pca self.verbose = verbose self.n_components = n_components - self.num_dims = num_dims self.random_state = random_state super(LMNN, self).__init__(preprocessor) def fit(self, X, y): - if self.num_dims != 'deprecated': - warnings.warn('"num_dims" parameter is not used.' - ' It has been deprecated in version 0.5.0 and will be' - ' removed in 0.6.0. Use "n_components" instead', - DeprecationWarning) - if self.use_pca != 'deprecated': - warnings.warn('"use_pca" parameter is not used.' - ' It has been deprecated in version 0.5.0 and will be' - ' removed in 0.6.0.', - DeprecationWarning) k = self.k reg = self.regularization learn_rate = self.learn_rate @@ -184,20 +158,7 @@ def fit(self, X, y): raise ValueError('Must have one label per point.') self.labels_ = np.arange(len(unique_labels)) - # if the init is the default (None), we raise a warning - if self.init is None: - # TODO: replace init=None by init='auto' in v0.6.0 and remove the warning - msg = ("Warning, no init was set (`init=None`). As of version 0.5.0, " - "the default init will now be set to 'auto', instead of the " - "previous identity matrix. If you still want to use the identity " - "matrix as before, set init='identity'. This warning " - "will disappear in v0.6.0, and `init` parameter's default value " - "will be set to 'auto'.") - warnings.warn(msg, ChangedBehaviorWarning) - init = 'auto' - else: - init = self.init - self.components_ = _initialize_components(output_dim, X, y, init, + self.components_ = _initialize_components(output_dim, X, y, self.init, self.verbose, random_state=self.random_state) required_k = np.bincount(label_inds).min() diff --git a/metric_learn/lsml.py b/metric_learn/lsml.py index 0cf9dc22..28f65ce7 100644 --- a/metric_learn/lsml.py +++ b/metric_learn/lsml.py @@ -2,11 +2,9 @@ Metric Learning from Relative Comparisons by Minimizing Squared Residual (LSML) """ -import warnings import numpy as np import scipy.linalg from sklearn.base import TransformerMixin -from sklearn.exceptions import ChangedBehaviorWarning from .base_metric import _QuadrupletsClassifierMixin, MahalanobisMixin from .constraints import Constraints @@ -17,7 +15,7 @@ class _BaseLSML(MahalanobisMixin): _tuple_size = 4 # constraints are quadruplets - def __init__(self, tol=1e-3, max_iter=1000, prior=None, + def __init__(self, tol=1e-3, max_iter=1000, prior='identity', verbose=False, preprocessor=None, random_state=None): self.prior = prior self.tol = tol @@ -40,21 +38,8 @@ def _fit(self, quadruplets, weights=None): else: self.w_ = weights self.w_ /= self.w_.sum() # weights must sum to 1 - # if the prior is the default (None), we raise a warning - if self.prior is None: - msg = ("Warning, no prior was set (`prior=None`). As of version 0.5.0, " - "the default prior will now be set to " - "'identity', instead of 'covariance'. 
If you still want to use " - "the inverse of the covariance matrix as a prior, " - "set prior='covariance'. This warning will disappear in " - "v0.6.0, and `prior` parameter's default value will be set to " - "'identity'.") - warnings.warn(msg, ChangedBehaviorWarning) - prior = 'identity' - else: - prior = self.prior M, prior_inv = _initialize_metric_mahalanobis( - quadruplets, prior, + quadruplets, self.prior, return_inverse=True, strict_pd=True, matrix_name='prior', random_state=self.random_state) @@ -137,13 +122,11 @@ class LSML(_BaseLSML, _QuadrupletsClassifierMixin): Parameters ---------- - prior : None, string or numpy array, optional (default=None) + prior : string or numpy array, optional (default='identity') Prior to set for the metric. Possible options are 'identity', 'covariance', 'random', and a numpy array of shape (n_features, n_features). For LSML, the prior should be strictly - positive definite (PD). If `None`, will be set - automatically to 'identity' (this is to raise a warning if - `prior` is not set, and stays to its default value (None), in v0.5.0). + positive definite (PD). 'identity' An identity matrix of shape (n_features, n_features). @@ -256,13 +239,11 @@ class LSML_Supervised(_BaseLSML, TransformerMixin): max_iter : int, optional (default=1000) Number of maximum iterations of the optimization procedure. - prior : None, string or numpy array, optional (default=None) + prior : string or numpy array, optional (default='identity') Prior to set for the metric. Possible options are 'identity', 'covariance', 'random', and a numpy array of shape (n_features, n_features). For LSML, the prior should be strictly - positive definite (PD). If `None`, will be set - automatically to 'identity' (this is to raise a warning if - `prior` is not set, and stays to its default value (None), in v0.5.0). + positive definite (PD). 'identity' An identity matrix of shape (n_features, n_features). @@ -280,11 +261,6 @@ class LSML_Supervised(_BaseLSML, TransformerMixin): (n_features, n_features), that will be used as such to set the prior. - num_labeled : Not used - .. deprecated:: 0.5.0 - `num_labeled` was deprecated in version 0.5.0 and will - be removed in 0.6.0. - num_constraints: int, optional (default=None) Number of constraints to generate. If None, default to `20 * num_classes**2`. @@ -326,17 +302,16 @@ class LSML_Supervised(_BaseLSML, TransformerMixin): metric (See function `components_from_metric`.) """ - def __init__(self, tol=1e-3, max_iter=1000, prior=None, - num_labeled='deprecated', num_constraints=None, weights=None, + def __init__(self, tol=1e-3, max_iter=1000, prior='identity', + num_constraints=None, weights=None, verbose=False, preprocessor=None, random_state=None): _BaseLSML.__init__(self, tol=tol, max_iter=max_iter, prior=prior, verbose=verbose, preprocessor=preprocessor, random_state=random_state) - self.num_labeled = num_labeled self.num_constraints = num_constraints self.weights = weights - def fit(self, X, y, random_state='deprecated'): + def fit(self, X, y): """Create constraints from labels and learn the LSML model. Parameters @@ -346,29 +321,7 @@ def fit(self, X, y, random_state='deprecated'): y : (n) array-like Data labels. - - random_state : Not used - .. deprecated:: 0.5.0 - `random_state` in the `fit` function was deprecated in version 0.5.0 - and will be removed in 0.6.0. Set `random_state` at initialization - instead (when instantiating a new `LSML_Supervised` object). 
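# --------------------------------------------------------------------------
# A minimal usage sketch (editorial, not part of the upstream diff):
# `random_state` moves from `fit` to `__init__`.
from sklearn.datasets import load_iris
from metric_learn import LSML_Supervised

X, y = load_iris(return_X_y=True)
# 0.5.x style, now removed:  lsml.fit(X, y, random_state=rs)
lsml = LSML_Supervised(num_constraints=200, prior='identity',
                       random_state=42)
lsml.fit(X, y)
# --------------------------------------------------------------------------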
""" - if self.num_labeled != 'deprecated': - warnings.warn('"num_labeled" parameter is not used.' - ' It has been deprecated in version 0.5.0 and will be' - ' removed in 0.6.0', DeprecationWarning) - if random_state != 'deprecated': - warnings.warn('"random_state" parameter in the `fit` function is ' - 'deprecated. Set `random_state` at initialization ' - 'instead (when instantiating a new `LSML_Supervised` ' - 'object).', DeprecationWarning) - else: - warnings.warn('As of v0.5.0, `LSML_Supervised` now uses the ' - '`random_state` given at initialization to sample ' - 'constraints, not the default `np.random` from the `fit` ' - 'method, since this argument is now deprecated. ' - 'This warning will disappear in v0.6.0.', - ChangedBehaviorWarning) X, y = self._prepare_inputs(X, y, ensure_min_samples=2) num_constraints = self.num_constraints if num_constraints is None: diff --git a/metric_learn/mlkr.py b/metric_learn/mlkr.py index 9b84dba8..01d185e7 100644 --- a/metric_learn/mlkr.py +++ b/metric_learn/mlkr.py @@ -8,7 +8,7 @@ from scipy.optimize import minimize from scipy.special import logsumexp from sklearn.base import TransformerMixin -from sklearn.exceptions import ConvergenceWarning, ChangedBehaviorWarning +from sklearn.exceptions import ConvergenceWarning from sklearn.metrics import pairwise_distances from .base_metric import MahalanobisMixin @@ -32,17 +32,10 @@ class MLKR(MahalanobisMixin, TransformerMixin): n_components : int or None, optional (default=None) Dimensionality of reduced space (if None, defaults to dimension of X). - num_dims : Not used - .. deprecated:: 0.5.0 - `num_dims` was deprecated in version 0.5.0 and will - be removed in 0.6.0. Use `n_components` instead. - - init : None, string or numpy array, optional (default=None) + init : string or numpy array, optional (default='auto') Initialization of the linear transformation. Possible options are 'auto', 'pca', 'identity', 'random', and a numpy array of shape - (n_features_a, n_features_b). If None, will be set automatically to - 'auto' (this option is to raise a warning if 'init' is not set, - and stays to its default value None, in v0.5.0). + (n_features_a, n_features_b). 'auto' Depending on ``n_components``, the most reasonable initialization @@ -70,11 +63,6 @@ class MLKR(MahalanobisMixin, TransformerMixin): :meth:`fit` and n_features_a must be less than or equal to that. If ``n_components`` is not None, n_features_a must match it. - A0 : Not used. - .. deprecated:: 0.5.0 - `A0` was deprecated in version 0.5.0 and will - be removed in 0.6.0. Use 'init' instead. - tol : float, optional (default=None) Convergence tolerance for the optimization. @@ -120,13 +108,11 @@ class MLKR(MahalanobisMixin, TransformerMixin): /weinberger07a.pdf>`_. AISTATS 2007. """ - def __init__(self, n_components=None, num_dims='deprecated', init=None, - A0='deprecated', tol=None, max_iter=1000, verbose=False, + def __init__(self, n_components=None, init='auto', + tol=None, max_iter=1000, verbose=False, preprocessor=None, random_state=None): self.n_components = n_components - self.num_dims = num_dims self.init = init - self.A0 = A0 self.tol = tol self.max_iter = max_iter self.verbose = verbose @@ -142,18 +128,6 @@ def fit(self, X, y): X : (n x d) array of samples y : (n) data labels """ - if self.A0 != 'deprecated': - warnings.warn('"A0" parameter is not used.' - ' It has been deprecated in version 0.5.0 and will be' - 'removed in 0.6.0. 
Use "init" instead.', - DeprecationWarning) - - if self.num_dims != 'deprecated': - warnings.warn('"num_dims" parameter is not used.' - ' It has been deprecated in version 0.5.0 and will be' - ' removed in 0.6.0. Use "n_components" instead', - DeprecationWarning) - X, y = self._prepare_inputs(X, y, y_numeric=True, ensure_min_samples=2) n, d = X.shape @@ -166,19 +140,7 @@ def fit(self, X, y): if m is None: m = d # if the init is the default (None), we raise a warning - if self.init is None: - # TODO: - # replace init=None by init='auto' in v0.6.0 and remove the warning - msg = ("Warning, no init was set (`init=None`). As of version 0.5.0, " - "the default init will now be set to 'auto', instead of 'pca'. " - "If you still want to use PCA as an init, set init='pca'. " - "This warning will disappear in v0.6.0, and `init` parameter's" - " default value will be set to 'auto'.") - warnings.warn(msg, ChangedBehaviorWarning) - init = 'auto' - else: - init = self.init - A = _initialize_components(m, X, y, init=init, + A = _initialize_components(m, X, y, init=self.init, random_state=self.random_state, # MLKR works on regression targets: has_classes=False) diff --git a/metric_learn/mmc.py b/metric_learn/mmc.py index 330e2113..e4f89cfe 100644 --- a/metric_learn/mmc.py +++ b/metric_learn/mmc.py @@ -1,9 +1,7 @@ """Mahalanobis Metric for Clustering (MMC)""" -import warnings import numpy as np from sklearn.base import TransformerMixin from sklearn.utils.validation import assert_all_finite -from sklearn.exceptions import ChangedBehaviorWarning from .base_metric import _PairsClassifierMixin, MahalanobisMixin from .constraints import Constraints, wrap_pairs @@ -15,14 +13,13 @@ class _BaseMMC(MahalanobisMixin): _tuple_size = 2 # constraints are pairs def __init__(self, max_iter=100, max_proj=10000, convergence_threshold=1e-3, - init=None, A0='deprecated', diagonal=False, + init='identity', diagonal=False, diagonal_c=1.0, verbose=False, preprocessor=None, random_state=None): self.max_iter = max_iter self.max_proj = max_proj self.convergence_threshold = convergence_threshold self.init = init - self.A0 = A0 self.diagonal = diagonal self.diagonal_c = diagonal_c self.verbose = verbose @@ -30,30 +27,10 @@ def __init__(self, max_iter=100, max_proj=10000, convergence_threshold=1e-3, super(_BaseMMC, self).__init__(preprocessor) def _fit(self, pairs, y): - if self.A0 != 'deprecated': - warnings.warn('"A0" parameter is not used.' - ' It has been deprecated in version 0.5.0 and will be' - 'removed in 0.6.0. Use "init" instead.', - DeprecationWarning) pairs, y = self._prepare_inputs(pairs, y, type_of_inputs='tuples') - if self.init is None: - # TODO: replace init=None by init='auto' in v0.6.0 and remove the warning - msg = ("Warning, no init was set (`init=None`). As of version 0.5.0, " - "the default init will now be set to 'identity', instead of the " - "identity divided by a scaling factor of 10. " - "If you still want to use the same init as in previous " - "versions, set init=np.eye(d)/10, where d is the dimension " - "of your input space (d=pairs.shape[1]). 
" - "This warning will disappear in v0.6.0, and `init` parameter's" - " default value will be set to 'auto'.") - warnings.warn(msg, ChangedBehaviorWarning) - init = 'identity' - else: - init = self.init - - self.A_ = _initialize_metric_mahalanobis(pairs, init, + self.A_ = _initialize_metric_mahalanobis(pairs, self.init, random_state=self.random_state, matrix_name='init') @@ -358,12 +335,10 @@ class MMC(_BaseMMC, _PairsClassifierMixin): convergence_threshold : float, optional (default=1e-3) Convergence threshold for the optimization procedure. - init : None, string or numpy array, optional (default=None) + init : string or numpy array, optional (default='identity') Initialization of the Mahalanobis matrix. Possible options are 'identity', 'covariance', 'random', and a numpy array of - shape (n_features, n_features). If None, will be set - automatically to 'identity' (this is to raise a warning if - 'init' is not set, and stays to its default value (None), in v0.5.0). + shape (n_features, n_features). 'identity' An identity matrix of shape (n_features, n_features). @@ -381,11 +356,6 @@ class MMC(_BaseMMC, _PairsClassifierMixin): An SPD matrix of shape (n_features, n_features), that will be used as such to initialize the metric. - A0 : Not used. - .. deprecated:: 0.5.0 - `A0` was deprecated in version 0.5.0 and will - be removed in 0.6.0. Use 'init' instead. - diagonal : bool, optional (default=False) If True, a diagonal metric will be learned, i.e., a simple scaling of dimensions. The initialization will then @@ -502,21 +472,14 @@ class MMC_Supervised(_BaseMMC, TransformerMixin): convergence_threshold : float, optional (default=1e-3) Convergence threshold for the optimization procedure. - num_labeled : Not used - .. deprecated:: 0.5.0 - `num_labeled` was deprecated in version 0.5.0 and will - be removed in 0.6.0. - num_constraints: int, optional (default=None) Number of constraints to generate. If None, default to `20 * num_classes**2`. - init : None, string or numpy array, optional (default=None) + init : string or numpy array, optional (default='identity') Initialization of the Mahalanobis matrix. Possible options are 'identity', 'covariance', 'random', and a numpy array of - shape (n_features, n_features). If None, will be set - automatically to 'identity' (this is to raise a warning if - 'init' is not set, and stays to its default value (None), in v0.5.0). + shape (n_features, n_features). 'identity' An identity matrix of shape (n_features, n_features). @@ -533,11 +496,6 @@ class MMC_Supervised(_BaseMMC, TransformerMixin): A numpy array of shape (n_features, n_features), that will be used as such to initialize the metric. - A0 : Not used. - .. deprecated:: 0.5.0 - `A0` was deprecated in version 0.5.0 and will - be removed in 0.6.0. Use 'init' instead. - diagonal : bool, optional (default=False) If True, a diagonal metric will be learned, i.e., a simple scaling of dimensions. 
The initialization will then @@ -581,18 +539,17 @@ class MMC_Supervised(_BaseMMC, TransformerMixin): """ def __init__(self, max_iter=100, max_proj=10000, convergence_threshold=1e-6, - num_labeled='deprecated', num_constraints=None, init=None, - A0='deprecated', diagonal=False, diagonal_c=1.0, verbose=False, + num_constraints=None, init='identity', + diagonal=False, diagonal_c=1.0, verbose=False, preprocessor=None, random_state=None): _BaseMMC.__init__(self, max_iter=max_iter, max_proj=max_proj, convergence_threshold=convergence_threshold, - init=init, A0=A0, diagonal=diagonal, + init=init, diagonal=diagonal, diagonal_c=diagonal_c, verbose=verbose, preprocessor=preprocessor, random_state=random_state) - self.num_labeled = num_labeled self.num_constraints = num_constraints - def fit(self, X, y, random_state='deprecated'): + def fit(self, X, y): """Create constraints from labels and learn the MMC model. Parameters @@ -602,29 +559,7 @@ def fit(self, X, y, random_state='deprecated'): y : (n) array-like Data labels. - - random_state : Not used - .. deprecated:: 0.5.0 - `random_state` in the `fit` function was deprecated in version 0.5.0 - and will be removed in 0.6.0. Set `random_state` at initialization - instead (when instantiating a new `MMC_Supervised` object). """ - if self.num_labeled != 'deprecated': - warnings.warn('"num_labeled" parameter is not used.' - ' It has been deprecated in version 0.5.0 and will be' - ' removed in 0.6.0', DeprecationWarning) - if random_state != 'deprecated': - warnings.warn('"random_state" parameter in the `fit` function is ' - 'deprecated. Set `random_state` at initialization ' - 'instead (when instantiating a new `MMC_Supervised` ' - 'object).', DeprecationWarning) - else: - warnings.warn('As of v0.5.0, `MMC_Supervised` now uses the ' - '`random_state` given at initialization to sample ' - 'constraints, not the default `np.random` from the `fit` ' - 'method, since this argument is now deprecated. ' - 'This warning will disappear in v0.6.0.', - ChangedBehaviorWarning) X, y = self._prepare_inputs(X, y, ensure_min_samples=2) num_constraints = self.num_constraints if num_constraints is None: diff --git a/metric_learn/nca.py b/metric_learn/nca.py index 217d7d28..7b4423d3 100644 --- a/metric_learn/nca.py +++ b/metric_learn/nca.py @@ -9,7 +9,7 @@ from scipy.optimize import minimize from scipy.special import logsumexp from sklearn.base import TransformerMixin -from sklearn.exceptions import ConvergenceWarning, ChangedBehaviorWarning +from sklearn.exceptions import ConvergenceWarning from sklearn.metrics import pairwise_distances from ._util import _initialize_components, _check_n_components @@ -32,12 +32,10 @@ class NCA(MahalanobisMixin, TransformerMixin): Parameters ---------- - init : None, string or numpy array, optional (default=None) + init : string or numpy array, optional (default='auto') Initialization of the linear transformation. Possible options are 'auto', 'pca', 'identity', 'random', and a numpy array of shape - (n_features_a, n_features_b). If None, will be set automatically to - 'auto' (this option is to raise a warning if 'init' is not set, - and stays to its default value None, in v0.5.0). + (n_features_a, n_features_b). 'auto' Depending on ``n_components``, the most reasonable initialization @@ -77,11 +75,6 @@ class NCA(MahalanobisMixin, TransformerMixin): n_components : int or None, optional (default=None) Dimensionality of reduced space (if None, defaults to dimension of X). - num_dims : Not used - .. 
deprecated:: 0.5.0 - `num_dims` was deprecated in version 0.5.0 and will - be removed in 0.6.0. Use `n_components` instead. - max_iter : int, optional (default=100) Maximum number of iterations done by the optimization algorithm. @@ -128,12 +121,11 @@ class NCA(MahalanobisMixin, TransformerMixin): `_ """ - def __init__(self, init=None, n_components=None, num_dims='deprecated', + def __init__(self, init='auto', n_components=None, max_iter=100, tol=None, verbose=False, preprocessor=None, random_state=None): self.n_components = n_components self.init = init - self.num_dims = num_dims self.max_iter = max_iter self.tol = tol self.verbose = verbose @@ -145,11 +137,6 @@ def fit(self, X, y): X: data matrix, (n x d) y: scalar labels, (n) """ - if self.num_dims != 'deprecated': - warnings.warn('"num_dims" parameter is not used.' - ' It has been deprecated in version 0.5.0 and will be' - ' removed in 0.6.0. Use "n_components" instead', - DeprecationWarning) X, labels = self._prepare_inputs(X, y, ensure_min_samples=2) n, d = X.shape n_components = _check_n_components(d, self.n_components) @@ -158,22 +145,8 @@ def fit(self, X, y): train_time = time.time() # Initialize A - # if the init is the default (None), we raise a warning - if self.init is None: - # TODO: replace init=None by init='auto' in v0.6.0 and remove the warning - msg = ("Warning, no init was set (`init=None`). As of version 0.5.0, " - "the default init will now be set to 'auto', instead of the " - "previous scaling matrix. If you still want to use the same " - "scaling matrix as before, set " - "init=np.eye(X.shape[1])/(np.maximum(X.max(axis=0)-X.min(axis=0)" - ", EPS))). This warning will disappear in v0.6.0, and `init` " - "parameter's default value will be set to 'auto'.") - warnings.warn(msg, ChangedBehaviorWarning) - init = 'auto' - else: - init = self.init - A = _initialize_components(n_components, X, labels, init, self.verbose, - self.random_state) + A = _initialize_components(n_components, X, labels, self.init, + self.verbose, self.random_state) # Run NCA mask = labels[:, np.newaxis] == labels[np.newaxis, :] diff --git a/metric_learn/rca.py b/metric_learn/rca.py index 2004b9d4..34f7f3ff 100644 --- a/metric_learn/rca.py +++ b/metric_learn/rca.py @@ -5,7 +5,6 @@ import numpy as np import warnings from sklearn.base import TransformerMixin -from sklearn.exceptions import ChangedBehaviorWarning from ._util import _check_n_components from .base_metric import MahalanobisMixin @@ -43,16 +42,6 @@ class RCA(MahalanobisMixin, TransformerMixin): n_components : int or None, optional (default=None) Dimensionality of reduced space (if None, defaults to dimension of X). - num_dims : Not used - .. deprecated:: 0.5.0 - `num_dims` was deprecated in version 0.5.0 and will - be removed in 0.6.0. Use `n_components` instead. - - pca_comps : Not used - .. deprecated:: 0.5.0 - `pca_comps` was deprecated in version 0.5.0 and will - be removed in 0.6.0. - preprocessor : array-like, shape=(n_samples, n_features) or callable The preprocessor to call to get tuples from indices. If array-like, tuples will be formed like this: X[indices]. @@ -82,11 +71,8 @@ class RCA(MahalanobisMixin, TransformerMixin): The learned linear transformation ``L``. 
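# --------------------------------------------------------------------------
# A minimal usage sketch (editorial, not part of the upstream diff): since
# RCA no longer centers the data or applies PCA itself, users who relied on
# `pca_comps` can chain the preprocessing explicitly, for instance with a
# scikit-learn Pipeline. The choice of 3 components is illustrative only.
from sklearn.datasets import load_iris
from sklearn.decomposition import PCA
from sklearn.pipeline import make_pipeline
from metric_learn import RCA_Supervised

X, y = load_iris(return_X_y=True)
model = make_pipeline(PCA(n_components=3),
                      RCA_Supervised(num_chunks=30, chunk_size=2,
                                     random_state=42))
X_rca = model.fit_transform(X, y)   # PCA first, then RCA on 3 components
# --------------------------------------------------------------------------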
""" - def __init__(self, n_components=None, num_dims='deprecated', - pca_comps='deprecated', preprocessor=None): + def __init__(self, n_components=None, preprocessor=None): self.n_components = n_components - self.num_dims = num_dims - self.pca_comps = pca_comps super(RCA, self).__init__(preprocessor) def _check_dimension(self, rank, X): @@ -115,29 +101,8 @@ def fit(self, X, chunks): When ``chunks[i] == -1``, point i doesn't belong to any chunklet. When ``chunks[i] == j``, point i belongs to chunklet j. """ - if self.num_dims != 'deprecated': - warnings.warn('"num_dims" parameter is not used.' - ' It has been deprecated in version 0.5.0 and will be' - ' removed in 0.6.0. Use "n_components" instead', - DeprecationWarning) - - if self.pca_comps != 'deprecated': - warnings.warn( - '"pca_comps" parameter is not used. ' - 'It has been deprecated in version 0.5.0 and will be' - 'removed in 0.6.0. RCA will not do PCA preprocessing anymore. If ' - 'you still want to do it, you could use ' - '`sklearn.decomposition.PCA` and an `sklearn.pipeline.Pipeline`.', - DeprecationWarning) - X, chunks = self._prepare_inputs(X, chunks, ensure_min_samples=2) - warnings.warn( - "RCA will no longer center the data before training. If you want " - "to do some preprocessing, you should do it manually (you can also " - "use an `sklearn.pipeline.Pipeline` for instance). This warning " - "will disappear in version 0.6.0.", ChangedBehaviorWarning) - chunks = np.asanyarray(chunks, dtype=int) chunk_mask, chunked_data = _chunk_mean_centering(X, chunks) @@ -177,11 +142,6 @@ class RCA_Supervised(RCA): n_components : int or None, optional (default=None) Dimensionality of reduced space (if None, defaults to dimension of X). - num_dims : Not used - .. deprecated:: 0.5.0 - `num_dims` was deprecated in version 0.5.0 and will - be removed in 0.6.0. Use `n_components` instead. - num_chunks: int, optional (default=100) Number of chunks to generate. @@ -212,17 +172,15 @@ class RCA_Supervised(RCA): The learned linear transformation ``L``. """ - def __init__(self, num_dims='deprecated', n_components=None, - pca_comps='deprecated', num_chunks=100, chunk_size=2, + def __init__(self, n_components=None, num_chunks=100, chunk_size=2, preprocessor=None, random_state=None): """Initialize the supervised version of `RCA`.""" - RCA.__init__(self, num_dims=num_dims, n_components=n_components, - pca_comps=pca_comps, preprocessor=preprocessor) + RCA.__init__(self, n_components=n_components, preprocessor=preprocessor) self.num_chunks = num_chunks self.chunk_size = chunk_size self.random_state = random_state - def fit(self, X, y, random_state='deprecated'): + def fit(self, X, y): """Create constraints from labels and learn the RCA model. Needs num_constraints specified in constructor. @@ -232,25 +190,7 @@ def fit(self, X, y, random_state='deprecated'): each row corresponds to a single instance y : (n) data labels - - random_state : Not used - .. deprecated:: 0.5.0 - `random_state` in the `fit` function was deprecated in version 0.5.0 - and will be removed in 0.6.0. Set `random_state` at initialization - instead (when instantiating a new `RCA_Supervised` object). """ - if random_state != 'deprecated': - warnings.warn('"random_state" parameter in the `fit` function is ' - 'deprecated. 
Set `random_state` at initialization ' - 'instead (when instantiating a new `RCA_Supervised` ' - 'object).', DeprecationWarning) - else: - warnings.warn('As of v0.5.0, `RCA_Supervised` now uses the ' - '`random_state` given at initialization to sample ' - 'constraints, not the default `np.random` from the `fit` ' - 'method, since this argument is now deprecated. ' - 'This warning will disappear in v0.6.0.', - ChangedBehaviorWarning) X, y = self._prepare_inputs(X, y, ensure_min_samples=2) chunks = Constraints(y).chunks(num_chunks=self.num_chunks, chunk_size=self.chunk_size, diff --git a/metric_learn/scml.py b/metric_learn/scml.py index 7bbd101a..c3fde272 100644 --- a/metric_learn/scml.py +++ b/metric_learn/scml.py @@ -308,6 +308,10 @@ class SCML(_BaseSCML, _TripletsClassifierMixin): Read more in the :ref:`User Guide `. + .. warning:: + SCML is still a bit experimental, don't hesitate to report if + something fails/doesn't work as expected. + Parameters ---------- beta: float (default=1e-5) @@ -413,6 +417,10 @@ class SCML_Supervised(_BaseSCML, TransformerMixin): Read more in the :ref:`User Guide `. + .. warning:: + SCML is still a bit experimental, don't hesitate to report if + something fails/doesn't work as expected. + Parameters ---------- beta: float (default=1e-5) diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py index f7c801e8..a0736ffa 100644 --- a/metric_learn/sdml.py +++ b/metric_learn/sdml.py @@ -7,7 +7,7 @@ from sklearn.base import TransformerMixin from scipy.linalg import pinvh from sklearn.covariance import graphical_lasso -from sklearn.exceptions import ConvergenceWarning, ChangedBehaviorWarning +from sklearn.exceptions import ConvergenceWarning from .base_metric import MahalanobisMixin, _PairsClassifierMixin from .constraints import Constraints, wrap_pairs @@ -24,23 +24,17 @@ class _BaseSDML(MahalanobisMixin): _tuple_size = 2 # constraints are pairs - def __init__(self, balance_param=0.5, sparsity_param=0.01, prior=None, - use_cov='deprecated', verbose=False, preprocessor=None, + def __init__(self, balance_param=0.5, sparsity_param=0.01, prior='identity', + verbose=False, preprocessor=None, random_state=None): self.balance_param = balance_param self.sparsity_param = sparsity_param self.prior = prior - self.use_cov = use_cov self.verbose = verbose self.random_state = random_state super(_BaseSDML, self).__init__(preprocessor) def _fit(self, pairs, y): - if self.use_cov != 'deprecated': - warnings.warn('"use_cov" parameter is not used.' - ' It has been deprecated in version 0.5.0 and will be' - 'removed in 0.6.0. Use "prior" instead.', - DeprecationWarning) if not HAS_SKGGM: if self.verbose: print("SDML will use scikit-learn's graphical lasso solver.") @@ -52,23 +46,8 @@ def _fit(self, pairs, y): # set up (the inverse of) the prior M # if the prior is the default (None), we raise a warning - if self.prior is None: - # TODO: - # replace prior=None by prior='identity' in v0.6.0 and remove the - # warning - msg = ("Warning, no prior was set (`prior=None`). As of version 0.5.0, " - "the default prior will now be set to " - "'identity', instead of 'covariance'. If you still want to use " - "the inverse of the covariance matrix as a prior, " - "set prior='covariance'. 
This warning will disappear in " - "v0.6.0, and `prior` parameter's default value will be set to " - "'identity'.") - warnings.warn(msg, ChangedBehaviorWarning) - prior = 'identity' - else: - prior = self.prior _, prior_inv = _initialize_metric_mahalanobis( - pairs, prior, + pairs, self.prior, return_inverse=True, strict_pd=True, matrix_name='prior', random_state=self.random_state) diff = pairs[:, 0] - pairs[:, 1] @@ -147,13 +126,11 @@ class SDML(_BaseSDML, _PairsClassifierMixin): sparsity_param : float, optional (default=0.01) Trade off between optimizer and sparseness (see graph_lasso). - prior : None, string or numpy array, optional (default=None) + prior : string or numpy array, optional (default='identity') Prior to set for the metric. Possible options are 'identity', 'covariance', 'random', and a numpy array of shape (n_features, n_features). For SDML, the prior should be strictly - positive definite (PD). If `None`, will be set - automatically to 'identity' (this is to raise a warning if - `prior` is not set, and stays to its default value (None), in v0.5.0). + positive definite (PD). 'identity' An identity matrix of shape (n_features, n_features). @@ -171,11 +148,6 @@ class SDML(_BaseSDML, _PairsClassifierMixin): (n_features, n_features), that will be used as such to set the prior. - use_cov : Not used. - .. deprecated:: 0.5.0 - `A0` was deprecated in version 0.5.0 and will - be removed in 0.6.0. Use 'prior' instead. - verbose : bool, optional (default=False) If True, prints information while learning. @@ -268,13 +240,11 @@ class SDML_Supervised(_BaseSDML, TransformerMixin): sparsity_param : float, optional (default=0.01) Trade off between optimizer and sparseness (see graph_lasso). - prior : None, string or numpy array, optional (default=None) + prior : string or numpy array, optional (default='identity') Prior to set for the metric. Possible options are 'identity', 'covariance', 'random', and a numpy array of shape (n_features, n_features). For SDML, the prior should be strictly - positive definite (PD). If `None`, will be set - automatically to 'identity' (this is to raise a warning if - `prior` is not set, and stays to its default value (None), in v0.5.0). + positive definite (PD). 'identity' An identity matrix of shape (n_features, n_features). @@ -292,16 +262,6 @@ class SDML_Supervised(_BaseSDML, TransformerMixin): (n_features, n_features), that will be used as such to set the prior. - use_cov : Not used. - .. deprecated:: 0.5.0 - `A0` was deprecated in version 0.5.0 and will - be removed in 0.6.0. Use 'prior' instead. - - num_labeled : Not used - .. deprecated:: 0.5.0 - `num_labeled` was deprecated in version 0.5.0 and will - be removed in 0.6.0. - num_constraints : int, optional (default=None) Number of constraints to generate. If None, defaults to `20 * num_classes**2`. @@ -332,18 +292,16 @@ class SDML_Supervised(_BaseSDML, TransformerMixin): that describes the supervised version of weakly supervised estimators. 
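# --------------------------------------------------------------------------
# A minimal usage sketch (editorial, not part of the upstream diff),
# mirroring the updated iris test later in this patch: the prior and the
# random state are both fixed at construction time.
import numpy as np
from sklearn.datasets import load_iris
from metric_learn import SDML_Supervised

X, y = load_iris(return_X_y=True)
sdml = SDML_Supervised(num_constraints=1500, prior='identity',
                       balance_param=5e-5,
                       random_state=np.random.RandomState(5555))
sdml.fit(X, y)                # no random_state argument here anymore
X_sdml = sdml.transform(X)
# --------------------------------------------------------------------------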
""" - def __init__(self, balance_param=0.5, sparsity_param=0.01, prior=None, - use_cov='deprecated', num_labeled='deprecated', + def __init__(self, balance_param=0.5, sparsity_param=0.01, prior='identity', num_constraints=None, verbose=False, preprocessor=None, random_state=None): _BaseSDML.__init__(self, balance_param=balance_param, sparsity_param=sparsity_param, prior=prior, - use_cov=use_cov, verbose=verbose, + verbose=verbose, preprocessor=preprocessor, random_state=random_state) - self.num_labeled = num_labeled self.num_constraints = num_constraints - def fit(self, X, y, random_state='deprecated'): + def fit(self, X, y): """Create constraints from labels and learn the SDML model. Parameters @@ -354,33 +312,11 @@ def fit(self, X, y, random_state='deprecated'): y : array-like, shape (n,) data labels, one for each instance - random_state : Not used - .. deprecated:: 0.5.0 - `random_state` in the `fit` function was deprecated in version 0.5.0 - and will be removed in 0.6.0. Set `random_state` at initialization - instead (when instantiating a new `SDML_Supervised` object). - Returns ------- self : object Returns the instance. """ - if self.num_labeled != 'deprecated': - warnings.warn('"num_labeled" parameter is not used.' - ' It has been deprecated in version 0.5.0 and will be' - ' removed in 0.6.0', DeprecationWarning) - if random_state != 'deprecated': - warnings.warn('"random_state" parameter in the `fit` function is ' - 'deprecated. Set `random_state` at initialization ' - 'instead (when instantiating a new `SDML_Supervised` ' - 'object).', DeprecationWarning) - else: - warnings.warn('As of v0.5.0, `SDML_Supervised` now uses the ' - '`random_state` given at initialization to sample ' - 'constraints, not the default `np.random` from the `fit` ' - 'method, since this argument is now deprecated. ' - 'This warning will disappear in v0.6.0.', - ChangedBehaviorWarning) X, y = self._prepare_inputs(X, y, ensure_min_samples=2) num_constraints = self.num_constraints if num_constraints is None: diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py index b6b9eea2..4db0a1fc 100644 --- a/test/metric_learn_test.py +++ b/test/metric_learn_test.py @@ -10,7 +10,7 @@ from numpy.testing import (assert_array_almost_equal, assert_array_equal, assert_allclose) from sklearn.utils.testing import assert_warns_message -from sklearn.exceptions import ConvergenceWarning, ChangedBehaviorWarning +from sklearn.exceptions import ConvergenceWarning from sklearn.utils.validation import check_X_y from sklearn.preprocessing import StandardScaler try: @@ -23,7 +23,7 @@ from metric_learn import (LMNN, NCA, LFDA, Covariance, MLKR, MMC, SCML_Supervised, LSML_Supervised, ITML_Supervised, SDML_Supervised, RCA_Supervised, - MMC_Supervised, SDML, RCA, ITML, LSML, SCML) + MMC_Supervised, SDML, RCA, ITML, SCML) # Import this specially for testing. from metric_learn.constraints import wrap_pairs, Constraints from metric_learn.lmnn import _sum_outer_products @@ -313,74 +313,6 @@ def test_iris(self): csep = class_separation(lsml.transform(self.iris_points), self.iris_labels) self.assertLess(csep, 0.8) # it's pretty terrible - def test_deprecation_num_labeled(self): - # test that a deprecation message is thrown if num_labeled is set at - # initialization - # TODO: remove in v.0.6 - X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) - y = np.array([1, 0, 1, 0]) - lsml_supervised = LSML_Supervised(num_labeled=np.inf) - msg = ('"num_labeled" parameter is not used.' 
- ' It has been deprecated in version 0.5.0 and will be' - ' removed in 0.6.0') - assert_warns_message(DeprecationWarning, msg, lsml_supervised.fit, X, y) - - def test_changed_behaviour_warning(self): - # test that a ChangedBehavior warning is thrown about the init, if the - # default parameters are used. - # TODO: remove in v.0.6 - X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) - y = np.array([1, 0, 1, 0]) - lsml_supervised = LSML_Supervised() - msg = ("Warning, no prior was set (`prior=None`). As of version 0.5.0, " - "the default prior will now be set to " - "'identity', instead of 'covariance'. If you still want to use " - "the inverse of the covariance matrix as a prior, " - "set prior='covariance'. This warning will disappear in " - "v0.6.0, and `prior` parameter's default value will be set to " - "'identity'.") - with pytest.warns(ChangedBehaviorWarning) as raised_warning: - lsml_supervised.fit(X, y) - assert any(msg == str(wrn.message) for wrn in raised_warning) - - pairs = np.array([[[-10., 0.], [10., 0.], [-5., 3.], [5., 0.]], - [[0., 50.], [0., -60], [-10., 0.], [10., 0.]]]) - lsml = LSML() - with pytest.warns(ChangedBehaviorWarning) as raised_warning: - lsml.fit(pairs) - assert any(msg == str(wrn.message) for wrn in raised_warning) - - def test_deprecation_random_state(self): - # test that a deprecation message is thrown if random_state is set at - # fit time - # TODO: remove in v.0.6 - X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) - y = np.array([1, 0, 1, 0]) - lsml_supervised = LSML_Supervised() - msg = ('"random_state" parameter in the `fit` function is ' - 'deprecated. Set `random_state` at initialization ' - 'instead (when instantiating a new `LSML_Supervised` ' - 'object).') - with pytest.warns(DeprecationWarning) as raised_warning: - lsml_supervised.fit(X, y, random_state=np.random) - assert any(msg == str(wrn.message) for wrn in raised_warning) - - def test_changed_behaviour_warning_random_state(self): - # test that a ChangedBehavior warning is thrown if the random_state is - # not set in fit. - # TODO: remove in v.0.6 - X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) - y = np.array([1, 0, 1, 0]) - lsml_supervised = LSML_Supervised() - msg = ('As of v0.5.0, `LSML_Supervised` now uses the ' - '`random_state` given at initialization to sample ' - 'constraints, not the default `np.random` from the `fit` ' - 'method, since this argument is now deprecated. ' - 'This warning will disappear in v0.6.0.') - with pytest.warns(ChangedBehaviorWarning) as raised_warning: - lsml_supervised.fit(X, y) - assert any(msg == str(wrn.message) for wrn in raised_warning) - class TestITML(MetricTestCase): def test_iris(self): @@ -390,83 +322,6 @@ def test_iris(self): csep = class_separation(itml.transform(self.iris_points), self.iris_labels) self.assertLess(csep, 0.2) - def test_deprecation_num_labeled(self): - # test that a deprecation message is thrown if num_labeled is set at - # initialization - # TODO: remove in v.0.6 - X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) - y = np.array([1, 0, 1, 0]) - itml_supervised = ITML_Supervised(num_labeled=np.inf) - msg = ('"num_labeled" parameter is not used.' 
- ' It has been deprecated in version 0.5.0 and will be' - ' removed in 0.6.0') - assert_warns_message(DeprecationWarning, msg, itml_supervised.fit, X, y) - - def test_deprecation_bounds(self): - # test that a deprecation message is thrown if bounds is set at - # initialization - # TODO: remove in v.0.6 - X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) - y = np.array([1, 0, 1, 0]) - itml_supervised = ITML_Supervised(bounds=None) - msg = ('"bounds" parameter from initialization is not used.' - ' It has been deprecated in version 0.5.0 and will be' - ' removed in 0.6.0. Use the "bounds" parameter of this ' - 'fit method instead.') - assert_warns_message(DeprecationWarning, msg, itml_supervised.fit, X, y) - - def test_deprecation_A0(self): - # test that a deprecation message is thrown if A0 is set at - # initialization - # TODO: remove in v.0.6 - X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) - y = np.array([1, 0, 1, 0]) - itml_supervised = ITML_Supervised(A0=np.ones_like(X)) - msg = ('"A0" parameter is not used.' - ' It has been deprecated in version 0.5.0 and will be' - 'removed in 0.6.0. Use "prior" instead.') - with pytest.warns(DeprecationWarning) as raised_warning: - itml_supervised.fit(X, y) - assert any(msg == str(wrn.message) for wrn in raised_warning) - - pairs = np.array([[[-10., 0.], [10., 0.]], [[0., 50.], [0., -60]]]) - y_pairs = [1, -1] - itml = ITML(A0=np.ones_like(X)) - with pytest.warns(DeprecationWarning) as raised_warning: - itml.fit(pairs, y_pairs) - assert any(msg == str(wrn.message) for wrn in raised_warning) - - def test_deprecation_random_state(self): - # test that a deprecation message is thrown if random_state is set at - # fit time - # TODO: remove in v.0.6 - X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) - y = np.array([1, 0, 1, 0]) - itml_supervised = ITML_Supervised() - msg = ('"random_state" parameter in the `fit` function is ' - 'deprecated. Set `random_state` at initialization ' - 'instead (when instantiating a new `ITML_Supervised` ' - 'object).') - with pytest.warns(DeprecationWarning) as raised_warning: - itml_supervised.fit(X, y, random_state=np.random) - assert any(msg == str(wrn.message) for wrn in raised_warning) - - def test_changed_behaviour_warning_random_state(self): - # test that a ChangedBehavior warning is thrown if the random_state is - # not set in fit. - # TODO: remove in v.0.6 - X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) - y = np.array([1, 0, 1, 0]) - itml_supervised = ITML_Supervised() - msg = ('As of v0.5.0, `ITML_Supervised` now uses the ' - '`random_state` given at initialization to sample ' - 'constraints, not the default `np.random` from the `fit` ' - 'method, since this argument is now deprecated. ' - 'This warning will disappear in v0.6.0.') - with pytest.warns(ChangedBehaviorWarning) as raised_warning: - itml_supervised.fit(X, y) - assert any(msg == str(wrn.message) for wrn in raised_warning) - @pytest.mark.parametrize('bounds', [None, (20., 100.), [20., 100.], np.array([20., 100.]), @@ -557,35 +412,6 @@ def grad(x): np.linalg.norm(approx_fprime(L.ravel(), fun, epsilon))) np.testing.assert_almost_equal(rel_diff, 0., decimal=5) - def test_changed_behaviour_warning(self): - # test that a ChangedBehavior warning is thrown about the init, if the - # default parameters are used. - # TODO: remove in v.0.6 - X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) - y = np.array([1, 0, 1, 0]) - lmnn = LMNN(k=2) - msg = ("Warning, no init was set (`init=None`). 
As of version 0.5.0, " - "the default init will now be set to 'auto', instead of the " - "previous identity matrix. If you still want to use the identity " - "matrix as before, set init='identity'. This warning " - "will disappear in v0.6.0, and `init` parameter's default value " - "will be set to 'auto'.") - with pytest.warns(ChangedBehaviorWarning) as raised_warning: - lmnn.fit(X, y) - assert any(msg == str(wrn.message) for wrn in raised_warning) - - def test_deprecation_use_pca(self): - # test that a DeprecationWarning is thrown about use_pca, if the - # default parameters are used. - # TODO: remove in v.0.6 - X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) - y = np.array([1, 0, 1, 0]) - lmnn = LMNN(k=2, use_pca=True) - msg = ('"use_pca" parameter is not used.' - ' It has been deprecated in version 0.5.0 and will be' - ' removed in 0.6.0.') - assert_warns_message(DeprecationWarning, msg, lmnn.fit, X, y) - def test_loss_func(capsys): """Test the loss function (and its gradient) on a simple example, @@ -803,8 +629,7 @@ def test_sdml_supervised_raises_warning_msg_not_installed_skggm(self): # load_iris: dataset where we know scikit-learn's graphical lasso fails # with a Floating Point error X, y = load_iris(return_X_y=True) - sdml_supervised = SDML_Supervised(balance_param=0.5, use_cov=True, - sparsity_param=0.01) + sdml_supervised = SDML_Supervised(balance_param=0.5, sparsity_param=0.01) msg = ("There was a problem in SDML when using scikit-learn's graphical " "lasso solver. skggm's graphical lasso can sometimes converge on " "non SPD cases where scikit-learn's graphical lasso fails to " @@ -907,24 +732,12 @@ def test_iris(self): rs = np.random.RandomState(5555) sdml = SDML_Supervised(num_constraints=1500, prior='identity', - balance_param=5e-5) - sdml.fit(self.iris_points, self.iris_labels, random_state=rs) + balance_param=5e-5, random_state=rs) + sdml.fit(self.iris_points, self.iris_labels) csep = class_separation(sdml.transform(self.iris_points), self.iris_labels) self.assertLess(csep, 0.22) - def test_deprecation_num_labeled(self): - # test that a deprecation message is thrown if num_labeled is set at - # initialization - # TODO: remove in v.0.6 - X, y = make_classification(random_state=42) - sdml_supervised = SDML_Supervised(num_labeled=np.inf, prior='identity', - balance_param=5e-5) - msg = ('"num_labeled" parameter is not used.' - ' It has been deprecated in version 0.5.0 and will be' - ' removed in 0.6.0') - assert_warns_message(DeprecationWarning, msg, sdml_supervised.fit, X, y) - def test_sdml_raises_warning_non_psd(self): """Tests that SDML raises a warning on a toy example where we know the pseudo-covariance matrix is not PSD""" @@ -967,83 +780,6 @@ def test_sdml_works_on_non_spd_pb_with_skggm(self): random_state=np.random.RandomState(42)) sdml.fit(X, y) - def test_deprecation_use_cov(self): - # test that a deprecation message is thrown if use_cov is set at - # initialization - # TODO: remove in v.0.6 - X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) - y = np.array([1, 0, 1, 0]) - sdml_supervised = SDML_Supervised(use_cov=np.ones_like(X), - balance_param=1e-5) - msg = ('"use_cov" parameter is not used.' - ' It has been deprecated in version 0.5.0 and will be' - 'removed in 0.6.0. 
Use "prior" instead.') - with pytest.warns(DeprecationWarning) as raised_warning: - sdml_supervised.fit(X, y) - assert any(msg == str(wrn.message) for wrn in raised_warning) - - pairs = np.array([[[-10., 0.], [10., 0.]], [[0., 50.], [0., -60]]]) - y_pairs = [1, -1] - sdml = SDML(use_cov=np.ones_like(X), balance_param=1e-5) - with pytest.warns(DeprecationWarning) as raised_warning: - sdml.fit(pairs, y_pairs) - assert any(msg == str(wrn.message) for wrn in raised_warning) - - def test_changed_behaviour_warning(self): - # test that a ChangedBehavior warning is thrown about the init, if the - # default parameters are used (except for the balance_param that we need - # to set for the algorithm to not diverge) - # TODO: remove in v.0.6 - X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) - y = np.array([1, 0, 1, 0]) - sdml_supervised = SDML_Supervised(balance_param=1e-5) - msg = ("Warning, no prior was set (`prior=None`). As of version 0.5.0, " - "the default prior will now be set to " - "'identity', instead of 'covariance'. If you still want to use " - "the inverse of the covariance matrix as a prior, " - "set prior='covariance'. This warning will disappear in " - "v0.6.0, and `prior` parameter's default value will be set to " - "'identity'.") - with pytest.warns(ChangedBehaviorWarning) as raised_warning: - sdml_supervised.fit(X, y) - assert any(msg == str(wrn.message) for wrn in raised_warning) - - pairs = np.array([[[-10., 0.], [10., 0.]], [[0., 50.], [0., -60]]]) - y_pairs = [1, -1] - sdml = SDML(balance_param=1e-5) - with pytest.warns(ChangedBehaviorWarning) as raised_warning: - sdml.fit(pairs, y_pairs) - assert any(msg == str(wrn.message) for wrn in raised_warning) - - def test_deprecation_random_state(self): - # test that a deprecation message is thrown if random_state is set at - # fit time - # TODO: remove in v.0.6 - X, y = load_iris(return_X_y=True) - sdml_supervised = SDML_Supervised(balance_param=5e-5) - msg = ('"random_state" parameter in the `fit` function is ' - 'deprecated. Set `random_state` at initialization ' - 'instead (when instantiating a new `SDML_Supervised` ' - 'object).') - with pytest.warns(DeprecationWarning) as raised_warning: - sdml_supervised.fit(X, y, random_state=np.random) - assert any(msg == str(wrn.message) for wrn in raised_warning) - - def test_changed_behaviour_warning_random_state(self): - # test that a ChangedBehavior warning is thrown if the random_state is - # not set in fit. - # TODO: remove in v.0.6 - X, y = load_iris(return_X_y=True) - sdml_supervised = SDML_Supervised(balance_param=5e-5) - msg = ('As of v0.5.0, `SDML_Supervised` now uses the ' - '`random_state` given at initialization to sample ' - 'constraints, not the default `np.random` from the `fit` ' - 'method, since this argument is now deprecated. ' - 'This warning will disappear in v0.6.0.') - with pytest.warns(ChangedBehaviorWarning) as raised_warning: - sdml_supervised.fit(X, y) - assert any(msg == str(wrn.message) for wrn in raised_warning) - @pytest.mark.skipif(not HAS_SKGGM, reason='The message should be printed only if skggm is ' @@ -1209,40 +945,6 @@ def test_one_class(self): nca.fit(X, y) assert_array_equal(nca.components_, A) - def test_changed_behaviour_warning(self): - # test that a ChangedBehavior warning is thrown about the init, if the - # default parameters are used. - # TODO: remove in v.0.6 - X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) - y = np.array([1, 0, 1, 0]) - nca = NCA() - msg = ("Warning, no init was set (`init=None`). 
As of version 0.5.0, " - "the default init will now be set to 'auto', instead of the " - "previous scaling matrix. If you still want to use the same " - "scaling matrix as before, set " - "init=np.eye(X.shape[1])/(np.maximum(X.max(axis=0)-X.min(axis=0)" - ", EPS))). This warning will disappear in v0.6.0, and `init` " - "parameter's default value will be set to 'auto'.") - with pytest.warns(ChangedBehaviorWarning) as raised_warning: - nca.fit(X, y) - assert any(msg == str(wrn.message) for wrn in raised_warning) - - -@pytest.mark.parametrize('num_dims', [None, 2]) -def test_deprecation_num_dims_nca(num_dims): - # test that a deprecation message is thrown if num_dims is set at - # initialization - # TODO: remove in v.0.6 - X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) - y = np.array([1, 0, 1, 0]) - nca = NCA(num_dims=num_dims) - msg = ('"num_dims" parameter is not used.' - ' It has been deprecated in version 0.5.0 and will be' - ' removed in 0.6.0. Use "n_components" instead') - with pytest.warns(DeprecationWarning) as raised_warning: - nca.fit(X, y) - assert (str(raised_warning[0].message) == msg) - class TestLFDA(MetricTestCase): def test_iris(self): @@ -1256,22 +958,6 @@ def test_iris(self): self.assertEqual(lfda.components_.shape, (2, 4)) -@pytest.mark.parametrize('num_dims', [None, 2]) -def test_deprecation_num_dims_lfda(num_dims): - # test that a deprecation message is thrown if num_dims is set at - # initialization - # TODO: remove in v.0.6 - X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) - y = np.array([1, 0, 1, 0]) - lfda = LFDA(num_dims=num_dims) - msg = ('"num_dims" parameter is not used.' - ' It has been deprecated in version 0.5.0 and will be' - ' removed in 0.6.0. Use "n_components" instead') - with pytest.warns(DeprecationWarning) as raised_warning: - lfda.fit(X, y) - assert (str(raised_warning[0].message) == msg) - - class TestRCA(MetricTestCase): def test_iris(self): rca = RCA_Supervised(n_components=2, num_chunks=30, chunk_size=2) @@ -1279,46 +965,6 @@ def test_iris(self): csep = class_separation(rca.transform(self.iris_points), self.iris_labels) self.assertLess(csep, 0.29) - def test_deprecation_pca_comps(self): - # test that a deprecation message is thrown if pca_comps is set at - # initialization - # TODO: remove in v.0.6 - X, y = make_classification(random_state=42, n_samples=100) - rca_supervised = RCA_Supervised(pca_comps=X.shape[1], num_chunks=20) - msg = ('"pca_comps" parameter is not used. ' - 'It has been deprecated in version 0.5.0 and will be' - 'removed in 0.6.0. RCA will not do PCA preprocessing anymore. If ' - 'you still want to do it, you could use ' - '`sklearn.decomposition.PCA` and an `sklearn.pipeline.Pipeline`.') - with pytest.warns(ChangedBehaviorWarning) as expected_msg: - rca_supervised.fit(X, y) - assert any(str(w.message) == msg for w in expected_msg) - - rca = RCA(pca_comps=X.shape[1]) - with pytest.warns(ChangedBehaviorWarning) as expected_msg: - rca.fit(X, y) - assert any(str(w.message) == msg for w in expected_msg) - - def test_changedbehaviorwarning_preprocessing(self): - # test that a ChangedBehaviorWarning is thrown when using RCA - # TODO: remove in v.0.6 - - msg = ("RCA will no longer center the data before training. If you want " - "to do some preprocessing, you should do it manually (you can also " - "use an `sklearn.pipeline.Pipeline` for instance). 
This warning " - "will disappear in version 0.6.0.") - - X, y = make_classification(random_state=42, n_samples=100) - rca_supervised = RCA_Supervised(num_chunks=20) - with pytest.warns(ChangedBehaviorWarning) as expected_msg: - rca_supervised.fit(X, y) - assert any(str(w.message) == msg for w in expected_msg) - - rca = RCA() - with pytest.warns(ChangedBehaviorWarning) as expected_msg: - rca.fit(X, y) - assert any(str(w.message) == msg for w in expected_msg) - def test_rank_deficient_returns_warning(self): """Checks that if the covariance matrix is not invertible, we raise a warning message advising to use PCA""" @@ -1338,35 +984,6 @@ def test_rank_deficient_returns_warning(self): rca.fit(X, y) assert any(str(w.message) == msg for w in raised_warnings) - def test_deprecation_random_state(self): - # test that a deprecation message is thrown if random_state is set at - # fit time - # TODO: remove in v.0.6 - X, y = make_classification(random_state=42, n_samples=100) - rca_supervised = RCA_Supervised(num_chunks=20) - msg = ('"random_state" parameter in the `fit` function is ' - 'deprecated. Set `random_state` at initialization ' - 'instead (when instantiating a new `RCA_Supervised` ' - 'object).') - with pytest.warns(DeprecationWarning) as raised_warning: - rca_supervised.fit(X, y, random_state=np.random) - assert any(msg == str(wrn.message) for wrn in raised_warning) - - def test_changed_behaviour_warning_random_state(self): - # test that a ChangedBehavior warning is thrown if the random_state is - # not set in fit. - # TODO: remove in v.0.6 - X, y = make_classification(random_state=42, n_samples=100) - rca_supervised = RCA_Supervised(num_chunks=20) - msg = ('As of v0.5.0, `RCA_Supervised` now uses the ' - '`random_state` given at initialization to sample ' - 'constraints, not the default `np.random` from the `fit` ' - 'method, since this argument is now deprecated. ' - 'This warning will disappear in v0.6.0.') - with pytest.warns(ChangedBehaviorWarning) as raised_warning: - rca_supervised.fit(X, y) - assert any(msg == str(wrn.message) for wrn in raised_warning) - def test_unknown_labels(self): n = 200 num_chunks = 50 @@ -1403,30 +1020,6 @@ def test_bad_parameters(self): assert any(str(w.message) == msg for w in raised_warning) -@pytest.mark.parametrize('num_dims', [None, 2]) -def test_deprecation_num_dims_rca(num_dims): - # test that a deprecation message is thrown if num_dims is set at - # initialization - # TODO: remove in v.0.6 - X, y = load_iris(return_X_y=True) - rca = RCA(num_dims=num_dims) - msg = ('"num_dims" parameter is not used.' - ' It has been deprecated in version 0.5.0 and will be' - ' removed in 0.6.0. Use "n_components" instead') - with pytest.warns(DeprecationWarning) as raised_warning: - rca.fit(X, y) - assert any(str(w.message) == msg for w in raised_warning) - - # we take a small number of chunks so that RCA works on iris - rca_supervised = RCA_Supervised(num_dims=num_dims, num_chunks=10) - msg = ('"num_dims" parameter is not used.' - ' It has been deprecated in version 0.5.0 and will be' - ' removed in 0.6.0. Use "n_components" instead') - with pytest.warns(DeprecationWarning) as raised_warning: - rca_supervised.fit(X, y) - assert any(str(w.message) == msg for w in raised_warning) - - class TestMLKR(MetricTestCase): def test_iris(self): mlkr = MLKR() @@ -1457,52 +1050,6 @@ def grad_fn(M): rel_diff = check_grad(fun, grad_fn, M.ravel()) / np.linalg.norm(grad_fn(M)) np.testing.assert_almost_equal(rel_diff, 0.) 
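The gradient test kept just above follows a standard recipe: compare the analytic gradient against scipy's finite-difference estimate, and normalize by the gradient norm to get a scale-free relative error. A self-contained sketch of that recipe on a toy quadratic (the function and gradient below are illustrative stand-ins, not metric-learn code):

    import numpy as np
    from scipy.optimize import check_grad

    def fun(w):
        return 0.5 * np.dot(w, w)  # toy objective

    def grad_fn(w):
        return w  # its analytic gradient

    w0 = np.array([1., -2., 3.])
    # check_grad returns the 2-norm of (finite-difference - analytic);
    # dividing by the gradient norm makes the tolerance scale-free.
    rel_diff = check_grad(fun, grad_fn, w0) / np.linalg.norm(grad_fn(w0))
    assert rel_diff < 1e-6
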
- def test_deprecation_A0(self): - # test that a deprecation message is thrown if A0 is set at - # initialization - # TODO: remove in v.0.6 - X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) - y = np.array([1, 0, 1, 0]) - mlkr = MLKR(A0=np.ones_like(X)) - msg = ('"A0" parameter is not used.' - ' It has been deprecated in version 0.5.0 and will be' - 'removed in 0.6.0. Use "init" instead.') - with pytest.warns(DeprecationWarning) as raised_warning: - mlkr.fit(X, y) - assert any(msg == str(wrn.message) for wrn in raised_warning) - - def test_changed_behaviour_warning(self): - # test that a ChangedBehavior warning is thrown about the init, if the - # default parameters are used. - # TODO: remove in v.0.6 - X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) - y = np.array([0.1, 0.2, 0.3, 0.4]) - mlkr = MLKR() - msg = ("Warning, no init was set (`init=None`). As of version 0.5.0, " - "the default init will now be set to 'auto', instead of 'pca'. " - "If you still want to use PCA as an init, set init='pca'. " - "This warning will disappear in v0.6.0, and `init` parameter's" - " default value will be set to 'auto'.") - with pytest.warns(ChangedBehaviorWarning) as raised_warning: - mlkr.fit(X, y) - assert any(msg == str(wrn.message) for wrn in raised_warning) - - -@pytest.mark.parametrize('num_dims', [None, 2]) -def test_deprecation_num_dims_mlkr(num_dims): - # test that a deprecation message is thrown if num_dims is set at - # initialization - # TODO: remove in v.0.6 - X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) - y = np.array([1, 0, 1, 0]) - mlkr = MLKR(num_dims=num_dims) - msg = ('"num_dims" parameter is not used.' - ' It has been deprecated in version 0.5.0 and will be' - ' removed in 0.6.0. Use "n_components" instead') - with pytest.warns(DeprecationWarning) as raised_warning: - mlkr.fit(X, y) - assert (str(raised_warning[0].message) == msg) - class TestMMC(MetricTestCase): def test_iris(self): @@ -1543,96 +1090,6 @@ def test_iris(self): csep = class_separation(mmc.transform(self.iris_points), self.iris_labels) self.assertLess(csep, 0.2) - def test_deprecation_num_labeled(self): - # test that a deprecation message is thrown if num_labeled is set at - # initialization - # TODO: remove in v.0.6 - X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) - y = np.array([1, 0, 1, 0]) - mmc_supervised = MMC_Supervised(num_labeled=np.inf) - msg = ('"num_labeled" parameter is not used.' - ' It has been deprecated in version 0.5.0 and will be' - ' removed in 0.6.0') - assert_warns_message(DeprecationWarning, msg, mmc_supervised.fit, X, y) - - def test_deprecation_A0(self): - # test that a deprecation message is thrown if A0 is set at - # initialization - # TODO: remove in v.0.6 - X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) - y = np.array([1, 0, 1, 0]) - mmc_supervised = MMC_Supervised(A0=np.ones_like(X)) - msg = ('"A0" parameter is not used.' - ' It has been deprecated in version 0.5.0 and will be' - 'removed in 0.6.0. Use "init" instead.') - with pytest.warns(DeprecationWarning) as raised_warning: - mmc_supervised.fit(X, y) - assert any(msg == str(wrn.message) for wrn in raised_warning) - - pairs = np.array([[[-10., 0.], [10., 0.]], [[0., 50.], [0., -60]]]) - y_pairs = [1, -1] - mmc = MMC(A0=np.ones_like(X)) - with pytest.warns(DeprecationWarning) as raised_warning: - mmc.fit(pairs, y_pairs) - assert any(msg == str(wrn.message) for wrn in raised_warning) - - def test_changed_behaviour_warning(self): - # test that a ChangedBehavior warning is thrown about the init, if the - # default parameters are used. 
- # TODO: remove in v.0.6 - X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) - y = np.array([1, 0, 1, 0]) - mmc_supervised = MMC_Supervised() - msg = ("Warning, no init was set (`init=None`). As of version 0.5.0, " - "the default init will now be set to 'identity', instead of the " - "identity divided by a scaling factor of 10. " - "If you still want to use the same init as in previous " - "versions, set init=np.eye(d)/10, where d is the dimension " - "of your input space (d=pairs.shape[1]). " - "This warning will disappear in v0.6.0, and `init` parameter's" - " default value will be set to 'auto'.") - with pytest.warns(ChangedBehaviorWarning) as raised_warning: - mmc_supervised.fit(X, y) - assert any(msg == str(wrn.message) for wrn in raised_warning) - - pairs = np.array([[[-10., 0.], [10., 0.]], [[0., 50.], [0., -60]]]) - y_pairs = [1, -1] - mmc = MMC() - with pytest.warns(ChangedBehaviorWarning) as raised_warning: - mmc.fit(pairs, y_pairs) - assert any(msg == str(wrn.message) for wrn in raised_warning) - - def test_deprecation_random_state(self): - # test that a deprecation message is thrown if random_state is set at - # fit time - # TODO: remove in v.0.6 - X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) - y = np.array([1, 0, 1, 0]) - mmc_supervised = MMC_Supervised() - msg = ('"random_state" parameter in the `fit` function is ' - 'deprecated. Set `random_state` at initialization ' - 'instead (when instantiating a new `MMC_Supervised` ' - 'object).') - with pytest.warns(DeprecationWarning) as raised_warning: - mmc_supervised.fit(X, y, random_state=np.random) - assert any(msg == str(wrn.message) for wrn in raised_warning) - - def test_changed_behaviour_warning_random_state(self): - # test that a ChangedBehavior warning is thrown if the random_state is - # not set in fit. - # TODO: remove in v.0.6 - X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) - y = np.array([1, 0, 1, 0]) - mmc_supervised = MMC_Supervised() - msg = ('As of v0.5.0, `MMC_Supervised` now uses the ' - '`random_state` given at initialization to sample ' - 'constraints, not the default `np.random` from the `fit` ' - 'method, since this argument is now deprecated. 
' - 'This warning will disappear in v0.6.0.') - with pytest.warns(ChangedBehaviorWarning) as raised_warning: - mmc_supervised.fit(X, y) - assert any(msg == str(wrn.message) for wrn in raised_warning) - @pytest.mark.parametrize(('algo_class', 'dataset'), [(NCA, make_classification()), diff --git a/test/test_components_metric_conversion.py b/test/test_components_metric_conversion.py index d1e2acf4..b9da87ed 100644 --- a/test/test_components_metric_conversion.py +++ b/test/test_components_metric_conversion.py @@ -37,8 +37,8 @@ def test_lsml_supervised(self): def test_itml_supervised(self): seed = np.random.RandomState(1234) - itml = ITML_Supervised(num_constraints=200) - itml.fit(self.X, self.y, random_state=seed) + itml = ITML_Supervised(num_constraints=200, random_state=seed) + itml.fit(self.X, self.y) L = itml.components_ assert_array_almost_equal(L.T.dot(L), itml.get_mahalanobis_matrix()) @@ -70,9 +70,8 @@ def test_lfda(self): assert_array_almost_equal(L.T.dot(L), lfda.get_mahalanobis_matrix()) def test_rca_supervised(self): - seed = np.random.RandomState(1234) rca = RCA_Supervised(n_components=2, num_chunks=30, chunk_size=2) - rca.fit(self.X, self.y, random_state=seed) + rca.fit(self.X, self.y) L = rca.components_ assert_array_almost_equal(L.T.dot(L), rca.get_mahalanobis_matrix()) diff --git a/test/test_mahalanobis_mixin.py b/test/test_mahalanobis_mixin.py index 2e3c3ef4..ab7e972d 100644 --- a/test/test_mahalanobis_mixin.py +++ b/test/test_mahalanobis_mixin.py @@ -226,24 +226,6 @@ def test_get_metric_is_pseudo_metric(estimator, build_dataset): np.isclose(metric(a, c), metric(a, b) + metric(b, c), rtol=1e-20)) -@pytest.mark.parametrize('estimator, build_dataset', metric_learners, - ids=ids_metric_learners) -def test_metric_raises_deprecation_warning(estimator, build_dataset): - """assert that a deprecation warning is raised if someones wants to call - the `metric` function""" - # TODO: remove this method in version 0.6.0 - input_data, labels, _, X = build_dataset() - model = clone(estimator) - set_random_state(model) - model.fit(*remove_y(estimator, input_data, labels)) - - with pytest.warns(DeprecationWarning) as raised_warning: - model.metric() - assert (str(raised_warning[0].message) == - ("`metric` is deprecated since version 0.5.0 and will be removed " - "in 0.6.0. Use `get_mahalanobis_matrix` instead.")) - - @pytest.mark.parametrize('estimator, build_dataset', metric_learners, ids=ids_metric_learners) def test_get_metric_compatible_with_scikit_learn(estimator, build_dataset): diff --git a/test/test_sklearn_compat.py b/test/test_sklearn_compat.py index 7f7d7037..e18eb7f4 100644 --- a/test/test_sklearn_compat.py +++ b/test/test_sklearn_compat.py @@ -28,23 +28,23 @@ class Stable_RCA_Supervised(RCA_Supervised): - def __init__(self, n_components=None, pca_comps=None, + def __init__(self, n_components=None, chunk_size=2, preprocessor=None, random_state=None): # this init makes RCA stable for scikit-learn examples. super(Stable_RCA_Supervised, self).__init__( - num_chunks=2, n_components=n_components, pca_comps=pca_comps, + num_chunks=2, n_components=n_components, chunk_size=chunk_size, preprocessor=preprocessor, random_state=random_state) class Stable_SDML_Supervised(SDML_Supervised): - def __init__(self, sparsity_param=0.01, num_labeled='deprecated', + def __init__(self, sparsity_param=0.01, num_constraints=None, verbose=False, preprocessor=None, random_state=None): # this init makes SDML stable for scikit-learn examples. 
super(Stable_SDML_Supervised, self).__init__(
-        sparsity_param=sparsity_param, num_labeled=num_labeled,
+        sparsity_param=sparsity_param, num_constraints=num_constraints,
         verbose=verbose, preprocessor=preprocessor,
         balance_param=1e-5, prior='identity', random_state=random_state)

From 899ef47889426cc2a6ffa606ba43b892af7b48da Mon Sep 17 00:00:00 2001
From: William de Vazelhes
Date: Sun, 28 Jun 2020 20:07:06 +0200
Subject: [PATCH 166/210] Remove modules.rst added by mistake

---
 doc/modules.rst | 7 -------
 1 file changed, 7 deletions(-)
 delete mode 100644 doc/modules.rst

diff --git a/doc/modules.rst b/doc/modules.rst
deleted file mode 100644
index 55d5ad40..00000000
--- a/doc/modules.rst
+++ /dev/null
@@ -1,7 +0,0 @@
-metric_learn
-============
-
-.. toctree::
-   :maxdepth: 4
-
-   metric_learn

From 49b811d61b152eb0762135476c2853d13521bdac Mon Sep 17 00:00:00 2001
From: William de Vazelhes <31916524+wdevazelhes@users.noreply.github.com>
Date: Tue, 30 Jun 2020 20:00:49 +0200
Subject: [PATCH 167/210] replace +1 by inf (#297)

---
 metric_learn/mmc.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/metric_learn/mmc.py b/metric_learn/mmc.py
index e4f89cfe..1ff30b1e 100644
--- a/metric_learn/mmc.py
+++ b/metric_learn/mmc.py
@@ -204,7 +204,7 @@ def _fit_diag(self, pairs, y):
       obj = (np.dot(s_sum, w_tmp) +
              self.diagonal_c * self._D_objective(neg_pairs, w_tmp))
       assert_all_finite(obj)
-      obj_previous = obj + 1  # just to get the while-loop started
+      obj_previous = np.inf  # just to get the while-loop started

       inner_it = 0
       while obj < obj_previous:
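The one-line mmc.py change above is subtle: in double precision, `obj + 1` equals `obj` once the objective is large, so `while obj < obj_previous` could fail to make even one pass. Seeding the previous objective with `np.inf` guarantees a first iteration at any scale. A toy sketch of the pattern (the quadratic objective and halving update are illustrative, not taken from MMC):

    import numpy as np

    big = 1e17
    assert big + 1 == big  # the old sentinel vanishes to float rounding here

    w = 10.
    obj, obj_previous = w ** 2, np.inf  # np.inf always admits a first pass
    while obj < obj_previous:  # stop once the objective stops decreasing
        obj_previous = obj
        w *= 0.5  # toy update step
        obj = w ** 2

From a3435f3523c0dc8328ea49f4853101b0c1acecc2 Mon Sep 17 00:00:00 2001
From: William de Vazelhes
Date: Wed, 1 Jul 2020 10:05:31 +0200
Subject: [PATCH 168/210] Update 2019 to 2020

---
 doc/conf.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/conf.py b/doc/conf.py
index b6408d31..eac09b38 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -20,7 +20,7 @@
 # General information about the project.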
project = u'metric-learn' -copyright = (u'2015-2019, CJ Carey, Yuan Tang, William de Vazelhes, Aurélien ' +copyright = (u'2015-2020, CJ Carey, Yuan Tang, William de Vazelhes, Aurélien ' u'Bellet and Nathalie Vauquier') author = (u'CJ Carey, Yuan Tang, William de Vazelhes, Aurélien Bellet and ' u'Nathalie Vauquier') From 3e1af680085f52bb972a4866665eb25ead2ca9b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20Bellet?= Date: Wed, 1 Jul 2020 11:50:48 +0200 Subject: [PATCH 169/210] fix dependencies doc and add pointer to v0.5.0 for earlier Python versions (#298) --- README.rst | 3 ++- doc/getting_started.rst | 7 ++++--- doc/introduction.rst | 2 +- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/README.rst b/README.rst index 20850964..ff770932 100644 --- a/README.rst +++ b/README.rst @@ -20,7 +20,8 @@ metric-learn contains efficient Python implementations of several popular superv **Dependencies** -- Python 3.6+ +- Python 3.6+ (the last version supporting Python 2 and Python 3.5 was + `v0.5.0 `_) - numpy, scipy, scikit-learn>=0.20.3 **Optional dependencies** diff --git a/doc/getting_started.rst b/doc/getting_started.rst index f1b35b4f..44fd1436 100644 --- a/doc/getting_started.rst +++ b/doc/getting_started.rst @@ -17,15 +17,16 @@ metric-learn can be installed in either of the following ways: **Dependencies** -- Python 2.7+, 3.4+ -- numpy, scipy, scikit-learn>=0.20.3 +- Python 3.6+ (the last version supporting Python 2 and Python 3.5 was + `v0.5.0 `_) +- numpy, scipy, scikit-learn>=0.20.3 **Optional dependencies** - For SDML, using skggm will allow the algorithm to solve problematic cases (install from commit `a0ed406 `_). ``pip install 'git+https://github.com/skggm/skggm.git@a0ed406586c4364ea3297a658f415e13b5cbdaf8'`` to install the required version of skggm from GitHub. -- For running the examples only: matplotlib +- For running the examples only: matplotlib Quick start =========== diff --git a/doc/introduction.rst b/doc/introduction.rst index 04ae1a18..7d9f52d0 100644 --- a/doc/introduction.rst +++ b/doc/introduction.rst @@ -96,7 +96,7 @@ examples (for code illustrating some of these use-cases, see the metric learning provides a way to bias the clusters found by algorithms like K-Means towards the intended semantics. - Information retrieval: the learned metric can be used to retrieve the - elements of a database that are semantically closer to a query element. + elements of a database that are semantically closest to a query element. - Dimensionality reduction: metric learning may be seen as a way to reduce the data dimension in a (weakly) supervised setting. 
- More generally, the learned transformation :math:`L` can be used to project

From d218d78365b8355ef5beadf126a6ed5accb6277f Mon Sep 17 00:00:00 2001
From: William de Vazelhes <31916524+wdevazelhes@users.noreply.github.com>
Date: Thu, 2 Jul 2020 14:13:07 +0200
Subject: [PATCH 170/210] [MRG] Add python requires plus message for old pips
 (#299)

* Add python requires plus message for old pips

* Update install requires for scikit-learn
---
 metric_learn/_version.py |  2 +-
 setup.py                 | 29 ++++++++++++++++++++++++++++-
 2 files changed, 29 insertions(+), 2 deletions(-)

diff --git a/metric_learn/_version.py b/metric_learn/_version.py
index ef7eb44d..8411e551 100644
--- a/metric_learn/_version.py
+++ b/metric_learn/_version.py
@@ -1 +1 @@
-__version__ = '0.6.0'
+__version__ = '0.6.1'
diff --git a/setup.py b/setup.py
index 1e6f0002..8677e7bf 100755
--- a/setup.py
+++ b/setup.py
@@ -3,6 +3,32 @@
 from setuptools import setup
 import os
 import io
+import sys
+
+
+CURRENT_PYTHON = sys.version_info[:2]
+REQUIRED_PYTHON = (3, 6)
+
+# This check and everything above must remain compatible with Python 2.7.
+if CURRENT_PYTHON < REQUIRED_PYTHON:
+    sys.stderr.write("""
+==========================
+Unsupported Python version
+==========================
+This version of metric-learn requires Python {}.{}, but you're trying to
+install it on Python {}.{}.
+This may be because you are using a version of pip that doesn't
+understand the python_requires classifier. Make sure you
+have pip >= 9.0 and setuptools >= 24.2, then try again:
+    $ python -m pip install --upgrade pip setuptools
+    $ python -m pip install metric-learn
+This will install the latest version of metric-learn which works on your
+version of Python. If you can't upgrade your pip (or Python), request
+an older version of metric-learn:
+    $ python -m pip install "metric-learn<0.6.0"
+""".format(*(REQUIRED_PYTHON + CURRENT_PYTHON)))
+    sys.exit(1)
+

 version = {}
 with io.open(os.path.join('metric_learn', '_version.py')) as fp:
@@ -16,6 +42,7 @@
   version=version['__version__'],
   description='Python implementations of metric learning algorithms',
   long_description=long_description,
+  python_requires='>={}.{}'.format(*REQUIRED_PYTHON),
   author=[
   'CJ Carey',
   'Yuan Tang',
@@ -38,7 +65,7 @@
   install_requires=[
   'numpy',
   'scipy',
-  'scikit-learn',
+  'scikit-learn>=0.20.3',
   ],
   extras_require=dict(
   docs=['sphinx', 'shinx_rtd_theme', 'numpydoc'],

From 9a6fe8aa34b888a63e79fec537614a7d74572678 Mon Sep 17 00:00:00 2001
From: William de Vazelhes <31916524+wdevazelhes@users.noreply.github.com>
Date: Thu, 2 Jul 2020 14:54:13 +0200
Subject: [PATCH 171/210] Add the right version number everywhere (#300)

---
 doc/conf.py              | 4 ++--
 metric_learn/_version.py | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/doc/conf.py b/doc/conf.py
index eac09b38..812cc68a 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -24,8 +24,8 @@
 u'Bellet and Nathalie Vauquier')
 author = (u'CJ Carey, Yuan Tang, William de Vazelhes, Aurélien Bellet and '
 u'Nathalie Vauquier')
-version = '0.6.0'
-release = '0.6.0'
+version = '0.6.2'
+release = '0.6.2'
 language = 'en'
 exclude_patterns = ['_build']
diff --git a/metric_learn/_version.py b/metric_learn/_version.py
index 8411e551..aece342d 100644
--- a/metric_learn/_version.py
+++ b/metric_learn/_version.py
@@ -1 @@
-__version__ = '0.6.1'
+__version__ = '0.6.2'
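The setup.py guard above protects on two levels, which the sketch below isolates for a hypothetical package `mypkg` (all names are illustrative): pip >= 9.0 reads the `python_requires` classifier and refuses incompatible interpreters outright, while the explicit `sys.version_info` check is the fallback for older pips that ignore the classifier and would otherwise install a broken package.

    import sys
    from setuptools import setup

    REQUIRED_PYTHON = (3, 6)

    # Fallback for pip < 9.0, which does not understand python_requires:
    # fail loudly at install time instead of installing a broken package.
    if sys.version_info[:2] < REQUIRED_PYTHON:
        sys.stderr.write("mypkg requires Python {}.{} or newer.\n"
                         .format(*REQUIRED_PYTHON))
        sys.exit(1)

    setup(
        name="mypkg",
        version="0.1",
        # Modern pip reads this and skips incompatible releases entirely.
        python_requires=">={}.{}".format(*REQUIRED_PYTHON),
    )

From 730abf1790ec15e117e2cf33132d612373b5d0b2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Aur=C3=A9lien=20Bellet?=
Date: Thu, 30 Jul 2020 15:11:36 +0200
Subject: [PATCH 172/210] Update paper ref after JMLR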
acceptance (#301) * update paper ref * wrong space --- README.rst | 13 ++++++++----- doc/index.rst | 13 ++++++++----- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/README.rst b/README.rst index ff770932..41181ee8 100644 --- a/README.rst +++ b/README.rst @@ -49,17 +49,20 @@ If you use metric-learn in a scientific publication, we would appreciate citations to the following paper: `metric-learn: Metric Learning Algorithms in Python -`_, de Vazelhes -*et al.*, arXiv:1908.04710, 2019. +`_, de Vazelhes +*et al.*, Journal of Machine Learning Research, 21(138):1-6, 2020. Bibtex entry:: - @techreport{metric-learn, + @article{metric-learn, title = {metric-learn: {M}etric {L}earning {A}lgorithms in {P}ython}, author = {{de Vazelhes}, William and {Carey}, CJ and {Tang}, Yuan and {Vauquier}, Nathalie and {Bellet}, Aur{\'e}lien}, - institution = {arXiv:1908.04710}, - year = {2019} + journal = {Journal of Machine Learning Research}, + year = {2020}, + volume = {21}, + number = {138}, + pages = {1--6} } .. _sphinx documentation: http://contrib.scikit-learn.org/metric-learn/ diff --git a/doc/index.rst b/doc/index.rst index 8f000246..6ec4fb26 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -15,17 +15,20 @@ If you use metric-learn in a scientific publication, we would appreciate citations to the following paper: `metric-learn: Metric Learning Algorithms in Python -`_, de Vazelhes -*et al.*, arXiv:1908.04710, 2019. +`_, de Vazelhes +*et al.*, Journal of Machine Learning Research, 21(138):1-6, 2020. Bibtex entry:: - @techreport{metric-learn, + @article{metric-learn, title = {metric-learn: {M}etric {L}earning {A}lgorithms in {P}ython}, author = {{de Vazelhes}, William and {Carey}, CJ and {Tang}, Yuan and {Vauquier}, Nathalie and {Bellet}, Aur{\'e}lien}, - institution = {arXiv:1908.04710}, - year = {2019} + journal = {Journal of Machine Learning Research}, + year = {2020}, + volume = {21}, + number = {138}, + pages = {1--6} } From 86a5208fc2c4f9f850ea1047d49ea6afec9b8292 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Thu, 30 Jul 2020 16:00:34 +0200 Subject: [PATCH 173/210] Fix matplotlib agg messages --- doc/conf.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/doc/conf.py b/doc/conf.py index 812cc68a..94263c7a 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- import sys import os +import warnings extensions = [ 'sphinx.ext.autodoc', @@ -80,3 +81,9 @@ def setup(app): app.add_javascript('js/copybutton.js') app.add_stylesheet("basic.css") + + +# Remove matplotlib agg warnings from generated doc when using plt.show +warnings.filterwarnings("ignore", category=UserWarning, + message='Matplotlib is currently using agg, which is a' + ' non-GUI backend, so cannot show the figure.') From 617adaa2cfd176e58efd6e5fe8147185c7d337cd Mon Sep 17 00:00:00 2001 From: William de Vazelhes <31916524+wdevazelhes@users.noreply.github.com> Date: Mon, 3 Aug 2020 15:08:33 +0200 Subject: [PATCH 174/210] [MRG] Fix test for components_from_metric and add tests for _check_sdp_from_eigen (#303) * Fix test for components_from_metric and add tests for _check_sdp_from_eigen * Fix trailing whitespace * Remove unused LinAlgError Co-authored-by: William de Vazelhes 80055062 --- test/test_components_metric_conversion.py | 10 ++--- test/test_utils.py | 47 +++++++++++++++++++++++ 2 files changed, 50 insertions(+), 7 deletions(-) diff --git a/test/test_components_metric_conversion.py b/test/test_components_metric_conversion.py index b9da87ed..04d0d007 100644 --- 
a/test/test_components_metric_conversion.py
+++ b/test/test_components_metric_conversion.py
@@ -1,7 +1,6 @@
 import unittest
 import numpy as np
 import pytest
-from numpy.linalg import LinAlgError
 from scipy.stats import ortho_group
 from sklearn.datasets import load_iris
 from numpy.testing import assert_array_almost_equal, assert_allclose
@@ -117,17 +116,14 @@ def test_components_from_metric_edge_cases(self):
     L = components_from_metric(M)
     assert_allclose(L.T.dot(L), M)

-    # matrix with a determinant still high but which should be considered as a
-    # non-definite matrix (to check we don't test the definiteness with the
-    # determinant which is a bad strategy)
+    # matrix with a determinant still high but which is
+    # indefinite w.r.t. numpy standards
     M = np.diag([1e5, 1e5, 1e5, 1e5, 1e5, 1e5, 1e-20])
     M = P.dot(M).dot(P.T)
     assert np.abs(np.linalg.det(M)) > 10
     assert np.linalg.slogdet(M)[1] > 1  # (just to show that the computed
     # determinant is far from null)
-    with pytest.raises(LinAlgError) as err_msg:
-      np.linalg.cholesky(M)
-    assert str(err_msg.value) == 'Matrix is not positive definite'
+    assert np.linalg.matrix_rank(M) < M.shape[0]
     # (just to show that this case is indeed considered by numpy as an
     # indefinite case)
     L = components_from_metric(M)
diff --git a/test/test_utils.py b/test/test_utils.py
index fdcb864a..9b8b5e7e 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -1055,6 +1055,53 @@ def test__check_sdp_from_eigen_returns_definiteness(w, is_definite):
   assert _check_sdp_from_eigen(w) == is_definite


+@pytest.mark.unit
+@pytest.mark.parametrize('w, tol, is_definite',
+                         [(np.array([5., 3.]), 2, True),
+                          (np.array([5., 1.]), 2, False),
+                          (np.array([5., -1.]), 2, False)])
+def test__check_sdp_from_eigen_tol_psd(w, tol, is_definite):
+  """Tests that _check_sdp_from_eigen, for PSD matrices, returns
+  False if an eigenvalue is lower than tol"""
+  assert _check_sdp_from_eigen(w, tol=tol) == is_definite
+
+
+@pytest.mark.unit
+@pytest.mark.parametrize('w, tol',
+                         [(np.array([5., -3.]), 2),
+                          (np.array([1., -3.]), 2)])
+def test__check_sdp_from_eigen_tol_non_psd(w, tol):
+  """Tests that _check_sdp_from_eigen raises a NonPSDError
+  when there is a negative value with abs value higher than tol"""
+  with pytest.raises(NonPSDError):
+    _check_sdp_from_eigen(w, tol=tol)
+
+
+@pytest.mark.unit
+@pytest.mark.parametrize('w, is_definite',
+                         [(np.array([1e5, 1e5, 1e5, 1e5,
+                                     1e5, 1e5, 1e-20]), False),
+                          (np.array([1e-10, 1e-10]), True)])
+def test__check_sdp_from_eigen_tol_default_psd(w, is_definite):
+  """Tests that the default tol argument gives good results for edge cases
+  like even if the determinant is high but clearly one eigenvalue is low
+  (indefinite so returns False) or when all eigenvalues are low (definite so
+  returns True)"""
+  assert _check_sdp_from_eigen(w, tol=None) == is_definite
+
+
+@pytest.mark.unit
+@pytest.mark.parametrize('w',
+                         [np.array([1., -1.]),
+                          np.array([-1e-10, 1e-10])])
+def test__check_sdp_from_eigen_tol_default_non_psd(w):
+  """Tests that the default tol argument is good for raising
+  NonPSDError, e.g.
that when a value is clearly relatively + negative it raises such an error""" + with pytest.raises(NonPSDError): + _check_sdp_from_eigen(w, tol=None) + + def test__check_n_components(): """Checks that n_components returns what is expected (including the errors)""" From eb878d6f42bb71f8f9ef7ac16eed108c9c3b6ecf Mon Sep 17 00:00:00 2001 From: CJ Carey Date: Mon, 3 Aug 2020 17:03:45 -0400 Subject: [PATCH 175/210] Resolve a numpy deprecation warning --- test/test_utils.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index 9b8b5e7e..570b8578 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -931,16 +931,17 @@ def test_same_with_or_without_preprocessor(estimator, build_dataset): assert np.array(output_with_prep == output_with_prep_formed).all() # test score_pairs + idx1 = np.array([[0, 2],[5, 3]], dtype=int) output_with_prep = estimator_with_preprocessor.score_pairs( - indicators_to_transform[[[[0, 2], [5, 3]]]]) + indicators_to_transform[idx1]) output_without_prep = estimator_without_preprocessor.score_pairs( - formed_points_to_transform[[[[0, 2], [5, 3]]]]) + formed_points_to_transform[idx1]) assert np.array(output_with_prep == output_without_prep).all() output_with_prep = estimator_with_preprocessor.score_pairs( - indicators_to_transform[[[[0, 2], [5, 3]]]]) + indicators_to_transform[idx1]) output_without_prep = estimator_with_prep_formed.score_pairs( - formed_points_to_transform[[[[0, 2], [5, 3]]]]) + formed_points_to_transform[idx1]) assert np.array(output_with_prep == output_without_prep).all() # test transform From 66a12edbfba74774d84e6cd0f4f0fd5d438a79f8 Mon Sep 17 00:00:00 2001 From: Yuan Tang Date: Mon, 3 Aug 2020 21:35:39 -0400 Subject: [PATCH 176/210] Fix CI build - flake8 E231 (#304) --- test/test_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_utils.py b/test/test_utils.py index 570b8578..5ddf3d71 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -931,7 +931,7 @@ def test_same_with_or_without_preprocessor(estimator, build_dataset): assert np.array(output_with_prep == output_with_prep_formed).all() # test score_pairs - idx1 = np.array([[0, 2],[5, 3]], dtype=int) + idx1 = np.array([[0, 2], [5, 3]], dtype=int) output_with_prep = estimator_with_preprocessor.score_pairs( indicators_to_transform[idx1]) output_without_prep = estimator_without_preprocessor.score_pairs( From 7eef7c6f9f376ad6f482369519e04d9062adc31d Mon Sep 17 00:00:00 2001 From: William de Vazelhes <31916524+wdevazelhes@users.noreply.github.com> Date: Thu, 15 Apr 2021 18:15:51 +0200 Subject: [PATCH 177/210] [MRG+2] Update repo to work with both new and old scikit-learn (#313) * Update repo to work with both new and old scikit-learn, and add a travis job to test the old scikit-learn * Add random seed for test * fix str representation for various sklearn versions * fix flake8 error * add more samples to the test for robustness * add comment for additional travis test * change str to dict for simplicity * simplify conditional imports with sklearn_shims file * remove typo added blankline * empty commit Co-authored-by: William de Vazelhes 80055062 --- .travis.yml | 13 +++ metric_learn/sklearn_shims.py | 27 +++++ test/metric_learn_test.py | 2 +- test/test_base_metric.py | 122 +++++++++++++++++++--- test/test_components_metric_conversion.py | 2 +- test/test_mahalanobis_mixin.py | 2 +- test/test_pairs_classifiers.py | 2 +- test/test_quadruplets_classifiers.py | 2 +- test/test_sklearn_compat.py | 36 +++---- 
test/test_triplets_classifiers.py | 2 +- test/test_utils.py | 2 +- 11 files changed, 170 insertions(+), 42 deletions(-) create mode 100644 metric_learn/sklearn_shims.py diff --git a/.travis.yml b/.travis.yml index d294c294..03ba1706 100644 --- a/.travis.yml +++ b/.travis.yml @@ -39,6 +39,19 @@ matrix: - pytest test --cov; after_success: - bash <(curl -s https://codecov.io/bash) + - name: "Pytest python 3.6 with skggm + scikit-learn 0.20.3" + # checks that tests work for the oldest supported scikit-learn version + python: "3.6" + before_install: + - sudo apt-get install liblapack-dev + - pip install --upgrade pip pytest + - pip install wheel cython numpy scipy codecov pytest-cov + - pip install scikit-learn==0.20.3 + - pip install git+https://github.com/skggm/skggm.git@${SKGGM_VERSION}; + script: + - pytest test --cov; + after_success: + - bash <(curl -s https://codecov.io/bash) - name: "Syntax checking with flake8" python: "3.7" before_install: diff --git a/metric_learn/sklearn_shims.py b/metric_learn/sklearn_shims.py new file mode 100644 index 00000000..654a9144 --- /dev/null +++ b/metric_learn/sklearn_shims.py @@ -0,0 +1,27 @@ +"""This file is for fixing imports due to different APIs +depending on the scikit-learn version""" +import sklearn +from packaging import version +SKLEARN_AT_LEAST_0_22 = (version.parse(sklearn.__version__) + >= version.parse('0.22.0')) +if SKLEARN_AT_LEAST_0_22: + from sklearn.utils._testing import (set_random_state, + assert_warns_message, + ignore_warnings, + assert_allclose_dense_sparse, + _get_args) + from sklearn.utils.estimator_checks import (_is_public_parameter + as is_public_parameter) + from sklearn.metrics._scorer import get_scorer +else: + from sklearn.utils.testing import (set_random_state, + assert_warns_message, + ignore_warnings, + assert_allclose_dense_sparse, + _get_args) + from sklearn.utils.estimator_checks import is_public_parameter + from sklearn.metrics.scorer import get_scorer + +__all__ = ['set_random_state', 'assert_warns_message', 'set_random_state', + 'ignore_warnings', 'assert_allclose_dense_sparse', '_get_args', + 'is_public_parameter', 'get_scorer'] diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py index 4db0a1fc..5cae80f2 100644 --- a/test/metric_learn_test.py +++ b/test/metric_learn_test.py @@ -9,7 +9,7 @@ make_spd_matrix) from numpy.testing import (assert_array_almost_equal, assert_array_equal, assert_allclose) -from sklearn.utils.testing import assert_warns_message +from metric_learn.sklearn_shims import assert_warns_message from sklearn.exceptions import ConvergenceWarning from sklearn.utils.validation import check_X_y from sklearn.preprocessing import StandardScaler diff --git a/test/test_base_metric.py b/test/test_base_metric.py index fed9018a..67f9b6a0 100644 --- a/test/test_base_metric.py +++ b/test/test_base_metric.py @@ -4,71 +4,161 @@ import metric_learn import numpy as np from sklearn import clone -from sklearn.utils.testing import set_random_state from test.test_utils import ids_metric_learners, metric_learners, remove_y +from metric_learn.sklearn_shims import set_random_state, SKLEARN_AT_LEAST_0_22 def remove_spaces(s): return re.sub(r'\s+', '', s) +def sk_repr_kwargs(def_kwargs, nndef_kwargs): + """Given the non-default arguments, and the default + keywords arguments, build the string that will appear + in the __repr__ of the estimator, depending on the + version of scikit-learn. 
+ """ + if SKLEARN_AT_LEAST_0_22: + def_kwargs = {} + def_kwargs.update(nndef_kwargs) + args_str = ",".join(f"{key}={repr(value)}" + for key, value in def_kwargs.items()) + return args_str + + class TestStringRepr(unittest.TestCase): def test_covariance(self): + def_kwargs = {'preprocessor': None} + nndef_kwargs = {} + merged_kwargs = sk_repr_kwargs(def_kwargs, nndef_kwargs) self.assertEqual(remove_spaces(str(metric_learn.Covariance())), - remove_spaces("Covariance()")) + remove_spaces(f"Covariance({merged_kwargs})")) def test_lmnn(self): + def_kwargs = {'convergence_tol': 0.001, 'init': 'auto', 'k': 3, + 'learn_rate': 1e-07, 'max_iter': 1000, 'min_iter': 50, + 'n_components': None, 'preprocessor': None, + 'random_state': None, 'regularization': 0.5, + 'verbose': False} + nndef_kwargs = {'convergence_tol': 0.01, 'k': 6} + merged_kwargs = sk_repr_kwargs(def_kwargs, nndef_kwargs) self.assertEqual( remove_spaces(str(metric_learn.LMNN(convergence_tol=0.01, k=6))), - remove_spaces("LMNN(convergence_tol=0.01, k=6)")) + remove_spaces(f"LMNN({merged_kwargs})")) def test_nca(self): + def_kwargs = {'init': 'auto', 'max_iter': 100, 'n_components': None, + 'preprocessor': None, 'random_state': None, 'tol': None, + 'verbose': False} + nndef_kwargs = {'max_iter': 42} + merged_kwargs = sk_repr_kwargs(def_kwargs, nndef_kwargs) self.assertEqual(remove_spaces(str(metric_learn.NCA(max_iter=42))), - remove_spaces("NCA(max_iter=42)")) + remove_spaces(f"NCA({merged_kwargs})")) def test_lfda(self): + def_kwargs = {'embedding_type': 'weighted', 'k': None, + 'n_components': None, 'preprocessor': None} + nndef_kwargs = {'k': 2} + merged_kwargs = sk_repr_kwargs(def_kwargs, nndef_kwargs) self.assertEqual(remove_spaces(str(metric_learn.LFDA(k=2))), - remove_spaces("LFDA(k=2)")) + remove_spaces(f"LFDA({merged_kwargs})")) def test_itml(self): + def_kwargs = {'convergence_threshold': 0.001, 'gamma': 1.0, + 'max_iter': 1000, 'preprocessor': None, + 'prior': 'identity', 'random_state': None, 'verbose': False} + nndef_kwargs = {'gamma': 0.5} + merged_kwargs = sk_repr_kwargs(def_kwargs, nndef_kwargs) self.assertEqual(remove_spaces(str(metric_learn.ITML(gamma=0.5))), - remove_spaces("ITML(gamma=0.5)")) + remove_spaces(f"ITML({merged_kwargs})")) + def_kwargs = {'convergence_threshold': 0.001, 'gamma': 1.0, + 'max_iter': 1000, 'num_constraints': None, + 'preprocessor': None, 'prior': 'identity', + 'random_state': None, 'verbose': False} + nndef_kwargs = {'num_constraints': 7} + merged_kwargs = sk_repr_kwargs(def_kwargs, nndef_kwargs) self.assertEqual( remove_spaces(str(metric_learn.ITML_Supervised(num_constraints=7))), - remove_spaces("ITML_Supervised(num_constraints=7)")) + remove_spaces(f"ITML_Supervised({merged_kwargs})")) def test_lsml(self): + def_kwargs = {'max_iter': 1000, 'preprocessor': None, 'prior': 'identity', + 'random_state': None, 'tol': 0.001, 'verbose': False} + nndef_kwargs = {'tol': 0.1} + merged_kwargs = sk_repr_kwargs(def_kwargs, nndef_kwargs) self.assertEqual(remove_spaces(str(metric_learn.LSML(tol=0.1))), - remove_spaces("LSML(tol=0.1)")) + remove_spaces(f"LSML({merged_kwargs})")) + def_kwargs = {'max_iter': 1000, 'num_constraints': None, + 'preprocessor': None, 'prior': 'identity', + 'random_state': None, 'tol': 0.001, 'verbose': False, + 'weights': None} + nndef_kwargs = {'verbose': True} + merged_kwargs = sk_repr_kwargs(def_kwargs, nndef_kwargs) self.assertEqual( remove_spaces(str(metric_learn.LSML_Supervised(verbose=True))), - remove_spaces("LSML_Supervised(verbose=True)")) + 
remove_spaces(f"LSML_Supervised({merged_kwargs})")) def test_sdml(self): + def_kwargs = {'balance_param': 0.5, 'preprocessor': None, + 'prior': 'identity', 'random_state': None, + 'sparsity_param': 0.01, 'verbose': False} + nndef_kwargs = {'verbose': True} + merged_kwargs = sk_repr_kwargs(def_kwargs, nndef_kwargs) self.assertEqual(remove_spaces(str(metric_learn.SDML(verbose=True))), - remove_spaces("SDML(verbose=True)")) + remove_spaces(f"SDML({merged_kwargs})")) + def_kwargs = {'balance_param': 0.5, 'num_constraints': None, + 'preprocessor': None, 'prior': 'identity', + 'random_state': None, 'sparsity_param': 0.01, + 'verbose': False} + nndef_kwargs = {'sparsity_param': 0.5} + merged_kwargs = sk_repr_kwargs(def_kwargs, nndef_kwargs) self.assertEqual( remove_spaces(str(metric_learn.SDML_Supervised(sparsity_param=0.5))), - remove_spaces("SDML_Supervised(sparsity_param=0.5)")) + remove_spaces(f"SDML_Supervised({merged_kwargs})")) def test_rca(self): + def_kwargs = {'n_components': None, 'preprocessor': None} + nndef_kwargs = {'n_components': 3} + merged_kwargs = sk_repr_kwargs(def_kwargs, nndef_kwargs) self.assertEqual(remove_spaces(str(metric_learn.RCA(n_components=3))), - remove_spaces("RCA(n_components=3)")) + remove_spaces(f"RCA({merged_kwargs})")) + def_kwargs = {'chunk_size': 2, 'n_components': None, 'num_chunks': 100, + 'preprocessor': None, 'random_state': None} + nndef_kwargs = {'num_chunks': 5} + merged_kwargs = sk_repr_kwargs(def_kwargs, nndef_kwargs) self.assertEqual( remove_spaces(str(metric_learn.RCA_Supervised(num_chunks=5))), - remove_spaces("RCA_Supervised(num_chunks=5)")) + remove_spaces(f"RCA_Supervised({merged_kwargs})")) def test_mlkr(self): + def_kwargs = {'init': 'auto', 'max_iter': 1000, + 'n_components': None, 'preprocessor': None, + 'random_state': None, 'tol': None, 'verbose': False} + nndef_kwargs = {'max_iter': 777} + merged_kwargs = sk_repr_kwargs(def_kwargs, nndef_kwargs) self.assertEqual(remove_spaces(str(metric_learn.MLKR(max_iter=777))), - remove_spaces("MLKR(max_iter=777)")) + remove_spaces(f"MLKR({merged_kwargs})")) def test_mmc(self): + def_kwargs = {'convergence_threshold': 0.001, 'diagonal': False, + 'diagonal_c': 1.0, 'init': 'identity', 'max_iter': 100, + 'max_proj': 10000, 'preprocessor': None, + 'random_state': None, 'verbose': False} + nndef_kwargs = {'diagonal': True} + merged_kwargs = sk_repr_kwargs(def_kwargs, nndef_kwargs) self.assertEqual(remove_spaces(str(metric_learn.MMC(diagonal=True))), - remove_spaces("MMC(diagonal=True)")) + remove_spaces(f"MMC({merged_kwargs})")) + def_kwargs = {'convergence_threshold': 1e-06, 'diagonal': False, + 'diagonal_c': 1.0, 'init': 'identity', 'max_iter': 100, + 'max_proj': 10000, 'num_constraints': None, + 'preprocessor': None, 'random_state': None, + 'verbose': False} + nndef_kwargs = {'max_iter': 1} + merged_kwargs = sk_repr_kwargs(def_kwargs, nndef_kwargs) self.assertEqual( remove_spaces(str(metric_learn.MMC_Supervised(max_iter=1))), - remove_spaces("MMC_Supervised(max_iter=1)")) + remove_spaces(f"MMC_Supervised({merged_kwargs})")) @pytest.mark.parametrize('estimator, build_dataset', metric_learners, diff --git a/test/test_components_metric_conversion.py b/test/test_components_metric_conversion.py index 04d0d007..5502ad90 100644 --- a/test/test_components_metric_conversion.py +++ b/test/test_components_metric_conversion.py @@ -4,7 +4,7 @@ from scipy.stats import ortho_group from sklearn.datasets import load_iris from numpy.testing import assert_array_almost_equal, assert_allclose -from sklearn.utils.testing 
import ignore_warnings +from metric_learn.sklearn_shims import ignore_warnings from metric_learn import ( LMNN, NCA, LFDA, Covariance, MLKR, diff --git a/test/test_mahalanobis_mixin.py b/test/test_mahalanobis_mixin.py index ab7e972d..84058b32 100644 --- a/test/test_mahalanobis_mixin.py +++ b/test/test_mahalanobis_mixin.py @@ -11,7 +11,7 @@ from sklearn.datasets import make_spd_matrix, make_blobs from sklearn.utils import check_random_state, shuffle from sklearn.utils.multiclass import type_of_target -from sklearn.utils.testing import set_random_state +from metric_learn.sklearn_shims import set_random_state from metric_learn._util import make_context, _initialize_metric_mahalanobis from metric_learn.base_metric import (_QuadrupletsClassifierMixin, diff --git a/test/test_pairs_classifiers.py b/test/test_pairs_classifiers.py index c5ca27f4..824bb622 100644 --- a/test/test_pairs_classifiers.py +++ b/test/test_pairs_classifiers.py @@ -11,7 +11,7 @@ from sklearn.model_selection import train_test_split from test.test_utils import pairs_learners, ids_pairs_learners -from sklearn.utils.testing import set_random_state +from metric_learn.sklearn_shims import set_random_state from sklearn import clone import numpy as np from itertools import product diff --git a/test/test_quadruplets_classifiers.py b/test/test_quadruplets_classifiers.py index efe10030..a8319961 100644 --- a/test/test_quadruplets_classifiers.py +++ b/test/test_quadruplets_classifiers.py @@ -3,7 +3,7 @@ from sklearn.model_selection import train_test_split from test.test_utils import quadruplets_learners, ids_quadruplets_learners -from sklearn.utils.testing import set_random_state +from metric_learn.sklearn_shims import set_random_state from sklearn import clone import numpy as np diff --git a/test/test_sklearn_compat.py b/test/test_sklearn_compat.py index e18eb7f4..3ad69712 100644 --- a/test/test_sklearn_compat.py +++ b/test/test_sklearn_compat.py @@ -4,10 +4,9 @@ from sklearn.base import TransformerMixin from sklearn.pipeline import make_pipeline from sklearn.utils import check_random_state -from sklearn.utils.estimator_checks import is_public_parameter -from sklearn.utils.testing import (assert_allclose_dense_sparse, - set_random_state) - +from metric_learn.sklearn_shims import (assert_allclose_dense_sparse, + set_random_state, _get_args, + is_public_parameter, get_scorer) from metric_learn import (Covariance, LFDA, LMNN, MLKR, NCA, ITML_Supervised, LSML_Supervised, MMC_Supervised, RCA_Supervised, SDML_Supervised, @@ -16,8 +15,6 @@ import numpy as np from sklearn.model_selection import (cross_val_score, cross_val_predict, train_test_split, KFold) -from sklearn.metrics.scorer import get_scorer -from sklearn.utils.testing import _get_args from test.test_utils import (metric_learners, ids_metric_learners, mock_preprocessor, tuples_learners, ids_tuples_learners, pairs_learners, @@ -52,37 +49,37 @@ def __init__(self, sparsity_param=0.01, class TestSklearnCompat(unittest.TestCase): def test_covariance(self): - check_estimator(Covariance) + check_estimator(Covariance()) def test_lmnn(self): - check_estimator(LMNN) + check_estimator(LMNN()) def test_lfda(self): - check_estimator(LFDA) + check_estimator(LFDA()) def test_mlkr(self): - check_estimator(MLKR) + check_estimator(MLKR()) def test_nca(self): - check_estimator(NCA) + check_estimator(NCA()) def test_lsml(self): - check_estimator(LSML_Supervised) + check_estimator(LSML_Supervised()) def test_itml(self): - check_estimator(ITML_Supervised) + check_estimator(ITML_Supervised()) def 
test_mmc(self): - check_estimator(MMC_Supervised) + check_estimator(MMC_Supervised()) def test_sdml(self): - check_estimator(Stable_SDML_Supervised) + check_estimator(Stable_SDML_Supervised()) def test_rca(self): - check_estimator(Stable_RCA_Supervised) + check_estimator(Stable_RCA_Supervised()) def test_scml(self): - check_estimator(SCML_Supervised) + check_estimator(SCML_Supervised()) RNG = check_random_state(0) @@ -121,7 +118,8 @@ def test_array_like_inputs(estimator, build_dataset, with_preprocessor): # we subsample the data for the test to be more efficient input_data, _, labels, _ = train_test_split(input_data, labels, - train_size=20) + train_size=40, + random_state=42) X = X[:10] estimator = clone(estimator) @@ -160,7 +158,7 @@ def test_various_scoring_on_tuples_learners(estimator, build_dataset, with_preprocessor): """Tests that scikit-learn's scoring returns something finite, for other scoring than default scoring. (List of scikit-learn's scores can be - found in sklearn.metrics.scorer). For each type of output (predict, + found in sklearn.metrics._scorer). For each type of output (predict, predict_proba, decision_function), we test a bunch of scores. We only test on pairs learners because quadruplets don't have a y argument. """ diff --git a/test/test_triplets_classifiers.py b/test/test_triplets_classifiers.py index 10393919..0f0bf7df 100644 --- a/test/test_triplets_classifiers.py +++ b/test/test_triplets_classifiers.py @@ -3,7 +3,7 @@ from sklearn.model_selection import train_test_split from test.test_utils import triplets_learners, ids_triplets_learners -from sklearn.utils.testing import set_random_state +from metric_learn.sklearn_shims import set_random_state from sklearn import clone import numpy as np diff --git a/test/test_utils.py b/test/test_utils.py index 5ddf3d71..072b94c5 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -5,7 +5,7 @@ from numpy.testing import assert_array_equal, assert_equal from sklearn.model_selection import train_test_split from sklearn.utils import check_random_state, shuffle -from sklearn.utils.testing import set_random_state +from metric_learn.sklearn_shims import set_random_state from sklearn.base import clone from metric_learn._util import (check_input, make_context, preprocess_tuples, make_name, preprocess_points, From d12bf3704ee0461d36af4422a733c2419cbf22f8 Mon Sep 17 00:00:00 2001 From: William de Vazelhes <31916524+wdevazelhes@users.noreply.github.com> Date: Thu, 22 Apr 2021 18:33:17 +0200 Subject: [PATCH 178/210] [MRG+1] add github issues buttons (#316) * add github issues buttons * add blankline at end of file --- .../bug_report.md} | 12 ++++++++++ .github/ISSUE_TEMPLATE/config.yml | 18 +++++++++++++++ .github/ISSUE_TEMPLATE/doc_improvement.md | 23 +++++++++++++++++++ .../ISSUE_TEMPLATE/enhancement_proposal.md | 18 +++++++++++++++ 4 files changed, 71 insertions(+) rename .github/{issue_template.md => ISSUE_TEMPLATE/bug_report.md} (82%) create mode 100644 .github/ISSUE_TEMPLATE/config.yml create mode 100644 .github/ISSUE_TEMPLATE/doc_improvement.md create mode 100644 .github/ISSUE_TEMPLATE/enhancement_proposal.md diff --git a/.github/issue_template.md b/.github/ISSUE_TEMPLATE/bug_report.md similarity index 82% rename from .github/issue_template.md rename to .github/ISSUE_TEMPLATE/bug_report.md index d4fb0abe..ae757838 100644 --- a/.github/issue_template.md +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -1,3 +1,9 @@ +--- +name: Reproducible bug report +about: Create a reproducible bug report. Not for support requests. 
+labels: 'bug' +--- + #### Description @@ -42,3 +48,9 @@ $ pip show metric_learn | grep Version ) --> + +--- + +**Message from the maintainers**: + +Impacted by this bug? Give it a 👍. We prioritise the issues with the most 👍. \ No newline at end of file diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml new file mode 100644 index 00000000..c979fd98 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -0,0 +1,18 @@ +blank_issues_enabled: false + +contact_links: + - name: Have you read the docs? + url: http://contrib.scikit-learn.org/metric-learn/ + about: Much help can be found in the docs + - name: Ask a question + url: https://github.com/sklearn-contrib/metric-learn/discussions/new + about: Ask a question or start a discussion about metric-learn + - name: Stack Overflow + url: https://stackoverflow.com + about: Please ask and answer metric-learn usage questions (API, installation...) on Stack Overflow + - name: Cross Validated + url: https://stats.stackexchange.com + about: Please ask and answer metric learning questions (use cases, algorithms & theory...) on Cross Validated + - name: Blank issue + url: https://github.com/sklearn-contrib/metric-learn/issues/new + about: Please note that Github Discussions should be used in most cases instead diff --git a/.github/ISSUE_TEMPLATE/doc_improvement.md b/.github/ISSUE_TEMPLATE/doc_improvement.md new file mode 100644 index 00000000..753cf2f7 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/doc_improvement.md @@ -0,0 +1,23 @@ +--- +name: Documentation improvement +about: Create a report to help us improve the documentation. Alternatively you can just open a pull request with the suggested change. +labels: Documentation +--- + +#### Describe the issue linked to the documentation + + + +#### Suggest a potential alternative/fix + + + +--- + +**Message from the maintainers**: + +Confused by this part of the doc too? Give it a 👍. We prioritise the issues with the most 👍. \ No newline at end of file diff --git a/.github/ISSUE_TEMPLATE/enhancement_proposal.md b/.github/ISSUE_TEMPLATE/enhancement_proposal.md new file mode 100644 index 00000000..01dfb1d7 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/enhancement_proposal.md @@ -0,0 +1,18 @@ +--- +name: Enhancement proposal +about: Propose an enhancement for metric-learn +labels: 'enhancement' +--- +# Summary + +What change needs making? + +# Use Cases + +When would you use this? + +--- + +**Message from the maintainers**: + +Want to see this feature happen? Give it a 👍. We prioritise the issues with the most 👍. 
\ No newline at end of file From 137880d9c6ce9a2b81a8af24c07d80e528f657cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20Bellet?= Date: Mon, 26 Apr 2021 10:27:27 +0200 Subject: [PATCH 179/210] fix url issue buttons (#319) --- .github/ISSUE_TEMPLATE/config.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml index c979fd98..415acfcd 100644 --- a/.github/ISSUE_TEMPLATE/config.yml +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -5,7 +5,7 @@ contact_links: url: http://contrib.scikit-learn.org/metric-learn/ about: Much help can be found in the docs - name: Ask a question - url: https://github.com/sklearn-contrib/metric-learn/discussions/new + url: https://github.com/scikit-learn-contrib/metric-learn/discussions/new about: Ask a question or start a discussion about metric-learn - name: Stack Overflow url: https://stackoverflow.com @@ -14,5 +14,5 @@ contact_links: url: https://stats.stackexchange.com about: Please ask and answer metric learning questions (use cases, algorithms & theory...) on Cross Validated - name: Blank issue - url: https://github.com/sklearn-contrib/metric-learn/issues/new + url: https://github.com/scikit-learn-contrib/metric-learn/issues/new about: Please note that Github Discussions should be used in most cases instead From 8583df4b9e56dae540f9c0ace86bfd3070dce235 Mon Sep 17 00:00:00 2001 From: Maximiliano Vargas <43217761+mvargas33@users.noreply.github.com> Date: Tue, 14 Sep 2021 15:12:35 +0200 Subject: [PATCH 180/210] Fix 5 test regarding make_spd_matrix usage from sklearn (#321) * Add venv to gitignore * Fix 5 test regarding make_spd_matrix usage from sklearn --- .gitignore | 3 ++- test/metric_learn_test.py | 4 ++-- test/test_mahalanobis_mixin.py | 4 ++-- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/.gitignore b/.gitignore index 8321c7d2..16917890 100644 --- a/.gitignore +++ b/.gitignore @@ -7,4 +7,5 @@ htmlcov/ .cache/ .pytest_cache/ doc/auto_examples/* -doc/generated/* \ No newline at end of file +doc/generated/* +venv/ \ No newline at end of file diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py index 5cae80f2..4d058c8d 100644 --- a/test/metric_learn_test.py +++ b/test/metric_learn_test.py @@ -929,7 +929,7 @@ def test_singleton_class(self): X = X[[ind_0[0], ind_1[0], ind_2[0]]] y = y[[ind_0[0], ind_1[0], ind_2[0]]] - A = make_spd_matrix(X.shape[1], X.shape[1]) + A = make_spd_matrix(n_dim=X.shape[1], random_state=X.shape[1]) nca = NCA(init=A, max_iter=30, n_components=X.shape[1]) nca.fit(X, y) assert_array_equal(nca.components_, A) @@ -940,7 +940,7 @@ def test_one_class(self): X = self.iris_points[self.iris_labels == 0] y = self.iris_labels[self.iris_labels == 0] - A = make_spd_matrix(X.shape[1], X.shape[1]) + A = make_spd_matrix(n_dim=X.shape[1], random_state=X.shape[1]) nca = NCA(init=A, max_iter=30, n_components=X.shape[1]) nca.fit(X, y) assert_array_equal(nca.components_, A) diff --git a/test/test_mahalanobis_mixin.py b/test/test_mahalanobis_mixin.py index 84058b32..e3d981a4 100644 --- a/test/test_mahalanobis_mixin.py +++ b/test/test_mahalanobis_mixin.py @@ -503,12 +503,12 @@ def test_init_mahalanobis(estimator, build_dataset): model.fit(input_data, labels) # Initialize with a random spd matrix - init = make_spd_matrix(X.shape[1], random_state=rng) + init = make_spd_matrix(n_dim=X.shape[1], random_state=rng) model.set_params(**{param: init}) model.fit(input_data, labels) # init.shape[1] must match X.shape[1] - init = 
make_spd_matrix(X.shape[1] + 1, X.shape[1] + 1)
+    init = make_spd_matrix(n_dim=X.shape[1] + 1, random_state=rng)
     model.set_params(**{param: init})
     msg = ('The input dimensionality {} of the given '
            'mahalanobis matrix `{}` must match the '

From 10b6d25a6e3e4c4eddef1945ace718dbd4b71183 Mon Sep 17 00:00:00 2001
From: Maximiliano Vargas <43217761+mvargas33@users.noreply.github.com>
Date: Wed, 15 Sep 2021 21:58:11 +0200
Subject: [PATCH 181/210] Addressing LFDA sign indeterminacy (#326)

* Add venv to gitignore

* Note proposed to address sign indeterminacy
---
 doc/supervised.rst | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/doc/supervised.rst b/doc/supervised.rst
index 1b1180e9..c6d8b68b 100644
--- a/doc/supervised.rst
+++ b/doc/supervised.rst
@@ -292,6 +292,11 @@ same class are not imposed to be close.
     lfda = LFDA(k=2, dim=2)
     lfda.fit(X, Y)

+.. note::
+    LFDA suffers from a problem called “sign indeterminacy”, which means the sign of the ``components`` and the output from transform depend on a random state. This is directly related to the calculation of eigenvectors in the algorithm. The same input run at different times might lead to different transforms, but both are valid.
+
+    To work around this, fit instances of this class to data once, then keep the instance around to do transformations.
+
 .. topic:: References:

     .. [1] Sugiyama. `Dimensionality Reduction of Multimodal Labeled Data by Local
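The note added to doc/supervised.rst above describes a general property of eigendecompositions rather than an LFDA bug: a unit eigenvector is only defined up to sign, so a solver is free to return either orientation. A small sketch of the phenomenon on a toy symmetric matrix (illustrative only):

    import numpy as np

    A = np.array([[2., 1.],
                  [1., 2.]])
    w, V = np.linalg.eigh(A)
    v = V[:, 0]
    # Both v and -v satisfy A v = w v, so either sign may come out of
    # the solver; learned "components" inherit that arbitrary sign.
    assert np.allclose(A.dot(v), w[0] * v)
    assert np.allclose(A.dot(-v), w[0] * (-v))
    # Hence the advice above: fit LFDA once and reuse the fitted
    # instance for all transforms instead of refitting.

From 5427159e5258e60df291ae01655f4f3d130d2c62 Mon Sep 17 00:00:00 2001
From: Maximiliano Vargas <43217761+mvargas33@users.noreply.github.com>
Date: Thu, 16 Sep 2021 13:14:08 +0200
Subject: [PATCH 182/210] YML file: CI Migration to Github Actions (#327)

---
 .github/workflows/main.yml | 68 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 68 insertions(+)
 create mode 100644 .github/workflows/main.yml

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
new file mode 100644
index 00000000..46e5d2c9
--- /dev/null
+++ b/.github/workflows/main.yml
@@ -0,0 +1,68 @@
+name: CI
+
+# Controls when the workflow will run
+on:
+  # Triggers the workflow on push or pull request events but only for the master branch
+  push:
+    branches: [ master ]
+  pull_request:
+    branches: [ master ]
+
+jobs:
+
+  # Checks compatibility with an old version of sklearn (0.20.3)
+  compatibility:
+    runs-on: ${{ matrix.os }}
+    strategy:
+      matrix:
+        os: [ubuntu-latest]
+        python-version: ['3.6', '3.7', '3.8']
+    steps:
+      - uses: actions/checkout@v2
+      - name: Set up Python
+        uses: actions/setup-python@v2
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Run Tests with skggm + scikit-learn 0.20.3
+        env:
+          SKGGM_VERSION: a0ed406586c4364ea3297a658f415e13b5cbdaf8
+        run: |
+          sudo apt-get install liblapack-dev
+          pip install --upgrade pip pytest
+          pip install wheel cython numpy scipy codecov pytest-cov
+          pip install scikit-learn==0.20.3
+          pip install git+https://github.com/skggm/skggm.git@${SKGGM_VERSION}
+          pytest test --cov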
bash <(curl -s https://codecov.io/bash) + - name: Run Tests with skggm + env: + SKGGM_VERSION: a0ed406586c4364ea3297a658f415e13b5cbdaf8 + run: | + pip install git+https://github.com/skggm/skggm.git@${SKGGM_VERSION} + pytest test --cov + bash <(curl -s https://codecov.io/bash) + - name: Syntax checking with flake8 + run: | + pip install flake8 + flake8 --extend-ignore=E111,E114 --show-source; From f548b1e781821dd7420c6e793ac9d28449f42770 Mon Sep 17 00:00:00 2001 From: Yuan Tang Date: Thu, 16 Sep 2021 09:50:24 -0400 Subject: [PATCH 183/210] Tidy up files to reflect the migration to GitHub Actions (#328) Signed-off-by: Yuan Tang --- .travis.yml | 65 --------------- README.rst | 6 +- build_tools/travis/flake8_diff.sh | 132 ------------------------------ 3 files changed, 3 insertions(+), 200 deletions(-) delete mode 100644 .travis.yml delete mode 100644 build_tools/travis/flake8_diff.sh diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 03ba1706..00000000 --- a/.travis.yml +++ /dev/null @@ -1,65 +0,0 @@ -language: python -sudo: false -cache: pip -language: python -env: - global: - - SKGGM_VERSION=a0ed406586c4364ea3297a658f415e13b5cbdaf8 -matrix: - include: - - name: "Pytest python 3.6 without skggm" - python: "3.6" - before_install: - - sudo apt-get install liblapack-dev - - pip install --upgrade pip pytest - - pip install wheel cython numpy scipy codecov pytest-cov scikit-learn - script: - - pytest test --cov; - after_success: - - bash <(curl -s https://codecov.io/bash) - - name: "Pytest python 3.6 with skggm" - python: "3.6" - before_install: - - sudo apt-get install liblapack-dev - - pip install --upgrade pip pytest - - pip install wheel cython numpy scipy codecov pytest-cov scikit-learn - - pip install git+https://github.com/skggm/skggm.git@${SKGGM_VERSION}; - script: - - pytest test --cov; - after_success: - - bash <(curl -s https://codecov.io/bash) - - name: "Pytest python 3.7 with skggm" - python: "3.7" - before_install: - - sudo apt-get install liblapack-dev - - pip install --upgrade pip pytest - - pip install wheel cython numpy scipy codecov pytest-cov scikit-learn - - pip install git+https://github.com/skggm/skggm.git@${SKGGM_VERSION}; - script: - - pytest test --cov; - after_success: - - bash <(curl -s https://codecov.io/bash) - - name: "Pytest python 3.6 with skggm + scikit-learn 0.20.3" - # checks that tests work for the oldest supported scikit-learn version - python: "3.6" - before_install: - - sudo apt-get install liblapack-dev - - pip install --upgrade pip pytest - - pip install wheel cython numpy scipy codecov pytest-cov - - pip install scikit-learn==0.20.3 - - pip install git+https://github.com/skggm/skggm.git@${SKGGM_VERSION}; - script: - - pytest test --cov; - after_success: - - bash <(curl -s https://codecov.io/bash) - - name: "Syntax checking with flake8" - python: "3.7" - before_install: - - pip install flake8 - script: - - flake8 --extend-ignore=E111,E114 --show-source; - # Use this instead to have a syntax check only on the diff: - # - source ./build_tools/travis/flake8_diff.sh; -branches: - only: - - master diff --git a/README.rst b/README.rst index 41181ee8..681e29f6 100644 --- a/README.rst +++ b/README.rst @@ -1,4 +1,4 @@ -|Travis-CI Build Status| |License| |PyPI version| |Code coverage| +|GitHub Actions Build Status| |License| |PyPI version| |Code coverage| metric-learn: Metric Learning in Python ======================================= @@ -67,8 +67,8 @@ Bibtex entry:: .. _sphinx documentation: http://contrib.scikit-learn.org/metric-learn/ -.. 
|Travis-CI Build Status| image:: https://api.travis-ci.org/scikit-learn-contrib/metric-learn.svg?branch=master - :target: https://travis-ci.org/scikit-learn-contrib/metric-learn +.. |GitHub Actions Build Status| image:: https://github.com/scikit-learn-contrib/metric-learn/workflows/CI/badge.svg + :target: https://github.com/scikit-learn-contrib/metric-learn/actions?query=event%3Apush+branch%3Amaster .. |License| image:: http://img.shields.io/:license-mit-blue.svg?style=flat :target: http://badges.mit-license.org .. |PyPI version| image:: https://badge.fury.io/py/metric-learn.svg diff --git a/build_tools/travis/flake8_diff.sh b/build_tools/travis/flake8_diff.sh deleted file mode 100644 index aea926c8..00000000 --- a/build_tools/travis/flake8_diff.sh +++ /dev/null @@ -1,132 +0,0 @@ -# This file is not used yet but we keep it in case we need to check the pep8 difference -# on the diff (see .travis.yml) -# -#!/bin/bash -# copied-pasted and adapted from http://github.com/sklearn-contrib/imbalanced-learn -# (more precisely: https://raw.githubusercontent.com/glemaitre/imbalanced-learn -# /adcb9d8e6210b321dac2c1b06879e5e889d52d77/build_tools/travis/flake8_diff.sh) - -# This script is used in Travis to check that PRs do not add obvious -# flake8 violations. It relies on two things: -# - find common ancestor between branch and -# scikit-learn/scikit-learn remote -# - run flake8 --diff on the diff between the branch and the common -# ancestor -# -# Additional features: -# - the line numbers in Travis match the local branch on the PR -# author machine. -# - ./build_tools/travis/flake8_diff.sh can be run locally for quick -# turn-around - -set -e -# pipefail is necessary to propagate exit codes -set -o pipefail - -PROJECT=scikit-learn-contrib/metric-learn -PROJECT_URL=https://github.com/$PROJECT.git - -# Find the remote with the project name (upstream in most cases) -REMOTE=$(git remote -v | grep $PROJECT | cut -f1 | head -1 || echo '') - -# Add a temporary remote if needed. For example this is necessary when -# Travis is configured to run in a fork. In this case 'origin' is the -# fork and not the reference repo we want to diff against. -if [[ -z "$REMOTE" ]]; then - TMP_REMOTE=tmp_reference_upstream - REMOTE=$TMP_REMOTE - git remote add $REMOTE $PROJECT_URL -fi - -echo "Remotes:" -echo '--------------------------------------------------------------------------------' -git remote --verbose - -# Travis does the git clone with a limited depth (50 at the time of -# writing). This may not be enough to find the common ancestor with -# $REMOTE/master so we unshallow the git checkout -if [[ -a .git/shallow ]]; then - echo -e '\nTrying to unshallow the repo:' - echo '--------------------------------------------------------------------------------' - git fetch --unshallow -fi - -if [[ "$TRAVIS" == "true" ]]; then - if [[ "$TRAVIS_PULL_REQUEST" == "false" ]] - then - # In main repo, using TRAVIS_COMMIT_RANGE to test the commits - # that were pushed into a branch - if [[ "$PROJECT" == "$TRAVIS_REPO_SLUG" ]]; then - if [[ -z "$TRAVIS_COMMIT_RANGE" ]]; then - echo "New branch, no commit range from Travis so passing this test by convention" - exit 0 - fi - COMMIT_RANGE=$TRAVIS_COMMIT_RANGE - fi - else - # We want to fetch the code as it is in the PR branch and not - # the result of the merge into master. This way line numbers - # reported by Travis will match with the local code. 
- LOCAL_BRANCH_REF=travis_pr_$TRAVIS_PULL_REQUEST - # In Travis the PR target is always origin - git fetch origin pull/$TRAVIS_PULL_REQUEST/head:refs/$LOCAL_BRANCH_REF - fi -fi - -# If not using the commit range from Travis we need to find the common -# ancestor between $LOCAL_BRANCH_REF and $REMOTE/master -if [[ -z "$COMMIT_RANGE" ]]; then - if [[ -z "$LOCAL_BRANCH_REF" ]]; then - LOCAL_BRANCH_REF=$(git rev-parse --abbrev-ref HEAD) - fi - echo -e "\nLast 2 commits in $LOCAL_BRANCH_REF:" - echo '--------------------------------------------------------------------------------' - git log -2 $LOCAL_BRANCH_REF - - REMOTE_MASTER_REF="$REMOTE/master" - # Make sure that $REMOTE_MASTER_REF is a valid reference - echo -e "\nFetching $REMOTE_MASTER_REF" - echo '--------------------------------------------------------------------------------' - git fetch $REMOTE master:refs/remotes/$REMOTE_MASTER_REF - LOCAL_BRANCH_SHORT_HASH=$(git rev-parse --short $LOCAL_BRANCH_REF) - REMOTE_MASTER_SHORT_HASH=$(git rev-parse --short $REMOTE_MASTER_REF) - - COMMIT=$(git merge-base $LOCAL_BRANCH_REF $REMOTE_MASTER_REF) || \ - echo "No common ancestor found for $(git show $LOCAL_BRANCH_REF -q) and $(git show $REMOTE_MASTER_REF -q)" - - if [ -z "$COMMIT" ]; then - exit 1 - fi - - COMMIT_SHORT_HASH=$(git rev-parse --short $COMMIT) - - echo -e "\nCommon ancestor between $LOCAL_BRANCH_REF ($LOCAL_BRANCH_SHORT_HASH)"\ - "and $REMOTE_MASTER_REF ($REMOTE_MASTER_SHORT_HASH) is $COMMIT_SHORT_HASH:" - echo '--------------------------------------------------------------------------------' - git show --no-patch $COMMIT_SHORT_HASH - - COMMIT_RANGE="$COMMIT_SHORT_HASH..$LOCAL_BRANCH_SHORT_HASH" - - if [[ -n "$TMP_REMOTE" ]]; then - git remote remove $TMP_REMOTE - fi - -else - echo "Got the commit range from Travis: $COMMIT_RANGE" -fi - -echo -e '\nRunning flake8 on the diff in the range' "$COMMIT_RANGE" \ - "($(git rev-list $COMMIT_RANGE | wc -l) commit(s)):" -echo '--------------------------------------------------------------------------------' - -# to not include the context (some lines before and after the modified lines), add the -# flag --unified=0 (warning: it will not include some errors like for instance adding too -# much blank lines -check_files() { - git diff $COMMIT_RANGE | flake8 --diff --show-source --extend-ignore=E111,E114 -} - -check_files - -echo -e "No problem detected by flake8\n" - From 8571f975c4e114b1823b2007ea2eacdfa59b81ea Mon Sep 17 00:00:00 2001 From: Maximiliano Vargas <43217761+mvargas33@users.noreply.github.com> Date: Fri, 1 Oct 2021 17:20:49 +0200 Subject: [PATCH 184/210] SCML iris test refactor [Minor] (#332) * Add venv to gitignore * Create yml draft Trying to run tests with github actions * Update yml * Update 2 * Update 3 * YML: Only Python3 versions * Add Codecov to CI * Mirroring actual yml from metric_learning repo * Fix codecov * Fix old scikit learn * Update yml * Remove 3.9 from compatibility * Fixed issue with sklearn 0.20 * Delete comments, and unnecesary workflow_dispatch * SCML test refactor * Commented te test --- test/metric_learn_test.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py index 4d058c8d..2debe426 100644 --- a/test/metric_learn_test.py +++ b/test/metric_learn_test.py @@ -79,12 +79,17 @@ def test_singular_returns_pseudo_inverse(self): class TestSCML(object): @pytest.mark.parametrize('basis', ('lda', 'triplet_diffs')) def test_iris(self, basis): + """ + SCML applied to Iris dataset should give 
better results when + computing class separation. + """ X, y = load_iris(return_X_y=True) + before = class_separation(X, y) scml = SCML_Supervised(basis=basis, n_basis=85, k_genuine=7, k_impostor=5, random_state=42) scml.fit(X, y) - csep = class_separation(scml.transform(X), y) - assert csep < 0.24 + after = class_separation(scml.transform(X), y) + assert before > after + 0.03 # It's better by a margin of 0.03 def test_big_n_features(self): X, y = make_classification(n_samples=100, n_classes=3, n_features=60, From 7a2a49dce8d6dc3e00aa8b53c49d9d97fce33f97 Mon Sep 17 00:00:00 2001 From: Maximiliano Vargas <43217761+mvargas33@users.noreply.github.com> Date: Fri, 8 Oct 2021 15:43:29 +0200 Subject: [PATCH 185/210] Change min sklearn version to 0.21.3 (#334) --- .github/workflows/main.yml | 8 ++++---- README.rst | 2 +- doc/getting_started.rst | 2 +- setup.py | 6 +++--- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 46e5d2c9..fbe91dea 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -10,27 +10,27 @@ on: jobs: - # Checks compatibility with an old version of sklearn (0.20.3) + # Checks compatibility with an old version of sklearn (0.21.3) compatibility: runs-on: ${{ matrix.os }} strategy: matrix: os: [ubuntu-latest] - python-version: ['3.6', '3.7', '3.8'] + python-version: ['3.6', '3.7'] steps: - uses: actions/checkout@v2 - name: Set up Python uses: actions/setup-python@v2 with: python-version: ${{ matrix.python-version }} - - name: Run Tests with skggm + scikit-learn 0.20.3 + - name: Run Tests with skggm + scikit-learn 0.21.3 env: SKGGM_VERSION: a0ed406586c4364ea3297a658f415e13b5cbdaf8 run: | sudo apt-get install liblapack-dev pip install --upgrade pip pytest pip install wheel cython numpy scipy codecov pytest-cov - pip install scikit-learn==0.20.3 + pip install scikit-learn==0.21.3 pip install git+https://github.com/skggm/skggm.git@${SKGGM_VERSION} pytest test --cov bash <(curl -s https://codecov.io/bash) diff --git a/README.rst b/README.rst index 681e29f6..b2f6e6d4 100644 --- a/README.rst +++ b/README.rst @@ -22,7 +22,7 @@ metric-learn contains efficient Python implementations of several popular superv - Python 3.6+ (the last version supporting Python 2 and Python 3.5 was `v0.5.0 `_) -- numpy, scipy, scikit-learn>=0.20.3 +- numpy>= 1.11.0, scipy>= 0.17.0, scikit-learn>=0.21.3 **Optional dependencies** diff --git a/doc/getting_started.rst b/doc/getting_started.rst index 44fd1436..90b7c7ee 100644 --- a/doc/getting_started.rst +++ b/doc/getting_started.rst @@ -19,7 +19,7 @@ metric-learn can be installed in either of the following ways: - Python 3.6+ (the last version supporting Python 2 and Python 3.5 was `v0.5.0 `_) -- numpy, scipy, scikit-learn>=0.20.3 +- numpy>= 1.11.0, scipy>= 0.17.0, scikit-learn>=0.21.3 **Optional dependencies** diff --git a/setup.py b/setup.py index 8677e7bf..255671a2 100755 --- a/setup.py +++ b/setup.py @@ -63,9 +63,9 @@ ], packages=['metric_learn'], install_requires=[ - 'numpy', - 'scipy', - 'scikit-learn>=0.20.3', + 'numpy>= 1.11.0', + 'scipy>= 0.17.0', + 'scikit-learn>=0.21.3', ], extras_require=dict( docs=['sphinx', 'shinx_rtd_theme', 'numpydoc'], From 44be909b3d8d74072aeb9f96ad587bd1ca6b8cda Mon Sep 17 00:00:00 2001 From: Maximiliano Vargas <43217761+mvargas33@users.noreply.github.com> Date: Tue, 12 Oct 2021 11:21:04 -0300 Subject: [PATCH 186/210] Disallow 0 on Triplets predictions (#331) * Remove 3.9 from compatibility * Fix Triplets predict function. 
Made a test to show the point.

* Fix indentation

* Simplified prediction as suggested

* Resolved code review comments

* Fix weird commit

* Simplified assertion
---
 metric_learn/base_metric.py       |  2 +-
 test/test_triplets_classifiers.py | 44 +++++++++++++++++++++++++++++++
 2 files changed, 45 insertions(+), 1 deletion(-)

diff --git a/metric_learn/base_metric.py b/metric_learn/base_metric.py
index 721d7ba0..21506011 100644
--- a/metric_learn/base_metric.py
+++ b/metric_learn/base_metric.py
@@ -602,7 +602,7 @@ def predict(self, triplets):
     prediction : `numpy.ndarray` of floats, shape=(n_constraints,)
       Predictions of the ordering of pairs, for each triplet.
     """
-    return np.sign(self.decision_function(triplets))
+    return 2 * (self.decision_function(triplets) > 0) - 1

   def decision_function(self, triplets):
     """Predicts differences between sample distances in input triplets.
diff --git a/test/test_triplets_classifiers.py b/test/test_triplets_classifiers.py
index 0f0bf7df..f2d5c015 100644
--- a/test/test_triplets_classifiers.py
+++ b/test/test_triplets_classifiers.py
@@ -6,6 +6,7 @@
 from metric_learn.sklearn_shims import set_random_state
 from sklearn import clone
 import numpy as np
+from numpy.testing import assert_array_equal


 @pytest.mark.parametrize('with_preprocessor', [True, False])
@@ -26,6 +27,49 @@ def test_predict_only_one_or_minus_one(estimator, build_dataset,
   assert len(not_valid) == 0


+@pytest.mark.parametrize('estimator, build_dataset', triplets_learners,
+                         ids=ids_triplets_learners)
+def test_no_zero_prediction(estimator, build_dataset):
+  """
+  Test that all predicted values are not zero, even when the
+  distances d(x, y) and d(x, z) are the same for a triplet of the
+  form (x, y, z), i.e. border cases.
+  """
+  triplets, _, _, X = build_dataset(with_preprocessor=False)
+  # Force 3 dimensions only, to use cross product and get easy orthogonal vec.
+ triplets = np.array([[t[0][:3], t[1][:3], t[2][:3]] for t in triplets]) + X = X[:, :3] + # Dummy fit + estimator = clone(estimator) + set_random_state(estimator) + estimator.fit(triplets) + # We force the transformation to be identity, to force euclidean distance + estimator.components_ = np.eye(X.shape[1]) + + # Get two orthogonal vectors in respect to X[1] + k = X[1] / np.linalg.norm(X[1]) # Normalize first vector + x = X[2] - X[2].dot(k) * k # Get random orthogonal vector + x /= np.linalg.norm(x) # Normalize + y = np.cross(k, x) # Get orthogonal vector to x + # Assert these orthogonal vectors are different + with pytest.raises(AssertionError): + assert_array_equal(X[1], x) + with pytest.raises(AssertionError): + assert_array_equal(X[1], y) + # Assert the distance is the same for both + assert estimator.get_metric()(X[1], x) == estimator.get_metric()(X[1], y) + + # Form the three scenarios where predict() gives 0 with numpy.sign + triplets_test = np.array( # Critical examples + [[X[0], X[2], X[2]], + [X[1], X[1], X[1]], + [X[1], x, y]]) + # Predict + predictions = estimator.predict(triplets_test) + # Check there are no zero values + assert np.sum(predictions == 0) == 0 + + @pytest.mark.parametrize('with_preprocessor', [True, False]) @pytest.mark.parametrize('estimator, build_dataset', triplets_learners, ids=ids_triplets_learners) From e2c3e921159bb0da48bfd4bafa096ffe7a7fb3f0 Mon Sep 17 00:00:00 2001 From: Maximiliano Vargas <43217761+mvargas33@users.noreply.github.com> Date: Mon, 18 Oct 2021 05:30:21 -0300 Subject: [PATCH 187/210] Change SCML supervised docs (#337) --- metric_learn/scml.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/metric_learn/scml.py b/metric_learn/scml.py index c3fde272..ee585018 100644 --- a/metric_learn/scml.py +++ b/metric_learn/scml.py @@ -473,13 +473,18 @@ class SCML_Supervised(_BaseSCML, TransformerMixin): Examples -------- - >>> from metric_learn import SCML - >>> triplets = np.array([[[1.2, 3.2], [2.3, 5.5], [2.1, 0.6]], - >>> [[4.5, 2.3], [2.1, 2.3], [7.3, 3.4]]]) - >>> scml = SCML(random_state=42) - >>> scml.fit(triplets) - SCML(beta=1e-5, B=None, max_iter=100000, verbose=False, - preprocessor=None, random_state=None) + >>> from metric_learn import SCML_Supervised + >>> from sklearn.datasets import load_iris + >>> iris_data = load_iris() + >>> X = iris_data['data'] + >>> Y = iris_data['target'] + >>> scml = SCML_Supervised(random_state=33) + >>> scml.fit(X, Y) + SCML_Supervised(random_state=33) + >>> scml.score_pairs([[X[0], X[1]], [X[0], X[2]]]) + array([1.84640733, 1.55984363]) + >>> scml.get_metric()(X[0], X[1]) + 1.8464073327922157 References ---------- From aaf8d44b8d31d6ea418f0bd80ef86958e5081b4c Mon Sep 17 00:00:00 2001 From: Maximiliano Vargas <43217761+mvargas33@users.noreply.github.com> Date: Thu, 21 Oct 2021 09:05:22 -0300 Subject: [PATCH 188/210] score_pairs refactor (#333) * Remove 3.9 from compatibility * First draft of refactoring BaseMetricLearner and Mahalanobis Learner * Avoid warning related to score_pairs deprecation in tests of pair_calibraiton * Minor fix * Replaced score_pairs with pair_distance in tests * Replace score_pairs with pair_distance inb docs. * Fix weird commit * Update classifiers to use pair_similarity * Updated rst docs * Fix identation * Update docs of score_pairs, get_metric * Add deprecation Test. Fix identation * Fixed changes requested 1 * Fixed changes requested 2 * Add equivalence test, p_dist == p_score * Fix tests and identation. 
* Fixed changes requested 3 * Fix identation * Last requested changes * Last small detail --- doc/introduction.rst | 23 ---- doc/supervised.rst | 20 +++- doc/weakly_supervised.rst | 30 +++-- metric_learn/base_metric.py | 210 +++++++++++++++++++++++++++++---- test/test_base_metric.py | 24 ++++ test/test_mahalanobis_mixin.py | 47 ++++++-- test/test_pairs_classifiers.py | 20 ++-- test/test_sklearn_compat.py | 13 +- test/test_utils.py | 79 +++++++++---- 9 files changed, 360 insertions(+), 106 deletions(-) diff --git a/doc/introduction.rst b/doc/introduction.rst index 7d9f52d0..e9ff0015 100644 --- a/doc/introduction.rst +++ b/doc/introduction.rst @@ -123,26 +123,3 @@ to the following resources: Survey `_ (2012) - **Book:** `Metric Learning `_ (2015) - -.. Methods [TO MOVE TO SUPERVISED/WEAK SECTIONS] -.. ============================================= - -.. Currently, each metric learning algorithm supports the following methods: - -.. - ``fit(...)``, which learns the model. -.. - ``get_mahalanobis_matrix()``, which returns a Mahalanobis matrix -.. - ``get_metric()``, which returns a function that takes as input two 1D - arrays and outputs the learned metric score on these two points -.. :math:`M = L^{\top}L` such that distance between vectors ``x`` and -.. ``y`` can be computed as :math:`\sqrt{\left(x-y\right)M\left(x-y\right)}`. -.. - ``components_from_metric(metric)``, which returns a transformation matrix -.. :math:`L \in \mathbb{R}^{D \times d}`, which can be used to convert a -.. data matrix :math:`X \in \mathbb{R}^{n \times d}` to the -.. :math:`D`-dimensional learned metric space :math:`X L^{\top}`, -.. in which standard Euclidean distances may be used. -.. - ``transform(X)``, which applies the aforementioned transformation. -.. - ``score_pairs(pairs)`` which returns the distance between pairs of -.. points. ``pairs`` should be a 3D array-like of pairs of shape ``(n_pairs, -.. 2, n_features)``, or it can be a 2D array-like of pairs indicators of -.. shape ``(n_pairs, 2)`` (see section :ref:`preprocessor_section` for more -.. details). \ No newline at end of file diff --git a/doc/supervised.rst b/doc/supervised.rst index c6d8b68b..e27b58ec 100644 --- a/doc/supervised.rst +++ b/doc/supervised.rst @@ -69,10 +69,10 @@ Also, as explained before, our metric learners has learn a distance between points. You can use this distance in two main ways: - You can either return the distance between pairs of points using the - `score_pairs` function: + `pair_distance` function: ->>> nca.score_pairs([[[3.5, 3.6], [5.6, 2.4]], [[1.2, 4.2], [2.1, 6.4]]]) -array([0.49627072, 3.65287282]) +>>> nca.pair_distance([[[3.5, 3.6], [5.6, 2.4]], [[1.2, 4.2], [2.1, 6.4]], [[3.3, 7.8], [10.9, 0.1]]]) +array([0.49627072, 3.65287282, 6.06079877]) - Or you can return a function that will return the distance (in the new space) between two 1D arrays (the coordinates of the points in the original @@ -82,6 +82,18 @@ array([0.49627072, 3.65287282]) >>> metric_fun([3.5, 3.6], [5.6, 2.4]) 0.4962707194621285 +- Alternatively, you can use `pair_score` to return the **score** between + pairs of points (the larger the score, the more similar the pair). + For Mahalanobis learners, it is equal to the opposite of the distance. + +>>> score = nca.pair_score([[[3.5, 3.6], [5.6, 2.4]], [[1.2, 4.2], [2.1, 6.4]], [[3.3, 7.8], [10.9, 0.1]]]) +>>> score +array([-0.49627072, -3.65287282, -6.06079877]) + +This is useful because `pair_score` matches the **score** semantic of +scikit-learn's `Classification metrics +`_. + .. 
note:: If the metric learner that you use learns a :ref:`Mahalanobis distance @@ -93,7 +105,6 @@ array([0.49627072, 3.65287282]) array([[0.43680409, 0.89169412], [0.89169412, 1.9542479 ]]) -.. TODO: remove the "like it is the case etc..." if it's not the case anymore Scikit-learn compatibility -------------------------- @@ -105,6 +116,7 @@ All supervised algorithms are scikit-learn estimators scikit-learn model selection routines (`sklearn.model_selection.cross_val_score`, `sklearn.model_selection.GridSearchCV`, etc). +You can also use some of the scoring functions from `sklearn.metrics`. Algorithms ========== diff --git a/doc/weakly_supervised.rst b/doc/weakly_supervised.rst index 174210b8..02ea4ef6 100644 --- a/doc/weakly_supervised.rst +++ b/doc/weakly_supervised.rst @@ -160,9 +160,9 @@ Also, as explained before, our metric learner has learned a distance between points. You can use this distance in two main ways: - You can either return the distance between pairs of points using the - `score_pairs` function: + `pair_distance` function: ->>> mmc.score_pairs([[[3.5, 3.6, 5.2], [5.6, 2.4, 6.7]], +>>> mmc.pair_distance([[[3.5, 3.6, 5.2], [5.6, 2.4, 6.7]], ... [[1.2, 4.2, 7.7], [2.1, 6.4, 0.9]]]) array([7.27607365, 0.88853014]) @@ -175,6 +175,18 @@ array([7.27607365, 0.88853014]) >>> metric_fun([3.5, 3.6, 5.2], [5.6, 2.4, 6.7]) 7.276073646278203 +- Alternatively, you can use `pair_score` to return the **score** between + pairs of points (the larger the score, the more similar the pair). + For Mahalanobis learners, it is equal to the opposite of the distance. + +>>> score = mmc.pair_score([[[3.5, 3.6], [5.6, 2.4]], [[1.2, 4.2], [2.1, 6.4]], [[3.3, 7.8], [10.9, 0.1]]]) +>>> score +array([-0.49627072, -3.65287282, -6.06079877]) + + This is useful because `pair_score` matches the **score** semantic of + scikit-learn's `Classification metrics + `_. + .. note:: If the metric learner that you use learns a :ref:`Mahalanobis distance @@ -187,8 +199,6 @@ array([[ 0.58603894, -5.69883982, -1.66614919], [-5.69883982, 55.41743549, 16.20219519], [-1.66614919, 16.20219519, 4.73697721]]) -.. TODO: remove the "like it is the case etc..." if it's not the case anymore - .. _sklearn_compat_ws: Prediction and scoring @@ -344,8 +354,8 @@ returns the `sklearn.metrics.roc_auc_score` (which is threshold-independent). .. note:: See :ref:`fit_ws` for more details on metric learners functions that are - not specific to learning on pairs, like `transform`, `score_pairs`, - `get_metric` and `get_mahalanobis_matrix`. + not specific to learning on pairs, like `transform`, `pair_distance`, + `pair_score`, `get_metric` and `get_mahalanobis_matrix`. Algorithms ---------- @@ -691,8 +701,8 @@ of triplets that have the right predicted ordering. .. note:: See :ref:`fit_ws` for more details on metric learners functions that are - not specific to learning on pairs, like `transform`, `score_pairs`, - `get_metric` and `get_mahalanobis_matrix`. + not specific to learning on pairs, like `transform`, `pair_distance`, + `pair_score`, `get_metric` and `get_mahalanobis_matrix`. @@ -859,8 +869,8 @@ of quadruplets have the right predicted ordering. .. note:: See :ref:`fit_ws` for more details on metric learners functions that are - not specific to learning on pairs, like `transform`, `score_pairs`, - `get_metric` and `get_mahalanobis_matrix`. + not specific to learning on pairs, like `transform`, `pair_distance`, + `pair_score`, `get_metric` and `get_mahalanobis_matrix`. 
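
Taken together, the documentation changes above describe the following usage. A minimal sketch, assuming a metric-learn build that already includes this patch (so that Mahalanobis learners expose both `pair_distance` and `pair_score`); NCA stands in here for any Mahalanobis learner, and the iris data and `random_state` are purely illustrative:

    import numpy as np
    from sklearn.datasets import load_iris
    from metric_learn import NCA

    X, y = load_iris(return_X_y=True)
    nca = NCA(random_state=42)
    nca.fit(X, y)

    pairs = np.array([[X[0], X[1]], [X[0], X[50]]])
    dist = nca.pair_distance(pairs)   # learned Mahalanobis distances, shape (2,)
    score = nca.pair_score(pairs)     # similarity scores: larger means more similar

    # For Mahalanobis learners, pair_score is simply the negated distance.
    assert np.allclose(score, -dist)

    # get_metric() returns a standalone two-point function that stays valid
    # even if the learner is later refitted.
    metric_fun = nca.get_metric()
    assert np.isclose(metric_fun(X[0], X[1]), dist[0])

The negation is the design choice that lets `pair_score` plug directly into scikit-learn's score-based classification metrics, while `pair_distance` keeps the distance semantics for learners that actually have one.
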
diff --git a/metric_learn/base_metric.py b/metric_learn/base_metric.py index 21506011..e7dbd608 100644 --- a/metric_learn/base_metric.py +++ b/metric_learn/base_metric.py @@ -9,6 +9,7 @@ import numpy as np from abc import ABCMeta, abstractmethod from ._util import ArrayIndexer, check_input, validate_vector +import warnings class BaseMetricLearner(BaseEstimator, metaclass=ABCMeta): @@ -27,13 +28,54 @@ def __init__(self, preprocessor=None): @abstractmethod def score_pairs(self, pairs): - """Returns the score between pairs + """ + .. deprecated:: 0.7.0 Refer to `pair_distance` and `pair_score`. + + .. warning:: + This method will be removed in 0.8.0. Please refer to `pair_distance` + or `pair_score`. This change will occur in order to add learners + that don't necessarily learn a Mahalanobis distance. + + Returns the score between pairs (can be a similarity, or a distance/metric depending on the algorithm) Parameters ---------- - pairs : `numpy.ndarray`, shape=(n_samples, 2, n_features) - 3D array of pairs. + pairs : array-like, shape=(n_pairs, 2, n_features) or (n_pairs, 2) + 3D Array of pairs to score, with each row corresponding to two points, + for 2D array of indices of pairs if the metric learner uses a + preprocessor. + + Returns + ------- + scores : `numpy.ndarray` of shape=(n_pairs,) + The score of every pair. + + See Also + -------- + get_metric : a method that returns a function to compute the metric between + two points. The difference between `score_pairs` is that it works on two + 1D arrays and cannot use a preprocessor. Besides, the returned function + is independent of the metric learner and hence is not modified if the + metric learner is. + """ + + @abstractmethod + def pair_score(self, pairs): + """ + .. versionadded:: 0.7.0 Compute the similarity score between pairs + + Returns the similarity score between pairs of points (the larger the score, + the more similar the pair). For metric learners that learn a distance, + the score is simply the opposite of the distance between pairs. All + learners have access to this method. + + Parameters + ---------- + pairs : array-like, shape=(n_pairs, 2, n_features) or (n_pairs, 2) + 3D Array of pairs to score, with each row corresponding to two points, + for 2D array of indices of pairs if the metric learner uses a + preprocessor. Returns ------- @@ -43,10 +85,40 @@ def score_pairs(self, pairs): See Also -------- get_metric : a method that returns a function to compute the metric between - two points. The difference with `score_pairs` is that it works on two 1D - arrays and cannot use a preprocessor. Besides, the returned function is - independent of the metric learner and hence is not modified if the metric - learner is. + two points. The difference with `pair_score` is that it works on two + 1D arrays and cannot use a preprocessor. Besides, the returned function + is independent of the metric learner and hence is not modified if the + metric learner is. + """ + + @abstractmethod + def pair_distance(self, pairs): + """ + .. versionadded:: 0.7.0 Compute the distance between pairs + + Returns the (pseudo) distance between pairs, when available. For metric + learners that do not learn a (pseudo) distance, an error is thrown + instead. + + Parameters + ---------- + pairs : array-like, shape=(n_pairs, 2, n_features) or (n_pairs, 2) + 3D Array of pairs for which to compute the distance, with each + row corresponding to two points, for 2D array of indices of pairs + if the metric learner uses a preprocessor. 
+ + Returns + ------- + scores : `numpy.ndarray` of shape=(n_pairs,) + The distance between every pair. + + See Also + -------- + get_metric : a method that returns a function to compute the metric between + two points. The difference with `pair_distance` is that it works on two + 1D arrays and cannot use a preprocessor. Besides, the returned function + is independent of the metric learner and hence is not modified if the + metric learner is. """ def _check_preprocessor(self): @@ -102,8 +174,10 @@ def _prepare_inputs(self, X, y=None, type_of_inputs='classic', @abstractmethod def get_metric(self): - """Returns a function that takes as input two 1D arrays and outputs the - learned metric score on these two points. + """Returns a function that takes as input two 1D arrays and outputs + the value of the learned metric on these two points. Depending on the + algorithm, it can return a distance or a similarity function between + pairs. This function will be independent from the metric learner that learned it (it will not be modified if the initial metric learner is modified), @@ -136,10 +210,17 @@ def get_metric(self): See Also -------- - score_pairs : a method that returns the metric score between several pairs - of points. Unlike `get_metric`, this is a method of the metric learner - and therefore can change if the metric learner changes. Besides, it can - use the metric learner's preprocessor, and works on concatenated arrays. + pair_distance : a method that returns the distance between several + pairs of points. Unlike `get_metric`, this is a method of the metric + learner and therefore can change if the metric learner changes. Besides, + it can use the metric learner's preprocessor, and works on concatenated + arrays. + + pair_score : a method that returns the similarity score between + several pairs of points. Unlike `get_metric`, this is a method of the + metric learner and therefore can change if the metric learner changes. + Besides, it can use the metric learner's preprocessor, and works on + concatenated arrays. """ @@ -182,13 +263,92 @@ class MahalanobisMixin(BaseMetricLearner, MetricTransformer, """ def score_pairs(self, pairs): - r"""Returns the learned Mahalanobis distance between pairs. + r""" + .. deprecated:: 0.7.0 + This method is deprecated. Please use `pair_distance` instead. + + .. warning:: + This method will be removed in 0.8.0. Please refer to `pair_distance` + or `pair_score`. This change will occur in order to add learners + that don't necessarily learn a Mahalanobis distance. + + Returns the learned Mahalanobis distance between pairs. + + This distance is defined as: :math:`d_M(x, x') = \\sqrt{(x-x')^T M (x-x')}` + where ``M`` is the learned Mahalanobis matrix, for every pair of points + ``x`` and ``x'``. This corresponds to the euclidean distance between + embeddings of the points in a new space, obtained through a linear + transformation. Indeed, we have also: :math:`d_M(x, x') = \\sqrt{(x_e - + x_e')^T (x_e- x_e')}`, with :math:`x_e = L x` (See + :class:`MahalanobisMixin`). + + Parameters + ---------- + pairs : array-like, shape=(n_pairs, 2, n_features) or (n_pairs, 2) + 3D Array of pairs to score, with each row corresponding to two points, + for 2D array of indices of pairs if the metric learner uses a + preprocessor. + + Returns + ------- + scores : `numpy.ndarray` of shape=(n_pairs,) + The learned Mahalanobis distance for every pair. + + See Also + -------- + get_metric : a method that returns a function to compute the metric between + two points. 
The difference with `score_pairs` is that it works on two + 1D arrays and cannot use a preprocessor. Besides, the returned function + is independent of the metric learner and hence is not modified if the + metric learner is. + + :ref:`mahalanobis_distances` : The section of the project documentation + that describes Mahalanobis Distances. + """ + dpr_msg = ("score_pairs will be deprecated in release 0.7.0. " + "Use pair_score to compute similarity scores, or " + "pair_distances to compute distances.") + warnings.warn(dpr_msg, category=FutureWarning) + return self.pair_distance(pairs) + + def pair_score(self, pairs): + """ + Returns the opposite of the learned Mahalanobis distance between pairs. + + Parameters + ---------- + pairs : array-like, shape=(n_pairs, 2, n_features) or (n_pairs, 2) + 3D Array of pairs to score, with each row corresponding to two points, + for 2D array of indices of pairs if the metric learner uses a + preprocessor. + + Returns + ------- + scores : `numpy.ndarray` of shape=(n_pairs,) + The opposite of the learned Mahalanobis distance for every pair. + + See Also + -------- + get_metric : a method that returns a function to compute the metric between + two points. The difference with `pair_score` is that it works on two + 1D arrays and cannot use a preprocessor. Besides, the returned function + is independent of the metric learner and hence is not modified if the + metric learner is. + + :ref:`mahalanobis_distances` : The section of the project documentation + that describes Mahalanobis Distances. + """ + return -1 * self.pair_distance(pairs) + + def pair_distance(self, pairs): + """ + Returns the learned Mahalanobis distance between pairs. - This distance is defined as: :math:`d_M(x, x') = \sqrt{(x-x')^T M (x-x')}` + This distance is defined as: :math:`d_M(x, x') = \\sqrt{(x-x')^T M (x-x')}` where ``M`` is the learned Mahalanobis matrix, for every pair of points ``x`` and ``x'``. This corresponds to the euclidean distance between embeddings of the points in a new space, obtained through a linear - transformation. Indeed, we have also: :math:`d_M(x, x') = \sqrt{(x_e - + transformation. Indeed, we have also: :math:`d_M(x, x') = \\sqrt{(x_e - x_e')^T (x_e- x_e')}`, with :math:`x_e = L x` (See :class:`MahalanobisMixin`). @@ -207,10 +367,10 @@ def score_pairs(self, pairs): See Also -------- get_metric : a method that returns a function to compute the metric between - two points. The difference with `score_pairs` is that it works on two 1D - arrays and cannot use a preprocessor. Besides, the returned function is - independent of the metric learner and hence is not modified if the metric - learner is. + two points. The difference with `pair_distance` is that it works on two + 1D arrays and cannot use a preprocessor. Besides, the returned function + is independent of the metric learner and hence is not modified if the + metric learner is. :ref:`mahalanobis_distances` : The section of the project documentation that describes Mahalanobis Distances. @@ -361,7 +521,7 @@ def decision_function(self, pairs): pairs = check_input(pairs, type_of_inputs='tuples', preprocessor=self.preprocessor_, estimator=self, tuple_size=self._tuple_size) - return - self.score_pairs(pairs) + return self.pair_score(pairs) def score(self, pairs, y): """Computes score of pairs similarity prediction. 
@@ -631,8 +791,8 @@ def decision_function(self, triplets): triplets = check_input(triplets, type_of_inputs='tuples', preprocessor=self.preprocessor_, estimator=self, tuple_size=self._tuple_size) - return (self.score_pairs(triplets[:, [0, 2]]) - - self.score_pairs(triplets[:, :2])) + return (self.pair_score(triplets[:, :2]) - + self.pair_score(triplets[:, [0, 2]])) def score(self, triplets): """Computes score on input triplets. @@ -716,8 +876,8 @@ def decision_function(self, quadruplets): quadruplets = check_input(quadruplets, type_of_inputs='tuples', preprocessor=self.preprocessor_, estimator=self, tuple_size=self._tuple_size) - return (self.score_pairs(quadruplets[:, 2:]) - - self.score_pairs(quadruplets[:, :2])) + return (self.pair_score(quadruplets[:, :2]) - + self.pair_score(quadruplets[:, 2:])) def score(self, quadruplets): """Computes score on input quadruplets diff --git a/test/test_base_metric.py b/test/test_base_metric.py index 67f9b6a0..baa585b9 100644 --- a/test/test_base_metric.py +++ b/test/test_base_metric.py @@ -1,3 +1,4 @@ +from numpy.core.numeric import array_equal import pytest import re import unittest @@ -274,5 +275,28 @@ def test_n_components(estimator, build_dataset): 'Invalid n_components, must be in [1, {}]'.format(X.shape[1])) +@pytest.mark.parametrize('estimator, build_dataset', metric_learners, + ids=ids_metric_learners) +def test_score_pairs_warning(estimator, build_dataset): + """Tests that score_pairs returns a FutureWarning regarding deprecation. + Also that score_pairs and pair_distance have the same behaviour""" + input_data, labels, _, X = build_dataset() + model = clone(estimator) + set_random_state(model) + + # We fit the metric learner on it and then we call score_pairs on some + # points + model.fit(*remove_y(model, input_data, labels)) + + msg = ("score_pairs will be deprecated in release 0.7.0. " + "Use pair_score to compute similarity scores, or " + "pair_distances to compute distances.") + with pytest.warns(FutureWarning) as raised_warning: + score = model.score_pairs([[X[0], X[1]], ]) + dist = model.pair_distance([[X[0], X[1]], ]) + assert array_equal(score, dist) + assert any([str(warning.message) == msg for warning in raised_warning]) + + if __name__ == '__main__': unittest.main() diff --git a/test/test_mahalanobis_mixin.py b/test/test_mahalanobis_mixin.py index e3d981a4..e2aa1e4d 100644 --- a/test/test_mahalanobis_mixin.py +++ b/test/test_mahalanobis_mixin.py @@ -3,7 +3,8 @@ import pytest import numpy as np from numpy.linalg import LinAlgError -from numpy.testing import assert_array_almost_equal, assert_allclose +from numpy.testing import assert_array_almost_equal, assert_allclose, \ + assert_array_equal from scipy.spatial.distance import pdist, squareform, mahalanobis from scipy.stats import ortho_group from sklearn import clone @@ -27,7 +28,27 @@ @pytest.mark.parametrize('estimator, build_dataset', metric_learners, ids=ids_metric_learners) -def test_score_pairs_pairwise(estimator, build_dataset): +def test_pair_distance_pair_score_equivalent(estimator, build_dataset): + """ + For Mahalanobis learners, pair_score should be equivalent to the + opposite of the pair_distance result. 
+ """ + input_data, labels, _, X = build_dataset() + n_samples = 20 + X = X[:n_samples] + model = clone(estimator) + set_random_state(model) + model.fit(*remove_y(estimator, input_data, labels)) + + distances = model.pair_distance(np.array(list(product(X, X)))) + scores = model.pair_score(np.array(list(product(X, X)))) + + assert_array_equal(distances, -1 * scores) + + +@pytest.mark.parametrize('estimator, build_dataset', metric_learners, + ids=ids_metric_learners) +def test_pair_distance_pairwise(estimator, build_dataset): # Computing pairwise scores should return a euclidean distance matrix. input_data, labels, _, X = build_dataset() n_samples = 20 @@ -36,7 +57,7 @@ def test_score_pairs_pairwise(estimator, build_dataset): set_random_state(model) model.fit(*remove_y(estimator, input_data, labels)) - pairwise = model.score_pairs(np.array(list(product(X, X))))\ + pairwise = model.pair_distance(np.array(list(product(X, X))))\ .reshape(n_samples, n_samples) check_is_distance_matrix(pairwise) @@ -51,8 +72,8 @@ def test_score_pairs_pairwise(estimator, build_dataset): @pytest.mark.parametrize('estimator, build_dataset', metric_learners, ids=ids_metric_learners) -def test_score_pairs_toy_example(estimator, build_dataset): - # Checks that score_pairs works on a toy example +def test_pair_distance_toy_example(estimator, build_dataset): + # Checks that pair_distance works on a toy example input_data, labels, _, X = build_dataset() n_samples = 20 X = X[:n_samples] @@ -64,24 +85,24 @@ def test_score_pairs_toy_example(estimator, build_dataset): distances = np.sqrt(np.sum((embedded_pairs[:, 1] - embedded_pairs[:, 0])**2, axis=-1)) - assert_array_almost_equal(model.score_pairs(pairs), distances) + assert_array_almost_equal(model.pair_distance(pairs), distances) @pytest.mark.parametrize('estimator, build_dataset', metric_learners, ids=ids_metric_learners) -def test_score_pairs_finite(estimator, build_dataset): +def test_pair_distance_finite(estimator, build_dataset): # tests that the score is finite input_data, labels, _, X = build_dataset() model = clone(estimator) set_random_state(model) model.fit(*remove_y(estimator, input_data, labels)) pairs = np.array(list(product(X, X))) - assert np.isfinite(model.score_pairs(pairs)).all() + assert np.isfinite(model.pair_distance(pairs)).all() @pytest.mark.parametrize('estimator, build_dataset', metric_learners, ids=ids_metric_learners) -def test_score_pairs_dim(estimator, build_dataset): +def test_pair_distance_dim(estimator, build_dataset): # scoring of 3D arrays should return 1D array (several tuples), # and scoring of 2D arrays (one tuple) should return an error (like # scikit-learn's error when scoring 1D arrays) @@ -90,13 +111,13 @@ def test_score_pairs_dim(estimator, build_dataset): set_random_state(model) model.fit(*remove_y(estimator, input_data, labels)) tuples = np.array(list(product(X, X))) - assert model.score_pairs(tuples).shape == (tuples.shape[0],) + assert model.pair_distance(tuples).shape == (tuples.shape[0],) context = make_context(estimator) msg = ("3D array of formed tuples expected{}. Found 2D array " "instead:\ninput={}. Reshape your data and/or use a preprocessor.\n" .format(context, tuples[1])) with pytest.raises(ValueError) as raised_error: - model.score_pairs(tuples[1]) + model.pair_distance(tuples[1]) assert str(raised_error.value) == msg @@ -140,7 +161,7 @@ def test_embed_dim(estimator, build_dataset): "instead:\ninput={}. 
Reshape your data and/or use a " "preprocessor.\n".format(context, X[0])) with pytest.raises(ValueError) as raised_error: - model.score_pairs(model.transform(X[0, :])) + model.pair_distance(model.transform(X[0, :])) assert str(raised_error.value) == err_msg # we test that the shape is also OK when doing dimensionality reduction if hasattr(model, 'n_components'): @@ -625,7 +646,7 @@ def test_singular_covariance_init_of_non_strict_pd(estimator, build_dataset): 'preprocessing step.') with pytest.warns(UserWarning) as raised_warning: model.fit(input_data, labels) - assert np.any([str(warning.message) == msg for warning in raised_warning]) + assert any([str(warning.message) == msg for warning in raised_warning]) M, _ = _initialize_metric_mahalanobis(X, init='covariance', random_state=RNG, return_inverse=True, diff --git a/test/test_pairs_classifiers.py b/test/test_pairs_classifiers.py index 824bb622..714cbd08 100644 --- a/test/test_pairs_classifiers.py +++ b/test/test_pairs_classifiers.py @@ -49,14 +49,14 @@ def test_predict_monotonous(estimator, build_dataset, pairs_train, pairs_test, y_train, y_test = train_test_split(input_data, labels) estimator.fit(pairs_train, y_train) - distances = estimator.score_pairs(pairs_test) + scores = estimator.pair_score(pairs_test) predictions = estimator.predict(pairs_test) - min_dissimilar = np.min(distances[predictions == -1]) - max_similar = np.max(distances[predictions == 1]) - assert max_similar <= min_dissimilar - separator = np.mean([min_dissimilar, max_similar]) - assert (predictions[distances > separator] == -1).all() - assert (predictions[distances < separator] == 1).all() + max_dissimilar = np.max(scores[predictions == -1]) + min_similar = np.min(scores[predictions == 1]) + assert max_dissimilar <= min_similar + separator = np.mean([max_dissimilar, min_similar]) + assert (predictions[scores < separator] == -1).all() + assert (predictions[scores > separator] == 1).all() @pytest.mark.parametrize('with_preprocessor', [True, False]) @@ -65,15 +65,17 @@ def test_predict_monotonous(estimator, build_dataset, def test_raise_not_fitted_error_if_not_fitted(estimator, build_dataset, with_preprocessor): """Test that a NotFittedError is raised if someone tries to use - score_pairs, decision_function, get_metric, transform or + pair_score, score_pairs, decision_function, get_metric, transform or get_mahalanobis_matrix on input data and the metric learner has not been fitted.""" input_data, labels, preprocessor, _ = build_dataset(with_preprocessor) estimator = clone(estimator) estimator.set_params(preprocessor=preprocessor) set_random_state(estimator) - with pytest.raises(NotFittedError): + with pytest.raises(NotFittedError): # Remove in 0.8.0 estimator.score_pairs(input_data) + with pytest.raises(NotFittedError): + estimator.pair_score(input_data) with pytest.raises(NotFittedError): estimator.decision_function(input_data) with pytest.raises(NotFittedError): diff --git a/test/test_sklearn_compat.py b/test/test_sklearn_compat.py index 3ad69712..b08fcf25 100644 --- a/test/test_sklearn_compat.py +++ b/test/test_sklearn_compat.py @@ -147,8 +147,19 @@ def test_array_like_inputs(estimator, build_dataset, with_preprocessor): pairs = np.array([[X[0], X[1]], [X[0], X[2]]]) pairs_variants, _ = generate_array_like(pairs) + + not_implemented_msg = "" + # Todo in 0.7.0: Change 'not_implemented_msg' for the message that says + # "This learner does not have pair_distance" + for pairs_variant in pairs_variants: - estimator.score_pairs(pairs_variant) + 
estimator.pair_score(pairs_variant) # All learners have pair_score + + # But not all of them will have pair_distance + try: + estimator.pair_distance(pairs_variant) + except Exception as raised_exception: + assert raised_exception.value.args[0] == not_implemented_msg @pytest.mark.parametrize('with_preprocessor', [True, False]) diff --git a/test/test_utils.py b/test/test_utils.py index 072b94c5..83bdd86a 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -834,9 +834,9 @@ def test_error_message_tuple_size(estimator, _): @pytest.mark.parametrize('estimator, _', metric_learners, ids=ids_metric_learners) -def test_error_message_t_score_pairs(estimator, _): - """tests that if you want to score_pairs on triplets for instance, it returns - the right error message +def test_error_message_t_pair_distance_or_score(estimator, _): + """Tests that if you want to pair_distance or pair_score on triplets + for instance, it returns the right error message """ estimator = clone(estimator) set_random_state(estimator) @@ -844,12 +844,22 @@ def test_error_message_t_score_pairs(estimator, _): triplets = np.array([[[1.3, 6.3], [3., 6.8], [6.5, 4.4]], [[1.9, 5.3], [1., 7.8], [3.2, 1.2]]]) with pytest.raises(ValueError) as raised_err: - estimator.score_pairs(triplets) + estimator.pair_score(triplets) expected_msg = ("Tuples of 2 element(s) expected{}. Got tuples of 3 " "element(s) instead (shape=(2, 3, 2)):\ninput={}.\n" .format(make_context(estimator), triplets)) assert str(raised_err.value) == expected_msg + not_implemented_msg = "" + # Todo in 0.7.0: Change 'not_implemented_msg' for the message that says + # "This learner does not have pair_distance" + + # One exception will trigger for sure + with pytest.raises(Exception) as raised_exception: + estimator.pair_distance(triplets) + err_value = raised_exception.value.args[0] + assert err_value == expected_msg or err_value == not_implemented_msg + def test_preprocess_tuples_simple_example(): """Test the preprocessor on a very simple example of tuples to ensure the @@ -930,32 +940,59 @@ def test_same_with_or_without_preprocessor(estimator, build_dataset): method)(formed_test) assert np.array(output_with_prep == output_with_prep_formed).all() - # test score_pairs + # Test pair_score, all learners have it. 
idx1 = np.array([[0, 2], [5, 3]], dtype=int) - output_with_prep = estimator_with_preprocessor.score_pairs( + output_with_prep = estimator_with_preprocessor.pair_score( indicators_to_transform[idx1]) - output_without_prep = estimator_without_preprocessor.score_pairs( + output_without_prep = estimator_without_preprocessor.pair_score( formed_points_to_transform[idx1]) assert np.array(output_with_prep == output_without_prep).all() - output_with_prep = estimator_with_preprocessor.score_pairs( + output_with_prep = estimator_with_preprocessor.pair_score( indicators_to_transform[idx1]) - output_without_prep = estimator_with_prep_formed.score_pairs( + output_without_prep = estimator_with_prep_formed.pair_score( formed_points_to_transform[idx1]) assert np.array(output_with_prep == output_without_prep).all() - # test transform - output_with_prep = estimator_with_preprocessor.transform( - indicators_to_transform) - output_without_prep = estimator_without_preprocessor.transform( - formed_points_to_transform) - assert np.array(output_with_prep == output_without_prep).all() - - output_with_prep = estimator_with_preprocessor.transform( - indicators_to_transform) - output_without_prep = estimator_with_prep_formed.transform( - formed_points_to_transform) - assert np.array(output_with_prep == output_without_prep).all() + # Test pair_distance + not_implemented_msg = "" + # Todo in 0.7.0: Change 'not_implemented_msg' for the message that says + # "This learner does not have pair_distance" + try: + output_with_prep = estimator_with_preprocessor.pair_distance( + indicators_to_transform[idx1]) + output_without_prep = estimator_without_preprocessor.pair_distance( + formed_points_to_transform[idx1]) + assert np.array(output_with_prep == output_without_prep).all() + + output_with_prep = estimator_with_preprocessor.pair_distance( + indicators_to_transform[idx1]) + output_without_prep = estimator_with_prep_formed.pair_distance( + formed_points_to_transform[idx1]) + assert np.array(output_with_prep == output_without_prep).all() + + except Exception as raised_exception: + assert raised_exception.value.args[0] == not_implemented_msg + + # Test transform + not_implemented_msg = "" + # Todo in 0.7.0: Change 'not_implemented_msg' for the message that says + # "This learner does not have transform" + try: + output_with_prep = estimator_with_preprocessor.transform( + indicators_to_transform) + output_without_prep = estimator_without_preprocessor.transform( + formed_points_to_transform) + assert np.array(output_with_prep == output_without_prep).all() + + output_with_prep = estimator_with_preprocessor.transform( + indicators_to_transform) + output_without_prep = estimator_with_prep_formed.transform( + formed_points_to_transform) + assert np.array(output_with_prep == output_without_prep).all() + + except Exception as raised_exception: + assert raised_exception.value.args[0] == not_implemented_msg def test_check_collapsed_pairs_raises_no_error(): From 6a4aaea62d35c173376fbd3e89b90e9efb0ebb16 Mon Sep 17 00:00:00 2001 From: Maximiliano Vargas <43217761+mvargas33@users.noreply.github.com> Date: Thu, 21 Oct 2021 12:36:03 -0300 Subject: [PATCH 189/210] Fix 7 sources of warnings in the tests (#339) * Fix 7 sources of warnings * Fix indentation * Generalized warnings, as old sklearn throw more warnings * Changed np.any() for any() * Fix identation --- metric_learn/_util.py | 2 +- metric_learn/itml.py | 2 +- metric_learn/rca.py | 2 +- metric_learn/scml.py | 6 +++--- pytest.ini | 4 ++++ test/metric_learn_test.py | 8 ++++---- 
test/test_constraints.py | 21 ++++++++++++++++++--- test/test_sklearn_compat.py | 2 +- test/test_triplets_classifiers.py | 25 ++++++++++++++++++++++--- 9 files changed, 55 insertions(+), 17 deletions(-) create mode 100644 pytest.ini diff --git a/metric_learn/_util.py b/metric_learn/_util.py index 764a34c8..868ececa 100644 --- a/metric_learn/_util.py +++ b/metric_learn/_util.py @@ -704,7 +704,7 @@ def _initialize_metric_mahalanobis(input, init='identity', random_state=None, elif init == 'covariance': if input.ndim == 3: # if the input are tuples, we need to form an X by deduplication - X = np.vstack({tuple(row) for row in input.reshape(-1, n_features)}) + X = np.unique(np.vstack(input), axis=0) else: X = input # atleast2d is necessary to deal with scalar covariance matrices diff --git a/metric_learn/itml.py b/metric_learn/itml.py index 43872b60..9fa3b75e 100644 --- a/metric_learn/itml.py +++ b/metric_learn/itml.py @@ -32,7 +32,7 @@ def _fit(self, pairs, y, bounds=None): type_of_inputs='tuples') # init bounds if bounds is None: - X = np.vstack({tuple(row) for row in pairs.reshape(-1, pairs.shape[2])}) + X = np.unique(np.vstack(pairs), axis=0) self.bounds_ = np.percentile(pairwise_distances(X), (5, 95)) else: bounds = check_array(bounds, allow_nd=False, ensure_min_samples=0, diff --git a/metric_learn/rca.py b/metric_learn/rca.py index 34f7f3ff..1da00062 100644 --- a/metric_learn/rca.py +++ b/metric_learn/rca.py @@ -112,7 +112,7 @@ def fit(self, X, chunks): # Fisher Linear Discriminant projection if dim < X.shape[1]: total_cov = np.cov(X[chunk_mask], rowvar=0) - tmp = np.linalg.lstsq(total_cov, inner_cov)[0] + tmp = np.linalg.lstsq(total_cov, inner_cov, rcond=None)[0] vals, vecs = np.linalg.eig(tmp) inds = np.argsort(vals)[:dim] A = vecs[:, inds] diff --git a/metric_learn/scml.py b/metric_learn/scml.py index ee585018..b86c6fe1 100644 --- a/metric_learn/scml.py +++ b/metric_learn/scml.py @@ -615,10 +615,10 @@ def _generate_bases_LDA(self, X, y): k_class = np.vstack((np.minimum(class_count, scales[0]), np.minimum(class_count, scales[1]))) - idx_set = [np.zeros((n_clusters, sum(k_class[0, :])), dtype=np.int), - np.zeros((n_clusters, sum(k_class[1, :])), dtype=np.int)] + idx_set = [np.zeros((n_clusters, sum(k_class[0, :])), dtype=np.int64), + np.zeros((n_clusters, sum(k_class[1, :])), dtype=np.int64)] - start_finish_indices = np.hstack((np.zeros((2, 1), np.int), + start_finish_indices = np.hstack((np.zeros((2, 1), np.int64), k_class)).cumsum(axis=1) neigh = NearestNeighbors() diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 00000000..ef3c8acb --- /dev/null +++ b/pytest.ini @@ -0,0 +1,4 @@ +[pytest] +markers = + integration: mark a test as integration + unit: mark a test as unit \ No newline at end of file diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py index 2debe426..542e1e0a 100644 --- a/test/metric_learn_test.py +++ b/test/metric_learn_test.py @@ -9,7 +9,6 @@ make_spd_matrix) from numpy.testing import (assert_array_almost_equal, assert_array_equal, assert_allclose) -from metric_learn.sklearn_shims import assert_warns_message from sklearn.exceptions import ConvergenceWarning from sklearn.utils.validation import check_X_y from sklearn.preprocessing import StandardScaler @@ -1143,9 +1142,10 @@ def test_convergence_warning(dataset, algo_class): X, y = dataset model = algo_class(max_iter=2, verbose=True) cls_name = model.__class__.__name__ - assert_warns_message(ConvergenceWarning, - '[{}] {} did not converge'.format(cls_name, cls_name), - model.fit, X, y) + msg = 
'[{}] {} did not converge'.format(cls_name, cls_name) + with pytest.warns(Warning) as raised_warning: + model.fit(X, y) + assert any([msg in str(warn.message) for warn in raised_warning]) if __name__ == '__main__': diff --git a/test/test_constraints.py b/test/test_constraints.py index 92876779..def228d4 100644 --- a/test/test_constraints.py +++ b/test/test_constraints.py @@ -103,7 +103,7 @@ def test_generate_knntriplets_under_edge(k_genuine, k_impostor, T_test): @pytest.mark.parametrize("k_genuine, k_impostor,", - [(2, 3), (3, 3), (2, 4), (3, 4)]) + [(3, 3), (2, 4), (3, 4), (10, 9), (144, 33)]) def test_generate_knntriplets(k_genuine, k_impostor): """Checks edge and over the edge cases of knn triplet construction with not enough neighbors""" @@ -118,8 +118,23 @@ def test_generate_knntriplets(k_genuine, k_impostor): X = np.array([[0, 0], [2, 2], [4, 4], [8, 8], [16, 16], [32, 32], [33, 33]]) y = np.array([1, 1, 1, 2, 2, 2, -1]) - T = Constraints(y).generate_knntriplets(X, k_genuine, k_impostor) - + msg1 = ("The class 1 has 3 elements, which is not sufficient to " + f"generate {k_genuine+1} genuine neighbors " + "as specified by k_genuine") + msg2 = ("The class 2 has 3 elements, which is not sufficient to " + f"generate {k_genuine+1} genuine neighbors " + "as specified by k_genuine") + msg3 = ("The class 1 has 3 elements of other classes, which is " + f"not sufficient to generate {k_impostor} impostor " + "neighbors as specified by k_impostor") + msg4 = ("The class 2 has 3 elements of other classes, which is " + f"not sufficient to generate {k_impostor} impostor " + "neighbors as specified by k_impostor") + msgs = [msg1, msg2, msg3, msg4] + with pytest.warns(UserWarning) as user_warning: + T = Constraints(y).generate_knntriplets(X, k_genuine, k_impostor) + assert any([[msg in str(warn.message) for msg in msgs] + for warn in user_warning]) assert np.array_equal(sorted(T.tolist()), T_test) diff --git a/test/test_sklearn_compat.py b/test/test_sklearn_compat.py index b08fcf25..a23a88d0 100644 --- a/test/test_sklearn_compat.py +++ b/test/test_sklearn_compat.py @@ -235,7 +235,7 @@ def test_cross_validation_manual_vs_scikit(estimator, build_dataset, n_splits = 3 kfold = KFold(shuffle=False, n_splits=n_splits) n_samples = input_data.shape[0] - fold_sizes = (n_samples // n_splits) * np.ones(n_splits, dtype=np.int) + fold_sizes = (n_samples // n_splits) * np.ones(n_splits, dtype=np.int64) fold_sizes[:n_samples % n_splits] += 1 current = 0 scores, predictions = [], np.zeros(input_data.shape[0]) diff --git a/test/test_triplets_classifiers.py b/test/test_triplets_classifiers.py index f2d5c015..600947e6 100644 --- a/test/test_triplets_classifiers.py +++ b/test/test_triplets_classifiers.py @@ -1,6 +1,7 @@ import pytest from sklearn.exceptions import NotFittedError from sklearn.model_selection import train_test_split +import metric_learn from test.test_utils import triplets_learners, ids_triplets_learners from metric_learn.sklearn_shims import set_random_state @@ -20,7 +21,13 @@ def test_predict_only_one_or_minus_one(estimator, build_dataset, estimator.set_params(preprocessor=preprocessor) set_random_state(estimator) triplets_train, triplets_test = train_test_split(input_data) - estimator.fit(triplets_train) + if isinstance(estimator, metric_learn.SCML): + msg = "As no value for `n_basis` was selected, " + with pytest.warns(UserWarning) as raised_warning: + estimator.fit(triplets_train) + assert msg in str(raised_warning[0].message) + else: + estimator.fit(triplets_train) predictions = 
estimator.predict(triplets_test) not_valid = [e for e in predictions if e not in [-1, 1]] @@ -42,7 +49,13 @@ def test_no_zero_prediction(estimator, build_dataset): # Dummy fit estimator = clone(estimator) set_random_state(estimator) - estimator.fit(triplets) + if isinstance(estimator, metric_learn.SCML): + msg = "As no value for `n_basis` was selected, " + with pytest.warns(UserWarning) as raised_warning: + estimator.fit(triplets) + assert msg in str(raised_warning[0].message) + else: + estimator.fit(triplets) # We force the transformation to be identity, to force euclidean distance estimator.components_ = np.eye(X.shape[1]) @@ -93,7 +106,13 @@ def test_accuracy_toy_example(estimator, build_dataset): triplets, _, _, X = build_dataset(with_preprocessor=False) estimator = clone(estimator) set_random_state(estimator) - estimator.fit(triplets) + if isinstance(estimator, metric_learn.SCML): + msg = "As no value for `n_basis` was selected, " + with pytest.warns(UserWarning) as raised_warning: + estimator.fit(triplets) + assert msg in str(raised_warning[0].message) + else: + estimator.fit(triplets) # We take the two first points and we build 4 regularly spaced points on the # line they define, so that it's easy to build triplets of different # similarities. From bdfdb2487b065fd088224a14808c395430a21be0 Mon Sep 17 00:00:00 2001 From: Maximiliano Vargas <43217761+mvargas33@users.noreply.github.com> Date: Tue, 2 Nov 2021 15:37:51 +0100 Subject: [PATCH 190/210] Assigned n_basis to SCML when needed for tests. Catch warn when needed as well. (#341) --- test/metric_learn_test.py | 31 +++++++++++++++++++++++-------- test/test_mahalanobis_mixin.py | 4 ++-- test/test_sklearn_compat.py | 5 ++++- test/test_triplets_classifiers.py | 25 +++---------------------- test/test_utils.py | 4 ++-- 5 files changed, 34 insertions(+), 35 deletions(-) diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py index 542e1e0a..fe1560c2 100644 --- a/test/metric_learn_test.py +++ b/test/metric_learn_test.py @@ -95,7 +95,7 @@ def test_big_n_features(self): n_informative=60, n_redundant=0, n_repeated=0, random_state=42) X = StandardScaler().fit_transform(X) - scml = SCML_Supervised(random_state=42) + scml = SCML_Supervised(random_state=42, n_basis=399) scml.fit(X, y) csep = class_separation(scml.transform(X), y) assert csep < 0.7 @@ -106,7 +106,7 @@ def test_big_n_features(self): [2, 0], [2, 1]]), np.array([1, 0, 1, 0])))]) def test_bad_basis(self, estimator, data): - model = estimator(basis='bad_basis') + model = estimator(basis='bad_basis', n_basis=33) # n_basis doesn't matter msg = ("`basis` must be one of the options '{}' or an array of shape " "(n_basis, n_features)." .format("', '".join(model._authorized_basis))) @@ -238,16 +238,23 @@ def test_lda_toy(self): @pytest.mark.parametrize('n_features', [10, 50, 100]) @pytest.mark.parametrize('n_classes', [5, 10, 15]) def test_triplet_diffs(self, n_samples, n_features, n_classes): + """ + Test that the correct value of n_basis is being generated with + different triplet constraints. 
+    """
     X, y = make_classification(n_samples=n_samples, n_classes=n_classes,
                                n_features=n_features, n_informative=n_features,
                                n_redundant=0, n_repeated=0)
     X = StandardScaler().fit_transform(X)
-
-    model = SCML_Supervised()
+    model = SCML_Supervised(n_basis=None)  # Explicit n_basis=None
     constraints = Constraints(y)
     triplets = constraints.generate_knntriplets(X, model.k_genuine,
                                                 model.k_impostor)
-    basis, n_basis = model._generate_bases_dist_diff(triplets, X)
+
+    msg = "As no value for `n_basis` was selected, "
+    with pytest.warns(UserWarning) as raised_warning:
+      basis, n_basis = model._generate_bases_dist_diff(triplets, X)
+    assert msg in str(raised_warning[0].message)
 
     expected_n_basis = n_features * 80
     assert n_basis == expected_n_basis
@@ -257,13 +264,21 @@ def test_triplet_diffs(self, n_samples, n_features, n_classes):
   @pytest.mark.parametrize('n_samples', [100, 500])
   @pytest.mark.parametrize('n_features', [10, 50, 100])
   @pytest.mark.parametrize('n_classes', [5, 10, 15])
   def test_lda(self, n_samples, n_features, n_classes):
+    """
+    Test that when n_basis=None, the correct n_basis is generated
+    for SCML_Supervised and different values of n_samples, n_features
+    and n_classes.
+    """
     X, y = make_classification(n_samples=n_samples, n_classes=n_classes,
                                n_features=n_features, n_informative=n_features,
                                n_redundant=0, n_repeated=0)
     X = StandardScaler().fit_transform(X)
 
-    model = SCML_Supervised()
-    basis, n_basis = model._generate_bases_LDA(X, y)
+    msg = "As no value for `n_basis` was selected, "
+    with pytest.warns(UserWarning) as raised_warning:
+      model = SCML_Supervised(n_basis=None)  # Explicit n_basis=None
+      basis, n_basis = model._generate_bases_LDA(X, y)
+    assert msg in str(raised_warning[0].message)
 
     num_eig = min(n_classes - 1, n_features)
     expected_n_basis = min(20 * n_features, n_samples * 2 * num_eig - 1)
@@ -299,7 +314,7 @@ def test_int_inputs_supervised(self, name):
     assert msg == raised_error.value.args[0]
 
   def test_large_output_iter(self):
-    scml = SCML(max_iter=1, output_iter=2)
+    scml = SCML(max_iter=1, output_iter=2, n_basis=33)  # n_basis doesn't matter
     triplets = np.array([[[0, 1], [2, 1], [0, 0]]])
     msg = ("The value of output_iter must be equal or smaller than"
            " max_iter.")
diff --git a/test/test_mahalanobis_mixin.py b/test/test_mahalanobis_mixin.py
index e2aa1e4d..e69aa032 100644
--- a/test/test_mahalanobis_mixin.py
+++ b/test/test_mahalanobis_mixin.py
@@ -291,8 +291,8 @@ def test_components_is_2D(estimator, build_dataset):
   model.fit(*remove_y(estimator, input_data, labels))
   assert model.components_.shape == (X.shape[1], X.shape[1])
 
-  # test that it works for 1 feature
-  trunc_data = input_data[..., :1]
+  # test that it works for 1 feature. Use 2nd dimension to avoid border cases
+  trunc_data = input_data[..., 1:2]
   # we drop duplicates that might have been formed, i.e. of the form
   # aabc or abcc or aabb for quadruplets, and aa for pairs. 
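The warning-checking idiom these test changes converge on is worth seeing in isolation: `pytest.warns` records every warning raised inside the `with` block, and the messages are then matched by hand. Below is a minimal, self-contained sketch of the pattern; `noisy_fit` is a hypothetical stand-in for an SCML `fit` that warns when `n_basis` is left unset, not a metric-learn function:

    import warnings

    import pytest


    def noisy_fit():
        # Hypothetical stand-in: emit the kind of UserWarning SCML raises
        # when `n_basis` is None.
        warnings.warn("As no value for `n_basis` was selected, ...", UserWarning)


    def test_warning_message_is_checked():
        msg = "As no value for `n_basis` was selected, "
        with pytest.warns(UserWarning) as raised_warning:
            noisy_fit()
        # Scan the recorded warnings for the expected message fragment.
        assert any(msg in str(w.message) for w in raised_warning)

Run under pytest this passes; if `noisy_fit` stops warning, `pytest.warns` itself fails the test, which is exactly the behaviour the patches above rely on.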
diff --git a/test/test_sklearn_compat.py b/test/test_sklearn_compat.py index a23a88d0..d2369b1c 100644 --- a/test/test_sklearn_compat.py +++ b/test/test_sklearn_compat.py @@ -79,7 +79,10 @@ def test_rca(self): check_estimator(Stable_RCA_Supervised()) def test_scml(self): - check_estimator(SCML_Supervised()) + msg = "As no value for `n_basis` was selected, " + with pytest.warns(UserWarning) as raised_warning: + check_estimator(SCML_Supervised()) + assert msg in str(raised_warning[0].message) RNG = check_random_state(0) diff --git a/test/test_triplets_classifiers.py b/test/test_triplets_classifiers.py index 600947e6..f2d5c015 100644 --- a/test/test_triplets_classifiers.py +++ b/test/test_triplets_classifiers.py @@ -1,7 +1,6 @@ import pytest from sklearn.exceptions import NotFittedError from sklearn.model_selection import train_test_split -import metric_learn from test.test_utils import triplets_learners, ids_triplets_learners from metric_learn.sklearn_shims import set_random_state @@ -21,13 +20,7 @@ def test_predict_only_one_or_minus_one(estimator, build_dataset, estimator.set_params(preprocessor=preprocessor) set_random_state(estimator) triplets_train, triplets_test = train_test_split(input_data) - if isinstance(estimator, metric_learn.SCML): - msg = "As no value for `n_basis` was selected, " - with pytest.warns(UserWarning) as raised_warning: - estimator.fit(triplets_train) - assert msg in str(raised_warning[0].message) - else: - estimator.fit(triplets_train) + estimator.fit(triplets_train) predictions = estimator.predict(triplets_test) not_valid = [e for e in predictions if e not in [-1, 1]] @@ -49,13 +42,7 @@ def test_no_zero_prediction(estimator, build_dataset): # Dummy fit estimator = clone(estimator) set_random_state(estimator) - if isinstance(estimator, metric_learn.SCML): - msg = "As no value for `n_basis` was selected, " - with pytest.warns(UserWarning) as raised_warning: - estimator.fit(triplets) - assert msg in str(raised_warning[0].message) - else: - estimator.fit(triplets) + estimator.fit(triplets) # We force the transformation to be identity, to force euclidean distance estimator.components_ = np.eye(X.shape[1]) @@ -106,13 +93,7 @@ def test_accuracy_toy_example(estimator, build_dataset): triplets, _, _, X = build_dataset(with_preprocessor=False) estimator = clone(estimator) set_random_state(estimator) - if isinstance(estimator, metric_learn.SCML): - msg = "As no value for `n_basis` was selected, " - with pytest.warns(UserWarning) as raised_warning: - estimator.fit(triplets) - assert msg in str(raised_warning[0].message) - else: - estimator.fit(triplets) + estimator.fit(triplets) # We take the two first points and we build 4 regularly spaced points on the # line they define, so that it's easy to build triplets of different # similarities. 
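Stepping back to the first hunks of this series: the change in `metric_learn/_util.py` and `metric_learn/itml.py` swaps a set-of-tuples comprehension (which recent NumPy releases refuse to stack, since `np.vstack` expects a sequence rather than a set) for `np.unique(..., axis=0)`. A small sketch of the new form, on made-up data:

    import numpy as np

    # Made-up pairs array: two identical pairs and one distinct pair.
    pairs = np.array([[[0., 1.], [2., 3.]],
                      [[0., 1.], [2., 3.]],
                      [[4., 5.], [2., 3.]]])

    # np.vstack flattens the pairs into a (n_pairs * 2, n_features) array of
    # points; np.unique then drops duplicate rows (sorting them as a side
    # effect), which is all the percentile-based bounds estimation needs.
    X = np.unique(np.vstack(pairs), axis=0)
    print(X)
    # [[0. 1.]
    #  [2. 3.]
    #  [4. 5.]]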
diff --git a/test/test_utils.py b/test/test_utils.py
index 83bdd86a..f3000344 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -117,7 +117,7 @@ def build_quadruplets(with_preprocessor=False):
                                  [learner for (learner, _) in
                                   quadruplets_learners]))
 
-triplets_learners = [(SCML(), build_triplets)]
+triplets_learners = [(SCML(n_basis=320), build_triplets)]
 ids_triplets_learners = list(map(lambda x: x.__class__.__name__,
                                  [learner for (learner, _) in
                                   triplets_learners]))
@@ -140,7 +140,7 @@ def build_quadruplets(with_preprocessor=False):
                (RCA_Supervised(num_chunks=5), build_classification),
                (SDML_Supervised(prior='identity', balance_param=1e-5),
                 build_classification),
-               (SCML_Supervised(), build_classification)]
+               (SCML_Supervised(n_basis=80), build_classification)]
 ids_classifiers = list(map(lambda x: x.__class__.__name__,
                            [learner for (learner, _) in
                             classifiers]))

From 964f28d79a298bb16c2e930789d1f0f4833141f3 Mon Sep 17 00:00:00 2001
From: Maximiliano Vargas <43217761+mvargas33@users.noreply.github.com>
Date: Tue, 2 Nov 2021 17:03:12 +0100
Subject: [PATCH 191/210] Threshold must be a real number (#322)

* Add venv to gitignore
* Check if threshold is a real value
* Simplified threshold type-check
* Follow linter rules
* Fix last linter error
* Add test to check correct behaviour. Sacrificed simplicity for the bool case.
* Update test. Stick to custom message. It's bool permissive
* Explicit boolean permissive case in test
* Changed isinstance for custom ValueError message
* TypeError for most input. ValueError for String case.
---
 metric_learn/base_metric.py    | 10 ++++++++--
 test/test_pairs_classifiers.py | 19 +++++++++++++++++++
 2 files changed, 27 insertions(+), 2 deletions(-)

diff --git a/metric_learn/base_metric.py b/metric_learn/base_metric.py
index e7dbd608..9064c100 100644
--- a/metric_learn/base_metric.py
+++ b/metric_learn/base_metric.py
@@ -569,8 +569,14 @@ def set_threshold(self, threshold):
       The pairs classifier with the new threshold set.
     """
     check_is_fitted(self, 'preprocessor_')
-
-    self.threshold_ = threshold
+    try:
+      self.threshold_ = float(threshold)
+    except TypeError:
+      raise ValueError('Parameter threshold must be a real number. '
+                       'Got {} instead.'.format(type(threshold)))
+    except ValueError:
+      raise ValueError('Parameter threshold must be a real number. '
+                       'Got {} instead.'.format(type(threshold)))
     return self
 
   def calibrate_threshold(self, pairs_valid, y_valid, strategy='accuracy',
diff --git a/test/test_pairs_classifiers.py b/test/test_pairs_classifiers.py
index 714cbd08..6a725f23 100644
--- a/test/test_pairs_classifiers.py
+++ b/test/test_pairs_classifiers.py
@@ -180,6 +180,25 @@ def test_set_threshold():
   assert identity_pairs_classifier.threshold_ == 0.5
 
 
+@pytest.mark.parametrize('value', ["ABC", None, [1, 2, 3], {'key': None},
+                                   (1, 2), set(),
+                                   np.array([[[0.], [1.]], [[1.], [3.]]])])
+def test_set_wrong_type_threshold(value):
+  """
+  Test that `set_threshold` indeed sets the threshold, that it accepts
+  nothing but floats or integers, and that it is permissive with
+  booleans (True=1.0 and False=0.0)
+  """
+  model = IdentityPairsClassifier()
+  model.fit(np.array([[[0.], [1.]]]), np.array([1]))
+  msg = ('Parameter threshold must be a real number. '
+         'Got {} instead.'.format(type(value)))
+
+  with pytest.raises(ValueError) as e:
+    model.set_threshold(value)
+  assert str(e.value).startswith(msg)
+
+
 def test_f_beta_1_is_f_1():
   # test that putting beta to 1 indeed finds the best threshold to optimize
   # the f1_score

From a797635b8050ecad29f6484eb2d44bafe5fabceb Mon Sep 17 00:00:00 2001
From: Maximiliano Vargas <43217761+mvargas33@users.noreply.github.com>
Date: Thu, 11 Nov 2021 06:21:00 +0100
Subject: [PATCH 192/210] [Minor] GUI for matplotlib to run examples (#323)

* Proposal to add pyqt5 as GUI to run examples correctly
* Add note about matplotlib gui backend instead
---
 examples/plot_metric_learning_examples.py | 4 ++++
 examples/plot_sandwich.py                 | 7 +++++++
 2 files changed, 11 insertions(+)

diff --git a/examples/plot_metric_learning_examples.py b/examples/plot_metric_learning_examples.py
index 71229554..d03242b2 100644
--- a/examples/plot_metric_learning_examples.py
+++ b/examples/plot_metric_learning_examples.py
@@ -15,7 +15,11 @@
 ######################################################################
 # Imports
 # ^^^^^^^
+# .. note::
 #
+#     In order to show the charts of the examples you need a graphical
+#     ``matplotlib`` backend installed. For instance, use ``pip install pyqt5``
+#     to get a Qt graphical interface, or use your favorite one.
 
 from sklearn.manifold import TSNE
 
diff --git a/examples/plot_sandwich.py b/examples/plot_sandwich.py
index d5856667..07ab78e9 100644
--- a/examples/plot_sandwich.py
+++ b/examples/plot_sandwich.py
@@ -6,6 +6,13 @@
 Sandwich demo based on code from http://nbviewer.ipython.org/6576096
 """
 
+######################################################################
+# .. note::
+#
+#     In order to show the charts of the examples you need a graphical
+#     ``matplotlib`` backend installed. For instance, use ``pip install pyqt5``
+#     to get a Qt graphical interface, or use your favorite one.
+
 import numpy as np
 from matplotlib import pyplot as plt
 from sklearn.metrics import pairwise_distances

From 4e0c4442329ebe12a7e2a31598fc13ae053fd6b6 Mon Sep 17 00:00:00 2001
From: Maximiliano Vargas <43217761+mvargas33@users.noreply.github.com>
Date: Wed, 17 Nov 2021 14:54:36 +0100
Subject: [PATCH 193/210] [MRG][DOC] Fixes almost all warnings in the docs (#338)

* Update API names, stop using deprecated html4
* Fixes a lot of warnings. Add Methods doctree
* More warnings solved
* Fix docs dependencies
* New style for Example Code and References
* Add all Methods to all classes in docstrings, in alphabetical order
* Add MetricTransformer and MahalanobisMixin to auto-docs
* Delete unused vars in docs. Use simple quotes
* Fix indentation
* Fix Github CI instead of old Travis CI
* References Lists are now numbered
* Remove Example Code body almost everywhere
* Removed Methods directive. 
Kept warnings * Deprecated directive now is red as in sklearn --- doc/_static/css/styles.css | 36 +++++++++ doc/conf.py | 11 +-- doc/index.rst | 6 +- doc/metric_learn.rst | 2 + doc/supervised.rst | 56 +++++++------- doc/unsupervised.rst | 9 ++- doc/weakly_supervised.rst | 93 +++++++++++++---------- examples/plot_metric_learning_examples.py | 6 +- metric_learn/base_metric.py | 38 +++++---- metric_learn/constraints.py | 14 ++-- metric_learn/covariance.py | 4 + metric_learn/itml.py | 2 +- metric_learn/lfda.py | 2 +- metric_learn/rca.py | 2 +- metric_learn/scml.py | 8 +- setup.py | 3 +- 16 files changed, 175 insertions(+), 117 deletions(-) create mode 100644 doc/_static/css/styles.css diff --git a/doc/_static/css/styles.css b/doc/_static/css/styles.css new file mode 100644 index 00000000..6d350ae4 --- /dev/null +++ b/doc/_static/css/styles.css @@ -0,0 +1,36 @@ +.hatnote { + border-color: #e1e4e5 ; + border-style: solid ; + border-width: 1px ; + font-size: x-small ; + font-style: italic ; + margin-left: auto ; + margin-right: auto ; + margin-bottom: 24px; + padding: 12px; +} +.hatnote-gray { + background-color: #f5f5f5 +} +.hatnote li { + list-style-type: square; + margin-left: 12px !important; +} +.hatnote ul { + list-style-type: square; + margin-left: 0px !important; + margin-bottom: 0px !important; +} +.deprecated { + color: #b94a48; + background-color: #F3E5E5; + border-color: #eed3d7; + margin-top: 0.5rem; + padding: 0.5rem; + border-radius: 0.5rem; + margin-bottom: 0.5rem; +} + +.deprecated p { + margin-bottom: 0 !important; +} \ No newline at end of file diff --git a/doc/conf.py b/doc/conf.py index 94263c7a..5eb312dc 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -38,9 +38,6 @@ html_static_path = ['_static'] htmlhelp_basename = 'metric-learndoc' -# Option to only need single backticks to refer to symbols -default_role = 'any' - # Option to hide doctests comments in the documentation (like # doctest: # +NORMALIZE_WHITESPACE for instance) trim_doctest_flags = True @@ -67,10 +64,6 @@ # generate autosummary even if no references autosummary_generate = True -# Switch to old behavior with html4, for a good display of references, -# as described in https://github.com/sphinx-doc/sphinx/issues/6705 -html4_writer = True - # Temporary work-around for spacing problem between parameter and parameter # type in the doc, see https://github.com/numpy/numpydoc/issues/215. The bug @@ -79,8 +72,8 @@ # In an ideal world, this would get fixed in this PR: # https://github.com/readthedocs/sphinx_rtd_theme/pull/747/files def setup(app): - app.add_javascript('js/copybutton.js') - app.add_stylesheet("basic.css") + app.add_js_file('js/copybutton.js') + app.add_css_file('css/styles.css') # Remove matplotlib agg warnings from generated doc when using plt.show diff --git a/doc/index.rst b/doc/index.rst index 6ec4fb26..f9dfd83d 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -1,6 +1,6 @@ metric-learn: Metric Learning in Python ======================================= -|Travis-CI Build Status| |License| |PyPI version| |Code coverage| +|GitHub Actions Build Status| |License| |PyPI version| |Code coverage| `metric-learn `_ contains efficient Python implementations of several popular supervised and @@ -57,8 +57,8 @@ Documentation outline :ref:`genindex` | :ref:`search` -.. |Travis-CI Build Status| image:: https://api.travis-ci.org/scikit-learn-contrib/metric-learn.svg?branch=master - :target: https://travis-ci.org/scikit-learn-contrib/metric-learn +.. 
|GitHub Actions Build Status| image:: https://github.com/scikit-learn-contrib/metric-learn/workflows/CI/badge.svg + :target: https://github.com/scikit-learn-contrib/metric-learn/actions?query=event%3Apush+branch%3Amaster .. |PyPI version| image:: https://badge.fury.io/py/metric-learn.svg :target: http://badge.fury.io/py/metric-learn .. |License| image:: http://img.shields.io/:license-mit-blue.svg?style=flat diff --git a/doc/metric_learn.rst b/doc/metric_learn.rst index 8f91d91c..4d0676b9 100644 --- a/doc/metric_learn.rst +++ b/doc/metric_learn.rst @@ -13,6 +13,8 @@ Base Classes metric_learn.Constraints metric_learn.base_metric.BaseMetricLearner + metric_learn.base_metric.MetricTransformer + metric_learn.base_metric.MahalanobisMixin metric_learn.base_metric._PairsClassifierMixin metric_learn.base_metric._TripletsClassifierMixin metric_learn.base_metric._QuadrupletsClassifierMixin diff --git a/doc/supervised.rst b/doc/supervised.rst index e27b58ec..09077dc2 100644 --- a/doc/supervised.rst +++ b/doc/supervised.rst @@ -152,7 +152,7 @@ neighbors (with same labels) of :math:`\mathbf{x}_{i}`, :math:`y_{ij}=0` indicates :math:`\mathbf{x}_{i}, \mathbf{x}_{j}` belong to different classes, :math:`[\cdot]_+=\max(0, \cdot)` is the Hinge loss. -.. topic:: Example Code: +.. rubric:: Example Code :: @@ -167,15 +167,15 @@ indicates :math:`\mathbf{x}_{i}, \mathbf{x}_{j}` belong to different classes, lmnn = LMNN(k=5, learn_rate=1e-6) lmnn.fit(X, Y, verbose=False) -.. topic:: References: +.. rubric:: References - .. [1] Weinberger et al. `Distance Metric Learning for Large Margin - Nearest Neighbor Classification - `_. - JMLR 2009 - .. [2] `Wikipedia entry on Large Margin Nearest Neighbor `_ - +.. container:: hatnote hatnote-gray + + [1]. Weinberger et al. `Distance Metric Learning for Large Margin Nearest Neighbor Classification `_. JMLR 2009. + + [2]. `Wikipedia entry on Large Margin Nearest Neighbor `_. + .. _nca: @@ -216,7 +216,7 @@ the sum of probability of being correctly classified: \mathbf{L} = \text{argmax}\sum_i p_i -.. topic:: Example Code: +.. rubric:: Example Code :: @@ -231,13 +231,14 @@ the sum of probability of being correctly classified: nca = NCA(max_iter=1000) nca.fit(X, Y) -.. topic:: References: +.. rubric:: References + + +.. container:: hatnote hatnote-gray - .. [1] Goldberger et al. - `Neighbourhood Components Analysis `_. - NIPS 2005 + [1]. Goldberger et al. `Neighbourhood Components Analysis `_. NIPS 2005. - .. [2] `Wikipedia entry on Neighborhood Components Analysis `_ + [2]. `Wikipedia entry on Neighborhood Components Analysis `_. .. _lfda: @@ -289,7 +290,7 @@ nearby data pairs in the same class are made close and the data pairs in different classes are separated from each other; far apart data pairs in the same class are not imposed to be close. -.. topic:: Example Code: +.. rubric:: Example Code :: @@ -309,15 +310,14 @@ same class are not imposed to be close. To work around this, fit instances of this class to data once, then keep the instance around to do transformations. -.. topic:: References: +.. rubric:: References - .. [1] Sugiyama. `Dimensionality Reduction of Multimodal Labeled Data by Local - Fisher Discriminant Analysis `_. - JMLR 2007 - .. [2] Tang. `Local Fisher Discriminant Analysis on Beer Style Clustering - `_. +.. container:: hatnote hatnote-gray + + [1]. Sugiyama. `Dimensionality Reduction of Multimodal Labeled Data by Local Fisher Discriminant Analysis `_. JMLR 2007. + + [2]. Tang. `Local Fisher Discriminant Analysis on Beer Style Clustering `_. .. 
_mlkr: @@ -363,7 +363,7 @@ calculating a weighted average of all the training samples: \hat{y}_i = \frac{\sum_{j\neq i}y_jk_{ij}}{\sum_{j\neq i}k_{ij}} -.. topic:: Example Code: +.. rubric:: Example Code :: @@ -377,10 +377,12 @@ calculating a weighted average of all the training samples: mlkr = MLKR() mlkr.fit(X, Y) -.. topic:: References: +.. rubric:: References + + +.. container:: hatnote hatnote-gray - .. [1] Weinberger et al. `Metric Learning for Kernel Regression `_. AISTATS 2007 + [1]. Weinberger et al. `Metric Learning for Kernel Regression `_. AISTATS 2007. .. _supervised_version: @@ -417,7 +419,7 @@ quadruplets, where for each quadruplet the two first points are from the same class, and the two last points are from a different class (so indeed the two last points should be less similar than the two first points). -.. topic:: Example Code: +.. rubric:: Example Code :: diff --git a/doc/unsupervised.rst b/doc/unsupervised.rst index 1191e805..110b07f9 100644 --- a/doc/unsupervised.rst +++ b/doc/unsupervised.rst @@ -20,7 +20,7 @@ It can be used for ZCA whitening of the data (see the Wikipedia page of `whitening transformation `_). -.. topic:: Example Code: +.. rubric:: Example Code :: @@ -32,6 +32,9 @@ Whitening_transformation>`_). cov = Covariance().fit(iris) x = cov.transform(iris) -.. topic:: References: +.. rubric:: References - .. [1] On the Generalized Distance in Statistics, P.C.Mahalanobis, 1936 \ No newline at end of file + +.. container:: hatnote hatnote-gray + + [1]. On the Generalized Distance in Statistics, P.C.Mahalanobis, 1936. \ No newline at end of file diff --git a/doc/weakly_supervised.rst b/doc/weakly_supervised.rst index 02ea4ef6..341d959a 100644 --- a/doc/weakly_supervised.rst +++ b/doc/weakly_supervised.rst @@ -62,8 +62,9 @@ The most intuitive way to represent tuples is to provide the algorithm with a in a tuple (2 for pairs, 3 for triplets for instance), and `n_features` is the number of features of each point. -.. topic:: Example: - Here is an artificial dataset of 4 pairs of 2 points of 3 features each: +.. rubric:: Example Code + +Here is an artificial dataset of 4 pairs of 2 points of 3 features each: >>> import numpy as np >>> tuples = np.array([[[-0.12, -1.21, -0.20], @@ -94,7 +95,9 @@ would be to keep the dataset of points `X` aside, and just represent tuples as a collection of tuples of *indices* from the points in `X`. Since we loose the feature dimension there, the resulting array is 2D. -.. topic:: Example: An equivalent representation of the above pairs would be: +.. rubric:: Example Code + +An equivalent representation of the above pairs would be: >>> X = np.array([[-0.12, -1.21, -0.20], >>> [+0.05, -0.19, -0.05], @@ -410,7 +413,7 @@ for similar and dissimilar pairs respectively, and :math:`\mathbf{M}_0` is the prior distance metric, set to identity matrix by default, :math:`D_{\ell \mathrm{d}}(\cdot)` is the log determinant. -.. topic:: Example Code: +.. rubric:: Example Code :: @@ -429,11 +432,14 @@ is the prior distance metric, set to identity matrix by default, itml = ITML() itml.fit(pairs, y) -.. topic:: References: +.. rubric:: References + - .. [1] Jason V. Davis, et al. `Information-theoretic Metric Learning `_. ICML 2007 +.. container:: hatnote hatnote-gray - .. [2] Adapted from Matlab code at http://www.cs.utexas.edu/users/pjain/itml/ + [1]. Jason V. Davis, et al. `Information-theoretic Metric Learning `_. ICML 2007. + + [2]. Adapted from Matlab code at http://www.cs.utexas.edu/users/pjain/itml/ . .. 
_sdml: @@ -468,7 +474,7 @@ the sums of the row elements of :math:`\mathbf{K}`., :math:`||\cdot||_{1, off}` is the off-diagonal L1 norm. -.. topic:: Example Code: +.. rubric:: Example Code :: @@ -486,19 +492,19 @@ is the off-diagonal L1 norm. sdml = SDML() sdml.fit(pairs, y) -.. topic:: References: +.. rubric:: References + - .. [1] Qi et al. - `An efficient sparse metric learning in high-dimensional space via - L1-penalized log-determinant regularization `_. - ICML 2009. +.. container:: hatnote hatnote-gray - .. [2] Code adapted from https://gist.github.com/kcarnold/5439945 + [1]. Qi et al. `An efficient sparse metric learning in high-dimensional space via L1-penalized log-determinant regularization `_. ICML 2009. + + [2]. Code adapted from https://gist.github.com/kcarnold/5439945 . .. _rca: :py:class:`RCA ` -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Relative Components Analysis (:py:class:`RCA `) @@ -522,7 +528,7 @@ where chunklet :math:`j` consists of :math:`\{\mathbf{x}_{ji}\}_{i=1}^{n_j}` with a mean :math:`\hat{m}_j`. The inverse of :math:`\mathbf{C}^{-1}` is used as the Mahalanobis matrix. -.. topic:: Example Code: +.. rubric:: Example Code :: @@ -537,15 +543,16 @@ as the Mahalanobis matrix. rca = RCA() rca.fit(X, chunks) -.. topic:: References: +.. rubric:: References + - .. [1] Shental et al. `Adjustment learning and relevant component analysis - `_. ECCV 2002 +.. container:: hatnote hatnote-gray - .. [2] Bar-Hillel et al. `Learning distance functions using equivalence relations `_. ICML 2003 + [1]. Shental et al. `Adjustment learning and relevant component analysis `_. ECCV 2002. - .. [3] Bar-Hillel et al. `Learning a Mahalanobis metric from equivalence constraints `_. JMLR 2005 + [2]. Bar-Hillel et al. `Learning distance functions using equivalence relations `_. ICML 2003. + + [3]. Bar-Hillel et al. `Learning a Mahalanobis metric from equivalence constraints `_. JMLR 2005. .. _mmc: @@ -576,7 +583,7 @@ points, while constrains the sum of distances between dissimilar points: \qquad \qquad \text{s.t.} \qquad \sum_{(\mathbf{x}_i, \mathbf{x}_j) \in D} d^2_{\mathbf{M}}(\mathbf{x}_i, \mathbf{x}_j) \geq 1 -.. topic:: Example Code: +.. rubric:: Example Code :: @@ -594,13 +601,14 @@ points, while constrains the sum of distances between dissimilar points: mmc = MMC() mmc.fit(pairs, y) -.. topic:: References: +.. rubric:: References + - .. [1] Xing et al. `Distance metric learning with application to clustering with - side-information `_. NIPS 2002 - .. [2] Adapted from Matlab code http://www.cs.cmu.edu/%7Eepxing/papers/Old_papers/code_Metric_online.tar.gz +.. container:: hatnote hatnote-gray + + [1]. Xing et al. `Distance metric learning with application to clustering with side-information `_. NIPS 2002. + + [2]. Adapted from Matlab code http://www.cs.cmu.edu/%7Eepxing/papers/Old_papers/code_Metric_online.tar.gz . .. _learning_on_triplets: @@ -744,7 +752,7 @@ is added to yield a sparse combination. The formulation is the following: where :math:`[\cdot]_+` is the hinge loss. -.. topic:: Example Code: +.. rubric:: Example Code :: @@ -758,14 +766,14 @@ where :math:`[\cdot]_+` is the hinge loss. scml = SCML() scml.fit(triplets) -.. topic:: References: +.. rubric:: References + - .. [1] Y. Shi, A. Bellet and F. Sha. `Sparse Compositional Metric Learning. - `_. \ - (AAAI), 2014. +.. container:: hatnote hatnote-gray - .. [2] Adapted from original \ - `Matlab implementation.`_. + [1]. Y. Shi, A. Bellet and F. Sha. `Sparse Compositional Metric Learning. `_. (AAAI), 2014. 
+ + [2]. Adapted from original `Matlab implementation. `_. .. _learning_on_quadruplets: @@ -937,7 +945,7 @@ by default, :math:`D_{ld}(\mathbf{\cdot, \cdot})` is the LogDet divergence: D_{ld}(\mathbf{M, M_0}) = \text{tr}(\mathbf{MM_0}) − \text{logdet} (\mathbf{M}) -.. topic:: Example Code: +.. rubric:: Example Code :: @@ -954,12 +962,13 @@ by default, :math:`D_{ld}(\mathbf{\cdot, \cdot})` is the LogDet divergence: lsml = LSML() lsml.fit(quadruplets) -.. topic:: References: +.. rubric:: References + + +.. container:: hatnote hatnote-gray - .. [1] Liu et al. - `Metric Learning from Relative Comparisons by Minimizing Squared - Residual `_. ICDM 2012 + [1]. Liu et al. `Metric Learning from Relative Comparisons by Minimizing Squared Residual `_. ICDM 2012. - .. [2] Code adapted from https://gist.github.com/kcarnold/5439917 + [2]. Code adapted from https://gist.github.com/kcarnold/5439917 . diff --git a/examples/plot_metric_learning_examples.py b/examples/plot_metric_learning_examples.py index d03242b2..4ef15497 100644 --- a/examples/plot_metric_learning_examples.py +++ b/examples/plot_metric_learning_examples.py @@ -39,9 +39,9 @@ # We will be using a synthetic dataset to illustrate the plotting, # using the function `sklearn.datasets.make_classification` from # scikit-learn. The dataset will contain: -# - 100 points in 3 classes with 2 clusters per class -# - 5 features, among which 3 are informative (correlated with the class -# labels) and two are random noise with large magnitude +# - 100 points in 3 classes with 2 clusters per class +# - 5 features, among which 3 are informative (correlated with the class +# labels) and two are random noise with large magnitude X, y = make_classification(n_samples=100, n_classes=3, n_clusters_per_class=2, n_informative=3, class_sep=4., n_features=5, diff --git a/metric_learn/base_metric.py b/metric_learn/base_metric.py index 9064c100..7b449c8e 100644 --- a/metric_learn/base_metric.py +++ b/metric_learn/base_metric.py @@ -29,16 +29,17 @@ def __init__(self, preprocessor=None): @abstractmethod def score_pairs(self, pairs): """ - .. deprecated:: 0.7.0 Refer to `pair_distance` and `pair_score`. + Returns the score between pairs + (can be a similarity, or a distance/metric depending on the algorithm) + + .. deprecated:: 0.7.0 + Refer to `pair_distance` and `pair_score`. .. warning:: This method will be removed in 0.8.0. Please refer to `pair_distance` or `pair_score`. This change will occur in order to add learners that don't necessarily learn a Mahalanobis distance. - Returns the score between pairs - (can be a similarity, or a distance/metric depending on the algorithm) - Parameters ---------- pairs : array-like, shape=(n_pairs, 2, n_features) or (n_pairs, 2) @@ -225,7 +226,10 @@ def get_metric(self): class MetricTransformer(metaclass=ABCMeta): - + """ + Base class for all learners that can transform data into a new space + with the metric learned. + """ @abstractmethod def transform(self, X): """Applies the metric transformation. @@ -264,14 +268,6 @@ class MahalanobisMixin(BaseMetricLearner, MetricTransformer, def score_pairs(self, pairs): r""" - .. deprecated:: 0.7.0 - This method is deprecated. Please use `pair_distance` instead. - - .. warning:: - This method will be removed in 0.8.0. Please refer to `pair_distance` - or `pair_score`. This change will occur in order to add learners - that don't necessarily learn a Mahalanobis distance. - Returns the learned Mahalanobis distance between pairs. 
This distance is defined as: :math:`d_M(x, x') = \\sqrt{(x-x')^T M (x-x')}` @@ -282,6 +278,14 @@ def score_pairs(self, pairs): x_e')^T (x_e- x_e')}`, with :math:`x_e = L x` (See :class:`MahalanobisMixin`). + .. deprecated:: 0.7.0 + Please use `pair_distance` instead. + + .. warning:: + This method will be removed in 0.8.0. Please refer to `pair_distance` + or `pair_score`. This change will occur in order to add learners + that don't necessarily learn a Mahalanobis distance. + Parameters ---------- pairs : array-like, shape=(n_pairs, 2, n_features) or (n_pairs, 2) @@ -632,7 +636,7 @@ def calibrate_threshold(self, pairs_valid, y_valid, strategy='accuracy', evaluation tool in clinical medicine, MH Zweig, G Campbell - Clinical chemistry, 1993 - .. [2] most of the code of this function is from scikit-learn's PR #10117 + .. [2] Most of the code of this function is from scikit-learn's PR #10117 See Also -------- @@ -745,7 +749,8 @@ def _validate_calibration_params(strategy='accuracy', min_rate=None, class _TripletsClassifierMixin(BaseMetricLearner): - """Base class for triplets learners. + """ + Base class for triplets learners. """ _tuple_size = 3 # number of points in a tuple, 3 for triplets @@ -829,7 +834,8 @@ def score(self, triplets): class _QuadrupletsClassifierMixin(BaseMetricLearner): - """Base class for quadruplets learners. + """ + Base class for quadruplets learners. """ _tuple_size = 4 # number of points in a tuple, 4 for quadruplets diff --git a/metric_learn/constraints.py b/metric_learn/constraints.py index 2d86b819..68e205f6 100644 --- a/metric_learn/constraints.py +++ b/metric_learn/constraints.py @@ -95,12 +95,14 @@ def generate_knntriplets(self, X, k_genuine, k_impostor): Parameters ---------- - X : (n x d) matrix - Input data, where each row corresponds to a single instance. - k_genuine : int - Number of neighbors of the same class to be taken into account. - k_impostor : int - Number of neighbors of different classes to be taken into account. + X : (n x d) matrix + Input data, where each row corresponds to a single instance. + + k_genuine : int + Number of neighbors of the same class to be taken into account. + + k_impostor : int + Number of neighbors of different classes to be taken into account. Returns ------- diff --git a/metric_learn/covariance.py b/metric_learn/covariance.py index 3b218e6d..2c05b28d 100644 --- a/metric_learn/covariance.py +++ b/metric_learn/covariance.py @@ -42,6 +42,10 @@ def __init__(self, preprocessor=None): def fit(self, X, y=None): """ + Calculates the covariance matrix of the input data. + + Parameters + ---------- X : data matrix, (n x d) y : unused """ diff --git a/metric_learn/itml.py b/metric_learn/itml.py index 9fa3b75e..af87f530 100644 --- a/metric_learn/itml.py +++ b/metric_learn/itml.py @@ -198,7 +198,7 @@ class ITML(_BaseITML, _PairsClassifierMixin): ---------- .. [1] Jason V. Davis, et al. `Information-theoretic Metric Learning `_. ICML 2007. + /DavisKJSD07_ICML.pdf>`_. ICML 2007. """ def fit(self, pairs, y, bounds=None, calibration_params=None): diff --git a/metric_learn/lfda.py b/metric_learn/lfda.py index bfa3275e..82ae20eb 100644 --- a/metric_learn/lfda.py +++ b/metric_learn/lfda.py @@ -65,7 +65,7 @@ class LFDA(MahalanobisMixin, TransformerMixin): >>> lfda.fit(X, Y) References - ------------------ + ---------- .. [1] Masashi Sugiyama. `Dimensionality Reduction of Multimodal Labeled Data by Local Fisher Discriminant Analysis `_. JMLR 2007. 
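The docstrings above state that the learned Mahalanobis distance equals the Euclidean distance between the embedded points :math:`x_e = Lx`, since :math:`M = L^T L`. That identity is easy to verify numerically; the following is a standalone sketch on random data, independent of metric-learn (here `L` plays the role of a fitted learner's `components_`):

    import numpy as np

    rng = np.random.RandomState(42)
    L = rng.rand(2, 3)                     # linear embedding, like components_
    M = L.T.dot(L)                         # corresponding Mahalanobis matrix
    x, x_prime = rng.rand(3), rng.rand(3)

    # d_M(x, x') = sqrt((x - x')^T M (x - x'))
    d_mahalanobis = np.sqrt((x - x_prime).dot(M).dot(x - x_prime))
    # Euclidean distance after embedding both points with L
    d_embedded = np.linalg.norm(L.dot(x) - L.dot(x_prime))

    assert np.isclose(d_mahalanobis, d_embedded)

The assertion holds because :math:`(x - x')^T L^T L (x - x') = \|L(x - x')\|^2`.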
diff --git a/metric_learn/rca.py b/metric_learn/rca.py index 1da00062..a63aa1d8 100644 --- a/metric_learn/rca.py +++ b/metric_learn/rca.py @@ -58,7 +58,7 @@ class RCA(MahalanobisMixin, TransformerMixin): >>> rca.fit(X, chunks) References - ------------------ + ---------- .. [1] Noam Shental, et al. `Adjustment learning and relevant component analysis `_ . diff --git a/metric_learn/scml.py b/metric_learn/scml.py index b86c6fe1..2bdd0d57 100644 --- a/metric_learn/scml.py +++ b/metric_learn/scml.py @@ -377,8 +377,8 @@ class SCML(_BaseSCML, _TripletsClassifierMixin): `_. \ (AAAI), 2014. - .. [2] Adapted from original \ - `Matlab implementation.`_. + .. [2] Adapted from original `Matlab implementation. \ + `_. See Also -------- @@ -492,8 +492,8 @@ class SCML_Supervised(_BaseSCML, TransformerMixin): `_. \ (AAAI), 2014. - .. [2] Adapted from original \ - `Matlab implementation.`_. + .. [2] Adapted from original `Matlab implementation. \ + `_. See Also -------- diff --git a/setup.py b/setup.py index 255671a2..23392077 100755 --- a/setup.py +++ b/setup.py @@ -68,7 +68,8 @@ 'scikit-learn>=0.21.3', ], extras_require=dict( - docs=['sphinx', 'shinx_rtd_theme', 'numpydoc'], + docs=['sphinx', 'sphinx_rtd_theme', 'numpydoc', 'sphinx-gallery', + 'matplotlib'], demo=['matplotlib'], sdml=['skggm>=0.2.9'] ), From 72b76c86577c9cfdd4691c67007acadc0b4ef921 Mon Sep 17 00:00:00 2001 From: Nikolaos Michas <6445960+nikosmichas@users.noreply.github.com> Date: Fri, 11 Mar 2022 08:00:21 +0200 Subject: [PATCH 194/210] Update SCML docstring (#347) match actual default value of max_iter --- metric_learn/scml.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metric_learn/scml.py b/metric_learn/scml.py index 2bdd0d57..199dfc40 100644 --- a/metric_learn/scml.py +++ b/metric_learn/scml.py @@ -338,7 +338,7 @@ class SCML(_BaseSCML, _TripletsClassifierMixin): gamma: float (default = 5e-3) Learning rate for the optimization algorithm. - max_iter : int (default = 100000) + max_iter : int (default = 10000) Number of iterations for the algorithm. output_iter : int (default = 5000) From b8530b2c92510316d9d5e69517aabe8511cb1ba8 Mon Sep 17 00:00:00 2001 From: CJ Carey Date: Mon, 20 Jun 2022 09:52:47 -0400 Subject: [PATCH 195/210] Support for SLEP010 This requires setting a public `n_features_in_` attribute as part of the fit() logic. 
For details, see: https://scikit-learn-enhancement-proposals.readthedocs.io/en/latest/slep010/proposal.html --- .gitignore | 3 ++- metric_learn/base_metric.py | 6 +++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index 16917890..66eb3551 100644 --- a/.gitignore +++ b/.gitignore @@ -8,4 +8,5 @@ htmlcov/ .pytest_cache/ doc/auto_examples/* doc/generated/* -venv/ \ No newline at end of file +venv/ +.vscode/ diff --git a/metric_learn/base_metric.py b/metric_learn/base_metric.py index 7b449c8e..d0ba1ef9 100644 --- a/metric_learn/base_metric.py +++ b/metric_learn/base_metric.py @@ -166,12 +166,16 @@ def _prepare_inputs(self, X, y=None, type_of_inputs='classic', self._check_preprocessor() check_is_fitted(self, ['preprocessor_']) - return check_input(X, y, + outs = check_input(X, y, type_of_inputs=type_of_inputs, preprocessor=self.preprocessor_, estimator=self, tuple_size=getattr(self, '_tuple_size', None), **kwargs) + # Conform to SLEP010 + if not hasattr(self, 'n_features_in_'): + self.n_features_in_ = (outs if y is None else outs[0]).shape[1] + return outs @abstractmethod def get_metric(self): From d78c720b87723114262e839717fafee16e7eba42 Mon Sep 17 00:00:00 2001 From: CJ Carey Date: Mon, 20 Jun 2022 09:55:09 -0400 Subject: [PATCH 196/210] Update CI to test on Python 3.10 --- .github/workflows/main.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index fbe91dea..aaec0736 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -35,13 +35,13 @@ jobs: pytest test --cov bash <(curl -s https://codecov.io/bash) - # Run normal testing with the latests versions of all dependencies + # Run normal testing with the latest versions of all dependencies build: runs-on: ${{ matrix.os }} strategy: matrix: os: [ubuntu-latest] - python-version: ['3.6', '3.7', '3.8', '3.9'] + python-version: ['3.7', '3.8', '3.9', '3.10'] steps: - uses: actions/checkout@v2 - name: Set up Python From 85204180e7ae5f4ef709830184da3ccf6cfa14d2 Mon Sep 17 00:00:00 2001 From: Nikolaos Michas <6445960+nikosmichas@users.noreply.github.com> Date: Tue, 21 Jun 2022 02:53:54 +0300 Subject: [PATCH 197/210] SCML: Raise ValueError if n_features larger than n_triplets (#350) --- metric_learn/scml.py | 6 ++++++ test/test_triplets_classifiers.py | 20 +++++++++++++++++++- 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/metric_learn/scml.py b/metric_learn/scml.py index 199dfc40..db2fdf64 100644 --- a/metric_learn/scml.py +++ b/metric_learn/scml.py @@ -240,6 +240,12 @@ def _generate_bases_dist_diff(self, triplets, X): raise ValueError("n_basis should be an integer, instead it is of type %s" % type(self.n_basis)) + if n_features > n_triplets: + raise ValueError( + "Number of features (%s) is greater than the number of triplets(%s).\n" + "Consider using dimensionality reduction or using another basis " + "generation scheme." 
% (n_features, n_triplets))
+
     basis = np.zeros((n_basis, n_features))
 
     # get all positive and negative pairs with lowest index first
diff --git a/test/test_triplets_classifiers.py b/test/test_triplets_classifiers.py
index f2d5c015..515a0a33 100644
--- a/test/test_triplets_classifiers.py
+++ b/test/test_triplets_classifiers.py
@@ -2,7 +2,12 @@
 from sklearn.exceptions import NotFittedError
 from sklearn.model_selection import train_test_split
 
-from test.test_utils import triplets_learners, ids_triplets_learners
+from metric_learn import SCML
+from test.test_utils import (
+  triplets_learners,
+  ids_triplets_learners,
+  build_triplets
+)
 from metric_learn.sklearn_shims import set_random_state
 from sklearn import clone
 import numpy as np
@@ -107,3 +112,16 @@ def test_accuracy_toy_example(estimator, build_dataset):
   # we force the transformation to be identity so that we control what it does
   estimator.components_ = np.eye(X.shape[1])
   assert estimator.score(triplets_test) == 0.25
+
+
+def test_raise_big_number_of_features():
+  triplets, _, _, X = build_triplets(with_preprocessor=False)
+  triplets = triplets[:3, :, :]
+  estimator = SCML(n_basis=320)
+  set_random_state(estimator)
+  with pytest.raises(ValueError) as exc_info:
+    estimator.fit(triplets)
+  assert exc_info.value.args[0] == \
+      "Number of features (4) is greater than the number of triplets(3)." \
+      "\nConsider using dimensionality reduction or using another basis " \
+      "generation scheme."

From 17216a7b0db43b310a2f041a97f631b9ce425789 Mon Sep 17 00:00:00 2001
From: Maximiliano Vargas <43217761+mvargas33@users.noreply.github.com>
Date: Mon, 20 Jun 2022 20:09:16 -0400
Subject: [PATCH 198/210] Rename variables, proposed by issue #257 (#324)

* Rename num_constraints to n_constraints
* Renamed num_chunks to n_chunks
* LMNN k parameter renamed to n_neighbors
* Replaced all 'convergence_threshold' with 'tol'
* Fix tests
* Fixed more tests regarding the rename of variables
* Warnings for n_constraints
* Add all warnings regarding n_constraints
* Deprecation warnings for n_chunks
* Add deprecation warn to n_neighbors
* Add convergence_threshold warnings
---
 bench/benchmarks/iris.py                  | 10 ++--
 doc/supervised.rst                        |  8 +--
 doc/weakly_supervised.rst                 |  4 +-
 examples/plot_metric_learning_examples.py |  4 +-
 examples/plot_sandwich.py                 |  6 +-
 metric_learn/constraints.py               | 60 +++++++++++++-------
 metric_learn/itml.py                      | 62 +++++++++++++++------
 metric_learn/lmnn.py                      | 28 +++++++---
 metric_learn/lsml.py                      | 33 +++++++----
 metric_learn/mmc.py                       | 68 ++++++++++++++++-------
 metric_learn/rca.py                       | 32 +++++++----
 metric_learn/sdml.py                      | 29 +++++++---
 test/metric_learn_test.py                 | 28 +++++-----
 test/test_base_metric.py                  | 33 +++++------
 test/test_components_metric_conversion.py | 10 ++--
 test/test_constraints.py                  | 38 ++++++-------
 test/test_fit_transform.py                | 24 ++++----
 test/test_mahalanobis_mixin.py            |  2 +-
 test/test_sklearn_compat.py               |  6 +-
 test/test_utils.py                        |  6 +-
 20 files changed, 305 insertions(+), 186 deletions(-)

diff --git a/bench/benchmarks/iris.py b/bench/benchmarks/iris.py
index 5973f7b8..05035085 100644
--- a/bench/benchmarks/iris.py
+++ b/bench/benchmarks/iris.py
@@ -5,15 +5,15 @@
 CLASSES = {
     'Covariance': metric_learn.Covariance(),
-    'ITML_Supervised': metric_learn.ITML_Supervised(num_constraints=200),
+    'ITML_Supervised': metric_learn.ITML_Supervised(n_constraints=200),
     'LFDA': metric_learn.LFDA(k=2, dim=2),
-    'LMNN': metric_learn.LMNN(k=5, learn_rate=1e-6, verbose=False),
+    'LMNN': metric_learn.LMNN(n_neighbors=5, learn_rate=1e-6, verbose=False),
-    'LSML_Supervised': metric_learn.LSML_Supervised(num_constraints=200),
+    'LSML_Supervised': metric_learn.LSML_Supervised(n_constraints=200),
     'MLKR': metric_learn.MLKR(),
     'NCA': metric_learn.NCA(max_iter=700, n_components=2),
-    'RCA_Supervised': metric_learn.RCA_Supervised(dim=2, num_chunks=30,
+    'RCA_Supervised': metric_learn.RCA_Supervised(dim=2, n_chunks=30,
                                                   chunk_size=2),
-    'SDML_Supervised': metric_learn.SDML_Supervised(num_constraints=1500)
+    'SDML_Supervised': metric_learn.SDML_Supervised(n_constraints=1500)
 }
 
diff --git a/doc/supervised.rst b/doc/supervised.rst
index 09077dc2..a847a33c 100644
--- a/doc/supervised.rst
+++ b/doc/supervised.rst
@@ -164,7 +164,7 @@ indicates :math:`\mathbf{x}_{i}, \mathbf{x}_{j}` belong to different classes,
     X = iris_data['data']
     Y = iris_data['target']
 
-    lmnn = LMNN(k=5, learn_rate=1e-6)
+    lmnn = LMNN(n_neighbors=5, learn_rate=1e-6)
     lmnn.fit(X, Y, verbose=False)
 
 .. rubric:: References
@@ -407,8 +407,8 @@ are similar (+1) or dissimilar (-1)), are sampled with the function
 (of label +1), this method will look at all the samples from the same label and
 sample randomly a pair among them. To sample negative pairs (of label -1), this
 method will look at all the samples from a different class and sample randomly
-a pair among them. The method will try to build `num_constraints` positive
-pairs and `num_constraints` negative pairs, but sometimes it cannot find enough
+a pair among them. The method will try to build `n_constraints` positive
+pairs and `n_constraints` negative pairs, but sometimes it cannot find enough
 of one of those, so forcing `same_length=True` will return both times the
 minimum of the two lengths.
@@ -430,5 +430,5 @@ last points should be less similar than the two first points).
     X = iris_data['data']
     Y = iris_data['target']
 
-    mmc = MMC_Supervised(num_constraints=200)
+    mmc = MMC_Supervised(n_constraints=200)
     mmc.fit(X, Y)
diff --git a/doc/weakly_supervised.rst b/doc/weakly_supervised.rst
index 341d959a..76f7c14e 100644
--- a/doc/weakly_supervised.rst
+++ b/doc/weakly_supervised.rst
@@ -137,7 +137,7 @@ are respected.
 >>> from metric_learn import MMC
 >>> mmc = MMC(random_state=42)
 >>> mmc.fit(tuples, y)
-MMC(A0='deprecated', convergence_threshold=0.001, diagonal=False,
+MMC(A0='deprecated', tol=0.001, diagonal=False,
   diagonal_c=1.0, init='auto', max_iter=100, max_proj=10000,
   preprocessor=None, random_state=42, verbose=False)
@@ -263,7 +263,7 @@ tuples).
 >>> y_pairs = np.array([1, -1])
 >>> mmc = MMC(random_state=42)
 >>> mmc.fit(pairs, y_pairs)
-MMC(convergence_threshold=0.001, diagonal=False,
+MMC(tol=0.001, diagonal=False,
   diagonal_c=1.0, init='auto', max_iter=100, max_proj=10000,
   preprocessor=None, random_state=42, verbose=False)
diff --git a/examples/plot_metric_learning_examples.py b/examples/plot_metric_learning_examples.py
index 4ef15497..32759636 100644
--- a/examples/plot_metric_learning_examples.py
+++ b/examples/plot_metric_learning_examples.py
@@ -143,7 +143,7 @@ def plot_tsne(X, y, colormap=plt.cm.Paired):
 #
 
 # setting up LMNN
-lmnn = metric_learn.LMNN(k=5, learn_rate=1e-6)
+lmnn = metric_learn.LMNN(n_neighbors=5, learn_rate=1e-6)
 
 # fit the data!
lmnn.fit(X, y) @@ -314,7 +314,7 @@ def plot_tsne(X, y, colormap=plt.cm.Paired): # - See more in the documentation of the class :py:class:`RCA # ` -rca = metric_learn.RCA_Supervised(num_chunks=30, chunk_size=2) +rca = metric_learn.RCA_Supervised(n_chunks=30, chunk_size=2) X_rca = rca.fit_transform(X, y) plot_tsne(X_rca, y) diff --git a/examples/plot_sandwich.py b/examples/plot_sandwich.py index 07ab78e9..740852be 100644 --- a/examples/plot_sandwich.py +++ b/examples/plot_sandwich.py @@ -35,9 +35,9 @@ def sandwich_demo(): mls = [ LMNN(), - ITML_Supervised(num_constraints=200), - SDML_Supervised(num_constraints=200, balance_param=0.001), - LSML_Supervised(num_constraints=200), + ITML_Supervised(n_constraints=200), + SDML_Supervised(n_constraints=200, balance_param=0.001), + LSML_Supervised(n_constraints=200), ] for ax_num, ml in enumerate(mls, start=3): diff --git a/metric_learn/constraints.py b/metric_learn/constraints.py index 68e205f6..4993e9ef 100644 --- a/metric_learn/constraints.py +++ b/metric_learn/constraints.py @@ -7,6 +7,7 @@ from sklearn.utils import check_random_state from sklearn.neighbors import NearestNeighbors + __all__ = ['Constraints'] @@ -31,21 +32,21 @@ def __init__(self, partial_labels): partial_labels = np.asanyarray(partial_labels, dtype=int) self.partial_labels = partial_labels - def positive_negative_pairs(self, num_constraints, same_length=False, - random_state=None): + def positive_negative_pairs(self, n_constraints, same_length=False, + random_state=None, num_constraints='deprecated'): """ Generates positive pairs and negative pairs from labeled data. - Positive pairs are formed by randomly drawing ``num_constraints`` pairs of + Positive pairs are formed by randomly drawing ``n_constraints`` pairs of points with the same label. Negative pairs are formed by randomly drawing - ``num_constraints`` pairs of points with different label. + ``n_constraints`` pairs of points with different label. In the case where it is not possible to generate enough positive or negative pairs, a smaller number of pairs will be returned with a warning. Parameters ---------- - num_constraints : int + n_constraints : int Number of positive and negative constraints to generate. same_length : bool, optional (default=False) @@ -55,6 +56,8 @@ def positive_negative_pairs(self, num_constraints, same_length=False, random_state : int or numpy.RandomState or None, optional (default=None) A pseudo random number generator object or a seed for it if int. + num_constraints : Renamed to n_constraints. Will be deprecated in 0.7.0 + Returns ------- a : array-like, shape=(n_constraints,) @@ -69,10 +72,18 @@ def positive_negative_pairs(self, num_constraints, same_length=False, d : array-like, shape=(n_constraints,) 1D array of indicators for the right elements of negative pairs. """ + if num_constraints != 'deprecated': + warnings.warn('"num_constraints" parameter has been renamed to' + ' "n_constraints". 
It has been deprecated in'
                    ' version 0.6.3 and will be removed in 0.7.0'
                    '', FutureWarning)
+      n_constraints = num_constraints
     random_state = check_random_state(random_state)
-    a, b = self._pairs(num_constraints, same_label=True,
+    a, b = self._pairs(n_constraints, same_label=True,
                        random_state=random_state)
-    c, d = self._pairs(num_constraints, same_label=False,
+    c, d = self._pairs(n_constraints, same_label=False,
                        random_state=random_state)
     if same_length and len(a) != len(c):
       n = min(len(a), len(c))
@@ -190,15 +201,15 @@ def generate_knntriplets(self, X, k_genuine, k_impostor):
 
     return triplets
 
-  def _pairs(self, num_constraints, same_label=True, max_iter=10,
+  def _pairs(self, n_constraints, same_label=True, max_iter=10,
              random_state=np.random):
     known_label_idx, = np.where(self.partial_labels >= 0)
     known_labels = self.partial_labels[known_label_idx]
     num_labels = len(known_labels)
     ab = set()
     it = 0
-    while it < max_iter and len(ab) < num_constraints:
-      nc = num_constraints - len(ab)
+    while it < max_iter and len(ab) < n_constraints:
+      nc = n_constraints - len(ab)
       for aidx in random_state.randint(num_labels, size=nc):
         if same_label:
           mask = known_labels[aidx] == known_labels
@@ -209,25 +220,26 @@ def _pairs(self, num_constraints, same_label=True, max_iter=10,
         if len(b_choices) > 0:
           ab.add((aidx, random_state.choice(b_choices)))
       it += 1
-    if len(ab) < num_constraints:
+    if len(ab) < n_constraints:
       warnings.warn("Only generated %d %s constraints (requested %d)" % (
-        len(ab), 'positive' if same_label else 'negative', num_constraints))
-    ab = np.array(list(ab)[:num_constraints], dtype=int)
+        len(ab), 'positive' if same_label else 'negative', n_constraints))
+    ab = np.array(list(ab)[:n_constraints], dtype=int)
     return known_label_idx[ab.T]
 
-  def chunks(self, num_chunks=100, chunk_size=2, random_state=None):
+  def chunks(self, n_chunks=100, chunk_size=2, random_state=None,
+             num_chunks='deprecated'):
     """
     Generates chunks from labeled data.
 
-    Each of ``num_chunks`` chunks is composed of ``chunk_size`` points from
+    Each of ``n_chunks`` chunks is composed of ``chunk_size`` points from
     the same class drawn at random. Each point can belong to at most 1 chunk.
 
-    In the case where there is not enough points to generate ``num_chunks``
+    In the case where there are not enough points to generate ``n_chunks``
     chunks of size ``chunk_size``, a ValueError will be raised.
 
     Parameters
     ----------
-    num_chunks : int, optional (default=100)
+    n_chunks : int, optional (default=100)
       Number of chunks to generate.
 
     chunk_size : int, optional (default=2)
@@ -236,12 +248,20 @@ def chunks(self, num_chunks=100, chunk_size=2, random_state=None):
     random_state : int or numpy.RandomState or None, optional (default=None)
       A pseudo random number generator object or a seed for it if int.
 
+    num_chunks : Renamed to n_chunks. Will be deprecated in 0.7.0
+
     Returns
     -------
     chunks : array-like, shape=(n_samples,)
       1D array of chunk indicators, where -1 indicates that the point does not
       belong to any chunk.
    """
+    if num_chunks != 'deprecated':
+      warnings.warn('"num_chunks" parameter has been renamed to'
+                    ' "n_chunks". 
It has been deprecated in' + ' version 0.6.3 and will be removed in 0.7.0' + '', FutureWarning) + n_chunks = num_chunks random_state = check_random_state(random_state) chunks = -np.ones_like(self.partial_labels, dtype=int) uniq, lookup = np.unique(self.partial_labels, return_inverse=True) @@ -249,13 +269,13 @@ def chunks(self, num_chunks=100, chunk_size=2, random_state=None): all_inds = [set(np.where(lookup == c)[0]) for c in range(len(uniq)) if c not in unknown_uniq] max_chunks = int(np.sum([len(s) // chunk_size for s in all_inds])) - if max_chunks < num_chunks: + if max_chunks < n_chunks: raise ValueError(('Not enough possible chunks of %d elements in each' ' class to form expected %d chunks - maximum number' ' of chunks is %d' - ) % (chunk_size, num_chunks, max_chunks)) + ) % (chunk_size, n_chunks, max_chunks)) idx = 0 - while idx < num_chunks and all_inds: + while idx < n_chunks and all_inds: if len(all_inds) == 1: c = 0 else: diff --git a/metric_learn/itml.py b/metric_learn/itml.py index af87f530..9537eec2 100644 --- a/metric_learn/itml.py +++ b/metric_learn/itml.py @@ -9,6 +9,7 @@ from .base_metric import _PairsClassifierMixin, MahalanobisMixin from .constraints import Constraints, wrap_pairs from ._util import components_from_metric, _initialize_metric_mahalanobis +import warnings class _BaseITML(MahalanobisMixin): @@ -16,12 +17,20 @@ class _BaseITML(MahalanobisMixin): _tuple_size = 2 # constraints are pairs - def __init__(self, gamma=1., max_iter=1000, convergence_threshold=1e-3, + def __init__(self, gamma=1., max_iter=1000, tol=1e-3, prior='identity', verbose=False, - preprocessor=None, random_state=None): + preprocessor=None, random_state=None, + convergence_threshold='deprecated'): + if convergence_threshold != 'deprecated': + warnings.warn('"convergence_threshold" parameter has been ' + ' renamed to "tol". It has been deprecated in' + ' version 0.6.3 and will be removed in 0.7.0' + '', FutureWarning) + tol = convergence_threshold + self.convergence_threshold = 'deprecated' # Avoid errors self.gamma = gamma self.max_iter = max_iter - self.convergence_threshold = convergence_threshold + self.tol = tol self.prior = prior self.verbose = verbose self.random_state = random_state @@ -86,7 +95,7 @@ def _fit(self, pairs, y, bounds=None): conv = np.inf break conv = np.abs(lambdaold - _lambda).sum() / normsum - if conv < self.convergence_threshold: + if conv < self.tol: break lambdaold = _lambda.copy() if self.verbose: @@ -122,7 +131,7 @@ class ITML(_BaseITML, _PairsClassifierMixin): max_iter : int, optional (default=1000) Maximum number of iteration of the optimization procedure. - convergence_threshold : float, optional (default=1e-3) + tol : float, optional (default=1e-3) Convergence tolerance. prior : string or numpy array, optional (default='identity') @@ -158,6 +167,8 @@ class ITML(_BaseITML, _PairsClassifierMixin): A pseudo random number generator object or a seed for it if int. If ``prior='random'``, ``random_state`` is used to set the prior. + convergence_threshold : Renamed to tol. Will be deprecated in 0.7.0 + Attributes ---------- bounds_ : `numpy.ndarray`, shape=(2,) @@ -260,10 +271,10 @@ class ITML_Supervised(_BaseITML, TransformerMixin): max_iter : int, optional (default=1000) Maximum number of iterations of the optimization procedure. - convergence_threshold : float, optional (default=1e-3) + tol : float, optional (default=1e-3) Tolerance of the optimization procedure. 
-  num_constraints : int, optional (default=None)
+  n_constraints : int, optional (default=None)
     Number of constraints to generate. If None, default to `20 *
     num_classes**2`.
 
@@ -302,6 +313,9 @@ class ITML_Supervised(_BaseITML, TransformerMixin):
     case, `random_state` is also used to randomly sample constraints from
     labels.
 
+  num_constraints : Renamed to n_constraints. Will be deprecated in 0.7.0
+
+  convergence_threshold : Renamed to tol. Will be deprecated in 0.7.0
 
   Attributes
   ----------
@@ -328,7 +342,7 @@ class ITML_Supervised(_BaseITML, TransformerMixin):
   >>> iris_data = load_iris()
   >>> X = iris_data['data']
   >>> Y = iris_data['target']
-  >>> itml = ITML_Supervised(num_constraints=200)
+  >>> itml = ITML_Supervised(n_constraints=200)
   >>> itml.fit(X, Y)
 
   See Also
@@ -338,14 +352,26 @@ class ITML_Supervised(_BaseITML, TransformerMixin):
   that describes the supervised version of weakly supervised estimators.
   """
 
-  def __init__(self, gamma=1.0, max_iter=1000, convergence_threshold=1e-3,
-               num_constraints=None, prior='identity',
-               verbose=False, preprocessor=None, random_state=None):
+  def __init__(self, gamma=1.0, max_iter=1000, tol=1e-3,
+               n_constraints=None, prior='identity',
+               verbose=False, preprocessor=None, random_state=None,
+               num_constraints='deprecated',
+               convergence_threshold='deprecated'):
     _BaseITML.__init__(self, gamma=gamma, max_iter=max_iter,
-                       convergence_threshold=convergence_threshold,
+                       tol=tol,
                        prior=prior, verbose=verbose,
-                       preprocessor=preprocessor, random_state=random_state)
-    self.num_constraints = num_constraints
+                       preprocessor=preprocessor,
+                       random_state=random_state,
+                       convergence_threshold=convergence_threshold)
+    if num_constraints != 'deprecated':
+      warnings.warn('"num_constraints" parameter has been renamed to'
+                    ' "n_constraints". It has been deprecated in'
+                    ' version 0.6.3 and will be removed in 0.7.0'
+                    '', FutureWarning)
+      n_constraints = num_constraints
+    self.n_constraints = n_constraints
+    # Avoid test get_params from failing (all params passed should be set)
+    self.num_constraints = 'deprecated'
 
   def fit(self, X, y, bounds=None):
     """Create constraints from labels and learn the ITML model.
@@ -369,13 +395,13 @@ def fit(self, X, y, bounds=None):
       points in the training data `X`.
     """
     X, y = self._prepare_inputs(X, y, ensure_min_samples=2)
-    num_constraints = self.num_constraints
-    if num_constraints is None:
+    n_constraints = self.n_constraints
+    if n_constraints is None:
       num_classes = len(np.unique(y))
-      num_constraints = 20 * num_classes**2
+      n_constraints = 20 * num_classes**2
     c = Constraints(y)
-    pos_neg = c.positive_negative_pairs(num_constraints,
+    pos_neg = c.positive_negative_pairs(n_constraints,
                                         random_state=self.random_state)
     pairs, y = wrap_pairs(X, pos_neg)
     return _BaseITML._fit(self, pairs, y, bounds=bounds)
diff --git a/metric_learn/lmnn.py b/metric_learn/lmnn.py
index 8bdc4bf0..47bb065f 100644
--- a/metric_learn/lmnn.py
+++ b/metric_learn/lmnn.py
@@ -5,6 +5,7 @@
 from collections import Counter
 from sklearn.metrics import euclidean_distances
 from sklearn.base import TransformerMixin
+import warnings
 
 from ._util import _initialize_components, _check_n_components
 from .base_metric import MahalanobisMixin
@@ -63,7 +64,7 @@ class LMNN(MahalanobisMixin, TransformerMixin):
     :meth:`fit` and n_features_a must be less than or equal to that.
     If ``n_components`` is not None, n_features_a must match it.
 
-  k : int, optional (default=3)
+  n_neighbors : int, optional (default=3)
     Number of neighbors to consider, not including self-edges.
  min_iter : int, optional (default=50)
@@ -99,6 +100,8 @@ class LMNN(MahalanobisMixin, TransformerMixin):
     transformation. If ``init='pca'``, ``random_state`` is passed as an
     argument to PCA when initializing the transformation.
 
+  k : Renamed to n_neighbors. Will be deprecated in 0.7.0
+
   Attributes
   ----------
   n_iter_ : `int`
@@ -116,7 +119,7 @@ class LMNN(MahalanobisMixin, TransformerMixin):
   >>> iris_data = load_iris()
   >>> X = iris_data['data']
   >>> Y = iris_data['target']
-  >>> lmnn = LMNN(k=5, learn_rate=1e-6)
+  >>> lmnn = LMNN(n_neighbors=5, learn_rate=1e-6)
   >>> lmnn.fit(X, Y, verbose=False)
 
   References
@@ -128,12 +131,19 @@ class LMNN(MahalanobisMixin, TransformerMixin):
      2005.
   """
 
-  def __init__(self, init='auto', k=3, min_iter=50, max_iter=1000,
+  def __init__(self, init='auto', n_neighbors=3, min_iter=50, max_iter=1000,
                learn_rate=1e-7, regularization=0.5,
                convergence_tol=0.001, verbose=False, preprocessor=None,
-               n_components=None, random_state=None):
+               n_components=None, random_state=None, k='deprecated'):
     self.init = init
-    self.k = k
+    if k != 'deprecated':
+      warnings.warn('"k" parameter has been renamed to'
+                    ' "n_neighbors". It has been deprecated in'
+                    ' version 0.6.3 and will be removed in 0.7.0'
+                    '', FutureWarning)
+      n_neighbors = k
+    self.k = 'deprecated'  # To avoid no_attribute error
+    self.n_neighbors = n_neighbors
     self.min_iter = min_iter
     self.max_iter = max_iter
     self.learn_rate = learn_rate
@@ -145,7 +155,7 @@ def __init__(self, init='auto', k=3, min_iter=50, max_iter=1000,
     super(LMNN, self).__init__(preprocessor)
 
   def fit(self, X, y):
-    k = self.k
+    k = self.n_neighbors
     reg = self.regularization
     learn_rate = self.learn_rate
@@ -162,7 +172,7 @@ def fit(self, X, y):
                                            self.verbose,
                                            random_state=self.random_state)
     required_k = np.bincount(label_inds).min()
-    if self.k > required_k:
+    if self.n_neighbors > required_k:
       raise ValueError('not enough class labels for specified k'
                        ' (smallest class has %d)' % required_k)
@@ -275,12 +285,12 @@ def _loss_grad(self, X, L, dfG, k, reg, target_neighbors, label_inds):
     return 2 * G, objective, total_active
 
   def _select_targets(self, X, label_inds):
-    target_neighbors = np.empty((X.shape[0], self.k), dtype=int)
+    target_neighbors = np.empty((X.shape[0], self.n_neighbors), dtype=int)
     for label in self.labels_:
       inds, = np.nonzero(label_inds == label)
       dd = euclidean_distances(X[inds], squared=True)
       np.fill_diagonal(dd, np.inf)
-      nn = np.argsort(dd)[..., :self.k]
+      nn = np.argsort(dd)[..., :self.n_neighbors]
       target_neighbors[inds] = inds[nn]
     return target_neighbors
 
diff --git a/metric_learn/lsml.py b/metric_learn/lsml.py
index 28f65ce7..af7fa95b 100644
--- a/metric_learn/lsml.py
+++ b/metric_learn/lsml.py
@@ -9,6 +9,7 @@
 from .base_metric import _QuadrupletsClassifierMixin, MahalanobisMixin
 from .constraints import Constraints
 from ._util import components_from_metric, _initialize_metric_mahalanobis
+import warnings
 
 
 class _BaseLSML(MahalanobisMixin):
@@ -261,11 +262,11 @@ class LSML_Supervised(_BaseLSML, TransformerMixin):
     (n_features, n_features), that will be used as such to set the
     prior.
 
-  num_constraints: int, optional (default=None)
+  n_constraints: int, optional (default=None)
     Number of constraints to generate. If None, default to `20 *
     num_classes**2`.
 
-  weights : (num_constraints,) array of floats, optional (default=None)
+  weights : (n_constraints,) array of floats, optional (default=None)
     Relative weight given to each constraint. If None, defaults to uniform
     weights.
@@ -282,6 +283,8 @@ class LSML_Supervised(_BaseLSML, TransformerMixin):
     prior. In any case, `random_state` is also used to randomly sample
     constraints from labels.
 
+  num_constraints : Renamed to n_constraints. Will be deprecated in 0.7.0
+
   Examples
   --------
   >>> from metric_learn import LSML_Supervised
@@ -289,7 +292,7 @@ class LSML_Supervised(_BaseLSML, TransformerMixin):
   >>> iris_data = load_iris()
   >>> X = iris_data['data']
   >>> Y = iris_data['target']
-  >>> lsml = LSML_Supervised(num_constraints=200)
+  >>> lsml = LSML_Supervised(n_constraints=200)
   >>> lsml.fit(X, Y)
 
   Attributes
@@ -303,12 +306,22 @@ class LSML_Supervised(_BaseLSML, TransformerMixin):
   """
 
   def __init__(self, tol=1e-3, max_iter=1000, prior='identity',
-               num_constraints=None, weights=None,
-               verbose=False, preprocessor=None, random_state=None):
+               n_constraints=None, weights=None,
+               verbose=False, preprocessor=None, random_state=None,
+               num_constraints='deprecated'):
     _BaseLSML.__init__(self, tol=tol, max_iter=max_iter, prior=prior,
                        verbose=verbose, preprocessor=preprocessor,
                        random_state=random_state)
-    self.num_constraints = num_constraints
+    if num_constraints != 'deprecated':
+      warnings.warn('"num_constraints" parameter has been renamed to'
+                    ' "n_constraints". It has been deprecated in'
+                    ' version 0.6.3 and will be removed in 0.7.0'
+                    '', FutureWarning)
+      self.n_constraints = num_constraints
+    else:
+      self.n_constraints = n_constraints
+    # Avoid test get_params from failing (all params passed should be set)
+    self.num_constraints = 'deprecated'
     self.weights = weights
 
   def fit(self, X, y):
@@ -323,13 +336,13 @@ def fit(self, X, y):
       Data labels.
     """
     X, y = self._prepare_inputs(X, y, ensure_min_samples=2)
-    num_constraints = self.num_constraints
-    if num_constraints is None:
+    n_constraints = self.n_constraints
+    if n_constraints is None:
       num_classes = len(np.unique(y))
-      num_constraints = 20 * num_classes**2
+      n_constraints = 20 * num_classes**2
     c = Constraints(y)
-    pos_neg = c.positive_negative_pairs(num_constraints, same_length=True,
+    pos_neg = c.positive_negative_pairs(n_constraints, same_length=True,
                                         random_state=self.random_state)
     return _BaseLSML._fit(self, X[np.column_stack(pos_neg)],
                           weights=self.weights)
diff --git a/metric_learn/mmc.py b/metric_learn/mmc.py
index 1ff30b1e..5cf166fd 100644
--- a/metric_learn/mmc.py
+++ b/metric_learn/mmc.py
@@ -6,19 +6,28 @@
 from .base_metric import _PairsClassifierMixin, MahalanobisMixin
 from .constraints import Constraints, wrap_pairs
 from ._util import components_from_metric, _initialize_metric_mahalanobis
+import warnings
 
 
 class _BaseMMC(MahalanobisMixin):
 
   _tuple_size = 2  # constraints are pairs
 
-  def __init__(self, max_iter=100, max_proj=10000, convergence_threshold=1e-3,
+  def __init__(self, max_iter=100, max_proj=10000, tol=1e-3,
                init='identity', diagonal=False,
                diagonal_c=1.0, verbose=False, preprocessor=None,
-               random_state=None):
+               random_state=None,
+               convergence_threshold='deprecated'):
+    if convergence_threshold != 'deprecated':
+      warnings.warn('"convergence_threshold" parameter has been '
+                    ' renamed to "tol". 
It has been deprecated in' + ' version 0.6.3 and will be removed in 0.7.0' + '', FutureWarning) + tol = convergence_threshold + self.convergence_threshold = 'deprecated' # Avoid errors self.max_iter = max_iter self.max_proj = max_proj - self.convergence_threshold = convergence_threshold + self.tol = tol self.init = init self.diagonal = diagonal self.diagonal_c = diagonal_c @@ -145,13 +154,13 @@ def _fit_full(self, pairs, y): A[:] = A_old + alpha * M delta = np.linalg.norm(alpha * M) / np.linalg.norm(A_old) - if delta < self.convergence_threshold: + if delta < self.tol: break if self.verbose: print('mmc iter: %d, conv = %f, projections = %d' % (cycle, delta, it + 1)) - if delta > self.convergence_threshold: + if delta > self.tol: self.converged_ = False if self.verbose: print('mmc did not converge, conv = %f' % (delta,)) @@ -185,7 +194,7 @@ def _fit_diag(self, pairs, y): reduction = 2.0 w = np.diag(self.A_).copy() - while error > self.convergence_threshold and it < self.max_iter: + while error > self.tol and it < self.max_iter: fD0, fD_1st_d, fD_2nd_d = self._D_constraint(neg_pairs, w) obj_initial = np.dot(s_sum, w) + self.diagonal_c * fD0 @@ -332,7 +341,7 @@ class MMC(_BaseMMC, _PairsClassifierMixin): max_proj : int, optional (default=10000) Maximum number of projection steps. - convergence_threshold : float, optional (default=1e-3) + tol : float, optional (default=1e-3) Convergence threshold for the optimization procedure. init : string or numpy array, optional (default='identity') @@ -377,6 +386,8 @@ class MMC(_BaseMMC, _PairsClassifierMixin): ``init='random'``, ``random_state`` is used to initialize the random transformation. + convergence_threshold : Renamed to tol. Will be deprecated in 0.7.0 + Attributes ---------- n_iter_ : `int` @@ -469,10 +480,10 @@ class MMC_Supervised(_BaseMMC, TransformerMixin): max_proj : int, optional (default=10000) Maximum number of projection steps. - convergence_threshold : float, optional (default=1e-3) + tol : float, optional (default=1e-3) Convergence threshold for the optimization procedure. - num_constraints: int, optional (default=None) + n_constraints: int, optional (default=None) Number of constraints to generate. If None, default to `20 * num_classes**2`. @@ -518,6 +529,10 @@ class MMC_Supervised(_BaseMMC, TransformerMixin): Mahalanobis matrix. In any case, `random_state` is also used to randomly sample constraints from labels. + num_constraints : Renamed to n_constraints. Will be deprecated in 0.7.0 + + convergence_threshold : Renamed to tol. Will be deprecated in 0.7.0 + Examples -------- >>> from metric_learn import MMC_Supervised @@ -525,7 +540,7 @@ class MMC_Supervised(_BaseMMC, TransformerMixin): >>> iris_data = load_iris() >>> X = iris_data['data'] >>> Y = iris_data['target'] - >>> mmc = MMC_Supervised(num_constraints=200) + >>> mmc = MMC_Supervised(n_constraints=200) >>> mmc.fit(X, Y) Attributes @@ -538,16 +553,29 @@ class MMC_Supervised(_BaseMMC, TransformerMixin): metric (See function `components_from_metric`.) 
""" - def __init__(self, max_iter=100, max_proj=10000, convergence_threshold=1e-6, - num_constraints=None, init='identity', + def __init__(self, max_iter=100, max_proj=10000, tol=1e-6, + n_constraints=None, init='identity', diagonal=False, diagonal_c=1.0, verbose=False, - preprocessor=None, random_state=None): + preprocessor=None, random_state=None, + num_constraints='deprecated', + convergence_threshold='deprecated'): _BaseMMC.__init__(self, max_iter=max_iter, max_proj=max_proj, - convergence_threshold=convergence_threshold, + tol=tol, init=init, diagonal=diagonal, diagonal_c=diagonal_c, verbose=verbose, - preprocessor=preprocessor, random_state=random_state) - self.num_constraints = num_constraints + preprocessor=preprocessor, + random_state=random_state, + convergence_threshold=convergence_threshold) + if num_constraints != 'deprecated': + warnings.warn('"num_constraints" parameter has been renamed to' + ' "n_constraints". It has been deprecated in' + ' version 0.6.3 and will be removed in 0.7.0' + '', FutureWarning) + self.n_constraints = num_constraints + else: + self.n_constraints = n_constraints + # Avoid test get_params from failing (all params passed sholud be set) + self.num_constraints = 'deprecated' def fit(self, X, y): """Create constraints from labels and learn the MMC model. @@ -561,13 +589,13 @@ def fit(self, X, y): Data labels. """ X, y = self._prepare_inputs(X, y, ensure_min_samples=2) - num_constraints = self.num_constraints - if num_constraints is None: + n_constraints = self.n_constraints + if n_constraints is None: num_classes = len(np.unique(y)) - num_constraints = 20 * num_classes**2 + n_constraints = 20 * num_classes**2 c = Constraints(y) - pos_neg = c.positive_negative_pairs(num_constraints, + pos_neg = c.positive_negative_pairs(n_constraints, random_state=self.random_state) pairs, y = wrap_pairs(X, pos_neg) return _BaseMMC._fit(self, pairs, y) diff --git a/metric_learn/rca.py b/metric_learn/rca.py index a63aa1d8..253b9c92 100644 --- a/metric_learn/rca.py +++ b/metric_learn/rca.py @@ -13,13 +13,13 @@ # mean center each chunklet separately def _chunk_mean_centering(data, chunks): - num_chunks = chunks.max() + 1 + n_chunks = chunks.max() + 1 chunk_mask = chunks != -1 # We need to ensure the data is float so that we can substract the # mean on it chunk_data = data[chunk_mask].astype(float, copy=False) chunk_labels = chunks[chunk_mask] - for c in range(num_chunks): + for c in range(n_chunks): mask = chunk_labels == c chunk_data[mask] -= chunk_data[mask].mean(axis=0) @@ -135,14 +135,14 @@ class RCA_Supervised(RCA): `RCA_Supervised` creates chunks of similar points by first sampling a class, taking `chunk_size` elements in it, and repeating the process - `num_chunks` times. + `n_chunks` times. Parameters ---------- n_components : int or None, optional (default=None) Dimensionality of reduced space (if None, defaults to dimension of X). - num_chunks: int, optional (default=100) + n_chunks: int, optional (default=100) Number of chunks to generate. chunk_size: int, optional (default=2) @@ -156,6 +156,8 @@ class RCA_Supervised(RCA): A pseudo random number generator object or a seed for it if int. It is used to randomly sample constraints from labels. + num_chunks : Renamed to n_chunks. 
Will be deprecated in 0.7.0
+
   Examples
   --------
   >>> from metric_learn import RCA_Supervised
@@ -163,7 +165,7 @@ class RCA_Supervised(RCA):
   >>> iris_data = load_iris()
   >>> X = iris_data['data']
   >>> Y = iris_data['target']
-  >>> rca = RCA_Supervised(num_chunks=30, chunk_size=2)
+  >>> rca = RCA_Supervised(n_chunks=30, chunk_size=2)
   >>> rca.fit(X, Y)
 
   Attributes
@@ -172,17 +174,25 @@ class RCA_Supervised(RCA):
     The learned linear transformation ``L``.
   """
 
-  def __init__(self, n_components=None, num_chunks=100, chunk_size=2,
-               preprocessor=None, random_state=None):
+  def __init__(self, n_components=None, n_chunks=100, chunk_size=2,
+               preprocessor=None, random_state=None,
+               num_chunks='deprecated'):
     """Initialize the supervised version of `RCA`."""
     RCA.__init__(self, n_components=n_components, preprocessor=preprocessor)
-    self.num_chunks = num_chunks
+    if num_chunks != 'deprecated':
+      warnings.warn('"num_chunks" parameter has been renamed to'
+                    ' "n_chunks". It has been deprecated in'
+                    ' version 0.6.3 and will be removed in 0.7.0'
+                    '', FutureWarning)
+      n_chunks = num_chunks
+    self.num_chunks = 'deprecated'  # To avoid no_attribute error
+    self.n_chunks = n_chunks
     self.chunk_size = chunk_size
     self.random_state = random_state
 
   def fit(self, X, y):
     """Create constraints from labels and learn the RCA model.
-    Needs num_constraints specified in constructor.
+    Needs n_chunks specified in constructor. (Not true?)
 
     Parameters
     ----------
@@ -192,11 +202,11 @@ def fit(self, X, y):
     y : (n) data labels
     """
     X, y = self._prepare_inputs(X, y, ensure_min_samples=2)
-    chunks = Constraints(y).chunks(num_chunks=self.num_chunks,
+    chunks = Constraints(y).chunks(n_chunks=self.n_chunks,
                                    chunk_size=self.chunk_size,
                                    random_state=self.random_state)
 
-    if self.num_chunks * (self.chunk_size - 1) < X.shape[1]:
+    if self.n_chunks * (self.chunk_size - 1) < X.shape[1]:
       warnings.warn('Due to the parameters of RCA_Supervised, '
                     'the inner covariance matrix is not invertible, '
                     'so the transformation matrix will contain Nan values. '
diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py
index a0736ffa..93f3f441 100644
--- a/metric_learn/sdml.py
+++ b/metric_learn/sdml.py
@@ -177,7 +177,7 @@ class SDML(_BaseSDML, _PairsClassifierMixin):
   >>> iris_data = load_iris()
   >>> X = iris_data['data']
   >>> Y = iris_data['target']
-  >>> sdml = SDML_Supervised(num_constraints=200)
+  >>> sdml = SDML_Supervised(n_constraints=200)
   >>> sdml.fit(X, Y)
 
   References
@@ -262,7 +262,7 @@ class SDML_Supervised(_BaseSDML, TransformerMixin):
     (n_features, n_features), that will be used as such to set the
     prior.
 
-  num_constraints : int, optional (default=None)
+  n_constraints : int, optional (default=None)
     Number of constraints to generate. If None, defaults to `20 *
     num_classes**2`.
@@ -279,6 +279,8 @@ class SDML_Supervised(_BaseSDML, TransformerMixin):
     prior. In any case, `random_state` is also used to randomly sample
     constraints from labels.
 
+  num_constraints : Renamed to n_constraints. 
Will be deprecated in 0.7.0
+
   Attributes
   ----------
   components_ : `numpy.ndarray`, shape=(n_features, n_features)
@@ -293,13 +295,22 @@ class SDML_Supervised(_BaseSDML, TransformerMixin):
   """
 
   def __init__(self, balance_param=0.5, sparsity_param=0.01, prior='identity',
-               num_constraints=None, verbose=False, preprocessor=None,
-               random_state=None):
+               n_constraints=None, verbose=False, preprocessor=None,
+               random_state=None, num_constraints='deprecated'):
     _BaseSDML.__init__(self, balance_param=balance_param,
                        sparsity_param=sparsity_param, prior=prior,
                        verbose=verbose, preprocessor=preprocessor,
                        random_state=random_state)
-    self.num_constraints = num_constraints
+    if num_constraints != 'deprecated':
+      warnings.warn('"num_constraints" parameter has been renamed to'
+                    ' "n_constraints". It has been deprecated in'
+                    ' version 0.6.3 and will be removed in 0.7.0'
+                    '', FutureWarning)
+      self.n_constraints = num_constraints
+    else:
+      self.n_constraints = n_constraints
+    # Avoid test get_params from failing (all params passed should be set)
+    self.num_constraints = 'deprecated'
 
   def fit(self, X, y):
     """Create constraints from labels and learn the SDML model.
@@ -318,13 +329,13 @@ def fit(self, X, y):
       Returns the instance.
     """
     X, y = self._prepare_inputs(X, y, ensure_min_samples=2)
-    num_constraints = self.num_constraints
-    if num_constraints is None:
+    n_constraints = self.n_constraints
+    if n_constraints is None:
      num_classes = len(np.unique(y))
-      num_constraints = 20 * num_classes**2
+      n_constraints = 20 * num_classes**2
     c = Constraints(y)
-    pos_neg = c.positive_negative_pairs(num_constraints,
+    pos_neg = c.positive_negative_pairs(n_constraints,
                                         random_state=self.random_state)
     pairs, y = wrap_pairs(X, pos_neg)
     return _BaseSDML._fit(self, pairs, y)
diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py
index fe1560c2..a39c7b3c 100644
--- a/test/metric_learn_test.py
+++ b/test/metric_learn_test.py
@@ -326,7 +326,7 @@ def test_large_output_iter(self):
 
 class TestLSML(MetricTestCase):
   def test_iris(self):
-    lsml = LSML_Supervised(num_constraints=200)
+    lsml = LSML_Supervised(n_constraints=200)
     lsml.fit(self.iris_points, self.iris_labels)
 
     csep = class_separation(lsml.transform(self.iris_points),
                             self.iris_labels)
@@ -335,7 +335,7 @@ def test_iris(self):
 
 class TestITML(MetricTestCase):
   def test_iris(self):
-    itml = ITML_Supervised(num_constraints=200)
+    itml = ITML_Supervised(n_constraints=200)
     itml.fit(self.iris_points, self.iris_labels)
 
     csep = class_separation(itml.transform(self.iris_points),
                             self.iris_labels)
@@ -381,7 +381,7 @@ def test_bounds_parameters_invalid(bounds):
 
 class TestLMNN(MetricTestCase):
   def test_iris(self):
-    lmnn = LMNN(k=5, learn_rate=1e-6, verbose=False)
+    lmnn = LMNN(n_neighbors=5, learn_rate=1e-6, verbose=False)
     lmnn.fit(self.iris_points, self.iris_labels)
 
     csep = class_separation(lmnn.transform(self.iris_points),
@@ -398,7 +398,7 @@ def test_loss_grad_lbfgs(self):
     L = rng.randn(rng.randint(1, X.shape[1] + 1), X.shape[1])
     lmnn = LMNN()
 
-    k = lmnn.k
+    k = lmnn.n_neighbors
     reg = lmnn.regularization
 
     X, y = lmnn._prepare_inputs(X, y, dtype=float,
@@ -574,9 +574,9 @@ def _loss_grad(self, X, L, dfG, k, reg, target_neighbors, label_inds):
 def test_toy_ex_lmnn(X, y, loss):
   """Test that the loss gives the right result on a toy example"""
   L = np.array([[1]])
-  lmnn = LMNN(k=1, regularization=0.5)
+  lmnn = LMNN(n_neighbors=1, regularization=0.5)
 
-  k = lmnn.k
+  k = lmnn.n_neighbors
   reg = lmnn.regularization
 
   X, y = lmnn._prepare_inputs(X, y, dtype=float,
@@ -750,7 +750,7 @@ def 
test_iris(self): # TODO: un-flake it! rs = np.random.RandomState(5555) - sdml = SDML_Supervised(num_constraints=1500, prior='identity', + sdml = SDML_Supervised(n_constraints=1500, prior='identity', balance_param=5e-5, random_state=rs) sdml.fit(self.iris_points, self.iris_labels) csep = class_separation(sdml.transform(self.iris_points), @@ -979,7 +979,7 @@ def test_iris(self): class TestRCA(MetricTestCase): def test_iris(self): - rca = RCA_Supervised(n_components=2, num_chunks=30, chunk_size=2) + rca = RCA_Supervised(n_components=2, n_chunks=30, chunk_size=2) rca.fit(self.iris_points, self.iris_labels) csep = class_separation(rca.transform(self.iris_points), self.iris_labels) self.assertLess(csep, 0.29) @@ -1005,15 +1005,15 @@ def test_rank_deficient_returns_warning(self): def test_unknown_labels(self): n = 200 - num_chunks = 50 + n_chunks = 50 X, y = make_classification(random_state=42, n_samples=2 * n, n_features=6, n_informative=6, n_redundant=0) y2 = np.concatenate((y[:n], -np.ones(n))) - rca = RCA_Supervised(num_chunks=num_chunks, random_state=42) + rca = RCA_Supervised(n_chunks=n_chunks, random_state=42) rca.fit(X[:n], y[:n]) - rca2 = RCA_Supervised(num_chunks=num_chunks, random_state=42) + rca2 = RCA_Supervised(n_chunks=n_chunks, random_state=42) rca2.fit(X, y2) assert not np.any(np.isnan(rca.components_)) @@ -1023,11 +1023,11 @@ def test_unknown_labels(self): def test_bad_parameters(self): n = 200 - num_chunks = 3 + n_chunks = 3 X, y = make_classification(random_state=42, n_samples=n, n_features=6, n_informative=6, n_redundant=0) - rca = RCA_Supervised(num_chunks=num_chunks, random_state=42) + rca = RCA_Supervised(n_chunks=n_chunks, random_state=42) msg = ('Due to the parameters of RCA_Supervised, ' 'the inner covariance matrix is not invertible, ' 'so the transformation matrix will contain Nan values. 
' @@ -1081,7 +1081,7 @@ def test_iris(self): # Full metric n_features = self.iris_points.shape[1] - mmc = MMC(convergence_threshold=0.01, init=np.eye(n_features) / 10) + mmc = MMC(tol=0.01, init=np.eye(n_features) / 10) mmc.fit(*wrap_pairs(self.iris_points, [a, b, c, d])) expected = [[+0.000514, +0.000868, -0.001195, -0.001703], [+0.000868, +0.001468, -0.002021, -0.002879], diff --git a/test/test_base_metric.py b/test/test_base_metric.py index baa585b9..fa641526 100644 --- a/test/test_base_metric.py +++ b/test/test_base_metric.py @@ -37,15 +37,16 @@ def test_covariance(self): remove_spaces(f"Covariance({merged_kwargs})")) def test_lmnn(self): - def_kwargs = {'convergence_tol': 0.001, 'init': 'auto', 'k': 3, + def_kwargs = {'convergence_tol': 0.001, 'init': 'auto', 'n_neighbors': 3, 'learn_rate': 1e-07, 'max_iter': 1000, 'min_iter': 50, 'n_components': None, 'preprocessor': None, 'random_state': None, 'regularization': 0.5, 'verbose': False} - nndef_kwargs = {'convergence_tol': 0.01, 'k': 6} + nndef_kwargs = {'convergence_tol': 0.01, 'n_neighbors': 6} merged_kwargs = sk_repr_kwargs(def_kwargs, nndef_kwargs) self.assertEqual( - remove_spaces(str(metric_learn.LMNN(convergence_tol=0.01, k=6))), + remove_spaces(str(metric_learn.LMNN(convergence_tol=0.01, + n_neighbors=6))), remove_spaces(f"LMNN({merged_kwargs})")) def test_nca(self): @@ -66,21 +67,21 @@ def test_lfda(self): remove_spaces(f"LFDA({merged_kwargs})")) def test_itml(self): - def_kwargs = {'convergence_threshold': 0.001, 'gamma': 1.0, + def_kwargs = {'tol': 0.001, 'gamma': 1.0, 'max_iter': 1000, 'preprocessor': None, 'prior': 'identity', 'random_state': None, 'verbose': False} nndef_kwargs = {'gamma': 0.5} merged_kwargs = sk_repr_kwargs(def_kwargs, nndef_kwargs) self.assertEqual(remove_spaces(str(metric_learn.ITML(gamma=0.5))), remove_spaces(f"ITML({merged_kwargs})")) - def_kwargs = {'convergence_threshold': 0.001, 'gamma': 1.0, - 'max_iter': 1000, 'num_constraints': None, + def_kwargs = {'tol': 0.001, 'gamma': 1.0, + 'max_iter': 1000, 'n_constraints': None, 'preprocessor': None, 'prior': 'identity', 'random_state': None, 'verbose': False} - nndef_kwargs = {'num_constraints': 7} + nndef_kwargs = {'n_constraints': 7} merged_kwargs = sk_repr_kwargs(def_kwargs, nndef_kwargs) self.assertEqual( - remove_spaces(str(metric_learn.ITML_Supervised(num_constraints=7))), + remove_spaces(str(metric_learn.ITML_Supervised(n_constraints=7))), remove_spaces(f"ITML_Supervised({merged_kwargs})")) def test_lsml(self): @@ -90,7 +91,7 @@ def test_lsml(self): merged_kwargs = sk_repr_kwargs(def_kwargs, nndef_kwargs) self.assertEqual(remove_spaces(str(metric_learn.LSML(tol=0.1))), remove_spaces(f"LSML({merged_kwargs})")) - def_kwargs = {'max_iter': 1000, 'num_constraints': None, + def_kwargs = {'max_iter': 1000, 'n_constraints': None, 'preprocessor': None, 'prior': 'identity', 'random_state': None, 'tol': 0.001, 'verbose': False, 'weights': None} @@ -108,7 +109,7 @@ def test_sdml(self): merged_kwargs = sk_repr_kwargs(def_kwargs, nndef_kwargs) self.assertEqual(remove_spaces(str(metric_learn.SDML(verbose=True))), remove_spaces(f"SDML({merged_kwargs})")) - def_kwargs = {'balance_param': 0.5, 'num_constraints': None, + def_kwargs = {'balance_param': 0.5, 'n_constraints': None, 'preprocessor': None, 'prior': 'identity', 'random_state': None, 'sparsity_param': 0.01, 'verbose': False} @@ -124,12 +125,12 @@ def test_rca(self): merged_kwargs = sk_repr_kwargs(def_kwargs, nndef_kwargs) self.assertEqual(remove_spaces(str(metric_learn.RCA(n_components=3))), 
remove_spaces(f"RCA({merged_kwargs})")) - def_kwargs = {'chunk_size': 2, 'n_components': None, 'num_chunks': 100, + def_kwargs = {'chunk_size': 2, 'n_components': None, 'n_chunks': 100, 'preprocessor': None, 'random_state': None} - nndef_kwargs = {'num_chunks': 5} + nndef_kwargs = {'n_chunks': 5} merged_kwargs = sk_repr_kwargs(def_kwargs, nndef_kwargs) self.assertEqual( - remove_spaces(str(metric_learn.RCA_Supervised(num_chunks=5))), + remove_spaces(str(metric_learn.RCA_Supervised(n_chunks=5))), remove_spaces(f"RCA_Supervised({merged_kwargs})")) def test_mlkr(self): @@ -142,7 +143,7 @@ def test_mlkr(self): remove_spaces(f"MLKR({merged_kwargs})")) def test_mmc(self): - def_kwargs = {'convergence_threshold': 0.001, 'diagonal': False, + def_kwargs = {'tol': 0.001, 'diagonal': False, 'diagonal_c': 1.0, 'init': 'identity', 'max_iter': 100, 'max_proj': 10000, 'preprocessor': None, 'random_state': None, 'verbose': False} @@ -150,9 +151,9 @@ def test_mmc(self): merged_kwargs = sk_repr_kwargs(def_kwargs, nndef_kwargs) self.assertEqual(remove_spaces(str(metric_learn.MMC(diagonal=True))), remove_spaces(f"MMC({merged_kwargs})")) - def_kwargs = {'convergence_threshold': 1e-06, 'diagonal': False, + def_kwargs = {'tol': 1e-06, 'diagonal': False, 'diagonal_c': 1.0, 'init': 'identity', 'max_iter': 100, - 'max_proj': 10000, 'num_constraints': None, + 'max_proj': 10000, 'n_constraints': None, 'preprocessor': None, 'random_state': None, 'verbose': False} nndef_kwargs = {'max_iter': 1} diff --git a/test/test_components_metric_conversion.py b/test/test_components_metric_conversion.py index 5502ad90..c6113957 100644 --- a/test/test_components_metric_conversion.py +++ b/test/test_components_metric_conversion.py @@ -29,27 +29,27 @@ def test_cov(self): def test_lsml_supervised(self): seed = np.random.RandomState(1234) - lsml = LSML_Supervised(num_constraints=200, random_state=seed) + lsml = LSML_Supervised(n_constraints=200, random_state=seed) lsml.fit(self.X, self.y) L = lsml.components_ assert_array_almost_equal(L.T.dot(L), lsml.get_mahalanobis_matrix()) def test_itml_supervised(self): seed = np.random.RandomState(1234) - itml = ITML_Supervised(num_constraints=200, random_state=seed) + itml = ITML_Supervised(n_constraints=200, random_state=seed) itml.fit(self.X, self.y) L = itml.components_ assert_array_almost_equal(L.T.dot(L), itml.get_mahalanobis_matrix()) def test_lmnn(self): - lmnn = LMNN(k=5, learn_rate=1e-6, verbose=False) + lmnn = LMNN(n_neighbors=5, learn_rate=1e-6, verbose=False) lmnn.fit(self.X, self.y) L = lmnn.components_ assert_array_almost_equal(L.T.dot(L), lmnn.get_mahalanobis_matrix()) def test_sdml_supervised(self): seed = np.random.RandomState(1234) - sdml = SDML_Supervised(num_constraints=1500, prior='identity', + sdml = SDML_Supervised(n_constraints=1500, prior='identity', balance_param=1e-5, random_state=seed) sdml.fit(self.X, self.y) L = sdml.components_ @@ -69,7 +69,7 @@ def test_lfda(self): assert_array_almost_equal(L.T.dot(L), lfda.get_mahalanobis_matrix()) def test_rca_supervised(self): - rca = RCA_Supervised(n_components=2, num_chunks=30, chunk_size=2) + rca = RCA_Supervised(n_components=2, n_chunks=30, chunk_size=2) rca.fit(self.X, self.y) L = rca.components_ assert_array_almost_equal(L.T.dot(L), rca.get_mahalanobis_matrix()) diff --git a/test/test_constraints.py b/test/test_constraints.py index def228d4..3429d9cc 100644 --- a/test/test_constraints.py +++ b/test/test_constraints.py @@ -7,14 +7,14 @@ SEED = 42 -def gen_labels_for_chunks(num_chunks, chunk_size, +def 
gen_labels_for_chunks(n_chunks, chunk_size, n_classes=10, n_unknown_labels=5): - """Generates num_chunks*chunk_size labels that split in num_chunks chunks, + """Generates n_chunks*chunk_size labels that split in n_chunks chunks, that are homogeneous in the label.""" - assert min(num_chunks, chunk_size) > 0 + assert min(n_chunks, chunk_size) > 0 classes = shuffle(np.arange(n_classes), random_state=SEED) - n_per_class = chunk_size * (num_chunks // n_classes) - n_maj_class = chunk_size * num_chunks - n_per_class * (n_classes - 1) + n_per_class = chunk_size * (n_chunks // n_classes) + n_maj_class = chunk_size * n_chunks - n_per_class * (n_classes - 1) first_labels = classes[0] * np.ones(n_maj_class, dtype=int) remaining_labels = np.concatenate([k * np.ones(n_per_class, dtype=int) @@ -25,48 +25,48 @@ def gen_labels_for_chunks(num_chunks, chunk_size, return shuffle(labels, random_state=SEED) -@pytest.mark.parametrize("num_chunks, chunk_size", [(5, 10), (10, 50)]) -def test_exact_num_points_for_chunks(num_chunks, chunk_size): +@pytest.mark.parametrize("n_chunks, chunk_size", [(5, 10), (10, 50)]) +def test_exact_num_points_for_chunks(n_chunks, chunk_size): """Checks that the chunk generation works well with just enough points.""" - labels = gen_labels_for_chunks(num_chunks, chunk_size) + labels = gen_labels_for_chunks(n_chunks, chunk_size) constraints = Constraints(labels) - chunks = constraints.chunks(num_chunks=num_chunks, chunk_size=chunk_size, + chunks = constraints.chunks(n_chunks=n_chunks, chunk_size=chunk_size, random_state=SEED) chunk_no, size_each_chunk = np.unique(chunks[chunks >= 0], return_counts=True) np.testing.assert_array_equal(size_each_chunk, chunk_size) - assert chunk_no.shape[0] == num_chunks + assert chunk_no.shape[0] == n_chunks -@pytest.mark.parametrize("num_chunks, chunk_size", [(5, 10), (10, 50)]) -def test_chunk_case_one_miss_point(num_chunks, chunk_size): +@pytest.mark.parametrize("n_chunks, chunk_size", [(5, 10), (10, 50)]) +def test_chunk_case_one_miss_point(n_chunks, chunk_size): """Checks that the chunk generation breaks when one point is missing.""" - labels = gen_labels_for_chunks(num_chunks, chunk_size) + labels = gen_labels_for_chunks(n_chunks, chunk_size) assert len(labels) >= 1 constraints = Constraints(labels[1:]) with pytest.raises(ValueError) as e: - constraints.chunks(num_chunks=num_chunks, chunk_size=chunk_size, + constraints.chunks(n_chunks=n_chunks, chunk_size=chunk_size, random_state=SEED) expected_message = (('Not enough possible chunks of %d elements in each' ' class to form expected %d chunks - maximum number' ' of chunks is %d' - ) % (chunk_size, num_chunks, num_chunks - 1)) + ) % (chunk_size, n_chunks, n_chunks - 1)) assert str(e.value) == expected_message -@pytest.mark.parametrize("num_chunks, chunk_size", [(5, 10), (10, 50)]) -def test_unknown_labels_not_in_chunks(num_chunks, chunk_size): +@pytest.mark.parametrize("n_chunks, chunk_size", [(5, 10), (10, 50)]) +def test_unknown_labels_not_in_chunks(n_chunks, chunk_size): """Checks that unknown labels are not assigned to any chunk.""" - labels = gen_labels_for_chunks(num_chunks, chunk_size) + labels = gen_labels_for_chunks(n_chunks, chunk_size) constraints = Constraints(labels) - chunks = constraints.chunks(num_chunks=num_chunks, chunk_size=chunk_size, + chunks = constraints.chunks(n_chunks=n_chunks, chunk_size=chunk_size, random_state=SEED) assert np.all(chunks[labels < 0] < 0) diff --git a/test/test_fit_transform.py b/test/test_fit_transform.py index d4d4bfe0..246223b0 100644 --- 
a/test/test_fit_transform.py +++ b/test/test_fit_transform.py @@ -29,47 +29,47 @@ def test_cov(self): def test_lsml_supervised(self): seed = np.random.RandomState(1234) - lsml = LSML_Supervised(num_constraints=200, random_state=seed) + lsml = LSML_Supervised(n_constraints=200, random_state=seed) lsml.fit(self.X, self.y) res_1 = lsml.transform(self.X) seed = np.random.RandomState(1234) - lsml = LSML_Supervised(num_constraints=200, random_state=seed) + lsml = LSML_Supervised(n_constraints=200, random_state=seed) res_2 = lsml.fit_transform(self.X, self.y) assert_array_almost_equal(res_1, res_2) def test_itml_supervised(self): seed = np.random.RandomState(1234) - itml = ITML_Supervised(num_constraints=200, random_state=seed) + itml = ITML_Supervised(n_constraints=200, random_state=seed) itml.fit(self.X, self.y) res_1 = itml.transform(self.X) seed = np.random.RandomState(1234) - itml = ITML_Supervised(num_constraints=200, random_state=seed) + itml = ITML_Supervised(n_constraints=200, random_state=seed) res_2 = itml.fit_transform(self.X, self.y) assert_array_almost_equal(res_1, res_2) def test_lmnn(self): - lmnn = LMNN(k=5, learn_rate=1e-6, verbose=False) + lmnn = LMNN(n_neighbors=5, learn_rate=1e-6, verbose=False) lmnn.fit(self.X, self.y) res_1 = lmnn.transform(self.X) - lmnn = LMNN(k=5, learn_rate=1e-6, verbose=False) + lmnn = LMNN(n_neighbors=5, learn_rate=1e-6, verbose=False) res_2 = lmnn.fit_transform(self.X, self.y) assert_array_almost_equal(res_1, res_2) def test_sdml_supervised(self): seed = np.random.RandomState(1234) - sdml = SDML_Supervised(num_constraints=1500, balance_param=1e-5, + sdml = SDML_Supervised(n_constraints=1500, balance_param=1e-5, prior='identity', random_state=seed) sdml.fit(self.X, self.y) res_1 = sdml.transform(self.X) seed = np.random.RandomState(1234) - sdml = SDML_Supervised(num_constraints=1500, balance_param=1e-5, + sdml = SDML_Supervised(n_constraints=1500, balance_param=1e-5, prior='identity', random_state=seed) res_2 = sdml.fit_transform(self.X, self.y) @@ -99,13 +99,13 @@ def test_lfda(self): def test_rca_supervised(self): seed = np.random.RandomState(1234) - rca = RCA_Supervised(n_components=2, num_chunks=30, chunk_size=2, + rca = RCA_Supervised(n_components=2, n_chunks=30, chunk_size=2, random_state=seed) rca.fit(self.X, self.y) res_1 = rca.transform(self.X) seed = np.random.RandomState(1234) - rca = RCA_Supervised(n_components=2, num_chunks=30, chunk_size=2, + rca = RCA_Supervised(n_components=2, n_chunks=30, chunk_size=2, random_state=seed) res_2 = rca.fit_transform(self.X, self.y) @@ -123,12 +123,12 @@ def test_mlkr(self): def test_mmc_supervised(self): seed = np.random.RandomState(1234) - mmc = MMC_Supervised(num_constraints=200, random_state=seed) + mmc = MMC_Supervised(n_constraints=200, random_state=seed) mmc.fit(self.X, self.y) res_1 = mmc.transform(self.X) seed = np.random.RandomState(1234) - mmc = MMC_Supervised(num_constraints=200, random_state=seed) + mmc = MMC_Supervised(n_constraints=200, random_state=seed) res_2 = mmc.fit_transform(self.X, self.y) assert_array_almost_equal(res_1, res_2) diff --git a/test/test_mahalanobis_mixin.py b/test/test_mahalanobis_mixin.py index e69aa032..b5dbc248 100644 --- a/test/test_mahalanobis_mixin.py +++ b/test/test_mahalanobis_mixin.py @@ -438,7 +438,7 @@ def test_auto_init_transformation(n_samples, n_features, n_classes, random_state=rng) # To make the test work for LMNN: if 'LMNN' in model_base.__class__.__name__: - model_base.set_params(k=1) + model_base.set_params(n_neighbors=1) # To make the test faster 
for estimators that have a max_iter:
     if hasattr(model_base, 'max_iter'):
       model_base.set_params(max_iter=1)
diff --git a/test/test_sklearn_compat.py b/test/test_sklearn_compat.py
index d2369b1c..798d9036 100644
--- a/test/test_sklearn_compat.py
+++ b/test/test_sklearn_compat.py
@@ -29,7 +29,7 @@ def __init__(self, n_components=None, chunk_size=2,
                preprocessor=None, random_state=None):
     # this init makes RCA stable for scikit-learn examples.
     super(Stable_RCA_Supervised, self).__init__(
-        num_chunks=2, n_components=n_components,
+        n_chunks=2, n_components=n_components,
         chunk_size=chunk_size, preprocessor=preprocessor,
         random_state=random_state)
 
@@ -37,12 +37,12 @@ def __init__(self, n_components=None,
 
 class Stable_SDML_Supervised(SDML_Supervised):
   def __init__(self, sparsity_param=0.01,
-               num_constraints=None, verbose=False, preprocessor=None,
+               n_constraints=None, verbose=False, preprocessor=None,
                random_state=None):
     # this init makes SDML stable for scikit-learn examples.
     super(Stable_SDML_Supervised, self).__init__(
         sparsity_param=sparsity_param,
-        num_constraints=num_constraints, verbose=verbose,
+        n_constraints=n_constraints, verbose=verbose,
         preprocessor=preprocessor, balance_param=1e-5, prior='identity',
         random_state=random_state)
 
diff --git a/test/test_utils.py b/test/test_utils.py
index f3000344..43d67111 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -60,11 +60,11 @@ def build_regression(with_preprocessor=False):
 def build_data():
   input_data, labels = load_iris(return_X_y=True)
   X, y = shuffle(input_data, labels, random_state=SEED)
-  num_constraints = 50
+  n_constraints = 50
   constraints = Constraints(y)
   pairs = (
       constraints
-      .positive_negative_pairs(num_constraints, same_length=True,
+      .positive_negative_pairs(n_constraints, same_length=True,
                                random_state=check_random_state(SEED)))
   return X, pairs
 
@@ -137,7 +137,7 @@ def build_quadruplets(with_preprocessor=False):
                      (ITML_Supervised(max_iter=5), build_classification),
                      (LSML_Supervised(), build_classification),
                      (MMC_Supervised(max_iter=5), build_classification),
-                     (RCA_Supervised(num_chunks=5), build_classification),
+                     (RCA_Supervised(n_chunks=5), build_classification),
                      (SDML_Supervised(prior='identity', balance_param=1e-5),
                       build_classification),
                      (SCML_Supervised(n_basis=80), build_classification)]
 
From 4a6caeac69c4fe1e6124f171f15452dec7e768b5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Aur=C3=A9lien=20Bellet?=
Date: Tue, 21 Jun 2022 13:53:05 +0200
Subject: [PATCH 199/210] fix docstring (#351)

---
 metric_learn/scml.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/metric_learn/scml.py b/metric_learn/scml.py
index db2fdf64..63d64cfb 100644
--- a/metric_learn/scml.py
+++ b/metric_learn/scml.py
@@ -328,9 +328,10 @@ class SCML(_BaseSCML, _TripletsClassifierMixin):
     'triplet_diffs', and an array-like of shape (n_basis, n_features).
 
     'triplet_diffs'
-      The basis set is constructed from the differences between points of
-      `n_basis` positive or negative pairs taken from the triplets
-      constrains.
+      The basis set is constructed iteratively from differences between points
+      of `n_features` positive or negative pairs randomly sampled from the
+      triplets constraints. Requires the number of training triplets to be
+      greater than or equal to `n_features`. 
array-like
       A matrix of shape (n_basis, n_features), that will be used as
 
From 0acf8b1ec746809dfeef2207a6f29b41de122a0f Mon Sep 17 00:00:00 2001
From: CJ Carey
Date: Thu, 28 Sep 2023 09:03:44 -0400
Subject: [PATCH 200/210] Remove deprecated assert_warns_message import

This was deprecated and is now removed from sklearn. Thankfully, we
don't appear to be using it.
---
 metric_learn/sklearn_shims.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/metric_learn/sklearn_shims.py b/metric_learn/sklearn_shims.py
index 654a9144..8d746890 100644
--- a/metric_learn/sklearn_shims.py
+++ b/metric_learn/sklearn_shims.py
@@ -6,7 +6,6 @@
                            >= version.parse('0.22.0'))
 if SKLEARN_AT_LEAST_0_22:
   from sklearn.utils._testing import (set_random_state,
-                                      assert_warns_message,
                                       ignore_warnings,
                                       assert_allclose_dense_sparse,
                                       _get_args)
@@ -15,13 +14,12 @@
   from sklearn.metrics._scorer import get_scorer
 else:
   from sklearn.utils.testing import (set_random_state,
-                                     assert_warns_message,
                                      ignore_warnings,
                                      assert_allclose_dense_sparse,
                                      _get_args)
   from sklearn.utils.estimator_checks import is_public_parameter
   from sklearn.metrics.scorer import get_scorer
 
-__all__ = ['set_random_state', 'assert_warns_message', 'set_random_state',
+__all__ = ['set_random_state',
            'ignore_warnings', 'assert_allclose_dense_sparse', '_get_args',
            'is_public_parameter', 'get_scorer']
 
From 084e1a9412e09b8a77a6cedb8e1f764b58c041cc Mon Sep 17 00:00:00 2001
From: CJ Carey
Date: Thu, 28 Sep 2023 09:07:54 -0400
Subject: [PATCH 201/210] Update Python versions to test

---
 .github/workflows/main.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index aaec0736..1fe061e9 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -41,7 +41,7 @@ jobs:
     strategy:
       matrix:
         os: [ubuntu-latest]
        python-version: ['3.7', '3.8', '3.9', '3.10']
-        python-version: ['3.7', '3.8', '3.9', '3.10']
+        python-version: ['3.8', '3.9', '3.10', '3.11']
     steps:
     - uses: actions/checkout@v2
     - name: Set up Python
From 8d5059c63de2f52b31dc0a534931d4a1f09da895 Mon Sep 17 00:00:00 2001
From: CJ Carey
Date: Thu, 28 Sep 2023 09:09:45 -0400
Subject: [PATCH 202/210] Fix minor lint issues

---
 metric_learn/scml.py      | 7 ++-----
 test/metric_learn_test.py | 2 +-
 2 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/metric_learn/scml.py b/metric_learn/scml.py
index 63d64cfb..c3a332f6 100644
--- a/metric_learn/scml.py
+++ b/metric_learn/scml.py
@@ -53,7 +53,7 @@ def _fit(self, triplets, basis=None, n_basis=None):
       raise ValueError("batch_size should be an integer, instead it is of type"
                        " %s" % type(self.batch_size))
 
-    if(self.output_iter > self.max_iter):
+    if self.output_iter > self.max_iter:
       raise ValueError("The value of output_iter must be equal or smaller than"
                        " max_iter.")
 
@@ -266,11 +266,8 @@ def _generate_bases_dist_diff(self, triplets, X):
 
     start = 0
     finish = 0
-
-    while(finish != n_basis):
-
+    while finish != n_basis:
       # Select triplets to yield diff
-
       select_triplet = rng.choice(n_triplets, size=n_features, replace=False)
 
       # select n_features positive differences
diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py
index a39c7b3c..f109a667 100644
--- a/test/metric_learn_test.py
+++ b/test/metric_learn_test.py
@@ -14,7 +14,7 @@
 from sklearn.preprocessing import StandardScaler
 try:
   from inverse_covariance import quic
-  assert(quic)
+  assert quic
 except ImportError:
   HAS_SKGGM = False
 else:
   HAS_SKGGM = True
From 7102bc5a8d0db499c7732e5f9031cf000a538298 Mon Sep 17 00:00:00 2001
From: CJ Carey
Date: Thu, 28 Sep 
2023 09:20:32 -0400 Subject: [PATCH 203/210] Resolve some SCML issues The sklearn KMeans class now warns if you don't provide a value for the n_init parameter. I'm setting it to the original default, but we may want to consider setting it to 'auto' in the future. --- metric_learn/scml.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/metric_learn/scml.py b/metric_learn/scml.py index c3a332f6..fedf393d 100644 --- a/metric_learn/scml.py +++ b/metric_learn/scml.py @@ -558,7 +558,7 @@ def _initialize_basis_supervised(self, X, y): case one is selected. """ - if self.basis == 'lda': + if isinstance(self.basis, str) and self.basis == 'lda': basis, n_basis = self._generate_bases_LDA(X, y) else: basis, n_basis = None, None @@ -606,8 +606,8 @@ def _generate_bases_LDA(self, X, y): "should be smaller than %d" % (n_basis, X.shape[0]*2*num_eig)) - kmeans = KMeans(n_clusters=n_clusters, random_state=self.random_state, - algorithm='elkan').fit(X) + kmeans = KMeans(n_clusters=n_clusters, n_init=10, + random_state=self.random_state, algorithm='elkan').fit(X) cX = kmeans.cluster_centers_ n_scales = 2 From 81901ee39de7abb5dfcfb631967a6c7ef3e8f735 Mon Sep 17 00:00:00 2001 From: CJ Carey Date: Thu, 28 Sep 2023 09:27:38 -0400 Subject: [PATCH 204/210] Fix mahalanobis test --- test/test_mahalanobis_mixin.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/test/test_mahalanobis_mixin.py b/test/test_mahalanobis_mixin.py index b5dbc248..1e94065d 100644 --- a/test/test_mahalanobis_mixin.py +++ b/test/test_mahalanobis_mixin.py @@ -215,8 +215,7 @@ def test_get_metric_equivalent_to_explicit_mahalanobis(estimator, metric = model.get_metric() n_features = X.shape[1] a, b = (rng.randn(n_features), rng.randn(n_features)) - expected_dist = mahalanobis(a[None], b[None], - VI=model.get_mahalanobis_matrix()) + expected_dist = mahalanobis(a, b, VI=model.get_mahalanobis_matrix()) assert_allclose(metric(a, b), expected_dist, rtol=1e-13) From 053779ad6351752dc0d92c63622ef5e8eaf17bc9 Mon Sep 17 00:00:00 2001 From: CJ Carey Date: Thu, 28 Sep 2023 09:35:47 -0400 Subject: [PATCH 205/210] Skip single-feature test for SDML --- test/test_mahalanobis_mixin.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/test/test_mahalanobis_mixin.py b/test/test_mahalanobis_mixin.py index 1e94065d..9378ac60 100644 --- a/test/test_mahalanobis_mixin.py +++ b/test/test_mahalanobis_mixin.py @@ -15,6 +15,7 @@ from metric_learn.sklearn_shims import set_random_state from metric_learn._util import make_context, _initialize_metric_mahalanobis +from metric_learn.sdml import _BaseSDML from metric_learn.base_metric import (_QuadrupletsClassifierMixin, _TripletsClassifierMixin, _PairsClassifierMixin) @@ -290,7 +291,11 @@ def test_components_is_2D(estimator, build_dataset): model.fit(*remove_y(estimator, input_data, labels)) assert model.components_.shape == (X.shape[1], X.shape[1]) - # test that it works for 1 feature. Use 2nd dimention, to avoid border cases + if isinstance(estimator, _BaseSDML): + # SDML doesn't support running on a single feature. + return + + # test that it works for 1 feature. Use 2nd dimension, to avoid border cases trunc_data = input_data[..., 1:2] # we drop duplicates that might have been formed, i.e. of the form # aabc or abcc or aabb for quadruplets, and aa for pairs. 
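
A side note on the scikit-learn change that PATCH 203 works around: the
behavior is easy to reproduce in isolation. A minimal sketch, assuming
scikit-learn >= 1.2 (the release that introduced the FutureWarning about
the upcoming 'auto' default for n_init); the variable names here are
illustrative only:

  import numpy as np
  from sklearn.cluster import KMeans

  X = np.random.RandomState(0).randn(100, 4)

  # Without an explicit n_init, scikit-learn >= 1.2 emits a FutureWarning
  # announcing that the default will change from 10 to 'auto'.
  km_default = KMeans(n_clusters=3).fit(X)

  # Pinning n_init to the historical default, as the patch does for SCML,
  # keeps the clustering behavior identical and silences the warning.
  km_pinned = KMeans(n_clusters=3, n_init=10).fit(X)
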
From 4e89e3de31e98425f84c3c6a424bba9f25745e74 Mon Sep 17 00:00:00 2001 From: CJ Carey Date: Thu, 28 Sep 2023 09:38:39 -0400 Subject: [PATCH 206/210] Drop old Python versions from the test matrix --- .github/workflows/main.yml | 26 -------------------------- 1 file changed, 26 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 1fe061e9..0935a109 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -9,32 +9,6 @@ on: branches: [ master ] jobs: - - # Checks compatibility with an old version of sklearn (0.21.3) - compatibility: - runs-on: ${{ matrix.os }} - strategy: - matrix: - os: [ubuntu-latest] - python-version: ['3.6', '3.7'] - steps: - - uses: actions/checkout@v2 - - name: Set up Python - uses: actions/setup-python@v2 - with: - python-version: ${{ matrix.python-version }} - - name: Run Tests with skggm + scikit-learn 0.21.3 - env: - SKGGM_VERSION: a0ed406586c4364ea3297a658f415e13b5cbdaf8 - run: | - sudo apt-get install liblapack-dev - pip install --upgrade pip pytest - pip install wheel cython numpy scipy codecov pytest-cov - pip install scikit-learn==0.21.3 - pip install git+https://github.com/skggm/skggm.git@${SKGGM_VERSION} - pytest test --cov - bash <(curl -s https://codecov.io/bash) - # Run normal testing with the latest versions of all dependencies build: runs-on: ${{ matrix.os }} From e5b06fa208e33388feead20831130ea6859d67bd Mon Sep 17 00:00:00 2001 From: CJ Carey Date: Thu, 28 Sep 2023 14:21:14 -0400 Subject: [PATCH 207/210] Fix sklearn compat issues --- metric_learn/base_metric.py | 11 +++++++---- metric_learn/sdml.py | 4 ++++ 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/metric_learn/base_metric.py b/metric_learn/base_metric.py index d0ba1ef9..47efe4b7 100644 --- a/metric_learn/base_metric.py +++ b/metric_learn/base_metric.py @@ -2,7 +2,7 @@ Base module. """ -from sklearn.base import BaseEstimator +from sklearn.base import BaseEstimator, ClassifierMixin from sklearn.utils.extmath import stable_cumsum from sklearn.utils.validation import _is_arraylike, check_is_fitted from sklearn.metrics import roc_auc_score, roc_curve, precision_recall_curve @@ -464,7 +464,7 @@ def get_mahalanobis_matrix(self): return self.components_.T.dot(self.components_) -class _PairsClassifierMixin(BaseMetricLearner): +class _PairsClassifierMixin(BaseMetricLearner, ClassifierMixin): """Base class for pairs learners. Attributes @@ -475,6 +475,7 @@ class _PairsClassifierMixin(BaseMetricLearner): classified as dissimilar. """ + classes_ = np.array([0, 1]) _tuple_size = 2 # number of points in a tuple, 2 for pairs def predict(self, pairs): @@ -752,11 +753,12 @@ def _validate_calibration_params(strategy='accuracy', min_rate=None, 'Got {} instead.'.format(type(beta))) -class _TripletsClassifierMixin(BaseMetricLearner): +class _TripletsClassifierMixin(BaseMetricLearner, ClassifierMixin): """ Base class for triplets learners. """ + classes_ = np.array([0, 1]) _tuple_size = 3 # number of points in a tuple, 3 for triplets def predict(self, triplets): @@ -837,11 +839,12 @@ def score(self, triplets): return self.predict(triplets).mean() / 2 + 0.5 -class _QuadrupletsClassifierMixin(BaseMetricLearner): +class _QuadrupletsClassifierMixin(BaseMetricLearner, ClassifierMixin): """ Base class for quadruplets learners. 
""" + classes_ = np.array([0, 1]) _tuple_size = 4 # number of points in a tuple, 4 for quadruplets def predict(self, quadruplets): diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py index 93f3f441..c76de99b 100644 --- a/metric_learn/sdml.py +++ b/metric_learn/sdml.py @@ -43,6 +43,9 @@ def _fit(self, pairs, y): print("SDML will use skggm's graphical lasso solver.") pairs, y = self._prepare_inputs(pairs, y, type_of_inputs='tuples') + n_features = pairs.shape[2] + if n_features < 2: + raise ValueError(f"Cannot fit SDML with {n_features} feature(s)") # set up (the inverse of) the prior M # if the prior is the default (None), we raise a warning @@ -83,6 +86,7 @@ def _fit(self, pairs, y): w_mahalanobis, _ = np.linalg.eigh(M) not_spd = any(w_mahalanobis < 0.) not_finite = not np.isfinite(M).all() + # TODO: Narrow this to the specific exceptions we expect. except Exception as e: raised_error = e not_spd = False # not_spd not applicable here so we set to False From b3044a837bd8b32c7a9c7189970a48046d48a11e Mon Sep 17 00:00:00 2001 From: CJ Carey Date: Thu, 28 Sep 2023 20:13:39 -0400 Subject: [PATCH 208/210] Bump version to 0.7.0 --- doc/conf.py | 6 +++--- metric_learn/_version.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/conf.py b/doc/conf.py index 5eb312dc..c472cc21 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -21,12 +21,12 @@ # General information about the project. project = u'metric-learn' -copyright = (u'2015-2020, CJ Carey, Yuan Tang, William de Vazelhes, Aurélien ' +copyright = (u'2015-2023, CJ Carey, Yuan Tang, William de Vazelhes, Aurélien ' u'Bellet and Nathalie Vauquier') author = (u'CJ Carey, Yuan Tang, William de Vazelhes, Aurélien Bellet and ' u'Nathalie Vauquier') -version = '0.6.2' -release = '0.6.2' +version = '0.7.0' +release = '0.7.0' language = 'en' exclude_patterns = ['_build'] diff --git a/metric_learn/_version.py b/metric_learn/_version.py index aece342d..a71c5c7f 100644 --- a/metric_learn/_version.py +++ b/metric_learn/_version.py @@ -1 +1 @@ -__version__ = '0.6.2' +__version__ = '0.7.0' From 8fb6872edc5ee4a2751386e2058730fe55449488 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Damin=20K=C3=BChn?= Date: Fri, 2 Aug 2024 03:25:56 +0200 Subject: [PATCH 209/210] Update supervised.rst (#356) The verbose (and almost all other parameters) need to be passed to the constructor and not the .fit() function. This was not refelcted in the example snipplets for LMNN in the docs. --- doc/supervised.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/supervised.rst b/doc/supervised.rst index a847a33c..49548b83 100644 --- a/doc/supervised.rst +++ b/doc/supervised.rst @@ -164,8 +164,8 @@ indicates :math:`\mathbf{x}_{i}, \mathbf{x}_{j}` belong to different classes, X = iris_data['data'] Y = iris_data['target'] - lmnn = LMNN(n_neighbors=5, learn_rate=1e-6) - lmnn.fit(X, Y, verbose=False) + lmnn = LMNN(n_neighbors=5, learn_rate=1e-6, verbose=False) + lmnn.fit(X, Y) .. 
rubric:: References
 
From dc7e4499b1a9e522f03c87ba8dc249f9747cac82 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Aur=C3=A9lien=20Bellet?=
Date: Sat, 3 Aug 2024 21:34:12 +0200
Subject: [PATCH 210/210] [MRG] Fix test failures due to updated packages:
 deprecated pytest.warns(None) syntax + GLasso update in sklearn (#357)

* Fix GLasso import for SDML for newer sklearn versions

* fix import and argument issue

* also fix deprecated pytest.warns(None) syntax

* fix flake8
---
 metric_learn/sdml.py           | 14 ++++++++++----
 test/metric_learn_test.py      |  9 +++++----
 test/test_base_metric.py       |  5 +++--
 test/test_pairs_classifiers.py |  7 ++++---
 test/test_utils.py             | 19 ++++++++++---------
 5 files changed, 32 insertions(+), 22 deletions(-)

diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py
index c76de99b..c4c427b9 100644
--- a/metric_learn/sdml.py
+++ b/metric_learn/sdml.py
@@ -6,7 +6,13 @@
 import numpy as np
 from sklearn.base import TransformerMixin
 from scipy.linalg import pinvh
-from sklearn.covariance import graphical_lasso
+try:
+  from sklearn.covariance._graph_lasso import (
+    _graphical_lasso as graphical_lasso
+  )
+except ImportError:
+  from sklearn.covariance import graphical_lasso
+
 from sklearn.exceptions import ConvergenceWarning
 
 from .base_metric import MahalanobisMixin, _PairsClassifierMixin
@@ -79,9 +85,9 @@ def _fit(self, pairs, y):
                                   msg=self.verbose,
                                   Theta0=theta0, Sigma0=sigma0)
       else:
-        _, M = graphical_lasso(emp_cov, alpha=self.sparsity_param,
-                               verbose=self.verbose,
-                               cov_init=sigma0)
+        _, M, *_ = graphical_lasso(emp_cov, alpha=self.sparsity_param,
+                                   verbose=self.verbose,
+                                   cov_init=sigma0)
       raised_error = None
       w_mahalanobis, _ = np.linalg.eigh(M)
       not_spd = any(w_mahalanobis < 0.)
diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py
index f109a667..d457b52d 100644
--- a/test/metric_learn_test.py
+++ b/test/metric_learn_test.py
@@ -1,3 +1,4 @@
+import warnings
 import unittest
 import re
 import pytest
@@ -734,12 +735,12 @@ def test_raises_no_warning_installed_skggm(self):
       pairs = np.array([[[-10., 0.], [10., 0.]], [[0., -55.], [0., -60]]])
       y_pairs = [1, -1]
       X, y = make_classification(random_state=42)
-      with pytest.warns(None) as records:
+      with warnings.catch_warnings(record=True) as records:
         sdml = SDML(prior='covariance')
         sdml.fit(pairs, y_pairs)
       for record in records:
         assert record.category is not ConvergenceWarning
-      with pytest.warns(None) as records:
+      with warnings.catch_warnings(record=True) as records:
        sdml_supervised = SDML_Supervised(prior='identity',
                                          balance_param=1e-5)
        sdml_supervised.fit(X, y)
       for record in records:
@@ -999,7 +1000,7 @@ def test_rank_deficient_returns_warning(self):
            'for instance using `sklearn.decomposition.PCA` as a '
            'preprocessing step.')
 
-    with pytest.warns(None) as raised_warnings:
+    with warnings.catch_warnings(record=True) as raised_warnings:
       rca.fit(X, y)
     assert any(str(w.message) == msg for w in raised_warnings)
@@ -1034,7 +1035,7 @@ def test_bad_parameters(self):
            'Increase the number or size of the chunks to correct '
            'this problem.'
) - with pytest.warns(None) as raised_warning: + with warnings.catch_warnings(record=True) as raised_warning: rca.fit(X, y) assert any(str(w.message) == msg for w in raised_warning) diff --git a/test/test_base_metric.py b/test/test_base_metric.py index fa641526..b1e71020 100644 --- a/test/test_base_metric.py +++ b/test/test_base_metric.py @@ -1,4 +1,5 @@ from numpy.core.numeric import array_equal +import warnings import pytest import re import unittest @@ -226,7 +227,7 @@ def test_get_metric_works_does_not_raise(estimator, build_dataset): (X[0][None], X[1][None])] for u, v in list_test_get_metric_doesnt_raise: - with pytest.warns(None) as record: + with warnings.catch_warnings(record=True) as record: metric(u, v) assert len(record) == 0 @@ -234,7 +235,7 @@ def test_get_metric_works_does_not_raise(estimator, build_dataset): model.components_ = np.array([3.1]) metric = model.get_metric() for u, v in [(5, 6.7), ([5], [6.7]), ([[5]], [[6.7]])]: - with pytest.warns(None) as record: + with warnings.catch_warnings(record=True) as record: metric(u, v) assert len(record) == 0 diff --git a/test/test_pairs_classifiers.py b/test/test_pairs_classifiers.py index 6a725f23..bfedefea 100644 --- a/test/test_pairs_classifiers.py +++ b/test/test_pairs_classifiers.py @@ -1,5 +1,6 @@ from functools import partial +import warnings import pytest from numpy.testing import assert_array_equal from scipy.spatial.distance import euclidean @@ -136,7 +137,7 @@ def test_threshold_different_scores_is_finite(estimator, build_dataset, estimator.set_params(preprocessor=preprocessor) set_random_state(estimator) estimator.fit(input_data, labels) - with pytest.warns(None) as record: + with warnings.catch_warnings(record=True) as record: estimator.calibrate_threshold(input_data, labels, **kwargs) assert len(record) == 0 @@ -383,7 +384,7 @@ def test_calibrate_threshold_valid_parameters(valid_args): pairs, y = rng.randn(20, 2, 5), rng.choice([-1, 1], size=20) pairs_learner = IdentityPairsClassifier() pairs_learner.fit(pairs, y) - with pytest.warns(None) as record: + with warnings.catch_warnings(record=True) as record: pairs_learner.calibrate_threshold(pairs, y, **valid_args) assert len(record) == 0 @@ -518,7 +519,7 @@ def test_validate_calibration_params_valid_parameters( # test that no warning message is returned if valid arguments are given to # _validate_calibration_params for all pairs metric learners, as well as # a mocking example, and the class itself - with pytest.warns(None) as record: + with warnings.catch_warnings(record=True) as record: estimator._validate_calibration_params(**valid_args) assert len(record) == 0 diff --git a/test/test_utils.py b/test/test_utils.py index 43d67111..c0383792 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -1,3 +1,4 @@ +import warnings import pytest from scipy.linalg import eigh, pinvh from collections import namedtuple @@ -353,7 +354,7 @@ def test_check_tuples_valid_tuple_size(tuple_size): checks that checking the number of tuples (pairs, quadruplets, etc) raises no warning if there is the right number of points in a tuple. 
""" - with pytest.warns(None) as record: + with warnings.catch_warnings(record=True) as record: check_input(tuples_prep(), type_of_inputs='tuples', preprocessor=mock_preprocessor, tuple_size=tuple_size) check_input(tuples_no_prep(), type_of_inputs='tuples', preprocessor=None, @@ -378,7 +379,7 @@ def test_check_tuples_valid_tuple_size(tuple_size): [[2.6, 2.3], [3.4, 5.0]]])]) def test_check_tuples_valid_with_preprocessor(tuples): """Test that valid inputs when using a preprocessor raises no warning""" - with pytest.warns(None) as record: + with warnings.catch_warnings(record=True) as record: check_input(tuples, type_of_inputs='tuples', preprocessor=mock_preprocessor) assert len(record) == 0 @@ -399,7 +400,7 @@ def test_check_tuples_valid_with_preprocessor(tuples): ((3, 1), (4, 4), (29, 4)))]) def test_check_tuples_valid_without_preprocessor(tuples): """Test that valid inputs when using no preprocessor raises no warning""" - with pytest.warns(None) as record: + with warnings.catch_warnings(record=True) as record: check_input(tuples, type_of_inputs='tuples', preprocessor=None) assert len(record) == 0 @@ -408,12 +409,12 @@ def test_check_tuples_behaviour_auto_dtype(): """Checks that check_tuples allows by default every type if using a preprocessor, and numeric types if using no preprocessor""" tuples_prep = [['img1.png', 'img2.png'], ['img3.png', 'img5.png']] - with pytest.warns(None) as record: + with warnings.catch_warnings(record=True) as record: check_input(tuples_prep, type_of_inputs='tuples', preprocessor=mock_preprocessor) assert len(record) == 0 - with pytest.warns(None) as record: + with warnings.catch_warnings(record=True) as record: check_input(tuples_no_prep(), type_of_inputs='tuples') # numeric type assert len(record) == 0 @@ -549,7 +550,7 @@ def test_check_classic_invalid_dtype_not_convertible(preprocessor, points): [2.6, 2.3]])]) def test_check_classic_valid_with_preprocessor(points): """Test that valid inputs when using a preprocessor raises no warning""" - with pytest.warns(None) as record: + with warnings.catch_warnings(record=True) as record: check_input(points, type_of_inputs='classic', preprocessor=mock_preprocessor) assert len(record) == 0 @@ -570,7 +571,7 @@ def test_check_classic_valid_with_preprocessor(points): (3, 1, 4, 4, 29, 4))]) def test_check_classic_valid_without_preprocessor(points): """Test that valid inputs when using no preprocessor raises no warning""" - with pytest.warns(None) as record: + with warnings.catch_warnings(record=True) as record: check_input(points, type_of_inputs='classic', preprocessor=None) assert len(record) == 0 @@ -585,12 +586,12 @@ def test_check_classic_behaviour_auto_dtype(): """Checks that check_input (for points) allows by default every type if using a preprocessor, and numeric types if using no preprocessor""" points_prep = ['img1.png', 'img2.png', 'img3.png', 'img5.png'] - with pytest.warns(None) as record: + with warnings.catch_warnings(record=True) as record: check_input(points_prep, type_of_inputs='classic', preprocessor=mock_preprocessor) assert len(record) == 0 - with pytest.warns(None) as record: + with warnings.catch_warnings(record=True) as record: check_input(points_no_prep(), type_of_inputs='classic') # numeric type assert len(record) == 0