From 02cc9373c8a7c5d8f5ae23add605ad15f6295b11 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Thu, 24 Jan 2019 16:49:54 +0100 Subject: [PATCH 01/75] FIX: make proposal for sdml formulation --- metric_learn/sdml.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py index 40fd5727..32e7c6ee 100644 --- a/metric_learn/sdml.py +++ b/metric_learn/sdml.py @@ -58,15 +58,12 @@ def _fit(self, pairs, y): # set up prior M if self.use_cov: X = np.vstack({tuple(row) for row in pairs.reshape(-1, pairs.shape[2])}) - self.M_ = pinvh(np.cov(X, rowvar = False)) + self.M_ = np.cov(X, rowvar = False) else: self.M_ = np.identity(pairs.shape[2]) diff = pairs[:, 0] - pairs[:, 1] loss_matrix = (diff.T * y).dot(diff) - P = self.M_ + self.balance_param * loss_matrix - emp_cov = pinvh(P) - # hack: ensure positive semidefinite - emp_cov = emp_cov.T.dot(emp_cov) + emp_cov = self.M_ + self.balance_param * loss_matrix _, self.M_ = graph_lasso(emp_cov, self.sparsity_param, verbose=self.verbose) self.transformer_ = transformer_from_metric(self.M_) From aebd47f3e95a24f0112cdf4549c53fc10a5caa9c Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Tue, 29 Jan 2019 11:18:21 +0100 Subject: [PATCH 02/75] MAINT clearer formulation to make the prior appear --- metric_learn/sdml.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py index 32e7c6ee..9bf4398d 100644 --- a/metric_learn/sdml.py +++ b/metric_learn/sdml.py @@ -58,12 +58,12 @@ def _fit(self, pairs, y): # set up prior M if self.use_cov: X = np.vstack({tuple(row) for row in pairs.reshape(-1, pairs.shape[2])}) - self.M_ = np.cov(X, rowvar = False) + self.M_ = pinvh(np.cov(X, rowvar = False)) else: self.M_ = np.identity(pairs.shape[2]) diff = pairs[:, 0] - pairs[:, 1] loss_matrix = (diff.T * y).dot(diff) - emp_cov = self.M_ + self.balance_param * loss_matrix + emp_cov = pinvh(self.M_) + self.balance_param * loss_matrix _, self.M_ = graph_lasso(emp_cov, self.sparsity_param, verbose=self.verbose) self.transformer_ = transformer_from_metric(self.M_) From 40b2c88e0f866c82db0741d50c57cfba3478d133 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Tue, 29 Jan 2019 12:12:44 +0100 Subject: [PATCH 03/75] MAINT call the prior prior --- metric_learn/sdml.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py index 9bf4398d..dd356b4b 100644 --- a/metric_learn/sdml.py +++ b/metric_learn/sdml.py @@ -58,12 +58,12 @@ def _fit(self, pairs, y): # set up prior M if self.use_cov: X = np.vstack({tuple(row) for row in pairs.reshape(-1, pairs.shape[2])}) - self.M_ = pinvh(np.cov(X, rowvar = False)) + prior = pinvh(np.cov(X, rowvar = False)) else: - self.M_ = np.identity(pairs.shape[2]) + prior = np.identity(pairs.shape[2]) diff = pairs[:, 0] - pairs[:, 1] loss_matrix = (diff.T * y).dot(diff) - emp_cov = pinvh(self.M_) + self.balance_param * loss_matrix + emp_cov = pinvh(prior) + self.balance_param * loss_matrix _, self.M_ = graph_lasso(emp_cov, self.sparsity_param, verbose=self.verbose) self.transformer_ = transformer_from_metric(self.M_) From fb04cc969d52123cbec96301abcf31346bb9b1fc Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Fri, 1 Feb 2019 13:56:15 +0100 Subject: [PATCH 04/75] Use skggm instead of graphical lasso --- examples/test_sdml.py | 18 ++++++++++++++++++ metric_learn/sdml.py | 6 +++--- 2 files changed, 21 insertions(+), 3 deletions(-) create mode 100644 examples/test_sdml.py diff --git a/examples/test_sdml.py b/examples/test_sdml.py new file mode 100644 index 00000000..300218bd --- /dev/null +++ b/examples/test_sdml.py @@ -0,0 +1,18 @@ +import numpy as np +from sklearn.datasets import load_iris +from metric_learn import SDML_Supervised +import matplotlib.pyplot as plt + +dataset = load_iris() +X, y = dataset.data, dataset.target +sdml = SDML_Supervised(num_constraints=200) +sdml.fit(X, y, random_state = np.random.RandomState(1234)) +embeddings = sdml.transform(X) + +plt.figure() +plt.scatter(X[:, 0], X[:, 1], c=y) +plt.show() + +plt.figure() +plt.scatter(embeddings[:, 0], embeddings[:, 1], c=y) +plt.show() \ No newline at end of file diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py index dd356b4b..0db9e972 100644 --- a/metric_learn/sdml.py +++ b/metric_learn/sdml.py @@ -12,8 +12,8 @@ import warnings import numpy as np from sklearn.base import TransformerMixin -from sklearn.covariance import graph_lasso -from sklearn.utils.extmath import pinvh +from inverse_covariance import quic +from scipy.linalg import pinvh from .base_metric import MahalanobisMixin, _PairsClassifierMixin from .constraints import Constraints, wrap_pairs @@ -64,7 +64,7 @@ def _fit(self, pairs, y): diff = pairs[:, 0] - pairs[:, 1] loss_matrix = (diff.T * y).dot(diff) emp_cov = pinvh(prior) + self.balance_param * loss_matrix - _, self.M_ = graph_lasso(emp_cov, self.sparsity_param, verbose=self.verbose) + self.M_, *_ = quic(emp_cov, lam=self.sparsity_param) self.transformer_ = transformer_from_metric(self.M_) return self From 518d6e83154c8342b8d9acc7e5a3f12b0a884723 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Fri, 1 Feb 2019 14:02:53 +0100 Subject: [PATCH 05/75] Be more severe for the class separation --- test/metric_learn_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py index eebce1f9..2a72d67f 100644 --- a/test/metric_learn_test.py +++ b/test/metric_learn_test.py @@ -141,7 +141,7 @@ def test_iris(self): sdml = SDML_Supervised(num_constraints=1500) sdml.fit(self.iris_points, self.iris_labels, random_state=rs) csep = class_separation(sdml.transform(self.iris_points), self.iris_labels) - self.assertLess(csep, 0.25) + self.assertLess(csep, 0.20) def test_deprecation(self): # test that the right deprecation message is thrown. From 8f0b113c6824bcfdc22c98d4344c58e0995e0dc1 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Fri, 1 Feb 2019 14:08:02 +0100 Subject: [PATCH 06/75] Put back verbose param --- metric_learn/sdml.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py index 4458caad..da64a5b4 100644 --- a/metric_learn/sdml.py +++ b/metric_learn/sdml.py @@ -64,7 +64,7 @@ def _fit(self, pairs, y): diff = pairs[:, 0] - pairs[:, 1] loss_matrix = (diff.T * y).dot(diff) emp_cov = pinvh(prior) + self.balance_param * loss_matrix - M, *_ = quic(emp_cov, lam=self.sparsity_param) + M, *_ = quic(emp_cov, lam=self.sparsity_param, msg=self.verbose) self.transformer_ = transformer_from_metric(M) return self From c57a35a8f7497de0bd0208049817b8540d2698d6 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Fri, 1 Feb 2019 14:09:21 +0100 Subject: [PATCH 07/75] MAINT: make more explicit the fact that to use identity (i.e. an SPD matrix) as initialization --- metric_learn/sdml.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py index da64a5b4..66aa9f10 100644 --- a/metric_learn/sdml.py +++ b/metric_learn/sdml.py @@ -64,7 +64,8 @@ def _fit(self, pairs, y): diff = pairs[:, 0] - pairs[:, 1] loss_matrix = (diff.T * y).dot(diff) emp_cov = pinvh(prior) + self.balance_param * loss_matrix - M, *_ = quic(emp_cov, lam=self.sparsity_param, msg=self.verbose) + M, *_ = quic(emp_cov, lam=self.sparsity_param, msg=self.verbose, + Theta0=np.eye(pairs.shape[2])) self.transformer_ = transformer_from_metric(M) return self From f0eb9385ea8780e989ba49c100faaa2ccac64cba Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Fri, 1 Feb 2019 14:14:16 +0100 Subject: [PATCH 08/75] Add skggm as a requirement for SDML --- README.rst | 2 +- doc/getting_started.rst | 2 +- setup.py | 1 + 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index 0db1f87a..99a148d1 100644 --- a/README.rst +++ b/README.rst @@ -20,7 +20,7 @@ Metric Learning algorithms in Python. **Dependencies** - Python 2.7+, 3.4+ -- numpy, scipy, scikit-learn +- numpy, scipy, scikit-learn, skggm (for `SDML`) - (for running the examples only: matplotlib) **Installation/Setup** diff --git a/doc/getting_started.rst b/doc/getting_started.rst index 040adedc..9f132c9e 100644 --- a/doc/getting_started.rst +++ b/doc/getting_started.rst @@ -15,7 +15,7 @@ Alternately, download the source repository and run: **Dependencies** - Python 2.7+, 3.4+ -- numpy, scipy, scikit-learn +- numpy, scipy, scikit-learn, skggm (for `SDML`) - (for running the examples only: matplotlib) **Notes** diff --git a/setup.py b/setup.py index 168fbcb6..0f9fccc9 100755 --- a/setup.py +++ b/setup.py @@ -38,6 +38,7 @@ extras_require=dict( docs=['sphinx', 'shinx_rtd_theme', 'numpydoc'], demo=['matplotlib'], + sdml=['skggm'] ), test_suite='test', keywords=[ From 821db0b849bb2e86e5a9a39bc292af71e049f0b1 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Fri, 1 Feb 2019 14:16:40 +0100 Subject: [PATCH 09/75] Add skggm to required packages for travis --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 5daa20b3..34e1f2af 100644 --- a/.travis.yml +++ b/.travis.yml @@ -7,5 +7,5 @@ python: before_install: - pip install --upgrade pip - pip install wheel - - pip install numpy scipy scikit-learn + - pip install numpy scipy scikit-learn skggm script: pytest test From bd2862dff2b46e301ca00d78c015098400c7d7f2 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Fri, 1 Feb 2019 14:22:36 +0100 Subject: [PATCH 10/75] Also add cython as a dependency --- .travis.yml | 2 +- README.rst | 2 +- doc/getting_started.rst | 2 +- setup.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.travis.yml b/.travis.yml index 34e1f2af..0eea92d5 100644 --- a/.travis.yml +++ b/.travis.yml @@ -7,5 +7,5 @@ python: before_install: - pip install --upgrade pip - pip install wheel - - pip install numpy scipy scikit-learn skggm + - pip install numpy scipy scikit-learn cython skggm script: pytest test diff --git a/README.rst b/README.rst index 99a148d1..4ffba046 100644 --- a/README.rst +++ b/README.rst @@ -20,7 +20,7 @@ Metric Learning algorithms in Python. **Dependencies** - Python 2.7+, 3.4+ -- numpy, scipy, scikit-learn, skggm (for `SDML`) +- numpy, scipy, scikit-learn, and skggm and cython for `SDML` - (for running the examples only: matplotlib) **Installation/Setup** diff --git a/doc/getting_started.rst b/doc/getting_started.rst index 9f132c9e..587e91ef 100644 --- a/doc/getting_started.rst +++ b/doc/getting_started.rst @@ -15,7 +15,7 @@ Alternately, download the source repository and run: **Dependencies** - Python 2.7+, 3.4+ -- numpy, scipy, scikit-learn, skggm (for `SDML`) +- numpy, scipy, scikit-learn, and skggm and cython for `SDML` - (for running the examples only: matplotlib) **Notes** diff --git a/setup.py b/setup.py index 0f9fccc9..e3793dae 100755 --- a/setup.py +++ b/setup.py @@ -38,7 +38,7 @@ extras_require=dict( docs=['sphinx', 'shinx_rtd_theme', 'numpydoc'], demo=['matplotlib'], - sdml=['skggm'] + sdml=['cython', 'skggm'] ), test_suite='test', keywords=[ From c6a2daa94db0e19cab2c84fbe20be09c0376ac57 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Fri, 1 Feb 2019 14:53:35 +0100 Subject: [PATCH 11/75] FIX: install all except skggm and then skggm --- .travis.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 0eea92d5..ab304c3e 100644 --- a/.travis.yml +++ b/.travis.yml @@ -7,5 +7,6 @@ python: before_install: - pip install --upgrade pip - pip install wheel - - pip install numpy scipy scikit-learn cython skggm + - pip install cython numpy scipy scikit-learn + - pip install skggm script: pytest test From 93d790e8b76582c3cb6f654898b59bf4d8d70e3a Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Fri, 1 Feb 2019 14:59:15 +0100 Subject: [PATCH 12/75] Remove cython dependency --- .travis.yml | 2 +- README.rst | 2 +- doc/getting_started.rst | 2 +- setup.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.travis.yml b/.travis.yml index ab304c3e..f6ef38e0 100644 --- a/.travis.yml +++ b/.travis.yml @@ -7,6 +7,6 @@ python: before_install: - pip install --upgrade pip - pip install wheel - - pip install cython numpy scipy scikit-learn + - pip install numpy scipy scikit-learn - pip install skggm script: pytest test diff --git a/README.rst b/README.rst index 4ffba046..bb3f2fc7 100644 --- a/README.rst +++ b/README.rst @@ -20,7 +20,7 @@ Metric Learning algorithms in Python. **Dependencies** - Python 2.7+, 3.4+ -- numpy, scipy, scikit-learn, and skggm and cython for `SDML` +- numpy, scipy, scikit-learn, and skggm for `SDML` - (for running the examples only: matplotlib) **Installation/Setup** diff --git a/doc/getting_started.rst b/doc/getting_started.rst index 587e91ef..765545f8 100644 --- a/doc/getting_started.rst +++ b/doc/getting_started.rst @@ -15,7 +15,7 @@ Alternately, download the source repository and run: **Dependencies** - Python 2.7+, 3.4+ -- numpy, scipy, scikit-learn, and skggm and cython for `SDML` +- numpy, scipy, scikit-learn, and skggm for `SDML` - (for running the examples only: matplotlib) **Notes** diff --git a/setup.py b/setup.py index e3793dae..0f9fccc9 100755 --- a/setup.py +++ b/setup.py @@ -38,7 +38,7 @@ extras_require=dict( docs=['sphinx', 'shinx_rtd_theme', 'numpydoc'], demo=['matplotlib'], - sdml=['cython', 'skggm'] + sdml=['skggm'] ), test_suite='test', keywords=[ From cae6c28c1dc6a4587bfb065c6daec29c0cb05fbf Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Fri, 15 Feb 2019 09:21:42 +0100 Subject: [PATCH 13/75] Install skggm only if we have at least python 3.6 --- .travis.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index f6ef38e0..59751151 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,9 +4,10 @@ cache: pip python: - "2.7" - "3.4" + - "3.6" before_install: - pip install --upgrade pip - pip install wheel - pip install numpy scipy scikit-learn - - pip install skggm + - if [[ $TRAVIS_PYTHON_VERSION == 3.6 ]]; then pip install skggm; fi script: pytest test From 5d673bafeb534b774264ed827956b1fb5e244866 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Fri, 15 Feb 2019 09:22:27 +0100 Subject: [PATCH 14/75] Should work if we want other versions superior to 3.6 --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 59751151..575552db 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,5 +9,5 @@ before_install: - pip install --upgrade pip - pip install wheel - pip install numpy scipy scikit-learn - - if [[ $TRAVIS_PYTHON_VERSION == 3.6 ]]; then pip install skggm; fi + - if [[ $TRAVIS_PYTHON_VERSION >= 3.6 ]]; then pip install skggm; fi script: pytest test From e8a28d547d67c11d6d996cccbbef932f04228dab Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Fri, 15 Feb 2019 09:38:00 +0100 Subject: [PATCH 15/75] Fix bash >= which should be written -ge --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 575552db..654d92ea 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,5 +9,5 @@ before_install: - pip install --upgrade pip - pip install wheel - pip install numpy scipy scikit-learn - - if [[ $TRAVIS_PYTHON_VERSION >= 3.6 ]]; then pip install skggm; fi + - if [[ $TRAVIS_PYTHON_VERSION -ge 3.6 ]]; then pip install skggm; fi script: pytest test From e74070267c2899a4ca7649ce0bf25ec7bd6b7f2b Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Fri, 15 Feb 2019 10:29:19 +0100 Subject: [PATCH 16/75] Deal with tests when skggm is not installed and fix some PEP8 warnings --- metric_learn/_util.py | 8 +++++++ metric_learn/sdml.py | 10 ++++++-- test/metric_learn_test.py | 48 +++++++++++++++++++++------------------ test/test_base_metric.py | 29 +++++++++++++++++++---- test/test_utils.py | 39 ++++++++++++++++--------------- 5 files changed, 87 insertions(+), 47 deletions(-) diff --git a/metric_learn/_util.py b/metric_learn/_util.py index bd57fd5f..82552ff7 100644 --- a/metric_learn/_util.py +++ b/metric_learn/_util.py @@ -15,6 +15,14 @@ def vector_norm(X): return np.linalg.norm(X, axis=1) +def has_installed_skggm(): + try: + import inverse_covariance + return True + except ImportError: + return False + + def check_input(input_data, y=None, preprocessor=None, type_of_inputs='classic', tuple_size=None, accept_sparse=False, dtype='numeric', order=None, diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py index 66aa9f10..6aed791e 100644 --- a/metric_learn/sdml.py +++ b/metric_learn/sdml.py @@ -12,12 +12,11 @@ import warnings import numpy as np from sklearn.base import TransformerMixin -from inverse_covariance import quic from scipy.linalg import pinvh from .base_metric import MahalanobisMixin, _PairsClassifierMixin from .constraints import Constraints, wrap_pairs -from ._util import transformer_from_metric +from ._util import transformer_from_metric, has_installed_skggm class _BaseSDML(MahalanobisMixin): @@ -45,6 +44,13 @@ def __init__(self, balance_param=0.5, sparsity_param=0.01, use_cov=True, The preprocessor to call to get tuples from indices. If array-like, tuples will be gotten like this: X[indices]. """ + if has_installed_skggm(): + from inverse_covariance import quic + else: + raise NotImplementedError("SDML cannot be instantiated without " + "installing skggm. Please install skggm and " + "try again (make sure you meet skggm's " + "requirements).") self.balance_param = balance_param self.sparsity_param = sparsity_param self.use_cov = use_cov diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py index bb6cc3a6..7977a49a 100644 --- a/test/metric_learn_test.py +++ b/test/metric_learn_test.py @@ -13,8 +13,10 @@ from metric_learn import ( LMNN, NCA, LFDA, Covariance, MLKR, MMC, - LSML_Supervised, ITML_Supervised, SDML_Supervised, RCA_Supervised, MMC_Supervised) + LSML_Supervised, ITML_Supervised, SDML_Supervised, RCA_Supervised, + MMC_Supervised) # Import this specially for testing. +from metric_learn._util import has_installed_skggm from metric_learn.constraints import wrap_pairs from metric_learn.lmnn import python_LMNN @@ -147,28 +149,30 @@ def test_no_twice_same_objective(capsys): assert len(objectives[:-1]) == len(set(objectives[:-1])) -class TestSDML(MetricTestCase): - def test_iris(self): - # Note: this is a flaky test, which fails for certain seeds. - # TODO: un-flake it! - rs = np.random.RandomState(5555) - - sdml = SDML_Supervised(num_constraints=1500) - sdml.fit(self.iris_points, self.iris_labels, random_state=rs) - csep = class_separation(sdml.transform(self.iris_points), self.iris_labels) - self.assertLess(csep, 0.20) +if has_installed_skggm(): + class TestSDML(MetricTestCase): + def test_iris(self): + # Note: this is a flaky test, which fails for certain seeds. + # TODO: un-flake it! + rs = np.random.RandomState(5555) - def test_deprecation_num_labeled(self): - # test that a deprecation message is thrown if num_labeled is set at - # initialization - # TODO: remove in v.0.6 - X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) - y = np.array([1, 0, 1, 0]) - sdml_supervised = SDML_Supervised(num_labeled=np.inf) - msg = ('"num_labeled" parameter is not used.' - ' It has been deprecated in version 0.5.0 and will be' - 'removed in 0.6.0') - assert_warns_message(DeprecationWarning, msg, sdml_supervised.fit, X, y) + sdml = SDML_Supervised(num_constraints=1500) + sdml.fit(self.iris_points, self.iris_labels, random_state=rs) + csep = class_separation(sdml.transform(self.iris_points), + self.iris_labels) + self.assertLess(csep, 0.20) + + def test_deprecation_num_labeled(self): + # test that a deprecation message is thrown if num_labeled is set at + # initialization + # TODO: remove in v.0.6 + X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) + y = np.array([1, 0, 1, 0]) + sdml_supervised = SDML_Supervised(num_labeled=np.inf) + msg = ('"num_labeled" parameter is not used.' + ' It has been deprecated in version 0.5.0 and will be' + 'removed in 0.6.0') + assert_warns_message(DeprecationWarning, msg, sdml_supervised.fit, X, y) class TestNCA(MetricTestCase): diff --git a/test/test_base_metric.py b/test/test_base_metric.py index 6c9a6dc5..5fdb428e 100644 --- a/test/test_base_metric.py +++ b/test/test_base_metric.py @@ -4,6 +4,8 @@ import numpy as np from sklearn import clone from sklearn.utils.testing import set_random_state + +from metric_learn._util import has_installed_skggm from test.test_utils import ids_metric_learners, metric_learners @@ -52,16 +54,33 @@ def test_lsml(self): weights=None) """.strip('\n')) - def test_sdml(self): - self.assertEqual(str(metric_learn.SDML()), - "SDML(balance_param=0.5, preprocessor=None, " - "sparsity_param=0.01, use_cov=True,\n verbose=False)") - self.assertEqual(str(metric_learn.SDML_Supervised()), """ + if has_installed_skggm(): + def test_sdml(self): + self.assertEqual(str(metric_learn.SDML()), + "SDML(balance_param=0.5, preprocessor=None, " + "sparsity_param=0.01, use_cov=True,\n " + "verbose=False)") + self.assertEqual(str(metric_learn.SDML_Supervised()), """ SDML_Supervised(balance_param=0.5, num_constraints=None, num_labeled='deprecated', preprocessor=None, sparsity_param=0.01, use_cov=True, verbose=False) """.strip('\n')) + else: + # if we haven't install skggm, we will just check that instantiating SDML + # or SDML_Supervised will return an error + def test_sdml(self): + expected_msg = ("SDML cannot be instantiated without " + "installing skggm. Please install skggm and " + "try again (make sure you meet skggm's " + "requirements).") + with pytest.raises(NotImplementedError) as raised_error: + metric_learn.SDML() + assert str(raised_error) == expected_msg + with pytest.raises(NotImplementedError) as raised_error: + metric_learn.SDML_Supervised() + assert str(raised_error) == expected_msg + def test_rca(self): self.assertEqual(str(metric_learn.RCA()), "RCA(num_dims=None, pca_comps=None, preprocessor=None)") diff --git a/test/test_utils.py b/test/test_utils.py index 5e640dbc..bce86f78 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -9,7 +9,8 @@ from sklearn.base import clone from metric_learn._util import (check_input, make_context, preprocess_tuples, make_name, preprocess_points, - check_collapsed_pairs, validate_vector) + check_collapsed_pairs, validate_vector, + has_installed_skggm) from metric_learn import (ITML, LSML, MMC, RCA, SDML, Covariance, LFDA, LMNN, MLKR, NCA, ITML_Supervised, LSML_Supervised, MMC_Supervised, RCA_Supervised, SDML_Supervised, @@ -104,26 +105,28 @@ def build_quadruplets(with_preprocessor=False): pairs_learners = [(ITML(), build_pairs), (MMC(max_iter=2), build_pairs), # max_iter=2 for faster - (SDML(), build_pairs), ] +if has_installed_skggm(): + pairs_learners.append(((SDML(), build_pairs))) ids_pairs_learners = list(map(lambda x: x.__class__.__name__, - [learner for (learner, _) in - pairs_learners])) - -classifiers = [(Covariance(), build_classification), - (LFDA(), build_classification), - (LMNN(), build_classification), - (NCA(), build_classification), - (RCA(), build_classification), - (ITML_Supervised(max_iter=5), build_classification), - (LSML_Supervised(), build_classification), - (MMC_Supervised(max_iter=5), build_classification), - (RCA_Supervised(num_chunks=10), build_classification), - (SDML_Supervised(), build_classification) - ] + [learner for (learner, _) in + pairs_learners])) + +classifiers = [(Covariance(), build_classification), + (LFDA(), build_classification), + (LMNN(), build_classification), + (NCA(), build_classification), + (RCA(), build_classification), + (ITML_Supervised(max_iter=5), build_classification), + (LSML_Supervised(), build_classification), + (MMC_Supervised(max_iter=5), build_classification), + (RCA_Supervised(num_chunks=10), build_classification), + ] +if has_installed_skggm(): + classifiers.append(((SDML_Supervised(), build_classification))) ids_classifiers = list(map(lambda x: x.__class__.__name__, - [learner for (learner, _) in - classifiers])) + [learner for (learner, _) in + classifiers])) regressors = [(MLKR(), build_regression)] ids_regressors = list(map(lambda x: x.__class__.__name__, From 333675b6ea2661964145e9d54700fea7e1b663df Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Fri, 15 Feb 2019 10:34:21 +0100 Subject: [PATCH 17/75] replace manual calls of algorithms with tuples_learners --- test/test_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index bce86f78..fc3de3e7 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -842,8 +842,8 @@ class MockMetricLearner(MahalanobisMixin): "or a callable.".format(type(preprocessor))) -@pytest.mark.parametrize('estimator', [ITML(), LSML(), MMC(), SDML()], - ids=['ITML', 'LSML', 'MMC', 'SDML']) +@pytest.mark.parametrize('estimator', tuples_learners, + ids=ids_tuples_learners) def test_error_message_tuple_size(estimator): """Tests that if a tuples learner is not given the good number of points per tuple, it throws an error message""" From 1a6e97b2cb2fe12cf933c8f55a1f72ed17d1205a Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Fri, 15 Feb 2019 10:53:34 +0100 Subject: [PATCH 18/75] Remove another call of SDML if skggm is not installed --- test/test_transformer_metric_conversion.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/test/test_transformer_metric_conversion.py b/test/test_transformer_metric_conversion.py index 59986011..8e973a2f 100644 --- a/test/test_transformer_metric_conversion.py +++ b/test/test_transformer_metric_conversion.py @@ -6,6 +6,7 @@ from metric_learn import ( LMNN, NCA, LFDA, Covariance, MLKR, LSML_Supervised, ITML_Supervised, SDML_Supervised, RCA_Supervised) +from metric_learn._util import has_installed_skggm class TestTransformerMetricConversion(unittest.TestCase): @@ -42,12 +43,13 @@ def test_lmnn(self): L = lmnn.transformer_ assert_array_almost_equal(L.T.dot(L), lmnn.get_mahalanobis_matrix()) - def test_sdml_supervised(self): - seed = np.random.RandomState(1234) - sdml = SDML_Supervised(num_constraints=1500) - sdml.fit(self.X, self.y, random_state=seed) - L = sdml.transformer_ - assert_array_almost_equal(L.T.dot(L), sdml.get_mahalanobis_matrix()) + if has_installed_skggm(): + def test_sdml_supervised(self): + seed = np.random.RandomState(1234) + sdml = SDML_Supervised(num_constraints=1500) + sdml.fit(self.X, self.y, random_state=seed) + L = sdml.transformer_ + assert_array_almost_equal(L.T.dot(L), sdml.get_mahalanobis_matrix()) def test_nca(self): n = self.X.shape[0] From 7cecf27355fb54d5c7435201389e58ad9c5ede55 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Fri, 15 Feb 2019 10:56:07 +0100 Subject: [PATCH 19/75] FIX fix the test_error_message_tuple_size --- test/test_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index fc3de3e7..d9f53547 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -842,9 +842,9 @@ class MockMetricLearner(MahalanobisMixin): "or a callable.".format(type(preprocessor))) -@pytest.mark.parametrize('estimator', tuples_learners, +@pytest.mark.parametrize('estimator, _', tuples_learners, ids=ids_tuples_learners) -def test_error_message_tuple_size(estimator): +def test_error_message_tuple_size(estimator, _): """Tests that if a tuples learner is not given the good number of points per tuple, it throws an error message""" estimator = clone(estimator) From 5303e1a294ce44063f537648addcaa51b4262e17 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Fri, 15 Feb 2019 11:05:52 +0100 Subject: [PATCH 20/75] FIX fix test_sdml_supervised --- test/test_fit_transform.py | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/test/test_fit_transform.py b/test/test_fit_transform.py index 118f6b90..27f54c1c 100644 --- a/test/test_fit_transform.py +++ b/test/test_fit_transform.py @@ -7,6 +7,8 @@ LMNN, NCA, LFDA, Covariance, MLKR, LSML_Supervised, ITML_Supervised, SDML_Supervised, RCA_Supervised, MMC_Supervised) +from metric_learn._util import has_installed_skggm + class TestFitTransform(unittest.TestCase): @classmethod @@ -60,17 +62,18 @@ def test_lmnn(self): assert_array_almost_equal(res_1, res_2) - def test_sdml_supervised(self): - seed = np.random.RandomState(1234) - sdml = SDML_Supervised(num_constraints=1500) - sdml.fit(self.X, self.y, random_state=seed) - res_1 = sdml.transform(self.X) + if has_installed_skggm(): + def test_sdml_supervised(self): + seed = np.random.RandomState(1234) + sdml = SDML_Supervised(num_constraints=1500) + sdml.fit(self.X, self.y, random_state=seed) + res_1 = sdml.transform(self.X) - seed = np.random.RandomState(1234) - sdml = SDML_Supervised(num_constraints=1500) - res_2 = sdml.fit_transform(self.X, self.y, random_state=seed) + seed = np.random.RandomState(1234) + sdml = SDML_Supervised(num_constraints=1500) + res_2 = sdml.fit_transform(self.X, self.y, random_state=seed) - assert_array_almost_equal(res_1, res_2) + assert_array_almost_equal(res_1, res_2) def test_nca(self): n = self.X.shape[0] From 377760a3d4ac2abd52423a401ebe9478b3602d6f Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Fri, 15 Feb 2019 11:15:09 +0100 Subject: [PATCH 21/75] FIX: fix another sdml test --- test/test_base_metric.py | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/test/test_base_metric.py b/test/test_base_metric.py index 5fdb428e..dabffa1b 100644 --- a/test/test_base_metric.py +++ b/test/test_base_metric.py @@ -56,11 +56,11 @@ def test_lsml(self): if has_installed_skggm(): def test_sdml(self): - self.assertEqual(str(metric_learn.SDML()), - "SDML(balance_param=0.5, preprocessor=None, " - "sparsity_param=0.01, use_cov=True,\n " - "verbose=False)") - self.assertEqual(str(metric_learn.SDML_Supervised()), """ + self.assertEqual(str(metric_learn.SDML()), + "SDML(balance_param=0.5, preprocessor=None, " + "sparsity_param=0.01, use_cov=True,\n " + "verbose=False)") + self.assertEqual(str(metric_learn.SDML_Supervised()), """ SDML_Supervised(balance_param=0.5, num_constraints=None, num_labeled='deprecated', preprocessor=None, sparsity_param=0.01, use_cov=True, verbose=False) @@ -70,16 +70,16 @@ def test_sdml(self): # if we haven't install skggm, we will just check that instantiating SDML # or SDML_Supervised will return an error def test_sdml(self): - expected_msg = ("SDML cannot be instantiated without " - "installing skggm. Please install skggm and " - "try again (make sure you meet skggm's " - "requirements).") - with pytest.raises(NotImplementedError) as raised_error: - metric_learn.SDML() - assert str(raised_error) == expected_msg - with pytest.raises(NotImplementedError) as raised_error: - metric_learn.SDML_Supervised() - assert str(raised_error) == expected_msg + expected_msg = ("SDML cannot be instantiated without " + "installing skggm. Please install skggm and " + "try again (make sure you meet skggm's " + "requirements).") + with pytest.raises(NotImplementedError) as raised_error: + metric_learn.SDML() + assert str(raised_error.value) == expected_msg + with pytest.raises(NotImplementedError) as raised_error: + metric_learn.SDML_Supervised() + assert str(raised_error.value) == expected_msg def test_rca(self): self.assertEqual(str(metric_learn.RCA()), From 0a46ad550251e9bdae3c7729ced0731b74e4b236 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Fri, 15 Feb 2019 13:38:39 +0100 Subject: [PATCH 22/75] FIX quic call for python 2.7 --- metric_learn/sdml.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py index 6aed791e..ad270455 100644 --- a/metric_learn/sdml.py +++ b/metric_learn/sdml.py @@ -70,8 +70,8 @@ def _fit(self, pairs, y): diff = pairs[:, 0] - pairs[:, 1] loss_matrix = (diff.T * y).dot(diff) emp_cov = pinvh(prior) + self.balance_param * loss_matrix - M, *_ = quic(emp_cov, lam=self.sparsity_param, msg=self.verbose, - Theta0=np.eye(pairs.shape[2])) + M, _, _, _, _, _ = quic(emp_cov, lam=self.sparsity_param, msg=self.verbose, + Theta0=np.eye(pairs.shape[2])) self.transformer_ = transformer_from_metric(M) return self From 391d773a305067e28edf62b7aebe2605178892c9 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Fri, 15 Feb 2019 13:42:03 +0100 Subject: [PATCH 23/75] Fix quic import --- metric_learn/sdml.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py index ad270455..ee68c542 100644 --- a/metric_learn/sdml.py +++ b/metric_learn/sdml.py @@ -17,6 +17,8 @@ from .base_metric import MahalanobisMixin, _PairsClassifierMixin from .constraints import Constraints, wrap_pairs from ._util import transformer_from_metric, has_installed_skggm +if has_installed_skggm(): + from inverse_covariance import quic class _BaseSDML(MahalanobisMixin): @@ -44,9 +46,7 @@ def __init__(self, balance_param=0.5, sparsity_param=0.01, use_cov=True, The preprocessor to call to get tuples from indices. If array-like, tuples will be gotten like this: X[indices]. """ - if has_installed_skggm(): - from inverse_covariance import quic - else: + if not has_installed_skggm(): raise NotImplementedError("SDML cannot be instantiated without " "installing skggm. Please install skggm and " "try again (make sure you meet skggm's " From 66547698a5913111fdd19169d8b5d47615983915 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Fri, 15 Feb 2019 13:45:01 +0100 Subject: [PATCH 24/75] Add Sigma0 initalization (both sigma zero and theta zero should be specified otherwise an error is returned --- metric_learn/sdml.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py index ee68c542..96146019 100644 --- a/metric_learn/sdml.py +++ b/metric_learn/sdml.py @@ -71,7 +71,8 @@ def _fit(self, pairs, y): loss_matrix = (diff.T * y).dot(diff) emp_cov = pinvh(prior) + self.balance_param * loss_matrix M, _, _, _, _, _ = quic(emp_cov, lam=self.sparsity_param, msg=self.verbose, - Theta0=np.eye(pairs.shape[2])) + Theta0=np.eye(pairs.shape[2]), + Sigma0=np.eye(pairs.shape[2])) self.transformer_ = transformer_from_metric(M) return self From ac4e18a5b7a99c93d022c0073329215eb9965fcb Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Fri, 15 Feb 2019 17:35:08 +0100 Subject: [PATCH 25/75] Deal with SDML making some tests fail --- metric_learn/sdml.py | 18 +++++++--- test/metric_learn_test.py | 3 +- test/test_fit_transform.py | 3 ++ test/test_mahalanobis_mixin.py | 25 ++++++++------ test/test_sklearn_compat.py | 62 ++++++++++++++++++---------------- 5 files changed, 63 insertions(+), 48 deletions(-) diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py index 96146019..e65525bc 100644 --- a/metric_learn/sdml.py +++ b/metric_learn/sdml.py @@ -64,17 +64,25 @@ def _fit(self, pairs, y): # set up prior M if self.use_cov: X = np.vstack({tuple(row) for row in pairs.reshape(-1, pairs.shape[2])}) - prior = pinvh(np.cov(X, rowvar = False)) + prior = pinvh(np.atleast_2d(np.cov(X, rowvar=False))) else: prior = np.identity(pairs.shape[2]) diff = pairs[:, 0] - pairs[:, 1] loss_matrix = (diff.T * y).dot(diff) emp_cov = pinvh(prior) + self.balance_param * loss_matrix - M, _, _, _, _, _ = quic(emp_cov, lam=self.sparsity_param, msg=self.verbose, - Theta0=np.eye(pairs.shape[2]), - Sigma0=np.eye(pairs.shape[2])) - self.transformer_ = transformer_from_metric(M) + # our initialization will be the matrix with emp_cov's eigenvalues, + # with a constant added so that they are all positive (plus an epsilon + # to ensure definiteness). This is empirical. TODO: see if there are + # better justified initializations. + w, V = np.linalg.eigh(emp_cov) + sigma0 = (V * (w - min(0, np.min(w)) + 1e-10)).dot(V.T) + theta0 = pinvh(sigma0) + M, _, _, _, _, _ = quic(emp_cov, lam=self.sparsity_param, + msg=self.verbose, + Theta0=theta0, Sigma0=sigma0) + M = M + 1e-10 * np.eye(M.shape[0]) # to ensure M is positive semi-definite + self.transformer_ = transformer_from_metric(np.atleast_2d(M)) return self diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py index 7977a49a..26b9ac25 100644 --- a/test/metric_learn_test.py +++ b/test/metric_learn_test.py @@ -166,8 +166,7 @@ def test_deprecation_num_labeled(self): # test that a deprecation message is thrown if num_labeled is set at # initialization # TODO: remove in v.0.6 - X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) - y = np.array([1, 0, 1, 0]) + X, y = make_classification() sdml_supervised = SDML_Supervised(num_labeled=np.inf) msg = ('"num_labeled" parameter is not used.' ' It has been deprecated in version 0.5.0 and will be' diff --git a/test/test_fit_transform.py b/test/test_fit_transform.py index 27f54c1c..fccf9238 100644 --- a/test/test_fit_transform.py +++ b/test/test_fit_transform.py @@ -1,3 +1,4 @@ +import pytest import unittest import numpy as np from sklearn.datasets import load_iris @@ -63,6 +64,8 @@ def test_lmnn(self): assert_array_almost_equal(res_1, res_2) if has_installed_skggm(): + # TODO: remove the mark when SDML has become deterministic + @pytest.mark.xfail def test_sdml_supervised(self): seed = np.random.RandomState(1234) sdml = SDML_Supervised(num_constraints=1500) diff --git a/test/test_mahalanobis_mixin.py b/test/test_mahalanobis_mixin.py index 1e555e73..1871eb51 100644 --- a/test/test_mahalanobis_mixin.py +++ b/test/test_mahalanobis_mixin.py @@ -96,7 +96,7 @@ def check_is_distance_matrix(pairwise): assert np.array_equal(pairwise, pairwise.T) # symmetry assert (pairwise.diagonal() == 0).all() # identity # triangular inequality - tol = 1e-15 + tol = 1e-12 assert (pairwise <= pairwise[:, :, np.newaxis] + pairwise[:, np.newaxis, :] + tol).all() @@ -272,14 +272,17 @@ def test_get_squared_metric(estimator, build_dataset): ids=ids_metric_learners) def test_transformer_is_2D(estimator, build_dataset): """Tests that the transformer of metric learners is 2D""" - input_data, labels, _, X = build_dataset() - model = clone(estimator) - set_random_state(model) - # test that it works for X.shape[1] features - model.fit(input_data, labels) - assert model.transformer_.shape == (X.shape[1], X.shape[1]) + # TODO: remove this check when SDML has become robust to 1D elements, + # or when the 1D case is dealt with separately + if not str(estimator).startswith('SDML'): + input_data, labels, _, X = build_dataset() + model = clone(estimator) + set_random_state(model) + # test that it works for X.shape[1] features + model.fit(input_data, labels) + assert model.transformer_.shape == (X.shape[1], X.shape[1]) - # test that it works for 1 feature - trunc_data = input_data[..., :1] - model.fit(trunc_data, labels) - assert model.transformer_.shape == (1, 1) # the transformer must be 2D + # test that it works for 1 feature + trunc_data = input_data[..., :1] + model.fit(trunc_data, labels) + assert model.transformer_.shape == (1, 1) # the transformer must be 2D diff --git a/test/test_sklearn_compat.py b/test/test_sklearn_compat.py index d9dce685..810f039c 100644 --- a/test/test_sklearn_compat.py +++ b/test/test_sklearn_compat.py @@ -72,7 +72,7 @@ def test_itml(self): def test_mmc(self): check_estimator(dMMC) - # This fails due to a FloatingPointError + # TODO: uncomment this when SDML has become deterministic # def test_sdml(self): # check_estimator(dSDML) @@ -115,37 +115,39 @@ def test_cross_validation_manual_vs_scikit(estimator, build_dataset, same as scikit-learn's cross-validation (some code for generating the folds is taken from scikit-learn). """ - if any(hasattr(estimator, method) for method in ["predict", "score"]): - input_data, labels, preprocessor, _ = build_dataset(with_preprocessor) - estimator = clone(estimator) - estimator.set_params(preprocessor=preprocessor) - set_random_state(estimator) - n_splits = 3 - kfold = KFold(shuffle=False, n_splits=n_splits) - n_samples = input_data.shape[0] - fold_sizes = (n_samples // n_splits) * np.ones(n_splits, dtype=np.int) - fold_sizes[:n_samples % n_splits] += 1 - current = 0 - scores, predictions = [], np.zeros(input_data.shape[0]) - for fold_size in fold_sizes: - start, stop = current, current + fold_size - current = stop - test_slice = slice(start, stop) - train_mask = np.ones(input_data.shape[0], bool) - train_mask[test_slice] = False - y_train, y_test = labels[train_mask], labels[test_slice] - estimator.fit(input_data[train_mask], y_train) + # TODO: remove this check when SDML has become deterministic + if not str(estimator).startswith('SDML'): + if any(hasattr(estimator, method) for method in ["predict", "score"]): + input_data, labels, preprocessor, _ = build_dataset(with_preprocessor) + estimator = clone(estimator) + estimator.set_params(preprocessor=preprocessor) + set_random_state(estimator) + n_splits = 3 + kfold = KFold(shuffle=False, n_splits=n_splits) + n_samples = input_data.shape[0] + fold_sizes = (n_samples // n_splits) * np.ones(n_splits, dtype=np.int) + fold_sizes[:n_samples % n_splits] += 1 + current = 0 + scores, predictions = [], np.zeros(input_data.shape[0]) + for fold_size in fold_sizes: + start, stop = current, current + fold_size + current = stop + test_slice = slice(start, stop) + train_mask = np.ones(input_data.shape[0], bool) + train_mask[test_slice] = False + y_train, y_test = labels[train_mask], labels[test_slice] + estimator.fit(input_data[train_mask], y_train) + if hasattr(estimator, "score"): + scores.append(estimator.score(input_data[test_slice], y_test)) + if hasattr(estimator, "predict"): + predictions[test_slice] = estimator.predict(input_data[test_slice]) if hasattr(estimator, "score"): - scores.append(estimator.score(input_data[test_slice], y_test)) + assert all(scores == cross_val_score(estimator, input_data, labels, + cv=kfold)) if hasattr(estimator, "predict"): - predictions[test_slice] = estimator.predict(input_data[test_slice]) - if hasattr(estimator, "score"): - assert all(scores == cross_val_score(estimator, input_data, labels, - cv=kfold)) - if hasattr(estimator, "predict"): - assert all(predictions == cross_val_predict(estimator, input_data, - labels, - cv=kfold)) + assert all(predictions == cross_val_predict(estimator, input_data, + labels, + cv=kfold)) def check_score(estimator, tuples, y): From 458d64606d818b2fcfa7dd81ab7ce52cc1b43079 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Fri, 15 Feb 2019 17:52:33 +0100 Subject: [PATCH 26/75] Remove epsilon that was unnecessary --- metric_learn/sdml.py | 1 - 1 file changed, 1 deletion(-) diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py index e65525bc..f24f7da0 100644 --- a/metric_learn/sdml.py +++ b/metric_learn/sdml.py @@ -81,7 +81,6 @@ def _fit(self, pairs, y): M, _, _, _, _, _ = quic(emp_cov, lam=self.sparsity_param, msg=self.verbose, Theta0=theta0, Sigma0=sigma0) - M = M + 1e-10 * np.eye(M.shape[0]) # to ensure M is positive semi-definite self.transformer_ = transformer_from_metric(np.atleast_2d(M)) return self From fd7c9fbd111be5b5c9e2855bc3801718d1841d0d Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Tue, 19 Feb 2019 11:44:56 +0100 Subject: [PATCH 27/75] FIX: use latest commit of skggm that fixes the non deterministic problem --- .travis.yml | 5 ++++- README.rst | 3 ++- doc/getting_started.rst | 3 ++- test/test_fit_transform.py | 2 -- test/test_sklearn_compat.py | 5 ++--- 5 files changed, 10 insertions(+), 8 deletions(-) diff --git a/.travis.yml b/.travis.yml index 654d92ea..a3605931 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,5 +9,8 @@ before_install: - pip install --upgrade pip - pip install wheel - pip install numpy scipy scikit-learn - - if [[ $TRAVIS_PYTHON_VERSION -ge 3.6 ]]; then pip install skggm; fi + - if [[ ($TRAVIS_PYTHON_VERSION -ge 3.6) || + ($TRAVIS_PYTHON_VERSION == 2.7)]]; then + pip install git+https://github.com/skggm/skggm.git@a0ed406586c43\ + 64ea3297a658f415e13b5cbdaf8; fi script: pytest test diff --git a/README.rst b/README.rst index bb3f2fc7..e1007287 100644 --- a/README.rst +++ b/README.rst @@ -20,7 +20,8 @@ Metric Learning algorithms in Python. **Dependencies** - Python 2.7+, 3.4+ -- numpy, scipy, scikit-learn, and skggm for `SDML` +- numpy, scipy, scikit-learn, and skggm (commit [a0ed406](https://github.com/\ +skggm/skggm/commit/a0ed406586c4364ea3297a658f415e13b5cbdaf8)) for `SDML` - (for running the examples only: matplotlib) **Installation/Setup** diff --git a/doc/getting_started.rst b/doc/getting_started.rst index 765545f8..8d769f29 100644 --- a/doc/getting_started.rst +++ b/doc/getting_started.rst @@ -15,7 +15,8 @@ Alternately, download the source repository and run: **Dependencies** - Python 2.7+, 3.4+ -- numpy, scipy, scikit-learn, and skggm for `SDML` +- numpy, scipy, scikit-learn, and skggm (commit [a0ed406](https://github.com/\ +skggm/skggm/commit/a0ed406586c4364ea3297a658f415e13b5cbdaf8)) for `SDML` - (for running the examples only: matplotlib) **Notes** diff --git a/test/test_fit_transform.py b/test/test_fit_transform.py index fccf9238..16f982e8 100644 --- a/test/test_fit_transform.py +++ b/test/test_fit_transform.py @@ -64,8 +64,6 @@ def test_lmnn(self): assert_array_almost_equal(res_1, res_2) if has_installed_skggm(): - # TODO: remove the mark when SDML has become deterministic - @pytest.mark.xfail def test_sdml_supervised(self): seed = np.random.RandomState(1234) sdml = SDML_Supervised(num_constraints=1500) diff --git a/test/test_sklearn_compat.py b/test/test_sklearn_compat.py index 810f039c..8d2bd424 100644 --- a/test/test_sklearn_compat.py +++ b/test/test_sklearn_compat.py @@ -72,9 +72,8 @@ def test_itml(self): def test_mmc(self): check_estimator(dMMC) - # TODO: uncomment this when SDML has become deterministic - # def test_sdml(self): - # check_estimator(dSDML) + def test_sdml(self): + check_estimator(dSDML) # This fails because the default num_chunks isn't data-dependent. # def test_rca(self): From e118cd80a0b867425faec3382e9ead612d28b312 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Tue, 5 Mar 2019 15:11:34 +0100 Subject: [PATCH 28/75] MAINT: add message for SDML when not SPD --- metric_learn/sdml.py | 11 +++++++++-- test/metric_learn_test.py | 35 +++++++++++++++++++++++++++++++---- 2 files changed, 40 insertions(+), 6 deletions(-) diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py index f24f7da0..af0279bd 100644 --- a/metric_learn/sdml.py +++ b/metric_learn/sdml.py @@ -13,6 +13,7 @@ import numpy as np from sklearn.base import TransformerMixin from scipy.linalg import pinvh +from sklearn.exceptions import ConvergenceWarning from .base_metric import MahalanobisMixin, _PairsClassifierMixin from .constraints import Constraints, wrap_pairs @@ -73,9 +74,15 @@ def _fit(self, pairs, y): # our initialization will be the matrix with emp_cov's eigenvalues, # with a constant added so that they are all positive (plus an epsilon - # to ensure definiteness). This is empirical. TODO: see if there are - # better justified initializations. + # to ensure definiteness). This is empirical. w, V = np.linalg.eigh(emp_cov) + if any(w < 0.): + warnings.warn("Warning, the input matrix of graphical lasso is not " + "positive semi-definite (PSD). The algorithm may diverge, " + "and lead to degenerate solutions. " + "To prevent that, try to decrease the balance parameter " + "`balance_param` and/or to set use_covariance=False.", + ConvergenceWarning) sigma0 = (V * (w - min(0, np.min(w)) + 1e-10)).dot(V.T) theta0 = pinvh(sigma0) M, _, _, _, _, _ = quic(emp_cov, lam=self.sparsity_param, diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py index 26b9ac25..0057ceb7 100644 --- a/test/metric_learn_test.py +++ b/test/metric_learn_test.py @@ -11,10 +11,9 @@ from sklearn.exceptions import ConvergenceWarning from sklearn.utils.validation import check_X_y -from metric_learn import ( - LMNN, NCA, LFDA, Covariance, MLKR, MMC, - LSML_Supervised, ITML_Supervised, SDML_Supervised, RCA_Supervised, - MMC_Supervised) +from metric_learn import (LMNN, NCA, LFDA, Covariance, MLKR, MMC, + LSML_Supervised, ITML_Supervised, SDML_Supervised, + RCA_Supervised, MMC_Supervised, SDML) # Import this specially for testing. from metric_learn._util import has_installed_skggm from metric_learn.constraints import wrap_pairs @@ -173,6 +172,34 @@ def test_deprecation_num_labeled(self): 'removed in 0.6.0') assert_warns_message(DeprecationWarning, msg, sdml_supervised.fit, X, y) + def test_sdml_raises_warning_non_psd(self): + """Tests that SDML raises a warning on a toy example where we know the + pseudo-covariance matrix is not PSD""" + pairs = np.array([[[-10., 0.], [10., 0.]], [[0., 50.], [0., -60]]]) + y = [1, -1] + sdml = SDML(use_cov=True, sparsity_param=0.01, balance_param=0.5) + msg = ("Warning, the input matrix of graphical lasso is not " + "positive semi-definite (PSD). The algorithm may diverge, " + "and lead to degenerate solutions. " + "To prevent that, try to decrease the balance parameter " + "`balance_param` and/or to set use_covariance=False.") + with pytest.warns(ConvergenceWarning) as raised_warning: + try: + sdml.fit(pairs, y) + except Exception: + pass + assert str(raised_warning[0].message) == msg + + def test_sdml_doesnt_raise_warning_if_psd(self): + """Tests that sdml does not raise a warning and converges on a simple + problem where we know the pseudo-covariance matrix is PSD""" + pairs = np.array([[[-10., 0.], [10., 0.]], [[0., -55.], [0., -60]]]) + y = [1, -1] + sdml = SDML(use_cov=True, sparsity_param=0.01, balance_param=0.5) + with pytest.warns(None) as record: + sdml.fit(pairs, y) + assert len(record) == 0 + class TestNCA(MetricTestCase): def test_iris(self): From b0c475330ef27b83cfa46e6f229c70955ce1a37a Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Tue, 5 Mar 2019 15:34:16 +0100 Subject: [PATCH 29/75] MAINT: add test for error message if skggm not installed --- test/metric_learn_test.py | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py index 0057ceb7..3cf07106 100644 --- a/test/metric_learn_test.py +++ b/test/metric_learn_test.py @@ -148,8 +148,28 @@ def test_no_twice_same_objective(capsys): assert len(objectives[:-1]) == len(set(objectives[:-1])) -if has_installed_skggm(): - class TestSDML(MetricTestCase): +class TestSDML(MetricTestCase): + + def test_raises_error_msg_not_installed_skggm(self): + """Tests that the right error message is raised if someone tries to + instantiate SDML but has not installed skggm""" + # TODO: to be removed when scikit-learn v0.21 is released + if not has_installed_skggm(): + msg = ("SDML cannot be instantiated without " + "installing skggm. Please install skggm and " + "try again (make sure you meet skggm's " + "requirements).") + with pytest.raises(NotImplementedError) as expected_err: + SDML() + assert str(expected_err.value) == msg + else: # otherwise we should be able to instantiate SDML and it should + # raise no warning + with pytest.warns(None) as record: + SDML() + assert len(record) == 0 + + if has_installed_skggm(): + def test_iris(self): # Note: this is a flaky test, which fails for certain seeds. # TODO: un-flake it! From 13146d80c4e969ab520d9b9fb111b29cdab25661 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Tue, 5 Mar 2019 15:49:41 +0100 Subject: [PATCH 30/75] Try other syntax for installing the right commit of skggm --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index a3605931..61f6e325 100644 --- a/.travis.yml +++ b/.travis.yml @@ -12,5 +12,5 @@ before_install: - if [[ ($TRAVIS_PYTHON_VERSION -ge 3.6) || ($TRAVIS_PYTHON_VERSION == 2.7)]]; then pip install git+https://github.com/skggm/skggm.git@a0ed406586c43\ - 64ea3297a658f415e13b5cbdaf8; fi +64ea3297a658f415e13b5cbdaf8; fi script: pytest test From db4a79983765fbdb6d535864210437631a29fc43 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Tue, 5 Mar 2019 16:01:08 +0100 Subject: [PATCH 31/75] MAINT: make sklearn compat sdml test be run only if skggm is installed --- test/test_sklearn_compat.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/test/test_sklearn_compat.py b/test/test_sklearn_compat.py index 8d2bd424..38751065 100644 --- a/test/test_sklearn_compat.py +++ b/test/test_sklearn_compat.py @@ -16,6 +16,8 @@ from sklearn.model_selection import (cross_val_score, cross_val_predict, train_test_split, KFold) from sklearn.utils.testing import _get_args + +from metric_learn._util import has_installed_skggm from test.test_utils import (metric_learners, ids_metric_learners, mock_preprocessor) @@ -73,7 +75,10 @@ def test_mmc(self): check_estimator(dMMC) def test_sdml(self): - check_estimator(dSDML) + if has_installed_skggm(): + check_estimator(dSDML) + else: + pass # This fails because the default num_chunks isn't data-dependent. # def test_rca(self): From 1011391da23d435a0221efd3f0fc09beb31ffd37 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Tue, 5 Mar 2019 16:03:15 +0100 Subject: [PATCH 32/75] Try another syntax for running travis --- .travis.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 61f6e325..0baaf994 100644 --- a/.travis.yml +++ b/.travis.yml @@ -11,6 +11,6 @@ before_install: - pip install numpy scipy scikit-learn - if [[ ($TRAVIS_PYTHON_VERSION -ge 3.6) || ($TRAVIS_PYTHON_VERSION == 2.7)]]; then - pip install git+https://github.com/skggm/skggm.git@a0ed406586c43\ -64ea3297a658f415e13b5cbdaf8; fi + pip install git+https://github.com/skggm/skggm.git@a0ed406586c4364ea3297a658f415e13b5cbdaf8; + fi script: pytest test From 5ea7ba0035711f14553230175ee97d9ea8233343 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Tue, 5 Mar 2019 16:17:28 +0100 Subject: [PATCH 33/75] Better bash syntax --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 0baaf994..5ba89e04 100644 --- a/.travis.yml +++ b/.travis.yml @@ -10,7 +10,7 @@ before_install: - pip install wheel - pip install numpy scipy scikit-learn - if [[ ($TRAVIS_PYTHON_VERSION -ge 3.6) || - ($TRAVIS_PYTHON_VERSION == 2.7)]]; then + ($TRAVIS_PYTHON_VERSION -eq 2.7)]]; then pip install git+https://github.com/skggm/skggm.git@a0ed406586c4364ea3297a658f415e13b5cbdaf8; fi script: pytest test From 45d3b7b6d9ab9343430873cb90d9f994296c6f82 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Wed, 6 Mar 2019 15:47:36 +0100 Subject: [PATCH 34/75] Fix tests by removing duplicates --- test/test_mahalanobis_mixin.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/test/test_mahalanobis_mixin.py b/test/test_mahalanobis_mixin.py index 1871eb51..be4d4580 100644 --- a/test/test_mahalanobis_mixin.py +++ b/test/test_mahalanobis_mixin.py @@ -284,5 +284,14 @@ def test_transformer_is_2D(estimator, build_dataset): # test that it works for 1 feature trunc_data = input_data[..., :1] + # we drop duplicates that might have been formed, i.e. of the form + # aabc or abcc or aabb for quadruplets, and aa for pairs. + slices = {4: [slice(0, 2), slice(2, 4)], 2: [slice(0, 2)]} + if trunc_data.ndim == 3: + for slice_idx in slices[trunc_data.shape[1]]: + _, indices = np.unique(trunc_data[:, slice_idx, :], axis=2, + return_index=True) + trunc_data = trunc_data[indices] + labels = labels[indices] model.fit(trunc_data, labels) assert model.transformer_.shape == (1, 1) # the transformer must be 2D From dbf52573c36a12dec939f1bd4bfd76be4f138e0a Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Wed, 6 Mar 2019 16:00:56 +0100 Subject: [PATCH 35/75] FIX: fix for sdml by reducing balance parameter --- test/test_mahalanobis_mixin.py | 45 +++++++++++++++++----------------- 1 file changed, 22 insertions(+), 23 deletions(-) diff --git a/test/test_mahalanobis_mixin.py b/test/test_mahalanobis_mixin.py index be4d4580..e097b44d 100644 --- a/test/test_mahalanobis_mixin.py +++ b/test/test_mahalanobis_mixin.py @@ -272,26 +272,25 @@ def test_get_squared_metric(estimator, build_dataset): ids=ids_metric_learners) def test_transformer_is_2D(estimator, build_dataset): """Tests that the transformer of metric learners is 2D""" - # TODO: remove this check when SDML has become robust to 1D elements, - # or when the 1D case is dealt with separately - if not str(estimator).startswith('SDML'): - input_data, labels, _, X = build_dataset() - model = clone(estimator) - set_random_state(model) - # test that it works for X.shape[1] features - model.fit(input_data, labels) - assert model.transformer_.shape == (X.shape[1], X.shape[1]) - - # test that it works for 1 feature - trunc_data = input_data[..., :1] - # we drop duplicates that might have been formed, i.e. of the form - # aabc or abcc or aabb for quadruplets, and aa for pairs. - slices = {4: [slice(0, 2), slice(2, 4)], 2: [slice(0, 2)]} - if trunc_data.ndim == 3: - for slice_idx in slices[trunc_data.shape[1]]: - _, indices = np.unique(trunc_data[:, slice_idx, :], axis=2, - return_index=True) - trunc_data = trunc_data[indices] - labels = labels[indices] - model.fit(trunc_data, labels) - assert model.transformer_.shape == (1, 1) # the transformer must be 2D + input_data, labels, _, X = build_dataset() + model = clone(estimator) + if model.__class__.__name__.startswith('SDML'): + model.set_params(use_cov=False, balance_param=1e-3) + set_random_state(model) + # test that it works for X.shape[1] features + model.fit(input_data, labels) + assert model.transformer_.shape == (X.shape[1], X.shape[1]) + + # test that it works for 1 feature + trunc_data = input_data[..., :1] + # we drop duplicates that might have been formed, i.e. of the form + # aabc or abcc or aabb for quadruplets, and aa for pairs. + slices = {4: [slice(0, 2), slice(2, 4)], 2: [slice(0, 2)]} + if trunc_data.ndim == 3: + for slice_idx in slices[trunc_data.shape[1]]: + _, indices = np.unique(trunc_data[:, slice_idx, :], axis=2, + return_index=True) + trunc_data = trunc_data[indices] + labels = labels[indices] + model.fit(trunc_data, labels) + assert model.transformer_.shape == (1, 1) # the transformer must be 2D From 4b0bae93eadb1e1000823f6028e3ebc91b47955b Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Wed, 6 Mar 2019 17:22:59 +0100 Subject: [PATCH 36/75] FIX: update code to work with old version of numpy that does not have axis for unique --- test/test_mahalanobis_mixin.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/test/test_mahalanobis_mixin.py b/test/test_mahalanobis_mixin.py index e097b44d..9ca8fb37 100644 --- a/test/test_mahalanobis_mixin.py +++ b/test/test_mahalanobis_mixin.py @@ -288,9 +288,10 @@ def test_transformer_is_2D(estimator, build_dataset): slices = {4: [slice(0, 2), slice(2, 4)], 2: [slice(0, 2)]} if trunc_data.ndim == 3: for slice_idx in slices[trunc_data.shape[1]]: - _, indices = np.unique(trunc_data[:, slice_idx, :], axis=2, - return_index=True) - trunc_data = trunc_data[indices] - labels = labels[indices] + pairs = trunc_data[:, slice_idx, :] + diffs = pairs[:, 1, :] - pairs[:, 0, :] + to_keep = np.nonzero(diffs.ravel()) + trunc_data = trunc_data[to_keep] + labels = labels[to_keep] model.fit(trunc_data, labels) assert model.transformer_.shape == (1, 1) # the transformer must be 2D From f3c690e14a9df2d522dfe9fec38dfa1eb095e525 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Thu, 7 Mar 2019 13:45:03 +0100 Subject: [PATCH 37/75] Remove the need for skggm --- metric_learn/_util.py | 8 ------ metric_learn/sdml.py | 19 +++++--------- test/metric_learn_test.py | 29 ++++------------------ test/test_base_metric.py | 29 +++++----------------- test/test_fit_transform.py | 26 +++++++++---------- test/test_mahalanobis_mixin.py | 2 -- test/test_sklearn_compat.py | 6 +---- test/test_transformer_metric_conversion.py | 14 +++++------ test/test_utils.py | 12 +++------ 9 files changed, 41 insertions(+), 104 deletions(-) diff --git a/metric_learn/_util.py b/metric_learn/_util.py index 82552ff7..bd57fd5f 100644 --- a/metric_learn/_util.py +++ b/metric_learn/_util.py @@ -15,14 +15,6 @@ def vector_norm(X): return np.linalg.norm(X, axis=1) -def has_installed_skggm(): - try: - import inverse_covariance - return True - except ImportError: - return False - - def check_input(input_data, y=None, preprocessor=None, type_of_inputs='classic', tuple_size=None, accept_sparse=False, dtype='numeric', order=None, diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py index af0279bd..9d2606e5 100644 --- a/metric_learn/sdml.py +++ b/metric_learn/sdml.py @@ -13,13 +13,12 @@ import numpy as np from sklearn.base import TransformerMixin from scipy.linalg import pinvh +from sklearn.covariance import graphical_lasso from sklearn.exceptions import ConvergenceWarning from .base_metric import MahalanobisMixin, _PairsClassifierMixin from .constraints import Constraints, wrap_pairs -from ._util import transformer_from_metric, has_installed_skggm -if has_installed_skggm(): - from inverse_covariance import quic +from ._util import transformer_from_metric class _BaseSDML(MahalanobisMixin): @@ -47,11 +46,6 @@ def __init__(self, balance_param=0.5, sparsity_param=0.01, use_cov=True, The preprocessor to call to get tuples from indices. If array-like, tuples will be gotten like this: X[indices]. """ - if not has_installed_skggm(): - raise NotImplementedError("SDML cannot be instantiated without " - "installing skggm. Please install skggm and " - "try again (make sure you meet skggm's " - "requirements).") self.balance_param = balance_param self.sparsity_param = sparsity_param self.use_cov = use_cov @@ -83,11 +77,10 @@ def _fit(self, pairs, y): "To prevent that, try to decrease the balance parameter " "`balance_param` and/or to set use_covariance=False.", ConvergenceWarning) - sigma0 = (V * (w - min(0, np.min(w)) + 1e-10)).dot(V.T) - theta0 = pinvh(sigma0) - M, _, _, _, _, _ = quic(emp_cov, lam=self.sparsity_param, - msg=self.verbose, - Theta0=theta0, Sigma0=sigma0) + cov_init = (V * (w - min(0, np.min(w)) + 1e-10)).dot(V.T) + _, M = graphical_lasso(emp_cov, alpha=self.sparsity_param, + verbose=self.verbose, + cov_init=cov_init) self.transformer_ = transformer_from_metric(np.atleast_2d(M)) return self diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py index 3cf07106..ccb3514f 100644 --- a/test/metric_learn_test.py +++ b/test/metric_learn_test.py @@ -15,7 +15,6 @@ LSML_Supervised, ITML_Supervised, SDML_Supervised, RCA_Supervised, MMC_Supervised, SDML) # Import this specially for testing. -from metric_learn._util import has_installed_skggm from metric_learn.constraints import wrap_pairs from metric_learn.lmnn import python_LMNN @@ -150,43 +149,25 @@ def test_no_twice_same_objective(capsys): class TestSDML(MetricTestCase): - def test_raises_error_msg_not_installed_skggm(self): - """Tests that the right error message is raised if someone tries to - instantiate SDML but has not installed skggm""" - # TODO: to be removed when scikit-learn v0.21 is released - if not has_installed_skggm(): - msg = ("SDML cannot be instantiated without " - "installing skggm. Please install skggm and " - "try again (make sure you meet skggm's " - "requirements).") - with pytest.raises(NotImplementedError) as expected_err: - SDML() - assert str(expected_err.value) == msg - else: # otherwise we should be able to instantiate SDML and it should - # raise no warning - with pytest.warns(None) as record: - SDML() - assert len(record) == 0 - - if has_installed_skggm(): - def test_iris(self): # Note: this is a flaky test, which fails for certain seeds. # TODO: un-flake it! rs = np.random.RandomState(5555) - sdml = SDML_Supervised(num_constraints=1500) + sdml = SDML_Supervised(num_constraints=1500, use_cov=False, + balance_param=5e-5) sdml.fit(self.iris_points, self.iris_labels, random_state=rs) csep = class_separation(sdml.transform(self.iris_points), self.iris_labels) - self.assertLess(csep, 0.20) + self.assertLess(csep, 0.22) def test_deprecation_num_labeled(self): # test that a deprecation message is thrown if num_labeled is set at # initialization # TODO: remove in v.0.6 X, y = make_classification() - sdml_supervised = SDML_Supervised(num_labeled=np.inf) + sdml_supervised = SDML_Supervised(num_labeled=np.inf, use_cov=False, + balance_param=5e-5) msg = ('"num_labeled" parameter is not used.' ' It has been deprecated in version 0.5.0 and will be' 'removed in 0.6.0') diff --git a/test/test_base_metric.py b/test/test_base_metric.py index dabffa1b..fed4d780 100644 --- a/test/test_base_metric.py +++ b/test/test_base_metric.py @@ -5,7 +5,6 @@ from sklearn import clone from sklearn.utils.testing import set_random_state -from metric_learn._util import has_installed_skggm from test.test_utils import ids_metric_learners, metric_learners @@ -54,33 +53,17 @@ def test_lsml(self): weights=None) """.strip('\n')) - if has_installed_skggm(): - def test_sdml(self): - self.assertEqual(str(metric_learn.SDML()), - "SDML(balance_param=0.5, preprocessor=None, " - "sparsity_param=0.01, use_cov=True,\n " - "verbose=False)") - self.assertEqual(str(metric_learn.SDML_Supervised()), """ + def test_sdml(self): + self.assertEqual(str(metric_learn.SDML()), + "SDML(balance_param=0.5, preprocessor=None, " + "sparsity_param=0.01, use_cov=True,\n " + "verbose=False)") + self.assertEqual(str(metric_learn.SDML_Supervised()), """ SDML_Supervised(balance_param=0.5, num_constraints=None, num_labeled='deprecated', preprocessor=None, sparsity_param=0.01, use_cov=True, verbose=False) """.strip('\n')) - else: - # if we haven't install skggm, we will just check that instantiating SDML - # or SDML_Supervised will return an error - def test_sdml(self): - expected_msg = ("SDML cannot be instantiated without " - "installing skggm. Please install skggm and " - "try again (make sure you meet skggm's " - "requirements).") - with pytest.raises(NotImplementedError) as raised_error: - metric_learn.SDML() - assert str(raised_error.value) == expected_msg - with pytest.raises(NotImplementedError) as raised_error: - metric_learn.SDML_Supervised() - assert str(raised_error.value) == expected_msg - def test_rca(self): self.assertEqual(str(metric_learn.RCA()), "RCA(num_dims=None, pca_comps=None, preprocessor=None)") diff --git a/test/test_fit_transform.py b/test/test_fit_transform.py index 16f982e8..b85e9273 100644 --- a/test/test_fit_transform.py +++ b/test/test_fit_transform.py @@ -6,9 +6,8 @@ from metric_learn import ( LMNN, NCA, LFDA, Covariance, MLKR, - LSML_Supervised, ITML_Supervised, SDML_Supervised, RCA_Supervised, MMC_Supervised) - -from metric_learn._util import has_installed_skggm + LSML_Supervised, ITML_Supervised, SDML_Supervised, RCA_Supervised, + MMC_Supervised) class TestFitTransform(unittest.TestCase): @@ -63,18 +62,19 @@ def test_lmnn(self): assert_array_almost_equal(res_1, res_2) - if has_installed_skggm(): - def test_sdml_supervised(self): - seed = np.random.RandomState(1234) - sdml = SDML_Supervised(num_constraints=1500) - sdml.fit(self.X, self.y, random_state=seed) - res_1 = sdml.transform(self.X) + def test_sdml_supervised(self): + seed = np.random.RandomState(1234) + sdml = SDML_Supervised(num_constraints=1500, balance_param=1e-5, + use_cov=False) + sdml.fit(self.X, self.y, random_state=seed) + res_1 = sdml.transform(self.X) - seed = np.random.RandomState(1234) - sdml = SDML_Supervised(num_constraints=1500) - res_2 = sdml.fit_transform(self.X, self.y, random_state=seed) + seed = np.random.RandomState(1234) + sdml = SDML_Supervised(num_constraints=1500, balance_param=1e-5, + use_cov=False) + res_2 = sdml.fit_transform(self.X, self.y, random_state=seed) - assert_array_almost_equal(res_1, res_2) + assert_array_almost_equal(res_1, res_2) def test_nca(self): n = self.X.shape[0] diff --git a/test/test_mahalanobis_mixin.py b/test/test_mahalanobis_mixin.py index 9ca8fb37..d4bed3fe 100644 --- a/test/test_mahalanobis_mixin.py +++ b/test/test_mahalanobis_mixin.py @@ -274,8 +274,6 @@ def test_transformer_is_2D(estimator, build_dataset): """Tests that the transformer of metric learners is 2D""" input_data, labels, _, X = build_dataset() model = clone(estimator) - if model.__class__.__name__.startswith('SDML'): - model.set_params(use_cov=False, balance_param=1e-3) set_random_state(model) # test that it works for X.shape[1] features model.fit(input_data, labels) diff --git a/test/test_sklearn_compat.py b/test/test_sklearn_compat.py index 38751065..4c9cb68b 100644 --- a/test/test_sklearn_compat.py +++ b/test/test_sklearn_compat.py @@ -17,7 +17,6 @@ train_test_split, KFold) from sklearn.utils.testing import _get_args -from metric_learn._util import has_installed_skggm from test.test_utils import (metric_learners, ids_metric_learners, mock_preprocessor) @@ -75,10 +74,7 @@ def test_mmc(self): check_estimator(dMMC) def test_sdml(self): - if has_installed_skggm(): - check_estimator(dSDML) - else: - pass + check_estimator(dSDML) # This fails because the default num_chunks isn't data-dependent. # def test_rca(self): diff --git a/test/test_transformer_metric_conversion.py b/test/test_transformer_metric_conversion.py index 8e973a2f..59986011 100644 --- a/test/test_transformer_metric_conversion.py +++ b/test/test_transformer_metric_conversion.py @@ -6,7 +6,6 @@ from metric_learn import ( LMNN, NCA, LFDA, Covariance, MLKR, LSML_Supervised, ITML_Supervised, SDML_Supervised, RCA_Supervised) -from metric_learn._util import has_installed_skggm class TestTransformerMetricConversion(unittest.TestCase): @@ -43,13 +42,12 @@ def test_lmnn(self): L = lmnn.transformer_ assert_array_almost_equal(L.T.dot(L), lmnn.get_mahalanobis_matrix()) - if has_installed_skggm(): - def test_sdml_supervised(self): - seed = np.random.RandomState(1234) - sdml = SDML_Supervised(num_constraints=1500) - sdml.fit(self.X, self.y, random_state=seed) - L = sdml.transformer_ - assert_array_almost_equal(L.T.dot(L), sdml.get_mahalanobis_matrix()) + def test_sdml_supervised(self): + seed = np.random.RandomState(1234) + sdml = SDML_Supervised(num_constraints=1500) + sdml.fit(self.X, self.y, random_state=seed) + L = sdml.transformer_ + assert_array_almost_equal(L.T.dot(L), sdml.get_mahalanobis_matrix()) def test_nca(self): n = self.X.shape[0] diff --git a/test/test_utils.py b/test/test_utils.py index d9f53547..a61c7fdd 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -9,8 +9,7 @@ from sklearn.base import clone from metric_learn._util import (check_input, make_context, preprocess_tuples, make_name, preprocess_points, - check_collapsed_pairs, validate_vector, - has_installed_skggm) + check_collapsed_pairs, validate_vector) from metric_learn import (ITML, LSML, MMC, RCA, SDML, Covariance, LFDA, LMNN, MLKR, NCA, ITML_Supervised, LSML_Supervised, MMC_Supervised, RCA_Supervised, SDML_Supervised, @@ -105,9 +104,7 @@ def build_quadruplets(with_preprocessor=False): pairs_learners = [(ITML(), build_pairs), (MMC(max_iter=2), build_pairs), # max_iter=2 for faster - ] -if has_installed_skggm(): - pairs_learners.append(((SDML(), build_pairs))) + (SDML(use_cov=False, balance_param=1e-5), build_pairs)] ids_pairs_learners = list(map(lambda x: x.__class__.__name__, [learner for (learner, _) in pairs_learners])) @@ -121,9 +118,8 @@ def build_quadruplets(with_preprocessor=False): (LSML_Supervised(), build_classification), (MMC_Supervised(max_iter=5), build_classification), (RCA_Supervised(num_chunks=10), build_classification), - ] -if has_installed_skggm(): - classifiers.append(((SDML_Supervised(), build_classification))) + (SDML_Supervised(use_cov=False, balance_param=1e-5), + build_classification)] ids_classifiers = list(map(lambda x: x.__class__.__name__, [learner for (learner, _) in classifiers])) From 57b0567e7abbdd1a87560b4f81cad895b61a995f Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Thu, 7 Mar 2019 13:45:34 +0100 Subject: [PATCH 38/75] Update travis not to use skggm --- .travis.yml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.travis.yml b/.travis.yml index 5ba89e04..5c86d815 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,8 +9,4 @@ before_install: - pip install --upgrade pip - pip install wheel - pip install numpy scipy scikit-learn - - if [[ ($TRAVIS_PYTHON_VERSION -ge 3.6) || - ($TRAVIS_PYTHON_VERSION -eq 2.7)]]; then - pip install git+https://github.com/skggm/skggm.git@a0ed406586c4364ea3297a658f415e13b5cbdaf8; - fi script: pytest test From 04316b21bdd9bd7d6059ef5f095028812c4bd0cf Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Thu, 7 Mar 2019 15:26:33 +0100 Subject: [PATCH 39/75] Add a stable init for sklearn checks --- test/test_sklearn_compat.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/test/test_sklearn_compat.py b/test/test_sklearn_compat.py index 4c9cb68b..827c0ed3 100644 --- a/test/test_sklearn_compat.py +++ b/test/test_sklearn_compat.py @@ -74,6 +74,16 @@ def test_mmc(self): check_estimator(dMMC) def test_sdml(self): + def stable_init(self, sparsity_param=0.01, num_labeled='deprecated', + num_constraints=None, verbose=False, preprocessor=None): + # this init makes SDML stable for scikit-learn examples. + SDML_Supervised.__init__(self, sparsity_param=sparsity_param, + num_labeled=num_labeled, + num_constraints=num_constraints, + verbose=verbose, + preprocessor=preprocessor, + balance_param=1e-5, use_cov=False) + dSDML.__init__ = stable_init check_estimator(dSDML) # This fails because the default num_chunks isn't data-dependent. From b6416413666c89f929658f46b8bd1db6a399e6bb Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Thu, 7 Mar 2019 15:38:18 +0100 Subject: [PATCH 40/75] FIX test_sdml_supervised --- test/test_transformer_metric_conversion.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/test_transformer_metric_conversion.py b/test/test_transformer_metric_conversion.py index 59986011..6cfe8281 100644 --- a/test/test_transformer_metric_conversion.py +++ b/test/test_transformer_metric_conversion.py @@ -44,7 +44,8 @@ def test_lmnn(self): def test_sdml_supervised(self): seed = np.random.RandomState(1234) - sdml = SDML_Supervised(num_constraints=1500) + sdml = SDML_Supervised(num_constraints=1500, use_cov=False, + balance_param=1e-5) sdml.fit(self.X, self.y, random_state=seed) L = sdml.transformer_ assert_array_almost_equal(L.T.dot(L), sdml.get_mahalanobis_matrix()) From fedfb8e80d59e1ee3eba77ca24f1504a119136d4 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Fri, 8 Mar 2019 11:06:24 +0100 Subject: [PATCH 41/75] Revert "Update travis not to use skggm" This reverts commit 57b0567e7abbdd1a87560b4f81cad895b61a995f. --- .travis.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.travis.yml b/.travis.yml index 5c86d815..5ba89e04 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,4 +9,8 @@ before_install: - pip install --upgrade pip - pip install wheel - pip install numpy scipy scikit-learn + - if [[ ($TRAVIS_PYTHON_VERSION -ge 3.6) || + ($TRAVIS_PYTHON_VERSION -eq 2.7)]]; then + pip install git+https://github.com/skggm/skggm.git@a0ed406586c4364ea3297a658f415e13b5cbdaf8; + fi script: pytest test From f0bbf6d47062c32d5feb64413ae9a09a23911d28 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Fri, 8 Mar 2019 11:39:59 +0100 Subject: [PATCH 42/75] Add fallback on skggm --- metric_learn/_util.py | 8 ++++++ metric_learn/sdml.py | 27 ++++++++++++++++---- test/metric_learn_test.py | 53 ++++++++++++++++++++++++++++++++++++++- 3 files changed, 82 insertions(+), 6 deletions(-) diff --git a/metric_learn/_util.py b/metric_learn/_util.py index bd57fd5f..82552ff7 100644 --- a/metric_learn/_util.py +++ b/metric_learn/_util.py @@ -15,6 +15,14 @@ def vector_norm(X): return np.linalg.norm(X, axis=1) +def has_installed_skggm(): + try: + import inverse_covariance + return True + except ImportError: + return False + + def check_input(input_data, y=None, preprocessor=None, type_of_inputs='classic', tuple_size=None, accept_sparse=False, dtype='numeric', order=None, diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py index 9d2606e5..88aa7776 100644 --- a/metric_learn/sdml.py +++ b/metric_learn/sdml.py @@ -18,7 +18,9 @@ from .base_metric import MahalanobisMixin, _PairsClassifierMixin from .constraints import Constraints, wrap_pairs -from ._util import transformer_from_metric +from ._util import transformer_from_metric, has_installed_skggm +if has_installed_skggm(): + from inverse_covariance import quic class _BaseSDML(MahalanobisMixin): @@ -53,6 +55,15 @@ def __init__(self, balance_param=0.5, sparsity_param=0.01, use_cov=True, super(_BaseSDML, self).__init__(preprocessor) def _fit(self, pairs, y): + if not has_installed_skggm(): + msg = ("Warning, skggm is not installed, so SDML will use " + "scikit-learn's graphical_lasso method. It can fail to converge" + "on some non SPD matrices where skggm would converge. If so, " + "try to install skggm. (see the README.md for the right " + "version.)") + warnings.warn(msg) + else: + print("SDML will use skggm's solver.") pairs, y = self._prepare_inputs(pairs, y, type_of_inputs='tuples') @@ -77,10 +88,16 @@ def _fit(self, pairs, y): "To prevent that, try to decrease the balance parameter " "`balance_param` and/or to set use_covariance=False.", ConvergenceWarning) - cov_init = (V * (w - min(0, np.min(w)) + 1e-10)).dot(V.T) - _, M = graphical_lasso(emp_cov, alpha=self.sparsity_param, - verbose=self.verbose, - cov_init=cov_init) + sigma0 = (V * (w - min(0, np.min(w)) + 1e-10)).dot(V.T) + if has_installed_skggm(): + theta0 = pinvh(sigma0) + M, _, _, _, _, _ = quic(emp_cov, lam=self.sparsity_param, + msg=self.verbose, + Theta0=theta0, Sigma0=sigma0) + else: + _, M = graphical_lasso(emp_cov, alpha=self.sparsity_param, + verbose=self.verbose, + cov_init=sigma0) self.transformer_ = transformer_from_metric(np.atleast_2d(M)) return self diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py index ccb3514f..994cb2c1 100644 --- a/test/metric_learn_test.py +++ b/test/metric_learn_test.py @@ -15,6 +15,7 @@ LSML_Supervised, ITML_Supervised, SDML_Supervised, RCA_Supervised, MMC_Supervised, SDML) # Import this specially for testing. +from metric_learn._util import has_installed_skggm from metric_learn.constraints import wrap_pairs from metric_learn.lmnn import python_LMNN @@ -149,6 +150,33 @@ def test_no_twice_same_objective(capsys): class TestSDML(MetricTestCase): + def test_raises_warning_msg_not_installed_skggm(self): + """Tests that the right warning message is raised if someone tries to + use SDML but has not installed skggm""" + # TODO: remove if we don't need skggm anymore + pairs = np.array([[[-10., 0.], [10., 0.]], [[0., -55.], [0., -60]]]) + y_pairs = [1, -1] + X, y = make_classification(random_state=42) + sdml = SDML() + sdml_supervised = SDML_Supervised() + if not has_installed_skggm(): + msg = ("Warning, skggm is not installed, so SDML will use " + "scikit-learn's graphical_lasso method. It can fail to converge" + "on some non SPD matrices where skggm would converge. If so, " + "try to install skggm. (see the README.md for the right " + "version.)") + with pytest.warns(None) as expected_err: + sdml.fit(pairs, y_pairs) + assert str(expected_err.value) == msg + with pytest.warns(None) as expected_err: + sdml_supervised.fit(X, y) + assert str(expected_err.value) == msg + else: # otherwise we should be able to instantiate SDML and it should + # raise no warning + with pytest.warns(None) as record: + SDML() + assert len(record) == 0 + def test_iris(self): # Note: this is a flaky test, which fails for certain seeds. # TODO: un-flake it! @@ -165,7 +193,7 @@ def test_deprecation_num_labeled(self): # test that a deprecation message is thrown if num_labeled is set at # initialization # TODO: remove in v.0.6 - X, y = make_classification() + X, y = make_classification(random_state=42) sdml_supervised = SDML_Supervised(num_labeled=np.inf, use_cov=False, balance_param=5e-5) msg = ('"num_labeled" parameter is not used.' @@ -202,6 +230,29 @@ def test_sdml_doesnt_raise_warning_if_psd(self): assert len(record) == 0 +def test_verbose_has_installed_skggm_sdml(capsys): + # Test that if users have installed skggm, a message is printed telling them + # skggm's solver is used (when they use SDML) + # TODO: remove if we don't need skggm anymore + pairs = np.array([[[-10., 0.], [10., 0.]], [[0., -55.], [0., -60]]]) + y_pairs = [1, -1] + sdml = SDML() + sdml.fit(pairs, y_pairs) + out, _ = capsys.readouterr() + assert "SDML will use skggm's solver." in out + + +def test_verbose_has_installed_skggm_sdml_supervised(capsys): + # Test that if users have installed skggm, a message is printed telling them + # skggm's solver is used (when they use SDML_Supervised) + # TODO: remove if we don't need skggm anymore + X, y = make_classification(random_state=42) + sdml = SDML_Supervised() + sdml.fit(X, y) + out, _ = capsys.readouterr() + assert "SDML will use skggm's solver." in out + + class TestNCA(MetricTestCase): def test_iris(self): n = self.iris_points.shape[0] From 520d7c28b98840c977e72c612e3b7e6e3be05c45 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Fri, 8 Mar 2019 13:21:08 +0100 Subject: [PATCH 43/75] FIX: fix versions comparison and tests --- .travis.yml | 4 ++-- test/metric_learn_test.py | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.travis.yml b/.travis.yml index 5ba89e04..b045e3d3 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,8 +9,8 @@ before_install: - pip install --upgrade pip - pip install wheel - pip install numpy scipy scikit-learn - - if [[ ($TRAVIS_PYTHON_VERSION -ge 3.6) || - ($TRAVIS_PYTHON_VERSION -eq 2.7)]]; then + - if [[ ($TRAVIS_PYTHON_VERSION -ge "3.6") || + ($TRAVIS_PYTHON_VERSION -eq "2.7")]]; then pip install git+https://github.com/skggm/skggm.git@a0ed406586c4364ea3297a658f415e13b5cbdaf8; fi script: pytest test diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py index 994cb2c1..8ea9248a 100644 --- a/test/metric_learn_test.py +++ b/test/metric_learn_test.py @@ -165,12 +165,12 @@ def test_raises_warning_msg_not_installed_skggm(self): "on some non SPD matrices where skggm would converge. If so, " "try to install skggm. (see the README.md for the right " "version.)") - with pytest.warns(None) as expected_err: + with pytest.warns(None) as record: sdml.fit(pairs, y_pairs) - assert str(expected_err.value) == msg - with pytest.warns(None) as expected_err: + assert str(record[0].message) == msg + with pytest.warns(None) as record: sdml_supervised.fit(X, y) - assert str(expected_err.value) == msg + assert str(record[0].message) == msg else: # otherwise we should be able to instantiate SDML and it should # raise no warning with pytest.warns(None) as record: From 0437c6236d7b06ddf6c14a88159006bb854404b9 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Fri, 8 Mar 2019 13:23:37 +0100 Subject: [PATCH 44/75] MAINT: improve test of no warning --- test/metric_learn_test.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py index 8ea9248a..bb0eddcc 100644 --- a/test/metric_learn_test.py +++ b/test/metric_learn_test.py @@ -171,10 +171,15 @@ def test_raises_warning_msg_not_installed_skggm(self): with pytest.warns(None) as record: sdml_supervised.fit(X, y) assert str(record[0].message) == msg - else: # otherwise we should be able to instantiate SDML and it should - # raise no warning + else: # otherwise we should be able to instantiate and fit SDML and it + # should raise no warning with pytest.warns(None) as record: - SDML() + sdml = SDML() + sdml.fit(pairs, y_pairs) + assert len(record) == 0 + with pytest.warns(None) as record: + sdml = SDML_Supervised() + sdml.fit(X, y) assert len(record) == 0 def test_iris(self): From be1a5e651205782c5c10cadfb00fbbb8bfd5cb6f Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Fri, 8 Mar 2019 14:49:57 +0100 Subject: [PATCH 45/75] FIX: fix wrap pairs that was returning column y (we need line y), and fix the example for SDML to not raise another warning --- metric_learn/constraints.py | 2 +- test/metric_learn_test.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/metric_learn/constraints.py b/metric_learn/constraints.py index c4ddcae8..a5487690 100644 --- a/metric_learn/constraints.py +++ b/metric_learn/constraints.py @@ -96,6 +96,6 @@ def wrap_pairs(X, constraints): c = np.array(constraints[2]) d = np.array(constraints[3]) constraints = np.vstack((np.column_stack((a, b)), np.column_stack((c, d)))) - y = np.vstack([np.ones((len(a), 1)), - np.ones((len(c), 1))]) + y = np.hstack([np.ones((len(a),)), - np.ones((len(c),))]) pairs = X[constraints] return pairs, y diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py index bb0eddcc..3b75f5f1 100644 --- a/test/metric_learn_test.py +++ b/test/metric_learn_test.py @@ -178,7 +178,7 @@ def test_raises_warning_msg_not_installed_skggm(self): sdml.fit(pairs, y_pairs) assert len(record) == 0 with pytest.warns(None) as record: - sdml = SDML_Supervised() + sdml = SDML_Supervised(use_cov=False, balance_param=1e-5) sdml.fit(X, y) assert len(record) == 0 From 56efa095d56bcf7b6eefd3cb4b4174f2b215d21f Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Fri, 8 Mar 2019 14:51:20 +0100 Subject: [PATCH 46/75] FIX: force travis to do the right check --- .travis.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index b045e3d3..25f4ae44 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,8 +9,8 @@ before_install: - pip install --upgrade pip - pip install wheel - pip install numpy scipy scikit-learn - - if [[ ($TRAVIS_PYTHON_VERSION -ge "3.6") || - ($TRAVIS_PYTHON_VERSION -eq "2.7")]]; then + - if [[ ($TRAVIS_PYTHON_VERSION == "3.6") || + ($TRAVIS_PYTHON_VERSION == "2.7")]]; then pip install git+https://github.com/skggm/skggm.git@a0ed406586c4364ea3297a658f415e13b5cbdaf8; fi script: pytest test From 142eea9cf7614d55c35ba5cac3db58fc1194ef1a Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Fri, 8 Mar 2019 14:58:28 +0100 Subject: [PATCH 47/75] TST: add non SPD test that works with skggm's quic but not sklearn's graphical_lasso --- test/metric_learn_test.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py index 3b75f5f1..e37d386a 100644 --- a/test/metric_learn_test.py +++ b/test/metric_learn_test.py @@ -234,6 +234,15 @@ def test_sdml_doesnt_raise_warning_if_psd(self): sdml.fit(pairs, y) assert len(record) == 0 + def test_sdml_works_on_non_spd_pb_with_skggm(self): + """Test that SDML works on a certain non SPD problem on which we know + it should work, but scikit-learn's graphical_lasso does not work""" + if has_installed_skggm(): + X, y = load_iris(return_X_y=True) + sdml = SDML_Supervised(balance_param=0.5, sparsity_param=0.01, + use_cov=True) + sdml.fit(X, y) + def test_verbose_has_installed_skggm_sdml(capsys): # Test that if users have installed skggm, a message is printed telling them From fcfd44c1b7a9f166b9d8235eae3dee1bd7ad3382 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Fri, 8 Mar 2019 15:01:12 +0100 Subject: [PATCH 48/75] Try again travis this time installing cython --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 25f4ae44..cf6cc2e1 100644 --- a/.travis.yml +++ b/.travis.yml @@ -8,7 +8,7 @@ python: before_install: - pip install --upgrade pip - pip install wheel - - pip install numpy scipy scikit-learn + - pip install cython numpy scipy scikit-learn - if [[ ($TRAVIS_PYTHON_VERSION == "3.6") || ($TRAVIS_PYTHON_VERSION == "2.7")]]; then pip install git+https://github.com/skggm/skggm.git@a0ed406586c4364ea3297a658f415e13b5cbdaf8; From 019e28b7a63e995f0b826d9c4c4df81963ca1d09 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Fri, 8 Mar 2019 15:09:22 +0100 Subject: [PATCH 49/75] Try to make travis work with build_essential --- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index cf6cc2e1..0b8cbc3f 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,6 +6,7 @@ python: - "3.4" - "3.6" before_install: + - sudo apt-get install build-essential - pip install --upgrade pip - pip install wheel - pip install cython numpy scipy scikit-learn From 04a5107faf1a541ed439722a2352ccdff1dd39f3 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Fri, 8 Mar 2019 15:19:06 +0100 Subject: [PATCH 50/75] Try with installing liblapack --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 0b8cbc3f..cc6317a2 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,7 +6,7 @@ python: - "3.4" - "3.6" before_install: - - sudo apt-get install build-essential + - sudo apt-get install liblapack-dev - pip install --upgrade pip - pip install wheel - pip install cython numpy scipy scikit-learn From be3a2ad0bfe55553182d26bdb33c2ea8aa5002c6 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Fri, 8 Mar 2019 15:36:29 +0100 Subject: [PATCH 51/75] TST: fix tests for when skggm is not installed --- test/metric_learn_test.py | 41 +++++++++++++++++++++------------------ 1 file changed, 22 insertions(+), 19 deletions(-) diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py index e37d386a..029958d6 100644 --- a/test/metric_learn_test.py +++ b/test/metric_learn_test.py @@ -158,7 +158,7 @@ def test_raises_warning_msg_not_installed_skggm(self): y_pairs = [1, -1] X, y = make_classification(random_state=42) sdml = SDML() - sdml_supervised = SDML_Supervised() + sdml_supervised = SDML_Supervised(use_cov=False, balance_param=1e-5) if not has_installed_skggm(): msg = ("Warning, skggm is not installed, so SDML will use " "scikit-learn's graphical_lasso method. It can fail to converge" @@ -222,17 +222,18 @@ def test_sdml_raises_warning_non_psd(self): sdml.fit(pairs, y) except Exception: pass - assert str(raised_warning[0].message) == msg + # we assert that this warning is in one of the warning raised by the + # estimator + assert msg in list(map(lambda w: str(w.message), raised_warning)) - def test_sdml_doesnt_raise_warning_if_psd(self): - """Tests that sdml does not raise a warning and converges on a simple - problem where we know the pseudo-covariance matrix is PSD""" + def test_sdml_converges_if_psd(self): + """Tests that sdml converges on a simple problem where we know the + pseudo-covariance matrix is PSD""" pairs = np.array([[[-10., 0.], [10., 0.]], [[0., -55.], [0., -60]]]) y = [1, -1] sdml = SDML(use_cov=True, sparsity_param=0.01, balance_param=0.5) - with pytest.warns(None) as record: - sdml.fit(pairs, y) - assert len(record) == 0 + sdml.fit(pairs, y) + assert np.isfinite(sdml.get_mahalanobis_matrix()).all() def test_sdml_works_on_non_spd_pb_with_skggm(self): """Test that SDML works on a certain non SPD problem on which we know @@ -248,23 +249,25 @@ def test_verbose_has_installed_skggm_sdml(capsys): # Test that if users have installed skggm, a message is printed telling them # skggm's solver is used (when they use SDML) # TODO: remove if we don't need skggm anymore - pairs = np.array([[[-10., 0.], [10., 0.]], [[0., -55.], [0., -60]]]) - y_pairs = [1, -1] - sdml = SDML() - sdml.fit(pairs, y_pairs) - out, _ = capsys.readouterr() - assert "SDML will use skggm's solver." in out + if has_installed_skggm(): + pairs = np.array([[[-10., 0.], [10., 0.]], [[0., -55.], [0., -60]]]) + y_pairs = [1, -1] + sdml = SDML() + sdml.fit(pairs, y_pairs) + out, _ = capsys.readouterr() + assert "SDML will use skggm's solver." in out def test_verbose_has_installed_skggm_sdml_supervised(capsys): # Test that if users have installed skggm, a message is printed telling them # skggm's solver is used (when they use SDML_Supervised) # TODO: remove if we don't need skggm anymore - X, y = make_classification(random_state=42) - sdml = SDML_Supervised() - sdml.fit(X, y) - out, _ = capsys.readouterr() - assert "SDML will use skggm's solver." in out + if has_installed_skggm(): + X, y = make_classification(random_state=42) + sdml = SDML_Supervised() + sdml.fit(X, y) + out, _ = capsys.readouterr() + assert "SDML will use skggm's solver." in out class TestNCA(MetricTestCase): From 1ee8d1f54559bc8754cbe0d7c34e78a97ac87e4d Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Fri, 8 Mar 2019 16:00:16 +0100 Subject: [PATCH 52/75] TST: use better pytest skipif syntax --- test/metric_learn_test.py | 97 +++++++++++++++++++++++---------------- 1 file changed, 57 insertions(+), 40 deletions(-) diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py index 029958d6..7c9dbcd6 100644 --- a/test/metric_learn_test.py +++ b/test/metric_learn_test.py @@ -150,6 +150,9 @@ def test_no_twice_same_objective(capsys): class TestSDML(MetricTestCase): + @pytest.mark.skipif(has_installed_skggm(), + reason="The warning will be thrown only if skggm is " + "not installed.") def test_raises_warning_msg_not_installed_skggm(self): """Tests that the right warning message is raised if someone tries to use SDML but has not installed skggm""" @@ -159,28 +162,35 @@ def test_raises_warning_msg_not_installed_skggm(self): X, y = make_classification(random_state=42) sdml = SDML() sdml_supervised = SDML_Supervised(use_cov=False, balance_param=1e-5) - if not has_installed_skggm(): - msg = ("Warning, skggm is not installed, so SDML will use " - "scikit-learn's graphical_lasso method. It can fail to converge" - "on some non SPD matrices where skggm would converge. If so, " - "try to install skggm. (see the README.md for the right " - "version.)") - with pytest.warns(None) as record: - sdml.fit(pairs, y_pairs) - assert str(record[0].message) == msg - with pytest.warns(None) as record: - sdml_supervised.fit(X, y) - assert str(record[0].message) == msg - else: # otherwise we should be able to instantiate and fit SDML and it - # should raise no warning - with pytest.warns(None) as record: - sdml = SDML() - sdml.fit(pairs, y_pairs) - assert len(record) == 0 - with pytest.warns(None) as record: - sdml = SDML_Supervised(use_cov=False, balance_param=1e-5) - sdml.fit(X, y) - assert len(record) == 0 + msg = ("Warning, skggm is not installed, so SDML will use " + "scikit-learn's graphical_lasso method. It can fail to converge" + "on some non SPD matrices where skggm would converge. If so, " + "try to install skggm. (see the README.md for the right " + "version.)") + with pytest.warns(None) as record: + sdml.fit(pairs, y_pairs) + assert str(record[0].message) == msg + with pytest.warns(None) as record: + sdml_supervised.fit(X, y) + assert str(record[0].message) == msg + + @pytest.mark.skipif(not has_installed_skggm(), + reason="It's only in the case where skggm is installed" + "that no warning should be thrown.") + def test_raises_no_warning_installed_skggm(self): + # otherwise we should be able to instantiate and fit SDML and it + # should raise no warning + pairs = np.array([[[-10., 0.], [10., 0.]], [[0., -55.], [0., -60]]]) + y_pairs = [1, -1] + X, y = make_classification(random_state=42) + with pytest.warns(None) as record: + sdml = SDML() + sdml.fit(pairs, y_pairs) + assert len(record) == 0 + with pytest.warns(None) as record: + sdml = SDML_Supervised(use_cov=False, balance_param=1e-5) + sdml.fit(X, y) + assert len(record) == 0 def test_iris(self): # Note: this is a flaky test, which fails for certain seeds. @@ -235,39 +245,46 @@ def test_sdml_converges_if_psd(self): sdml.fit(pairs, y) assert np.isfinite(sdml.get_mahalanobis_matrix()).all() + @pytest.mark.skipif(not has_installed_skggm(), + reason="sklearn's graphical_lasso can sometimes not " + "work on some non SPD problems. We test that " + "is works only if skggm is installed.") def test_sdml_works_on_non_spd_pb_with_skggm(self): """Test that SDML works on a certain non SPD problem on which we know it should work, but scikit-learn's graphical_lasso does not work""" - if has_installed_skggm(): - X, y = load_iris(return_X_y=True) - sdml = SDML_Supervised(balance_param=0.5, sparsity_param=0.01, - use_cov=True) - sdml.fit(X, y) + X, y = load_iris(return_X_y=True) + sdml = SDML_Supervised(balance_param=0.5, sparsity_param=0.01, + use_cov=True) + sdml.fit(X, y) +@pytest.mark.skipif(not has_installed_skggm(), + reason='The message should be printed only if skggm is ' + 'installed.') def test_verbose_has_installed_skggm_sdml(capsys): # Test that if users have installed skggm, a message is printed telling them # skggm's solver is used (when they use SDML) # TODO: remove if we don't need skggm anymore - if has_installed_skggm(): - pairs = np.array([[[-10., 0.], [10., 0.]], [[0., -55.], [0., -60]]]) - y_pairs = [1, -1] - sdml = SDML() - sdml.fit(pairs, y_pairs) - out, _ = capsys.readouterr() - assert "SDML will use skggm's solver." in out + pairs = np.array([[[-10., 0.], [10., 0.]], [[0., -55.], [0., -60]]]) + y_pairs = [1, -1] + sdml = SDML() + sdml.fit(pairs, y_pairs) + out, _ = capsys.readouterr() + assert "SDML will use skggm's solver." in out +@pytest.mark.skipif(not has_installed_skggm(), + reason='The message should be printed only if skggm is ' + 'installed.') def test_verbose_has_installed_skggm_sdml_supervised(capsys): # Test that if users have installed skggm, a message is printed telling them # skggm's solver is used (when they use SDML_Supervised) # TODO: remove if we don't need skggm anymore - if has_installed_skggm(): - X, y = make_classification(random_state=42) - sdml = SDML_Supervised() - sdml.fit(X, y) - out, _ = capsys.readouterr() - assert "SDML will use skggm's solver." in out + X, y = make_classification(random_state=42) + sdml = SDML_Supervised() + sdml.fit(X, y) + out, _ = capsys.readouterr() + assert "SDML will use skggm's solver." in out class TestNCA(MetricTestCase): From 03f4158db99788765884d16a89bd5d9d3dc66c73 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Fri, 8 Mar 2019 16:11:59 +0100 Subject: [PATCH 53/75] FIX: fix broken link in README.md --- README.rst | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/README.rst b/README.rst index e1007287..4663bf77 100644 --- a/README.rst +++ b/README.rst @@ -20,8 +20,7 @@ Metric Learning algorithms in Python. **Dependencies** - Python 2.7+, 3.4+ -- numpy, scipy, scikit-learn, and skggm (commit [a0ed406](https://github.com/\ -skggm/skggm/commit/a0ed406586c4364ea3297a658f415e13b5cbdaf8)) for `SDML` +- numpy, scipy, scikit-learn, and skggm (commit [a0ed406](https://github.com/skggm/skggm/commit/a0ed406586c4364ea3297a658f415e13b5cbdaf8)) for `SDML` - (for running the examples only: matplotlib) **Installation/Setup** From e621e271aee6f656baaa784c059890e831b9653c Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Fri, 8 Mar 2019 16:17:56 +0100 Subject: [PATCH 54/75] use rst syntax for link --- README.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 4663bf77..df9eef82 100644 --- a/README.rst +++ b/README.rst @@ -20,7 +20,8 @@ Metric Learning algorithms in Python. **Dependencies** - Python 2.7+, 3.4+ -- numpy, scipy, scikit-learn, and skggm (commit [a0ed406](https://github.com/skggm/skggm/commit/a0ed406586c4364ea3297a658f415e13b5cbdaf8)) for `SDML` +- numpy, scipy, scikit-learn, and skggm (commit `a0ed406 `_ for `SDML` - (for running the examples only: matplotlib) **Installation/Setup** From 0086c9824029de388c17864f91375a6785243be4 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Fri, 8 Mar 2019 16:18:56 +0100 Subject: [PATCH 55/75] use rst syntax for link --- README.rst | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/README.rst b/README.rst index df9eef82..7f181af2 100644 --- a/README.rst +++ b/README.rst @@ -20,8 +20,7 @@ Metric Learning algorithms in Python. **Dependencies** - Python 2.7+, 3.4+ -- numpy, scipy, scikit-learn, and skggm (commit `a0ed406 `_ for `SDML` +- numpy, scipy, scikit-learn, and skggm (commit `a0ed406 `_ for `SDML` - (for running the examples only: matplotlib) **Installation/Setup** From 001600e617ced6a8b14f9c8492a6663d2cabb6a2 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Fri, 8 Mar 2019 16:19:25 +0100 Subject: [PATCH 56/75] use rst syntax for link --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 7f181af2..13ddca71 100644 --- a/README.rst +++ b/README.rst @@ -20,7 +20,7 @@ Metric Learning algorithms in Python. **Dependencies** - Python 2.7+, 3.4+ -- numpy, scipy, scikit-learn, and skggm (commit `a0ed406 `_ for `SDML` +- numpy, scipy, scikit-learn, and skggm (commit `a0ed406 `_) for `SDML` - (for running the examples only: matplotlib) **Installation/Setup** From 8c50a0d570df9c0d7a96d689f5a278f93942494c Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Fri, 8 Mar 2019 16:30:06 +0100 Subject: [PATCH 57/75] MAINT: remove test_sdml that was remaining from drafts tests --- examples/test_sdml.py | 18 ------------------ 1 file changed, 18 deletions(-) delete mode 100644 examples/test_sdml.py diff --git a/examples/test_sdml.py b/examples/test_sdml.py deleted file mode 100644 index 300218bd..00000000 --- a/examples/test_sdml.py +++ /dev/null @@ -1,18 +0,0 @@ -import numpy as np -from sklearn.datasets import load_iris -from metric_learn import SDML_Supervised -import matplotlib.pyplot as plt - -dataset = load_iris() -X, y = dataset.data, dataset.target -sdml = SDML_Supervised(num_constraints=200) -sdml.fit(X, y, random_state = np.random.RandomState(1234)) -embeddings = sdml.transform(X) - -plt.figure() -plt.scatter(X[:, 0], X[:, 1], c=y) -plt.show() - -plt.figure() -plt.scatter(embeddings[:, 0], embeddings[:, 1], c=y) -plt.show() \ No newline at end of file From e4132d6d5c4d8973b89f51661e53500079bae1e8 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Fri, 8 Mar 2019 16:35:43 +0100 Subject: [PATCH 58/75] TST: remove skipping SDML in test_cross_validation_manual_vs_scikit --- test/test_sklearn_compat.py | 60 ++++++++++++++++++------------------- 1 file changed, 29 insertions(+), 31 deletions(-) diff --git a/test/test_sklearn_compat.py b/test/test_sklearn_compat.py index 827c0ed3..f8c6ec34 100644 --- a/test/test_sklearn_compat.py +++ b/test/test_sklearn_compat.py @@ -125,39 +125,37 @@ def test_cross_validation_manual_vs_scikit(estimator, build_dataset, same as scikit-learn's cross-validation (some code for generating the folds is taken from scikit-learn). """ - # TODO: remove this check when SDML has become deterministic - if not str(estimator).startswith('SDML'): - if any(hasattr(estimator, method) for method in ["predict", "score"]): - input_data, labels, preprocessor, _ = build_dataset(with_preprocessor) - estimator = clone(estimator) - estimator.set_params(preprocessor=preprocessor) - set_random_state(estimator) - n_splits = 3 - kfold = KFold(shuffle=False, n_splits=n_splits) - n_samples = input_data.shape[0] - fold_sizes = (n_samples // n_splits) * np.ones(n_splits, dtype=np.int) - fold_sizes[:n_samples % n_splits] += 1 - current = 0 - scores, predictions = [], np.zeros(input_data.shape[0]) - for fold_size in fold_sizes: - start, stop = current, current + fold_size - current = stop - test_slice = slice(start, stop) - train_mask = np.ones(input_data.shape[0], bool) - train_mask[test_slice] = False - y_train, y_test = labels[train_mask], labels[test_slice] - estimator.fit(input_data[train_mask], y_train) - if hasattr(estimator, "score"): - scores.append(estimator.score(input_data[test_slice], y_test)) - if hasattr(estimator, "predict"): - predictions[test_slice] = estimator.predict(input_data[test_slice]) + if any(hasattr(estimator, method) for method in ["predict", "score"]): + input_data, labels, preprocessor, _ = build_dataset(with_preprocessor) + estimator = clone(estimator) + estimator.set_params(preprocessor=preprocessor) + set_random_state(estimator) + n_splits = 3 + kfold = KFold(shuffle=False, n_splits=n_splits) + n_samples = input_data.shape[0] + fold_sizes = (n_samples // n_splits) * np.ones(n_splits, dtype=np.int) + fold_sizes[:n_samples % n_splits] += 1 + current = 0 + scores, predictions = [], np.zeros(input_data.shape[0]) + for fold_size in fold_sizes: + start, stop = current, current + fold_size + current = stop + test_slice = slice(start, stop) + train_mask = np.ones(input_data.shape[0], bool) + train_mask[test_slice] = False + y_train, y_test = labels[train_mask], labels[test_slice] + estimator.fit(input_data[train_mask], y_train) if hasattr(estimator, "score"): - assert all(scores == cross_val_score(estimator, input_data, labels, - cv=kfold)) + scores.append(estimator.score(input_data[test_slice], y_test)) if hasattr(estimator, "predict"): - assert all(predictions == cross_val_predict(estimator, input_data, - labels, - cv=kfold)) + predictions[test_slice] = estimator.predict(input_data[test_slice]) + if hasattr(estimator, "score"): + assert all(scores == cross_val_score(estimator, input_data, labels, + cv=kfold)) + if hasattr(estimator, "predict"): + assert all(predictions == cross_val_predict(estimator, input_data, + labels, + cv=kfold)) def check_score(estimator, tuples, y): From b3bf6a860cea0e1c54204b49252bccaf93ad35cb Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Fri, 8 Mar 2019 16:37:31 +0100 Subject: [PATCH 59/75] FIX link also in getting started --- doc/getting_started.rst | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/getting_started.rst b/doc/getting_started.rst index 8d769f29..0e1d7fb7 100644 --- a/doc/getting_started.rst +++ b/doc/getting_started.rst @@ -15,8 +15,7 @@ Alternately, download the source repository and run: **Dependencies** - Python 2.7+, 3.4+ -- numpy, scipy, scikit-learn, and skggm (commit [a0ed406](https://github.com/\ -skggm/skggm/commit/a0ed406586c4364ea3297a658f415e13b5cbdaf8)) for `SDML` +- numpy, scipy, scikit-learn, and skggm (commit `a0ed406 `_) for `SDML` - (for running the examples only: matplotlib) **Notes** From 49f3b9ecc65f3d67e02026c2522654218e72d999 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Fri, 8 Mar 2019 16:40:56 +0100 Subject: [PATCH 60/75] Put back right indent --- test/metric_learn_test.py | 208 +++++++++++++++++++------------------- 1 file changed, 104 insertions(+), 104 deletions(-) diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py index 7c9dbcd6..311af484 100644 --- a/test/metric_learn_test.py +++ b/test/metric_learn_test.py @@ -150,112 +150,112 @@ def test_no_twice_same_objective(capsys): class TestSDML(MetricTestCase): - @pytest.mark.skipif(has_installed_skggm(), - reason="The warning will be thrown only if skggm is " - "not installed.") - def test_raises_warning_msg_not_installed_skggm(self): - """Tests that the right warning message is raised if someone tries to - use SDML but has not installed skggm""" - # TODO: remove if we don't need skggm anymore - pairs = np.array([[[-10., 0.], [10., 0.]], [[0., -55.], [0., -60]]]) - y_pairs = [1, -1] - X, y = make_classification(random_state=42) + @pytest.mark.skipif(has_installed_skggm(), + reason="The warning will be thrown only if skggm is " + "not installed.") + def test_raises_warning_msg_not_installed_skggm(self): + """Tests that the right warning message is raised if someone tries to + use SDML but has not installed skggm""" + # TODO: remove if we don't need skggm anymore + pairs = np.array([[[-10., 0.], [10., 0.]], [[0., -55.], [0., -60]]]) + y_pairs = [1, -1] + X, y = make_classification(random_state=42) + sdml = SDML() + sdml_supervised = SDML_Supervised(use_cov=False, balance_param=1e-5) + msg = ("Warning, skggm is not installed, so SDML will use " + "scikit-learn's graphical_lasso method. It can fail to converge" + "on some non SPD matrices where skggm would converge. If so, " + "try to install skggm. (see the README.md for the right " + "version.)") + with pytest.warns(None) as record: + sdml.fit(pairs, y_pairs) + assert str(record[0].message) == msg + with pytest.warns(None) as record: + sdml_supervised.fit(X, y) + assert str(record[0].message) == msg + + @pytest.mark.skipif(not has_installed_skggm(), + reason="It's only in the case where skggm is installed" + "that no warning should be thrown.") + def test_raises_no_warning_installed_skggm(self): + # otherwise we should be able to instantiate and fit SDML and it + # should raise no warning + pairs = np.array([[[-10., 0.], [10., 0.]], [[0., -55.], [0., -60]]]) + y_pairs = [1, -1] + X, y = make_classification(random_state=42) + with pytest.warns(None) as record: sdml = SDML() - sdml_supervised = SDML_Supervised(use_cov=False, balance_param=1e-5) - msg = ("Warning, skggm is not installed, so SDML will use " - "scikit-learn's graphical_lasso method. It can fail to converge" - "on some non SPD matrices where skggm would converge. If so, " - "try to install skggm. (see the README.md for the right " - "version.)") - with pytest.warns(None) as record: - sdml.fit(pairs, y_pairs) - assert str(record[0].message) == msg - with pytest.warns(None) as record: - sdml_supervised.fit(X, y) - assert str(record[0].message) == msg - - @pytest.mark.skipif(not has_installed_skggm(), - reason="It's only in the case where skggm is installed" - "that no warning should be thrown.") - def test_raises_no_warning_installed_skggm(self): - # otherwise we should be able to instantiate and fit SDML and it - # should raise no warning - pairs = np.array([[[-10., 0.], [10., 0.]], [[0., -55.], [0., -60]]]) - y_pairs = [1, -1] - X, y = make_classification(random_state=42) - with pytest.warns(None) as record: - sdml = SDML() - sdml.fit(pairs, y_pairs) - assert len(record) == 0 - with pytest.warns(None) as record: - sdml = SDML_Supervised(use_cov=False, balance_param=1e-5) - sdml.fit(X, y) - assert len(record) == 0 - - def test_iris(self): - # Note: this is a flaky test, which fails for certain seeds. - # TODO: un-flake it! - rs = np.random.RandomState(5555) - - sdml = SDML_Supervised(num_constraints=1500, use_cov=False, - balance_param=5e-5) - sdml.fit(self.iris_points, self.iris_labels, random_state=rs) - csep = class_separation(sdml.transform(self.iris_points), - self.iris_labels) - self.assertLess(csep, 0.22) - - def test_deprecation_num_labeled(self): - # test that a deprecation message is thrown if num_labeled is set at - # initialization - # TODO: remove in v.0.6 - X, y = make_classification(random_state=42) - sdml_supervised = SDML_Supervised(num_labeled=np.inf, use_cov=False, - balance_param=5e-5) - msg = ('"num_labeled" parameter is not used.' - ' It has been deprecated in version 0.5.0 and will be' - 'removed in 0.6.0') - assert_warns_message(DeprecationWarning, msg, sdml_supervised.fit, X, y) - - def test_sdml_raises_warning_non_psd(self): - """Tests that SDML raises a warning on a toy example where we know the - pseudo-covariance matrix is not PSD""" - pairs = np.array([[[-10., 0.], [10., 0.]], [[0., 50.], [0., -60]]]) - y = [1, -1] - sdml = SDML(use_cov=True, sparsity_param=0.01, balance_param=0.5) - msg = ("Warning, the input matrix of graphical lasso is not " - "positive semi-definite (PSD). The algorithm may diverge, " - "and lead to degenerate solutions. " - "To prevent that, try to decrease the balance parameter " - "`balance_param` and/or to set use_covariance=False.") - with pytest.warns(ConvergenceWarning) as raised_warning: - try: - sdml.fit(pairs, y) - except Exception: - pass - # we assert that this warning is in one of the warning raised by the - # estimator - assert msg in list(map(lambda w: str(w.message), raised_warning)) - - def test_sdml_converges_if_psd(self): - """Tests that sdml converges on a simple problem where we know the - pseudo-covariance matrix is PSD""" - pairs = np.array([[[-10., 0.], [10., 0.]], [[0., -55.], [0., -60]]]) - y = [1, -1] - sdml = SDML(use_cov=True, sparsity_param=0.01, balance_param=0.5) - sdml.fit(pairs, y) - assert np.isfinite(sdml.get_mahalanobis_matrix()).all() - - @pytest.mark.skipif(not has_installed_skggm(), - reason="sklearn's graphical_lasso can sometimes not " - "work on some non SPD problems. We test that " - "is works only if skggm is installed.") - def test_sdml_works_on_non_spd_pb_with_skggm(self): - """Test that SDML works on a certain non SPD problem on which we know - it should work, but scikit-learn's graphical_lasso does not work""" - X, y = load_iris(return_X_y=True) - sdml = SDML_Supervised(balance_param=0.5, sparsity_param=0.01, - use_cov=True) + sdml.fit(pairs, y_pairs) + assert len(record) == 0 + with pytest.warns(None) as record: + sdml = SDML_Supervised(use_cov=False, balance_param=1e-5) sdml.fit(X, y) + assert len(record) == 0 + + def test_iris(self): + # Note: this is a flaky test, which fails for certain seeds. + # TODO: un-flake it! + rs = np.random.RandomState(5555) + + sdml = SDML_Supervised(num_constraints=1500, use_cov=False, + balance_param=5e-5) + sdml.fit(self.iris_points, self.iris_labels, random_state=rs) + csep = class_separation(sdml.transform(self.iris_points), + self.iris_labels) + self.assertLess(csep, 0.22) + + def test_deprecation_num_labeled(self): + # test that a deprecation message is thrown if num_labeled is set at + # initialization + # TODO: remove in v.0.6 + X, y = make_classification(random_state=42) + sdml_supervised = SDML_Supervised(num_labeled=np.inf, use_cov=False, + balance_param=5e-5) + msg = ('"num_labeled" parameter is not used.' + ' It has been deprecated in version 0.5.0 and will be' + 'removed in 0.6.0') + assert_warns_message(DeprecationWarning, msg, sdml_supervised.fit, X, y) + + def test_sdml_raises_warning_non_psd(self): + """Tests that SDML raises a warning on a toy example where we know the + pseudo-covariance matrix is not PSD""" + pairs = np.array([[[-10., 0.], [10., 0.]], [[0., 50.], [0., -60]]]) + y = [1, -1] + sdml = SDML(use_cov=True, sparsity_param=0.01, balance_param=0.5) + msg = ("Warning, the input matrix of graphical lasso is not " + "positive semi-definite (PSD). The algorithm may diverge, " + "and lead to degenerate solutions. " + "To prevent that, try to decrease the balance parameter " + "`balance_param` and/or to set use_covariance=False.") + with pytest.warns(ConvergenceWarning) as raised_warning: + try: + sdml.fit(pairs, y) + except Exception: + pass + # we assert that this warning is in one of the warning raised by the + # estimator + assert msg in list(map(lambda w: str(w.message), raised_warning)) + + def test_sdml_converges_if_psd(self): + """Tests that sdml converges on a simple problem where we know the + pseudo-covariance matrix is PSD""" + pairs = np.array([[[-10., 0.], [10., 0.]], [[0., -55.], [0., -60]]]) + y = [1, -1] + sdml = SDML(use_cov=True, sparsity_param=0.01, balance_param=0.5) + sdml.fit(pairs, y) + assert np.isfinite(sdml.get_mahalanobis_matrix()).all() + + @pytest.mark.skipif(not has_installed_skggm(), + reason="sklearn's graphical_lasso can sometimes not " + "work on some non SPD problems. We test that " + "is works only if skggm is installed.") + def test_sdml_works_on_non_spd_pb_with_skggm(self): + """Test that SDML works on a certain non SPD problem on which we know + it should work, but scikit-learn's graphical_lasso does not work""" + X, y = load_iris(return_X_y=True) + sdml = SDML_Supervised(balance_param=0.5, sparsity_param=0.01, + use_cov=True) + sdml.fit(X, y) @pytest.mark.skipif(not has_installed_skggm(), From e1664c7034ac0b0852ac676d346b10a9794746f7 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Fri, 8 Mar 2019 16:47:33 +0100 Subject: [PATCH 61/75] Remove unnecessary changes --- test/test_base_metric.py | 4 +--- test/test_sklearn_compat.py | 1 - 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/test/test_base_metric.py b/test/test_base_metric.py index fed4d780..6c9a6dc5 100644 --- a/test/test_base_metric.py +++ b/test/test_base_metric.py @@ -4,7 +4,6 @@ import numpy as np from sklearn import clone from sklearn.utils.testing import set_random_state - from test.test_utils import ids_metric_learners, metric_learners @@ -56,8 +55,7 @@ def test_lsml(self): def test_sdml(self): self.assertEqual(str(metric_learn.SDML()), "SDML(balance_param=0.5, preprocessor=None, " - "sparsity_param=0.01, use_cov=True,\n " - "verbose=False)") + "sparsity_param=0.01, use_cov=True,\n verbose=False)") self.assertEqual(str(metric_learn.SDML_Supervised()), """ SDML_Supervised(balance_param=0.5, num_constraints=None, num_labeled='deprecated', preprocessor=None, sparsity_param=0.01, diff --git a/test/test_sklearn_compat.py b/test/test_sklearn_compat.py index f8c6ec34..f1248c9a 100644 --- a/test/test_sklearn_compat.py +++ b/test/test_sklearn_compat.py @@ -16,7 +16,6 @@ from sklearn.model_selection import (cross_val_score, cross_val_predict, train_test_split, KFold) from sklearn.utils.testing import _get_args - from test.test_utils import (metric_learners, ids_metric_learners, mock_preprocessor) From 60866cb6e7df2cdc3f7d7a9edf1e17fd4854dd0b Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Mon, 18 Mar 2019 09:40:17 +0100 Subject: [PATCH 62/75] Nitpick for concatenation and refactor HAS_SKGGM --- metric_learn/_util.py | 8 -------- metric_learn/constraints.py | 2 +- metric_learn/sdml.py | 12 ++++++++---- test/metric_learn_test.py | 18 +++++++++++------- 4 files changed, 20 insertions(+), 20 deletions(-) diff --git a/metric_learn/_util.py b/metric_learn/_util.py index 82552ff7..bd57fd5f 100644 --- a/metric_learn/_util.py +++ b/metric_learn/_util.py @@ -15,14 +15,6 @@ def vector_norm(X): return np.linalg.norm(X, axis=1) -def has_installed_skggm(): - try: - import inverse_covariance - return True - except ImportError: - return False - - def check_input(input_data, y=None, preprocessor=None, type_of_inputs='classic', tuple_size=None, accept_sparse=False, dtype='numeric', order=None, diff --git a/metric_learn/constraints.py b/metric_learn/constraints.py index a5487690..e591830b 100644 --- a/metric_learn/constraints.py +++ b/metric_learn/constraints.py @@ -96,6 +96,6 @@ def wrap_pairs(X, constraints): c = np.array(constraints[2]) d = np.array(constraints[3]) constraints = np.vstack((np.column_stack((a, b)), np.column_stack((c, d)))) - y = np.hstack([np.ones((len(a),)), - np.ones((len(c),))]) + y = np.concatenate([np.ones_like(a), -np.ones_like(c)]) pairs = X[constraints] return pairs, y diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py index 88aa7776..d54a5ee2 100644 --- a/metric_learn/sdml.py +++ b/metric_learn/sdml.py @@ -18,9 +18,13 @@ from .base_metric import MahalanobisMixin, _PairsClassifierMixin from .constraints import Constraints, wrap_pairs -from ._util import transformer_from_metric, has_installed_skggm -if has_installed_skggm(): +from ._util import transformer_from_metric +try: from inverse_covariance import quic +except ImportError: + HAS_SKGGM = False +else: + HAS_SKGGM = True class _BaseSDML(MahalanobisMixin): @@ -55,7 +59,7 @@ def __init__(self, balance_param=0.5, sparsity_param=0.01, use_cov=True, super(_BaseSDML, self).__init__(preprocessor) def _fit(self, pairs, y): - if not has_installed_skggm(): + if not HAS_SKGGM: msg = ("Warning, skggm is not installed, so SDML will use " "scikit-learn's graphical_lasso method. It can fail to converge" "on some non SPD matrices where skggm would converge. If so, " @@ -89,7 +93,7 @@ def _fit(self, pairs, y): "`balance_param` and/or to set use_covariance=False.", ConvergenceWarning) sigma0 = (V * (w - min(0, np.min(w)) + 1e-10)).dot(V.T) - if has_installed_skggm(): + if HAS_SKGGM: theta0 = pinvh(sigma0) M, _, _, _, _, _ = quic(emp_cov, lam=self.sparsity_param, msg=self.verbose, diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py index 311af484..bf888bd4 100644 --- a/test/metric_learn_test.py +++ b/test/metric_learn_test.py @@ -10,12 +10,16 @@ from sklearn.utils.testing import assert_warns_message from sklearn.exceptions import ConvergenceWarning from sklearn.utils.validation import check_X_y - +try: + from inverse_covariance import quic +except ImportError: + HAS_SKGGM = False +else: + HAS_SKGGM = True from metric_learn import (LMNN, NCA, LFDA, Covariance, MLKR, MMC, LSML_Supervised, ITML_Supervised, SDML_Supervised, RCA_Supervised, MMC_Supervised, SDML) # Import this specially for testing. -from metric_learn._util import has_installed_skggm from metric_learn.constraints import wrap_pairs from metric_learn.lmnn import python_LMNN @@ -150,7 +154,7 @@ def test_no_twice_same_objective(capsys): class TestSDML(MetricTestCase): - @pytest.mark.skipif(has_installed_skggm(), + @pytest.mark.skipif(HAS_SKGGM, reason="The warning will be thrown only if skggm is " "not installed.") def test_raises_warning_msg_not_installed_skggm(self): @@ -174,7 +178,7 @@ def test_raises_warning_msg_not_installed_skggm(self): sdml_supervised.fit(X, y) assert str(record[0].message) == msg - @pytest.mark.skipif(not has_installed_skggm(), + @pytest.mark.skipif(not HAS_SKGGM, reason="It's only in the case where skggm is installed" "that no warning should be thrown.") def test_raises_no_warning_installed_skggm(self): @@ -245,7 +249,7 @@ def test_sdml_converges_if_psd(self): sdml.fit(pairs, y) assert np.isfinite(sdml.get_mahalanobis_matrix()).all() - @pytest.mark.skipif(not has_installed_skggm(), + @pytest.mark.skipif(not HAS_SKGGM, reason="sklearn's graphical_lasso can sometimes not " "work on some non SPD problems. We test that " "is works only if skggm is installed.") @@ -258,7 +262,7 @@ def test_sdml_works_on_non_spd_pb_with_skggm(self): sdml.fit(X, y) -@pytest.mark.skipif(not has_installed_skggm(), +@pytest.mark.skipif(not HAS_SKGGM, reason='The message should be printed only if skggm is ' 'installed.') def test_verbose_has_installed_skggm_sdml(capsys): @@ -273,7 +277,7 @@ def test_verbose_has_installed_skggm_sdml(capsys): assert "SDML will use skggm's solver." in out -@pytest.mark.skipif(not has_installed_skggm(), +@pytest.mark.skipif(not HAS_SKGGM, reason='The message should be printed only if skggm is ' 'installed.') def test_verbose_has_installed_skggm_sdml_supervised(capsys): From eb9571953a0001d54c2f4a1df47ab1519347f809 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Mon, 18 Mar 2019 15:00:23 +0100 Subject: [PATCH 63/75] ENH: Deal better with errors and skggm/scikit-learn --- metric_learn/sdml.py | 53 +++++++++++----- test/metric_learn_test.py | 130 ++++++++++++++++++++++++++++++++------ 2 files changed, 147 insertions(+), 36 deletions(-) diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py index d54a5ee2..a7b531bc 100644 --- a/metric_learn/sdml.py +++ b/metric_learn/sdml.py @@ -60,14 +60,11 @@ def __init__(self, balance_param=0.5, sparsity_param=0.01, use_cov=True, def _fit(self, pairs, y): if not HAS_SKGGM: - msg = ("Warning, skggm is not installed, so SDML will use " - "scikit-learn's graphical_lasso method. It can fail to converge" - "on some non SPD matrices where skggm would converge. If so, " - "try to install skggm. (see the README.md for the right " - "version.)") - warnings.warn(msg) + if self.verbose: + print("SDML will use scikit-learn's graphical lasso solver.") else: - print("SDML will use skggm's solver.") + if self.verbose: + print("SDML will use skggm's graphical lasso solver.") pairs, y = self._prepare_inputs(pairs, y, type_of_inputs='tuples') @@ -93,15 +90,39 @@ def _fit(self, pairs, y): "`balance_param` and/or to set use_covariance=False.", ConvergenceWarning) sigma0 = (V * (w - min(0, np.min(w)) + 1e-10)).dot(V.T) - if HAS_SKGGM: - theta0 = pinvh(sigma0) - M, _, _, _, _, _ = quic(emp_cov, lam=self.sparsity_param, - msg=self.verbose, - Theta0=theta0, Sigma0=sigma0) - else: - _, M = graphical_lasso(emp_cov, alpha=self.sparsity_param, - verbose=self.verbose, - cov_init=sigma0) + try: + if HAS_SKGGM: + theta0 = pinvh(sigma0) + M, _, _, _, _, _ = quic(emp_cov, lam=self.sparsity_param, + msg=self.verbose, + Theta0=theta0, Sigma0=sigma0) + else: + _, M = graphical_lasso(emp_cov, alpha=self.sparsity_param, + verbose=self.verbose, + cov_init=sigma0) + raised_error = None + w_mahalanobis, _ = np.linalg.eigh(M) + not_spd = any(w_mahalanobis < 0.) + not_finite = not np.isfinite(M).all() + except Exception as e: + raised_error = e + not_spd = False # not_spd not applicable here so we set to False + not_finite = False # not_finite not applicable here so we set to False + if raised_error is not None or not_spd or not_finite: + msg = ("There was a problem in SDML when using {}'s graphical " + "lasso solver.").format("skggm" if HAS_SKGGM else "scikit-learn") + if not HAS_SKGGM: + skggm_advice = (" skggm's graphical lasso can sometimes converge " + "on non SPD cases where scikit-learn's graphical " + "lasso fails to converge. Try to install skggm and " + "rerun the algorithm (see the README.md for the " + "right version of skggm).") + msg += skggm_advice + if raised_error is not None: + msg += " The following error message was thrown: {}.".format( + raised_error) + raise RuntimeError(msg) + self.transformer_ = transformer_from_metric(np.atleast_2d(M)) return self diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py index bf888bd4..0db156e8 100644 --- a/test/metric_learn_test.py +++ b/test/metric_learn_test.py @@ -155,28 +155,89 @@ def test_no_twice_same_objective(capsys): class TestSDML(MetricTestCase): @pytest.mark.skipif(HAS_SKGGM, - reason="The warning will be thrown only if skggm is " + reason="The warning can be thrown only if skggm is " "not installed.") - def test_raises_warning_msg_not_installed_skggm(self): + def test_sdml_supervised_raises_warning_msg_not_installed_skggm(self): """Tests that the right warning message is raised if someone tries to - use SDML but has not installed skggm""" + use SDML_Supervised but has not installed skggm, and that the algorithm + fails to converge""" # TODO: remove if we don't need skggm anymore - pairs = np.array([[[-10., 0.], [10., 0.]], [[0., -55.], [0., -60]]]) + # load_iris: dataset where we know scikit-learn's graphical lasso fails + # with a Floating Point error + X, y = load_iris(return_X_y=True) + sdml_supervised = SDML_Supervised(balance_param=0.5, use_cov=True, + sparsity_param=0.01) + msg = ("There was a problem in SDML when using scikit-learn's graphical " + "lasso solver. skggm's graphical lasso can sometimes converge on " + "non SPD cases where scikit-learn's graphical lasso fails to " + "converge. Try to install skggm and rerun the algorithm (see " + "the README.md for the right version of skggm). The following " + "error message was thrown:") + with pytest.raises(RuntimeError) as raised_error: + sdml_supervised.fit(X, y) + assert str(raised_error.value).startswith(msg) + + @pytest.mark.skipif(HAS_SKGGM, + reason="The warning can be thrown only if skggm is " + "not installed.") + def test_sdml_raises_warning_msg_not_installed_skggm(self): + """Tests that the right warning message is raised if someone tries to + use SDML but has not installed skggm, and that the algorithm fails to + converge""" + # TODO: remove if we don't need skggm anymore + # case on which we know that scikit-learn's graphical lasso fails + # because it will return a non SPD matrix + pairs = np.array([[[-10., 0.], [10., 0.]], [[0., 50.], [0., -60]]]) y_pairs = [1, -1] - X, y = make_classification(random_state=42) - sdml = SDML() - sdml_supervised = SDML_Supervised(use_cov=False, balance_param=1e-5) - msg = ("Warning, skggm is not installed, so SDML will use " - "scikit-learn's graphical_lasso method. It can fail to converge" - "on some non SPD matrices where skggm would converge. If so, " - "try to install skggm. (see the README.md for the right " - "version.)") - with pytest.warns(None) as record: + sdml = SDML(use_cov=False, balance_param=100, verbose=True) + + msg = ("There was a problem in SDML when using scikit-learn's graphical " + "lasso solver. skggm's graphical lasso can sometimes converge on " + "non SPD cases where scikit-learn's graphical lasso fails to " + "converge. Try to install skggm and rerun the algorithm (see " + "the README.md for the right version of skggm).") + with pytest.raises(RuntimeError) as raised_error: sdml.fit(pairs, y_pairs) - assert str(record[0].message) == msg - with pytest.warns(None) as record: + assert msg == str(raised_error.value) + + @pytest.mark.skipif(not HAS_SKGGM, + reason="The warning can be thrown only if skggm is " + "installed.") + def test_sdml_raises_warning_msg_installed_skggm(self): + """Tests that the right warning message is raised if someone tries to + use SDML but has not installed skggm, and that the algorithm fails to + converge""" + # TODO: remove if we don't need skggm anymore + # case on which we know that skggm's graphical lasso fails + # because it will return non finite values + pairs = np.array([[[-10., 0.], [10., 0.]], [[0., 50.], [0., -60]]]) + y_pairs = [1, -1] + sdml = SDML(use_cov=False, balance_param=100, verbose=True) + + msg = ("There was a problem in SDML when using skggm's graphical " + "lasso solver.") + with pytest.raises(RuntimeError) as raised_error: + sdml.fit(pairs, y_pairs) + assert msg == str(raised_error.value) + + @pytest.mark.skipif(not HAS_SKGGM, + reason="The warning can be thrown only if skggm is " + "installed.") + def test_sdml_supervised_raises_warning_msg_installed_skggm(self): + """Tests that the right warning message is raised if someone tries to + use SDML_Supervised but has not installed skggm, and that the algorithm + fails to converge""" + # TODO: remove if we don't need skggm anymore + # case on which we know that skggm's graphical lasso fails + # because it will return non finite values + X, y = load_iris(return_X_y=True) + sdml_supervised = SDML_Supervised(balance_param=0.5, use_cov=True, + sparsity_param=0.01) + msg = ("There was a problem in SDML when using skggm's graphical " + "lasso solver.") + with pytest.raises(RuntimeError) as raised_error: sdml_supervised.fit(X, y) - assert str(record[0].message) == msg + assert msg == str(raised_error.value) @pytest.mark.skipif(not HAS_SKGGM, reason="It's only in the case where skggm is installed" @@ -271,10 +332,10 @@ def test_verbose_has_installed_skggm_sdml(capsys): # TODO: remove if we don't need skggm anymore pairs = np.array([[[-10., 0.], [10., 0.]], [[0., -55.], [0., -60]]]) y_pairs = [1, -1] - sdml = SDML() + sdml = SDML(verbose=True) sdml.fit(pairs, y_pairs) out, _ = capsys.readouterr() - assert "SDML will use skggm's solver." in out + assert "SDML will use skggm's graphical lasso solver." in out @pytest.mark.skipif(not HAS_SKGGM, @@ -285,10 +346,39 @@ def test_verbose_has_installed_skggm_sdml_supervised(capsys): # skggm's solver is used (when they use SDML_Supervised) # TODO: remove if we don't need skggm anymore X, y = make_classification(random_state=42) - sdml = SDML_Supervised() + sdml = SDML_Supervised(verbose=True) + sdml.fit(X, y) + out, _ = capsys.readouterr() + assert "SDML will use skggm's graphical lasso solver." in out + + +@pytest.mark.skipif(HAS_SKGGM, + reason='The message should be printed only if skggm is ' + 'not installed.') +def test_verbose_has_not_installed_skggm_sdml(capsys): + # Test that if users have installed skggm, a message is printed telling them + # skggm's solver is used (when they use SDML) + # TODO: remove if we don't need skggm anymore + pairs = np.array([[[-10., 0.], [10., 0.]], [[0., -55.], [0., -60]]]) + y_pairs = [1, -1] + sdml = SDML(verbose=True) + sdml.fit(pairs, y_pairs) + out, _ = capsys.readouterr() + assert "SDML will use scikit-learn's graphical lasso solver." in out + + +@pytest.mark.skipif(HAS_SKGGM, + reason='The message should be printed only if skggm is ' + 'not installed.') +def test_verbose_has_not_installed_skggm_sdml_supervised(capsys): + # Test that if users have installed skggm, a message is printed telling them + # skggm's solver is used (when they use SDML_Supervised) + # TODO: remove if we don't need skggm anymore + X, y = make_classification(random_state=42) + sdml = SDML_Supervised(verbose=True, balance_param=1e-5, use_cov=False) sdml.fit(X, y) out, _ = capsys.readouterr() - assert "SDML will use skggm's solver." in out + assert "SDML will use scikit-learn's graphical lasso solver." in out class TestNCA(MetricTestCase): From 4d61dbac453226136e2fa1b1c36ac39332012ca9 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Mon, 18 Mar 2019 15:03:22 +0100 Subject: [PATCH 64/75] Better creation of prior --- metric_learn/sdml.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py index a7b531bc..56889706 100644 --- a/metric_learn/sdml.py +++ b/metric_learn/sdml.py @@ -68,15 +68,15 @@ def _fit(self, pairs, y): pairs, y = self._prepare_inputs(pairs, y, type_of_inputs='tuples') - # set up prior M + # set up (the inverse of) the prior M if self.use_cov: X = np.vstack({tuple(row) for row in pairs.reshape(-1, pairs.shape[2])}) - prior = pinvh(np.atleast_2d(np.cov(X, rowvar=False))) + prior_inv = np.atleast_2d(np.cov(X, rowvar=False)) else: - prior = np.identity(pairs.shape[2]) + prior_inv = np.identity(pairs.shape[2]) diff = pairs[:, 0] - pairs[:, 1] loss_matrix = (diff.T * y).dot(diff) - emp_cov = pinvh(prior) + self.balance_param * loss_matrix + emp_cov = prior_inv + self.balance_param * loss_matrix # our initialization will be the matrix with emp_cov's eigenvalues, # with a constant added so that they are all positive (plus an epsilon From 71a02e0d69c08b55a7d2bfd0d72e3f5c05faa527 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Mon, 18 Mar 2019 15:22:01 +0100 Subject: [PATCH 65/75] Simplification for init of sdml --- metric_learn/sdml.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py index 56889706..590fbfb2 100644 --- a/metric_learn/sdml.py +++ b/metric_learn/sdml.py @@ -82,14 +82,17 @@ def _fit(self, pairs, y): # with a constant added so that they are all positive (plus an epsilon # to ensure definiteness). This is empirical. w, V = np.linalg.eigh(emp_cov) - if any(w < 0.): + min_eigval = np.min(w) + if min_eigval < 0.: warnings.warn("Warning, the input matrix of graphical lasso is not " "positive semi-definite (PSD). The algorithm may diverge, " "and lead to degenerate solutions. " "To prevent that, try to decrease the balance parameter " "`balance_param` and/or to set use_covariance=False.", ConvergenceWarning) - sigma0 = (V * (w - min(0, np.min(w)) + 1e-10)).dot(V.T) + w -= min_eigval # we translate the eigenvalues to make them all positive + w += 1e-10 # we add a small offset to avoid definiteness problems + sigma0 = (V * w).dot(V.T) try: if HAS_SKGGM: theta0 = pinvh(sigma0) From 1e6d44022b6c7ffcbf51c5a71163f9fa59c92832 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Mon, 18 Mar 2019 15:25:47 +0100 Subject: [PATCH 66/75] Put skggm as optional --- README.rst | 9 +++++++-- doc/getting_started.rst | 9 +++++++-- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/README.rst b/README.rst index 681d3171..8e0a4512 100644 --- a/README.rst +++ b/README.rst @@ -20,8 +20,13 @@ Metric Learning algorithms in Python. **Dependencies** - Python 2.7+, 3.4+ -- numpy, scipy, scikit-learn, and skggm (commit `a0ed406 `_) for `SDML` -- (for running the examples only: matplotlib) +- numpy, scipy, scikit-learn + +**Optional dependencies** + +- For SDML, convergence will be better in some cases with skggm +(install from commit `a0ed406 `_). +- For running the examples only: matplotlib **Installation/Setup** diff --git a/doc/getting_started.rst b/doc/getting_started.rst index 0e1d7fb7..2d81b213 100644 --- a/doc/getting_started.rst +++ b/doc/getting_started.rst @@ -15,8 +15,13 @@ Alternately, download the source repository and run: **Dependencies** - Python 2.7+, 3.4+ -- numpy, scipy, scikit-learn, and skggm (commit `a0ed406 `_) for `SDML` -- (for running the examples only: matplotlib) +- numpy, scipy, scikit-learn + +**Optional dependencies** + +- For SDML, convergence will be better in some cases with skggm +(install from commit `a0ed406 `_). +- For running the examples only: matplotlib **Notes** From a7ed1bbdf23907c5bbfc8a176e1214e6b0a2f63b Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Mon, 18 Mar 2019 15:29:12 +0100 Subject: [PATCH 67/75] Specify skggm version --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 0f9fccc9..dfb20fc0 100755 --- a/setup.py +++ b/setup.py @@ -38,7 +38,7 @@ extras_require=dict( docs=['sphinx', 'shinx_rtd_theme', 'numpydoc'], demo=['matplotlib'], - sdml=['skggm'] + sdml=['skggm>=0.2.9'] ), test_suite='test', keywords=[ From 31072d326620dce295669a45108f336d138459a0 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Mon, 18 Mar 2019 15:41:59 +0100 Subject: [PATCH 68/75] TST: make test about 1 feature arrays more readable --- test/test_mahalanobis_mixin.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/test/test_mahalanobis_mixin.py b/test/test_mahalanobis_mixin.py index d4bed3fe..a0bf3b9d 100644 --- a/test/test_mahalanobis_mixin.py +++ b/test/test_mahalanobis_mixin.py @@ -10,6 +10,8 @@ from sklearn.utils.testing import set_random_state from metric_learn._util import make_context +from metric_learn.base_metric import (_QuadrupletsClassifierMixin, + _PairsClassifierMixin) from test.test_utils import ids_metric_learners, metric_learners @@ -283,13 +285,17 @@ def test_transformer_is_2D(estimator, build_dataset): trunc_data = input_data[..., :1] # we drop duplicates that might have been formed, i.e. of the form # aabc or abcc or aabb for quadruplets, and aa for pairs. - slices = {4: [slice(0, 2), slice(2, 4)], 2: [slice(0, 2)]} - if trunc_data.ndim == 3: - for slice_idx in slices[trunc_data.shape[1]]: + if isinstance(estimator, _QuadrupletsClassifierMixin): + for slice_idx in [slice(0, 2), slice(2, 4)]: pairs = trunc_data[:, slice_idx, :] diffs = pairs[:, 1, :] - pairs[:, 0, :] - to_keep = np.nonzero(diffs.ravel()) + to_keep = np.where(np.abs(diffs.ravel()) > 1e-9) trunc_data = trunc_data[to_keep] labels = labels[to_keep] + elif isinstance(estimator, _PairsClassifierMixin): + diffs = trunc_data[:, 1, :] - trunc_data[:, 0, :] + to_keep = np.where(np.abs(diffs.ravel()) > 1e-9) + trunc_data = trunc_data[to_keep] + labels = labels[to_keep] model.fit(trunc_data, labels) assert model.transformer_.shape == (1, 1) # the transformer must be 2D From 000f29ac7af87971c1198db590d161b33e8f7699 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Mon, 18 Mar 2019 15:44:58 +0100 Subject: [PATCH 69/75] DOC: fix rst formatting --- README.rst | 2 +- doc/getting_started.rst | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index 8e0a4512..9bb3ca15 100644 --- a/README.rst +++ b/README.rst @@ -25,7 +25,7 @@ Metric Learning algorithms in Python. **Optional dependencies** - For SDML, convergence will be better in some cases with skggm -(install from commit `a0ed406 `_). + (install from commit `a0ed406 `_). - For running the examples only: matplotlib **Installation/Setup** diff --git a/doc/getting_started.rst b/doc/getting_started.rst index 2d81b213..6f54a0ba 100644 --- a/doc/getting_started.rst +++ b/doc/getting_started.rst @@ -20,7 +20,7 @@ Alternately, download the source repository and run: **Optional dependencies** - For SDML, convergence will be better in some cases with skggm -(install from commit `a0ed406 `_). + (install from commit `a0ed406 `_). - For running the examples only: matplotlib **Notes** From 169dccff1e99b632d8e355ddfca0709c333d5cdf Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Mon, 18 Mar 2019 15:47:36 +0100 Subject: [PATCH 70/75] DOC: reformulated skggm optional dependency --- README.rst | 2 +- doc/getting_started.rst | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index 9bb3ca15..e1bfca51 100644 --- a/README.rst +++ b/README.rst @@ -24,7 +24,7 @@ Metric Learning algorithms in Python. **Optional dependencies** -- For SDML, convergence will be better in some cases with skggm +- For SDML, using skggm will allow the algorithm to solve problematic cases (install from commit `a0ed406 `_). - For running the examples only: matplotlib diff --git a/doc/getting_started.rst b/doc/getting_started.rst index 6f54a0ba..2d2df25e 100644 --- a/doc/getting_started.rst +++ b/doc/getting_started.rst @@ -19,7 +19,7 @@ Alternately, download the source repository and run: **Optional dependencies** -- For SDML, convergence will be better in some cases with skggm +- For SDML, using skggm will allow the algorithm to solve problematic cases (install from commit `a0ed406 `_). - For running the examples only: matplotlib From bfb0f8f3b5b5624949375b854770b49f1b363527 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Wed, 20 Mar 2019 09:36:00 +0100 Subject: [PATCH 71/75] TST: give an example for sdml_supervised with skggm where it indeed fails --- test/metric_learn_test.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py index 0db156e8..ae9a8657 100644 --- a/test/metric_learn_test.py +++ b/test/metric_learn_test.py @@ -230,13 +230,17 @@ def test_sdml_supervised_raises_warning_msg_installed_skggm(self): # TODO: remove if we don't need skggm anymore # case on which we know that skggm's graphical lasso fails # because it will return non finite values - X, y = load_iris(return_X_y=True) - sdml_supervised = SDML_Supervised(balance_param=0.5, use_cov=True, + rng = np.random.RandomState(42) + # This example will create a diagonal em_cov with a negative coeff ( + # pathological case) + X = np.array([[-10., 0.], [10., 0.], [5., 0.], [3., 0.]]) + y = [0, 0, 1, 1] + sdml_supervised = SDML_Supervised(balance_param=0.5, use_cov=False, sparsity_param=0.01) msg = ("There was a problem in SDML when using skggm's graphical " "lasso solver.") with pytest.raises(RuntimeError) as raised_error: - sdml_supervised.fit(X, y) + sdml_supervised.fit(X, y, random_state=rng) assert msg == str(raised_error.value) @pytest.mark.skipif(not HAS_SKGGM, From 6f5666b587219895e3e0c516a8796bd0cfe1979e Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Wed, 20 Mar 2019 15:54:24 +0100 Subject: [PATCH 72/75] TST: fix test that fails weirdly when executing the whole test file and not just the test --- test/metric_learn_test.py | 41 ++++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py index ae9a8657..6d4f9cbe 100644 --- a/test/metric_learn_test.py +++ b/test/metric_learn_test.py @@ -285,26 +285,6 @@ def test_deprecation_num_labeled(self): 'removed in 0.6.0') assert_warns_message(DeprecationWarning, msg, sdml_supervised.fit, X, y) - def test_sdml_raises_warning_non_psd(self): - """Tests that SDML raises a warning on a toy example where we know the - pseudo-covariance matrix is not PSD""" - pairs = np.array([[[-10., 0.], [10., 0.]], [[0., 50.], [0., -60]]]) - y = [1, -1] - sdml = SDML(use_cov=True, sparsity_param=0.01, balance_param=0.5) - msg = ("Warning, the input matrix of graphical lasso is not " - "positive semi-definite (PSD). The algorithm may diverge, " - "and lead to degenerate solutions. " - "To prevent that, try to decrease the balance parameter " - "`balance_param` and/or to set use_covariance=False.") - with pytest.warns(ConvergenceWarning) as raised_warning: - try: - sdml.fit(pairs, y) - except Exception: - pass - # we assert that this warning is in one of the warning raised by the - # estimator - assert msg in list(map(lambda w: str(w.message), raised_warning)) - def test_sdml_converges_if_psd(self): """Tests that sdml converges on a simple problem where we know the pseudo-covariance matrix is PSD""" @@ -385,6 +365,27 @@ def test_verbose_has_not_installed_skggm_sdml_supervised(capsys): assert "SDML will use scikit-learn's graphical lasso solver." in out +def test_sdml_raises_warning_non_psd(): + """Tests that SDML raises a warning on a toy example where we know the + pseudo-covariance matrix is not PSD""" + pairs = np.array([[[-10., 0.], [10., 0.]], [[0., 50.], [0., -60]]]) + y = [1, -1] + sdml = SDML(use_cov=True, sparsity_param=0.01, balance_param=0.5) + msg = ("Warning, the input matrix of graphical lasso is not " + "positive semi-definite (PSD). The algorithm may diverge, " + "and lead to degenerate solutions. " + "To prevent that, try to decrease the balance parameter " + "`balance_param` and/or to set use_covariance=False.") + with pytest.warns(ConvergenceWarning) as raised_warning: + try: + sdml.fit(pairs, y) + except Exception: + pass + # we assert that this warning is in one of the warning raised by the + # estimator + assert msg in list(map(lambda w: str(w.message), raised_warning)) + + class TestNCA(MetricTestCase): def test_iris(self): n = self.iris_points.shape[0] From 0973ef22a067b2fc171921c1ed558fd3d7b31d59 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Wed, 20 Mar 2019 18:53:34 +0100 Subject: [PATCH 73/75] Revert "TST: fix test that fails weirdly when executing the whole test file and not just the test" This reverts commit 6f5666b587219895e3e0c516a8796bd0cfe1979e. --- test/metric_learn_test.py | 41 +++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 21 deletions(-) diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py index 6d4f9cbe..ae9a8657 100644 --- a/test/metric_learn_test.py +++ b/test/metric_learn_test.py @@ -285,6 +285,26 @@ def test_deprecation_num_labeled(self): 'removed in 0.6.0') assert_warns_message(DeprecationWarning, msg, sdml_supervised.fit, X, y) + def test_sdml_raises_warning_non_psd(self): + """Tests that SDML raises a warning on a toy example where we know the + pseudo-covariance matrix is not PSD""" + pairs = np.array([[[-10., 0.], [10., 0.]], [[0., 50.], [0., -60]]]) + y = [1, -1] + sdml = SDML(use_cov=True, sparsity_param=0.01, balance_param=0.5) + msg = ("Warning, the input matrix of graphical lasso is not " + "positive semi-definite (PSD). The algorithm may diverge, " + "and lead to degenerate solutions. " + "To prevent that, try to decrease the balance parameter " + "`balance_param` and/or to set use_covariance=False.") + with pytest.warns(ConvergenceWarning) as raised_warning: + try: + sdml.fit(pairs, y) + except Exception: + pass + # we assert that this warning is in one of the warning raised by the + # estimator + assert msg in list(map(lambda w: str(w.message), raised_warning)) + def test_sdml_converges_if_psd(self): """Tests that sdml converges on a simple problem where we know the pseudo-covariance matrix is PSD""" @@ -365,27 +385,6 @@ def test_verbose_has_not_installed_skggm_sdml_supervised(capsys): assert "SDML will use scikit-learn's graphical lasso solver." in out -def test_sdml_raises_warning_non_psd(): - """Tests that SDML raises a warning on a toy example where we know the - pseudo-covariance matrix is not PSD""" - pairs = np.array([[[-10., 0.], [10., 0.]], [[0., 50.], [0., -60]]]) - y = [1, -1] - sdml = SDML(use_cov=True, sparsity_param=0.01, balance_param=0.5) - msg = ("Warning, the input matrix of graphical lasso is not " - "positive semi-definite (PSD). The algorithm may diverge, " - "and lead to degenerate solutions. " - "To prevent that, try to decrease the balance parameter " - "`balance_param` and/or to set use_covariance=False.") - with pytest.warns(ConvergenceWarning) as raised_warning: - try: - sdml.fit(pairs, y) - except Exception: - pass - # we assert that this warning is in one of the warning raised by the - # estimator - assert msg in list(map(lambda w: str(w.message), raised_warning)) - - class TestNCA(MetricTestCase): def test_iris(self): n = self.iris_points.shape[0] From df2ae9cd864490e2c3b01d4b552e12bf7c5809d6 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Thu, 21 Mar 2019 09:18:21 +0100 Subject: [PATCH 74/75] Add coverage for all versions of python --- .travis.yml | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/.travis.yml b/.travis.yml index 9cd21d25..ba6f939b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -18,10 +18,9 @@ before_install: then pip install pytest-cov; fi script: - - if [[ $TRAVIS_PYTHON_VERSION == "3.4" ]]; - then pytest test --cov; - else pytest test; - fi + # we do coverage for all versions so that codecov will merge them: this + # way we will see that both paths (with or without skggm) are tested + - pytest test --cov; after_success: - bash <(curl -s https://codecov.io/bash) From 9683934d1505b6a40f01a9cc7787c3a305f9de57 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Thu, 21 Mar 2019 11:25:25 +0100 Subject: [PATCH 75/75] Install pytest-cov for all versions --- .travis.yml | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/.travis.yml b/.travis.yml index ba6f939b..f5527089 100644 --- a/.travis.yml +++ b/.travis.yml @@ -8,15 +8,11 @@ python: before_install: - sudo apt-get install liblapack-dev - pip install --upgrade pip pytest - - pip install wheel - - pip install cython numpy scipy scikit-learn codecov + - pip install wheel cython numpy scipy scikit-learn codecov pytest-cov - if [[ ($TRAVIS_PYTHON_VERSION == "3.6") || ($TRAVIS_PYTHON_VERSION == "2.7")]]; then pip install git+https://github.com/skggm/skggm.git@a0ed406586c4364ea3297a658f415e13b5cbdaf8; fi - - if [[ $TRAVIS_PYTHON_VERSION == "3.4" ]]; - then pip install pytest-cov; - fi script: # we do coverage for all versions so that codecov will merge them: this # way we will see that both paths (with or without skggm) are tested