-
Notifications
You must be signed in to change notification settings - Fork 228
Fix covariance initialization when matrix is not invertible #277
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
3ef4f9c
2c1e308
eff07d4
bd690c8
0982559
baad5a7
80ab922
66e84d8
5787734
9e7ed19
f6dd83a
1e4664f
5abab24
6b3b02e
a5c476b
f2bc3e2
d1d7ec4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,12 +8,12 @@ | |
from scipy.stats import ortho_group | ||
from sklearn import clone | ||
from sklearn.cluster import DBSCAN | ||
from sklearn.datasets import make_spd_matrix | ||
from sklearn.utils import check_random_state | ||
from sklearn.datasets import make_spd_matrix, make_blobs | ||
from sklearn.utils import check_random_state, shuffle | ||
from sklearn.utils.multiclass import type_of_target | ||
from sklearn.utils.testing import set_random_state | ||
|
||
from metric_learn._util import make_context | ||
from metric_learn._util import make_context, _initialize_metric_mahalanobis | ||
from metric_learn.base_metric import (_QuadrupletsClassifierMixin, | ||
_PairsClassifierMixin) | ||
from metric_learn.exceptions import NonPSDError | ||
|
@@ -569,7 +569,7 @@ def test_init_mahalanobis(estimator, build_dataset): | |
in zip(ids_metric_learners, | ||
metric_learners) | ||
if idml[:4] in ['ITML', 'SDML', 'LSML']]) | ||
def test_singular_covariance_init_or_prior(estimator, build_dataset): | ||
def test_singular_covariance_init_or_prior_strictpd(estimator, build_dataset): | ||
"""Tests that when using the 'covariance' init or prior, it returns the | ||
appropriate error if the covariance matrix is singular, for algorithms | ||
that need a strictly PD prior or init (see | ||
|
@@ -603,6 +603,48 @@ def test_singular_covariance_init_or_prior(estimator, build_dataset): | |
assert str(raised_err.value) == msg | ||
|
||
|
||
@pytest.mark.integration | ||
@pytest.mark.parametrize('estimator, build_dataset', | ||
[(ml, bd) for idml, (ml, bd) | ||
in zip(ids_metric_learners, | ||
metric_learners) | ||
if idml[:3] in ['MMC']], | ||
ids=[idml for idml, (ml, _) | ||
in zip(ids_metric_learners, | ||
metric_learners) | ||
if idml[:3] in ['MMC']]) | ||
def test_singular_covariance_init_of_non_strict_pd(estimator, build_dataset): | ||
"""Tests that when using the 'covariance' init or prior, it returns the | ||
appropriate warning if the covariance matrix is singular, for algorithms | ||
that don't need a strictly PD init. Also checks that the returned | ||
inverse matrix has finite values | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is not what you check, right? You seem to check the learned There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, you are totally right! Forgot to set the max iteration in order to make it stop before the first iteration. I called Edit: It isn't possible to set |
||
""" | ||
input_data, labels, _, X = build_dataset() | ||
model = clone(estimator) | ||
set_random_state(model) | ||
# We create a feature that is a linear combination of the first two | ||
# features: | ||
input_data = np.concatenate([input_data, input_data[:, ..., :2].dot([[2], | ||
[3]])], | ||
axis=-1) | ||
model.set_params(init='covariance') | ||
msg = ('The covariance matrix is not invertible: ' | ||
'using the pseudo-inverse instead.' | ||
'To make the covariance matrix invertible' | ||
' you can remove any linearly dependent features and/or ' | ||
'reduce the dimensionality of your input, ' | ||
'for instance using `sklearn.decomposition.PCA` as a ' | ||
'preprocessing step.') | ||
with pytest.warns(UserWarning) as raised_warning: | ||
model.fit(input_data, labels) | ||
assert np.any([str(warning.message) == msg for warning in raised_warning]) | ||
M, _ = _initialize_metric_mahalanobis(X, init='covariance', | ||
random_state=RNG, | ||
return_inverse=True, | ||
strict_pd=False) | ||
assert np.isfinite(M).all() | ||
|
||
|
||
@pytest.mark.integration | ||
@pytest.mark.parametrize('estimator, build_dataset', | ||
[(ml, bd) for idml, (ml, bd) | ||
|
@@ -614,7 +656,7 @@ def test_singular_covariance_init_or_prior(estimator, build_dataset): | |
metric_learners) | ||
if idml[:4] in ['ITML', 'SDML', 'LSML']]) | ||
@pytest.mark.parametrize('w0', [1e-20, 0., -1e-20]) | ||
def test_singular_array_init_or_prior(estimator, build_dataset, w0): | ||
def test_singular_array_init_or_prior_strictpd(estimator, build_dataset, w0): | ||
"""Tests that when using a custom array init (or prior), it returns the | ||
appropriate error if it is singular, for algorithms | ||
that need a strictly PD prior or init (see | ||
|
@@ -654,6 +696,31 @@ def test_singular_array_init_or_prior(estimator, build_dataset, w0): | |
assert str(raised_err.value) == msg | ||
|
||
|
||
@pytest.mark.parametrize('w0', [1e-20, 0., -1e-20]) | ||
def test_singular_array_init_of_non_strict_pd(w0): | ||
"""Tests that when using a custom array init, it returns the | ||
appropriate warning if it is singular. Also checks if the returned | ||
inverse matrix is finite. This isn't checked for model fitting as no | ||
model currently uses this setting. | ||
""" | ||
rng = np.random.RandomState(42) | ||
X, y = shuffle(*make_blobs(random_state=rng), | ||
random_state=rng) | ||
P = ortho_group.rvs(X.shape[1], random_state=rng) | ||
w = np.abs(rng.randn(X.shape[1])) | ||
w[0] = w0 | ||
M = P.dot(np.diag(w)).dot(P.T) | ||
Comment on lines
+707
to
+712
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this seems a bit convoluted just to generated a singular matrix? You could just draw a rectangular matrix There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I agree with you, just did it that way because it was done like that on There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sounds good |
||
msg = ('The initialization matrix is not invertible: ' | ||
'using the pseudo-inverse instead.') | ||
with pytest.warns(UserWarning) as raised_warning: | ||
_, M_inv = _initialize_metric_mahalanobis(X, init=M, | ||
random_state=rng, | ||
return_inverse=True, | ||
strict_pd=False) | ||
assert str(raised_warning[0].message) == msg | ||
assert np.isfinite(M_inv).all() | ||
|
||
|
||
@pytest.mark.integration | ||
@pytest.mark.parametrize('estimator, build_dataset', metric_learners, | ||
ids=ids_metric_learners) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
why did you turn off
check_finite
? I think it is better to keep itThere was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
check_array()
does it previously, so it shouldn't be necessary to do it again.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Indeed!