diff --git a/metric_learn/lmnn.py b/metric_learn/lmnn.py
index f9cd0e91..0e488749 100644
--- a/metric_learn/lmnn.py
+++ b/metric_learn/lmnn.py
@@ -105,7 +105,7 @@ def fit(self, X, y):
       # objective than the previous L, following the gradient:
       while True:
         # the next point next_L to try out is found by a gradient step
-        L_next = L - 2 * learn_rate * G
+        L_next = L - learn_rate * G
         # we compute the objective at next point
         # we copy variables that can be modified by _loss_grad, because if we
         # retry we don t want to modify them several times
@@ -191,10 +191,11 @@ def _loss_grad(self, X, L, dfG, impostors, it, k, reg, target_neighbors, df,
     # do the gradient update
     assert not np.isnan(df).any()
     G = dfG * reg + df * (1 - reg)
+    G = L.dot(G)
     # compute the objective function
     objective = total_active * (1 - reg)
-    objective += G.flatten().dot(L.T.dot(L).flatten())
-    return G, objective, total_active, df, a1, a2
+    objective += G.flatten().dot(L.flatten())
+    return 2 * G, objective, total_active, df, a1, a2
 
   def _select_targets(self, X, label_inds):
     target_neighbors = np.empty((X.shape[0], self.k), dtype=int)
diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py
index a785d60d..bf079511 100644
--- a/test/metric_learn_test.py
+++ b/test/metric_learn_test.py
@@ -2,7 +2,7 @@
 import re
 import pytest
 import numpy as np
-from scipy.optimize import check_grad
+from scipy.optimize import check_grad, approx_fprime
 from six.moves import xrange
 from sklearn.metrics import pairwise_distances
 from sklearn.datasets import load_iris, make_classification, make_regression
@@ -21,7 +21,7 @@
                           RCA_Supervised, MMC_Supervised, SDML)
 # Import this specially for testing.
 from metric_learn.constraints import wrap_pairs
-from metric_learn.lmnn import python_LMNN
+from metric_learn.lmnn import python_LMNN, _sum_outer_products
 
 
 def class_separation(X, labels):
@@ -120,6 +120,98 @@ def test_iris(self):
                             self.iris_labels)
     self.assertLess(csep, 0.25)
 
+  def test_loss_grad_lbfgs(self):
+    """Test gradient of loss function
+    Assert that the gradient is almost equal to its finite differences
+    approximation.
+    """
+    rng = np.random.RandomState(42)
+    X, y = make_classification(random_state=rng)
+    L = rng.randn(rng.randint(1, X.shape[1] + 1), X.shape[1])
+    lmnn = LMNN()
+
+    k = lmnn.k
+    reg = lmnn.regularization
+
+    X, y = lmnn._prepare_inputs(X, y, dtype=float,
+                                ensure_min_samples=2)
+    num_pts, num_dims = X.shape
+    unique_labels, label_inds = np.unique(y, return_inverse=True)
+    lmnn.labels_ = np.arange(len(unique_labels))
+    lmnn.transformer_ = np.eye(num_dims)
+
+    target_neighbors = lmnn._select_targets(X, label_inds)
+    impostors = lmnn._find_impostors(target_neighbors[:, -1], X, label_inds)
+
+    # sum outer products
+    dfG = _sum_outer_products(X, target_neighbors.flatten(),
+                              np.repeat(np.arange(X.shape[0]), k))
+    df = np.zeros_like(dfG)
+
+    # storage
+    a1 = [None]*k
+    a2 = [None]*k
+    for nn_idx in xrange(k):
+      a1[nn_idx] = np.array([])
+      a2[nn_idx] = np.array([])
+
+    # initialize L
+    def loss_grad(flat_L):
+      return lmnn._loss_grad(X, flat_L.reshape(-1, X.shape[1]), dfG, impostors,
+                             1, k, reg, target_neighbors, df.copy(),
+                             list(a1), list(a2))
+
+    def fun(x):
+      return loss_grad(x)[1]
+
+    def grad(x):
+      return loss_grad(x)[0].ravel()
+
+    # compute relative error
+    epsilon = np.sqrt(np.finfo(float).eps)
+    rel_diff = (check_grad(fun, grad, L.ravel()) /
+                np.linalg.norm(approx_fprime(L.ravel(), fun, epsilon)))
+    np.testing.assert_almost_equal(rel_diff, 0., decimal=5)
+
+
+@pytest.mark.parametrize('X, y, loss', [(np.array([[0], [1], [2], [3]]),
+                                         [1, 1, 0, 0], 3.0),
+                                        (np.array([[0], [1], [2], [3]]),
+                                         [1, 0, 0, 1], 26.)])
+def test_toy_ex_lmnn(X, y, loss):
+  """Test that the loss gives the right result on a toy example"""
+  L = np.array([[1]])
+  lmnn = LMNN(k=1, regularization=0.5)
+
+  k = lmnn.k
+  reg = lmnn.regularization
+
+  X, y = lmnn._prepare_inputs(X, y, dtype=float,
+                              ensure_min_samples=2)
+  num_pts, num_dims = X.shape
+  unique_labels, label_inds = np.unique(y, return_inverse=True)
+  lmnn.labels_ = np.arange(len(unique_labels))
+  lmnn.transformer_ = np.eye(num_dims)
+
+  target_neighbors = lmnn._select_targets(X, label_inds)
+  impostors = lmnn._find_impostors(target_neighbors[:, -1], X, label_inds)
+
+  # sum outer products
+  dfG = _sum_outer_products(X, target_neighbors.flatten(),
+                            np.repeat(np.arange(X.shape[0]), k))
+  df = np.zeros_like(dfG)
+
+  # storage
+  a1 = [None]*k
+  a2 = [None]*k
+  for nn_idx in xrange(k):
+    a1[nn_idx] = np.array([])
+    a2[nn_idx] = np.array([])
+
+  # assert that the loss equals the one computed by hand
+  assert lmnn._loss_grad(X, L.reshape(-1, X.shape[1]), dfG, impostors, 1, k,
+                         reg, target_neighbors, df, a1, a2)[1] == loss
+
 
 def test_convergence_simple_example(capsys):
   # LMNN should converge on this simple example, which it did not with
@@ -421,7 +513,9 @@ def grad(M):
       return nca._loss_grad_lbfgs(M, X, mask)[1].ravel()
 
     # compute relative error
-    rel_diff = check_grad(fun, grad, M.ravel()) / np.linalg.norm(grad(M))
+    epsilon = np.sqrt(np.finfo(float).eps)
+    rel_diff = (check_grad(fun, grad, M.ravel()) /
+                np.linalg.norm(approx_fprime(M.ravel(), fun, epsilon)))
     np.testing.assert_almost_equal(rel_diff, 0., decimal=6)
 
   def test_simple_example(self):
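
The heart of the lmnn.py change is a change of gradient variable: the objective term is the same value as before (the old G.flatten().dot(L.T.dot(L).flatten()) is rewritten as (L.dot(G)).flatten().dot(L.flatten())), but the returned gradient is now taken with respect to L itself, returning 2 * L.dot(G), which is why the hard-coded factor 2 disappears from the gradient step in fit. The standalone sketch below is not part of the patch; the names f, C, L, d and D are illustrative. It checks the underlying identity d/dL <C, L^T L> = 2 * L * C for a symmetric C against a finite-difference approximation, mirroring on a toy function what test_loss_grad_lbfgs does for the full LMNN loss.

# Standalone sketch (not from the patch): finite-difference check of the
# identity d/dL <C, L^T L> = 2 * L * C for a symmetric matrix C, the
# linear-algebra fact behind returning 2 * L.dot(G) from _loss_grad and
# dropping the hard-coded factor 2 from the gradient step in fit.
# The names f, C, L, d, D are illustrative, not taken from metric-learn.
import numpy as np
from scipy.optimize import approx_fprime

rng = np.random.RandomState(0)
d, D = 3, 5                       # arbitrary d x D transformation shape
L = rng.randn(d, D)
C = rng.randn(D, D)
C = (C + C.T) / 2                 # symmetric, like reg * dfG + (1 - reg) * df

def f(flat_L):
  # objective piece <C, L^T L>, written as in the pre-patch code
  L_ = flat_L.reshape(d, D)
  return C.flatten().dot(L_.T.dot(L_).flatten())

analytic = 2 * L.dot(C)           # what the patched _loss_grad returns
epsilon = np.sqrt(np.finfo(float).eps)
numeric = approx_fprime(L.ravel(), f, epsilon).reshape(d, D)
assert np.allclose(analytic, numeric, atol=1e-5)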