From ee31bef8da7efbe49a066ae233367aea5e9f1d70 Mon Sep 17 00:00:00 2001 From: Giorgio Angelotti Date: Wed, 10 Jan 2024 15:28:39 +0100 Subject: [PATCH 1/9] Optimization of memory for sparse Modified LLE --- sklearn/manifold/_locally_linear.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/sklearn/manifold/_locally_linear.py b/sklearn/manifold/_locally_linear.py index 0547d2bee1402..fc2bec3eb3795 100644 --- a/sklearn/manifold/_locally_linear.py +++ b/sklearn/manifold/_locally_linear.py @@ -8,7 +8,7 @@ import numpy as np from scipy.linalg import eigh, qr, solve, svd -from scipy.sparse import csr_matrix, eye +from scipy.sparse import csr_matrix, lil_matrix, eye from scipy.sparse.linalg import eigsh from ..base import ( @@ -466,7 +466,11 @@ def locally_linear_embedding( # Now calculate M. # This is the [N x N] matrix whose null space is the desired embedding - M = np.zeros((N, N), dtype=np.float64) + if M_sparse: + M = lil_matrix((N, N), dtype=np.float64) + else: + M = np.zeros((N, N), dtype=np.float64) + for i in range(N): s_i = s_range[i] @@ -502,11 +506,17 @@ def locally_linear_embedding( M[nbrs_x, nbrs_y] += np.dot(Wi, Wi.T) Wi_sum1 = Wi.sum(1) M[i, neighbors[i]] -= Wi_sum1 - M[neighbors[i], i] -= Wi_sum1 + + if M_sparse: + for idx in range(len(neighbors[i])): + M[neighbors[i][idx], i] -= Wi_sum1[idx] + else: + M[neighbors[i], i] -= Wi_sum1 + M[i, i] += s_i if M_sparse: - M = csr_matrix(M) + M = M.tocsr() elif method == "ltsa": neighbors = nbrs.kneighbors( From 0c30c1817c9cba2a530e833061144fce50bbf1bc Mon Sep 17 00:00:00 2001 From: Giorgio Angelotti Date: Wed, 10 Jan 2024 15:47:18 +0100 Subject: [PATCH 2/9] Optimization of memory for sparse Modified LLE, black & ruff fixes --- sklearn/manifold/_locally_linear.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/manifold/_locally_linear.py b/sklearn/manifold/_locally_linear.py index fc2bec3eb3795..941c829f8d3f4 100644 --- a/sklearn/manifold/_locally_linear.py +++ b/sklearn/manifold/_locally_linear.py @@ -8,7 +8,7 @@ import numpy as np from scipy.linalg import eigh, qr, solve, svd -from scipy.sparse import csr_matrix, lil_matrix, eye +from scipy.sparse import csr_matrix, eye, lil_matrix from scipy.sparse.linalg import eigsh from ..base import ( @@ -512,7 +512,7 @@ def locally_linear_embedding( M[neighbors[i][idx], i] -= Wi_sum1[idx] else: M[neighbors[i], i] -= Wi_sum1 - + M[i, i] += s_i if M_sparse: From 97132d2c0d6bad89ee56750bce19c026f94c35a1 Mon Sep 17 00:00:00 2001 From: Giorgio Angelotti Date: Fri, 12 Jan 2024 10:14:32 +0100 Subject: [PATCH 3/9] Optimize memory usage for sparse matrices in LLE --- doc/whats_new/v1.5.rst | 6 ++++++ sklearn/manifold/_locally_linear.py | 20 ++++++++++++++++---- 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/doc/whats_new/v1.5.rst b/doc/whats_new/v1.5.rst index f7a521ca4f0d0..1087c115fc7df 100644 --- a/doc/whats_new/v1.5.rst +++ b/doc/whats_new/v1.5.rst @@ -31,6 +31,12 @@ Changelog by passing a function in place of a strategy name. :pr:`28053` by :user:`Mark Elliot `. +:mod:`sklearn.manifold` +..................... +- |Efficiency| :class:`LocallyLinearEmbedding` now allocates more efficiently + the memory of sparse matrices in the Hessian, Modified and LTSA methods. + :pr:`28096` by :user:`Giorgio Angelotti `. + Code and Documentation Contributors ----------------------------------- diff --git a/sklearn/manifold/_locally_linear.py b/sklearn/manifold/_locally_linear.py index 941c829f8d3f4..0612ce5742ae6 100644 --- a/sklearn/manifold/_locally_linear.py +++ b/sklearn/manifold/_locally_linear.py @@ -367,7 +367,10 @@ def locally_linear_embedding( Yi = np.empty((n_neighbors, 1 + n_components + dp), dtype=np.float64) Yi[:, 0] = 1 - M = np.zeros((N, N), dtype=np.float64) + if M_sparse: + M = lil_matrix((N, N), dtype=np.float64) + else: + M = np.zeros((N, N), dtype=np.float64) use_svd = n_neighbors > d_in @@ -401,7 +404,7 @@ def locally_linear_embedding( M[nbrs_x, nbrs_y] += np.dot(w, w.T) if M_sparse: - M = csr_matrix(M) + M = M.tocsr() elif method == "modified": if n_neighbors < n_components: @@ -524,7 +527,10 @@ def locally_linear_embedding( ) neighbors = neighbors[:, 1:] - M = np.zeros((N, N)) + if M_sparse: + M = lil_matrix((N, N), dtype=np.float64) + else: + M = np.zeros((N, N), dtype=np.float64) use_svd = n_neighbors > d_in @@ -547,7 +553,13 @@ def locally_linear_embedding( nbrs_x, nbrs_y = np.meshgrid(neighbors[i], neighbors[i]) M[nbrs_x, nbrs_y] -= GiGiT - M[neighbors[i], neighbors[i]] += 1 + + for idx1 in range(len(neighbors[i])): + for idx2 in range(len(neighbors[i])): + M[neighbors[i][idx1], neighbors[i][idx2]] += 1 + + if M_sparse: + M = M.tocsr() return null_space( M, From d2acf4f16f16115b450c94d1a1641d12de4de77d Mon Sep 17 00:00:00 2001 From: Giorgio Angelotti Date: Fri, 12 Jan 2024 10:27:00 +0100 Subject: [PATCH 4/9] Memory usage LLE sparse matrices: numpy optimization --- sklearn/manifold/_locally_linear.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/sklearn/manifold/_locally_linear.py b/sklearn/manifold/_locally_linear.py index 0612ce5742ae6..ee9f68693d2ef 100644 --- a/sklearn/manifold/_locally_linear.py +++ b/sklearn/manifold/_locally_linear.py @@ -554,9 +554,12 @@ def locally_linear_embedding( nbrs_x, nbrs_y = np.meshgrid(neighbors[i], neighbors[i]) M[nbrs_x, nbrs_y] -= GiGiT - for idx1 in range(len(neighbors[i])): - for idx2 in range(len(neighbors[i])): - M[neighbors[i][idx1], neighbors[i][idx2]] += 1 + if M_sparse: + for idx1 in range(len(neighbors[i])): + for idx2 in range(len(neighbors[i])): + M[neighbors[i][idx1], neighbors[i][idx2]] += 1 + else: + M[neighbors[i], neighbors[i]] += 1 if M_sparse: M = M.tocsr() From cfe5b38a342087d6827bfe0a82ef385572346d67 Mon Sep 17 00:00:00 2001 From: Giorgio Angelotti Date: Fri, 12 Jan 2024 11:37:19 +0100 Subject: [PATCH 5/9] fixing doc --- doc/whats_new/v1.5.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/whats_new/v1.5.rst b/doc/whats_new/v1.5.rst index 1087c115fc7df..17edca71eca80 100644 --- a/doc/whats_new/v1.5.rst +++ b/doc/whats_new/v1.5.rst @@ -32,8 +32,8 @@ Changelog :pr:`28053` by :user:`Mark Elliot `. :mod:`sklearn.manifold` -..................... -- |Efficiency| :class:`LocallyLinearEmbedding` now allocates more efficiently +....................... +- |Efficiency| :func:`locally_linear_embedding` now allocates more efficiently the memory of sparse matrices in the Hessian, Modified and LTSA methods. :pr:`28096` by :user:`Giorgio Angelotti `. From 17bd5588a38c215fff4b7e2203962f5c08dc3ca8 Mon Sep 17 00:00:00 2001 From: Giorgio Angelotti <76100950+giorgioangel@users.noreply.github.com> Date: Sat, 13 Jan 2024 21:24:25 +0100 Subject: [PATCH 6/9] Update sklearn/manifold/_locally_linear.py removing double loop Co-authored-by: Guillaume Lemaitre --- sklearn/manifold/_locally_linear.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/sklearn/manifold/_locally_linear.py b/sklearn/manifold/_locally_linear.py index ee9f68693d2ef..df70fdc82e637 100644 --- a/sklearn/manifold/_locally_linear.py +++ b/sklearn/manifold/_locally_linear.py @@ -554,12 +554,7 @@ def locally_linear_embedding( nbrs_x, nbrs_y = np.meshgrid(neighbors[i], neighbors[i]) M[nbrs_x, nbrs_y] -= GiGiT - if M_sparse: - for idx1 in range(len(neighbors[i])): - for idx2 in range(len(neighbors[i])): - M[neighbors[i][idx1], neighbors[i][idx2]] += 1 - else: - M[neighbors[i], neighbors[i]] += 1 + M[neighbors[i], neighbors[i]] += np.ones(shape=n_neighbors) if M_sparse: M = M.tocsr() From 7729d370eb0f7e8cad4f9814c69fa85844b714ea Mon Sep 17 00:00:00 2001 From: Giorgio Angelotti <76100950+giorgioangel@users.noreply.github.com> Date: Sat, 13 Jan 2024 21:25:43 +0100 Subject: [PATCH 7/9] Update sklearn/manifold/_locally_linear.py resolving loop for sparse matrix Co-authored-by: Guillaume Lemaitre --- sklearn/manifold/_locally_linear.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/sklearn/manifold/_locally_linear.py b/sklearn/manifold/_locally_linear.py index df70fdc82e637..02e40cffc148f 100644 --- a/sklearn/manifold/_locally_linear.py +++ b/sklearn/manifold/_locally_linear.py @@ -509,13 +509,7 @@ def locally_linear_embedding( M[nbrs_x, nbrs_y] += np.dot(Wi, Wi.T) Wi_sum1 = Wi.sum(1) M[i, neighbors[i]] -= Wi_sum1 - - if M_sparse: - for idx in range(len(neighbors[i])): - M[neighbors[i][idx], i] -= Wi_sum1[idx] - else: - M[neighbors[i], i] -= Wi_sum1 - + M[neighbors[i], [i]] -= Wi_sum1 M[i, i] += s_i if M_sparse: From de630df0d76aa780bc7a761616cf69efc87db2b6 Mon Sep 17 00:00:00 2001 From: Giorgio Angelotti Date: Sat, 13 Jan 2024 21:38:16 +0100 Subject: [PATCH 8/9] adding container --- doc/whats_new/v1.5.rst | 3 ++- sklearn/manifold/_locally_linear.py | 26 ++++++-------------------- 2 files changed, 8 insertions(+), 21 deletions(-) diff --git a/doc/whats_new/v1.5.rst b/doc/whats_new/v1.5.rst index 17edca71eca80..88b31eb606daf 100644 --- a/doc/whats_new/v1.5.rst +++ b/doc/whats_new/v1.5.rst @@ -33,7 +33,8 @@ Changelog :mod:`sklearn.manifold` ....................... -- |Efficiency| :func:`locally_linear_embedding` now allocates more efficiently +- |Efficiency| :func:`manifold.locally_linear_embedding` and + :class:`manifold.LocallyLinearEmbedding` now allocate more efficiently the memory of sparse matrices in the Hessian, Modified and LTSA methods. :pr:`28096` by :user:`Giorgio Angelotti `. diff --git a/sklearn/manifold/_locally_linear.py b/sklearn/manifold/_locally_linear.py index 02e40cffc148f..1c6ff414a1682 100644 --- a/sklearn/manifold/_locally_linear.py +++ b/sklearn/manifold/_locally_linear.py @@ -334,6 +334,7 @@ def locally_linear_embedding( raise ValueError("n_neighbors must be positive") M_sparse = eigen_solver != "dense" + M_container_constructor = lil_matrix if M_sparse else np.zeros if method == "standard": W = barycenter_kneighbors_graph( @@ -367,10 +368,7 @@ def locally_linear_embedding( Yi = np.empty((n_neighbors, 1 + n_components + dp), dtype=np.float64) Yi[:, 0] = 1 - if M_sparse: - M = lil_matrix((N, N), dtype=np.float64) - else: - M = np.zeros((N, N), dtype=np.float64) + M = M_container_constructor((N, N), dtype=np.float64) use_svd = n_neighbors > d_in @@ -403,9 +401,6 @@ def locally_linear_embedding( nbrs_x, nbrs_y = np.meshgrid(neighbors[i], neighbors[i]) M[nbrs_x, nbrs_y] += np.dot(w, w.T) - if M_sparse: - M = M.tocsr() - elif method == "modified": if n_neighbors < n_components: raise ValueError("modified LLE requires n_neighbors >= n_components") @@ -469,10 +464,7 @@ def locally_linear_embedding( # Now calculate M. # This is the [N x N] matrix whose null space is the desired embedding - if M_sparse: - M = lil_matrix((N, N), dtype=np.float64) - else: - M = np.zeros((N, N), dtype=np.float64) + M = M_container_constructor((N, N), dtype=np.float64) for i in range(N): s_i = s_range[i] @@ -512,19 +504,13 @@ def locally_linear_embedding( M[neighbors[i], [i]] -= Wi_sum1 M[i, i] += s_i - if M_sparse: - M = M.tocsr() - elif method == "ltsa": neighbors = nbrs.kneighbors( X, n_neighbors=n_neighbors + 1, return_distance=False ) neighbors = neighbors[:, 1:] - if M_sparse: - M = lil_matrix((N, N), dtype=np.float64) - else: - M = np.zeros((N, N), dtype=np.float64) + M = M_container_constructor((N, N), dtype=np.float64) use_svd = n_neighbors > d_in @@ -550,8 +536,8 @@ def locally_linear_embedding( M[neighbors[i], neighbors[i]] += np.ones(shape=n_neighbors) - if M_sparse: - M = M.tocsr() + if method != "standard" and M_sparse: + M = M.tocsr() return null_space( M, From 15c04c84a4083d8e3f826b4fbbe27b3b39e2a42c Mon Sep 17 00:00:00 2001 From: Giorgio Angelotti Date: Sat, 13 Jan 2024 21:44:11 +0100 Subject: [PATCH 9/9] final changes --- sklearn/manifold/_locally_linear.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/manifold/_locally_linear.py b/sklearn/manifold/_locally_linear.py index 1c6ff414a1682..b4d435ae145ad 100644 --- a/sklearn/manifold/_locally_linear.py +++ b/sklearn/manifold/_locally_linear.py @@ -345,7 +345,7 @@ def locally_linear_embedding( # depending on the solver, we'll do this differently if M_sparse: M = eye(*W.shape, format=W.format) - W - M = (M.T * M).tocsr() + M = M.T * M else: M = (W.T * W - W.T - W).toarray() M.flat[:: M.shape[0] + 1] += 1 # W = W - I = W - I @@ -536,7 +536,7 @@ def locally_linear_embedding( M[neighbors[i], neighbors[i]] += np.ones(shape=n_neighbors) - if method != "standard" and M_sparse: + if M_sparse: M = M.tocsr() return null_space(