Skip to content

EFF Optimize memory usage for sparse matrices in LLE (Hessian, Modified and LTSA) #28096

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
8 changes: 8 additions & 0 deletions doc/whats_new/v1.6.rst
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,14 @@ Changelog
has no effect. `copy_X` will be removed in 1.8.
:pr:`29105` by :user:`Adam Li <adam2392>`.

:mod:`sklearn.manifold`
.......................

- |Efficiency| :func:`manifold.locally_linear_embedding` and
:class:`manifold.LocallyLinearEmbedding` now allocate more efficiently the memory of
sparse matrices in the Hessian, Modified and LTSA methods.
:pr:`28096` by :user:`Giorgio Angelotti <giorgioangel>`.

:mod:`sklearn.metrics`
......................

Expand Down
26 changes: 13 additions & 13 deletions sklearn/manifold/_locally_linear.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

import numpy as np
from scipy.linalg import eigh, qr, solve, svd
from scipy.sparse import csr_matrix, eye
from scipy.sparse import csr_matrix, eye, lil_matrix
from scipy.sparse.linalg import eigsh

from ..base import (
Expand Down Expand Up @@ -229,6 +229,7 @@ def _locally_linear_embedding(
)

M_sparse = eigen_solver != "dense"
M_container_constructor = lil_matrix if M_sparse else np.zeros

if method == "standard":
W = barycenter_kneighbors_graph(
Expand All @@ -239,7 +240,7 @@ def _locally_linear_embedding(
# depending on the solver, we'll do this differently
if M_sparse:
M = eye(*W.shape, format=W.format) - W
M = (M.T * M).tocsr()
M = M.T * M
else:
M = (W.T * W - W.T - W).toarray()
M.flat[:: M.shape[0] + 1] += 1 # W = W - I = W - I
Expand All @@ -262,7 +263,7 @@ def _locally_linear_embedding(
Yi = np.empty((n_neighbors, 1 + n_components + dp), dtype=np.float64)
Yi[:, 0] = 1

M = np.zeros((N, N), dtype=np.float64)
M = M_container_constructor((N, N), dtype=np.float64)

use_svd = n_neighbors > d_in

Expand Down Expand Up @@ -295,9 +296,6 @@ def _locally_linear_embedding(
nbrs_x, nbrs_y = np.meshgrid(neighbors[i], neighbors[i])
M[nbrs_x, nbrs_y] += np.dot(w, w.T)

if M_sparse:
M = csr_matrix(M)

elif method == "modified":
if n_neighbors < n_components:
raise ValueError("modified LLE requires n_neighbors >= n_components")
Expand Down Expand Up @@ -361,7 +359,8 @@ def _locally_linear_embedding(

# Now calculate M.
# This is the [N x N] matrix whose null space is the desired embedding
M = np.zeros((N, N), dtype=np.float64)
M = M_container_constructor((N, N), dtype=np.float64)

for i in range(N):
s_i = s_range[i]

Expand Down Expand Up @@ -397,19 +396,16 @@ def _locally_linear_embedding(
M[nbrs_x, nbrs_y] += np.dot(Wi, Wi.T)
Wi_sum1 = Wi.sum(1)
M[i, neighbors[i]] -= Wi_sum1
M[neighbors[i], i] -= Wi_sum1
M[neighbors[i], [i]] -= Wi_sum1
M[i, i] += s_i

if M_sparse:
M = csr_matrix(M)

elif method == "ltsa":
neighbors = nbrs.kneighbors(
X, n_neighbors=n_neighbors + 1, return_distance=False
)
neighbors = neighbors[:, 1:]

M = np.zeros((N, N))
M = M_container_constructor((N, N), dtype=np.float64)

use_svd = n_neighbors > d_in

Expand All @@ -432,7 +428,11 @@ def _locally_linear_embedding(

nbrs_x, nbrs_y = np.meshgrid(neighbors[i], neighbors[i])
M[nbrs_x, nbrs_y] -= GiGiT
M[neighbors[i], neighbors[i]] += 1

M[neighbors[i], neighbors[i]] += np.ones(shape=n_neighbors)

if M_sparse:
M = M.tocsr()

return null_space(
M,
Expand Down