From ee31bef8da7efbe49a066ae233367aea5e9f1d70 Mon Sep 17 00:00:00 2001
From: Giorgio Angelotti <g.angelotti91@gmail.com>
Date: Wed, 10 Jan 2024 15:28:39 +0100
Subject: [PATCH 1/9] Optimization of memory for sparse Modified LLE

---
 sklearn/manifold/_locally_linear.py | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)
diff --git a/sklearn/manifold/_locally_linear.py b/sklearn/manifold/_locally_linear.py
index 0547d2bee1402..fc2bec3eb3795 100644
--- a/sklearn/manifold/_locally_linear.py
+++ b/sklearn/manifold/_locally_linear.py
@@ -8,7 +8,7 @@
 
 import numpy as np
 from scipy.linalg import eigh, qr, solve, svd
-from scipy.sparse import csr_matrix, eye
+from scipy.sparse import csr_matrix, lil_matrix, eye
 from scipy.sparse.linalg import eigsh
 
 from ..base import (
@@ -466,7 +466,11 @@ def locally_linear_embedding(
 
         # Now calculate M.
         # This is the [N x N] matrix whose null space is the desired embedding
-        M = np.zeros((N, N), dtype=np.float64)
+        if M_sparse:
+            M = lil_matrix((N, N), dtype=np.float64)
+        else:
+            M = np.zeros((N, N), dtype=np.float64)
+
         for i in range(N):
             s_i = s_range[i]
 
@@ -502,11 +506,17 @@ def locally_linear_embedding(
             M[nbrs_x, nbrs_y] += np.dot(Wi, Wi.T)
             Wi_sum1 = Wi.sum(1)
             M[i, neighbors[i]] -= Wi_sum1
-            M[neighbors[i], i] -= Wi_sum1
+
+            if M_sparse:
+                for idx in range(len(neighbors[i])):
+                    M[neighbors[i][idx], i] -= Wi_sum1[idx]
+            else:
+                M[neighbors[i], i] -= Wi_sum1
+                
             M[i, i] += s_i
 
         if M_sparse:
-            M = csr_matrix(M)
+            M = M.tocsr()
 
     elif method == "ltsa":
         neighbors = nbrs.kneighbors(

From 0c30c1817c9cba2a530e833061144fce50bbf1bc Mon Sep 17 00:00:00 2001
From: Giorgio Angelotti <g.angelotti91@gmail.com>
Date: Wed, 10 Jan 2024 15:47:18 +0100
Subject: [PATCH 2/9] Optimization of memory for sparse Modified LLE, black &
 ruff fixes

---
 sklearn/manifold/_locally_linear.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sklearn/manifold/_locally_linear.py b/sklearn/manifold/_locally_linear.py
index fc2bec3eb3795..941c829f8d3f4 100644
--- a/sklearn/manifold/_locally_linear.py
+++ b/sklearn/manifold/_locally_linear.py
@@ -8,7 +8,7 @@
 
 import numpy as np
 from scipy.linalg import eigh, qr, solve, svd
-from scipy.sparse import csr_matrix, lil_matrix, eye
+from scipy.sparse import csr_matrix, eye, lil_matrix
 from scipy.sparse.linalg import eigsh
 
 from ..base import (
@@ -512,7 +512,7 @@ def locally_linear_embedding(
                     M[neighbors[i][idx], i] -= Wi_sum1[idx]
             else:
                 M[neighbors[i], i] -= Wi_sum1
-                
+
             M[i, i] += s_i
 
         if M_sparse:

From 97132d2c0d6bad89ee56750bce19c026f94c35a1 Mon Sep 17 00:00:00 2001
From: Giorgio Angelotti <g.angelotti91@gmail.com>
Date: Fri, 12 Jan 2024 10:14:32 +0100
Subject: [PATCH 3/9] Optimize memory usage for sparse matrices in LLE

---
 doc/whats_new/v1.5.rst              |  6 ++++++
 sklearn/manifold/_locally_linear.py | 20 ++++++++++++++++----
 2 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/doc/whats_new/v1.5.rst b/doc/whats_new/v1.5.rst
index f7a521ca4f0d0..1087c115fc7df 100644
--- a/doc/whats_new/v1.5.rst
+++ b/doc/whats_new/v1.5.rst
@@ -31,6 +31,12 @@ Changelog
   by passing a function in place of a strategy name.
   :pr:`28053` by :user:`Mark Elliot <mark-thm>`.
 
+:mod:`sklearn.manifold`
+.....................
+- |Efficiency| :class:`LocallyLinearEmbedding` now allocates more efficiently
+  the memory of sparse matrices in the Hessian, Modified and LTSA methods.
+  :pr:`28096` by :user:`Giorgio Angelotti <giorgioangel>`.
+
 Code and Documentation Contributors
 -----------------------------------
 
diff --git a/sklearn/manifold/_locally_linear.py b/sklearn/manifold/_locally_linear.py
index 941c829f8d3f4..0612ce5742ae6 100644
--- a/sklearn/manifold/_locally_linear.py
+++ b/sklearn/manifold/_locally_linear.py
@@ -367,7 +367,10 @@ def locally_linear_embedding(
         Yi = np.empty((n_neighbors, 1 + n_components + dp), dtype=np.float64)
         Yi[:, 0] = 1
 
-        M = np.zeros((N, N), dtype=np.float64)
+        if M_sparse:
+            M = lil_matrix((N, N), dtype=np.float64)
+        else:
+            M = np.zeros((N, N), dtype=np.float64)
 
         use_svd = n_neighbors > d_in
 
@@ -401,7 +404,7 @@ def locally_linear_embedding(
             M[nbrs_x, nbrs_y] += np.dot(w, w.T)
 
         if M_sparse:
-            M = csr_matrix(M)
+            M = M.tocsr()
 
     elif method == "modified":
         if n_neighbors < n_components:
@@ -524,7 +527,10 @@ def locally_linear_embedding(
         )
         neighbors = neighbors[:, 1:]
 
-        M = np.zeros((N, N))
+        if M_sparse:
+            M = lil_matrix((N, N), dtype=np.float64)
+        else:
+            M = np.zeros((N, N), dtype=np.float64)
 
         use_svd = n_neighbors > d_in
 
@@ -547,7 +553,13 @@ def locally_linear_embedding(
 
             nbrs_x, nbrs_y = np.meshgrid(neighbors[i], neighbors[i])
             M[nbrs_x, nbrs_y] -= GiGiT
-            M[neighbors[i], neighbors[i]] += 1
+
+            for idx1 in range(len(neighbors[i])):
+                for idx2 in range(len(neighbors[i])):
+                    M[neighbors[i][idx1], neighbors[i][idx2]] += 1
+
+        if M_sparse:
+            M = M.tocsr()
 
     return null_space(
         M,

From d2acf4f16f16115b450c94d1a1641d12de4de77d Mon Sep 17 00:00:00 2001
From: Giorgio Angelotti <g.angelotti91@gmail.com>
Date: Fri, 12 Jan 2024 10:27:00 +0100
Subject: [PATCH 4/9] Memory usage LLE sparse matrices: numpy optimization

---
 sklearn/manifold/_locally_linear.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/sklearn/manifold/_locally_linear.py b/sklearn/manifold/_locally_linear.py
index 0612ce5742ae6..ee9f68693d2ef 100644
--- a/sklearn/manifold/_locally_linear.py
+++ b/sklearn/manifold/_locally_linear.py
@@ -554,9 +554,12 @@ def locally_linear_embedding(
             nbrs_x, nbrs_y = np.meshgrid(neighbors[i], neighbors[i])
             M[nbrs_x, nbrs_y] -= GiGiT
 
-            for idx1 in range(len(neighbors[i])):
-                for idx2 in range(len(neighbors[i])):
-                    M[neighbors[i][idx1], neighbors[i][idx2]] += 1
+            if M_sparse:
+                for idx1 in range(len(neighbors[i])):
+                    for idx2 in range(len(neighbors[i])):
+                        M[neighbors[i][idx1], neighbors[i][idx2]] += 1
+            else:
+                M[neighbors[i], neighbors[i]] += 1
 
         if M_sparse:
             M = M.tocsr()

From cfe5b38a342087d6827bfe0a82ef385572346d67 Mon Sep 17 00:00:00 2001
From: Giorgio Angelotti <g.angelotti91@gmail.com>
Date: Fri, 12 Jan 2024 11:37:19 +0100
Subject: [PATCH 5/9] Fix changelog heading underline and use :func: role

---
 doc/whats_new/v1.5.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/doc/whats_new/v1.5.rst b/doc/whats_new/v1.5.rst
index 1087c115fc7df..17edca71eca80 100644
--- a/doc/whats_new/v1.5.rst
+++ b/doc/whats_new/v1.5.rst
@@ -32,8 +32,8 @@ Changelog
   :pr:`28053` by :user:`Mark Elliot <mark-thm>`.
 
 :mod:`sklearn.manifold`
-.....................
-- |Efficiency| :class:`LocallyLinearEmbedding` now allocates more efficiently
+.......................
+- |Efficiency| :func:`locally_linear_embedding` now allocates more efficiently
   the memory of sparse matrices in the Hessian, Modified and LTSA methods.
   :pr:`28096` by :user:`Giorgio Angelotti <giorgioangel>`.
 

From 17bd5588a38c215fff4b7e2203962f5c08dc3ca8 Mon Sep 17 00:00:00 2001
From: Giorgio Angelotti <76100950+giorgioangel@users.noreply.github.com>
Date: Sat, 13 Jan 2024 21:24:25 +0100
Subject: [PATCH 6/9] Update sklearn/manifold/_locally_linear.py

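Replace the double loop in the LTSA branch with a single indexed update
of the diagonal entries. The loop added 1 to every (neighbor, neighbor)
pair on the sparse path, whereas the dense path only incremented the
diagonal.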

Co-authored-by: Guillaume Lemaitre <g.lemaitre58@gmail.com>
---
 sklearn/manifold/_locally_linear.py | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/sklearn/manifold/_locally_linear.py b/sklearn/manifold/_locally_linear.py
index ee9f68693d2ef..df70fdc82e637 100644
--- a/sklearn/manifold/_locally_linear.py
+++ b/sklearn/manifold/_locally_linear.py
@@ -554,12 +554,7 @@ def locally_linear_embedding(
             nbrs_x, nbrs_y = np.meshgrid(neighbors[i], neighbors[i])
             M[nbrs_x, nbrs_y] -= GiGiT
 
-            if M_sparse:
-                for idx1 in range(len(neighbors[i])):
-                    for idx2 in range(len(neighbors[i])):
-                        M[neighbors[i][idx1], neighbors[i][idx2]] += 1
-            else:
-                M[neighbors[i], neighbors[i]] += 1
+            M[neighbors[i], neighbors[i]] += np.ones(shape=n_neighbors)
 
         if M_sparse:
             M = M.tocsr()

From 7729d370eb0f7e8cad4f9814c69fa85844b714ea Mon Sep 17 00:00:00 2001
From: Giorgio Angelotti <76100950+giorgioangel@users.noreply.github.com>
Date: Sat, 13 Jan 2024 21:25:43 +0100
Subject: [PATCH 7/9] Update sklearn/manifold/_locally_linear.py

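Replace the per-element loop used on the sparse path of Modified LLE
with a single indexed update shared by the sparse and dense containers.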

Co-authored-by: Guillaume Lemaitre <g.lemaitre58@gmail.com>
---
 sklearn/manifold/_locally_linear.py | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/sklearn/manifold/_locally_linear.py b/sklearn/manifold/_locally_linear.py
index df70fdc82e637..02e40cffc148f 100644
--- a/sklearn/manifold/_locally_linear.py
+++ b/sklearn/manifold/_locally_linear.py
@@ -509,13 +509,7 @@ def locally_linear_embedding(
             M[nbrs_x, nbrs_y] += np.dot(Wi, Wi.T)
             Wi_sum1 = Wi.sum(1)
             M[i, neighbors[i]] -= Wi_sum1
-
-            if M_sparse:
-                for idx in range(len(neighbors[i])):
-                    M[neighbors[i][idx], i] -= Wi_sum1[idx]
-            else:
-                M[neighbors[i], i] -= Wi_sum1
-
+            M[neighbors[i], [i]] -= Wi_sum1
             M[i, i] += s_i
 
         if M_sparse:

From de630df0d76aa780bc7a761616cf69efc87db2b6 Mon Sep 17 00:00:00 2001
From: Giorgio Angelotti <g.angelotti91@gmail.com>
Date: Sat, 13 Jan 2024 21:38:16 +0100
Subject: [PATCH 8/9] Share M container constructor and convert to CSR once

---
 doc/whats_new/v1.5.rst              |  3 ++-
 sklearn/manifold/_locally_linear.py | 26 ++++++--------------------
 2 files changed, 8 insertions(+), 21 deletions(-)

diff --git a/doc/whats_new/v1.5.rst b/doc/whats_new/v1.5.rst
index 17edca71eca80..88b31eb606daf 100644
--- a/doc/whats_new/v1.5.rst
+++ b/doc/whats_new/v1.5.rst
@@ -33,7 +33,8 @@ Changelog
 
 :mod:`sklearn.manifold`
 .......................
-- |Efficiency| :func:`locally_linear_embedding` now allocates more efficiently
+- |Efficiency| :func:`manifold.locally_linear_embedding` and
+  :class:`manifold.LocallyLinearEmbedding` now allocate more efficiently
   the memory of sparse matrices in the Hessian, Modified and LTSA methods.
   :pr:`28096` by :user:`Giorgio Angelotti <giorgioangel>`.
 
diff --git a/sklearn/manifold/_locally_linear.py b/sklearn/manifold/_locally_linear.py
index 02e40cffc148f..1c6ff414a1682 100644
--- a/sklearn/manifold/_locally_linear.py
+++ b/sklearn/manifold/_locally_linear.py
@@ -334,6 +334,7 @@ def locally_linear_embedding(
         raise ValueError("n_neighbors must be positive")
 
     M_sparse = eigen_solver != "dense"
+    M_container_constructor = lil_matrix if M_sparse else np.zeros
 
     if method == "standard":
         W = barycenter_kneighbors_graph(
@@ -367,10 +368,7 @@ def locally_linear_embedding(
         Yi = np.empty((n_neighbors, 1 + n_components + dp), dtype=np.float64)
         Yi[:, 0] = 1
 
-        if M_sparse:
-            M = lil_matrix((N, N), dtype=np.float64)
-        else:
-            M = np.zeros((N, N), dtype=np.float64)
+        M = M_container_constructor((N, N), dtype=np.float64)
 
         use_svd = n_neighbors > d_in
 
@@ -403,9 +401,6 @@ def locally_linear_embedding(
             nbrs_x, nbrs_y = np.meshgrid(neighbors[i], neighbors[i])
             M[nbrs_x, nbrs_y] += np.dot(w, w.T)
 
-        if M_sparse:
-            M = M.tocsr()
-
     elif method == "modified":
         if n_neighbors < n_components:
             raise ValueError("modified LLE requires n_neighbors >= n_components")
@@ -469,10 +464,7 @@ def locally_linear_embedding(
 
         # Now calculate M.
         # This is the [N x N] matrix whose null space is the desired embedding
-        if M_sparse:
-            M = lil_matrix((N, N), dtype=np.float64)
-        else:
-            M = np.zeros((N, N), dtype=np.float64)
+        M = M_container_constructor((N, N), dtype=np.float64)
 
         for i in range(N):
             s_i = s_range[i]
@@ -512,19 +504,13 @@ def locally_linear_embedding(
             M[neighbors[i], [i]] -= Wi_sum1
             M[i, i] += s_i
 
-        if M_sparse:
-            M = M.tocsr()
-
     elif method == "ltsa":
         neighbors = nbrs.kneighbors(
             X, n_neighbors=n_neighbors + 1, return_distance=False
         )
         neighbors = neighbors[:, 1:]
 
-        if M_sparse:
-            M = lil_matrix((N, N), dtype=np.float64)
-        else:
-            M = np.zeros((N, N), dtype=np.float64)
+        M = M_container_constructor((N, N), dtype=np.float64)
 
         use_svd = n_neighbors > d_in
 
@@ -550,8 +536,8 @@ def locally_linear_embedding(
 
             M[neighbors[i], neighbors[i]] += np.ones(shape=n_neighbors)
 
-        if M_sparse:
-            M = M.tocsr()
+    if method != "standard" and M_sparse:
+        M = M.tocsr()
 
     return null_space(
         M,

From 15c04c84a4083d8e3f826b4fbbe27b3b39e2a42c Mon Sep 17 00:00:00 2001
From: Giorgio Angelotti <g.angelotti91@gmail.com>
Date: Sat, 13 Jan 2024 21:44:11 +0100
Subject: [PATCH 9/9] Apply the final tocsr() to the standard method as well

---
 sklearn/manifold/_locally_linear.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sklearn/manifold/_locally_linear.py b/sklearn/manifold/_locally_linear.py
index 1c6ff414a1682..b4d435ae145ad 100644
--- a/sklearn/manifold/_locally_linear.py
+++ b/sklearn/manifold/_locally_linear.py
@@ -345,7 +345,7 @@ def locally_linear_embedding(
         # depending on the solver, we'll do this differently
         if M_sparse:
             M = eye(*W.shape, format=W.format) - W
-            M = (M.T * M).tocsr()
+            M = M.T * M
         else:
             M = (W.T * W - W.T - W).toarray()
             M.flat[:: M.shape[0] + 1] += 1  # W = W - I = W - I
@@ -536,7 +536,7 @@ def locally_linear_embedding(
 
             M[neighbors[i], neighbors[i]] += np.ones(shape=n_neighbors)
 
-    if method != "standard" and M_sparse:
+    if M_sparse:
         M = M.tocsr()
 
     return null_space(