From af363390d880d110d5722170130213d6a7439f36 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?=
Date: Thu, 19 May 2022 13:33:34 +0200
Subject: [PATCH 1/4] Revert change in sklearn.extmath.util and fix
 randomized_svd benchmark

---
 benchmarks/bench_plot_randomized_svd.py | 10 ++++++----
 sklearn/utils/extmath.py                |  8 ++++----
 2 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/benchmarks/bench_plot_randomized_svd.py b/benchmarks/bench_plot_randomized_svd.py
index 081842231039e..36bea0b39d8cd 100644
--- a/benchmarks/bench_plot_randomized_svd.py
+++ b/benchmarks/bench_plot_randomized_svd.py
@@ -107,7 +107,7 @@
 
 # Determine when to switch to batch computation for matrix norms,
 # in case the reconstructed (dense) matrix is too large
-MAX_MEMORY = int(2e9)
+MAX_MEMORY = int(4e9)
 
 # The following datasets can be downloaded manually from:
 # CIFAR 10: https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
@@ -323,8 +323,10 @@ def norm_diff(A, norm=2, msg=True, random_state=None):
 
 
 def scalable_frobenius_norm_discrepancy(X, U, s, V):
-    # if the input is not too big, just call scipy
-    if X.shape[0] * X.shape[1] < MAX_MEMORY:
+    # if the input is not sparse or sparse but not too big, call scipy
+    if not sp.sparse.issparse(X) or (
+        X.shape[0] * X.shape[1] * X.dtype.itemsize < MAX_MEMORY
+    ):
         A = X - U.dot(np.diag(s).dot(V))
         return norm_diff(A, norm="fro")
 
@@ -498,7 +500,7 @@ def bench_c(datasets, n_comps):
 
 if __name__ == "__main__":
     random_state = check_random_state(1234)
-    power_iter = np.linspace(0, 6, 7, dtype=int)
+    power_iter = np.arange(0, 6)
     n_comps = 50
 
     for dataset_name in datasets:
diff --git a/sklearn/utils/extmath.py b/sklearn/utils/extmath.py
index 35e392f6e4540..8a7be5fd245f0 100644
--- a/sklearn/utils/extmath.py
+++ b/sklearn/utils/extmath.py
@@ -217,6 +217,10 @@ def randomized_range_finder(
     # Generating normal random vectors with shape: (A.shape[1], size)
     Q = random_state.normal(size=(A.shape[1], size))
 
+    if hasattr(A, "dtype") and A.dtype.kind == "f":
+        # Ensure f32 is preserved as f32
+        Q = Q.astype(A.dtype, copy=False)
+
     # Deal with "auto" mode
     if power_iteration_normalizer == "auto":
         if n_iter <= 2:
@@ -241,10 +245,6 @@
         # Extract an orthonormal basis
         Q, _ = linalg.qr(safe_sparse_dot(A, Q), mode="economic")
 
-    if hasattr(A, "dtype") and A.dtype.kind == "f":
-        # Ensure f32 is preserved as f32
-        Q = Q.astype(A.dtype, copy=False)
-
     return Q

From b016b734dbee2a2da8e165cda1ec18667b18385c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?=
Date: Thu, 19 May 2022 15:23:57 +0200
Subject: [PATCH 2/4] Put the code back as it was

---
 sklearn/utils/extmath.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/sklearn/utils/extmath.py b/sklearn/utils/extmath.py
index 8a7be5fd245f0..e4513a62bf07e 100644
--- a/sklearn/utils/extmath.py
+++ b/sklearn/utils/extmath.py
@@ -216,7 +216,6 @@ def randomized_range_finder(
 
     # Generating normal random vectors with shape: (A.shape[1], size)
     Q = random_state.normal(size=(A.shape[1], size))
-
     if hasattr(A, "dtype") and A.dtype.kind == "f":
         # Ensure f32 is preserved as f32
         Q = Q.astype(A.dtype, copy=False)

From cbf4906aab2be44a9a2cff3eadabb69ae84cbb90 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?=
Date: Fri, 20 May 2022 08:19:28 +0200
Subject: [PATCH 3/4] Tweak

---
 benchmarks/bench_plot_randomized_svd.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/benchmarks/bench_plot_randomized_svd.py b/benchmarks/bench_plot_randomized_svd.py
index 36bea0b39d8cd..49b93cd137389 100644
--- a/benchmarks/bench_plot_randomized_svd.py
+++ b/benchmarks/bench_plot_randomized_svd.py
@@ -323,10 +323,9 @@ def norm_diff(A, norm=2, msg=True, random_state=None):
 
 
 def scalable_frobenius_norm_discrepancy(X, U, s, V):
-    # if the input is not sparse or sparse but not too big, call scipy
-    if not sp.sparse.issparse(X) or (
-        X.shape[0] * X.shape[1] * X.dtype.itemsize < MAX_MEMORY
-    ):
+    if not sp.sparse.issparse(X) or (X.size * X.dtype.itemsize < MAX_MEMORY):
+        # if the input is not sparse or sparse but not too big,
+        # U.dot(np.diag(s).dot(V)) will fit in RAM
         A = X - U.dot(np.diag(s).dot(V))
         return norm_diff(A, norm="fro")
 

From 3e2aec8d11ebb28b188cdbe9937a7325b42d50a5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?=
Date: Fri, 20 May 2022 15:55:40 +0200
Subject: [PATCH 4/4] Fix

---
 benchmarks/bench_plot_randomized_svd.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/benchmarks/bench_plot_randomized_svd.py b/benchmarks/bench_plot_randomized_svd.py
index 49b93cd137389..ecc1bbb92ce61 100644
--- a/benchmarks/bench_plot_randomized_svd.py
+++ b/benchmarks/bench_plot_randomized_svd.py
@@ -323,7 +323,9 @@ def norm_diff(A, norm=2, msg=True, random_state=None):
 
 
 def scalable_frobenius_norm_discrepancy(X, U, s, V):
-    if not sp.sparse.issparse(X) or (X.size * X.dtype.itemsize < MAX_MEMORY):
+    if not sp.sparse.issparse(X) or (
+        X.shape[0] * X.shape[1] * X.dtype.itemsize < MAX_MEMORY
+    ):
         # if the input is not sparse or sparse but not too big,
         # U.dot(np.diag(s).dot(V)) will fit in RAM
         A = X - U.dot(np.diag(s).dot(V))