Skip to content

Commit b17feb6

Browse files
committed
Use sparse sparray by default while allowing spmatrix inputs
Set up function _align_api_if_sparse() with tests. Also functions _ensure_sparse_index_int32() and safely_cast_index_arrays(), and _sparse_eye, _sparse_diags, _sparse_random to span SciPy <1.12 changes. Introduce SCIPY_VERSION_BELOW_1_12 and SCIPY_VERSION_BELOW_1_15 boolean flags. Fix rng keyword arg for old SciPy versions. Ensure 2d sparse. Convert benchmarks and doc modules. Convert csr_matrix to csr_array, and CSR, COO, DIA, etc. Make doctests pass. Pass tests on older SciPy versions, i.e. ensure int32 indices where needed. Make it work for SciPy 1.8. Improve test coverage.
1 parent 7a26152 commit b17feb6

File tree

93 files changed

+935
-489
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

93 files changed

+935
-489
lines changed

asv_benchmarks/benchmarks/datasets.py

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
from pathlib import Path
22

33
import numpy as np
4-
import scipy.sparse as sp
54
from joblib import Memory
65

76
from sklearn.datasets import (
@@ -17,6 +16,7 @@
1716
from sklearn.feature_extraction.text import TfidfVectorizer
1817
from sklearn.model_selection import train_test_split
1918
from sklearn.preprocessing import MaxAbsScaler, StandardScaler
19+
from sklearn.utils._sparse import _sparse_random
2020

2121
# memory location for caching datasets
2222
M = Memory(location=str(Path(__file__).resolve().parent / "cache"))
@@ -100,12 +100,12 @@ def _synth_regression_dataset(n_samples=100000, n_features=100, dtype=np.float32
100100
def _synth_regression_sparse_dataset(
101101
n_samples=10000, n_features=10000, density=0.01, dtype=np.float32
102102
):
103-
X = sp.random(
104-
m=n_samples, n=n_features, density=density, format="csr", random_state=0
103+
X = _sparse_random(
104+
(n_samples, n_features), density=density, format="csr", random_state=0
105105
)
106106
X.data = np.random.RandomState(0).randn(X.getnnz())
107107
X = X.astype(dtype, copy=False)
108-
coefs = sp.random(m=n_features, n=1, density=0.5, random_state=0)
108+
coefs = _sparse_random((n_features, 1), density=0.5, random_state=0)
109109
coefs.data = np.random.RandomState(0).randn(coefs.getnnz())
110110
y = X.dot(coefs.toarray()).reshape(-1)
111111
y += 0.2 * y.std() * np.random.randn(n_samples)
@@ -155,9 +155,8 @@ def _random_dataset(
155155
X = np.random.RandomState(0).random_sample((n_samples, n_features))
156156
X = X.astype(dtype, copy=False)
157157
else:
158-
X = sp.random(
159-
n_samples,
160-
n_features,
158+
X = _sparse_random(
159+
(n_samples, n_features),
161160
density=0.05,
162161
format="csr",
163162
dtype=dtype,

benchmarks/bench_feature_expansions.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,9 @@
22

33
import matplotlib.pyplot as plt
44
import numpy as np
5-
import scipy.sparse as sparse
65

76
from sklearn.preprocessing import PolynomialFeatures
7+
from sklearn.utils._sparse import _sparse_random
88

99
degree = 2
1010
trials = 3
@@ -21,7 +21,7 @@
2121
for density in densities:
2222
for dim_index, dim in enumerate(dimensionalities):
2323
print(trial, density, dim)
24-
X_csr = sparse.random(num_rows, dim, density).tocsr()
24+
X_csr = _sparse_random((num_rows, dim), density=density, format="csr")
2525
X_dense = X_csr.toarray()
2626
# CSR
2727
t0 = time()

benchmarks/bench_plot_randomized_svd.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -188,7 +188,7 @@ def get_data(dataset_name):
188188
data = np.repeat(data, 10)
189189
row = np.random.uniform(0, small_size, sparsity)
190190
col = np.random.uniform(0, small_size, sparsity)
191-
X = sp.sparse.csr_matrix((data, (row, col)), shape=(size, small_size))
191+
X = sp.sparse.csr_array((data, (row, col)), shape=(size, small_size))
192192
del data
193193
del row
194194
del col

benchmarks/bench_random_projections.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ def bench_scikit_transformer(X, transformer):
7070
# Gaussian distributed values
7171
def make_sparse_random_data(n_samples, n_features, n_nonzeros, random_state=None):
7272
rng = np.random.RandomState(random_state)
73-
data_coo = sp.coo_matrix(
73+
data_coo = sp.coo_array(
7474
(
7575
rng.randn(n_nonzeros),
7676
(

doc/modules/impute.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -56,11 +56,11 @@ that contain the missing values::
5656
The :class:`SimpleImputer` class also supports sparse matrices::
5757

5858
>>> import scipy.sparse as sp
59-
>>> X = sp.csc_matrix([[1, 2], [0, -1], [8, 4]])
59+
>>> X = sp.csc_array([[1, 2], [0, -1], [8, 4]])
6060
>>> imp = SimpleImputer(missing_values=-1, strategy='mean')
6161
>>> imp.fit(X)
6262
SimpleImputer(missing_values=-1)
63-
>>> X_test = sp.csc_matrix([[-1, 2], [6, -1], [7, 6]])
63+
>>> X_test = sp.csc_array([[-1, 2], [6, -1], [7, 6]])
6464
>>> print(imp.transform(X_test).toarray())
6565
[[3. 2.]
6666
[6. 3.]

doc/modules/multiclass.rst

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -170,9 +170,9 @@ Valid :term:`multiclass` representations for
170170
[1 0 0]
171171
[0 1 0]]
172172
>>> from scipy import sparse
173-
>>> y_sparse = sparse.csr_matrix(y_dense)
173+
>>> y_sparse = sparse.csr_array(y_dense)
174174
>>> print(y_sparse)
175-
<Compressed Sparse Row sparse matrix of dtype 'int64'
175+
<Compressed Sparse Row sparse array of dtype 'int64'
176176
with 4 stored elements and shape (4, 3)>
177177
Coords Values
178178
(0, 0) 1
@@ -380,9 +380,9 @@ refer to :ref:`preprocessing_targets`.
380380

381381
An example of the same ``y`` in sparse matrix form:
382382

383-
>>> y_sparse = sparse.csr_matrix(y)
383+
>>> y_sparse = sparse.csr_array(y)
384384
>>> print(y_sparse)
385-
<Compressed Sparse Row sparse matrix of dtype 'int64'
385+
<Compressed Sparse Row sparse array of dtype 'int64'
386386
with 4 stored elements and shape (3, 4)>
387387
Coords Values
388388
(0, 0) 1

examples/applications/plot_tomography_l1_reconstruction.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,9 @@ def build_projection_operator(l_x, n_dir):
8989
weights += list(w[mask])
9090
camera_inds += list(inds[mask] + i * l_x)
9191
data_inds += list(data_unravel_indices[mask])
92-
proj_operator = sparse.coo_matrix((weights, (camera_inds, data_inds)))
92+
camera_inds = np.array(camera_inds, dtype=np.int32) # lasso needs int32 inds
93+
data_inds = np.array(data_inds, dtype=np.int32)
94+
proj_operator = sparse.coo_array((weights, (camera_inds, data_inds)))
9395
return proj_operator
9496

9597

examples/applications/wikipedia_principal_eigenvector.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,7 @@ def get_adjacency_matrix(redirects_filename, page_links_filename, limit=None):
146146
break
147147

148148
print("Computing the adjacency matrix")
149-
X = sparse.lil_matrix((len(index_map), len(index_map)), dtype=np.float32)
149+
X = sparse.lil_array((len(index_map), len(index_map)), dtype=np.float32)
150150
for i, j in links:
151151
X[i, j] = 1.0
152152
del links

examples/linear_model/plot_lasso_dense_vs_sparse_data.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@
3232

3333
X, y = make_regression(n_samples=200, n_features=5000, random_state=0)
3434
# create a copy of X in sparse format
35-
X_sp = sparse.coo_matrix(X)
35+
X_sp = sparse.coo_array(X)
3636

3737
alpha = 1
3838
sparse_lasso = Lasso(alpha=alpha, fit_intercept=False, max_iter=1000)
@@ -64,7 +64,7 @@
6464
# make Xs sparse by replacing the values lower than 2.5 with 0s
6565
Xs[Xs < 2.5] = 0.0
6666
# create a copy of Xs in sparse format
67-
Xs_sp = sparse.coo_matrix(Xs)
67+
Xs_sp = sparse.coo_array(Xs)
6868
Xs_sp = Xs_sp.tocsc()
6969

7070
# compute the proportion of non-zero coefficient in the data matrix

examples/neighbors/approximate_nearest_neighbors.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@
3939
# `nmslib`, as well as a loading function.
4040
import joblib
4141
import numpy as np
42-
from scipy.sparse import csr_matrix
42+
from scipy.sparse import csr_array
4343

4444
from sklearn.base import BaseEstimator, TransformerMixin
4545
from sklearn.datasets import fetch_openml
@@ -93,7 +93,7 @@ def transform(self, X):
9393
indices, distances = np.vstack(indices), np.vstack(distances)
9494

9595
indptr = np.arange(0, n_samples_transform * n_neighbors + 1, n_neighbors)
96-
kneighbors_graph = csr_matrix(
96+
kneighbors_graph = csr_array(
9797
(distances.ravel(), indices.ravel(), indptr),
9898
shape=(n_samples_transform, self.n_samples_fit_),
9999
)

0 commit comments

Comments
 (0)