Skip to content

Commit 92bc3cf

Browse files
jjerphan authored and punndcoder28 committed
MAINT Remove soon-to-be deprecated scipy.sparse functions (scikit-learn#26751)
1 parent ea75350 commit 92bc3cf

File tree

25 files changed, with 87 additions and 88 deletions.

25 files changed

+87
-88
lines changed

sklearn/cluster/_agglomerative.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -90,11 +90,12 @@ def _fix_connectivity(X, connectivity, affinity):
9090
connectivity = connectivity + connectivity.T
9191

9292
# Convert connectivity matrix to LIL
93-
if not sparse.isspmatrix_lil(connectivity):
94-
if not sparse.isspmatrix(connectivity):
95-
connectivity = sparse.lil_matrix(connectivity)
96-
else:
97-
connectivity = connectivity.tolil()
93+
if not sparse.issparse(connectivity):
94+
connectivity = sparse.lil_matrix(connectivity)
95+
96+
# `connectivity` is a sparse matrix at this point
97+
if connectivity.format != "lil":
98+
connectivity = connectivity.tolil()
9899

99100
# Compute the number of nodes
100101
n_connected_components, labels = connected_components(connectivity)

sklearn/datasets/tests/test_20news.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -64,15 +64,15 @@ def test_20news_length_consistency(fetch_20newsgroups_fxt):
6464
def test_20news_vectorized(fetch_20newsgroups_vectorized_fxt):
6565
# test subset = train
6666
bunch = fetch_20newsgroups_vectorized_fxt(subset="train")
67-
assert sp.isspmatrix_csr(bunch.data)
67+
assert sp.issparse(bunch.data) and bunch.data.format == "csr"
6868
assert bunch.data.shape == (11314, 130107)
6969
assert bunch.target.shape[0] == 11314
7070
assert bunch.data.dtype == np.float64
7171
assert bunch.DESCR.startswith(".. _20newsgroups_dataset:")
7272

7373
# test subset = test
7474
bunch = fetch_20newsgroups_vectorized_fxt(subset="test")
75-
assert sp.isspmatrix_csr(bunch.data)
75+
assert sp.issparse(bunch.data) and bunch.data.format == "csr"
7676
assert bunch.data.shape == (7532, 130107)
7777
assert bunch.target.shape[0] == 7532
7878
assert bunch.data.dtype == np.float64
@@ -84,7 +84,7 @@ def test_20news_vectorized(fetch_20newsgroups_vectorized_fxt):
8484

8585
# test subset = all
8686
bunch = fetch_20newsgroups_vectorized_fxt(subset="all")
87-
assert sp.isspmatrix_csr(bunch.data)
87+
assert sp.issparse(bunch.data) and bunch.data.format == "csr"
8888
assert bunch.data.shape == (11314 + 7532, 130107)
8989
assert bunch.target.shape[0] == 11314 + 7532
9090
assert bunch.data.dtype == np.float64

sklearn/feature_extraction/text.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -917,7 +917,7 @@ def _more_tags(self):
917917

918918
def _document_frequency(X):
919919
"""Count the number of non-zero values for each feature in sparse X."""
920-
if sp.isspmatrix_csr(X):
920+
if sp.issparse(X) and X.format == "csr":
921921
return np.bincount(X.indices, minlength=X.shape[1])
922922
else:
923923
return np.diff(X.indptr)

sklearn/kernel_approximation.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -363,7 +363,7 @@ def fit(self, X, y=None):
363363
X = self._validate_data(X, accept_sparse="csr")
364364
random_state = check_random_state(self.random_state)
365365
n_features = X.shape[1]
366-
sparse = sp.isspmatrix(X)
366+
sparse = sp.issparse(X)
367367
if self.gamma == "scale":
368368
# var = E[X^2] - E[X]^2 if sparse
369369
X_var = (X.multiply(X)).mean() - (X.mean()) ** 2 if sparse else X.var()

sklearn/linear_model/_base.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -839,7 +839,7 @@ def _pre_fit(
839839
"""
840840
n_samples, n_features = X.shape
841841

842-
if sparse.isspmatrix(X):
842+
if sparse.issparse(X):
843843
# copy is not needed here as X is not modified inplace when X is sparse
844844
precompute = False
845845
X, y, X_offset, y_offset, X_scale = _preprocess_data(

sklearn/linear_model/_coordinate_descent.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,7 @@ def _alpha_grid(
137137

138138
sparse_center = False
139139
if Xy is None:
140-
X_sparse = sparse.isspmatrix(X)
140+
X_sparse = sparse.issparse(X)
141141
sparse_center = X_sparse and fit_intercept
142142
X = check_array(
143143
X, accept_sparse="csc", copy=(copy_X and fit_intercept and not X_sparse)
@@ -560,7 +560,7 @@ def enet_path(
560560
raise ValueError("positive=True is not allowed for multi-output (y.ndim != 1)")
561561

562562
# MultiTaskElasticNet does not support sparse matrices
563-
if not multi_output and sparse.isspmatrix(X):
563+
if not multi_output and sparse.issparse(X):
564564
if X_offset_param is not None:
565565
# As sparse matrices are not actually centered we need this to be passed to
566566
# the CD solver.
@@ -621,7 +621,7 @@ def enet_path(
621621
# account for n_samples scaling in objectives between here and cd_fast
622622
l1_reg = alpha * l1_ratio * n_samples
623623
l2_reg = alpha * (1.0 - l1_ratio) * n_samples
624-
if not multi_output and sparse.isspmatrix(X):
624+
if not multi_output and sparse.issparse(X):
625625
model = cd_fast.sparse_enet_coordinate_descent(
626626
w=coef_,
627627
alpha=l1_reg,
@@ -1101,7 +1101,7 @@ def _decision_function(self, X):
11011101
The predicted decision function.
11021102
"""
11031103
check_is_fitted(self)
1104-
if sparse.isspmatrix(X):
1104+
if sparse.issparse(X):
11051105
return safe_sparse_dot(X, self.coef_.T, dense_output=True) + self.intercept_
11061106
else:
11071107
return super()._decision_function(X)
@@ -1546,7 +1546,7 @@ def fit(self, X, y, sample_weight=None):
15461546
check_y_params = dict(
15471547
copy=False, dtype=[np.float64, np.float32], ensure_2d=False
15481548
)
1549-
if isinstance(X, np.ndarray) or sparse.isspmatrix(X):
1549+
if isinstance(X, np.ndarray) or sparse.issparse(X):
15501550
# Keep a reference to X
15511551
reference_to_old_X = X
15521552
# Let us not impose fortran ordering so far: it is
@@ -1563,7 +1563,7 @@ def fit(self, X, y, sample_weight=None):
15631563
X, y = self._validate_data(
15641564
X, y, validate_separately=(check_X_params, check_y_params)
15651565
)
1566-
if sparse.isspmatrix(X):
1566+
if sparse.issparse(X):
15671567
if hasattr(reference_to_old_X, "data") and not np.may_share_memory(
15681568
reference_to_old_X.data, X.data
15691569
):
@@ -1598,7 +1598,7 @@ def fit(self, X, y, sample_weight=None):
15981598
)
15991599
y = column_or_1d(y, warn=True)
16001600
else:
1601-
if sparse.isspmatrix(X):
1601+
if sparse.issparse(X):
16021602
raise TypeError("X should be dense but a sparse matrix waspassed")
16031603
elif y.ndim == 1:
16041604
raise ValueError(

sklearn/linear_model/tests/test_coordinate_descent.py

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -84,12 +84,10 @@ def test_set_order_sparse(order, input_order):
8484
X = X.asformat(sparse_format)
8585
y = X.asformat(sparse_format)
8686
X2, y2 = _set_order(X, y, order=order)
87-
if order == "C":
88-
assert sparse.isspmatrix_csr(X2)
89-
assert sparse.isspmatrix_csr(y2)
90-
elif order == "F":
91-
assert sparse.isspmatrix_csc(X2)
92-
assert sparse.isspmatrix_csc(y2)
87+
88+
format = "csc" if order == "F" else "csr"
89+
assert sparse.issparse(X2) and X2.format == format
90+
assert sparse.issparse(y2) and y2.format == format
9391

9492

9593
def test_lasso_zero():

sklearn/linear_model/tests/test_sparse_coordinate_descent.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ def test_sparse_coef():
1919
clf = ElasticNet()
2020
clf.coef_ = [1, 2, 3]
2121

22-
assert sp.isspmatrix(clf.sparse_coef_)
22+
assert sp.issparse(clf.sparse_coef_)
2323
assert clf.sparse_coef_.toarray().tolist()[0] == clf.coef_
2424

2525

sklearn/manifold/_spectral_embedding.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ def _graph_is_connected(graph):
8585
is_connected : bool
8686
True means the graph is fully connected and False means not.
8787
"""
88-
if sparse.isspmatrix(graph):
88+
if sparse.issparse(graph):
8989
# sparse graph, find all the connected components
9090
n_connected_components, _ = connected_components(graph)
9191
return n_connected_components == 1
@@ -118,7 +118,7 @@ def _set_diag(laplacian, value, norm_laplacian):
118118
"""
119119
n_nodes = laplacian.shape[0]
120120
# We need all entries in the diagonal to values
121-
if not sparse.isspmatrix(laplacian):
121+
if not sparse.issparse(laplacian):
122122
if norm_laplacian:
123123
laplacian.flat[:: n_nodes + 1] = value
124124
else:
@@ -280,7 +280,7 @@ def spectral_embedding(
280280
if (
281281
eigen_solver == "arpack"
282282
or eigen_solver != "lobpcg"
283-
and (not sparse.isspmatrix(laplacian) or n_nodes < 5 * n_components)
283+
and (not sparse.issparse(laplacian) or n_nodes < 5 * n_components)
284284
):
285285
# lobpcg used with eigen_solver='amg' has bugs for low number of nodes
286286
# for details see the source code in scipy:
@@ -371,7 +371,7 @@ def spectral_embedding(
371371
# see note above under arpack why lobpcg has problems with small
372372
# number of nodes
373373
# lobpcg will fallback to eigh, so we short circuit it
374-
if sparse.isspmatrix(laplacian):
374+
if sparse.issparse(laplacian):
375375
laplacian = laplacian.toarray()
376376
_, diffusion_map = eigh(laplacian, check_finite=False)
377377
embedding = diffusion_map.T[:n_components]

sklearn/metrics/_pairwise_distances_reduction/_dispatcher.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
from typing import List
33

44
import numpy as np
5-
from scipy.sparse import issparse, isspmatrix_csr
5+
from scipy.sparse import issparse
66

77
from ... import get_config
88
from .._dist_metrics import BOOL_METRICS, METRIC_MAPPING64
@@ -100,7 +100,8 @@ def is_numpy_c_ordered(X):
100100

101101
def is_valid_sparse_matrix(X):
102102
return (
103-
isspmatrix_csr(X)
103+
issparse(X)
104+
and X.format == "csr"
104105
and
105106
# TODO: support CSR matrices without non-zeros elements
106107
X.nnz > 0

0 commit comments

Comments
 (0)