Skip to content

MAINT Remove soon-to-be deprecated scipy.sparse functions #26751

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
3 commits merged on Jul 12, 2023
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 6 additions & 5 deletions sklearn/cluster/_agglomerative.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,11 +90,12 @@ def _fix_connectivity(X, connectivity, affinity):
connectivity = connectivity + connectivity.T

# Convert connectivity matrix to LIL
if not sparse.isspmatrix_lil(connectivity):
if not sparse.isspmatrix(connectivity):
connectivity = sparse.lil_matrix(connectivity)
else:
connectivity = connectivity.tolil()
if not sparse.issparse(connectivity):
connectivity = sparse.lil_matrix(connectivity)

# `connectivity` is a sparse matrix at this point
if connectivity.format != "lil":
connectivity = connectivity.tolil()

# Compute the number of nodes
n_connected_components, labels = connected_components(connectivity)
Expand Down
6 changes: 3 additions & 3 deletions sklearn/datasets/tests/test_20news.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,15 +64,15 @@ def test_20news_length_consistency(fetch_20newsgroups_fxt):
def test_20news_vectorized(fetch_20newsgroups_vectorized_fxt):
# test subset = train
bunch = fetch_20newsgroups_vectorized_fxt(subset="train")
assert sp.isspmatrix_csr(bunch.data)
assert sp.issparse(bunch.data) and bunch.data.format == "csr"
assert bunch.data.shape == (11314, 130107)
assert bunch.target.shape[0] == 11314
assert bunch.data.dtype == np.float64
assert bunch.DESCR.startswith(".. _20newsgroups_dataset:")

# test subset = test
bunch = fetch_20newsgroups_vectorized_fxt(subset="test")
assert sp.isspmatrix_csr(bunch.data)
assert sp.issparse(bunch.data) and bunch.data.format == "csr"
assert bunch.data.shape == (7532, 130107)
assert bunch.target.shape[0] == 7532
assert bunch.data.dtype == np.float64
Expand All @@ -84,7 +84,7 @@ def test_20news_vectorized(fetch_20newsgroups_vectorized_fxt):

# test subset = all
bunch = fetch_20newsgroups_vectorized_fxt(subset="all")
assert sp.isspmatrix_csr(bunch.data)
assert sp.issparse(bunch.data) and bunch.data.format == "csr"
assert bunch.data.shape == (11314 + 7532, 130107)
assert bunch.target.shape[0] == 11314 + 7532
assert bunch.data.dtype == np.float64
Expand Down
2 changes: 1 addition & 1 deletion sklearn/feature_extraction/text.py
Original file line number Diff line number Diff line change
Expand Up @@ -917,7 +917,7 @@ def _more_tags(self):

def _document_frequency(X):
"""Count the number of non-zero values for each feature in sparse X."""
if sp.isspmatrix_csr(X):
if sp.issparse(X) and X.format == "csr":
return np.bincount(X.indices, minlength=X.shape[1])
else:
return np.diff(X.indptr)
Expand Down
2 changes: 1 addition & 1 deletion sklearn/kernel_approximation.py
Original file line number Diff line number Diff line change
Expand Up @@ -363,7 +363,7 @@ def fit(self, X, y=None):
X = self._validate_data(X, accept_sparse="csr")
random_state = check_random_state(self.random_state)
n_features = X.shape[1]
sparse = sp.isspmatrix(X)
sparse = sp.issparse(X)
if self.gamma == "scale":
# var = E[X^2] - E[X]^2 if sparse
X_var = (X.multiply(X)).mean() - (X.mean()) ** 2 if sparse else X.var()
Expand Down
2 changes: 1 addition & 1 deletion sklearn/linear_model/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -839,7 +839,7 @@ def _pre_fit(
"""
n_samples, n_features = X.shape

if sparse.isspmatrix(X):
if sparse.issparse(X):
# copy is not needed here as X is not modified inplace when X is sparse
precompute = False
X, y, X_offset, y_offset, X_scale = _preprocess_data(
Expand Down
14 changes: 7 additions & 7 deletions sklearn/linear_model/_coordinate_descent.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ def _alpha_grid(

sparse_center = False
if Xy is None:
X_sparse = sparse.isspmatrix(X)
X_sparse = sparse.issparse(X)
sparse_center = X_sparse and fit_intercept
X = check_array(
X, accept_sparse="csc", copy=(copy_X and fit_intercept and not X_sparse)
Expand Down Expand Up @@ -560,7 +560,7 @@ def enet_path(
raise ValueError("positive=True is not allowed for multi-output (y.ndim != 1)")

# MultiTaskElasticNet does not support sparse matrices
if not multi_output and sparse.isspmatrix(X):
if not multi_output and sparse.issparse(X):
if X_offset_param is not None:
# As sparse matrices are not actually centered we need this to be passed to
# the CD solver.
Expand Down Expand Up @@ -621,7 +621,7 @@ def enet_path(
# account for n_samples scaling in objectives between here and cd_fast
l1_reg = alpha * l1_ratio * n_samples
l2_reg = alpha * (1.0 - l1_ratio) * n_samples
if not multi_output and sparse.isspmatrix(X):
if not multi_output and sparse.issparse(X):
model = cd_fast.sparse_enet_coordinate_descent(
w=coef_,
alpha=l1_reg,
Expand Down Expand Up @@ -1101,7 +1101,7 @@ def _decision_function(self, X):
The predicted decision function.
"""
check_is_fitted(self)
if sparse.isspmatrix(X):
if sparse.issparse(X):
return safe_sparse_dot(X, self.coef_.T, dense_output=True) + self.intercept_
else:
return super()._decision_function(X)
Expand Down Expand Up @@ -1546,7 +1546,7 @@ def fit(self, X, y, sample_weight=None):
check_y_params = dict(
copy=False, dtype=[np.float64, np.float32], ensure_2d=False
)
if isinstance(X, np.ndarray) or sparse.isspmatrix(X):
if isinstance(X, np.ndarray) or sparse.issparse(X):
# Keep a reference to X
reference_to_old_X = X
# Let us not impose fortran ordering so far: it is
Expand All @@ -1563,7 +1563,7 @@ def fit(self, X, y, sample_weight=None):
X, y = self._validate_data(
X, y, validate_separately=(check_X_params, check_y_params)
)
if sparse.isspmatrix(X):
if sparse.issparse(X):
if hasattr(reference_to_old_X, "data") and not np.may_share_memory(
reference_to_old_X.data, X.data
):
Expand Down Expand Up @@ -1598,7 +1598,7 @@ def fit(self, X, y, sample_weight=None):
)
y = column_or_1d(y, warn=True)
else:
if sparse.isspmatrix(X):
if sparse.issparse(X):
raise TypeError("X should be dense but a sparse matrix waspassed")
elif y.ndim == 1:
raise ValueError(
Expand Down
10 changes: 4 additions & 6 deletions sklearn/linear_model/tests/test_coordinate_descent.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,12 +84,10 @@ def test_set_order_sparse(order, input_order):
X = X.asformat(sparse_format)
y = X.asformat(sparse_format)
X2, y2 = _set_order(X, y, order=order)
if order == "C":
assert sparse.isspmatrix_csr(X2)
assert sparse.isspmatrix_csr(y2)
elif order == "F":
assert sparse.isspmatrix_csc(X2)
assert sparse.isspmatrix_csc(y2)

format = "csc" if order == "F" else "csr"
assert sparse.issparse(X2) and X2.format == format
assert sparse.issparse(y2) and y2.format == format


def test_lasso_zero():
Expand Down
2 changes: 1 addition & 1 deletion sklearn/linear_model/tests/test_sparse_coordinate_descent.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def test_sparse_coef():
clf = ElasticNet()
clf.coef_ = [1, 2, 3]

assert sp.isspmatrix(clf.sparse_coef_)
assert sp.issparse(clf.sparse_coef_)
assert clf.sparse_coef_.toarray().tolist()[0] == clf.coef_


Expand Down
8 changes: 4 additions & 4 deletions sklearn/manifold/_spectral_embedding.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ def _graph_is_connected(graph):
is_connected : bool
True means the graph is fully connected and False means not.
"""
if sparse.isspmatrix(graph):
if sparse.issparse(graph):
# sparse graph, find all the connected components
n_connected_components, _ = connected_components(graph)
return n_connected_components == 1
Expand Down Expand Up @@ -118,7 +118,7 @@ def _set_diag(laplacian, value, norm_laplacian):
"""
n_nodes = laplacian.shape[0]
# We need all entries in the diagonal to values
if not sparse.isspmatrix(laplacian):
if not sparse.issparse(laplacian):
if norm_laplacian:
laplacian.flat[:: n_nodes + 1] = value
else:
Expand Down Expand Up @@ -280,7 +280,7 @@ def spectral_embedding(
if (
eigen_solver == "arpack"
or eigen_solver != "lobpcg"
and (not sparse.isspmatrix(laplacian) or n_nodes < 5 * n_components)
and (not sparse.issparse(laplacian) or n_nodes < 5 * n_components)
):
# lobpcg used with eigen_solver='amg' has bugs for low number of nodes
# for details see the source code in scipy:
Expand Down Expand Up @@ -371,7 +371,7 @@ def spectral_embedding(
# see note above under arpack why lobpcg has problems with small
# number of nodes
# lobpcg will fallback to eigh, so we short circuit it
if sparse.isspmatrix(laplacian):
if sparse.issparse(laplacian):
laplacian = laplacian.toarray()
_, diffusion_map = eigh(laplacian, check_finite=False)
embedding = diffusion_map.T[:n_components]
Expand Down
5 changes: 3 additions & 2 deletions sklearn/metrics/_pairwise_distances_reduction/_dispatcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from typing import List

import numpy as np
from scipy.sparse import issparse, isspmatrix_csr
from scipy.sparse import issparse

from ... import get_config
from .._dist_metrics import BOOL_METRICS, METRIC_MAPPING64
Expand Down Expand Up @@ -100,7 +100,8 @@ def is_numpy_c_ordered(X):

def is_valid_sparse_matrix(X):
return (
isspmatrix_csr(X)
issparse(X)
and X.format == "csr"
and
# TODO: support CSR matrices without non-zeros elements
X.nnz > 0
Expand Down
6 changes: 3 additions & 3 deletions sklearn/model_selection/tests/test_split.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
coo_matrix,
csc_matrix,
csr_matrix,
isspmatrix_csr,
issparse,
)
from scipy.special import comb

Expand Down Expand Up @@ -1355,8 +1355,8 @@ def test_train_test_split_sparse():
for InputFeatureType in sparse_types:
X_s = InputFeatureType(X)
X_train, X_test = train_test_split(X_s)
assert isspmatrix_csr(X_train)
assert isspmatrix_csr(X_test)
assert issparse(X_train) and X_train.format == "csr"
assert issparse(X_test) and X_test.format == "csr"


def test_train_test_split_mock_pandas():
Expand Down
4 changes: 2 additions & 2 deletions sklearn/preprocessing/_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -2394,7 +2394,7 @@ def add_dummy_feature(X, value=1.0):
n_samples, n_features = X.shape
shape = (n_samples, n_features + 1)
if sparse.issparse(X):
if sparse.isspmatrix_coo(X):
if X.format == "coo":
# Shift columns to the right.
col = X.col + 1
# Column indices of dummy feature are 0 everywhere.
Expand All @@ -2404,7 +2404,7 @@ def add_dummy_feature(X, value=1.0):
# Prepend the dummy feature n_samples times.
data = np.concatenate((np.full(n_samples, value), X.data))
return sparse.coo_matrix((data, (row, col)), shape)
elif sparse.isspmatrix_csc(X):
elif X.format == "csc":
# Shift index pointers since we need to add n_samples elements.
indptr = X.indptr + n_samples
# indptr[0] must be 0.
Expand Down
9 changes: 4 additions & 5 deletions sklearn/preprocessing/_polynomial.py
Original file line number Diff line number Diff line change
Expand Up @@ -435,7 +435,7 @@ def transform(self, X):

n_samples, n_features = X.shape
max_int32 = np.iinfo(np.int32).max
if sparse.isspmatrix_csr(X):
if sparse.issparse(X) and X.format == "csr":
if self._max_degree > 3:
return self.transform(X.tocsc()).tocsr()
to_stack = []
Expand Down Expand Up @@ -480,9 +480,9 @@ def transform(self, X):
" transformer to produce fewer than 2^31 output features"
)
XP = sparse.hstack(to_stack, dtype=X.dtype, format="csr")
elif sparse.isspmatrix_csc(X) and self._max_degree < 4:
elif sparse.issparse(X) and X.format == "csc" and self._max_degree < 4:
return self.transform(X.tocsr()).tocsc()
elif sparse.isspmatrix(X):
elif sparse.issparse(X):
combinations = self._combinations(
n_features=n_features,
min_degree=self._min_degree,
Expand Down Expand Up @@ -1119,8 +1119,7 @@ def transform(self, X):
XBS[mask, i * n_splines + k] = linear_extr

if use_sparse:
if not sparse.isspmatrix_csr(XBS_sparse):
XBS_sparse = XBS_sparse.tocsr()
XBS_sparse = XBS_sparse.tocsr()
output_list.append(XBS_sparse)

if use_sparse:
Expand Down
12 changes: 6 additions & 6 deletions sklearn/preprocessing/tests/test_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -1846,7 +1846,7 @@ def test_normalizer_l1():
X_norm = normalizer = Normalizer(norm="l2", copy=False).transform(X)

assert X_norm is not X
assert sparse.isspmatrix_csr(X_norm)
assert sparse.issparse(X_norm) and X_norm.format == "csr"

X_norm = toarray(X_norm)
for i in range(3):
Expand Down Expand Up @@ -1893,7 +1893,7 @@ def test_normalizer_l2():
X_norm = normalizer = Normalizer(norm="l2", copy=False).transform(X)

assert X_norm is not X
assert sparse.isspmatrix_csr(X_norm)
assert sparse.issparse(X_norm) and X_norm.format == "csr"

X_norm = toarray(X_norm)
for i in range(3):
Expand Down Expand Up @@ -1941,7 +1941,7 @@ def test_normalizer_max():
X_norm = normalizer = Normalizer(norm="l2", copy=False).transform(X)

assert X_norm is not X
assert sparse.isspmatrix_csr(X_norm)
assert sparse.issparse(X_norm) and X_norm.format == "csr"

X_norm = toarray(X_norm)
for i in range(3):
Expand Down Expand Up @@ -2205,21 +2205,21 @@ def test_add_dummy_feature():
def test_add_dummy_feature_coo():
X = sparse.coo_matrix([[1, 0], [0, 1], [0, 1]])
X = add_dummy_feature(X)
assert sparse.isspmatrix_coo(X), X
assert sparse.issparse(X) and X.format == "coo", X
assert_array_equal(X.toarray(), [[1, 1, 0], [1, 0, 1], [1, 0, 1]])


def test_add_dummy_feature_csc():
X = sparse.csc_matrix([[1, 0], [0, 1], [0, 1]])
X = add_dummy_feature(X)
assert sparse.isspmatrix_csc(X), X
assert sparse.issparse(X) and X.format == "csc", X
assert_array_equal(X.toarray(), [[1, 1, 0], [1, 0, 1], [1, 0, 1]])


def test_add_dummy_feature_csr():
X = sparse.csr_matrix([[1, 0], [0, 1], [0, 1]])
X = add_dummy_feature(X)
assert sparse.isspmatrix_csr(X), X
assert sparse.issparse(X) and X.format == "csr", X
assert_array_equal(X.toarray(), [[1, 1, 0], [1, 0, 1], [1, 0, 1]])


Expand Down
2 changes: 1 addition & 1 deletion sklearn/preprocessing/tests/test_encoders.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,7 @@ def check_categorical_onehot(X):

assert_allclose(Xtr1.toarray(), Xtr2)

assert sparse.isspmatrix_csr(Xtr1)
assert sparse.issparse(Xtr1) and Xtr1.format == "csr"
return Xtr1.toarray()


Expand Down
Loading