scikit-learn · jeremiedbb · Jul 12, 2023 · Jul 2, 2023 · Jul 3, 2023 · Jul 5, 2023
diff --git a/sklearn/cluster/_agglomerative.py b/sklearn/cluster/_agglomerative.py
@@ -90,11 +90,12 @@ def _fix_connectivity(X, connectivity, affinity):
     connectivity = connectivity + connectivity.T
 
     # Convert connectivity matrix to LIL
-    if not sparse.isspmatrix_lil(connectivity):
-        if not sparse.isspmatrix(connectivity):
-            connectivity = sparse.lil_matrix(connectivity)
-        else:
-            connectivity = connectivity.tolil()
+    if not sparse.issparse(connectivity):
+        connectivity = sparse.lil_matrix(connectivity)
+
+    # `connectivity` is a sparse matrix at this point
+    if connectivity.format != "lil":
+        connectivity = connectivity.tolil()
 
     # Compute the number of nodes
     n_connected_components, labels = connected_components(connectivity)

diff --git a/sklearn/datasets/tests/test_20news.py b/sklearn/datasets/tests/test_20news.py
@@ -64,15 +64,15 @@ def test_20news_length_consistency(fetch_20newsgroups_fxt):
 def test_20news_vectorized(fetch_20newsgroups_vectorized_fxt):
     # test subset = train
     bunch = fetch_20newsgroups_vectorized_fxt(subset="train")
-    assert sp.isspmatrix_csr(bunch.data)
+    assert sp.issparse(bunch.data) and bunch.data.format == "csr"
     assert bunch.data.shape == (11314, 130107)
     assert bunch.target.shape[0] == 11314
     assert bunch.data.dtype == np.float64
     assert bunch.DESCR.startswith(".. _20newsgroups_dataset:")
 
     # test subset = test
     bunch = fetch_20newsgroups_vectorized_fxt(subset="test")
-    assert sp.isspmatrix_csr(bunch.data)
+    assert sp.issparse(bunch.data) and bunch.data.format == "csr"
     assert bunch.data.shape == (7532, 130107)
     assert bunch.target.shape[0] == 7532
     assert bunch.data.dtype == np.float64
@@ -84,7 +84,7 @@ def test_20news_vectorized(fetch_20newsgroups_vectorized_fxt):
 
     # test subset = all
     bunch = fetch_20newsgroups_vectorized_fxt(subset="all")
-    assert sp.isspmatrix_csr(bunch.data)
+    assert sp.issparse(bunch.data) and bunch.data.format == "csr"
     assert bunch.data.shape == (11314 + 7532, 130107)
     assert bunch.target.shape[0] == 11314 + 7532
     assert bunch.data.dtype == np.float64

diff --git a/sklearn/feature_extraction/text.py b/sklearn/feature_extraction/text.py
@@ -917,7 +917,7 @@ def _more_tags(self):
 
 def _document_frequency(X):
     """Count the number of non-zero values for each feature in sparse X."""
-    if sp.isspmatrix_csr(X):
+    if sp.issparse(X) and X.format == "csr":
         return np.bincount(X.indices, minlength=X.shape[1])
     else:
         return np.diff(X.indptr)

diff --git a/sklearn/kernel_approximation.py b/sklearn/kernel_approximation.py
@@ -363,7 +363,7 @@ def fit(self, X, y=None):
         X = self._validate_data(X, accept_sparse="csr")
         random_state = check_random_state(self.random_state)
         n_features = X.shape[1]
-        sparse = sp.isspmatrix(X)
+        sparse = sp.issparse(X)
         if self.gamma == "scale":
             # var = E[X^2] - E[X]^2 if sparse
             X_var = (X.multiply(X)).mean() - (X.mean()) ** 2 if sparse else X.var()

diff --git a/sklearn/linear_model/_base.py b/sklearn/linear_model/_base.py
@@ -839,7 +839,7 @@ def _pre_fit(
     """
     n_samples, n_features = X.shape
 
-    if sparse.isspmatrix(X):
+    if sparse.issparse(X):
         # copy is not needed here as X is not modified inplace when X is sparse
         precompute = False
         X, y, X_offset, y_offset, X_scale = _preprocess_data(

diff --git a/sklearn/linear_model/_coordinate_descent.py b/sklearn/linear_model/_coordinate_descent.py
@@ -137,7 +137,7 @@ def _alpha_grid(
 
     sparse_center = False
     if Xy is None:
-        X_sparse = sparse.isspmatrix(X)
+        X_sparse = sparse.issparse(X)
         sparse_center = X_sparse and fit_intercept
         X = check_array(
             X, accept_sparse="csc", copy=(copy_X and fit_intercept and not X_sparse)
@@ -560,7 +560,7 @@ def enet_path(
         raise ValueError("positive=True is not allowed for multi-output (y.ndim != 1)")
 
     # MultiTaskElasticNet does not support sparse matrices
-    if not multi_output and sparse.isspmatrix(X):
+    if not multi_output and sparse.issparse(X):
         if X_offset_param is not None:
             # As sparse matrices are not actually centered we need this to be passed to
             # the CD solver.
@@ -621,7 +621,7 @@ def enet_path(
         # account for n_samples scaling in objectives between here and cd_fast
         l1_reg = alpha * l1_ratio * n_samples
         l2_reg = alpha * (1.0 - l1_ratio) * n_samples
-        if not multi_output and sparse.isspmatrix(X):
+        if not multi_output and sparse.issparse(X):
             model = cd_fast.sparse_enet_coordinate_descent(
                 w=coef_,
                 alpha=l1_reg,
@@ -1101,7 +1101,7 @@ def _decision_function(self, X):
             The predicted decision function.
         """
         check_is_fitted(self)
-        if sparse.isspmatrix(X):
+        if sparse.issparse(X):
             return safe_sparse_dot(X, self.coef_.T, dense_output=True) + self.intercept_
         else:
             return super()._decision_function(X)
@@ -1546,7 +1546,7 @@ def fit(self, X, y, sample_weight=None):
         check_y_params = dict(
             copy=False, dtype=[np.float64, np.float32], ensure_2d=False
         )
-        if isinstance(X, np.ndarray) or sparse.isspmatrix(X):
+        if isinstance(X, np.ndarray) or sparse.issparse(X):
             # Keep a reference to X
             reference_to_old_X = X
             # Let us not impose fortran ordering so far: it is
@@ -1563,7 +1563,7 @@ def fit(self, X, y, sample_weight=None):
             X, y = self._validate_data(
                 X, y, validate_separately=(check_X_params, check_y_params)
             )
-            if sparse.isspmatrix(X):
+            if sparse.issparse(X):
                 if hasattr(reference_to_old_X, "data") and not np.may_share_memory(
                     reference_to_old_X.data, X.data
                 ):
@@ -1598,7 +1598,7 @@ def fit(self, X, y, sample_weight=None):
                 )
             y = column_or_1d(y, warn=True)
         else:
-            if sparse.isspmatrix(X):
+            if sparse.issparse(X):
                 raise TypeError("X should be dense but a sparse matrix waspassed")
             elif y.ndim == 1:
                 raise ValueError(

diff --git a/sklearn/linear_model/tests/test_coordinate_descent.py b/sklearn/linear_model/tests/test_coordinate_descent.py
@@ -84,12 +84,10 @@ def test_set_order_sparse(order, input_order):
     X = X.asformat(sparse_format)
     y = X.asformat(sparse_format)
     X2, y2 = _set_order(X, y, order=order)
-    if order == "C":
-        assert sparse.isspmatrix_csr(X2)
-        assert sparse.isspmatrix_csr(y2)
-    elif order == "F":
-        assert sparse.isspmatrix_csc(X2)
-        assert sparse.isspmatrix_csc(y2)
+
+    expected_format = "csc" if order == "F" else "csr"  # layout implied by order
+    assert sparse.issparse(X2) and X2.format == expected_format
+    assert sparse.issparse(y2) and y2.format == expected_format
 
 
 def test_lasso_zero():

diff --git a/sklearn/linear_model/tests/test_sparse_coordinate_descent.py b/sklearn/linear_model/tests/test_sparse_coordinate_descent.py
@@ -19,7 +19,7 @@ def test_sparse_coef():
     clf = ElasticNet()
     clf.coef_ = [1, 2, 3]
 
-    assert sp.isspmatrix(clf.sparse_coef_)
+    assert sp.issparse(clf.sparse_coef_)
     assert clf.sparse_coef_.toarray().tolist()[0] == clf.coef_
 
 

diff --git a/sklearn/manifold/_spectral_embedding.py b/sklearn/manifold/_spectral_embedding.py
@@ -85,7 +85,7 @@ def _graph_is_connected(graph):
     is_connected : bool
         True means the graph is fully connected and False means not.
     """
-    if sparse.isspmatrix(graph):
+    if sparse.issparse(graph):
         # sparse graph, find all the connected components
         n_connected_components, _ = connected_components(graph)
         return n_connected_components == 1
@@ -118,7 +118,7 @@ def _set_diag(laplacian, value, norm_laplacian):
     """
     n_nodes = laplacian.shape[0]
     # We need all entries in the diagonal to values
-    if not sparse.isspmatrix(laplacian):
+    if not sparse.issparse(laplacian):
         if norm_laplacian:
             laplacian.flat[:: n_nodes + 1] = value
     else:
@@ -280,7 +280,7 @@ def spectral_embedding(
     if (
         eigen_solver == "arpack"
         or eigen_solver != "lobpcg"
-        and (not sparse.isspmatrix(laplacian) or n_nodes < 5 * n_components)
+        and (not sparse.issparse(laplacian) or n_nodes < 5 * n_components)
     ):
         # lobpcg used with eigen_solver='amg' has bugs for low number of nodes
         # for details see the source code in scipy:
@@ -371,7 +371,7 @@ def spectral_embedding(
             # see note above under arpack why lobpcg has problems with small
             # number of nodes
             # lobpcg will fallback to eigh, so we short circuit it
-            if sparse.isspmatrix(laplacian):
+            if sparse.issparse(laplacian):
                 laplacian = laplacian.toarray()
             _, diffusion_map = eigh(laplacian, check_finite=False)
             embedding = diffusion_map.T[:n_components]

diff --git a/sklearn/metrics/_pairwise_distances_reduction/_dispatcher.py b/sklearn/metrics/_pairwise_distances_reduction/_dispatcher.py
@@ -2,7 +2,7 @@
 from typing import List
 
 import numpy as np
-from scipy.sparse import issparse, isspmatrix_csr
+from scipy.sparse import issparse
 
 from ... import get_config
 from .._dist_metrics import BOOL_METRICS, METRIC_MAPPING64
@@ -100,7 +100,8 @@ def is_numpy_c_ordered(X):
 
         def is_valid_sparse_matrix(X):
             return (
-                isspmatrix_csr(X)
+                issparse(X)
+                and X.format == "csr"
                 and
                 # TODO: support CSR matrices without non-zeros elements
                 X.nnz > 0

diff --git a/sklearn/model_selection/tests/test_split.py b/sklearn/model_selection/tests/test_split.py
@@ -10,7 +10,7 @@
     coo_matrix,
     csc_matrix,
     csr_matrix,
-    isspmatrix_csr,
+    issparse,
 )
 from scipy.special import comb
 
@@ -1355,8 +1355,8 @@ def test_train_test_split_sparse():
     for InputFeatureType in sparse_types:
         X_s = InputFeatureType(X)
         X_train, X_test = train_test_split(X_s)
-        assert isspmatrix_csr(X_train)
-        assert isspmatrix_csr(X_test)
+        assert issparse(X_train) and X_train.format == "csr"
+        assert issparse(X_test) and X_test.format == "csr"
 
 
 def test_train_test_split_mock_pandas():

diff --git a/sklearn/preprocessing/_data.py b/sklearn/preprocessing/_data.py
@@ -2394,7 +2394,7 @@ def add_dummy_feature(X, value=1.0):
     n_samples, n_features = X.shape
     shape = (n_samples, n_features + 1)
     if sparse.issparse(X):
-        if sparse.isspmatrix_coo(X):
+        if X.format == "coo":
             # Shift columns to the right.
             col = X.col + 1
             # Column indices of dummy feature are 0 everywhere.
@@ -2404,7 +2404,7 @@ def add_dummy_feature(X, value=1.0):
             # Prepend the dummy feature n_samples times.
             data = np.concatenate((np.full(n_samples, value), X.data))
             return sparse.coo_matrix((data, (row, col)), shape)
-        elif sparse.isspmatrix_csc(X):
+        elif X.format == "csc":
             # Shift index pointers since we need to add n_samples elements.
             indptr = X.indptr + n_samples
             # indptr[0] must be 0.

diff --git a/sklearn/preprocessing/_polynomial.py b/sklearn/preprocessing/_polynomial.py
@@ -435,7 +435,7 @@ def transform(self, X):
 
         n_samples, n_features = X.shape
         max_int32 = np.iinfo(np.int32).max
-        if sparse.isspmatrix_csr(X):
+        if sparse.issparse(X) and X.format == "csr":
             if self._max_degree > 3:
                 return self.transform(X.tocsc()).tocsr()
             to_stack = []
@@ -480,9 +480,9 @@ def transform(self, X):
                         " transformer to produce fewer than 2^31 output features"
                     )
                 XP = sparse.hstack(to_stack, dtype=X.dtype, format="csr")
-        elif sparse.isspmatrix_csc(X) and self._max_degree < 4:
+        elif sparse.issparse(X) and X.format == "csc" and self._max_degree < 4:
             return self.transform(X.tocsr()).tocsc()
-        elif sparse.isspmatrix(X):
+        elif sparse.issparse(X):
             combinations = self._combinations(
                 n_features=n_features,
                 min_degree=self._min_degree,
@@ -1119,8 +1119,7 @@ def transform(self, X):
                             XBS[mask, i * n_splines + k] = linear_extr
 
             if use_sparse:
-                if not sparse.isspmatrix_csr(XBS_sparse):
-                    XBS_sparse = XBS_sparse.tocsr()
+                XBS_sparse = XBS_sparse.tocsr()
                 output_list.append(XBS_sparse)
 
         if use_sparse:

diff --git a/sklearn/preprocessing/tests/test_data.py b/sklearn/preprocessing/tests/test_data.py
@@ -1846,7 +1846,7 @@ def test_normalizer_l1():
         X_norm = normalizer = Normalizer(norm="l2", copy=False).transform(X)
 
         assert X_norm is not X
-        assert sparse.isspmatrix_csr(X_norm)
+        assert sparse.issparse(X_norm) and X_norm.format == "csr"
 
         X_norm = toarray(X_norm)
         for i in range(3):
@@ -1893,7 +1893,7 @@ def test_normalizer_l2():
         X_norm = normalizer = Normalizer(norm="l2", copy=False).transform(X)
 
         assert X_norm is not X
-        assert sparse.isspmatrix_csr(X_norm)
+        assert sparse.issparse(X_norm) and X_norm.format == "csr"
 
         X_norm = toarray(X_norm)
         for i in range(3):
@@ -1941,7 +1941,7 @@ def test_normalizer_max():
         X_norm = normalizer = Normalizer(norm="l2", copy=False).transform(X)
 
         assert X_norm is not X
-        assert sparse.isspmatrix_csr(X_norm)
+        assert sparse.issparse(X_norm) and X_norm.format == "csr"
 
         X_norm = toarray(X_norm)
         for i in range(3):
@@ -2205,21 +2205,21 @@ def test_add_dummy_feature():
 def test_add_dummy_feature_coo():
     X = sparse.coo_matrix([[1, 0], [0, 1], [0, 1]])
     X = add_dummy_feature(X)
-    assert sparse.isspmatrix_coo(X), X
+    assert sparse.issparse(X) and X.format == "coo", X
     assert_array_equal(X.toarray(), [[1, 1, 0], [1, 0, 1], [1, 0, 1]])
 
 
 def test_add_dummy_feature_csc():
     X = sparse.csc_matrix([[1, 0], [0, 1], [0, 1]])
     X = add_dummy_feature(X)
-    assert sparse.isspmatrix_csc(X), X
+    assert sparse.issparse(X) and X.format == "csc", X
     assert_array_equal(X.toarray(), [[1, 1, 0], [1, 0, 1], [1, 0, 1]])
 
 
 def test_add_dummy_feature_csr():
     X = sparse.csr_matrix([[1, 0], [0, 1], [0, 1]])
     X = add_dummy_feature(X)
-    assert sparse.isspmatrix_csr(X), X
+    assert sparse.issparse(X) and X.format == "csr", X
     assert_array_equal(X.toarray(), [[1, 1, 0], [1, 0, 1], [1, 0, 1]])
 
 

diff --git a/sklearn/preprocessing/tests/test_encoders.py b/sklearn/preprocessing/tests/test_encoders.py
@@ -240,7 +240,7 @@ def check_categorical_onehot(X):
 
     assert_allclose(Xtr1.toarray(), Xtr2)
 
-    assert sparse.isspmatrix_csr(Xtr1)
+    assert sparse.issparse(Xtr1) and Xtr1.format == "csr"
     return Xtr1.toarray()