CI Fix scipy-dev build (scikit-learn#28047)

lesteve · web-flow · commit b5827cbf7a4c · 2024-01-04T14:28:45.000+01:00
diff --git a/sklearn/datasets/tests/test_svmlight_format.py b/sklearn/datasets/tests/test_svmlight_format.py
@@ -261,7 +261,7 @@ def test_invalid_filename():
 def test_dump(csr_container):
     X_sparse, y_dense = _load_svmlight_local_test_file(datafile)
     X_dense = X_sparse.toarray()
-    y_sparse = csr_container(y_dense)
+    y_sparse = csr_container(np.atleast_2d(y_dense))
 
     # slicing a csr_matrix can unsort its .indices, so test that we sort
     # those correctly
diff --git a/sklearn/ensemble/tests/test_forest.py b/sklearn/ensemble/tests/test_forest.py
@@ -1595,7 +1595,7 @@ def test_max_samples_boundary_classifiers(name):
 @pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
 def test_forest_y_sparse(csr_container):
     X = [[1, 2, 3]]
-    y = csr_container([4, 5, 6])
+    y = csr_container([[4, 5, 6]])
     est = RandomForestClassifier()
     msg = "sparse multilabel-indicator for y is not supported."
     with pytest.raises(ValueError, match=msg):
diff --git a/sklearn/metrics/tests/test_dist_metrics.py b/sklearn/metrics/tests/test_dist_metrics.py
@@ -366,7 +366,7 @@ def test_readonly_kwargs():
         (np.array([1, 1.5, np.nan]), ValueError, "w contains NaN"),
         *[
             (
-                csr_container([1, 1.5, 1]),
+                csr_container([[1, 1.5, 1]]),
                 TypeError,
                 "Sparse data was passed for w, but dense data is required",
             )
diff --git a/sklearn/neighbors/tests/test_neighbors.py b/sklearn/neighbors/tests/test_neighbors.py
@@ -476,8 +476,8 @@ def test_is_sorted_by_data(csr_container):
     # _is_sorted_by_data should return True when entries are sorted by data,
     # and False in all other cases.
 
-    # Test with sorted 1D array
-    X = csr_container(np.arange(10))
+    # Test with sorted single row sparse array
+    X = csr_container(np.arange(10).reshape(1, 10))
     assert _is_sorted_by_data(X)
     # Test with unsorted 1D array
     X[0, 2] = 5
diff --git a/sklearn/utils/_testing.py b/sklearn/utils/_testing.py
@@ -765,7 +765,7 @@ def _convert_container(
     elif constructor_name == "array":
         return np.asarray(container, dtype=dtype)
     elif constructor_name == "sparse":
-        return sp.sparse.csr_matrix(container, dtype=dtype)
+        return sp.sparse.csr_matrix(np.atleast_2d(container), dtype=dtype)
     elif constructor_name in ("pandas", "dataframe"):
         pd = pytest.importorskip("pandas", minversion=minversion)
         result = pd.DataFrame(container, columns=columns_name, dtype=dtype, copy=False)
@@ -803,18 +803,18 @@ def _convert_container(
     elif constructor_name == "slice":
         return slice(container[0], container[1])
     elif constructor_name == "sparse_csr":
-        return sp.sparse.csr_matrix(container, dtype=dtype)
+        return sp.sparse.csr_matrix(np.atleast_2d(container), dtype=dtype)
     elif constructor_name == "sparse_csr_array":
         if sp_version >= parse_version("1.8"):
-            return sp.sparse.csr_array(container, dtype=dtype)
+            return sp.sparse.csr_array(np.atleast_2d(container), dtype=dtype)
         raise ValueError(
             f"sparse_csr_array is only available with scipy>=1.8.0, got {sp_version}"
         )
     elif constructor_name == "sparse_csc":
-        return sp.sparse.csc_matrix(container, dtype=dtype)
+        return sp.sparse.csc_matrix(np.atleast_2d(container), dtype=dtype)
     elif constructor_name == "sparse_csc_array":
         if sp_version >= parse_version("1.8"):
-            return sp.sparse.csc_array(container, dtype=dtype)
+            return sp.sparse.csc_array(np.atleast_2d(container), dtype=dtype)
         raise ValueError(
             f"sparse_csc_array is only available with scipy>=1.8.0, got {sp_version}"
         )
diff --git a/sklearn/utils/tests/test_class_weight.py b/sklearn/utils/tests/test_class_weight.py
@@ -311,6 +311,6 @@ def test_class_weight_does_not_contains_more_classes():
 @pytest.mark.parametrize("csc_container", CSC_CONTAINERS)
 def test_compute_sample_weight_sparse(csc_container):
     """Check that we can compute weight for sparse `y`."""
-    y = csc_container(np.asarray([0, 1, 1])).T
+    y = csc_container(np.asarray([[0], [1], [1]]))
     sample_weight = compute_sample_weight("balanced", y)
     assert_allclose(sample_weight, [1.5, 0.75, 0.75])
diff --git a/sklearn/utils/tests/test_utils.py b/sklearn/utils/tests/test_utils.py
@@ -168,7 +168,7 @@ def test_resample_stratify_sparse_error(csr_container):
     n_samples = 100
     X = rng.normal(size=(n_samples, 2))
     y = rng.randint(0, 2, size=n_samples)
-    stratify = csr_container(y)
+    stratify = csr_container(y.reshape(-1, 1))
     with pytest.raises(TypeError, match="Sparse data was passed"):
         X, y = resample(X, y, n_samples=50, random_state=rng, stratify=stratify)
 
diff --git a/sklearn/utils/tests/test_validation.py b/sklearn/utils/tests/test_validation.py
@@ -639,9 +639,21 @@ def test_check_array_accept_sparse_no_exception():
 @pytest.fixture(params=["csr", "csc", "coo", "bsr"])
 def X_64bit(request):
     X = sp.rand(20, 10, format=request.param)
-    for attr in ["indices", "indptr", "row", "col"]:
-        if hasattr(X, attr):
-            setattr(X, attr, getattr(X, attr).astype("int64"))
+
+    if request.param == "coo":
+        if hasattr(X, "indices"):
+            # With scipy >= 1.13, `.indices` is a new attribute that holds a
+            # tuple of index arrays. Assigning to `.row` and `.col` does not seem
+            # to change the dtype; see https://github.com/scipy/scipy/pull/18530/
+            X.indices = tuple(v.astype("int64") for v in X.indices)
+        else:
+            # scipy < 1.13
+            X.row = X.row.astype("int64")
+            X.col = X.col.astype("int64")
+    else:
+        X.indices = X.indices.astype("int64")
+        X.indptr = X.indptr.astype("int64")
+
     yield X
 
 

Original file line number	Diff line number	Diff line change
`@@ -366,7 +366,7 @@ def test_readonly_kwargs():`
`366`	`366`	`(np.array([1, 1.5, np.nan]), ValueError, "w contains NaN"),`
`367`	`367`	`*[`
`368`	`368`	`(`
`369`		`- csr_container([1, 1.5, 1]),`
	`369`	`+ csr_container([[1, 1.5, 1]]),`
`370`	`370`	`TypeError,`
`371`	`371`	`"Sparse data was passed for w, but dense data is required",`
`372`	`372`	`)`