diff --git a/sklearn/datasets/tests/test_svmlight_format.py b/sklearn/datasets/tests/test_svmlight_format.py index 78a006f8f228b..10b0e29810ef7 100644 --- a/sklearn/datasets/tests/test_svmlight_format.py +++ b/sklearn/datasets/tests/test_svmlight_format.py @@ -261,7 +261,7 @@ def test_invalid_filename(): def test_dump(csr_container): X_sparse, y_dense = _load_svmlight_local_test_file(datafile) X_dense = X_sparse.toarray() - y_sparse = csr_container(y_dense) + y_sparse = csr_container(np.atleast_2d(y_dense)) # slicing a csr_matrix can unsort its .indices, so test that we sort # those correctly diff --git a/sklearn/ensemble/tests/test_forest.py b/sklearn/ensemble/tests/test_forest.py index 0eced91418278..2468f8fc5b590 100644 --- a/sklearn/ensemble/tests/test_forest.py +++ b/sklearn/ensemble/tests/test_forest.py @@ -1595,7 +1595,7 @@ def test_max_samples_boundary_classifiers(name): @pytest.mark.parametrize("csr_container", CSR_CONTAINERS) def test_forest_y_sparse(csr_container): X = [[1, 2, 3]] - y = csr_container([4, 5, 6]) + y = csr_container([[4, 5, 6]]) est = RandomForestClassifier() msg = "sparse multilabel-indicator for y is not supported." with pytest.raises(ValueError, match=msg): diff --git a/sklearn/metrics/tests/test_dist_metrics.py b/sklearn/metrics/tests/test_dist_metrics.py index b7b2e04b11396..baaf447d3909b 100644 --- a/sklearn/metrics/tests/test_dist_metrics.py +++ b/sklearn/metrics/tests/test_dist_metrics.py @@ -366,7 +366,7 @@ def test_readonly_kwargs(): (np.array([1, 1.5, np.nan]), ValueError, "w contains NaN"), *[ ( - csr_container([1, 1.5, 1]), + csr_container([[1, 1.5, 1]]), TypeError, "Sparse data was passed for w, but dense data is required", ) diff --git a/sklearn/neighbors/tests/test_neighbors.py b/sklearn/neighbors/tests/test_neighbors.py index 00c53734c9576..2be0237cd5f7e 100644 --- a/sklearn/neighbors/tests/test_neighbors.py +++ b/sklearn/neighbors/tests/test_neighbors.py @@ -476,8 +476,8 @@ def test_is_sorted_by_data(csr_container): # _is_sorted_by_data should return True when entries are sorted by data, # and False in all other cases. - # Test with sorted 1D array - X = csr_container(np.arange(10)) + # Test with sorted single row sparse array + X = csr_container(np.arange(10).reshape(1, 10)) assert _is_sorted_by_data(X) # Test with unsorted 1D array X[0, 2] = 5 diff --git a/sklearn/utils/_testing.py b/sklearn/utils/_testing.py index a9ecaa8cd2d9d..5411c4dacf766 100644 --- a/sklearn/utils/_testing.py +++ b/sklearn/utils/_testing.py @@ -765,7 +765,7 @@ def _convert_container( elif constructor_name == "array": return np.asarray(container, dtype=dtype) elif constructor_name == "sparse": - return sp.sparse.csr_matrix(container, dtype=dtype) + return sp.sparse.csr_matrix(np.atleast_2d(container), dtype=dtype) elif constructor_name in ("pandas", "dataframe"): pd = pytest.importorskip("pandas", minversion=minversion) result = pd.DataFrame(container, columns=columns_name, dtype=dtype, copy=False) @@ -803,18 +803,18 @@ def _convert_container( elif constructor_name == "slice": return slice(container[0], container[1]) elif constructor_name == "sparse_csr": - return sp.sparse.csr_matrix(container, dtype=dtype) + return sp.sparse.csr_matrix(np.atleast_2d(container), dtype=dtype) elif constructor_name == "sparse_csr_array": if sp_version >= parse_version("1.8"): - return sp.sparse.csr_array(container, dtype=dtype) + return sp.sparse.csr_array(np.atleast_2d(container), dtype=dtype) raise ValueError( f"sparse_csr_array is only available with scipy>=1.8.0, got {sp_version}" ) elif constructor_name == "sparse_csc": - return sp.sparse.csc_matrix(container, dtype=dtype) + return sp.sparse.csc_matrix(np.atleast_2d(container), dtype=dtype) elif constructor_name == "sparse_csc_array": if sp_version >= parse_version("1.8"): - return sp.sparse.csc_array(container, dtype=dtype) + return sp.sparse.csc_array(np.atleast_2d(container), dtype=dtype) raise ValueError( f"sparse_csc_array is only available with scipy>=1.8.0, got {sp_version}" ) diff --git a/sklearn/utils/tests/test_class_weight.py b/sklearn/utils/tests/test_class_weight.py index d1deeae8ebd20..b98ce6be05658 100644 --- a/sklearn/utils/tests/test_class_weight.py +++ b/sklearn/utils/tests/test_class_weight.py @@ -311,6 +311,6 @@ def test_class_weight_does_not_contains_more_classes(): @pytest.mark.parametrize("csc_container", CSC_CONTAINERS) def test_compute_sample_weight_sparse(csc_container): """Check that we can compute weight for sparse `y`.""" - y = csc_container(np.asarray([0, 1, 1])).T + y = csc_container(np.asarray([[0], [1], [1]])) sample_weight = compute_sample_weight("balanced", y) assert_allclose(sample_weight, [1.5, 0.75, 0.75]) diff --git a/sklearn/utils/tests/test_utils.py b/sklearn/utils/tests/test_utils.py index 89ab73582cefc..5f3fe72c0f7ef 100644 --- a/sklearn/utils/tests/test_utils.py +++ b/sklearn/utils/tests/test_utils.py @@ -168,7 +168,7 @@ def test_resample_stratify_sparse_error(csr_container): n_samples = 100 X = rng.normal(size=(n_samples, 2)) y = rng.randint(0, 2, size=n_samples) - stratify = csr_container(y) + stratify = csr_container(y.reshape(-1, 1)) with pytest.raises(TypeError, match="Sparse data was passed"): X, y = resample(X, y, n_samples=50, random_state=rng, stratify=stratify) diff --git a/sklearn/utils/tests/test_validation.py b/sklearn/utils/tests/test_validation.py index 1f847dbd55d62..b627c55a7ef12 100644 --- a/sklearn/utils/tests/test_validation.py +++ b/sklearn/utils/tests/test_validation.py @@ -639,9 +639,21 @@ def test_check_array_accept_sparse_no_exception(): @pytest.fixture(params=["csr", "csc", "coo", "bsr"]) def X_64bit(request): X = sp.rand(20, 10, format=request.param) - for attr in ["indices", "indptr", "row", "col"]: - if hasattr(X, attr): - setattr(X, attr, getattr(X, attr).astype("int64")) + + if request.param == "coo": + if hasattr(X, "indices"): + # for scipy >= 1.13 .indices is a new attribute and is a tuple. The + # .col and .row attributes do not seem to be able to change the + # dtype, for more details see https://github.com/scipy/scipy/pull/18530/ + X.indices = tuple(v.astype("int64") for v in X.indices) + else: + # scipy < 1.13 + X.row = X.row.astype("int64") + X.col = X.col.astype("int64") + else: + X.indices = X.indices.astype("int64") + X.indptr = X.indptr.astype("int64") + yield X