Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion sklearn/datasets/tests/test_svmlight_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,7 @@ def test_invalid_filename():
def test_dump(csr_container):
X_sparse, y_dense = _load_svmlight_local_test_file(datafile)
X_dense = X_sparse.toarray()
y_sparse = csr_container(y_dense)
y_sparse = csr_container(np.atleast_2d(y_dense))

# slicing a csr_matrix can unsort its .indices, so test that we sort
# those correctly
Expand Down
2 changes: 1 addition & 1 deletion sklearn/ensemble/tests/test_forest.py
Original file line number Diff line number Diff line change
Expand Up @@ -1595,7 +1595,7 @@ def test_max_samples_boundary_classifiers(name):
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
def test_forest_y_sparse(csr_container):
X = [[1, 2, 3]]
y = csr_container([4, 5, 6])
y = csr_container([[4, 5, 6]])
est = RandomForestClassifier()
msg = "sparse multilabel-indicator for y is not supported."
with pytest.raises(ValueError, match=msg):
Expand Down
2 changes: 1 addition & 1 deletion sklearn/metrics/tests/test_dist_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -366,7 +366,7 @@ def test_readonly_kwargs():
(np.array([1, 1.5, np.nan]), ValueError, "w contains NaN"),
*[
(
csr_container([1, 1.5, 1]),
csr_container([[1, 1.5, 1]]),
TypeError,
"Sparse data was passed for w, but dense data is required",
)
Expand Down
4 changes: 2 additions & 2 deletions sklearn/neighbors/tests/test_neighbors.py
Original file line number Diff line number Diff line change
Expand Up @@ -476,8 +476,8 @@ def test_is_sorted_by_data(csr_container):
# _is_sorted_by_data should return True when entries are sorted by data,
# and False in all other cases.

# Test with sorted 1D array
X = csr_container(np.arange(10))
# Test with sorted single row sparse array
X = csr_container(np.arange(10).reshape(1, 10))
assert _is_sorted_by_data(X)
# Test with unsorted single row sparse array
X[0, 2] = 5
Expand Down
10 changes: 5 additions & 5 deletions sklearn/utils/_testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -765,7 +765,7 @@ def _convert_container(
elif constructor_name == "array":
return np.asarray(container, dtype=dtype)
elif constructor_name == "sparse":
return sp.sparse.csr_matrix(container, dtype=dtype)
return sp.sparse.csr_matrix(np.atleast_2d(container), dtype=dtype)
elif constructor_name in ("pandas", "dataframe"):
pd = pytest.importorskip("pandas", minversion=minversion)
result = pd.DataFrame(container, columns=columns_name, dtype=dtype, copy=False)
Expand Down Expand Up @@ -803,18 +803,18 @@ def _convert_container(
elif constructor_name == "slice":
return slice(container[0], container[1])
elif constructor_name == "sparse_csr":
return sp.sparse.csr_matrix(container, dtype=dtype)
return sp.sparse.csr_matrix(np.atleast_2d(container), dtype=dtype)
elif constructor_name == "sparse_csr_array":
if sp_version >= parse_version("1.8"):
return sp.sparse.csr_array(container, dtype=dtype)
return sp.sparse.csr_array(np.atleast_2d(container), dtype=dtype)
raise ValueError(
f"sparse_csr_array is only available with scipy>=1.8.0, got {sp_version}"
)
elif constructor_name == "sparse_csc":
return sp.sparse.csc_matrix(container, dtype=dtype)
return sp.sparse.csc_matrix(np.atleast_2d(container), dtype=dtype)
elif constructor_name == "sparse_csc_array":
if sp_version >= parse_version("1.8"):
return sp.sparse.csc_array(container, dtype=dtype)
return sp.sparse.csc_array(np.atleast_2d(container), dtype=dtype)
raise ValueError(
f"sparse_csc_array is only available with scipy>=1.8.0, got {sp_version}"
)
Expand Down
2 changes: 1 addition & 1 deletion sklearn/utils/tests/test_class_weight.py
Original file line number Diff line number Diff line change
Expand Up @@ -311,6 +311,6 @@ def test_class_weight_does_not_contains_more_classes():
@pytest.mark.parametrize("csc_container", CSC_CONTAINERS)
def test_compute_sample_weight_sparse(csc_container):
"""Check that we can compute weight for sparse `y`."""
y = csc_container(np.asarray([0, 1, 1])).T
y = csc_container(np.asarray([[0], [1], [1]]))
sample_weight = compute_sample_weight("balanced", y)
assert_allclose(sample_weight, [1.5, 0.75, 0.75])
2 changes: 1 addition & 1 deletion sklearn/utils/tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ def test_resample_stratify_sparse_error(csr_container):
n_samples = 100
X = rng.normal(size=(n_samples, 2))
y = rng.randint(0, 2, size=n_samples)
stratify = csr_container(y)
stratify = csr_container(y.reshape(-1, 1))
with pytest.raises(TypeError, match="Sparse data was passed"):
X, y = resample(X, y, n_samples=50, random_state=rng, stratify=stratify)

Expand Down
18 changes: 15 additions & 3 deletions sklearn/utils/tests/test_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -639,9 +639,21 @@ def test_check_array_accept_sparse_no_exception():
@pytest.fixture(params=["csr", "csc", "coo", "bsr"])
def X_64bit(request):
X = sp.rand(20, 10, format=request.param)
for attr in ["indices", "indptr", "row", "col"]:
if hasattr(X, attr):
setattr(X, attr, getattr(X, attr).astype("int64"))

if request.param == "coo":
if hasattr(X, "indices"):
# For scipy >= 1.13, .indices is a new attribute and is a tuple. Setting
# .row and .col does not seem to change the dtype; for more details see
# https://github.com/scipy/scipy/pull/18530/
X.indices = tuple(v.astype("int64") for v in X.indices)
else:
# scipy < 1.13
X.row = X.row.astype("int64")
X.col = X.col.astype("int64")
else:
X.indices = X.indices.astype("int64")
X.indptr = X.indptr.astype("int64")

yield X


Expand Down