diff --git a/doc/whats_new/v1.4.rst b/doc/whats_new/v1.4.rst index 38f7a33ab9651..553fbf6bf190a 100644 --- a/doc/whats_new/v1.4.rst +++ b/doc/whats_new/v1.4.rst @@ -534,6 +534,10 @@ Changelog misdetects the CPU architecture. :pr:`27614` by :user:`Olivier Grisel <ogrisel>`. +- |Fix| Error message in :func:`~utils.check_array` when a sparse matrix was + passed but `accept_sparse` is `False` now suggests to use `.toarray()` and not + `X.toarray()`. :pr:`27757` by :user:`Lucy Liu <lucyleeow>`. + Code and Documentation Contributors ----------------------------------- diff --git a/sklearn/cluster/tests/test_affinity_propagation.py b/sklearn/cluster/tests/test_affinity_propagation.py index 9f82957d2067a..319385635376e 100644 --- a/sklearn/cluster/tests/test_affinity_propagation.py +++ b/sklearn/cluster/tests/test_affinity_propagation.py @@ -106,7 +106,7 @@ def test_affinity_propagation_affinity_shape(): @pytest.mark.parametrize("csr_container", CSR_CONTAINERS) def test_affinity_propagation_precomputed_with_sparse_input(csr_container): - err_msg = "A sparse matrix was passed, but dense data is required" + err_msg = "Sparse data was passed for X, but dense data is required" with pytest.raises(TypeError, match=err_msg): AffinityPropagation(affinity="precomputed").fit(csr_container((3, 3))) diff --git a/sklearn/linear_model/tests/test_base.py b/sklearn/linear_model/tests/test_base.py index 87b44ff9b4320..71f516dd76ed8 100644 --- a/sklearn/linear_model/tests/test_base.py +++ b/sklearn/linear_model/tests/test_base.py @@ -100,7 +100,7 @@ def test_linear_regression_sample_weights( def test_raises_value_error_if_positive_and_sparse(): - error_msg = "A sparse matrix was passed, but dense data is required." + error_msg = "Sparse data was passed for X, but dense data is required." 
# X must not be sparse if positive == True X = sparse.eye(10) y = np.ones(10) diff --git a/sklearn/metrics/tests/test_dist_metrics.py b/sklearn/metrics/tests/test_dist_metrics.py index f4f9c136cf98e..b7b2e04b11396 100644 --- a/sklearn/metrics/tests/test_dist_metrics.py +++ b/sklearn/metrics/tests/test_dist_metrics.py @@ -368,7 +368,7 @@ def test_readonly_kwargs(): ( csr_container([1, 1.5, 1]), TypeError, - "A sparse matrix was passed, but dense data is required", + "Sparse data was passed for w, but dense data is required", ) for csr_container in CSR_CONTAINERS ], diff --git a/sklearn/preprocessing/tests/test_encoders.py b/sklearn/preprocessing/tests/test_encoders.py index 1cb29863b1732..36badb6d33f04 100644 --- a/sklearn/preprocessing/tests/test_encoders.py +++ b/sklearn/preprocessing/tests/test_encoders.py @@ -1790,7 +1790,7 @@ def test_ordinal_encoder_sparse(csr_container): encoder = OrdinalEncoder() - err_msg = "A sparse matrix was passed, but dense data is required" + err_msg = "Sparse data was passed, but dense data is required" with pytest.raises(TypeError, match=err_msg): encoder.fit(X_sparse) with pytest.raises(TypeError, match=err_msg): diff --git a/sklearn/tests/test_multiclass.py b/sklearn/tests/test_multiclass.py index e27c2705406c1..caf7f5ae2fb49 100644 --- a/sklearn/tests/test_multiclass.py +++ b/sklearn/tests/test_multiclass.py @@ -741,11 +741,11 @@ def test_ecoc_delegate_sparse_base_estimator(csc_container): ) ecoc = OutputCodeClassifier(base_estimator, random_state=0) - with pytest.raises(TypeError, match="A sparse matrix was passed"): + with pytest.raises(TypeError, match="Sparse data was passed"): ecoc.fit(X_sp, y) ecoc.fit(X, y) - with pytest.raises(TypeError, match="A sparse matrix was passed"): + with pytest.raises(TypeError, match="Sparse data was passed"): ecoc.predict(X_sp) # smoke test to check when sparse input should be supported diff --git a/sklearn/utils/tests/test_mocking.py b/sklearn/utils/tests/test_mocking.py index 
93a07e3d7fab7..9c66d1345bb6d 100644 --- a/sklearn/utils/tests/test_mocking.py +++ b/sklearn/utils/tests/test_mocking.py @@ -136,7 +136,7 @@ def test_checking_classifier_with_params(iris, csr_container): check_X=check_array, check_X_params={"accept_sparse": False} ) clf.fit(X, y) - with pytest.raises(TypeError, match="A sparse matrix was passed"): + with pytest.raises(TypeError, match="Sparse data was passed"): clf.fit(X_sparse, y) diff --git a/sklearn/utils/tests/test_utils.py b/sklearn/utils/tests/test_utils.py index 0f0c9c898b17a..9d4df09573857 100644 --- a/sklearn/utils/tests/test_utils.py +++ b/sklearn/utils/tests/test_utils.py @@ -168,7 +168,7 @@ def test_resample_stratify_sparse_error(csr_container): X = rng.normal(size=(n_samples, 2)) y = rng.randint(0, 2, size=n_samples) stratify = csr_container(y) - with pytest.raises(TypeError, match="A sparse matrix was passed"): + with pytest.raises(TypeError, match="Sparse data was passed"): X, y = resample(X, y, n_samples=50, random_state=rng, stratify=stratify) diff --git a/sklearn/utils/tests/test_validation.py b/sklearn/utils/tests/test_validation.py index 69fc7b6cac4d7..1c2a7cd3f855a 100644 --- a/sklearn/utils/tests/test_validation.py +++ b/sklearn/utils/tests/test_validation.py @@ -599,8 +599,8 @@ def test_check_array_accept_sparse_type_exception(): invalid_type = SVR() msg = ( - "A sparse matrix was passed, but dense data is required. " - r"Use X.toarray\(\) to convert to a dense numpy array." + "Sparse data was passed, but dense data is required. " + r"Use '.toarray\(\)' to convert to a dense numpy array." 
) with pytest.raises(TypeError, match=msg): check_array(X_csr, accept_sparse=False) diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py index a5b4a8555de63..8a3f07a5c867f 100644 --- a/sklearn/utils/validation.py +++ b/sklearn/utils/validation.py @@ -534,9 +534,10 @@ def _ensure_sparse_format( _check_large_sparse(sparse_container, accept_large_sparse) if accept_sparse is False: + padded_input = " for " + input_name if input_name else "" raise TypeError( - "A sparse matrix was passed, but dense data is required. Use X.toarray() " - "to convert to a dense numpy array." + f"Sparse data was passed{padded_input}, but dense data is required. " + "Use '.toarray()' to convert to a dense numpy array." ) elif isinstance(accept_sparse, (list, tuple)): if len(accept_sparse) == 0: