diff --git a/doc/whats_new/v1.4.rst b/doc/whats_new/v1.4.rst index 44d5cfa01c8bb..fdaa30f18f577 100644 --- a/doc/whats_new/v1.4.rst +++ b/doc/whats_new/v1.4.rst @@ -242,6 +242,11 @@ Changelog `kdtree` and `balltree` values will be removed in 1.6. :pr:`26744` by :user:`Shreesha Kumar Bhat <Shreesha3112>`. +- |Fix| Create a copy of the precomputed sparse matrix within the + `fit` method of :class:`cluster.DBSCAN` to avoid in-place modification of + the sparse matrix. + :pr:`27651` by :user:`Ganesh Tata <tataganesh>`. + - |API| The option `metric=None` in :class:`cluster.AggomerativeClustering` and :class:`cluster.FeatureAgglomeration` is deprecated in version 1.4 and will be removed in version 1.6. Use the default diff --git a/sklearn/cluster/_dbscan.py b/sklearn/cluster/_dbscan.py index 0129138801973..98f524752a39a 100644 --- a/sklearn/cluster/_dbscan.py +++ b/sklearn/cluster/_dbscan.py @@ -391,9 +391,10 @@ def fit(self, X, y=None, sample_weight=None): if self.metric == "precomputed" and sparse.issparse(X): # set the diagonal to explicit values, as a point is its own # neighbor + X = X.copy() # copy to avoid in-place modification with warnings.catch_warnings(): warnings.simplefilter("ignore", sparse.SparseEfficiencyWarning) - X.setdiag(X.diagonal()) # XXX: modifies X's internals in-place + X.setdiag(X.diagonal()) neighbors_model = NearestNeighbors( radius=self.eps, diff --git a/sklearn/cluster/tests/test_dbscan.py b/sklearn/cluster/tests/test_dbscan.py index 0b5a067b3f8b1..d42cc2b17d518 100644 --- a/sklearn/cluster/tests/test_dbscan.py +++ b/sklearn/cluster/tests/test_dbscan.py @@ -122,6 +122,28 @@ def test_dbscan_input_not_modified(metric, csr_container): assert_array_equal(X, X_copy) +@pytest.mark.parametrize("csr_container", CSR_CONTAINERS) +def test_dbscan_input_not_modified_precomputed_sparse_nodiag(csr_container): + """Check that we don't modify in-place the pre-computed sparse matrix. 
+ + Non-regression test for: + https://github.com/scikit-learn/scikit-learn/issues/27508 + """ + X = np.random.RandomState(0).rand(10, 10) + # Add zeros on the diagonal that will be implicit when creating + # the sparse matrix. If `X` is modified in-place, the zeros from + # the diagonal will be made explicit. + np.fill_diagonal(X, 0) + X = csr_container(X) + assert all(row != col for row, col in zip(*X.nonzero())) + X_copy = X.copy() + dbscan(X, metric="precomputed") + # Make sure that we did not modify `X` in-place even by creating + # explicit zeros. + assert X.nnz == X_copy.nnz + assert_array_equal(X.toarray(), X_copy.toarray()) + + @pytest.mark.parametrize("csr_container", CSR_CONTAINERS) def test_dbscan_no_core_samples(csr_container): rng = np.random.RandomState(0)