Skip to content
5 changes: 5 additions & 0 deletions doc/whats_new/v1.4.rst
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,11 @@ Changelog
`kdtree` and `balltree` values will be removed in 1.6.
:pr:`26744` by :user:`Shreesha Kumar Bhat <Shreesha3112>`.

- |FIX| Create a copy of the precomputed sparse matrix within the
`fit` method of :class:`cluster.DBSCAN` to avoid in-place modification of
the sparse matrix.
:pr:`27651` by :user:`Ganesh Tata <tataganesh>`.

- |API| The option `metric=None` in
:class:`cluster.AgglomerativeClustering` and :class:`cluster.FeatureAgglomeration`
is deprecated in version 1.4 and will be removed in version 1.6. Use the default
Expand Down
3 changes: 2 additions & 1 deletion sklearn/cluster/_dbscan.py
Original file line number Diff line number Diff line change
Expand Up @@ -391,9 +391,10 @@ def fit(self, X, y=None, sample_weight=None):
if self.metric == "precomputed" and sparse.issparse(X):
# set the diagonal to explicit values, as a point is its own
# neighbor
X = X.copy() # copy to avoid in-place modification
with warnings.catch_warnings():
warnings.simplefilter("ignore", sparse.SparseEfficiencyWarning)
X.setdiag(X.diagonal()) # XXX: modifies X's internals in-place
X.setdiag(X.diagonal())

neighbors_model = NearestNeighbors(
radius=self.eps,
Expand Down
22 changes: 22 additions & 0 deletions sklearn/cluster/tests/test_dbscan.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,28 @@ def test_dbscan_input_not_modified(metric, csr_container):
assert_array_equal(X, X_copy)


@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
def test_dbscan_input_not_modified_precomputed_sparse_nodiag(csr_container):
    """Check that we don't modify in-place the pre-computed sparse matrix.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/27508
    """
    X = np.random.RandomState(0).rand(10, 10)
    # Add zeros on the diagonal that will be implicit when creating
    # the sparse matrix. If `X` is modified in-place, the zeros from
    # the diagonal will be made explicit.
    np.fill_diagonal(X, 0)
    X = csr_container(X)
    # Precondition (suggested in review): make sure the diagonal zeros
    # really are implicit before exercising `dbscan`.
    assert all(row != col for row, col in zip(*X.nonzero()))
    # `nonzero()` filters out explicitly stored zeros, so it cannot detect
    # an explicit 0 on the diagonal. Inspect the stored coordinates directly
    # to guarantee that no diagonal entry is stored at all.
    X_stored = X.tocoo()
    assert not np.any(X_stored.row == X_stored.col)
    X_copy = X.copy()
    dbscan(X, metric="precomputed")
    # Make sure that we did not modify `X` in-place even by creating
    # explicit 0s values. `nnz` counts stored entries, explicit zeros
    # included, so it catches a diagonal that was made explicit.
    assert X.nnz == X_copy.nnz
    assert_array_equal(X.toarray(), X_copy.toarray())


@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
def test_dbscan_no_core_samples(csr_container):
rng = np.random.RandomState(0)
Expand Down