
TST Incorporate global_random_seed in test_optics.py #30844
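This PR switches several OPTICS tests from a hardcoded RandomState(0) to the shared global_random_seed pytest fixture, so their assertions can be exercised against many seeds and seed-sensitive checks are surfaced. As rough orientation for readers, the sketch below shows one way such a fixture can be parametrized from an environment variable; it is an illustrative assumption, not scikit-learn's actual conftest.py, and the helper name _parse_seed_spec is made up.

# Illustrative sketch of a conftest-style seed fixture (assumed shape, not
# scikit-learn's real implementation). Tests that request `global_random_seed`
# run once per selected seed.
import os

import pytest


def _parse_seed_spec(spec):
    """Expand a spec like "42", "40-42" or "all" into a list of seeds."""
    if spec == "all":
        return list(range(100))
    if "-" in spec:
        start, stop = spec.split("-")
        return list(range(int(start), int(stop) + 1))
    return [int(spec)]


_SEEDS = _parse_seed_spec(os.environ.get("SKLEARN_TESTS_GLOBAL_RANDOM_SEED", "42"))


@pytest.fixture(params=_SEEDS)
def global_random_seed(request):
    # Yield one integer seed per parametrized run.
    return request.param

Tests whose expected labels are hardcoded, such as test_extract_xi below, deliberately keep a fixed seed; the added comments in the diff state this explicitly.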


Merged
32 changes: 21 additions & 11 deletions sklearn/cluster/tests/test_optics.py
@@ -84,6 +84,8 @@ def test_the_extract_xi_labels(ordering, clusters, expected):
 def test_extract_xi(global_dtype):
     # small and easy test (no clusters around other clusters)
     # but with a clear noise data.
+    # global_random_seed is not used here since the expected labels
+    # are hardcoded for these specific data.
     rng = np.random.RandomState(0)
     n_points_per_cluster = 5

@@ -138,8 +140,8 @@ def test_extract_xi(global_dtype):
     assert_array_equal(clust.labels_, expected_labels)


-def test_cluster_hierarchy_(global_dtype):
-    rng = np.random.RandomState(0)
+def test_cluster_hierarchy(global_dtype, global_random_seed):
+    rng = np.random.RandomState(global_random_seed)
     n_points_per_cluster = 100
     C1 = [0, 0] + 2 * rng.randn(n_points_per_cluster, 2).astype(
         global_dtype, copy=False
@@ -148,12 +150,16 @@ def test_cluster_hierarchy_(global_dtype):
         global_dtype, copy=False
     )
     X = np.vstack((C1, C2))
-    X = shuffle(X, random_state=0)
+    X = shuffle(X, random_state=rng)

-    clusters = OPTICS(min_samples=20, xi=0.1).fit(X).cluster_hierarchy_
+    clusters = OPTICS(min_samples=20, xi=0.2).fit(X).cluster_hierarchy_
     assert clusters.shape == (2, 2)
-    diff = np.sum(clusters - np.array([[0, 99], [0, 199]]))
-    assert diff / len(X) < 0.05
+
+    # The first cluster should contain all points from C1 but due to how the data is
+    # generated, some points from C2 may end up in it.
+    assert 100 <= np.diff(clusters[0]) + 1 <= 115
+    # The second cluster should contain all points from C1 and C2.
+    assert np.diff(clusters[-1]) + 1 == 200


 @pytest.mark.parametrize(
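For context on the new assertions above: OPTICS.cluster_hierarchy_ stores one [start, end] pair of inclusive positions in the cluster ordering per cluster, so np.diff(row) + 1 is the number of samples that cluster spans. A tiny standalone illustration of the arithmetic (the [[0, 105], [0, 199]] values are made up, not taken from the test):

import numpy as np

# Hypothetical cluster_hierarchy_ output for 200 points (100 from C1, 100 from C2).
clusters = np.array([[0, 105], [0, 199]])

first_size = np.diff(clusters[0]) + 1   # 106: all of C1 plus a few strays from C2
root_size = np.diff(clusters[-1]) + 1   # 200: the root cluster covering C1 and C2

assert 100 <= first_size <= 115
assert root_size == 200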
@@ -785,10 +791,10 @@ def test_compare_to_ELKI():
     assert_allclose(clust1.core_distances_[index], clust2.core_distances_[index])


-def test_extract_dbscan(global_dtype):
+def test_extract_dbscan(global_dtype, global_random_seed):
     # testing an easy dbscan case. Not including clusters with different
     # densities.
-    rng = np.random.RandomState(0)
+    rng = np.random.RandomState(global_random_seed)
     n_points_per_cluster = 20
     C1 = [-5, -2] + 0.2 * rng.randn(n_points_per_cluster, 2)
     C2 = [4, -1] + 0.2 * rng.randn(n_points_per_cluster, 2)
@@ -797,7 +803,9 @@ def test_extract_dbscan(global_dtype):
     X = np.vstack((C1, C2, C3, C4)).astype(global_dtype, copy=False)

     clust = OPTICS(cluster_method="dbscan", eps=0.5).fit(X)
-    assert_array_equal(np.sort(np.unique(clust.labels_)), [0, 1, 2, 3])
+    assert_array_equal(
+        np.sort(np.unique(clust.labels_[clust.labels_ != -1])), [0, 1, 2, 3]
+    )


 @pytest.mark.parametrize("csr_container", [None] + CSR_CONTAINERS)
@@ -817,12 +825,14 @@ def test_precomputed_dists(global_dtype, csr_container):


 @pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
-def test_optics_input_not_modified_precomputed_sparse_nodiag(csr_container):
+def test_optics_input_not_modified_precomputed_sparse_nodiag(
+    csr_container, global_random_seed
+):
     """Check that we don't modify in-place the pre-computed sparse matrix.
     Non-regression test for:
     https://github.com/scikit-learn/scikit-learn/issues/27508
     """
-    X = np.random.RandomState(0).rand(6, 6)
+    X = np.random.RandomState(global_random_seed).rand(6, 6)
     # Add zeros on the diagonal that will be implicit when creating
     # the sparse matrix. If `X` is modified in-place, the zeros from
     # the diagonal will be made explicit.
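With these changes, the affected tests can be run against several seeds by setting the environment variable that scikit-learn's test configuration reads (SKLEARN_TESTS_GLOBAL_RANDOM_SEED; double-check the exact spelling against your checkout's conftest). A small, assumed helper script, not part of this PR:

import os
import subprocess

# Run the OPTICS test module once per seed in 0..4; adjust the range and the
# test path as needed. Each run parametrizes global_random_seed differently.
for seed in range(5):
    env = dict(os.environ, SKLEARN_TESTS_GLOBAL_RANDOM_SEED=str(seed))
    subprocess.run(
        ["pytest", "-q", "sklearn/cluster/tests/test_optics.py"],
        env=env,
        check=True,
    )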