diff --git a/sklearn/cluster/tests/test_k_means.py b/sklearn/cluster/tests/test_k_means.py index 4e71cd1f35645..cbac3974c1e11 100644 --- a/sklearn/cluster/tests/test_k_means.py +++ b/sklearn/cluster/tests/test_k_means.py @@ -339,19 +339,6 @@ def test_fortran_aligned_data(Estimator): ) def test_k_means_fit_predict(algo, dtype, constructor, seed, max_iter, tol): # check that fit.predict gives same result as fit_predict - # There's a very small chance of failure with elkan on unstructured dataset - # because predict method uses fast euclidean distances computation which - # may cause small numerical instabilities. - # NB: This test is largely redundant with respect to test_predict and - # test_predict_equal_labels. This test has the added effect of - # testing idempotence of the fittng procesdure which appears to - # be where it fails on some MacOS setups. - if sys.platform == "darwin": - pytest.xfail( - "Known failures on MacOS, See " - "https://github.com/scikit-learn/scikit-learn/issues/12644" - ) - rng = np.random.RandomState(seed) X = make_blobs(n_samples=1000, n_features=10, centers=10, random_state=rng)[ @@ -365,12 +352,7 @@ def test_k_means_fit_predict(algo, dtype, constructor, seed, max_iter, tol): labels_1 = kmeans.fit(X).predict(X) labels_2 = kmeans.fit_predict(X) - - # Due to randomness in the order in which chunks of data are processed when - # using more than one thread, the absolute values of the labels can be - # different between the 2 strategies but they should correspond to the same - # clustering. - assert v_measure_score(labels_1, labels_2) == pytest.approx(1, abs=1e-15) + assert_array_equal(labels_1, labels_2) def test_minibatch_kmeans_verbose(): @@ -633,21 +615,10 @@ def test_score_max_iter(Estimator): def test_predict(Estimator, algorithm, init, dtype, array_constr): # Check the predict method and the equivalence between fit.predict and # fit_predict. - - # There's a very small chance of failure with elkan on unstructured dataset - # because predict method uses fast euclidean distances computation which - # may cause small numerical instabilities. - if sys.platform == "darwin": - pytest.xfail( - "Known failures on MacOS, See " - "https://github.com/scikit-learn/scikit-learn/issues/12644" - ) - X, _ = make_blobs(n_samples=500, n_features=10, centers=10, random_state=0) X = array_constr(X) - # With n_init = 1 - km = Estimator(n_clusters=10, init=init, n_init=1, random_state=0) + km = Estimator(n_clusters=10, init=init, n_init=10, random_state=0) if algorithm is not None: km.set_params(algorithm=algorithm) km.fit(X) @@ -665,31 +636,6 @@ def test_predict(Estimator, algorithm, init, dtype, array_constr): pred = km.predict(km.cluster_centers_) assert_array_equal(pred, np.arange(10)) - # With n_init > 1 - # Due to randomness in the order in which chunks of data are processed when - # using more than one thread, there might be different rounding errors for - # the computation of the inertia between 2 runs. This might result in a - # different ranking of 2 inits, hence a different labeling, even if they - # give the same clustering. We only check the labels up to a permutation. - - km = Estimator(n_clusters=10, init=init, n_init=10, random_state=0) - if algorithm is not None: - km.set_params(algorithm=algorithm) - km.fit(X) - labels = km.labels_ - - # re-predict labels for training set using predict - pred = km.predict(X) - assert_allclose(v_measure_score(pred, labels), 1) - - # re-predict labels for training set using fit_predict - pred = km.fit_predict(X) - assert_allclose(v_measure_score(pred, labels), 1) - - # predict centroid labels - pred = km.predict(km.cluster_centers_) - assert_allclose(v_measure_score(pred, np.arange(10)), 1) - @pytest.mark.parametrize("Estimator", [KMeans, MiniBatchKMeans]) def test_dense_sparse(Estimator): @@ -745,7 +691,7 @@ def test_integer_input(Estimator, array_constr, dtype, init): assert km.cluster_centers_.dtype == np.float64 expected_labels = [0, 1, 1, 0, 0, 1] - assert_allclose(v_measure_score(km.labels_, expected_labels), 1) + assert_array_equal(km.labels_, expected_labels) # Same with partial_fit (#14314) if Estimator is MiniBatchKMeans: