From af054b1ca55d2e7a745f482c61bff20599c9f3df Mon Sep 17 00:00:00 2001 From: jeremie du boisberranger Date: Thu, 3 Jun 2021 11:53:13 +0200 Subject: [PATCH 1/4] allow small tol when finding best run --- sklearn/cluster/_kmeans.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sklearn/cluster/_kmeans.py b/sklearn/cluster/_kmeans.py index 8b24be6ace987..d7b4bd7267ff9 100644 --- a/sklearn/cluster/_kmeans.py +++ b/sklearn/cluster/_kmeans.py @@ -1057,7 +1057,9 @@ def fit(self, X, y=None, sample_weight=None): x_squared_norms=x_squared_norms, n_threads=self._n_threads) # determine if these results are the best so far - if best_inertia is None or inertia < best_inertia: + # allow small tolerance on the inertia to accomodate for rounding + # errors. + if best_inertia is None or inertia < best_inertia - 1e-7: best_labels = labels best_centers = centers best_inertia = inertia From cbe9a9c6f85edcbfb5a64266ae299ba2a60ee809 Mon Sep 17 00:00:00 2001 From: jeremie du boisberranger Date: Mon, 7 Jun 2021 14:56:15 +0200 Subject: [PATCH 2/4] relative diff --- sklearn/cluster/_kmeans.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/cluster/_kmeans.py b/sklearn/cluster/_kmeans.py index d7b4bd7267ff9..0f72157ea4d68 100644 --- a/sklearn/cluster/_kmeans.py +++ b/sklearn/cluster/_kmeans.py @@ -1059,7 +1059,7 @@ def fit(self, X, y=None, sample_weight=None): # determine if these results are the best so far # allow small tolerance on the inertia to accomodate for rounding # errors. - if best_inertia is None or inertia < best_inertia - 1e-7: + if best_inertia is None or inertia < best_inertia * (1 - 1e-7): best_labels = labels best_centers = centers best_inertia = inertia From 26905b099751cb442b043238c3783b2287f47ec0 Mon Sep 17 00:00:00 2001 From: jeremie du boisberranger Date: Tue, 8 Jun 2021 15:55:11 +0200 Subject: [PATCH 3/4] lower tol --- sklearn/cluster/_kmeans.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/cluster/_kmeans.py b/sklearn/cluster/_kmeans.py index 0f72157ea4d68..f420dc6590cb4 100644 --- a/sklearn/cluster/_kmeans.py +++ b/sklearn/cluster/_kmeans.py @@ -1059,7 +1059,7 @@ def fit(self, X, y=None, sample_weight=None): # determine if these results are the best so far # allow small tolerance on the inertia to accomodate for rounding # errors. - if best_inertia is None or inertia < best_inertia * (1 - 1e-7): + if best_inertia is None or inertia < best_inertia * (1 - 1e-6): best_labels = labels best_centers = centers best_inertia = inertia From e76cc7fc234d1cd5e093fc131ecbaf1339c824d5 Mon Sep 17 00:00:00 2001 From: jeremie du boisberranger Date: Tue, 8 Jun 2021 18:07:57 +0200 Subject: [PATCH 4/4] more precise comment --- sklearn/cluster/_kmeans.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/cluster/_kmeans.py b/sklearn/cluster/_kmeans.py index f420dc6590cb4..07a3496517f1a 100644 --- a/sklearn/cluster/_kmeans.py +++ b/sklearn/cluster/_kmeans.py @@ -1057,8 +1057,8 @@ def fit(self, X, y=None, sample_weight=None): x_squared_norms=x_squared_norms, n_threads=self._n_threads) # determine if these results are the best so far - # allow small tolerance on the inertia to accomodate for rounding - # errors. + # allow small tolerance on the inertia to accommodate for + # non-deterministic rounding errors due to parallel computation if best_inertia is None or inertia < best_inertia * (1 - 1e-6): best_labels = labels best_centers = centers