From 8e7a0e3b896a4a0cbc856190d2b427d444ddba41 Mon Sep 17 00:00:00 2001 From: Gael Varoquaux Date: Thu, 26 Jan 2023 22:07:13 +0100 Subject: [PATCH 1/8] DOC: more didactic examples of bisecting kmeans. --- examples/cluster/plot_bisect_kmeans.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/examples/cluster/plot_bisect_kmeans.py b/examples/cluster/plot_bisect_kmeans.py index 0f107c96e95d1..a045ec5275dba 100644 --- a/examples/cluster/plot_bisect_kmeans.py +++ b/examples/cluster/plot_bisect_kmeans.py @@ -6,9 +6,13 @@ This example shows differences between Regular K-Means algorithm and Bisecting K-Means. While K-Means clusterings are different when with increasing n_clusters, -Bisecting K-Means clustering build on top of the previous ones. +Bisecting K-Means clustering build on top of the previous ones. As a +result, it tends to create clusters that have a more consistent +large-scale structure across the clusters. -This difference can visually be observed. +This difference can visually be observed: for all number of clusters, +there is a dividing line cutting the overall data cloud in two for +BisectingKMeans, but not for regular KMeans. 
""" import matplotlib.pyplot as plt @@ -21,13 +25,13 @@ # Generate sample data -n_samples = 1000 +n_samples = 10000 random_state = 0 X, _ = make_blobs(n_samples=n_samples, centers=2, random_state=random_state) # Number of cluster centers for KMeans and BisectingKMeans -n_clusters_list = [2, 3, 4, 5] +n_clusters_list = [4, 8, 16] # Algorithms to compare clustering_algorithms = { From 1b027fae0f8950b00f2fe8ca0f3ea0d89d547b0a Mon Sep 17 00:00:00 2001 From: Gael Varoquaux Date: Thu, 26 Jan 2023 22:11:01 +0100 Subject: [PATCH 2/8] adjust fig aspect ratio --- examples/cluster/plot_bisect_kmeans.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/cluster/plot_bisect_kmeans.py b/examples/cluster/plot_bisect_kmeans.py index a045ec5275dba..a5e6ff49af5ff 100644 --- a/examples/cluster/plot_bisect_kmeans.py +++ b/examples/cluster/plot_bisect_kmeans.py @@ -41,7 +41,7 @@ # Make subplots for each variant fig, axs = plt.subplots( - len(clustering_algorithms), len(n_clusters_list), figsize=(15, 5) + len(clustering_algorithms), len(n_clusters_list), figsize=(12, 5) ) axs = axs.T From f2b7a1e719f13d182addade8c9a3edc5e313f9f3 Mon Sep 17 00:00:00 2001 From: Gael Varoquaux Date: Fri, 27 Jan 2023 11:08:25 +0100 Subject: [PATCH 3/8] Update examples/cluster/plot_bisect_kmeans.py Co-authored-by: Olivier Grisel --- examples/cluster/plot_bisect_kmeans.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/cluster/plot_bisect_kmeans.py b/examples/cluster/plot_bisect_kmeans.py index a5e6ff49af5ff..15b7477d53448 100644 --- a/examples/cluster/plot_bisect_kmeans.py +++ b/examples/cluster/plot_bisect_kmeans.py @@ -10,7 +10,7 @@ result, it tends to create clusters that have a more consistent large-scale structure across the clusters. 
-This difference can visually be observed: for all number of clusters, +This difference can visually be observed: for all numbers of clusters, there is a dividing line cutting the overall data cloud in two for BisectingKMeans, but not for regular KMeans. From d5fc043848bd2ec90464584cb3d71f94ae9809a3 Mon Sep 17 00:00:00 2001 From: Gael Varoquaux Date: Fri, 27 Jan 2023 11:09:10 +0100 Subject: [PATCH 4/8] Update examples/cluster/plot_bisect_kmeans.py Co-authored-by: Olivier Grisel --- examples/cluster/plot_bisect_kmeans.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/cluster/plot_bisect_kmeans.py b/examples/cluster/plot_bisect_kmeans.py index 15b7477d53448..b09433865b144 100644 --- a/examples/cluster/plot_bisect_kmeans.py +++ b/examples/cluster/plot_bisect_kmeans.py @@ -6,7 +6,7 @@ This example shows differences between Regular K-Means algorithm and Bisecting K-Means. While K-Means clusterings are different when with increasing n_clusters, -Bisecting K-Means clustering build on top of the previous ones. As a +Bisecting K-Means clustering builds on top of the previous ones. As a result, it tends to create clusters that have a more consistent large-scale structure across the clusters. From cb8f20d8b3234011601473b8a0da9fc6511e857f Mon Sep 17 00:00:00 2001 From: Gael Varoquaux Date: Fri, 27 Jan 2023 11:33:51 +0100 Subject: [PATCH 5/8] Update examples/cluster/plot_bisect_kmeans.py Co-authored-by: Arturo Amor <86408019+ArturoAmorQ@users.noreply.github.com> --- examples/cluster/plot_bisect_kmeans.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/cluster/plot_bisect_kmeans.py b/examples/cluster/plot_bisect_kmeans.py index b09433865b144..edffc541acb00 100644 --- a/examples/cluster/plot_bisect_kmeans.py +++ b/examples/cluster/plot_bisect_kmeans.py @@ -6,9 +6,9 @@ This example shows differences between Regular K-Means algorithm and Bisecting K-Means. 
While K-Means clusterings are different when with increasing n_clusters, -Bisecting K-Means clustering builds on top of the previous ones. As a -result, it tends to create clusters that have a more consistent -large-scale structure across the clusters. +Bisecting K-Means clustering builds on top of the previous ones. As a result, it +tends to create clusters that have a more regular large-scale structure. This +difference can be visually observed: for all numbers of clusters, there is a This difference can visually be observed: for all numbers of clusters, there is a dividing line cutting the overall data cloud in two for From a4e5f77d4788340d2b832d9207df47368cea2601 Mon Sep 17 00:00:00 2001 From: Gael Varoquaux Date: Fri, 27 Jan 2023 11:42:01 +0100 Subject: [PATCH 6/8] Update examples/cluster/plot_bisect_kmeans.py Co-authored-by: Arturo Amor <86408019+ArturoAmorQ@users.noreply.github.com> --- examples/cluster/plot_bisect_kmeans.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/cluster/plot_bisect_kmeans.py b/examples/cluster/plot_bisect_kmeans.py index edffc541acb00..4c47aab102e85 100644 --- a/examples/cluster/plot_bisect_kmeans.py +++ b/examples/cluster/plot_bisect_kmeans.py @@ -10,9 +10,9 @@ tends to create clusters that have a more regular large-scale structure. This difference can be visually observed: for all numbers of clusters, there is a -This difference can visually be observed: for all numbers of clusters, -there is a dividing line cutting the overall data cloud in two for -BisectingKMeans, but not for regular KMeans. +dividing line cutting the overall data cloud in two for BisectingKMeans, whereas +regular K-Means results into a `Voronoi cell partitioning +`_. 
""" import matplotlib.pyplot as plt From ebec2b744bf5b23f98d0d8f4884ae3dde3272bd6 Mon Sep 17 00:00:00 2001 From: Gael Varoquaux Date: Fri, 27 Jan 2023 11:44:06 +0100 Subject: [PATCH 7/8] Remove reference to Voronoi --- examples/cluster/plot_bisect_kmeans.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/examples/cluster/plot_bisect_kmeans.py b/examples/cluster/plot_bisect_kmeans.py index 4c47aab102e85..e108a8e513ad5 100644 --- a/examples/cluster/plot_bisect_kmeans.py +++ b/examples/cluster/plot_bisect_kmeans.py @@ -9,10 +9,8 @@ Bisecting K-Means clustering builds on top of the previous ones. As a result, it tends to create clusters that have a more regular large-scale structure. This difference can be visually observed: for all numbers of clusters, there is a - -dividing line cutting the overall data cloud in two for BisectingKMeans, whereas -regular K-Means results into a `Voronoi cell partitioning -`_. +dividing line cutting the overall data cloud in two for BisectingKMeans, which is not +present for regular K-Means. """ import matplotlib.pyplot as plt From c6f6e507299a3095cc7fa49a8e00b98d979ae739 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=A9mie=20du=20Boisberranger?= <34657725+jeremiedbb@users.noreply.github.com> Date: Fri, 27 Jan 2023 12:39:36 +0100 Subject: [PATCH 8/8] Update examples/cluster/plot_bisect_kmeans.py --- examples/cluster/plot_bisect_kmeans.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/cluster/plot_bisect_kmeans.py b/examples/cluster/plot_bisect_kmeans.py index e108a8e513ad5..a6be3545e0b27 100644 --- a/examples/cluster/plot_bisect_kmeans.py +++ b/examples/cluster/plot_bisect_kmeans.py @@ -5,7 +5,7 @@ This example shows differences between Regular K-Means algorithm and Bisecting K-Means. 
-While K-Means clusterings are different when with increasing n_clusters, +While K-Means clusterings are different when increasing n_clusters, Bisecting K-Means clustering builds on top of the previous ones. As a result, it tends to create clusters that have a more regular large-scale structure. This difference can be visually observed: for all numbers of clusters, there is a