diff --git a/examples/cluster/plot_bisect_kmeans.py b/examples/cluster/plot_bisect_kmeans.py
index 0f107c96e95d1..a6be3545e0b27 100644
--- a/examples/cluster/plot_bisect_kmeans.py
+++ b/examples/cluster/plot_bisect_kmeans.py
@@ -5,10 +5,12 @@
 This example shows differences between Regular K-Means algorithm and Bisecting
 K-Means.
 
-While K-Means clusterings are different when with increasing n_clusters,
-Bisecting K-Means clustering build on top of the previous ones.
-
-This difference can visually be observed.
+While K-Means clusterings are different when increasing n_clusters,
+Bisecting K-Means clustering builds on top of the previous ones. As a result, it
+tends to create clusters that have a more regular large-scale structure. This
+difference can be visually observed: for all numbers of clusters, there is a
+dividing line cutting the overall data cloud in two for BisectingKMeans, which is not
+present for regular K-Means.
 """
 
 import matplotlib.pyplot as plt
@@ -21,13 +23,13 @@
 
 
 # Generate sample data
-n_samples = 1000
+n_samples = 10000
 random_state = 0
 
 X, _ = make_blobs(n_samples=n_samples, centers=2, random_state=random_state)
 
 # Number of cluster centers for KMeans and BisectingKMeans
-n_clusters_list = [2, 3, 4, 5]
+n_clusters_list = [4, 8, 16]
 
 # Algorithms to compare
 clustering_algorithms = {
@@ -37,7 +39,7 @@
 
 # Make subplots for each variant
 fig, axs = plt.subplots(
-    len(clustering_algorithms), len(n_clusters_list), figsize=(15, 5)
+    len(clustering_algorithms), len(n_clusters_list), figsize=(12, 5)
 )
 axs = axs.T
 