- "print(__doc__)\n\nimport time\n\nimport numpy as np\nimport matplotlib.pyplot as plt\n\nfrom sklearn.cluster import MiniBatchKMeans, KMeans\nfrom sklearn.metrics.pairwise import pairwise_distances_argmin\nfrom sklearn.datasets.samples_generator import make_blobs\n\n# #############################################################################\n# Generate sample data\nnp.random.seed(0)\n\nbatch_size = 45\ncenters = [[1, 1], [-1, -1], [1, -1]]\nn_clusters = len(centers)\nX, labels_true = make_blobs(n_samples=3000, centers=centers, cluster_std=0.7)\n\n# #############################################################################\n# Compute clustering with Means\n\nk_means = KMeans(init='k-means++', n_clusters=3, n_init=10)\nt0 = time.time()\nk_means.fit(X)\nt_batch = time.time() - t0\n\n# #############################################################################\n# Compute clustering with MiniBatchKMeans\n\nmbk = MiniBatchKMeans(init='k-means++', n_clusters=3, batch_size=batch_size,\n n_init=10, max_no_improvement=10, verbose=0)\nt0 = time.time()\nmbk.fit(X)\nt_mini_batch = time.time() - t0\n\n# #############################################################################\n# Plot result\n\nfig = plt.figure(figsize=(8, 3))\nfig.subplots_adjust(left=0.02, right=0.98, bottom=0.05, top=0.9)\ncolors = ['#4EACC5', '#FF9C34', '#4E9A06']\n\n# We want to have the same colors for the same cluster from the\n# MiniBatchKMeans and the KMeans algorithm. Let's pair the cluster centers per\n# closest one.\nk_means_cluster_centers = np.sort(k_means.cluster_centers_, axis=0)\nmbk_means_cluster_centers = np.sort(mbk.cluster_centers_, axis=0)\nk_means_labels = pairwise_distances_argmin(X, k_means_cluster_centers)\nmbk_means_labels = pairwise_distances_argmin(X, mbk_means_cluster_centers)\norder = pairwise_distances_argmin(k_means_cluster_centers,\n mbk_means_cluster_centers)\n\n# KMeans\nax = fig.add_subplot(1, 3, 1)\nfor k, col in zip(range(n_clusters), colors):\n my_members = k_means_labels == k\n cluster_center = k_means_cluster_centers[k]\n ax.plot(X[my_members, 0], X[my_members, 1], 'w',\n markerfacecolor=col, marker='.')\n ax.plot(cluster_center[0], cluster_center[1], 'o', markerfacecolor=col,\n markeredgecolor='k', markersize=6)\nax.set_title('KMeans')\nax.set_xticks(())\nax.set_yticks(())\nplt.text(-3.5, 1.8, 'train time: %.2fs\\ninertia: %f' % (\n t_batch, k_means.inertia_))\n\n# MiniBatchKMeans\nax = fig.add_subplot(1, 3, 2)\nfor k, col in zip(range(n_clusters), colors):\n my_members = mbk_means_labels == order[k]\n cluster_center = mbk_means_cluster_centers[order[k]]\n ax.plot(X[my_members, 0], X[my_members, 1], 'w',\n markerfacecolor=col, marker='.')\n ax.plot(cluster_center[0], cluster_center[1], 'o', markerfacecolor=col,\n markeredgecolor='k', markersize=6)\nax.set_title('MiniBatchKMeans')\nax.set_xticks(())\nax.set_yticks(())\nplt.text(-3.5, 1.8, 'train time: %.2fs\\ninertia: %f' %\n (t_mini_batch, mbk.inertia_))\n\n# Initialise the different array to all False\ndifferent = (mbk_means_labels == 4)\nax = fig.add_subplot(1, 3, 3)\n\nfor k in range(n_clusters):\n different += ((k_means_labels == k) != (mbk_means_labels == order[k]))\n\nidentic = np.logical_not(different)\nax.plot(X[identic, 0], X[identic, 1], 'w',\n markerfacecolor='#bbbbbb', marker='.')\nax.plot(X[different, 0], X[different, 1], 'w',\n markerfacecolor='m', marker='.')\nax.set_title('Difference')\nax.set_xticks(())\nax.set_yticks(())\n\nplt.show()"
0 commit comments