From 4c80d98c8915a3b57648d1e278374b4c22fabf89 Mon Sep 17 00:00:00 2001
From: aarshayj
Date: Sat, 3 Jun 2017 21:01:35 -0400
Subject: [PATCH 01/16] examples/covariance/plot_outlier_detection.py -
 matplotlib2.0 update

---
 examples/covariance/plot_outlier_detection.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/examples/covariance/plot_outlier_detection.py b/examples/covariance/plot_outlier_detection.py
index 838906573a2ae..a680bc35e0a2e 100644
--- a/examples/covariance/plot_outlier_detection.py
+++ b/examples/covariance/plot_outlier_detection.py
@@ -107,8 +107,10 @@
                      linewidths=2, colors='red')
     subplot.contourf(xx, yy, Z, levels=[threshold, Z.max()],
                      colors='orange')
-    b = subplot.scatter(X[:-n_outliers, 0], X[:-n_outliers, 1], c='white')
-    c = subplot.scatter(X[-n_outliers:, 0], X[-n_outliers:, 1], c='black')
+    b = subplot.scatter(X[:-n_outliers, 0], X[:-n_outliers, 1], c='white',
+                        s=20, edgecolor='k')
+    c = subplot.scatter(X[-n_outliers:, 0], X[-n_outliers:, 1], c='black',
+                        s=20, edgecolor='k')
     subplot.axis('tight')
     subplot.legend(
         [a.collections[0], b, c],

From f7f07579ebabb9b7538ae78d87778db493f5fc93 Mon Sep 17 00:00:00 2001
From: aarshayj
Date: Sat, 3 Jun 2017 21:07:24 -0400
Subject: [PATCH 02/16] examples/cluster/plot_kmeans_silhouette_analysis.py -
 matplotlib2.0 update

---
 examples/cluster/plot_kmeans_silhouette_analysis.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/examples/cluster/plot_kmeans_silhouette_analysis.py b/examples/cluster/plot_kmeans_silhouette_analysis.py
index 3e04ff35386bd..ac36bc1fe72e5 100644
--- a/examples/cluster/plot_kmeans_silhouette_analysis.py
+++ b/examples/cluster/plot_kmeans_silhouette_analysis.py
@@ -119,16 +119,17 @@
     # 2nd Plot showing the actual clusters formed
     colors = cm.spectral(cluster_labels.astype(float) / n_clusters)
     ax2.scatter(X[:, 0], X[:, 1], marker='.', s=30, lw=0, alpha=0.7,
-                c=colors)
+                c=colors, edgecolor='k')
 
     # Labeling the clusters
     centers = clusterer.cluster_centers_
     # Draw white circles at cluster centers
-    ax2.scatter(centers[:, 0], centers[:, 1],
-                marker='o', c="white", alpha=1, s=200)
+    ax2.scatter(centers[:, 0], centers[:, 1], marker='o',
+                c="white", alpha=1, s=200, edgecolor='k')
 
     for i, c in enumerate(centers):
-        ax2.scatter(c[0], c[1], marker='$%d$' % i, alpha=1, s=50)
+        ax2.scatter(c[0], c[1], marker='$%d$' % i, alpha=1,
+                    s=50, edgecolor='k')
 
     ax2.set_title("The visualization of the clustered data.")
     ax2.set_xlabel("Feature space for the 1st feature")

From 422c7983e37e2d3607bab925e6797a139f1a0f0d Mon Sep 17 00:00:00 2001
From: aarshayj
Date: Sat, 3 Jun 2017 21:38:51 -0400
Subject: [PATCH 03/16] examples/cluster/plot_birch_vs_minibatchkmeans.py -
 matplotlib2.0 + pep8 fix

---
 .../cluster/plot_birch_vs_minibatchkmeans.py | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/examples/cluster/plot_birch_vs_minibatchkmeans.py b/examples/cluster/plot_birch_vs_minibatchkmeans.py
index b12d6c412b0ad..d9dc6855e8b24 100644
--- a/examples/cluster/plot_birch_vs_minibatchkmeans.py
+++ b/examples/cluster/plot_birch_vs_minibatchkmeans.py
@@ -39,7 +39,6 @@
 
 # Generate blobs to do a comparison between MiniBatchKMeans and Birch.
 X, y = make_blobs(n_samples=100000, centers=n_centres, random_state=0)
-
 # Use all colors that matplotlib provides by default.
 colors_ = cycle(colors.cnames.keys())
 
@@ -69,11 +68,11 @@
     ax = fig.add_subplot(1, 3, ind + 1)
     for this_centroid, k, col in zip(centroids, range(n_clusters), colors_):
         mask = labels == k
-        ax.plot(X[mask, 0], X[mask, 1], 'w',
-                markerfacecolor=col, marker='.')
+        ax.scatter(X[mask, 0], X[mask, 1],
+                   c='w', edgecolor=col, marker='.', alpha=0.5)
         if birch_model.n_clusters is None:
-            ax.plot(this_centroid[0], this_centroid[1], '+', markerfacecolor=col,
-                    markeredgecolor='k', markersize=5)
+            ax.scatter(this_centroid[0], this_centroid[1], marker='+',
+                       c='k', s=25)
     ax.set_ylim([-25, 25])
     ax.set_xlim([-25, 25])
     ax.set_autoscaley_on(False)
@@ -93,9 +92,10 @@
 for this_centroid, k, col in zip(mbk.cluster_centers_,
                                  range(n_clusters), colors_):
     mask = mbk.labels_ == k
-    ax.plot(X[mask, 0], X[mask, 1], 'w', markerfacecolor=col, marker='.')
-    ax.plot(this_centroid[0], this_centroid[1], '+', markeredgecolor='k',
-            markersize=5)
+    ax.scatter(X[mask, 0], X[mask, 1], marker='.',
+               c='w', edgecolor=col, alpha=0.5)
+    ax.scatter(this_centroid[0], this_centroid[1], marker='+',
+               c='k', s=25)
 ax.set_xlim([-25, 25])
 ax.set_ylim([-25, 25])
 ax.set_title("MiniBatchKMeans")
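Patches 01-03 all work around the same matplotlib 2.0 behaviour change: scatter markers are now drawn larger (the default size moved from 20 to rcParams['lines.markersize'] ** 2, i.e. 36) and without a contrasting edge (edgecolors defaults to 'face'), so white points disappear against a white background. A minimal standalone sketch of the fix, with made-up data rather than anything from these examples:

    import numpy as np
    import matplotlib.pyplot as plt

    x, y = np.random.rand(2, 50)
    # s=20 restores the pre-2.0 marker size; edgecolor='k' restores a
    # visible black outline (matplotlib 2.0 defaults to edgecolors='face')
    plt.scatter(x, y, c='white', s=20, edgecolor='k')
    plt.show()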
From 57ea848c24384332077418e898070c4a9b6cae6c Mon Sep 17 00:00:00 2001
From: aarshayj
Date: Sat, 3 Jun 2017 21:46:14 -0400
Subject: [PATCH 04/16] examples/cluster/plot_cluster_iris.py - matplotlib2.0
 update

---
 examples/cluster/plot_cluster_iris.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/examples/cluster/plot_cluster_iris.py b/examples/cluster/plot_cluster_iris.py
index b837d53887f15..2f23645403302 100644
--- a/examples/cluster/plot_cluster_iris.py
+++ b/examples/cluster/plot_cluster_iris.py
@@ -54,7 +54,8 @@
     est.fit(X)
     labels = est.labels_
 
-    ax.scatter(X[:, 3], X[:, 0], X[:, 2], c=labels.astype(np.float))
+    ax.scatter(X[:, 3], X[:, 0], X[:, 2],
+               c=labels.astype(np.float), edgecolor='k')
 
     ax.w_xaxis.set_ticklabels([])
     ax.w_yaxis.set_ticklabels([])
@@ -81,7 +82,7 @@
            bbox=dict(alpha=.5, edgecolor='w', facecolor='w'))
 # Reorder the labels to have colors matching the cluster results
 y = np.choose(y, [1, 2, 0]).astype(np.float)
-ax.scatter(X[:, 3], X[:, 0], X[:, 2], c=y)
+ax.scatter(X[:, 3], X[:, 0], X[:, 2], c=y, edgecolor='k')
 
 ax.w_xaxis.set_ticklabels([])
 ax.w_yaxis.set_ticklabels([])

From a39e6a9b7d9eeec50318cc6308ab811de1c700b0 Mon Sep 17 00:00:00 2001
From: aarshayj
Date: Sat, 3 Jun 2017 21:48:01 -0400
Subject: [PATCH 05/16] examples/cluster/plot_agglomerative_clustering.py -
 matplotlib2.0 update

---
 examples/cluster/plot_agglomerative_clustering.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/cluster/plot_agglomerative_clustering.py b/examples/cluster/plot_agglomerative_clustering.py
index dfb27d17d1a89..b6482c43e927b 100644
--- a/examples/cluster/plot_agglomerative_clustering.py
+++ b/examples/cluster/plot_agglomerative_clustering.py
@@ -61,7 +61,7 @@
         model.fit(X)
         elapsed_time = time.time() - t0
         plt.scatter(X[:, 0], X[:, 1], c=model.labels_,
-                    cmap=plt.cm.spectral)
+                    cmap=plt.cm.spectral, s=20, edgecolor='k')
         plt.title('linkage=%s (time %.2fs)' % (linkage, elapsed_time),
                   fontdict=dict(verticalalignment='top'))
         plt.axis('equal')

From c18761a35096132933428671d366a229df2810cc Mon Sep 17 00:00:00 2001
From: aarshayj
Date: Sat, 3 Jun 2017 21:55:32 -0400
Subject: [PATCH 06/16] examples/cluster/plot_ward_structured_vs_unstructured.py
 - matplotlib2.0 update

---
 .../cluster/plot_ward_structured_vs_unstructured.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/examples/cluster/plot_ward_structured_vs_unstructured.py b/examples/cluster/plot_ward_structured_vs_unstructured.py
index 2471f68a6f8ed..7ba292394a29c 100644
--- a/examples/cluster/plot_ward_structured_vs_unstructured.py
+++ b/examples/cluster/plot_ward_structured_vs_unstructured.py
@@ -57,8 +57,9 @@
 ax = p3.Axes3D(fig)
 ax.view_init(7, -80)
 for l in np.unique(label):
-    ax.plot3D(X[label == l, 0], X[label == l, 1], X[label == l, 2],
-              'o', color=plt.cm.jet(np.float(l) / np.max(label + 1)))
+    ax.scatter(X[label == l, 0], X[label == l, 1], X[label == l, 2],
+               color=plt.cm.jet(np.float(l) / np.max(label + 1)),
+               s=20, edgecolor='k')
 plt.title('Without connectivity constraints (time %.2fs)' % elapsed_time)
 
 
@@ -84,8 +85,9 @@
 ax = p3.Axes3D(fig)
 ax.view_init(7, -80)
 for l in np.unique(label):
-    ax.plot3D(X[label == l, 0], X[label == l, 1], X[label == l, 2],
-              'o', color=plt.cm.jet(float(l) / np.max(label + 1)))
+    ax.scatter(X[label == l, 0], X[label == l, 1], X[label == l, 2],
+               color=plt.cm.jet(float(l) / np.max(label + 1)),
+               s=20, edgecolor='k')
 plt.title('With connectivity constraints (time %.2fs)' % elapsed_time)
 
 plt.show()
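Patches 04-06 extend the same treatment to the 3D examples. ax.plot3D(..., 'o', ...) draws a line artist whose markers are styled through markerfacecolor and markeredgecolor, so switching to ax.scatter makes the s= and edgecolor= styling uniform with the 2D plots. A hedged sketch of the pattern, with placeholder data and the jet colormap standing in for the examples' own inputs:

    import numpy as np
    import matplotlib.pyplot as plt
    from mpl_toolkits.mplot3d import Axes3D  # registers the '3d' projection

    X = np.random.rand(100, 3)
    label = np.random.randint(0, 3, size=100)
    ax = plt.figure().add_subplot(111, projection='3d')
    for l in np.unique(label):
        # scatter accepts s= and edgecolor=, unlike plot3D's 'o' markers
        ax.scatter(X[label == l, 0], X[label == l, 1], X[label == l, 2],
                   color=plt.cm.jet(float(l) / np.max(label + 1)),
                   s=20, edgecolor='k')
    plt.show()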
From 7a1c47dc47ccbb4b1b94d264b043050d50ab8fa9 Mon Sep 17 00:00:00 2001
From: aarshayj
Date: Sat, 3 Jun 2017 21:59:15 -0400
Subject: [PATCH 07/16] examples/cluster/plot_kmeans_assumptions.py -
 matplotlib2.0 update

---
 examples/cluster/plot_kmeans_assumptions.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/examples/cluster/plot_kmeans_assumptions.py b/examples/cluster/plot_kmeans_assumptions.py
index 56296decf1919..9ff669b62f5ca 100644
--- a/examples/cluster/plot_kmeans_assumptions.py
+++ b/examples/cluster/plot_kmeans_assumptions.py
@@ -30,16 +30,16 @@
 y_pred = KMeans(n_clusters=2, random_state=random_state).fit_predict(X)
 
 plt.subplot(221)
-plt.scatter(X[:, 0], X[:, 1], c=y_pred)
+plt.scatter(X[:, 0], X[:, 1], c=y_pred, s=20, edgecolor='k')
 plt.title("Incorrect Number of Blobs")
 
 # Anisotropicly distributed data
-transformation = [[ 0.60834549, -0.63667341], [-0.40887718, 0.85253229]]
+transformation = [[0.60834549, -0.63667341], [-0.40887718, 0.85253229]]
 X_aniso = np.dot(X, transformation)
 y_pred = KMeans(n_clusters=3, random_state=random_state).fit_predict(X_aniso)
 
 plt.subplot(222)
-plt.scatter(X_aniso[:, 0], X_aniso[:, 1], c=y_pred)
+plt.scatter(X_aniso[:, 0], X_aniso[:, 1], c=y_pred, s=20, edgecolor='k')
 plt.title("Anisotropicly Distributed Blobs")
 
 # Different variance
@@ -49,15 +49,16 @@
 y_pred = KMeans(n_clusters=3, random_state=random_state).fit_predict(X_varied)
 
 plt.subplot(223)
-plt.scatter(X_varied[:, 0], X_varied[:, 1], c=y_pred)
+plt.scatter(X_varied[:, 0], X_varied[:, 1], c=y_pred, s=20, edgecolor='k')
 plt.title("Unequal Variance")
 
 # Unevenly sized blobs
 X_filtered = np.vstack((X[y == 0][:500], X[y == 1][:100], X[y == 2][:10]))
-y_pred = KMeans(n_clusters=3, random_state=random_state).fit_predict(X_filtered)
+y_pred = KMeans(n_clusters=3,
+                random_state=random_state).fit_predict(X_filtered)
 
 plt.subplot(224)
-plt.scatter(X_filtered[:, 0], X_filtered[:, 1], c=y_pred)
+plt.scatter(X_filtered[:, 0], X_filtered[:, 1], c=y_pred, s=20, edgecolor='k')
 plt.title("Unevenly Sized Blobs")
 
 plt.show()
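Besides the scatter styling, patch 07 wraps the chained fit_predict call that overran flake8's 79-character limit. A minimal sketch of the wrapping idiom, using assumed inputs rather than the example's own data:

    from sklearn.cluster import KMeans
    from sklearn.datasets import make_blobs

    X, _ = make_blobs(n_samples=500, random_state=170)
    # breaking after a constructor argument keeps the chained call
    # within 79 characters without an intermediate variable
    y_pred = KMeans(n_clusters=3,
                    random_state=170).fit_predict(X)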
From b515fd9063e043a93b5d32f7ebec24404c600813 Mon Sep 17 00:00:00 2001
From: aarshayj
Date: Sat, 3 Jun 2017 22:07:49 -0400
Subject: [PATCH 08/16] examples/classification/plot_lda_qda.py -
 matplotlib2.0 + pep8 fix

---
 examples/classification/plot_lda_qda.py | 30 +++++++++++++------------
 1 file changed, 16 insertions(+), 14 deletions(-)

diff --git a/examples/classification/plot_lda_qda.py b/examples/classification/plot_lda_qda.py
index a668e7cc0db0c..fcf95e03ed710 100644
--- a/examples/classification/plot_lda_qda.py
+++ b/examples/classification/plot_lda_qda.py
@@ -20,7 +20,7 @@ class has its own standard deviation with QDA.
 from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
 from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
 
-###############################################################################
+##############################################################################
 # colormap
 cmap = colors.LinearSegmentedColormap(
     'red_blue_classes',
@@ -30,7 +30,7 @@ class has its own standard deviation with QDA.
 plt.cm.register_cmap(cmap=cmap)
 
 
-###############################################################################
+##############################################################################
 # generate datasets
 def dataset_fixed_cov():
     '''Generate 2 Gaussians samples with the same covariance matrix'''
@@ -54,17 +54,17 @@ def dataset_cov():
     return X, y
 
 
-###############################################################################
+##############################################################################
 # plot functions
 def plot_data(lda, X, y, y_pred, fig_index):
     splot = plt.subplot(2, 2, fig_index)
     if fig_index == 1:
         plt.title('Linear Discriminant Analysis')
-        plt.ylabel('Data with fixed covariance')
+        plt.ylabel('Data with\n fixed covariance')
     elif fig_index == 2:
         plt.title('Quadratic Discriminant Analysis')
     elif fig_index == 3:
-        plt.ylabel('Data with varying covariances')
+        plt.ylabel('Data with\n varying covariances')
 
     tp = (y == y_pred)  # True Positive
     tp0, tp1 = tp[y == 0], tp[y == 1]
@@ -76,15 +76,15 @@ def plot_data(lda, X, y, y_pred, fig_index):
 
     # class 0: dots
     plt.plot(X0_tp[:, 0], X0_tp[:, 1], 'o', alpha=alpha,
-             color='red')
+             color='red', markeredgecolor='k')
     plt.plot(X0_fp[:, 0], X0_fp[:, 1], '*', alpha=alpha,
-             color='#990000')  # dark red
+             color='#990000', markeredgecolor='k')  # dark red
 
     # class 1: dots
     plt.plot(X1_tp[:, 0], X1_tp[:, 1], 'o', alpha=alpha,
-             color='blue')
+             color='blue', markeredgecolor='k')
     plt.plot(X1_fp[:, 0], X1_fp[:, 1], '*', alpha=alpha,
-             color='#000099')  # dark blue
+             color='#000099', markeredgecolor='k')  # dark blue
 
     # class 0 and 1 : areas
     nx, ny = 200, 100
@@ -100,9 +100,9 @@ def plot_data(lda, X, y, y_pred, fig_index):
 
     # means
     plt.plot(lda.means_[0][0], lda.means_[0][1],
-             'o', color='black', markersize=10)
+             'o', color='black', markersize=10, markeredgecolor='k')
     plt.plot(lda.means_[1][0], lda.means_[1][1],
-             'o', color='black', markersize=10)
+             'o', color='black', markersize=10, markeredgecolor='k')
 
     return splot
 
@@ -114,7 +114,8 @@ def plot_ellipse(splot, mean, cov, color):
     angle = 180 * angle / np.pi  # convert to degrees
     # filled Gaussian at 2 standard deviation
     ell = mpl.patches.Ellipse(mean, 2 * v[0] ** 0.5, 2 * v[1] ** 0.5,
-                              180 + angle, facecolor=color, edgecolor='yellow',
+                              180 + angle, facecolor=color,
+                              edgecolor='yellow',
                               linewidth=2, zorder=2)
     ell.set_clip_box(splot.bbox)
     ell.set_alpha(0.5)
@@ -132,7 +133,7 @@ def plot_qda_cov(qda, splot):
     plot_ellipse(splot, qda.means_[0], qda.covariances_[0], 'red')
     plot_ellipse(splot, qda.means_[1], qda.covariances_[1], 'blue')
 
-###############################################################################
+##############################################################################
 for i, (X, y) in enumerate([dataset_fixed_cov(), dataset_cov()]):
     # Linear Discriminant Analysis
     lda = LinearDiscriminantAnalysis(solver="svd", store_covariance=True)
@@ -147,5 +148,6 @@ def plot_qda_cov(qda, splot):
     splot = plot_data(qda, X, y, y_pred, fig_index=2 * i + 2)
     plot_qda_cov(qda, splot)
     plt.axis('tight')
-plt.suptitle('Linear Discriminant Analysis vs Quadratic Discriminant Analysis')
+plt.suptitle('Linear Discriminant Analysis vs Quadratic Discriminant'
+'Analysis')
 plt.show()
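Two details in patch 08 are worth spelling out. First, plt.plot styles its markers through markeredgecolor, whereas scatter uses edgecolor. Second, the wrapped suptitle relies on implicit concatenation of adjacent string literals, which inserts no separator, so as written the title renders as "...DiscriminantAnalysis" unless a space is added inside one of the literals. A small self-contained illustration with dummy points (not the example's data):

    import matplotlib.pyplot as plt

    # plt.plot markers take marker* keywords, not scatter's edgecolor=
    plt.plot([0, 1, 2], [0, 1, 0], 'o', color='red', markeredgecolor='k')
    # adjacent string literals concatenate with no separator:
    plt.suptitle('Linear Discriminant Analysis vs Quadratic Discriminant'
                 ' Analysis')  # the leading space keeps the words apart
    plt.show()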
From ff29fcc57110a6e7e8cecca83968a997071f3cef Mon Sep 17 00:00:00 2001
From: aarshayj
Date: Sun, 4 Jun 2017 06:24:32 -0400
Subject: [PATCH 09/16] examples/calibration/plot_calibration.py -
 matplotlib2.0 update

---
 examples/calibration/plot_calibration.py | 7 ++++---
 examples/classification/plot_lda_qda.py  | 2 +-
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/examples/calibration/plot_calibration.py b/examples/calibration/plot_calibration.py
index b38b25812bb7f..0b24ea60089a4 100644
--- a/examples/calibration/plot_calibration.py
+++ b/examples/calibration/plot_calibration.py
@@ -15,8 +15,8 @@
 
 Compared are the estimated probability using a Gaussian naive Bayes classifier
 without calibration, with a sigmoid calibration, and with a non-parametric
-isotonic calibration. One can observe that only the non-parametric model is able
-to provide a probability calibration that returns probabilities close to the
+isotonic calibration. One can observe that only the non-parametric model is
+able to provide a probability calibration that returns probabilities close to the
 expected 0.5 for most of the samples belonging to the middle cluster with
 heterogeneous labels. This results in a significantly improved Brier score.
 """
@@ -91,7 +91,8 @@
 for this_y, color in zip(y_unique, colors):
     this_X = X_train[y_train == this_y]
     this_sw = sw_train[y_train == this_y]
-    plt.scatter(this_X[:, 0], this_X[:, 1], s=this_sw * 50, c=color, alpha=0.5,
+    plt.scatter(this_X[:, 0], this_X[:, 1], s=this_sw * 50, c=color,
+                alpha=0.5, edgecolor='k',
                 label="Class %s" % this_y)
 plt.legend(loc="best")
 plt.title("Data")

diff --git a/examples/classification/plot_lda_qda.py b/examples/classification/plot_lda_qda.py
index fcf95e03ed710..ffe4d409c61a1 100644
--- a/examples/classification/plot_lda_qda.py
+++ b/examples/classification/plot_lda_qda.py
@@ -149,5 +149,5 @@ def plot_qda_cov(qda, splot):
     plot_qda_cov(qda, splot)
     plt.axis('tight')
 plt.suptitle('Linear Discriminant Analysis vs Quadratic Discriminant'
-'Analysis')
+             'Analysis')
 plt.show()

From 278013b1ba804c2be654bf3faf233d427da58c90 Mon Sep 17 00:00:00 2001
From: aarshayj
Date: Sun, 4 Jun 2017 06:29:36 -0400
Subject: [PATCH 10/16] examples/plot_johnson_lindenstrauss_bound.py -
 matplotlib2.0 update

---
 examples/plot_johnson_lindenstrauss_bound.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/plot_johnson_lindenstrauss_bound.py b/examples/plot_johnson_lindenstrauss_bound.py
index b2dc902c71c52..cc711f48d52c6 100644
--- a/examples/plot_johnson_lindenstrauss_bound.py
+++ b/examples/plot_johnson_lindenstrauss_bound.py
@@ -187,7 +187,7 @@
           % (np.mean(rates), np.std(rates)))
 
     plt.figure()
-    plt.hist(rates, bins=50, normed=True, range=(0., 2.))
+    plt.hist(rates, bins=50, normed=True, range=(0., 2.), edgecolor='k')
     plt.xlabel("Squared distances rate: projected / original")
     plt.ylabel("Distribution of samples pairs")
     plt.title("Histogram of pairwise distance rates for n_components=%d" %
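Patch 10 touches histogram bars rather than scatter markers: matplotlib 2.0 also dropped the black outline that bar and hist patches previously got by default, so edgecolor='k' is passed explicitly to keep adjacent bars visually separated. A standalone sketch with synthetic rates; note that normed= is the argument from the matplotlib 2.0 era targeted here (later versions renamed it density=):

    import numpy as np
    import matplotlib.pyplot as plt

    rates = np.random.normal(loc=1.0, scale=0.2, size=1000)
    # edgecolor='k' outlines each bar, which matplotlib 2.0 draws edge-free
    plt.hist(rates, bins=50, normed=True, range=(0., 2.), edgecolor='k')
    plt.show()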
From 12a3c73308b049f0e5b9714a29cf6c21123bcb85 Mon Sep 17 00:00:00 2001
From: aarshayj
Date: Wed, 7 Jun 2017 11:07:35 -0400
Subject: [PATCH 11/16] flake8 changes

---
 examples/calibration/plot_calibration.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/examples/calibration/plot_calibration.py b/examples/calibration/plot_calibration.py
index 0b24ea60089a4..2cffe5075910a 100644
--- a/examples/calibration/plot_calibration.py
+++ b/examples/calibration/plot_calibration.py
@@ -16,9 +16,10 @@
 Compared are the estimated probability using a Gaussian naive Bayes classifier
 without calibration, with a sigmoid calibration, and with a non-parametric
 isotonic calibration. One can observe that only the non-parametric model is
-able to provide a probability calibration that returns probabilities close to the
-expected 0.5 for most of the samples belonging to the middle cluster with
-heterogeneous labels. This results in a significantly improved Brier score.
+able to provide a probability calibration that returns probabilities close
+to the expected 0.5 for most of the samples belonging to the middle
+cluster with heterogeneous labels. This results in a significantly improved
+Brier score.
 """
 print(__doc__)
 

From 3449cbdc01aa41d34b0a708184065bb32daa2131 Mon Sep 17 00:00:00 2001
From: aarshayj
Date: Thu, 8 Jun 2017 13:39:07 -0400
Subject: [PATCH 12/16] reversing changes for plot_kmeans_assumptions &
 plot_agglomerative_clustering

---
 examples/cluster/plot_agglomerative_clustering.py | 2 +-
 examples/cluster/plot_kmeans_assumptions.py       | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/examples/cluster/plot_agglomerative_clustering.py b/examples/cluster/plot_agglomerative_clustering.py
index b6482c43e927b..dfb27d17d1a89 100644
--- a/examples/cluster/plot_agglomerative_clustering.py
+++ b/examples/cluster/plot_agglomerative_clustering.py
@@ -61,7 +61,7 @@
         model.fit(X)
         elapsed_time = time.time() - t0
         plt.scatter(X[:, 0], X[:, 1], c=model.labels_,
-                    cmap=plt.cm.spectral, s=20, edgecolor='k')
+                    cmap=plt.cm.spectral)
         plt.title('linkage=%s (time %.2fs)' % (linkage, elapsed_time),
                   fontdict=dict(verticalalignment='top'))
         plt.axis('equal')

diff --git a/examples/cluster/plot_kmeans_assumptions.py b/examples/cluster/plot_kmeans_assumptions.py
index 9ff669b62f5ca..2ff04d523855a 100644
--- a/examples/cluster/plot_kmeans_assumptions.py
+++ b/examples/cluster/plot_kmeans_assumptions.py
@@ -30,7 +30,7 @@
 y_pred = KMeans(n_clusters=2, random_state=random_state).fit_predict(X)
 
 plt.subplot(221)
-plt.scatter(X[:, 0], X[:, 1], c=y_pred, s=20, edgecolor='k')
+plt.scatter(X[:, 0], X[:, 1], c=y_pred)
 plt.title("Incorrect Number of Blobs")
 
 # Anisotropicly distributed data
@@ -39,7 +39,7 @@
 y_pred = KMeans(n_clusters=3, random_state=random_state).fit_predict(X_aniso)
 
 plt.subplot(222)
-plt.scatter(X_aniso[:, 0], X_aniso[:, 1], c=y_pred, s=20, edgecolor='k')
+plt.scatter(X_aniso[:, 0], X_aniso[:, 1], c=y_pred)
 plt.title("Anisotropicly Distributed Blobs")
 
 # Different variance
@@ -49,7 +49,7 @@
 y_pred = KMeans(n_clusters=3, random_state=random_state).fit_predict(X_varied)
 
 plt.subplot(223)
-plt.scatter(X_varied[:, 0], X_varied[:, 1], c=y_pred, s=20, edgecolor='k')
+plt.scatter(X_varied[:, 0], X_varied[:, 1], c=y_pred)
 plt.title("Unequal Variance")
 
 # Unevenly sized blobs
@@ -58,7 +58,7 @@
                 random_state=random_state).fit_predict(X_filtered)
 
 plt.subplot(224)
-plt.scatter(X_filtered[:, 0], X_filtered[:, 1], c=y_pred, s=20, edgecolor='k')
+plt.scatter(X_filtered[:, 0], X_filtered[:, 1], c=y_pred)
 plt.title("Unevenly Sized Blobs")
 
 plt.show()
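After patch 12 the two reverted examples lean on the matplotlib 2.0 defaults instead of overriding them: integer labels passed as c= are mapped through the new default viridis colormap and the markers are drawn without edges. A minimal sketch of the reverted call, with dummy labels standing in for the KMeans output:

    import numpy as np
    import matplotlib.pyplot as plt

    X = np.random.rand(100, 2)
    y_pred = np.random.randint(0, 3, size=100)
    # relies on 2.0 defaults: viridis colour mapping, no marker edges
    plt.scatter(X[:, 0], X[:, 1], c=y_pred)
    plt.show()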
From 3cf38d9bda449c6cf25f9fc7d1de2beb8b722802 Mon Sep 17 00:00:00 2001
From: aarshayj
Date: Sat, 10 Jun 2017 16:20:33 -0400
Subject: [PATCH 13/16] cluster/plot_cluster_iris.py - major fixes

---
 examples/cluster/plot_cluster_iris.py | 32 ++++++++++++---------------
 1 file changed, 14 insertions(+), 18 deletions(-)

diff --git a/examples/cluster/plot_cluster_iris.py b/examples/cluster/plot_cluster_iris.py
index 2f23645403302..d043fe70089ae 100644
--- a/examples/cluster/plot_cluster_iris.py
+++ b/examples/cluster/plot_cluster_iris.py
@@ -25,8 +25,6 @@
 
 import numpy as np
 import matplotlib.pyplot as plt
-from mpl_toolkits.mplot3d import Axes3D
-
 from sklearn.cluster import KMeans
 from sklearn import datasets
 
@@ -43,14 +41,11 @@
                'k_means_iris_bad_init': KMeans(n_clusters=3, n_init=1,
                                                init='random')}
 
-
+fig = plt.figure(figsize=(8, 6))
 fignum = 1
+titles = ['3 clusters', '8 clusters', '3 clusters, bad initialization']
 for name, est in estimators.items():
-    fig = plt.figure(fignum, figsize=(4, 3))
-    plt.clf()
-    ax = Axes3D(fig, rect=[0, 0, .95, 1], elev=48, azim=134)
-
-    plt.cla()
+    ax = plt.subplot(2, 2, fignum, projection='3d')
 
     est.fit(X)
     labels = est.labels_
@@ -63,23 +58,20 @@
     ax.set_xlabel('Petal width')
     ax.set_ylabel('Sepal length')
     ax.set_zlabel('Petal length')
+    ax.set_title(titles[fignum-1])
+    ax.dist = 12
     fignum = fignum + 1
 
 # Plot the ground truth
-fig = plt.figure(fignum, figsize=(4, 3))
-plt.clf()
-ax = Axes3D(fig, rect=[0, 0, .95, 1], elev=48, azim=134)
-
-plt.cla()
-
+ax = plt.subplot(2, 2, 4, projection='3d')
 for name, label in [('Setosa', 0),
                     ('Versicolour', 1),
                     ('Virginica', 2)]:
     ax.text3D(X[y == label, 3].mean(),
-              X[y == label, 0].mean() + 1.5,
-              X[y == label, 2].mean(), name,
+              X[y == label, 0].mean(),
+              X[y == label, 2].mean() + 2, name,
               horizontalalignment='center',
-              bbox=dict(alpha=.5, edgecolor='w', facecolor='w'))
+              bbox=dict(alpha=.2, edgecolor='w', facecolor='w'))
 # Reorder the labels to have colors matching the cluster results
 y = np.choose(y, [1, 2, 0]).astype(np.float)
 ax.scatter(X[:, 3], X[:, 0], X[:, 2], c=y, edgecolor='k')
@@ -90,4 +82,8 @@
 ax.set_xlabel('Petal width')
 ax.set_ylabel('Sepal length')
 ax.set_zlabel('Petal length')
-plt.show()
+ax.set_title('Ground Truth')
+ax.dist = 12
+
+fig.tight_layout()
+fig.show()

From 3afd353f91bc3ce6c5cd0ca6dfb23bdb6cbb89de Mon Sep 17 00:00:00 2001
From: aarshayj
Date: Sat, 10 Jun 2017 17:00:23 -0400
Subject: [PATCH 14/16] examples/cluster/plot_cluster_iris.py - flake8 fix

---
 examples/cluster/plot_cluster_iris.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/cluster/plot_cluster_iris.py b/examples/cluster/plot_cluster_iris.py
index d043fe70089ae..832452cbb5e9d 100644
--- a/examples/cluster/plot_cluster_iris.py
+++ b/examples/cluster/plot_cluster_iris.py
@@ -58,7 +58,7 @@
     ax.set_xlabel('Petal width')
     ax.set_ylabel('Sepal length')
     ax.set_zlabel('Petal length')
-    ax.set_title(titles[fignum-1])
+    ax.set_title(titles[fignum - 1])
     ax.dist = 12
     fignum = fignum + 1
 

From 92c52e2aa12ec033da49c8035ce1cc64c04055cf Mon Sep 17 00:00:00 2001
From: aarshayj
Date: Sun, 11 Jun 2017 12:49:15 -0400
Subject: [PATCH 15/16] examples/cluster/plot_cluster_iris.py - 3d projection
 error fix

---
 examples/cluster/plot_cluster_iris.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/examples/cluster/plot_cluster_iris.py b/examples/cluster/plot_cluster_iris.py
index 832452cbb5e9d..2756557241322 100644
--- a/examples/cluster/plot_cluster_iris.py
+++ b/examples/cluster/plot_cluster_iris.py
@@ -25,6 +25,9 @@
 
 import numpy as np
 import matplotlib.pyplot as plt
+# Though the following import is not directly being used, it is required
+# for 3D projection to work
+from mpl_toolkits.mplot3d import Axes3D
 
 from sklearn.cluster import KMeans
 from sklearn import datasets
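Patch 15 restores an import that patch 13 had removed as unused: importing Axes3D has the side effect of registering the '3d' projection, and without it plt.subplot(..., projection='3d') fails on the matplotlib versions targeted here. A small sketch of the working pattern, with placeholder data:

    import matplotlib.pyplot as plt
    # imported only for its side effect of registering the '3d' projection
    from mpl_toolkits.mplot3d import Axes3D  # noqa: F401

    ax = plt.subplot(2, 2, 1, projection='3d')
    ax.scatter([0, 1, 2], [0, 1, 2], [0, 1, 2])
    plt.show()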
From 4c93954fc49701c68189abcd99d54423f7088202 Mon Sep 17 00:00:00 2001
From: aarshayj
Date: Sun, 11 Jun 2017 13:52:37 -0400
Subject: [PATCH 16/16] cluster/plot_cluster_iris.py - elevation and azimuth
 setting

---
 examples/cluster/plot_cluster_iris.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/examples/cluster/plot_cluster_iris.py b/examples/cluster/plot_cluster_iris.py
index 2756557241322..3266096b97bf7 100644
--- a/examples/cluster/plot_cluster_iris.py
+++ b/examples/cluster/plot_cluster_iris.py
@@ -48,7 +48,8 @@
 fignum = 1
 titles = ['3 clusters', '8 clusters', '3 clusters, bad initialization']
 for name, est in estimators.items():
-    ax = plt.subplot(2, 2, fignum, projection='3d')
+    ax = plt.subplot(2, 2, fignum, projection='3d',
+                     elev=48, azim=134)
 
     est.fit(X)
     labels = est.labels_
@@ -66,7 +67,8 @@
     fignum = fignum + 1
 
 # Plot the ground truth
-ax = plt.subplot(2, 2, 4, projection='3d')
+ax = plt.subplot(2, 2, 4, projection='3d',
+                 elev=48, azim=134)
 for name, label in [('Setosa', 0),
                     ('Versicolour', 1),
                     ('Virginica', 2)]: