12 changes: 7 additions & 5 deletions examples/calibration/plot_calibration.py
@@ -15,10 +15,11 @@

Compared are the estimated probability using a Gaussian naive Bayes classifier
without calibration, with a sigmoid calibration, and with a non-parametric
isotonic calibration. One can observe that only the non-parametric model is able
to provide a probability calibration that returns probabilities close to the
expected 0.5 for most of the samples belonging to the middle cluster with
heterogeneous labels. This results in a significantly improved Brier score.
isotonic calibration. One can observe that only the non-parametric model is
able to provide a probability calibration that returns probabilities close
to the expected 0.5 for most of the samples belonging to the middle
cluster with heterogeneous labels. This results in a significantly improved
Brier score.
"""
print(__doc__)

@@ -91,7 +92,8 @@
for this_y, color in zip(y_unique, colors):
this_X = X_train[y_train == this_y]
this_sw = sw_train[y_train == this_y]
plt.scatter(this_X[:, 0], this_X[:, 1], s=this_sw * 50, c=color, alpha=0.5,
plt.scatter(this_X[:, 0], this_X[:, 1], s=this_sw * 50, c=color,
alpha=0.5, edgecolor='k',
label="Class %s" % this_y)
plt.legend(loc="best")
plt.title("Data")
22 changes: 12 additions & 10 deletions examples/classification/plot_lda_qda.py
@@ -60,11 +60,11 @@ def plot_data(lda, X, y, y_pred, fig_index):
splot = plt.subplot(2, 2, fig_index)
if fig_index == 1:
plt.title('Linear Discriminant Analysis')
plt.ylabel('Data with fixed covariance')
plt.ylabel('Data with\n fixed covariance')
elif fig_index == 2:
plt.title('Quadratic Discriminant Analysis')
elif fig_index == 3:
plt.ylabel('Data with varying covariances')
plt.ylabel('Data with\n varying covariances')

tp = (y == y_pred) # True Positive
tp0, tp1 = tp[y == 0], tp[y == 1]
@@ -76,15 +76,15 @@ def plot_data(lda, X, y, y_pred, fig_index):

# class 0: dots
plt.plot(X0_tp[:, 0], X0_tp[:, 1], 'o', alpha=alpha,
color='red')
color='red', markeredgecolor='k')
plt.plot(X0_fp[:, 0], X0_fp[:, 1], '*', alpha=alpha,
color='#990000') # dark red
color='#990000', markeredgecolor='k') # dark red

# class 1: dots
plt.plot(X1_tp[:, 0], X1_tp[:, 1], 'o', alpha=alpha,
color='blue')
color='blue', markeredgecolor='k')
plt.plot(X1_fp[:, 0], X1_fp[:, 1], '*', alpha=alpha,
color='#000099') # dark blue
color='#000099', markeredgecolor='k') # dark blue

# class 0 and 1 : areas
nx, ny = 200, 100
@@ -100,9 +100,9 @@ def plot_data(lda, X, y, y_pred, fig_index):

# means
plt.plot(lda.means_[0][0], lda.means_[0][1],
'o', color='black', markersize=10)
'o', color='black', markersize=10, markeredgecolor='k')
plt.plot(lda.means_[1][0], lda.means_[1][1],
'o', color='black', markersize=10)
'o', color='black', markersize=10, markeredgecolor='k')

return splot

@@ -114,7 +114,8 @@ def plot_ellipse(splot, mean, cov, color):
angle = 180 * angle / np.pi # convert to degrees
# filled Gaussian at 2 standard deviation
ell = mpl.patches.Ellipse(mean, 2 * v[0] ** 0.5, 2 * v[1] ** 0.5,
180 + angle, facecolor=color, edgecolor='yellow',
180 + angle, facecolor=color,
edgecolor='yellow',
linewidth=2, zorder=2)
ell.set_clip_box(splot.bbox)
ell.set_alpha(0.5)
@@ -146,5 +147,6 @@ def plot_qda_cov(qda, splot):
splot = plot_data(qda, X, y, y_pred, fig_index=2 * i + 2)
plot_qda_cov(qda, splot)
plt.axis('tight')
plt.suptitle('Linear Discriminant Analysis vs Quadratic Discriminant Analysis')
plt.suptitle('Linear Discriminant Analysis vs Quadratic Discriminant '
             'Analysis')
plt.show()
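
The plot_ellipse hunk above only shows part of the function. For reference, here is a self-contained sketch of the same technique: drawing a covariance matrix as a filled 2-standard-deviation ellipse derived from its eigendecomposition. The eigenvector handling and the usage values are assumptions based on the visible lines, not the example's exact code.

import numpy as np
import matplotlib.pyplot as plt
from matplotlib.patches import Ellipse

def plot_cov_ellipse(ax, mean, cov, color):
    # eigh returns eigenvalues in ascending order, eigenvectors as columns
    v, w = np.linalg.eigh(cov)
    u = w[:, 1]  # direction of the largest variance
    angle = np.degrees(np.arctan2(u[1], u[0]))
    # filled Gaussian at 2 standard deviations (full axis lengths)
    ell = Ellipse(mean, 2 * v[1] ** 0.5, 2 * v[0] ** 0.5, angle=angle,
                  facecolor=color, edgecolor='yellow', linewidth=2, zorder=2)
    ell.set_clip_box(ax.bbox)
    ell.set_alpha(0.5)
    ax.add_artist(ell)

# usage sketch with a made-up covariance
fig, ax = plt.subplots()
plot_cov_ellipse(ax, mean=[0, 0],
                 cov=np.array([[2.0, 0.8], [0.8, 1.0]]), color='red')
ax.set_xlim(-4, 4)
ax.set_ylim(-4, 4)
plt.show()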
16 changes: 8 additions & 8 deletions examples/cluster/plot_birch_vs_minibatchkmeans.py
@@ -39,7 +39,6 @@

# Generate blobs to do a comparison between MiniBatchKMeans and Birch.
X, y = make_blobs(n_samples=100000, centers=n_centres, random_state=0)


# Use all colors that matplotlib provides by default.
colors_ = cycle(colors.cnames.keys())
@@ -69,11 +68,11 @@
ax = fig.add_subplot(1, 3, ind + 1)
for this_centroid, k, col in zip(centroids, range(n_clusters), colors_):
mask = labels == k
ax.plot(X[mask, 0], X[mask, 1], 'w',
markerfacecolor=col, marker='.')
ax.scatter(X[mask, 0], X[mask, 1],
c='w', edgecolor=col, marker='.', alpha=0.5)
if birch_model.n_clusters is None:
ax.plot(this_centroid[0], this_centroid[1], '+', markerfacecolor=col,
markeredgecolor='k', markersize=5)
ax.scatter(this_centroid[0], this_centroid[1], marker='+',
c='k', s=25)
ax.set_ylim([-25, 25])
ax.set_xlim([-25, 25])
ax.set_autoscaley_on(False)
@@ -93,9 +92,10 @@
for this_centroid, k, col in zip(mbk.cluster_centers_,
range(n_clusters), colors_):
mask = mbk.labels_ == k
ax.plot(X[mask, 0], X[mask, 1], 'w', markerfacecolor=col, marker='.')
ax.plot(this_centroid[0], this_centroid[1], '+', markeredgecolor='k',
markersize=5)
ax.scatter(X[mask, 0], X[mask, 1], marker='.',
c='w', edgecolor=col, alpha=0.5)
ax.scatter(this_centroid[0], this_centroid[1], marker='+',
c='k', s=25)
ax.set_xlim([-25, 25])
ax.set_ylim([-25, 25])
ax.set_title("MiniBatchKMeans")
40 changes: 21 additions & 19 deletions examples/cluster/plot_cluster_iris.py
@@ -25,9 +25,10 @@

import numpy as np
import matplotlib.pyplot as plt
# Though the following import is not directly being used, it is required
# for 3D projection to work
from mpl_toolkits.mplot3d import Axes3D


from sklearn.cluster import KMeans
from sklearn import datasets

@@ -43,50 +44,51 @@
'k_means_iris_bad_init': KMeans(n_clusters=3, n_init=1,
init='random')}


fig = plt.figure(figsize=(8, 6))
fignum = 1
titles = ['3 clusters', '8 clusters', '3 clusters, bad initialization']
for name, est in estimators.items():
fig = plt.figure(fignum, figsize=(4, 3))
plt.clf()
ax = Axes3D(fig, rect=[0, 0, .95, 1], elev=48, azim=134)

plt.cla()
ax = plt.subplot(2, 2, fignum, projection='3d',
                 elev=48, azim=134)

Member: Which matplotlib version does this require?

Contributor Author: Actually, matplotlib 1.5 requires this. The issue was that I wasn't importing Axes3D because it wasn't being used directly, but it turned out that projection='3d' requires Axes3D to be imported. It worked after adding that import. All good now.
est.fit(X)
labels = est.labels_

ax.scatter(X[:, 3], X[:, 0], X[:, 2], c=labels.astype(np.float))
ax.scatter(X[:, 3], X[:, 0], X[:, 2],
c=labels.astype(np.float), edgecolor='k')

ax.w_xaxis.set_ticklabels([])
ax.w_yaxis.set_ticklabels([])
ax.w_zaxis.set_ticklabels([])
ax.set_xlabel('Petal width')
ax.set_ylabel('Sepal length')
ax.set_zlabel('Petal length')
ax.set_title(titles[fignum - 1])
ax.dist = 12
fignum = fignum + 1

# Plot the ground truth
fig = plt.figure(fignum, figsize=(4, 3))
plt.clf()
ax = Axes3D(fig, rect=[0, 0, .95, 1], elev=48, azim=134)

plt.cla()

ax = plt.subplot(2, 2, 4, projection='3d',
elev=48, azim=134)
for name, label in [('Setosa', 0),
('Versicolour', 1),
('Virginica', 2)]:
ax.text3D(X[y == label, 3].mean(),
X[y == label, 0].mean() + 1.5,
X[y == label, 2].mean(), name,
X[y == label, 0].mean(),
X[y == label, 2].mean() + 2, name,
horizontalalignment='center',
bbox=dict(alpha=.5, edgecolor='w', facecolor='w'))
bbox=dict(alpha=.2, edgecolor='w', facecolor='w'))
# Reorder the labels to have colors matching the cluster results
y = np.choose(y, [1, 2, 0]).astype(np.float)
ax.scatter(X[:, 3], X[:, 0], X[:, 2], c=y)
ax.scatter(X[:, 3], X[:, 0], X[:, 2], c=y, edgecolor='k')

ax.w_xaxis.set_ticklabels([])
ax.w_yaxis.set_ticklabels([])
ax.w_zaxis.set_ticklabels([])
ax.set_xlabel('Petal width')
ax.set_ylabel('Sepal length')
ax.set_zlabel('Petal length')
plt.show()
ax.set_title('Ground Truth')
ax.dist = 12

fig.tight_layout()
fig.show()
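
As the review thread above notes, on matplotlib 1.x the '3d' projection is only registered once Axes3D has been imported, even though the name is never used directly. A minimal sketch of the pattern:

import matplotlib.pyplot as plt
# Not used directly, but the import registers the '3d' projection
# with matplotlib (required on matplotlib 1.x)
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401

fig = plt.figure()
ax = fig.add_subplot(1, 1, 1, projection='3d')
ax.scatter([0, 1], [0, 1], [0, 1])
plt.show()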
5 changes: 3 additions & 2 deletions examples/cluster/plot_kmeans_assumptions.py
@@ -34,7 +34,7 @@
plt.title("Incorrect Number of Blobs")

# Anisotropicly distributed data
transformation = [[ 0.60834549, -0.63667341], [-0.40887718, 0.85253229]]
transformation = [[0.60834549, -0.63667341], [-0.40887718, 0.85253229]]
X_aniso = np.dot(X, transformation)
y_pred = KMeans(n_clusters=3, random_state=random_state).fit_predict(X_aniso)

@@ -54,7 +54,8 @@

# Unevenly sized blobs
X_filtered = np.vstack((X[y == 0][:500], X[y == 1][:100], X[y == 2][:10]))
y_pred = KMeans(n_clusters=3, random_state=random_state).fit_predict(X_filtered)
y_pred = KMeans(n_clusters=3,
random_state=random_state).fit_predict(X_filtered)

plt.subplot(224)
plt.scatter(X_filtered[:, 0], X_filtered[:, 1], c=y_pred)
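
The anisotropic case in this example boils down to applying a fixed linear map before clustering, which violates the isotropic-cluster assumption of vanilla k-means. A standalone sketch (the blob parameters are illustrative):

import numpy as np
from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs

X, y = make_blobs(n_samples=1500, random_state=170)

# a fixed linear transformation stretches the blobs diagonally
transformation = [[0.60834549, -0.63667341], [-0.40887718, 0.85253229]]
X_aniso = np.dot(X, transformation)
y_pred = KMeans(n_clusters=3, random_state=170).fit_predict(X_aniso)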
9 changes: 5 additions & 4 deletions examples/cluster/plot_kmeans_silhouette_analysis.py
@@ -119,16 +119,17 @@
# 2nd Plot showing the actual clusters formed
colors = cm.spectral(cluster_labels.astype(float) / n_clusters)
ax2.scatter(X[:, 0], X[:, 1], marker='.', s=30, lw=0, alpha=0.7,
c=colors)
c=colors, edgecolor='k')

# Labeling the clusters
centers = clusterer.cluster_centers_
# Draw white circles at cluster centers
ax2.scatter(centers[:, 0], centers[:, 1],
marker='o', c="white", alpha=1, s=200)
ax2.scatter(centers[:, 0], centers[:, 1], marker='o',
c="white", alpha=1, s=200, edgecolor='k')

for i, c in enumerate(centers):
ax2.scatter(c[0], c[1], marker='$%d$' % i, alpha=1, s=50)
ax2.scatter(c[0], c[1], marker='$%d$' % i, alpha=1,
s=50, edgecolor='k')

ax2.set_title("The visualization of the clustered data.")
ax2.set_xlabel("Feature space for the 1st feature")
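
A minimal sketch of the silhouette computation this example is built around; the data and cluster count are illustrative assumptions.

from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs
from sklearn.metrics import silhouette_samples, silhouette_score

X, _ = make_blobs(n_samples=500, centers=4, random_state=1)

clusterer = KMeans(n_clusters=4, random_state=10)
cluster_labels = clusterer.fit_predict(X)

# one average score for the whole clustering ...
print(silhouette_score(X, cluster_labels))
# ... and one value per sample, which drives the per-cluster plot
sample_values = silhouette_samples(X, cluster_labels)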
10 changes: 6 additions & 4 deletions examples/cluster/plot_ward_structured_vs_unstructured.py
@@ -57,8 +57,9 @@
ax = p3.Axes3D(fig)
ax.view_init(7, -80)
for l in np.unique(label):
ax.plot3D(X[label == l, 0], X[label == l, 1], X[label == l, 2],
'o', color=plt.cm.jet(np.float(l) / np.max(label + 1)))
ax.scatter(X[label == l, 0], X[label == l, 1], X[label == l, 2],
color=plt.cm.jet(np.float(l) / np.max(label + 1)),
s=20, edgecolor='k')
plt.title('Without connectivity constraints (time %.2fs)' % elapsed_time)


@@ -84,8 +85,9 @@
ax = p3.Axes3D(fig)
ax.view_init(7, -80)
for l in np.unique(label):
ax.plot3D(X[label == l, 0], X[label == l, 1], X[label == l, 2],
'o', color=plt.cm.jet(float(l) / np.max(label + 1)))
ax.scatter(X[label == l, 0], X[label == l, 1], X[label == l, 2],
color=plt.cm.jet(float(l) / np.max(label + 1)),
s=20, edgecolor='k')
plt.title('With connectivity constraints (time %.2fs)' % elapsed_time)

plt.show()
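
The "with connectivity constraints" variant above comes from passing a connectivity matrix to the clusterer. A minimal sketch, with illustrative neighbor and cluster counts:

from sklearn.cluster import AgglomerativeClustering
from sklearn.datasets import make_swiss_roll
from sklearn.neighbors import kneighbors_graph

X, _ = make_swiss_roll(n_samples=1500, noise=0.05)

# a k-NN graph restricts merges to locally connected samples
connectivity = kneighbors_graph(X, n_neighbors=10, include_self=False)
ward = AgglomerativeClustering(n_clusters=6, connectivity=connectivity,
                               linkage='ward').fit(X)
label = ward.labels_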
6 changes: 4 additions & 2 deletions examples/covariance/plot_outlier_detection.py
@@ -107,8 +107,10 @@
linewidths=2, colors='red')
subplot.contourf(xx, yy, Z, levels=[threshold, Z.max()],
colors='orange')
b = subplot.scatter(X[:-n_outliers, 0], X[:-n_outliers, 1], c='white')
c = subplot.scatter(X[-n_outliers:, 0], X[-n_outliers:, 1], c='black')
b = subplot.scatter(X[:-n_outliers, 0], X[:-n_outliers, 1], c='white',
s=20, edgecolor='k')
c = subplot.scatter(X[-n_outliers:, 0], X[-n_outliers:, 1], c='black',
s=20, edgecolor='k')
subplot.axis('tight')
subplot.legend(
[a.collections[0], b, c],
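
The contour/scatter code above plots a decision function evaluated over a grid. A minimal sketch of that pattern with a single estimator; EllipticEnvelope and all values here are assumptions, since the example actually compares several outlier detectors.

import numpy as np
from sklearn.covariance import EllipticEnvelope

rng = np.random.RandomState(42)
X_inliers = 0.3 * rng.randn(100, 2)
X_outliers = rng.uniform(low=-6, high=6, size=(25, 2))
X = np.r_[X_inliers, X_outliers]

clf = EllipticEnvelope(contamination=0.2).fit(X)

xx, yy = np.meshgrid(np.linspace(-7, 7, 100), np.linspace(-7, 7, 100))
Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)
# the zero level set of Z is the learned inlier/outlier frontier;
# contour(xx, yy, Z, levels=[0]) draws it the way the example does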
2 changes: 1 addition & 1 deletion examples/plot_johnson_lindenstrauss_bound.py
@@ -187,7 +187,7 @@
% (np.mean(rates), np.std(rates)))

plt.figure()
plt.hist(rates, bins=50, normed=True, range=(0., 2.))
plt.hist(rates, bins=50, normed=True, range=(0., 2.), edgecolor='k')
plt.xlabel("Squared distances rate: projected / original")
plt.ylabel("Distribution of samples pairs")
plt.title("Histogram of pairwise distance rates for n_components=%d" %