12 changes: 7 additions & 5 deletions examples/calibration/plot_calibration.py
@@ -15,10 +15,11 @@

Compared are the estimated probability using a Gaussian naive Bayes classifier
without calibration, with a sigmoid calibration, and with a non-parametric
isotonic calibration. One can observe that only the non-parametric model is able
to provide a probability calibration that returns probabilities close to the
expected 0.5 for most of the samples belonging to the middle cluster with
heterogeneous labels. This results in a significantly improved Brier score.
isotonic calibration. One can observe that only the non-parametric model is
able to provide a probability calibration that returns probabilities close
to the expected 0.5 for most of the samples belonging to the middle
cluster with heterogeneous labels. This results in a significantly improved
Brier score.
"""
print(__doc__)

@@ -91,7 +92,8 @@
for this_y, color in zip(y_unique, colors):
this_X = X_train[y_train == this_y]
this_sw = sw_train[y_train == this_y]
plt.scatter(this_X[:, 0], this_X[:, 1], s=this_sw * 50, c=color, alpha=0.5,
plt.scatter(this_X[:, 0], this_X[:, 1], s=this_sw * 50, c=color,
alpha=0.5, edgecolor='k',
label="Class %s" % this_y)
plt.legend(loc="best")
plt.title("Data")
22 changes: 12 additions & 10 deletions examples/classification/plot_lda_qda.py
@@ -60,11 +60,11 @@ def plot_data(lda, X, y, y_pred, fig_index):
splot = plt.subplot(2, 2, fig_index)
if fig_index == 1:
plt.title('Linear Discriminant Analysis')
plt.ylabel('Data with fixed covariance')
plt.ylabel('Data with\n fixed covariance')
elif fig_index == 2:
plt.title('Quadratic Discriminant Analysis')
elif fig_index == 3:
plt.ylabel('Data with varying covariances')
plt.ylabel('Data with\n varying covariances')

tp = (y == y_pred) # True Positive
tp0, tp1 = tp[y == 0], tp[y == 1]
@@ -76,15 +76,15 @@ def plot_data(lda, X, y, y_pred, fig_index):

# class 0: dots
plt.plot(X0_tp[:, 0], X0_tp[:, 1], 'o', alpha=alpha,
color='red')
color='red', markeredgecolor='k')
plt.plot(X0_fp[:, 0], X0_fp[:, 1], '*', alpha=alpha,
color='#990000') # dark red
color='#990000', markeredgecolor='k') # dark red

# class 1: dots
plt.plot(X1_tp[:, 0], X1_tp[:, 1], 'o', alpha=alpha,
color='blue')
color='blue', markeredgecolor='k')
plt.plot(X1_fp[:, 0], X1_fp[:, 1], '*', alpha=alpha,
color='#000099') # dark blue
color='#000099', markeredgecolor='k') # dark blue

# class 0 and 1 : areas
nx, ny = 200, 100
@@ -100,9 +100,9 @@ def plot_data(lda, X, y, y_pred, fig_index):

# means
plt.plot(lda.means_[0][0], lda.means_[0][1],
'o', color='black', markersize=10)
'o', color='black', markersize=10, markeredgecolor='k')
plt.plot(lda.means_[1][0], lda.means_[1][1],
'o', color='black', markersize=10)
'o', color='black', markersize=10, markeredgecolor='k')

return splot

@@ -114,7 +114,8 @@ def plot_ellipse(splot, mean, cov, color):
angle = 180 * angle / np.pi # convert to degrees
# filled Gaussian at 2 standard deviation
ell = mpl.patches.Ellipse(mean, 2 * v[0] ** 0.5, 2 * v[1] ** 0.5,
180 + angle, facecolor=color, edgecolor='yellow',
180 + angle, facecolor=color,
edgecolor='yellow',
linewidth=2, zorder=2)
ell.set_clip_box(splot.bbox)
ell.set_alpha(0.5)
@@ -146,5 +147,6 @@ def plot_qda_cov(qda, splot):
splot = plot_data(qda, X, y, y_pred, fig_index=2 * i + 2)
plot_qda_cov(qda, splot)
plt.axis('tight')
plt.suptitle('Linear Discriminant Analysis vs Quadratic Discriminant Analysis')
plt.suptitle('Linear Discriminant Analysis vs Quadratic Discriminant '
             'Analysis')
plt.show()
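
The plot_ellipse hunk above only shows part of the function. For reference, here is a self-contained sketch of the same technique: drawing a covariance matrix as a filled 2-standard-deviation ellipse derived from its eigendecomposition. The eigenvector handling and the usage values are assumptions based on the visible lines, not the example's exact code.

import numpy as np
import matplotlib.pyplot as plt
from matplotlib.patches import Ellipse

def plot_cov_ellipse(ax, mean, cov, color):
    # eigh returns eigenvalues in ascending order, eigenvectors as columns
    v, w = np.linalg.eigh(cov)
    u = w[:, 1]  # direction of the largest variance
    angle = np.degrees(np.arctan2(u[1], u[0]))
    # filled Gaussian at 2 standard deviations (full axis lengths)
    ell = Ellipse(mean, 2 * v[1] ** 0.5, 2 * v[0] ** 0.5, angle=angle,
                  facecolor=color, edgecolor='yellow', linewidth=2, zorder=2)
    ell.set_clip_box(ax.bbox)
    ell.set_alpha(0.5)
    ax.add_artist(ell)

# usage sketch with a made-up covariance
fig, ax = plt.subplots()
plot_cov_ellipse(ax, mean=[0, 0],
                 cov=np.array([[2.0, 0.8], [0.8, 1.0]]), color='red')
ax.set_xlim(-4, 4)
ax.set_ylim(-4, 4)
plt.show()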
16 changes: 8 additions & 8 deletions examples/cluster/plot_birch_vs_minibatchkmeans.py
@@ -39,7 +39,6 @@

# Generate blobs to do a comparison between MiniBatchKMeans and Birch.
X, y = make_blobs(n_samples=100000, centers=n_centres, random_state=0)


# Use all colors that matplotlib provides by default.
colors_ = cycle(colors.cnames.keys())
@@ -69,11 +68,11 @@
ax = fig.add_subplot(1, 3, ind + 1)
for this_centroid, k, col in zip(centroids, range(n_clusters), colors_):
mask = labels == k
ax.plot(X[mask, 0], X[mask, 1], 'w',
markerfacecolor=col, marker='.')
ax.scatter(X[mask, 0], X[mask, 1],
c='w', edgecolor=col, marker='.', alpha=0.5)
if birch_model.n_clusters is None:
ax.plot(this_centroid[0], this_centroid[1], '+', markerfacecolor=col,
markeredgecolor='k', markersize=5)
ax.scatter(this_centroid[0], this_centroid[1], marker='+',
c='k', s=25)
ax.set_ylim([-25, 25])
ax.set_xlim([-25, 25])
ax.set_autoscaley_on(False)
@@ -93,9 +92,10 @@
for this_centroid, k, col in zip(mbk.cluster_centers_,
range(n_clusters), colors_):
mask = mbk.labels_ == k
ax.plot(X[mask, 0], X[mask, 1], 'w', markerfacecolor=col, marker='.')
ax.plot(this_centroid[0], this_centroid[1], '+', markeredgecolor='k',
markersize=5)
ax.scatter(X[mask, 0], X[mask, 1], marker='.',
c='w', edgecolor=col, alpha=0.5)
ax.scatter(this_centroid[0], this_centroid[1], marker='+',
c='k', s=25)
ax.set_xlim([-25, 25])
ax.set_ylim([-25, 25])
ax.set_title("MiniBatchKMeans")
40 changes: 21 additions & 19 deletions examples/cluster/plot_cluster_iris.py
@@ -25,9 +25,10 @@

import numpy as np
import matplotlib.pyplot as plt
# Though the following import is not directly being used, it is required
# for 3D projection to work
from mpl_toolkits.mplot3d import Axes3D


from sklearn.cluster import KMeans
from sklearn import datasets

@@ -43,50 +44,51 @@
'k_means_iris_bad_init': KMeans(n_clusters=3, n_init=1,
init='random')}


fig = plt.figure(figsize=(8, 6))
fignum = 1
titles = ['3 clusters', '8 clusters', '3 clusters, bad initialization']
for name, est in estimators.items():
fig = plt.figure(fignum, figsize=(4, 3))
plt.clf()
ax = Axes3D(fig, rect=[0, 0, .95, 1], elev=48, azim=134)

plt.cla()
ax = plt.subplot(2, 2, fignum, projection='3d',
                 elev=48, azim=134)

Member: Which matplotlib version does this require?

Contributor Author: Actually, matplotlib 1.5 requires this. The issue was that I wasn't importing Axes3D because it wasn't being used directly, but it turned out that projection='3d' requires Axes3D to be imported. It worked after adding that import. All good now.
est.fit(X)
labels = est.labels_

ax.scatter(X[:, 3], X[:, 0], X[:, 2], c=labels.astype(np.float))
ax.scatter(X[:, 3], X[:, 0], X[:, 2],
c=labels.astype(np.float), edgecolor='k')

ax.w_xaxis.set_ticklabels([])
ax.w_yaxis.set_ticklabels([])
ax.w_zaxis.set_ticklabels([])
ax.set_xlabel('Petal width')
ax.set_ylabel('Sepal length')
ax.set_zlabel('Petal length')
ax.set_title(titles[fignum - 1])
ax.dist = 12
fignum = fignum + 1

# Plot the ground truth
fig = plt.figure(fignum, figsize=(4, 3))
plt.clf()
ax = Axes3D(fig, rect=[0, 0, .95, 1], elev=48, azim=134)

plt.cla()

ax = plt.subplot(2, 2, 4, projection='3d',
elev=48, azim=134)
for name, label in [('Setosa', 0),
('Versicolour', 1),
('Virginica', 2)]:
ax.text3D(X[y == label, 3].mean(),
X[y == label, 0].mean() + 1.5,
X[y == label, 2].mean(), name,
X[y == label, 0].mean(),
X[y == label, 2].mean() + 2, name,
horizontalalignment='center',
bbox=dict(alpha=.5, edgecolor='w', facecolor='w'))
bbox=dict(alpha=.2, edgecolor='w', facecolor='w'))
# Reorder the labels to have colors matching the cluster results
y = np.choose(y, [1, 2, 0]).astype(np.float)
ax.scatter(X[:, 3], X[:, 0], X[:, 2], c=y)
ax.scatter(X[:, 3], X[:, 0], X[:, 2], c=y, edgecolor='k')

ax.w_xaxis.set_ticklabels([])
ax.w_yaxis.set_ticklabels([])
ax.w_zaxis.set_ticklabels([])
ax.set_xlabel('Petal width')
ax.set_ylabel('Sepal length')
ax.set_zlabel('Petal length')
plt.show()
ax.set_title('Ground Truth')
ax.dist = 12

fig.tight_layout()
fig.show()
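
As the review thread above notes, on matplotlib 1.x the '3d' projection is only registered once Axes3D has been imported, even though the name is never used directly. A minimal sketch of the pattern:

import matplotlib.pyplot as plt
# Not used directly, but the import registers the '3d' projection
# with matplotlib (required on matplotlib 1.x)
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401

fig = plt.figure()
ax = fig.add_subplot(1, 1, 1, projection='3d')
ax.scatter([0, 1], [0, 1], [0, 1])
plt.show()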
5 changes: 3 additions & 2 deletions examples/cluster/plot_kmeans_assumptions.py
@@ -34,7 +34,7 @@
plt.title("Incorrect Number of Blobs")

# Anisotropicly distributed data
transformation = [[ 0.60834549, -0.63667341], [-0.40887718, 0.85253229]]
transformation = [[0.60834549, -0.63667341], [-0.40887718, 0.85253229]]
X_aniso = np.dot(X, transformation)
y_pred = KMeans(n_clusters=3, random_state=random_state).fit_predict(X_aniso)

@@ -54,7 +54,8 @@

# Unevenly sized blobs
X_filtered = np.vstack((X[y == 0][:500], X[y == 1][:100], X[y == 2][:10]))
y_pred = KMeans(n_clusters=3, random_state=random_state).fit_predict(X_filtered)
y_pred = KMeans(n_clusters=3,
random_state=random_state).fit_predict(X_filtered)

plt.subplot(224)
plt.scatter(X_filtered[:, 0], X_filtered[:, 1], c=y_pred)
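
The anisotropic case in this example boils down to applying a fixed linear map before clustering, which violates the isotropic-cluster assumption of vanilla k-means. A standalone sketch (the blob parameters are illustrative):

import numpy as np
from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs

X, y = make_blobs(n_samples=1500, random_state=170)

# a fixed linear transformation stretches the blobs diagonally
transformation = [[0.60834549, -0.63667341], [-0.40887718, 0.85253229]]
X_aniso = np.dot(X, transformation)
y_pred = KMeans(n_clusters=3, random_state=170).fit_predict(X_aniso)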
9 changes: 5 additions & 4 deletions examples/cluster/plot_kmeans_silhouette_analysis.py
@@ -119,16 +119,17 @@
# 2nd Plot showing the actual clusters formed
colors = cm.spectral(cluster_labels.astype(float) / n_clusters)
ax2.scatter(X[:, 0], X[:, 1], marker='.', s=30, lw=0, alpha=0.7,
c=colors)
c=colors, edgecolor='k')

# Labeling the clusters
centers = clusterer.cluster_centers_
# Draw white circles at cluster centers
ax2.scatter(centers[:, 0], centers[:, 1],
marker='o', c="white", alpha=1, s=200)
ax2.scatter(centers[:, 0], centers[:, 1], marker='o',
c="white", alpha=1, s=200, edgecolor='k')

for i, c in enumerate(centers):
ax2.scatter(c[0], c[1], marker='$%d$' % i, alpha=1, s=50)
ax2.scatter(c[0], c[1], marker='$%d$' % i, alpha=1,
s=50, edgecolor='k')

ax2.set_title("The visualization of the clustered data.")
ax2.set_xlabel("Feature space for the 1st feature")
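
A minimal sketch of the silhouette computation this example is built around; the data and cluster count are illustrative assumptions.

from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs
from sklearn.metrics import silhouette_samples, silhouette_score

X, _ = make_blobs(n_samples=500, centers=4, random_state=1)

clusterer = KMeans(n_clusters=4, random_state=10)
cluster_labels = clusterer.fit_predict(X)

# one average score for the whole clustering ...
print(silhouette_score(X, cluster_labels))
# ... and one value per sample, which drives the per-cluster plot
sample_values = silhouette_samples(X, cluster_labels)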
10 changes: 6 additions & 4 deletions examples/cluster/plot_ward_structured_vs_unstructured.py
@@ -57,8 +57,9 @@
ax = p3.Axes3D(fig)
ax.view_init(7, -80)
for l in np.unique(label):
ax.plot3D(X[label == l, 0], X[label == l, 1], X[label == l, 2],
'o', color=plt.cm.jet(np.float(l) / np.max(label + 1)))
ax.scatter(X[label == l, 0], X[label == l, 1], X[label == l, 2],
color=plt.cm.jet(np.float(l) / np.max(label + 1)),
s=20, edgecolor='k')
plt.title('Without connectivity constraints (time %.2fs)' % elapsed_time)


@@ -84,8 +85,9 @@
ax = p3.Axes3D(fig)
ax.view_init(7, -80)
for l in np.unique(label):
ax.plot3D(X[label == l, 0], X[label == l, 1], X[label == l, 2],
'o', color=plt.cm.jet(float(l) / np.max(label + 1)))
ax.scatter(X[label == l, 0], X[label == l, 1], X[label == l, 2],
color=plt.cm.jet(float(l) / np.max(label + 1)),
s=20, edgecolor='k')
plt.title('With connectivity constraints (time %.2fs)' % elapsed_time)

plt.show()
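
The "with connectivity constraints" variant above comes from passing a connectivity matrix to the clusterer. A minimal sketch, with illustrative neighbor and cluster counts:

from sklearn.cluster import AgglomerativeClustering
from sklearn.datasets import make_swiss_roll
from sklearn.neighbors import kneighbors_graph

X, _ = make_swiss_roll(n_samples=1500, noise=0.05)

# a k-NN graph restricts merges to locally connected samples
connectivity = kneighbors_graph(X, n_neighbors=10, include_self=False)
ward = AgglomerativeClustering(n_clusters=6, connectivity=connectivity,
                               linkage='ward').fit(X)
label = ward.labels_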
6 changes: 4 additions & 2 deletions examples/covariance/plot_outlier_detection.py
@@ -107,8 +107,10 @@
linewidths=2, colors='red')
subplot.contourf(xx, yy, Z, levels=[threshold, Z.max()],
colors='orange')
b = subplot.scatter(X[:-n_outliers, 0], X[:-n_outliers, 1], c='white')
c = subplot.scatter(X[-n_outliers:, 0], X[-n_outliers:, 1], c='black')
b = subplot.scatter(X[:-n_outliers, 0], X[:-n_outliers, 1], c='white',
s=20, edgecolor='k')
c = subplot.scatter(X[-n_outliers:, 0], X[-n_outliers:, 1], c='black',
s=20, edgecolor='k')
subplot.axis('tight')
subplot.legend(
[a.collections[0], b, c],
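
The contour/scatter code above plots a decision function evaluated over a grid. A minimal sketch of that pattern with a single estimator; EllipticEnvelope and all values here are assumptions, since the example actually compares several outlier detectors.

import numpy as np
from sklearn.covariance import EllipticEnvelope

rng = np.random.RandomState(42)
X_inliers = 0.3 * rng.randn(100, 2)
X_outliers = rng.uniform(low=-6, high=6, size=(25, 2))
X = np.r_[X_inliers, X_outliers]

clf = EllipticEnvelope(contamination=0.2).fit(X)

xx, yy = np.meshgrid(np.linspace(-7, 7, 100), np.linspace(-7, 7, 100))
Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)
# the zero level set of Z is the learned inlier/outlier frontier;
# contour(xx, yy, Z, levels=[0]) draws it the way the example does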
2 changes: 1 addition & 1 deletion examples/plot_johnson_lindenstrauss_bound.py
@@ -187,7 +187,7 @@
% (np.mean(rates), np.std(rates)))

plt.figure()
plt.hist(rates, bins=50, normed=True, range=(0., 2.))
plt.hist(rates, bins=50, normed=True, range=(0., 2.), edgecolor='k')
plt.xlabel("Squared distances rate: projected / original")
plt.ylabel("Distribution of samples pairs")
plt.title("Histogram of pairwise distance rates for n_components=%d" %