Skip to content

Commit adc28e7

Browse files
committed
Finalized version of benchmark plots
1 parent 74fb483 commit adc28e7

File tree

1 file changed

+35
-31
lines changed

1 file changed

+35
-31
lines changed

benchmarks/bench_plot_incremental_pca.py

Lines changed: 35 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -28,37 +28,26 @@ def calc_time_and_err(tf, arr):
2828
return meas_time, meas_err
2929

3030

31-
def compute_feature_bench(arr, n_components, batch_size):
32-
print("===========================")
33-
print("Computing feature bench for n_components %i, batch_size %i" % (
34-
n_components, batch_size))
31+
def compute_pca_bench(arr, n_components):
    """Benchmark a plain ``PCA`` estimator on ``arr``.

    Prints a short banner, builds a ``PCA`` with the requested number of
    components and hands it to ``calc_time_and_err`` together with ``arr``.

    Parameters
    ----------
    arr : data passed through unchanged to ``calc_time_and_err``.
    n_components : int
        Number of components given to ``PCA`` (formatted with ``%i``).

    Returns
    -------
    tuple
        The ``(time, error)`` pair produced by ``calc_time_and_err``.
    """
    separator = "==========================="
    print(separator)
    print("Computing PCA bench for n_components %i" % n_components)

    estimator = PCA(n_components=n_components)
    elapsed, error = calc_time_and_err(estimator, arr)
    return elapsed, error
3937

40-
ipca = IncrementalPCA(n_components=n_components, batch_size=batch_size)
41-
ipca_time, ipca_err = calc_time_and_err(ipca, arr)
4238

39+
def compute_rpca_bench(arr, n_components, random_seed=1999):
    """Benchmark a ``RandomizedPCA`` estimator on ``arr``.

    Prints a short banner, builds a ``RandomizedPCA`` with the requested
    number of components and hands it to ``calc_time_and_err`` together
    with ``arr``.

    Parameters
    ----------
    arr : data passed through unchanged to ``calc_time_and_err``.
    n_components : int
        Number of components given to ``RandomizedPCA`` (formatted with
        ``%i``).
    random_seed : int, optional
        Seed forwarded as ``random_state`` to ``RandomizedPCA``. Defaults
        to 1999, the value that was previously hard-coded.

    Returns
    -------
    tuple
        The ``(time, error)`` pair produced by ``calc_time_and_err``.
    """
    print("===========================")
    print("Computing RandomizedPCA bench for n_components %i" % n_components)
    # Forward the caller's seed: the parameter used to be silently ignored
    # because ``random_state`` was pinned to the literal 1999.
    rpca = RandomizedPCA(n_components=n_components, random_state=random_seed)
    rpca_time, rpca_err = calc_time_and_err(rpca, arr)
    return rpca_time, rpca_err
5545

5646

5747
def compute_ipca_bench(arr, n_components, batch_size):
5848
print("===========================")
5949
print("Computing IncrementalPCA bench for n_components %i, batch_size %i" % (
6050
n_components, batch_size))
61-
print("===========================")
6251

6352
ipca = IncrementalPCA(n_components=n_components, batch_size=batch_size)
6453
ipca_time, ipca_err = calc_time_and_err(ipca, arr)
@@ -78,7 +67,7 @@ def plot_feature_times(all_pca_times, all_rpca_times, all_ipca_times,
7867
plt.ylabel("Time (seconds)")
7968

8069
ax = plt.gca()
81-
n_ticks = 11
70+
n_ticks = 10
8271
step_size = arr.shape[1] // 10
8372
ax.xaxis.set_major_locator(LinearLocator(numticks=n_ticks))
8473
ax.set_xticklabels(list(range(step_size, n_ticks * step_size - 1,
@@ -98,26 +87,28 @@ def plot_feature_errors(all_pca_err, all_rpca_err, all_ipca_err,
9887
plt.ylabel("Mean absolute error")
9988

10089
ax = plt.gca()
101-
n_ticks = 11
90+
n_ticks = 10
10291
step_size = arr.shape[1] // 10
10392
ax.xaxis.set_major_locator(LinearLocator(numticks=n_ticks))
10493
ax.set_xticklabels(list(range(step_size, n_ticks * step_size - 1,
10594
step_size)))
10695

10796

108-
def plot_batch_times(all_pca_times, all_ipca_times, n_features, arr):
97+
def plot_batch_times(all_pca_times, all_rpca_times, all_ipca_times,
98+
n_features, arr):
10999
plt.figure()
110100
plt.plot(all_pca_times, label="PCA")
101+
plt.plot(all_rpca_times, label="RandomizedPCA")
111102
plt.plot(all_ipca_times, label="IncrementalPCA")
112103
plt.legend(loc="lower left")
113-
plt.suptitle("Algorithm runtime vs. batch_size %i\n \
104+
plt.suptitle("Algorithm runtime vs. batch_size for n_features %i\n \
114105
Low-rank matix (effective_rank 10), size %i x %i" % (
115106
n_features, arr.shape[0], arr.shape[1]))
116107
plt.xlabel("Batch size")
117108
plt.ylabel("Time (seconds)")
118109

119110
ax = plt.gca()
120-
n_ticks = 11
111+
n_ticks = 10
121112
step_size = arr.shape[0] // 10
122113
ax.xaxis.set_major_locator(LinearLocator(numticks=n_ticks))
123114
ax.set_xticklabels(list(range(step_size, n_ticks * step_size - 1,
@@ -136,10 +127,11 @@ def plot_batch_errors(all_pca_err, all_ipca_err, n_features, arr):
136127
plt.ylabel("Mean absolute error")
137128

138129
ax = plt.gca()
139-
n_ticks = 11
130+
n_ticks = 10
140131
step_size = arr.shape[0] // 10
141132
ax.xaxis.set_major_locator(LinearLocator(numticks=n_ticks))
142-
ax.set_xticklabels(list(range(1, n_ticks * step_size - 1, step_size)))
133+
ax.set_xticklabels(list(range(step_size, n_ticks * step_size - 1,
134+
step_size)))
143135

144136

145137
def fixed_batch_size_comparison(arr):
@@ -155,8 +147,9 @@ def fixed_batch_size_comparison(arr):
155147
bs = 1000
156148
# Compare runtimes and error for fixed batch size
157149
for nf in all_features:
158-
(pca_time, rpca_time, ipca_time, pca_err,
159-
rpca_err, ipca_err) = compute_feature_bench(arr, nf, bs)
150+
pca_time, pca_err = compute_pca_bench(arr, nf)
151+
rpca_time, rpca_err = compute_rpca_bench(arr, nf)
152+
ipca_time, ipca_err = compute_ipca_bench(arr, nf, bs)
160153
all_pca_times.append(pca_time)
161154
all_rpca_times.append(rpca_time)
162155
all_ipca_times.append(ipca_time)
@@ -177,7 +170,7 @@ def fixed_batch_size_comparison(arr):
177170

178171
def variable_batch_size_comparison(arr):
179172
batch_sizes = list(map(int, np.linspace(arr.shape[0] // 10, arr.shape[0],
180-
num=5)))
173+
num=10)))
181174
all_features = list(map(int, np.linspace(arr.shape[1] // 10, arr.shape[1],
182175
num=4)))
183176
for nf in all_features:
@@ -187,6 +180,12 @@ def variable_batch_size_comparison(arr):
187180
all_pca_times.extend([pca_time] * len(batch_sizes))
188181
all_pca_err.extend([pca_err] * len(batch_sizes))
189182

183+
all_rpca_times = []
184+
all_rpca_err = []
185+
rpca_time, rpca_err = compute_rpca_bench(arr, nf)
186+
all_rpca_times.extend([rpca_time] * len(batch_sizes))
187+
all_rpca_err.extend([rpca_err] * len(batch_sizes))
188+
190189
all_ipca_times = []
191190
all_ipca_err = []
192191
for bs in batch_sizes:
@@ -195,14 +194,19 @@ def variable_batch_size_comparison(arr):
195194
all_ipca_err.append(ipca_err)
196195

197196
all_pca_times = np.array(all_pca_times)
198-
all_ipca_times = np.array(all_ipca_times)
199197
all_pca_err = np.array(all_pca_err)
198+
199+
all_rpca_times = np.array(all_rpca_times)
200+
all_rpca_err = np.array(all_rpca_err)
201+
202+
all_ipca_times = np.array(all_ipca_times)
200203
all_ipca_err = np.array(all_ipca_err)
201204

202-
plot_batch_times(all_pca_times, all_ipca_times, nf, arr)
205+
plot_batch_times(all_pca_times, all_rpca_times, all_ipca_times, nf, arr)
206+
# RandomizedPCA error is always worse (approx 100x) than other PCA tests
203207
plot_batch_errors(all_pca_err, all_ipca_err, nf, arr)
204208

205-
faces = fetch_lfw_people(resize=.15, min_faces_per_person=5)
209+
faces = fetch_lfw_people(resize=.2, min_faces_per_person=5)
206210
# limit dataset to 5000 people (don't care who they are!)
207211
X = faces.data[:5000]
208212
n_samples, h, w = faces.images.shape

0 commit comments

Comments
 (0)