@@ -28,37 +28,26 @@ def calc_time_and_err(tf, arr):
28
28
return meas_time , meas_err
29
29
30
30
31
- def compute_feature_bench (arr , n_components , batch_size ):
32
- print ("===========================" )
33
- print ("Computing feature bench for n_components %i, batch_size %i" % (
34
- n_components , batch_size ))
31
def compute_pca_bench(arr, n_components):
    """Benchmark a plain ``PCA`` fit on ``arr``.

    Prints a short banner, builds a ``PCA`` estimator with the requested
    number of components and hands it to ``calc_time_and_err``.

    Parameters
    ----------
    arr : data passed unchanged to ``calc_time_and_err``.
    n_components : int
        Number of components requested from ``PCA`` (also shown in the
        banner via ``%i``).

    Returns
    -------
    tuple
        ``(pca_time, pca_err)`` exactly as produced by
        ``calc_time_and_err``.
    """
    separator = "==========================="
    print(separator)
    print("Computing PCA bench for n_components %i" % n_components)

    estimator = PCA(n_components=n_components)
    elapsed, error = calc_time_and_err(estimator, arr)
    return elapsed, error
39
37
40
- ipca = IncrementalPCA (n_components = n_components , batch_size = batch_size )
41
- ipca_time , ipca_err = calc_time_and_err (ipca , arr )
42
38
39
def compute_rpca_bench(arr, n_components, random_seed=1999):
    """Benchmark a ``RandomizedPCA`` fit on ``arr``.

    Prints a short banner, builds a ``RandomizedPCA`` estimator with the
    requested number of components and hands it to ``calc_time_and_err``.

    Parameters
    ----------
    arr : data passed unchanged to ``calc_time_and_err``.
    n_components : int
        Number of components requested from ``RandomizedPCA`` (also shown
        in the banner via ``%i``).
    random_seed : int, optional
        Value forwarded as ``random_state`` to ``RandomizedPCA``.
        Defaults to 1999.

    Returns
    -------
    tuple
        ``(rpca_time, rpca_err)`` exactly as produced by
        ``calc_time_and_err``.
    """
    print("===========================")
    print("Computing RandomizedPCA bench for n_components %i" % n_components)
    # Forward the caller's seed; it used to be ignored in favour of a
    # hard-coded 1999, which made the ``random_seed`` argument a no-op.
    rpca = RandomizedPCA(n_components=n_components, random_state=random_seed)
    rpca_time, rpca_err = calc_time_and_err(rpca, arr)
    return rpca_time, rpca_err
55
45
56
46
57
47
def compute_ipca_bench (arr , n_components , batch_size ):
58
48
print ("===========================" )
59
49
print ("Computing IncrementalPCA bench for n_components %i, batch_size %i" % (
60
50
n_components , batch_size ))
61
- print ("===========================" )
62
51
63
52
ipca = IncrementalPCA (n_components = n_components , batch_size = batch_size )
64
53
ipca_time , ipca_err = calc_time_and_err (ipca , arr )
@@ -78,7 +67,7 @@ def plot_feature_times(all_pca_times, all_rpca_times, all_ipca_times,
78
67
plt .ylabel ("Time (seconds)" )
79
68
80
69
ax = plt .gca ()
81
- n_ticks = 11
70
+ n_ticks = 10
82
71
step_size = arr .shape [1 ] // 10
83
72
ax .xaxis .set_major_locator (LinearLocator (numticks = n_ticks ))
84
73
ax .set_xticklabels (list (range (step_size , n_ticks * step_size - 1 ,
@@ -98,26 +87,28 @@ def plot_feature_errors(all_pca_err, all_rpca_err, all_ipca_err,
98
87
plt .ylabel ("Mean absolute error" )
99
88
100
89
ax = plt .gca ()
101
- n_ticks = 11
90
+ n_ticks = 10
102
91
step_size = arr .shape [1 ] // 10
103
92
ax .xaxis .set_major_locator (LinearLocator (numticks = n_ticks ))
104
93
ax .set_xticklabels (list (range (step_size , n_ticks * step_size - 1 ,
105
94
step_size )))
106
95
107
96
108
- def plot_batch_times (all_pca_times , all_ipca_times , n_features , arr ):
97
+ def plot_batch_times (all_pca_times , all_rpca_times , all_ipca_times ,
98
+ n_features , arr ):
109
99
plt .figure ()
110
100
plt .plot (all_pca_times , label = "PCA" )
101
+ plt .plot (all_rpca_times , label = "RandomizedPCA" )
111
102
plt .plot (all_ipca_times , label = "IncrementalPCA" )
112
103
plt .legend (loc = "lower left" )
113
- plt .suptitle ("Algorithm runtime vs. batch_size %i\n \
104
+ plt .suptitle ("Algorithm runtime vs. batch_size for n_features %i\n \
114
105
Low-rank matix (effective_rank 10), size %i x %i" % (
115
106
n_features , arr .shape [0 ], arr .shape [1 ]))
116
107
plt .xlabel ("Batch size" )
117
108
plt .ylabel ("Time (seconds)" )
118
109
119
110
ax = plt .gca ()
120
- n_ticks = 11
111
+ n_ticks = 10
121
112
step_size = arr .shape [0 ] // 10
122
113
ax .xaxis .set_major_locator (LinearLocator (numticks = n_ticks ))
123
114
ax .set_xticklabels (list (range (step_size , n_ticks * step_size - 1 ,
@@ -136,10 +127,11 @@ def plot_batch_errors(all_pca_err, all_ipca_err, n_features, arr):
136
127
plt .ylabel ("Mean absolute error" )
137
128
138
129
ax = plt .gca ()
139
- n_ticks = 11
130
+ n_ticks = 10
140
131
step_size = arr .shape [0 ] // 10
141
132
ax .xaxis .set_major_locator (LinearLocator (numticks = n_ticks ))
142
- ax .set_xticklabels (list (range (1 , n_ticks * step_size - 1 , step_size )))
133
+ ax .set_xticklabels (list (range (step_size , n_ticks * step_size - 1 ,
134
+ step_size )))
143
135
144
136
145
137
def fixed_batch_size_comparison (arr ):
@@ -155,8 +147,9 @@ def fixed_batch_size_comparison(arr):
155
147
bs = 1000
156
148
# Compare runtimes and error for fixed batch size
157
149
for nf in all_features :
158
- (pca_time , rpca_time , ipca_time , pca_err ,
159
- rpca_err , ipca_err ) = compute_feature_bench (arr , nf , bs )
150
+ pca_time , pca_err = compute_pca_bench (arr , nf )
151
+ rpca_time , rpca_err = compute_rpca_bench (arr , nf )
152
+ ipca_time , ipca_err = compute_ipca_bench (arr , nf , bs )
160
153
all_pca_times .append (pca_time )
161
154
all_rpca_times .append (rpca_time )
162
155
all_ipca_times .append (ipca_time )
@@ -177,7 +170,7 @@ def fixed_batch_size_comparison(arr):
177
170
178
171
def variable_batch_size_comparison (arr ):
179
172
batch_sizes = list (map (int , np .linspace (arr .shape [0 ] // 10 , arr .shape [0 ],
180
- num = 5 )))
173
+ num = 10 )))
181
174
all_features = list (map (int , np .linspace (arr .shape [1 ] // 10 , arr .shape [1 ],
182
175
num = 4 )))
183
176
for nf in all_features :
@@ -187,6 +180,12 @@ def variable_batch_size_comparison(arr):
187
180
all_pca_times .extend ([pca_time ] * len (batch_sizes ))
188
181
all_pca_err .extend ([pca_err ] * len (batch_sizes ))
189
182
183
+ all_rpca_times = []
184
+ all_rpca_err = []
185
+ rpca_time , rpca_err = compute_rpca_bench (arr , nf )
186
+ all_rpca_times .extend ([rpca_time ] * len (batch_sizes ))
187
+ all_rpca_err .extend ([rpca_err ] * len (batch_sizes ))
188
+
190
189
all_ipca_times = []
191
190
all_ipca_err = []
192
191
for bs in batch_sizes :
@@ -195,14 +194,19 @@ def variable_batch_size_comparison(arr):
195
194
all_ipca_err .append (ipca_err )
196
195
197
196
all_pca_times = np .array (all_pca_times )
198
- all_ipca_times = np .array (all_ipca_times )
199
197
all_pca_err = np .array (all_pca_err )
198
+
199
+ all_rpca_times = np .array (all_rpca_times )
200
+ all_rpca_err = np .array (all_rpca_err )
201
+
202
+ all_ipca_times = np .array (all_ipca_times )
200
203
all_ipca_err = np .array (all_ipca_err )
201
204
202
- plot_batch_times (all_pca_times , all_ipca_times , nf , arr )
205
+ plot_batch_times (all_pca_times , all_rpca_times , all_ipca_times , nf , arr )
206
+ # RandomizedPCA error is always worse (approx 100x) than other PCA tests
203
207
plot_batch_errors (all_pca_err , all_ipca_err , nf , arr )
204
208
205
- faces = fetch_lfw_people (resize = .15 , min_faces_per_person = 5 )
209
+ faces = fetch_lfw_people (resize = .2 , min_faces_per_person = 5 )
206
210
# limit dataset to 5000 people (don't care who they are!)
207
211
X = faces .data [:5000 ]
208
212
n_samples , h , w = faces .images .shape
0 commit comments