@@ -134,21 +134,6 @@ struct option_struct {
134
134
}
135
135
};
136
136
137
- double actual (const benchmarker& feature) {
138
- return feature.stage1 .best .elapsed_ns () / feature.stats ->blocks ;
139
- }
140
- double diff (const benchmarker& feature, const benchmarker& struct7) {
141
- if (feature.stats ->blocks == struct7.stats ->blocks ) {
142
- return (feature.stage1 .best .elapsed_ns () - struct7.stage1 .best .elapsed_ns ()) / struct7.stats ->blocks ;
143
- } else {
144
- return (feature.stage1 .best .elapsed_ns () / feature.stats ->blocks ) - (struct7.stage1 .best .elapsed_ns () / struct7.stats ->blocks );
145
- }
146
- }
147
- double diff_miss (const benchmarker& feature, const benchmarker& struct7) {
148
- // There are roughly 2650 branch mispredicts, so we have to scale it so it represents a per block amount
149
- return diff (feature, struct7) * 10000.0 / 2650.0 ;
150
- }
151
-
152
137
struct feature_benchmarker {
153
138
benchmarker utf8;
154
139
benchmarker utf8_miss;
@@ -192,87 +177,182 @@ struct feature_benchmarker {
192
177
struct23_miss.run_iterations (iterations, stage1_only);
193
178
}
194
179
195
- void print () {
196
- printf (" base (ns/block)" );
197
- printf (" ,struct 1-7" );
198
- printf (" ,struct 1-7 miss" );
199
- printf (" ,utf-8" );
200
- printf (" ,utf-8 miss" );
201
- printf (" ,struct 8-15" );
202
- printf (" ,struct 8-15 miss" );
203
- printf (" ,struct 16+" );
204
- printf (" ,struct 16+ miss" );
205
- printf (" \n " );
206
-
207
- printf (" %g" , actual (empty));
208
- printf (" ,%+g" , diff (struct7, empty));
209
- printf (" ,%+g" , diff (struct7_miss, struct7));
210
- printf (" ,%+g" , diff (utf8, struct7));
211
- printf (" ,%+g" , diff (utf8_miss, utf8));
212
- printf (" ,%+g" , diff (struct15, struct7));
213
- printf (" ,%+g" , diff (struct15_miss, struct15));
214
- printf (" ,%+g" , diff (struct23, struct15));
215
- printf (" ,%+g" , diff (struct23_miss, struct23));
216
- printf (" \n " );
180
+ double cost_per_block (const benchmarker& feature, size_t feature_blocks, const benchmarker& base) const {
181
+ return (feature.stage1 .best .elapsed_ns () - base.stage1 .best .elapsed_ns ()) / feature_blocks;
217
182
}
218
183
219
- double cost_per_block (benchmarker& feature, size_t feature_blocks, benchmarker& base) {
220
- return (feature.stage1 .best .elapsed_ns () - base.stage1 .best .elapsed_ns ()) / feature_blocks;
184
+ // Whether we're recording cache miss and branch miss events
185
+ bool has_events () const {
186
+ return empty.collector .has_events ();
221
187
}
222
188
223
189
// Base cost of any block (including empty ones)
224
- double base_cost () {
190
+ double base_cost () const {
225
191
return (empty.stage1 .best .elapsed_ns () / empty.stats ->blocks );
226
192
}
193
+
227
194
// Extra cost of a 1-7 structural block over an empty block
228
- double struct1_7_cost () {
195
+ double struct1_7_cost () const {
229
196
return cost_per_block (struct7, struct7.stats ->blocks_with_1_structural , empty);
230
197
}
231
198
// Extra cost of an 1-7-structural miss
232
- double struct1_7_miss_cost () {
199
+ double struct1_7_miss_cost () const {
233
200
return cost_per_block (struct7_miss, struct7_miss.stats ->blocks_with_1_structural , struct7);
234
201
}
202
+ // Rate of 1-7-structural misses per 8-structural flip
203
+ double struct1_7_miss_rate () const {
204
+ if (!has_events ()) { return 1 ; }
205
+ return double (struct7_miss.stage1 .best .branch_misses () - struct7.stage1 .best .branch_misses ()) / struct7_miss.stats ->blocks_with_1_structural_flipped ;
206
+ }
207
+
235
208
// Extra cost of an 8-15 structural block over a 1-7 structural block
236
- double struct8_15_cost () {
209
+ double struct8_15_cost () const {
237
210
return cost_per_block (struct15, struct15.stats ->blocks_with_8_structurals , struct7);
238
211
}
239
212
// Extra cost of an 8-15-structural miss over a 1-7 miss
240
- double struct8_15_miss_cost () {
213
+ double struct8_15_miss_cost () const {
241
214
return cost_per_block (struct15_miss, struct15_miss.stats ->blocks_with_8_structurals_flipped , struct15);
242
215
}
216
+ // Rate of 8-15-structural misses per 8-structural flip
217
+ double struct8_15_miss_rate () const {
218
+ if (!has_events ()) { return 1 ; }
219
+ return double (struct15_miss.stage1 .best .branch_misses () - struct15.stage1 .best .branch_misses ()) / struct15_miss.stats ->blocks_with_8_structurals_flipped ;
220
+ }
221
+
243
222
// Extra cost of a 16+-structural block over an 8-15 structural block (actual varies based on # of structurals!)
244
- double struct16_cost () {
223
+ double struct16_cost () const {
245
224
return cost_per_block (struct23, struct23.stats ->blocks_with_16_structurals , struct15);
246
225
}
247
226
// Extra cost of a 16-structural miss over an 8-15 miss
248
- double struct16_miss_cost () {
227
+ double struct16_miss_cost () const {
249
228
return cost_per_block (struct23_miss, struct23_miss.stats ->blocks_with_16_structurals_flipped , struct23);
250
229
}
230
+ // Rate of 16-structural misses per 16-structural flip
231
+ double struct16_miss_rate () const {
232
+ if (!has_events ()) { return 1 ; }
233
+ return double (struct23_miss.stage1 .best .branch_misses () - struct23.stage1 .best .branch_misses ()) / struct23_miss.stats ->blocks_with_16_structurals_flipped ;
234
+ }
235
+
251
236
// Extra cost of having UTF-8 in a block
252
- double utf8_cost () {
237
+ double utf8_cost () const {
253
238
return cost_per_block (utf8, utf8.stats ->blocks_with_utf8 , struct7_full);
254
239
}
255
240
// Extra cost of a UTF-8 miss
256
- double utf8_miss_cost () {
241
+ double utf8_miss_cost () const {
257
242
return cost_per_block (utf8_miss, utf8_miss.stats ->blocks_with_utf8_flipped , utf8);
258
243
}
244
+ // Rate of UTF-8 misses per UTF-8 flip
245
+ double utf8_miss_rate () const {
246
+ if (!has_events ()) { return 1 ; }
247
+ return double (utf8_miss.stage1 .best .branch_misses () - utf8.stage1 .best .branch_misses ()) / utf8_miss.stats ->blocks_with_utf8_flipped ;
248
+ }
249
+
250
+ double calc_expected_feature_cost (const benchmarker& file) const {
251
+ // Expected base ns/block (empty)
252
+ json_stats& stats = *file.stats ;
253
+ double expected = base_cost () * stats.blocks ;
254
+ expected += struct1_7_cost () * stats.blocks_with_1_structural ;
255
+ expected += utf8_cost () * stats.blocks_with_utf8 ;
256
+ expected += struct8_15_cost () * stats.blocks_with_8_structurals ;
257
+ expected += struct16_cost () * stats.blocks_with_16_structurals ;
258
+ return expected / stats.blocks ;
259
+ }
259
260
260
- double calc_expected ( benchmarker& file) {
261
+ double calc_expected_miss_cost ( const benchmarker& file) const {
261
262
// Expected base ns/block (empty)
262
263
json_stats& stats = *file.stats ;
263
- double expected = base_cost () * stats.blocks ;
264
- expected += struct1_7_cost () * stats.blocks_with_1_structural ;
265
- expected += struct1_7_miss_cost () * stats.blocks_with_1_structural_flipped ;
266
- expected += utf8_cost () * stats.blocks_with_utf8 ;
267
- expected += utf8_miss_cost () * stats.blocks_with_utf8_flipped ;
268
- expected += struct8_15_cost () * stats.blocks_with_8_structurals ;
269
- expected += struct8_15_miss_cost () * stats.blocks_with_8_structurals_flipped ;
270
- expected += struct16_cost () * stats.blocks_with_16_structurals ;
271
- expected += struct16_miss_cost () * stats.blocks_with_16_structurals_flipped ;
264
+ double expected = struct1_7_miss_cost () * stats.blocks_with_1_structural_flipped * struct1_7_miss_rate ();
265
+ expected += utf8_miss_cost () * stats.blocks_with_utf8_flipped * utf8_miss_rate ();
266
+ expected += struct8_15_miss_cost () * stats.blocks_with_8_structurals_flipped * struct8_15_miss_rate ();
267
+ expected += struct16_miss_cost () * stats.blocks_with_16_structurals_flipped * struct16_miss_rate ();
272
268
return expected / stats.blocks ;
273
269
}
270
+
271
+ double calc_expected_misses (const benchmarker& file) const {
272
+ json_stats& stats = *file.stats ;
273
+ double expected = stats.blocks_with_1_structural_flipped * struct1_7_miss_rate ();
274
+ expected += stats.blocks_with_utf8_flipped * utf8_miss_rate ();
275
+ expected += stats.blocks_with_8_structurals_flipped * struct8_15_miss_rate ();
276
+ expected += stats.blocks_with_16_structurals_flipped * struct16_miss_rate ();
277
+ return expected;
278
+ }
279
+
280
+ double calc_expected (const benchmarker& file) const {
281
+ return calc_expected_feature_cost (file) + calc_expected_miss_cost (file);
282
+ }
283
+
284
+ void print () {
285
+ printf (" \n " );
286
+ printf (" Features in ns/block (64 bytes):\n " );
287
+ printf (" \n " );
288
+ printf (" | %-8s " , " Stage" );
289
+ printf (" | %8s " , " Base" );
290
+ printf (" | %8s " , " 7 Struct" );
291
+ printf (" | %8s " , " UTF-8" );
292
+ printf (" | %8s " , " 15 Str." );
293
+ printf (" | %8s " , " 16+ Str." );
294
+ printf (" | %15s " , " 7 Struct Miss" );
295
+ printf (" | %15s " , " UTF-8 Miss" );
296
+ printf (" | %15s " , " 15 Str. Miss" );
297
+ printf (" | %15s " , " 16+ Str. Miss" );
298
+ printf (" |\n " );
299
+
300
+ printf (" |%.10s" , " ---------------------------------------" );
301
+ printf (" |%.10s" , " ---------------------------------------" );
302
+ printf (" |%.10s" , " ---------------------------------------" );
303
+ printf (" |%.10s" , " ---------------------------------------" );
304
+ printf (" |%.10s" , " ---------------------------------------" );
305
+ printf (" |%.10s" , " ---------------------------------------" );
306
+ printf (" |%.17s" , " ---------------------------------------" );
307
+ printf (" |%.17s" , " ---------------------------------------" );
308
+ printf (" |%.17s" , " ---------------------------------------" );
309
+ printf (" |%.17s" , " ---------------------------------------" );
310
+ printf (" |\n " );
311
+
312
+ printf (" | %-8s " , " Stage 1" );
313
+ printf (" | %8.3g " , base_cost ());
314
+ printf (" | %8.3g " , struct1_7_cost ());
315
+ printf (" | %8.3g " , utf8_cost ());
316
+ printf (" | %8.3g " , struct8_15_cost ());
317
+ printf (" | %8.3g " , struct16_cost ());
318
+ if (has_events ()) {
319
+ printf (" | %8.3g (%3d%%) " , struct1_7_miss_cost (), int (struct1_7_miss_rate ()*100 ));
320
+ printf (" | %8.3g (%3d%%) " , utf8_miss_cost (), int (utf8_miss_rate ()*100 ));
321
+ printf (" | %8.3g (%3d%%) " , struct8_15_miss_cost (), int (struct8_15_miss_rate ()*100 ));
322
+ printf (" | %8.3g (%3d%%) " , struct16_miss_cost (), int (struct16_miss_rate ()*100 ));
323
+ } else {
324
+ printf (" | %8.3g " , struct1_7_miss_cost ());
325
+ printf (" | %8.3g " , utf8_miss_cost ());
326
+ printf (" | %8.3g " , struct8_15_miss_cost ());
327
+ printf (" | %8.3g " , struct16_miss_cost ());
328
+ }
329
+ printf (" |\n " );
330
+ }
274
331
};
275
332
333
+ void print_file_effectiveness (const char * filename, const benchmarker& results, const feature_benchmarker& features) {
334
+ double actual = results.stage1 .best .elapsed_ns () / results.stats ->blocks ;
335
+ double calc = features.calc_expected (results);
336
+ uint64_t actual_misses = results.stage1 .best .branch_misses ();
337
+ uint64_t calc_misses = uint64_t (features.calc_expected_misses (results));
338
+ double calc_miss_cost = features.calc_expected_miss_cost (results);
339
+ printf (" | %-15s " , filename);
340
+ printf (" | %8.3g " , features.calc_expected_feature_cost (results));
341
+ printf (" | %8.3g " , calc_miss_cost);
342
+ printf (" | %8.3g " , calc);
343
+ printf (" | %8.3g " , actual);
344
+ printf (" | %+8.3g " , actual - calc);
345
+ printf (" | %13lu " , calc_misses);
346
+ if (features.has_events ()) {
347
+ printf (" | %13lu " , actual_misses);
348
+ printf (" | %+13ld " , int64_t (actual_misses - calc_misses));
349
+ double miss_adjustment = calc_miss_cost * (double (int64_t (actual_misses - calc_misses)) / calc_misses);
350
+ printf (" | %8.3g " , calc_miss_cost + miss_adjustment);
351
+ printf (" | %+8.3g " , actual - (calc + miss_adjustment));
352
+ }
353
+ printf (" |\n " );
354
+ }
355
+
276
356
int main (int argc, char *argv[]) {
277
357
// Read options
278
358
exe_name = argv[0 ];
@@ -317,10 +397,42 @@ int main(int argc, char *argv[]) {
317
397
318
398
features.print ();
319
399
320
- // Gauge effectiveness
321
- printf (" gsoc-2018.json expected/actual: %g/%g\n " , features.calc_expected (gsoc_2018), actual (gsoc_2018));
322
- printf (" twitter.json expected/actual: %g/%g\n " , features.calc_expected (twitter), actual (twitter));
323
- printf (" random.json expected/actual: %g/%g\n " , features.calc_expected (random ), actual (random ));
400
+ // Gauge effectiveness
401
+ printf (" \n " );
402
+ printf (" Estimated vs. Actual ns/block for real files:\n " );
403
+ printf (" \n " );
404
+ printf (" | %-15s " , " File" );
405
+ printf (" | %11s " , " Est. (Base)" );
406
+ printf (" | %11s " , " Est. (Miss)" );
407
+ printf (" | %8s " , " Est." );
408
+ printf (" | %8s " , " Actual" );
409
+ printf (" | %8s " , " Diff" );
410
+ printf (" | %13s " , " Est. Misses" );
411
+ if (features.has_events ()) {
412
+ printf (" | %13s " , " Actual Misses" );
413
+ printf (" | %13s " , " Diff (Misses)" );
414
+ printf (" | %13s " , " Adjusted Miss" );
415
+ printf (" | %13s " , " Adjusted Diff" );
416
+ }
417
+ printf (" |\n " );
418
+ printf (" |%.17s" , " ---------------------------------------" );
419
+ printf (" |%.13s" , " ---------------------------------------" );
420
+ printf (" |%.13s" , " ---------------------------------------" );
421
+ printf (" |%.10s" , " ---------------------------------------" );
422
+ printf (" |%.10s" , " ---------------------------------------" );
423
+ printf (" |%.10s" , " ---------------------------------------" );
424
+ printf (" |%.15s" , " ---------------------------------------" );
425
+ if (features.has_events ()) {
426
+ printf (" |%.15s" , " ---------------------------------------" );
427
+ printf (" |%.15s" , " ---------------------------------------" );
428
+ printf (" |%.15s" , " ---------------------------------------" );
429
+ printf (" |%.15s" , " ---------------------------------------" );
430
+ }
431
+ printf (" |\n " );
432
+
433
+ print_file_effectiveness (" gsoc-2018.json" , gsoc_2018, features);
434
+ print_file_effectiveness (" twitter.json" , twitter, features);
435
+ print_file_effectiveness (" random.json" , random , features);
324
436
325
437
return EXIT_SUCCESS;
326
438
}
0 commit comments