Skip to content

Commit 6091631

Browse files
committed
Show miss rate, make it more accurate
1 parent d7c8339 commit 6091631

File tree

2 files changed

+177
-64
lines changed

2 files changed

+177
-64
lines changed

Makefile

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@ LIBFILES=src/jsonioutil.cpp src/jsonparser.cpp src/jsonstream.cpp src/simdjson.c
7777
MINIFIERHEADERS=include/simdjson/jsonminifier.h
7878
MINIFIERLIBFILES=src/jsonminifier.cpp
7979

80+
FEATURE_JSON_FILES=jsonexamples/generated/0-structurals-full.json jsonexamples/generated/15-structurals-miss.json jsonexamples/generated/7-structurals.json jsonexamples/generated/0-structurals.json jsonexamples/generated/23-structurals-full.json jsonexamples/generated/7-structurals-miss.json jsonexamples/generated/0-structurals-miss.json jsonexamples/generated/23-structurals.json jsonexamples/generated/utf-8-full.json jsonexamples/generated/15-structurals-full.json jsonexamples/generated/23-structurals-miss.json jsonexamples/generated/utf-8.json jsonexamples/generated/15-structurals.json jsonexamples/generated/7-structurals-full.json jsonexamples/generated/utf-8-miss.json
8081

8182
RAPIDJSON_INCLUDE:=dependencies/rapidjson/include
8283
SAJSON_INCLUDE:=dependencies/sajson/include
@@ -131,10 +132,10 @@ run_issue150_sh: allparserscheckfile
131132
run_testjson2json_sh: minify json2json
132133
./scripts/testjson2json.sh
133134

134-
generate_featurejson:
135+
$(FEATURE_JSON_FILES): benchmark/genfeaturejson.rb
135136
ruby ./benchmark/genfeaturejson.rb
136137

137-
run_benchfeatures: benchfeatures generate_featurejson
138+
run_benchfeatures: benchfeatures $(FEATURE_JSON_FILES)
138139
./benchfeatures -n 1000
139140

140141
test: run_basictests run_jsoncheck run_numberparsingcheck run_integer_tests run_stringparsingcheck run_jsonstream_test run_pointercheck run_testjson2json_sh run_issue150_sh run_jsoncheck_noavx

benchmark/benchfeatures.cpp

Lines changed: 174 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -134,21 +134,6 @@ struct option_struct {
134134
}
135135
};
136136

137-
double actual(const benchmarker& feature) {
138-
return feature.stage1.best.elapsed_ns() / feature.stats->blocks;
139-
}
140-
double diff(const benchmarker& feature, const benchmarker& struct7) {
141-
if (feature.stats->blocks == struct7.stats->blocks) {
142-
return (feature.stage1.best.elapsed_ns() - struct7.stage1.best.elapsed_ns()) / struct7.stats->blocks;
143-
} else {
144-
return (feature.stage1.best.elapsed_ns() / feature.stats->blocks) - (struct7.stage1.best.elapsed_ns() / struct7.stats->blocks);
145-
}
146-
}
147-
double diff_miss(const benchmarker& feature, const benchmarker& struct7) {
148-
// There are roughly 2650 branch mispredicts, so we have to scale it so it represents a per block amount
149-
return diff(feature, struct7) * 10000.0 / 2650.0;
150-
}
151-
152137
struct feature_benchmarker {
153138
benchmarker utf8;
154139
benchmarker utf8_miss;
@@ -192,87 +177,182 @@ struct feature_benchmarker {
192177
struct23_miss.run_iterations(iterations, stage1_only);
193178
}
194179

195-
void print() {
196-
printf("base (ns/block)");
197-
printf(",struct 1-7");
198-
printf(",struct 1-7 miss");
199-
printf(",utf-8");
200-
printf(",utf-8 miss");
201-
printf(",struct 8-15");
202-
printf(",struct 8-15 miss");
203-
printf(",struct 16+");
204-
printf(",struct 16+ miss");
205-
printf("\n");
206-
207-
printf("%g", actual(empty));
208-
printf(",%+g", diff(struct7, empty));
209-
printf(",%+g", diff(struct7_miss, struct7));
210-
printf(",%+g", diff(utf8, struct7));
211-
printf(",%+g", diff(utf8_miss, utf8));
212-
printf(",%+g", diff(struct15, struct7));
213-
printf(",%+g", diff(struct15_miss, struct15));
214-
printf(",%+g", diff(struct23, struct15));
215-
printf(",%+g", diff(struct23_miss, struct23));
216-
printf("\n");
180+
double cost_per_block(const benchmarker& feature, size_t feature_blocks, const benchmarker& base) const {
181+
return (feature.stage1.best.elapsed_ns() - base.stage1.best.elapsed_ns()) / feature_blocks;
217182
}
218183

219-
double cost_per_block(benchmarker& feature, size_t feature_blocks, benchmarker& base) {
220-
return (feature.stage1.best.elapsed_ns() - base.stage1.best.elapsed_ns()) / feature_blocks;
184+
// Whether we're recording cache miss and branch miss events
185+
bool has_events() const {
186+
return empty.collector.has_events();
221187
}
222188

223189
// Base cost of any block (including empty ones)
224-
double base_cost() {
190+
double base_cost() const {
225191
return (empty.stage1.best.elapsed_ns() / empty.stats->blocks);
226192
}
193+
227194
// Extra cost of a 1-7 structural block over an empty block
228-
double struct1_7_cost() {
195+
double struct1_7_cost() const {
229196
return cost_per_block(struct7, struct7.stats->blocks_with_1_structural, empty);
230197
}
231198
// Extra cost of a 1-7-structural miss
232-
double struct1_7_miss_cost() {
199+
double struct1_7_miss_cost() const {
233200
return cost_per_block(struct7_miss, struct7_miss.stats->blocks_with_1_structural, struct7);
234201
}
202+
// Rate of 1-7-structural misses per 1-7-structural flip
203+
double struct1_7_miss_rate() const {
204+
if (!has_events()) { return 1; }
205+
return double(struct7_miss.stage1.best.branch_misses() - struct7.stage1.best.branch_misses()) / struct7_miss.stats->blocks_with_1_structural_flipped;
206+
}
207+
235208
// Extra cost of an 8-15 structural block over a 1-7 structural block
236-
double struct8_15_cost() {
209+
double struct8_15_cost() const {
237210
return cost_per_block(struct15, struct15.stats->blocks_with_8_structurals, struct7);
238211
}
239212
// Extra cost of an 8-15-structural miss over a 1-7 miss
240-
double struct8_15_miss_cost() {
213+
double struct8_15_miss_cost() const {
241214
return cost_per_block(struct15_miss, struct15_miss.stats->blocks_with_8_structurals_flipped, struct15);
242215
}
216+
// Rate of 8-15-structural misses per 8-structural flip
217+
double struct8_15_miss_rate() const {
218+
if (!has_events()) { return 1; }
219+
return double(struct15_miss.stage1.best.branch_misses() - struct15.stage1.best.branch_misses()) / struct15_miss.stats->blocks_with_8_structurals_flipped;
220+
}
221+
243222
// Extra cost of a 16+-structural block over an 8-15 structural block (actual varies based on # of structurals!)
244-
double struct16_cost() {
223+
double struct16_cost() const {
245224
return cost_per_block(struct23, struct23.stats->blocks_with_16_structurals, struct15);
246225
}
247226
// Extra cost of a 16-structural miss over an 8-15 miss
248-
double struct16_miss_cost() {
227+
double struct16_miss_cost() const {
249228
return cost_per_block(struct23_miss, struct23_miss.stats->blocks_with_16_structurals_flipped, struct23);
250229
}
230+
// Rate of 16-structural misses per 16-structural flip
231+
double struct16_miss_rate() const {
232+
if (!has_events()) { return 1; }
233+
return double(struct23_miss.stage1.best.branch_misses() - struct23.stage1.best.branch_misses()) / struct23_miss.stats->blocks_with_16_structurals_flipped;
234+
}
235+
251236
// Extra cost of having UTF-8 in a block
252-
double utf8_cost() {
237+
double utf8_cost() const {
253238
return cost_per_block(utf8, utf8.stats->blocks_with_utf8, struct7_full);
254239
}
255240
// Extra cost of a UTF-8 miss
256-
double utf8_miss_cost() {
241+
double utf8_miss_cost() const {
257242
return cost_per_block(utf8_miss, utf8_miss.stats->blocks_with_utf8_flipped, utf8);
258243
}
244+
// Rate of UTF-8 misses per UTF-8 flip
245+
double utf8_miss_rate() const {
246+
if (!has_events()) { return 1; }
247+
return double(utf8_miss.stage1.best.branch_misses() - utf8.stage1.best.branch_misses()) / utf8_miss.stats->blocks_with_utf8_flipped;
248+
}
249+
250+
double calc_expected_feature_cost(const benchmarker& file) const {
251+
// Expected base ns/block (empty)
252+
json_stats& stats = *file.stats;
253+
double expected = base_cost() * stats.blocks;
254+
expected += struct1_7_cost() * stats.blocks_with_1_structural;
255+
expected += utf8_cost() * stats.blocks_with_utf8;
256+
expected += struct8_15_cost() * stats.blocks_with_8_structurals;
257+
expected += struct16_cost() * stats.blocks_with_16_structurals;
258+
return expected / stats.blocks;
259+
}
259260

260-
double calc_expected(benchmarker& file) {
261+
double calc_expected_miss_cost(const benchmarker& file) const {
261262
// Expected extra ns/block caused by misses
262263
json_stats& stats = *file.stats;
263-
double expected = base_cost() * stats.blocks;
264-
expected += struct1_7_cost() * stats.blocks_with_1_structural;
265-
expected += struct1_7_miss_cost() * stats.blocks_with_1_structural_flipped;
266-
expected += utf8_cost() * stats.blocks_with_utf8;
267-
expected += utf8_miss_cost() * stats.blocks_with_utf8_flipped;
268-
expected += struct8_15_cost() * stats.blocks_with_8_structurals;
269-
expected += struct8_15_miss_cost() * stats.blocks_with_8_structurals_flipped;
270-
expected += struct16_cost() * stats.blocks_with_16_structurals;
271-
expected += struct16_miss_cost() * stats.blocks_with_16_structurals_flipped;
264+
double expected = struct1_7_miss_cost() * stats.blocks_with_1_structural_flipped * struct1_7_miss_rate();
265+
expected += utf8_miss_cost() * stats.blocks_with_utf8_flipped * utf8_miss_rate();
266+
expected += struct8_15_miss_cost() * stats.blocks_with_8_structurals_flipped * struct8_15_miss_rate();
267+
expected += struct16_miss_cost() * stats.blocks_with_16_structurals_flipped * struct16_miss_rate();
272268
return expected / stats.blocks;
273269
}
270+
271+
double calc_expected_misses(const benchmarker& file) const {
272+
json_stats& stats = *file.stats;
273+
double expected = stats.blocks_with_1_structural_flipped * struct1_7_miss_rate();
274+
expected += stats.blocks_with_utf8_flipped * utf8_miss_rate();
275+
expected += stats.blocks_with_8_structurals_flipped * struct8_15_miss_rate();
276+
expected += stats.blocks_with_16_structurals_flipped * struct16_miss_rate();
277+
return expected;
278+
}
279+
280+
double calc_expected(const benchmarker& file) const {
281+
return calc_expected_feature_cost(file) + calc_expected_miss_cost(file);
282+
}
283+
284+
void print() {
285+
printf("\n");
286+
printf("Features in ns/block (64 bytes):\n");
287+
printf("\n");
288+
printf("| %-8s ", "Stage");
289+
printf("| %8s ", "Base");
290+
printf("| %8s ", "7 Struct");
291+
printf("| %8s ", "UTF-8");
292+
printf("| %8s ", "15 Str.");
293+
printf("| %8s ", "16+ Str.");
294+
printf("| %15s ", "7 Struct Miss");
295+
printf("| %15s ", "UTF-8 Miss");
296+
printf("| %15s ", "15 Str. Miss");
297+
printf("| %15s ", "16+ Str. Miss");
298+
printf("|\n");
299+
300+
printf("|%.10s", "---------------------------------------");
301+
printf("|%.10s", "---------------------------------------");
302+
printf("|%.10s", "---------------------------------------");
303+
printf("|%.10s", "---------------------------------------");
304+
printf("|%.10s", "---------------------------------------");
305+
printf("|%.10s", "---------------------------------------");
306+
printf("|%.17s", "---------------------------------------");
307+
printf("|%.17s", "---------------------------------------");
308+
printf("|%.17s", "---------------------------------------");
309+
printf("|%.17s", "---------------------------------------");
310+
printf("|\n");
311+
312+
printf("| %-8s ", "Stage 1");
313+
printf("| %8.3g ", base_cost());
314+
printf("| %8.3g ", struct1_7_cost());
315+
printf("| %8.3g ", utf8_cost());
316+
printf("| %8.3g ", struct8_15_cost());
317+
printf("| %8.3g ", struct16_cost());
318+
if (has_events()) {
319+
printf("| %8.3g (%3d%%) ", struct1_7_miss_cost(), int(struct1_7_miss_rate()*100));
320+
printf("| %8.3g (%3d%%) ", utf8_miss_cost(), int(utf8_miss_rate()*100));
321+
printf("| %8.3g (%3d%%) ", struct8_15_miss_cost(), int(struct8_15_miss_rate()*100));
322+
printf("| %8.3g (%3d%%) ", struct16_miss_cost(), int(struct16_miss_rate()*100));
323+
} else {
324+
printf("| %8.3g ", struct1_7_miss_cost());
325+
printf("| %8.3g ", utf8_miss_cost());
326+
printf("| %8.3g ", struct8_15_miss_cost());
327+
printf("| %8.3g ", struct16_miss_cost());
328+
}
329+
printf("|\n");
330+
}
274331
};
275332

333+
void print_file_effectiveness(const char* filename, const benchmarker& results, const feature_benchmarker& features) {
334+
double actual = results.stage1.best.elapsed_ns() / results.stats->blocks;
335+
double calc = features.calc_expected(results);
336+
uint64_t actual_misses = results.stage1.best.branch_misses();
337+
uint64_t calc_misses = uint64_t(features.calc_expected_misses(results));
338+
double calc_miss_cost = features.calc_expected_miss_cost(results);
339+
printf("| %-15s ", filename);
340+
printf("| %8.3g ", features.calc_expected_feature_cost(results));
341+
printf("| %8.3g ", calc_miss_cost);
342+
printf("| %8.3g ", calc);
343+
printf("| %8.3g ", actual);
344+
printf("| %+8.3g ", actual - calc);
345+
printf("| %13lu ", calc_misses);
346+
if (features.has_events()) {
347+
printf("| %13lu ", actual_misses);
348+
printf("| %+13ld ", int64_t(actual_misses - calc_misses));
349+
double miss_adjustment = calc_miss_cost * (double(int64_t(actual_misses - calc_misses)) / calc_misses);
350+
printf("| %8.3g ", calc_miss_cost + miss_adjustment);
351+
printf("| %+8.3g ", actual - (calc + miss_adjustment));
352+
}
353+
printf("|\n");
354+
}
355+
276356
int main(int argc, char *argv[]) {
277357
// Read options
278358
exe_name = argv[0];
@@ -317,10 +397,42 @@ int main(int argc, char *argv[]) {
317397

318398
features.print();
319399

320-
// Gauge effectiveness
321-
printf("gsoc-2018.json expected/actual: %g/%g\n", features.calc_expected(gsoc_2018), actual(gsoc_2018));
322-
printf("twitter.json expected/actual: %g/%g\n", features.calc_expected(twitter), actual(twitter));
323-
printf("random.json expected/actual: %g/%g\n", features.calc_expected(random), actual(random));
400+
// Gauge effectiveness
401+
printf("\n");
402+
printf("Estimated vs. Actual ns/block for real files:\n");
403+
printf("\n");
404+
printf("| %-15s ", "File");
405+
printf("| %11s ", "Est. (Base)");
406+
printf("| %11s ", "Est. (Miss)");
407+
printf("| %8s ", "Est.");
408+
printf("| %8s ", "Actual");
409+
printf("| %8s ", "Diff");
410+
printf("| %13s ", "Est. Misses");
411+
if (features.has_events()) {
412+
printf("| %13s ", "Actual Misses");
413+
printf("| %13s ", "Diff (Misses)");
414+
printf("| %13s ", "Adjusted Miss");
415+
printf("| %13s ", "Adjusted Diff");
416+
}
417+
printf("|\n");
418+
printf("|%.17s", "---------------------------------------");
419+
printf("|%.13s", "---------------------------------------");
420+
printf("|%.13s", "---------------------------------------");
421+
printf("|%.10s", "---------------------------------------");
422+
printf("|%.10s", "---------------------------------------");
423+
printf("|%.10s", "---------------------------------------");
424+
printf("|%.15s", "---------------------------------------");
425+
if (features.has_events()) {
426+
printf("|%.15s", "---------------------------------------");
427+
printf("|%.15s", "---------------------------------------");
428+
printf("|%.15s", "---------------------------------------");
429+
printf("|%.15s", "---------------------------------------");
430+
}
431+
printf("|\n");
432+
433+
print_file_effectiveness("gsoc-2018.json", gsoc_2018, features);
434+
print_file_effectiveness("twitter.json", twitter, features);
435+
print_file_effectiveness("random.json", random, features);
324436

325437
return EXIT_SUCCESS;
326438
}

0 commit comments

Comments
 (0)