@@ -60,7 +60,7 @@ simdjson_really_inline void read_tweets(ondemand::parser &parser, padded_string
60
60
}
61
61
}
62
62
63
- static void bench_tweets (State &state) {
63
+ static void ondemand_tweets (State &state) {
64
64
// Load twitter.json to a buffer
65
65
padded_string json;
66
66
if (auto error = padded_string::load (TWITTER_JSON).get (json)) { cerr << error << endl; return ; }
@@ -87,13 +87,98 @@ static void bench_tweets(State &state) {
87
87
state.counters [" tweets" ] = Counter (double (tweet_count), benchmark::Counter::kIsRate );
88
88
}
89
89
90
- BENCHMARK (bench_tweets )->Repetitions (REPETITIONS)->ComputeStatistics(" max" , [](const std::vector<double >& v) -> double {
90
+ BENCHMARK (ondemand_tweets )->Repetitions (REPETITIONS)->ComputeStatistics(" max" , [](const std::vector<double >& v) -> double { // Registers ondemand_tweets with REPETITIONS repetitions and an extra statistic computed by this lambda.
91
91
return *(std::max_element (std::begin (v), std::end (v))); // largest element of v (presumably the per-repetition measurements -- confirm against the Google Benchmark docs)
92
92
})->DisplayAggregatesOnly (true );
93
93
94
94
} // namespace ondemand_bench
95
95
96
96
97
+ SIMDJSON_UNTARGET_REGION
98
+
99
+ SIMDJSON_TARGET_HASWELL
100
+
101
+ namespace iter_bench {
102
+
103
+ using namespace simdjson ;
104
+ using namespace haswell ;
105
+
106
+ simdjson_really_inline void read_tweets (ondemand::parser &parser, padded_string &json, std::vector<twitter::tweet> &tweets) { // Parses json with parser and appends to tweets one twitter::tweet per pass of the loop below; returns early without appending when the statuses array is empty.
107
+ // Walk the document with the raw json_iterator, building one twitter::tweet per array element as we go. Fields are looked up in a fixed order, and a missing field is reported by throwing a string literal.
108
+
109
+ // { "statuses":
110
+ auto doc = parser.parse (json);
111
+ ondemand::json_iterator &iter = doc.iterate(); // every navigation step below goes through this iterator reference obtained from doc
112
+ iter.start_object ().value (); // enter the root object; the result of value() is discarded (presumably value() raises on error -- confirm)
113
+ if (!iter.find_first_field_raw (" statuses" )) { throw " No statuses field" ; }
114
+ // { "statuses": [
115
+ auto tweets_array = iter.start_array ().value (); // kept so that next_element() at the bottom of the loop can be handed the array being iterated
116
+ if (iter.is_empty_array ()) { return ; } // nothing to append for an empty array
117
+
118
+ do {
119
+ auto tweet_object = iter.start_object ().value (); // handed to each find_next_field_raw() call that looks up a field of this tweet
120
+ twitter::tweet tweet;
121
+ if (!iter.find_first_field_raw (" created_at" )) { throw " Could not find created_at" ; }
122
+ tweet.created_at = iter.get_raw_json_string ().value ().unescape (parser);
123
+ if (!iter.find_next_field_raw (" id" , tweet_object)) { throw " Could not find id" ; }
124
+ tweet.id = iter.get_uint64 ();
125
+ if (!iter.find_next_field_raw (" text" , tweet_object)) { throw " Could not find text" ; }
126
+ tweet.text = iter.get_raw_json_string ().value ().unescape (parser);
127
+ if (!iter.find_next_field_raw (" in_reply_to_status_id" , tweet_object)) { throw " Could not find in_reply_to_status_id" ; }
128
+ if (!iter.is_null ()) { // in_reply_to_status_id is assigned only when the value is not null; otherwise the member is left as twitter::tweet initialised it
129
+ tweet.in_reply_to_status_id = iter.get_uint64 ();
130
+ }
131
+ if (!iter.find_next_field_raw (" user" , tweet_object)) { throw " Could not find user" ; }
132
+ { // nested user object: only its id and screen_name fields are read
133
+ auto user_object = iter.start_object ().value ();
134
+ if (!iter.find_first_field_raw (" id" )) { throw " Could not find user.id" ; }
135
+ tweet.user .id = iter.get_uint64 ();
136
+ if (!iter.find_next_field_raw (" screen_name" , user_object)) { throw " Could not find user.screen_name" ; }
137
+ tweet.user .screen_name = iter.get_raw_json_string ().value ().unescape (parser);
138
+ }
139
+ if (!iter.find_next_field_raw (" retweet_count" , tweet_object)) { throw " Could not find retweet_count" ; } // back to the outer tweet: the lookup is given tweet_object again, not user_object
140
+ tweet.retweet_count = iter.get_uint64 ();
141
+ if (!iter.find_next_field_raw (" favorite_count" , tweet_object)) { throw " Could not find favorite_count" ; }
142
+ tweet.favorite_count = iter.get_uint64 ();
143
+
144
+ tweets.push_back (tweet);
145
+ } while (iter.next_element (tweets_array)); // loop again while next_element() returns a truthy value for tweets_array
146
+ }
147
+
148
+ static void iter_tweets (State &state) { // Benchmark body: runs read_tweets() over the JSON loaded from TWITTER_JSON once per benchmark iteration and publishes three rate counters.
149
+ // Load the file named by TWITTER_JSON into a padded buffer; on failure print the error to cerr and return before the benchmark loop runs
150
+ padded_string json;
151
+ if (auto error = padded_string::load (TWITTER_JSON).get (json)) { cerr << error << endl; return ; }
152
+
153
+ // Allocate and warm the vector: one read_tweets() call is made before the benchmark loop starts
154
+ std::vector<twitter::tweet> tweets;
155
+ ondemand::parser parser;
156
+ read_tweets (parser, json, tweets);
157
+
158
+ // Read tweets: each iteration clears the vector, re-reads the same buffer with the same parser, and adds to the totals used by the counters below
159
+ size_t byte_count = 0 ;
160
+ size_t tweet_count = 0 ;
161
+ for (SIMDJSON_UNUSED auto _ : state) {
162
+ tweets.clear ();
163
+ read_tweets (parser, json, tweets);
164
+ byte_count += json.size ();
165
+ tweet_count += tweets.size ();
166
+ }
167
+ // Gigabyte: https://en.wikipedia.org/wiki/Gigabyte -- the counter is fed the raw byte total, flagged as a rate, with 1000-based unit scaling
168
+ state.counters [" Gigabytes" ] = benchmark::Counter (
169
+ double (byte_count), benchmark::Counter::kIsRate ,
170
+ benchmark::Counter::OneK::kIs1000 ); // For GiB : kIs1024
171
+ state.counters [" docs" ] = Counter (double (state.iterations ()), benchmark::Counter::kIsRate ); // the whole buffer is read once per iteration, so the iteration count is used as the document count
172
+ state.counters [" tweets" ] = Counter (double (tweet_count), benchmark::Counter::kIsRate ); // total tweets.size() summed over all iterations, reported as a rate
173
+ }
174
+
175
+ BENCHMARK (iter_tweets)->Repetitions (REPETITIONS)->ComputeStatistics(" max" , [](const std::vector<double >& v) -> double { // Registers iter_tweets with REPETITIONS repetitions and an extra statistic computed by this lambda.
176
+ return *(std::max_element (std::begin (v), std::end (v))); // largest element of v (presumably the per-repetition measurements -- confirm against the Google Benchmark docs)
177
+ })->DisplayAggregatesOnly (true );
178
+
179
+ } // namespace iter_bench
180
+
181
+
97
182
SIMDJSON_UNTARGET_REGION
98
183
99
184
#include " twitter/sax_tweet_reader.h"
@@ -251,7 +336,7 @@ struct my_point {
251
336
/* **
252
337
* We start with the naive DOM-based approach.
253
338
**/
254
- static void dom_parse_largerandom (State &state) {
339
+ static void dom_largerandom (State &state) {
255
340
// Load twitter.json to a buffer
256
341
const padded_string& json = get_my_json_str ();
257
342
@@ -283,7 +368,7 @@ static void dom_parse_largerandom(State &state) {
283
368
state.counters [" docs" ] = Counter (double (state.iterations ()), benchmark::Counter::kIsRate );
284
369
}
285
370
286
- BENCHMARK (dom_parse_largerandom )->Repetitions(REPETITIONS)->ComputeStatistics(" max" , [](const std::vector<double >& v) -> double {
371
+ BENCHMARK (dom_largerandom )->Repetitions(REPETITIONS)->ComputeStatistics(" max" , [](const std::vector<double >& v) -> double { // Registers dom_largerandom with REPETITIONS repetitions and an extra statistic computed by this lambda.
287
372
return *(std::max_element (std::begin (v), std::end (v))); // largest element of v (presumably the per-repetition measurements -- confirm against the Google Benchmark docs)
288
373
})->DisplayAggregatesOnly (true );
289
374
@@ -294,7 +379,7 @@ SIMDJSON_TARGET_HASWELL
294
379
/* **
295
380
* On Demand approach.
296
381
**/
297
- static void ondemand_parse_largerandom (State &state) {
382
+ static void ondemand_largerandom (State &state) {
298
383
using namespace haswell ;
299
384
// Load twitter.json to a buffer
300
385
const padded_string& json = get_my_json_str ();
@@ -324,7 +409,61 @@ static void ondemand_parse_largerandom(State &state) {
324
409
325
410
SIMDJSON_UNTARGET_REGION
326
411
327
- BENCHMARK (ondemand_parse_largerandom)->Repetitions(REPETITIONS)->ComputeStatistics(" max" , [](const std::vector<double >& v) -> double {
412
+ BENCHMARK (ondemand_largerandom)->Repetitions(REPETITIONS)->ComputeStatistics(" max" , [](const std::vector<double >& v) -> double { // Registers ondemand_largerandom with REPETITIONS repetitions and an extra statistic computed by this lambda.
413
+ return *(std::max_element (std::begin (v), std::end (v))); // largest element of v (presumably the per-repetition measurements -- confirm against the Google Benchmark docs)
414
+ })->DisplayAggregatesOnly (true );
415
+
416
+ SIMDJSON_TARGET_HASWELL
417
+
418
+ static simdjson_really_inline double first_double (haswell::ondemand::json_iterator &iter) { // Starts an object at the iterator, steps through the first field's key to its value, and returns iter.get_double(); throws a string literal if any step fails.
419
+ if (iter.start_object ().error () || iter.field_key ().error () || iter.field_value ()) { throw " Invalid field" ; } // the checks short-circuit left to right; field_value() is tested directly, so a truthy result counts as failure
420
+ return iter.get_double ();
421
+ }
422
+
423
+ static simdjson_really_inline double next_double (haswell::ondemand::json_iterator &iter) { // Requires another field in the current object, steps through its key to its value, and returns iter.get_double(); throws a string literal otherwise.
424
+ if (!iter.has_next_field () || iter.field_key ().error () || iter.field_value ()) { throw " Invalid field" ; } // a false has_next_field() is reported with the same message as a key or value error
425
+ return iter.get_double ();
426
+ }
427
+
428
+ /* **
429
+ * On Demand Iterator approach.
430
+ **/
431
+ static void iter_largerandom (State &state) { // Benchmark body: on every iteration walks the top-level array with the raw json_iterator and collects one my_point per element.
432
+ using namespace haswell ;
433
+ // Fetch the input JSON via get_my_json_str(). NOTE(review): this comment used to say twitter.json, which does not match the call below -- confirm what the helper returns
434
+ const padded_string& json = get_my_json_str ();
435
+
436
+ // Allocate: size the parser for this input before the benchmark loop; a truthy error_code is thrown
437
+ ondemand::parser parser;
438
+ error_code error;
439
+ if ((error = parser.allocate (json.size ()))) { throw error; };
440
+
441
+ // Read: a fresh container is built on every iteration
442
+ size_t bytes = 0 ;
443
+ for (SIMDJSON_UNUSED auto _ : state) {
444
+ std::vector<my_point> container;
445
+ auto doc = parser.parse (json);
446
+ ondemand::json_iterator &iter = doc.iterate();
447
+ iter.start_array ().value (); // enter the top-level array; the result of value() is discarded
448
+ if (!iter.is_empty_array ()) {
449
+ do {
450
+ container.emplace_back (my_point{first_double (iter), next_double (iter), next_double (iter)}); // braced initializer list, so the three reads are evaluated left to right: first field, then the next two
451
+ if (iter.has_next_field ()) { throw " Too many fields" ; } // any field beyond the three just read is rejected
452
+ } while (iter.has_next_element ());
453
+ }
454
+ bytes += json.size ();
455
+ benchmark::DoNotOptimize (container.data ()); // marks the filled container as used so the work above is not optimized away
456
+ }
457
+ // Gigabyte: https://en.wikipedia.org/wiki/Gigabyte -- the counter is fed the raw byte total, flagged as a rate, with 1000-based unit scaling
458
+ state.counters [" Gigabytes" ] = benchmark::Counter (
459
+ double (bytes), benchmark::Counter::kIsRate ,
460
+ benchmark::Counter::OneK::kIs1000 ); // For GiB : kIs1024
461
+ state.counters [" docs" ] = Counter (double (state.iterations ()), benchmark::Counter::kIsRate ); // the whole buffer is read once per iteration, so the iteration count is used as the document count
462
+ }
463
+
464
+ SIMDJSON_UNTARGET_REGION
465
+
466
+ BENCHMARK (iter_largerandom)->Repetitions(REPETITIONS)->ComputeStatistics(" max" , [](const std::vector<double >& v) -> double { // Registers iter_largerandom with REPETITIONS repetitions and an extra statistic computed by this lambda.
328
467
return *(std::max_element (std::begin (v), std::end (v))); // largest element of v (presumably the per-repetition measurements -- confirm against the Google Benchmark docs)
329
468
})->DisplayAggregatesOnly (true );
330
469
@@ -430,7 +569,7 @@ SIMDJSON_UNTARGET_REGION
430
569
431
570
432
571
// ./benchmark/bench_sax --benchmark_filter=largerandom
433
- static void sax_parse_largerandom (State &state) {
572
+ static void sax_largerandom (State &state) {
434
573
// Load twitter.json to a buffer
435
574
const padded_string& json = get_my_json_str ();
436
575
@@ -455,7 +594,7 @@ static void sax_parse_largerandom(State &state) {
455
594
benchmark::Counter::OneK::kIs1000 ); // For GiB : kIs1024
456
595
state.counters [" docs" ] = Counter (double (state.iterations ()), benchmark::Counter::kIsRate );
457
596
}
458
- BENCHMARK (sax_parse_largerandom )->Repetitions(REPETITIONS)->ComputeStatistics(" max" , [](const std::vector<double >& v) -> double {
597
+ BENCHMARK (sax_largerandom )->Repetitions(REPETITIONS)->ComputeStatistics(" max" , [](const std::vector<double >& v) -> double { // Registers sax_largerandom with REPETITIONS repetitions and an extra statistic computed by this lambda.
459
598
return *(std::max_element (std::begin (v), std::end (v))); // largest element of v (presumably the per-repetition measurements -- confirm against the Google Benchmark docs)
460
599
})->DisplayAggregatesOnly (true );
461
600
0 commit comments