Skip to content

Commit 18564f1

Browse files
committed
Don't benchmark unless haswell is available
1 parent 638f1de commit 18564f1

File tree

2 files changed

+357
-133
lines changed

2 files changed

+357
-133
lines changed

benchmark/bench_sax.cpp

Lines changed: 227 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,19 @@
22
#define SIMDJSON_IMPLEMENTATION_WESTMERE 0
33
#define SIMDJSON_IMPLEMENTATION_AMD64 0
44

5+
#include <iostream>
6+
#include <sstream>
7+
#include <random>
8+
59
#include "simdjson.h"
610

711
SIMDJSON_PUSH_DISABLE_ALL_WARNINGS
812
#include <benchmark/benchmark.h>
913
SIMDJSON_POP_DISABLE_WARNINGS
1014

1115
#include "simdjson.cpp"
12-
#include "twitter/sax_tweet_reader.h"
16+
17+
#if SIMDJSON_EXCEPTIONS
1318

1419
using namespace benchmark;
1520
using namespace simdjson;
@@ -19,6 +24,10 @@ using std::endl;
1924
const char *TWITTER_JSON = SIMDJSON_BENCHMARK_DATA_DIR "twitter.json";
2025
const int REPETITIONS = 10;
2126

27+
#if SIMDJSON_IMPLEMENTATION_HASWELL
28+
29+
#include "twitter/sax_tweet_reader.h"
30+
2231
static void sax_tweets(State &state) {
2332
// Load twitter.json to a buffer
2433
padded_string json;
@@ -50,7 +59,9 @@ BENCHMARK(sax_tweets)->Repetitions(REPETITIONS)->ComputeStatistics("max", [](con
5059
return *(std::max_element(std::begin(v), std::end(v)));
5160
})->DisplayAggregatesOnly(true);
5261

53-
#if SIMDJSON_EXCEPTIONS
62+
#endif // SIMDJSON_IMPLEMENTATION_HASWELL
63+
64+
#include "twitter/tweet.h"
5465

5566
simdjson_really_inline uint64_t nullable_int(dom::element element) {
5667
if (element.is_null()) { return 0; }
@@ -106,8 +117,6 @@ BENCHMARK(dom_tweets)->Repetitions(REPETITIONS)->ComputeStatistics("max", [](con
106117
return *(std::max_element(std::begin(v), std::end(v)));
107118
})->DisplayAggregatesOnly(true);
108119

109-
#endif // SIMDJSON_EXCEPTIONS
110-
111120
static void dom_parse(State &state) {
112121
// Load twitter.json to a buffer
113122
padded_string json;
@@ -133,4 +142,218 @@ BENCHMARK(dom_parse)->Repetitions(REPETITIONS)->ComputeStatistics("max", [](cons
133142
return *(std::max_element(std::begin(v), std::end(v)));
134143
})->DisplayAggregatesOnly(true);
135144

145+
146+
/********************
147+
* Large file parsing benchmarks:
148+
********************/
149+
150+
// Builds a JSON array of N objects, each holding random "x", "y", "z"
// members drawn from [0, 1). The exact formatting of the original is
// preserved: the first element is followed by a newline, later elements
// are preceded by ",\n", and the array is closed with "\n]\n".
static std::string build_json_array(size_t N) {
  std::default_random_engine engine;
  std::uniform_real_distribution<> coord(0, 1);
  std::stringstream out;
  // Emits one point object; the three draws happen left-to-right.
  auto emit_point = [&]() {
    out << "{ \"x\":" << coord(engine) << ", \"y\":" << coord(engine) << ", \"z\":" << coord(engine) << "}";
  };
  out << "[" << std::endl;
  if (N > 0) {
    emit_point();
    out << std::endl;
  }
  for (size_t i = 1; i < N; i++) {
    out << "," << std::endl;
    emit_point();
  }
  out << std::endl;
  out << "]" << std::endl;
  std::string answer = out.str();
  std::cout << "Creating a source file spanning " << (answer.size() + 512) / 1024 << " KB " << std::endl;
  return answer;
}
168+
169+
// Returns the process-wide test document: a JSON array of 1,000,000 random
// points. Built lazily on first call (thread-safe magic static) and reused
// by every benchmark repetition, so generation cost is paid only once.
static const simdjson::padded_string& get_my_json_str() {
  static simdjson::padded_string s = build_json_array(1000000);
  return s;
}
173+
174+
// Plain aggregate receiving one {"x":..., "y":..., "z":...} object from the
// generated document.
struct my_point {
  double x;
  double y;
  double z;
};
179+
180+
// ./benchmark/bench_sax --benchmark_filter=largerandom
181+
182+
183+
/***
184+
* We start with the naive DOM-based approach.
185+
**/
186+
// Baseline for the "largerandom" benchmarks: parse the synthetic point array
// with the DOM API and copy every point into a std::vector, giving the SAX
// variant below a like-for-like comparison.
static void dom_parse_largerandom(State &state) {
  // Load the synthetic "largerandom" document (not twitter.json) into a buffer
  const padded_string& json = get_my_json_str();

  // Allocate
  dom::parser parser;
  if (auto error = parser.allocate(json.size())) { cerr << error << endl; return; };

  // Read
  size_t bytes = 0;
  simdjson::error_code error;
  for (SIMDJSON_UNUSED auto _ : state) {
    std::vector<my_point> container;
    dom::element doc;
    if ((error = parser.parse(json).get(doc))) {
      std::cerr << "failure: " << error << std::endl;
      throw "Parsing failed";
    };
    for (auto p : doc) {
      container.emplace_back(my_point{p["x"], p["y"], p["z"]});
    }
    bytes += json.size();
    benchmark::DoNotOptimize(container.data());

  }
  // Gigabyte: https://en.wikipedia.org/wiki/Gigabyte
  state.counters["Gigabytes"] = benchmark::Counter(
        double(bytes), benchmark::Counter::kIsRate,
        benchmark::Counter::OneK::kIs1000); // For GiB : kIs1024
  state.counters["docs"] = Counter(double(state.iterations()), benchmark::Counter::kIsRate);
}
BENCHMARK(dom_parse_largerandom)->Repetitions(REPETITIONS)->ComputeStatistics("max", [](const std::vector<double>& v) -> double {
    return *(std::max_element(std::begin(v), std::end(v)));
  })->DisplayAggregatesOnly(true);
221+
222+
#if SIMDJSON_IMPLEMENTATION_HASWELL
223+
224+
/***
225+
* Next we are going to code the SAX approach.
226+
**/
227+
228+
SIMDJSON_TARGET_HASWELL
229+
230+
namespace largerandom {
231+
namespace {
232+
233+
using namespace simdjson;
234+
using namespace haswell;
235+
using namespace haswell::stage2;
236+
struct sax_point_reader_visitor {
237+
public:
238+
sax_point_reader_visitor(std::vector<my_point> &_points) : points(_points) {
239+
}
240+
241+
simdjson_really_inline error_code visit_document_start(json_iterator &) { return SUCCESS; }
242+
simdjson_really_inline error_code visit_object_start(json_iterator &) { return SUCCESS; }
243+
simdjson_really_inline error_code visit_key(json_iterator &, const uint8_t *key) {
244+
switch(key[0]) {
245+
case 'x':
246+
idx = 0;
247+
break;
248+
case 'y':
249+
idx = 2;
250+
break;
251+
case 'z':
252+
idx = 3;
253+
break;
254+
}
255+
return SUCCESS;
256+
}
257+
simdjson_really_inline error_code visit_primitive(json_iterator &, const uint8_t *value) {
258+
return numberparsing::parse_double(value).get(buffer[idx]);
259+
}
260+
simdjson_really_inline error_code visit_array_start(json_iterator &) { return SUCCESS; }
261+
simdjson_really_inline error_code visit_array_end(json_iterator &) { return SUCCESS; }
262+
simdjson_really_inline error_code visit_object_end(json_iterator &) { return SUCCESS; }
263+
simdjson_really_inline error_code visit_document_end(json_iterator &) { return SUCCESS; }
264+
simdjson_really_inline error_code visit_empty_array(json_iterator &) { return SUCCESS; }
265+
simdjson_really_inline error_code visit_empty_object(json_iterator &) { return SUCCESS; }
266+
simdjson_really_inline error_code visit_root_primitive(json_iterator &, const uint8_t *) { return SUCCESS; }
267+
simdjson_really_inline error_code increment_count(json_iterator &) { return SUCCESS; }
268+
std::vector<my_point> &points;
269+
size_t idx{0};
270+
double buffer[3];
271+
};
272+
273+
// Runs stage 1 plus a custom stage 2 (sax_point_reader_visitor) over a JSON
// array of points, reusing its internal buffers across calls.
struct sax_point_reader {
  std::vector<my_point> points;          // result of the last read_points() call
  std::unique_ptr<uint8_t[]> string_buf; // string scratch space, sized in set_capacity()
  size_t capacity;                       // bytes of JSON the parser is currently sized for
  dom_parser_implementation dom_parser;

  sax_point_reader();
  error_code set_capacity(size_t new_capacity);
  error_code read_points(const padded_string &json);
}; // struct sax_point_reader
283+
284+
// Start with no buffers and zero capacity; set_capacity() does the real
// allocation on first use.
sax_point_reader::sax_point_reader() : points{}, string_buf{}, capacity{0}, dom_parser() {
}
286+
287+
// (Re)allocates the string buffer and stage-1 structures for documents of up
// to new_capacity bytes. Max depth is configured on the first call only.
// Returns MEMALLOC or the underlying parser error on failure.
error_code sax_point_reader::set_capacity(size_t new_capacity) {
  // string_capacity copied from document::allocate
  size_t string_capacity = SIMDJSON_ROUNDUP_N(5 * new_capacity / 3 + 32, 64);
  string_buf.reset(new (std::nothrow) uint8_t[string_capacity]);
  // BUGFIX: nothrow new returns nullptr on failure; it was never checked.
  if (!string_buf) { return MEMALLOC; }
  if (auto error = dom_parser.set_capacity(new_capacity)) { return error; }
  if (capacity == 0) { // set max depth the first time only
    if (auto error = dom_parser.set_max_depth(DEFAULT_MAX_DEPTH)) { return error; }
  }
  capacity = new_capacity;
  return SUCCESS;
}
298+
299+
// Parses `json` (an array of {"x","y","z"} objects) into this->points,
// growing internal buffers if the document is larger than the current
// capacity. Returns the first error encountered, or SUCCESS.
error_code sax_point_reader::read_points(const padded_string &json) {
  // Allocate capacity if needed
  points.clear();
  if (capacity < json.size()) {
    // BUGFIX: grow to the document size; the original passed `capacity`,
    // re-allocating to the old (too small) size and leaving the parser
    // unable to hold the document.
    if (auto error = set_capacity(json.size())) { return error; }
  }

  // Run stage 1 first.
  if (auto error = dom_parser.stage1((uint8_t *)json.data(), json.size(), false)) { return error; }

  // Then walk the document, parsing the points as we go
  json_iterator iter(dom_parser, 0);
  sax_point_reader_visitor visitor(points);
  if (auto error = iter.walk_document<false>(visitor)) { return error; }
  return SUCCESS;
}
315+
316+
} // unnamed namespace
317+
} // namespace largerandom
318+
319+
SIMDJSON_UNTARGET_REGION
320+
321+
322+
323+
324+
325+
// ./benchmark/bench_sax --benchmark_filter=largerandom
326+
// SAX counterpart of dom_parse_largerandom: same synthetic document, same
// output vector, but driven through the custom haswell-only visitor.
static void sax_parse_largerandom(State &state) {
  // Load the synthetic "largerandom" document (not twitter.json) into a buffer
  const padded_string& json = get_my_json_str();

  // Allocate
  largerandom::sax_point_reader reader;
  if (auto error = reader.set_capacity(json.size())) { throw error; }
  // warming
  for(size_t i = 0; i < 10; i++) {
    if (auto error = reader.read_points(json)) { throw error; }
  }

  // Read
  size_t bytes = 0;
  for (SIMDJSON_UNUSED auto _ : state) {
    if (auto error = reader.read_points(json)) { throw error; }
    bytes += json.size();
    benchmark::DoNotOptimize(reader.points.data());
  }
  // Gigabyte: https://en.wikipedia.org/wiki/Gigabyte
  state.counters["Gigabytes"] = benchmark::Counter(
        double(bytes), benchmark::Counter::kIsRate,
        benchmark::Counter::OneK::kIs1000); // For GiB : kIs1024
  state.counters["docs"] = Counter(double(state.iterations()), benchmark::Counter::kIsRate);
}
BENCHMARK(sax_parse_largerandom)->Repetitions(REPETITIONS)->ComputeStatistics("max", [](const std::vector<double>& v) -> double {
    return *(std::max_element(std::begin(v), std::end(v)));
  })->DisplayAggregatesOnly(true);
354+
355+
#endif // SIMDJSON_IMPLEMENTATION_HASWELL
356+
357+
#endif // SIMDJSON_EXCEPTIONS
358+
136359
BENCHMARK_MAIN();

0 commit comments

Comments
 (0)