Skip to content

Commit 9486095

Browse files
committed
Fix SAX benchmarks to actually push to vector
1 parent 6bb6e75 commit 9486095

File tree

3 files changed

+37
-37
lines changed

3 files changed

+37
-37
lines changed

benchmark/bench_sax.cpp

Lines changed: 12 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -22,7 +22,6 @@ using std::cerr;
2222
using std::endl;
2323

2424
const char *TWITTER_JSON = SIMDJSON_BENCHMARK_DATA_DIR "twitter.json";
25-
const int REPETITIONS = 10;
2625

2726
#if SIMDJSON_IMPLEMENTATION_HASWELL
2827

@@ -188,20 +187,21 @@ static void sax_tweets(State &state) {
188187
padded_string json;
189188
if (auto error = padded_string::load(TWITTER_JSON).get(json)) { cerr << error << endl; return; }
190189

191-
// Allocate
192-
twitter::sax_tweet_reader reader;
193-
if (auto error = reader.set_capacity(json.size())) { cerr << error << endl; return; }
194-
195-
// Warm the vector
196-
if (auto error = reader.read_tweets(json)) { throw error; }
197-
198-
// Read tweets
199190
size_t bytes = 0;
200191
size_t tweets = 0;
201-
for (SIMDJSON_UNUSED auto _ : state) {
192+
{
193+
// Yes, we leak this. Destructor issues. TODO fix that
194+
twitter::sax_tweet_reader reader;
195+
196+
// Warm the vector and allocate capacity
202197
if (auto error = reader.read_tweets(json)) { throw error; }
203-
bytes += json.size();
204-
tweets += reader.tweets.size();
198+
199+
// Read tweets
200+
for (SIMDJSON_UNUSED auto _ : state) {
201+
if (auto error = reader.read_tweets(json)) { throw error; }
202+
bytes += json.size();
203+
tweets += reader.tweets.size();
204+
}
205205
}
206206
// Gigabyte: https://en.wikipedia.org/wiki/Gigabyte
207207
state.counters["bytes"] = benchmark::Counter(

benchmark/twitter/sax_tweet_reader.h

Lines changed: 9 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -2,8 +2,8 @@
22
#define TWITTER_SAX_TWEET_READER_H
33

44
#include "simdjson.h"
5-
#include "sax_tweet_reader_visitor.h"
65
#include "tweet.h"
6+
#include "sax_tweet_reader_visitor.h"
77
#include <vector>
88

99
SIMDJSON_TARGET_HASWELL
@@ -16,20 +16,16 @@ using namespace haswell;
1616
using namespace haswell::stage2;
1717

1818
struct sax_tweet_reader {
19-
std::vector<tweet> tweets;
20-
std::unique_ptr<uint8_t[]> string_buf;
21-
size_t capacity;
22-
dom_parser_implementation dom_parser;
19+
std::vector<tweet> tweets{};
20+
std::unique_ptr<uint8_t[]> string_buf{};
21+
size_t capacity{};
22+
dom_parser_implementation dom_parser{};
2323

24-
sax_tweet_reader();
25-
error_code set_capacity(size_t new_capacity);
24+
error_code allocate(size_t new_capacity);
2625
error_code read_tweets(padded_string &json);
2726
}; // struct tweet_reader
2827

29-
sax_tweet_reader::sax_tweet_reader() : tweets{}, string_buf{}, capacity{0}, dom_parser() {
30-
}
31-
32-
error_code sax_tweet_reader::set_capacity(size_t new_capacity) {
28+
error_code sax_tweet_reader::allocate(size_t new_capacity) {
3329
// string_capacity copied from document::allocate
3430
size_t string_capacity = SIMDJSON_ROUNDUP_N(5 * new_capacity / 3 + 32, 64);
3531
string_buf.reset(new (std::nothrow) uint8_t[string_capacity]);
@@ -46,11 +42,11 @@ error_code sax_tweet_reader::read_tweets(padded_string &json) {
4642
// Allocate capacity if needed
4743
tweets.clear();
4844
if (capacity < json.size()) {
49-
if (auto error = set_capacity(capacity)) { return error; }
45+
if (auto error = allocate(json.size())) { return error; }
5046
}
5147

5248
// Run stage 1 first.
53-
if (auto error = dom_parser.stage1((uint8_t *)json.data(), json.size(), false)) { return error; }
49+
if (auto error = dom_parser.stage1((uint8_t *)json.data(), json.size(), false)) { std::cout << error << std::endl; return error; }
5450

5551
// Then walk the document, parsing the tweets as we go
5652
json_iterator iter(dom_parser, 0);

benchmark/twitter/sax_tweet_reader_visitor.h

Lines changed: 16 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -8,14 +8,15 @@
88
SIMDJSON_TARGET_HASWELL
99

1010
namespace twitter {
11+
namespace {
1112

1213
using namespace simdjson;
1314
using namespace haswell;
1415
using namespace haswell::stage2;
1516

1617
struct sax_tweet_reader_visitor {
1718
public:
18-
sax_tweet_reader_visitor(std::vector<tweet> &_tweets, uint8_t *string_buf);
19+
simdjson_really_inline sax_tweet_reader_visitor(std::vector<tweet> &tweets, uint8_t *string_buf);
1920

2021
simdjson_really_inline error_code visit_document_start(json_iterator &iter);
2122
simdjson_really_inline error_code visit_object_start(json_iterator &iter);
@@ -68,8 +69,8 @@ struct sax_tweet_reader_visitor {
6869
field_type type{field_type::any};
6970
};
7071

71-
containers container{containers::document};
7272
std::vector<tweet> &tweets;
73+
containers container{containers::document};
7374
uint8_t *current_string_buf_loc;
7475
const uint8_t *current_key{};
7576

@@ -94,9 +95,9 @@ struct sax_tweet_reader_visitor {
9495
static field_lookup fields;
9596
}; // sax_tweet_reader_visitor
9697

97-
sax_tweet_reader_visitor::sax_tweet_reader_visitor(std::vector<tweet> &_tweets, uint8_t *string_buf)
98+
simdjson_really_inline sax_tweet_reader_visitor::sax_tweet_reader_visitor(std::vector<tweet> &_tweets, uint8_t *_string_buf)
9899
: tweets{_tweets},
99-
current_string_buf_loc{string_buf} {
100+
current_string_buf_loc{_string_buf} {
100101
}
101102

102103
simdjson_really_inline error_code sax_tweet_reader_visitor::visit_document_start(json_iterator &iter) {
@@ -112,6 +113,7 @@ simdjson_really_inline error_code sax_tweet_reader_visitor::visit_array_start(js
112113
switch (fields.get(current_key, container).type) {
113114
case field_type::array: // { "statuses": [
114115
start_container(iter);
116+
current_key = nullptr;
115117
return SUCCESS;
116118
case field_type::any:
117119
return SUCCESS;
@@ -190,6 +192,7 @@ simdjson_really_inline error_code sax_tweet_reader_visitor::visit_primitive(json
190192
iter.log_error("unexpected primitive");
191193
return INCORRECT_TYPE;
192194
}
195+
current_key = nullptr;
193196
}
194197

195198
// If it's not a field, it's a child of an array.
@@ -202,16 +205,17 @@ simdjson_really_inline error_code sax_tweet_reader_visitor::visit_array_end(json
202205
return SUCCESS;
203206
}
204207
simdjson_really_inline error_code sax_tweet_reader_visitor::visit_object_end(json_iterator &iter) {
208+
current_key = nullptr;
205209
if (in_container(iter)) { end_container(iter); }
206210
return SUCCESS;
207211
}
208212

209-
simdjson_really_inline error_code sax_tweet_reader_visitor::visit_document_end(json_iterator &iter) {
210-
iter.log_end_value("document");
213+
simdjson_really_inline error_code sax_tweet_reader_visitor::visit_document_end(json_iterator &) {
211214
return SUCCESS;
212215
}
213216

214217
simdjson_really_inline error_code sax_tweet_reader_visitor::visit_empty_array(json_iterator &) {
218+
current_key = nullptr;
215219
return SUCCESS;
216220
}
217221
simdjson_really_inline error_code sax_tweet_reader_visitor::visit_empty_object(json_iterator &) {
@@ -233,16 +237,15 @@ simdjson_really_inline bool sax_tweet_reader_visitor::in_container_child(json_it
233237
simdjson_really_inline void sax_tweet_reader_visitor::start_container(json_iterator &iter) {
234238
SIMDJSON_ASSUME(iter.depth <= MAX_SUPPORTED_DEPTH); // Asserts in debug mode
235239
container = containers(iter.depth);
240+
if (logger::LOG_ENABLED) { iter.log_value(STATE_NAMES[iter.depth]); }
236241
if (container == containers::tweet) { tweets.push_back({}); }
237-
if (logger::LOG_ENABLED) { iter.log_start_value(STATE_NAMES[iter.depth]); }
238242
}
239-
simdjson_really_inline void sax_tweet_reader_visitor::end_container(json_iterator &iter) {
240-
if (logger::LOG_ENABLED) { iter.log_end_value(STATE_NAMES[int(container)]); }
243+
simdjson_really_inline void sax_tweet_reader_visitor::end_container(json_iterator &) {
241244
container = containers(int(container) - 1);
242245
}
243246
simdjson_really_inline error_code sax_tweet_reader_visitor::parse_nullable_unsigned(json_iterator &iter, const uint8_t *value, const field &f) {
244247
iter.log_value(f.key);
245-
auto i = reinterpret_cast<uint64_t *>(reinterpret_cast<char *>(&tweets.back() + f.offset));
248+
auto i = reinterpret_cast<uint64_t *>(reinterpret_cast<char *>(&tweets.back()) + f.offset);
246249
if (auto error = numberparsing::parse_unsigned(value).get(*i)) {
247250
// If number parsing failed, check if it's null before returning the error
248251
if (!atomparsing::is_valid_null_atom(value)) { iter.log_error("expected number or null"); return error; }
@@ -252,12 +255,12 @@ simdjson_really_inline error_code sax_tweet_reader_visitor::parse_nullable_unsig
252255
}
253256
simdjson_really_inline error_code sax_tweet_reader_visitor::parse_unsigned(json_iterator &iter, const uint8_t *value, const field &f) {
254257
iter.log_value(f.key);
255-
auto i = reinterpret_cast<uint64_t *>(reinterpret_cast<char *>(&tweets.back() + f.offset));
258+
auto i = reinterpret_cast<uint64_t *>(reinterpret_cast<char *>(&tweets.back()) + f.offset);
256259
return numberparsing::parse_unsigned(value).get(*i);
257260
}
258261
simdjson_really_inline error_code sax_tweet_reader_visitor::parse_string(json_iterator &iter, const uint8_t *value, const field &f) {
259262
iter.log_value(f.key);
260-
auto s = reinterpret_cast<std::string_view *>(reinterpret_cast<char *>(&tweets.back() + f.offset));
263+
auto s = reinterpret_cast<std::string_view *>(reinterpret_cast<char *>(&tweets.back()) + f.offset);
261264
return stringparsing::parse_string_to_buffer(value, current_string_buf_loc, *s);
262265
}
263266

@@ -513,6 +516,7 @@ sax_tweet_reader_visitor::field_lookup::field_lookup() {
513516
// }
514517
// }
515518

519+
} // unnamed namespace
516520
} // namespace twitter
517521

518522
SIMDJSON_UNTARGET_REGION

0 commit comments

Comments
 (0)