Skip to content

Commit 6b8f5d3

Browse files
authored
Fixing issue 601 (simdjson#618)
* Fixing issue 601
1 parent d5af359 commit 6b8f5d3

File tree

1 file changed

+64 -69 lines changed

benchmark/parseandstatcompetition.cpp

Lines changed: 64 additions & 69 deletions
Original file line number | Diff line number | Diff line change
@@ -43,67 +43,60 @@ void print_stat(const stat_t &s) {
4343
s.true_count, s.false_count);
4444
}
4545

46-
__attribute__((noinline)) stat_t
47-
simdjson_compute_stats(const simdjson::padded_string &p) {
48-
stat_t answer;
49-
simdjson::ParsedJson pj = build_parsed_json(p);
50-
answer.valid = pj.is_valid();
51-
if (!answer.valid) {
52-
return answer;
46+
47+
really_inline void simdjson_process_atom(stat_t &s,
48+
simdjson::document::element element) {
49+
if (element.is_number()) {
50+
s.number_count++;
51+
} else if (element.is_bool()) {
52+
if (element.as_bool()) {
53+
s.true_count++;
54+
} else {
55+
s.false_count++;
56+
}
57+
} else if (element.is_null()) {
58+
s.null_count++;
5359
}
54-
answer.number_count = 0;
55-
answer.object_count = 0;
56-
answer.array_count = 0;
57-
answer.null_count = 0;
58-
answer.true_count = 0;
59-
answer.false_count = 0;
60-
size_t tape_idx = 0;
61-
uint64_t tape_val = pj.doc.tape[tape_idx++];
62-
uint8_t type = (tape_val >> 56);
63-
size_t how_many = 0;
64-
assert(type == 'r');
65-
how_many = tape_val & simdjson::internal::JSON_VALUE_MASK;
66-
for (; tape_idx < how_many; tape_idx++) {
67-
tape_val = pj.doc.tape[tape_idx];
68-
// uint64_t payload = tape_val & simdjson::internal::JSON_VALUE_MASK;
69-
type = (tape_val >> 56);
70-
switch (type) {
71-
case 'l': // we have a long int
72-
answer.number_count++;
73-
tape_idx++; // skipping the integer
74-
break;
75-
case 'u': // we have a long uint
76-
answer.number_count++;
77-
tape_idx++; // skipping the unsigned integer
78-
break;
79-
case 'd': // we have a double
80-
answer.number_count++;
81-
tape_idx++; // skipping the double
82-
break;
83-
case 'n': // we have a null
84-
answer.null_count++;
85-
break;
86-
case 't': // we have a true
87-
answer.true_count++;
88-
break;
89-
case 'f': // we have a false
90-
answer.false_count++;
91-
break;
92-
case '{': // we have an object
93-
answer.object_count++;
94-
break;
95-
case '}': // we end an object
96-
break;
97-
case '[': // we start an array
98-
answer.array_count++;
99-
break;
100-
case ']': // we end an array
101-
break;
102-
default:
103-
break; // ignore
60+
}
61+
62+
void simdjson_recurse(stat_t &s, simdjson::document::element element) {
63+
if (element.is_array()) {
64+
s.array_count++;
65+
auto [array, array_error] = element.as_array();
66+
for (auto child : array) {
67+
if (child.is_array() || child.is_object()) {
68+
simdjson_recurse(s, child);
69+
} else {
70+
simdjson_process_atom(s, child);
71+
}
72+
}
73+
} else if (element.is_object()) {
74+
s.object_count++;
75+
auto [object, object_error] = element.as_object();
76+
for (auto [key, value] : object) {
77+
if (value.is_array() || value.is_object()) {
78+
simdjson_recurse(s, value);
79+
} else {
80+
simdjson_process_atom(s, value);
81+
}
10482
}
83+
} else {
84+
simdjson_process_atom(s, element);
10585
}
106-
return answer;
86+
}
87+
88+
__attribute__((noinline)) stat_t
89+
simdjson_compute_stats(const simdjson::padded_string &p) {
90+
stat_t s{};
91+
simdjson::document::parser parser;
92+
auto [doc, error] = parser.parse(p);
93+
if (error) {
94+
s.valid = false;
95+
return s;
96+
}
97+
s.valid = true;
98+
simdjson_recurse(s, doc.root());
99+
return s;
107100
}
108101

109102
// see
@@ -153,7 +146,7 @@ __attribute__((noinline)) stat_t
153146
sasjon_compute_stats(const simdjson::padded_string &p) {
154147
stat_t answer;
155148
char *buffer = (char *)malloc(p.size());
156-
if(buffer == nullptr) {
149+
if (buffer == nullptr) {
157150
return answer;
158151
}
159152
memcpy(buffer, p.data(), p.size());
@@ -215,7 +208,7 @@ __attribute__((noinline)) stat_t
215208
rapid_compute_stats(const simdjson::padded_string &p) {
216209
stat_t answer;
217210
char *buffer = (char *)malloc(p.size() + 1);
218-
if(buffer == nullptr) {
211+
if (buffer == nullptr) {
219212
return answer;
220213
}
221214
memcpy(buffer, p.data(), p.size());
@@ -242,13 +235,13 @@ __attribute__((noinline)) stat_t
242235
rapid_accurate_compute_stats(const simdjson::padded_string &p) {
243236
stat_t answer;
244237
char *buffer = (char *)malloc(p.size() + 1);
245-
if(buffer == nullptr) {
238+
if (buffer == nullptr) {
246239
return answer;
247240
}
248241
memcpy(buffer, p.data(), p.size());
249242
buffer[p.size()] = '\0';
250243
rapidjson::Document d;
251-
d.ParseInsitu<kParseValidateEncodingFlag|kParseFullPrecisionFlag>(buffer);
244+
d.ParseInsitu<kParseValidateEncodingFlag | kParseFullPrecisionFlag>(buffer);
252245
answer.valid = !d.HasParseError();
253246
if (!answer.valid) {
254247
free(buffer);
@@ -323,7 +316,7 @@ int main(int argc, char *argv[]) {
323316
stat_t s2a = rapid_accurate_compute_stats(p);
324317
if (verbose) {
325318
printf("rapid full: ");
326-
print_stat(s2);
319+
print_stat(s2a);
327320
}
328321
stat_t s3 = sasjon_compute_stats(p);
329322
if (verbose) {
@@ -337,10 +330,12 @@ int main(int argc, char *argv[]) {
337330
if (just_data) {
338331
printf("name cycles_per_byte cycles_per_byte_err gb_per_s gb_per_s_err \n");
339332
}
340-
BEST_TIME("simdjson ", simdjson_compute_stats(p).valid, true, , repeat,
341-
volume, !just_data);
342-
BEST_TIME("RapidJSON ", rapid_compute_stats(p).valid, true, , repeat, volume,
343-
!just_data);
344-
BEST_TIME("sasjon ", sasjon_compute_stats(p).valid, true, , repeat, volume,
345-
!just_data);
333+
BEST_TIME("simdjson ", simdjson_compute_stats(p).valid, true, ,
334+
repeat, volume, !just_data);
335+
BEST_TIME("RapidJSON ", rapid_compute_stats(p).valid, true, ,
336+
repeat, volume, !just_data);
337+
BEST_TIME("RapidJSON (precise) ", rapid_accurate_compute_stats(p).valid, true, ,
338+
repeat, volume, !just_data);
339+
BEST_TIME("sasjon ", sasjon_compute_stats(p).valid, true, ,
340+
repeat, volume, !just_data);
346341
}

0 commit comments

Comments
 (0)