@@ -43,67 +43,60 @@ void print_stat(const stat_t &s) {
43
43
s.true_count , s.false_count );
44
44
}
45
45
46
- __attribute__ ((noinline)) stat_t
47
- simdjson_compute_stats(const simdjson::padded_string &p) {
48
- stat_t answer;
49
- simdjson::ParsedJson pj = build_parsed_json (p);
50
- answer.valid = pj.is_valid ();
51
- if (!answer.valid ) {
52
- return answer;
46
+
47
+ really_inline void simdjson_process_atom (stat_t &s,
48
+ simdjson::document::element element) {
49
+ if (element.is_number ()) {
50
+ s.number_count ++;
51
+ } else if (element.is_bool ()) {
52
+ if (element.as_bool ()) {
53
+ s.true_count ++;
54
+ } else {
55
+ s.false_count ++;
56
+ }
57
+ } else if (element.is_null ()) {
58
+ s.null_count ++;
53
59
}
54
- answer.number_count = 0 ;
55
- answer.object_count = 0 ;
56
- answer.array_count = 0 ;
57
- answer.null_count = 0 ;
58
- answer.true_count = 0 ;
59
- answer.false_count = 0 ;
60
- size_t tape_idx = 0 ;
61
- uint64_t tape_val = pj.doc .tape [tape_idx++];
62
- uint8_t type = (tape_val >> 56 );
63
- size_t how_many = 0 ;
64
- assert (type == ' r' );
65
- how_many = tape_val & simdjson::internal::JSON_VALUE_MASK;
66
- for (; tape_idx < how_many; tape_idx++) {
67
- tape_val = pj.doc .tape [tape_idx];
68
- // uint64_t payload = tape_val & simdjson::internal::JSON_VALUE_MASK;
69
- type = (tape_val >> 56 );
70
- switch (type) {
71
- case ' l' : // we have a long int
72
- answer.number_count ++;
73
- tape_idx++; // skipping the integer
74
- break ;
75
- case ' u' : // we have a long uint
76
- answer.number_count ++;
77
- tape_idx++; // skipping the unsigned integer
78
- break ;
79
- case ' d' : // we have a double
80
- answer.number_count ++;
81
- tape_idx++; // skipping the double
82
- break ;
83
- case ' n' : // we have a null
84
- answer.null_count ++;
85
- break ;
86
- case ' t' : // we have a true
87
- answer.true_count ++;
88
- break ;
89
- case ' f' : // we have a false
90
- answer.false_count ++;
91
- break ;
92
- case ' {' : // we have an object
93
- answer.object_count ++;
94
- break ;
95
- case ' }' : // we end an object
96
- break ;
97
- case ' [' : // we start an array
98
- answer.array_count ++;
99
- break ;
100
- case ' ]' : // we end an array
101
- break ;
102
- default :
103
- break ; // ignore
60
+ }
61
+
62
+ void simdjson_recurse (stat_t &s, simdjson::document::element element) {
63
+ if (element.is_array ()) {
64
+ s.array_count ++;
65
+ auto [array, array_error] = element.as_array ();
66
+ for (auto child : array) {
67
+ if (child.is_array () || child.is_object ()) {
68
+ simdjson_recurse (s, child);
69
+ } else {
70
+ simdjson_process_atom (s, child);
71
+ }
72
+ }
73
+ } else if (element.is_object ()) {
74
+ s.object_count ++;
75
+ auto [object, object_error] = element.as_object ();
76
+ for (auto [key, value] : object) {
77
+ if (value.is_array () || value.is_object ()) {
78
+ simdjson_recurse (s, value);
79
+ } else {
80
+ simdjson_process_atom (s, value);
81
+ }
104
82
}
83
+ } else {
84
+ simdjson_process_atom (s, element);
105
85
}
106
- return answer;
86
+ }
87
+
88
+ __attribute__ ((noinline)) stat_t
89
+ simdjson_compute_stats(const simdjson::padded_string &p) {
90
+ stat_t s{};
91
+ simdjson::document::parser parser;
92
+ auto [doc, error] = parser.parse (p);
93
+ if (error) {
94
+ s.valid = false ;
95
+ return s;
96
+ }
97
+ s.valid = true ;
98
+ simdjson_recurse (s, doc.root ());
99
+ return s;
107
100
}
108
101
109
102
// see
@@ -153,7 +146,7 @@ __attribute__((noinline)) stat_t
153
146
sasjon_compute_stats(const simdjson::padded_string &p) {
154
147
stat_t answer;
155
148
char *buffer = (char *)malloc (p.size ());
156
- if (buffer == nullptr ) {
149
+ if (buffer == nullptr ) {
157
150
return answer;
158
151
}
159
152
memcpy (buffer, p.data (), p.size ());
@@ -215,7 +208,7 @@ __attribute__((noinline)) stat_t
215
208
rapid_compute_stats(const simdjson::padded_string &p) {
216
209
stat_t answer;
217
210
char *buffer = (char *)malloc (p.size () + 1 );
218
- if (buffer == nullptr ) {
211
+ if (buffer == nullptr ) {
219
212
return answer;
220
213
}
221
214
memcpy (buffer, p.data (), p.size ());
@@ -242,13 +235,13 @@ __attribute__((noinline)) stat_t
242
235
rapid_accurate_compute_stats(const simdjson::padded_string &p) {
243
236
stat_t answer;
244
237
char *buffer = (char *)malloc (p.size () + 1 );
245
- if (buffer == nullptr ) {
238
+ if (buffer == nullptr ) {
246
239
return answer;
247
240
}
248
241
memcpy (buffer, p.data (), p.size ());
249
242
buffer[p.size ()] = ' \0 ' ;
250
243
rapidjson::Document d;
251
- d.ParseInsitu <kParseValidateEncodingFlag | kParseFullPrecisionFlag >(buffer);
244
+ d.ParseInsitu <kParseValidateEncodingFlag | kParseFullPrecisionFlag >(buffer);
252
245
answer.valid = !d.HasParseError ();
253
246
if (!answer.valid ) {
254
247
free (buffer);
@@ -323,7 +316,7 @@ int main(int argc, char *argv[]) {
323
316
stat_t s2a = rapid_accurate_compute_stats (p);
324
317
if (verbose) {
325
318
printf (" rapid full: " );
326
- print_stat (s2 );
319
+ print_stat (s2a );
327
320
}
328
321
stat_t s3 = sasjon_compute_stats (p);
329
322
if (verbose) {
@@ -337,10 +330,12 @@ int main(int argc, char *argv[]) {
337
330
if (just_data) {
338
331
printf (" name cycles_per_byte cycles_per_byte_err gb_per_s gb_per_s_err \n " );
339
332
}
340
- BEST_TIME (" simdjson " , simdjson_compute_stats (p).valid , true , , repeat,
341
- volume, !just_data);
342
- BEST_TIME (" RapidJSON " , rapid_compute_stats (p).valid , true , , repeat, volume,
343
- !just_data);
344
- BEST_TIME (" sasjon " , sasjon_compute_stats (p).valid , true , , repeat, volume,
345
- !just_data);
333
+ BEST_TIME (" simdjson " , simdjson_compute_stats (p).valid , true , ,
334
+ repeat, volume, !just_data);
335
+ BEST_TIME (" RapidJSON " , rapid_compute_stats (p).valid , true , ,
336
+ repeat, volume, !just_data);
337
+ BEST_TIME (" RapidJSON (precise) " , rapid_accurate_compute_stats (p).valid , true , ,
338
+ repeat, volume, !just_data);
339
+ BEST_TIME (" sasjon " , sasjon_compute_stats (p).valid , true , ,
340
+ repeat, volume, !just_data);
346
341
}
0 commit comments