1
1
#ifndef SIMDJSON_PARSEDJSONITERATOR_H
2
2
#define SIMDJSON_PARSEDJSONITERATOR_H
3
3
4
- #include " simdjson/parsedjson.h"
5
4
#include " simdjson/jsonformatutils.h"
5
+ #include " simdjson/parsedjson.h"
6
6
#include < cstring>
7
7
#include < iostream>
8
- #include < limits>
9
8
#include < iterator>
9
+ #include < limits>
10
10
11
11
namespace simdjson {
12
- template <size_t max_depth>
13
- class ParsedJson ::BasicIterator {
12
+ template <size_t max_depth> class ParsedJson ::BasicIterator {
14
13
// might throw InvalidJSON if ParsedJson is invalid
15
14
public:
16
15
explicit BasicIterator (ParsedJson &pj_);
@@ -51,6 +50,14 @@ class ParsedJson::BasicIterator {
51
50
return static_cast <int64_t >(pj->tape [location + 1 ]);
52
51
}
53
52
53
+ // get the value as uint64
54
+ inline uint64_t get_unsigned_integer () const {
55
+ if (location + 1 >= tape_length) {
56
+ return 0 ; // default value in case of error
57
+ }
58
+ return pj->tape [location + 1 ];
59
+ }
60
+
54
61
// get the string value at this node (NULL ended); valid only if we're at "
55
62
// note that tabs, and line endings are escaped in the returned value (see
56
63
// print_with_escapes) return value is valid UTF-8 It may contain NULL chars
@@ -90,10 +97,26 @@ class ParsedJson::BasicIterator {
90
97
91
98
// True when the node under the iterator is a string (tape type '"').
inline bool is_string() const {
  const auto node_type = get_type();
  return node_type == '"';
}
92
99
100
+ // Returns true if the current type of node is an signed integer.
101
+ // You can get its value with `get_integer()`.
93
102
inline bool is_integer () const { return get_type () == ' l' ; }
94
103
104
+ // Returns true if the current type of node is an unsigned integer.
105
+ // You can get its value with `get_unsigned_integer()`.
106
+ //
107
+ // NOTE:
108
+ // Only a large value, which is out of range of a 64-bit signed integer, is
109
+ // represented internally as an unsigned node. On the other hand, a typical
110
+ // positive integer, such as 1, 42, or 1000000, is as a signed node.
111
+ // Be aware this function returns false for a signed node.
112
+ inline bool is_unsigned_integer () const { return get_type () == ' u' ; }
113
+
95
114
// True when the node under the iterator is a double (tape type 'd').
inline bool is_double() const {
  const auto node_type = get_type();
  return node_type == 'd';
}
96
115
116
+ inline bool is_number () const {
117
+ return is_integer () || is_unsigned_integer () || is_double ();
118
+ }
119
+
97
120
// True when the node under the iterator has tape type 't'.
inline bool is_true() const {
  const auto node_type = get_type();
  return node_type == 't';
}
98
121
99
122
// True when the node under the iterator has tape type 'f'.
inline bool is_false() const {
  const auto node_type = get_type();
  return node_type == 'f';
}
@@ -110,7 +133,7 @@ class ParsedJson::BasicIterator {
110
133
// (in case of repeated keys, this only finds the first one).
111
134
// We seek the key using C's strcmp so if your JSON strings contain
112
135
// NULL chars, this would trigger a false positive: if you expect that
113
- // to be the case, take extra precautions.
136
+ // to be the case, take extra precautions.
114
137
// Furthermore, we do the comparison character-by-character
115
138
// without taking into account Unicode equivalence.
116
139
inline bool move_to_key (const char *key);
@@ -230,21 +253,28 @@ class ParsedJson::BasicIterator {
230
253
};
231
254
232
255
template <size_t max_depth>
233
- WARN_UNUSED
234
- bool ParsedJson::BasicIterator<max_depth>::is_ok() const { return location < tape_length; }
256
+ WARN_UNUSED bool ParsedJson::BasicIterator<max_depth>::is_ok() const {
257
+ return location < tape_length;
258
+ }
235
259
236
260
// useful for debuging purposes
237
261
template <size_t max_depth>
238
- size_t ParsedJson::BasicIterator<max_depth>::get_tape_location() const { return location; }
262
+ size_t ParsedJson::BasicIterator<max_depth>::get_tape_location() const {
263
+ return location;
264
+ }
239
265
240
266
// useful for debuging purposes
241
267
template <size_t max_depth>
242
- size_t ParsedJson::BasicIterator<max_depth>::get_tape_length() const { return tape_length; }
268
+ size_t ParsedJson::BasicIterator<max_depth>::get_tape_length() const {
269
+ return tape_length;
270
+ }
243
271
244
272
// returns the current depth (start at 1 with 0 reserved for the fictitious root
245
273
// node)
246
274
template <size_t max_depth>
247
- size_t ParsedJson::BasicIterator<max_depth>::get_depth() const { return depth; }
275
+ size_t ParsedJson::BasicIterator<max_depth>::get_depth() const {
276
+ return depth;
277
+ }
248
278
249
279
// A scope is a series of nodes at the same depth, typically it is either an
250
280
// object ({) or an array ([). The root node has type 'r'.
@@ -268,8 +298,8 @@ bool ParsedJson::BasicIterator<max_depth>::move_forward() {
268
298
} else if ((current_type == ' ]' ) || (current_type == ' }' )) {
269
299
// Leaving a scope.
270
300
depth--;
271
- } else if ((current_type == ' d ' ) || (current_type == ' l ' )) {
272
- // d and l types use 2 locations on the tape, not just one.
301
+ } else if (is_number ( )) {
302
+ // these types use 2 locations on the tape, not just one.
273
303
location += 1 ;
274
304
}
275
305
@@ -305,7 +335,8 @@ bool ParsedJson::BasicIterator<max_depth>::move_to_key(const char *key) {
305
335
}
306
336
307
337
template <size_t max_depth>
308
- bool ParsedJson::BasicIterator<max_depth>::move_to_key(const char *key, uint32_t length) {
338
+ bool ParsedJson::BasicIterator<max_depth>::move_to_key(const char *key,
339
+ uint32_t length) {
309
340
if (down ()) {
310
341
do {
311
342
assert (is_string ());
@@ -339,33 +370,31 @@ bool ParsedJson::BasicIterator<max_depth>::move_to_index(uint32_t index) {
339
370
return false ;
340
371
}
341
372
342
- template <size_t max_depth>
343
- bool ParsedJson::BasicIterator<max_depth>::prev() {
373
+ template <size_t max_depth> bool ParsedJson::BasicIterator<max_depth>::prev() {
344
374
size_t target_location = location;
345
375
to_start_scope ();
346
376
size_t npos = location;
347
- if (target_location == npos) {
377
+ if (target_location == npos) {
348
378
return false ; // we were already at the start
349
379
}
350
380
size_t oldnpos;
351
381
// we have that npos < target_location here
352
382
do {
353
383
oldnpos = npos;
354
384
if ((current_type == ' [' ) || (current_type == ' {' )) {
355
- // we need to jump
385
+ // we need to jump
356
386
npos = (current_val & JSON_VALUE_MASK);
357
387
} else {
358
388
npos = npos + ((current_type == ' d' || current_type == ' l' ) ? 2 : 1 );
359
389
}
360
- } while (npos < target_location);
390
+ } while (npos < target_location);
361
391
location = oldnpos;
362
392
current_val = pj->tape [location];
363
393
current_type = current_val >> 56 ;
364
394
return true ;
365
395
}
366
396
367
- template <size_t max_depth>
368
- bool ParsedJson::BasicIterator<max_depth>::up() {
397
+ template <size_t max_depth> bool ParsedJson::BasicIterator<max_depth>::up() {
369
398
if (depth == 1 ) {
370
399
return false ; // don't allow moving back to root
371
400
}
@@ -378,8 +407,7 @@ bool ParsedJson::BasicIterator<max_depth>::up() {
378
407
return true ;
379
408
}
380
409
381
- template <size_t max_depth>
382
- bool ParsedJson::BasicIterator<max_depth>::down() {
410
+ template <size_t max_depth> bool ParsedJson::BasicIterator<max_depth>::down() {
383
411
if (location + 1 >= tape_length) {
384
412
return false ;
385
413
}
@@ -407,14 +435,13 @@ void ParsedJson::BasicIterator<max_depth>::to_start_scope() {
407
435
current_type = (current_val >> 56 );
408
436
}
409
437
410
- template <size_t max_depth>
411
- bool ParsedJson::BasicIterator<max_depth>::next() {
438
+ template <size_t max_depth> bool ParsedJson::BasicIterator<max_depth>::next() {
412
439
size_t npos;
413
440
if ((current_type == ' [' ) || (current_type == ' {' )) {
414
441
// we need to jump
415
442
npos = (current_val & JSON_VALUE_MASK);
416
443
} else {
417
- npos = location + ((current_type == ' d ' || current_type == ' l ' ) ? 2 : 1 );
444
+ npos = location + (is_number ( ) ? 2 : 1 );
418
445
}
419
446
uint64_t next_val = pj->tape [npos];
420
447
uint8_t next_type = (next_val >> 56 );
@@ -456,14 +483,17 @@ ParsedJson::BasicIterator<max_depth>::BasicIterator(ParsedJson &pj_)
456
483
}
457
484
458
485
template <size_t max_depth>
459
- ParsedJson::BasicIterator<max_depth>::BasicIterator(const BasicIterator &o) noexcept
460
- : pj(o.pj), depth(o.depth), location(o.location), tape_length(o.tape_length),
461
- current_type (o.current_type), current_val(o.current_val) {
486
+ ParsedJson::BasicIterator<max_depth>::BasicIterator(
487
+ const BasicIterator &o) noexcept
488
+ : pj(o.pj), depth(o.depth), location(o.location),
489
+ tape_length (o.tape_length), current_type(o.current_type),
490
+ current_val(o.current_val) {
462
491
memcpy (depth_index, o.depth_index , (depth + 1 ) * sizeof (depth_index[0 ]));
463
492
}
464
493
465
494
template <size_t max_depth>
466
- ParsedJson::BasicIterator<max_depth> &ParsedJson::BasicIterator<max_depth>::operator =(const BasicIterator &o) noexcept {
495
+ ParsedJson::BasicIterator<max_depth> &ParsedJson::BasicIterator<max_depth>::
496
+ operator =(const BasicIterator &o) noexcept {
467
497
pj = o.pj ;
468
498
depth = o.depth ;
469
499
location = o.location ;
@@ -475,7 +505,8 @@ ParsedJson::BasicIterator<max_depth> &ParsedJson::BasicIterator<max_depth>::oper
475
505
}
476
506
477
507
template <size_t max_depth>
478
- bool ParsedJson::BasicIterator<max_depth>::print(std::ostream &os, bool escape_strings) const {
508
+ bool ParsedJson::BasicIterator<max_depth>::print(std::ostream &os,
509
+ bool escape_strings) const {
479
510
if (!is_ok ()) {
480
511
return false ;
481
512
}
@@ -495,6 +526,9 @@ bool ParsedJson::BasicIterator<max_depth>::print(std::ostream &os, bool escape_s
495
526
case ' l' : // we have a long int
496
527
os << get_integer ();
497
528
break ;
529
+ case ' u' :
530
+ os << get_unsigned_integer ();
531
+ break ;
498
532
case ' d' :
499
533
os << get_double ();
500
534
break ;
@@ -520,7 +554,8 @@ bool ParsedJson::BasicIterator<max_depth>::print(std::ostream &os, bool escape_s
520
554
}
521
555
522
556
template <size_t max_depth>
523
- bool ParsedJson::BasicIterator<max_depth>::move_to(const char *pointer, uint32_t length) {
557
+ bool ParsedJson::BasicIterator<max_depth>::move_to(const char *pointer,
558
+ uint32_t length) {
524
559
char *new_pointer = nullptr ;
525
560
if (pointer[0 ] == ' #' ) {
526
561
// Converting fragment representation to string representation
0 commit comments