Skip to content

Commit 3aa3175

Browse files
committed
Add tests for recovering from partial child iteration
1 parent 4c63956 commit 3aa3175

13 files changed

+520
-77
lines changed

include/simdjson/generic/ondemand/array_iterator-inl.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ simdjson_really_inline array_iterator &array_iterator::operator++() noexcept {
2121
// PERF NOTE this is a safety rail ... users should exit loops as soon as they receive an error, so we'll never get here.
2222
// However, it does not seem to make a perf difference, so we add it out of an abundance of caution.
2323
if ((error = iter.error()) ) { return *this; }
24-
if ((error = iter.finish_child() )) { return *this; }
24+
if ((error = iter.skip_child() )) { return *this; }
2525
if ((error = iter.has_next_element().error() )) { return *this; }
2626
return *this;
2727
}

include/simdjson/generic/ondemand/document.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -208,14 +208,14 @@ class document {
208208

209209
/**
210210
* Look up a field by name on an object.
211-
*
211+
*
212212
* Important notes:
213-
*
213+
*
214214
* * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys.
215215
* e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`.
216216
* * **Once Only:** You may only look up a single field on a document. To look up multiple fields,
217217
* use `.get_object()` or cast to `object`.
218-
*
218+
*
219219
* @param key The key to look up.
220220
* @returns The value of the field, NO_SUCH_FIELD if the field is not in the object, or
221221
* INCORRECT_TYPE if the JSON value is not an array.

include/simdjson/generic/ondemand/json_iterator-inl.h

Lines changed: 33 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -30,40 +30,57 @@ simdjson_really_inline json_iterator::json_iterator(ondemand::parser *_parser) n
3030
}
3131

3232
simdjson_warn_unused simdjson_really_inline error_code json_iterator::skip_child(depth_t parent_depth) noexcept {
33-
SIMDJSON_ASSUME(depth() == parent_depth+1);
33+
if (depth() <= parent_depth) { return SUCCESS; }
3434

3535
switch (*advance()) {
36-
case '[': case '{':
36+
// TODO consider whether matching braces is a requirement: if non-matching braces indicates
37+
// *missing* braces, then future lookups are not in the object/arrays they think they are,
38+
// violating the rule "validate enough structure that the user can be confident they are
39+
// looking at the right values."
40+
// PERF TODO we can eliminate the switch here with a lookup of how much to add to depth
41+
42+
// For the first open array/object in a value, we've already incremented depth, so keep it the same
43+
// We never stop at colon, but if we did, it wouldn't affect depth
44+
case '[': case '{': case ':':
3745
logger::log_start_value(*this, "skip");
38-
return finish_child(parent_depth);
46+
break;
47+
// If there is a comma, we have just finished a value in an array/object, and need to get back in
48+
case ',':
49+
logger::log_value(*this, "skip");
50+
break;
51+
// ] or } means we just finished a value and need to jump out of the array/object
52+
case ']': case '}':
53+
logger::log_end_value(*this, "skip");
54+
_depth--;
55+
if (depth() <= parent_depth) { return SUCCESS; }
56+
break;
57+
// Anything else must be a scalar value
3958
default:
59+
// For the first scalar, we will have incremented depth already, so we decrement it here.
4060
logger::log_value(*this, "skip");
41-
ascend_to(parent_depth);
42-
return SUCCESS;
61+
_depth--;
62+
if (depth() <= parent_depth) { return SUCCESS; }
63+
break;
4364
}
44-
}
4565

46-
simdjson_warn_unused simdjson_really_inline error_code json_iterator::finish_child(depth_t parent_depth) noexcept {
47-
if (depth() <= parent_depth) { return SUCCESS; }
48-
// The loop breaks only when depth()-- happens.
66+
// Now that we've considered the first value, we only increment/decrement for arrays/objects
4967
auto end = &parser->dom_parser.structural_indexes[parser->dom_parser.n_structural_indexes];
5068
while (token.index <= end) {
51-
uint8_t ch = *advance();
52-
switch (ch) {
69+
switch (*advance()) {
70+
case '[': case '{':
71+
logger::log_start_value(*this, "skip");
72+
_depth++;
73+
break;
5374
// TODO consider whether matching braces is a requirement: if non-matching braces indicates
5475
// *missing* braces, then future lookups are not in the object/arrays they think they are,
5576
// violating the rule "validate enough structure that the user can be confident they are
5677
// looking at the right values."
78+
// PERF TODO we can eliminate the switch here with a lookup of how much to add to depth
5779
case ']': case '}':
5880
logger::log_end_value(*this, "skip");
5981
_depth--;
6082
if (depth() <= parent_depth) { return SUCCESS; }
6183
break;
62-
// PERF TODO does it skip the depth() check when we don't decrement depth()?
63-
case '[': case '{':
64-
logger::log_start_value(*this, "skip");
65-
_depth++;
66-
break;
6784
default:
6885
logger::log_value(*this, "skip", "");
6986
break;

include/simdjson/generic/ondemand/json_iterator.h

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ class json_iterator {
3737
error_code error{SUCCESS};
3838
/**
3939
* Depth of the current token in the JSON.
40-
*
40+
*
4141
* - 0 = finished with document
4242
* - 1 = document root value (could be [ or {, not yet known)
4343
* - 2 = , or } inside root array/object
@@ -57,11 +57,6 @@ class json_iterator {
5757
*/
5858
simdjson_warn_unused simdjson_really_inline error_code skip_child(depth_t parent_depth) noexcept;
5959

60-
/**
61-
* Finishes iteration of a child in an object or array.
62-
*/
63-
simdjson_warn_unused simdjson_really_inline error_code finish_child(depth_t parent_depth) noexcept;
64-
6560
/**
6661
* Tell whether the iterator is still at the start
6762
*/
@@ -113,18 +108,18 @@ class json_iterator {
113108

114109
/**
115110
* Ascend one level.
116-
*
111+
*
117112
* Validates that the depth - 1 == parent_depth.
118-
*
113+
*
119114
* @param parent_depth the expected parent depth.
120115
*/
121116
simdjson_really_inline void ascend_to(depth_t parent_depth) noexcept;
122117

123118
/**
124119
* Descend one level.
125-
*
120+
*
126121
* Validates that the new depth == child_depth.
127-
*
122+
*
128123
* @param child_depth the expected child depth.
129124
*/
130125
simdjson_really_inline void descend_to(depth_t parent_depth) noexcept;

include/simdjson/generic/ondemand/logger-inl.h

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -56,8 +56,21 @@ simdjson_really_inline void log_headers() noexcept {
5656
log_depth = 0;
5757
if (LOG_ENABLED) {
5858
printf("\n");
59-
printf("| %-*s | %-*s | %-*s | %-*s | Detail |\n", LOG_EVENT_LEN, "Event", LOG_BUFFER_LEN, "Buffer", LOG_SMALL_BUFFER_LEN, "Next", 5, "Next#");
60-
printf("|%.*s|%.*s|%.*s|%.*s|--------|\n", LOG_EVENT_LEN+2, DASHES, LOG_BUFFER_LEN+2, DASHES, LOG_SMALL_BUFFER_LEN+2, DASHES, 5+2, DASHES);
59+
printf("| %-*s ", LOG_EVENT_LEN, "Event");
60+
printf("| %-*s ", LOG_BUFFER_LEN, "Buffer");
61+
printf("| %-*s ", LOG_SMALL_BUFFER_LEN, "Next");
62+
// printf("| %-*s ", 5, "Next#");
63+
printf("| %-*s ", 5, "Depth");
64+
printf("| Detail ");
65+
printf("|\n");
66+
67+
printf("|%.*s", LOG_EVENT_LEN+2, DASHES);
68+
printf("|%.*s", LOG_BUFFER_LEN+2, DASHES);
69+
printf("|%.*s", LOG_SMALL_BUFFER_LEN+2, DASHES);
70+
// printf("|%.*s", 5+2, DASHES);
71+
printf("|%.*s", 5+2, DASHES);
72+
printf("|--------");
73+
printf("|\n");
6174
fflush(stdout);
6275
}
6376
}
@@ -86,7 +99,8 @@ simdjson_really_inline void log_line(const json_iterator &iter, const char *titl
8699
}
87100
printf(" ");
88101
}
89-
printf("| %5u ", iter.token.peek_index(delta+1));
102+
// printf("| %5u ", iter.token.peek_index(delta+1));
103+
printf("| %5u ", iter.depth());
90104
printf("| %.*s ", int(detail.size()), detail.data());
91105
printf("|\n");
92106
fflush(stdout);

include/simdjson/generic/ondemand/logger.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,13 @@ namespace logger {
1515

1616
static simdjson_really_inline void log_headers() noexcept;
1717
static simdjson_really_inline void log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta) noexcept;
18-
static simdjson_really_inline void log_event(const json_iterator &iter, const char *type, std::string_view detail="", int delta=-1, int depth_delta=0) noexcept;
18+
static simdjson_really_inline void log_event(const json_iterator &iter, const char *type, std::string_view detail="", int delta=0, int depth_delta=0) noexcept;
1919
static simdjson_really_inline void log_value(const json_iterator &iter, const char *type, std::string_view detail="", int delta=-1, int depth_delta=0) noexcept;
2020
static simdjson_really_inline void log_start_value(const json_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept;
2121
static simdjson_really_inline void log_end_value(const json_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept;
2222
static simdjson_really_inline void log_error(const json_iterator &iter, const char *error, const char *detail="", int delta=-1, int depth_delta=0) noexcept;
2323

24-
static simdjson_really_inline void log_event(const value_iterator &iter, const char *type, std::string_view detail="", int delta=-1, int depth_delta=0) noexcept;
24+
static simdjson_really_inline void log_event(const value_iterator &iter, const char *type, std::string_view detail="", int delta=0, int depth_delta=0) noexcept;
2525
static simdjson_really_inline void log_value(const value_iterator &iter, const char *type, std::string_view detail="", int delta=-1, int depth_delta=0) noexcept;
2626
static simdjson_really_inline void log_start_value(const value_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept;
2727
static simdjson_really_inline void log_end_value(const value_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept;

include/simdjson/generic/ondemand/object.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26,23 +26,23 @@ class object {
2626

2727
/**
2828
* Look up a field by name on an object.
29-
*
29+
*
3030
* Important notes:
31-
*
31+
*
3232
* * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys.
3333
* e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`.
3434
* * **Order Sensitive:** Each field lookup will only move forward in the object. In particular,
3535
* the following code reads z, then y, then x, and thus will not retrieve x or y if fed the
3636
* JSON `{ "x": 1, "y": 2, "z": 3 }`:
37-
*
37+
*
3838
* ```c++
3939
* simdjson::builtin::ondemand::parser parser;
4040
* auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded);
4141
* double z = obj["z"];
4242
* double y = obj["y"];
4343
* double x = obj["x"];
4444
* ```
45-
*
45+
*
4646
* @param key The key to look up.
4747
* @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object.
4848
*/

include/simdjson/generic/ondemand/object_iterator-inl.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ simdjson_really_inline object_iterator &object_iterator::operator++() noexcept {
3232
if (!iter.is_open()) { return *this; } // Iterator will be released if there is an error
3333

3434
simdjson_unused error_code error;
35-
if ((error = iter.finish_child() )) { return *this; }
35+
if ((error = iter.skip_child() )) { return *this; }
3636

3737
simdjson_unused bool has_value;
3838
if ((error = iter.has_next_field().get(has_value) )) { return *this; };
@@ -90,7 +90,7 @@ simdjson_warn_unused simdjson_really_inline error_code object_iterator::find_fie
9090
at_start = false;
9191
has_value = true;
9292
} else {
93-
if ((error = iter.finish_child() )) { iter.abandon(); return error; }
93+
if ((error = iter.skip_child() )) { iter.abandon(); return error; }
9494
if ((error = iter.has_next_field().get(has_value) )) { iter.abandon(); return error; }
9595
}
9696
while (has_value) {

include/simdjson/generic/ondemand/object_iterator.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,11 +50,11 @@ class object_iterator {
5050
*
5151
* PERF NOTE: this should be elided into inline control flow: it is only used for the first []
5252
* or * call, and SSA optimizers commonly do first-iteration loop optimization.
53-
*
53+
*
5454
* SAFETY: this is not safe; the object_iterator can be copied freely, so the state CAN be lost.
5555
*/
5656
bool at_start{};
57-
57+
5858
simdjson_really_inline object_iterator(const value_iterator &iter) noexcept;
5959
friend struct simdjson_result<object_iterator>;
6060
friend class object;

include/simdjson/generic/ondemand/value.h

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -252,14 +252,14 @@ class value {
252252

253253
/**
254254
* Look up a field by name on an object.
255-
*
255+
*
256256
* Important notes:
257-
*
257+
*
258258
* * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys.
259259
* e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`.
260260
* * **Once Only:** You may only look up a single field on a value. To look up multiple fields,
261261
* you must cast to object or call `.get_object()`.
262-
*
262+
*
263263
* @param key The key to look up.
264264
* @returns The value of the field, NO_SUCH_FIELD if the field is not in the object, or
265265
* INCORRECT_TYPE if the JSON value is not an array.
@@ -372,14 +372,14 @@ struct simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value> : public SIMDJS
372372

373373
/**
374374
* Look up a field by name on an object.
375-
*
375+
*
376376
* Important notes:
377-
*
377+
*
378378
* * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys.
379379
* e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`.
380380
* * **Once Only:** You may only look up a single field on a value. To look up multiple fields,
381381
* you must cast to object or call `.get_object()`.
382-
*
382+
*
383383
* @param key The key to look up.
384384
* @returns The value of the field, NO_SUCH_FIELD if the field is not in the object, or
385385
* INCORRECT_TYPE if the JSON value is not an array.

include/simdjson/generic/ondemand/value_iterator-inl.h

Lines changed: 11 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -149,7 +149,7 @@ simdjson_warn_unused simdjson_really_inline simdjson_result<std::string_view> va
149149
simdjson_warn_unused simdjson_really_inline simdjson_result<raw_json_string> value_iterator::try_get_raw_json_string() noexcept {
150150
SIMDJSON_ASSUME( _json_iter->depth() == depth() && depth() > 0 );
151151

152-
logger::log_value(*_json_iter, "string", "");
152+
logger::log_value(*_json_iter, "string", "", 0);
153153
auto json = _json_iter->peek();
154154
if (*json != '"') { logger::log_error(*_json_iter, "Not a string"); return INCORRECT_TYPE; }
155155
_json_iter->advance();
@@ -159,7 +159,7 @@ simdjson_warn_unused simdjson_really_inline simdjson_result<raw_json_string> val
159159
simdjson_warn_unused simdjson_really_inline simdjson_result<raw_json_string> value_iterator::require_raw_json_string() noexcept {
160160
SIMDJSON_ASSUME( _json_iter->depth() == depth() && depth() > 0 );
161161

162-
logger::log_value(*_json_iter, "string", "");
162+
logger::log_value(*_json_iter, "string", "", 0);
163163
auto json = _json_iter->advance();
164164
if (*json != '"') { logger::log_error(*_json_iter, "Not a string"); return INCORRECT_TYPE; }
165165
_json_iter->ascend_to(depth()-1);
@@ -168,7 +168,7 @@ simdjson_warn_unused simdjson_really_inline simdjson_result<raw_json_string> val
168168
simdjson_warn_unused simdjson_really_inline simdjson_result<uint64_t> value_iterator::try_get_uint64() noexcept {
169169
SIMDJSON_ASSUME( _json_iter->depth() == depth() && depth() > 1 );
170170

171-
logger::log_value(*_json_iter, "uint64", "");
171+
logger::log_value(*_json_iter, "uint64", "", 0);
172172
uint64_t result;
173173
SIMDJSON_TRY( numberparsing::parse_unsigned(_json_iter->peek()).get(result) );
174174
_json_iter->advance();
@@ -178,14 +178,14 @@ simdjson_warn_unused simdjson_really_inline simdjson_result<uint64_t> value_iter
178178
simdjson_warn_unused simdjson_really_inline simdjson_result<uint64_t> value_iterator::require_uint64() noexcept {
179179
SIMDJSON_ASSUME( _json_iter->depth() == depth() && depth() > 1 );
180180

181-
logger::log_value(*_json_iter, "uint64", "");
181+
logger::log_value(*_json_iter, "uint64", "", 0);
182182
_json_iter->ascend_to(depth()-1);
183183
return numberparsing::parse_unsigned(_json_iter->advance());
184184
}
185185
simdjson_warn_unused simdjson_really_inline simdjson_result<int64_t> value_iterator::try_get_int64() noexcept {
186186
SIMDJSON_ASSUME( _json_iter->depth() == depth() && depth() > 1 );
187187

188-
logger::log_value(*_json_iter, "int64", "");
188+
logger::log_value(*_json_iter, "int64", "", 0);
189189
int64_t result;
190190
SIMDJSON_TRY( numberparsing::parse_integer(_json_iter->peek()).get(result) );
191191
_json_iter->advance();
@@ -195,14 +195,14 @@ simdjson_warn_unused simdjson_really_inline simdjson_result<int64_t> value_itera
195195
simdjson_warn_unused simdjson_really_inline simdjson_result<int64_t> value_iterator::require_int64() noexcept {
196196
SIMDJSON_ASSUME( _json_iter->depth() == depth() && depth() > 1 );
197197

198-
logger::log_value(*_json_iter, "int64", "");
198+
logger::log_value(*_json_iter, "int64", "", 0);
199199
_json_iter->ascend_to(depth()-1);
200200
return numberparsing::parse_integer(_json_iter->advance());
201201
}
202202
simdjson_warn_unused simdjson_really_inline simdjson_result<double> value_iterator::try_get_double() noexcept {
203203
SIMDJSON_ASSUME( _json_iter->depth() == depth() && depth() > 1 );
204204

205-
logger::log_value(*_json_iter, "double", "");
205+
logger::log_value(*_json_iter, "double", "", 0);
206206
double result;
207207
SIMDJSON_TRY( numberparsing::parse_double(_json_iter->peek()).get(result) );
208208
_json_iter->advance();
@@ -212,7 +212,7 @@ simdjson_warn_unused simdjson_really_inline simdjson_result<double> value_iterat
212212
simdjson_warn_unused simdjson_really_inline simdjson_result<double> value_iterator::require_double() noexcept {
213213
SIMDJSON_ASSUME( _json_iter->depth() == depth() && depth() > 1 );
214214

215-
logger::log_value(*_json_iter, "double", "");
215+
logger::log_value(*_json_iter, "double", "", 0);
216216
_json_iter->ascend_to(depth()-1);
217217
return numberparsing::parse_double(_json_iter->advance());
218218
}
@@ -270,7 +270,7 @@ simdjson_warn_unused simdjson_really_inline simdjson_result<uint64_t> value_iter
270270

271271
uint8_t tmpbuf[20+1]; // <20 digits> is the longest possible unsigned integer
272272
if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf)) { logger::log_error(*_json_iter, "Root number more than 20 characters"); return NUMBER_ERROR; }
273-
logger::log_value(*_json_iter, "uint64", "");
273+
logger::log_value(*_json_iter, "uint64", "", 0);
274274
auto result = numberparsing::parse_unsigned(tmpbuf);
275275
if (result.error()) { logger::log_error(*_json_iter, "Error parsing unsigned integer"); }
276276
return result;
@@ -294,7 +294,7 @@ simdjson_warn_unused simdjson_really_inline simdjson_result<int64_t> value_itera
294294

295295
uint8_t tmpbuf[20+1]; // -<19 digits> is the longest possible integer
296296
if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf)) { logger::log_error(*_json_iter, "Root number more than 20 characters"); return NUMBER_ERROR; }
297-
logger::log_value(*_json_iter, "int64", "");
297+
logger::log_value(*_json_iter, "int64", "", 0);
298298
auto result = numberparsing::parse_integer(tmpbuf);
299299
if (result.error()) { logger::log_error(*_json_iter, "Error parsing integer"); }
300300
return result;
@@ -319,7 +319,7 @@ simdjson_warn_unused simdjson_really_inline simdjson_result<double> value_iterat
319319
// Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest number: -0.<fraction>e-308.
320320
uint8_t tmpbuf[1074+8+1];
321321
if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf)) { logger::log_error(*_json_iter, "Root number more than 1082 characters"); return NUMBER_ERROR; }
322-
logger::log_value(*_json_iter, "double", "");
322+
logger::log_value(*_json_iter, "double", "", 0);
323323
auto result = numberparsing::parse_double(tmpbuf);
324324
if (result.error()) { logger::log_error(*_json_iter, "Error parsing double"); }
325325
return result;
@@ -383,9 +383,6 @@ simdjson_really_inline bool value_iterator::require_root_null() noexcept {
383383
simdjson_warn_unused simdjson_really_inline error_code value_iterator::skip_child() noexcept {
384384
return _json_iter->skip_child(depth());
385385
}
386-
simdjson_warn_unused simdjson_really_inline error_code value_iterator::finish_child() noexcept {
387-
return _json_iter->finish_child(depth());
388-
}
389386
simdjson_really_inline value_iterator value_iterator::child() const noexcept {
390387
SIMDJSON_ASSUME( _json_iter->depth() == depth()+1 );
391388
return { _json_iter, depth()+1 };

0 commit comments

Comments
 (0)