Skip to content

Commit b842658

Browse files
committed
Make field lookup order-insensitive.
1 parent 85001c5 commit b842658

File tree

6 files changed

+125
-24
lines changed

6 files changed

+125
-24
lines changed

include/simdjson/generic/ondemand/json_iterator-inl.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,14 @@ simdjson_really_inline error_code json_iterator::report_error(error_code _error,
156156
return error;
157157
}
158158

159+
simdjson_really_inline const uint32_t *json_iterator::checkpoint() const noexcept {
160+
return token.checkpoint();
161+
}
162+
simdjson_really_inline void json_iterator::restore_checkpoint(const uint32_t *target_checkpoint) noexcept {
163+
token.restore_checkpoint(target_checkpoint);
164+
}
165+
166+
159167
simdjson_really_inline error_code json_iterator::optional_error(error_code _error, const char *message) noexcept {
160168
SIMDJSON_ASSUME(_error == INCORRECT_TYPE || _error == NO_SUCH_FIELD);
161169
logger::log_error(*this, message);

include/simdjson/generic/ondemand/json_iterator.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,9 @@ class json_iterator {
163163
template<int N> simdjson_warn_unused simdjson_really_inline bool peek_to_buffer(uint8_t (&tmpbuf)[N]) noexcept;
164164
template<int N> simdjson_warn_unused simdjson_really_inline bool advance_to_buffer(uint8_t (&tmpbuf)[N]) noexcept;
165165

166+
simdjson_really_inline const uint32_t *checkpoint() const noexcept;
167+
simdjson_really_inline void restore_checkpoint(const uint32_t *target_checkpoint) noexcept;
168+
166169
protected:
167170
simdjson_really_inline json_iterator(ondemand::parser *parser) noexcept;
168171

include/simdjson/generic/ondemand/token_iterator-inl.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,10 @@ simdjson_really_inline const uint32_t *token_iterator::checkpoint() const noexce
4343
return index;
4444
}
4545

46+
simdjson_really_inline void token_iterator::restore_checkpoint(const uint32_t *target_checkpoint) noexcept {
47+
index = target_checkpoint;
48+
}
49+
4650
} // namespace ondemand
4751
} // namespace SIMDJSON_IMPLEMENTATION
4852
} // namespace simdjson

include/simdjson/generic/ondemand/token_iterator.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,11 @@ class token_iterator {
5454
*/
5555
simdjson_really_inline const uint32_t *checkpoint() const noexcept;
5656

57+
/**
58+
* Reset to a previously saved index.
59+
*/
60+
simdjson_really_inline void restore_checkpoint(const uint32_t *target_checkpoint) noexcept;
61+
5762
// NOTE: we don't support a full C++ iterator interface, because we expect people to make
5863
// different calls to advance the iterator based on *their own* state.
5964

include/simdjson/generic/ondemand/value_iterator-inl.h

Lines changed: 96 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -51,37 +51,124 @@ simdjson_warn_unused simdjson_really_inline simdjson_result<bool> value_iterator
5151
}
5252
}
5353

54-
/**
55-
* Find the field with the given key. May be used in place of ++.
56-
*/
5754
simdjson_warn_unused simdjson_really_inline simdjson_result<bool> value_iterator::find_field_raw(const std::string_view key) noexcept {
58-
if (!is_open()) { return false; }
59-
60-
// Unless this is the first field, we need to advance past the , and check for }
6155
error_code error;
6256
bool has_value;
57+
58+
//
59+
// Initially, the object can be in one of a few different places:
60+
//
61+
// 1. The start of the object, at the first field:
62+
//
63+
// ```
64+
// { "a": [ 1, 2 ], "b": [ 3, 4 ] }
65+
// ^ (depth 2, index 1)
66+
// ```
67+
//
6368
if (at_first_field()) {
69+
// If we're at the beginning of the object, we definitely have a field
6470
has_value = true;
71+
72+
// 2. When a previous search did not yield a value or the object is empty:
73+
//
74+
// ```
75+
// { "a": [ 1, 2 ], "b": [ 3, 4 ] }
76+
// ^ (depth 0)
77+
// { }
78+
// ^ (depth 0, index 2)
79+
// ```
80+
//
81+
} else if (!is_open()) {
82+
has_value = false;
83+
84+
// 3. When a previous search found a field or an iterator yielded a value:
85+
//
86+
// ```
87+
// // When a field was not fully consumed (or not even touched at all)
88+
// { "a": [ 1, 2 ], "b": [ 3, 4 ] }
89+
// ^ (depth 2)
90+
// // When a field was fully consumed
91+
// { "a": [ 1, 2 ], "b": [ 3, 4 ] }
92+
// ^ (depth 1)
93+
// // When the last field was fully consumed
94+
// { "a": [ 1, 2 ], "b": [ 3, 4 ] }
95+
// ^ (depth 1)
96+
// ```
97+
//
6598
} else {
99+
// Finish the previous value and see if , or } is next
66100
if ((error = skip_child() )) { abandon(); return error; }
67101
if ((error = has_next_field().get(has_value) )) { abandon(); return error; }
68102
}
103+
104+
// After initial processing, we will be in one of two states:
105+
//
106+
// ```
107+
// // At the beginning of a field
108+
// { "a": [ 1, 2 ], "b": [ 3, 4 ] }
109+
// ^ (depth 1)
110+
// { "a": [ 1, 2 ], "b": [ 3, 4 ] }
111+
// ^ (depth 1)
112+
// // At the end of the object
113+
// { "a": [ 1, 2 ], "b": [ 3, 4 ] }
114+
// ^ (depth 0)
115+
// ```
116+
//
117+
118+
// First, we scan from that point to the end.
119+
// If we don't find a match, we loop back around, and scan from the beginning to that point.
120+
const uint32_t *search_start = _json_iter->checkpoint();
121+
122+
// Next, we find a match starting from the current position.
69123
while (has_value) {
70-
// Get the key
124+
SIMDJSON_ASSUME( _json_iter->_depth == _depth + 1 ); // We must be at the start of a field
125+
126+
// Get the key and colon, stopping at the value.
71127
raw_json_string actual_key;
72128
if ((error = field_key().get(actual_key) )) { abandon(); return error; };
73129
if ((error = field_value() )) { abandon(); return error; }
74130

75-
// Check if it matches
131+
// If it matches, stop and return
76132
if (actual_key == key) {
77133
logger::log_event(*this, "match", key, -2);
78134
return true;
79135
}
136+
137+
// No match: skip the value and see if , or } is next
80138
logger::log_event(*this, "no match", key, -2);
81-
SIMDJSON_TRY( skip_child() ); // Skip the value entirely
139+
SIMDJSON_TRY( skip_child() );
82140
if ((error = has_next_field().get(has_value) )) { abandon(); return error; }
83141
}
84142

143+
// If we reach the end without finding a match, search the rest of the fields starting at the
144+
// beginning of the object.
145+
// (We have already run through the object before, so we've already validated its structure. We
146+
// don't check errors in this bit.)
147+
_json_iter->restore_checkpoint(_start_index + 1);
148+
_json_iter->descend_to(_depth);
149+
150+
has_value = started_object();
151+
while (_json_iter->checkpoint() < search_start) {
152+
SIMDJSON_ASSUME(has_value); // we should reach search_start before ever reaching the end of the object
153+
SIMDJSON_ASSUME( _json_iter->_depth == _depth + 1 ); // We must be at the start of a field
154+
155+
// Get the key and colon, stopping at the value.
156+
raw_json_string actual_key;
157+
error = field_key().get(actual_key); SIMDJSON_ASSUME(!error);
158+
error = field_value(); SIMDJSON_ASSUME(!error);
159+
160+
// If it matches, stop and return
161+
if (actual_key == key) {
162+
logger::log_event(*this, "match", key, -2);
163+
return true;
164+
}
165+
166+
// No match: skip the value and see if , or } is next
167+
logger::log_event(*this, "no match", key, -2);
168+
SIMDJSON_TRY( skip_child() );
169+
error = has_next_field().get(has_value); SIMDJSON_ASSUME(!error);
170+
}
171+
85172
// If the loop ended, we're out of fields to look at.
86173
return false;
87174
}

tests/ondemand/ondemand_basictests.cpp

Lines changed: 9 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1166,7 +1166,7 @@ namespace dom_api_tests {
11661166
ASSERT_EQUAL( object["b"].get_uint64().first, 2 );
11671167
ASSERT_EQUAL( object["c/d"].get_uint64().first, 3 );
11681168

1169-
ASSERT_ERROR( object["a"], NO_SUCH_FIELD );
1169+
ASSERT_EQUAL( object["a"].get_uint64().first, 1 );
11701170
ASSERT_ERROR( object["d"], NO_SUCH_FIELD );
11711171
return true;
11721172
}));
@@ -1178,7 +1178,7 @@ namespace dom_api_tests {
11781178
ASSERT_EQUAL( object["b"].get_uint64().first, 2 );
11791179
ASSERT_EQUAL( object["c/d"].get_uint64().first, 3 );
11801180

1181-
ASSERT_ERROR( object["a"], NO_SUCH_FIELD );
1181+
ASSERT_EQUAL( object["a"].get_uint64().first, 1 );
11821182
ASSERT_ERROR( object["d"], NO_SUCH_FIELD );
11831183
return true;
11841184
}));
@@ -1189,7 +1189,7 @@ namespace dom_api_tests {
11891189
ASSERT_EQUAL( doc["b"].get_uint64().first, 2 );
11901190
ASSERT_EQUAL( doc["c/d"].get_uint64().first, 3 );
11911191

1192-
ASSERT_ERROR( doc["a"], NO_SUCH_FIELD );
1192+
ASSERT_EQUAL( doc["a"].get_uint64().first, 1 );
11931193
ASSERT_ERROR( doc["d"], NO_SUCH_FIELD );
11941194
return true;
11951195
}));
@@ -1198,7 +1198,7 @@ namespace dom_api_tests {
11981198
ASSERT_EQUAL( doc_result["b"].get_uint64().first, 2 );
11991199
ASSERT_EQUAL( doc_result["c/d"].get_uint64().first, 3 );
12001200

1201-
ASSERT_ERROR( doc_result["a"], NO_SUCH_FIELD );
1201+
ASSERT_EQUAL( doc_result["a"].get_uint64().first, 1 );
12021202
ASSERT_ERROR( doc_result["d"], NO_SUCH_FIELD );
12031203
return true;
12041204
}));
@@ -1437,19 +1437,13 @@ namespace ordering_tests {
14371437
double z{0};
14381438
for (ondemand::object point_object : doc["coordinates"]) {
14391439
z += double(point_object["z"]);
1440-
try {
1441-
x += double(point_object["x"]);
1442-
return false;
1443-
} catch(simdjson_error&) {}
1444-
try {
1445-
y += double(point_object["y"]);
1446-
return false;
1447-
} catch(simdjson_error&) {}
1440+
x += double(point_object["x"]);
1441+
y += double(point_object["y"]);
14481442
}
1449-
return (x == 0) && (y == 0) && (z == 3.3);
1443+
return (x == 1.1) && (y == 2.2) && (z == 3.3);
14501444
}
14511445

1452-
bool robust_order() {
1446+
bool foreach_lookup() {
14531447
TEST_START();
14541448
ondemand::parser parser{};
14551449
auto doc = parser.iterate(json);
@@ -1472,7 +1466,7 @@ namespace ordering_tests {
14721466
#if SIMDJSON_EXCEPTIONS
14731467
in_order() &&
14741468
out_of_order() &&
1475-
robust_order() &&
1469+
foreach_lookup() &&
14761470
#endif
14771471
true;
14781472
}

0 commit comments

Comments
 (0)