Skip to content

Commit 54f86ce

Browse files
committed
Use json_iterator as shared state instead of document
1 parent c7583b7 commit 54f86ce

14 files changed

+157
-131
lines changed

benchmark/twitter/sax_tweet_reader_visitor.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -295,7 +295,6 @@ simdjson_really_inline void sax_tweet_reader_visitor::field_lookup::neg(const ch
295295
auto index = hash(key, depth);
296296
if (entries[index].key) {
297297
fprintf(stderr, "%s (depth %d) conflicts with %s (depth %d) !\n", key, depth, entries[index].key, int(entries[index].container));
298-
assert(false);
299298
}
300299
}
301300

src/generic/ondemand/array-inl.h

Lines changed: 23 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -5,54 +5,54 @@ namespace ondemand {
55
//
66
// ### Live States
77
//
8-
// While iterating or looking up values, depth >= doc->iter.depth. at_start may vary. Error is
8+
// While iterating or looking up values, depth >= iter->depth. at_start may vary. Error is
99
// always SUCCESS:
1010
//
1111
// - Start: This is the state when the array is first found and the iterator is just past the `[`.
1212
// In this state, at_start == true.
1313
// - Next: After we hand a scalar value to the user, or an array/object which they then fully
1414
// iterate over, the iterator is at the `,` before the next value (or `]`). In this state,
15-
// depth == doc->iter.depth, at_start == false, and error == SUCCESS.
15+
// depth == iter->depth, at_start == false, and error == SUCCESS.
1616
// - Unfinished Business: When we hand an array/object to the user which they do not fully
1717
// iterate over, we need to finish that iteration by skipping child values until we reach the
18-
// Next state. In this state, depth > doc->iter.depth, at_start == false, and error == SUCCESS.
18+
// Next state. In this state, depth > iter->depth, at_start == false, and error == SUCCESS.
1919
//
2020
// ## Error States
2121
//
22-
// In error states, we will yield exactly one more value before stopping. doc->iter.depth == depth
22+
// In error states, we will yield exactly one more value before stopping. iter->depth == depth
2323
// and at_start is always false. We decrement after yielding the error, moving to the Finished
2424
// state.
2525
//
2626
// - Chained Error: When the array iterator is part of an error chain--for example, in
2727
// `for (auto tweet : doc["tweets"])`, where the tweet element may be missing or not be an
2828
// array--we yield that error in the loop, exactly once. In this state, error != SUCCESS and
29-
// doc->iter.depth == depth, and at_start == false. We decrement depth when we yield the error.
29+
// iter->depth == depth, and at_start == false. We decrement depth when we yield the error.
3030
// - Missing Comma Error: When the iterator ++ method discovers there is no comma between elements,
3131
// we flag that as an error and treat it exactly the same as a Chained Error. In this state,
32-
// error == TAPE_ERROR, doc->iter.depth == depth, and at_start == false.
32+
// error == TAPE_ERROR, iter->depth == depth, and at_start == false.
3333
//
3434
// ## Terminal State
3535
//
36-
// The terminal state has doc->iter.depth < depth. at_start is always false.
36+
// The terminal state has iter->depth < depth. at_start is always false.
3737
//
3838
// - Finished: When we have reached a `]` or have reported an error, we are finished. We signal this
39-
// by decrementing depth. In this state, doc->iter.depth < depth, at_start == false, and
39+
// by decrementing depth. In this state, iter->depth < depth, at_start == false, and
4040
// error == SUCCESS.
4141
//
4242

4343
simdjson_really_inline array::array() noexcept = default;
44-
simdjson_really_inline array::array(document *_doc, bool has_value) noexcept
45-
: doc{_doc}, has_next{has_value}, error{SUCCESS}
44+
simdjson_really_inline array::array(json_iterator *_iter, bool has_value) noexcept
45+
: iter{_iter}, has_next{has_value}, error{SUCCESS}
4646
{
4747
}
4848
simdjson_really_inline array::array(array &&other) noexcept
49-
: doc{other.doc}, has_next{other.has_next}, error{other.error}
49+
: iter{other.iter}, has_next{other.has_next}, error{other.error}
5050
{
5151
// Terminate the other iterator
5252
other.has_next = false;
5353
}
5454
simdjson_really_inline array &array::operator=(array &&other) noexcept {
55-
doc = other.doc;
55+
iter = other.iter;
5656
has_next = other.has_next;
5757
error = other.error;
5858
// Terminate the other iterator
@@ -62,18 +62,18 @@ simdjson_really_inline array &array::operator=(array &&other) noexcept {
6262

6363
simdjson_really_inline array::~array() noexcept {
6464
if (!error && has_next) {
65-
logger::log_event(doc->iter, "unfinished", "array");
66-
doc->iter.skip_container();
65+
logger::log_event(*iter, "unfinished", "array");
66+
iter->skip_container();
6767
}
6868
}
6969

70-
simdjson_really_inline simdjson_result<array> array::start(document *doc) noexcept {
70+
simdjson_really_inline simdjson_result<array> array::start(json_iterator *iter) noexcept {
7171
bool has_value;
72-
SIMDJSON_TRY( doc->iter.start_array().get(has_value) );
73-
return array(doc, has_value);
72+
SIMDJSON_TRY( iter->start_array().get(has_value) );
73+
return array(iter, has_value);
7474
}
75-
simdjson_really_inline array array::started(document *doc) noexcept {
76-
return array(doc, doc->iter.started_array());
75+
simdjson_really_inline array array::started(json_iterator *iter) noexcept {
76+
return array(iter, iter->started_array());
7777
}
7878
simdjson_really_inline array::iterator array::begin() noexcept {
7979
return *this;
@@ -96,7 +96,7 @@ simdjson_really_inline array::iterator &array::iterator::operator=(const array::
9696

9797
simdjson_really_inline simdjson_result<value> array::iterator::operator*() noexcept {
9898
if (a->error) { return a->report_error(); }
99-
return value::start(a->doc);
99+
return value::start(a->iter);
100100
}
101101
simdjson_really_inline bool array::iterator::operator==(const array::iterator &) noexcept {
102102
return !a->has_next;
@@ -106,7 +106,7 @@ simdjson_really_inline bool array::iterator::operator!=(const array::iterator &)
106106
}
107107
simdjson_really_inline array::iterator &array::iterator::operator++() noexcept {
108108
if (a->error) { return *this; }
109-
a->error = a->doc->iter.has_next_element().get(a->has_next); // If there's an error, has_next stays true.
109+
a->error = a->iter->has_next_element().get(a->has_next); // If there's an error, has_next stays true.
110110
return *this;
111111
}
112112

@@ -179,12 +179,12 @@ simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value>
179179
if (error()) { return error(); }
180180
return *first;
181181
}
182-
// Assumes it's being compared with the end. true if depth < doc->iter.depth.
182+
// Assumes it's being compared with the end. true if depth < iter->depth.
183183
simdjson_really_inline bool simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::array::iterator>::operator==(const simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::array::iterator> &other) noexcept {
184184
if (error()) { return true; }
185185
return first == other.first;
186186
}
187-
// Assumes it's being compared with the end. true if depth >= doc->iter.depth.
187+
// Assumes it's being compared with the end. true if depth >= iter->depth.
188188
simdjson_really_inline bool simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::array::iterator>::operator!=(const simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::array::iterator> &other) noexcept {
189189
if (error()) { return false; }
190190
return first != other.first;

src/generic/ondemand/array.h

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,9 @@ class array {
3131

3232
// Reads the current element's value, yielding it to the user.
3333
simdjson_really_inline simdjson_result<value> operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION.
34-
// Assumes it's being compared with the end. true if depth < doc->iter.depth.
34+
// Assumes it's being compared with the end. true if depth < iter->depth.
3535
simdjson_really_inline bool operator==(const array::iterator &) noexcept;
36-
// Assumes it's being compared with the end. true if depth >= doc->iter.depth.
36+
// Assumes it's being compared with the end. true if depth >= iter->depth.
3737
simdjson_really_inline bool operator!=(const array::iterator &) noexcept;
3838
// Checks for ']' and ','
3939
simdjson_really_inline array::iterator &operator++() noexcept;
@@ -53,13 +53,13 @@ class array {
5353
* @param iter The iterator for the document containing the array.
5454
* @error INCORRECT_TYPE if the iterator is not at [.
5555
*/
56-
static simdjson_really_inline simdjson_result<array> start(document *doc) noexcept;
56+
static simdjson_really_inline simdjson_result<array> start(json_iterator *iter) noexcept;
5757
/**
5858
* Begin array iteration.
5959
*
6060
* @param iter The iterator for the document containing the array. It must be just after the opening `[`.
6161
*/
62-
static simdjson_really_inline array started(document *doc) noexcept;
62+
static simdjson_really_inline array started(json_iterator *iter) noexcept;
6363

6464
/**
6565
* Report the current error and set finished so it won't be reported again.
@@ -69,19 +69,19 @@ class array {
6969
/**
7070
* Internal array creation. Call array::start() or array::started() instead of this.
7171
*
72-
* @param doc The document containing the array. doc->iter.depth must already be incremented to
72+
* @param iter The iterator for the document containing the array. iter->depth must already be incremented to
7373
* reflect the array's depth. The iterator must be just after the opening `[`.
7474
* @param has_value Whether the array has a value (false means empty array).
7575
*/
76-
simdjson_really_inline array(document *_doc, bool has_value) noexcept;
76+
simdjson_really_inline array(json_iterator *iter, bool has_value) noexcept;
7777

7878
/**
7979
* Iterator for the document containing this array.
8080
*
8181
* PERF NOTE: expected to be elided in favor of the parent document: this is set when the array
8282
* is first used, and never changes afterwards.
8383
*/
84-
document *doc{};
84+
json_iterator *iter{};
8585
/**
8686
* Whether we have anything to yield.
8787
*
@@ -136,9 +136,9 @@ struct simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::array::iterator> : pub
136136

137137
// Reads the current element's value, yielding it to the user.
138138
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value> operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION.
139-
// Assumes it's being compared with the end. true if depth < doc->iter.depth.
139+
// Assumes it's being compared with the end. true if depth < iter->depth.
140140
simdjson_really_inline bool operator==(const simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::array::iterator> &) noexcept;
141-
// Assumes it's being compared with the end. true if depth >= doc->iter.depth.
141+
// Assumes it's being compared with the end. true if depth >= iter->depth.
142142
simdjson_really_inline bool operator!=(const simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::array::iterator> &) noexcept;
143143
// Checks for ']' and ','
144144
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::array::iterator> &operator++() noexcept;

src/generic/ondemand/document-inl.h

Lines changed: 7 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -2,51 +2,33 @@ namespace {
22
namespace SIMDJSON_IMPLEMENTATION {
33
namespace ondemand {
44

5-
simdjson_really_inline document::document(document &&other) noexcept :
6-
iter{std::forward<json_iterator>(other.iter)},
7-
parser{other.parser}
8-
{
9-
if (!at_start()) { logger::log_error(iter, "Cannot move document after it has been used"); abort(); }
10-
other.parser = nullptr;
11-
}
12-
simdjson_really_inline document &document::operator=(document &&other) noexcept {
13-
iter = std::forward<json_iterator>(other.iter);
14-
parser = other.parser;
15-
if (!at_start()) { logger::log_error(iter, "Cannot move document after it has been used"); abort(); }
16-
other.parser = nullptr;
17-
return *this;
18-
}
19-
5+
simdjson_really_inline document::document(document &&other) noexcept = default;
6+
simdjson_really_inline document &document::operator=(document &&other) noexcept = default;
207
simdjson_really_inline document::document(ondemand::parser *_parser) noexcept
21-
: iter(_parser->dom_parser.buf, _parser->dom_parser.structural_indexes.get()), parser{_parser}
8+
: iter(_parser)
229
{
23-
logger::log_headers();
24-
parser->current_string_buf_loc = parser->string_buf.get();
2510
logger::log_start_value(iter, "document");
2611
}
2712
simdjson_really_inline document::~document() noexcept {
28-
// Release the string buf so it can be reused by the next document
29-
if (parser) {
13+
if (iter.is_alive()) {
3014
logger::log_end_value(iter, "document");
31-
parser->current_string_buf_loc = nullptr;
3215
}
3316
}
3417

3518
simdjson_really_inline value document::as_value() noexcept {
36-
if (!at_start()) {
19+
if (!iter.at_start()) {
3720
logger::log_error(iter, "Document value can only be used once! ondemand::document is a forward-only input iterator.");
3821
abort(); // TODO is there anything softer we can do? I'd rather not make this a simdjson_result just for user error.
3922
}
40-
return value::start(this);
23+
return value::start(&iter);
4124
}
4225
simdjson_really_inline json_iterator &document::iterate() & noexcept {
43-
if (!at_start()) {
26+
if (!iter.at_start()) {
4427
logger::log_error(iter, "Document value can only be used once! ondemand::document is a forward-only input iterator.");
4528
abort(); // TODO is there anything softer we can do? I'd rather not make this a simdjson_result just for user error.
4629
}
4730
return iter;
4831
}
49-
simdjson_really_inline bool document::at_start() const noexcept { return iter.index == parser->dom_parser.structural_indexes.get(); }
5032

5133
simdjson_really_inline simdjson_result<array> document::get_array() & noexcept { return as_value().get_array(); }
5234
simdjson_really_inline simdjson_result<object> document::get_object() & noexcept { return as_value().get_object(); }

src/generic/ondemand/document.h

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -58,11 +58,9 @@ class document {
5858
simdjson_really_inline document(ondemand::parser *parser) noexcept;
5959
simdjson_really_inline const uint8_t *text(uint32_t idx) const noexcept;
6060

61-
json_iterator iter; ///< Current position in the document
62-
ondemand::parser *parser;
61+
json_iterator iter{}; ///< Current position in the document
6362

6463
simdjson_really_inline value as_value() noexcept;
65-
simdjson_really_inline bool at_start() const noexcept;
6664

6765
friend struct simdjson_result<document>;
6866
friend class value;

src/generic/ondemand/field-inl.h

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,15 +12,15 @@ simdjson_really_inline field::field(raw_json_string key, ondemand::value &&value
1212
{
1313
}
1414

15-
simdjson_really_inline simdjson_result<field> field::start(document *doc) noexcept {
15+
simdjson_really_inline simdjson_result<field> field::start(json_iterator *iter) noexcept {
1616
raw_json_string key;
17-
SIMDJSON_TRY( doc->iter.field_key().get(key) );
18-
SIMDJSON_TRY( doc->iter.field_value() );
19-
return field::start(doc, key);
17+
SIMDJSON_TRY( iter->field_key().get(key) );
18+
SIMDJSON_TRY( iter->field_value() );
19+
return field::start(iter, key);
2020
}
2121

22-
simdjson_really_inline simdjson_result<field> field::start(document *doc, raw_json_string key) noexcept {
23-
return field(key, value::start(doc));
22+
simdjson_really_inline simdjson_result<field> field::start(json_iterator *iter, raw_json_string key) noexcept {
23+
return field(key, value::start(iter));
2424
}
2525

2626
simdjson_really_inline raw_json_string field::key() const noexcept {

src/generic/ondemand/field.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,8 @@ class field : public std::pair<raw_json_string, value> {
1919
simdjson_really_inline ondemand::value &value() noexcept;
2020
protected:
2121
simdjson_really_inline field(raw_json_string key, ondemand::value &&value) noexcept;
22-
static simdjson_really_inline simdjson_result<field> start(document *doc) noexcept;
23-
static simdjson_really_inline simdjson_result<field> start(document *doc, raw_json_string key) noexcept;
22+
static simdjson_really_inline simdjson_result<field> start(json_iterator *iter) noexcept;
23+
static simdjson_really_inline simdjson_result<field> start(json_iterator *iter, raw_json_string key) noexcept;
2424
friend struct simdjson_result<field>;
2525
friend class object;
2626
};

src/generic/ondemand/json_iterator-inl.h

Lines changed: 35 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,31 @@ namespace SIMDJSON_IMPLEMENTATION {
33
namespace ondemand {
44

55
simdjson_really_inline json_iterator::json_iterator() noexcept = default;
6-
simdjson_really_inline json_iterator::json_iterator(json_iterator &&other) noexcept = default;
7-
simdjson_really_inline json_iterator &json_iterator::operator=(json_iterator &&other) noexcept = default;
8-
simdjson_really_inline json_iterator::json_iterator(const uint8_t *_buf, uint32_t *_index) noexcept
9-
: token_iterator(_buf, _index)
6+
simdjson_really_inline json_iterator::json_iterator(json_iterator &&other) noexcept
7+
: token_iterator(std::forward<token_iterator>(other)),
8+
parser{other.parser}
109
{
10+
other.parser = nullptr;
11+
}
12+
simdjson_really_inline json_iterator &json_iterator::operator=(json_iterator &&other) noexcept {
13+
buf = other.buf;
14+
index = other.index;
15+
parser = other.parser;
16+
other.parser = nullptr;
17+
return *this;
18+
}
19+
simdjson_really_inline json_iterator::json_iterator(ondemand::parser *_parser) noexcept
20+
: token_iterator(_parser->dom_parser.buf, _parser->dom_parser.structural_indexes.get()), parser{_parser}
21+
{
22+
// Point current_string_buf_loc at the start of the string buf; the destructor resets it so the next document can reuse it
23+
logger::log_headers();
24+
parser->current_string_buf_loc = parser->string_buf.get();
25+
}
26+
simdjson_really_inline json_iterator::~json_iterator() noexcept {
27+
if (parser) {
28+
parser->current_string_buf_loc = nullptr;
29+
}
1130
}
12-
1331

1432
SIMDJSON_WARN_UNUSED simdjson_really_inline simdjson_result<bool> json_iterator::start_object() noexcept {
1533
if (*advance() != '{') { logger::log_error(*this, "Not an object"); return INCORRECT_TYPE; }
@@ -232,6 +250,18 @@ simdjson_really_inline bool json_iterator::skip_container() noexcept {
232250
};
233251
}
234252

253+
simdjson_really_inline bool json_iterator::at_start() const noexcept {
254+
return index == parser->dom_parser.structural_indexes.get();
255+
}
256+
257+
simdjson_really_inline bool json_iterator::at_eof() const noexcept {
258+
return index == &parser->dom_parser.structural_indexes[parser->dom_parser.n_structural_indexes];
259+
}
260+
261+
simdjson_really_inline bool json_iterator::is_alive() const noexcept {
262+
return parser;
263+
}
264+
235265
} // namespace ondemand
236266
} // namespace SIMDJSON_IMPLEMENTATION
237267
} // namespace {

src/generic/ondemand/json_iterator.h

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ class json_iterator : public token_iterator {
1414
simdjson_really_inline json_iterator &operator=(json_iterator &&other) noexcept;
1515
simdjson_really_inline json_iterator(const json_iterator &other) noexcept = delete;
1616
simdjson_really_inline json_iterator &operator=(const json_iterator &other) noexcept = delete;
17+
simdjson_really_inline ~json_iterator() noexcept;
1718

1819
/**
1920
* Check for an opening { and start an object iteration.
@@ -117,8 +118,24 @@ class json_iterator : public token_iterator {
117118
*/
118119
simdjson_really_inline bool skip_container() noexcept;
119120

121+
/**
122+
* Tell whether the iterator is still at the start
123+
*/
124+
simdjson_really_inline bool at_start() const noexcept;
125+
126+
/**
127+
* Tell whether the iterator has reached EOF
128+
*/
129+
simdjson_really_inline bool at_eof() const noexcept;
130+
131+
/**
132+
* Tell whether the iterator is live (has not been moved from).
133+
*/
134+
simdjson_really_inline bool is_alive() const noexcept;
120135
protected:
121-
simdjson_really_inline json_iterator(const uint8_t *buf, uint32_t *index) noexcept;
136+
ondemand::parser *parser{};
137+
138+
simdjson_really_inline json_iterator(ondemand::parser *parser) noexcept;
122139
template<int N>
123140
SIMDJSON_WARN_UNUSED simdjson_really_inline bool advance_to_buffer(uint8_t (&buf)[N]) noexcept;
124141

0 commit comments

Comments
 (0)