Skip to content

Commit 6bb99ae

Browse files
committed
Merge structural_parser+iterator into json_iterator
1 parent a67e83e commit 6bb99ae

File tree

3 files changed

+76
-46
lines changed

3 files changed

+76
-46
lines changed

src/generic/stage2/structural_parser.h renamed to src/generic/stage2/json_iterator.h

Lines changed: 35 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,45 @@
1-
// This file contains the common code every implementation uses for stage2
2-
// It is intended to be included multiple times and compiled multiple times
3-
// We assume the file in which it is included already includes
4-
// "simdjson/stage2.h" (this simplifies amalgamation)
5-
61
#include "generic/stage2/logger.h"
7-
#include "generic/stage2/structural_iterator.h"
82

9-
namespace { // Make everything here private
3+
namespace {
104
namespace SIMDJSON_IMPLEMENTATION {
115
namespace stage2 {
126

13-
#define SIMDJSON_TRY(EXPR) { auto _err = (EXPR); if (_err) { return _err; } }
14-
15-
struct structural_parser : structural_iterator {
16-
/** Current depth (nested objects and arrays) */
7+
class json_iterator {
8+
public:
9+
const uint8_t* const buf;
10+
uint32_t *next_structural;
11+
dom_parser_implementation &dom_parser;
1712
uint32_t depth{0};
1813

1914
template<bool STREAMING, typename T>
2015
SIMDJSON_WARN_UNUSED simdjson_really_inline error_code walk_document(T &visitor) noexcept;
2116

22-
// For non-streaming, to pass an explicit 0 as next_structural, which enables optimizations
23-
simdjson_really_inline structural_parser(dom_parser_implementation &_dom_parser, uint32_t start_structural_index)
24-
: structural_iterator(_dom_parser, start_structural_index) {
17+
// Start a structural
18+
simdjson_really_inline json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index)
19+
: buf{_dom_parser.buf},
20+
next_structural{&_dom_parser.structural_indexes[start_structural_index]},
21+
dom_parser{_dom_parser} {
22+
}
23+
24+
// Get the buffer position of the current structural character
25+
simdjson_really_inline char peek_next_char() {
26+
return buf[*(next_structural)];
27+
}
28+
simdjson_really_inline const uint8_t* advance() {
29+
return &buf[*(next_structural++)];
30+
}
31+
simdjson_really_inline char advance_char() {
32+
return buf[*(next_structural++)];
33+
}
34+
simdjson_really_inline size_t remaining_len() {
35+
return dom_parser.len - *(next_structural-1);
36+
}
37+
38+
simdjson_really_inline bool at_end() {
39+
return next_structural == &dom_parser.structural_indexes[dom_parser.n_structural_indexes];
40+
}
41+
simdjson_really_inline bool at_beginning() {
42+
return next_structural == dom_parser.structural_indexes.get();
2543
}
2644

2745
template<typename T>
@@ -64,11 +82,11 @@ struct structural_parser : structural_iterator {
6482
simdjson_really_inline void log_error(const char *error) {
6583
logger::log_line(*this, "", "ERROR", error);
6684
}
67-
}; // struct structural_parser
85+
};
6886

6987
template<bool STREAMING, typename T>
70-
SIMDJSON_WARN_UNUSED simdjson_really_inline error_code structural_parser::walk_document(T &visitor) noexcept {
71-
const uint8_t *value;
88+
SIMDJSON_WARN_UNUSED simdjson_really_inline error_code json_iterator::walk_document(T &visitor) noexcept {
89+
const uint8_t *value; // Used to keep a value around between states
7290

7391
logger::log_start();
7492

src/generic/stage2/tape_builder.h

Lines changed: 39 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
#include "generic/stage2/structural_parser.h"
1+
#include "generic/stage2/json_iterator.h"
22
#include "generic/stage2/tape_writer.h"
33
#include "generic/stage2/atomparsing.h"
44

@@ -12,12 +12,12 @@ struct tape_builder {
1212
dom_parser_implementation &dom_parser,
1313
dom::document &doc) noexcept {
1414
dom_parser.doc = &doc;
15-
structural_parser iter(dom_parser, STREAMING ? dom_parser.next_structural_index : 0);
15+
json_iterator iter(dom_parser, STREAMING ? dom_parser.next_structural_index : 0);
1616
tape_builder builder(doc);
1717
return iter.walk_document<STREAMING>(builder);
1818
}
1919

20-
simdjson_really_inline error_code root_primitive(structural_parser &iter, const uint8_t *value) {
20+
simdjson_really_inline error_code root_primitive(json_iterator &iter, const uint8_t *value) {
2121
switch (*value) {
2222
case '"': return parse_string(iter, value);
2323
case 't': return parse_root_true_atom(iter, value);
@@ -32,7 +32,7 @@ struct tape_builder {
3232
return TAPE_ERROR;
3333
}
3434
}
35-
simdjson_really_inline error_code primitive(structural_parser &iter, const uint8_t *value) {
35+
simdjson_really_inline error_code primitive(json_iterator &iter, const uint8_t *value) {
3636
switch (*value) {
3737
case '"': return parse_string(iter, value);
3838
case 't': return parse_true_atom(iter, value);
@@ -47,54 +47,64 @@ struct tape_builder {
4747
return TAPE_ERROR;
4848
}
4949
}
50-
simdjson_really_inline void empty_object(structural_parser &iter) {
50+
simdjson_really_inline void empty_object(json_iterator &iter) {
5151
iter.log_value("empty object");
5252
empty_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT);
5353
}
54-
simdjson_really_inline void empty_array(structural_parser &iter) {
54+
simdjson_really_inline void empty_array(json_iterator &iter) {
5555
iter.log_value("empty array");
5656
empty_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY);
5757
}
5858

59-
simdjson_really_inline void start_document(structural_parser &iter) {
59+
simdjson_really_inline void start_document(json_iterator &iter) {
6060
iter.log_start_value("document");
6161
start_container(iter);
6262
iter.dom_parser.is_array[iter.depth] = false;
6363
}
64-
simdjson_really_inline void start_object(structural_parser &iter) {
64+
simdjson_really_inline void start_object(json_iterator &iter) {
6565
iter.log_start_value("object");
6666
start_container(iter);
6767
iter.dom_parser.is_array[iter.depth] = false;
6868
}
69-
simdjson_really_inline void start_array(structural_parser &iter) {
69+
simdjson_really_inline void start_array(json_iterator &iter) {
7070
iter.log_start_value("array");
7171
start_container(iter);
7272
iter.dom_parser.is_array[iter.depth] = true;
7373
}
7474

75-
simdjson_really_inline void end_object(structural_parser &iter) {
75+
simdjson_really_inline void end_object(json_iterator &iter) {
7676
iter.log_end_value("object");
7777
end_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT);
7878
}
79-
simdjson_really_inline void end_array(structural_parser &iter) {
79+
simdjson_really_inline void end_array(json_iterator &iter) {
8080
iter.log_end_value("array");
8181
end_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY);
8282
}
83-
simdjson_really_inline void end_document(structural_parser &iter) {
83+
simdjson_really_inline void end_document(json_iterator &iter) {
8484
iter.log_end_value("document");
8585
constexpr uint32_t start_tape_index = 0;
8686
tape.append(start_tape_index, internal::tape_type::ROOT);
8787
tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter), internal::tape_type::ROOT);
8888
}
89-
SIMDJSON_WARN_UNUSED simdjson_really_inline error_code key(structural_parser &iter, const uint8_t *key) {
89+
SIMDJSON_WARN_UNUSED simdjson_really_inline error_code key(json_iterator &iter, const uint8_t *key) {
9090
return parse_string(iter, key, true);
9191
}
9292

93+
// Called after end_object/end_array. Not called after empty_object/empty_array,
94+
// as the parent is already known in those cases.
95+
//
96+
// The object returned from end_container() should support the in_container(),
97+
// in_array() and in_object() methods, allowing the iterator to branch to the
98+
// correct place.
99+
simdjson_really_inline tape_builder &end_container(json_iterator &iter) {
100+
iter.depth--;
101+
return *this;
102+
}
93103
// increment_count increments the count of keys in an object or values in an array.
94-
simdjson_really_inline void increment_count(structural_parser &iter) {
104+
simdjson_really_inline void increment_count(json_iterator &iter) {
95105
iter.dom_parser.open_containers[iter.depth].count++; // we have a key value pair in the object at parser.dom_parser.depth - 1
96106
}
97-
simdjson_really_inline bool in_array(structural_parser &iter) noexcept {
107+
simdjson_really_inline bool in_array(json_iterator &iter) noexcept {
98108
return iter.dom_parser.is_array[iter.depth];
99109
}
100110

@@ -106,7 +116,7 @@ struct tape_builder {
106116

107117
simdjson_really_inline tape_builder(dom::document &doc) noexcept : tape{doc.tape.get()}, current_string_buf_loc{doc.string_buf.get()} {}
108118

109-
SIMDJSON_WARN_UNUSED simdjson_really_inline error_code parse_string(structural_parser &iter, const uint8_t *value, bool key = false) {
119+
SIMDJSON_WARN_UNUSED simdjson_really_inline error_code parse_string(json_iterator &iter, const uint8_t *value, bool key = false) {
110120
iter.log_value(key ? "key" : "string");
111121
uint8_t *dst = on_start_string(iter);
112122
dst = stringparsing::parse_string(value, dst);
@@ -118,13 +128,13 @@ struct tape_builder {
118128
return SUCCESS;
119129
}
120130

121-
SIMDJSON_WARN_UNUSED simdjson_really_inline error_code parse_number(structural_parser &iter, const uint8_t *value) {
131+
SIMDJSON_WARN_UNUSED simdjson_really_inline error_code parse_number(json_iterator &iter, const uint8_t *value) {
122132
iter.log_value("number");
123133
if (!numberparsing::parse_number(value, tape)) { iter.log_error("Invalid number"); return NUMBER_ERROR; }
124134
return SUCCESS;
125135
}
126136

127-
simdjson_really_inline error_code parse_root_number(structural_parser &iter, const uint8_t *value) {
137+
simdjson_really_inline error_code parse_root_number(json_iterator &iter, const uint8_t *value) {
128138
//
129139
// We need to make a copy to make sure that the string is space terminated.
130140
// This is not about padding the input, which should already be padded up
@@ -149,42 +159,42 @@ struct tape_builder {
149159
return error;
150160
}
151161

152-
SIMDJSON_WARN_UNUSED simdjson_really_inline error_code parse_true_atom(structural_parser &iter, const uint8_t *value) {
162+
SIMDJSON_WARN_UNUSED simdjson_really_inline error_code parse_true_atom(json_iterator &iter, const uint8_t *value) {
153163
iter.log_value("true");
154164
if (!atomparsing::is_valid_true_atom(value)) { return T_ATOM_ERROR; }
155165
tape.append(0, internal::tape_type::TRUE_VALUE);
156166
return SUCCESS;
157167
}
158168

159-
SIMDJSON_WARN_UNUSED simdjson_really_inline error_code parse_root_true_atom(structural_parser &iter, const uint8_t *value) {
169+
SIMDJSON_WARN_UNUSED simdjson_really_inline error_code parse_root_true_atom(json_iterator &iter, const uint8_t *value) {
160170
iter.log_value("true");
161171
if (!atomparsing::is_valid_true_atom(value, iter.remaining_len())) { return T_ATOM_ERROR; }
162172
tape.append(0, internal::tape_type::TRUE_VALUE);
163173
return SUCCESS;
164174
}
165175

166-
SIMDJSON_WARN_UNUSED simdjson_really_inline error_code parse_false_atom(structural_parser &iter, const uint8_t *value) {
176+
SIMDJSON_WARN_UNUSED simdjson_really_inline error_code parse_false_atom(json_iterator &iter, const uint8_t *value) {
167177
iter.log_value("false");
168178
if (!atomparsing::is_valid_false_atom(value)) { return F_ATOM_ERROR; }
169179
tape.append(0, internal::tape_type::FALSE_VALUE);
170180
return SUCCESS;
171181
}
172182

173-
SIMDJSON_WARN_UNUSED simdjson_really_inline error_code parse_root_false_atom(structural_parser &iter, const uint8_t *value) {
183+
SIMDJSON_WARN_UNUSED simdjson_really_inline error_code parse_root_false_atom(json_iterator &iter, const uint8_t *value) {
174184
iter.log_value("false");
175185
if (!atomparsing::is_valid_false_atom(value, iter.remaining_len())) { return F_ATOM_ERROR; }
176186
tape.append(0, internal::tape_type::FALSE_VALUE);
177187
return SUCCESS;
178188
}
179189

180-
SIMDJSON_WARN_UNUSED simdjson_really_inline error_code parse_null_atom(structural_parser &iter, const uint8_t *value) {
190+
SIMDJSON_WARN_UNUSED simdjson_really_inline error_code parse_null_atom(json_iterator &iter, const uint8_t *value) {
181191
iter.log_value("null");
182192
if (!atomparsing::is_valid_null_atom(value)) { return N_ATOM_ERROR; }
183193
tape.append(0, internal::tape_type::NULL_VALUE);
184194
return SUCCESS;
185195
}
186196

187-
SIMDJSON_WARN_UNUSED simdjson_really_inline error_code parse_root_null_atom(structural_parser &iter, const uint8_t *value) {
197+
SIMDJSON_WARN_UNUSED simdjson_really_inline error_code parse_root_null_atom(json_iterator &iter, const uint8_t *value) {
188198
iter.log_value("null");
189199
if (!atomparsing::is_valid_null_atom(value, iter.remaining_len())) { return N_ATOM_ERROR; }
190200
tape.append(0, internal::tape_type::NULL_VALUE);
@@ -193,23 +203,23 @@ struct tape_builder {
193203

194204
// private:
195205

196-
simdjson_really_inline uint32_t next_tape_index(structural_parser &iter) {
206+
simdjson_really_inline uint32_t next_tape_index(json_iterator &iter) {
197207
return uint32_t(tape.next_tape_loc - iter.dom_parser.doc->tape.get());
198208
}
199209

200-
simdjson_really_inline void empty_container(structural_parser &iter, internal::tape_type start, internal::tape_type end) {
210+
simdjson_really_inline void empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) {
201211
auto start_index = next_tape_index(iter);
202212
tape.append(start_index+2, start);
203213
tape.append(start_index, end);
204214
}
205215

206-
simdjson_really_inline void start_container(structural_parser &iter) {
216+
simdjson_really_inline void start_container(json_iterator &iter) {
207217
iter.dom_parser.open_containers[iter.depth].tape_index = next_tape_index(iter);
208218
iter.dom_parser.open_containers[iter.depth].count = 0;
209219
tape.skip(); // We don't actually *write* the start element until the end.
210220
}
211221

212-
simdjson_really_inline void end_container(structural_parser &iter, internal::tape_type start, internal::tape_type end) noexcept {
222+
simdjson_really_inline void end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept {
213223
// Write the ending tape element, pointing at the start location
214224
const uint32_t start_tape_index = iter.dom_parser.open_containers[iter.depth].tape_index;
215225
tape.append(start_tape_index, end);
@@ -221,7 +231,7 @@ struct tape_builder {
221231
tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter) | (uint64_t(cntsat) << 32), start);
222232
}
223233

224-
simdjson_really_inline uint8_t *on_start_string(structural_parser &iter) noexcept {
234+
simdjson_really_inline uint8_t *on_start_string(json_iterator &iter) noexcept {
225235
// we advance the point, accounting for the fact that we have a NULL termination
226236
tape.append(current_string_buf_loc - iter.dom_parser.doc->string_buf.get(), internal::tape_type::STRING);
227237
return current_string_buf_loc + sizeof(uint32_t);

src/implementation.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44

55
#include <initializer_list>
66

7+
#define SIMDJSON_TRY(EXPR) { auto _err = (EXPR); if (_err) { return _err; } }
8+
79
// Static array of known implementations. We're hoping these get baked into the executable
810
// without requiring a static initializer.
911

0 commit comments

Comments
 (0)