Skip to content

Commit 856d362

Browse files
committed
Store current_string_buf_loc in doc_parser
1 parent b41e9cc commit 856d362

File tree

3 files changed

+21
-30
lines changed

3 files changed

+21
-30
lines changed

include/simdjson/dom/parser.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -349,6 +349,8 @@ class parser {
349349

350350
/** @private Next location to write to in the tape */
351351
uint32_t current_loc{0};
352+
/** @private Next location to write to in the string buffer (used by stage 2 parsing) */
353+
uint8_t *current_string_buf_loc;
352354

353355
/** @private Number of structural indices passed from stage 1 to stage 2 */
354356
uint32_t n_structural_indexes{0};

src/generic/stage2/streaming_structural_parser.h

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,12 @@
11
namespace stage2 {
22

33
struct streaming_structural_parser: structural_parser {
4-
really_inline streaming_structural_parser(const uint8_t *buf, size_t len, parser &_doc_parser, size_t &next_structural, uint8_t *&_current_string_buf_loc) : structural_parser(buf, len, _doc_parser, next_structural, _current_string_buf_loc) {}
4+
really_inline streaming_structural_parser(
5+
const uint8_t *buf,
6+
size_t len,
7+
parser &_doc_parser,
8+
size_t &next_structural
9+
) : structural_parser(buf, len, _doc_parser, next_structural) {}
510

611
// override to add streaming
712
WARN_UNUSED really_inline error_code start(UNUSED size_t len) {
@@ -41,8 +46,7 @@ struct streaming_structural_parser: structural_parser {
4146
* for documentation.
4247
***********/
4348
WARN_UNUSED error_code implementation::stage2(const uint8_t *buf, size_t len, parser &doc_parser, size_t &next_json) const noexcept {
44-
uint8_t *current_string_buf_loc = doc_parser.doc.string_buf.get();
45-
stage2::streaming_structural_parser parser(buf, len, doc_parser, next_json, current_string_buf_loc);
49+
stage2::streaming_structural_parser parser(buf, len, doc_parser, next_json);
4650
error_code result = parser.start(len);
4751
if (result) { return result; }
4852

src/generic/stage2/structural_parser.h

Lines changed: 12 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -31,31 +31,25 @@ struct number_writer {
3131
struct structural_parser {
3232
structural_iterator structurals;
3333
parser &doc_parser;
34-
/** Next write location in the string buf for stage 2 parsing */
35-
uint8_t *&current_string_buf_loc;
3634
uint32_t depth;
3735

3836
really_inline structural_parser(
3937
const uint8_t *buf,
4038
size_t len,
4139
parser &_doc_parser,
42-
size_t &next_structural,
43-
uint8_t *&_current_string_buf_loc
40+
size_t &next_structural
4441
) : structurals(buf, len, _doc_parser.structural_indexes.get(), next_structural),
4542
doc_parser{_doc_parser},
46-
current_string_buf_loc{_current_string_buf_loc},
4743
depth{0} {
4844
}
4945

5046
really_inline structural_parser(
5147
const uint8_t *buf,
5248
parser &_doc_parser,
5349
size_t &next_structural,
54-
uint8_t *&_current_string_buf_loc,
5550
uint32_t _depth
5651
) : structurals(buf, 0, _doc_parser.structural_indexes.get(), next_structural),
5752
doc_parser{_doc_parser},
58-
current_string_buf_loc{_current_string_buf_loc},
5953
depth{_depth} {
6054
}
6155

@@ -112,20 +106,20 @@ struct structural_parser {
112106

113107
really_inline uint8_t *on_start_string() noexcept {
114108
// we advance the pointer, accounting for the fact that we have a NULL termination
115-
write_tape(current_string_buf_loc - doc_parser.doc.string_buf.get(), internal::tape_type::STRING);
116-
return current_string_buf_loc + sizeof(uint32_t);
109+
write_tape(doc_parser.current_string_buf_loc - doc_parser.doc.string_buf.get(), internal::tape_type::STRING);
110+
return doc_parser.current_string_buf_loc + sizeof(uint32_t);
117111
}
118112

119113
really_inline void on_end_string(uint8_t *dst) noexcept {
120-
uint32_t str_length = uint32_t(dst - (current_string_buf_loc + sizeof(uint32_t)));
114+
uint32_t str_length = uint32_t(dst - (doc_parser.current_string_buf_loc + sizeof(uint32_t)));
121115
// TODO check for overflow in case someone has a crazy string (>=4GB?)
122116
// But only add the overflow check when the document itself exceeds 4GB
123117
// Currently unneeded because we refuse to parse docs larger or equal to 4GB.
124-
memcpy(current_string_buf_loc, &str_length, sizeof(uint32_t));
118+
memcpy(doc_parser.current_string_buf_loc, &str_length, sizeof(uint32_t));
125119
// NULL termination is still handy for callers that expect all strings to
126120
// be NULL terminated. It comes at a small cost
127121
*dst = 0;
128-
current_string_buf_loc = dst + 1;
122+
doc_parser.current_string_buf_loc = dst + 1;
129123
}
130124

131125
WARN_UNUSED really_inline bool parse_string(bool key = false) {
@@ -250,19 +244,17 @@ struct structural_parser {
250244
}
251245

252246
WARN_UNUSED really_inline bool parse_object() {
253-
return parse_object(structurals.buf, doc_parser, structurals.next_structural, current_string_buf_loc, depth+1);
247+
return parse_object(structurals.buf, doc_parser, structurals.next_structural, depth+1);
254248
}
255249

256250
WARN_UNUSED static bool parse_object(
257251
const uint8_t *buf,
258252
parser &doc_parser,
259253
size_t &next_structural,
260-
uint8_t *&current_string_buf_loc,
261254
uint32_t depth) {
262-
structural_parser parser(buf, doc_parser, next_structural, current_string_buf_loc, depth);
255+
structural_parser parser(buf, doc_parser, next_structural, depth);
263256
bool result = parser.parse_object_inline();
264257
next_structural = parser.structurals.next_structural;
265-
current_string_buf_loc = parser.current_string_buf_loc;
266258
return result;
267259
}
268260

@@ -310,19 +302,17 @@ struct structural_parser {
310302
}
311303

312304
WARN_UNUSED really_inline bool parse_array() {
313-
return parse_array(structurals.buf, doc_parser, structurals.next_structural, current_string_buf_loc, depth+1);
305+
return parse_array(structurals.buf, doc_parser, structurals.next_structural, depth+1);
314306
}
315307

316308
WARN_UNUSED static bool parse_array(
317309
const uint8_t *buf,
318310
parser &doc_parser,
319311
size_t &next_structural,
320-
uint8_t *&current_string_buf_loc,
321312
uint32_t depth) {
322-
structural_parser parser(buf, doc_parser, next_structural, current_string_buf_loc, depth);
313+
structural_parser parser(buf, doc_parser, next_structural, depth);
323314
bool result = parser.parse_array_inline();
324315
next_structural = parser.structurals.next_structural;
325-
current_string_buf_loc = parser.current_string_buf_loc;
326316
return result;
327317
}
328318

@@ -361,10 +351,6 @@ struct structural_parser {
361351
return on_error(TAPE_ERROR);
362352
}
363353
end_document(0, 1);
364-
if (depth != 0) {
365-
log_error("Unclosed objects or arrays!");
366-
return on_error(TAPE_ERROR);
367-
}
368354

369355
return on_success(SUCCESS);
370356
}
@@ -420,7 +406,7 @@ struct structural_parser {
420406
}
421407

422408
really_inline void init() {
423-
current_string_buf_loc = doc_parser.doc.string_buf.get();
409+
doc_parser.current_string_buf_loc = doc_parser.doc.string_buf.get();
424410
doc_parser.current_loc = 0;
425411
doc_parser.valid = false;
426412
doc_parser.error = UNINITIALIZED;
@@ -476,8 +462,7 @@ struct structural_parser {
476462
***********/
477463
WARN_UNUSED error_code implementation::stage2(const uint8_t *buf, size_t len, parser &doc_parser) const noexcept {
478464
size_t next_structural = 0;
479-
uint8_t *current_string_buf_loc = doc_parser.doc.string_buf.get();
480-
stage2::structural_parser parser(buf, len, doc_parser, next_structural, current_string_buf_loc);
465+
stage2::structural_parser parser(buf, len, doc_parser, next_structural);
481466
error_code result = parser.start(len);
482467
if (result) { return result; }
483468

0 commit comments

Comments
 (0)