Skip to content

Commit b41e9cc

Browse files
committed
Eliminate containing_scope and ret_address
1 parent a973c50 commit b41e9cc

File tree

4 files changed

+46
-88
lines changed

4 files changed

+46
-88
lines changed

include/simdjson/dom/parser.h

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -355,12 +355,6 @@ class parser {
355355
/** @private Structural indices passed from stage 1 to stage 2 */
356356
std::unique_ptr<uint32_t[]> structural_indexes{};
357357

358-
/** @private Tape location of each open { or [ */
359-
std::unique_ptr<internal::scope_descriptor[]> containing_scope{};
360-
361-
/** @private Return address of each open { or [ */
362-
std::unique_ptr<internal::ret_address[]> ret_address{};
363-
364358
/** @private Use `if (parser.parse(...).error())` instead */
365359
bool valid{false};
366360
/** @private Use `parser.parse(...).error()` instead */

include/simdjson/inline/parser.h

Lines changed: 2 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -188,30 +188,10 @@ inline error_code parser::allocate(size_t capacity, size_t max_depth) noexcept {
188188
}
189189

190190
//
191-
// If max_depth has changed, reallocate those buffers
191+
// Set max_depth. It's obsolete, but it's still in the API.
192192
//
193-
if (max_depth != _max_depth) {
194-
_max_depth = 0;
193+
_max_depth = max_depth;
195194

196-
if (max_depth == 0) {
197-
ret_address.reset();
198-
containing_scope.reset();
199-
return SUCCESS;
200-
}
201-
202-
//
203-
// Initialize stage 2 state
204-
//
205-
containing_scope.reset(new (std::nothrow) internal::scope_descriptor[max_depth]); // TODO realloc
206-
ret_address.reset(new (std::nothrow) internal::ret_address[max_depth]);
207-
208-
if (!ret_address || !containing_scope) {
209-
// Could not allocate memory
210-
return MEMALLOC;
211-
}
212-
213-
_max_depth = max_depth;
214-
}
215195
return SUCCESS;
216196
}
217197

src/generic/stage2/streaming_structural_parser.h

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -23,15 +23,11 @@ struct streaming_structural_parser: structural_parser {
2323
log_error("IMPOSSIBLE: past the end of the JSON!");
2424
return on_error(TAPE_ERROR);
2525
}
26-
end_document();
26+
end_document(0, 1);
2727
if (depth != 0) {
2828
log_error("Unclosed objects or arrays!");
2929
return on_error(TAPE_ERROR);
3030
}
31-
if (doc_parser.containing_scope[depth].tape_index != 0) {
32-
log_error("IMPOSSIBLE: root scope tape index did not start at 0!");
33-
return on_error(TAPE_ERROR);
34-
}
3531
bool finished = structurals.at_end(doc_parser.n_structural_indexes);
3632
if (!finished) { log_value("(and has more)"); }
3733
return on_success(finished ? SUCCESS : SUCCESS_AND_HAS_MORE);

src/generic/stage2/structural_parser.h

Lines changed: 43 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -62,8 +62,6 @@ struct structural_parser {
6262
WARN_UNUSED really_inline bool start_scope(internal::tape_type type) {
6363
bool exceeded_max_depth = depth >= doc_parser.max_depth();
6464
if (exceeded_max_depth) { log_error("Exceeded max depth!"); return true; }
65-
doc_parser.containing_scope[depth].tape_index = doc_parser.current_loc;
66-
doc_parser.containing_scope[depth].count = 0;
6765
write_tape(0, type); // if the document is correct, this gets rewritten later
6866
return false;
6967
}
@@ -84,44 +82,34 @@ struct structural_parser {
8482
}
8583

8684
// this function is responsible for annotating the start of the scope
87-
really_inline void end_scope(internal::tape_type type) noexcept {
85+
really_inline void end_scope(internal::tape_type type, uint32_t start_loc, uint32_t count) noexcept {
8886
// write our doc.tape location to the header scope
8987
// The root scope gets written *at* the previous location.
90-
write_tape(doc_parser.containing_scope[depth].tape_index, type);
88+
write_tape(start_loc, type);
9189
// count can overflow if it exceeds 24 bits... so we saturate
9290
// the convention being that a cnt of 0xffffff or more is undetermined in value (>= 0xffffff).
93-
const uint32_t start_tape_index = doc_parser.containing_scope[depth].tape_index;
94-
const uint32_t count = doc_parser.containing_scope[depth].count;
9591
const uint32_t cntsat = count > 0xFFFFFF ? 0xFFFFFF : count;
9692
// This is a load and an OR. It would be possible to just write once at doc.tape[d.tape_index]
97-
doc_parser.doc.tape[start_tape_index] |= doc_parser.current_loc | (uint64_t(cntsat) << 32);
93+
doc_parser.doc.tape[start_loc] |= doc_parser.current_loc | (uint64_t(cntsat) << 32);
9894
}
9995

100-
really_inline void end_object() {
96+
really_inline void end_object(uint32_t start_loc, uint32_t count) {
10197
log_end_value("object");
102-
end_scope(internal::tape_type::END_OBJECT);
98+
end_scope(internal::tape_type::END_OBJECT, start_loc, count);
10399
}
104-
really_inline void end_array() {
100+
really_inline void end_array(uint32_t start_loc, uint32_t count) {
105101
log_end_value("array");
106-
end_scope(internal::tape_type::END_ARRAY);
102+
end_scope(internal::tape_type::END_ARRAY, start_loc, count);
107103
}
108-
really_inline void end_document() {
104+
really_inline void end_document(uint32_t start_loc, uint32_t count) {
109105
log_end_value("document");
110-
end_scope(internal::tape_type::ROOT);
106+
end_scope(internal::tape_type::ROOT, start_loc, count);
111107
}
112108

113109
really_inline void write_tape(uint64_t val, internal::tape_type t) noexcept {
114110
doc_parser.doc.tape[doc_parser.current_loc++] = val | ((uint64_t(char(t))) << 56);
115111
}
116112

117-
// increment_count increments the count of keys in an object or values in an array.
118-
// Note that if you are at the level of the values or elements, the count
119-
// must be incremented in the preceding depth (depth-1) where the array or
120-
// the object resides.
121-
really_inline void increment_count() {
122-
doc_parser.containing_scope[depth].count++; // we have a key value pair in the object at parser.depth - 1
123-
}
124-
125113
really_inline uint8_t *on_start_string() noexcept {
126114
// we advance the point, accounting for the fact that we have a NULL termination
127115
write_tape(current_string_buf_loc - doc_parser.doc.string_buf.get(), internal::tape_type::STRING);
@@ -279,38 +267,41 @@ struct structural_parser {
279267
}
280268

281269
WARN_UNUSED really_inline bool parse_object_inline() {
270+
uint32_t start_loc = doc_parser.current_loc;
282271
if (start_object()) { return true; }
283272
switch (advance_char()) {
284273
case '"':
285-
increment_count();
286-
// Key
287-
if (parse_string(true)) { return true; }
288-
while (true) {
289-
290-
// :
291-
if (advance_char() != ':' ) { log_error("Missing colon after key in object"); return true; }
292-
293-
// Value
294-
advance_char();
295-
if (parse_value()) { return true; }
296-
297-
switch (advance_char()) {
298-
case ',':
299-
increment_count();
300-
if (advance_char() != '"' ) { log_error("Key string missing at beginning of field in object"); return true; }
301-
if (parse_string(true)) { return true; }
302-
continue;
303-
case '}':
304-
end_object();
305-
return false;
306-
default:
307-
log_error("No comma between object fields");
308-
return true;
274+
{
275+
uint32_t count = 1;
276+
// Key
277+
if (parse_string(true)) { return true; }
278+
while (true) {
279+
280+
// :
281+
if (advance_char() != ':' ) { log_error("Missing colon after key in object"); return true; }
282+
283+
// Value
284+
advance_char();
285+
if (parse_value()) { return true; }
286+
287+
switch (advance_char()) {
288+
case ',':
289+
count++;
290+
if (advance_char() != '"' ) { log_error("Key string missing at beginning of field in object"); return true; }
291+
if (parse_string(true)) { return true; }
292+
continue;
293+
case '}':
294+
end_object(start_loc, count);
295+
return false;
296+
default:
297+
log_error("No comma between object fields");
298+
return true;
299+
}
309300
}
310301
}
311302
break;
312303
case '}':
313-
end_object();
304+
end_object(start_loc, 0);
314305
return false;
315306
default:
316307
log_error("Object does not start with a key");
@@ -336,24 +327,25 @@ struct structural_parser {
336327
}
337328

338329
WARN_UNUSED really_inline bool parse_array_inline() {
330+
uint32_t start_loc = doc_parser.current_loc;
339331
if (start_array()) { return true; }
340332

341333
if (advance_char() == ']') {
342-
end_array();
334+
end_array(start_loc, 0);
343335
return false;
344336
}
345-
increment_count();
346337

338+
uint32_t count = 1;
347339
while (true) {
348340
if (parse_value()) { return true; }
349341

350342
switch (advance_char()) {
351343
case ',':
352-
increment_count();
344+
count++;
353345
advance_char();
354346
continue;
355347
case ']':
356-
end_array();
348+
end_array(start_loc, count);
357349
return false;
358350
default:
359351
log_error("Missing comma between array values");
@@ -368,15 +360,11 @@ struct structural_parser {
368360
log_error("More than one JSON value at the root of the document, or extra characters at the end of the JSON!");
369361
return on_error(TAPE_ERROR);
370362
}
371-
end_document();
363+
end_document(0, 1);
372364
if (depth != 0) {
373365
log_error("Unclosed objects or arrays!");
374366
return on_error(TAPE_ERROR);
375367
}
376-
if (doc_parser.containing_scope[depth].tape_index != 0) {
377-
log_error("IMPOSSIBLE: root scope tape index did not start at 0!");
378-
return on_error(TAPE_ERROR);
379-
}
380368

381369
return on_success(SUCCESS);
382370
}

0 commit comments

Comments
 (0)