Skip to content

Commit 8c1f4b5

Browse files
committed
Make 4-stage pipeline: read, scan, strings, output
1 parent 6b93906 commit 8c1f4b5

File tree

4 files changed

+138
-138
lines changed

4 files changed

+138
-138
lines changed

src/generic/stage1_find_marks.h

Lines changed: 122 additions & 124 deletions
Original file line numberDiff line numberDiff line change
@@ -81,37 +81,6 @@ really_inline ErrorValues detect_errors_on_eof(
8181
return SUCCESS;
8282
}
8383

84-
//
85-
// Return a mask of all string characters plus end quotes.
86-
//
87-
// prev_escaped is overflow saying whether the next character is escaped.
88-
// prev_in_string is overflow saying whether we're still in a string.
89-
//
90-
// Backslash sequences outside of quotes will be detected in stage 2.
91-
//
92-
really_inline uint64_t find_strings(const simd_input<ARCHITECTURE> in, uint64_t &prev_escaped, uint64_t &prev_in_string) {
93-
const uint64_t backslash = in.eq('\\');
94-
const uint64_t escaped = follows_odd_sequence_of(backslash, prev_escaped);
95-
const uint64_t quote = in.eq('"') & ~escaped;
96-
// compute_quote_mask returns start quote plus string contents.
97-
const uint64_t in_string = compute_quote_mask(quote) ^ prev_in_string;
98-
/* right shift of a signed value expected to be well-defined and standard
99-
* compliant as of C++20,
100-
* John Regher from Utah U. says this is fine code */
101-
prev_in_string = static_cast<uint64_t>(static_cast<int64_t>(in_string) >> 63);
102-
// Use ^ to turn the beginning quote off, and the end quote on.
103-
return in_string ^ quote;
104-
}
105-
106-
really_inline uint64_t invalid_string_bytes(const uint64_t unescaped, const uint64_t quote_mask) {
107-
/* All Unicode characters may be placed within the
108-
* quotation marks, except for the characters that MUST be escaped:
109-
* quotation mark, reverse solidus, and the control characters (U+0000
110-
* through U+001F).
111-
* https://tools.ietf.org/html/rfc8259 */
112-
return quote_mask & unescaped;
113-
}
114-
11584
//
11685
// Determine which characters are *structural*:
11786
// - braces: [] and {}
@@ -128,11 +97,7 @@ really_inline uint64_t invalid_string_bytes(const uint64_t unescaped, const uint
12897
// contents of a string the same as content outside. Errors and structurals inside the string or on
12998
// the trailing quote will need to be removed later when the correct string information is known.
13099
//
131-
really_inline uint64_t find_potential_structurals(const simd_input<ARCHITECTURE> in, uint64_t &prev_primitive) {
132-
// These use SIMD so let's kick them off before running the regular 64-bit stuff ...
133-
uint64_t whitespace, op;
134-
find_whitespace_and_operators(in, whitespace, op);
135-
100+
really_inline uint64_t find_potential_structurals(uint64_t whitespace, uint64_t op, uint64_t &prev_primitive) {
136101
// Detect the start of a run of primitive characters. Includes numbers, booleans, and strings (").
137102
// Everything except whitespace, braces, colon and comma.
138103
const uint64_t primitive = ~(op | whitespace);
@@ -143,7 +108,61 @@ really_inline uint64_t find_potential_structurals(const simd_input<ARCHITECTURE>
143108
return op | start_primitive;
144109
}
145110

146-
static const size_t STEP_SIZE = 128;
111+
// All independent tasks that scan the SIMD input go here.
112+
really_inline void stage2_scan_input(
113+
const simd_input<ARCHITECTURE> in,
114+
uint64_t &prev_escaped, uint64_t &prev_primitive, utf8_checker<ARCHITECTURE> utf8_state,
115+
uint64_t &quote, uint64_t &potential_structurals, uint64_t &invalid_string_bytes
116+
) {
117+
// Finding real quotes, and finding potential structurals, are interleaved here.
118+
// First we read the input we need from SIMD for them ...
119+
uint64_t backslash = in.eq('\\');
120+
uint64_t whitespace, op;
121+
find_whitespace_and_operators(in, whitespace, op);
122+
123+
// Then we do the actual work. follows_odd_sequence_of takes longer than find_potential_structurals.
124+
const uint64_t escaped = follows_odd_sequence_of(backslash, prev_escaped);
125+
potential_structurals = find_potential_structurals(whitespace, op, prev_primitive);
126+
uint64_t raw_quote = in.eq('"');
127+
quote = raw_quote & ~escaped;
128+
invalid_string_bytes = in.lteq(0x1F);
129+
130+
// The UTF-8 scan is pretty much independent of anything else, so we stick it at the end here to
131+
// soak up some CPU.
132+
utf8_state.check_next_input(in);
133+
}
134+
135+
//
136+
// Return a mask of all string characters plus end quotes.
137+
//
138+
// prev_escaped is overflow saying whether the next character is escaped.
139+
// prev_in_string is overflow saying whether we're still in a string.
140+
//
141+
// Backslash sequences outside of quotes will be detected in stage 2.
142+
//
143+
really_inline uint64_t find_strings(uint64_t quote, uint64_t &prev_in_string) {
144+
// compute_quote_mask returns start quote plus string contents.
145+
const uint64_t in_string = compute_quote_mask(quote) ^ prev_in_string;
146+
/* right shift of a signed value expected to be well-defined and standard
147+
* compliant as of C++20,
148+
* John Regher from Utah U. says this is fine code */
149+
prev_in_string = static_cast<uint64_t>(static_cast<int64_t>(in_string) >> 63);
150+
// Use ^ to turn the beginning quote off, and the end quote on (i.e. everything except the
151+
// starting quote is the string's value).
152+
return in_string ^ quote;
153+
}
154+
155+
really_inline void stage3_find_strings(
156+
uint64_t quote,
157+
uint64_t &prev_in_string,
158+
uint64_t &string,
159+
const uint64_t potential_structurals, uint64_t &potential_structurals_out, const uint64_t invalid_string_bytes, uint64_t &invalid_string_bytes_out
160+
) {
161+
string = find_strings(quote, prev_in_string);
162+
potential_structurals_out = potential_structurals;
163+
invalid_string_bytes_out = invalid_string_bytes;
164+
}
165+
147166

148167
//
149168
// Find the important bits of JSON in a 128-byte chunk, and add them to :
@@ -166,67 +185,39 @@ static const size_t STEP_SIZE = 128;
166185
// available capacity with just one input. Running 2 at a time seems to give the CPU a good enough
167186
// workout.
168187
//
169-
really_inline void find_structural_bits_start(
170-
const uint8_t *buf,
188+
really_inline bool stage1_read_input(
189+
const uint8_t *&buf,
190+
const uint8_t *buf_end,
171191
simd_input<ARCHITECTURE> &in
172192
) {
173-
in = simd_input<ARCHITECTURE>(buf);
174-
}
193+
// If we have a full input, load it up and return it.
194+
const uint8_t *next_buf = buf+64;
195+
if (likely(next_buf < buf_end)) {
196+
in = simd_input<ARCHITECTURE>(buf);
197+
buf = next_buf;
198+
return true;
199+
}
175200

176-
really_inline void find_structural_bits_middle(
177-
simd_input<ARCHITECTURE> in,
178-
uint64_t &prev_escaped, uint64_t &prev_in_string, uint64_t &prev_primitive,
179-
uint64_t &string, uint64_t &structurals
180-
) {
181-
string = find_strings(in, prev_escaped, prev_in_string);
182-
structurals = find_potential_structurals(in, prev_primitive);
201+
// If we have a *partial* (< 64 byte) input, load it up and return it.
202+
if (likely(buf_end > buf)) {
203+
uint8_t tmp_buf[64];
204+
memset(tmp_buf, 0x20, 64);
205+
memcpy(tmp_buf, buf, buf_end-buf);
206+
in = simd_input<ARCHITECTURE>(tmp_buf);
207+
buf = next_buf;
208+
return true;
209+
}
210+
211+
return false;
183212
}
184213

185-
really_inline void find_structural_bits_end(
186-
simd_input<ARCHITECTURE> in, uint64_t idx, uint64_t string, uint64_t structurals,
187-
uint32_t *&base_ptr, uint64_t &prev_structurals, utf8_checker<ARCHITECTURE> &utf8_state,
214+
really_inline void stage4_output_structurals(
215+
const uint64_t string, const uint64_t structurals, const uint64_t invalid_string_bytes,
216+
uint32_t *&base_ptr, size_t &idx,
188217
uint64_t &unescaped_chars_error
189218
) {
190-
uint64_t unescaped = in.lteq(0x1F);
191-
utf8_state.check_next_input(in);
192-
flatten_bits(base_ptr, idx, prev_structurals); // Output *last* iteration's structurals to ParsedJson
193-
prev_structurals = structurals & ~string;
194-
unescaped_chars_error |= unescaped & string;
195-
idx += 64;
196-
}
197-
198-
really_inline void find_structural_bits_128(
199-
const uint8_t *buf, size_t &idx, uint32_t *&base_ptr,
200-
uint64_t &prev_escaped, uint64_t &prev_in_string,
201-
uint64_t &prev_primitive,
202-
uint64_t &prev_structurals,
203-
uint64_t &unescaped_chars_error,
204-
utf8_checker<ARCHITECTURE> &utf8_state) {
205-
//
206-
// Load up all 128 bytes into SIMD registers
207-
//
208-
simd_input<ARCHITECTURE> in_1, in_2;
209-
find_structural_bits_start(buf, in_1);
210-
find_structural_bits_start(buf+64, in_2);
211-
212-
//
213-
// Find the strings and potential structurals (operators / primitives).
214-
//
215-
// This will include false structurals that are *inside* strings--we'll filter strings out
216-
// before we return.
217-
//
218-
uint64_t string_1, structurals_1, string_2, structurals_2;
219-
find_structural_bits_middle(in_1, prev_escaped, prev_in_string, prev_primitive, string_1, structurals_1);
220-
find_structural_bits_middle(in_2, prev_escaped, prev_in_string, prev_primitive, string_2, structurals_2);
221-
222-
//
223-
// Do miscellaneous work while the processor is busy calculating strings and structurals.
224-
//
225-
// After that, weed out structurals that are inside strings and find invalid string characters.
226-
//
227-
find_structural_bits_end(in_1, idx, string_1, structurals_1, base_ptr, prev_structurals, utf8_state, unescaped_chars_error);
228-
find_structural_bits_end(in_2, idx+64, string_2, structurals_2, base_ptr, prev_structurals, utf8_state, unescaped_chars_error);
229-
idx += 128;
219+
flatten_bits(base_ptr, idx, structurals & ~string);
220+
unescaped_chars_error |= invalid_string_bytes & string;
230221
}
231222

232223
int find_structural_bits(const uint8_t *buf, size_t len, simdjson::ParsedJson &pj) {
@@ -236,49 +227,56 @@ int find_structural_bits(const uint8_t *buf, size_t len, simdjson::ParsedJson &p
236227
<< len << " bytes" << std::endl;
237228
return simdjson::CAPACITY;
238229
}
239-
if (unlikely(len == 0)) {
240-
return simdjson::EMPTY;
241-
}
242-
uint32_t *base_ptr = pj.structural_indexes;
243-
utf8_checker<ARCHITECTURE> utf8_state;
244230

231+
// Stage 1 state and output
232+
const uint8_t *buf_end = buf + len;
233+
simd_input<ARCHITECTURE> in;
234+
235+
// Stage 2 state and output
236+
uint64_t quote, potential_structurals, invalid_string_bytes;
245237
// Whether the first character of the next iteration is escaped.
246-
uint64_t prev_escaped = 0ULL;
247-
// Whether the last iteration was still inside a string (all 1's = true, all 0's = false).
248-
uint64_t prev_in_string = 0ULL;
238+
uint64_t prev_escaped = 0;
249239
// Whether the last character of the previous iteration is a primitive value character
250240
// (anything except whitespace, braces, comma or colon).
251-
uint64_t prev_primitive = 0ULL;
252-
// Mask of structural characters from the last iteration.
253-
// Kept around for performance reasons, so we can call flatten_bits to soak up some unused
254-
// CPU capacity while the next iteration is busy with an expensive clmul in compute_quote_mask.
255-
uint64_t structurals = 0;
241+
uint64_t prev_primitive = 0;
242+
utf8_checker<ARCHITECTURE> utf8_state;
243+
244+
// Stage 3 state and output
245+
uint64_t prev_in_string = 0;
246+
uint64_t string, potential_structurals2, invalid_string_bytes2;
256247

257-
size_t last_buf_size = (len % STEP_SIZE == 0) ? STEP_SIZE : (len % STEP_SIZE);
258-
const uint8_t *last_buf = buf + len - last_buf_size;
259-
size_t idx = 0;
248+
// Stage 4 state and output
249+
uint32_t *base_ptr = pj.structural_indexes;
250+
uint64_t idx = 0;
260251
// Errors with unescaped characters in strings (ASCII codepoints < 0x20)
261252
uint64_t unescaped_chars_error = 0;
262253

263-
while (buf < last_buf) {
264-
find_structural_bits_128(buf, idx, base_ptr,
265-
prev_escaped, prev_in_string, prev_primitive,
266-
structurals, unescaped_chars_error, utf8_state);
267-
buf += 128;
268-
}
254+
// Read the first input (if any)
255+
if (stage1_read_input(buf, buf_end, in)) {
256+
// Stage 2 is ready. Push it so stage 3 is ready.
257+
stage2_scan_input(in, prev_escaped, prev_primitive, utf8_state, quote, potential_structurals, invalid_string_bytes);
269258

270-
/* If we have a final chunk of less than 64 bytes, pad it to 64 with
271-
* spaces before processing it (otherwise, we risk invalidating the UTF-8
272-
* checks). */
273-
uint8_t tmp_buf[STEP_SIZE];
274-
memset(tmp_buf, 0x20, STEP_SIZE);
275-
memcpy(tmp_buf, last_buf, last_buf_size);
276-
find_structural_bits_128(&tmp_buf[0], idx, base_ptr,
277-
prev_escaped, prev_in_string, prev_primitive,
278-
structurals, unescaped_chars_error, utf8_state);
259+
// Read the second input (if any)
260+
if (stage1_read_input(buf, buf_end, in)) {
261+
// stages 2 and 3 are ready. Push them so 3 and 4 are ready.
262+
stage3_find_strings(quote, prev_in_string, string, potential_structurals, potential_structurals2, invalid_string_bytes, invalid_string_bytes2);
263+
stage2_scan_input(in, prev_escaped, prev_primitive, utf8_state, quote, potential_structurals, invalid_string_bytes);
279264

280-
/* finally, flatten out the remaining structurals from the last iteration */
281-
flatten_bits(base_ptr, idx, structurals);
265+
// Read remaining inputs
266+
while (stage1_read_input(buf, buf_end, in)) {
267+
// stages 2, 3 and 4 are ready. Push them so 3 and 4 are ready.
268+
stage4_output_structurals(string, potential_structurals2, invalid_string_bytes2, base_ptr, idx, unescaped_chars_error);
269+
stage3_find_strings(quote, prev_in_string, string, potential_structurals, potential_structurals2, invalid_string_bytes, invalid_string_bytes2);
270+
stage2_scan_input(in, prev_escaped, prev_primitive, utf8_state, quote, potential_structurals, invalid_string_bytes);
271+
}
272+
// stages 3 and 4 are ready. Push 4 so only 3 is ready.
273+
stage4_output_structurals(string, potential_structurals2, invalid_string_bytes2, base_ptr, idx, unescaped_chars_error);
274+
}
275+
276+
// stage 3 is ready. Finish it.
277+
stage3_find_strings(quote, prev_in_string, string, potential_structurals, potential_structurals2, invalid_string_bytes, invalid_string_bytes2);
278+
stage4_output_structurals(string, potential_structurals2, invalid_string_bytes2, base_ptr, idx, unescaped_chars_error);
279+
}
282280

283281
simdjson::ErrorValues error = detect_errors_on_eof(unescaped_chars_error, prev_in_string);
284282
if (unlikely(error != simdjson::SUCCESS)) {

src/generic/stage1_find_marks_flatten.h

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -8,15 +8,13 @@
88
// This is just a naive implementation. It should be normally
99
// disable, but can be used for research purposes to compare
1010
// again our optimized version.
11-
really_inline void flatten_bits(uint32_t *base_ptr, uint32_t &base, uint32_t idx, uint64_t bits) {
12-
uint32_t *out_ptr = base_ptr + base;
13-
idx -= 64;
11+
really_inline void flatten_bits(uint32_t *&base_ptr, uint32_t &idx, uint64_t bits) {
1412
while (bits != 0) {
15-
out_ptr[0] = idx + trailing_zeroes(bits);
13+
base_ptr[0] = idx + trailing_zeroes(bits);
1614
bits = bits & (bits - 1);
17-
out_ptr++;
15+
base_ptr++;
1816
}
19-
base = (out_ptr - base_ptr);
17+
idx += 64;
2018
}
2119

2220
#else // SIMDJSON_NAIVE_FLATTEN
@@ -26,14 +24,15 @@ really_inline void flatten_bits(uint32_t *base_ptr, uint32_t &base, uint32_t idx
2624
// base_ptr[base] incrementing base as we go
2725
// will potentially store extra values beyond end of valid bits, so base_ptr
2826
// needs to be large enough to handle this
29-
really_inline void flatten_bits(uint32_t *&base_ptr, uint32_t idx, uint64_t bits) {
27+
really_inline void flatten_bits(uint32_t *&base_ptr, size_t &idx, uint64_t bits) {
3028
// In some instances, the next branch is expensive because it is mispredicted.
3129
// Unfortunately, in other cases,
3230
// it helps tremendously.
33-
if (bits == 0)
31+
if (bits == 0) {
32+
idx += 64;
3433
return;
34+
}
3535
uint32_t cnt = hamming(bits);
36-
idx -= 64;
3736

3837
// Do the first 8 all together
3938
for (int i=0; i<8; i++) {
@@ -63,5 +62,6 @@ really_inline void flatten_bits(uint32_t *&base_ptr, uint32_t idx, uint64_t bits
6362
}
6463

6564
base_ptr += cnt;
65+
idx += 64;
6666
}
6767
#endif // SIMDJSON_NAIVE_FLATTEN

src/haswell/stage1_find_marks.h

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -90,14 +90,15 @@ really_inline void find_whitespace_and_operators(
9090
// base_ptr[base] incrementing base as we go
9191
// will potentially store extra values beyond end of valid bits, so base_ptr
9292
// needs to be large enough to handle this
93-
really_inline void flatten_bits(uint32_t *&base_ptr, uint32_t idx, uint64_t bits) {
93+
really_inline void flatten_bits(uint32_t *&base_ptr, size_t &idx, uint64_t bits) {
9494
// In some instances, the next branch is expensive because it is mispredicted.
9595
// Unfortunately, in other cases,
9696
// it helps tremendously.
97-
if (bits == 0)
98-
return;
97+
if (bits == 0) {
98+
idx += 64;
99+
return;
100+
}
99101
uint32_t cnt = _mm_popcnt_u64(bits);
100-
idx -= 64;
101102

102103
// Do the first 8 all together
103104
for (int i=0; i<8; i++) {
@@ -127,6 +128,7 @@ really_inline void flatten_bits(uint32_t *&base_ptr, uint32_t idx, uint64_t bits
127128
}
128129

129130
base_ptr += cnt;
131+
idx += 64;
130132
}
131133

132134
#include "generic/stage1_find_marks.h"

tests/basictests.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ bool navigate_test() {
2929

3030
simdjson::ParsedJson pj = simdjson::build_parsed_json(json);
3131
if (!pj.is_valid()) {
32-
printf("Something is wrong in navigate: %s.\n", json.c_str());
32+
printf("Error in navigate: %s.\nJSON: %s\n", pj.get_error_message().c_str(), json.c_str());
3333
return false;
3434
}
3535
simdjson::ParsedJson::Iterator pjh(pj);

0 commit comments

Comments
 (0)