Skip to content

Commit 6b93906

Browse files
committed
Split find_structural_bits into pipeline functions
1 parent 0b5d1d0 commit 6b93906

File tree

4 files changed

+56
-33
lines changed

4 files changed

+56
-33
lines changed

src/arm64/simd_input.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ using namespace simdjson::arm64;
4040

4141
template <>
4242
struct simd_input<Architecture::ARM64> {
43-
const uint8x16_t chunks[4];
43+
uint8x16_t chunks[4];
4444

4545
really_inline simd_input()
4646
: chunks{uint8x16_t(), uint8x16_t(), uint8x16_t(), uint8x16_t() } {}

src/generic/stage1_find_marks.h

Lines changed: 53 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -166,8 +166,37 @@ static const size_t STEP_SIZE = 128;
166166
// available capacity with just one input. Running 2 at a time seems to give the CPU a good enough
167167
// workout.
168168
//
169+
//
// First pipeline step: load one block of input into SIMD registers.
//
// buf - start of the bytes to load (the caller in this file passes buf and buf+64 for two
//       consecutive blocks).
// in  - receives the loaded block; any previous contents are overwritten.
//
really_inline void find_structural_bits_start(const uint8_t *buf, simd_input<ARCHITECTURE> &in) {
  const simd_input<ARCHITECTURE> loaded(buf);
  in = loaded;
}
175+
176+
//
// Second pipeline step: find the strings and potential structurals (operators / primitives)
// for one block.
//
// The potential structurals are not filtered here, so they can still include false structurals
// that are *inside* strings.
//
// in              - the block to examine.
// prev_escaped,
// prev_in_string  - forwarded by reference to find_strings.
// prev_primitive  - forwarded by reference to find_potential_structurals.
// string          - output: result of find_strings for this block.
// structurals     - output: result of find_potential_structurals for this block.
//
really_inline void find_structural_bits_middle(
  simd_input<ARCHITECTURE> in,
  uint64_t &prev_escaped,
  uint64_t &prev_in_string,
  uint64_t &prev_primitive,
  uint64_t &string,
  uint64_t &structurals
) {
  // Strings first, then potential structurals.
  string = find_strings(in, prev_escaped, prev_in_string);
  structurals = find_potential_structurals(in, prev_primitive);
}
184+
185+
//
// Final pipeline step for one block: do the miscellaneous work, then weed out structurals that
// are inside strings and record invalid string characters.
//
// in                    - the block being finished.
// idx                   - index handed to flatten_bits together with the *previous* block's
//                         structurals. Passed by value: this function does not advance it, the
//                         caller is responsible for moving on to the next block.
// string                - string mask computed for this block.
// structurals           - potential structurals computed for this block.
// base_ptr              - output cursor forwarded to flatten_bits.
// prev_structurals      - in: last iteration's structurals (flattened here);
//                         out: this block's structurals with those inside strings removed.
// utf8_state            - UTF-8 checker that is fed this block.
// unescaped_chars_error - accumulates bytes <= 0x1F that fall inside strings.
//
really_inline void find_structural_bits_end(
  simd_input<ARCHITECTURE> in, uint64_t idx, uint64_t string, uint64_t structurals,
  uint32_t *&base_ptr, uint64_t &prev_structurals, utf8_checker<ARCHITECTURE> &utf8_state,
  uint64_t &unescaped_chars_error
) {
  uint64_t unescaped = in.lteq(0x1F);
  utf8_state.check_next_input(in);
  flatten_bits(base_ptr, idx, prev_structurals); // Output *last* iteration's structurals to ParsedJson
  prev_structurals = structurals & ~string;
  unescaped_chars_error |= unescaped & string;
  // No `idx += 64` here: idx is a by-value copy, so the increment was a dead store that never
  // reached the caller.
}
197+
169198
really_inline void find_structural_bits_128(
170-
const uint8_t *buf, const size_t idx, uint32_t *&base_ptr,
199+
const uint8_t *buf, size_t &idx, uint32_t *&base_ptr,
171200
uint64_t &prev_escaped, uint64_t &prev_in_string,
172201
uint64_t &prev_primitive,
173202
uint64_t &prev_structurals,
@@ -176,36 +205,28 @@ really_inline void find_structural_bits_128(
176205
//
177206
// Load up all 128 bytes into SIMD registers
178207
//
179-
simd_input<ARCHITECTURE> in_1(buf);
180-
simd_input<ARCHITECTURE> in_2(buf+64);
208+
simd_input<ARCHITECTURE> in_1, in_2;
209+
find_structural_bits_start(buf, in_1);
210+
find_structural_bits_start(buf+64, in_2);
181211

182212
//
183213
// Find the strings and potential structurals (operators / primitives).
184214
//
185215
// This will include false structurals that are *inside* strings--we'll filter strings out
186216
// before we return.
187217
//
188-
uint64_t string_1 = find_strings(in_1, prev_escaped, prev_in_string);
189-
uint64_t structurals_1 = find_potential_structurals(in_1, prev_primitive);
190-
uint64_t string_2 = find_strings(in_2, prev_escaped, prev_in_string);
191-
uint64_t structurals_2 = find_potential_structurals(in_2, prev_primitive);
218+
uint64_t string_1, structurals_1, string_2, structurals_2;
219+
find_structural_bits_middle(in_1, prev_escaped, prev_in_string, prev_primitive, string_1, structurals_1);
220+
find_structural_bits_middle(in_2, prev_escaped, prev_in_string, prev_primitive, string_2, structurals_2);
192221

193222
//
194223
// Do miscellaneous work while the processor is busy calculating strings and structurals.
195224
//
196225
// After that, weed out structurals that are inside strings and find invalid string characters.
197226
//
198-
uint64_t unescaped_1 = in_1.lteq(0x1F);
199-
utf8_state.check_next_input(in_1);
200-
flatten_bits(base_ptr, idx, prev_structurals); // Output *last* iteration's structurals to ParsedJson
201-
prev_structurals = structurals_1 & ~string_1;
202-
unescaped_chars_error |= unescaped_1 & string_1;
203-
204-
uint64_t unescaped_2 = in_2.lteq(0x1F);
205-
utf8_state.check_next_input(in_2);
206-
flatten_bits(base_ptr, idx+64, prev_structurals); // Output *last* iteration's structurals to ParsedJson
207-
prev_structurals = structurals_2 & ~string_2;
208-
unescaped_chars_error |= unescaped_2 & string_2;
227+
find_structural_bits_end(in_1, idx, string_1, structurals_1, base_ptr, prev_structurals, utf8_state, unescaped_chars_error);
228+
find_structural_bits_end(in_2, idx+64, string_2, structurals_2, base_ptr, prev_structurals, utf8_state, unescaped_chars_error);
229+
idx += 128;
209230
}
210231

211232
int find_structural_bits(const uint8_t *buf, size_t len, simdjson::ParsedJson &pj) {
@@ -215,6 +236,9 @@ int find_structural_bits(const uint8_t *buf, size_t len, simdjson::ParsedJson &p
215236
<< len << " bytes" << std::endl;
216237
return simdjson::CAPACITY;
217238
}
239+
if (unlikely(len == 0)) {
240+
return simdjson::EMPTY;
241+
}
218242
uint32_t *base_ptr = pj.structural_indexes;
219243
utf8_checker<ARCHITECTURE> utf8_state;
220244

@@ -230,29 +254,28 @@ int find_structural_bits(const uint8_t *buf, size_t len, simdjson::ParsedJson &p
230254
// CPU capacity while the next iteration is busy with an expensive clmul in compute_quote_mask.
231255
uint64_t structurals = 0;
232256

233-
size_t lenminusstep = len < STEP_SIZE ? 0 : len - STEP_SIZE;
257+
size_t last_buf_size = (len % STEP_SIZE == 0) ? STEP_SIZE : (len % STEP_SIZE);
258+
const uint8_t *last_buf = buf + len - last_buf_size;
234259
size_t idx = 0;
235260
// Errors with unescaped characters in strings (ASCII codepoints < 0x20)
236261
uint64_t unescaped_chars_error = 0;
237262

238-
for (; idx < lenminusstep; idx += STEP_SIZE) {
239-
find_structural_bits_128(&buf[idx], idx, base_ptr,
263+
while (buf < last_buf) {
264+
find_structural_bits_128(buf, idx, base_ptr,
240265
prev_escaped, prev_in_string, prev_primitive,
241266
structurals, unescaped_chars_error, utf8_state);
267+
buf += 128;
242268
}
243269

244270
/* Copy the final chunk (at most STEP_SIZE bytes) into a temporary buffer
245271
 * padded to STEP_SIZE with spaces before processing it (otherwise, we risk
246272
 * invalidating the UTF-8 checks). */
247-
if (likely(idx < len)) {
248-
uint8_t tmp_buf[STEP_SIZE];
249-
memset(tmp_buf, 0x20, STEP_SIZE);
250-
memcpy(tmp_buf, buf + idx, len - idx);
251-
find_structural_bits_128(&tmp_buf[0], idx, base_ptr,
252-
prev_escaped, prev_in_string, prev_primitive,
253-
structurals, unescaped_chars_error, utf8_state);
254-
idx += STEP_SIZE;
255-
}
273+
uint8_t tmp_buf[STEP_SIZE];
274+
memset(tmp_buf, 0x20, STEP_SIZE);
275+
memcpy(tmp_buf, last_buf, last_buf_size);
276+
find_structural_bits_128(&tmp_buf[0], idx, base_ptr,
277+
prev_escaped, prev_in_string, prev_primitive,
278+
structurals, unescaped_chars_error, utf8_state);
256279

257280
/* finally, flatten out the remaining structurals from the last iteration */
258281
flatten_bits(base_ptr, idx, structurals);

src/haswell/simd_input.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ namespace simdjson {
1010

1111
template <>
1212
struct simd_input<Architecture::HASWELL> {
13-
const __m256i chunks[2];
13+
__m256i chunks[2];
1414

1515
really_inline simd_input() : chunks{__m256i(), __m256i()} {}
1616

src/westmere/simd_input.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ namespace simdjson {
1010

1111
template <>
1212
struct simd_input<Architecture::WESTMERE> {
13-
const __m128i chunks[4];
13+
__m128i chunks[4];
1414

1515
really_inline simd_input()
1616
: chunks { __m128i(), __m128i(), __m128i(), __m128i() } {}

0 commit comments

Comments
 (0)