@@ -166,8 +166,37 @@ static const size_t STEP_SIZE = 128;
166
166
// available capacity with just one input. Running 2 at a time seems to give the CPU a good enough
167
167
// workout.
168
168
//
169
+ really_inline void find_structural_bits_start (
170
+ const uint8_t *buf,
171
+ simd_input<ARCHITECTURE> &in
172
+ ) {
173
+ in = simd_input<ARCHITECTURE>(buf);
174
+ }
175
+
176
+ really_inline void find_structural_bits_middle (
177
+ simd_input<ARCHITECTURE> in,
178
+ uint64_t &prev_escaped, uint64_t &prev_in_string, uint64_t &prev_primitive,
179
+ uint64_t &string, uint64_t &structurals
180
+ ) {
181
+ string = find_strings (in, prev_escaped, prev_in_string);
182
+ structurals = find_potential_structurals (in, prev_primitive);
183
+ }
184
+
185
+ really_inline void find_structural_bits_end (
186
+ simd_input<ARCHITECTURE> in, uint64_t idx, uint64_t string, uint64_t structurals,
187
+ uint32_t *&base_ptr, uint64_t &prev_structurals, utf8_checker<ARCHITECTURE> &utf8_state,
188
+ uint64_t &unescaped_chars_error
189
+ ) {
190
+ uint64_t unescaped = in.lteq (0x1F );
191
+ utf8_state.check_next_input (in);
192
+ flatten_bits (base_ptr, idx, prev_structurals); // Output *last* iteration's structurals to ParsedJson
193
+ prev_structurals = structurals & ~string;
194
+ unescaped_chars_error |= unescaped & string;
195
+ idx += 64 ;
196
+ }
197
+
169
198
really_inline void find_structural_bits_128 (
170
- const uint8_t *buf, const size_t idx, uint32_t *&base_ptr,
199
+ const uint8_t *buf, size_t & idx, uint32_t *&base_ptr,
171
200
uint64_t &prev_escaped, uint64_t &prev_in_string,
172
201
uint64_t &prev_primitive,
173
202
uint64_t &prev_structurals,
@@ -176,36 +205,28 @@ really_inline void find_structural_bits_128(
176
205
//
177
206
// Load up all 128 bytes into SIMD registers
178
207
//
179
- simd_input<ARCHITECTURE> in_1 (buf);
180
- simd_input<ARCHITECTURE> in_2 (buf+64 );
208
+ simd_input<ARCHITECTURE> in_1, in_2;
209
+ find_structural_bits_start (buf, in_1);
210
+ find_structural_bits_start (buf+64 , in_2);
181
211
182
212
//
183
213
// Find the strings and potential structurals (operators / primitives).
184
214
//
185
215
// This will include false structurals that are *inside* strings--we'll filter strings out
186
216
// before we return.
187
217
//
188
- uint64_t string_1 = find_strings (in_1, prev_escaped, prev_in_string);
189
- uint64_t structurals_1 = find_potential_structurals (in_1, prev_primitive);
190
- uint64_t string_2 = find_strings (in_2, prev_escaped, prev_in_string);
191
- uint64_t structurals_2 = find_potential_structurals (in_2, prev_primitive);
218
+ uint64_t string_1, structurals_1, string_2, structurals_2;
219
+ find_structural_bits_middle (in_1, prev_escaped, prev_in_string, prev_primitive, string_1, structurals_1);
220
+ find_structural_bits_middle (in_2, prev_escaped, prev_in_string, prev_primitive, string_2, structurals_2);
192
221
193
222
//
194
223
// Do miscellaneous work while the processor is busy calculating strings and structurals.
195
224
//
196
225
// After that, weed out structurals that are inside strings and find invalid string characters.
197
226
//
198
- uint64_t unescaped_1 = in_1.lteq (0x1F );
199
- utf8_state.check_next_input (in_1);
200
- flatten_bits (base_ptr, idx, prev_structurals); // Output *last* iteration's structurals to ParsedJson
201
- prev_structurals = structurals_1 & ~string_1;
202
- unescaped_chars_error |= unescaped_1 & string_1;
203
-
204
- uint64_t unescaped_2 = in_2.lteq (0x1F );
205
- utf8_state.check_next_input (in_2);
206
- flatten_bits (base_ptr, idx+64 , prev_structurals); // Output *last* iteration's structurals to ParsedJson
207
- prev_structurals = structurals_2 & ~string_2;
208
- unescaped_chars_error |= unescaped_2 & string_2;
227
+ find_structural_bits_end (in_1, idx, string_1, structurals_1, base_ptr, prev_structurals, utf8_state, unescaped_chars_error);
228
+ find_structural_bits_end (in_2, idx+64 , string_2, structurals_2, base_ptr, prev_structurals, utf8_state, unescaped_chars_error);
229
+ idx += 128 ;
209
230
}
210
231
211
232
int find_structural_bits (const uint8_t *buf, size_t len, simdjson::ParsedJson &pj) {
@@ -215,6 +236,9 @@ int find_structural_bits(const uint8_t *buf, size_t len, simdjson::ParsedJson &p
215
236
<< len << " bytes" << std::endl;
216
237
return simdjson::CAPACITY;
217
238
}
239
+ if (unlikely (len == 0 )) {
240
+ return simdjson::EMPTY;
241
+ }
218
242
uint32_t *base_ptr = pj.structural_indexes ;
219
243
utf8_checker<ARCHITECTURE> utf8_state;
220
244
@@ -230,29 +254,28 @@ int find_structural_bits(const uint8_t *buf, size_t len, simdjson::ParsedJson &p
230
254
// CPU capacity while the next iteration is busy with an expensive clmul in compute_quote_mask.
231
255
uint64_t structurals = 0 ;
232
256
233
- size_t lenminusstep = len < STEP_SIZE ? 0 : len - STEP_SIZE;
257
+ size_t last_buf_size = (len % STEP_SIZE == 0 ) ? STEP_SIZE : (len % STEP_SIZE);
258
+ const uint8_t *last_buf = buf + len - last_buf_size;
234
259
size_t idx = 0 ;
235
260
// Errors with unescaped characters in strings (ASCII codepoints < 0x20)
236
261
uint64_t unescaped_chars_error = 0 ;
237
262
238
- for (; idx < lenminusstep; idx += STEP_SIZE ) {
239
- find_structural_bits_128 (& buf[idx] , idx, base_ptr,
263
+ while (buf < last_buf ) {
264
+ find_structural_bits_128 (buf, idx, base_ptr,
240
265
prev_escaped, prev_in_string, prev_primitive,
241
266
structurals, unescaped_chars_error, utf8_state);
267
+ buf += 128 ;
242
268
}
243
269
244
270
/* The final chunk (1 to STEP_SIZE bytes) is always copied into a buffer padded
245
271
 * to STEP_SIZE with spaces before processing it (otherwise, we risk
246
272
 * invalidating the UTF-8 checks). */
247
- if (likely (idx < len)) {
248
- uint8_t tmp_buf[STEP_SIZE];
249
- memset (tmp_buf, 0x20 , STEP_SIZE);
250
- memcpy (tmp_buf, buf + idx, len - idx);
251
- find_structural_bits_128 (&tmp_buf[0 ], idx, base_ptr,
252
- prev_escaped, prev_in_string, prev_primitive,
253
- structurals, unescaped_chars_error, utf8_state);
254
- idx += STEP_SIZE;
255
- }
273
+ uint8_t tmp_buf[STEP_SIZE];
274
+ memset (tmp_buf, 0x20 , STEP_SIZE);
275
+ memcpy (tmp_buf, last_buf, last_buf_size);
276
+ find_structural_bits_128 (&tmp_buf[0 ], idx, base_ptr,
277
+ prev_escaped, prev_in_string, prev_primitive,
278
+ structurals, unescaped_chars_error, utf8_state);
256
279
257
280
/* finally, flatten out the remaining structurals from the last iteration */
258
281
flatten_bits (base_ptr, idx, structurals);
0 commit comments