Skip to content

Commit 80e84a3

Browse files
authored
Merge pull request simdjson#1143 from simdjson/jkeiser/classify
Simplify operator classification lookup on Intel
2 parents 0552335 + f0ec269 commit 80e84a3

File tree

5 files changed

+117
-56
lines changed

5 files changed

+117
-56
lines changed

src/arm64/simd.h

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -462,16 +462,6 @@ simdjson_really_inline int8x16_t make_int8x16_t(int8_t x1, int8_t x2, int8_t x
462462
return vgetq_lane_u64(vreinterpretq_u64_u8(sum0), 0);
463463
}
464464

465-
simdjson_really_inline simd8x64<T> bit_or(const T m) const {
466-
const simd8<T> mask = simd8<T>::splat(m);
467-
return simd8x64<T>(
468-
this->chunks[0] | mask,
469-
this->chunks[1] | mask,
470-
this->chunks[2] | mask,
471-
this->chunks[3] | mask
472-
);
473-
}
474-
475465
simdjson_really_inline uint64_t eq(const T m) const {
476466
const simd8<T> mask = simd8<T>::splat(m);
477467
return simd8x64<bool>(

src/haswell/dom_parser_implementation.cpp

Lines changed: 49 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -14,38 +14,72 @@ using namespace simd;
1414
struct json_character_block {
1515
static simdjson_really_inline json_character_block classify(const simd::simd8x64<uint8_t>& in);
1616
// ASCII white-space ('\r','\n','\t',' ')
17-
simdjson_really_inline uint64_t whitespace() const { return _whitespace; }
17+
simdjson_really_inline uint64_t whitespace() const;
1818
// non-quote structural characters (comma, colon, braces, brackets)
19-
simdjson_really_inline uint64_t op() const { return _op; }
19+
simdjson_really_inline uint64_t op() const;
2020
// neither a structural character nor a white-space, so letters, numbers and quotes
21-
simdjson_really_inline uint64_t scalar() { return ~(op() | whitespace()); }
21+
simdjson_really_inline uint64_t scalar() const;
2222

2323
uint64_t _whitespace; // ASCII white-space ('\r','\n','\t',' ')
2424
uint64_t _op; // structural characters (comma, colon, braces, brackets but not quotes)
2525
};
2626

27+
simdjson_really_inline uint64_t json_character_block::whitespace() const { return _whitespace; }
28+
simdjson_really_inline uint64_t json_character_block::op() const { return _op; }
29+
simdjson_really_inline uint64_t json_character_block::scalar() const { return ~(op() | whitespace()); }
30+
2731
// This identifies structural characters (comma, colon, braces, brackets),
2832
// and ASCII white-space ('\r','\n','\t',' ').
2933
simdjson_really_inline json_character_block json_character_block::classify(const simd::simd8x64<uint8_t>& in) {
3034
// These lookups rely on the fact that anything < 128 will match the lower 4 bits, which is why
3135
// we can't use the generic lookup_16.
32-
auto whitespace_table = simd8<uint8_t>::repeat_16(' ', 100, 100, 100, 17, 100, 113, 2, 100, '\t', '\n', 112, 100, '\r', 100, 100);
33-
auto op_table = simd8<uint8_t>::repeat_16(',', '}', 0, 0, 0xc0u, 0, 0, 0, 0, 0, 0, 0, 0, 0, ':', '{');
34-
35-
// We compute whitespace and op separately. If the code later only use one or the
36+
const auto whitespace_table = simd8<uint8_t>::repeat_16(' ', 100, 100, 100, 17, 100, 113, 2, 100, '\t', '\n', 112, 100, '\r', 100, 100);
37+
38+
// The 6 operators (:,[]{}) have these values:
39+
//
40+
// , 2C
41+
// : 3A
42+
// [ 5B
43+
// { 7B
44+
// ] 5D
45+
// } 7D
46+
//
47+
// If you use | 0x20 to turn [ and ] into { and }, the lower 4 bits of each character are unique.
48+
// We exploit this, using a simd 4-bit lookup to tell us which character to match against, and then
49+
// match it (against | 0x20).
50+
//
51+
// To prevent recognizing other characters, everything else gets compared with 0, which cannot
52+
// match due to the | 0x20.
53+
//
54+
// NOTE: Due to the | 0x20, this ALSO treats <FF> and <SUB> (control characters 0C and 1A) like ,
55+
// and :. This gets caught in stage 2, which checks the actual character to ensure the right
56+
// operators are in the right places.
57+
const auto op_table = simd8<uint8_t>::repeat_16(
58+
0, 0, 0, 0,
59+
0, 0, 0, 0,
60+
0, 0, ':', '{', // : = 3A, [ = 5B, { = 7B
61+
',', '}', 0, 0 // , = 2C, ] = 5D, } = 7D
62+
);
63+
64+
// We compute whitespace and op separately. If later code only uses one or the
3665
// other, given the fact that all functions are aggressively inlined, we can
3766
// hope that useless computations will be omitted. This is notably the case when
3867
// minifying (we only need whitespace).
3968

40-
uint64_t whitespace = simd8x64<bool>(
41-
in.chunks[0] == simd8<uint8_t>(_mm256_shuffle_epi8(whitespace_table, in.chunks[0])),
42-
in.chunks[1] == simd8<uint8_t>(_mm256_shuffle_epi8(whitespace_table, in.chunks[1]))
43-
).to_bitmask();
69+
const uint64_t whitespace = in.eq({
70+
_mm256_shuffle_epi8(whitespace_table, in.chunks[0]),
71+
_mm256_shuffle_epi8(whitespace_table, in.chunks[1])
72+
});
73+
// Turn [ and ] into { and }
74+
const simd8x64<uint8_t> curlified{
75+
in.chunks[0] | 0x20,
76+
in.chunks[1] | 0x20
77+
};
78+
const uint64_t op = curlified.eq({
79+
_mm256_shuffle_epi8(op_table, in.chunks[0]),
80+
_mm256_shuffle_epi8(op_table, in.chunks[1])
81+
});
4482

45-
uint64_t op = simd8x64<bool>(
46-
(in.chunks[0] | 32) == simd8<uint8_t>(_mm256_shuffle_epi8(op_table, in.chunks[0]-',')),
47-
(in.chunks[1] | 32) == simd8<uint8_t>(_mm256_shuffle_epi8(op_table, in.chunks[1]-','))
48-
).to_bitmask();
4983
return { whitespace, op };
5084
}
5185

src/haswell/simd.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -337,6 +337,13 @@ namespace simd {
337337
).to_bitmask();
338338
}
339339

340+
simdjson_really_inline uint64_t eq(const simd8x64<uint8_t> &other) const {
341+
return simd8x64<bool>(
342+
this->chunks[0] == other.chunks[0],
343+
this->chunks[1] == other.chunks[1]
344+
).to_bitmask();
345+
}
346+
340347
simdjson_really_inline uint64_t lteq(const T m) const {
341348
const simd8<T> mask = simd8<T>::splat(m);
342349
return simd8x64<bool>(

src/westmere/dom_parser_implementation.cpp

Lines changed: 47 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -26,28 +26,59 @@ simdjson_really_inline json_character_block json_character_block::classify(const
2626
// These lookups rely on the fact that anything < 128 will match the lower 4 bits, which is why
2727
// we can't use the generic lookup_16.
2828
auto whitespace_table = simd8<uint8_t>::repeat_16(' ', 100, 100, 100, 17, 100, 113, 2, 100, '\t', '\n', 112, 100, '\r', 100, 100);
29-
auto op_table = simd8<uint8_t>::repeat_16(',', '}', 0, 0, 0xc0u, 0, 0, 0, 0, 0, 0, 0, 0, 0, ':', '{');
29+
30+
// The 6 operators (:,[]{}) have these values:
31+
//
32+
// , 2C
33+
// : 3A
34+
// [ 5B
35+
// { 7B
36+
// ] 5D
37+
// } 7D
38+
//
39+
// If you use | 0x20 to turn [ and ] into { and }, the lower 4 bits of each character are unique.
40+
// We exploit this, using a simd 4-bit lookup to tell us which character to match against, and then
41+
// match it (against | 0x20).
42+
//
43+
// To prevent recognizing other characters, everything else gets compared with 0, which cannot
44+
// match due to the | 0x20.
45+
//
46+
// NOTE: Due to the | 0x20, this ALSO treats <FF> and <SUB> (control characters 0C and 1A) like ,
47+
// and :. This gets caught in stage 2, which checks the actual character to ensure the right
48+
// operators are in the right places.
49+
const auto op_table = simd8<uint8_t>::repeat_16(
50+
0, 0, 0, 0,
51+
0, 0, 0, 0,
52+
0, 0, ':', '{', // : = 3A, [ = 5B, { = 7B
53+
',', '}', 0, 0 // , = 2C, ] = 5D, } = 7D
54+
);
3055

3156
// We compute whitespace and op separately. If later code only uses one or the
3257
// other, given the fact that all functions are aggressively inlined, we can
3358
// hope that useless computations will be omitted. This is notably the case when
3459
// minifying (we only need whitespace).
3560

36-
uint64_t whitespace = simd8x64<bool>(
37-
in.chunks[0] == simd8<uint8_t>(_mm_shuffle_epi8(whitespace_table, in.chunks[0])),
38-
in.chunks[1] == simd8<uint8_t>(_mm_shuffle_epi8(whitespace_table, in.chunks[1])),
39-
in.chunks[2] == simd8<uint8_t>(_mm_shuffle_epi8(whitespace_table, in.chunks[2])),
40-
in.chunks[3] == simd8<uint8_t>(_mm_shuffle_epi8(whitespace_table, in.chunks[3]))
41-
).to_bitmask();
42-
43-
// | 32 handles the fact that { } and [ ] are exactly 32 bytes apart
44-
uint64_t op = simd8x64<bool>(
45-
(in.chunks[0] | 32) == simd8<uint8_t>(_mm_shuffle_epi8(op_table, in.chunks[0]-',')),
46-
(in.chunks[1] | 32) == simd8<uint8_t>(_mm_shuffle_epi8(op_table, in.chunks[1]-',')),
47-
(in.chunks[2] | 32) == simd8<uint8_t>(_mm_shuffle_epi8(op_table, in.chunks[2]-',')),
48-
(in.chunks[3] | 32) == simd8<uint8_t>(_mm_shuffle_epi8(op_table, in.chunks[3]-','))
49-
).to_bitmask();
50-
return { whitespace, op };
61+
62+
const uint64_t whitespace = in.eq({
63+
_mm_shuffle_epi8(whitespace_table, in.chunks[0]),
64+
_mm_shuffle_epi8(whitespace_table, in.chunks[1]),
65+
_mm_shuffle_epi8(whitespace_table, in.chunks[2]),
66+
_mm_shuffle_epi8(whitespace_table, in.chunks[3])
67+
});
68+
// Turn [ and ] into { and }
69+
const simd8x64<uint8_t> curlified{
70+
in.chunks[0] | 0x20,
71+
in.chunks[1] | 0x20,
72+
in.chunks[2] | 0x20,
73+
in.chunks[3] | 0x20
74+
};
75+
const uint64_t op = curlified.eq({
76+
_mm_shuffle_epi8(op_table, in.chunks[0]),
77+
_mm_shuffle_epi8(op_table, in.chunks[1]),
78+
_mm_shuffle_epi8(op_table, in.chunks[2]),
79+
_mm_shuffle_epi8(op_table, in.chunks[3])
80+
});
81+
return { whitespace, op };
5182
}
5283

5384
simdjson_really_inline bool is_ascii(const simd8x64<uint8_t>& input) {

src/westmere/simd.h

Lines changed: 14 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -288,23 +288,13 @@ namespace simd {
288288
}
289289

290290
simdjson_really_inline uint64_t to_bitmask() const {
291-
uint64_t r0 = uint32_t(this->chunks[0].to_bitmask());
292-
uint64_t r1 = this->chunks[1].to_bitmask();
293-
uint64_t r2 = this->chunks[2].to_bitmask();
294-
uint64_t r3 = this->chunks[3].to_bitmask();
291+
uint64_t r0 = uint32_t(this->chunks[0].to_bitmask() );
292+
uint64_t r1 = this->chunks[1].to_bitmask() ;
293+
uint64_t r2 = this->chunks[2].to_bitmask() ;
294+
uint64_t r3 = this->chunks[3].to_bitmask() ;
295295
return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48);
296296
}
297-
298-
simdjson_really_inline simd8x64<T> bit_or(const T m) const {
299-
const simd8<T> mask = simd8<T>::splat(m);
300-
return simd8x64<T>(
301-
this->chunks[0] | mask,
302-
this->chunks[1] | mask,
303-
this->chunks[2] | mask,
304-
this->chunks[3] | mask
305-
);
306-
}
307-
297+
308298
simdjson_really_inline uint64_t eq(const T m) const {
309299
const simd8<T> mask = simd8<T>::splat(m);
310300
return simd8x64<bool>(
@@ -315,6 +305,15 @@ namespace simd {
315305
).to_bitmask();
316306
}
317307

308+
simdjson_really_inline uint64_t eq(const simd8x64<uint8_t> &other) const {
309+
return simd8x64<bool>(
310+
this->chunks[0] == other.chunks[0],
311+
this->chunks[1] == other.chunks[1],
312+
this->chunks[2] == other.chunks[2],
313+
this->chunks[3] == other.chunks[3]
314+
).to_bitmask();
315+
}
316+
318317
simdjson_really_inline uint64_t lteq(const T m) const {
319318
const simd8<T> mask = simd8<T>::splat(m);
320319
return simd8x64<bool>(

0 commit comments

Comments
 (0)