Skip to content

Commit 0925f71

Browse files
committed
Simplify operator classification lookup on Intel
1 parent 4c11652 commit 0925f71

File tree

2 files changed

+66
-12
lines changed

2 files changed

+66
-12
lines changed

src/haswell/dom_parser_implementation.cpp

Lines changed: 36 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -14,25 +14,54 @@ using namespace simd;
1414
struct json_character_block {
1515
static simdjson_really_inline json_character_block classify(const simd::simd8x64<uint8_t>& in);
1616
// ASCII white-space ('\r','\n','\t',' ')
17-
simdjson_really_inline uint64_t whitespace() const { return _whitespace; }
17+
simdjson_really_inline uint64_t whitespace() const;
1818
// non-quote structural characters (comma, colon, braces, brackets)
19-
simdjson_really_inline uint64_t op() const { return _op; }
19+
simdjson_really_inline uint64_t op() const;
2020
// neither a structural character nor a white-space, so letters, numbers and quotes
21-
simdjson_really_inline uint64_t scalar() { return ~(op() | whitespace()); }
21+
simdjson_really_inline uint64_t scalar() const;
2222

2323
uint64_t _whitespace; // ASCII white-space ('\r','\n','\t',' ')
2424
uint64_t _op; // structural characters (comma, colon, braces, brackets but not quotes)
2525
};
2626

27+
simdjson_really_inline uint64_t json_character_block::whitespace() const { return _whitespace; }
28+
simdjson_really_inline uint64_t json_character_block::op() const { return _op; }
29+
simdjson_really_inline uint64_t json_character_block::scalar() const { return ~(op() | whitespace()); }
30+
2731
// This identifies structural characters (comma, colon, braces, brackets),
2832
// and ASCII white-space ('\r','\n','\t',' ').
2933
simdjson_really_inline json_character_block json_character_block::classify(const simd::simd8x64<uint8_t>& in) {
3034
// These lookups rely on the fact that anything < 127 will match the lower 4 bits, which is why
3135
// we can't use the generic lookup_16.
3236
auto whitespace_table = simd8<uint8_t>::repeat_16(' ', 100, 100, 100, 17, 100, 113, 2, 100, '\t', '\n', 112, 100, '\r', 100, 100);
33-
auto op_table = simd8<uint8_t>::repeat_16(',', '}', 0, 0, 0xc0u, 0, 0, 0, 0, 0, 0, 0, 0, 0, ':', '{');
3437

35-
// We compute whitespace and op separately. If the code later only use one or the
38+
// The 6 operators (:,[]{}) have these values:
39+
//
40+
// , 2C
41+
// : 3A
42+
// [ 5B
43+
// { 7B
44+
// ] 5D
45+
// } 7D
46+
//
47+
// If you use | 0x20 to turn [ and ] into { and }, the lower 4 bits of each character are unique.
48+
// We exploit this, using a simd 4-bit lookup to tell us which character to match against, and then
49+
// match it (against | 0x20).
50+
//
51+
// To prevent recognizing other characters, everything else gets compared with 0, which cannot
52+
// match due to the | 0x20.
53+
//
54+
// NOTE: Due to the | 0x20, this ALSO treats <FF> and <SUB> (control characters 0C and 1A) like ,
55+
// and :. This gets caught in stage 2, which checks the actual character to ensure the right
56+
// operators are in the right places.
57+
auto op_table = simd8<uint8_t>::repeat_16(
58+
0, 0, 0, 0,
59+
0, 0, 0, 0,
60+
0, 0, ':', '{', // : = 3A, [ = 5B, { = 7B
61+
',', '}', 0, 0 // , = 2C, ] = 5D, } = 7D
62+
);
63+
64+
// We compute whitespace and op separately. If later code only uses one or the
3665
// other, given the fact that all functions are aggressively inlined, we can
3766
// hope that useless computations will be omitted. This is namely the case when
3867
// minifying (we only need whitespace).
@@ -43,8 +72,8 @@ simdjson_really_inline json_character_block json_character_block::classify(const
4372
).to_bitmask();
4473

4574
uint64_t op = simd8x64<bool>(
46-
(in.chunks[0] | 32) == simd8<uint8_t>(_mm256_shuffle_epi8(op_table, in.chunks[0]-',')),
47-
(in.chunks[1] | 32) == simd8<uint8_t>(_mm256_shuffle_epi8(op_table, in.chunks[1]-','))
75+
(in.chunks[0] | 0x20) == simd8<uint8_t>(_mm256_shuffle_epi8(op_table, in.chunks[0])),
76+
(in.chunks[1] | 0x20) == simd8<uint8_t>(_mm256_shuffle_epi8(op_table, in.chunks[1]))
4877
).to_bitmask();
4978
return { whitespace, op };
5079
}

src/westmere/dom_parser_implementation.cpp

Lines changed: 30 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,32 @@ simdjson_really_inline json_character_block json_character_block::classify(const
2626
// These lookups rely on the fact that anything < 127 will match the lower 4 bits, which is why
2727
// we can't use the generic lookup_16.
2828
auto whitespace_table = simd8<uint8_t>::repeat_16(' ', 100, 100, 100, 17, 100, 113, 2, 100, '\t', '\n', 112, 100, '\r', 100, 100);
29-
auto op_table = simd8<uint8_t>::repeat_16(',', '}', 0, 0, 0xc0u, 0, 0, 0, 0, 0, 0, 0, 0, 0, ':', '{');
29+
30+
// The 6 operators (:,[]{}) have these values:
31+
//
32+
// , 2C
33+
// : 3A
34+
// [ 5B
35+
// { 7B
36+
// ] 5D
37+
// } 7D
38+
//
39+
// If you use | 0x20 to turn [ and ] into { and }, the lower 4 bits of each character are unique.
40+
// We exploit this, using a simd 4-bit lookup to tell us which character to match against, and then
41+
// match it (against | 0x20).
42+
//
43+
// To prevent recognizing other characters, everything else gets compared with 0, which cannot
44+
// match due to the | 0x20.
45+
//
46+
// NOTE: Due to the | 0x20, this ALSO treats <FF> and <SUB> (control characters 0C and 1A) like ,
47+
// and :. This gets caught in stage 2, which checks the actual character to ensure the right
48+
// operators are in the right places.
49+
const auto op_table = simd8<uint8_t>::repeat_16(
50+
0, 0, 0, 0,
51+
0, 0, 0, 0,
52+
0, 0, ':', '{', // : = 3A, [ = 5B, { = 7B
53+
',', '}', 0, 0 // , = 2C, ] = 5D, } = 7D
54+
);
3055

3156
// We compute whitespace and op separately. If later code only uses one or the
3257
// other, given the fact that all functions are aggressively inlined, we can
@@ -42,10 +67,10 @@ simdjson_really_inline json_character_block json_character_block::classify(const
4267

4368
// | 32 handles the fact that { } and [ ] are exactly 32 (0x20) apart in ASCII value
4469
uint64_t op = simd8x64<bool>(
45-
(in.chunks[0] | 32) == simd8<uint8_t>(_mm_shuffle_epi8(op_table, in.chunks[0]-',')),
46-
(in.chunks[1] | 32) == simd8<uint8_t>(_mm_shuffle_epi8(op_table, in.chunks[1]-',')),
47-
(in.chunks[2] | 32) == simd8<uint8_t>(_mm_shuffle_epi8(op_table, in.chunks[2]-',')),
48-
(in.chunks[3] | 32) == simd8<uint8_t>(_mm_shuffle_epi8(op_table, in.chunks[3]-','))
70+
(in.chunks[0] | 32) == simd8<uint8_t>(_mm_shuffle_epi8(op_table, in.chunks[0])),
71+
(in.chunks[1] | 32) == simd8<uint8_t>(_mm_shuffle_epi8(op_table, in.chunks[1])),
72+
(in.chunks[2] | 32) == simd8<uint8_t>(_mm_shuffle_epi8(op_table, in.chunks[2])),
73+
(in.chunks[3] | 32) == simd8<uint8_t>(_mm_shuffle_epi8(op_table, in.chunks[3]))
4974
).to_bitmask();
5075
return { whitespace, op };
5176
}

0 commit comments

Comments
 (0)