@@ -25,77 +25,60 @@ static really_inline void find_whitespace_and_structurals(simd_input<ARCHITECTUR
25
25
uint64_t &whitespace, uint64_t &structurals) {
26
26
27
27
#ifdef SIMDJSON_NAIVE_STRUCTURAL
28
- // You should never need this naive approach, but it can be useful
29
- // for research purposes
30
- const __m256i mask_open_brace = _mm256_set1_epi8 (0x7b );
31
- __m256i struct_lo = _mm256_cmpeq_epi8 (in.lo , mask_open_brace);
32
- __m256i struct_hi = _mm256_cmpeq_epi8 (in.hi , mask_open_brace);
33
- const __m256i mask_close_brace = _mm256_set1_epi8 (0x7d );
34
- struct_lo = _mm256_or_si256 (struct_lo, _mm256_cmpeq_epi8 (in.lo , mask_close_brace));
35
- struct_hi = _mm256_or_si256 (struct_hi, _mm256_cmpeq_epi8 (in.hi , mask_close_brace));
36
- const __m256i mask_open_bracket = _mm256_set1_epi8 (0x5b );
37
- struct_lo = _mm256_or_si256 (struct_lo, _mm256_cmpeq_epi8 (in.lo , mask_open_bracket));
38
- struct_hi = _mm256_or_si256 (struct_hi, _mm256_cmpeq_epi8 (in.hi , mask_open_bracket));
39
- const __m256i mask_close_bracket = _mm256_set1_epi8 (0x5d );
40
- struct_lo = _mm256_or_si256 (struct_lo, _mm256_cmpeq_epi8 (in.lo , mask_close_bracket));
41
- struct_hi = _mm256_or_si256 (struct_hi, _mm256_cmpeq_epi8 (in.hi , mask_close_bracket));
42
- const __m256i mask_column = _mm256_set1_epi8 (0x3a );
43
- struct_lo = _mm256_or_si256 (struct_lo, _mm256_cmpeq_epi8 (in.lo , mask_column));
44
- struct_hi = _mm256_or_si256 (struct_hi, _mm256_cmpeq_epi8 (in.hi , mask_column));
45
- const __m256i mask_comma = _mm256_set1_epi8 (0x2c );
46
- struct_lo = _mm256_or_si256 (struct_lo, _mm256_cmpeq_epi8 (in.lo , mask_comma));
47
- struct_hi = _mm256_or_si256 (struct_hi, _mm256_cmpeq_epi8 (in.hi , mask_comma));
48
- uint64_t structural_res_0 = static_cast <uint32_t >(_mm256_movemask_epi8 (struct_lo));
49
- uint64_t structural_res_1 = _mm256_movemask_epi8 (struct_hi);
50
- structurals = (structural_res_0 | (structural_res_1 << 32 ));
51
-
52
- const __m256i mask_space = _mm256_set1_epi8 (0x20 );
53
- __m256i space_lo = _mm256_cmpeq_epi8 (in.lo , mask_space);
54
- __m256i space_hi = _mm256_cmpeq_epi8 (in.hi , mask_space);
55
- const __m256i mask_linefeed = _mm256_set1_epi8 (0x0a );
56
- space_lo = _mm256_or_si256 (space_lo, _mm256_cmpeq_epi8 (in.lo , mask_linefeed));
57
- space_hi = _mm256_or_si256 (space_hi, _mm256_cmpeq_epi8 (in.hi , mask_linefeed));
58
- const __m256i mask_tab = _mm256_set1_epi8 (0x09 );
59
- space_lo = _mm256_or_si256 (space_lo, _mm256_cmpeq_epi8 (in.lo , mask_tab));
60
- space_hi = _mm256_or_si256 (space_hi, _mm256_cmpeq_epi8 (in.hi , mask_tab));
61
- const __m256i mask_carriage = _mm256_set1_epi8 (0x0d );
62
- space_lo = _mm256_or_si256 (space_lo, _mm256_cmpeq_epi8 (in.lo , mask_carriage));
63
- space_hi = _mm256_or_si256 (space_hi, _mm256_cmpeq_epi8 (in.hi , mask_carriage));
64
-
65
- uint64_t ws_res_0 = static_cast <uint32_t >(_mm256_movemask_epi8 (space_lo));
66
- uint64_t ws_res_1 = _mm256_movemask_epi8 (space_hi);
67
- whitespace = (ws_res_0 | (ws_res_1 << 32 ));
68
- // end of naive approach
28
+
29
+ // You should never need this naive approach, but it can be useful
30
+ // for research purposes
31
+ const __m256i mask_open_brace = _mm256_set1_epi8 (0x7b );
32
+ const __m256i mask_close_brace = _mm256_set1_epi8 (0x7d );
33
+ const __m256i mask_open_bracket = _mm256_set1_epi8 (0x5b );
34
+ const __m256i mask_close_bracket = _mm256_set1_epi8 (0x5d );
35
+ const __m256i mask_column = _mm256_set1_epi8 (0x3a );
36
+ const __m256i mask_comma = _mm256_set1_epi8 (0x2c );
37
+ structurals = in.build_bitmask ([&](auto chunk) { // `in` is accessed with '.', not '->'; presumably the lambda runs once per 256-bit chunk - TODO confirm
38
+ __m256i matches = _mm256_cmpeq_epi8 (chunk, mask_open_brace); // 0xFF in every byte lane equal to '{' (0x7b)
39
+ matches = _mm256_or_si256 (matches, _mm256_cmpeq_epi8 (chunk, mask_close_brace)); // '}' (0x7d)
40
+ matches = _mm256_or_si256 (matches, _mm256_cmpeq_epi8 (chunk, mask_open_bracket)); // '[' (0x5b)
41
+ matches = _mm256_or_si256 (matches, _mm256_cmpeq_epi8 (chunk, mask_close_bracket)); // ']' (0x5d)
42
+ matches = _mm256_or_si256 (matches, _mm256_cmpeq_epi8 (chunk, mask_column)); // ':' (0x3a)
43
+ matches = _mm256_or_si256 (matches, _mm256_cmpeq_epi8 (chunk, mask_comma)); // ',' (0x2c)
44
+ return matches; // local renamed so it no longer shadows the `structurals` output reference
45
+ });
46
+
47
+ const __m256i mask_space = _mm256_set1_epi8 (0x20 );
48
+ const __m256i mask_linefeed = _mm256_set1_epi8 (0x0a );
49
+ const __m256i mask_tab = _mm256_set1_epi8 (0x09 );
50
+ const __m256i mask_carriage = _mm256_set1_epi8 (0x0d );
51
+ whitespace = in.build_bitmask ([&](auto chunk) { // `in` is accessed with '.', not '->'; presumably the lambda runs once per 256-bit chunk - TODO confirm
52
+ __m256i space = _mm256_cmpeq_epi8 (chunk, mask_space); // 0xFF in every byte lane equal to ' ' (0x20)
53
+ space = _mm256_or_si256 (space, _mm256_cmpeq_epi8 (chunk, mask_linefeed)); // '\n' (0x0a)
54
+ space = _mm256_or_si256 (space, _mm256_cmpeq_epi8 (chunk, mask_tab)); // '\t' (0x09)
55
+ return _mm256_or_si256 (space, _mm256_cmpeq_epi8 (chunk, mask_carriage)); // '\r' (0x0d); the mask must be returned, otherwise the lambda yields void
56
+ });
57
+ // end of naive approach
69
58
70
59
#else // SIMDJSON_NAIVE_STRUCTURAL
71
- // clang-format off
72
- const __m256i structural_table =
73
- _mm256_setr_epi8 (44 , 125 , 0 , 0 , 0xc0u , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 58 , 123 ,
74
- 44 , 125 , 0 , 0 , 0xc0u , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 58 , 123 );
75
- const __m256i white_table = _mm256_setr_epi8 (
76
- 32 , 100 , 100 , 100 , 17 , 100 , 113 , 2 , 100 , 9 , 10 , 112 , 100 , 13 , 100 , 100 ,
77
- 32 , 100 , 100 , 100 , 17 , 100 , 113 , 2 , 100 , 9 , 10 , 112 , 100 , 13 , 100 , 100 );
78
- // clang-format on
79
- const __m256i struct_offset = _mm256_set1_epi8 (0xd4u );
80
- const __m256i struct_mask = _mm256_set1_epi8 (32 );
81
-
82
- __m256i lo_white = _mm256_cmpeq_epi8 (in.lo , _mm256_shuffle_epi8 (white_table, in.lo ));
83
- __m256i hi_white = _mm256_cmpeq_epi8 (in.hi , _mm256_shuffle_epi8 (white_table, in.hi ));
84
- uint64_t ws_res_0 = static_cast <uint32_t >(_mm256_movemask_epi8 (lo_white));
85
- uint64_t ws_res_1 = _mm256_movemask_epi8 (hi_white);
86
- whitespace = (ws_res_0 | (ws_res_1 << 32 ));
87
- __m256i lo_struct_r1 = _mm256_add_epi8 (struct_offset, in.lo );
88
- __m256i hi_struct_r1 = _mm256_add_epi8 (struct_offset, in.hi );
89
- __m256i lo_struct_r2 = _mm256_or_si256 (in.lo , struct_mask);
90
- __m256i hi_struct_r2 = _mm256_or_si256 (in.hi , struct_mask);
91
- __m256i lo_struct_r3 = _mm256_shuffle_epi8 (structural_table, lo_struct_r1);
92
- __m256i hi_struct_r3 = _mm256_shuffle_epi8 (structural_table, hi_struct_r1);
93
- __m256i lo_struct = _mm256_cmpeq_epi8 (lo_struct_r2, lo_struct_r3);
94
- __m256i hi_struct = _mm256_cmpeq_epi8 (hi_struct_r2, hi_struct_r3);
95
-
96
- uint64_t structural_res_0 = static_cast <uint32_t >(_mm256_movemask_epi8 (lo_struct));
97
- uint64_t structural_res_1 = _mm256_movemask_epi8 (hi_struct);
98
- structurals = (structural_res_0 | (structural_res_1 << 32 ));
60
+
61
+ // clang-format off
62
+ const __m256i structural_table =
63
+ _mm256_setr_epi8 (44 , 125 , 0 , 0 , 0xc0u , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 58 , 123 ,
64
+ 44 , 125 , 0 , 0 , 0xc0u , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 58 , 123 );
65
+ const __m256i white_table = _mm256_setr_epi8 (
66
+ 32 , 100 , 100 , 100 , 17 , 100 , 113 , 2 , 100 , 9 , 10 , 112 , 100 , 13 , 100 , 100 ,
67
+ 32 , 100 , 100 , 100 , 17 , 100 , 113 , 2 , 100 , 9 , 10 , 112 , 100 , 13 , 100 , 100 );
68
+ // clang-format on
69
+ const __m256i struct_offset = _mm256_set1_epi8 (0xd4u );
70
+ const __m256i struct_mask = _mm256_set1_epi8 (32 );
71
+
72
+ whitespace = in.build_bitmask ([&](auto chunk) {
73
+ return _mm256_cmpeq_epi8 (chunk, _mm256_shuffle_epi8 (white_table, chunk));
74
+ });
75
+ structurals = in.build_bitmask ([&](auto chunk) {
76
+ __m256i struct_r1 = _mm256_add_epi8 (struct_offset, chunk);
77
+ __m256i struct_r2 = _mm256_or_si256 (chunk, struct_mask);
78
+ __m256i struct_r3 = _mm256_shuffle_epi8 (structural_table, struct_r1);
79
+ return _mm256_cmpeq_epi8 (struct_r2, struct_r3);
80
+ });
81
+
99
82
#endif // else SIMDJSON_NAIVE_STRUCTURAL
100
83
}
101
84
0 commit comments