@@ -47,7 +47,8 @@ namespace simdjson::arm64::simd {
47
47
// SIMD byte mask type (returned by things like eq and gt)
48
48
template <>
49
49
struct simd8 <bool >: base_u8<bool > {
50
- typedef uint32_t bitmask_t ;
50
+ typedef uint16_t bitmask_t ;
51
+ typedef uint32_t bitmask2_t ;
51
52
52
53
// Broadcast a bool to every lane via vmovq_n_u8. `!!_value` normalizes to 0/1 and the
// negation gives 0 or -1, so the byte that gets replicated is 0x00 (false) or 0xFF (true).
static really_inline simd8<bool > splat (bool _value) { return vmovq_n_u8 (-(!!_value)); }
53
54
@@ -57,7 +58,9 @@ namespace simdjson::arm64::simd {
57
58
// Splat constructor
58
59
// Delegating constructor: initializes this mask from splat(_value).
// Not marked `explicit`, so a plain bool converts implicitly to simd8<bool>.
really_inline simd8 (bool _value) : simd8(splat(_value)) {}
59
60
60
- really_inline simd8<bool >::bitmask_t to_bitmask () const {
61
+ // We return uint32_t instead of uint16_t because that seems to be more efficient for most
62
+ // purposes (cutting it down to uint16_t costs performance in some compilers).
63
+ really_inline uint32_t to_bitmask () const {
61
64
const uint8x16_t bit_mask = {0x01 , 0x02 , 0x4 , 0x8 , 0x10 , 0x20 , 0x40 , 0x80 ,
62
65
0x01 , 0x02 , 0x4 , 0x8 , 0x10 , 0x20 , 0x40 , 0x80 };
63
66
auto minput = *this & bit_mask;
@@ -119,6 +122,8 @@ namespace simdjson::arm64::simd {
119
122
// Thin wrapper over NEON vmaxq_u8: per-lane unsigned maximum of *this and other.
really_inline simd8<uint8_t > max (const simd8<uint8_t > other) const { return vmaxq_u8 (*this , other); }
120
123
// Thin wrapper over NEON vminq_u8: per-lane unsigned minimum of *this and other.
really_inline simd8<uint8_t > min (const simd8<uint8_t > other) const { return vminq_u8 (*this , other); }
121
124
// Per-lane unsigned `*this <= other` via NEON vcleq_u8; the comparison result is returned as a simd8<bool> mask.
really_inline simd8<bool > operator <=(const simd8<uint8_t > other) const { return vcleq_u8 (*this , other); }
125
// Per-lane unsigned `*this >= other` via NEON vcgeq_u8; the comparison result is returned as a simd8<bool> mask.
+ really_inline simd8<bool > operator >=(const simd8<uint8_t > other) const { return vcgeq_u8 (*this , other); }
126
// Per-lane unsigned `*this > other` via NEON vcgtq_u8; the comparison result is returned as a simd8<bool> mask.
+ really_inline simd8<bool > operator >(const simd8<uint8_t > other) const { return vcgtq_u8 (*this , other); }
122
127
123
128
// Bit-specific operations
124
129
// Thin wrapper over NEON vtstq_u8: a result lane is set when (*this & bits) is non-zero in that lane.
really_inline simd8<bool > any_bits_set (simd8<uint8_t > bits) const { return vtstq_u8 (*this , bits); }
@@ -131,18 +136,21 @@ namespace simdjson::arm64::simd {
131
136
132
137
// Perform a lookup assuming the value is between 0 and 15 (undefined behavior for out of range values)
133
138
// Table overload: takes an already-built 16-entry table and hands *this to
// lookup_table.apply_lookup_16_to as the vector of per-lane indices.
template <typename L>
139
+ really_inline simd8<L> lookup_16 (simd8<L> lookup_table) const {
140
+ return lookup_table.apply_lookup_16_to (*this );
141
+ }
142
// Scalar overload: takes the 16 table entries as individual arguments.
// The removed (-) lines built a simd8<L> table directly and called apply_lookup_16_to on it;
// the added (+) lines pack the entries with simd8<L>::repeat_16 and forward to the
// single-argument lookup_16 overload instead.
+ template <typename L>
134
143
really_inline simd8<L> lookup_16 (
135
144
L replace0, L replace1, L replace2, L replace3,
136
145
L replace4, L replace5, L replace6, L replace7,
137
146
L replace8, L replace9, L replace10, L replace11,
138
147
L replace12, L replace13, L replace14, L replace15) const {
139
- simd8<L> lookup_table (
148
+ return lookup_16 ( simd8<L>:: repeat_16 (
140
149
replace0, replace1, replace2, replace3,
141
150
replace4, replace5, replace6, replace7,
142
151
replace8, replace9, replace10, replace11,
143
152
replace12, replace13, replace14, replace15
144
- );
145
- return lookup_table.apply_lookup_16_to (*this );
153
+ ));
146
154
}
147
155
148
156
template <typename T>
@@ -178,7 +186,7 @@ namespace simdjson::arm64::simd {
178
186
) : simd8(int8x16_t {
179
187
v0, v1, v2, v3, v4, v5, v6, v7,
180
188
v8, v9, v10,v11,v12,v13,v14,v15
181
- }) {}
189
+ }) {}
182
190
// Repeat 16 values as many times as necessary (usually for lookup tables)
183
191
really_inline static simd8<int8_t > repeat_16 (
184
192
int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7,
@@ -214,24 +222,28 @@ namespace simdjson::arm64::simd {
214
222
return vextq_s8 (prev_chunk, *this , 16 - N);
215
223
}
216
224
217
- // Perform a lookup of the lower 4 bits
225
+ // Perform a lookup assuming no value is larger than 15
226
// Table overload: takes an already-built 16-entry table and hands *this to
// lookup_table.apply_lookup_16_to as the vector of per-lane indices.
+ template <typename L>
227
+ really_inline simd8<L> lookup_16 (simd8<L> lookup_table) const {
228
+ return lookup_table.apply_lookup_16_to (*this );
229
+ }
218
230
// Scalar overload: takes the 16 table entries as individual arguments.
// The removed (-) lines converted *this to simd8<uint8_t> and delegated to that type's lookup_16;
// the added (+) lines pack the entries with simd8<L>::repeat_16 and forward to the
// single-argument lookup_16 overload instead.
template <typename L>
219
231
really_inline simd8<L> lookup_16 (
220
232
L replace0, L replace1, L replace2, L replace3,
221
233
L replace4, L replace5, L replace6, L replace7,
222
234
L replace8, L replace9, L replace10, L replace11,
223
235
L replace12, L replace13, L replace14, L replace15) const {
224
- return simd8<uint8_t >(* this ). lookup_16 (
236
+ return lookup_16 ( simd8<L>:: repeat_16 (
225
237
replace0, replace1, replace2, replace3,
226
238
replace4, replace5, replace6, replace7,
227
239
replace8, replace9, replace10, replace11,
228
240
replace12, replace13, replace14, replace15
229
- );
241
+ )) ;
230
242
}
231
243
232
244
// Treats *this as the 16-entry table and `original` as the per-lane indices, resolved with NEON vqtbl1q_s8.
// The added (+) line converts `original` to simd8<uint8_t> before the call, where the removed (-) line
// passed it through unchanged; presumably this is because the intrinsic's index operand is an unsigned
// byte vector (confirm against the NEON intrinsics reference).
// NOTE(review): unlike the lookup_16 callers, this member is not const-qualified.
template <typename T>
233
245
really_inline simd8<int8_t > apply_lookup_16_to (const simd8<T> original) {
234
- return vqtbl1q_s8 (*this , original);
246
+ return vqtbl1q_s8 (*this , simd8< uint8_t >( original) );
235
247
}
236
248
};
237
249
0 commit comments