31
31
#include <emmintrin.h>
32
32
#define USE_SSE2
33
33
typedef __m128i Vector8 ;
34
+ typedef __m128i Vector32 ;
34
35
35
36
#else
36
37
/*
37
38
* If no SIMD instructions are available, we can in some cases emulate vector
38
- * operations using bitwise operations on unsigned integers.
39
+ * operations using bitwise operations on unsigned integers. Note that many
40
+ * of the functions in this file presently do not have non-SIMD
41
+ * implementations. In particular, none of the functions involving Vector32
42
+ * are implemented without SIMD since it's likely not worthwhile to represent
43
+ * two 32-bit integers using a uint64.
39
44
*/
40
45
#define USE_NO_SIMD
41
46
typedef uint64 Vector8 ;
42
47
#endif
43
48
44
-
45
49
/* load/store operations */
46
50
static inline void vector8_load (Vector8 * v , const uint8 * s );
51
+ #ifndef USE_NO_SIMD
52
+ static inline void vector32_load (Vector32 * v , const uint32 * s );
53
+ #endif
47
54
48
55
/* assignment operations */
49
56
static inline Vector8 vector8_broadcast (const uint8 c );
57
+ #ifndef USE_NO_SIMD
58
+ static inline Vector32 vector32_broadcast (const uint32 c );
59
+ #endif
50
60
51
61
/* element-wise comparisons to a scalar */
52
62
static inline bool vector8_has (const Vector8 v , const uint8 c );
@@ -56,14 +66,21 @@ static inline bool vector8_is_highbit_set(const Vector8 v);
56
66
57
67
/* arithmetic operations */
58
68
static inline Vector8 vector8_or (const Vector8 v1 , const Vector8 v2 );
59
-
60
- /* Different semantics for SIMD architectures. */
61
69
#ifndef USE_NO_SIMD
70
+ static inline Vector32 vector32_or (const Vector32 v1 , const Vector32 v2 );
71
+ static inline Vector8 vector8_ssub (const Vector8 v1 , const Vector8 v2 );
72
+ #endif
62
73
63
- /* comparisons between vectors */
74
+ /*
75
+ * comparisons between vectors
76
+ *
77
+ * Note: These return a vector rather than a boolean, which is why we don't
78
+ * have non-SIMD implementations.
79
+ */
80
+ #ifndef USE_NO_SIMD
64
81
static inline Vector8 vector8_eq (const Vector8 v1 , const Vector8 v2 );
65
-
66
- #endif /* ! USE_NO_SIMD */
82
+ static inline Vector32 vector32_eq ( const Vector32 v1 , const Vector32 v2 );
83
+ #endif
67
84
68
85
/*
69
86
* Load a chunk of memory into the given vector.
@@ -78,6 +95,15 @@ vector8_load(Vector8 *v, const uint8 *s)
78
95
#endif
79
96
}
80
97
98
+ #ifndef USE_NO_SIMD
99
+ static inline void
100
+ vector32_load (Vector32 * v , const uint32 * s )
101
+ {
102
+ #ifdef USE_SSE2
103
+ * v = _mm_loadu_si128 ((const __m128i * ) s );
104
+ #endif
105
+ }
106
+ #endif /* ! USE_NO_SIMD */
81
107
82
108
/*
83
109
* Create a vector with all elements set to the same value.
@@ -92,6 +118,16 @@ vector8_broadcast(const uint8 c)
92
118
#endif
93
119
}
94
120
121
+ #ifndef USE_NO_SIMD
122
+ static inline Vector32
123
+ vector32_broadcast (const uint32 c )
124
+ {
125
+ #ifdef USE_SSE2
126
+ return _mm_set1_epi32 (c );
127
+ #endif
128
+ }
129
+ #endif /* ! USE_NO_SIMD */
130
+
95
131
/*
96
132
* Return true if any elements in the vector are equal to the given scalar.
97
133
*/
@@ -118,7 +154,7 @@ vector8_has(const Vector8 v, const uint8 c)
118
154
/* any bytes in v equal to c will evaluate to zero via XOR */
119
155
result = vector8_has_zero (v ^ vector8_broadcast (c ));
120
156
#elif defined(USE_SSE2 )
121
- result = _mm_movemask_epi8 ( _mm_cmpeq_epi8 (v , vector8_broadcast (c )));
157
+ result = vector8_is_highbit_set ( vector8_eq (v , vector8_broadcast (c )));
122
158
#endif
123
159
124
160
Assert (assert_result == result );
@@ -133,8 +169,8 @@ vector8_has_zero(const Vector8 v)
133
169
{
134
170
#if defined(USE_NO_SIMD )
135
171
/*
136
- * We cannot call vector8_has() here, because that would lead to a circular
137
- * definition.
172
+ * We cannot call vector8_has() here, because that would lead to a
173
+ * circular definition.
138
174
*/
139
175
return vector8_has_le (v , 0 );
140
176
#elif defined(USE_SSE2 )
@@ -150,9 +186,6 @@ static inline bool
150
186
vector8_has_le (const Vector8 v , const uint8 c )
151
187
{
152
188
bool result = false;
153
- #if defined(USE_SSE2 )
154
- __m128i sub ;
155
- #endif
156
189
157
190
/* pre-compute the result for assert checking */
158
191
#ifdef USE_ASSERT_CHECKING
@@ -194,10 +227,10 @@ vector8_has_le(const Vector8 v, const uint8 c)
194
227
195
228
/*
196
229
* Use saturating subtraction to find bytes <= c, which will present as
197
- * NUL bytes in 'sub'.
230
+ * NUL bytes. This approach is a workaround for the lack of unsigned
231
+ * comparison instructions on some architectures.
198
232
*/
199
- sub = _mm_subs_epu8 (v , vector8_broadcast (c ));
200
- result = vector8_has_zero (sub );
233
+ result = vector8_has_zero (vector8_ssub (v , vector8_broadcast (c )));
201
234
#endif
202
235
203
236
Assert (assert_result == result );
@@ -230,22 +263,54 @@ vector8_or(const Vector8 v1, const Vector8 v2)
230
263
#endif
231
264
}
232
265
266
+ #ifndef USE_NO_SIMD
267
+ static inline Vector32
268
+ vector32_or (const Vector32 v1 , const Vector32 v2 )
269
+ {
270
+ #ifdef USE_SSE2
271
+ return _mm_or_si128 (v1 , v2 );
272
+ #endif
273
+ }
274
+ #endif /* ! USE_NO_SIMD */
233
275
234
- /* Different semantics for SIMD architectures. */
276
+ /*
277
+ * Return the result of subtracting the respective elements of the input
278
+ * vectors using saturation (i.e., if the operation would yield a value less
279
+ * than zero, zero is returned instead). For more information on saturation
280
+ * arithmetic, see https://en.wikipedia.org/wiki/Saturation_arithmetic
281
+ */
235
282
#ifndef USE_NO_SIMD
283
+ static inline Vector8
284
+ vector8_ssub (const Vector8 v1 , const Vector8 v2 )
285
+ {
286
+ #ifdef USE_SSE2
287
+ return _mm_subs_epu8 (v1 , v2 );
288
+ #endif
289
+ }
290
+ #endif /* ! USE_NO_SIMD */
236
291
237
292
/*
238
293
* Return a vector with all bits set in each lane where the corresponding
239
294
* lanes in the inputs are equal.
240
295
*/
296
+ #ifndef USE_NO_SIMD
241
297
static inline Vector8
242
298
vector8_eq (const Vector8 v1 , const Vector8 v2 )
243
299
{
244
300
#ifdef USE_SSE2
245
301
return _mm_cmpeq_epi8 (v1 , v2 );
246
302
#endif
247
303
}
304
+ #endif /* ! USE_NO_SIMD */
248
305
306
+ #ifndef USE_NO_SIMD
307
+ static inline Vector32
308
+ vector32_eq (const Vector32 v1 , const Vector32 v2 )
309
+ {
310
+ #ifdef USE_SSE2
311
+ return _mm_cmpeq_epi32 (v1 , v2 );
312
+ #endif
313
+ }
249
314
#endif /* ! USE_NO_SIMD */
250
315
251
316
#endif /* SIMD_H */
0 commit comments