@@ -103,18 +103,22 @@ const uint8 pg_number_of_ones[256] = {
103
103
4 , 5 , 5 , 6 , 5 , 6 , 6 , 7 , 5 , 6 , 6 , 7 , 6 , 7 , 7 , 8
104
104
};
105
105
106
- static int pg_popcount32_slow (uint32 word );
107
- static int pg_popcount64_slow (uint64 word );
106
+ static inline int pg_popcount32_slow (uint32 word );
107
+ static inline int pg_popcount64_slow (uint64 word );
108
+ static uint64 pg_popcount_slow (const char * buf , int bytes );
108
109
109
110
#ifdef TRY_POPCNT_FAST
110
111
static bool pg_popcount_available (void );
111
112
static int pg_popcount32_choose (uint32 word );
112
113
static int pg_popcount64_choose (uint64 word );
113
- static int pg_popcount32_fast (uint32 word );
114
- static int pg_popcount64_fast (uint64 word );
114
+ static uint64 pg_popcount_choose (const char * buf , int bytes );
115
+ static inline int pg_popcount32_fast (uint32 word );
116
+ static inline int pg_popcount64_fast (uint64 word );
117
+ static uint64 pg_popcount_fast (const char * buf , int bytes );
115
118
116
119
int (* pg_popcount32 ) (uint32 word ) = pg_popcount32_choose ;
117
120
int (* pg_popcount64 ) (uint64 word ) = pg_popcount64_choose ;
121
+ uint64 (* pg_popcount ) (const char * buf , int bytes ) = pg_popcount_choose ;
118
122
#endif /* TRY_POPCNT_FAST */
119
123
120
124
#ifdef TRY_POPCNT_FAST
@@ -151,11 +155,13 @@ pg_popcount32_choose(uint32 word)
151
155
{
152
156
pg_popcount32 = pg_popcount32_fast ;
153
157
pg_popcount64 = pg_popcount64_fast ;
158
+ pg_popcount = pg_popcount_fast ;
154
159
}
155
160
else
156
161
{
157
162
pg_popcount32 = pg_popcount32_slow ;
158
163
pg_popcount64 = pg_popcount64_slow ;
164
+ pg_popcount = pg_popcount_slow ;
159
165
}
160
166
161
167
return pg_popcount32 (word );
@@ -168,21 +174,42 @@ pg_popcount64_choose(uint64 word)
168
174
{
169
175
pg_popcount32 = pg_popcount32_fast ;
170
176
pg_popcount64 = pg_popcount64_fast ;
177
+ pg_popcount = pg_popcount_fast ;
171
178
}
172
179
else
173
180
{
174
181
pg_popcount32 = pg_popcount32_slow ;
175
182
pg_popcount64 = pg_popcount64_slow ;
183
+ pg_popcount = pg_popcount_slow ;
176
184
}
177
185
178
186
return pg_popcount64 (word );
179
187
}
180
188
189
+ static uint64
190
+ pg_popcount_choose (const char * buf , int bytes )
191
+ {
192
+ if (pg_popcount_available ())
193
+ {
194
+ pg_popcount32 = pg_popcount32_fast ;
195
+ pg_popcount64 = pg_popcount64_fast ;
196
+ pg_popcount = pg_popcount_fast ;
197
+ }
198
+ else
199
+ {
200
+ pg_popcount32 = pg_popcount32_slow ;
201
+ pg_popcount64 = pg_popcount64_slow ;
202
+ pg_popcount = pg_popcount_slow ;
203
+ }
204
+
205
+ return pg_popcount (buf , bytes );
206
+ }
207
+
181
208
/*
182
209
* pg_popcount32_fast
183
210
* Return the number of 1 bits set in word
184
211
*/
185
- static int
212
+ static inline int
186
213
pg_popcount32_fast (uint32 word )
187
214
{
188
215
#ifdef _MSC_VER
@@ -199,7 +226,7 @@ __asm__ __volatile__(" popcntl %1,%0\n":"=q"(res):"rm"(word):"cc");
199
226
* pg_popcount64_fast
200
227
* Return the number of 1 bits set in word
201
228
*/
202
- static int
229
+ static inline int
203
230
pg_popcount64_fast (uint64 word )
204
231
{
205
232
#ifdef _MSC_VER
@@ -212,14 +239,60 @@ __asm__ __volatile__(" popcntq %1,%0\n":"=q"(res):"rm"(word):"cc");
212
239
#endif
213
240
}
214
241
242
+ /*
243
+ * pg_popcount_fast
244
+ * Returns the number of 1-bits in buf
245
+ */
246
+ static uint64
247
+ pg_popcount_fast (const char * buf , int bytes )
248
+ {
249
+ uint64 popcnt = 0 ;
250
+
251
+ #if SIZEOF_VOID_P >= 8
252
+ /* Process in 64-bit chunks if the buffer is aligned. */
253
+ if (buf == (const char * ) TYPEALIGN (8 , buf ))
254
+ {
255
+ const uint64 * words = (const uint64 * ) buf ;
256
+
257
+ while (bytes >= 8 )
258
+ {
259
+ popcnt += pg_popcount64_fast (* words ++ );
260
+ bytes -= 8 ;
261
+ }
262
+
263
+ buf = (const char * ) words ;
264
+ }
265
+ #else
266
+ /* Process in 32-bit chunks if the buffer is aligned. */
267
+ if (buf == (const char * ) TYPEALIGN (4 , buf ))
268
+ {
269
+ const uint32 * words = (const uint32 * ) buf ;
270
+
271
+ while (bytes >= 4 )
272
+ {
273
+ popcnt += pg_popcount32_fast (* words ++ );
274
+ bytes -= 4 ;
275
+ }
276
+
277
+ buf = (const char * ) words ;
278
+ }
279
+ #endif
280
+
281
+ /* Process any remaining bytes */
282
+ while (bytes -- )
283
+ popcnt += pg_number_of_ones [(unsigned char ) * buf ++ ];
284
+
285
+ return popcnt ;
286
+ }
287
+
215
288
#endif /* TRY_POPCNT_FAST */
216
289
217
290
218
291
/*
219
292
* pg_popcount32_slow
220
293
* Return the number of 1 bits set in word
221
294
*/
222
- static int
295
+ static inline int
223
296
pg_popcount32_slow (uint32 word )
224
297
{
225
298
#ifdef HAVE__BUILTIN_POPCOUNT
@@ -241,7 +314,7 @@ pg_popcount32_slow(uint32 word)
241
314
* pg_popcount64_slow
242
315
* Return the number of 1 bits set in word
243
316
*/
244
- static int
317
+ static inline int
245
318
pg_popcount64_slow (uint64 word )
246
319
{
247
320
#ifdef HAVE__BUILTIN_POPCOUNT
@@ -265,35 +338,12 @@ pg_popcount64_slow(uint64 word)
265
338
#endif /* HAVE__BUILTIN_POPCOUNT */
266
339
}
267
340
268
- #ifndef TRY_POPCNT_FAST
269
-
270
341
/*
271
- * When the POPCNT instruction is not available, there's no point in using
272
- * function pointers to vary the implementation between the fast and slow
273
- * method. We instead just make these actual external functions when
274
- * TRY_POPCNT_FAST is not defined. The compiler should be able to inline
275
- * the slow versions here.
276
- */
277
- int
278
- pg_popcount32 (uint32 word )
279
- {
280
- return pg_popcount32_slow (word );
281
- }
282
-
283
- int
284
- pg_popcount64 (uint64 word )
285
- {
286
- return pg_popcount64_slow (word );
287
- }
288
-
289
- #endif /* !TRY_POPCNT_FAST */
290
-
291
- /*
292
- * pg_popcount
342
+ * pg_popcount_slow
293
343
* Returns the number of 1-bits in buf
294
344
*/
295
- uint64
296
- pg_popcount (const char * buf , int bytes )
345
+ static uint64
346
+ pg_popcount_slow (const char * buf , int bytes )
297
347
{
298
348
uint64 popcnt = 0 ;
299
349
@@ -305,7 +355,7 @@ pg_popcount(const char *buf, int bytes)
305
355
306
356
while (bytes >= 8 )
307
357
{
308
- popcnt += pg_popcount64 (* words ++ );
358
+ popcnt += pg_popcount64_slow (* words ++ );
309
359
bytes -= 8 ;
310
360
}
311
361
@@ -319,7 +369,7 @@ pg_popcount(const char *buf, int bytes)
319
369
320
370
while (bytes >= 4 )
321
371
{
322
- popcnt += pg_popcount32 (* words ++ );
372
+ popcnt += pg_popcount32_slow (* words ++ );
323
373
bytes -= 4 ;
324
374
}
325
375
@@ -333,3 +383,36 @@ pg_popcount(const char *buf, int bytes)
333
383
334
384
return popcnt ;
335
385
}
386
+
387
+ #ifndef TRY_POPCNT_FAST
388
+
389
+ /*
390
+ * When the POPCNT instruction is not available, there's no point in using
391
+ * function pointers to vary the implementation between the fast and slow
392
+ * method. We instead just make these actual external functions when
393
+ * TRY_POPCNT_FAST is not defined. The compiler should be able to inline
394
+ * the slow versions here.
395
+ */
396
+ int
397
+ pg_popcount32 (uint32 word )
398
+ {
399
+ return pg_popcount32_slow (word );
400
+ }
401
+
402
+ int
403
+ pg_popcount64 (uint64 word )
404
+ {
405
+ return pg_popcount64_slow (word );
406
+ }
407
+
408
+ /*
409
+ * pg_popcount
410
+ * Returns the number of 1-bits in buf
411
+ */
412
+ uint64
413
+ pg_popcount (const char * buf , int bytes )
414
+ {
415
+ return pg_popcount_slow (buf , bytes );
416
+ }
417
+
418
+ #endif /* !TRY_POPCNT_FAST */
0 commit comments