@@ -106,19 +106,23 @@ const uint8 pg_number_of_ones[256] = {
106
106
static inline int pg_popcount32_slow (uint32 word );
107
107
static inline int pg_popcount64_slow (uint64 word );
108
108
static uint64 pg_popcount_slow (const char * buf , int bytes );
109
+ static uint64 pg_popcount_masked_slow (const char * buf , int bytes , bits8 mask );
109
110
110
111
#ifdef TRY_POPCNT_FAST
111
112
static bool pg_popcount_available (void );
112
113
static int pg_popcount32_choose (uint32 word );
113
114
static int pg_popcount64_choose (uint64 word );
114
115
static uint64 pg_popcount_choose (const char * buf , int bytes );
116
+ static uint64 pg_popcount_masked_choose (const char * buf , int bytes , bits8 mask );
115
117
static inline int pg_popcount32_fast (uint32 word );
116
118
static inline int pg_popcount64_fast (uint64 word );
117
119
static uint64 pg_popcount_fast (const char * buf , int bytes );
120
+ static uint64 pg_popcount_masked_fast (const char * buf , int bytes , bits8 mask );
118
121
119
122
int (* pg_popcount32 ) (uint32 word ) = pg_popcount32_choose ;
120
123
int (* pg_popcount64 ) (uint64 word ) = pg_popcount64_choose ;
121
124
uint64 (* pg_popcount_optimized ) (const char * buf , int bytes ) = pg_popcount_choose ;
125
+ uint64 (* pg_popcount_masked_optimized ) (const char * buf , int bytes , bits8 mask ) = pg_popcount_masked_choose ;
122
126
#endif /* TRY_POPCNT_FAST */
123
127
124
128
#ifdef TRY_POPCNT_FAST
@@ -156,17 +160,22 @@ choose_popcount_functions(void)
156
160
pg_popcount32 = pg_popcount32_fast ;
157
161
pg_popcount64 = pg_popcount64_fast ;
158
162
pg_popcount_optimized = pg_popcount_fast ;
163
+ pg_popcount_masked_optimized = pg_popcount_masked_fast ;
159
164
}
160
165
else
161
166
{
162
167
pg_popcount32 = pg_popcount32_slow ;
163
168
pg_popcount64 = pg_popcount64_slow ;
164
169
pg_popcount_optimized = pg_popcount_slow ;
170
+ pg_popcount_masked_optimized = pg_popcount_masked_slow ;
165
171
}
166
172
167
173
#ifdef USE_AVX512_POPCNT_WITH_RUNTIME_CHECK
168
174
if (pg_popcount_avx512_available ())
175
+ {
169
176
pg_popcount_optimized = pg_popcount_avx512 ;
177
+ pg_popcount_masked_optimized = pg_popcount_masked_avx512 ;
178
+ }
170
179
#endif
171
180
}
172
181
@@ -191,6 +200,13 @@ pg_popcount_choose(const char *buf, int bytes)
191
200
return pg_popcount_optimized (buf , bytes );
192
201
}
193
202
203
+ static uint64
204
+ pg_popcount_masked_choose (const char * buf , int bytes , bits8 mask )
205
+ {
206
+ choose_popcount_functions ();
207
+ return pg_popcount_masked (buf , bytes , mask );
208
+ }
209
+
194
210
/*
195
211
* pg_popcount32_fast
196
212
* Return the number of 1 bits set in word
@@ -271,6 +287,56 @@ pg_popcount_fast(const char *buf, int bytes)
271
287
return popcnt ;
272
288
}
273
289
290
+ /*
291
+ * pg_popcount_masked_fast
292
+ * Returns the number of 1-bits in buf after applying the mask to each byte
293
+ */
294
+ static uint64
295
+ pg_popcount_masked_fast (const char * buf , int bytes , bits8 mask )
296
+ {
297
+ uint64 popcnt = 0 ;
298
+
299
+ #if SIZEOF_VOID_P >= 8
300
+ /* Process in 64-bit chunks if the buffer is aligned */
301
+ uint64 maskv = ~UINT64CONST (0 ) / 0xFF * mask ;
302
+
303
+ if (buf == (const char * ) TYPEALIGN (8 , buf ))
304
+ {
305
+ const uint64 * words = (const uint64 * ) buf ;
306
+
307
+ while (bytes >= 8 )
308
+ {
309
+ popcnt += pg_popcount64_fast (* words ++ & maskv );
310
+ bytes -= 8 ;
311
+ }
312
+
313
+ buf = (const char * ) words ;
314
+ }
315
+ #else
316
+ /* Process in 32-bit chunks if the buffer is aligned. */
317
+ uint32 maskv = ~((uint32 ) 0 ) / 0xFF * mask ;
318
+
319
+ if (buf == (const char * ) TYPEALIGN (4 , buf ))
320
+ {
321
+ const uint32 * words = (const uint32 * ) buf ;
322
+
323
+ while (bytes >= 4 )
324
+ {
325
+ popcnt += pg_popcount32_fast (* words ++ & maskv );
326
+ bytes -= 4 ;
327
+ }
328
+
329
+ buf = (const char * ) words ;
330
+ }
331
+ #endif
332
+
333
+ /* Process any remaining bytes */
334
+ while (bytes -- )
335
+ popcnt += pg_number_of_ones [(unsigned char ) * buf ++ & mask ];
336
+
337
+ return popcnt ;
338
+ }
339
+
274
340
#endif /* TRY_POPCNT_FAST */
275
341
276
342
@@ -370,6 +436,56 @@ pg_popcount_slow(const char *buf, int bytes)
370
436
return popcnt ;
371
437
}
372
438
439
+ /*
440
+ * pg_popcount_masked_slow
441
+ * Returns the number of 1-bits in buf after applying the mask to each byte
442
+ */
443
+ static uint64
444
+ pg_popcount_masked_slow (const char * buf , int bytes , bits8 mask )
445
+ {
446
+ uint64 popcnt = 0 ;
447
+
448
+ #if SIZEOF_VOID_P >= 8
449
+ /* Process in 64-bit chunks if the buffer is aligned */
450
+ uint64 maskv = ~UINT64CONST (0 ) / 0xFF * mask ;
451
+
452
+ if (buf == (const char * ) TYPEALIGN (8 , buf ))
453
+ {
454
+ const uint64 * words = (const uint64 * ) buf ;
455
+
456
+ while (bytes >= 8 )
457
+ {
458
+ popcnt += pg_popcount64_slow (* words ++ & maskv );
459
+ bytes -= 8 ;
460
+ }
461
+
462
+ buf = (const char * ) words ;
463
+ }
464
+ #else
465
+ /* Process in 32-bit chunks if the buffer is aligned. */
466
+ uint32 maskv = ~((uint32 ) 0 ) / 0xFF * mask ;
467
+
468
+ if (buf == (const char * ) TYPEALIGN (4 , buf ))
469
+ {
470
+ const uint32 * words = (const uint32 * ) buf ;
471
+
472
+ while (bytes >= 4 )
473
+ {
474
+ popcnt += pg_popcount32_slow (* words ++ & maskv );
475
+ bytes -= 4 ;
476
+ }
477
+
478
+ buf = (const char * ) words ;
479
+ }
480
+ #endif
481
+
482
+ /* Process any remaining bytes */
483
+ while (bytes -- )
484
+ popcnt += pg_number_of_ones [(unsigned char ) * buf ++ & mask ];
485
+
486
+ return popcnt ;
487
+ }
488
+
373
489
#ifndef TRY_POPCNT_FAST
374
490
375
491
/*
@@ -401,4 +517,14 @@ pg_popcount_optimized(const char *buf, int bytes)
401
517
return pg_popcount_slow (buf , bytes );
402
518
}
403
519
520
+ /*
521
+ * pg_popcount_masked_optimized
522
+ * Returns the number of 1-bits in buf after applying the mask to each byte
523
+ */
524
+ uint64
525
+ pg_popcount_masked_optimized (const char * buf , int bytes , bits8 mask )
526
+ {
527
+ return pg_popcount_masked_slow (buf , bytes , mask );
528
+ }
529
+
404
530
#endif /* !TRY_POPCNT_FAST */
0 commit comments