@@ -4269,156 +4269,6 @@ static const uchar popCountTable4[] =
4269
4269
1 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 1 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2
4270
4270
};
4271
4271
4272
#if CV_AVX2
// Returns the 32-bit element number `i` (0..7) of a 256-bit integer register.
// The register is written out to a local array declared with CV_DECL_ALIGNED(32)
// and the element is read back from there, so `i` does not have to be a
// compile-time constant. The index range is checked with CV_DbgAssert.
static inline int _mm256_extract_epi32_(__m256i reg, const int i)
{
    CV_DbgAssert(0 <= i && i < 8);

    CV_DECL_ALIGNED(32) int lanes[8];
    _mm256_store_si256((__m256i*)lanes, reg);
    return lanes[i];
}
#endif
4281
-
4282
- int normHamming (const uchar* a, int n)
4283
- {
4284
- int i = 0 ;
4285
- int result = 0 ;
4286
- #if CV_AVX2
4287
- if (USE_AVX2)
4288
- {
4289
- __m256i _r0 = _mm256_setzero_si256 ();
4290
- __m256i _0 = _mm256_setzero_si256 ();
4291
- __m256i _popcnt_table = _mm256_setr_epi8 (0 , 1 , 1 , 2 , 1 , 2 , 2 , 3 , 1 , 2 , 2 , 3 , 2 , 3 , 3 , 4 ,
4292
- 0 , 1 , 1 , 2 , 1 , 2 , 2 , 3 , 1 , 2 , 2 , 3 , 2 , 3 , 3 , 4 );
4293
- __m256i _popcnt_mask = _mm256_set1_epi8 (0x0F );
4294
-
4295
- for (; i <= n - 32 ; i+= 32 )
4296
- {
4297
- __m256i _a0 = _mm256_loadu_si256 ((const __m256i*)(a + i));
4298
-
4299
- __m256i _popc0 = _mm256_shuffle_epi8 (_popcnt_table, _mm256_and_si256 (_a0, _popcnt_mask));
4300
- __m256i _popc1 = _mm256_shuffle_epi8 (_popcnt_table,
4301
- _mm256_and_si256 (_mm256_srli_epi16 (_a0, 4 ), _popcnt_mask));
4302
-
4303
- _r0 = _mm256_add_epi32 (_r0, _mm256_sad_epu8 (_0, _mm256_add_epi8 (_popc0, _popc1)));
4304
- }
4305
- _r0 = _mm256_add_epi32 (_r0, _mm256_shuffle_epi32 (_r0, 2 ));
4306
- result = _mm256_extract_epi32_ (_mm256_add_epi32 (_r0, _mm256_permute2x128_si256 (_r0, _r0, 1 )), 0 );
4307
- }
4308
- #endif // CV_AVX2
4309
-
4310
- #if CV_POPCNT
4311
- if (checkHardwareSupport (CV_CPU_POPCNT))
4312
- {
4313
- # if defined CV_POPCNT_U64
4314
- for (; i <= n - 8 ; i += 8 )
4315
- {
4316
- result += (int )CV_POPCNT_U64 (*(uint64*)(a + i));
4317
- }
4318
- # endif
4319
- for (; i <= n - 4 ; i += 4 )
4320
- {
4321
- result += CV_POPCNT_U32 (*(uint*)(a + i));
4322
- }
4323
- }
4324
- #endif // CV_POPCNT
4325
-
4326
- #if CV_SIMD128
4327
- if (hasSIMD128 ())
4328
- {
4329
- v_uint32x4 t = v_setzero_u32 ();
4330
- for (; i <= n - v_uint8x16::nlanes; i += v_uint8x16::nlanes)
4331
- {
4332
- t += v_popcount (v_load (a + i));
4333
- }
4334
- result += v_reduce_sum (t);
4335
- }
4336
- #endif // CV_SIMD128
4337
- #if CV_ENABLE_UNROLLED
4338
- for (; i <= n - 4 ; i += 4 )
4339
- {
4340
- result += popCountTable[a[i]] + popCountTable[a[i+1 ]] +
4341
- popCountTable[a[i+2 ]] + popCountTable[a[i+3 ]];
4342
- }
4343
- #endif
4344
- for (; i < n; i++)
4345
- {
4346
- result += popCountTable[a[i]];
4347
- }
4348
- return result;
4349
- }
4350
-
4351
- int normHamming (const uchar* a, const uchar* b, int n)
4352
- {
4353
- int i = 0 ;
4354
- int result = 0 ;
4355
- #if CV_AVX2
4356
- if (USE_AVX2)
4357
- {
4358
- __m256i _r0 = _mm256_setzero_si256 ();
4359
- __m256i _0 = _mm256_setzero_si256 ();
4360
- __m256i _popcnt_table = _mm256_setr_epi8 (0 , 1 , 1 , 2 , 1 , 2 , 2 , 3 , 1 , 2 , 2 , 3 , 2 , 3 , 3 , 4 ,
4361
- 0 , 1 , 1 , 2 , 1 , 2 , 2 , 3 , 1 , 2 , 2 , 3 , 2 , 3 , 3 , 4 );
4362
- __m256i _popcnt_mask = _mm256_set1_epi8 (0x0F );
4363
-
4364
- for (; i <= n - 32 ; i+= 32 )
4365
- {
4366
- __m256i _a0 = _mm256_loadu_si256 ((const __m256i*)(a + i));
4367
- __m256i _b0 = _mm256_loadu_si256 ((const __m256i*)(b + i));
4368
-
4369
- __m256i _xor = _mm256_xor_si256 (_a0, _b0);
4370
-
4371
- __m256i _popc0 = _mm256_shuffle_epi8 (_popcnt_table, _mm256_and_si256 (_xor, _popcnt_mask));
4372
- __m256i _popc1 = _mm256_shuffle_epi8 (_popcnt_table,
4373
- _mm256_and_si256 (_mm256_srli_epi16 (_xor, 4 ), _popcnt_mask));
4374
-
4375
- _r0 = _mm256_add_epi32 (_r0, _mm256_sad_epu8 (_0, _mm256_add_epi8 (_popc0, _popc1)));
4376
- }
4377
- _r0 = _mm256_add_epi32 (_r0, _mm256_shuffle_epi32 (_r0, 2 ));
4378
- result = _mm256_extract_epi32_ (_mm256_add_epi32 (_r0, _mm256_permute2x128_si256 (_r0, _r0, 1 )), 0 );
4379
- }
4380
- #endif // CV_AVX2
4381
-
4382
- #if CV_POPCNT
4383
- if (checkHardwareSupport (CV_CPU_POPCNT))
4384
- {
4385
- # if defined CV_POPCNT_U64
4386
- for (; i <= n - 8 ; i += 8 )
4387
- {
4388
- result += (int )CV_POPCNT_U64 (*(uint64*)(a + i) ^ *(uint64*)(b + i));
4389
- }
4390
- # endif
4391
- for (; i <= n - 4 ; i += 4 )
4392
- {
4393
- result += CV_POPCNT_U32 (*(uint*)(a + i) ^ *(uint*)(b + i));
4394
- }
4395
- }
4396
- #endif // CV_POPCNT
4397
-
4398
- #if CV_SIMD128
4399
- if (hasSIMD128 ())
4400
- {
4401
- v_uint32x4 t = v_setzero_u32 ();
4402
- for (; i <= n - v_uint8x16::nlanes; i += v_uint8x16::nlanes)
4403
- {
4404
- t += v_popcount (v_load (a + i) ^ v_load (b + i));
4405
- }
4406
- result += v_reduce_sum (t);
4407
- }
4408
- #endif // CV_SIMD128
4409
- #if CV_ENABLE_UNROLLED
4410
- for (; i <= n - 4 ; i += 4 )
4411
- {
4412
- result += popCountTable[a[i] ^ b[i]] + popCountTable[a[i+1 ] ^ b[i+1 ]] +
4413
- popCountTable[a[i+2 ] ^ b[i+2 ]] + popCountTable[a[i+3 ] ^ b[i+3 ]];
4414
- }
4415
- #endif
4416
- for (; i < n; i++)
4417
- {
4418
- result += popCountTable[a[i] ^ b[i]];
4419
- }
4420
- return result;
4421
- }
4422
4272
4423
4273
int normHamming (const uchar* a, int n, int cellSize)
4424
4274
{
0 commit comments