|
53 | 53 | namespace cv
|
54 | 54 | {
|
55 | 55 |
|
56 |
| -template<typename T> static inline Scalar rawToScalar(const T& v) |
57 |
| -{ |
58 |
| - Scalar s; |
59 |
| - typedef typename DataType<T>::channel_type T1; |
60 |
| - int i, n = DataType<T>::channels; |
61 |
| - for( i = 0; i < n; i++ ) |
62 |
| - s.val[i] = ((T1*)&v)[i]; |
63 |
| - return s; |
64 |
| -} |
65 |
| - |
66 | 56 | /****************************************************************************************\
|
67 | 57 | * sum *
|
68 | 58 | \****************************************************************************************/
|
@@ -4249,7 +4239,7 @@ cvNorm( const void* imgA, const void* imgB, int normType, const void* maskarr )
|
4249 | 4239 |
|
4250 | 4240 | namespace cv { namespace hal {
|
4251 | 4241 |
|
4252 |
| -static const uchar popCountTable[] = |
| 4242 | +extern const uchar popCountTable[256] = |
4253 | 4243 | {
|
4254 | 4244 | 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
|
4255 | 4245 | 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
|
@@ -4285,154 +4275,6 @@ static const uchar popCountTable4[] =
|
4285 | 4275 | 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
|
4286 | 4276 | };
|
4287 | 4277 |
|
4288 |
| -#if CV_AVX2 |
4289 |
| -static inline int _mm256_extract_epi32_(__m256i reg, const int i) |
4290 |
| -{ |
4291 |
| - CV_DECL_ALIGNED(32) int reg_data[8]; |
4292 |
| - CV_DbgAssert(0 <= i && i < 8); |
4293 |
| - _mm256_store_si256((__m256i*)reg_data, reg); |
4294 |
| - return reg_data[i]; |
4295 |
| -} |
4296 |
| -#endif |
4297 |
| - |
4298 |
| -int normHamming(const uchar* a, int n) |
4299 |
| -{ |
4300 |
| - int i = 0; |
4301 |
| - int result = 0; |
4302 |
| -#if CV_AVX2 |
4303 |
| - if(USE_AVX2) |
4304 |
| - { |
4305 |
| - __m256i _r0 = _mm256_setzero_si256(); |
4306 |
| - __m256i _0 = _mm256_setzero_si256(); |
4307 |
| - __m256i _popcnt_table = _mm256_setr_epi8(0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, |
4308 |
| - 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4); |
4309 |
| - __m256i _popcnt_mask = _mm256_set1_epi8(0x0F); |
4310 |
| - |
4311 |
| - for(; i <= n - 32; i+= 32) |
4312 |
| - { |
4313 |
| - __m256i _a0 = _mm256_loadu_si256((const __m256i*)(a + i)); |
4314 |
| - |
4315 |
| - __m256i _popc0 = _mm256_shuffle_epi8(_popcnt_table, _mm256_and_si256(_a0, _popcnt_mask)); |
4316 |
| - __m256i _popc1 = _mm256_shuffle_epi8(_popcnt_table, |
4317 |
| - _mm256_and_si256(_mm256_srli_epi16(_a0, 4), _popcnt_mask)); |
4318 |
| - |
4319 |
| - _r0 = _mm256_add_epi32(_r0, _mm256_sad_epu8(_0, _mm256_add_epi8(_popc0, _popc1))); |
4320 |
| - } |
4321 |
| - _r0 = _mm256_add_epi32(_r0, _mm256_shuffle_epi32(_r0, 2)); |
4322 |
| - result = _mm256_extract_epi32_(_mm256_add_epi32(_r0, _mm256_permute2x128_si256(_r0, _r0, 1)), 0); |
4323 |
| - } |
4324 |
| -#endif // CV_AVX2 |
4325 |
| - |
4326 |
| -#if CV_POPCNT |
4327 |
| - if(checkHardwareSupport(CV_CPU_POPCNT)) |
4328 |
| - { |
4329 |
| -# if defined CV_POPCNT_U64 |
4330 |
| - for(; i <= n - 8; i += 8) |
4331 |
| - { |
4332 |
| - result += (int)CV_POPCNT_U64(*(uint64*)(a + i)); |
4333 |
| - } |
4334 |
| -# endif |
4335 |
| - for(; i <= n - 4; i += 4) |
4336 |
| - { |
4337 |
| - result += CV_POPCNT_U32(*(uint*)(a + i)); |
4338 |
| - } |
4339 |
| - } |
4340 |
| -#endif // CV_POPCNT |
4341 |
| - |
4342 |
| -#if CV_SIMD128 |
4343 |
| - if(hasSIMD128()) |
4344 |
| - { |
4345 |
| - v_uint32x4 t = v_setzero_u32(); |
4346 |
| - for(; i <= n - v_uint8x16::nlanes; i += v_uint8x16::nlanes) |
4347 |
| - { |
4348 |
| - t += v_popcount(v_load(a + i)); |
4349 |
| - } |
4350 |
| - result += v_reduce_sum(t); |
4351 |
| - } |
4352 |
| -#endif // CV_SIMD128 |
4353 |
| - |
4354 |
| - for(; i <= n - 4; i += 4) |
4355 |
| - { |
4356 |
| - result += popCountTable[a[i]] + popCountTable[a[i+1]] + |
4357 |
| - popCountTable[a[i+2]] + popCountTable[a[i+3]]; |
4358 |
| - } |
4359 |
| - for(; i < n; i++) |
4360 |
| - { |
4361 |
| - result += popCountTable[a[i]]; |
4362 |
| - } |
4363 |
| - return result; |
4364 |
| -} |
4365 |
| - |
4366 |
| -int normHamming(const uchar* a, const uchar* b, int n) |
4367 |
| -{ |
4368 |
| - int i = 0; |
4369 |
| - int result = 0; |
4370 |
| -#if CV_AVX2 |
4371 |
| - if(USE_AVX2) |
4372 |
| - { |
4373 |
| - __m256i _r0 = _mm256_setzero_si256(); |
4374 |
| - __m256i _0 = _mm256_setzero_si256(); |
4375 |
| - __m256i _popcnt_table = _mm256_setr_epi8(0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, |
4376 |
| - 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4); |
4377 |
| - __m256i _popcnt_mask = _mm256_set1_epi8(0x0F); |
4378 |
| - |
4379 |
| - for(; i <= n - 32; i+= 32) |
4380 |
| - { |
4381 |
| - __m256i _a0 = _mm256_loadu_si256((const __m256i*)(a + i)); |
4382 |
| - __m256i _b0 = _mm256_loadu_si256((const __m256i*)(b + i)); |
4383 |
| - |
4384 |
| - __m256i _xor = _mm256_xor_si256(_a0, _b0); |
4385 |
| - |
4386 |
| - __m256i _popc0 = _mm256_shuffle_epi8(_popcnt_table, _mm256_and_si256(_xor, _popcnt_mask)); |
4387 |
| - __m256i _popc1 = _mm256_shuffle_epi8(_popcnt_table, |
4388 |
| - _mm256_and_si256(_mm256_srli_epi16(_xor, 4), _popcnt_mask)); |
4389 |
| - |
4390 |
| - _r0 = _mm256_add_epi32(_r0, _mm256_sad_epu8(_0, _mm256_add_epi8(_popc0, _popc1))); |
4391 |
| - } |
4392 |
| - _r0 = _mm256_add_epi32(_r0, _mm256_shuffle_epi32(_r0, 2)); |
4393 |
| - result = _mm256_extract_epi32_(_mm256_add_epi32(_r0, _mm256_permute2x128_si256(_r0, _r0, 1)), 0); |
4394 |
| - } |
4395 |
| -#endif // CV_AVX2 |
4396 |
| - |
4397 |
| -#if CV_POPCNT |
4398 |
| - if(checkHardwareSupport(CV_CPU_POPCNT)) |
4399 |
| - { |
4400 |
| -# if defined CV_POPCNT_U64 |
4401 |
| - for(; i <= n - 8; i += 8) |
4402 |
| - { |
4403 |
| - result += (int)CV_POPCNT_U64(*(uint64*)(a + i) ^ *(uint64*)(b + i)); |
4404 |
| - } |
4405 |
| -# endif |
4406 |
| - for(; i <= n - 4; i += 4) |
4407 |
| - { |
4408 |
| - result += CV_POPCNT_U32(*(uint*)(a + i) ^ *(uint*)(b + i)); |
4409 |
| - } |
4410 |
| - } |
4411 |
| -#endif // CV_POPCNT |
4412 |
| - |
4413 |
| -#if CV_SIMD128 |
4414 |
| - if(hasSIMD128()) |
4415 |
| - { |
4416 |
| - v_uint32x4 t = v_setzero_u32(); |
4417 |
| - for(; i <= n - v_uint8x16::nlanes; i += v_uint8x16::nlanes) |
4418 |
| - { |
4419 |
| - t += v_popcount(v_load(a + i) ^ v_load(b + i)); |
4420 |
| - } |
4421 |
| - result += v_reduce_sum(t); |
4422 |
| - } |
4423 |
| -#endif // CV_SIMD128 |
4424 |
| - |
4425 |
| - for(; i <= n - 4; i += 4) |
4426 |
| - { |
4427 |
| - result += popCountTable[a[i] ^ b[i]] + popCountTable[a[i+1] ^ b[i+1]] + |
4428 |
| - popCountTable[a[i+2] ^ b[i+2]] + popCountTable[a[i+3] ^ b[i+3]]; |
4429 |
| - } |
4430 |
| - for(; i < n; i++) |
4431 |
| - { |
4432 |
| - result += popCountTable[a[i] ^ b[i]]; |
4433 |
| - } |
4434 |
| - return result; |
4435 |
| -} |
4436 | 4278 |
|
4437 | 4279 | int normHamming(const uchar* a, int n, int cellSize)
|
4438 | 4280 | {
|
@@ -4469,11 +4311,11 @@ int normHamming(const uchar* a, const uchar* b, int n, int cellSize)
|
4469 | 4311 | return -1;
|
4470 | 4312 | int i = 0;
|
4471 | 4313 | int result = 0;
|
4472 |
| - #if CV_ENABLE_UNROLLED |
| 4314 | +#if CV_ENABLE_UNROLLED |
4473 | 4315 | for( ; i <= n - 4; i += 4 )
|
4474 | 4316 | result += tab[a[i] ^ b[i]] + tab[a[i+1] ^ b[i+1]] +
|
4475 | 4317 | tab[a[i+2] ^ b[i+2]] + tab[a[i+3] ^ b[i+3]];
|
4476 |
| - #endif |
| 4318 | +#endif |
4477 | 4319 | for( ; i < n; i++ )
|
4478 | 4320 | result += tab[a[i] ^ b[i]];
|
4479 | 4321 | return result;
|
|
0 commit comments