Skip to content

Commit b683e68

Browse files
committed
Merge pull request opencv#8398 from woodychow:normL2Sqr_avx2
2 parents 925594d + c370cc1 commit b683e68

File tree

1 file changed

+16
-1
lines changed

1 file changed

+16
-1
lines changed

modules/core/src/stat.cpp

Lines changed: 16 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4470,7 +4470,22 @@ int normHamming(const uchar* a, const uchar* b, int n, int cellSize)
44704470
float normL2Sqr_(const float* a, const float* b, int n)
44714471
{
44724472
int j = 0; float d = 0.f;
4473-
#if CV_SSE
4473+
#if CV_AVX2
4474+
float CV_DECL_ALIGNED(32) buf[8];
4475+
__m256 d0 = _mm256_setzero_ps();
4476+
4477+
for( ; j <= n - 8; j += 8 )
4478+
{
4479+
__m256 t0 = _mm256_sub_ps(_mm256_loadu_ps(a + j), _mm256_loadu_ps(b + j));
4480+
#ifdef CV_FMA3
4481+
d0 = _mm256_fmadd_ps(t0, t0, d0);
4482+
#else
4483+
d0 = _mm256_add_ps(d0, _mm256_mul_ps(t0, t0));
4484+
#endif
4485+
}
4486+
_mm256_store_ps(buf, d0);
4487+
d = buf[0] + buf[1] + buf[2] + buf[3] + buf[4] + buf[5] + buf[6] + buf[7];
4488+
#elif CV_SSE
44744489
float CV_DECL_ALIGNED(16) buf[4];
44754490
__m128 d0 = _mm_setzero_ps(), d1 = _mm_setzero_ps();
44764491

0 commit comments

Comments
 (0)