Skip to content

Commit 89af053

Browse files
committed
Merge pull request opencv#8797 from sovrasov:hog_sse_fix
2 parents c397361 + 6c164d8 commit 89af053

File tree

1 file changed

+8
-2
lines changed

1 file changed

+8
-2
lines changed

modules/objdetect/src/hog.cpp

Lines changed: 8 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -325,8 +325,14 @@ void HOGDescriptor::computeGradient(const Mat& img, Mat& grad, Mat& qangle,
325 325   #if CV_SSE2
326 326   __m128i ithree = _mm_set1_epi32(3);
327 327   for ( ; x <= end - 4; x += 4)
328     - _mm_storeu_si128((__m128i*)(xmap + x), _mm_mullo_epi16(ithree,
329     - _mm_loadu_si128((const __m128i*)(xmap + x))));
    328 + {
    329 + //emulation of _mm_mullo_epi32
    330 + __m128i mul_res = _mm_loadu_si128((const __m128i*)(xmap + x));
    331 + __m128i tmp1 = _mm_mul_epu32(ithree, mul_res);
    332 + __m128i tmp2 = _mm_mul_epu32( _mm_srli_si128(ithree,4), _mm_srli_si128(mul_res,4));
    333 + mul_res = _mm_unpacklo_epi32(_mm_shuffle_epi32(tmp1, _MM_SHUFFLE (0,0,2,0)), _mm_shuffle_epi32(tmp2, _MM_SHUFFLE (0,0,2,0)));
    334 + _mm_storeu_si128((__m128i*)(xmap + x), mul_res);
    335 + }
330 336   #elif CV_NEON
331 337   int32x4_t ithree = vdupq_n_s32(3);
332 338   for ( ; x <= end - 4; x += 4)

0 commit comments

Comments
 (0)