Skip to content

Commit 1563300

Browse files
committed
Merge pull request opencv#9833 from tomoaki0705:universalMathFuncs
2 parents (2914443 + 2a781bb), commit 1563300

File tree

1 file changed

+19
-35
lines changed

1 file changed

+19
-35
lines changed

modules/core/src/mathfuncs.cpp

Lines changed: 19 additions & 35 deletions
Original file line number | Diff line number | Diff line change
@@ -605,24 +605,18 @@ void polarToCart( InputArray src1, InputArray src2,
605605
{
606606
k = 0;
607607

608-
#if CV_NEON
609-
for( ; k <= len - 4; k += 4 )
610-
{
611-
float32x4_t v_m = vld1q_f32(mag + k);
612-
vst1q_f32(x + k, vmulq_f32(vld1q_f32(x + k), v_m));
613-
vst1q_f32(y + k, vmulq_f32(vld1q_f32(y + k), v_m));
614-
}
615-
#elif CV_SSE2
616-
if (USE_SSE2)
608+
#if CV_SIMD128
609+
if( hasSIMD128() )
617610
{
618-
for( ; k <= len - 4; k += 4 )
611+
int cWidth = v_float32x4::nlanes;
612+
for( ; k <= len - cWidth; k += cWidth )
619613
{
620-
__m128 v_m = _mm_loadu_ps(mag + k);
621-
_mm_storeu_ps(x + k, _mm_mul_ps(_mm_loadu_ps(x + k), v_m));
622-
_mm_storeu_ps(y + k, _mm_mul_ps(_mm_loadu_ps(y + k), v_m));
614+
v_float32x4 v_m = v_load(mag + k);
615+
v_store(x + k, v_load(x + k) * v_m);
616+
v_store(y + k, v_load(y + k) * v_m);
623617
}
624618
}
625-
#endif
619+
#endif
626620

627621
for( ; k < len; k++ )
628622
{
@@ -1599,38 +1593,28 @@ void patchNaNs( InputOutputArray _a, double _val )
15991593
Cv32suf val;
16001594
val.f = (float)_val;
16011595

1602-
#if CV_SSE2
1603-
__m128i v_mask1 = _mm_set1_epi32(0x7fffffff), v_mask2 = _mm_set1_epi32(0x7f800000);
1604-
__m128i v_val = _mm_set1_epi32(val.i);
1605-
#elif CV_NEON
1606-
int32x4_t v_mask1 = vdupq_n_s32(0x7fffffff), v_mask2 = vdupq_n_s32(0x7f800000),
1607-
v_val = vdupq_n_s32(val.i);
1596+
#if CV_SIMD128
1597+
v_int32x4 v_mask1 = v_setall_s32(0x7fffffff), v_mask2 = v_setall_s32(0x7f800000);
1598+
v_int32x4 v_val = v_setall_s32(val.i);
16081599
#endif
16091600

16101601
for( size_t i = 0; i < it.nplanes; i++, ++it )
16111602
{
16121603
int* tptr = ptrs[0];
16131604
size_t j = 0;
16141605

1615-
#if CV_SSE2
1616-
if (USE_SSE2)
1606+
#if CV_SIMD128
1607+
if( hasSIMD128() )
16171608
{
1618-
for ( ; j + 4 <= len; j += 4)
1609+
size_t cWidth = (size_t)v_int32x4::nlanes;
1610+
for ( ; j + cWidth <= len; j += cWidth)
16191611
{
1620-
__m128i v_src = _mm_loadu_si128((__m128i const *)(tptr + j));
1621-
__m128i v_cmp_mask = _mm_cmplt_epi32(v_mask2, _mm_and_si128(v_src, v_mask1));
1622-
__m128i v_res = _mm_or_si128(_mm_andnot_si128(v_cmp_mask, v_src), _mm_and_si128(v_cmp_mask, v_val));
1623-
_mm_storeu_si128((__m128i *)(tptr + j), v_res);
1612+
v_int32x4 v_src = v_load(tptr + j);
1613+
v_int32x4 v_cmp_mask = v_mask2 < (v_src & v_mask1);
1614+
v_int32x4 v_dst = v_select(v_cmp_mask, v_val, v_src);
1615+
v_store(tptr + j, v_dst);
16241616
}
16251617
}
1626-
#elif CV_NEON
1627-
for ( ; j + 4 <= len; j += 4)
1628-
{
1629-
int32x4_t v_src = vld1q_s32(tptr + j);
1630-
uint32x4_t v_cmp_mask = vcltq_s32(v_mask2, vandq_s32(v_src, v_mask1));
1631-
int32x4_t v_dst = vbslq_s32(v_cmp_mask, v_val, v_src);
1632-
vst1q_s32(tptr + j, v_dst);
1633-
}
16341618
#endif
16351619

16361620
for( ; j < len; j++ )

0 commit comments

Comments
 (0)