@@ -605,24 +605,18 @@ void polarToCart( InputArray src1, InputArray src2,
         {
             k = 0;
 
-            #if CV_NEON
-            for ( ; k <= len - 4; k += 4 )
-            {
-                float32x4_t v_m = vld1q_f32(mag + k);
-                vst1q_f32(x + k, vmulq_f32(vld1q_f32(x + k), v_m));
-                vst1q_f32(y + k, vmulq_f32(vld1q_f32(y + k), v_m));
-            }
-            #elif CV_SSE2
-            if (USE_SSE2)
+            #if CV_SIMD128
+            if (hasSIMD128())
             {
-                for ( ; k <= len - 4; k += 4 )
+                int cWidth = v_float32x4::nlanes;
+                for ( ; k <= len - cWidth; k += cWidth )
                 {
-                    __m128 v_m = _mm_loadu_ps(mag + k);
-                    _mm_storeu_ps(x + k, _mm_mul_ps(_mm_loadu_ps(x + k), v_m));
-                    _mm_storeu_ps(y + k, _mm_mul_ps(_mm_loadu_ps(y + k), v_m));
+                    v_float32x4 v_m = v_load(mag + k);
+                    v_store(x + k, v_load(x + k) * v_m);
+                    v_store(y + k, v_load(y + k) * v_m);
                 }
             }
-            #endif
+            #endif
 
             for ( ; k < len; k++ )
             {
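Note on the hunk above: the NEON and SSE2 branches collapse into one CV_SIMD128 loop that scales x and y by mag with v_float32x4, leaving the scalar loop as a tail. For readers unfamiliar with the universal intrinsics, here is a minimal standalone sketch of that pattern; it assumes an OpenCV build providing CV_SIMD128 and opencv2/core/hal/intrin.hpp, and the helper name scaleByMagnitude is illustrative only, not part of the patch.

```cpp
#include <opencv2/core.hpp>
#include <opencv2/core/hal/intrin.hpp>

// Illustrative helper (not part of the patch): scale x/y by mag in place.
static void scaleByMagnitude(const float* mag, float* x, float* y, int len)
{
    using namespace cv;
    int k = 0;
#if CV_SIMD128
    if (hasSIMD128())
    {
        int cWidth = v_float32x4::nlanes;              // 4 float lanes per vector
        for (; k <= len - cWidth; k += cWidth)
        {
            v_float32x4 v_m = v_load(mag + k);         // load 4 magnitudes
            v_store(x + k, v_load(x + k) * v_m);       // x *= mag
            v_store(y + k, v_load(y + k) * v_m);       // y *= mag
        }
    }
#endif
    for (; k < len; k++)                               // scalar tail for the remainder
    {
        x[k] *= mag[k];
        y[k] *= mag[k];
    }
}
```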
@@ -1599,38 +1593,28 @@ void patchNaNs( InputOutputArray _a, double _val )
     Cv32suf val;
     val.f = (float)_val;
 
-#if CV_SSE2
-    __m128i v_mask1 = _mm_set1_epi32(0x7fffffff), v_mask2 = _mm_set1_epi32(0x7f800000);
-    __m128i v_val = _mm_set1_epi32(val.i);
-#elif CV_NEON
-    int32x4_t v_mask1 = vdupq_n_s32(0x7fffffff), v_mask2 = vdupq_n_s32(0x7f800000),
-        v_val = vdupq_n_s32(val.i);
+#if CV_SIMD128
+    v_int32x4 v_mask1 = v_setall_s32(0x7fffffff), v_mask2 = v_setall_s32(0x7f800000);
+    v_int32x4 v_val = v_setall_s32(val.i);
 #endif
 
     for( size_t i = 0; i < it.nplanes; i++, ++it )
     {
         int* tptr = ptrs[0];
         size_t j = 0;
 
-#if CV_SSE2
-        if (USE_SSE2)
+#if CV_SIMD128
+        if (hasSIMD128())
         {
-            for ( ; j + 4 <= len; j += 4)
+            size_t cWidth = (size_t)v_int32x4::nlanes;
+            for ( ; j + cWidth <= len; j += cWidth)
             {
-                __m128i v_src = _mm_loadu_si128((__m128i const *)(tptr + j));
-                __m128i v_cmp_mask = _mm_cmplt_epi32(v_mask2, _mm_and_si128(v_src, v_mask1));
-                __m128i v_res = _mm_or_si128(_mm_andnot_si128(v_cmp_mask, v_src), _mm_and_si128(v_cmp_mask, v_val));
-                _mm_storeu_si128((__m128i *)(tptr + j), v_res);
+                v_int32x4 v_src = v_load(tptr + j);
+                v_int32x4 v_cmp_mask = v_mask2 < (v_src & v_mask1);
+                v_int32x4 v_dst = v_select(v_cmp_mask, v_val, v_src);
+                v_store(tptr + j, v_dst);
             }
         }
-#elif CV_NEON
-        for ( ; j + 4 <= len; j += 4)
-        {
-            int32x4_t v_src = vld1q_s32(tptr + j);
-            uint32x4_t v_cmp_mask = vcltq_s32(v_mask2, vandq_s32(v_src, v_mask1));
-            int32x4_t v_dst = vbslq_s32(v_cmp_mask, v_val, v_src);
-            vst1q_s32(tptr + j, v_dst);
-        }
 #endif
 
         for( ; j < len; j++ )
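Note on the patchNaNs hunk: it relies on the fact that a 32-bit float is NaN exactly when (bits & 0x7fffffff) > 0x7f800000, so the per-lane comparison builds a mask and v_select blends in the replacement value where the mask is set. Below is a minimal standalone sketch of that pattern under the same assumptions (OpenCV with CV_SIMD128 and opencv2/core/hal/intrin.hpp); patchNaNsSketch is an illustrative helper name, not part of the patch.

```cpp
#include <opencv2/core.hpp>
#include <opencv2/core/hal/intrin.hpp>

// Illustrative helper (not part of the patch): replace NaNs in data with newVal.
static void patchNaNsSketch(float* data, size_t len, float newVal)
{
    using namespace cv;
    Cv32suf val;
    val.f = newVal;
    int* tptr = reinterpret_cast<int*>(data);          // treat floats as raw int bits
    size_t j = 0;
#if CV_SIMD128
    if (hasSIMD128())
    {
        v_int32x4 v_mask1 = v_setall_s32(0x7fffffff);  // clears the sign bit
        v_int32x4 v_mask2 = v_setall_s32(0x7f800000);  // exponent-all-ones threshold
        v_int32x4 v_val = v_setall_s32(val.i);
        size_t cWidth = (size_t)v_int32x4::nlanes;
        for (; j + cWidth <= len; j += cWidth)
        {
            v_int32x4 v_src = v_load(tptr + j);
            v_int32x4 v_nan = v_mask2 < (v_src & v_mask1);    // per-lane NaN mask
            v_store(tptr + j, v_select(v_nan, v_val, v_src)); // take val where NaN, else src
        }
    }
#endif
    for (; j < len; j++)                               // scalar tail, same bit test
        if ((tptr[j] & 0x7fffffff) > 0x7f800000)
            tptr[j] = val.i;
}
```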