Skip to content

Commit e23b59d

Browse files
committed
build: fix v_reduce_sum4 (requires SSE3)
1 parent f49f056 commit e23b59d

File tree

2 files changed

+24
-0
lines changed

2 files changed

+24
-0
lines changed

modules/core/include/opencv2/core/hal/intrin_sse.hpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1129,9 +1129,15 @@ OPENCV_HAL_IMPL_SSE_REDUCE_OP_4_SUM(v_float32x4, float, __m128, ps, _mm_castps_s
11291129
/** @brief Horizontal sums of four float vectors, computed together.

Lane i of the result holds the sum of the four lanes of the i-th argument:
{ a0+a1+a2+a3, b0+b1+b2+b3, c0+c1+c2+c3, d0+d1+d2+d3 }.
*/
inline v_float32x4 v_reduce_sum4(const v_float32x4& a, const v_float32x4& b,
                                 const v_float32x4& c, const v_float32x4& d)
{
#if CV_SSE3
    // Two rounds of horizontal pairwise addition.
    // First round: { a0+a1, a2+a3, b0+b1, b2+b3 } and the same for c/d.
    const __m128 pairs_ab = _mm_hadd_ps(a.val, b.val);
    const __m128 pairs_cd = _mm_hadd_ps(c.val, d.val);
    // Second round collapses each pair of partial sums into one lane.
    const __m128 totals = _mm_hadd_ps(pairs_ab, pairs_cd);
    return v_float32x4(totals);
#else
    // _mm_hadd_ps is unavailable here, so interleave and add instead.
    // low/high interleave of (a, c): { a0, c0, a1, c1 } and { a2, c2, a3, c3 }
    const __m128 ac_low  = _mm_unpacklo_ps(a.val, c.val);
    const __m128 ac_high = _mm_unpackhi_ps(a.val, c.val);
    // { a0+a2, c0+c2, a1+a3, c1+c3 }
    const __m128 partial_ac = _mm_add_ps(ac_low, ac_high);

    const __m128 bd_low  = _mm_unpacklo_ps(b.val, d.val);
    const __m128 bd_high = _mm_unpackhi_ps(b.val, d.val);
    // { b0+b2, d0+d2, b1+b3, d1+d3 }
    const __m128 partial_bd = _mm_add_ps(bd_low, bd_high);

    // Interleaving the partial sums puts them in a, b, c, d lane order:
    // even = { a0+a2, b0+b2, c0+c2, d0+d2 }, odd = { a1+a3, b1+b3, c1+c3, d1+d3 }
    const __m128 even_terms = _mm_unpacklo_ps(partial_ac, partial_bd);
    const __m128 odd_terms  = _mm_unpackhi_ps(partial_ac, partial_bd);
    return v_float32x4(_mm_add_ps(even_terms, odd_terms));
#endif
}
11361142

11371143
// Expands OPENCV_HAL_IMPL_SSE_REDUCE_OP_4 for v_uint32x4 / unsigned with the
// name suffix `max` and scalar functor std::max.
// NOTE(review): presumably this defines the max-reduction over the four lanes;
// the macro body is outside this excerpt - confirm against its definition.
OPENCV_HAL_IMPL_SSE_REDUCE_OP_4(v_uint32x4, unsigned, max, std::max)

modules/core/test/test_intrin.cpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -741,6 +741,23 @@ template<typename R> struct TheTest
741741
return *this;
742742
}
743743

744+
// Checks that v_reduce_sum4(a, b, c, d) places the lane-sum of each argument
// into the matching lane of the result: { sum(a), sum(b), sum(c), sum(d) }.
// Returns *this so the call can be chained with the other test_* methods.
TheTest & test_reduce_sum4()
{
    // Each input uses distinct decimal digits so a misplaced lane is visible
    // in the expected totals below.
    R a(0.1f, 0.02f, 0.003f, 0.0004f);
    R b(1, 20, 300, 4000);
    R c(10, 2, 0.3f, 0.04f);
    R d(1, 2, 3, 4);

    R sum = v_reduce_sum4(a, b, c, d);

    Data<R> res = sum;
    // The SSE3 and non-SSE3 implementations add the lanes in different orders,
    // so the float results may differ in the last bits; compare with an ULP
    // tolerance instead of exact equality.
    EXPECT_FLOAT_EQ(0.1234f, res[0]);
    EXPECT_FLOAT_EQ(4321.0f, res[1]);
    EXPECT_FLOAT_EQ(12.34f, res[2]);
    EXPECT_FLOAT_EQ(10.0f, res[3]);
    return *this;
}
760+
744761
TheTest & test_loadstore_fp16()
745762
{
746763
#if CV_FP16 && CV_SIMD128
@@ -986,6 +1003,7 @@ TEST(hal_intrin, float32x4) {
9861003
.test_float_cvt64()
9871004
.test_matmul()
9881005
.test_transpose()
1006+
.test_reduce_sum4()
9891007
;
9901008
}
9911009

0 commit comments

Comments
 (0)