Skip to content

Commit 10e6491

Browse files
committed
Merge pull request opencv#9024 from tomoaki0705:featureDispatchAccumulate
2 parents 4238add + e7d5dbf commit 10e6491

File tree

5 files changed

+3273
-1707
lines changed

5 files changed

+3273
-1707
lines changed

modules/core/include/opencv2/core/hal/intrin_sse.hpp

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -899,6 +899,15 @@ inline _Tpvec operator >= (const _Tpvec& a, const _Tpvec& b) \
899899
OPENCV_HAL_IMPL_SSE_FLT_CMP_OP(v_float32x4, ps)
900900
OPENCV_HAL_IMPL_SSE_FLT_CMP_OP(v_float64x2, pd)
901901

902+
// Lane-wise == and != for vectors holding two 64-bit integers.
//
// _Tpvec - 64-bit integer vector type (must expose the raw register as .val)
// cast   - v_reinterpret_as_* function converting a v_uint64x2 mask to _Tpvec
//
// Each result lane is all-ones when the predicate holds and all-zeros otherwise.
//
// The comparison is done on the integer bit patterns. Comparing the lanes as
// doubles is wrong for integers: a NaN bit pattern never equals itself, and
// 0 compares equal to 0x8000000000000000 (+0.0 vs -0.0).
// SSE2 has no 64-bit integer compare, so both 32-bit halves of a lane are
// compared and the two half-results are AND-ed after swapping them within
// each 64-bit lane.
#define OPENCV_HAL_IMPL_SSE_64BIT_CMP_OP(_Tpvec, cast) \
inline _Tpvec operator == (const _Tpvec& a, const _Tpvec& b) \
{ \
    __m128i eq32 = _mm_cmpeq_epi32(a.val, b.val); \
    __m128i eq32_swapped = _mm_shuffle_epi32(eq32, _MM_SHUFFLE(2, 3, 0, 1)); \
    return cast(v_uint64x2(_mm_and_si128(eq32, eq32_swapped))); \
} \
inline _Tpvec operator != (const _Tpvec& a, const _Tpvec& b) \
{ \
    __m128i eq32 = _mm_cmpeq_epi32(a.val, b.val); \
    __m128i eq32_swapped = _mm_shuffle_epi32(eq32, _MM_SHUFFLE(2, 3, 0, 1)); \
    __m128i eq64 = _mm_and_si128(eq32, eq32_swapped); \
    return cast(v_uint64x2(_mm_xor_si128(eq64, _mm_set1_epi32(-1)))); \
}
907+
908+
// Instantiate operator == and operator != for the 64-bit integer vector types.
// No trailing ';': the macro expands to complete function definitions, so a
// semicolon would leave a stray empty declaration at namespace scope.
OPENCV_HAL_IMPL_SSE_64BIT_CMP_OP(v_uint64x2, v_reinterpret_as_u64)
OPENCV_HAL_IMPL_SSE_64BIT_CMP_OP(v_int64x2, v_reinterpret_as_s64)
910+
902911
OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_uint8x16, v_add_wrap, _mm_add_epi8)
903912
OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_int8x16, v_add_wrap, _mm_add_epi8)
904913
OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_uint16x8, v_add_wrap, _mm_add_epi16)
@@ -1520,6 +1529,35 @@ inline void v_load_deinterleave(const unsigned* ptr, v_uint32x4& a, v_uint32x4&
15201529
v_transpose4x4(u0, u1, u2, u3, a, b, c, d);
15211530
}
15221531

1532+
inline void v_load_deinterleave(const uint64 *ptr, v_uint64x2& a, v_uint64x2& b, v_uint64x2& c)
1533+
{
1534+
__m128i t0 = _mm_loadu_si128((const __m128i*)ptr);
1535+
__m128i t1 = _mm_loadu_si128((const __m128i*)(ptr + 2));
1536+
__m128i t2 = _mm_loadu_si128((const __m128i*)(ptr + 4));
1537+
1538+
a = v_uint64x2(_mm_unpacklo_epi64(t0, _mm_unpackhi_epi64(t1, t1)));
1539+
b = v_uint64x2(_mm_unpacklo_epi64(_mm_unpackhi_epi64(t0, t0), t2));
1540+
c = v_uint64x2(_mm_unpacklo_epi64(t1, _mm_unpackhi_epi64(t2, t2)));
1541+
}
1542+
1543+
inline void v_load_deinterleave(const int64 *ptr, v_int64x2& a, v_int64x2& b, v_int64x2& c)
1544+
{
1545+
v_uint64x2 t0, t1, t2;
1546+
v_load_deinterleave((const uint64*)ptr, t0, t1, t2);
1547+
a = v_reinterpret_as_s64(t0);
1548+
b = v_reinterpret_as_s64(t1);
1549+
c = v_reinterpret_as_s64(t2);
1550+
}
1551+
1552+
inline void v_load_deinterleave(const double *ptr, v_float64x2& a, v_float64x2& b, v_float64x2& c)
1553+
{
1554+
v_uint64x2 t0, t1, t2;
1555+
v_load_deinterleave((const uint64*)ptr, t0, t1, t2);
1556+
a = v_reinterpret_as_f64(t0);
1557+
b = v_reinterpret_as_f64(t1);
1558+
c = v_reinterpret_as_f64(t2);
1559+
}
1560+
15231561
// 2-channel, float only
15241562
inline void v_load_deinterleave(const float* ptr, v_float32x4& a, v_float32x4& b)
15251563
{
@@ -1717,6 +1755,27 @@ inline void v_store_interleave(float* ptr, const v_float32x4& a, const v_float32
17171755
_mm_storeu_ps((ptr + 4), u1);
17181756
}
17191757

1758+
// 3-channel, 64-bit: writes six values to ptr[0..5] in interleaved order
//   a[0], b[0], c[0], a[1], b[1], c[1]
// Unaligned stores are used, so ptr needs no particular alignment.
inline void v_store_interleave(uint64 *ptr, const v_uint64x2& a, const v_uint64x2& b, const v_uint64x2& c)
{
    // Build all three output registers before any store is issued.
    const __m128i a_hi  = _mm_unpackhi_epi64(a.val, a.val);   // a[1] in the low lane
    const __m128i out01 = _mm_unpacklo_epi64(a.val, b.val);   // a[0], b[0]
    const __m128i out23 = _mm_unpacklo_epi64(c.val, a_hi);    // c[0], a[1]
    const __m128i out45 = _mm_unpackhi_epi64(b.val, c.val);   // b[1], c[1]

    __m128i* dst = (__m128i*)ptr;
    _mm_storeu_si128(dst,     out01);
    _mm_storeu_si128(dst + 1, out23);
    _mm_storeu_si128(dst + 2, out45);
}
1768+
1769+
// 3-channel, signed 64-bit: reinterprets the inputs as unsigned and forwards
// to the uint64 overload.
inline void v_store_interleave(int64 *ptr, const v_int64x2& a, const v_int64x2& b, const v_int64x2& c)
{
    const v_uint64x2 ua = v_reinterpret_as_u64(a);
    const v_uint64x2 ub = v_reinterpret_as_u64(b);
    const v_uint64x2 uc = v_reinterpret_as_u64(c);

    v_store_interleave((uint64*)ptr, ua, ub, uc);
}
1773+
1774+
inline void v_store_interleave(double *ptr, const v_float64x2& a, const v_float64x2& b, const v_float64x2& c)
1775+
{
1776+
v_store_interleave((uint64*)ptr, v_reinterpret_as_u64(a), v_reinterpret_as_u64(b), v_reinterpret_as_u64(c));
1777+
}
1778+
17201779
#define OPENCV_HAL_IMPL_SSE_LOADSTORE_INTERLEAVE(_Tpvec, _Tp, suffix, _Tpuvec, _Tpu, usuffix) \
17211780
inline void v_load_deinterleave( const _Tp* ptr, _Tpvec& a0, \
17221781
_Tpvec& b0, _Tpvec& c0 ) \

modules/imgproc/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
11
set(the_description "Image Processing")
2+
# NOTE(review): presumably registers the imgproc "accum" sources to be built
# per CPU feature set (SSE2, AVX, NEON) for runtime dispatch — confirm against
# the definition of ocv_add_dispatched_file.
ocv_add_dispatched_file(accum SSE2 AVX NEON)
23
ocv_define_module(imgproc opencv_core WRAP java python)

0 commit comments

Comments
 (0)