@@ -899,6 +899,15 @@ inline _Tpvec operator >= (const _Tpvec& a, const _Tpvec& b) \
OPENCV_HAL_IMPL_SSE_FLT_CMP_OP(v_float32x4, ps)
OPENCV_HAL_IMPL_SSE_FLT_CMP_OP(v_float64x2, pd)

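+// SSE2 has no 64-bit integer compare (_mm_cmpeq_epi64 requires SSE4.1), so
+// == and != on 64-bit lanes reinterpret them as f64 and reuse the pd compares
+// above; note this inherits FP semantics (e.g. the +0.0 and -0.0 bit patterns
+// compare equal).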
+#define OPENCV_HAL_IMPL_SSE_64BIT_CMP_OP(_Tpvec, cast) \
+inline _Tpvec operator == (const _Tpvec& a, const _Tpvec& b) \
+{ return cast(v_reinterpret_as_f64(a) == v_reinterpret_as_f64(b)); } \
+inline _Tpvec operator != (const _Tpvec& a, const _Tpvec& b) \
+{ return cast(v_reinterpret_as_f64(a) != v_reinterpret_as_f64(b)); }
+
+OPENCV_HAL_IMPL_SSE_64BIT_CMP_OP(v_uint64x2, v_reinterpret_as_u64);
+OPENCV_HAL_IMPL_SSE_64BIT_CMP_OP(v_int64x2, v_reinterpret_as_s64);
+
OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_uint8x16, v_add_wrap, _mm_add_epi8)
OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_int8x16, v_add_wrap, _mm_add_epi8)
OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_uint16x8, v_add_wrap, _mm_add_epi16)
@@ -1520,6 +1529,35 @@ inline void v_load_deinterleave(const unsigned* ptr, v_uint32x4& a, v_uint32x4&
    v_transpose4x4(u0, u1, u2, u3, a, b, c, d);
}

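+// 3-channel 64-bit deinterleave: ptr holds a0 b0 c0 a1 b1 c1, loaded below as
+// t0 = (a0 b0), t1 = (c0 a1), t2 = (b1 c1)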
+inline void v_load_deinterleave(const uint64 *ptr, v_uint64x2& a, v_uint64x2& b, v_uint64x2& c)
+{
+    __m128i t0 = _mm_loadu_si128((const __m128i*)ptr);
+    __m128i t1 = _mm_loadu_si128((const __m128i*)(ptr + 2));
+    __m128i t2 = _mm_loadu_si128((const __m128i*)(ptr + 4));
+
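+    // regroup the channel halves: a = (a0 a1), b = (b0 b1), c = (c0 c1)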
+    a = v_uint64x2(_mm_unpacklo_epi64(t0, _mm_unpackhi_epi64(t1, t1)));
+    b = v_uint64x2(_mm_unpacklo_epi64(_mm_unpackhi_epi64(t0, t0), t2));
+    c = v_uint64x2(_mm_unpacklo_epi64(t1, _mm_unpackhi_epi64(t2, t2)));
+}
+
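+// the int64 and double overloads deinterleave as uint64 and reinterpret the
+// result, since the shuffles only move bits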
+inline void v_load_deinterleave(const int64 *ptr, v_int64x2& a, v_int64x2& b, v_int64x2& c)
+{
+    v_uint64x2 t0, t1, t2;
+    v_load_deinterleave((const uint64*)ptr, t0, t1, t2);
+    a = v_reinterpret_as_s64(t0);
+    b = v_reinterpret_as_s64(t1);
+    c = v_reinterpret_as_s64(t2);
+}
+
+inline void v_load_deinterleave(const double *ptr, v_float64x2& a, v_float64x2& b, v_float64x2& c)
+{
+    v_uint64x2 t0, t1, t2;
+    v_load_deinterleave((const uint64*)ptr, t0, t1, t2);
+    a = v_reinterpret_as_f64(t0);
+    b = v_reinterpret_as_f64(t1);
+    c = v_reinterpret_as_f64(t2);
+}
+
// 2-channel, float only
inline void v_load_deinterleave(const float* ptr, v_float32x4& a, v_float32x4& b)
{
@@ -1717,6 +1755,27 @@ inline void v_store_interleave(float* ptr, const v_float32x4& a, const v_float32
    _mm_storeu_ps((ptr + 4), u1);
}

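+// inverse of the 3-channel 64-bit deinterleave: pack t0 = (a0 b0),
+// t1 = (c0 a1), t2 = (b1 c1) and store them back to back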
+inline void v_store_interleave(uint64 *ptr, const v_uint64x2& a, const v_uint64x2& b, const v_uint64x2& c)
+{
+    __m128i t0 = _mm_unpacklo_epi64(a.val, b.val);
+    __m128i t1 = _mm_unpacklo_epi64(c.val, _mm_unpackhi_epi64(a.val, a.val));
+    __m128i t2 = _mm_unpackhi_epi64(b.val, c.val);
+
+    _mm_storeu_si128((__m128i*)ptr, t0);
+    _mm_storeu_si128((__m128i*)(ptr + 2), t1);
+    _mm_storeu_si128((__m128i*)(ptr + 4), t2);
+}
+
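+// as with the loads, signed and double stores defer to the unsigned version
+// through bit-pattern reinterpretation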
+inline void v_store_interleave(int64 *ptr, const v_int64x2& a, const v_int64x2& b, const v_int64x2& c)
+{
+    v_store_interleave((uint64*)ptr, v_reinterpret_as_u64(a), v_reinterpret_as_u64(b), v_reinterpret_as_u64(c));
+}
+
+inline void v_store_interleave(double *ptr, const v_float64x2& a, const v_float64x2& b, const v_float64x2& c)
+{
+    v_store_interleave((uint64*)ptr, v_reinterpret_as_u64(a), v_reinterpret_as_u64(b), v_reinterpret_as_u64(c));
+}
+
#define OPENCV_HAL_IMPL_SSE_LOADSTORE_INTERLEAVE(_Tpvec, _Tp, suffix, _Tpuvec, _Tpu, usuffix) \
inline void v_load_deinterleave( const _Tp* ptr, _Tpvec& a0, \
                                 _Tpvec& b0, _Tpvec& c0 ) \