Skip to content

Commit 1b8acd6

Browse files
seiko2plus authored and vpisarev committed
core:ppc Fix several issues for VSX (opencv#10303)
- fix conversion intrinsics compatibility with xlc
- implement odd-elements 2 to 4 conversion intrinsics
- improve implementation of universal intrinsic v_popcount
- rename FORCE_INLINE to VSX_FINLINE in vsx_utils.hpp
1 parent 7ad308e commit 1b8acd6

File tree

2 files changed

+142
-149
lines changed

2 files changed

+142
-149
lines changed

modules/core/include/opencv2/core/hal/intrin_vsx.hpp

Lines changed: 8 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -723,31 +723,9 @@ inline v_float32x4 v_reduce_sum4(const v_float32x4& a, const v_float32x4& b,
723723
}
724724

725725
/** Popcount **/
726-
#define OPENCV_HAL_IMPL_VSX_POPCOUNT_8(_Tpvec) \
727-
inline v_uint32x4 v_popcount(const _Tpvec& a) \
728-
{ \
729-
vec_uchar16 v16 = vec_popcntu(a.val); \
730-
vec_ushort8 v8 = vec_add(vec_unpacklu(v16), vec_unpackhu(v16)); \
731-
return v_uint32x4(vec_add(vec_unpacklu(v8), vec_unpackhu(v8))); \
732-
}
733-
OPENCV_HAL_IMPL_VSX_POPCOUNT_8(v_int8x16)
734-
OPENCV_HAL_IMPL_VSX_POPCOUNT_8(v_uint8x16)
735-
736-
#define OPENCV_HAL_IMPL_VSX_POPCOUNT_16(_Tpvec) \
737-
inline v_uint32x4 v_popcount(const _Tpvec& a) \
738-
{ \
739-
vec_ushort8 v8 = vec_popcntu(a.val); \
740-
return v_uint32x4(vec_add(vec_unpacklu(v8), vec_unpackhu(v8))); \
741-
}
742-
OPENCV_HAL_IMPL_VSX_POPCOUNT_16(v_int16x8)
743-
OPENCV_HAL_IMPL_VSX_POPCOUNT_16(v_uint16x8)
744-
745-
#define OPENCV_HAL_IMPL_VSX_POPCOUNT_32(_Tpvec) \
746-
inline v_uint32x4 v_popcount(const _Tpvec& a) \
747-
{ return v_uint32x4(vec_popcntu(a.val)); }
748-
749-
OPENCV_HAL_IMPL_VSX_POPCOUNT_32(v_int32x4)
750-
OPENCV_HAL_IMPL_VSX_POPCOUNT_32(v_uint32x4)
726+
template<typename _Tpvec>
727+
inline v_uint32x4 v_popcount(const _Tpvec& a)
728+
{ return v_uint32x4(vec_popcntu(vec_uint4_c(a.val))); }
751729

752730
/** Mask **/
753731
inline int v_signmask(const v_uint8x16& a)
@@ -879,32 +857,32 @@ inline v_int32x4 v_round(const v_float32x4& a)
879857
{ return v_int32x4(vec_cts(vec_round(a.val))); }
880858

881859
inline v_int32x4 v_round(const v_float64x2& a)
882-
{ return v_int32x4(vec_mergesqo(vec_cts(vec_round(a.val)), vec_int4_z)); }
860+
{ return v_int32x4(vec_mergesqo(vec_ctso(vec_round(a.val)), vec_int4_z)); }
883861

884862
inline v_int32x4 v_floor(const v_float32x4& a)
885863
{ return v_int32x4(vec_cts(vec_floor(a.val))); }
886864

887865
inline v_int32x4 v_floor(const v_float64x2& a)
888-
{ return v_int32x4(vec_mergesqo(vec_cts(vec_floor(a.val)), vec_int4_z)); }
866+
{ return v_int32x4(vec_mergesqo(vec_ctso(vec_floor(a.val)), vec_int4_z)); }
889867

890868
inline v_int32x4 v_ceil(const v_float32x4& a)
891869
{ return v_int32x4(vec_cts(vec_ceil(a.val))); }
892870

893871
inline v_int32x4 v_ceil(const v_float64x2& a)
894-
{ return v_int32x4(vec_mergesqo(vec_cts(vec_ceil(a.val)), vec_int4_z)); }
872+
{ return v_int32x4(vec_mergesqo(vec_ctso(vec_ceil(a.val)), vec_int4_z)); }
895873

896874
inline v_int32x4 v_trunc(const v_float32x4& a)
897875
{ return v_int32x4(vec_cts(a.val)); }
898876

899877
inline v_int32x4 v_trunc(const v_float64x2& a)
900-
{ return v_int32x4(vec_mergesqo(vec_cts(a.val), vec_int4_z)); }
878+
{ return v_int32x4(vec_mergesqo(vec_ctso(a.val), vec_int4_z)); }
901879

902880
/** To float **/
903881
inline v_float32x4 v_cvt_f32(const v_int32x4& a)
904882
{ return v_float32x4(vec_ctf(a.val)); }
905883

906884
inline v_float32x4 v_cvt_f32(const v_float64x2& a)
907-
{ return v_float32x4(vec_mergesqo(vec_cvf(a.val), vec_float4_z)); }
885+
{ return v_float32x4(vec_mergesqo(vec_cvfo(a.val), vec_float4_z)); }
908886

909887
inline v_float64x2 v_cvt_f64(const v_int32x4& a)
910888
{ return v_float64x2(vec_ctdo(vec_mergeh(a.val, a.val))); }

0 commit comments

Comments
 (0)