@@ -723,31 +723,9 @@ inline v_float32x4 v_reduce_sum4(const v_float32x4& a, const v_float32x4& b,
 }
 
 /** Popcount **/
-#define OPENCV_HAL_IMPL_VSX_POPCOUNT_8(_Tpvec) \
-inline v_uint32x4 v_popcount(const _Tpvec& a) \
-{ \
-    vec_uchar16 v16 = vec_popcntu(a.val); \
-    vec_ushort8 v8 = vec_add(vec_unpacklu(v16), vec_unpackhu(v16)); \
-    return v_uint32x4(vec_add(vec_unpacklu(v8), vec_unpackhu(v8))); \
-}
-OPENCV_HAL_IMPL_VSX_POPCOUNT_8(v_int8x16)
-OPENCV_HAL_IMPL_VSX_POPCOUNT_8(v_uint8x16)
-
-#define OPENCV_HAL_IMPL_VSX_POPCOUNT_16(_Tpvec) \
-inline v_uint32x4 v_popcount(const _Tpvec& a) \
-{ \
-    vec_ushort8 v8 = vec_popcntu(a.val); \
-    return v_uint32x4(vec_add(vec_unpacklu(v8), vec_unpackhu(v8))); \
-}
-OPENCV_HAL_IMPL_VSX_POPCOUNT_16(v_int16x8)
-OPENCV_HAL_IMPL_VSX_POPCOUNT_16(v_uint16x8)
-
-#define OPENCV_HAL_IMPL_VSX_POPCOUNT_32(_Tpvec) \
-inline v_uint32x4 v_popcount(const _Tpvec& a) \
-{ return v_uint32x4(vec_popcntu(a.val)); }
-
-OPENCV_HAL_IMPL_VSX_POPCOUNT_32(v_int32x4)
-OPENCV_HAL_IMPL_VSX_POPCOUNT_32(v_uint32x4)
+template<typename _Tpvec>
+inline v_uint32x4 v_popcount(const _Tpvec& a)
+{ return v_uint32x4(vec_popcntu(vec_uint4_c(a.val))); }
 
 /** Mask **/
 inline int v_signmask(const v_uint8x16& a)
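Note on the popcount change: popcount is bit-parallel, so the number of set bits in a 32-bit word equals the sum of the per-byte or per-halfword counts that the deleted macros accumulated with unpack-and-add. Reinterpreting any lane type as unsigned words via vec_uint4_c and counting once per word therefore preserves the vector-wide total (the grouping of bytes into 32-bit lanes changes, which is harmless when the counts are subsequently reduced). A minimal scalar sketch of that identity, using a hypothetical popcount32 helper that is not part of the HAL:

    // Scalar check of the bit-counting identity behind the new template
    // (plain C++; popcount32 is a hypothetical helper, not an OpenCV name).
    #include <cassert>
    #include <cstdint>

    static int popcount32(uint32_t x)
    {
        int n = 0;
        for (; x; x &= x - 1) ++n;  // Kernighan's trick: clear lowest set bit
        return n;
    }

    int main()
    {
        uint32_t w = 0xF00F0FF0u;
        // Summing per-byte counts (what the old unpack-and-add macros did)
        // gives the same total as counting the whole 32-bit word at once.
        int per_byte = popcount32(w & 0xFF) + popcount32((w >> 8) & 0xFF)
                     + popcount32((w >> 16) & 0xFF) + popcount32(w >> 24);
        assert(per_byte == popcount32(w));
        return 0;
    }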
@@ -879,32 +857,32 @@ inline v_int32x4 v_round(const v_float32x4& a)
 { return v_int32x4(vec_cts(vec_round(a.val))); }
 
 inline v_int32x4 v_round(const v_float64x2& a)
-{ return v_int32x4(vec_mergesqo(vec_cts(vec_round(a.val)), vec_int4_z)); }
+{ return v_int32x4(vec_mergesqo(vec_ctso(vec_round(a.val)), vec_int4_z)); }
 
 inline v_int32x4 v_floor(const v_float32x4& a)
 { return v_int32x4(vec_cts(vec_floor(a.val))); }
 
 inline v_int32x4 v_floor(const v_float64x2& a)
-{ return v_int32x4(vec_mergesqo(vec_cts(vec_floor(a.val)), vec_int4_z)); }
+{ return v_int32x4(vec_mergesqo(vec_ctso(vec_floor(a.val)), vec_int4_z)); }
 
 inline v_int32x4 v_ceil(const v_float32x4& a)
 { return v_int32x4(vec_cts(vec_ceil(a.val))); }
 
 inline v_int32x4 v_ceil(const v_float64x2& a)
-{ return v_int32x4(vec_mergesqo(vec_cts(vec_ceil(a.val)), vec_int4_z)); }
+{ return v_int32x4(vec_mergesqo(vec_ctso(vec_ceil(a.val)), vec_int4_z)); }
 
 inline v_int32x4 v_trunc(const v_float32x4& a)
 { return v_int32x4(vec_cts(a.val)); }
 
 inline v_int32x4 v_trunc(const v_float64x2& a)
-{ return v_int32x4(vec_mergesqo(vec_cts(a.val), vec_int4_z)); }
+{ return v_int32x4(vec_mergesqo(vec_ctso(a.val), vec_int4_z)); }
 
 /** To float **/
 inline v_float32x4 v_cvt_f32(const v_int32x4& a)
 { return v_float32x4(vec_ctf(a.val)); }
 
 inline v_float32x4 v_cvt_f32(const v_float64x2& a)
-{ return v_float32x4(vec_mergesqo(vec_cvf(a.val), vec_float4_z)); }
+{ return v_float32x4(vec_mergesqo(vec_cvfo(a.val), vec_float4_z)); }
 
 inline v_float64x2 v_cvt_f64(const v_int32x4& a)
 { return v_float64x2(vec_ctdo(vec_mergeh(a.val, a.val))); }
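Note on the conversion fixes: vec_mergesqo gathers the odd 32-bit words of its arguments, so each f64x2 path must feed it with the odd-variant converters vec_ctso/vec_cvfo rather than vec_cts/vec_cvf; otherwise the merge packs values from the wrong slots. A scalar model of the intended lane flow, assuming vsx_utils semantics in which the two converted results land in word slots 1 and 3 and vec_mergesqo(a, b) yields {a1, a3, b1, b3}:

    // Scalar model of the fixed double->int path (plain C++, not the real
    // VSX intrinsics; the slot placement is an assumption stated above).
    #include <cassert>

    int main()
    {
        double d[2] = {1.9, -2.9};
        // vec_ctso model: truncate each double; the result occupies the odd
        // 32-bit slot of its 64-bit half, i.e. word slots 1 and 3.
        int cvt[4]  = {0, (int)d[0], 0, (int)d[1]};
        int zero[4] = {0, 0, 0, 0};
        // vec_mergesqo(a, b) model: gather the odd words {a1, a3, b1, b3}.
        int out[4] = {cvt[1], cvt[3], zero[1], zero[3]};
        assert(out[0] == 1 && out[1] == -2);  // v_trunc(f64x2) -> {1, -2, 0, 0}
        return 0;
    }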