@@ -1762,14 +1762,14 @@ OPENCV_HAL_IMPL_C_RSHIFTR(v_int64x2, int64)
1762
1762
1763
1763
// ! @brief Helper macro
1764
1764
// ! @ingroup core_hal_intrin_impl
1765
- #define OPENCV_HAL_IMPL_C_PACK (_Tpvec, _Tpnvec, _Tpn, pack_suffix ) \
1765
+ #define OPENCV_HAL_IMPL_C_PACK (_Tpvec, _Tpnvec, _Tpn, pack_suffix, cast ) \
1766
1766
inline _Tpnvec v_##pack_suffix(const _Tpvec& a, const _Tpvec& b) \
1767
1767
{ \
1768
1768
_Tpnvec c; \
1769
1769
for ( int i = 0 ; i < _Tpvec::nlanes; i++ ) \
1770
1770
{ \
1771
- c.s [i] = saturate_cast <_Tpn>(a.s [i]); \
1772
- c.s [i+_Tpvec::nlanes] = saturate_cast <_Tpn>(b.s [i]); \
1771
+ c.s [i] = cast <_Tpn>(a.s [i]); \
1772
+ c.s [i+_Tpvec::nlanes] = cast <_Tpn>(b.s [i]); \
1773
1773
} \
1774
1774
return c; \
1775
1775
}
@@ -1783,26 +1783,28 @@ inline _Tpnvec v_##pack_suffix(const _Tpvec& a, const _Tpvec& b) \
1783
1783
// !
1784
1784
// ! - pack: for 16-, 32- and 64-bit integer input types
1785
1785
// ! - pack_u: for 16- and 32-bit signed integer input types
1786
- OPENCV_HAL_IMPL_C_PACK (v_uint16x8, v_uint8x16, uchar, pack)
1787
- OPENCV_HAL_IMPL_C_PACK (v_int16x8, v_int8x16, schar, pack)
1788
- OPENCV_HAL_IMPL_C_PACK (v_uint32x4, v_uint16x8, ushort, pack)
1789
- OPENCV_HAL_IMPL_C_PACK (v_int32x4, v_int16x8, short , pack)
1790
- OPENCV_HAL_IMPL_C_PACK (v_uint64x2, v_uint32x4, unsigned , pack)
1791
- OPENCV_HAL_IMPL_C_PACK (v_int64x2, v_int32x4, int , pack)
1792
- OPENCV_HAL_IMPL_C_PACK (v_int16x8, v_uint8x16, uchar, pack_u)
1793
- OPENCV_HAL_IMPL_C_PACK (v_int32x4, v_uint16x8, ushort, pack_u)
1786
+ // !
1787
+ // ! @note All variants except 64-bit use saturation; 64-bit inputs are narrowed with a plain static_cast (no saturation).
1788
+ OPENCV_HAL_IMPL_C_PACK (v_uint16x8, v_uint8x16, uchar, pack, saturate_cast)
1789
+ OPENCV_HAL_IMPL_C_PACK (v_int16x8, v_int8x16, schar, pack, saturate_cast)
1790
+ OPENCV_HAL_IMPL_C_PACK (v_uint32x4, v_uint16x8, ushort, pack, saturate_cast)
1791
+ OPENCV_HAL_IMPL_C_PACK (v_int32x4, v_int16x8, short , pack, saturate_cast)
1792
+ OPENCV_HAL_IMPL_C_PACK (v_uint64x2, v_uint32x4, unsigned , pack, static_cast )
1793
+ OPENCV_HAL_IMPL_C_PACK (v_int64x2, v_int32x4, int , pack, static_cast )
1794
+ OPENCV_HAL_IMPL_C_PACK (v_int16x8, v_uint8x16, uchar, pack_u, saturate_cast)
1795
+ OPENCV_HAL_IMPL_C_PACK (v_int32x4, v_uint16x8, ushort, pack_u, saturate_cast)
1794
1796
// ! @}
1795
1797
1796
1798
// ! @brief Helper macro
1797
1799
// ! @ingroup core_hal_intrin_impl
1798
- #define OPENCV_HAL_IMPL_C_RSHR_PACK (_Tpvec, _Tp, _Tpnvec, _Tpn, pack_suffix ) \
1800
+ #define OPENCV_HAL_IMPL_C_RSHR_PACK (_Tpvec, _Tp, _Tpnvec, _Tpn, pack_suffix, cast ) \
1799
1801
template <int n> inline _Tpnvec v_rshr_##pack_suffix(const _Tpvec& a, const _Tpvec& b) \
1800
1802
{ \
1801
1803
_Tpnvec c; \
1802
1804
for ( int i = 0 ; i < _Tpvec::nlanes; i++ ) \
1803
1805
{ \
1804
- c.s [i] = saturate_cast <_Tpn>((a.s [i] + ((_Tp)1 << (n - 1 ))) >> n); \
1805
- c.s [i+_Tpvec::nlanes] = saturate_cast <_Tpn>((b.s [i] + ((_Tp)1 << (n - 1 ))) >> n); \
1806
+ c.s [i] = cast <_Tpn>((a.s [i] + ((_Tp)1 << (n - 1 ))) >> n); \
1807
+ c.s [i+_Tpvec::nlanes] = cast <_Tpn>((b.s [i] + ((_Tp)1 << (n - 1 ))) >> n); \
1806
1808
} \
1807
1809
return c; \
1808
1810
}
@@ -1816,51 +1818,55 @@ template<int n> inline _Tpnvec v_rshr_##pack_suffix(const _Tpvec& a, const _Tpve
1816
1818
// !
1817
1819
// ! - pack: for 16-, 32- and 64-bit integer input types
1818
1820
// ! - pack_u: for 16- and 32-bit signed integer input types
1819
- OPENCV_HAL_IMPL_C_RSHR_PACK (v_uint16x8, ushort, v_uint8x16, uchar, pack)
1820
- OPENCV_HAL_IMPL_C_RSHR_PACK (v_int16x8, short , v_int8x16, schar, pack)
1821
- OPENCV_HAL_IMPL_C_RSHR_PACK (v_uint32x4, unsigned , v_uint16x8, ushort, pack)
1822
- OPENCV_HAL_IMPL_C_RSHR_PACK (v_int32x4, int , v_int16x8, short , pack)
1823
- OPENCV_HAL_IMPL_C_RSHR_PACK (v_uint64x2, uint64, v_uint32x4, unsigned , pack)
1824
- OPENCV_HAL_IMPL_C_RSHR_PACK (v_int64x2, int64, v_int32x4, int , pack)
1825
- OPENCV_HAL_IMPL_C_RSHR_PACK (v_int16x8, short , v_uint8x16, uchar, pack_u)
1826
- OPENCV_HAL_IMPL_C_RSHR_PACK (v_int32x4, int , v_uint16x8, ushort, pack_u)
1821
+ // !
1822
+ // ! @note All variants except 64-bit use saturation; 64-bit inputs are shifted and then narrowed with a plain static_cast (no saturation).
1823
+ OPENCV_HAL_IMPL_C_RSHR_PACK (v_uint16x8, ushort, v_uint8x16, uchar, pack, saturate_cast)
1824
+ OPENCV_HAL_IMPL_C_RSHR_PACK (v_int16x8, short , v_int8x16, schar, pack, saturate_cast)
1825
+ OPENCV_HAL_IMPL_C_RSHR_PACK (v_uint32x4, unsigned , v_uint16x8, ushort, pack, saturate_cast)
1826
+ OPENCV_HAL_IMPL_C_RSHR_PACK (v_int32x4, int , v_int16x8, short , pack, saturate_cast)
1827
+ OPENCV_HAL_IMPL_C_RSHR_PACK (v_uint64x2, uint64, v_uint32x4, unsigned , pack, static_cast )
1828
+ OPENCV_HAL_IMPL_C_RSHR_PACK (v_int64x2, int64, v_int32x4, int , pack, static_cast )
1829
+ OPENCV_HAL_IMPL_C_RSHR_PACK (v_int16x8, short , v_uint8x16, uchar, pack_u, saturate_cast)
1830
+ OPENCV_HAL_IMPL_C_RSHR_PACK (v_int32x4, int , v_uint16x8, ushort, pack_u, saturate_cast)
1827
1831
// ! @}
1828
1832
1829
1833
// ! @brief Helper macro
1830
1834
// ! @ingroup core_hal_intrin_impl
1831
- #define OPENCV_HAL_IMPL_C_PACK_STORE (_Tpvec, _Tp, _Tpnvec, _Tpn, pack_suffix ) \
1835
+ #define OPENCV_HAL_IMPL_C_PACK_STORE (_Tpvec, _Tp, _Tpnvec, _Tpn, pack_suffix, cast ) \
1832
1836
inline void v_##pack_suffix##_store(_Tpn* ptr, const _Tpvec& a) \
1833
1837
{ \
1834
1838
for ( int i = 0 ; i < _Tpvec::nlanes; i++ ) \
1835
- ptr[i] = saturate_cast <_Tpn>(a.s [i]); \
1839
+ ptr[i] = cast <_Tpn>(a.s [i]); \
1836
1840
}
1837
1841
1838
1842
// ! @name Pack and store
1839
1843
// ! @{
1840
1844
// ! @brief Store values from the input vector into memory with pack
1841
1845
// !
1842
- // ! Values will be stored into memory with saturating conversion to narrower type.
1846
+ // ! Values will be stored into memory with conversion to the narrower type.
1843
1847
// ! Variant with _u_ suffix converts to corresponding unsigned type.
1844
1848
// !
1845
1849
// ! - pack: for 16-, 32- and 64-bit integer input types
1846
1850
// ! - pack_u: for 16- and 32-bit signed integer input types
1847
- OPENCV_HAL_IMPL_C_PACK_STORE (v_uint16x8, ushort, v_uint8x16, uchar, pack)
1848
- OPENCV_HAL_IMPL_C_PACK_STORE (v_int16x8, short , v_int8x16, schar, pack)
1849
- OPENCV_HAL_IMPL_C_PACK_STORE (v_uint32x4, unsigned , v_uint16x8, ushort, pack)
1850
- OPENCV_HAL_IMPL_C_PACK_STORE (v_int32x4, int , v_int16x8, short , pack)
1851
- OPENCV_HAL_IMPL_C_PACK_STORE (v_uint64x2, uint64, v_uint32x4, unsigned , pack)
1852
- OPENCV_HAL_IMPL_C_PACK_STORE (v_int64x2, int64, v_int32x4, int , pack)
1853
- OPENCV_HAL_IMPL_C_PACK_STORE (v_int16x8, short , v_uint8x16, uchar, pack_u)
1854
- OPENCV_HAL_IMPL_C_PACK_STORE (v_int32x4, int , v_uint16x8, ushort, pack_u)
1851
+ // !
1852
+ // ! @note All variants except 64-bit use saturation; 64-bit inputs are narrowed with a plain static_cast (no saturation).
1853
+ OPENCV_HAL_IMPL_C_PACK_STORE (v_uint16x8, ushort, v_uint8x16, uchar, pack, saturate_cast)
1854
+ OPENCV_HAL_IMPL_C_PACK_STORE (v_int16x8, short , v_int8x16, schar, pack, saturate_cast)
1855
+ OPENCV_HAL_IMPL_C_PACK_STORE (v_uint32x4, unsigned , v_uint16x8, ushort, pack, saturate_cast)
1856
+ OPENCV_HAL_IMPL_C_PACK_STORE (v_int32x4, int , v_int16x8, short , pack, saturate_cast)
1857
+ OPENCV_HAL_IMPL_C_PACK_STORE (v_uint64x2, uint64, v_uint32x4, unsigned , pack, static_cast )
1858
+ OPENCV_HAL_IMPL_C_PACK_STORE (v_int64x2, int64, v_int32x4, int , pack, static_cast )
1859
+ OPENCV_HAL_IMPL_C_PACK_STORE (v_int16x8, short , v_uint8x16, uchar, pack_u, saturate_cast)
1860
+ OPENCV_HAL_IMPL_C_PACK_STORE (v_int32x4, int , v_uint16x8, ushort, pack_u, saturate_cast)
1855
1861
// ! @}
1856
1862
1857
1863
// ! @brief Helper macro
1858
1864
// ! @ingroup core_hal_intrin_impl
1859
- #define OPENCV_HAL_IMPL_C_RSHR_PACK_STORE (_Tpvec, _Tp, _Tpnvec, _Tpn, pack_suffix ) \
1865
+ #define OPENCV_HAL_IMPL_C_RSHR_PACK_STORE (_Tpvec, _Tp, _Tpnvec, _Tpn, pack_suffix, cast ) \
1860
1866
template <int n> inline void v_rshr_##pack_suffix##_store(_Tpn* ptr, const _Tpvec& a) \
1861
1867
{ \
1862
1868
for ( int i = 0 ; i < _Tpvec::nlanes; i++ ) \
1863
- ptr[i] = saturate_cast <_Tpn>((a.s [i] + ((_Tp)1 << (n - 1 ))) >> n); \
1869
+ ptr[i] = cast <_Tpn>((a.s [i] + ((_Tp)1 << (n - 1 ))) >> n); \
1864
1870
}
1865
1871
1866
1872
// ! @name Pack and store with rounding shift
@@ -1872,14 +1878,16 @@ template<int n> inline void v_rshr_##pack_suffix##_store(_Tpn* ptr, const _Tpvec
1872
1878
// !
1873
1879
// ! - pack: for 16-, 32- and 64-bit integer input types
1874
1880
// ! - pack_u: for 16- and 32-bit signed integer input types
1875
- OPENCV_HAL_IMPL_C_RSHR_PACK_STORE (v_uint16x8, ushort, v_uint8x16, uchar, pack)
1876
- OPENCV_HAL_IMPL_C_RSHR_PACK_STORE (v_int16x8, short , v_int8x16, schar, pack)
1877
- OPENCV_HAL_IMPL_C_RSHR_PACK_STORE (v_uint32x4, unsigned , v_uint16x8, ushort, pack)
1878
- OPENCV_HAL_IMPL_C_RSHR_PACK_STORE (v_int32x4, int , v_int16x8, short , pack)
1879
- OPENCV_HAL_IMPL_C_RSHR_PACK_STORE (v_uint64x2, uint64, v_uint32x4, unsigned , pack)
1880
- OPENCV_HAL_IMPL_C_RSHR_PACK_STORE (v_int64x2, int64, v_int32x4, int , pack)
1881
- OPENCV_HAL_IMPL_C_RSHR_PACK_STORE (v_int16x8, short , v_uint8x16, uchar, pack_u)
1882
- OPENCV_HAL_IMPL_C_RSHR_PACK_STORE (v_int32x4, int , v_uint16x8, ushort, pack_u)
1881
+ // !
1882
+ // ! @note All variants except 64-bit use saturation; 64-bit inputs are shifted and then narrowed with a plain static_cast (no saturation).
1883
+ OPENCV_HAL_IMPL_C_RSHR_PACK_STORE (v_uint16x8, ushort, v_uint8x16, uchar, pack, saturate_cast)
1884
+ OPENCV_HAL_IMPL_C_RSHR_PACK_STORE (v_int16x8, short , v_int8x16, schar, pack, saturate_cast)
1885
+ OPENCV_HAL_IMPL_C_RSHR_PACK_STORE (v_uint32x4, unsigned , v_uint16x8, ushort, pack, saturate_cast)
1886
+ OPENCV_HAL_IMPL_C_RSHR_PACK_STORE (v_int32x4, int , v_int16x8, short , pack, saturate_cast)
1887
+ OPENCV_HAL_IMPL_C_RSHR_PACK_STORE (v_uint64x2, uint64, v_uint32x4, unsigned , pack, static_cast )
1888
+ OPENCV_HAL_IMPL_C_RSHR_PACK_STORE (v_int64x2, int64, v_int32x4, int , pack, static_cast )
1889
+ OPENCV_HAL_IMPL_C_RSHR_PACK_STORE (v_int16x8, short , v_uint8x16, uchar, pack_u, saturate_cast)
1890
+ OPENCV_HAL_IMPL_C_RSHR_PACK_STORE (v_int32x4, int , v_uint16x8, ushort, pack_u, saturate_cast)
1883
1891
// ! @}
1884
1892
1885
1893
/* * @brief Matrix multiplication
0 commit comments