@@ -1500,12 +1500,17 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
1500
1500
{ ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i64, 26 },
1501
1501
{ ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i64, 5 },
1502
1502
1503
+ { ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f64, 3 },
1504
+ { ISD::FP_TO_SINT, MVT::v8i16, MVT::v8f64, 3 },
1505
+ { ISD::FP_TO_SINT, MVT::v16i8, MVT::v16f32, 3 },
1506
+ { ISD::FP_TO_SINT, MVT::v16i16, MVT::v16f32, 3 },
1507
+
1503
1508
{ ISD::FP_TO_UINT, MVT::v8i32, MVT::v8f64, 1 },
1504
- { ISD::FP_TO_UINT, MVT::v8i16, MVT::v8f64, 2 },
1505
- { ISD::FP_TO_UINT, MVT::v8i8, MVT::v8f64, 2 },
1509
+ { ISD::FP_TO_UINT, MVT::v8i16, MVT::v8f64, 3 },
1510
+ { ISD::FP_TO_UINT, MVT::v8i8, MVT::v8f64, 3 },
1506
1511
{ ISD::FP_TO_UINT, MVT::v16i32, MVT::v16f32, 1 },
1507
- { ISD::FP_TO_UINT, MVT::v16i16, MVT::v16f32, 2 },
1508
- { ISD::FP_TO_UINT, MVT::v16i8, MVT::v16f32, 2 },
1512
+ { ISD::FP_TO_UINT, MVT::v16i16, MVT::v16f32, 3 },
1513
+ { ISD::FP_TO_UINT, MVT::v16i8, MVT::v16f32, 3 },
1509
1514
};
1510
1515
1511
1516
static const TypeConversionCostTblEntry AVX512BWVLConversionTbl[] {
@@ -1605,6 +1610,10 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
1605
1610
1606
1611
{ ISD::UINT_TO_FP, MVT::f32 , MVT::i64 , 1 },
1607
1612
{ ISD::UINT_TO_FP, MVT::f64 , MVT::i64 , 1 },
1613
+
1614
+ { ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f32, 3 },
1615
+ { ISD::FP_TO_UINT, MVT::v8i8, MVT::v8f32, 3 },
1616
+
1608
1617
{ ISD::FP_TO_UINT, MVT::i64 , MVT::f32 , 1 },
1609
1618
{ ISD::FP_TO_UINT, MVT::i64 , MVT::f64 , 1 },
1610
1619
@@ -1717,8 +1726,15 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
1717
1726
{ ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i64, 13 },
1718
1727
{ ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i64, 13 },
1719
1728
1720
- { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 1 },
1721
- { ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f32, 7 },
1729
+ { ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f32, 4 },
1730
+ { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f64, 3 },
1731
+ { ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f64, 2 },
1732
+ { ISD::FP_TO_SINT, MVT::v8i16, MVT::v8f32, 3 },
1733
+
1734
+ { ISD::FP_TO_UINT, MVT::v4i8, MVT::v4f64, 3 },
1735
+ { ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f64, 2 },
1736
+ { ISD::FP_TO_UINT, MVT::v8i8, MVT::v8f32, 4 },
1737
+ { ISD::FP_TO_UINT, MVT::v8i16, MVT::v8f32, 3 },
1722
1738
// This node is expanded into scalarized operations but BasicTTI is overly
1723
1739
// optimistic estimating its cost. It computes 3 per element (one
1724
1740
// vector-extract, one scalar conversion and one vector-insert). The
@@ -1769,6 +1785,13 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
1769
1785
1770
1786
{ ISD::UINT_TO_FP, MVT::f32 , MVT::i64 , 4 },
1771
1787
{ ISD::UINT_TO_FP, MVT::f64 , MVT::i64 , 4 },
1788
+
1789
+ { ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f32, 3 },
1790
+ { ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f64, 3 },
1791
+
1792
+ { ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f32, 3 },
1793
+ { ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f64, 3 },
1794
+ { ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 2 },
1772
1795
};
1773
1796
1774
1797
static const TypeConversionCostTblEntry SSE2ConversionTbl[] = {
@@ -1794,16 +1817,26 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
1794
1817
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 6 },
1795
1818
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v2i64, 15 },
1796
1819
1820
+ { ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f32, 4 },
1821
+ { ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f32, 2 },
1822
+ { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 3 },
1797
1823
{ ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f32, 2 },
1798
1824
{ ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f64, 2 },
1825
+ { ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f64, 4 },
1799
1826
1800
- { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 3 },
1827
+ { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 1 },
1801
1828
1802
1829
{ ISD::UINT_TO_FP, MVT::f32 , MVT::i64 , 6 },
1803
1830
{ ISD::UINT_TO_FP, MVT::f64 , MVT::i64 , 6 },
1804
1831
1805
1832
{ ISD::FP_TO_UINT, MVT::i64 , MVT::f32 , 4 },
1806
1833
{ ISD::FP_TO_UINT, MVT::i64 , MVT::f64 , 4 },
1834
+ { ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f32, 4 },
1835
+ { ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f64, 4 },
1836
+ { ISD::FP_TO_UINT, MVT::v4i8, MVT::v4f32, 3 },
1837
+ { ISD::FP_TO_UINT, MVT::v2i16, MVT::v2f32, 2 },
1838
+ { ISD::FP_TO_UINT, MVT::v2i16, MVT::v2f64, 2 },
1839
+ { ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 4 },
1807
1840
1808
1841
{ ISD::ZERO_EXTEND, MVT::v4i16, MVT::v4i8, 1 },
1809
1842
{ ISD::SIGN_EXTEND, MVT::v4i16, MVT::v4i8, 6 },
0 commit comments