Skip to content

Commit 5eff75d

Browse files
committed
[X86][CostModel] Improve costs for fp_to_uint/fp_to_sint for vXi8/vXi16/v2i32 results.
Differential Revision: https://reviews.llvm.org/D78893
1 parent 9d1fc92 commit 5eff75d

File tree

5 files changed

+175
-226
lines changed

5 files changed

+175
-226
lines changed

llvm/lib/Target/X86/X86TargetTransformInfo.cpp

Lines changed: 40 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1500,12 +1500,17 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
15001500
{ ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i64, 26 },
15011501
{ ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i64, 5 },
15021502

1503+
{ ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f64, 3 },
1504+
{ ISD::FP_TO_SINT, MVT::v8i16, MVT::v8f64, 3 },
1505+
{ ISD::FP_TO_SINT, MVT::v16i8, MVT::v16f32, 3 },
1506+
{ ISD::FP_TO_SINT, MVT::v16i16, MVT::v16f32, 3 },
1507+
15031508
{ ISD::FP_TO_UINT, MVT::v8i32, MVT::v8f64, 1 },
1504-
{ ISD::FP_TO_UINT, MVT::v8i16, MVT::v8f64, 2 },
1505-
{ ISD::FP_TO_UINT, MVT::v8i8, MVT::v8f64, 2 },
1509+
{ ISD::FP_TO_UINT, MVT::v8i16, MVT::v8f64, 3 },
1510+
{ ISD::FP_TO_UINT, MVT::v8i8, MVT::v8f64, 3 },
15061511
{ ISD::FP_TO_UINT, MVT::v16i32, MVT::v16f32, 1 },
1507-
{ ISD::FP_TO_UINT, MVT::v16i16, MVT::v16f32, 2 },
1508-
{ ISD::FP_TO_UINT, MVT::v16i8, MVT::v16f32, 2 },
1512+
{ ISD::FP_TO_UINT, MVT::v16i16, MVT::v16f32, 3 },
1513+
{ ISD::FP_TO_UINT, MVT::v16i8, MVT::v16f32, 3 },
15091514
};
15101515

15111516
static const TypeConversionCostTblEntry AVX512BWVLConversionTbl[] {
@@ -1605,6 +1610,10 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
16051610

16061611
{ ISD::UINT_TO_FP, MVT::f32, MVT::i64, 1 },
16071612
{ ISD::UINT_TO_FP, MVT::f64, MVT::i64, 1 },
1613+
1614+
{ ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f32, 3 },
1615+
{ ISD::FP_TO_UINT, MVT::v8i8, MVT::v8f32, 3 },
1616+
16081617
{ ISD::FP_TO_UINT, MVT::i64, MVT::f32, 1 },
16091618
{ ISD::FP_TO_UINT, MVT::i64, MVT::f64, 1 },
16101619

@@ -1717,8 +1726,15 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
17171726
{ ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i64, 13 },
17181727
{ ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i64, 13 },
17191728

1720-
{ ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 1 },
1721-
{ ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f32, 7 },
1729+
{ ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f32, 4 },
1730+
{ ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f64, 3 },
1731+
{ ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f64, 2 },
1732+
{ ISD::FP_TO_SINT, MVT::v8i16, MVT::v8f32, 3 },
1733+
1734+
{ ISD::FP_TO_UINT, MVT::v4i8, MVT::v4f64, 3 },
1735+
{ ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f64, 2 },
1736+
{ ISD::FP_TO_UINT, MVT::v8i8, MVT::v8f32, 4 },
1737+
{ ISD::FP_TO_UINT, MVT::v8i16, MVT::v8f32, 3 },
17221738
// This node is expanded into scalarized operations but BasicTTI is overly
17231739
// optimistic estimating its cost. It computes 3 per element (one
17241740
// vector-extract, one scalar conversion and one vector-insert). The
@@ -1769,6 +1785,13 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
17691785

17701786
{ ISD::UINT_TO_FP, MVT::f32, MVT::i64, 4 },
17711787
{ ISD::UINT_TO_FP, MVT::f64, MVT::i64, 4 },
1788+
1789+
{ ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f32, 3 },
1790+
{ ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f64, 3 },
1791+
1792+
{ ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f32, 3 },
1793+
{ ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f64, 3 },
1794+
{ ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 2 },
17721795
};
17731796

17741797
static const TypeConversionCostTblEntry SSE2ConversionTbl[] = {
@@ -1794,16 +1817,26 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
17941817
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 6 },
17951818
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v2i64, 15 },
17961819

1820+
{ ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f32, 4 },
1821+
{ ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f32, 2 },
1822+
{ ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 3 },
17971823
{ ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f32, 2 },
17981824
{ ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f64, 2 },
1825+
{ ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f64, 4 },
17991826

1800-
{ ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 3 },
1827+
{ ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 1 },
18011828

18021829
{ ISD::UINT_TO_FP, MVT::f32, MVT::i64, 6 },
18031830
{ ISD::UINT_TO_FP, MVT::f64, MVT::i64, 6 },
18041831

18051832
{ ISD::FP_TO_UINT, MVT::i64, MVT::f32, 4 },
18061833
{ ISD::FP_TO_UINT, MVT::i64, MVT::f64, 4 },
1834+
{ ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f32, 4 },
1835+
{ ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f64, 4 },
1836+
{ ISD::FP_TO_UINT, MVT::v4i8, MVT::v4f32, 3 },
1837+
{ ISD::FP_TO_UINT, MVT::v2i16, MVT::v2f32, 2 },
1838+
{ ISD::FP_TO_UINT, MVT::v2i16, MVT::v2f64, 2 },
1839+
{ ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 4 },
18071840

18081841
{ ISD::ZERO_EXTEND, MVT::v4i16, MVT::v4i8, 1 },
18091842
{ ISD::SIGN_EXTEND, MVT::v4i16, MVT::v4i8, 6 },

0 commit comments

Comments
 (0)