Skip to content

Commit 95b8d9b

Browse files
committed
[SelectionDAG] computeKnownBits - support constant pool values from target
This patch adds the overridable TargetLowering::getTargetConstantFromLoad function, which allows targets to return any constant value loaded by a LoadSDNode node. Only X86 makes use of this so far, but everything should be in place for other targets.

computeKnownBits then uses this function to improve codegen, notably for vector code after legalization. A future commit will do the same for ComputeNumSignBits, but computeKnownBits sees the bigger benefit.

This required a couple of fixes:
* SimplifyDemandedBits must early-out for getTargetConstantFromLoad cases to prevent infinite loops of constant regeneration (similar to what we already do for BUILD_VECTOR).
* Fix a DAGCombiner::visitTRUNCATE issue, as we had trunc(shl(v8i32),v8i16) <-> shl(trunc(v8i16),v8i32) infinite loops after legalization on AVX512 targets.

Differential Revision: https://reviews.llvm.org/D61887
llvm-svn: 361620
1 parent 980f760 commit 95b8d9b

17 files changed

+1193
-1817
lines changed

llvm/include/llvm/CodeGen/TargetLowering.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3119,6 +3119,10 @@ class TargetLowering : public TargetLoweringBase {
31193119
TargetLoweringOpt &TLO,
31203120
unsigned Depth = 0) const;
31213121

3122+
/// This method returns the constant pool value that will be loaded by LD.
3123+
/// NOTE: You must check for implicit extensions of the constant by LD.
/// The base TargetLowering implementation returns nullptr (no constant
/// known); targets override this hook to expose their constant pool loads.
3124+
virtual const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const;
3125+
31223126
/// If \p SNaN is false, \returns true if \p Op is known to never be any
31233127
/// NaN. If \p sNaN is true, returns if \p Op is known to never be a signaling
31243128
/// NaN.

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10110,7 +10110,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
1011010110

1011110111
// trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits()
1011210112
if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
10113-
(!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) &&
10113+
(!LegalOperations || TLI.isOperationLegal(ISD::SHL, VT)) &&
1011410114
TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
1011510115
SDValue Amt = N0.getOperand(1);
1011610116
KnownBits Known = DAG.computeKnownBits(Amt);

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 53 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2886,8 +2886,59 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
28862886
}
28872887
case ISD::LOAD: {
28882888
LoadSDNode *LD = cast<LoadSDNode>(Op);
2889-
// If this is a ZEXTLoad and we are looking at the loaded value.
2890-
if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) {
2889+
const Constant *Cst = TLI->getTargetConstantFromLoad(LD);
2890+
if (ISD::isNON_EXTLoad(LD) && Cst) {
2891+
// Determine any common known bits from the loaded constant pool value.
2892+
Type *CstTy = Cst->getType();
2893+
if ((NumElts * BitWidth) == CstTy->getPrimitiveSizeInBits()) {
2894+
// If it's a vector splat, then we can (quickly) reuse the scalar path.
2895+
// NOTE: We assume all elements match and none are UNDEF.
2896+
if (CstTy->isVectorTy()) {
2897+
if (const Constant *Splat = Cst->getSplatValue()) {
2898+
Cst = Splat;
2899+
CstTy = Cst->getType();
2900+
}
2901+
}
2902+
// TODO - do we need to handle different bitwidths?
2903+
if (CstTy->isVectorTy() && BitWidth == CstTy->getScalarSizeInBits()) {
2904+
// Iterate across all vector elements finding common known bits.
2905+
Known.One.setAllBits();
2906+
Known.Zero.setAllBits();
2907+
for (unsigned i = 0; i != NumElts; ++i) {
2908+
if (!DemandedElts[i])
2909+
continue;
2910+
if (Constant *Elt = Cst->getAggregateElement(i)) {
2911+
if (auto *CInt = dyn_cast<ConstantInt>(Elt)) {
2912+
const APInt &Value = CInt->getValue();
2913+
Known.One &= Value;
2914+
Known.Zero &= ~Value;
2915+
continue;
2916+
}
2917+
if (auto *CFP = dyn_cast<ConstantFP>(Elt)) {
2918+
APInt Value = CFP->getValueAPF().bitcastToAPInt();
2919+
Known.One &= Value;
2920+
Known.Zero &= ~Value;
2921+
continue;
2922+
}
2923+
}
2924+
Known.One.clearAllBits();
2925+
Known.Zero.clearAllBits();
2926+
break;
2927+
}
2928+
} else if (BitWidth == CstTy->getPrimitiveSizeInBits()) {
2929+
if (auto *CInt = dyn_cast<ConstantInt>(Cst)) {
2930+
const APInt &Value = CInt->getValue();
2931+
Known.One = Value;
2932+
Known.Zero = ~Value;
2933+
} else if (auto *CFP = dyn_cast<ConstantFP>(Cst)) {
2934+
APInt Value = CFP->getValueAPF().bitcastToAPInt();
2935+
Known.One = Value;
2936+
Known.Zero = ~Value;
2937+
}
2938+
}
2939+
}
2940+
} else if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) {
2941+
// If this is a ZEXTLoad and we are looking at the loaded value.
28912942
EVT VT = LD->getMemoryVT();
28922943
unsigned MemBits = VT.getScalarSizeInBits();
28932944
Known.Zero.setBitsFrom(MemBits);

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -659,6 +659,14 @@ bool TargetLowering::SimplifyDemandedBits(
659659
Known.Zero &= Known2.Zero;
660660
}
661661
return false; // Don't fall through, will infinitely loop.
662+
case ISD::LOAD: {
663+
LoadSDNode *LD = cast<LoadSDNode>(Op);
664+
if (getTargetConstantFromLoad(LD)) {
665+
Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
666+
return false; // Don't fall through, will infinitely loop.
667+
}
668+
break;
669+
}
662670
case ISD::INSERT_VECTOR_ELT: {
663671
SDValue Vec = Op.getOperand(0);
664672
SDValue Scl = Op.getOperand(1);
@@ -2314,6 +2322,10 @@ bool TargetLowering::SimplifyDemandedBitsForTargetNode(
23142322
return false;
23152323
}
23162324

2325+
// Default implementation of the getTargetConstantFromLoad hook: the load
// argument is ignored and nullptr is returned, i.e. no constant is reported.
// Callers in this change test the result for null before using it.
const Constant *TargetLowering::getTargetConstantFromLoad(LoadSDNode*) const {
2326+
return nullptr;
2327+
}
2328+
23172329
bool TargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
23182330
const SelectionDAG &DAG,
23192331
bool SNaN,

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5731,10 +5731,7 @@ static SDValue getShuffleVectorZeroOrUndef(SDValue V2, int Idx,
57315731
return DAG.getVectorShuffle(VT, SDLoc(V2), V1, V2, MaskVec);
57325732
}
57335733

5734-
static const Constant *getTargetConstantFromNode(SDValue Op) {
5735-
Op = peekThroughBitcasts(Op);
5736-
5737-
auto *Load = dyn_cast<LoadSDNode>(Op);
5734+
static const Constant *getTargetConstantFromNode(LoadSDNode *Load) {
57385735
if (!Load)
57395736
return nullptr;
57405737

@@ -5750,6 +5747,17 @@ static const Constant *getTargetConstantFromNode(SDValue Op) {
57505747
return CNode->getConstVal();
57515748
}
57525749

5750+
// SDValue convenience overload: looks through bitcasts on Op (via
// peekThroughBitcasts) and forwards to the LoadSDNode overload.
// dyn_cast yields null when the resulting node is not a LoadSDNode; the
// LoadSDNode overload checks for a null Load and returns nullptr in that case.
static const Constant *getTargetConstantFromNode(SDValue Op) {
5751+
Op = peekThroughBitcasts(Op);
5752+
return getTargetConstantFromNode(dyn_cast<LoadSDNode>(Op));
5753+
}
5754+
5755+
// X86 override of the getTargetConstantFromLoad hook. Requires a non-null LD
// (asserted) and forwards to the file-local getTargetConstantFromNode helper,
// returning whatever constant (or nullptr) that helper produces.
const Constant *
5756+
X86TargetLowering::getTargetConstantFromLoad(LoadSDNode *LD) const {
5757+
assert(LD && "Unexpected null LoadSDNode");
5758+
return getTargetConstantFromNode(LD);
5759+
}
5760+
57535761
// Extract raw constant bits from constant pools.
57545762
static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
57555763
APInt &UndefElts,

llvm/lib/Target/X86/X86ISelLowering.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -908,6 +908,8 @@ namespace llvm {
908908
TargetLoweringOpt &TLO,
909909
unsigned Depth) const override;
910910

911+
const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const override;
912+
911913
SDValue unwrapAddress(SDValue N) const override;
912914

913915
SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const;

llvm/test/CodeGen/X86/avx512-vec-cmp.ll

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -940,9 +940,8 @@ define <2 x i64> @test46(<2 x float> %x, <2 x float> %y) #0 {
940940
; AVX512-LABEL: test46:
941941
; AVX512: ## %bb.0:
942942
; AVX512-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0xc2,0xc1,0x00]
943-
; AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf0,0x57,0xc9]
944-
; AVX512-NEXT: vunpcklps %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0x14,0xc1]
945-
; AVX512-NEXT: ## xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
943+
; AVX512-NEXT: vpermilps $212, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x04,0xc0,0xd4]
944+
; AVX512-NEXT: ## xmm0 = xmm0[0,1,1,3]
946945
; AVX512-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0x54,0x05,A,A,A,A]
947946
; AVX512-NEXT: ## fixup A - offset: 4, value: LCPI47_0-4, kind: reloc_riprel_4byte
948947
; AVX512-NEXT: retq ## encoding: [0xc3]

llvm/test/CodeGen/X86/bitreverse.ll

Lines changed: 6 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -61,31 +61,25 @@ define <2 x i16> @test_bitreverse_v2i16(<2 x i16> %a) nounwind {
6161
; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
6262
; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
6363
; X64-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
64-
; X64-NEXT: pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,7,6,5,4]
65-
; X64-NEXT: packuswb %xmm2, %xmm1
66-
; X64-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
67-
; X64-NEXT: movdqa %xmm1, %xmm2
68-
; X64-NEXT: pand %xmm0, %xmm2
69-
; X64-NEXT: psllw $4, %xmm2
64+
; X64-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
65+
; X64-NEXT: packuswb %xmm2, %xmm0
66+
; X64-NEXT: movdqa %xmm0, %xmm1
67+
; X64-NEXT: psllw $4, %xmm1
7068
; X64-NEXT: pand {{.*}}(%rip), %xmm1
71-
; X64-NEXT: psrlw $4, %xmm1
72-
; X64-NEXT: pand %xmm0, %xmm1
73-
; X64-NEXT: pandn %xmm2, %xmm0
69+
; X64-NEXT: psrlw $4, %xmm0
70+
; X64-NEXT: pand {{.*}}(%rip), %xmm0
7471
; X64-NEXT: por %xmm1, %xmm0
7572
; X64-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
7673
; X64-NEXT: pand %xmm0, %xmm1
7774
; X64-NEXT: psllw $2, %xmm1
78-
; X64-NEXT: pand {{.*}}(%rip), %xmm1
7975
; X64-NEXT: pand {{.*}}(%rip), %xmm0
8076
; X64-NEXT: psrlw $2, %xmm0
81-
; X64-NEXT: pand {{.*}}(%rip), %xmm0
8277
; X64-NEXT: por %xmm1, %xmm0
8378
; X64-NEXT: movdqa {{.*#+}} xmm1 = [85,85,85,85,85,85,85,85,85,85,85,85,85,85,85,85]
8479
; X64-NEXT: pand %xmm0, %xmm1
8580
; X64-NEXT: paddb %xmm1, %xmm1
8681
; X64-NEXT: pand {{.*}}(%rip), %xmm0
8782
; X64-NEXT: psrlw $1, %xmm0
88-
; X64-NEXT: pand {{.*}}(%rip), %xmm0
8983
; X64-NEXT: por %xmm1, %xmm0
9084
; X64-NEXT: psrlq $48, %xmm0
9185
; X64-NEXT: retq

llvm/test/CodeGen/X86/combine-bitreverse.ll

Lines changed: 6 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -47,31 +47,25 @@ define <4 x i32> @test_demandedbits_bitreverse(<4 x i32> %a0) nounwind {
4747
; X86-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,7,6,5,4]
4848
; X86-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
4949
; X86-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
50-
; X86-NEXT: pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,7,6,5,4]
51-
; X86-NEXT: packuswb %xmm2, %xmm1
52-
; X86-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
53-
; X86-NEXT: movdqa %xmm1, %xmm2
54-
; X86-NEXT: pand %xmm0, %xmm2
55-
; X86-NEXT: psllw $4, %xmm2
50+
; X86-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
51+
; X86-NEXT: packuswb %xmm2, %xmm0
52+
; X86-NEXT: movdqa %xmm0, %xmm1
53+
; X86-NEXT: psllw $4, %xmm1
5654
; X86-NEXT: pand {{\.LCPI.*}}, %xmm1
57-
; X86-NEXT: psrlw $4, %xmm1
58-
; X86-NEXT: pand %xmm0, %xmm1
59-
; X86-NEXT: pandn %xmm2, %xmm0
55+
; X86-NEXT: psrlw $4, %xmm0
56+
; X86-NEXT: pand {{\.LCPI.*}}, %xmm0
6057
; X86-NEXT: por %xmm1, %xmm0
6158
; X86-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
6259
; X86-NEXT: pand %xmm0, %xmm1
6360
; X86-NEXT: psllw $2, %xmm1
64-
; X86-NEXT: pand {{\.LCPI.*}}, %xmm1
6561
; X86-NEXT: pand {{\.LCPI.*}}, %xmm0
6662
; X86-NEXT: psrlw $2, %xmm0
67-
; X86-NEXT: pand {{\.LCPI.*}}, %xmm0
6863
; X86-NEXT: por %xmm1, %xmm0
6964
; X86-NEXT: movdqa {{.*#+}} xmm1 = [85,85,85,85,85,85,85,85,85,85,85,85,85,85,85,85]
7065
; X86-NEXT: pand %xmm0, %xmm1
7166
; X86-NEXT: paddb %xmm1, %xmm1
7267
; X86-NEXT: pand {{\.LCPI.*}}, %xmm0
7368
; X86-NEXT: psrlw $1, %xmm0
74-
; X86-NEXT: pand {{\.LCPI.*}}, %xmm0
7569
; X86-NEXT: por %xmm1, %xmm0
7670
; X86-NEXT: pand {{\.LCPI.*}}, %xmm0
7771
; X86-NEXT: retl

llvm/test/CodeGen/X86/combine-shl.ll

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -268,16 +268,11 @@ define <8 x i32> @combine_vec_shl_ext_shl1(<8 x i16> %x) {
268268
; SSE2-LABEL: combine_vec_shl_ext_shl1:
269269
; SSE2: # %bb.0:
270270
; SSE2-NEXT: pmullw {{.*}}(%rip), %xmm0
271-
; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
272271
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
273-
; SSE2-NEXT: psrad $16, %xmm1
274-
; SSE2-NEXT: movdqa %xmm1, %xmm2
275-
; SSE2-NEXT: pslld $29, %xmm2
276-
; SSE2-NEXT: pslld $28, %xmm1
277-
; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
278272
; SSE2-NEXT: pslld $30, %xmm0
279-
; SSE2-NEXT: xorpd %xmm2, %xmm2
280-
; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
273+
; SSE2-NEXT: xorpd %xmm1, %xmm1
274+
; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
275+
; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm1[0,1]
281276
; SSE2-NEXT: retq
282277
;
283278
; SSE41-LABEL: combine_vec_shl_ext_shl1:

0 commit comments

Comments
 (0)