Skip to content

Commit cfca67c

Browse files
committed
If the condition is MI or PL and the LHS is a sub, use the sub's own flags instead of emitting a separate CMP
This also makes the existing CSEL fold redundant, so remove it.
1 parent 9274200 commit cfca67c

File tree

2 files changed

+55
-26
lines changed

2 files changed

+55
-26
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 42 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -3586,7 +3586,8 @@ static SDValue emitStrictFPComparison(SDValue LHS, SDValue RHS, const SDLoc &DL,
35863586
}
35873587

35883588
static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
3589-
const SDLoc &DL, SelectionDAG &DAG) {
3589+
const SDLoc &DL, SelectionDAG &DAG,
3590+
bool optimizeMIOrPL = false) {
35903591
EVT VT = LHS.getValueType();
35913592
const bool FullFP16 = DAG.getSubtarget<AArch64Subtarget>().hasFullFP16();
35923593

@@ -3630,6 +3631,44 @@ static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
36303631
// Use result of ANDS
36313632
return LHS.getValue(1);
36323633
}
3634+
3635+
if (LHS.getOpcode() == ISD::SUB) {
3636+
if (LHS->getFlags().hasNoSignedWrap() ||
3637+
((CC == ISD::SETLT || CC == ISD::SETGE) && optimizeMIOrPL) ||
3638+
(CC == ISD::SETEQ || CC == ISD::SETNE)) {
3639+
const SDValue SUBSNode =
3640+
DAG.getNode(AArch64ISD::SUBS, DL, DAG.getVTList(VT, FlagsVT),
3641+
LHS.getOperand(0), LHS.getOperand(1));
3642+
// Replace all users of (sub X, Y) with newly generated (subs X, Y)
3643+
DAG.ReplaceAllUsesWith(LHS, SUBSNode);
3644+
return SUBSNode.getValue(1);
3645+
}
3646+
} else if (LHS.getOpcode() == AArch64ISD::SUBS) {
3647+
if (LHS->getFlags().hasNoSignedWrap() ||
3648+
((CC == ISD::SETLT || CC == ISD::SETGE) && optimizeMIOrPL) ||
3649+
(CC == ISD::SETEQ || CC == ISD::SETNE)) {
3650+
return LHS.getValue(1);
3651+
}
3652+
}
3653+
3654+
if (LHS.getOpcode() == ISD::ADD) {
3655+
if (LHS->getFlags().hasNoSignedWrap() ||
3656+
((CC == ISD::SETLT || CC == ISD::SETGE) && optimizeMIOrPL) ||
3657+
(CC == ISD::SETEQ || CC == ISD::SETNE)) {
3658+
const SDValue ADDSNode =
3659+
DAG.getNode(AArch64ISD::ADDS, DL, DAG.getVTList(VT, FlagsVT),
3660+
LHS.getOperand(0), LHS.getOperand(1));
3661+
// Replace all users of (add X, Y) with newly generated (adds X, Y)
3662+
DAG.ReplaceAllUsesWith(LHS, ADDSNode);
3663+
return ADDSNode.getValue(1);
3664+
}
3665+
} else if (LHS.getOpcode() == AArch64ISD::ADDS) {
3666+
if (LHS->getFlags().hasNoSignedWrap() ||
3667+
((CC == ISD::SETLT || CC == ISD::SETGE) && optimizeMIOrPL) ||
3668+
(CC == ISD::SETEQ || CC == ISD::SETNE)) {
3669+
return LHS.getValue(1);
3670+
}
3671+
}
36333672
}
36343673

36353674
return DAG.getNode(Opcode, DL, DAG.getVTList(VT, FlagsVT), LHS, RHS)
@@ -3843,7 +3882,7 @@ static SDValue emitConjunctionRec(SelectionDAG &DAG, SDValue Val,
38433882

38443883
// Produce a normal comparison if we are first in the chain
38453884
if (!CCOp)
3846-
return emitComparison(LHS, RHS, CC, DL, DAG);
3885+
return emitComparison(LHS, RHS, CC, DL, DAG, isInteger);
38473886
// Otherwise produce a ccmp.
38483887
return emitConditionalComparison(LHS, RHS, CC, CCOp, Predicate, OutCC, DL,
38493888
DAG);
@@ -4125,7 +4164,7 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
41254164
}
41264165

41274166
if (!Cmp) {
4128-
Cmp = emitComparison(LHS, RHS, CC, DL, DAG);
4167+
Cmp = emitComparison(LHS, RHS, CC, DL, DAG, true);
41294168
AArch64CC = changeIntCCToAArch64CC(CC, RHS);
41304169
}
41314170
AArch64cc = getCondCode(DAG, AArch64CC);
@@ -25501,29 +25540,6 @@ static SDValue performCSELCombine(SDNode *N,
2550125540
}
2550225541
}
2550325542

25504-
// CSEL a, b, cc, SUBS(SUB(x,y), 0) -> CSEL a, b, cc, SUBS(x,y) if cc doesn't
25505-
// use overflow flags, to avoid the comparison with zero. In case of success,
25506-
// this also replaces the original SUB(x,y) with the newly created SUBS(x,y).
25507-
// NOTE: Perhaps in the future use performFlagSettingCombine to replace SUB
25508-
// nodes with their SUBS equivalent as is already done for other flag-setting
25509-
// operators, in which case doing the replacement here becomes redundant.
25510-
if (Cond.getOpcode() == AArch64ISD::SUBS && Cond->hasNUsesOfValue(1, 1) &&
25511-
isNullConstant(Cond.getOperand(1))) {
25512-
SDValue Sub = Cond.getOperand(0);
25513-
AArch64CC::CondCode CC =
25514-
static_cast<AArch64CC::CondCode>(N->getConstantOperandVal(2));
25515-
if (Sub.getOpcode() == ISD::SUB &&
25516-
(CC == AArch64CC::EQ || CC == AArch64CC::NE || CC == AArch64CC::MI ||
25517-
CC == AArch64CC::PL)) {
25518-
SDLoc DL(N);
25519-
SDValue Subs = DAG.getNode(AArch64ISD::SUBS, DL, Cond->getVTList(),
25520-
Sub.getOperand(0), Sub.getOperand(1));
25521-
DCI.CombineTo(Sub.getNode(), Subs);
25522-
DCI.CombineTo(Cond.getNode(), Subs, Subs.getValue(1));
25523-
return SDValue(N, 0);
25524-
}
25525-
}
25526-
2552725543
// CSEL (LASTB P, Z), X, NE(ANY P) -> CLASTB P, X, Z
2552825544
if (SDValue CondLast = foldCSELofLASTB(N, DAG))
2552925545
return CondLast;

llvm/lib/Target/ARM/ARMISelLowering.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4808,6 +4808,19 @@ SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
48084808
CompareType = ARMISD::CMPZ;
48094809
break;
48104810
}
4811+
4812+
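// If LHS is a SUB and the condition is MI, PL, EQ or NE (or LT/GE/LE/GT when LHS has the no-signed-wrap flag), compare the SUB's operands directly instead of emitting a CMP on its result.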
4813+
if (CondCode == ARMCC::MI || CondCode == ARMCC::PL || CondCode == ARMCC::EQ ||
4814+
CondCode == ARMCC::NE ||
4815+
(LHS->getFlags().hasNoSignedWrap() &&
4816+
(CondCode == ARMCC::LT || CondCode == ARMCC::GE ||
4817+
CondCode == ARMCC::LE || CondCode == ARMCC::GT))) {
4818+
if (LHS.getOpcode() == ISD::SUB) {
4819+
ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
4820+
return DAG.getNode(CompareType, dl, FlagsVT, LHS.getOperand(0),
4821+
LHS.getOperand(1));
4822+
}
4823+
}
48114824
ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
48124825
return DAG.getNode(CompareType, dl, FlagsVT, LHS, RHS);
48134826
}

0 commit comments

Comments
 (0)