Skip to content

Commit 61aa940

Browse files
committed
[RISCV] Introduce codegen patterns for RV64M-only instructions
As discussed on llvm-dev <http://lists.llvm.org/pipermail/llvm-dev/2018-December/128497.html>, we have to be careful when trying to select the *w RV64M instructions. i32 is not a legal type for RV64 in the RISC-V backend, so operations have been promoted by the time they reach instruction selection. Information about whether the operation was originally a 32-bit operation has been lost, and it's easy to write incorrect patterns. Similarly to the variable 32-bit shifts, a DAG combine on ANY_EXTEND will produce a SIGN_EXTEND if this is likely to result in sdiv/udiv/urem being selected (and so save instructions to sext/zext the input operands). Differential Revision: https://reviews.llvm.org/D53230 llvm-svn: 350993
1 parent d05eae7 commit 61aa940

File tree

6 files changed

+1781
-9
lines changed

6 files changed

+1781
-9
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -525,6 +525,20 @@ static bool isVariableShift(SDValue Val) {
525525
}
526526
}
527527

528+
// Returns true if the given node is an sdiv, udiv, or urem with non-constant
529+
// operands.
530+
static bool isVariableSDivUDivURem(SDValue Val) {
531+
switch (Val.getOpcode()) {
532+
default:
533+
return false;
534+
case ISD::SDIV:
535+
case ISD::UDIV:
536+
case ISD::UREM:
537+
return Val.getOperand(0).getOpcode() != ISD::Constant &&
538+
Val.getOperand(1).getOpcode() != ISD::Constant;
539+
}
540+
}
541+
528542
SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
529543
DAGCombinerInfo &DCI) const {
530544
SelectionDAG &DAG = DCI.DAG;
@@ -552,12 +566,14 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
552566
N, DAG.getNode(N->getOpcode(), DL, LHS.getValueType(), LHS, NewRHS));
553567
}
554568
case ISD::ANY_EXTEND: {
555-
// If any-extending an i32 variable-length shift to i64, then instead
556-
// sign-extend in order to increase the chance of being able to select the
557-
// sllw/srlw/sraw instruction.
569+
// If any-extending an i32 variable-length shift or sdiv/udiv/urem to i64,
570+
// then instead sign-extend in order to increase the chance of being able
571+
// to select the sllw/srlw/sraw/divw/divuw/remuw instructions.
558572
SDValue Src = N->getOperand(0);
559-
if (N->getValueType(0) != MVT::i64 || Src.getValueType() != MVT::i32 ||
560-
!isVariableShift(Src))
573+
if (N->getValueType(0) != MVT::i64 || Src.getValueType() != MVT::i32)
574+
break;
575+
if (!isVariableShift(Src) &&
576+
!(Subtarget.hasStdExtM() && isVariableSDivUDivURem(Src)))
561577
break;
562578
SDLoc DL(N);
563579
return DCI.CombineTo(N, DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Src));

llvm/lib/Target/RISCV/RISCVInstrInfoM.td

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,3 +49,34 @@ def : PatGprGpr<udiv, DIVU>;
4949
def : PatGprGpr<srem, REM>;
5050
def : PatGprGpr<urem, REMU>;
5151
} // Predicates = [HasStdExtM]
52+
53+
// Selection patterns that apply only when both the HasStdExtM and IsRV64
// predicates hold. They map promoted 32-bit multiply/divide/remainder DAG
// shapes onto the MULW, DIVW, DIVUW, REMW and REMUW instructions.
let Predicates = [HasStdExtM, IsRV64] in {
54+
// A mul whose result is sign-extended in-register from i32 selects MULW. The
// operands are matched as plain GPRs, with no extension required on them.
def : Pat<(sext_inreg (mul GPR:$rs1, GPR:$rs2), i32),
55+
(MULW GPR:$rs1, GPR:$rs2)>;
56+
// An sdiv is matched only when both operands are sexti32; with the result
// sign-extended in-register from i32 it selects DIVW.
def : Pat<(sext_inreg (sdiv (sexti32 GPR:$rs1),
57+
(sexti32 GPR:$rs2)), i32),
58+
(DIVW GPR:$rs1, GPR:$rs2)>;
59+
// Same sdiv shape but with a zexti32 result: emit DIVW, then an SLLI/SRLI
// pair by 32 applied to the DIVW result.
def : Pat<(zexti32 (sdiv (sexti32 GPR:$rs1),
60+
(sexti32 GPR:$rs2))),
61+
(SRLI (SLLI (DIVW GPR:$rs1, GPR:$rs2), 32), 32)>;
62+
// A udiv of two zexti32 operands whose result is sign-extended in-register
// from i32 selects DIVUW.
def : Pat<(sext_inreg (udiv (zexti32 GPR:$rs1), (zexti32 GPR:$rs2)), i32),
63+
(DIVUW GPR:$rs1, GPR:$rs2)>;
64+
// It's cheaper to perform a divuw and zero-extend the result than to
65+
// zero-extend both inputs to a udiv.
66+
def : Pat<(udiv (and GPR:$rs1, 0xffffffff), (and GPR:$rs2, 0xffffffff)),
67+
(SRLI (SLLI (DIVUW GPR:$rs1, GPR:$rs2), 32), 32)>;
68+
// Although the sexti32 operands may not have originated from an i32 srem,
69+
// this pattern is safe as it is impossible for two sign extended inputs to
70+
// produce a result where res[63:32]=0 and res[31]=1.
71+
def : Pat<(srem (sexti32 GPR:$rs1), (sexti32 GPR:$rs2)),
72+
(REMW GPR:$rs1, GPR:$rs2)>;
// The same srem shape wrapped in an explicit sext_inreg from i32 also selects
// REMW.
73+
def : Pat<(sext_inreg (srem (sexti32 GPR:$rs1),
74+
(sexti32 GPR:$rs2)), i32),
75+
(REMW GPR:$rs1, GPR:$rs2)>;
76+
// A urem of two zexti32 operands whose result is sign-extended in-register
// from i32 selects REMUW.
def : Pat<(sext_inreg (urem (zexti32 GPR:$rs1), (zexti32 GPR:$rs2)), i32),
77+
(REMUW GPR:$rs1, GPR:$rs2)>;
78+
// It's cheaper to perform a remuw and zero-extend the result than to
79+
// zero-extend both inputs to a urem.
80+
def : Pat<(urem (and GPR:$rs1, 0xffffffff), (and GPR:$rs2, 0xffffffff)),
81+
(SRLI (SLLI (REMUW GPR:$rs1, GPR:$rs2), 32), 32)>;
82+
} // Predicates = [HasStdExtM, IsRV64]

0 commit comments

Comments
 (0)