-
Notifications
You must be signed in to change notification settings - Fork 14.9k
[RISCV] Generate QC_INSB/QC_INSBI instructions from OR of AND Imm #154023
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
Generate QC_INSB/QC_INSBI from 'or (and X, MaskImm), OrImm' iff the value being inserted only sets known zero bits
@llvm/pr-subscribers-backend-risc-v Author: Sudharsan Veeravalli (svs-quic) Changes: Generate QC_INSB/QC_INSBI from 'or (and X, MaskImm), OrImm' iff the value being inserted only sets known zero bits. Full diff: https://github.com/llvm/llvm-project/pull/154023.diff 3 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index 9e1530a2d00f4..33648c0c11ebf 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -720,6 +720,74 @@ bool RISCVDAGToDAGISel::trySignedBitfieldInsertInMask(SDNode *Node) {
return true;
}
+// Select 'or (and X, MaskImm), OrImm' as QC_INSB/QC_INSBI iff OrImm only sets
+// bits known to be zero in the AND. Returns true if Node was replaced.
+bool RISCVDAGToDAGISel::tryBitfieldInsertOpFromOrAndImm(SDNode *Node) {
+ // Supported only in Xqcibm for now.
+ if (!Subtarget->hasVendorXqcibm())
+ return false;
+
+ auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
+ if (!N1C)
+ return false;
+
+ SDValue And = Node->getOperand(0);
+
+ if (And.getOpcode() != ISD::AND)
+ return false;
+
+ auto *N2C = dyn_cast<ConstantSDNode>(And->getOperand(1));
+ if (!And.hasOneUse() || !N2C)
+ return false;
+
+ int32_t OrImm = N1C->getSExtValue();
+
+ // Compute the known-zero bits of the whole AND node rather than only reading
+ // its immediate. NOTE(review): presumably also covers zeros known from X.
+ KnownBits Known = CurDAG->computeKnownBits(And);
+
+ // Bits of the AND that are not provably zero. OrImm must not touch any of
+ // them (checked below).
+ uint64_t NotKnownZero = (~Known.Zero).getZExtValue();
+
+ // The KnownZero mask must be one contiguous run of ones (e.g., 0x03C00000).
+ if (!isShiftedMask_32(Known.Zero.getZExtValue()))
+ return false;
+
+ // The bits being inserted must only set those bits that are known to be zero.
+ if ((OrImm & NotKnownZero) != 0) {
+ // FIXME: It's okay if the OrImm sets NotKnownZero bits to 1, but we don't
+ // currently handle this case.
+ return false;
+ }
+
+ // QC_INSB(I) dst, src, #width, #shamt: width/shamt span the known-zero run.
+ MVT VT = Node->getSimpleValueType(0);
+ unsigned BitWidth = VT.getSizeInBits();
+ const unsigned ShAmt = llvm::countr_one(NotKnownZero);
+ const unsigned Width = BitWidth - APInt(BitWidth, NotKnownZero).popcount();
+
+ SDLoc DL(Node);
+ SDValue ImmNode;
+ auto Opc = RISCV::QC_INSB;
+
+ int32_t LIImm = OrImm >> ShAmt; // Field value, aligned to bit 0.
+
+ if (isInt<5>(LIImm)) { // Small enough for the immediate form.
+ Opc = RISCV::QC_INSBI;
+ ImmNode = CurDAG->getSignedTargetConstant(LIImm, DL, MVT::i32);
+ } else {
+ ImmNode = selectImm(CurDAG, DL, MVT::i32, LIImm, *Subtarget);
+ }
+
+ SDValue Ops[] = {And.getOperand(0), ImmNode,
+ CurDAG->getTargetConstant(Width, DL, VT),
+ CurDAG->getTargetConstant(ShAmt, DL, VT)};
+ SDNode *BitIns = CurDAG->getMachineNode(Opc, DL, VT, Ops);
+ ReplaceNode(Node, BitIns);
+ return true;
+}
+
bool RISCVDAGToDAGISel::trySignedBitfieldInsertInSign(SDNode *Node) {
// Only supported with XAndesPerf at the moment.
if (!Subtarget->hasVendorXAndesPerf())
@@ -1384,6 +1452,9 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
if (trySignedBitfieldInsertInMask(Node))
return;
+ if (tryBitfieldInsertOpFromOrAndImm(Node))
+ return;
+
if (tryShrinkShlLogicImm(Node))
return;
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
index 9d4cd0e6e3393..c329a4c6ec62e 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
@@ -76,6 +76,7 @@ class RISCVDAGToDAGISel : public SelectionDAGISel {
bool trySignedBitfieldInsertInSign(SDNode *Node);
bool trySignedBitfieldInsertInMask(SDNode *Node);
bool tryBitfieldInsertOpFromXor(SDNode *Node);
+ bool tryBitfieldInsertOpFromOrAndImm(SDNode *Node);
bool tryUnsignedBitfieldExtract(SDNode *Node, const SDLoc &DL, MVT VT,
SDValue X, unsigned Msb, unsigned Lsb);
bool tryUnsignedBitfieldInsertInZero(SDNode *Node, const SDLoc &DL, MVT VT,
diff --git a/llvm/test/CodeGen/RISCV/xqcibm-insert.ll b/llvm/test/CodeGen/RISCV/xqcibm-insert.ll
index 88054a691bad1..0039580bcd53f 100644
--- a/llvm/test/CodeGen/RISCV/xqcibm-insert.ll
+++ b/llvm/test/CodeGen/RISCV/xqcibm-insert.ll
@@ -139,3 +139,208 @@ define i32 @test_single_bit_set(i32 %a) nounwind {
%or = or i32 %a, 4096
ret i32 %or
}
+
+
+; Tests for INSB(I) generation from OR and AND
+
+define i32 @test1(i32 %a) {
+; RV32I-LABEL: test1:
+; RV32I: # %bb.0:
+; RV32I-NEXT: andi a0, a0, -16
+; RV32I-NEXT: addi a0, a0, 5
+; RV32I-NEXT: ret
+;
+; RV32IXQCIBM-LABEL: test1:
+; RV32IXQCIBM: # %bb.0:
+; RV32IXQCIBM-NEXT: qc.insbi a0, 5, 4, 0
+; RV32IXQCIBM-NEXT: ret
+;
+; RV32IXQCIBMZBS-LABEL: test1:
+; RV32IXQCIBMZBS: # %bb.0:
+; RV32IXQCIBMZBS-NEXT: qc.insbi a0, 5, 4, 0
+; RV32IXQCIBMZBS-NEXT: ret
+ %1 = and i32 %a, -16 ; 0xfffffff0
+ %2 = or i32 %1, 5 ; 0x00000005
+ ret i32 %2
+}
+
+define i32 @test2(i32 %a) {
+; RV32I-LABEL: test2:
+; RV32I: # %bb.0:
+; RV32I-NEXT: lui a1, 1033216
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: and a0, a0, a1
+; RV32I-NEXT: lui a1, 10240
+; RV32I-NEXT: or a0, a0, a1
+; RV32I-NEXT: ret
+;
+; RV32IXQCIBM-LABEL: test2:
+; RV32IXQCIBM: # %bb.0:
+; RV32IXQCIBM-NEXT: qc.insbi a0, 10, 4, 22
+; RV32IXQCIBM-NEXT: ret
+;
+; RV32IXQCIBMZBS-LABEL: test2:
+; RV32IXQCIBMZBS: # %bb.0:
+; RV32IXQCIBMZBS-NEXT: qc.insbi a0, 10, 4, 22
+; RV32IXQCIBMZBS-NEXT: ret
+ %1 = and i32 %a, -62914561 ; 0xfc3fffff
+ %2 = or i32 %1, 41943040 ; 0x02800000
+ ret i32 %2
+}
+
+define i64 @test3(i64 %a) {
+; RV32I-LABEL: test3:
+; RV32I: # %bb.0:
+; RV32I-NEXT: andi a0, a0, -8
+; RV32I-NEXT: addi a0, a0, 5
+; RV32I-NEXT: ret
+;
+; RV32IXQCIBM-LABEL: test3:
+; RV32IXQCIBM: # %bb.0:
+; RV32IXQCIBM-NEXT: qc.insbi a0, 5, 3, 0
+; RV32IXQCIBM-NEXT: ret
+;
+; RV32IXQCIBMZBS-LABEL: test3:
+; RV32IXQCIBMZBS: # %bb.0:
+; RV32IXQCIBMZBS-NEXT: qc.insbi a0, 5, 3, 0
+; RV32IXQCIBMZBS-NEXT: ret
+ %1 = and i64 %a, -8 ; 0xfffffffffffffff8
+ %2 = or i64 %1, 5 ; 0x0000000000000005
+ ret i64 %2
+}
+
+define i64 @test4(i64 %a) {
+; RV32I-LABEL: test4:
+; RV32I: # %bb.0:
+; RV32I-NEXT: andi a0, a0, -255
+; RV32I-NEXT: addi a0, a0, 18
+; RV32I-NEXT: ret
+;
+; RV32IXQCIBM-LABEL: test4:
+; RV32IXQCIBM: # %bb.0:
+; RV32IXQCIBM-NEXT: qc.insbi a0, 9, 7, 1
+; RV32IXQCIBM-NEXT: ret
+;
+; RV32IXQCIBMZBS-LABEL: test4:
+; RV32IXQCIBMZBS: # %bb.0:
+; RV32IXQCIBMZBS-NEXT: qc.insbi a0, 9, 7, 1
+; RV32IXQCIBMZBS-NEXT: ret
+ %1 = and i64 %a, -255 ; 0xffffffffffffff01
+ %2 = or i64 %1, 18 ; 0x0000000000000012
+ ret i64 %2
+}
+
+define i32 @test5(i32 %a) {
+; RV32I-LABEL: test5:
+; RV32I: # %bb.0:
+; RV32I-NEXT: andi a0, a0, -16
+; RV32I-NEXT: addi a0, a0, 6
+; RV32I-NEXT: ret
+;
+; RV32IXQCIBM-LABEL: test5:
+; RV32IXQCIBM: # %bb.0:
+; RV32IXQCIBM-NEXT: qc.insbi a0, 6, 4, 0
+; RV32IXQCIBM-NEXT: ret
+;
+; RV32IXQCIBMZBS-LABEL: test5:
+; RV32IXQCIBMZBS: # %bb.0:
+; RV32IXQCIBMZBS-NEXT: qc.insbi a0, 6, 4, 0
+; RV32IXQCIBMZBS-NEXT: ret
+ %1 = and i32 %a, 4294967280 ; 0xfffffff0
+ %2 = or i32 %1, 6 ; 0x00000006
+ ret i32 %2
+}
+
+define i32 @test6(i32 %a) {
+; RV32I-LABEL: test6:
+; RV32I: # %bb.0:
+; RV32I-NEXT: lui a1, 1048320
+; RV32I-NEXT: and a0, a0, a1
+; RV32I-NEXT: lui a1, 182
+; RV32I-NEXT: addi a1, a1, -1326
+; RV32I-NEXT: or a0, a0, a1
+; RV32I-NEXT: ret
+;
+; RV32IXQCIBM-LABEL: test6:
+; RV32IXQCIBM: # %bb.0:
+; RV32IXQCIBM-NEXT: lui a1, 182
+; RV32IXQCIBM-NEXT: addi a1, a1, -1326
+; RV32IXQCIBM-NEXT: qc.insb a0, a1, 20, 0
+; RV32IXQCIBM-NEXT: ret
+;
+; RV32IXQCIBMZBS-LABEL: test6:
+; RV32IXQCIBMZBS: # %bb.0:
+; RV32IXQCIBMZBS-NEXT: lui a1, 182
+; RV32IXQCIBMZBS-NEXT: addi a1, a1, -1326
+; RV32IXQCIBMZBS-NEXT: qc.insb a0, a1, 20, 0
+; RV32IXQCIBMZBS-NEXT: ret
+ %1 = and i32 %a, 4293918720 ; 0xfff00000
+ %2 = or i32 %1, 744146 ; 0x000b5ad2
+ ret i32 %2
+}
+
+define i32 @test7(i32 %a) {
+; RV32I-LABEL: test7:
+; RV32I: # %bb.0:
+; RV32I-NEXT: lui a1, 1048320
+; RV32I-NEXT: addi a1, a1, 1
+; RV32I-NEXT: and a0, a0, a1
+; RV32I-NEXT: lui a1, 182
+; RV32I-NEXT: addi a1, a1, -1326
+; RV32I-NEXT: or a0, a0, a1
+; RV32I-NEXT: ret
+;
+; RV32IXQCIBM-LABEL: test7:
+; RV32IXQCIBM: # %bb.0:
+; RV32IXQCIBM-NEXT: lui a1, 91
+; RV32IXQCIBM-NEXT: addi a1, a1, -663
+; RV32IXQCIBM-NEXT: qc.insb a0, a1, 19, 1
+; RV32IXQCIBM-NEXT: ret
+;
+; RV32IXQCIBMZBS-LABEL: test7:
+; RV32IXQCIBMZBS: # %bb.0:
+; RV32IXQCIBMZBS-NEXT: lui a1, 91
+; RV32IXQCIBMZBS-NEXT: addi a1, a1, -663
+; RV32IXQCIBMZBS-NEXT: qc.insb a0, a1, 19, 1
+; RV32IXQCIBMZBS-NEXT: ret
+ %1 = and i32 %a, 4293918721 ; 0xfff00001
+ %2 = or i32 %1, 744146 ; 0x000b5ad2
+ ret i32 %2
+}
+
+define i64 @test8(i64 %a) {
+; RV32I-LABEL: test8:
+; RV32I: # %bb.0:
+; RV32I-NEXT: lui a2, 1044480
+; RV32I-NEXT: zext.b a0, a0
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: lui a2, 496944
+; RV32I-NEXT: or a0, a0, a2
+; RV32I-NEXT: lui a2, 9
+; RV32I-NEXT: addi a2, a2, -170
+; RV32I-NEXT: or a1, a1, a2
+; RV32I-NEXT: ret
+;
+; RV32IXQCIBM-LABEL: test8:
+; RV32IXQCIBM: # %bb.0:
+; RV32IXQCIBM-NEXT: lui a2, 1941
+; RV32IXQCIBM-NEXT: addi a2, a2, 768
+; RV32IXQCIBM-NEXT: qc.insb a0, a2, 24, 8
+; RV32IXQCIBM-NEXT: lui a2, 9
+; RV32IXQCIBM-NEXT: addi a2, a2, -170
+; RV32IXQCIBM-NEXT: qc.insb a1, a2, 24, 0
+; RV32IXQCIBM-NEXT: ret
+;
+; RV32IXQCIBMZBS-LABEL: test8:
+; RV32IXQCIBMZBS: # %bb.0:
+; RV32IXQCIBMZBS-NEXT: lui a2, 1941
+; RV32IXQCIBMZBS-NEXT: addi a2, a2, 768
+; RV32IXQCIBMZBS-NEXT: qc.insb a0, a2, 24, 8
+; RV32IXQCIBMZBS-NEXT: lui a2, 9
+; RV32IXQCIBMZBS-NEXT: addi a2, a2, -170
+; RV32IXQCIBMZBS-NEXT: qc.insb a1, a2, 24, 0
+; RV32IXQCIBMZBS-NEXT: ret
+ %1 = and i64 %a, -72057594037927681 ; 0xff000000000000ff
+ %2 = or i64 %1, 157601565442048 ; 0x00008f5679530000
+ ret i64 %2
+}
|
I think there is a bug in this, we're looking further internally. |
I don't think there is a bug. I think the hex value in a comment in the test case was incorrect. I have corrected that. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM. Sorry for the confusion about the bug, earlier. I should have trusted the comments less.
Generate QC_INSB/QC_INSBI from
or (and X, MaskImm), OrImm
iff the value being inserted only sets known zero bits. This is based on a similar DAG to DAG transform done in AArch64.