[RISCV] Fold (X & (7 << 29)) == 0 -> (srliw X, 29) == 0 for RV64. #156769
@llvm/pr-subscribers-backend-risc-v

Author: Craig Topper (topperc)

Changes

This is similar to the recently added (X & -4096) == 0 -> (X >> 12) == 0, but operating only on the lower 32 bits.

This also removes the (X & (1 << 31)) == 0 -> (xor (srliw X, 31), 1) isel pattern. seqz and xori 1 should have similar cost and encoding size.

Full diff: https://github.com/llvm/llvm-project/pull/156769.diff

4 Files Affected:
- llvm/lib/Target/RISCV/RISCVISelLowering.cpp
- llvm/lib/Target/RISCV/RISCVInstrInfo.td
- llvm/test/CodeGen/RISCV/and-negpow2-cmp.ll
- llvm/test/CodeGen/RISCV/bittest.ll
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 8c3279517527e..b012c172e37d2 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -16726,10 +16726,6 @@ combineVectorSizedSetCCEquality(EVT VT, SDValue X, SDValue Y, ISD::CondCode CC,
DAG.getConstant(0, DL, XLenVT), CC);
}
-// Replace (seteq (i64 (and X, 0xffffffff)), C1) with
-// (seteq (i64 (sext_inreg (X, i32)), C1')) where C1' is C1 sign extended from
-// bit 31. Same for setne. C1' may be cheaper to materialize and the sext_inreg
-// can become a sext.w instead of a shift pair.
static SDValue performSETCCCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const RISCVSubtarget &Subtarget) {
@@ -16749,20 +16745,36 @@ static SDValue performSETCCCombine(SDNode *N,
combineVectorSizedSetCCEquality(VT, N0, N1, Cond, dl, DAG, Subtarget))
return V;
- // (X & -4096) == 0 -> (X >> 12) == 0 if the AND constant can't use ANDI.
if (DCI.isAfterLegalizeDAG() && isNullConstant(N1) &&
N0.getOpcode() == ISD::AND && N0.hasOneUse() &&
isa<ConstantSDNode>(N0.getOperand(1))) {
const APInt &AndRHSC =
cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+ // (X & -4096) == 0 -> (X >> 12) == 0 if the AND constant can't use ANDI.
if (!isInt<12>(AndRHSC.getSExtValue()) && AndRHSC.isNegatedPowerOf2()) {
unsigned ShiftBits = AndRHSC.countr_zero();
- SDValue Shift = DAG.getNode(ISD::SRL, dl, VT, N0.getOperand(0),
- DAG.getConstant(ShiftBits, dl, VT));
+ SDValue Shift = DAG.getNode(ISD::SRL, dl, OpVT, N0.getOperand(0),
+ DAG.getConstant(ShiftBits, dl, OpVT));
+ return DAG.getSetCC(dl, VT, Shift, N1, Cond);
+ }
+
+ // Similar to above but handling the lower 32 bits by using srliw.
+ // FIXME: Handle the case where N1 is non-zero.
+ if (OpVT == MVT::i64 && AndRHSC.getZExtValue() <= 0xffffffff &&
+ isPowerOf2_32(-uint32_t(AndRHSC.getZExtValue()))) {
+ unsigned ShiftBits = llvm::countr_zero(AndRHSC.getZExtValue());
+ SDValue And = DAG.getNode(ISD::AND, dl, OpVT, N0.getOperand(0),
+ DAG.getConstant(0xffffffff, dl, OpVT));
+ SDValue Shift = DAG.getNode(ISD::SRL, dl, OpVT, And,
+ DAG.getConstant(ShiftBits, dl, OpVT));
return DAG.getSetCC(dl, VT, Shift, N1, Cond);
}
}
+ // Replace (seteq (i64 (and X, 0xffffffff)), C1) with
+ // (seteq (i64 (sext_inreg (X, i32)), C1')) where C1' is C1 sign extended from
+ // bit 31. Same for setne. C1' may be cheaper to materialize and the
+ // sext_inreg can become a sext.w instead of a shift pair.
if (OpVT != MVT::i64 || !Subtarget.is64Bit())
return SDValue();
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
index 23f5a848137c4..4b6870554ab60 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -1698,8 +1698,6 @@ let Predicates = [IsRV32] in {
def : Pat<(i32 (setlt (i32 GPR:$rs1), 0)), (SRLI GPR:$rs1, 31)>; // compressible
}
let Predicates = [IsRV64] in {
-def : Pat<(i64 (seteq (i64 (and GPR:$rs1, 0x0000000080000000)), 0)),
- (XORI (i64 (SRLIW GPR:$rs1, 31)), 1)>;
def : Pat<(i64 (setlt (i64 GPR:$rs1), 0)), (SRLI GPR:$rs1, 63)>; // compressible
def : Pat<(i64 (setlt (sext_inreg GPR:$rs1, i32), 0)), (SRLIW GPR:$rs1, 31)>;
}
diff --git a/llvm/test/CodeGen/RISCV/and-negpow2-cmp.ll b/llvm/test/CodeGen/RISCV/and-negpow2-cmp.ll
index b16672d3c4a16..5476d49fca4a1 100644
--- a/llvm/test/CodeGen/RISCV/and-negpow2-cmp.ll
+++ b/llvm/test/CodeGen/RISCV/and-negpow2-cmp.ll
@@ -64,3 +64,37 @@ define i1 @test4(i64 %x) {
%b = icmp eq i64 %a, 0
ret i1 %b
}
+
+define i1 @test5(i64 %x) {
+; RV32-LABEL: test5:
+; RV32: # %bb.0:
+; RV32-NEXT: srli a0, a0, 29
+; RV32-NEXT: seqz a0, a0
+; RV32-NEXT: ret
+;
+; RV64-LABEL: test5:
+; RV64: # %bb.0:
+; RV64-NEXT: srliw a0, a0, 29
+; RV64-NEXT: seqz a0, a0
+; RV64-NEXT: ret
+ %a = and i64 %x, 3758096384
+ %b = icmp eq i64 %a, 0
+ ret i1 %b
+}
+
+define i1 @test6(i64 %x) {
+; RV32-LABEL: test6:
+; RV32: # %bb.0:
+; RV32-NEXT: srli a0, a0, 29
+; RV32-NEXT: snez a0, a0
+; RV32-NEXT: ret
+;
+; RV64-LABEL: test6:
+; RV64: # %bb.0:
+; RV64-NEXT: srliw a0, a0, 29
+; RV64-NEXT: snez a0, a0
+; RV64-NEXT: ret
+ %a = and i64 %x, 3758096384
+ %b = icmp ne i64 %a, 0
+ ret i1 %b
+}
diff --git a/llvm/test/CodeGen/RISCV/bittest.ll b/llvm/test/CodeGen/RISCV/bittest.ll
index 40e3168d08b27..5d40d8f2d860d 100644
--- a/llvm/test/CodeGen/RISCV/bittest.ll
+++ b/llvm/test/CodeGen/RISCV/bittest.ll
@@ -188,13 +188,13 @@ define i64 @bittest_31_i64(i64 %a) nounwind {
; RV64ZBS-LABEL: bittest_31_i64:
; RV64ZBS: # %bb.0:
; RV64ZBS-NEXT: srliw a0, a0, 31
-; RV64ZBS-NEXT: xori a0, a0, 1
+; RV64ZBS-NEXT: seqz a0, a0
; RV64ZBS-NEXT: ret
;
; RV64XTHEADBS-LABEL: bittest_31_i64:
; RV64XTHEADBS: # %bb.0:
; RV64XTHEADBS-NEXT: srliw a0, a0, 31
-; RV64XTHEADBS-NEXT: xori a0, a0, 1
+; RV64XTHEADBS-NEXT: seqz a0, a0
; RV64XTHEADBS-NEXT: ret
%shr = lshr i64 %a, 31
%not = xor i64 %shr, -1
@@ -3518,7 +3518,7 @@ define i32 @bittest_31_andeq0_i64(i64 %x) {
; RV64-LABEL: bittest_31_andeq0_i64:
; RV64: # %bb.0:
; RV64-NEXT: srliw a0, a0, 31
-; RV64-NEXT: xori a0, a0, 1
+; RV64-NEXT: seqz a0, a0
; RV64-NEXT: ret
%and = and i64 %x, 2147483648
%cmp = icmp eq i64 %and, 0
This is similar to the recently added (X & -4096) == 0 -> (X >> 12) == 0, but operating only on the lower 32 bits. This also removes the (X & (1 << 31)) == 0 -> (xor (srliw X, 31), 1) isel pattern. seqz and xori 1 should have similar cost and encoding size.
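As a sanity check of the equivalence behind the fold, here is an illustrative C++ sketch (not code from the patch; the helper names are made up):

#include <cassert>
#include <cstdint>

// (X & (7 << 29)) == 0 tests bits 29..31 of X. srliw logically shifts
// the low 32 bits of its source and, for a nonzero shift amount, fills
// the upper result bits with zeros, so ((uint32_t)X >> 29) == 0 tests
// exactly the same bits.
bool viaAnd(uint64_t x) { return (x & (UINT64_C(7) << 29)) == 0; }
bool viaSrliw(uint64_t x) { return (uint32_t(x) >> 29) == 0; }

int main() {
  const uint64_t samples[] = {0, UINT64_C(1) << 29, UINT64_C(1) << 31,
                              UINT64_C(1) << 32, ~UINT64_C(0)};
  for (uint64_t x : samples)
    assert(viaAnd(x) == viaSrliw(x));
}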
Force-pushed from 59f3972 to b844a7b.
Review comment on the new code in RISCVISelLowering.cpp:

    // Similar to above but handling the lower 32 bits by using srliw.
    // FIXME: Handle the case where N1 is non-zero.
    if (OpVT == MVT::i64 && AndRHSC.getZExtValue() <= 0xffffffff &&
To make sure I follow, this is handling the case where a 0..01111..0 mask (with the 0..0 being exactly the high 32 bits), by masking out the bottom 32 and then doing the same shift compare as above? And this happens to fold into a srlw because we only care about the zero-ness of the shift, so extending bit 31 is fine?
If so, can we generalize this by replacing the AND with a SHL to clear the high bits, and adjusting the SRL amount?
A: We already were generating the shift pair sequence, and this is an optimization on that.
> And this happens to fold into a srlw because we only care about the zero-ness of the shift, so extending bit 31 is fine?

srliw fills zeros into any result bits above the original bit 31; I'm not sure if that's what you mean by "extending bit 31". It would also be correct to use sraiw for this case, which would duplicate the original bit 31.
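As a model of the two instructions' semantics, a minimal sketch assuming the standard RV64 definitions of srliw/sraiw and C++20 arithmetic right shift (not code from the patch):

#include <cstdint>

// srliw: logically shift the low 32 bits of rs1, then sign-extend the
// 32-bit result to 64 bits. For a nonzero shift amount, bit 31 of the
// 32-bit result is 0, so the extension fills zeros and seqz/snez on
// the result tests exactly the shifted-down field.
int64_t srliw(int64_t rs1, unsigned shamt) {
  return int32_t(uint32_t(rs1) >> shamt);
}

// sraiw: arithmetic shift of the low 32 bits, duplicating the original
// bit 31 instead. The result is zero iff the tested bits are all zero,
// so it would be equally correct for an == 0 / != 0 compare.
int64_t sraiw(int64_t rs1, unsigned shamt) {
  return int32_t(rs1) >> shamt;
}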
> If so, can we generalize this by replacing the AND with a SHL to clear the high bits, and adjusting the SRL amount?

Yes.
> A: We already were generating the shift pair sequence, and this is an optimization on that.

We're generating a shift pair sequence for the 32-leading-zero case, but I don't think we do for other leading-zero amounts, which I think is what you were suggesting?

A general 0..0111..0 AND mask requires 3 shifts to implement, and we don't do that currently. If we know the user is a seteq/setne we can use 2 shifts, because we can move the bits to the LSBs for the compare.
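A sketch of that shift-count argument for a hypothetical contiguous mask with lz leading and tz trailing zero bits (illustrative only; assumes a non-empty mask, i.e. lz + tz < 64):

#include <cstdint>

// Reproducing X & M itself for a contiguous mask 0..011..10..0 takes
// three shifts: drop the low tz bits, drop the high lz bits, then move
// the field back into place.
uint64_t maskedValue(uint64_t x, unsigned lz, unsigned tz) {
  return ((x >> tz) << (tz + lz)) >> lz;
}

// If the only user is a seteq/setne against zero, two shifts suffice:
// move the masked field to the LSBs and compare that. For lz == 32 the
// shl/srl pair is what can fold into a single srliw on RV64.
bool maskedIsZero(uint64_t x, unsigned lz, unsigned tz) {
  return ((x << lz) >> (lz + tz)) == 0;
}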
Review comment on llvm/test/CodeGen/RISCV/and-negpow2-cmp.ll:

    @@ -64,3 +64,37 @@ define i1 @test4(i64 %x) {
      %b = icmp eq i64 %a, 0
      ret i1 %b
    }

    define i1 @test5(i64 %x) {
Can you precommit the tests so we can see prior codegen?
> Can you precommit the tests so we can see prior codegen?

They are in separate commits in the PR.
LGTM