[RISCV] Fold (X & (7 << 29)) == 0 -> (srliw X, 29) == 0 for RV64. #156769
@llvm/pr-subscribers-backend-risc-v

Author: Craig Topper (topperc)

Changes

This is similar to the recently added (X & -4096) == 0 -> (X >> 12) == 0, but operating only on the lower 32 bits.

This also removes the (X & (1 << 31)) == 0 -> (xor (srliw X, 31), 1) isel pattern. seqz and xori 1 should have similar cost and encoding size.

Full diff: https://github.com/llvm/llvm-project/pull/156769.diff

4 Files Affected:
- llvm/lib/Target/RISCV/RISCVISelLowering.cpp
- llvm/lib/Target/RISCV/RISCVInstrInfo.td
- llvm/test/CodeGen/RISCV/and-negpow2-cmp.ll
- llvm/test/CodeGen/RISCV/bittest.ll
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 8c3279517527e..b012c172e37d2 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -16726,10 +16726,6 @@ combineVectorSizedSetCCEquality(EVT VT, SDValue X, SDValue Y, ISD::CondCode CC,
DAG.getConstant(0, DL, XLenVT), CC);
}
-// Replace (seteq (i64 (and X, 0xffffffff)), C1) with
-// (seteq (i64 (sext_inreg (X, i32)), C1')) where C1' is C1 sign extended from
-// bit 31. Same for setne. C1' may be cheaper to materialize and the sext_inreg
-// can become a sext.w instead of a shift pair.
static SDValue performSETCCCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const RISCVSubtarget &Subtarget) {
@@ -16749,20 +16745,36 @@ static SDValue performSETCCCombine(SDNode *N,
combineVectorSizedSetCCEquality(VT, N0, N1, Cond, dl, DAG, Subtarget))
return V;
- // (X & -4096) == 0 -> (X >> 12) == 0 if the AND constant can't use ANDI.
if (DCI.isAfterLegalizeDAG() && isNullConstant(N1) &&
N0.getOpcode() == ISD::AND && N0.hasOneUse() &&
isa<ConstantSDNode>(N0.getOperand(1))) {
const APInt &AndRHSC =
cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+ // (X & -4096) == 0 -> (X >> 12) == 0 if the AND constant can't use ANDI.
if (!isInt<12>(AndRHSC.getSExtValue()) && AndRHSC.isNegatedPowerOf2()) {
unsigned ShiftBits = AndRHSC.countr_zero();
- SDValue Shift = DAG.getNode(ISD::SRL, dl, VT, N0.getOperand(0),
- DAG.getConstant(ShiftBits, dl, VT));
+ SDValue Shift = DAG.getNode(ISD::SRL, dl, OpVT, N0.getOperand(0),
+ DAG.getConstant(ShiftBits, dl, OpVT));
+ return DAG.getSetCC(dl, VT, Shift, N1, Cond);
+ }
+
+ // Similar to above but handling the lower 32 bits by using srliw.
+ // FIXME: Handle the case where N1 is non-zero.
+ if (OpVT == MVT::i64 && AndRHSC.getZExtValue() <= 0xffffffff &&
+ isPowerOf2_32(-uint32_t(AndRHSC.getZExtValue()))) {
+ unsigned ShiftBits = llvm::countr_zero(AndRHSC.getZExtValue());
+ SDValue And = DAG.getNode(ISD::AND, dl, OpVT, N0.getOperand(0),
+ DAG.getConstant(0xffffffff, dl, OpVT));
+ SDValue Shift = DAG.getNode(ISD::SRL, dl, OpVT, And,
+ DAG.getConstant(ShiftBits, dl, OpVT));
return DAG.getSetCC(dl, VT, Shift, N1, Cond);
}
}
+ // Replace (seteq (i64 (and X, 0xffffffff)), C1) with
+ // (seteq (i64 (sext_inreg (X, i32)), C1')) where C1' is C1 sign extended from
+ // bit 31. Same for setne. C1' may be cheaper to materialize and the
+ // sext_inreg can become a sext.w instead of a shift pair.
if (OpVT != MVT::i64 || !Subtarget.is64Bit())
return SDValue();
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
index 23f5a848137c4..4b6870554ab60 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -1698,8 +1698,6 @@ let Predicates = [IsRV32] in {
def : Pat<(i32 (setlt (i32 GPR:$rs1), 0)), (SRLI GPR:$rs1, 31)>; // compressible
}
let Predicates = [IsRV64] in {
-def : Pat<(i64 (seteq (i64 (and GPR:$rs1, 0x0000000080000000)), 0)),
- (XORI (i64 (SRLIW GPR:$rs1, 31)), 1)>;
def : Pat<(i64 (setlt (i64 GPR:$rs1), 0)), (SRLI GPR:$rs1, 63)>; // compressible
def : Pat<(i64 (setlt (sext_inreg GPR:$rs1, i32), 0)), (SRLIW GPR:$rs1, 31)>;
}
diff --git a/llvm/test/CodeGen/RISCV/and-negpow2-cmp.ll b/llvm/test/CodeGen/RISCV/and-negpow2-cmp.ll
index b16672d3c4a16..5476d49fca4a1 100644
--- a/llvm/test/CodeGen/RISCV/and-negpow2-cmp.ll
+++ b/llvm/test/CodeGen/RISCV/and-negpow2-cmp.ll
@@ -64,3 +64,37 @@ define i1 @test4(i64 %x) {
%b = icmp eq i64 %a, 0
ret i1 %b
}
+
+define i1 @test5(i64 %x) {
+; RV32-LABEL: test5:
+; RV32: # %bb.0:
+; RV32-NEXT: srli a0, a0, 29
+; RV32-NEXT: seqz a0, a0
+; RV32-NEXT: ret
+;
+; RV64-LABEL: test5:
+; RV64: # %bb.0:
+; RV64-NEXT: srliw a0, a0, 29
+; RV64-NEXT: seqz a0, a0
+; RV64-NEXT: ret
+ %a = and i64 %x, 3758096384
+ %b = icmp eq i64 %a, 0
+ ret i1 %b
+}
+
+define i1 @test6(i64 %x) {
+; RV32-LABEL: test6:
+; RV32: # %bb.0:
+; RV32-NEXT: srli a0, a0, 29
+; RV32-NEXT: snez a0, a0
+; RV32-NEXT: ret
+;
+; RV64-LABEL: test6:
+; RV64: # %bb.0:
+; RV64-NEXT: srliw a0, a0, 29
+; RV64-NEXT: snez a0, a0
+; RV64-NEXT: ret
+ %a = and i64 %x, 3758096384
+ %b = icmp ne i64 %a, 0
+ ret i1 %b
+}
diff --git a/llvm/test/CodeGen/RISCV/bittest.ll b/llvm/test/CodeGen/RISCV/bittest.ll
index 40e3168d08b27..5d40d8f2d860d 100644
--- a/llvm/test/CodeGen/RISCV/bittest.ll
+++ b/llvm/test/CodeGen/RISCV/bittest.ll
@@ -188,13 +188,13 @@ define i64 @bittest_31_i64(i64 %a) nounwind {
; RV64ZBS-LABEL: bittest_31_i64:
; RV64ZBS: # %bb.0:
; RV64ZBS-NEXT: srliw a0, a0, 31
-; RV64ZBS-NEXT: xori a0, a0, 1
+; RV64ZBS-NEXT: seqz a0, a0
; RV64ZBS-NEXT: ret
;
; RV64XTHEADBS-LABEL: bittest_31_i64:
; RV64XTHEADBS: # %bb.0:
; RV64XTHEADBS-NEXT: srliw a0, a0, 31
-; RV64XTHEADBS-NEXT: xori a0, a0, 1
+; RV64XTHEADBS-NEXT: seqz a0, a0
; RV64XTHEADBS-NEXT: ret
%shr = lshr i64 %a, 31
%not = xor i64 %shr, -1
@@ -3518,7 +3518,7 @@ define i32 @bittest_31_andeq0_i64(i64 %x) {
; RV64-LABEL: bittest_31_andeq0_i64:
; RV64: # %bb.0:
; RV64-NEXT: srliw a0, a0, 31
-; RV64-NEXT: xori a0, a0, 1
+; RV64-NEXT: seqz a0, a0
; RV64-NEXT: ret
%and = and i64 %x, 2147483648
%cmp = icmp eq i64 %and, 0
This is similar to the recently added (X & -4096) == 0 -> (X >> 12) == 0, but operating only on the lower 32 bits. This also removes the (X & (1 << 31)) == 0 -> (xor (srliw X, 31), 1) isel pattern. seqz and xori 1 should have similar cost and encoding size.
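As a sanity check of the equivalence behind the fold, here is an illustrative C++ sketch (not code from the patch; the helper names are made up):

#include <cassert>
#include <cstdint>

// (X & (7 << 29)) == 0 tests bits 29..31 of X. srliw logically shifts
// the low 32 bits of its source and, for a nonzero shift amount, fills
// the upper result bits with zeros, so ((uint32_t)X >> 29) == 0 tests
// exactly the same bits.
bool viaAnd(uint64_t x) { return (x & (UINT64_C(7) << 29)) == 0; }
bool viaSrliw(uint64_t x) { return (uint32_t(x) >> 29) == 0; }

int main() {
  const uint64_t samples[] = {0, UINT64_C(1) << 29, UINT64_C(1) << 31,
                              UINT64_C(1) << 32, ~UINT64_C(0)};
  for (uint64_t x : samples)
    assert(viaAnd(x) == viaSrliw(x));
}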
Force-pushed from 59f3972 to b844a7b.
Review comment on the new code in RISCVISelLowering.cpp:

    // Similar to above but handling the lower 32 bits by using srliw.
    // FIXME: Handle the case where N1 is non-zero.
    if (OpVT == MVT::i64 && AndRHSC.getZExtValue() <= 0xffffffff &&
To make sure I follow, this is handling the case where a 0..01111..0 mask (with the 0..0 being exactly the high 32 bits), by masking out the bottom 32 and then doing the same shift compare as above? And this happens to fold into a srlw because we only care about the zero-ness of the shift, so extending bit 31 is fine?
If so, can we generalize this by replacing the AND with a SHL to clear the high bits, and adjusting the SRL amount?
A: We already were generating the shift pair sequence, and this is an optimization on that.
> And this happens to fold into a srlw because we only care about the zero-ness of the shift, so extending bit 31 is fine?

srliw fills zeros into any result bits above the original bit 31; I'm not sure if that's what you mean by "extending bit 31". It would also be correct to use sraiw for this case, which would duplicate the original bit 31.
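As a model of the two instructions' semantics, a minimal sketch assuming the standard RV64 definitions of srliw/sraiw and C++20 arithmetic right shift (not code from the patch):

#include <cstdint>

// srliw: logically shift the low 32 bits of rs1, then sign-extend the
// 32-bit result to 64 bits. For a nonzero shift amount, bit 31 of the
// 32-bit result is 0, so the extension fills zeros and seqz/snez on
// the result tests exactly the shifted-down field.
int64_t srliw(int64_t rs1, unsigned shamt) {
  return int32_t(uint32_t(rs1) >> shamt);
}

// sraiw: arithmetic shift of the low 32 bits, duplicating the original
// bit 31 instead. The result is zero iff the tested bits are all zero,
// so it would be equally correct for an == 0 / != 0 compare.
int64_t sraiw(int64_t rs1, unsigned shamt) {
  return int32_t(rs1) >> shamt;
}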
> If so, can we generalize this by replacing the AND with a SHL to clear the high bits, and adjusting the SRL amount?

Yes.
> A: We already were generating the shift pair sequence, and this is an optimization on that.

We're generating a shift pair sequence for the 32-leading-zero case, but I don't think we do for other leading-zero amounts, which I think is what you were suggesting?

A general 0..0111..0 AND mask requires 3 shifts to implement, and we don't do that currently. If we know the user is a seteq/setne we can use 2 shifts, because we can move the bits to the LSBs for the compare.
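A sketch of that shift-count argument for a hypothetical contiguous mask with lz leading and tz trailing zero bits (illustrative only; assumes a non-empty mask, i.e. lz + tz < 64):

#include <cstdint>

// Reproducing X & M itself for a contiguous mask 0..011..10..0 takes
// three shifts: drop the low tz bits, drop the high lz bits, then move
// the field back into place.
uint64_t maskedValue(uint64_t x, unsigned lz, unsigned tz) {
  return ((x >> tz) << (tz + lz)) >> lz;
}

// If the only user is a seteq/setne against zero, two shifts suffice:
// move the masked field to the LSBs and compare that. For lz == 32 the
// shl/srl pair is what can fold into a single srliw on RV64.
bool maskedIsZero(uint64_t x, unsigned lz, unsigned tz) {
  return ((x << lz) >> (lz + tz)) == 0;
}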
Review comment on llvm/test/CodeGen/RISCV/and-negpow2-cmp.ll:

    @@ -64,3 +64,37 @@ define i1 @test4(i64 %x) {
      %b = icmp eq i64 %a, 0
      ret i1 %b
    }

    define i1 @test5(i64 %x) {
Can you precommit the tests so we can see prior codegen?
> Can you precommit the tests so we can see prior codegen?

They are in separate commits in the PR.
LGTM