Skip to content

Conversation

topperc
Copy link
Collaborator

@topperc topperc commented Aug 22, 2025

If the srl+shl have the same shift amount and the shl has the nuw flag, we can remove both.

In the affected test, the InterleavedAccess pass will emit a udiv after the mul nuw. We expect them to combine away. The remaining shifts on the RV64 tests are because we didn't add the zeroext attribute to the incoming evl operand.

…+shl combine.

If the srl+shl have the same shift amount and the shl has the nuw
flag, we can remove both.
@llvmbot
Copy link
Member

llvmbot commented Aug 22, 2025

@llvm/pr-subscribers-llvm-selectiondag

Author: Craig Topper (topperc)

Changes

If the srl+shl have the same shift amount and the shl has the nuw flag, we can remove both.


Full diff: https://github.com/llvm/llvm-project/pull/155043.diff

2 Files Affected:

  • (modified) llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (+9-1)
  • (modified) llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll (+44-65)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 15d7e7626942d..de9bcfd410440 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -4710,7 +4710,10 @@ template <class MatchContextClass> SDValue DAGCombiner::visitMUL(SDNode *N) {
     if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
       EVT ShiftVT = getShiftAmountTy(N0.getValueType());
       SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
-      return Matcher.getNode(ISD::SHL, DL, VT, N0, Trunc);
+      SDNodeFlags Flags;
+      Flags.setNoUnsignedWrap(N->getFlags().hasNoUnsignedWrap());
+      // TODO: Preserve setNoSignedWrap if LogBase2 isn't BitWidth - 1.
+      return Matcher.getNode(ISD::SHL, DL, VT, N0, Trunc, Flags);
     }
   }
 
@@ -11094,6 +11097,11 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
   if (N0.getOpcode() == ISD::SHL &&
       (N0.getOperand(1) == N1 || N0->hasOneUse()) &&
       TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
+    // If the shift amounts are the same and the shl doesn't shift out any
+    // non-zero bits, we can return the shl input.
+    if (N0.getOperand(1) == N1 && N0->getFlags().hasNoUnsignedWrap())
+      return N0.getOperand(0);
+
     auto MatchShiftAmount = [OpSizeInBits](ConstantSDNode *LHS,
                                            ConstantSDNode *RHS) {
       const APInt &LHSC = LHS->getAPIntValue();
diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll b/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll
index 2afb72fc71b39..13a836e8a7552 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll
@@ -5,16 +5,14 @@
 define {<vscale x 2 x i32>, <vscale x 2 x i32>} @load_factor2_v2(ptr %ptr, i32 %evl) {
 ; RV32-LABEL: load_factor2_v2:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    slli a1, a1, 1
-; RV32-NEXT:    srli a1, a1, 1
 ; RV32-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
 ; RV32-NEXT:    vlseg2e32.v v8, (a0)
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: load_factor2_v2:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    slli a1, a1, 33
-; RV64-NEXT:    srli a1, a1, 33
+; RV64-NEXT:    slli a1, a1, 32
+; RV64-NEXT:    srli a1, a1, 32
 ; RV64-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
 ; RV64-NEXT:    vlseg2e32.v v8, (a0)
 ; RV64-NEXT:    ret
@@ -142,16 +140,14 @@ merge:
 define {<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>} @load_factor4_v2(ptr %ptr, i32 %evl) {
 ; RV32-LABEL: load_factor4_v2:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    slli a1, a1, 2
-; RV32-NEXT:    srli a1, a1, 2
 ; RV32-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
 ; RV32-NEXT:    vlseg4e32.v v8, (a0)
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: load_factor4_v2:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    slli a1, a1, 34
-; RV64-NEXT:    srli a1, a1, 34
+; RV64-NEXT:    slli a1, a1, 32
+; RV64-NEXT:    srli a1, a1, 32
 ; RV64-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
 ; RV64-NEXT:    vlseg4e32.v v8, (a0)
 ; RV64-NEXT:    ret
@@ -237,16 +233,14 @@ define {<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2
 define {<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>} @load_factor8_v2(ptr %ptr, i32 %evl) {
 ; RV32-LABEL: load_factor8_v2:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    slli a1, a1, 3
-; RV32-NEXT:    srli a1, a1, 3
 ; RV32-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
 ; RV32-NEXT:    vlseg8e32.v v8, (a0)
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: load_factor8_v2:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    slli a1, a1, 35
-; RV64-NEXT:    srli a1, a1, 35
+; RV64-NEXT:    slli a1, a1, 32
+; RV64-NEXT:    srli a1, a1, 32
 ; RV64-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
 ; RV64-NEXT:    vlseg8e32.v v8, (a0)
 ; RV64-NEXT:    ret
@@ -276,16 +270,14 @@ define {<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2
 define void @store_factor2_v2(<vscale x 1 x i32> %v0, <vscale x 1 x i32> %v1, ptr %ptr, i32 %evl) {
 ; RV32-LABEL: store_factor2_v2:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    slli a1, a1, 1
-; RV32-NEXT:    srli a1, a1, 1
 ; RV32-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
 ; RV32-NEXT:    vsseg2e32.v v8, (a0)
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: store_factor2_v2:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    slli a1, a1, 33
-; RV64-NEXT:    srli a1, a1, 33
+; RV64-NEXT:    slli a1, a1, 32
+; RV64-NEXT:    srli a1, a1, 32
 ; RV64-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
 ; RV64-NEXT:    vsseg2e32.v v8, (a0)
 ; RV64-NEXT:    ret
@@ -384,8 +376,6 @@ define void @store_factor7_v2(<vscale x 1 x i32> %v0, <vscale x 1 x i32> %v1, <v
 define void @store_factor8_v2(<vscale x 1 x i32> %v0, <vscale x 1 x i32> %v1, ptr %ptr, i32 %evl) {
 ; RV32-LABEL: store_factor8_v2:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    slli a1, a1, 3
-; RV32-NEXT:    srli a1, a1, 3
 ; RV32-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
 ; RV32-NEXT:    vmv1r.v v10, v8
 ; RV32-NEXT:    vmv1r.v v11, v9
@@ -398,8 +388,8 @@ define void @store_factor8_v2(<vscale x 1 x i32> %v0, <vscale x 1 x i32> %v1, pt
 ;
 ; RV64-LABEL: store_factor8_v2:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    slli a1, a1, 35
-; RV64-NEXT:    srli a1, a1, 35
+; RV64-NEXT:    slli a1, a1, 32
+; RV64-NEXT:    srli a1, a1, 32
 ; RV64-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
 ; RV64-NEXT:    vmv1r.v v10, v8
 ; RV64-NEXT:    vmv1r.v v11, v9
@@ -418,16 +408,14 @@ define void @store_factor8_v2(<vscale x 1 x i32> %v0, <vscale x 1 x i32> %v1, pt
 define {<vscale x 2 x i32>, <vscale x 2 x i32>} @masked_load_factor2_v2(<vscale x 2 x i1> %mask, ptr %ptr, i32 %evl) {
 ; RV32-LABEL: masked_load_factor2_v2:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    slli a1, a1, 1
-; RV32-NEXT:    srli a1, a1, 1
 ; RV32-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
 ; RV32-NEXT:    vlseg2e32.v v8, (a0), v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: masked_load_factor2_v2:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    slli a1, a1, 33
-; RV64-NEXT:    srli a1, a1, 33
+; RV64-NEXT:    slli a1, a1, 32
+; RV64-NEXT:    srli a1, a1, 32
 ; RV64-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
 ; RV64-NEXT:    vlseg2e32.v v8, (a0), v0.t
 ; RV64-NEXT:    ret
@@ -445,16 +433,14 @@ define {<vscale x 2 x i32>, <vscale x 2 x i32>} @masked_load_factor2_v2(<vscale
 define {<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>} @masked_load_factor4_v2(<vscale x 2 x i1> %mask, ptr %ptr, i32 %evl) {
 ; RV32-LABEL: masked_load_factor4_v2:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    slli a1, a1, 2
-; RV32-NEXT:    srli a1, a1, 2
 ; RV32-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
 ; RV32-NEXT:    vlseg4e32.v v8, (a0), v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: masked_load_factor4_v2:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    slli a1, a1, 34
-; RV64-NEXT:    srli a1, a1, 34
+; RV64-NEXT:    slli a1, a1, 32
+; RV64-NEXT:    srli a1, a1, 32
 ; RV64-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
 ; RV64-NEXT:    vlseg4e32.v v8, (a0), v0.t
 ; RV64-NEXT:    ret
@@ -477,20 +463,17 @@ define {<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2
 define void @masked_store_factor2_v2(<vscale x 1 x i1> %mask, <vscale x 1 x i32> %v0, <vscale x 1 x i32> %v1, ptr %ptr, i32 %evl) {
 ; RV32-LABEL: masked_store_factor2_v2:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    slli a1, a1, 1
-; RV32-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
-; RV32-NEXT:    vmv1r.v v9, v8
-; RV32-NEXT:    srli a1, a1, 1
 ; RV32-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
+; RV32-NEXT:    vmv1r.v v9, v8
 ; RV32-NEXT:    vsseg2e32.v v8, (a0), v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: masked_store_factor2_v2:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    slli a1, a1, 33
+; RV64-NEXT:    slli a1, a1, 32
 ; RV64-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
 ; RV64-NEXT:    vmv1r.v v9, v8
-; RV64-NEXT:    srli a1, a1, 33
+; RV64-NEXT:    srli a1, a1, 32
 ; RV64-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
 ; RV64-NEXT:    vsseg2e32.v v8, (a0), v0.t
 ; RV64-NEXT:    ret
@@ -504,8 +487,6 @@ define void @masked_store_factor2_v2(<vscale x 1 x i1> %mask, <vscale x 1 x i32>
 define void @masked_load_store_factor2_v2_shared_mask(<vscale x 2 x i1> %mask, ptr %ptr, i32 %evl) {
 ; RV32-LABEL: masked_load_store_factor2_v2_shared_mask:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    slli a1, a1, 1
-; RV32-NEXT:    srli a1, a1, 1
 ; RV32-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
 ; RV32-NEXT:    vlseg2e32.v v8, (a0), v0.t
 ; RV32-NEXT:    vsseg2e32.v v8, (a0), v0.t
@@ -513,8 +494,8 @@ define void @masked_load_store_factor2_v2_shared_mask(<vscale x 2 x i1> %mask, p
 ;
 ; RV64-LABEL: masked_load_store_factor2_v2_shared_mask:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    slli a1, a1, 33
-; RV64-NEXT:    srli a1, a1, 33
+; RV64-NEXT:    slli a1, a1, 32
+; RV64-NEXT:    srli a1, a1, 32
 ; RV64-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
 ; RV64-NEXT:    vlseg2e32.v v8, (a0), v0.t
 ; RV64-NEXT:    vsseg2e32.v v8, (a0), v0.t
@@ -537,37 +518,36 @@ define i32 @masked_load_store_factor2_v2_shared_mask_extract(<vscale x 2 x i1> %
 ; RV32-NEXT:    vmv1r.v v8, v0
 ; RV32-NEXT:    slli a2, a1, 1
 ; RV32-NEXT:    vmv.v.i v9, 0
-; RV32-NEXT:    li a1, -1
+; RV32-NEXT:    li a3, -1
 ; RV32-NEXT:    vmerge.vim v10, v9, 1, v0
 ; RV32-NEXT:    vwaddu.vv v11, v10, v10
-; RV32-NEXT:    vwmaccu.vx v11, a1, v10
-; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    vwmaccu.vx v11, a3, v10
+; RV32-NEXT:    csrr a3, vlenb
 ; RV32-NEXT:    vsetvli zero, a2, e8, mf2, ta, ma
 ; RV32-NEXT:    vmv.v.i v10, 0
-; RV32-NEXT:    srli a1, a1, 2
-; RV32-NEXT:    vsetvli a3, zero, e8, mf4, ta, ma
+; RV32-NEXT:    srli a3, a3, 2
+; RV32-NEXT:    vsetvli a4, zero, e8, mf4, ta, ma
 ; RV32-NEXT:    vmsne.vi v0, v11, 0
-; RV32-NEXT:    vsetvli a3, zero, e8, mf2, ta, ma
-; RV32-NEXT:    vslidedown.vx v11, v11, a1
+; RV32-NEXT:    vsetvli a4, zero, e8, mf2, ta, ma
+; RV32-NEXT:    vslidedown.vx v11, v11, a3
 ; RV32-NEXT:    vsetvli zero, a2, e8, mf2, ta, ma
 ; RV32-NEXT:    vmerge.vim v10, v10, 1, v0
-; RV32-NEXT:    vsetvli a3, zero, e8, mf4, ta, ma
+; RV32-NEXT:    vsetvli a4, zero, e8, mf4, ta, ma
 ; RV32-NEXT:    vmsne.vi v0, v11, 0
 ; RV32-NEXT:    vmerge.vim v9, v9, 1, v0
 ; RV32-NEXT:    vsetvli zero, a2, e8, mf2, ta, ma
-; RV32-NEXT:    vslideup.vx v10, v9, a1
+; RV32-NEXT:    vslideup.vx v10, v9, a3
 ; RV32-NEXT:    vmsne.vi v0, v10, 0
 ; RV32-NEXT:    vle32.v v10, (a0), v0.t
-; RV32-NEXT:    li a1, 32
+; RV32-NEXT:    li a2, 32
 ; RV32-NEXT:    vsetvli a3, zero, e32, m1, ta, ma
-; RV32-NEXT:    vnsrl.wx v13, v10, a1
-; RV32-NEXT:    vmv.x.s a1, v10
+; RV32-NEXT:    vnsrl.wx v13, v10, a2
 ; RV32-NEXT:    vnsrl.wi v12, v10, 0
-; RV32-NEXT:    srli a2, a2, 1
+; RV32-NEXT:    vmv.x.s a2, v10
 ; RV32-NEXT:    vmv1r.v v0, v8
-; RV32-NEXT:    vsetvli zero, a2, e32, m1, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
 ; RV32-NEXT:    vsseg2e32.v v12, (a0), v0.t
-; RV32-NEXT:    mv a0, a1
+; RV32-NEXT:    mv a0, a2
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: masked_load_store_factor2_v2_shared_mask_extract:
@@ -590,20 +570,21 @@ define i32 @masked_load_store_factor2_v2_shared_mask_extract(<vscale x 2 x i1> %
 ; RV64-NEXT:    vmerge.vim v10, v10, 1, v0
 ; RV64-NEXT:    vsetvli a3, zero, e8, mf4, ta, ma
 ; RV64-NEXT:    vmsne.vi v0, v11, 0
-; RV64-NEXT:    slli a3, a1, 33
 ; RV64-NEXT:    vmerge.vim v9, v9, 1, v0
-; RV64-NEXT:    vsetvli a1, zero, e8, mf2, ta, ma
+; RV64-NEXT:    vsetvli a3, zero, e8, mf2, ta, ma
 ; RV64-NEXT:    vslideup.vx v10, v9, a2
+; RV64-NEXT:    slli a2, a1, 33
 ; RV64-NEXT:    vmsne.vi v0, v10, 0
-; RV64-NEXT:    srli a1, a3, 32
-; RV64-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; RV64-NEXT:    srli a2, a2, 32
+; RV64-NEXT:    vsetvli zero, a2, e32, m2, ta, ma
 ; RV64-NEXT:    vle32.v v10, (a0), v0.t
-; RV64-NEXT:    li a1, 32
-; RV64-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
-; RV64-NEXT:    vnsrl.wx v13, v10, a1
+; RV64-NEXT:    li a2, 32
+; RV64-NEXT:    slli a3, a1, 32
+; RV64-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
+; RV64-NEXT:    vnsrl.wx v13, v10, a2
 ; RV64-NEXT:    vmv.x.s a1, v10
 ; RV64-NEXT:    vnsrl.wi v12, v10, 0
-; RV64-NEXT:    srli a3, a3, 33
+; RV64-NEXT:    srli a3, a3, 32
 ; RV64-NEXT:    vmv1r.v v0, v8
 ; RV64-NEXT:    vsetvli zero, a3, e32, m1, ta, ma
 ; RV64-NEXT:    vsseg2e32.v v12, (a0), v0.t
@@ -624,8 +605,6 @@ define i32 @masked_load_store_factor2_v2_shared_mask_extract(<vscale x 2 x i1> %
 define void @masked_store_factor4_v2(<vscale x 1 x i1> %mask, <vscale x 1 x i32> %v0, <vscale x 1 x i32> %v1, ptr %ptr, i32 %evl) {
 ; RV32-LABEL: masked_store_factor4_v2:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    slli a1, a1, 2
-; RV32-NEXT:    srli a1, a1, 2
 ; RV32-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
 ; RV32-NEXT:    vmv1r.v v10, v8
 ; RV32-NEXT:    vmv1r.v v11, v9
@@ -634,8 +613,8 @@ define void @masked_store_factor4_v2(<vscale x 1 x i1> %mask, <vscale x 1 x i32>
 ;
 ; RV64-LABEL: masked_store_factor4_v2:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    slli a1, a1, 34
-; RV64-NEXT:    srli a1, a1, 34
+; RV64-NEXT:    slli a1, a1, 32
+; RV64-NEXT:    srli a1, a1, 32
 ; RV64-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
 ; RV64-NEXT:    vmv1r.v v10, v8
 ; RV64-NEXT:    vmv1r.v v11, v9

@llvmbot
Copy link
Member

llvmbot commented Aug 22, 2025

@llvm/pr-subscribers-backend-risc-v

Author: Craig Topper (topperc)

Changes

If the srl+shl have the same shift amount and the shl has the nuw flag, we can remove both.


Full diff: https://github.com/llvm/llvm-project/pull/155043.diff

2 Files Affected:

  • (modified) llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (+9-1)
  • (modified) llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll (+44-65)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 15d7e7626942d..de9bcfd410440 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -4710,7 +4710,10 @@ template <class MatchContextClass> SDValue DAGCombiner::visitMUL(SDNode *N) {
     if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
       EVT ShiftVT = getShiftAmountTy(N0.getValueType());
       SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
-      return Matcher.getNode(ISD::SHL, DL, VT, N0, Trunc);
+      SDNodeFlags Flags;
+      Flags.setNoUnsignedWrap(N->getFlags().hasNoUnsignedWrap());
+      // TODO: Preserve setNoSignedWrap if LogBase2 isn't BitWidth - 1.
+      return Matcher.getNode(ISD::SHL, DL, VT, N0, Trunc, Flags);
     }
   }
 
@@ -11094,6 +11097,11 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
   if (N0.getOpcode() == ISD::SHL &&
       (N0.getOperand(1) == N1 || N0->hasOneUse()) &&
       TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
+    // If the shift amounts are the same and the shl doesn't shift out any
+    // non-zero bits, we can return the shl input.
+    if (N0.getOperand(1) == N1 && N0->getFlags().hasNoUnsignedWrap())
+      return N0.getOperand(0);
+
     auto MatchShiftAmount = [OpSizeInBits](ConstantSDNode *LHS,
                                            ConstantSDNode *RHS) {
       const APInt &LHSC = LHS->getAPIntValue();
diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll b/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll
index 2afb72fc71b39..13a836e8a7552 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll
@@ -5,16 +5,14 @@
 define {<vscale x 2 x i32>, <vscale x 2 x i32>} @load_factor2_v2(ptr %ptr, i32 %evl) {
 ; RV32-LABEL: load_factor2_v2:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    slli a1, a1, 1
-; RV32-NEXT:    srli a1, a1, 1
 ; RV32-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
 ; RV32-NEXT:    vlseg2e32.v v8, (a0)
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: load_factor2_v2:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    slli a1, a1, 33
-; RV64-NEXT:    srli a1, a1, 33
+; RV64-NEXT:    slli a1, a1, 32
+; RV64-NEXT:    srli a1, a1, 32
 ; RV64-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
 ; RV64-NEXT:    vlseg2e32.v v8, (a0)
 ; RV64-NEXT:    ret
@@ -142,16 +140,14 @@ merge:
 define {<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>} @load_factor4_v2(ptr %ptr, i32 %evl) {
 ; RV32-LABEL: load_factor4_v2:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    slli a1, a1, 2
-; RV32-NEXT:    srli a1, a1, 2
 ; RV32-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
 ; RV32-NEXT:    vlseg4e32.v v8, (a0)
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: load_factor4_v2:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    slli a1, a1, 34
-; RV64-NEXT:    srli a1, a1, 34
+; RV64-NEXT:    slli a1, a1, 32
+; RV64-NEXT:    srli a1, a1, 32
 ; RV64-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
 ; RV64-NEXT:    vlseg4e32.v v8, (a0)
 ; RV64-NEXT:    ret
@@ -237,16 +233,14 @@ define {<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2
 define {<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>} @load_factor8_v2(ptr %ptr, i32 %evl) {
 ; RV32-LABEL: load_factor8_v2:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    slli a1, a1, 3
-; RV32-NEXT:    srli a1, a1, 3
 ; RV32-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
 ; RV32-NEXT:    vlseg8e32.v v8, (a0)
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: load_factor8_v2:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    slli a1, a1, 35
-; RV64-NEXT:    srli a1, a1, 35
+; RV64-NEXT:    slli a1, a1, 32
+; RV64-NEXT:    srli a1, a1, 32
 ; RV64-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
 ; RV64-NEXT:    vlseg8e32.v v8, (a0)
 ; RV64-NEXT:    ret
@@ -276,16 +270,14 @@ define {<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2
 define void @store_factor2_v2(<vscale x 1 x i32> %v0, <vscale x 1 x i32> %v1, ptr %ptr, i32 %evl) {
 ; RV32-LABEL: store_factor2_v2:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    slli a1, a1, 1
-; RV32-NEXT:    srli a1, a1, 1
 ; RV32-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
 ; RV32-NEXT:    vsseg2e32.v v8, (a0)
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: store_factor2_v2:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    slli a1, a1, 33
-; RV64-NEXT:    srli a1, a1, 33
+; RV64-NEXT:    slli a1, a1, 32
+; RV64-NEXT:    srli a1, a1, 32
 ; RV64-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
 ; RV64-NEXT:    vsseg2e32.v v8, (a0)
 ; RV64-NEXT:    ret
@@ -384,8 +376,6 @@ define void @store_factor7_v2(<vscale x 1 x i32> %v0, <vscale x 1 x i32> %v1, <v
 define void @store_factor8_v2(<vscale x 1 x i32> %v0, <vscale x 1 x i32> %v1, ptr %ptr, i32 %evl) {
 ; RV32-LABEL: store_factor8_v2:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    slli a1, a1, 3
-; RV32-NEXT:    srli a1, a1, 3
 ; RV32-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
 ; RV32-NEXT:    vmv1r.v v10, v8
 ; RV32-NEXT:    vmv1r.v v11, v9
@@ -398,8 +388,8 @@ define void @store_factor8_v2(<vscale x 1 x i32> %v0, <vscale x 1 x i32> %v1, pt
 ;
 ; RV64-LABEL: store_factor8_v2:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    slli a1, a1, 35
-; RV64-NEXT:    srli a1, a1, 35
+; RV64-NEXT:    slli a1, a1, 32
+; RV64-NEXT:    srli a1, a1, 32
 ; RV64-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
 ; RV64-NEXT:    vmv1r.v v10, v8
 ; RV64-NEXT:    vmv1r.v v11, v9
@@ -418,16 +408,14 @@ define void @store_factor8_v2(<vscale x 1 x i32> %v0, <vscale x 1 x i32> %v1, pt
 define {<vscale x 2 x i32>, <vscale x 2 x i32>} @masked_load_factor2_v2(<vscale x 2 x i1> %mask, ptr %ptr, i32 %evl) {
 ; RV32-LABEL: masked_load_factor2_v2:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    slli a1, a1, 1
-; RV32-NEXT:    srli a1, a1, 1
 ; RV32-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
 ; RV32-NEXT:    vlseg2e32.v v8, (a0), v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: masked_load_factor2_v2:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    slli a1, a1, 33
-; RV64-NEXT:    srli a1, a1, 33
+; RV64-NEXT:    slli a1, a1, 32
+; RV64-NEXT:    srli a1, a1, 32
 ; RV64-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
 ; RV64-NEXT:    vlseg2e32.v v8, (a0), v0.t
 ; RV64-NEXT:    ret
@@ -445,16 +433,14 @@ define {<vscale x 2 x i32>, <vscale x 2 x i32>} @masked_load_factor2_v2(<vscale
 define {<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>} @masked_load_factor4_v2(<vscale x 2 x i1> %mask, ptr %ptr, i32 %evl) {
 ; RV32-LABEL: masked_load_factor4_v2:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    slli a1, a1, 2
-; RV32-NEXT:    srli a1, a1, 2
 ; RV32-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
 ; RV32-NEXT:    vlseg4e32.v v8, (a0), v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: masked_load_factor4_v2:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    slli a1, a1, 34
-; RV64-NEXT:    srli a1, a1, 34
+; RV64-NEXT:    slli a1, a1, 32
+; RV64-NEXT:    srli a1, a1, 32
 ; RV64-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
 ; RV64-NEXT:    vlseg4e32.v v8, (a0), v0.t
 ; RV64-NEXT:    ret
@@ -477,20 +463,17 @@ define {<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2
 define void @masked_store_factor2_v2(<vscale x 1 x i1> %mask, <vscale x 1 x i32> %v0, <vscale x 1 x i32> %v1, ptr %ptr, i32 %evl) {
 ; RV32-LABEL: masked_store_factor2_v2:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    slli a1, a1, 1
-; RV32-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
-; RV32-NEXT:    vmv1r.v v9, v8
-; RV32-NEXT:    srli a1, a1, 1
 ; RV32-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
+; RV32-NEXT:    vmv1r.v v9, v8
 ; RV32-NEXT:    vsseg2e32.v v8, (a0), v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: masked_store_factor2_v2:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    slli a1, a1, 33
+; RV64-NEXT:    slli a1, a1, 32
 ; RV64-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
 ; RV64-NEXT:    vmv1r.v v9, v8
-; RV64-NEXT:    srli a1, a1, 33
+; RV64-NEXT:    srli a1, a1, 32
 ; RV64-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
 ; RV64-NEXT:    vsseg2e32.v v8, (a0), v0.t
 ; RV64-NEXT:    ret
@@ -504,8 +487,6 @@ define void @masked_store_factor2_v2(<vscale x 1 x i1> %mask, <vscale x 1 x i32>
 define void @masked_load_store_factor2_v2_shared_mask(<vscale x 2 x i1> %mask, ptr %ptr, i32 %evl) {
 ; RV32-LABEL: masked_load_store_factor2_v2_shared_mask:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    slli a1, a1, 1
-; RV32-NEXT:    srli a1, a1, 1
 ; RV32-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
 ; RV32-NEXT:    vlseg2e32.v v8, (a0), v0.t
 ; RV32-NEXT:    vsseg2e32.v v8, (a0), v0.t
@@ -513,8 +494,8 @@ define void @masked_load_store_factor2_v2_shared_mask(<vscale x 2 x i1> %mask, p
 ;
 ; RV64-LABEL: masked_load_store_factor2_v2_shared_mask:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    slli a1, a1, 33
-; RV64-NEXT:    srli a1, a1, 33
+; RV64-NEXT:    slli a1, a1, 32
+; RV64-NEXT:    srli a1, a1, 32
 ; RV64-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
 ; RV64-NEXT:    vlseg2e32.v v8, (a0), v0.t
 ; RV64-NEXT:    vsseg2e32.v v8, (a0), v0.t
@@ -537,37 +518,36 @@ define i32 @masked_load_store_factor2_v2_shared_mask_extract(<vscale x 2 x i1> %
 ; RV32-NEXT:    vmv1r.v v8, v0
 ; RV32-NEXT:    slli a2, a1, 1
 ; RV32-NEXT:    vmv.v.i v9, 0
-; RV32-NEXT:    li a1, -1
+; RV32-NEXT:    li a3, -1
 ; RV32-NEXT:    vmerge.vim v10, v9, 1, v0
 ; RV32-NEXT:    vwaddu.vv v11, v10, v10
-; RV32-NEXT:    vwmaccu.vx v11, a1, v10
-; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    vwmaccu.vx v11, a3, v10
+; RV32-NEXT:    csrr a3, vlenb
 ; RV32-NEXT:    vsetvli zero, a2, e8, mf2, ta, ma
 ; RV32-NEXT:    vmv.v.i v10, 0
-; RV32-NEXT:    srli a1, a1, 2
-; RV32-NEXT:    vsetvli a3, zero, e8, mf4, ta, ma
+; RV32-NEXT:    srli a3, a3, 2
+; RV32-NEXT:    vsetvli a4, zero, e8, mf4, ta, ma
 ; RV32-NEXT:    vmsne.vi v0, v11, 0
-; RV32-NEXT:    vsetvli a3, zero, e8, mf2, ta, ma
-; RV32-NEXT:    vslidedown.vx v11, v11, a1
+; RV32-NEXT:    vsetvli a4, zero, e8, mf2, ta, ma
+; RV32-NEXT:    vslidedown.vx v11, v11, a3
 ; RV32-NEXT:    vsetvli zero, a2, e8, mf2, ta, ma
 ; RV32-NEXT:    vmerge.vim v10, v10, 1, v0
-; RV32-NEXT:    vsetvli a3, zero, e8, mf4, ta, ma
+; RV32-NEXT:    vsetvli a4, zero, e8, mf4, ta, ma
 ; RV32-NEXT:    vmsne.vi v0, v11, 0
 ; RV32-NEXT:    vmerge.vim v9, v9, 1, v0
 ; RV32-NEXT:    vsetvli zero, a2, e8, mf2, ta, ma
-; RV32-NEXT:    vslideup.vx v10, v9, a1
+; RV32-NEXT:    vslideup.vx v10, v9, a3
 ; RV32-NEXT:    vmsne.vi v0, v10, 0
 ; RV32-NEXT:    vle32.v v10, (a0), v0.t
-; RV32-NEXT:    li a1, 32
+; RV32-NEXT:    li a2, 32
 ; RV32-NEXT:    vsetvli a3, zero, e32, m1, ta, ma
-; RV32-NEXT:    vnsrl.wx v13, v10, a1
-; RV32-NEXT:    vmv.x.s a1, v10
+; RV32-NEXT:    vnsrl.wx v13, v10, a2
 ; RV32-NEXT:    vnsrl.wi v12, v10, 0
-; RV32-NEXT:    srli a2, a2, 1
+; RV32-NEXT:    vmv.x.s a2, v10
 ; RV32-NEXT:    vmv1r.v v0, v8
-; RV32-NEXT:    vsetvli zero, a2, e32, m1, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
 ; RV32-NEXT:    vsseg2e32.v v12, (a0), v0.t
-; RV32-NEXT:    mv a0, a1
+; RV32-NEXT:    mv a0, a2
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: masked_load_store_factor2_v2_shared_mask_extract:
@@ -590,20 +570,21 @@ define i32 @masked_load_store_factor2_v2_shared_mask_extract(<vscale x 2 x i1> %
 ; RV64-NEXT:    vmerge.vim v10, v10, 1, v0
 ; RV64-NEXT:    vsetvli a3, zero, e8, mf4, ta, ma
 ; RV64-NEXT:    vmsne.vi v0, v11, 0
-; RV64-NEXT:    slli a3, a1, 33
 ; RV64-NEXT:    vmerge.vim v9, v9, 1, v0
-; RV64-NEXT:    vsetvli a1, zero, e8, mf2, ta, ma
+; RV64-NEXT:    vsetvli a3, zero, e8, mf2, ta, ma
 ; RV64-NEXT:    vslideup.vx v10, v9, a2
+; RV64-NEXT:    slli a2, a1, 33
 ; RV64-NEXT:    vmsne.vi v0, v10, 0
-; RV64-NEXT:    srli a1, a3, 32
-; RV64-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; RV64-NEXT:    srli a2, a2, 32
+; RV64-NEXT:    vsetvli zero, a2, e32, m2, ta, ma
 ; RV64-NEXT:    vle32.v v10, (a0), v0.t
-; RV64-NEXT:    li a1, 32
-; RV64-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
-; RV64-NEXT:    vnsrl.wx v13, v10, a1
+; RV64-NEXT:    li a2, 32
+; RV64-NEXT:    slli a3, a1, 32
+; RV64-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
+; RV64-NEXT:    vnsrl.wx v13, v10, a2
 ; RV64-NEXT:    vmv.x.s a1, v10
 ; RV64-NEXT:    vnsrl.wi v12, v10, 0
-; RV64-NEXT:    srli a3, a3, 33
+; RV64-NEXT:    srli a3, a3, 32
 ; RV64-NEXT:    vmv1r.v v0, v8
 ; RV64-NEXT:    vsetvli zero, a3, e32, m1, ta, ma
 ; RV64-NEXT:    vsseg2e32.v v12, (a0), v0.t
@@ -624,8 +605,6 @@ define i32 @masked_load_store_factor2_v2_shared_mask_extract(<vscale x 2 x i1> %
 define void @masked_store_factor4_v2(<vscale x 1 x i1> %mask, <vscale x 1 x i32> %v0, <vscale x 1 x i32> %v1, ptr %ptr, i32 %evl) {
 ; RV32-LABEL: masked_store_factor4_v2:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    slli a1, a1, 2
-; RV32-NEXT:    srli a1, a1, 2
 ; RV32-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
 ; RV32-NEXT:    vmv1r.v v10, v8
 ; RV32-NEXT:    vmv1r.v v11, v9
@@ -634,8 +613,8 @@ define void @masked_store_factor4_v2(<vscale x 1 x i1> %mask, <vscale x 1 x i32>
 ;
 ; RV64-LABEL: masked_store_factor4_v2:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    slli a1, a1, 34
-; RV64-NEXT:    srli a1, a1, 34
+; RV64-NEXT:    slli a1, a1, 32
+; RV64-NEXT:    srli a1, a1, 32
 ; RV64-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
 ; RV64-NEXT:    vmv1r.v v10, v8
 ; RV64-NEXT:    vmv1r.v v11, v9

Comment on lines 11100 to 11103
// If the shift amounts are the same and the shl doesn't shift out any
// non-zero bits, we can return the shl input.
if (N0.getOperand(1) == N1 && N0->getFlags().hasNoUnsignedWrap())
return N0.getOperand(0);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you're dropping the whole operation, should this be hoisted above the above profitability checks?

Copy link
Contributor

@lukel97 lukel97 left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM

@topperc topperc merged commit 5628964 into llvm:main Aug 26, 2025
9 checks passed
@topperc topperc deleted the pr/nuw branch August 26, 2025 03:44
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
backend:RISC-V llvm:SelectionDAG SelectionDAGISel as well
Projects
None yet
Development

Successfully merging this pull request may close these issues.

4 participants