[LV] Create in-loop sub reductions #147026

SamTebbs33 · 2025-07-04T09:24:12Z

This PR allows the loop vectorizer to handle in-loop sub reductions by forming a normal in-loop add reduction with a negated input.

Stacked PRs:

llvmbot · 2025-07-04T09:24:46Z

@llvm/pr-subscribers-backend-aarch64
@llvm/pr-subscribers-llvm-analysis

@llvm/pr-subscribers-vectorizers

Author: Sam Tebbs (SamTebbs33)

Changes

This PR allows the loop vectorizer to handle in-loop sub reductions by forming a normal in-loop add reduction with a negated input.

Full diff: https://github.com/llvm/llvm-project/pull/147026.diff

3 Files Affected:

(modified) llvm/lib/Analysis/IVDescriptors.cpp (+4)
(modified) llvm/lib/Transforms/Vectorize/LoopVectorize.cpp (+8)
(modified) llvm/test/Transforms/LoopVectorize/reduction-inloop.ll (+5-4)

diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp
index b275b1064cef2..2b7d06e0c682e 100644
--- a/llvm/lib/Analysis/IVDescriptors.cpp
+++ b/llvm/lib/Analysis/IVDescriptors.cpp
@@ -1263,6 +1263,10 @@ RecurrenceDescriptor::getReductionOpChain(PHINode *Phi, Loop *L) const {
     if (isFMulAddIntrinsic(Cur))
       return true;
 
+    // Recognize a sub reduction. It gets canonicalized to add(sub (0, ...)).
+    if (Cur->getOpcode() == Instruction::Sub && getOpcode() == Instruction::Add)
+      return true;
+
     return Cur->getOpcode() == getOpcode();
   };
 
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 907839711a39c..1cfbcf1336620 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -9144,6 +9144,14 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
             CurrentLinkI->getFastMathFlags());
         LinkVPBB->insert(FMulRecipe, CurrentLink->getIterator());
         VecOp = FMulRecipe;
+      } else if (PhiR->isInLoop() && Kind == RecurKind::Add &&
+                 CurrentLinkI->getOpcode() == Instruction::Sub) {
+        Type *PhiTy = PhiR->getUnderlyingValue()->getType();
+        auto *Zero = Plan->getOrAddLiveIn(ConstantInt::get(PhiTy, 0));
+        VPWidenRecipe *Sub = new VPWidenRecipe(
+            *CurrentLinkI, {Zero, CurrentLink->getOperand(1)});
+        LinkVPBB->insert(Sub, CurrentLink->getIterator());
+        VecOp = Sub;
       } else {
         if (RecurrenceDescriptor::isMinMaxRecurrenceKind(Kind)) {
           if (isa<VPWidenRecipe>(CurrentLink)) {
diff --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll
index e762c9ff81322..4c6e0e487d0dd 100644
--- a/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll
+++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll
@@ -627,7 +627,7 @@ for.end:                                          ; preds = %for.body, %entry
   ret float %result.0.lcssa
 }
 
-; Sub we can create a reduction, but not inloop
+; Sub we can create a reduction inloop
 define i32 @reduction_sub_lhs(ptr noalias nocapture %A) {
 ; CHECK-LABEL: @reduction_sub_lhs(
 ; CHECK-NEXT:  entry:
@@ -636,15 +636,16 @@ define i32 @reduction_sub_lhs(ptr noalias nocapture %A) {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
-; CHECK-NEXT:    [[TMP1]] = sub <4 x i32> [[VEC_PHI]], [[WIDE_LOAD]]
+; CHECK-NEXT:    [[TMP4:%.*]] = sub nsw <4 x i32> zeroinitializer, [[WIDE_LOAD]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP4]])
+; CHECK-NEXT:    [[TMP3]] = add i32 [[TMP1]], [[VEC_PHI]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
 ; CHECK-NEXT:    br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
 ; CHECK:       middle.block:
-; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP1]])
 ; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
 ; CHECK:       scalar.ph:
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]

llvmbot · 2025-07-04T09:24:47Z

@llvm/pr-subscribers-llvm-transforms

Author: Sam Tebbs (SamTebbs33)

Changes

This PR allows the loop vectorizer to handle in-loop sub reductions by forming a normal in-loop add reduction with a negated input.

Full diff: https://github.com/llvm/llvm-project/pull/147026.diff

3 Files Affected:

(modified) llvm/lib/Analysis/IVDescriptors.cpp (+4)
(modified) llvm/lib/Transforms/Vectorize/LoopVectorize.cpp (+8)
(modified) llvm/test/Transforms/LoopVectorize/reduction-inloop.ll (+5-4)

diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp
index b275b1064cef2..2b7d06e0c682e 100644
--- a/llvm/lib/Analysis/IVDescriptors.cpp
+++ b/llvm/lib/Analysis/IVDescriptors.cpp
@@ -1263,6 +1263,10 @@ RecurrenceDescriptor::getReductionOpChain(PHINode *Phi, Loop *L) const {
     if (isFMulAddIntrinsic(Cur))
       return true;
 
+    // Recognize a sub reduction. It gets canonicalized to add(sub (0, ...)).
+    if (Cur->getOpcode() == Instruction::Sub && getOpcode() == Instruction::Add)
+      return true;
+
     return Cur->getOpcode() == getOpcode();
   };
 
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 907839711a39c..1cfbcf1336620 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -9144,6 +9144,14 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
             CurrentLinkI->getFastMathFlags());
         LinkVPBB->insert(FMulRecipe, CurrentLink->getIterator());
         VecOp = FMulRecipe;
+      } else if (PhiR->isInLoop() && Kind == RecurKind::Add &&
+                 CurrentLinkI->getOpcode() == Instruction::Sub) {
+        Type *PhiTy = PhiR->getUnderlyingValue()->getType();
+        auto *Zero = Plan->getOrAddLiveIn(ConstantInt::get(PhiTy, 0));
+        VPWidenRecipe *Sub = new VPWidenRecipe(
+            *CurrentLinkI, {Zero, CurrentLink->getOperand(1)});
+        LinkVPBB->insert(Sub, CurrentLink->getIterator());
+        VecOp = Sub;
       } else {
         if (RecurrenceDescriptor::isMinMaxRecurrenceKind(Kind)) {
           if (isa<VPWidenRecipe>(CurrentLink)) {
diff --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll
index e762c9ff81322..4c6e0e487d0dd 100644
--- a/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll
+++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll
@@ -627,7 +627,7 @@ for.end:                                          ; preds = %for.body, %entry
   ret float %result.0.lcssa
 }
 
-; Sub we can create a reduction, but not inloop
+; Sub we can create a reduction inloop
 define i32 @reduction_sub_lhs(ptr noalias nocapture %A) {
 ; CHECK-LABEL: @reduction_sub_lhs(
 ; CHECK-NEXT:  entry:
@@ -636,15 +636,16 @@ define i32 @reduction_sub_lhs(ptr noalias nocapture %A) {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
-; CHECK-NEXT:    [[TMP1]] = sub <4 x i32> [[VEC_PHI]], [[WIDE_LOAD]]
+; CHECK-NEXT:    [[TMP4:%.*]] = sub nsw <4 x i32> zeroinitializer, [[WIDE_LOAD]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP4]])
+; CHECK-NEXT:    [[TMP3]] = add i32 [[TMP1]], [[VEC_PHI]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
 ; CHECK-NEXT:    br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
 ; CHECK:       middle.block:
-; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP1]])
 ; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
 ; CHECK:       scalar.ph:
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]

davemgreen

Hi - Could this add a sub into the middle of an extending reduction?

(It might be good to transform a sub reduction into a add reduction + negate at the end of the loop, to allow the more efficient sum reductions to trigger).

sdesmalen-arm

LGTM with nit addressed.

@davemgreen wrote:

Hi - Could this add a sub into the middle of an extending reduction?

(It might be good to transform a sub reduction into a add reduction + negate at the end of the loop, to allow the more efficient sum reductions to trigger).

That's not a bad idea, although that might be better left for a future PR. The reason for this change is to align the implementations of partial and in-loop reductions. I'm also not entirely sure if we necessarily want to do this for partial reductions, because some of the cdot patterns rely on the input being negated inside the loop.

sdesmalen-arm · 2025-07-14T16:53:28Z

llvm/test/Transforms/LoopVectorize/reduction-inloop.ll

 ; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
-; CHECK-NEXT:    [[TMP1]] = sub <4 x i32> [[VEC_PHI]], [[WIDE_LOAD]]
+; CHECK-NEXT:    [[TMP4:%.*]] = sub nsw <4 x i32> zeroinitializer, [[WIDE_LOAD]]


I'm curious, where does the nsw come from?

I believe it's coming from CurrentLinkI in https://github.com/llvm/llvm-project/pull/147026/files#diff-da321d454a7246f8ae276bf1db2782bf26b5210b8133cb59e4d7fd45d0905decR9151 .

EDIT: I mis-read which sub this was, so I don't actually know where it's coming from!

Turns out I had the right hunch and it's been removed by my latest commits.

sdesmalen-arm · 2025-07-14T16:54:15Z

llvm/test/Transforms/LoopVectorize/reduction-inloop.ll

@@ -627,7 +627,7 @@ for.end:                                          ; preds = %for.body, %entry
  ret float %result.0.lcssa
 }

-; Sub we can create a reduction, but not inloop
+; Sub we can create a reduction inloop


nit: We can create an in-loop reduction for sub-reductions by negating the input.

fhahn · 2025-07-14T20:33:24Z

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

@@ -9144,6 +9144,14 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
            CurrentLinkI->getFastMathFlags());
        LinkVPBB->insert(FMulRecipe, CurrentLink->getIterator());
        VecOp = FMulRecipe;
+      } else if (PhiR->isInLoop() && Kind == RecurKind::Add &&


Is there a reason this cannot be classified as Sub reduction? Would be good to update the RecurrenceDescriptor to clarify that RecurKind::Add can be a sub-reduction.

Would be great if we could avoid having RecurKind meanings depend on whether it's in loop or not,

I don't think there is a reason so I'll experiment with adding RecurKind::Sub 👍 .

Done! Let me know what you think.

fhahn · 2025-07-14T20:37:11Z

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

+        Type *PhiTy = PhiR->getUnderlyingValue()->getType();
+        auto *Zero = Plan->getOrAddLiveIn(ConstantInt::get(PhiTy, 0));
+        VPWidenRecipe *Sub = new VPWidenRecipe(
+            *CurrentLinkI, {Zero, CurrentLink->getOperand(1)});


I don't think this is correct to use the flags from the original sub, e.g. if the operand is INT_MIN it will sign wrap I think.

davemgreen · 2025-07-16T09:17:56Z

The first of these appears to hit the assert about differences in the cost model. https://godbolt.org/z/GraEdK6Er
The second I'm not sure is gaining anything from performing the sub reducion in-loop with an extra vneg. It is more instructions in the loop, that is probably slower. It doesn't try to create an extending reduction though in this case, so isn't getting that wrong.

fhahn · 2025-07-16T09:20:58Z

The first of these appears to hit the assert about differences in the cost model. https://godbolt.org/z/GraEdK6Er The second I'm not sure is gaining anything from performing the sub reducion in-loop with an extra vneg. It is more instructions in the loop, that is probably slower. It doesn't try to create an extending reduction though in this case, so isn't getting that wrong.

Yep, the new created instruction shouldn't take CurrentLinkI as underlying instruction, as it means we may add incorrect flags + introducing a cost difference

MacDue · 2025-07-17T15:01:19Z

llvm/lib/Analysis/IVDescriptors.cpp

+      // Normally the recur kind is expected to stay the same across all
+      // reduction instructions. Add and sub can appear in chained reductions so
+      // accept a sub if the recur kind is add, and vice versa.
+      if (Kind == RecurKind::Add && Cur->getOpcode() == Instruction::Sub)
+        Kind = RecurKind::Sub;
+      else if (Kind == RecurKind::Sub && Cur->getOpcode() == Instruction::Add)
+        Kind = RecurKind::Add;


I was a little confused by changing the recur kind here. I tried removing this change and updating isRecurrenceInstr() to:

case Instruction::Sub: return InstDesc(Kind == RecurKind::Sub, I); case Instruction::Add: return InstDesc(Kind == RecurKind::Sub || Kind == RecurKind::Add, I);

And it seems no LV test changed, but maybe I've missed something?

Forgot to mention, but this also requires updating isReductionPHI() to attempt to match Add reductions first:

if (AddReductionVar(Phi, RecurKind::Add, TheLoop, FMF, RedDes, DB, AC, DT, SE)) { LLVM_DEBUG(dbgs() << "Found an ADD reduction PHI." << *Phi << "\n"); return true; } if (AddReductionVar(Phi, RecurKind::Sub, TheLoop, FMF, RedDes, DB, AC, DT, SE)) { LLVM_DEBUG(dbgs() << "Found a SUB reduction PHI." << *Phi << "\n"); return true; }

So if a reduction is all adds, it's matched as an Add reduction. If it's all subs (or a mix of add/sub) it's matched as a Sub reduction. Though maybe that's wrong? Is the intention that the reduction kind matches the last operation in the chain?

Nice, that's a cleaner solution, thanks.

MacDue · 2025-07-17T16:49:09Z

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

+      } else if (PhiR->isInLoop() && Kind == RecurKind::Sub &&
+                 CurrentLinkI->getOpcode() == Instruction::Sub) {


This case is no-longer tested (removed it and all tests passed), and at does somewhat confuse me (maybe just needs a comment). I'd think you'd negate the input for a sub in an add reduction, not a sub in a sub reduction?

I'm guessing this is something to do with the input of the llvm.vector.reduce.add in the loop?

Ah yes that's right, it needs to be checking for an Add RecurKind.

MacDue · 2025-07-17T18:11:35Z

llvm/lib/Analysis/IVDescriptors.cpp

+    if ((Kind != RecurKind::Sub && !Cur->isCommutative()) && !IsAPhi &&
+        !isa<SelectInst>(Cur) && !isa<ICmpInst>(Cur) && !isa<FCmpInst>(Cur) &&
        !VisitedInsts.count(dyn_cast<Instruction>(Cur->getOperand(0))))
      return false;


Can this restriction be lifted? I don't think the new vector code is correct for @reduction_sub_rhs:

Sub reduction on RHS:

s0 = x0 - 0 s1 = x1 - s0 s2 = x2 - s1 s3 = x3 - s2 = x3 - x2 + x1 - x0 + 0 != vec_reduce_add((x0,x1,x2,x3) - (0, 0, 0, 0)) = x0 + x1 + x2 +x3

Sub reduction on LHS:

s0 = 0 - x0 s1 = s0 - x1 s2 = s1 - x2 s3 = s2 - x3 = 0 - x0 - x1 - x2 - x3 = vec_reduce_add((0, 0, 0, 0) - (x0,x1,x2,x3))

I had a look at the output IR for that test and it seemed equivalent but tracing the loop itself reveals that you're right and it's not the same. I've added this restriction back now, thanks!

fhahn · 2025-07-17T18:18:39Z

llvm/test/Transforms/LoopVectorize/reduction-inloop.ll

@@ -644,14 +644,14 @@ define i32 @reduction_sub_lhs(ptr noalias nocapture %A) {
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
 ; CHECK-NEXT:    br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
 ; CHECK:       middle.block:
-; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP1]])


Here we are now missing the negating the operand in the vector loop?

I've traced how the scalar and vector loops act and they are equivalent as they are, but I think I should have removed the change I made to the comment since it doesn't create an in-loop reduction.

Data = [1, 2, 3, 4, 5, 6, 7, 8] Scalar loop: Iteration | Operation | New Phi 1 | 0 - 1 | -1 2 | -1 - 2 | -3 3 | -3 - 3 | -6 4 | -6 - 4 | -10 5 | -10 - 5 | -15 6 | -15 - 6 | -21 7 | -21 - 7 | -28 8 | -28 - 8 | -36 Vector loop: Iteration | Operation | New Phi 1 | [0, 0, 0, 0] - [1, 2, 3, 4] | [-1, -2, -3, -4] 2 | [-1, -2, -3, -4] - [5, 6, 7, 8] | [-6, -8, -10, -12] reduce.add([-6, -8, -10, -12]) = -36

Fixed, thanks.

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

sdesmalen-arm · 2025-07-21T16:06:35Z

llvm/test/Transforms/LoopVectorize/reduction-inloop.ll

@@ -627,7 +627,7 @@ for.end:                                          ; preds = %for.body, %entry
  ret float %result.0.lcssa
 }

-; Sub we can create a reduction, but not inloop
+; We can create an in-loop reduction for sub-reductions by negating the input


Can you add a test with an interleave factor higher than 1?

I just tried building this test with -mllvm -prefer-inloop-reductions -mllvm -force-vector-interleave=2:

int foo(int *src, int start, int n) { for(int i=0; i<n; ++i) start -= src[i]; return start; }

and this resulted in a segmentation fault.

Fixed, thank you. And I've added an interleaving run line to the inloop reduction test to make sure nothing breaks the other tests.

github-actions · 2025-07-22T15:31:39Z

⚠️ undef deprecator found issues in your code. ⚠️

You can test this locally with the following command:

git diff -U0 --pickaxe-regex -S '([^a-zA-Z0-9#_-]undef[^a-zA-Z0-9_-]|UndefValue::get)' 'HEAD~1' HEAD llvm/test/Transforms/SLPVectorizer/reductions.ll llvm/include/llvm/Analysis/IVDescriptors.h llvm/lib/Analysis/IVDescriptors.cpp llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp llvm/lib/Transforms/Utils/LoopUtils.cpp llvm/lib/Transforms/Vectorize/LoopVectorize.cpp llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp llvm/test/Transforms/LoopVectorize/reduction-inloop.ll

The following files introduce new uses of undef:

llvm/test/Transforms/LoopVectorize/reduction-inloop.ll

Undef is now deprecated and should only be used in the rare cases where no replacement is possible. For example, a load of uninitialized memory yields undef. You should use poison values for placeholders instead.

In tests, avoid using undef and having tests that trigger undefined behavior. If you need an operand with some unimportant value, you can add a new argument to the function and use that instead.

For example, this is considered a bad practice:

define void @fn() {
  ...
  br i1 undef, ...
}

Please use the following instead:

define void @fn(i1 %cond) {
  ...
  br i1 %cond, ...
}

Please refer to the Undefined Behavior Manual for more information.

github-actions · 2025-07-22T15:31:39Z

✅ With the latest revision this PR passed the C/C++ code formatter.

llvm/lib/Analysis/IVDescriptors.cpp

sdesmalen-arm · 2025-07-30T13:09:34Z

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

+    else {
+      // Sub reductions aren't commutative so the operands need to be swapped.
+      bool IsSub = Kind == RecurKind::Sub;
+      Value *LHS = IsSub ? PrevInChain : NewRed;


For commutative operations the order doesn't matter, so I think you can always swap the order of PrevInChain/NewRed, avoiding the need for IsSub, LHS and RHS. That is best done in a separate (NFCI) patch though, because various tests require updating.

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

llvm/lib/Analysis/IVDescriptors.cpp

sdesmalen-arm · 2025-07-31T12:19:58Z

llvm/lib/Analysis/IVDescriptors.cpp

+    // Recognize a sub reduction. It gets canonicalized to add(sub (0, ...)).
+    if (Cur->getOpcode() == Instruction::Sub && getOpcode() == Instruction::Add)
+      return true;
+


When I now remove this code, all tests still pass. Is this code still needed?

It isn't, thanks.

#147026 will enable sub reductions, which require that the phi value is the first operand since they aren't commutative. This re-orders the operands when executing reductions, which actually matches other existing code in VPReductionRecipe::execute.

llvm/llvm-project#147026 will enable sub reductions, which require that the phi value is the first operand since they aren't commutative. This re-orders the operands when executing reductions, which actually matches other existing code in VPReductionRecipe::execute.

sdesmalen-arm · 2025-08-06T08:50:58Z

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

@@ -22344,6 +22344,7 @@ class HorizontalReduction {
      return Builder.CreateBinOp((Instruction::BinaryOps)RdxOpcode, LHS, RHS,
                                 Name);
    }
+    case RecurKind::Sub:


I'm not so sure this is the right approach, because it suggests that Sub is somehow supported, which it isn't (yet).
My suggestion would be to do the following:

if a switch-statement has no default case, then we should add a llvm_unreachable to make it clear that RecurKind::Sub is unexpected and unsupported.

if the switch-statement has a default case with an llvm_unreachable to catch unimplemented RecurKind's, then strictly speaking no action is required, although a specific llvm_unreachable might still be helpful.

From looking at the SLPVectorizer code, it never tries to match a RecurKind::Sub so in other places it should never encounter one either, and so a llvm_unreachable would be fine.

I agree that it suggested sub is supported when it really isn't. Fixed now, thanks!

SamTebbs33 · 2025-08-07T16:48:11Z

I've pushed a commit that adds the AddChainWithSubs recur kind to make the fact that an add recurrence can have a subtraction less confusing.

sdesmalen-arm

Thanks for adding AddChainWithSubs, this avoids any possible confusing arising from new code not realising that sub might also be one of the operands in the chain, requiring negating the inputs.

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

llvm/lib/Analysis/IVDescriptors.cpp

This PR allows the loop vectorizer to handle in-loop sub reductions by forming a normal in-loop add reduction with a negated input. Stacked PRs: 1. -> llvm/llvm-project#147026 2. llvm/llvm-project#147255 3. llvm/llvm-project#147302 4. llvm/llvm-project#147513

llvm#147026 will enable sub reductions, which require that the phi value is the first operand since they aren't commutative. This re-orders the operands when executing reductions, which actually matches other existing code in VPReductionRecipe::execute.

We used to vectorize these scalably but after llvm#147026 they were split out from RecurKind::Add into their own RecurKinds, and we didn't mark them as supported in isLegalToVectorizeReduction. This caused the loop vectorizer to drop the scalable VPlan because it thinks the reductions will be scalarized. This fixes it by just marking them as supported. Fixes llvm#154554

lukel97 · 2025-08-21T13:02:37Z

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

+  case RecurKind::Sub:
+  case RecurKind::AddChainWithSubs:


Just a heads up, this caused some regressions on RISC-V because sub reductions are now a different type. We have a fix up to update the TTI hook here #154753 but it might be worth looking at the other targets/X86 to see if they also need to have their isLegalToVectorizeReduction updated?

From what I can see only AArch64 and RISC-V targets have this target hook implemented.

Thanks for fixing that. Sander is right about it only being implemented for AArch64 and RISC-V. It just returns a blanket true by default.

We used to vectorize these scalably but after #147026 they were split out from RecurKind::Add into their own RecurKinds, and we didn't mark them as supported in isLegalToVectorizeReduction. This caused the loop vectorizer to drop the scalable VPlan because it thinks the reductions will be scalarized. This fixes it by just marking them as supported. Fixes #154554

SamTebbs33 requested review from fhahn, huntergr-arm, sdesmalen-arm and david-arm July 4, 2025 09:24

llvmbot added vectorizers llvm:analysis Includes value tracking, cost tables and constant folding llvm:transforms labels Jul 4, 2025

davemgreen reviewed Jul 5, 2025

View reviewed changes

sdesmalen-arm approved these changes Jul 14, 2025

View reviewed changes

fhahn reviewed Jul 14, 2025

View reviewed changes

fhahn mentioned this pull request Jul 17, 2025

[LV] Use VPReductionRecipe for partial reductions #147513

Open

llvmbot added the backend:AArch64 label Jul 17, 2025

MacDue reviewed Jul 17, 2025

View reviewed changes

fhahn reviewed Jul 17, 2025

View reviewed changes

sdesmalen-arm reviewed Jul 18, 2025

View reviewed changes

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp Outdated Show resolved Hide resolved

sdesmalen-arm reviewed Jul 21, 2025

View reviewed changes

SamTebbs33 force-pushed the sub-reductions branch from de3da9e to 64aa25f Compare July 22, 2025 15:29

sdesmalen-arm requested changes Jul 24, 2025

View reviewed changes

llvm/lib/Analysis/IVDescriptors.cpp Show resolved Hide resolved

sdesmalen-arm reviewed Jul 30, 2025

View reviewed changes

sdesmalen-arm reviewed Jul 31, 2025

View reviewed changes

SamTebbs33 force-pushed the sub-reductions branch from 37b93f4 to 32b31a0 Compare July 31, 2025 17:15

SamTebbs33 added 11 commits August 5, 2025 16:05

Fix interleaving case

e855872

format

35b6b9d

Improve codegen

c659696

Address review

2b8e239

format

bdf2604

Improve mixed recurrence kind cases

83f029c

Remove check in isCorrectOpcode

6978e95

Update test comment and set start value to 3

f53accf

Add Sub cases to SLPVectorizer

0033606

Add test for mixed inloop reductions

3ae3e67

Format and remove check from isCorrectOpcode

0f5cb6d

SamTebbs33 force-pushed the sub-reductions branch from 0836223 to 0f5cb6d Compare August 5, 2025 15:28

sdesmalen-arm reviewed Aug 6, 2025

View reviewed changes

SamTebbs33 added 2 commits August 6, 2025 10:29

Make RecurKind::Sub unreachable in SLPVectorizer.cpp

bd5c474

Add RecurKind::AddChainWithSubs

6a6bb86

sdesmalen-arm reviewed Aug 8, 2025

View reviewed changes

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp Outdated Show resolved Hide resolved

llvm/lib/Analysis/IVDescriptors.cpp Outdated Show resolved Hide resolved

llvm/lib/Analysis/IVDescriptors.cpp Outdated Show resolved Hide resolved

SamTebbs33 added 2 commits August 8, 2025 11:35

Simplify isRecurrenceInstr, getReductionOpChain and remove a case in SLP

c32b0e1

Revert partial-reduce-chained.ll changes

340003b

SamTebbs33 mentioned this pull request Aug 8, 2025

[LV] Bundle partial reductions inside VPExpressionRecipe #147302

Open

sdesmalen-arm approved these changes Aug 11, 2025

View reviewed changes

SamTebbs33 merged commit 0bfa171 into llvm:main Aug 12, 2025
8 of 9 checks passed

SamTebbs33 deleted the sub-reductions branch August 12, 2025 16:06

lukel97 mentioned this pull request Aug 21, 2025

[RISCV][LV] Subtract in reduction forcing fixed vector codegen #154554

Closed

lukel97 mentioned this pull request Aug 21, 2025

[RISCV] Mark Sub/AddChainWithSubs as legal reduction types #154753

Merged

lukel97 reviewed Aug 21, 2025

View reviewed changes

		} else if (PhiR->isInLoop() && Kind == RecurKind::Sub &&
		CurrentLinkI->getOpcode() == Instruction::Sub) {

[LV] Create in-loop sub reductions #147026

[LV] Create in-loop sub reductions #147026

Uh oh!

Conversation

SamTebbs33 commented Jul 4, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

llvmbot commented Jul 4, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

llvmbot commented Jul 4, 2025

Uh oh!

davemgreen left a comment

Choose a reason for hiding this comment

Uh oh!

sdesmalen-arm left a comment

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

SamTebbs33 Jul 15, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

davemgreen commented Jul 16, 2025

Uh oh!

fhahn commented Jul 16, 2025

Uh oh!

MacDue Jul 17, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Choose a reason for hiding this comment

Uh oh!

MacDue Jul 17, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

MacDue Jul 17, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Choose a reason for hiding this comment

Uh oh!

MacDue Jul 17, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

MacDue Jul 17, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

SamTebbs33 Jul 18, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Choose a reason for hiding this comment

SamTebbs33 commented Jul 4, 2025 •

edited

Loading

llvmbot commented Jul 4, 2025 •

edited

Loading

SamTebbs33 Jul 15, 2025 •

edited

Loading

MacDue Jul 17, 2025 •

edited

Loading

MacDue Jul 17, 2025 •

edited

Loading

MacDue Jul 17, 2025 •

edited

Loading

MacDue Jul 17, 2025 •

edited

Loading

MacDue Jul 17, 2025 •

edited

Loading

SamTebbs33 Jul 18, 2025 •

edited

Loading

github-actions bot commented Jul 22, 2025 •

edited

Loading

github-actions bot commented Jul 22, 2025 •

edited

Loading