-
Notifications
You must be signed in to change notification settings - Fork 14.9k
[VPlan] Remove original loop if dead after vectorization. #155497
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
Builds on top of llvm#154510 to completely remove dead scalar loops after vectorization. Depends on llvm#154510 (included in this PR).
@llvm/pr-subscribers-backend-systemz @llvm/pr-subscribers-backend-powerpc Author: Florian Hahn (fhahn). Changes: Builds on top of #154510 to completely remove dead scalar loops. Depends on #154510 (included in this PR). Patch is 2.94 MiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/155497.diff — 337 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 98554310c74df..951e3fcc6e60c 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2357,9 +2357,9 @@ EpilogueVectorizerMainLoop::createIterationCountCheck(ElementCount VF,
/// VPBB are moved to the end of the newly created VPIRBasicBlock. VPBB must
/// have a single predecessor, which is rewired to the new VPIRBasicBlock. All
/// successors of VPBB, if any, are rewired to the new VPIRBasicBlock.
-static VPIRBasicBlock *replaceVPBBWithIRVPBB(VPBasicBlock *VPBB,
+static VPIRBasicBlock *replaceVPBBWithIRVPBB(VPlan &Plan, VPBasicBlock *VPBB,
BasicBlock *IRBB) {
- VPIRBasicBlock *IRVPBB = VPBB->getPlan()->createVPIRBasicBlock(IRBB);
+ VPIRBasicBlock *IRVPBB = Plan.createVPIRBasicBlock(IRBB);
auto IP = IRVPBB->begin();
for (auto &R : make_early_inc_range(VPBB->phis()))
R.moveBefore(*IRVPBB, IP);
@@ -2571,6 +2571,9 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) {
// Remove redundant induction instructions.
cse(HeaderBB);
+ if (Plan.getScalarPreheader()->getNumPredecessors() == 0)
+ return;
+
// Set/update profile weights for the vector and remainder loops as original
// loop iterations are now distributed among them. Note that original loop
// becomes the scalar remainder loop after vectorization.
@@ -7226,6 +7229,12 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
VPlanTransforms::optimizeForVFAndUF(BestVPlan, BestVF, BestUF, PSE);
VPlanTransforms::simplifyRecipes(BestVPlan);
VPlanTransforms::removeBranchOnConst(BestVPlan);
+ if (BestVPlan.getEntry()->getSingleSuccessor() ==
+ BestVPlan.getScalarPreheader()) {
+ // TODO: Should not even try to vectorize.
+ return DenseMap<const SCEV *, Value *>();
+ }
+
VPlanTransforms::narrowInterleaveGroups(
BestVPlan, BestVF,
TTI.getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector));
@@ -7268,7 +7277,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
BasicBlock *EntryBB =
cast<VPIRBasicBlock>(BestVPlan.getEntry())->getIRBasicBlock();
State.CFG.PrevBB = ILV.createVectorizedLoopSkeleton();
- replaceVPBBWithIRVPBB(BestVPlan.getScalarPreheader(),
+ replaceVPBBWithIRVPBB(BestVPlan, BestVPlan.getScalarPreheader(),
State.CFG.PrevBB->getSingleSuccessor());
VPlanTransforms::removeDeadRecipes(BestVPlan);
@@ -7351,8 +7360,9 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
} else {
// Keep all loop hints from the original loop on the vector loop (we'll
// replace the vectorizer-specific hints below).
- if (MDNode *LID = OrigLoop->getLoopID())
- L->setLoopID(LID);
+ if (BestVPlan.getScalarPreheader()->getNumPredecessors() > 0)
+ if (MDNode *LID = OrigLoop->getLoopID())
+ L->setLoopID(LID);
LoopVectorizeHints Hints(L, true, *ORE);
Hints.setAlreadyVectorized();
@@ -7383,6 +7393,18 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
addRuntimeUnrollDisableMetaData(L);
}
+ if (BestVPlan.getScalarPreheader()->getNumPredecessors() == 0) {
+ // If the original loop became unreachable, we need to delete it.
+ auto Blocks = OrigLoop->getBlocksVector();
+ Blocks.push_back(cast<VPIRBasicBlock>(BestVPlan.getScalarPreheader())
+ ->getIRBasicBlock());
+ for (auto *BB : Blocks)
+ LI->removeBlock(BB);
+ DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
+ DeleteDeadBlocks(Blocks, &DTU);
+ LI->erase(OrigLoop);
+ }
+
// 3. Fix the vectorized code: take care of header phi's, live-outs,
// predication, updating analyses.
ILV.fixVectorizedLoop(State);
@@ -7460,7 +7482,8 @@ EpilogueVectorizerMainLoop::emitIterationCountCheck(BasicBlock *Bypass,
// generated here dominates the vector epilog iter check.
EPI.TripCount = Count;
} else {
- VectorPHVPBB = replaceVPBBWithIRVPBB(VectorPHVPBB, LoopVectorPreHeader);
+ VectorPHVPBB =
+ replaceVPBBWithIRVPBB(Plan, VectorPHVPBB, LoopVectorPreHeader);
}
BranchInst &BI =
@@ -7493,7 +7516,7 @@ BasicBlock *EpilogueVectorizerEpilogueLoop::createVectorizedLoopSkeleton() {
BasicBlock *VecEpilogueIterationCountCheck =
SplitBlock(LoopVectorPreHeader, LoopVectorPreHeader->begin(), DT, LI,
nullptr, "vec.epilog.iter.check", true);
- VectorPHVPBB = replaceVPBBWithIRVPBB(VectorPHVPBB, LoopVectorPreHeader);
+ VectorPHVPBB = replaceVPBBWithIRVPBB(Plan, VectorPHVPBB, LoopVectorPreHeader);
emitMinimumVectorEpilogueIterCountCheck(LoopScalarPreHeader,
VecEpilogueIterationCountCheck);
@@ -10213,11 +10236,22 @@ bool LoopVectorizePass::processLoop(Loop *L) {
LLVM_DEBUG(dbgs() << "LV: Interleave Count is " << IC << '\n');
}
+ if (ORE->allowExtraAnalysis(LV_NAME))
+ checkMixedPrecision(L, ORE);
+
bool DisableRuntimeUnroll = false;
MDNode *OrigLoopID = L->getLoopID();
+ bool LoopRemoved = false;
{
using namespace ore;
if (!VectorizeLoop) {
+ ORE->emit([&]() {
+ return OptimizationRemark(LV_NAME, "Interleaved", L->getStartLoc(),
+ L->getHeader())
+ << "interleaved loop (interleaved count: "
+ << NV("InterleaveCount", IC) << ")";
+ });
+
assert(IC > 1 && "interleave count should not be 1 or 0");
// If we decided that it is not legal to vectorize the loop, then
// interleave it.
@@ -10234,14 +10268,11 @@ bool LoopVectorizePass::processLoop(Loop *L) {
LVP.addMinimumIterationCheck(BestPlan, VF.Width, IC,
VF.MinProfitableTripCount);
LVP.executePlan(VF.Width, IC, BestPlan, Unroller, DT, false);
-
- ORE->emit([&]() {
- return OptimizationRemark(LV_NAME, "Interleaved", L->getStartLoc(),
- L->getHeader())
- << "interleaved loop (interleaved count: "
- << NV("InterleaveCount", IC) << ")";
- });
+ LoopRemoved = BestPlan.getScalarPreheader()->getNumPredecessors() == 0;
} else {
+ // Report the vectorization decision.
+ reportVectorization(ORE, L, VF, IC);
+
// If we decided that it is *legal* to vectorize the loop, then do it.
VPlan &BestPlan = LVP.getPlanFor(VF.Width);
@@ -10311,23 +10342,23 @@ bool LoopVectorizePass::processLoop(Loop *L) {
// rarely used is not worth unrolling.
if (!Checks.hasChecks())
DisableRuntimeUnroll = true;
+ LoopRemoved = BestPlan.getScalarPreheader()->getNumPredecessors() == 0;
}
- // Report the vectorization decision.
- reportVectorization(ORE, L, VF, IC);
}
-
- if (ORE->allowExtraAnalysis(LV_NAME))
- checkMixedPrecision(L, ORE);
}
assert(DT->verify(DominatorTree::VerificationLevel::Fast) &&
"DT not preserved correctly");
+ if (LoopRemoved)
+ return true;
+
std::optional<MDNode *> RemainderLoopID =
makeFollowupLoopID(OrigLoopID, {LLVMLoopVectorizeFollowupAll,
LLVMLoopVectorizeFollowupEpilogue});
if (RemainderLoopID) {
- L->setLoopID(*RemainderLoopID);
+ if (!LoopRemoved)
+ L->setLoopID(*RemainderLoopID);
} else {
if (DisableRuntimeUnroll)
addRuntimeUnrollDisableMetaData(L);
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 1438dc366b55d..4a7618f40164b 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -972,12 +972,14 @@ void VPlan::execute(VPTransformState *State) {
setName("Final VPlan");
LLVM_DEBUG(dump());
- // Disconnect scalar preheader and scalar header, as the dominator tree edge
- // will be updated as part of VPlan execution. This allows keeping the DTU
- // logic generic during VPlan execution.
BasicBlock *ScalarPh = State->CFG.ExitBB;
- State->CFG.DTU.applyUpdates(
- {{DominatorTree::Delete, ScalarPh, ScalarPh->getSingleSuccessor()}});
+ if (getScalarPreheader()->getNumPredecessors() > 0) {
+ // Disconnect scalar preheader and scalar header, as the dominator tree edge
+ // will be updated as part of VPlan execution. This allows keeping the DTU
+ // logic generic during VPlan execution.
+ State->CFG.DTU.applyUpdates(
+ {{DominatorTree::Delete, ScalarPh, ScalarPh->getSingleSuccessor()}});
+ }
ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>> RPOT(
Entry);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index d32d2a9ad11f7..8e7fc24080c31 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1920,7 +1920,7 @@ void VPlanTransforms::removeBranchOnConst(VPlan &Plan) {
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
vp_depth_first_shallow(Plan.getEntry()))) {
VPValue *Cond;
- if (VPBB->getNumSuccessors() != 2 || VPBB == Plan.getEntry() ||
+ if (VPBB->getNumSuccessors() != 2 || VPBB->empty() ||
!match(&VPBB->back(), m_BranchOnCond(m_VPValue(Cond))))
continue;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.h b/llvm/lib/Transforms/Vectorize/VPlanUtils.h
index 9e1d325a4d8d6..2959e9440e753 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUtils.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.h
@@ -49,6 +49,7 @@ inline bool isSingleScalar(const VPValue *VPV) {
case Instruction::GetElementPtr:
case Instruction::ICmp:
case Instruction::FCmp:
+ case Instruction::Select:
case VPInstruction::Broadcast:
case VPInstruction::PtrAdd:
return true;
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/aarch64-predication.ll b/llvm/test/Transforms/LoopVectorize/AArch64/aarch64-predication.ll
index c18f9f2fae06b..ddfdb257ed49a 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/aarch64-predication.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/aarch64-predication.ll
@@ -52,8 +52,8 @@ define i64 @predicated_udiv_scalarized_operand(ptr %a, i64 %x) {
; CHECK-NEXT: [[TMP17]] = add <2 x i64> [[VEC_PHI]], [[PREDPHI]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
-; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
-; CHECK: middle.block:
+; CHECK-NEXT: br i1 [[TMP18]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: for.end:
; CHECK-NEXT: [[TMP19:%.*]] = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[TMP17]])
; CHECK-NEXT: ret i64 [[TMP19]]
;
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/blend-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/blend-costs.ll
index e44ddbce34fd5..58965c19ae1cc 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/blend-costs.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/blend-costs.ll
@@ -202,8 +202,8 @@ exit:
define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i1 %c.0) {
; CHECK-LABEL: define void @test_blend_feeding_replicated_store_2(
; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr [[DST:%.*]], i1 [[C_0:%.*]]) {
-; CHECK-NEXT: [[ENTRY:.*]]:
-; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i1> poison, i1 [[C_0]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i1> [[BROADCAST_SPLATINSERT]], <16 x i1> poison, <16 x i32> zeroinitializer
@@ -366,12 +366,11 @@ define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i
; CHECK-NEXT: [[TMP71:%.*]] = icmp eq i32 [[INDEX_NEXT]], 96
; CHECK-NEXT: br i1 [[TMP71]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
-; CHECK-NEXT: br label %[[SCALAR_PH]]
+; CHECK-NEXT: br label %[[SCALAR_PH:.*]]
; CHECK: [[SCALAR_PH]]:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 96, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
; CHECK: [[LOOP_HEADER]]:
-; CHECK-NEXT: [[IV1:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; CHECK-NEXT: [[IV1:%.*]] = phi i32 [ 96, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
; CHECK-NEXT: [[GEP_SRC1:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i32 [[IV1]]
; CHECK-NEXT: [[L:%.*]] = load i8, ptr [[GEP_SRC1]], align 1
; CHECK-NEXT: [[C_1:%.*]] = icmp eq i8 [[L]], 0
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/call-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/call-costs.ll
index f099c22333c3e..23918427e7003 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/call-costs.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/call-costs.ll
@@ -6,8 +6,8 @@ target triple = "arm64-apple-macosx11.0.0"
define void @fshl_operand_first_order_recurrence(ptr %dst, ptr noalias %src) {
; CHECK-LABEL: define void @fshl_operand_first_order_recurrence(
; CHECK-SAME: ptr [[DST:%.*]], ptr noalias [[SRC:%.*]]) {
-; CHECK-NEXT: [[ENTRY:.*]]:
-; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
@@ -30,14 +30,12 @@ define void @fshl_operand_first_order_recurrence(ptr %dst, ptr noalias %src) {
; CHECK-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <2 x i64> [[WIDE_LOAD1]], i32 1
-; CHECK-NEXT: br label %[[SCALAR_PH]]
+; CHECK-NEXT: br label %[[SCALAR_PH:.*]]
; CHECK: [[SCALAR_PH]]:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 100, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
-; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
-; CHECK-NEXT: [[RECUR:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], %[[SCALAR_PH]] ], [ [[L:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 100, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[RECUR:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], %[[SCALAR_PH]] ], [ [[L:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[IV]]
; CHECK-NEXT: [[L]] = load i64, ptr [[GEP_SRC]], align 8
; CHECK-NEXT: [[OR:%.*]] = tail call i64 @llvm.fshl.i64(i64 1, i64 [[RECUR]], i64 1)
@@ -73,7 +71,7 @@ define void @powi_call(ptr %P) {
; CHECK-LABEL: define void @powi_call(
; CHECK-SAME: ptr [[P:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
-; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
@@ -83,17 +81,6 @@ define void @powi_call(ptr %P) {
; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[EXIT:.*]]
-; CHECK: [[SCALAR_PH]]:
-; CHECK-NEXT: br label %[[LOOP:.*]]
-; CHECK: [[LOOP]]:
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
-; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds double, ptr [[P]], i64 [[IV]]
-; CHECK-NEXT: [[L:%.*]] = load double, ptr [[GEP]], align 8
-; CHECK-NEXT: [[POWI:%.*]] = tail call double @llvm.powi.f64.i32(double [[L]], i32 3)
-; CHECK-NEXT: store double [[POWI]], ptr [[GEP]], align 8
-; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
-; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 1
-; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
@@ -224,5 +211,4 @@ declare i64 @llvm.fshl.i64(i64, i64, i64)
; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
-; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META2]], [[META1]]}
;.
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll b/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll
index 626242667e203..481be08e7e5ae 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll
@@ -5,7 +5,7 @@ define void @clamped_tc_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range(1,1
; CHECK-LABEL: define void @clamped_tc_8(
; CHECK-SAME: ptr captures(none) [[DST:%.*]], i32 [[N:%.*]], i64 [[VAL:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
-; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8
@@ -32,20 +32,7 @@ define void @clamped_tc_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range(1,1
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 8 x i64> [[VEC_IND]], [[DOTSPLAT]]
; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
-; CHECK-NEXT: br label [[FOR_COND_CLEANUP:%.*]]
-; CHECK: scalar.ph:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
-; CHECK: for.body:
-; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT: [[P_OUT_TAIL_09:%.*]] = phi ptr [ [[DST]], [[SCALAR_PH]] ], [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT: [[TMP19:%.*]] = shl nuw nsw i64 [[INDVARS_IV]], 3
-; CHECK-NEXT: [[SHR3:%.*]] = lshr i64 [[VAL]], [[TMP19]]
-; CHECK-NEXT: [[CONV4:%.*]] = trunc i64 [[SHR3]] to i8
-; CHECK-NEXT: store i8 [[CONV4]], ptr [[P_OUT_TAIL_09]], align 1
-; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i8, ptr [[P_OUT_TAIL_09]], i64 1
-; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
-; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 8
-; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: for.cond.cleanup:
; CHECK-NEXT: ret void
;
@@ -79,7 +66,7 @@ define void @clamped_tc_max_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range
; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[REM]], 7
; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[ADD]], 3
; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[SHR]] to i64
-; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8
@@ -104,22 +91,9 @@ define void @clamped_tc_max_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]]
; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX_NEXT]], i64 [[WIDE_TRIP_COUNT]])
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 8 x i64> [[VEC_IND]], [[DOTSPLAT]]
-; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: middle.block:
-; CHECK-NEXT:...
[truncated]
|
You can test this locally with the following command:git diff -U0 --pickaxe-regex -S '([^a-zA-Z0-9#_-]undef[^a-zA-Z0-9_-]|UndefValue::get)' 'HEAD~1' HEAD llvm/lib/Transforms/Vectorize/LoopVectorize.cpp llvm/lib/Transforms/Vectorize/VPlan.cpp llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp llvm/lib/Transforms/Vectorize/VPlanUtils.h llvm/test/Transforms/LoopVectorize/AArch64/aarch64-predication.ll llvm/test/Transforms/LoopVectorize/AArch64/blend-costs.ll llvm/test/Transforms/LoopVectorize/AArch64/call-costs.ll llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll llvm/test/Transforms/LoopVectorize/AArch64/deterministic-type-shrinkage.ll llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll llvm/test/Transforms/LoopVectorize/AArch64/drop-poison-generating-flags.ll llvm/test/Transforms/LoopVectorize/AArch64/eliminate-tail-predication.ll llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence-fold-tail.ll llvm/test/Transforms/LoopVectorize/AArch64/fminimumnum.ll llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll llvm/test/Transforms/LoopVectorize/AArch64/interleave-allocsize-not-equal-typesize.ll llvm/test/Transforms/LoopVectorize/AArch64/interleave-with-gaps.ll llvm/test/Transforms/LoopVectorize/AArch64/interleave_count_for_known_tc.ll llvm/test/Transforms/LoopVectorize/AArch64/invariant-replicate-region.ll llvm/test/Transforms/LoopVectorize/AArch64/licm-calls.ll llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll llvm/test/Transforms/LoopVectorize/AArch64/mul-simplification.ll llvm/test/Transforms/LoopVectorize/AArch64/multiple-result-intrinsics.ll 
llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_test1_no_explicit_vect_width.ll llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-epilogue.ll llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-mixed.ll llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-neon.ll llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-no-dotprod.ll llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-sub.ll llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce.ll llvm/test/Transforms/LoopVectorize/AArch64/pr151664-cost-hoisted-vector-scalable.ll llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll llvm/test/Transforms/LoopVectorize/AArch64/pr73894.ll llvm/test/Transforms/LoopVectorize/AArch64/predicated-costs.ll llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll llvm/test/Transforms/LoopVectorize/AArch64/scalable-struct-return.ll llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd.ll llvm/test/Transforms/LoopVectorize/AArch64/struct-return-cost.ll llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll llvm/test/Transforms/LoopVectorize/AArch64/sve-low-trip-count.ll llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-optsize.ll llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-overflow-checks.ll llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-reductions.ll 
llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-unroll.ll llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt.ll llvm/test/Transforms/LoopVectorize/AArch64/synthesize-mask-for-call.ll llvm/test/Transforms/LoopVectorize/AArch64/tail-fold-uniform-memops.ll llvm/test/Transforms/LoopVectorize/AArch64/tail-folding-styles.ll llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-constant-ops.ll llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-derived-ivs.ll llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-metadata.ll llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-remove-loop-region.ll llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-unroll.ll llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-with-wide-ops.ll llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory.ll llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-insertelt.ll llvm/test/Transforms/LoopVectorize/AArch64/wider-VF-for-callinst.ll llvm/test/Transforms/LoopVectorize/AMDGPU/packed-math.ll llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll llvm/test/Transforms/LoopVectorize/ARM/mve-hoist-runtime-checks.ll llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-predselect.ll llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-types.ll llvm/test/Transforms/LoopVectorize/ARM/mve-reg-pressure-vmla.ll llvm/test/Transforms/LoopVectorize/ARM/optsize_minsize.ll llvm/test/Transforms/LoopVectorize/ARM/tail-folding-loop-hint.ll llvm/test/Transforms/LoopVectorize/ARM/tail-folding-not-allowed.ll llvm/test/Transforms/LoopVectorize/LoongArch/defaults.ll 
llvm/test/Transforms/LoopVectorize/PowerPC/widened-massv-call.ll llvm/test/Transforms/LoopVectorize/PowerPC/widened-massv-vfabi-attr.ll llvm/test/Transforms/LoopVectorize/RISCV/bf16.ll llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll llvm/test/Transforms/LoopVectorize/RISCV/defaults.ll llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll llvm/test/Transforms/LoopVectorize/RISCV/evl-compatible-loops.ll llvm/test/Transforms/LoopVectorize/RISCV/f16.ll llvm/test/Transforms/LoopVectorize/RISCV/fminimumnum.ll llvm/test/Transforms/LoopVectorize/RISCV/induction-costs.ll llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll llvm/test/Transforms/LoopVectorize/RISCV/interleaved-masked-access.ll llvm/test/Transforms/LoopVectorize/RISCV/lmul.ll llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll llvm/test/Transforms/LoopVectorize/RISCV/partial-reduce-dot-product.ll llvm/test/Transforms/LoopVectorize/RISCV/pr154103.ll llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll llvm/test/Transforms/LoopVectorize/RISCV/reductions.ll llvm/test/Transforms/LoopVectorize/RISCV/remark-reductions.ll llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll llvm/test/Transforms/LoopVectorize/RISCV/safe-dep-distance.ll llvm/test/Transforms/LoopVectorize/RISCV/scalable-basics.ll llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll llvm/test/Transforms/LoopVectorize/RISCV/select-cmp-reduction.ll llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-bin-unary-ops-args.ll llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-call-intrinsics.ll 
llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cast-intrinsics.ll llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cond-reduction.ll llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-div.ll llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-fixed-order-recurrence.ll llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-gather-scatter.ll llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-inloop-reduction.ll llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-interleave.ll llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-intermediate-store.ll llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-iv32.ll llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-known-no-overflow.ll llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-masked-loadstore.ll llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-ordered-reduction.ll llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reduction.ll llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reverse-load-store.ll llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-safe-dep-distance.ll llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-uniform-store.ll llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-cost.ll llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-evl-crash.ll llvm/test/Transforms/LoopVectorize/RISCV/type-info-cache-evl-crash.ll llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll llvm/test/Transforms/LoopVectorize/RISCV/vector-loop-backedge-elimination-with-evl.ll llvm/test/Transforms/LoopVectorize/RISCV/vectorize-vp-intrinsics.ll llvm/test/Transforms/LoopVectorize/RISCV/vf-will-not-generate-any-vector-insts.ll llvm/test/Transforms/LoopVectorize/SystemZ/addressing.ll llvm/test/Transforms/LoopVectorize/SystemZ/force-target-instruction-cost.ll llvm/test/Transforms/LoopVectorize/SystemZ/pr47665.ll llvm/test/Transforms/LoopVectorize/SystemZ/predicated-first-order-recurrence.ll 
llvm/test/Transforms/LoopVectorize/SystemZ/scalar-steps-with-users-demanding-all-lanes-and-first-lane-only.ll llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll llvm/test/Transforms/LoopVectorize/X86/cost-constant-known-via-scev.ll llvm/test/Transforms/LoopVectorize/X86/cost-model.ll llvm/test/Transforms/LoopVectorize/X86/divs-with-tail-folding.ll llvm/test/Transforms/LoopVectorize/X86/drop-inbounds-flags-for-reverse-vector-pointer.ll llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll llvm/test/Transforms/LoopVectorize/X86/fminimumnum.ll llvm/test/Transforms/LoopVectorize/X86/gep-use-outside-loop.ll llvm/test/Transforms/LoopVectorize/X86/imprecise-through-phis.ll llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll llvm/test/Transforms/LoopVectorize/X86/interleave-cost.ll llvm/test/Transforms/LoopVectorize/X86/interleave-ptradd-with-replicated-operand.ll llvm/test/Transforms/LoopVectorize/X86/interleaved-accesses-hoist-load-across-store.ll llvm/test/Transforms/LoopVectorize/X86/interleaved-accesses-sink-store-across-load.ll llvm/test/Transforms/LoopVectorize/X86/interleaving.ll llvm/test/Transforms/LoopVectorize/X86/limit-vf-by-tripcount.ll llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll llvm/test/Transforms/LoopVectorize/X86/masked-store-cost.ll llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll llvm/test/Transforms/LoopVectorize/X86/metadata-enable.ll llvm/test/Transforms/LoopVectorize/X86/optsize.ll llvm/test/Transforms/LoopVectorize/X86/outer_loop_test1_no_explicit_vect_width.ll llvm/test/Transforms/LoopVectorize/X86/parallel-loops.ll llvm/test/Transforms/LoopVectorize/X86/pr109581-unused-blend.ll llvm/test/Transforms/LoopVectorize/X86/pr131359-dead-for-splice.ll llvm/test/Transforms/LoopVectorize/X86/pr141968-instsimplifyfolder.ll llvm/test/Transforms/LoopVectorize/X86/pr34438.ll 
llvm/test/Transforms/LoopVectorize/X86/pr36524.ll llvm/test/Transforms/LoopVectorize/X86/pr51366-sunk-instruction-used-outside-of-loop.ll llvm/test/Transforms/LoopVectorize/X86/pr81872.ll llvm/test/Transforms/LoopVectorize/X86/reduction-fastmath.ll llvm/test/Transforms/LoopVectorize/X86/replicate-recipe-with-only-first-lane-used.ll llvm/test/Transforms/LoopVectorize/X86/replicate-uniform-call.ll llvm/test/Transforms/LoopVectorize/X86/scev-checks-unprofitable.ll llvm/test/Transforms/LoopVectorize/X86/small-size.ll llvm/test/Transforms/LoopVectorize/X86/strided_load_cost.ll llvm/test/Transforms/LoopVectorize/X86/tail_loop_folding.ll llvm/test/Transforms/LoopVectorize/X86/uniform_load.ll llvm/test/Transforms/LoopVectorize/X86/uniform_mem_op.ll llvm/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll llvm/test/Transforms/LoopVectorize/X86/vectorize-force-tail-with-evl.ll llvm/test/Transforms/LoopVectorize/X86/vectorize-interleaved-accesses-gap.ll llvm/test/Transforms/LoopVectorize/X86/vplan-native-inner-loop-only.ll llvm/test/Transforms/LoopVectorize/X86/widened-value-used-as-scalar-and-first-lane.ll llvm/test/Transforms/LoopVectorize/X86/x86-predication.ll llvm/test/Transforms/LoopVectorize/assume.ll llvm/test/Transforms/LoopVectorize/blend-in-header.ll llvm/test/Transforms/LoopVectorize/bsd_regex.ll llvm/test/Transforms/LoopVectorize/check-prof-info.ll llvm/test/Transforms/LoopVectorize/constantfolder-infer-correct-gepty.ll llvm/test/Transforms/LoopVectorize/constantfolder.ll llvm/test/Transforms/LoopVectorize/create-induction-resume.ll llvm/test/Transforms/LoopVectorize/dbg-outer-loop-vect.ll llvm/test/Transforms/LoopVectorize/dead_instructions.ll llvm/test/Transforms/LoopVectorize/debugloc-optimize-vfuf-term.ll llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-const-TC.ll llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-divisible-TC.ll 
llvm/test/Transforms/LoopVectorize/expand-scev-after-invoke.ll llvm/test/Transforms/LoopVectorize/extract-from-end-vector-constant.ll llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll llvm/test/Transforms/LoopVectorize/first-order-recurrence-dead-instructions.ll llvm/test/Transforms/LoopVectorize/first-order-recurrence-interleave-only.ll llvm/test/Transforms/LoopVectorize/first-order-recurrence-multiply-recurrences.ll llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll llvm/test/Transforms/LoopVectorize/float-induction.ll llvm/test/Transforms/LoopVectorize/float-minmax-instruction-flag.ll llvm/test/Transforms/LoopVectorize/forked-pointers.ll llvm/test/Transforms/LoopVectorize/hints-trans.ll llvm/test/Transforms/LoopVectorize/if-pred-non-void.ll llvm/test/Transforms/LoopVectorize/if-pred-stores.ll llvm/test/Transforms/LoopVectorize/if-reduction.ll llvm/test/Transforms/LoopVectorize/induction-multiple-uses-in-same-instruction.ll llvm/test/Transforms/LoopVectorize/induction-step.ll llvm/test/Transforms/LoopVectorize/induction.ll llvm/test/Transforms/LoopVectorize/instruction-only-used-outside-of-loop.ll llvm/test/Transforms/LoopVectorize/interleave-with-i65-induction.ll llvm/test/Transforms/LoopVectorize/interleaved-accesses-different-insert-position.ll llvm/test/Transforms/LoopVectorize/interleaved-accesses-gep-nowrap-flags.ll llvm/test/Transforms/LoopVectorize/interleaved-accesses-metadata.ll llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll llvm/test/Transforms/LoopVectorize/invalidate-scev-at-scope-after-vectorization.ll llvm/test/Transforms/LoopVectorize/is_fpclass.ll llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll llvm/test/Transforms/LoopVectorize/iv-select-cmp-trunc.ll llvm/test/Transforms/LoopVectorize/iv-select-cmp.ll llvm/test/Transforms/LoopVectorize/iv_outside_user.ll llvm/test/Transforms/LoopVectorize/lcssa-crashes.ll llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll 
llvm/test/Transforms/LoopVectorize/load-deref-pred-neg-off.ll llvm/test/Transforms/LoopVectorize/load-deref-pred-poison-ub-ops-feeding-pointer.ll llvm/test/Transforms/LoopVectorize/load-of-struct-deref-pred.ll llvm/test/Transforms/LoopVectorize/loop-form.ll llvm/test/Transforms/LoopVectorize/make-followup-loop-id.ll llvm/test/Transforms/LoopVectorize/memdep-fold-tail.ll llvm/test/Transforms/LoopVectorize/metadata.ll llvm/test/Transforms/LoopVectorize/min-trip-count-known-via-scev.ll llvm/test/Transforms/LoopVectorize/minimumnum-maximumnum-reductions.ll llvm/test/Transforms/LoopVectorize/multiple-address-spaces.ll llvm/test/Transforms/LoopVectorize/multiple-result-intrinsics.ll llvm/test/Transforms/LoopVectorize/narrow-to-single-scalar.ll llvm/test/Transforms/LoopVectorize/nested-loops-scev-expansion.ll llvm/test/Transforms/LoopVectorize/no_outside_user.ll llvm/test/Transforms/LoopVectorize/noalias-scope-decl.ll llvm/test/Transforms/LoopVectorize/non-const-n.ll llvm/test/Transforms/LoopVectorize/optsize.ll llvm/test/Transforms/LoopVectorize/outer-loop-wide-phis.ll llvm/test/Transforms/LoopVectorize/outer_loop_test1.ll llvm/test/Transforms/LoopVectorize/phi-cost.ll llvm/test/Transforms/LoopVectorize/pointer-induction-index-width-smaller-than-iv-width.ll llvm/test/Transforms/LoopVectorize/pointer-induction-unroll.ll llvm/test/Transforms/LoopVectorize/pointer-induction.ll llvm/test/Transforms/LoopVectorize/pr154045-dont-fold-extractelement-livein.ll llvm/test/Transforms/LoopVectorize/pr32859.ll llvm/test/Transforms/LoopVectorize/pr36983-multiple-lcssa.ll llvm/test/Transforms/LoopVectorize/pr39417-optsize-scevchecks.ll llvm/test/Transforms/LoopVectorize/pr44488-predication.ll llvm/test/Transforms/LoopVectorize/pr45525.ll llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll llvm/test/Transforms/LoopVectorize/pr46525-expander-insertpoint.ll llvm/test/Transforms/LoopVectorize/pr47343-expander-lcssa-after-cfg-update.ll 
llvm/test/Transforms/LoopVectorize/pr50686.ll llvm/test/Transforms/LoopVectorize/pr51614-fold-tail-by-masking.ll llvm/test/Transforms/LoopVectorize/pr55167-fold-tail-live-out.ll llvm/test/Transforms/LoopVectorize/pr58811-scev-expansion.ll llvm/test/Transforms/LoopVectorize/pr66616.ll llvm/test/Transforms/LoopVectorize/predicate-switch.ll llvm/test/Transforms/LoopVectorize/predicatedinst-loop-invariant.ll llvm/test/Transforms/LoopVectorize/preserve-dbg-loc-and-loop-metadata.ll llvm/test/Transforms/LoopVectorize/preserve-dbg-loc-reduction-inloop.ll llvm/test/Transforms/LoopVectorize/reduction-inloop-min-max.ll llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll llvm/test/Transforms/LoopVectorize/reduction-inloop-uf4.ll llvm/test/Transforms/LoopVectorize/reduction-inloop.ll llvm/test/Transforms/LoopVectorize/reduction-predselect.ll llvm/test/Transforms/LoopVectorize/reduction.ll llvm/test/Transforms/LoopVectorize/remarks-reduction-inloop.ll llvm/test/Transforms/LoopVectorize/reuse-lcssa-phi-scev-expansion.ll llvm/test/Transforms/LoopVectorize/reverse_induction.ll llvm/test/Transforms/LoopVectorize/runtime-check-readonly.ll llvm/test/Transforms/LoopVectorize/runtime-check.ll llvm/test/Transforms/LoopVectorize/runtime-checks-difference-simplifications.ll llvm/test/Transforms/LoopVectorize/runtime-checks-difference.ll llvm/test/Transforms/LoopVectorize/runtime-checks-hoist.ll llvm/test/Transforms/LoopVectorize/scev-exit-phi-invalidation.ll llvm/test/Transforms/LoopVectorize/scev-predicate-reasoning.ll llvm/test/Transforms/LoopVectorize/select-neg-cond.ll llvm/test/Transforms/LoopVectorize/select-reduction-start-value-may-be-undef-or-poison.ll llvm/test/Transforms/LoopVectorize/select-reduction.ll llvm/test/Transforms/LoopVectorize/single-early-exit-cond-poison.ll llvm/test/Transforms/LoopVectorize/single-early-exit-deref-assumptions.ll llvm/test/Transforms/LoopVectorize/single-early-exit-interleave-hint.ll 
llvm/test/Transforms/LoopVectorize/single-early-exit-interleave-only.ll llvm/test/Transforms/LoopVectorize/single-early-exit-interleave.ll llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll llvm/test/Transforms/LoopVectorize/single_early_exit.ll llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll llvm/test/Transforms/LoopVectorize/single_early_exit_with_outer_loop.ll llvm/test/Transforms/LoopVectorize/store-reduction-results-in-tail-folded-loop.ll llvm/test/Transforms/LoopVectorize/strict-fadd-interleave-only.ll llvm/test/Transforms/LoopVectorize/strided-accesses-interleave-only.ll llvm/test/Transforms/LoopVectorize/struct-return-replicate.ll llvm/test/Transforms/LoopVectorize/struct-return.ll llvm/test/Transforms/LoopVectorize/tail-folding-alloca-in-loop.ll llvm/test/Transforms/LoopVectorize/tail-folding-optimize-vector-induction-width.ll llvm/test/Transforms/LoopVectorize/tail-folding-switch.ll llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll llvm/test/Transforms/LoopVectorize/trunc-extended-icmps.ll llvm/test/Transforms/LoopVectorize/trunc-loads-p16.ll llvm/test/Transforms/LoopVectorize/trunc-reductions.ll llvm/test/Transforms/LoopVectorize/trunc-shifts.ll llvm/test/Transforms/LoopVectorize/uitofp-preserve-nneg.ll llvm/test/Transforms/LoopVectorize/uniform-blend.ll llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1.ll llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_and.ll llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_div_urem.ll llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_lshr.ll llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction2.ll llvm/test/Transforms/LoopVectorize/unroll_nonlatch.ll llvm/test/Transforms/LoopVectorize/unused-blend-mask-for-first-operand.ll llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-branch-weights.ll llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-early-exit.ll 
llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-outside-iv-users.ll llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll llvm/test/Transforms/LoopVectorize/version-stride-with-integer-casts.ll llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll llvm/test/Transforms/LoopVectorize/vplan-widen-call-instruction.ll llvm/test/Transforms/LoopVectorize/vplan-widen-select-instruction.ll llvm/test/Transforms/LoopVectorize/widen-gep-all-indices-invariant.ll llvm/test/Transforms/LoopVectorize/widen-intrinsic.ll llvm/test/Transforms/PhaseOrdering/X86/vdiv-nounroll.ll llvm/test/Transforms/PhaseOrdering/X86/vector-reduction-known-first-value.ll llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll The following files introduce new uses of undef:
Undef is now deprecated and should only be used in the rare cases where no replacement is possible. For example, a load of uninitialized memory yields `undef`. You should use `poison` values for placeholders instead.

In tests, avoid using `undef` and having tests that trigger undefined behavior. If you need an operand with some unimportant value, you can add a new argument to the function and use that instead.

For example, this is considered a bad practice:

define void @fn() {
...
br i1 undef, ...
}

Please use the following instead:

define void @fn(i1 %cond) {
...
br i1 %cond, ...
}

Please refer to the Undefined Behavior Manual for more information.
Build on top of #154510 to
completely remove dead scalar loops.
Depends on #154510. (Included
in the PR)