diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h index 167845ce646b9..858c1d5392071 100644 --- a/llvm/include/llvm/Analysis/ScalarEvolution.h +++ b/llvm/include/llvm/Analysis/ScalarEvolution.h @@ -569,7 +569,9 @@ class ScalarEvolution { LLVM_ABI const SCEV *getTruncateExpr(const SCEV *Op, Type *Ty, unsigned Depth = 0); LLVM_ABI const SCEV *getVScale(Type *Ty); - LLVM_ABI const SCEV *getElementCount(Type *Ty, ElementCount EC); + LLVM_ABI const SCEV * + getElementCount(Type *Ty, ElementCount EC, + SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap); LLVM_ABI const SCEV *getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth = 0); LLVM_ABI const SCEV *getZeroExtendExprImpl(const SCEV *Op, Type *Ty, diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index f60a1e9f22704..13b9aa28c827b 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -500,10 +500,11 @@ const SCEV *ScalarEvolution::getVScale(Type *Ty) { return S; } -const SCEV *ScalarEvolution::getElementCount(Type *Ty, ElementCount EC) { +const SCEV *ScalarEvolution::getElementCount(Type *Ty, ElementCount EC, + SCEV::NoWrapFlags Flags) { const SCEV *Res = getConstant(Ty, EC.getKnownMinValue()); if (EC.isScalable()) - Res = getMulExpr(Res, getVScale(Ty)); + Res = getMulExpr(Res, getVScale(Ty), Flags); return Res; } diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h index 838476dcae661..11bb34e5ba702 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h @@ -334,6 +334,10 @@ class VPBuilder { FPBinOp ? FPBinOp->getFastMathFlags() : FastMathFlags(), DL)); } + VPExpandSCEVRecipe *createExpandSCEV(const SCEV *Expr) { + return tryInsertInstruction(new VPExpandSCEVRecipe(Expr)); + } + //===--------------------------------------------------------------------===// // RAII helpers. //===--------------------------------------------------------------------===// @@ -559,6 +563,11 @@ class LoopVectorizationPlanner { /// Emit remarks for recipes with invalid costs in the available VPlans. void emitInvalidCostRemarks(OptimizationRemarkEmitter *ORE); + /// Create a check to \p Plan to see if the vector loop should be executed + /// based on its trip count. 
+ void addMinimumIterationCheck(VPlan &Plan, ElementCount VF, unsigned UF, + ElementCount MinProfitableTripCount) const; + protected: /// Build VPlans for power-of-2 VF's between \p MinVF and \p MaxVF inclusive, /// according to the information gathered by Legal when it checked if it is diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 0a750ff53b951..98554310c74df 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -500,26 +500,22 @@ class InnerLoopVectorizer { InnerLoopVectorizer(Loop *OrigLoop, PredicatedScalarEvolution &PSE, LoopInfo *LI, DominatorTree *DT, const TargetTransformInfo *TTI, AssumptionCache *AC, - ElementCount VecWidth, - ElementCount MinProfitableTripCount, - unsigned UnrollFactor, LoopVectorizationCostModel *CM, - BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, - GeneratedRTChecks &RTChecks, VPlan &Plan) + ElementCount VecWidth, unsigned UnrollFactor, + LoopVectorizationCostModel *CM, BlockFrequencyInfo *BFI, + ProfileSummaryInfo *PSI, GeneratedRTChecks &RTChecks, + VPlan &Plan) : OrigLoop(OrigLoop), PSE(PSE), LI(LI), DT(DT), TTI(TTI), AC(AC), - VF(VecWidth), MinProfitableTripCount(MinProfitableTripCount), - UF(UnrollFactor), Builder(PSE.getSE()->getContext()), Cost(CM), - BFI(BFI), PSI(PSI), RTChecks(RTChecks), Plan(Plan), + VF(VecWidth), UF(UnrollFactor), Builder(PSE.getSE()->getContext()), + Cost(CM), BFI(BFI), PSI(PSI), RTChecks(RTChecks), Plan(Plan), VectorPHVPBB(cast<VPBasicBlock>( Plan.getVectorLoopRegion()->getSinglePredecessor())) {} virtual ~InnerLoopVectorizer() = default; - /// Create a new empty loop that will contain vectorized instructions later - /// on, while the old loop will be used as the scalar remainder. Control flow - /// is generated around the vectorized (and scalar epilogue) loops consisting - /// of various checks and bypasses. Return the pre-header block of the new - /// loop. In the case of epilogue vectorization, this function is overriden to - /// handle the more complex control flow around the loops. + /// Creates a basic block for the scalar preheader. Both + /// EpilogueVectorizerMainLoop and EpilogueVectorizerEpilogueLoop override + /// the method to create additional blocks and checks needed for epilogue + /// vectorization. virtual BasicBlock *createVectorizedLoopSkeleton(); /// Fix the vectorized code, taking care of header phi's, and more. @@ -547,16 +543,9 @@ class InnerLoopVectorizer { protected: friend class LoopVectorizationPlanner; - // Create a check to see if the vector loop should be executed - Value *createIterationCountCheck(ElementCount VF, unsigned UF) const; - - /// Emit a bypass check to see if the vector trip count is zero, including if - /// it overflows. - void emitIterationCountCheck(BasicBlock *Bypass); - - /// Emit basic blocks (prefixed with \p Prefix) for the iteration check, - /// vector loop preheader, middle block and scalar preheader. - void createVectorLoopSkeleton(StringRef Prefix); + /// Create a new IR basic block for the scalar preheader whose name is + /// prefixed with \p Prefix. + void createScalarPreheader(StringRef Prefix); /// Allow subclasses to override and print debug traces before/after vplan /// execution, when trace information is requested. @@ -592,8 +581,6 @@ class InnerLoopVectorizer { /// vector elements. ElementCount VF; - ElementCount MinProfitableTripCount; - /// The vectorization unroll factor to use.
Each scalar is vectorized to this /// many different vector instructions. unsigned UF; @@ -679,20 +666,8 @@ class InnerLoopAndEpilogueVectorizer : public InnerLoopVectorizer { GeneratedRTChecks &Checks, VPlan &Plan, ElementCount VecWidth, ElementCount MinProfitableTripCount, unsigned UnrollFactor) : InnerLoopVectorizer(OrigLoop, PSE, LI, DT, TTI, AC, VecWidth, - MinProfitableTripCount, UnrollFactor, CM, BFI, PSI, - Checks, Plan), - EPI(EPI) {} - - // Override this function to handle the more complex control flow around the - // three loops. - BasicBlock *createVectorizedLoopSkeleton() final { - return createEpilogueVectorizedLoopSkeleton(); - } - - /// The interface for creating a vectorized skeleton using one of two - /// different strategies, each corresponding to one execution of the vplan - /// as described above. - virtual BasicBlock *createEpilogueVectorizedLoopSkeleton() = 0; + UnrollFactor, CM, BFI, PSI, Checks, Plan), + EPI(EPI), MinProfitableTripCount(MinProfitableTripCount) {} /// Holds and updates state information required to vectorize the main loop /// and its epilogue in two separate passes. This setup helps us avoid @@ -701,6 +676,9 @@ class InnerLoopAndEpilogueVectorizer : public InnerLoopVectorizer { /// iteration count of the loop is so small that the main vector loop is /// completely skipped. EpilogueLoopVectorizationInfo &EPI; + +protected: + ElementCount MinProfitableTripCount; }; /// A specialized derived class of inner loop vectorizer that performs @@ -720,10 +698,13 @@ class EpilogueVectorizerMainLoop : public InnerLoopAndEpilogueVectorizer { BFI, PSI, Check, Plan, EPI.MainLoopVF, EPI.MainLoopVF, EPI.MainLoopUF) {} /// Implements the interface for creating a vectorized skeleton using the - /// *main loop* strategy (ie the first pass of vplan execution). - BasicBlock *createEpilogueVectorizedLoopSkeleton() final; + /// *main loop* strategy (i.e., the first pass of VPlan execution). + BasicBlock *createVectorizedLoopSkeleton() final; protected: + // Create a check to see if the main vector loop should be executed + Value *createIterationCountCheck(ElementCount VF, unsigned UF) const; + /// Emits an iteration count bypass check once for the main loop (when \p /// ForEpilogue is false) and once for the epilogue loop (when \p /// ForEpilogue is true). @@ -749,8 +730,8 @@ class EpilogueVectorizerEpilogueLoop : public InnerLoopAndEpilogueVectorizer { TripCount = EPI.TripCount; } /// Implements the interface for creating a vectorized skeleton using the - /// *epilogue loop* strategy (ie the second pass of vplan execution). - BasicBlock *createEpilogueVectorizedLoopSkeleton() final; + /// *epilogue loop* strategy (i.e., the second pass of VPlan execution). 
+ BasicBlock *createVectorizedLoopSkeleton() final; protected: /// Emits an iteration count bypass check after the main vector loop has @@ -2300,7 +2281,8 @@ void InnerLoopVectorizer::introduceCheckBlockInVPlan(BasicBlock *CheckIRBB) { } } -Value *InnerLoopVectorizer::createIterationCountCheck(ElementCount VF, +Value * +EpilogueVectorizerMainLoop::createIterationCountCheck(ElementCount VF, unsigned UF) const { // Generate code to check if the loop's trip count is less than VF * UF, or // equal to it in case a scalar epilogue is required; this implies that the @@ -2371,25 +2353,6 @@ Value *InnerLoopVectorizer::createIterationCountCheck(ElementCount VF, return CheckMinIters; } -void InnerLoopVectorizer::emitIterationCountCheck(BasicBlock *Bypass) { - BasicBlock *const TCCheckBlock = LoopVectorPreHeader; - Value *CheckMinIters = createIterationCountCheck(VF, UF); - // Create new preheader for vector loop. - LoopVectorPreHeader = SplitBlock(TCCheckBlock, TCCheckBlock->getTerminator(), - static_cast(nullptr), LI, - nullptr, "vector.ph"); - - BranchInst &BI = - *BranchInst::Create(Bypass, LoopVectorPreHeader, CheckMinIters); - if (hasBranchWeightMD(*OrigLoop->getLoopLatch()->getTerminator())) - setBranchWeights(BI, MinItersBypassWeights, /*IsExpected=*/false); - ReplaceInstWithInst(TCCheckBlock->getTerminator(), &BI); - - assert(cast(Plan.getEntry())->getIRBasicBlock() == - TCCheckBlock && - "Plan's entry must be TCCCheckBlock"); -} - /// Replace \p VPBB with a VPIRBasicBlock wrapping \p IRBB. All recipes from \p /// VPBB are moved to the end of the newly created VPIRBasicBlock. VPBB must /// have a single predecessor, which is rewired to the new VPIRBasicBlock. All @@ -2410,7 +2373,7 @@ static VPIRBasicBlock *replaceVPBBWithIRVPBB(VPBasicBlock *VPBB, return IRVPBB; } -void InnerLoopVectorizer::createVectorLoopSkeleton(StringRef Prefix) { +void InnerLoopVectorizer::createScalarPreheader(StringRef Prefix) { LoopVectorPreHeader = OrigLoop->getLoopPreheader(); assert(LoopVectorPreHeader && "Invalid loop structure"); assert((OrigLoop->getUniqueLatchExitBlock() || @@ -2423,7 +2386,7 @@ void InnerLoopVectorizer::createVectorLoopSkeleton(StringRef Prefix) { // NOTE: The Plan's scalar preheader VPBB isn't replaced with a VPIRBasicBlock // wrapping LoopScalarPreHeader here at the moment, because the Plan's scalar // preheader may be unreachable at this point. Instead it is replaced in - // createVectorizedLoopSkeleton. + // executePlan. } /// Return the expanded step for \p ID using \p ExpandedSCEVs to look up SCEV @@ -2464,53 +2427,8 @@ static void addFullyUnrolledInstructionsToIgnore( } BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton() { - /* - In this function we generate a new loop. The new loop will contain - the vectorized instructions while the old loop will continue to run the - scalar remainder. - - [ ] <-- old preheader - loop iteration number check and SCEVs in Plan's - / | preheader are expanded here. Eventually all required SCEV - / | expansion should happen here. - / v - | [ ] <-- vector loop bypass (may consist of multiple blocks). - | / | - | / v - || [ ] <-- vector pre header. - |/ | - | v - | [ ] \ - | [ ]_| <-- vector loop (created during VPlan execution). - | | - | v - \ -[ ] <--- middle-block (wrapped in VPIRBasicBlock with the branch to - | | successors created during VPlan execution) - \/ | - /\ v - | ->[ ] <--- new preheader (wrapped in VPIRBasicBlock). - | | - (opt) v <-- edge from middle to exit iff epilogue is not required. 
- | [ ] \ - | [ ]_| <-- old scalar loop to handle remainder (scalar epilogue, header - | | wrapped in VPIRBasicBlock). - \ | - \ v - >[ ] <-- exit block(s). (wrapped in VPIRBasicBlock) - ... - */ - - // Create an empty vector loop, and prepare basic blocks for the runtime - // checks. - createVectorLoopSkeleton(""); - - // Now, compare the new count to zero. If it is zero skip the vector loop and - // jump to the scalar loop. This check also covers the case where the - // backedge-taken count is uint##_max: adding one to it will overflow leading - // to an incorrect trip count of zero. In this (rare) case we will also jump - // to the scalar loop. - emitIterationCountCheck(LoopScalarPreHeader); - - replaceVPBBWithIRVPBB(VectorPHVPBB, LoopVectorPreHeader); + // Create a new IR basic block for the scalar preheader. + createScalarPreheader(""); return LoopVectorPreHeader; } @@ -7480,8 +7398,8 @@ DenseMap LoopVectorizationPlanner::executePlan( /// This function is partially responsible for generating the control flow /// depicted in https://llvm.org/docs/Vectorizers.html#epilogue-vectorization. -BasicBlock *EpilogueVectorizerMainLoop::createEpilogueVectorizedLoopSkeleton() { - createVectorLoopSkeleton(""); +BasicBlock *EpilogueVectorizerMainLoop::createVectorizedLoopSkeleton() { + createScalarPreheader(""); // Generate the code to check the minimum iteration count of the vector // epilogue (see below). @@ -7566,9 +7484,8 @@ EpilogueVectorizerMainLoop::emitIterationCountCheck(BasicBlock *Bypass, /// This function is partially responsible for generating the control flow /// depicted in https://llvm.org/docs/Vectorizers.html#epilogue-vectorization. -BasicBlock * -EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton() { - createVectorLoopSkeleton("vec.epilog."); +BasicBlock *EpilogueVectorizerEpilogueLoop::createVectorizedLoopSkeleton() { + createScalarPreheader("vec.epilog."); // Now, compare the remaining count and if there aren't enough iterations to // execute the vectorized epilogue skip to the scalar part. @@ -9369,6 +9286,29 @@ void LoopVectorizationPlanner::attachRuntimeChecks( } } +void LoopVectorizationPlanner::addMinimumIterationCheck( + VPlan &Plan, ElementCount VF, unsigned UF, + ElementCount MinProfitableTripCount) const { + // vscale is not necessarily a power-of-2, which means we cannot guarantee + // an overflow to zero when updating induction variables and so an + // additional overflow check is required before entering the vector loop. + bool IsIndvarOverflowCheckNeededForVF = + VF.isScalable() && !TTI.isVScaleKnownToBeAPowerOfTwo() && + !isIndvarOverflowCheckKnownFalse(&CM, VF, UF) && + CM.getTailFoldingStyle() != + TailFoldingStyle::DataAndControlFlowWithoutRuntimeCheck; + const uint32_t *BranchWeigths = + hasBranchWeightMD(*OrigLoop->getLoopLatch()->getTerminator()) + ? 
&MinItersBypassWeights[0] + : nullptr; + VPlanTransforms::addMinimumIterationCheck( + Plan, VF, UF, MinProfitableTripCount, + CM.requiresScalarEpilogue(VF.isVector()), CM.foldTailByMasking(), + IsIndvarOverflowCheckNeededForVF, OrigLoop, BranchWeigths, + OrigLoop->getLoopPredecessor()->getTerminator()->getDebugLoc(), + *PSE.getSE()); +} + void VPDerivedIVRecipe::execute(VPTransformState &State) { assert(!State.Lane && "VPDerivedIVRecipe being replicated."); @@ -9484,11 +9424,14 @@ static bool processLoopInVPlanNativePath( { GeneratedRTChecks Checks(PSE, DT, LI, TTI, F->getDataLayout(), CM.CostKind); - InnerLoopVectorizer LB(L, PSE, LI, DT, TTI, AC, VF.Width, VF.Width, 1, &CM, + InnerLoopVectorizer LB(L, PSE, LI, DT, TTI, AC, VF.Width, /*UF=*/1, &CM, BFI, PSI, Checks, BestPlan); LLVM_DEBUG(dbgs() << "Vectorizing outer loop in \"" << L->getHeader()->getParent()->getName() << "\"\n"); - LVP.executePlan(VF.Width, 1, BestPlan, LB, DT, false); + LVP.addMinimumIterationCheck(BestPlan, VF.Width, /*UF=*/1, + VF.MinProfitableTripCount); + + LVP.executePlan(VF.Width, /*UF=*/1, BestPlan, LB, DT, false); } reportVectorization(ORE, L, VF, 1); @@ -10279,16 +10222,17 @@ bool LoopVectorizePass::processLoop(Loop *L) { // If we decided that it is not legal to vectorize the loop, then // interleave it. VPlan &BestPlan = LVP.getPlanFor(VF.Width); - InnerLoopVectorizer Unroller( - L, PSE, LI, DT, TTI, AC, ElementCount::getFixed(1), - ElementCount::getFixed(1), IC, &CM, BFI, PSI, Checks, BestPlan); + InnerLoopVectorizer Unroller(L, PSE, LI, DT, TTI, AC, + ElementCount::getFixed(1), IC, &CM, BFI, PSI, + Checks, BestPlan); // TODO: Move to general VPlan pipeline once epilogue loops are also // supported. VPlanTransforms::runPass( VPlanTransforms::materializeConstantVectorTripCount, BestPlan, VF.Width, IC, PSE); - + LVP.addMinimumIterationCheck(BestPlan, VF.Width, IC, + VF.MinProfitableTripCount); LVP.executePlan(VF.Width, IC, BestPlan, Unroller, DT, false); ORE->emit([&]() { @@ -10349,14 +10293,15 @@ bool LoopVectorizePass::processLoop(Loop *L) { if (!Checks.hasChecks()) DisableRuntimeUnroll = true; } else { - InnerLoopVectorizer LB(L, PSE, LI, DT, TTI, AC, VF.Width, - VF.MinProfitableTripCount, IC, &CM, BFI, PSI, - Checks, BestPlan); + InnerLoopVectorizer LB(L, PSE, LI, DT, TTI, AC, VF.Width, IC, &CM, BFI, + PSI, Checks, BestPlan); // TODO: Move to general VPlan pipeline once epilogue loops are also // supported. 
VPlanTransforms::runPass( VPlanTransforms::materializeConstantVectorTripCount, BestPlan, VF.Width, IC, PSE); + LVP.addMinimumIterationCheck(BestPlan, VF.Width, IC, + VF.MinProfitableTripCount); LVP.executePlan(VF.Width, IC, BestPlan, LB, DT, false); ++LoopsVectorized; diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp index 4a8b4b8d04840..c1c525ee23122 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp @@ -669,6 +669,90 @@ void VPlanTransforms::attachCheckBlock(VPlan &Plan, Value *Cond, } } +void VPlanTransforms::addMinimumIterationCheck( + VPlan &Plan, ElementCount VF, unsigned UF, + ElementCount MinProfitableTripCount, bool RequiresScalarEpilogue, + bool TailFolded, bool CheckNeededWithTailFolding, Loop *OrigLoop, + const uint32_t *MinItersBypassWeights, DebugLoc DL, ScalarEvolution &SE) { + // Generate code to check if the loop's trip count is less than VF * UF, or + // equal to it in case a scalar epilogue is required; this implies that the + // vector trip count is zero. This check also covers the case where adding one + // to the backedge-taken count overflowed leading to an incorrect trip count + // of zero. In this case we will also jump to the scalar loop. + CmpInst::Predicate CmpPred = + RequiresScalarEpilogue ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_ULT; + // If tail is to be folded, vector loop takes care of all iterations. + VPValue *TripCountVPV = Plan.getTripCount(); + const SCEV *TripCount = vputils::getSCEVExprForVPValue(TripCountVPV, SE); + Type *TripCountTy = TripCount->getType(); + auto GetMinTripCount = [&]() -> const SCEV * { + // Compute max(MinProfitableTripCount, UF * VF) and return it. + const SCEV *VFxUF = + SE.getElementCount(TripCountTy, (VF * UF), SCEV::FlagNUW); + if (UF * VF.getKnownMinValue() >= + MinProfitableTripCount.getKnownMinValue()) { + // TODO: SCEV should be able to simplify test. + return VFxUF; + } + const SCEV *MinProfitableTripCountSCEV = + SE.getElementCount(TripCountTy, MinProfitableTripCount, SCEV::FlagNUW); + return SE.getUMaxExpr(MinProfitableTripCountSCEV, VFxUF); + }; + + VPBasicBlock *EntryVPBB = Plan.getEntry(); + VPBuilder Builder(EntryVPBB); + VPValue *TripCountCheck = Plan.getFalse(); + const SCEV *Step = GetMinTripCount(); + if (TailFolded) { + if (CheckNeededWithTailFolding) { + // vscale is not necessarily a power-of-2, which means we cannot guarantee + // an overflow to zero when updating induction variables and so an + // additional overflow check is required before entering the vector loop. + + // Get the maximum unsigned value for the type. + VPValue *MaxUIntTripCount = Plan.getOrAddLiveIn(ConstantInt::get( + TripCountTy, cast<IntegerType>(TripCountTy)->getMask())); + VPValue *DistanceToMax = Builder.createNaryOp( + Instruction::Sub, {MaxUIntTripCount, TripCountVPV}, + DebugLoc::getUnknown()); + + // Don't execute the vector loop if (UMax - n) < (VF * UF). + // FIXME: Should only check VF * UF, but currently checks Step=max(VF*UF, + // minProfitableTripCount). + TripCountCheck = Builder.createICmp(ICmpInst::ICMP_ULT, DistanceToMax, + Builder.createExpandSCEV(Step), DL); + } else { + // TripCountCheck = false, folding tail implies positive vector trip + // count. + } + } else { + // TODO: Emit unconditional branch to vector preheader instead of + // conditional branch with known condition. + TripCount = SE.applyLoopGuards(TripCount, OrigLoop); + // Check if the trip count is < the step.
+ if (SE.isKnownPredicate(CmpPred, TripCount, Step)) { + // TODO: Ensure step is at most the trip count when determining max VF and + // UF, w/o tail folding. + TripCountCheck = Plan.getTrue(); + } else if (!SE.isKnownPredicate(CmpInst::getInversePredicate(CmpPred), + TripCount, Step)) { + // Generate the minimum iteration check only if we cannot prove the + // check is known to be true, or known to be false. + VPValue *MinTripCountVPV = Builder.createExpandSCEV(Step); + TripCountCheck = Builder.createICmp( + CmpPred, TripCountVPV, MinTripCountVPV, DL, "min.iters.check"); + } // else step known to be < trip count, use TripCountCheck preset to false. + } + VPInstruction *Term = + Builder.createNaryOp(VPInstruction::BranchOnCond, {TripCountCheck}, DL); + if (MinItersBypassWeights) { + MDBuilder MDB(Plan.getContext()); + MDNode *BranchWeights = MDB.createBranchWeights( + ArrayRef(MinItersBypassWeights, 2), /*IsExpected=*/false); + Term->addMetadata(LLVMContext::MD_prof, BranchWeights); + } +} + bool VPlanTransforms::handleMaxMinNumReductions(VPlan &Plan) { auto GetMinMaxCompareValue = [](VPReductionPHIRecipe *RedPhiR) -> VPValue * { auto *MinMaxR = dyn_cast( diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 556278e4c3dba..d32d2a9ad11f7 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -3525,6 +3525,21 @@ VPlanTransforms::expandSCEVs(VPlan &Plan, ScalarEvolution &SE) { Plan.resetTripCount(Exp); ExpSCEV->eraseFromParent(); } + assert(none_of(*Entry, IsaPred<VPExpandSCEVRecipe>) && + "VPExpandSCEVRecipes must be at the beginning of the entry block, " + "after any VPIRInstructions"); + // Add IR instructions in the entry basic block but not in the VPIRBasicBlock + // to the VPIRBasicBlock. + auto EI = Entry->begin(); + for (Instruction &I : drop_end(*EntryBB)) { + if (EI != Entry->end() && isa<VPIRInstruction>(*EI) && + &cast<VPIRInstruction>(&*EI)->getInstruction() == &I) { + EI++; + continue; + } + VPIRInstruction::create(I)->insertBefore(*Entry, EI); + } + return ExpandedSCEVs; } diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h index 6349064716e78..9cf62a35ae36b 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h @@ -62,9 +62,40 @@ struct VPlanTransforms { /// The created loop is wrapped in an initial skeleton to facilitate /// vectorization, consisting of a vector pre-header, an exit block for the /// main vector loop (middle.block) and a new block as preheader of the scalar - /// loop (scalar.ph). It also adds a canonical IV and its increment, using \p - /// InductionTy and \p IVDL, and creates a VPValue expression for the original - /// trip count. + /// loop (scalar.ph). See below for an illustration. It also adds a canonical + /// IV and its increment, using \p InductionTy and \p IVDL, and creates a + /// VPValue expression for the original trip count. + /// + /// [ ] <-- Plan's entry VPIRBasicBlock, wrapping the original loop's + /// / \ old preheader. Will contain iteration number check and SCEV + /// | | expansions. + /// | | + /// / v + /// | [ ] <-- vector loop bypass (may consist of multiple blocks) will be + /// | / | added later. + /// | / v + /// || [ ] <-- vector pre header. + /// |/ | + /// | v + /// | [ ] \ <-- plain CFG loop wrapping original loop to be vectorized.
+ /// | [ ]_| + /// | | + /// | v + /// | [ ] <--- middle-block with the branch to successors + /// | / | + /// | / | + /// | | v + /// \--->[ ] <--- scalar preheader (initially a VPBasicBlock, which will be + /// | | replaced later by a VPIRBasicBlock wrapping the scalar + /// | | preheader basic block). + /// | | + /// v <-- edge from middle to exit iff epilogue is not required. + /// | [ ] \ + /// | [ ]_| <-- old scalar loop to handle remainder (scalar epilogue, + /// | | header wrapped in VPIRBasicBlock). + /// \ | + /// \ v + /// >[ ] <-- original loop exit block(s), wrapped in VPIRBasicBlocks. LLVM_ABI_FOR_TEST static std::unique_ptr<VPlan> buildVPlan0(Loop *TheLoop, LoopInfo &LI, Type *InductionTy, DebugLoc IVDL, PredicatedScalarEvolution &PSE); @@ -79,6 +110,13 @@ struct VPlanTransforms { bool RequiresScalarEpilogueCheck, bool TailFolded); + // Create a check to \p Plan to see if the vector loop should be executed. + static void addMinimumIterationCheck( + VPlan &Plan, ElementCount VF, unsigned UF, + ElementCount MinProfitableTripCount, bool RequiresScalarEpilogue, + bool TailFolded, bool CheckNeededWithTailFolding, Loop *OrigLoop, + const uint32_t *MinItersBypassWeights, DebugLoc DL, ScalarEvolution &SE); + /// Replace loops in \p Plan's flat CFG with VPRegionBlocks, turning \p Plan's /// flat CFG into a hierarchical CFG. LLVM_ABI_FOR_TEST static void createLoopRegions(VPlan &Plan); diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll index c914aa3582e5c..aa3d81a19a6d2 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll @@ -1246,7 +1246,7 @@ define void @pred_udiv_select_cost(ptr %A, ptr %B, ptr %C, i64 %n, i8 %y) #1 { ; DEFAULT-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1 ; DEFAULT-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() ; DEFAULT-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP1]], 2 -; DEFAULT-NEXT: [[TMP3:%.*]] = call i64 @llvm.umax.i64(i64 8, i64 [[TMP2]]) +; DEFAULT-NEXT: [[TMP3:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP2]], i64 8) ; DEFAULT-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], [[TMP3]] ; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] ; DEFAULT: [[VECTOR_MEMCHECK]]: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll index bfebbdad5af0b..a44cc09b8a8ea 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll @@ -9,7 +9,7 @@ define void @sdiv_feeding_gep(ptr %dst, i32 %x, i64 %M, i64 %conv6, i64 %N) { ; CHECK-NEXT: [[CONV61:%.*]] = zext i32 [[X]] to i64 ; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP10]], 2 -; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 8, i64 [[TMP1]]) +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 8) ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP2]] ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]] ; CHECK: [[VECTOR_SCEVCHECK]]: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/eliminate-tail-predication.ll b/llvm/test/Transforms/LoopVectorize/AArch64/eliminate-tail-predication.ll index 7363f86cdab7a..2ade55c10db36 100644 ---
a/llvm/test/Transforms/LoopVectorize/AArch64/eliminate-tail-predication.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/eliminate-tail-predication.ll @@ -9,22 +9,20 @@ define void @f1(ptr %A) #0 { ; CHECK-LABEL: define void @f1 ; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] +; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]] -; CHECK-NEXT: store splat (i32 1), ptr [[TMP4]], align 4 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]] +; CHECK-NEXT: store splat (i32 1), ptr [[TMP2]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-iv-select-cmp.ll b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-iv-select-cmp.ll index 8013a8f6e0d82..3a46944712567 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-iv-select-cmp.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-iv-select-cmp.ll @@ -8,8 +8,8 @@ define i8 @select_icmp_var_start(ptr %a, i8 %n, i8 %start) { ; CHECK-NEXT: [[TMP0:%.*]] = add i8 [[N]], -1 ; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[TMP0]] to i32 ; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i32 [[TMP1]], 1 -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP2]], 8 ; CHECK-NEXT: [[FR:%.*]] = freeze i8 [[START]] +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP2]], 8 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]] ; CHECK: [[VECTOR_MAIN_LOOP_ITER_CHECK]]: ; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i32 [[TMP2]], 32 @@ -127,8 +127,8 @@ define i32 @select_icmp_var_start_iv_trunc(i32 %N, i32 %start) #0 { ; CHECK-NEXT: [[N_POS:%.*]] = icmp sgt i32 [[N]], 0 ; CHECK-NEXT: call void @llvm.assume(i1 [[N_POS]]) ; CHECK-NEXT: [[N_EXT:%.*]] = zext i32 [[N]] to i64 -; CHECK-NEXT: [[FR:%.*]] = freeze i32 [[START]] ; CHECK-NEXT: [[TMP0:%.*]] = add nuw nsw i64 [[N_EXT]], 1 +; CHECK-NEXT: [[FR:%.*]] = freeze i32 [[START]] ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 4 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]] ; 
CHECK: [[VECTOR_MAIN_LOOP_ITER_CHECK]]: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-chained.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-chained.ll index 330c22d736809..c444d5bcc82c7 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-chained.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-chained.ll @@ -53,7 +53,7 @@ define i32 @chained_partial_reduce_add_sub(ptr %a, ptr %b, ptr %c, i32 %N) #0 { ; CHECK-SVE-NEXT: [[DIV27:%.*]] = lshr i32 [[N]], 1 ; CHECK-SVE-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[DIV27]] to i64 ; CHECK-SVE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-SVE-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 +; CHECK-SVE-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2 ; CHECK-SVE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], [[TMP1]] ; CHECK-SVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-SVE: vector.ph: @@ -94,7 +94,7 @@ define i32 @chained_partial_reduce_add_sub(ptr %a, ptr %b, ptr %c, i32 %N) #0 { ; CHECK-SVE-MAXBW-NEXT: [[DIV27:%.*]] = lshr i32 [[N]], 1 ; CHECK-SVE-MAXBW-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[DIV27]] to i64 ; CHECK-SVE-MAXBW-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-SVE-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3 +; CHECK-SVE-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 3 ; CHECK-SVE-MAXBW-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], [[TMP1]] ; CHECK-SVE-MAXBW-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-SVE-MAXBW: vector.ph: @@ -205,7 +205,7 @@ define i32 @chained_partial_reduce_add_add(ptr %a, ptr %b, ptr %c, i32 %N) #0 { ; CHECK-SVE-NEXT: [[DIV27:%.*]] = lshr i32 [[N]], 1 ; CHECK-SVE-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[DIV27]] to i64 ; CHECK-SVE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-SVE-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 +; CHECK-SVE-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2 ; CHECK-SVE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], [[TMP1]] ; CHECK-SVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-SVE: vector.ph: @@ -246,7 +246,7 @@ define i32 @chained_partial_reduce_add_add(ptr %a, ptr %b, ptr %c, i32 %N) #0 { ; CHECK-SVE-MAXBW-NEXT: [[DIV27:%.*]] = lshr i32 [[N]], 1 ; CHECK-SVE-MAXBW-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[DIV27]] to i64 ; CHECK-SVE-MAXBW-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-SVE-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3 +; CHECK-SVE-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 3 ; CHECK-SVE-MAXBW-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], [[TMP1]] ; CHECK-SVE-MAXBW-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-SVE-MAXBW: vector.ph: @@ -357,7 +357,7 @@ define i32 @chained_partial_reduce_sub_add(ptr %a, ptr %b, ptr %c, i32 %N) #0 { ; CHECK-SVE-NEXT: [[DIV27:%.*]] = lshr i32 [[N]], 1 ; CHECK-SVE-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[DIV27]] to i64 ; CHECK-SVE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-SVE-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 +; CHECK-SVE-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2 ; CHECK-SVE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], [[TMP1]] ; CHECK-SVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-SVE: vector.ph: @@ -398,7 
+398,7 @@ define i32 @chained_partial_reduce_sub_add(ptr %a, ptr %b, ptr %c, i32 %N) #0 { ; CHECK-SVE-MAXBW-NEXT: [[DIV27:%.*]] = lshr i32 [[N]], 1 ; CHECK-SVE-MAXBW-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[DIV27]] to i64 ; CHECK-SVE-MAXBW-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-SVE-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3 +; CHECK-SVE-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 3 ; CHECK-SVE-MAXBW-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], [[TMP1]] ; CHECK-SVE-MAXBW-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-SVE-MAXBW: vector.ph: @@ -513,7 +513,7 @@ define i32 @chained_partial_reduce_sub_sub(ptr %a, ptr %b, ptr %c, i32 %N) #0 { ; CHECK-SVE-NEXT: [[DIV27:%.*]] = lshr i32 [[N]], 1 ; CHECK-SVE-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[DIV27]] to i64 ; CHECK-SVE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-SVE-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 +; CHECK-SVE-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2 ; CHECK-SVE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], [[TMP1]] ; CHECK-SVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-SVE: vector.ph: @@ -554,7 +554,7 @@ define i32 @chained_partial_reduce_sub_sub(ptr %a, ptr %b, ptr %c, i32 %N) #0 { ; CHECK-SVE-MAXBW-NEXT: [[DIV27:%.*]] = lshr i32 [[N]], 1 ; CHECK-SVE-MAXBW-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[DIV27]] to i64 ; CHECK-SVE-MAXBW-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-SVE-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3 +; CHECK-SVE-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 3 ; CHECK-SVE-MAXBW-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], [[TMP1]] ; CHECK-SVE-MAXBW-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-SVE-MAXBW: vector.ph: @@ -671,7 +671,7 @@ define i32 @chained_partial_reduce_add_add_add(ptr %a, ptr %b, ptr %c, i32 %N) # ; CHECK-SVE-NEXT: [[DIV27:%.*]] = lshr i32 [[N]], 1 ; CHECK-SVE-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[DIV27]] to i64 ; CHECK-SVE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-SVE-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 +; CHECK-SVE-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2 ; CHECK-SVE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], [[TMP1]] ; CHECK-SVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-SVE: vector.ph: @@ -714,7 +714,7 @@ define i32 @chained_partial_reduce_add_add_add(ptr %a, ptr %b, ptr %c, i32 %N) # ; CHECK-SVE-MAXBW-NEXT: [[DIV27:%.*]] = lshr i32 [[N]], 1 ; CHECK-SVE-MAXBW-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[DIV27]] to i64 ; CHECK-SVE-MAXBW-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-SVE-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3 +; CHECK-SVE-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 3 ; CHECK-SVE-MAXBW-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], [[TMP1]] ; CHECK-SVE-MAXBW-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-SVE-MAXBW: vector.ph: @@ -835,7 +835,7 @@ define i32 @chained_partial_reduce_sub_add_sub(ptr %a, ptr %b, ptr %c, i32 %N) # ; CHECK-SVE-NEXT: [[DIV27:%.*]] = lshr i32 [[N]], 1 ; CHECK-SVE-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[DIV27]] to i64 ; CHECK-SVE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-SVE-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 +; 
CHECK-SVE-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2 ; CHECK-SVE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], [[TMP1]] ; CHECK-SVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-SVE: vector.ph: @@ -878,7 +878,7 @@ define i32 @chained_partial_reduce_sub_add_sub(ptr %a, ptr %b, ptr %c, i32 %N) # ; CHECK-SVE-MAXBW-NEXT: [[DIV27:%.*]] = lshr i32 [[N]], 1 ; CHECK-SVE-MAXBW-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[DIV27]] to i64 ; CHECK-SVE-MAXBW-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-SVE-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3 +; CHECK-SVE-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 3 ; CHECK-SVE-MAXBW-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], [[TMP1]] ; CHECK-SVE-MAXBW-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-SVE-MAXBW: vector.ph: @@ -997,7 +997,7 @@ define i32 @chained_partial_reduce_madd_extadd(ptr %a, ptr %b, ptr %c, i32 %N) # ; CHECK-SVE-NEXT: [[DIV27:%.*]] = lshr i32 [[N]], 1 ; CHECK-SVE-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[DIV27]] to i64 ; CHECK-SVE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-SVE-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 +; CHECK-SVE-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2 ; CHECK-SVE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], [[TMP1]] ; CHECK-SVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-SVE: vector.ph: @@ -1037,7 +1037,7 @@ define i32 @chained_partial_reduce_madd_extadd(ptr %a, ptr %b, ptr %c, i32 %N) # ; CHECK-SVE-MAXBW-NEXT: [[DIV27:%.*]] = lshr i32 [[N]], 1 ; CHECK-SVE-MAXBW-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[DIV27]] to i64 ; CHECK-SVE-MAXBW-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-SVE-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3 +; CHECK-SVE-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 3 ; CHECK-SVE-MAXBW-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], [[TMP1]] ; CHECK-SVE-MAXBW-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-SVE-MAXBW: vector.ph: @@ -1141,7 +1141,7 @@ define i32 @chained_partial_reduce_extadd_extadd(ptr %a, ptr %b, i32 %N) #0 { ; CHECK-SVE-NEXT: [[DIV27:%.*]] = lshr i32 [[N]], 1 ; CHECK-SVE-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[DIV27]] to i64 ; CHECK-SVE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-SVE-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 +; CHECK-SVE-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2 ; CHECK-SVE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], [[TMP1]] ; CHECK-SVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-SVE: vector.ph: @@ -1177,7 +1177,7 @@ define i32 @chained_partial_reduce_extadd_extadd(ptr %a, ptr %b, i32 %N) #0 { ; CHECK-SVE-MAXBW-NEXT: [[DIV27:%.*]] = lshr i32 [[N]], 1 ; CHECK-SVE-MAXBW-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[DIV27]] to i64 ; CHECK-SVE-MAXBW-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-SVE-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3 +; CHECK-SVE-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 3 ; CHECK-SVE-MAXBW-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], [[TMP1]] ; CHECK-SVE-MAXBW-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-SVE-MAXBW: vector.ph: @@ -1278,7 +1278,7 @@ define i32 @chained_partial_reduce_extadd_madd(ptr %a, 
ptr %b, ptr %c, i32 %N) # ; CHECK-SVE-NEXT: [[DIV27:%.*]] = lshr i32 [[N]], 1 ; CHECK-SVE-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[DIV27]] to i64 ; CHECK-SVE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-SVE-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 +; CHECK-SVE-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2 ; CHECK-SVE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], [[TMP1]] ; CHECK-SVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-SVE: vector.ph: @@ -1318,7 +1318,7 @@ define i32 @chained_partial_reduce_extadd_madd(ptr %a, ptr %b, ptr %c, i32 %N) # ; CHECK-SVE-MAXBW-NEXT: [[DIV27:%.*]] = lshr i32 [[N]], 1 ; CHECK-SVE-MAXBW-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[DIV27]] to i64 ; CHECK-SVE-MAXBW-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-SVE-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3 +; CHECK-SVE-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 3 ; CHECK-SVE-MAXBW-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], [[TMP1]] ; CHECK-SVE-MAXBW-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-SVE-MAXBW: vector.ph: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-mixed.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-mixed.ll index 82ea025700c37..8766d6540ed19 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-mixed.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-mixed.ll @@ -9,8 +9,6 @@ define i32 @sudot(ptr %a, ptr %b) #0 { ; CHECK-LABEL: define i32 @sudot( ; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 4 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() @@ -55,8 +53,6 @@ define i32 @sudot(ptr %a, ptr %b) #0 { ; CHECK-NOI8MM-LABEL: define i32 @sudot( ; CHECK-NOI8MM-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NOI8MM-NEXT: entry: -; CHECK-NOI8MM-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NOI8MM-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 4 ; CHECK-NOI8MM-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-NOI8MM: vector.ph: ; CHECK-NOI8MM-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() @@ -124,8 +120,6 @@ define i32 @usdot(ptr %a, ptr %b) #0 { ; CHECK-LABEL: define i32 @usdot( ; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 4 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() @@ -170,8 +164,6 @@ define i32 @usdot(ptr %a, ptr %b) #0 { ; CHECK-NOI8MM-LABEL: define i32 @usdot( ; CHECK-NOI8MM-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { ; CHECK-NOI8MM-NEXT: entry: -; CHECK-NOI8MM-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NOI8MM-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 4 ; CHECK-NOI8MM-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-NOI8MM: vector.ph: ; CHECK-NOI8MM-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll 
b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll index 938d58bf5e1e3..792249d7829b0 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll @@ -10,8 +10,6 @@ define i32 @dotp(ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVE1-LABEL: define i32 @dotp( ; CHECK-INTERLEAVE1-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-INTERLEAVE1-NEXT: entry: -; CHECK-INTERLEAVE1-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-INTERLEAVE1-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 ; CHECK-INTERLEAVE1-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-INTERLEAVE1: vector.ph: ; CHECK-INTERLEAVE1-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() @@ -42,8 +40,6 @@ define i32 @dotp(ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVED-LABEL: define i32 @dotp( ; CHECK-INTERLEAVED-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-INTERLEAVED-NEXT: entry: -; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3 ; CHECK-INTERLEAVED-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-INTERLEAVED: vector.ph: ; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() @@ -88,8 +84,6 @@ define i32 @dotp(ptr %a, ptr %b) #0 { ; CHECK-MAXBW-LABEL: define i32 @dotp( ; CHECK-MAXBW-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-MAXBW-NEXT: entry: -; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3 ; CHECK-MAXBW-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-MAXBW: vector.ph: ; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() @@ -143,8 +137,6 @@ define i64 @not_dotp_i8_to_i64_has_neon_dotprod(ptr readonly %a, ptr readonly %b ; CHECK-INTERLEAVE1-LABEL: define i64 @not_dotp_i8_to_i64_has_neon_dotprod( ; CHECK-INTERLEAVE1-SAME: ptr readonly [[A:%.*]], ptr readonly [[B:%.*]]) #[[ATTR1:[0-9]+]] { ; CHECK-INTERLEAVE1-NEXT: entry: -; CHECK-INTERLEAVE1-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-INTERLEAVE1-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP7]], 1 ; CHECK-INTERLEAVE1-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-INTERLEAVE1: vector.ph: ; CHECK-INTERLEAVE1-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() @@ -177,8 +169,6 @@ define i64 @not_dotp_i8_to_i64_has_neon_dotprod(ptr readonly %a, ptr readonly %b ; CHECK-INTERLEAVED-LABEL: define i64 @not_dotp_i8_to_i64_has_neon_dotprod( ; CHECK-INTERLEAVED-SAME: ptr readonly [[A:%.*]], ptr readonly [[B:%.*]]) #[[ATTR1:[0-9]+]] { ; CHECK-INTERLEAVED-NEXT: entry: -; CHECK-INTERLEAVED-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP7]], 2 ; CHECK-INTERLEAVED-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-INTERLEAVED: vector.ph: ; CHECK-INTERLEAVED-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() @@ -225,8 +215,6 @@ define i64 @not_dotp_i8_to_i64_has_neon_dotprod(ptr readonly %a, ptr readonly %b ; CHECK-MAXBW-LABEL: define i64 @not_dotp_i8_to_i64_has_neon_dotprod( ; CHECK-MAXBW-SAME: ptr readonly [[A:%.*]], ptr readonly [[B:%.*]]) #[[ATTR1:[0-9]+]] { ; CHECK-MAXBW-NEXT: entry: -; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3 ; CHECK-MAXBW-NEXT: br i1 false, label 
[[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-MAXBW: vector.ph: ; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() @@ -284,8 +272,6 @@ define i64 @not_dotp_i16_to_i64_has_neon_dotprod(ptr readonly %a, ptr readonly % ; CHECK-INTERLEAVE1-LABEL: define i64 @not_dotp_i16_to_i64_has_neon_dotprod( ; CHECK-INTERLEAVE1-SAME: ptr readonly [[A:%.*]], ptr readonly [[B:%.*]]) #[[ATTR1]] { ; CHECK-INTERLEAVE1-NEXT: entry: -; CHECK-INTERLEAVE1-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-INTERLEAVE1-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP7]], 1 ; CHECK-INTERLEAVE1-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-INTERLEAVE1: vector.ph: ; CHECK-INTERLEAVE1-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() @@ -322,8 +308,6 @@ define i64 @not_dotp_i16_to_i64_has_neon_dotprod(ptr readonly %a, ptr readonly % ; CHECK-INTERLEAVED-LABEL: define i64 @not_dotp_i16_to_i64_has_neon_dotprod( ; CHECK-INTERLEAVED-SAME: ptr readonly [[A:%.*]], ptr readonly [[B:%.*]]) #[[ATTR1]] { ; CHECK-INTERLEAVED-NEXT: entry: -; CHECK-INTERLEAVED-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP7]], 2 ; CHECK-INTERLEAVED-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-INTERLEAVED: vector.ph: ; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() @@ -374,8 +358,6 @@ define i64 @not_dotp_i16_to_i64_has_neon_dotprod(ptr readonly %a, ptr readonly % ; CHECK-MAXBW-LABEL: define i64 @not_dotp_i16_to_i64_has_neon_dotprod( ; CHECK-MAXBW-SAME: ptr readonly [[A:%.*]], ptr readonly [[B:%.*]]) #[[ATTR1]] { ; CHECK-MAXBW-NEXT: entry: -; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 ; CHECK-MAXBW-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-MAXBW: vector.ph: ; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() @@ -793,8 +775,6 @@ define i32 @not_dotp_not_loop_carried(ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVE1-LABEL: define i32 @not_dotp_not_loop_carried( ; CHECK-INTERLEAVE1-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { ; CHECK-INTERLEAVE1-NEXT: entry: -; CHECK-INTERLEAVE1-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-INTERLEAVE1-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3 ; CHECK-INTERLEAVE1-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-INTERLEAVE1: vector.ph: ; CHECK-INTERLEAVE1-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() @@ -837,8 +817,6 @@ define i32 @not_dotp_not_loop_carried(ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVED-LABEL: define i32 @not_dotp_not_loop_carried( ; CHECK-INTERLEAVED-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { ; CHECK-INTERLEAVED-NEXT: entry: -; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 4 ; CHECK-INTERLEAVED-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-INTERLEAVED: vector.ph: ; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() @@ -887,8 +865,6 @@ define i32 @not_dotp_not_loop_carried(ptr %a, ptr %b) #0 { ; CHECK-MAXBW-LABEL: define i32 @not_dotp_not_loop_carried( ; CHECK-MAXBW-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { ; CHECK-MAXBW-NEXT: entry: -; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3 ; CHECK-MAXBW-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-MAXBW: 
vector.ph: ; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() @@ -954,8 +930,6 @@ define i32 @not_dotp_not_phi(ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVE1-LABEL: define i32 @not_dotp_not_phi( ; CHECK-INTERLEAVE1-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { ; CHECK-INTERLEAVE1-NEXT: entry: -; CHECK-INTERLEAVE1-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-INTERLEAVE1-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3 ; CHECK-INTERLEAVE1-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-INTERLEAVE1: vector.ph: ; CHECK-INTERLEAVE1-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() @@ -992,8 +966,6 @@ define i32 @not_dotp_not_phi(ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVED-LABEL: define i32 @not_dotp_not_phi( ; CHECK-INTERLEAVED-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { ; CHECK-INTERLEAVED-NEXT: entry: -; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 4 ; CHECK-INTERLEAVED-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-INTERLEAVED: vector.ph: ; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() @@ -1036,8 +1008,6 @@ define i32 @not_dotp_not_phi(ptr %a, ptr %b) #0 { ; CHECK-MAXBW-LABEL: define i32 @not_dotp_not_phi( ; CHECK-MAXBW-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { ; CHECK-MAXBW-NEXT: entry: -; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3 ; CHECK-MAXBW-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-MAXBW: vector.ph: ; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() @@ -1098,7 +1068,7 @@ define i32 @dotp_unrolled(i32 %num_out, i64 %num_in, ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVE1-SAME: i32 [[NUM_OUT:%.*]], i64 [[NUM_IN:%.*]], ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { ; CHECK-INTERLEAVE1-NEXT: entry: ; CHECK-INTERLEAVE1-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-INTERLEAVE1-NEXT: [[TMP15:%.*]] = shl nuw i64 [[TMP13]], 2 +; CHECK-INTERLEAVE1-NEXT: [[TMP15:%.*]] = shl nuw nsw i64 [[TMP13]], 2 ; CHECK-INTERLEAVE1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[NUM_IN]], [[TMP15]] ; CHECK-INTERLEAVE1-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-INTERLEAVE1: vector.ph: @@ -1164,7 +1134,7 @@ define i32 @dotp_unrolled(i32 %num_out, i64 %num_in, ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVED-SAME: i32 [[NUM_OUT:%.*]], i64 [[NUM_IN:%.*]], ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { ; CHECK-INTERLEAVED-NEXT: entry: ; CHECK-INTERLEAVED-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-INTERLEAVED-NEXT: [[TMP15:%.*]] = shl nuw i64 [[TMP13]], 3 +; CHECK-INTERLEAVED-NEXT: [[TMP15:%.*]] = shl nuw nsw i64 [[TMP13]], 3 ; CHECK-INTERLEAVED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[NUM_IN]], [[TMP15]] ; CHECK-INTERLEAVED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-INTERLEAVED: vector.ph: @@ -1286,7 +1256,7 @@ define i32 @dotp_unrolled(i32 %num_out, i64 %num_in, ptr %a, ptr %b) #0 { ; CHECK-MAXBW-SAME: i32 [[NUM_OUT:%.*]], i64 [[NUM_IN:%.*]], ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { ; CHECK-MAXBW-NEXT: entry: ; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3 +; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 3 ; CHECK-MAXBW-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[NUM_IN]], [[TMP1]] ; CHECK-MAXBW-NEXT: br i1 
[[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-MAXBW: vector.ph: @@ -1541,8 +1511,6 @@ define i32 @not_dotp_extend_user(ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVE1-LABEL: define i32 @not_dotp_extend_user( ; CHECK-INTERLEAVE1-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { ; CHECK-INTERLEAVE1-NEXT: entry: -; CHECK-INTERLEAVE1-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-INTERLEAVE1-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 ; CHECK-INTERLEAVE1-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-INTERLEAVE1: vector.ph: ; CHECK-INTERLEAVE1-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() @@ -1577,8 +1545,6 @@ define i32 @not_dotp_extend_user(ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVED-LABEL: define i32 @not_dotp_extend_user( ; CHECK-INTERLEAVED-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { ; CHECK-INTERLEAVED-NEXT: entry: -; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3 ; CHECK-INTERLEAVED-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-INTERLEAVED: vector.ph: ; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() @@ -1627,8 +1593,6 @@ define i32 @not_dotp_extend_user(ptr %a, ptr %b) #0 { ; CHECK-MAXBW-LABEL: define i32 @not_dotp_extend_user( ; CHECK-MAXBW-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { ; CHECK-MAXBW-NEXT: entry: -; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3 ; CHECK-MAXBW-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-MAXBW: vector.ph: ; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() @@ -1687,8 +1651,6 @@ define i64 @dotp_cost_disagreement(ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVE1-LABEL: define i64 @dotp_cost_disagreement( ; CHECK-INTERLEAVE1-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { ; CHECK-INTERLEAVE1-NEXT: entry: -; CHECK-INTERLEAVE1-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-INTERLEAVE1-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 1 ; CHECK-INTERLEAVE1-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-INTERLEAVE1: vector.ph: ; CHECK-INTERLEAVE1-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() @@ -1721,7 +1683,7 @@ define i64 @dotp_cost_disagreement(ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVED-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { ; CHECK-INTERLEAVED-NEXT: entry: ; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 +; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2 ; CHECK-INTERLEAVED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 41, [[TMP1]] ; CHECK-INTERLEAVED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-INTERLEAVED: vector.ph: @@ -1769,7 +1731,7 @@ define i64 @dotp_cost_disagreement(ptr %a, ptr %b) #0 { ; CHECK-MAXBW-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { ; CHECK-MAXBW-NEXT: entry: ; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3 +; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 3 ; CHECK-MAXBW-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 41, [[TMP1]] ; CHECK-MAXBW-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-MAXBW: vector.ph: @@ -2076,7 +2038,7 @@ define i64 @not_dotp_ext_outside_plan(ptr %a, i16 %b, i64 %n) #0 { ; 
CHECK-MAXBW: for.ph: ; CHECK-MAXBW-NEXT: [[EXT_B:%.*]] = zext i16 [[B]] to i64 ; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 +; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2 ; CHECK-MAXBW-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]] ; CHECK-MAXBW-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-MAXBW: vector.ph: @@ -2209,7 +2171,7 @@ define i64 @not_dotp_ext_outside_plan2(ptr %a, i16 %b, i64 %n) #0 { ; CHECK-MAXBW: for.ph: ; CHECK-MAXBW-NEXT: [[EXT_B:%.*]] = zext i16 [[B]] to i64 ; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 +; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2 ; CHECK-MAXBW-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]] ; CHECK-MAXBW-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-MAXBW: vector.ph: @@ -2271,7 +2233,7 @@ define dso_local i32 @not_dotp_vscale1(ptr %a, ptr %b, i32 %n, i64 %cost) #0 { ; CHECK-INTERLEAVE1: for.body.preheader: ; CHECK-INTERLEAVE1-NEXT: [[TMP0:%.*]] = zext i32 [[N]] to i64 ; CHECK-INTERLEAVE1-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-INTERLEAVE1-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP1]], 1 +; CHECK-INTERLEAVE1-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 1 ; CHECK-INTERLEAVE1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]] ; CHECK-INTERLEAVE1-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-INTERLEAVE1: vector.ph: @@ -2312,7 +2274,7 @@ define dso_local i32 @not_dotp_vscale1(ptr %a, ptr %b, i32 %n, i64 %cost) #0 { ; CHECK-INTERLEAVED: for.body.preheader: ; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = zext i32 [[N]] to i64 ; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP1]], 2 +; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 2 ; CHECK-INTERLEAVED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]] ; CHECK-INTERLEAVED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-INTERLEAVED: vector.ph: @@ -2367,7 +2329,7 @@ define dso_local i32 @not_dotp_vscale1(ptr %a, ptr %b, i32 %n, i64 %cost) #0 { ; CHECK-MAXBW: for.body.preheader: ; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = zext i32 [[N]] to i64 ; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP1]], 3 +; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 3 ; CHECK-MAXBW-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]] ; CHECK-MAXBW-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-MAXBW: vector.ph: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-sub.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-sub.ll index 89b5689845612..64cb33181cc1e 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-sub.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-sub.ll @@ -10,8 +10,6 @@ define i32 @dotp(ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVE1-LABEL: define i32 @dotp( ; CHECK-INTERLEAVE1-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-INTERLEAVE1-NEXT: entry: -; CHECK-INTERLEAVE1-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-INTERLEAVE1-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 ; CHECK-INTERLEAVE1-NEXT: 
br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-INTERLEAVE1: vector.ph: ; CHECK-INTERLEAVE1-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() @@ -43,8 +41,6 @@ define i32 @dotp(ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVED-LABEL: define i32 @dotp( ; CHECK-INTERLEAVED-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-INTERLEAVED-NEXT: entry: -; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3 ; CHECK-INTERLEAVED-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-INTERLEAVED: vector.ph: ; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() @@ -91,8 +87,6 @@ define i32 @dotp(ptr %a, ptr %b) #0 { ; CHECK-MAXBW-LABEL: define i32 @dotp( ; CHECK-MAXBW-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-MAXBW-NEXT: entry: -; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3 ; CHECK-MAXBW-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[ENTRY:%.*]] ; CHECK-MAXBW: vector.ph: ; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce.ll index c3a7f2c477db8..370bfc641001a 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce.ll @@ -60,8 +60,6 @@ define i32 @zext_add_reduc_i8_i32_sve(ptr %a) #0 { ; CHECK-MAXBW-LABEL: define i32 @zext_add_reduc_i8_i32_sve( ; CHECK-MAXBW-SAME: ptr [[A:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-MAXBW-NEXT: entry: -; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 4 ; CHECK-MAXBW-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-MAXBW: vector.ph: ; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() @@ -244,8 +242,6 @@ define i64 @zext_add_reduc_i8_i64(ptr %a) #0 { ; CHECK-MAXBW-LABEL: define i64 @zext_add_reduc_i8_i64( ; CHECK-MAXBW-SAME: ptr [[A:%.*]]) #[[ATTR0]] { ; CHECK-MAXBW-NEXT: entry: -; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 4 ; CHECK-MAXBW-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-MAXBW: vector.ph: ; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() @@ -340,8 +336,6 @@ define i64 @zext_add_reduc_i16_i64(ptr %a) #0 { ; CHECK-MAXBW-LABEL: define i64 @zext_add_reduc_i16_i64( ; CHECK-MAXBW-SAME: ptr [[A:%.*]]) #[[ATTR0]] { ; CHECK-MAXBW-NEXT: entry: -; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3 ; CHECK-MAXBW-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-MAXBW: vector.ph: ; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() @@ -436,8 +430,6 @@ define i32 @zext_add_reduc_i8_i32_has_neon_dotprod(ptr %a) #1 { ; CHECK-MAXBW-LABEL: define i32 @zext_add_reduc_i8_i32_has_neon_dotprod( ; CHECK-MAXBW-SAME: ptr [[A:%.*]]) #[[ATTR2:[0-9]+]] { ; CHECK-MAXBW-NEXT: entry: -; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 4 ; CHECK-MAXBW-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-MAXBW: vector.ph: ; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() @@ -718,8 +710,6 @@ define i32 
@zext_sub_reduc_i8_i32_has_neon_dotprod(ptr %a) #1 { ; CHECK-MAXBW-LABEL: define i32 @zext_sub_reduc_i8_i32_has_neon_dotprod( ; CHECK-MAXBW-SAME: ptr [[A:%.*]]) #[[ATTR2]] { ; CHECK-MAXBW-NEXT: entry: -; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3 ; CHECK-MAXBW-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-MAXBW: vector.ph: ; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() @@ -814,8 +804,6 @@ define i32 @sext_add_reduc_i8_i32(ptr %a) #0 { ; CHECK-MAXBW-LABEL: define i32 @sext_add_reduc_i8_i32( ; CHECK-MAXBW-SAME: ptr [[A:%.*]]) #[[ATTR0]] { ; CHECK-MAXBW-NEXT: entry: -; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 4 ; CHECK-MAXBW-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-MAXBW: vector.ph: ; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() @@ -932,7 +920,7 @@ define i32 @add_of_zext_outside_loop(i32 %a, ptr noalias %b, i8 %c, i32 %d) #0 { ; CHECK-MAXBW-NEXT: [[CONV1:%.*]] = zext i8 [[C]] to i32 ; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = sub i32 1024, [[D]] ; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = shl nuw i32 [[TMP1]], 4 +; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = shl nuw nsw i32 [[TMP1]], 4 ; CHECK-MAXBW-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], [[TMP2]] ; CHECK-MAXBW-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-MAXBW: vector.ph: @@ -1053,7 +1041,7 @@ define i32 @add_of_loop_invariant_zext(i32 %a, ptr %b, i8 %c, i32 %d) #0 { ; CHECK-MAXBW-NEXT: entry: ; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = sub i32 1024, [[D]] ; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = shl nuw i32 [[TMP1]], 4 +; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = shl nuw nsw i32 [[TMP1]], 4 ; CHECK-MAXBW-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], [[TMP2]] ; CHECK-MAXBW-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-MAXBW: vector.ph: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll b/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll index 94172dea8ee6a..db55c5ca107a6 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll @@ -8,8 +8,6 @@ target triple = "aarch64-unknown-linux-gnu" define void @test_invar_gep(ptr %dst) #0 { ; CHECK-LABEL: @test_invar_gep( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll index 9583c7f174333..375e412861777 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll @@ -34,7 +34,7 @@ define float @fadd_strict(ptr noalias nocapture readonly %a, i64 %n) #0 { ; CHECK-UNORDERED-SAME: (ptr noalias readonly captures(none) [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-UNORDERED-NEXT: entry: ; CHECK-UNORDERED-NEXT: [[TMP0:%.*]] = call 
i64 @llvm.vscale.i64() -; CHECK-UNORDERED-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3 +; CHECK-UNORDERED-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 3 ; CHECK-UNORDERED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]] ; CHECK-UNORDERED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-UNORDERED: vector.ph: @@ -77,7 +77,7 @@ define float @fadd_strict(ptr noalias nocapture readonly %a, i64 %n) #0 { ; CHECK-ORDERED-SAME: (ptr noalias readonly captures(none) [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-ORDERED-NEXT: entry: ; CHECK-ORDERED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3 +; CHECK-ORDERED-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 3 ; CHECK-ORDERED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]] ; CHECK-ORDERED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-ORDERED: vector.ph: @@ -202,7 +202,7 @@ define float @fadd_strict_unroll(ptr noalias nocapture readonly %a, i64 %n) #0 { ; CHECK-UNORDERED-SAME: (ptr noalias readonly captures(none) [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { ; CHECK-UNORDERED-NEXT: entry: ; CHECK-UNORDERED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UNORDERED-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 5 +; CHECK-UNORDERED-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 5 ; CHECK-UNORDERED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]] ; CHECK-UNORDERED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-UNORDERED: vector.ph: @@ -266,7 +266,7 @@ define float @fadd_strict_unroll(ptr noalias nocapture readonly %a, i64 %n) #0 { ; CHECK-ORDERED-SAME: (ptr noalias readonly captures(none) [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { ; CHECK-ORDERED-NEXT: entry: ; CHECK-ORDERED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 5 +; CHECK-ORDERED-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 5 ; CHECK-ORDERED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]] ; CHECK-ORDERED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-ORDERED: vector.ph: @@ -468,7 +468,7 @@ define void @fadd_strict_interleave(ptr noalias nocapture readonly %a, ptr noali ; CHECK-UNORDERED-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 1 ; CHECK-UNORDERED-NEXT: [[TMP2:%.*]] = add nuw i64 [[TMP1]], 1 ; CHECK-UNORDERED-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UNORDERED-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 2 +; CHECK-UNORDERED-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP3]], 2 ; CHECK-UNORDERED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], [[TMP4]] ; CHECK-UNORDERED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-UNORDERED: vector.ph: @@ -536,7 +536,7 @@ define void @fadd_strict_interleave(ptr noalias nocapture readonly %a, ptr noali ; CHECK-ORDERED-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 1 ; CHECK-ORDERED-NEXT: [[TMP2:%.*]] = add nuw i64 [[TMP1]], 1 ; CHECK-ORDERED-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 2 +; CHECK-ORDERED-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP3]], 2 ; CHECK-ORDERED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], [[TMP4]] ; CHECK-ORDERED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-ORDERED: vector.ph: @@ -725,7 +725,7 @@ define float @fadd_of_sum(ptr 
noalias nocapture readonly %a, ptr noalias nocaptu ; CHECK-UNORDERED-NEXT: br i1 [[CMP1]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] ; CHECK-UNORDERED: for.body.preheader: ; CHECK-UNORDERED-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UNORDERED-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP1]], 2 +; CHECK-UNORDERED-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 2 ; CHECK-UNORDERED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP2]] ; CHECK-UNORDERED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-UNORDERED: vector.ph: @@ -782,7 +782,7 @@ define float @fadd_of_sum(ptr noalias nocapture readonly %a, ptr noalias nocaptu ; CHECK-ORDERED-NEXT: br i1 [[CMP1]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] ; CHECK-ORDERED: for.body.preheader: ; CHECK-ORDERED-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP1]], 2 +; CHECK-ORDERED-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 2 ; CHECK-ORDERED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP2]] ; CHECK-ORDERED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-ORDERED: vector.ph: @@ -945,7 +945,7 @@ define float @fadd_conditional(ptr noalias nocapture readonly %a, ptr noalias no ; CHECK-UNORDERED-SAME: (ptr noalias readonly captures(none) [[A:%.*]], ptr noalias readonly captures(none) [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { ; CHECK-UNORDERED-NEXT: entry: ; CHECK-UNORDERED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UNORDERED-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 +; CHECK-UNORDERED-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2 ; CHECK-UNORDERED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]] ; CHECK-UNORDERED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-UNORDERED: vector.ph: @@ -1000,7 +1000,7 @@ define float @fadd_conditional(ptr noalias nocapture readonly %a, ptr noalias no ; CHECK-ORDERED-SAME: (ptr noalias readonly captures(none) [[A:%.*]], ptr noalias readonly captures(none) [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { ; CHECK-ORDERED-NEXT: entry: ; CHECK-ORDERED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 +; CHECK-ORDERED-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2 ; CHECK-ORDERED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]] ; CHECK-ORDERED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-ORDERED: vector.ph: @@ -1165,7 +1165,7 @@ define float @fadd_multiple(ptr noalias nocapture %a, ptr noalias nocapture %b, ; CHECK-UNORDERED-SAME: (ptr noalias captures(none) [[A:%.*]], ptr noalias captures(none) [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { ; CHECK-UNORDERED-NEXT: entry: ; CHECK-UNORDERED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UNORDERED-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3 +; CHECK-UNORDERED-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 3 ; CHECK-UNORDERED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]] ; CHECK-UNORDERED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-UNORDERED: vector.ph: @@ -1300,7 +1300,7 @@ define float @fmuladd_strict(ptr %a, ptr %b, i64 %n) #0 { ; CHECK-UNORDERED-SAME: (ptr [[A:%.*]], ptr [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { ; CHECK-UNORDERED-NEXT: entry: ; CHECK-UNORDERED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UNORDERED-NEXT: [[TMP1:%.*]] = 
shl nuw i64 [[TMP0]], 5 +; CHECK-UNORDERED-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 5 ; CHECK-UNORDERED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]] ; CHECK-UNORDERED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-UNORDERED: vector.ph: @@ -1380,7 +1380,7 @@ define float @fmuladd_strict(ptr %a, ptr %b, i64 %n) #0 { ; CHECK-ORDERED-SAME: (ptr [[A:%.*]], ptr [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { ; CHECK-ORDERED-NEXT: entry: ; CHECK-ORDERED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 5 +; CHECK-ORDERED-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 5 ; CHECK-ORDERED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]] ; CHECK-ORDERED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-ORDERED: vector.ph: @@ -1610,7 +1610,7 @@ define float @fmuladd_strict_fmf(ptr %a, ptr %b, i64 %n) #0 { ; CHECK-UNORDERED-SAME: (ptr [[A:%.*]], ptr [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { ; CHECK-UNORDERED-NEXT: entry: ; CHECK-UNORDERED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UNORDERED-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 5 +; CHECK-UNORDERED-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 5 ; CHECK-UNORDERED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]] ; CHECK-UNORDERED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-UNORDERED: vector.ph: @@ -1690,7 +1690,7 @@ define float @fmuladd_strict_fmf(ptr %a, ptr %b, i64 %n) #0 { ; CHECK-ORDERED-SAME: (ptr [[A:%.*]], ptr [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { ; CHECK-ORDERED-NEXT: entry: ; CHECK-ORDERED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 5 +; CHECK-ORDERED-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 5 ; CHECK-ORDERED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]] ; CHECK-ORDERED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-ORDERED: vector.ph: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll b/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll index 8d9eb26a01273..8112d2bdac5a4 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll @@ -14,7 +14,7 @@ define i64 @same_exit_block_pre_inc_use1() #1 { ; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) ; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 4 +; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 64, [[TMP1]] ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: @@ -254,9 +254,6 @@ define i64 @loop_contains_safe_div() #1 { ; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 4 ; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) ; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) -; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP12:%.*]] = shl nuw i64 [[TMP11]], 2 -; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.umax.i64(i64 8, i64 [[TMP12]]) ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() diff --git 
a/llvm/test/Transforms/LoopVectorize/AArch64/single-early-exit-interleave.ll b/llvm/test/Transforms/LoopVectorize/AArch64/single-early-exit-interleave.ll index 7d0999e8993b0..b40a184a3e425 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/single-early-exit-interleave.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/single-early-exit-interleave.ll @@ -14,7 +14,7 @@ define i64 @same_exit_block_pre_inc_use1() #0 { ; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) ; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 6 +; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 6 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 510, [[TMP1]] ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll index 04b0075e5a5b2..6b0da1bb2ed82 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll @@ -145,8 +145,6 @@ exit: define void @main_vf_vscale_x_2_no_epi_iteration(ptr %A) #0 vscale_range(8, 8) { ; CHECK-LABEL: @main_vf_vscale_x_2_no_epi_iteration( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() @@ -589,8 +587,6 @@ define void @trip_count_vscale(ptr noalias %a, ptr noalias %b) vscale_range(1, 1 ; CHECK-VF8-NEXT: entry: ; CHECK-VF8-NEXT: [[V:%.*]] = tail call i64 @llvm.vscale.i64() ; CHECK-VF8-NEXT: [[N:%.*]] = mul nuw nsw i64 [[V]], 1033 -; CHECK-VF8-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-VF8-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3 ; CHECK-VF8-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-VF8: vector.ph: ; CHECK-VF8-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() @@ -735,8 +731,6 @@ define void @trip_count_vscale_no_epilogue_iterations(ptr noalias %a, ptr noalia ; CHECK-VF8-NEXT: entry: ; CHECK-VF8-NEXT: [[V:%.*]] = tail call i64 @llvm.vscale.i64() ; CHECK-VF8-NEXT: [[N:%.*]] = mul nuw nsw i64 [[V]], 1024 -; CHECK-VF8-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-VF8-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3 ; CHECK-VF8-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-VF8: vector.ph: ; CHECK-VF8-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-fneg.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-fneg.ll index 95153a4a59f1a..51743cf636a14 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-fneg.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-fneg.ll @@ -16,7 +16,7 @@ define void @fneg(ptr nocapture noundef writeonly %d, ptr nocapture noundef read ; CHECK: for.body.preheader: ; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64 ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 4 +; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], [[TMP1]] ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] ; CHECK: 
vector.memcheck: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-store.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-store.ll index 7dc6a6aa890f9..4ae49354e6443 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-store.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-store.ll @@ -7,7 +7,7 @@ define void @inv_store_i16(ptr noalias %dst, ptr noalias readonly %src, i64 %N) ; CHECK-LABEL: @inv_store_i16( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 +; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]] ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: @@ -54,7 +54,7 @@ define void @cond_inv_store_i32(ptr noalias %dst, ptr noalias readonly %src, i64 ; CHECK-LABEL: @cond_inv_store_i32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 +; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]] ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-runtime-check-size-based-threshold.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-runtime-check-size-based-threshold.ll index 3c667ccbdedfc..4c7f70ad4d15e 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-runtime-check-size-based-threshold.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-runtime-check-size-based-threshold.ll @@ -15,7 +15,7 @@ define void @min_trip_count_due_to_runtime_checks_1(ptr %dst.1, ptr %dst.2, ptr ; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[N:%.*]], i64 1) ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 -; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 20, i64 [[TMP1]]) +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 20) ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[UMAX]], [[TMP2]] ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] ; CHECK: vector.memcheck: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll index 4cd7dac62e79f..552d9e23c33c5 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll @@ -536,8 +536,6 @@ while.end.loopexit: ; preds = %while.body define void @simple_memset_trip1024(i32 %val, ptr %ptr, i64 %n) #0 { ; CHECK-LABEL: @simple_memset_trip1024( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vscale-based-trip-counts.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vscale-based-trip-counts.ll index 8dafa3453dcf5..4444be36c3567 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vscale-based-trip-counts.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vscale-based-trip-counts.ll @@ -9,8 +9,6 @@ define void 
@vscale_mul_4(ptr noalias noundef readonly captures(none) %a, ptr no ; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2 -; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2 ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP1]], [[TMP5]] @@ -62,8 +60,6 @@ define void @vscale_mul_8(ptr noalias noundef readonly captures(none) %a, ptr n ; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[MUL1:%.*]] = shl nuw nsw i64 [[TMP0]], 3 -; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP1]], 3 ; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 8 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[MUL1]], [[TMP4]] @@ -128,8 +124,7 @@ define void @vscale_mul_12(ptr noalias noundef readonly captures(none) %a, ptr n ; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[MUL1:%.*]] = mul nuw nsw i64 [[TMP0]], 12 -; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP1]], 2 +; CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP0]], 2 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[MUL1]], [[TMP2]] ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: @@ -196,8 +191,7 @@ define void @vscale_mul_31(ptr noalias noundef readonly captures(none) %a, ptr n ; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[MUL1:%.*]] = mul nuw nsw i64 [[TMP0]], 31 -; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP1]], 3 +; CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP0]], 3 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[MUL1]], [[TMP2]] ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: @@ -277,8 +271,7 @@ define void @vscale_mul_64(ptr noalias noundef readonly captures(none) %a, ptr n ; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[MUL1:%.*]] = mul nuw nsw i64 [[TMP0]], 64 -; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP1]], 3 +; CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP0]], 3 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[MUL1]], [[TMP2]] ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: @@ -360,8 +353,7 @@ define void @trip_count_with_overflow(ptr noalias noundef readonly captures(none ; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[TMP0]], 2 -; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 3 +; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP0]], 3 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP1]], [[TMP3]] ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: @@ -442,8 +434,6 @@ define void @trip_count_too_big_for_element_count(ptr noalias noundef readonly c ; CHECK-NEXT: 
[[ENTRY:.*]]: ; CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 32 -; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 3 ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 8 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP1]], [[TMP5]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll b/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll index a844673cfcacb..7e8f4adb0c4cd 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll @@ -83,7 +83,7 @@ define i8 @dead_live_out_due_to_scalar_epilogue_required(ptr %src, ptr %dst) { ; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i32 [[TMP0]], 2 -; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.umax.i32(i32 6, i32 [[TMP1]]) +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.umax.i32(i32 [[TMP1]], i32 6) ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 252, [[TMP2]] ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] ; CHECK: [[VECTOR_MEMCHECK]]: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/fminimumnum.ll b/llvm/test/Transforms/LoopVectorize/RISCV/fminimumnum.ll index eaabb63b8c62d..84151c2cce931 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/fminimumnum.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/fminimumnum.ll @@ -12,7 +12,7 @@ define void @fmin32(ptr noundef readonly captures(none) %input1, ptr noundef rea ; CHECK-NEXT: [[OUTPUT1:%.*]] = ptrtoint ptr [[OUTPUT]] to i64 ; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP13:%.*]] = shl nuw i64 [[TMP8]], 2 -; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.umax.i64(i64 15, i64 [[TMP13]]) +; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP13]], i64 15) ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 4096, [[TMP14]] ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] ; CHECK: [[VECTOR_MEMCHECK]]: @@ -73,7 +73,7 @@ define void @fmin32(ptr noundef readonly captures(none) %input1, ptr noundef rea ; ZVFHMIN-NEXT: [[OUTPUT1:%.*]] = ptrtoint ptr [[OUTPUT]] to i64 ; ZVFHMIN-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; ZVFHMIN-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 -; ZVFHMIN-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 15, i64 [[TMP1]]) +; ZVFHMIN-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 15) ; ZVFHMIN-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 4096, [[TMP2]] ; ZVFHMIN-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] ; ZVFHMIN: [[VECTOR_MEMCHECK]]: @@ -157,7 +157,7 @@ define void @fmax32(ptr noundef readonly captures(none) %input1, ptr noundef rea ; CHECK-NEXT: [[OUTPUT1:%.*]] = ptrtoint ptr [[OUTPUT]] to i64 ; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP13:%.*]] = shl nuw i64 [[TMP8]], 2 -; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.umax.i64(i64 15, i64 [[TMP13]]) +; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP13]], i64 15) ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 4096, [[TMP14]] ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] ; CHECK: [[VECTOR_MEMCHECK]]: @@ -218,7 +218,7 @@ define void @fmax32(ptr noundef readonly captures(none) 
%input1, ptr noundef rea ; ZVFHMIN-NEXT: [[OUTPUT1:%.*]] = ptrtoint ptr [[OUTPUT]] to i64 ; ZVFHMIN-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; ZVFHMIN-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 -; ZVFHMIN-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 15, i64 [[TMP1]]) +; ZVFHMIN-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 15) ; ZVFHMIN-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 4096, [[TMP2]] ; ZVFHMIN-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] ; ZVFHMIN: [[VECTOR_MEMCHECK]]: @@ -302,7 +302,7 @@ define void @fmin64(ptr noundef readonly captures(none) %input1, ptr noundef rea ; CHECK-NEXT: [[OUTPUT1:%.*]] = ptrtoint ptr [[OUTPUT]] to i64 ; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP13:%.*]] = shl nuw i64 [[TMP8]], 1 -; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.umax.i64(i64 15, i64 [[TMP13]]) +; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP13]], i64 15) ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 4096, [[TMP14]] ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] ; CHECK: [[VECTOR_MEMCHECK]]: @@ -363,7 +363,7 @@ define void @fmin64(ptr noundef readonly captures(none) %input1, ptr noundef rea ; ZVFHMIN-NEXT: [[OUTPUT1:%.*]] = ptrtoint ptr [[OUTPUT]] to i64 ; ZVFHMIN-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; ZVFHMIN-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 1 -; ZVFHMIN-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 15, i64 [[TMP1]]) +; ZVFHMIN-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 15) ; ZVFHMIN-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 4096, [[TMP2]] ; ZVFHMIN-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] ; ZVFHMIN: [[VECTOR_MEMCHECK]]: @@ -447,7 +447,7 @@ define void @fmax64(ptr noundef readonly captures(none) %input1, ptr noundef rea ; CHECK-NEXT: [[OUTPUT1:%.*]] = ptrtoint ptr [[OUTPUT]] to i64 ; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP13:%.*]] = shl nuw i64 [[TMP8]], 1 -; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.umax.i64(i64 15, i64 [[TMP13]]) +; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP13]], i64 15) ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 4096, [[TMP14]] ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] ; CHECK: [[VECTOR_MEMCHECK]]: @@ -508,7 +508,7 @@ define void @fmax64(ptr noundef readonly captures(none) %input1, ptr noundef rea ; ZVFHMIN-NEXT: [[OUTPUT1:%.*]] = ptrtoint ptr [[OUTPUT]] to i64 ; ZVFHMIN-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; ZVFHMIN-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 1 -; ZVFHMIN-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 15, i64 [[TMP1]]) +; ZVFHMIN-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 15) ; ZVFHMIN-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 4096, [[TMP2]] ; ZVFHMIN-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] ; ZVFHMIN: [[VECTOR_MEMCHECK]]: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-bin-unary-ops-args.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-bin-unary-ops-args.ll index 90c569b4e2dbc..fb62d21a86817 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-bin-unary-ops-args.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-bin-unary-ops-args.ll @@ -62,7 +62,7 @@ define void @test_and(ptr nocapture %a, ptr nocapture readonly %b) { ; 
NO-VP-NEXT: [[B1:%.*]] = ptrtoint ptr [[B]] to i64 ; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() ; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP7]], 4 -; NO-VP-NEXT: [[TMP14:%.*]] = call i64 @llvm.umax.i64(i64 32, i64 [[TMP1]]) +; NO-VP-NEXT: [[TMP14:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 32) ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 100, [[TMP14]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] ; NO-VP: [[VECTOR_MEMCHECK]]: @@ -178,7 +178,7 @@ define void @test_or(ptr nocapture %a, ptr nocapture readonly %b) { ; NO-VP-NEXT: [[B1:%.*]] = ptrtoint ptr [[B]] to i64 ; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() ; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP7]], 4 -; NO-VP-NEXT: [[TMP14:%.*]] = call i64 @llvm.umax.i64(i64 32, i64 [[TMP1]]) +; NO-VP-NEXT: [[TMP14:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 32) ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 100, [[TMP14]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] ; NO-VP: [[VECTOR_MEMCHECK]]: @@ -294,7 +294,7 @@ define void @test_xor(ptr nocapture %a, ptr nocapture readonly %b) { ; NO-VP-NEXT: [[B1:%.*]] = ptrtoint ptr [[B]] to i64 ; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() ; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP7]], 4 -; NO-VP-NEXT: [[TMP14:%.*]] = call i64 @llvm.umax.i64(i64 32, i64 [[TMP1]]) +; NO-VP-NEXT: [[TMP14:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 32) ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 100, [[TMP14]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] ; NO-VP: [[VECTOR_MEMCHECK]]: @@ -410,7 +410,7 @@ define void @test_shl(ptr nocapture %a, ptr nocapture readonly %b) { ; NO-VP-NEXT: [[B1:%.*]] = ptrtoint ptr [[B]] to i64 ; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() ; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP7]], 4 -; NO-VP-NEXT: [[TMP14:%.*]] = call i64 @llvm.umax.i64(i64 32, i64 [[TMP1]]) +; NO-VP-NEXT: [[TMP14:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 32) ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 100, [[TMP14]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] ; NO-VP: [[VECTOR_MEMCHECK]]: @@ -526,7 +526,7 @@ define void @test_lshr(ptr nocapture %a, ptr nocapture readonly %b) { ; NO-VP-NEXT: [[B1:%.*]] = ptrtoint ptr [[B]] to i64 ; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() ; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP7]], 4 -; NO-VP-NEXT: [[TMP14:%.*]] = call i64 @llvm.umax.i64(i64 32, i64 [[TMP1]]) +; NO-VP-NEXT: [[TMP14:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 32) ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 100, [[TMP14]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] ; NO-VP: [[VECTOR_MEMCHECK]]: @@ -642,7 +642,7 @@ define void @test_ashr(ptr nocapture %a, ptr nocapture readonly %b) { ; NO-VP-NEXT: [[B1:%.*]] = ptrtoint ptr [[B]] to i64 ; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() ; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP7]], 4 -; NO-VP-NEXT: [[TMP14:%.*]] = call i64 @llvm.umax.i64(i64 32, i64 [[TMP1]]) +; NO-VP-NEXT: [[TMP14:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 32) ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 100, [[TMP14]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] ; NO-VP: [[VECTOR_MEMCHECK]]: @@ -758,7 +758,7 @@ define void 
@test_add(ptr nocapture %a, ptr nocapture readonly %b) { ; NO-VP-NEXT: [[B1:%.*]] = ptrtoint ptr [[B]] to i64 ; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() ; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP7]], 4 -; NO-VP-NEXT: [[TMP14:%.*]] = call i64 @llvm.umax.i64(i64 32, i64 [[TMP1]]) +; NO-VP-NEXT: [[TMP14:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 32) ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 100, [[TMP14]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] ; NO-VP: [[VECTOR_MEMCHECK]]: @@ -874,7 +874,7 @@ define void @test_sub(ptr nocapture %a, ptr nocapture readonly %b) { ; NO-VP-NEXT: [[B1:%.*]] = ptrtoint ptr [[B]] to i64 ; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() ; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP7]], 4 -; NO-VP-NEXT: [[TMP14:%.*]] = call i64 @llvm.umax.i64(i64 32, i64 [[TMP1]]) +; NO-VP-NEXT: [[TMP14:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 32) ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 100, [[TMP14]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] ; NO-VP: [[VECTOR_MEMCHECK]]: @@ -990,7 +990,7 @@ define void @test_mul(ptr nocapture %a, ptr nocapture readonly %b) { ; NO-VP-NEXT: [[B1:%.*]] = ptrtoint ptr [[B]] to i64 ; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() ; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP7]], 4 -; NO-VP-NEXT: [[TMP14:%.*]] = call i64 @llvm.umax.i64(i64 32, i64 [[TMP1]]) +; NO-VP-NEXT: [[TMP14:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 32) ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 100, [[TMP14]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] ; NO-VP: [[VECTOR_MEMCHECK]]: @@ -1106,7 +1106,7 @@ define void @test_sdiv(ptr nocapture %a, ptr nocapture readonly %b) { ; NO-VP-NEXT: [[B1:%.*]] = ptrtoint ptr [[B]] to i64 ; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() ; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP7]], 4 -; NO-VP-NEXT: [[TMP14:%.*]] = call i64 @llvm.umax.i64(i64 32, i64 [[TMP1]]) +; NO-VP-NEXT: [[TMP14:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 32) ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 100, [[TMP14]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] ; NO-VP: [[VECTOR_MEMCHECK]]: @@ -1222,7 +1222,7 @@ define void @test_udiv(ptr nocapture %a, ptr nocapture readonly %b) { ; NO-VP-NEXT: [[B1:%.*]] = ptrtoint ptr [[B]] to i64 ; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() ; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP7]], 4 -; NO-VP-NEXT: [[TMP14:%.*]] = call i64 @llvm.umax.i64(i64 32, i64 [[TMP1]]) +; NO-VP-NEXT: [[TMP14:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 32) ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 100, [[TMP14]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] ; NO-VP: [[VECTOR_MEMCHECK]]: @@ -1338,7 +1338,7 @@ define void @test_srem(ptr nocapture %a, ptr nocapture readonly %b) { ; NO-VP-NEXT: [[B1:%.*]] = ptrtoint ptr [[B]] to i64 ; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() ; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP7]], 4 -; NO-VP-NEXT: [[TMP14:%.*]] = call i64 @llvm.umax.i64(i64 32, i64 [[TMP1]]) +; NO-VP-NEXT: [[TMP14:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 32) ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 100, [[TMP14]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] ; 
NO-VP: [[VECTOR_MEMCHECK]]: @@ -1454,7 +1454,7 @@ define void @test_urem(ptr nocapture %a, ptr nocapture readonly %b) { ; NO-VP-NEXT: [[B1:%.*]] = ptrtoint ptr [[B]] to i64 ; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() ; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP7]], 4 -; NO-VP-NEXT: [[TMP14:%.*]] = call i64 @llvm.umax.i64(i64 32, i64 [[TMP1]]) +; NO-VP-NEXT: [[TMP14:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 32) ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 100, [[TMP14]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] ; NO-VP: [[VECTOR_MEMCHECK]]: @@ -1573,7 +1573,7 @@ define void @test_fadd(ptr nocapture %a, ptr nocapture readonly %b) { ; NO-VP-NEXT: [[B1:%.*]] = ptrtoint ptr [[B]] to i64 ; NO-VP-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() ; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP9]], 2 -; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 16, i64 [[TMP1]]) +; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 16) ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 100, [[TMP2]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] ; NO-VP: [[VECTOR_MEMCHECK]]: @@ -1691,7 +1691,7 @@ define void @test_fsub(ptr nocapture %a, ptr nocapture readonly %b) { ; NO-VP-NEXT: [[B1:%.*]] = ptrtoint ptr [[B]] to i64 ; NO-VP-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() ; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP9]], 2 -; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 16, i64 [[TMP1]]) +; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 16) ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 100, [[TMP2]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] ; NO-VP: [[VECTOR_MEMCHECK]]: @@ -1809,7 +1809,7 @@ define void @test_fmul(ptr nocapture %a, ptr nocapture readonly %b) { ; NO-VP-NEXT: [[B1:%.*]] = ptrtoint ptr [[B]] to i64 ; NO-VP-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() ; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP9]], 2 -; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 16, i64 [[TMP1]]) +; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 16) ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 100, [[TMP2]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] ; NO-VP: [[VECTOR_MEMCHECK]]: @@ -1927,7 +1927,7 @@ define void @test_fdiv(ptr nocapture %a, ptr nocapture readonly %b) { ; NO-VP-NEXT: [[B1:%.*]] = ptrtoint ptr [[B]] to i64 ; NO-VP-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() ; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP9]], 2 -; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 16, i64 [[TMP1]]) +; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 16) ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 100, [[TMP2]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] ; NO-VP: [[VECTOR_MEMCHECK]]: @@ -2098,7 +2098,7 @@ define void @test_fneg(ptr nocapture %a, ptr nocapture readonly %b) { ; NO-VP-NEXT: [[B1:%.*]] = ptrtoint ptr [[B]] to i64 ; NO-VP-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() ; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP9]], 2 -; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 16, i64 [[TMP1]]) +; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 16) ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 100, [[TMP2]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label 
%[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] ; NO-VP: [[VECTOR_MEMCHECK]]: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-call-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-call-intrinsics.ll index f4416f6b9a3ec..84a043a291dc7 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-call-intrinsics.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-call-intrinsics.ll @@ -72,7 +72,7 @@ define void @vp_smax(ptr %a, ptr %b, ptr %c, i64 %N) { ; NO-VP-NEXT: [[A1:%.*]] = ptrtoint ptr [[A]] to i64 ; NO-VP-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() ; NO-VP-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP12]], 2 -; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 16, i64 [[TMP11]]) +; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP11]], i64 16) ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP2]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] ; NO-VP: [[VECTOR_MEMCHECK]]: @@ -211,7 +211,7 @@ define void @vp_smin(ptr %a, ptr %b, ptr %c, i64 %N) { ; NO-VP-NEXT: [[A1:%.*]] = ptrtoint ptr [[A]] to i64 ; NO-VP-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() ; NO-VP-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP12]], 2 -; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 16, i64 [[TMP11]]) +; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP11]], i64 16) ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP2]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] ; NO-VP: [[VECTOR_MEMCHECK]]: @@ -350,7 +350,7 @@ define void @vp_umax(ptr %a, ptr %b, ptr %c, i64 %N) { ; NO-VP-NEXT: [[A1:%.*]] = ptrtoint ptr [[A]] to i64 ; NO-VP-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() ; NO-VP-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP12]], 2 -; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 16, i64 [[TMP11]]) +; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP11]], i64 16) ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP2]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] ; NO-VP: [[VECTOR_MEMCHECK]]: @@ -489,7 +489,7 @@ define void @vp_umin(ptr %a, ptr %b, ptr %c, i64 %N) { ; NO-VP-NEXT: [[A1:%.*]] = ptrtoint ptr [[A]] to i64 ; NO-VP-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() ; NO-VP-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP12]], 2 -; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 16, i64 [[TMP11]]) +; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP11]], i64 16) ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP2]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] ; NO-VP: [[VECTOR_MEMCHECK]]: @@ -619,7 +619,7 @@ define void @vp_ctlz(ptr %a, ptr %b, i64 %N) { ; NO-VP-NEXT: [[A1:%.*]] = ptrtoint ptr [[A]] to i64 ; NO-VP-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() ; NO-VP-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP10]], 2 -; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 8, i64 [[TMP9]]) +; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP9]], i64 8) ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP2]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] ; NO-VP: [[VECTOR_MEMCHECK]]: @@ -738,7 +738,7 @@ define void @vp_cttz(ptr %a, ptr %b, i64 %N) { ; NO-VP-NEXT: [[A1:%.*]] = ptrtoint ptr [[A]] to i64 ; NO-VP-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() ; 
NO-VP-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP10]], 2
-; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 8, i64 [[TMP9]])
+; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP9]], i64 8)
 ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP2]]
 ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
 ; NO-VP: [[VECTOR_MEMCHECK]]:
@@ -861,7 +861,7 @@ define void @vp_lrint(ptr %a, ptr %b, i64 %N) {
 ; NO-VP-NEXT: [[A1:%.*]] = ptrtoint ptr [[A]] to i64
 ; NO-VP-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
 ; NO-VP-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP10]], 2
-; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 16, i64 [[TMP9]])
+; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP9]], i64 16)
 ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP2]]
 ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
 ; NO-VP: [[VECTOR_MEMCHECK]]:
@@ -990,7 +990,7 @@ define void @vp_llrint(ptr %a, ptr %b, i64 %N) {
 ; NO-VP-NEXT: [[A1:%.*]] = ptrtoint ptr [[A]] to i64
 ; NO-VP-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
 ; NO-VP-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP10]], 2
-; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 16, i64 [[TMP9]])
+; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP9]], i64 16)
 ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP2]]
 ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
 ; NO-VP: [[VECTOR_MEMCHECK]]:
@@ -1115,7 +1115,7 @@ define void @vp_abs(ptr %a, ptr %b, i64 %N) {
 ; NO-VP-NEXT: [[A1:%.*]] = ptrtoint ptr [[A]] to i64
 ; NO-VP-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
 ; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP9]], 2
-; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 8, i64 [[TMP1]])
+; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 8)
 ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP2]]
 ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
 ; NO-VP: [[VECTOR_MEMCHECK]]:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cast-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cast-intrinsics.ll
index 9bbfb22272351..b8add34018b22 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cast-intrinsics.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cast-intrinsics.ll
@@ -60,7 +60,7 @@ define void @vp_sext(ptr %a, ptr %b, i64 %N) {
 ; NO-VP-NEXT: [[ENTRY:.*]]:
 ; NO-VP-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
 ; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP9]], 1
-; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 20, i64 [[TMP1]])
+; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 20)
 ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP2]]
 ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
 ; NO-VP: [[VECTOR_MEMCHECK]]:
@@ -178,7 +178,7 @@ define void @vp_zext(ptr %a, ptr %b, i64 %N) {
 ; NO-VP-NEXT: [[ENTRY:.*]]:
 ; NO-VP-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
 ; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP9]], 1
-; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 20, i64 [[TMP1]])
+; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 20)
 ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP2]]
 ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
 ; NO-VP: [[VECTOR_MEMCHECK]]:
@@ -296,7 +296,7 @@ define void @vp_trunc(ptr %a, ptr %b, i64 %N) {
 ; NO-VP-NEXT: [[ENTRY:.*]]:
 ; NO-VP-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
 ; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP9]], 1
-; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 20, i64 [[TMP1]])
+; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 20)
 ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP2]]
 ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
 ; NO-VP: [[VECTOR_MEMCHECK]]:
@@ -414,7 +414,7 @@ define void @vp_fpext(ptr %a, ptr %b, i64 %N) {
 ; NO-VP-NEXT: [[ENTRY:.*]]:
 ; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
 ; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP7]], 1
-; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 16, i64 [[TMP1]])
+; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 16)
 ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP2]]
 ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
 ; NO-VP: [[VECTOR_MEMCHECK]]:
@@ -532,7 +532,7 @@ define void @vp_fptrunc(ptr %a, ptr %b, i64 %N) {
 ; NO-VP-NEXT: [[ENTRY:.*]]:
 ; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
 ; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP7]], 1
-; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 16, i64 [[TMP1]])
+; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 16)
 ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP2]]
 ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
 ; NO-VP: [[VECTOR_MEMCHECK]]:
@@ -652,7 +652,7 @@ define void @vp_sitofp(ptr %a, ptr %b, i64 %N) {
 ; NO-VP-NEXT: [[A1:%.*]] = ptrtoint ptr [[A]] to i64
 ; NO-VP-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
 ; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP9]], 2
-; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 16, i64 [[TMP1]])
+; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 16)
 ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP2]]
 ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
 ; NO-VP: [[VECTOR_MEMCHECK]]:
@@ -770,7 +770,7 @@ define void @vp_uitofp(ptr %a, ptr %b, i64 %N) {
 ; NO-VP-NEXT: [[A1:%.*]] = ptrtoint ptr [[A]] to i64
 ; NO-VP-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
 ; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP9]], 2
-; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 16, i64 [[TMP1]])
+; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 16)
 ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP2]]
 ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
 ; NO-VP: [[VECTOR_MEMCHECK]]:
@@ -888,7 +888,7 @@ define void @vp_fptosi(ptr %a, ptr %b, i64 %N) {
 ; NO-VP-NEXT: [[A1:%.*]] = ptrtoint ptr [[A]] to i64
 ; NO-VP-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
 ; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP9]], 2
-; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 16, i64 [[TMP1]])
+; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 16)
 ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP2]]
 ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
 ; NO-VP: [[VECTOR_MEMCHECK]]:
@@ -1006,7 +1006,7 @@ define void @vp_fptoui(ptr %a, ptr %b, i64 %N) {
 ; NO-VP-NEXT: [[A1:%.*]] = ptrtoint ptr [[A]] to i64
 ; NO-VP-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
 ; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP9]], 2
-; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 16, i64 [[TMP1]])
+; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 16)
 ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP2]]
 ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
 ; NO-VP: [[VECTOR_MEMCHECK]]:
@@ -1124,7 +1124,7 @@ define void @vp_inttoptr(ptr %a, ptr %b, i64 %N) {
 ; NO-VP-NEXT: [[A1:%.*]] = ptrtoint ptr [[A]] to i64
 ; NO-VP-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
 ; NO-VP-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP10]], 1
-; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 16, i64 [[TMP9]])
+; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP9]], i64 16)
 ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP2]]
 ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
 ; NO-VP: [[VECTOR_MEMCHECK]]:
diff --git a/llvm/test/Transforms/LoopVectorize/epilog-iv-select-cmp.ll b/llvm/test/Transforms/LoopVectorize/epilog-iv-select-cmp.ll
index 968e107d04f8f..2d75576bc36ee 100644
--- a/llvm/test/Transforms/LoopVectorize/epilog-iv-select-cmp.ll
+++ b/llvm/test/Transforms/LoopVectorize/epilog-iv-select-cmp.ll
@@ -212,12 +212,12 @@ define i8 @select_icmp_var_start(ptr %a, i8 %n, i8 %start) {
 ; CHECK-NEXT: [[TMP0:%.*]] = add i8 [[N]], -1
 ; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[TMP0]] to i32
 ; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i32 [[TMP1]], 1
-; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP2]], 4
 ; CHECK-NEXT: [[FR:%.*]] = freeze i8 [[START]]
-; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]]
-; CHECK: [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
 ; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i32 [[TMP2]], 4
-; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]]
+; CHECK: [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
+; CHECK-NEXT: [[MIN_ITERS_CHECK2:%.*]] = icmp ult i32 [[TMP2]], 4
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK2]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]]
 ; CHECK: [[VECTOR_PH]]:
 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP2]], 4
 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP2]], [[N_MOD_VF]]
diff --git a/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc-and-loop-metadata.ll b/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc-and-loop-metadata.ll
index 372c703f4cb23..aa5fca88da9d4 100644
--- a/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc-and-loop-metadata.ll
+++ b/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc-and-loop-metadata.ll
@@ -448,7 +448,7 @@ define void @scalar_cast_dbg(ptr nocapture %a, i32 %start, i64 %k) {
 ; DEBUGLOC: [[VECTOR_PH]]:
 ; DEBUGLOC-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[K]], 4
 ; DEBUGLOC-NEXT: [[N_VEC:%.*]] = sub i64 [[K]], [[N_MOD_VF]]
-; DEBUGLOC-NEXT: br label %[[VECTOR_BODY:.*]], !dbg [[DBG77]]
+; DEBUGLOC-NEXT: br label %[[VECTOR_BODY:.*]], !dbg [[DBG78]]
 ; DEBUGLOC: [[VECTOR_BODY]]:
 ; DEBUGLOC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ], !dbg [[DBG78]]
 ; DEBUGLOC-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ], !dbg [[DBG79:![0-9]+]]
@@ -556,7 +556,7 @@ define void @widen_intrinsic_dbg(i64 %n, ptr %y, ptr %x) {
 ; DEBUGLOC: [[VECTOR_PH]]:
 ; DEBUGLOC-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
 ; DEBUGLOC-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
-; DEBUGLOC-NEXT: br label %[[VECTOR_BODY:.*]], !dbg [[DBG97]]
+; DEBUGLOC-NEXT: br label %[[VECTOR_BODY:.*]], !dbg [[DBG98]]
 ; DEBUGLOC: [[VECTOR_BODY]]:
 ; DEBUGLOC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ], !dbg [[DBG98]]
 ; DEBUGLOC-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[Y]], i64 [[INDEX]], !dbg [[DBG99:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll b/llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll
index cae5c4af13792..0892500da90e3 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll
@@ -88,7 +88,14 @@ define void @iv_expand(ptr %p, i64 %n) {
 ; CHECK-NEXT: }
 ; CHECK-NEXT: Successor(s): middle.block
 ; CHECK: VPlan 'Final VPlan for VF={8},UF={1}'
-; CHECK: ir-bb:
+; CHECK-NEXT: Live-in ir<%n> = original trip-count
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb:
+; CHECK-NEXT: EMIT vp<%min.iters.check> = icmp ult ir<%n>, ir<8>
+; CHECK-NEXT: EMIT branch-on-cond vp<%min.iters.check>
+; CHECK-NEXT: Successor(s): ir-bb, vector.ph
+; CHECK-EMPTY:
+; CHECK: vector.ph:
 ; CHECK-NEXT: EMIT vp<%n.mod.vf> = urem ir<%n>, ir<8>
 ; CHECK-NEXT: EMIT vp<%n.vec> = sub ir<%n>, vp<%n.mod.vf>
 ; CHECK-NEXT: EMIT vp<[[STEP_VECTOR:%.+]]> = step-vector
@@ -100,8 +107,8 @@ define void @iv_expand(ptr %p, i64 %n) {
 ; CHECK-NEXT: Successor(s): vector.body
 ; CHECK-EMPTY:
 ; CHECK-NEXT: vector.body:
-; CHECK-NEXT: EMIT-SCALAR vp<[[SCALAR_PHI:%.+]]> = phi [ ir<0>, ir-bb ], [ vp<%index.next>, vector.body ]
-; CHECK-NEXT: WIDEN-PHI ir<%iv> = phi [ vp<[[INDUCTION]]>, ir-bb ], [ vp<%vec.ind.next>, vector.body ]
+; CHECK-NEXT: EMIT-SCALAR vp<[[SCALAR_PHI:%.+]]> = phi [ ir<0>, vector.ph ], [ vp<%index.next>, vector.body ]
+; CHECK-NEXT: WIDEN-PHI ir<%iv> = phi [ vp<[[INDUCTION]]>, vector.ph ], [ vp<%vec.ind.next>, vector.body ]
 ; CHECK-NEXT: CLONE ir<%q> = getelementptr ir<%p>, vp<[[SCALAR_PHI]]>
 ; CHECK-NEXT: WIDEN ir<%x> = load ir<%q>
 ; CHECK-NEXT: WIDEN ir<%y> = add ir<%x>, ir<%iv>
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-predicate-switch.ll b/llvm/test/Transforms/LoopVectorize/vplan-predicate-switch.ll
index 32aadcfb164e6..3d05ee7f27b5c 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-predicate-switch.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-predicate-switch.ll
@@ -6,16 +6,21 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) {
 ; CHECK-NEXT: ir<%0> = original trip-count
 ; CHECK-EMPTY:
 ; CHECK-NEXT: ir-bb:
-; CHECK-NEXT: Successor(s): ir-bb, ir-bb
-; CHECK-EMPTY:
-; CHECK-NEXT: ir-bb:
+; CHECK-NEXT: IR %start2 = ptrtoint ptr %start to i64
+; CHECK-NEXT: IR %end1 = ptrtoint ptr %end to i64
+; CHECK-NEXT: IR %0 = sub i64 %end1, %start2
+; CHECK-NEXT: EMIT vp<%min.iters.check> = icmp ult ir<%0>, ir<2>
+; CHECK-NEXT: EMIT branch-on-cond vp<%min.iters.check>
+; CHECK-NEXT: Successor(s): ir-bb, vector.ph
+; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
 ; CHECK-NEXT: EMIT vp<%n.mod.vf> = urem ir<%0>, ir<2>
 ; CHECK-NEXT: EMIT vp<[[VTC:%.+]]> = sub ir<%0>, vp<%n.mod.vf>
 ; CHECK-NEXT: vp<[[END:%.+]]> = DERIVED-IV ir<%start> + vp<[[VTC]]> * ir<1>
 ; CHECK-NEXT: Successor(s): vector.body
 ; CHECK-EMPTY:
 ; CHECK-NEXT: vector.body:
-; CHECK-NEXT: EMIT-SCALAR vp<[[CAN_IV:%.+]]> = phi [ ir<0>, ir-bb ], [ vp<[[CAN_IV_NEXT:%.+]]>, default.2 ]
+; CHECK-NEXT: EMIT-SCALAR vp<[[CAN_IV:%.+]]> = phi [ ir<0>, vector.ph ], [ vp<[[CAN_IV_NEXT:%.+]]>, default.2 ]
 ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, ir<2>
 ; CHECK-NEXT: EMIT vp<[[PTR:%.+]]> = ptradd ir<%start>, vp<[[STEPS]]>
 ; CHECK-NEXT: WIDEN ir<%l> = load vp<[[PTR]]>
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll
index c65d9368d595f..2a7ffec27c2f9 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll
@@ -66,9 +66,10 @@ define void @test_tc_less_than_16(ptr %A, i64 %N) {
 ; CHECK-EMPTY:
 ; CHECK-NEXT: ir-bb:
 ; CHECK-NEXT: IR %and = and i64 %N, 15
-; CHECK-NEXT: Successor(s): ir-bb, ir-bb
+; CHECK-NEXT: EMIT branch-on-cond ir
+; CHECK-NEXT: Successor(s): ir-bb, vector.ph
 ; CHECK-EMPTY:
-; CHECK-NEXT: ir-bb:
+; CHECK-NEXT: vector.ph:
 ; CHECK-NEXT: EMIT vp<%n.mod.vf> = urem ir<%and>, ir<16>
 ; CHECK-NEXT: EMIT vp<[[VTC:%.+]]> = sub ir<%and>, vp<%n.mod.vf>
 ; CHECK-NEXT: vp<[[END1:%.+]]> = DERIVED-IV ir<%and> + vp<[[VTC]]> * ir<-1>