diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index ec5c8c5fa366c..10194e63b1b32 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -4081,7 +4081,6 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF,
     case VPDef::VPWidenIntrinsicSC:
     case VPDef::VPWidenSC:
     case VPDef::VPWidenSelectSC:
-    case VPDef::VPBlendSC:
     case VPDef::VPFirstOrderRecurrencePHISC:
     case VPDef::VPHistogramSC:
     case VPDef::VPWidenPHISC:
@@ -4203,10 +4202,13 @@ VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() {
       if (!VPI)
         continue;
       switch (VPI->getOpcode()) {
-      // Selects are only modelled in the legacy cost model for safe
-      // divisors.
       case Instruction::Select: {
         VPValue *VPV = VPI->getVPSingleValue();
+        // Blend selects are modelled in VPlan.
+        if (isa_and_nonnull<PHINode>(VPV->getUnderlyingValue()))
+          continue;
+        // Selects are only modelled in the legacy cost model for safe
+        // divisors.
         if (VPV->getNumUsers() == 1) {
           if (auto *WR = dyn_cast<VPWidenRecipe>(*VPV->user_begin())) {
             switch (WR->getOpcode()) {
@@ -8656,9 +8658,11 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
       // latter are added above for masking.
       // FIXME: Migrate code relying on the underlying instruction from VPlan0
       // to construct recipes below to not use the underlying instruction.
-      if (isa<VPCanonicalIVPHIRecipe, VPWidenCanonicalIVRecipe, VPBlendRecipe>(
-              &R) ||
-          (isa<VPInstruction>(&R) && !UnderlyingValue))
+      if (isa<VPCanonicalIVPHIRecipe, VPWidenCanonicalIVRecipe>(&R) ||
+          (isa<VPInstruction>(&R) && !UnderlyingValue) ||
+          (match(&R, m_VPInstruction<Instruction::Select>(
+                         m_VPValue(), m_VPValue(), m_VPValue())) &&
+           isa_and_nonnull<PHINode>(UnderlyingValue)))
         continue;
 
       // FIXME: VPlan0, which models a copy of the original scalar loop, should
@@ -8944,20 +8948,20 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
   // the phi until LoopExitValue. We keep track of the previous item
   // (PreviousLink) to tell which of the two operands of a Link will remain
   // scalar and which will be reduced. For minmax by select(cmp), Link will be
-  // the select instructions. Blend recipes of in-loop reduction phis will
+  // the select instructions. Blend selects of in-loop reduction phis will
   // get folded to their non-phi operand, as the reduction recipe handles the
   // condition directly.
   VPSingleDefRecipe *PreviousLink = PhiR; // Aka Worklist[0].
   for (VPSingleDefRecipe *CurrentLink : drop_begin(Worklist)) {
-    if (auto *Blend = dyn_cast<VPBlendRecipe>(CurrentLink)) {
-      assert(Blend->getNumIncomingValues() == 2 &&
-             "Blend must have 2 incoming values");
-      if (Blend->getIncomingValue(0) == PhiR) {
-        Blend->replaceAllUsesWith(Blend->getIncomingValue(1));
+    using namespace VPlanPatternMatch;
+    VPValue *T, *F;
+    if (match(CurrentLink, m_VPInstruction<Instruction::Select>(
+                               m_VPValue(), m_VPValue(T), m_VPValue(F)))) {
+      if (T == PhiR) {
+        CurrentLink->replaceAllUsesWith(F);
       } else {
-        assert(Blend->getIncomingValue(1) == PhiR &&
-               "PhiR must be an operand of the blend");
-        Blend->replaceAllUsesWith(Blend->getIncomingValue(0));
+        assert(F == PhiR && "PhiR must be an operand of the select");
+        CurrentLink->replaceAllUsesWith(T);
       }
       continue;
     }
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 055db2b9adb95..60decd429ff6b 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -545,7 +545,6 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
     case VPRecipeBase::VPWidenIntrinsicSC:
     case VPRecipeBase::VPWidenSC:
     case VPRecipeBase::VPWidenSelectSC:
-    case VPRecipeBase::VPBlendSC:
     case VPRecipeBase::VPPredInstPHISC:
     case VPRecipeBase::VPCanonicalIVPHISC:
     case VPRecipeBase::VPActiveLaneMaskPHISC:
@@ -2369,72 +2368,6 @@ class VPReductionPHIRecipe : public VPHeaderPHIRecipe,
   }
 };
 
-/// A recipe for vectorizing a phi-node as a sequence of mask-based select
-/// instructions.
-class LLVM_ABI_FOR_TEST VPBlendRecipe : public VPSingleDefRecipe {
-public:
-  /// The blend operation is a User of the incoming values and of their
-  /// respective masks, ordered [I0, M0, I1, M1, I2, M2, ...]. Note that M0 can
-  /// be omitted (implied by passing an odd number of operands) in which case
-  /// all other incoming values are merged into it.
-  VPBlendRecipe(PHINode *Phi, ArrayRef<VPValue *> Operands, DebugLoc DL)
-      : VPSingleDefRecipe(VPDef::VPBlendSC, Operands, Phi, DL) {
-    assert(Operands.size() > 0 && "Expected at least one operand!");
-  }
-
-  VPBlendRecipe *clone() override {
-    return new VPBlendRecipe(cast_or_null<PHINode>(getUnderlyingValue()),
-                             operands(), getDebugLoc());
-  }
-
-  VP_CLASSOF_IMPL(VPDef::VPBlendSC)
-
-  /// A normalized blend is one that has an odd number of operands, whereby the
-  /// first operand does not have an associated mask.
-  bool isNormalized() const { return getNumOperands() % 2; }
-
-  /// Return the number of incoming values, taking into account when normalized
-  /// the first incoming value will have no mask.
-  unsigned getNumIncomingValues() const {
-    return (getNumOperands() + isNormalized()) / 2;
-  }
-
-  /// Return incoming value number \p Idx.
-  VPValue *getIncomingValue(unsigned Idx) const {
-    return Idx == 0 ? getOperand(0) : getOperand(Idx * 2 - isNormalized());
-  }
-
-  /// Return mask number \p Idx.
-  VPValue *getMask(unsigned Idx) const {
-    assert((Idx > 0 || !isNormalized()) && "First index has no mask!");
-    return Idx == 0 ? getOperand(1) : getOperand(Idx * 2 + !isNormalized());
-  }
-
-  void execute(VPTransformState &State) override {
-    llvm_unreachable("VPBlendRecipe should be expanded by simplifyBlends");
-  }
-
-  /// Return the cost of this VPBlendRecipe.
-  InstructionCost computeCost(ElementCount VF,
-                              VPCostContext &Ctx) const override;
-
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-  /// Print the recipe.
-  void print(raw_ostream &O, const Twine &Indent,
-             VPSlotTracker &SlotTracker) const override;
-#endif
-
-  /// Returns true if the recipe only uses the first lane of operand \p Op.
-  bool onlyFirstLaneUsed(const VPValue *Op) const override {
-    assert(is_contained(operands(), Op) &&
-           "Op must be an operand of the recipe");
-    // Recursing through Blend recipes only, must terminate at header phis at
-    // the latest.
-    return all_of(users(),
-                  [this](VPUser *U) { return U->onlyFirstLaneUsed(this); });
-  }
-};
-
 /// VPInterleaveRecipe is a recipe for transforming an interleave group of load
 /// or stores into one wide load/store and shuffles. The first operand of a
 /// VPInterleave recipe is the address, followed by the stored values, followed
diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
index 747c6623aa22a..b4de38120ed19 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
@@ -40,17 +40,6 @@ VPTypeAnalysis::VPTypeAnalysis(const VPlan &Plan) : Ctx(Plan.getContext()) {
   CanonicalIVTy = cast<VPExpandSCEVRecipe>(TC)->getSCEV()->getType();
 }
 
-Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPBlendRecipe *R) {
-  Type *ResTy = inferScalarType(R->getIncomingValue(0));
-  for (unsigned I = 1, E = R->getNumIncomingValues(); I != E; ++I) {
-    VPValue *Inc = R->getIncomingValue(I);
-    assert(inferScalarType(Inc) == ResTy &&
-           "different types inferred for different incoming values");
-    CachedTypes[Inc] = ResTy;
-  }
-  return ResTy;
-}
-
 Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
   // Set the result type from the first operand, check if the types for all
   // other operands match and cache them.
@@ -293,7 +282,7 @@ Type *VPTypeAnalysis::inferScalarType(const VPValue *V) {
           .Case<VPWidenIntrinsicRecipe, VPWidenCastRecipe>(
               [](const auto *R) { return R->getResultType(); })
-          .Case<VPBlendRecipe, VPInstruction, VPWidenRecipe, VPReplicateRecipe,
+          .Case<VPInstruction, VPWidenRecipe, VPReplicateRecipe,
                 VPWidenCallRecipe, VPWidenMemoryRecipe, VPWidenSelectRecipe>(
               [this](const auto *R) { return inferScalarTypeForRecipe(R); })
           .Case<VPInterleaveRecipe>([V](const VPInterleaveRecipe *R) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.h b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.h
index c6c43698b7483..32a3da565a866 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.h
@@ -18,7 +18,6 @@ namespace llvm {
 
 class LLVMContext;
 class VPValue;
-class VPBlendRecipe;
 class VPInstruction;
 class VPWidenRecipe;
 class VPWidenCallRecipe;
@@ -48,7 +47,6 @@ class VPTypeAnalysis {
   Type *CanonicalIVTy;
   LLVMContext &Ctx;
 
-  Type *inferScalarTypeForRecipe(const VPBlendRecipe *R);
   Type *inferScalarTypeForRecipe(const VPInstruction *R);
   Type *inferScalarTypeForRecipe(const VPWidenCallRecipe *R);
   Type *inferScalarTypeForRecipe(const VPWidenRecipe *R);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp b/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp
index 2e9a36adbbf3c..a6a8e0166914c 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp
@@ -84,8 +84,8 @@ class VPPredicator {
   /// block of the loop is set to True, or to the loop mask when tail folding.
   VPValue *createBlockInMask(VPBasicBlock *VPBB);
 
-  /// Convert phi recipes in \p VPBB to VPBlendRecipes.
-  void convertPhisToBlends(VPBasicBlock *VPBB);
+  /// Convert phi recipes in \p VPBB to selects.
+  void convertPhisToSelects(VPBasicBlock *VPBB);
 
   const BlockMaskCacheTy getBlockMaskCache() const { return BlockMaskCache; }
 };
@@ -247,7 +247,7 @@ VPValue *VPPredicator::findCommonEdgeMask(const VPPhi *PhiR) const {
   return CommonEdgeMask;
 }
 
-void VPPredicator::convertPhisToBlends(VPBasicBlock *VPBB) {
+void VPPredicator::convertPhisToSelects(VPBasicBlock *VPBB) {
   SmallVector<VPPhi *> Phis;
   for (VPRecipeBase &R : VPBB->phis())
     Phis.push_back(cast<VPPhi>(&R));
@@ -259,8 +259,10 @@ void VPPredicator::convertPhisToSelects(VPBasicBlock *VPBB) {
     // optimizations will clean it up.
     VPValue *CommonEdgeMask = findCommonEdgeMask(PhiR);
 
+    VPValue *Select = PhiR->getIncomingValue(0);
     SmallVector<VPValue *, 2> OperandsWithMask;
-    for (const auto &[InVPV, InVPBB] : PhiR->incoming_values_and_blocks()) {
+    for (const auto &[InVPV, InVPBB] :
+         drop_begin(PhiR->incoming_values_and_blocks())) {
       OperandsWithMask.push_back(InVPV);
       VPValue *EdgeMask = getEdgeMask(InVPBB, VPBB);
       if (!EdgeMask) {
@@ -277,13 +279,11 @@ void VPPredicator::convertPhisToSelects(VPBasicBlock *VPBB) {
         EdgeMask = X;
       }
 
-      OperandsWithMask.push_back(EdgeMask);
+      Select =
+          Builder.createSelect(EdgeMask, InVPV, Select, PhiR->getDebugLoc());
+      Select->setUnderlyingValue(PhiR->getUnderlyingValue());
     }
 
-    PHINode *IRPhi = cast_or_null<PHINode>(PhiR->getUnderlyingValue());
-    auto *Blend =
-        new VPBlendRecipe(IRPhi, OperandsWithMask, PhiR->getDebugLoc());
-    Builder.insert(Blend);
-    PhiR->replaceAllUsesWith(Blend);
+    PhiR->replaceAllUsesWith(Select);
     PhiR->eraseFromParent();
   }
 }
@@ -309,7 +309,7 @@ VPlanTransforms::introduceMasksAndLinearize(VPlan &Plan, bool FoldTail) {
     }
 
     Predicator.createBlockInMask(VPBB);
-    Predicator.convertPhisToBlends(VPBB);
+    Predicator.convertPhisToSelects(VPBB);
   }
 
   // Linearize the blocks of the loop into one serial chain.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index bd9a93ed57b8a..da2942bc81cbf 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -73,7 +73,6 @@ bool VPRecipeBase::mayWriteToMemory() const {
   case VPScalarIVStepsSC:
   case VPPredInstPHISC:
     return false;
-  case VPBlendSC:
   case VPReductionEVLSC:
   case VPReductionSC:
   case VPVectorPointerSC:
@@ -123,7 +122,6 @@ bool VPRecipeBase::mayReadFromMemory() const {
   case VPWidenStoreEVLSC:
   case VPWidenStoreSC:
     return false;
-  case VPBlendSC:
  case VPReductionEVLSC:
   case VPReductionSC:
   case VPVectorPointerSC:
@@ -163,7 +161,6 @@ bool VPRecipeBase::mayHaveSideEffects() const {
   }
   case VPWidenIntrinsicSC:
     return cast<VPWidenIntrinsicRecipe>(this)->mayHaveSideEffects();
-  case VPBlendSC:
   case VPReductionEVLSC:
   case VPReductionSC:
   case VPScalarIVStepsSC:
@@ -2581,44 +2578,6 @@ void VPVectorPointerRecipe::print(raw_ostream &O, const Twine &Indent,
 }
 #endif
 
-InstructionCost VPBlendRecipe::computeCost(ElementCount VF,
-                                           VPCostContext &Ctx) const {
-  // Handle cases where only the first lane is used the same way as the legacy
-  // cost model.
-  if (vputils::onlyFirstLaneUsed(this))
-    return Ctx.TTI.getCFInstrCost(Instruction::PHI, Ctx.CostKind);
-
-  Type *ResultTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
-  Type *CmpTy = toVectorTy(Type::getInt1Ty(Ctx.Types.getContext()), VF);
-  return (getNumIncomingValues() - 1) *
-         Ctx.TTI.getCmpSelInstrCost(Instruction::Select, ResultTy, CmpTy,
-                                    CmpInst::BAD_ICMP_PREDICATE, Ctx.CostKind);
-}
-
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-void VPBlendRecipe::print(raw_ostream &O, const Twine &Indent,
-                          VPSlotTracker &SlotTracker) const {
-  O << Indent << "BLEND ";
-  printAsOperand(O, SlotTracker);
-  O << " =";
-  if (getNumIncomingValues() == 1) {
-    // Not a User of any mask: not really blending, this is a
-    // single-predecessor phi.
-    O << " ";
-    getIncomingValue(0)->printAsOperand(O, SlotTracker);
-  } else {
-    for (unsigned I = 0, E = getNumIncomingValues(); I < E; ++I) {
-      O << " ";
-      getIncomingValue(I)->printAsOperand(O, SlotTracker);
-      if (I == 0)
-        continue;
-      O << "/";
-      getMask(I)->printAsOperand(O, SlotTracker);
-    }
-  }
-}
-#endif
-
 void VPReductionRecipe::execute(VPTransformState &State) {
   assert(!State.Lane && "Reduction being replicated.");
   Value *PrevInChain = State.get(getChainOp(), /*IsScalar*/ true);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index ded9e46b407b1..6f4dc27995421 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1091,6 +1091,12 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
     return;
   }
 
+  if (match(Def, m_Select(m_True(), m_VPValue(X), m_VPValue())))
+    return Def->replaceAllUsesWith(X);
+
+  if (match(Def, m_Select(m_False(), m_VPValue(), m_VPValue(X))))
+    return Def->replaceAllUsesWith(X);
+
   if (match(Def, m_Select(m_VPValue(), m_VPValue(X), m_Deferred(X))))
     return Def->replaceAllUsesWith(X);
 
@@ -1292,85 +1298,6 @@ static void narrowToSingleScalarRecipes(VPlan &Plan) {
   }
 }
 
-/// Normalize and simplify VPBlendRecipes. Should be run after simplifyRecipes
-/// to make sure the masks are simplified.
-static void simplifyBlends(VPlan &Plan) {
-  for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
-           vp_depth_first_shallow(Plan.getVectorLoopRegion()->getEntry()))) {
-    for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
-      auto *Blend = dyn_cast<VPBlendRecipe>(&R);
-      if (!Blend)
-        continue;
-
-      // Try to remove redundant blend recipes.
-      SmallPtrSet<VPValue *, 4> UniqueValues;
-      if (Blend->isNormalized() || !match(Blend->getMask(0), m_False()))
-        UniqueValues.insert(Blend->getIncomingValue(0));
-      for (unsigned I = 1; I != Blend->getNumIncomingValues(); ++I)
-        if (!match(Blend->getMask(I), m_False()))
-          UniqueValues.insert(Blend->getIncomingValue(I));
-
-      if (UniqueValues.size() == 1) {
-        Blend->replaceAllUsesWith(*UniqueValues.begin());
-        Blend->eraseFromParent();
-        continue;
-      }
-
-      if (Blend->isNormalized())
-        continue;
-
-      // Normalize the blend so its first incoming value is used as the initial
-      // value with the others blended into it.
-
-      unsigned StartIndex = 0;
-      for (unsigned I = 0; I != Blend->getNumIncomingValues(); ++I) {
-        // If a value's mask is used only by the blend then it can be deadcoded.
-        // TODO: Find the most expensive mask that can be deadcoded, or a mask
-        // that's used by multiple blends where it can be removed from them all.
-        VPValue *Mask = Blend->getMask(I);
-        if (Mask->getNumUsers() == 1 && !match(Mask, m_False())) {
-          StartIndex = I;
-          break;
-        }
-      }
-
-      SmallVector<VPValue *, 4> OperandsWithMask;
-      OperandsWithMask.push_back(Blend->getIncomingValue(StartIndex));
-
-      for (unsigned I = 0; I != Blend->getNumIncomingValues(); ++I) {
-        if (I == StartIndex)
-          continue;
-        OperandsWithMask.push_back(Blend->getIncomingValue(I));
-        OperandsWithMask.push_back(Blend->getMask(I));
-      }
-
-      auto *NewBlend =
-          new VPBlendRecipe(cast_or_null<PHINode>(Blend->getUnderlyingValue()),
-                            OperandsWithMask, Blend->getDebugLoc());
-      NewBlend->insertBefore(&R);
-
-      VPValue *DeadMask = Blend->getMask(StartIndex);
-      Blend->replaceAllUsesWith(NewBlend);
-      Blend->eraseFromParent();
-      recursivelyDeleteDeadRecipes(DeadMask);
-
-      /// Simplify BLEND %a, %b, Not(%mask) -> BLEND %b, %a, %mask.
-      VPValue *NewMask;
-      if (NewBlend->getNumOperands() == 3 &&
-          match(NewBlend->getMask(1), m_Not(m_VPValue(NewMask)))) {
-        VPValue *Inc0 = NewBlend->getOperand(0);
-        VPValue *Inc1 = NewBlend->getOperand(1);
-        VPValue *OldMask = NewBlend->getOperand(2);
-        NewBlend->setOperand(0, Inc1);
-        NewBlend->setOperand(1, Inc0);
-        NewBlend->setOperand(2, NewMask);
-        if (OldMask->getNumUsers() == 0)
-          cast<VPSingleDefRecipe>(OldMask)->eraseFromParent();
-      }
-    }
-  }
-}
-
 /// Optimize the width of vector induction variables in \p Plan based on a known
 /// constant Trip Count, \p BestVF and \p BestUF.
 static bool optimizeVectorInductionWidthForTCAndVFUF(VPlan &Plan,
@@ -1960,7 +1887,6 @@ void VPlanTransforms::optimize(VPlan &Plan) {
   runPass(removeRedundantInductionCasts, Plan);
 
   runPass(simplifyRecipes, Plan);
-  runPass(simplifyBlends, Plan);
   runPass(removeDeadRecipes, Plan);
   runPass(narrowToSingleScalarRecipes, Plan);
   runPass(legalizeAndOptimizeInductions, Plan);
@@ -2948,18 +2874,6 @@ void VPlanTransforms::convertToConcreteRecipes(VPlan &Plan) {
       continue;
     }
 
-    // Expand VPBlendRecipe into VPInstruction::Select.
-    VPBuilder Builder(&R);
-    if (auto *Blend = dyn_cast<VPBlendRecipe>(&R)) {
-      VPValue *Select = Blend->getIncomingValue(0);
-      for (unsigned I = 1; I != Blend->getNumIncomingValues(); ++I)
-        Select = Builder.createSelect(Blend->getMask(I),
-                                      Blend->getIncomingValue(I), Select,
-                                      R.getDebugLoc(), "predphi");
-      Blend->replaceAllUsesWith(Select);
-      ToRemove.push_back(Blend);
-    }
-
     if (auto *Expr = dyn_cast<VPExpressionRecipe>(&R)) {
       Expr->decompose();
       ToRemove.push_back(Expr);
@@ -2972,6 +2886,7 @@ void VPlanTransforms::convertToConcreteRecipes(VPlan &Plan) {
       continue;
 
     // Expand WideIVStep.
+    VPBuilder Builder(&R);
     auto *VPI = cast<VPInstruction>(&R);
     Type *IVTy = TypeInfo.inferScalarType(VPI);
     if (TypeInfo.inferScalarType(VectorStep) != IVTy) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.h b/llvm/lib/Transforms/Vectorize/VPlanUtils.h
index 2959e9440e753..88539637b96d9 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUtils.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.h
@@ -72,8 +72,7 @@ inline bool isSingleScalar(const VPValue *VPV) {
     return Rep->isSingleScalar() || (PreservesUniformity(Rep->getOpcode()) &&
                                      all_of(Rep->operands(), isSingleScalar));
   }
-  if (isa<VPWidenGEPRecipe, VPDerivedIVRecipe, VPBlendRecipe,
-          VPWidenSelectRecipe>(VPV))
+  if (isa<VPWidenGEPRecipe, VPDerivedIVRecipe, VPWidenSelectRecipe>(VPV))
     return all_of(VPV->getDefiningRecipe()->operands(), isSingleScalar);
   if (auto *WidenR = dyn_cast<VPWidenRecipe>(VPV)) {
     return PreservesUniformity(WidenR->getOpcode()) &&
diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h
index 24f6d61512ef6..4389933afbf10 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanValue.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h
@@ -354,7 +354,6 @@ class VPDef {
     VPWidenStoreSC,
     VPWidenSC,
     VPWidenSelectSC,
-    VPBlendSC,
     VPHistogramSC,
     // START: Phi-like recipes. Need to be kept together.
     VPWidenPHISC,
diff --git a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
index e25ffe135418e..ab12e0934a0e8 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
@@ -119,7 +119,7 @@ bool VPlanVerifier::verifyPhiRecipes(const VPBasicBlock *VPBB) {
   }
 
   while (RecipeI != End) {
-    if (RecipeI->isPhi() && !isa<VPBlendRecipe>(&*RecipeI)) {
+    if (RecipeI->isPhi()) {
       errs() << "Found phi-like recipe after non-phi recipe";
 
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanVerifier.h b/llvm/lib/Transforms/Vectorize/VPlanVerifier.h
index ccf79e8e5c985..a2ac562b500a2 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanVerifier.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanVerifier.h
@@ -35,8 +35,7 @@ class VPlan;
 /// 1. Region/Block verification: Check the Region/Block verification
 ///    invariants for every region in the H-CFG.
 /// 2. all phi-like recipes must be at the beginning of a block, with no other
-///    recipes in between. Note that currently there is still an exception for
-///    VPBlendRecipes.
+///    recipes in between.
 LLVM_ABI_FOR_TEST bool verifyVPlanIsValid(const VPlan &Plan,
                                           bool VerifyLate = false);
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call-scalarize.ll b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call-scalarize.ll
index c21b059b48486..da7d1a59b537d 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call-scalarize.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call-scalarize.ll
@@ -127,25 +127,25 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 {
 ; TFA_INTERLEAVE: pred.store.continue:
 ; TFA_INTERLEAVE-NEXT: [[TMP29:%.*]] = extractelement <2 x i1> [[ACTIVE_LANE_MASK]], i32 1
 ; TFA_INTERLEAVE-NEXT: br i1 [[TMP29]], label [[PRED_STORE_IF4:%.*]], label [[PRED_STORE_CONTINUE5:%.*]]
-; TFA_INTERLEAVE: pred.store.if4:
+; TFA_INTERLEAVE: pred.store.if3:
 ; TFA_INTERLEAVE-NEXT: [[TMP22:%.*]] = extractelement <2 x double> [[PREDPHI]], i32 1
 ; TFA_INTERLEAVE-NEXT: store double [[TMP22]], ptr [[P]], align 8
 ; TFA_INTERLEAVE-NEXT: br label [[PRED_STORE_CONTINUE5]]
-; TFA_INTERLEAVE: pred.store.continue5:
+; TFA_INTERLEAVE: pred.store.continue4:
 ; TFA_INTERLEAVE-NEXT: [[TMP31:%.*]] = extractelement <2 x i1> [[ACTIVE_LANE_MASK2]], i32 0
 ; TFA_INTERLEAVE-NEXT: br i1 [[TMP31]], label [[PRED_STORE_IF6:%.*]], label [[PRED_STORE_CONTINUE7:%.*]]
-; TFA_INTERLEAVE: pred.store.if6:
+; TFA_INTERLEAVE: pred.store.if5:
 ; TFA_INTERLEAVE-NEXT: [[TMP32:%.*]] = extractelement <2 x double> [[PREDPHI3]], i32 0
 ; TFA_INTERLEAVE-NEXT: store double [[TMP32]], ptr [[P]], align 8
 ; TFA_INTERLEAVE-NEXT: br label [[PRED_STORE_CONTINUE7]]
-; TFA_INTERLEAVE: pred.store.continue7:
+; TFA_INTERLEAVE: pred.store.continue6:
 ; TFA_INTERLEAVE-NEXT: [[TMP25:%.*]] = extractelement <2 x i1> [[ACTIVE_LANE_MASK2]], i32 1
 ; TFA_INTERLEAVE-NEXT: br i1 [[TMP25]], label [[PRED_STORE_IF8:%.*]], label [[PRED_STORE_CONTINUE9]]
-; TFA_INTERLEAVE: pred.store.if8:
+; TFA_INTERLEAVE: pred.store.if7:
 ; TFA_INTERLEAVE-NEXT: [[TMP34:%.*]] = extractelement <2 x double> [[PREDPHI3]], i32 1
 ; TFA_INTERLEAVE-NEXT: store double [[TMP34]], ptr [[P]], align 8
 ; TFA_INTERLEAVE-NEXT: br label [[PRED_STORE_CONTINUE9]]
-; TFA_INTERLEAVE: pred.store.continue9:
+; TFA_INTERLEAVE: pred.store.continue8:
 ; TFA_INTERLEAVE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
 ; TFA_INTERLEAVE-NEXT: [[TMP27:%.*]] = add i64 [[INDEX]], 2
 ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 [[INDEX]], i64 [[TMP3]])
diff --git a/llvm/test/Transforms/LoopVectorize/X86/replicate-uniform-call.ll b/llvm/test/Transforms/LoopVectorize/X86/replicate-uniform-call.ll
index 28435d4f34ac1..5ad5cf14aea10 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/replicate-uniform-call.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/replicate-uniform-call.ll
@@ -8,56 +8,12 @@ target triple = "x86_64-unknown-linux-gnu"
 define void @smax_call_uniform(ptr %dst, i64 %x) {
 ; CHECK-LABEL: define void @smax_call_uniform(
 ; CHECK-SAME: ptr [[DST:%.*]], i64 [[X:%.*]]) {
-; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[ENTRY:.*]]:
 ; CHECK-NEXT: [[C:%.*]] = icmp ult i8 -68, -69
 ; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw i64 [[X]], 0
-; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
-; CHECK: [[VECTOR_PH]]:
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i1> poison, i1 [[C]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT]], <2 x i1> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT]], splat (i1 true)
-; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
-; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_UREM_CONTINUE6:.*]] ]
-; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0
-; CHECK-NEXT: br i1 [[TMP2]], label %[[PRED_UREM_IF:.*]], label %[[PRED_UREM_CONTINUE:.*]]
-; CHECK: [[PRED_UREM_IF]]:
-; CHECK-NEXT: br label %[[PRED_UREM_CONTINUE]]
-; CHECK: [[PRED_UREM_CONTINUE]]:
-; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP1]], i32 1
-; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_UREM_IF1:.*]], label %[[PRED_UREM_CONTINUE2:.*]]
-; CHECK: [[PRED_UREM_IF1]]:
-; CHECK-NEXT: br label %[[PRED_UREM_CONTINUE2]]
-; CHECK: [[PRED_UREM_CONTINUE2]]:
-; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0
-; CHECK-NEXT: br i1 [[TMP7]], label %[[PRED_UREM_IF3:.*]], label %[[PRED_UREM_CONTINUE4:.*]]
-; CHECK: [[PRED_UREM_IF3]]:
-; CHECK-NEXT: br label %[[PRED_UREM_CONTINUE4]]
-; CHECK: [[PRED_UREM_CONTINUE4]]:
-; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP1]], i32 1
-; CHECK-NEXT: br i1 [[TMP10]], label %[[PRED_UREM_IF5:.*]], label %[[PRED_UREM_CONTINUE6]]
-; CHECK: [[PRED_UREM_IF5]]:
-; CHECK-NEXT: br label %[[PRED_UREM_CONTINUE6]]
-; CHECK: [[PRED_UREM_CONTINUE6]]:
-; CHECK-NEXT: [[TMP12:%.*]] = tail call i64 @llvm.smax.i64(i64 0, i64 0)
-; CHECK-NEXT: [[TMP13:%.*]] = tail call i64 @llvm.smax.i64(i64 0, i64 0)
-; CHECK-NEXT: [[P:%.*]] = select i1 [[C]], i64 1, i64 [[TMP12]]
-; CHECK-NEXT: [[PREDPHI7:%.*]] = select i1 [[C]], i64 1, i64 [[TMP13]]
-; CHECK-NEXT: [[ADD:%.*]] = add i64 [[P]], 1
-; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[PREDPHI7]], 1
-; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[DST]], i64 [[ADD]]
-; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP17]]
-; CHECK-NEXT: store i64 0, ptr [[GEP]], align 8
-; CHECK-NEXT: store i64 0, ptr [[TMP19]], align 8
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
-; CHECK-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
-; CHECK: [[MIDDLE_BLOCK]]:
-; CHECK-NEXT: br label %[[EXIT:.*]]
-; CHECK: [[SCALAR_PH]]:
 ; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
 ; CHECK: [[LOOP_HEADER]]:
-; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT1:%.*]], %[[LOOP_LATCH:.*]] ]
+; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT1:%.*]], %[[LOOP_LATCH:.*]] ]
 ; CHECK-NEXT: br i1 [[C]], label %[[LOOP_LATCH]], label %[[ELSE:.*]]
 ; CHECK: [[ELSE]]:
 ; CHECK-NEXT: [[REM1:%.*]] = urem i64 [[MUL]], [[X]]
@@ -70,7 +26,7 @@ define void @smax_call_uniform(ptr %dst, i64 %x) {
 ; CHECK-NEXT: store i64 0, ptr [[GEP1]], align 8
 ; CHECK-NEXT: [[IV_NEXT1]] = add i64 [[IV1]], 1
 ; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT1]], 1024
-; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP_HEADER]]
 ; CHECK: [[EXIT]]:
 ; CHECK-NEXT: ret void
 ;
@@ -102,9 +58,3 @@ exit:
 }
 
 declare i64 @llvm.smax.i64(i64, i64)
-;.
-; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
-; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
-; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
-; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
-;.
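Note on the VPlanPredicator hunk earlier in this patch: convertPhisToSelects folds a predicated phi into a chain of selects, seeded with the first incoming value (which carries no mask) and overridden by each later incoming value whose edge mask is true. Below is a minimal, self-contained sketch of that fold over plain scalar stand-ins; it is not the VPlan API, and all names here are hypothetical.

```cpp
// Sketch of the select-chain lowering performed by convertPhisToSelects,
// using plain values instead of VPValues and masks. A predicated phi with
// incoming values V0..Vn and edge masks M1..Mn lowers to
//   select(Mn, Vn, ... select(M1, V1, V0) ...)
// so V0 needs no mask: it survives only if no later edge mask is true.
#include <cassert>
#include <vector>

struct Incoming {
  int Value; // stand-in for the incoming VPValue
  bool Mask; // stand-in for the (already computed) edge mask
};

int lowerPhiToSelects(int FirstIncoming, const std::vector<Incoming> &Rest) {
  int Select = FirstIncoming; // seed: incoming value 0, no mask
  for (const Incoming &In : Rest)
    Select = In.Mask ? In.Value : Select; // one Builder.createSelect per edge
  return Select;
}

int main() {
  // Three-way phi {7, 42, 13}: only the edge feeding 42 is taken.
  assert(lowerPhiToSelects(7, {{42, true}, {13, false}}) == 42);
  // No edge mask set: the seed value survives the whole chain.
  assert(lowerPhiToSelects(7, {{42, false}, {13, false}}) == 7);
  return 0;
}
```

This also shows why the select-true/select-false simplifications added to simplifyRecipe suffice to clean up degenerate chains: an always-true or always-false edge mask collapses one link of the chain.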
diff --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop-cond.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop-cond.ll
index 481095225af6a..ae93f02e958c6 100644
--- a/llvm/test/Transforms/LoopVectorize/reduction-inloop-cond.ll
+++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop-cond.ll
@@ -587,7 +587,7 @@ define i64 @nested_cond_and(ptr noalias nocapture readonly %a, ptr noalias nocap
 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE14]]
 ; CHECK: pred.load.continue14:
 ; CHECK-NEXT: [[TMP46:%.*]] = phi <4 x i64> [ [[TMP41]], [[PRED_LOAD_CONTINUE12]] ], [ [[TMP45]], [[PRED_LOAD_IF13]] ]
-; CHECK-NEXT: [[PREDPHI_V:%.*]] = select <4 x i1> [[TMP26]], <4 x i64> [[TMP46]], <4 x i64> [[TMP24]]
+; CHECK-NEXT: [[PREDPHI_V:%.*]] = select <4 x i1> [[TMP25]], <4 x i64> [[TMP46]], <4 x i64> [[TMP24]]
 ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP4]], <4 x i64> [[PREDPHI_V]], <4 x i64> splat (i64 -1)
 ; CHECK-NEXT: [[PREDPHI15]] = and <4 x i64> [[VEC_PHI]], [[PREDPHI]]
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
diff --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll
index dd1e9ac7317eb..acec3486e5c34 100644
--- a/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll
+++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll
@@ -1256,10 +1256,10 @@ define float @reduction_conditional(ptr %A, ptr %B, ptr %C, float %S) {
 ; CHECK-INTERLEAVED-NEXT: [[TMP21:%.*]] = and <4 x i1> [[TMP12]], [[TMP10]]
 ; CHECK-INTERLEAVED-NEXT: [[TMP22:%.*]] = and <4 x i1> [[TMP21]], [[TMP6]]
 ; CHECK-INTERLEAVED-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP20]], <4 x float> [[VEC_PHI]], <4 x float> [[TMP13]]
-; CHECK-INTERLEAVED-NEXT: [[PREDPHI5:%.*]] = select <4 x i1> [[TMP15]], <4 x float> [[TMP17]], <4 x float> [[PREDPHI]]
-; CHECK-INTERLEAVED-NEXT: [[PREDPHI6]] = select <4 x i1> [[TMP5]], <4 x float> [[PREDPHI5]], <4 x float> [[VEC_PHI]]
 ; CHECK-INTERLEAVED-NEXT: [[PREDPHI7:%.*]] = select <4 x i1> [[TMP22]], <4 x float> [[VEC_PHI1]], <4 x float> [[TMP14]]
+; CHECK-INTERLEAVED-NEXT: [[TMP25:%.*]] = select <4 x i1> [[TMP15]], <4 x float> [[TMP17]], <4 x float> [[PREDPHI]]
 ; CHECK-INTERLEAVED-NEXT: [[PREDPHI8:%.*]] = select <4 x i1> [[TMP16]], <4 x float> [[TMP18]], <4 x float> [[PREDPHI7]]
+; CHECK-INTERLEAVED-NEXT: [[PREDPHI6]] = select <4 x i1> [[TMP5]], <4 x float> [[TMP25]], <4 x float> [[VEC_PHI]]
 ; CHECK-INTERLEAVED-NEXT: [[PREDPHI9]] = select <4 x i1> [[TMP6]], <4 x float> [[PREDPHI8]], <4 x float> [[VEC_PHI1]]
 ; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
 ; CHECK-INTERLEAVED-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128
diff --git a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp
index db64c755d005f..c71bdbc0ba782 100644
--- a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp
+++ b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp
@@ -1063,25 +1063,6 @@ TEST_F(VPRecipeTest, CastVPWidenGEPRecipeToVPUserAndVPDef) {
   delete GEP;
 }
 
-TEST_F(VPRecipeTest, CastVPBlendRecipeToVPUser) {
-  VPlan &Plan = getPlan();
-  IntegerType *Int32 = IntegerType::get(C, 32);
-  auto *Phi = PHINode::Create(Int32, 1);
-
-  VPValue *I1 = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 1));
-  VPValue *I2 = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 2));
-  VPValue *M2 = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 3));
-  SmallVector<VPValue *, 4> Args;
-  Args.push_back(I1);
-  Args.push_back(I2);
-  Args.push_back(M2);
-  VPBlendRecipe Recipe(Phi, Args, {});
-  EXPECT_TRUE(isa<VPUser>(&Recipe));
-  VPRecipeBase *BaseR = &Recipe;
-  EXPECT_TRUE(isa<VPUser>(BaseR));
-  delete Phi;
-}
-
 TEST_F(VPRecipeTest, CastVPInterleaveRecipeToVPUser) {
   VPlan &Plan = getPlan();
   IntegerType *Int32 = IntegerType::get(C, 32);
diff --git a/llvm/unittests/Transforms/Vectorize/VPlanVerifierTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanVerifierTest.cpp
index c2f045bf524e9..3dfccf0e4caaf 100644
--- a/llvm/unittests/Transforms/Vectorize/VPlanVerifierTest.cpp
+++ b/llvm/unittests/Transforms/Vectorize/VPlanVerifierTest.cpp
@@ -93,56 +93,6 @@ TEST_F(VPVerifierTest, VPInstructionUseBeforeDefDifferentBB) {
 #endif
 }
 
-TEST_F(VPVerifierTest, VPBlendUseBeforeDefDifferentBB) {
-  VPlan &Plan = getPlan();
-  IntegerType *Int32 = IntegerType::get(C, 32);
-  auto *Phi = PHINode::Create(Int32, 1);
-  VPValue *Zero = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 0));
-
-  VPInstruction *DefI = new VPInstruction(Instruction::Add, {Zero});
-  auto *CanIV = new VPCanonicalIVPHIRecipe(Zero, {});
-  VPInstruction *BranchOnCond =
-      new VPInstruction(VPInstruction::BranchOnCond, {CanIV});
-  auto *Blend = new VPBlendRecipe(Phi, {DefI}, {});
-
-  VPBasicBlock *VPBB1 = Plan.getEntry();
-  VPBasicBlock *VPBB2 = Plan.createVPBasicBlock("");
-  VPBasicBlock *VPBB3 = Plan.createVPBasicBlock("");
-  VPBasicBlock *VPBB4 = Plan.createVPBasicBlock("");
-
-  VPBB2->appendRecipe(CanIV);
-  VPBB3->appendRecipe(Blend);
-  VPBB4->appendRecipe(DefI);
-  VPBB4->appendRecipe(BranchOnCond);
-
-  VPBlockUtils::connectBlocks(VPBB2, VPBB3);
-  VPBlockUtils::connectBlocks(VPBB3, VPBB4);
-  VPRegionBlock *R1 = Plan.createVPRegionBlock(VPBB2, VPBB4, "R1");
-  VPBlockUtils::connectBlocks(VPBB1, R1);
-  VPBB3->setParent(R1);
-
-  VPBlockUtils::connectBlocks(R1, Plan.getScalarHeader());
-
-#if GTEST_HAS_STREAM_REDIRECTION
-  ::testing::internal::CaptureStderr();
-#endif
-  EXPECT_FALSE(verifyVPlanIsValid(Plan));
-#if GTEST_HAS_STREAM_REDIRECTION
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-  EXPECT_STREQ("Use before def!\n"
-               "  BLEND ir<<badref>> = vp<%2>\n"
-               "  before\n"
-               "  EMIT vp<%2> = add ir<0>\n",
-               ::testing::internal::GetCapturedStderr().c_str());
-#else
-  EXPECT_STREQ("Use before def!\n",
-               ::testing::internal::GetCapturedStderr().c_str());
-#endif
-#endif
-
-  delete Phi;
-}
-
 TEST_F(VPVerifierTest, VPPhiIncomingValueDoesntDominateIncomingBlock) {
   VPlan &Plan = getPlan();
   IntegerType *Int32 = IntegerType::get(C, 32);
@@ -286,44 +236,6 @@ TEST_F(VPVerifierTest, BlockOutsideRegionWithParent) {
 #endif
 }
 
-TEST_F(VPVerifierTest, NonHeaderPHIInHeader) {
-  VPlan &Plan = getPlan();
-  VPValue *Zero = Plan.getOrAddLiveIn(ConstantInt::get(Type::getInt32Ty(C), 0));
-  auto *CanIV = new VPCanonicalIVPHIRecipe(Zero, {});
-  auto *BranchOnCond = new VPInstruction(VPInstruction::BranchOnCond, {CanIV});
-
-  VPBasicBlock *VPBB1 = Plan.getEntry();
-  VPBasicBlock *VPBB2 = Plan.createVPBasicBlock("header");
-
-  VPBB2->appendRecipe(CanIV);
-
-  PHINode *PHINode = PHINode::Create(Type::getInt32Ty(C), 2);
-  auto *IRPhi = new VPIRPhi(*PHINode);
-  VPBB2->appendRecipe(IRPhi);
-  VPBB2->appendRecipe(BranchOnCond);
-
-  VPRegionBlock *R1 = Plan.createVPRegionBlock(VPBB2, VPBB2, "R1");
-  VPBlockUtils::connectBlocks(VPBB1, R1);
-  VPBlockUtils::connectBlocks(R1, Plan.getScalarHeader());
-
-#if GTEST_HAS_STREAM_REDIRECTION
-  ::testing::internal::CaptureStderr();
-#endif
-  EXPECT_FALSE(verifyVPlanIsValid(Plan));
-#if GTEST_HAS_STREAM_REDIRECTION
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-  EXPECT_STREQ(
-      "Found non-header PHI recipe in header VPBB: IR <badref> = phi i32 \n",
-      ::testing::internal::GetCapturedStderr().c_str());
-#else
-  EXPECT_STREQ("Found non-header PHI recipe in header VPBB",
-               ::testing::internal::GetCapturedStderr().c_str());
-#endif
-#endif
-
-  delete PHINode;
-}
-
 class VPIRVerifierTest : public VPlanTestIRBase {};
 
 TEST_F(VPIRVerifierTest, testVerifyIRPhi) {
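For reference, the reduction-path fold in the first LoopVectorize.cpp hunk of this patch (adjustRecipesForReductions) replaces a select whose true or false arm is the in-loop reduction phi with the other arm, since the reduction recipe applies the condition itself. A minimal sketch of that fold, using hypothetical stand-in types rather than VPlan classes:

```cpp
// Sketch of the reduction fold in adjustRecipesForReductions: a blend
// select with the reduction phi as one arm collapses to the other arm,
// because VPReductionRecipe handles the mask (condition) directly.
#include <cassert>

struct Node {}; // stand-in for a VPValue

struct SelectLink {
  Node *TrueArm;  // T in the m_VPInstruction<Instruction::Select> match
  Node *FalseArm; // F in the match
};

// Returns the operand that all uses of the select should be replaced with.
Node *foldReductionSelect(const SelectLink &Link, Node *PhiR) {
  if (Link.TrueArm == PhiR)
    return Link.FalseArm; // mirrors CurrentLink->replaceAllUsesWith(F)
  assert(Link.FalseArm == PhiR && "PhiR must be an operand of the select");
  return Link.TrueArm; // mirrors CurrentLink->replaceAllUsesWith(T)
}

int main() {
  Node Phi, Other;
  assert(foldReductionSelect({&Phi, &Other}, &Phi) == &Other);
  assert(foldReductionSelect({&Other, &Phi}, &Phi) == &Other);
  return 0;
}
```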