diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 1438dc366b55d..1ada5e413bd9e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -333,6 +333,9 @@ Value *VPTransformState::get(const VPValue *Def, bool NeedsScalar) {
     LastLane = 0;
   }

+  assert(IsSingleScalar && "must be a single-scalar at this point");
+  // Construct the vector value for a single-scalar value by broadcasting the
+  // scalar to all lanes.
   auto *LastInst = cast<Instruction>(get(Def, LastLane));
   // Set the insert point after the last scalarized instruction or after the
   // last PHI, if LastInst is a PHI. This ensures the insertelement sequence
@@ -343,27 +346,8 @@ Value *VPTransformState::get(const VPValue *Def, bool NeedsScalar) {
                    : std::next(BasicBlock::iterator(LastInst));
   Builder.SetInsertPoint(&*NewIP);

-  // However, if we are vectorizing, we need to construct the vector values.
-  // If the value is known to be uniform after vectorization, we can just
-  // broadcast the scalar value corresponding to lane zero. Otherwise, we
-  // construct the vector values using insertelement instructions. Since the
-  // resulting vectors are stored in State, we will only generate the
-  // insertelements once.
-  Value *VectorValue = nullptr;
-  if (IsSingleScalar) {
-    VectorValue = GetBroadcastInstrs(ScalarValue);
-    set(Def, VectorValue);
-  } else {
-    assert(!VF.isScalable() && "VF is assumed to be non scalable.");
-    assert(isa<VPInstruction>(Def) &&
-           "Explicit BuildVector recipes must have"
-           "handled packing for non-VPInstructions.");
-    // Initialize packing with insertelements to start from poison.
-    VectorValue = PoisonValue::get(toVectorizedTy(LastInst->getType(), VF));
-    for (unsigned Lane = 0; Lane < VF.getFixedValue(); ++Lane)
-      VectorValue = packScalarIntoVectorizedValue(Def, VectorValue, Lane);
-    set(Def, VectorValue);
-  }
+  Value *VectorValue = GetBroadcastInstrs(ScalarValue);
+  set(Def, VectorValue);
   Builder.restoreIP(OldIP);
   return VectorValue;
 }
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 055db2b9adb95..fa41da15d2f43 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -901,6 +901,8 @@ struct VPRecipeWithIRFlags : public VPSingleDefRecipe, public VPIRFlags {
     return R && classof(R);
   }

+  VPRecipeWithIRFlags *clone() override = 0;
+
   void execute(VPTransformState &State) override = 0;

   /// Compute the cost for this recipe for \p VF, using \p Opcode and \p Ctx.
@@ -1045,13 +1047,6 @@ class LLVM_ABI_FOR_TEST VPInstruction : public VPRecipeWithIRFlags,
     VScale,
   };

-private:
-  typedef unsigned char OpcodeTy;
-  OpcodeTy Opcode;
-
-  /// An optional name that can be used for the generated IR instruction.
-  const std::string Name;
-
   /// Returns true if this VPInstruction generates scalar values for all lanes.
   /// Most VPInstructions generate a single value per part, either vector or
   /// scalar. VPReplicateRecipe takes care of generating multiple (scalar)
   /// copies per all lanes, stemming from an original ingredient. This method
   /// identifies the (rare) cases of VPInstructions that do so as well, w/o an
   /// underlying Instruction.
   bool doesGeneratePerAllLanes() const;
+private:
+  typedef unsigned char OpcodeTy;
+  OpcodeTy Opcode;
+
+  /// An optional name that can be used for the generated IR instruction.
+  const std::string Name;
+
   /// Returns true if we can generate a scalar for the first lane only if
   /// needed.
   bool canGenerateScalarForFirstLane() const;
@@ -1069,11 +1071,6 @@ class LLVM_ABI_FOR_TEST VPInstruction : public VPRecipeWithIRFlags,
   /// existing value is returned rather than a generated one.
   Value *generate(VPTransformState &State);

-  /// Utility methods serving execute(): generates a scalar single instance of
-  /// the modeled instruction for a given lane. \returns the scalar generated
-  /// value for lane \p Lane.
-  Value *generatePerLane(VPTransformState &State, const VPLane &Lane);
-
 #if !defined(NDEBUG)
   /// Return the number of operands determined by the opcode of the
   /// VPInstruction. Returns -1u if the number of operands cannot be determined
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index bd9a93ed57b8a..b8f120ab89555 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -525,16 +525,6 @@ bool VPInstruction::canGenerateScalarForFirstLane() const {
   }
 }

-Value *VPInstruction::generatePerLane(VPTransformState &State,
-                                      const VPLane &Lane) {
-  IRBuilderBase &Builder = State.Builder;
-
-  assert(getOpcode() == VPInstruction::PtrAdd &&
-         "only PtrAdd opcodes are supported for now");
-  return Builder.CreatePtrAdd(State.get(getOperand(0), Lane),
-                              State.get(getOperand(1), Lane), Name);
-}
-
 /// Create a conditional branch using \p Cond branching to the successors of \p
 /// VPBB. Note that the first successor is always forward (i.e. not created yet)
 /// while the second successor may already have been created (if it is a header
@@ -1154,24 +1144,13 @@ void VPInstruction::execute(VPTransformState &State) {
          "Set flags not supported for the provided opcode");
   if (hasFastMathFlags())
     State.Builder.setFastMathFlags(getFastMathFlags());
-  bool GeneratesPerFirstLaneOnly = canGenerateScalarForFirstLane() &&
-                                   (vputils::onlyFirstLaneUsed(this) ||
-                                    isVectorToScalar() || isSingleScalar());
-  bool GeneratesPerAllLanes = doesGeneratePerAllLanes();
-  if (GeneratesPerAllLanes) {
-    for (unsigned Lane = 0, NumLanes = State.VF.getFixedValue();
-         Lane != NumLanes; ++Lane) {
-      Value *GeneratedValue = generatePerLane(State, VPLane(Lane));
-      assert(GeneratedValue && "generatePerLane must produce a value");
-      State.set(this, GeneratedValue, VPLane(Lane));
-    }
-    return;
-  }
-
   Value *GeneratedValue = generate(State);
   if (!hasResult())
     return;
   assert(GeneratedValue && "generate must produce a value");
+  bool GeneratesPerFirstLaneOnly = canGenerateScalarForFirstLane() &&
+                                   (vputils::onlyFirstLaneUsed(this) ||
+                                    isVectorToScalar() || isSingleScalar());
   assert((((GeneratedValue->getType()->isVectorTy() ||
             GeneratedValue->getType()->isStructTy()) ==
            !GeneratesPerFirstLaneOnly) ||
@@ -1244,6 +1223,9 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
   case VPInstruction::Broadcast:
   case VPInstruction::ReductionStartVector:
     return true;
+  case VPInstruction::BuildStructVector:
+  case VPInstruction::BuildVector:
+    return getNumOperands() > 1;
   case VPInstruction::PtrAdd:
     return Op == getOperand(0) || vputils::onlyFirstLaneUsed(this);
   case VPInstruction::WidePtrAdd:
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 94ba30e235e49..be8dcae916822 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -3377,34 +3377,39 @@ void VPlanTransforms::materializeBuildVectors(VPlan &Plan) {
       vp_depth_first_shallow(Plan.getEntry()));
   auto VPBBsInsideLoopRegion = VPBlockUtils::blocksOnly<VPBasicBlock>(
       vp_depth_first_shallow(LoopRegion->getEntry()));
-  // Materialize Build(Struct)Vector for all replicating VPReplicateRecipes,
-  // excluding ones in replicate regions. Those are not materialized explicitly
-  // yet. Those vector users are still handled in VPReplicateRegion::execute(),
-  // via shouldPack().
+  // Materialize Build(Struct)Vector for all replicating VPReplicateRecipes and
+  // VPInstructions, excluding ones in replicate regions. Those are not
+  // materialized explicitly yet; their vector users are still handled in
+  // VPReplicateRecipe::execute(), via shouldPack().
   // TODO: materialize build vectors for replicating recipes in replicating
   // regions.
-  // TODO: materialize build vectors for VPInstructions.
   for (VPBasicBlock *VPBB :
        concat<VPBasicBlock *>(VPBBsOutsideLoopRegion, VPBBsInsideLoopRegion)) {
     for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
-      auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
-      auto UsesVectorOrInsideReplicateRegion = [RepR, LoopRegion](VPUser *U) {
+      auto *DefR = dyn_cast<VPRecipeWithIRFlags>(&R);
+      if (!DefR || !isa<VPReplicateRecipe, VPInstruction>(DefR))
+        continue;
+      auto UsesVectorOrInsideReplicateRegion = [DefR, LoopRegion](VPUser *U) {
         VPRegionBlock *ParentRegion =
             cast<VPRecipeBase>(U)->getParent()->getParent();
-        return !U->usesScalars(RepR) || ParentRegion != LoopRegion;
+        return !U->usesScalars(DefR) || ParentRegion != LoopRegion;
       };
-      if (!RepR || RepR->isSingleScalar() ||
-          none_of(RepR->users(), UsesVectorOrInsideReplicateRegion))
+      if ((isa<VPReplicateRecipe>(DefR) &&
+           cast<VPReplicateRecipe>(DefR)->isSingleScalar()) ||
+          (isa<VPInstruction>(DefR) &&
+           !cast<VPInstruction>(DefR)->doesGeneratePerAllLanes()) ||
+          vputils::onlyFirstLaneUsed(DefR) ||
+          none_of(DefR->users(), UsesVectorOrInsideReplicateRegion))
         continue;

-      Type *ScalarTy = TypeInfo.inferScalarType(RepR);
+      Type *ScalarTy = TypeInfo.inferScalarType(DefR);
       unsigned Opcode = ScalarTy->isStructTy()
                             ? VPInstruction::BuildStructVector
                             : VPInstruction::BuildVector;
-      auto *BuildVector = new VPInstruction(Opcode, {RepR});
-      BuildVector->insertAfter(RepR);
+      auto *BuildVector = new VPInstruction(Opcode, {DefR});
+      BuildVector->insertAfter(DefR);

-      RepR->replaceUsesWithIf(
+      DefR->replaceUsesWithIf(
           BuildVector, [BuildVector, &UsesVectorOrInsideReplicateRegion](
                            VPUser &U, unsigned) {
             return &U != BuildVector && UsesVectorOrInsideReplicateRegion(&U);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index 9cf62a35ae36b..bd4b94fa034c4 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -158,8 +158,8 @@ struct VPlanTransforms {
   /// Explicitly unroll \p Plan by \p UF.
   static void unrollByUF(VPlan &Plan, unsigned UF);

-  /// Replace each VPReplicateRecipe outside on any replicate region in \p Plan
-  /// with \p VF single-scalar recipes.
+  /// Replace each VPReplicateRecipe and replicating VPInstruction outside of
+  /// any replicate region in \p Plan with \p VF single-scalar recipes.
   /// TODO: Also replicate VPReplicateRecipes inside replicate regions, thereby
   /// dissolving the latter.
   static void replicateByVF(VPlan &Plan, ElementCount VF);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
index 7a63d20825a31..85a9fd522f3cf 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
@@ -463,15 +463,15 @@ void VPlanTransforms::unrollByUF(VPlan &Plan, unsigned UF) {
   VPlanTransforms::removeDeadRecipes(Plan);
 }

-/// Create a single-scalar clone of \p RepR for lane \p Lane. Use \p
-/// Def2LaneDefs to look up scalar definitions for operands of \RepR.
-static VPReplicateRecipe *
+/// Create a single-scalar clone of \p DefR for lane \p Lane. Use \p
+/// Def2LaneDefs to look up scalar definitions for operands of \p DefR.
+static VPRecipeWithIRFlags *
 cloneForLane(VPlan &Plan, VPBuilder &Builder, Type *IdxTy,
-             VPReplicateRecipe *RepR, VPLane Lane,
+             VPRecipeWithIRFlags *DefR, VPLane Lane,
              const DenseMap<VPValue *, SmallVector<VPValue *>> &Def2LaneDefs) {
   // Collect the operands at Lane, creating extracts as needed.
   SmallVector<VPValue *> NewOps;
-  for (VPValue *Op : RepR->operands()) {
+  for (VPValue *Op : DefR->operands()) {
     // If Op is a definition that has been unrolled, directly use the clone for
     // the corresponding lane.
     auto LaneDefs = Def2LaneDefs.find(Op);
@@ -501,11 +501,19 @@ cloneForLane(VPlan &Plan, VPBuilder &Builder, Type *IdxTy,
     NewOps.push_back(Ext);
   }

-  auto *New =
-      new VPReplicateRecipe(RepR->getUnderlyingInstr(), NewOps,
-                            /*IsSingleScalar=*/true, /*Mask=*/nullptr, *RepR);
-  New->transferFlags(*RepR);
-  New->insertBefore(RepR);
+  VPRecipeWithIRFlags *New;
+  if (auto *RepR = dyn_cast<VPReplicateRecipe>(DefR)) {
+    New =
+        new VPReplicateRecipe(RepR->getUnderlyingInstr(), NewOps,
+                              /*IsSingleScalar=*/true, /*Mask=*/nullptr, *RepR);
+  } else {
+    New = DefR->clone();
+    for (const auto &[Idx, Op] : enumerate(NewOps)) {
+      New->setOperand(Idx, Op);
+    }
+  }
+  New->transferFlags(*DefR);
+  New->insertBefore(DefR);
   return New;
 }
@@ -530,41 +538,46 @@ void VPlanTransforms::replicateByVF(VPlan &Plan, ElementCount VF) {
   SmallVector<VPRecipeBase *> ToRemove;
   for (VPBasicBlock *VPBB : VPBBsToUnroll) {
     for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
-      auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
-      if (!RepR || RepR->isSingleScalar())
+      auto *DefR = dyn_cast<VPRecipeWithIRFlags>(&R);
+      if (!DefR || !isa<VPReplicateRecipe, VPInstruction>(DefR))
+        continue;
+      if ((isa<VPReplicateRecipe>(DefR) &&
+           cast<VPReplicateRecipe>(DefR)->isSingleScalar()) ||
+          (isa<VPInstruction>(DefR) &&
+           !cast<VPInstruction>(DefR)->doesGeneratePerAllLanes()))
         continue;

-      VPBuilder Builder(RepR);
-      if (RepR->getNumUsers() == 0) {
-        if (isa<StoreInst>(RepR->getUnderlyingInstr()) &&
-            vputils::isSingleScalar(RepR->getOperand(1))) {
+      VPBuilder Builder(DefR);
+      if (DefR->getNumUsers() == 0) {
+        if (isa<StoreInst>(DefR->getUnderlyingInstr()) &&
+            vputils::isSingleScalar(DefR->getOperand(1))) {
           // Stores to invariant addresses need to store the last lane only.
-          cloneForLane(Plan, Builder, IdxTy, RepR, VPLane::getLastLaneForVF(VF),
+          cloneForLane(Plan, Builder, IdxTy, DefR, VPLane::getLastLaneForVF(VF),
                        Def2LaneDefs);
         } else {
-          // Create single-scalar version of RepR for all lanes.
+          // Create single-scalar version of DefR for all lanes.
           for (unsigned I = 0; I != VF.getKnownMinValue(); ++I)
-            cloneForLane(Plan, Builder, IdxTy, RepR, VPLane(I), Def2LaneDefs);
+            cloneForLane(Plan, Builder, IdxTy, DefR, VPLane(I), Def2LaneDefs);
         }
-        RepR->eraseFromParent();
+        DefR->eraseFromParent();
         continue;
       }

-      /// Create single-scalar version of RepR for all lanes.
+      // Create single-scalar version of DefR for all lanes.
       SmallVector<VPValue *> LaneDefs;
       for (unsigned I = 0; I != VF.getKnownMinValue(); ++I)
         LaneDefs.push_back(
-            cloneForLane(Plan, Builder, IdxTy, RepR, VPLane(I), Def2LaneDefs));
+            cloneForLane(Plan, Builder, IdxTy, DefR, VPLane(I), Def2LaneDefs));

-      Def2LaneDefs[RepR] = LaneDefs;
+      Def2LaneDefs[DefR] = LaneDefs;
       /// Users that only demand the first lane can use the definition for lane
       /// 0.
-      RepR->replaceUsesWithIf(LaneDefs[0], [RepR](VPUser &U, unsigned) {
-        return U.onlyFirstLaneUsed(RepR);
+      DefR->replaceUsesWithIf(LaneDefs[0], [DefR](VPUser &U, unsigned) {
+        return U.onlyFirstLaneUsed(DefR);
       });

-      // Update each build vector user that currently has RepR as its only
+      // Update each build vector user that currently has DefR as its only
       // operand, to have all LaneDefs as its operands.
-      for (VPUser *U : to_vector(RepR->users())) {
+      for (VPUser *U : to_vector(DefR->users())) {
         auto *VPI = dyn_cast<VPInstruction>(U);
         if (!VPI || (VPI->getOpcode() != VPInstruction::BuildVector &&
                      VPI->getOpcode() != VPInstruction::BuildStructVector))
@@ -576,7 +589,7 @@ void VPlanTransforms::replicateByVF(VPlan &Plan, ElementCount VF) {
         for (VPValue *LaneDef : drop_begin(LaneDefs))
           VPI->addOperand(LaneDef);
       }
-      ToRemove.push_back(RepR);
+      ToRemove.push_back(DefR);
     }
   }
   for (auto *R : reverse(ToRemove))
diff --git a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll
index d2c53f47a6670..a633dfee066ed 100644
--- a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll
@@ -33,6 +33,10 @@ define void @a(ptr readnone %b) {
 ; CHECK-NEXT:    [[NEXT_GEP2:%.*]] = getelementptr i8, ptr null, i64 [[TMP11]]
 ; CHECK-NEXT:    [[NEXT_GEP3:%.*]] = getelementptr i8, ptr null, i64 [[TMP14]]
 ; CHECK-NEXT:    [[NEXT_GEP4:%.*]] = getelementptr i8, ptr null, i64 [[TMP17]]
+; CHECK-NEXT:    [[TMP21:%.*]] = insertelement <4 x ptr> poison, ptr [[NEXT_GEP]], i32 0
+; CHECK-NEXT:    [[TMP22:%.*]] = insertelement <4 x ptr> [[TMP21]], ptr [[NEXT_GEP2]], i32 1
+; CHECK-NEXT:    [[TMP23:%.*]] = insertelement <4 x ptr> [[TMP22]], ptr [[NEXT_GEP3]], i32 2
+; CHECK-NEXT:    [[TMP24:%.*]] = insertelement <4 x ptr> [[TMP23]], ptr [[NEXT_GEP4]], i32 3
 ; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP]], i64 -1
 ; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i32 0
 ; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 -3
@@ -649,9 +653,6 @@ define i64 @ivopt_widen_ptr_indvar_3(ptr noalias %a, i64 %stride, i64 %n) {
 ; STRIDED-NEXT:    [[TMP9:%.*]] = add i64 [[OFFSET_IDX]], [[TMP8]]
 ; STRIDED-NEXT:    [[TMP10:%.*]] = mul i64 3, [[TMP1]]
 ; STRIDED-NEXT:    [[TMP11:%.*]] = add i64 [[OFFSET_IDX]], [[TMP10]]
-; STRIDED-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr null, i64 [[TMP5]]
-; STRIDED-NEXT:    [[NEXT_GEP1:%.*]] = getelementptr i8, ptr null, i64 [[TMP7]]
-; STRIDED-NEXT:    [[NEXT_GEP2:%.*]] = getelementptr i8, ptr null, i64 [[TMP9]]
 ; STRIDED-NEXT:    [[NEXT_GEP3:%.*]] = getelementptr i8, ptr null, i64 [[TMP11]]
 ; STRIDED-NEXT:    [[TMP12:%.*]] = getelementptr i64, ptr [[A:%.*]], i64 [[INDEX]]
 ; STRIDED-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP12]], align 8
diff --git a/llvm/test/Transforms/LoopVectorize/predicate-switch.ll b/llvm/test/Transforms/LoopVectorize/predicate-switch.ll
index f59d4aa99918f..f33e12b418fa6 100644
--- a/llvm/test/Transforms/LoopVectorize/predicate-switch.ll
+++ b/llvm/test/Transforms/LoopVectorize/predicate-switch.ll
@@ -22,6 +22,8 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) {
 ; IC1-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 1
 ; IC1-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP1]]
 ; IC1-NEXT:    [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP2]]
+; IC1-NEXT:    [[TMP12:%.*]] = insertelement <2 x ptr> poison, ptr [[NEXT_GEP]], i32 0
+; IC1-NEXT:    [[TMP16:%.*]] = insertelement <2 x ptr> [[TMP12]], ptr [[NEXT_GEP3]], i32 1
 ; IC1-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i8>, ptr [[NEXT_GEP]], align 1
 ; IC1-NEXT:    [[TMP7:%.*]] = icmp eq <2 x i8> [[WIDE_LOAD]], splat (i8 -12)
 ; IC1-NEXT:    [[TMP4:%.*]] = icmp eq <2 x i8> [[WIDE_LOAD]], splat (i8 13)
@@ -117,8 +119,12 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) {
 ; IC2-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 3
 ; IC2-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP1]]
 ; IC2-NEXT:    [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP2]]
+; IC2-NEXT:    [[TMP23:%.*]] = insertelement <2 x ptr> poison, ptr [[NEXT_GEP]], i32 0
+; IC2-NEXT:    [[TMP24:%.*]] = insertelement <2 x ptr> [[TMP23]], ptr [[NEXT_GEP3]], i32 1
 ; IC2-NEXT:    [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP3]]
 ; IC2-NEXT:    [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP4]]
+; IC2-NEXT:    [[TMP30:%.*]] = insertelement <2 x ptr> poison, ptr [[NEXT_GEP4]], i32 0
+; IC2-NEXT:    [[TMP31:%.*]] = insertelement <2 x ptr> [[TMP30]], ptr [[NEXT_GEP5]], i32 1
 ; IC2-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 2
 ; IC2-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i8>, ptr [[NEXT_GEP]], align 1
 ; IC2-NEXT:    [[WIDE_LOAD6:%.*]] = load <2 x i8>, ptr [[TMP6]], align 1
@@ -338,21 +344,21 @@ define void @switch_to_header(ptr %start) {
 ; IC1-NEXT:  [[ENTRY:.*]]:
 ; IC1-NEXT:    br label %[[LOOP_HEADER:.*]]
 ; IC1:       [[LOOP_HEADER]]:
-; IC1-NEXT:    [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[IF_THEN:.*]] ]
+; IC1-NEXT:    [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[IF_THEN1:.*]] ]
 ; IC1-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
 ; IC1-NEXT:    switch i64 [[IV]], label %[[LOOP_LATCH:.*]] [
-; IC1-NEXT:    i64 120, label %[[IF_THEN]]
+; IC1-NEXT:    i64 120, label %[[IF_THEN1]]
 ; IC1-NEXT:    i64 100, label %[[LOOP_LATCH]]
 ; IC1-NEXT:    ]
-; IC1:       [[IF_THEN]]:
+; IC1:       [[IF_THEN1]]:
 ; IC1-NEXT:    br label %[[LOOP_HEADER]]
-; IC1:       [[IF_THEN1:.*:]]
+; IC1:       [[IF_THEN:.*:]]
 ; IC1-NEXT:    [[GEP:%.*]] = getelementptr inbounds i64, ptr [[START]], i64 poison
 ; IC1-NEXT:    store i64 42, ptr [[GEP]], align 1
 ; IC1-NEXT:    unreachable
 ; IC1:       [[LOOP_LATCH]]:
 ; IC1-NEXT:    [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 100
-; IC1-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[IF_THEN]]
+; IC1-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[IF_THEN1]]
 ; IC1:       [[EXIT]]:
 ; IC1-NEXT:    ret void
 ;
@@ -361,21 +367,21 @@ define void @switch_to_header(ptr %start) {
 ; IC2-NEXT:  [[ENTRY:.*]]:
 ; IC2-NEXT:    br label %[[LOOP_HEADER:.*]]
 ; IC2:       [[LOOP_HEADER]]:
-; IC2-NEXT:    [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[IF_THEN:.*]] ]
+; IC2-NEXT:    [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[IF_THEN1:.*]] ]
 ; IC2-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
 ; IC2-NEXT:    switch i64 [[IV]], label %[[LOOP_LATCH:.*]] [
-; IC2-NEXT:    i64 120, label %[[IF_THEN]]
+; IC2-NEXT:    i64 120, label %[[IF_THEN1]]
 ; IC2-NEXT:    i64 100, label %[[LOOP_LATCH]]
 ; IC2-NEXT:    ]
-; IC2:       [[IF_THEN]]:
+; IC2:       [[IF_THEN1]]:
 ; IC2-NEXT:    br label %[[LOOP_HEADER]]
-; IC2:       [[IF_THEN1:.*:]]
+; IC2:       [[IF_THEN:.*:]]
 ; IC2-NEXT:    [[GEP:%.*]] = getelementptr inbounds i64, ptr [[START]], i64 poison
 ; IC2-NEXT:    store i64 42, ptr [[GEP]], align 1
 ; IC2-NEXT:    unreachable
 ; IC2:       [[LOOP_LATCH]]:
 ; IC2-NEXT:    [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 100
-; IC2-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[IF_THEN]]
+; IC2-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[IF_THEN1]]
 ; IC2:       [[EXIT]]:
 ; IC2-NEXT:    ret void
 ;
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-predicate-switch.ll b/llvm/test/Transforms/LoopVectorize/vplan-predicate-switch.ll
index 3d05ee7f27b5c..cf85f26992c2f 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-predicate-switch.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-predicate-switch.ll
@@ -22,7 +22,11 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) {
 ; CHECK-NEXT: vector.body:
 ; CHECK-NEXT:   EMIT-SCALAR vp<[[CAN_IV:%.+]]> = phi [ ir<0>, vector.ph ], [ vp<[[CAN_IV_NEXT:%.+]]>, default.2 ]
 ; CHECK-NEXT:   vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, ir<2>
-; CHECK-NEXT:   EMIT vp<[[PTR:%.+]]> = ptradd ir<%start>, vp<[[STEPS]]>
+; CHECK-NEXT:   EMIT vp<[[STEP1:%.+]]> = extractelement vp<[[STEPS]]>, ir<0>
+; CHECK-NEXT:   EMIT vp<[[PTR:%.+]]> = ptradd ir<%start>, vp<[[STEP1]]>
+; CHECK-NEXT:   EMIT vp<[[STEP2:%.+]]> = extractelement vp<[[STEPS]]>, ir<1>
+; CHECK-NEXT:   EMIT vp<[[PTR]]>.1 = ptradd ir<%start>, vp<[[STEP2]]>
+; CHECK-NEXT:   EMIT vp<[[PTR_VEC:%.+]]> = buildvector vp<[[PTR]]>, vp<[[PTR]]>.1
 ; CHECK-NEXT:   WIDEN ir<%l> = load vp<[[PTR]]>
 ; CHECK-NEXT:   EMIT vp<[[C1:%.+]]> = icmp eq ir<%l>, ir<-12>
 ; CHECK-NEXT:   EMIT vp<[[C2:%.+]]> = icmp eq ir<%l>, ir<13>
@@ -36,7 +40,7 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) {
 ; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue
 ; CHECK-EMPTY:
 ; CHECK-NEXT: pred.store.if:
-; CHECK-NEXT:   REPLICATE store ir<0>, vp<[[PTR]]>
+; CHECK-NEXT:   REPLICATE store ir<0>, vp<[[PTR_VEC]]>
 ; CHECK-NEXT: Successor(s): pred.store.continue
 ; CHECK-EMPTY:
 ; CHECK-NEXT: pred.store.continue:
@@ -53,7 +57,7 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) {
 ; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue
 ; CHECK-EMPTY:
 ; CHECK-NEXT: pred.store.if:
-; CHECK-NEXT:   REPLICATE store ir<42>, vp<[[PTR]]>
+; CHECK-NEXT:   REPLICATE store ir<42>, vp<[[PTR_VEC]]>
 ; CHECK-NEXT: Successor(s): pred.store.continue
 ; CHECK-EMPTY:
 ; CHECK-NEXT: pred.store.continue:
@@ -70,7 +74,7 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) {
 ; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue
 ; CHECK-EMPTY:
 ; CHECK-NEXT: pred.store.if:
-; CHECK-NEXT:   REPLICATE store ir<2>, vp<[[PTR]]>
+; CHECK-NEXT:   REPLICATE store ir<2>, vp<[[PTR_VEC]]>
 ; CHECK-NEXT: Successor(s): pred.store.continue
 ; CHECK-EMPTY:
 ; CHECK-NEXT: pred.store.continue: