@@ -3018,18 +3018,42 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
3018
3018
case Instruction::Call: {
3019
3019
auto *CalledFn =
3020
3020
cast<Function>(getOperand (getNumOperands () - 1 )->getLiveInIRValue ());
3021
- if (CalledFn->isIntrinsic ())
3022
- break ;
3023
3021
3022
+ SmallVector<const VPValue *> ArgOps (drop_end (operands ()));
3024
3023
SmallVector<Type *, 4 > Tys;
3025
- for (VPValue *ArgOp : drop_end ( operands ()) )
3024
+ for (const VPValue *ArgOp : ArgOps )
3026
3025
Tys.push_back (Ctx.Types .inferScalarType (ArgOp));
3027
3026
3027
+ if (CalledFn->isIntrinsic ())
3028
+ // Various pseudo-intrinsics with costs of 0 are scalarized instead of
3029
+ // vectorized via VPWidenIntrinsicRecipe. Return 0 for them early.
3030
+ switch (CalledFn->getIntrinsicID ()) {
3031
+ case Intrinsic::assume:
3032
+ case Intrinsic::lifetime_end:
3033
+ case Intrinsic::lifetime_start:
3034
+ case Intrinsic::sideeffect:
3035
+ case Intrinsic::pseudoprobe:
3036
+ case Intrinsic::experimental_noalias_scope_decl: {
3037
+ assert (getCostForIntrinsics (CalledFn->getIntrinsicID (), ArgOps, *this ,
3038
+ ElementCount::getFixed (1 ), Ctx) == 0 &&
3039
+ " pseudo-intrinsic must have zero cost" );
3040
+ return InstructionCost (0 );
3041
+ }
3042
+ default :
3043
+ break ;
3044
+ }
3045
+
3028
3046
Type *ResultTy = Ctx.Types .inferScalarType (this );
3029
3047
InstructionCost ScalarCallCost =
3030
3048
Ctx.TTI .getCallInstrCost (CalledFn, ResultTy, Tys, Ctx.CostKind );
3031
- if (isSingleScalar ())
3049
+ if (isSingleScalar ()) {
3050
+ if (CalledFn->isIntrinsic ())
3051
+ ScalarCallCost = std::min (
3052
+ ScalarCallCost,
3053
+ getCostForIntrinsics (CalledFn->getIntrinsicID (), ArgOps, *this ,
3054
+ ElementCount::getFixed (1 ), Ctx));
3032
3055
return ScalarCallCost;
3056
+ }
3033
3057
3034
3058
if (VF.isScalable ())
3035
3059
return InstructionCost::getInvalid ();
@@ -3049,7 +3073,7 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
3049
3073
// incur any overhead.
3050
3074
SmallVector<Type *> Tys;
3051
3075
SmallPtrSet<const VPValue *, 4 > UniqueOperands;
3052
- for (auto *Op : drop_end ( operands ()) ) {
3076
+ for (auto *Op : ArgOps ) {
3053
3077
if (Op->isLiveIn () || isa<VPReplicateRecipe, VPPredInstPHIRecipe>(Op) ||
3054
3078
!UniqueOperands.insert (Op).second )
3055
3079
continue ;
@@ -3059,8 +3083,7 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
3059
3083
Ctx.TTI .getOperandsScalarizationOverhead (Tys, Ctx.CostKind );
3060
3084
}
3061
3085
3062
- return ScalarCallCost * (isSingleScalar () ? 1 : VF.getFixedValue ()) +
3063
- ScalarizationCost;
3086
+ return ScalarCallCost * VF.getFixedValue () + ScalarizationCost;
3064
3087
}
3065
3088
case Instruction::Add:
3066
3089
case Instruction::Sub:
0 commit comments