-
Notifications
You must be signed in to change notification settings - Fork 14.9k
[VPlan] Compute cost of intrinsics directly for VPReplicateRecipe (NFCI). #154617
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-vectorizers Author: Florian Hahn (fhahn) Changes: Handle intrinsic calls in VPReplicateRecipe::computeCost. There are some pseudo intrinsics for which the computed cost is known zero, so we handle those up front. Depends on #154291 (included in PR). Full diff: https://github.com/llvm/llvm-project/pull/154617.diff — 1 file affected:
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index f8fde0500b77a..a55ebdb60f939 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -1673,18 +1673,22 @@ void VPWidenIntrinsicRecipe::execute(VPTransformState &State) {
State.set(this, V);
}
-InstructionCost VPWidenIntrinsicRecipe::computeCost(ElementCount VF,
- VPCostContext &Ctx) const {
+/// Compute the cost for the intrinsic \p ID with \p Operands, produced by \p R.
+static InstructionCost getCostForIntrinsics(Intrinsic::ID ID,
+ ArrayRef<const VPValue *> Operands,
+ const VPRecipeWithIRFlags &R,
+ ElementCount VF,
+ VPCostContext &Ctx) {
// Some backends analyze intrinsic arguments to determine cost. Use the
// underlying value for the operand if it has one. Otherwise try to use the
// operand of the underlying call instruction, if there is one. Otherwise
// clear Arguments.
// TODO: Rework TTI interface to be independent of concrete IR values.
SmallVector<const Value *> Arguments;
- for (const auto &[Idx, Op] : enumerate(operands())) {
+ for (const auto &[Idx, Op] : enumerate(Operands)) {
auto *V = Op->getUnderlyingValue();
if (!V) {
- if (auto *UI = dyn_cast_or_null<CallBase>(getUnderlyingValue())) {
+ if (auto *UI = dyn_cast_or_null<CallBase>(R.getUnderlyingValue())) {
Arguments.push_back(UI->getArgOperand(Idx));
continue;
}
@@ -1694,21 +1698,31 @@ InstructionCost VPWidenIntrinsicRecipe::computeCost(ElementCount VF,
Arguments.push_back(V);
}
- Type *RetTy = toVectorizedTy(Ctx.Types.inferScalarType(this), VF);
+ Type *ScalarRetTy = Ctx.Types.inferScalarType(&R);
+ Type *RetTy = VF.isVector() ? toVectorizedTy(ScalarRetTy, VF) : ScalarRetTy;
SmallVector<Type *> ParamTys;
- for (unsigned I = 0; I != getNumOperands(); ++I)
- ParamTys.push_back(
- toVectorTy(Ctx.Types.inferScalarType(getOperand(I)), VF));
+ for (const VPValue *Op : Operands) {
+ ParamTys.push_back(VF.isVector()
+ ? toVectorTy(Ctx.Types.inferScalarType(Op), VF)
+ : Ctx.Types.inferScalarType(Op));
+ }
// TODO: Rework TTI interface to avoid reliance on underlying IntrinsicInst.
- FastMathFlags FMF = hasFastMathFlags() ? getFastMathFlags() : FastMathFlags();
+ FastMathFlags FMF =
+ R.hasFastMathFlags() ? R.getFastMathFlags() : FastMathFlags();
IntrinsicCostAttributes CostAttrs(
- VectorIntrinsicID, RetTy, Arguments, ParamTys, FMF,
- dyn_cast_or_null<IntrinsicInst>(getUnderlyingValue()),
+ ID, RetTy, Arguments, ParamTys, FMF,
+ dyn_cast_or_null<IntrinsicInst>(R.getUnderlyingValue()),
InstructionCost::getInvalid(), &Ctx.TLI);
return Ctx.TTI.getIntrinsicInstrCost(CostAttrs, Ctx.CostKind);
}
+InstructionCost VPWidenIntrinsicRecipe::computeCost(ElementCount VF,
+ VPCostContext &Ctx) const {
+ SmallVector<const VPValue *> ArgOps(operands());
+ return getCostForIntrinsics(VectorIntrinsicID, ArgOps, *this, VF, Ctx);
+}
+
StringRef VPWidenIntrinsicRecipe::getIntrinsicName() const {
return Intrinsic::getBaseName(VectorIntrinsicID);
}
@@ -3002,23 +3016,73 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
// instruction cost.
return 0;
case Instruction::Call: {
- if (!isSingleScalar()) {
- // TODO: Handle remaining call costs here as well.
- if (VF.isScalable())
- return InstructionCost::getInvalid();
- break;
- }
-
auto *CalledFn =
cast<Function>(getOperand(getNumOperands() - 1)->getLiveInIRValue());
- if (CalledFn->isIntrinsic())
- break;
+ SmallVector<const VPValue *> ArgOps(drop_end(operands()));
SmallVector<Type *, 4> Tys;
- for (VPValue *ArgOp : drop_end(operands()))
+ for (const VPValue *ArgOp : ArgOps)
Tys.push_back(Ctx.Types.inferScalarType(ArgOp));
+
+ if (CalledFn->isIntrinsic())
+ // Various pseudo-intrinsics with costs of 0 are scalarized instead of
+ // vectorized via VPWidenIntrinsicRecipe. Return 0 for them early.
+ switch (CalledFn->getIntrinsicID()) {
+ case Intrinsic::assume:
+ case Intrinsic::lifetime_end:
+ case Intrinsic::lifetime_start:
+ case Intrinsic::sideeffect:
+ case Intrinsic::pseudoprobe:
+ case Intrinsic::experimental_noalias_scope_decl: {
+ assert(getCostForIntrinsics(CalledFn->getIntrinsicID(), ArgOps, *this,
+ ElementCount::getFixed(1), Ctx) == 0 && "pseudo-intrinsic must have zero cost");
+ return InstructionCost(0);
+ }
+ default:
+ break;
+ }
+
Type *ResultTy = Ctx.Types.inferScalarType(this);
- return Ctx.TTI.getCallInstrCost(CalledFn, ResultTy, Tys, Ctx.CostKind);
+ InstructionCost ScalarCallCost =
+ Ctx.TTI.getCallInstrCost(CalledFn, ResultTy, Tys, Ctx.CostKind);
+ if (isSingleScalar()) {
+ if (CalledFn->isIntrinsic())
+ ScalarCallCost = std::min(
+ ScalarCallCost,
+ getCostForIntrinsics(CalledFn->getIntrinsicID(), ArgOps, *this,
+ ElementCount::getFixed(1), Ctx));
+ return ScalarCallCost;
+ }
+
+ if (VF.isScalable())
+ return InstructionCost::getInvalid();
+
+ // Compute the cost of scalarizing the result and operands if needed.
+ InstructionCost ScalarizationCost = 0;
+ if (VF.isVector()) {
+ if (!ResultTy->isVoidTy()) {
+ for (Type *VectorTy : getContainedTypes(toVectorizedTy(ResultTy, VF))) {
+ ScalarizationCost += Ctx.TTI.getScalarizationOverhead(
+ cast<VectorType>(VectorTy), APInt::getAllOnes(VF.getFixedValue()),
+ /*Insert=*/true,
+ /*Extract=*/false, Ctx.CostKind);
+ }
+ }
+ // Skip operands that do not require extraction/scalarization and do not
+ // incur any overhead.
+ SmallVector<Type *> Tys;
+ SmallPtrSet<const VPValue *, 4> UniqueOperands;
+ for (auto *Op : ArgOps) {
+ if (Op->isLiveIn() || isa<VPReplicateRecipe, VPPredInstPHIRecipe>(Op) ||
+ !UniqueOperands.insert(Op).second)
+ continue;
+ Tys.push_back(toVectorizedTy(Ctx.Types.inferScalarType(Op), VF));
+ }
+ ScalarizationCost +=
+ Ctx.TTI.getOperandsScalarizationOverhead(Tys, Ctx.CostKind);
+ }
+
+ return ScalarCallCost * VF.getFixedValue() + ScalarizationCost;
}
case Instruction::Add:
case Instruction::Sub:
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
a1801fd
to
672b04b
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, thanks!
Refactor to prepare for #154617.
…CI). Handle intrinsic calls in VPReplicateRecipe::computeCost. There are some pseudo intrinsics for which the computed cost is known zero, so we handle those up front. Depends on llvm#154291.
672b04b
to
bddd21b
Compare
… (NFC). Refactor to prepare for llvm/llvm-project#154617.
…eRecipe (NFCI). (#154617) Handle intrinsic calls in VPReplicateRecipe::computeCost. There are some pseudo intrinsics for which the computed cost is known zero, so we handle those up front. Depends on llvm/llvm-project#154291. PR: llvm/llvm-project#154617
Handle intrinsic calls in VPReplicateRecipe::computeCost. There are some
pseudo intrinsics for which the computed cost is known zero,
so we handle those up front.
Depends on #154291. (included
in PR)