Skip to content

Conversation

farzonl
Copy link
Member

@farzonl farzonl commented Aug 20, 2025

fixes #152348

SimplifyCFG collapses raw buffer store from a if\else load into a select.

This change prevents the TargetExtType dx.Rawbuffer from being replace thus preserving the if\else blocks.

A further change was needed to eliminate the phi node before we process Intrinsic::dx_resource_getpointer in DXILResourceAccess.cpp

@llvmbot
Copy link
Member

llvmbot commented Aug 21, 2025

@llvm/pr-subscribers-llvm-transforms
@llvm/pr-subscribers-llvm-ir

@llvm/pr-subscribers-backend-directx

Author: Farzon Lotfi (farzonl)

Changes

fixes #152348

SimplifyCFG collapses raw buffer store from a if\else load into a select.

This change prevents the TargetExtType dx.Rawbuffer from being replace thus preserving the if\else blocks.

A further change was needed to eliminate the phi node before we process Intrinsic::dx_resource_getpointer in DXILResourceAccess.cpp


Full diff: https://github.com/llvm/llvm-project/pull/154620.diff

5 Files Affected:

  • (modified) llvm/include/llvm/IR/DerivedTypes.h (+2)
  • (modified) llvm/lib/IR/Type.cpp (+2-1)
  • (modified) llvm/lib/Target/DirectX/DXILResourceAccess.cpp (+107-3)
  • (modified) llvm/lib/Transforms/Utils/Local.cpp (+4)
  • (added) llvm/test/CodeGen/DirectX/issue-152348.ll (+143)
diff --git a/llvm/include/llvm/IR/DerivedTypes.h b/llvm/include/llvm/IR/DerivedTypes.h
index fa62bc09b61a3..7810be22d1e99 100644
--- a/llvm/include/llvm/IR/DerivedTypes.h
+++ b/llvm/include/llvm/IR/DerivedTypes.h
@@ -847,6 +847,8 @@ class TargetExtType : public Type {
     CanBeLocal = 1U << 2,
     // This type may be used as an element in a vector.
     CanBeVectorElement = 1U << 3,
+    // This type can not be replaced in an optimization pass.
+    NoReplacement = 1U << 4,
   };
 
   /// Returns true if the target extension type contains the given property.
diff --git a/llvm/lib/IR/Type.cpp b/llvm/lib/IR/Type.cpp
index 9c34662340352..7513debc2ed3a 100644
--- a/llvm/lib/IR/Type.cpp
+++ b/llvm/lib/IR/Type.cpp
@@ -1036,7 +1036,8 @@ static TargetTypeInfo getTargetTypeInfo(const TargetExtType *Ty) {
   // DirectX resources
   if (Name.starts_with("dx."))
     return TargetTypeInfo(PointerType::get(C, 0), TargetExtType::CanBeGlobal,
-                          TargetExtType::CanBeLocal);
+                          TargetExtType::CanBeLocal,
+                          TargetExtType::NoReplacement);
 
   // Opaque types in the AMDGPU name space.
   if (Name == "amdgcn.named.barrier") {
diff --git a/llvm/lib/Target/DirectX/DXILResourceAccess.cpp b/llvm/lib/Target/DirectX/DXILResourceAccess.cpp
index c33ec0efd73ca..e09e71b679031 100644
--- a/llvm/lib/Target/DirectX/DXILResourceAccess.cpp
+++ b/llvm/lib/Target/DirectX/DXILResourceAccess.cpp
@@ -9,13 +9,17 @@
 #include "DXILResourceAccess.h"
 #include "DirectX.h"
 #include "llvm/Analysis/DXILResource.h"
+#include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instruction.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/IntrinsicsDirectX.h"
+#include "llvm/IR/User.h"
 #include "llvm/InitializePasses.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
 
 #define DEBUG_TYPE "dxil-resource-access"
 
@@ -198,6 +202,102 @@ static void createLoadIntrinsic(IntrinsicInst *II, LoadInst *LI, Value *Offset,
   llvm_unreachable("Unhandled case in switch");
 }
 
+static void collectBlockUseDef(Instruction *Start,
+                               SmallVectorImpl<Instruction *> &Out) {
+  SmallPtrSet<Instruction *, 32> Visited;
+  SmallVector<Instruction *, 32> Worklist;
+  auto *BB = Start->getParent();
+
+  // Seed with direct users in this block.
+  for (User *U : Start->users()) {
+    if (auto *I = dyn_cast<Instruction>(U)) {
+      if (I->getParent() == BB)
+        Worklist.push_back(I);
+    }
+  }
+
+  // BFS over transitive users, constrained to the same block.
+  while (!Worklist.empty()) {
+    Instruction *I = Worklist.pop_back_val();
+    if (!Visited.insert(I).second)
+      continue;
+    Out.push_back(I);
+
+    for (User *U : I->users()) {
+      if (auto *J = dyn_cast<Instruction>(U)) {
+        if (J->getParent() == BB)
+          Worklist.push_back(J);
+      }
+    }
+    for (Use &V : I->operands()) {
+      if (auto *J = dyn_cast<Instruction>(V)) {
+        if (J->getParent() == BB && V != Start)
+          Worklist.push_back(J);
+      }
+    }
+  }
+
+  // Order results in program order.
+  DenseMap<const Instruction *, unsigned> Ord;
+  unsigned Idx = 0;
+  for (Instruction &I : *BB)
+    Ord[&I] = Idx++;
+
+  llvm::sort(Out, [&](Instruction *A, Instruction *B) {
+    return Ord.lookup(A) < Ord.lookup(B);
+  });
+}
+
+static void phiNodeRemapHelper(PHINode *Phi, BasicBlock *BB,
+                               IRBuilder<> &Builder,
+                               SmallVector<Instruction *> &UsesInBlock) {
+
+  ValueToValueMapTy VMap;
+  Value *Val = Phi->getIncomingValueForBlock(BB);
+  VMap[Phi] = Val;
+  Builder.SetInsertPoint(&BB->back());
+  for (Instruction *I : UsesInBlock) {
+    // don't clone over the Phi just remap them
+    if (auto *PhiNested = dyn_cast<PHINode>(I)) {
+      VMap[PhiNested] = PhiNested->getIncomingValueForBlock(BB);
+      continue;
+    }
+    Instruction *Clone = I->clone();
+    RemapInstruction(Clone, VMap,
+                     RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
+    Builder.Insert(Clone);
+    VMap[I] = Clone;
+  }
+}
+
+static void phiNodeReplacement(IntrinsicInst *II) {
+  SmallVector<Instruction *> DeadInsts;
+  for (User *U : II->users()) {
+    if (auto *Phi = dyn_cast<PHINode>(U)) {
+
+      IRBuilder<> Builder(Phi);
+      SmallVector<Instruction *> UsesInBlock;
+      collectBlockUseDef(Phi, UsesInBlock);
+
+      for (uint I = 0; I < Phi->getNumIncomingValues(); I++) {
+        auto *CurrIncomingBB = Phi->getIncomingBlock(I);
+        phiNodeRemapHelper(Phi, CurrIncomingBB, Builder, UsesInBlock);
+      }
+
+      DeadInsts.push_back(Phi);
+
+      for (Instruction *I : UsesInBlock) {
+        DeadInsts.push_back(I);
+      }
+    }
+  }
+
+  // Traverse the now-dead instructions in RPO and remove them.
+  for (Instruction *Dead : llvm::reverse(DeadInsts))
+    Dead->eraseFromParent();
+  DeadInsts.clear();
+}
+
 static void replaceAccess(IntrinsicInst *II, dxil::ResourceTypeInfo &RTI) {
   // Process users keeping track of indexing accumulated from GEPs.
   struct AccessAndOffset {
@@ -229,7 +329,6 @@ static void replaceAccess(IntrinsicInst *II, dxil::ResourceTypeInfo &RTI) {
     } else if (auto *LI = dyn_cast<LoadInst>(Current.Access)) {
       createLoadIntrinsic(II, LI, Current.Offset, RTI);
       DeadInsts.push_back(LI);
-
     } else
       llvm_unreachable("Unhandled instruction - pointer escaped?");
   }
@@ -242,13 +341,19 @@ static void replaceAccess(IntrinsicInst *II, dxil::ResourceTypeInfo &RTI) {
 
 static bool transformResourcePointers(Function &F, DXILResourceTypeMap &DRTM) {
   SmallVector<std::pair<IntrinsicInst *, dxil::ResourceTypeInfo>> Resources;
-  for (BasicBlock &BB : F)
+  for (BasicBlock &BB : F) {
+    for (Instruction &I : make_early_inc_range(BB))
+      if (auto *II = dyn_cast<IntrinsicInst>(&I))
+        if (II->getIntrinsicID() == Intrinsic::dx_resource_getpointer)
+          phiNodeReplacement(II);
+
     for (Instruction &I : BB)
       if (auto *II = dyn_cast<IntrinsicInst>(&I))
         if (II->getIntrinsicID() == Intrinsic::dx_resource_getpointer) {
           auto *HandleTy = cast<TargetExtType>(II->getArgOperand(0)->getType());
           Resources.emplace_back(II, DRTM[HandleTy]);
         }
+  }
 
   for (auto &[II, RI] : Resources)
     replaceAccess(II, RI);
@@ -279,7 +384,6 @@ class DXILResourceAccessLegacy : public FunctionPass {
   bool runOnFunction(Function &F) override {
     DXILResourceTypeMap &DRTM =
         getAnalysis<DXILResourceTypeWrapperPass>().getResourceTypeMap();
-
     return transformResourcePointers(F, DRTM);
   }
   StringRef getPassName() const override { return "DXIL Resource Access"; }
diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp
index ac344904f90f0..9b9b34db908f3 100644
--- a/llvm/lib/Transforms/Utils/Local.cpp
+++ b/llvm/lib/Transforms/Utils/Local.cpp
@@ -3852,6 +3852,10 @@ bool llvm::canReplaceOperandWithVariable(const Instruction *I, unsigned OpIdx) {
   if (I->isLifetimeStartOrEnd())
     return false;
 
+  if (auto *TT = dyn_cast<TargetExtType>(Op->getType());
+      TT && TT->hasProperty(TargetExtType::Property::NoReplacement))
+    return false;
+
   // Early exit.
   if (!isa<Constant, InlineAsm>(Op))
     return true;
diff --git a/llvm/test/CodeGen/DirectX/issue-152348.ll b/llvm/test/CodeGen/DirectX/issue-152348.ll
new file mode 100644
index 0000000000000..693f01457bdc6
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/issue-152348.ll
@@ -0,0 +1,143 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -dxil-resource-type -dxil-resource-access -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+
+%__cblayout_d = type <{ i32, i32, i32, i32 }>
+
+@.str = internal unnamed_addr constant [2 x i8] c"a\00", align 1
+@d.cb = local_unnamed_addr global target("dx.CBuffer", target("dx.Layout", %__cblayout_d, 16, 0, 4, 8, 12)) poison
+@e = external hidden local_unnamed_addr addrspace(2) global i32, align 4
+@d.str = internal unnamed_addr constant [2 x i8] c"d\00", align 1
+
+define void @CSMain() local_unnamed_addr {
+; CHECK-LABEL: define void @CSMain() local_unnamed_addr {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[D_CB_H_I_I:%.*]] = tail call target("dx.CBuffer", target("dx.Layout", [[__CBLAYOUT_D:%.*]], 16, 0, 4, 8, 12)) @llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_tdx.Layout_s___cblayout_ds_16_0_4_8_12tt(i32 3, i32 0, i32 1, i32 0, i1 false, ptr nonnull @d.str)
+; CHECK-NEXT:    store target("dx.CBuffer", target("dx.Layout", [[__CBLAYOUT_D]], 16, 0, 4, 8, 12)) [[D_CB_H_I_I]], ptr @d.cb, align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr addrspace(2) @e, align 4
+; CHECK-NEXT:    [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[TMP0]], 0
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(2) @e, align 4
+; CHECK-NEXT:    br i1 [[TOBOOL_NOT_I]], label %[[IF_ELSE_I:.*]], label %[[IF_THEN_I:.*]]
+; CHECK:       [[IF_THEN_I]]:
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call target("dx.RawBuffer", half, 1, 0) @llvm.dx.resource.handlefromimplicitbinding.tdx.RawBuffer_f16_1_0t(i32 1, i32 0, i32 1, i32 0, i1 false, ptr nonnull @.str)
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr addrspace(2) @e, align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = tail call target("dx.RawBuffer", half, 1, 0) @llvm.dx.resource.handlefromimplicitbinding.tdx.RawBuffer_f16_1_0t(i32 2, i32 0, i32 1, i32 0, i1 false, ptr nonnull @.str)
+; CHECK-NEXT:    [[TMP5:%.*]] = call { half, i1 } @llvm.dx.resource.load.rawbuffer.f16.tdx.RawBuffer_f16_1_0t(target("dx.RawBuffer", half, 1, 0) [[TMP2]], i32 [[TMP3]], i32 0)
+; CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { half, i1 } [[TMP5]], 0
+; CHECK-NEXT:    call void @llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_f16_1_0t.f16(target("dx.RawBuffer", half, 1, 0) [[TMP4]], i32 [[TMP1]], i32 0, half [[TMP6]])
+; CHECK-NEXT:    br label %[[_Z6CSMAINV_EXIT:.*]]
+; CHECK:       [[IF_ELSE_I]]:
+; CHECK-NEXT:    [[TMP7:%.*]] = tail call target("dx.RawBuffer", half, 1, 0) @llvm.dx.resource.handlefromimplicitbinding.tdx.RawBuffer_f16_1_0t(i32 0, i32 0, i32 1, i32 0, i1 false, ptr nonnull @.str)
+; CHECK-NEXT:    [[TMP8:%.*]] = load i32, ptr addrspace(2) @e, align 4
+; CHECK-NEXT:    [[TMP9:%.*]] = tail call target("dx.RawBuffer", half, 1, 0) @llvm.dx.resource.handlefromimplicitbinding.tdx.RawBuffer_f16_1_0t(i32 2, i32 0, i32 1, i32 0, i1 false, ptr nonnull @.str)
+; CHECK-NEXT:    [[TMP10:%.*]] = call { half, i1 } @llvm.dx.resource.load.rawbuffer.f16.tdx.RawBuffer_f16_1_0t(target("dx.RawBuffer", half, 1, 0) [[TMP7]], i32 [[TMP8]], i32 0)
+; CHECK-NEXT:    [[TMP11:%.*]] = extractvalue { half, i1 } [[TMP10]], 0
+; CHECK-NEXT:    call void @llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_f16_1_0t.f16(target("dx.RawBuffer", half, 1, 0) [[TMP9]], i32 [[TMP1]], i32 0, half [[TMP11]])
+; CHECK-NEXT:    br label %[[_Z6CSMAINV_EXIT]]
+; CHECK:       [[_Z6CSMAINV_EXIT]]:
+; CHECK-NEXT:    ret void
+;
+entry:
+  %d.cb_h.i.i = tail call target("dx.CBuffer", target("dx.Layout", %__cblayout_d, 16, 0, 4, 8, 12)) @llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_tdx.Layout_s___cblayout_ds_16_0_4_8_12tt(i32 3, i32 0, i32 1, i32 0, i1 false, ptr nonnull @d.str)
+  store target("dx.CBuffer", target("dx.Layout", %__cblayout_d, 16, 0, 4, 8, 12)) %d.cb_h.i.i, ptr @d.cb, align 4
+  %0 = load i32, ptr addrspace(2) @e, align 4
+  %tobool.not.i = icmp eq i32 %0, 0
+  %1 = load i32, ptr addrspace(2) @e, align 4
+  br i1 %tobool.not.i, label %if.else.i, label %if.then.i
+
+if.then.i:                                        ; preds = %entry
+  %2 = tail call target("dx.RawBuffer", half, 1, 0) @llvm.dx.resource.handlefromimplicitbinding.tdx.RawBuffer_f16_1_0t(i32 1, i32 0, i32 1, i32 0, i1 false, ptr nonnull @.str)
+  %3 = load i32, ptr addrspace(2) @e, align 4
+  %4 = tail call noundef nonnull align 2 dereferenceable(2) ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_f16_1_0t(target("dx.RawBuffer", half, 1, 0) %2, i32 %3)
+  br label %_Z6CSMainv.exit
+
+if.else.i:                                        ; preds = %entry
+  %5 = tail call target("dx.RawBuffer", half, 1, 0) @llvm.dx.resource.handlefromimplicitbinding.tdx.RawBuffer_f16_1_0t(i32 0, i32 0, i32 1, i32 0, i1 false, ptr nonnull @.str)
+  %6 = load i32, ptr addrspace(2) @e, align 4
+  %7 = tail call noundef nonnull align 2 dereferenceable(2) ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_f16_1_0t(target("dx.RawBuffer", half, 1, 0) %5, i32 %6)
+  br label %_Z6CSMainv.exit
+
+_Z6CSMainv.exit:                                  ; preds = %if.then.i, %if.else.i
+  %.sink1 = phi ptr [ %4, %if.then.i ], [ %7, %if.else.i ]
+  %8 = tail call target("dx.RawBuffer", half, 1, 0) @llvm.dx.resource.handlefromimplicitbinding.tdx.RawBuffer_f16_1_0t(i32 2, i32 0, i32 1, i32 0, i1 false, ptr nonnull @.str)
+  %9 = tail call noundef nonnull align 2 dereferenceable(2) ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_f16_1_0t(target("dx.RawBuffer", half, 1, 0) %8, i32 %1)
+  %10 = load half, ptr %.sink1, align 2
+  store half %10, ptr %9, align 2
+  ret void
+}
+
+define void @Main() local_unnamed_addr  {
+; CHECK-LABEL: define void @Main() local_unnamed_addr {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = tail call target("dx.RawBuffer", half, 1, 0) @llvm.dx.resource.handlefromimplicitbinding.tdx.RawBuffer_f16_1_0t(i32 0, i32 0, i32 1, i32 0, i1 false, ptr nonnull @.str)
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call target("dx.RawBuffer", half, 1, 0) @llvm.dx.resource.handlefromimplicitbinding.tdx.RawBuffer_f16_1_0t(i32 1, i32 0, i32 1, i32 0, i1 false, ptr nonnull @.str)
+; CHECK-NEXT:    [[D_CB_H_I_I:%.*]] = tail call target("dx.CBuffer", target("dx.Layout", [[__CBLAYOUT_D:%.*]], 16, 0, 4, 8, 12)) @llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_tdx.Layout_s___cblayout_ds_16_0_4_8_12tt(i32 3, i32 0, i32 1, i32 0, i1 false, ptr nonnull @d.str)
+; CHECK-NEXT:    store target("dx.CBuffer", target("dx.Layout", [[__CBLAYOUT_D]], 16, 0, 4, 8, 12)) [[D_CB_H_I_I]], ptr @d.cb, align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(2) @e, align 4
+; CHECK-NEXT:    [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[TMP2]], 0
+; CHECK-NEXT:    br i1 [[TOBOOL_NOT_I]], label %[[IF_ELSE_I:.*]], label %[[IF_THEN_I:.*]]
+; CHECK:       [[IF_THEN_I]]:
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr addrspace(2) @e, align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr addrspace(2) @e, align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = tail call target("dx.RawBuffer", half, 1, 0) @llvm.dx.resource.handlefromimplicitbinding.tdx.RawBuffer_f16_1_0t(i32 2, i32 0, i32 1, i32 0, i1 false, ptr nonnull @.str)
+; CHECK-NEXT:    [[TMP6:%.*]] = call { half, i1 } @llvm.dx.resource.load.rawbuffer.f16.tdx.RawBuffer_f16_1_0t(target("dx.RawBuffer", half, 1, 0) [[TMP1]], i32 [[TMP3]], i32 0)
+; CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { half, i1 } [[TMP6]], 0
+; CHECK-NEXT:    call void @llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_f16_1_0t.f16(target("dx.RawBuffer", half, 1, 0) [[TMP5]], i32 [[TMP4]], i32 0, half [[TMP7]])
+; CHECK-NEXT:    br label %[[_Z6MAINV_EXIT:.*]]
+; CHECK:       [[IF_ELSE_I]]:
+; CHECK-NEXT:    [[TMP8:%.*]] = load i32, ptr addrspace(2) @e, align 4
+; CHECK-NEXT:    [[CMP_I:%.*]] = icmp eq i32 [[TMP8]], 0
+; CHECK-NEXT:    br i1 [[CMP_I]], label %[[IF_THEN2_I:.*]], label %[[IF_ELSE6_I:.*]]
+; CHECK:       [[IF_THEN2_I]]:
+; CHECK-NEXT:    [[TMP9:%.*]] = tail call target("dx.RawBuffer", half, 1, 0) @llvm.dx.resource.handlefromimplicitbinding.tdx.RawBuffer_f16_1_0t(i32 2, i32 0, i32 1, i32 0, i1 false, ptr nonnull @.str)
+; CHECK-NEXT:    [[TMP10:%.*]] = call { half, i1 } @llvm.dx.resource.load.rawbuffer.f16.tdx.RawBuffer_f16_1_0t(target("dx.RawBuffer", half, 1, 0) [[TMP1]], i32 0, i32 0)
+; CHECK-NEXT:    [[TMP11:%.*]] = extractvalue { half, i1 } [[TMP10]], 0
+; CHECK-NEXT:    call void @llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_f16_1_0t.f16(target("dx.RawBuffer", half, 1, 0) [[TMP9]], i32 0, i32 0, half [[TMP11]])
+; CHECK-NEXT:    br label %[[_Z6MAINV_EXIT]]
+; CHECK:       [[IF_ELSE6_I]]:
+; CHECK-NEXT:    [[TMP12:%.*]] = load i32, ptr addrspace(2) @e, align 4
+; CHECK-NEXT:    [[TMP13:%.*]] = tail call target("dx.RawBuffer", half, 1, 0) @llvm.dx.resource.handlefromimplicitbinding.tdx.RawBuffer_f16_1_0t(i32 2, i32 0, i32 1, i32 0, i1 false, ptr nonnull @.str)
+; CHECK-NEXT:    [[TMP14:%.*]] = call { half, i1 } @llvm.dx.resource.load.rawbuffer.f16.tdx.RawBuffer_f16_1_0t(target("dx.RawBuffer", half, 1, 0) [[TMP0]], i32 [[TMP12]], i32 0)
+; CHECK-NEXT:    [[TMP15:%.*]] = extractvalue { half, i1 } [[TMP14]], 0
+; CHECK-NEXT:    call void @llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_f16_1_0t.f16(target("dx.RawBuffer", half, 1, 0) [[TMP13]], i32 [[TMP8]], i32 0, half [[TMP15]])
+; CHECK-NEXT:    br label %[[_Z6MAINV_EXIT]]
+; CHECK:       [[_Z6MAINV_EXIT]]:
+; CHECK-NEXT:    ret void
+;
+entry:
+  %0 = tail call target("dx.RawBuffer", half, 1, 0) @llvm.dx.resource.handlefromimplicitbinding.tdx.RawBuffer_f16_1_0t(i32 0, i32 0, i32 1, i32 0, i1 false, ptr nonnull @.str)
+  %1 = tail call target("dx.RawBuffer", half, 1, 0) @llvm.dx.resource.handlefromimplicitbinding.tdx.RawBuffer_f16_1_0t(i32 1, i32 0, i32 1, i32 0, i1 false, ptr nonnull @.str)
+  %d.cb_h.i.i = tail call target("dx.CBuffer", target("dx.Layout", %__cblayout_d, 16, 0, 4, 8, 12)) @llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_tdx.Layout_s___cblayout_ds_16_0_4_8_12tt(i32 3, i32 0, i32 1, i32 0, i1 false, ptr nonnull @d.str)
+  store target("dx.CBuffer", target("dx.Layout", %__cblayout_d, 16, 0, 4, 8, 12)) %d.cb_h.i.i, ptr @d.cb, align 4
+  %2 = load i32, ptr addrspace(2) @e, align 4
+  %tobool.not.i = icmp eq i32 %2, 0
+  br i1 %tobool.not.i, label %if.else.i, label %if.then.i
+
+if.then.i:                                        ; preds = %entry
+  %3 = load i32, ptr addrspace(2) @e, align 4
+  %4 = tail call noundef nonnull align 2 dereferenceable(2) ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_f16_1_0t(target("dx.RawBuffer", half, 1, 0) %1, i32 %3)
+  %5 = load i32, ptr addrspace(2) @e, align 4
+  br label %_Z6Mainv.exit
+
+if.else.i:                                        ; preds = %entry
+  %6 = load i32, ptr addrspace(2) @e, align 4
+  %cmp.i = icmp eq i32 %6, 0
+  br i1 %cmp.i, label %if.then2.i, label %if.else6.i
+
+if.then2.i:                                       ; preds = %if.else.i
+  %7 = tail call noundef nonnull align 2 dereferenceable(2) ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_f16_1_0t(target("dx.RawBuffer", half, 1, 0) %1, i32 0)
+  br label %_Z6Mainv.exit
+
+if.else6.i:                                       ; preds = %if.else.i
+  %8 = load i32, ptr addrspace(2) @e, align 4
+  %9 = tail call noundef nonnull align 2 dereferenceable(2) ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_f16_1_0t(target("dx.RawBuffer", half, 1, 0) %0, i32 %8)
+  br label %_Z6Mainv.exit
+
+_Z6Mainv.exit:                                  ; preds = %if.then.i, %if.then2.i, %if.else6.i
+  %.sink2 = phi i32 [ %5, %if.then.i ], [ 0, %if.then2.i ], [ %6, %if.else6.i ]
+  %.sink.in = phi ptr [ %4, %if.then.i ], [ %7, %if.then2.i ], [ %9, %if.else6.i ]
+  %10 = tail call target("dx.RawBuffer", half, 1, 0) @llvm.dx.resource.handlefromimplicitbinding.tdx.RawBuffer_f16_1_0t(i32 2, i32 0, i32 1, i32 0, i1 false, ptr nonnull @.str)
+  %.sink = load half, ptr %.sink.in, align 2
+  %11 = tail call noundef nonnull align 2 dereferenceable(2) ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_f16_1_0t(target("dx.RawBuffer", half, 1, 0) %10, i32 %.sink2)
+  store half %.sink, ptr %11, align 2
+  ret void
+}

Copy link
Member

@hekota hekota left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just a few comments. I did not review the algorithm in detail yet.

Copy link
Contributor

@bogner bogner left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It'd be nice to also include a small handwritten test that shows the Phi transformation - it's a bit hard to see what it does in the simple case with a test case derived from HLSL like this.

Comment on lines 850 to 851
// This type can not be replaced in an optimization pass.
NoReplacement = 1U << 4,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A couple of thoughts here:

  • Is this the best name for this? Maybe IsTokenLike (since the constraints are similar to the token type) or MustBeLiteral or something like that would be better?
  • The doc comment here is a little vague - better wording might come out of the better naming though.
  • Docs should use /// comments so that they show up in doxygen, especially given that the LangRef for TargetExtType defers to the doxygen to describe the properties.

@nikic do you have opinions on the naming here?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IsTokenLike makes sense to me. I'd combine this with adding an isTokenLikeTy() method and then using that instead of isTokenTy() in relevant places (including the IR verifier).

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@nikic replacing isTokenTy() with isTokenLikeTy() is a ton of file changes. Could we address that in a follow up pr if I create an issue for it?

@farzonl farzonl force-pushed the bugfix/issue-152348_replaceOpWithVar_version branch from 156a8e7 to fc7b3dc Compare August 27, 2025 19:50
Copy link

github-actions bot commented Aug 27, 2025

✅ With the latest revision this PR passed the C/C++ code formatter.

@farzonl farzonl force-pushed the bugfix/issue-152348_replaceOpWithVar_version branch from 7e31011 to 6acef97 Compare August 27, 2025 20:13
@farzonl farzonl force-pushed the bugfix/issue-152348_replaceOpWithVar_version branch from 179186a to 7940bb4 Compare August 28, 2025 16:23
Copy link
Contributor

@bogner bogner left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This looks good to me. Please wait to hear back from @nikic on whether we should do the isTokenLikeTy() refactoring before or after this change.

Comment on lines +306 to +309
for (Instruction *Dead : llvm::reverse(CurrBBDeadInsts))
Dead->eraseFromParent();
CurrBBDeadInsts.clear();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need to do this on each iteration or would it be okay to remove all of the dead instructions at the end? I think we could probably get away with just one DeadInsts container and put both the CurrBB and PrevBB dead instructions in it together.

Copy link
Member Author

@farzonl farzonl Aug 28, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought about that. The problem is the Intrinsic::dx_resource_getpointer needs to be replicated across the phi basic blocks before we do

for (Instruction &I : BB)
      if (auto *II = dyn_cast<IntrinsicInst>(&I))
        if (II->getIntrinsicID() == Intrinsic::dx_resource_getpointer) {
          auto *HandleTy = cast<TargetExtType>(II->getArgOperand(0)->getType());
          Resources.emplace_back(II, DRTM[HandleTy]);
        }

Otherwise the Resources SmallVector will get populated with an intrinsic that is dead.

Similarly we can't remove the br label %_Z6CSMainv.exit until we are done iterating because those instructions are not in the current basic block we are evaluating. Thats forcing me to split the cleanup.

I suppose what we could do is check if the II is in the DeadInsts vector and that could let us do cleanup once but was afraid a contains check might get expensive? Thoughts?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we need to check if the cleanup is already in the vector then consolidating the cleanup probably isn't really worth it. I'm fine with it being split as long as there's a good reason for it, but if it's straightforward to combine then that seems nicer.

Copy link
Contributor

@nikic nikic left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This looks good to me. Please wait to hear back from @nikic on whether we should do the isTokenLikeTy() refactoring before or after this change.

I'd like the usages in the IR verifier to be adjusted, and an IR verifier test added that errors when using such a type in a phi node.

I don't think we need to replace other uses at this time.

CanBeVectorElement = 1U << 3,
/// All uses of this type must not attempt to introspect or obscure it.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is pretty vague... How about something like this?

Suggested change
/// All uses of this type must not attempt to introspect or obscure it.
/// This type can only be used in intrinsic arguments and return values.
/// In particular, it cannot be used in select and phi instructions.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does that mean the docs for tokenType should be updated

The token type is used when a value is associated with an 
instruction but all uses of the value must not attempt to introspect or obscure it. 
As such, it is not appropriate to have a [phi](https://llvm.org/docs/LangRef.html#i-phi) 
or [select](https://llvm.org/docs/LangRef.html#i-select) of type token.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd like to have a SimplifyCFG test that shows that no incorrect replacement occurs. I think this test is basically that? But this is a DirectX backend test. We should test this with -passes=simplifycfg in llvm/test/Transforms/SimplifyCFG.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The DX Buffer types are the only TargetExtType that uses the IsTokenLike property. And the DX target is still an experimental target. I could put a test in llvm/test/Transforms/SimplifyCFG/ but it would need a requires for the DirectX target.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why does it need the DirectX target? Both the type and the intrinsics don't require the backend, right?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You are right.

@farzonl farzonl force-pushed the bugfix/issue-152348_replaceOpWithVar_version branch from e197cea to 2dc03ee Compare August 28, 2025 20:02
@farzonl
Copy link
Member Author

farzonl commented Aug 28, 2025

This looks good to me. Please wait to hear back from @nikic on whether we should do the isTokenLikeTy() refactoring before or after this change.

I'd like the usages in the IR verifier to be adjusted, and an IR verifier test added that errors when using such a type in a phi node.

I don't think we need to replace other uses at this time.

@bogner can you verify if these are all case we would not want a Raw or Typed Buffer?

  1. llvm/lib/IR/Verifier.cpp:3009: Check(!Arg.getType()->isTokenTy(),

Function takes token but isn't an intrinsic
example:

define void @f(token %A) {
entry:
  ret void
}
  1. llvm/lib/IR/Verifier.cpp:3023: Check(!F.getReturnType()->isTokenTy(),
    Function returns a token but isn't an intrinsic
    example:
define token @f() {
entry:
  ret token undef
}
  1. llvm/lib/IR/Verifier.cpp:3637: Check(!PN.getType()->isTokenTy(), "PHI nodes cannot have token type!");

Obviously this one is a yes we don't want Raw or Type buffers in PHI nodes

  1. llvm/lib/IR/Verifier.cpp:3842: Check(!ParamTy->isTokenTy(),
    Function has token parameter but isn't an intrinsic

There was no test case for this one.

  1. llvm/lib/IR/Verifier.cpp:3849: Check(!FTy->getReturnType()->isTokenTy(),

Return type cannot be token for indirect call!
example

define void @f() {
entry:
  call token () undef ()
  ret void
}

Copy link

github-actions bot commented Aug 28, 2025

✅ With the latest revision this PR passed the undef deprecator.

; REQUIRES: asserts
; RUN: not --crash llvm-as %s -o /dev/null 2>&1 | FileCheck %s

define void @f(target("dx.RawBuffer", half, 1, 0) %A, target("dx.RawBuffer", half, 1, 0) %B) {
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To explain the file naming convention for all these verifer tests I used the name of the test I copied from and swapped in the target extention type switching only the word token to tokenlike.

@farzonl
Copy link
Member Author

farzonl commented Aug 28, 2025

This looks good to me. Please wait to hear back from @nikic on whether we should do the isTokenLikeTy() refactoring before or after this change.

I'd like the usages in the IR verifier to be adjusted, and an IR verifier test added that errors when using such a type in a phi node.
I don't think we need to replace other uses at this time.

@bogner can you verify if these are all case we would not want a Raw or Typed Buffer?

  1. llvm/lib/IR/Verifier.cpp:3009: Check(!Arg.getType()->isTokenTy(),

Function takes token but isn't an intrinsic example:

define void @f(token %A) {
entry:
  ret void
}
  1. llvm/lib/IR/Verifier.cpp:3023: Check(!F.getReturnType()->isTokenTy(),
    Function returns a token but isn't an intrinsic
    example:
define token @f() {
entry:
  ret token undef
}
  1. llvm/lib/IR/Verifier.cpp:3637: Check(!PN.getType()->isTokenTy(), "PHI nodes cannot have token type!");

Obviously this one is a yes we don't want Raw or Type buffers in PHI nodes

  1. llvm/lib/IR/Verifier.cpp:3842: Check(!ParamTy->isTokenTy(),
    Function has token parameter but isn't an intrinsic

There was no test case for this one.

  1. llvm/lib/IR/Verifier.cpp:3849: Check(!FTy->getReturnType()->isTokenTy(),

Return type cannot be token for indirect call! example

define void @f() {
entry:
  call token () undef ()
  ret void
}

@bogner the new Verifier changes are causing CodeGenHLSL/builtins/hlsl_resource_t.hlsl to fail
specifically with the error:Function takes token but isn't an intrinsic

@bogner
Copy link
Contributor

bogner commented Aug 28, 2025

@bogner the new Verifier changes are causing CodeGenHLSL/builtins/hlsl_resource_t.hlsl to fail
specifically with the error:Function takes token but isn't an intrinsic

That's probably fine / expected - that test is creating the __hlsl_resource_t directly and calling functions on it, which isn't really meaningful. As far as I can tell the tests that use the actual HLSL types (RWBuffer/StructuredBuffer) are still okay. I guess the fix will be to add a wrapper struct:

struct CustomResource {
  handle_float_t h;
};

and have the functions in that test use the struct not the handle itself. @hekota does this sound right to you?

@farzonl
Copy link
Member Author

farzonl commented Aug 29, 2025

current failure is Clang-Unit :: ./AllClangUnitTests/TimeProfilerTest/ConstantEvaluationCxx20. Which is not related to this PRs changes.

@hekota
Copy link
Member

hekota commented Aug 29, 2025

@hekota does this sound right to you?

Sounds good!

fixes llvm#152348

SimplifyCFG collapses raw buffer store from a if\else load into a select.

This change prevents the TargetExtType dx.Rawbuffer from being replace thus preserving the if\else blocks.

A further change was needed to eliminate the phi node before we process Intrinsic::dx_resource_getpointer in DXILResourceAccess.cpp
@farzonl farzonl force-pushed the bugfix/issue-152348_replaceOpWithVar_version branch from 1e66f73 to 1546848 Compare August 29, 2025 19:04
@farzonl farzonl merged commit 01c0a84 into llvm:main Aug 29, 2025
10 checks passed
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

[DirectX] Select of two different RawBuffer can not easily be reduced to a callInst.
5 participants