-
Notifications
You must be signed in to change notification settings - Fork 14.9k
[DirectX] Make dx.RawBuffer an op that can't be replaced #154620
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[DirectX] Make dx.RawBuffer an op that can't be replaced #154620
Conversation
4196866
to
89e1338
Compare
@llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-backend-directx Author: Farzon Lotfi (farzonl) Changesfixes #152348 SimplifyCFG collapses raw buffer store from a if\else load into a select. This change prevents the TargetExtType dx.Rawbuffer from being replace thus preserving the if\else blocks. A further change was needed to eliminate the phi node before we process Intrinsic::dx_resource_getpointer in DXILResourceAccess.cpp Full diff: https://github.com/llvm/llvm-project/pull/154620.diff 5 Files Affected:
diff --git a/llvm/include/llvm/IR/DerivedTypes.h b/llvm/include/llvm/IR/DerivedTypes.h
index fa62bc09b61a3..7810be22d1e99 100644
--- a/llvm/include/llvm/IR/DerivedTypes.h
+++ b/llvm/include/llvm/IR/DerivedTypes.h
@@ -847,6 +847,8 @@ class TargetExtType : public Type {
CanBeLocal = 1U << 2,
// This type may be used as an element in a vector.
CanBeVectorElement = 1U << 3,
+ // This type can not be replaced in an optimization pass.
+ NoReplacement = 1U << 4,
};
/// Returns true if the target extension type contains the given property.
diff --git a/llvm/lib/IR/Type.cpp b/llvm/lib/IR/Type.cpp
index 9c34662340352..7513debc2ed3a 100644
--- a/llvm/lib/IR/Type.cpp
+++ b/llvm/lib/IR/Type.cpp
@@ -1036,7 +1036,8 @@ static TargetTypeInfo getTargetTypeInfo(const TargetExtType *Ty) {
// DirectX resources
if (Name.starts_with("dx."))
return TargetTypeInfo(PointerType::get(C, 0), TargetExtType::CanBeGlobal,
- TargetExtType::CanBeLocal);
+ TargetExtType::CanBeLocal,
+ TargetExtType::NoReplacement);
// Opaque types in the AMDGPU name space.
if (Name == "amdgcn.named.barrier") {
diff --git a/llvm/lib/Target/DirectX/DXILResourceAccess.cpp b/llvm/lib/Target/DirectX/DXILResourceAccess.cpp
index c33ec0efd73ca..e09e71b679031 100644
--- a/llvm/lib/Target/DirectX/DXILResourceAccess.cpp
+++ b/llvm/lib/Target/DirectX/DXILResourceAccess.cpp
@@ -9,13 +9,17 @@
#include "DXILResourceAccess.h"
#include "DirectX.h"
#include "llvm/Analysis/DXILResource.h"
+#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsDirectX.h"
+#include "llvm/IR/User.h"
#include "llvm/InitializePasses.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
#define DEBUG_TYPE "dxil-resource-access"
@@ -198,6 +202,102 @@ static void createLoadIntrinsic(IntrinsicInst *II, LoadInst *LI, Value *Offset,
llvm_unreachable("Unhandled case in switch");
}
+static void collectBlockUseDef(Instruction *Start,
+ SmallVectorImpl<Instruction *> &Out) {
+ SmallPtrSet<Instruction *, 32> Visited;
+ SmallVector<Instruction *, 32> Worklist;
+ auto *BB = Start->getParent();
+
+ // Seed with direct users in this block.
+ for (User *U : Start->users()) {
+ if (auto *I = dyn_cast<Instruction>(U)) {
+ if (I->getParent() == BB)
+ Worklist.push_back(I);
+ }
+ }
+
+ // BFS over transitive users, constrained to the same block.
+ while (!Worklist.empty()) {
+ Instruction *I = Worklist.pop_back_val();
+ if (!Visited.insert(I).second)
+ continue;
+ Out.push_back(I);
+
+ for (User *U : I->users()) {
+ if (auto *J = dyn_cast<Instruction>(U)) {
+ if (J->getParent() == BB)
+ Worklist.push_back(J);
+ }
+ }
+ for (Use &V : I->operands()) {
+ if (auto *J = dyn_cast<Instruction>(V)) {
+ if (J->getParent() == BB && V != Start)
+ Worklist.push_back(J);
+ }
+ }
+ }
+
+ // Order results in program order.
+ DenseMap<const Instruction *, unsigned> Ord;
+ unsigned Idx = 0;
+ for (Instruction &I : *BB)
+ Ord[&I] = Idx++;
+
+ llvm::sort(Out, [&](Instruction *A, Instruction *B) {
+ return Ord.lookup(A) < Ord.lookup(B);
+ });
+}
+
+static void phiNodeRemapHelper(PHINode *Phi, BasicBlock *BB,
+ IRBuilder<> &Builder,
+ SmallVector<Instruction *> &UsesInBlock) {
+
+ ValueToValueMapTy VMap;
+ Value *Val = Phi->getIncomingValueForBlock(BB);
+ VMap[Phi] = Val;
+ Builder.SetInsertPoint(&BB->back());
+ for (Instruction *I : UsesInBlock) {
+ // don't clone over the Phi just remap them
+ if (auto *PhiNested = dyn_cast<PHINode>(I)) {
+ VMap[PhiNested] = PhiNested->getIncomingValueForBlock(BB);
+ continue;
+ }
+ Instruction *Clone = I->clone();
+ RemapInstruction(Clone, VMap,
+ RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
+ Builder.Insert(Clone);
+ VMap[I] = Clone;
+ }
+}
+
+static void phiNodeReplacement(IntrinsicInst *II) {
+ SmallVector<Instruction *> DeadInsts;
+ for (User *U : II->users()) {
+ if (auto *Phi = dyn_cast<PHINode>(U)) {
+
+ IRBuilder<> Builder(Phi);
+ SmallVector<Instruction *> UsesInBlock;
+ collectBlockUseDef(Phi, UsesInBlock);
+
+ for (uint I = 0; I < Phi->getNumIncomingValues(); I++) {
+ auto *CurrIncomingBB = Phi->getIncomingBlock(I);
+ phiNodeRemapHelper(Phi, CurrIncomingBB, Builder, UsesInBlock);
+ }
+
+ DeadInsts.push_back(Phi);
+
+ for (Instruction *I : UsesInBlock) {
+ DeadInsts.push_back(I);
+ }
+ }
+ }
+
+ // Traverse the now-dead instructions in RPO and remove them.
+ for (Instruction *Dead : llvm::reverse(DeadInsts))
+ Dead->eraseFromParent();
+ DeadInsts.clear();
+}
+
static void replaceAccess(IntrinsicInst *II, dxil::ResourceTypeInfo &RTI) {
// Process users keeping track of indexing accumulated from GEPs.
struct AccessAndOffset {
@@ -229,7 +329,6 @@ static void replaceAccess(IntrinsicInst *II, dxil::ResourceTypeInfo &RTI) {
} else if (auto *LI = dyn_cast<LoadInst>(Current.Access)) {
createLoadIntrinsic(II, LI, Current.Offset, RTI);
DeadInsts.push_back(LI);
-
} else
llvm_unreachable("Unhandled instruction - pointer escaped?");
}
@@ -242,13 +341,19 @@ static void replaceAccess(IntrinsicInst *II, dxil::ResourceTypeInfo &RTI) {
static bool transformResourcePointers(Function &F, DXILResourceTypeMap &DRTM) {
SmallVector<std::pair<IntrinsicInst *, dxil::ResourceTypeInfo>> Resources;
- for (BasicBlock &BB : F)
+ for (BasicBlock &BB : F) {
+ for (Instruction &I : make_early_inc_range(BB))
+ if (auto *II = dyn_cast<IntrinsicInst>(&I))
+ if (II->getIntrinsicID() == Intrinsic::dx_resource_getpointer)
+ phiNodeReplacement(II);
+
for (Instruction &I : BB)
if (auto *II = dyn_cast<IntrinsicInst>(&I))
if (II->getIntrinsicID() == Intrinsic::dx_resource_getpointer) {
auto *HandleTy = cast<TargetExtType>(II->getArgOperand(0)->getType());
Resources.emplace_back(II, DRTM[HandleTy]);
}
+ }
for (auto &[II, RI] : Resources)
replaceAccess(II, RI);
@@ -279,7 +384,6 @@ class DXILResourceAccessLegacy : public FunctionPass {
bool runOnFunction(Function &F) override {
DXILResourceTypeMap &DRTM =
getAnalysis<DXILResourceTypeWrapperPass>().getResourceTypeMap();
-
return transformResourcePointers(F, DRTM);
}
StringRef getPassName() const override { return "DXIL Resource Access"; }
diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp
index ac344904f90f0..9b9b34db908f3 100644
--- a/llvm/lib/Transforms/Utils/Local.cpp
+++ b/llvm/lib/Transforms/Utils/Local.cpp
@@ -3852,6 +3852,10 @@ bool llvm::canReplaceOperandWithVariable(const Instruction *I, unsigned OpIdx) {
if (I->isLifetimeStartOrEnd())
return false;
+ if (auto *TT = dyn_cast<TargetExtType>(Op->getType());
+ TT && TT->hasProperty(TargetExtType::Property::NoReplacement))
+ return false;
+
// Early exit.
if (!isa<Constant, InlineAsm>(Op))
return true;
diff --git a/llvm/test/CodeGen/DirectX/issue-152348.ll b/llvm/test/CodeGen/DirectX/issue-152348.ll
new file mode 100644
index 0000000000000..693f01457bdc6
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/issue-152348.ll
@@ -0,0 +1,143 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -dxil-resource-type -dxil-resource-access -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+
+%__cblayout_d = type <{ i32, i32, i32, i32 }>
+
+@.str = internal unnamed_addr constant [2 x i8] c"a\00", align 1
+@d.cb = local_unnamed_addr global target("dx.CBuffer", target("dx.Layout", %__cblayout_d, 16, 0, 4, 8, 12)) poison
+@e = external hidden local_unnamed_addr addrspace(2) global i32, align 4
+@d.str = internal unnamed_addr constant [2 x i8] c"d\00", align 1
+
+define void @CSMain() local_unnamed_addr {
+; CHECK-LABEL: define void @CSMain() local_unnamed_addr {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[D_CB_H_I_I:%.*]] = tail call target("dx.CBuffer", target("dx.Layout", [[__CBLAYOUT_D:%.*]], 16, 0, 4, 8, 12)) @llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_tdx.Layout_s___cblayout_ds_16_0_4_8_12tt(i32 3, i32 0, i32 1, i32 0, i1 false, ptr nonnull @d.str)
+; CHECK-NEXT: store target("dx.CBuffer", target("dx.Layout", [[__CBLAYOUT_D]], 16, 0, 4, 8, 12)) [[D_CB_H_I_I]], ptr @d.cb, align 4
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(2) @e, align 4
+; CHECK-NEXT: [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[TMP0]], 0
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(2) @e, align 4
+; CHECK-NEXT: br i1 [[TOBOOL_NOT_I]], label %[[IF_ELSE_I:.*]], label %[[IF_THEN_I:.*]]
+; CHECK: [[IF_THEN_I]]:
+; CHECK-NEXT: [[TMP2:%.*]] = tail call target("dx.RawBuffer", half, 1, 0) @llvm.dx.resource.handlefromimplicitbinding.tdx.RawBuffer_f16_1_0t(i32 1, i32 0, i32 1, i32 0, i1 false, ptr nonnull @.str)
+; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(2) @e, align 4
+; CHECK-NEXT: [[TMP4:%.*]] = tail call target("dx.RawBuffer", half, 1, 0) @llvm.dx.resource.handlefromimplicitbinding.tdx.RawBuffer_f16_1_0t(i32 2, i32 0, i32 1, i32 0, i1 false, ptr nonnull @.str)
+; CHECK-NEXT: [[TMP5:%.*]] = call { half, i1 } @llvm.dx.resource.load.rawbuffer.f16.tdx.RawBuffer_f16_1_0t(target("dx.RawBuffer", half, 1, 0) [[TMP2]], i32 [[TMP3]], i32 0)
+; CHECK-NEXT: [[TMP6:%.*]] = extractvalue { half, i1 } [[TMP5]], 0
+; CHECK-NEXT: call void @llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_f16_1_0t.f16(target("dx.RawBuffer", half, 1, 0) [[TMP4]], i32 [[TMP1]], i32 0, half [[TMP6]])
+; CHECK-NEXT: br label %[[_Z6CSMAINV_EXIT:.*]]
+; CHECK: [[IF_ELSE_I]]:
+; CHECK-NEXT: [[TMP7:%.*]] = tail call target("dx.RawBuffer", half, 1, 0) @llvm.dx.resource.handlefromimplicitbinding.tdx.RawBuffer_f16_1_0t(i32 0, i32 0, i32 1, i32 0, i1 false, ptr nonnull @.str)
+; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(2) @e, align 4
+; CHECK-NEXT: [[TMP9:%.*]] = tail call target("dx.RawBuffer", half, 1, 0) @llvm.dx.resource.handlefromimplicitbinding.tdx.RawBuffer_f16_1_0t(i32 2, i32 0, i32 1, i32 0, i1 false, ptr nonnull @.str)
+; CHECK-NEXT: [[TMP10:%.*]] = call { half, i1 } @llvm.dx.resource.load.rawbuffer.f16.tdx.RawBuffer_f16_1_0t(target("dx.RawBuffer", half, 1, 0) [[TMP7]], i32 [[TMP8]], i32 0)
+; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { half, i1 } [[TMP10]], 0
+; CHECK-NEXT: call void @llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_f16_1_0t.f16(target("dx.RawBuffer", half, 1, 0) [[TMP9]], i32 [[TMP1]], i32 0, half [[TMP11]])
+; CHECK-NEXT: br label %[[_Z6CSMAINV_EXIT]]
+; CHECK: [[_Z6CSMAINV_EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ %d.cb_h.i.i = tail call target("dx.CBuffer", target("dx.Layout", %__cblayout_d, 16, 0, 4, 8, 12)) @llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_tdx.Layout_s___cblayout_ds_16_0_4_8_12tt(i32 3, i32 0, i32 1, i32 0, i1 false, ptr nonnull @d.str)
+ store target("dx.CBuffer", target("dx.Layout", %__cblayout_d, 16, 0, 4, 8, 12)) %d.cb_h.i.i, ptr @d.cb, align 4
+ %0 = load i32, ptr addrspace(2) @e, align 4
+ %tobool.not.i = icmp eq i32 %0, 0
+ %1 = load i32, ptr addrspace(2) @e, align 4
+ br i1 %tobool.not.i, label %if.else.i, label %if.then.i
+
+if.then.i: ; preds = %entry
+ %2 = tail call target("dx.RawBuffer", half, 1, 0) @llvm.dx.resource.handlefromimplicitbinding.tdx.RawBuffer_f16_1_0t(i32 1, i32 0, i32 1, i32 0, i1 false, ptr nonnull @.str)
+ %3 = load i32, ptr addrspace(2) @e, align 4
+ %4 = tail call noundef nonnull align 2 dereferenceable(2) ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_f16_1_0t(target("dx.RawBuffer", half, 1, 0) %2, i32 %3)
+ br label %_Z6CSMainv.exit
+
+if.else.i: ; preds = %entry
+ %5 = tail call target("dx.RawBuffer", half, 1, 0) @llvm.dx.resource.handlefromimplicitbinding.tdx.RawBuffer_f16_1_0t(i32 0, i32 0, i32 1, i32 0, i1 false, ptr nonnull @.str)
+ %6 = load i32, ptr addrspace(2) @e, align 4
+ %7 = tail call noundef nonnull align 2 dereferenceable(2) ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_f16_1_0t(target("dx.RawBuffer", half, 1, 0) %5, i32 %6)
+ br label %_Z6CSMainv.exit
+
+_Z6CSMainv.exit: ; preds = %if.then.i, %if.else.i
+ %.sink1 = phi ptr [ %4, %if.then.i ], [ %7, %if.else.i ]
+ %8 = tail call target("dx.RawBuffer", half, 1, 0) @llvm.dx.resource.handlefromimplicitbinding.tdx.RawBuffer_f16_1_0t(i32 2, i32 0, i32 1, i32 0, i1 false, ptr nonnull @.str)
+ %9 = tail call noundef nonnull align 2 dereferenceable(2) ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_f16_1_0t(target("dx.RawBuffer", half, 1, 0) %8, i32 %1)
+ %10 = load half, ptr %.sink1, align 2
+ store half %10, ptr %9, align 2
+ ret void
+}
+
+define void @Main() local_unnamed_addr {
+; CHECK-LABEL: define void @Main() local_unnamed_addr {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP0:%.*]] = tail call target("dx.RawBuffer", half, 1, 0) @llvm.dx.resource.handlefromimplicitbinding.tdx.RawBuffer_f16_1_0t(i32 0, i32 0, i32 1, i32 0, i1 false, ptr nonnull @.str)
+; CHECK-NEXT: [[TMP1:%.*]] = tail call target("dx.RawBuffer", half, 1, 0) @llvm.dx.resource.handlefromimplicitbinding.tdx.RawBuffer_f16_1_0t(i32 1, i32 0, i32 1, i32 0, i1 false, ptr nonnull @.str)
+; CHECK-NEXT: [[D_CB_H_I_I:%.*]] = tail call target("dx.CBuffer", target("dx.Layout", [[__CBLAYOUT_D:%.*]], 16, 0, 4, 8, 12)) @llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_tdx.Layout_s___cblayout_ds_16_0_4_8_12tt(i32 3, i32 0, i32 1, i32 0, i1 false, ptr nonnull @d.str)
+; CHECK-NEXT: store target("dx.CBuffer", target("dx.Layout", [[__CBLAYOUT_D]], 16, 0, 4, 8, 12)) [[D_CB_H_I_I]], ptr @d.cb, align 4
+; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(2) @e, align 4
+; CHECK-NEXT: [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[TOBOOL_NOT_I]], label %[[IF_ELSE_I:.*]], label %[[IF_THEN_I:.*]]
+; CHECK: [[IF_THEN_I]]:
+; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(2) @e, align 4
+; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(2) @e, align 4
+; CHECK-NEXT: [[TMP5:%.*]] = tail call target("dx.RawBuffer", half, 1, 0) @llvm.dx.resource.handlefromimplicitbinding.tdx.RawBuffer_f16_1_0t(i32 2, i32 0, i32 1, i32 0, i1 false, ptr nonnull @.str)
+; CHECK-NEXT: [[TMP6:%.*]] = call { half, i1 } @llvm.dx.resource.load.rawbuffer.f16.tdx.RawBuffer_f16_1_0t(target("dx.RawBuffer", half, 1, 0) [[TMP1]], i32 [[TMP3]], i32 0)
+; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { half, i1 } [[TMP6]], 0
+; CHECK-NEXT: call void @llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_f16_1_0t.f16(target("dx.RawBuffer", half, 1, 0) [[TMP5]], i32 [[TMP4]], i32 0, half [[TMP7]])
+; CHECK-NEXT: br label %[[_Z6MAINV_EXIT:.*]]
+; CHECK: [[IF_ELSE_I]]:
+; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(2) @e, align 4
+; CHECK-NEXT: [[CMP_I:%.*]] = icmp eq i32 [[TMP8]], 0
+; CHECK-NEXT: br i1 [[CMP_I]], label %[[IF_THEN2_I:.*]], label %[[IF_ELSE6_I:.*]]
+; CHECK: [[IF_THEN2_I]]:
+; CHECK-NEXT: [[TMP9:%.*]] = tail call target("dx.RawBuffer", half, 1, 0) @llvm.dx.resource.handlefromimplicitbinding.tdx.RawBuffer_f16_1_0t(i32 2, i32 0, i32 1, i32 0, i1 false, ptr nonnull @.str)
+; CHECK-NEXT: [[TMP10:%.*]] = call { half, i1 } @llvm.dx.resource.load.rawbuffer.f16.tdx.RawBuffer_f16_1_0t(target("dx.RawBuffer", half, 1, 0) [[TMP1]], i32 0, i32 0)
+; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { half, i1 } [[TMP10]], 0
+; CHECK-NEXT: call void @llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_f16_1_0t.f16(target("dx.RawBuffer", half, 1, 0) [[TMP9]], i32 0, i32 0, half [[TMP11]])
+; CHECK-NEXT: br label %[[_Z6MAINV_EXIT]]
+; CHECK: [[IF_ELSE6_I]]:
+; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(2) @e, align 4
+; CHECK-NEXT: [[TMP13:%.*]] = tail call target("dx.RawBuffer", half, 1, 0) @llvm.dx.resource.handlefromimplicitbinding.tdx.RawBuffer_f16_1_0t(i32 2, i32 0, i32 1, i32 0, i1 false, ptr nonnull @.str)
+; CHECK-NEXT: [[TMP14:%.*]] = call { half, i1 } @llvm.dx.resource.load.rawbuffer.f16.tdx.RawBuffer_f16_1_0t(target("dx.RawBuffer", half, 1, 0) [[TMP0]], i32 [[TMP12]], i32 0)
+; CHECK-NEXT: [[TMP15:%.*]] = extractvalue { half, i1 } [[TMP14]], 0
+; CHECK-NEXT: call void @llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_f16_1_0t.f16(target("dx.RawBuffer", half, 1, 0) [[TMP13]], i32 [[TMP8]], i32 0, half [[TMP15]])
+; CHECK-NEXT: br label %[[_Z6MAINV_EXIT]]
+; CHECK: [[_Z6MAINV_EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ %0 = tail call target("dx.RawBuffer", half, 1, 0) @llvm.dx.resource.handlefromimplicitbinding.tdx.RawBuffer_f16_1_0t(i32 0, i32 0, i32 1, i32 0, i1 false, ptr nonnull @.str)
+ %1 = tail call target("dx.RawBuffer", half, 1, 0) @llvm.dx.resource.handlefromimplicitbinding.tdx.RawBuffer_f16_1_0t(i32 1, i32 0, i32 1, i32 0, i1 false, ptr nonnull @.str)
+ %d.cb_h.i.i = tail call target("dx.CBuffer", target("dx.Layout", %__cblayout_d, 16, 0, 4, 8, 12)) @llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_tdx.Layout_s___cblayout_ds_16_0_4_8_12tt(i32 3, i32 0, i32 1, i32 0, i1 false, ptr nonnull @d.str)
+ store target("dx.CBuffer", target("dx.Layout", %__cblayout_d, 16, 0, 4, 8, 12)) %d.cb_h.i.i, ptr @d.cb, align 4
+ %2 = load i32, ptr addrspace(2) @e, align 4
+ %tobool.not.i = icmp eq i32 %2, 0
+ br i1 %tobool.not.i, label %if.else.i, label %if.then.i
+
+if.then.i: ; preds = %entry
+ %3 = load i32, ptr addrspace(2) @e, align 4
+ %4 = tail call noundef nonnull align 2 dereferenceable(2) ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_f16_1_0t(target("dx.RawBuffer", half, 1, 0) %1, i32 %3)
+ %5 = load i32, ptr addrspace(2) @e, align 4
+ br label %_Z6Mainv.exit
+
+if.else.i: ; preds = %entry
+ %6 = load i32, ptr addrspace(2) @e, align 4
+ %cmp.i = icmp eq i32 %6, 0
+ br i1 %cmp.i, label %if.then2.i, label %if.else6.i
+
+if.then2.i: ; preds = %if.else.i
+ %7 = tail call noundef nonnull align 2 dereferenceable(2) ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_f16_1_0t(target("dx.RawBuffer", half, 1, 0) %1, i32 0)
+ br label %_Z6Mainv.exit
+
+if.else6.i: ; preds = %if.else.i
+ %8 = load i32, ptr addrspace(2) @e, align 4
+ %9 = tail call noundef nonnull align 2 dereferenceable(2) ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_f16_1_0t(target("dx.RawBuffer", half, 1, 0) %0, i32 %8)
+ br label %_Z6Mainv.exit
+
+_Z6Mainv.exit: ; preds = %if.then.i, %if.then2.i, %if.else6.i
+ %.sink2 = phi i32 [ %5, %if.then.i ], [ 0, %if.then2.i ], [ %6, %if.else6.i ]
+ %.sink.in = phi ptr [ %4, %if.then.i ], [ %7, %if.then2.i ], [ %9, %if.else6.i ]
+ %10 = tail call target("dx.RawBuffer", half, 1, 0) @llvm.dx.resource.handlefromimplicitbinding.tdx.RawBuffer_f16_1_0t(i32 2, i32 0, i32 1, i32 0, i1 false, ptr nonnull @.str)
+ %.sink = load half, ptr %.sink.in, align 2
+ %11 = tail call noundef nonnull align 2 dereferenceable(2) ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_f16_1_0t(target("dx.RawBuffer", half, 1, 0) %10, i32 %.sink2)
+ store half %.sink, ptr %11, align 2
+ ret void
+}
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Just a few comments. I did not review the algorithm in detail yet.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It'd be nice to also include a small handwritten test that shows the Phi transformation - it's a bit hard to see what it does in the simple case with a test case derived from HLSL like this.
llvm/include/llvm/IR/DerivedTypes.h
Outdated
// This type can not be replaced in an optimization pass. | ||
NoReplacement = 1U << 4, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
A couple of thoughts here:
- Is this the best name for this? Maybe
IsTokenLike
(since the constraints are similar to the token type) orMustBeLiteral
or something like that would be better? - The doc comment here is a little vague - better wording might come out of the better naming though.
- Docs should use
///
comments so that they show up in doxygen, especially given that the LangRef for TargetExtType defers to the doxygen to describe the properties.
@nikic do you have opinions on the naming here?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
IsTokenLike makes sense to me. I'd combine this with adding an isTokenLikeTy() method and then using that instead of isTokenTy() in relevant places (including the IR verifier).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@nikic replacing isTokenTy()
with isTokenLikeTy()
is a ton of file changes. Could we address that in a follow up pr if I create an issue for it?
156a8e7
to
fc7b3dc
Compare
✅ With the latest revision this PR passed the C/C++ code formatter. |
7e31011
to
6acef97
Compare
179186a
to
7940bb4
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This looks good to me. Please wait to hear back from @nikic on whether we should do the isTokenLikeTy()
refactoring before or after this change.
for (Instruction *Dead : llvm::reverse(CurrBBDeadInsts)) | ||
Dead->eraseFromParent(); | ||
CurrBBDeadInsts.clear(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we need to do this on each iteration or would it be okay to remove all of the dead instructions at the end? I think we could probably get away with just one DeadInsts
container and put both the CurrBB
and PrevBB
dead instructions in it together.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I thought about that. The problem is the Intrinsic::dx_resource_getpointer needs to be replicated across the phi basic blocks before we do
for (Instruction &I : BB)
if (auto *II = dyn_cast<IntrinsicInst>(&I))
if (II->getIntrinsicID() == Intrinsic::dx_resource_getpointer) {
auto *HandleTy = cast<TargetExtType>(II->getArgOperand(0)->getType());
Resources.emplace_back(II, DRTM[HandleTy]);
}
Otherwise the Resources SmallVector will get populated with an intrinsic that is dead.
Similarly we can't remove the br label %_Z6CSMainv.exit
until we are done iterating because those instructions are not in the current basic block we are evaluating. Thats forcing me to split the cleanup.
I suppose what we could do is check if the II is in the DeadInsts vector and that could let us do cleanup once but was afraid a contains check might get expensive? Thoughts?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If we need to check if the cleanup is already in the vector then consolidating the cleanup probably isn't really worth it. I'm fine with it being split as long as there's a good reason for it, but if it's straightforward to combine then that seems nicer.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This looks good to me. Please wait to hear back from @nikic on whether we should do the isTokenLikeTy() refactoring before or after this change.
I'd like the usages in the IR verifier to be adjusted, and an IR verifier test added that errors when using such a type in a phi node.
I don't think we need to replace other uses at this time.
llvm/include/llvm/IR/DerivedTypes.h
Outdated
CanBeVectorElement = 1U << 3, | ||
/// All uses of this type must not attempt to introspect or obscure it. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is pretty vague... How about something like this?
/// All uses of this type must not attempt to introspect or obscure it. | |
/// This type can only be used in intrinsic arguments and return values. | |
/// In particular, it cannot be used in select and phi instructions. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Does that mean the docs for tokenType should be updated
The token type is used when a value is associated with an
instruction but all uses of the value must not attempt to introspect or obscure it.
As such, it is not appropriate to have a [phi](https://llvm.org/docs/LangRef.html#i-phi)
or [select](https://llvm.org/docs/LangRef.html#i-select) of type token.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'd like to have a SimplifyCFG test that shows that no incorrect replacement occurs. I think this test is basically that? But this is a DirectX backend test. We should test this with -passes=simplifycfg
in llvm/test/Transforms/SimplifyCFG.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The DX Buffer types are the only TargetExtType
that uses the IsTokenLike
property. And the DX target is still an experimental target. I could put a test in llvm/test/Transforms/SimplifyCFG/
but it would need a requires for the DirectX target.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why does it need the DirectX target? Both the type and the intrinsics don't require the backend, right?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You are right.
e197cea
to
2dc03ee
Compare
@bogner can you verify if these are all case we would not want a Raw or Typed Buffer?
Obviously this one is a yes we don't want Raw or Type buffers in PHI nodes
There was no test case for this one.
|
✅ With the latest revision this PR passed the undef deprecator. |
; REQUIRES: asserts | ||
; RUN: not --crash llvm-as %s -o /dev/null 2>&1 | FileCheck %s | ||
|
||
define void @f(target("dx.RawBuffer", half, 1, 0) %A, target("dx.RawBuffer", half, 1, 0) %B) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
To explain the file naming convention for all these verifer tests I used the name of the test I copied from and swapped in the target extention type switching only the word token to tokenlike.
@bogner the new Verifier changes are causing |
That's probably fine / expected - that test is creating the struct CustomResource {
handle_float_t h;
}; and have the functions in that test use the struct not the handle itself. @hekota does this sound right to you? |
current failure is |
Sounds good! |
fixes llvm#152348 SimplifyCFG collapses raw buffer store from a if\else load into a select. This change prevents the TargetExtType dx.Rawbuffer from being replace thus preserving the if\else blocks. A further change was needed to eliminate the phi node before we process Intrinsic::dx_resource_getpointer in DXILResourceAccess.cpp
…ure. test needed to be updated
1e66f73
to
1546848
Compare
fixes #152348
SimplifyCFG collapses raw buffer store from a if\else load into a select.
This change prevents the TargetExtType dx.Rawbuffer from being replace thus preserving the if\else blocks.
A further change was needed to eliminate the phi node before we process Intrinsic::dx_resource_getpointer in DXILResourceAccess.cpp