-
Notifications
You must be signed in to change notification settings - Fork 14.9k
AMDGPU: Fix mis-selecting saddr flat atomics on gfx9 #156860
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
AMDGPU: Fix mis-selecting saddr flat atomics on gfx9 #156860
Conversation
This stack of pull requests is managed by Graphite. Learn more about stacking.
@llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm)

Changes: This would select the pseudo and then crash when the MC instruction was used. I believe this has been broken since 9912ccb.

Patch is 437.77 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/156860.diff — 4 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 0e0b84f7e3374..a366db1c580ba 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -68,13 +68,15 @@ def FeatureFlatInstOffsets : SubtargetFeature<"flat-inst-offsets",
def FeatureFlatGlobalInsts : SubtargetFeature<"flat-global-insts",
"FlatGlobalInsts",
"true",
- "Have global_* flat memory instructions"
+ "Have global_* flat memory instructions",
+ [FeatureFlatAddressSpace]
>;
def FeatureFlatScratchInsts : SubtargetFeature<"flat-scratch-insts",
"FlatScratchInsts",
"true",
- "Have scratch_* flat memory instructions"
+ "Have scratch_* flat memory instructions",
+ [FeatureFlatAddressSpace]
>;
def FeatureScalarFlatScratchInsts : SubtargetFeature<"scalar-flat-scratch-insts",
@@ -92,7 +94,8 @@ def FeatureEnableFlatScratch : SubtargetFeature<"enable-flat-scratch",
def FeatureFlatGVSMode : SubtargetFeature<"flat-gvs-mode",
"FlatGVSMode",
"true",
- "Have GVS addressing mode with flat_* instructions"
+ "Have GVS addressing mode with flat_* instructions",
+ [FeatureFlatAddressSpace]
>;
def FeatureAddNoCarryInsts : SubtargetFeature<"add-no-carry-insts",
@@ -934,13 +937,15 @@ def FeatureAtomicFMinFMaxF64GlobalInsts : SubtargetFeature<"atomic-fmin-fmax-glo
def FeatureAtomicFMinFMaxF32FlatInsts : SubtargetFeature<"atomic-fmin-fmax-flat-f32",
"HasAtomicFMinFMaxF32FlatInsts",
"true",
- "Has flat memory instructions for atomicrmw fmin/fmax for float"
+ "Has flat memory instructions for atomicrmw fmin/fmax for float",
+ [FeatureFlatAddressSpace]
>;
def FeatureAtomicFMinFMaxF64FlatInsts : SubtargetFeature<"atomic-fmin-fmax-flat-f64",
"HasAtomicFMinFMaxF64FlatInsts",
"true",
- "Has flat memory instructions for atomicrmw fmin/fmax for double"
+ "Has flat memory instructions for atomicrmw fmin/fmax for double",
+ [FeatureFlatAddressSpace]
>;
def FeatureAtomicFaddNoRtnInsts : SubtargetFeature<"atomic-fadd-no-rtn-insts",
@@ -992,7 +997,8 @@ def FeatureFlatAtomicFaddF32Inst
: SubtargetFeature<"flat-atomic-fadd-f32-inst",
"HasFlatAtomicFaddF32Inst",
"true",
- "Has flat_atomic_add_f32 instruction"
+ "Has flat_atomic_add_f32 instruction",
+ [FeatureFlatAddressSpace]
>;
def FeatureFlatBufferGlobalAtomicFaddF64Inst
diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td
index 1617f7954a5ee..9056d4cad19ea 100644
--- a/llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -297,7 +297,7 @@ multiclass FLAT_Flat_Store_Pseudo_t16<string opName> {
multiclass FLAT_Global_Load_Pseudo<string opName, RegisterOperand regClass = AVLdSt_32,
bit HasTiedInput = 0> {
- let is_flat_global = 1 in {
+ let is_flat_global = 1, SubtargetPredicate = HasFlatGlobalInsts in {
def "" : FLAT_Load_Pseudo<opName, regClass, HasTiedInput, 1>,
GlobalSaddrTable<0, opName>;
def _SADDR : FLAT_Load_Pseudo<opName, regClass, HasTiedInput, 1, 1>,
@@ -347,7 +347,7 @@ multiclass FLAT_Global_Load_AddTid_Pseudo<string opName, RegisterOperand regClas
}
multiclass FLAT_Global_Store_Pseudo<string opName, RegisterOperand regClass = AVLdSt_32> {
- let is_flat_global = 1 in {
+ let is_flat_global = 1, SubtargetPredicate = HasFlatGlobalInsts in {
def "" : FLAT_Store_Pseudo<opName, regClass, 1>,
GlobalSaddrTable<0, opName>;
def _SADDR : FLAT_Store_Pseudo<opName, regClass, 1, 1>,
@@ -1296,19 +1296,19 @@ let SubtargetPredicate = isGFX10Plus in {
FLAT_Global_Atomic_Pseudo<"global_atomic_fcmpswap_x2", AVLdSt_64, f64, v2f64, AVLdSt_128>;
} // End SubtargetPredicate = isGFX10Plus
-let OtherPredicates = [HasAtomicFaddNoRtnInsts] in
+let SubtargetPredicate = HasAtomicFaddNoRtnInsts in
defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Atomic_Pseudo_NO_RTN <
"global_atomic_add_f32", AVLdSt_32, f32
>;
-let OtherPredicates = [HasAtomicBufferGlobalPkAddF16NoRtnInsts] in
+let SubtargetPredicate = HasAtomicBufferGlobalPkAddF16NoRtnInsts in
defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Atomic_Pseudo_NO_RTN <
"global_atomic_pk_add_f16", AVLdSt_32, v2f16
>;
-let OtherPredicates = [HasAtomicFaddRtnInsts] in
+let SubtargetPredicate = HasAtomicFaddRtnInsts in
defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Atomic_Pseudo_RTN <
"global_atomic_add_f32", AVLdSt_32, f32
>;
-let OtherPredicates = [HasAtomicBufferGlobalPkAddF16Insts] in
+let SubtargetPredicate = HasAtomicBufferGlobalPkAddF16Insts in
defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Atomic_Pseudo_RTN <
"global_atomic_pk_add_f16", AVLdSt_32, v2f16
>;
@@ -1442,8 +1442,10 @@ class FlatStoreSaddrPat <FLAT_Pseudo inst, SDPatternOperator node,
class FlatAtomicSaddrPat <FLAT_Pseudo inst, SDPatternOperator node, ComplexPattern pat,
ValueType vt, ValueType data_vt = vt> : GCNPat <
(vt (node (pat (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset, CPol:$cpol), data_vt:$data)),
- (inst $voffset, getVregSrcForVT<data_vt>.ret:$data, $saddr, $offset, $cpol)
->;
+ (inst $voffset, getVregSrcForVT<data_vt>.ret:$data, $saddr, $offset, $cpol)> {
+ let SubtargetPredicate = inst.SubtargetPredicate;
+ let OtherPredicates = inst.OtherPredicates;
+}
class GlobalAtomicNoRtnSaddrPat <FLAT_Pseudo inst, SDPatternOperator node,
ValueType vt> : GCNPat <
@@ -1476,12 +1478,13 @@ multiclass FlatAtomicNoRtnPatBase <string inst, string node, ValueType vt,
let AddedComplexity = 1 in
def : GCNPat <(vt (noRtnNode (FlatOffset i64:$vaddr, i32:$offset), data_vt:$data)),
- (!cast<FLAT_Pseudo>(inst) VReg_64:$vaddr, getVregSrcForVT<data_vt>.ret:$data, $offset)>;
+ (!cast<FLAT_Pseudo>(inst) VReg_64:$vaddr, getVregSrcForVT<data_vt>.ret:$data, $offset)> {
+ let OtherPredicates = [HasFlatAddressSpace];
+ }
def : FlatAtomicSaddrPat<!cast<FLAT_Pseudo>(inst#"_SADDR"), !cast<SDPatternOperator>(node),
GlobalSAddr, vt, data_vt> {
let AddedComplexity = 9;
- let SubtargetPredicate = HasFlatGVSMode;
}
}
@@ -1500,11 +1503,12 @@ multiclass FlatAtomicRtnPatBase <string inst, string node, ValueType vt,
defvar rtnNode = !cast<SDPatternOperator>(node);
def : GCNPat <(vt (rtnNode (FlatOffset i64:$vaddr, i32:$offset), data_vt:$data)),
- (!cast<FLAT_Pseudo>(inst#"_RTN") VReg_64:$vaddr, getVregSrcForVT<data_vt>.ret:$data, $offset)>;
+ (!cast<FLAT_Pseudo>(inst#"_RTN") VReg_64:$vaddr, getVregSrcForVT<data_vt>.ret:$data, $offset)> {
+ let OtherPredicates = [HasFlatAddressSpace];
+ }
def : FlatAtomicSaddrPat<!cast<FLAT_Pseudo>(inst#"_SADDR_RTN"), rtnNode, GlobalSAddrGLC, vt, data_vt> {
let AddedComplexity = 8;
- let SubtargetPredicate = HasFlatGVSMode;
}
}
@@ -1540,8 +1544,10 @@ multiclass FlatAtomicIntrPat <string inst, string node, ValueType vt,
class FlatSignedAtomicPatBase <FLAT_Pseudo inst, SDPatternOperator node,
ValueType vt, ValueType data_vt = vt> : GCNPat <
(vt (node (GlobalOffset i64:$vaddr, i32:$offset), data_vt:$data)),
- (inst VReg_64:$vaddr, getVregSrcForVT<data_vt>.ret:$data, $offset)
->;
+ (inst VReg_64:$vaddr, getVregSrcForVT<data_vt>.ret:$data, $offset)> {
+ let SubtargetPredicate = inst.SubtargetPredicate;
+ let OtherPredicates = inst.OtherPredicates;
+}
multiclass FlatSignedAtomicPat <string inst, string node, ValueType vt,
ValueType data_vt = vt, int complexity = 0,
@@ -1651,30 +1657,42 @@ multiclass GlobalStoreLDSPats<FLAT_Pseudo inst, SDPatternOperator node> {
multiclass GlobalFLATLoadPats<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> {
def : FlatLoadSignedPat <inst, node, vt> {
let AddedComplexity = 10;
+ let SubtargetPredicate = inst.SubtargetPredicate;
+ let OtherPredicates = inst.OtherPredicates;
}
def : FlatLoadSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> {
let AddedComplexity = 11;
+ let SubtargetPredicate = inst.SubtargetPredicate;
+ let OtherPredicates = inst.OtherPredicates;
}
}
multiclass GlobalFLATLoadPats_M0<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> {
def : FlatLoadSignedPat_M0 <inst, node, vt> {
let AddedComplexity = 10;
+ let SubtargetPredicate = inst.SubtargetPredicate;
+ let OtherPredicates = inst.OtherPredicates;
}
def : GlobalLoadSaddrPat_M0<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> {
let AddedComplexity = 11;
+ let SubtargetPredicate = inst.SubtargetPredicate;
+ let OtherPredicates = inst.OtherPredicates;
}
}
multiclass GlobalFLATLoadPats_CPOL<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> {
def : FlatLoadSignedPat_CPOL<inst, node, vt> {
let AddedComplexity = 10;
+ let SubtargetPredicate = inst.SubtargetPredicate;
+ let OtherPredicates = inst.OtherPredicates;
}
def : GlobalLoadSaddrPat_CPOL<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> {
let AddedComplexity = 11;
+ let SubtargetPredicate = inst.SubtargetPredicate;
+ let OtherPredicates = inst.OtherPredicates;
}
}
@@ -1701,10 +1719,14 @@ multiclass GlobalFLATLoadPats_D16_t16<string inst, SDPatternOperator node, Value
multiclass GlobalFLATStorePats<FLAT_Pseudo inst, SDPatternOperator node,
ValueType vt> {
def : FlatStoreSignedPat <inst, node, vt> {
+ let SubtargetPredicate = inst.SubtargetPredicate;
+ let OtherPredicates = inst.OtherPredicates;
let AddedComplexity = 10;
}
def : FlatStoreSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> {
+ let SubtargetPredicate = inst.SubtargetPredicate;
+ let OtherPredicates = inst.OtherPredicates;
let AddedComplexity = 11;
}
}
@@ -1849,7 +1871,9 @@ multiclass ScratchFLATLoadPats_D16_t16<string inst, SDPatternOperator node, Valu
}
multiclass FlatLoadPats<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> {
- def : FlatLoadPat <inst, node, vt>;
+ def : FlatLoadPat <inst, node, vt> {
+ let OtherPredicates = [HasFlatAddressSpace];
+ }
def : FlatLoadSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> {
let AddedComplexity = 9;
@@ -1876,7 +1900,9 @@ multiclass FlatLoadPats_D16_t16<FLAT_Pseudo inst, SDPatternOperator node, ValueT
}
multiclass FlatStorePats<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> {
- def : FlatStorePat <inst, node, vt>;
+ def : FlatStorePat <inst, node, vt> {
+ let OtherPredicates = [HasFlatAddressSpace];
+ }
def : FlatStoreSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> {
let AddedComplexity = 9;
@@ -1893,8 +1919,6 @@ multiclass FlatStorePats_t16<FLAT_Pseudo inst, SDPatternOperator node, ValueType
}
}
-let OtherPredicates = [HasFlatAddressSpace] in {
-
defm : FlatLoadPats <FLAT_LOAD_UBYTE, atomic_load_aext_8_flat, i32>;
defm : FlatLoadPats <FLAT_LOAD_UBYTE, atomic_load_zext_8_flat, i32>;
defm : FlatLoadPats <FLAT_LOAD_USHORT, atomic_load_aext_16_flat, i32>;
@@ -2018,8 +2042,6 @@ defm : FlatAtomicPat <"FLAT_ATOMIC_MAX_F64", "atomic_load_fmax_"#as, f64>;
defm : FlatStorePats <FLAT_STORE_BYTE, truncstorei8_flat, i16>;
defm : FlatStorePats <FLAT_STORE_SHORT, store_flat, i16>;
-} // End OtherPredicates = [HasFlatAddressSpace]
-
let OtherPredicates = [isGFX12Plus] in
defm : FlatAtomicRtnPatWithAddrSpace<"FLAT_ATOMIC_COND_SUB_U32", "int_amdgcn_atomic_cond_sub_u32", "flat_addrspace", i32>;
@@ -2048,8 +2070,6 @@ defm : FlatLoadPats_D16 <FLAT_LOAD_SHORT_D16, load_d16_lo_flat, v2i16>;
defm : FlatLoadPats_D16 <FLAT_LOAD_SHORT_D16, load_d16_lo_flat, v2f16>;
}
-let OtherPredicates = [HasFlatGlobalInsts] in {
-
defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, atomic_load_aext_8_global, i32>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, atomic_load_zext_8_global, i32>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, atomic_load_aext_16_global, i32>;
@@ -2174,7 +2194,7 @@ defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_CMPSWAP", "AMDGPUatomic_cmp_swap_glo
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_XOR", "atomic_load_xor_global", i32>;
defm : GlobalFLATAtomicPatsRtn <"GLOBAL_ATOMIC_CSUB", "int_amdgcn_global_atomic_csub", i32, i32, /* isIntr */ 1>;
-let OtherPredicates = [HasAtomicCSubNoRtnInsts] in
+let SubtargetPredicate = HasAtomicCSubNoRtnInsts in
defm : GlobalFLATAtomicPatsNoRtn <"GLOBAL_ATOMIC_CSUB", "int_amdgcn_global_atomic_csub", i32, i32, /* isIntr */ 1>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_ADD_X2", "atomic_load_add_global", i64>;
@@ -2194,7 +2214,7 @@ defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_XOR_X2", "atomic_load_xor_global", i
let SubtargetPredicate = isGFX12Plus in {
defm : GlobalFLATAtomicPatsRtnWithAddrSpace <"GLOBAL_ATOMIC_COND_SUB_U32", "int_amdgcn_atomic_cond_sub_u32", "global_addrspace", i32>;
- let OtherPredicates = [HasAtomicCSubNoRtnInsts] in
+ let SubtargetPredicate = HasAtomicCSubNoRtnInsts in
defm : GlobalFLATAtomicPatsNoRtnWithAddrSpace <"GLOBAL_ATOMIC_COND_SUB_U32", "int_amdgcn_atomic_cond_sub_u32", "global_addrspace", i32>;
}
@@ -2249,7 +2269,7 @@ let OtherPredicates = [isGFX1250Plus] in {
defm : GlobalStoreLDSPats <GLOBAL_STORE_ASYNC_FROM_LDS_B128, int_amdgcn_global_store_async_from_lds_b128>;
}
-let SubtargetPredicate = HasAtomicFMinFMaxF32GlobalInsts, OtherPredicates = [HasFlatGlobalInsts] in {
+let SubtargetPredicate = HasAtomicFMinFMaxF32GlobalInsts in {
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMIN", "atomic_load_fmin_global", f32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMAX", "atomic_load_fmax_global", f32>;
}
@@ -2267,44 +2287,44 @@ let OtherPredicates = [isGFX12Only] in {
defm : FlatAtomicIntrPat <"FLAT_ATOMIC_FMAX", "int_amdgcn_flat_atomic_fmax_num", f32>;
}
-let OtherPredicates = [HasAtomicFaddNoRtnInsts] in {
+let SubtargetPredicate = HasAtomicFaddNoRtnInsts in {
defm : GlobalFLATAtomicPatsNoRtn <"GLOBAL_ATOMIC_ADD_F32", "atomic_load_fadd_global", f32>;
}
-let OtherPredicates = [HasAtomicBufferGlobalPkAddF16NoRtnInsts] in {
+let SubtargetPredicate = HasAtomicBufferGlobalPkAddF16NoRtnInsts in {
defm : GlobalFLATAtomicPatsNoRtn <"GLOBAL_ATOMIC_PK_ADD_F16", "atomic_load_fadd_global", v2f16>;
}
-let OtherPredicates = [HasAtomicFaddRtnInsts] in {
+let SubtargetPredicate = HasAtomicFaddRtnInsts in {
defm : GlobalFLATAtomicPatsRtn <"GLOBAL_ATOMIC_ADD_F32", "atomic_load_fadd_global", f32>;
}
-let OtherPredicates = [HasAtomicBufferGlobalPkAddF16Insts] in {
+let SubtargetPredicate = HasAtomicBufferGlobalPkAddF16Insts in {
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_PK_ADD_F16", "atomic_load_fadd_global", v2f16>;
}
-let SubtargetPredicate = HasAtomicFMinFMaxF64GlobalInsts, OtherPredicates = [HasFlatGlobalInsts] in {
+let SubtargetPredicate = HasAtomicFMinFMaxF64GlobalInsts in {
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_MIN_F64", "atomic_load_fmin_global", f64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_MAX_F64", "atomic_load_fmax_global", f64>;
}
-let OtherPredicates = [HasFlatBufferGlobalAtomicFaddF64Inst] in {
+let SubtargetPredicate = HasFlatBufferGlobalAtomicFaddF64Inst in {
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_ADD_F64", "atomic_load_fadd_global", f64>;
defm : FlatAtomicPat <"FLAT_ATOMIC_ADD_F64", "atomic_load_fadd_flat", f64>;
}
-let OtherPredicates = [HasFlatAtomicFaddF32Inst] in {
+let SubtargetPredicate = HasFlatAtomicFaddF32Inst in {
defm : FlatAtomicPat <"FLAT_ATOMIC_ADD_F32", "atomic_load_fadd_flat", f32>;
}
-let OtherPredicates = [HasAtomicFlatPkAdd16Insts] in {
+let SubtargetPredicate = HasAtomicFlatPkAdd16Insts in {
defm : FlatAtomicPat <"FLAT_ATOMIC_PK_ADD_F16", "atomic_load_fadd_flat", v2f16>;
defm : FlatAtomicPat <"FLAT_ATOMIC_PK_ADD_BF16", "atomic_load_fadd_flat", v2bf16>;
}
-let OtherPredicates = [HasAtomicGlobalPkAddBF16Inst] in
+let SubtargetPredicate = HasAtomicGlobalPkAddBF16Inst in {
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_PK_ADD_BF16", "atomic_load_fadd_global", v2bf16>;
-} // End OtherPredicates = [HasFlatGlobalInsts], AddedComplexity = 10
+} // End SubtargetPredicate = HasAtomicGlobalPkAddBF16Inst, AddedComplexity = 10
let OtherPredicates = [HasFlatScratchInsts, EnableFlatScratch] in {
diff --git a/llvm/test/CodeGen/AMDGPU/atomics_cond_sub.ll b/llvm/test/CodeGen/AMDGPU/atomics_cond_sub.ll
index 3266fde10f9fb..8d56c17fbc0c1 100644
--- a/llvm/test/CodeGen/AMDGPU/atomics_cond_sub.ll
+++ b/llvm/test/CodeGen/AMDGPU/atomics_cond_sub.ll
@@ -11,18 +11,16 @@ define amdgpu_kernel void @flat_atomic_cond_sub_no_rtn_u32(ptr %addr, i32 %in) {
; GFX12-SDAG: ; %bb.0: ; %entry
; GFX12-SDAG-NEXT: s_load_b96 s[0:2], s[4:5], 0x24
; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
-; GFX12-SDAG-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
-; GFX12-SDAG-NEXT: v_mov_b32_e32 v2, s2
-; GFX12-SDAG-NEXT: flat_atomic_cond_sub_u32 v0, v[0:1], v2 offset:-16 th:TH_ATOMIC_RETURN
+; GFX12-SDAG-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; GFX12-SDAG-NEXT: flat_atomic_cond_sub_u32 v0, v0, v1, s[0:1] offset:-16 th:TH_ATOMIC_RETURN
; GFX12-SDAG-NEXT: s_endpgm
;
; GFX12-GISEL-LABEL: flat_atomic_cond_sub_no_rtn_u32:
; GFX12-GISEL: ; %bb.0: ; %entry
; GFX12-GISEL-NEXT: s_load_b96 s[0:2], s[4:5], 0x24
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
-; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, s0
-; GFX12-GISEL-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v1, s1
-; GFX12-GISEL-NEXT: flat_atomic_cond_sub_u32 v0, v[0:1], v2 offset:-16 th:TH_ATOMIC_RETURN
+; GFX12-GISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
+; GFX12-GISEL-NEXT: flat_atomic_cond_sub_u32 v0, v1, v0, s[0:1] offset:-16 th:TH_ATOMIC_RETURN
; GFX12-GISEL-NEXT: s_endpgm
entry:
%gep = getelementptr inbounds i32, ptr %addr, i32 -4
@@ -35,18 +33,16 @@ define amdgpu_kernel void @flat_atomic_cond_sub_no_rtn_u32_forced(ptr %addr, i32
; GFX12-SDAG: ; %bb.0: ; %entry
; GFX12-SDAG-NEXT: s_load_b96 s[0:2], s[4:5], 0x24
; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
-; GFX12-SDAG-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
-; GFX12-SDAG-NEXT: v_mov_b32_e32 v2, s2
-; GFX12-SDAG-NEXT: flat_atomic_cond_sub_u32 v[0:1], v2 offset:-16
+; GFX12-SDAG-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; GFX12-SDAG-NEXT: flat_atomic_cond_sub_u32 v0, v1, s[0:1] offset:-16
; GFX12-SDAG-NEXT: s_endpgm
;
; GFX12-GISEL-LABEL: flat_atomic_cond_sub_no_rtn_u32_forced:
; GFX12-GISEL: ; %bb.0: ; %entry
; GFX12-GISEL-NEXT: s_load_b96 s[0:2], s[4:5], 0x24
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
-; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, s0
-; GFX12-GISEL-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v1, s1
-; GFX12-GISEL-NEXT: flat_atomic_cond_sub_u32 v[0:1], v2 offset:-16
+; GFX12-GISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
+; GFX12-GISEL-NEXT: flat_atomic_cond_sub_u32 v1, v0, s[0:1] offset:-16
; GFX12-GISEL-NEXT: s_endpgm
entry:
%gep = getelementptr inbounds i32, ptr %addr, i32 -4
@@ -57,13 +53,12 @@ entry:
define amdgpu_kernel void @flat_atomic_cond_sub_rtn_u32(ptr %addr, i32 %in, ptr %use) {
; GFX12-SDAG-LABEL: flat_atomic_cond_sub_rtn_u32:
; GFX12-SDAG: ; %bb.0: ; %entry
-; GFX12-SDAG-NEXT: s_clause 0x1
; GFX12-SDAG-NEXT: s_load_b96 s[0:2], s[4:5], 0x24
+; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX12-SDAG-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
-; GFX12-SDAG-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
-; GFX12-SDAG-NEXT: v_mov_b32_e32 v2, s2
-; GFX12-SDAG-NEXT: flat_atomic_cond_sub_u32 v2, v[0:1], v2 offset:16 th:TH_ATOMIC_RETURN
+; GFX12-SDAG-NEXT: v_mov_b32_e32 v1, s2
+; GFX12-SDAG-NEXT: flat_atomic_cond_sub_u32 v2, v0, v1, s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX12-SDAG-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-SDAG-NEXT: flat_store_b32 v[0:1], v2
@@ -75,9 +70,8 @@ define amdgpu_kernel void @flat_atomic_cond_sub_rtn_u32(ptr %addr, i32 %in, ptr
; GFX12-GISEL-NEXT: s_load_b96 s[0:2], s[4:5], 0x24
; GFX12-GISEL-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
-; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, s0
-; GFX12-GISEL-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v1, s1
-; GFX12-GISEL-NEXT: flat_atomic_cond_sub_u32 v...
[truncated]
|
48a66b7
to
249e734
Compare
This would select the pseudo and then crash when the MC instruction was used. I believe this has been broken since 9912ccb
249e734
to
522784f
Compare
defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Atomic_Pseudo_RTN <
  "global_atomic_add_f32", AVLdSt_32, f32
>;
-let OtherPredicates = [HasAtomicBufferGlobalPkAddF16Insts] in
+let SubtargetPredicate = HasAtomicBufferGlobalPkAddF16Insts in
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What is the rule again regarding using OtherPredicates
vs SubtargetPredicate
?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's not that well formed. Really we should be more structured with what features are defined, and have a better system for combining them. The usual convention is SubtargetPredicate is the main instruction-exists option, and OtherPredicates is the side option for extra modifiers
Pull Request is not mergeable
This would select the pseudo and then crash when the MC instruction
was used. I believe this has been broken since 9912ccb