-
Notifications
You must be signed in to change notification settings - Fork 14.9k
AMDGPU: Add agpr versions of global return atomics #156890
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
arsenm
merged 3 commits into
main
from
users/arsenm/amdgpu/add-agpr-variants-global-rtn-atomics
Sep 4, 2025
Merged
AMDGPU: Add agpr versions of global return atomics #156890
arsenm
merged 3 commits into
main
from
users/arsenm/amdgpu/add-agpr-variants-global-rtn-atomics
Sep 4, 2025
Conversation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This would select the pseudo and then crash when the MC instruction was used. I believe this has been broken since 9912ccb
Add comprehensive tests for global atomics with return in agpr / AV usage contexts.
Incremental step towards removing the special case hack in TargetInstrInfo::getRegClass.
This stack of pull requests is managed by Graphite. Learn more about stacking. |
@llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) ChangesIncremental step towards removing the special case hack Full diff: https://github.com/llvm/llvm-project/pull/156890.diff 3 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/DSInstructions.td b/llvm/lib/Target/AMDGPU/DSInstructions.td
index bec920380e081..f2e432fa8d7f5 100644
--- a/llvm/lib/Target/AMDGPU/DSInstructions.td
+++ b/llvm/lib/Target/AMDGPU/DSInstructions.td
@@ -51,22 +51,6 @@ class DS_Pseudo <string opName, dag outs, dag ins, string asmOps, list<dag> patt
let Uses = !if(has_m0_read, [M0, EXEC], [EXEC]);
}
-class DstOperandIsAV<dag OperandList> {
- bit ret = OperandIsAV<!getdagarg<DAGOperand>(OperandList, "vdst")>.ret;
-}
-
-class DstOperandIsAGPR<dag OperandList> {
- bit ret = OperandIsAGPR<!getdagarg<DAGOperand>(OperandList, "vdst")>.ret;
-}
-
-class DataOperandIsAV<dag OperandList> {
- bit ret = OperandIsAV<!getdagarg<DAGOperand>(OperandList, "data0")>.ret;
-}
-
-class DataOperandIsAGPR<dag OperandList> {
- bit ret = OperandIsAGPR<!getdagarg<DAGOperand>(OperandList, "data0")>.ret;
-}
-
class DS_Real <DS_Pseudo ps, string opName = ps.Mnemonic> :
InstSI <ps.OutOperandList, ps.InOperandList, opName # ps.AsmOperands>,
Enc64 {
@@ -115,13 +99,13 @@ class DS_Real <DS_Pseudo ps, string opName = ps.Mnemonic> :
// register fields are only 8-bit, so data operands must all be AGPR
// or VGPR.
defvar DstOpIsAV = !if(ps.has_vdst,
- DstOperandIsAV<ps.OutOperandList>.ret, 0);
+ VDstOperandIsAV<ps.OutOperandList>.ret, 0);
defvar DstOpIsAGPR = !if(ps.has_vdst,
- DstOperandIsAGPR<ps.OutOperandList>.ret, 0);
+ VDstOperandIsAGPR<ps.OutOperandList>.ret, 0);
defvar DataOpIsAV = !if(!or(ps.has_data0, ps.has_gws_data0),
- DataOperandIsAV<ps.InOperandList>.ret, 0);
+ Data0OperandIsAV<ps.InOperandList>.ret, 0);
defvar DataOpIsAGPR = !if(!or(ps.has_data0, ps.has_gws_data0),
- DataOperandIsAGPR<ps.InOperandList>.ret, 0);
+ Data0OperandIsAGPR<ps.InOperandList>.ret, 0);
bits<1> acc = !if(ps.has_vdst,
!if(DstOpIsAV, vdst{9}, DstOpIsAGPR),
diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td
index 0ac5f3d50f1b5..fd7c9a741c301 100644
--- a/llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -137,7 +137,18 @@ class FLAT_Real <bits<7> op, FLAT_Pseudo ps, string opName = ps.Mnemonic> :
// unsigned for flat accesses.
bits<13> offset;
// GFX90A+ only: instruction uses AccVGPR for data
- bits<1> acc = !if(ps.has_vdst, vdst{9}, !if(ps.has_data, vdata{9}, 0));
+ defvar DstOpIsAV = !if(ps.has_vdst,
+ VDstOperandIsAV<ps.OutOperandList>.ret, 0);
+ defvar DstOpIsAGPR = !if(ps.has_vdst,
+ VDstOperandIsAGPR<ps.OutOperandList>.ret, 0);
+ defvar DataOpIsAV = !if(ps.has_data,
+ VDataOperandIsAV<ps.InOperandList>.ret, 0);
+ defvar DataOpIsAGPR = !if(ps.has_data,
+ VDataOperandIsAGPR<ps.InOperandList>.ret, 0);
+
+ bits<1> acc = !if(ps.has_vdst,
+ !if(DstOpIsAV, vdst{9}, DstOpIsAGPR),
+ !if(DataOpIsAV, vdata{9}, DataOpIsAGPR));
// We don't use tfe right now, and it was removed in gfx9.
bits<1> tfe = 0;
@@ -860,6 +871,30 @@ multiclass FLAT_Global_Atomic_Pseudo_RTN<
let enabled_saddr = 1;
let FPAtomic = data_vt.isFP;
}
+
+ defvar vdst_op_agpr = getEquivalentAGPROperand<vdst_op>.ret;
+ defvar data_op_agpr = getEquivalentAGPROperand<data_op>.ret;
+
+ let SubtargetPredicate = isGFX90APlus in {
+ def _RTN_agpr : FLAT_AtomicRet_Pseudo <opName,
+ (outs vdst_op_agpr:$vdst),
+ (ins VReg_64:$vaddr, data_op_agpr:$vdata, flat_offset:$offset, CPol_GLC1:$cpol),
+ " $vdst, $vaddr, $vdata, off$offset$cpol">,
+ GlobalSaddrTable<0, opName#"_rtn_agpr"> {
+ let has_saddr = 1;
+ let FPAtomic = data_vt.isFP;
+ }
+
+ def _SADDR_RTN_agpr : FLAT_AtomicRet_Pseudo <opName,
+ (outs vdst_op_agpr:$vdst),
+ (ins VGPR_32:$vaddr, data_op_agpr:$vdata, SReg_64_XEXEC_XNULL:$saddr, flat_offset:$offset, CPol_GLC1:$cpol),
+ " $vdst, $vaddr, $vdata, $saddr$offset$cpol">,
+ GlobalSaddrTable<1, opName#"_rtn_agpr"> {
+ let has_saddr = 1;
+ let enabled_saddr = 1;
+ let FPAtomic = data_vt.isFP;
+ }
+ }
}
}
@@ -2637,8 +2672,10 @@ multiclass FLAT_Global_Real_Atomics_vi<bits<7> op,
FLAT_Real_AllAddr_vi<op, has_sccb> {
def _RTN_vi : FLAT_Real_vi <op, !cast<FLAT_Pseudo>(NAME#"_RTN"), has_sccb>;
def _SADDR_RTN_vi : FLAT_Real_vi <op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN"), has_sccb>;
-}
+ def _RTN_agpr_vi : FLAT_Real_vi <op, !cast<FLAT_Pseudo>(NAME#"_RTN_agpr"), has_sccb>;
+ def _SADDR_RTN_agpr_vi : FLAT_Real_vi <op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN_agpr"), has_sccb>;
+}
defm FLAT_ATOMIC_SWAP : FLAT_Real_Atomics_vi <0x40>;
defm FLAT_ATOMIC_CMPSWAP : FLAT_Real_Atomics_vi <0x41>;
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index 8c2bd3d3962ce..d9746a17e75eb 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -1492,3 +1492,27 @@ class OperandIsVGPR<DAGOperand Op> {
defvar reg_class = getRegClassFromOp<Op>.ret;
bit ret = !and(reg_class.HasVGPR, !not(reg_class.HasAGPR));
}
+
+class VDstOperandIsAV<dag OperandList> {
+ bit ret = OperandIsAV<!getdagarg<DAGOperand>(OperandList, "vdst")>.ret;
+}
+
+class VDstOperandIsAGPR<dag OperandList> {
+ bit ret = OperandIsAGPR<!getdagarg<DAGOperand>(OperandList, "vdst")>.ret;
+}
+
+class Data0OperandIsAV<dag OperandList> {
+ bit ret = OperandIsAV<!getdagarg<DAGOperand>(OperandList, "data0")>.ret;
+}
+
+class Data0OperandIsAGPR<dag OperandList> {
+ bit ret = OperandIsAGPR<!getdagarg<DAGOperand>(OperandList, "data0")>.ret;
+}
+
+class VDataOperandIsAV<dag OperandList> {
+ bit ret = OperandIsAV<!getdagarg<DAGOperand>(OperandList, "vdata")>.ret;
+}
+
+class VDataOperandIsAGPR<dag OperandList> {
+ bit ret = OperandIsAGPR<!getdagarg<DAGOperand>(OperandList, "vdata")>.ret;
+}
|
rampitec
approved these changes
Sep 4, 2025
Base automatically changed from
users/arsenm/amdgpu/add-more-tests-agpr-flat-global-atomics
to
main
September 4, 2025 23:39
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Incremental step towards removing the special case hack
in TargetInstrInfo::getRegClass.