-
Notifications
You must be signed in to change notification settings - Fork 14.9k
AMDGPU: Add tests for atomics with AGPR operands #155820
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
AMDGPU: Add tests for atomics with AGPR operands #155820
Conversation
The handling of AGPR vs. VGPR operand restrictions is broken and results in bugs like #155777 and missed optimizations. Add some baseline tests for future improvements.
@llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) Changes: The handling of AGPR vs. VGPR operand restrictions is broken and results in bugs like #155777 and missed optimizations. Add some baseline tests for future improvements. Patch is 197.87 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/155820.diff 5 Files Affected:
diff --git a/llvm/test/CodeGen/AMDGPU/a-v-ds-atomic-cmpxchg.ll b/llvm/test/CodeGen/AMDGPU/a-v-ds-atomic-cmpxchg.ll
new file mode 100644
index 0000000000000..e8f949d31b87e
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/a-v-ds-atomic-cmpxchg.ll
@@ -0,0 +1,237 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck %s
+
+; Compare value (%data0) and new value (%data1) are AV, result used as AV
+define void @ds_atomic_cmpxchg_i32_ret_av_av__av(ptr addrspace(3) %ptr) #0 {
+; CHECK-LABEL: ds_atomic_cmpxchg_i32_ret_av_av__av:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; def v1
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; def v2
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: ds_cmpst_rtn_b32 v0, v0, v1, v2 offset:40
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; use v0
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %gep.0 = getelementptr inbounds [512 x i32], ptr addrspace(3) %ptr, i32 0, i32 10
+ %data0 = call i32 asm "; def $0", "=^VA"()
+ %data1 = call i32 asm "; def $0", "=^VA"()
+ %pair = cmpxchg ptr addrspace(3) %gep.0, i32 %data0, i32 %data1 seq_cst monotonic
+ %result = extractvalue { i32, i1 } %pair, 0
+ call void asm "; use $0", "^VA"(i32 %result)
+ ret void
+}
+
+; Compare value (%data0) and new value (%data1) are AV, result used as VGPR
+define void @ds_atomic_cmpxchg_i32_ret_av_av__v(ptr addrspace(3) %ptr) #0 {
+; CHECK-LABEL: ds_atomic_cmpxchg_i32_ret_av_av__v:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; def v1
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; def v2
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: ds_cmpst_rtn_b32 v0, v0, v1, v2 offset:40
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; use v0
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %gep.0 = getelementptr inbounds [512 x i32], ptr addrspace(3) %ptr, i32 0, i32 10
+ %data0 = call i32 asm "; def $0", "=^VA"()
+ %data1 = call i32 asm "; def $0", "=^VA"()
+ %pair = cmpxchg ptr addrspace(3) %gep.0, i32 %data0, i32 %data1 seq_cst monotonic
+ %result = extractvalue { i32, i1 } %pair, 0
+ call void asm "; use $0", "v"(i32 %result)
+ ret void
+}
+
+; Compare value (%data0) and new value (%data1) are AV, result used as AGPR
+define void @ds_atomic_cmpxchg_i32_ret_av_av__a(ptr addrspace(3) %ptr) #0 {
+; CHECK-LABEL: ds_atomic_cmpxchg_i32_ret_av_av__a:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; def v1
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; def v2
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: ds_cmpst_rtn_b32 v0, v0, v1, v2 offset:40
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: v_accvgpr_write_b32 a0, v0
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; use a0
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %gep.0 = getelementptr inbounds [512 x i32], ptr addrspace(3) %ptr, i32 0, i32 10
+ %data0 = call i32 asm "; def $0", "=^VA"()
+ %data1 = call i32 asm "; def $0", "=^VA"()
+ %pair = cmpxchg ptr addrspace(3) %gep.0, i32 %data0, i32 %data1 seq_cst monotonic
+ %result = extractvalue { i32, i1 } %pair, 0
+ call void asm "; use $0", "a"(i32 %result)
+ ret void
+}
+
+; FIXME: Broken
+; define void @ds_atomic_cmpxchg_i32_ret_a_a__a(ptr addrspace(3) %ptr) #0 {
+; %gep.0 = getelementptr inbounds [512 x i32], ptr addrspace(3) %ptr, i32 0, i32 10
+; %data0 = call i32 asm "; def $0", "=a"()
+; %data1 = call i32 asm "; def $0", "=a"()
+; %pair = cmpxchg ptr addrspace(3) %gep.0, i32 %data0, i32 %data1 seq_cst monotonic
+; %result = extractvalue { i32, i1 } %pair, 0
+; call void asm "; use $0", "a"(i32 %result)
+; ret void
+; }
+
+; FIXME: Broken
+; define void @ds_atomic_cmpxchg_i32_ret_a_a__v(ptr addrspace(3) %ptr) #0 {
+; %gep.0 = getelementptr inbounds [512 x i32], ptr addrspace(3) %ptr, i32 0, i32 10
+; %data0 = call i32 asm "; def $0", "=a"()
+; %data1 = call i32 asm "; def $0", "=a"()
+; %pair = cmpxchg ptr addrspace(3) %gep.0, i32 %data0, i32 %data1 seq_cst monotonic
+; %result = extractvalue { i32, i1 } %pair, 0
+; call void asm "; use $0", "v"(i32 %result)
+; ret void
+; }
+
+; FIXME: Broken
+; define void @ds_atomic_cmpxchg_i32_ret_v_a__v(ptr addrspace(3) %ptr) #0 {
+; %gep.0 = getelementptr inbounds [512 x i32], ptr addrspace(3) %ptr, i32 0, i32 10
+; %data0 = call i32 asm "; def $0", "=v"()
+; %data1 = call i32 asm "; def $0", "=a"()
+; %pair = cmpxchg ptr addrspace(3) %gep.0, i32 %data0, i32 %data1 seq_cst monotonic
+; %result = extractvalue { i32, i1 } %pair, 0
+; call void asm "; use $0", "v"(i32 %result)
+; ret void
+; }
+
+; FIXME: Broken
+; define void @ds_atomic_cmpxchg_i32_ret_a_v__v(ptr addrspace(3) %ptr) #0 {
+; %gep.0 = getelementptr inbounds [512 x i32], ptr addrspace(3) %ptr, i32 0, i32 10
+; %data0 = call i32 asm "; def $0", "=a"()
+; %data1 = call i32 asm "; def $0", "=v"()
+; %pair = cmpxchg ptr addrspace(3) %gep.0, i32 %data0, i32 %data1 seq_cst monotonic
+; %result = extractvalue { i32, i1 } %pair, 0
+; call void asm "; use $0", "v"(i32 %result)
+; ret void
+; }
+
+; Compare value (%data0) and new value (%data1) are VGPR, result used as AGPR
+define void @ds_atomic_cmpxchg_i32_ret_v_v__a(ptr addrspace(3) %ptr) #0 {
+; CHECK-LABEL: ds_atomic_cmpxchg_i32_ret_v_v__a:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; def v1
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; def v2
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: ds_cmpst_rtn_b32 v0, v0, v1, v2 offset:40
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: v_accvgpr_write_b32 a0, v0
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; use a0
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %gep.0 = getelementptr inbounds [512 x i32], ptr addrspace(3) %ptr, i32 0, i32 10
+ %data0 = call i32 asm "; def $0", "=v"()
+ %data1 = call i32 asm "; def $0", "=v"()
+ %pair = cmpxchg ptr addrspace(3) %gep.0, i32 %data0, i32 %data1 seq_cst monotonic
+ %result = extractvalue { i32, i1 } %pair, 0
+ call void asm "; use $0", "a"(i32 %result)
+ ret void
+}
+
+; Compare value (%data0) is AV, new value (%data1) is VGPR, result used as AV
+define void @ds_atomic_cmpxchg_i32_ret_av_v__av(ptr addrspace(3) %ptr) #0 {
+; CHECK-LABEL: ds_atomic_cmpxchg_i32_ret_av_v__av:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; def v1
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; def v2
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: ds_cmpst_rtn_b32 v0, v0, v1, v2 offset:40
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; use v0
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %gep.0 = getelementptr inbounds [512 x i32], ptr addrspace(3) %ptr, i32 0, i32 10
+ %data0 = call i32 asm "; def $0", "=^VA"()
+ %data1 = call i32 asm "; def $0", "=v"()
+ %pair = cmpxchg ptr addrspace(3) %gep.0, i32 %data0, i32 %data1 seq_cst monotonic
+ %result = extractvalue { i32, i1 } %pair, 0
+ call void asm "; use $0", "^VA"(i32 %result)
+ ret void
+}
+
+; Compare value (%data0) is VGPR, new value (%data1) is AV, result used as AV
+define void @ds_atomic_cmpxchg_i32_ret_v_av__av(ptr addrspace(3) %ptr) #0 {
+; CHECK-LABEL: ds_atomic_cmpxchg_i32_ret_v_av__av:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; def v1
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; def v2
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: ds_cmpst_rtn_b32 v0, v0, v1, v2 offset:40
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; use v0
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %gep.0 = getelementptr inbounds [512 x i32], ptr addrspace(3) %ptr, i32 0, i32 10
+ %data0 = call i32 asm "; def $0", "=v"()
+ %data1 = call i32 asm "; def $0", "=^VA"()
+ %pair = cmpxchg ptr addrspace(3) %gep.0, i32 %data0, i32 %data1 seq_cst monotonic
+ %result = extractvalue { i32, i1 } %pair, 0
+ call void asm "; use $0", "^VA"(i32 %result)
+ ret void
+}
+
+; FIXME: Broken
+; define void @ds_atomic_cmpxchg_i32_ret_av_a__av(ptr addrspace(3) %ptr) #0 {
+; %gep.0 = getelementptr inbounds [512 x i32], ptr addrspace(3) %ptr, i32 0, i32 10
+; %data0 = call i32 asm "; def $0", "=^VA"()
+; %data1 = call i32 asm "; def $0", "=a"()
+; %pair = cmpxchg ptr addrspace(3) %gep.0, i32 %data0, i32 %data1 seq_cst monotonic
+; %result = extractvalue { i32, i1 } %pair, 0
+; call void asm "; use $0", "^VA"(i32 %result)
+; ret void
+; }
+
+; Compare value (%data0) is AGPR, new value (%data1) is AV, result used as AV
+define void @ds_atomic_cmpxchg_i32_ret_a_av__av(ptr addrspace(3) %ptr) #0 {
+; CHECK-LABEL: ds_atomic_cmpxchg_i32_ret_a_av__av:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; def a0
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: v_accvgpr_read_b32 v2, a0
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; def v1
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: ds_cmpst_rtn_b32 v0, v0, v2, v1 offset:40
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; use v0
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %gep.0 = getelementptr inbounds [512 x i32], ptr addrspace(3) %ptr, i32 0, i32 10
+ %data0 = call i32 asm "; def $0", "=a"()
+ %data1 = call i32 asm "; def $0", "=^VA"()
+ %pair = cmpxchg ptr addrspace(3) %gep.0, i32 %data0, i32 %data1 seq_cst monotonic
+ %result = extractvalue { i32, i1 } %pair, 0
+ call void asm "; use $0", "^VA"(i32 %result)
+ ret void
+}
+
+attributes #0 = { nounwind "amdgpu-waves-per-eu"="10,10" }
diff --git a/llvm/test/CodeGen/AMDGPU/a-v-ds-atomicrmw.ll b/llvm/test/CodeGen/AMDGPU/a-v-ds-atomicrmw.ll
new file mode 100644
index 0000000000000..4c62409a85c00
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/a-v-ds-atomicrmw.ll
@@ -0,0 +1,1123 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn -mcpu=gfx90a < %s | FileCheck %s
+
+;---------------------------------------------------------------------
+; xchg i32 cases
+;---------------------------------------------------------------------
+
+; Input and result use AGPR. The exchange targets %gep.0 (element 10, byte offset 40).
+define void @ds_atomic_xchg_i32_ret_a_a(ptr addrspace(3) %ptr) #0 {
+; CHECK-LABEL: ds_atomic_xchg_i32_ret_a_a:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; def a0
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: v_accvgpr_read_b32 v1, a0
+; CHECK-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 offset:40
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: v_accvgpr_write_b32 a0, v0
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; use a0
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %gep.0 = getelementptr inbounds [512 x i32], ptr addrspace(3) %ptr, i32 0, i32 10
+ %data = call i32 asm "; def $0", "=a"()
+ %result = atomicrmw xchg ptr addrspace(3) %gep.0, i32 %data seq_cst
+ call void asm "; use $0", "a"(i32 %result)
+ ret void
+}
+
+; Input is AGPR, result used as VGPR. The exchange targets %gep.0 (element 10, byte offset 40).
+define void @ds_atomic_xchg_i32_ret_a_v(ptr addrspace(3) %ptr) #0 {
+; CHECK-LABEL: ds_atomic_xchg_i32_ret_a_v:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; def a0
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: v_accvgpr_read_b32 v1, a0
+; CHECK-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 offset:40
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; use v0
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %gep.0 = getelementptr inbounds [512 x i32], ptr addrspace(3) %ptr, i32 0, i32 10
+ %data = call i32 asm "; def $0", "=a"()
+ %result = atomicrmw xchg ptr addrspace(3) %gep.0, i32 %data seq_cst
+ call void asm "; use $0", "v"(i32 %result)
+ ret void
+}
+
+; Input is VGPR, result used as AGPR. The exchange targets %gep.0 (element 10, byte offset 40).
+define void @ds_atomic_xchg_i32_ret_v_a(ptr addrspace(3) %ptr) #0 {
+; CHECK-LABEL: ds_atomic_xchg_i32_ret_v_a:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; def v1
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 offset:40
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: v_accvgpr_write_b32 a0, v0
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; use a0
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %gep.0 = getelementptr inbounds [512 x i32], ptr addrspace(3) %ptr, i32 0, i32 10
+ %data = call i32 asm "; def $0", "=v"()
+ %result = atomicrmw xchg ptr addrspace(3) %gep.0, i32 %data seq_cst
+ call void asm "; use $0", "a"(i32 %result)
+ ret void
+}
+
+; Input is AV, result also used as AV. The exchange targets %gep.0 (element 10, byte offset 40).
+define void @ds_atomic_xchg_i32_ret_av_av(ptr addrspace(3) %ptr) #0 {
+; CHECK-LABEL: ds_atomic_xchg_i32_ret_av_av:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; def v1
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 offset:40
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; use v0
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %gep.0 = getelementptr inbounds [512 x i32], ptr addrspace(3) %ptr, i32 0, i32 10
+ %data = call i32 asm "; def $0", "=^VA"()
+ %result = atomicrmw xchg ptr addrspace(3) %gep.0, i32 %data seq_cst
+ call void asm "; use $0", "^VA"(i32 %result)
+ ret void
+}
+
+; Input is AV, result used as VGPR. The exchange targets %gep.0 (element 10, byte offset 40).
+define void @ds_atomic_xchg_i32_ret_av_v(ptr addrspace(3) %ptr) #0 {
+; CHECK-LABEL: ds_atomic_xchg_i32_ret_av_v:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; def v1
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 offset:40
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; use v0
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %gep.0 = getelementptr inbounds [512 x i32], ptr addrspace(3) %ptr, i32 0, i32 10
+ %data = call i32 asm "; def $0", "=^VA"()
+ %result = atomicrmw xchg ptr addrspace(3) %gep.0, i32 %data seq_cst
+ call void asm "; use $0", "v"(i32 %result)
+ ret void
+}
+
+; Input is AV, result used as AGPR. The exchange targets %gep.0 (element 10, byte offset 40).
+define void @ds_atomic_xchg_i32_ret_av_a(ptr addrspace(3) %ptr) #0 {
+; CHECK-LABEL: ds_atomic_xchg_i32_ret_av_a:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; def v1
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 offset:40
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: v_accvgpr_write_b32 a0, v0
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; use a0
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %gep.0 = getelementptr inbounds [512 x i32], ptr addrspace(3) %ptr, i32 0, i32 10
+ %data = call i32 asm "; def $0", "=^VA"()
+ %result = atomicrmw xchg ptr addrspace(3) %gep.0, i32 %data seq_cst
+ call void asm "; use $0", "a"(i32 %result)
+ ret void
+}
+
+; Input is AGPR, result used as AV. The exchange targets %gep.0 (element 10, byte offset 40).
+define void @ds_atomic_xchg_i32_ret_a_av(ptr addrspace(3) %ptr) #0 {
+; CHECK-LABEL: ds_atomic_xchg_i32_ret_a_av:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; def a0
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: v_accvgpr_read_b32 v1, a0
+; CHECK-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 offset:40
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; use v0
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %gep.0 = getelementptr inbounds [512 x i32], ptr addrspace(3) %ptr, i32 0, i32 10
+ %data = call i32 asm "; def $0", "=a"()
+ %result = atomicrmw xchg ptr addrspace(3) %gep.0, i32 %data seq_cst
+ call void asm "; use $0", "^VA"(i32 %result)
+ ret void
+}
+
+; Input is VGPR, result used as AV. The exchange targets %gep.0 (element 10, byte offset 40).
+define void @ds_atomic_xchg_i32_ret_v_av(ptr addrspace(3) %ptr) #0 {
+; CHECK-LABEL: ds_atomic_xchg_i32_ret_v_av:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; def v1
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 offset:40
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; use v0
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %gep.0 = getelementptr inbounds [512 x i32], ptr addrspace(3) %ptr, i32 0, i32 10
+ %data = call i32 asm "; def $0", "=v"()
+ %result = atomicrmw xchg ptr addrspace(3) %gep.0, i32 %data seq_cst
+ call void asm "; use $0", "^VA"(i32 %result)
+ ret void
+}
+
+define void @ds_atomic_xchg_i32_ret_av_av_no_agprs(ptr addrspace(3) %ptr) #0 {
+; CHECK-LABEL: ds_atomic_xchg_i32_ret_av_av_no_agprs:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_accvgpr_write_b32 a2, v40 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_write_b32 a3, v41 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_write_b32 a4, v42 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_write_b32 a5, v43 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_write_b32 a6, v44 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_write_b32 a7, v45 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_write_b32 a8, v46 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_write_b32 a9, v47 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_write_b32 a10, v56 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_write_b32 a11, v57 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_write_b32 a12, v58 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_write_b32 a13, v59 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_write_b32 a14, v60 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_write_b32 a15, v61 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_write_b32 a16, v62 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_write_b32 a17, v63 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_write_b32 a0, v0
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; def v[0:31]
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; CHECK-NEXT: s_nop 0
+; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
+; CHECK-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
+; CHECK-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
+; CHECK-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
+; CHECK-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
+; CHECK-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
+; CHECK-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
+; CHECK-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
+; CHECK-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
+; CHECK-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
+; CHECK-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
+; CHECK-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
+; CHECK-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
+; CHECK-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
+; CHECK-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
+; CHECK-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
+; CHECK-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; def a1
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: v_accvgpr_write_b32 a18, v31 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_read_b32 v0, a0
+; CHECK-NEXT: v_accvgpr_read_b32 v1, a1
+; CHECK-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: v_accvgpr_write_b32 a31, v18 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_write_b32 a30, v19 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_write_b32 a29, v20 ; Re...
[truncated]
|
; CHECK-NEXT: ;;#ASMEND | ||
; CHECK-NEXT: s_setpc_b64 s[30:31] | ||
%gep.0 = getelementptr inbounds [512 x i32], ptr addrspace(3) %ptr, i32 0, i32 10 | ||
%data0 = call i32 asm "; def $0", "=^VA"() |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What does the `=^VA` constraint do?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It uses the AV register class, so the value may be allocated to either a VGPR or an AGPR.
…oject into bugprone-method-hiding * 'bugprone-method-hiding' of github.com:t-a-james/llvm-project: (230 commits) [SimplifyCFG] Move token type check into canReplaceOperandWithVariable() [ADT] Fix signed integer overflow (llvm#155826) [Offload] Update LIBOMPTARGET_INFO text for `attach` map-type. (llvm#155509) [CMake][AIX] Enable CMP0182: Create shared library archives by default (llvm#155686) AMDGPU: Add tests for atomics with AGPR operands (llvm#155820) [AArch64] Split zero cycle zeoring per register class (llvm#154561) [gn build] Port fa883e1 [mlir][tosa] Allow shift operand of tosa::MulOp as non-constant (llvm#155197) [AArch64][NFC] Add MCInstrAnalysis unittests (llvm#155609) [Offload][OpenMP] Tests require libc on GPU for printf (llvm#155785) AMDGPU: Add missing verifier tests for load/store AGPR case (llvm#155815) [lldb-mcp] Fix building for Windows Revert "[lldb] Correct a usage after a rename was merged. (llvm#155720)" Revert "[lldb] NFC Moving mcp::Transport into its own file. (llvm#155711)" [lldb][test] Run ranges::ref_vew test only for libc++ (llvm#155813) [SCCP][FuncSpec] Poison unreachable constant global variable user (llvm#155753) [LoongArch] Lowering v32i8 vector mask generation to `VMSKLTZ` (llvm#149953) [flang][docs][NFC] Remove stray backtick (llvm#154974) [MLIR] Apply clang-tidy fixes for misc-use-internal-linkage in LinalgOps.cpp (NFC) [MLIR] Apply clang-tidy fixes for performance-move-const-arg in VariantValue.cpp (NFC) ...
The handling of AGPR vs. VGPR operand restrictions is
broken and results in bugs like #155777 and missed optimizations.
Add some baseline tests for future improvements.