Skip to content

Commit a69c26a

Browse files
committed
AMDGPU/GlobalISel: Select llvm.amdgcn.struct.buffer.store[.format]
1 parent 533d650 commit a69c26a

6 files changed: 646 additions and 18 deletions

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2770,8 +2770,10 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(MachineInstr &MI,
27702770
return true;
27712771
}
27722772
case Intrinsic::amdgcn_raw_buffer_store:
2773+
case Intrinsic::amdgcn_struct_buffer_store:
27732774
return legalizeBufferStore(MI, MRI, B, false, false);
27742775
case Intrinsic::amdgcn_raw_buffer_store_format:
2776+
case Intrinsic::amdgcn_struct_buffer_store_format:
27752777
return legalizeBufferStore(MI, MRI, B, false, true);
27762778
case Intrinsic::amdgcn_raw_buffer_load:
27772779
case Intrinsic::amdgcn_struct_buffer_load:

llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2343,19 +2343,6 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
23432343
constrainOpWithReadfirstlane(MI, MRI, 2); // M0
23442344
return;
23452345
}
2346-
case Intrinsic::amdgcn_raw_buffer_store:
2347-
case Intrinsic::amdgcn_raw_buffer_store_format:
2348-
case Intrinsic::amdgcn_raw_tbuffer_store: {
2349-
applyDefaultMapping(OpdMapper);
2350-
executeInWaterfallLoop(MI, MRI, {2, 4});
2351-
return;
2352-
}
2353-
case Intrinsic::amdgcn_struct_buffer_store:
2354-
case Intrinsic::amdgcn_struct_tbuffer_store: {
2355-
applyDefaultMapping(OpdMapper);
2356-
executeInWaterfallLoop(MI, MRI, {2, 5});
2357-
return;
2358-
}
23592346
default: {
23602347
if (const AMDGPU::RsrcIntrinsic *RSrcIntrin =
23612348
AMDGPU::lookupRsrcIntrinsic(IntrID)) {
Lines changed: 225 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,225 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2+
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=UNPACKED %s
3+
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx810 -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=PACKED %s
4+
5+
define amdgpu_ps void @struct_buffer_store_format_f16__vgpr_val__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset(half %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
6+
; UNPACKED-LABEL: name: struct_buffer_store_format_f16__vgpr_val__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset
7+
; UNPACKED: bb.1 (%ir-block.0):
8+
; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2
9+
; UNPACKED: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
10+
; UNPACKED: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
11+
; UNPACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
12+
; UNPACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
13+
; UNPACKED: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
14+
; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
15+
; UNPACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
16+
; UNPACKED: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
17+
; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
18+
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
19+
; UNPACKED: BUFFER_STORE_FORMAT_D16_X_gfx80_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "TargetCustom7", align 1, addrspace 4)
20+
; UNPACKED: S_ENDPGM 0
21+
; PACKED-LABEL: name: struct_buffer_store_format_f16__vgpr_val__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset
22+
; PACKED: bb.1 (%ir-block.0):
23+
; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2
24+
; PACKED: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
25+
; PACKED: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
26+
; PACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
27+
; PACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
28+
; PACKED: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
29+
; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
30+
; PACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
31+
; PACKED: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
32+
; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
33+
; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
34+
; PACKED: BUFFER_STORE_FORMAT_D16_X_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "TargetCustom7", align 1, addrspace 4)
35+
; PACKED: S_ENDPGM 0
36+
call void @llvm.amdgcn.struct.buffer.store.format.f16(half %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
37+
ret void
38+
}
39+
40+
define amdgpu_ps void @struct_buffer_store_format_v2f16__vgpr_val__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset(<2 x half> %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
41+
; UNPACKED-LABEL: name: struct_buffer_store_format_v2f16__vgpr_val__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset
42+
; UNPACKED: bb.1 (%ir-block.0):
43+
; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2
44+
; UNPACKED: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
45+
; UNPACKED: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
46+
; UNPACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
47+
; UNPACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
48+
; UNPACKED: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
49+
; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
50+
; UNPACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
51+
; UNPACKED: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
52+
; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
53+
; UNPACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16
54+
; UNPACKED: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
55+
; UNPACKED: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY8]], [[COPY]], implicit $exec
56+
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1
57+
; UNPACKED: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
58+
; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_BOTHEN_exact [[REG_SEQUENCE1]], [[REG_SEQUENCE2]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7", align 1, addrspace 4)
59+
; UNPACKED: S_ENDPGM 0
60+
; PACKED-LABEL: name: struct_buffer_store_format_v2f16__vgpr_val__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset
61+
; PACKED: bb.1 (%ir-block.0):
62+
; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2
63+
; PACKED: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
64+
; PACKED: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
65+
; PACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
66+
; PACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
67+
; PACKED: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
68+
; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
69+
; PACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
70+
; PACKED: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
71+
; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
72+
; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
73+
; PACKED: BUFFER_STORE_FORMAT_D16_XY_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7", align 1, addrspace 4)
74+
; PACKED: S_ENDPGM 0
75+
call void @llvm.amdgcn.struct.buffer.store.format.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
76+
ret void
77+
}
78+
79+
; FIXME:
80+
; define amdgpu_ps void @struct_buffer_store_format_v3f16__vgpr_val__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset(<3 x half> %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
81+
; call void @llvm.amdgcn.struct.buffer.store.format.v3f16(<3 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
82+
; ret void
83+
; }
84+
85+
define amdgpu_ps void @struct_buffer_store_format_v4f16__vgpr_val__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset(<4 x half> %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
86+
; UNPACKED-LABEL: name: struct_buffer_store_format_v4f16__vgpr_val__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset
87+
; UNPACKED: bb.1 (%ir-block.0):
88+
; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3
89+
; UNPACKED: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
90+
; UNPACKED: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
91+
; UNPACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
92+
; UNPACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
93+
; UNPACKED: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr4
94+
; UNPACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr5
95+
; UNPACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
96+
; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
97+
; UNPACKED: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6
98+
; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3
99+
; UNPACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16
100+
; UNPACKED: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
101+
; UNPACKED: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY9]], [[COPY]], implicit $exec
102+
; UNPACKED: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
103+
; UNPACKED: [[V_LSHRREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY10]], [[COPY1]], implicit $exec
104+
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1, [[COPY1]], %subreg.sub2, [[V_LSHRREV_B32_e64_1]], %subreg.sub3
105+
; UNPACKED: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
106+
; UNPACKED: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_BOTHEN_exact [[REG_SEQUENCE1]], [[REG_SEQUENCE2]], [[REG_SEQUENCE]], [[COPY8]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "TargetCustom7", align 1, addrspace 4)
107+
; UNPACKED: S_ENDPGM 0
108+
; PACKED-LABEL: name: struct_buffer_store_format_v4f16__vgpr_val__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset
109+
; PACKED: bb.1 (%ir-block.0):
110+
; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3
111+
; PACKED: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
112+
; PACKED: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
113+
; PACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
114+
; PACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
115+
; PACKED: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr4
116+
; PACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr5
117+
; PACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
118+
; PACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
119+
; PACKED: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6
120+
; PACKED: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
121+
; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3
122+
; PACKED: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
123+
; PACKED: BUFFER_STORE_FORMAT_D16_XYZW_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "TargetCustom7", align 1, addrspace 4)
124+
; PACKED: S_ENDPGM 0
125+
call void @llvm.amdgcn.struct.buffer.store.format.v4f16(<4 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
126+
ret void
127+
}
128+
129+
define amdgpu_ps void @struct_buffer_store_format_f16__sgpr_val__vgpr_rsrc__sgpr_vindex__sgpr_voffset__vgpr_soffset(half inreg %val, <4 x i32> %rsrc, i32 inreg %vindex, i32 inreg %voffset, i32 %soffset) {
130+
; UNPACKED-LABEL: name: struct_buffer_store_format_f16__sgpr_val__vgpr_rsrc__sgpr_vindex__sgpr_voffset__vgpr_soffset
131+
; UNPACKED: bb.1 (%ir-block.0):
132+
; UNPACKED: successors: %bb.2(0x80000000)
133+
; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
134+
; UNPACKED: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
135+
; UNPACKED: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
136+
; UNPACKED: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
137+
; UNPACKED: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2
138+
; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr3
139+
; UNPACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3
140+
; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4
141+
; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr4
142+
; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
143+
; UNPACKED: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
144+
; UNPACKED: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
145+
; UNPACKED: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY6]]
146+
; UNPACKED: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
147+
; UNPACKED: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
148+
; UNPACKED: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
149+
; UNPACKED: bb.2:
150+
; UNPACKED: successors: %bb.3(0x40000000), %bb.2(0x40000000)
151+
; UNPACKED: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub0, implicit $exec
152+
; UNPACKED: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub1, implicit $exec
153+
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
154+
; UNPACKED: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY11]], implicit $exec
155+
; UNPACKED: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY12]].sub0, implicit $exec
156+
; UNPACKED: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY12]].sub1, implicit $exec
157+
; UNPACKED: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
158+
; UNPACKED: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY12]], implicit $exec
159+
; UNPACKED: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
160+
; UNPACKED: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
161+
; UNPACKED: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
162+
; UNPACKED: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec
163+
; UNPACKED: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
164+
; UNPACKED: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1
165+
; UNPACKED: BUFFER_STORE_FORMAT_D16_X_gfx80_BOTHEN_exact [[COPY8]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "TargetCustom7", align 1, addrspace 4)
166+
; UNPACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
167+
; UNPACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
168+
; UNPACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
169+
; UNPACKED: bb.3:
170+
; UNPACKED: successors: %bb.4(0x80000000)
171+
; UNPACKED: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
172+
; UNPACKED: bb.4:
173+
; UNPACKED: S_ENDPGM 0
174+
; PACKED-LABEL: name: struct_buffer_store_format_f16__sgpr_val__vgpr_rsrc__sgpr_vindex__sgpr_voffset__vgpr_soffset
175+
; PACKED: bb.1 (%ir-block.0):
176+
; PACKED: successors: %bb.2(0x80000000)
177+
; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
178+
; PACKED: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
179+
; PACKED: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
180+
; PACKED: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
181+
; PACKED: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2
182+
; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr3
183+
; PACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3
184+
; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4
185+
; PACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr4
186+
; PACKED: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
187+
; PACKED: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
188+
; PACKED: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
189+
; PACKED: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY6]]
190+
; PACKED: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
191+
; PACKED: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
192+
; PACKED: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
193+
; PACKED: bb.2:
194+
; PACKED: successors: %bb.3(0x40000000), %bb.2(0x40000000)
195+
; PACKED: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub0, implicit $exec
196+
; PACKED: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub1, implicit $exec
197+
; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
198+
; PACKED: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY11]], implicit $exec
199+
; PACKED: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY12]].sub0, implicit $exec
200+
; PACKED: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY12]].sub1, implicit $exec
201+
; PACKED: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
202+
; PACKED: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY12]], implicit $exec
203+
; PACKED: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
204+
; PACKED: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
205+
; PACKED: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
206+
; PACKED: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec
207+
; PACKED: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
208+
; PACKED: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1
209+
; PACKED: BUFFER_STORE_FORMAT_D16_X_BOTHEN_exact [[COPY8]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "TargetCustom7", align 1, addrspace 4)
210+
; PACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
211+
; PACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
212+
; PACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
213+
; PACKED: bb.3:
214+
; PACKED: successors: %bb.4(0x80000000)
215+
; PACKED: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
216+
; PACKED: bb.4:
217+
; PACKED: S_ENDPGM 0
218+
call void @llvm.amdgcn.struct.buffer.store.format.f16(half %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
219+
ret void
220+
}
221+
222+
declare void @llvm.amdgcn.struct.buffer.store.format.f16(half, <4 x i32>, i32, i32, i32, i32 immarg)
223+
declare void @llvm.amdgcn.struct.buffer.store.format.v2f16(<2 x half>, <4 x i32>, i32, i32, i32, i32 immarg)
224+
declare void @llvm.amdgcn.struct.buffer.store.format.v3f16(<3 x half>, <4 x i32>, i32, i32, i32, i32 immarg)
225+
declare void @llvm.amdgcn.struct.buffer.store.format.v4f16(<4 x half>, <4 x i32>, i32, i32, i32, i32 immarg)

0 commit comments

Comments
 (0)