@@ -152,11 +152,10 @@ define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_buffer_load_v4f32(<4 x i32>
152
152
ret <3 x float > %shuf
153
153
}
154
154
155
- ; FIXME: Not handled even though only 2 elts used
156
155
; CHECK-LABEL: @extract_elt0_elt1_buffer_load_v4f32_2(
157
- ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32 (<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
158
- ; CHECK-NEXT: %elt0 = extractelement <4 x float> %data, i32 0
159
- ; CHECK-NEXT: %elt1 = extractelement <4 x float> %data, i32 1
156
+ ; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
157
+ ; CHECK-NEXT: %elt0 = extractelement <2 x float> %data, i32 0
158
+ ; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1
160
159
; CHECK-NEXT: %ins0 = insertvalue { float, float } undef, float %elt0, 0
161
160
; CHECK-NEXT: %ins1 = insertvalue { float, float } %ins0, float %elt1, 1
162
161
; CHECK-NEXT: ret { float, float } %ins1
@@ -169,6 +168,74 @@ define amdgpu_ps { float, float } @extract_elt0_elt1_buffer_load_v4f32_2(<4 x i3
169
168
ret { float , float } %ins1
170
169
}
171
170
171
; Elements 0, 1 and 2 of a <4 x float> buffer load are extracted and packed
; into a { float, float, float } aggregate; element 3 is never read. The
; expected output below uses the v3f32 form of the load.
; CHECK-LABEL: @extract_elt0_elt1_elt2_buffer_load_v4f32_2(
; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
; CHECK-NEXT: %elt0 = extractelement <3 x float> %data, i32 0
; CHECK-NEXT: %elt1 = extractelement <3 x float> %data, i32 1
; CHECK-NEXT: %elt2 = extractelement <3 x float> %data, i32 2
; CHECK-NEXT: %ins0 = insertvalue { float, float, float } undef, float %elt0, 0
; CHECK-NEXT: %ins1 = insertvalue { float, float, float } %ins0, float %elt1, 1
; CHECK-NEXT: %ins2 = insertvalue { float, float, float } %ins1, float %elt2, 2
; CHECK-NEXT: ret { float, float, float } %ins2
define amdgpu_ps { float, float, float } @extract_elt0_elt1_elt2_buffer_load_v4f32_2(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
  %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
  ; Only the first three lanes of the loaded vector are consumed.
  %elt0 = extractelement <4 x float> %data, i32 0
  %elt1 = extractelement <4 x float> %data, i32 1
  %elt2 = extractelement <4 x float> %data, i32 2
  %ins0 = insertvalue { float, float, float } undef, float %elt0, 0
  %ins1 = insertvalue { float, float, float } %ins0, float %elt1, 1
  %ins2 = insertvalue { float, float, float } %ins1, float %elt2, 2
  ret { float, float, float } %ins2
}
190
+
191
; Elements 0 and 2 of the load are read via extractelement, and element 1 via
; a shufflevector whose other lane (mask index 4) selects from the undef
; operand. Element 3 is never read; the expected output below uses the v3f32
; form of the load and is checked through to the final ret.
; CHECK-LABEL: @extract_elt0_elt1_elt2_buffer_load_v4f32_3(
; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
; CHECK-NEXT: %ins1 = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 0, i32 2>
; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 undef, i32 1>
; CHECK-NEXT: %ret = fadd <2 x float> %ins1, %shuf
; CHECK-NEXT: ret <2 x float> %ret
define amdgpu_ps <2 x float> @extract_elt0_elt1_elt2_buffer_load_v4f32_3(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
  %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
  %elt0 = extractelement <4 x float> %data, i32 0
  %elt2 = extractelement <4 x float> %data, i32 2
  %ins0 = insertelement <2 x float> undef, float %elt0, i32 0
  %ins1 = insertelement <2 x float> %ins0, float %elt2, i32 1
  ; Mask index 4 is lane 0 of the second (undef) operand; index 1 is lane 1 of %data.
  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 4, i32 1>
  %ret = fadd <2 x float> %ins1, %shuf
  ret <2 x float> %ret
}
206
+
207
; Same shape as the _3 variant, but the shufflevector takes undef as its first
; operand and the loaded vector as its second, so element 1 of the load is
; reached through mask index 5. Element 3 is never read; the expected output
; below uses the v3f32 form of the load.
; CHECK-LABEL: @extract_elt0_elt1_elt2_buffer_load_v4f32_4(
; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
; CHECK-NEXT: %ins1 = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 0, i32 2>
; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 undef>
; CHECK-NEXT: %ret = fadd <2 x float> %ins1, %shuf
; CHECK-NEXT: ret <2 x float> %ret
define amdgpu_ps <2 x float> @extract_elt0_elt1_elt2_buffer_load_v4f32_4(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
  %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
  %elt0 = extractelement <4 x float> %data, i32 0
  %elt2 = extractelement <4 x float> %data, i32 2
  %ins0 = insertelement <2 x float> undef, float %elt0, i32 0
  %ins1 = insertelement <2 x float> %ins0, float %elt2, i32 1
  ; Mask index 5 is lane 1 of the second operand (%data); index 1 is lane 1 of undef.
  %shuf = shufflevector <4 x float> undef, <4 x float> %data, <2 x i32> <i32 5, i32 1>
  %ret = fadd <2 x float> %ins1, %shuf
  ret <2 x float> %ret
}
223
+
224
; Element 2 of the load fills both lanes of one <2 x float> operand, while a
; shufflevector with the loaded vector as both operands picks elements 0 and 1
; (mask indices 0 and 5). Element 3 is never read; the expected output below
; uses the v3f32 form of the load and is checked through to the final ret.
; CHECK-LABEL: @extract_elt0_elt1_elt2_buffer_load_v4f32_5(
; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
; CHECK-NEXT: %ins1 = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 2, i32 2>
; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT: %ret = fadd <2 x float> %ins1, %shuf
; CHECK-NEXT: ret <2 x float> %ret
define amdgpu_ps <2 x float> @extract_elt0_elt1_elt2_buffer_load_v4f32_5(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
  %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
  %elt2 = extractelement <4 x float> %data, i32 2
  ; The same scalar is inserted into both lanes.
  %ins0 = insertelement <2 x float> undef, float %elt2, i32 0
  %ins1 = insertelement <2 x float> %ins0, float %elt2, i32 1
  ; Both operands are %data: index 0 is lane 0 of the first, index 5 is lane 1 of the second.
  %shuf = shufflevector <4 x float> %data, <4 x float> %data, <2 x i32> <i32 0, i32 5>
  %ret = fadd <2 x float> %ins1, %shuf
  ret <2 x float> %ret
}
238
+
172
239
; CHECK-LABEL: @extract_elt0_buffer_load_v3f32(
173
240
; CHECK-NEXT: %data = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
174
241
; CHECK-NEXT: ret float %data
0 commit comments