@@ -172,3 +172,118 @@ bb34: ; preds = %bb10
   %tmp35 = add <4 x i64> %tmp29, %tmp28
   ret void
 }
+
+define i32 @PR43159(<4 x i32>* %a0) {
+; SSE-LABEL: PR43159:
+; SSE:       # %bb.0: # %entry
+; SSE-NEXT:    movdqa (%rdi), %xmm0
+; SSE-NEXT:    movdqa {{.*#+}} xmm1 = [1645975491,344322273,2164392969,1916962805]
+; SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
+; SSE-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
+; SSE-NEXT:    pmuludq %xmm2, %xmm3
+; SSE-NEXT:    movdqa %xmm0, %xmm2
+; SSE-NEXT:    psrld $1, %xmm2
+; SSE-NEXT:    pblendw {{.*#+}} xmm2 = xmm0[0,1,2,3],xmm2[4,5],xmm0[6,7]
+; SSE-NEXT:    pmuludq %xmm1, %xmm2
+; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
+; SSE-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
+; SSE-NEXT:    psubd %xmm1, %xmm0
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; SSE-NEXT:    pmuludq {{.*}}(%rip), %xmm0
+; SSE-NEXT:    pxor %xmm2, %xmm2
+; SSE-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm0[2,3],xmm2[4,5],xmm0[6,7]
+; SSE-NEXT:    paddd %xmm1, %xmm2
+; SSE-NEXT:    movdqa %xmm2, %xmm0
+; SSE-NEXT:    psrld $7, %xmm0
+; SSE-NEXT:    psrld $6, %xmm2
+; SSE-NEXT:    movd %xmm2, %edi
+; SSE-NEXT:    pextrd $1, %xmm0, %esi
+; SSE-NEXT:    pextrd $2, %xmm2, %edx
+; SSE-NEXT:    pextrd $3, %xmm0, %ecx
+; SSE-NEXT:    jmp foo # TAILCALL
+;
+; AVX2-LABEL: PR43159:
+; AVX2:       # %bb.0: # %entry
+; AVX2-NEXT:    vmovdqa (%rdi), %xmm0
+; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [1645975491,344322273,2164392969,1916962805]
+; AVX2-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
+; AVX2-NEXT:    vpsrlvd {{.*}}(%rip), %xmm0, %xmm3
+; AVX2-NEXT:    vpshufd {{.*#+}} xmm4 = xmm3[1,1,3,3]
+; AVX2-NEXT:    vpmuludq %xmm2, %xmm4, %xmm2
+; AVX2-NEXT:    vpmuludq %xmm1, %xmm3, %xmm1
+; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; AVX2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
+; AVX2-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
+; AVX2-NEXT:    vpmuludq %xmm2, %xmm0, %xmm0
+; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm2[0],xmm0[1],xmm2[2],xmm0[3]
+; AVX2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vpsrlvd {{.*}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT:    vmovd %xmm0, %edi
+; AVX2-NEXT:    vpextrd $1, %xmm0, %esi
+; AVX2-NEXT:    vpextrd $2, %xmm0, %edx
+; AVX2-NEXT:    vpextrd $3, %xmm0, %ecx
+; AVX2-NEXT:    jmp foo # TAILCALL
+;
+; AVX512VL-LABEL: PR43159:
+; AVX512VL:       # %bb.0: # %entry
+; AVX512VL-NEXT:    vmovdqa (%rdi), %xmm0
+; AVX512VL-NEXT:    vmovdqa {{.*#+}} xmm1 = [1645975491,344322273,2164392969,1916962805]
+; AVX512VL-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
+; AVX512VL-NEXT:    vpsrlvd {{.*}}(%rip), %xmm0, %xmm3
+; AVX512VL-NEXT:    vpshufd {{.*#+}} xmm4 = xmm3[1,1,3,3]
+; AVX512VL-NEXT:    vpmuludq %xmm2, %xmm4, %xmm2
+; AVX512VL-NEXT:    vpmuludq %xmm1, %xmm3, %xmm1
+; AVX512VL-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; AVX512VL-NEXT:    vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
+; AVX512VL-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; AVX512VL-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
+; AVX512VL-NEXT:    vpmuludq %xmm2, %xmm0, %xmm0
+; AVX512VL-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; AVX512VL-NEXT:    vpblendd {{.*#+}} xmm0 = xmm2[0],xmm0[1],xmm2[2],xmm0[3]
+; AVX512VL-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT:    vpsrlvd {{.*}}(%rip), %xmm0, %xmm0
+; AVX512VL-NEXT:    vmovd %xmm0, %edi
+; AVX512VL-NEXT:    vpextrd $1, %xmm0, %esi
+; AVX512VL-NEXT:    vpextrd $2, %xmm0, %edx
+; AVX512VL-NEXT:    vpextrd $3, %xmm0, %ecx
+; AVX512VL-NEXT:    jmp foo # TAILCALL
+;
+; AVX512DQVL-LABEL: PR43159:
+; AVX512DQVL:       # %bb.0: # %entry
+; AVX512DQVL-NEXT:    vmovdqa (%rdi), %xmm0
+; AVX512DQVL-NEXT:    vmovdqa {{.*#+}} xmm1 = [1645975491,344322273,2164392969,1916962805]
+; AVX512DQVL-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
+; AVX512DQVL-NEXT:    vpsrlvd {{.*}}(%rip), %xmm0, %xmm3
+; AVX512DQVL-NEXT:    vpshufd {{.*#+}} xmm4 = xmm3[1,1,3,3]
+; AVX512DQVL-NEXT:    vpmuludq %xmm2, %xmm4, %xmm2
+; AVX512DQVL-NEXT:    vpmuludq %xmm1, %xmm3, %xmm1
+; AVX512DQVL-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; AVX512DQVL-NEXT:    vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
+; AVX512DQVL-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
+; AVX512DQVL-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; AVX512DQVL-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
+; AVX512DQVL-NEXT:    vpmuludq %xmm2, %xmm0, %xmm0
+; AVX512DQVL-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; AVX512DQVL-NEXT:    vpblendd {{.*#+}} xmm0 = xmm2[0],xmm0[1],xmm2[2],xmm0[3]
+; AVX512DQVL-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
+; AVX512DQVL-NEXT:    vpsrlvd {{.*}}(%rip), %xmm0, %xmm0
+; AVX512DQVL-NEXT:    vmovd %xmm0, %edi
+; AVX512DQVL-NEXT:    vpextrd $1, %xmm0, %esi
+; AVX512DQVL-NEXT:    vpextrd $2, %xmm0, %edx
+; AVX512DQVL-NEXT:    vpextrd $3, %xmm0, %ecx
+; AVX512DQVL-NEXT:    jmp foo # TAILCALL
+entry:
+  %0 = load <4 x i32>, <4 x i32>* %a0, align 16
+  %div = udiv <4 x i32> %0, <i32 167, i32 237, i32 254, i32 177>
+  %ext0 = extractelement <4 x i32> %div, i32 0
+  %ext1 = extractelement <4 x i32> %div, i32 1
+  %ext2 = extractelement <4 x i32> %div, i32 2
+  %ext3 = extractelement <4 x i32> %div, i32 3
+  %call = tail call i32 @foo(i32 %ext0, i32 %ext1, i32 %ext2, i32 %ext3)
+  ret i32 %call
+}
+declare dso_local i32 @foo(i32, i32, i32, i32)