@@ -5,9 +5,8 @@ define <4 x half> @shuffle_v4f16_23uu(<4 x half> addrspace(1)* %arg0, <4 x half>
5
5
; GFX9-LABEL: shuffle_v4f16_23uu:
6
6
; GFX9: ; %bb.0:
7
7
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8
- ; GFX9-NEXT: global_load_dwordx2 v[0:1] , v[0:1], off
8
+ ; GFX9-NEXT: global_load_dword v0 , v[0:1], off offset:4
9
9
; GFX9-NEXT: s_waitcnt vmcnt(0)
10
- ; GFX9-NEXT: v_mov_b32_e32 v0, v1
11
10
; GFX9-NEXT: s_setpc_b64 s[30:31]
12
11
%val0 = load <4 x half >, <4 x half > addrspace (1 )* %arg0
13
12
%val1 = load <4 x half >, <4 x half > addrspace (1 )* %arg1
@@ -19,10 +18,10 @@ define <4 x half> @shuffle_v4f16_234u(<4 x half> addrspace(1)* %arg0, <4 x half>
19
18
; GFX9-LABEL: shuffle_v4f16_234u:
20
19
; GFX9: ; %bb.0:
21
20
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
22
- ; GFX9-NEXT: global_load_dwordx2 v[4:5 ], v[0:1 ], off
23
- ; GFX9-NEXT: global_load_dwordx2 v[1:2] , v[2:3 ], off
21
+ ; GFX9-NEXT: global_load_dwordx2 v[2:3 ], v[2:3 ], off
22
+ ; GFX9-NEXT: global_load_dword v0 , v[0:1 ], off offset:4
24
23
; GFX9-NEXT: s_waitcnt vmcnt(1)
25
- ; GFX9-NEXT: v_mov_b32_e32 v0, v5
24
+ ; GFX9-NEXT: v_mov_b32_e32 v1, v2
26
25
; GFX9-NEXT: s_waitcnt vmcnt(0)
27
26
; GFX9-NEXT: s_setpc_b64 s[30:31]
28
27
%val0 = load <4 x half >, <4 x half > addrspace (1 )* %arg0
@@ -154,7 +153,7 @@ define <4 x half> @shuffle_v4f16_0101(<4 x half> addrspace(1)* %arg0, <4 x half>
154
153
; GFX9-LABEL: shuffle_v4f16_0101:
155
154
; GFX9: ; %bb.0:
156
155
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
157
- ; GFX9-NEXT: global_load_dwordx2 v[0:1] , v[0:1], off
156
+ ; GFX9-NEXT: global_load_dword v0 , v[0:1], off
158
157
; GFX9-NEXT: s_waitcnt vmcnt(0)
159
158
; GFX9-NEXT: v_mov_b32_e32 v1, v0
160
159
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -181,9 +180,8 @@ define <4 x half> @shuffle_v4f16_0145(<4 x half> addrspace(1)* %arg0, <4 x half>
181
180
; GFX9-LABEL: shuffle_v4f16_0145:
182
181
; GFX9: ; %bb.0:
183
182
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
184
- ; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
185
- ; GFX9-NEXT: s_waitcnt vmcnt(0)
186
- ; GFX9-NEXT: global_load_dwordx2 v[1:2], v[2:3], off
183
+ ; GFX9-NEXT: global_load_dword v0, v[0:1], off
184
+ ; GFX9-NEXT: global_load_dword v1, v[2:3], off
187
185
; GFX9-NEXT: s_waitcnt vmcnt(0)
188
186
; GFX9-NEXT: s_setpc_b64 s[30:31]
189
187
%val0 = load <4 x half >, <4 x half > addrspace (1 )* %arg0
@@ -196,11 +194,9 @@ define <4 x half> @shuffle_v4f16_0167(<4 x half> addrspace(1)* %arg0, <4 x half>
196
194
; GFX9-LABEL: shuffle_v4f16_0167:
197
195
; GFX9: ; %bb.0:
198
196
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
199
- ; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
200
- ; GFX9-NEXT: s_waitcnt vmcnt(0)
201
- ; GFX9-NEXT: global_load_dwordx2 v[1:2], v[2:3], off
197
+ ; GFX9-NEXT: global_load_dword v0, v[0:1], off
198
+ ; GFX9-NEXT: global_load_dword v1, v[2:3], off offset:4
202
199
; GFX9-NEXT: s_waitcnt vmcnt(0)
203
- ; GFX9-NEXT: v_mov_b32_e32 v1, v2
204
200
; GFX9-NEXT: s_setpc_b64 s[30:31]
205
201
%val0 = load <4 x half >, <4 x half > addrspace (1 )* %arg0
206
202
%val1 = load <4 x half >, <4 x half > addrspace (1 )* %arg1
@@ -226,9 +222,9 @@ define <4 x half> @shuffle_v4f16_2323(<4 x half> addrspace(1)* %arg0, <4 x half>
226
222
; GFX9-LABEL: shuffle_v4f16_2323:
227
223
; GFX9: ; %bb.0:
228
224
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
229
- ; GFX9-NEXT: global_load_dwordx2 v[0:1] , v[0:1], off
225
+ ; GFX9-NEXT: global_load_dword v0 , v[0:1], off offset:4
230
226
; GFX9-NEXT: s_waitcnt vmcnt(0)
231
- ; GFX9-NEXT: v_mov_b32_e32 v0, v1
227
+ ; GFX9-NEXT: v_mov_b32_e32 v1, v0
232
228
; GFX9-NEXT: s_setpc_b64 s[30:31]
233
229
%val0 = load <4 x half >, <4 x half > addrspace (1 )* %arg0
234
230
%val1 = load <4 x half >, <4 x half > addrspace (1 )* %arg1
@@ -240,10 +236,8 @@ define <4 x half> @shuffle_v4f16_2345(<4 x half> addrspace(1)* %arg0, <4 x half>
240
236
; GFX9-LABEL: shuffle_v4f16_2345:
241
237
; GFX9: ; %bb.0:
242
238
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
243
- ; GFX9-NEXT: global_load_dwordx2 v[4:5], v[0:1], off
244
- ; GFX9-NEXT: global_load_dwordx2 v[1:2], v[2:3], off
245
- ; GFX9-NEXT: s_waitcnt vmcnt(1)
246
- ; GFX9-NEXT: v_mov_b32_e32 v0, v5
239
+ ; GFX9-NEXT: global_load_dword v0, v[0:1], off offset:4
240
+ ; GFX9-NEXT: global_load_dword v1, v[2:3], off
247
241
; GFX9-NEXT: s_waitcnt vmcnt(0)
248
242
; GFX9-NEXT: s_setpc_b64 s[30:31]
249
243
%val0 = load <4 x half >, <4 x half > addrspace (1 )* %arg0
@@ -256,10 +250,9 @@ define <4 x half> @shuffle_v4f16_2367(<4 x half> addrspace(1)* %arg0, <4 x half>
256
250
; GFX9-LABEL: shuffle_v4f16_2367:
257
251
; GFX9: ; %bb.0:
258
252
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
259
- ; GFX9-NEXT: global_load_dwordx2 v[4:5] , v[0:1], off
260
- ; GFX9-NEXT: global_load_dwordx2 v[0:1] , v[2:3], off
253
+ ; GFX9-NEXT: global_load_dword v0 , v[0:1], off offset:4
254
+ ; GFX9-NEXT: global_load_dword v1 , v[2:3], off offset:4
261
255
; GFX9-NEXT: s_waitcnt vmcnt(0)
262
- ; GFX9-NEXT: v_mov_b32_e32 v0, v5
263
256
; GFX9-NEXT: s_setpc_b64 s[30:31]
264
257
%val0 = load <4 x half >, <4 x half > addrspace (1 )* %arg0
265
258
%val1 = load <4 x half >, <4 x half > addrspace (1 )* %arg1
@@ -271,10 +264,11 @@ define <4 x half> @shuffle_v4f16_4501(<4 x half> addrspace(1)* %arg0, <4 x half>
271
264
; GFX9-LABEL: shuffle_v4f16_4501:
272
265
; GFX9: ; %bb.0:
273
266
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
274
- ; GFX9-NEXT: global_load_dwordx2 v[4:5], v[0:1], off
275
- ; GFX9-NEXT: global_load_dwordx2 v[0:1], v[2:3], off
267
+ ; GFX9-NEXT: global_load_dword v2, v[2:3], off
268
+ ; GFX9-NEXT: global_load_dword v1, v[0:1], off
269
+ ; GFX9-NEXT: s_waitcnt vmcnt(1)
270
+ ; GFX9-NEXT: v_mov_b32_e32 v0, v2
276
271
; GFX9-NEXT: s_waitcnt vmcnt(0)
277
- ; GFX9-NEXT: v_mov_b32_e32 v1, v4
278
272
; GFX9-NEXT: s_setpc_b64 s[30:31]
279
273
%val0 = load <4 x half >, <4 x half > addrspace (1 )* %arg0
280
274
%val1 = load <4 x half >, <4 x half > addrspace (1 )* %arg1
@@ -286,10 +280,11 @@ define <4 x half> @shuffle_v4f16_4523(<4 x half> addrspace(1)* %arg0, <4 x half>
286
280
; GFX9-LABEL: shuffle_v4f16_4523:
287
281
; GFX9: ; %bb.0:
288
282
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
289
- ; GFX9-NEXT: global_load_dwordx2 v[0:1] , v[0:1 ], off
290
- ; GFX9-NEXT: global_load_dwordx2 v[2:3] , v[2:3 ], off
291
- ; GFX9-NEXT: s_waitcnt vmcnt(0 )
283
+ ; GFX9-NEXT: global_load_dword v2 , v[2:3 ], off
284
+ ; GFX9-NEXT: global_load_dword v1 , v[0:1 ], off offset:4
285
+ ; GFX9-NEXT: s_waitcnt vmcnt(1 )
292
286
; GFX9-NEXT: v_mov_b32_e32 v0, v2
287
+ ; GFX9-NEXT: s_waitcnt vmcnt(0)
293
288
; GFX9-NEXT: s_setpc_b64 s[30:31]
294
289
%val0 = load <4 x half >, <4 x half > addrspace (1 )* %arg0
295
290
%val1 = load <4 x half >, <4 x half > addrspace (1 )* %arg1
@@ -301,7 +296,7 @@ define <4 x half> @shuffle_v4f16_4545(<4 x half> addrspace(1)* %arg0, <4 x half>
301
296
; GFX9-LABEL: shuffle_v4f16_4545:
302
297
; GFX9: ; %bb.0:
303
298
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
304
- ; GFX9-NEXT: global_load_dwordx2 v[0:1] , v[2:3], off
299
+ ; GFX9-NEXT: global_load_dword v0 , v[2:3], off
305
300
; GFX9-NEXT: s_waitcnt vmcnt(0)
306
301
; GFX9-NEXT: v_mov_b32_e32 v1, v0
307
302
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -328,11 +323,11 @@ define <4 x half> @shuffle_v4f16_6701(<4 x half> addrspace(1)* %arg0, <4 x half>
328
323
; GFX9-LABEL: shuffle_v4f16_6701:
329
324
; GFX9: ; %bb.0:
330
325
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
331
- ; GFX9-NEXT: global_load_dwordx2 v[4:5], v[0:1], off
332
- ; GFX9-NEXT: global_load_dwordx2 v[0:1], v[2:3], off
326
+ ; GFX9-NEXT: global_load_dword v2, v[2:3], off offset:4
327
+ ; GFX9-NEXT: global_load_dword v1, v[0:1], off
328
+ ; GFX9-NEXT: s_waitcnt vmcnt(1)
329
+ ; GFX9-NEXT: v_mov_b32_e32 v0, v2
333
330
; GFX9-NEXT: s_waitcnt vmcnt(0)
334
- ; GFX9-NEXT: v_mov_b32_e32 v0, v1
335
- ; GFX9-NEXT: v_mov_b32_e32 v1, v4
336
331
; GFX9-NEXT: s_setpc_b64 s[30:31]
337
332
%val0 = load <4 x half >, <4 x half > addrspace (1 )* %arg0
338
333
%val1 = load <4 x half >, <4 x half > addrspace (1 )* %arg1
@@ -344,10 +339,11 @@ define <4 x half> @shuffle_v4f16_6723(<4 x half> addrspace(1)* %arg0, <4 x half>
344
339
; GFX9-LABEL: shuffle_v4f16_6723:
345
340
; GFX9: ; %bb.0:
346
341
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
347
- ; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
348
- ; GFX9-NEXT: global_load_dwordx2 v[2:3], v[2:3], off
342
+ ; GFX9-NEXT: global_load_dword v2, v[2:3], off offset:4
343
+ ; GFX9-NEXT: global_load_dword v1, v[0:1], off offset:4
344
+ ; GFX9-NEXT: s_waitcnt vmcnt(1)
345
+ ; GFX9-NEXT: v_mov_b32_e32 v0, v2
349
346
; GFX9-NEXT: s_waitcnt vmcnt(0)
350
- ; GFX9-NEXT: v_mov_b32_e32 v0, v3
351
347
; GFX9-NEXT: s_setpc_b64 s[30:31]
352
348
%val0 = load <4 x half >, <4 x half > addrspace (1 )* %arg0
353
349
%val1 = load <4 x half >, <4 x half > addrspace (1 )* %arg1
@@ -373,9 +369,9 @@ define <4 x half> @shuffle_v4f16_6767(<4 x half> addrspace(1)* %arg0, <4 x half>
373
369
; GFX9-LABEL: shuffle_v4f16_6767:
374
370
; GFX9: ; %bb.0:
375
371
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
376
- ; GFX9-NEXT: global_load_dwordx2 v[0:1] , v[2:3], off
372
+ ; GFX9-NEXT: global_load_dword v0 , v[2:3], off offset:4
377
373
; GFX9-NEXT: s_waitcnt vmcnt(0)
378
- ; GFX9-NEXT: v_mov_b32_e32 v0, v1
374
+ ; GFX9-NEXT: v_mov_b32_e32 v1, v0
379
375
; GFX9-NEXT: s_setpc_b64 s[30:31]
380
376
%val0 = load <4 x half >, <4 x half > addrspace (1 )* %arg0
381
377
%val1 = load <4 x half >, <4 x half > addrspace (1 )* %arg1
@@ -388,13 +384,12 @@ define <4 x half> @shuffle_v4f16_2356(<4 x half> addrspace(1)* %arg0, <4 x half>
388
384
; GFX9: ; %bb.0:
389
385
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
390
386
; GFX9-NEXT: global_load_dwordx2 v[2:3], v[2:3], off
391
- ; GFX9-NEXT: global_load_dwordx2 v[4:5] , v[0:1], off
392
- ; GFX9-NEXT: v_mov_b32_e32 v0 , 0xffff
387
+ ; GFX9-NEXT: global_load_dword v0 , v[0:1], off offset:4
388
+ ; GFX9-NEXT: v_mov_b32_e32 v1 , 0xffff
393
389
; GFX9-NEXT: s_waitcnt vmcnt(1)
394
- ; GFX9-NEXT: v_and_b32_sdwa v0, v0 , v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
395
- ; GFX9-NEXT: v_lshl_or_b32 v1, v3, 16, v0
390
+ ; GFX9-NEXT: v_and_b32_sdwa v1, v1 , v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
391
+ ; GFX9-NEXT: v_lshl_or_b32 v1, v3, 16, v1
396
392
; GFX9-NEXT: s_waitcnt vmcnt(0)
397
- ; GFX9-NEXT: v_mov_b32_e32 v0, v5
398
393
; GFX9-NEXT: s_setpc_b64 s[30:31]
399
394
%val0 = load <4 x half >, <4 x half > addrspace (1 )* %arg0
400
395
%val1 = load <4 x half >, <4 x half > addrspace (1 )* %arg1
@@ -407,11 +402,12 @@ define <4 x half> @shuffle_v4f16_5623(<4 x half> addrspace(1)* %arg0, <4 x half>
407
402
; GFX9: ; %bb.0:
408
403
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
409
404
; GFX9-NEXT: global_load_dwordx2 v[2:3], v[2:3], off
410
- ; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
411
- ; GFX9-NEXT: s_waitcnt vmcnt(0)
405
+ ; GFX9-NEXT: global_load_dword v1, v[0:1], off offset:4
412
406
; GFX9-NEXT: v_mov_b32_e32 v0, 0xffff
407
+ ; GFX9-NEXT: s_waitcnt vmcnt(1)
413
408
; GFX9-NEXT: v_and_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
414
409
; GFX9-NEXT: v_lshl_or_b32 v0, v3, 16, v0
410
+ ; GFX9-NEXT: s_waitcnt vmcnt(0)
415
411
; GFX9-NEXT: s_setpc_b64 s[30:31]
416
412
%val0 = load <4 x half >, <4 x half > addrspace (1 )* %arg0
417
413
%val1 = load <4 x half >, <4 x half > addrspace (1 )* %arg1
@@ -485,13 +481,12 @@ define <4 x i16> @shuffle_v4i16_2356(<4 x i16> addrspace(1)* %arg0, <4 x i16> ad
485
481
; GFX9: ; %bb.0:
486
482
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
487
483
; GFX9-NEXT: global_load_dwordx2 v[2:3], v[2:3], off
488
- ; GFX9-NEXT: global_load_dwordx2 v[4:5] , v[0:1], off
489
- ; GFX9-NEXT: v_mov_b32_e32 v0 , 0xffff
484
+ ; GFX9-NEXT: global_load_dword v0 , v[0:1], off offset:4
485
+ ; GFX9-NEXT: v_mov_b32_e32 v1 , 0xffff
490
486
; GFX9-NEXT: s_waitcnt vmcnt(1)
491
- ; GFX9-NEXT: v_and_b32_sdwa v0, v0 , v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
492
- ; GFX9-NEXT: v_lshl_or_b32 v1, v3, 16, v0
487
+ ; GFX9-NEXT: v_and_b32_sdwa v1, v1 , v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
488
+ ; GFX9-NEXT: v_lshl_or_b32 v1, v3, 16, v1
493
489
; GFX9-NEXT: s_waitcnt vmcnt(0)
494
- ; GFX9-NEXT: v_mov_b32_e32 v0, v5
495
490
; GFX9-NEXT: s_setpc_b64 s[30:31]
496
491
%val0 = load <4 x i16 >, <4 x i16 > addrspace (1 )* %arg0
497
492
%val1 = load <4 x i16 >, <4 x i16 > addrspace (1 )* %arg1
@@ -503,11 +498,9 @@ define <4 x i16> @shuffle_v4i16_0167(<4 x i16> addrspace(1)* %arg0, <4 x i16> ad
503
498
; GFX9-LABEL: shuffle_v4i16_0167:
504
499
; GFX9: ; %bb.0:
505
500
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
506
- ; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
507
- ; GFX9-NEXT: s_waitcnt vmcnt(0)
508
- ; GFX9-NEXT: global_load_dwordx2 v[1:2], v[2:3], off
501
+ ; GFX9-NEXT: global_load_dword v0, v[0:1], off
502
+ ; GFX9-NEXT: global_load_dword v1, v[2:3], off offset:4
509
503
; GFX9-NEXT: s_waitcnt vmcnt(0)
510
- ; GFX9-NEXT: v_mov_b32_e32 v1, v2
511
504
; GFX9-NEXT: s_setpc_b64 s[30:31]
512
505
%val0 = load <4 x i16 >, <4 x i16 > addrspace (1 )* %arg0
513
506
%val1 = load <4 x i16 >, <4 x i16 > addrspace (1 )* %arg1
@@ -590,12 +583,11 @@ define <4 x half> @shuffle_v4f16_2333(<4 x half> addrspace(1)* %arg0, <4 x half>
590
583
; GFX9-LABEL: shuffle_v4f16_2333:
591
584
; GFX9: ; %bb.0:
592
585
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
593
- ; GFX9-NEXT: global_load_dwordx2 v[1:2] , v[0:1], off
586
+ ; GFX9-NEXT: global_load_dword v0 , v[0:1], off offset:4
594
587
; GFX9-NEXT: s_waitcnt vmcnt(0)
595
- ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v2
596
- ; GFX9-NEXT: v_and_b32_e32 v1, 0xffff, v0
597
- ; GFX9-NEXT: v_lshl_or_b32 v1, v0, 16, v1
598
- ; GFX9-NEXT: v_mov_b32_e32 v0, v2
588
+ ; GFX9-NEXT: v_lshrrev_b32_e32 v1, 16, v0
589
+ ; GFX9-NEXT: v_and_b32_e32 v2, 0xffff, v1
590
+ ; GFX9-NEXT: v_lshl_or_b32 v1, v1, 16, v2
599
591
; GFX9-NEXT: s_setpc_b64 s[30:31]
600
592
%val0 = load <4 x half >, <4 x half > addrspace (1 )* %arg0
601
593
%val1 = load <4 x half >, <4 x half > addrspace (1 )* %arg1
@@ -607,12 +599,11 @@ define <4 x half> @shuffle_v4f16_6667(<4 x half> addrspace(1)* %arg0, <4 x half>
607
599
; GFX9-LABEL: shuffle_v4f16_6667:
608
600
; GFX9: ; %bb.0:
609
601
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
610
- ; GFX9-NEXT: global_load_dwordx2 v[1:2] , v[0:1], off
602
+ ; GFX9-NEXT: global_load_dword v0 , v[0:1], off offset:4
611
603
; GFX9-NEXT: s_waitcnt vmcnt(0)
612
- ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v2
613
- ; GFX9-NEXT: v_and_b32_e32 v1, 0xffff, v0
614
- ; GFX9-NEXT: v_lshl_or_b32 v1, v0, 16, v1
615
- ; GFX9-NEXT: v_mov_b32_e32 v0, v2
604
+ ; GFX9-NEXT: v_lshrrev_b32_e32 v1, 16, v0
605
+ ; GFX9-NEXT: v_and_b32_e32 v2, 0xffff, v1
606
+ ; GFX9-NEXT: v_lshl_or_b32 v1, v1, 16, v2
616
607
; GFX9-NEXT: s_setpc_b64 s[30:31]
617
608
%val0 = load <4 x half >, <4 x half > addrspace (1 )* %arg0
618
609
%val1 = load <4 x half >, <4 x half > addrspace (1 )* %arg1
0 commit comments