@@ -46,9 +46,8 @@ body: |
46
46
; GFX90A: liveins: $vgpr0_vgpr1
47
47
; GFX90A-NEXT: {{ $}}
48
48
; GFX90A-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY $vgpr0_vgpr1
49
- ; GFX90A-NEXT: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
50
- ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:vreg_64_align2 = COPY killed [[DEF]]
51
- ; GFX90A-NEXT: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec
49
+ ; GFX90A-NEXT: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
50
+ ; GFX90A-NEXT: GLOBAL_STORE_DWORDX2 [[COPY]], [[DEF]], 0, 0, implicit $exec
52
51
%0:vreg_64_align2 = COPY $vgpr0_vgpr1
53
52
%1:vreg_64 = IMPLICIT_DEF
54
53
%2:vreg_64_align2 = COPY killed %1
@@ -148,9 +147,8 @@ body: |
148
147
; GFX90A: liveins: $vgpr0_vgpr1
149
148
; GFX90A-NEXT: {{ $}}
150
149
; GFX90A-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY $vgpr0_vgpr1
151
- ; GFX90A-NEXT: [[DEF:%[0-9]+]]:vreg_96 = IMPLICIT_DEF
152
- ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:vreg_96_align2 = COPY killed [[DEF]]
153
- ; GFX90A-NEXT: GLOBAL_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, implicit $exec
150
+ ; GFX90A-NEXT: [[DEF:%[0-9]+]]:vreg_96_align2 = IMPLICIT_DEF
151
+ ; GFX90A-NEXT: GLOBAL_STORE_DWORDX3 [[COPY]], [[DEF]], 0, 0, implicit $exec
154
152
%0:vreg_64_align2 = COPY $vgpr0_vgpr1
155
153
%1:vreg_96 = IMPLICIT_DEF
156
154
%2:vreg_96_align2 = COPY killed %1
@@ -326,11 +324,59 @@ body: |
326
324
; GFX90A: liveins: $vgpr0_vgpr1
327
325
; GFX90A-NEXT: {{ $}}
328
326
; GFX90A-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY $vgpr0_vgpr1
329
- ; GFX90A-NEXT: [[DEF:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
330
- ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY killed [[DEF]]
331
- ; GFX90A-NEXT: GLOBAL_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec
327
+ ; GFX90A-NEXT: [[DEF:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
328
+ ; GFX90A-NEXT: GLOBAL_STORE_DWORDX4 [[COPY]], [[DEF]], 0, 0, implicit $exec
332
329
%0:vreg_64_align2 = COPY $vgpr0_vgpr1
333
330
%1:vreg_128 = IMPLICIT_DEF
334
331
%2:vreg_128_align2 = COPY killed %1
335
332
GLOBAL_STORE_DWORDX4 %0, %2, 0, 0, implicit $exec
336
333
...
334
+
335
+ # Make sure the alignment requirement is respected for VS_64 operand
336
+ # uses: GFX90A must keep the sub1_sub2 COPY that GFX908 folds into V_PK_ADD_F32.
337
+ ---
338
+ name : aligned_vgpr_vs_64_constraint
339
+ tracksRegLiveness : true
340
+ isSSA : true
341
+ body : |
342
+ bb.0.entry:
343
+ liveins: $vgpr0, $sgpr8_sgpr9
344
+
345
+ ; GFX908-LABEL: name: aligned_vgpr_vs_64_constraint
346
+ ; GFX908: liveins: $vgpr0, $sgpr8_sgpr9
347
+ ; GFX908-NEXT: {{ $}}
348
+ ; GFX908-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
349
+ ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
350
+ ; GFX908-NEXT: [[GLOBAL_LOAD_DWORDX3_SADDR:%[0-9]+]]:vreg_96_align2 = GLOBAL_LOAD_DWORDX3_SADDR [[COPY]], [[COPY1]], 16, 0, implicit $exec :: (load (s96), align 4, addrspace 1)
351
+ ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_LOAD_DWORDX3_SADDR]].sub0
352
+ ; GFX908-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
353
+ ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[V_MOV_B32_e32_]], %subreg.sub1
354
+ ; GFX908-NEXT: [[V_PK_ADD_F32_:%[0-9]+]]:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, killed [[REG_SEQUENCE]], 0, [[GLOBAL_LOAD_DWORDX3_SADDR]].sub1_sub2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
355
+ ; GFX908-NEXT: DS_WRITE_B64_gfx9 [[V_MOV_B32_e32_]], killed [[V_PK_ADD_F32_]], 0, 0, implicit $exec :: (store (s64), addrspace 3)
356
+ ; GFX908-NEXT: S_ENDPGM 0
357
+ ;
358
+ ; GFX90A-LABEL: name: aligned_vgpr_vs_64_constraint
359
+ ; GFX90A: liveins: $vgpr0, $sgpr8_sgpr9
360
+ ; GFX90A-NEXT: {{ $}}
361
+ ; GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
362
+ ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
363
+ ; GFX90A-NEXT: [[GLOBAL_LOAD_DWORDX3_SADDR:%[0-9]+]]:vreg_96_align2 = GLOBAL_LOAD_DWORDX3_SADDR [[COPY]], [[COPY1]], 16, 0, implicit $exec :: (load (s96), align 4, addrspace 1)
364
+ ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_LOAD_DWORDX3_SADDR]].sub0
365
+ ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:vreg_64_align2 = COPY killed [[GLOBAL_LOAD_DWORDX3_SADDR]].sub1_sub2
366
+ ; GFX90A-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
367
+ ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[V_MOV_B32_e32_]], %subreg.sub1
368
+ ; GFX90A-NEXT: [[V_PK_ADD_F32_:%[0-9]+]]:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, killed [[REG_SEQUENCE]], 0, killed [[COPY3]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec
369
+ ; GFX90A-NEXT: DS_WRITE_B64_gfx9 [[V_MOV_B32_e32_]], killed [[V_PK_ADD_F32_]], 0, 0, implicit $exec :: (store (s64), addrspace 3)
370
+ ; GFX90A-NEXT: S_ENDPGM 0
371
+ %0:sgpr_64 = COPY $sgpr8_sgpr9
372
+ %1:vgpr_32 = COPY $vgpr0
373
+ %2:vreg_96_align2 = GLOBAL_LOAD_DWORDX3_SADDR %0, %1, 16, 0, implicit $exec :: (load (s96), align 4, addrspace 1)
374
+ %3:vgpr_32 = COPY %2.sub0
375
+ %4:vreg_64_align2 = COPY killed %2.sub1_sub2
376
+ %5:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
377
+ %6:vreg_64_align2 = REG_SEQUENCE %3, %subreg.sub0, %5, %subreg.sub1
378
+ %7:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, killed %6, 0, killed %4, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
379
+ DS_WRITE_B64_gfx9 %5, killed %7, 0, 0, implicit $exec :: (store (s64), addrspace 3)
380
+ S_ENDPGM 0
381
+
382
+ ...
0 commit comments