diff --git a/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp b/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp index f433b5aec5162..242899fa82373 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp @@ -245,12 +245,12 @@ bool AMDGPURewriteAGPRCopyMFMAImpl::run(MachineFunction &MF) const { continue; MachineOperand *Src2 = TII.getNamedOperand(*MFMA, AMDGPU::OpName::src2); - if (!Src2->isReg()) - continue; - - Register Src2Reg = Src2->getReg(); - if (!Src2Reg.isVirtual()) - continue; + Register Src2Reg; + if (Src2->isReg()) { + Src2Reg = Src2->getReg(); + if (!Src2Reg.isVirtual()) + continue; + } // FIXME: getMinimalPhysRegClass returns a nonsense AV_* subclass instead // of an AGPR or VGPR subclass, so we can't simply use the result on the @@ -259,13 +259,15 @@ bool AMDGPURewriteAGPRCopyMFMAImpl::run(MachineFunction &MF) const { LLVM_DEBUG({ dbgs() << "Attempting to replace VGPR MFMA with AGPR version:" << " Dst=[" << printReg(VReg) << " => " - << printReg(PhysReg, &TRI) << ']'; + << printReg(PhysReg, &TRI); if (Src2Reg) { Register Src2PhysReg = VRM.getPhys(Src2Reg); - dbgs() << ", Src2=[" << printReg(Src2Reg, &TRI) << " => " - << printReg(Src2PhysReg, &TRI) << "]: " << *MFMA; + dbgs() << "], Src2=[" << printReg(Src2Reg, &TRI) << " => " + << printReg(Src2PhysReg, &TRI); } + + dbgs() << "]: " << *MFMA; }); const TargetRegisterClass *DstVirtRegRC = MRI.getRegClass(MFMADstReg); diff --git a/llvm/test/CodeGen/AMDGPU/inflate-reg-class-vgpr-mfma-to-agpr-negative-tests.mir b/llvm/test/CodeGen/AMDGPU/inflate-reg-class-vgpr-mfma-to-agpr-negative-tests.mir index b53bde6bfd281..3103d635200c6 100644 --- a/llvm/test/CodeGen/AMDGPU/inflate-reg-class-vgpr-mfma-to-agpr-negative-tests.mir +++ b/llvm/test/CodeGen/AMDGPU/inflate-reg-class-vgpr-mfma-to-agpr-negative-tests.mir @@ -16,10 +16,6 @@ ret void } - define amdgpu_kernel void @inflate_result_to_agpr__V_MFMA_F32_32X32X8F16_vgprcd_e64_imm_src2() #0 { - ret void - } - define amdgpu_kernel void @inflate_result_to_agpr__V_MFMA_F32_32X32X8F16_vgprcd_e64_physreg_src2() #0 { ret void } @@ -341,89 +337,6 @@ body: | ... -# Non-mac variant, src2 is an immediate. ---- -name: inflate_result_to_agpr__V_MFMA_F32_32X32X8F16_vgprcd_e64_imm_src2 -tracksRegLiveness: true -machineFunctionInfo: - isEntryFunction: true - stackPtrOffsetReg: '$sgpr32' - occupancy: 10 - sgprForEXECCopy: '$sgpr100_sgpr101' -body: | - ; CHECK-LABEL: name: inflate_result_to_agpr__V_MFMA_F32_32X32X8F16_vgprcd_e64_imm_src2 - ; CHECK: bb.0: - ; CHECK-NEXT: successors: %bb.1(0x80000000) - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: S_NOP 0, implicit-def $agpr0 - ; CHECK-NEXT: renamable $sgpr0 = S_MOV_B32 0 - ; CHECK-NEXT: renamable $vgpr8 = V_MOV_B32_e32 0, implicit $exec - ; CHECK-NEXT: renamable $sgpr1 = COPY renamable $sgpr0 - ; CHECK-NEXT: renamable $vgpr0_vgpr1 = COPY killed renamable $sgpr0_sgpr1 - ; CHECK-NEXT: renamable $vcc = S_AND_B64 $exec, -1, implicit-def dead $scc - ; CHECK-NEXT: dead renamable $vgpr9 = COPY renamable $vgpr8 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.1: - ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; CHECK-NEXT: liveins: $vcc, $vgpr0_vgpr1 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: early-clobber renamable $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 = V_MFMA_F32_32X32X8F16_vgprcd_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, 0, 0, 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc - ; CHECK-NEXT: S_BRANCH %bb.2 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.2: - ; CHECK-NEXT: liveins: $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17:0x00000000FFFFFFFF - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = COPY killed renamable $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 - ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 - ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39 - ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47 - ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55 - ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63 - ; CHECK-NEXT: renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec - ; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR renamable $vgpr0, renamable $agpr8_agpr9_agpr10_agpr11, undef $sgpr0_sgpr1, 32, 0, implicit $exec :: (store (s128), align 32, addrspace 1) - ; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR renamable $vgpr0, renamable $agpr12_agpr13_agpr14_agpr15, undef $sgpr0_sgpr1, 48, 0, implicit $exec :: (store (s128), addrspace 1) - ; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR renamable $vgpr0, renamable $agpr0_agpr1_agpr2_agpr3, undef $sgpr0_sgpr1, 0, 0, implicit $exec :: (store (s128), align 128, addrspace 1) - ; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR killed renamable $vgpr0, killed renamable $agpr4_agpr5_agpr6_agpr7, killed undef $sgpr0_sgpr1, 16, 0, implicit $exec :: (store (s128), addrspace 1) - ; CHECK-NEXT: S_ENDPGM 0 - bb.0: - S_NOP 0, implicit-def $agpr0 - renamable $sgpr0 = S_MOV_B32 0 - undef %0.sub8:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec - renamable $sgpr1 = COPY renamable $sgpr0 - %1:vreg_64_align2 = COPY killed renamable $sgpr0_sgpr1 - renamable $vcc = S_AND_B64 $exec, -1, implicit-def dead $scc - %0.sub9:vreg_512_align2 = COPY %0.sub8 - - bb.1: - liveins: $vcc - - %0:vreg_512_align2 = V_MFMA_F32_32X32X8F16_vgprcd_e64 %1, %1, 0, 0, 0, 0, implicit $mode, implicit $exec - S_CBRANCH_VCCNZ %bb.1, implicit $vcc - S_BRANCH %bb.2 - - bb.2: - ; No VGPRs available for %0 - S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 - S_NOP 0, implicit-def $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - S_NOP 0, implicit-def $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39 - S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47 - S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55 - S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63 - %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - GLOBAL_STORE_DWORDX4_SADDR %2, %0.sub8_sub9_sub10_sub11, undef $sgpr0_sgpr1, 32, 0, implicit $exec :: (store (s128), align 32, addrspace 1) - GLOBAL_STORE_DWORDX4_SADDR %2, %0.sub12_sub13_sub14_sub15, undef $sgpr0_sgpr1, 48, 0, implicit $exec :: (store (s128), addrspace 1) - GLOBAL_STORE_DWORDX4_SADDR %2, %0.sub0_sub1_sub2_sub3, undef $sgpr0_sgpr1, 0, 0, implicit $exec :: (store (s128), align 128, addrspace 1) - GLOBAL_STORE_DWORDX4_SADDR %2, %0.sub4_sub5_sub6_sub7, killed undef $sgpr0_sgpr1, 16, 0, implicit $exec :: (store (s128), addrspace 1) - S_ENDPGM 0 - -... - # Non-mac variant, src2 is a physical register --- name: inflate_result_to_agpr__V_MFMA_F32_32X32X8F16_vgprcd_e64_physreg_src2 diff --git a/llvm/test/CodeGen/AMDGPU/inflate-reg-class-vgpr-mfma-to-av-with-load-source.mir b/llvm/test/CodeGen/AMDGPU/inflate-reg-class-vgpr-mfma-to-av-with-load-source.mir index dcb45cc90ca68..3de86da766af7 100644 --- a/llvm/test/CodeGen/AMDGPU/inflate-reg-class-vgpr-mfma-to-av-with-load-source.mir +++ b/llvm/test/CodeGen/AMDGPU/inflate-reg-class-vgpr-mfma-to-av-with-load-source.mir @@ -1553,19 +1553,18 @@ body: | ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: liveins: $vcc, $vgpr18_vgpr19 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $vgpr16_vgpr17 = GLOBAL_LOAD_DWORDX2 undef renamable $vgpr0_vgpr1, 0, 0, implicit $exec :: (load (s64), addrspace 1) - ; CHECK-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_F32_32X32X8F16_vgprcd_e64 $vgpr18_vgpr19, $vgpr18_vgpr19, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 0, 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: early-clobber renamable $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35 = V_MFMA_F32_32X32X8F16_vgprcd_e64 $vgpr18_vgpr19, $vgpr18_vgpr19, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: renamable $vgpr20_vgpr21_vgpr22_vgpr23 = V_MFMA_F32_4X4X4F16_vgprcd_e64 $vgpr20_vgpr21, $vgpr18_vgpr19, 0, 0, 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: renamable $agpr0_agpr1 = GLOBAL_LOAD_DWORDX2 undef renamable $vgpr0_vgpr1, 0, 0, implicit $exec :: (load (s64), addrspace 1) + ; CHECK-NEXT: early-clobber renamable $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = V_MFMA_F32_32X32X8F16_e64 $vgpr18_vgpr19, $vgpr18_vgpr19, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: early-clobber renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X8F16_e64 $vgpr18_vgpr19, $vgpr18_vgpr19, killed $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, 0, 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: renamable $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X4F16_e64 $agpr0_agpr1, $vgpr18_vgpr19, 0, 0, 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc ; CHECK-NEXT: S_BRANCH %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: - ; CHECK-NEXT: liveins: $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35:0x00000000FFFFFFFF + ; CHECK-NEXT: liveins: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15:0x00000000FFFFFFFF ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; CHECK-NEXT: renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = COPY killed renamable $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35 ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39 @@ -1903,14 +1902,13 @@ body: | ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: liveins: $vcc, $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: early-clobber renamable $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 = V_MFMA_F32_32X32X8F16_vgprcd_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, 0, 0, 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: early-clobber renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X8F16_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, 0, 0, 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc ; CHECK-NEXT: S_BRANCH %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: - ; CHECK-NEXT: liveins: $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17:0x00000000FFFFFFFF + ; CHECK-NEXT: liveins: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15:0x00000000FFFFFFFF ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = COPY killed renamable $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 diff --git a/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr.ll b/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr.ll index 66002fff12155..4bc0270e61a7a 100644 --- a/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr.ll +++ b/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr.ll @@ -235,47 +235,11 @@ bb: ret void } -; TODO: Handle rewriting this case define void @test_rewrite_mfma_imm_src2(float %arg0, float %arg1) #0 { ; CHECK-LABEL: test_rewrite_mfma_imm_src2: ; CHECK: ; %bb.0: ; %bb ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: v_mfma_f32_32x32x1_2b_f32 v[0:31], v0, v1, 2.0 -; CHECK-NEXT: s_nop 7 -; CHECK-NEXT: s_nop 7 -; CHECK-NEXT: s_nop 1 -; CHECK-NEXT: v_accvgpr_write_b32 a0, v0 -; CHECK-NEXT: v_accvgpr_write_b32 a1, v1 -; CHECK-NEXT: v_accvgpr_write_b32 a2, v2 -; CHECK-NEXT: v_accvgpr_write_b32 a3, v3 -; CHECK-NEXT: v_accvgpr_write_b32 a4, v4 -; CHECK-NEXT: v_accvgpr_write_b32 a5, v5 -; CHECK-NEXT: v_accvgpr_write_b32 a6, v6 -; CHECK-NEXT: v_accvgpr_write_b32 a7, v7 -; CHECK-NEXT: v_accvgpr_write_b32 a8, v8 -; CHECK-NEXT: v_accvgpr_write_b32 a9, v9 -; CHECK-NEXT: v_accvgpr_write_b32 a10, v10 -; CHECK-NEXT: v_accvgpr_write_b32 a11, v11 -; CHECK-NEXT: v_accvgpr_write_b32 a12, v12 -; CHECK-NEXT: v_accvgpr_write_b32 a13, v13 -; CHECK-NEXT: v_accvgpr_write_b32 a14, v14 -; CHECK-NEXT: v_accvgpr_write_b32 a15, v15 -; CHECK-NEXT: v_accvgpr_write_b32 a16, v16 -; CHECK-NEXT: v_accvgpr_write_b32 a17, v17 -; CHECK-NEXT: v_accvgpr_write_b32 a18, v18 -; CHECK-NEXT: v_accvgpr_write_b32 a19, v19 -; CHECK-NEXT: v_accvgpr_write_b32 a20, v20 -; CHECK-NEXT: v_accvgpr_write_b32 a21, v21 -; CHECK-NEXT: v_accvgpr_write_b32 a22, v22 -; CHECK-NEXT: v_accvgpr_write_b32 a23, v23 -; CHECK-NEXT: v_accvgpr_write_b32 a24, v24 -; CHECK-NEXT: v_accvgpr_write_b32 a25, v25 -; CHECK-NEXT: v_accvgpr_write_b32 a26, v26 -; CHECK-NEXT: v_accvgpr_write_b32 a27, v27 -; CHECK-NEXT: v_accvgpr_write_b32 a28, v28 -; CHECK-NEXT: v_accvgpr_write_b32 a29, v29 -; CHECK-NEXT: v_accvgpr_write_b32 a30, v30 -; CHECK-NEXT: v_accvgpr_write_b32 a31, v31 +; CHECK-NEXT: v_mfma_f32_32x32x1_2b_f32 a[0:31], v0, v1, 2.0 ; CHECK-NEXT: ;;#ASMSTART ; CHECK-NEXT: ; use a[0:31] ; CHECK-NEXT: ;;#ASMEND