-
Notifications
You must be signed in to change notification settings - Fork 15k
AMDGPU: Avoid using exact class check in reg_sequence AGPR fold #156135
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
AMDGPU: Avoid using exact class check in reg_sequence AGPR fold #156135
Conversation
This does better in cases which mix align2 and non-align2 classes.
@llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) ChangesThis does better in cases which mix align2 and non-align2 classes. Full diff: https://github.com/llvm/llvm-project/pull/156135.diff 2 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 3979e1e0c44aa..53579b926cfe0 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -1930,8 +1930,10 @@ bool SIFoldOperandsImpl::foldCopyToAGPRRegSequence(MachineInstr *CopyMI) const {
// Direct copy from SGPR to AGPR is not possible on gfx908. To avoid
// creation of exploded copies SGPR->VGPR->AGPR in the copyPhysReg()
// later, create a copy here and track if we already have such a copy.
- if (TRI->getSubRegisterClass(MRI->getRegClass(Src.Reg), Src.SubReg) !=
- VGPRUseSubRC) {
+ const TargetRegisterClass *SubRC =
+ TRI->getSubRegisterClass(MRI->getRegClass(Src.Reg), Src.SubReg);
+ if (!VGPRUseSubRC->hasSubClassEq(SubRC)) {
+ // TODO: Try to reconstrain class
VGPRCopy = MRI->createVirtualRegister(VGPRUseSubRC);
BuildMI(MBB, CopyMI, DL, TII->get(AMDGPU::COPY), VGPRCopy).add(*Def);
B.addReg(VGPRCopy);
diff --git a/llvm/test/CodeGen/AMDGPU/si-fold-operands-agpr-copy-reg-sequence.mir b/llvm/test/CodeGen/AMDGPU/si-fold-operands-agpr-copy-reg-sequence.mir
index 80f13b5102097..ce539bd9c42f1 100644
--- a/llvm/test/CodeGen/AMDGPU/si-fold-operands-agpr-copy-reg-sequence.mir
+++ b/llvm/test/CodeGen/AMDGPU/si-fold-operands-agpr-copy-reg-sequence.mir
@@ -308,8 +308,7 @@ body: |
; CHECK-LABEL: name: s_mov_b64_0_copy_vgpr_reg_sequence_128_splat_copy_to_agpr_elt64
; CHECK: [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 0, implicit $exec
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[V_MOV_B]], %subreg.sub0_sub1, [[V_MOV_B]], %subreg.sub2_sub3
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY [[V_MOV_B]]
- ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:areg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[COPY]], %subreg.sub2_sub3
+ ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:areg_128 = REG_SEQUENCE [[V_MOV_B]], %subreg.sub0_sub1, [[V_MOV_B]], %subreg.sub2_sub3
; CHECK-NEXT: $agpr0_agpr1_agpr2_agpr3 = COPY [[REG_SEQUENCE1]]
; CHECK-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3
%0:sreg_64 = S_MOV_B64 0
@@ -329,8 +328,7 @@ body: |
; CHECK-LABEL: name: s_mov_b64_0_copy_vgpr_reg_sequence_128_splat_copy_to_agpr_subreg_elt32
; CHECK: [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 0, implicit $exec
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[V_MOV_B]].sub0, %subreg.sub0, [[V_MOV_B]].sub1, %subreg.sub1, [[V_MOV_B]], %subreg.sub1_sub2
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY [[V_MOV_B]]
- ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:areg_128 = REG_SEQUENCE [[V_MOV_B]].sub0, %subreg.sub0, [[V_MOV_B]].sub1, %subreg.sub1, [[COPY]], %subreg.sub1_sub2
+ ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:areg_128 = REG_SEQUENCE [[V_MOV_B]].sub0, %subreg.sub0, [[V_MOV_B]].sub1, %subreg.sub1, [[V_MOV_B]], %subreg.sub1_sub2
; CHECK-NEXT: $agpr0_agpr1_agpr2_agpr3 = COPY [[REG_SEQUENCE1]]
; CHECK-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3
%0:sreg_64 = S_MOV_B64 0
@@ -354,9 +352,7 @@ body: |
; CHECK-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 0, implicit $exec
; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY $vgpr0_vgpr1
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[V_MOV_B]], %subreg.sub0_sub1, [[COPY]], %subreg.sub2_sub3
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY [[V_MOV_B]]
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY [[COPY]]
- ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:areg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3
+ ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:areg_128 = REG_SEQUENCE [[V_MOV_B]], %subreg.sub0_sub1, [[COPY]], %subreg.sub2_sub3
; CHECK-NEXT: $agpr0_agpr1_agpr2_agpr3 = COPY [[REG_SEQUENCE1]]
; CHECK-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3
%0:sreg_64 = S_MOV_B64 0
@@ -381,8 +377,7 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr8_sgpr9
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_64_align2 = COPY killed [[COPY]]
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY1]], %subreg.sub2_sub3
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY [[COPY1]]
- ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:areg_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3
+ ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:areg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY1]], %subreg.sub2_sub3
; CHECK-NEXT: $agpr0_agpr1_agpr2_agpr3 = COPY [[REG_SEQUENCE1]]
; CHECK-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3
%0:sreg_64 = COPY $sgpr8_sgpr9
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/51/builds/22705 Here is the relevant piece of the build log for the reference
|
This does better in cases which mix align2 and non-align2 classes.