AMDGPU: Fold mov imm to copy to av_32 class #155428
@llvm/pr-subscribers-backend-amdgpu

Author: Matt Arsenault (arsenm)

Changes: Previously we had special case folding into copies to AGPR_32, ignoring AV_32. Try folding into the pseudos. Not sure why the true16 case regressed.

Patch is 25.88 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/155428.diff

5 Files Affected:
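As a concrete illustration (taken directly from the new MIR tests added below), an inline immediate materialized by a mov and then copied into an av_32 virtual register now folds straight into AV_MOV_B32_IMM_PSEUDO:

    ; before SIFoldOperands
    %0:sreg_32 = S_MOV_B32 0, implicit $exec
    %1:av_32 = COPY %0
    S_ENDPGM 0, implicit %1

    ; after SIFoldOperands
    %1:av_32 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
    S_ENDPGM 0, implicit %1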
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index d72af06ac566e..cdbb7a7097bb5 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -1260,30 +1260,13 @@ void SIFoldOperandsImpl::foldOperand(
return;
const TargetRegisterClass *DestRC = TRI->getRegClassForReg(*MRI, DestReg);
- if (!DestReg.isPhysical() && DestRC == &AMDGPU::AGPR_32RegClass) {
- std::optional<int64_t> UseImmVal = OpToFold.getEffectiveImmVal();
- if (UseImmVal && TII->isInlineConstant(
- *UseImmVal, AMDGPU::OPERAND_REG_INLINE_C_INT32)) {
- UseMI->setDesc(TII->get(AMDGPU::V_ACCVGPR_WRITE_B32_e64));
- UseMI->getOperand(1).ChangeToImmediate(*UseImmVal);
- CopiesToReplace.push_back(UseMI);
- return;
- }
- }
-
- // Allow immediates COPYd into sgpr_lo16 to be further folded while
- // still being legal if not further folded
- if (DestRC == &AMDGPU::SGPR_LO16RegClass) {
- assert(ST->useRealTrue16Insts());
- MRI->setRegClass(DestReg, &AMDGPU::SGPR_32RegClass);
- DestRC = &AMDGPU::SGPR_32RegClass;
- }
// In order to fold immediates into copies, we need to change the copy to a
// MOV. Find a compatible mov instruction with the value.
for (unsigned MovOp :
{AMDGPU::S_MOV_B32, AMDGPU::V_MOV_B32_e32, AMDGPU::S_MOV_B64,
- AMDGPU::V_MOV_B64_PSEUDO, AMDGPU::V_MOV_B16_t16_e64}) {
+ AMDGPU::V_MOV_B64_PSEUDO, AMDGPU::V_MOV_B16_t16_e64,
+ AMDGPU::V_ACCVGPR_WRITE_B32_e64, AMDGPU::AV_MOV_B32_IMM_PSEUDO}) {
const MCInstrDesc &MovDesc = TII->get(MovOp);
assert(MovDesc.getNumDefs() > 0 && MovDesc.operands()[0].RegClass != -1);
@@ -1315,6 +1298,14 @@ void SIFoldOperandsImpl::foldOperand(
UseMI->setDesc(MovDesc);
if (MovOp == AMDGPU::V_MOV_B16_t16_e64) {
+ // Allow immediates COPYd into sgpr_lo16 to be further folded while
+ // still being legal if not further folded
+ if (DestRC == &AMDGPU::SGPR_LO16RegClass) {
+ assert(ST->useRealTrue16Insts());
+ MRI->setRegClass(DestReg, &AMDGPU::SGPR_32RegClass);
+ DestRC = &AMDGPU::SGPR_32RegClass;
+ }
+
const auto &SrcOp = UseMI->getOperand(UseOpIdx);
MachineOperand NewSrcOp(SrcOp);
MachineFunction *MF = UseMI->getParent()->getParent();
diff --git a/llvm/test/CodeGen/AMDGPU/fold-imm-copy-agpr.mir b/llvm/test/CodeGen/AMDGPU/fold-imm-copy-agpr.mir
index a079ee1296f41..6f2e33900a79a 100644
--- a/llvm/test/CodeGen/AMDGPU/fold-imm-copy-agpr.mir
+++ b/llvm/test/CodeGen/AMDGPU/fold-imm-copy-agpr.mir
@@ -91,8 +91,8 @@ body: |
bb.0:
; GCN-LABEL: name: v_mov_b64_pseudo_lit_copy_sub0_to_agpr_32
; GCN: [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 4290672329592, implicit $exec
- ; GCN-NEXT: [[COPY:%[0-9]+]]:agpr_32 = COPY [[V_MOV_B]].sub0
- ; GCN-NEXT: $agpr0 = COPY [[COPY]]
+ ; GCN-NEXT: [[V_ACCVGPR_WRITE_B32_e64_:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 [[V_MOV_B]].sub0, implicit $exec
+ ; GCN-NEXT: $agpr0 = COPY [[V_ACCVGPR_WRITE_B32_e64_]]
; GCN-NEXT: S_ENDPGM 0
%0:vreg_64_align2 = V_MOV_B64_PSEUDO 4290672329592, implicit $exec
%1:agpr_32 = COPY %0.sub0
@@ -108,8 +108,8 @@ body: |
bb.0:
; GCN-LABEL: name: v_mov_b64_pseudo_lit_copy_sub1_to_agpr_32
; GCN: [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 4290672329592, implicit $exec
- ; GCN-NEXT: [[COPY:%[0-9]+]]:agpr_32 = COPY [[V_MOV_B]].sub1
- ; GCN-NEXT: $agpr0 = COPY [[COPY]]
+ ; GCN-NEXT: [[V_ACCVGPR_WRITE_B32_e64_:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 [[V_MOV_B]].sub1, implicit $exec
+ ; GCN-NEXT: $agpr0 = COPY [[V_ACCVGPR_WRITE_B32_e64_]]
; GCN-NEXT: S_ENDPGM 0
%0:vreg_64_align2 = V_MOV_B64_PSEUDO 4290672329592, implicit $exec
%1:agpr_32 = COPY %0.sub1
@@ -133,3 +133,329 @@ body: |
S_ENDPGM 0, implicit %1
...
+
+---
+name: s_mov_b32_imm_0_copy_to_agpr_32
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; GCN-LABEL: name: s_mov_b32_imm_0_copy_to_agpr_32
+ ; GCN: [[V_ACCVGPR_WRITE_B32_e64_:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 0, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[V_ACCVGPR_WRITE_B32_e64_]]
+ %0:sreg_32 = S_MOV_B32 0, implicit $exec
+ %1:agpr_32 = COPY %0
+ S_ENDPGM 0, implicit %1
+
+...
+
+---
+name: s_mov_b32_imm_neg16_copy_to_agpr_32
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; GCN-LABEL: name: s_mov_b32_imm_neg16_copy_to_agpr_32
+ ; GCN: [[V_ACCVGPR_WRITE_B32_e64_:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 -16, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[V_ACCVGPR_WRITE_B32_e64_]]
+ %0:sreg_32 = S_MOV_B32 -16, implicit $exec
+ %1:agpr_32 = COPY %0
+ S_ENDPGM 0, implicit %1
+
+...
+
+---
+name: s_mov_b32_imm_65_copy_to_agpr_32
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; GCN-LABEL: name: s_mov_b32_imm_65_copy_to_agpr_32
+ ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65, implicit $exec
+ ; GCN-NEXT: [[V_ACCVGPR_WRITE_B32_e64_:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 [[S_MOV_B32_]], implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[V_ACCVGPR_WRITE_B32_e64_]]
+ %0:sreg_32 = S_MOV_B32 65, implicit $exec
+ %1:agpr_32 = COPY %0
+ S_ENDPGM 0, implicit %1
+
+...
+
+---
+name: s_mov_b32_imm_0_copy_to_av_32
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; GCN-LABEL: name: s_mov_b32_imm_0_copy_to_av_32
+ ; GCN: [[AV_MOV_:%[0-9]+]]:av_32 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[AV_MOV_]]
+ %0:sreg_32 = S_MOV_B32 0, implicit $exec
+ %1:av_32 = COPY %0
+ S_ENDPGM 0, implicit %1
+
+...
+
+---
+name: s_mov_b32_imm_neg16_copy_to_av_32
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; GCN-LABEL: name: s_mov_b32_imm_neg16_copy_to_av_32
+ ; GCN: [[AV_MOV_:%[0-9]+]]:av_32 = AV_MOV_B32_IMM_PSEUDO -16, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[AV_MOV_]]
+ %0:sreg_32 = S_MOV_B32 -16, implicit $exec
+ %1:av_32 = COPY %0
+ S_ENDPGM 0, implicit %1
+
+...
+
+---
+name: s_mov_b32_imm_65_copy_to_av_32
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; GCN-LABEL: name: s_mov_b32_imm_65_copy_to_av_32
+ ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65, implicit $exec
+ ; GCN-NEXT: [[AV_MOV_:%[0-9]+]]:av_32 = AV_MOV_B32_IMM_PSEUDO [[S_MOV_B32_]], implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[AV_MOV_]]
+ %0:sreg_32 = S_MOV_B32 65, implicit $exec
+ %1:av_32 = COPY %0
+ S_ENDPGM 0, implicit %1
+
+...
+
+---
+name: s_mov_b64_imm_0_copy_to_areg_64
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; GCN-LABEL: name: s_mov_b64_imm_0_copy_to_areg_64
+ ; GCN: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0, implicit $exec
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:areg_64 = COPY [[S_MOV_B64_]]
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
+ %0:sreg_64 = S_MOV_B64 0, implicit $exec
+ %1:areg_64 = COPY %0
+ S_ENDPGM 0, implicit %1
+
+...
+
+---
+name: s_mov_b64_imm_0_copy_to_areg_64_align2
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; GCN-LABEL: name: s_mov_b64_imm_0_copy_to_areg_64_align2
+ ; GCN: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0, implicit $exec
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:areg_64_align2 = COPY [[S_MOV_B64_]]
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
+ %0:sreg_64 = S_MOV_B64 0, implicit $exec
+ %1:areg_64_align2 = COPY %0
+ S_ENDPGM 0, implicit %1
+
+...
+
+---
+name: s_mov_b64_imm_neg16_copy_to_areg_64
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; GCN-LABEL: name: s_mov_b64_imm_neg16_copy_to_areg_64
+ ; GCN: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 -16, implicit $exec
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:areg_64 = COPY [[S_MOV_B64_]]
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
+ %0:sreg_64 = S_MOV_B64 -16, implicit $exec
+ %1:areg_64 = COPY %0
+ S_ENDPGM 0, implicit %1
+
+...
+
+---
+name: s_mov_b64_imm_neg16_copy_to_areg_64_align2
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; GCN-LABEL: name: s_mov_b64_imm_neg16_copy_to_areg_64_align2
+ ; GCN: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 -16, implicit $exec
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:areg_64_align2 = COPY [[S_MOV_B64_]]
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
+ %0:sreg_64 = S_MOV_B64 -16, implicit $exec
+ %1:areg_64_align2 = COPY %0
+ S_ENDPGM 0, implicit %1
+
+...
+
+---
+name: s_mov_b64_imm_0_copy_to_av_64
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; GCN-LABEL: name: s_mov_b64_imm_0_copy_to_av_64
+ ; GCN: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0, implicit $exec
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:av_64 = COPY [[S_MOV_B64_]]
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
+ %0:sreg_64 = S_MOV_B64 0, implicit $exec
+ %1:av_64 = COPY %0
+ S_ENDPGM 0, implicit %1
+
+...
+
+---
+name: s_mov_b64_imm_0_copy_to_av_64_align2
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; GCN-LABEL: name: s_mov_b64_imm_0_copy_to_av_64_align2
+ ; GCN: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0, implicit $exec
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:av_64_align2 = COPY [[S_MOV_B64_]]
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
+ %0:sreg_64 = S_MOV_B64 0, implicit $exec
+ %1:av_64_align2 = COPY %0
+ S_ENDPGM 0, implicit %1
+
+...
+
+---
+name: s_mov_b64_imm_neg16_copy_to_av_64
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; GCN-LABEL: name: s_mov_b64_imm_neg16_copy_to_av_64
+ ; GCN: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 -16, implicit $exec
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:av_64 = COPY [[S_MOV_B64_]]
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
+ %0:sreg_64 = S_MOV_B64 -16, implicit $exec
+ %1:av_64 = COPY %0
+ S_ENDPGM 0, implicit %1
+
+...
+
+---
+name: s_mov_b64_imm_neg16_copy_to_av_64_align2
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; GCN-LABEL: name: s_mov_b64_imm_neg16_copy_to_av_64_align2
+ ; GCN: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 -16, implicit $exec
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:av_64_align2 = COPY [[S_MOV_B64_]]
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
+ %0:sreg_64 = S_MOV_B64 -16, implicit $exec
+ %1:av_64_align2 = COPY %0
+ S_ENDPGM 0, implicit %1
+
+...
+
+---
+name: s_mov_b64_imm_pseudo_literal_32_halves_copy_to_areg_64
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; GCN-LABEL: name: s_mov_b64_imm_pseudo_literal_32_halves_copy_to_areg_64
+ ; GCN: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -42949672960, implicit $exec
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:areg_64 = COPY [[S_MOV_B]]
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
+ %0:sreg_64 = S_MOV_B64_IMM_PSEUDO 18446744030759878656, implicit $exec
+ %1:areg_64 = COPY %0
+ S_ENDPGM 0, implicit %1
+
+...
+
+---
+name: s_mov_b64_imm_pseudo_literal_32_halves_copy_to_areg_64_align2
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; GCN-LABEL: name: s_mov_b64_imm_pseudo_literal_32_halves_copy_to_areg_64_align2
+ ; GCN: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -42949672960, implicit $exec
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:areg_64_align2 = COPY [[S_MOV_B]]
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
+ %0:sreg_64 = S_MOV_B64_IMM_PSEUDO 18446744030759878656, implicit $exec
+ %1:areg_64_align2 = COPY %0
+ S_ENDPGM 0, implicit %1
+
+...
+
+---
+name: s_mov_b64_imm_pseudo_inlineimm_32_halves_copy_to_areg_64
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; GCN-LABEL: name: s_mov_b64_imm_pseudo_inlineimm_32_halves_copy_to_areg_64
+ ; GCN: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -21474836480, implicit $exec
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:areg_64 = COPY [[S_MOV_B]]
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
+ %0:sreg_64 = S_MOV_B64_IMM_PSEUDO 18446744052234715136, implicit $exec
+ %1:areg_64 = COPY %0
+ S_ENDPGM 0, implicit %1
+
+...
+
+---
+name: s_mov_b64_imm_pseudo_inlineimm_32_halves_copy_to_areg_64_align2
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; GCN-LABEL: name: s_mov_b64_imm_pseudo_inlineimm_32_halves_copy_to_areg_64_align2
+ ; GCN: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -21474836480, implicit $exec
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:areg_64_align2 = COPY [[S_MOV_B]]
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
+ %0:sreg_64 = S_MOV_B64_IMM_PSEUDO 18446744052234715136, implicit $exec
+ %1:areg_64_align2 = COPY %0
+ S_ENDPGM 0, implicit %1
+
+...
+
+---
+name: s_mov_b64_imm_pseudo_literal_32_halves_copy_to_av_64
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; GCN-LABEL: name: s_mov_b64_imm_pseudo_literal_32_halves_copy_to_av_64
+ ; GCN: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -42949672960, implicit $exec
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:av_64 = COPY [[S_MOV_B]]
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
+ %0:sreg_64 = S_MOV_B64_IMM_PSEUDO 18446744030759878656, implicit $exec
+ %1:av_64 = COPY %0
+ S_ENDPGM 0, implicit %1
+
+...
+
+---
+name: s_mov_b64_imm_pseudo_literal_32_halves_copy_to_av_64_align2
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; GCN-LABEL: name: s_mov_b64_imm_pseudo_literal_32_halves_copy_to_av_64_align2
+ ; GCN: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -42949672960, implicit $exec
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:av_64_align2 = COPY [[S_MOV_B]]
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
+ %0:sreg_64 = S_MOV_B64_IMM_PSEUDO 18446744030759878656, implicit $exec
+ %1:av_64_align2 = COPY %0
+ S_ENDPGM 0, implicit %1
+
+...
+
+---
+name: s_mov_b64_imm_pseudo_inlineimm_32_halves_copy_to_av_64
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; GCN-LABEL: name: s_mov_b64_imm_pseudo_inlineimm_32_halves_copy_to_av_64
+ ; GCN: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -9223372036854775784, implicit $exec
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:av_64 = COPY [[S_MOV_B]]
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
+ %0:sreg_64 = S_MOV_B64_IMM_PSEUDO -9223372036854775784, implicit $exec
+ %1:av_64 = COPY %0
+ S_ENDPGM 0, implicit %1
+
+...
+
+---
+name: s_mov_b64_imm_pseudo_inlineimm_32_halves_copy_to_av_64_align2
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; GCN-LABEL: name: s_mov_b64_imm_pseudo_inlineimm_32_halves_copy_to_av_64_align2
+ ; GCN: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -9223372036854775784, implicit $exec
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:av_64_align2 = COPY [[S_MOV_B]]
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
+ %0:sreg_64 = S_MOV_B64_IMM_PSEUDO -9223372036854775784, implicit $exec
+ %1:av_64_align2 = COPY %0
+ S_ENDPGM 0, implicit %1
+
+...
diff --git a/llvm/test/CodeGen/AMDGPU/fold-imm-copy.mir b/llvm/test/CodeGen/AMDGPU/fold-imm-copy.mir
index 9a51f457a567a..ddf2aa34ecd87 100644
--- a/llvm/test/CodeGen/AMDGPU/fold-imm-copy.mir
+++ b/llvm/test/CodeGen/AMDGPU/fold-imm-copy.mir
@@ -191,8 +191,8 @@ body: |
bb.0:
; GCN-LABEL: name: v_mov_b32_imm_literal_copy_v_to_agpr_32
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 999, implicit $exec
- ; GCN-NEXT: [[COPY:%[0-9]+]]:agpr_32 = COPY [[V_MOV_B32_e32_]]
- ; GCN-NEXT: $agpr0 = COPY [[COPY]]
+ ; GCN-NEXT: [[V_ACCVGPR_WRITE_B32_e64_:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 [[V_MOV_B32_e32_]], implicit $exec
+ ; GCN-NEXT: $agpr0 = COPY [[V_ACCVGPR_WRITE_B32_e64_]]
; GCN-NEXT: S_ENDPGM 0
%0:vgpr_32 = V_MOV_B32_e32 999, implicit $exec
%1:agpr_32 = COPY %0
@@ -207,9 +207,8 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: s_mov_b32_inlineimm_copy_s_to_av_32
- ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32
- ; GCN-NEXT: [[COPY:%[0-9]+]]:av_32 = COPY [[S_MOV_B32_]]
- ; GCN-NEXT: $agpr0 = COPY [[COPY]]
+ ; GCN: [[AV_MOV_:%[0-9]+]]:av_32 = AV_MOV_B32_IMM_PSEUDO 32, implicit $exec
+ ; GCN-NEXT: $agpr0 = COPY [[AV_MOV_]]
; GCN-NEXT: S_ENDPGM 0
%0:sreg_32 = S_MOV_B32 32
%1:av_32 = COPY %0
@@ -224,9 +223,8 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: v_mov_b32_inlineimm_copy_v_to_av_32
- ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 32, implicit $exec
- ; GCN-NEXT: [[COPY:%[0-9]+]]:av_32 = COPY [[V_MOV_B32_e32_]]
- ; GCN-NEXT: $agpr0 = COPY [[COPY]]
+ ; GCN: [[AV_MOV_:%[0-9]+]]:av_32 = AV_MOV_B32_IMM_PSEUDO 32, implicit $exec
+ ; GCN-NEXT: $agpr0 = COPY [[AV_MOV_]]
; GCN-NEXT: S_ENDPGM 0
%0:vgpr_32 = V_MOV_B32_e32 32, implicit $exec
%1:av_32 = COPY %0
@@ -242,8 +240,8 @@ body: |
bb.0:
; GCN-LABEL: name: s_mov_b32_imm_literal_copy_s_to_av_32
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 999
- ; GCN-NEXT: [[COPY:%[0-9]+]]:av_32 = COPY [[S_MOV_B32_]]
- ; GCN-NEXT: $agpr0 = COPY [[COPY]]
+ ; GCN-NEXT: [[AV_MOV_:%[0-9]+]]:av_32 = AV_MOV_B32_IMM_PSEUDO [[S_MOV_B32_]], implicit $exec
+ ; GCN-NEXT: $agpr0 = COPY [[AV_MOV_]]
; GCN-NEXT: S_ENDPGM 0
%0:sreg_32 = S_MOV_B32 999
%1:av_32 = COPY %0
@@ -259,8 +257,8 @@ body: |
bb.0:
; GCN-LABEL: name: v_mov_b32_imm_literal_copy_v_to_av_32
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 999, implicit $exec
- ; GCN-NEXT: [[COPY:%[0-9]+]]:av_32 = COPY [[V_MOV_B32_e32_]]
- ; GCN-NEXT: $agpr0 = COPY [[COPY]]
+ ; GCN-NEXT: [[AV_MOV_:%[0-9]+]]:av_32 = AV_MOV_B32_IMM_PSEUDO [[V_MOV_B32_e32_]], implicit $exec
+ ; GCN-NEXT: $agpr0 = COPY [[AV_MOV_]]
; GCN-NEXT: S_ENDPGM 0
%0:vgpr_32 = V_MOV_B32_e32 999, implicit $exec
%1:av_32 = COPY %0
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.bf16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.bf16.ll
index 0488f5d38b39d..1e44a09ddadf9 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.bf16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.bf16.ll
@@ -35,9 +35,10 @@ define amdgpu_kernel void @rcp_bf16_constant_4(ptr addrspace(1) %out) #1 {
; SDAG-TRUE16-LABEL: rcp_bf16_constant_4:
; SDAG-TRUE16: ; %bb.0:
; SDAG-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; SDAG-TRUE16-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x3e80
+; SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0x3e80
+; SDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 0
; SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
-; SDAG-TRUE16-NEXT: flat_store_b16 v0, v1, s[0:1]
+; SDAG-TRUE16-NEXT: flat_store_b16 v1, v0, s[0:1]
; SDAG-TRUE16-NEXT: s_endpgm
;
; SDAG-FAKE16-LABEL: rcp_bf16_constant_4:
@@ -56,9 +57,10 @@ define amdgpu_kernel void @rcp_bf16_constant_100(ptr addrspace(1) %out) #1 {
; SDAG-TRUE16-LABEL: rcp_bf16_constant_100:
; SDAG-TRUE16: ; %bb.0:
; SDAG-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; SDAG-TRUE16-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x3c24
+; SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0x3c24
+; SDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 0
; SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
-; SDAG-TRUE16-NEXT: flat_store_b16 v0, v1, s[0:1]
+; SDAG-TRUE16-NEXT: flat_store_b16 v1, v0, s[0:1]
; SDAG-TRUE16-NEXT: s_endpgm
;
; SDAG-FAKE16-LABEL: rcp_bf16_constant_100:
@@ -77,9 +79,10 @@ define amdgpu_kernel void @rcp_undef_bf16(ptr addrspace(1) %out) #1 {
; SDAG-TRUE16-LABEL: rcp_undef_bf16:
; SDAG-TRUE16: ; %bb.0:
; SDAG-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; SDAG-TRUE16-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7fc0
+; SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0x7fc0
+; SDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 0
; SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
-; SDAG-TRUE16-NEXT: flat_store_b16 v0, v1, s[0:1]
+; SDAG-TRUE16-NEXT: flat_store_b16 v1, v0, s[0:1]
; SDAG-TRUE16-NEXT: s_endpgm
;
; SDAG-FAKE16-LABEL: rcp_undef_bf16:
diff --git a/llvm/test/CodeGen/AMDGPU/mfma-loop.ll b/llvm/test/CodeGen/AMDGPU/mfma-loop.ll
index 6110b3101020a..d39daaade677f 100644
--- a/llvm/test/CodeGen/AMDGPU/mfma-loop.ll
+++ b/llvm/test/CodeGen/AMDGPU/mfma-loop.ll
@@ -708,103 +708,72 @@ define amdgpu_kernel void @test_mfma_loop_unfoldable_seq(ptr addrspace(1) %arg)
; GFX908-LABEL: test_mfma_loop_unfoldable_seq:
; GFX908: ; %bb.0: ; %entry
; GFX908-NEXT: v_mov_b32_e32 v0, 0x431a0000
-; GFX908-NEXT: s_mov_b32 s0, 16
-; GFX908-NEXT: v_mov_b32_e32 v1, 1.0
+; GFX908-NEXT: v_mov_b32_...
[truncated]
Review thread on the relocated sgpr_lo16 hunk:

    // Allow immediates COPYd into sgpr_lo16 to be further folded while
    // still being legal if not further folded
    if (DestRC == &AMDGPU::SGPR_LO16RegClass) {
      assert(ST->useRealTrue16Insts());
      MRI->setRegClass(DestReg, &AMDGPU::SGPR_32RegClass);
      DestRC = &AMDGPU::SGPR_32RegClass;
    }
arsenm: Moving this down here doesn't make sense, but I did it in a vain effort to avoid the true16 regression.
Reviewer reply: I do not think it is really a regression; it is just that VOPD does not have true16 support. It's a question of which is better, after all.
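For reference, the true16 change in llvm.amdgcn.rcp.bf16.ll above shows exactly this trade-off: the old code materialized both constants with a single dual-issue VOPD instruction, while the new 16-bit mov cannot be paired and is emitted separately:

    ; before
    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x3e80
    ; after
    v_mov_b16_e32 v0.l, 0x3e80
    v_mov_b32_e32 v1, 0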
Previously we had special case folding into copies to AGPR_32, ignoring AV_32. Try folding into the pseudos. Not sure why the true16 case regressed.
Force-pushed from f79cef3 to 5fe4d5e.