-
Notifications
You must be signed in to change notification settings - Fork 14.9k
AMDGPU: Fix DPP combiner using isOperandLegal on incomplete inst #155595
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: users/arsenm/amdgpu/add-version-isImmOperandLegal-MCInstrDesc
Are you sure you want to change the base?
Conversation
Warning This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack on Graphite.
This stack of pull requests is managed by Graphite. Learn more about stacking. |
@llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) ChangesIt is not safe to use isOperandLegal on an instruction that does This seems to fix a missed optimization in the gfx11 test. The Full diff: https://github.com/llvm/llvm-project/pull/155595.diff 2 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
index 184929a5a50f6..3831aacc2a639 100644
--- a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
@@ -250,7 +250,7 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
++NumOperands;
}
if (auto *SDst = TII->getNamedOperand(OrigMI, AMDGPU::OpName::sdst)) {
- if (TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, SDst)) {
+ if (AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::sdst)) {
DPPInst.add(*SDst);
++NumOperands;
}
@@ -296,11 +296,7 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
auto *Src0 = TII->getNamedOperand(MovMI, AMDGPU::OpName::src0);
assert(Src0);
int Src0Idx = NumOperands;
- if (!TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src0)) {
- LLVM_DEBUG(dbgs() << " failed: src0 is illegal\n");
- Fail = true;
- break;
- }
+
DPPInst.add(*Src0);
DPPInst->getOperand(NumOperands).setIsKill(false);
++NumOperands;
@@ -319,7 +315,7 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
}
auto *Src1 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1);
if (Src1) {
- int OpNum = NumOperands;
+ assert(AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::src1));
// If subtarget does not support SGPRs for src1 operand then the
// requirements are the same as for src0. We check src0 instead because
// pseudos are shared between subtargets and allow SGPR for src1 on all.
@@ -327,13 +323,8 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
assert(getOperandSize(*DPPInst, Src0Idx, *MRI) ==
getOperandSize(*DPPInst, NumOperands, *MRI) &&
"Src0 and Src1 operands should have the same size");
- OpNum = Src0Idx;
- }
- if (!TII->isOperandLegal(*DPPInst.getInstr(), OpNum, Src1)) {
- LLVM_DEBUG(dbgs() << " failed: src1 is illegal\n");
- Fail = true;
- break;
}
+
DPPInst.add(*Src1);
++NumOperands;
}
@@ -349,9 +340,8 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
}
auto *Src2 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src2);
if (Src2) {
- if (!TII->getNamedOperand(*DPPInst.getInstr(), AMDGPU::OpName::src2) ||
- !TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src2)) {
- LLVM_DEBUG(dbgs() << " failed: src2 is illegal\n");
+ if (!AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::src2)) {
+ LLVM_DEBUG(dbgs() << " failed: dpp does not have src2\n");
Fail = true;
break;
}
@@ -431,6 +421,19 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::row_mask));
DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::bank_mask));
DPPInst.addImm(CombBCZ ? 1 : 0);
+
+ for (AMDGPU::OpName Op :
+ {AMDGPU::OpName::src0, AMDGPU::OpName::src1, AMDGPU::OpName::src2}) {
+ int OpIdx = AMDGPU::getNamedOperandIdx(DPPOp, Op);
+ if (OpIdx == -1)
+ break;
+
+ if (!TII->isOperandLegal(*DPPInst, OpIdx)) {
+ LLVM_DEBUG(dbgs() << " failed: src operand is illegal\n");
+ Fail = true;
+ break;
+ }
+ }
} while (false);
if (Fail) {
diff --git a/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir b/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir
index fb20e72a77103..3725384e885ee 100644
--- a/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir
+++ b/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir
@@ -1,6 +1,6 @@
-# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=GCN,GFX1100
-# RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -mattr=-real-true16 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=GCN,GFX1150
-# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=GCN,GFX1150
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=GCN
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -mattr=-real-true16 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=GCN
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=GCN
---
@@ -8,8 +8,7 @@
# GCN: %6:vgpr_32, %7:sreg_32_xm0_xexec = V_SUBBREV_U32_e64_dpp %3, %0, %1, %5, 1, 1, 15, 15, 1, implicit $exec
# GCN: %8:vgpr_32 = V_CVT_PK_U8_F32_e64_dpp %3, 4, %0, 2, %2, 2, %1, 1, 1, 15, 15, 1, implicit $mode, implicit $exec
# GCN: %10:vgpr_32 = V_MED3_F32_e64 0, %9, 0, %0, 0, 12345678, 0, 0, implicit $mode, implicit $exec
-# GFX1100: %12:vgpr_32 = V_MED3_F32_e64 0, %11, 0, 2, 0, %7, 0, 0, implicit $mode, implicit $exec
-# GFX1150: %12:vgpr_32 = V_MED3_F32_e64_dpp %3, 0, %1, 0, 2, 0, %7, 0, 0, 1, 15, 15, 1, implicit $mode, implicit $exec
+# GCN: %12:vgpr_32 = V_MED3_F32_e64_dpp %3, 0, %1, 0, 2, 0, %7, 0, 0, 1, 15, 15, 1, implicit $mode, implicit $exec
name: vop3
tracksRegLiveness: true
body: |
@@ -39,12 +38,9 @@ body: |
# GCN-LABEL: name: vop3_sgpr_src1
# GCN: %6:vgpr_32 = V_MED3_F32_e64_dpp %4, 0, %0, 0, %1, 0, %2, 0, 0, 1, 15, 15, 1, implicit $mode, implicit $exec
-# GFX1100: %8:vgpr_32 = V_MED3_F32_e64 0, %7, 0, %2, 0, %1, 0, 0, implicit $mode, implicit $exec
-# GFX1150: %8:vgpr_32 = V_MED3_F32_e64_dpp %4, 0, %0, 0, %2, 0, %1, 0, 0, 1, 15, 15, 1, implicit $mode, implicit $exec
-# GFX1100: %10:vgpr_32 = V_MED3_F32_e64 0, %9, 0, %2, 0, %3, 0, 0, implicit $mode, implicit $exec
-# GFX1150: %10:vgpr_32 = V_MED3_F32_e64_dpp %4, 0, %0, 0, %2, 0, %3, 0, 0, 1, 15, 15, 1, implicit $mode, implicit $exec
-# GFX1100: %12:vgpr_32 = V_MED3_F32_e64 0, %11, 0, 42, 0, %2, 0, 0, implicit $mode, implicit $exec
-# GFX1150: %12:vgpr_32 = V_MED3_F32_e64_dpp %4, 0, %0, 0, 42, 0, %2, 0, 0, 1, 15, 15, 1, implicit $mode, implicit $exec
+# GCN: %8:vgpr_32 = V_MED3_F32_e64_dpp %4, 0, %0, 0, %2, 0, %1, 0, 0, 1, 15, 15, 1, implicit $mode, implicit $exec
+# GCN: %10:vgpr_32 = V_MED3_F32_e64_dpp %4, 0, %0, 0, %2, 0, %3, 0, 0, 1, 15, 15, 1, implicit $mode, implicit $exec
+# GCN: %12:vgpr_32 = V_MED3_F32_e64_dpp %4, 0, %0, 0, 42, 0, %2, 0, 0, 1, 15, 15, 1, implicit $mode, implicit $exec
# GCN: %14:vgpr_32 = V_MED3_F32_e64 0, %13, 0, 4242, 0, %2, 0, 0, implicit $mode, implicit $exec
name: vop3_sgpr_src1
tracksRegLiveness: true
|
@@ -319,21 +315,16 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI, | |||
} | |||
auto *Src1 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1); | |||
if (Src1) { | |||
int OpNum = NumOperands; | |||
assert(AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::src1)); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: I'm keen on assert failure messages, it might be obvious to us but it can really help the programmer and the code to self-document.
It is not safe to use isOperandLegal on an instruction that does not have a complete set of operands. Unforunately the APIs are not set up in a convenient way to speculatively check if an instruction will be legal in a hypothetical instruction. Build all the operands and then verify they are legal after. This is clumsy, we should have a more direct check for will these operands give a legal instruction. This seems to fix a missed optimization in the gfx11 test. The fold was firing for gfx1150, but not gfx1100. Both should support vop3 literals so I'm not sure why it wasn't working before.
088e57a
to
11df2dc
Compare
@@ -250,7 +250,7 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI, | |||
++NumOperands; | |||
} | |||
if (auto *SDst = TII->getNamedOperand(OrigMI, AMDGPU::OpName::sdst)) { | |||
if (TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, SDst)) { | |||
if (AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::sdst)) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The TII->isOperandLegal
invocations for the srcX
operands have been moved to a new loop below, but it seems that this one went away, i.e. it is not contained in the list of operands being checked in the loop. Is this intended?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes. isOperandLegal doesn't do anything meaningful on results
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM.
It is not safe to use isOperandLegal on an instruction that does
not have a complete set of operands. Unforunately the APIs are
not set up in a convenient way to speculatively check if an instruction
will be legal in a hypothetical instruction. Build all the operands
and then verify they are legal after. This is clumsy, we should have
a more direct check for will these operands give a legal instruction.
This seems to fix a missed optimization in the gfx11 test. The
fold was firing for gfx1150, but not gfx1100. Both should support
vop3 literals so I'm not sure why it wasn't working before.