-
Notifications
You must be signed in to change notification settings - Fork 14.9k
AMDGPU: Use getMergedLocation in SILoadStoreOptimizer #156396
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
AMDGPU: Use getMergedLocation in SILoadStoreOptimizer #156396
Conversation
@llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) ChangesThis is merging loads and stores so use the combined DebugLoc. Not sure if computeBase should be using the merged location from Full diff: https://github.com/llvm/llvm-project/pull/156396.diff 1 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
index 263f7127fbf10..5d2143904c873 100644
--- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
@@ -232,10 +232,11 @@ class SILoadStoreOptimizer {
void copyToDestRegs(CombineInfo &CI, CombineInfo &Paired,
MachineBasicBlock::iterator InsertBefore,
- AMDGPU::OpName OpName, Register DestReg) const;
+ const DebugLoc &DL, AMDGPU::OpName OpName,
+ Register DestReg) const;
Register copyFromSrcRegs(CombineInfo &CI, CombineInfo &Paired,
MachineBasicBlock::iterator InsertBefore,
- AMDGPU::OpName OpName) const;
+ const DebugLoc &DL, AMDGPU::OpName OpName) const;
unsigned read2Opcode(unsigned EltSize) const;
unsigned read2ST64Opcode(unsigned EltSize) const;
@@ -1320,10 +1321,9 @@ SILoadStoreOptimizer::checkAndPrepareMerge(CombineInfo &CI,
// Paired.
void SILoadStoreOptimizer::copyToDestRegs(
CombineInfo &CI, CombineInfo &Paired,
- MachineBasicBlock::iterator InsertBefore, AMDGPU::OpName OpName,
- Register DestReg) const {
+ MachineBasicBlock::iterator InsertBefore, const DebugLoc &DL,
+ AMDGPU::OpName OpName, Register DestReg) const {
MachineBasicBlock *MBB = CI.I->getParent();
- DebugLoc DL = CI.I->getDebugLoc();
auto [SubRegIdx0, SubRegIdx1] = getSubRegIdxs(CI, Paired);
@@ -1351,9 +1351,9 @@ void SILoadStoreOptimizer::copyToDestRegs(
Register
SILoadStoreOptimizer::copyFromSrcRegs(CombineInfo &CI, CombineInfo &Paired,
MachineBasicBlock::iterator InsertBefore,
+ const DebugLoc &DL,
AMDGPU::OpName OpName) const {
MachineBasicBlock *MBB = CI.I->getParent();
- DebugLoc DL = CI.I->getDebugLoc();
auto [SubRegIdx0, SubRegIdx1] = getSubRegIdxs(CI, Paired);
@@ -1409,7 +1409,8 @@ SILoadStoreOptimizer::mergeRead2Pair(CombineInfo &CI, CombineInfo &Paired,
const TargetRegisterClass *SuperRC = getTargetRegisterClass(CI, Paired);
Register DestReg = MRI->createVirtualRegister(SuperRC);
- DebugLoc DL = CI.I->getDebugLoc();
+ DebugLoc DL = DebugLoc::getMergedLocation(CI.I->getDebugLoc(),
+ Paired.I->getDebugLoc());
Register BaseReg = AddrReg->getReg();
unsigned BaseSubReg = AddrReg->getSubReg();
@@ -1437,7 +1438,7 @@ SILoadStoreOptimizer::mergeRead2Pair(CombineInfo &CI, CombineInfo &Paired,
.addImm(0) // gds
.cloneMergedMemRefs({&*CI.I, &*Paired.I});
- copyToDestRegs(CI, Paired, InsertBefore, AMDGPU::OpName::vdst, DestReg);
+ copyToDestRegs(CI, Paired, InsertBefore, DL, AMDGPU::OpName::vdst, DestReg);
CI.I->eraseFromParent();
Paired.I->eraseFromParent();
@@ -1491,7 +1492,8 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeWrite2Pair(
(NewOffset0 != NewOffset1) && "Computed offset doesn't fit");
const MCInstrDesc &Write2Desc = TII->get(Opc);
- DebugLoc DL = CI.I->getDebugLoc();
+ DebugLoc DL = DebugLoc::getMergedLocation(CI.I->getDebugLoc(),
+ Paired.I->getDebugLoc());
Register BaseReg = AddrReg->getReg();
unsigned BaseSubReg = AddrReg->getSubReg();
@@ -1532,7 +1534,9 @@ MachineBasicBlock::iterator
SILoadStoreOptimizer::mergeImagePair(CombineInfo &CI, CombineInfo &Paired,
MachineBasicBlock::iterator InsertBefore) {
MachineBasicBlock *MBB = CI.I->getParent();
- DebugLoc DL = CI.I->getDebugLoc();
+ DebugLoc DL = DebugLoc::getMergedLocation(CI.I->getDebugLoc(),
+ Paired.I->getDebugLoc());
+
const unsigned Opcode = getNewOpcode(CI, Paired);
const TargetRegisterClass *SuperRC = getTargetRegisterClass(CI, Paired);
@@ -1557,7 +1561,7 @@ SILoadStoreOptimizer::mergeImagePair(CombineInfo &CI, CombineInfo &Paired,
MachineInstr *New = MIB.addMemOperand(combineKnownAdjacentMMOs(CI, Paired));
- copyToDestRegs(CI, Paired, InsertBefore, AMDGPU::OpName::vdata, DestReg);
+ copyToDestRegs(CI, Paired, InsertBefore, DL, AMDGPU::OpName::vdata, DestReg);
CI.I->eraseFromParent();
Paired.I->eraseFromParent();
@@ -1568,7 +1572,9 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeSMemLoadImmPair(
CombineInfo &CI, CombineInfo &Paired,
MachineBasicBlock::iterator InsertBefore) {
MachineBasicBlock *MBB = CI.I->getParent();
- DebugLoc DL = CI.I->getDebugLoc();
+ DebugLoc DL = DebugLoc::getMergedLocation(CI.I->getDebugLoc(),
+ Paired.I->getDebugLoc());
+
const unsigned Opcode = getNewOpcode(CI, Paired);
const TargetRegisterClass *SuperRC = getTargetRegisterClass(CI, Paired);
@@ -1589,7 +1595,7 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeSMemLoadImmPair(
New.addImm(MergedOffset);
New.addImm(CI.CPol).addMemOperand(combineKnownAdjacentMMOs(CI, Paired));
- copyToDestRegs(CI, Paired, InsertBefore, AMDGPU::OpName::sdst, DestReg);
+ copyToDestRegs(CI, Paired, InsertBefore, DL, AMDGPU::OpName::sdst, DestReg);
CI.I->eraseFromParent();
Paired.I->eraseFromParent();
@@ -1600,7 +1606,9 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeBufferLoadPair(
CombineInfo &CI, CombineInfo &Paired,
MachineBasicBlock::iterator InsertBefore) {
MachineBasicBlock *MBB = CI.I->getParent();
- DebugLoc DL = CI.I->getDebugLoc();
+
+ DebugLoc DL = DebugLoc::getMergedLocation(CI.I->getDebugLoc(),
+ Paired.I->getDebugLoc());
const unsigned Opcode = getNewOpcode(CI, Paired);
@@ -1630,7 +1638,7 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeBufferLoadPair(
.addImm(0) // swz
.addMemOperand(combineKnownAdjacentMMOs(CI, Paired));
- copyToDestRegs(CI, Paired, InsertBefore, AMDGPU::OpName::vdata, DestReg);
+ copyToDestRegs(CI, Paired, InsertBefore, DL, AMDGPU::OpName::vdata, DestReg);
CI.I->eraseFromParent();
Paired.I->eraseFromParent();
@@ -1641,7 +1649,9 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeTBufferLoadPair(
CombineInfo &CI, CombineInfo &Paired,
MachineBasicBlock::iterator InsertBefore) {
MachineBasicBlock *MBB = CI.I->getParent();
- DebugLoc DL = CI.I->getDebugLoc();
+
+ DebugLoc DL = DebugLoc::getMergedLocation(CI.I->getDebugLoc(),
+ Paired.I->getDebugLoc());
const unsigned Opcode = getNewOpcode(CI, Paired);
@@ -1681,7 +1691,7 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeTBufferLoadPair(
.addImm(0) // swz
.addMemOperand(combineKnownAdjacentMMOs(CI, Paired));
- copyToDestRegs(CI, Paired, InsertBefore, AMDGPU::OpName::vdata, DestReg);
+ copyToDestRegs(CI, Paired, InsertBefore, DL, AMDGPU::OpName::vdata, DestReg);
CI.I->eraseFromParent();
Paired.I->eraseFromParent();
@@ -1692,12 +1702,13 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeTBufferStorePair(
CombineInfo &CI, CombineInfo &Paired,
MachineBasicBlock::iterator InsertBefore) {
MachineBasicBlock *MBB = CI.I->getParent();
- DebugLoc DL = CI.I->getDebugLoc();
+ DebugLoc DL = DebugLoc::getMergedLocation(CI.I->getDebugLoc(),
+ Paired.I->getDebugLoc());
const unsigned Opcode = getNewOpcode(CI, Paired);
Register SrcReg =
- copyFromSrcRegs(CI, Paired, InsertBefore, AMDGPU::OpName::vdata);
+ copyFromSrcRegs(CI, Paired, InsertBefore, DL, AMDGPU::OpName::vdata);
auto MIB = BuildMI(*MBB, InsertBefore, DL, TII->get(Opcode))
.addReg(SrcReg, RegState::Kill);
@@ -1739,7 +1750,9 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeFlatLoadPair(
CombineInfo &CI, CombineInfo &Paired,
MachineBasicBlock::iterator InsertBefore) {
MachineBasicBlock *MBB = CI.I->getParent();
- DebugLoc DL = CI.I->getDebugLoc();
+
+ DebugLoc DL = DebugLoc::getMergedLocation(CI.I->getDebugLoc(),
+ Paired.I->getDebugLoc());
const unsigned Opcode = getNewOpcode(CI, Paired);
@@ -1757,7 +1770,7 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeFlatLoadPair(
.addImm(CI.CPol)
.addMemOperand(combineKnownAdjacentMMOs(CI, Paired));
- copyToDestRegs(CI, Paired, InsertBefore, AMDGPU::OpName::vdst, DestReg);
+ copyToDestRegs(CI, Paired, InsertBefore, DL, AMDGPU::OpName::vdst, DestReg);
CI.I->eraseFromParent();
Paired.I->eraseFromParent();
@@ -1768,12 +1781,14 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeFlatStorePair(
CombineInfo &CI, CombineInfo &Paired,
MachineBasicBlock::iterator InsertBefore) {
MachineBasicBlock *MBB = CI.I->getParent();
- DebugLoc DL = CI.I->getDebugLoc();
+
+ DebugLoc DL = DebugLoc::getMergedLocation(CI.I->getDebugLoc(),
+ Paired.I->getDebugLoc());
const unsigned Opcode = getNewOpcode(CI, Paired);
Register SrcReg =
- copyFromSrcRegs(CI, Paired, InsertBefore, AMDGPU::OpName::vdata);
+ copyFromSrcRegs(CI, Paired, InsertBefore, DL, AMDGPU::OpName::vdata);
auto MIB = BuildMI(*MBB, InsertBefore, DL, TII->get(Opcode))
.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::vaddr))
@@ -2042,12 +2057,13 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeBufferStorePair(
CombineInfo &CI, CombineInfo &Paired,
MachineBasicBlock::iterator InsertBefore) {
MachineBasicBlock *MBB = CI.I->getParent();
- DebugLoc DL = CI.I->getDebugLoc();
+ DebugLoc DL = DebugLoc::getMergedLocation(CI.I->getDebugLoc(),
+ Paired.I->getDebugLoc());
const unsigned Opcode = getNewOpcode(CI, Paired);
Register SrcReg =
- copyFromSrcRegs(CI, Paired, InsertBefore, AMDGPU::OpName::vdata);
+ copyFromSrcRegs(CI, Paired, InsertBefore, DL, AMDGPU::OpName::vdata);
auto MIB = BuildMI(*MBB, InsertBefore, DL, TII->get(Opcode))
.addReg(SrcReg, RegState::Kill);
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
This is merging loads and stores so use the combined DebugLoc. Not sure if computeBase should be using the merged location from all the involved instructions. I'm also not sure how to test this sort of thing.
5fb68b6
to
c2fbea3
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Would a MIR test work to test this ? https://llvm.org/docs/MIRLangRef.html#source-locations
It seems like the computeBase instruction(s) should be the same as at least one of the merged stores? If not, I would imagine we don't want to include it anyway I think the existing logic in |
This is merging loads and stores so use the combined DebugLoc.
Not sure if computeBase should be using the merged location from
all the involved instructions. I'm also not sure how to test this
sort of thing.