Skip to content

Commit e0f0d0e

Browse files
committed
[MachineScheduler] Allow clustering mem ops with complex addresses
The generic BaseMemOpClusterMutation calls into TargetInstrInfo to analyze the address of each load/store instruction, and again to decide whether two instructions should be clustered. Previously this had to represent each address as a single base operand plus a constant byte offset. This patch extends it to support any number of base operands. The old target hook getMemOperandWithOffset is now a convenience function for callers that are only prepared to handle a single base operand. It calls the new more general target hook getMemOperandsWithOffset. The only requirements for the base operands returned by getMemOperandsWithOffset are: - they can be sorted by MemOpInfo::Compare, such that clusterable ops get sorted next to each other, and - shouldClusterMemOps knows what they mean. One simple follow-on is to enable clustering of AMDGPU FLAT instructions with both vaddr and saddr (base register + offset register). I've left a FIXME in the code for this case. Differential Revision: https://reviews.llvm.org/D71655
1 parent 70096ca commit e0f0d0e

File tree

13 files changed

+144
-91
lines changed

13 files changed

+144
-91
lines changed

llvm/include/llvm/CodeGen/TargetInstrInfo.h

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1238,15 +1238,21 @@ class TargetInstrInfo : public MCInstrInfo {
12381238
}
12391239

12401240
/// Get the base operand and byte offset of an instruction that reads/writes
1241+
/// memory. This is a convenience function for callers that are only prepared
1242+
/// to handle a single base operand.
1243+
bool getMemOperandWithOffset(const MachineInstr &MI,
1244+
const MachineOperand *&BaseOp, int64_t &Offset,
1245+
const TargetRegisterInfo *TRI) const;
1246+
1247+
/// Get the base operands and byte offset of an instruction that reads/writes
12411248
/// memory.
12421249
/// It returns false if MI does not read/write memory.
1243-
/// It returns false if no base operand and offset was found.
1244-
/// It is not guaranteed to always recognize base operand and offsets in all
1250+
/// It returns false if no base operands and offset was found.
1251+
/// It is not guaranteed to always recognize base operands and offsets in all
12451252
/// cases.
1246-
virtual bool getMemOperandWithOffset(const MachineInstr &MI,
1247-
const MachineOperand *&BaseOp,
1248-
int64_t &Offset,
1249-
const TargetRegisterInfo *TRI) const {
1253+
virtual bool getMemOperandsWithOffset(
1254+
const MachineInstr &MI, SmallVectorImpl<const MachineOperand *> &BaseOps,
1255+
int64_t &Offset, const TargetRegisterInfo *TRI) const {
12501256
return false;
12511257
}
12521258

@@ -1270,8 +1276,8 @@ class TargetInstrInfo : public MCInstrInfo {
12701276
/// or
12711277
/// DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
12721278
/// to TargetPassConfig::createMachineScheduler() to have an effect.
1273-
virtual bool shouldClusterMemOps(const MachineOperand &BaseOp1,
1274-
const MachineOperand &BaseOp2,
1279+
virtual bool shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
1280+
ArrayRef<const MachineOperand *> BaseOps2,
12751281
unsigned NumLoads) const {
12761282
llvm_unreachable("target did not implement shouldClusterMemOps()");
12771283
}

llvm/lib/CodeGen/MachineScheduler.cpp

Lines changed: 39 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1471,41 +1471,46 @@ namespace {
14711471
class BaseMemOpClusterMutation : public ScheduleDAGMutation {
14721472
struct MemOpInfo {
14731473
SUnit *SU;
1474-
const MachineOperand *BaseOp;
1474+
SmallVector<const MachineOperand *, 4> BaseOps;
14751475
int64_t Offset;
14761476

1477-
MemOpInfo(SUnit *su, const MachineOperand *Op, int64_t ofs)
1478-
: SU(su), BaseOp(Op), Offset(ofs) {}
1479-
1480-
bool operator<(const MemOpInfo &RHS) const {
1481-
if (BaseOp->getType() != RHS.BaseOp->getType())
1482-
return BaseOp->getType() < RHS.BaseOp->getType();
1483-
1484-
if (BaseOp->isReg())
1485-
return std::make_tuple(BaseOp->getReg(), Offset, SU->NodeNum) <
1486-
std::make_tuple(RHS.BaseOp->getReg(), RHS.Offset,
1487-
RHS.SU->NodeNum);
1488-
if (BaseOp->isFI()) {
1489-
const MachineFunction &MF =
1490-
*BaseOp->getParent()->getParent()->getParent();
1477+
MemOpInfo(SUnit *SU, ArrayRef<const MachineOperand *> BaseOps,
1478+
int64_t Offset)
1479+
: SU(SU), BaseOps(BaseOps.begin(), BaseOps.end()), Offset(Offset) {}
1480+
1481+
static bool Compare(const MachineOperand *const &A,
1482+
const MachineOperand *const &B) {
1483+
if (A->getType() != B->getType())
1484+
return A->getType() < B->getType();
1485+
if (A->isReg())
1486+
return A->getReg() < B->getReg();
1487+
if (A->isFI()) {
1488+
const MachineFunction &MF = *A->getParent()->getParent()->getParent();
14911489
const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();
14921490
bool StackGrowsDown = TFI.getStackGrowthDirection() ==
14931491
TargetFrameLowering::StackGrowsDown;
1494-
// Can't use tuple comparison here since we might need to use a
1495-
// different order when the stack grows down.
1496-
if (BaseOp->getIndex() != RHS.BaseOp->getIndex())
1497-
return StackGrowsDown ? BaseOp->getIndex() > RHS.BaseOp->getIndex()
1498-
: BaseOp->getIndex() < RHS.BaseOp->getIndex();
1499-
1500-
if (Offset != RHS.Offset)
1501-
return Offset < RHS.Offset;
1502-
1503-
return SU->NodeNum < RHS.SU->NodeNum;
1492+
return StackGrowsDown ? A->getIndex() > B->getIndex()
1493+
: A->getIndex() < B->getIndex();
15041494
}
15051495

15061496
llvm_unreachable("MemOpClusterMutation only supports register or frame "
15071497
"index bases.");
15081498
}
1499+
1500+
bool operator<(const MemOpInfo &RHS) const {
1501+
// FIXME: Don't compare everything twice. Maybe use C++20 three way
1502+
// comparison instead when it's available.
1503+
if (std::lexicographical_compare(BaseOps.begin(), BaseOps.end(),
1504+
RHS.BaseOps.begin(), RHS.BaseOps.end(),
1505+
Compare))
1506+
return true;
1507+
if (std::lexicographical_compare(RHS.BaseOps.begin(), RHS.BaseOps.end(),
1508+
BaseOps.begin(), BaseOps.end(), Compare))
1509+
return false;
1510+
if (Offset != RHS.Offset)
1511+
return Offset < RHS.Offset;
1512+
return SU->NodeNum < RHS.SU->NodeNum;
1513+
}
15091514
};
15101515

15111516
const TargetInstrInfo *TII;
@@ -1560,10 +1565,14 @@ void BaseMemOpClusterMutation::clusterNeighboringMemOps(
15601565
ArrayRef<SUnit *> MemOps, ScheduleDAGInstrs *DAG) {
15611566
SmallVector<MemOpInfo, 32> MemOpRecords;
15621567
for (SUnit *SU : MemOps) {
1563-
const MachineOperand *BaseOp;
1568+
SmallVector<const MachineOperand *, 4> BaseOps;
15641569
int64_t Offset;
1565-
if (TII->getMemOperandWithOffset(*SU->getInstr(), BaseOp, Offset, TRI))
1566-
MemOpRecords.push_back(MemOpInfo(SU, BaseOp, Offset));
1570+
if (TII->getMemOperandsWithOffset(*SU->getInstr(), BaseOps, Offset, TRI))
1571+
MemOpRecords.push_back(MemOpInfo(SU, BaseOps, Offset));
1572+
#ifndef NDEBUG
1573+
for (auto *Op : BaseOps)
1574+
assert(Op);
1575+
#endif
15671576
}
15681577
if (MemOpRecords.size() < 2)
15691578
return;
@@ -1573,8 +1582,8 @@ void BaseMemOpClusterMutation::clusterNeighboringMemOps(
15731582
for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) {
15741583
SUnit *SUa = MemOpRecords[Idx].SU;
15751584
SUnit *SUb = MemOpRecords[Idx+1].SU;
1576-
if (TII->shouldClusterMemOps(*MemOpRecords[Idx].BaseOp,
1577-
*MemOpRecords[Idx + 1].BaseOp,
1585+
if (TII->shouldClusterMemOps(MemOpRecords[Idx].BaseOps,
1586+
MemOpRecords[Idx + 1].BaseOps,
15781587
ClusterLength)) {
15791588
if (SUa->NodeNum > SUb->NodeNum)
15801589
std::swap(SUa, SUb);

llvm/lib/CodeGen/TargetInstrInfo.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1028,6 +1028,18 @@ CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
10281028
return new ScoreboardHazardRecognizer(II, DAG, "post-RA-sched");
10291029
}
10301030

1031+
// Default implementation of getMemOperandWithOffset.
1032+
bool TargetInstrInfo::getMemOperandWithOffset(
1033+
const MachineInstr &MI, const MachineOperand *&BaseOp, int64_t &Offset,
1034+
const TargetRegisterInfo *TRI) const {
1035+
SmallVector<const MachineOperand *, 4> BaseOps;
1036+
if (!getMemOperandsWithOffset(MI, BaseOps, Offset, TRI) ||
1037+
BaseOps.size() != 1)
1038+
return false;
1039+
BaseOp = BaseOps.front();
1040+
return true;
1041+
}
1042+
10311043
//===----------------------------------------------------------------------===//
10321044
// SelectionDAG latency interface.
10331045
//===----------------------------------------------------------------------===//

llvm/lib/Target/AArch64/AArch64InstrInfo.cpp

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1985,15 +1985,18 @@ bool AArch64InstrInfo::isCandidateToMergeOrPair(const MachineInstr &MI) const {
19851985
return true;
19861986
}
19871987

1988-
bool AArch64InstrInfo::getMemOperandWithOffset(const MachineInstr &LdSt,
1989-
const MachineOperand *&BaseOp,
1990-
int64_t &Offset,
1991-
const TargetRegisterInfo *TRI) const {
1988+
bool AArch64InstrInfo::getMemOperandsWithOffset(
1989+
const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps,
1990+
int64_t &Offset, const TargetRegisterInfo *TRI) const {
19921991
if (!LdSt.mayLoadOrStore())
19931992
return false;
19941993

1994+
const MachineOperand *BaseOp;
19951995
unsigned Width;
1996-
return getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, Width, TRI);
1996+
if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, Width, TRI))
1997+
return false;
1998+
BaseOps.push_back(BaseOp);
1999+
return true;
19972000
}
19982001

19992002
bool AArch64InstrInfo::getMemOperandWithOffsetWidth(
@@ -2370,9 +2373,12 @@ static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1,
23702373
/// Detect opportunities for ldp/stp formation.
23712374
///
23722375
/// Only called for LdSt for which getMemOperandWithOffset returns true.
2373-
bool AArch64InstrInfo::shouldClusterMemOps(const MachineOperand &BaseOp1,
2374-
const MachineOperand &BaseOp2,
2375-
unsigned NumLoads) const {
2376+
bool AArch64InstrInfo::shouldClusterMemOps(
2377+
ArrayRef<const MachineOperand *> BaseOps1,
2378+
ArrayRef<const MachineOperand *> BaseOps2, unsigned NumLoads) const {
2379+
assert(BaseOps1.size() == 1 && BaseOps2.size() == 1);
2380+
const MachineOperand &BaseOp1 = *BaseOps1.front();
2381+
const MachineOperand &BaseOp2 = *BaseOps2.front();
23762382
const MachineInstr &FirstLdSt = *BaseOp1.getParent();
23772383
const MachineInstr &SecondLdSt = *BaseOp2.getParent();
23782384
if (BaseOp1.getType() != BaseOp2.getType())

llvm/lib/Target/AArch64/AArch64InstrInfo.h

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -112,10 +112,9 @@ class AArch64InstrInfo final : public AArch64GenInstrInfo {
112112
/// Hint that pairing the given load or store is unprofitable.
113113
static void suppressLdStPair(MachineInstr &MI);
114114

115-
bool getMemOperandWithOffset(const MachineInstr &MI,
116-
const MachineOperand *&BaseOp,
117-
int64_t &Offset,
118-
const TargetRegisterInfo *TRI) const override;
115+
bool getMemOperandsWithOffset(
116+
const MachineInstr &MI, SmallVectorImpl<const MachineOperand *> &BaseOps,
117+
int64_t &Offset, const TargetRegisterInfo *TRI) const override;
119118

120119
bool getMemOperandWithOffsetWidth(const MachineInstr &MI,
121120
const MachineOperand *&BaseOp,
@@ -132,8 +131,8 @@ class AArch64InstrInfo final : public AArch64GenInstrInfo {
132131
static bool getMemOpInfo(unsigned Opcode, unsigned &Scale, unsigned &Width,
133132
int64_t &MinOffset, int64_t &MaxOffset);
134133

135-
bool shouldClusterMemOps(const MachineOperand &BaseOp1,
136-
const MachineOperand &BaseOp2,
134+
bool shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
135+
ArrayRef<const MachineOperand *> BaseOps2,
137136
unsigned NumLoads) const override;
138137

139138
void copyPhysRegTuple(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -258,14 +258,14 @@ static bool isStride64(unsigned Opc) {
258258
}
259259
}
260260

261-
bool SIInstrInfo::getMemOperandWithOffset(const MachineInstr &LdSt,
262-
const MachineOperand *&BaseOp,
263-
int64_t &Offset,
264-
const TargetRegisterInfo *TRI) const {
261+
bool SIInstrInfo::getMemOperandsWithOffset(
262+
const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps,
263+
int64_t &Offset, const TargetRegisterInfo *TRI) const {
265264
if (!LdSt.mayLoadOrStore())
266265
return false;
267266

268267
unsigned Opc = LdSt.getOpcode();
268+
const MachineOperand *BaseOp;
269269

270270
if (isDS(LdSt)) {
271271
const MachineOperand *OffsetImm =
@@ -278,6 +278,7 @@ bool SIInstrInfo::getMemOperandWithOffset(const MachineInstr &LdSt,
278278
if (!BaseOp || !BaseOp->isReg())
279279
return false;
280280

281+
BaseOps.push_back(BaseOp);
281282
Offset = OffsetImm->getImm();
282283

283284
return true;
@@ -314,6 +315,7 @@ bool SIInstrInfo::getMemOperandWithOffset(const MachineInstr &LdSt,
314315
if (!BaseOp->isReg())
315316
return false;
316317

318+
BaseOps.push_back(BaseOp);
317319
Offset = EltSize * Offset0;
318320

319321
return true;
@@ -339,7 +341,7 @@ bool SIInstrInfo::getMemOperandWithOffset(const MachineInstr &LdSt,
339341

340342
const MachineOperand *OffsetImm =
341343
getNamedOperand(LdSt, AMDGPU::OpName::offset);
342-
BaseOp = SOffset;
344+
BaseOps.push_back(SOffset);
343345
Offset = OffsetImm->getImm();
344346
return true;
345347
}
@@ -358,6 +360,7 @@ bool SIInstrInfo::getMemOperandWithOffset(const MachineInstr &LdSt,
358360
if (!BaseOp->isReg())
359361
return false;
360362

363+
BaseOps.push_back(BaseOp);
361364
return true;
362365
}
363366

@@ -373,13 +376,15 @@ bool SIInstrInfo::getMemOperandWithOffset(const MachineInstr &LdSt,
373376
if (!BaseOp->isReg())
374377
return false;
375378

379+
BaseOps.push_back(BaseOp);
376380
return true;
377381
}
378382

379383
if (isFLAT(LdSt)) {
380384
const MachineOperand *VAddr = getNamedOperand(LdSt, AMDGPU::OpName::vaddr);
381385
if (VAddr) {
382386
// Can't analyze 2 offsets.
387+
// FIXME remove this restriction!
383388
if (getNamedOperand(LdSt, AMDGPU::OpName::saddr))
384389
return false;
385390

@@ -392,6 +397,7 @@ bool SIInstrInfo::getMemOperandWithOffset(const MachineInstr &LdSt,
392397
Offset = getNamedOperand(LdSt, AMDGPU::OpName::offset)->getImm();
393398
if (!BaseOp->isReg())
394399
return false;
400+
BaseOps.push_back(BaseOp);
395401
return true;
396402
}
397403

@@ -433,9 +439,12 @@ static bool memOpsHaveSameBasePtr(const MachineInstr &MI1,
433439
return Base1 == Base2;
434440
}
435441

436-
bool SIInstrInfo::shouldClusterMemOps(const MachineOperand &BaseOp1,
437-
const MachineOperand &BaseOp2,
442+
bool SIInstrInfo::shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
443+
ArrayRef<const MachineOperand *> BaseOps2,
438444
unsigned NumLoads) const {
445+
assert(BaseOps1.size() == 1 && BaseOps2.size() == 1);
446+
const MachineOperand &BaseOp1 = *BaseOps1.front();
447+
const MachineOperand &BaseOp2 = *BaseOps2.front();
439448
const MachineInstr &FirstLdSt = *BaseOp1.getParent();
440449
const MachineInstr &SecondLdSt = *BaseOp2.getParent();
441450

llvm/lib/Target/AMDGPU/SIInstrInfo.h

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -181,13 +181,14 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
181181
int64_t &Offset1,
182182
int64_t &Offset2) const override;
183183

184-
bool getMemOperandWithOffset(const MachineInstr &LdSt,
185-
const MachineOperand *&BaseOp,
186-
int64_t &Offset,
187-
const TargetRegisterInfo *TRI) const final;
184+
bool
185+
getMemOperandsWithOffset(const MachineInstr &LdSt,
186+
SmallVectorImpl<const MachineOperand *> &BaseOps,
187+
int64_t &Offset,
188+
const TargetRegisterInfo *TRI) const final;
188189

189-
bool shouldClusterMemOps(const MachineOperand &BaseOp1,
190-
const MachineOperand &BaseOp2,
190+
bool shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
191+
ArrayRef<const MachineOperand *> BaseOps2,
191192
unsigned NumLoads) const override;
192193

193194
bool shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, int64_t Offset0,

llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2946,12 +2946,15 @@ bool HexagonInstrInfo::addLatencyToSchedule(const MachineInstr &MI1,
29462946
}
29472947

29482948
/// Get the base register and byte offset of a load/store instr.
2949-
bool HexagonInstrInfo::getMemOperandWithOffset(
2950-
const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset,
2951-
const TargetRegisterInfo *TRI) const {
2949+
bool HexagonInstrInfo::getMemOperandsWithOffset(
2950+
const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps,
2951+
int64_t &Offset, const TargetRegisterInfo *TRI) const {
29522952
unsigned AccessSize = 0;
2953-
BaseOp = getBaseAndOffset(LdSt, Offset, AccessSize);
2954-
return BaseOp != nullptr && BaseOp->isReg();
2953+
const MachineOperand *BaseOp = getBaseAndOffset(LdSt, Offset, AccessSize);
2954+
if (!BaseOp || !BaseOp->isReg())
2955+
return false;
2956+
BaseOps.push_back(BaseOp);
2957+
return true;
29552958
}
29562959

29572960
/// Can these instructions execute at the same time in a bundle.

llvm/lib/Target/Hexagon/HexagonInstrInfo.h

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -204,10 +204,11 @@ class HexagonInstrInfo : public HexagonGenInstrInfo {
204204
bool expandPostRAPseudo(MachineInstr &MI) const override;
205205

206206
/// Get the base register and byte offset of a load/store instr.
207-
bool getMemOperandWithOffset(const MachineInstr &LdSt,
208-
const MachineOperand *&BaseOp,
209-
int64_t &Offset,
210-
const TargetRegisterInfo *TRI) const override;
207+
bool
208+
getMemOperandsWithOffset(const MachineInstr &LdSt,
209+
SmallVectorImpl<const MachineOperand *> &BaseOps,
210+
int64_t &Offset,
211+
const TargetRegisterInfo *TRI) const override;
211212

212213
/// Reverses the branch condition of the specified condition list,
213214
/// returning false on success and true if it cannot be reversed.

llvm/lib/Target/Lanai/LanaiInstrInfo.cpp

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -795,10 +795,9 @@ bool LanaiInstrInfo::getMemOperandWithOffsetWidth(
795795
return true;
796796
}
797797

798-
bool LanaiInstrInfo::getMemOperandWithOffset(const MachineInstr &LdSt,
799-
const MachineOperand *&BaseOp,
800-
int64_t &Offset,
801-
const TargetRegisterInfo *TRI) const {
798+
bool LanaiInstrInfo::getMemOperandsWithOffset(
799+
const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps,
800+
int64_t &Offset, const TargetRegisterInfo *TRI) const {
802801
switch (LdSt.getOpcode()) {
803802
default:
804803
return false;
@@ -811,7 +810,11 @@ bool LanaiInstrInfo::getMemOperandWithOffset(const MachineInstr &LdSt,
811810
case Lanai::STH_RI:
812811
case Lanai::LDBs_RI:
813812
case Lanai::LDBz_RI:
813+
const MachineOperand *BaseOp;
814814
unsigned Width;
815-
return getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, Width, TRI);
815+
if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, Width, TRI))
816+
return false;
817+
BaseOps.push_back(BaseOp);
818+
return true;
816819
}
817820
}

0 commit comments

Comments
 (0)