diff --git a/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp b/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp index f253a841f16a6..e395cc883a4ad 100644 --- a/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp +++ b/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp @@ -81,11 +81,11 @@ static void printLivenessInfo(raw_ostream &OS, const auto &MRI = BB->getParent()->getRegInfo(); const auto LiveIns = getLiveRegsBefore(*Begin, *LIS); - OS << "LIn RP: " << print(getRegPressure(MRI, LiveIns)); + OS << "LIn RP: " << print(getRegPressure(MRI, LiveIns, Begin->getMF())); const auto BottomMI = End == BB->end() ? std::prev(End) : End; const auto LiveOuts = getLiveRegsAfter(*BottomMI, *LIS); - OS << "LOt RP: " << print(getRegPressure(MRI, LiveOuts)); + OS << "LOt RP: " << print(getRegPressure(MRI, LiveOuts, Begin->getMF())); } LLVM_DUMP_METHOD @@ -238,11 +238,8 @@ class SchedStrategyStub : public MachineSchedStrategy { GCNIterativeScheduler::GCNIterativeScheduler(MachineSchedContext *C, StrategyKind S) - : BaseClass(C, std::make_unique()) - , Context(C) - , Strategy(S) - , UPTracker(*LIS) { -} + : BaseClass(C, std::make_unique()), Context(C), + Strategy(S), UPTracker(*LIS, C->MF) {} // returns max pressure for a region GCNRegPressure @@ -281,7 +278,7 @@ template GCNRegPressure GCNIterativeScheduler::getSchedulePressure(const Region &R, Range &&Schedule) const { auto const BBEnd = R.Begin->getParent()->end(); - GCNUpwardRPTracker RPTracker(*LIS); + GCNUpwardRPTracker RPTracker(*LIS, &MF); if (R.End != BBEnd) { // R.End points to the boundary instruction but the // schedule doesn't include it @@ -447,11 +444,7 @@ void GCNIterativeScheduler::sortRegionsByPressure(unsigned TargetOcc) { // BestSchedules aren't deleted on fail. unsigned GCNIterativeScheduler::tryMaximizeOccupancy(unsigned TargetOcc) { // TODO: assert Regions are sorted descending by pressure - const auto &ST = MF.getSubtarget(); - const unsigned DynamicVGPRBlockSize = - MF.getInfo()->getDynamicVGPRBlockSize(); - const auto Occ = - Regions.front()->MaxPressure.getOccupancy(ST, DynamicVGPRBlockSize); + const auto Occ = Regions.front()->MaxPressure.getOccupancy(MF); LLVM_DEBUG(dbgs() << "Trying to improve occupancy, target = " << TargetOcc << ", current = " << Occ << '\n'); @@ -460,7 +453,7 @@ unsigned GCNIterativeScheduler::tryMaximizeOccupancy(unsigned TargetOcc) { // Always build the DAG to add mutations BuildDAG DAG(*R, *this); - if (R->MaxPressure.getOccupancy(ST, DynamicVGPRBlockSize) >= NewOcc) + if (R->MaxPressure.getOccupancy(MF) >= NewOcc) continue; LLVM_DEBUG(printRegion(dbgs(), R->Begin, R->End, LIS, 3); @@ -471,7 +464,7 @@ unsigned GCNIterativeScheduler::tryMaximizeOccupancy(unsigned TargetOcc) { LLVM_DEBUG(dbgs() << "Occupancy improvement attempt:\n"; printSchedRP(dbgs(), R->MaxPressure, MaxRP)); - NewOcc = std::min(NewOcc, MaxRP.getOccupancy(ST, DynamicVGPRBlockSize)); + NewOcc = std::min(NewOcc, MaxRP.getOccupancy(MF)); if (NewOcc <= Occ) break; @@ -488,15 +481,12 @@ unsigned GCNIterativeScheduler::tryMaximizeOccupancy(unsigned TargetOcc) { } void GCNIterativeScheduler::scheduleLegacyMaxOccupancy( - bool TryMaximizeOccupancy) { - const auto &ST = MF.getSubtarget(); + bool TryMaximizeOccupancy) { SIMachineFunctionInfo *MFI = MF.getInfo(); auto TgtOcc = MFI->getMinAllowedOccupancy(); - unsigned DynamicVGPRBlockSize = MFI->getDynamicVGPRBlockSize(); sortRegionsByPressure(TgtOcc); - auto Occ = - Regions.front()->MaxPressure.getOccupancy(ST, DynamicVGPRBlockSize); + auto Occ = Regions.front()->MaxPressure.getOccupancy(MF); bool IsReentry = false; if (TryMaximizeOccupancy && Occ < TgtOcc) { @@ -527,21 +517,19 @@ void GCNIterativeScheduler::scheduleLegacyMaxOccupancy( const auto RP = getRegionPressure(*R); LLVM_DEBUG(printSchedRP(dbgs(), R->MaxPressure, RP)); - if (RP.getOccupancy(ST, DynamicVGPRBlockSize) < TgtOcc) { + if (RP.getOccupancy(MF) < TgtOcc) { LLVM_DEBUG(dbgs() << "Didn't fit into target occupancy O" << TgtOcc); - if (R->BestSchedule.get() && R->BestSchedule->MaxPressure.getOccupancy( - ST, DynamicVGPRBlockSize) >= TgtOcc) { + if (R->BestSchedule.get() && + R->BestSchedule->MaxPressure.getOccupancy(MF) >= TgtOcc) { LLVM_DEBUG(dbgs() << ", scheduling minimal register\n"); scheduleBest(*R); } else { LLVM_DEBUG(dbgs() << ", restoring\n"); Ovr.restoreOrder(); - assert(R->MaxPressure.getOccupancy(ST, DynamicVGPRBlockSize) >= - TgtOcc); + assert(R->MaxPressure.getOccupancy(MF) >= TgtOcc); } } - FinalOccupancy = - std::min(FinalOccupancy, RP.getOccupancy(ST, DynamicVGPRBlockSize)); + FinalOccupancy = std::min(FinalOccupancy, RP.getOccupancy(MF)); } } MFI->limitOccupancy(FinalOccupancy); @@ -582,16 +570,12 @@ void GCNIterativeScheduler::scheduleMinReg(bool force) { /////////////////////////////////////////////////////////////////////////////// // ILP scheduler port -void GCNIterativeScheduler::scheduleILP( - bool TryMaximizeOccupancy) { - const auto &ST = MF.getSubtarget(); +void GCNIterativeScheduler::scheduleILP(bool TryMaximizeOccupancy) { SIMachineFunctionInfo *MFI = MF.getInfo(); auto TgtOcc = MFI->getMinAllowedOccupancy(); - unsigned DynamicVGPRBlockSize = MFI->getDynamicVGPRBlockSize(); sortRegionsByPressure(TgtOcc); - auto Occ = - Regions.front()->MaxPressure.getOccupancy(ST, DynamicVGPRBlockSize); + auto Occ = Regions.front()->MaxPressure.getOccupancy(MF); bool IsReentry = false; if (TryMaximizeOccupancy && Occ < TgtOcc) { @@ -612,18 +596,17 @@ void GCNIterativeScheduler::scheduleILP( const auto RP = getSchedulePressure(*R, ILPSchedule); LLVM_DEBUG(printSchedRP(dbgs(), R->MaxPressure, RP)); - if (RP.getOccupancy(ST, DynamicVGPRBlockSize) < TgtOcc) { + if (RP.getOccupancy(MF) < TgtOcc) { LLVM_DEBUG(dbgs() << "Didn't fit into target occupancy O" << TgtOcc); - if (R->BestSchedule.get() && R->BestSchedule->MaxPressure.getOccupancy( - ST, DynamicVGPRBlockSize) >= TgtOcc) { + if (R->BestSchedule.get() && + R->BestSchedule->MaxPressure.getOccupancy(MF) >= TgtOcc) { LLVM_DEBUG(dbgs() << ", scheduling minimal register\n"); scheduleBest(*R); } } else { scheduleRegion(*R, ILPSchedule, RP); LLVM_DEBUG(printSchedResult(dbgs(), R, RP)); - FinalOccupancy = - std::min(FinalOccupancy, RP.getOccupancy(ST, DynamicVGPRBlockSize)); + FinalOccupancy = std::min(FinalOccupancy, RP.getOccupancy(MF)); } } MFI->limitOccupancy(FinalOccupancy); diff --git a/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.h b/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.h index f731b1fc7e0df..b5980ef1cfdb9 100644 --- a/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.h +++ b/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.h @@ -94,9 +94,8 @@ class GCNIterativeScheduler : public ScheduleDAGMILive { } void swapIGLPMutations(const Region &R, bool IsReentry); - void setBestSchedule(Region &R, - ScheduleRef Schedule, - const GCNRegPressure &MaxRP = GCNRegPressure()); + void setBestSchedule(Region &R, ScheduleRef Schedule, + const GCNRegPressure &MaxRP); void scheduleBest(Region &R); @@ -105,8 +104,7 @@ class GCNIterativeScheduler : public ScheduleDAGMILive { void sortRegionsByPressure(unsigned TargetOcc); template - void scheduleRegion(Region &R, Range &&Schedule, - const GCNRegPressure &MaxRP = GCNRegPressure()); + void scheduleRegion(Region &R, Range &&Schedule, const GCNRegPressure &MaxRP); unsigned tryMaximizeOccupancy(unsigned TargetOcc = std::numeric_limits::max()); diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp index 334afd3a2a5b4..581935b7fef89 100644 --- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp +++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp @@ -135,8 +135,6 @@ bool GCNRegPressure::less(const MachineFunction &MF, const GCNRegPressure &O, unsigned OtherVGPRForSGPRSpills = (OtherExcessSGPR + (WaveSize - 1)) / WaveSize; - unsigned MaxArchVGPRs = ST.getAddressableNumArchVGPRs(); - // Unified excess pressure conditions, accounting for VGPRs used for SGPR // spills unsigned ExcessVGPR = @@ -149,22 +147,26 @@ bool GCNRegPressure::less(const MachineFunction &MF, const GCNRegPressure &O, 0); // Arch VGPR excess pressure conditions, accounting for VGPRs used for SGPR // spills - unsigned ExcessArchVGPR = std::max( - static_cast(getVGPRNum(false) + VGPRForSGPRSpills - MaxArchVGPRs), - 0); + unsigned AddressableArchVGPRs = ST.getAddressableNumArchVGPRs(); + unsigned ExcessArchVGPR = + std::max(static_cast(getVGPRNum(false) + VGPRForSGPRSpills - + AddressableArchVGPRs), + 0); unsigned OtherExcessArchVGPR = std::max(static_cast(O.getVGPRNum(false) + OtherVGPRForSGPRSpills - - MaxArchVGPRs), + AddressableArchVGPRs), 0); // AGPR excess pressure conditions - unsigned ExcessAGPR = std::max( - static_cast(ST.hasGFX90AInsts() ? (getAGPRNum() - MaxArchVGPRs) - : (getAGPRNum() - MaxVGPRs)), - 0); - unsigned OtherExcessAGPR = std::max( - static_cast(ST.hasGFX90AInsts() ? (O.getAGPRNum() - MaxArchVGPRs) - : (O.getAGPRNum() - MaxVGPRs)), - 0); + unsigned ExcessAGPR = + std::max(static_cast(ST.hasGFX90AInsts() + ? (getAGPRNum() - AddressableArchVGPRs) + : (getAGPRNum() - MaxVGPRs)), + 0); + unsigned OtherExcessAGPR = + std::max(static_cast(ST.hasGFX90AInsts() + ? (O.getAGPRNum() - AddressableArchVGPRs) + : (O.getAGPRNum() - MaxVGPRs)), + 0); bool ExcessRP = ExcessSGPR || ExcessVGPR || ExcessArchVGPR || ExcessAGPR; bool OtherExcessRP = OtherExcessSGPR || OtherExcessVGPR || @@ -186,12 +188,14 @@ bool GCNRegPressure::less(const MachineFunction &MF, const GCNRegPressure &O, unsigned PureExcessVGPR = std::max(static_cast(getVGPRNum(ST.hasGFX90AInsts()) - MaxVGPRs), 0) + - std::max(static_cast(getVGPRNum(false) - MaxArchVGPRs), 0); + std::max(static_cast(getVGPRNum(false) - AddressableArchVGPRs), + 0); unsigned OtherPureExcessVGPR = std::max( static_cast(O.getVGPRNum(ST.hasGFX90AInsts()) - MaxVGPRs), 0) + - std::max(static_cast(O.getVGPRNum(false) - MaxArchVGPRs), 0); + std::max(static_cast(O.getVGPRNum(false) - AddressableArchVGPRs), + 0); // If we have a special case where there is a tie in excess VGPR, but one // of the pressures has VGPR usage from SGPR spills, prefer the pressure @@ -229,14 +233,15 @@ bool GCNRegPressure::less(const MachineFunction &MF, const GCNRegPressure &O, } // Give final precedence to lower general RP. - return SGPRImportant ? (getSGPRNum() < O.getSGPRNum()): - (getVGPRNum(ST.hasGFX90AInsts()) < + return SGPRImportant ? (getSGPRNum() < O.getSGPRNum()) + : (getVGPRNum(ST.hasGFX90AInsts()) < O.getVGPRNum(ST.hasGFX90AInsts())); } Printable llvm::print(const GCNRegPressure &RP, const GCNSubtarget *ST, - unsigned DynamicVGPRBlockSize) { - return Printable([&RP, ST, DynamicVGPRBlockSize](raw_ostream &OS) { + unsigned DynamicVGPRBlockSize, + const MachineFunction *MF) { + return Printable([&RP, ST, DynamicVGPRBlockSize, MF](raw_ostream &OS) { OS << "VGPRs: " << RP.getArchVGPRNum() << ' ' << "AGPRs: " << RP.getAGPRNum(); if (ST) @@ -250,7 +255,7 @@ Printable llvm::print(const GCNRegPressure &RP, const GCNSubtarget *ST, OS << ", LVGPR WT: " << RP.getVGPRTuplesWeight() << ", LSGPR WT: " << RP.getSGPRTuplesWeight(); if (ST) - OS << " -> Occ: " << RP.getOccupancy(*ST, DynamicVGPRBlockSize); + OS << " -> Occ: " << RP.getOccupancy(*MF); OS << '\n'; }); } @@ -473,8 +478,7 @@ GCNRPTracker::LiveRegSet llvm::getLiveRegs(SlotIndex SI, void GCNRPTracker::reset(const MachineInstr &MI, const LiveRegSet *LiveRegsCopy, bool After) { - const MachineFunction &MF = *MI.getMF(); - MRI = &MF.getRegInfo(); + MRI = &MF->getRegInfo(); if (LiveRegsCopy) { if (&LiveRegs != LiveRegsCopy) LiveRegs = *LiveRegsCopy; @@ -483,7 +487,7 @@ void GCNRPTracker::reset(const MachineInstr &MI, : getLiveRegsBefore(MI, LIS); } - MaxPressure = CurPressure = getRegPressure(*MRI, LiveRegs); + MaxPressure = CurPressure = getRegPressure(*MRI, LiveRegs, MF); } void GCNRPTracker::reset(const MachineRegisterInfo &MRI_, @@ -491,7 +495,7 @@ void GCNRPTracker::reset(const MachineRegisterInfo &MRI_, MRI = &MRI_; LiveRegs = LiveRegs_; LastTrackedMI = nullptr; - MaxPressure = CurPressure = getRegPressure(MRI_, LiveRegs_); + MaxPressure = CurPressure = getRegPressure(MRI_, LiveRegs_, MF); } /// Mostly copy/paste from CodeGen/RegisterPressure.cpp @@ -517,7 +521,7 @@ void GCNUpwardRPTracker::recede(const MachineInstr &MI) { return; // Kill all defs. - GCNRegPressure DefPressure, ECDefPressure; + GCNRegPressure DefPressure(MF), ECDefPressure(MF); bool HasECDefs = false; for (const MachineOperand &MO : MI.all_defs()) { if (!MO.getReg().isVirtual()) @@ -565,7 +569,7 @@ void GCNUpwardRPTracker::recede(const MachineInstr &MI) { MaxPressure = HasECDefs ? max(CurPressure + ECDefPressure, MaxPressure) : max(CurPressure, MaxPressure); - assert(CurPressure == getRegPressure(*MRI, LiveRegs)); + assert(CurPressure == getRegPressure(*MRI, LiveRegs, MF)); } //////////////////////////////////////////////////////////////////////////////// @@ -800,7 +804,7 @@ bool GCNUpwardRPTracker::isValid() const { return false; } - auto LISPressure = getRegPressure(*MRI, LISLR); + auto LISPressure = getRegPressure(*MRI, LISLR, MF); if (LISPressure != CurPressure) { dbgs() << "GCNUpwardRPTracker error: Pressure sets different\nTracked: " << print(CurPressure) << "LIS rpt: " << print(LISPressure); @@ -876,8 +880,8 @@ bool GCNRegPressurePrinter::runOnMachineFunction(MachineFunction &MF) { OS << "---\nname: " << MF.getName() << "\nbody: |\n"; - auto printRP = [](const GCNRegPressure &RP) { - return Printable([&RP](raw_ostream &OS) { + auto printRP = [&MF](const GCNRegPressure &RP) { + return Printable([&RP, &MF](raw_ostream &OS) { OS << format(PFX " %-5d", RP.getSGPRNum()) << format(" %-5d", RP.getVGPRNum(false)); }); @@ -906,14 +910,14 @@ bool GCNRegPressurePrinter::runOnMachineFunction(MachineFunction &MF) { SlotIndex MBBEndSlot = LIS.getSlotIndexes()->getMBBEndIdx(&MBB); GCNRPTracker::LiveRegSet LiveIn, LiveOut; - GCNRegPressure RPAtMBBEnd; + GCNRegPressure RPAtMBBEnd(&MF); if (UseDownwardTracker) { if (MBB.empty()) { LiveIn = LiveOut = getLiveRegs(MBBStartSlot, LIS, MRI); - RPAtMBBEnd = getRegPressure(MRI, LiveIn); + RPAtMBBEnd = getRegPressure(MRI, LiveIn, &MF); } else { - GCNDownwardRPTracker RPT(LIS); + GCNDownwardRPTracker RPT(LIS, &MF); RPT.reset(MBB.front()); LiveIn = RPT.getLiveRegs(); @@ -928,7 +932,7 @@ bool GCNRegPressurePrinter::runOnMachineFunction(MachineFunction &MF) { RPAtMBBEnd = RPT.getPressure(); } } else { - GCNUpwardRPTracker RPT(LIS); + GCNUpwardRPTracker RPT(LIS, &MF); RPT.reset(MRI, MBBEndSlot); LiveOut = RPT.getLiveRegs(); @@ -977,7 +981,7 @@ bool GCNRegPressurePrinter::runOnMachineFunction(MachineFunction &MF) { } } OS << PFX " Live-thr:" << llvm::print(LiveThrough, MRI); - OS << printRP(getRegPressure(MRI, LiveThrough)) << '\n'; + OS << printRP(getRegPressure(MRI, LiveThrough, &MF)) << '\n'; } OS << "...\n"; return false; diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h index ea33a229110c1..da564ce645039 100644 --- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h +++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h @@ -18,6 +18,7 @@ #define LLVM_LIB_TARGET_AMDGPU_GCNREGPRESSURE_H #include "GCNSubtarget.h" +#include "SIMachineFunctionInfo.h" #include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/RegisterPressure.h" #include @@ -35,6 +36,16 @@ struct GCNRegPressure { clear(); } + GCNRegPressure(unsigned ArchVGPRThreshold) : ArchVGPRThreshold(ArchVGPRThreshold) { + clear(); + } + + GCNRegPressure(const MachineFunction *MF) { + const GCNSubtarget &ST = MF->getSubtarget(); + ArchVGPRThreshold = ST.getMaxNumVectorRegs(MF->getFunction()).first; + clear(); + } + bool empty() const { return !Value[SGPR] && !Value[VGPR] && !Value[AGPR] && !Value[AVGPR]; } @@ -43,48 +54,91 @@ struct GCNRegPressure { /// \returns the SGPR32 pressure unsigned getSGPRNum() const { return Value[SGPR]; } - /// \returns the aggregated ArchVGPR32, AccVGPR32, and Pseudo AVGPR pressure - /// dependent upon \p UnifiedVGPRFile unsigned getVGPRNum(bool UnifiedVGPRFile) const { if (UnifiedVGPRFile) { - return Value[AGPR] - ? getUnifiedVGPRNum(Value[VGPR], Value[AGPR], Value[AVGPR]) - : Value[VGPR] + Value[AVGPR]; + return Value[AGPR] || Value[AVGPR] + ? getUnifiedVGPRNum(Value[VGPR], Value[AGPR], Value[AVGPR], + ArchVGPRThreshold) + : Value[VGPR]; } // AVGPR assignment priority is based on the width of the register. Account // AVGPR pressure as VGPR. return std::max(Value[VGPR] + Value[AVGPR], Value[AGPR]); } + inline static unsigned getAVGPRsAsVGPRsNum(unsigned NumArchVGPRs, + unsigned NumAVGPRs, + unsigned ArchVGPRThreshold) { + + return NumArchVGPRs < ArchVGPRThreshold + ? std::min((ArchVGPRThreshold - NumArchVGPRs), NumAVGPRs) + : 0; + } + + inline static unsigned getAVGPRsAsAGPRsNum(unsigned NumArchVGPRs, + unsigned NumAGPRs, + unsigned NumAVGPRs, + unsigned ArchVGPRThreshold) { + unsigned AVGPRsAsVGPRs = + getAVGPRsAsVGPRsNum(NumArchVGPRs, NumAVGPRs, ArchVGPRThreshold); + return NumAVGPRs > AVGPRsAsVGPRs ? NumAVGPRs - AVGPRsAsVGPRs : 0; + } + /// Returns the aggregated VGPR pressure, assuming \p NumArchVGPRs ArchVGPRs /// \p NumAGPRs AGPRS, and \p NumAVGPRs AVGPRs for a target with a unified /// VGPR file. inline static unsigned getUnifiedVGPRNum(unsigned NumArchVGPRs, unsigned NumAGPRs, - unsigned NumAVGPRs) { - - // Assume AVGPRs will be assigned as VGPRs. - return alignTo(NumArchVGPRs + NumAVGPRs, + unsigned NumAVGPRs, + unsigned ArchVGPRThreshold) { + + // Until we hit the VGPRThreshold, we will assign AV as VGPR. After that + // point, we will assign as AGPR. + unsigned AVGPRsAsVGPRs = + getAVGPRsAsVGPRsNum(NumArchVGPRs, NumAVGPRs, ArchVGPRThreshold); + unsigned AVGPRsAsAGPRs = getAVGPRsAsAGPRsNum(NumArchVGPRs, NumAGPRs, + NumAVGPRs, ArchVGPRThreshold); + return alignTo(NumArchVGPRs + AVGPRsAsVGPRs, AMDGPU::IsaInfo::getArchVGPRAllocGranule()) + - NumAGPRs; + NumAGPRs + AVGPRsAsAGPRs; } /// \returns the ArchVGPR32 pressure, plus the AVGPRS which we assume will be /// allocated as VGPR - unsigned getArchVGPRNum() const { return Value[VGPR] + Value[AVGPR]; } + unsigned getArchVGPRNum() const { + unsigned AVGPRsAsVGPRs = + getAVGPRsAsVGPRsNum(Value[VGPR], Value[AVGPR], ArchVGPRThreshold); + + return Value[VGPR] + AVGPRsAsVGPRs; + } /// \returns the AccVGPR32 pressure - unsigned getAGPRNum() const { return Value[AGPR]; } + unsigned getAGPRNum() const { + unsigned AVGPRsAsAGPRs = getAVGPRsAsAGPRsNum( + Value[VGPR], Value[AGPR], Value[AVGPR], ArchVGPRThreshold); + + return Value[AGPR] + AVGPRsAsAGPRs; + } /// \returns the AVGPR32 pressure unsigned getAVGPRNum() const { return Value[AVGPR]; } unsigned getVGPRTuplesWeight() const { - return std::max(Value[TOTAL_KINDS + VGPR] + Value[TOTAL_KINDS + AVGPR], - Value[TOTAL_KINDS + AGPR]); + unsigned AVGPRsAsVGPRs = + getAVGPRsAsVGPRsNum(Value[TOTAL_KINDS + VGPR], + Value[TOTAL_KINDS + AVGPR], ArchVGPRThreshold); + unsigned AVGPRsAsAGPRs = getAVGPRsAsAGPRsNum( + Value[TOTAL_KINDS + VGPR], Value[TOTAL_KINDS + AGPR], + Value[TOTAL_KINDS + AVGPR], ArchVGPRThreshold); + + return std::max(Value[TOTAL_KINDS + VGPR] + AVGPRsAsVGPRs, + Value[TOTAL_KINDS + AGPR] + AVGPRsAsAGPRs); } unsigned getSGPRTuplesWeight() const { return Value[TOTAL_KINDS + SGPR]; } - unsigned getOccupancy(const GCNSubtarget &ST, - unsigned DynamicVGPRBlockSize) const { + unsigned getOccupancy(const MachineFunction &MF) const { + const GCNSubtarget &ST = MF.getSubtarget(); + unsigned DynamicVGPRBlockSize = + MF.getInfo()->getDynamicVGPRBlockSize(); + return std::min(ST.getOccupancyWithNumSGPRs(getSGPRNum()), ST.getOccupancyWithNumVGPRs(getVGPRNum(ST.hasGFX90AInsts()), DynamicVGPRBlockSize)); @@ -95,10 +149,9 @@ struct GCNRegPressure { LaneBitmask NewMask, const MachineRegisterInfo &MRI); - bool higherOccupancy(const GCNSubtarget &ST, const GCNRegPressure &O, - unsigned DynamicVGPRBlockSize) const { - return getOccupancy(ST, DynamicVGPRBlockSize) > - O.getOccupancy(ST, DynamicVGPRBlockSize); + bool higherOccupancy(const GCNRegPressure &O, + const MachineFunction &MF) const { + return getOccupancy(MF) > O.getOccupancy(MF); } /// Compares \p this GCNRegpressure to \p O, returning true if \p this is @@ -136,6 +189,10 @@ struct GCNRegPressure { return *this; } + void setArchVGPRThreshold(unsigned VGPRThreshold) { + ArchVGPRThreshold = VGPRThreshold; + } + void dump() const; private: @@ -145,18 +202,20 @@ struct GCNRegPressure { /// all tuple register kinds). unsigned Value[ValueArraySize]; + unsigned ArchVGPRThreshold = std::numeric_limits::max(); + static unsigned getRegKind(const TargetRegisterClass *RC, const SIRegisterInfo *STI); friend GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2); - friend Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST, + friend Printable print(const GCNRegPressure &RP, unsigned DynamicVGPRBlockSize); }; inline GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2) { - GCNRegPressure Res; + GCNRegPressure Res(P1.ArchVGPRThreshold); for (unsigned I = 0; I < GCNRegPressure::ValueArraySize; ++I) Res.Value[I] = std::max(P1.Value[I], P2.Value[I]); return Res; @@ -238,7 +297,6 @@ class GCNRPTarget { private: /// Current register pressure. GCNRegPressure RP; - /// Target number of SGPRs. unsigned MaxSGPRs; /// Target number of ArchVGPRs and AGPRs. @@ -280,12 +338,14 @@ class GCNRPTracker { protected: const LiveIntervals &LIS; + const MachineFunction *MF; LiveRegSet LiveRegs; GCNRegPressure CurPressure, MaxPressure; const MachineInstr *LastTrackedMI = nullptr; mutable const MachineRegisterInfo *MRI = nullptr; - GCNRPTracker(const LiveIntervals &LIS_) : LIS(LIS_) {} + GCNRPTracker(const LiveIntervals &LIS_, const MachineFunction *MF) + : LIS(LIS_), MF(MF), CurPressure(MF), MaxPressure(MF) {} void reset(const MachineInstr &MI, const LiveRegSet *LiveRegsCopy, bool After); @@ -319,7 +379,8 @@ GCNRPTracker::LiveRegSet getLiveRegs(SlotIndex SI, const LiveIntervals &LIS, class GCNUpwardRPTracker : public GCNRPTracker { public: - GCNUpwardRPTracker(const LiveIntervals &LIS_) : GCNRPTracker(LIS_) {} + GCNUpwardRPTracker(const LiveIntervals &LIS_, const MachineFunction *MF) + : GCNRPTracker(LIS_, MF) {} using GCNRPTracker::reset; @@ -369,7 +430,8 @@ class GCNDownwardRPTracker : public GCNRPTracker { MachineBasicBlock::const_iterator MBBEnd; public: - GCNDownwardRPTracker(const LiveIntervals &LIS_) : GCNRPTracker(LIS_) {} + GCNDownwardRPTracker(const LiveIntervals &LIS_, const MachineFunction *MF) + : GCNRPTracker(LIS_, MF) {} using GCNRPTracker::reset; @@ -505,9 +567,9 @@ inline GCNRPTracker::LiveRegSet getLiveRegsBefore(const MachineInstr &MI, } template -GCNRegPressure getRegPressure(const MachineRegisterInfo &MRI, - Range &&LiveRegs) { - GCNRegPressure Res; +GCNRegPressure getRegPressure(const MachineRegisterInfo &MRI, Range &&LiveRegs, + const MachineFunction *MF) { + GCNRegPressure Res(MF); for (const auto &RM : LiveRegs) Res.inc(RM.first, LaneBitmask::getNone(), RM.second, MRI); return Res; @@ -517,7 +579,8 @@ bool isEqual(const GCNRPTracker::LiveRegSet &S1, const GCNRPTracker::LiveRegSet &S2); Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST = nullptr, - unsigned DynamicVGPRBlockSize = 0); + unsigned DynamicVGPRBlockSize = 0, + const MachineFunction *MF = nullptr); Printable print(const GCNRPTracker::LiveRegSet &LiveRegs, const MachineRegisterInfo &MRI); diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp index ce1ce687d0038..91ae042a23ef6 100644 --- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp +++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp @@ -72,8 +72,8 @@ const unsigned ScheduleMetrics::ScaleFactor = 100; GCNSchedStrategy::GCNSchedStrategy(const MachineSchedContext *C) : GenericScheduler(C), TargetOccupancy(0), MF(nullptr), - DownwardTracker(*C->LIS), UpwardTracker(*C->LIS), HasHighPressure(false) { -} + DownwardTracker(*C->LIS, C->MF), UpwardTracker(*C->LIS, C->MF), + HasHighPressure(false) {} void GCNSchedStrategy::initialize(ScheduleDAGMI *DAG) { GenericScheduler::initialize(DAG); @@ -181,7 +181,7 @@ static void getRegisterPressures( // GCNTrackers Pressure.resize(4, 0); MachineInstr *MI = SU->getInstr(); - GCNRegPressure NewPressure; + GCNRegPressure NewPressure(MI->getMF()); if (AtTop) { GCNDownwardRPTracker TempDownwardTracker(DownwardTracker); NewPressure = TempDownwardTracker.bumpDownwardPressure(MI, SRI); @@ -190,6 +190,7 @@ static void getRegisterPressures( TempUpwardTracker.recede(*MI); NewPressure = TempUpwardTracker.getPressure(); } + Pressure[AMDGPU::RegisterPressureSets::SReg_32] = NewPressure.getSGPRNum(); Pressure[AMDGPU::RegisterPressureSets::VGPR_32] = NewPressure.getArchVGPRNum(); @@ -802,7 +803,7 @@ void GCNScheduleDAGMILive::schedule() { GCNRegPressure GCNScheduleDAGMILive::getRealRegPressure(unsigned RegionIdx) const { - GCNDownwardRPTracker RPTracker(*LIS); + GCNDownwardRPTracker RPTracker(*LIS, &MF); RPTracker.advance(Regions[RegionIdx].first, Regions[RegionIdx].second, &LiveIns[RegionIdx]); return RPTracker.moveMaxPressure(); @@ -818,7 +819,7 @@ static MachineInstr *getLastMIForRegion(MachineBasicBlock::iterator RegionBegin, void GCNScheduleDAGMILive::computeBlockPressure(unsigned RegionIdx, const MachineBasicBlock *MBB) { - GCNDownwardRPTracker RPTracker(*LIS); + GCNDownwardRPTracker RPTracker(*LIS, &MF); // If the block has the only successor then live-ins of that successor are // live-outs of the current block. We can reuse calculated live set if the @@ -1026,7 +1027,8 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const GCNSchedStageID &StageID) { GCNSchedStage::GCNSchedStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG) : DAG(DAG), S(static_cast(*DAG.SchedImpl)), MF(DAG.MF), - MFI(DAG.MFI), ST(DAG.ST), StageID(StageID) {} + MFI(DAG.MFI), ST(DAG.ST), StageID(StageID), PressureBefore(&DAG.MF), + PressureAfter(&DAG.MF) {} bool GCNSchedStage::initGCNSchedStage() { if (!DAG.LIS) @@ -1140,8 +1142,7 @@ void UnclusteredHighRPStage::finalizeGCNSchedStage() { if (DAG.MinOccupancy > InitialOccupancy) { for (unsigned IDX = 0; IDX < DAG.Pressure.size(); ++IDX) DAG.RegionsWithMinOcc[IDX] = - DAG.Pressure[IDX].getOccupancy( - DAG.ST, DAG.MFI.getDynamicVGPRBlockSize()) == DAG.MinOccupancy; + DAG.Pressure[IDX].getOccupancy(DAG.MF) == DAG.MinOccupancy; LLVM_DEBUG(dbgs() << StageID << " stage successfully increased occupancy to " @@ -1189,12 +1190,14 @@ bool GCNSchedStage::initGCNRegion() { PressureBefore = DAG.Pressure[RegionIdx]; - LLVM_DEBUG( - dbgs() << "Pressure before scheduling:\nRegion live-ins:" - << print(DAG.LiveIns[RegionIdx], DAG.MRI) - << "Region live-in pressure: " - << print(llvm::getRegPressure(DAG.MRI, DAG.LiveIns[RegionIdx])) - << "Region register pressure: " << print(PressureBefore)); + LLVM_DEBUG(dbgs() << "Pressure before scheduling:\nRegion live-ins:" + << print(DAG.LiveIns[RegionIdx], DAG.MRI) + << "Region live-in pressure: " + << print(llvm::getRegPressure( + DAG.MRI, DAG.LiveIns[RegionIdx], &DAG.MF), + &ST, 0, &MF) + << "Region register pressure: " + << print(PressureBefore, &ST, 0, &MF)); S.HasHighPressure = false; S.KnownExcessRP = isRegionWithExcessRP(); @@ -1275,17 +1278,16 @@ void GCNSchedStage::checkScheduling() { // Check the results of scheduling. PressureAfter = DAG.getRealRegPressure(RegionIdx); - LLVM_DEBUG(dbgs() << "Pressure after scheduling: " << print(PressureAfter)); + LLVM_DEBUG(dbgs() << "Pressure after scheduling: " + << print(PressureAfter, &ST, 0, &MF)); LLVM_DEBUG(dbgs() << "Region: " << RegionIdx << ".\n"); - unsigned DynamicVGPRBlockSize = DAG.MFI.getDynamicVGPRBlockSize(); - + unsigned ArchVGPRThreshold = ST.getMaxNumVectorRegs(MF.getFunction()).first; if (PressureAfter.getSGPRNum() <= S.SGPRCriticalLimit && PressureAfter.getVGPRNum(ST.hasGFX90AInsts()) <= S.VGPRCriticalLimit) { DAG.Pressure[RegionIdx] = PressureAfter; DAG.RegionsWithMinOcc[RegionIdx] = - PressureAfter.getOccupancy(ST, DynamicVGPRBlockSize) == - DAG.MinOccupancy; + PressureAfter.getOccupancy(DAG.MF) == DAG.MinOccupancy; // Early out if we have achieved the occupancy target. LLVM_DEBUG(dbgs() << "Pressure in desired limits, done.\n"); @@ -1294,10 +1296,10 @@ void GCNSchedStage::checkScheduling() { unsigned TargetOccupancy = std::min( S.getTargetOccupancy(), ST.getOccupancyWithWorkGroupSizes(MF).second); - unsigned WavesAfter = std::min( - TargetOccupancy, PressureAfter.getOccupancy(ST, DynamicVGPRBlockSize)); - unsigned WavesBefore = std::min( - TargetOccupancy, PressureBefore.getOccupancy(ST, DynamicVGPRBlockSize)); + unsigned WavesAfter = + std::min(TargetOccupancy, PressureAfter.getOccupancy(DAG.MF)); + unsigned WavesBefore = + std::min(TargetOccupancy, PressureBefore.getOccupancy(DAG.MF)); LLVM_DEBUG(dbgs() << "Occupancy before scheduling: " << WavesBefore << ", after " << WavesAfter << ".\n"); @@ -1346,8 +1348,7 @@ void GCNSchedStage::checkScheduling() { } else { DAG.Pressure[RegionIdx] = PressureAfter; DAG.RegionsWithMinOcc[RegionIdx] = - PressureAfter.getOccupancy(ST, DynamicVGPRBlockSize) == - DAG.MinOccupancy; + PressureAfter.getOccupancy(DAG.MF) == DAG.MinOccupancy; } } @@ -1500,8 +1501,7 @@ bool OccInitialScheduleStage::shouldRevertScheduling(unsigned WavesAfter) { bool UnclusteredHighRPStage::shouldRevertScheduling(unsigned WavesAfter) { // If RP is not reduced in the unclustered reschedule stage, revert to the // old schedule. - if ((WavesAfter <= - PressureBefore.getOccupancy(ST, DAG.MFI.getDynamicVGPRBlockSize()) && + if ((WavesAfter <= PressureBefore.getOccupancy(DAG.MF) && mayCauseSpilling(WavesAfter)) || GCNSchedStage::shouldRevertScheduling(WavesAfter)) { LLVM_DEBUG(dbgs() << "Unclustered reschedule did not help.\n"); @@ -1523,9 +1523,8 @@ bool UnclusteredHighRPStage::shouldRevertScheduling(unsigned WavesAfter) { ScheduleMetrics MAfter = getScheduleMetrics(DAG); unsigned OldMetric = MBefore.getMetric(); unsigned NewMetric = MAfter.getMetric(); - unsigned WavesBefore = std::min( - S.getTargetOccupancy(), - PressureBefore.getOccupancy(ST, DAG.MFI.getDynamicVGPRBlockSize())); + unsigned WavesBefore = + std::min(S.getTargetOccupancy(), PressureBefore.getOccupancy(DAG.MF)); unsigned Profit = ((WavesAfter * ScheduleMetrics::ScaleFactor) / WavesBefore * ((OldMetric + ScheduleMetricBias) * ScheduleMetrics::ScaleFactor) / @@ -1579,8 +1578,7 @@ bool GCNSchedStage::mayCauseSpilling(unsigned WavesAfter) { void GCNSchedStage::revertScheduling() { DAG.RegionsWithMinOcc[RegionIdx] = - PressureBefore.getOccupancy(ST, DAG.MFI.getDynamicVGPRBlockSize()) == - DAG.MinOccupancy; + PressureBefore.getOccupancy(DAG.MF) == DAG.MinOccupancy; LLVM_DEBUG(dbgs() << "Attempting to revert scheduling.\n"); DAG.RegionEnd = DAG.RegionBegin; int SkippedDebugInstr = 0; @@ -2000,16 +1998,16 @@ void PreRARematStage::rematerialize() { if (!RecomputeRP.contains(I)) continue; - GCNRegPressure RP; + GCNRegPressure RP(&DAG.MF); if (IsEmptyRegion) { - RP = getRegPressure(DAG.MRI, DAG.LiveIns[I]); + RP = getRegPressure(DAG.MRI, DAG.LiveIns[I], &DAG.MF); } else { - GCNDownwardRPTracker RPT(*DAG.LIS); + GCNDownwardRPTracker RPT(*DAG.LIS, &DAG.MF); auto *NonDbgMI = &*skipDebugInstructionsForward(DAG.Regions[I].first, DAG.Regions[I].second); if (NonDbgMI == DAG.Regions[I].second) { // Region is non-empty but contains only debug instructions. - RP = getRegPressure(DAG.MRI, DAG.LiveIns[I]); + RP = getRegPressure(DAG.MRI, DAG.LiveIns[I], &DAG.MF); } else { RPT.reset(*NonDbgMI, &DAG.LiveIns[I]); RPT.advance(DAG.Regions[I].second); @@ -2017,9 +2015,7 @@ void PreRARematStage::rematerialize() { } } DAG.Pressure[I] = RP; - AchievedOcc = std::min( - AchievedOcc, RP.getOccupancy(ST, MF.getInfo() - ->getDynamicVGPRBlockSize())); + AchievedOcc = std::min(AchievedOcc, RP.getOccupancy(DAG.MF)); } REMAT_DEBUG(dbgs() << "Achieved occupancy " << AchievedOcc << "\n"); } diff --git a/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp b/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp index 6b13b06590102..ac2056cad647e 100644 --- a/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp +++ b/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp @@ -197,9 +197,7 @@ bool SIFormMemoryClausesImpl::checkPressure(const MachineInstr &MI, // pointer becomes dead and could otherwise be reused for destination. RPT.advanceToNext(); GCNRegPressure MaxPressure = RPT.moveMaxPressure(); - unsigned Occupancy = MaxPressure.getOccupancy( - *ST, - MI.getMF()->getInfo()->getDynamicVGPRBlockSize()); + unsigned Occupancy = MaxPressure.getOccupancy(*MI.getMF()); // Don't push over half the register budget. We don't want to introduce // spilling just to form a soft clause. @@ -277,7 +275,7 @@ bool SIFormMemoryClausesImpl::run(MachineFunction &MF) { "amdgpu-max-memory-clause", MaxClause); for (MachineBasicBlock &MBB : MF) { - GCNDownwardRPTracker RPT(*LIS); + GCNDownwardRPTracker RPT(*LIS, &MF); MachineBasicBlock::instr_iterator Next; for (auto I = MBB.instr_begin(), E = MBB.instr_end(); I != E; I = Next) { MachineInstr &MI = *I; diff --git a/llvm/test/CodeGen/AMDGPU/avgpr-pressure.mir b/llvm/test/CodeGen/AMDGPU/avgpr-pressure.mir new file mode 100644 index 0000000000000..a5183ce0d2661 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/avgpr-pressure.mir @@ -0,0 +1,481 @@ +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 -run-pass=machine-scheduler --debug-only=machine-scheduler -o - %s 2>&1 | FileCheck %s +# REQUIRES: asserts + +--- | + define void @avgpr_rp_occ1() #0 { + entry: + unreachable + } + + define void @avgpr_rp_occ2() #1 { + entry: + unreachable + } + + define void @avgpr_rp_occ3() #2 { + entry: + unreachable + } + + define void @avgpr_rp_occ4() #3 { + entry: + unreachable + } + + define void @avgpr_rp_occ5() #4 { + entry: + unreachable + } + + define void @avgpr_rp_occ6() #5 { + entry: + unreachable + } + + define void @avgpr_rp_occ7() #6 { + entry: + unreachable + } + + define void @avgpr_rp_occ8() #7 { + entry: + unreachable + } + + + define void @vgpr_rp_occ1() #0 { + entry: + unreachable + } + + define void @vgpr_rp_occ2() #1 { + entry: + unreachable + } + + define void @vgpr_rp_occ3() #2 { + entry: + unreachable + } + + attributes #0 = {"amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="64,64"} + attributes #1 = {"amdgpu-waves-per-eu"="2,2" "amdgpu-flat-work-group-size"="64,64"} + attributes #2 = {"amdgpu-waves-per-eu"="3,3" "amdgpu-flat-work-group-size"="64,64"} + attributes #3 = {"amdgpu-waves-per-eu"="4,4" "amdgpu-flat-work-group-size"="64,64"} + attributes #4 = {"amdgpu-waves-per-eu"="5,5" "amdgpu-flat-work-group-size"="64,64"} + attributes #5 = {"amdgpu-waves-per-eu"="6,6" "amdgpu-flat-work-group-size"="64,64"} + attributes #6 = {"amdgpu-waves-per-eu"="7,7" "amdgpu-flat-work-group-size"="64,64"} + attributes #7 = {"amdgpu-waves-per-eu"="8,8" "amdgpu-flat-work-group-size"="64,64"} + + +... + +# CHECK: avgpr_rp_occ1:%bb.0 +# CHECK: Pressure before scheduling: +# CHECK-NEXT: Region live-ins: +# CHECK-NEXT: Region live-in pressure: VGPRs: 0 AGPRs: 0(O8), SGPRs: 0(O10), LVGPR WT: 0, LSGPR WT: 0 -> Occ: 8 +# CHECK-NEXT: Region register pressure: VGPRs: 256 AGPRs: 192(O1), SGPRs: 0(O10), LVGPR WT: 256, LSGPR WT: 0 -> Occ: 1 + +--- +name: avgpr_rp_occ1 +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:av_1024 = IMPLICIT_DEF + %9:av_1024 = IMPLICIT_DEF + %10:av_1024 = IMPLICIT_DEF + %11:av_1024 = IMPLICIT_DEF + %12:av_1024 = IMPLICIT_DEF + %13:av_1024 = IMPLICIT_DEF + %14:av_1024 = IMPLICIT_DEF + SCHED_BARRIER 0 + KILL %1, %2, %3, %4, %5, %6, %7 + + bb.1: + KILL %8, %9, %10, %11, %12, %13, %14 + S_ENDPGM 0 +... + +# CHECK: avgpr_rp_occ2:%bb.0 +# CHECK: Pressure before scheduling: +# CHECK-NEXT: Region live-ins: +# CHECK-NEXT: Region live-in pressure: VGPRs: 0 AGPRs: 0(O8), SGPRs: 0(O10), LVGPR WT: 0, LSGPR WT: 0 -> Occ: 8 +# CHECK-NEXT: Region register pressure: VGPRs: 128 AGPRs: 64(O2), SGPRs: 0(O10), LVGPR WT: 128, LSGPR WT: 0 -> Occ: 2 + +--- +name: avgpr_rp_occ2 +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:av_1024 = IMPLICIT_DEF + %5:av_1024 = IMPLICIT_DEF + %6:av_1024 = IMPLICIT_DEF + SCHED_BARRIER 0 + KILL %1, %2, %3 + + bb.1: + KILL %4, %5, %6 + S_ENDPGM 0 +... + +# CHECK: avgpr_rp_occ3:%bb.0 +# CHECK: Pressure before scheduling: +# CHECK-NEXT: Region live-ins: +# CHECK-NEXT: Region live-in pressure: VGPRs: 0 AGPRs: 0(O8), SGPRs: 0(O10), LVGPR WT: 0, LSGPR WT: 0 -> Occ: 8 +# CHECK-NEXT: Region register pressure: VGPRs: 84 AGPRs: 44(O4), SGPRs: 0(O10), LVGPR WT: 84, LSGPR WT: 0 -> Occ: 4 + +--- +name: avgpr_rp_occ3 +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:av_1024 = IMPLICIT_DEF + %4:av_1024 = IMPLICIT_DEF + SCHED_BARRIER 0 + KILL %1, %2 + + bb.1: + KILL %3, %4 + S_ENDPGM 0 +... + +# CHECK: avgpr_rp_occ4:%bb.0 +# CHECK: Pressure before scheduling: +# CHECK-NEXT: Region live-ins: +# CHECK-NEXT: Region live-in pressure: VGPRs: 0 AGPRs: 0(O8), SGPRs: 0(O10), LVGPR WT: 0, LSGPR WT: 0 -> Occ: 8 +# CHECK-NEXT: Region register pressure: VGPRs: 64 AGPRs: 64(O4), SGPRs: 0(O10), LVGPR WT: 64, LSGPR WT: 0 -> Occ: 4 + +--- +name: avgpr_rp_occ4 +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:av_1024 = IMPLICIT_DEF + %2:av_1024 = IMPLICIT_DEF + %3:av_1024 = IMPLICIT_DEF + %4:av_1024 = IMPLICIT_DEF + SCHED_BARRIER 0 + KILL %1, %2 + + bb.1: + KILL %3, %4 + S_ENDPGM 0 +... + +# CHECK: avgpr_rp_occ5:%bb.0 +# CHECK: Pressure before scheduling: +# CHECK-NEXT: Region live-ins: +# CHECK-NEXT: Region live-in pressure: VGPRs: 0 AGPRs: 0(O8), SGPRs: 0(O10), LVGPR WT: 0, LSGPR WT: 0 -> Occ: 8 +# CHECK-NEXT: Region register pressure: VGPRs: 48 AGPRs: 80(O4), SGPRs: 0(O10), LVGPR WT: 80, LSGPR WT: 0 -> Occ: 4 + +--- +name: avgpr_rp_occ5 +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:av_1024 = IMPLICIT_DEF + %2:av_1024 = IMPLICIT_DEF + %3:av_1024 = IMPLICIT_DEF + %4:av_1024 = IMPLICIT_DEF + SCHED_BARRIER 0 + KILL %1, %2 + + bb.1: + KILL %3, %4 + S_ENDPGM 0 +... + +# CHECK: avgpr_rp_occ6:%bb.0 +# CHECK: Pressure before scheduling: +# CHECK-NEXT: Region live-ins: +# CHECK-NEXT: Region live-in pressure: VGPRs: 0 AGPRs: 0(O8), SGPRs: 0(O10), LVGPR WT: 0, LSGPR WT: 0 -> Occ: 8 +# CHECK-NEXT: Region register pressure: VGPRs: 40 AGPRs: 88(O4), SGPRs: 0(O10), LVGPR WT: 88, LSGPR WT: 0 -> Occ: 4 + +--- +name: avgpr_rp_occ6 +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:av_1024 = IMPLICIT_DEF + %2:av_1024 = IMPLICIT_DEF + %3:av_1024 = IMPLICIT_DEF + %4:av_1024 = IMPLICIT_DEF + SCHED_BARRIER 0 + KILL %1, %2 + + bb.1: + KILL %3, %4 + S_ENDPGM 0 +... + +# CHECK: avgpr_rp_occ7:%bb.0 +# CHECK: Pressure before scheduling: +# CHECK-NEXT: Region live-ins: +# CHECK-NEXT: Region live-in pressure: VGPRs: 0 AGPRs: 0(O8), SGPRs: 0(O10), LVGPR WT: 0, LSGPR WT: 0 -> Occ: 8 +# CHECK-NEXT: Region register pressure: VGPRs: 36 AGPRs: 92(O4), SGPRs: 0(O10), LVGPR WT: 92, LSGPR WT: 0 -> Occ: 4 + +--- +name: avgpr_rp_occ7 +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:av_1024 = IMPLICIT_DEF + %2:av_1024 = IMPLICIT_DEF + %3:av_1024 = IMPLICIT_DEF + %4:av_1024 = IMPLICIT_DEF + SCHED_BARRIER 0 + KILL %1, %2 + + bb.1: + KILL %3, %4 + S_ENDPGM 0 +... + +# CHECK: avgpr_rp_occ8:%bb.0 +# CHECK: Pressure before scheduling: +# CHECK-NEXT: Region live-ins: +# CHECK-NEXT: Region live-in pressure: VGPRs: 0 AGPRs: 0(O8), SGPRs: 0(O10), LVGPR WT: 0, LSGPR WT: 0 -> Occ: 8 +# CHECK-NEXT: Region register pressure: VGPRs: 32 AGPRs: 96(O4), SGPRs: 0(O10), LVGPR WT: 96, LSGPR WT: 0 -> Occ: 4 + +--- +name: avgpr_rp_occ8 +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:av_1024 = IMPLICIT_DEF + %2:av_1024 = IMPLICIT_DEF + %3:av_1024 = IMPLICIT_DEF + %4:av_1024 = IMPLICIT_DEF + SCHED_BARRIER 0 + KILL %1, %2 + + bb.1: + KILL %3, %4 + S_ENDPGM 0 +... + +# CHECK: vgpr_rp_occ1:%bb.0 +# CHECK: Pressure before scheduling: +# CHECK-NEXT: Region live-ins: +# CHECK-NEXT: Region live-in pressure: VGPRs: 0 AGPRs: 0(O8), SGPRs: 0(O10), LVGPR WT: 0, LSGPR WT: 0 -> Occ: 8 +# CHECK-NEXT: Region register pressure: VGPRs: 448 AGPRs: 0(O1), SGPRs: 0(O10), LVGPR WT: 448, LSGPR WT: 0 -> Occ: 1 + +--- +name: vgpr_rp_occ1 +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_1024 = IMPLICIT_DEF + %9:vreg_1024 = IMPLICIT_DEF + %10:vreg_1024 = IMPLICIT_DEF + %11:vreg_1024 = IMPLICIT_DEF + %12:vreg_1024 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + %14:vreg_1024 = IMPLICIT_DEF + SCHED_BARRIER 0 + KILL %1, %2, %3, %4, %5, %6, %7 + + bb.1: + KILL %8, %9, %10, %11, %12, %13, %14 + S_ENDPGM 0 +... + +# CHECK: vgpr_rp_occ2:%bb.0 +# CHECK: Pressure before scheduling: +# CHECK-NEXT: Region live-ins: +# CHECK-NEXT: Region live-in pressure: VGPRs: 0 AGPRs: 0(O8), SGPRs: 0(O10), LVGPR WT: 0, LSGPR WT: 0 -> Occ: 8 +# CHECK-NEXT: Region register pressure: VGPRs: 192 AGPRs: 0(O2), SGPRs: 0(O10), LVGPR WT: 192, LSGPR WT: 0 -> Occ: 2 + +--- +name: vgpr_rp_occ2 +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + SCHED_BARRIER 0 + KILL %1, %2, %3 + + bb.1: + KILL %4, %5, %6 + S_ENDPGM 0 +... + +# CHECK: vgpr_rp_occ3:%bb.0 +# CHECK: Pressure before scheduling: +# CHECK-NEXT: Region live-ins: +# CHECK-NEXT: Region live-in pressure: VGPRs: 0 AGPRs: 0(O8), SGPRs: 0(O10), LVGPR WT: 0, LSGPR WT: 0 -> Occ: 8 +# CHECK-NEXT: Region register pressure: VGPRs: 128 AGPRs: 0(O4), SGPRs: 0(O10), LVGPR WT: 128, LSGPR WT: 0 -> Occ: 4 + + +--- +name: vgpr_rp_occ3 +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + SCHED_BARRIER 0 + KILL %1, %2 + + bb.1: + KILL %3, %4 + S_ENDPGM 0 +... diff --git a/llvm/test/CodeGen/AMDGPU/debug-value-scheduler-liveins.mir b/llvm/test/CodeGen/AMDGPU/debug-value-scheduler-liveins.mir index 2a08c52e447ba..72181346764fb 100644 --- a/llvm/test/CodeGen/AMDGPU/debug-value-scheduler-liveins.mir +++ b/llvm/test/CodeGen/AMDGPU/debug-value-scheduler-liveins.mir @@ -6,7 +6,7 @@ # CHECK-NEXT: test_get_liveins:%bb.0 # CHECK: ********** MI Scheduling ********** # CHECK-NEXT: test_get_liveins:%bb.1 -# CHECK: Region live-in pressure: VGPRs: 1 AGPRs: 0, SGPRs: 0, LVGPR WT: 0, LSGPR WT: 0 +# CHECK: Region live-in pressure: VGPRs: 1 AGPRs: 0(O10), SGPRs: 0(O10), LVGPR WT: 0, LSGPR WT: 0 -> Occ: 10 # CHECK: ScheduleDAGMILive::schedule starting ---