AMDGPU: Reduce the number of expensive calls in SIFormMemoryClause

Summary:
  GCNDownwardRPTracker::reset(MI) is a very expensive call when the number of virtual registers is huge.
We observed long compilation times when RPT.reset(MI) is called once for each cluster.

In this work, we call RPT.reset() only for the first cluster seen in a basic block, and use advance() to get
the register pressure for the later clusters in the same basic block. This effectively reduces the number
of expensive calls and thus the compile time.

Reviewers:
  rampitec

Fixes:
  SWDEV-239161

Differential Revision:
  https://reviews.llvm.org/D95273
This commit is contained in:
Changpeng Fang 2021-01-25 16:08:08 -08:00
parent 3dd5ea9dd8
commit 5b648df1a8
1 changed file with 18 additions and 5 deletions

View File

@ -317,6 +317,7 @@ bool SIFormMemoryClauses::runOnMachineFunction(MachineFunction &MF) {
MF.getFunction(), "amdgpu-max-memory-clause", MaxClause); MF.getFunction(), "amdgpu-max-memory-clause", MaxClause);
for (MachineBasicBlock &MBB : MF) { for (MachineBasicBlock &MBB : MF) {
GCNDownwardRPTracker RPT(*LIS);
MachineBasicBlock::instr_iterator Next; MachineBasicBlock::instr_iterator Next;
for (auto I = MBB.instr_begin(), E = MBB.instr_end(); I != E; I = Next) { for (auto I = MBB.instr_begin(), E = MBB.instr_end(); I != E; I = Next) {
MachineInstr &MI = *I; MachineInstr &MI = *I;
@ -327,12 +328,19 @@ bool SIFormMemoryClauses::runOnMachineFunction(MachineFunction &MF) {
if (!isValidClauseInst(MI, IsVMEM)) if (!isValidClauseInst(MI, IsVMEM))
continue; continue;
RegUse Defs, Uses; if (!RPT.getNext().isValid())
GCNDownwardRPTracker RPT(*LIS); RPT.reset(MI);
RPT.reset(MI); else { // Advance the state to the current MI.
RPT.advance(MachineBasicBlock::const_iterator(MI));
RPT.advanceBeforeNext();
}
if (!processRegUses(MI, Defs, Uses, RPT)) const GCNRPTracker::LiveRegSet LiveRegsCopy(RPT.getLiveRegs());
RegUse Defs, Uses;
if (!processRegUses(MI, Defs, Uses, RPT)) {
RPT.reset(MI, &LiveRegsCopy);
continue; continue;
}
unsigned Length = 1; unsigned Length = 1;
for ( ; Next != E && Length < FuncMaxClause; ++Next) { for ( ; Next != E && Length < FuncMaxClause; ++Next) {
@ -347,8 +355,10 @@ bool SIFormMemoryClauses::runOnMachineFunction(MachineFunction &MF) {
++Length; ++Length;
} }
if (Length < 2) if (Length < 2) {
RPT.reset(MI, &LiveRegsCopy);
continue; continue;
}
Changed = true; Changed = true;
MFI->limitOccupancy(LastRecordedOccupancy); MFI->limitOccupancy(LastRecordedOccupancy);
@ -356,6 +366,9 @@ bool SIFormMemoryClauses::runOnMachineFunction(MachineFunction &MF) {
auto B = BuildMI(MBB, I, DebugLoc(), TII->get(TargetOpcode::BUNDLE)); auto B = BuildMI(MBB, I, DebugLoc(), TII->get(TargetOpcode::BUNDLE));
Ind->insertMachineInstrInMaps(*B); Ind->insertMachineInstrInMaps(*B);
// Restore the state after processing the bundle.
RPT.reset(*B, &LiveRegsCopy);
for (auto BI = I; BI != Next; ++BI) { for (auto BI = I; BI != Next; ++BI) {
BI->bundleWithPred(); BI->bundleWithPred();
Ind->removeSingleMachineInstrFromMaps(*BI); Ind->removeSingleMachineInstrFromMaps(*BI);