From 5b648df1a842fba1fa47fdfa0936694573df02d2 Mon Sep 17 00:00:00 2001 From: Changpeng Fang Date: Mon, 25 Jan 2021 16:08:08 -0800 Subject: [PATCH] AMDGPU: Reduce the number of expensive calls in SIFormMemoryClauses Summary: GCNDownwardRPTracker::reset(MI) is a very expensive call when the number of virtual registers is huge. We observed long compile times when reset(MI) was called once for each cluster. With this change, we call RPT.reset(MI) only at the first cluster seen in a basic block, and use advance() to obtain the register pressure for the later clusters in the same basic block. This reduces the number of expensive calls and thus the compile time. Reviewers: rampitec Fixes: SWDEV-239161 Differential Revision: https://reviews.llvm.org/D95273 --- .../lib/Target/AMDGPU/SIFormMemoryClauses.cpp | 23 +++++++++++++++---- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp b/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp index 7bf508a72dc3..104eceafce34 100644 --- a/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp +++ b/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp @@ -317,6 +317,7 @@ bool SIFormMemoryClauses::runOnMachineFunction(MachineFunction &MF) { MF.getFunction(), "amdgpu-max-memory-clause", MaxClause); for (MachineBasicBlock &MBB : MF) { + GCNDownwardRPTracker RPT(*LIS); MachineBasicBlock::instr_iterator Next; for (auto I = MBB.instr_begin(), E = MBB.instr_end(); I != E; I = Next) { MachineInstr &MI = *I; @@ -327,12 +328,19 @@ bool SIFormMemoryClauses::runOnMachineFunction(MachineFunction &MF) { if (!isValidClauseInst(MI, IsVMEM)) continue; - RegUse Defs, Uses; - GCNDownwardRPTracker RPT(*LIS); - RPT.reset(MI); + if (!RPT.getNext().isValid()) + RPT.reset(MI); + else { // Advance the state to the current MI. 
+ RPT.advance(MachineBasicBlock::const_iterator(MI)); + RPT.advanceBeforeNext(); + } - if (!processRegUses(MI, Defs, Uses, RPT)) + const GCNRPTracker::LiveRegSet LiveRegsCopy(RPT.getLiveRegs()); + RegUse Defs, Uses; + if (!processRegUses(MI, Defs, Uses, RPT)) { + RPT.reset(MI, &LiveRegsCopy); continue; + } unsigned Length = 1; for ( ; Next != E && Length < FuncMaxClause; ++Next) { @@ -347,8 +355,10 @@ bool SIFormMemoryClauses::runOnMachineFunction(MachineFunction &MF) { ++Length; } - if (Length < 2) + if (Length < 2) { + RPT.reset(MI, &LiveRegsCopy); continue; + } Changed = true; MFI->limitOccupancy(LastRecordedOccupancy); @@ -356,6 +366,9 @@ bool SIFormMemoryClauses::runOnMachineFunction(MachineFunction &MF) { auto B = BuildMI(MBB, I, DebugLoc(), TII->get(TargetOpcode::BUNDLE)); Ind->insertMachineInstrInMaps(*B); + // Restore the state after processing the bundle. + RPT.reset(*B, &LiveRegsCopy); + for (auto BI = I; BI != Next; ++BI) { BI->bundleWithPred(); Ind->removeSingleMachineInstrFromMaps(*BI);