222 lines
		
	
	
		
			6.0 KiB
		
	
	
	
		
			C++
		
	
	
	
			
		
		
	
	
			222 lines
		
	
	
		
			6.0 KiB
		
	
	
	
		
			C++
		
	
	
	
| //===- SIPreAllocateWWMRegs.cpp - WWM Register Pre-allocation -------------===//
 | |
| //
 | |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 | |
| // See https://llvm.org/LICENSE.txt for license information.
 | |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 | |
| //
 | |
| //===----------------------------------------------------------------------===//
 | |
| //
 | |
| /// \file
 | |
| /// Pass to pre-allocate WWM registers
 | |
| //
 | |
| //===----------------------------------------------------------------------===//
 | |
| 
 | |
| #include "AMDGPU.h"
 | |
| #include "AMDGPUSubtarget.h"
 | |
| #include "SIInstrInfo.h"
 | |
| #include "SIRegisterInfo.h"
 | |
| #include "SIMachineFunctionInfo.h"
 | |
| #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
 | |
| #include "llvm/ADT/PostOrderIterator.h"
 | |
| #include "llvm/CodeGen/VirtRegMap.h"
 | |
| #include "llvm/CodeGen/LiveInterval.h"
 | |
| #include "llvm/CodeGen/LiveIntervals.h"
 | |
| #include "llvm/CodeGen/LiveRegMatrix.h"
 | |
| #include "llvm/CodeGen/MachineDominators.h"
 | |
| #include "llvm/CodeGen/MachineFunctionPass.h"
 | |
| #include "llvm/CodeGen/RegisterClassInfo.h"
 | |
| 
 | |
| using namespace llvm;
 | |
| 
 | |
| #define DEBUG_TYPE "si-pre-allocate-wwm-regs"
 | |
| 
 | |
| namespace {
 | |
| 
 | |
| class SIPreAllocateWWMRegs : public MachineFunctionPass {
 | |
| private:
 | |
|   const SIInstrInfo *TII;
 | |
|   const SIRegisterInfo *TRI;
 | |
|   MachineRegisterInfo *MRI;
 | |
|   LiveIntervals *LIS;
 | |
|   LiveRegMatrix *Matrix;
 | |
|   VirtRegMap *VRM;
 | |
|   RegisterClassInfo RegClassInfo;
 | |
| 
 | |
|   std::vector<unsigned> RegsToRewrite;
 | |
| 
 | |
| public:
 | |
|   static char ID;
 | |
| 
 | |
|   SIPreAllocateWWMRegs() : MachineFunctionPass(ID) {
 | |
|     initializeSIPreAllocateWWMRegsPass(*PassRegistry::getPassRegistry());
 | |
|   }
 | |
| 
 | |
|   bool runOnMachineFunction(MachineFunction &MF) override;
 | |
| 
 | |
|   void getAnalysisUsage(AnalysisUsage &AU) const override {
 | |
|     AU.addRequired<LiveIntervals>();
 | |
|     AU.addPreserved<LiveIntervals>();
 | |
|     AU.addRequired<VirtRegMap>();
 | |
|     AU.addRequired<LiveRegMatrix>();
 | |
|     AU.addPreserved<SlotIndexes>();
 | |
|     AU.setPreservesCFG();
 | |
|     MachineFunctionPass::getAnalysisUsage(AU);
 | |
|   }
 | |
| 
 | |
| private:
 | |
|   bool processDef(MachineOperand &MO);
 | |
|   void rewriteRegs(MachineFunction &MF);
 | |
| };
 | |
| 
 | |
| } // End anonymous namespace.
 | |
| 
 | |
| INITIALIZE_PASS_BEGIN(SIPreAllocateWWMRegs, DEBUG_TYPE,
 | |
|                 "SI Pre-allocate WWM Registers", false, false)
 | |
| INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
 | |
| INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
 | |
| INITIALIZE_PASS_DEPENDENCY(LiveRegMatrix)
 | |
| INITIALIZE_PASS_END(SIPreAllocateWWMRegs, DEBUG_TYPE,
 | |
|                 "SI Pre-allocate WWM Registers", false, false)
 | |
| 
 | |
| char SIPreAllocateWWMRegs::ID = 0;
 | |
| 
 | |
| char &llvm::SIPreAllocateWWMRegsID = SIPreAllocateWWMRegs::ID;
 | |
| 
 | |
| FunctionPass *llvm::createSIPreAllocateWWMRegsPass() {
 | |
|   return new SIPreAllocateWWMRegs();
 | |
| }
 | |
| 
 | |
| bool SIPreAllocateWWMRegs::processDef(MachineOperand &MO) {
 | |
|   if (!MO.isReg())
 | |
|     return false;
 | |
| 
 | |
|   unsigned Reg = MO.getReg();
 | |
| 
 | |
|   if (!TRI->isVGPR(*MRI, Reg))
 | |
|     return false;
 | |
| 
 | |
|   if (TRI->isPhysicalRegister(Reg))
 | |
|     return false;
 | |
| 
 | |
|   if (VRM->hasPhys(Reg))
 | |
|     return false;
 | |
| 
 | |
|   LiveInterval &LI = LIS->getInterval(Reg);
 | |
| 
 | |
|   for (unsigned PhysReg : RegClassInfo.getOrder(MRI->getRegClass(Reg))) {
 | |
|     if (!MRI->isPhysRegUsed(PhysReg) &&
 | |
|         Matrix->checkInterference(LI, PhysReg) == LiveRegMatrix::IK_Free) {
 | |
|       Matrix->assign(LI, PhysReg);
 | |
|       assert(PhysReg != 0);
 | |
|       RegsToRewrite.push_back(Reg);
 | |
|       return true;
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   llvm_unreachable("physreg not found for WWM expression");
 | |
|   return false;
 | |
| }
 | |
| 
 | |
| void SIPreAllocateWWMRegs::rewriteRegs(MachineFunction &MF) {
 | |
|   for (MachineBasicBlock &MBB : MF) {
 | |
|     for (MachineInstr &MI : MBB) {
 | |
|       for (MachineOperand &MO : MI.operands()) {
 | |
|         if (!MO.isReg())
 | |
|           continue;
 | |
| 
 | |
|         const unsigned VirtReg = MO.getReg();
 | |
|         if (TRI->isPhysicalRegister(VirtReg))
 | |
|           continue;
 | |
| 
 | |
|         if (!VRM->hasPhys(VirtReg))
 | |
|           continue;
 | |
| 
 | |
|         unsigned PhysReg = VRM->getPhys(VirtReg);
 | |
|         const unsigned SubReg = MO.getSubReg();
 | |
|         if (SubReg != 0) {
 | |
|           PhysReg = TRI->getSubReg(PhysReg, SubReg);
 | |
|           MO.setSubReg(0);
 | |
|         }
 | |
| 
 | |
|         MO.setReg(PhysReg);
 | |
|         MO.setIsRenamable(false);
 | |
|       }
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
 | |
| 
 | |
|   for (unsigned Reg : RegsToRewrite) {
 | |
|     LIS->removeInterval(Reg);
 | |
| 
 | |
|     const unsigned PhysReg = VRM->getPhys(Reg);
 | |
|     assert(PhysReg != 0);
 | |
|     MFI->ReserveWWMRegister(PhysReg);
 | |
|   }
 | |
| 
 | |
|   RegsToRewrite.clear();
 | |
| 
 | |
|   // Update the set of reserved registers to include WWM ones.
 | |
|   MRI->freezeReservedRegs(MF);
 | |
| }
 | |
| 
 | |
| bool SIPreAllocateWWMRegs::runOnMachineFunction(MachineFunction &MF) {
 | |
|   LLVM_DEBUG(dbgs() << "SIPreAllocateWWMRegs: function " << MF.getName() << "\n");
 | |
| 
 | |
|   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
 | |
| 
 | |
|   TII = ST.getInstrInfo();
 | |
|   TRI = &TII->getRegisterInfo();
 | |
|   MRI = &MF.getRegInfo();
 | |
| 
 | |
|   LIS = &getAnalysis<LiveIntervals>();
 | |
|   Matrix = &getAnalysis<LiveRegMatrix>();
 | |
|   VRM = &getAnalysis<VirtRegMap>();
 | |
| 
 | |
|   RegClassInfo.runOnMachineFunction(MF);
 | |
| 
 | |
|   bool RegsAssigned = false;
 | |
| 
 | |
|   // We use a reverse post-order traversal of the control-flow graph to
 | |
|   // guarantee that we visit definitions in dominance order. Since WWM
 | |
|   // expressions are guaranteed to never involve phi nodes, and we can only
 | |
|   // escape WWM through the special WWM instruction, this means that this is a
 | |
|   // perfect elimination order, so we can never do any better.
 | |
|   ReversePostOrderTraversal<MachineFunction*> RPOT(&MF);
 | |
| 
 | |
|   for (MachineBasicBlock *MBB : RPOT) {
 | |
|     bool InWWM = false;
 | |
|     for (MachineInstr &MI : *MBB) {
 | |
|       if (MI.getOpcode() == AMDGPU::V_SET_INACTIVE_B32 ||
 | |
|           MI.getOpcode() == AMDGPU::V_SET_INACTIVE_B64)
 | |
|         RegsAssigned |= processDef(MI.getOperand(0));
 | |
| 
 | |
|       if (MI.getOpcode() == AMDGPU::ENTER_WWM) {
 | |
|         LLVM_DEBUG(dbgs() << "entering WWM region: " << MI << "\n");
 | |
|         InWWM = true;
 | |
|         continue;
 | |
|       }
 | |
| 
 | |
|       if (MI.getOpcode() == AMDGPU::EXIT_WWM) {
 | |
|         LLVM_DEBUG(dbgs() << "exiting WWM region: " << MI << "\n");
 | |
|         InWWM = false;
 | |
|       }
 | |
| 
 | |
|       if (!InWWM)
 | |
|         continue;
 | |
| 
 | |
|       LLVM_DEBUG(dbgs() << "processing " << MI << "\n");
 | |
| 
 | |
|       for (MachineOperand &DefOpnd : MI.defs()) {
 | |
|         RegsAssigned |= processDef(DefOpnd);
 | |
|       }
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   if (!RegsAssigned)
 | |
|     return false;
 | |
| 
 | |
|   rewriteRegs(MF);
 | |
|   return true;
 | |
| }
 |