384 lines
		
	
	
		
			11 KiB
		
	
	
	
		
			C++
		
	
	
	
			
		
		
	
	
			384 lines
		
	
	
		
			11 KiB
		
	
	
	
		
			C++
		
	
	
	
| //===-- AArch64PBQPRegAlloc.cpp - AArch64 specific PBQP constraints -------===//
 | |
| //
 | |
| //                     The LLVM Compiler Infrastructure
 | |
| //
 | |
| // This file is distributed under the University of Illinois Open Source
 | |
| // License. See LICENSE.TXT for details.
 | |
| //
 | |
| //===----------------------------------------------------------------------===//
 | |
| // This file contains the AArch64 / Cortex-A57 specific register allocation
 | |
| // constraints for use by the PBQP register allocator.
 | |
| //
 | |
| // It is essentially a transcription of what is contained in
 | |
| // AArch64A57FPLoadBalancing, which tries to use a balanced
 | |
| // mix of odd and even D-registers when performing a critical sequence of
 | |
| // independent, non-quadword FP/ASIMD floating-point multiply-accumulates.
 | |
| //===----------------------------------------------------------------------===//
 | |
| 
 | |
| #define DEBUG_TYPE "aarch64-pbqp"
 | |
| 
 | |
| #include "AArch64PBQPRegAlloc.h"
 | |
| #include "AArch64.h"
 | |
| #include "AArch64RegisterInfo.h"
 | |
| #include "llvm/CodeGen/LiveIntervals.h"
 | |
| #include "llvm/CodeGen/MachineBasicBlock.h"
 | |
| #include "llvm/CodeGen/MachineFunction.h"
 | |
| #include "llvm/CodeGen/MachineRegisterInfo.h"
 | |
| #include "llvm/CodeGen/RegAllocPBQP.h"
 | |
| #include "llvm/Support/Debug.h"
 | |
| #include "llvm/Support/ErrorHandling.h"
 | |
| #include "llvm/Support/raw_ostream.h"
 | |
| 
 | |
| using namespace llvm;
 | |
| 
 | |
| namespace {
 | |
| 
 | |
| #ifndef NDEBUG
 | |
| bool isFPReg(unsigned reg) {
 | |
|   return AArch64::FPR32RegClass.contains(reg) ||
 | |
|          AArch64::FPR64RegClass.contains(reg) ||
 | |
|          AArch64::FPR128RegClass.contains(reg);
 | |
| }
 | |
| #endif
 | |
| 
 | |
| bool isOdd(unsigned reg) {
 | |
|   switch (reg) {
 | |
|   default:
 | |
|     llvm_unreachable("Register is not from the expected class !");
 | |
|   case AArch64::S1:
 | |
|   case AArch64::S3:
 | |
|   case AArch64::S5:
 | |
|   case AArch64::S7:
 | |
|   case AArch64::S9:
 | |
|   case AArch64::S11:
 | |
|   case AArch64::S13:
 | |
|   case AArch64::S15:
 | |
|   case AArch64::S17:
 | |
|   case AArch64::S19:
 | |
|   case AArch64::S21:
 | |
|   case AArch64::S23:
 | |
|   case AArch64::S25:
 | |
|   case AArch64::S27:
 | |
|   case AArch64::S29:
 | |
|   case AArch64::S31:
 | |
|   case AArch64::D1:
 | |
|   case AArch64::D3:
 | |
|   case AArch64::D5:
 | |
|   case AArch64::D7:
 | |
|   case AArch64::D9:
 | |
|   case AArch64::D11:
 | |
|   case AArch64::D13:
 | |
|   case AArch64::D15:
 | |
|   case AArch64::D17:
 | |
|   case AArch64::D19:
 | |
|   case AArch64::D21:
 | |
|   case AArch64::D23:
 | |
|   case AArch64::D25:
 | |
|   case AArch64::D27:
 | |
|   case AArch64::D29:
 | |
|   case AArch64::D31:
 | |
|   case AArch64::Q1:
 | |
|   case AArch64::Q3:
 | |
|   case AArch64::Q5:
 | |
|   case AArch64::Q7:
 | |
|   case AArch64::Q9:
 | |
|   case AArch64::Q11:
 | |
|   case AArch64::Q13:
 | |
|   case AArch64::Q15:
 | |
|   case AArch64::Q17:
 | |
|   case AArch64::Q19:
 | |
|   case AArch64::Q21:
 | |
|   case AArch64::Q23:
 | |
|   case AArch64::Q25:
 | |
|   case AArch64::Q27:
 | |
|   case AArch64::Q29:
 | |
|   case AArch64::Q31:
 | |
|     return true;
 | |
|   case AArch64::S0:
 | |
|   case AArch64::S2:
 | |
|   case AArch64::S4:
 | |
|   case AArch64::S6:
 | |
|   case AArch64::S8:
 | |
|   case AArch64::S10:
 | |
|   case AArch64::S12:
 | |
|   case AArch64::S14:
 | |
|   case AArch64::S16:
 | |
|   case AArch64::S18:
 | |
|   case AArch64::S20:
 | |
|   case AArch64::S22:
 | |
|   case AArch64::S24:
 | |
|   case AArch64::S26:
 | |
|   case AArch64::S28:
 | |
|   case AArch64::S30:
 | |
|   case AArch64::D0:
 | |
|   case AArch64::D2:
 | |
|   case AArch64::D4:
 | |
|   case AArch64::D6:
 | |
|   case AArch64::D8:
 | |
|   case AArch64::D10:
 | |
|   case AArch64::D12:
 | |
|   case AArch64::D14:
 | |
|   case AArch64::D16:
 | |
|   case AArch64::D18:
 | |
|   case AArch64::D20:
 | |
|   case AArch64::D22:
 | |
|   case AArch64::D24:
 | |
|   case AArch64::D26:
 | |
|   case AArch64::D28:
 | |
|   case AArch64::D30:
 | |
|   case AArch64::Q0:
 | |
|   case AArch64::Q2:
 | |
|   case AArch64::Q4:
 | |
|   case AArch64::Q6:
 | |
|   case AArch64::Q8:
 | |
|   case AArch64::Q10:
 | |
|   case AArch64::Q12:
 | |
|   case AArch64::Q14:
 | |
|   case AArch64::Q16:
 | |
|   case AArch64::Q18:
 | |
|   case AArch64::Q20:
 | |
|   case AArch64::Q22:
 | |
|   case AArch64::Q24:
 | |
|   case AArch64::Q26:
 | |
|   case AArch64::Q28:
 | |
|   case AArch64::Q30:
 | |
|     return false;
 | |
| 
 | |
|   }
 | |
| }
 | |
| 
 | |
| bool haveSameParity(unsigned reg1, unsigned reg2) {
 | |
|   assert(isFPReg(reg1) && "Expecting an FP register for reg1");
 | |
|   assert(isFPReg(reg2) && "Expecting an FP register for reg2");
 | |
| 
 | |
|   return isOdd(reg1) == isOdd(reg2);
 | |
| }
 | |
| 
 | |
| }
 | |
| 
 | |
| bool A57ChainingConstraint::addIntraChainConstraint(PBQPRAGraph &G, unsigned Rd,
 | |
|                                                  unsigned Ra) {
 | |
|   if (Rd == Ra)
 | |
|     return false;
 | |
| 
 | |
|   LiveIntervals &LIs = G.getMetadata().LIS;
 | |
| 
 | |
|   if (TRI->isPhysicalRegister(Rd) || TRI->isPhysicalRegister(Ra)) {
 | |
|     LLVM_DEBUG(dbgs() << "Rd is a physical reg:" << TRI->isPhysicalRegister(Rd)
 | |
|                       << '\n');
 | |
|     LLVM_DEBUG(dbgs() << "Ra is a physical reg:" << TRI->isPhysicalRegister(Ra)
 | |
|                       << '\n');
 | |
|     return false;
 | |
|   }
 | |
| 
 | |
|   PBQPRAGraph::NodeId node1 = G.getMetadata().getNodeIdForVReg(Rd);
 | |
|   PBQPRAGraph::NodeId node2 = G.getMetadata().getNodeIdForVReg(Ra);
 | |
| 
 | |
|   const PBQPRAGraph::NodeMetadata::AllowedRegVector *vRdAllowed =
 | |
|     &G.getNodeMetadata(node1).getAllowedRegs();
 | |
|   const PBQPRAGraph::NodeMetadata::AllowedRegVector *vRaAllowed =
 | |
|     &G.getNodeMetadata(node2).getAllowedRegs();
 | |
| 
 | |
|   PBQPRAGraph::EdgeId edge = G.findEdge(node1, node2);
 | |
| 
 | |
|   // The edge does not exist. Create one with the appropriate interference
 | |
|   // costs.
 | |
|   if (edge == G.invalidEdgeId()) {
 | |
|     const LiveInterval &ld = LIs.getInterval(Rd);
 | |
|     const LiveInterval &la = LIs.getInterval(Ra);
 | |
|     bool livesOverlap = ld.overlaps(la);
 | |
| 
 | |
|     PBQPRAGraph::RawMatrix costs(vRdAllowed->size() + 1,
 | |
|                                  vRaAllowed->size() + 1, 0);
 | |
|     for (unsigned i = 0, ie = vRdAllowed->size(); i != ie; ++i) {
 | |
|       unsigned pRd = (*vRdAllowed)[i];
 | |
|       for (unsigned j = 0, je = vRaAllowed->size(); j != je; ++j) {
 | |
|         unsigned pRa = (*vRaAllowed)[j];
 | |
|         if (livesOverlap && TRI->regsOverlap(pRd, pRa))
 | |
|           costs[i + 1][j + 1] = std::numeric_limits<PBQP::PBQPNum>::infinity();
 | |
|         else
 | |
|           costs[i + 1][j + 1] = haveSameParity(pRd, pRa) ? 0.0 : 1.0;
 | |
|       }
 | |
|     }
 | |
|     G.addEdge(node1, node2, std::move(costs));
 | |
|     return true;
 | |
|   }
 | |
| 
 | |
|   if (G.getEdgeNode1Id(edge) == node2) {
 | |
|     std::swap(node1, node2);
 | |
|     std::swap(vRdAllowed, vRaAllowed);
 | |
|   }
 | |
| 
 | |
|   // Enforce minCost(sameParity(RaClass)) > maxCost(otherParity(RdClass))
 | |
|   PBQPRAGraph::RawMatrix costs(G.getEdgeCosts(edge));
 | |
|   for (unsigned i = 0, ie = vRdAllowed->size(); i != ie; ++i) {
 | |
|     unsigned pRd = (*vRdAllowed)[i];
 | |
| 
 | |
|     // Get the maximum cost (excluding unallocatable reg) for same parity
 | |
|     // registers
 | |
|     PBQP::PBQPNum sameParityMax = std::numeric_limits<PBQP::PBQPNum>::min();
 | |
|     for (unsigned j = 0, je = vRaAllowed->size(); j != je; ++j) {
 | |
|       unsigned pRa = (*vRaAllowed)[j];
 | |
|       if (haveSameParity(pRd, pRa))
 | |
|         if (costs[i + 1][j + 1] !=
 | |
|                 std::numeric_limits<PBQP::PBQPNum>::infinity() &&
 | |
|             costs[i + 1][j + 1] > sameParityMax)
 | |
|           sameParityMax = costs[i + 1][j + 1];
 | |
|     }
 | |
| 
 | |
|     // Ensure all registers with a different parity have a higher cost
 | |
|     // than sameParityMax
 | |
|     for (unsigned j = 0, je = vRaAllowed->size(); j != je; ++j) {
 | |
|       unsigned pRa = (*vRaAllowed)[j];
 | |
|       if (!haveSameParity(pRd, pRa))
 | |
|         if (sameParityMax > costs[i + 1][j + 1])
 | |
|           costs[i + 1][j + 1] = sameParityMax + 1.0;
 | |
|     }
 | |
|   }
 | |
|   G.updateEdgeCosts(edge, std::move(costs));
 | |
| 
 | |
|   return true;
 | |
| }
 | |
| 
 | |
| void A57ChainingConstraint::addInterChainConstraint(PBQPRAGraph &G, unsigned Rd,
 | |
|                                                  unsigned Ra) {
 | |
|   LiveIntervals &LIs = G.getMetadata().LIS;
 | |
| 
 | |
|   // Do some Chain management
 | |
|   if (Chains.count(Ra)) {
 | |
|     if (Rd != Ra) {
 | |
|       LLVM_DEBUG(dbgs() << "Moving acc chain from " << printReg(Ra, TRI)
 | |
|                         << " to " << printReg(Rd, TRI) << '\n';);
 | |
|       Chains.remove(Ra);
 | |
|       Chains.insert(Rd);
 | |
|     }
 | |
|   } else {
 | |
|     LLVM_DEBUG(dbgs() << "Creating new acc chain for " << printReg(Rd, TRI)
 | |
|                       << '\n';);
 | |
|     Chains.insert(Rd);
 | |
|   }
 | |
| 
 | |
|   PBQPRAGraph::NodeId node1 = G.getMetadata().getNodeIdForVReg(Rd);
 | |
| 
 | |
|   const LiveInterval &ld = LIs.getInterval(Rd);
 | |
|   for (auto r : Chains) {
 | |
|     // Skip self
 | |
|     if (r == Rd)
 | |
|       continue;
 | |
| 
 | |
|     const LiveInterval &lr = LIs.getInterval(r);
 | |
|     if (ld.overlaps(lr)) {
 | |
|       const PBQPRAGraph::NodeMetadata::AllowedRegVector *vRdAllowed =
 | |
|         &G.getNodeMetadata(node1).getAllowedRegs();
 | |
| 
 | |
|       PBQPRAGraph::NodeId node2 = G.getMetadata().getNodeIdForVReg(r);
 | |
|       const PBQPRAGraph::NodeMetadata::AllowedRegVector *vRrAllowed =
 | |
|         &G.getNodeMetadata(node2).getAllowedRegs();
 | |
| 
 | |
|       PBQPRAGraph::EdgeId edge = G.findEdge(node1, node2);
 | |
|       assert(edge != G.invalidEdgeId() &&
 | |
|              "PBQP error ! The edge should exist !");
 | |
| 
 | |
|       LLVM_DEBUG(dbgs() << "Refining constraint !\n";);
 | |
| 
 | |
|       if (G.getEdgeNode1Id(edge) == node2) {
 | |
|         std::swap(node1, node2);
 | |
|         std::swap(vRdAllowed, vRrAllowed);
 | |
|       }
 | |
| 
 | |
|       // Enforce that cost is higher with all other Chains of the same parity
 | |
|       PBQP::Matrix costs(G.getEdgeCosts(edge));
 | |
|       for (unsigned i = 0, ie = vRdAllowed->size(); i != ie; ++i) {
 | |
|         unsigned pRd = (*vRdAllowed)[i];
 | |
| 
 | |
|         // Get the maximum cost (excluding unallocatable reg) for all other
 | |
|         // parity registers
 | |
|         PBQP::PBQPNum sameParityMax = std::numeric_limits<PBQP::PBQPNum>::min();
 | |
|         for (unsigned j = 0, je = vRrAllowed->size(); j != je; ++j) {
 | |
|           unsigned pRa = (*vRrAllowed)[j];
 | |
|           if (!haveSameParity(pRd, pRa))
 | |
|             if (costs[i + 1][j + 1] !=
 | |
|                     std::numeric_limits<PBQP::PBQPNum>::infinity() &&
 | |
|                 costs[i + 1][j + 1] > sameParityMax)
 | |
|               sameParityMax = costs[i + 1][j + 1];
 | |
|         }
 | |
| 
 | |
|         // Ensure all registers with same parity have a higher cost
 | |
|         // than sameParityMax
 | |
|         for (unsigned j = 0, je = vRrAllowed->size(); j != je; ++j) {
 | |
|           unsigned pRa = (*vRrAllowed)[j];
 | |
|           if (haveSameParity(pRd, pRa))
 | |
|             if (sameParityMax > costs[i + 1][j + 1])
 | |
|               costs[i + 1][j + 1] = sameParityMax + 1.0;
 | |
|         }
 | |
|       }
 | |
|       G.updateEdgeCosts(edge, std::move(costs));
 | |
|     }
 | |
|   }
 | |
| }
 | |
| 
 | |
| static bool regJustKilledBefore(const LiveIntervals &LIs, unsigned reg,
 | |
|                                 const MachineInstr &MI) {
 | |
|   const LiveInterval &LI = LIs.getInterval(reg);
 | |
|   SlotIndex SI = LIs.getInstructionIndex(MI);
 | |
|   return LI.expiredAt(SI);
 | |
| }
 | |
| 
 | |
| void A57ChainingConstraint::apply(PBQPRAGraph &G) {
 | |
|   const MachineFunction &MF = G.getMetadata().MF;
 | |
|   LiveIntervals &LIs = G.getMetadata().LIS;
 | |
| 
 | |
|   TRI = MF.getSubtarget().getRegisterInfo();
 | |
|   LLVM_DEBUG(MF.dump());
 | |
| 
 | |
|   for (const auto &MBB: MF) {
 | |
|     Chains.clear(); // FIXME: really needed ? Could not work at MF level ?
 | |
| 
 | |
|     for (const auto &MI: MBB) {
 | |
| 
 | |
|       // Forget Chains which have expired
 | |
|       for (auto r : Chains) {
 | |
|         SmallVector<unsigned, 8> toDel;
 | |
|         if(regJustKilledBefore(LIs, r, MI)) {
 | |
|           LLVM_DEBUG(dbgs() << "Killing chain " << printReg(r, TRI) << " at ";
 | |
|                      MI.print(dbgs()););
 | |
|           toDel.push_back(r);
 | |
|         }
 | |
| 
 | |
|         while (!toDel.empty()) {
 | |
|           Chains.remove(toDel.back());
 | |
|           toDel.pop_back();
 | |
|         }
 | |
|       }
 | |
| 
 | |
|       switch (MI.getOpcode()) {
 | |
|       case AArch64::FMSUBSrrr:
 | |
|       case AArch64::FMADDSrrr:
 | |
|       case AArch64::FNMSUBSrrr:
 | |
|       case AArch64::FNMADDSrrr:
 | |
|       case AArch64::FMSUBDrrr:
 | |
|       case AArch64::FMADDDrrr:
 | |
|       case AArch64::FNMSUBDrrr:
 | |
|       case AArch64::FNMADDDrrr: {
 | |
|         unsigned Rd = MI.getOperand(0).getReg();
 | |
|         unsigned Ra = MI.getOperand(3).getReg();
 | |
| 
 | |
|         if (addIntraChainConstraint(G, Rd, Ra))
 | |
|           addInterChainConstraint(G, Rd, Ra);
 | |
|         break;
 | |
|       }
 | |
| 
 | |
|       case AArch64::FMLAv2f32:
 | |
|       case AArch64::FMLSv2f32: {
 | |
|         unsigned Rd = MI.getOperand(0).getReg();
 | |
|         addInterChainConstraint(G, Rd, Rd);
 | |
|         break;
 | |
|       }
 | |
| 
 | |
|       default:
 | |
|         break;
 | |
|       }
 | |
|     }
 | |
|   }
 | |
| }
 |