363 lines
		
	
	
		
			11 KiB
		
	
	
	
		
			C++
		
	
	
	
			
		
		
	
	
			363 lines
		
	
	
		
			11 KiB
		
	
	
	
		
			C++
		
	
	
	
| //===---------------------------- GCNILPSched.cpp - -----------------------===//
 | |
| //
 | |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 | |
| // See https://llvm.org/LICENSE.txt for license information.
 | |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 | |
| //
 | |
| //===----------------------------------------------------------------------===//
 | |
| //
 | |
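/// \file
/// Bottom-up list scheduler (GCNILPScheduler) that orders the SUnits of a
/// ScheduleDAG using Sethi-Ullman numbers plus height, depth and latency
/// heuristics.
//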
 | |
| //===----------------------------------------------------------------------===//
 | |
| 
 | |
| #include "llvm/CodeGen/ScheduleDAG.h"
 | |
| 
 | |
| using namespace llvm;
 | |
| 
 | |
| #define DEBUG_TYPE "machine-scheduler"
 | |
| 
 | |
| namespace {
 | |
| 
 | |
| class GCNILPScheduler {
 | |
|   struct Candidate : ilist_node<Candidate> {
 | |
|     SUnit *SU;
 | |
| 
 | |
|     Candidate(SUnit *SU_)
 | |
|       : SU(SU_) {}
 | |
|   };
 | |
| 
 | |
|   SpecificBumpPtrAllocator<Candidate> Alloc;
 | |
|   typedef simple_ilist<Candidate> Queue;
 | |
|   Queue PendingQueue;
 | |
|   Queue AvailQueue;
 | |
|   unsigned CurQueueId = 0;
 | |
| 
 | |
|   std::vector<unsigned> SUNumbers;
 | |
| 
 | |
|   /// CurCycle - The current scheduler state corresponds to this cycle.
 | |
|   unsigned CurCycle = 0;
 | |
| 
 | |
|   unsigned getNodePriority(const SUnit *SU) const;
 | |
| 
 | |
|   const SUnit *pickBest(const SUnit *left, const SUnit *right);
 | |
|   Candidate* pickCandidate();
 | |
| 
 | |
|   void releasePending();
 | |
|   void advanceToCycle(unsigned NextCycle);
 | |
|   void releasePredecessors(const SUnit* SU);
 | |
| 
 | |
| public:
 | |
|   std::vector<const SUnit*> schedule(ArrayRef<const SUnit*> TopRoots,
 | |
|                                      const ScheduleDAG &DAG);
 | |
| };
 | |
| } // namespace
 | |
| 
 | |
| /// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
 | |
| /// Smaller number is the higher priority.
 | |
| static unsigned
 | |
| CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
 | |
|   unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum];
 | |
|   if (SethiUllmanNumber != 0)
 | |
|     return SethiUllmanNumber;
 | |
| 
 | |
|   unsigned Extra = 0;
 | |
|   for (const SDep &Pred : SU->Preds) {
 | |
|     if (Pred.isCtrl()) continue;  // ignore chain preds
 | |
|     SUnit *PredSU = Pred.getSUnit();
 | |
|     unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers);
 | |
|     if (PredSethiUllman > SethiUllmanNumber) {
 | |
|       SethiUllmanNumber = PredSethiUllman;
 | |
|       Extra = 0;
 | |
|     }
 | |
|     else if (PredSethiUllman == SethiUllmanNumber)
 | |
|       ++Extra;
 | |
|   }
 | |
| 
 | |
|   SethiUllmanNumber += Extra;
 | |
| 
 | |
|   if (SethiUllmanNumber == 0)
 | |
|     SethiUllmanNumber = 1;
 | |
| 
 | |
|   return SethiUllmanNumber;
 | |
| }
 | |
| 
 | |
| // Lower priority means schedule further down. For bottom-up scheduling, lower
 | |
| // priority SUs are scheduled before higher priority SUs.
 | |
| unsigned GCNILPScheduler::getNodePriority(const SUnit *SU) const {
 | |
|   assert(SU->NodeNum < SUNumbers.size());
 | |
|   if (SU->NumSuccs == 0 && SU->NumPreds != 0)
 | |
|     // If SU does not have a register use, i.e. it doesn't produce a value
 | |
|     // that would be consumed (e.g. store), then it terminates a chain of
 | |
|     // computation.  Give it a large SethiUllman number so it will be
 | |
|     // scheduled right before its predecessors that it doesn't lengthen
 | |
|     // their live ranges.
 | |
|     return 0xffff;
 | |
| 
 | |
|   if (SU->NumPreds == 0 && SU->NumSuccs != 0)
 | |
|     // If SU does not have a register def, schedule it close to its uses
 | |
|     // because it does not lengthen any live ranges.
 | |
|     return 0;
 | |
| 
 | |
|   return SUNumbers[SU->NodeNum];
 | |
| }
 | |
| 
 | |
| /// closestSucc - Returns the scheduled cycle of the successor which is
 | |
| /// closest to the current cycle.
 | |
| static unsigned closestSucc(const SUnit *SU) {
 | |
|   unsigned MaxHeight = 0;
 | |
|   for (const SDep &Succ : SU->Succs) {
 | |
|     if (Succ.isCtrl()) continue;  // ignore chain succs
 | |
|     unsigned Height = Succ.getSUnit()->getHeight();
 | |
|     // If there are bunch of CopyToRegs stacked up, they should be considered
 | |
|     // to be at the same position.
 | |
|     if (Height > MaxHeight)
 | |
|       MaxHeight = Height;
 | |
|   }
 | |
|   return MaxHeight;
 | |
| }
 | |
| 
 | |
| /// calcMaxScratches - Returns an cost estimate of the worse case requirement
 | |
| /// for scratch registers, i.e. number of data dependencies.
 | |
| static unsigned calcMaxScratches(const SUnit *SU) {
 | |
|   unsigned Scratches = 0;
 | |
|   for (const SDep &Pred : SU->Preds) {
 | |
|     if (Pred.isCtrl()) continue;  // ignore chain preds
 | |
|     Scratches++;
 | |
|   }
 | |
|   return Scratches;
 | |
| }
 | |
| 
 | |
| // Return -1 if left has higher priority, 1 if right has higher priority.
 | |
| // Return 0 if latency-based priority is equivalent.
 | |
| static int BUCompareLatency(const SUnit *left, const SUnit *right) {
 | |
|   // Scheduling an instruction that uses a VReg whose postincrement has not yet
 | |
|   // been scheduled will induce a copy. Model this as an extra cycle of latency.
 | |
|   int LHeight = (int)left->getHeight();
 | |
|   int RHeight = (int)right->getHeight();
 | |
| 
 | |
|   // If either node is scheduling for latency, sort them by height/depth
 | |
|   // and latency.
 | |
| 
 | |
|   // If neither instruction stalls (!LStall && !RStall) and HazardRecognizer
 | |
|   // is enabled, grouping instructions by cycle, then its height is already
 | |
|   // covered so only its depth matters. We also reach this point if both stall
 | |
|   // but have the same height.
 | |
|   if (LHeight != RHeight)
 | |
|     return LHeight > RHeight ? 1 : -1;
 | |
| 
 | |
|   int LDepth = left->getDepth();
 | |
|   int RDepth = right->getDepth();
 | |
|   if (LDepth != RDepth) {
 | |
|     LLVM_DEBUG(dbgs() << "  Comparing latency of SU (" << left->NodeNum
 | |
|                       << ") depth " << LDepth << " vs SU (" << right->NodeNum
 | |
|                       << ") depth " << RDepth << "\n");
 | |
|     return LDepth < RDepth ? 1 : -1;
 | |
|   }
 | |
|   if (left->Latency != right->Latency)
 | |
|     return left->Latency > right->Latency ? 1 : -1;
 | |
| 
 | |
|   return 0;
 | |
| }
 | |
| 
 | |
| const SUnit *GCNILPScheduler::pickBest(const SUnit *left, const SUnit *right)
 | |
| {
 | |
|   // TODO: add register pressure lowering checks
 | |
| 
 | |
|   bool const DisableSchedCriticalPath = false;
 | |
|   int MaxReorderWindow = 6;
 | |
|   if (!DisableSchedCriticalPath) {
 | |
|     int spread = (int)left->getDepth() - (int)right->getDepth();
 | |
|     if (std::abs(spread) > MaxReorderWindow) {
 | |
|       LLVM_DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): "
 | |
|                         << left->getDepth() << " != SU(" << right->NodeNum
 | |
|                         << "): " << right->getDepth() << "\n");
 | |
|       return left->getDepth() < right->getDepth() ? right : left;
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   bool const DisableSchedHeight = false;
 | |
|   if (!DisableSchedHeight && left->getHeight() != right->getHeight()) {
 | |
|     int spread = (int)left->getHeight() - (int)right->getHeight();
 | |
|     if (std::abs(spread) > MaxReorderWindow)
 | |
|       return left->getHeight() > right->getHeight() ? right : left;
 | |
|   }
 | |
| 
 | |
|   // Prioritize by Sethi-Ulmann number and push CopyToReg nodes down.
 | |
|   unsigned LPriority = getNodePriority(left);
 | |
|   unsigned RPriority = getNodePriority(right);
 | |
| 
 | |
|   if (LPriority != RPriority)
 | |
|     return LPriority > RPriority ? right : left;
 | |
| 
 | |
|   // Try schedule def + use closer when Sethi-Ullman numbers are the same.
 | |
|   // e.g.
 | |
|   // t1 = op t2, c1
 | |
|   // t3 = op t4, c2
 | |
|   //
 | |
|   // and the following instructions are both ready.
 | |
|   // t2 = op c3
 | |
|   // t4 = op c4
 | |
|   //
 | |
|   // Then schedule t2 = op first.
 | |
|   // i.e.
 | |
|   // t4 = op c4
 | |
|   // t2 = op c3
 | |
|   // t1 = op t2, c1
 | |
|   // t3 = op t4, c2
 | |
|   //
 | |
|   // This creates more short live intervals.
 | |
|   unsigned LDist = closestSucc(left);
 | |
|   unsigned RDist = closestSucc(right);
 | |
|   if (LDist != RDist)
 | |
|     return LDist < RDist ? right : left;
 | |
| 
 | |
|   // How many registers becomes live when the node is scheduled.
 | |
|   unsigned LScratch = calcMaxScratches(left);
 | |
|   unsigned RScratch = calcMaxScratches(right);
 | |
|   if (LScratch != RScratch)
 | |
|     return LScratch > RScratch ? right : left;
 | |
| 
 | |
|   bool const DisableSchedCycles = false;
 | |
|   if (!DisableSchedCycles) {
 | |
|     int result = BUCompareLatency(left, right);
 | |
|     if (result != 0)
 | |
|       return result > 0 ? right : left;
 | |
|     return left;
 | |
|   }
 | |
|   else {
 | |
|     if (left->getHeight() != right->getHeight())
 | |
|       return (left->getHeight() > right->getHeight()) ? right : left;
 | |
| 
 | |
|     if (left->getDepth() != right->getDepth())
 | |
|       return (left->getDepth() < right->getDepth()) ? right : left;
 | |
|   }
 | |
| 
 | |
|   assert(left->NodeQueueId && right->NodeQueueId &&
 | |
|         "NodeQueueId cannot be zero");
 | |
|   return (left->NodeQueueId > right->NodeQueueId) ? right : left;
 | |
| }
 | |
| 
 | |
| GCNILPScheduler::Candidate* GCNILPScheduler::pickCandidate() {
 | |
|   if (AvailQueue.empty())
 | |
|     return nullptr;
 | |
|   auto Best = AvailQueue.begin();
 | |
|   for (auto I = std::next(AvailQueue.begin()), E = AvailQueue.end(); I != E; ++I) {
 | |
|     auto NewBestSU = pickBest(Best->SU, I->SU);
 | |
|     if (NewBestSU != Best->SU) {
 | |
|       assert(NewBestSU == I->SU);
 | |
|       Best = I;
 | |
|     }
 | |
|   }
 | |
|   return &*Best;
 | |
| }
 | |
| 
 | |
| void GCNILPScheduler::releasePending() {
 | |
|   // Check to see if any of the pending instructions are ready to issue.  If
 | |
|   // so, add them to the available queue.
 | |
|   for(auto I = PendingQueue.begin(), E = PendingQueue.end(); I != E;) {
 | |
|     auto &C = *I++;
 | |
|     if (C.SU->getHeight() <= CurCycle) {
 | |
|       PendingQueue.remove(C);
 | |
|       AvailQueue.push_back(C);
 | |
|       C.SU->NodeQueueId = CurQueueId++;
 | |
|     }
 | |
|   }
 | |
| }
 | |
| 
 | |
| /// Move the scheduler state forward by the specified number of Cycles.
 | |
| void GCNILPScheduler::advanceToCycle(unsigned NextCycle) {
 | |
|   if (NextCycle <= CurCycle)
 | |
|     return;
 | |
|   CurCycle = NextCycle;
 | |
|   releasePending();
 | |
| }
 | |
| 
 | |
| void GCNILPScheduler::releasePredecessors(const SUnit* SU) {
 | |
|   for (const auto &PredEdge : SU->Preds) {
 | |
|     auto PredSU = PredEdge.getSUnit();
 | |
|     if (PredEdge.isWeak())
 | |
|       continue;
 | |
|     assert(PredSU->isBoundaryNode() || PredSU->NumSuccsLeft > 0);
 | |
| 
 | |
|     PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge.getLatency());
 | |
| 
 | |
|     if (!PredSU->isBoundaryNode() && --PredSU->NumSuccsLeft == 0)
 | |
|       PendingQueue.push_front(*new (Alloc.Allocate()) Candidate(PredSU));
 | |
|   }
 | |
| }
 | |
| 
 | |
| std::vector<const SUnit*>
 | |
| GCNILPScheduler::schedule(ArrayRef<const SUnit*> BotRoots,
 | |
|                           const ScheduleDAG &DAG) {
 | |
|   auto &SUnits = const_cast<ScheduleDAG&>(DAG).SUnits;
 | |
| 
 | |
|   std::vector<SUnit> SUSavedCopy;
 | |
|   SUSavedCopy.resize(SUnits.size());
 | |
| 
 | |
|   // we cannot save only those fields we touch: some of them are private
 | |
|   // so save units verbatim: this assumes SUnit should have value semantics
 | |
|   for (const SUnit &SU : SUnits)
 | |
|     SUSavedCopy[SU.NodeNum] = SU;
 | |
| 
 | |
|   SUNumbers.assign(SUnits.size(), 0);
 | |
|   for (const SUnit &SU : SUnits)
 | |
|     CalcNodeSethiUllmanNumber(&SU, SUNumbers);
 | |
| 
 | |
|   for (auto SU : BotRoots) {
 | |
|     AvailQueue.push_back(
 | |
|       *new (Alloc.Allocate()) Candidate(const_cast<SUnit*>(SU)));
 | |
|   }
 | |
|   releasePredecessors(&DAG.ExitSU);
 | |
| 
 | |
|   std::vector<const SUnit*> Schedule;
 | |
|   Schedule.reserve(SUnits.size());
 | |
|   while (true) {
 | |
|     if (AvailQueue.empty() && !PendingQueue.empty()) {
 | |
|       auto EarliestSU = std::min_element(
 | |
|         PendingQueue.begin(), PendingQueue.end(),
 | |
|         [=](const Candidate& C1, const Candidate& C2) {
 | |
|         return C1.SU->getHeight() < C2.SU->getHeight();
 | |
|       })->SU;
 | |
|       advanceToCycle(std::max(CurCycle + 1, EarliestSU->getHeight()));
 | |
|     }
 | |
|     if (AvailQueue.empty())
 | |
|       break;
 | |
| 
 | |
|     LLVM_DEBUG(dbgs() << "\n=== Picking candidate\n"
 | |
|                          "Ready queue:";
 | |
|                for (auto &C
 | |
|                     : AvailQueue) dbgs()
 | |
|                << ' ' << C.SU->NodeNum;
 | |
|                dbgs() << '\n';);
 | |
| 
 | |
|     auto C = pickCandidate();
 | |
|     assert(C);
 | |
|     AvailQueue.remove(*C);
 | |
|     auto SU = C->SU;
 | |
|     LLVM_DEBUG(dbgs() << "Selected "; DAG.dumpNode(*SU));
 | |
| 
 | |
|     advanceToCycle(SU->getHeight());
 | |
| 
 | |
|     releasePredecessors(SU);
 | |
|     Schedule.push_back(SU);
 | |
|     SU->isScheduled = true;
 | |
|   }
 | |
|   assert(SUnits.size() == Schedule.size());
 | |
| 
 | |
|   std::reverse(Schedule.begin(), Schedule.end());
 | |
| 
 | |
|   // restore units
 | |
|   for (auto &SU : SUnits)
 | |
|     SU = SUSavedCopy[SU.NodeNum];
 | |
| 
 | |
|   return Schedule;
 | |
| }
 | |
| 
 | |
| namespace llvm {
 | |
| std::vector<const SUnit*> makeGCNILPScheduler(ArrayRef<const SUnit*> BotRoots,
 | |
|                                               const ScheduleDAG &DAG) {
 | |
|   GCNILPScheduler S;
 | |
|   return S.schedule(BotRoots, DAG);
 | |
| }
 | |
| }
 |