forked from OSchip/llvm-project
				
			
		
			
				
	
	
		
			653 lines
		
	
	
		
			22 KiB
		
	
	
	
		
			C++
		
	
	
	
			
		
		
	
	
			653 lines
		
	
	
		
			22 KiB
		
	
	
	
		
			C++
		
	
	
	
| //===--------------------- BottleneckAnalysis.cpp ---------------*- C++ -*-===//
 | |
| //
 | |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 | |
| // See https://llvm.org/LICENSE.txt for license information.
 | |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 | |
| //
 | |
| //===----------------------------------------------------------------------===//
 | |
| /// \file
 | |
| ///
 | |
| /// This file implements the functionalities used by the BottleneckAnalysis
 | |
| /// to report bottleneck info.
 | |
| ///
 | |
| //===----------------------------------------------------------------------===//
 | |
| 
 | |
| #include "Views/BottleneckAnalysis.h"
 | |
| #include "llvm/MC/MCInst.h"
 | |
| #include "llvm/MCA/Support.h"
 | |
| #include "llvm/Support/Format.h"
 | |
| #include "llvm/Support/FormattedStream.h"
 | |
| 
 | |
| namespace llvm {
 | |
| namespace mca {
 | |
| 
 | |
| #define DEBUG_TYPE "llvm-mca"
 | |
| 
 | |
| PressureTracker::PressureTracker(const MCSchedModel &Model)
 | |
|     : SM(Model),
 | |
|       ResourcePressureDistribution(Model.getNumProcResourceKinds(), 0),
 | |
|       ProcResID2Mask(Model.getNumProcResourceKinds(), 0),
 | |
|       ResIdx2ProcResID(Model.getNumProcResourceKinds(), 0),
 | |
|       ProcResID2ResourceUsersIndex(Model.getNumProcResourceKinds(), 0) {
 | |
|   computeProcResourceMasks(SM, ProcResID2Mask);
 | |
| 
 | |
|   // Ignore the invalid resource at index zero.
 | |
|   unsigned NextResourceUsersIdx = 0;
 | |
|   for (unsigned I = 1, E = Model.getNumProcResourceKinds(); I < E; ++I) {
 | |
|     const MCProcResourceDesc &ProcResource = *SM.getProcResource(I);
 | |
|     ProcResID2ResourceUsersIndex[I] = NextResourceUsersIdx;
 | |
|     NextResourceUsersIdx += ProcResource.NumUnits;
 | |
|     uint64_t ResourceMask = ProcResID2Mask[I];
 | |
|     ResIdx2ProcResID[getResourceStateIndex(ResourceMask)] = I;
 | |
|   }
 | |
| 
 | |
|   ResourceUsers.resize(NextResourceUsersIdx);
 | |
|   std::fill(ResourceUsers.begin(), ResourceUsers.end(),
 | |
|             std::make_pair<unsigned, unsigned>(~0U, 0U));
 | |
| }
 | |
| 
 | |
| void PressureTracker::getResourceUsers(uint64_t ResourceMask,
 | |
|                                        SmallVectorImpl<User> &Users) const {
 | |
|   unsigned Index = getResourceStateIndex(ResourceMask);
 | |
|   unsigned ProcResID = ResIdx2ProcResID[Index];
 | |
|   const MCProcResourceDesc &PRDesc = *SM.getProcResource(ProcResID);
 | |
|   for (unsigned I = 0, E = PRDesc.NumUnits; I < E; ++I) {
 | |
|     const User U = getResourceUser(ProcResID, I);
 | |
|     if (U.second && IPI.find(U.first) != IPI.end())
 | |
|       Users.emplace_back(U);
 | |
|   }
 | |
| }
 | |
| 
 | |
| void PressureTracker::onInstructionDispatched(unsigned IID) {
 | |
|   IPI.insert(std::make_pair(IID, InstructionPressureInfo()));
 | |
| }
 | |
| 
 | |
| void PressureTracker::onInstructionExecuted(unsigned IID) { IPI.erase(IID); }
 | |
| 
 | |
| void PressureTracker::handleInstructionIssuedEvent(
 | |
|     const HWInstructionIssuedEvent &Event) {
 | |
|   unsigned IID = Event.IR.getSourceIndex();
 | |
|   using ResourceRef = HWInstructionIssuedEvent::ResourceRef;
 | |
|   using ResourceUse = std::pair<ResourceRef, ResourceCycles>;
 | |
|   for (const ResourceUse &Use : Event.UsedResources) {
 | |
|     const ResourceRef &RR = Use.first;
 | |
|     unsigned Index = ProcResID2ResourceUsersIndex[RR.first];
 | |
|     Index += countTrailingZeros(RR.second);
 | |
|     ResourceUsers[Index] = std::make_pair(IID, Use.second.getNumerator());
 | |
|   }
 | |
| }
 | |
| 
 | |
| void PressureTracker::updateResourcePressureDistribution(
 | |
|     uint64_t CumulativeMask) {
 | |
|   while (CumulativeMask) {
 | |
|     uint64_t Current = CumulativeMask & (-CumulativeMask);
 | |
|     unsigned ResIdx = getResourceStateIndex(Current);
 | |
|     unsigned ProcResID = ResIdx2ProcResID[ResIdx];
 | |
|     uint64_t Mask = ProcResID2Mask[ProcResID];
 | |
| 
 | |
|     if (Mask == Current) {
 | |
|       ResourcePressureDistribution[ProcResID]++;
 | |
|       CumulativeMask ^= Current;
 | |
|       continue;
 | |
|     }
 | |
| 
 | |
|     Mask ^= Current;
 | |
|     while (Mask) {
 | |
|       uint64_t SubUnit = Mask & (-Mask);
 | |
|       ResIdx = getResourceStateIndex(SubUnit);
 | |
|       ProcResID = ResIdx2ProcResID[ResIdx];
 | |
|       ResourcePressureDistribution[ProcResID]++;
 | |
|       Mask ^= SubUnit;
 | |
|     }
 | |
| 
 | |
|     CumulativeMask ^= Current;
 | |
|   }
 | |
| }
 | |
| 
 | |
| void PressureTracker::handlePressureEvent(const HWPressureEvent &Event) {
 | |
|   assert(Event.Reason != HWPressureEvent::INVALID &&
 | |
|          "Unexpected invalid event!");
 | |
| 
 | |
|   switch (Event.Reason) {
 | |
|   default:
 | |
|     break;
 | |
| 
 | |
|   case HWPressureEvent::RESOURCES: {
 | |
|     const uint64_t ResourceMask = Event.ResourceMask;
 | |
|     updateResourcePressureDistribution(Event.ResourceMask);
 | |
| 
 | |
|     for (const InstRef &IR : Event.AffectedInstructions) {
 | |
|       const Instruction &IS = *IR.getInstruction();
 | |
|       unsigned BusyResources = IS.getCriticalResourceMask() & ResourceMask;
 | |
|       if (!BusyResources)
 | |
|         continue;
 | |
| 
 | |
|       unsigned IID = IR.getSourceIndex();
 | |
|       IPI[IID].ResourcePressureCycles++;
 | |
|     }
 | |
|     break;
 | |
|   }
 | |
| 
 | |
|   case HWPressureEvent::REGISTER_DEPS:
 | |
|     for (const InstRef &IR : Event.AffectedInstructions) {
 | |
|       unsigned IID = IR.getSourceIndex();
 | |
|       IPI[IID].RegisterPressureCycles++;
 | |
|     }
 | |
|     break;
 | |
| 
 | |
|   case HWPressureEvent::MEMORY_DEPS:
 | |
|     for (const InstRef &IR : Event.AffectedInstructions) {
 | |
|       unsigned IID = IR.getSourceIndex();
 | |
|       IPI[IID].MemoryPressureCycles++;
 | |
|     }
 | |
|   }
 | |
| }
 | |
| 
 | |
| #ifndef NDEBUG
 | |
| void DependencyGraph::dumpDependencyEdge(raw_ostream &OS,
 | |
|                                          const DependencyEdge &DepEdge,
 | |
|                                          MCInstPrinter &MCIP) const {
 | |
|   unsigned FromIID = DepEdge.FromIID;
 | |
|   unsigned ToIID = DepEdge.ToIID;
 | |
|   assert(FromIID < ToIID && "Graph should be acyclic!");
 | |
| 
 | |
|   const DependencyEdge::Dependency &DE = DepEdge.Dep;
 | |
|   assert(DE.Type != DependencyEdge::DT_INVALID && "Unexpected invalid edge!");
 | |
| 
 | |
|   OS << " FROM: " << FromIID << " TO: " << ToIID << "             ";
 | |
|   if (DE.Type == DependencyEdge::DT_REGISTER) {
 | |
|     OS << " - REGISTER: ";
 | |
|     MCIP.printRegName(OS, DE.ResourceOrRegID);
 | |
|   } else if (DE.Type == DependencyEdge::DT_MEMORY) {
 | |
|     OS << " - MEMORY";
 | |
|   } else {
 | |
|     assert(DE.Type == DependencyEdge::DT_RESOURCE &&
 | |
|            "Unsupported dependency type!");
 | |
|     OS << " - RESOURCE MASK: " << DE.ResourceOrRegID;
 | |
|   }
 | |
|   OS << " - COST: " << DE.Cost << '\n';
 | |
| }
 | |
| #endif // NDEBUG
 | |
| 
 | |
| void DependencyGraph::pruneEdges(unsigned Iterations) {
 | |
|   for (DGNode &N : Nodes) {
 | |
|     unsigned NumPruned = 0;
 | |
|     const unsigned Size = N.OutgoingEdges.size();
 | |
|     // Use a cut-off threshold to prune edges with a low frequency.
 | |
|     for (unsigned I = 0, E = Size; I < E; ++I) {
 | |
|       DependencyEdge &Edge = N.OutgoingEdges[I];
 | |
|       if (Edge.Frequency == Iterations)
 | |
|         continue;
 | |
|       double Factor = (double)Edge.Frequency / Iterations;
 | |
|       if (0.10 < Factor)
 | |
|         continue;
 | |
|       Nodes[Edge.ToIID].NumPredecessors--;
 | |
|       std::swap(Edge, N.OutgoingEdges[E - 1]);
 | |
|       --E;
 | |
|       ++NumPruned;
 | |
|     }
 | |
| 
 | |
|     if (NumPruned)
 | |
|       N.OutgoingEdges.resize(Size - NumPruned);
 | |
|   }
 | |
| }
 | |
| 
 | |
| void DependencyGraph::initializeRootSet(
 | |
|     SmallVectorImpl<unsigned> &RootSet) const {
 | |
|   for (unsigned I = 0, E = Nodes.size(); I < E; ++I) {
 | |
|     const DGNode &N = Nodes[I];
 | |
|     if (N.NumPredecessors == 0 && !N.OutgoingEdges.empty())
 | |
|       RootSet.emplace_back(I);
 | |
|   }
 | |
| }
 | |
| 
 | |
| void DependencyGraph::propagateThroughEdges(
 | |
|     SmallVectorImpl<unsigned> &RootSet, unsigned Iterations) {
 | |
|   SmallVector<unsigned, 8> ToVisit;
 | |
| 
 | |
|   // A critical sequence is computed as the longest path from a node of the
 | |
|   // RootSet to a leaf node (i.e. a node with no successors).  The RootSet is
 | |
|   // composed of nodes with at least one successor, and no predecessors.
 | |
|   //
 | |
|   // Each node of the graph starts with an initial default cost of zero.  The
 | |
|   // cost of a node is a measure of criticality: the higher the cost, the bigger
 | |
|   // is the performance impact.
 | |
|   // For register and memory dependencies, the cost is a function of the write
 | |
|   // latency as well as the actual delay (in cycles) caused to users.
 | |
|   // For processor resource dependencies, the cost is a function of the resource
 | |
|   // pressure. Resource interferences with low frequency values are ignored.
 | |
|   //
 | |
|   // This algorithm is very similar to a (reverse) Dijkstra.  Every iteration of
 | |
|   // the inner loop selects (i.e. visits) a node N from a set of `unvisited
 | |
|   // nodes`, and then propagates the cost of N to all its neighbors.
 | |
|   //
 | |
|   // The `unvisited nodes` set initially contains all the nodes from the
 | |
|   // RootSet.  A node N is added to the `unvisited nodes` if all its
 | |
|   // predecessors have been visited already.
 | |
|   // 
 | |
|   // For simplicity, every node tracks the number of unvisited incoming edges in
 | |
|   // field `NumVisitedPredecessors`.  When the value of that field drops to
 | |
|   // zero, then the corresponding node is added to a `ToVisit` set.
 | |
|   //
 | |
|   // At the end of every iteration of the outer loop, set `ToVisit` becomes our
 | |
|   // new `unvisited nodes` set.
 | |
|   // 
 | |
|   // The algorithm terminates when the set of unvisited nodes (i.e. our RootSet)
 | |
|   // is empty. This algorithm works under the assumption that the graph is
 | |
|   // acyclic.
 | |
|   do {
 | |
|     for (unsigned IID : RootSet) {
 | |
|       const DGNode &N = Nodes[IID];
 | |
|       for (const DependencyEdge &DepEdge : N.OutgoingEdges) {
 | |
|         unsigned ToIID = DepEdge.ToIID;
 | |
|         DGNode &To = Nodes[ToIID];
 | |
|         uint64_t Cost = N.Cost + DepEdge.Dep.Cost;
 | |
|         // Check if this is the most expensive incoming edge seen so far.  In
 | |
|         // case, update the total cost of the destination node (ToIID), as well
 | |
|         // its field `CriticalPredecessor`.
 | |
|         if (Cost > To.Cost) {
 | |
|           To.CriticalPredecessor = DepEdge;
 | |
|           To.Cost = Cost;
 | |
|           To.Depth = N.Depth + 1;
 | |
|         }
 | |
|         To.NumVisitedPredecessors++;
 | |
|         if (To.NumVisitedPredecessors == To.NumPredecessors)
 | |
|           ToVisit.emplace_back(ToIID);
 | |
|       }
 | |
|     }
 | |
| 
 | |
|     std::swap(RootSet, ToVisit);
 | |
|     ToVisit.clear();
 | |
|   } while (!RootSet.empty());
 | |
| }
 | |
| 
 | |
| void DependencyGraph::getCriticalSequence(
 | |
|     SmallVectorImpl<const DependencyEdge *> &Seq) const {
 | |
|   // At this stage, nodes of the graph have been already visited, and costs have
 | |
|   // been propagated through the edges (see method `propagateThroughEdges()`).
 | |
| 
 | |
|   // Identify the node N with the highest cost in the graph. By construction,
 | |
|   // that node is the last instruction of our critical sequence.
 | |
|   // Field N.Depth would tell us the total length of the sequence.
 | |
|   //
 | |
|   // To obtain the sequence of critical edges, we simply follow the chain of critical
 | |
|   // predecessors starting from node N (field DGNode::CriticalPredecessor).
 | |
|   const auto It = std::max_element(
 | |
|       Nodes.begin(), Nodes.end(),
 | |
|       [](const DGNode &Lhs, const DGNode &Rhs) { return Lhs.Cost < Rhs.Cost; });
 | |
|   unsigned IID = std::distance(Nodes.begin(), It);
 | |
|   Seq.resize(Nodes[IID].Depth);
 | |
|   for (unsigned I = Seq.size(), E = 0; I > E; --I) {
 | |
|     const DGNode &N = Nodes[IID];
 | |
|     Seq[I - 1] = &N.CriticalPredecessor;
 | |
|     IID = N.CriticalPredecessor.FromIID;
 | |
|   }
 | |
| }
 | |
| 
 | |
| static void printInstruction(formatted_raw_ostream &FOS,
 | |
|                              const MCSubtargetInfo &STI, MCInstPrinter &MCIP,
 | |
|                              const MCInst &MCI,
 | |
|                              bool UseDifferentColor = false) {
 | |
|   std::string Instruction;
 | |
|   raw_string_ostream InstrStream(Instruction);
 | |
| 
 | |
|   FOS.PadToColumn(14);
 | |
| 
 | |
|   MCIP.printInst(&MCI, 0, "", STI, InstrStream);
 | |
|   InstrStream.flush();
 | |
| 
 | |
|   if (UseDifferentColor)
 | |
|     FOS.changeColor(raw_ostream::CYAN, true, false);
 | |
|   FOS << StringRef(Instruction).ltrim();
 | |
|   if (UseDifferentColor)
 | |
|     FOS.resetColor();
 | |
| }
 | |
| 
 | |
| void BottleneckAnalysis::printCriticalSequence(raw_ostream &OS) const {
 | |
|   // Early exit if no bottlenecks were found during the simulation.
 | |
|   if (!SeenStallCycles || !BPI.PressureIncreaseCycles)
 | |
|     return;
 | |
| 
 | |
|   SmallVector<const DependencyEdge *, 16> Seq;
 | |
|   DG.getCriticalSequence(Seq);
 | |
|   if (Seq.empty())
 | |
|     return;
 | |
| 
 | |
|   OS << "\nCritical sequence based on the simulation:\n\n";
 | |
| 
 | |
|   const DependencyEdge &FirstEdge = *Seq[0];
 | |
|   unsigned FromIID = FirstEdge.FromIID % Source.size();
 | |
|   unsigned ToIID = FirstEdge.ToIID % Source.size();
 | |
|   bool IsLoopCarried = FromIID >= ToIID;
 | |
| 
 | |
|   formatted_raw_ostream FOS(OS);
 | |
|   FOS.PadToColumn(14);
 | |
|   FOS << "Instruction";
 | |
|   FOS.PadToColumn(58);
 | |
|   FOS << "Dependency Information";
 | |
| 
 | |
|   bool HasColors = FOS.has_colors();
 | |
| 
 | |
|   unsigned CurrentIID = 0;
 | |
|   if (IsLoopCarried) {
 | |
|     FOS << "\n +----< " << FromIID << ".";
 | |
|     printInstruction(FOS, STI, MCIP, Source[FromIID], HasColors);
 | |
|     FOS << "\n |\n |    < loop carried > \n |";
 | |
|   } else {
 | |
|     while (CurrentIID < FromIID) {
 | |
|       FOS << "\n        " << CurrentIID << ".";
 | |
|       printInstruction(FOS, STI, MCIP, Source[CurrentIID]);
 | |
|       CurrentIID++;
 | |
|     }
 | |
| 
 | |
|     FOS << "\n +----< " << CurrentIID << ".";
 | |
|     printInstruction(FOS, STI, MCIP, Source[CurrentIID], HasColors);
 | |
|     CurrentIID++;
 | |
|   }
 | |
| 
 | |
|   for (const DependencyEdge *&DE : Seq) {
 | |
|     ToIID = DE->ToIID % Source.size();
 | |
|     unsigned LastIID = CurrentIID > ToIID ? Source.size() : ToIID;
 | |
| 
 | |
|     while (CurrentIID < LastIID) {
 | |
|       FOS << "\n |      " << CurrentIID << ".";
 | |
|       printInstruction(FOS, STI, MCIP, Source[CurrentIID]);
 | |
|       CurrentIID++;
 | |
|     }
 | |
| 
 | |
|     if (CurrentIID == ToIID) {
 | |
|       FOS << "\n +----> " << ToIID << ".";
 | |
|       printInstruction(FOS, STI, MCIP, Source[CurrentIID], HasColors);
 | |
|     } else {
 | |
|       FOS << "\n |\n |    < loop carried > \n |"
 | |
|           << "\n +----> " << ToIID << ".";
 | |
|       printInstruction(FOS, STI, MCIP, Source[ToIID], HasColors);
 | |
|     }
 | |
|     FOS.PadToColumn(58);
 | |
| 
 | |
|     const DependencyEdge::Dependency &Dep = DE->Dep;
 | |
|     if (HasColors)
 | |
|       FOS.changeColor(raw_ostream::SAVEDCOLOR, true, false);
 | |
| 
 | |
|     if (Dep.Type == DependencyEdge::DT_REGISTER) {
 | |
|       FOS << "## REGISTER dependency:  ";
 | |
|       if (HasColors)
 | |
|         FOS.changeColor(raw_ostream::MAGENTA, true, false);
 | |
|       MCIP.printRegName(FOS, Dep.ResourceOrRegID);
 | |
|     } else if (Dep.Type == DependencyEdge::DT_MEMORY) {
 | |
|       FOS << "## MEMORY dependency.";
 | |
|     } else {
 | |
|       assert(Dep.Type == DependencyEdge::DT_RESOURCE &&
 | |
|              "Unsupported dependency type!");
 | |
|       FOS << "## RESOURCE interference:  ";
 | |
|       if (HasColors)
 | |
|         FOS.changeColor(raw_ostream::MAGENTA, true, false);
 | |
|       FOS << Tracker.resolveResourceName(Dep.ResourceOrRegID);
 | |
|       if (HasColors) {
 | |
|         FOS.resetColor();
 | |
|         FOS.changeColor(raw_ostream::SAVEDCOLOR, true, false);
 | |
|       }
 | |
|       FOS << " [ probability: " << ((DE->Frequency * 100) / Iterations)
 | |
|           << "% ]";
 | |
|     }
 | |
|     if (HasColors)
 | |
|       FOS.resetColor();
 | |
|     ++CurrentIID;
 | |
|   }
 | |
| 
 | |
|   while (CurrentIID < Source.size()) {
 | |
|     FOS << "\n        " << CurrentIID << ".";
 | |
|     printInstruction(FOS, STI, MCIP, Source[CurrentIID]);
 | |
|     CurrentIID++;
 | |
|   }
 | |
| 
 | |
|   FOS << '\n';
 | |
|   FOS.flush();
 | |
| }
 | |
| 
 | |
| #ifndef NDEBUG
 | |
| void DependencyGraph::dump(raw_ostream &OS, MCInstPrinter &MCIP) const {
 | |
|   OS << "\nREG DEPS\n";
 | |
|   for (const DGNode &Node : Nodes)
 | |
|     for (const DependencyEdge &DE : Node.OutgoingEdges)
 | |
|       if (DE.Dep.Type == DependencyEdge::DT_REGISTER)
 | |
|         dumpDependencyEdge(OS, DE, MCIP);
 | |
| 
 | |
|   OS << "\nMEM DEPS\n";
 | |
|   for (const DGNode &Node : Nodes)
 | |
|     for (const DependencyEdge &DE : Node.OutgoingEdges)
 | |
|       if (DE.Dep.Type == DependencyEdge::DT_MEMORY)
 | |
|         dumpDependencyEdge(OS, DE, MCIP);
 | |
| 
 | |
|   OS << "\nRESOURCE DEPS\n";
 | |
|   for (const DGNode &Node : Nodes)
 | |
|     for (const DependencyEdge &DE : Node.OutgoingEdges)
 | |
|       if (DE.Dep.Type == DependencyEdge::DT_RESOURCE)
 | |
|         dumpDependencyEdge(OS, DE, MCIP);
 | |
| }
 | |
| #endif // NDEBUG
 | |
| 
 | |
| void DependencyGraph::addDependency(unsigned From, unsigned To,
 | |
|                                     DependencyEdge::Dependency &&Dep) {
 | |
|   DGNode &NodeFrom = Nodes[From];
 | |
|   DGNode &NodeTo = Nodes[To];
 | |
|   SmallVectorImpl<DependencyEdge> &Vec = NodeFrom.OutgoingEdges;
 | |
| 
 | |
|   auto It = find_if(Vec, [To, Dep](DependencyEdge &DE) {
 | |
|     return DE.ToIID == To && DE.Dep.ResourceOrRegID == Dep.ResourceOrRegID;
 | |
|   });
 | |
| 
 | |
|   if (It != Vec.end()) {
 | |
|     It->Dep.Cost += Dep.Cost;
 | |
|     It->Frequency++;
 | |
|     return;
 | |
|   }
 | |
| 
 | |
|   DependencyEdge DE = {Dep, From, To, 1};
 | |
|   Vec.emplace_back(DE);
 | |
|   NodeTo.NumPredecessors++;
 | |
| }
 | |
| 
 | |
| BottleneckAnalysis::BottleneckAnalysis(const MCSubtargetInfo &sti,
 | |
|                                        MCInstPrinter &Printer,
 | |
|                                        ArrayRef<MCInst> S, unsigned NumIter)
 | |
|     : STI(sti), MCIP(Printer), Tracker(STI.getSchedModel()), DG(S.size() * 3),
 | |
|       Source(S), Iterations(NumIter), TotalCycles(0),
 | |
|       PressureIncreasedBecauseOfResources(false),
 | |
|       PressureIncreasedBecauseOfRegisterDependencies(false),
 | |
|       PressureIncreasedBecauseOfMemoryDependencies(false),
 | |
|       SeenStallCycles(false), BPI() {}
 | |
| 
 | |
| void BottleneckAnalysis::addRegisterDep(unsigned From, unsigned To,
 | |
|                                         unsigned RegID, unsigned Cost) {
 | |
|   bool IsLoopCarried = From >= To;
 | |
|   unsigned SourceSize = Source.size();
 | |
|   if (IsLoopCarried) {
 | |
|     DG.addRegisterDep(From, To + SourceSize, RegID, Cost);
 | |
|     DG.addRegisterDep(From + SourceSize, To + (SourceSize * 2), RegID, Cost);
 | |
|     return;
 | |
|   }
 | |
|   DG.addRegisterDep(From + SourceSize, To + SourceSize, RegID, Cost);
 | |
| }
 | |
| 
 | |
| void BottleneckAnalysis::addMemoryDep(unsigned From, unsigned To,
 | |
|                                       unsigned Cost) {
 | |
|   bool IsLoopCarried = From >= To;
 | |
|   unsigned SourceSize = Source.size();
 | |
|   if (IsLoopCarried) {
 | |
|     DG.addMemoryDep(From, To + SourceSize, Cost);
 | |
|     DG.addMemoryDep(From + SourceSize, To + (SourceSize * 2), Cost);
 | |
|     return;
 | |
|   }
 | |
|   DG.addMemoryDep(From + SourceSize, To + SourceSize, Cost);
 | |
| }
 | |
| 
 | |
| void BottleneckAnalysis::addResourceDep(unsigned From, unsigned To,
 | |
|                                         uint64_t Mask, unsigned Cost) {
 | |
|   bool IsLoopCarried = From >= To;
 | |
|   unsigned SourceSize = Source.size();
 | |
|   if (IsLoopCarried) {
 | |
|     DG.addResourceDep(From, To + SourceSize, Mask, Cost);
 | |
|     DG.addResourceDep(From + SourceSize, To + (SourceSize * 2), Mask, Cost);
 | |
|     return;
 | |
|   }
 | |
|   DG.addResourceDep(From + SourceSize, To + SourceSize, Mask, Cost);
 | |
| }
 | |
| 
 | |
| void BottleneckAnalysis::onEvent(const HWInstructionEvent &Event) {
 | |
|   const unsigned IID = Event.IR.getSourceIndex();
 | |
|   if (Event.Type == HWInstructionEvent::Dispatched) {
 | |
|     Tracker.onInstructionDispatched(IID);
 | |
|     return;
 | |
|   }
 | |
|   if (Event.Type == HWInstructionEvent::Executed) {
 | |
|     Tracker.onInstructionExecuted(IID);
 | |
|     return;
 | |
|   }
 | |
| 
 | |
|   if (Event.Type != HWInstructionEvent::Issued)
 | |
|     return;
 | |
| 
 | |
|   const Instruction &IS = *Event.IR.getInstruction();
 | |
|   unsigned To = IID % Source.size();
 | |
| 
 | |
|   unsigned Cycles = 2 * Tracker.getResourcePressureCycles(IID);
 | |
|   uint64_t ResourceMask = IS.getCriticalResourceMask();
 | |
|   SmallVector<std::pair<unsigned, unsigned>, 4> Users;
 | |
|   while (ResourceMask) {
 | |
|     uint64_t Current = ResourceMask & (-ResourceMask);
 | |
|     Tracker.getResourceUsers(Current, Users);
 | |
|     for (const std::pair<unsigned, unsigned> &U : Users)
 | |
|       addResourceDep(U.first % Source.size(), To, Current, U.second + Cycles);
 | |
|     Users.clear();
 | |
|     ResourceMask ^= Current;
 | |
|   }
 | |
| 
 | |
|   const CriticalDependency &RegDep = IS.getCriticalRegDep();
 | |
|   if (RegDep.Cycles) {
 | |
|     Cycles = RegDep.Cycles + 2 * Tracker.getRegisterPressureCycles(IID);
 | |
|     unsigned From = RegDep.IID % Source.size();
 | |
|     addRegisterDep(From, To, RegDep.RegID, Cycles);
 | |
|   }
 | |
| 
 | |
|   const CriticalDependency &MemDep = IS.getCriticalMemDep();
 | |
|   if (MemDep.Cycles) {
 | |
|     Cycles = MemDep.Cycles + 2 * Tracker.getMemoryPressureCycles(IID);
 | |
|     unsigned From = MemDep.IID % Source.size();
 | |
|     addMemoryDep(From, To, Cycles);
 | |
|   }
 | |
| 
 | |
|   Tracker.handleInstructionIssuedEvent(
 | |
|       static_cast<const HWInstructionIssuedEvent &>(Event));
 | |
| 
 | |
|   // Check if this is the last simulated instruction.
 | |
|   if (IID == ((Iterations * Source.size()) - 1))
 | |
|     DG.finalizeGraph(Iterations);
 | |
| }
 | |
| 
 | |
| void BottleneckAnalysis::onEvent(const HWPressureEvent &Event) {
 | |
|   assert(Event.Reason != HWPressureEvent::INVALID &&
 | |
|          "Unexpected invalid event!");
 | |
| 
 | |
|   Tracker.handlePressureEvent(Event);
 | |
| 
 | |
|   switch (Event.Reason) {
 | |
|   default:
 | |
|     break;
 | |
| 
 | |
|   case HWPressureEvent::RESOURCES:
 | |
|     PressureIncreasedBecauseOfResources = true;
 | |
|     break;
 | |
|   case HWPressureEvent::REGISTER_DEPS:
 | |
|     PressureIncreasedBecauseOfRegisterDependencies = true;
 | |
|     break;
 | |
|   case HWPressureEvent::MEMORY_DEPS:
 | |
|     PressureIncreasedBecauseOfMemoryDependencies = true;
 | |
|     break;
 | |
|   }
 | |
| }
 | |
| 
 | |
| void BottleneckAnalysis::onCycleEnd() {
 | |
|   ++TotalCycles;
 | |
| 
 | |
|   bool PressureIncreasedBecauseOfDataDependencies =
 | |
|       PressureIncreasedBecauseOfRegisterDependencies ||
 | |
|       PressureIncreasedBecauseOfMemoryDependencies;
 | |
|   if (!PressureIncreasedBecauseOfResources &&
 | |
|       !PressureIncreasedBecauseOfDataDependencies)
 | |
|     return;
 | |
| 
 | |
|   ++BPI.PressureIncreaseCycles;
 | |
|   if (PressureIncreasedBecauseOfRegisterDependencies)
 | |
|     ++BPI.RegisterDependencyCycles;
 | |
|   if (PressureIncreasedBecauseOfMemoryDependencies)
 | |
|     ++BPI.MemoryDependencyCycles;
 | |
|   if (PressureIncreasedBecauseOfDataDependencies)
 | |
|     ++BPI.DataDependencyCycles;
 | |
|   if (PressureIncreasedBecauseOfResources)
 | |
|     ++BPI.ResourcePressureCycles;
 | |
|   PressureIncreasedBecauseOfResources = false;
 | |
|   PressureIncreasedBecauseOfRegisterDependencies = false;
 | |
|   PressureIncreasedBecauseOfMemoryDependencies = false;
 | |
| }
 | |
| 
 | |
| void BottleneckAnalysis::printBottleneckHints(raw_ostream &OS) const {
 | |
|   if (!SeenStallCycles || !BPI.PressureIncreaseCycles) {
 | |
|     OS << "\n\nNo resource or data dependency bottlenecks discovered.\n";
 | |
|     return;
 | |
|   }
 | |
| 
 | |
|   double PressurePerCycle =
 | |
|       (double)BPI.PressureIncreaseCycles * 100 / TotalCycles;
 | |
|   double ResourcePressurePerCycle =
 | |
|       (double)BPI.ResourcePressureCycles * 100 / TotalCycles;
 | |
|   double DDPerCycle = (double)BPI.DataDependencyCycles * 100 / TotalCycles;
 | |
|   double RegDepPressurePerCycle =
 | |
|       (double)BPI.RegisterDependencyCycles * 100 / TotalCycles;
 | |
|   double MemDepPressurePerCycle =
 | |
|       (double)BPI.MemoryDependencyCycles * 100 / TotalCycles;
 | |
| 
 | |
|   OS << "\n\nCycles with backend pressure increase [ "
 | |
|      << format("%.2f", floor((PressurePerCycle * 100) + 0.5) / 100) << "% ]";
 | |
| 
 | |
|   OS << "\nThroughput Bottlenecks: "
 | |
|      << "\n  Resource Pressure       [ "
 | |
|      << format("%.2f", floor((ResourcePressurePerCycle * 100) + 0.5) / 100)
 | |
|      << "% ]";
 | |
| 
 | |
|   if (BPI.PressureIncreaseCycles) {
 | |
|     ArrayRef<unsigned> Distribution = Tracker.getResourcePressureDistribution();
 | |
|     const MCSchedModel &SM = STI.getSchedModel();
 | |
|     for (unsigned I = 0, E = Distribution.size(); I < E; ++I) {
 | |
|       unsigned ResourceCycles = Distribution[I];
 | |
|       if (ResourceCycles) {
 | |
|         double Frequency = (double)ResourceCycles * 100 / TotalCycles;
 | |
|         const MCProcResourceDesc &PRDesc = *SM.getProcResource(I);
 | |
|         OS << "\n  - " << PRDesc.Name << "  [ "
 | |
|            << format("%.2f", floor((Frequency * 100) + 0.5) / 100) << "% ]";
 | |
|       }
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   OS << "\n  Data Dependencies:      [ "
 | |
|      << format("%.2f", floor((DDPerCycle * 100) + 0.5) / 100) << "% ]";
 | |
|   OS << "\n  - Register Dependencies [ "
 | |
|      << format("%.2f", floor((RegDepPressurePerCycle * 100) + 0.5) / 100)
 | |
|      << "% ]";
 | |
|   OS << "\n  - Memory Dependencies   [ "
 | |
|      << format("%.2f", floor((MemDepPressurePerCycle * 100) + 0.5) / 100)
 | |
|      << "% ]\n";
 | |
| }
 | |
| 
 | |
| void BottleneckAnalysis::printView(raw_ostream &OS) const {
 | |
|   std::string Buffer;
 | |
|   raw_string_ostream TempStream(Buffer);
 | |
|   printBottleneckHints(TempStream);
 | |
|   TempStream.flush();
 | |
|   OS << Buffer;
 | |
|   printCriticalSequence(OS);
 | |
| }
 | |
| 
 | |
| } // namespace mca.
 | |
| } // namespace llvm
 |