1 //===--------------------- BottleneckAnalysis.cpp ---------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 ///
10 /// This file implements the functionalities used by the BottleneckAnalysis
11 /// to report bottleneck info.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "Views/BottleneckAnalysis.h"
16 #include "llvm/MC/MCInst.h"
17 #include "llvm/MCA/Support.h"
18 #include "llvm/Support/Format.h"
19 
20 namespace llvm {
21 namespace mca {
22 
23 #define DEBUG_TYPE "llvm-mca"
24 
PressureTracker(const MCSchedModel & Model)25 PressureTracker::PressureTracker(const MCSchedModel &Model)
26     : SM(Model),
27       ResourcePressureDistribution(Model.getNumProcResourceKinds(), 0),
28       ProcResID2Mask(Model.getNumProcResourceKinds(), 0),
29       ResIdx2ProcResID(Model.getNumProcResourceKinds(), 0),
30       ProcResID2ResourceUsersIndex(Model.getNumProcResourceKinds(), 0) {
31   computeProcResourceMasks(SM, ProcResID2Mask);
32 
33   // Ignore the invalid resource at index zero.
34   unsigned NextResourceUsersIdx = 0;
35   for (unsigned I = 1, E = Model.getNumProcResourceKinds(); I < E; ++I) {
36     const MCProcResourceDesc &ProcResource = *SM.getProcResource(I);
37     ProcResID2ResourceUsersIndex[I] = NextResourceUsersIdx;
38     NextResourceUsersIdx += ProcResource.NumUnits;
39     uint64_t ResourceMask = ProcResID2Mask[I];
40     ResIdx2ProcResID[getResourceStateIndex(ResourceMask)] = I;
41   }
42 
43   ResourceUsers.resize(NextResourceUsersIdx);
44   std::fill(ResourceUsers.begin(), ResourceUsers.end(),
45             std::make_pair<unsigned, unsigned>(~0U, 0U));
46 }
47 
getResourceUsers(uint64_t ResourceMask,SmallVectorImpl<User> & Users) const48 void PressureTracker::getResourceUsers(uint64_t ResourceMask,
49                                        SmallVectorImpl<User> &Users) const {
50   unsigned Index = getResourceStateIndex(ResourceMask);
51   unsigned ProcResID = ResIdx2ProcResID[Index];
52   const MCProcResourceDesc &PRDesc = *SM.getProcResource(ProcResID);
53   for (unsigned I = 0, E = PRDesc.NumUnits; I < E; ++I) {
54     const User U = getResourceUser(ProcResID, I);
55     if (U.second && IPI.find(U.first) != IPI.end())
56       Users.emplace_back(U);
57   }
58 }
59 
onInstructionDispatched(unsigned IID)60 void PressureTracker::onInstructionDispatched(unsigned IID) {
61   IPI.insert(std::make_pair(IID, InstructionPressureInfo()));
62 }
63 
onInstructionExecuted(unsigned IID)64 void PressureTracker::onInstructionExecuted(unsigned IID) { IPI.erase(IID); }
65 
handleInstructionIssuedEvent(const HWInstructionIssuedEvent & Event)66 void PressureTracker::handleInstructionIssuedEvent(
67     const HWInstructionIssuedEvent &Event) {
68   unsigned IID = Event.IR.getSourceIndex();
69   using ResourceRef = HWInstructionIssuedEvent::ResourceRef;
70   using ResourceUse = std::pair<ResourceRef, ResourceCycles>;
71   for (const ResourceUse &Use : Event.UsedResources) {
72     const ResourceRef &RR = Use.first;
73     unsigned Index = ProcResID2ResourceUsersIndex[RR.first];
74     Index += countTrailingZeros(RR.second);
75     ResourceUsers[Index] = std::make_pair(IID, Use.second.getNumerator());
76   }
77 }
78 
updateResourcePressureDistribution(uint64_t CumulativeMask)79 void PressureTracker::updateResourcePressureDistribution(
80     uint64_t CumulativeMask) {
81   while (CumulativeMask) {
82     uint64_t Current = CumulativeMask & (-CumulativeMask);
83     unsigned ResIdx = getResourceStateIndex(Current);
84     unsigned ProcResID = ResIdx2ProcResID[ResIdx];
85     uint64_t Mask = ProcResID2Mask[ProcResID];
86 
87     if (Mask == Current) {
88       ResourcePressureDistribution[ProcResID]++;
89       CumulativeMask ^= Current;
90       continue;
91     }
92 
93     Mask ^= Current;
94     while (Mask) {
95       uint64_t SubUnit = Mask & (-Mask);
96       ResIdx = getResourceStateIndex(SubUnit);
97       ProcResID = ResIdx2ProcResID[ResIdx];
98       ResourcePressureDistribution[ProcResID]++;
99       Mask ^= SubUnit;
100     }
101 
102     CumulativeMask ^= Current;
103   }
104 }
105 
handlePressureEvent(const HWPressureEvent & Event)106 void PressureTracker::handlePressureEvent(const HWPressureEvent &Event) {
107   assert(Event.Reason != HWPressureEvent::INVALID &&
108          "Unexpected invalid event!");
109 
110   switch (Event.Reason) {
111   default:
112     break;
113 
114   case HWPressureEvent::RESOURCES: {
115     const uint64_t ResourceMask = Event.ResourceMask;
116     updateResourcePressureDistribution(Event.ResourceMask);
117 
118     for (const InstRef &IR : Event.AffectedInstructions) {
119       const Instruction &IS = *IR.getInstruction();
120       unsigned BusyResources = IS.getCriticalResourceMask() & ResourceMask;
121       if (!BusyResources)
122         continue;
123 
124       unsigned IID = IR.getSourceIndex();
125       IPI[IID].ResourcePressureCycles++;
126     }
127     break;
128   }
129 
130   case HWPressureEvent::REGISTER_DEPS:
131     for (const InstRef &IR : Event.AffectedInstructions) {
132       unsigned IID = IR.getSourceIndex();
133       IPI[IID].RegisterPressureCycles++;
134     }
135     break;
136 
137   case HWPressureEvent::MEMORY_DEPS:
138     for (const InstRef &IR : Event.AffectedInstructions) {
139       unsigned IID = IR.getSourceIndex();
140       IPI[IID].MemoryPressureCycles++;
141     }
142   }
143 }
144 
145 #ifndef NDEBUG
dumpDependencyEdge(raw_ostream & OS,const DependencyEdge & DepEdge,MCInstPrinter & MCIP) const146 void DependencyGraph::dumpDependencyEdge(raw_ostream &OS,
147                                          const DependencyEdge &DepEdge,
148                                          MCInstPrinter &MCIP) const {
149   unsigned FromIID = DepEdge.FromIID;
150   unsigned ToIID = DepEdge.ToIID;
151   assert(FromIID < ToIID && "Graph should be acyclic!");
152 
153   const DependencyEdge::Dependency &DE = DepEdge.Dep;
154   assert(DE.Type != DependencyEdge::DT_INVALID && "Unexpected invalid edge!");
155 
156   OS << " FROM: " << FromIID << " TO: " << ToIID << "             ";
157   if (DE.Type == DependencyEdge::DT_REGISTER) {
158     OS << " - REGISTER: ";
159     MCIP.printRegName(OS, DE.ResourceOrRegID);
160   } else if (DE.Type == DependencyEdge::DT_MEMORY) {
161     OS << " - MEMORY";
162   } else {
163     assert(DE.Type == DependencyEdge::DT_RESOURCE &&
164            "Unsupported dependency type!");
165     OS << " - RESOURCE MASK: " << DE.ResourceOrRegID;
166   }
167   OS << " - COST: " << DE.Cost << '\n';
168 }
169 #endif // NDEBUG
170 
pruneEdges(unsigned Iterations)171 void DependencyGraph::pruneEdges(unsigned Iterations) {
172   for (DGNode &N : Nodes) {
173     unsigned NumPruned = 0;
174     const unsigned Size = N.OutgoingEdges.size();
175     // Use a cut-off threshold to prune edges with a low frequency.
176     for (unsigned I = 0, E = Size; I < E; ++I) {
177       DependencyEdge &Edge = N.OutgoingEdges[I];
178       if (Edge.Frequency == Iterations)
179         continue;
180       double Factor = (double)Edge.Frequency / Iterations;
181       if (0.10 < Factor)
182         continue;
183       Nodes[Edge.ToIID].NumPredecessors--;
184       std::swap(Edge, N.OutgoingEdges[E - 1]);
185       --E;
186       ++NumPruned;
187     }
188 
189     if (NumPruned)
190       N.OutgoingEdges.resize(Size - NumPruned);
191   }
192 }
193 
initializeRootSet(SmallVectorImpl<unsigned> & RootSet) const194 void DependencyGraph::initializeRootSet(
195     SmallVectorImpl<unsigned> &RootSet) const {
196   for (unsigned I = 0, E = Nodes.size(); I < E; ++I) {
197     const DGNode &N = Nodes[I];
198     if (N.NumPredecessors == 0 && !N.OutgoingEdges.empty())
199       RootSet.emplace_back(I);
200   }
201 }
202 
propagateThroughEdges(SmallVectorImpl<unsigned> & RootSet,unsigned Iterations)203 void DependencyGraph::propagateThroughEdges(
204     SmallVectorImpl<unsigned> &RootSet, unsigned Iterations) {
205   SmallVector<unsigned, 8> ToVisit;
206 
207   // A critical sequence is computed as the longest path from a node of the
208   // RootSet to a leaf node (i.e. a node with no successors).  The RootSet is
209   // composed of nodes with at least one successor, and no predecessors.
210   //
211   // Each node of the graph starts with an initial default cost of zero.  The
212   // cost of a node is a measure of criticality: the higher the cost, the bigger
213   // is the performance impact.
214   // For register and memory dependencies, the cost is a function of the write
215   // latency as well as the actual delay (in cycles) caused to users.
216   // For processor resource dependencies, the cost is a function of the resource
217   // pressure. Resource interferences with low frequency values are ignored.
218   //
219   // This algorithm is very similar to a (reverse) Dijkstra.  Every iteration of
220   // the inner loop selects (i.e. visits) a node N from a set of `unvisited
221   // nodes`, and then propagates the cost of N to all its neighbors.
222   //
223   // The `unvisited nodes` set initially contains all the nodes from the
224   // RootSet.  A node N is added to the `unvisited nodes` if all its
225   // predecessors have been visited already.
226   //
227   // For simplicity, every node tracks the number of unvisited incoming edges in
228   // field `NumVisitedPredecessors`.  When the value of that field drops to
229   // zero, then the corresponding node is added to a `ToVisit` set.
230   //
231   // At the end of every iteration of the outer loop, set `ToVisit` becomes our
232   // new `unvisited nodes` set.
233   //
234   // The algorithm terminates when the set of unvisited nodes (i.e. our RootSet)
235   // is empty. This algorithm works under the assumption that the graph is
236   // acyclic.
237   do {
238     for (unsigned IID : RootSet) {
239       const DGNode &N = Nodes[IID];
240       for (const DependencyEdge &DepEdge : N.OutgoingEdges) {
241         unsigned ToIID = DepEdge.ToIID;
242         DGNode &To = Nodes[ToIID];
243         uint64_t Cost = N.Cost + DepEdge.Dep.Cost;
244         // Check if this is the most expensive incoming edge seen so far.  In
245         // case, update the total cost of the destination node (ToIID), as well
246         // its field `CriticalPredecessor`.
247         if (Cost > To.Cost) {
248           To.CriticalPredecessor = DepEdge;
249           To.Cost = Cost;
250           To.Depth = N.Depth + 1;
251         }
252         To.NumVisitedPredecessors++;
253         if (To.NumVisitedPredecessors == To.NumPredecessors)
254           ToVisit.emplace_back(ToIID);
255       }
256     }
257 
258     std::swap(RootSet, ToVisit);
259     ToVisit.clear();
260   } while (!RootSet.empty());
261 }
262 
getCriticalSequence(SmallVectorImpl<const DependencyEdge * > & Seq) const263 void DependencyGraph::getCriticalSequence(
264     SmallVectorImpl<const DependencyEdge *> &Seq) const {
265   // At this stage, nodes of the graph have been already visited, and costs have
266   // been propagated through the edges (see method `propagateThroughEdges()`).
267 
268   // Identify the node N with the highest cost in the graph. By construction,
269   // that node is the last instruction of our critical sequence.
270   // Field N.Depth would tell us the total length of the sequence.
271   //
272   // To obtain the sequence of critical edges, we simply follow the chain of critical
273   // predecessors starting from node N (field DGNode::CriticalPredecessor).
274   const auto It = std::max_element(
275       Nodes.begin(), Nodes.end(),
276       [](const DGNode &Lhs, const DGNode &Rhs) { return Lhs.Cost < Rhs.Cost; });
277   unsigned IID = std::distance(Nodes.begin(), It);
278   Seq.resize(Nodes[IID].Depth);
279   for (unsigned I = Seq.size(), E = 0; I > E; --I) {
280     const DGNode &N = Nodes[IID];
281     Seq[I - 1] = &N.CriticalPredecessor;
282     IID = N.CriticalPredecessor.FromIID;
283   }
284 }
285 
printInstruction(formatted_raw_ostream & FOS,const MCInst & MCI,bool UseDifferentColor) const286 void BottleneckAnalysis::printInstruction(formatted_raw_ostream &FOS,
287                                           const MCInst &MCI,
288                                           bool UseDifferentColor) const {
289   FOS.PadToColumn(14);
290 
291   if (UseDifferentColor)
292     FOS.changeColor(raw_ostream::CYAN, true, false);
293   FOS << printInstructionString(MCI);
294   if (UseDifferentColor)
295     FOS.resetColor();
296 }
297 
// Print the critical sequence of instructions computed by the dependency
// graph, interleaving the source listing with ASCII-art markers
// ( +----< edge source, +----> edge sink ) and a textual description of each
// dependency in the right-hand column.
void BottleneckAnalysis::printCriticalSequence(raw_ostream &OS) const {
  // Early exit if no bottlenecks were found during the simulation.
  if (!SeenStallCycles || !BPI.PressureIncreaseCycles)
    return;

  SmallVector<const DependencyEdge *, 16> Seq;
  DG.getCriticalSequence(Seq);
  if (Seq.empty())
    return;

  OS << "\nCritical sequence based on the simulation:\n\n";

  const DependencyEdge &FirstEdge = *Seq[0];
  ArrayRef<llvm::MCInst> Source = getSource();
  // Map expanded-graph node IDs back to indices into the source sequence.
  unsigned FromIID = FirstEdge.FromIID % Source.size();
  unsigned ToIID = FirstEdge.ToIID % Source.size();
  bool IsLoopCarried = FromIID >= ToIID;

  formatted_raw_ostream FOS(OS);
  FOS.PadToColumn(14);
  FOS << "Instruction";
  FOS.PadToColumn(58);
  FOS << "Dependency Information";

  bool HasColors = FOS.has_colors();

  unsigned CurrentIID = 0;
  if (IsLoopCarried) {
    // The first edge wraps around the loop boundary: print only its source
    // followed by a "loop carried" marker.
    FOS << "\n +----< " << FromIID << ".";
    printInstruction(FOS, Source[FromIID], HasColors);
    FOS << "\n |\n |    < loop carried > \n |";
  } else {
    // Print (uncolored) every instruction preceding the first edge source.
    while (CurrentIID < FromIID) {
      FOS << "\n        " << CurrentIID << ".";
      printInstruction(FOS, Source[CurrentIID]);
      CurrentIID++;
    }

    FOS << "\n +----< " << CurrentIID << ".";
    printInstruction(FOS, Source[CurrentIID], HasColors);
    CurrentIID++;
  }

  // For each edge of the sequence: print the instructions leading up to the
  // sink, then the sink itself, then the dependency description.
  for (const DependencyEdge *&DE : Seq) {
    ToIID = DE->ToIID % Source.size();
    // If the sink is behind the cursor, the edge crosses the loop boundary:
    // finish printing this iteration first.
    unsigned LastIID = CurrentIID > ToIID ? Source.size() : ToIID;

    while (CurrentIID < LastIID) {
      FOS << "\n |      " << CurrentIID << ".";
      printInstruction(FOS, Source[CurrentIID]);
      CurrentIID++;
    }

    if (CurrentIID == ToIID) {
      FOS << "\n +----> " << ToIID << ".";
      printInstruction(FOS, Source[CurrentIID], HasColors);
    } else {
      // Loop-carried edge: emit the wrap-around marker before the sink.
      FOS << "\n |\n |    < loop carried > \n |"
          << "\n +----> " << ToIID << ".";
      printInstruction(FOS, Source[ToIID], HasColors);
    }
    FOS.PadToColumn(58);

    const DependencyEdge::Dependency &Dep = DE->Dep;
    if (HasColors)
      FOS.changeColor(raw_ostream::SAVEDCOLOR, true, false);

    if (Dep.Type == DependencyEdge::DT_REGISTER) {
      FOS << "## REGISTER dependency:  ";
      if (HasColors)
        FOS.changeColor(raw_ostream::MAGENTA, true, false);
      getInstPrinter().printRegName(FOS, Dep.ResourceOrRegID);
    } else if (Dep.Type == DependencyEdge::DT_MEMORY) {
      FOS << "## MEMORY dependency.";
    } else {
      assert(Dep.Type == DependencyEdge::DT_RESOURCE &&
             "Unsupported dependency type!");
      FOS << "## RESOURCE interference:  ";
      if (HasColors)
        FOS.changeColor(raw_ostream::MAGENTA, true, false);
      FOS << Tracker.resolveResourceName(Dep.ResourceOrRegID);
      if (HasColors) {
        FOS.resetColor();
        FOS.changeColor(raw_ostream::SAVEDCOLOR, true, false);
      }
      // Edge frequency over the iteration count approximates the probability
      // of the interference.
      FOS << " [ probability: " << ((DE->Frequency * 100) / Iterations)
          << "% ]";
    }
    if (HasColors)
      FOS.resetColor();
    ++CurrentIID;
  }

  // Print the remainder of the source listing after the last sink.
  while (CurrentIID < Source.size()) {
    FOS << "\n        " << CurrentIID << ".";
    printInstruction(FOS, Source[CurrentIID]);
    CurrentIID++;
  }

  FOS << '\n';
  FOS.flush();
}
400 
401 #ifndef NDEBUG
dump(raw_ostream & OS,MCInstPrinter & MCIP) const402 void DependencyGraph::dump(raw_ostream &OS, MCInstPrinter &MCIP) const {
403   OS << "\nREG DEPS\n";
404   for (const DGNode &Node : Nodes)
405     for (const DependencyEdge &DE : Node.OutgoingEdges)
406       if (DE.Dep.Type == DependencyEdge::DT_REGISTER)
407         dumpDependencyEdge(OS, DE, MCIP);
408 
409   OS << "\nMEM DEPS\n";
410   for (const DGNode &Node : Nodes)
411     for (const DependencyEdge &DE : Node.OutgoingEdges)
412       if (DE.Dep.Type == DependencyEdge::DT_MEMORY)
413         dumpDependencyEdge(OS, DE, MCIP);
414 
415   OS << "\nRESOURCE DEPS\n";
416   for (const DGNode &Node : Nodes)
417     for (const DependencyEdge &DE : Node.OutgoingEdges)
418       if (DE.Dep.Type == DependencyEdge::DT_RESOURCE)
419         dumpDependencyEdge(OS, DE, MCIP);
420 }
421 #endif // NDEBUG
422 
addDependency(unsigned From,unsigned To,DependencyEdge::Dependency && Dep)423 void DependencyGraph::addDependency(unsigned From, unsigned To,
424                                     DependencyEdge::Dependency &&Dep) {
425   DGNode &NodeFrom = Nodes[From];
426   DGNode &NodeTo = Nodes[To];
427   SmallVectorImpl<DependencyEdge> &Vec = NodeFrom.OutgoingEdges;
428 
429   auto It = find_if(Vec, [To, Dep](DependencyEdge &DE) {
430     return DE.ToIID == To && DE.Dep.ResourceOrRegID == Dep.ResourceOrRegID;
431   });
432 
433   if (It != Vec.end()) {
434     It->Dep.Cost += Dep.Cost;
435     It->Frequency++;
436     return;
437   }
438 
439   DependencyEdge DE = {Dep, From, To, 1};
440   Vec.emplace_back(DE);
441   NodeTo.NumPredecessors++;
442 }
443 
// Construct the view. The dependency graph is sized for three logical copies
// of the input sequence (see addRegisterDep & friends), which is enough to
// model loop-carried dependencies across iteration boundaries.
BottleneckAnalysis::BottleneckAnalysis(const MCSubtargetInfo &sti,
                                       MCInstPrinter &Printer,
                                       ArrayRef<MCInst> S, unsigned NumIter)
    : InstructionView(sti, Printer, S), Tracker(sti.getSchedModel()),
      DG(S.size() * 3), Iterations(NumIter), TotalCycles(0),
      PressureIncreasedBecauseOfResources(false),
      PressureIncreasedBecauseOfRegisterDependencies(false),
      PressureIncreasedBecauseOfMemoryDependencies(false),
      SeenStallCycles(false), BPI() {}
453 
addRegisterDep(unsigned From,unsigned To,unsigned RegID,unsigned Cost)454 void BottleneckAnalysis::addRegisterDep(unsigned From, unsigned To,
455                                         unsigned RegID, unsigned Cost) {
456   bool IsLoopCarried = From >= To;
457   unsigned SourceSize = getSource().size();
458   if (IsLoopCarried) {
459     DG.addRegisterDep(From, To + SourceSize, RegID, Cost);
460     DG.addRegisterDep(From + SourceSize, To + (SourceSize * 2), RegID, Cost);
461     return;
462   }
463   DG.addRegisterDep(From + SourceSize, To + SourceSize, RegID, Cost);
464 }
465 
addMemoryDep(unsigned From,unsigned To,unsigned Cost)466 void BottleneckAnalysis::addMemoryDep(unsigned From, unsigned To,
467                                       unsigned Cost) {
468   bool IsLoopCarried = From >= To;
469   unsigned SourceSize = getSource().size();
470   if (IsLoopCarried) {
471     DG.addMemoryDep(From, To + SourceSize, Cost);
472     DG.addMemoryDep(From + SourceSize, To + (SourceSize * 2), Cost);
473     return;
474   }
475   DG.addMemoryDep(From + SourceSize, To + SourceSize, Cost);
476 }
477 
addResourceDep(unsigned From,unsigned To,uint64_t Mask,unsigned Cost)478 void BottleneckAnalysis::addResourceDep(unsigned From, unsigned To,
479                                         uint64_t Mask, unsigned Cost) {
480   bool IsLoopCarried = From >= To;
481   unsigned SourceSize = getSource().size();
482   if (IsLoopCarried) {
483     DG.addResourceDep(From, To + SourceSize, Mask, Cost);
484     DG.addResourceDep(From + SourceSize, To + (SourceSize * 2), Mask, Cost);
485     return;
486   }
487   DG.addResourceDep(From + SourceSize, To + SourceSize, Mask, Cost);
488 }
489 
// Forward HWInstructionEvents to the pressure tracker and, on issue events,
// add the instruction's critical resource/register/memory dependencies to
// the dependency graph.
void BottleneckAnalysis::onEvent(const HWInstructionEvent &Event) {
  const unsigned IID = Event.IR.getSourceIndex();
  if (Event.Type == HWInstructionEvent::Dispatched) {
    Tracker.onInstructionDispatched(IID);
    return;
  }
  if (Event.Type == HWInstructionEvent::Executed) {
    Tracker.onInstructionExecuted(IID);
    return;
  }

  // Only Issued events contribute dependency-graph edges.
  if (Event.Type != HWInstructionEvent::Issued)
    return;

  ArrayRef<llvm::MCInst> Source = getSource();
  const Instruction &IS = *Event.IR.getInstruction();
  unsigned To = IID % Source.size();

  // Pressure cycles are weighted (x2) when computing the edge cost.
  unsigned Cycles = 2 * Tracker.getResourcePressureCycles(IID);
  uint64_t ResourceMask = IS.getCriticalResourceMask();
  SmallVector<std::pair<unsigned, unsigned>, 4> Users;
  // For every critical resource, add an edge from each tracked user of that
  // resource to this instruction.
  while (ResourceMask) {
    uint64_t Current = ResourceMask & (-ResourceMask);
    Tracker.getResourceUsers(Current, Users);
    for (const std::pair<unsigned, unsigned> &U : Users)
      addResourceDep(U.first % Source.size(), To, Current, U.second + Cycles);
    Users.clear();
    ResourceMask ^= Current;
  }

  // Critical register dependency (if any).
  const CriticalDependency &RegDep = IS.getCriticalRegDep();
  if (RegDep.Cycles) {
    Cycles = RegDep.Cycles + 2 * Tracker.getRegisterPressureCycles(IID);
    unsigned From = RegDep.IID % Source.size();
    addRegisterDep(From, To, RegDep.RegID, Cycles);
  }

  // Critical memory dependency (if any).
  const CriticalDependency &MemDep = IS.getCriticalMemDep();
  if (MemDep.Cycles) {
    Cycles = MemDep.Cycles + 2 * Tracker.getMemoryPressureCycles(IID);
    unsigned From = MemDep.IID % Source.size();
    addMemoryDep(From, To, Cycles);
  }

  Tracker.handleInstructionIssuedEvent(
      static_cast<const HWInstructionIssuedEvent &>(Event));

  // Check if this is the last simulated instruction.
  if (IID == ((Iterations * Source.size()) - 1))
    DG.finalizeGraph(Iterations);
}
541 
onEvent(const HWPressureEvent & Event)542 void BottleneckAnalysis::onEvent(const HWPressureEvent &Event) {
543   assert(Event.Reason != HWPressureEvent::INVALID &&
544          "Unexpected invalid event!");
545 
546   Tracker.handlePressureEvent(Event);
547 
548   switch (Event.Reason) {
549   default:
550     break;
551 
552   case HWPressureEvent::RESOURCES:
553     PressureIncreasedBecauseOfResources = true;
554     break;
555   case HWPressureEvent::REGISTER_DEPS:
556     PressureIncreasedBecauseOfRegisterDependencies = true;
557     break;
558   case HWPressureEvent::MEMORY_DEPS:
559     PressureIncreasedBecauseOfMemoryDependencies = true;
560     break;
561   }
562 }
563 
onCycleEnd()564 void BottleneckAnalysis::onCycleEnd() {
565   ++TotalCycles;
566 
567   bool PressureIncreasedBecauseOfDataDependencies =
568       PressureIncreasedBecauseOfRegisterDependencies ||
569       PressureIncreasedBecauseOfMemoryDependencies;
570   if (!PressureIncreasedBecauseOfResources &&
571       !PressureIncreasedBecauseOfDataDependencies)
572     return;
573 
574   ++BPI.PressureIncreaseCycles;
575   if (PressureIncreasedBecauseOfRegisterDependencies)
576     ++BPI.RegisterDependencyCycles;
577   if (PressureIncreasedBecauseOfMemoryDependencies)
578     ++BPI.MemoryDependencyCycles;
579   if (PressureIncreasedBecauseOfDataDependencies)
580     ++BPI.DataDependencyCycles;
581   if (PressureIncreasedBecauseOfResources)
582     ++BPI.ResourcePressureCycles;
583   PressureIncreasedBecauseOfResources = false;
584   PressureIncreasedBecauseOfRegisterDependencies = false;
585   PressureIncreasedBecauseOfMemoryDependencies = false;
586 }
587 
printBottleneckHints(raw_ostream & OS) const588 void BottleneckAnalysis::printBottleneckHints(raw_ostream &OS) const {
589   if (!SeenStallCycles || !BPI.PressureIncreaseCycles) {
590     OS << "\n\nNo resource or data dependency bottlenecks discovered.\n";
591     return;
592   }
593 
594   double PressurePerCycle =
595       (double)BPI.PressureIncreaseCycles * 100 / TotalCycles;
596   double ResourcePressurePerCycle =
597       (double)BPI.ResourcePressureCycles * 100 / TotalCycles;
598   double DDPerCycle = (double)BPI.DataDependencyCycles * 100 / TotalCycles;
599   double RegDepPressurePerCycle =
600       (double)BPI.RegisterDependencyCycles * 100 / TotalCycles;
601   double MemDepPressurePerCycle =
602       (double)BPI.MemoryDependencyCycles * 100 / TotalCycles;
603 
604   OS << "\n\nCycles with backend pressure increase [ "
605      << format("%.2f", floor((PressurePerCycle * 100) + 0.5) / 100) << "% ]";
606 
607   OS << "\nThroughput Bottlenecks: "
608      << "\n  Resource Pressure       [ "
609      << format("%.2f", floor((ResourcePressurePerCycle * 100) + 0.5) / 100)
610      << "% ]";
611 
612   if (BPI.PressureIncreaseCycles) {
613     ArrayRef<unsigned> Distribution = Tracker.getResourcePressureDistribution();
614     const MCSchedModel &SM = getSubTargetInfo().getSchedModel();
615     for (unsigned I = 0, E = Distribution.size(); I < E; ++I) {
616       unsigned ResourceCycles = Distribution[I];
617       if (ResourceCycles) {
618         double Frequency = (double)ResourceCycles * 100 / TotalCycles;
619         const MCProcResourceDesc &PRDesc = *SM.getProcResource(I);
620         OS << "\n  - " << PRDesc.Name << "  [ "
621            << format("%.2f", floor((Frequency * 100) + 0.5) / 100) << "% ]";
622       }
623     }
624   }
625 
626   OS << "\n  Data Dependencies:      [ "
627      << format("%.2f", floor((DDPerCycle * 100) + 0.5) / 100) << "% ]";
628   OS << "\n  - Register Dependencies [ "
629      << format("%.2f", floor((RegDepPressurePerCycle * 100) + 0.5) / 100)
630      << "% ]";
631   OS << "\n  - Memory Dependencies   [ "
632      << format("%.2f", floor((MemDepPressurePerCycle * 100) + 0.5) / 100)
633      << "% ]\n";
634 }
635 
printView(raw_ostream & OS) const636 void BottleneckAnalysis::printView(raw_ostream &OS) const {
637   std::string Buffer;
638   raw_string_ostream TempStream(Buffer);
639   printBottleneckHints(TempStream);
640   TempStream.flush();
641   OS << Buffer;
642   printCriticalSequence(OS);
643 }
644 
645 } // namespace mca.
646 } // namespace llvm
647