1 //===--------------------- BottleneckAnalysis.cpp ---------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 ///
10 /// This file implements the functionalities used by the BottleneckAnalysis
11 /// to report bottleneck info.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "Views/BottleneckAnalysis.h"
16 #include "llvm/MC/MCInst.h"
17 #include "llvm/MCA/Support.h"
18 #include "llvm/Support/Format.h"
19 
20 namespace llvm {
21 namespace mca {
22 
23 #define DEBUG_TYPE "llvm-mca"
24 
PressureTracker(const MCSchedModel & Model)25 PressureTracker::PressureTracker(const MCSchedModel &Model)
26     : SM(Model),
27       ResourcePressureDistribution(Model.getNumProcResourceKinds(), 0),
28       ProcResID2Mask(Model.getNumProcResourceKinds(), 0),
29       ResIdx2ProcResID(Model.getNumProcResourceKinds(), 0),
30       ProcResID2ResourceUsersIndex(Model.getNumProcResourceKinds(), 0) {
31   computeProcResourceMasks(SM, ProcResID2Mask);
32 
33   // Ignore the invalid resource at index zero.
34   unsigned NextResourceUsersIdx = 0;
35   for (unsigned I = 1, E = Model.getNumProcResourceKinds(); I < E; ++I) {
36     const MCProcResourceDesc &ProcResource = *SM.getProcResource(I);
37     ProcResID2ResourceUsersIndex[I] = NextResourceUsersIdx;
38     NextResourceUsersIdx += ProcResource.NumUnits;
39     uint64_t ResourceMask = ProcResID2Mask[I];
40     ResIdx2ProcResID[getResourceStateIndex(ResourceMask)] = I;
41   }
42 
43   ResourceUsers.resize(NextResourceUsersIdx);
44   std::fill(ResourceUsers.begin(), ResourceUsers.end(),
45             std::make_pair<unsigned, unsigned>(~0U, 0U));
46 }
47 
getResourceUsers(uint64_t ResourceMask,SmallVectorImpl<User> & Users) const48 void PressureTracker::getResourceUsers(uint64_t ResourceMask,
49                                        SmallVectorImpl<User> &Users) const {
50   unsigned Index = getResourceStateIndex(ResourceMask);
51   unsigned ProcResID = ResIdx2ProcResID[Index];
52   const MCProcResourceDesc &PRDesc = *SM.getProcResource(ProcResID);
53   for (unsigned I = 0, E = PRDesc.NumUnits; I < E; ++I) {
54     const User U = getResourceUser(ProcResID, I);
55     if (U.second && IPI.contains(U.first))
56       Users.emplace_back(U);
57   }
58 }
59 
onInstructionDispatched(unsigned IID)60 void PressureTracker::onInstructionDispatched(unsigned IID) {
61   IPI.insert(std::make_pair(IID, InstructionPressureInfo()));
62 }
63 
onInstructionExecuted(unsigned IID)64 void PressureTracker::onInstructionExecuted(unsigned IID) { IPI.erase(IID); }
65 
handleInstructionIssuedEvent(const HWInstructionIssuedEvent & Event)66 void PressureTracker::handleInstructionIssuedEvent(
67     const HWInstructionIssuedEvent &Event) {
68   unsigned IID = Event.IR.getSourceIndex();
69   for (const ResourceUse &Use : Event.UsedResources) {
70     const ResourceRef &RR = Use.first;
71     unsigned Index = ProcResID2ResourceUsersIndex[RR.first];
72     Index += llvm::countr_zero(RR.second);
73     ResourceUsers[Index] = std::make_pair(IID, Use.second.getNumerator());
74   }
75 }
76 
updateResourcePressureDistribution(uint64_t CumulativeMask)77 void PressureTracker::updateResourcePressureDistribution(
78     uint64_t CumulativeMask) {
79   while (CumulativeMask) {
80     uint64_t Current = CumulativeMask & (-CumulativeMask);
81     unsigned ResIdx = getResourceStateIndex(Current);
82     unsigned ProcResID = ResIdx2ProcResID[ResIdx];
83     uint64_t Mask = ProcResID2Mask[ProcResID];
84 
85     if (Mask == Current) {
86       ResourcePressureDistribution[ProcResID]++;
87       CumulativeMask ^= Current;
88       continue;
89     }
90 
91     Mask ^= Current;
92     while (Mask) {
93       uint64_t SubUnit = Mask & (-Mask);
94       ResIdx = getResourceStateIndex(SubUnit);
95       ProcResID = ResIdx2ProcResID[ResIdx];
96       ResourcePressureDistribution[ProcResID]++;
97       Mask ^= SubUnit;
98     }
99 
100     CumulativeMask ^= Current;
101   }
102 }
103 
handlePressureEvent(const HWPressureEvent & Event)104 void PressureTracker::handlePressureEvent(const HWPressureEvent &Event) {
105   assert(Event.Reason != HWPressureEvent::INVALID &&
106          "Unexpected invalid event!");
107 
108   switch (Event.Reason) {
109   default:
110     break;
111 
112   case HWPressureEvent::RESOURCES: {
113     const uint64_t ResourceMask = Event.ResourceMask;
114     updateResourcePressureDistribution(Event.ResourceMask);
115 
116     for (const InstRef &IR : Event.AffectedInstructions) {
117       const Instruction &IS = *IR.getInstruction();
118       unsigned BusyResources = IS.getCriticalResourceMask() & ResourceMask;
119       if (!BusyResources)
120         continue;
121 
122       unsigned IID = IR.getSourceIndex();
123       IPI[IID].ResourcePressureCycles++;
124     }
125     break;
126   }
127 
128   case HWPressureEvent::REGISTER_DEPS:
129     for (const InstRef &IR : Event.AffectedInstructions) {
130       unsigned IID = IR.getSourceIndex();
131       IPI[IID].RegisterPressureCycles++;
132     }
133     break;
134 
135   case HWPressureEvent::MEMORY_DEPS:
136     for (const InstRef &IR : Event.AffectedInstructions) {
137       unsigned IID = IR.getSourceIndex();
138       IPI[IID].MemoryPressureCycles++;
139     }
140   }
141 }
142 
143 #ifndef NDEBUG
dumpDependencyEdge(raw_ostream & OS,const DependencyEdge & DepEdge,MCInstPrinter & MCIP) const144 void DependencyGraph::dumpDependencyEdge(raw_ostream &OS,
145                                          const DependencyEdge &DepEdge,
146                                          MCInstPrinter &MCIP) const {
147   unsigned FromIID = DepEdge.FromIID;
148   unsigned ToIID = DepEdge.ToIID;
149   assert(FromIID < ToIID && "Graph should be acyclic!");
150 
151   const DependencyEdge::Dependency &DE = DepEdge.Dep;
152   assert(DE.Type != DependencyEdge::DT_INVALID && "Unexpected invalid edge!");
153 
154   OS << " FROM: " << FromIID << " TO: " << ToIID << "             ";
155   if (DE.Type == DependencyEdge::DT_REGISTER) {
156     OS << " - REGISTER: ";
157     MCIP.printRegName(OS, DE.ResourceOrRegID);
158   } else if (DE.Type == DependencyEdge::DT_MEMORY) {
159     OS << " - MEMORY";
160   } else {
161     assert(DE.Type == DependencyEdge::DT_RESOURCE &&
162            "Unsupported dependency type!");
163     OS << " - RESOURCE MASK: " << DE.ResourceOrRegID;
164   }
165   OS << " - COST: " << DE.Cost << '\n';
166 }
167 #endif // NDEBUG
168 
pruneEdges(unsigned Iterations)169 void DependencyGraph::pruneEdges(unsigned Iterations) {
170   for (DGNode &N : Nodes) {
171     unsigned NumPruned = 0;
172     const unsigned Size = N.OutgoingEdges.size();
173     // Use a cut-off threshold to prune edges with a low frequency.
174     for (unsigned I = 0, E = Size; I < E; ++I) {
175       DependencyEdge &Edge = N.OutgoingEdges[I];
176       if (Edge.Frequency == Iterations)
177         continue;
178       double Factor = (double)Edge.Frequency / Iterations;
179       if (0.10 < Factor)
180         continue;
181       Nodes[Edge.ToIID].NumPredecessors--;
182       std::swap(Edge, N.OutgoingEdges[E - 1]);
183       --E;
184       ++NumPruned;
185     }
186 
187     if (NumPruned)
188       N.OutgoingEdges.resize(Size - NumPruned);
189   }
190 }
191 
initializeRootSet(SmallVectorImpl<unsigned> & RootSet) const192 void DependencyGraph::initializeRootSet(
193     SmallVectorImpl<unsigned> &RootSet) const {
194   for (unsigned I = 0, E = Nodes.size(); I < E; ++I) {
195     const DGNode &N = Nodes[I];
196     if (N.NumPredecessors == 0 && !N.OutgoingEdges.empty())
197       RootSet.emplace_back(I);
198   }
199 }
200 
propagateThroughEdges(SmallVectorImpl<unsigned> & RootSet,unsigned Iterations)201 void DependencyGraph::propagateThroughEdges(SmallVectorImpl<unsigned> &RootSet,
202                                             unsigned Iterations) {
203   SmallVector<unsigned, 8> ToVisit;
204 
205   // A critical sequence is computed as the longest path from a node of the
206   // RootSet to a leaf node (i.e. a node with no successors).  The RootSet is
207   // composed of nodes with at least one successor, and no predecessors.
208   //
209   // Each node of the graph starts with an initial default cost of zero.  The
210   // cost of a node is a measure of criticality: the higher the cost, the bigger
211   // is the performance impact.
212   // For register and memory dependencies, the cost is a function of the write
213   // latency as well as the actual delay (in cycles) caused to users.
214   // For processor resource dependencies, the cost is a function of the resource
215   // pressure. Resource interferences with low frequency values are ignored.
216   //
217   // This algorithm is very similar to a (reverse) Dijkstra.  Every iteration of
218   // the inner loop selects (i.e. visits) a node N from a set of `unvisited
219   // nodes`, and then propagates the cost of N to all its neighbors.
220   //
221   // The `unvisited nodes` set initially contains all the nodes from the
222   // RootSet.  A node N is added to the `unvisited nodes` if all its
223   // predecessors have been visited already.
224   //
225   // For simplicity, every node tracks the number of unvisited incoming edges in
226   // field `NumVisitedPredecessors`.  When the value of that field drops to
227   // zero, then the corresponding node is added to a `ToVisit` set.
228   //
229   // At the end of every iteration of the outer loop, set `ToVisit` becomes our
230   // new `unvisited nodes` set.
231   //
232   // The algorithm terminates when the set of unvisited nodes (i.e. our RootSet)
233   // is empty. This algorithm works under the assumption that the graph is
234   // acyclic.
235   do {
236     for (unsigned IID : RootSet) {
237       const DGNode &N = Nodes[IID];
238       for (const DependencyEdge &DepEdge : N.OutgoingEdges) {
239         unsigned ToIID = DepEdge.ToIID;
240         DGNode &To = Nodes[ToIID];
241         uint64_t Cost = N.Cost + DepEdge.Dep.Cost;
242         // Check if this is the most expensive incoming edge seen so far.  In
243         // case, update the total cost of the destination node (ToIID), as well
244         // its field `CriticalPredecessor`.
245         if (Cost > To.Cost) {
246           To.CriticalPredecessor = DepEdge;
247           To.Cost = Cost;
248           To.Depth = N.Depth + 1;
249         }
250         To.NumVisitedPredecessors++;
251         if (To.NumVisitedPredecessors == To.NumPredecessors)
252           ToVisit.emplace_back(ToIID);
253       }
254     }
255 
256     std::swap(RootSet, ToVisit);
257     ToVisit.clear();
258   } while (!RootSet.empty());
259 }
260 
getCriticalSequence(SmallVectorImpl<const DependencyEdge * > & Seq) const261 void DependencyGraph::getCriticalSequence(
262     SmallVectorImpl<const DependencyEdge *> &Seq) const {
263   // At this stage, nodes of the graph have been already visited, and costs have
264   // been propagated through the edges (see method `propagateThroughEdges()`).
265 
266   // Identify the node N with the highest cost in the graph. By construction,
267   // that node is the last instruction of our critical sequence.
268   // Field N.Depth would tell us the total length of the sequence.
269   //
270   // To obtain the sequence of critical edges, we simply follow the chain of
271   // critical predecessors starting from node N (field
272   // DGNode::CriticalPredecessor).
273   const auto It = std::max_element(
274       Nodes.begin(), Nodes.end(),
275       [](const DGNode &Lhs, const DGNode &Rhs) { return Lhs.Cost < Rhs.Cost; });
276   unsigned IID = std::distance(Nodes.begin(), It);
277   Seq.resize(Nodes[IID].Depth);
278   for (const DependencyEdge *&DE : llvm::reverse(Seq)) {
279     const DGNode &N = Nodes[IID];
280     DE = &N.CriticalPredecessor;
281     IID = N.CriticalPredecessor.FromIID;
282   }
283 }
284 
printInstruction(formatted_raw_ostream & FOS,const MCInst & MCI,bool UseDifferentColor) const285 void BottleneckAnalysis::printInstruction(formatted_raw_ostream &FOS,
286                                           const MCInst &MCI,
287                                           bool UseDifferentColor) const {
288   FOS.PadToColumn(14);
289   if (UseDifferentColor)
290     FOS.changeColor(raw_ostream::CYAN, true, false);
291   FOS << printInstructionString(MCI);
292   if (UseDifferentColor)
293     FOS.resetColor();
294 }
295 
printCriticalSequence(raw_ostream & OS) const296 void BottleneckAnalysis::printCriticalSequence(raw_ostream &OS) const {
297   // Early exit if no bottlenecks were found during the simulation.
298   if (!SeenStallCycles || !BPI.PressureIncreaseCycles)
299     return;
300 
301   SmallVector<const DependencyEdge *, 16> Seq;
302   DG.getCriticalSequence(Seq);
303   if (Seq.empty())
304     return;
305 
306   OS << "\nCritical sequence based on the simulation:\n\n";
307 
308   const DependencyEdge &FirstEdge = *Seq[0];
309   ArrayRef<llvm::MCInst> Source = getSource();
310   unsigned FromIID = FirstEdge.FromIID % Source.size();
311   unsigned ToIID = FirstEdge.ToIID % Source.size();
312   bool IsLoopCarried = FromIID >= ToIID;
313 
314   formatted_raw_ostream FOS(OS);
315   FOS.PadToColumn(14);
316   FOS << "Instruction";
317   FOS.PadToColumn(58);
318   FOS << "Dependency Information";
319 
320   bool HasColors = FOS.has_colors();
321 
322   unsigned CurrentIID = 0;
323   if (IsLoopCarried) {
324     FOS << "\n +----< " << FromIID << ".";
325     printInstruction(FOS, Source[FromIID], HasColors);
326     FOS << "\n |\n |    < loop carried > \n |";
327   } else {
328     while (CurrentIID < FromIID) {
329       FOS << "\n        " << CurrentIID << ".";
330       printInstruction(FOS, Source[CurrentIID]);
331       CurrentIID++;
332     }
333 
334     FOS << "\n +----< " << CurrentIID << ".";
335     printInstruction(FOS, Source[CurrentIID], HasColors);
336     CurrentIID++;
337   }
338 
339   for (const DependencyEdge *&DE : Seq) {
340     ToIID = DE->ToIID % Source.size();
341     unsigned LastIID = CurrentIID > ToIID ? Source.size() : ToIID;
342 
343     while (CurrentIID < LastIID) {
344       FOS << "\n |      " << CurrentIID << ".";
345       printInstruction(FOS, Source[CurrentIID]);
346       CurrentIID++;
347     }
348 
349     if (CurrentIID == ToIID) {
350       FOS << "\n +----> " << ToIID << ".";
351       printInstruction(FOS, Source[CurrentIID], HasColors);
352     } else {
353       FOS << "\n |\n |    < loop carried > \n |"
354           << "\n +----> " << ToIID << ".";
355       printInstruction(FOS, Source[ToIID], HasColors);
356     }
357     FOS.PadToColumn(58);
358 
359     const DependencyEdge::Dependency &Dep = DE->Dep;
360     if (HasColors)
361       FOS.changeColor(raw_ostream::SAVEDCOLOR, true, false);
362 
363     if (Dep.Type == DependencyEdge::DT_REGISTER) {
364       FOS << "## REGISTER dependency:  ";
365       if (HasColors)
366         FOS.changeColor(raw_ostream::MAGENTA, true, false);
367       getInstPrinter().printRegName(FOS, Dep.ResourceOrRegID);
368     } else if (Dep.Type == DependencyEdge::DT_MEMORY) {
369       FOS << "## MEMORY dependency.";
370     } else {
371       assert(Dep.Type == DependencyEdge::DT_RESOURCE &&
372              "Unsupported dependency type!");
373       FOS << "## RESOURCE interference:  ";
374       if (HasColors)
375         FOS.changeColor(raw_ostream::MAGENTA, true, false);
376       FOS << Tracker.resolveResourceName(Dep.ResourceOrRegID);
377       if (HasColors) {
378         FOS.resetColor();
379         FOS.changeColor(raw_ostream::SAVEDCOLOR, true, false);
380       }
381       FOS << " [ probability: " << ((DE->Frequency * 100) / Iterations)
382           << "% ]";
383     }
384     if (HasColors)
385       FOS.resetColor();
386     ++CurrentIID;
387   }
388 
389   while (CurrentIID < Source.size()) {
390     FOS << "\n        " << CurrentIID << ".";
391     printInstruction(FOS, Source[CurrentIID]);
392     CurrentIID++;
393   }
394 
395   FOS << '\n';
396   FOS.flush();
397 }
398 
399 #ifndef NDEBUG
dump(raw_ostream & OS,MCInstPrinter & MCIP) const400 void DependencyGraph::dump(raw_ostream &OS, MCInstPrinter &MCIP) const {
401   OS << "\nREG DEPS\n";
402   for (const DGNode &Node : Nodes)
403     for (const DependencyEdge &DE : Node.OutgoingEdges)
404       if (DE.Dep.Type == DependencyEdge::DT_REGISTER)
405         dumpDependencyEdge(OS, DE, MCIP);
406 
407   OS << "\nMEM DEPS\n";
408   for (const DGNode &Node : Nodes)
409     for (const DependencyEdge &DE : Node.OutgoingEdges)
410       if (DE.Dep.Type == DependencyEdge::DT_MEMORY)
411         dumpDependencyEdge(OS, DE, MCIP);
412 
413   OS << "\nRESOURCE DEPS\n";
414   for (const DGNode &Node : Nodes)
415     for (const DependencyEdge &DE : Node.OutgoingEdges)
416       if (DE.Dep.Type == DependencyEdge::DT_RESOURCE)
417         dumpDependencyEdge(OS, DE, MCIP);
418 }
419 #endif // NDEBUG
420 
addDependency(unsigned From,unsigned To,DependencyEdge::Dependency && Dep)421 void DependencyGraph::addDependency(unsigned From, unsigned To,
422                                     DependencyEdge::Dependency &&Dep) {
423   DGNode &NodeFrom = Nodes[From];
424   DGNode &NodeTo = Nodes[To];
425   SmallVectorImpl<DependencyEdge> &Vec = NodeFrom.OutgoingEdges;
426 
427   auto It = find_if(Vec, [To, Dep](DependencyEdge &DE) {
428     return DE.ToIID == To && DE.Dep.ResourceOrRegID == Dep.ResourceOrRegID;
429   });
430 
431   if (It != Vec.end()) {
432     It->Dep.Cost += Dep.Cost;
433     It->Frequency++;
434     return;
435   }
436 
437   DependencyEdge DE = {Dep, From, To, 1};
438   Vec.emplace_back(DE);
439   NodeTo.NumPredecessors++;
440 }
441 
BottleneckAnalysis(const MCSubtargetInfo & sti,MCInstPrinter & Printer,ArrayRef<MCInst> S,unsigned NumIter)442 BottleneckAnalysis::BottleneckAnalysis(const MCSubtargetInfo &sti,
443                                        MCInstPrinter &Printer,
444                                        ArrayRef<MCInst> S, unsigned NumIter)
445     : InstructionView(sti, Printer, S), Tracker(sti.getSchedModel()),
446       DG(S.size() * 3), Iterations(NumIter), TotalCycles(0),
447       PressureIncreasedBecauseOfResources(false),
448       PressureIncreasedBecauseOfRegisterDependencies(false),
449       PressureIncreasedBecauseOfMemoryDependencies(false),
450       SeenStallCycles(false), BPI() {}
451 
addRegisterDep(unsigned From,unsigned To,unsigned RegID,unsigned Cost)452 void BottleneckAnalysis::addRegisterDep(unsigned From, unsigned To,
453                                         unsigned RegID, unsigned Cost) {
454   bool IsLoopCarried = From >= To;
455   unsigned SourceSize = getSource().size();
456   if (IsLoopCarried) {
457     DG.addRegisterDep(From, To + SourceSize, RegID, Cost);
458     DG.addRegisterDep(From + SourceSize, To + (SourceSize * 2), RegID, Cost);
459     return;
460   }
461   DG.addRegisterDep(From + SourceSize, To + SourceSize, RegID, Cost);
462 }
463 
addMemoryDep(unsigned From,unsigned To,unsigned Cost)464 void BottleneckAnalysis::addMemoryDep(unsigned From, unsigned To,
465                                       unsigned Cost) {
466   bool IsLoopCarried = From >= To;
467   unsigned SourceSize = getSource().size();
468   if (IsLoopCarried) {
469     DG.addMemoryDep(From, To + SourceSize, Cost);
470     DG.addMemoryDep(From + SourceSize, To + (SourceSize * 2), Cost);
471     return;
472   }
473   DG.addMemoryDep(From + SourceSize, To + SourceSize, Cost);
474 }
475 
addResourceDep(unsigned From,unsigned To,uint64_t Mask,unsigned Cost)476 void BottleneckAnalysis::addResourceDep(unsigned From, unsigned To,
477                                         uint64_t Mask, unsigned Cost) {
478   bool IsLoopCarried = From >= To;
479   unsigned SourceSize = getSource().size();
480   if (IsLoopCarried) {
481     DG.addResourceDep(From, To + SourceSize, Mask, Cost);
482     DG.addResourceDep(From + SourceSize, To + (SourceSize * 2), Mask, Cost);
483     return;
484   }
485   DG.addResourceDep(From + SourceSize, To + SourceSize, Mask, Cost);
486 }
487 
onEvent(const HWInstructionEvent & Event)488 void BottleneckAnalysis::onEvent(const HWInstructionEvent &Event) {
489   const unsigned IID = Event.IR.getSourceIndex();
490   if (Event.Type == HWInstructionEvent::Dispatched) {
491     Tracker.onInstructionDispatched(IID);
492     return;
493   }
494   if (Event.Type == HWInstructionEvent::Executed) {
495     Tracker.onInstructionExecuted(IID);
496     return;
497   }
498 
499   if (Event.Type != HWInstructionEvent::Issued)
500     return;
501 
502   ArrayRef<llvm::MCInst> Source = getSource();
503   const Instruction &IS = *Event.IR.getInstruction();
504   unsigned To = IID % Source.size();
505 
506   unsigned Cycles = 2 * Tracker.getResourcePressureCycles(IID);
507   uint64_t ResourceMask = IS.getCriticalResourceMask();
508   SmallVector<std::pair<unsigned, unsigned>, 4> Users;
509   while (ResourceMask) {
510     uint64_t Current = ResourceMask & (-ResourceMask);
511     Tracker.getResourceUsers(Current, Users);
512     for (const std::pair<unsigned, unsigned> &U : Users)
513       addResourceDep(U.first % Source.size(), To, Current, U.second + Cycles);
514     Users.clear();
515     ResourceMask ^= Current;
516   }
517 
518   const CriticalDependency &RegDep = IS.getCriticalRegDep();
519   if (RegDep.Cycles) {
520     Cycles = RegDep.Cycles + 2 * Tracker.getRegisterPressureCycles(IID);
521     unsigned From = RegDep.IID % Source.size();
522     addRegisterDep(From, To, RegDep.RegID, Cycles);
523   }
524 
525   const CriticalDependency &MemDep = IS.getCriticalMemDep();
526   if (MemDep.Cycles) {
527     Cycles = MemDep.Cycles + 2 * Tracker.getMemoryPressureCycles(IID);
528     unsigned From = MemDep.IID % Source.size();
529     addMemoryDep(From, To, Cycles);
530   }
531 
532   Tracker.handleInstructionIssuedEvent(
533       static_cast<const HWInstructionIssuedEvent &>(Event));
534 
535   // Check if this is the last simulated instruction.
536   if (IID == ((Iterations * Source.size()) - 1))
537     DG.finalizeGraph(Iterations);
538 }
539 
onEvent(const HWPressureEvent & Event)540 void BottleneckAnalysis::onEvent(const HWPressureEvent &Event) {
541   assert(Event.Reason != HWPressureEvent::INVALID &&
542          "Unexpected invalid event!");
543 
544   Tracker.handlePressureEvent(Event);
545 
546   switch (Event.Reason) {
547   default:
548     break;
549 
550   case HWPressureEvent::RESOURCES:
551     PressureIncreasedBecauseOfResources = true;
552     break;
553   case HWPressureEvent::REGISTER_DEPS:
554     PressureIncreasedBecauseOfRegisterDependencies = true;
555     break;
556   case HWPressureEvent::MEMORY_DEPS:
557     PressureIncreasedBecauseOfMemoryDependencies = true;
558     break;
559   }
560 }
561 
onCycleEnd()562 void BottleneckAnalysis::onCycleEnd() {
563   ++TotalCycles;
564 
565   bool PressureIncreasedBecauseOfDataDependencies =
566       PressureIncreasedBecauseOfRegisterDependencies ||
567       PressureIncreasedBecauseOfMemoryDependencies;
568   if (!PressureIncreasedBecauseOfResources &&
569       !PressureIncreasedBecauseOfDataDependencies)
570     return;
571 
572   ++BPI.PressureIncreaseCycles;
573   if (PressureIncreasedBecauseOfRegisterDependencies)
574     ++BPI.RegisterDependencyCycles;
575   if (PressureIncreasedBecauseOfMemoryDependencies)
576     ++BPI.MemoryDependencyCycles;
577   if (PressureIncreasedBecauseOfDataDependencies)
578     ++BPI.DataDependencyCycles;
579   if (PressureIncreasedBecauseOfResources)
580     ++BPI.ResourcePressureCycles;
581   PressureIncreasedBecauseOfResources = false;
582   PressureIncreasedBecauseOfRegisterDependencies = false;
583   PressureIncreasedBecauseOfMemoryDependencies = false;
584 }
585 
printBottleneckHints(raw_ostream & OS) const586 void BottleneckAnalysis::printBottleneckHints(raw_ostream &OS) const {
587   if (!SeenStallCycles || !BPI.PressureIncreaseCycles) {
588     OS << "\n\nNo resource or data dependency bottlenecks discovered.\n";
589     return;
590   }
591 
592   double PressurePerCycle =
593       (double)BPI.PressureIncreaseCycles * 100 / TotalCycles;
594   double ResourcePressurePerCycle =
595       (double)BPI.ResourcePressureCycles * 100 / TotalCycles;
596   double DDPerCycle = (double)BPI.DataDependencyCycles * 100 / TotalCycles;
597   double RegDepPressurePerCycle =
598       (double)BPI.RegisterDependencyCycles * 100 / TotalCycles;
599   double MemDepPressurePerCycle =
600       (double)BPI.MemoryDependencyCycles * 100 / TotalCycles;
601 
602   OS << "\n\nCycles with backend pressure increase [ "
603      << format("%.2f", floor((PressurePerCycle * 100) + 0.5) / 100) << "% ]";
604 
605   OS << "\nThroughput Bottlenecks: "
606      << "\n  Resource Pressure       [ "
607      << format("%.2f", floor((ResourcePressurePerCycle * 100) + 0.5) / 100)
608      << "% ]";
609 
610   if (BPI.PressureIncreaseCycles) {
611     ArrayRef<unsigned> Distribution = Tracker.getResourcePressureDistribution();
612     const MCSchedModel &SM = getSubTargetInfo().getSchedModel();
613     for (unsigned I = 0, E = Distribution.size(); I < E; ++I) {
614       unsigned ReleaseAtCycles = Distribution[I];
615       if (ReleaseAtCycles) {
616         double Frequency = (double)ReleaseAtCycles * 100 / TotalCycles;
617         const MCProcResourceDesc &PRDesc = *SM.getProcResource(I);
618         OS << "\n  - " << PRDesc.Name << "  [ "
619            << format("%.2f", floor((Frequency * 100) + 0.5) / 100) << "% ]";
620       }
621     }
622   }
623 
624   OS << "\n  Data Dependencies:      [ "
625      << format("%.2f", floor((DDPerCycle * 100) + 0.5) / 100) << "% ]";
626   OS << "\n  - Register Dependencies [ "
627      << format("%.2f", floor((RegDepPressurePerCycle * 100) + 0.5) / 100)
628      << "% ]";
629   OS << "\n  - Memory Dependencies   [ "
630      << format("%.2f", floor((MemDepPressurePerCycle * 100) + 0.5) / 100)
631      << "% ]\n";
632 }
633 
printView(raw_ostream & OS) const634 void BottleneckAnalysis::printView(raw_ostream &OS) const {
635   std::string Buffer;
636   raw_string_ostream TempStream(Buffer);
637   printBottleneckHints(TempStream);
638   TempStream.flush();
639   OS << Buffer;
640   printCriticalSequence(OS);
641 }
642 
643 } // namespace mca.
644 } // namespace llvm
645