1 //===--------------------- BottleneckAnalysis.cpp ---------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 ///
10 /// This file implements the functionalities used by the BottleneckAnalysis
11 /// to report bottleneck info.
12 ///
13 //===----------------------------------------------------------------------===//
14
15 #include "Views/BottleneckAnalysis.h"
16 #include "llvm/MC/MCInst.h"
17 #include "llvm/MCA/Support.h"
18 #include "llvm/Support/Format.h"
19
20 namespace llvm {
21 namespace mca {
22
23 #define DEBUG_TYPE "llvm-mca"
24
PressureTracker(const MCSchedModel & Model)25 PressureTracker::PressureTracker(const MCSchedModel &Model)
26 : SM(Model),
27 ResourcePressureDistribution(Model.getNumProcResourceKinds(), 0),
28 ProcResID2Mask(Model.getNumProcResourceKinds(), 0),
29 ResIdx2ProcResID(Model.getNumProcResourceKinds(), 0),
30 ProcResID2ResourceUsersIndex(Model.getNumProcResourceKinds(), 0) {
31 computeProcResourceMasks(SM, ProcResID2Mask);
32
33 // Ignore the invalid resource at index zero.
34 unsigned NextResourceUsersIdx = 0;
35 for (unsigned I = 1, E = Model.getNumProcResourceKinds(); I < E; ++I) {
36 const MCProcResourceDesc &ProcResource = *SM.getProcResource(I);
37 ProcResID2ResourceUsersIndex[I] = NextResourceUsersIdx;
38 NextResourceUsersIdx += ProcResource.NumUnits;
39 uint64_t ResourceMask = ProcResID2Mask[I];
40 ResIdx2ProcResID[getResourceStateIndex(ResourceMask)] = I;
41 }
42
43 ResourceUsers.resize(NextResourceUsersIdx);
44 std::fill(ResourceUsers.begin(), ResourceUsers.end(),
45 std::make_pair<unsigned, unsigned>(~0U, 0U));
46 }
47
getResourceUsers(uint64_t ResourceMask,SmallVectorImpl<User> & Users) const48 void PressureTracker::getResourceUsers(uint64_t ResourceMask,
49 SmallVectorImpl<User> &Users) const {
50 unsigned Index = getResourceStateIndex(ResourceMask);
51 unsigned ProcResID = ResIdx2ProcResID[Index];
52 const MCProcResourceDesc &PRDesc = *SM.getProcResource(ProcResID);
53 for (unsigned I = 0, E = PRDesc.NumUnits; I < E; ++I) {
54 const User U = getResourceUser(ProcResID, I);
55 if (U.second && IPI.find(U.first) != IPI.end())
56 Users.emplace_back(U);
57 }
58 }
59
onInstructionDispatched(unsigned IID)60 void PressureTracker::onInstructionDispatched(unsigned IID) {
61 IPI.insert(std::make_pair(IID, InstructionPressureInfo()));
62 }
63
// Stop tracking pressure information once the instruction has executed.
void PressureTracker::onInstructionExecuted(unsigned IID) { IPI.erase(IID); }
65
// Record which instruction currently occupies each resource unit consumed by
// the issued instruction, and for how many cycles.
void PressureTracker::handleInstructionIssuedEvent(
    const HWInstructionIssuedEvent &Event) {
  unsigned IID = Event.IR.getSourceIndex();
  using ResourceRef = HWInstructionIssuedEvent::ResourceRef;
  using ResourceUse = std::pair<ResourceRef, ResourceCycles>;
  for (const ResourceUse &Use : Event.UsedResources) {
    const ResourceRef &RR = Use.first;
    // RR.first is the processor resource ID; RR.second is a one-hot mask
    // selecting the specific unit within that resource, so the trailing-zero
    // count gives the unit index inside the resource's slot range.
    unsigned Index = ProcResID2ResourceUsersIndex[RR.first];
    Index += countTrailingZeros(RR.second);
    ResourceUsers[Index] = std::make_pair(IID, Use.second.getNumerator());
  }
}
78
// Bump the pressure counters of every processor resource named by
// CumulativeMask (a union of resource bit-masks).
void PressureTracker::updateResourcePressureDistribution(
    uint64_t CumulativeMask) {
  while (CumulativeMask) {
    // Isolate the lowest set bit.
    uint64_t Current = CumulativeMask & (-CumulativeMask);
    unsigned ResIdx = getResourceStateIndex(Current);
    unsigned ProcResID = ResIdx2ProcResID[ResIdx];
    uint64_t Mask = ProcResID2Mask[ProcResID];

    if (Mask == Current) {
      // A simple resource unit: attribute the pressure directly to it.
      ResourcePressureDistribution[ProcResID]++;
      CumulativeMask ^= Current;
      continue;
    }

    // A resource group: distribute the pressure over each sub-unit of the
    // group. The group's own bit (Current) is cleared from Mask first so that
    // only the constituent units are counted.
    Mask ^= Current;
    while (Mask) {
      uint64_t SubUnit = Mask & (-Mask); // Lowest remaining sub-unit bit.
      ResIdx = getResourceStateIndex(SubUnit);
      ProcResID = ResIdx2ProcResID[ResIdx];
      ResourcePressureDistribution[ProcResID]++;
      Mask ^= SubUnit;
    }

    CumulativeMask ^= Current;
  }
}
105
// Charge one pressure cycle to every instruction affected by the event,
// bucketed by the event reason (resources / register deps / memory deps).
void PressureTracker::handlePressureEvent(const HWPressureEvent &Event) {
  assert(Event.Reason != HWPressureEvent::INVALID &&
         "Unexpected invalid event!");

  switch (Event.Reason) {
  default:
    break;

  case HWPressureEvent::RESOURCES: {
    const uint64_t ResourceMask = Event.ResourceMask;
    updateResourcePressureDistribution(Event.ResourceMask);

    for (const InstRef &IR : Event.AffectedInstructions) {
      const Instruction &IS = *IR.getInstruction();
      // Only charge instructions whose critical resource is among the
      // resources reported as busy by the event.
      unsigned BusyResources = IS.getCriticalResourceMask() & ResourceMask;
      if (!BusyResources)
        continue;

      unsigned IID = IR.getSourceIndex();
      // NOTE(review): operator[] default-constructs an entry if IID is not
      // tracked yet — presumably the instruction was dispatched earlier.
      IPI[IID].ResourcePressureCycles++;
    }
    break;
  }

  case HWPressureEvent::REGISTER_DEPS:
    for (const InstRef &IR : Event.AffectedInstructions) {
      unsigned IID = IR.getSourceIndex();
      IPI[IID].RegisterPressureCycles++;
    }
    break;

  case HWPressureEvent::MEMORY_DEPS:
    for (const InstRef &IR : Event.AffectedInstructions) {
      unsigned IID = IR.getSourceIndex();
      IPI[IID].MemoryPressureCycles++;
    }
  }
}
144
145 #ifndef NDEBUG
// Debug helper: print a single dependency edge in a human-readable form.
void DependencyGraph::dumpDependencyEdge(raw_ostream &OS,
                                         const DependencyEdge &DepEdge,
                                         MCInstPrinter &MCIP) const {
  unsigned FromIID = DepEdge.FromIID;
  unsigned ToIID = DepEdge.ToIID;
  // Edges always point from a lower to a higher node ID.
  assert(FromIID < ToIID && "Graph should be acyclic!");

  const DependencyEdge::Dependency &DE = DepEdge.Dep;
  assert(DE.Type != DependencyEdge::DT_INVALID && "Unexpected invalid edge!");

  OS << " FROM: " << FromIID << " TO: " << ToIID << " ";
  if (DE.Type == DependencyEdge::DT_REGISTER) {
    OS << " - REGISTER: ";
    MCIP.printRegName(OS, DE.ResourceOrRegID);
  } else if (DE.Type == DependencyEdge::DT_MEMORY) {
    OS << " - MEMORY";
  } else {
    assert(DE.Type == DependencyEdge::DT_RESOURCE &&
           "Unsupported dependency type!");
    // For resource edges, ResourceOrRegID holds the resource bit-mask.
    OS << " - RESOURCE MASK: " << DE.ResourceOrRegID;
  }
  OS << " - COST: " << DE.Cost << '\n';
}
169 #endif // NDEBUG
170
pruneEdges(unsigned Iterations)171 void DependencyGraph::pruneEdges(unsigned Iterations) {
172 for (DGNode &N : Nodes) {
173 unsigned NumPruned = 0;
174 const unsigned Size = N.OutgoingEdges.size();
175 // Use a cut-off threshold to prune edges with a low frequency.
176 for (unsigned I = 0, E = Size; I < E; ++I) {
177 DependencyEdge &Edge = N.OutgoingEdges[I];
178 if (Edge.Frequency == Iterations)
179 continue;
180 double Factor = (double)Edge.Frequency / Iterations;
181 if (0.10 < Factor)
182 continue;
183 Nodes[Edge.ToIID].NumPredecessors--;
184 std::swap(Edge, N.OutgoingEdges[E - 1]);
185 --E;
186 ++NumPruned;
187 }
188
189 if (NumPruned)
190 N.OutgoingEdges.resize(Size - NumPruned);
191 }
192 }
193
initializeRootSet(SmallVectorImpl<unsigned> & RootSet) const194 void DependencyGraph::initializeRootSet(
195 SmallVectorImpl<unsigned> &RootSet) const {
196 for (unsigned I = 0, E = Nodes.size(); I < E; ++I) {
197 const DGNode &N = Nodes[I];
198 if (N.NumPredecessors == 0 && !N.OutgoingEdges.empty())
199 RootSet.emplace_back(I);
200 }
201 }
202
void DependencyGraph::propagateThroughEdges(
    SmallVectorImpl<unsigned> &RootSet, unsigned Iterations) {
  SmallVector<unsigned, 8> ToVisit;

  // A critical sequence is computed as the longest path from a node of the
  // RootSet to a leaf node (i.e. a node with no successors). The RootSet is
  // composed of nodes with at least one successor, and no predecessors.
  //
  // Each node of the graph starts with an initial default cost of zero. The
  // cost of a node is a measure of criticality: the higher the cost, the bigger
  // is the performance impact.
  // For register and memory dependencies, the cost is a function of the write
  // latency as well as the actual delay (in cycles) caused to users.
  // For processor resource dependencies, the cost is a function of the resource
  // pressure. Resource interferences with low frequency values are ignored.
  //
  // This algorithm is very similar to a (reverse) Dijkstra. Every iteration of
  // the inner loop selects (i.e. visits) a node N from a set of `unvisited
  // nodes`, and then propagates the cost of N to all its neighbors.
  //
  // The `unvisited nodes` set initially contains all the nodes from the
  // RootSet. A node N is added to the `unvisited nodes` if all its
  // predecessors have been visited already.
  //
  // For simplicity, every node counts its already-visited predecessors in
  // field `NumVisitedPredecessors`. When that count reaches the node's total
  // `NumPredecessors`, the node is added to the `ToVisit` set.
  //
  // At the end of every iteration of the outer loop, set `ToVisit` becomes our
  // new `unvisited nodes` set.
  //
  // The algorithm terminates when the set of unvisited nodes (i.e. our RootSet)
  // is empty. This algorithm works under the assumption that the graph is
  // acyclic.
  do {
    for (unsigned IID : RootSet) {
      const DGNode &N = Nodes[IID];
      for (const DependencyEdge &DepEdge : N.OutgoingEdges) {
        unsigned ToIID = DepEdge.ToIID;
        DGNode &To = Nodes[ToIID];
        uint64_t Cost = N.Cost + DepEdge.Dep.Cost;
        // Check if this is the most expensive incoming edge seen so far. In
        // case, update the total cost of the destination node (ToIID), as well
        // its field `CriticalPredecessor`.
        if (Cost > To.Cost) {
          To.CriticalPredecessor = DepEdge;
          To.Cost = Cost;
          To.Depth = N.Depth + 1;
        }
        To.NumVisitedPredecessors++;
        if (To.NumVisitedPredecessors == To.NumPredecessors)
          ToVisit.emplace_back(ToIID);
      }
    }

    std::swap(RootSet, ToVisit);
    ToVisit.clear();
  } while (!RootSet.empty());
}
262
getCriticalSequence(SmallVectorImpl<const DependencyEdge * > & Seq) const263 void DependencyGraph::getCriticalSequence(
264 SmallVectorImpl<const DependencyEdge *> &Seq) const {
265 // At this stage, nodes of the graph have been already visited, and costs have
266 // been propagated through the edges (see method `propagateThroughEdges()`).
267
268 // Identify the node N with the highest cost in the graph. By construction,
269 // that node is the last instruction of our critical sequence.
270 // Field N.Depth would tell us the total length of the sequence.
271 //
272 // To obtain the sequence of critical edges, we simply follow the chain of critical
273 // predecessors starting from node N (field DGNode::CriticalPredecessor).
274 const auto It = std::max_element(
275 Nodes.begin(), Nodes.end(),
276 [](const DGNode &Lhs, const DGNode &Rhs) { return Lhs.Cost < Rhs.Cost; });
277 unsigned IID = std::distance(Nodes.begin(), It);
278 Seq.resize(Nodes[IID].Depth);
279 for (unsigned I = Seq.size(), E = 0; I > E; --I) {
280 const DGNode &N = Nodes[IID];
281 Seq[I - 1] = &N.CriticalPredecessor;
282 IID = N.CriticalPredecessor.FromIID;
283 }
284 }
285
printInstruction(formatted_raw_ostream & FOS,const MCInst & MCI,bool UseDifferentColor) const286 void BottleneckAnalysis::printInstruction(formatted_raw_ostream &FOS,
287 const MCInst &MCI,
288 bool UseDifferentColor) const {
289 FOS.PadToColumn(14);
290
291 if (UseDifferentColor)
292 FOS.changeColor(raw_ostream::CYAN, true, false);
293 FOS << printInstructionString(MCI);
294 if (UseDifferentColor)
295 FOS.resetColor();
296 }
297
// Render the critical dependency sequence as an annotated instruction
// listing, with arrows marking the edges and a right-hand column describing
// each dependency.
void BottleneckAnalysis::printCriticalSequence(raw_ostream &OS) const {
  // Early exit if no bottlenecks were found during the simulation.
  if (!SeenStallCycles || !BPI.PressureIncreaseCycles)
    return;

  SmallVector<const DependencyEdge *, 16> Seq;
  DG.getCriticalSequence(Seq);
  if (Seq.empty())
    return;

  OS << "\nCritical sequence based on the simulation:\n\n";

  const DependencyEdge &FirstEdge = *Seq[0];
  ArrayRef<llvm::MCInst> Source = getSource();
  // Graph node IDs span multiple loop iterations; reduce them modulo the
  // source size to recover indices into the input instruction sequence.
  unsigned FromIID = FirstEdge.FromIID % Source.size();
  unsigned ToIID = FirstEdge.ToIID % Source.size();
  bool IsLoopCarried = FromIID >= ToIID;

  formatted_raw_ostream FOS(OS);
  FOS.PadToColumn(14);
  FOS << "Instruction";
  FOS.PadToColumn(58);
  FOS << "Dependency Information";

  bool HasColors = FOS.has_colors();

  unsigned CurrentIID = 0;
  if (IsLoopCarried) {
    // The sequence starts with a loop-carried edge.
    FOS << "\n +----< " << FromIID << ".";
    printInstruction(FOS, Source[FromIID], HasColors);
    FOS << "\n |\n | < loop carried > \n |";
  } else {
    // Print the instructions preceding the start of the sequence.
    while (CurrentIID < FromIID) {
      FOS << "\n " << CurrentIID << ".";
      printInstruction(FOS, Source[CurrentIID]);
      CurrentIID++;
    }

    FOS << "\n +----< " << CurrentIID << ".";
    printInstruction(FOS, Source[CurrentIID], HasColors);
    CurrentIID++;
  }

  for (const DependencyEdge *&DE : Seq) {
    ToIID = DE->ToIID % Source.size();
    // If the edge wraps to the next iteration, print through the end of the
    // source first.
    unsigned LastIID = CurrentIID > ToIID ? Source.size() : ToIID;

    while (CurrentIID < LastIID) {
      FOS << "\n | " << CurrentIID << ".";
      printInstruction(FOS, Source[CurrentIID]);
      CurrentIID++;
    }

    if (CurrentIID == ToIID) {
      FOS << "\n +----> " << ToIID << ".";
      printInstruction(FOS, Source[CurrentIID], HasColors);
    } else {
      // Loop-carried edge: annotate the wrap-around before the target.
      FOS << "\n |\n | < loop carried > \n |"
          << "\n +----> " << ToIID << ".";
      printInstruction(FOS, Source[ToIID], HasColors);
    }
    FOS.PadToColumn(58);

    // Describe the dependency carried by this edge in the right column.
    const DependencyEdge::Dependency &Dep = DE->Dep;
    if (HasColors)
      FOS.changeColor(raw_ostream::SAVEDCOLOR, true, false);

    if (Dep.Type == DependencyEdge::DT_REGISTER) {
      FOS << "## REGISTER dependency: ";
      if (HasColors)
        FOS.changeColor(raw_ostream::MAGENTA, true, false);
      getInstPrinter().printRegName(FOS, Dep.ResourceOrRegID);
    } else if (Dep.Type == DependencyEdge::DT_MEMORY) {
      FOS << "## MEMORY dependency.";
    } else {
      assert(Dep.Type == DependencyEdge::DT_RESOURCE &&
             "Unsupported dependency type!");
      FOS << "## RESOURCE interference: ";
      if (HasColors)
        FOS.changeColor(raw_ostream::MAGENTA, true, false);
      FOS << Tracker.resolveResourceName(Dep.ResourceOrRegID);
      if (HasColors) {
        FOS.resetColor();
        FOS.changeColor(raw_ostream::SAVEDCOLOR, true, false);
      }
      FOS << " [ probability: " << ((DE->Frequency * 100) / Iterations)
          << "% ]";
    }
    if (HasColors)
      FOS.resetColor();
    ++CurrentIID;
  }

  // Print any remaining instructions after the end of the sequence.
  while (CurrentIID < Source.size()) {
    FOS << "\n " << CurrentIID << ".";
    printInstruction(FOS, Source[CurrentIID]);
    CurrentIID++;
  }

  FOS << '\n';
  FOS.flush();
}
400
401 #ifndef NDEBUG
dump(raw_ostream & OS,MCInstPrinter & MCIP) const402 void DependencyGraph::dump(raw_ostream &OS, MCInstPrinter &MCIP) const {
403 OS << "\nREG DEPS\n";
404 for (const DGNode &Node : Nodes)
405 for (const DependencyEdge &DE : Node.OutgoingEdges)
406 if (DE.Dep.Type == DependencyEdge::DT_REGISTER)
407 dumpDependencyEdge(OS, DE, MCIP);
408
409 OS << "\nMEM DEPS\n";
410 for (const DGNode &Node : Nodes)
411 for (const DependencyEdge &DE : Node.OutgoingEdges)
412 if (DE.Dep.Type == DependencyEdge::DT_MEMORY)
413 dumpDependencyEdge(OS, DE, MCIP);
414
415 OS << "\nRESOURCE DEPS\n";
416 for (const DGNode &Node : Nodes)
417 for (const DependencyEdge &DE : Node.OutgoingEdges)
418 if (DE.Dep.Type == DependencyEdge::DT_RESOURCE)
419 dumpDependencyEdge(OS, DE, MCIP);
420 }
421 #endif // NDEBUG
422
addDependency(unsigned From,unsigned To,DependencyEdge::Dependency && Dep)423 void DependencyGraph::addDependency(unsigned From, unsigned To,
424 DependencyEdge::Dependency &&Dep) {
425 DGNode &NodeFrom = Nodes[From];
426 DGNode &NodeTo = Nodes[To];
427 SmallVectorImpl<DependencyEdge> &Vec = NodeFrom.OutgoingEdges;
428
429 auto It = find_if(Vec, [To, Dep](DependencyEdge &DE) {
430 return DE.ToIID == To && DE.Dep.ResourceOrRegID == Dep.ResourceOrRegID;
431 });
432
433 if (It != Vec.end()) {
434 It->Dep.Cost += Dep.Cost;
435 It->Frequency++;
436 return;
437 }
438
439 DependencyEdge DE = {Dep, From, To, 1};
440 Vec.emplace_back(DE);
441 NodeTo.NumPredecessors++;
442 }
443
// Construct the analysis view. The dependency graph is sized for three
// copies of the input sequence (S.size() * 3): the add*Dep() helpers place
// nodes at offsets 0, SourceSize and 2*SourceSize so that loop-carried
// dependencies can be modeled across adjacent iterations.
BottleneckAnalysis::BottleneckAnalysis(const MCSubtargetInfo &sti,
                                       MCInstPrinter &Printer,
                                       ArrayRef<MCInst> S, unsigned NumIter)
    : InstructionView(sti, Printer, S), Tracker(sti.getSchedModel()),
      DG(S.size() * 3), Iterations(NumIter), TotalCycles(0),
      PressureIncreasedBecauseOfResources(false),
      PressureIncreasedBecauseOfRegisterDependencies(false),
      PressureIncreasedBecauseOfMemoryDependencies(false),
      SeenStallCycles(false), BPI() {}
453
addRegisterDep(unsigned From,unsigned To,unsigned RegID,unsigned Cost)454 void BottleneckAnalysis::addRegisterDep(unsigned From, unsigned To,
455 unsigned RegID, unsigned Cost) {
456 bool IsLoopCarried = From >= To;
457 unsigned SourceSize = getSource().size();
458 if (IsLoopCarried) {
459 DG.addRegisterDep(From, To + SourceSize, RegID, Cost);
460 DG.addRegisterDep(From + SourceSize, To + (SourceSize * 2), RegID, Cost);
461 return;
462 }
463 DG.addRegisterDep(From + SourceSize, To + SourceSize, RegID, Cost);
464 }
465
addMemoryDep(unsigned From,unsigned To,unsigned Cost)466 void BottleneckAnalysis::addMemoryDep(unsigned From, unsigned To,
467 unsigned Cost) {
468 bool IsLoopCarried = From >= To;
469 unsigned SourceSize = getSource().size();
470 if (IsLoopCarried) {
471 DG.addMemoryDep(From, To + SourceSize, Cost);
472 DG.addMemoryDep(From + SourceSize, To + (SourceSize * 2), Cost);
473 return;
474 }
475 DG.addMemoryDep(From + SourceSize, To + SourceSize, Cost);
476 }
477
addResourceDep(unsigned From,unsigned To,uint64_t Mask,unsigned Cost)478 void BottleneckAnalysis::addResourceDep(unsigned From, unsigned To,
479 uint64_t Mask, unsigned Cost) {
480 bool IsLoopCarried = From >= To;
481 unsigned SourceSize = getSource().size();
482 if (IsLoopCarried) {
483 DG.addResourceDep(From, To + SourceSize, Mask, Cost);
484 DG.addResourceDep(From + SourceSize, To + (SourceSize * 2), Mask, Cost);
485 return;
486 }
487 DG.addResourceDep(From + SourceSize, To + SourceSize, Mask, Cost);
488 }
489
// Event hook: maintain the pressure tracker across the instruction lifetime
// and, at issue time, add the instruction's critical dependencies (resource,
// register, memory) to the dependency graph.
void BottleneckAnalysis::onEvent(const HWInstructionEvent &Event) {
  const unsigned IID = Event.IR.getSourceIndex();
  if (Event.Type == HWInstructionEvent::Dispatched) {
    Tracker.onInstructionDispatched(IID);
    return;
  }
  if (Event.Type == HWInstructionEvent::Executed) {
    Tracker.onInstructionExecuted(IID);
    return;
  }

  // Dependency edges are only discovered at instruction issue.
  if (Event.Type != HWInstructionEvent::Issued)
    return;

  ArrayRef<llvm::MCInst> Source = getSource();
  const Instruction &IS = *Event.IR.getInstruction();
  unsigned To = IID % Source.size();

  // Resource interference edges: one per instruction currently using a unit
  // of any resource on this instruction's critical resource mask. Pressure
  // cycles are weighted double when computing the edge cost.
  unsigned Cycles = 2 * Tracker.getResourcePressureCycles(IID);
  uint64_t ResourceMask = IS.getCriticalResourceMask();
  SmallVector<std::pair<unsigned, unsigned>, 4> Users;
  while (ResourceMask) {
    uint64_t Current = ResourceMask & (-ResourceMask); // Lowest set bit.
    Tracker.getResourceUsers(Current, Users);
    for (const std::pair<unsigned, unsigned> &U : Users)
      addResourceDep(U.first % Source.size(), To, Current, U.second + Cycles);
    Users.clear();
    ResourceMask ^= Current;
  }

  // Critical register dependency edge (if any).
  const CriticalDependency &RegDep = IS.getCriticalRegDep();
  if (RegDep.Cycles) {
    Cycles = RegDep.Cycles + 2 * Tracker.getRegisterPressureCycles(IID);
    unsigned From = RegDep.IID % Source.size();
    addRegisterDep(From, To, RegDep.RegID, Cycles);
  }

  // Critical memory dependency edge (if any).
  const CriticalDependency &MemDep = IS.getCriticalMemDep();
  if (MemDep.Cycles) {
    Cycles = MemDep.Cycles + 2 * Tracker.getMemoryPressureCycles(IID);
    unsigned From = MemDep.IID % Source.size();
    addMemoryDep(From, To, Cycles);
  }

  // Update the tracker only after the dependency edges have been computed:
  // the resource-user lookups above must see the pre-issue state.
  Tracker.handleInstructionIssuedEvent(
      static_cast<const HWInstructionIssuedEvent &>(Event));

  // Check if this is the last simulated instruction.
  if (IID == ((Iterations * Source.size()) - 1))
    DG.finalizeGraph(Iterations);
}
541
onEvent(const HWPressureEvent & Event)542 void BottleneckAnalysis::onEvent(const HWPressureEvent &Event) {
543 assert(Event.Reason != HWPressureEvent::INVALID &&
544 "Unexpected invalid event!");
545
546 Tracker.handlePressureEvent(Event);
547
548 switch (Event.Reason) {
549 default:
550 break;
551
552 case HWPressureEvent::RESOURCES:
553 PressureIncreasedBecauseOfResources = true;
554 break;
555 case HWPressureEvent::REGISTER_DEPS:
556 PressureIncreasedBecauseOfRegisterDependencies = true;
557 break;
558 case HWPressureEvent::MEMORY_DEPS:
559 PressureIncreasedBecauseOfMemoryDependencies = true;
560 break;
561 }
562 }
563
onCycleEnd()564 void BottleneckAnalysis::onCycleEnd() {
565 ++TotalCycles;
566
567 bool PressureIncreasedBecauseOfDataDependencies =
568 PressureIncreasedBecauseOfRegisterDependencies ||
569 PressureIncreasedBecauseOfMemoryDependencies;
570 if (!PressureIncreasedBecauseOfResources &&
571 !PressureIncreasedBecauseOfDataDependencies)
572 return;
573
574 ++BPI.PressureIncreaseCycles;
575 if (PressureIncreasedBecauseOfRegisterDependencies)
576 ++BPI.RegisterDependencyCycles;
577 if (PressureIncreasedBecauseOfMemoryDependencies)
578 ++BPI.MemoryDependencyCycles;
579 if (PressureIncreasedBecauseOfDataDependencies)
580 ++BPI.DataDependencyCycles;
581 if (PressureIncreasedBecauseOfResources)
582 ++BPI.ResourcePressureCycles;
583 PressureIncreasedBecauseOfResources = false;
584 PressureIncreasedBecauseOfRegisterDependencies = false;
585 PressureIncreasedBecauseOfMemoryDependencies = false;
586 }
587
printBottleneckHints(raw_ostream & OS) const588 void BottleneckAnalysis::printBottleneckHints(raw_ostream &OS) const {
589 if (!SeenStallCycles || !BPI.PressureIncreaseCycles) {
590 OS << "\n\nNo resource or data dependency bottlenecks discovered.\n";
591 return;
592 }
593
594 double PressurePerCycle =
595 (double)BPI.PressureIncreaseCycles * 100 / TotalCycles;
596 double ResourcePressurePerCycle =
597 (double)BPI.ResourcePressureCycles * 100 / TotalCycles;
598 double DDPerCycle = (double)BPI.DataDependencyCycles * 100 / TotalCycles;
599 double RegDepPressurePerCycle =
600 (double)BPI.RegisterDependencyCycles * 100 / TotalCycles;
601 double MemDepPressurePerCycle =
602 (double)BPI.MemoryDependencyCycles * 100 / TotalCycles;
603
604 OS << "\n\nCycles with backend pressure increase [ "
605 << format("%.2f", floor((PressurePerCycle * 100) + 0.5) / 100) << "% ]";
606
607 OS << "\nThroughput Bottlenecks: "
608 << "\n Resource Pressure [ "
609 << format("%.2f", floor((ResourcePressurePerCycle * 100) + 0.5) / 100)
610 << "% ]";
611
612 if (BPI.PressureIncreaseCycles) {
613 ArrayRef<unsigned> Distribution = Tracker.getResourcePressureDistribution();
614 const MCSchedModel &SM = getSubTargetInfo().getSchedModel();
615 for (unsigned I = 0, E = Distribution.size(); I < E; ++I) {
616 unsigned ResourceCycles = Distribution[I];
617 if (ResourceCycles) {
618 double Frequency = (double)ResourceCycles * 100 / TotalCycles;
619 const MCProcResourceDesc &PRDesc = *SM.getProcResource(I);
620 OS << "\n - " << PRDesc.Name << " [ "
621 << format("%.2f", floor((Frequency * 100) + 0.5) / 100) << "% ]";
622 }
623 }
624 }
625
626 OS << "\n Data Dependencies: [ "
627 << format("%.2f", floor((DDPerCycle * 100) + 0.5) / 100) << "% ]";
628 OS << "\n - Register Dependencies [ "
629 << format("%.2f", floor((RegDepPressurePerCycle * 100) + 0.5) / 100)
630 << "% ]";
631 OS << "\n - Memory Dependencies [ "
632 << format("%.2f", floor((MemDepPressurePerCycle * 100) + 0.5) / 100)
633 << "% ]\n";
634 }
635
printView(raw_ostream & OS) const636 void BottleneckAnalysis::printView(raw_ostream &OS) const {
637 std::string Buffer;
638 raw_string_ostream TempStream(Buffer);
639 printBottleneckHints(TempStream);
640 TempStream.flush();
641 OS << Buffer;
642 printCriticalSequence(OS);
643 }
644
645 } // namespace mca.
646 } // namespace llvm
647