1 //===- PassTiming.cpp -----------------------------------------------------===//
2 //
3 // Part of the MLIR Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "PassDetail.h"
10 #include "mlir/Pass/PassManager.h"
11 #include "llvm/ADT/MapVector.h"
12 #include "llvm/ADT/STLExtras.h"
13 #include "llvm/ADT/SmallVector.h"
14 #include "llvm/ADT/Statistic.h"
15 #include "llvm/Support/Format.h"
16 #include "llvm/Support/FormatVariadic.h"
17 #include "llvm/Support/Threading.h"
18 #include <chrono>
19 
20 using namespace mlir;
21 using namespace mlir::detail;
22 
23 constexpr StringLiteral kPassTimingDescription =
24     "... Pass execution timing report ...";
25 
26 namespace {
27 /// Simple record class to record timing information.
28 struct TimeRecord {
TimeRecord__anonda9abf970111::TimeRecord29   TimeRecord(double wall = 0.0, double user = 0.0) : wall(wall), user(user) {}
30 
operator +=__anonda9abf970111::TimeRecord31   TimeRecord &operator+=(const TimeRecord &other) {
32     wall += other.wall;
33     user += other.user;
34     return *this;
35   }
36 
37   /// Print the current time record to 'os', with a breakdown showing
38   /// contributions to the give 'total' time record.
print__anonda9abf970111::TimeRecord39   void print(raw_ostream &os, const TimeRecord &total) {
40     if (total.user != total.wall)
41       os << llvm::format("  %7.4f (%5.1f%%)  ", user,
42                          100.0 * user / total.user);
43     os << llvm::format("  %7.4f (%5.1f%%)  ", wall, 100.0 * wall / total.wall);
44   }
45 
46   double wall, user;
47 };
48 
49 /// An enumeration of the different types of timers.
50 enum class TimerKind {
51   /// This timer represents an ordered collection of pass timers, corresponding
52   /// to a pass pipeline.
53   Pipeline,
54 
55   /// This timer represents a collection of pipeline timers.
56   PipelineCollection,
57 
58   /// This timer represents an analysis or pass timer.
59   PassOrAnalysis
60 };
61 
62 struct Timer {
Timer__anonda9abf970111::Timer63   explicit Timer(std::string &&name, TimerKind kind)
64       : name(std::move(name)), kind(kind) {}
65 
66   /// Start the timer.
start__anonda9abf970111::Timer67   void start() { startTime = std::chrono::system_clock::now(); }
68 
69   /// Stop the timer.
stop__anonda9abf970111::Timer70   void stop() {
71     auto newTime = std::chrono::system_clock::now() - startTime;
72     wallTime += newTime;
73     userTime += newTime;
74   }
75 
76   /// Get or create a child timer with the provided name and id.
getChildTimer__anonda9abf970111::Timer77   Timer *getChildTimer(const void *id, TimerKind kind,
78                        std::function<std::string()> &&nameBuilder) {
79     auto &child = children[id];
80     if (!child)
81       child = std::make_unique<Timer>(nameBuilder(), kind);
82     return child.get();
83   }
84 
85   /// Returns the total time for this timer in seconds.
getTotalTime__anonda9abf970111::Timer86   TimeRecord getTotalTime() {
87     // If this is a pass or analysis timer, use the recorded time directly.
88     if (kind == TimerKind::PassOrAnalysis) {
89       return TimeRecord(
90           std::chrono::duration_cast<std::chrono::duration<double>>(wallTime)
91               .count(),
92           std::chrono::duration_cast<std::chrono::duration<double>>(userTime)
93               .count());
94     }
95 
96     // Otherwise, accumulate the timing from each of the children.
97     TimeRecord totalTime;
98     for (auto &child : children)
99       totalTime += child.second->getTotalTime();
100     return totalTime;
101   }
102 
103   /// A map of unique identifiers to child timers.
104   using ChildrenMap = llvm::MapVector<const void *, std::unique_ptr<Timer>>;
105 
106   /// Merge the timing data from 'other' into this timer.
merge__anonda9abf970111::Timer107   void merge(Timer &&other) {
108     if (wallTime < other.wallTime)
109       wallTime = other.wallTime;
110     userTime += other.userTime;
111     mergeChildren(std::move(other.children));
112   }
113 
114   /// Merge the timer children in 'otherChildren' with the children of this
115   /// timer.
mergeChildren__anonda9abf970111::Timer116   void mergeChildren(ChildrenMap &&otherChildren) {
117     // Check for an empty children list.
118     if (children.empty()) {
119       children = std::move(otherChildren);
120       return;
121     }
122 
123     // Pipeline merges are handled separately as the children are merged
124     // lexicographically.
125     if (kind == TimerKind::Pipeline) {
126       assert(children.size() == otherChildren.size() &&
127              "pipeline merge requires the same number of children");
128       for (auto it : llvm::zip(children, otherChildren))
129         std::get<0>(it).second->merge(std::move(*std::get<1>(it).second));
130       return;
131     }
132 
133     // Otherwise, we merge children based upon their timer key.
134     for (auto &otherChild : otherChildren)
135       mergeChild(std::move(otherChild));
136   }
137 
138   /// Merge in the given child timer and id into this timer.
mergeChild__anonda9abf970111::Timer139   void mergeChild(ChildrenMap::value_type &&childIt) {
140     auto &child = children[childIt.first];
141     if (!child)
142       child = std::move(childIt.second);
143     else
144       child->merge(std::move(*childIt.second));
145   }
146 
147   /// Raw timing information.
148   std::chrono::time_point<std::chrono::system_clock> startTime;
149   std::chrono::nanoseconds wallTime = std::chrono::nanoseconds(0);
150   std::chrono::nanoseconds userTime = std::chrono::nanoseconds(0);
151 
152   /// A map of unique identifiers to child timers.
153   ChildrenMap children;
154 
155   /// A descriptive name for this timer.
156   std::string name;
157 
158   /// The type of timer this instance represents.
159   TimerKind kind;
160 };
161 
162 struct PassTiming : public PassInstrumentation {
PassTiming__anonda9abf970111::PassTiming163   PassTiming(PassDisplayMode displayMode) : displayMode(displayMode) {}
~PassTiming__anonda9abf970111::PassTiming164   ~PassTiming() override { print(); }
165 
166   /// Setup the instrumentation hooks.
167   void runBeforePipeline(const OperationName &name,
168                          const PipelineParentInfo &parentInfo) override;
169   void runAfterPipeline(const OperationName &name,
170                         const PipelineParentInfo &parentInfo) override;
runBeforePass__anonda9abf970111::PassTiming171   void runBeforePass(Pass *pass, Operation *) override { startPassTimer(pass); }
172   void runAfterPass(Pass *pass, Operation *) override;
runAfterPassFailed__anonda9abf970111::PassTiming173   void runAfterPassFailed(Pass *pass, Operation *op) override {
174     runAfterPass(pass, op);
175   }
runBeforeAnalysis__anonda9abf970111::PassTiming176   void runBeforeAnalysis(StringRef name, AnalysisID *id, Operation *) override {
177     startAnalysisTimer(name, id);
178   }
179   void runAfterAnalysis(StringRef, AnalysisID *, Operation *) override;
180 
181   /// Print and clear the timing results.
182   void print();
183 
184   /// Start a new timer for the given pass.
185   void startPassTimer(Pass *pass);
186 
187   /// Start a new timer for the given analysis.
188   void startAnalysisTimer(StringRef name, AnalysisID *id);
189 
190   /// Pop the last active timer for the current thread.
popLastActiveTimer__anonda9abf970111::PassTiming191   Timer *popLastActiveTimer() {
192     auto tid = llvm::get_threadid();
193     auto &activeTimers = activeThreadTimers[tid];
194     assert(!activeTimers.empty() && "expected active timer");
195     return activeTimers.pop_back_val();
196   }
197 
198   /// Print the timing result in list mode.
199   void printResultsAsList(raw_ostream &os, Timer *root, TimeRecord totalTime);
200 
201   /// Print the timing result in pipeline mode.
202   void printResultsAsPipeline(raw_ostream &os, Timer *root,
203                               TimeRecord totalTime);
204 
205   /// Returns a timer for the provided identifier and name.
getTimer__anonda9abf970111::PassTiming206   Timer *getTimer(const void *id, TimerKind kind,
207                   std::function<std::string()> &&nameBuilder) {
208     auto tid = llvm::get_threadid();
209 
210     // If there is no active timer then add to the root timer.
211     auto &activeTimers = activeThreadTimers[tid];
212     Timer *parentTimer;
213     if (activeTimers.empty()) {
214       auto &rootTimer = rootTimers[tid];
215       if (!rootTimer)
216         rootTimer = std::make_unique<Timer>("root", TimerKind::Pipeline);
217       parentTimer = rootTimer.get();
218     } else {
219       // Otherwise, add this to the active timer.
220       parentTimer = activeTimers.back();
221     }
222 
223     auto timer = parentTimer->getChildTimer(id, kind, std::move(nameBuilder));
224     activeTimers.push_back(timer);
225     return timer;
226   }
227 
228   /// The root top level timers for each thread.
229   DenseMap<uint64_t, std::unique_ptr<Timer>> rootTimers;
230 
231   /// A stack of the currently active pass timers per thread.
232   DenseMap<uint64_t, SmallVector<Timer *, 4>> activeThreadTimers;
233 
234   /// The display mode to use when printing the timing results.
235   PassDisplayMode displayMode;
236 
237   /// A mapping of pipeline timers that need to be merged into the parent
238   /// collection. The timers are mapped to the parent info to merge into.
239   DenseMap<PipelineParentInfo, SmallVector<Timer::ChildrenMap::value_type, 4>>
240       pipelinesToMerge;
241 };
242 } // end anonymous namespace
243 
runBeforePipeline(const OperationName & name,const PipelineParentInfo & parentInfo)244 void PassTiming::runBeforePipeline(const OperationName &name,
245                                    const PipelineParentInfo &parentInfo) {
246   // We don't actually want to time the piplelines, they gather their total
247   // from their held passes.
248   getTimer(name.getAsOpaquePointer(), TimerKind::Pipeline,
249            [&] { return ("'" + name.getStringRef() + "' Pipeline").str(); });
250 }
251 
runAfterPipeline(const OperationName & name,const PipelineParentInfo & parentInfo)252 void PassTiming::runAfterPipeline(const OperationName &name,
253                                   const PipelineParentInfo &parentInfo) {
254   // Pop the timer for the pipeline.
255   auto tid = llvm::get_threadid();
256   auto &activeTimers = activeThreadTimers[tid];
257   assert(!activeTimers.empty() && "expected active timer");
258   activeTimers.pop_back();
259 
260   // If the current thread is the same as the parent, there is nothing left to
261   // do.
262   if (tid == parentInfo.parentThreadID)
263     return;
264 
265   // Otherwise, mark the pipeline timer for merging into the correct parent
266   // thread.
267   assert(activeTimers.empty() && "expected parent timer to be root");
268   auto *parentTimer = rootTimers[tid].get();
269   assert(parentTimer->children.size() == 1 &&
270          parentTimer->children.count(name.getAsOpaquePointer()) &&
271          "expected a single pipeline timer");
272   pipelinesToMerge[parentInfo].push_back(
273       std::move(*parentTimer->children.begin()));
274   rootTimers.erase(tid);
275 }
276 
277 /// Start a new timer for the given pass.
startPassTimer(Pass * pass)278 void PassTiming::startPassTimer(Pass *pass) {
279   auto kind = isAdaptorPass(pass) ? TimerKind::PipelineCollection
280                                   : TimerKind::PassOrAnalysis;
281   Timer *timer = getTimer(pass, kind, [pass]() -> std::string {
282     if (auto *adaptor = getAdaptorPassBase(pass))
283       return adaptor->getName();
284     return pass->getName();
285   });
286 
287   // We don't actually want to time the adaptor passes, they gather their total
288   // from their held passes.
289   if (!isAdaptorPass(pass))
290     timer->start();
291 }
292 
293 /// Start a new timer for the given analysis.
startAnalysisTimer(StringRef name,AnalysisID * id)294 void PassTiming::startAnalysisTimer(StringRef name, AnalysisID *id) {
295   Timer *timer = getTimer(id, TimerKind::PassOrAnalysis,
296                           [name] { return "(A) " + name.str(); });
297   timer->start();
298 }
299 
300 /// Stop a pass timer.
runAfterPass(Pass * pass,Operation *)301 void PassTiming::runAfterPass(Pass *pass, Operation *) {
302   Timer *timer = popLastActiveTimer();
303 
304   // If this is an OpToOpPassAdaptorParallel, then we need to merge in the
305   // timing data for the pipelines running on other threads.
306   if (isa<OpToOpPassAdaptorParallel>(pass)) {
307     auto toMerge = pipelinesToMerge.find({llvm::get_threadid(), pass});
308     if (toMerge != pipelinesToMerge.end()) {
309       for (auto &it : toMerge->second)
310         timer->mergeChild(std::move(it));
311       pipelinesToMerge.erase(toMerge);
312     }
313     return;
314   }
315 
316   // Adaptor passes aren't timed directly, so we don't need to stop their
317   // timers.
318   if (!isAdaptorPass(pass))
319     timer->stop();
320 }
321 
322 /// Stop a timer.
runAfterAnalysis(StringRef,AnalysisID *,Operation *)323 void PassTiming::runAfterAnalysis(StringRef, AnalysisID *, Operation *) {
324   popLastActiveTimer()->stop();
325 }
326 
327 /// Utility to print the timer heading information.
printTimerHeader(raw_ostream & os,TimeRecord total)328 static void printTimerHeader(raw_ostream &os, TimeRecord total) {
329   os << "===" << std::string(73, '-') << "===\n";
330   // Figure out how many spaces to description name.
331   unsigned padding = (80 - kPassTimingDescription.size()) / 2;
332   os.indent(padding) << kPassTimingDescription << '\n';
333   os << "===" << std::string(73, '-') << "===\n";
334 
335   // Print the total time followed by the section headers.
336   os << llvm::format("  Total Execution Time: %5.4f seconds\n\n", total.wall);
337   if (total.user != total.wall)
338     os << "   ---User Time---";
339   os << "   ---Wall Time---  --- Name ---\n";
340 }
341 
342 /// Utility to print a single line entry in the timer output.
printTimeEntry(raw_ostream & os,unsigned indent,StringRef name,TimeRecord time,TimeRecord totalTime)343 static void printTimeEntry(raw_ostream &os, unsigned indent, StringRef name,
344                            TimeRecord time, TimeRecord totalTime) {
345   time.print(os, totalTime);
346   os.indent(indent) << name << "\n";
347 }
348 
349 /// Print out the current timing information.
print()350 void PassTiming::print() {
351   // Don't print anything if there is no timing data.
352   if (rootTimers.empty())
353     return;
354 
355   assert(rootTimers.size() == 1 && "expected one remaining root timer");
356   auto &rootTimer = rootTimers.begin()->second;
357   auto os = llvm::CreateInfoOutputFile();
358 
359   // Print the timer header.
360   TimeRecord totalTime = rootTimer->getTotalTime();
361   printTimerHeader(*os, totalTime);
362 
363   // Defer to a specialized printer for each display mode.
364   switch (displayMode) {
365   case PassDisplayMode::List:
366     printResultsAsList(*os, rootTimer.get(), totalTime);
367     break;
368   case PassDisplayMode::Pipeline:
369     printResultsAsPipeline(*os, rootTimer.get(), totalTime);
370     break;
371   }
372   printTimeEntry(*os, 0, "Total", totalTime, totalTime);
373   os->flush();
374 
375   // Reset root timers.
376   rootTimers.clear();
377   activeThreadTimers.clear();
378 }
379 
380 /// Print the timing result in list mode.
printResultsAsList(raw_ostream & os,Timer * root,TimeRecord totalTime)381 void PassTiming::printResultsAsList(raw_ostream &os, Timer *root,
382                                     TimeRecord totalTime) {
383   llvm::StringMap<TimeRecord> mergedTimings;
384 
385   std::function<void(Timer *)> addTimer = [&](Timer *timer) {
386     // Only add timing information for passes and analyses.
387     if (timer->kind == TimerKind::PassOrAnalysis)
388       mergedTimings[timer->name] += timer->getTotalTime();
389     for (auto &children : timer->children)
390       addTimer(children.second.get());
391   };
392 
393   // Add each of the top level timers.
394   for (auto &topLevelTimer : root->children)
395     addTimer(topLevelTimer.second.get());
396 
397   // Sort the timing information by wall time.
398   std::vector<std::pair<StringRef, TimeRecord>> timerNameAndTime;
399   for (auto &it : mergedTimings)
400     timerNameAndTime.emplace_back(it.first(), it.second);
401   llvm::array_pod_sort(timerNameAndTime.begin(), timerNameAndTime.end(),
402                        [](const std::pair<StringRef, TimeRecord> *lhs,
403                           const std::pair<StringRef, TimeRecord> *rhs) {
404                          return llvm::array_pod_sort_comparator<double>(
405                              &rhs->second.wall, &lhs->second.wall);
406                        });
407 
408   // Print the timing information sequentially.
409   for (auto &timeData : timerNameAndTime)
410     printTimeEntry(os, 0, timeData.first, timeData.second, totalTime);
411 }
412 
413 /// Print the timing result in pipeline mode.
printResultsAsPipeline(raw_ostream & os,Timer * root,TimeRecord totalTime)414 void PassTiming::printResultsAsPipeline(raw_ostream &os, Timer *root,
415                                         TimeRecord totalTime) {
416   std::function<void(unsigned, Timer *)> printTimer = [&](unsigned indent,
417                                                           Timer *timer) {
418     // If this is a timer for a pipeline collection and the collection only has
419     // one pipeline child, then only print the child.
420     if (timer->kind == TimerKind::PipelineCollection &&
421         timer->children.size() == 1)
422       return printTimer(indent, timer->children.begin()->second.get());
423 
424     printTimeEntry(os, indent, timer->name, timer->getTotalTime(), totalTime);
425 
426     // If this timer is a pipeline, then print the children in-order.
427     if (timer->kind == TimerKind::Pipeline) {
428       for (auto &child : timer->children)
429         printTimer(indent + 2, child.second.get());
430       return;
431     }
432 
433     // Otherwise, sort the children by name to give a deterministic ordering
434     // when emitting the time.
435     SmallVector<Timer *, 4> children;
436     children.reserve(timer->children.size());
437     for (auto &child : timer->children)
438       children.push_back(child.second.get());
439     llvm::array_pod_sort(children.begin(), children.end(),
440                          [](Timer *const *lhs, Timer *const *rhs) {
441                            return (*lhs)->name.compare((*rhs)->name);
442                          });
443     for (auto &child : children)
444       printTimer(indent + 2, child);
445   };
446 
447   // Print each of the top level timers.
448   for (auto &topLevelTimer : root->children)
449     printTimer(0, topLevelTimer.second.get());
450 }
451 
452 //===----------------------------------------------------------------------===//
453 // PassManager
454 //===----------------------------------------------------------------------===//
455 
456 /// Add an instrumentation to time the execution of passes and the computation
457 /// of analyses.
enableTiming(PassDisplayMode displayMode)458 void PassManager::enableTiming(PassDisplayMode displayMode) {
459   // Check if pass timing is already enabled.
460   if (passTiming)
461     return;
462   addInstrumentation(std::make_unique<PassTiming>(displayMode));
463   passTiming = true;
464 }
465