1 //===- PassTiming.cpp -----------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "PassDetail.h"
10 #include "mlir/Pass/PassManager.h"
11 #include "llvm/ADT/MapVector.h"
12 #include "llvm/ADT/STLExtras.h"
13 #include "llvm/ADT/SmallVector.h"
14 #include "llvm/ADT/Statistic.h"
15 #include "llvm/Support/Format.h"
16 #include "llvm/Support/FormatVariadic.h"
17 #include "llvm/Support/Threading.h"
18 #include <chrono>
19 
20 using namespace mlir;
21 using namespace mlir::detail;
22 
23 constexpr StringLiteral kPassTimingDescription =
24     "... Pass execution timing report ...";
25 
26 namespace {
27 /// Simple record class to record timing information.
28 struct TimeRecord {
TimeRecord__anondc85f41c0111::TimeRecord29   TimeRecord(double wall = 0.0, double user = 0.0) : wall(wall), user(user) {}
30 
operator +=__anondc85f41c0111::TimeRecord31   TimeRecord &operator+=(const TimeRecord &other) {
32     wall += other.wall;
33     user += other.user;
34     return *this;
35   }
36 
37   /// Print the current time record to 'os', with a breakdown showing
38   /// contributions to the give 'total' time record.
print__anondc85f41c0111::TimeRecord39   void print(raw_ostream &os, const TimeRecord &total) {
40     if (total.user != total.wall)
41       os << llvm::format("  %7.4f (%5.1f%%)  ", user,
42                          100.0 * user / total.user);
43     os << llvm::format("  %7.4f (%5.1f%%)  ", wall, 100.0 * wall / total.wall);
44   }
45 
46   double wall, user;
47 };
48 
49 /// An enumeration of the different types of timers.
50 enum class TimerKind {
51   /// This timer represents an ordered collection of pass timers, corresponding
52   /// to a pass pipeline.
53   Pipeline,
54 
55   /// This timer represents a collection of pipeline timers.
56   PipelineCollection,
57 
58   /// This timer represents an analysis or pass timer.
59   PassOrAnalysis
60 };
61 
62 struct Timer {
Timer__anondc85f41c0111::Timer63   explicit Timer(std::string &&name, TimerKind kind)
64       : name(std::move(name)), kind(kind) {}
65 
66   /// Start the timer.
start__anondc85f41c0111::Timer67   void start() { startTime = std::chrono::system_clock::now(); }
68 
69   /// Stop the timer.
stop__anondc85f41c0111::Timer70   void stop() {
71     auto newTime = std::chrono::system_clock::now() - startTime;
72     wallTime += newTime;
73     userTime += newTime;
74   }
75 
76   /// Get or create a child timer with the provided name and id.
getChildTimer__anondc85f41c0111::Timer77   Timer *getChildTimer(const void *id, TimerKind kind,
78                        std::function<std::string()> &&nameBuilder) {
79     auto &child = children[id];
80     if (!child)
81       child = std::make_unique<Timer>(nameBuilder(), kind);
82     return child.get();
83   }
84 
85   /// Returns the total time for this timer in seconds.
getTotalTime__anondc85f41c0111::Timer86   TimeRecord getTotalTime() {
87     // If this is a pass or analysis timer, use the recorded time directly.
88     if (kind == TimerKind::PassOrAnalysis) {
89       return TimeRecord(
90           std::chrono::duration_cast<std::chrono::duration<double>>(wallTime)
91               .count(),
92           std::chrono::duration_cast<std::chrono::duration<double>>(userTime)
93               .count());
94     }
95 
96     // Otherwise, accumulate the timing from each of the children.
97     TimeRecord totalTime;
98     for (auto &child : children)
99       totalTime += child.second->getTotalTime();
100     return totalTime;
101   }
102 
103   /// A map of unique identifiers to child timers.
104   using ChildrenMap = llvm::MapVector<const void *, std::unique_ptr<Timer>>;
105 
106   /// Merge the timing data from 'other' into this timer.
merge__anondc85f41c0111::Timer107   void merge(Timer &&other) {
108     if (wallTime < other.wallTime)
109       wallTime = other.wallTime;
110     userTime += other.userTime;
111     mergeChildren(std::move(other.children));
112   }
113 
114   /// Merge the timer children in 'otherChildren' with the children of this
115   /// timer.
mergeChildren__anondc85f41c0111::Timer116   void mergeChildren(ChildrenMap &&otherChildren) {
117     // Check for an empty children list.
118     if (children.empty()) {
119       children = std::move(otherChildren);
120       return;
121     }
122 
123     // Pipeline merges are handled separately as the children are merged
124     // lexicographically.
125     if (kind == TimerKind::Pipeline) {
126       assert(children.size() == otherChildren.size() &&
127              "pipeline merge requires the same number of children");
128       for (auto it : llvm::zip(children, otherChildren))
129         std::get<0>(it).second->merge(std::move(*std::get<1>(it).second));
130       return;
131     }
132 
133     // Otherwise, we merge children based upon their timer key.
134     for (auto &otherChild : otherChildren)
135       mergeChild(std::move(otherChild));
136   }
137 
138   /// Merge in the given child timer and id into this timer.
mergeChild__anondc85f41c0111::Timer139   void mergeChild(ChildrenMap::value_type &&childIt) {
140     auto &child = children[childIt.first];
141     if (!child)
142       child = std::move(childIt.second);
143     else
144       child->merge(std::move(*childIt.second));
145   }
146 
147   /// Raw timing information.
148   std::chrono::time_point<std::chrono::system_clock> startTime;
149   std::chrono::nanoseconds wallTime = std::chrono::nanoseconds(0);
150   std::chrono::nanoseconds userTime = std::chrono::nanoseconds(0);
151 
152   /// A map of unique identifiers to child timers.
153   ChildrenMap children;
154 
155   /// A descriptive name for this timer.
156   std::string name;
157 
158   /// The type of timer this instance represents.
159   TimerKind kind;
160 };
161 
162 struct PassTiming : public PassInstrumentation {
PassTiming__anondc85f41c0111::PassTiming163   PassTiming(std::unique_ptr<PassManager::PassTimingConfig> config)
164       : config(std::move(config)) {}
~PassTiming__anondc85f41c0111::PassTiming165   ~PassTiming() override { print(); }
166 
167   /// Setup the instrumentation hooks.
168   void runBeforePipeline(const OperationName &name,
169                          const PipelineParentInfo &parentInfo) override;
170   void runAfterPipeline(const OperationName &name,
171                         const PipelineParentInfo &parentInfo) override;
runBeforePass__anondc85f41c0111::PassTiming172   void runBeforePass(Pass *pass, Operation *) override { startPassTimer(pass); }
173   void runAfterPass(Pass *pass, Operation *) override;
runAfterPassFailed__anondc85f41c0111::PassTiming174   void runAfterPassFailed(Pass *pass, Operation *op) override {
175     runAfterPass(pass, op);
176   }
runBeforeAnalysis__anondc85f41c0111::PassTiming177   void runBeforeAnalysis(StringRef name, TypeID id, Operation *) override {
178     startAnalysisTimer(name, id);
179   }
180   void runAfterAnalysis(StringRef, TypeID, Operation *) override;
181 
182   /// Print and clear the timing results.
183   void print();
184 
185   /// Start a new timer for the given pass.
186   void startPassTimer(Pass *pass);
187 
188   /// Start a new timer for the given analysis.
189   void startAnalysisTimer(StringRef name, TypeID id);
190 
191   /// Pop the last active timer for the current thread.
popLastActiveTimer__anondc85f41c0111::PassTiming192   Timer *popLastActiveTimer() {
193     auto tid = llvm::get_threadid();
194     auto &activeTimers = activeThreadTimers[tid];
195     assert(!activeTimers.empty() && "expected active timer");
196     return activeTimers.pop_back_val();
197   }
198 
199   /// Print the timing result in list mode.
200   void printResultsAsList(raw_ostream &os, Timer *root, TimeRecord totalTime);
201 
202   /// Print the timing result in pipeline mode.
203   void printResultsAsPipeline(raw_ostream &os, Timer *root,
204                               TimeRecord totalTime);
205 
206   /// Returns a timer for the provided identifier and name.
getTimer__anondc85f41c0111::PassTiming207   Timer *getTimer(const void *id, TimerKind kind,
208                   std::function<std::string()> &&nameBuilder) {
209     auto tid = llvm::get_threadid();
210 
211     // If there is no active timer then add to the root timer.
212     auto &activeTimers = activeThreadTimers[tid];
213     Timer *parentTimer;
214     if (activeTimers.empty()) {
215       auto &rootTimer = rootTimers[tid];
216       if (!rootTimer)
217         rootTimer = std::make_unique<Timer>("root", TimerKind::Pipeline);
218       parentTimer = rootTimer.get();
219     } else {
220       // Otherwise, add this to the active timer.
221       parentTimer = activeTimers.back();
222     }
223 
224     auto timer = parentTimer->getChildTimer(id, kind, std::move(nameBuilder));
225     activeTimers.push_back(timer);
226     return timer;
227   }
228 
229   /// The root top level timers for each thread.
230   DenseMap<uint64_t, std::unique_ptr<Timer>> rootTimers;
231 
232   /// A stack of the currently active pass timers per thread.
233   DenseMap<uint64_t, SmallVector<Timer *, 4>> activeThreadTimers;
234 
235   /// The configuration object to use when printing the timing results.
236   std::unique_ptr<PassManager::PassTimingConfig> config;
237 
238   /// A mapping of pipeline timers that need to be merged into the parent
239   /// collection. The timers are mapped to the parent info to merge into.
240   DenseMap<PipelineParentInfo, SmallVector<Timer::ChildrenMap::value_type, 4>>
241       pipelinesToMerge;
242 };
243 } // end anonymous namespace
244 
runBeforePipeline(const OperationName & name,const PipelineParentInfo & parentInfo)245 void PassTiming::runBeforePipeline(const OperationName &name,
246                                    const PipelineParentInfo &parentInfo) {
247   // We don't actually want to time the pipelines, they gather their total
248   // from their held passes.
249   getTimer(name.getAsOpaquePointer(), TimerKind::Pipeline,
250            [&] { return ("'" + name.getStringRef() + "' Pipeline").str(); });
251 }
252 
runAfterPipeline(const OperationName & name,const PipelineParentInfo & parentInfo)253 void PassTiming::runAfterPipeline(const OperationName &name,
254                                   const PipelineParentInfo &parentInfo) {
255   // Pop the timer for the pipeline.
256   auto tid = llvm::get_threadid();
257   auto &activeTimers = activeThreadTimers[tid];
258   assert(!activeTimers.empty() && "expected active timer");
259   activeTimers.pop_back();
260 
261   // If the current thread is the same as the parent, there is nothing left to
262   // do.
263   if (tid == parentInfo.parentThreadID)
264     return;
265 
266   // Otherwise, mark the pipeline timer for merging into the correct parent
267   // thread.
268   assert(activeTimers.empty() && "expected parent timer to be root");
269   auto *parentTimer = rootTimers[tid].get();
270   assert(parentTimer->children.size() == 1 &&
271          parentTimer->children.count(name.getAsOpaquePointer()) &&
272          "expected a single pipeline timer");
273   pipelinesToMerge[parentInfo].push_back(
274       std::move(*parentTimer->children.begin()));
275   rootTimers.erase(tid);
276 }
277 
278 /// Start a new timer for the given pass.
startPassTimer(Pass * pass)279 void PassTiming::startPassTimer(Pass *pass) {
280   auto kind = isa<OpToOpPassAdaptor>(pass) ? TimerKind::PipelineCollection
281                                            : TimerKind::PassOrAnalysis;
282   Timer *timer = getTimer(pass, kind, [pass]() -> std::string {
283     if (auto *adaptor = dyn_cast<OpToOpPassAdaptor>(pass))
284       return adaptor->getAdaptorName();
285     return std::string(pass->getName());
286   });
287 
288   // We don't actually want to time the adaptor passes, they gather their total
289   // from their held passes.
290   if (!isa<OpToOpPassAdaptor>(pass))
291     timer->start();
292 }
293 
294 /// Start a new timer for the given analysis.
startAnalysisTimer(StringRef name,TypeID id)295 void PassTiming::startAnalysisTimer(StringRef name, TypeID id) {
296   Timer *timer = getTimer(id.getAsOpaquePointer(), TimerKind::PassOrAnalysis,
297                           [name] { return "(A) " + name.str(); });
298   timer->start();
299 }
300 
301 /// Stop a pass timer.
runAfterPass(Pass * pass,Operation *)302 void PassTiming::runAfterPass(Pass *pass, Operation *) {
303   Timer *timer = popLastActiveTimer();
304 
305   // If this is a pass adaptor, then we need to merge in the timing data for the
306   // pipelines running on other threads.
307   if (isa<OpToOpPassAdaptor>(pass)) {
308     auto toMerge = pipelinesToMerge.find({llvm::get_threadid(), pass});
309     if (toMerge != pipelinesToMerge.end()) {
310       for (auto &it : toMerge->second)
311         timer->mergeChild(std::move(it));
312       pipelinesToMerge.erase(toMerge);
313     }
314     return;
315   }
316 
317   timer->stop();
318 }
319 
320 /// Stop a timer.
runAfterAnalysis(StringRef,TypeID,Operation *)321 void PassTiming::runAfterAnalysis(StringRef, TypeID, Operation *) {
322   popLastActiveTimer()->stop();
323 }
324 
325 /// Utility to print the timer heading information.
printTimerHeader(raw_ostream & os,TimeRecord total)326 static void printTimerHeader(raw_ostream &os, TimeRecord total) {
327   os << "===" << std::string(73, '-') << "===\n";
328   // Figure out how many spaces to description name.
329   unsigned padding = (80 - kPassTimingDescription.size()) / 2;
330   os.indent(padding) << kPassTimingDescription << '\n';
331   os << "===" << std::string(73, '-') << "===\n";
332 
333   // Print the total time followed by the section headers.
334   os << llvm::format("  Total Execution Time: %5.4f seconds\n\n", total.wall);
335   if (total.user != total.wall)
336     os << "   ---User Time---";
337   os << "   ---Wall Time---  --- Name ---\n";
338 }
339 
340 /// Utility to print a single line entry in the timer output.
printTimeEntry(raw_ostream & os,unsigned indent,StringRef name,TimeRecord time,TimeRecord totalTime)341 static void printTimeEntry(raw_ostream &os, unsigned indent, StringRef name,
342                            TimeRecord time, TimeRecord totalTime) {
343   time.print(os, totalTime);
344   os.indent(indent) << name << "\n";
345 }
346 
347 /// Print out the current timing information.
print()348 void PassTiming::print() {
349   // Don't print anything if there is no timing data.
350   if (rootTimers.empty())
351     return;
352 
353   assert(rootTimers.size() == 1 && "expected one remaining root timer");
354 
355   auto printCallback = [&](raw_ostream &os) {
356     auto &rootTimer = rootTimers.begin()->second;
357     // Print the timer header.
358     TimeRecord totalTime = rootTimer->getTotalTime();
359     printTimerHeader(os, totalTime);
360     // Defer to a specialized printer for each display mode.
361     switch (config->getDisplayMode()) {
362     case PassDisplayMode::List:
363       printResultsAsList(os, rootTimer.get(), totalTime);
364       break;
365     case PassDisplayMode::Pipeline:
366       printResultsAsPipeline(os, rootTimer.get(), totalTime);
367       break;
368     }
369     printTimeEntry(os, 0, "Total", totalTime, totalTime);
370     os.flush();
371 
372     // Reset root timers.
373     rootTimers.clear();
374     activeThreadTimers.clear();
375   };
376 
377   config->printTiming(printCallback);
378 }
379 
380 // The default implementation for printTiming uses
381 // `llvm::CreateInfoOutputFile()` as stream, it can be overridden by clients
382 // to customize the output.
printTiming(PrintCallbackFn printCallback)383 void PassManager::PassTimingConfig::printTiming(PrintCallbackFn printCallback) {
384   printCallback(*llvm::CreateInfoOutputFile());
385 }
386 
387 /// Print the timing result in list mode.
printResultsAsList(raw_ostream & os,Timer * root,TimeRecord totalTime)388 void PassTiming::printResultsAsList(raw_ostream &os, Timer *root,
389                                     TimeRecord totalTime) {
390   llvm::StringMap<TimeRecord> mergedTimings;
391 
392   std::function<void(Timer *)> addTimer = [&](Timer *timer) {
393     // Only add timing information for passes and analyses.
394     if (timer->kind == TimerKind::PassOrAnalysis)
395       mergedTimings[timer->name] += timer->getTotalTime();
396     for (auto &children : timer->children)
397       addTimer(children.second.get());
398   };
399 
400   // Add each of the top level timers.
401   for (auto &topLevelTimer : root->children)
402     addTimer(topLevelTimer.second.get());
403 
404   // Sort the timing information by wall time.
405   std::vector<std::pair<StringRef, TimeRecord>> timerNameAndTime;
406   for (auto &it : mergedTimings)
407     timerNameAndTime.emplace_back(it.first(), it.second);
408   llvm::array_pod_sort(timerNameAndTime.begin(), timerNameAndTime.end(),
409                        [](const std::pair<StringRef, TimeRecord> *lhs,
410                           const std::pair<StringRef, TimeRecord> *rhs) {
411                          return llvm::array_pod_sort_comparator<double>(
412                              &rhs->second.wall, &lhs->second.wall);
413                        });
414 
415   // Print the timing information sequentially.
416   for (auto &timeData : timerNameAndTime)
417     printTimeEntry(os, 0, timeData.first, timeData.second, totalTime);
418 }
419 
420 /// Print the timing result in pipeline mode.
printResultsAsPipeline(raw_ostream & os,Timer * root,TimeRecord totalTime)421 void PassTiming::printResultsAsPipeline(raw_ostream &os, Timer *root,
422                                         TimeRecord totalTime) {
423   std::function<void(unsigned, Timer *)> printTimer = [&](unsigned indent,
424                                                           Timer *timer) {
425     // If this is a timer for a pipeline collection and the collection only has
426     // one pipeline child, then only print the child.
427     if (timer->kind == TimerKind::PipelineCollection &&
428         timer->children.size() == 1)
429       return printTimer(indent, timer->children.begin()->second.get());
430 
431     printTimeEntry(os, indent, timer->name, timer->getTotalTime(), totalTime);
432 
433     // If this timer is a pipeline, then print the children in-order.
434     if (timer->kind == TimerKind::Pipeline) {
435       for (auto &child : timer->children)
436         printTimer(indent + 2, child.second.get());
437       return;
438     }
439 
440     // Otherwise, sort the children by name to give a deterministic ordering
441     // when emitting the time.
442     SmallVector<Timer *, 4> children;
443     children.reserve(timer->children.size());
444     for (auto &child : timer->children)
445       children.push_back(child.second.get());
446     llvm::array_pod_sort(children.begin(), children.end(),
447                          [](Timer *const *lhs, Timer *const *rhs) {
448                            return (*lhs)->name.compare((*rhs)->name);
449                          });
450     for (auto &child : children)
451       printTimer(indent + 2, child);
452   };
453 
454   // Print each of the top level timers.
455   for (auto &topLevelTimer : root->children)
456     printTimer(0, topLevelTimer.second.get());
457 }
458 
459 // Out-of-line as key function.
~PassTimingConfig()460 PassManager::PassTimingConfig::~PassTimingConfig() {}
461 
462 //===----------------------------------------------------------------------===//
463 // PassManager
464 //===----------------------------------------------------------------------===//
465 
466 /// Add an instrumentation to time the execution of passes and the computation
467 /// of analyses.
enableTiming(std::unique_ptr<PassTimingConfig> config)468 void PassManager::enableTiming(std::unique_ptr<PassTimingConfig> config) {
469   // Check if pass timing is already enabled.
470   if (passTiming)
471     return;
472   if (!config)
473     config = std::make_unique<PassManager::PassTimingConfig>();
474   addInstrumentation(std::make_unique<PassTiming>(std::move(config)));
475   passTiming = true;
476 }
477