1 //===- Timing.cpp - Execution time measurement facilities -----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Facilities to measure and provide statistics on execution time.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "mlir/Support/Timing.h"
14 #include "mlir/Support/ThreadLocalCache.h"
15 #include "llvm/ADT/MapVector.h"
16 #include "llvm/ADT/Statistic.h"
17 #include "llvm/ADT/StringMap.h"
18 #include "llvm/ADT/StringSet.h"
19 #include "llvm/Support/Allocator.h"
20 #include "llvm/Support/CommandLine.h"
21 #include "llvm/Support/Format.h"
22 #include "llvm/Support/FormatVariadic.h"
23 #include "llvm/Support/ManagedStatic.h"
24 #include "llvm/Support/RWMutex.h"
25 #include "llvm/Support/Threading.h"
26 #include "llvm/Support/raw_ostream.h"
27 
28 #include <atomic>
29 #include <chrono>
30 
31 using namespace mlir;
32 using namespace detail;
33 using DisplayMode = DefaultTimingManager::DisplayMode;
34 
35 constexpr llvm::StringLiteral kTimingDescription =
36     "... Execution time report ...";
37 
38 //===----------------------------------------------------------------------===//
39 // TimingManager
40 //===----------------------------------------------------------------------===//
41 
42 namespace mlir {
43 namespace detail {
44 /// Private implementation details of the `TimingManager`.
45 class TimingManagerImpl {
46 public:
47   // Identifier allocator, map, and mutex for thread safety.
48   llvm::BumpPtrAllocator identifierAllocator;
49   llvm::StringSet<llvm::BumpPtrAllocator &> identifiers;
50   llvm::sys::SmartRWMutex<true> identifierMutex;
51 
52   /// A thread local cache of identifiers to reduce lock contention.
53   ThreadLocalCache<llvm::StringMap<llvm::StringMapEntry<llvm::NoneType> *>>
54       localIdentifierCache;
55 
TimingManagerImpl()56   TimingManagerImpl() : identifiers(identifierAllocator) {}
57 };
58 } // namespace detail
59 } // namespace mlir
60 
TimingManager()61 TimingManager::TimingManager() : impl(std::make_unique<TimingManagerImpl>()) {}
62 
~TimingManager()63 TimingManager::~TimingManager() {}
64 
65 /// Get the root timer of this timing manager.
getRootTimer()66 Timer TimingManager::getRootTimer() {
67   auto rt = rootTimer();
68   return rt.hasValue() ? Timer(*this, rt.getValue()) : Timer();
69 }
70 
71 /// Get the root timer of this timing manager wrapped in a `TimingScope`.
getRootScope()72 TimingScope TimingManager::getRootScope() {
73   return TimingScope(getRootTimer());
74 }
75 
76 //===----------------------------------------------------------------------===//
77 // Identifier uniquing
78 //===----------------------------------------------------------------------===//
79 
80 /// Return an identifier for the specified string.
get(StringRef str,TimingManager & tm)81 TimingIdentifier TimingIdentifier::get(StringRef str, TimingManager &tm) {
82   // Check for an existing instance in the local cache.
83   auto &impl = *tm.impl;
84   auto *&localEntry = (*impl.localIdentifierCache)[str];
85   if (localEntry)
86     return TimingIdentifier(localEntry);
87 
88   // Check for an existing identifier in read-only mode.
89   {
90     llvm::sys::SmartScopedReader<true> contextLock(impl.identifierMutex);
91     auto it = impl.identifiers.find(str);
92     if (it != impl.identifiers.end()) {
93       localEntry = &*it;
94       return TimingIdentifier(localEntry);
95     }
96   }
97 
98   // Acquire a writer-lock so that we can safely create the new instance.
99   llvm::sys::SmartScopedWriter<true> contextLock(impl.identifierMutex);
100   auto it = impl.identifiers.insert(str).first;
101   localEntry = &*it;
102   return TimingIdentifier(localEntry);
103 }
104 
105 //===----------------------------------------------------------------------===//
106 // Helpers for time record printing
107 //===----------------------------------------------------------------------===//
108 
109 namespace {
110 
111 /// Simple record class to record timing information.
112 struct TimeRecord {
TimeRecord__anon47832b370111::TimeRecord113   TimeRecord(double wall = 0.0, double user = 0.0) : wall(wall), user(user) {}
114 
operator +=__anon47832b370111::TimeRecord115   TimeRecord &operator+=(const TimeRecord &other) {
116     wall += other.wall;
117     user += other.user;
118     return *this;
119   }
120 
operator -=__anon47832b370111::TimeRecord121   TimeRecord &operator-=(const TimeRecord &other) {
122     wall -= other.wall;
123     user -= other.user;
124     return *this;
125   }
126 
127   /// Print the current time record to 'os', with a breakdown showing
128   /// contributions to the give 'total' time record.
print__anon47832b370111::TimeRecord129   void print(raw_ostream &os, const TimeRecord &total) {
130     if (total.user != total.wall)
131       os << llvm::format("  %8.4f (%5.1f%%)", user, 100.0 * user / total.user);
132     os << llvm::format("  %8.4f (%5.1f%%)  ", wall, 100.0 * wall / total.wall);
133   }
134 
135   double wall, user;
136 };
137 
138 } // namespace
139 
140 /// Utility to print a single line entry in the timer output.
printTimeEntry(raw_ostream & os,unsigned indent,StringRef name,TimeRecord time,TimeRecord total)141 static void printTimeEntry(raw_ostream &os, unsigned indent, StringRef name,
142                            TimeRecord time, TimeRecord total) {
143   time.print(os, total);
144   os.indent(indent) << name << "\n";
145 }
146 
147 /// Utility to print the timer heading information.
printTimeHeader(raw_ostream & os,TimeRecord total)148 static void printTimeHeader(raw_ostream &os, TimeRecord total) {
149   // Figure out how many spaces to description name.
150   unsigned padding = (80 - kTimingDescription.size()) / 2;
151   os << "===" << std::string(73, '-') << "===\n";
152   os.indent(padding) << kTimingDescription << '\n';
153   os << "===" << std::string(73, '-') << "===\n";
154 
155   // Print the total time followed by the section headers.
156   os << llvm::format("  Total Execution Time: %.4f seconds\n\n", total.wall);
157   if (total.user != total.wall)
158     os << "  ----User Time----";
159   os << "  ----Wall Time----  ----Name----\n";
160 }
161 
162 //===----------------------------------------------------------------------===//
163 // Timer Implementation for DefaultTimingManager
164 //===----------------------------------------------------------------------===//
165 
166 namespace {
167 
168 /// A timer used to sample execution time.
169 ///
170 /// Separately tracks wall time and user time to account for parallel threads of
171 /// execution. Timers are intended to be started and stopped multiple times.
172 /// Each start and stop will add to the timer's wall and user time.
173 class TimerImpl {
174 public:
175   using ChildrenMap = llvm::MapVector<const void *, std::unique_ptr<TimerImpl>>;
176   using AsyncChildrenMap = llvm::DenseMap<uint64_t, ChildrenMap>;
177 
TimerImpl(std::string && name)178   TimerImpl(std::string &&name) : threadId(llvm::get_threadid()), name(name) {}
179 
180   /// Start the timer.
start()181   void start() { startTime = std::chrono::steady_clock::now(); }
182 
183   /// Stop the timer.
stop()184   void stop() {
185     auto newTime = std::chrono::steady_clock::now() - startTime;
186     wallTime += newTime;
187     userTime += newTime;
188   }
189 
190   /// Create a child timer nested within this one. Multiple calls to this
191   /// function with the same unique identifier `id` will return the same child
192   /// timer.
193   ///
194   /// This function can be called from other threads, as long as this timer
195   /// outlives any uses of the child timer on the other thread.
nest(const void * id,function_ref<std::string ()> nameBuilder)196   TimerImpl *nest(const void *id, function_ref<std::string()> nameBuilder) {
197     auto tid = llvm::get_threadid();
198     if (tid == threadId)
199       return nestTail(children[id], std::move(nameBuilder));
200     std::unique_lock<std::mutex> lock(asyncMutex);
201     return nestTail(asyncChildren[tid][id], std::move(nameBuilder));
202   }
203 
204   /// Tail-called from `nest()`.
nestTail(std::unique_ptr<TimerImpl> & child,function_ref<std::string ()> nameBuilder)205   TimerImpl *nestTail(std::unique_ptr<TimerImpl> &child,
206                       function_ref<std::string()> nameBuilder) {
207     if (!child)
208       child = std::make_unique<TimerImpl>(nameBuilder());
209     return child.get();
210   }
211 
212   /// Finalize this timer and all its children.
213   ///
214   /// If this timer has async children, which happens if `nest()` was called
215   /// from another thread, this function merges the async childr timers into the
216   /// main list of child timers.
217   ///
218   /// Caution: Call this function only after all nested timers running on other
219   /// threads no longer need their timers!
finalize()220   void finalize() {
221     addAsyncUserTime();
222     mergeAsyncChildren();
223   }
224 
225   /// Add the user time of all async children to this timer's user time. This is
226   /// necessary since the user time already contains all regular child timers,
227   /// but not the asynchronous ones (by the nesting nature of the timers).
addAsyncUserTime()228   std::chrono::nanoseconds addAsyncUserTime() {
229     auto added = std::chrono::nanoseconds(0);
230     for (auto &child : children)
231       added += child.second->addAsyncUserTime();
232     for (auto &thread : asyncChildren) {
233       for (auto &child : thread.second) {
234         child.second->addAsyncUserTime();
235         added += child.second->userTime;
236       }
237     }
238     userTime += added;
239     return added;
240   }
241 
242   /// Ensure that this timer and recursively all its children have their async
243   /// children folded into the main map of children.
mergeAsyncChildren()244   void mergeAsyncChildren() {
245     for (auto &child : children)
246       child.second->mergeAsyncChildren();
247     mergeChildren(std::move(asyncChildren));
248     assert(asyncChildren.empty());
249   }
250 
251   /// Merge multiple child timers into this timer.
252   ///
253   /// Children in `other` are added as children to this timer, or, if this timer
254   /// already contains a child with the corresponding unique identifier, are
255   /// merged into the existing child.
mergeChildren(ChildrenMap && other)256   void mergeChildren(ChildrenMap &&other) {
257     if (children.empty()) {
258       children = std::move(other);
259       for (auto &child : children)
260         child.second->mergeAsyncChildren();
261     } else {
262       for (auto &child : other)
263         mergeChild(child.first, std::move(child.second));
264       other.clear();
265     }
266   }
267 
268   /// See above.
mergeChildren(AsyncChildrenMap && other)269   void mergeChildren(AsyncChildrenMap &&other) {
270     for (auto &thread : other) {
271       mergeChildren(std::move(thread.second));
272       assert(thread.second.empty());
273     }
274     other.clear();
275   }
276 
277   /// Merge a child timer into this timer for a given unique identifier.
278   ///
279   /// Moves all child and async child timers of `other` into this timer's child
280   /// for the given unique identifier.
mergeChild(const void * id,std::unique_ptr<TimerImpl> && other)281   void mergeChild(const void *id, std::unique_ptr<TimerImpl> &&other) {
282     auto &into = children[id];
283     if (!into) {
284       into = std::move(other);
285       into->mergeAsyncChildren();
286     } else {
287       into->wallTime = std::max(into->wallTime, other->wallTime);
288       into->userTime += other->userTime;
289       into->mergeChildren(std::move(other->children));
290       into->mergeChildren(std::move(other->asyncChildren));
291       other.reset();
292     }
293   }
294 
295   /// Dump a human-readable tree representation of the timer and its children.
296   /// This is useful for debugging the timing mechanisms and structure of the
297   /// timers.
dump(raw_ostream & os,unsigned indent=0,unsigned markThreadId=0)298   void dump(raw_ostream &os, unsigned indent = 0, unsigned markThreadId = 0) {
299     auto time = getTimeRecord();
300     os << std::string(indent * 2, ' ') << name << " [" << threadId << "]"
301        << llvm::format("  %7.4f / %7.4f", time.user, time.wall);
302     if (threadId != markThreadId && markThreadId != 0)
303       os << " (*)";
304     os << "\n";
305     for (auto &child : children)
306       child.second->dump(os, indent + 1, threadId);
307     for (auto &thread : asyncChildren)
308       for (auto &child : thread.second)
309         child.second->dump(os, indent + 1, threadId);
310   }
311 
312   /// Returns the time for this timer in seconds.
getTimeRecord()313   TimeRecord getTimeRecord() {
314     return TimeRecord(
315         std::chrono::duration_cast<std::chrono::duration<double>>(wallTime)
316             .count(),
317         std::chrono::duration_cast<std::chrono::duration<double>>(userTime)
318             .count());
319   }
320 
321   /// Print the timing result in list mode.
printAsList(raw_ostream & os,TimeRecord total)322   void printAsList(raw_ostream &os, TimeRecord total) {
323     // Flatten the leaf timers in the tree and merge them by name.
324     llvm::StringMap<TimeRecord> mergedTimers;
325     std::function<void(TimerImpl *)> addTimer = [&](TimerImpl *timer) {
326       mergedTimers[timer->name] += timer->getTimeRecord();
327       for (auto &children : timer->children)
328         addTimer(children.second.get());
329     };
330     addTimer(this);
331 
332     // Sort the timing information by wall time.
333     std::vector<std::pair<StringRef, TimeRecord>> timerNameAndTime;
334     for (auto &it : mergedTimers)
335       timerNameAndTime.emplace_back(it.first(), it.second);
336     llvm::array_pod_sort(timerNameAndTime.begin(), timerNameAndTime.end(),
337                          [](const std::pair<StringRef, TimeRecord> *lhs,
338                             const std::pair<StringRef, TimeRecord> *rhs) {
339                            return llvm::array_pod_sort_comparator<double>(
340                                &rhs->second.wall, &lhs->second.wall);
341                          });
342 
343     // Print the timing information sequentially.
344     for (auto &timeData : timerNameAndTime)
345       printTimeEntry(os, 0, timeData.first, timeData.second, total);
346   }
347 
348   /// Print the timing result in tree mode.
printAsTree(raw_ostream & os,TimeRecord total,unsigned indent=0)349   void printAsTree(raw_ostream &os, TimeRecord total, unsigned indent = 0) {
350     unsigned childIndent = indent;
351     if (!hidden) {
352       printTimeEntry(os, indent, name, getTimeRecord(), total);
353       childIndent += 2;
354     }
355     for (auto &child : children) {
356       child.second->printAsTree(os, total, childIndent);
357     }
358   }
359 
360   /// Print the current timing information.
print(raw_ostream & os,DisplayMode displayMode)361   void print(raw_ostream &os, DisplayMode displayMode) {
362     // Print the banner.
363     auto total = getTimeRecord();
364     printTimeHeader(os, total);
365 
366     // Defer to a specialized printer for each display mode.
367     switch (displayMode) {
368     case DisplayMode::List:
369       printAsList(os, total);
370       break;
371     case DisplayMode::Tree:
372       printAsTree(os, total);
373       break;
374     }
375 
376     // Print the top-level time not accounted for by child timers, and the
377     // total.
378     auto rest = total;
379     for (auto &child : children)
380       rest -= child.second->getTimeRecord();
381     printTimeEntry(os, 0, "Rest", rest, total);
382     printTimeEntry(os, 0, "Total", total, total);
383     os.flush();
384   }
385 
386   /// The last time instant at which the timer was started.
387   std::chrono::time_point<std::chrono::steady_clock> startTime;
388 
389   /// Accumulated wall time. If multiple threads of execution are merged into
390   /// this timer, the wall time will hold the maximum wall time of each thread
391   /// of execution.
392   std::chrono::nanoseconds wallTime = std::chrono::nanoseconds(0);
393 
394   /// Accumulated user time. If multiple threads of execution are merged into
395   /// this timer, each thread's user time is added here.
396   std::chrono::nanoseconds userTime = std::chrono::nanoseconds(0);
397 
398   /// The thread on which this timer is running.
399   uint64_t threadId;
400 
401   /// A descriptive name for this timer.
402   std::string name;
403 
404   /// Whether to omit this timer from reports and directly show its children.
405   bool hidden = false;
406 
407   /// Child timers on the same thread the timer itself. We keep at most one
408   /// timer per unique identifier.
409   ChildrenMap children;
410 
411   /// Child timers on other threads. We keep at most one timer per unique
412   /// identifier.
413   AsyncChildrenMap asyncChildren;
414 
415   /// Mutex for the async children.
416   std::mutex asyncMutex;
417 };
418 
419 } // namespace
420 
421 //===----------------------------------------------------------------------===//
422 // DefaultTimingManager
423 //===----------------------------------------------------------------------===//
424 
425 namespace mlir {
426 namespace detail {
427 
428 /// Implementation details of the `DefaultTimingManager`.
429 class DefaultTimingManagerImpl {
430 public:
431   /// Whether we should do our work or not.
432   bool enabled = false;
433 
434   /// The configured display mode.
435   DisplayMode displayMode = DisplayMode::Tree;
436 
437   /// The stream where we should print our output. This will always be non-null.
438   raw_ostream *output = &llvm::errs();
439 
440   /// The root timer.
441   std::unique_ptr<TimerImpl> rootTimer;
442 };
443 
444 } // namespace detail
445 } // namespace mlir
446 
DefaultTimingManager()447 DefaultTimingManager::DefaultTimingManager()
448     : impl(std::make_unique<DefaultTimingManagerImpl>()) {
449   clear(); // initializes the root timer
450 }
451 
~DefaultTimingManager()452 DefaultTimingManager::~DefaultTimingManager() { print(); }
453 
454 /// Enable or disable execution time sampling.
setEnabled(bool enabled)455 void DefaultTimingManager::setEnabled(bool enabled) { impl->enabled = enabled; }
456 
457 /// Return whether execution time sampling is enabled.
isEnabled() const458 bool DefaultTimingManager::isEnabled() const { return impl->enabled; }
459 
460 /// Change the display mode.
setDisplayMode(DisplayMode displayMode)461 void DefaultTimingManager::setDisplayMode(DisplayMode displayMode) {
462   impl->displayMode = displayMode;
463 }
464 
465 /// Return the current display mode;
getDisplayMode() const466 DefaultTimingManager::DisplayMode DefaultTimingManager::getDisplayMode() const {
467   return impl->displayMode;
468 }
469 
470 /// Change the stream where the output will be printed to.
setOutput(raw_ostream & os)471 void DefaultTimingManager::setOutput(raw_ostream &os) { impl->output = &os; }
472 
473 /// Return the current output stream where the output will be printed to.
getOutput() const474 raw_ostream &DefaultTimingManager::getOutput() const {
475   assert(impl->output);
476   return *impl->output;
477 }
478 
479 /// Print and clear the timing results.
print()480 void DefaultTimingManager::print() {
481   if (impl->enabled) {
482     impl->rootTimer->finalize();
483     impl->rootTimer->print(*impl->output, impl->displayMode);
484   }
485   clear();
486 }
487 
488 /// Clear the timing results.
clear()489 void DefaultTimingManager::clear() {
490   impl->rootTimer = std::make_unique<TimerImpl>("root");
491   impl->rootTimer->hidden = true;
492 }
493 
494 /// Debug print the timer data structures to an output stream.
dumpTimers(raw_ostream & os)495 void DefaultTimingManager::dumpTimers(raw_ostream &os) {
496   impl->rootTimer->dump(os);
497 }
498 
499 /// Debug print the timers as a list.
dumpAsList(raw_ostream & os)500 void DefaultTimingManager::dumpAsList(raw_ostream &os) {
501   impl->rootTimer->finalize();
502   impl->rootTimer->print(os, DisplayMode::List);
503 }
504 
505 /// Debug print the timers as a tree.
dumpAsTree(raw_ostream & os)506 void DefaultTimingManager::dumpAsTree(raw_ostream &os) {
507   impl->rootTimer->finalize();
508   impl->rootTimer->print(os, DisplayMode::Tree);
509 }
510 
rootTimer()511 Optional<void *> DefaultTimingManager::rootTimer() {
512   if (impl->enabled)
513     return impl->rootTimer.get();
514   return llvm::None;
515 }
516 
startTimer(void * handle)517 void DefaultTimingManager::startTimer(void *handle) {
518   static_cast<TimerImpl *>(handle)->start();
519 }
520 
stopTimer(void * handle)521 void DefaultTimingManager::stopTimer(void *handle) {
522   static_cast<TimerImpl *>(handle)->stop();
523 }
524 
nestTimer(void * handle,const void * id,function_ref<std::string ()> nameBuilder)525 void *DefaultTimingManager::nestTimer(void *handle, const void *id,
526                                       function_ref<std::string()> nameBuilder) {
527   return static_cast<TimerImpl *>(handle)->nest(id, std::move(nameBuilder));
528 }
529 
hideTimer(void * handle)530 void DefaultTimingManager::hideTimer(void *handle) {
531   static_cast<TimerImpl *>(handle)->hidden = true;
532 }
533 
534 //===----------------------------------------------------------------------===//
535 // DefaultTimingManager Command Line Options
536 //===----------------------------------------------------------------------===//
537 
538 namespace {
539 struct DefaultTimingManagerOptions {
540   llvm::cl::opt<bool> timing{"mlir-timing",
541                              llvm::cl::desc("Display execution times"),
542                              llvm::cl::init(false)};
543   llvm::cl::opt<DisplayMode> displayMode{
544       "mlir-timing-display", llvm::cl::desc("Display method for timing data"),
545       llvm::cl::init(DisplayMode::Tree),
546       llvm::cl::values(
547           clEnumValN(DisplayMode::List, "list",
548                      "display the results in a list sorted by total time"),
549           clEnumValN(DisplayMode::Tree, "tree",
550                      "display the results ina with a nested tree view"))};
551 };
552 } // end anonymous namespace
553 
554 static llvm::ManagedStatic<DefaultTimingManagerOptions> options;
555 
registerDefaultTimingManagerCLOptions()556 void mlir::registerDefaultTimingManagerCLOptions() {
557   // Make sure that the options struct has been constructed.
558   *options;
559 }
560 
applyDefaultTimingManagerCLOptions(DefaultTimingManager & tm)561 void mlir::applyDefaultTimingManagerCLOptions(DefaultTimingManager &tm) {
562   if (!options.isConstructed())
563     return;
564   tm.setEnabled(options->timing);
565   tm.setDisplayMode(options->displayMode);
566 }
567