1 //===- Timing.cpp - Execution time measurement facilities -----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Facilities to measure and provide statistics on execution time.
10 //
11 //===----------------------------------------------------------------------===//
12
13 #include "mlir/Support/Timing.h"
14 #include "mlir/Support/ThreadLocalCache.h"
15 #include "llvm/ADT/MapVector.h"
16 #include "llvm/ADT/Statistic.h"
17 #include "llvm/ADT/StringMap.h"
18 #include "llvm/ADT/StringSet.h"
19 #include "llvm/Support/Allocator.h"
20 #include "llvm/Support/CommandLine.h"
21 #include "llvm/Support/Format.h"
22 #include "llvm/Support/FormatVariadic.h"
23 #include "llvm/Support/ManagedStatic.h"
24 #include "llvm/Support/RWMutex.h"
25 #include "llvm/Support/Threading.h"
26 #include "llvm/Support/raw_ostream.h"
27
28 #include <atomic>
29 #include <chrono>
30
31 using namespace mlir;
32 using namespace detail;
33 using DisplayMode = DefaultTimingManager::DisplayMode;
34
35 constexpr llvm::StringLiteral kTimingDescription =
36 "... Execution time report ...";
37
38 //===----------------------------------------------------------------------===//
39 // TimingManager
40 //===----------------------------------------------------------------------===//
41
42 namespace mlir {
43 namespace detail {
44 /// Private implementation details of the `TimingManager`.
45 class TimingManagerImpl {
46 public:
47 // Identifier allocator, map, and mutex for thread safety.
48 llvm::BumpPtrAllocator identifierAllocator;
49 llvm::StringSet<llvm::BumpPtrAllocator &> identifiers;
50 llvm::sys::SmartRWMutex<true> identifierMutex;
51
52 /// A thread local cache of identifiers to reduce lock contention.
53 ThreadLocalCache<llvm::StringMap<llvm::StringMapEntry<llvm::NoneType> *>>
54 localIdentifierCache;
55
TimingManagerImpl()56 TimingManagerImpl() : identifiers(identifierAllocator) {}
57 };
58 } // namespace detail
59 } // namespace mlir
60
TimingManager()61 TimingManager::TimingManager() : impl(std::make_unique<TimingManagerImpl>()) {}
62
~TimingManager()63 TimingManager::~TimingManager() {}
64
65 /// Get the root timer of this timing manager.
getRootTimer()66 Timer TimingManager::getRootTimer() {
67 auto rt = rootTimer();
68 return rt.hasValue() ? Timer(*this, rt.getValue()) : Timer();
69 }
70
71 /// Get the root timer of this timing manager wrapped in a `TimingScope`.
getRootScope()72 TimingScope TimingManager::getRootScope() {
73 return TimingScope(getRootTimer());
74 }
75
76 //===----------------------------------------------------------------------===//
77 // Identifier uniquing
78 //===----------------------------------------------------------------------===//
79
80 /// Return an identifier for the specified string.
get(StringRef str,TimingManager & tm)81 TimingIdentifier TimingIdentifier::get(StringRef str, TimingManager &tm) {
82 // Check for an existing instance in the local cache.
83 auto &impl = *tm.impl;
84 auto *&localEntry = (*impl.localIdentifierCache)[str];
85 if (localEntry)
86 return TimingIdentifier(localEntry);
87
88 // Check for an existing identifier in read-only mode.
89 {
90 llvm::sys::SmartScopedReader<true> contextLock(impl.identifierMutex);
91 auto it = impl.identifiers.find(str);
92 if (it != impl.identifiers.end()) {
93 localEntry = &*it;
94 return TimingIdentifier(localEntry);
95 }
96 }
97
98 // Acquire a writer-lock so that we can safely create the new instance.
99 llvm::sys::SmartScopedWriter<true> contextLock(impl.identifierMutex);
100 auto it = impl.identifiers.insert(str).first;
101 localEntry = &*it;
102 return TimingIdentifier(localEntry);
103 }
104
105 //===----------------------------------------------------------------------===//
106 // Helpers for time record printing
107 //===----------------------------------------------------------------------===//
108
109 namespace {
110
111 /// Simple record class to record timing information.
112 struct TimeRecord {
TimeRecord__anon47832b370111::TimeRecord113 TimeRecord(double wall = 0.0, double user = 0.0) : wall(wall), user(user) {}
114
operator +=__anon47832b370111::TimeRecord115 TimeRecord &operator+=(const TimeRecord &other) {
116 wall += other.wall;
117 user += other.user;
118 return *this;
119 }
120
operator -=__anon47832b370111::TimeRecord121 TimeRecord &operator-=(const TimeRecord &other) {
122 wall -= other.wall;
123 user -= other.user;
124 return *this;
125 }
126
127 /// Print the current time record to 'os', with a breakdown showing
128 /// contributions to the give 'total' time record.
print__anon47832b370111::TimeRecord129 void print(raw_ostream &os, const TimeRecord &total) {
130 if (total.user != total.wall)
131 os << llvm::format(" %8.4f (%5.1f%%)", user, 100.0 * user / total.user);
132 os << llvm::format(" %8.4f (%5.1f%%) ", wall, 100.0 * wall / total.wall);
133 }
134
135 double wall, user;
136 };
137
138 } // namespace
139
140 /// Utility to print a single line entry in the timer output.
printTimeEntry(raw_ostream & os,unsigned indent,StringRef name,TimeRecord time,TimeRecord total)141 static void printTimeEntry(raw_ostream &os, unsigned indent, StringRef name,
142 TimeRecord time, TimeRecord total) {
143 time.print(os, total);
144 os.indent(indent) << name << "\n";
145 }
146
147 /// Utility to print the timer heading information.
printTimeHeader(raw_ostream & os,TimeRecord total)148 static void printTimeHeader(raw_ostream &os, TimeRecord total) {
149 // Figure out how many spaces to description name.
150 unsigned padding = (80 - kTimingDescription.size()) / 2;
151 os << "===" << std::string(73, '-') << "===\n";
152 os.indent(padding) << kTimingDescription << '\n';
153 os << "===" << std::string(73, '-') << "===\n";
154
155 // Print the total time followed by the section headers.
156 os << llvm::format(" Total Execution Time: %.4f seconds\n\n", total.wall);
157 if (total.user != total.wall)
158 os << " ----User Time----";
159 os << " ----Wall Time---- ----Name----\n";
160 }
161
162 //===----------------------------------------------------------------------===//
163 // Timer Implementation for DefaultTimingManager
164 //===----------------------------------------------------------------------===//
165
166 namespace {
167
168 /// A timer used to sample execution time.
169 ///
170 /// Separately tracks wall time and user time to account for parallel threads of
171 /// execution. Timers are intended to be started and stopped multiple times.
172 /// Each start and stop will add to the timer's wall and user time.
173 class TimerImpl {
174 public:
175 using ChildrenMap = llvm::MapVector<const void *, std::unique_ptr<TimerImpl>>;
176 using AsyncChildrenMap = llvm::DenseMap<uint64_t, ChildrenMap>;
177
TimerImpl(std::string && name)178 TimerImpl(std::string &&name) : threadId(llvm::get_threadid()), name(name) {}
179
180 /// Start the timer.
start()181 void start() { startTime = std::chrono::steady_clock::now(); }
182
183 /// Stop the timer.
stop()184 void stop() {
185 auto newTime = std::chrono::steady_clock::now() - startTime;
186 wallTime += newTime;
187 userTime += newTime;
188 }
189
190 /// Create a child timer nested within this one. Multiple calls to this
191 /// function with the same unique identifier `id` will return the same child
192 /// timer.
193 ///
194 /// This function can be called from other threads, as long as this timer
195 /// outlives any uses of the child timer on the other thread.
nest(const void * id,function_ref<std::string ()> nameBuilder)196 TimerImpl *nest(const void *id, function_ref<std::string()> nameBuilder) {
197 auto tid = llvm::get_threadid();
198 if (tid == threadId)
199 return nestTail(children[id], std::move(nameBuilder));
200 std::unique_lock<std::mutex> lock(asyncMutex);
201 return nestTail(asyncChildren[tid][id], std::move(nameBuilder));
202 }
203
204 /// Tail-called from `nest()`.
nestTail(std::unique_ptr<TimerImpl> & child,function_ref<std::string ()> nameBuilder)205 TimerImpl *nestTail(std::unique_ptr<TimerImpl> &child,
206 function_ref<std::string()> nameBuilder) {
207 if (!child)
208 child = std::make_unique<TimerImpl>(nameBuilder());
209 return child.get();
210 }
211
212 /// Finalize this timer and all its children.
213 ///
214 /// If this timer has async children, which happens if `nest()` was called
215 /// from another thread, this function merges the async childr timers into the
216 /// main list of child timers.
217 ///
218 /// Caution: Call this function only after all nested timers running on other
219 /// threads no longer need their timers!
finalize()220 void finalize() {
221 addAsyncUserTime();
222 mergeAsyncChildren();
223 }
224
225 /// Add the user time of all async children to this timer's user time. This is
226 /// necessary since the user time already contains all regular child timers,
227 /// but not the asynchronous ones (by the nesting nature of the timers).
addAsyncUserTime()228 std::chrono::nanoseconds addAsyncUserTime() {
229 auto added = std::chrono::nanoseconds(0);
230 for (auto &child : children)
231 added += child.second->addAsyncUserTime();
232 for (auto &thread : asyncChildren) {
233 for (auto &child : thread.second) {
234 child.second->addAsyncUserTime();
235 added += child.second->userTime;
236 }
237 }
238 userTime += added;
239 return added;
240 }
241
242 /// Ensure that this timer and recursively all its children have their async
243 /// children folded into the main map of children.
mergeAsyncChildren()244 void mergeAsyncChildren() {
245 for (auto &child : children)
246 child.second->mergeAsyncChildren();
247 mergeChildren(std::move(asyncChildren));
248 assert(asyncChildren.empty());
249 }
250
251 /// Merge multiple child timers into this timer.
252 ///
253 /// Children in `other` are added as children to this timer, or, if this timer
254 /// already contains a child with the corresponding unique identifier, are
255 /// merged into the existing child.
mergeChildren(ChildrenMap && other)256 void mergeChildren(ChildrenMap &&other) {
257 if (children.empty()) {
258 children = std::move(other);
259 for (auto &child : children)
260 child.second->mergeAsyncChildren();
261 } else {
262 for (auto &child : other)
263 mergeChild(child.first, std::move(child.second));
264 other.clear();
265 }
266 }
267
268 /// See above.
mergeChildren(AsyncChildrenMap && other)269 void mergeChildren(AsyncChildrenMap &&other) {
270 for (auto &thread : other) {
271 mergeChildren(std::move(thread.second));
272 assert(thread.second.empty());
273 }
274 other.clear();
275 }
276
277 /// Merge a child timer into this timer for a given unique identifier.
278 ///
279 /// Moves all child and async child timers of `other` into this timer's child
280 /// for the given unique identifier.
mergeChild(const void * id,std::unique_ptr<TimerImpl> && other)281 void mergeChild(const void *id, std::unique_ptr<TimerImpl> &&other) {
282 auto &into = children[id];
283 if (!into) {
284 into = std::move(other);
285 into->mergeAsyncChildren();
286 } else {
287 into->wallTime = std::max(into->wallTime, other->wallTime);
288 into->userTime += other->userTime;
289 into->mergeChildren(std::move(other->children));
290 into->mergeChildren(std::move(other->asyncChildren));
291 other.reset();
292 }
293 }
294
295 /// Dump a human-readable tree representation of the timer and its children.
296 /// This is useful for debugging the timing mechanisms and structure of the
297 /// timers.
dump(raw_ostream & os,unsigned indent=0,unsigned markThreadId=0)298 void dump(raw_ostream &os, unsigned indent = 0, unsigned markThreadId = 0) {
299 auto time = getTimeRecord();
300 os << std::string(indent * 2, ' ') << name << " [" << threadId << "]"
301 << llvm::format(" %7.4f / %7.4f", time.user, time.wall);
302 if (threadId != markThreadId && markThreadId != 0)
303 os << " (*)";
304 os << "\n";
305 for (auto &child : children)
306 child.second->dump(os, indent + 1, threadId);
307 for (auto &thread : asyncChildren)
308 for (auto &child : thread.second)
309 child.second->dump(os, indent + 1, threadId);
310 }
311
312 /// Returns the time for this timer in seconds.
getTimeRecord()313 TimeRecord getTimeRecord() {
314 return TimeRecord(
315 std::chrono::duration_cast<std::chrono::duration<double>>(wallTime)
316 .count(),
317 std::chrono::duration_cast<std::chrono::duration<double>>(userTime)
318 .count());
319 }
320
321 /// Print the timing result in list mode.
printAsList(raw_ostream & os,TimeRecord total)322 void printAsList(raw_ostream &os, TimeRecord total) {
323 // Flatten the leaf timers in the tree and merge them by name.
324 llvm::StringMap<TimeRecord> mergedTimers;
325 std::function<void(TimerImpl *)> addTimer = [&](TimerImpl *timer) {
326 mergedTimers[timer->name] += timer->getTimeRecord();
327 for (auto &children : timer->children)
328 addTimer(children.second.get());
329 };
330 addTimer(this);
331
332 // Sort the timing information by wall time.
333 std::vector<std::pair<StringRef, TimeRecord>> timerNameAndTime;
334 for (auto &it : mergedTimers)
335 timerNameAndTime.emplace_back(it.first(), it.second);
336 llvm::array_pod_sort(timerNameAndTime.begin(), timerNameAndTime.end(),
337 [](const std::pair<StringRef, TimeRecord> *lhs,
338 const std::pair<StringRef, TimeRecord> *rhs) {
339 return llvm::array_pod_sort_comparator<double>(
340 &rhs->second.wall, &lhs->second.wall);
341 });
342
343 // Print the timing information sequentially.
344 for (auto &timeData : timerNameAndTime)
345 printTimeEntry(os, 0, timeData.first, timeData.second, total);
346 }
347
348 /// Print the timing result in tree mode.
printAsTree(raw_ostream & os,TimeRecord total,unsigned indent=0)349 void printAsTree(raw_ostream &os, TimeRecord total, unsigned indent = 0) {
350 unsigned childIndent = indent;
351 if (!hidden) {
352 printTimeEntry(os, indent, name, getTimeRecord(), total);
353 childIndent += 2;
354 }
355 for (auto &child : children) {
356 child.second->printAsTree(os, total, childIndent);
357 }
358 }
359
360 /// Print the current timing information.
print(raw_ostream & os,DisplayMode displayMode)361 void print(raw_ostream &os, DisplayMode displayMode) {
362 // Print the banner.
363 auto total = getTimeRecord();
364 printTimeHeader(os, total);
365
366 // Defer to a specialized printer for each display mode.
367 switch (displayMode) {
368 case DisplayMode::List:
369 printAsList(os, total);
370 break;
371 case DisplayMode::Tree:
372 printAsTree(os, total);
373 break;
374 }
375
376 // Print the top-level time not accounted for by child timers, and the
377 // total.
378 auto rest = total;
379 for (auto &child : children)
380 rest -= child.second->getTimeRecord();
381 printTimeEntry(os, 0, "Rest", rest, total);
382 printTimeEntry(os, 0, "Total", total, total);
383 os.flush();
384 }
385
386 /// The last time instant at which the timer was started.
387 std::chrono::time_point<std::chrono::steady_clock> startTime;
388
389 /// Accumulated wall time. If multiple threads of execution are merged into
390 /// this timer, the wall time will hold the maximum wall time of each thread
391 /// of execution.
392 std::chrono::nanoseconds wallTime = std::chrono::nanoseconds(0);
393
394 /// Accumulated user time. If multiple threads of execution are merged into
395 /// this timer, each thread's user time is added here.
396 std::chrono::nanoseconds userTime = std::chrono::nanoseconds(0);
397
398 /// The thread on which this timer is running.
399 uint64_t threadId;
400
401 /// A descriptive name for this timer.
402 std::string name;
403
404 /// Whether to omit this timer from reports and directly show its children.
405 bool hidden = false;
406
407 /// Child timers on the same thread the timer itself. We keep at most one
408 /// timer per unique identifier.
409 ChildrenMap children;
410
411 /// Child timers on other threads. We keep at most one timer per unique
412 /// identifier.
413 AsyncChildrenMap asyncChildren;
414
415 /// Mutex for the async children.
416 std::mutex asyncMutex;
417 };
418
419 } // namespace
420
421 //===----------------------------------------------------------------------===//
422 // DefaultTimingManager
423 //===----------------------------------------------------------------------===//
424
425 namespace mlir {
426 namespace detail {
427
428 /// Implementation details of the `DefaultTimingManager`.
429 class DefaultTimingManagerImpl {
430 public:
431 /// Whether we should do our work or not.
432 bool enabled = false;
433
434 /// The configured display mode.
435 DisplayMode displayMode = DisplayMode::Tree;
436
437 /// The stream where we should print our output. This will always be non-null.
438 raw_ostream *output = &llvm::errs();
439
440 /// The root timer.
441 std::unique_ptr<TimerImpl> rootTimer;
442 };
443
444 } // namespace detail
445 } // namespace mlir
446
DefaultTimingManager()447 DefaultTimingManager::DefaultTimingManager()
448 : impl(std::make_unique<DefaultTimingManagerImpl>()) {
449 clear(); // initializes the root timer
450 }
451
~DefaultTimingManager()452 DefaultTimingManager::~DefaultTimingManager() { print(); }
453
454 /// Enable or disable execution time sampling.
setEnabled(bool enabled)455 void DefaultTimingManager::setEnabled(bool enabled) { impl->enabled = enabled; }
456
457 /// Return whether execution time sampling is enabled.
isEnabled() const458 bool DefaultTimingManager::isEnabled() const { return impl->enabled; }
459
460 /// Change the display mode.
setDisplayMode(DisplayMode displayMode)461 void DefaultTimingManager::setDisplayMode(DisplayMode displayMode) {
462 impl->displayMode = displayMode;
463 }
464
465 /// Return the current display mode;
getDisplayMode() const466 DefaultTimingManager::DisplayMode DefaultTimingManager::getDisplayMode() const {
467 return impl->displayMode;
468 }
469
470 /// Change the stream where the output will be printed to.
setOutput(raw_ostream & os)471 void DefaultTimingManager::setOutput(raw_ostream &os) { impl->output = &os; }
472
473 /// Return the current output stream where the output will be printed to.
getOutput() const474 raw_ostream &DefaultTimingManager::getOutput() const {
475 assert(impl->output);
476 return *impl->output;
477 }
478
479 /// Print and clear the timing results.
print()480 void DefaultTimingManager::print() {
481 if (impl->enabled) {
482 impl->rootTimer->finalize();
483 impl->rootTimer->print(*impl->output, impl->displayMode);
484 }
485 clear();
486 }
487
488 /// Clear the timing results.
clear()489 void DefaultTimingManager::clear() {
490 impl->rootTimer = std::make_unique<TimerImpl>("root");
491 impl->rootTimer->hidden = true;
492 }
493
494 /// Debug print the timer data structures to an output stream.
dumpTimers(raw_ostream & os)495 void DefaultTimingManager::dumpTimers(raw_ostream &os) {
496 impl->rootTimer->dump(os);
497 }
498
499 /// Debug print the timers as a list.
dumpAsList(raw_ostream & os)500 void DefaultTimingManager::dumpAsList(raw_ostream &os) {
501 impl->rootTimer->finalize();
502 impl->rootTimer->print(os, DisplayMode::List);
503 }
504
505 /// Debug print the timers as a tree.
dumpAsTree(raw_ostream & os)506 void DefaultTimingManager::dumpAsTree(raw_ostream &os) {
507 impl->rootTimer->finalize();
508 impl->rootTimer->print(os, DisplayMode::Tree);
509 }
510
rootTimer()511 Optional<void *> DefaultTimingManager::rootTimer() {
512 if (impl->enabled)
513 return impl->rootTimer.get();
514 return llvm::None;
515 }
516
startTimer(void * handle)517 void DefaultTimingManager::startTimer(void *handle) {
518 static_cast<TimerImpl *>(handle)->start();
519 }
520
stopTimer(void * handle)521 void DefaultTimingManager::stopTimer(void *handle) {
522 static_cast<TimerImpl *>(handle)->stop();
523 }
524
nestTimer(void * handle,const void * id,function_ref<std::string ()> nameBuilder)525 void *DefaultTimingManager::nestTimer(void *handle, const void *id,
526 function_ref<std::string()> nameBuilder) {
527 return static_cast<TimerImpl *>(handle)->nest(id, std::move(nameBuilder));
528 }
529
hideTimer(void * handle)530 void DefaultTimingManager::hideTimer(void *handle) {
531 static_cast<TimerImpl *>(handle)->hidden = true;
532 }
533
534 //===----------------------------------------------------------------------===//
535 // DefaultTimingManager Command Line Options
536 //===----------------------------------------------------------------------===//
537
538 namespace {
539 struct DefaultTimingManagerOptions {
540 llvm::cl::opt<bool> timing{"mlir-timing",
541 llvm::cl::desc("Display execution times"),
542 llvm::cl::init(false)};
543 llvm::cl::opt<DisplayMode> displayMode{
544 "mlir-timing-display", llvm::cl::desc("Display method for timing data"),
545 llvm::cl::init(DisplayMode::Tree),
546 llvm::cl::values(
547 clEnumValN(DisplayMode::List, "list",
548 "display the results in a list sorted by total time"),
549 clEnumValN(DisplayMode::Tree, "tree",
550 "display the results ina with a nested tree view"))};
551 };
552 } // end anonymous namespace
553
554 static llvm::ManagedStatic<DefaultTimingManagerOptions> options;
555
registerDefaultTimingManagerCLOptions()556 void mlir::registerDefaultTimingManagerCLOptions() {
557 // Make sure that the options struct has been constructed.
558 *options;
559 }
560
applyDefaultTimingManagerCLOptions(DefaultTimingManager & tm)561 void mlir::applyDefaultTimingManagerCLOptions(DefaultTimingManager &tm) {
562 if (!options.isConstructed())
563 return;
564 tm.setEnabled(options->timing);
565 tm.setDisplayMode(options->displayMode);
566 }
567