1 //===- PassTiming.cpp -----------------------------------------------------===//
2 //
3 // Part of the MLIR Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "PassDetail.h"
10 #include "mlir/Pass/PassManager.h"
11 #include "llvm/ADT/MapVector.h"
12 #include "llvm/ADT/STLExtras.h"
13 #include "llvm/ADT/SmallVector.h"
14 #include "llvm/ADT/Statistic.h"
15 #include "llvm/Support/Format.h"
16 #include "llvm/Support/FormatVariadic.h"
17 #include "llvm/Support/Threading.h"
18 #include <chrono>
19
20 using namespace mlir;
21 using namespace mlir::detail;
22
23 constexpr StringLiteral kPassTimingDescription =
24 "... Pass execution timing report ...";
25
26 namespace {
27 /// Simple record class to record timing information.
28 struct TimeRecord {
TimeRecord__anonda9abf970111::TimeRecord29 TimeRecord(double wall = 0.0, double user = 0.0) : wall(wall), user(user) {}
30
operator +=__anonda9abf970111::TimeRecord31 TimeRecord &operator+=(const TimeRecord &other) {
32 wall += other.wall;
33 user += other.user;
34 return *this;
35 }
36
37 /// Print the current time record to 'os', with a breakdown showing
38 /// contributions to the give 'total' time record.
print__anonda9abf970111::TimeRecord39 void print(raw_ostream &os, const TimeRecord &total) {
40 if (total.user != total.wall)
41 os << llvm::format(" %7.4f (%5.1f%%) ", user,
42 100.0 * user / total.user);
43 os << llvm::format(" %7.4f (%5.1f%%) ", wall, 100.0 * wall / total.wall);
44 }
45
46 double wall, user;
47 };
48
49 /// An enumeration of the different types of timers.
50 enum class TimerKind {
51 /// This timer represents an ordered collection of pass timers, corresponding
52 /// to a pass pipeline.
53 Pipeline,
54
55 /// This timer represents a collection of pipeline timers.
56 PipelineCollection,
57
58 /// This timer represents an analysis or pass timer.
59 PassOrAnalysis
60 };
61
62 struct Timer {
Timer__anonda9abf970111::Timer63 explicit Timer(std::string &&name, TimerKind kind)
64 : name(std::move(name)), kind(kind) {}
65
66 /// Start the timer.
start__anonda9abf970111::Timer67 void start() { startTime = std::chrono::system_clock::now(); }
68
69 /// Stop the timer.
stop__anonda9abf970111::Timer70 void stop() {
71 auto newTime = std::chrono::system_clock::now() - startTime;
72 wallTime += newTime;
73 userTime += newTime;
74 }
75
76 /// Get or create a child timer with the provided name and id.
getChildTimer__anonda9abf970111::Timer77 Timer *getChildTimer(const void *id, TimerKind kind,
78 std::function<std::string()> &&nameBuilder) {
79 auto &child = children[id];
80 if (!child)
81 child = std::make_unique<Timer>(nameBuilder(), kind);
82 return child.get();
83 }
84
85 /// Returns the total time for this timer in seconds.
getTotalTime__anonda9abf970111::Timer86 TimeRecord getTotalTime() {
87 // If this is a pass or analysis timer, use the recorded time directly.
88 if (kind == TimerKind::PassOrAnalysis) {
89 return TimeRecord(
90 std::chrono::duration_cast<std::chrono::duration<double>>(wallTime)
91 .count(),
92 std::chrono::duration_cast<std::chrono::duration<double>>(userTime)
93 .count());
94 }
95
96 // Otherwise, accumulate the timing from each of the children.
97 TimeRecord totalTime;
98 for (auto &child : children)
99 totalTime += child.second->getTotalTime();
100 return totalTime;
101 }
102
103 /// A map of unique identifiers to child timers.
104 using ChildrenMap = llvm::MapVector<const void *, std::unique_ptr<Timer>>;
105
106 /// Merge the timing data from 'other' into this timer.
merge__anonda9abf970111::Timer107 void merge(Timer &&other) {
108 if (wallTime < other.wallTime)
109 wallTime = other.wallTime;
110 userTime += other.userTime;
111 mergeChildren(std::move(other.children));
112 }
113
114 /// Merge the timer children in 'otherChildren' with the children of this
115 /// timer.
mergeChildren__anonda9abf970111::Timer116 void mergeChildren(ChildrenMap &&otherChildren) {
117 // Check for an empty children list.
118 if (children.empty()) {
119 children = std::move(otherChildren);
120 return;
121 }
122
123 // Pipeline merges are handled separately as the children are merged
124 // lexicographically.
125 if (kind == TimerKind::Pipeline) {
126 assert(children.size() == otherChildren.size() &&
127 "pipeline merge requires the same number of children");
128 for (auto it : llvm::zip(children, otherChildren))
129 std::get<0>(it).second->merge(std::move(*std::get<1>(it).second));
130 return;
131 }
132
133 // Otherwise, we merge children based upon their timer key.
134 for (auto &otherChild : otherChildren)
135 mergeChild(std::move(otherChild));
136 }
137
138 /// Merge in the given child timer and id into this timer.
mergeChild__anonda9abf970111::Timer139 void mergeChild(ChildrenMap::value_type &&childIt) {
140 auto &child = children[childIt.first];
141 if (!child)
142 child = std::move(childIt.second);
143 else
144 child->merge(std::move(*childIt.second));
145 }
146
147 /// Raw timing information.
148 std::chrono::time_point<std::chrono::system_clock> startTime;
149 std::chrono::nanoseconds wallTime = std::chrono::nanoseconds(0);
150 std::chrono::nanoseconds userTime = std::chrono::nanoseconds(0);
151
152 /// A map of unique identifiers to child timers.
153 ChildrenMap children;
154
155 /// A descriptive name for this timer.
156 std::string name;
157
158 /// The type of timer this instance represents.
159 TimerKind kind;
160 };
161
162 struct PassTiming : public PassInstrumentation {
PassTiming__anonda9abf970111::PassTiming163 PassTiming(PassDisplayMode displayMode) : displayMode(displayMode) {}
~PassTiming__anonda9abf970111::PassTiming164 ~PassTiming() override { print(); }
165
166 /// Setup the instrumentation hooks.
167 void runBeforePipeline(const OperationName &name,
168 const PipelineParentInfo &parentInfo) override;
169 void runAfterPipeline(const OperationName &name,
170 const PipelineParentInfo &parentInfo) override;
runBeforePass__anonda9abf970111::PassTiming171 void runBeforePass(Pass *pass, Operation *) override { startPassTimer(pass); }
172 void runAfterPass(Pass *pass, Operation *) override;
runAfterPassFailed__anonda9abf970111::PassTiming173 void runAfterPassFailed(Pass *pass, Operation *op) override {
174 runAfterPass(pass, op);
175 }
runBeforeAnalysis__anonda9abf970111::PassTiming176 void runBeforeAnalysis(StringRef name, AnalysisID *id, Operation *) override {
177 startAnalysisTimer(name, id);
178 }
179 void runAfterAnalysis(StringRef, AnalysisID *, Operation *) override;
180
181 /// Print and clear the timing results.
182 void print();
183
184 /// Start a new timer for the given pass.
185 void startPassTimer(Pass *pass);
186
187 /// Start a new timer for the given analysis.
188 void startAnalysisTimer(StringRef name, AnalysisID *id);
189
190 /// Pop the last active timer for the current thread.
popLastActiveTimer__anonda9abf970111::PassTiming191 Timer *popLastActiveTimer() {
192 auto tid = llvm::get_threadid();
193 auto &activeTimers = activeThreadTimers[tid];
194 assert(!activeTimers.empty() && "expected active timer");
195 return activeTimers.pop_back_val();
196 }
197
198 /// Print the timing result in list mode.
199 void printResultsAsList(raw_ostream &os, Timer *root, TimeRecord totalTime);
200
201 /// Print the timing result in pipeline mode.
202 void printResultsAsPipeline(raw_ostream &os, Timer *root,
203 TimeRecord totalTime);
204
205 /// Returns a timer for the provided identifier and name.
getTimer__anonda9abf970111::PassTiming206 Timer *getTimer(const void *id, TimerKind kind,
207 std::function<std::string()> &&nameBuilder) {
208 auto tid = llvm::get_threadid();
209
210 // If there is no active timer then add to the root timer.
211 auto &activeTimers = activeThreadTimers[tid];
212 Timer *parentTimer;
213 if (activeTimers.empty()) {
214 auto &rootTimer = rootTimers[tid];
215 if (!rootTimer)
216 rootTimer = std::make_unique<Timer>("root", TimerKind::Pipeline);
217 parentTimer = rootTimer.get();
218 } else {
219 // Otherwise, add this to the active timer.
220 parentTimer = activeTimers.back();
221 }
222
223 auto timer = parentTimer->getChildTimer(id, kind, std::move(nameBuilder));
224 activeTimers.push_back(timer);
225 return timer;
226 }
227
228 /// The root top level timers for each thread.
229 DenseMap<uint64_t, std::unique_ptr<Timer>> rootTimers;
230
231 /// A stack of the currently active pass timers per thread.
232 DenseMap<uint64_t, SmallVector<Timer *, 4>> activeThreadTimers;
233
234 /// The display mode to use when printing the timing results.
235 PassDisplayMode displayMode;
236
237 /// A mapping of pipeline timers that need to be merged into the parent
238 /// collection. The timers are mapped to the parent info to merge into.
239 DenseMap<PipelineParentInfo, SmallVector<Timer::ChildrenMap::value_type, 4>>
240 pipelinesToMerge;
241 };
242 } // end anonymous namespace
243
runBeforePipeline(const OperationName & name,const PipelineParentInfo & parentInfo)244 void PassTiming::runBeforePipeline(const OperationName &name,
245 const PipelineParentInfo &parentInfo) {
246 // We don't actually want to time the piplelines, they gather their total
247 // from their held passes.
248 getTimer(name.getAsOpaquePointer(), TimerKind::Pipeline,
249 [&] { return ("'" + name.getStringRef() + "' Pipeline").str(); });
250 }
251
runAfterPipeline(const OperationName & name,const PipelineParentInfo & parentInfo)252 void PassTiming::runAfterPipeline(const OperationName &name,
253 const PipelineParentInfo &parentInfo) {
254 // Pop the timer for the pipeline.
255 auto tid = llvm::get_threadid();
256 auto &activeTimers = activeThreadTimers[tid];
257 assert(!activeTimers.empty() && "expected active timer");
258 activeTimers.pop_back();
259
260 // If the current thread is the same as the parent, there is nothing left to
261 // do.
262 if (tid == parentInfo.parentThreadID)
263 return;
264
265 // Otherwise, mark the pipeline timer for merging into the correct parent
266 // thread.
267 assert(activeTimers.empty() && "expected parent timer to be root");
268 auto *parentTimer = rootTimers[tid].get();
269 assert(parentTimer->children.size() == 1 &&
270 parentTimer->children.count(name.getAsOpaquePointer()) &&
271 "expected a single pipeline timer");
272 pipelinesToMerge[parentInfo].push_back(
273 std::move(*parentTimer->children.begin()));
274 rootTimers.erase(tid);
275 }
276
277 /// Start a new timer for the given pass.
startPassTimer(Pass * pass)278 void PassTiming::startPassTimer(Pass *pass) {
279 auto kind = isAdaptorPass(pass) ? TimerKind::PipelineCollection
280 : TimerKind::PassOrAnalysis;
281 Timer *timer = getTimer(pass, kind, [pass]() -> std::string {
282 if (auto *adaptor = getAdaptorPassBase(pass))
283 return adaptor->getName();
284 return pass->getName();
285 });
286
287 // We don't actually want to time the adaptor passes, they gather their total
288 // from their held passes.
289 if (!isAdaptorPass(pass))
290 timer->start();
291 }
292
293 /// Start a new timer for the given analysis.
startAnalysisTimer(StringRef name,AnalysisID * id)294 void PassTiming::startAnalysisTimer(StringRef name, AnalysisID *id) {
295 Timer *timer = getTimer(id, TimerKind::PassOrAnalysis,
296 [name] { return "(A) " + name.str(); });
297 timer->start();
298 }
299
300 /// Stop a pass timer.
runAfterPass(Pass * pass,Operation *)301 void PassTiming::runAfterPass(Pass *pass, Operation *) {
302 Timer *timer = popLastActiveTimer();
303
304 // If this is an OpToOpPassAdaptorParallel, then we need to merge in the
305 // timing data for the pipelines running on other threads.
306 if (isa<OpToOpPassAdaptorParallel>(pass)) {
307 auto toMerge = pipelinesToMerge.find({llvm::get_threadid(), pass});
308 if (toMerge != pipelinesToMerge.end()) {
309 for (auto &it : toMerge->second)
310 timer->mergeChild(std::move(it));
311 pipelinesToMerge.erase(toMerge);
312 }
313 return;
314 }
315
316 // Adaptor passes aren't timed directly, so we don't need to stop their
317 // timers.
318 if (!isAdaptorPass(pass))
319 timer->stop();
320 }
321
322 /// Stop a timer.
runAfterAnalysis(StringRef,AnalysisID *,Operation *)323 void PassTiming::runAfterAnalysis(StringRef, AnalysisID *, Operation *) {
324 popLastActiveTimer()->stop();
325 }
326
327 /// Utility to print the timer heading information.
printTimerHeader(raw_ostream & os,TimeRecord total)328 static void printTimerHeader(raw_ostream &os, TimeRecord total) {
329 os << "===" << std::string(73, '-') << "===\n";
330 // Figure out how many spaces to description name.
331 unsigned padding = (80 - kPassTimingDescription.size()) / 2;
332 os.indent(padding) << kPassTimingDescription << '\n';
333 os << "===" << std::string(73, '-') << "===\n";
334
335 // Print the total time followed by the section headers.
336 os << llvm::format(" Total Execution Time: %5.4f seconds\n\n", total.wall);
337 if (total.user != total.wall)
338 os << " ---User Time---";
339 os << " ---Wall Time--- --- Name ---\n";
340 }
341
342 /// Utility to print a single line entry in the timer output.
printTimeEntry(raw_ostream & os,unsigned indent,StringRef name,TimeRecord time,TimeRecord totalTime)343 static void printTimeEntry(raw_ostream &os, unsigned indent, StringRef name,
344 TimeRecord time, TimeRecord totalTime) {
345 time.print(os, totalTime);
346 os.indent(indent) << name << "\n";
347 }
348
349 /// Print out the current timing information.
print()350 void PassTiming::print() {
351 // Don't print anything if there is no timing data.
352 if (rootTimers.empty())
353 return;
354
355 assert(rootTimers.size() == 1 && "expected one remaining root timer");
356 auto &rootTimer = rootTimers.begin()->second;
357 auto os = llvm::CreateInfoOutputFile();
358
359 // Print the timer header.
360 TimeRecord totalTime = rootTimer->getTotalTime();
361 printTimerHeader(*os, totalTime);
362
363 // Defer to a specialized printer for each display mode.
364 switch (displayMode) {
365 case PassDisplayMode::List:
366 printResultsAsList(*os, rootTimer.get(), totalTime);
367 break;
368 case PassDisplayMode::Pipeline:
369 printResultsAsPipeline(*os, rootTimer.get(), totalTime);
370 break;
371 }
372 printTimeEntry(*os, 0, "Total", totalTime, totalTime);
373 os->flush();
374
375 // Reset root timers.
376 rootTimers.clear();
377 activeThreadTimers.clear();
378 }
379
380 /// Print the timing result in list mode.
printResultsAsList(raw_ostream & os,Timer * root,TimeRecord totalTime)381 void PassTiming::printResultsAsList(raw_ostream &os, Timer *root,
382 TimeRecord totalTime) {
383 llvm::StringMap<TimeRecord> mergedTimings;
384
385 std::function<void(Timer *)> addTimer = [&](Timer *timer) {
386 // Only add timing information for passes and analyses.
387 if (timer->kind == TimerKind::PassOrAnalysis)
388 mergedTimings[timer->name] += timer->getTotalTime();
389 for (auto &children : timer->children)
390 addTimer(children.second.get());
391 };
392
393 // Add each of the top level timers.
394 for (auto &topLevelTimer : root->children)
395 addTimer(topLevelTimer.second.get());
396
397 // Sort the timing information by wall time.
398 std::vector<std::pair<StringRef, TimeRecord>> timerNameAndTime;
399 for (auto &it : mergedTimings)
400 timerNameAndTime.emplace_back(it.first(), it.second);
401 llvm::array_pod_sort(timerNameAndTime.begin(), timerNameAndTime.end(),
402 [](const std::pair<StringRef, TimeRecord> *lhs,
403 const std::pair<StringRef, TimeRecord> *rhs) {
404 return llvm::array_pod_sort_comparator<double>(
405 &rhs->second.wall, &lhs->second.wall);
406 });
407
408 // Print the timing information sequentially.
409 for (auto &timeData : timerNameAndTime)
410 printTimeEntry(os, 0, timeData.first, timeData.second, totalTime);
411 }
412
413 /// Print the timing result in pipeline mode.
printResultsAsPipeline(raw_ostream & os,Timer * root,TimeRecord totalTime)414 void PassTiming::printResultsAsPipeline(raw_ostream &os, Timer *root,
415 TimeRecord totalTime) {
416 std::function<void(unsigned, Timer *)> printTimer = [&](unsigned indent,
417 Timer *timer) {
418 // If this is a timer for a pipeline collection and the collection only has
419 // one pipeline child, then only print the child.
420 if (timer->kind == TimerKind::PipelineCollection &&
421 timer->children.size() == 1)
422 return printTimer(indent, timer->children.begin()->second.get());
423
424 printTimeEntry(os, indent, timer->name, timer->getTotalTime(), totalTime);
425
426 // If this timer is a pipeline, then print the children in-order.
427 if (timer->kind == TimerKind::Pipeline) {
428 for (auto &child : timer->children)
429 printTimer(indent + 2, child.second.get());
430 return;
431 }
432
433 // Otherwise, sort the children by name to give a deterministic ordering
434 // when emitting the time.
435 SmallVector<Timer *, 4> children;
436 children.reserve(timer->children.size());
437 for (auto &child : timer->children)
438 children.push_back(child.second.get());
439 llvm::array_pod_sort(children.begin(), children.end(),
440 [](Timer *const *lhs, Timer *const *rhs) {
441 return (*lhs)->name.compare((*rhs)->name);
442 });
443 for (auto &child : children)
444 printTimer(indent + 2, child);
445 };
446
447 // Print each of the top level timers.
448 for (auto &topLevelTimer : root->children)
449 printTimer(0, topLevelTimer.second.get());
450 }
451
452 //===----------------------------------------------------------------------===//
453 // PassManager
454 //===----------------------------------------------------------------------===//
455
456 /// Add an instrumentation to time the execution of passes and the computation
457 /// of analyses.
enableTiming(PassDisplayMode displayMode)458 void PassManager::enableTiming(PassDisplayMode displayMode) {
459 // Check if pass timing is already enabled.
460 if (passTiming)
461 return;
462 addInstrumentation(std::make_unique<PassTiming>(displayMode));
463 passTiming = true;
464 }
465