1 //===-- TimeProfiler.cpp - Hierarchical Time Profiler ---------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements hierarchical time profiler.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/Support/TimeProfiler.h"
14 #include "llvm/ADT/STLFunctionalExtras.h"
15 #include "llvm/ADT/StringMap.h"
16 #include "llvm/Support/JSON.h"
17 #include "llvm/Support/Path.h"
18 #include "llvm/Support/Process.h"
19 #include "llvm/Support/Threading.h"
20 #include <algorithm>
21 #include <cassert>
22 #include <chrono>
23 #include <mutex>
24 #include <string>
25 #include <vector>
26 
27 using namespace llvm;
28 
29 namespace {
30 
31 using std::chrono::duration;
32 using std::chrono::duration_cast;
33 using std::chrono::microseconds;
34 using std::chrono::steady_clock;
35 using std::chrono::system_clock;
36 using std::chrono::time_point;
37 using std::chrono::time_point_cast;
38 
39 struct TimeTraceProfilerInstances {
40   std::mutex Lock;
41   std::vector<TimeTraceProfiler *> List;
42 };
43 
44 TimeTraceProfilerInstances &getTimeTraceProfilerInstances() {
45   static TimeTraceProfilerInstances Instances;
46   return Instances;
47 }
48 
49 } // anonymous namespace
50 
51 // Per Thread instance
52 static LLVM_THREAD_LOCAL TimeTraceProfiler *TimeTraceProfilerInstance = nullptr;
53 
54 TimeTraceProfiler *llvm::getTimeTraceProfilerInstance() {
55   return TimeTraceProfilerInstance;
56 }
57 
58 namespace {
59 
60 using ClockType = steady_clock;
61 using TimePointType = time_point<ClockType>;
62 using DurationType = duration<ClockType::rep, ClockType::period>;
63 using CountAndDurationType = std::pair<size_t, DurationType>;
64 using NameAndCountAndDurationType =
65     std::pair<std::string, CountAndDurationType>;
66 
67 /// Represents an open or completed time section entry to be captured.
68 struct TimeTraceProfilerEntry {
69   const TimePointType Start;
70   TimePointType End;
71   const std::string Name;
72   const std::string Detail;
73 
74   TimeTraceProfilerEntry(TimePointType &&S, TimePointType &&E, std::string &&N,
75                          std::string &&Dt)
76       : Start(std::move(S)), End(std::move(E)), Name(std::move(N)),
77         Detail(std::move(Dt)) {}
78 
79   // Calculate timings for FlameGraph. Cast time points to microsecond precision
80   // rather than casting duration. This avoids truncation issues causing inner
81   // scopes overruning outer scopes.
82   ClockType::rep getFlameGraphStartUs(TimePointType StartTime) const {
83     return (time_point_cast<microseconds>(Start) -
84             time_point_cast<microseconds>(StartTime))
85         .count();
86   }
87 
88   ClockType::rep getFlameGraphDurUs() const {
89     return (time_point_cast<microseconds>(End) -
90             time_point_cast<microseconds>(Start))
91         .count();
92   }
93 };
94 
95 } // anonymous namespace
96 
97 struct llvm::TimeTraceProfiler {
98   TimeTraceProfiler(unsigned TimeTraceGranularity = 0, StringRef ProcName = "")
99       : BeginningOfTime(system_clock::now()), StartTime(ClockType::now()),
100         ProcName(ProcName), Pid(sys::Process::getProcessId()),
101         Tid(llvm::get_threadid()), TimeTraceGranularity(TimeTraceGranularity) {
102     llvm::get_thread_name(ThreadName);
103   }
104 
105   void begin(std::string Name, llvm::function_ref<std::string()> Detail) {
106     Stack.emplace_back(ClockType::now(), TimePointType(), std::move(Name),
107                        Detail());
108   }
109 
110   void end() {
111     assert(!Stack.empty() && "Must call begin() first");
112     TimeTraceProfilerEntry &E = Stack.back();
113     E.End = ClockType::now();
114 
115     // Check that end times monotonically increase.
116     assert((Entries.empty() ||
117             (E.getFlameGraphStartUs(StartTime) + E.getFlameGraphDurUs() >=
118              Entries.back().getFlameGraphStartUs(StartTime) +
119                  Entries.back().getFlameGraphDurUs())) &&
120            "TimeProfiler scope ended earlier than previous scope");
121 
122     // Calculate duration at full precision for overall counts.
123     DurationType Duration = E.End - E.Start;
124 
125     // Only include sections longer or equal to TimeTraceGranularity msec.
126     if (duration_cast<microseconds>(Duration).count() >= TimeTraceGranularity)
127       Entries.emplace_back(E);
128 
129     // Track total time taken by each "name", but only the topmost levels of
130     // them; e.g. if there's a template instantiation that instantiates other
131     // templates from within, we only want to add the topmost one. "topmost"
132     // happens to be the ones that don't have any currently open entries above
133     // itself.
134     if (llvm::none_of(llvm::drop_begin(llvm::reverse(Stack)),
135                       [&](const TimeTraceProfilerEntry &Val) {
136                         return Val.Name == E.Name;
137                       })) {
138       auto &CountAndTotal = CountAndTotalPerName[E.Name];
139       CountAndTotal.first++;
140       CountAndTotal.second += Duration;
141     }
142 
143     Stack.pop_back();
144   }
145 
146   // Write events from this TimeTraceProfilerInstance and
147   // ThreadTimeTraceProfilerInstances.
148   void write(raw_pwrite_stream &OS) {
149     // Acquire Mutex as reading ThreadTimeTraceProfilerInstances.
150     auto &Instances = getTimeTraceProfilerInstances();
151     std::lock_guard<std::mutex> Lock(Instances.Lock);
152     assert(Stack.empty() &&
153            "All profiler sections should be ended when calling write");
154     assert(llvm::all_of(Instances.List,
155                         [](const auto &TTP) { return TTP->Stack.empty(); }) &&
156            "All profiler sections should be ended when calling write");
157 
158     json::OStream J(OS);
159     J.objectBegin();
160     J.attributeBegin("traceEvents");
161     J.arrayBegin();
162 
163     // Emit all events for the main flame graph.
164     auto writeEvent = [&](const auto &E, uint64_t Tid) {
165       auto StartUs = E.getFlameGraphStartUs(StartTime);
166       auto DurUs = E.getFlameGraphDurUs();
167 
168       J.object([&] {
169         J.attribute("pid", Pid);
170         J.attribute("tid", int64_t(Tid));
171         J.attribute("ph", "X");
172         J.attribute("ts", StartUs);
173         J.attribute("dur", DurUs);
174         J.attribute("name", E.Name);
175         if (!E.Detail.empty()) {
176           J.attributeObject("args", [&] { J.attribute("detail", E.Detail); });
177         }
178       });
179     };
180     for (const TimeTraceProfilerEntry &E : Entries)
181       writeEvent(E, this->Tid);
182     for (const TimeTraceProfiler *TTP : Instances.List)
183       for (const TimeTraceProfilerEntry &E : TTP->Entries)
184         writeEvent(E, TTP->Tid);
185 
186     // Emit totals by section name as additional "thread" events, sorted from
187     // longest one.
188     // Find highest used thread id.
189     uint64_t MaxTid = this->Tid;
190     for (const TimeTraceProfiler *TTP : Instances.List)
191       MaxTid = std::max(MaxTid, TTP->Tid);
192 
193     // Combine all CountAndTotalPerName from threads into one.
194     StringMap<CountAndDurationType> AllCountAndTotalPerName;
195     auto combineStat = [&](const auto &Stat) {
196       StringRef Key = Stat.getKey();
197       auto Value = Stat.getValue();
198       auto &CountAndTotal = AllCountAndTotalPerName[Key];
199       CountAndTotal.first += Value.first;
200       CountAndTotal.second += Value.second;
201     };
202     for (const auto &Stat : CountAndTotalPerName)
203       combineStat(Stat);
204     for (const TimeTraceProfiler *TTP : Instances.List)
205       for (const auto &Stat : TTP->CountAndTotalPerName)
206         combineStat(Stat);
207 
208     std::vector<NameAndCountAndDurationType> SortedTotals;
209     SortedTotals.reserve(AllCountAndTotalPerName.size());
210     for (const auto &Total : AllCountAndTotalPerName)
211       SortedTotals.emplace_back(std::string(Total.getKey()), Total.getValue());
212 
213     llvm::sort(SortedTotals, [](const NameAndCountAndDurationType &A,
214                                 const NameAndCountAndDurationType &B) {
215       return A.second.second > B.second.second;
216     });
217 
218     // Report totals on separate threads of tracing file.
219     uint64_t TotalTid = MaxTid + 1;
220     for (const NameAndCountAndDurationType &Total : SortedTotals) {
221       auto DurUs = duration_cast<microseconds>(Total.second.second).count();
222       auto Count = AllCountAndTotalPerName[Total.first].first;
223 
224       J.object([&] {
225         J.attribute("pid", Pid);
226         J.attribute("tid", int64_t(TotalTid));
227         J.attribute("ph", "X");
228         J.attribute("ts", 0);
229         J.attribute("dur", DurUs);
230         J.attribute("name", "Total " + Total.first);
231         J.attributeObject("args", [&] {
232           J.attribute("count", int64_t(Count));
233           J.attribute("avg ms", int64_t(DurUs / Count / 1000));
234         });
235       });
236 
237       ++TotalTid;
238     }
239 
240     auto writeMetadataEvent = [&](const char *Name, uint64_t Tid,
241                                   StringRef arg) {
242       J.object([&] {
243         J.attribute("cat", "");
244         J.attribute("pid", Pid);
245         J.attribute("tid", int64_t(Tid));
246         J.attribute("ts", 0);
247         J.attribute("ph", "M");
248         J.attribute("name", Name);
249         J.attributeObject("args", [&] { J.attribute("name", arg); });
250       });
251     };
252 
253     writeMetadataEvent("process_name", Tid, ProcName);
254     writeMetadataEvent("thread_name", Tid, ThreadName);
255     for (const TimeTraceProfiler *TTP : Instances.List)
256       writeMetadataEvent("thread_name", TTP->Tid, TTP->ThreadName);
257 
258     J.arrayEnd();
259     J.attributeEnd();
260 
261     // Emit the absolute time when this TimeProfiler started.
262     // This can be used to combine the profiling data from
263     // multiple processes and preserve actual time intervals.
264     J.attribute("beginningOfTime",
265                 time_point_cast<microseconds>(BeginningOfTime)
266                     .time_since_epoch()
267                     .count());
268 
269     J.objectEnd();
270   }
271 
272   SmallVector<TimeTraceProfilerEntry, 16> Stack;
273   SmallVector<TimeTraceProfilerEntry, 128> Entries;
274   StringMap<CountAndDurationType> CountAndTotalPerName;
275   // System clock time when the session was begun.
276   const time_point<system_clock> BeginningOfTime;
277   // Profiling clock time when the session was begun.
278   const TimePointType StartTime;
279   const std::string ProcName;
280   const sys::Process::Pid Pid;
281   SmallString<0> ThreadName;
282   const uint64_t Tid;
283 
284   // Minimum time granularity (in microseconds)
285   const unsigned TimeTraceGranularity;
286 };
287 
288 void llvm::timeTraceProfilerInitialize(unsigned TimeTraceGranularity,
289                                        StringRef ProcName) {
290   assert(TimeTraceProfilerInstance == nullptr &&
291          "Profiler should not be initialized");
292   TimeTraceProfilerInstance = new TimeTraceProfiler(
293       TimeTraceGranularity, llvm::sys::path::filename(ProcName));
294 }
295 
296 // Removes all TimeTraceProfilerInstances.
297 // Called from main thread.
298 void llvm::timeTraceProfilerCleanup() {
299   delete TimeTraceProfilerInstance;
300   TimeTraceProfilerInstance = nullptr;
301 
302   auto &Instances = getTimeTraceProfilerInstances();
303   std::lock_guard<std::mutex> Lock(Instances.Lock);
304   for (auto *TTP : Instances.List)
305     delete TTP;
306   Instances.List.clear();
307 }
308 
309 // Finish TimeTraceProfilerInstance on a worker thread.
310 // This doesn't remove the instance, just moves the pointer to global vector.
311 void llvm::timeTraceProfilerFinishThread() {
312   auto &Instances = getTimeTraceProfilerInstances();
313   std::lock_guard<std::mutex> Lock(Instances.Lock);
314   Instances.List.push_back(TimeTraceProfilerInstance);
315   TimeTraceProfilerInstance = nullptr;
316 }
317 
318 void llvm::timeTraceProfilerWrite(raw_pwrite_stream &OS) {
319   assert(TimeTraceProfilerInstance != nullptr &&
320          "Profiler object can't be null");
321   TimeTraceProfilerInstance->write(OS);
322 }
323 
324 Error llvm::timeTraceProfilerWrite(StringRef PreferredFileName,
325                                    StringRef FallbackFileName) {
326   assert(TimeTraceProfilerInstance != nullptr &&
327          "Profiler object can't be null");
328 
329   std::string Path = PreferredFileName.str();
330   if (Path.empty()) {
331     Path = FallbackFileName == "-" ? "out" : FallbackFileName.str();
332     Path += ".time-trace";
333   }
334 
335   std::error_code EC;
336   raw_fd_ostream OS(Path, EC, sys::fs::OF_TextWithCRLF);
337   if (EC)
338     return createStringError(EC, "Could not open " + Path);
339 
340   timeTraceProfilerWrite(OS);
341   return Error::success();
342 }
343 
344 void llvm::timeTraceProfilerBegin(StringRef Name, StringRef Detail) {
345   if (TimeTraceProfilerInstance != nullptr)
346     TimeTraceProfilerInstance->begin(std::string(Name),
347                                      [&]() { return std::string(Detail); });
348 }
349 
350 void llvm::timeTraceProfilerBegin(StringRef Name,
351                                   llvm::function_ref<std::string()> Detail) {
352   if (TimeTraceProfilerInstance != nullptr)
353     TimeTraceProfilerInstance->begin(std::string(Name), Detail);
354 }
355 
356 void llvm::timeTraceProfilerEnd() {
357   if (TimeTraceProfilerInstance != nullptr)
358     TimeTraceProfilerInstance->end();
359 }
360