1 //===-- TimeProfiler.cpp - Hierarchical Time Profiler ---------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements hierarchical time profiler.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/Support/TimeProfiler.h"
14 #include "llvm/ADT/STLExtras.h"
15 #include "llvm/ADT/StringMap.h"
16 #include "llvm/Support/CommandLine.h"
17 #include "llvm/Support/JSON.h"
18 #include "llvm/Support/Path.h"
19 #include "llvm/Support/Process.h"
20 #include "llvm/Support/Threading.h"
21 #include <algorithm>
22 #include <cassert>
23 #include <chrono>
24 #include <mutex>
25 #include <string>
26 #include <vector>
27 
28 using namespace std::chrono;
29 using namespace llvm;
30 
31 static std::mutex Mu;
32 // List of all instances
33 static std::vector<TimeTraceProfiler *>
34     ThreadTimeTraceProfilerInstances; // GUARDED_BY(Mu)
35 // Per Thread instance
36 static LLVM_THREAD_LOCAL TimeTraceProfiler *TimeTraceProfilerInstance = nullptr;
37 
38 TimeTraceProfiler *llvm::getTimeTraceProfilerInstance() {
39   return TimeTraceProfilerInstance;
40 }
41 
42 typedef duration<steady_clock::rep, steady_clock::period> DurationType;
43 typedef time_point<steady_clock> TimePointType;
44 typedef std::pair<size_t, DurationType> CountAndDurationType;
45 typedef std::pair<std::string, CountAndDurationType>
46     NameAndCountAndDurationType;
47 
48 namespace {
49 struct Entry {
50   const TimePointType Start;
51   TimePointType End;
52   const std::string Name;
53   const std::string Detail;
54 
55   Entry(TimePointType &&S, TimePointType &&E, std::string &&N, std::string &&Dt)
56       : Start(std::move(S)), End(std::move(E)), Name(std::move(N)),
57         Detail(std::move(Dt)) {}
58 
59   // Calculate timings for FlameGraph. Cast time points to microsecond precision
60   // rather than casting duration. This avoid truncation issues causing inner
61   // scopes overruning outer scopes.
62   steady_clock::rep getFlameGraphStartUs(TimePointType StartTime) const {
63     return (time_point_cast<microseconds>(Start) -
64             time_point_cast<microseconds>(StartTime))
65         .count();
66   }
67 
68   steady_clock::rep getFlameGraphDurUs() const {
69     return (time_point_cast<microseconds>(End) -
70             time_point_cast<microseconds>(Start))
71         .count();
72   }
73 };
74 } // namespace
75 
76 struct llvm::TimeTraceProfiler {
77   TimeTraceProfiler(unsigned TimeTraceGranularity = 0, StringRef ProcName = "")
78       : BeginningOfTime(system_clock::now()), StartTime(steady_clock::now()),
79         ProcName(ProcName), Pid(sys::Process::getProcessId()),
80         Tid(llvm::get_threadid()), TimeTraceGranularity(TimeTraceGranularity) {
81     llvm::get_thread_name(ThreadName);
82   }
83 
84   void begin(std::string Name, llvm::function_ref<std::string()> Detail) {
85     Stack.emplace_back(steady_clock::now(), TimePointType(), std::move(Name),
86                        Detail());
87   }
88 
89   void end() {
90     assert(!Stack.empty() && "Must call begin() first");
91     Entry &E = Stack.back();
92     E.End = steady_clock::now();
93 
94     // Check that end times monotonically increase.
95     assert((Entries.empty() ||
96             (E.getFlameGraphStartUs(StartTime) + E.getFlameGraphDurUs() >=
97              Entries.back().getFlameGraphStartUs(StartTime) +
98                  Entries.back().getFlameGraphDurUs())) &&
99            "TimeProfiler scope ended earlier than previous scope");
100 
101     // Calculate duration at full precision for overall counts.
102     DurationType Duration = E.End - E.Start;
103 
104     // Only include sections longer or equal to TimeTraceGranularity msec.
105     if (duration_cast<microseconds>(Duration).count() >= TimeTraceGranularity)
106       Entries.emplace_back(E);
107 
108     // Track total time taken by each "name", but only the topmost levels of
109     // them; e.g. if there's a template instantiation that instantiates other
110     // templates from within, we only want to add the topmost one. "topmost"
111     // happens to be the ones that don't have any currently open entries above
112     // itself.
113     if (std::find_if(++Stack.rbegin(), Stack.rend(), [&](const Entry &Val) {
114           return Val.Name == E.Name;
115         }) == Stack.rend()) {
116       auto &CountAndTotal = CountAndTotalPerName[E.Name];
117       CountAndTotal.first++;
118       CountAndTotal.second += Duration;
119     }
120 
121     Stack.pop_back();
122   }
123 
124   // Write events from this TimeTraceProfilerInstance and
125   // ThreadTimeTraceProfilerInstances.
126   void write(raw_pwrite_stream &OS) {
127     // Acquire Mutex as reading ThreadTimeTraceProfilerInstances.
128     std::lock_guard<std::mutex> Lock(Mu);
129     assert(Stack.empty() &&
130            "All profiler sections should be ended when calling write");
131     assert(llvm::all_of(ThreadTimeTraceProfilerInstances,
132                         [](const auto &TTP) { return TTP->Stack.empty(); }) &&
133            "All profiler sections should be ended when calling write");
134 
135     json::OStream J(OS);
136     J.objectBegin();
137     J.attributeBegin("traceEvents");
138     J.arrayBegin();
139 
140     // Emit all events for the main flame graph.
141     auto writeEvent = [&](const auto &E, uint64_t Tid) {
142       auto StartUs = E.getFlameGraphStartUs(StartTime);
143       auto DurUs = E.getFlameGraphDurUs();
144 
145       J.object([&] {
146         J.attribute("pid", Pid);
147         J.attribute("tid", int64_t(Tid));
148         J.attribute("ph", "X");
149         J.attribute("ts", StartUs);
150         J.attribute("dur", DurUs);
151         J.attribute("name", E.Name);
152         if (!E.Detail.empty()) {
153           J.attributeObject("args", [&] { J.attribute("detail", E.Detail); });
154         }
155       });
156     };
157     for (const Entry &E : Entries)
158       writeEvent(E, this->Tid);
159     for (const TimeTraceProfiler *TTP : ThreadTimeTraceProfilerInstances)
160       for (const Entry &E : TTP->Entries)
161         writeEvent(E, TTP->Tid);
162 
163     // Emit totals by section name as additional "thread" events, sorted from
164     // longest one.
165     // Find highest used thread id.
166     uint64_t MaxTid = this->Tid;
167     for (const TimeTraceProfiler *TTP : ThreadTimeTraceProfilerInstances)
168       MaxTid = std::max(MaxTid, TTP->Tid);
169 
170     // Combine all CountAndTotalPerName from threads into one.
171     StringMap<CountAndDurationType> AllCountAndTotalPerName;
172     auto combineStat = [&](const auto &Stat) {
173       StringRef Key = Stat.getKey();
174       auto Value = Stat.getValue();
175       auto &CountAndTotal = AllCountAndTotalPerName[Key];
176       CountAndTotal.first += Value.first;
177       CountAndTotal.second += Value.second;
178     };
179     for (const auto &Stat : CountAndTotalPerName)
180       combineStat(Stat);
181     for (const TimeTraceProfiler *TTP : ThreadTimeTraceProfilerInstances)
182       for (const auto &Stat : TTP->CountAndTotalPerName)
183         combineStat(Stat);
184 
185     std::vector<NameAndCountAndDurationType> SortedTotals;
186     SortedTotals.reserve(AllCountAndTotalPerName.size());
187     for (const auto &Total : AllCountAndTotalPerName)
188       SortedTotals.emplace_back(std::string(Total.getKey()), Total.getValue());
189 
190     llvm::sort(SortedTotals, [](const NameAndCountAndDurationType &A,
191                                 const NameAndCountAndDurationType &B) {
192       return A.second.second > B.second.second;
193     });
194 
195     // Report totals on separate threads of tracing file.
196     uint64_t TotalTid = MaxTid + 1;
197     for (const NameAndCountAndDurationType &Total : SortedTotals) {
198       auto DurUs = duration_cast<microseconds>(Total.second.second).count();
199       auto Count = AllCountAndTotalPerName[Total.first].first;
200 
201       J.object([&] {
202         J.attribute("pid", Pid);
203         J.attribute("tid", int64_t(TotalTid));
204         J.attribute("ph", "X");
205         J.attribute("ts", 0);
206         J.attribute("dur", DurUs);
207         J.attribute("name", "Total " + Total.first);
208         J.attributeObject("args", [&] {
209           J.attribute("count", int64_t(Count));
210           J.attribute("avg ms", int64_t(DurUs / Count / 1000));
211         });
212       });
213 
214       ++TotalTid;
215     }
216 
217     auto writeMetadataEvent = [&](const char *Name, uint64_t Tid,
218                                   StringRef arg) {
219       J.object([&] {
220         J.attribute("cat", "");
221         J.attribute("pid", Pid);
222         J.attribute("tid", int64_t(Tid));
223         J.attribute("ts", 0);
224         J.attribute("ph", "M");
225         J.attribute("name", Name);
226         J.attributeObject("args", [&] { J.attribute("name", arg); });
227       });
228     };
229 
230     writeMetadataEvent("process_name", Tid, ProcName);
231     writeMetadataEvent("thread_name", Tid, ThreadName);
232     for (const TimeTraceProfiler *TTP : ThreadTimeTraceProfilerInstances)
233       writeMetadataEvent("thread_name", TTP->Tid, TTP->ThreadName);
234 
235     J.arrayEnd();
236     J.attributeEnd();
237 
238     // Emit the absolute time when this TimeProfiler started.
239     // This can be used to combine the profiling data from
240     // multiple processes and preserve actual time intervals.
241     J.attribute("beginningOfTime",
242                 time_point_cast<microseconds>(BeginningOfTime)
243                     .time_since_epoch()
244                     .count());
245 
246     J.objectEnd();
247   }
248 
249   SmallVector<Entry, 16> Stack;
250   SmallVector<Entry, 128> Entries;
251   StringMap<CountAndDurationType> CountAndTotalPerName;
252   const time_point<system_clock> BeginningOfTime;
253   const TimePointType StartTime;
254   const std::string ProcName;
255   const sys::Process::Pid Pid;
256   SmallString<0> ThreadName;
257   const uint64_t Tid;
258 
259   // Minimum time granularity (in microseconds)
260   const unsigned TimeTraceGranularity;
261 };
262 
263 void llvm::timeTraceProfilerInitialize(unsigned TimeTraceGranularity,
264                                        StringRef ProcName) {
265   assert(TimeTraceProfilerInstance == nullptr &&
266          "Profiler should not be initialized");
267   TimeTraceProfilerInstance = new TimeTraceProfiler(
268       TimeTraceGranularity, llvm::sys::path::filename(ProcName));
269 }
270 
271 // Removes all TimeTraceProfilerInstances.
272 // Called from main thread.
273 void llvm::timeTraceProfilerCleanup() {
274   delete TimeTraceProfilerInstance;
275   std::lock_guard<std::mutex> Lock(Mu);
276   for (auto TTP : ThreadTimeTraceProfilerInstances)
277     delete TTP;
278   ThreadTimeTraceProfilerInstances.clear();
279 }
280 
281 // Finish TimeTraceProfilerInstance on a worker thread.
282 // This doesn't remove the instance, just moves the pointer to global vector.
283 void llvm::timeTraceProfilerFinishThread() {
284   std::lock_guard<std::mutex> Lock(Mu);
285   ThreadTimeTraceProfilerInstances.push_back(TimeTraceProfilerInstance);
286   TimeTraceProfilerInstance = nullptr;
287 }
288 
289 void llvm::timeTraceProfilerWrite(raw_pwrite_stream &OS) {
290   assert(TimeTraceProfilerInstance != nullptr &&
291          "Profiler object can't be null");
292   TimeTraceProfilerInstance->write(OS);
293 }
294 
295 Error llvm::timeTraceProfilerWrite(StringRef PreferredFileName,
296                                    StringRef FallbackFileName) {
297   assert(TimeTraceProfilerInstance != nullptr &&
298          "Profiler object can't be null");
299 
300   std::string Path = PreferredFileName.str();
301   if (Path.empty()) {
302     Path = FallbackFileName == "-" ? "out" : FallbackFileName.str();
303     Path += ".time-trace";
304   }
305 
306   std::error_code EC;
307   raw_fd_ostream OS(Path, EC, sys::fs::OF_Text);
308   if (EC)
309     return createStringError(EC, "Could not open " + Path);
310 
311   timeTraceProfilerWrite(OS);
312   return Error::success();
313 }
314 
315 void llvm::timeTraceProfilerBegin(StringRef Name, StringRef Detail) {
316   if (TimeTraceProfilerInstance != nullptr)
317     TimeTraceProfilerInstance->begin(std::string(Name),
318                                      [&]() { return std::string(Detail); });
319 }
320 
321 void llvm::timeTraceProfilerBegin(StringRef Name,
322                                   llvm::function_ref<std::string()> Detail) {
323   if (TimeTraceProfilerInstance != nullptr)
324     TimeTraceProfilerInstance->begin(std::string(Name), Detail);
325 }
326 
327 void llvm::timeTraceProfilerEnd() {
328   if (TimeTraceProfilerInstance != nullptr)
329     TimeTraceProfilerInstance->end();
330 }
331