1 //===-- TraceDumper.h -------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "lldb/Symbol/SymbolContext.h"
10 #include "lldb/Target/TraceCursor.h"
11 #include <optional>
12 
13 #ifndef LLDB_TARGET_TRACE_INSTRUCTION_DUMPER_H
14 #define LLDB_TARGET_TRACE_INSTRUCTION_DUMPER_H
15 
16 namespace lldb_private {
17 
18 /// Class that holds the configuration used by \a TraceDumper for
19 /// traversing and dumping instructions.
20 struct TraceDumperOptions {
21   /// If \b true, the cursor will be iterated forwards starting from the
22   /// oldest instruction. Otherwise, the iteration starts from the most
23   /// recent instruction.
24   bool forwards = false;
25   /// Dump only instruction addresses without disassembly nor symbol
26   /// information.
27   bool raw = false;
28   /// Dump in json format.
29   bool json = false;
30   /// When dumping in JSON format, pretty print the output.
31   bool pretty_print_json = false;
32   /// For each trace item, print the corresponding timestamp in nanoseconds if
33   /// available.
34   bool show_timestamps = false;
35   /// Dump the events that happened between instructions.
36   bool show_events = false;
37   /// Dump events and none of the instructions.
38   bool only_events = false;
39   /// For each instruction, print the instruction kind.
40   bool show_control_flow_kind = false;
41   /// Optional custom id to start traversing from.
42   std::optional<uint64_t> id;
43   /// Optional number of instructions to skip from the starting position
44   /// of the cursor.
45   std::optional<size_t> skip;
46 };
47 
48 /// Class used to dump the instructions of a \a TraceCursor using its current
49 /// state and granularity.
50 class TraceDumper {
51 public:
52   /// Helper struct that holds symbol, disassembly and address information of an
53   /// instruction.
54   struct SymbolInfo {
55     SymbolContext sc;
56     Address address;
57     lldb::DisassemblerSP disassembler;
58     lldb::InstructionSP instruction;
59     lldb_private::ExecutionContext exe_ctx;
60   };
61 
62   /// Helper struct that holds all the information we know about a trace item
63   struct TraceItem {
64     lldb::user_id_t id;
65     lldb::addr_t load_address;
66     std::optional<double> timestamp;
67     std::optional<uint64_t> hw_clock;
68     std::optional<std::string> sync_point_metadata;
69     std::optional<llvm::StringRef> error;
70     std::optional<lldb::TraceEvent> event;
71     std::optional<SymbolInfo> symbol_info;
72     std::optional<SymbolInfo> prev_symbol_info;
73     std::optional<lldb::cpu_id_t> cpu_id;
74   };
75 
76   /// An object representing a traced function call.
77   ///
78   /// A function call is represented using segments and subcalls.
79   ///
80   /// TracedSegment:
81   ///   A traced segment is a maximal list of consecutive traced instructions
82   ///   that belong to the same function call. A traced segment will end in
83   ///   three possible ways:
84   ///     - With a call to a function deeper in the callstack. In this case,
85   ///     most of the times this nested call will return
86   ///       and resume with the next segment of this segment's owning function
87   ///       call. More on this later.
88   ///     - Abruptly due to end of trace. In this case, we weren't able to trace
89   ///     the end of this function call.
90   ///     - Simply a return higher in the callstack.
91   ///
92   ///   In terms of implementation details, as segment can be represented with
93   ///   the beginning and ending instruction IDs from the instruction trace.
94   ///
95   ///  UntracedPrefixSegment:
96   ///   It might happen that we didn't trace the beginning of a function and we
97   ///   saw it for the first time as part of a return. As a way to signal these
98   ///   cases, we have a placeholder UntracedPrefixSegment class that completes the
99   ///   callgraph.
100   ///
101   ///  Example:
102   ///   We might have this piece of execution:
103   ///
104   ///     main() [offset 0x00 to 0x20] [traced instruction ids 1 to 4]
105   ///       foo()  [offset 0x00 to 0x80] [traced instruction ids 5 to 20] # main
106   ///       invoked foo
107   ///     main() [offset 0x24 to 0x40] [traced instruction ids 21 to 30]
108   ///
109   ///   In this case, our function main invokes foo. We have 3 segments: main
110   ///   [offset 0x00 to 0x20], foo() [offset 0x00 to 0x80], and main() [offset
111   ///   0x24 to 0x40]. We also have the instruction ids from the corresponding
112   ///   linear instruction trace for each segment.
113   ///
114   ///   But what if we started tracing since the middle of foo? Then we'd have
115   ///   an incomplete trace
116   ///
117   ///       foo() [offset 0x30 to 0x80] [traced instruction ids 1 to 10]
118   ///     main() [offset 0x24 to 0x40] [traced instruction ids 11 to 20]
119   ///
120   ///   Notice that we changed the instruction ids because this is a new trace.
121   ///   Here, in order to have a somewhat complete tree with good traversal
122   ///   capabilities, we can create an UntracedPrefixSegment to signal the portion of
123   ///   main() that we didn't trace. We don't know if this segment was in fact
124   ///   multiple segments with many function calls. We'll never know. The
125   ///   resulting tree looks like the following:
126   ///
127   ///     main() [untraced]
128   ///       foo() [offset 0x30 to 0x80] [traced instruction ids 1 to 10]
129   ///     main() [offset 0x24 to 0x40] [traced instruction ids 11 to 20]
130   ///
131   ///   And in pseudo-code:
132   ///
133   ///     FunctionCall [
134   ///       UntracedPrefixSegment {
135   ///         symbol: main()
136   ///         nestedCall: FunctionCall [ # this untraced segment has a nested
137   ///         call
138   ///           TracedSegment {
139   ///             symbol: foo()
140   ///             fromInstructionId: 1
141   ///             toInstructionId: 10
142   ///             nestedCall: none # this doesn't have a nested call
143   ///           }
144   ///         }
145   ///       ],
146   ///       TracedSegment {
147   ///         symbol: main()
148   ///         fromInstructionId: 11
149   ///         toInstructionId: 20
150   ///         nestedCall: none # this also doesn't have a nested call
151   ///       }
152   ///   ]
153   ///
154   ///   We can see the nested structure and how instructions are represented as
155   ///   segments.
156   ///
157   ///
158   ///   Returns:
159   ///     Code doesn't always behave intuitively. Some interesting functions
160   ///     might modify the stack and thus change the behavior of common
161   ///     instructions like CALL and RET. We try to identify these cases, and
162   ///     the result is that the return edge from a segment might connect with a
163   ///     function call very high the stack. For example, you might have
164   ///
165   ///     main()
166   ///       foo()
167   ///         bar()
168   ///         # here bar modifies the stack and pops foo() from it. Then it
169   ///         finished the a RET (return)
170   ///     main() # we came back directly to main()
171   ///
172   ///     I have observed some trampolines doing this, as well as some std
173   ///     functions (like ostream functions). So consumers should be aware of
174   ///     this.
175   ///
176   ///     There are all sorts of "abnormal" behaviors you can see in code, and
177   ///     whenever we fail at identifying what's going on, we prefer to create a
178   ///     new tree.
179   ///
180   ///   Function call forest:
181   ///     A single tree would suffice if a trace didn't contain errors nor
182   ///     abnormal behaviors that made our algorithms fail. Sadly these
183   ///     anomalies exist and we prefer not to use too many heuristics and
184   ///     probably end up lying to the user. So we create a new tree from the
185   ///     point we can't continue using the previous tree. This results in
186   ///     having a forest instead of a single tree. This is probably the best we
187   ///     can do if we consumers want to use this data to perform performance
188   ///     analysis or reverse debugging.
189   ///
190   ///   Non-functions:
191   ///     Not everything in a program is a function. There are blocks of
192   ///     instructions that are simply labeled or even regions without symbol
193   ///     information that we don't what they are. We treat all of them as
194   ///     functions for simplicity.
195   ///
196   ///   Errors:
197   ///     Whenever an error is found, a new tree with a single segment is
198   ///     created. All consecutive errors after the original one are then
199   ///     appended to this segment. As a note, something that GDB does is to use
200   ///     some heuristics to merge trees that were interrupted by errors. We are
201   ///     leaving that out of scope until a feature like that one is really
202   ///     needed.
203 
204   /// Forward declaration
205   class FunctionCall;
206   using FunctionCallUP = std::unique_ptr<FunctionCall>;
207 
208   class FunctionCall {
209   public:
210     class TracedSegment {
211     public:
212       /// \param[in] cursor_sp
213       ///   A cursor pointing to the beginning of the segment.
214       ///
215       /// \param[in] symbol_info
216       ///   The symbol information of the first instruction of the segment.
217       ///
218       /// \param[in] call
219       ///   The FunctionCall object that owns this segment.
220       TracedSegment(const lldb::TraceCursorSP &cursor_sp,
221                     const SymbolInfo &symbol_info, FunctionCall &owning_call)
222           : m_first_insn_id(cursor_sp->GetId()),
223             m_last_insn_id(cursor_sp->GetId()),
224             m_first_symbol_info(symbol_info), m_last_symbol_info(symbol_info),
225             m_owning_call(owning_call) {}
226 
227       /// \return
228       ///   The chronologically first instruction ID in this segment.
229       lldb::user_id_t GetFirstInstructionID() const;
230       /// \return
231       ///   The chronologically last instruction ID in this segment.
232       lldb::user_id_t GetLastInstructionID() const;
233 
234       /// \return
235       ///   The symbol information of the chronologically first instruction ID
236       ///   in this segment.
237       const SymbolInfo &GetFirstInstructionSymbolInfo() const;
238 
239       /// \return
240       ///   The symbol information of the chronologically last instruction ID in
241       ///   this segment.
242       const SymbolInfo &GetLastInstructionSymbolInfo() const;
243 
244       /// \return
245       ///   Get the call that owns this segment.
246       const FunctionCall &GetOwningCall() const;
247 
248       /// Append a new instruction to this segment.
249       ///
250       /// \param[in] cursor_sp
251       ///   A cursor pointing to the new instruction.
252       ///
253       /// \param[in] symbol_info
254       ///   The symbol information of the new instruction.
255       void AppendInsn(const lldb::TraceCursorSP &cursor_sp,
256                       const SymbolInfo &symbol_info);
257 
258       /// Create a nested call at the end of this segment.
259       ///
260       /// \param[in] cursor_sp
261       ///   A cursor pointing to the first instruction of the nested call.
262       ///
263       /// \param[in] symbol_info
264       ///   The symbol information of the first instruction of the nested call.
265       FunctionCall &CreateNestedCall(const lldb::TraceCursorSP &cursor_sp,
266                                      const SymbolInfo &symbol_info);
267 
268       /// Executed the given callback if there's a nested call at the end of
269       /// this segment.
270       void IfNestedCall(std::function<void(const FunctionCall &function_call)>
271                             callback) const;
272 
273     private:
274       TracedSegment(const TracedSegment &) = delete;
275       TracedSegment &operator=(TracedSegment const &);
276 
277       /// Delimiting instruction IDs taken chronologically.
278       /// \{
279       lldb::user_id_t m_first_insn_id;
280       lldb::user_id_t m_last_insn_id;
281       /// \}
282       /// An optional nested call starting at the end of this segment.
283       FunctionCallUP m_nested_call;
284       /// The symbol information of the delimiting instructions
285       /// \{
286       SymbolInfo m_first_symbol_info;
287       SymbolInfo m_last_symbol_info;
288       /// \}
289       FunctionCall &m_owning_call;
290     };
291 
292     class UntracedPrefixSegment {
293     public:
294       /// Note: Untraced segments can only exist if have also seen a traced
295       /// segment of the same function call. Thus, we can use those traced
296       /// segments if we want symbol information and such.
297 
298       UntracedPrefixSegment(FunctionCallUP &&nested_call)
299           : m_nested_call(std::move(nested_call)) {}
300 
301       const FunctionCall &GetNestedCall() const;
302 
303     private:
304       UntracedPrefixSegment(const UntracedPrefixSegment &) = delete;
305       UntracedPrefixSegment &operator=(UntracedPrefixSegment const &);
306       FunctionCallUP m_nested_call;
307     };
308 
309     /// Create a new function call given an instruction. This will also create a
310     /// segment for that instruction.
311     ///
312     /// \param[in] cursor_sp
313     ///   A cursor pointing to the first instruction of that function call.
314     ///
315     /// \param[in] symbol_info
316     ///   The symbol information of that first instruction.
317     FunctionCall(const lldb::TraceCursorSP &cursor_sp,
318                  const SymbolInfo &symbol_info);
319 
320     /// Append a new traced segment to this function call.
321     ///
322     /// \param[in] cursor_sp
323     ///   A cursor pointing to the first instruction of the new segment.
324     ///
325     /// \param[in] symbol_info
326     ///   The symbol information of that first instruction.
327     void AppendSegment(const lldb::TraceCursorSP &cursor_sp,
328                        const SymbolInfo &symbol_info);
329 
330     /// \return
331     ///   The symbol info of some traced instruction of this call.
332     const SymbolInfo &GetSymbolInfo() const;
333 
334     /// \return
335     ///   \b true if and only if the instructions in this function call are
336     ///   trace errors, in which case this function call is a fake one.
337     bool IsError() const;
338 
339     /// \return
340     ///   The list of traced segments of this call.
341     const std::deque<TracedSegment> &GetTracedSegments() const;
342 
343     /// \return
344     ///   A non-const reference to the most-recent traced segment.
345     TracedSegment &GetLastTracedSegment();
346 
347     /// Create an untraced segment for this call that jumps to the provided
348     /// nested call.
349     void SetUntracedPrefixSegment(FunctionCallUP &&nested_call);
350 
351     /// \return
352     ///   A optional to the untraced prefix segment of this call.
353     const std::optional<UntracedPrefixSegment> &
354     GetUntracedPrefixSegment() const;
355 
356     /// \return
357     ///   A pointer to the parent call. It may be \b nullptr.
358     FunctionCall *GetParentCall() const;
359 
360     void SetParentCall(FunctionCall &parent_call);
361 
362   private:
363     /// An optional untraced segment that precedes all the traced segments.
364     std::optional<UntracedPrefixSegment> m_untraced_prefix_segment;
365     /// The traced segments in order. We used a deque to prevent moving these
366     /// objects when appending to the list, which would happen with vector.
367     std::deque<TracedSegment> m_traced_segments;
368     /// The parent call, which might be null. Useful for reconstructing
369     /// callstacks.
370     FunctionCall *m_parent_call = nullptr;
371     /// Whether this call represents a list of consecutive errors.
372     bool m_is_error;
373   };
374 
375   /// Interface used to abstract away the format in which the instruction
376   /// information will be dumped.
377   class OutputWriter {
378   public:
379     virtual ~OutputWriter() = default;
380 
381     /// Notify this writer that the cursor ran out of data.
382     virtual void NoMoreData() {}
383 
384     /// Dump a trace item (instruction, error or event).
385     virtual void TraceItem(const TraceItem &item) = 0;
386 
387     /// Dump a function call forest.
388     virtual void
389     FunctionCallForest(const std::vector<FunctionCallUP> &forest) = 0;
390   };
391 
392   /// Create a instruction dumper for the cursor.
393   ///
394   /// \param[in] cursor
395   ///     The cursor whose instructions will be dumped.
396   ///
397   /// \param[in] s
398   ///     The stream where to dump the instructions to.
399   ///
400   /// \param[in] options
401   ///     Additional options for configuring the dumping.
402   TraceDumper(lldb::TraceCursorSP cursor_sp, Stream &s,
403               const TraceDumperOptions &options);
404 
405   /// Dump \a count instructions of the thread trace starting at the current
406   /// cursor position.
407   ///
408   /// This effectively moves the cursor to the next unvisited position, so that
409   /// a subsequent call to this method continues where it left off.
410   ///
411   /// \param[in] count
412   ///     The number of instructions to print.
413   ///
414   /// \return
415   ///     The instruction id of the last traversed instruction, or \b
416   ///     std::nullopt if no instructions were visited.
417   std::optional<lldb::user_id_t> DumpInstructions(size_t count);
418 
419   /// Dump all function calls forwards chronologically and hierarchically
420   void DumpFunctionCalls();
421 
422 private:
423   /// Create a trace item for the current position without symbol information.
424   TraceItem CreatRawTraceItem();
425 
426   lldb::TraceCursorSP m_cursor_sp;
427   TraceDumperOptions m_options;
428   std::unique_ptr<OutputWriter> m_writer_up;
429 };
430 
431 } // namespace lldb_private
432 
433 #endif // LLDB_TARGET_TRACE_INSTRUCTION_DUMPER_H
434