1 //===-- TraceDumper.h -------------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "lldb/Symbol/SymbolContext.h" 10 #include "lldb/Target/TraceCursor.h" 11 #include <optional> 12 13 #ifndef LLDB_TARGET_TRACE_INSTRUCTION_DUMPER_H 14 #define LLDB_TARGET_TRACE_INSTRUCTION_DUMPER_H 15 16 namespace lldb_private { 17 18 /// Class that holds the configuration used by \a TraceDumper for 19 /// traversing and dumping instructions. 20 struct TraceDumperOptions { 21 /// If \b true, the cursor will be iterated forwards starting from the 22 /// oldest instruction. Otherwise, the iteration starts from the most 23 /// recent instruction. 24 bool forwards = false; 25 /// Dump only instruction addresses without disassembly nor symbol 26 /// information. 27 bool raw = false; 28 /// Dump in json format. 29 bool json = false; 30 /// When dumping in JSON format, pretty print the output. 31 bool pretty_print_json = false; 32 /// For each trace item, print the corresponding timestamp in nanoseconds if 33 /// available. 34 bool show_timestamps = false; 35 /// Dump the events that happened between instructions. 36 bool show_events = false; 37 /// Dump events and none of the instructions. 38 bool only_events = false; 39 /// For each instruction, print the instruction kind. 40 bool show_control_flow_kind = false; 41 /// Optional custom id to start traversing from. 42 std::optional<uint64_t> id; 43 /// Optional number of instructions to skip from the starting position 44 /// of the cursor. 45 std::optional<size_t> skip; 46 }; 47 48 /// Class used to dump the instructions of a \a TraceCursor using its current 49 /// state and granularity. 50 class TraceDumper { 51 public: 52 /// Helper struct that holds symbol, disassembly and address information of an 53 /// instruction. 54 struct SymbolInfo { 55 SymbolContext sc; 56 Address address; 57 lldb::DisassemblerSP disassembler; 58 lldb::InstructionSP instruction; 59 lldb_private::ExecutionContext exe_ctx; 60 }; 61 62 /// Helper struct that holds all the information we know about a trace item 63 struct TraceItem { 64 lldb::user_id_t id; 65 lldb::addr_t load_address; 66 std::optional<double> timestamp; 67 std::optional<uint64_t> hw_clock; 68 std::optional<std::string> sync_point_metadata; 69 std::optional<llvm::StringRef> error; 70 std::optional<lldb::TraceEvent> event; 71 std::optional<SymbolInfo> symbol_info; 72 std::optional<SymbolInfo> prev_symbol_info; 73 std::optional<lldb::cpu_id_t> cpu_id; 74 }; 75 76 /// An object representing a traced function call. 77 /// 78 /// A function call is represented using segments and subcalls. 79 /// 80 /// TracedSegment: 81 /// A traced segment is a maximal list of consecutive traced instructions 82 /// that belong to the same function call. A traced segment will end in 83 /// three possible ways: 84 /// - With a call to a function deeper in the callstack. In this case, 85 /// most of the times this nested call will return 86 /// and resume with the next segment of this segment's owning function 87 /// call. More on this later. 88 /// - Abruptly due to end of trace. In this case, we weren't able to trace 89 /// the end of this function call. 90 /// - Simply a return higher in the callstack. 91 /// 92 /// In terms of implementation details, as segment can be represented with 93 /// the beginning and ending instruction IDs from the instruction trace. 94 /// 95 /// UntracedPrefixSegment: 96 /// It might happen that we didn't trace the beginning of a function and we 97 /// saw it for the first time as part of a return. As a way to signal these 98 /// cases, we have a placeholder UntracedPrefixSegment class that completes the 99 /// callgraph. 100 /// 101 /// Example: 102 /// We might have this piece of execution: 103 /// 104 /// main() [offset 0x00 to 0x20] [traced instruction ids 1 to 4] 105 /// foo() [offset 0x00 to 0x80] [traced instruction ids 5 to 20] # main 106 /// invoked foo 107 /// main() [offset 0x24 to 0x40] [traced instruction ids 21 to 30] 108 /// 109 /// In this case, our function main invokes foo. We have 3 segments: main 110 /// [offset 0x00 to 0x20], foo() [offset 0x00 to 0x80], and main() [offset 111 /// 0x24 to 0x40]. We also have the instruction ids from the corresponding 112 /// linear instruction trace for each segment. 113 /// 114 /// But what if we started tracing since the middle of foo? Then we'd have 115 /// an incomplete trace 116 /// 117 /// foo() [offset 0x30 to 0x80] [traced instruction ids 1 to 10] 118 /// main() [offset 0x24 to 0x40] [traced instruction ids 11 to 20] 119 /// 120 /// Notice that we changed the instruction ids because this is a new trace. 121 /// Here, in order to have a somewhat complete tree with good traversal 122 /// capabilities, we can create an UntracedPrefixSegment to signal the portion of 123 /// main() that we didn't trace. We don't know if this segment was in fact 124 /// multiple segments with many function calls. We'll never know. The 125 /// resulting tree looks like the following: 126 /// 127 /// main() [untraced] 128 /// foo() [offset 0x30 to 0x80] [traced instruction ids 1 to 10] 129 /// main() [offset 0x24 to 0x40] [traced instruction ids 11 to 20] 130 /// 131 /// And in pseudo-code: 132 /// 133 /// FunctionCall [ 134 /// UntracedPrefixSegment { 135 /// symbol: main() 136 /// nestedCall: FunctionCall [ # this untraced segment has a nested 137 /// call 138 /// TracedSegment { 139 /// symbol: foo() 140 /// fromInstructionId: 1 141 /// toInstructionId: 10 142 /// nestedCall: none # this doesn't have a nested call 143 /// } 144 /// } 145 /// ], 146 /// TracedSegment { 147 /// symbol: main() 148 /// fromInstructionId: 11 149 /// toInstructionId: 20 150 /// nestedCall: none # this also doesn't have a nested call 151 /// } 152 /// ] 153 /// 154 /// We can see the nested structure and how instructions are represented as 155 /// segments. 156 /// 157 /// 158 /// Returns: 159 /// Code doesn't always behave intuitively. Some interesting functions 160 /// might modify the stack and thus change the behavior of common 161 /// instructions like CALL and RET. We try to identify these cases, and 162 /// the result is that the return edge from a segment might connect with a 163 /// function call very high the stack. For example, you might have 164 /// 165 /// main() 166 /// foo() 167 /// bar() 168 /// # here bar modifies the stack and pops foo() from it. Then it 169 /// finished the a RET (return) 170 /// main() # we came back directly to main() 171 /// 172 /// I have observed some trampolines doing this, as well as some std 173 /// functions (like ostream functions). So consumers should be aware of 174 /// this. 175 /// 176 /// There are all sorts of "abnormal" behaviors you can see in code, and 177 /// whenever we fail at identifying what's going on, we prefer to create a 178 /// new tree. 179 /// 180 /// Function call forest: 181 /// A single tree would suffice if a trace didn't contain errors nor 182 /// abnormal behaviors that made our algorithms fail. Sadly these 183 /// anomalies exist and we prefer not to use too many heuristics and 184 /// probably end up lying to the user. So we create a new tree from the 185 /// point we can't continue using the previous tree. This results in 186 /// having a forest instead of a single tree. This is probably the best we 187 /// can do if we consumers want to use this data to perform performance 188 /// analysis or reverse debugging. 189 /// 190 /// Non-functions: 191 /// Not everything in a program is a function. There are blocks of 192 /// instructions that are simply labeled or even regions without symbol 193 /// information that we don't what they are. We treat all of them as 194 /// functions for simplicity. 195 /// 196 /// Errors: 197 /// Whenever an error is found, a new tree with a single segment is 198 /// created. All consecutive errors after the original one are then 199 /// appended to this segment. As a note, something that GDB does is to use 200 /// some heuristics to merge trees that were interrupted by errors. We are 201 /// leaving that out of scope until a feature like that one is really 202 /// needed. 203 204 /// Forward declaration 205 class FunctionCall; 206 using FunctionCallUP = std::unique_ptr<FunctionCall>; 207 208 class FunctionCall { 209 public: 210 class TracedSegment { 211 public: 212 /// \param[in] cursor_sp 213 /// A cursor pointing to the beginning of the segment. 214 /// 215 /// \param[in] symbol_info 216 /// The symbol information of the first instruction of the segment. 217 /// 218 /// \param[in] call 219 /// The FunctionCall object that owns this segment. 220 TracedSegment(const lldb::TraceCursorSP &cursor_sp, 221 const SymbolInfo &symbol_info, FunctionCall &owning_call) 222 : m_first_insn_id(cursor_sp->GetId()), 223 m_last_insn_id(cursor_sp->GetId()), 224 m_first_symbol_info(symbol_info), m_last_symbol_info(symbol_info), 225 m_owning_call(owning_call) {} 226 227 /// \return 228 /// The chronologically first instruction ID in this segment. 229 lldb::user_id_t GetFirstInstructionID() const; 230 /// \return 231 /// The chronologically last instruction ID in this segment. 232 lldb::user_id_t GetLastInstructionID() const; 233 234 /// \return 235 /// The symbol information of the chronologically first instruction ID 236 /// in this segment. 237 const SymbolInfo &GetFirstInstructionSymbolInfo() const; 238 239 /// \return 240 /// The symbol information of the chronologically last instruction ID in 241 /// this segment. 242 const SymbolInfo &GetLastInstructionSymbolInfo() const; 243 244 /// \return 245 /// Get the call that owns this segment. 246 const FunctionCall &GetOwningCall() const; 247 248 /// Append a new instruction to this segment. 249 /// 250 /// \param[in] cursor_sp 251 /// A cursor pointing to the new instruction. 252 /// 253 /// \param[in] symbol_info 254 /// The symbol information of the new instruction. 255 void AppendInsn(const lldb::TraceCursorSP &cursor_sp, 256 const SymbolInfo &symbol_info); 257 258 /// Create a nested call at the end of this segment. 259 /// 260 /// \param[in] cursor_sp 261 /// A cursor pointing to the first instruction of the nested call. 262 /// 263 /// \param[in] symbol_info 264 /// The symbol information of the first instruction of the nested call. 265 FunctionCall &CreateNestedCall(const lldb::TraceCursorSP &cursor_sp, 266 const SymbolInfo &symbol_info); 267 268 /// Executed the given callback if there's a nested call at the end of 269 /// this segment. 270 void IfNestedCall(std::function<void(const FunctionCall &function_call)> 271 callback) const; 272 273 private: 274 TracedSegment(const TracedSegment &) = delete; 275 TracedSegment &operator=(TracedSegment const &); 276 277 /// Delimiting instruction IDs taken chronologically. 278 /// \{ 279 lldb::user_id_t m_first_insn_id; 280 lldb::user_id_t m_last_insn_id; 281 /// \} 282 /// An optional nested call starting at the end of this segment. 283 FunctionCallUP m_nested_call; 284 /// The symbol information of the delimiting instructions 285 /// \{ 286 SymbolInfo m_first_symbol_info; 287 SymbolInfo m_last_symbol_info; 288 /// \} 289 FunctionCall &m_owning_call; 290 }; 291 292 class UntracedPrefixSegment { 293 public: 294 /// Note: Untraced segments can only exist if have also seen a traced 295 /// segment of the same function call. Thus, we can use those traced 296 /// segments if we want symbol information and such. 297 298 UntracedPrefixSegment(FunctionCallUP &&nested_call) 299 : m_nested_call(std::move(nested_call)) {} 300 301 const FunctionCall &GetNestedCall() const; 302 303 private: 304 UntracedPrefixSegment(const UntracedPrefixSegment &) = delete; 305 UntracedPrefixSegment &operator=(UntracedPrefixSegment const &); 306 FunctionCallUP m_nested_call; 307 }; 308 309 /// Create a new function call given an instruction. This will also create a 310 /// segment for that instruction. 311 /// 312 /// \param[in] cursor_sp 313 /// A cursor pointing to the first instruction of that function call. 314 /// 315 /// \param[in] symbol_info 316 /// The symbol information of that first instruction. 317 FunctionCall(const lldb::TraceCursorSP &cursor_sp, 318 const SymbolInfo &symbol_info); 319 320 /// Append a new traced segment to this function call. 321 /// 322 /// \param[in] cursor_sp 323 /// A cursor pointing to the first instruction of the new segment. 324 /// 325 /// \param[in] symbol_info 326 /// The symbol information of that first instruction. 327 void AppendSegment(const lldb::TraceCursorSP &cursor_sp, 328 const SymbolInfo &symbol_info); 329 330 /// \return 331 /// The symbol info of some traced instruction of this call. 332 const SymbolInfo &GetSymbolInfo() const; 333 334 /// \return 335 /// \b true if and only if the instructions in this function call are 336 /// trace errors, in which case this function call is a fake one. 337 bool IsError() const; 338 339 /// \return 340 /// The list of traced segments of this call. 341 const std::deque<TracedSegment> &GetTracedSegments() const; 342 343 /// \return 344 /// A non-const reference to the most-recent traced segment. 345 TracedSegment &GetLastTracedSegment(); 346 347 /// Create an untraced segment for this call that jumps to the provided 348 /// nested call. 349 void SetUntracedPrefixSegment(FunctionCallUP &&nested_call); 350 351 /// \return 352 /// A optional to the untraced prefix segment of this call. 353 const std::optional<UntracedPrefixSegment> & 354 GetUntracedPrefixSegment() const; 355 356 /// \return 357 /// A pointer to the parent call. It may be \b nullptr. 358 FunctionCall *GetParentCall() const; 359 360 void SetParentCall(FunctionCall &parent_call); 361 362 private: 363 /// An optional untraced segment that precedes all the traced segments. 364 std::optional<UntracedPrefixSegment> m_untraced_prefix_segment; 365 /// The traced segments in order. We used a deque to prevent moving these 366 /// objects when appending to the list, which would happen with vector. 367 std::deque<TracedSegment> m_traced_segments; 368 /// The parent call, which might be null. Useful for reconstructing 369 /// callstacks. 370 FunctionCall *m_parent_call = nullptr; 371 /// Whether this call represents a list of consecutive errors. 372 bool m_is_error; 373 }; 374 375 /// Interface used to abstract away the format in which the instruction 376 /// information will be dumped. 377 class OutputWriter { 378 public: 379 virtual ~OutputWriter() = default; 380 381 /// Notify this writer that the cursor ran out of data. 382 virtual void NoMoreData() {} 383 384 /// Dump a trace item (instruction, error or event). 385 virtual void TraceItem(const TraceItem &item) = 0; 386 387 /// Dump a function call forest. 388 virtual void 389 FunctionCallForest(const std::vector<FunctionCallUP> &forest) = 0; 390 }; 391 392 /// Create a instruction dumper for the cursor. 393 /// 394 /// \param[in] cursor 395 /// The cursor whose instructions will be dumped. 396 /// 397 /// \param[in] s 398 /// The stream where to dump the instructions to. 399 /// 400 /// \param[in] options 401 /// Additional options for configuring the dumping. 402 TraceDumper(lldb::TraceCursorSP cursor_sp, Stream &s, 403 const TraceDumperOptions &options); 404 405 /// Dump \a count instructions of the thread trace starting at the current 406 /// cursor position. 407 /// 408 /// This effectively moves the cursor to the next unvisited position, so that 409 /// a subsequent call to this method continues where it left off. 410 /// 411 /// \param[in] count 412 /// The number of instructions to print. 413 /// 414 /// \return 415 /// The instruction id of the last traversed instruction, or \b 416 /// std::nullopt if no instructions were visited. 417 std::optional<lldb::user_id_t> DumpInstructions(size_t count); 418 419 /// Dump all function calls forwards chronologically and hierarchically 420 void DumpFunctionCalls(); 421 422 private: 423 /// Create a trace item for the current position without symbol information. 424 TraceItem CreatRawTraceItem(); 425 426 lldb::TraceCursorSP m_cursor_sp; 427 TraceDumperOptions m_options; 428 std::unique_ptr<OutputWriter> m_writer_up; 429 }; 430 431 } // namespace lldb_private 432 433 #endif // LLDB_TARGET_TRACE_INSTRUCTION_DUMPER_H 434