1 //===-- Disassembler.h ------------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLDB_CORE_DISASSEMBLER_H 10 #define LLDB_CORE_DISASSEMBLER_H 11 12 #include "lldb/Core/Address.h" 13 #include "lldb/Core/EmulateInstruction.h" 14 #include "lldb/Core/FormatEntity.h" 15 #include "lldb/Core/Opcode.h" 16 #include "lldb/Core/PluginInterface.h" 17 #include "lldb/Interpreter/OptionValue.h" 18 #include "lldb/Symbol/LineEntry.h" 19 #include "lldb/Target/ExecutionContext.h" 20 #include "lldb/Utility/ArchSpec.h" 21 #include "lldb/Utility/ConstString.h" 22 #include "lldb/Utility/FileSpec.h" 23 #include "lldb/lldb-defines.h" 24 #include "lldb/lldb-forward.h" 25 #include "lldb/lldb-private-enumerations.h" 26 #include "lldb/lldb-types.h" 27 28 #include "llvm/ADT/StringRef.h" 29 30 #include <functional> 31 #include <map> 32 #include <memory> 33 #include <set> 34 #include <string> 35 #include <vector> 36 37 #include <cstddef> 38 #include <cstdint> 39 #include <cstdio> 40 41 namespace llvm { 42 template <typename T> class SmallVectorImpl; 43 } 44 45 namespace lldb_private { 46 class AddressRange; 47 class DataExtractor; 48 class Debugger; 49 class Disassembler; 50 class Module; 51 class StackFrame; 52 class Stream; 53 class SymbolContext; 54 class SymbolContextList; 55 class Target; 56 struct RegisterInfo; 57 58 class Instruction { 59 public: 60 Instruction(const Address &address, 61 AddressClass addr_class = AddressClass::eInvalid); 62 63 virtual ~Instruction(); 64 GetAddress()65 const Address &GetAddress() const { return m_address; } 66 GetMnemonic(const ExecutionContext * exe_ctx)67 const char *GetMnemonic(const ExecutionContext *exe_ctx) { 68 CalculateMnemonicOperandsAndCommentIfNeeded(exe_ctx); 69 return m_opcode_name.c_str(); 70 } 71 GetOperands(const ExecutionContext * exe_ctx)72 const char *GetOperands(const ExecutionContext *exe_ctx) { 73 CalculateMnemonicOperandsAndCommentIfNeeded(exe_ctx); 74 return m_mnemonics.c_str(); 75 } 76 GetComment(const ExecutionContext * exe_ctx)77 const char *GetComment(const ExecutionContext *exe_ctx) { 78 CalculateMnemonicOperandsAndCommentIfNeeded(exe_ctx); 79 return m_comment.c_str(); 80 } 81 82 /// \return 83 /// The control flow kind of this instruction, or 84 /// eInstructionControlFlowKindUnknown if the instruction 85 /// can't be classified. 86 virtual lldb::InstructionControlFlowKind GetControlFlowKind(const ExecutionContext * exe_ctx)87 GetControlFlowKind(const ExecutionContext *exe_ctx) { 88 return lldb::eInstructionControlFlowKindUnknown; 89 } 90 91 virtual void 92 CalculateMnemonicOperandsAndComment(const ExecutionContext *exe_ctx) = 0; 93 94 AddressClass GetAddressClass(); 95 SetAddress(const Address & addr)96 void SetAddress(const Address &addr) { 97 // Invalidate the address class to lazily discover it if we need to. 98 m_address_class = AddressClass::eInvalid; 99 m_address = addr; 100 } 101 102 /// Dump the text representation of this Instruction to a Stream 103 /// 104 /// Print the (optional) address, (optional) bytes, opcode, 105 /// operands, and instruction comments to a stream. 106 /// 107 /// \param[in] s 108 /// The Stream to add the text to. 109 /// 110 /// \param[in] show_address 111 /// Whether the address (using disassembly_addr_format_spec formatting) 112 /// should be printed. 113 /// 114 /// \param[in] show_bytes 115 /// Whether the bytes of the assembly instruction should be printed. 116 /// 117 /// \param[in] show_control_flow_kind 118 /// Whether the control flow kind of the instruction should be printed. 119 /// 120 /// \param[in] max_opcode_byte_size 121 /// The size (in bytes) of the largest instruction in the list that 122 /// we are printing (for text justification/alignment purposes) 123 /// Only needed if show_bytes is true. 124 /// 125 /// \param[in] exe_ctx 126 /// The current execution context, if available. May be used in 127 /// the assembling of the operands+comments for this instruction. 128 /// Pass NULL if not applicable. 129 /// 130 /// \param[in] sym_ctx 131 /// The SymbolContext for this instruction. 132 /// Pass NULL if not available/computed. 133 /// Only needed if show_address is true. 134 /// 135 /// \param[in] prev_sym_ctx 136 /// The SymbolContext for the previous instruction. Depending on 137 /// the disassembly address format specification, a change in 138 /// Symbol / Function may mean that a line is printed with the new 139 /// symbol/function name. 140 /// Pass NULL if unavailable, or if this is the first instruction of 141 /// the InstructionList. 142 /// Only needed if show_address is true. 143 /// 144 /// \param[in] disassembly_addr_format 145 /// The format specification for how addresses are printed. 146 /// Only needed if show_address is true. 147 /// 148 /// \param[in] max_address_text_size 149 /// The length of the longest address string at the start of the 150 /// disassembly line that will be printed (the 151 /// Debugger::FormatDisassemblerAddress() string) 152 /// so this method can properly align the instruction opcodes. 153 /// May be 0 to indicate no indentation/alignment of the opcodes. 154 virtual void Dump(Stream *s, uint32_t max_opcode_byte_size, bool show_address, 155 bool show_bytes, bool show_control_flow_kind, 156 const ExecutionContext *exe_ctx, 157 const SymbolContext *sym_ctx, 158 const SymbolContext *prev_sym_ctx, 159 const FormatEntity::Entry *disassembly_addr_format, 160 size_t max_address_text_size); 161 162 virtual bool DoesBranch() = 0; 163 164 virtual bool HasDelaySlot(); 165 166 virtual bool IsLoad() = 0; 167 168 virtual bool IsAuthenticated() = 0; 169 170 bool CanSetBreakpoint (); 171 172 virtual size_t Decode(const Disassembler &disassembler, 173 const DataExtractor &data, 174 lldb::offset_t data_offset) = 0; 175 SetDescription(llvm::StringRef)176 virtual void SetDescription(llvm::StringRef) { 177 } // May be overridden in sub-classes that have descriptions. 178 179 lldb::OptionValueSP ReadArray(FILE *in_file, Stream *out_stream, 180 OptionValue::Type data_type); 181 182 lldb::OptionValueSP ReadDictionary(FILE *in_file, Stream *out_stream); 183 184 bool DumpEmulation(const ArchSpec &arch); 185 186 virtual bool TestEmulation(Stream *stream, const char *test_file_name); 187 188 bool Emulate(const ArchSpec &arch, uint32_t evaluate_options, void *baton, 189 EmulateInstruction::ReadMemoryCallback read_mem_callback, 190 EmulateInstruction::WriteMemoryCallback write_mem_calback, 191 EmulateInstruction::ReadRegisterCallback read_reg_callback, 192 EmulateInstruction::WriteRegisterCallback write_reg_callback); 193 GetOpcode()194 const Opcode &GetOpcode() const { return m_opcode; } 195 196 uint32_t GetData(DataExtractor &data); 197 198 struct Operand { 199 enum class Type { 200 Invalid = 0, 201 Register, 202 Immediate, 203 Dereference, 204 Sum, 205 Product 206 } m_type = Type::Invalid; 207 std::vector<Operand> m_children; 208 lldb::addr_t m_immediate = 0; 209 ConstString m_register; 210 bool m_negative = false; 211 bool m_clobbered = false; 212 IsValidOperand213 bool IsValid() { return m_type != Type::Invalid; } 214 215 static Operand BuildRegister(ConstString &r); 216 static Operand BuildImmediate(lldb::addr_t imm, bool neg); 217 static Operand BuildImmediate(int64_t imm); 218 static Operand BuildDereference(const Operand &ref); 219 static Operand BuildSum(const Operand &lhs, const Operand &rhs); 220 static Operand BuildProduct(const Operand &lhs, const Operand &rhs); 221 }; 222 ParseOperands(llvm::SmallVectorImpl<Operand> & operands)223 virtual bool ParseOperands(llvm::SmallVectorImpl<Operand> &operands) { 224 return false; 225 } 226 IsCall()227 virtual bool IsCall() { return false; } 228 229 static const char *GetNameForInstructionControlFlowKind( 230 lldb::InstructionControlFlowKind instruction_control_flow_kind); 231 232 protected: 233 Address m_address; // The section offset address of this instruction 234 // We include an address class in the Instruction class to 235 // allow the instruction specify the 236 // AddressClass::eCodeAlternateISA (currently used for 237 // thumb), and also to specify data (AddressClass::eData). 238 // The usual value will be AddressClass::eCode, but often 239 // when disassembling memory, you might run into data. 240 // This can help us to disassemble appropriately. 241 private: 242 AddressClass m_address_class; // Use GetAddressClass () accessor function! 243 244 protected: 245 Opcode m_opcode; // The opcode for this instruction 246 std::string m_opcode_name; 247 std::string m_mnemonics; 248 std::string m_comment; 249 bool m_calculated_strings; 250 251 void CalculateMnemonicOperandsAndCommentIfNeeded(const ExecutionContext * exe_ctx)252 CalculateMnemonicOperandsAndCommentIfNeeded(const ExecutionContext *exe_ctx) { 253 if (!m_calculated_strings) { 254 m_calculated_strings = true; 255 CalculateMnemonicOperandsAndComment(exe_ctx); 256 } 257 } 258 }; 259 260 namespace OperandMatchers { 261 std::function<bool(const Instruction::Operand &)> 262 MatchBinaryOp(std::function<bool(const Instruction::Operand &)> base, 263 std::function<bool(const Instruction::Operand &)> left, 264 std::function<bool(const Instruction::Operand &)> right); 265 266 std::function<bool(const Instruction::Operand &)> 267 MatchUnaryOp(std::function<bool(const Instruction::Operand &)> base, 268 std::function<bool(const Instruction::Operand &)> child); 269 270 std::function<bool(const Instruction::Operand &)> 271 MatchRegOp(const RegisterInfo &info); 272 273 std::function<bool(const Instruction::Operand &)> FetchRegOp(ConstString ®); 274 275 std::function<bool(const Instruction::Operand &)> MatchImmOp(int64_t imm); 276 277 std::function<bool(const Instruction::Operand &)> FetchImmOp(int64_t &imm); 278 279 std::function<bool(const Instruction::Operand &)> 280 MatchOpType(Instruction::Operand::Type type); 281 } 282 283 class InstructionList { 284 public: 285 InstructionList(); 286 ~InstructionList(); 287 288 size_t GetSize() const; 289 290 uint32_t GetMaxOpcocdeByteSize() const; 291 292 lldb::InstructionSP GetInstructionAtIndex(size_t idx) const; 293 294 /// Get the instruction at the given address. 295 /// 296 /// \return 297 /// A valid \a InstructionSP if the address could be found, or null 298 /// otherwise. 299 lldb::InstructionSP GetInstructionAtAddress(const Address &addr); 300 301 //------------------------------------------------------------------ 302 /// Get the index of the next branch instruction. 303 /// 304 /// Given a list of instructions, find the next branch instruction 305 /// in the list by returning an index. 306 /// 307 /// @param[in] start 308 /// The instruction index of the first instruction to check. 309 /// 310 /// @param[in] ignore_calls 311 /// It true, then fine the first branch instruction that isn't 312 /// a function call (a branch that calls and returns to the next 313 /// instruction). If false, find the instruction index of any 314 /// branch in the list. 315 /// 316 /// @param[out] found_calls 317 /// If non-null, this will be set to true if any calls were found in 318 /// extending the range. 319 /// 320 /// @return 321 /// The instruction index of the first branch that is at or past 322 /// \a start. Returns UINT32_MAX if no matching branches are 323 /// found. 324 //------------------------------------------------------------------ 325 uint32_t GetIndexOfNextBranchInstruction(uint32_t start, 326 bool ignore_calls, 327 bool *found_calls) const; 328 329 uint32_t GetIndexOfInstructionAtLoadAddress(lldb::addr_t load_addr, 330 Target &target); 331 332 uint32_t GetIndexOfInstructionAtAddress(const Address &addr); 333 334 void Clear(); 335 336 void Append(lldb::InstructionSP &inst_sp); 337 338 void Dump(Stream *s, bool show_address, bool show_bytes, 339 bool show_control_flow_kind, const ExecutionContext *exe_ctx); 340 341 private: 342 typedef std::vector<lldb::InstructionSP> collection; 343 typedef collection::iterator iterator; 344 typedef collection::const_iterator const_iterator; 345 346 collection m_instructions; 347 }; 348 349 class PseudoInstruction : public Instruction { 350 public: 351 PseudoInstruction(); 352 353 ~PseudoInstruction() override; 354 355 bool DoesBranch() override; 356 357 bool HasDelaySlot() override; 358 359 bool IsLoad() override; 360 361 bool IsAuthenticated() override; 362 CalculateMnemonicOperandsAndComment(const ExecutionContext * exe_ctx)363 void CalculateMnemonicOperandsAndComment( 364 const ExecutionContext *exe_ctx) override { 365 // TODO: fill this in and put opcode name into Instruction::m_opcode_name, 366 // mnemonic into Instruction::m_mnemonics, and any comment into 367 // Instruction::m_comment 368 } 369 370 size_t Decode(const Disassembler &disassembler, const DataExtractor &data, 371 lldb::offset_t data_offset) override; 372 373 void SetOpcode(size_t opcode_size, void *opcode_data); 374 375 void SetDescription(llvm::StringRef description) override; 376 377 protected: 378 std::string m_description; 379 380 PseudoInstruction(const PseudoInstruction &) = delete; 381 const PseudoInstruction &operator=(const PseudoInstruction &) = delete; 382 }; 383 384 class Disassembler : public std::enable_shared_from_this<Disassembler>, 385 public PluginInterface { 386 public: 387 enum { 388 eOptionNone = 0u, 389 eOptionShowBytes = (1u << 0), 390 eOptionRawOuput = (1u << 1), 391 eOptionMarkPCSourceLine = (1u << 2), // Mark the source line that contains 392 // the current PC (mixed mode only) 393 eOptionMarkPCAddress = 394 (1u << 3), // Mark the disassembly line the contains the PC 395 eOptionShowControlFlowKind = (1u << 4), 396 }; 397 398 enum HexImmediateStyle { 399 eHexStyleC, 400 eHexStyleAsm, 401 }; 402 403 // FindPlugin should be lax about the flavor string (it is too annoying to 404 // have various internal uses of the disassembler fail because the global 405 // flavor string gets set wrong. Instead, if you get a flavor string you 406 // don't understand, use the default. Folks who care to check can use the 407 // FlavorValidForArchSpec method on the disassembler they got back. 408 static lldb::DisassemblerSP 409 FindPlugin(const ArchSpec &arch, const char *flavor, const char *plugin_name); 410 411 // This version will use the value in the Target settings if flavor is NULL; 412 static lldb::DisassemblerSP FindPluginForTarget(const Target &target, 413 const ArchSpec &arch, 414 const char *flavor, 415 const char *plugin_name); 416 417 struct Limit { 418 enum { Bytes, Instructions } kind; 419 lldb::addr_t value; 420 }; 421 422 static lldb::DisassemblerSP DisassembleRange(const ArchSpec &arch, 423 const char *plugin_name, 424 const char *flavor, 425 Target &target, 426 const AddressRange &disasm_range, 427 bool force_live_memory = false); 428 429 static lldb::DisassemblerSP 430 DisassembleBytes(const ArchSpec &arch, const char *plugin_name, 431 const char *flavor, const Address &start, const void *bytes, 432 size_t length, uint32_t max_num_instructions, 433 bool data_from_file); 434 435 static bool Disassemble(Debugger &debugger, const ArchSpec &arch, 436 const char *plugin_name, const char *flavor, 437 const ExecutionContext &exe_ctx, const Address &start, 438 Limit limit, bool mixed_source_and_assembly, 439 uint32_t num_mixed_context_lines, uint32_t options, 440 Stream &strm); 441 442 static bool Disassemble(Debugger &debugger, const ArchSpec &arch, 443 StackFrame &frame, Stream &strm); 444 445 // Constructors and Destructors 446 Disassembler(const ArchSpec &arch, const char *flavor); 447 ~Disassembler() override; 448 449 void PrintInstructions(Debugger &debugger, const ArchSpec &arch, 450 const ExecutionContext &exe_ctx, 451 bool mixed_source_and_assembly, 452 uint32_t num_mixed_context_lines, uint32_t options, 453 Stream &strm); 454 455 size_t ParseInstructions(Target &target, Address address, Limit limit, 456 Stream *error_strm_ptr, 457 bool force_live_memory = false); 458 459 virtual size_t DecodeInstructions(const Address &base_addr, 460 const DataExtractor &data, 461 lldb::offset_t data_offset, 462 size_t num_instructions, bool append, 463 bool data_from_file) = 0; 464 465 InstructionList &GetInstructionList(); 466 467 const InstructionList &GetInstructionList() const; 468 GetArchitecture()469 const ArchSpec &GetArchitecture() const { return m_arch; } 470 GetFlavor()471 const char *GetFlavor() const { return m_flavor.c_str(); } 472 473 virtual bool FlavorValidForArchSpec(const lldb_private::ArchSpec &arch, 474 const char *flavor) = 0; 475 476 protected: 477 // SourceLine and SourceLinesToDisplay structures are only used in the mixed 478 // source and assembly display methods internal to this class. 479 480 struct SourceLine { 481 FileSpec file; 482 uint32_t line = LLDB_INVALID_LINE_NUMBER; 483 uint32_t column = 0; 484 485 SourceLine() = default; 486 487 bool operator==(const SourceLine &rhs) const { 488 return file == rhs.file && line == rhs.line && rhs.column == column; 489 } 490 491 bool operator!=(const SourceLine &rhs) const { 492 return file != rhs.file || line != rhs.line || column != rhs.column; 493 } 494 IsValidSourceLine495 bool IsValid() const { return line != LLDB_INVALID_LINE_NUMBER; } 496 }; 497 498 struct SourceLinesToDisplay { 499 std::vector<SourceLine> lines; 500 501 // index of the "current" source line, if we want to highlight that when 502 // displaying the source lines. (as opposed to the surrounding source 503 // lines provided to give context) 504 size_t current_source_line = -1; 505 506 // Whether to print a blank line at the end of the source lines. 507 bool print_source_context_end_eol = true; 508 509 SourceLinesToDisplay() = default; 510 }; 511 512 // Get the function's declaration line number, hopefully a line number 513 // earlier than the opening curly brace at the start of the function body. 514 static SourceLine GetFunctionDeclLineEntry(const SymbolContext &sc); 515 516 // Add the provided SourceLine to the map of filenames-to-source-lines-seen. 517 static void AddLineToSourceLineTables( 518 SourceLine &line, 519 std::map<FileSpec, std::set<uint32_t>> &source_lines_seen); 520 521 // Given a source line, determine if we should print it when we're doing 522 // mixed source & assembly output. We're currently using the 523 // target.process.thread.step-avoid-regexp setting (which is used for 524 // stepping over inlined STL functions by default) to determine what source 525 // lines to avoid showing. 526 // 527 // Returns true if this source line should be elided (if the source line 528 // should not be displayed). 529 static bool 530 ElideMixedSourceAndDisassemblyLine(const ExecutionContext &exe_ctx, 531 const SymbolContext &sc, SourceLine &line); 532 533 static bool ElideMixedSourceAndDisassemblyLine(const ExecutionContext & exe_ctx,const SymbolContext & sc,LineEntry & line)534 ElideMixedSourceAndDisassemblyLine(const ExecutionContext &exe_ctx, 535 const SymbolContext &sc, LineEntry &line) { 536 SourceLine sl; 537 sl.file = line.file; 538 sl.line = line.line; 539 sl.column = line.column; 540 return ElideMixedSourceAndDisassemblyLine(exe_ctx, sc, sl); 541 }; 542 543 // Classes that inherit from Disassembler can see and modify these 544 ArchSpec m_arch; 545 InstructionList m_instruction_list; 546 lldb::addr_t m_base_addr; 547 std::string m_flavor; 548 549 private: 550 // For Disassembler only 551 Disassembler(const Disassembler &) = delete; 552 const Disassembler &operator=(const Disassembler &) = delete; 553 }; 554 555 } // namespace lldb_private 556 557 #endif // LLDB_CORE_DISASSEMBLER_H 558