1 //===-- Disassembler.cpp --------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "lldb/Core/Disassembler.h" 10 11 #include "lldb/Core/AddressRange.h" 12 #include "lldb/Core/Debugger.h" 13 #include "lldb/Core/EmulateInstruction.h" 14 #include "lldb/Core/Mangled.h" 15 #include "lldb/Core/Module.h" 16 #include "lldb/Core/ModuleList.h" 17 #include "lldb/Core/PluginManager.h" 18 #include "lldb/Core/SourceManager.h" 19 #include "lldb/Host/FileSystem.h" 20 #include "lldb/Interpreter/OptionValue.h" 21 #include "lldb/Interpreter/OptionValueArray.h" 22 #include "lldb/Interpreter/OptionValueDictionary.h" 23 #include "lldb/Interpreter/OptionValueRegex.h" 24 #include "lldb/Interpreter/OptionValueString.h" 25 #include "lldb/Interpreter/OptionValueUInt64.h" 26 #include "lldb/Symbol/Function.h" 27 #include "lldb/Symbol/Symbol.h" 28 #include "lldb/Symbol/SymbolContext.h" 29 #include "lldb/Target/ExecutionContext.h" 30 #include "lldb/Target/SectionLoadList.h" 31 #include "lldb/Target/StackFrame.h" 32 #include "lldb/Target/Target.h" 33 #include "lldb/Target/Thread.h" 34 #include "lldb/Utility/DataBufferHeap.h" 35 #include "lldb/Utility/DataExtractor.h" 36 #include "lldb/Utility/RegularExpression.h" 37 #include "lldb/Utility/Status.h" 38 #include "lldb/Utility/Stream.h" 39 #include "lldb/Utility/StreamString.h" 40 #include "lldb/Utility/Timer.h" 41 #include "lldb/lldb-private-enumerations.h" 42 #include "lldb/lldb-private-interfaces.h" 43 #include "lldb/lldb-private-types.h" 44 #include "llvm/ADT/Triple.h" 45 #include "llvm/Support/Compiler.h" 46 47 #include <cstdint> 48 #include <cstring> 49 #include <utility> 50 51 #include <cassert> 52 53 #define DEFAULT_DISASM_BYTE_SIZE 32 54 55 using namespace lldb; 56 using namespace lldb_private; 57 58 DisassemblerSP Disassembler::FindPlugin(const ArchSpec &arch, 59 const char *flavor, 60 const char *plugin_name) { 61 LLDB_SCOPED_TIMERF("Disassembler::FindPlugin (arch = %s, plugin_name = %s)", 62 arch.GetArchitectureName(), plugin_name); 63 64 DisassemblerCreateInstance create_callback = nullptr; 65 66 if (plugin_name) { 67 create_callback = 68 PluginManager::GetDisassemblerCreateCallbackForPluginName(plugin_name); 69 if (create_callback) { 70 DisassemblerSP disassembler_sp(create_callback(arch, flavor)); 71 72 if (disassembler_sp) 73 return disassembler_sp; 74 } 75 } else { 76 for (uint32_t idx = 0; 77 (create_callback = PluginManager::GetDisassemblerCreateCallbackAtIndex( 78 idx)) != nullptr; 79 ++idx) { 80 DisassemblerSP disassembler_sp(create_callback(arch, flavor)); 81 82 if (disassembler_sp) 83 return disassembler_sp; 84 } 85 } 86 return DisassemblerSP(); 87 } 88 89 DisassemblerSP Disassembler::FindPluginForTarget(const Target &target, 90 const ArchSpec &arch, 91 const char *flavor, 92 const char *plugin_name) { 93 if (flavor == nullptr) { 94 // FIXME - we don't have the mechanism in place to do per-architecture 95 // settings. But since we know that for now we only support flavors on x86 96 // & x86_64, 97 if (arch.GetTriple().getArch() == llvm::Triple::x86 || 98 arch.GetTriple().getArch() == llvm::Triple::x86_64) 99 flavor = target.GetDisassemblyFlavor(); 100 } 101 return FindPlugin(arch, flavor, plugin_name); 102 } 103 104 static Address ResolveAddress(Target &target, const Address &addr) { 105 if (!addr.IsSectionOffset()) { 106 Address resolved_addr; 107 // If we weren't passed in a section offset address range, try and resolve 108 // it to something 109 bool is_resolved = target.GetSectionLoadList().IsEmpty() 110 ? target.GetImages().ResolveFileAddress( 111 addr.GetOffset(), resolved_addr) 112 : target.GetSectionLoadList().ResolveLoadAddress( 113 addr.GetOffset(), resolved_addr); 114 115 // We weren't able to resolve the address, just treat it as a raw address 116 if (is_resolved && resolved_addr.IsValid()) 117 return resolved_addr; 118 } 119 return addr; 120 } 121 122 lldb::DisassemblerSP Disassembler::DisassembleRange( 123 const ArchSpec &arch, const char *plugin_name, const char *flavor, 124 Target &target, const AddressRange &range, bool force_live_memory) { 125 if (range.GetByteSize() <= 0) 126 return {}; 127 128 if (!range.GetBaseAddress().IsValid()) 129 return {}; 130 131 lldb::DisassemblerSP disasm_sp = 132 Disassembler::FindPluginForTarget(target, arch, flavor, plugin_name); 133 134 if (!disasm_sp) 135 return {}; 136 137 const size_t bytes_disassembled = disasm_sp->ParseInstructions( 138 target, range.GetBaseAddress(), {Limit::Bytes, range.GetByteSize()}, 139 nullptr, force_live_memory); 140 if (bytes_disassembled == 0) 141 return {}; 142 143 return disasm_sp; 144 } 145 146 lldb::DisassemblerSP 147 Disassembler::DisassembleBytes(const ArchSpec &arch, const char *plugin_name, 148 const char *flavor, const Address &start, 149 const void *src, size_t src_len, 150 uint32_t num_instructions, bool data_from_file) { 151 if (!src) 152 return {}; 153 154 lldb::DisassemblerSP disasm_sp = 155 Disassembler::FindPlugin(arch, flavor, plugin_name); 156 157 if (!disasm_sp) 158 return {}; 159 160 DataExtractor data(src, src_len, arch.GetByteOrder(), 161 arch.GetAddressByteSize()); 162 163 (void)disasm_sp->DecodeInstructions(start, data, 0, num_instructions, false, 164 data_from_file); 165 return disasm_sp; 166 } 167 168 bool Disassembler::Disassemble(Debugger &debugger, const ArchSpec &arch, 169 const char *plugin_name, const char *flavor, 170 const ExecutionContext &exe_ctx, 171 const Address &address, Limit limit, 172 bool mixed_source_and_assembly, 173 uint32_t num_mixed_context_lines, 174 uint32_t options, Stream &strm) { 175 if (!exe_ctx.GetTargetPtr()) 176 return false; 177 178 lldb::DisassemblerSP disasm_sp(Disassembler::FindPluginForTarget( 179 exe_ctx.GetTargetRef(), arch, flavor, plugin_name)); 180 if (!disasm_sp) 181 return false; 182 183 const bool force_live_memory = true; 184 size_t bytes_disassembled = disasm_sp->ParseInstructions( 185 exe_ctx.GetTargetRef(), address, limit, &strm, force_live_memory); 186 if (bytes_disassembled == 0) 187 return false; 188 189 disasm_sp->PrintInstructions(debugger, arch, exe_ctx, 190 mixed_source_and_assembly, 191 num_mixed_context_lines, options, strm); 192 return true; 193 } 194 195 Disassembler::SourceLine 196 Disassembler::GetFunctionDeclLineEntry(const SymbolContext &sc) { 197 if (!sc.function) 198 return {}; 199 200 if (!sc.line_entry.IsValid()) 201 return {}; 202 203 LineEntry prologue_end_line = sc.line_entry; 204 FileSpec func_decl_file; 205 uint32_t func_decl_line; 206 sc.function->GetStartLineSourceInfo(func_decl_file, func_decl_line); 207 208 if (func_decl_file != prologue_end_line.file && 209 func_decl_file != prologue_end_line.original_file) 210 return {}; 211 212 SourceLine decl_line; 213 decl_line.file = func_decl_file; 214 decl_line.line = func_decl_line; 215 // TODO: Do we care about column on these entries? If so, we need to plumb 216 // that through GetStartLineSourceInfo. 217 decl_line.column = 0; 218 return decl_line; 219 } 220 221 void Disassembler::AddLineToSourceLineTables( 222 SourceLine &line, 223 std::map<FileSpec, std::set<uint32_t>> &source_lines_seen) { 224 if (line.IsValid()) { 225 auto source_lines_seen_pos = source_lines_seen.find(line.file); 226 if (source_lines_seen_pos == source_lines_seen.end()) { 227 std::set<uint32_t> lines; 228 lines.insert(line.line); 229 source_lines_seen.emplace(line.file, lines); 230 } else { 231 source_lines_seen_pos->second.insert(line.line); 232 } 233 } 234 } 235 236 bool Disassembler::ElideMixedSourceAndDisassemblyLine( 237 const ExecutionContext &exe_ctx, const SymbolContext &sc, 238 SourceLine &line) { 239 240 // TODO: should we also check target.process.thread.step-avoid-libraries ? 241 242 const RegularExpression *avoid_regex = nullptr; 243 244 // Skip any line #0 entries - they are implementation details 245 if (line.line == 0) 246 return false; 247 248 ThreadSP thread_sp = exe_ctx.GetThreadSP(); 249 if (thread_sp) { 250 avoid_regex = thread_sp->GetSymbolsToAvoidRegexp(); 251 } else { 252 TargetSP target_sp = exe_ctx.GetTargetSP(); 253 if (target_sp) { 254 Status error; 255 OptionValueSP value_sp = target_sp->GetDebugger().GetPropertyValue( 256 &exe_ctx, "target.process.thread.step-avoid-regexp", false, error); 257 if (value_sp && value_sp->GetType() == OptionValue::eTypeRegex) { 258 OptionValueRegex *re = value_sp->GetAsRegex(); 259 if (re) { 260 avoid_regex = re->GetCurrentValue(); 261 } 262 } 263 } 264 } 265 if (avoid_regex && sc.symbol != nullptr) { 266 const char *function_name = 267 sc.GetFunctionName(Mangled::ePreferDemangledWithoutArguments) 268 .GetCString(); 269 if (function_name && avoid_regex->Execute(function_name)) { 270 // skip this source line 271 return true; 272 } 273 } 274 // don't skip this source line 275 return false; 276 } 277 278 void Disassembler::PrintInstructions(Debugger &debugger, const ArchSpec &arch, 279 const ExecutionContext &exe_ctx, 280 bool mixed_source_and_assembly, 281 uint32_t num_mixed_context_lines, 282 uint32_t options, Stream &strm) { 283 // We got some things disassembled... 284 size_t num_instructions_found = GetInstructionList().GetSize(); 285 286 const uint32_t max_opcode_byte_size = 287 GetInstructionList().GetMaxOpcocdeByteSize(); 288 SymbolContext sc; 289 SymbolContext prev_sc; 290 AddressRange current_source_line_range; 291 const Address *pc_addr_ptr = nullptr; 292 StackFrame *frame = exe_ctx.GetFramePtr(); 293 294 TargetSP target_sp(exe_ctx.GetTargetSP()); 295 SourceManager &source_manager = 296 target_sp ? target_sp->GetSourceManager() : debugger.GetSourceManager(); 297 298 if (frame) { 299 pc_addr_ptr = &frame->GetFrameCodeAddress(); 300 } 301 const uint32_t scope = 302 eSymbolContextLineEntry | eSymbolContextFunction | eSymbolContextSymbol; 303 const bool use_inline_block_range = false; 304 305 const FormatEntity::Entry *disassembly_format = nullptr; 306 FormatEntity::Entry format; 307 if (exe_ctx.HasTargetScope()) { 308 disassembly_format = 309 exe_ctx.GetTargetRef().GetDebugger().GetDisassemblyFormat(); 310 } else { 311 FormatEntity::Parse("${addr}: ", format); 312 disassembly_format = &format; 313 } 314 315 // First pass: step through the list of instructions, find how long the 316 // initial addresses strings are, insert padding in the second pass so the 317 // opcodes all line up nicely. 318 319 // Also build up the source line mapping if this is mixed source & assembly 320 // mode. Calculate the source line for each assembly instruction (eliding 321 // inlined functions which the user wants to skip). 322 323 std::map<FileSpec, std::set<uint32_t>> source_lines_seen; 324 Symbol *previous_symbol = nullptr; 325 326 size_t address_text_size = 0; 327 for (size_t i = 0; i < num_instructions_found; ++i) { 328 Instruction *inst = GetInstructionList().GetInstructionAtIndex(i).get(); 329 if (inst) { 330 const Address &addr = inst->GetAddress(); 331 ModuleSP module_sp(addr.GetModule()); 332 if (module_sp) { 333 const SymbolContextItem resolve_mask = eSymbolContextFunction | 334 eSymbolContextSymbol | 335 eSymbolContextLineEntry; 336 uint32_t resolved_mask = 337 module_sp->ResolveSymbolContextForAddress(addr, resolve_mask, sc); 338 if (resolved_mask) { 339 StreamString strmstr; 340 Debugger::FormatDisassemblerAddress(disassembly_format, &sc, nullptr, 341 &exe_ctx, &addr, strmstr); 342 size_t cur_line = strmstr.GetSizeOfLastLine(); 343 if (cur_line > address_text_size) 344 address_text_size = cur_line; 345 346 // Add entries to our "source_lines_seen" map+set which list which 347 // sources lines occur in this disassembly session. We will print 348 // lines of context around a source line, but we don't want to print 349 // a source line that has a line table entry of its own - we'll leave 350 // that source line to be printed when it actually occurs in the 351 // disassembly. 352 353 if (mixed_source_and_assembly && sc.line_entry.IsValid()) { 354 if (sc.symbol != previous_symbol) { 355 SourceLine decl_line = GetFunctionDeclLineEntry(sc); 356 if (!ElideMixedSourceAndDisassemblyLine(exe_ctx, sc, decl_line)) 357 AddLineToSourceLineTables(decl_line, source_lines_seen); 358 } 359 if (sc.line_entry.IsValid()) { 360 SourceLine this_line; 361 this_line.file = sc.line_entry.file; 362 this_line.line = sc.line_entry.line; 363 this_line.column = sc.line_entry.column; 364 if (!ElideMixedSourceAndDisassemblyLine(exe_ctx, sc, this_line)) 365 AddLineToSourceLineTables(this_line, source_lines_seen); 366 } 367 } 368 } 369 sc.Clear(false); 370 } 371 } 372 } 373 374 previous_symbol = nullptr; 375 SourceLine previous_line; 376 for (size_t i = 0; i < num_instructions_found; ++i) { 377 Instruction *inst = GetInstructionList().GetInstructionAtIndex(i).get(); 378 379 if (inst) { 380 const Address &addr = inst->GetAddress(); 381 const bool inst_is_at_pc = pc_addr_ptr && addr == *pc_addr_ptr; 382 SourceLinesToDisplay source_lines_to_display; 383 384 prev_sc = sc; 385 386 ModuleSP module_sp(addr.GetModule()); 387 if (module_sp) { 388 uint32_t resolved_mask = module_sp->ResolveSymbolContextForAddress( 389 addr, eSymbolContextEverything, sc); 390 if (resolved_mask) { 391 if (mixed_source_and_assembly) { 392 393 // If we've started a new function (non-inlined), print all of the 394 // source lines from the function declaration until the first line 395 // table entry - typically the opening curly brace of the function. 396 if (previous_symbol != sc.symbol) { 397 // The default disassembly format puts an extra blank line 398 // between functions - so when we're displaying the source 399 // context for a function, we don't want to add a blank line 400 // after the source context or we'll end up with two of them. 401 if (previous_symbol != nullptr) 402 source_lines_to_display.print_source_context_end_eol = false; 403 404 previous_symbol = sc.symbol; 405 if (sc.function && sc.line_entry.IsValid()) { 406 LineEntry prologue_end_line = sc.line_entry; 407 if (!ElideMixedSourceAndDisassemblyLine(exe_ctx, sc, 408 prologue_end_line)) { 409 FileSpec func_decl_file; 410 uint32_t func_decl_line; 411 sc.function->GetStartLineSourceInfo(func_decl_file, 412 func_decl_line); 413 if (func_decl_file == prologue_end_line.file || 414 func_decl_file == prologue_end_line.original_file) { 415 // Add all the lines between the function declaration and 416 // the first non-prologue source line to the list of lines 417 // to print. 418 for (uint32_t lineno = func_decl_line; 419 lineno <= prologue_end_line.line; lineno++) { 420 SourceLine this_line; 421 this_line.file = func_decl_file; 422 this_line.line = lineno; 423 source_lines_to_display.lines.push_back(this_line); 424 } 425 // Mark the last line as the "current" one. Usually this 426 // is the open curly brace. 427 if (source_lines_to_display.lines.size() > 0) 428 source_lines_to_display.current_source_line = 429 source_lines_to_display.lines.size() - 1; 430 } 431 } 432 } 433 sc.GetAddressRange(scope, 0, use_inline_block_range, 434 current_source_line_range); 435 } 436 437 // If we've left a previous source line's address range, print a 438 // new source line 439 if (!current_source_line_range.ContainsFileAddress(addr)) { 440 sc.GetAddressRange(scope, 0, use_inline_block_range, 441 current_source_line_range); 442 443 if (sc != prev_sc && sc.comp_unit && sc.line_entry.IsValid()) { 444 SourceLine this_line; 445 this_line.file = sc.line_entry.file; 446 this_line.line = sc.line_entry.line; 447 448 if (!ElideMixedSourceAndDisassemblyLine(exe_ctx, sc, 449 this_line)) { 450 // Only print this source line if it is different from the 451 // last source line we printed. There may have been inlined 452 // functions between these lines that we elided, resulting in 453 // the same line being printed twice in a row for a 454 // contiguous block of assembly instructions. 455 if (this_line != previous_line) { 456 457 std::vector<uint32_t> previous_lines; 458 for (uint32_t i = 0; 459 i < num_mixed_context_lines && 460 (this_line.line - num_mixed_context_lines) > 0; 461 i++) { 462 uint32_t line = 463 this_line.line - num_mixed_context_lines + i; 464 auto pos = source_lines_seen.find(this_line.file); 465 if (pos != source_lines_seen.end()) { 466 if (pos->second.count(line) == 1) { 467 previous_lines.clear(); 468 } else { 469 previous_lines.push_back(line); 470 } 471 } 472 } 473 for (size_t i = 0; i < previous_lines.size(); i++) { 474 SourceLine previous_line; 475 previous_line.file = this_line.file; 476 previous_line.line = previous_lines[i]; 477 auto pos = source_lines_seen.find(previous_line.file); 478 if (pos != source_lines_seen.end()) { 479 pos->second.insert(previous_line.line); 480 } 481 source_lines_to_display.lines.push_back(previous_line); 482 } 483 484 source_lines_to_display.lines.push_back(this_line); 485 source_lines_to_display.current_source_line = 486 source_lines_to_display.lines.size() - 1; 487 488 for (uint32_t i = 0; i < num_mixed_context_lines; i++) { 489 SourceLine next_line; 490 next_line.file = this_line.file; 491 next_line.line = this_line.line + i + 1; 492 auto pos = source_lines_seen.find(next_line.file); 493 if (pos != source_lines_seen.end()) { 494 if (pos->second.count(next_line.line) == 1) 495 break; 496 pos->second.insert(next_line.line); 497 } 498 source_lines_to_display.lines.push_back(next_line); 499 } 500 } 501 previous_line = this_line; 502 } 503 } 504 } 505 } 506 } else { 507 sc.Clear(true); 508 } 509 } 510 511 if (source_lines_to_display.lines.size() > 0) { 512 strm.EOL(); 513 for (size_t idx = 0; idx < source_lines_to_display.lines.size(); 514 idx++) { 515 SourceLine ln = source_lines_to_display.lines[idx]; 516 const char *line_highlight = ""; 517 if (inst_is_at_pc && (options & eOptionMarkPCSourceLine)) { 518 line_highlight = "->"; 519 } else if (idx == source_lines_to_display.current_source_line) { 520 line_highlight = "**"; 521 } 522 source_manager.DisplaySourceLinesWithLineNumbers( 523 ln.file, ln.line, ln.column, 0, 0, line_highlight, &strm); 524 } 525 if (source_lines_to_display.print_source_context_end_eol) 526 strm.EOL(); 527 } 528 529 const bool show_bytes = (options & eOptionShowBytes) != 0; 530 inst->Dump(&strm, max_opcode_byte_size, true, show_bytes, &exe_ctx, &sc, 531 &prev_sc, nullptr, address_text_size); 532 strm.EOL(); 533 } else { 534 break; 535 } 536 } 537 } 538 539 bool Disassembler::Disassemble(Debugger &debugger, const ArchSpec &arch, 540 StackFrame &frame, Stream &strm) { 541 AddressRange range; 542 SymbolContext sc( 543 frame.GetSymbolContext(eSymbolContextFunction | eSymbolContextSymbol)); 544 if (sc.function) { 545 range = sc.function->GetAddressRange(); 546 } else if (sc.symbol && sc.symbol->ValueIsAddress()) { 547 range.GetBaseAddress() = sc.symbol->GetAddressRef(); 548 range.SetByteSize(sc.symbol->GetByteSize()); 549 } else { 550 range.GetBaseAddress() = frame.GetFrameCodeAddress(); 551 } 552 553 if (range.GetBaseAddress().IsValid() && range.GetByteSize() == 0) 554 range.SetByteSize(DEFAULT_DISASM_BYTE_SIZE); 555 556 Disassembler::Limit limit = {Disassembler::Limit::Bytes, 557 range.GetByteSize()}; 558 if (limit.value == 0) 559 limit.value = DEFAULT_DISASM_BYTE_SIZE; 560 561 return Disassemble(debugger, arch, nullptr, nullptr, frame, 562 range.GetBaseAddress(), limit, false, 0, 0, strm); 563 } 564 565 Instruction::Instruction(const Address &address, AddressClass addr_class) 566 : m_address(address), m_address_class(addr_class), m_opcode(), 567 m_calculated_strings(false) {} 568 569 Instruction::~Instruction() = default; 570 571 AddressClass Instruction::GetAddressClass() { 572 if (m_address_class == AddressClass::eInvalid) 573 m_address_class = m_address.GetAddressClass(); 574 return m_address_class; 575 } 576 577 void Instruction::Dump(lldb_private::Stream *s, uint32_t max_opcode_byte_size, 578 bool show_address, bool show_bytes, 579 const ExecutionContext *exe_ctx, 580 const SymbolContext *sym_ctx, 581 const SymbolContext *prev_sym_ctx, 582 const FormatEntity::Entry *disassembly_addr_format, 583 size_t max_address_text_size) { 584 size_t opcode_column_width = 7; 585 const size_t operand_column_width = 25; 586 587 CalculateMnemonicOperandsAndCommentIfNeeded(exe_ctx); 588 589 StreamString ss; 590 591 if (show_address) { 592 Debugger::FormatDisassemblerAddress(disassembly_addr_format, sym_ctx, 593 prev_sym_ctx, exe_ctx, &m_address, ss); 594 ss.FillLastLineToColumn(max_address_text_size, ' '); 595 } 596 597 if (show_bytes) { 598 if (m_opcode.GetType() == Opcode::eTypeBytes) { 599 // x86_64 and i386 are the only ones that use bytes right now so pad out 600 // the byte dump to be able to always show 15 bytes (3 chars each) plus a 601 // space 602 if (max_opcode_byte_size > 0) 603 m_opcode.Dump(&ss, max_opcode_byte_size * 3 + 1); 604 else 605 m_opcode.Dump(&ss, 15 * 3 + 1); 606 } else { 607 // Else, we have ARM or MIPS which can show up to a uint32_t 0x00000000 608 // (10 spaces) plus two for padding... 609 if (max_opcode_byte_size > 0) 610 m_opcode.Dump(&ss, max_opcode_byte_size * 3 + 1); 611 else 612 m_opcode.Dump(&ss, 12); 613 } 614 } 615 616 const size_t opcode_pos = ss.GetSizeOfLastLine(); 617 618 // The default opcode size of 7 characters is plenty for most architectures 619 // but some like arm can pull out the occasional vqrshrun.s16. We won't get 620 // consistent column spacing in these cases, unfortunately. 621 if (m_opcode_name.length() >= opcode_column_width) { 622 opcode_column_width = m_opcode_name.length() + 1; 623 } 624 625 ss.PutCString(m_opcode_name); 626 ss.FillLastLineToColumn(opcode_pos + opcode_column_width, ' '); 627 ss.PutCString(m_mnemonics); 628 629 if (!m_comment.empty()) { 630 ss.FillLastLineToColumn( 631 opcode_pos + opcode_column_width + operand_column_width, ' '); 632 ss.PutCString(" ; "); 633 ss.PutCString(m_comment); 634 } 635 s->PutCString(ss.GetString()); 636 } 637 638 bool Instruction::DumpEmulation(const ArchSpec &arch) { 639 std::unique_ptr<EmulateInstruction> insn_emulator_up( 640 EmulateInstruction::FindPlugin(arch, eInstructionTypeAny, nullptr)); 641 if (insn_emulator_up) { 642 insn_emulator_up->SetInstruction(GetOpcode(), GetAddress(), nullptr); 643 return insn_emulator_up->EvaluateInstruction(0); 644 } 645 646 return false; 647 } 648 649 bool Instruction::CanSetBreakpoint () { 650 return !HasDelaySlot(); 651 } 652 653 bool Instruction::HasDelaySlot() { 654 // Default is false. 655 return false; 656 } 657 658 OptionValueSP Instruction::ReadArray(FILE *in_file, Stream *out_stream, 659 OptionValue::Type data_type) { 660 bool done = false; 661 char buffer[1024]; 662 663 auto option_value_sp = std::make_shared<OptionValueArray>(1u << data_type); 664 665 int idx = 0; 666 while (!done) { 667 if (!fgets(buffer, 1023, in_file)) { 668 out_stream->Printf( 669 "Instruction::ReadArray: Error reading file (fgets).\n"); 670 option_value_sp.reset(); 671 return option_value_sp; 672 } 673 674 std::string line(buffer); 675 676 size_t len = line.size(); 677 if (line[len - 1] == '\n') { 678 line[len - 1] = '\0'; 679 line.resize(len - 1); 680 } 681 682 if ((line.size() == 1) && line[0] == ']') { 683 done = true; 684 line.clear(); 685 } 686 687 if (!line.empty()) { 688 std::string value; 689 static RegularExpression g_reg_exp( 690 llvm::StringRef("^[ \t]*([^ \t]+)[ \t]*$")); 691 llvm::SmallVector<llvm::StringRef, 2> matches; 692 if (g_reg_exp.Execute(line, &matches)) 693 value = matches[1].str(); 694 else 695 value = line; 696 697 OptionValueSP data_value_sp; 698 switch (data_type) { 699 case OptionValue::eTypeUInt64: 700 data_value_sp = std::make_shared<OptionValueUInt64>(0, 0); 701 data_value_sp->SetValueFromString(value); 702 break; 703 // Other types can be added later as needed. 704 default: 705 data_value_sp = std::make_shared<OptionValueString>(value.c_str(), ""); 706 break; 707 } 708 709 option_value_sp->GetAsArray()->InsertValue(idx, data_value_sp); 710 ++idx; 711 } 712 } 713 714 return option_value_sp; 715 } 716 717 OptionValueSP Instruction::ReadDictionary(FILE *in_file, Stream *out_stream) { 718 bool done = false; 719 char buffer[1024]; 720 721 auto option_value_sp = std::make_shared<OptionValueDictionary>(); 722 static ConstString encoding_key("data_encoding"); 723 OptionValue::Type data_type = OptionValue::eTypeInvalid; 724 725 while (!done) { 726 // Read the next line in the file 727 if (!fgets(buffer, 1023, in_file)) { 728 out_stream->Printf( 729 "Instruction::ReadDictionary: Error reading file (fgets).\n"); 730 option_value_sp.reset(); 731 return option_value_sp; 732 } 733 734 // Check to see if the line contains the end-of-dictionary marker ("}") 735 std::string line(buffer); 736 737 size_t len = line.size(); 738 if (line[len - 1] == '\n') { 739 line[len - 1] = '\0'; 740 line.resize(len - 1); 741 } 742 743 if ((line.size() == 1) && (line[0] == '}')) { 744 done = true; 745 line.clear(); 746 } 747 748 // Try to find a key-value pair in the current line and add it to the 749 // dictionary. 750 if (!line.empty()) { 751 static RegularExpression g_reg_exp(llvm::StringRef( 752 "^[ \t]*([a-zA-Z_][a-zA-Z0-9_]*)[ \t]*=[ \t]*(.*)[ \t]*$")); 753 754 llvm::SmallVector<llvm::StringRef, 3> matches; 755 756 bool reg_exp_success = g_reg_exp.Execute(line, &matches); 757 std::string key; 758 std::string value; 759 if (reg_exp_success) { 760 key = matches[1].str(); 761 value = matches[2].str(); 762 } else { 763 out_stream->Printf("Instruction::ReadDictionary: Failure executing " 764 "regular expression.\n"); 765 option_value_sp.reset(); 766 return option_value_sp; 767 } 768 769 ConstString const_key(key.c_str()); 770 // Check value to see if it's the start of an array or dictionary. 771 772 lldb::OptionValueSP value_sp; 773 assert(value.empty() == false); 774 assert(key.empty() == false); 775 776 if (value[0] == '{') { 777 assert(value.size() == 1); 778 // value is a dictionary 779 value_sp = ReadDictionary(in_file, out_stream); 780 if (!value_sp) { 781 option_value_sp.reset(); 782 return option_value_sp; 783 } 784 } else if (value[0] == '[') { 785 assert(value.size() == 1); 786 // value is an array 787 value_sp = ReadArray(in_file, out_stream, data_type); 788 if (!value_sp) { 789 option_value_sp.reset(); 790 return option_value_sp; 791 } 792 // We've used the data_type to read an array; re-set the type to 793 // Invalid 794 data_type = OptionValue::eTypeInvalid; 795 } else if ((value[0] == '0') && (value[1] == 'x')) { 796 value_sp = std::make_shared<OptionValueUInt64>(0, 0); 797 value_sp->SetValueFromString(value); 798 } else { 799 size_t len = value.size(); 800 if ((value[0] == '"') && (value[len - 1] == '"')) 801 value = value.substr(1, len - 2); 802 value_sp = std::make_shared<OptionValueString>(value.c_str(), ""); 803 } 804 805 if (const_key == encoding_key) { 806 // A 'data_encoding=..." is NOT a normal key-value pair; it is meta-data 807 // indicating the 808 // data type of an upcoming array (usually the next bit of data to be 809 // read in). 810 if (strcmp(value.c_str(), "uint32_t") == 0) 811 data_type = OptionValue::eTypeUInt64; 812 } else 813 option_value_sp->GetAsDictionary()->SetValueForKey(const_key, value_sp, 814 false); 815 } 816 } 817 818 return option_value_sp; 819 } 820 821 bool Instruction::TestEmulation(Stream *out_stream, const char *file_name) { 822 if (!out_stream) 823 return false; 824 825 if (!file_name) { 826 out_stream->Printf("Instruction::TestEmulation: Missing file_name."); 827 return false; 828 } 829 FILE *test_file = FileSystem::Instance().Fopen(file_name, "r"); 830 if (!test_file) { 831 out_stream->Printf( 832 "Instruction::TestEmulation: Attempt to open test file failed."); 833 return false; 834 } 835 836 char buffer[256]; 837 if (!fgets(buffer, 255, test_file)) { 838 out_stream->Printf( 839 "Instruction::TestEmulation: Error reading first line of test file.\n"); 840 fclose(test_file); 841 return false; 842 } 843 844 if (strncmp(buffer, "InstructionEmulationState={", 27) != 0) { 845 out_stream->Printf("Instructin::TestEmulation: Test file does not contain " 846 "emulation state dictionary\n"); 847 fclose(test_file); 848 return false; 849 } 850 851 // Read all the test information from the test file into an 852 // OptionValueDictionary. 853 854 OptionValueSP data_dictionary_sp(ReadDictionary(test_file, out_stream)); 855 if (!data_dictionary_sp) { 856 out_stream->Printf( 857 "Instruction::TestEmulation: Error reading Dictionary Object.\n"); 858 fclose(test_file); 859 return false; 860 } 861 862 fclose(test_file); 863 864 OptionValueDictionary *data_dictionary = 865 data_dictionary_sp->GetAsDictionary(); 866 static ConstString description_key("assembly_string"); 867 static ConstString triple_key("triple"); 868 869 OptionValueSP value_sp = data_dictionary->GetValueForKey(description_key); 870 871 if (!value_sp) { 872 out_stream->Printf("Instruction::TestEmulation: Test file does not " 873 "contain description string.\n"); 874 return false; 875 } 876 877 SetDescription(value_sp->GetStringValue()); 878 879 value_sp = data_dictionary->GetValueForKey(triple_key); 880 if (!value_sp) { 881 out_stream->Printf( 882 "Instruction::TestEmulation: Test file does not contain triple.\n"); 883 return false; 884 } 885 886 ArchSpec arch; 887 arch.SetTriple(llvm::Triple(value_sp->GetStringValue())); 888 889 bool success = false; 890 std::unique_ptr<EmulateInstruction> insn_emulator_up( 891 EmulateInstruction::FindPlugin(arch, eInstructionTypeAny, nullptr)); 892 if (insn_emulator_up) 893 success = 894 insn_emulator_up->TestEmulation(out_stream, arch, data_dictionary); 895 896 if (success) 897 out_stream->Printf("Emulation test succeeded."); 898 else 899 out_stream->Printf("Emulation test failed."); 900 901 return success; 902 } 903 904 bool Instruction::Emulate( 905 const ArchSpec &arch, uint32_t evaluate_options, void *baton, 906 EmulateInstruction::ReadMemoryCallback read_mem_callback, 907 EmulateInstruction::WriteMemoryCallback write_mem_callback, 908 EmulateInstruction::ReadRegisterCallback read_reg_callback, 909 EmulateInstruction::WriteRegisterCallback write_reg_callback) { 910 std::unique_ptr<EmulateInstruction> insn_emulator_up( 911 EmulateInstruction::FindPlugin(arch, eInstructionTypeAny, nullptr)); 912 if (insn_emulator_up) { 913 insn_emulator_up->SetBaton(baton); 914 insn_emulator_up->SetCallbacks(read_mem_callback, write_mem_callback, 915 read_reg_callback, write_reg_callback); 916 insn_emulator_up->SetInstruction(GetOpcode(), GetAddress(), nullptr); 917 return insn_emulator_up->EvaluateInstruction(evaluate_options); 918 } 919 920 return false; 921 } 922 923 uint32_t Instruction::GetData(DataExtractor &data) { 924 return m_opcode.GetData(data); 925 } 926 927 InstructionList::InstructionList() : m_instructions() {} 928 929 InstructionList::~InstructionList() = default; 930 931 size_t InstructionList::GetSize() const { return m_instructions.size(); } 932 933 uint32_t InstructionList::GetMaxOpcocdeByteSize() const { 934 uint32_t max_inst_size = 0; 935 collection::const_iterator pos, end; 936 for (pos = m_instructions.begin(), end = m_instructions.end(); pos != end; 937 ++pos) { 938 uint32_t inst_size = (*pos)->GetOpcode().GetByteSize(); 939 if (max_inst_size < inst_size) 940 max_inst_size = inst_size; 941 } 942 return max_inst_size; 943 } 944 945 InstructionSP InstructionList::GetInstructionAtIndex(size_t idx) const { 946 InstructionSP inst_sp; 947 if (idx < m_instructions.size()) 948 inst_sp = m_instructions[idx]; 949 return inst_sp; 950 } 951 952 InstructionSP InstructionList::GetInstructionAtAddress(const Address &address) { 953 uint32_t index = GetIndexOfInstructionAtAddress(address); 954 if (index != UINT32_MAX) 955 return GetInstructionAtIndex(index); 956 return nullptr; 957 } 958 959 void InstructionList::Dump(Stream *s, bool show_address, bool show_bytes, 960 const ExecutionContext *exe_ctx) { 961 const uint32_t max_opcode_byte_size = GetMaxOpcocdeByteSize(); 962 collection::const_iterator pos, begin, end; 963 964 const FormatEntity::Entry *disassembly_format = nullptr; 965 FormatEntity::Entry format; 966 if (exe_ctx && exe_ctx->HasTargetScope()) { 967 disassembly_format = 968 exe_ctx->GetTargetRef().GetDebugger().GetDisassemblyFormat(); 969 } else { 970 FormatEntity::Parse("${addr}: ", format); 971 disassembly_format = &format; 972 } 973 974 for (begin = m_instructions.begin(), end = m_instructions.end(), pos = begin; 975 pos != end; ++pos) { 976 if (pos != begin) 977 s->EOL(); 978 (*pos)->Dump(s, max_opcode_byte_size, show_address, show_bytes, exe_ctx, 979 nullptr, nullptr, disassembly_format, 0); 980 } 981 } 982 983 void InstructionList::Clear() { m_instructions.clear(); } 984 985 void InstructionList::Append(lldb::InstructionSP &inst_sp) { 986 if (inst_sp) 987 m_instructions.push_back(inst_sp); 988 } 989 990 uint32_t 991 InstructionList::GetIndexOfNextBranchInstruction(uint32_t start, 992 bool ignore_calls, 993 bool *found_calls) const { 994 size_t num_instructions = m_instructions.size(); 995 996 uint32_t next_branch = UINT32_MAX; 997 998 if (found_calls) 999 *found_calls = false; 1000 for (size_t i = start; i < num_instructions; i++) { 1001 if (m_instructions[i]->DoesBranch()) { 1002 if (ignore_calls && m_instructions[i]->IsCall()) { 1003 if (found_calls) 1004 *found_calls = true; 1005 continue; 1006 } 1007 next_branch = i; 1008 break; 1009 } 1010 } 1011 1012 return next_branch; 1013 } 1014 1015 uint32_t 1016 InstructionList::GetIndexOfInstructionAtAddress(const Address &address) { 1017 size_t num_instructions = m_instructions.size(); 1018 uint32_t index = UINT32_MAX; 1019 for (size_t i = 0; i < num_instructions; i++) { 1020 if (m_instructions[i]->GetAddress() == address) { 1021 index = i; 1022 break; 1023 } 1024 } 1025 return index; 1026 } 1027 1028 uint32_t 1029 InstructionList::GetIndexOfInstructionAtLoadAddress(lldb::addr_t load_addr, 1030 Target &target) { 1031 Address address; 1032 address.SetLoadAddress(load_addr, &target); 1033 return GetIndexOfInstructionAtAddress(address); 1034 } 1035 1036 size_t Disassembler::ParseInstructions(Target &target, Address start, 1037 Limit limit, Stream *error_strm_ptr, 1038 bool force_live_memory) { 1039 m_instruction_list.Clear(); 1040 1041 if (!start.IsValid()) 1042 return 0; 1043 1044 start = ResolveAddress(target, start); 1045 1046 addr_t byte_size = limit.value; 1047 if (limit.kind == Limit::Instructions) 1048 byte_size *= m_arch.GetMaximumOpcodeByteSize(); 1049 auto data_sp = std::make_shared<DataBufferHeap>(byte_size, '\0'); 1050 1051 Status error; 1052 lldb::addr_t load_addr = LLDB_INVALID_ADDRESS; 1053 const size_t bytes_read = 1054 target.ReadMemory(start, data_sp->GetBytes(), data_sp->GetByteSize(), 1055 error, force_live_memory, &load_addr); 1056 const bool data_from_file = load_addr == LLDB_INVALID_ADDRESS; 1057 1058 if (bytes_read == 0) { 1059 if (error_strm_ptr) { 1060 if (const char *error_cstr = error.AsCString()) 1061 error_strm_ptr->Printf("error: %s\n", error_cstr); 1062 } 1063 return 0; 1064 } 1065 1066 if (bytes_read != data_sp->GetByteSize()) 1067 data_sp->SetByteSize(bytes_read); 1068 DataExtractor data(data_sp, m_arch.GetByteOrder(), 1069 m_arch.GetAddressByteSize()); 1070 return DecodeInstructions(start, data, 0, 1071 limit.kind == Limit::Instructions ? limit.value 1072 : UINT32_MAX, 1073 false, data_from_file); 1074 } 1075 1076 // Disassembler copy constructor 1077 Disassembler::Disassembler(const ArchSpec &arch, const char *flavor) 1078 : m_arch(arch), m_instruction_list(), m_base_addr(LLDB_INVALID_ADDRESS), 1079 m_flavor() { 1080 if (flavor == nullptr) 1081 m_flavor.assign("default"); 1082 else 1083 m_flavor.assign(flavor); 1084 1085 // If this is an arm variant that can only include thumb (T16, T32) 1086 // instructions, force the arch triple to be "thumbv.." instead of "armv..." 1087 if (arch.IsAlwaysThumbInstructions()) { 1088 std::string thumb_arch_name(arch.GetTriple().getArchName().str()); 1089 // Replace "arm" with "thumb" so we get all thumb variants correct 1090 if (thumb_arch_name.size() > 3) { 1091 thumb_arch_name.erase(0, 3); 1092 thumb_arch_name.insert(0, "thumb"); 1093 } 1094 m_arch.SetTriple(thumb_arch_name.c_str()); 1095 } 1096 } 1097 1098 Disassembler::~Disassembler() = default; 1099 1100 InstructionList &Disassembler::GetInstructionList() { 1101 return m_instruction_list; 1102 } 1103 1104 const InstructionList &Disassembler::GetInstructionList() const { 1105 return m_instruction_list; 1106 } 1107 1108 // Class PseudoInstruction 1109 1110 PseudoInstruction::PseudoInstruction() 1111 : Instruction(Address(), AddressClass::eUnknown), m_description() {} 1112 1113 PseudoInstruction::~PseudoInstruction() = default; 1114 1115 bool PseudoInstruction::DoesBranch() { 1116 // This is NOT a valid question for a pseudo instruction. 1117 return false; 1118 } 1119 1120 bool PseudoInstruction::HasDelaySlot() { 1121 // This is NOT a valid question for a pseudo instruction. 1122 return false; 1123 } 1124 1125 bool PseudoInstruction::IsLoad() { return false; } 1126 1127 bool PseudoInstruction::IsAuthenticated() { return false; } 1128 1129 size_t PseudoInstruction::Decode(const lldb_private::Disassembler &disassembler, 1130 const lldb_private::DataExtractor &data, 1131 lldb::offset_t data_offset) { 1132 return m_opcode.GetByteSize(); 1133 } 1134 1135 void PseudoInstruction::SetOpcode(size_t opcode_size, void *opcode_data) { 1136 if (!opcode_data) 1137 return; 1138 1139 switch (opcode_size) { 1140 case 8: { 1141 uint8_t value8 = *((uint8_t *)opcode_data); 1142 m_opcode.SetOpcode8(value8, eByteOrderInvalid); 1143 break; 1144 } 1145 case 16: { 1146 uint16_t value16 = *((uint16_t *)opcode_data); 1147 m_opcode.SetOpcode16(value16, eByteOrderInvalid); 1148 break; 1149 } 1150 case 32: { 1151 uint32_t value32 = *((uint32_t *)opcode_data); 1152 m_opcode.SetOpcode32(value32, eByteOrderInvalid); 1153 break; 1154 } 1155 case 64: { 1156 uint64_t value64 = *((uint64_t *)opcode_data); 1157 m_opcode.SetOpcode64(value64, eByteOrderInvalid); 1158 break; 1159 } 1160 default: 1161 break; 1162 } 1163 } 1164 1165 void PseudoInstruction::SetDescription(llvm::StringRef description) { 1166 m_description = std::string(description); 1167 } 1168 1169 Instruction::Operand Instruction::Operand::BuildRegister(ConstString &r) { 1170 Operand ret; 1171 ret.m_type = Type::Register; 1172 ret.m_register = r; 1173 return ret; 1174 } 1175 1176 Instruction::Operand Instruction::Operand::BuildImmediate(lldb::addr_t imm, 1177 bool neg) { 1178 Operand ret; 1179 ret.m_type = Type::Immediate; 1180 ret.m_immediate = imm; 1181 ret.m_negative = neg; 1182 return ret; 1183 } 1184 1185 Instruction::Operand Instruction::Operand::BuildImmediate(int64_t imm) { 1186 Operand ret; 1187 ret.m_type = Type::Immediate; 1188 if (imm < 0) { 1189 ret.m_immediate = -imm; 1190 ret.m_negative = true; 1191 } else { 1192 ret.m_immediate = imm; 1193 ret.m_negative = false; 1194 } 1195 return ret; 1196 } 1197 1198 Instruction::Operand 1199 Instruction::Operand::BuildDereference(const Operand &ref) { 1200 Operand ret; 1201 ret.m_type = Type::Dereference; 1202 ret.m_children = {ref}; 1203 return ret; 1204 } 1205 1206 Instruction::Operand Instruction::Operand::BuildSum(const Operand &lhs, 1207 const Operand &rhs) { 1208 Operand ret; 1209 ret.m_type = Type::Sum; 1210 ret.m_children = {lhs, rhs}; 1211 return ret; 1212 } 1213 1214 Instruction::Operand Instruction::Operand::BuildProduct(const Operand &lhs, 1215 const Operand &rhs) { 1216 Operand ret; 1217 ret.m_type = Type::Product; 1218 ret.m_children = {lhs, rhs}; 1219 return ret; 1220 } 1221 1222 std::function<bool(const Instruction::Operand &)> 1223 lldb_private::OperandMatchers::MatchBinaryOp( 1224 std::function<bool(const Instruction::Operand &)> base, 1225 std::function<bool(const Instruction::Operand &)> left, 1226 std::function<bool(const Instruction::Operand &)> right) { 1227 return [base, left, right](const Instruction::Operand &op) -> bool { 1228 return (base(op) && op.m_children.size() == 2 && 1229 ((left(op.m_children[0]) && right(op.m_children[1])) || 1230 (left(op.m_children[1]) && right(op.m_children[0])))); 1231 }; 1232 } 1233 1234 std::function<bool(const Instruction::Operand &)> 1235 lldb_private::OperandMatchers::MatchUnaryOp( 1236 std::function<bool(const Instruction::Operand &)> base, 1237 std::function<bool(const Instruction::Operand &)> child) { 1238 return [base, child](const Instruction::Operand &op) -> bool { 1239 return (base(op) && op.m_children.size() == 1 && child(op.m_children[0])); 1240 }; 1241 } 1242 1243 std::function<bool(const Instruction::Operand &)> 1244 lldb_private::OperandMatchers::MatchRegOp(const RegisterInfo &info) { 1245 return [&info](const Instruction::Operand &op) { 1246 return (op.m_type == Instruction::Operand::Type::Register && 1247 (op.m_register == ConstString(info.name) || 1248 op.m_register == ConstString(info.alt_name))); 1249 }; 1250 } 1251 1252 std::function<bool(const Instruction::Operand &)> 1253 lldb_private::OperandMatchers::FetchRegOp(ConstString ®) { 1254 return [®](const Instruction::Operand &op) { 1255 if (op.m_type != Instruction::Operand::Type::Register) { 1256 return false; 1257 } 1258 reg = op.m_register; 1259 return true; 1260 }; 1261 } 1262 1263 std::function<bool(const Instruction::Operand &)> 1264 lldb_private::OperandMatchers::MatchImmOp(int64_t imm) { 1265 return [imm](const Instruction::Operand &op) { 1266 return (op.m_type == Instruction::Operand::Type::Immediate && 1267 ((op.m_negative && op.m_immediate == (uint64_t)-imm) || 1268 (!op.m_negative && op.m_immediate == (uint64_t)imm))); 1269 }; 1270 } 1271 1272 std::function<bool(const Instruction::Operand &)> 1273 lldb_private::OperandMatchers::FetchImmOp(int64_t &imm) { 1274 return [&imm](const Instruction::Operand &op) { 1275 if (op.m_type != Instruction::Operand::Type::Immediate) { 1276 return false; 1277 } 1278 if (op.m_negative) { 1279 imm = -((int64_t)op.m_immediate); 1280 } else { 1281 imm = ((int64_t)op.m_immediate); 1282 } 1283 return true; 1284 }; 1285 } 1286 1287 std::function<bool(const Instruction::Operand &)> 1288 lldb_private::OperandMatchers::MatchOpType(Instruction::Operand::Type type) { 1289 return [type](const Instruction::Operand &op) { return op.m_type == type; }; 1290 } 1291