1 //===-- Disassembler.cpp --------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "lldb/Core/Disassembler.h" 10 11 #include "lldb/Core/AddressRange.h" 12 #include "lldb/Core/Debugger.h" 13 #include "lldb/Core/EmulateInstruction.h" 14 #include "lldb/Core/Mangled.h" 15 #include "lldb/Core/Module.h" 16 #include "lldb/Core/ModuleList.h" 17 #include "lldb/Core/PluginManager.h" 18 #include "lldb/Core/SourceManager.h" 19 #include "lldb/Host/FileSystem.h" 20 #include "lldb/Interpreter/OptionValue.h" 21 #include "lldb/Interpreter/OptionValueArray.h" 22 #include "lldb/Interpreter/OptionValueDictionary.h" 23 #include "lldb/Interpreter/OptionValueRegex.h" 24 #include "lldb/Interpreter/OptionValueString.h" 25 #include "lldb/Interpreter/OptionValueUInt64.h" 26 #include "lldb/Symbol/Function.h" 27 #include "lldb/Symbol/Symbol.h" 28 #include "lldb/Symbol/SymbolContext.h" 29 #include "lldb/Target/ExecutionContext.h" 30 #include "lldb/Target/SectionLoadList.h" 31 #include "lldb/Target/StackFrame.h" 32 #include "lldb/Target/Target.h" 33 #include "lldb/Target/Thread.h" 34 #include "lldb/Utility/DataBufferHeap.h" 35 #include "lldb/Utility/DataExtractor.h" 36 #include "lldb/Utility/RegularExpression.h" 37 #include "lldb/Utility/Status.h" 38 #include "lldb/Utility/Stream.h" 39 #include "lldb/Utility/StreamString.h" 40 #include "lldb/Utility/Timer.h" 41 #include "lldb/lldb-private-enumerations.h" 42 #include "lldb/lldb-private-interfaces.h" 43 #include "lldb/lldb-private-types.h" 44 #include "llvm/ADT/Triple.h" 45 #include "llvm/Support/Compiler.h" 46 47 #include <cstdint> 48 #include <cstring> 49 #include <utility> 50 51 #include <cassert> 52 53 #define DEFAULT_DISASM_BYTE_SIZE 32 54 55 using namespace lldb; 56 using namespace lldb_private; 57 58 DisassemblerSP Disassembler::FindPlugin(const ArchSpec &arch, 59 const char *flavor, 60 const char *plugin_name) { 61 LLDB_SCOPED_TIMERF("Disassembler::FindPlugin (arch = %s, plugin_name = %s)", 62 arch.GetArchitectureName(), plugin_name); 63 64 DisassemblerCreateInstance create_callback = nullptr; 65 66 if (plugin_name) { 67 ConstString const_plugin_name(plugin_name); 68 create_callback = PluginManager::GetDisassemblerCreateCallbackForPluginName( 69 const_plugin_name); 70 if (create_callback) { 71 DisassemblerSP disassembler_sp(create_callback(arch, flavor)); 72 73 if (disassembler_sp) 74 return disassembler_sp; 75 } 76 } else { 77 for (uint32_t idx = 0; 78 (create_callback = PluginManager::GetDisassemblerCreateCallbackAtIndex( 79 idx)) != nullptr; 80 ++idx) { 81 DisassemblerSP disassembler_sp(create_callback(arch, flavor)); 82 83 if (disassembler_sp) 84 return disassembler_sp; 85 } 86 } 87 return DisassemblerSP(); 88 } 89 90 DisassemblerSP Disassembler::FindPluginForTarget(const Target &target, 91 const ArchSpec &arch, 92 const char *flavor, 93 const char *plugin_name) { 94 if (flavor == nullptr) { 95 // FIXME - we don't have the mechanism in place to do per-architecture 96 // settings. But since we know that for now we only support flavors on x86 97 // & x86_64, 98 if (arch.GetTriple().getArch() == llvm::Triple::x86 || 99 arch.GetTriple().getArch() == llvm::Triple::x86_64) 100 flavor = target.GetDisassemblyFlavor(); 101 } 102 return FindPlugin(arch, flavor, plugin_name); 103 } 104 105 static Address ResolveAddress(Target &target, const Address &addr) { 106 if (!addr.IsSectionOffset()) { 107 Address resolved_addr; 108 // If we weren't passed in a section offset address range, try and resolve 109 // it to something 110 bool is_resolved = target.GetSectionLoadList().IsEmpty() 111 ? target.GetImages().ResolveFileAddress( 112 addr.GetOffset(), resolved_addr) 113 : target.GetSectionLoadList().ResolveLoadAddress( 114 addr.GetOffset(), resolved_addr); 115 116 // We weren't able to resolve the address, just treat it as a raw address 117 if (is_resolved && resolved_addr.IsValid()) 118 return resolved_addr; 119 } 120 return addr; 121 } 122 123 lldb::DisassemblerSP Disassembler::DisassembleRange( 124 const ArchSpec &arch, const char *plugin_name, const char *flavor, 125 Target &target, const AddressRange &range, bool force_live_memory) { 126 if (range.GetByteSize() <= 0) 127 return {}; 128 129 if (!range.GetBaseAddress().IsValid()) 130 return {}; 131 132 lldb::DisassemblerSP disasm_sp = 133 Disassembler::FindPluginForTarget(target, arch, flavor, plugin_name); 134 135 if (!disasm_sp) 136 return {}; 137 138 const size_t bytes_disassembled = disasm_sp->ParseInstructions( 139 target, range.GetBaseAddress(), {Limit::Bytes, range.GetByteSize()}, 140 nullptr, force_live_memory); 141 if (bytes_disassembled == 0) 142 return {}; 143 144 return disasm_sp; 145 } 146 147 lldb::DisassemblerSP 148 Disassembler::DisassembleBytes(const ArchSpec &arch, const char *plugin_name, 149 const char *flavor, const Address &start, 150 const void *src, size_t src_len, 151 uint32_t num_instructions, bool data_from_file) { 152 if (!src) 153 return {}; 154 155 lldb::DisassemblerSP disasm_sp = 156 Disassembler::FindPlugin(arch, flavor, plugin_name); 157 158 if (!disasm_sp) 159 return {}; 160 161 DataExtractor data(src, src_len, arch.GetByteOrder(), 162 arch.GetAddressByteSize()); 163 164 (void)disasm_sp->DecodeInstructions(start, data, 0, num_instructions, false, 165 data_from_file); 166 return disasm_sp; 167 } 168 169 bool Disassembler::Disassemble(Debugger &debugger, const ArchSpec &arch, 170 const char *plugin_name, const char *flavor, 171 const ExecutionContext &exe_ctx, 172 const Address &address, Limit limit, 173 bool mixed_source_and_assembly, 174 uint32_t num_mixed_context_lines, 175 uint32_t options, Stream &strm) { 176 if (!exe_ctx.GetTargetPtr()) 177 return false; 178 179 lldb::DisassemblerSP disasm_sp(Disassembler::FindPluginForTarget( 180 exe_ctx.GetTargetRef(), arch, flavor, plugin_name)); 181 if (!disasm_sp) 182 return false; 183 184 const bool force_live_memory = true; 185 size_t bytes_disassembled = disasm_sp->ParseInstructions( 186 exe_ctx.GetTargetRef(), address, limit, &strm, force_live_memory); 187 if (bytes_disassembled == 0) 188 return false; 189 190 disasm_sp->PrintInstructions(debugger, arch, exe_ctx, 191 mixed_source_and_assembly, 192 num_mixed_context_lines, options, strm); 193 return true; 194 } 195 196 Disassembler::SourceLine 197 Disassembler::GetFunctionDeclLineEntry(const SymbolContext &sc) { 198 if (!sc.function) 199 return {}; 200 201 if (!sc.line_entry.IsValid()) 202 return {}; 203 204 LineEntry prologue_end_line = sc.line_entry; 205 FileSpec func_decl_file; 206 uint32_t func_decl_line; 207 sc.function->GetStartLineSourceInfo(func_decl_file, func_decl_line); 208 209 if (func_decl_file != prologue_end_line.file && 210 func_decl_file != prologue_end_line.original_file) 211 return {}; 212 213 SourceLine decl_line; 214 decl_line.file = func_decl_file; 215 decl_line.line = func_decl_line; 216 // TODO: Do we care about column on these entries? If so, we need to plumb 217 // that through GetStartLineSourceInfo. 218 decl_line.column = 0; 219 return decl_line; 220 } 221 222 void Disassembler::AddLineToSourceLineTables( 223 SourceLine &line, 224 std::map<FileSpec, std::set<uint32_t>> &source_lines_seen) { 225 if (line.IsValid()) { 226 auto source_lines_seen_pos = source_lines_seen.find(line.file); 227 if (source_lines_seen_pos == source_lines_seen.end()) { 228 std::set<uint32_t> lines; 229 lines.insert(line.line); 230 source_lines_seen.emplace(line.file, lines); 231 } else { 232 source_lines_seen_pos->second.insert(line.line); 233 } 234 } 235 } 236 237 bool Disassembler::ElideMixedSourceAndDisassemblyLine( 238 const ExecutionContext &exe_ctx, const SymbolContext &sc, 239 SourceLine &line) { 240 241 // TODO: should we also check target.process.thread.step-avoid-libraries ? 242 243 const RegularExpression *avoid_regex = nullptr; 244 245 // Skip any line #0 entries - they are implementation details 246 if (line.line == 0) 247 return false; 248 249 ThreadSP thread_sp = exe_ctx.GetThreadSP(); 250 if (thread_sp) { 251 avoid_regex = thread_sp->GetSymbolsToAvoidRegexp(); 252 } else { 253 TargetSP target_sp = exe_ctx.GetTargetSP(); 254 if (target_sp) { 255 Status error; 256 OptionValueSP value_sp = target_sp->GetDebugger().GetPropertyValue( 257 &exe_ctx, "target.process.thread.step-avoid-regexp", false, error); 258 if (value_sp && value_sp->GetType() == OptionValue::eTypeRegex) { 259 OptionValueRegex *re = value_sp->GetAsRegex(); 260 if (re) { 261 avoid_regex = re->GetCurrentValue(); 262 } 263 } 264 } 265 } 266 if (avoid_regex && sc.symbol != nullptr) { 267 const char *function_name = 268 sc.GetFunctionName(Mangled::ePreferDemangledWithoutArguments) 269 .GetCString(); 270 if (function_name && avoid_regex->Execute(function_name)) { 271 // skip this source line 272 return true; 273 } 274 } 275 // don't skip this source line 276 return false; 277 } 278 279 void Disassembler::PrintInstructions(Debugger &debugger, const ArchSpec &arch, 280 const ExecutionContext &exe_ctx, 281 bool mixed_source_and_assembly, 282 uint32_t num_mixed_context_lines, 283 uint32_t options, Stream &strm) { 284 // We got some things disassembled... 285 size_t num_instructions_found = GetInstructionList().GetSize(); 286 287 const uint32_t max_opcode_byte_size = 288 GetInstructionList().GetMaxOpcocdeByteSize(); 289 SymbolContext sc; 290 SymbolContext prev_sc; 291 AddressRange current_source_line_range; 292 const Address *pc_addr_ptr = nullptr; 293 StackFrame *frame = exe_ctx.GetFramePtr(); 294 295 TargetSP target_sp(exe_ctx.GetTargetSP()); 296 SourceManager &source_manager = 297 target_sp ? target_sp->GetSourceManager() : debugger.GetSourceManager(); 298 299 if (frame) { 300 pc_addr_ptr = &frame->GetFrameCodeAddress(); 301 } 302 const uint32_t scope = 303 eSymbolContextLineEntry | eSymbolContextFunction | eSymbolContextSymbol; 304 const bool use_inline_block_range = false; 305 306 const FormatEntity::Entry *disassembly_format = nullptr; 307 FormatEntity::Entry format; 308 if (exe_ctx.HasTargetScope()) { 309 disassembly_format = 310 exe_ctx.GetTargetRef().GetDebugger().GetDisassemblyFormat(); 311 } else { 312 FormatEntity::Parse("${addr}: ", format); 313 disassembly_format = &format; 314 } 315 316 // First pass: step through the list of instructions, find how long the 317 // initial addresses strings are, insert padding in the second pass so the 318 // opcodes all line up nicely. 319 320 // Also build up the source line mapping if this is mixed source & assembly 321 // mode. Calculate the source line for each assembly instruction (eliding 322 // inlined functions which the user wants to skip). 323 324 std::map<FileSpec, std::set<uint32_t>> source_lines_seen; 325 Symbol *previous_symbol = nullptr; 326 327 size_t address_text_size = 0; 328 for (size_t i = 0; i < num_instructions_found; ++i) { 329 Instruction *inst = GetInstructionList().GetInstructionAtIndex(i).get(); 330 if (inst) { 331 const Address &addr = inst->GetAddress(); 332 ModuleSP module_sp(addr.GetModule()); 333 if (module_sp) { 334 const SymbolContextItem resolve_mask = eSymbolContextFunction | 335 eSymbolContextSymbol | 336 eSymbolContextLineEntry; 337 uint32_t resolved_mask = 338 module_sp->ResolveSymbolContextForAddress(addr, resolve_mask, sc); 339 if (resolved_mask) { 340 StreamString strmstr; 341 Debugger::FormatDisassemblerAddress(disassembly_format, &sc, nullptr, 342 &exe_ctx, &addr, strmstr); 343 size_t cur_line = strmstr.GetSizeOfLastLine(); 344 if (cur_line > address_text_size) 345 address_text_size = cur_line; 346 347 // Add entries to our "source_lines_seen" map+set which list which 348 // sources lines occur in this disassembly session. We will print 349 // lines of context around a source line, but we don't want to print 350 // a source line that has a line table entry of its own - we'll leave 351 // that source line to be printed when it actually occurs in the 352 // disassembly. 353 354 if (mixed_source_and_assembly && sc.line_entry.IsValid()) { 355 if (sc.symbol != previous_symbol) { 356 SourceLine decl_line = GetFunctionDeclLineEntry(sc); 357 if (!ElideMixedSourceAndDisassemblyLine(exe_ctx, sc, decl_line)) 358 AddLineToSourceLineTables(decl_line, source_lines_seen); 359 } 360 if (sc.line_entry.IsValid()) { 361 SourceLine this_line; 362 this_line.file = sc.line_entry.file; 363 this_line.line = sc.line_entry.line; 364 this_line.column = sc.line_entry.column; 365 if (!ElideMixedSourceAndDisassemblyLine(exe_ctx, sc, this_line)) 366 AddLineToSourceLineTables(this_line, source_lines_seen); 367 } 368 } 369 } 370 sc.Clear(false); 371 } 372 } 373 } 374 375 previous_symbol = nullptr; 376 SourceLine previous_line; 377 for (size_t i = 0; i < num_instructions_found; ++i) { 378 Instruction *inst = GetInstructionList().GetInstructionAtIndex(i).get(); 379 380 if (inst) { 381 const Address &addr = inst->GetAddress(); 382 const bool inst_is_at_pc = pc_addr_ptr && addr == *pc_addr_ptr; 383 SourceLinesToDisplay source_lines_to_display; 384 385 prev_sc = sc; 386 387 ModuleSP module_sp(addr.GetModule()); 388 if (module_sp) { 389 uint32_t resolved_mask = module_sp->ResolveSymbolContextForAddress( 390 addr, eSymbolContextEverything, sc); 391 if (resolved_mask) { 392 if (mixed_source_and_assembly) { 393 394 // If we've started a new function (non-inlined), print all of the 395 // source lines from the function declaration until the first line 396 // table entry - typically the opening curly brace of the function. 397 if (previous_symbol != sc.symbol) { 398 // The default disassembly format puts an extra blank line 399 // between functions - so when we're displaying the source 400 // context for a function, we don't want to add a blank line 401 // after the source context or we'll end up with two of them. 402 if (previous_symbol != nullptr) 403 source_lines_to_display.print_source_context_end_eol = false; 404 405 previous_symbol = sc.symbol; 406 if (sc.function && sc.line_entry.IsValid()) { 407 LineEntry prologue_end_line = sc.line_entry; 408 if (!ElideMixedSourceAndDisassemblyLine(exe_ctx, sc, 409 prologue_end_line)) { 410 FileSpec func_decl_file; 411 uint32_t func_decl_line; 412 sc.function->GetStartLineSourceInfo(func_decl_file, 413 func_decl_line); 414 if (func_decl_file == prologue_end_line.file || 415 func_decl_file == prologue_end_line.original_file) { 416 // Add all the lines between the function declaration and 417 // the first non-prologue source line to the list of lines 418 // to print. 419 for (uint32_t lineno = func_decl_line; 420 lineno <= prologue_end_line.line; lineno++) { 421 SourceLine this_line; 422 this_line.file = func_decl_file; 423 this_line.line = lineno; 424 source_lines_to_display.lines.push_back(this_line); 425 } 426 // Mark the last line as the "current" one. Usually this 427 // is the open curly brace. 428 if (source_lines_to_display.lines.size() > 0) 429 source_lines_to_display.current_source_line = 430 source_lines_to_display.lines.size() - 1; 431 } 432 } 433 } 434 sc.GetAddressRange(scope, 0, use_inline_block_range, 435 current_source_line_range); 436 } 437 438 // If we've left a previous source line's address range, print a 439 // new source line 440 if (!current_source_line_range.ContainsFileAddress(addr)) { 441 sc.GetAddressRange(scope, 0, use_inline_block_range, 442 current_source_line_range); 443 444 if (sc != prev_sc && sc.comp_unit && sc.line_entry.IsValid()) { 445 SourceLine this_line; 446 this_line.file = sc.line_entry.file; 447 this_line.line = sc.line_entry.line; 448 449 if (!ElideMixedSourceAndDisassemblyLine(exe_ctx, sc, 450 this_line)) { 451 // Only print this source line if it is different from the 452 // last source line we printed. There may have been inlined 453 // functions between these lines that we elided, resulting in 454 // the same line being printed twice in a row for a 455 // contiguous block of assembly instructions. 456 if (this_line != previous_line) { 457 458 std::vector<uint32_t> previous_lines; 459 for (uint32_t i = 0; 460 i < num_mixed_context_lines && 461 (this_line.line - num_mixed_context_lines) > 0; 462 i++) { 463 uint32_t line = 464 this_line.line - num_mixed_context_lines + i; 465 auto pos = source_lines_seen.find(this_line.file); 466 if (pos != source_lines_seen.end()) { 467 if (pos->second.count(line) == 1) { 468 previous_lines.clear(); 469 } else { 470 previous_lines.push_back(line); 471 } 472 } 473 } 474 for (size_t i = 0; i < previous_lines.size(); i++) { 475 SourceLine previous_line; 476 previous_line.file = this_line.file; 477 previous_line.line = previous_lines[i]; 478 auto pos = source_lines_seen.find(previous_line.file); 479 if (pos != source_lines_seen.end()) { 480 pos->second.insert(previous_line.line); 481 } 482 source_lines_to_display.lines.push_back(previous_line); 483 } 484 485 source_lines_to_display.lines.push_back(this_line); 486 source_lines_to_display.current_source_line = 487 source_lines_to_display.lines.size() - 1; 488 489 for (uint32_t i = 0; i < num_mixed_context_lines; i++) { 490 SourceLine next_line; 491 next_line.file = this_line.file; 492 next_line.line = this_line.line + i + 1; 493 auto pos = source_lines_seen.find(next_line.file); 494 if (pos != source_lines_seen.end()) { 495 if (pos->second.count(next_line.line) == 1) 496 break; 497 pos->second.insert(next_line.line); 498 } 499 source_lines_to_display.lines.push_back(next_line); 500 } 501 } 502 previous_line = this_line; 503 } 504 } 505 } 506 } 507 } else { 508 sc.Clear(true); 509 } 510 } 511 512 if (source_lines_to_display.lines.size() > 0) { 513 strm.EOL(); 514 for (size_t idx = 0; idx < source_lines_to_display.lines.size(); 515 idx++) { 516 SourceLine ln = source_lines_to_display.lines[idx]; 517 const char *line_highlight = ""; 518 if (inst_is_at_pc && (options & eOptionMarkPCSourceLine)) { 519 line_highlight = "->"; 520 } else if (idx == source_lines_to_display.current_source_line) { 521 line_highlight = "**"; 522 } 523 source_manager.DisplaySourceLinesWithLineNumbers( 524 ln.file, ln.line, ln.column, 0, 0, line_highlight, &strm); 525 } 526 if (source_lines_to_display.print_source_context_end_eol) 527 strm.EOL(); 528 } 529 530 const bool show_bytes = (options & eOptionShowBytes) != 0; 531 inst->Dump(&strm, max_opcode_byte_size, true, show_bytes, &exe_ctx, &sc, 532 &prev_sc, nullptr, address_text_size); 533 strm.EOL(); 534 } else { 535 break; 536 } 537 } 538 } 539 540 bool Disassembler::Disassemble(Debugger &debugger, const ArchSpec &arch, 541 StackFrame &frame, Stream &strm) { 542 AddressRange range; 543 SymbolContext sc( 544 frame.GetSymbolContext(eSymbolContextFunction | eSymbolContextSymbol)); 545 if (sc.function) { 546 range = sc.function->GetAddressRange(); 547 } else if (sc.symbol && sc.symbol->ValueIsAddress()) { 548 range.GetBaseAddress() = sc.symbol->GetAddressRef(); 549 range.SetByteSize(sc.symbol->GetByteSize()); 550 } else { 551 range.GetBaseAddress() = frame.GetFrameCodeAddress(); 552 } 553 554 if (range.GetBaseAddress().IsValid() && range.GetByteSize() == 0) 555 range.SetByteSize(DEFAULT_DISASM_BYTE_SIZE); 556 557 Disassembler::Limit limit = {Disassembler::Limit::Bytes, 558 range.GetByteSize()}; 559 if (limit.value == 0) 560 limit.value = DEFAULT_DISASM_BYTE_SIZE; 561 562 return Disassemble(debugger, arch, nullptr, nullptr, frame, 563 range.GetBaseAddress(), limit, false, 0, 0, strm); 564 } 565 566 Instruction::Instruction(const Address &address, AddressClass addr_class) 567 : m_address(address), m_address_class(addr_class), m_opcode(), 568 m_calculated_strings(false) {} 569 570 Instruction::~Instruction() = default; 571 572 AddressClass Instruction::GetAddressClass() { 573 if (m_address_class == AddressClass::eInvalid) 574 m_address_class = m_address.GetAddressClass(); 575 return m_address_class; 576 } 577 578 void Instruction::Dump(lldb_private::Stream *s, uint32_t max_opcode_byte_size, 579 bool show_address, bool show_bytes, 580 const ExecutionContext *exe_ctx, 581 const SymbolContext *sym_ctx, 582 const SymbolContext *prev_sym_ctx, 583 const FormatEntity::Entry *disassembly_addr_format, 584 size_t max_address_text_size) { 585 size_t opcode_column_width = 7; 586 const size_t operand_column_width = 25; 587 588 CalculateMnemonicOperandsAndCommentIfNeeded(exe_ctx); 589 590 StreamString ss; 591 592 if (show_address) { 593 Debugger::FormatDisassemblerAddress(disassembly_addr_format, sym_ctx, 594 prev_sym_ctx, exe_ctx, &m_address, ss); 595 ss.FillLastLineToColumn(max_address_text_size, ' '); 596 } 597 598 if (show_bytes) { 599 if (m_opcode.GetType() == Opcode::eTypeBytes) { 600 // x86_64 and i386 are the only ones that use bytes right now so pad out 601 // the byte dump to be able to always show 15 bytes (3 chars each) plus a 602 // space 603 if (max_opcode_byte_size > 0) 604 m_opcode.Dump(&ss, max_opcode_byte_size * 3 + 1); 605 else 606 m_opcode.Dump(&ss, 15 * 3 + 1); 607 } else { 608 // Else, we have ARM or MIPS which can show up to a uint32_t 0x00000000 609 // (10 spaces) plus two for padding... 610 if (max_opcode_byte_size > 0) 611 m_opcode.Dump(&ss, max_opcode_byte_size * 3 + 1); 612 else 613 m_opcode.Dump(&ss, 12); 614 } 615 } 616 617 const size_t opcode_pos = ss.GetSizeOfLastLine(); 618 619 // The default opcode size of 7 characters is plenty for most architectures 620 // but some like arm can pull out the occasional vqrshrun.s16. We won't get 621 // consistent column spacing in these cases, unfortunately. 622 if (m_opcode_name.length() >= opcode_column_width) { 623 opcode_column_width = m_opcode_name.length() + 1; 624 } 625 626 ss.PutCString(m_opcode_name); 627 ss.FillLastLineToColumn(opcode_pos + opcode_column_width, ' '); 628 ss.PutCString(m_mnemonics); 629 630 if (!m_comment.empty()) { 631 ss.FillLastLineToColumn( 632 opcode_pos + opcode_column_width + operand_column_width, ' '); 633 ss.PutCString(" ; "); 634 ss.PutCString(m_comment); 635 } 636 s->PutCString(ss.GetString()); 637 } 638 639 bool Instruction::DumpEmulation(const ArchSpec &arch) { 640 std::unique_ptr<EmulateInstruction> insn_emulator_up( 641 EmulateInstruction::FindPlugin(arch, eInstructionTypeAny, nullptr)); 642 if (insn_emulator_up) { 643 insn_emulator_up->SetInstruction(GetOpcode(), GetAddress(), nullptr); 644 return insn_emulator_up->EvaluateInstruction(0); 645 } 646 647 return false; 648 } 649 650 bool Instruction::CanSetBreakpoint () { 651 return !HasDelaySlot(); 652 } 653 654 bool Instruction::HasDelaySlot() { 655 // Default is false. 656 return false; 657 } 658 659 OptionValueSP Instruction::ReadArray(FILE *in_file, Stream *out_stream, 660 OptionValue::Type data_type) { 661 bool done = false; 662 char buffer[1024]; 663 664 auto option_value_sp = std::make_shared<OptionValueArray>(1u << data_type); 665 666 int idx = 0; 667 while (!done) { 668 if (!fgets(buffer, 1023, in_file)) { 669 out_stream->Printf( 670 "Instruction::ReadArray: Error reading file (fgets).\n"); 671 option_value_sp.reset(); 672 return option_value_sp; 673 } 674 675 std::string line(buffer); 676 677 size_t len = line.size(); 678 if (line[len - 1] == '\n') { 679 line[len - 1] = '\0'; 680 line.resize(len - 1); 681 } 682 683 if ((line.size() == 1) && line[0] == ']') { 684 done = true; 685 line.clear(); 686 } 687 688 if (!line.empty()) { 689 std::string value; 690 static RegularExpression g_reg_exp( 691 llvm::StringRef("^[ \t]*([^ \t]+)[ \t]*$")); 692 llvm::SmallVector<llvm::StringRef, 2> matches; 693 if (g_reg_exp.Execute(line, &matches)) 694 value = matches[1].str(); 695 else 696 value = line; 697 698 OptionValueSP data_value_sp; 699 switch (data_type) { 700 case OptionValue::eTypeUInt64: 701 data_value_sp = std::make_shared<OptionValueUInt64>(0, 0); 702 data_value_sp->SetValueFromString(value); 703 break; 704 // Other types can be added later as needed. 705 default: 706 data_value_sp = std::make_shared<OptionValueString>(value.c_str(), ""); 707 break; 708 } 709 710 option_value_sp->GetAsArray()->InsertValue(idx, data_value_sp); 711 ++idx; 712 } 713 } 714 715 return option_value_sp; 716 } 717 718 OptionValueSP Instruction::ReadDictionary(FILE *in_file, Stream *out_stream) { 719 bool done = false; 720 char buffer[1024]; 721 722 auto option_value_sp = std::make_shared<OptionValueDictionary>(); 723 static ConstString encoding_key("data_encoding"); 724 OptionValue::Type data_type = OptionValue::eTypeInvalid; 725 726 while (!done) { 727 // Read the next line in the file 728 if (!fgets(buffer, 1023, in_file)) { 729 out_stream->Printf( 730 "Instruction::ReadDictionary: Error reading file (fgets).\n"); 731 option_value_sp.reset(); 732 return option_value_sp; 733 } 734 735 // Check to see if the line contains the end-of-dictionary marker ("}") 736 std::string line(buffer); 737 738 size_t len = line.size(); 739 if (line[len - 1] == '\n') { 740 line[len - 1] = '\0'; 741 line.resize(len - 1); 742 } 743 744 if ((line.size() == 1) && (line[0] == '}')) { 745 done = true; 746 line.clear(); 747 } 748 749 // Try to find a key-value pair in the current line and add it to the 750 // dictionary. 751 if (!line.empty()) { 752 static RegularExpression g_reg_exp(llvm::StringRef( 753 "^[ \t]*([a-zA-Z_][a-zA-Z0-9_]*)[ \t]*=[ \t]*(.*)[ \t]*$")); 754 755 llvm::SmallVector<llvm::StringRef, 3> matches; 756 757 bool reg_exp_success = g_reg_exp.Execute(line, &matches); 758 std::string key; 759 std::string value; 760 if (reg_exp_success) { 761 key = matches[1].str(); 762 value = matches[2].str(); 763 } else { 764 out_stream->Printf("Instruction::ReadDictionary: Failure executing " 765 "regular expression.\n"); 766 option_value_sp.reset(); 767 return option_value_sp; 768 } 769 770 ConstString const_key(key.c_str()); 771 // Check value to see if it's the start of an array or dictionary. 772 773 lldb::OptionValueSP value_sp; 774 assert(value.empty() == false); 775 assert(key.empty() == false); 776 777 if (value[0] == '{') { 778 assert(value.size() == 1); 779 // value is a dictionary 780 value_sp = ReadDictionary(in_file, out_stream); 781 if (!value_sp) { 782 option_value_sp.reset(); 783 return option_value_sp; 784 } 785 } else if (value[0] == '[') { 786 assert(value.size() == 1); 787 // value is an array 788 value_sp = ReadArray(in_file, out_stream, data_type); 789 if (!value_sp) { 790 option_value_sp.reset(); 791 return option_value_sp; 792 } 793 // We've used the data_type to read an array; re-set the type to 794 // Invalid 795 data_type = OptionValue::eTypeInvalid; 796 } else if ((value[0] == '0') && (value[1] == 'x')) { 797 value_sp = std::make_shared<OptionValueUInt64>(0, 0); 798 value_sp->SetValueFromString(value); 799 } else { 800 size_t len = value.size(); 801 if ((value[0] == '"') && (value[len - 1] == '"')) 802 value = value.substr(1, len - 2); 803 value_sp = std::make_shared<OptionValueString>(value.c_str(), ""); 804 } 805 806 if (const_key == encoding_key) { 807 // A 'data_encoding=..." is NOT a normal key-value pair; it is meta-data 808 // indicating the 809 // data type of an upcoming array (usually the next bit of data to be 810 // read in). 811 if (strcmp(value.c_str(), "uint32_t") == 0) 812 data_type = OptionValue::eTypeUInt64; 813 } else 814 option_value_sp->GetAsDictionary()->SetValueForKey(const_key, value_sp, 815 false); 816 } 817 } 818 819 return option_value_sp; 820 } 821 822 bool Instruction::TestEmulation(Stream *out_stream, const char *file_name) { 823 if (!out_stream) 824 return false; 825 826 if (!file_name) { 827 out_stream->Printf("Instruction::TestEmulation: Missing file_name."); 828 return false; 829 } 830 FILE *test_file = FileSystem::Instance().Fopen(file_name, "r"); 831 if (!test_file) { 832 out_stream->Printf( 833 "Instruction::TestEmulation: Attempt to open test file failed."); 834 return false; 835 } 836 837 char buffer[256]; 838 if (!fgets(buffer, 255, test_file)) { 839 out_stream->Printf( 840 "Instruction::TestEmulation: Error reading first line of test file.\n"); 841 fclose(test_file); 842 return false; 843 } 844 845 if (strncmp(buffer, "InstructionEmulationState={", 27) != 0) { 846 out_stream->Printf("Instructin::TestEmulation: Test file does not contain " 847 "emulation state dictionary\n"); 848 fclose(test_file); 849 return false; 850 } 851 852 // Read all the test information from the test file into an 853 // OptionValueDictionary. 854 855 OptionValueSP data_dictionary_sp(ReadDictionary(test_file, out_stream)); 856 if (!data_dictionary_sp) { 857 out_stream->Printf( 858 "Instruction::TestEmulation: Error reading Dictionary Object.\n"); 859 fclose(test_file); 860 return false; 861 } 862 863 fclose(test_file); 864 865 OptionValueDictionary *data_dictionary = 866 data_dictionary_sp->GetAsDictionary(); 867 static ConstString description_key("assembly_string"); 868 static ConstString triple_key("triple"); 869 870 OptionValueSP value_sp = data_dictionary->GetValueForKey(description_key); 871 872 if (!value_sp) { 873 out_stream->Printf("Instruction::TestEmulation: Test file does not " 874 "contain description string.\n"); 875 return false; 876 } 877 878 SetDescription(value_sp->GetStringValue()); 879 880 value_sp = data_dictionary->GetValueForKey(triple_key); 881 if (!value_sp) { 882 out_stream->Printf( 883 "Instruction::TestEmulation: Test file does not contain triple.\n"); 884 return false; 885 } 886 887 ArchSpec arch; 888 arch.SetTriple(llvm::Triple(value_sp->GetStringValue())); 889 890 bool success = false; 891 std::unique_ptr<EmulateInstruction> insn_emulator_up( 892 EmulateInstruction::FindPlugin(arch, eInstructionTypeAny, nullptr)); 893 if (insn_emulator_up) 894 success = 895 insn_emulator_up->TestEmulation(out_stream, arch, data_dictionary); 896 897 if (success) 898 out_stream->Printf("Emulation test succeeded."); 899 else 900 out_stream->Printf("Emulation test failed."); 901 902 return success; 903 } 904 905 bool Instruction::Emulate( 906 const ArchSpec &arch, uint32_t evaluate_options, void *baton, 907 EmulateInstruction::ReadMemoryCallback read_mem_callback, 908 EmulateInstruction::WriteMemoryCallback write_mem_callback, 909 EmulateInstruction::ReadRegisterCallback read_reg_callback, 910 EmulateInstruction::WriteRegisterCallback write_reg_callback) { 911 std::unique_ptr<EmulateInstruction> insn_emulator_up( 912 EmulateInstruction::FindPlugin(arch, eInstructionTypeAny, nullptr)); 913 if (insn_emulator_up) { 914 insn_emulator_up->SetBaton(baton); 915 insn_emulator_up->SetCallbacks(read_mem_callback, write_mem_callback, 916 read_reg_callback, write_reg_callback); 917 insn_emulator_up->SetInstruction(GetOpcode(), GetAddress(), nullptr); 918 return insn_emulator_up->EvaluateInstruction(evaluate_options); 919 } 920 921 return false; 922 } 923 924 uint32_t Instruction::GetData(DataExtractor &data) { 925 return m_opcode.GetData(data); 926 } 927 928 InstructionList::InstructionList() : m_instructions() {} 929 930 InstructionList::~InstructionList() = default; 931 932 size_t InstructionList::GetSize() const { return m_instructions.size(); } 933 934 uint32_t InstructionList::GetMaxOpcocdeByteSize() const { 935 uint32_t max_inst_size = 0; 936 collection::const_iterator pos, end; 937 for (pos = m_instructions.begin(), end = m_instructions.end(); pos != end; 938 ++pos) { 939 uint32_t inst_size = (*pos)->GetOpcode().GetByteSize(); 940 if (max_inst_size < inst_size) 941 max_inst_size = inst_size; 942 } 943 return max_inst_size; 944 } 945 946 InstructionSP InstructionList::GetInstructionAtIndex(size_t idx) const { 947 InstructionSP inst_sp; 948 if (idx < m_instructions.size()) 949 inst_sp = m_instructions[idx]; 950 return inst_sp; 951 } 952 953 InstructionSP InstructionList::GetInstructionAtAddress(const Address &address) { 954 uint32_t index = GetIndexOfInstructionAtAddress(address); 955 if (index != UINT32_MAX) 956 return GetInstructionAtIndex(index); 957 return nullptr; 958 } 959 960 void InstructionList::Dump(Stream *s, bool show_address, bool show_bytes, 961 const ExecutionContext *exe_ctx) { 962 const uint32_t max_opcode_byte_size = GetMaxOpcocdeByteSize(); 963 collection::const_iterator pos, begin, end; 964 965 const FormatEntity::Entry *disassembly_format = nullptr; 966 FormatEntity::Entry format; 967 if (exe_ctx && exe_ctx->HasTargetScope()) { 968 disassembly_format = 969 exe_ctx->GetTargetRef().GetDebugger().GetDisassemblyFormat(); 970 } else { 971 FormatEntity::Parse("${addr}: ", format); 972 disassembly_format = &format; 973 } 974 975 for (begin = m_instructions.begin(), end = m_instructions.end(), pos = begin; 976 pos != end; ++pos) { 977 if (pos != begin) 978 s->EOL(); 979 (*pos)->Dump(s, max_opcode_byte_size, show_address, show_bytes, exe_ctx, 980 nullptr, nullptr, disassembly_format, 0); 981 } 982 } 983 984 void InstructionList::Clear() { m_instructions.clear(); } 985 986 void InstructionList::Append(lldb::InstructionSP &inst_sp) { 987 if (inst_sp) 988 m_instructions.push_back(inst_sp); 989 } 990 991 uint32_t 992 InstructionList::GetIndexOfNextBranchInstruction(uint32_t start, 993 bool ignore_calls, 994 bool *found_calls) const { 995 size_t num_instructions = m_instructions.size(); 996 997 uint32_t next_branch = UINT32_MAX; 998 999 if (found_calls) 1000 *found_calls = false; 1001 for (size_t i = start; i < num_instructions; i++) { 1002 if (m_instructions[i]->DoesBranch()) { 1003 if (ignore_calls && m_instructions[i]->IsCall()) { 1004 if (found_calls) 1005 *found_calls = true; 1006 continue; 1007 } 1008 next_branch = i; 1009 break; 1010 } 1011 } 1012 1013 return next_branch; 1014 } 1015 1016 uint32_t 1017 InstructionList::GetIndexOfInstructionAtAddress(const Address &address) { 1018 size_t num_instructions = m_instructions.size(); 1019 uint32_t index = UINT32_MAX; 1020 for (size_t i = 0; i < num_instructions; i++) { 1021 if (m_instructions[i]->GetAddress() == address) { 1022 index = i; 1023 break; 1024 } 1025 } 1026 return index; 1027 } 1028 1029 uint32_t 1030 InstructionList::GetIndexOfInstructionAtLoadAddress(lldb::addr_t load_addr, 1031 Target &target) { 1032 Address address; 1033 address.SetLoadAddress(load_addr, &target); 1034 return GetIndexOfInstructionAtAddress(address); 1035 } 1036 1037 size_t Disassembler::ParseInstructions(Target &target, Address start, 1038 Limit limit, Stream *error_strm_ptr, 1039 bool force_live_memory) { 1040 m_instruction_list.Clear(); 1041 1042 if (!start.IsValid()) 1043 return 0; 1044 1045 start = ResolveAddress(target, start); 1046 1047 addr_t byte_size = limit.value; 1048 if (limit.kind == Limit::Instructions) 1049 byte_size *= m_arch.GetMaximumOpcodeByteSize(); 1050 auto data_sp = std::make_shared<DataBufferHeap>(byte_size, '\0'); 1051 1052 Status error; 1053 lldb::addr_t load_addr = LLDB_INVALID_ADDRESS; 1054 const size_t bytes_read = 1055 target.ReadMemory(start, data_sp->GetBytes(), data_sp->GetByteSize(), 1056 error, force_live_memory, &load_addr); 1057 const bool data_from_file = load_addr == LLDB_INVALID_ADDRESS; 1058 1059 if (bytes_read == 0) { 1060 if (error_strm_ptr) { 1061 if (const char *error_cstr = error.AsCString()) 1062 error_strm_ptr->Printf("error: %s\n", error_cstr); 1063 } 1064 return 0; 1065 } 1066 1067 if (bytes_read != data_sp->GetByteSize()) 1068 data_sp->SetByteSize(bytes_read); 1069 DataExtractor data(data_sp, m_arch.GetByteOrder(), 1070 m_arch.GetAddressByteSize()); 1071 return DecodeInstructions(start, data, 0, 1072 limit.kind == Limit::Instructions ? limit.value 1073 : UINT32_MAX, 1074 false, data_from_file); 1075 } 1076 1077 // Disassembler copy constructor 1078 Disassembler::Disassembler(const ArchSpec &arch, const char *flavor) 1079 : m_arch(arch), m_instruction_list(), m_base_addr(LLDB_INVALID_ADDRESS), 1080 m_flavor() { 1081 if (flavor == nullptr) 1082 m_flavor.assign("default"); 1083 else 1084 m_flavor.assign(flavor); 1085 1086 // If this is an arm variant that can only include thumb (T16, T32) 1087 // instructions, force the arch triple to be "thumbv.." instead of "armv..." 1088 if (arch.IsAlwaysThumbInstructions()) { 1089 std::string thumb_arch_name(arch.GetTriple().getArchName().str()); 1090 // Replace "arm" with "thumb" so we get all thumb variants correct 1091 if (thumb_arch_name.size() > 3) { 1092 thumb_arch_name.erase(0, 3); 1093 thumb_arch_name.insert(0, "thumb"); 1094 } 1095 m_arch.SetTriple(thumb_arch_name.c_str()); 1096 } 1097 } 1098 1099 Disassembler::~Disassembler() = default; 1100 1101 InstructionList &Disassembler::GetInstructionList() { 1102 return m_instruction_list; 1103 } 1104 1105 const InstructionList &Disassembler::GetInstructionList() const { 1106 return m_instruction_list; 1107 } 1108 1109 // Class PseudoInstruction 1110 1111 PseudoInstruction::PseudoInstruction() 1112 : Instruction(Address(), AddressClass::eUnknown), m_description() {} 1113 1114 PseudoInstruction::~PseudoInstruction() = default; 1115 1116 bool PseudoInstruction::DoesBranch() { 1117 // This is NOT a valid question for a pseudo instruction. 1118 return false; 1119 } 1120 1121 bool PseudoInstruction::HasDelaySlot() { 1122 // This is NOT a valid question for a pseudo instruction. 1123 return false; 1124 } 1125 1126 size_t PseudoInstruction::Decode(const lldb_private::Disassembler &disassembler, 1127 const lldb_private::DataExtractor &data, 1128 lldb::offset_t data_offset) { 1129 return m_opcode.GetByteSize(); 1130 } 1131 1132 void PseudoInstruction::SetOpcode(size_t opcode_size, void *opcode_data) { 1133 if (!opcode_data) 1134 return; 1135 1136 switch (opcode_size) { 1137 case 8: { 1138 uint8_t value8 = *((uint8_t *)opcode_data); 1139 m_opcode.SetOpcode8(value8, eByteOrderInvalid); 1140 break; 1141 } 1142 case 16: { 1143 uint16_t value16 = *((uint16_t *)opcode_data); 1144 m_opcode.SetOpcode16(value16, eByteOrderInvalid); 1145 break; 1146 } 1147 case 32: { 1148 uint32_t value32 = *((uint32_t *)opcode_data); 1149 m_opcode.SetOpcode32(value32, eByteOrderInvalid); 1150 break; 1151 } 1152 case 64: { 1153 uint64_t value64 = *((uint64_t *)opcode_data); 1154 m_opcode.SetOpcode64(value64, eByteOrderInvalid); 1155 break; 1156 } 1157 default: 1158 break; 1159 } 1160 } 1161 1162 void PseudoInstruction::SetDescription(llvm::StringRef description) { 1163 m_description = std::string(description); 1164 } 1165 1166 Instruction::Operand Instruction::Operand::BuildRegister(ConstString &r) { 1167 Operand ret; 1168 ret.m_type = Type::Register; 1169 ret.m_register = r; 1170 return ret; 1171 } 1172 1173 Instruction::Operand Instruction::Operand::BuildImmediate(lldb::addr_t imm, 1174 bool neg) { 1175 Operand ret; 1176 ret.m_type = Type::Immediate; 1177 ret.m_immediate = imm; 1178 ret.m_negative = neg; 1179 return ret; 1180 } 1181 1182 Instruction::Operand Instruction::Operand::BuildImmediate(int64_t imm) { 1183 Operand ret; 1184 ret.m_type = Type::Immediate; 1185 if (imm < 0) { 1186 ret.m_immediate = -imm; 1187 ret.m_negative = true; 1188 } else { 1189 ret.m_immediate = imm; 1190 ret.m_negative = false; 1191 } 1192 return ret; 1193 } 1194 1195 Instruction::Operand 1196 Instruction::Operand::BuildDereference(const Operand &ref) { 1197 Operand ret; 1198 ret.m_type = Type::Dereference; 1199 ret.m_children = {ref}; 1200 return ret; 1201 } 1202 1203 Instruction::Operand Instruction::Operand::BuildSum(const Operand &lhs, 1204 const Operand &rhs) { 1205 Operand ret; 1206 ret.m_type = Type::Sum; 1207 ret.m_children = {lhs, rhs}; 1208 return ret; 1209 } 1210 1211 Instruction::Operand Instruction::Operand::BuildProduct(const Operand &lhs, 1212 const Operand &rhs) { 1213 Operand ret; 1214 ret.m_type = Type::Product; 1215 ret.m_children = {lhs, rhs}; 1216 return ret; 1217 } 1218 1219 std::function<bool(const Instruction::Operand &)> 1220 lldb_private::OperandMatchers::MatchBinaryOp( 1221 std::function<bool(const Instruction::Operand &)> base, 1222 std::function<bool(const Instruction::Operand &)> left, 1223 std::function<bool(const Instruction::Operand &)> right) { 1224 return [base, left, right](const Instruction::Operand &op) -> bool { 1225 return (base(op) && op.m_children.size() == 2 && 1226 ((left(op.m_children[0]) && right(op.m_children[1])) || 1227 (left(op.m_children[1]) && right(op.m_children[0])))); 1228 }; 1229 } 1230 1231 std::function<bool(const Instruction::Operand &)> 1232 lldb_private::OperandMatchers::MatchUnaryOp( 1233 std::function<bool(const Instruction::Operand &)> base, 1234 std::function<bool(const Instruction::Operand &)> child) { 1235 return [base, child](const Instruction::Operand &op) -> bool { 1236 return (base(op) && op.m_children.size() == 1 && child(op.m_children[0])); 1237 }; 1238 } 1239 1240 std::function<bool(const Instruction::Operand &)> 1241 lldb_private::OperandMatchers::MatchRegOp(const RegisterInfo &info) { 1242 return [&info](const Instruction::Operand &op) { 1243 return (op.m_type == Instruction::Operand::Type::Register && 1244 (op.m_register == ConstString(info.name) || 1245 op.m_register == ConstString(info.alt_name))); 1246 }; 1247 } 1248 1249 std::function<bool(const Instruction::Operand &)> 1250 lldb_private::OperandMatchers::FetchRegOp(ConstString ®) { 1251 return [®](const Instruction::Operand &op) { 1252 if (op.m_type != Instruction::Operand::Type::Register) { 1253 return false; 1254 } 1255 reg = op.m_register; 1256 return true; 1257 }; 1258 } 1259 1260 std::function<bool(const Instruction::Operand &)> 1261 lldb_private::OperandMatchers::MatchImmOp(int64_t imm) { 1262 return [imm](const Instruction::Operand &op) { 1263 return (op.m_type == Instruction::Operand::Type::Immediate && 1264 ((op.m_negative && op.m_immediate == (uint64_t)-imm) || 1265 (!op.m_negative && op.m_immediate == (uint64_t)imm))); 1266 }; 1267 } 1268 1269 std::function<bool(const Instruction::Operand &)> 1270 lldb_private::OperandMatchers::FetchImmOp(int64_t &imm) { 1271 return [&imm](const Instruction::Operand &op) { 1272 if (op.m_type != Instruction::Operand::Type::Immediate) { 1273 return false; 1274 } 1275 if (op.m_negative) { 1276 imm = -((int64_t)op.m_immediate); 1277 } else { 1278 imm = ((int64_t)op.m_immediate); 1279 } 1280 return true; 1281 }; 1282 } 1283 1284 std::function<bool(const Instruction::Operand &)> 1285 lldb_private::OperandMatchers::MatchOpType(Instruction::Operand::Type type) { 1286 return [type](const Instruction::Operand &op) { return op.m_type == type; }; 1287 } 1288