1 //===-- DisassemblerLLVMC.cpp ---------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "DisassemblerLLVMC.h" 10 11 #include "llvm-c/Disassembler.h" 12 #include "llvm/ADT/SmallString.h" 13 #include "llvm/ADT/StringExtras.h" 14 #include "llvm/MC/MCAsmInfo.h" 15 #include "llvm/MC/MCContext.h" 16 #include "llvm/MC/MCDisassembler/MCDisassembler.h" 17 #include "llvm/MC/MCDisassembler/MCExternalSymbolizer.h" 18 #include "llvm/MC/MCDisassembler/MCRelocationInfo.h" 19 #include "llvm/MC/MCInst.h" 20 #include "llvm/MC/MCInstPrinter.h" 21 #include "llvm/MC/MCInstrInfo.h" 22 #include "llvm/MC/MCRegisterInfo.h" 23 #include "llvm/MC/MCSubtargetInfo.h" 24 #include "llvm/MC/MCTargetOptions.h" 25 #include "llvm/MC/TargetRegistry.h" 26 #include "llvm/Support/AArch64TargetParser.h" 27 #include "llvm/Support/ErrorHandling.h" 28 #include "llvm/Support/ScopedPrinter.h" 29 #include "llvm/Support/TargetSelect.h" 30 31 #include "lldb/Core/Address.h" 32 #include "lldb/Core/Module.h" 33 #include "lldb/Symbol/SymbolContext.h" 34 #include "lldb/Target/ExecutionContext.h" 35 #include "lldb/Target/Process.h" 36 #include "lldb/Target/RegisterContext.h" 37 #include "lldb/Target/SectionLoadList.h" 38 #include "lldb/Target/StackFrame.h" 39 #include "lldb/Target/Target.h" 40 #include "lldb/Utility/DataExtractor.h" 41 #include "lldb/Utility/LLDBLog.h" 42 #include "lldb/Utility/Log.h" 43 #include "lldb/Utility/RegularExpression.h" 44 #include "lldb/Utility/Stream.h" 45 46 using namespace lldb; 47 using namespace lldb_private; 48 49 LLDB_PLUGIN_DEFINE(DisassemblerLLVMC) 50 51 class DisassemblerLLVMC::MCDisasmInstance { 52 public: 53 static std::unique_ptr<MCDisasmInstance> 54 Create(const char *triple, const char *cpu, const char *features_str, 55 unsigned flavor, DisassemblerLLVMC &owner); 56 57 ~MCDisasmInstance() = default; 58 59 uint64_t GetMCInst(const uint8_t *opcode_data, size_t opcode_data_len, 60 lldb::addr_t pc, llvm::MCInst &mc_inst) const; 61 void PrintMCInst(llvm::MCInst &mc_inst, std::string &inst_string, 62 std::string &comments_string); 63 void SetStyle(bool use_hex_immed, HexImmediateStyle hex_style); 64 bool CanBranch(llvm::MCInst &mc_inst) const; 65 bool HasDelaySlot(llvm::MCInst &mc_inst) const; 66 bool IsCall(llvm::MCInst &mc_inst) const; 67 bool IsLoad(llvm::MCInst &mc_inst) const; 68 bool IsAuthenticated(llvm::MCInst &mc_inst) const; 69 70 private: 71 MCDisasmInstance(std::unique_ptr<llvm::MCInstrInfo> &&instr_info_up, 72 std::unique_ptr<llvm::MCRegisterInfo> &®_info_up, 73 std::unique_ptr<llvm::MCSubtargetInfo> &&subtarget_info_up, 74 std::unique_ptr<llvm::MCAsmInfo> &&asm_info_up, 75 std::unique_ptr<llvm::MCContext> &&context_up, 76 std::unique_ptr<llvm::MCDisassembler> &&disasm_up, 77 std::unique_ptr<llvm::MCInstPrinter> &&instr_printer_up); 78 79 std::unique_ptr<llvm::MCInstrInfo> m_instr_info_up; 80 std::unique_ptr<llvm::MCRegisterInfo> m_reg_info_up; 81 std::unique_ptr<llvm::MCSubtargetInfo> m_subtarget_info_up; 82 std::unique_ptr<llvm::MCAsmInfo> m_asm_info_up; 83 std::unique_ptr<llvm::MCContext> m_context_up; 84 std::unique_ptr<llvm::MCDisassembler> m_disasm_up; 85 std::unique_ptr<llvm::MCInstPrinter> m_instr_printer_up; 86 }; 87 88 class InstructionLLVMC : public lldb_private::Instruction { 89 public: 90 InstructionLLVMC(DisassemblerLLVMC &disasm, 91 const lldb_private::Address &address, 92 AddressClass addr_class) 93 : Instruction(address, addr_class), 94 m_disasm_wp(std::static_pointer_cast<DisassemblerLLVMC>( 95 disasm.shared_from_this())) {} 96 97 ~InstructionLLVMC() override = default; 98 99 bool DoesBranch() override { 100 VisitInstruction(); 101 return m_does_branch; 102 } 103 104 bool HasDelaySlot() override { 105 VisitInstruction(); 106 return m_has_delay_slot; 107 } 108 109 bool IsLoad() override { 110 VisitInstruction(); 111 return m_is_load; 112 } 113 114 bool IsAuthenticated() override { 115 VisitInstruction(); 116 return m_is_authenticated; 117 } 118 119 DisassemblerLLVMC::MCDisasmInstance *GetDisasmToUse(bool &is_alternate_isa) { 120 DisassemblerScope disasm(*this); 121 return GetDisasmToUse(is_alternate_isa, disasm); 122 } 123 124 size_t Decode(const lldb_private::Disassembler &disassembler, 125 const lldb_private::DataExtractor &data, 126 lldb::offset_t data_offset) override { 127 // All we have to do is read the opcode which can be easy for some 128 // architectures 129 bool got_op = false; 130 DisassemblerScope disasm(*this); 131 if (disasm) { 132 const ArchSpec &arch = disasm->GetArchitecture(); 133 const lldb::ByteOrder byte_order = data.GetByteOrder(); 134 135 const uint32_t min_op_byte_size = arch.GetMinimumOpcodeByteSize(); 136 const uint32_t max_op_byte_size = arch.GetMaximumOpcodeByteSize(); 137 if (min_op_byte_size == max_op_byte_size) { 138 // Fixed size instructions, just read that amount of data. 139 if (!data.ValidOffsetForDataOfSize(data_offset, min_op_byte_size)) 140 return false; 141 142 switch (min_op_byte_size) { 143 case 1: 144 m_opcode.SetOpcode8(data.GetU8(&data_offset), byte_order); 145 got_op = true; 146 break; 147 148 case 2: 149 m_opcode.SetOpcode16(data.GetU16(&data_offset), byte_order); 150 got_op = true; 151 break; 152 153 case 4: 154 m_opcode.SetOpcode32(data.GetU32(&data_offset), byte_order); 155 got_op = true; 156 break; 157 158 case 8: 159 m_opcode.SetOpcode64(data.GetU64(&data_offset), byte_order); 160 got_op = true; 161 break; 162 163 default: 164 m_opcode.SetOpcodeBytes(data.PeekData(data_offset, min_op_byte_size), 165 min_op_byte_size); 166 got_op = true; 167 break; 168 } 169 } 170 if (!got_op) { 171 bool is_alternate_isa = false; 172 DisassemblerLLVMC::MCDisasmInstance *mc_disasm_ptr = 173 GetDisasmToUse(is_alternate_isa, disasm); 174 175 const llvm::Triple::ArchType machine = arch.GetMachine(); 176 if (machine == llvm::Triple::arm || machine == llvm::Triple::thumb) { 177 if (machine == llvm::Triple::thumb || is_alternate_isa) { 178 uint32_t thumb_opcode = data.GetU16(&data_offset); 179 if ((thumb_opcode & 0xe000) != 0xe000 || 180 ((thumb_opcode & 0x1800u) == 0)) { 181 m_opcode.SetOpcode16(thumb_opcode, byte_order); 182 m_is_valid = true; 183 } else { 184 thumb_opcode <<= 16; 185 thumb_opcode |= data.GetU16(&data_offset); 186 m_opcode.SetOpcode16_2(thumb_opcode, byte_order); 187 m_is_valid = true; 188 } 189 } else { 190 m_opcode.SetOpcode32(data.GetU32(&data_offset), byte_order); 191 m_is_valid = true; 192 } 193 } else { 194 // The opcode isn't evenly sized, so we need to actually use the llvm 195 // disassembler to parse it and get the size. 196 uint8_t *opcode_data = 197 const_cast<uint8_t *>(data.PeekData(data_offset, 1)); 198 const size_t opcode_data_len = data.BytesLeft(data_offset); 199 const addr_t pc = m_address.GetFileAddress(); 200 llvm::MCInst inst; 201 202 const size_t inst_size = 203 mc_disasm_ptr->GetMCInst(opcode_data, opcode_data_len, pc, inst); 204 if (inst_size == 0) 205 m_opcode.Clear(); 206 else { 207 m_opcode.SetOpcodeBytes(opcode_data, inst_size); 208 m_is_valid = true; 209 } 210 } 211 } 212 return m_opcode.GetByteSize(); 213 } 214 return 0; 215 } 216 217 void AppendComment(std::string &description) { 218 if (m_comment.empty()) 219 m_comment.swap(description); 220 else { 221 m_comment.append(", "); 222 m_comment.append(description); 223 } 224 } 225 226 void CalculateMnemonicOperandsAndComment( 227 const lldb_private::ExecutionContext *exe_ctx) override { 228 DataExtractor data; 229 const AddressClass address_class = GetAddressClass(); 230 231 if (m_opcode.GetData(data)) { 232 std::string out_string; 233 std::string comment_string; 234 235 DisassemblerScope disasm(*this, exe_ctx); 236 if (disasm) { 237 DisassemblerLLVMC::MCDisasmInstance *mc_disasm_ptr; 238 239 if (address_class == AddressClass::eCodeAlternateISA) 240 mc_disasm_ptr = disasm->m_alternate_disasm_up.get(); 241 else 242 mc_disasm_ptr = disasm->m_disasm_up.get(); 243 244 lldb::addr_t pc = m_address.GetFileAddress(); 245 m_using_file_addr = true; 246 247 const bool data_from_file = disasm->m_data_from_file; 248 bool use_hex_immediates = true; 249 Disassembler::HexImmediateStyle hex_style = Disassembler::eHexStyleC; 250 251 if (exe_ctx) { 252 Target *target = exe_ctx->GetTargetPtr(); 253 if (target) { 254 use_hex_immediates = target->GetUseHexImmediates(); 255 hex_style = target->GetHexImmediateStyle(); 256 257 if (!data_from_file) { 258 const lldb::addr_t load_addr = m_address.GetLoadAddress(target); 259 if (load_addr != LLDB_INVALID_ADDRESS) { 260 pc = load_addr; 261 m_using_file_addr = false; 262 } 263 } 264 } 265 } 266 267 const uint8_t *opcode_data = data.GetDataStart(); 268 const size_t opcode_data_len = data.GetByteSize(); 269 llvm::MCInst inst; 270 size_t inst_size = 271 mc_disasm_ptr->GetMCInst(opcode_data, opcode_data_len, pc, inst); 272 273 if (inst_size > 0) { 274 mc_disasm_ptr->SetStyle(use_hex_immediates, hex_style); 275 mc_disasm_ptr->PrintMCInst(inst, out_string, comment_string); 276 277 if (!comment_string.empty()) { 278 AppendComment(comment_string); 279 } 280 } 281 282 if (inst_size == 0) { 283 m_comment.assign("unknown opcode"); 284 inst_size = m_opcode.GetByteSize(); 285 StreamString mnemonic_strm; 286 lldb::offset_t offset = 0; 287 lldb::ByteOrder byte_order = data.GetByteOrder(); 288 switch (inst_size) { 289 case 1: { 290 const uint8_t uval8 = data.GetU8(&offset); 291 m_opcode.SetOpcode8(uval8, byte_order); 292 m_opcode_name.assign(".byte"); 293 mnemonic_strm.Printf("0x%2.2x", uval8); 294 } break; 295 case 2: { 296 const uint16_t uval16 = data.GetU16(&offset); 297 m_opcode.SetOpcode16(uval16, byte_order); 298 m_opcode_name.assign(".short"); 299 mnemonic_strm.Printf("0x%4.4x", uval16); 300 } break; 301 case 4: { 302 const uint32_t uval32 = data.GetU32(&offset); 303 m_opcode.SetOpcode32(uval32, byte_order); 304 m_opcode_name.assign(".long"); 305 mnemonic_strm.Printf("0x%8.8x", uval32); 306 } break; 307 case 8: { 308 const uint64_t uval64 = data.GetU64(&offset); 309 m_opcode.SetOpcode64(uval64, byte_order); 310 m_opcode_name.assign(".quad"); 311 mnemonic_strm.Printf("0x%16.16" PRIx64, uval64); 312 } break; 313 default: 314 if (inst_size == 0) 315 return; 316 else { 317 const uint8_t *bytes = data.PeekData(offset, inst_size); 318 if (bytes == nullptr) 319 return; 320 m_opcode_name.assign(".byte"); 321 m_opcode.SetOpcodeBytes(bytes, inst_size); 322 mnemonic_strm.Printf("0x%2.2x", bytes[0]); 323 for (uint32_t i = 1; i < inst_size; ++i) 324 mnemonic_strm.Printf(" 0x%2.2x", bytes[i]); 325 } 326 break; 327 } 328 m_mnemonics = std::string(mnemonic_strm.GetString()); 329 return; 330 } 331 332 static RegularExpression s_regex( 333 llvm::StringRef("[ \t]*([^ ^\t]+)[ \t]*([^ ^\t].*)?")); 334 335 llvm::SmallVector<llvm::StringRef, 4> matches; 336 if (s_regex.Execute(out_string, &matches)) { 337 m_opcode_name = matches[1].str(); 338 m_mnemonics = matches[2].str(); 339 } 340 } 341 } 342 } 343 344 bool IsValid() const { return m_is_valid; } 345 346 bool UsingFileAddress() const { return m_using_file_addr; } 347 size_t GetByteSize() const { return m_opcode.GetByteSize(); } 348 349 /// Grants exclusive access to the disassembler and initializes it with the 350 /// given InstructionLLVMC and an optional ExecutionContext. 351 class DisassemblerScope { 352 std::shared_ptr<DisassemblerLLVMC> m_disasm; 353 354 public: 355 explicit DisassemblerScope( 356 InstructionLLVMC &i, 357 const lldb_private::ExecutionContext *exe_ctx = nullptr) 358 : m_disasm(i.m_disasm_wp.lock()) { 359 m_disasm->m_mutex.lock(); 360 m_disasm->m_inst = &i; 361 m_disasm->m_exe_ctx = exe_ctx; 362 } 363 ~DisassemblerScope() { m_disasm->m_mutex.unlock(); } 364 365 /// Evaluates to true if this scope contains a valid disassembler. 366 operator bool() const { return static_cast<bool>(m_disasm); } 367 368 std::shared_ptr<DisassemblerLLVMC> operator->() { return m_disasm; } 369 }; 370 371 static llvm::StringRef::const_iterator 372 ConsumeWhitespace(llvm::StringRef::const_iterator osi, 373 llvm::StringRef::const_iterator ose) { 374 while (osi != ose) { 375 switch (*osi) { 376 default: 377 return osi; 378 case ' ': 379 case '\t': 380 break; 381 } 382 ++osi; 383 } 384 385 return osi; 386 } 387 388 static std::pair<bool, llvm::StringRef::const_iterator> 389 ConsumeChar(llvm::StringRef::const_iterator osi, const char c, 390 llvm::StringRef::const_iterator ose) { 391 bool found = false; 392 393 osi = ConsumeWhitespace(osi, ose); 394 if (osi != ose && *osi == c) { 395 found = true; 396 ++osi; 397 } 398 399 return std::make_pair(found, osi); 400 } 401 402 static std::pair<Operand, llvm::StringRef::const_iterator> 403 ParseRegisterName(llvm::StringRef::const_iterator osi, 404 llvm::StringRef::const_iterator ose) { 405 Operand ret; 406 ret.m_type = Operand::Type::Register; 407 std::string str; 408 409 osi = ConsumeWhitespace(osi, ose); 410 411 while (osi != ose) { 412 if (*osi >= '0' && *osi <= '9') { 413 if (str.empty()) { 414 return std::make_pair(Operand(), osi); 415 } else { 416 str.push_back(*osi); 417 } 418 } else if (*osi >= 'a' && *osi <= 'z') { 419 str.push_back(*osi); 420 } else { 421 switch (*osi) { 422 default: 423 if (str.empty()) { 424 return std::make_pair(Operand(), osi); 425 } else { 426 ret.m_register = ConstString(str); 427 return std::make_pair(ret, osi); 428 } 429 case '%': 430 if (!str.empty()) { 431 return std::make_pair(Operand(), osi); 432 } 433 break; 434 } 435 } 436 ++osi; 437 } 438 439 ret.m_register = ConstString(str); 440 return std::make_pair(ret, osi); 441 } 442 443 static std::pair<Operand, llvm::StringRef::const_iterator> 444 ParseImmediate(llvm::StringRef::const_iterator osi, 445 llvm::StringRef::const_iterator ose) { 446 Operand ret; 447 ret.m_type = Operand::Type::Immediate; 448 std::string str; 449 bool is_hex = false; 450 451 osi = ConsumeWhitespace(osi, ose); 452 453 while (osi != ose) { 454 if (*osi >= '0' && *osi <= '9') { 455 str.push_back(*osi); 456 } else if (*osi >= 'a' && *osi <= 'f') { 457 if (is_hex) { 458 str.push_back(*osi); 459 } else { 460 return std::make_pair(Operand(), osi); 461 } 462 } else { 463 switch (*osi) { 464 default: 465 if (str.empty()) { 466 return std::make_pair(Operand(), osi); 467 } else { 468 ret.m_immediate = strtoull(str.c_str(), nullptr, 0); 469 return std::make_pair(ret, osi); 470 } 471 case 'x': 472 if (!str.compare("0")) { 473 is_hex = true; 474 str.push_back(*osi); 475 } else { 476 return std::make_pair(Operand(), osi); 477 } 478 break; 479 case '#': 480 case '$': 481 if (!str.empty()) { 482 return std::make_pair(Operand(), osi); 483 } 484 break; 485 case '-': 486 if (str.empty()) { 487 ret.m_negative = true; 488 } else { 489 return std::make_pair(Operand(), osi); 490 } 491 } 492 } 493 ++osi; 494 } 495 496 ret.m_immediate = strtoull(str.c_str(), nullptr, 0); 497 return std::make_pair(ret, osi); 498 } 499 500 // -0x5(%rax,%rax,2) 501 static std::pair<Operand, llvm::StringRef::const_iterator> 502 ParseIntelIndexedAccess(llvm::StringRef::const_iterator osi, 503 llvm::StringRef::const_iterator ose) { 504 std::pair<Operand, llvm::StringRef::const_iterator> offset_and_iterator = 505 ParseImmediate(osi, ose); 506 if (offset_and_iterator.first.IsValid()) { 507 osi = offset_and_iterator.second; 508 } 509 510 bool found = false; 511 std::tie(found, osi) = ConsumeChar(osi, '(', ose); 512 if (!found) { 513 return std::make_pair(Operand(), osi); 514 } 515 516 std::pair<Operand, llvm::StringRef::const_iterator> base_and_iterator = 517 ParseRegisterName(osi, ose); 518 if (base_and_iterator.first.IsValid()) { 519 osi = base_and_iterator.second; 520 } else { 521 return std::make_pair(Operand(), osi); 522 } 523 524 std::tie(found, osi) = ConsumeChar(osi, ',', ose); 525 if (!found) { 526 return std::make_pair(Operand(), osi); 527 } 528 529 std::pair<Operand, llvm::StringRef::const_iterator> index_and_iterator = 530 ParseRegisterName(osi, ose); 531 if (index_and_iterator.first.IsValid()) { 532 osi = index_and_iterator.second; 533 } else { 534 return std::make_pair(Operand(), osi); 535 } 536 537 std::tie(found, osi) = ConsumeChar(osi, ',', ose); 538 if (!found) { 539 return std::make_pair(Operand(), osi); 540 } 541 542 std::pair<Operand, llvm::StringRef::const_iterator> 543 multiplier_and_iterator = ParseImmediate(osi, ose); 544 if (index_and_iterator.first.IsValid()) { 545 osi = index_and_iterator.second; 546 } else { 547 return std::make_pair(Operand(), osi); 548 } 549 550 std::tie(found, osi) = ConsumeChar(osi, ')', ose); 551 if (!found) { 552 return std::make_pair(Operand(), osi); 553 } 554 555 Operand product; 556 product.m_type = Operand::Type::Product; 557 product.m_children.push_back(index_and_iterator.first); 558 product.m_children.push_back(multiplier_and_iterator.first); 559 560 Operand index; 561 index.m_type = Operand::Type::Sum; 562 index.m_children.push_back(base_and_iterator.first); 563 index.m_children.push_back(product); 564 565 if (offset_and_iterator.first.IsValid()) { 566 Operand offset; 567 offset.m_type = Operand::Type::Sum; 568 offset.m_children.push_back(offset_and_iterator.first); 569 offset.m_children.push_back(index); 570 571 Operand deref; 572 deref.m_type = Operand::Type::Dereference; 573 deref.m_children.push_back(offset); 574 return std::make_pair(deref, osi); 575 } else { 576 Operand deref; 577 deref.m_type = Operand::Type::Dereference; 578 deref.m_children.push_back(index); 579 return std::make_pair(deref, osi); 580 } 581 } 582 583 // -0x10(%rbp) 584 static std::pair<Operand, llvm::StringRef::const_iterator> 585 ParseIntelDerefAccess(llvm::StringRef::const_iterator osi, 586 llvm::StringRef::const_iterator ose) { 587 std::pair<Operand, llvm::StringRef::const_iterator> offset_and_iterator = 588 ParseImmediate(osi, ose); 589 if (offset_and_iterator.first.IsValid()) { 590 osi = offset_and_iterator.second; 591 } 592 593 bool found = false; 594 std::tie(found, osi) = ConsumeChar(osi, '(', ose); 595 if (!found) { 596 return std::make_pair(Operand(), osi); 597 } 598 599 std::pair<Operand, llvm::StringRef::const_iterator> base_and_iterator = 600 ParseRegisterName(osi, ose); 601 if (base_and_iterator.first.IsValid()) { 602 osi = base_and_iterator.second; 603 } else { 604 return std::make_pair(Operand(), osi); 605 } 606 607 std::tie(found, osi) = ConsumeChar(osi, ')', ose); 608 if (!found) { 609 return std::make_pair(Operand(), osi); 610 } 611 612 if (offset_and_iterator.first.IsValid()) { 613 Operand offset; 614 offset.m_type = Operand::Type::Sum; 615 offset.m_children.push_back(offset_and_iterator.first); 616 offset.m_children.push_back(base_and_iterator.first); 617 618 Operand deref; 619 deref.m_type = Operand::Type::Dereference; 620 deref.m_children.push_back(offset); 621 return std::make_pair(deref, osi); 622 } else { 623 Operand deref; 624 deref.m_type = Operand::Type::Dereference; 625 deref.m_children.push_back(base_and_iterator.first); 626 return std::make_pair(deref, osi); 627 } 628 } 629 630 // [sp, #8]! 631 static std::pair<Operand, llvm::StringRef::const_iterator> 632 ParseARMOffsetAccess(llvm::StringRef::const_iterator osi, 633 llvm::StringRef::const_iterator ose) { 634 bool found = false; 635 std::tie(found, osi) = ConsumeChar(osi, '[', ose); 636 if (!found) { 637 return std::make_pair(Operand(), osi); 638 } 639 640 std::pair<Operand, llvm::StringRef::const_iterator> base_and_iterator = 641 ParseRegisterName(osi, ose); 642 if (base_and_iterator.first.IsValid()) { 643 osi = base_and_iterator.second; 644 } else { 645 return std::make_pair(Operand(), osi); 646 } 647 648 std::tie(found, osi) = ConsumeChar(osi, ',', ose); 649 if (!found) { 650 return std::make_pair(Operand(), osi); 651 } 652 653 std::pair<Operand, llvm::StringRef::const_iterator> offset_and_iterator = 654 ParseImmediate(osi, ose); 655 if (offset_and_iterator.first.IsValid()) { 656 osi = offset_and_iterator.second; 657 } 658 659 std::tie(found, osi) = ConsumeChar(osi, ']', ose); 660 if (!found) { 661 return std::make_pair(Operand(), osi); 662 } 663 664 Operand offset; 665 offset.m_type = Operand::Type::Sum; 666 offset.m_children.push_back(offset_and_iterator.first); 667 offset.m_children.push_back(base_and_iterator.first); 668 669 Operand deref; 670 deref.m_type = Operand::Type::Dereference; 671 deref.m_children.push_back(offset); 672 return std::make_pair(deref, osi); 673 } 674 675 // [sp] 676 static std::pair<Operand, llvm::StringRef::const_iterator> 677 ParseARMDerefAccess(llvm::StringRef::const_iterator osi, 678 llvm::StringRef::const_iterator ose) { 679 bool found = false; 680 std::tie(found, osi) = ConsumeChar(osi, '[', ose); 681 if (!found) { 682 return std::make_pair(Operand(), osi); 683 } 684 685 std::pair<Operand, llvm::StringRef::const_iterator> base_and_iterator = 686 ParseRegisterName(osi, ose); 687 if (base_and_iterator.first.IsValid()) { 688 osi = base_and_iterator.second; 689 } else { 690 return std::make_pair(Operand(), osi); 691 } 692 693 std::tie(found, osi) = ConsumeChar(osi, ']', ose); 694 if (!found) { 695 return std::make_pair(Operand(), osi); 696 } 697 698 Operand deref; 699 deref.m_type = Operand::Type::Dereference; 700 deref.m_children.push_back(base_and_iterator.first); 701 return std::make_pair(deref, osi); 702 } 703 704 static void DumpOperand(const Operand &op, Stream &s) { 705 switch (op.m_type) { 706 case Operand::Type::Dereference: 707 s.PutCString("*"); 708 DumpOperand(op.m_children[0], s); 709 break; 710 case Operand::Type::Immediate: 711 if (op.m_negative) { 712 s.PutCString("-"); 713 } 714 s.PutCString(llvm::to_string(op.m_immediate)); 715 break; 716 case Operand::Type::Invalid: 717 s.PutCString("Invalid"); 718 break; 719 case Operand::Type::Product: 720 s.PutCString("("); 721 DumpOperand(op.m_children[0], s); 722 s.PutCString("*"); 723 DumpOperand(op.m_children[1], s); 724 s.PutCString(")"); 725 break; 726 case Operand::Type::Register: 727 s.PutCString(op.m_register.GetStringRef()); 728 break; 729 case Operand::Type::Sum: 730 s.PutCString("("); 731 DumpOperand(op.m_children[0], s); 732 s.PutCString("+"); 733 DumpOperand(op.m_children[1], s); 734 s.PutCString(")"); 735 break; 736 } 737 } 738 739 bool ParseOperands( 740 llvm::SmallVectorImpl<Instruction::Operand> &operands) override { 741 const char *operands_string = GetOperands(nullptr); 742 743 if (!operands_string) { 744 return false; 745 } 746 747 llvm::StringRef operands_ref(operands_string); 748 749 llvm::StringRef::const_iterator osi = operands_ref.begin(); 750 llvm::StringRef::const_iterator ose = operands_ref.end(); 751 752 while (osi != ose) { 753 Operand operand; 754 llvm::StringRef::const_iterator iter; 755 756 if ((std::tie(operand, iter) = ParseIntelIndexedAccess(osi, ose), 757 operand.IsValid()) || 758 (std::tie(operand, iter) = ParseIntelDerefAccess(osi, ose), 759 operand.IsValid()) || 760 (std::tie(operand, iter) = ParseARMOffsetAccess(osi, ose), 761 operand.IsValid()) || 762 (std::tie(operand, iter) = ParseARMDerefAccess(osi, ose), 763 operand.IsValid()) || 764 (std::tie(operand, iter) = ParseRegisterName(osi, ose), 765 operand.IsValid()) || 766 (std::tie(operand, iter) = ParseImmediate(osi, ose), 767 operand.IsValid())) { 768 osi = iter; 769 operands.push_back(operand); 770 } else { 771 return false; 772 } 773 774 std::pair<bool, llvm::StringRef::const_iterator> found_and_iter = 775 ConsumeChar(osi, ',', ose); 776 if (found_and_iter.first) { 777 osi = found_and_iter.second; 778 } 779 780 osi = ConsumeWhitespace(osi, ose); 781 } 782 783 DisassemblerSP disasm_sp = m_disasm_wp.lock(); 784 785 if (disasm_sp && operands.size() > 1) { 786 // TODO tie this into the MC Disassembler's notion of clobbers. 787 switch (disasm_sp->GetArchitecture().GetMachine()) { 788 default: 789 break; 790 case llvm::Triple::x86: 791 case llvm::Triple::x86_64: 792 operands[operands.size() - 1].m_clobbered = true; 793 break; 794 case llvm::Triple::arm: 795 operands[0].m_clobbered = true; 796 break; 797 } 798 } 799 800 if (Log *log = GetLog(LLDBLog::Process)) { 801 StreamString ss; 802 803 ss.Printf("[%s] expands to %zu operands:\n", operands_string, 804 operands.size()); 805 for (const Operand &operand : operands) { 806 ss.PutCString(" "); 807 DumpOperand(operand, ss); 808 ss.PutCString("\n"); 809 } 810 811 log->PutString(ss.GetString()); 812 } 813 814 return true; 815 } 816 817 bool IsCall() override { 818 VisitInstruction(); 819 return m_is_call; 820 } 821 822 protected: 823 std::weak_ptr<DisassemblerLLVMC> m_disasm_wp; 824 825 bool m_is_valid = false; 826 bool m_using_file_addr = false; 827 bool m_has_visited_instruction = false; 828 829 // Be conservative. If we didn't understand the instruction, say it: 830 // - Might branch 831 // - Does not have a delay slot 832 // - Is not a call 833 // - Is not a load 834 // - Is not an authenticated instruction 835 bool m_does_branch = true; 836 bool m_has_delay_slot = false; 837 bool m_is_call = false; 838 bool m_is_load = false; 839 bool m_is_authenticated = false; 840 841 void VisitInstruction() { 842 if (m_has_visited_instruction) 843 return; 844 845 DisassemblerScope disasm(*this); 846 if (!disasm) 847 return; 848 849 DataExtractor data; 850 if (!m_opcode.GetData(data)) 851 return; 852 853 bool is_alternate_isa; 854 lldb::addr_t pc = m_address.GetFileAddress(); 855 DisassemblerLLVMC::MCDisasmInstance *mc_disasm_ptr = 856 GetDisasmToUse(is_alternate_isa, disasm); 857 const uint8_t *opcode_data = data.GetDataStart(); 858 const size_t opcode_data_len = data.GetByteSize(); 859 llvm::MCInst inst; 860 const size_t inst_size = 861 mc_disasm_ptr->GetMCInst(opcode_data, opcode_data_len, pc, inst); 862 if (inst_size == 0) 863 return; 864 865 m_has_visited_instruction = true; 866 m_does_branch = mc_disasm_ptr->CanBranch(inst); 867 m_has_delay_slot = mc_disasm_ptr->HasDelaySlot(inst); 868 m_is_call = mc_disasm_ptr->IsCall(inst); 869 m_is_load = mc_disasm_ptr->IsLoad(inst); 870 m_is_authenticated = mc_disasm_ptr->IsAuthenticated(inst); 871 } 872 873 private: 874 DisassemblerLLVMC::MCDisasmInstance * 875 GetDisasmToUse(bool &is_alternate_isa, DisassemblerScope &disasm) { 876 is_alternate_isa = false; 877 if (disasm) { 878 if (disasm->m_alternate_disasm_up) { 879 const AddressClass address_class = GetAddressClass(); 880 881 if (address_class == AddressClass::eCodeAlternateISA) { 882 is_alternate_isa = true; 883 return disasm->m_alternate_disasm_up.get(); 884 } 885 } 886 return disasm->m_disasm_up.get(); 887 } 888 return nullptr; 889 } 890 }; 891 892 std::unique_ptr<DisassemblerLLVMC::MCDisasmInstance> 893 DisassemblerLLVMC::MCDisasmInstance::Create(const char *triple, const char *cpu, 894 const char *features_str, 895 unsigned flavor, 896 DisassemblerLLVMC &owner) { 897 using Instance = std::unique_ptr<DisassemblerLLVMC::MCDisasmInstance>; 898 899 std::string Status; 900 const llvm::Target *curr_target = 901 llvm::TargetRegistry::lookupTarget(triple, Status); 902 if (!curr_target) 903 return Instance(); 904 905 std::unique_ptr<llvm::MCInstrInfo> instr_info_up( 906 curr_target->createMCInstrInfo()); 907 if (!instr_info_up) 908 return Instance(); 909 910 std::unique_ptr<llvm::MCRegisterInfo> reg_info_up( 911 curr_target->createMCRegInfo(triple)); 912 if (!reg_info_up) 913 return Instance(); 914 915 std::unique_ptr<llvm::MCSubtargetInfo> subtarget_info_up( 916 curr_target->createMCSubtargetInfo(triple, cpu, features_str)); 917 if (!subtarget_info_up) 918 return Instance(); 919 920 llvm::MCTargetOptions MCOptions; 921 std::unique_ptr<llvm::MCAsmInfo> asm_info_up( 922 curr_target->createMCAsmInfo(*reg_info_up, triple, MCOptions)); 923 if (!asm_info_up) 924 return Instance(); 925 926 std::unique_ptr<llvm::MCContext> context_up( 927 new llvm::MCContext(llvm::Triple(triple), asm_info_up.get(), 928 reg_info_up.get(), subtarget_info_up.get())); 929 if (!context_up) 930 return Instance(); 931 932 std::unique_ptr<llvm::MCDisassembler> disasm_up( 933 curr_target->createMCDisassembler(*subtarget_info_up, *context_up)); 934 if (!disasm_up) 935 return Instance(); 936 937 std::unique_ptr<llvm::MCRelocationInfo> rel_info_up( 938 curr_target->createMCRelocationInfo(triple, *context_up)); 939 if (!rel_info_up) 940 return Instance(); 941 942 std::unique_ptr<llvm::MCSymbolizer> symbolizer_up( 943 curr_target->createMCSymbolizer( 944 triple, nullptr, DisassemblerLLVMC::SymbolLookupCallback, &owner, 945 context_up.get(), std::move(rel_info_up))); 946 disasm_up->setSymbolizer(std::move(symbolizer_up)); 947 948 unsigned asm_printer_variant = 949 flavor == ~0U ? asm_info_up->getAssemblerDialect() : flavor; 950 951 std::unique_ptr<llvm::MCInstPrinter> instr_printer_up( 952 curr_target->createMCInstPrinter(llvm::Triple{triple}, 953 asm_printer_variant, *asm_info_up, 954 *instr_info_up, *reg_info_up)); 955 if (!instr_printer_up) 956 return Instance(); 957 958 return Instance( 959 new MCDisasmInstance(std::move(instr_info_up), std::move(reg_info_up), 960 std::move(subtarget_info_up), std::move(asm_info_up), 961 std::move(context_up), std::move(disasm_up), 962 std::move(instr_printer_up))); 963 } 964 965 DisassemblerLLVMC::MCDisasmInstance::MCDisasmInstance( 966 std::unique_ptr<llvm::MCInstrInfo> &&instr_info_up, 967 std::unique_ptr<llvm::MCRegisterInfo> &®_info_up, 968 std::unique_ptr<llvm::MCSubtargetInfo> &&subtarget_info_up, 969 std::unique_ptr<llvm::MCAsmInfo> &&asm_info_up, 970 std::unique_ptr<llvm::MCContext> &&context_up, 971 std::unique_ptr<llvm::MCDisassembler> &&disasm_up, 972 std::unique_ptr<llvm::MCInstPrinter> &&instr_printer_up) 973 : m_instr_info_up(std::move(instr_info_up)), 974 m_reg_info_up(std::move(reg_info_up)), 975 m_subtarget_info_up(std::move(subtarget_info_up)), 976 m_asm_info_up(std::move(asm_info_up)), 977 m_context_up(std::move(context_up)), m_disasm_up(std::move(disasm_up)), 978 m_instr_printer_up(std::move(instr_printer_up)) { 979 assert(m_instr_info_up && m_reg_info_up && m_subtarget_info_up && 980 m_asm_info_up && m_context_up && m_disasm_up && m_instr_printer_up); 981 } 982 983 uint64_t DisassemblerLLVMC::MCDisasmInstance::GetMCInst( 984 const uint8_t *opcode_data, size_t opcode_data_len, lldb::addr_t pc, 985 llvm::MCInst &mc_inst) const { 986 llvm::ArrayRef<uint8_t> data(opcode_data, opcode_data_len); 987 llvm::MCDisassembler::DecodeStatus status; 988 989 uint64_t new_inst_size; 990 status = m_disasm_up->getInstruction(mc_inst, new_inst_size, data, pc, 991 llvm::nulls()); 992 if (status == llvm::MCDisassembler::Success) 993 return new_inst_size; 994 else 995 return 0; 996 } 997 998 void DisassemblerLLVMC::MCDisasmInstance::PrintMCInst( 999 llvm::MCInst &mc_inst, std::string &inst_string, 1000 std::string &comments_string) { 1001 llvm::raw_string_ostream inst_stream(inst_string); 1002 llvm::raw_string_ostream comments_stream(comments_string); 1003 1004 m_instr_printer_up->setCommentStream(comments_stream); 1005 m_instr_printer_up->printInst(&mc_inst, 0, llvm::StringRef(), 1006 *m_subtarget_info_up, inst_stream); 1007 m_instr_printer_up->setCommentStream(llvm::nulls()); 1008 comments_stream.flush(); 1009 1010 static std::string g_newlines("\r\n"); 1011 1012 for (size_t newline_pos = 0; 1013 (newline_pos = comments_string.find_first_of(g_newlines, newline_pos)) != 1014 comments_string.npos; 1015 /**/) { 1016 comments_string.replace(comments_string.begin() + newline_pos, 1017 comments_string.begin() + newline_pos + 1, 1, ' '); 1018 } 1019 } 1020 1021 void DisassemblerLLVMC::MCDisasmInstance::SetStyle( 1022 bool use_hex_immed, HexImmediateStyle hex_style) { 1023 m_instr_printer_up->setPrintImmHex(use_hex_immed); 1024 switch (hex_style) { 1025 case eHexStyleC: 1026 m_instr_printer_up->setPrintHexStyle(llvm::HexStyle::C); 1027 break; 1028 case eHexStyleAsm: 1029 m_instr_printer_up->setPrintHexStyle(llvm::HexStyle::Asm); 1030 break; 1031 } 1032 } 1033 1034 bool DisassemblerLLVMC::MCDisasmInstance::CanBranch( 1035 llvm::MCInst &mc_inst) const { 1036 return m_instr_info_up->get(mc_inst.getOpcode()) 1037 .mayAffectControlFlow(mc_inst, *m_reg_info_up); 1038 } 1039 1040 bool DisassemblerLLVMC::MCDisasmInstance::HasDelaySlot( 1041 llvm::MCInst &mc_inst) const { 1042 return m_instr_info_up->get(mc_inst.getOpcode()).hasDelaySlot(); 1043 } 1044 1045 bool DisassemblerLLVMC::MCDisasmInstance::IsCall(llvm::MCInst &mc_inst) const { 1046 return m_instr_info_up->get(mc_inst.getOpcode()).isCall(); 1047 } 1048 1049 bool DisassemblerLLVMC::MCDisasmInstance::IsLoad(llvm::MCInst &mc_inst) const { 1050 return m_instr_info_up->get(mc_inst.getOpcode()).mayLoad(); 1051 } 1052 1053 bool DisassemblerLLVMC::MCDisasmInstance::IsAuthenticated( 1054 llvm::MCInst &mc_inst) const { 1055 auto InstrDesc = m_instr_info_up->get(mc_inst.getOpcode()); 1056 1057 // Treat software auth traps (brk 0xc470 + aut key, where 0x70 == 'p', 0xc4 1058 // == 'a' + 'c') as authenticated instructions for reporting purposes, in 1059 // addition to the standard authenticated instructions specified in ARMv8.3. 1060 bool IsBrkC47x = false; 1061 if (InstrDesc.isTrap() && mc_inst.getNumOperands() == 1) { 1062 const llvm::MCOperand &Op0 = mc_inst.getOperand(0); 1063 if (Op0.isImm() && Op0.getImm() >= 0xc470 && Op0.getImm() <= 0xc474) 1064 IsBrkC47x = true; 1065 } 1066 1067 return InstrDesc.isAuthenticated() || IsBrkC47x; 1068 } 1069 1070 DisassemblerLLVMC::DisassemblerLLVMC(const ArchSpec &arch, 1071 const char *flavor_string) 1072 : Disassembler(arch, flavor_string), m_exe_ctx(nullptr), m_inst(nullptr), 1073 m_data_from_file(false), m_adrp_address(LLDB_INVALID_ADDRESS), 1074 m_adrp_insn() { 1075 if (!FlavorValidForArchSpec(arch, m_flavor.c_str())) { 1076 m_flavor.assign("default"); 1077 } 1078 1079 unsigned flavor = ~0U; 1080 llvm::Triple triple = arch.GetTriple(); 1081 1082 // So far the only supported flavor is "intel" on x86. The base class will 1083 // set this correctly coming in. 1084 if (triple.getArch() == llvm::Triple::x86 || 1085 triple.getArch() == llvm::Triple::x86_64) { 1086 if (m_flavor == "intel") { 1087 flavor = 1; 1088 } else if (m_flavor == "att") { 1089 flavor = 0; 1090 } 1091 } 1092 1093 ArchSpec thumb_arch(arch); 1094 if (triple.getArch() == llvm::Triple::arm) { 1095 std::string thumb_arch_name(thumb_arch.GetTriple().getArchName().str()); 1096 // Replace "arm" with "thumb" so we get all thumb variants correct 1097 if (thumb_arch_name.size() > 3) { 1098 thumb_arch_name.erase(0, 3); 1099 thumb_arch_name.insert(0, "thumb"); 1100 } else { 1101 thumb_arch_name = "thumbv9.3a"; 1102 } 1103 thumb_arch.GetTriple().setArchName(llvm::StringRef(thumb_arch_name)); 1104 } 1105 1106 // If no sub architecture specified then use the most recent arm architecture 1107 // so the disassembler will return all instructions. Without it we will see a 1108 // lot of unknown opcodes if the code uses instructions which are not 1109 // available in the oldest arm version (which is used when no sub architecture 1110 // is specified). 1111 if (triple.getArch() == llvm::Triple::arm && 1112 triple.getSubArch() == llvm::Triple::NoSubArch) 1113 triple.setArchName("armv9.3a"); 1114 1115 std::string features_str; 1116 const char *triple_str = triple.getTriple().c_str(); 1117 1118 // ARM Cortex M0-M7 devices only execute thumb instructions 1119 if (arch.IsAlwaysThumbInstructions()) { 1120 triple_str = thumb_arch.GetTriple().getTriple().c_str(); 1121 features_str += "+fp-armv8,"; 1122 } 1123 1124 const char *cpu = ""; 1125 1126 switch (arch.GetCore()) { 1127 case ArchSpec::eCore_mips32: 1128 case ArchSpec::eCore_mips32el: 1129 cpu = "mips32"; 1130 break; 1131 case ArchSpec::eCore_mips32r2: 1132 case ArchSpec::eCore_mips32r2el: 1133 cpu = "mips32r2"; 1134 break; 1135 case ArchSpec::eCore_mips32r3: 1136 case ArchSpec::eCore_mips32r3el: 1137 cpu = "mips32r3"; 1138 break; 1139 case ArchSpec::eCore_mips32r5: 1140 case ArchSpec::eCore_mips32r5el: 1141 cpu = "mips32r5"; 1142 break; 1143 case ArchSpec::eCore_mips32r6: 1144 case ArchSpec::eCore_mips32r6el: 1145 cpu = "mips32r6"; 1146 break; 1147 case ArchSpec::eCore_mips64: 1148 case ArchSpec::eCore_mips64el: 1149 cpu = "mips64"; 1150 break; 1151 case ArchSpec::eCore_mips64r2: 1152 case ArchSpec::eCore_mips64r2el: 1153 cpu = "mips64r2"; 1154 break; 1155 case ArchSpec::eCore_mips64r3: 1156 case ArchSpec::eCore_mips64r3el: 1157 cpu = "mips64r3"; 1158 break; 1159 case ArchSpec::eCore_mips64r5: 1160 case ArchSpec::eCore_mips64r5el: 1161 cpu = "mips64r5"; 1162 break; 1163 case ArchSpec::eCore_mips64r6: 1164 case ArchSpec::eCore_mips64r6el: 1165 cpu = "mips64r6"; 1166 break; 1167 default: 1168 cpu = ""; 1169 break; 1170 } 1171 1172 if (arch.IsMIPS()) { 1173 uint32_t arch_flags = arch.GetFlags(); 1174 if (arch_flags & ArchSpec::eMIPSAse_msa) 1175 features_str += "+msa,"; 1176 if (arch_flags & ArchSpec::eMIPSAse_dsp) 1177 features_str += "+dsp,"; 1178 if (arch_flags & ArchSpec::eMIPSAse_dspr2) 1179 features_str += "+dspr2,"; 1180 } 1181 1182 // If any AArch64 variant, enable latest ISA with all extensions. 1183 if (triple.isAArch64()) { 1184 features_str += "+all,"; 1185 1186 if (triple.getVendor() == llvm::Triple::Apple) 1187 cpu = "apple-latest"; 1188 } 1189 1190 if (triple.isRISCV()) { 1191 uint32_t arch_flags = arch.GetFlags(); 1192 if (arch_flags & ArchSpec::eRISCV_rvc) 1193 features_str += "+c,"; 1194 if (arch_flags & ArchSpec::eRISCV_rve) 1195 features_str += "+e,"; 1196 if ((arch_flags & ArchSpec::eRISCV_float_abi_single) == 1197 ArchSpec::eRISCV_float_abi_single) 1198 features_str += "+f,"; 1199 if ((arch_flags & ArchSpec::eRISCV_float_abi_double) == 1200 ArchSpec::eRISCV_float_abi_double) 1201 features_str += "+f,+d,"; 1202 if ((arch_flags & ArchSpec::eRISCV_float_abi_quad) == 1203 ArchSpec::eRISCV_float_abi_quad) 1204 features_str += "+f,+d,+q,"; 1205 // FIXME: how do we detect features such as `+a`, `+m`? 1206 } 1207 1208 // We use m_disasm_up.get() to tell whether we are valid or not, so if this 1209 // isn't good for some reason, we won't be valid and FindPlugin will fail and 1210 // we won't get used. 1211 m_disasm_up = MCDisasmInstance::Create(triple_str, cpu, features_str.c_str(), 1212 flavor, *this); 1213 1214 llvm::Triple::ArchType llvm_arch = triple.getArch(); 1215 1216 // For arm CPUs that can execute arm or thumb instructions, also create a 1217 // thumb instruction disassembler. 1218 if (llvm_arch == llvm::Triple::arm) { 1219 std::string thumb_triple(thumb_arch.GetTriple().getTriple()); 1220 m_alternate_disasm_up = 1221 MCDisasmInstance::Create(thumb_triple.c_str(), "", features_str.c_str(), 1222 flavor, *this); 1223 if (!m_alternate_disasm_up) 1224 m_disasm_up.reset(); 1225 1226 } else if (arch.IsMIPS()) { 1227 /* Create alternate disassembler for MIPS16 and microMIPS */ 1228 uint32_t arch_flags = arch.GetFlags(); 1229 if (arch_flags & ArchSpec::eMIPSAse_mips16) 1230 features_str += "+mips16,"; 1231 else if (arch_flags & ArchSpec::eMIPSAse_micromips) 1232 features_str += "+micromips,"; 1233 1234 m_alternate_disasm_up = MCDisasmInstance::Create( 1235 triple_str, cpu, features_str.c_str(), flavor, *this); 1236 if (!m_alternate_disasm_up) 1237 m_disasm_up.reset(); 1238 } 1239 } 1240 1241 DisassemblerLLVMC::~DisassemblerLLVMC() = default; 1242 1243 Disassembler *DisassemblerLLVMC::CreateInstance(const ArchSpec &arch, 1244 const char *flavor) { 1245 if (arch.GetTriple().getArch() != llvm::Triple::UnknownArch) { 1246 std::unique_ptr<DisassemblerLLVMC> disasm_up( 1247 new DisassemblerLLVMC(arch, flavor)); 1248 1249 if (disasm_up.get() && disasm_up->IsValid()) 1250 return disasm_up.release(); 1251 } 1252 return nullptr; 1253 } 1254 1255 size_t DisassemblerLLVMC::DecodeInstructions(const Address &base_addr, 1256 const DataExtractor &data, 1257 lldb::offset_t data_offset, 1258 size_t num_instructions, 1259 bool append, bool data_from_file) { 1260 if (!append) 1261 m_instruction_list.Clear(); 1262 1263 if (!IsValid()) 1264 return 0; 1265 1266 m_data_from_file = data_from_file; 1267 uint32_t data_cursor = data_offset; 1268 const size_t data_byte_size = data.GetByteSize(); 1269 uint32_t instructions_parsed = 0; 1270 Address inst_addr(base_addr); 1271 1272 while (data_cursor < data_byte_size && 1273 instructions_parsed < num_instructions) { 1274 1275 AddressClass address_class = AddressClass::eCode; 1276 1277 if (m_alternate_disasm_up) 1278 address_class = inst_addr.GetAddressClass(); 1279 1280 InstructionSP inst_sp( 1281 new InstructionLLVMC(*this, inst_addr, address_class)); 1282 1283 if (!inst_sp) 1284 break; 1285 1286 uint32_t inst_size = inst_sp->Decode(*this, data, data_cursor); 1287 1288 if (inst_size == 0) 1289 break; 1290 1291 m_instruction_list.Append(inst_sp); 1292 data_cursor += inst_size; 1293 inst_addr.Slide(inst_size); 1294 instructions_parsed++; 1295 } 1296 1297 return data_cursor - data_offset; 1298 } 1299 1300 void DisassemblerLLVMC::Initialize() { 1301 PluginManager::RegisterPlugin(GetPluginNameStatic(), 1302 "Disassembler that uses LLVM MC to disassemble " 1303 "i386, x86_64, ARM, and ARM64.", 1304 CreateInstance); 1305 1306 llvm::InitializeAllTargetInfos(); 1307 llvm::InitializeAllTargetMCs(); 1308 llvm::InitializeAllAsmParsers(); 1309 llvm::InitializeAllDisassemblers(); 1310 } 1311 1312 void DisassemblerLLVMC::Terminate() { 1313 PluginManager::UnregisterPlugin(CreateInstance); 1314 } 1315 1316 int DisassemblerLLVMC::OpInfoCallback(void *disassembler, uint64_t pc, 1317 uint64_t offset, uint64_t size, 1318 int tag_type, void *tag_bug) { 1319 return static_cast<DisassemblerLLVMC *>(disassembler) 1320 ->OpInfo(pc, offset, size, tag_type, tag_bug); 1321 } 1322 1323 const char *DisassemblerLLVMC::SymbolLookupCallback(void *disassembler, 1324 uint64_t value, 1325 uint64_t *type, uint64_t pc, 1326 const char **name) { 1327 return static_cast<DisassemblerLLVMC *>(disassembler) 1328 ->SymbolLookup(value, type, pc, name); 1329 } 1330 1331 bool DisassemblerLLVMC::FlavorValidForArchSpec( 1332 const lldb_private::ArchSpec &arch, const char *flavor) { 1333 llvm::Triple triple = arch.GetTriple(); 1334 if (flavor == nullptr || strcmp(flavor, "default") == 0) 1335 return true; 1336 1337 if (triple.getArch() == llvm::Triple::x86 || 1338 triple.getArch() == llvm::Triple::x86_64) { 1339 return strcmp(flavor, "intel") == 0 || strcmp(flavor, "att") == 0; 1340 } else 1341 return false; 1342 } 1343 1344 bool DisassemblerLLVMC::IsValid() const { return m_disasm_up.operator bool(); } 1345 1346 int DisassemblerLLVMC::OpInfo(uint64_t PC, uint64_t Offset, uint64_t Size, 1347 int tag_type, void *tag_bug) { 1348 switch (tag_type) { 1349 default: 1350 break; 1351 case 1: 1352 memset(tag_bug, 0, sizeof(::LLVMOpInfo1)); 1353 break; 1354 } 1355 return 0; 1356 } 1357 1358 const char *DisassemblerLLVMC::SymbolLookup(uint64_t value, uint64_t *type_ptr, 1359 uint64_t pc, const char **name) { 1360 if (*type_ptr) { 1361 if (m_exe_ctx && m_inst) { 1362 // std::string remove_this_prior_to_checkin; 1363 Target *target = m_exe_ctx ? m_exe_ctx->GetTargetPtr() : nullptr; 1364 Address value_so_addr; 1365 Address pc_so_addr; 1366 if (target->GetArchitecture().GetMachine() == llvm::Triple::aarch64 || 1367 target->GetArchitecture().GetMachine() == llvm::Triple::aarch64_be || 1368 target->GetArchitecture().GetMachine() == llvm::Triple::aarch64_32) { 1369 if (*type_ptr == LLVMDisassembler_ReferenceType_In_ARM64_ADRP) { 1370 m_adrp_address = pc; 1371 m_adrp_insn = value; 1372 *name = nullptr; 1373 *type_ptr = LLVMDisassembler_ReferenceType_InOut_None; 1374 return nullptr; 1375 } 1376 // If this instruction is an ADD and 1377 // the previous instruction was an ADRP and 1378 // the ADRP's register and this ADD's register are the same, 1379 // then this is a pc-relative address calculation. 1380 if (*type_ptr == LLVMDisassembler_ReferenceType_In_ARM64_ADDXri && 1381 m_adrp_insn && m_adrp_address == pc - 4 && 1382 (m_adrp_insn.getValue() & 0x1f) == ((value >> 5) & 0x1f)) { 1383 uint32_t addxri_inst; 1384 uint64_t adrp_imm, addxri_imm; 1385 // Get immlo and immhi bits, OR them together to get the ADRP imm 1386 // value. 1387 adrp_imm = ((m_adrp_insn.getValue() & 0x00ffffe0) >> 3) | 1388 ((m_adrp_insn.getValue() >> 29) & 0x3); 1389 // if high bit of immhi after right-shifting set, sign extend 1390 if (adrp_imm & (1ULL << 20)) 1391 adrp_imm |= ~((1ULL << 21) - 1); 1392 1393 addxri_inst = value; 1394 addxri_imm = (addxri_inst >> 10) & 0xfff; 1395 // check if 'sh' bit is set, shift imm value up if so 1396 // (this would make no sense, ADRP already gave us this part) 1397 if ((addxri_inst >> (12 + 5 + 5)) & 1) 1398 addxri_imm <<= 12; 1399 value = (m_adrp_address & 0xfffffffffffff000LL) + (adrp_imm << 12) + 1400 addxri_imm; 1401 } 1402 m_adrp_address = LLDB_INVALID_ADDRESS; 1403 m_adrp_insn.reset(); 1404 } 1405 1406 if (m_inst->UsingFileAddress()) { 1407 ModuleSP module_sp(m_inst->GetAddress().GetModule()); 1408 if (module_sp) { 1409 module_sp->ResolveFileAddress(value, value_so_addr); 1410 module_sp->ResolveFileAddress(pc, pc_so_addr); 1411 } 1412 } else if (target && !target->GetSectionLoadList().IsEmpty()) { 1413 target->GetSectionLoadList().ResolveLoadAddress(value, value_so_addr); 1414 target->GetSectionLoadList().ResolveLoadAddress(pc, pc_so_addr); 1415 } 1416 1417 SymbolContext sym_ctx; 1418 const SymbolContextItem resolve_scope = 1419 eSymbolContextFunction | eSymbolContextSymbol; 1420 if (pc_so_addr.IsValid() && pc_so_addr.GetModule()) { 1421 pc_so_addr.GetModule()->ResolveSymbolContextForAddress( 1422 pc_so_addr, resolve_scope, sym_ctx); 1423 } 1424 1425 if (value_so_addr.IsValid() && value_so_addr.GetSection()) { 1426 StreamString ss; 1427 1428 bool format_omitting_current_func_name = false; 1429 if (sym_ctx.symbol || sym_ctx.function) { 1430 AddressRange range; 1431 if (sym_ctx.GetAddressRange(resolve_scope, 0, false, range) && 1432 range.GetBaseAddress().IsValid() && 1433 range.ContainsLoadAddress(value_so_addr, target)) { 1434 format_omitting_current_func_name = true; 1435 } 1436 } 1437 1438 // If the "value" address (the target address we're symbolicating) is 1439 // inside the same SymbolContext as the current instruction pc 1440 // (pc_so_addr), don't print the full function name - just print it 1441 // with DumpStyleNoFunctionName style, e.g. "<+36>". 1442 if (format_omitting_current_func_name) { 1443 value_so_addr.Dump(&ss, target, Address::DumpStyleNoFunctionName, 1444 Address::DumpStyleSectionNameOffset); 1445 } else { 1446 value_so_addr.Dump( 1447 &ss, target, 1448 Address::DumpStyleResolvedDescriptionNoFunctionArguments, 1449 Address::DumpStyleSectionNameOffset); 1450 } 1451 1452 if (!ss.GetString().empty()) { 1453 // If Address::Dump returned a multi-line description, most commonly 1454 // seen when we have multiple levels of inlined functions at an 1455 // address, only show the first line. 1456 std::string str = std::string(ss.GetString()); 1457 size_t first_eol_char = str.find_first_of("\r\n"); 1458 if (first_eol_char != std::string::npos) { 1459 str.erase(first_eol_char); 1460 } 1461 m_inst->AppendComment(str); 1462 } 1463 } 1464 } 1465 } 1466 1467 // TODO: llvm-objdump sets the type_ptr to the 1468 // LLVMDisassembler_ReferenceType_Out_* values 1469 // based on where value_so_addr is pointing, with 1470 // Mach-O specific augmentations in MachODump.cpp. e.g. 1471 // see what AArch64ExternalSymbolizer::tryAddingSymbolicOperand 1472 // handles. 1473 *type_ptr = LLVMDisassembler_ReferenceType_InOut_None; 1474 *name = nullptr; 1475 return nullptr; 1476 } 1477