1 //===-- Disassembler.cpp --------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "lldb/Core/Disassembler.h"
10 
11 #include "lldb/Core/AddressRange.h"
12 #include "lldb/Core/Debugger.h"
13 #include "lldb/Core/EmulateInstruction.h"
14 #include "lldb/Core/Mangled.h"
15 #include "lldb/Core/Module.h"
16 #include "lldb/Core/ModuleList.h"
17 #include "lldb/Core/PluginManager.h"
18 #include "lldb/Core/SourceManager.h"
19 #include "lldb/Host/FileSystem.h"
20 #include "lldb/Interpreter/OptionValue.h"
21 #include "lldb/Interpreter/OptionValueArray.h"
22 #include "lldb/Interpreter/OptionValueDictionary.h"
23 #include "lldb/Interpreter/OptionValueRegex.h"
24 #include "lldb/Interpreter/OptionValueString.h"
25 #include "lldb/Interpreter/OptionValueUInt64.h"
26 #include "lldb/Symbol/Function.h"
27 #include "lldb/Symbol/Symbol.h"
28 #include "lldb/Symbol/SymbolContext.h"
29 #include "lldb/Target/ExecutionContext.h"
30 #include "lldb/Target/SectionLoadList.h"
31 #include "lldb/Target/StackFrame.h"
32 #include "lldb/Target/Target.h"
33 #include "lldb/Target/Thread.h"
34 #include "lldb/Utility/DataBufferHeap.h"
35 #include "lldb/Utility/DataExtractor.h"
36 #include "lldb/Utility/RegularExpression.h"
37 #include "lldb/Utility/Status.h"
38 #include "lldb/Utility/Stream.h"
39 #include "lldb/Utility/StreamString.h"
40 #include "lldb/Utility/Timer.h"
41 #include "lldb/lldb-private-enumerations.h"
42 #include "lldb/lldb-private-interfaces.h"
43 #include "lldb/lldb-private-types.h"
44 #include "llvm/ADT/Triple.h"
45 #include "llvm/Support/Compiler.h"
46 
47 #include <cstdint>
48 #include <cstring>
49 #include <utility>
50 
51 #include <cassert>
52 
// Number of bytes to disassemble when no usable size is known for the range;
// used below as the fallback byte limit when disassembling a stack frame.
#define DEFAULT_DISASM_BYTE_SIZE 32
54 
55 using namespace lldb;
56 using namespace lldb_private;
57 
58 DisassemblerSP Disassembler::FindPlugin(const ArchSpec &arch,
59                                         const char *flavor,
60                                         const char *plugin_name) {
61   LLDB_SCOPED_TIMERF("Disassembler::FindPlugin (arch = %s, plugin_name = %s)",
62                      arch.GetArchitectureName(), plugin_name);
63 
64   DisassemblerCreateInstance create_callback = nullptr;
65 
66   if (plugin_name) {
67     ConstString const_plugin_name(plugin_name);
68     create_callback = PluginManager::GetDisassemblerCreateCallbackForPluginName(
69         const_plugin_name);
70     if (create_callback) {
71       DisassemblerSP disassembler_sp(create_callback(arch, flavor));
72 
73       if (disassembler_sp)
74         return disassembler_sp;
75     }
76   } else {
77     for (uint32_t idx = 0;
78          (create_callback = PluginManager::GetDisassemblerCreateCallbackAtIndex(
79               idx)) != nullptr;
80          ++idx) {
81       DisassemblerSP disassembler_sp(create_callback(arch, flavor));
82 
83       if (disassembler_sp)
84         return disassembler_sp;
85     }
86   }
87   return DisassemblerSP();
88 }
89 
90 DisassemblerSP Disassembler::FindPluginForTarget(const Target &target,
91                                                  const ArchSpec &arch,
92                                                  const char *flavor,
93                                                  const char *plugin_name) {
94   if (flavor == nullptr) {
95     // FIXME - we don't have the mechanism in place to do per-architecture
96     // settings.  But since we know that for now we only support flavors on x86
97     // & x86_64,
98     if (arch.GetTriple().getArch() == llvm::Triple::x86 ||
99         arch.GetTriple().getArch() == llvm::Triple::x86_64)
100       flavor = target.GetDisassemblyFlavor();
101   }
102   return FindPlugin(arch, flavor, plugin_name);
103 }
104 
105 static Address ResolveAddress(Target &target, const Address &addr) {
106   if (!addr.IsSectionOffset()) {
107     Address resolved_addr;
108     // If we weren't passed in a section offset address range, try and resolve
109     // it to something
110     bool is_resolved = target.GetSectionLoadList().IsEmpty()
111                            ? target.GetImages().ResolveFileAddress(
112                                  addr.GetOffset(), resolved_addr)
113                            : target.GetSectionLoadList().ResolveLoadAddress(
114                                  addr.GetOffset(), resolved_addr);
115 
116     // We weren't able to resolve the address, just treat it as a raw address
117     if (is_resolved && resolved_addr.IsValid())
118       return resolved_addr;
119   }
120   return addr;
121 }
122 
123 lldb::DisassemblerSP Disassembler::DisassembleRange(
124     const ArchSpec &arch, const char *plugin_name, const char *flavor,
125     Target &target, const AddressRange &range, bool force_live_memory) {
126   if (range.GetByteSize() <= 0)
127     return {};
128 
129   if (!range.GetBaseAddress().IsValid())
130     return {};
131 
132   lldb::DisassemblerSP disasm_sp =
133       Disassembler::FindPluginForTarget(target, arch, flavor, plugin_name);
134 
135   if (!disasm_sp)
136     return {};
137 
138   const size_t bytes_disassembled = disasm_sp->ParseInstructions(
139       target, range.GetBaseAddress(), {Limit::Bytes, range.GetByteSize()},
140       nullptr, force_live_memory);
141   if (bytes_disassembled == 0)
142     return {};
143 
144   return disasm_sp;
145 }
146 
147 lldb::DisassemblerSP
148 Disassembler::DisassembleBytes(const ArchSpec &arch, const char *plugin_name,
149                                const char *flavor, const Address &start,
150                                const void *src, size_t src_len,
151                                uint32_t num_instructions, bool data_from_file) {
152   if (!src)
153     return {};
154 
155   lldb::DisassemblerSP disasm_sp =
156       Disassembler::FindPlugin(arch, flavor, plugin_name);
157 
158   if (!disasm_sp)
159     return {};
160 
161   DataExtractor data(src, src_len, arch.GetByteOrder(),
162                      arch.GetAddressByteSize());
163 
164   (void)disasm_sp->DecodeInstructions(start, data, 0, num_instructions, false,
165                                       data_from_file);
166   return disasm_sp;
167 }
168 
169 bool Disassembler::Disassemble(Debugger &debugger, const ArchSpec &arch,
170                                const char *plugin_name, const char *flavor,
171                                const ExecutionContext &exe_ctx,
172                                const Address &address, Limit limit,
173                                bool mixed_source_and_assembly,
174                                uint32_t num_mixed_context_lines,
175                                uint32_t options, Stream &strm) {
176   if (!exe_ctx.GetTargetPtr())
177     return false;
178 
179   lldb::DisassemblerSP disasm_sp(Disassembler::FindPluginForTarget(
180       exe_ctx.GetTargetRef(), arch, flavor, plugin_name));
181   if (!disasm_sp)
182     return false;
183 
184   const bool force_live_memory = true;
185   size_t bytes_disassembled = disasm_sp->ParseInstructions(
186       exe_ctx.GetTargetRef(), address, limit, &strm, force_live_memory);
187   if (bytes_disassembled == 0)
188     return false;
189 
190   disasm_sp->PrintInstructions(debugger, arch, exe_ctx,
191                                mixed_source_and_assembly,
192                                num_mixed_context_lines, options, strm);
193   return true;
194 }
195 
196 Disassembler::SourceLine
197 Disassembler::GetFunctionDeclLineEntry(const SymbolContext &sc) {
198   if (!sc.function)
199     return {};
200 
201   if (!sc.line_entry.IsValid())
202     return {};
203 
204   LineEntry prologue_end_line = sc.line_entry;
205   FileSpec func_decl_file;
206   uint32_t func_decl_line;
207   sc.function->GetStartLineSourceInfo(func_decl_file, func_decl_line);
208 
209   if (func_decl_file != prologue_end_line.file &&
210       func_decl_file != prologue_end_line.original_file)
211     return {};
212 
213   SourceLine decl_line;
214   decl_line.file = func_decl_file;
215   decl_line.line = func_decl_line;
216   // TODO: Do we care about column on these entries?  If so, we need to plumb
217   // that through GetStartLineSourceInfo.
218   decl_line.column = 0;
219   return decl_line;
220 }
221 
222 void Disassembler::AddLineToSourceLineTables(
223     SourceLine &line,
224     std::map<FileSpec, std::set<uint32_t>> &source_lines_seen) {
225   if (line.IsValid()) {
226     auto source_lines_seen_pos = source_lines_seen.find(line.file);
227     if (source_lines_seen_pos == source_lines_seen.end()) {
228       std::set<uint32_t> lines;
229       lines.insert(line.line);
230       source_lines_seen.emplace(line.file, lines);
231     } else {
232       source_lines_seen_pos->second.insert(line.line);
233     }
234   }
235 }
236 
237 bool Disassembler::ElideMixedSourceAndDisassemblyLine(
238     const ExecutionContext &exe_ctx, const SymbolContext &sc,
239     SourceLine &line) {
240 
241   // TODO: should we also check target.process.thread.step-avoid-libraries ?
242 
243   const RegularExpression *avoid_regex = nullptr;
244 
245   // Skip any line #0 entries - they are implementation details
246   if (line.line == 0)
247     return false;
248 
249   ThreadSP thread_sp = exe_ctx.GetThreadSP();
250   if (thread_sp) {
251     avoid_regex = thread_sp->GetSymbolsToAvoidRegexp();
252   } else {
253     TargetSP target_sp = exe_ctx.GetTargetSP();
254     if (target_sp) {
255       Status error;
256       OptionValueSP value_sp = target_sp->GetDebugger().GetPropertyValue(
257           &exe_ctx, "target.process.thread.step-avoid-regexp", false, error);
258       if (value_sp && value_sp->GetType() == OptionValue::eTypeRegex) {
259         OptionValueRegex *re = value_sp->GetAsRegex();
260         if (re) {
261           avoid_regex = re->GetCurrentValue();
262         }
263       }
264     }
265   }
266   if (avoid_regex && sc.symbol != nullptr) {
267     const char *function_name =
268         sc.GetFunctionName(Mangled::ePreferDemangledWithoutArguments)
269             .GetCString();
270     if (function_name && avoid_regex->Execute(function_name)) {
271       // skip this source line
272       return true;
273     }
274   }
275   // don't skip this source line
276   return false;
277 }
278 
279 void Disassembler::PrintInstructions(Debugger &debugger, const ArchSpec &arch,
280                                      const ExecutionContext &exe_ctx,
281                                      bool mixed_source_and_assembly,
282                                      uint32_t num_mixed_context_lines,
283                                      uint32_t options, Stream &strm) {
284   // We got some things disassembled...
285   size_t num_instructions_found = GetInstructionList().GetSize();
286 
287   const uint32_t max_opcode_byte_size =
288       GetInstructionList().GetMaxOpcocdeByteSize();
289   SymbolContext sc;
290   SymbolContext prev_sc;
291   AddressRange current_source_line_range;
292   const Address *pc_addr_ptr = nullptr;
293   StackFrame *frame = exe_ctx.GetFramePtr();
294 
295   TargetSP target_sp(exe_ctx.GetTargetSP());
296   SourceManager &source_manager =
297       target_sp ? target_sp->GetSourceManager() : debugger.GetSourceManager();
298 
299   if (frame) {
300     pc_addr_ptr = &frame->GetFrameCodeAddress();
301   }
302   const uint32_t scope =
303       eSymbolContextLineEntry | eSymbolContextFunction | eSymbolContextSymbol;
304   const bool use_inline_block_range = false;
305 
306   const FormatEntity::Entry *disassembly_format = nullptr;
307   FormatEntity::Entry format;
308   if (exe_ctx.HasTargetScope()) {
309     disassembly_format =
310         exe_ctx.GetTargetRef().GetDebugger().GetDisassemblyFormat();
311   } else {
312     FormatEntity::Parse("${addr}: ", format);
313     disassembly_format = &format;
314   }
315 
316   // First pass: step through the list of instructions, find how long the
317   // initial addresses strings are, insert padding in the second pass so the
318   // opcodes all line up nicely.
319 
320   // Also build up the source line mapping if this is mixed source & assembly
321   // mode. Calculate the source line for each assembly instruction (eliding
322   // inlined functions which the user wants to skip).
323 
324   std::map<FileSpec, std::set<uint32_t>> source_lines_seen;
325   Symbol *previous_symbol = nullptr;
326 
327   size_t address_text_size = 0;
328   for (size_t i = 0; i < num_instructions_found; ++i) {
329     Instruction *inst = GetInstructionList().GetInstructionAtIndex(i).get();
330     if (inst) {
331       const Address &addr = inst->GetAddress();
332       ModuleSP module_sp(addr.GetModule());
333       if (module_sp) {
334         const SymbolContextItem resolve_mask = eSymbolContextFunction |
335                                                eSymbolContextSymbol |
336                                                eSymbolContextLineEntry;
337         uint32_t resolved_mask =
338             module_sp->ResolveSymbolContextForAddress(addr, resolve_mask, sc);
339         if (resolved_mask) {
340           StreamString strmstr;
341           Debugger::FormatDisassemblerAddress(disassembly_format, &sc, nullptr,
342                                               &exe_ctx, &addr, strmstr);
343           size_t cur_line = strmstr.GetSizeOfLastLine();
344           if (cur_line > address_text_size)
345             address_text_size = cur_line;
346 
347           // Add entries to our "source_lines_seen" map+set which list which
348           // sources lines occur in this disassembly session.  We will print
349           // lines of context around a source line, but we don't want to print
350           // a source line that has a line table entry of its own - we'll leave
351           // that source line to be printed when it actually occurs in the
352           // disassembly.
353 
354           if (mixed_source_and_assembly && sc.line_entry.IsValid()) {
355             if (sc.symbol != previous_symbol) {
356               SourceLine decl_line = GetFunctionDeclLineEntry(sc);
357               if (!ElideMixedSourceAndDisassemblyLine(exe_ctx, sc, decl_line))
358                 AddLineToSourceLineTables(decl_line, source_lines_seen);
359             }
360             if (sc.line_entry.IsValid()) {
361               SourceLine this_line;
362               this_line.file = sc.line_entry.file;
363               this_line.line = sc.line_entry.line;
364               this_line.column = sc.line_entry.column;
365               if (!ElideMixedSourceAndDisassemblyLine(exe_ctx, sc, this_line))
366                 AddLineToSourceLineTables(this_line, source_lines_seen);
367             }
368           }
369         }
370         sc.Clear(false);
371       }
372     }
373   }
374 
375   previous_symbol = nullptr;
376   SourceLine previous_line;
377   for (size_t i = 0; i < num_instructions_found; ++i) {
378     Instruction *inst = GetInstructionList().GetInstructionAtIndex(i).get();
379 
380     if (inst) {
381       const Address &addr = inst->GetAddress();
382       const bool inst_is_at_pc = pc_addr_ptr && addr == *pc_addr_ptr;
383       SourceLinesToDisplay source_lines_to_display;
384 
385       prev_sc = sc;
386 
387       ModuleSP module_sp(addr.GetModule());
388       if (module_sp) {
389         uint32_t resolved_mask = module_sp->ResolveSymbolContextForAddress(
390             addr, eSymbolContextEverything, sc);
391         if (resolved_mask) {
392           if (mixed_source_and_assembly) {
393 
394             // If we've started a new function (non-inlined), print all of the
395             // source lines from the function declaration until the first line
396             // table entry - typically the opening curly brace of the function.
397             if (previous_symbol != sc.symbol) {
398               // The default disassembly format puts an extra blank line
399               // between functions - so when we're displaying the source
400               // context for a function, we don't want to add a blank line
401               // after the source context or we'll end up with two of them.
402               if (previous_symbol != nullptr)
403                 source_lines_to_display.print_source_context_end_eol = false;
404 
405               previous_symbol = sc.symbol;
406               if (sc.function && sc.line_entry.IsValid()) {
407                 LineEntry prologue_end_line = sc.line_entry;
408                 if (!ElideMixedSourceAndDisassemblyLine(exe_ctx, sc,
409                                                         prologue_end_line)) {
410                   FileSpec func_decl_file;
411                   uint32_t func_decl_line;
412                   sc.function->GetStartLineSourceInfo(func_decl_file,
413                                                       func_decl_line);
414                   if (func_decl_file == prologue_end_line.file ||
415                       func_decl_file == prologue_end_line.original_file) {
416                     // Add all the lines between the function declaration and
417                     // the first non-prologue source line to the list of lines
418                     // to print.
419                     for (uint32_t lineno = func_decl_line;
420                          lineno <= prologue_end_line.line; lineno++) {
421                       SourceLine this_line;
422                       this_line.file = func_decl_file;
423                       this_line.line = lineno;
424                       source_lines_to_display.lines.push_back(this_line);
425                     }
426                     // Mark the last line as the "current" one.  Usually this
427                     // is the open curly brace.
428                     if (source_lines_to_display.lines.size() > 0)
429                       source_lines_to_display.current_source_line =
430                           source_lines_to_display.lines.size() - 1;
431                   }
432                 }
433               }
434               sc.GetAddressRange(scope, 0, use_inline_block_range,
435                                  current_source_line_range);
436             }
437 
438             // If we've left a previous source line's address range, print a
439             // new source line
440             if (!current_source_line_range.ContainsFileAddress(addr)) {
441               sc.GetAddressRange(scope, 0, use_inline_block_range,
442                                  current_source_line_range);
443 
444               if (sc != prev_sc && sc.comp_unit && sc.line_entry.IsValid()) {
445                 SourceLine this_line;
446                 this_line.file = sc.line_entry.file;
447                 this_line.line = sc.line_entry.line;
448 
449                 if (!ElideMixedSourceAndDisassemblyLine(exe_ctx, sc,
450                                                         this_line)) {
451                   // Only print this source line if it is different from the
452                   // last source line we printed.  There may have been inlined
453                   // functions between these lines that we elided, resulting in
454                   // the same line being printed twice in a row for a
455                   // contiguous block of assembly instructions.
456                   if (this_line != previous_line) {
457 
458                     std::vector<uint32_t> previous_lines;
459                     for (uint32_t i = 0;
460                          i < num_mixed_context_lines &&
461                          (this_line.line - num_mixed_context_lines) > 0;
462                          i++) {
463                       uint32_t line =
464                           this_line.line - num_mixed_context_lines + i;
465                       auto pos = source_lines_seen.find(this_line.file);
466                       if (pos != source_lines_seen.end()) {
467                         if (pos->second.count(line) == 1) {
468                           previous_lines.clear();
469                         } else {
470                           previous_lines.push_back(line);
471                         }
472                       }
473                     }
474                     for (size_t i = 0; i < previous_lines.size(); i++) {
475                       SourceLine previous_line;
476                       previous_line.file = this_line.file;
477                       previous_line.line = previous_lines[i];
478                       auto pos = source_lines_seen.find(previous_line.file);
479                       if (pos != source_lines_seen.end()) {
480                         pos->second.insert(previous_line.line);
481                       }
482                       source_lines_to_display.lines.push_back(previous_line);
483                     }
484 
485                     source_lines_to_display.lines.push_back(this_line);
486                     source_lines_to_display.current_source_line =
487                         source_lines_to_display.lines.size() - 1;
488 
489                     for (uint32_t i = 0; i < num_mixed_context_lines; i++) {
490                       SourceLine next_line;
491                       next_line.file = this_line.file;
492                       next_line.line = this_line.line + i + 1;
493                       auto pos = source_lines_seen.find(next_line.file);
494                       if (pos != source_lines_seen.end()) {
495                         if (pos->second.count(next_line.line) == 1)
496                           break;
497                         pos->second.insert(next_line.line);
498                       }
499                       source_lines_to_display.lines.push_back(next_line);
500                     }
501                   }
502                   previous_line = this_line;
503                 }
504               }
505             }
506           }
507         } else {
508           sc.Clear(true);
509         }
510       }
511 
512       if (source_lines_to_display.lines.size() > 0) {
513         strm.EOL();
514         for (size_t idx = 0; idx < source_lines_to_display.lines.size();
515              idx++) {
516           SourceLine ln = source_lines_to_display.lines[idx];
517           const char *line_highlight = "";
518           if (inst_is_at_pc && (options & eOptionMarkPCSourceLine)) {
519             line_highlight = "->";
520           } else if (idx == source_lines_to_display.current_source_line) {
521             line_highlight = "**";
522           }
523           source_manager.DisplaySourceLinesWithLineNumbers(
524               ln.file, ln.line, ln.column, 0, 0, line_highlight, &strm);
525         }
526         if (source_lines_to_display.print_source_context_end_eol)
527           strm.EOL();
528       }
529 
530       const bool show_bytes = (options & eOptionShowBytes) != 0;
531       inst->Dump(&strm, max_opcode_byte_size, true, show_bytes, &exe_ctx, &sc,
532                  &prev_sc, nullptr, address_text_size);
533       strm.EOL();
534     } else {
535       break;
536     }
537   }
538 }
539 
540 bool Disassembler::Disassemble(Debugger &debugger, const ArchSpec &arch,
541                                StackFrame &frame, Stream &strm) {
542   AddressRange range;
543   SymbolContext sc(
544       frame.GetSymbolContext(eSymbolContextFunction | eSymbolContextSymbol));
545   if (sc.function) {
546     range = sc.function->GetAddressRange();
547   } else if (sc.symbol && sc.symbol->ValueIsAddress()) {
548     range.GetBaseAddress() = sc.symbol->GetAddressRef();
549     range.SetByteSize(sc.symbol->GetByteSize());
550   } else {
551     range.GetBaseAddress() = frame.GetFrameCodeAddress();
552   }
553 
554     if (range.GetBaseAddress().IsValid() && range.GetByteSize() == 0)
555       range.SetByteSize(DEFAULT_DISASM_BYTE_SIZE);
556 
557     Disassembler::Limit limit = {Disassembler::Limit::Bytes,
558                                  range.GetByteSize()};
559     if (limit.value == 0)
560       limit.value = DEFAULT_DISASM_BYTE_SIZE;
561 
562     return Disassemble(debugger, arch, nullptr, nullptr, frame,
563                        range.GetBaseAddress(), limit, false, 0, 0, strm);
564 }
565 
// Construct an instruction located at address with the given address class.
// The opcode starts out value-initialized and the "strings calculated" flag
// starts out false.
Instruction::Instruction(const Address &address, AddressClass addr_class)
    : m_address(address), m_address_class(addr_class), m_opcode(),
      m_calculated_strings(false) {}
569 
// The compiler-generated destructor is sufficient; it is defined here, out of
// line.
Instruction::~Instruction() = default;
571 
572 AddressClass Instruction::GetAddressClass() {
573   if (m_address_class == AddressClass::eInvalid)
574     m_address_class = m_address.GetAddressClass();
575   return m_address_class;
576 }
577 
578 void Instruction::Dump(lldb_private::Stream *s, uint32_t max_opcode_byte_size,
579                        bool show_address, bool show_bytes,
580                        const ExecutionContext *exe_ctx,
581                        const SymbolContext *sym_ctx,
582                        const SymbolContext *prev_sym_ctx,
583                        const FormatEntity::Entry *disassembly_addr_format,
584                        size_t max_address_text_size) {
585   size_t opcode_column_width = 7;
586   const size_t operand_column_width = 25;
587 
588   CalculateMnemonicOperandsAndCommentIfNeeded(exe_ctx);
589 
590   StreamString ss;
591 
592   if (show_address) {
593     Debugger::FormatDisassemblerAddress(disassembly_addr_format, sym_ctx,
594                                         prev_sym_ctx, exe_ctx, &m_address, ss);
595     ss.FillLastLineToColumn(max_address_text_size, ' ');
596   }
597 
598   if (show_bytes) {
599     if (m_opcode.GetType() == Opcode::eTypeBytes) {
600       // x86_64 and i386 are the only ones that use bytes right now so pad out
601       // the byte dump to be able to always show 15 bytes (3 chars each) plus a
602       // space
603       if (max_opcode_byte_size > 0)
604         m_opcode.Dump(&ss, max_opcode_byte_size * 3 + 1);
605       else
606         m_opcode.Dump(&ss, 15 * 3 + 1);
607     } else {
608       // Else, we have ARM or MIPS which can show up to a uint32_t 0x00000000
609       // (10 spaces) plus two for padding...
610       if (max_opcode_byte_size > 0)
611         m_opcode.Dump(&ss, max_opcode_byte_size * 3 + 1);
612       else
613         m_opcode.Dump(&ss, 12);
614     }
615   }
616 
617   const size_t opcode_pos = ss.GetSizeOfLastLine();
618 
619   // The default opcode size of 7 characters is plenty for most architectures
620   // but some like arm can pull out the occasional vqrshrun.s16.  We won't get
621   // consistent column spacing in these cases, unfortunately.
622   if (m_opcode_name.length() >= opcode_column_width) {
623     opcode_column_width = m_opcode_name.length() + 1;
624   }
625 
626   ss.PutCString(m_opcode_name);
627   ss.FillLastLineToColumn(opcode_pos + opcode_column_width, ' ');
628   ss.PutCString(m_mnemonics);
629 
630   if (!m_comment.empty()) {
631     ss.FillLastLineToColumn(
632         opcode_pos + opcode_column_width + operand_column_width, ' ');
633     ss.PutCString(" ; ");
634     ss.PutCString(m_comment);
635   }
636   s->PutCString(ss.GetString());
637 }
638 
639 bool Instruction::DumpEmulation(const ArchSpec &arch) {
640   std::unique_ptr<EmulateInstruction> insn_emulator_up(
641       EmulateInstruction::FindPlugin(arch, eInstructionTypeAny, nullptr));
642   if (insn_emulator_up) {
643     insn_emulator_up->SetInstruction(GetOpcode(), GetAddress(), nullptr);
644     return insn_emulator_up->EvaluateInstruction(0);
645   }
646 
647   return false;
648 }
649 
650 bool Instruction::CanSetBreakpoint () {
651   return !HasDelaySlot();
652 }
653 
// Report whether this instruction has a delay slot.  This implementation
// always answers "no".
// NOTE(review): presumably architecture-specific instruction classes override
// this -- confirm against the class declaration.
bool Instruction::HasDelaySlot() {
  // Default is false.
  return false;
}
658 
659 OptionValueSP Instruction::ReadArray(FILE *in_file, Stream *out_stream,
660                                      OptionValue::Type data_type) {
661   bool done = false;
662   char buffer[1024];
663 
664   auto option_value_sp = std::make_shared<OptionValueArray>(1u << data_type);
665 
666   int idx = 0;
667   while (!done) {
668     if (!fgets(buffer, 1023, in_file)) {
669       out_stream->Printf(
670           "Instruction::ReadArray:  Error reading file (fgets).\n");
671       option_value_sp.reset();
672       return option_value_sp;
673     }
674 
675     std::string line(buffer);
676 
677     size_t len = line.size();
678     if (line[len - 1] == '\n') {
679       line[len - 1] = '\0';
680       line.resize(len - 1);
681     }
682 
683     if ((line.size() == 1) && line[0] == ']') {
684       done = true;
685       line.clear();
686     }
687 
688     if (!line.empty()) {
689       std::string value;
690       static RegularExpression g_reg_exp(
691           llvm::StringRef("^[ \t]*([^ \t]+)[ \t]*$"));
692       llvm::SmallVector<llvm::StringRef, 2> matches;
693       if (g_reg_exp.Execute(line, &matches))
694         value = matches[1].str();
695       else
696         value = line;
697 
698       OptionValueSP data_value_sp;
699       switch (data_type) {
700       case OptionValue::eTypeUInt64:
701         data_value_sp = std::make_shared<OptionValueUInt64>(0, 0);
702         data_value_sp->SetValueFromString(value);
703         break;
704       // Other types can be added later as needed.
705       default:
706         data_value_sp = std::make_shared<OptionValueString>(value.c_str(), "");
707         break;
708       }
709 
710       option_value_sp->GetAsArray()->InsertValue(idx, data_value_sp);
711       ++idx;
712     }
713   }
714 
715   return option_value_sp;
716 }
717 
718 OptionValueSP Instruction::ReadDictionary(FILE *in_file, Stream *out_stream) {
719   bool done = false;
720   char buffer[1024];
721 
722   auto option_value_sp = std::make_shared<OptionValueDictionary>();
723   static ConstString encoding_key("data_encoding");
724   OptionValue::Type data_type = OptionValue::eTypeInvalid;
725 
726   while (!done) {
727     // Read the next line in the file
728     if (!fgets(buffer, 1023, in_file)) {
729       out_stream->Printf(
730           "Instruction::ReadDictionary: Error reading file (fgets).\n");
731       option_value_sp.reset();
732       return option_value_sp;
733     }
734 
735     // Check to see if the line contains the end-of-dictionary marker ("}")
736     std::string line(buffer);
737 
738     size_t len = line.size();
739     if (line[len - 1] == '\n') {
740       line[len - 1] = '\0';
741       line.resize(len - 1);
742     }
743 
744     if ((line.size() == 1) && (line[0] == '}')) {
745       done = true;
746       line.clear();
747     }
748 
749     // Try to find a key-value pair in the current line and add it to the
750     // dictionary.
751     if (!line.empty()) {
752       static RegularExpression g_reg_exp(llvm::StringRef(
753           "^[ \t]*([a-zA-Z_][a-zA-Z0-9_]*)[ \t]*=[ \t]*(.*)[ \t]*$"));
754 
755       llvm::SmallVector<llvm::StringRef, 3> matches;
756 
757       bool reg_exp_success = g_reg_exp.Execute(line, &matches);
758       std::string key;
759       std::string value;
760       if (reg_exp_success) {
761         key = matches[1].str();
762         value = matches[2].str();
763       } else {
764         out_stream->Printf("Instruction::ReadDictionary: Failure executing "
765                            "regular expression.\n");
766         option_value_sp.reset();
767         return option_value_sp;
768       }
769 
770       ConstString const_key(key.c_str());
771       // Check value to see if it's the start of an array or dictionary.
772 
773       lldb::OptionValueSP value_sp;
774       assert(value.empty() == false);
775       assert(key.empty() == false);
776 
777       if (value[0] == '{') {
778         assert(value.size() == 1);
779         // value is a dictionary
780         value_sp = ReadDictionary(in_file, out_stream);
781         if (!value_sp) {
782           option_value_sp.reset();
783           return option_value_sp;
784         }
785       } else if (value[0] == '[') {
786         assert(value.size() == 1);
787         // value is an array
788         value_sp = ReadArray(in_file, out_stream, data_type);
789         if (!value_sp) {
790           option_value_sp.reset();
791           return option_value_sp;
792         }
793         // We've used the data_type to read an array; re-set the type to
794         // Invalid
795         data_type = OptionValue::eTypeInvalid;
796       } else if ((value[0] == '0') && (value[1] == 'x')) {
797         value_sp = std::make_shared<OptionValueUInt64>(0, 0);
798         value_sp->SetValueFromString(value);
799       } else {
800         size_t len = value.size();
801         if ((value[0] == '"') && (value[len - 1] == '"'))
802           value = value.substr(1, len - 2);
803         value_sp = std::make_shared<OptionValueString>(value.c_str(), "");
804       }
805 
806       if (const_key == encoding_key) {
807         // A 'data_encoding=..." is NOT a normal key-value pair; it is meta-data
808         // indicating the
809         // data type of an upcoming array (usually the next bit of data to be
810         // read in).
811         if (strcmp(value.c_str(), "uint32_t") == 0)
812           data_type = OptionValue::eTypeUInt64;
813       } else
814         option_value_sp->GetAsDictionary()->SetValueForKey(const_key, value_sp,
815                                                            false);
816     }
817   }
818 
819   return option_value_sp;
820 }
821 
822 bool Instruction::TestEmulation(Stream *out_stream, const char *file_name) {
823   if (!out_stream)
824     return false;
825 
826   if (!file_name) {
827     out_stream->Printf("Instruction::TestEmulation:  Missing file_name.");
828     return false;
829   }
830   FILE *test_file = FileSystem::Instance().Fopen(file_name, "r");
831   if (!test_file) {
832     out_stream->Printf(
833         "Instruction::TestEmulation: Attempt to open test file failed.");
834     return false;
835   }
836 
837   char buffer[256];
838   if (!fgets(buffer, 255, test_file)) {
839     out_stream->Printf(
840         "Instruction::TestEmulation: Error reading first line of test file.\n");
841     fclose(test_file);
842     return false;
843   }
844 
845   if (strncmp(buffer, "InstructionEmulationState={", 27) != 0) {
846     out_stream->Printf("Instructin::TestEmulation: Test file does not contain "
847                        "emulation state dictionary\n");
848     fclose(test_file);
849     return false;
850   }
851 
852   // Read all the test information from the test file into an
853   // OptionValueDictionary.
854 
855   OptionValueSP data_dictionary_sp(ReadDictionary(test_file, out_stream));
856   if (!data_dictionary_sp) {
857     out_stream->Printf(
858         "Instruction::TestEmulation:  Error reading Dictionary Object.\n");
859     fclose(test_file);
860     return false;
861   }
862 
863   fclose(test_file);
864 
865   OptionValueDictionary *data_dictionary =
866       data_dictionary_sp->GetAsDictionary();
867   static ConstString description_key("assembly_string");
868   static ConstString triple_key("triple");
869 
870   OptionValueSP value_sp = data_dictionary->GetValueForKey(description_key);
871 
872   if (!value_sp) {
873     out_stream->Printf("Instruction::TestEmulation:  Test file does not "
874                        "contain description string.\n");
875     return false;
876   }
877 
878   SetDescription(value_sp->GetStringValue());
879 
880   value_sp = data_dictionary->GetValueForKey(triple_key);
881   if (!value_sp) {
882     out_stream->Printf(
883         "Instruction::TestEmulation: Test file does not contain triple.\n");
884     return false;
885   }
886 
887   ArchSpec arch;
888   arch.SetTriple(llvm::Triple(value_sp->GetStringValue()));
889 
890   bool success = false;
891   std::unique_ptr<EmulateInstruction> insn_emulator_up(
892       EmulateInstruction::FindPlugin(arch, eInstructionTypeAny, nullptr));
893   if (insn_emulator_up)
894     success =
895         insn_emulator_up->TestEmulation(out_stream, arch, data_dictionary);
896 
897   if (success)
898     out_stream->Printf("Emulation test succeeded.");
899   else
900     out_stream->Printf("Emulation test failed.");
901 
902   return success;
903 }
904 
905 bool Instruction::Emulate(
906     const ArchSpec &arch, uint32_t evaluate_options, void *baton,
907     EmulateInstruction::ReadMemoryCallback read_mem_callback,
908     EmulateInstruction::WriteMemoryCallback write_mem_callback,
909     EmulateInstruction::ReadRegisterCallback read_reg_callback,
910     EmulateInstruction::WriteRegisterCallback write_reg_callback) {
911   std::unique_ptr<EmulateInstruction> insn_emulator_up(
912       EmulateInstruction::FindPlugin(arch, eInstructionTypeAny, nullptr));
913   if (insn_emulator_up) {
914     insn_emulator_up->SetBaton(baton);
915     insn_emulator_up->SetCallbacks(read_mem_callback, write_mem_callback,
916                                    read_reg_callback, write_reg_callback);
917     insn_emulator_up->SetInstruction(GetOpcode(), GetAddress(), nullptr);
918     return insn_emulator_up->EvaluateInstruction(evaluate_options);
919   }
920 
921   return false;
922 }
923 
// Forwards to m_opcode.GetData(data) and returns whatever that call returns.
uint32_t Instruction::GetData(DataExtractor &data) {
  return m_opcode.GetData(data);
}
927 
// Creates a list with a value-initialized m_instructions container.
InstructionList::InstructionList() : m_instructions() {}
929 
// Defaulted destructor; no custom cleanup is performed here.
InstructionList::~InstructionList() = default;
931 
// Returns the number of entries currently stored in m_instructions.
size_t InstructionList::GetSize() const { return m_instructions.size(); }
933 
934 uint32_t InstructionList::GetMaxOpcocdeByteSize() const {
935   uint32_t max_inst_size = 0;
936   collection::const_iterator pos, end;
937   for (pos = m_instructions.begin(), end = m_instructions.end(); pos != end;
938        ++pos) {
939     uint32_t inst_size = (*pos)->GetOpcode().GetByteSize();
940     if (max_inst_size < inst_size)
941       max_inst_size = inst_size;
942   }
943   return max_inst_size;
944 }
945 
946 InstructionSP InstructionList::GetInstructionAtIndex(size_t idx) const {
947   InstructionSP inst_sp;
948   if (idx < m_instructions.size())
949     inst_sp = m_instructions[idx];
950   return inst_sp;
951 }
952 
953 InstructionSP InstructionList::GetInstructionAtAddress(const Address &address) {
954   uint32_t index = GetIndexOfInstructionAtAddress(address);
955   if (index != UINT32_MAX)
956     return GetInstructionAtIndex(index);
957   return nullptr;
958 }
959 
960 void InstructionList::Dump(Stream *s, bool show_address, bool show_bytes,
961                            const ExecutionContext *exe_ctx) {
962   const uint32_t max_opcode_byte_size = GetMaxOpcocdeByteSize();
963   collection::const_iterator pos, begin, end;
964 
965   const FormatEntity::Entry *disassembly_format = nullptr;
966   FormatEntity::Entry format;
967   if (exe_ctx && exe_ctx->HasTargetScope()) {
968     disassembly_format =
969         exe_ctx->GetTargetRef().GetDebugger().GetDisassemblyFormat();
970   } else {
971     FormatEntity::Parse("${addr}: ", format);
972     disassembly_format = &format;
973   }
974 
975   for (begin = m_instructions.begin(), end = m_instructions.end(), pos = begin;
976        pos != end; ++pos) {
977     if (pos != begin)
978       s->EOL();
979     (*pos)->Dump(s, max_opcode_byte_size, show_address, show_bytes, exe_ctx,
980                  nullptr, nullptr, disassembly_format, 0);
981   }
982 }
983 
// Removes every instruction from the list.
void InstructionList::Clear() { m_instructions.clear(); }
985 
986 void InstructionList::Append(lldb::InstructionSP &inst_sp) {
987   if (inst_sp)
988     m_instructions.push_back(inst_sp);
989 }
990 
991 uint32_t
992 InstructionList::GetIndexOfNextBranchInstruction(uint32_t start,
993                                                  bool ignore_calls,
994                                                  bool *found_calls) const {
995   size_t num_instructions = m_instructions.size();
996 
997   uint32_t next_branch = UINT32_MAX;
998 
999   if (found_calls)
1000     *found_calls = false;
1001   for (size_t i = start; i < num_instructions; i++) {
1002     if (m_instructions[i]->DoesBranch()) {
1003       if (ignore_calls && m_instructions[i]->IsCall()) {
1004         if (found_calls)
1005           *found_calls = true;
1006         continue;
1007       }
1008       next_branch = i;
1009       break;
1010     }
1011   }
1012 
1013   return next_branch;
1014 }
1015 
1016 uint32_t
1017 InstructionList::GetIndexOfInstructionAtAddress(const Address &address) {
1018   size_t num_instructions = m_instructions.size();
1019   uint32_t index = UINT32_MAX;
1020   for (size_t i = 0; i < num_instructions; i++) {
1021     if (m_instructions[i]->GetAddress() == address) {
1022       index = i;
1023       break;
1024     }
1025   }
1026   return index;
1027 }
1028 
1029 uint32_t
1030 InstructionList::GetIndexOfInstructionAtLoadAddress(lldb::addr_t load_addr,
1031                                                     Target &target) {
1032   Address address;
1033   address.SetLoadAddress(load_addr, &target);
1034   return GetIndexOfInstructionAtAddress(address);
1035 }
1036 
1037 size_t Disassembler::ParseInstructions(Target &target, Address start,
1038                                        Limit limit, Stream *error_strm_ptr,
1039                                        bool force_live_memory) {
1040   m_instruction_list.Clear();
1041 
1042   if (!start.IsValid())
1043     return 0;
1044 
1045   start = ResolveAddress(target, start);
1046 
1047   addr_t byte_size = limit.value;
1048   if (limit.kind == Limit::Instructions)
1049     byte_size *= m_arch.GetMaximumOpcodeByteSize();
1050   auto data_sp = std::make_shared<DataBufferHeap>(byte_size, '\0');
1051 
1052   Status error;
1053   lldb::addr_t load_addr = LLDB_INVALID_ADDRESS;
1054   const size_t bytes_read =
1055       target.ReadMemory(start, data_sp->GetBytes(), data_sp->GetByteSize(),
1056                         error, force_live_memory, &load_addr);
1057   const bool data_from_file = load_addr == LLDB_INVALID_ADDRESS;
1058 
1059   if (bytes_read == 0) {
1060     if (error_strm_ptr) {
1061       if (const char *error_cstr = error.AsCString())
1062         error_strm_ptr->Printf("error: %s\n", error_cstr);
1063     }
1064     return 0;
1065   }
1066 
1067   if (bytes_read != data_sp->GetByteSize())
1068     data_sp->SetByteSize(bytes_read);
1069   DataExtractor data(data_sp, m_arch.GetByteOrder(),
1070                      m_arch.GetAddressByteSize());
1071   return DecodeInstructions(start, data, 0,
1072                             limit.kind == Limit::Instructions ? limit.value
1073                                                               : UINT32_MAX,
1074                             false, data_from_file);
1075 }
1076 
1077 // Disassembler copy constructor
1078 Disassembler::Disassembler(const ArchSpec &arch, const char *flavor)
1079     : m_arch(arch), m_instruction_list(), m_base_addr(LLDB_INVALID_ADDRESS),
1080       m_flavor() {
1081   if (flavor == nullptr)
1082     m_flavor.assign("default");
1083   else
1084     m_flavor.assign(flavor);
1085 
1086   // If this is an arm variant that can only include thumb (T16, T32)
1087   // instructions, force the arch triple to be "thumbv.." instead of "armv..."
1088   if (arch.IsAlwaysThumbInstructions()) {
1089     std::string thumb_arch_name(arch.GetTriple().getArchName().str());
1090     // Replace "arm" with "thumb" so we get all thumb variants correct
1091     if (thumb_arch_name.size() > 3) {
1092       thumb_arch_name.erase(0, 3);
1093       thumb_arch_name.insert(0, "thumb");
1094     }
1095     m_arch.SetTriple(thumb_arch_name.c_str());
1096   }
1097 }
1098 
// Defaulted destructor; no custom cleanup is performed here.
Disassembler::~Disassembler() = default;
1100 
// Returns a mutable reference to this disassembler's instruction list.
InstructionList &Disassembler::GetInstructionList() {
  return m_instruction_list;
}
1104 
// Returns a read-only reference to this disassembler's instruction list.
const InstructionList &Disassembler::GetInstructionList() const {
  return m_instruction_list;
}
1108 
1109 // Class PseudoInstruction
1110 
// Constructs a pseudo instruction with a default-constructed Address, an
// unknown address class, and a value-initialized description.
PseudoInstruction::PseudoInstruction()
    : Instruction(Address(), AddressClass::eUnknown), m_description() {}
1113 
// Defaulted destructor; no custom cleanup is performed here.
PseudoInstruction::~PseudoInstruction() = default;
1115 
// Always returns false: a pseudo instruction is never reported as a branch.
bool PseudoInstruction::DoesBranch() {
  // This is NOT a valid question for a pseudo instruction.
  return false;
}
1120 
// Always returns false: a pseudo instruction is never reported as having a
// delay slot.
bool PseudoInstruction::HasDelaySlot() {
  // This is NOT a valid question for a pseudo instruction.
  return false;
}
1125 
// Performs no decoding: all three arguments are ignored and the byte size of
// the opcode already stored in m_opcode is returned.
size_t PseudoInstruction::Decode(const lldb_private::Disassembler &disassembler,
                                 const lldb_private::DataExtractor &data,
                                 lldb::offset_t data_offset) {
  return m_opcode.GetByteSize();
}
1131 
1132 void PseudoInstruction::SetOpcode(size_t opcode_size, void *opcode_data) {
1133   if (!opcode_data)
1134     return;
1135 
1136   switch (opcode_size) {
1137   case 8: {
1138     uint8_t value8 = *((uint8_t *)opcode_data);
1139     m_opcode.SetOpcode8(value8, eByteOrderInvalid);
1140     break;
1141   }
1142   case 16: {
1143     uint16_t value16 = *((uint16_t *)opcode_data);
1144     m_opcode.SetOpcode16(value16, eByteOrderInvalid);
1145     break;
1146   }
1147   case 32: {
1148     uint32_t value32 = *((uint32_t *)opcode_data);
1149     m_opcode.SetOpcode32(value32, eByteOrderInvalid);
1150     break;
1151   }
1152   case 64: {
1153     uint64_t value64 = *((uint64_t *)opcode_data);
1154     m_opcode.SetOpcode64(value64, eByteOrderInvalid);
1155     break;
1156   }
1157   default:
1158     break;
1159   }
1160 }
1161 
1162 void PseudoInstruction::SetDescription(llvm::StringRef description) {
1163   m_description = std::string(description);
1164 }
1165 
1166 Instruction::Operand Instruction::Operand::BuildRegister(ConstString &r) {
1167   Operand ret;
1168   ret.m_type = Type::Register;
1169   ret.m_register = r;
1170   return ret;
1171 }
1172 
1173 Instruction::Operand Instruction::Operand::BuildImmediate(lldb::addr_t imm,
1174                                                           bool neg) {
1175   Operand ret;
1176   ret.m_type = Type::Immediate;
1177   ret.m_immediate = imm;
1178   ret.m_negative = neg;
1179   return ret;
1180 }
1181 
1182 Instruction::Operand Instruction::Operand::BuildImmediate(int64_t imm) {
1183   Operand ret;
1184   ret.m_type = Type::Immediate;
1185   if (imm < 0) {
1186     ret.m_immediate = -imm;
1187     ret.m_negative = true;
1188   } else {
1189     ret.m_immediate = imm;
1190     ret.m_negative = false;
1191   }
1192   return ret;
1193 }
1194 
1195 Instruction::Operand
1196 Instruction::Operand::BuildDereference(const Operand &ref) {
1197   Operand ret;
1198   ret.m_type = Type::Dereference;
1199   ret.m_children = {ref};
1200   return ret;
1201 }
1202 
1203 Instruction::Operand Instruction::Operand::BuildSum(const Operand &lhs,
1204                                                     const Operand &rhs) {
1205   Operand ret;
1206   ret.m_type = Type::Sum;
1207   ret.m_children = {lhs, rhs};
1208   return ret;
1209 }
1210 
1211 Instruction::Operand Instruction::Operand::BuildProduct(const Operand &lhs,
1212                                                         const Operand &rhs) {
1213   Operand ret;
1214   ret.m_type = Type::Product;
1215   ret.m_children = {lhs, rhs};
1216   return ret;
1217 }
1218 
1219 std::function<bool(const Instruction::Operand &)>
1220 lldb_private::OperandMatchers::MatchBinaryOp(
1221     std::function<bool(const Instruction::Operand &)> base,
1222     std::function<bool(const Instruction::Operand &)> left,
1223     std::function<bool(const Instruction::Operand &)> right) {
1224   return [base, left, right](const Instruction::Operand &op) -> bool {
1225     return (base(op) && op.m_children.size() == 2 &&
1226             ((left(op.m_children[0]) && right(op.m_children[1])) ||
1227              (left(op.m_children[1]) && right(op.m_children[0]))));
1228   };
1229 }
1230 
1231 std::function<bool(const Instruction::Operand &)>
1232 lldb_private::OperandMatchers::MatchUnaryOp(
1233     std::function<bool(const Instruction::Operand &)> base,
1234     std::function<bool(const Instruction::Operand &)> child) {
1235   return [base, child](const Instruction::Operand &op) -> bool {
1236     return (base(op) && op.m_children.size() == 1 && child(op.m_children[0]));
1237   };
1238 }
1239 
1240 std::function<bool(const Instruction::Operand &)>
1241 lldb_private::OperandMatchers::MatchRegOp(const RegisterInfo &info) {
1242   return [&info](const Instruction::Operand &op) {
1243     return (op.m_type == Instruction::Operand::Type::Register &&
1244             (op.m_register == ConstString(info.name) ||
1245              op.m_register == ConstString(info.alt_name)));
1246   };
1247 }
1248 
1249 std::function<bool(const Instruction::Operand &)>
1250 lldb_private::OperandMatchers::FetchRegOp(ConstString &reg) {
1251   return [&reg](const Instruction::Operand &op) {
1252     if (op.m_type != Instruction::Operand::Type::Register) {
1253       return false;
1254     }
1255     reg = op.m_register;
1256     return true;
1257   };
1258 }
1259 
1260 std::function<bool(const Instruction::Operand &)>
1261 lldb_private::OperandMatchers::MatchImmOp(int64_t imm) {
1262   return [imm](const Instruction::Operand &op) {
1263     return (op.m_type == Instruction::Operand::Type::Immediate &&
1264             ((op.m_negative && op.m_immediate == (uint64_t)-imm) ||
1265              (!op.m_negative && op.m_immediate == (uint64_t)imm)));
1266   };
1267 }
1268 
1269 std::function<bool(const Instruction::Operand &)>
1270 lldb_private::OperandMatchers::FetchImmOp(int64_t &imm) {
1271   return [&imm](const Instruction::Operand &op) {
1272     if (op.m_type != Instruction::Operand::Type::Immediate) {
1273       return false;
1274     }
1275     if (op.m_negative) {
1276       imm = -((int64_t)op.m_immediate);
1277     } else {
1278       imm = ((int64_t)op.m_immediate);
1279     }
1280     return true;
1281   };
1282 }
1283 
1284 std::function<bool(const Instruction::Operand &)>
1285 lldb_private::OperandMatchers::MatchOpType(Instruction::Operand::Type type) {
1286   return [type](const Instruction::Operand &op) { return op.m_type == type; };
1287 }
1288