1 //===-- Disassembler.cpp --------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "lldb/Core/Disassembler.h"
10 
11 #include "lldb/Core/AddressRange.h"
12 #include "lldb/Core/Debugger.h"
13 #include "lldb/Core/EmulateInstruction.h"
14 #include "lldb/Core/Mangled.h"
15 #include "lldb/Core/Module.h"
16 #include "lldb/Core/ModuleList.h"
17 #include "lldb/Core/PluginManager.h"
18 #include "lldb/Core/SourceManager.h"
19 #include "lldb/Host/FileSystem.h"
20 #include "lldb/Interpreter/OptionValue.h"
21 #include "lldb/Interpreter/OptionValueArray.h"
22 #include "lldb/Interpreter/OptionValueDictionary.h"
23 #include "lldb/Interpreter/OptionValueRegex.h"
24 #include "lldb/Interpreter/OptionValueString.h"
25 #include "lldb/Interpreter/OptionValueUInt64.h"
26 #include "lldb/Symbol/Function.h"
27 #include "lldb/Symbol/Symbol.h"
28 #include "lldb/Symbol/SymbolContext.h"
29 #include "lldb/Target/ExecutionContext.h"
30 #include "lldb/Target/SectionLoadList.h"
31 #include "lldb/Target/StackFrame.h"
32 #include "lldb/Target/Target.h"
33 #include "lldb/Target/Thread.h"
34 #include "lldb/Utility/DataBufferHeap.h"
35 #include "lldb/Utility/DataExtractor.h"
36 #include "lldb/Utility/RegularExpression.h"
37 #include "lldb/Utility/Status.h"
38 #include "lldb/Utility/Stream.h"
39 #include "lldb/Utility/StreamString.h"
40 #include "lldb/Utility/Timer.h"
41 #include "lldb/lldb-private-enumerations.h"
42 #include "lldb/lldb-private-interfaces.h"
43 #include "lldb/lldb-private-types.h"
44 #include "llvm/ADT/Triple.h"
45 #include "llvm/Support/Compiler.h"
46 
47 #include <cstdint>
48 #include <cstring>
49 #include <utility>
50 
51 #include <cassert>
52 
53 #define DEFAULT_DISASM_BYTE_SIZE 32
54 
55 using namespace lldb;
56 using namespace lldb_private;
57 
58 DisassemblerSP Disassembler::FindPlugin(const ArchSpec &arch,
59                                         const char *flavor,
60                                         const char *plugin_name) {
61   LLDB_SCOPED_TIMERF("Disassembler::FindPlugin (arch = %s, plugin_name = %s)",
62                      arch.GetArchitectureName(), plugin_name);
63 
64   DisassemblerCreateInstance create_callback = nullptr;
65 
66   if (plugin_name) {
67     create_callback =
68         PluginManager::GetDisassemblerCreateCallbackForPluginName(plugin_name);
69     if (create_callback) {
70       DisassemblerSP disassembler_sp(create_callback(arch, flavor));
71 
72       if (disassembler_sp)
73         return disassembler_sp;
74     }
75   } else {
76     for (uint32_t idx = 0;
77          (create_callback = PluginManager::GetDisassemblerCreateCallbackAtIndex(
78               idx)) != nullptr;
79          ++idx) {
80       DisassemblerSP disassembler_sp(create_callback(arch, flavor));
81 
82       if (disassembler_sp)
83         return disassembler_sp;
84     }
85   }
86   return DisassemblerSP();
87 }
88 
89 DisassemblerSP Disassembler::FindPluginForTarget(const Target &target,
90                                                  const ArchSpec &arch,
91                                                  const char *flavor,
92                                                  const char *plugin_name) {
93   if (flavor == nullptr) {
94     // FIXME - we don't have the mechanism in place to do per-architecture
95     // settings.  But since we know that for now we only support flavors on x86
96     // & x86_64,
97     if (arch.GetTriple().getArch() == llvm::Triple::x86 ||
98         arch.GetTriple().getArch() == llvm::Triple::x86_64)
99       flavor = target.GetDisassemblyFlavor();
100   }
101   return FindPlugin(arch, flavor, plugin_name);
102 }
103 
104 static Address ResolveAddress(Target &target, const Address &addr) {
105   if (!addr.IsSectionOffset()) {
106     Address resolved_addr;
107     // If we weren't passed in a section offset address range, try and resolve
108     // it to something
109     bool is_resolved = target.GetSectionLoadList().IsEmpty()
110                            ? target.GetImages().ResolveFileAddress(
111                                  addr.GetOffset(), resolved_addr)
112                            : target.GetSectionLoadList().ResolveLoadAddress(
113                                  addr.GetOffset(), resolved_addr);
114 
115     // We weren't able to resolve the address, just treat it as a raw address
116     if (is_resolved && resolved_addr.IsValid())
117       return resolved_addr;
118   }
119   return addr;
120 }
121 
122 lldb::DisassemblerSP Disassembler::DisassembleRange(
123     const ArchSpec &arch, const char *plugin_name, const char *flavor,
124     Target &target, const AddressRange &range, bool force_live_memory) {
125   if (range.GetByteSize() <= 0)
126     return {};
127 
128   if (!range.GetBaseAddress().IsValid())
129     return {};
130 
131   lldb::DisassemblerSP disasm_sp =
132       Disassembler::FindPluginForTarget(target, arch, flavor, plugin_name);
133 
134   if (!disasm_sp)
135     return {};
136 
137   const size_t bytes_disassembled = disasm_sp->ParseInstructions(
138       target, range.GetBaseAddress(), {Limit::Bytes, range.GetByteSize()},
139       nullptr, force_live_memory);
140   if (bytes_disassembled == 0)
141     return {};
142 
143   return disasm_sp;
144 }
145 
146 lldb::DisassemblerSP
147 Disassembler::DisassembleBytes(const ArchSpec &arch, const char *plugin_name,
148                                const char *flavor, const Address &start,
149                                const void *src, size_t src_len,
150                                uint32_t num_instructions, bool data_from_file) {
151   if (!src)
152     return {};
153 
154   lldb::DisassemblerSP disasm_sp =
155       Disassembler::FindPlugin(arch, flavor, plugin_name);
156 
157   if (!disasm_sp)
158     return {};
159 
160   DataExtractor data(src, src_len, arch.GetByteOrder(),
161                      arch.GetAddressByteSize());
162 
163   (void)disasm_sp->DecodeInstructions(start, data, 0, num_instructions, false,
164                                       data_from_file);
165   return disasm_sp;
166 }
167 
168 bool Disassembler::Disassemble(Debugger &debugger, const ArchSpec &arch,
169                                const char *plugin_name, const char *flavor,
170                                const ExecutionContext &exe_ctx,
171                                const Address &address, Limit limit,
172                                bool mixed_source_and_assembly,
173                                uint32_t num_mixed_context_lines,
174                                uint32_t options, Stream &strm) {
175   if (!exe_ctx.GetTargetPtr())
176     return false;
177 
178   lldb::DisassemblerSP disasm_sp(Disassembler::FindPluginForTarget(
179       exe_ctx.GetTargetRef(), arch, flavor, plugin_name));
180   if (!disasm_sp)
181     return false;
182 
183   const bool force_live_memory = true;
184   size_t bytes_disassembled = disasm_sp->ParseInstructions(
185       exe_ctx.GetTargetRef(), address, limit, &strm, force_live_memory);
186   if (bytes_disassembled == 0)
187     return false;
188 
189   disasm_sp->PrintInstructions(debugger, arch, exe_ctx,
190                                mixed_source_and_assembly,
191                                num_mixed_context_lines, options, strm);
192   return true;
193 }
194 
195 Disassembler::SourceLine
196 Disassembler::GetFunctionDeclLineEntry(const SymbolContext &sc) {
197   if (!sc.function)
198     return {};
199 
200   if (!sc.line_entry.IsValid())
201     return {};
202 
203   LineEntry prologue_end_line = sc.line_entry;
204   FileSpec func_decl_file;
205   uint32_t func_decl_line;
206   sc.function->GetStartLineSourceInfo(func_decl_file, func_decl_line);
207 
208   if (func_decl_file != prologue_end_line.file &&
209       func_decl_file != prologue_end_line.original_file)
210     return {};
211 
212   SourceLine decl_line;
213   decl_line.file = func_decl_file;
214   decl_line.line = func_decl_line;
215   // TODO: Do we care about column on these entries?  If so, we need to plumb
216   // that through GetStartLineSourceInfo.
217   decl_line.column = 0;
218   return decl_line;
219 }
220 
221 void Disassembler::AddLineToSourceLineTables(
222     SourceLine &line,
223     std::map<FileSpec, std::set<uint32_t>> &source_lines_seen) {
224   if (line.IsValid()) {
225     auto source_lines_seen_pos = source_lines_seen.find(line.file);
226     if (source_lines_seen_pos == source_lines_seen.end()) {
227       std::set<uint32_t> lines;
228       lines.insert(line.line);
229       source_lines_seen.emplace(line.file, lines);
230     } else {
231       source_lines_seen_pos->second.insert(line.line);
232     }
233   }
234 }
235 
236 bool Disassembler::ElideMixedSourceAndDisassemblyLine(
237     const ExecutionContext &exe_ctx, const SymbolContext &sc,
238     SourceLine &line) {
239 
240   // TODO: should we also check target.process.thread.step-avoid-libraries ?
241 
242   const RegularExpression *avoid_regex = nullptr;
243 
244   // Skip any line #0 entries - they are implementation details
245   if (line.line == 0)
246     return false;
247 
248   ThreadSP thread_sp = exe_ctx.GetThreadSP();
249   if (thread_sp) {
250     avoid_regex = thread_sp->GetSymbolsToAvoidRegexp();
251   } else {
252     TargetSP target_sp = exe_ctx.GetTargetSP();
253     if (target_sp) {
254       Status error;
255       OptionValueSP value_sp = target_sp->GetDebugger().GetPropertyValue(
256           &exe_ctx, "target.process.thread.step-avoid-regexp", false, error);
257       if (value_sp && value_sp->GetType() == OptionValue::eTypeRegex) {
258         OptionValueRegex *re = value_sp->GetAsRegex();
259         if (re) {
260           avoid_regex = re->GetCurrentValue();
261         }
262       }
263     }
264   }
265   if (avoid_regex && sc.symbol != nullptr) {
266     const char *function_name =
267         sc.GetFunctionName(Mangled::ePreferDemangledWithoutArguments)
268             .GetCString();
269     if (function_name && avoid_regex->Execute(function_name)) {
270       // skip this source line
271       return true;
272     }
273   }
274   // don't skip this source line
275   return false;
276 }
277 
278 void Disassembler::PrintInstructions(Debugger &debugger, const ArchSpec &arch,
279                                      const ExecutionContext &exe_ctx,
280                                      bool mixed_source_and_assembly,
281                                      uint32_t num_mixed_context_lines,
282                                      uint32_t options, Stream &strm) {
283   // We got some things disassembled...
284   size_t num_instructions_found = GetInstructionList().GetSize();
285 
286   const uint32_t max_opcode_byte_size =
287       GetInstructionList().GetMaxOpcocdeByteSize();
288   SymbolContext sc;
289   SymbolContext prev_sc;
290   AddressRange current_source_line_range;
291   const Address *pc_addr_ptr = nullptr;
292   StackFrame *frame = exe_ctx.GetFramePtr();
293 
294   TargetSP target_sp(exe_ctx.GetTargetSP());
295   SourceManager &source_manager =
296       target_sp ? target_sp->GetSourceManager() : debugger.GetSourceManager();
297 
298   if (frame) {
299     pc_addr_ptr = &frame->GetFrameCodeAddress();
300   }
301   const uint32_t scope =
302       eSymbolContextLineEntry | eSymbolContextFunction | eSymbolContextSymbol;
303   const bool use_inline_block_range = false;
304 
305   const FormatEntity::Entry *disassembly_format = nullptr;
306   FormatEntity::Entry format;
307   if (exe_ctx.HasTargetScope()) {
308     disassembly_format =
309         exe_ctx.GetTargetRef().GetDebugger().GetDisassemblyFormat();
310   } else {
311     FormatEntity::Parse("${addr}: ", format);
312     disassembly_format = &format;
313   }
314 
315   // First pass: step through the list of instructions, find how long the
316   // initial addresses strings are, insert padding in the second pass so the
317   // opcodes all line up nicely.
318 
319   // Also build up the source line mapping if this is mixed source & assembly
320   // mode. Calculate the source line for each assembly instruction (eliding
321   // inlined functions which the user wants to skip).
322 
323   std::map<FileSpec, std::set<uint32_t>> source_lines_seen;
324   Symbol *previous_symbol = nullptr;
325 
326   size_t address_text_size = 0;
327   for (size_t i = 0; i < num_instructions_found; ++i) {
328     Instruction *inst = GetInstructionList().GetInstructionAtIndex(i).get();
329     if (inst) {
330       const Address &addr = inst->GetAddress();
331       ModuleSP module_sp(addr.GetModule());
332       if (module_sp) {
333         const SymbolContextItem resolve_mask = eSymbolContextFunction |
334                                                eSymbolContextSymbol |
335                                                eSymbolContextLineEntry;
336         uint32_t resolved_mask =
337             module_sp->ResolveSymbolContextForAddress(addr, resolve_mask, sc);
338         if (resolved_mask) {
339           StreamString strmstr;
340           Debugger::FormatDisassemblerAddress(disassembly_format, &sc, nullptr,
341                                               &exe_ctx, &addr, strmstr);
342           size_t cur_line = strmstr.GetSizeOfLastLine();
343           if (cur_line > address_text_size)
344             address_text_size = cur_line;
345 
346           // Add entries to our "source_lines_seen" map+set which list which
347           // sources lines occur in this disassembly session.  We will print
348           // lines of context around a source line, but we don't want to print
349           // a source line that has a line table entry of its own - we'll leave
350           // that source line to be printed when it actually occurs in the
351           // disassembly.
352 
353           if (mixed_source_and_assembly && sc.line_entry.IsValid()) {
354             if (sc.symbol != previous_symbol) {
355               SourceLine decl_line = GetFunctionDeclLineEntry(sc);
356               if (!ElideMixedSourceAndDisassemblyLine(exe_ctx, sc, decl_line))
357                 AddLineToSourceLineTables(decl_line, source_lines_seen);
358             }
359             if (sc.line_entry.IsValid()) {
360               SourceLine this_line;
361               this_line.file = sc.line_entry.file;
362               this_line.line = sc.line_entry.line;
363               this_line.column = sc.line_entry.column;
364               if (!ElideMixedSourceAndDisassemblyLine(exe_ctx, sc, this_line))
365                 AddLineToSourceLineTables(this_line, source_lines_seen);
366             }
367           }
368         }
369         sc.Clear(false);
370       }
371     }
372   }
373 
374   previous_symbol = nullptr;
375   SourceLine previous_line;
376   for (size_t i = 0; i < num_instructions_found; ++i) {
377     Instruction *inst = GetInstructionList().GetInstructionAtIndex(i).get();
378 
379     if (inst) {
380       const Address &addr = inst->GetAddress();
381       const bool inst_is_at_pc = pc_addr_ptr && addr == *pc_addr_ptr;
382       SourceLinesToDisplay source_lines_to_display;
383 
384       prev_sc = sc;
385 
386       ModuleSP module_sp(addr.GetModule());
387       if (module_sp) {
388         uint32_t resolved_mask = module_sp->ResolveSymbolContextForAddress(
389             addr, eSymbolContextEverything, sc);
390         if (resolved_mask) {
391           if (mixed_source_and_assembly) {
392 
393             // If we've started a new function (non-inlined), print all of the
394             // source lines from the function declaration until the first line
395             // table entry - typically the opening curly brace of the function.
396             if (previous_symbol != sc.symbol) {
397               // The default disassembly format puts an extra blank line
398               // between functions - so when we're displaying the source
399               // context for a function, we don't want to add a blank line
400               // after the source context or we'll end up with two of them.
401               if (previous_symbol != nullptr)
402                 source_lines_to_display.print_source_context_end_eol = false;
403 
404               previous_symbol = sc.symbol;
405               if (sc.function && sc.line_entry.IsValid()) {
406                 LineEntry prologue_end_line = sc.line_entry;
407                 if (!ElideMixedSourceAndDisassemblyLine(exe_ctx, sc,
408                                                         prologue_end_line)) {
409                   FileSpec func_decl_file;
410                   uint32_t func_decl_line;
411                   sc.function->GetStartLineSourceInfo(func_decl_file,
412                                                       func_decl_line);
413                   if (func_decl_file == prologue_end_line.file ||
414                       func_decl_file == prologue_end_line.original_file) {
415                     // Add all the lines between the function declaration and
416                     // the first non-prologue source line to the list of lines
417                     // to print.
418                     for (uint32_t lineno = func_decl_line;
419                          lineno <= prologue_end_line.line; lineno++) {
420                       SourceLine this_line;
421                       this_line.file = func_decl_file;
422                       this_line.line = lineno;
423                       source_lines_to_display.lines.push_back(this_line);
424                     }
425                     // Mark the last line as the "current" one.  Usually this
426                     // is the open curly brace.
427                     if (source_lines_to_display.lines.size() > 0)
428                       source_lines_to_display.current_source_line =
429                           source_lines_to_display.lines.size() - 1;
430                   }
431                 }
432               }
433               sc.GetAddressRange(scope, 0, use_inline_block_range,
434                                  current_source_line_range);
435             }
436 
437             // If we've left a previous source line's address range, print a
438             // new source line
439             if (!current_source_line_range.ContainsFileAddress(addr)) {
440               sc.GetAddressRange(scope, 0, use_inline_block_range,
441                                  current_source_line_range);
442 
443               if (sc != prev_sc && sc.comp_unit && sc.line_entry.IsValid()) {
444                 SourceLine this_line;
445                 this_line.file = sc.line_entry.file;
446                 this_line.line = sc.line_entry.line;
447 
448                 if (!ElideMixedSourceAndDisassemblyLine(exe_ctx, sc,
449                                                         this_line)) {
450                   // Only print this source line if it is different from the
451                   // last source line we printed.  There may have been inlined
452                   // functions between these lines that we elided, resulting in
453                   // the same line being printed twice in a row for a
454                   // contiguous block of assembly instructions.
455                   if (this_line != previous_line) {
456 
457                     std::vector<uint32_t> previous_lines;
458                     for (uint32_t i = 0;
459                          i < num_mixed_context_lines &&
460                          (this_line.line - num_mixed_context_lines) > 0;
461                          i++) {
462                       uint32_t line =
463                           this_line.line - num_mixed_context_lines + i;
464                       auto pos = source_lines_seen.find(this_line.file);
465                       if (pos != source_lines_seen.end()) {
466                         if (pos->second.count(line) == 1) {
467                           previous_lines.clear();
468                         } else {
469                           previous_lines.push_back(line);
470                         }
471                       }
472                     }
473                     for (size_t i = 0; i < previous_lines.size(); i++) {
474                       SourceLine previous_line;
475                       previous_line.file = this_line.file;
476                       previous_line.line = previous_lines[i];
477                       auto pos = source_lines_seen.find(previous_line.file);
478                       if (pos != source_lines_seen.end()) {
479                         pos->second.insert(previous_line.line);
480                       }
481                       source_lines_to_display.lines.push_back(previous_line);
482                     }
483 
484                     source_lines_to_display.lines.push_back(this_line);
485                     source_lines_to_display.current_source_line =
486                         source_lines_to_display.lines.size() - 1;
487 
488                     for (uint32_t i = 0; i < num_mixed_context_lines; i++) {
489                       SourceLine next_line;
490                       next_line.file = this_line.file;
491                       next_line.line = this_line.line + i + 1;
492                       auto pos = source_lines_seen.find(next_line.file);
493                       if (pos != source_lines_seen.end()) {
494                         if (pos->second.count(next_line.line) == 1)
495                           break;
496                         pos->second.insert(next_line.line);
497                       }
498                       source_lines_to_display.lines.push_back(next_line);
499                     }
500                   }
501                   previous_line = this_line;
502                 }
503               }
504             }
505           }
506         } else {
507           sc.Clear(true);
508         }
509       }
510 
511       if (source_lines_to_display.lines.size() > 0) {
512         strm.EOL();
513         for (size_t idx = 0; idx < source_lines_to_display.lines.size();
514              idx++) {
515           SourceLine ln = source_lines_to_display.lines[idx];
516           const char *line_highlight = "";
517           if (inst_is_at_pc && (options & eOptionMarkPCSourceLine)) {
518             line_highlight = "->";
519           } else if (idx == source_lines_to_display.current_source_line) {
520             line_highlight = "**";
521           }
522           source_manager.DisplaySourceLinesWithLineNumbers(
523               ln.file, ln.line, ln.column, 0, 0, line_highlight, &strm);
524         }
525         if (source_lines_to_display.print_source_context_end_eol)
526           strm.EOL();
527       }
528 
529       const bool show_bytes = (options & eOptionShowBytes) != 0;
530       inst->Dump(&strm, max_opcode_byte_size, true, show_bytes, &exe_ctx, &sc,
531                  &prev_sc, nullptr, address_text_size);
532       strm.EOL();
533     } else {
534       break;
535     }
536   }
537 }
538 
539 bool Disassembler::Disassemble(Debugger &debugger, const ArchSpec &arch,
540                                StackFrame &frame, Stream &strm) {
541   AddressRange range;
542   SymbolContext sc(
543       frame.GetSymbolContext(eSymbolContextFunction | eSymbolContextSymbol));
544   if (sc.function) {
545     range = sc.function->GetAddressRange();
546   } else if (sc.symbol && sc.symbol->ValueIsAddress()) {
547     range.GetBaseAddress() = sc.symbol->GetAddressRef();
548     range.SetByteSize(sc.symbol->GetByteSize());
549   } else {
550     range.GetBaseAddress() = frame.GetFrameCodeAddress();
551   }
552 
553     if (range.GetBaseAddress().IsValid() && range.GetByteSize() == 0)
554       range.SetByteSize(DEFAULT_DISASM_BYTE_SIZE);
555 
556     Disassembler::Limit limit = {Disassembler::Limit::Bytes,
557                                  range.GetByteSize()};
558     if (limit.value == 0)
559       limit.value = DEFAULT_DISASM_BYTE_SIZE;
560 
561     return Disassemble(debugger, arch, nullptr, nullptr, frame,
562                        range.GetBaseAddress(), limit, false, 0, 0, strm);
563 }
564 
565 Instruction::Instruction(const Address &address, AddressClass addr_class)
566     : m_address(address), m_address_class(addr_class), m_opcode(),
567       m_calculated_strings(false) {}
568 
569 Instruction::~Instruction() = default;
570 
571 AddressClass Instruction::GetAddressClass() {
572   if (m_address_class == AddressClass::eInvalid)
573     m_address_class = m_address.GetAddressClass();
574   return m_address_class;
575 }
576 
577 void Instruction::Dump(lldb_private::Stream *s, uint32_t max_opcode_byte_size,
578                        bool show_address, bool show_bytes,
579                        const ExecutionContext *exe_ctx,
580                        const SymbolContext *sym_ctx,
581                        const SymbolContext *prev_sym_ctx,
582                        const FormatEntity::Entry *disassembly_addr_format,
583                        size_t max_address_text_size) {
584   size_t opcode_column_width = 7;
585   const size_t operand_column_width = 25;
586 
587   CalculateMnemonicOperandsAndCommentIfNeeded(exe_ctx);
588 
589   StreamString ss;
590 
591   if (show_address) {
592     Debugger::FormatDisassemblerAddress(disassembly_addr_format, sym_ctx,
593                                         prev_sym_ctx, exe_ctx, &m_address, ss);
594     ss.FillLastLineToColumn(max_address_text_size, ' ');
595   }
596 
597   if (show_bytes) {
598     if (m_opcode.GetType() == Opcode::eTypeBytes) {
599       // x86_64 and i386 are the only ones that use bytes right now so pad out
600       // the byte dump to be able to always show 15 bytes (3 chars each) plus a
601       // space
602       if (max_opcode_byte_size > 0)
603         m_opcode.Dump(&ss, max_opcode_byte_size * 3 + 1);
604       else
605         m_opcode.Dump(&ss, 15 * 3 + 1);
606     } else {
607       // Else, we have ARM or MIPS which can show up to a uint32_t 0x00000000
608       // (10 spaces) plus two for padding...
609       if (max_opcode_byte_size > 0)
610         m_opcode.Dump(&ss, max_opcode_byte_size * 3 + 1);
611       else
612         m_opcode.Dump(&ss, 12);
613     }
614   }
615 
616   const size_t opcode_pos = ss.GetSizeOfLastLine();
617 
618   // The default opcode size of 7 characters is plenty for most architectures
619   // but some like arm can pull out the occasional vqrshrun.s16.  We won't get
620   // consistent column spacing in these cases, unfortunately.
621   if (m_opcode_name.length() >= opcode_column_width) {
622     opcode_column_width = m_opcode_name.length() + 1;
623   }
624 
625   ss.PutCString(m_opcode_name);
626   ss.FillLastLineToColumn(opcode_pos + opcode_column_width, ' ');
627   ss.PutCString(m_mnemonics);
628 
629   if (!m_comment.empty()) {
630     ss.FillLastLineToColumn(
631         opcode_pos + opcode_column_width + operand_column_width, ' ');
632     ss.PutCString(" ; ");
633     ss.PutCString(m_comment);
634   }
635   s->PutCString(ss.GetString());
636 }
637 
638 bool Instruction::DumpEmulation(const ArchSpec &arch) {
639   std::unique_ptr<EmulateInstruction> insn_emulator_up(
640       EmulateInstruction::FindPlugin(arch, eInstructionTypeAny, nullptr));
641   if (insn_emulator_up) {
642     insn_emulator_up->SetInstruction(GetOpcode(), GetAddress(), nullptr);
643     return insn_emulator_up->EvaluateInstruction(0);
644   }
645 
646   return false;
647 }
648 
649 bool Instruction::CanSetBreakpoint () {
650   return !HasDelaySlot();
651 }
652 
/// Report whether this instruction has a delay slot. This implementation
/// always answers false.
// NOTE(review): presumably overridden by architecture-specific instruction
// classes; the class declaration is not visible here - confirm.
bool Instruction::HasDelaySlot() {
  // Default is false.
  return false;
}
657 
658 OptionValueSP Instruction::ReadArray(FILE *in_file, Stream *out_stream,
659                                      OptionValue::Type data_type) {
660   bool done = false;
661   char buffer[1024];
662 
663   auto option_value_sp = std::make_shared<OptionValueArray>(1u << data_type);
664 
665   int idx = 0;
666   while (!done) {
667     if (!fgets(buffer, 1023, in_file)) {
668       out_stream->Printf(
669           "Instruction::ReadArray:  Error reading file (fgets).\n");
670       option_value_sp.reset();
671       return option_value_sp;
672     }
673 
674     std::string line(buffer);
675 
676     size_t len = line.size();
677     if (line[len - 1] == '\n') {
678       line[len - 1] = '\0';
679       line.resize(len - 1);
680     }
681 
682     if ((line.size() == 1) && line[0] == ']') {
683       done = true;
684       line.clear();
685     }
686 
687     if (!line.empty()) {
688       std::string value;
689       static RegularExpression g_reg_exp(
690           llvm::StringRef("^[ \t]*([^ \t]+)[ \t]*$"));
691       llvm::SmallVector<llvm::StringRef, 2> matches;
692       if (g_reg_exp.Execute(line, &matches))
693         value = matches[1].str();
694       else
695         value = line;
696 
697       OptionValueSP data_value_sp;
698       switch (data_type) {
699       case OptionValue::eTypeUInt64:
700         data_value_sp = std::make_shared<OptionValueUInt64>(0, 0);
701         data_value_sp->SetValueFromString(value);
702         break;
703       // Other types can be added later as needed.
704       default:
705         data_value_sp = std::make_shared<OptionValueString>(value.c_str(), "");
706         break;
707       }
708 
709       option_value_sp->GetAsArray()->InsertValue(idx, data_value_sp);
710       ++idx;
711     }
712   }
713 
714   return option_value_sp;
715 }
716 
717 OptionValueSP Instruction::ReadDictionary(FILE *in_file, Stream *out_stream) {
718   bool done = false;
719   char buffer[1024];
720 
721   auto option_value_sp = std::make_shared<OptionValueDictionary>();
722   static ConstString encoding_key("data_encoding");
723   OptionValue::Type data_type = OptionValue::eTypeInvalid;
724 
725   while (!done) {
726     // Read the next line in the file
727     if (!fgets(buffer, 1023, in_file)) {
728       out_stream->Printf(
729           "Instruction::ReadDictionary: Error reading file (fgets).\n");
730       option_value_sp.reset();
731       return option_value_sp;
732     }
733 
734     // Check to see if the line contains the end-of-dictionary marker ("}")
735     std::string line(buffer);
736 
737     size_t len = line.size();
738     if (line[len - 1] == '\n') {
739       line[len - 1] = '\0';
740       line.resize(len - 1);
741     }
742 
743     if ((line.size() == 1) && (line[0] == '}')) {
744       done = true;
745       line.clear();
746     }
747 
748     // Try to find a key-value pair in the current line and add it to the
749     // dictionary.
750     if (!line.empty()) {
751       static RegularExpression g_reg_exp(llvm::StringRef(
752           "^[ \t]*([a-zA-Z_][a-zA-Z0-9_]*)[ \t]*=[ \t]*(.*)[ \t]*$"));
753 
754       llvm::SmallVector<llvm::StringRef, 3> matches;
755 
756       bool reg_exp_success = g_reg_exp.Execute(line, &matches);
757       std::string key;
758       std::string value;
759       if (reg_exp_success) {
760         key = matches[1].str();
761         value = matches[2].str();
762       } else {
763         out_stream->Printf("Instruction::ReadDictionary: Failure executing "
764                            "regular expression.\n");
765         option_value_sp.reset();
766         return option_value_sp;
767       }
768 
769       ConstString const_key(key.c_str());
770       // Check value to see if it's the start of an array or dictionary.
771 
772       lldb::OptionValueSP value_sp;
773       assert(value.empty() == false);
774       assert(key.empty() == false);
775 
776       if (value[0] == '{') {
777         assert(value.size() == 1);
778         // value is a dictionary
779         value_sp = ReadDictionary(in_file, out_stream);
780         if (!value_sp) {
781           option_value_sp.reset();
782           return option_value_sp;
783         }
784       } else if (value[0] == '[') {
785         assert(value.size() == 1);
786         // value is an array
787         value_sp = ReadArray(in_file, out_stream, data_type);
788         if (!value_sp) {
789           option_value_sp.reset();
790           return option_value_sp;
791         }
792         // We've used the data_type to read an array; re-set the type to
793         // Invalid
794         data_type = OptionValue::eTypeInvalid;
795       } else if ((value[0] == '0') && (value[1] == 'x')) {
796         value_sp = std::make_shared<OptionValueUInt64>(0, 0);
797         value_sp->SetValueFromString(value);
798       } else {
799         size_t len = value.size();
800         if ((value[0] == '"') && (value[len - 1] == '"'))
801           value = value.substr(1, len - 2);
802         value_sp = std::make_shared<OptionValueString>(value.c_str(), "");
803       }
804 
805       if (const_key == encoding_key) {
806         // A 'data_encoding=..." is NOT a normal key-value pair; it is meta-data
807         // indicating the
808         // data type of an upcoming array (usually the next bit of data to be
809         // read in).
810         if (strcmp(value.c_str(), "uint32_t") == 0)
811           data_type = OptionValue::eTypeUInt64;
812       } else
813         option_value_sp->GetAsDictionary()->SetValueForKey(const_key, value_sp,
814                                                            false);
815     }
816   }
817 
818   return option_value_sp;
819 }
820 
821 bool Instruction::TestEmulation(Stream *out_stream, const char *file_name) {
822   if (!out_stream)
823     return false;
824 
825   if (!file_name) {
826     out_stream->Printf("Instruction::TestEmulation:  Missing file_name.");
827     return false;
828   }
829   FILE *test_file = FileSystem::Instance().Fopen(file_name, "r");
830   if (!test_file) {
831     out_stream->Printf(
832         "Instruction::TestEmulation: Attempt to open test file failed.");
833     return false;
834   }
835 
836   char buffer[256];
837   if (!fgets(buffer, 255, test_file)) {
838     out_stream->Printf(
839         "Instruction::TestEmulation: Error reading first line of test file.\n");
840     fclose(test_file);
841     return false;
842   }
843 
844   if (strncmp(buffer, "InstructionEmulationState={", 27) != 0) {
845     out_stream->Printf("Instructin::TestEmulation: Test file does not contain "
846                        "emulation state dictionary\n");
847     fclose(test_file);
848     return false;
849   }
850 
851   // Read all the test information from the test file into an
852   // OptionValueDictionary.
853 
854   OptionValueSP data_dictionary_sp(ReadDictionary(test_file, out_stream));
855   if (!data_dictionary_sp) {
856     out_stream->Printf(
857         "Instruction::TestEmulation:  Error reading Dictionary Object.\n");
858     fclose(test_file);
859     return false;
860   }
861 
862   fclose(test_file);
863 
864   OptionValueDictionary *data_dictionary =
865       data_dictionary_sp->GetAsDictionary();
866   static ConstString description_key("assembly_string");
867   static ConstString triple_key("triple");
868 
869   OptionValueSP value_sp = data_dictionary->GetValueForKey(description_key);
870 
871   if (!value_sp) {
872     out_stream->Printf("Instruction::TestEmulation:  Test file does not "
873                        "contain description string.\n");
874     return false;
875   }
876 
877   SetDescription(value_sp->GetStringValue());
878 
879   value_sp = data_dictionary->GetValueForKey(triple_key);
880   if (!value_sp) {
881     out_stream->Printf(
882         "Instruction::TestEmulation: Test file does not contain triple.\n");
883     return false;
884   }
885 
886   ArchSpec arch;
887   arch.SetTriple(llvm::Triple(value_sp->GetStringValue()));
888 
889   bool success = false;
890   std::unique_ptr<EmulateInstruction> insn_emulator_up(
891       EmulateInstruction::FindPlugin(arch, eInstructionTypeAny, nullptr));
892   if (insn_emulator_up)
893     success =
894         insn_emulator_up->TestEmulation(out_stream, arch, data_dictionary);
895 
896   if (success)
897     out_stream->Printf("Emulation test succeeded.");
898   else
899     out_stream->Printf("Emulation test failed.");
900 
901   return success;
902 }
903 
904 bool Instruction::Emulate(
905     const ArchSpec &arch, uint32_t evaluate_options, void *baton,
906     EmulateInstruction::ReadMemoryCallback read_mem_callback,
907     EmulateInstruction::WriteMemoryCallback write_mem_callback,
908     EmulateInstruction::ReadRegisterCallback read_reg_callback,
909     EmulateInstruction::WriteRegisterCallback write_reg_callback) {
910   std::unique_ptr<EmulateInstruction> insn_emulator_up(
911       EmulateInstruction::FindPlugin(arch, eInstructionTypeAny, nullptr));
912   if (insn_emulator_up) {
913     insn_emulator_up->SetBaton(baton);
914     insn_emulator_up->SetCallbacks(read_mem_callback, write_mem_callback,
915                                    read_reg_callback, write_reg_callback);
916     insn_emulator_up->SetInstruction(GetOpcode(), GetAddress(), nullptr);
917     return insn_emulator_up->EvaluateInstruction(evaluate_options);
918   }
919 
920   return false;
921 }
922 
923 uint32_t Instruction::GetData(DataExtractor &data) {
924   return m_opcode.GetData(data);
925 }
926 
927 InstructionList::InstructionList() : m_instructions() {}
928 
// Compiler-generated destruction; defined out of line in this file.
InstructionList::~InstructionList() = default;
930 
// Number of instructions currently held in the list.
size_t InstructionList::GetSize() const { return m_instructions.size(); }
932 
933 uint32_t InstructionList::GetMaxOpcocdeByteSize() const {
934   uint32_t max_inst_size = 0;
935   collection::const_iterator pos, end;
936   for (pos = m_instructions.begin(), end = m_instructions.end(); pos != end;
937        ++pos) {
938     uint32_t inst_size = (*pos)->GetOpcode().GetByteSize();
939     if (max_inst_size < inst_size)
940       max_inst_size = inst_size;
941   }
942   return max_inst_size;
943 }
944 
945 InstructionSP InstructionList::GetInstructionAtIndex(size_t idx) const {
946   InstructionSP inst_sp;
947   if (idx < m_instructions.size())
948     inst_sp = m_instructions[idx];
949   return inst_sp;
950 }
951 
952 InstructionSP InstructionList::GetInstructionAtAddress(const Address &address) {
953   uint32_t index = GetIndexOfInstructionAtAddress(address);
954   if (index != UINT32_MAX)
955     return GetInstructionAtIndex(index);
956   return nullptr;
957 }
958 
959 void InstructionList::Dump(Stream *s, bool show_address, bool show_bytes,
960                            const ExecutionContext *exe_ctx) {
961   const uint32_t max_opcode_byte_size = GetMaxOpcocdeByteSize();
962   collection::const_iterator pos, begin, end;
963 
964   const FormatEntity::Entry *disassembly_format = nullptr;
965   FormatEntity::Entry format;
966   if (exe_ctx && exe_ctx->HasTargetScope()) {
967     disassembly_format =
968         exe_ctx->GetTargetRef().GetDebugger().GetDisassemblyFormat();
969   } else {
970     FormatEntity::Parse("${addr}: ", format);
971     disassembly_format = &format;
972   }
973 
974   for (begin = m_instructions.begin(), end = m_instructions.end(), pos = begin;
975        pos != end; ++pos) {
976     if (pos != begin)
977       s->EOL();
978     (*pos)->Dump(s, max_opcode_byte_size, show_address, show_bytes, exe_ctx,
979                  nullptr, nullptr, disassembly_format, 0);
980   }
981 }
982 
// Remove every instruction from the list.
void InstructionList::Clear() { m_instructions.clear(); }
984 
985 void InstructionList::Append(lldb::InstructionSP &inst_sp) {
986   if (inst_sp)
987     m_instructions.push_back(inst_sp);
988 }
989 
990 uint32_t
991 InstructionList::GetIndexOfNextBranchInstruction(uint32_t start,
992                                                  bool ignore_calls,
993                                                  bool *found_calls) const {
994   size_t num_instructions = m_instructions.size();
995 
996   uint32_t next_branch = UINT32_MAX;
997 
998   if (found_calls)
999     *found_calls = false;
1000   for (size_t i = start; i < num_instructions; i++) {
1001     if (m_instructions[i]->DoesBranch()) {
1002       if (ignore_calls && m_instructions[i]->IsCall()) {
1003         if (found_calls)
1004           *found_calls = true;
1005         continue;
1006       }
1007       next_branch = i;
1008       break;
1009     }
1010   }
1011 
1012   return next_branch;
1013 }
1014 
1015 uint32_t
1016 InstructionList::GetIndexOfInstructionAtAddress(const Address &address) {
1017   size_t num_instructions = m_instructions.size();
1018   uint32_t index = UINT32_MAX;
1019   for (size_t i = 0; i < num_instructions; i++) {
1020     if (m_instructions[i]->GetAddress() == address) {
1021       index = i;
1022       break;
1023     }
1024   }
1025   return index;
1026 }
1027 
1028 uint32_t
1029 InstructionList::GetIndexOfInstructionAtLoadAddress(lldb::addr_t load_addr,
1030                                                     Target &target) {
1031   Address address;
1032   address.SetLoadAddress(load_addr, &target);
1033   return GetIndexOfInstructionAtAddress(address);
1034 }
1035 
1036 size_t Disassembler::ParseInstructions(Target &target, Address start,
1037                                        Limit limit, Stream *error_strm_ptr,
1038                                        bool force_live_memory) {
1039   m_instruction_list.Clear();
1040 
1041   if (!start.IsValid())
1042     return 0;
1043 
1044   start = ResolveAddress(target, start);
1045 
1046   addr_t byte_size = limit.value;
1047   if (limit.kind == Limit::Instructions)
1048     byte_size *= m_arch.GetMaximumOpcodeByteSize();
1049   auto data_sp = std::make_shared<DataBufferHeap>(byte_size, '\0');
1050 
1051   Status error;
1052   lldb::addr_t load_addr = LLDB_INVALID_ADDRESS;
1053   const size_t bytes_read =
1054       target.ReadMemory(start, data_sp->GetBytes(), data_sp->GetByteSize(),
1055                         error, force_live_memory, &load_addr);
1056   const bool data_from_file = load_addr == LLDB_INVALID_ADDRESS;
1057 
1058   if (bytes_read == 0) {
1059     if (error_strm_ptr) {
1060       if (const char *error_cstr = error.AsCString())
1061         error_strm_ptr->Printf("error: %s\n", error_cstr);
1062     }
1063     return 0;
1064   }
1065 
1066   if (bytes_read != data_sp->GetByteSize())
1067     data_sp->SetByteSize(bytes_read);
1068   DataExtractor data(data_sp, m_arch.GetByteOrder(),
1069                      m_arch.GetAddressByteSize());
1070   return DecodeInstructions(start, data, 0,
1071                             limit.kind == Limit::Instructions ? limit.value
1072                                                               : UINT32_MAX,
1073                             false, data_from_file);
1074 }
1075 
1076 // Disassembler copy constructor
1077 Disassembler::Disassembler(const ArchSpec &arch, const char *flavor)
1078     : m_arch(arch), m_instruction_list(), m_base_addr(LLDB_INVALID_ADDRESS),
1079       m_flavor() {
1080   if (flavor == nullptr)
1081     m_flavor.assign("default");
1082   else
1083     m_flavor.assign(flavor);
1084 
1085   // If this is an arm variant that can only include thumb (T16, T32)
1086   // instructions, force the arch triple to be "thumbv.." instead of "armv..."
1087   if (arch.IsAlwaysThumbInstructions()) {
1088     std::string thumb_arch_name(arch.GetTriple().getArchName().str());
1089     // Replace "arm" with "thumb" so we get all thumb variants correct
1090     if (thumb_arch_name.size() > 3) {
1091       thumb_arch_name.erase(0, 3);
1092       thumb_arch_name.insert(0, "thumb");
1093     }
1094     m_arch.SetTriple(thumb_arch_name.c_str());
1095   }
1096 }
1097 
// Compiler-generated destruction; defined out of line in this file.
Disassembler::~Disassembler() = default;
1099 
// Mutable access to the instruction list owned by this disassembler.
InstructionList &Disassembler::GetInstructionList() {
  return m_instruction_list;
}
1103 
// Read-only access to the instruction list owned by this disassembler.
const InstructionList &Disassembler::GetInstructionList() const {
  return m_instruction_list;
}
1107 
1108 // Class PseudoInstruction
1109 
// Construct a pseudo instruction with a default-constructed Address, an
// unknown address class and an empty description.
PseudoInstruction::PseudoInstruction()
    : Instruction(Address(), AddressClass::eUnknown), m_description() {}
1112 
// Compiler-generated destruction; defined out of line in this file.
PseudoInstruction::~PseudoInstruction() = default;
1114 
// Always returns false for a pseudo instruction.
bool PseudoInstruction::DoesBranch() {
  // This is NOT a valid question for a pseudo instruction.
  return false;
}
1119 
// Always returns false for a pseudo instruction.
bool PseudoInstruction::HasDelaySlot() {
  // This is NOT a valid question for a pseudo instruction.
  return false;
}
1124 
// Always returns false for a pseudo instruction.
bool PseudoInstruction::IsLoad() { return false; }
1126 
// Always returns false for a pseudo instruction.
bool PseudoInstruction::IsAuthenticated() { return false; }
1128 
// Nothing is decoded here: all three arguments are ignored and the byte size
// of the opcode already stored in m_opcode is returned.
size_t PseudoInstruction::Decode(const lldb_private::Disassembler &disassembler,
                                 const lldb_private::DataExtractor &data,
                                 lldb::offset_t data_offset) {
  return m_opcode.GetByteSize();
}
1134 
1135 void PseudoInstruction::SetOpcode(size_t opcode_size, void *opcode_data) {
1136   if (!opcode_data)
1137     return;
1138 
1139   switch (opcode_size) {
1140   case 8: {
1141     uint8_t value8 = *((uint8_t *)opcode_data);
1142     m_opcode.SetOpcode8(value8, eByteOrderInvalid);
1143     break;
1144   }
1145   case 16: {
1146     uint16_t value16 = *((uint16_t *)opcode_data);
1147     m_opcode.SetOpcode16(value16, eByteOrderInvalid);
1148     break;
1149   }
1150   case 32: {
1151     uint32_t value32 = *((uint32_t *)opcode_data);
1152     m_opcode.SetOpcode32(value32, eByteOrderInvalid);
1153     break;
1154   }
1155   case 64: {
1156     uint64_t value64 = *((uint64_t *)opcode_data);
1157     m_opcode.SetOpcode64(value64, eByteOrderInvalid);
1158     break;
1159   }
1160   default:
1161     break;
1162   }
1163 }
1164 
1165 void PseudoInstruction::SetDescription(llvm::StringRef description) {
1166   m_description = std::string(description);
1167 }
1168 
1169 Instruction::Operand Instruction::Operand::BuildRegister(ConstString &r) {
1170   Operand ret;
1171   ret.m_type = Type::Register;
1172   ret.m_register = r;
1173   return ret;
1174 }
1175 
1176 Instruction::Operand Instruction::Operand::BuildImmediate(lldb::addr_t imm,
1177                                                           bool neg) {
1178   Operand ret;
1179   ret.m_type = Type::Immediate;
1180   ret.m_immediate = imm;
1181   ret.m_negative = neg;
1182   return ret;
1183 }
1184 
1185 Instruction::Operand Instruction::Operand::BuildImmediate(int64_t imm) {
1186   Operand ret;
1187   ret.m_type = Type::Immediate;
1188   if (imm < 0) {
1189     ret.m_immediate = -imm;
1190     ret.m_negative = true;
1191   } else {
1192     ret.m_immediate = imm;
1193     ret.m_negative = false;
1194   }
1195   return ret;
1196 }
1197 
1198 Instruction::Operand
1199 Instruction::Operand::BuildDereference(const Operand &ref) {
1200   Operand ret;
1201   ret.m_type = Type::Dereference;
1202   ret.m_children = {ref};
1203   return ret;
1204 }
1205 
1206 Instruction::Operand Instruction::Operand::BuildSum(const Operand &lhs,
1207                                                     const Operand &rhs) {
1208   Operand ret;
1209   ret.m_type = Type::Sum;
1210   ret.m_children = {lhs, rhs};
1211   return ret;
1212 }
1213 
1214 Instruction::Operand Instruction::Operand::BuildProduct(const Operand &lhs,
1215                                                         const Operand &rhs) {
1216   Operand ret;
1217   ret.m_type = Type::Product;
1218   ret.m_children = {lhs, rhs};
1219   return ret;
1220 }
1221 
1222 std::function<bool(const Instruction::Operand &)>
1223 lldb_private::OperandMatchers::MatchBinaryOp(
1224     std::function<bool(const Instruction::Operand &)> base,
1225     std::function<bool(const Instruction::Operand &)> left,
1226     std::function<bool(const Instruction::Operand &)> right) {
1227   return [base, left, right](const Instruction::Operand &op) -> bool {
1228     return (base(op) && op.m_children.size() == 2 &&
1229             ((left(op.m_children[0]) && right(op.m_children[1])) ||
1230              (left(op.m_children[1]) && right(op.m_children[0]))));
1231   };
1232 }
1233 
1234 std::function<bool(const Instruction::Operand &)>
1235 lldb_private::OperandMatchers::MatchUnaryOp(
1236     std::function<bool(const Instruction::Operand &)> base,
1237     std::function<bool(const Instruction::Operand &)> child) {
1238   return [base, child](const Instruction::Operand &op) -> bool {
1239     return (base(op) && op.m_children.size() == 1 && child(op.m_children[0]));
1240   };
1241 }
1242 
1243 std::function<bool(const Instruction::Operand &)>
1244 lldb_private::OperandMatchers::MatchRegOp(const RegisterInfo &info) {
1245   return [&info](const Instruction::Operand &op) {
1246     return (op.m_type == Instruction::Operand::Type::Register &&
1247             (op.m_register == ConstString(info.name) ||
1248              op.m_register == ConstString(info.alt_name)));
1249   };
1250 }
1251 
1252 std::function<bool(const Instruction::Operand &)>
1253 lldb_private::OperandMatchers::FetchRegOp(ConstString &reg) {
1254   return [&reg](const Instruction::Operand &op) {
1255     if (op.m_type != Instruction::Operand::Type::Register) {
1256       return false;
1257     }
1258     reg = op.m_register;
1259     return true;
1260   };
1261 }
1262 
1263 std::function<bool(const Instruction::Operand &)>
1264 lldb_private::OperandMatchers::MatchImmOp(int64_t imm) {
1265   return [imm](const Instruction::Operand &op) {
1266     return (op.m_type == Instruction::Operand::Type::Immediate &&
1267             ((op.m_negative && op.m_immediate == (uint64_t)-imm) ||
1268              (!op.m_negative && op.m_immediate == (uint64_t)imm)));
1269   };
1270 }
1271 
1272 std::function<bool(const Instruction::Operand &)>
1273 lldb_private::OperandMatchers::FetchImmOp(int64_t &imm) {
1274   return [&imm](const Instruction::Operand &op) {
1275     if (op.m_type != Instruction::Operand::Type::Immediate) {
1276       return false;
1277     }
1278     if (op.m_negative) {
1279       imm = -((int64_t)op.m_immediate);
1280     } else {
1281       imm = ((int64_t)op.m_immediate);
1282     }
1283     return true;
1284   };
1285 }
1286 
1287 std::function<bool(const Instruction::Operand &)>
1288 lldb_private::OperandMatchers::MatchOpType(Instruction::Operand::Type type) {
1289   return [type](const Instruction::Operand &op) { return op.m_type == type; };
1290 }
1291