1 //===-- Disassembler.h ------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef liblldb_Disassembler_h_
10 #define liblldb_Disassembler_h_
11 
12 #include "lldb/Core/Address.h"
13 #include "lldb/Core/EmulateInstruction.h"
14 #include "lldb/Core/FormatEntity.h"
15 #include "lldb/Core/Opcode.h"
16 #include "lldb/Core/PluginInterface.h"
17 #include "lldb/Interpreter/OptionValue.h"
18 #include "lldb/Symbol/LineEntry.h"
19 #include "lldb/Target/ExecutionContext.h"
20 #include "lldb/Utility/ArchSpec.h"
21 #include "lldb/Utility/ConstString.h"
22 #include "lldb/Utility/FileSpec.h"
23 #include "lldb/lldb-defines.h"
24 #include "lldb/lldb-forward.h"
25 #include "lldb/lldb-private-enumerations.h"
26 #include "lldb/lldb-types.h"
27 
28 #include "llvm/ADT/StringRef.h"
29 
30 #include <functional>
31 #include <map>
32 #include <memory>
33 #include <set>
34 #include <string>
35 #include <vector>
36 
37 #include <stddef.h>
38 #include <stdint.h>
39 #include <stdio.h>
40 
// Forward declaration only: lets this header mention
// llvm::SmallVectorImpl<T> by reference without including its definition.
namespace llvm {
template <class ElementT> class SmallVectorImpl;
} // namespace llvm
44 
45 namespace lldb_private {
46 class AddressRange;
47 class DataExtractor;
48 class Debugger;
49 class Disassembler;
50 class Module;
51 class Stream;
52 class SymbolContext;
53 class SymbolContextList;
54 class Target;
55 struct RegisterInfo;
56 
57 class Instruction {
58 public:
59   Instruction(const Address &address,
60               AddressClass addr_class = AddressClass::eInvalid);
61 
62   virtual ~Instruction();
63 
64   const Address &GetAddress() const { return m_address; }
65 
66   const char *GetMnemonic(const ExecutionContext *exe_ctx) {
67     CalculateMnemonicOperandsAndCommentIfNeeded(exe_ctx);
68     return m_opcode_name.c_str();
69   }
70 
71   const char *GetOperands(const ExecutionContext *exe_ctx) {
72     CalculateMnemonicOperandsAndCommentIfNeeded(exe_ctx);
73     return m_mnemonics.c_str();
74   }
75 
76   const char *GetComment(const ExecutionContext *exe_ctx) {
77     CalculateMnemonicOperandsAndCommentIfNeeded(exe_ctx);
78     return m_comment.c_str();
79   }
80 
81   virtual void
82   CalculateMnemonicOperandsAndComment(const ExecutionContext *exe_ctx) = 0;
83 
84   AddressClass GetAddressClass();
85 
86   void SetAddress(const Address &addr) {
87     // Invalidate the address class to lazily discover it if we need to.
88     m_address_class = AddressClass::eInvalid;
89     m_address = addr;
90   }
91 
92   /// Dump the text representation of this Instruction to a Stream
93   ///
94   /// Print the (optional) address, (optional) bytes, opcode,
95   /// operands, and instruction comments to a stream.
96   ///
97   /// \param[in] s
98   ///     The Stream to add the text to.
99   ///
100   /// \param[in] show_address
101   ///     Whether the address (using disassembly_addr_format_spec formatting)
102   ///     should be printed.
103   ///
104   /// \param[in] show_bytes
105   ///     Whether the bytes of the assembly instruction should be printed.
106   ///
107   /// \param[in] max_opcode_byte_size
108   ///     The size (in bytes) of the largest instruction in the list that
109   ///     we are printing (for text justification/alignment purposes)
110   ///     Only needed if show_bytes is true.
111   ///
112   /// \param[in] exe_ctx
113   ///     The current execution context, if available.  May be used in
114   ///     the assembling of the operands+comments for this instruction.
115   ///     Pass NULL if not applicable.
116   ///
117   /// \param[in] sym_ctx
118   ///     The SymbolContext for this instruction.
119   ///     Pass NULL if not available/computed.
120   ///     Only needed if show_address is true.
121   ///
122   /// \param[in] prev_sym_ctx
123   ///     The SymbolContext for the previous instruction.  Depending on
124   ///     the disassembly address format specification, a change in
125   ///     Symbol / Function may mean that a line is printed with the new
126   ///     symbol/function name.
127   ///     Pass NULL if unavailable, or if this is the first instruction of
128   ///     the InstructionList.
129   ///     Only needed if show_address is true.
130   ///
131   /// \param[in] disassembly_addr_format
132   ///     The format specification for how addresses are printed.
133   ///     Only needed if show_address is true.
134   ///
135   /// \param[in] max_address_text_size
136   ///     The length of the longest address string at the start of the
137   ///     disassembly line that will be printed (the
138   ///     Debugger::FormatDisassemblerAddress() string)
139   ///     so this method can properly align the instruction opcodes.
140   ///     May be 0 to indicate no indentation/alignment of the opcodes.
141   virtual void Dump(Stream *s, uint32_t max_opcode_byte_size, bool show_address,
142                     bool show_bytes, const ExecutionContext *exe_ctx,
143                     const SymbolContext *sym_ctx,
144                     const SymbolContext *prev_sym_ctx,
145                     const FormatEntity::Entry *disassembly_addr_format,
146                     size_t max_address_text_size);
147 
148   virtual bool DoesBranch() = 0;
149 
150   virtual bool HasDelaySlot();
151 
152   bool CanSetBreakpoint ();
153 
154   virtual size_t Decode(const Disassembler &disassembler,
155                         const DataExtractor &data,
156                         lldb::offset_t data_offset) = 0;
157 
158   virtual void SetDescription(llvm::StringRef) {
159   } // May be overridden in sub-classes that have descriptions.
160 
161   lldb::OptionValueSP ReadArray(FILE *in_file, Stream *out_stream,
162                                 OptionValue::Type data_type);
163 
164   lldb::OptionValueSP ReadDictionary(FILE *in_file, Stream *out_stream);
165 
166   bool DumpEmulation(const ArchSpec &arch);
167 
168   virtual bool TestEmulation(Stream *stream, const char *test_file_name);
169 
170   bool Emulate(const ArchSpec &arch, uint32_t evaluate_options, void *baton,
171                EmulateInstruction::ReadMemoryCallback read_mem_callback,
172                EmulateInstruction::WriteMemoryCallback write_mem_calback,
173                EmulateInstruction::ReadRegisterCallback read_reg_callback,
174                EmulateInstruction::WriteRegisterCallback write_reg_callback);
175 
176   const Opcode &GetOpcode() const { return m_opcode; }
177 
178   uint32_t GetData(DataExtractor &data);
179 
180   struct Operand {
181     enum class Type {
182       Invalid = 0,
183       Register,
184       Immediate,
185       Dereference,
186       Sum,
187       Product
188     } m_type = Type::Invalid;
189     std::vector<Operand> m_children;
190     lldb::addr_t m_immediate = 0;
191     ConstString m_register;
192     bool m_negative = false;
193     bool m_clobbered = false;
194 
195     bool IsValid() { return m_type != Type::Invalid; }
196 
197     static Operand BuildRegister(ConstString &r);
198     static Operand BuildImmediate(lldb::addr_t imm, bool neg);
199     static Operand BuildImmediate(int64_t imm);
200     static Operand BuildDereference(const Operand &ref);
201     static Operand BuildSum(const Operand &lhs, const Operand &rhs);
202     static Operand BuildProduct(const Operand &lhs, const Operand &rhs);
203   };
204 
205   virtual bool ParseOperands(llvm::SmallVectorImpl<Operand> &operands) {
206     return false;
207   }
208 
209   virtual bool IsCall() { return false; }
210 
211 protected:
212   Address m_address; // The section offset address of this instruction
213                      // We include an address class in the Instruction class to
214                      // allow the instruction specify the
215                      // AddressClass::eCodeAlternateISA (currently used for
216                      // thumb), and also to specify data (AddressClass::eData).
217                      // The usual value will be AddressClass::eCode, but often
218                      // when disassembling memory, you might run into data.
219                      // This can help us to disassemble appropriately.
220 private:
221   AddressClass m_address_class; // Use GetAddressClass () accessor function!
222 
223 protected:
224   Opcode m_opcode; // The opcode for this instruction
225   std::string m_opcode_name;
226   std::string m_mnemonics;
227   std::string m_comment;
228   bool m_calculated_strings;
229 
230   void
231   CalculateMnemonicOperandsAndCommentIfNeeded(const ExecutionContext *exe_ctx) {
232     if (!m_calculated_strings) {
233       m_calculated_strings = true;
234       CalculateMnemonicOperandsAndComment(exe_ctx);
235     }
236   }
237 };
238 
239 namespace OperandMatchers {
240 std::function<bool(const Instruction::Operand &)>
241 MatchBinaryOp(std::function<bool(const Instruction::Operand &)> base,
242               std::function<bool(const Instruction::Operand &)> left,
243               std::function<bool(const Instruction::Operand &)> right);
244 
245 std::function<bool(const Instruction::Operand &)>
246 MatchUnaryOp(std::function<bool(const Instruction::Operand &)> base,
247              std::function<bool(const Instruction::Operand &)> child);
248 
249 std::function<bool(const Instruction::Operand &)>
250 MatchRegOp(const RegisterInfo &info);
251 
252 std::function<bool(const Instruction::Operand &)> FetchRegOp(ConstString &reg);
253 
254 std::function<bool(const Instruction::Operand &)> MatchImmOp(int64_t imm);
255 
256 std::function<bool(const Instruction::Operand &)> FetchImmOp(int64_t &imm);
257 
258 std::function<bool(const Instruction::Operand &)>
259 MatchOpType(Instruction::Operand::Type type);
260 }
261 
262 class InstructionList {
263 public:
264   InstructionList();
265   ~InstructionList();
266 
267   size_t GetSize() const;
268 
269   uint32_t GetMaxOpcocdeByteSize() const;
270 
271   lldb::InstructionSP GetInstructionAtIndex(size_t idx) const;
272 
273   //------------------------------------------------------------------
274   /// Get the index of the next branch instruction.
275   ///
276   /// Given a list of instructions, find the next branch instruction
277   /// in the list by returning an index.
278   ///
279   /// @param[in] start
280   ///     The instruction index of the first instruction to check.
281   ///
282   /// @param[in] target
283   ///     A LLDB target object that is used to resolve addresses.
284   ///
285   /// @param[in] ignore_calls
286   ///     It true, then fine the first branch instruction that isn't
287   ///     a function call (a branch that calls and returns to the next
288   ///     instruction). If false, find the instruction index of any
289   ///     branch in the list.
290   ///
291   /// @param[out] found_calls
292   ///     If non-null, this will be set to true if any calls were found in
293   ///     extending the range.
294   ///
295   /// @return
296   ///     The instruction index of the first branch that is at or past
297   ///     \a start. Returns UINT32_MAX if no matching branches are
298   ///     found.
299   //------------------------------------------------------------------
300   uint32_t GetIndexOfNextBranchInstruction(uint32_t start,
301                                            Target &target,
302                                            bool ignore_calls,
303                                            bool *found_calls) const;
304 
305   uint32_t GetIndexOfInstructionAtLoadAddress(lldb::addr_t load_addr,
306                                               Target &target);
307 
308   uint32_t GetIndexOfInstructionAtAddress(const Address &addr);
309 
310   void Clear();
311 
312   void Append(lldb::InstructionSP &inst_sp);
313 
314   void Dump(Stream *s, bool show_address, bool show_bytes,
315             const ExecutionContext *exe_ctx);
316 
317 private:
318   typedef std::vector<lldb::InstructionSP> collection;
319   typedef collection::iterator iterator;
320   typedef collection::const_iterator const_iterator;
321 
322   collection m_instructions;
323 };
324 
325 class PseudoInstruction : public Instruction {
326 public:
327   PseudoInstruction();
328 
329   ~PseudoInstruction() override;
330 
331   bool DoesBranch() override;
332 
333   bool HasDelaySlot() override;
334 
335   void CalculateMnemonicOperandsAndComment(
336       const ExecutionContext *exe_ctx) override {
337     // TODO: fill this in and put opcode name into Instruction::m_opcode_name,
338     // mnemonic into Instruction::m_mnemonics, and any comment into
339     // Instruction::m_comment
340   }
341 
342   size_t Decode(const Disassembler &disassembler, const DataExtractor &data,
343                 lldb::offset_t data_offset) override;
344 
345   void SetOpcode(size_t opcode_size, void *opcode_data);
346 
347   void SetDescription(llvm::StringRef description) override;
348 
349 protected:
350   std::string m_description;
351 
352   DISALLOW_COPY_AND_ASSIGN(PseudoInstruction);
353 };
354 
355 class Disassembler : public std::enable_shared_from_this<Disassembler>,
356                      public PluginInterface {
357 public:
358   enum {
359     eOptionNone = 0u,
360     eOptionShowBytes = (1u << 0),
361     eOptionRawOuput = (1u << 1),
362     eOptionMarkPCSourceLine = (1u << 2), // Mark the source line that contains
363                                          // the current PC (mixed mode only)
364     eOptionMarkPCAddress =
365         (1u << 3) // Mark the disassembly line the contains the PC
366   };
367 
368   enum HexImmediateStyle {
369     eHexStyleC,
370     eHexStyleAsm,
371   };
372 
373   // FindPlugin should be lax about the flavor string (it is too annoying to
374   // have various internal uses of the disassembler fail because the global
375   // flavor string gets set wrong. Instead, if you get a flavor string you
376   // don't understand, use the default.  Folks who care to check can use the
377   // FlavorValidForArchSpec method on the disassembler they got back.
378   static lldb::DisassemblerSP
379   FindPlugin(const ArchSpec &arch, const char *flavor, const char *plugin_name);
380 
381   // This version will use the value in the Target settings if flavor is NULL;
382   static lldb::DisassemblerSP
383   FindPluginForTarget(const lldb::TargetSP target_sp, const ArchSpec &arch,
384                       const char *flavor, const char *plugin_name);
385 
386   static lldb::DisassemblerSP
387   DisassembleRange(const ArchSpec &arch, const char *plugin_name,
388                    const char *flavor, const ExecutionContext &exe_ctx,
389                    const AddressRange &disasm_range, bool prefer_file_cache);
390 
391   static lldb::DisassemblerSP
392   DisassembleBytes(const ArchSpec &arch, const char *plugin_name,
393                    const char *flavor, const Address &start, const void *bytes,
394                    size_t length, uint32_t max_num_instructions,
395                    bool data_from_file);
396 
397   static bool Disassemble(Debugger &debugger, const ArchSpec &arch,
398                           const char *plugin_name, const char *flavor,
399                           const ExecutionContext &exe_ctx,
400                           const AddressRange &range, uint32_t num_instructions,
401                           bool mixed_source_and_assembly,
402                           uint32_t num_mixed_context_lines, uint32_t options,
403                           Stream &strm);
404 
405   static bool Disassemble(Debugger &debugger, const ArchSpec &arch,
406                           const char *plugin_name, const char *flavor,
407                           const ExecutionContext &exe_ctx, const Address &start,
408                           uint32_t num_instructions,
409                           bool mixed_source_and_assembly,
410                           uint32_t num_mixed_context_lines, uint32_t options,
411                           Stream &strm);
412 
413   static size_t
414   Disassemble(Debugger &debugger, const ArchSpec &arch, const char *plugin_name,
415               const char *flavor, const ExecutionContext &exe_ctx,
416               SymbolContextList &sc_list, uint32_t num_instructions,
417               bool mixed_source_and_assembly, uint32_t num_mixed_context_lines,
418               uint32_t options, Stream &strm);
419 
420   static bool
421   Disassemble(Debugger &debugger, const ArchSpec &arch, const char *plugin_name,
422               const char *flavor, const ExecutionContext &exe_ctx,
423               ConstString name, Module *module,
424               uint32_t num_instructions, bool mixed_source_and_assembly,
425               uint32_t num_mixed_context_lines, uint32_t options, Stream &strm);
426 
427   static bool
428   Disassemble(Debugger &debugger, const ArchSpec &arch, const char *plugin_name,
429               const char *flavor, const ExecutionContext &exe_ctx,
430               uint32_t num_instructions, bool mixed_source_and_assembly,
431               uint32_t num_mixed_context_lines, uint32_t options, Stream &strm);
432 
433   // Constructors and Destructors
434   Disassembler(const ArchSpec &arch, const char *flavor);
435   ~Disassembler() override;
436 
437   typedef const char *(*SummaryCallback)(const Instruction &inst,
438                                          ExecutionContext *exe_context,
439                                          void *user_data);
440 
441   static bool PrintInstructions(Disassembler *disasm_ptr, Debugger &debugger,
442                                 const ArchSpec &arch,
443                                 const ExecutionContext &exe_ctx,
444                                 uint32_t num_instructions,
445                                 bool mixed_source_and_assembly,
446                                 uint32_t num_mixed_context_lines,
447                                 uint32_t options, Stream &strm);
448 
449   size_t ParseInstructions(const ExecutionContext *exe_ctx,
450                            const AddressRange &range, Stream *error_strm_ptr,
451                            bool prefer_file_cache);
452 
453   size_t ParseInstructions(const ExecutionContext *exe_ctx,
454                            const Address &range, uint32_t num_instructions,
455                            bool prefer_file_cache);
456 
457   virtual size_t DecodeInstructions(const Address &base_addr,
458                                     const DataExtractor &data,
459                                     lldb::offset_t data_offset,
460                                     size_t num_instructions, bool append,
461                                     bool data_from_file) = 0;
462 
463   InstructionList &GetInstructionList();
464 
465   const InstructionList &GetInstructionList() const;
466 
467   const ArchSpec &GetArchitecture() const { return m_arch; }
468 
469   const char *GetFlavor() const { return m_flavor.c_str(); }
470 
471   virtual bool FlavorValidForArchSpec(const lldb_private::ArchSpec &arch,
472                                       const char *flavor) = 0;
473 
474 protected:
475   // SourceLine and SourceLinesToDisplay structures are only used in the mixed
476   // source and assembly display methods internal to this class.
477 
478   struct SourceLine {
479     FileSpec file;
480     uint32_t line;
481     uint32_t column;
482 
483     SourceLine() : file(), line(LLDB_INVALID_LINE_NUMBER), column(0) {}
484 
485     bool operator==(const SourceLine &rhs) const {
486       return file == rhs.file && line == rhs.line && rhs.column == column;
487     }
488 
489     bool operator!=(const SourceLine &rhs) const {
490       return file != rhs.file || line != rhs.line || column != rhs.column;
491     }
492 
493     bool IsValid() const { return line != LLDB_INVALID_LINE_NUMBER; }
494   };
495 
496   struct SourceLinesToDisplay {
497     std::vector<SourceLine> lines;
498 
499     // index of the "current" source line, if we want to highlight that when
500     // displaying the source lines.  (as opposed to the surrounding source
501     // lines provided to give context)
502     size_t current_source_line;
503 
504     // Whether to print a blank line at the end of the source lines.
505     bool print_source_context_end_eol;
506 
507     SourceLinesToDisplay()
508         : lines(), current_source_line(-1), print_source_context_end_eol(true) {
509     }
510   };
511 
512   // Get the function's declaration line number, hopefully a line number
513   // earlier than the opening curly brace at the start of the function body.
514   static SourceLine GetFunctionDeclLineEntry(const SymbolContext &sc);
515 
516   // Add the provided SourceLine to the map of filenames-to-source-lines-seen.
517   static void AddLineToSourceLineTables(
518       SourceLine &line,
519       std::map<FileSpec, std::set<uint32_t>> &source_lines_seen);
520 
521   // Given a source line, determine if we should print it when we're doing
522   // mixed source & assembly output. We're currently using the
523   // target.process.thread.step-avoid-regexp setting (which is used for
524   // stepping over inlined STL functions by default) to determine what source
525   // lines to avoid showing.
526   //
527   // Returns true if this source line should be elided (if the source line
528   // should not be displayed).
529   static bool
530   ElideMixedSourceAndDisassemblyLine(const ExecutionContext &exe_ctx,
531                                      const SymbolContext &sc, SourceLine &line);
532 
533   static bool
534   ElideMixedSourceAndDisassemblyLine(const ExecutionContext &exe_ctx,
535                                      const SymbolContext &sc, LineEntry &line) {
536     SourceLine sl;
537     sl.file = line.file;
538     sl.line = line.line;
539     sl.column = line.column;
540     return ElideMixedSourceAndDisassemblyLine(exe_ctx, sc, sl);
541   };
542 
543   // Classes that inherit from Disassembler can see and modify these
544   ArchSpec m_arch;
545   InstructionList m_instruction_list;
546   lldb::addr_t m_base_addr;
547   std::string m_flavor;
548 
549 private:
550   // For Disassembler only
551   DISALLOW_COPY_AND_ASSIGN(Disassembler);
552 };
553 
554 } // namespace lldb_private
555 
556 #endif // liblldb_Disassembler_h_
557