1 //===-- Disassembler.h ------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLDB_CORE_DISASSEMBLER_H
10 #define LLDB_CORE_DISASSEMBLER_H
11 
12 #include "lldb/Core/Address.h"
13 #include "lldb/Core/EmulateInstruction.h"
14 #include "lldb/Core/FormatEntity.h"
15 #include "lldb/Core/Opcode.h"
16 #include "lldb/Core/PluginInterface.h"
17 #include "lldb/Interpreter/OptionValue.h"
18 #include "lldb/Symbol/LineEntry.h"
19 #include "lldb/Target/ExecutionContext.h"
20 #include "lldb/Utility/ArchSpec.h"
21 #include "lldb/Utility/ConstString.h"
22 #include "lldb/Utility/FileSpec.h"
23 #include "lldb/lldb-defines.h"
24 #include "lldb/lldb-forward.h"
25 #include "lldb/lldb-private-enumerations.h"
26 #include "lldb/lldb-types.h"
27 
28 #include "llvm/ADT/StringRef.h"
29 
30 #include <functional>
31 #include <map>
32 #include <memory>
33 #include <set>
34 #include <string>
35 #include <vector>
36 
37 #include <cstddef>
38 #include <cstdint>
39 #include <cstdio>
40 
// Forward declaration of llvm::SmallVectorImpl, which this header names only
// as a reference parameter type (see Instruction::ParseOperands).
namespace llvm {
template <typename T> class SmallVectorImpl;
}
44 
45 namespace lldb_private {
// Forward declarations. Where these types appear in the declarations in this
// header, they are used only through pointers or references.
class AddressRange;
class DataExtractor;
class Debugger;
class Disassembler;
class Module;
class StackFrame;
class Stream;
class SymbolContext;
class SymbolContextList;
class Target;
struct RegisterInfo;
57 
58 class Instruction {
59 public:
60   Instruction(const Address &address,
61               AddressClass addr_class = AddressClass::eInvalid);
62 
63   virtual ~Instruction();
64 
GetAddress()65   const Address &GetAddress() const { return m_address; }
66 
67   const char *GetMnemonic(const ExecutionContext *exe_ctx,
68                           bool markup = false) {
69     CalculateMnemonicOperandsAndCommentIfNeeded(exe_ctx);
70     return markup ? m_markup_opcode_name.c_str() : m_opcode_name.c_str();
71   }
72 
73   const char *GetOperands(const ExecutionContext *exe_ctx,
74                           bool markup = false) {
75     CalculateMnemonicOperandsAndCommentIfNeeded(exe_ctx);
76     return markup ? m_markup_mnemonics.c_str() : m_mnemonics.c_str();
77   }
78 
GetComment(const ExecutionContext * exe_ctx)79   const char *GetComment(const ExecutionContext *exe_ctx) {
80     CalculateMnemonicOperandsAndCommentIfNeeded(exe_ctx);
81     return m_comment.c_str();
82   }
83 
84   /// \return
85   ///    The control flow kind of this instruction, or
86   ///    eInstructionControlFlowKindUnknown if the instruction
87   ///    can't be classified.
88   virtual lldb::InstructionControlFlowKind
GetControlFlowKind(const ExecutionContext * exe_ctx)89   GetControlFlowKind(const ExecutionContext *exe_ctx) {
90     return lldb::eInstructionControlFlowKindUnknown;
91   }
92 
93   virtual void
94   CalculateMnemonicOperandsAndComment(const ExecutionContext *exe_ctx) = 0;
95 
96   AddressClass GetAddressClass();
97 
SetAddress(const Address & addr)98   void SetAddress(const Address &addr) {
99     // Invalidate the address class to lazily discover it if we need to.
100     m_address_class = AddressClass::eInvalid;
101     m_address = addr;
102   }
103 
104   /// Dump the text representation of this Instruction to a Stream
105   ///
106   /// Print the (optional) address, (optional) bytes, opcode,
107   /// operands, and instruction comments to a stream.
108   ///
109   /// \param[in] s
110   ///     The Stream to add the text to.
111   ///
112   /// \param[in] show_address
113   ///     Whether the address (using disassembly_addr_format_spec formatting)
114   ///     should be printed.
115   ///
116   /// \param[in] show_bytes
117   ///     Whether the bytes of the assembly instruction should be printed.
118   ///
119   /// \param[in] show_control_flow_kind
120   ///     Whether the control flow kind of the instruction should be printed.
121   ///
122   /// \param[in] max_opcode_byte_size
123   ///     The size (in bytes) of the largest instruction in the list that
124   ///     we are printing (for text justification/alignment purposes)
125   ///     Only needed if show_bytes is true.
126   ///
127   /// \param[in] exe_ctx
128   ///     The current execution context, if available.  May be used in
129   ///     the assembling of the operands+comments for this instruction.
130   ///     Pass NULL if not applicable.
131   ///
132   /// \param[in] sym_ctx
133   ///     The SymbolContext for this instruction.
134   ///     Pass NULL if not available/computed.
135   ///     Only needed if show_address is true.
136   ///
137   /// \param[in] prev_sym_ctx
138   ///     The SymbolContext for the previous instruction.  Depending on
139   ///     the disassembly address format specification, a change in
140   ///     Symbol / Function may mean that a line is printed with the new
141   ///     symbol/function name.
142   ///     Pass NULL if unavailable, or if this is the first instruction of
143   ///     the InstructionList.
144   ///     Only needed if show_address is true.
145   ///
146   /// \param[in] disassembly_addr_format
147   ///     The format specification for how addresses are printed.
148   ///     Only needed if show_address is true.
149   ///
150   /// \param[in] max_address_text_size
151   ///     The length of the longest address string at the start of the
152   ///     disassembly line that will be printed (the
153   ///     Debugger::FormatDisassemblerAddress() string)
154   ///     so this method can properly align the instruction opcodes.
155   ///     May be 0 to indicate no indentation/alignment of the opcodes.
156   virtual void Dump(Stream *s, uint32_t max_opcode_byte_size, bool show_address,
157                     bool show_bytes, bool show_control_flow_kind,
158                     const ExecutionContext *exe_ctx,
159                     const SymbolContext *sym_ctx,
160                     const SymbolContext *prev_sym_ctx,
161                     const FormatEntity::Entry *disassembly_addr_format,
162                     size_t max_address_text_size);
163 
164   virtual bool DoesBranch() = 0;
165 
166   virtual bool HasDelaySlot();
167 
168   virtual bool IsLoad() = 0;
169 
170   virtual bool IsAuthenticated() = 0;
171 
172   bool CanSetBreakpoint ();
173 
174   virtual size_t Decode(const Disassembler &disassembler,
175                         const DataExtractor &data,
176                         lldb::offset_t data_offset) = 0;
177 
SetDescription(llvm::StringRef)178   virtual void SetDescription(llvm::StringRef) {
179   } // May be overridden in sub-classes that have descriptions.
180 
181   lldb::OptionValueSP ReadArray(FILE *in_file, Stream &out_stream,
182                                 OptionValue::Type data_type);
183 
184   lldb::OptionValueSP ReadDictionary(FILE *in_file, Stream &out_stream);
185 
186   bool DumpEmulation(const ArchSpec &arch);
187 
188   virtual bool TestEmulation(Stream &stream, const char *test_file_name);
189 
190   bool Emulate(const ArchSpec &arch, uint32_t evaluate_options, void *baton,
191                EmulateInstruction::ReadMemoryCallback read_mem_callback,
192                EmulateInstruction::WriteMemoryCallback write_mem_calback,
193                EmulateInstruction::ReadRegisterCallback read_reg_callback,
194                EmulateInstruction::WriteRegisterCallback write_reg_callback);
195 
GetOpcode()196   const Opcode &GetOpcode() const { return m_opcode; }
197 
198   uint32_t GetData(DataExtractor &data);
199 
200   struct Operand {
201     enum class Type {
202       Invalid = 0,
203       Register,
204       Immediate,
205       Dereference,
206       Sum,
207       Product
208     } m_type = Type::Invalid;
209     std::vector<Operand> m_children;
210     lldb::addr_t m_immediate = 0;
211     ConstString m_register;
212     bool m_negative = false;
213     bool m_clobbered = false;
214 
IsValidOperand215     bool IsValid() { return m_type != Type::Invalid; }
216 
217     static Operand BuildRegister(ConstString &r);
218     static Operand BuildImmediate(lldb::addr_t imm, bool neg);
219     static Operand BuildImmediate(int64_t imm);
220     static Operand BuildDereference(const Operand &ref);
221     static Operand BuildSum(const Operand &lhs, const Operand &rhs);
222     static Operand BuildProduct(const Operand &lhs, const Operand &rhs);
223   };
224 
ParseOperands(llvm::SmallVectorImpl<Operand> & operands)225   virtual bool ParseOperands(llvm::SmallVectorImpl<Operand> &operands) {
226     return false;
227   }
228 
IsCall()229   virtual bool IsCall() { return false; }
230 
231   static const char *GetNameForInstructionControlFlowKind(
232       lldb::InstructionControlFlowKind instruction_control_flow_kind);
233 
234 protected:
235   Address m_address; // The section offset address of this instruction
236                      // We include an address class in the Instruction class to
237                      // allow the instruction specify the
238                      // AddressClass::eCodeAlternateISA (currently used for
239                      // thumb), and also to specify data (AddressClass::eData).
240                      // The usual value will be AddressClass::eCode, but often
241                      // when disassembling memory, you might run into data.
242                      // This can help us to disassemble appropriately.
243 private:
244   AddressClass m_address_class; // Use GetAddressClass () accessor function!
245 
246 protected:
247   Opcode m_opcode; // The opcode for this instruction
248   std::string m_opcode_name;
249   std::string m_markup_opcode_name;
250   std::string m_mnemonics;
251   std::string m_markup_mnemonics;
252   std::string m_comment;
253   bool m_calculated_strings;
254 
255   void
CalculateMnemonicOperandsAndCommentIfNeeded(const ExecutionContext * exe_ctx)256   CalculateMnemonicOperandsAndCommentIfNeeded(const ExecutionContext *exe_ctx) {
257     if (!m_calculated_strings) {
258       m_calculated_strings = true;
259       CalculateMnemonicOperandsAndComment(exe_ctx);
260     }
261   }
262 };
263 
// Factory functions that each return a predicate over Instruction::Operand.
// Only the declarations are visible in this header; the notes below are
// inferred from the names and signatures and should be confirmed against the
// implementations.
namespace OperandMatchers {
// Combines a predicate for the operand itself (base) with predicates for two
// operands (left, right) -- presumably the operand's two children.
std::function<bool(const Instruction::Operand &)>
MatchBinaryOp(std::function<bool(const Instruction::Operand &)> base,
              std::function<bool(const Instruction::Operand &)> left,
              std::function<bool(const Instruction::Operand &)> right);

// Combines a predicate for the operand itself (base) with a predicate for one
// operand (child) -- presumably the operand's single child.
std::function<bool(const Instruction::Operand &)>
MatchUnaryOp(std::function<bool(const Instruction::Operand &)> base,
             std::function<bool(const Instruction::Operand &)> child);

// Predicate built from a RegisterInfo.
std::function<bool(const Instruction::Operand &)>
MatchRegOp(const RegisterInfo &info);

// Takes a non-const reference; presumably the returned predicate writes the
// operand's register name into reg -- TODO confirm. If so, reg must outlive
// the returned predicate.
std::function<bool(const Instruction::Operand &)> FetchRegOp(ConstString &reg);

// Predicate built from an immediate value.
std::function<bool(const Instruction::Operand &)> MatchImmOp(int64_t imm);

// Takes a non-const reference; presumably the returned predicate writes the
// operand's immediate value into imm -- TODO confirm. If so, imm must outlive
// the returned predicate.
std::function<bool(const Instruction::Operand &)> FetchImmOp(int64_t &imm);

// Predicate built from an operand type.
std::function<bool(const Instruction::Operand &)>
MatchOpType(Instruction::Operand::Type type);
} // namespace OperandMatchers
286 
/// A list of instructions, stored as a vector of shared pointers
/// (lldb::InstructionSP).
class InstructionList {
public:
  InstructionList();
  ~InstructionList();

  size_t GetSize() const;

  // NOTE(review): "Opcocde" looks like a misspelling of "Opcode". The name is
  // part of the public interface, so it is left unchanged here.
  uint32_t GetMaxOpcocdeByteSize() const;

  lldb::InstructionSP GetInstructionAtIndex(size_t idx) const;

  /// Get the instruction at the given address.
  ///
  /// \return
  ///    A valid \a InstructionSP if the address could be found, or null
  ///    otherwise.
  lldb::InstructionSP GetInstructionAtAddress(const Address &addr);

  /// Get the index of the next branch instruction.
  ///
  /// Given a list of instructions, find the next branch instruction
  /// in the list by returning an index.
  ///
  /// \param[in] start
  ///     The instruction index of the first instruction to check.
  ///
  /// \param[in] ignore_calls
  ///     If true, then find the first branch instruction that isn't
  ///     a function call (a branch that calls and returns to the next
  ///     instruction). If false, find the instruction index of any
  ///     branch in the list.
  ///
  /// \param[out] found_calls
  ///     If non-null, this will be set to true if any calls were found in
  ///     extending the range.
  ///
  /// \return
  ///     The instruction index of the first branch that is at or past
  ///     \a start. Returns UINT32_MAX if no matching branches are
  ///     found.
  uint32_t GetIndexOfNextBranchInstruction(uint32_t start,
                                           bool ignore_calls,
                                           bool *found_calls) const;

  uint32_t GetIndexOfInstructionAtLoadAddress(lldb::addr_t load_addr,
                                              Target &target);

  uint32_t GetIndexOfInstructionAtAddress(const Address &addr);

  void Clear();

  void Append(lldb::InstructionSP &inst_sp);

  void Dump(Stream *s, bool show_address, bool show_bytes,
            bool show_control_flow_kind, const ExecutionContext *exe_ctx);

private:
  // Storage type and its iterator aliases.
  typedef std::vector<lldb::InstructionSP> collection;
  typedef collection::iterator iterator;
  typedef collection::const_iterator const_iterator;

  collection m_instructions;
};
352 
/// An Instruction subclass that carries a textual description
/// (m_description) and lets callers set its opcode directly via SetOpcode().
///
/// It overrides every pure virtual method of Instruction. Copy construction
/// and copy assignment are deleted.
class PseudoInstruction : public Instruction {
public:
  PseudoInstruction();

  ~PseudoInstruction() override;

  bool DoesBranch() override;

  bool HasDelaySlot() override;

  bool IsLoad() override;

  bool IsAuthenticated() override;

  // Currently a no-op: none of the instruction strings are filled in.
  void CalculateMnemonicOperandsAndComment(
      const ExecutionContext *exe_ctx) override {
    // TODO: fill this in and put opcode name into Instruction::m_opcode_name,
    // mnemonic into Instruction::m_mnemonics, and any comment into
    // Instruction::m_comment
  }

  size_t Decode(const Disassembler &disassembler, const DataExtractor &data,
                lldb::offset_t data_offset) override;

  void SetOpcode(size_t opcode_size, void *opcode_data);

  void SetDescription(llvm::StringRef description) override;

protected:
  std::string m_description;

  // Not copyable.
  PseudoInstruction(const PseudoInstruction &) = delete;
  const PseudoInstruction &operator=(const PseudoInstruction &) = delete;
};
387 
388 class Disassembler : public std::enable_shared_from_this<Disassembler>,
389                      public PluginInterface {
390 public:
391   enum {
392     eOptionNone = 0u,
393     eOptionShowBytes = (1u << 0),
394     eOptionRawOuput = (1u << 1),
395     eOptionMarkPCSourceLine = (1u << 2), // Mark the source line that contains
396                                          // the current PC (mixed mode only)
397     eOptionMarkPCAddress =
398         (1u << 3), // Mark the disassembly line the contains the PC
399     eOptionShowControlFlowKind = (1u << 4),
400   };
401 
402   enum HexImmediateStyle {
403     eHexStyleC,
404     eHexStyleAsm,
405   };
406 
407   // FindPlugin should be lax about the flavor string (it is too annoying to
408   // have various internal uses of the disassembler fail because the global
409   // flavor string gets set wrong. Instead, if you get a flavor string you
410   // don't understand, use the default.  Folks who care to check can use the
411   // FlavorValidForArchSpec method on the disassembler they got back.
412   static lldb::DisassemblerSP
413   FindPlugin(const ArchSpec &arch, const char *flavor, const char *plugin_name);
414 
415   // This version will use the value in the Target settings if flavor is NULL;
416   static lldb::DisassemblerSP FindPluginForTarget(const Target &target,
417                                                   const ArchSpec &arch,
418                                                   const char *flavor,
419                                                   const char *plugin_name);
420 
421   struct Limit {
422     enum { Bytes, Instructions } kind;
423     lldb::addr_t value;
424   };
425 
426   static lldb::DisassemblerSP DisassembleRange(const ArchSpec &arch,
427                                                const char *plugin_name,
428                                                const char *flavor,
429                                                Target &target,
430                                                const AddressRange &disasm_range,
431                                                bool force_live_memory = false);
432 
433   static lldb::DisassemblerSP
434   DisassembleBytes(const ArchSpec &arch, const char *plugin_name,
435                    const char *flavor, const Address &start, const void *bytes,
436                    size_t length, uint32_t max_num_instructions,
437                    bool data_from_file);
438 
439   static bool Disassemble(Debugger &debugger, const ArchSpec &arch,
440                           const char *plugin_name, const char *flavor,
441                           const ExecutionContext &exe_ctx, const Address &start,
442                           Limit limit, bool mixed_source_and_assembly,
443                           uint32_t num_mixed_context_lines, uint32_t options,
444                           Stream &strm);
445 
446   static bool Disassemble(Debugger &debugger, const ArchSpec &arch,
447                           StackFrame &frame, Stream &strm);
448 
449   // Constructors and Destructors
450   Disassembler(const ArchSpec &arch, const char *flavor);
451   ~Disassembler() override;
452 
453   void PrintInstructions(Debugger &debugger, const ArchSpec &arch,
454                          const ExecutionContext &exe_ctx,
455                          bool mixed_source_and_assembly,
456                          uint32_t num_mixed_context_lines, uint32_t options,
457                          Stream &strm);
458 
459   size_t ParseInstructions(Target &target, Address address, Limit limit,
460                            Stream *error_strm_ptr,
461                            bool force_live_memory = false);
462 
463   virtual size_t DecodeInstructions(const Address &base_addr,
464                                     const DataExtractor &data,
465                                     lldb::offset_t data_offset,
466                                     size_t num_instructions, bool append,
467                                     bool data_from_file) = 0;
468 
469   InstructionList &GetInstructionList();
470 
471   const InstructionList &GetInstructionList() const;
472 
GetArchitecture()473   const ArchSpec &GetArchitecture() const { return m_arch; }
474 
GetFlavor()475   const char *GetFlavor() const { return m_flavor.c_str(); }
476 
477   virtual bool FlavorValidForArchSpec(const lldb_private::ArchSpec &arch,
478                                       const char *flavor) = 0;
479 
480 protected:
481   // SourceLine and SourceLinesToDisplay structures are only used in the mixed
482   // source and assembly display methods internal to this class.
483 
484   struct SourceLine {
485     FileSpec file;
486     uint32_t line = LLDB_INVALID_LINE_NUMBER;
487     uint32_t column = 0;
488 
489     SourceLine() = default;
490 
491     bool operator==(const SourceLine &rhs) const {
492       return file == rhs.file && line == rhs.line && rhs.column == column;
493     }
494 
495     bool operator!=(const SourceLine &rhs) const {
496       return file != rhs.file || line != rhs.line || column != rhs.column;
497     }
498 
IsValidSourceLine499     bool IsValid() const { return line != LLDB_INVALID_LINE_NUMBER; }
500   };
501 
502   struct SourceLinesToDisplay {
503     std::vector<SourceLine> lines;
504 
505     // index of the "current" source line, if we want to highlight that when
506     // displaying the source lines.  (as opposed to the surrounding source
507     // lines provided to give context)
508     size_t current_source_line = -1;
509 
510     // Whether to print a blank line at the end of the source lines.
511     bool print_source_context_end_eol = true;
512 
513     SourceLinesToDisplay() = default;
514   };
515 
516   // Get the function's declaration line number, hopefully a line number
517   // earlier than the opening curly brace at the start of the function body.
518   static SourceLine GetFunctionDeclLineEntry(const SymbolContext &sc);
519 
520   // Add the provided SourceLine to the map of filenames-to-source-lines-seen.
521   static void AddLineToSourceLineTables(
522       SourceLine &line,
523       std::map<FileSpec, std::set<uint32_t>> &source_lines_seen);
524 
525   // Given a source line, determine if we should print it when we're doing
526   // mixed source & assembly output. We're currently using the
527   // target.process.thread.step-avoid-regexp setting (which is used for
528   // stepping over inlined STL functions by default) to determine what source
529   // lines to avoid showing.
530   //
531   // Returns true if this source line should be elided (if the source line
532   // should not be displayed).
533   static bool
534   ElideMixedSourceAndDisassemblyLine(const ExecutionContext &exe_ctx,
535                                      const SymbolContext &sc, SourceLine &line);
536 
537   static bool
ElideMixedSourceAndDisassemblyLine(const ExecutionContext & exe_ctx,const SymbolContext & sc,LineEntry & line)538   ElideMixedSourceAndDisassemblyLine(const ExecutionContext &exe_ctx,
539                                      const SymbolContext &sc, LineEntry &line) {
540     SourceLine sl;
541     sl.file = line.file;
542     sl.line = line.line;
543     sl.column = line.column;
544     return ElideMixedSourceAndDisassemblyLine(exe_ctx, sc, sl);
545   };
546 
547   // Classes that inherit from Disassembler can see and modify these
548   ArchSpec m_arch;
549   InstructionList m_instruction_list;
550   lldb::addr_t m_base_addr;
551   std::string m_flavor;
552 
553 private:
554   // For Disassembler only
555   Disassembler(const Disassembler &) = delete;
556   const Disassembler &operator=(const Disassembler &) = delete;
557 };
558 
559 } // namespace lldb_private
560 
561 #endif // LLDB_CORE_DISASSEMBLER_H
562