1 //===-- Disassembler.h ------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLDB_CORE_DISASSEMBLER_H
10 #define LLDB_CORE_DISASSEMBLER_H
11 
12 #include "lldb/Core/Address.h"
13 #include "lldb/Core/EmulateInstruction.h"
14 #include "lldb/Core/FormatEntity.h"
15 #include "lldb/Core/Opcode.h"
16 #include "lldb/Core/PluginInterface.h"
17 #include "lldb/Interpreter/OptionValue.h"
18 #include "lldb/Symbol/LineEntry.h"
19 #include "lldb/Target/ExecutionContext.h"
20 #include "lldb/Utility/ArchSpec.h"
21 #include "lldb/Utility/ConstString.h"
22 #include "lldb/Utility/FileSpec.h"
23 #include "lldb/lldb-defines.h"
24 #include "lldb/lldb-forward.h"
25 #include "lldb/lldb-private-enumerations.h"
26 #include "lldb/lldb-types.h"
27 
28 #include "llvm/ADT/StringRef.h"
29 
30 #include <functional>
31 #include <map>
32 #include <memory>
33 #include <set>
34 #include <string>
35 #include <vector>
36 
37 #include <cstddef>
38 #include <cstdint>
39 #include <cstdio>
40 
// Forward declaration only: within this header SmallVectorImpl is referred to
// solely by reference (Instruction::ParseOperands), so its full definition is
// not required here.
namespace llvm {
template <typename T> class SmallVectorImpl;
}
44 
45 namespace lldb_private {
// Forward declarations of lldb_private types. Those that are used in this
// header appear only as pointer or reference parameters.
class AddressRange;
class DataExtractor;
class Debugger;
class Disassembler;
class Module;
class StackFrame;
class Stream;
class SymbolContext;
class SymbolContextList;
class Target;
struct RegisterInfo;
57 
58 class Instruction {
59 public:
60   Instruction(const Address &address,
61               AddressClass addr_class = AddressClass::eInvalid);
62 
63   virtual ~Instruction();
64 
65   const Address &GetAddress() const { return m_address; }
66 
67   const char *GetMnemonic(const ExecutionContext *exe_ctx) {
68     CalculateMnemonicOperandsAndCommentIfNeeded(exe_ctx);
69     return m_opcode_name.c_str();
70   }
71 
72   const char *GetOperands(const ExecutionContext *exe_ctx) {
73     CalculateMnemonicOperandsAndCommentIfNeeded(exe_ctx);
74     return m_mnemonics.c_str();
75   }
76 
77   const char *GetComment(const ExecutionContext *exe_ctx) {
78     CalculateMnemonicOperandsAndCommentIfNeeded(exe_ctx);
79     return m_comment.c_str();
80   }
81 
82   /// \return
83   ///    The control flow kind of this instruction, or
84   ///    eInstructionControlFlowKindUnknown if the instruction
85   ///    can't be classified.
86   virtual lldb::InstructionControlFlowKind
87   GetControlFlowKind(const ExecutionContext *exe_ctx) {
88     return lldb::eInstructionControlFlowKindUnknown;
89   }
90 
91   virtual void
92   CalculateMnemonicOperandsAndComment(const ExecutionContext *exe_ctx) = 0;
93 
94   AddressClass GetAddressClass();
95 
96   void SetAddress(const Address &addr) {
97     // Invalidate the address class to lazily discover it if we need to.
98     m_address_class = AddressClass::eInvalid;
99     m_address = addr;
100   }
101 
102   /// Dump the text representation of this Instruction to a Stream
103   ///
104   /// Print the (optional) address, (optional) bytes, opcode,
105   /// operands, and instruction comments to a stream.
106   ///
107   /// \param[in] s
108   ///     The Stream to add the text to.
109   ///
110   /// \param[in] show_address
111   ///     Whether the address (using disassembly_addr_format_spec formatting)
112   ///     should be printed.
113   ///
114   /// \param[in] show_bytes
115   ///     Whether the bytes of the assembly instruction should be printed.
116   ///
117   /// \param[in] show_control_flow_kind
118   ///     Whether the control flow kind of the instruction should be printed.
119   ///
120   /// \param[in] max_opcode_byte_size
121   ///     The size (in bytes) of the largest instruction in the list that
122   ///     we are printing (for text justification/alignment purposes)
123   ///     Only needed if show_bytes is true.
124   ///
125   /// \param[in] exe_ctx
126   ///     The current execution context, if available.  May be used in
127   ///     the assembling of the operands+comments for this instruction.
128   ///     Pass NULL if not applicable.
129   ///
130   /// \param[in] sym_ctx
131   ///     The SymbolContext for this instruction.
132   ///     Pass NULL if not available/computed.
133   ///     Only needed if show_address is true.
134   ///
135   /// \param[in] prev_sym_ctx
136   ///     The SymbolContext for the previous instruction.  Depending on
137   ///     the disassembly address format specification, a change in
138   ///     Symbol / Function may mean that a line is printed with the new
139   ///     symbol/function name.
140   ///     Pass NULL if unavailable, or if this is the first instruction of
141   ///     the InstructionList.
142   ///     Only needed if show_address is true.
143   ///
144   /// \param[in] disassembly_addr_format
145   ///     The format specification for how addresses are printed.
146   ///     Only needed if show_address is true.
147   ///
148   /// \param[in] max_address_text_size
149   ///     The length of the longest address string at the start of the
150   ///     disassembly line that will be printed (the
151   ///     Debugger::FormatDisassemblerAddress() string)
152   ///     so this method can properly align the instruction opcodes.
153   ///     May be 0 to indicate no indentation/alignment of the opcodes.
154   virtual void Dump(Stream *s, uint32_t max_opcode_byte_size, bool show_address,
155                     bool show_bytes, bool show_control_flow_kind,
156                     const ExecutionContext *exe_ctx,
157                     const SymbolContext *sym_ctx,
158                     const SymbolContext *prev_sym_ctx,
159                     const FormatEntity::Entry *disassembly_addr_format,
160                     size_t max_address_text_size);
161 
162   virtual bool DoesBranch() = 0;
163 
164   virtual bool HasDelaySlot();
165 
166   virtual bool IsLoad() = 0;
167 
168   virtual bool IsAuthenticated() = 0;
169 
170   bool CanSetBreakpoint ();
171 
172   virtual size_t Decode(const Disassembler &disassembler,
173                         const DataExtractor &data,
174                         lldb::offset_t data_offset) = 0;
175 
176   virtual void SetDescription(llvm::StringRef) {
177   } // May be overridden in sub-classes that have descriptions.
178 
179   lldb::OptionValueSP ReadArray(FILE *in_file, Stream &out_stream,
180                                 OptionValue::Type data_type);
181 
182   lldb::OptionValueSP ReadDictionary(FILE *in_file, Stream &out_stream);
183 
184   bool DumpEmulation(const ArchSpec &arch);
185 
186   virtual bool TestEmulation(Stream &stream, const char *test_file_name);
187 
188   bool Emulate(const ArchSpec &arch, uint32_t evaluate_options, void *baton,
189                EmulateInstruction::ReadMemoryCallback read_mem_callback,
190                EmulateInstruction::WriteMemoryCallback write_mem_calback,
191                EmulateInstruction::ReadRegisterCallback read_reg_callback,
192                EmulateInstruction::WriteRegisterCallback write_reg_callback);
193 
194   const Opcode &GetOpcode() const { return m_opcode; }
195 
196   uint32_t GetData(DataExtractor &data);
197 
198   struct Operand {
199     enum class Type {
200       Invalid = 0,
201       Register,
202       Immediate,
203       Dereference,
204       Sum,
205       Product
206     } m_type = Type::Invalid;
207     std::vector<Operand> m_children;
208     lldb::addr_t m_immediate = 0;
209     ConstString m_register;
210     bool m_negative = false;
211     bool m_clobbered = false;
212 
213     bool IsValid() { return m_type != Type::Invalid; }
214 
215     static Operand BuildRegister(ConstString &r);
216     static Operand BuildImmediate(lldb::addr_t imm, bool neg);
217     static Operand BuildImmediate(int64_t imm);
218     static Operand BuildDereference(const Operand &ref);
219     static Operand BuildSum(const Operand &lhs, const Operand &rhs);
220     static Operand BuildProduct(const Operand &lhs, const Operand &rhs);
221   };
222 
223   virtual bool ParseOperands(llvm::SmallVectorImpl<Operand> &operands) {
224     return false;
225   }
226 
227   virtual bool IsCall() { return false; }
228 
229   static const char *GetNameForInstructionControlFlowKind(
230       lldb::InstructionControlFlowKind instruction_control_flow_kind);
231 
232 protected:
233   Address m_address; // The section offset address of this instruction
234                      // We include an address class in the Instruction class to
235                      // allow the instruction specify the
236                      // AddressClass::eCodeAlternateISA (currently used for
237                      // thumb), and also to specify data (AddressClass::eData).
238                      // The usual value will be AddressClass::eCode, but often
239                      // when disassembling memory, you might run into data.
240                      // This can help us to disassemble appropriately.
241 private:
242   AddressClass m_address_class; // Use GetAddressClass () accessor function!
243 
244 protected:
245   Opcode m_opcode; // The opcode for this instruction
246   std::string m_opcode_name;
247   std::string m_mnemonics;
248   std::string m_comment;
249   bool m_calculated_strings;
250 
251   void
252   CalculateMnemonicOperandsAndCommentIfNeeded(const ExecutionContext *exe_ctx) {
253     if (!m_calculated_strings) {
254       m_calculated_strings = true;
255       CalculateMnemonicOperandsAndComment(exe_ctx);
256     }
257   }
258 };
259 
260 namespace OperandMatchers {
261 std::function<bool(const Instruction::Operand &)>
262 MatchBinaryOp(std::function<bool(const Instruction::Operand &)> base,
263               std::function<bool(const Instruction::Operand &)> left,
264               std::function<bool(const Instruction::Operand &)> right);
265 
266 std::function<bool(const Instruction::Operand &)>
267 MatchUnaryOp(std::function<bool(const Instruction::Operand &)> base,
268              std::function<bool(const Instruction::Operand &)> child);
269 
270 std::function<bool(const Instruction::Operand &)>
271 MatchRegOp(const RegisterInfo &info);
272 
273 std::function<bool(const Instruction::Operand &)> FetchRegOp(ConstString &reg);
274 
275 std::function<bool(const Instruction::Operand &)> MatchImmOp(int64_t imm);
276 
277 std::function<bool(const Instruction::Operand &)> FetchImmOp(int64_t &imm);
278 
279 std::function<bool(const Instruction::Operand &)>
280 MatchOpType(Instruction::Operand::Type type);
281 }
282 
283 class InstructionList {
284 public:
285   InstructionList();
286   ~InstructionList();
287 
288   size_t GetSize() const;
289 
290   uint32_t GetMaxOpcocdeByteSize() const;
291 
292   lldb::InstructionSP GetInstructionAtIndex(size_t idx) const;
293 
294   /// Get the instruction at the given address.
295   ///
296   /// \return
297   ///    A valid \a InstructionSP if the address could be found, or null
298   ///    otherwise.
299   lldb::InstructionSP GetInstructionAtAddress(const Address &addr);
300 
301   //------------------------------------------------------------------
302   /// Get the index of the next branch instruction.
303   ///
304   /// Given a list of instructions, find the next branch instruction
305   /// in the list by returning an index.
306   ///
307   /// @param[in] start
308   ///     The instruction index of the first instruction to check.
309   ///
310   /// @param[in] ignore_calls
311   ///     It true, then fine the first branch instruction that isn't
312   ///     a function call (a branch that calls and returns to the next
313   ///     instruction). If false, find the instruction index of any
314   ///     branch in the list.
315   ///
316   /// @param[out] found_calls
317   ///     If non-null, this will be set to true if any calls were found in
318   ///     extending the range.
319   ///
320   /// @return
321   ///     The instruction index of the first branch that is at or past
322   ///     \a start. Returns UINT32_MAX if no matching branches are
323   ///     found.
324   //------------------------------------------------------------------
325   uint32_t GetIndexOfNextBranchInstruction(uint32_t start,
326                                            bool ignore_calls,
327                                            bool *found_calls) const;
328 
329   uint32_t GetIndexOfInstructionAtLoadAddress(lldb::addr_t load_addr,
330                                               Target &target);
331 
332   uint32_t GetIndexOfInstructionAtAddress(const Address &addr);
333 
334   void Clear();
335 
336   void Append(lldb::InstructionSP &inst_sp);
337 
338   void Dump(Stream *s, bool show_address, bool show_bytes,
339             bool show_control_flow_kind, const ExecutionContext *exe_ctx);
340 
341 private:
342   typedef std::vector<lldb::InstructionSP> collection;
343   typedef collection::iterator iterator;
344   typedef collection::const_iterator const_iterator;
345 
346   collection m_instructions;
347 };
348 
349 class PseudoInstruction : public Instruction {
350 public:
351   PseudoInstruction();
352 
353   ~PseudoInstruction() override;
354 
355   bool DoesBranch() override;
356 
357   bool HasDelaySlot() override;
358 
359   bool IsLoad() override;
360 
361   bool IsAuthenticated() override;
362 
363   void CalculateMnemonicOperandsAndComment(
364       const ExecutionContext *exe_ctx) override {
365     // TODO: fill this in and put opcode name into Instruction::m_opcode_name,
366     // mnemonic into Instruction::m_mnemonics, and any comment into
367     // Instruction::m_comment
368   }
369 
370   size_t Decode(const Disassembler &disassembler, const DataExtractor &data,
371                 lldb::offset_t data_offset) override;
372 
373   void SetOpcode(size_t opcode_size, void *opcode_data);
374 
375   void SetDescription(llvm::StringRef description) override;
376 
377 protected:
378   std::string m_description;
379 
380   PseudoInstruction(const PseudoInstruction &) = delete;
381   const PseudoInstruction &operator=(const PseudoInstruction &) = delete;
382 };
383 
384 class Disassembler : public std::enable_shared_from_this<Disassembler>,
385                      public PluginInterface {
386 public:
387   enum {
388     eOptionNone = 0u,
389     eOptionShowBytes = (1u << 0),
390     eOptionRawOuput = (1u << 1),
391     eOptionMarkPCSourceLine = (1u << 2), // Mark the source line that contains
392                                          // the current PC (mixed mode only)
393     eOptionMarkPCAddress =
394         (1u << 3), // Mark the disassembly line the contains the PC
395     eOptionShowControlFlowKind = (1u << 4),
396   };
397 
398   enum HexImmediateStyle {
399     eHexStyleC,
400     eHexStyleAsm,
401   };
402 
403   // FindPlugin should be lax about the flavor string (it is too annoying to
404   // have various internal uses of the disassembler fail because the global
405   // flavor string gets set wrong. Instead, if you get a flavor string you
406   // don't understand, use the default.  Folks who care to check can use the
407   // FlavorValidForArchSpec method on the disassembler they got back.
408   static lldb::DisassemblerSP
409   FindPlugin(const ArchSpec &arch, const char *flavor, const char *plugin_name);
410 
411   // This version will use the value in the Target settings if flavor is NULL;
412   static lldb::DisassemblerSP FindPluginForTarget(const Target &target,
413                                                   const ArchSpec &arch,
414                                                   const char *flavor,
415                                                   const char *plugin_name);
416 
417   struct Limit {
418     enum { Bytes, Instructions } kind;
419     lldb::addr_t value;
420   };
421 
422   static lldb::DisassemblerSP DisassembleRange(const ArchSpec &arch,
423                                                const char *plugin_name,
424                                                const char *flavor,
425                                                Target &target,
426                                                const AddressRange &disasm_range,
427                                                bool force_live_memory = false);
428 
429   static lldb::DisassemblerSP
430   DisassembleBytes(const ArchSpec &arch, const char *plugin_name,
431                    const char *flavor, const Address &start, const void *bytes,
432                    size_t length, uint32_t max_num_instructions,
433                    bool data_from_file);
434 
435   static bool Disassemble(Debugger &debugger, const ArchSpec &arch,
436                           const char *plugin_name, const char *flavor,
437                           const ExecutionContext &exe_ctx, const Address &start,
438                           Limit limit, bool mixed_source_and_assembly,
439                           uint32_t num_mixed_context_lines, uint32_t options,
440                           Stream &strm);
441 
442   static bool Disassemble(Debugger &debugger, const ArchSpec &arch,
443                           StackFrame &frame, Stream &strm);
444 
445   // Constructors and Destructors
446   Disassembler(const ArchSpec &arch, const char *flavor);
447   ~Disassembler() override;
448 
449   void PrintInstructions(Debugger &debugger, const ArchSpec &arch,
450                          const ExecutionContext &exe_ctx,
451                          bool mixed_source_and_assembly,
452                          uint32_t num_mixed_context_lines, uint32_t options,
453                          Stream &strm);
454 
455   size_t ParseInstructions(Target &target, Address address, Limit limit,
456                            Stream *error_strm_ptr,
457                            bool force_live_memory = false);
458 
459   virtual size_t DecodeInstructions(const Address &base_addr,
460                                     const DataExtractor &data,
461                                     lldb::offset_t data_offset,
462                                     size_t num_instructions, bool append,
463                                     bool data_from_file) = 0;
464 
465   InstructionList &GetInstructionList();
466 
467   const InstructionList &GetInstructionList() const;
468 
469   const ArchSpec &GetArchitecture() const { return m_arch; }
470 
471   const char *GetFlavor() const { return m_flavor.c_str(); }
472 
473   virtual bool FlavorValidForArchSpec(const lldb_private::ArchSpec &arch,
474                                       const char *flavor) = 0;
475 
476 protected:
477   // SourceLine and SourceLinesToDisplay structures are only used in the mixed
478   // source and assembly display methods internal to this class.
479 
480   struct SourceLine {
481     FileSpec file;
482     uint32_t line = LLDB_INVALID_LINE_NUMBER;
483     uint32_t column = 0;
484 
485     SourceLine() = default;
486 
487     bool operator==(const SourceLine &rhs) const {
488       return file == rhs.file && line == rhs.line && rhs.column == column;
489     }
490 
491     bool operator!=(const SourceLine &rhs) const {
492       return file != rhs.file || line != rhs.line || column != rhs.column;
493     }
494 
495     bool IsValid() const { return line != LLDB_INVALID_LINE_NUMBER; }
496   };
497 
498   struct SourceLinesToDisplay {
499     std::vector<SourceLine> lines;
500 
501     // index of the "current" source line, if we want to highlight that when
502     // displaying the source lines.  (as opposed to the surrounding source
503     // lines provided to give context)
504     size_t current_source_line = -1;
505 
506     // Whether to print a blank line at the end of the source lines.
507     bool print_source_context_end_eol = true;
508 
509     SourceLinesToDisplay() = default;
510   };
511 
512   // Get the function's declaration line number, hopefully a line number
513   // earlier than the opening curly brace at the start of the function body.
514   static SourceLine GetFunctionDeclLineEntry(const SymbolContext &sc);
515 
516   // Add the provided SourceLine to the map of filenames-to-source-lines-seen.
517   static void AddLineToSourceLineTables(
518       SourceLine &line,
519       std::map<FileSpec, std::set<uint32_t>> &source_lines_seen);
520 
521   // Given a source line, determine if we should print it when we're doing
522   // mixed source & assembly output. We're currently using the
523   // target.process.thread.step-avoid-regexp setting (which is used for
524   // stepping over inlined STL functions by default) to determine what source
525   // lines to avoid showing.
526   //
527   // Returns true if this source line should be elided (if the source line
528   // should not be displayed).
529   static bool
530   ElideMixedSourceAndDisassemblyLine(const ExecutionContext &exe_ctx,
531                                      const SymbolContext &sc, SourceLine &line);
532 
533   static bool
534   ElideMixedSourceAndDisassemblyLine(const ExecutionContext &exe_ctx,
535                                      const SymbolContext &sc, LineEntry &line) {
536     SourceLine sl;
537     sl.file = line.file;
538     sl.line = line.line;
539     sl.column = line.column;
540     return ElideMixedSourceAndDisassemblyLine(exe_ctx, sc, sl);
541   };
542 
543   // Classes that inherit from Disassembler can see and modify these
544   ArchSpec m_arch;
545   InstructionList m_instruction_list;
546   lldb::addr_t m_base_addr;
547   std::string m_flavor;
548 
549 private:
550   // For Disassembler only
551   Disassembler(const Disassembler &) = delete;
552   const Disassembler &operator=(const Disassembler &) = delete;
553 };
554 
555 } // namespace lldb_private
556 
557 #endif // LLDB_CORE_DISASSEMBLER_H
558