1 //===-- Disassembler.h ------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLDB_CORE_DISASSEMBLER_H
10 #define LLDB_CORE_DISASSEMBLER_H
11 
12 #include "lldb/Core/Address.h"
13 #include "lldb/Core/EmulateInstruction.h"
14 #include "lldb/Core/FormatEntity.h"
15 #include "lldb/Core/Opcode.h"
16 #include "lldb/Core/PluginInterface.h"
17 #include "lldb/Interpreter/OptionValue.h"
18 #include "lldb/Symbol/LineEntry.h"
19 #include "lldb/Target/ExecutionContext.h"
20 #include "lldb/Utility/ArchSpec.h"
21 #include "lldb/Utility/ConstString.h"
22 #include "lldb/Utility/FileSpec.h"
23 #include "lldb/lldb-defines.h"
24 #include "lldb/lldb-forward.h"
25 #include "lldb/lldb-private-enumerations.h"
26 #include "lldb/lldb-types.h"
27 
28 #include "llvm/ADT/StringRef.h"
29 
30 #include <functional>
31 #include <map>
32 #include <memory>
33 #include <set>
34 #include <string>
35 #include <vector>
36 
37 #include <stddef.h>
38 #include <stdint.h>
39 #include <stdio.h>
40 
// Forward declaration only: this header refers to llvm::SmallVectorImpl solely
// by reference (see Instruction::ParseOperands), so the full definition is
// not needed here.
namespace llvm {
template <typename T> class SmallVectorImpl;
} // namespace llvm
44 
45 namespace lldb_private {
// Forward declarations of lldb_private types. Where this header uses them,
// it does so only through pointers or references.
class AddressRange;
class DataExtractor;
class Debugger;
class Disassembler;
class Module;
class StackFrame;
class Stream;
class SymbolContext;
class SymbolContextList;
class Target;
struct RegisterInfo;
57 
58 class Instruction {
59 public:
60   Instruction(const Address &address,
61               AddressClass addr_class = AddressClass::eInvalid);
62 
63   virtual ~Instruction();
64 
65   const Address &GetAddress() const { return m_address; }
66 
67   const char *GetMnemonic(const ExecutionContext *exe_ctx) {
68     CalculateMnemonicOperandsAndCommentIfNeeded(exe_ctx);
69     return m_opcode_name.c_str();
70   }
71 
72   const char *GetOperands(const ExecutionContext *exe_ctx) {
73     CalculateMnemonicOperandsAndCommentIfNeeded(exe_ctx);
74     return m_mnemonics.c_str();
75   }
76 
77   const char *GetComment(const ExecutionContext *exe_ctx) {
78     CalculateMnemonicOperandsAndCommentIfNeeded(exe_ctx);
79     return m_comment.c_str();
80   }
81 
82   virtual void
83   CalculateMnemonicOperandsAndComment(const ExecutionContext *exe_ctx) = 0;
84 
85   AddressClass GetAddressClass();
86 
87   void SetAddress(const Address &addr) {
88     // Invalidate the address class to lazily discover it if we need to.
89     m_address_class = AddressClass::eInvalid;
90     m_address = addr;
91   }
92 
93   /// Dump the text representation of this Instruction to a Stream
94   ///
95   /// Print the (optional) address, (optional) bytes, opcode,
96   /// operands, and instruction comments to a stream.
97   ///
98   /// \param[in] s
99   ///     The Stream to add the text to.
100   ///
101   /// \param[in] show_address
102   ///     Whether the address (using disassembly_addr_format_spec formatting)
103   ///     should be printed.
104   ///
105   /// \param[in] show_bytes
106   ///     Whether the bytes of the assembly instruction should be printed.
107   ///
108   /// \param[in] max_opcode_byte_size
109   ///     The size (in bytes) of the largest instruction in the list that
110   ///     we are printing (for text justification/alignment purposes)
111   ///     Only needed if show_bytes is true.
112   ///
113   /// \param[in] exe_ctx
114   ///     The current execution context, if available.  May be used in
115   ///     the assembling of the operands+comments for this instruction.
116   ///     Pass NULL if not applicable.
117   ///
118   /// \param[in] sym_ctx
119   ///     The SymbolContext for this instruction.
120   ///     Pass NULL if not available/computed.
121   ///     Only needed if show_address is true.
122   ///
123   /// \param[in] prev_sym_ctx
124   ///     The SymbolContext for the previous instruction.  Depending on
125   ///     the disassembly address format specification, a change in
126   ///     Symbol / Function may mean that a line is printed with the new
127   ///     symbol/function name.
128   ///     Pass NULL if unavailable, or if this is the first instruction of
129   ///     the InstructionList.
130   ///     Only needed if show_address is true.
131   ///
132   /// \param[in] disassembly_addr_format
133   ///     The format specification for how addresses are printed.
134   ///     Only needed if show_address is true.
135   ///
136   /// \param[in] max_address_text_size
137   ///     The length of the longest address string at the start of the
138   ///     disassembly line that will be printed (the
139   ///     Debugger::FormatDisassemblerAddress() string)
140   ///     so this method can properly align the instruction opcodes.
141   ///     May be 0 to indicate no indentation/alignment of the opcodes.
142   virtual void Dump(Stream *s, uint32_t max_opcode_byte_size, bool show_address,
143                     bool show_bytes, const ExecutionContext *exe_ctx,
144                     const SymbolContext *sym_ctx,
145                     const SymbolContext *prev_sym_ctx,
146                     const FormatEntity::Entry *disassembly_addr_format,
147                     size_t max_address_text_size);
148 
149   virtual bool DoesBranch() = 0;
150 
151   virtual bool HasDelaySlot();
152 
153   bool CanSetBreakpoint ();
154 
155   virtual size_t Decode(const Disassembler &disassembler,
156                         const DataExtractor &data,
157                         lldb::offset_t data_offset) = 0;
158 
159   virtual void SetDescription(llvm::StringRef) {
160   } // May be overridden in sub-classes that have descriptions.
161 
162   lldb::OptionValueSP ReadArray(FILE *in_file, Stream *out_stream,
163                                 OptionValue::Type data_type);
164 
165   lldb::OptionValueSP ReadDictionary(FILE *in_file, Stream *out_stream);
166 
167   bool DumpEmulation(const ArchSpec &arch);
168 
169   virtual bool TestEmulation(Stream *stream, const char *test_file_name);
170 
171   bool Emulate(const ArchSpec &arch, uint32_t evaluate_options, void *baton,
172                EmulateInstruction::ReadMemoryCallback read_mem_callback,
173                EmulateInstruction::WriteMemoryCallback write_mem_calback,
174                EmulateInstruction::ReadRegisterCallback read_reg_callback,
175                EmulateInstruction::WriteRegisterCallback write_reg_callback);
176 
177   const Opcode &GetOpcode() const { return m_opcode; }
178 
179   uint32_t GetData(DataExtractor &data);
180 
181   struct Operand {
182     enum class Type {
183       Invalid = 0,
184       Register,
185       Immediate,
186       Dereference,
187       Sum,
188       Product
189     } m_type = Type::Invalid;
190     std::vector<Operand> m_children;
191     lldb::addr_t m_immediate = 0;
192     ConstString m_register;
193     bool m_negative = false;
194     bool m_clobbered = false;
195 
196     bool IsValid() { return m_type != Type::Invalid; }
197 
198     static Operand BuildRegister(ConstString &r);
199     static Operand BuildImmediate(lldb::addr_t imm, bool neg);
200     static Operand BuildImmediate(int64_t imm);
201     static Operand BuildDereference(const Operand &ref);
202     static Operand BuildSum(const Operand &lhs, const Operand &rhs);
203     static Operand BuildProduct(const Operand &lhs, const Operand &rhs);
204   };
205 
206   virtual bool ParseOperands(llvm::SmallVectorImpl<Operand> &operands) {
207     return false;
208   }
209 
210   virtual bool IsCall() { return false; }
211 
212 protected:
213   Address m_address; // The section offset address of this instruction
214                      // We include an address class in the Instruction class to
215                      // allow the instruction specify the
216                      // AddressClass::eCodeAlternateISA (currently used for
217                      // thumb), and also to specify data (AddressClass::eData).
218                      // The usual value will be AddressClass::eCode, but often
219                      // when disassembling memory, you might run into data.
220                      // This can help us to disassemble appropriately.
221 private:
222   AddressClass m_address_class; // Use GetAddressClass () accessor function!
223 
224 protected:
225   Opcode m_opcode; // The opcode for this instruction
226   std::string m_opcode_name;
227   std::string m_mnemonics;
228   std::string m_comment;
229   bool m_calculated_strings;
230 
231   void
232   CalculateMnemonicOperandsAndCommentIfNeeded(const ExecutionContext *exe_ctx) {
233     if (!m_calculated_strings) {
234       m_calculated_strings = true;
235       CalculateMnemonicOperandsAndComment(exe_ctx);
236     }
237   }
238 };
239 
// Factory functions that each return a predicate over an
// Instruction::Operand (a std::function taking `const Instruction::Operand &`
// and returning bool). The factories are only declared here; their matching
// rules live in the implementation file.
// NOTE(review): the Match*/Fetch* naming suggests that the "Match" variants
// compare against the supplied value while the "Fetch" variants write into
// the non-const reference they are given -- confirm against the definitions.
namespace OperandMatchers {
// Takes a predicate for the operand itself plus one predicate each for what
// are presumably its two children.
std::function<bool(const Instruction::Operand &)>
MatchBinaryOp(std::function<bool(const Instruction::Operand &)> base,
              std::function<bool(const Instruction::Operand &)> left,
              std::function<bool(const Instruction::Operand &)> right);

// Takes a predicate for the operand itself plus one for what is presumably
// its single child.
std::function<bool(const Instruction::Operand &)>
MatchUnaryOp(std::function<bool(const Instruction::Operand &)> base,
             std::function<bool(const Instruction::Operand &)> child);

std::function<bool(const Instruction::Operand &)>
MatchRegOp(const RegisterInfo &info);

std::function<bool(const Instruction::Operand &)> FetchRegOp(ConstString &reg);

std::function<bool(const Instruction::Operand &)> MatchImmOp(int64_t imm);

std::function<bool(const Instruction::Operand &)> FetchImmOp(int64_t &imm);

std::function<bool(const Instruction::Operand &)>
MatchOpType(Instruction::Operand::Type type);
} // namespace OperandMatchers
262 
263 class InstructionList {
264 public:
265   InstructionList();
266   ~InstructionList();
267 
268   size_t GetSize() const;
269 
270   uint32_t GetMaxOpcocdeByteSize() const;
271 
272   lldb::InstructionSP GetInstructionAtIndex(size_t idx) const;
273 
274   /// Get the instruction at the given address.
275   ///
276   /// \return
277   ///    A valid \a InstructionSP if the address could be found, or null
278   ///    otherwise.
279   lldb::InstructionSP GetInstructionAtAddress(const Address &addr);
280 
281   //------------------------------------------------------------------
282   /// Get the index of the next branch instruction.
283   ///
284   /// Given a list of instructions, find the next branch instruction
285   /// in the list by returning an index.
286   ///
287   /// @param[in] start
288   ///     The instruction index of the first instruction to check.
289   ///
290   /// @param[in] ignore_calls
291   ///     It true, then fine the first branch instruction that isn't
292   ///     a function call (a branch that calls and returns to the next
293   ///     instruction). If false, find the instruction index of any
294   ///     branch in the list.
295   ///
296   /// @param[out] found_calls
297   ///     If non-null, this will be set to true if any calls were found in
298   ///     extending the range.
299   ///
300   /// @return
301   ///     The instruction index of the first branch that is at or past
302   ///     \a start. Returns UINT32_MAX if no matching branches are
303   ///     found.
304   //------------------------------------------------------------------
305   uint32_t GetIndexOfNextBranchInstruction(uint32_t start,
306                                            bool ignore_calls,
307                                            bool *found_calls) const;
308 
309   uint32_t GetIndexOfInstructionAtLoadAddress(lldb::addr_t load_addr,
310                                               Target &target);
311 
312   uint32_t GetIndexOfInstructionAtAddress(const Address &addr);
313 
314   void Clear();
315 
316   void Append(lldb::InstructionSP &inst_sp);
317 
318   void Dump(Stream *s, bool show_address, bool show_bytes,
319             const ExecutionContext *exe_ctx);
320 
321 private:
322   typedef std::vector<lldb::InstructionSP> collection;
323   typedef collection::iterator iterator;
324   typedef collection::const_iterator const_iterator;
325 
326   collection m_instructions;
327 };
328 
/// An Instruction subclass that additionally stores a description string
/// (see SetDescription()). Its string calculation hook is currently a no-op,
/// and copying is disabled.
class PseudoInstruction : public Instruction {
public:
  PseudoInstruction();

  ~PseudoInstruction() override;

  bool DoesBranch() override;

  bool HasDelaySlot() override;

  // Intentionally empty for now: the opcode name, operand text and comment
  // strings are left as they are.
  void CalculateMnemonicOperandsAndComment(
      const ExecutionContext *exe_ctx) override {
    // TODO: fill this in and put opcode name into Instruction::m_opcode_name,
    // mnemonic into Instruction::m_mnemonics, and any comment into
    // Instruction::m_comment
  }

  size_t Decode(const Disassembler &disassembler, const DataExtractor &data,
                lldb::offset_t data_offset) override;

  void SetOpcode(size_t opcode_size, void *opcode_data);

  void SetDescription(llvm::StringRef description) override;

protected:
  std::string m_description;

  // Not copyable.
  PseudoInstruction(const PseudoInstruction &) = delete;
  const PseudoInstruction &operator=(const PseudoInstruction &) = delete;
};
359 
360 class Disassembler : public std::enable_shared_from_this<Disassembler>,
361                      public PluginInterface {
362 public:
363   enum {
364     eOptionNone = 0u,
365     eOptionShowBytes = (1u << 0),
366     eOptionRawOuput = (1u << 1),
367     eOptionMarkPCSourceLine = (1u << 2), // Mark the source line that contains
368                                          // the current PC (mixed mode only)
369     eOptionMarkPCAddress =
370         (1u << 3) // Mark the disassembly line the contains the PC
371   };
372 
373   enum HexImmediateStyle {
374     eHexStyleC,
375     eHexStyleAsm,
376   };
377 
378   // FindPlugin should be lax about the flavor string (it is too annoying to
379   // have various internal uses of the disassembler fail because the global
380   // flavor string gets set wrong. Instead, if you get a flavor string you
381   // don't understand, use the default.  Folks who care to check can use the
382   // FlavorValidForArchSpec method on the disassembler they got back.
383   static lldb::DisassemblerSP
384   FindPlugin(const ArchSpec &arch, const char *flavor, const char *plugin_name);
385 
386   // This version will use the value in the Target settings if flavor is NULL;
387   static lldb::DisassemblerSP FindPluginForTarget(const Target &target,
388                                                   const ArchSpec &arch,
389                                                   const char *flavor,
390                                                   const char *plugin_name);
391 
392   struct Limit {
393     enum { Bytes, Instructions } kind;
394     lldb::addr_t value;
395   };
396 
397   static lldb::DisassemblerSP
398   DisassembleRange(const ArchSpec &arch, const char *plugin_name,
399                    const char *flavor, Target &target,
400                    const AddressRange &disasm_range, bool prefer_file_cache);
401 
402   static lldb::DisassemblerSP
403   DisassembleBytes(const ArchSpec &arch, const char *plugin_name,
404                    const char *flavor, const Address &start, const void *bytes,
405                    size_t length, uint32_t max_num_instructions,
406                    bool data_from_file);
407 
408   static bool Disassemble(Debugger &debugger, const ArchSpec &arch,
409                           const char *plugin_name, const char *flavor,
410                           const ExecutionContext &exe_ctx, const Address &start,
411                           Limit limit, bool mixed_source_and_assembly,
412                           uint32_t num_mixed_context_lines, uint32_t options,
413                           Stream &strm);
414 
415   static bool Disassemble(Debugger &debugger, const ArchSpec &arch,
416                           StackFrame &frame, Stream &strm);
417 
418   // Constructors and Destructors
419   Disassembler(const ArchSpec &arch, const char *flavor);
420   ~Disassembler() override;
421 
422   void PrintInstructions(Debugger &debugger, const ArchSpec &arch,
423                          const ExecutionContext &exe_ctx,
424                          bool mixed_source_and_assembly,
425                          uint32_t num_mixed_context_lines, uint32_t options,
426                          Stream &strm);
427 
428   size_t ParseInstructions(Target &target, Address address, Limit limit,
429                            Stream *error_strm_ptr, bool prefer_file_cache);
430 
431   virtual size_t DecodeInstructions(const Address &base_addr,
432                                     const DataExtractor &data,
433                                     lldb::offset_t data_offset,
434                                     size_t num_instructions, bool append,
435                                     bool data_from_file) = 0;
436 
437   InstructionList &GetInstructionList();
438 
439   const InstructionList &GetInstructionList() const;
440 
441   const ArchSpec &GetArchitecture() const { return m_arch; }
442 
443   const char *GetFlavor() const { return m_flavor.c_str(); }
444 
445   virtual bool FlavorValidForArchSpec(const lldb_private::ArchSpec &arch,
446                                       const char *flavor) = 0;
447 
448 protected:
449   // SourceLine and SourceLinesToDisplay structures are only used in the mixed
450   // source and assembly display methods internal to this class.
451 
452   struct SourceLine {
453     FileSpec file;
454     uint32_t line;
455     uint32_t column;
456 
457     SourceLine() : file(), line(LLDB_INVALID_LINE_NUMBER), column(0) {}
458 
459     bool operator==(const SourceLine &rhs) const {
460       return file == rhs.file && line == rhs.line && rhs.column == column;
461     }
462 
463     bool operator!=(const SourceLine &rhs) const {
464       return file != rhs.file || line != rhs.line || column != rhs.column;
465     }
466 
467     bool IsValid() const { return line != LLDB_INVALID_LINE_NUMBER; }
468   };
469 
470   struct SourceLinesToDisplay {
471     std::vector<SourceLine> lines;
472 
473     // index of the "current" source line, if we want to highlight that when
474     // displaying the source lines.  (as opposed to the surrounding source
475     // lines provided to give context)
476     size_t current_source_line;
477 
478     // Whether to print a blank line at the end of the source lines.
479     bool print_source_context_end_eol;
480 
481     SourceLinesToDisplay()
482         : lines(), current_source_line(-1), print_source_context_end_eol(true) {
483     }
484   };
485 
486   // Get the function's declaration line number, hopefully a line number
487   // earlier than the opening curly brace at the start of the function body.
488   static SourceLine GetFunctionDeclLineEntry(const SymbolContext &sc);
489 
490   // Add the provided SourceLine to the map of filenames-to-source-lines-seen.
491   static void AddLineToSourceLineTables(
492       SourceLine &line,
493       std::map<FileSpec, std::set<uint32_t>> &source_lines_seen);
494 
495   // Given a source line, determine if we should print it when we're doing
496   // mixed source & assembly output. We're currently using the
497   // target.process.thread.step-avoid-regexp setting (which is used for
498   // stepping over inlined STL functions by default) to determine what source
499   // lines to avoid showing.
500   //
501   // Returns true if this source line should be elided (if the source line
502   // should not be displayed).
503   static bool
504   ElideMixedSourceAndDisassemblyLine(const ExecutionContext &exe_ctx,
505                                      const SymbolContext &sc, SourceLine &line);
506 
507   static bool
508   ElideMixedSourceAndDisassemblyLine(const ExecutionContext &exe_ctx,
509                                      const SymbolContext &sc, LineEntry &line) {
510     SourceLine sl;
511     sl.file = line.file;
512     sl.line = line.line;
513     sl.column = line.column;
514     return ElideMixedSourceAndDisassemblyLine(exe_ctx, sc, sl);
515   };
516 
517   // Classes that inherit from Disassembler can see and modify these
518   ArchSpec m_arch;
519   InstructionList m_instruction_list;
520   lldb::addr_t m_base_addr;
521   std::string m_flavor;
522 
523 private:
524   // For Disassembler only
525   Disassembler(const Disassembler &) = delete;
526   const Disassembler &operator=(const Disassembler &) = delete;
527 };
528 
529 } // namespace lldb_private
530 
531 #endif // LLDB_CORE_DISASSEMBLER_H
532