1 //===- llvm/MC/MCDisassembler.h - Disassembler interface --------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_MC_MCDISASSEMBLER_MCDISASSEMBLER_H
10 #define LLVM_MC_MCDISASSEMBLER_MCDISASSEMBLER_H
11 
12 #include "llvm/ADT/Optional.h"
13 #include "llvm/ADT/StringRef.h"
14 #include "llvm/BinaryFormat/XCOFF.h"
15 #include "llvm/MC/MCDisassembler/MCSymbolizer.h"
16 #include <cstdint>
17 #include <memory>
18 #include <vector>
19 
20 namespace llvm {
21 
22 struct XCOFFSymbolInfo {
23   Optional<XCOFF::StorageMappingClass> StorageMappingClass;
24   Optional<uint32_t> Index;
25   bool IsLabel;
XCOFFSymbolInfoXCOFFSymbolInfo26   XCOFFSymbolInfo(Optional<XCOFF::StorageMappingClass> Smc,
27                   Optional<uint32_t> Idx, bool Label)
28       : StorageMappingClass(Smc), Index(Idx), IsLabel(Label) {}
29 
30   bool operator<(const XCOFFSymbolInfo &SymInfo) const;
31 };
32 
33 struct SymbolInfoTy {
34   uint64_t Addr;
35   StringRef Name;
36   union {
37     uint8_t Type;
38     XCOFFSymbolInfo XCOFFSymInfo;
39   };
40 
41 private:
42   bool IsXCOFF;
43 
44 public:
SymbolInfoTySymbolInfoTy45   SymbolInfoTy(uint64_t Addr, StringRef Name,
46                Optional<XCOFF::StorageMappingClass> Smc, Optional<uint32_t> Idx,
47                bool Label)
48       : Addr(Addr), Name(Name), XCOFFSymInfo(Smc, Idx, Label), IsXCOFF(true) {}
SymbolInfoTySymbolInfoTy49   SymbolInfoTy(uint64_t Addr, StringRef Name, uint8_t Type)
50       : Addr(Addr), Name(Name), Type(Type), IsXCOFF(false) {}
isXCOFFSymbolInfoTy51   bool isXCOFF() const { return IsXCOFF; }
52 
53 private:
54   friend bool operator<(const SymbolInfoTy &P1, const SymbolInfoTy &P2) {
55     assert(P1.IsXCOFF == P2.IsXCOFF &&
56            "P1.IsXCOFF should be equal to P2.IsXCOFF.");
57     if (P1.IsXCOFF)
58       return std::tie(P1.Addr, P1.XCOFFSymInfo, P1.Name) <
59              std::tie(P2.Addr, P2.XCOFFSymInfo, P2.Name);
60 
61     return std::tie(P1.Addr, P1.Name, P1.Type) <
62              std::tie(P2.Addr, P2.Name, P2.Type);
63   }
64 };
65 
/// A list of symbol records.
/// NOTE(review): judging by the name, one list per section -- confirm with
/// the users of this alias.
using SectionSymbolsTy = std::vector<SymbolInfoTy>;

// Forward declarations: this header only names these types in function
// declarations, references and pointers, so their definitions are not needed.
template <typename T> class ArrayRef;
class MCContext;
class MCInst;
class MCSubtargetInfo;
class raw_ostream;
73 
74 /// Superclass for all disassemblers. Consumes a memory region and provides an
75 /// array of assembly instructions.
76 class MCDisassembler {
77 public:
78   /// Ternary decode status. Most backends will just use Fail and
79   /// Success, however some have a concept of an instruction with
80   /// understandable semantics but which is architecturally
81   /// incorrect. An example of this is ARM UNPREDICTABLE instructions
82   /// which are disassemblable but cause undefined behaviour.
83   ///
84   /// Because it makes sense to disassemble these instructions, there
85   /// is a "soft fail" failure mode that indicates the MCInst& is
86   /// valid but architecturally incorrect.
87   ///
88   /// The enum numbers are deliberately chosen such that reduction
89   /// from Success->SoftFail ->Fail can be done with a simple
90   /// bitwise-AND:
91   ///
92   ///   LEFT & TOP =  | Success       Unpredictable   Fail
93   ///   --------------+-----------------------------------
94   ///   Success       | Success       Unpredictable   Fail
95   ///   Unpredictable | Unpredictable Unpredictable   Fail
96   ///   Fail          | Fail          Fail            Fail
97   ///
98   /// An easy way of encoding this is as 0b11, 0b01, 0b00 for
99   /// Success, SoftFail, Fail respectively.
100   enum DecodeStatus {
101     Fail = 0,
102     SoftFail = 1,
103     Success = 3
104   };
105 
MCDisassembler(const MCSubtargetInfo & STI,MCContext & Ctx)106   MCDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx)
107     : Ctx(Ctx), STI(STI) {}
108 
109   virtual ~MCDisassembler();
110 
111   /// Returns the disassembly of a single instruction.
112   ///
113   /// \param Instr    - An MCInst to populate with the contents of the
114   ///                   instruction.
115   /// \param Size     - A value to populate with the size of the instruction, or
116   ///                   the number of bytes consumed while attempting to decode
117   ///                   an invalid instruction.
118   /// \param Address  - The address, in the memory space of region, of the first
119   ///                   byte of the instruction.
120   /// \param Bytes    - A reference to the actual bytes of the instruction.
121   /// \param CStream  - The stream to print comments and annotations on.
122   /// \return         - MCDisassembler::Success if the instruction is valid,
123   ///                   MCDisassembler::SoftFail if the instruction was
124   ///                                            disassemblable but invalid,
125   ///                   MCDisassembler::Fail if the instruction was invalid.
126   virtual DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
127                                       ArrayRef<uint8_t> Bytes, uint64_t Address,
128                                       raw_ostream &CStream) const = 0;
129 
130   /// Used to perform separate target specific disassembly for a particular
131   /// symbol. May parse any prelude that precedes instructions after the
132   /// start of a symbol, or the entire symbol.
133   /// This is used for example by WebAssembly to decode preludes.
134   ///
135   /// Base implementation returns None. So all targets by default ignore to
136   /// treat symbols separately.
137   ///
138   /// \param Symbol   - The symbol.
139   /// \param Size     - The number of bytes consumed.
140   /// \param Address  - The address, in the memory space of region, of the first
141   ///                   byte of the symbol.
142   /// \param Bytes    - A reference to the actual bytes at the symbol location.
143   /// \param CStream  - The stream to print comments and annotations on.
144   /// \return         - MCDisassembler::Success if bytes are decoded
145   ///                   successfully. Size must hold the number of bytes that
146   ///                   were decoded.
147   ///                 - MCDisassembler::Fail if the bytes are invalid. Size
148   ///                   must hold the number of bytes that were decoded before
149   ///                   failing. The target must print nothing. This can be
150   ///                   done by buffering the output if needed.
151   ///                 - None if the target doesn't want to handle the symbol
152   ///                   separately. Value of Size is ignored in this case.
153   virtual Optional<DecodeStatus>
154   onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size, ArrayRef<uint8_t> Bytes,
155                 uint64_t Address, raw_ostream &CStream) const;
156   // TODO:
157   // Implement similar hooks that can be used at other points during
158   // disassembly. Something along the following lines:
159   // - onBeforeInstructionDecode()
160   // - onAfterInstructionDecode()
161   // - onSymbolEnd()
162   // It should help move much of the target specific code from llvm-objdump to
163   // respective target disassemblers.
164 
165 private:
166   MCContext &Ctx;
167 
168 protected:
169   // Subtarget information, for instruction decoding predicates if required.
170   const MCSubtargetInfo &STI;
171   std::unique_ptr<MCSymbolizer> Symbolizer;
172 
173 public:
174   // Helpers around MCSymbolizer
175   bool tryAddingSymbolicOperand(MCInst &Inst,
176                                 int64_t Value,
177                                 uint64_t Address, bool IsBranch,
178                                 uint64_t Offset, uint64_t InstSize) const;
179 
180   void tryAddingPcLoadReferenceComment(int64_t Value, uint64_t Address) const;
181 
182   /// Set \p Symzer as the current symbolizer.
183   /// This takes ownership of \p Symzer, and deletes the previously set one.
184   void setSymbolizer(std::unique_ptr<MCSymbolizer> Symzer);
185 
getContext()186   MCContext& getContext() const { return Ctx; }
187 
getSubtargetInfo()188   const MCSubtargetInfo& getSubtargetInfo() const { return STI; }
189 
190   // Marked mutable because we cache it inside the disassembler, rather than
191   // having to pass it around as an argument through all the autogenerated code.
192   mutable raw_ostream *CommentStream = nullptr;
193 };
194 
195 } // end namespace llvm
196 
197 #endif // LLVM_MC_MCDISASSEMBLER_MCDISASSEMBLER_H
198