//===- llvm/MC/MCDisassembler/MCSymbolizer.h - MCSymbolizer -----*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the declaration of the MCSymbolizer class, which is used
10 // to symbolize instructions decoded from an object, that is, transform their
11 // immediate operands to MCExprs.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef LLVM_MC_MCDISASSEMBLER_MCSYMBOLIZER_H
16 #define LLVM_MC_MCDISASSEMBLER_MCSYMBOLIZER_H
17 
18 #include "llvm/ADT/ArrayRef.h"
19 #include "llvm/MC/MCDisassembler/MCRelocationInfo.h"
20 #include <algorithm>
21 #include <cstdint>
22 #include <memory>
23 
24 namespace llvm {
25 
26 class MCContext;
27 class MCInst;
28 class raw_ostream;
29 
30 /// Symbolize and annotate disassembled instructions.
31 ///
32 /// For now this mimics the old symbolization logic (from both ARM and x86), that
33 /// relied on user-provided (C API) callbacks to do the actual symbol lookup in
34 /// the object file. This was moved to MCExternalSymbolizer.
35 /// A better API would not rely on actually calling the two methods here from
36 /// inside each disassembler, but would use the instr info to determine what
37 /// operands are actually symbolizable, and in what way. I don't think this
38 /// information exists right now.
39 class MCSymbolizer {
40 protected:
41   MCContext &Ctx;
42   std::unique_ptr<MCRelocationInfo> RelInfo;
43 
44 public:
45   /// Construct an MCSymbolizer, taking ownership of \p RelInfo.
MCSymbolizer(MCContext & Ctx,std::unique_ptr<MCRelocationInfo> RelInfo)46   MCSymbolizer(MCContext &Ctx, std::unique_ptr<MCRelocationInfo> RelInfo)
47     : Ctx(Ctx), RelInfo(std::move(RelInfo)) {
48   }
49 
50   MCSymbolizer(const MCSymbolizer &) = delete;
51   MCSymbolizer &operator=(const MCSymbolizer &) = delete;
52   virtual ~MCSymbolizer();
53 
54   /// Try to add a symbolic operand instead of \p Value to the MCInst.
55   ///
56   /// Instead of having a difficult to read immediate, a symbolic operand would
57   /// represent this immediate in a more understandable way, for instance as a
58   /// symbol or an offset from a symbol. Relocations can also be used to enrich
59   /// the symbolic expression.
60   /// \param Inst      - The MCInst where to insert the symbolic operand.
61   /// \param cStream   - Stream to print comments and annotations on.
62   /// \param Value     - Operand value, pc-adjusted by the caller if necessary.
63   /// \param Address   - Load address of the instruction.
64   /// \param IsBranch  - Is the instruction a branch?
65   /// \param Offset    - Byte offset of the operand inside the inst.
66   /// \param InstSize  - Size of the instruction in bytes.
67   /// \return Whether a symbolic operand was added.
68   virtual bool tryAddingSymbolicOperand(MCInst &Inst, raw_ostream &cStream,
69                                         int64_t Value, uint64_t Address,
70                                         bool IsBranch, uint64_t Offset,
71                                         uint64_t InstSize) = 0;
72 
73   /// Try to add a comment on the PC-relative load.
74   /// For instance, in Mach-O, this is used to add annotations to instructions
75   /// that use C string literals, as found in __cstring.
76   virtual void tryAddingPcLoadReferenceComment(raw_ostream &cStream,
77                                                int64_t Value,
78                                                uint64_t Address) = 0;
79 
80   /// Get the MCSymbolizer's list of addresses that were referenced by
81   /// symbolizable operands but not resolved to a symbol. The caller (some
82   /// code that is disassembling a section or other chunk of code) would
83   /// typically create a synthetic label at each address and add them to its
84   /// list of symbols in the section, before creating a new MCSymbolizer with
85   /// the enhanced symbol list and retrying disassembling the section.
86   /// The returned array is unordered and may have duplicates.
87   /// The returned ArrayRef stops being valid on any call to or destruction of
88   /// the MCSymbolizer object.
getReferencedAddresses()89   virtual ArrayRef<uint64_t> getReferencedAddresses() const { return {}; }
90 };
91 
92 } // end namespace llvm
93 
94 #endif // LLVM_MC_MCDISASSEMBLER_MCSYMBOLIZER_H
95