//===- lib/FileFormat/MachO/ArchHandler.h ---------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
8 
9 #ifndef LLD_READER_WRITER_MACHO_ARCH_HANDLER_H
10 #define LLD_READER_WRITER_MACHO_ARCH_HANDLER_H
11 
#include "Atoms.h"
#include "File.h"
#include "MachONormalizedFile.h"
#include "lld/Common/LLVM.h"
#include "lld/Core/Error.h"
#include "lld/Core/Reference.h"
#include "lld/Core/Simple.h"
#include "lld/ReaderWriter/MachOLinkingContext.h"
#include "llvm/ADT/Triple.h"
#include <cstdint>
#include <functional>
#include <memory>
21 
22 namespace lld {
23 namespace mach_o {
24 
25 ///
26 /// The ArchHandler class handles all architecture specific aspects of
27 /// mach-o linking.
28 ///
29 class ArchHandler {
30 public:
31   virtual ~ArchHandler();
32 
33   /// There is no public interface to subclasses of ArchHandler, so this
34   /// is the only way to instantiate an ArchHandler.
35   static std::unique_ptr<ArchHandler> create(MachOLinkingContext::Arch arch);
36 
37   /// Get (arch specific) kind strings used by Registry.
38   virtual const Registry::KindStrings *kindStrings() = 0;
39 
40   /// Convert mach-o Arch to Reference::KindArch.
41   virtual Reference::KindArch kindArch() = 0;
42 
43   /// Used by StubPass to update References to shared library functions
44   /// to be references to a stub.
45   virtual bool isCallSite(const Reference &) = 0;
46 
47   /// Used by GOTPass to locate GOT References
isGOTAccess(const Reference &,bool & canBypassGOT)48   virtual bool isGOTAccess(const Reference &, bool &canBypassGOT) {
49     return false;
50   }
51 
52   /// Used by TLVPass to locate TLV References.
isTLVAccess(const Reference &)53   virtual bool isTLVAccess(const Reference &) const { return false; }
54 
55   /// Used by the TLVPass to update TLV References.
updateReferenceToTLV(const Reference *)56   virtual void updateReferenceToTLV(const Reference *) {}
57 
58   /// Used by ShimPass to insert shims in branches that switch mode.
59   virtual bool isNonCallBranch(const Reference &) = 0;
60 
61   /// Used by GOTPass to update GOT References
updateReferenceToGOT(const Reference *,bool targetIsNowGOT)62   virtual void updateReferenceToGOT(const Reference *, bool targetIsNowGOT) {}
63 
64   /// Does this architecture make use of __unwind_info sections for exception
65   /// handling? If so, it will need a separate pass to create them.
66   virtual bool needsCompactUnwind() = 0;
67 
68   /// Returns the kind of reference to use to synthesize a 32-bit image-offset
69   /// value, used in the __unwind_info section.
70   virtual Reference::KindValue imageOffsetKind() = 0;
71 
72   /// Returns the kind of reference to use to synthesize a 32-bit image-offset
73   /// indirect value. Used for personality functions in the __unwind_info
74   /// section.
75   virtual Reference::KindValue imageOffsetKindIndirect() = 0;
76 
77   /// Architecture specific compact unwind type that signals __eh_frame should
78   /// actually be used.
79   virtual uint32_t dwarfCompactUnwindType() = 0;
80 
81   /// Reference from an __eh_frame CIE atom to its personality function it's
82   /// describing. Usually pointer-sized and PC-relative, but differs in whether
83   /// it needs to be in relocatable objects.
84   virtual Reference::KindValue unwindRefToPersonalityFunctionKind() = 0;
85 
86   /// Reference from an __eh_frame FDE to the CIE it's based on.
87   virtual Reference::KindValue unwindRefToCIEKind() = 0;
88 
89   /// Reference from an __eh_frame FDE atom to the function it's
90   /// describing. Usually pointer-sized and PC-relative, but differs in whether
91   /// it needs to be in relocatable objects.
92   virtual Reference::KindValue unwindRefToFunctionKind() = 0;
93 
94   /// Reference from an __unwind_info entry of dwarfCompactUnwindType to the
95   /// required __eh_frame entry. On current architectures, the low 24 bits
96   /// represent the offset of the function's FDE entry from the start of
97   /// __eh_frame.
98   virtual Reference::KindValue unwindRefToEhFrameKind() = 0;
99 
100   /// Returns a pointer sized reference kind.  On 64-bit targets this will
101   /// likely be something like pointer64, and pointer32 on 32-bit targets.
102   virtual Reference::KindValue pointerKind() = 0;
103 
104   virtual const Atom *fdeTargetFunction(const DefinedAtom *fde);
105 
106   /// Used by normalizedFromAtoms() to know where to generated rebasing and
107   /// binding info in final executables.
108   virtual bool isPointer(const Reference &) = 0;
109 
110   /// Used by normalizedFromAtoms() to know where to generated lazy binding
111   /// info in final executables.
112   virtual bool isLazyPointer(const Reference &);
113 
114   /// Reference from an __stub_helper entry to the required offset of the
115   /// lazy bind commands.
116   virtual Reference::KindValue lazyImmediateLocationKind() = 0;
117 
118   /// Returns true if the specified relocation is paired to the next relocation.
119   virtual bool isPairedReloc(const normalized::Relocation &) = 0;
120 
121   /// Prototype for a helper function.  Given a sectionIndex and address,
122   /// finds the atom and offset with that atom of that address.
123   typedef std::function<llvm::Error (uint32_t sectionIndex, uint64_t addr,
124                         const lld::Atom **, Reference::Addend *)>
125                         FindAtomBySectionAndAddress;
126 
127   /// Prototype for a helper function.  Given a symbolIndex, finds the atom
128   /// representing that symbol.
129   typedef std::function<llvm::Error (uint32_t symbolIndex,
130                         const lld::Atom **)> FindAtomBySymbolIndex;
131 
132   /// Analyzes a relocation from a .o file and returns the info
133   /// (kind, target, addend) needed to instantiate a Reference.
134   /// Two helper functions are passed as parameters to find the target atom
135   /// given a symbol index or address.
136   virtual llvm::Error
137           getReferenceInfo(const normalized::Relocation &reloc,
138                            const DefinedAtom *inAtom,
139                            uint32_t offsetInAtom,
140                            uint64_t fixupAddress, bool isBigEndian,
141                            FindAtomBySectionAndAddress atomFromAddress,
142                            FindAtomBySymbolIndex atomFromSymbolIndex,
143                            Reference::KindValue *kind,
144                            const lld::Atom **target,
145                            Reference::Addend *addend) = 0;
146 
147   /// Analyzes a pair of relocations from a .o file and returns the info
148   /// (kind, target, addend) needed to instantiate a Reference.
149   /// Two helper functions are passed as parameters to find the target atom
150   /// given a symbol index or address.
151   virtual llvm::Error
152       getPairReferenceInfo(const normalized::Relocation &reloc1,
153                            const normalized::Relocation &reloc2,
154                            const DefinedAtom *inAtom,
155                            uint32_t offsetInAtom,
156                            uint64_t fixupAddress, bool isBig, bool scatterable,
157                            FindAtomBySectionAndAddress atomFromAddress,
158                            FindAtomBySymbolIndex atomFromSymbolIndex,
159                            Reference::KindValue *kind,
160                            const lld::Atom **target,
161                            Reference::Addend *addend) = 0;
162 
163   /// Prototype for a helper function.  Given an atom, finds the symbol table
164   /// index for it in the output file.
165   typedef std::function<uint32_t (const Atom &atom)> FindSymbolIndexForAtom;
166 
167   /// Prototype for a helper function.  Given an atom, finds the index
168   /// of the section that will contain the atom.
169   typedef std::function<uint32_t (const Atom &atom)> FindSectionIndexForAtom;
170 
171   /// Prototype for a helper function.  Given an atom, finds the address
172   /// assigned to it in the output file.
173   typedef std::function<uint64_t (const Atom &atom)> FindAddressForAtom;
174 
175   /// Some architectures require local symbols on anonymous atoms.
needsLocalSymbolInRelocatableFile(const DefinedAtom * atom)176   virtual bool needsLocalSymbolInRelocatableFile(const DefinedAtom *atom) {
177     return false;
178   }
179 
180   /// Copy raw content then apply all fixup References on an Atom.
181   virtual void generateAtomContent(const DefinedAtom &atom, bool relocatable,
182                                    FindAddressForAtom findAddress,
183                                    FindAddressForAtom findSectionAddress,
184                                    uint64_t imageBaseAddress,
185                           llvm::MutableArrayRef<uint8_t> atomContentBuffer) = 0;
186 
187   /// Used in -r mode to convert a Reference to a mach-o relocation.
188   virtual void appendSectionRelocations(const DefinedAtom &atom,
189                                         uint64_t atomSectionOffset,
190                                         const Reference &ref,
191                                         FindSymbolIndexForAtom,
192                                         FindSectionIndexForAtom,
193                                         FindAddressForAtom,
194                                         normalized::Relocations&) = 0;
195 
196   /// Add arch-specific References.
addAdditionalReferences(MachODefinedAtom & atom)197   virtual void addAdditionalReferences(MachODefinedAtom &atom) { }
198 
199   // Add Reference for data-in-code marker.
addDataInCodeReference(MachODefinedAtom & atom,uint32_t atomOff,uint16_t length,uint16_t kind)200   virtual void addDataInCodeReference(MachODefinedAtom &atom, uint32_t atomOff,
201                                       uint16_t length, uint16_t kind) { }
202 
203   /// Returns true if the specificed Reference value marks the start or end
204   /// of a data-in-code range in an atom.
isDataInCodeTransition(Reference::KindValue refKind)205   virtual bool isDataInCodeTransition(Reference::KindValue refKind) {
206     return false;
207   }
208 
209   /// Returns the Reference value for a Reference that marks that start of
210   /// a data-in-code range.
dataInCodeTransitionStart(const MachODefinedAtom & atom)211   virtual Reference::KindValue dataInCodeTransitionStart(
212                                                 const MachODefinedAtom &atom) {
213     return 0;
214   }
215 
216   /// Returns the Reference value for a Reference that marks that end of
217   /// a data-in-code range.
dataInCodeTransitionEnd(const MachODefinedAtom & atom)218   virtual Reference::KindValue dataInCodeTransitionEnd(
219                                                 const MachODefinedAtom &atom) {
220     return 0;
221   }
222 
223   /// Only relevant for 32-bit arm archs.
isThumbFunction(const DefinedAtom & atom)224   virtual bool isThumbFunction(const DefinedAtom &atom) { return false; }
225 
226   /// Only relevant for 32-bit arm archs.
createShim(MachOFile & file,bool thumbToArm,const DefinedAtom &)227   virtual const DefinedAtom *createShim(MachOFile &file, bool thumbToArm,
228                                         const DefinedAtom &) {
229     llvm_unreachable("shims only support on arm");
230   }
231 
232   /// Does a given unwind-cfi atom represent a CIE (as opposed to an FDE).
233   static bool isDwarfCIE(bool isBig, const DefinedAtom *atom);
234 
235   struct ReferenceInfo {
236     Reference::KindArch arch;
237     uint16_t            kind;
238     uint32_t            offset;
239     int32_t             addend;
240   };
241 
242   struct OptionalRefInfo {
243     bool                used;
244     uint16_t            kind;
245     uint32_t            offset;
246     int32_t             addend;
247   };
248 
249   /// Table of architecture specific information for creating stubs.
250   struct StubInfo {
251     const char*     binderSymbolName;
252     ReferenceInfo   lazyPointerReferenceToHelper;
253     ReferenceInfo   lazyPointerReferenceToFinal;
254     ReferenceInfo   nonLazyPointerReferenceToBinder;
255     uint8_t         codeAlignment;
256 
257     uint32_t        stubSize;
258     uint8_t         stubBytes[16];
259     ReferenceInfo   stubReferenceToLP;
260     OptionalRefInfo optStubReferenceToLP;
261 
262     uint32_t        stubHelperSize;
263     uint8_t         stubHelperBytes[16];
264     ReferenceInfo   stubHelperReferenceToImm;
265     ReferenceInfo   stubHelperReferenceToHelperCommon;
266 
267     DefinedAtom::ContentType stubHelperImageCacheContentType;
268 
269     uint32_t        stubHelperCommonSize;
270     uint8_t         stubHelperCommonAlignment;
271     uint8_t         stubHelperCommonBytes[36];
272     ReferenceInfo   stubHelperCommonReferenceToCache;
273     OptionalRefInfo optStubHelperCommonReferenceToCache;
274     ReferenceInfo   stubHelperCommonReferenceToBinder;
275     OptionalRefInfo optStubHelperCommonReferenceToBinder;
276   };
277 
278   virtual const StubInfo &stubInfo() = 0;
279 
280 protected:
281   ArchHandler();
282 
283   static std::unique_ptr<mach_o::ArchHandler> create_x86_64();
284   static std::unique_ptr<mach_o::ArchHandler> create_x86();
285   static std::unique_ptr<mach_o::ArchHandler> create_arm();
286   static std::unique_ptr<mach_o::ArchHandler> create_arm64();
287 
288   // Handy way to pack mach-o r_type and other bit fields into one 16-bit value.
289   typedef uint16_t RelocPattern;
290   enum {
291     rScattered = 0x8000,
292     rPcRel     = 0x4000,
293     rExtern    = 0x2000,
294     rLength1   = 0x0000,
295     rLength2   = 0x0100,
296     rLength4   = 0x0200,
297     rLength8   = 0x0300,
298     rLenArmLo  = rLength1,
299     rLenArmHi  = rLength2,
300     rLenThmbLo = rLength4,
301     rLenThmbHi = rLength8
302   };
303   /// Extract RelocPattern from normalized mach-o relocation.
304   static RelocPattern relocPattern(const normalized::Relocation &reloc);
305   /// Create normalized Relocation initialized from pattern.
306   static normalized::Relocation relocFromPattern(RelocPattern pattern);
307   /// One liner to add a relocation.
308   static void appendReloc(normalized::Relocations &relocs, uint32_t offset,
309                           uint32_t symbol, uint32_t value,
310                           RelocPattern pattern);
311 
312 
313   static int16_t  readS16(const uint8_t *addr, bool isBig);
314   static int32_t  readS32(const uint8_t *addr, bool isBig);
315   static uint32_t readU32(const uint8_t *addr, bool isBig);
316   static int64_t  readS64(const uint8_t *addr, bool isBig);
317 };
318 
319 } // namespace mach_o
320 } // namespace lld
321 
322 #endif // LLD_READER_WRITER_MACHO_ARCH_HANDLER_H
323