1 //===- Symbols.h ------------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLD_MACHO_SYMBOLS_H
10 #define LLD_MACHO_SYMBOLS_H
11 
#include "InputSection.h"
#include "Target.h"
#include "lld/Common/ErrorHandler.h"
#include "lld/Common/Strings.h"
#include "llvm/Object/Archive.h"
#include "llvm/Support/MathExtras.h"

#include <type_traits>
18 
19 namespace lld {
20 namespace macho {
21 
// Forward declarations of types that the symbol classes below refer to.
// MachHeaderSection, DylibFile and ArchiveFile are only used through pointers
// in this header.
class InputSection;
class MachHeaderSection;
class DylibFile;
class ArchiveFile;
26 
27 struct StringRefZ {
28   StringRefZ(const char *s) : data(s), size(-1) {}
29   StringRefZ(StringRef s) : data(s.data()), size(s.size()) {}
30 
31   const char *data;
32   const uint32_t size;
33 };
34 
35 class Symbol {
36 public:
37   enum Kind {
38     DefinedKind,
39     UndefinedKind,
40     CommonKind,
41     DylibKind,
42     LazyKind,
43     DSOHandleKind,
44   };
45 
46   virtual ~Symbol() {}
47 
48   Kind kind() const { return static_cast<Kind>(symbolKind); }
49 
50   StringRef getName() const {
51     if (nameSize == (uint32_t)-1)
52       nameSize = strlen(nameData);
53     return {nameData, nameSize};
54   }
55 
56   virtual uint64_t getVA() const { return 0; }
57 
58   virtual uint64_t getFileOffset() const {
59     llvm_unreachable("attempt to get an offset from a non-defined symbol");
60   }
61 
62   virtual bool isWeakDef() const { llvm_unreachable("cannot be weak def"); }
63 
64   // Only undefined or dylib symbols can be weak references. A weak reference
65   // need not be satisfied at runtime, e.g. due to the symbol not being
66   // available on a given target platform.
67   virtual bool isWeakRef() const { llvm_unreachable("cannot be a weak ref"); }
68 
69   virtual bool isTlv() const { llvm_unreachable("cannot be TLV"); }
70 
71   // Whether this symbol is in the GOT or TLVPointer sections.
72   bool isInGot() const { return gotIndex != UINT32_MAX; }
73 
74   // Whether this symbol is in the StubsSection.
75   bool isInStubs() const { return stubsIndex != UINT32_MAX; }
76 
77   // The index of this symbol in the GOT or the TLVPointer section, depending
78   // on whether it is a thread-local. A given symbol cannot be referenced by
79   // both these sections at once.
80   uint32_t gotIndex = UINT32_MAX;
81 
82   uint32_t stubsIndex = UINT32_MAX;
83 
84   uint32_t symtabIndex = UINT32_MAX;
85 
86 protected:
87   Symbol(Kind k, StringRefZ name)
88       : symbolKind(k), nameData(name.data), nameSize(name.size) {}
89 
90   Kind symbolKind;
91   const char *nameData;
92   mutable uint32_t nameSize;
93 };
94 
95 class Defined : public Symbol {
96 public:
97   Defined(StringRefZ name, InputSection *isec, uint32_t value, bool isWeakDef,
98           bool isExternal, bool isPrivateExtern)
99       : Symbol(DefinedKind, name), isec(isec), value(value),
100         overridesWeakDef(false), privateExtern(isPrivateExtern),
101         weakDef(isWeakDef), external(isExternal) {}
102 
103   bool isWeakDef() const override { return weakDef; }
104   bool isExternalWeakDef() const {
105     return isWeakDef() && isExternal() && !privateExtern;
106   }
107   bool isTlv() const override {
108     return !isAbsolute() && isThreadLocalVariables(isec->flags);
109   }
110 
111   bool isExternal() const { return external; }
112   bool isAbsolute() const { return isec == nullptr; }
113 
114   uint64_t getVA() const override;
115   uint64_t getFileOffset() const override;
116 
117   static bool classof(const Symbol *s) { return s->kind() == DefinedKind; }
118 
119   InputSection *isec;
120   uint32_t value;
121 
122   bool overridesWeakDef : 1;
123   bool privateExtern : 1;
124 
125 private:
126   const bool weakDef : 1;
127   const bool external : 1;
128 };
129 
// This enum does double-duty: as a symbol property, it indicates whether & how
// a dylib symbol is referenced. As a DylibFile property, it indicates the kind
// of referenced symbols contained within the file. If there are both weak
// and strong references to the same file, we will count the file as
// strongly-referenced.
//
// The underlying values are 0..2, so the enum fits in the 2-bit bit-fields
// that Undefined and DylibSymbol use to store it.
enum class RefState : uint8_t { Unreferenced = 0, Weak = 1, Strong = 2 };
136 
137 class Undefined : public Symbol {
138 public:
139   Undefined(StringRefZ name, RefState refState)
140       : Symbol(UndefinedKind, name), refState(refState) {
141     assert(refState != RefState::Unreferenced);
142   }
143 
144   bool isWeakRef() const override { return refState == RefState::Weak; }
145 
146   static bool classof(const Symbol *s) { return s->kind() == UndefinedKind; }
147 
148   RefState refState : 2;
149 };
150 
151 // On Unix, it is traditionally allowed to write variable definitions without
152 // initialization expressions (such as "int foo;") to header files. These are
153 // called tentative definitions.
154 //
155 // Using tentative definitions is usually considered a bad practice; you should
156 // write only declarations (such as "extern int foo;") to header files.
157 // Nevertheless, the linker and the compiler have to do something to support
158 // bad code by allowing duplicate definitions for this particular case.
159 //
160 // The compiler creates common symbols when it sees tentative definitions.
161 // (You can suppress this behavior and let the compiler create a regular
162 // defined symbol by passing -fno-common. -fno-common is the default in clang
163 // as of LLVM 11.0.) When linking the final binary, if there are remaining
164 // common symbols after name resolution is complete, the linker converts them
165 // to regular defined symbols in a __common section.
166 class CommonSymbol : public Symbol {
167 public:
168   CommonSymbol(StringRefZ name, InputFile *file, uint64_t size, uint32_t align,
169                bool isPrivateExtern)
170       : Symbol(CommonKind, name), file(file), size(size),
171         align(align != 1 ? align : llvm::PowerOf2Ceil(size)),
172         privateExtern(isPrivateExtern) {
173     // TODO: cap maximum alignment
174   }
175 
176   static bool classof(const Symbol *s) { return s->kind() == CommonKind; }
177 
178   InputFile *const file;
179   const uint64_t size;
180   const uint32_t align;
181   const bool privateExtern;
182 };
183 
184 class DylibSymbol : public Symbol {
185 public:
186   DylibSymbol(DylibFile *file, StringRefZ name, bool isWeakDef,
187               RefState refState, bool isTlv)
188       : Symbol(DylibKind, name), file(file), refState(refState),
189         weakDef(isWeakDef), tlv(isTlv) {}
190 
191   bool isWeakDef() const override { return weakDef; }
192   bool isWeakRef() const override { return refState == RefState::Weak; }
193   bool isReferenced() const { return refState != RefState::Unreferenced; }
194   bool isTlv() const override { return tlv; }
195   bool hasStubsHelper() const { return stubsHelperIndex != UINT32_MAX; }
196 
197   static bool classof(const Symbol *s) { return s->kind() == DylibKind; }
198 
199   DylibFile *file;
200   uint32_t stubsHelperIndex = UINT32_MAX;
201   uint32_t lazyBindOffset = UINT32_MAX;
202 
203   RefState refState : 2;
204 
205 private:
206   const bool weakDef : 1;
207   const bool tlv : 1;
208 };
209 
210 class LazySymbol : public Symbol {
211 public:
212   LazySymbol(ArchiveFile *file, const llvm::object::Archive::Symbol &sym)
213       : Symbol(LazyKind, sym.getName()), file(file), sym(sym) {}
214 
215   static bool classof(const Symbol *s) { return s->kind() == LazyKind; }
216 
217   void fetchArchiveMember();
218 
219 private:
220   ArchiveFile *file;
221   const llvm::object::Archive::Symbol sym;
222 };
223 
224 // The Itanium C++ ABI requires dylibs to pass a pointer to __cxa_atexit which
225 // does e.g. cleanup of static global variables. The ABI document says that the
226 // pointer can point to any address in one of the dylib's segments, but in
227 // practice ld64 seems to set it to point to the header, so that's what's
228 // implemented here.
229 //
230 // The ARM C++ ABI uses __dso_handle similarly, but I (int3) have not yet
231 // tested this on an ARM platform.
232 //
233 // DSOHandle effectively functions like a Defined symbol, but it doesn't belong
234 // to an InputSection.
235 class DSOHandle : public Symbol {
236 public:
237   DSOHandle(const MachHeaderSection *header)
238       : Symbol(DSOHandleKind, name), header(header) {}
239 
240   const MachHeaderSection *header;
241 
242   uint64_t getVA() const override;
243 
244   uint64_t getFileOffset() const override;
245 
246   bool isWeakDef() const override { return false; }
247 
248   bool isTlv() const override { return false; }
249 
250   static constexpr StringRef name = "___dso_handle";
251 
252   static bool classof(const Symbol *s) { return s->kind() == DSOHandleKind; }
253 };
254 
// Raw storage whose size and alignment are sufficient for any of the concrete
// symbol classes listed below. The members are plain char arrays, so no symbol
// constructor or destructor runs when a SymbolUnion itself is created or
// destroyed. replaceSymbol() static_asserts that the type it constructs fits
// within this union's size and alignment.
union SymbolUnion {
  alignas(Defined) char a[sizeof(Defined)];
  alignas(Undefined) char b[sizeof(Undefined)];
  alignas(CommonSymbol) char c[sizeof(CommonSymbol)];
  alignas(DylibSymbol) char d[sizeof(DylibSymbol)];
  alignas(LazySymbol) char e[sizeof(LazySymbol)];
  alignas(DSOHandle) char f[sizeof(DSOHandle)];
};
263 
264 template <typename T, typename... ArgT>
265 T *replaceSymbol(Symbol *s, ArgT &&... arg) {
266   static_assert(sizeof(T) <= sizeof(SymbolUnion), "SymbolUnion too small");
267   static_assert(alignof(T) <= alignof(SymbolUnion),
268                 "SymbolUnion not aligned enough");
269   assert(static_cast<Symbol *>(static_cast<T *>(nullptr)) == nullptr &&
270          "Not a Symbol");
271 
272   return new (s) T(std::forward<ArgT>(arg)...);
273 }
274 
275 } // namespace macho
276 
// String conversions for diagnostics, declared in namespace lld (outside
// lld::macho) and defined out of line. Presumably they yield a human-readable,
// possibly demangled, symbol name -- see the definitions to confirm.
std::string toString(const macho::Symbol &);
std::string toMachOString(const llvm::object::Archive::Symbol &);
279 
280 } // namespace lld
281 
282 #endif
283