1 //===- Symbols.h ------------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #ifndef LLD_MACHO_SYMBOLS_H
10 #define LLD_MACHO_SYMBOLS_H
11
12 #include "InputFiles.h"
13 #include "InputSection.h"
14 #include "Target.h"
15 #include "lld/Common/ErrorHandler.h"
16 #include "lld/Common/Strings.h"
17 #include "llvm/Object/Archive.h"
18 #include "llvm/Support/MathExtras.h"
19
20 namespace lld {
21 namespace macho {
22
23 class InputSection;
24 class MachHeaderSection;
25
26 struct StringRefZ {
StringRefZStringRefZ27 StringRefZ(const char *s) : data(s), size(-1) {}
StringRefZStringRefZ28 StringRefZ(StringRef s) : data(s.data()), size(s.size()) {}
29
30 const char *data;
31 const uint32_t size;
32 };
33
34 class Symbol {
35 public:
36 enum Kind {
37 DefinedKind,
38 UndefinedKind,
39 CommonKind,
40 DylibKind,
41 LazyKind,
42 };
43
~Symbol()44 virtual ~Symbol() {}
45
kind()46 Kind kind() const { return symbolKind; }
47
getName()48 StringRef getName() const {
49 if (nameSize == (uint32_t)-1)
50 nameSize = strlen(nameData);
51 return {nameData, nameSize};
52 }
53
54 bool isLive() const;
55
getVA()56 virtual uint64_t getVA() const { return 0; }
57
isWeakDef()58 virtual bool isWeakDef() const { llvm_unreachable("cannot be weak def"); }
59
60 // Only undefined or dylib symbols can be weak references. A weak reference
61 // need not be satisfied at runtime, e.g. due to the symbol not being
62 // available on a given target platform.
isWeakRef()63 virtual bool isWeakRef() const { llvm_unreachable("cannot be a weak ref"); }
64
isTlv()65 virtual bool isTlv() const { llvm_unreachable("cannot be TLV"); }
66
67 // Whether this symbol is in the GOT or TLVPointer sections.
isInGot()68 bool isInGot() const { return gotIndex != UINT32_MAX; }
69
70 // Whether this symbol is in the StubsSection.
isInStubs()71 bool isInStubs() const { return stubsIndex != UINT32_MAX; }
72
73 uint64_t getStubVA() const;
74 uint64_t getGotVA() const;
75 uint64_t getTlvVA() const;
resolveBranchVA()76 uint64_t resolveBranchVA() const {
77 assert(isa<Defined>(this) || isa<DylibSymbol>(this));
78 return isInStubs() ? getStubVA() : getVA();
79 }
resolveGotVA()80 uint64_t resolveGotVA() const { return isInGot() ? getGotVA() : getVA(); }
resolveTlvVA()81 uint64_t resolveTlvVA() const { return isInGot() ? getTlvVA() : getVA(); }
82
83 // The index of this symbol in the GOT or the TLVPointer section, depending
84 // on whether it is a thread-local. A given symbol cannot be referenced by
85 // both these sections at once.
86 uint32_t gotIndex = UINT32_MAX;
87
88 uint32_t stubsIndex = UINT32_MAX;
89
90 uint32_t symtabIndex = UINT32_MAX;
91
getFile()92 InputFile *getFile() const { return file; }
93
94 protected:
Symbol(Kind k,StringRefZ name,InputFile * file)95 Symbol(Kind k, StringRefZ name, InputFile *file)
96 : symbolKind(k), nameData(name.data), nameSize(name.size), file(file),
97 isUsedInRegularObj(!file || isa<ObjFile>(file)),
98 used(!config->deadStrip) {}
99
100 Kind symbolKind;
101 const char *nameData;
102 mutable uint32_t nameSize;
103 InputFile *file;
104
105 public:
106 // True if this symbol was referenced by a regular (non-bitcode) object.
107 bool isUsedInRegularObj : 1;
108
109 // True if an undefined or dylib symbol is used from a live section.
110 bool used : 1;
111 };
112
113 class Defined : public Symbol {
114 public:
Defined(StringRefZ name,InputFile * file,InputSection * isec,uint64_t value,uint64_t size,bool isWeakDef,bool isExternal,bool isPrivateExtern,bool isThumb,bool isReferencedDynamically,bool noDeadStrip)115 Defined(StringRefZ name, InputFile *file, InputSection *isec, uint64_t value,
116 uint64_t size, bool isWeakDef, bool isExternal, bool isPrivateExtern,
117 bool isThumb, bool isReferencedDynamically, bool noDeadStrip)
118 : Symbol(DefinedKind, name, file), isec(isec), value(value), size(size),
119 overridesWeakDef(false), privateExtern(isPrivateExtern),
120 includeInSymtab(true), thumb(isThumb),
121 referencedDynamically(isReferencedDynamically),
122 noDeadStrip(noDeadStrip), weakDef(isWeakDef), external(isExternal) {
123 if (auto concatIsec = dyn_cast_or_null<ConcatInputSection>(isec))
124 concatIsec->numRefs++;
125 }
126
isWeakDef()127 bool isWeakDef() const override { return weakDef; }
isExternalWeakDef()128 bool isExternalWeakDef() const {
129 return isWeakDef() && isExternal() && !privateExtern;
130 }
isTlv()131 bool isTlv() const override {
132 return !isAbsolute() && isThreadLocalVariables(isec->getFlags());
133 }
134
isExternal()135 bool isExternal() const { return external; }
isAbsolute()136 bool isAbsolute() const { return isec == nullptr; }
137
138 uint64_t getVA() const override;
139
classof(const Symbol * s)140 static bool classof(const Symbol *s) { return s->kind() == DefinedKind; }
141
142 InputSection *isec;
143 // Contains the offset from the containing subsection. Note that this is
144 // different from nlist::n_value, which is the absolute address of the symbol.
145 uint64_t value;
146 // size is only calculated for regular (non-bitcode) symbols.
147 uint64_t size;
148
149 bool overridesWeakDef : 1;
150 // Whether this symbol should appear in the output binary's export trie.
151 bool privateExtern : 1;
152 // Whether this symbol should appear in the output symbol table.
153 bool includeInSymtab : 1;
154 // Only relevant when compiling for Thumb-supporting arm32 archs.
155 bool thumb : 1;
156 // Symbols marked referencedDynamically won't be removed from the output's
157 // symbol table by tools like strip. In theory, this could be set on arbitrary
158 // symbols in input object files. In practice, it's used solely for the
159 // synthetic __mh_execute_header symbol.
160 // This is information for the static linker, and it's also written to the
161 // output file's symbol table for tools running later (such as `strip`).
162 bool referencedDynamically : 1;
163 // Set on symbols that should not be removed by dead code stripping.
164 // Set for example on `__attribute__((used))` globals, or on some Objective-C
165 // metadata. This is information only for the static linker and not written
166 // to the output.
167 bool noDeadStrip : 1;
168
169 private:
170 const bool weakDef : 1;
171 const bool external : 1;
172 };
173
// RefState serves two purposes. On a symbol it records whether, and how
// strongly, a dylib symbol is referenced. On a DylibFile it records the kind
// of referenced symbols the file contains. A file that has both weak and
// strong references is counted as strongly-referenced. The enumerators are
// ordered so that a stronger state compares greater than a weaker one.
enum class RefState : uint8_t {
  Unreferenced = 0,
  Weak = 1,
  Strong = 2,
};
180
181 class Undefined : public Symbol {
182 public:
Undefined(StringRefZ name,InputFile * file,RefState refState)183 Undefined(StringRefZ name, InputFile *file, RefState refState)
184 : Symbol(UndefinedKind, name, file), refState(refState) {
185 assert(refState != RefState::Unreferenced);
186 }
187
isWeakRef()188 bool isWeakRef() const override { return refState == RefState::Weak; }
189
classof(const Symbol * s)190 static bool classof(const Symbol *s) { return s->kind() == UndefinedKind; }
191
192 RefState refState : 2;
193 };
194
195 // On Unix, it is traditionally allowed to write variable definitions without
196 // initialization expressions (such as "int foo;") to header files. These are
197 // called tentative definitions.
198 //
199 // Using tentative definitions is usually considered a bad practice; you should
200 // write only declarations (such as "extern int foo;") to header files.
201 // Nevertheless, the linker and the compiler have to do something to support
202 // bad code by allowing duplicate definitions for this particular case.
203 //
204 // The compiler creates common symbols when it sees tentative definitions.
205 // (You can suppress this behavior and let the compiler create a regular
206 // defined symbol by passing -fno-common. -fno-common is the default in clang
207 // as of LLVM 11.0.) When linking the final binary, if there are remaining
208 // common symbols after name resolution is complete, the linker converts them
209 // to regular defined symbols in a __common section.
210 class CommonSymbol : public Symbol {
211 public:
CommonSymbol(StringRefZ name,InputFile * file,uint64_t size,uint32_t align,bool isPrivateExtern)212 CommonSymbol(StringRefZ name, InputFile *file, uint64_t size, uint32_t align,
213 bool isPrivateExtern)
214 : Symbol(CommonKind, name, file), size(size),
215 align(align != 1 ? align : llvm::PowerOf2Ceil(size)),
216 privateExtern(isPrivateExtern) {
217 // TODO: cap maximum alignment
218 }
219
classof(const Symbol * s)220 static bool classof(const Symbol *s) { return s->kind() == CommonKind; }
221
222 const uint64_t size;
223 const uint32_t align;
224 const bool privateExtern;
225 };
226
227 class DylibSymbol : public Symbol {
228 public:
DylibSymbol(DylibFile * file,StringRefZ name,bool isWeakDef,RefState refState,bool isTlv)229 DylibSymbol(DylibFile *file, StringRefZ name, bool isWeakDef,
230 RefState refState, bool isTlv)
231 : Symbol(DylibKind, name, file), refState(refState), weakDef(isWeakDef),
232 tlv(isTlv) {
233 if (file && refState > RefState::Unreferenced)
234 file->numReferencedSymbols++;
235 }
236
237 uint64_t getVA() const override;
isWeakDef()238 bool isWeakDef() const override { return weakDef; }
isWeakRef()239 bool isWeakRef() const override { return refState == RefState::Weak; }
isReferenced()240 bool isReferenced() const { return refState != RefState::Unreferenced; }
isTlv()241 bool isTlv() const override { return tlv; }
isDynamicLookup()242 bool isDynamicLookup() const { return file == nullptr; }
hasStubsHelper()243 bool hasStubsHelper() const { return stubsHelperIndex != UINT32_MAX; }
244
getFile()245 DylibFile *getFile() const {
246 assert(!isDynamicLookup());
247 return cast<DylibFile>(file);
248 }
249
classof(const Symbol * s)250 static bool classof(const Symbol *s) { return s->kind() == DylibKind; }
251
252 uint32_t stubsHelperIndex = UINT32_MAX;
253 uint32_t lazyBindOffset = UINT32_MAX;
254
getRefState()255 RefState getRefState() const { return refState; }
256
reference(RefState newState)257 void reference(RefState newState) {
258 assert(newState > RefState::Unreferenced);
259 if (refState == RefState::Unreferenced && file)
260 getFile()->numReferencedSymbols++;
261 refState = std::max(refState, newState);
262 }
263
unreference()264 void unreference() {
265 // dynamic_lookup symbols have no file.
266 if (refState > RefState::Unreferenced && file) {
267 assert(getFile()->numReferencedSymbols > 0);
268 getFile()->numReferencedSymbols--;
269 }
270 }
271
272 private:
273 RefState refState : 2;
274 const bool weakDef : 1;
275 const bool tlv : 1;
276 };
277
278 class LazySymbol : public Symbol {
279 public:
LazySymbol(ArchiveFile * file,const llvm::object::Archive::Symbol & sym)280 LazySymbol(ArchiveFile *file, const llvm::object::Archive::Symbol &sym)
281 : Symbol(LazyKind, sym.getName(), file), sym(sym) {}
282
getFile()283 ArchiveFile *getFile() const { return cast<ArchiveFile>(file); }
284 void fetchArchiveMember();
285
classof(const Symbol * s)286 static bool classof(const Symbol *s) { return s->kind() == LazyKind; }
287
288 private:
289 const llvm::object::Archive::Symbol sym;
290 };
291
292 union SymbolUnion {
293 alignas(Defined) char a[sizeof(Defined)];
294 alignas(Undefined) char b[sizeof(Undefined)];
295 alignas(CommonSymbol) char c[sizeof(CommonSymbol)];
296 alignas(DylibSymbol) char d[sizeof(DylibSymbol)];
297 alignas(LazySymbol) char e[sizeof(LazySymbol)];
298 };
299
300 template <typename T, typename... ArgT>
replaceSymbol(Symbol * s,ArgT &&...arg)301 T *replaceSymbol(Symbol *s, ArgT &&...arg) {
302 static_assert(sizeof(T) <= sizeof(SymbolUnion), "SymbolUnion too small");
303 static_assert(alignof(T) <= alignof(SymbolUnion),
304 "SymbolUnion not aligned enough");
305 assert(static_cast<Symbol *>(static_cast<T *>(nullptr)) == nullptr &&
306 "Not a Symbol");
307
308 bool isUsedInRegularObj = s->isUsedInRegularObj;
309 bool used = s->used;
310 T *sym = new (s) T(std::forward<ArgT>(arg)...);
311 sym->isUsedInRegularObj |= isUsedInRegularObj;
312 sym->used |= used;
313 return sym;
314 }
315
316 } // namespace macho
317
318 std::string toString(const macho::Symbol &);
319 std::string toMachOString(const llvm::object::Archive::Symbol &);
320
321 } // namespace lld
322
323 #endif
324