1 //===- Symbols.h ------------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #ifndef LLD_WASM_SYMBOLS_H
10 #define LLD_WASM_SYMBOLS_H
11
12 #include "Config.h"
13 #include "lld/Common/LLVM.h"
14 #include "llvm/Object/Archive.h"
15 #include "llvm/Object/Wasm.h"
16
17 namespace lld {
18 namespace wasm {
19
20 // Shared string constants
21
22 // The default module name to use for symbol imports.
23 extern const char *defaultModule;
24
25 // The name under which to import or export the wasm table.
26 extern const char *functionTableName;
27
28 using llvm::wasm::WasmSymbolType;
29
30 class InputFile;
31 class InputChunk;
32 class InputSegment;
33 class InputFunction;
34 class InputGlobal;
35 class InputEvent;
36 class InputSection;
37 class OutputSection;
38
39 #define INVALID_INDEX UINT32_MAX
40
41 // The base class for real symbol classes.
42 class Symbol {
43 public:
44 enum Kind : uint8_t {
45 DefinedFunctionKind,
46 DefinedDataKind,
47 DefinedGlobalKind,
48 DefinedEventKind,
49 SectionKind,
50 OutputSectionKind,
51 UndefinedFunctionKind,
52 UndefinedDataKind,
53 UndefinedGlobalKind,
54 LazyKind,
55 };
56
kind()57 Kind kind() const { return symbolKind; }
58
isDefined()59 bool isDefined() const { return !isLazy() && !isUndefined(); }
60
isUndefined()61 bool isUndefined() const {
62 return symbolKind == UndefinedFunctionKind ||
63 symbolKind == UndefinedDataKind || symbolKind == UndefinedGlobalKind;
64 }
65
isLazy()66 bool isLazy() const { return symbolKind == LazyKind; }
67
68 bool isLocal() const;
69 bool isWeak() const;
70 bool isHidden() const;
71
72 // Returns true if this symbol exists in a discarded (due to COMDAT) section
73 bool isDiscarded() const;
74
75 // True if this is an undefined weak symbol. This only works once
76 // all input files have been added.
isUndefWeak()77 bool isUndefWeak() const {
78 // See comment on lazy symbols for details.
79 return isWeak() && (isUndefined() || isLazy());
80 }
81
82 // Returns the symbol name.
getName()83 StringRef getName() const { return name; }
84
85 // Returns the file from which this symbol was created.
getFile()86 InputFile *getFile() const { return file; }
87
getFlags()88 uint32_t getFlags() const { return flags; }
89
90 InputChunk *getChunk() const;
91
92 // Indicates that the section or import for this symbol will be included in
93 // the final image.
94 bool isLive() const;
95
96 // Marks the symbol's InputChunk as Live, so that it will be included in the
97 // final image.
98 void markLive();
99
100 void setHidden(bool isHidden);
101
102 // Get/set the index in the output symbol table. This is only used for
103 // relocatable output.
104 uint32_t getOutputSymbolIndex() const;
105 void setOutputSymbolIndex(uint32_t index);
106
107 WasmSymbolType getWasmType() const;
108 bool isExported() const;
109
110 // Indicates that the symbol is used in an __attribute__((used)) directive
111 // or similar.
112 bool isNoStrip() const;
113
114 const WasmSignature* getSignature() const;
115
getGOTIndex()116 uint32_t getGOTIndex() const {
117 assert(gotIndex != INVALID_INDEX);
118 return gotIndex;
119 }
120
121 void setGOTIndex(uint32_t index);
hasGOTIndex()122 bool hasGOTIndex() const { return gotIndex != INVALID_INDEX; }
123
124 protected:
Symbol(StringRef name,Kind k,uint32_t flags,InputFile * f)125 Symbol(StringRef name, Kind k, uint32_t flags, InputFile *f)
126 : name(name), file(f), flags(flags), symbolKind(k),
127 referenced(!config->gcSections), requiresGOT(false),
128 isUsedInRegularObj(false), forceExport(false), canInline(false),
129 traced(false) {}
130
131 StringRef name;
132 InputFile *file;
133 uint32_t flags;
134 uint32_t outputSymbolIndex = INVALID_INDEX;
135 uint32_t gotIndex = INVALID_INDEX;
136 Kind symbolKind;
137
138 public:
139 bool referenced : 1;
140
141 // True for data symbols that needs a dummy GOT entry. Used for static
142 // linking of GOT accesses.
143 bool requiresGOT : 1;
144
145 // True if the symbol was used for linking and thus need to be added to the
146 // output file's symbol table. This is true for all symbols except for
147 // unreferenced DSO symbols, lazy (archive) symbols, and bitcode symbols that
148 // are unreferenced except by other bitcode objects.
149 bool isUsedInRegularObj : 1;
150
151 // True if ths symbol is explicitly marked for export (i.e. via the
152 // -e/--export command line flag)
153 bool forceExport : 1;
154
155 // False if LTO shouldn't inline whatever this symbol points to. If a symbol
156 // is overwritten after LTO, LTO shouldn't inline the symbol because it
157 // doesn't know the final contents of the symbol.
158 bool canInline : 1;
159
160 // True if this symbol is specified by --trace-symbol option.
161 bool traced : 1;
162 };
163
164 class FunctionSymbol : public Symbol {
165 public:
classof(const Symbol * s)166 static bool classof(const Symbol *s) {
167 return s->kind() == DefinedFunctionKind ||
168 s->kind() == UndefinedFunctionKind;
169 }
170
171 // Get/set the table index
172 void setTableIndex(uint32_t index);
173 uint32_t getTableIndex() const;
174 bool hasTableIndex() const;
175
176 // Get/set the function index
177 uint32_t getFunctionIndex() const;
178 void setFunctionIndex(uint32_t index);
179 bool hasFunctionIndex() const;
180
181 const WasmSignature *signature;
182
183 protected:
FunctionSymbol(StringRef name,Kind k,uint32_t flags,InputFile * f,const WasmSignature * sig)184 FunctionSymbol(StringRef name, Kind k, uint32_t flags, InputFile *f,
185 const WasmSignature *sig)
186 : Symbol(name, k, flags, f), signature(sig) {}
187
188 uint32_t tableIndex = INVALID_INDEX;
189 uint32_t functionIndex = INVALID_INDEX;
190 };
191
192 class DefinedFunction : public FunctionSymbol {
193 public:
194 DefinedFunction(StringRef name, uint32_t flags, InputFile *f,
195 InputFunction *function);
196
classof(const Symbol * s)197 static bool classof(const Symbol *s) {
198 return s->kind() == DefinedFunctionKind;
199 }
200
201 InputFunction *function;
202 };
203
204 class UndefinedFunction : public FunctionSymbol {
205 public:
206 UndefinedFunction(StringRef name, StringRef importName,
207 StringRef importModule, uint32_t flags,
208 InputFile *file = nullptr,
209 const WasmSignature *type = nullptr,
210 bool isCalledDirectly = true)
FunctionSymbol(name,UndefinedFunctionKind,flags,file,type)211 : FunctionSymbol(name, UndefinedFunctionKind, flags, file, type),
212 importName(importName), importModule(importModule), isCalledDirectly(isCalledDirectly) {}
213
classof(const Symbol * s)214 static bool classof(const Symbol *s) {
215 return s->kind() == UndefinedFunctionKind;
216 }
217
218 StringRef importName;
219 StringRef importModule;
220 bool isCalledDirectly;
221 };
222
223 // Section symbols for output sections are different from those for input
224 // section. These are generated by the linker and point the OutputSection
225 // rather than an InputSection.
226 class OutputSectionSymbol : public Symbol {
227 public:
OutputSectionSymbol(const OutputSection * s)228 OutputSectionSymbol(const OutputSection *s)
229 : Symbol("", OutputSectionKind, llvm::wasm::WASM_SYMBOL_BINDING_LOCAL,
230 nullptr),
231 section(s) {}
232
classof(const Symbol * s)233 static bool classof(const Symbol *s) {
234 return s->kind() == OutputSectionKind;
235 }
236
237 const OutputSection *section;
238 };
239
240 class SectionSymbol : public Symbol {
241 public:
242 SectionSymbol(uint32_t flags, const InputSection *s, InputFile *f = nullptr)
243 : Symbol("", SectionKind, flags, f), section(s) {}
244
classof(const Symbol * s)245 static bool classof(const Symbol *s) { return s->kind() == SectionKind; }
246
247 const OutputSectionSymbol *getOutputSectionSymbol() const;
248
249 const InputSection *section;
250 };
251
252 class DataSymbol : public Symbol {
253 public:
classof(const Symbol * s)254 static bool classof(const Symbol *s) {
255 return s->kind() == DefinedDataKind || s->kind() == UndefinedDataKind;
256 }
257
258 protected:
DataSymbol(StringRef name,Kind k,uint32_t flags,InputFile * f)259 DataSymbol(StringRef name, Kind k, uint32_t flags, InputFile *f)
260 : Symbol(name, k, flags, f) {}
261 };
262
263 class DefinedData : public DataSymbol {
264 public:
265 // Constructor for regular data symbols originating from input files.
DefinedData(StringRef name,uint32_t flags,InputFile * f,InputSegment * segment,uint32_t offset,uint32_t size)266 DefinedData(StringRef name, uint32_t flags, InputFile *f,
267 InputSegment *segment, uint32_t offset, uint32_t size)
268 : DataSymbol(name, DefinedDataKind, flags, f), segment(segment),
269 offset(offset), size(size) {}
270
271 // Constructor for linker synthetic data symbols.
DefinedData(StringRef name,uint32_t flags)272 DefinedData(StringRef name, uint32_t flags)
273 : DataSymbol(name, DefinedDataKind, flags, nullptr) {}
274
classof(const Symbol * s)275 static bool classof(const Symbol *s) { return s->kind() == DefinedDataKind; }
276
277 // Returns the output virtual address of a defined data symbol.
278 uint32_t getVirtualAddress() const;
279 void setVirtualAddress(uint32_t va);
280
281 // Returns the offset of a defined data symbol within its OutputSegment.
282 uint32_t getOutputSegmentOffset() const;
283 uint32_t getOutputSegmentIndex() const;
getSize()284 uint32_t getSize() const { return size; }
285
286 InputSegment *segment = nullptr;
287
288 protected:
289 uint32_t offset = 0;
290 uint32_t size = 0;
291 };
292
293 class UndefinedData : public DataSymbol {
294 public:
295 UndefinedData(StringRef name, uint32_t flags, InputFile *file = nullptr)
DataSymbol(name,UndefinedDataKind,flags,file)296 : DataSymbol(name, UndefinedDataKind, flags, file) {}
classof(const Symbol * s)297 static bool classof(const Symbol *s) {
298 return s->kind() == UndefinedDataKind;
299 }
300 };
301
302 class GlobalSymbol : public Symbol {
303 public:
classof(const Symbol * s)304 static bool classof(const Symbol *s) {
305 return s->kind() == DefinedGlobalKind || s->kind() == UndefinedGlobalKind;
306 }
307
getGlobalType()308 const WasmGlobalType *getGlobalType() const { return globalType; }
309
310 // Get/set the global index
311 uint32_t getGlobalIndex() const;
312 void setGlobalIndex(uint32_t index);
313 bool hasGlobalIndex() const;
314
315 protected:
GlobalSymbol(StringRef name,Kind k,uint32_t flags,InputFile * f,const WasmGlobalType * globalType)316 GlobalSymbol(StringRef name, Kind k, uint32_t flags, InputFile *f,
317 const WasmGlobalType *globalType)
318 : Symbol(name, k, flags, f), globalType(globalType) {}
319
320 const WasmGlobalType *globalType;
321 uint32_t globalIndex = INVALID_INDEX;
322 };
323
324 class DefinedGlobal : public GlobalSymbol {
325 public:
326 DefinedGlobal(StringRef name, uint32_t flags, InputFile *file,
327 InputGlobal *global);
328
classof(const Symbol * s)329 static bool classof(const Symbol *s) {
330 return s->kind() == DefinedGlobalKind;
331 }
332
333 InputGlobal *global;
334 };
335
336 class UndefinedGlobal : public GlobalSymbol {
337 public:
338 UndefinedGlobal(StringRef name, StringRef importName, StringRef importModule,
339 uint32_t flags, InputFile *file = nullptr,
340 const WasmGlobalType *type = nullptr)
GlobalSymbol(name,UndefinedGlobalKind,flags,file,type)341 : GlobalSymbol(name, UndefinedGlobalKind, flags, file, type),
342 importName(importName), importModule(importModule) {}
343
classof(const Symbol * s)344 static bool classof(const Symbol *s) {
345 return s->kind() == UndefinedGlobalKind;
346 }
347
348 StringRef importName;
349 StringRef importModule;
350 };
351
352 // Wasm events are features that suspend the current execution and transfer the
353 // control flow to a corresponding handler. Currently the only supported event
354 // kind is exceptions.
355 //
356 // Event tags are values to distinguish different events. For exceptions, they
357 // can be used to distinguish different language's exceptions, i.e., all C++
358 // exceptions have the same tag. Wasm can generate code capable of doing
359 // different handling actions based on the tag of caught exceptions.
360 //
361 // A single EventSymbol object represents a single tag. C++ exception event
362 // symbol is a weak symbol generated in every object file in which exceptions
363 // are used, and has name '__cpp_exception' for linking.
364 class EventSymbol : public Symbol {
365 public:
classof(const Symbol * s)366 static bool classof(const Symbol *s) { return s->kind() == DefinedEventKind; }
367
getEventType()368 const WasmEventType *getEventType() const { return eventType; }
369
370 // Get/set the event index
371 uint32_t getEventIndex() const;
372 void setEventIndex(uint32_t index);
373 bool hasEventIndex() const;
374
375 const WasmSignature *signature;
376
377 protected:
EventSymbol(StringRef name,Kind k,uint32_t flags,InputFile * f,const WasmEventType * eventType,const WasmSignature * sig)378 EventSymbol(StringRef name, Kind k, uint32_t flags, InputFile *f,
379 const WasmEventType *eventType, const WasmSignature *sig)
380 : Symbol(name, k, flags, f), signature(sig), eventType(eventType) {}
381
382 const WasmEventType *eventType;
383 uint32_t eventIndex = INVALID_INDEX;
384 };
385
386 class DefinedEvent : public EventSymbol {
387 public:
388 DefinedEvent(StringRef name, uint32_t flags, InputFile *file,
389 InputEvent *event);
390
classof(const Symbol * s)391 static bool classof(const Symbol *s) { return s->kind() == DefinedEventKind; }
392
393 InputEvent *event;
394 };
395
396 // LazySymbol represents a symbol that is not yet in the link, but we know where
397 // to find it if needed. If the resolver finds both Undefined and Lazy for the
398 // same name, it will ask the Lazy to load a file.
399 //
400 // A special complication is the handling of weak undefined symbols. They should
401 // not load a file, but we have to remember we have seen both the weak undefined
402 // and the lazy. We represent that with a lazy symbol with a weak binding. This
403 // means that code looking for undefined symbols normally also has to take lazy
404 // symbols into consideration.
405 class LazySymbol : public Symbol {
406 public:
LazySymbol(StringRef name,uint32_t flags,InputFile * file,const llvm::object::Archive::Symbol & sym)407 LazySymbol(StringRef name, uint32_t flags, InputFile *file,
408 const llvm::object::Archive::Symbol &sym)
409 : Symbol(name, LazyKind, flags, file), archiveSymbol(sym) {}
410
classof(const Symbol * s)411 static bool classof(const Symbol *s) { return s->kind() == LazyKind; }
412 void fetch();
413 MemoryBufferRef getMemberBuffer();
414
415 // Lazy symbols can have a signature because they can replace an
416 // UndefinedFunction which which case we need to be able to preserve the
417 // signture.
418 // TODO(sbc): This repetition of the signature field is inelegant. Revisit
419 // the use of class hierarchy to represent symbol taxonomy.
420 const WasmSignature *signature = nullptr;
421
422 private:
423 llvm::object::Archive::Symbol archiveSymbol;
424 };
425
426 // linker-generated symbols
427 struct WasmSym {
428 // __global_base
429 // Symbol marking the start of the global section.
430 static DefinedData *globalBase;
431
432 // __stack_pointer
433 // Global that holds the address of the top of the explicit value stack in
434 // linear memory.
435 static GlobalSymbol *stackPointer;
436
437 // __tls_base
438 // Global that holds the address of the base of the current thread's
439 // TLS block.
440 static GlobalSymbol *tlsBase;
441
442 // __tls_size
443 // Symbol whose value is the size of the TLS block.
444 static GlobalSymbol *tlsSize;
445
446 // __tls_size
447 // Symbol whose value is the alignment of the TLS block.
448 static GlobalSymbol *tlsAlign;
449
450 // __data_end
451 // Symbol marking the end of the data and bss.
452 static DefinedData *dataEnd;
453
454 // __heap_base
455 // Symbol marking the end of the data, bss and explicit stack. Any linear
456 // memory following this address is not used by the linked code and can
457 // therefore be used as a backing store for brk()/malloc() implementations.
458 static DefinedData *heapBase;
459
460 // __wasm_init_memory_flag
461 // Symbol whose contents are nonzero iff memory has already been initialized.
462 static DefinedData *initMemoryFlag;
463
464 // __wasm_init_memory
465 // Function that initializes passive data segments during instantiation.
466 static DefinedFunction *initMemory;
467
468 // __wasm_call_ctors
469 // Function that directly calls all ctors in priority order.
470 static DefinedFunction *callCtors;
471
472 // __wasm_apply_relocs
473 // Function that applies relocations to data segment post-instantiation.
474 static DefinedFunction *applyRelocs;
475
476 // __wasm_init_tls
477 // Function that allocates thread-local storage and initializes it.
478 static DefinedFunction *initTLS;
479
480 // __dso_handle
481 // Symbol used in calls to __cxa_atexit to determine current DLL
482 static DefinedData *dsoHandle;
483
484 // __table_base
485 // Used in PIC code for offset of indirect function table
486 static UndefinedGlobal *tableBase;
487 static DefinedData *definedTableBase;
488
489 // __memory_base
490 // Used in PIC code for offset of global data
491 static UndefinedGlobal *memoryBase;
492 static DefinedData *definedMemoryBase;
493 };
494
495 // A buffer class that is large enough to hold any Symbol-derived
496 // object. We allocate memory using this class and instantiate a symbol
497 // using the placement new.
498 union SymbolUnion {
499 alignas(DefinedFunction) char a[sizeof(DefinedFunction)];
500 alignas(DefinedData) char b[sizeof(DefinedData)];
501 alignas(DefinedGlobal) char c[sizeof(DefinedGlobal)];
502 alignas(DefinedEvent) char d[sizeof(DefinedEvent)];
503 alignas(LazySymbol) char e[sizeof(LazySymbol)];
504 alignas(UndefinedFunction) char f[sizeof(UndefinedFunction)];
505 alignas(UndefinedData) char g[sizeof(UndefinedData)];
506 alignas(UndefinedGlobal) char h[sizeof(UndefinedGlobal)];
507 alignas(SectionSymbol) char i[sizeof(SectionSymbol)];
508 };
509
510 // It is important to keep the size of SymbolUnion small for performance and
511 // memory usage reasons. 96 bytes is a soft limit based on the size of
512 // UndefinedFunction on a 64-bit system.
513 static_assert(sizeof(SymbolUnion) <= 96, "SymbolUnion too large");
514
515 void printTraceSymbol(Symbol *sym);
516 void printTraceSymbolUndefined(StringRef name, const InputFile* file);
517
518 template <typename T, typename... ArgT>
replaceSymbol(Symbol * s,ArgT &&...arg)519 T *replaceSymbol(Symbol *s, ArgT &&... arg) {
520 static_assert(std::is_trivially_destructible<T>(),
521 "Symbol types must be trivially destructible");
522 static_assert(sizeof(T) <= sizeof(SymbolUnion), "SymbolUnion too small");
523 static_assert(alignof(T) <= alignof(SymbolUnion),
524 "SymbolUnion not aligned enough");
525 assert(static_cast<Symbol *>(static_cast<T *>(nullptr)) == nullptr &&
526 "Not a Symbol");
527
528 Symbol symCopy = *s;
529
530 T *s2 = new (s) T(std::forward<ArgT>(arg)...);
531 s2->isUsedInRegularObj = symCopy.isUsedInRegularObj;
532 s2->forceExport = symCopy.forceExport;
533 s2->canInline = symCopy.canInline;
534 s2->traced = symCopy.traced;
535
536 // Print out a log message if --trace-symbol was specified.
537 // This is for debugging.
538 if (s2->traced)
539 printTraceSymbol(s2);
540
541 return s2;
542 }
543
544 } // namespace wasm
545
546 // Returns a symbol name for an error message.
547 std::string toString(const wasm::Symbol &sym);
548 std::string toString(wasm::Symbol::Kind kind);
549 std::string maybeDemangleSymbol(StringRef name);
550
551 } // namespace lld
552
553 #endif
554