1 //===- Symbols.h ------------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLD_WASM_SYMBOLS_H
10 #define LLD_WASM_SYMBOLS_H
11 
12 #include "Config.h"
13 #include "lld/Common/LLVM.h"
14 #include "llvm/Object/Archive.h"
15 #include "llvm/Object/Wasm.h"
16 
17 namespace lld {
18 namespace wasm {
19 
20 // Shared string constants
21 
// The default module name to use for symbol imports.
// Declared here only; the definition lives in another translation unit.
extern const char *defaultModule;

// The name under which to import or export the wasm table.
// Declared here only; the definition lives in another translation unit.
extern const char *functionTableName;

// Make the wasm symbol-type enum usable unqualified; it is the return type of
// Symbol::getWasmType().
using llvm::wasm::WasmSymbolType;

// Forward declarations.  The symbol classes in this header only refer to these
// types through pointers, so their full definitions are not required here.
class InputFile;
class InputChunk;
class InputSegment;
class InputFunction;
class InputGlobal;
class InputEvent;
class InputSection;
class OutputSection;

// Sentinel used as the initial value of the index fields of the symbol classes
// in this header (output symbol, GOT, table, function, global and event
// indices).  Symbol::hasGOTIndex() treats this value as "no index assigned".
#define INVALID_INDEX UINT32_MAX
40 
41 // The base class for real symbol classes.
42 class Symbol {
43 public:
44   enum Kind : uint8_t {
45     DefinedFunctionKind,
46     DefinedDataKind,
47     DefinedGlobalKind,
48     DefinedEventKind,
49     SectionKind,
50     OutputSectionKind,
51     UndefinedFunctionKind,
52     UndefinedDataKind,
53     UndefinedGlobalKind,
54     LazyKind,
55   };
56 
kind()57   Kind kind() const { return symbolKind; }
58 
isDefined()59   bool isDefined() const { return !isLazy() && !isUndefined(); }
60 
isUndefined()61   bool isUndefined() const {
62     return symbolKind == UndefinedFunctionKind ||
63            symbolKind == UndefinedDataKind || symbolKind == UndefinedGlobalKind;
64   }
65 
isLazy()66   bool isLazy() const { return symbolKind == LazyKind; }
67 
68   bool isLocal() const;
69   bool isWeak() const;
70   bool isHidden() const;
71 
72   // Returns true if this symbol exists in a discarded (due to COMDAT) section
73   bool isDiscarded() const;
74 
75   // True if this is an undefined weak symbol. This only works once
76   // all input files have been added.
isUndefWeak()77   bool isUndefWeak() const {
78     // See comment on lazy symbols for details.
79     return isWeak() && (isUndefined() || isLazy());
80   }
81 
82   // Returns the symbol name.
getName()83   StringRef getName() const { return name; }
84 
85   // Returns the file from which this symbol was created.
getFile()86   InputFile *getFile() const { return file; }
87 
getFlags()88   uint32_t getFlags() const { return flags; }
89 
90   InputChunk *getChunk() const;
91 
92   // Indicates that the section or import for this symbol will be included in
93   // the final image.
94   bool isLive() const;
95 
96   // Marks the symbol's InputChunk as Live, so that it will be included in the
97   // final image.
98   void markLive();
99 
100   void setHidden(bool isHidden);
101 
102   // Get/set the index in the output symbol table.  This is only used for
103   // relocatable output.
104   uint32_t getOutputSymbolIndex() const;
105   void setOutputSymbolIndex(uint32_t index);
106 
107   WasmSymbolType getWasmType() const;
108   bool isExported() const;
109 
110   // Indicates that the symbol is used in an __attribute__((used)) directive
111   // or similar.
112   bool isNoStrip() const;
113 
114   const WasmSignature* getSignature() const;
115 
getGOTIndex()116   uint32_t getGOTIndex() const {
117     assert(gotIndex != INVALID_INDEX);
118     return gotIndex;
119   }
120 
121   void setGOTIndex(uint32_t index);
hasGOTIndex()122   bool hasGOTIndex() const { return gotIndex != INVALID_INDEX; }
123 
124 protected:
Symbol(StringRef name,Kind k,uint32_t flags,InputFile * f)125   Symbol(StringRef name, Kind k, uint32_t flags, InputFile *f)
126       : name(name), file(f), flags(flags), symbolKind(k),
127         referenced(!config->gcSections), requiresGOT(false),
128         isUsedInRegularObj(false), forceExport(false), canInline(false),
129         traced(false) {}
130 
131   StringRef name;
132   InputFile *file;
133   uint32_t flags;
134   uint32_t outputSymbolIndex = INVALID_INDEX;
135   uint32_t gotIndex = INVALID_INDEX;
136   Kind symbolKind;
137 
138 public:
139   bool referenced : 1;
140 
141   // True for data symbols that needs a dummy GOT entry.  Used for static
142   // linking of GOT accesses.
143   bool requiresGOT : 1;
144 
145   // True if the symbol was used for linking and thus need to be added to the
146   // output file's symbol table. This is true for all symbols except for
147   // unreferenced DSO symbols, lazy (archive) symbols, and bitcode symbols that
148   // are unreferenced except by other bitcode objects.
149   bool isUsedInRegularObj : 1;
150 
151   // True if ths symbol is explicitly marked for export (i.e. via the
152   // -e/--export command line flag)
153   bool forceExport : 1;
154 
155   // False if LTO shouldn't inline whatever this symbol points to. If a symbol
156   // is overwritten after LTO, LTO shouldn't inline the symbol because it
157   // doesn't know the final contents of the symbol.
158   bool canInline : 1;
159 
160   // True if this symbol is specified by --trace-symbol option.
161   bool traced : 1;
162 };
163 
164 class FunctionSymbol : public Symbol {
165 public:
classof(const Symbol * s)166   static bool classof(const Symbol *s) {
167     return s->kind() == DefinedFunctionKind ||
168            s->kind() == UndefinedFunctionKind;
169   }
170 
171   // Get/set the table index
172   void setTableIndex(uint32_t index);
173   uint32_t getTableIndex() const;
174   bool hasTableIndex() const;
175 
176   // Get/set the function index
177   uint32_t getFunctionIndex() const;
178   void setFunctionIndex(uint32_t index);
179   bool hasFunctionIndex() const;
180 
181   const WasmSignature *signature;
182 
183 protected:
FunctionSymbol(StringRef name,Kind k,uint32_t flags,InputFile * f,const WasmSignature * sig)184   FunctionSymbol(StringRef name, Kind k, uint32_t flags, InputFile *f,
185                  const WasmSignature *sig)
186       : Symbol(name, k, flags, f), signature(sig) {}
187 
188   uint32_t tableIndex = INVALID_INDEX;
189   uint32_t functionIndex = INVALID_INDEX;
190 };
191 
192 class DefinedFunction : public FunctionSymbol {
193 public:
194   DefinedFunction(StringRef name, uint32_t flags, InputFile *f,
195                   InputFunction *function);
196 
classof(const Symbol * s)197   static bool classof(const Symbol *s) {
198     return s->kind() == DefinedFunctionKind;
199   }
200 
201   InputFunction *function;
202 };
203 
204 class UndefinedFunction : public FunctionSymbol {
205 public:
206   UndefinedFunction(StringRef name, StringRef importName,
207                     StringRef importModule, uint32_t flags,
208                     InputFile *file = nullptr,
209                     const WasmSignature *type = nullptr,
210                     bool isCalledDirectly = true)
FunctionSymbol(name,UndefinedFunctionKind,flags,file,type)211       : FunctionSymbol(name, UndefinedFunctionKind, flags, file, type),
212         importName(importName), importModule(importModule), isCalledDirectly(isCalledDirectly) {}
213 
classof(const Symbol * s)214   static bool classof(const Symbol *s) {
215     return s->kind() == UndefinedFunctionKind;
216   }
217 
218   StringRef importName;
219   StringRef importModule;
220   bool isCalledDirectly;
221 };
222 
223 // Section symbols for output sections are different from those for input
224 // section.  These are generated by the linker and point the OutputSection
225 // rather than an InputSection.
226 class OutputSectionSymbol : public Symbol {
227 public:
OutputSectionSymbol(const OutputSection * s)228   OutputSectionSymbol(const OutputSection *s)
229       : Symbol("", OutputSectionKind, llvm::wasm::WASM_SYMBOL_BINDING_LOCAL,
230                nullptr),
231         section(s) {}
232 
classof(const Symbol * s)233   static bool classof(const Symbol *s) {
234     return s->kind() == OutputSectionKind;
235   }
236 
237   const OutputSection *section;
238 };
239 
240 class SectionSymbol : public Symbol {
241 public:
242   SectionSymbol(uint32_t flags, const InputSection *s, InputFile *f = nullptr)
243       : Symbol("", SectionKind, flags, f), section(s) {}
244 
classof(const Symbol * s)245   static bool classof(const Symbol *s) { return s->kind() == SectionKind; }
246 
247   const OutputSectionSymbol *getOutputSectionSymbol() const;
248 
249   const InputSection *section;
250 };
251 
252 class DataSymbol : public Symbol {
253 public:
classof(const Symbol * s)254   static bool classof(const Symbol *s) {
255     return s->kind() == DefinedDataKind || s->kind() == UndefinedDataKind;
256   }
257 
258 protected:
DataSymbol(StringRef name,Kind k,uint32_t flags,InputFile * f)259   DataSymbol(StringRef name, Kind k, uint32_t flags, InputFile *f)
260       : Symbol(name, k, flags, f) {}
261 };
262 
263 class DefinedData : public DataSymbol {
264 public:
265   // Constructor for regular data symbols originating from input files.
DefinedData(StringRef name,uint32_t flags,InputFile * f,InputSegment * segment,uint32_t offset,uint32_t size)266   DefinedData(StringRef name, uint32_t flags, InputFile *f,
267               InputSegment *segment, uint32_t offset, uint32_t size)
268       : DataSymbol(name, DefinedDataKind, flags, f), segment(segment),
269         offset(offset), size(size) {}
270 
271   // Constructor for linker synthetic data symbols.
DefinedData(StringRef name,uint32_t flags)272   DefinedData(StringRef name, uint32_t flags)
273       : DataSymbol(name, DefinedDataKind, flags, nullptr) {}
274 
classof(const Symbol * s)275   static bool classof(const Symbol *s) { return s->kind() == DefinedDataKind; }
276 
277   // Returns the output virtual address of a defined data symbol.
278   uint32_t getVirtualAddress() const;
279   void setVirtualAddress(uint32_t va);
280 
281   // Returns the offset of a defined data symbol within its OutputSegment.
282   uint32_t getOutputSegmentOffset() const;
283   uint32_t getOutputSegmentIndex() const;
getSize()284   uint32_t getSize() const { return size; }
285 
286   InputSegment *segment = nullptr;
287 
288 protected:
289   uint32_t offset = 0;
290   uint32_t size = 0;
291 };
292 
293 class UndefinedData : public DataSymbol {
294 public:
295   UndefinedData(StringRef name, uint32_t flags, InputFile *file = nullptr)
DataSymbol(name,UndefinedDataKind,flags,file)296       : DataSymbol(name, UndefinedDataKind, flags, file) {}
classof(const Symbol * s)297   static bool classof(const Symbol *s) {
298     return s->kind() == UndefinedDataKind;
299   }
300 };
301 
302 class GlobalSymbol : public Symbol {
303 public:
classof(const Symbol * s)304   static bool classof(const Symbol *s) {
305     return s->kind() == DefinedGlobalKind || s->kind() == UndefinedGlobalKind;
306   }
307 
getGlobalType()308   const WasmGlobalType *getGlobalType() const { return globalType; }
309 
310   // Get/set the global index
311   uint32_t getGlobalIndex() const;
312   void setGlobalIndex(uint32_t index);
313   bool hasGlobalIndex() const;
314 
315 protected:
GlobalSymbol(StringRef name,Kind k,uint32_t flags,InputFile * f,const WasmGlobalType * globalType)316   GlobalSymbol(StringRef name, Kind k, uint32_t flags, InputFile *f,
317                const WasmGlobalType *globalType)
318       : Symbol(name, k, flags, f), globalType(globalType) {}
319 
320   const WasmGlobalType *globalType;
321   uint32_t globalIndex = INVALID_INDEX;
322 };
323 
324 class DefinedGlobal : public GlobalSymbol {
325 public:
326   DefinedGlobal(StringRef name, uint32_t flags, InputFile *file,
327                 InputGlobal *global);
328 
classof(const Symbol * s)329   static bool classof(const Symbol *s) {
330     return s->kind() == DefinedGlobalKind;
331   }
332 
333   InputGlobal *global;
334 };
335 
336 class UndefinedGlobal : public GlobalSymbol {
337 public:
338   UndefinedGlobal(StringRef name, StringRef importName, StringRef importModule,
339                   uint32_t flags, InputFile *file = nullptr,
340                   const WasmGlobalType *type = nullptr)
GlobalSymbol(name,UndefinedGlobalKind,flags,file,type)341       : GlobalSymbol(name, UndefinedGlobalKind, flags, file, type),
342         importName(importName), importModule(importModule) {}
343 
classof(const Symbol * s)344   static bool classof(const Symbol *s) {
345     return s->kind() == UndefinedGlobalKind;
346   }
347 
348   StringRef importName;
349   StringRef importModule;
350 };
351 
352 // Wasm events are features that suspend the current execution and transfer the
353 // control flow to a corresponding handler. Currently the only supported event
354 // kind is exceptions.
355 //
356 // Event tags are values to distinguish different events. For exceptions, they
357 // can be used to distinguish different language's exceptions, i.e., all C++
358 // exceptions have the same tag. Wasm can generate code capable of doing
359 // different handling actions based on the tag of caught exceptions.
360 //
361 // A single EventSymbol object represents a single tag. C++ exception event
362 // symbol is a weak symbol generated in every object file in which exceptions
363 // are used, and has name '__cpp_exception' for linking.
364 class EventSymbol : public Symbol {
365 public:
classof(const Symbol * s)366   static bool classof(const Symbol *s) { return s->kind() == DefinedEventKind; }
367 
getEventType()368   const WasmEventType *getEventType() const { return eventType; }
369 
370   // Get/set the event index
371   uint32_t getEventIndex() const;
372   void setEventIndex(uint32_t index);
373   bool hasEventIndex() const;
374 
375   const WasmSignature *signature;
376 
377 protected:
EventSymbol(StringRef name,Kind k,uint32_t flags,InputFile * f,const WasmEventType * eventType,const WasmSignature * sig)378   EventSymbol(StringRef name, Kind k, uint32_t flags, InputFile *f,
379               const WasmEventType *eventType, const WasmSignature *sig)
380       : Symbol(name, k, flags, f), signature(sig), eventType(eventType) {}
381 
382   const WasmEventType *eventType;
383   uint32_t eventIndex = INVALID_INDEX;
384 };
385 
386 class DefinedEvent : public EventSymbol {
387 public:
388   DefinedEvent(StringRef name, uint32_t flags, InputFile *file,
389                InputEvent *event);
390 
classof(const Symbol * s)391   static bool classof(const Symbol *s) { return s->kind() == DefinedEventKind; }
392 
393   InputEvent *event;
394 };
395 
396 // LazySymbol represents a symbol that is not yet in the link, but we know where
397 // to find it if needed. If the resolver finds both Undefined and Lazy for the
398 // same name, it will ask the Lazy to load a file.
399 //
400 // A special complication is the handling of weak undefined symbols. They should
401 // not load a file, but we have to remember we have seen both the weak undefined
402 // and the lazy. We represent that with a lazy symbol with a weak binding. This
403 // means that code looking for undefined symbols normally also has to take lazy
404 // symbols into consideration.
405 class LazySymbol : public Symbol {
406 public:
LazySymbol(StringRef name,uint32_t flags,InputFile * file,const llvm::object::Archive::Symbol & sym)407   LazySymbol(StringRef name, uint32_t flags, InputFile *file,
408              const llvm::object::Archive::Symbol &sym)
409       : Symbol(name, LazyKind, flags, file), archiveSymbol(sym) {}
410 
classof(const Symbol * s)411   static bool classof(const Symbol *s) { return s->kind() == LazyKind; }
412   void fetch();
413   MemoryBufferRef getMemberBuffer();
414 
415   // Lazy symbols can have a signature because they can replace an
416   // UndefinedFunction which which case we need to be able to preserve the
417   // signture.
418   // TODO(sbc): This repetition of the signature field is inelegant.  Revisit
419   // the use of class hierarchy to represent symbol taxonomy.
420   const WasmSignature *signature = nullptr;
421 
422 private:
423   llvm::object::Archive::Symbol archiveSymbol;
424 };
425 
// Linker-generated symbols.
// Each static member points at one well-known symbol; the comment above a
// member gives the symbol's name followed by its meaning.  The members are
// only declared here and are defined elsewhere.
struct WasmSym {
  // __global_base
  // Symbol marking the start of the global section.
  static DefinedData *globalBase;

  // __stack_pointer
  // Global that holds the address of the top of the explicit value stack in
  // linear memory.
  static GlobalSymbol *stackPointer;

  // __tls_base
  // Global that holds the address of the base of the current thread's
  // TLS block.
  static GlobalSymbol *tlsBase;

  // __tls_size
  // Symbol whose value is the size of the TLS block.
  static GlobalSymbol *tlsSize;

  // __tls_align
  // Symbol whose value is the alignment of the TLS block.
  static GlobalSymbol *tlsAlign;

  // __data_end
  // Symbol marking the end of the data and bss.
  static DefinedData *dataEnd;

  // __heap_base
  // Symbol marking the end of the data, bss and explicit stack.  Any linear
  // memory following this address is not used by the linked code and can
  // therefore be used as a backing store for brk()/malloc() implementations.
  static DefinedData *heapBase;

  // __wasm_init_memory_flag
  // Symbol whose contents are nonzero iff memory has already been initialized.
  static DefinedData *initMemoryFlag;

  // __wasm_init_memory
  // Function that initializes passive data segments during instantiation.
  static DefinedFunction *initMemory;

  // __wasm_call_ctors
  // Function that directly calls all ctors in priority order.
  static DefinedFunction *callCtors;

  // __wasm_apply_relocs
  // Function that applies relocations to data segment post-instantiation.
  static DefinedFunction *applyRelocs;

  // __wasm_init_tls
  // Function that allocates thread-local storage and initializes it.
  static DefinedFunction *initTLS;

  // __dso_handle
  // Symbol used in calls to __cxa_atexit to determine current DLL
  static DefinedData *dsoHandle;

  // __table_base
  // Used in PIC code for offset of indirect function table
  // Both an undefined-global form and a defined-data form are declared.
  static UndefinedGlobal *tableBase;
  static DefinedData *definedTableBase;

  // __memory_base
  // Used in PIC code for offset of global data
  // Both an undefined-global form and a defined-data form are declared.
  static UndefinedGlobal *memoryBase;
  static DefinedData *definedMemoryBase;
};
494 
// A buffer class that is large enough to hold any Symbol-derived
// object. We allocate memory using this class and instantiate a symbol
// using the placement new.
//
// Each member is a raw char array with the size and alignment of one symbol
// class, so the union as a whole takes the largest size and the strictest
// alignment among them.  The members are never accessed by name; they exist
// only to shape the storage.  replaceSymbol() checks at compile time that the
// type it constructs fits within this union.
union SymbolUnion {
  alignas(DefinedFunction) char a[sizeof(DefinedFunction)];
  alignas(DefinedData) char b[sizeof(DefinedData)];
  alignas(DefinedGlobal) char c[sizeof(DefinedGlobal)];
  alignas(DefinedEvent) char d[sizeof(DefinedEvent)];
  alignas(LazySymbol) char e[sizeof(LazySymbol)];
  alignas(UndefinedFunction) char f[sizeof(UndefinedFunction)];
  alignas(UndefinedData) char g[sizeof(UndefinedData)];
  alignas(UndefinedGlobal) char h[sizeof(UndefinedGlobal)];
  alignas(SectionSymbol) char i[sizeof(SectionSymbol)];
};

// It is important to keep the size of SymbolUnion small for performance and
// memory usage reasons. 96 bytes is a soft limit based on the size of
// UndefinedFunction on a 64-bit system.
// Growing any symbol class past that limit fails the build here.
static_assert(sizeof(SymbolUnion) <= 96, "SymbolUnion too large");
514 
// Prints the --trace-symbol log message for `sym`.  replaceSymbol() calls this
// when the newly constructed symbol has its `traced` bit set.  Defined out of
// line.
void printTraceSymbol(Symbol *sym);
// NOTE(review): presumably the counterpart for a traced name that is still
// undefined in `file` -- confirm against the out-of-line definition.
void printTraceSymbolUndefined(StringRef name, const InputFile* file);
517 
518 template <typename T, typename... ArgT>
replaceSymbol(Symbol * s,ArgT &&...arg)519 T *replaceSymbol(Symbol *s, ArgT &&... arg) {
520   static_assert(std::is_trivially_destructible<T>(),
521                 "Symbol types must be trivially destructible");
522   static_assert(sizeof(T) <= sizeof(SymbolUnion), "SymbolUnion too small");
523   static_assert(alignof(T) <= alignof(SymbolUnion),
524                 "SymbolUnion not aligned enough");
525   assert(static_cast<Symbol *>(static_cast<T *>(nullptr)) == nullptr &&
526          "Not a Symbol");
527 
528   Symbol symCopy = *s;
529 
530   T *s2 = new (s) T(std::forward<ArgT>(arg)...);
531   s2->isUsedInRegularObj = symCopy.isUsedInRegularObj;
532   s2->forceExport = symCopy.forceExport;
533   s2->canInline = symCopy.canInline;
534   s2->traced = symCopy.traced;
535 
536   // Print out a log message if --trace-symbol was specified.
537   // This is for debugging.
538   if (s2->traced)
539     printTraceSymbol(s2);
540 
541   return s2;
542 }
543 
544 } // namespace wasm
545 
// Returns a symbol name for an error message.
std::string toString(const wasm::Symbol &sym);
// NOTE(review): presumably returns a printable name for the given symbol
// kind -- confirm against the out-of-line definition.
std::string toString(wasm::Symbol::Kind kind);
// NOTE(review): the name suggests this returns `name` demangled when
// demangling applies and unchanged otherwise -- confirm against the
// out-of-line definition.
std::string maybeDemangleSymbol(StringRef name);
550 
551 } // namespace lld
552 
553 #endif
554