xref: /openbsd/gnu/llvm/lld/wasm/Symbols.h (revision 73471bf0)
1 //===- Symbols.h ------------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLD_WASM_SYMBOLS_H
10 #define LLD_WASM_SYMBOLS_H
11 
12 #include "Config.h"
13 #include "lld/Common/LLVM.h"
14 #include "llvm/ADT/Optional.h"
15 #include "llvm/Object/Archive.h"
16 #include "llvm/Object/Wasm.h"
17 
18 namespace lld {
19 namespace wasm {
20 
21 // Shared string constants
22 
23 // The default module name to use for symbol imports.
24 extern const char *defaultModule;
25 
26 // The name under which to import or export the wasm table.
27 extern const char *functionTableName;
28 
29 using llvm::wasm::WasmSymbolType;
30 
31 class InputFile;
32 class InputChunk;
33 class InputSegment;
34 class InputFunction;
35 class InputGlobal;
36 class InputEvent;
37 class InputSection;
38 class OutputSection;
39 
40 #define INVALID_INDEX UINT32_MAX
41 
42 // The base class for real symbol classes.
43 class Symbol {
44 public:
45   enum Kind : uint8_t {
46     DefinedFunctionKind,
47     DefinedDataKind,
48     DefinedGlobalKind,
49     DefinedEventKind,
50     SectionKind,
51     OutputSectionKind,
52     UndefinedFunctionKind,
53     UndefinedDataKind,
54     UndefinedGlobalKind,
55     LazyKind,
56   };
57 
58   Kind kind() const { return symbolKind; }
59 
60   bool isDefined() const { return !isLazy() && !isUndefined(); }
61 
62   bool isUndefined() const {
63     return symbolKind == UndefinedFunctionKind ||
64            symbolKind == UndefinedDataKind || symbolKind == UndefinedGlobalKind;
65   }
66 
67   bool isLazy() const { return symbolKind == LazyKind; }
68 
69   bool isLocal() const;
70   bool isWeak() const;
71   bool isHidden() const;
72 
73   // Returns true if this symbol exists in a discarded (due to COMDAT) section
74   bool isDiscarded() const;
75 
76   // True if this is an undefined weak symbol. This only works once
77   // all input files have been added.
78   bool isUndefWeak() const {
79     // See comment on lazy symbols for details.
80     return isWeak() && (isUndefined() || isLazy());
81   }
82 
83   // Returns the symbol name.
84   StringRef getName() const { return name; }
85 
86   // Returns the file from which this symbol was created.
87   InputFile *getFile() const { return file; }
88 
89   InputChunk *getChunk() const;
90 
91   // Indicates that the section or import for this symbol will be included in
92   // the final image.
93   bool isLive() const;
94 
95   // Marks the symbol's InputChunk as Live, so that it will be included in the
96   // final image.
97   void markLive();
98 
99   void setHidden(bool isHidden);
100 
101   // Get/set the index in the output symbol table.  This is only used for
102   // relocatable output.
103   uint32_t getOutputSymbolIndex() const;
104   void setOutputSymbolIndex(uint32_t index);
105 
106   WasmSymbolType getWasmType() const;
107   bool isExported() const;
108 
109   // Indicates that the symbol is used in an __attribute__((used)) directive
110   // or similar.
111   bool isNoStrip() const;
112 
113   const WasmSignature* getSignature() const;
114 
115   uint32_t getGOTIndex() const {
116     assert(gotIndex != INVALID_INDEX);
117     return gotIndex;
118   }
119 
120   void setGOTIndex(uint32_t index);
121   bool hasGOTIndex() const { return gotIndex != INVALID_INDEX; }
122 
123 protected:
124   Symbol(StringRef name, Kind k, uint32_t flags, InputFile *f)
125       : name(name), file(f), symbolKind(k), referenced(!config->gcSections),
126         requiresGOT(false), isUsedInRegularObj(false), forceExport(false),
127         canInline(false), traced(false), flags(flags) {}
128 
129   StringRef name;
130   InputFile *file;
131   uint32_t outputSymbolIndex = INVALID_INDEX;
132   uint32_t gotIndex = INVALID_INDEX;
133   Kind symbolKind;
134 
135 public:
136   bool referenced : 1;
137 
138   // True for data symbols that needs a dummy GOT entry.  Used for static
139   // linking of GOT accesses.
140   bool requiresGOT : 1;
141 
142   // True if the symbol was used for linking and thus need to be added to the
143   // output file's symbol table. This is true for all symbols except for
144   // unreferenced DSO symbols, lazy (archive) symbols, and bitcode symbols that
145   // are unreferenced except by other bitcode objects.
146   bool isUsedInRegularObj : 1;
147 
148   // True if ths symbol is explicitly marked for export (i.e. via the
149   // -e/--export command line flag)
150   bool forceExport : 1;
151 
152   // False if LTO shouldn't inline whatever this symbol points to. If a symbol
153   // is overwritten after LTO, LTO shouldn't inline the symbol because it
154   // doesn't know the final contents of the symbol.
155   bool canInline : 1;
156 
157   // True if this symbol is specified by --trace-symbol option.
158   bool traced : 1;
159 
160   uint32_t flags;
161 };
162 
163 class FunctionSymbol : public Symbol {
164 public:
165   static bool classof(const Symbol *s) {
166     return s->kind() == DefinedFunctionKind ||
167            s->kind() == UndefinedFunctionKind;
168   }
169 
170   // Get/set the table index
171   void setTableIndex(uint32_t index);
172   uint32_t getTableIndex() const;
173   bool hasTableIndex() const;
174 
175   // Get/set the function index
176   uint32_t getFunctionIndex() const;
177   void setFunctionIndex(uint32_t index);
178   bool hasFunctionIndex() const;
179 
180   const WasmSignature *signature;
181 
182 protected:
183   FunctionSymbol(StringRef name, Kind k, uint32_t flags, InputFile *f,
184                  const WasmSignature *sig)
185       : Symbol(name, k, flags, f), signature(sig) {}
186 
187   uint32_t tableIndex = INVALID_INDEX;
188   uint32_t functionIndex = INVALID_INDEX;
189 };
190 
191 class DefinedFunction : public FunctionSymbol {
192 public:
193   DefinedFunction(StringRef name, uint32_t flags, InputFile *f,
194                   InputFunction *function);
195 
196   static bool classof(const Symbol *s) {
197     return s->kind() == DefinedFunctionKind;
198   }
199 
200   InputFunction *function;
201 };
202 
203 class UndefinedFunction : public FunctionSymbol {
204 public:
205   UndefinedFunction(StringRef name, llvm::Optional<StringRef> importName,
206                     llvm::Optional<StringRef> importModule, uint32_t flags,
207                     InputFile *file = nullptr,
208                     const WasmSignature *type = nullptr,
209                     bool isCalledDirectly = true)
210       : FunctionSymbol(name, UndefinedFunctionKind, flags, file, type),
211         importName(importName), importModule(importModule),
212         isCalledDirectly(isCalledDirectly) {}
213 
214   static bool classof(const Symbol *s) {
215     return s->kind() == UndefinedFunctionKind;
216   }
217 
218   llvm::Optional<StringRef> importName;
219   llvm::Optional<StringRef> importModule;
220   bool isCalledDirectly;
221 };
222 
223 // Section symbols for output sections are different from those for input
224 // section.  These are generated by the linker and point the OutputSection
225 // rather than an InputSection.
226 class OutputSectionSymbol : public Symbol {
227 public:
228   OutputSectionSymbol(const OutputSection *s)
229       : Symbol("", OutputSectionKind, llvm::wasm::WASM_SYMBOL_BINDING_LOCAL,
230                nullptr),
231         section(s) {}
232 
233   static bool classof(const Symbol *s) {
234     return s->kind() == OutputSectionKind;
235   }
236 
237   const OutputSection *section;
238 };
239 
240 class SectionSymbol : public Symbol {
241 public:
242   SectionSymbol(uint32_t flags, const InputSection *s, InputFile *f = nullptr)
243       : Symbol("", SectionKind, flags, f), section(s) {}
244 
245   static bool classof(const Symbol *s) { return s->kind() == SectionKind; }
246 
247   const OutputSectionSymbol *getOutputSectionSymbol() const;
248 
249   const InputSection *section;
250 };
251 
252 class DataSymbol : public Symbol {
253 public:
254   static bool classof(const Symbol *s) {
255     return s->kind() == DefinedDataKind || s->kind() == UndefinedDataKind;
256   }
257 
258 protected:
259   DataSymbol(StringRef name, Kind k, uint32_t flags, InputFile *f)
260       : Symbol(name, k, flags, f) {}
261 };
262 
263 class DefinedData : public DataSymbol {
264 public:
265   // Constructor for regular data symbols originating from input files.
266   DefinedData(StringRef name, uint32_t flags, InputFile *f,
267               InputSegment *segment, uint64_t offset, uint64_t size)
268       : DataSymbol(name, DefinedDataKind, flags, f), segment(segment),
269         offset(offset), size(size) {}
270 
271   // Constructor for linker synthetic data symbols.
272   DefinedData(StringRef name, uint32_t flags)
273       : DataSymbol(name, DefinedDataKind, flags, nullptr) {}
274 
275   static bool classof(const Symbol *s) { return s->kind() == DefinedDataKind; }
276 
277   // Returns the output virtual address of a defined data symbol.
278   uint64_t getVirtualAddress() const;
279   void setVirtualAddress(uint64_t va);
280 
281   // Returns the offset of a defined data symbol within its OutputSegment.
282   uint64_t getOutputSegmentOffset() const;
283   uint64_t getOutputSegmentIndex() const;
284   uint64_t getSize() const { return size; }
285 
286   InputSegment *segment = nullptr;
287 
288 protected:
289   uint64_t offset = 0;
290   uint64_t size = 0;
291 };
292 
293 class UndefinedData : public DataSymbol {
294 public:
295   UndefinedData(StringRef name, uint32_t flags, InputFile *file = nullptr)
296       : DataSymbol(name, UndefinedDataKind, flags, file) {}
297   static bool classof(const Symbol *s) {
298     return s->kind() == UndefinedDataKind;
299   }
300 };
301 
302 class GlobalSymbol : public Symbol {
303 public:
304   static bool classof(const Symbol *s) {
305     return s->kind() == DefinedGlobalKind || s->kind() == UndefinedGlobalKind;
306   }
307 
308   const WasmGlobalType *getGlobalType() const { return globalType; }
309 
310   // Get/set the global index
311   uint32_t getGlobalIndex() const;
312   void setGlobalIndex(uint32_t index);
313   bool hasGlobalIndex() const;
314 
315 protected:
316   GlobalSymbol(StringRef name, Kind k, uint32_t flags, InputFile *f,
317                const WasmGlobalType *globalType)
318       : Symbol(name, k, flags, f), globalType(globalType) {}
319 
320   const WasmGlobalType *globalType;
321   uint32_t globalIndex = INVALID_INDEX;
322 };
323 
324 class DefinedGlobal : public GlobalSymbol {
325 public:
326   DefinedGlobal(StringRef name, uint32_t flags, InputFile *file,
327                 InputGlobal *global);
328 
329   static bool classof(const Symbol *s) {
330     return s->kind() == DefinedGlobalKind;
331   }
332 
333   InputGlobal *global;
334 };
335 
336 class UndefinedGlobal : public GlobalSymbol {
337 public:
338   UndefinedGlobal(StringRef name, llvm::Optional<StringRef> importName,
339                   llvm::Optional<StringRef> importModule, uint32_t flags,
340                   InputFile *file = nullptr,
341                   const WasmGlobalType *type = nullptr)
342       : GlobalSymbol(name, UndefinedGlobalKind, flags, file, type),
343         importName(importName), importModule(importModule) {}
344 
345   static bool classof(const Symbol *s) {
346     return s->kind() == UndefinedGlobalKind;
347   }
348 
349   llvm::Optional<StringRef> importName;
350   llvm::Optional<StringRef> importModule;
351 };
352 
353 // Wasm events are features that suspend the current execution and transfer the
354 // control flow to a corresponding handler. Currently the only supported event
355 // kind is exceptions.
356 //
357 // Event tags are values to distinguish different events. For exceptions, they
358 // can be used to distinguish different language's exceptions, i.e., all C++
359 // exceptions have the same tag. Wasm can generate code capable of doing
360 // different handling actions based on the tag of caught exceptions.
361 //
362 // A single EventSymbol object represents a single tag. C++ exception event
363 // symbol is a weak symbol generated in every object file in which exceptions
364 // are used, and has name '__cpp_exception' for linking.
365 class EventSymbol : public Symbol {
366 public:
367   static bool classof(const Symbol *s) { return s->kind() == DefinedEventKind; }
368 
369   const WasmEventType *getEventType() const { return eventType; }
370 
371   // Get/set the event index
372   uint32_t getEventIndex() const;
373   void setEventIndex(uint32_t index);
374   bool hasEventIndex() const;
375 
376   const WasmSignature *signature;
377 
378 protected:
379   EventSymbol(StringRef name, Kind k, uint32_t flags, InputFile *f,
380               const WasmEventType *eventType, const WasmSignature *sig)
381       : Symbol(name, k, flags, f), signature(sig), eventType(eventType) {}
382 
383   const WasmEventType *eventType;
384   uint32_t eventIndex = INVALID_INDEX;
385 };
386 
387 class DefinedEvent : public EventSymbol {
388 public:
389   DefinedEvent(StringRef name, uint32_t flags, InputFile *file,
390                InputEvent *event);
391 
392   static bool classof(const Symbol *s) { return s->kind() == DefinedEventKind; }
393 
394   InputEvent *event;
395 };
396 
397 // LazySymbol represents a symbol that is not yet in the link, but we know where
398 // to find it if needed. If the resolver finds both Undefined and Lazy for the
399 // same name, it will ask the Lazy to load a file.
400 //
401 // A special complication is the handling of weak undefined symbols. They should
402 // not load a file, but we have to remember we have seen both the weak undefined
403 // and the lazy. We represent that with a lazy symbol with a weak binding. This
404 // means that code looking for undefined symbols normally also has to take lazy
405 // symbols into consideration.
406 class LazySymbol : public Symbol {
407 public:
408   LazySymbol(StringRef name, uint32_t flags, InputFile *file,
409              const llvm::object::Archive::Symbol &sym)
410       : Symbol(name, LazyKind, flags, file), archiveSymbol(sym) {}
411 
412   static bool classof(const Symbol *s) { return s->kind() == LazyKind; }
413   void fetch();
414   MemoryBufferRef getMemberBuffer();
415 
416   // Lazy symbols can have a signature because they can replace an
417   // UndefinedFunction which which case we need to be able to preserve the
418   // signature.
419   // TODO(sbc): This repetition of the signature field is inelegant.  Revisit
420   // the use of class hierarchy to represent symbol taxonomy.
421   const WasmSignature *signature = nullptr;
422 
423 private:
424   llvm::object::Archive::Symbol archiveSymbol;
425 };
426 
427 // linker-generated symbols
428 struct WasmSym {
429   // __global_base
430   // Symbol marking the start of the global section.
431   static DefinedData *globalBase;
432 
433   // __stack_pointer
434   // Global that holds the address of the top of the explicit value stack in
435   // linear memory.
436   static GlobalSymbol *stackPointer;
437 
438   // __tls_base
439   // Global that holds the address of the base of the current thread's
440   // TLS block.
441   static GlobalSymbol *tlsBase;
442 
443   // __tls_size
444   // Symbol whose value is the size of the TLS block.
445   static GlobalSymbol *tlsSize;
446 
447   // __tls_size
448   // Symbol whose value is the alignment of the TLS block.
449   static GlobalSymbol *tlsAlign;
450 
451   // __data_end
452   // Symbol marking the end of the data and bss.
453   static DefinedData *dataEnd;
454 
455   // __heap_base
456   // Symbol marking the end of the data, bss and explicit stack.  Any linear
457   // memory following this address is not used by the linked code and can
458   // therefore be used as a backing store for brk()/malloc() implementations.
459   static DefinedData *heapBase;
460 
461   // __wasm_init_memory_flag
462   // Symbol whose contents are nonzero iff memory has already been initialized.
463   static DefinedData *initMemoryFlag;
464 
465   // __wasm_init_memory
466   // Function that initializes passive data segments during instantiation.
467   static DefinedFunction *initMemory;
468 
469   // __wasm_call_ctors
470   // Function that directly calls all ctors in priority order.
471   static DefinedFunction *callCtors;
472 
473   // __wasm_apply_relocs
474   // Function that applies relocations to data segment post-instantiation.
475   static DefinedFunction *applyRelocs;
476 
477   // __wasm_init_tls
478   // Function that allocates thread-local storage and initializes it.
479   static DefinedFunction *initTLS;
480 
481   // __dso_handle
482   // Symbol used in calls to __cxa_atexit to determine current DLL
483   static DefinedData *dsoHandle;
484 
485   // __table_base
486   // Used in PIC code for offset of indirect function table
487   static UndefinedGlobal *tableBase;
488   static DefinedData *definedTableBase;
489 
490   // __memory_base
491   // Used in PIC code for offset of global data
492   static UndefinedGlobal *memoryBase;
493   static DefinedData *definedMemoryBase;
494 };
495 
496 // A buffer class that is large enough to hold any Symbol-derived
497 // object. We allocate memory using this class and instantiate a symbol
498 // using the placement new.
499 union SymbolUnion {
500   alignas(DefinedFunction) char a[sizeof(DefinedFunction)];
501   alignas(DefinedData) char b[sizeof(DefinedData)];
502   alignas(DefinedGlobal) char c[sizeof(DefinedGlobal)];
503   alignas(DefinedEvent) char d[sizeof(DefinedEvent)];
504   alignas(LazySymbol) char e[sizeof(LazySymbol)];
505   alignas(UndefinedFunction) char f[sizeof(UndefinedFunction)];
506   alignas(UndefinedData) char g[sizeof(UndefinedData)];
507   alignas(UndefinedGlobal) char h[sizeof(UndefinedGlobal)];
508   alignas(SectionSymbol) char i[sizeof(SectionSymbol)];
509 };
510 
511 // It is important to keep the size of SymbolUnion small for performance and
512 // memory usage reasons. 96 bytes is a soft limit based on the size of
513 // UndefinedFunction on a 64-bit system.
514 static_assert(sizeof(SymbolUnion) <= 112, "SymbolUnion too large");
515 
516 void printTraceSymbol(Symbol *sym);
517 void printTraceSymbolUndefined(StringRef name, const InputFile* file);
518 
519 template <typename T, typename... ArgT>
520 T *replaceSymbol(Symbol *s, ArgT &&... arg) {
521   static_assert(std::is_trivially_destructible<T>(),
522                 "Symbol types must be trivially destructible");
523   static_assert(sizeof(T) <= sizeof(SymbolUnion), "SymbolUnion too small");
524   static_assert(alignof(T) <= alignof(SymbolUnion),
525                 "SymbolUnion not aligned enough");
526   assert(static_cast<Symbol *>(static_cast<T *>(nullptr)) == nullptr &&
527          "Not a Symbol");
528 
529   Symbol symCopy = *s;
530 
531   T *s2 = new (s) T(std::forward<ArgT>(arg)...);
532   s2->isUsedInRegularObj = symCopy.isUsedInRegularObj;
533   s2->forceExport = symCopy.forceExport;
534   s2->canInline = symCopy.canInline;
535   s2->traced = symCopy.traced;
536 
537   // Print out a log message if --trace-symbol was specified.
538   // This is for debugging.
539   if (s2->traced)
540     printTraceSymbol(s2);
541 
542   return s2;
543 }
544 
545 } // namespace wasm
546 
547 // Returns a symbol name for an error message.
548 std::string toString(const wasm::Symbol &sym);
549 std::string toString(wasm::Symbol::Kind kind);
550 std::string maybeDemangleSymbol(StringRef name);
551 
552 } // namespace lld
553 
554 #endif
555