//===- Symbols.h ------------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLD_WASM_SYMBOLS_H
#define LLD_WASM_SYMBOLS_H

#include "Config.h"
#include "lld/Common/LLVM.h"
#include "llvm/ADT/Optional.h"
#include "llvm/Object/Archive.h"
#include "llvm/Object/Wasm.h"

namespace lld {
namespace wasm {

// Shared string constants

// The default module name to use for symbol imports.
extern const char *defaultModule;

// The name under which to import or export the wasm table.
extern const char *functionTableName;

using llvm::wasm::WasmSymbolType;

// Forward declarations. This header only refers to these types through
// pointers, so their full definitions are not needed here.
class InputFile;
class InputChunk;
class InputSegment;
class InputFunction;
class InputGlobal;
class InputEvent;
class InputSection;
class OutputSection;

// Sentinel stored in an index field that has not been assigned yet.
#define INVALID_INDEX UINT32_MAX

// The base class for real symbol classes.
43 class Symbol { 44 public: 45 enum Kind : uint8_t { 46 DefinedFunctionKind, 47 DefinedDataKind, 48 DefinedGlobalKind, 49 DefinedEventKind, 50 SectionKind, 51 OutputSectionKind, 52 UndefinedFunctionKind, 53 UndefinedDataKind, 54 UndefinedGlobalKind, 55 LazyKind, 56 }; 57 58 Kind kind() const { return symbolKind; } 59 60 bool isDefined() const { return !isLazy() && !isUndefined(); } 61 62 bool isUndefined() const { 63 return symbolKind == UndefinedFunctionKind || 64 symbolKind == UndefinedDataKind || symbolKind == UndefinedGlobalKind; 65 } 66 67 bool isLazy() const { return symbolKind == LazyKind; } 68 69 bool isLocal() const; 70 bool isWeak() const; 71 bool isHidden() const; 72 73 // Returns true if this symbol exists in a discarded (due to COMDAT) section 74 bool isDiscarded() const; 75 76 // True if this is an undefined weak symbol. This only works once 77 // all input files have been added. 78 bool isUndefWeak() const { 79 // See comment on lazy symbols for details. 80 return isWeak() && (isUndefined() || isLazy()); 81 } 82 83 // Returns the symbol name. 84 StringRef getName() const { return name; } 85 86 // Returns the file from which this symbol was created. 87 InputFile *getFile() const { return file; } 88 89 InputChunk *getChunk() const; 90 91 // Indicates that the section or import for this symbol will be included in 92 // the final image. 93 bool isLive() const; 94 95 // Marks the symbol's InputChunk as Live, so that it will be included in the 96 // final image. 97 void markLive(); 98 99 void setHidden(bool isHidden); 100 101 // Get/set the index in the output symbol table. This is only used for 102 // relocatable output. 103 uint32_t getOutputSymbolIndex() const; 104 void setOutputSymbolIndex(uint32_t index); 105 106 WasmSymbolType getWasmType() const; 107 bool isExported() const; 108 109 // Indicates that the symbol is used in an __attribute__((used)) directive 110 // or similar. 
111 bool isNoStrip() const; 112 113 const WasmSignature* getSignature() const; 114 115 uint32_t getGOTIndex() const { 116 assert(gotIndex != INVALID_INDEX); 117 return gotIndex; 118 } 119 120 void setGOTIndex(uint32_t index); 121 bool hasGOTIndex() const { return gotIndex != INVALID_INDEX; } 122 123 protected: 124 Symbol(StringRef name, Kind k, uint32_t flags, InputFile *f) 125 : name(name), file(f), symbolKind(k), referenced(!config->gcSections), 126 requiresGOT(false), isUsedInRegularObj(false), forceExport(false), 127 canInline(false), traced(false), flags(flags) {} 128 129 StringRef name; 130 InputFile *file; 131 uint32_t outputSymbolIndex = INVALID_INDEX; 132 uint32_t gotIndex = INVALID_INDEX; 133 Kind symbolKind; 134 135 public: 136 bool referenced : 1; 137 138 // True for data symbols that needs a dummy GOT entry. Used for static 139 // linking of GOT accesses. 140 bool requiresGOT : 1; 141 142 // True if the symbol was used for linking and thus need to be added to the 143 // output file's symbol table. This is true for all symbols except for 144 // unreferenced DSO symbols, lazy (archive) symbols, and bitcode symbols that 145 // are unreferenced except by other bitcode objects. 146 bool isUsedInRegularObj : 1; 147 148 // True if ths symbol is explicitly marked for export (i.e. via the 149 // -e/--export command line flag) 150 bool forceExport : 1; 151 152 // False if LTO shouldn't inline whatever this symbol points to. If a symbol 153 // is overwritten after LTO, LTO shouldn't inline the symbol because it 154 // doesn't know the final contents of the symbol. 155 bool canInline : 1; 156 157 // True if this symbol is specified by --trace-symbol option. 
158 bool traced : 1; 159 160 uint32_t flags; 161 }; 162 163 class FunctionSymbol : public Symbol { 164 public: 165 static bool classof(const Symbol *s) { 166 return s->kind() == DefinedFunctionKind || 167 s->kind() == UndefinedFunctionKind; 168 } 169 170 // Get/set the table index 171 void setTableIndex(uint32_t index); 172 uint32_t getTableIndex() const; 173 bool hasTableIndex() const; 174 175 // Get/set the function index 176 uint32_t getFunctionIndex() const; 177 void setFunctionIndex(uint32_t index); 178 bool hasFunctionIndex() const; 179 180 const WasmSignature *signature; 181 182 protected: 183 FunctionSymbol(StringRef name, Kind k, uint32_t flags, InputFile *f, 184 const WasmSignature *sig) 185 : Symbol(name, k, flags, f), signature(sig) {} 186 187 uint32_t tableIndex = INVALID_INDEX; 188 uint32_t functionIndex = INVALID_INDEX; 189 }; 190 191 class DefinedFunction : public FunctionSymbol { 192 public: 193 DefinedFunction(StringRef name, uint32_t flags, InputFile *f, 194 InputFunction *function); 195 196 static bool classof(const Symbol *s) { 197 return s->kind() == DefinedFunctionKind; 198 } 199 200 InputFunction *function; 201 }; 202 203 class UndefinedFunction : public FunctionSymbol { 204 public: 205 UndefinedFunction(StringRef name, llvm::Optional<StringRef> importName, 206 llvm::Optional<StringRef> importModule, uint32_t flags, 207 InputFile *file = nullptr, 208 const WasmSignature *type = nullptr, 209 bool isCalledDirectly = true) 210 : FunctionSymbol(name, UndefinedFunctionKind, flags, file, type), 211 importName(importName), importModule(importModule), 212 isCalledDirectly(isCalledDirectly) {} 213 214 static bool classof(const Symbol *s) { 215 return s->kind() == UndefinedFunctionKind; 216 } 217 218 llvm::Optional<StringRef> importName; 219 llvm::Optional<StringRef> importModule; 220 bool isCalledDirectly; 221 }; 222 223 // Section symbols for output sections are different from those for input 224 // section. 
These are generated by the linker and point the OutputSection 225 // rather than an InputSection. 226 class OutputSectionSymbol : public Symbol { 227 public: 228 OutputSectionSymbol(const OutputSection *s) 229 : Symbol("", OutputSectionKind, llvm::wasm::WASM_SYMBOL_BINDING_LOCAL, 230 nullptr), 231 section(s) {} 232 233 static bool classof(const Symbol *s) { 234 return s->kind() == OutputSectionKind; 235 } 236 237 const OutputSection *section; 238 }; 239 240 class SectionSymbol : public Symbol { 241 public: 242 SectionSymbol(uint32_t flags, const InputSection *s, InputFile *f = nullptr) 243 : Symbol("", SectionKind, flags, f), section(s) {} 244 245 static bool classof(const Symbol *s) { return s->kind() == SectionKind; } 246 247 const OutputSectionSymbol *getOutputSectionSymbol() const; 248 249 const InputSection *section; 250 }; 251 252 class DataSymbol : public Symbol { 253 public: 254 static bool classof(const Symbol *s) { 255 return s->kind() == DefinedDataKind || s->kind() == UndefinedDataKind; 256 } 257 258 protected: 259 DataSymbol(StringRef name, Kind k, uint32_t flags, InputFile *f) 260 : Symbol(name, k, flags, f) {} 261 }; 262 263 class DefinedData : public DataSymbol { 264 public: 265 // Constructor for regular data symbols originating from input files. 266 DefinedData(StringRef name, uint32_t flags, InputFile *f, 267 InputSegment *segment, uint64_t offset, uint64_t size) 268 : DataSymbol(name, DefinedDataKind, flags, f), segment(segment), 269 offset(offset), size(size) {} 270 271 // Constructor for linker synthetic data symbols. 272 DefinedData(StringRef name, uint32_t flags) 273 : DataSymbol(name, DefinedDataKind, flags, nullptr) {} 274 275 static bool classof(const Symbol *s) { return s->kind() == DefinedDataKind; } 276 277 // Returns the output virtual address of a defined data symbol. 278 uint64_t getVirtualAddress() const; 279 void setVirtualAddress(uint64_t va); 280 281 // Returns the offset of a defined data symbol within its OutputSegment. 
282 uint64_t getOutputSegmentOffset() const; 283 uint64_t getOutputSegmentIndex() const; 284 uint64_t getSize() const { return size; } 285 286 InputSegment *segment = nullptr; 287 288 protected: 289 uint64_t offset = 0; 290 uint64_t size = 0; 291 }; 292 293 class UndefinedData : public DataSymbol { 294 public: 295 UndefinedData(StringRef name, uint32_t flags, InputFile *file = nullptr) 296 : DataSymbol(name, UndefinedDataKind, flags, file) {} 297 static bool classof(const Symbol *s) { 298 return s->kind() == UndefinedDataKind; 299 } 300 }; 301 302 class GlobalSymbol : public Symbol { 303 public: 304 static bool classof(const Symbol *s) { 305 return s->kind() == DefinedGlobalKind || s->kind() == UndefinedGlobalKind; 306 } 307 308 const WasmGlobalType *getGlobalType() const { return globalType; } 309 310 // Get/set the global index 311 uint32_t getGlobalIndex() const; 312 void setGlobalIndex(uint32_t index); 313 bool hasGlobalIndex() const; 314 315 protected: 316 GlobalSymbol(StringRef name, Kind k, uint32_t flags, InputFile *f, 317 const WasmGlobalType *globalType) 318 : Symbol(name, k, flags, f), globalType(globalType) {} 319 320 const WasmGlobalType *globalType; 321 uint32_t globalIndex = INVALID_INDEX; 322 }; 323 324 class DefinedGlobal : public GlobalSymbol { 325 public: 326 DefinedGlobal(StringRef name, uint32_t flags, InputFile *file, 327 InputGlobal *global); 328 329 static bool classof(const Symbol *s) { 330 return s->kind() == DefinedGlobalKind; 331 } 332 333 InputGlobal *global; 334 }; 335 336 class UndefinedGlobal : public GlobalSymbol { 337 public: 338 UndefinedGlobal(StringRef name, llvm::Optional<StringRef> importName, 339 llvm::Optional<StringRef> importModule, uint32_t flags, 340 InputFile *file = nullptr, 341 const WasmGlobalType *type = nullptr) 342 : GlobalSymbol(name, UndefinedGlobalKind, flags, file, type), 343 importName(importName), importModule(importModule) {} 344 345 static bool classof(const Symbol *s) { 346 return s->kind() == 
UndefinedGlobalKind; 347 } 348 349 llvm::Optional<StringRef> importName; 350 llvm::Optional<StringRef> importModule; 351 }; 352 353 // Wasm events are features that suspend the current execution and transfer the 354 // control flow to a corresponding handler. Currently the only supported event 355 // kind is exceptions. 356 // 357 // Event tags are values to distinguish different events. For exceptions, they 358 // can be used to distinguish different language's exceptions, i.e., all C++ 359 // exceptions have the same tag. Wasm can generate code capable of doing 360 // different handling actions based on the tag of caught exceptions. 361 // 362 // A single EventSymbol object represents a single tag. C++ exception event 363 // symbol is a weak symbol generated in every object file in which exceptions 364 // are used, and has name '__cpp_exception' for linking. 365 class EventSymbol : public Symbol { 366 public: 367 static bool classof(const Symbol *s) { return s->kind() == DefinedEventKind; } 368 369 const WasmEventType *getEventType() const { return eventType; } 370 371 // Get/set the event index 372 uint32_t getEventIndex() const; 373 void setEventIndex(uint32_t index); 374 bool hasEventIndex() const; 375 376 const WasmSignature *signature; 377 378 protected: 379 EventSymbol(StringRef name, Kind k, uint32_t flags, InputFile *f, 380 const WasmEventType *eventType, const WasmSignature *sig) 381 : Symbol(name, k, flags, f), signature(sig), eventType(eventType) {} 382 383 const WasmEventType *eventType; 384 uint32_t eventIndex = INVALID_INDEX; 385 }; 386 387 class DefinedEvent : public EventSymbol { 388 public: 389 DefinedEvent(StringRef name, uint32_t flags, InputFile *file, 390 InputEvent *event); 391 392 static bool classof(const Symbol *s) { return s->kind() == DefinedEventKind; } 393 394 InputEvent *event; 395 }; 396 397 // LazySymbol represents a symbol that is not yet in the link, but we know where 398 // to find it if needed. 
If the resolver finds both Undefined and Lazy for the 399 // same name, it will ask the Lazy to load a file. 400 // 401 // A special complication is the handling of weak undefined symbols. They should 402 // not load a file, but we have to remember we have seen both the weak undefined 403 // and the lazy. We represent that with a lazy symbol with a weak binding. This 404 // means that code looking for undefined symbols normally also has to take lazy 405 // symbols into consideration. 406 class LazySymbol : public Symbol { 407 public: 408 LazySymbol(StringRef name, uint32_t flags, InputFile *file, 409 const llvm::object::Archive::Symbol &sym) 410 : Symbol(name, LazyKind, flags, file), archiveSymbol(sym) {} 411 412 static bool classof(const Symbol *s) { return s->kind() == LazyKind; } 413 void fetch(); 414 MemoryBufferRef getMemberBuffer(); 415 416 // Lazy symbols can have a signature because they can replace an 417 // UndefinedFunction which which case we need to be able to preserve the 418 // signature. 419 // TODO(sbc): This repetition of the signature field is inelegant. Revisit 420 // the use of class hierarchy to represent symbol taxonomy. 421 const WasmSignature *signature = nullptr; 422 423 private: 424 llvm::object::Archive::Symbol archiveSymbol; 425 }; 426 427 // linker-generated symbols 428 struct WasmSym { 429 // __global_base 430 // Symbol marking the start of the global section. 431 static DefinedData *globalBase; 432 433 // __stack_pointer 434 // Global that holds the address of the top of the explicit value stack in 435 // linear memory. 436 static GlobalSymbol *stackPointer; 437 438 // __tls_base 439 // Global that holds the address of the base of the current thread's 440 // TLS block. 441 static GlobalSymbol *tlsBase; 442 443 // __tls_size 444 // Symbol whose value is the size of the TLS block. 445 static GlobalSymbol *tlsSize; 446 447 // __tls_size 448 // Symbol whose value is the alignment of the TLS block. 
449 static GlobalSymbol *tlsAlign; 450 451 // __data_end 452 // Symbol marking the end of the data and bss. 453 static DefinedData *dataEnd; 454 455 // __heap_base 456 // Symbol marking the end of the data, bss and explicit stack. Any linear 457 // memory following this address is not used by the linked code and can 458 // therefore be used as a backing store for brk()/malloc() implementations. 459 static DefinedData *heapBase; 460 461 // __wasm_init_memory_flag 462 // Symbol whose contents are nonzero iff memory has already been initialized. 463 static DefinedData *initMemoryFlag; 464 465 // __wasm_init_memory 466 // Function that initializes passive data segments during instantiation. 467 static DefinedFunction *initMemory; 468 469 // __wasm_call_ctors 470 // Function that directly calls all ctors in priority order. 471 static DefinedFunction *callCtors; 472 473 // __wasm_apply_relocs 474 // Function that applies relocations to data segment post-instantiation. 475 static DefinedFunction *applyRelocs; 476 477 // __wasm_init_tls 478 // Function that allocates thread-local storage and initializes it. 479 static DefinedFunction *initTLS; 480 481 // __dso_handle 482 // Symbol used in calls to __cxa_atexit to determine current DLL 483 static DefinedData *dsoHandle; 484 485 // __table_base 486 // Used in PIC code for offset of indirect function table 487 static UndefinedGlobal *tableBase; 488 static DefinedData *definedTableBase; 489 490 // __memory_base 491 // Used in PIC code for offset of global data 492 static UndefinedGlobal *memoryBase; 493 static DefinedData *definedMemoryBase; 494 }; 495 496 // A buffer class that is large enough to hold any Symbol-derived 497 // object. We allocate memory using this class and instantiate a symbol 498 // using the placement new. 
499 union SymbolUnion { 500 alignas(DefinedFunction) char a[sizeof(DefinedFunction)]; 501 alignas(DefinedData) char b[sizeof(DefinedData)]; 502 alignas(DefinedGlobal) char c[sizeof(DefinedGlobal)]; 503 alignas(DefinedEvent) char d[sizeof(DefinedEvent)]; 504 alignas(LazySymbol) char e[sizeof(LazySymbol)]; 505 alignas(UndefinedFunction) char f[sizeof(UndefinedFunction)]; 506 alignas(UndefinedData) char g[sizeof(UndefinedData)]; 507 alignas(UndefinedGlobal) char h[sizeof(UndefinedGlobal)]; 508 alignas(SectionSymbol) char i[sizeof(SectionSymbol)]; 509 }; 510 511 // It is important to keep the size of SymbolUnion small for performance and 512 // memory usage reasons. 96 bytes is a soft limit based on the size of 513 // UndefinedFunction on a 64-bit system. 514 static_assert(sizeof(SymbolUnion) <= 112, "SymbolUnion too large"); 515 516 void printTraceSymbol(Symbol *sym); 517 void printTraceSymbolUndefined(StringRef name, const InputFile* file); 518 519 template <typename T, typename... ArgT> 520 T *replaceSymbol(Symbol *s, ArgT &&... arg) { 521 static_assert(std::is_trivially_destructible<T>(), 522 "Symbol types must be trivially destructible"); 523 static_assert(sizeof(T) <= sizeof(SymbolUnion), "SymbolUnion too small"); 524 static_assert(alignof(T) <= alignof(SymbolUnion), 525 "SymbolUnion not aligned enough"); 526 assert(static_cast<Symbol *>(static_cast<T *>(nullptr)) == nullptr && 527 "Not a Symbol"); 528 529 Symbol symCopy = *s; 530 531 T *s2 = new (s) T(std::forward<ArgT>(arg)...); 532 s2->isUsedInRegularObj = symCopy.isUsedInRegularObj; 533 s2->forceExport = symCopy.forceExport; 534 s2->canInline = symCopy.canInline; 535 s2->traced = symCopy.traced; 536 537 // Print out a log message if --trace-symbol was specified. 538 // This is for debugging. 539 if (s2->traced) 540 printTraceSymbol(s2); 541 542 return s2; 543 } 544 545 } // namespace wasm 546 547 // Returns a symbol name for an error message. 
std::string toString(const wasm::Symbol &sym);
// NOTE(review): presumably returns a printable name for a symbol kind; the
// definition is not visible here, so confirm.
std::string toString(wasm::Symbol::Kind kind);
// NOTE(review): presumably returns the demangled form of `name` when it can be
// demangled and `name` itself otherwise; confirm against the definition.
std::string maybeDemangleSymbol(StringRef name);

} // namespace lld

#endif