1 //===- Symbols.h ------------------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLD_WASM_SYMBOLS_H 10 #define LLD_WASM_SYMBOLS_H 11 12 #include "Config.h" 13 #include "lld/Common/LLVM.h" 14 #include "llvm/Object/Archive.h" 15 #include "llvm/Object/Wasm.h" 16 #include <optional> 17 18 namespace lld { 19 namespace wasm { 20 21 // Shared string constants 22 23 // The default module name to use for symbol imports. 24 extern const char *defaultModule; 25 26 // The name under which to import or export the wasm table. 27 extern const char *functionTableName; 28 29 // The name under which to import or export the wasm memory. 30 extern const char *memoryName; 31 32 using llvm::wasm::WasmSymbolType; 33 34 class InputFile; 35 class InputChunk; 36 class InputSegment; 37 class InputFunction; 38 class InputGlobal; 39 class InputTag; 40 class InputSection; 41 class InputTable; 42 class OutputSection; 43 44 #define INVALID_INDEX UINT32_MAX 45 46 // The base class for real symbol classes. 47 class Symbol { 48 public: 49 enum Kind : uint8_t { 50 DefinedFunctionKind, 51 DefinedDataKind, 52 DefinedGlobalKind, 53 DefinedTagKind, 54 DefinedTableKind, 55 SectionKind, 56 OutputSectionKind, 57 UndefinedFunctionKind, 58 UndefinedDataKind, 59 UndefinedGlobalKind, 60 UndefinedTableKind, 61 UndefinedTagKind, 62 LazyKind, 63 }; 64 65 Kind kind() const { return symbolKind; } 66 67 bool isDefined() const { return !isLazy() && !isUndefined(); } 68 69 bool isUndefined() const { 70 return symbolKind == UndefinedFunctionKind || 71 symbolKind == UndefinedDataKind || 72 symbolKind == UndefinedGlobalKind || 73 symbolKind == UndefinedTableKind || symbolKind == UndefinedTagKind; 74 } 75 76 bool isLazy() const { return symbolKind == LazyKind; } 77 78 bool isLocal() const; 79 bool isWeak() const; 80 bool isHidden() const; 81 bool isTLS() const; 82 83 // Returns true if this symbol exists in a discarded (due to COMDAT) section 84 bool isDiscarded() const; 85 86 // True if this is an undefined weak symbol. This only works once 87 // all input files have been added. 88 bool isUndefWeak() const { 89 // See comment on lazy symbols for details. 90 return isWeak() && (isUndefined() || isLazy()); 91 } 92 93 // Returns the symbol name. 94 StringRef getName() const { return name; } 95 96 // Returns the file from which this symbol was created. 97 InputFile *getFile() const { return file; } 98 99 InputChunk *getChunk() const; 100 101 // Indicates that the section or import for this symbol will be included in 102 // the final image. 103 bool isLive() const; 104 105 // Marks the symbol's InputChunk as Live, so that it will be included in the 106 // final image. 107 void markLive(); 108 109 void setHidden(bool isHidden); 110 111 // Get/set the index in the output symbol table. This is only used for 112 // relocatable output. 113 uint32_t getOutputSymbolIndex() const; 114 void setOutputSymbolIndex(uint32_t index); 115 116 WasmSymbolType getWasmType() const; 117 bool isImported() const; 118 bool isExported() const; 119 bool isExportedExplicit() const; 120 121 // Indicates that the symbol is used in an __attribute__((used)) directive 122 // or similar. 123 bool isNoStrip() const; 124 125 const WasmSignature* getSignature() const; 126 127 uint32_t getGOTIndex() const { 128 assert(gotIndex != INVALID_INDEX); 129 return gotIndex; 130 } 131 132 void setGOTIndex(uint32_t index); 133 bool hasGOTIndex() const { return gotIndex != INVALID_INDEX; } 134 135 protected: 136 Symbol(StringRef name, Kind k, uint32_t flags, InputFile *f) 137 : name(name), file(f), symbolKind(k), referenced(!config->gcSections), 138 requiresGOT(false), isUsedInRegularObj(false), forceExport(false), 139 forceImport(false), canInline(false), traced(false), isStub(false), 140 flags(flags) {} 141 142 StringRef name; 143 InputFile *file; 144 uint32_t outputSymbolIndex = INVALID_INDEX; 145 uint32_t gotIndex = INVALID_INDEX; 146 Kind symbolKind; 147 148 public: 149 bool referenced : 1; 150 151 // True for data symbols that needs a dummy GOT entry. Used for static 152 // linking of GOT accesses. 153 bool requiresGOT : 1; 154 155 // True if the symbol was used for linking and thus need to be added to the 156 // output file's symbol table. This is true for all symbols except for 157 // unreferenced DSO symbols, lazy (archive) symbols, and bitcode symbols that 158 // are unreferenced except by other bitcode objects. 159 bool isUsedInRegularObj : 1; 160 161 // True if this symbol is explicitly marked for export (i.e. via the 162 // -e/--export command line flag) 163 bool forceExport : 1; 164 165 bool forceImport : 1; 166 167 // False if LTO shouldn't inline whatever this symbol points to. If a symbol 168 // is overwritten after LTO, LTO shouldn't inline the symbol because it 169 // doesn't know the final contents of the symbol. 170 bool canInline : 1; 171 172 // True if this symbol is specified by --trace-symbol option. 173 bool traced : 1; 174 175 // True if this symbol is a linker-synthesized stub function (traps when 176 // called) and should otherwise be treated as missing/undefined. See 177 // SymbolTable::replaceWithUndefined. 178 // These stubs never appear in the table and any table index relocations 179 // against them will produce address 0 (The table index representing 180 // the null function pointer). 181 bool isStub : 1; 182 183 uint32_t flags; 184 185 std::optional<StringRef> importName; 186 std::optional<StringRef> importModule; 187 }; 188 189 class FunctionSymbol : public Symbol { 190 public: 191 static bool classof(const Symbol *s) { 192 return s->kind() == DefinedFunctionKind || 193 s->kind() == UndefinedFunctionKind; 194 } 195 196 // Get/set the table index 197 void setTableIndex(uint32_t index); 198 uint32_t getTableIndex() const; 199 bool hasTableIndex() const; 200 201 // Get/set the function index 202 uint32_t getFunctionIndex() const; 203 void setFunctionIndex(uint32_t index); 204 bool hasFunctionIndex() const; 205 206 const WasmSignature *signature; 207 208 protected: 209 FunctionSymbol(StringRef name, Kind k, uint32_t flags, InputFile *f, 210 const WasmSignature *sig) 211 : Symbol(name, k, flags, f), signature(sig) {} 212 213 uint32_t tableIndex = INVALID_INDEX; 214 uint32_t functionIndex = INVALID_INDEX; 215 }; 216 217 class DefinedFunction : public FunctionSymbol { 218 public: 219 DefinedFunction(StringRef name, uint32_t flags, InputFile *f, 220 InputFunction *function); 221 222 static bool classof(const Symbol *s) { 223 return s->kind() == DefinedFunctionKind; 224 } 225 226 // Get the function index to be used when exporting. This only applies to 227 // defined functions and can be differ from the regular function index for 228 // weakly defined functions (that are imported and used via one index but 229 // defined and exported via another). 230 uint32_t getExportedFunctionIndex() const; 231 232 InputFunction *function; 233 }; 234 235 class UndefinedFunction : public FunctionSymbol { 236 public: 237 UndefinedFunction(StringRef name, std::optional<StringRef> importName, 238 std::optional<StringRef> importModule, uint32_t flags, 239 InputFile *file = nullptr, 240 const WasmSignature *type = nullptr, 241 bool isCalledDirectly = true) 242 : FunctionSymbol(name, UndefinedFunctionKind, flags, file, type), 243 isCalledDirectly(isCalledDirectly) { 244 this->importName = importName; 245 this->importModule = importModule; 246 } 247 248 static bool classof(const Symbol *s) { 249 return s->kind() == UndefinedFunctionKind; 250 } 251 252 DefinedFunction *stubFunction = nullptr; 253 bool isCalledDirectly; 254 }; 255 256 // Section symbols for output sections are different from those for input 257 // section. These are generated by the linker and point the OutputSection 258 // rather than an InputSection. 259 class OutputSectionSymbol : public Symbol { 260 public: 261 OutputSectionSymbol(const OutputSection *s) 262 : Symbol("", OutputSectionKind, llvm::wasm::WASM_SYMBOL_BINDING_LOCAL, 263 nullptr), 264 section(s) {} 265 266 static bool classof(const Symbol *s) { 267 return s->kind() == OutputSectionKind; 268 } 269 270 const OutputSection *section; 271 }; 272 273 class SectionSymbol : public Symbol { 274 public: 275 SectionSymbol(uint32_t flags, const InputChunk *s, InputFile *f = nullptr) 276 : Symbol("", SectionKind, flags, f), section(s) {} 277 278 static bool classof(const Symbol *s) { return s->kind() == SectionKind; } 279 280 const OutputSectionSymbol *getOutputSectionSymbol() const; 281 282 const InputChunk *section; 283 }; 284 285 class DataSymbol : public Symbol { 286 public: 287 static bool classof(const Symbol *s) { 288 return s->kind() == DefinedDataKind || s->kind() == UndefinedDataKind; 289 } 290 291 protected: 292 DataSymbol(StringRef name, Kind k, uint32_t flags, InputFile *f) 293 : Symbol(name, k, flags, f) {} 294 }; 295 296 class DefinedData : public DataSymbol { 297 public: 298 // Constructor for regular data symbols originating from input files. 299 DefinedData(StringRef name, uint32_t flags, InputFile *f, InputChunk *segment, 300 uint64_t value, uint64_t size) 301 : DataSymbol(name, DefinedDataKind, flags, f), segment(segment), 302 value(value), size(size) {} 303 304 // Constructor for linker synthetic data symbols. 305 DefinedData(StringRef name, uint32_t flags) 306 : DataSymbol(name, DefinedDataKind, flags, nullptr) {} 307 308 static bool classof(const Symbol *s) { return s->kind() == DefinedDataKind; } 309 310 // Returns the output virtual address of a defined data symbol. 311 uint64_t getVA() const; 312 void setVA(uint64_t va); 313 314 // Returns the offset of a defined data symbol within its OutputSegment. 315 uint64_t getOutputSegmentOffset() const; 316 uint64_t getOutputSegmentIndex() const; 317 uint64_t getSize() const { return size; } 318 319 InputChunk *segment = nullptr; 320 uint64_t value = 0; 321 322 protected: 323 uint64_t size = 0; 324 }; 325 326 class UndefinedData : public DataSymbol { 327 public: 328 UndefinedData(StringRef name, uint32_t flags, InputFile *file = nullptr) 329 : DataSymbol(name, UndefinedDataKind, flags, file) {} 330 static bool classof(const Symbol *s) { 331 return s->kind() == UndefinedDataKind; 332 } 333 }; 334 335 class GlobalSymbol : public Symbol { 336 public: 337 static bool classof(const Symbol *s) { 338 return s->kind() == DefinedGlobalKind || s->kind() == UndefinedGlobalKind; 339 } 340 341 const WasmGlobalType *getGlobalType() const { return globalType; } 342 343 // Get/set the global index 344 uint32_t getGlobalIndex() const; 345 void setGlobalIndex(uint32_t index); 346 bool hasGlobalIndex() const; 347 348 protected: 349 GlobalSymbol(StringRef name, Kind k, uint32_t flags, InputFile *f, 350 const WasmGlobalType *globalType) 351 : Symbol(name, k, flags, f), globalType(globalType) {} 352 353 const WasmGlobalType *globalType; 354 uint32_t globalIndex = INVALID_INDEX; 355 }; 356 357 class DefinedGlobal : public GlobalSymbol { 358 public: 359 DefinedGlobal(StringRef name, uint32_t flags, InputFile *file, 360 InputGlobal *global); 361 362 static bool classof(const Symbol *s) { 363 return s->kind() == DefinedGlobalKind; 364 } 365 366 InputGlobal *global; 367 }; 368 369 class UndefinedGlobal : public GlobalSymbol { 370 public: 371 UndefinedGlobal(StringRef name, std::optional<StringRef> importName, 372 std::optional<StringRef> importModule, uint32_t flags, 373 InputFile *file = nullptr, 374 const WasmGlobalType *type = nullptr) 375 : GlobalSymbol(name, UndefinedGlobalKind, flags, file, type) { 376 this->importName = importName; 377 this->importModule = importModule; 378 } 379 380 static bool classof(const Symbol *s) { 381 return s->kind() == UndefinedGlobalKind; 382 } 383 }; 384 385 class TableSymbol : public Symbol { 386 public: 387 static bool classof(const Symbol *s) { 388 return s->kind() == DefinedTableKind || s->kind() == UndefinedTableKind; 389 } 390 391 const WasmTableType *getTableType() const { return tableType; } 392 void setLimits(const WasmLimits &limits); 393 394 // Get/set the table number 395 uint32_t getTableNumber() const; 396 void setTableNumber(uint32_t number); 397 bool hasTableNumber() const; 398 399 protected: 400 TableSymbol(StringRef name, Kind k, uint32_t flags, InputFile *f, 401 const WasmTableType *type) 402 : Symbol(name, k, flags, f), tableType(type) {} 403 404 const WasmTableType *tableType; 405 uint32_t tableNumber = INVALID_INDEX; 406 }; 407 408 class DefinedTable : public TableSymbol { 409 public: 410 DefinedTable(StringRef name, uint32_t flags, InputFile *file, 411 InputTable *table); 412 413 static bool classof(const Symbol *s) { return s->kind() == DefinedTableKind; } 414 415 InputTable *table; 416 }; 417 418 class UndefinedTable : public TableSymbol { 419 public: 420 UndefinedTable(StringRef name, std::optional<StringRef> importName, 421 std::optional<StringRef> importModule, uint32_t flags, 422 InputFile *file, const WasmTableType *type) 423 : TableSymbol(name, UndefinedTableKind, flags, file, type) { 424 this->importName = importName; 425 this->importModule = importModule; 426 } 427 428 static bool classof(const Symbol *s) { 429 return s->kind() == UndefinedTableKind; 430 } 431 }; 432 433 // A tag is a general format to distinguish typed entities. Each tag has an 434 // attribute and a type. Currently the attribute can only specify that the tag 435 // is for an exception tag. 436 // 437 // In exception handling, tags are used to distinguish different kinds of 438 // exceptions. For example, they can be used to distinguish different language's 439 // exceptions, e.g., all C++ exceptions have the same tag and Java exceptions 440 // would have a distinct tag. Wasm can filter the exceptions it catches based on 441 // their tag. 442 // 443 // A single TagSymbol object represents a single tag. The C++ exception symbol 444 // is a weak symbol generated in every object file in which exceptions are used, 445 // and is named '__cpp_exception' for linking. 446 class TagSymbol : public Symbol { 447 public: 448 static bool classof(const Symbol *s) { 449 return s->kind() == DefinedTagKind || s->kind() == UndefinedTagKind; 450 } 451 452 // Get/set the tag index 453 uint32_t getTagIndex() const; 454 void setTagIndex(uint32_t index); 455 bool hasTagIndex() const; 456 457 const WasmSignature *signature; 458 459 protected: 460 TagSymbol(StringRef name, Kind k, uint32_t flags, InputFile *f, 461 const WasmSignature *sig) 462 : Symbol(name, k, flags, f), signature(sig) {} 463 464 uint32_t tagIndex = INVALID_INDEX; 465 }; 466 467 class DefinedTag : public TagSymbol { 468 public: 469 DefinedTag(StringRef name, uint32_t flags, InputFile *file, InputTag *tag); 470 471 static bool classof(const Symbol *s) { return s->kind() == DefinedTagKind; } 472 473 InputTag *tag; 474 }; 475 476 class UndefinedTag : public TagSymbol { 477 public: 478 UndefinedTag(StringRef name, std::optional<StringRef> importName, 479 std::optional<StringRef> importModule, uint32_t flags, 480 InputFile *file = nullptr, const WasmSignature *sig = nullptr) 481 : TagSymbol(name, UndefinedTagKind, flags, file, sig) { 482 this->importName = importName; 483 this->importModule = importModule; 484 } 485 486 static bool classof(const Symbol *s) { return s->kind() == UndefinedTagKind; } 487 }; 488 489 // LazySymbol represents a symbol that is not yet in the link, but we know where 490 // to find it if needed. If the resolver finds both Undefined and Lazy for the 491 // same name, it will ask the Lazy to load a file. 492 // 493 // A special complication is the handling of weak undefined symbols. They should 494 // not load a file, but we have to remember we have seen both the weak undefined 495 // and the lazy. We represent that with a lazy symbol with a weak binding. This 496 // means that code looking for undefined symbols normally also has to take lazy 497 // symbols into consideration. 498 class LazySymbol : public Symbol { 499 public: 500 LazySymbol(StringRef name, uint32_t flags, InputFile *file, 501 const llvm::object::Archive::Symbol &sym) 502 : Symbol(name, LazyKind, flags, file), archiveSymbol(sym) {} 503 504 static bool classof(const Symbol *s) { return s->kind() == LazyKind; } 505 void fetch(); 506 void setWeak(); 507 MemoryBufferRef getMemberBuffer(); 508 509 // Lazy symbols can have a signature because they can replace an 510 // UndefinedFunction in which case we need to be able to preserve the 511 // signature. 512 // TODO(sbc): This repetition of the signature field is inelegant. Revisit 513 // the use of class hierarchy to represent symbol taxonomy. 514 const WasmSignature *signature = nullptr; 515 516 private: 517 llvm::object::Archive::Symbol archiveSymbol; 518 }; 519 520 // linker-generated symbols 521 struct WasmSym { 522 // __global_base 523 // Symbol marking the start of the global section. 524 static DefinedData *globalBase; 525 526 // __stack_pointer/__stack_low/__stack_high 527 // Global that holds current value of stack pointer and data symbols marking 528 // the start and end of the stack region. stackPointer is initialized to 529 // stackHigh and grows downwards towards stackLow 530 static GlobalSymbol *stackPointer; 531 static DefinedData *stackLow; 532 static DefinedData *stackHigh; 533 534 // __tls_base 535 // Global that holds the address of the base of the current thread's 536 // TLS block. 537 static GlobalSymbol *tlsBase; 538 539 // __tls_size 540 // Symbol whose value is the size of the TLS block. 541 static GlobalSymbol *tlsSize; 542 543 // __tls_size 544 // Symbol whose value is the alignment of the TLS block. 545 static GlobalSymbol *tlsAlign; 546 547 // __data_end 548 // Symbol marking the end of the data and bss. 549 static DefinedData *dataEnd; 550 551 // __heap_base/__heap_end 552 // Symbols marking the beginning and end of the "heap". It starts at the end 553 // of the data, bss and explicit stack, and extends to the end of the linear 554 // memory allocated by wasm-ld. This region of memory is not used by the 555 // linked code, so it may be used as a backing store for `sbrk` or `malloc` 556 // implementations. 557 static DefinedData *heapBase; 558 static DefinedData *heapEnd; 559 560 // __wasm_init_memory_flag 561 // Symbol whose contents are nonzero iff memory has already been initialized. 562 static DefinedData *initMemoryFlag; 563 564 // __wasm_init_memory 565 // Function that initializes passive data segments during instantiation. 566 static DefinedFunction *initMemory; 567 568 // __wasm_call_ctors 569 // Function that directly calls all ctors in priority order. 570 static DefinedFunction *callCtors; 571 572 // __wasm_call_dtors 573 // Function that calls the libc/etc. cleanup function. 574 static DefinedFunction *callDtors; 575 576 // __wasm_apply_data_relocs 577 // Function that applies relocations to data segment post-instantiation. 578 static DefinedFunction *applyDataRelocs; 579 580 // __wasm_apply_global_relocs 581 // Function that applies relocations to wasm globals post-instantiation. 582 // Unlike __wasm_apply_data_relocs this needs to run on every thread. 583 static DefinedFunction *applyGlobalRelocs; 584 585 // __wasm_apply_global_tls_relocs 586 // Like applyGlobalRelocs but for globals that hold TLS addresses. These 587 // must be delayed until __wasm_init_tls. 588 static DefinedFunction *applyGlobalTLSRelocs; 589 590 // __wasm_init_tls 591 // Function that allocates thread-local storage and initializes it. 592 static DefinedFunction *initTLS; 593 594 // Pointer to the function that is to be used in the start section. 595 // (normally an alias of initMemory, or applyGlobalRelocs). 596 static DefinedFunction *startFunction; 597 598 // __dso_handle 599 // Symbol used in calls to __cxa_atexit to determine current DLL 600 static DefinedData *dsoHandle; 601 602 // __table_base 603 // Used in PIC code for offset of indirect function table 604 static UndefinedGlobal *tableBase; 605 static DefinedData *definedTableBase; 606 // 32-bit copy in wasm64 to work around init expr limitations. 607 // These can potentially be removed again once we have 608 // https://github.com/WebAssembly/extended-const 609 static UndefinedGlobal *tableBase32; 610 static DefinedData *definedTableBase32; 611 612 // __memory_base 613 // Used in PIC code for offset of global data 614 static UndefinedGlobal *memoryBase; 615 static DefinedData *definedMemoryBase; 616 617 // __indirect_function_table 618 // Used as an address space for function pointers, with each function that is 619 // used as a function pointer being allocated a slot. 620 static TableSymbol *indirectFunctionTable; 621 }; 622 623 // A buffer class that is large enough to hold any Symbol-derived 624 // object. We allocate memory using this class and instantiate a symbol 625 // using the placement new. 626 union SymbolUnion { 627 alignas(DefinedFunction) char a[sizeof(DefinedFunction)]; 628 alignas(DefinedData) char b[sizeof(DefinedData)]; 629 alignas(DefinedGlobal) char c[sizeof(DefinedGlobal)]; 630 alignas(DefinedTag) char d[sizeof(DefinedTag)]; 631 alignas(DefinedTable) char e[sizeof(DefinedTable)]; 632 alignas(LazySymbol) char f[sizeof(LazySymbol)]; 633 alignas(UndefinedFunction) char g[sizeof(UndefinedFunction)]; 634 alignas(UndefinedData) char h[sizeof(UndefinedData)]; 635 alignas(UndefinedGlobal) char i[sizeof(UndefinedGlobal)]; 636 alignas(UndefinedTable) char j[sizeof(UndefinedTable)]; 637 alignas(SectionSymbol) char k[sizeof(SectionSymbol)]; 638 }; 639 640 // It is important to keep the size of SymbolUnion small for performance and 641 // memory usage reasons. 96 bytes is a soft limit based on the size of 642 // UndefinedFunction on a 64-bit system. 643 static_assert(sizeof(SymbolUnion) <= 120, "SymbolUnion too large"); 644 645 void printTraceSymbol(Symbol *sym); 646 void printTraceSymbolUndefined(StringRef name, const InputFile* file); 647 648 template <typename T, typename... ArgT> 649 T *replaceSymbol(Symbol *s, ArgT &&... arg) { 650 static_assert(std::is_trivially_destructible<T>(), 651 "Symbol types must be trivially destructible"); 652 static_assert(sizeof(T) <= sizeof(SymbolUnion), "SymbolUnion too small"); 653 static_assert(alignof(T) <= alignof(SymbolUnion), 654 "SymbolUnion not aligned enough"); 655 assert(static_cast<Symbol *>(static_cast<T *>(nullptr)) == nullptr && 656 "Not a Symbol"); 657 658 Symbol symCopy = *s; 659 660 T *s2 = new (s) T(std::forward<ArgT>(arg)...); 661 s2->isUsedInRegularObj = symCopy.isUsedInRegularObj; 662 s2->forceExport = symCopy.forceExport; 663 s2->forceImport = symCopy.forceImport; 664 s2->canInline = symCopy.canInline; 665 s2->traced = symCopy.traced; 666 s2->referenced = symCopy.referenced; 667 668 // Print out a log message if --trace-symbol was specified. 669 // This is for debugging. 670 if (s2->traced) 671 printTraceSymbol(s2); 672 673 return s2; 674 } 675 676 } // namespace wasm 677 678 // Returns a symbol name for an error message. 679 std::string toString(const wasm::Symbol &sym); 680 std::string toString(wasm::Symbol::Kind kind); 681 std::string maybeDemangleSymbol(StringRef name); 682 683 } // namespace lld 684 685 #endif 686