1 //===- SymbolTable.cpp ----------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "SymbolTable.h"
10 #include "Config.h"
11 #include "InputChunks.h"
12 #include "InputElement.h"
13 #include "WriterUtils.h"
14 #include "lld/Common/CommonLinkerContext.h"
15 #include <optional>
16
17 #define DEBUG_TYPE "lld"
18
19 using namespace llvm;
20 using namespace llvm::wasm;
21 using namespace llvm::object;
22
23 namespace lld {
24 namespace wasm {
// Global pointer to the linker's symbol table. It is only defined here (as a
// namespace-scope pointer it starts out null); nothing in this part of the
// file assigns it.
SymbolTable *symtab;
26
addFile(InputFile * file)27 void SymbolTable::addFile(InputFile *file) {
28 log("Processing: " + toString(file));
29
30 // .a file
31 if (auto *f = dyn_cast<ArchiveFile>(file)) {
32 f->parse();
33 return;
34 }
35
36 // .so file
37 if (auto *f = dyn_cast<SharedFile>(file)) {
38 sharedFiles.push_back(f);
39 return;
40 }
41
42 // stub file
43 if (auto *f = dyn_cast<StubFile>(file)) {
44 f->parse();
45 stubFiles.push_back(f);
46 return;
47 }
48
49 if (config->trace)
50 message(toString(file));
51
52 // LLVM bitcode file
53 if (auto *f = dyn_cast<BitcodeFile>(file)) {
54 f->parse();
55 bitcodeFiles.push_back(f);
56 return;
57 }
58
59 // Regular object file
60 auto *f = cast<ObjFile>(file);
61 f->parse(false);
62 objectFiles.push_back(f);
63 }
64
65 // This function is where all the optimizations of link-time
66 // optimization happens. When LTO is in use, some input files are
67 // not in native object file format but in the LLVM bitcode format.
68 // This function compiles bitcode files into a few big native files
69 // using LLVM functions and replaces bitcode symbols with the results.
70 // Because all bitcode files that the program consists of are passed
71 // to the compiler at once, it can do whole-program optimization.
compileBitcodeFiles()72 void SymbolTable::compileBitcodeFiles() {
73 // Prevent further LTO objects being included
74 BitcodeFile::doneLTO = true;
75
76 if (bitcodeFiles.empty())
77 return;
78
79 // Compile bitcode files and replace bitcode symbols.
80 lto.reset(new BitcodeCompiler);
81 for (BitcodeFile *f : bitcodeFiles)
82 lto->add(*f);
83
84 for (StringRef filename : lto->compile()) {
85 auto *obj = make<ObjFile>(MemoryBufferRef(filename, "lto.tmp"), "");
86 obj->parse(true);
87 objectFiles.push_back(obj);
88 }
89 }
90
find(StringRef name)91 Symbol *SymbolTable::find(StringRef name) {
92 auto it = symMap.find(CachedHashStringRef(name));
93 if (it == symMap.end() || it->second == -1)
94 return nullptr;
95 return symVector[it->second];
96 }
97
replace(StringRef name,Symbol * sym)98 void SymbolTable::replace(StringRef name, Symbol* sym) {
99 auto it = symMap.find(CachedHashStringRef(name));
100 symVector[it->second] = sym;
101 }
102
insertName(StringRef name)103 std::pair<Symbol *, bool> SymbolTable::insertName(StringRef name) {
104 bool trace = false;
105 auto p = symMap.insert({CachedHashStringRef(name), (int)symVector.size()});
106 int &symIndex = p.first->second;
107 bool isNew = p.second;
108 if (symIndex == -1) {
109 symIndex = symVector.size();
110 trace = true;
111 isNew = true;
112 }
113
114 if (!isNew)
115 return {symVector[symIndex], false};
116
117 Symbol *sym = reinterpret_cast<Symbol *>(make<SymbolUnion>());
118 sym->isUsedInRegularObj = false;
119 sym->canInline = true;
120 sym->traced = trace;
121 sym->forceExport = false;
122 sym->referenced = !config->gcSections;
123 symVector.emplace_back(sym);
124 return {sym, true};
125 }
126
insert(StringRef name,const InputFile * file)127 std::pair<Symbol *, bool> SymbolTable::insert(StringRef name,
128 const InputFile *file) {
129 Symbol *s;
130 bool wasInserted;
131 std::tie(s, wasInserted) = insertName(name);
132
133 if (!file || file->kind() == InputFile::ObjectKind)
134 s->isUsedInRegularObj = true;
135
136 return {s, wasInserted};
137 }
138
reportTypeError(const Symbol * existing,const InputFile * file,llvm::wasm::WasmSymbolType type)139 static void reportTypeError(const Symbol *existing, const InputFile *file,
140 llvm::wasm::WasmSymbolType type) {
141 error("symbol type mismatch: " + toString(*existing) + "\n>>> defined as " +
142 toString(existing->getWasmType()) + " in " +
143 toString(existing->getFile()) + "\n>>> defined as " + toString(type) +
144 " in " + toString(file));
145 }
146
147 // Check the type of new symbol matches that of the symbol is replacing.
148 // Returns true if the function types match, false is there is a signature
149 // mismatch.
signatureMatches(FunctionSymbol * existing,const WasmSignature * newSig)150 static bool signatureMatches(FunctionSymbol *existing,
151 const WasmSignature *newSig) {
152 const WasmSignature *oldSig = existing->signature;
153
154 // If either function is missing a signature (this happens for bitcode
155 // symbols) then assume they match. Any mismatch will be reported later
156 // when the LTO objects are added.
157 if (!newSig || !oldSig)
158 return true;
159
160 return *newSig == *oldSig;
161 }
162
checkGlobalType(const Symbol * existing,const InputFile * file,const WasmGlobalType * newType)163 static void checkGlobalType(const Symbol *existing, const InputFile *file,
164 const WasmGlobalType *newType) {
165 if (!isa<GlobalSymbol>(existing)) {
166 reportTypeError(existing, file, WASM_SYMBOL_TYPE_GLOBAL);
167 return;
168 }
169
170 const WasmGlobalType *oldType = cast<GlobalSymbol>(existing)->getGlobalType();
171 if (*newType != *oldType) {
172 error("Global type mismatch: " + existing->getName() + "\n>>> defined as " +
173 toString(*oldType) + " in " + toString(existing->getFile()) +
174 "\n>>> defined as " + toString(*newType) + " in " + toString(file));
175 }
176 }
177
checkTagType(const Symbol * existing,const InputFile * file,const WasmSignature * newSig)178 static void checkTagType(const Symbol *existing, const InputFile *file,
179 const WasmSignature *newSig) {
180 const auto *existingTag = dyn_cast<TagSymbol>(existing);
181 if (!isa<TagSymbol>(existing)) {
182 reportTypeError(existing, file, WASM_SYMBOL_TYPE_TAG);
183 return;
184 }
185
186 const WasmSignature *oldSig = existingTag->signature;
187 if (*newSig != *oldSig)
188 warn("Tag signature mismatch: " + existing->getName() +
189 "\n>>> defined as " + toString(*oldSig) + " in " +
190 toString(existing->getFile()) + "\n>>> defined as " +
191 toString(*newSig) + " in " + toString(file));
192 }
193
checkTableType(const Symbol * existing,const InputFile * file,const WasmTableType * newType)194 static void checkTableType(const Symbol *existing, const InputFile *file,
195 const WasmTableType *newType) {
196 if (!isa<TableSymbol>(existing)) {
197 reportTypeError(existing, file, WASM_SYMBOL_TYPE_TABLE);
198 return;
199 }
200
201 const WasmTableType *oldType = cast<TableSymbol>(existing)->getTableType();
202 if (newType->ElemType != oldType->ElemType) {
203 error("Table type mismatch: " + existing->getName() + "\n>>> defined as " +
204 toString(*oldType) + " in " + toString(existing->getFile()) +
205 "\n>>> defined as " + toString(*newType) + " in " + toString(file));
206 }
207 // FIXME: No assertions currently on the limits.
208 }
209
checkDataType(const Symbol * existing,const InputFile * file)210 static void checkDataType(const Symbol *existing, const InputFile *file) {
211 if (!isa<DataSymbol>(existing))
212 reportTypeError(existing, file, WASM_SYMBOL_TYPE_DATA);
213 }
214
addSyntheticFunction(StringRef name,uint32_t flags,InputFunction * function)215 DefinedFunction *SymbolTable::addSyntheticFunction(StringRef name,
216 uint32_t flags,
217 InputFunction *function) {
218 LLVM_DEBUG(dbgs() << "addSyntheticFunction: " << name << "\n");
219 assert(!find(name));
220 syntheticFunctions.emplace_back(function);
221 return replaceSymbol<DefinedFunction>(insertName(name).first, name,
222 flags, nullptr, function);
223 }
224
225 // Adds an optional, linker generated, data symbol. The symbol will only be
226 // added if there is an undefine reference to it, or if it is explicitly
227 // exported via the --export flag. Otherwise we don't add the symbol and return
228 // nullptr.
addOptionalDataSymbol(StringRef name,uint64_t value)229 DefinedData *SymbolTable::addOptionalDataSymbol(StringRef name,
230 uint64_t value) {
231 Symbol *s = find(name);
232 if (!s && (config->exportAll || config->exportedSymbols.count(name) != 0))
233 s = insertName(name).first;
234 else if (!s || s->isDefined())
235 return nullptr;
236 LLVM_DEBUG(dbgs() << "addOptionalDataSymbol: " << name << "\n");
237 auto *rtn = replaceSymbol<DefinedData>(s, name, WASM_SYMBOL_VISIBILITY_HIDDEN);
238 rtn->setVA(value);
239 rtn->referenced = true;
240 return rtn;
241 }
242
addSyntheticDataSymbol(StringRef name,uint32_t flags)243 DefinedData *SymbolTable::addSyntheticDataSymbol(StringRef name,
244 uint32_t flags) {
245 LLVM_DEBUG(dbgs() << "addSyntheticDataSymbol: " << name << "\n");
246 assert(!find(name));
247 return replaceSymbol<DefinedData>(insertName(name).first, name, flags);
248 }
249
addSyntheticGlobal(StringRef name,uint32_t flags,InputGlobal * global)250 DefinedGlobal *SymbolTable::addSyntheticGlobal(StringRef name, uint32_t flags,
251 InputGlobal *global) {
252 LLVM_DEBUG(dbgs() << "addSyntheticGlobal: " << name << " -> " << global
253 << "\n");
254 assert(!find(name));
255 syntheticGlobals.emplace_back(global);
256 return replaceSymbol<DefinedGlobal>(insertName(name).first, name, flags,
257 nullptr, global);
258 }
259
addOptionalGlobalSymbol(StringRef name,InputGlobal * global)260 DefinedGlobal *SymbolTable::addOptionalGlobalSymbol(StringRef name,
261 InputGlobal *global) {
262 Symbol *s = find(name);
263 if (!s || s->isDefined())
264 return nullptr;
265 LLVM_DEBUG(dbgs() << "addOptionalGlobalSymbol: " << name << " -> " << global
266 << "\n");
267 syntheticGlobals.emplace_back(global);
268 return replaceSymbol<DefinedGlobal>(s, name, WASM_SYMBOL_VISIBILITY_HIDDEN,
269 nullptr, global);
270 }
271
addSyntheticTable(StringRef name,uint32_t flags,InputTable * table)272 DefinedTable *SymbolTable::addSyntheticTable(StringRef name, uint32_t flags,
273 InputTable *table) {
274 LLVM_DEBUG(dbgs() << "addSyntheticTable: " << name << " -> " << table
275 << "\n");
276 Symbol *s = find(name);
277 assert(!s || s->isUndefined());
278 if (!s)
279 s = insertName(name).first;
280 syntheticTables.emplace_back(table);
281 return replaceSymbol<DefinedTable>(s, name, flags, nullptr, table);
282 }
283
shouldReplace(const Symbol * existing,InputFile * newFile,uint32_t newFlags)284 static bool shouldReplace(const Symbol *existing, InputFile *newFile,
285 uint32_t newFlags) {
286 // If existing symbol is undefined, replace it.
287 if (!existing->isDefined()) {
288 LLVM_DEBUG(dbgs() << "resolving existing undefined symbol: "
289 << existing->getName() << "\n");
290 return true;
291 }
292
293 // Now we have two defined symbols. If the new one is weak, we can ignore it.
294 if ((newFlags & WASM_SYMBOL_BINDING_MASK) == WASM_SYMBOL_BINDING_WEAK) {
295 LLVM_DEBUG(dbgs() << "existing symbol takes precedence\n");
296 return false;
297 }
298
299 // If the existing symbol is weak, we should replace it.
300 if (existing->isWeak()) {
301 LLVM_DEBUG(dbgs() << "replacing existing weak symbol\n");
302 return true;
303 }
304
305 // Neither symbol is week. They conflict.
306 error("duplicate symbol: " + toString(*existing) + "\n>>> defined in " +
307 toString(existing->getFile()) + "\n>>> defined in " +
308 toString(newFile));
309 return true;
310 }
311
// Adds a function definition for `name` coming from `file`, resolving it
// against whatever symbol already occupies that name. Returns the symbol that
// ends up representing this definition: normally the table entry itself, or a
// signature variant when the new signature conflicts with the existing one.
Symbol *SymbolTable::addDefinedFunction(StringRef name, uint32_t flags,
                                        InputFile *file,
                                        InputFunction *function) {
  LLVM_DEBUG(dbgs() << "addDefinedFunction: " << name << " ["
                    << (function ? toString(function->signature) : "none")
                    << "]\n");
  Symbol *s;
  bool wasInserted;
  std::tie(s, wasInserted) = insert(name, file);

  // Overwrites `sym` in place with a DefinedFunction for the new definition.
  auto replaceSym = [&](Symbol *sym) {
    // If the new defined function doesn't have signature (i.e. bitcode
    // functions) but the old symbol does, then preserve the old signature.
    // Note that the old signature is read from `s` (the table entry), even
    // when `sym` is a variant.
    const WasmSignature *oldSig = s->getSignature();
    auto* newSym = replaceSymbol<DefinedFunction>(sym, name, flags, file, function);
    if (!newSym->signature)
      newSym->signature = oldSig;
  };

  // A brand new or still-lazy entry is simply taken over by the definition.
  if (wasInserted || s->isLazy()) {
    replaceSym(s);
    return s;
  }

  // The name is already taken by something that is not a function.
  auto existingFunction = dyn_cast<FunctionSymbol>(s);
  if (!existingFunction) {
    reportTypeError(s, file, WASM_SYMBOL_TYPE_FUNCTION);
    return s;
  }

  // An existing undefined function only constrains the signature if it is
  // called directly.
  bool checkSig = true;
  if (auto ud = dyn_cast<UndefinedFunction>(existingFunction))
    checkSig = ud->isCalledDirectly;

  if (checkSig && function && !signatureMatches(existingFunction, &function->signature)) {
    Symbol* variant;
    if (getFunctionVariant(s, &function->signature, file, &variant))
      // New variant, always replace
      replaceSym(variant);
    else if (shouldReplace(s, file, flags))
      // Variant already exists, replace it after checking shouldReplace
      replaceSym(variant);

    // This variant we found take the place in the symbol table as the primary
    // variant. This happens regardless of which branch above was taken.
    replace(name, variant);
    return variant;
  }

  // Existing function with matching signature.
  if (shouldReplace(s, file, flags))
    replaceSym(s);

  return s;
}
367
addDefinedData(StringRef name,uint32_t flags,InputFile * file,InputChunk * segment,uint64_t address,uint64_t size)368 Symbol *SymbolTable::addDefinedData(StringRef name, uint32_t flags,
369 InputFile *file, InputChunk *segment,
370 uint64_t address, uint64_t size) {
371 LLVM_DEBUG(dbgs() << "addDefinedData:" << name << " addr:" << address
372 << "\n");
373 Symbol *s;
374 bool wasInserted;
375 std::tie(s, wasInserted) = insert(name, file);
376
377 auto replaceSym = [&]() {
378 replaceSymbol<DefinedData>(s, name, flags, file, segment, address, size);
379 };
380
381 if (wasInserted || s->isLazy()) {
382 replaceSym();
383 return s;
384 }
385
386 checkDataType(s, file);
387
388 if (shouldReplace(s, file, flags))
389 replaceSym();
390 return s;
391 }
392
addDefinedGlobal(StringRef name,uint32_t flags,InputFile * file,InputGlobal * global)393 Symbol *SymbolTable::addDefinedGlobal(StringRef name, uint32_t flags,
394 InputFile *file, InputGlobal *global) {
395 LLVM_DEBUG(dbgs() << "addDefinedGlobal:" << name << "\n");
396
397 Symbol *s;
398 bool wasInserted;
399 std::tie(s, wasInserted) = insert(name, file);
400
401 auto replaceSym = [&]() {
402 replaceSymbol<DefinedGlobal>(s, name, flags, file, global);
403 };
404
405 if (wasInserted || s->isLazy()) {
406 replaceSym();
407 return s;
408 }
409
410 checkGlobalType(s, file, &global->getType());
411
412 if (shouldReplace(s, file, flags))
413 replaceSym();
414 return s;
415 }
416
addDefinedTag(StringRef name,uint32_t flags,InputFile * file,InputTag * tag)417 Symbol *SymbolTable::addDefinedTag(StringRef name, uint32_t flags,
418 InputFile *file, InputTag *tag) {
419 LLVM_DEBUG(dbgs() << "addDefinedTag:" << name << "\n");
420
421 Symbol *s;
422 bool wasInserted;
423 std::tie(s, wasInserted) = insert(name, file);
424
425 auto replaceSym = [&]() {
426 replaceSymbol<DefinedTag>(s, name, flags, file, tag);
427 };
428
429 if (wasInserted || s->isLazy()) {
430 replaceSym();
431 return s;
432 }
433
434 checkTagType(s, file, &tag->signature);
435
436 if (shouldReplace(s, file, flags))
437 replaceSym();
438 return s;
439 }
440
addDefinedTable(StringRef name,uint32_t flags,InputFile * file,InputTable * table)441 Symbol *SymbolTable::addDefinedTable(StringRef name, uint32_t flags,
442 InputFile *file, InputTable *table) {
443 LLVM_DEBUG(dbgs() << "addDefinedTable:" << name << "\n");
444
445 Symbol *s;
446 bool wasInserted;
447 std::tie(s, wasInserted) = insert(name, file);
448
449 auto replaceSym = [&]() {
450 replaceSymbol<DefinedTable>(s, name, flags, file, table);
451 };
452
453 if (wasInserted || s->isLazy()) {
454 replaceSym();
455 return s;
456 }
457
458 checkTableType(s, file, &table->getType());
459
460 if (shouldReplace(s, file, flags))
461 replaceSym();
462 return s;
463 }
464
465 // This function get called when an undefined symbol is added, and there is
466 // already an existing one in the symbols table. In this case we check that
467 // custom 'import-module' and 'import-field' symbol attributes agree.
468 // With LTO these attributes are not available when the bitcode is read and only
469 // become available when the LTO object is read. In this case we silently
470 // replace the empty attributes with the valid ones.
471 template <typename T>
setImportAttributes(T * existing,std::optional<StringRef> importName,std::optional<StringRef> importModule,uint32_t flags,InputFile * file)472 static void setImportAttributes(T *existing,
473 std::optional<StringRef> importName,
474 std::optional<StringRef> importModule,
475 uint32_t flags, InputFile *file) {
476 if (importName) {
477 if (!existing->importName)
478 existing->importName = importName;
479 if (existing->importName != importName)
480 error("import name mismatch for symbol: " + toString(*existing) +
481 "\n>>> defined as " + *existing->importName + " in " +
482 toString(existing->getFile()) + "\n>>> defined as " + *importName +
483 " in " + toString(file));
484 }
485
486 if (importModule) {
487 if (!existing->importModule)
488 existing->importModule = importModule;
489 if (existing->importModule != importModule)
490 error("import module mismatch for symbol: " + toString(*existing) +
491 "\n>>> defined as " + *existing->importModule + " in " +
492 toString(existing->getFile()) + "\n>>> defined as " +
493 *importModule + " in " + toString(file));
494 }
495
496 // Update symbol binding, if the existing symbol is weak
497 uint32_t binding = flags & WASM_SYMBOL_BINDING_MASK;
498 if (existing->isWeak() && binding != WASM_SYMBOL_BINDING_WEAK) {
499 existing->flags = (existing->flags & ~WASM_SYMBOL_BINDING_MASK) | binding;
500 }
501 }
502
// Adds an undefined reference to function `name` from `file`. `flags` must
// contain WASM_SYMBOL_UNDEFINED. Depending on what already occupies the name
// this creates a new UndefinedFunction, marks a lazy symbol weak or fetches
// it, or reconciles signature and import attributes with an existing function.
// Returns the symbol for the reference, which may be a signature variant
// rather than the table entry.
Symbol *SymbolTable::addUndefinedFunction(StringRef name,
                                          std::optional<StringRef> importName,
                                          std::optional<StringRef> importModule,
                                          uint32_t flags, InputFile *file,
                                          const WasmSignature *sig,
                                          bool isCalledDirectly) {
  LLVM_DEBUG(dbgs() << "addUndefinedFunction: " << name << " ["
                    << (sig ? toString(*sig) : "none")
                    << "] IsCalledDirectly:" << isCalledDirectly << " flags=0x"
                    << utohexstr(flags) << "\n");
  assert(flags & WASM_SYMBOL_UNDEFINED);

  Symbol *s;
  bool wasInserted;
  std::tie(s, wasInserted) = insert(name, file);
  if (s->traced)
    printTraceSymbolUndefined(name, file);

  // Overwrites whatever `s` currently points at with an UndefinedFunction.
  // `s` is captured by reference, so a later reassignment of `s` is honored.
  auto replaceSym = [&]() {
    replaceSymbol<UndefinedFunction>(s, name, importName, importModule, flags,
                                     file, sig, isCalledDirectly);
  };

  if (wasInserted) {
    replaceSym();
  } else if (auto *lazy = dyn_cast<LazySymbol>(s)) {
    // A weak reference only marks the lazy symbol weak and records the
    // signature; any other reference fetches it.
    if ((flags & WASM_SYMBOL_BINDING_MASK) == WASM_SYMBOL_BINDING_WEAK) {
      lazy->setWeak();
      lazy->signature = sig;
    } else {
      lazy->fetch();
      if (!config->whyExtract.empty())
        config->whyExtractRecords.emplace_back(toString(file), s->getFile(),
                                               *s);
    }
  } else {
    auto existingFunction = dyn_cast<FunctionSymbol>(s);
    if (!existingFunction) {
      reportTypeError(s, file, WASM_SYMBOL_TYPE_FUNCTION);
      return s;
    }
    // Fill in a signature the existing symbol was still missing.
    if (!existingFunction->signature && sig)
      existingFunction->signature = sig;
    auto *existingUndefined = dyn_cast<UndefinedFunction>(existingFunction);
    if (isCalledDirectly && !signatureMatches(existingFunction, sig)) {
      // If the existing undefined functions is not called directly then let
      // this one take precedence. Otherwise the existing function is either
      // directly called or defined, in which case we need a function variant.
      if (existingUndefined && !existingUndefined->isCalledDirectly)
        replaceSym();
      // getFunctionVariant() stores the variant into `s`, so replaceSym() then
      // initializes the new variant and the variant is what gets returned.
      else if (getFunctionVariant(s, sig, file, &s))
        replaceSym();
    }
    // `existingUndefined` was computed before `s` could be rebound, so these
    // updates apply to the symbol originally found in the table.
    if (existingUndefined) {
      setImportAttributes(existingUndefined, importName, importModule, flags,
                          file);
      if (isCalledDirectly)
        existingUndefined->isCalledDirectly = true;
    }
  }

  return s;
}
566
addUndefinedData(StringRef name,uint32_t flags,InputFile * file)567 Symbol *SymbolTable::addUndefinedData(StringRef name, uint32_t flags,
568 InputFile *file) {
569 LLVM_DEBUG(dbgs() << "addUndefinedData: " << name << "\n");
570 assert(flags & WASM_SYMBOL_UNDEFINED);
571
572 Symbol *s;
573 bool wasInserted;
574 std::tie(s, wasInserted) = insert(name, file);
575 if (s->traced)
576 printTraceSymbolUndefined(name, file);
577
578 if (wasInserted) {
579 replaceSymbol<UndefinedData>(s, name, flags, file);
580 } else if (auto *lazy = dyn_cast<LazySymbol>(s)) {
581 if ((flags & WASM_SYMBOL_BINDING_MASK) == WASM_SYMBOL_BINDING_WEAK)
582 lazy->setWeak();
583 else
584 lazy->fetch();
585 } else if (s->isDefined()) {
586 checkDataType(s, file);
587 }
588 return s;
589 }
590
addUndefinedGlobal(StringRef name,std::optional<StringRef> importName,std::optional<StringRef> importModule,uint32_t flags,InputFile * file,const WasmGlobalType * type)591 Symbol *SymbolTable::addUndefinedGlobal(StringRef name,
592 std::optional<StringRef> importName,
593 std::optional<StringRef> importModule,
594 uint32_t flags, InputFile *file,
595 const WasmGlobalType *type) {
596 LLVM_DEBUG(dbgs() << "addUndefinedGlobal: " << name << "\n");
597 assert(flags & WASM_SYMBOL_UNDEFINED);
598
599 Symbol *s;
600 bool wasInserted;
601 std::tie(s, wasInserted) = insert(name, file);
602 if (s->traced)
603 printTraceSymbolUndefined(name, file);
604
605 if (wasInserted)
606 replaceSymbol<UndefinedGlobal>(s, name, importName, importModule, flags,
607 file, type);
608 else if (auto *lazy = dyn_cast<LazySymbol>(s))
609 lazy->fetch();
610 else if (s->isDefined())
611 checkGlobalType(s, file, type);
612 return s;
613 }
614
addUndefinedTable(StringRef name,std::optional<StringRef> importName,std::optional<StringRef> importModule,uint32_t flags,InputFile * file,const WasmTableType * type)615 Symbol *SymbolTable::addUndefinedTable(StringRef name,
616 std::optional<StringRef> importName,
617 std::optional<StringRef> importModule,
618 uint32_t flags, InputFile *file,
619 const WasmTableType *type) {
620 LLVM_DEBUG(dbgs() << "addUndefinedTable: " << name << "\n");
621 assert(flags & WASM_SYMBOL_UNDEFINED);
622
623 Symbol *s;
624 bool wasInserted;
625 std::tie(s, wasInserted) = insert(name, file);
626 if (s->traced)
627 printTraceSymbolUndefined(name, file);
628
629 if (wasInserted)
630 replaceSymbol<UndefinedTable>(s, name, importName, importModule, flags,
631 file, type);
632 else if (auto *lazy = dyn_cast<LazySymbol>(s))
633 lazy->fetch();
634 else if (s->isDefined())
635 checkTableType(s, file, type);
636 return s;
637 }
638
addUndefinedTag(StringRef name,std::optional<StringRef> importName,std::optional<StringRef> importModule,uint32_t flags,InputFile * file,const WasmSignature * sig)639 Symbol *SymbolTable::addUndefinedTag(StringRef name,
640 std::optional<StringRef> importName,
641 std::optional<StringRef> importModule,
642 uint32_t flags, InputFile *file,
643 const WasmSignature *sig) {
644 LLVM_DEBUG(dbgs() << "addUndefinedTag: " << name << "\n");
645 assert(flags & WASM_SYMBOL_UNDEFINED);
646
647 Symbol *s;
648 bool wasInserted;
649 std::tie(s, wasInserted) = insert(name, file);
650 if (s->traced)
651 printTraceSymbolUndefined(name, file);
652
653 if (wasInserted)
654 replaceSymbol<UndefinedTag>(s, name, importName, importModule, flags, file,
655 sig);
656 else if (auto *lazy = dyn_cast<LazySymbol>(s))
657 lazy->fetch();
658 else if (s->isDefined())
659 checkTagType(s, file, sig);
660 return s;
661 }
662
createUndefinedIndirectFunctionTable(StringRef name)663 TableSymbol *SymbolTable::createUndefinedIndirectFunctionTable(StringRef name) {
664 WasmLimits limits{0, 0, 0}; // Set by the writer.
665 WasmTableType *type = make<WasmTableType>();
666 type->ElemType = uint8_t(ValType::FUNCREF);
667 type->Limits = limits;
668 StringRef module(defaultModule);
669 uint32_t flags = config->exportTable ? 0 : WASM_SYMBOL_VISIBILITY_HIDDEN;
670 flags |= WASM_SYMBOL_UNDEFINED;
671 Symbol *sym = addUndefinedTable(name, name, module, flags, nullptr, type);
672 sym->markLive();
673 sym->forceExport = config->exportTable;
674 return cast<TableSymbol>(sym);
675 }
676
createDefinedIndirectFunctionTable(StringRef name)677 TableSymbol *SymbolTable::createDefinedIndirectFunctionTable(StringRef name) {
678 const uint32_t invalidIndex = -1;
679 WasmLimits limits{0, 0, 0}; // Set by the writer.
680 WasmTableType type{uint8_t(ValType::FUNCREF), limits};
681 WasmTable desc{invalidIndex, type, name};
682 InputTable *table = make<InputTable>(desc, nullptr);
683 uint32_t flags = config->exportTable ? 0 : WASM_SYMBOL_VISIBILITY_HIDDEN;
684 TableSymbol *sym = addSyntheticTable(name, flags, table);
685 sym->markLive();
686 sym->forceExport = config->exportTable;
687 return sym;
688 }
689
690 // Whether or not we need an indirect function table is usually a function of
691 // whether an input declares a need for it. However sometimes it's possible for
692 // no input to need the indirect function table, but then a late
693 // addInternalGOTEntry causes a function to be allocated an address. In that
694 // case address we synthesize a definition at the last minute.
resolveIndirectFunctionTable(bool required)695 TableSymbol *SymbolTable::resolveIndirectFunctionTable(bool required) {
696 Symbol *existing = find(functionTableName);
697 if (existing) {
698 if (!isa<TableSymbol>(existing)) {
699 error(Twine("reserved symbol must be of type table: `") +
700 functionTableName + "`");
701 return nullptr;
702 }
703 if (existing->isDefined()) {
704 error(Twine("reserved symbol must not be defined in input files: `") +
705 functionTableName + "`");
706 return nullptr;
707 }
708 }
709
710 if (config->importTable) {
711 if (existing)
712 return cast<TableSymbol>(existing);
713 if (required)
714 return createUndefinedIndirectFunctionTable(functionTableName);
715 } else if ((existing && existing->isLive()) || config->exportTable ||
716 required) {
717 // A defined table is required. Either because the user request an exported
718 // table or because the table symbol is already live. The existing table is
719 // guaranteed to be undefined due to the check above.
720 return createDefinedIndirectFunctionTable(functionTableName);
721 }
722
723 // An indirect function table will only be present in the symbol table if
724 // needed by a reloc; if we get here, we don't need one.
725 return nullptr;
726 }
727
addLazy(ArchiveFile * file,const Archive::Symbol * sym)728 void SymbolTable::addLazy(ArchiveFile *file, const Archive::Symbol *sym) {
729 LLVM_DEBUG(dbgs() << "addLazy: " << sym->getName() << "\n");
730 StringRef name = sym->getName();
731
732 Symbol *s;
733 bool wasInserted;
734 std::tie(s, wasInserted) = insertName(name);
735
736 if (wasInserted) {
737 replaceSymbol<LazySymbol>(s, name, 0, file, *sym);
738 return;
739 }
740
741 if (!s->isUndefined())
742 return;
743
744 // The existing symbol is undefined, load a new one from the archive,
745 // unless the existing symbol is weak in which case replace the undefined
746 // symbols with a LazySymbol.
747 if (s->isWeak()) {
748 const WasmSignature *oldSig = nullptr;
749 // In the case of an UndefinedFunction we need to preserve the expected
750 // signature.
751 if (auto *f = dyn_cast<UndefinedFunction>(s))
752 oldSig = f->signature;
753 LLVM_DEBUG(dbgs() << "replacing existing weak undefined symbol\n");
754 auto newSym = replaceSymbol<LazySymbol>(s, name, WASM_SYMBOL_BINDING_WEAK,
755 file, *sym);
756 newSym->signature = oldSig;
757 return;
758 }
759
760 LLVM_DEBUG(dbgs() << "replacing existing undefined\n");
761 const InputFile *oldFile = s->getFile();
762 file->addMember(sym);
763 if (!config->whyExtract.empty())
764 config->whyExtractRecords.emplace_back(toString(oldFile), s->getFile(), *s);
765 }
766
addComdat(StringRef name)767 bool SymbolTable::addComdat(StringRef name) {
768 return comdatGroups.insert(CachedHashStringRef(name)).second;
769 }
770
771 // The new signature doesn't match. Create a variant to the symbol with the
772 // signature encoded in the name and return that instead. These symbols are
773 // then unified later in handleSymbolVariants.
getFunctionVariant(Symbol * sym,const WasmSignature * sig,const InputFile * file,Symbol ** out)774 bool SymbolTable::getFunctionVariant(Symbol* sym, const WasmSignature *sig,
775 const InputFile *file, Symbol **out) {
776 LLVM_DEBUG(dbgs() << "getFunctionVariant: " << sym->getName() << " -> "
777 << " " << toString(*sig) << "\n");
778 Symbol *variant = nullptr;
779
780 // Linear search through symbol variants. Should never be more than two
781 // or three entries here.
782 auto &variants = symVariants[CachedHashStringRef(sym->getName())];
783 if (variants.empty())
784 variants.push_back(sym);
785
786 for (Symbol* v : variants) {
787 if (*v->getSignature() == *sig) {
788 variant = v;
789 break;
790 }
791 }
792
793 bool wasAdded = !variant;
794 if (wasAdded) {
795 // Create a new variant;
796 LLVM_DEBUG(dbgs() << "added new variant\n");
797 variant = reinterpret_cast<Symbol *>(make<SymbolUnion>());
798 variant->isUsedInRegularObj =
799 !file || file->kind() == InputFile::ObjectKind;
800 variant->canInline = true;
801 variant->traced = false;
802 variant->forceExport = false;
803 variants.push_back(variant);
804 } else {
805 LLVM_DEBUG(dbgs() << "variant already exists: " << toString(*variant) << "\n");
806 assert(*variant->getSignature() == *sig);
807 }
808
809 *out = variant;
810 return wasAdded;
811 }
812
813 // Set a flag for --trace-symbol so that we can print out a log message
814 // if a new symbol with the same name is inserted into the symbol table.
trace(StringRef name)815 void SymbolTable::trace(StringRef name) {
816 symMap.insert({CachedHashStringRef(name), -1});
817 }
818
wrap(Symbol * sym,Symbol * real,Symbol * wrap)819 void SymbolTable::wrap(Symbol *sym, Symbol *real, Symbol *wrap) {
820 // Swap symbols as instructed by -wrap.
821 int &origIdx = symMap[CachedHashStringRef(sym->getName())];
822 int &realIdx= symMap[CachedHashStringRef(real->getName())];
823 int &wrapIdx = symMap[CachedHashStringRef(wrap->getName())];
824 LLVM_DEBUG(dbgs() << "wrap: " << sym->getName() << "\n");
825
826 // Anyone looking up __real symbols should get the original
827 realIdx = origIdx;
828 // Anyone looking up the original should get the __wrap symbol
829 origIdx = wrapIdx;
830 }
831
// Body bytes installed into synthetic stub functions by
// replaceWithUnreachable(): a length prefix followed by a body that declares
// no locals and consists of a single `unreachable` instruction.
static const uint8_t unreachableFn[] = {
    0x03, // ULEB length
    0x00, // ULEB num locals
    0x00, // opcode unreachable
    0x0b, // opcode end
};
836
837 // Replace the given symbol body with an unreachable function.
838 // This is used by handleWeakUndefines in order to generate a callable
839 // equivalent of an undefined function and also handleSymbolVariants for
840 // undefined functions that don't match the signature of the definition.
replaceWithUnreachable(Symbol * sym,const WasmSignature & sig,StringRef debugName)841 InputFunction *SymbolTable::replaceWithUnreachable(Symbol *sym,
842 const WasmSignature &sig,
843 StringRef debugName) {
844 auto *func = make<SyntheticFunction>(sig, sym->getName(), debugName);
845 func->setBody(unreachableFn);
846 syntheticFunctions.emplace_back(func);
847 // Mark new symbols as local. For relocatable output we don't want them
848 // to be exported outside the object file.
849 replaceSymbol<DefinedFunction>(sym, debugName, WASM_SYMBOL_BINDING_LOCAL,
850 nullptr, func);
851 // Ensure the stub function doesn't get a table entry. Its address
852 // should always compare equal to the null pointer.
853 sym->isStub = true;
854 return func;
855 }
856
replaceWithUndefined(Symbol * sym)857 void SymbolTable::replaceWithUndefined(Symbol *sym) {
858 // Add a synthetic dummy for weak undefined functions. These dummies will
859 // be GC'd if not used as the target of any "call" instructions.
860 StringRef debugName = saver().save("undefined_weak:" + toString(*sym));
861 replaceWithUnreachable(sym, *sym->getSignature(), debugName);
862 // Hide our dummy to prevent export.
863 sym->setHidden(true);
864 }
865
866 // For weak undefined functions, there may be "call" instructions that reference
867 // the symbol. In this case, we need to synthesise a dummy/stub function that
868 // will abort at runtime, so that relocations can still provided an operand to
869 // the call instruction that passes Wasm validation.
handleWeakUndefines()870 void SymbolTable::handleWeakUndefines() {
871 for (Symbol *sym : symbols()) {
872 if (sym->isUndefWeak() && sym->isUsedInRegularObj) {
873 if (sym->getSignature()) {
874 replaceWithUndefined(sym);
875 } else {
876 // It is possible for undefined functions not to have a signature (eg.
877 // if added via "--undefined"), but weak undefined ones do have a
878 // signature. Lazy symbols may not be functions and therefore Sig can
879 // still be null in some circumstance.
880 assert(!isa<FunctionSymbol>(sym));
881 }
882 }
883 }
884 }
885
createUndefinedStub(const WasmSignature & sig)886 DefinedFunction *SymbolTable::createUndefinedStub(const WasmSignature &sig) {
887 if (stubFunctions.count(sig))
888 return stubFunctions[sig];
889 LLVM_DEBUG(dbgs() << "createUndefinedStub: " << toString(sig) << "\n");
890 auto *sym = reinterpret_cast<DefinedFunction *>(make<SymbolUnion>());
891 sym->isUsedInRegularObj = true;
892 sym->canInline = true;
893 sym->traced = false;
894 sym->forceExport = false;
895 sym->signature = &sig;
896 replaceSymbol<DefinedFunction>(
897 sym, "undefined_stub", WASM_SYMBOL_VISIBILITY_HIDDEN, nullptr, nullptr);
898 replaceWithUnreachable(sym, sig, "undefined_stub");
899 stubFunctions[sig] = sym;
900 return sym;
901 }
902
reportFunctionSignatureMismatch(StringRef symName,FunctionSymbol * a,FunctionSymbol * b,bool isError)903 static void reportFunctionSignatureMismatch(StringRef symName,
904 FunctionSymbol *a,
905 FunctionSymbol *b, bool isError) {
906 std::string msg = ("function signature mismatch: " + symName +
907 "\n>>> defined as " + toString(*a->signature) + " in " +
908 toString(a->getFile()) + "\n>>> defined as " +
909 toString(*b->signature) + " in " + toString(b->getFile()))
910 .str();
911 if (isError)
912 error(msg);
913 else
914 warn(msg);
915 }
916
917 // Remove any variant symbols that were created due to function signature
918 // mismatches.
handleSymbolVariants()919 void SymbolTable::handleSymbolVariants() {
920 for (auto pair : symVariants) {
921 // Push the initial symbol onto the list of variants.
922 StringRef symName = pair.first.val();
923 std::vector<Symbol *> &variants = pair.second;
924
925 #ifndef NDEBUG
926 LLVM_DEBUG(dbgs() << "symbol with (" << variants.size()
927 << ") variants: " << symName << "\n");
928 for (auto *s: variants) {
929 auto *f = cast<FunctionSymbol>(s);
930 LLVM_DEBUG(dbgs() << " variant: " + f->getName() << " "
931 << toString(*f->signature) << "\n");
932 }
933 #endif
934
935 // Find the one definition.
936 DefinedFunction *defined = nullptr;
937 for (auto *symbol : variants) {
938 if (auto f = dyn_cast<DefinedFunction>(symbol)) {
939 defined = f;
940 break;
941 }
942 }
943
944 // If there are no definitions, and the undefined symbols disagree on
945 // the signature, there is not we can do since we don't know which one
946 // to use as the signature on the import.
947 if (!defined) {
948 reportFunctionSignatureMismatch(symName,
949 cast<FunctionSymbol>(variants[0]),
950 cast<FunctionSymbol>(variants[1]), true);
951 return;
952 }
953
954 for (auto *symbol : variants) {
955 if (symbol != defined) {
956 auto *f = cast<FunctionSymbol>(symbol);
957 reportFunctionSignatureMismatch(symName, f, defined, false);
958 StringRef debugName =
959 saver().save("signature_mismatch:" + toString(*f));
960 replaceWithUnreachable(f, *f->signature, debugName);
961 }
962 }
963 }
964 }
965
966 } // namespace wasm
967 } // namespace lld
968