xref: /openbsd/gnu/llvm/lld/wasm/SymbolTable.cpp (revision dfe94b16)
1 //===- SymbolTable.cpp ----------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "SymbolTable.h"
10 #include "Config.h"
11 #include "InputChunks.h"
12 #include "InputElement.h"
13 #include "WriterUtils.h"
14 #include "lld/Common/CommonLinkerContext.h"
15 #include <optional>
16 
17 #define DEBUG_TYPE "lld"
18 
19 using namespace llvm;
20 using namespace llvm::wasm;
21 using namespace llvm::object;
22 
23 namespace lld {
24 namespace wasm {
// Global pointer to the linker's symbol table.  It is defined here without an
// initializer; nothing in the visible part of this file assigns it.
SymbolTable *symtab;
26 
addFile(InputFile * file)27 void SymbolTable::addFile(InputFile *file) {
28   log("Processing: " + toString(file));
29 
30   // .a file
31   if (auto *f = dyn_cast<ArchiveFile>(file)) {
32     f->parse();
33     return;
34   }
35 
36   // .so file
37   if (auto *f = dyn_cast<SharedFile>(file)) {
38     sharedFiles.push_back(f);
39     return;
40   }
41 
42   // stub file
43   if (auto *f = dyn_cast<StubFile>(file)) {
44     f->parse();
45     stubFiles.push_back(f);
46     return;
47   }
48 
49   if (config->trace)
50     message(toString(file));
51 
52   // LLVM bitcode file
53   if (auto *f = dyn_cast<BitcodeFile>(file)) {
54     f->parse();
55     bitcodeFiles.push_back(f);
56     return;
57   }
58 
59   // Regular object file
60   auto *f = cast<ObjFile>(file);
61   f->parse(false);
62   objectFiles.push_back(f);
63 }
64 
65 // This function is where all the optimizations of link-time
66 // optimization happens. When LTO is in use, some input files are
67 // not in native object file format but in the LLVM bitcode format.
68 // This function compiles bitcode files into a few big native files
69 // using LLVM functions and replaces bitcode symbols with the results.
70 // Because all bitcode files that the program consists of are passed
71 // to the compiler at once, it can do whole-program optimization.
compileBitcodeFiles()72 void SymbolTable::compileBitcodeFiles() {
73   // Prevent further LTO objects being included
74   BitcodeFile::doneLTO = true;
75 
76   if (bitcodeFiles.empty())
77     return;
78 
79   // Compile bitcode files and replace bitcode symbols.
80   lto.reset(new BitcodeCompiler);
81   for (BitcodeFile *f : bitcodeFiles)
82     lto->add(*f);
83 
84   for (StringRef filename : lto->compile()) {
85     auto *obj = make<ObjFile>(MemoryBufferRef(filename, "lto.tmp"), "");
86     obj->parse(true);
87     objectFiles.push_back(obj);
88   }
89 }
90 
find(StringRef name)91 Symbol *SymbolTable::find(StringRef name) {
92   auto it = symMap.find(CachedHashStringRef(name));
93   if (it == symMap.end() || it->second == -1)
94     return nullptr;
95   return symVector[it->second];
96 }
97 
replace(StringRef name,Symbol * sym)98 void SymbolTable::replace(StringRef name, Symbol* sym) {
99   auto it = symMap.find(CachedHashStringRef(name));
100   symVector[it->second] = sym;
101 }
102 
insertName(StringRef name)103 std::pair<Symbol *, bool> SymbolTable::insertName(StringRef name) {
104   bool trace = false;
105   auto p = symMap.insert({CachedHashStringRef(name), (int)symVector.size()});
106   int &symIndex = p.first->second;
107   bool isNew = p.second;
108   if (symIndex == -1) {
109     symIndex = symVector.size();
110     trace = true;
111     isNew = true;
112   }
113 
114   if (!isNew)
115     return {symVector[symIndex], false};
116 
117   Symbol *sym = reinterpret_cast<Symbol *>(make<SymbolUnion>());
118   sym->isUsedInRegularObj = false;
119   sym->canInline = true;
120   sym->traced = trace;
121   sym->forceExport = false;
122   sym->referenced = !config->gcSections;
123   symVector.emplace_back(sym);
124   return {sym, true};
125 }
126 
insert(StringRef name,const InputFile * file)127 std::pair<Symbol *, bool> SymbolTable::insert(StringRef name,
128                                               const InputFile *file) {
129   Symbol *s;
130   bool wasInserted;
131   std::tie(s, wasInserted) = insertName(name);
132 
133   if (!file || file->kind() == InputFile::ObjectKind)
134     s->isUsedInRegularObj = true;
135 
136   return {s, wasInserted};
137 }
138 
reportTypeError(const Symbol * existing,const InputFile * file,llvm::wasm::WasmSymbolType type)139 static void reportTypeError(const Symbol *existing, const InputFile *file,
140                             llvm::wasm::WasmSymbolType type) {
141   error("symbol type mismatch: " + toString(*existing) + "\n>>> defined as " +
142         toString(existing->getWasmType()) + " in " +
143         toString(existing->getFile()) + "\n>>> defined as " + toString(type) +
144         " in " + toString(file));
145 }
146 
147 // Check the type of new symbol matches that of the symbol is replacing.
148 // Returns true if the function types match, false is there is a signature
149 // mismatch.
signatureMatches(FunctionSymbol * existing,const WasmSignature * newSig)150 static bool signatureMatches(FunctionSymbol *existing,
151                              const WasmSignature *newSig) {
152   const WasmSignature *oldSig = existing->signature;
153 
154   // If either function is missing a signature (this happens for bitcode
155   // symbols) then assume they match.  Any mismatch will be reported later
156   // when the LTO objects are added.
157   if (!newSig || !oldSig)
158     return true;
159 
160   return *newSig == *oldSig;
161 }
162 
checkGlobalType(const Symbol * existing,const InputFile * file,const WasmGlobalType * newType)163 static void checkGlobalType(const Symbol *existing, const InputFile *file,
164                             const WasmGlobalType *newType) {
165   if (!isa<GlobalSymbol>(existing)) {
166     reportTypeError(existing, file, WASM_SYMBOL_TYPE_GLOBAL);
167     return;
168   }
169 
170   const WasmGlobalType *oldType = cast<GlobalSymbol>(existing)->getGlobalType();
171   if (*newType != *oldType) {
172     error("Global type mismatch: " + existing->getName() + "\n>>> defined as " +
173           toString(*oldType) + " in " + toString(existing->getFile()) +
174           "\n>>> defined as " + toString(*newType) + " in " + toString(file));
175   }
176 }
177 
checkTagType(const Symbol * existing,const InputFile * file,const WasmSignature * newSig)178 static void checkTagType(const Symbol *existing, const InputFile *file,
179                          const WasmSignature *newSig) {
180   const auto *existingTag = dyn_cast<TagSymbol>(existing);
181   if (!isa<TagSymbol>(existing)) {
182     reportTypeError(existing, file, WASM_SYMBOL_TYPE_TAG);
183     return;
184   }
185 
186   const WasmSignature *oldSig = existingTag->signature;
187   if (*newSig != *oldSig)
188     warn("Tag signature mismatch: " + existing->getName() +
189          "\n>>> defined as " + toString(*oldSig) + " in " +
190          toString(existing->getFile()) + "\n>>> defined as " +
191          toString(*newSig) + " in " + toString(file));
192 }
193 
checkTableType(const Symbol * existing,const InputFile * file,const WasmTableType * newType)194 static void checkTableType(const Symbol *existing, const InputFile *file,
195                            const WasmTableType *newType) {
196   if (!isa<TableSymbol>(existing)) {
197     reportTypeError(existing, file, WASM_SYMBOL_TYPE_TABLE);
198     return;
199   }
200 
201   const WasmTableType *oldType = cast<TableSymbol>(existing)->getTableType();
202   if (newType->ElemType != oldType->ElemType) {
203     error("Table type mismatch: " + existing->getName() + "\n>>> defined as " +
204           toString(*oldType) + " in " + toString(existing->getFile()) +
205           "\n>>> defined as " + toString(*newType) + " in " + toString(file));
206   }
207   // FIXME: No assertions currently on the limits.
208 }
209 
checkDataType(const Symbol * existing,const InputFile * file)210 static void checkDataType(const Symbol *existing, const InputFile *file) {
211   if (!isa<DataSymbol>(existing))
212     reportTypeError(existing, file, WASM_SYMBOL_TYPE_DATA);
213 }
214 
addSyntheticFunction(StringRef name,uint32_t flags,InputFunction * function)215 DefinedFunction *SymbolTable::addSyntheticFunction(StringRef name,
216                                                    uint32_t flags,
217                                                    InputFunction *function) {
218   LLVM_DEBUG(dbgs() << "addSyntheticFunction: " << name << "\n");
219   assert(!find(name));
220   syntheticFunctions.emplace_back(function);
221   return replaceSymbol<DefinedFunction>(insertName(name).first, name,
222                                         flags, nullptr, function);
223 }
224 
225 // Adds an optional, linker generated, data symbol.  The symbol will only be
226 // added if there is an undefine reference to it, or if it is explicitly
227 // exported via the --export flag.  Otherwise we don't add the symbol and return
228 // nullptr.
addOptionalDataSymbol(StringRef name,uint64_t value)229 DefinedData *SymbolTable::addOptionalDataSymbol(StringRef name,
230                                                 uint64_t value) {
231   Symbol *s = find(name);
232   if (!s && (config->exportAll || config->exportedSymbols.count(name) != 0))
233     s = insertName(name).first;
234   else if (!s || s->isDefined())
235     return nullptr;
236   LLVM_DEBUG(dbgs() << "addOptionalDataSymbol: " << name << "\n");
237   auto *rtn = replaceSymbol<DefinedData>(s, name, WASM_SYMBOL_VISIBILITY_HIDDEN);
238   rtn->setVA(value);
239   rtn->referenced = true;
240   return rtn;
241 }
242 
addSyntheticDataSymbol(StringRef name,uint32_t flags)243 DefinedData *SymbolTable::addSyntheticDataSymbol(StringRef name,
244                                                  uint32_t flags) {
245   LLVM_DEBUG(dbgs() << "addSyntheticDataSymbol: " << name << "\n");
246   assert(!find(name));
247   return replaceSymbol<DefinedData>(insertName(name).first, name, flags);
248 }
249 
addSyntheticGlobal(StringRef name,uint32_t flags,InputGlobal * global)250 DefinedGlobal *SymbolTable::addSyntheticGlobal(StringRef name, uint32_t flags,
251                                                InputGlobal *global) {
252   LLVM_DEBUG(dbgs() << "addSyntheticGlobal: " << name << " -> " << global
253                     << "\n");
254   assert(!find(name));
255   syntheticGlobals.emplace_back(global);
256   return replaceSymbol<DefinedGlobal>(insertName(name).first, name, flags,
257                                       nullptr, global);
258 }
259 
addOptionalGlobalSymbol(StringRef name,InputGlobal * global)260 DefinedGlobal *SymbolTable::addOptionalGlobalSymbol(StringRef name,
261                                                     InputGlobal *global) {
262   Symbol *s = find(name);
263   if (!s || s->isDefined())
264     return nullptr;
265   LLVM_DEBUG(dbgs() << "addOptionalGlobalSymbol: " << name << " -> " << global
266                     << "\n");
267   syntheticGlobals.emplace_back(global);
268   return replaceSymbol<DefinedGlobal>(s, name, WASM_SYMBOL_VISIBILITY_HIDDEN,
269                                       nullptr, global);
270 }
271 
addSyntheticTable(StringRef name,uint32_t flags,InputTable * table)272 DefinedTable *SymbolTable::addSyntheticTable(StringRef name, uint32_t flags,
273                                              InputTable *table) {
274   LLVM_DEBUG(dbgs() << "addSyntheticTable: " << name << " -> " << table
275                     << "\n");
276   Symbol *s = find(name);
277   assert(!s || s->isUndefined());
278   if (!s)
279     s = insertName(name).first;
280   syntheticTables.emplace_back(table);
281   return replaceSymbol<DefinedTable>(s, name, flags, nullptr, table);
282 }
283 
shouldReplace(const Symbol * existing,InputFile * newFile,uint32_t newFlags)284 static bool shouldReplace(const Symbol *existing, InputFile *newFile,
285                           uint32_t newFlags) {
286   // If existing symbol is undefined, replace it.
287   if (!existing->isDefined()) {
288     LLVM_DEBUG(dbgs() << "resolving existing undefined symbol: "
289                       << existing->getName() << "\n");
290     return true;
291   }
292 
293   // Now we have two defined symbols. If the new one is weak, we can ignore it.
294   if ((newFlags & WASM_SYMBOL_BINDING_MASK) == WASM_SYMBOL_BINDING_WEAK) {
295     LLVM_DEBUG(dbgs() << "existing symbol takes precedence\n");
296     return false;
297   }
298 
299   // If the existing symbol is weak, we should replace it.
300   if (existing->isWeak()) {
301     LLVM_DEBUG(dbgs() << "replacing existing weak symbol\n");
302     return true;
303   }
304 
305   // Neither symbol is week. They conflict.
306   error("duplicate symbol: " + toString(*existing) + "\n>>> defined in " +
307         toString(existing->getFile()) + "\n>>> defined in " +
308         toString(newFile));
309   return true;
310 }
311 
// Adds a function definition called `name` from `file`.
//
// - If the name is new, or the existing symbol is lazy, the slot is replaced
//   with a DefinedFunction and returned.
// - If the existing symbol is not a function, a symbol type error is
//   reported and the existing symbol is returned unchanged.
// - If the signature must be checked and does not match, a function variant
//   is looked up or created via getFunctionVariant(), installed as the
//   primary symbol for `name`, and returned.
// - Otherwise shouldReplace() decides whether the existing symbol is
//   replaced, and the existing slot is returned.
//
// `function` may be null (the debug output handles that case); no signature
// check is performed then.
Symbol *SymbolTable::addDefinedFunction(StringRef name, uint32_t flags,
                                        InputFile *file,
                                        InputFunction *function) {
  LLVM_DEBUG(dbgs() << "addDefinedFunction: " << name << " ["
                    << (function ? toString(function->signature) : "none")
                    << "]\n");
  Symbol *s;
  bool wasInserted;
  std::tie(s, wasInserted) = insert(name, file);

  // Overwrites `sym` in place with a DefinedFunction.  Note that the old
  // signature is read from `s` (captured by reference), not from `sym`, and
  // it is read before replaceSymbol() overwrites the storage.
  auto replaceSym = [&](Symbol *sym) {
    // If the new defined function doesn't have signature (i.e. bitcode
    // functions) but the old symbol does, then preserve the old signature
    const WasmSignature *oldSig = s->getSignature();
    auto* newSym = replaceSymbol<DefinedFunction>(sym, name, flags, file, function);
    if (!newSym->signature)
      newSym->signature = oldSig;
  };

  // Nothing to reconcile: either a brand new name or a lazy placeholder.
  if (wasInserted || s->isLazy()) {
    replaceSym(s);
    return s;
  }

  auto existingFunction = dyn_cast<FunctionSymbol>(s);
  if (!existingFunction) {
    reportTypeError(s, file, WASM_SYMBOL_TYPE_FUNCTION);
    return s;
  }

  // For an existing undefined function the signature is only checked when
  // that function is called directly.
  bool checkSig = true;
  if (auto ud = dyn_cast<UndefinedFunction>(existingFunction))
    checkSig = ud->isCalledDirectly;

  if (checkSig && function && !signatureMatches(existingFunction, &function->signature)) {
    // `variant` is expected to be assigned by getFunctionVariant() through
    // its out parameter -- TODO confirm, the end of that function is not
    // visible here.
    Symbol* variant;
    if (getFunctionVariant(s, &function->signature, file, &variant))
      // New variant, always replace
      replaceSym(variant);
    else if (shouldReplace(s, file, flags))
      // Variant already exists, replace it after checking shouldReplace
      replaceSym(variant);

    // This variant we found take the place in the symbol table as the primary
    // variant.
    replace(name, variant);
    return variant;
  }

  // Existing function with matching signature.
  if (shouldReplace(s, file, flags))
    replaceSym(s);

  return s;
}
367 
addDefinedData(StringRef name,uint32_t flags,InputFile * file,InputChunk * segment,uint64_t address,uint64_t size)368 Symbol *SymbolTable::addDefinedData(StringRef name, uint32_t flags,
369                                     InputFile *file, InputChunk *segment,
370                                     uint64_t address, uint64_t size) {
371   LLVM_DEBUG(dbgs() << "addDefinedData:" << name << " addr:" << address
372                     << "\n");
373   Symbol *s;
374   bool wasInserted;
375   std::tie(s, wasInserted) = insert(name, file);
376 
377   auto replaceSym = [&]() {
378     replaceSymbol<DefinedData>(s, name, flags, file, segment, address, size);
379   };
380 
381   if (wasInserted || s->isLazy()) {
382     replaceSym();
383     return s;
384   }
385 
386   checkDataType(s, file);
387 
388   if (shouldReplace(s, file, flags))
389     replaceSym();
390   return s;
391 }
392 
addDefinedGlobal(StringRef name,uint32_t flags,InputFile * file,InputGlobal * global)393 Symbol *SymbolTable::addDefinedGlobal(StringRef name, uint32_t flags,
394                                       InputFile *file, InputGlobal *global) {
395   LLVM_DEBUG(dbgs() << "addDefinedGlobal:" << name << "\n");
396 
397   Symbol *s;
398   bool wasInserted;
399   std::tie(s, wasInserted) = insert(name, file);
400 
401   auto replaceSym = [&]() {
402     replaceSymbol<DefinedGlobal>(s, name, flags, file, global);
403   };
404 
405   if (wasInserted || s->isLazy()) {
406     replaceSym();
407     return s;
408   }
409 
410   checkGlobalType(s, file, &global->getType());
411 
412   if (shouldReplace(s, file, flags))
413     replaceSym();
414   return s;
415 }
416 
addDefinedTag(StringRef name,uint32_t flags,InputFile * file,InputTag * tag)417 Symbol *SymbolTable::addDefinedTag(StringRef name, uint32_t flags,
418                                    InputFile *file, InputTag *tag) {
419   LLVM_DEBUG(dbgs() << "addDefinedTag:" << name << "\n");
420 
421   Symbol *s;
422   bool wasInserted;
423   std::tie(s, wasInserted) = insert(name, file);
424 
425   auto replaceSym = [&]() {
426     replaceSymbol<DefinedTag>(s, name, flags, file, tag);
427   };
428 
429   if (wasInserted || s->isLazy()) {
430     replaceSym();
431     return s;
432   }
433 
434   checkTagType(s, file, &tag->signature);
435 
436   if (shouldReplace(s, file, flags))
437     replaceSym();
438   return s;
439 }
440 
addDefinedTable(StringRef name,uint32_t flags,InputFile * file,InputTable * table)441 Symbol *SymbolTable::addDefinedTable(StringRef name, uint32_t flags,
442                                      InputFile *file, InputTable *table) {
443   LLVM_DEBUG(dbgs() << "addDefinedTable:" << name << "\n");
444 
445   Symbol *s;
446   bool wasInserted;
447   std::tie(s, wasInserted) = insert(name, file);
448 
449   auto replaceSym = [&]() {
450     replaceSymbol<DefinedTable>(s, name, flags, file, table);
451   };
452 
453   if (wasInserted || s->isLazy()) {
454     replaceSym();
455     return s;
456   }
457 
458   checkTableType(s, file, &table->getType());
459 
460   if (shouldReplace(s, file, flags))
461     replaceSym();
462   return s;
463 }
464 
465 // This function get called when an undefined symbol is added, and there is
466 // already an existing one in the symbols table.  In this case we check that
467 // custom 'import-module' and 'import-field' symbol attributes agree.
468 // With LTO these attributes are not available when the bitcode is read and only
469 // become available when the LTO object is read.  In this case we silently
470 // replace the empty attributes with the valid ones.
471 template <typename T>
setImportAttributes(T * existing,std::optional<StringRef> importName,std::optional<StringRef> importModule,uint32_t flags,InputFile * file)472 static void setImportAttributes(T *existing,
473                                 std::optional<StringRef> importName,
474                                 std::optional<StringRef> importModule,
475                                 uint32_t flags, InputFile *file) {
476   if (importName) {
477     if (!existing->importName)
478       existing->importName = importName;
479     if (existing->importName != importName)
480       error("import name mismatch for symbol: " + toString(*existing) +
481             "\n>>> defined as " + *existing->importName + " in " +
482             toString(existing->getFile()) + "\n>>> defined as " + *importName +
483             " in " + toString(file));
484   }
485 
486   if (importModule) {
487     if (!existing->importModule)
488       existing->importModule = importModule;
489     if (existing->importModule != importModule)
490       error("import module mismatch for symbol: " + toString(*existing) +
491             "\n>>> defined as " + *existing->importModule + " in " +
492             toString(existing->getFile()) + "\n>>> defined as " +
493             *importModule + " in " + toString(file));
494   }
495 
496   // Update symbol binding, if the existing symbol is weak
497   uint32_t binding = flags & WASM_SYMBOL_BINDING_MASK;
498   if (existing->isWeak() && binding != WASM_SYMBOL_BINDING_WEAK) {
499     existing->flags = (existing->flags & ~WASM_SYMBOL_BINDING_MASK) | binding;
500   }
501 }
502 
// Adds an undefined reference to the function `name` from `file`.
//
// - A new name becomes an UndefinedFunction.
// - An existing lazy symbol is either marked weak (for a weak reference,
//   also recording `sig` on it) or asked to fetch(); in the latter case a
//   why-extract record is appended when config->whyExtract is non-empty.
// - An existing non-function symbol is reported as a symbol type error.
// - Otherwise signatures and import attributes are reconciled with the
//   existing function symbol, see the comments below.
//
// `flags` must contain WASM_SYMBOL_UNDEFINED (asserted).  Returns the symbol
// for `name`; in the function-variant path `s` is handed to
// getFunctionVariant() as its out parameter, so the returned symbol may be
// the variant rather than the original slot -- TODO confirm, the end of
// getFunctionVariant() is not visible here.
Symbol *SymbolTable::addUndefinedFunction(StringRef name,
                                          std::optional<StringRef> importName,
                                          std::optional<StringRef> importModule,
                                          uint32_t flags, InputFile *file,
                                          const WasmSignature *sig,
                                          bool isCalledDirectly) {
  LLVM_DEBUG(dbgs() << "addUndefinedFunction: " << name << " ["
                    << (sig ? toString(*sig) : "none")
                    << "] IsCalledDirectly:" << isCalledDirectly << " flags=0x"
                    << utohexstr(flags) << "\n");
  assert(flags & WASM_SYMBOL_UNDEFINED);

  Symbol *s;
  bool wasInserted;
  std::tie(s, wasInserted) = insert(name, file);
  if (s->traced)
    printTraceSymbolUndefined(name, file);

  // Overwrites whatever `s` currently points at.  `s` is captured by
  // reference, so a later reassignment of `s` changes the target.
  auto replaceSym = [&]() {
    replaceSymbol<UndefinedFunction>(s, name, importName, importModule, flags,
                                     file, sig, isCalledDirectly);
  };

  if (wasInserted) {
    replaceSym();
  } else if (auto *lazy = dyn_cast<LazySymbol>(s)) {
    if ((flags & WASM_SYMBOL_BINDING_MASK) == WASM_SYMBOL_BINDING_WEAK) {
      // A weak reference does not fetch the lazy symbol; it only marks it
      // weak and records the expected signature.
      lazy->setWeak();
      lazy->signature = sig;
    } else {
      lazy->fetch();
      if (!config->whyExtract.empty())
        config->whyExtractRecords.emplace_back(toString(file), s->getFile(),
                                               *s);
    }
  } else {
    auto existingFunction = dyn_cast<FunctionSymbol>(s);
    if (!existingFunction) {
      reportTypeError(s, file, WASM_SYMBOL_TYPE_FUNCTION);
      return s;
    }
    // Fill in a missing signature on the existing symbol before comparing.
    if (!existingFunction->signature && sig)
      existingFunction->signature = sig;
    auto *existingUndefined = dyn_cast<UndefinedFunction>(existingFunction);
    if (isCalledDirectly && !signatureMatches(existingFunction, sig)) {
      // If the existing undefined functions is not called directly then let
      // this one take precedence.  Otherwise the existing function is either
      // directly called or defined, in which case we need a function variant.
      if (existingUndefined && !existingUndefined->isCalledDirectly)
        replaceSym();
      else if (getFunctionVariant(s, sig, file, &s))
        replaceSym();
    }
    // `existingUndefined` was computed before any variant lookup, so these
    // updates apply to the symbol that was originally found under `name`.
    if (existingUndefined) {
      setImportAttributes(existingUndefined, importName, importModule, flags,
                          file);
      if (isCalledDirectly)
        existingUndefined->isCalledDirectly = true;
    }
  }

  return s;
}
566 
addUndefinedData(StringRef name,uint32_t flags,InputFile * file)567 Symbol *SymbolTable::addUndefinedData(StringRef name, uint32_t flags,
568                                       InputFile *file) {
569   LLVM_DEBUG(dbgs() << "addUndefinedData: " << name << "\n");
570   assert(flags & WASM_SYMBOL_UNDEFINED);
571 
572   Symbol *s;
573   bool wasInserted;
574   std::tie(s, wasInserted) = insert(name, file);
575   if (s->traced)
576     printTraceSymbolUndefined(name, file);
577 
578   if (wasInserted) {
579     replaceSymbol<UndefinedData>(s, name, flags, file);
580   } else if (auto *lazy = dyn_cast<LazySymbol>(s)) {
581     if ((flags & WASM_SYMBOL_BINDING_MASK) == WASM_SYMBOL_BINDING_WEAK)
582       lazy->setWeak();
583     else
584       lazy->fetch();
585   } else if (s->isDefined()) {
586     checkDataType(s, file);
587   }
588   return s;
589 }
590 
addUndefinedGlobal(StringRef name,std::optional<StringRef> importName,std::optional<StringRef> importModule,uint32_t flags,InputFile * file,const WasmGlobalType * type)591 Symbol *SymbolTable::addUndefinedGlobal(StringRef name,
592                                         std::optional<StringRef> importName,
593                                         std::optional<StringRef> importModule,
594                                         uint32_t flags, InputFile *file,
595                                         const WasmGlobalType *type) {
596   LLVM_DEBUG(dbgs() << "addUndefinedGlobal: " << name << "\n");
597   assert(flags & WASM_SYMBOL_UNDEFINED);
598 
599   Symbol *s;
600   bool wasInserted;
601   std::tie(s, wasInserted) = insert(name, file);
602   if (s->traced)
603     printTraceSymbolUndefined(name, file);
604 
605   if (wasInserted)
606     replaceSymbol<UndefinedGlobal>(s, name, importName, importModule, flags,
607                                    file, type);
608   else if (auto *lazy = dyn_cast<LazySymbol>(s))
609     lazy->fetch();
610   else if (s->isDefined())
611     checkGlobalType(s, file, type);
612   return s;
613 }
614 
addUndefinedTable(StringRef name,std::optional<StringRef> importName,std::optional<StringRef> importModule,uint32_t flags,InputFile * file,const WasmTableType * type)615 Symbol *SymbolTable::addUndefinedTable(StringRef name,
616                                        std::optional<StringRef> importName,
617                                        std::optional<StringRef> importModule,
618                                        uint32_t flags, InputFile *file,
619                                        const WasmTableType *type) {
620   LLVM_DEBUG(dbgs() << "addUndefinedTable: " << name << "\n");
621   assert(flags & WASM_SYMBOL_UNDEFINED);
622 
623   Symbol *s;
624   bool wasInserted;
625   std::tie(s, wasInserted) = insert(name, file);
626   if (s->traced)
627     printTraceSymbolUndefined(name, file);
628 
629   if (wasInserted)
630     replaceSymbol<UndefinedTable>(s, name, importName, importModule, flags,
631                                   file, type);
632   else if (auto *lazy = dyn_cast<LazySymbol>(s))
633     lazy->fetch();
634   else if (s->isDefined())
635     checkTableType(s, file, type);
636   return s;
637 }
638 
addUndefinedTag(StringRef name,std::optional<StringRef> importName,std::optional<StringRef> importModule,uint32_t flags,InputFile * file,const WasmSignature * sig)639 Symbol *SymbolTable::addUndefinedTag(StringRef name,
640                                      std::optional<StringRef> importName,
641                                      std::optional<StringRef> importModule,
642                                      uint32_t flags, InputFile *file,
643                                      const WasmSignature *sig) {
644   LLVM_DEBUG(dbgs() << "addUndefinedTag: " << name << "\n");
645   assert(flags & WASM_SYMBOL_UNDEFINED);
646 
647   Symbol *s;
648   bool wasInserted;
649   std::tie(s, wasInserted) = insert(name, file);
650   if (s->traced)
651     printTraceSymbolUndefined(name, file);
652 
653   if (wasInserted)
654     replaceSymbol<UndefinedTag>(s, name, importName, importModule, flags, file,
655                                 sig);
656   else if (auto *lazy = dyn_cast<LazySymbol>(s))
657     lazy->fetch();
658   else if (s->isDefined())
659     checkTagType(s, file, sig);
660   return s;
661 }
662 
createUndefinedIndirectFunctionTable(StringRef name)663 TableSymbol *SymbolTable::createUndefinedIndirectFunctionTable(StringRef name) {
664   WasmLimits limits{0, 0, 0}; // Set by the writer.
665   WasmTableType *type = make<WasmTableType>();
666   type->ElemType = uint8_t(ValType::FUNCREF);
667   type->Limits = limits;
668   StringRef module(defaultModule);
669   uint32_t flags = config->exportTable ? 0 : WASM_SYMBOL_VISIBILITY_HIDDEN;
670   flags |= WASM_SYMBOL_UNDEFINED;
671   Symbol *sym = addUndefinedTable(name, name, module, flags, nullptr, type);
672   sym->markLive();
673   sym->forceExport = config->exportTable;
674   return cast<TableSymbol>(sym);
675 }
676 
createDefinedIndirectFunctionTable(StringRef name)677 TableSymbol *SymbolTable::createDefinedIndirectFunctionTable(StringRef name) {
678   const uint32_t invalidIndex = -1;
679   WasmLimits limits{0, 0, 0}; // Set by the writer.
680   WasmTableType type{uint8_t(ValType::FUNCREF), limits};
681   WasmTable desc{invalidIndex, type, name};
682   InputTable *table = make<InputTable>(desc, nullptr);
683   uint32_t flags = config->exportTable ? 0 : WASM_SYMBOL_VISIBILITY_HIDDEN;
684   TableSymbol *sym = addSyntheticTable(name, flags, table);
685   sym->markLive();
686   sym->forceExport = config->exportTable;
687   return sym;
688 }
689 
690 // Whether or not we need an indirect function table is usually a function of
691 // whether an input declares a need for it.  However sometimes it's possible for
692 // no input to need the indirect function table, but then a late
693 // addInternalGOTEntry causes a function to be allocated an address.  In that
694 // case address we synthesize a definition at the last minute.
resolveIndirectFunctionTable(bool required)695 TableSymbol *SymbolTable::resolveIndirectFunctionTable(bool required) {
696   Symbol *existing = find(functionTableName);
697   if (existing) {
698     if (!isa<TableSymbol>(existing)) {
699       error(Twine("reserved symbol must be of type table: `") +
700             functionTableName + "`");
701       return nullptr;
702     }
703     if (existing->isDefined()) {
704       error(Twine("reserved symbol must not be defined in input files: `") +
705             functionTableName + "`");
706       return nullptr;
707     }
708   }
709 
710   if (config->importTable) {
711     if (existing)
712       return cast<TableSymbol>(existing);
713     if (required)
714       return createUndefinedIndirectFunctionTable(functionTableName);
715   } else if ((existing && existing->isLive()) || config->exportTable ||
716              required) {
717     // A defined table is required.  Either because the user request an exported
718     // table or because the table symbol is already live.  The existing table is
719     // guaranteed to be undefined due to the check above.
720     return createDefinedIndirectFunctionTable(functionTableName);
721   }
722 
723   // An indirect function table will only be present in the symbol table if
724   // needed by a reloc; if we get here, we don't need one.
725   return nullptr;
726 }
727 
addLazy(ArchiveFile * file,const Archive::Symbol * sym)728 void SymbolTable::addLazy(ArchiveFile *file, const Archive::Symbol *sym) {
729   LLVM_DEBUG(dbgs() << "addLazy: " << sym->getName() << "\n");
730   StringRef name = sym->getName();
731 
732   Symbol *s;
733   bool wasInserted;
734   std::tie(s, wasInserted) = insertName(name);
735 
736   if (wasInserted) {
737     replaceSymbol<LazySymbol>(s, name, 0, file, *sym);
738     return;
739   }
740 
741   if (!s->isUndefined())
742     return;
743 
744   // The existing symbol is undefined, load a new one from the archive,
745   // unless the existing symbol is weak in which case replace the undefined
746   // symbols with a LazySymbol.
747   if (s->isWeak()) {
748     const WasmSignature *oldSig = nullptr;
749     // In the case of an UndefinedFunction we need to preserve the expected
750     // signature.
751     if (auto *f = dyn_cast<UndefinedFunction>(s))
752       oldSig = f->signature;
753     LLVM_DEBUG(dbgs() << "replacing existing weak undefined symbol\n");
754     auto newSym = replaceSymbol<LazySymbol>(s, name, WASM_SYMBOL_BINDING_WEAK,
755                                             file, *sym);
756     newSym->signature = oldSig;
757     return;
758   }
759 
760   LLVM_DEBUG(dbgs() << "replacing existing undefined\n");
761   const InputFile *oldFile = s->getFile();
762   file->addMember(sym);
763   if (!config->whyExtract.empty())
764     config->whyExtractRecords.emplace_back(toString(oldFile), s->getFile(), *s);
765 }
766 
addComdat(StringRef name)767 bool SymbolTable::addComdat(StringRef name) {
768   return comdatGroups.insert(CachedHashStringRef(name)).second;
769 }
770 
771 // The new signature doesn't match.  Create a variant to the symbol with the
772 // signature encoded in the name and return that instead.  These symbols are
773 // then unified later in handleSymbolVariants.
getFunctionVariant(Symbol * sym,const WasmSignature * sig,const InputFile * file,Symbol ** out)774 bool SymbolTable::getFunctionVariant(Symbol* sym, const WasmSignature *sig,
775                                      const InputFile *file, Symbol **out) {
776   LLVM_DEBUG(dbgs() << "getFunctionVariant: " << sym->getName() << " -> "
777                     << " " << toString(*sig) << "\n");
778   Symbol *variant = nullptr;
779 
780   // Linear search through symbol variants.  Should never be more than two
781   // or three entries here.
782   auto &variants = symVariants[CachedHashStringRef(sym->getName())];
783   if (variants.empty())
784     variants.push_back(sym);
785 
786   for (Symbol* v : variants) {
787     if (*v->getSignature() == *sig) {
788       variant = v;
789       break;
790     }
791   }
792 
793   bool wasAdded = !variant;
794   if (wasAdded) {
795     // Create a new variant;
796     LLVM_DEBUG(dbgs() << "added new variant\n");
797     variant = reinterpret_cast<Symbol *>(make<SymbolUnion>());
798     variant->isUsedInRegularObj =
799         !file || file->kind() == InputFile::ObjectKind;
800     variant->canInline = true;
801     variant->traced = false;
802     variant->forceExport = false;
803     variants.push_back(variant);
804   } else {
805     LLVM_DEBUG(dbgs() << "variant already exists: " << toString(*variant) << "\n");
806     assert(*variant->getSignature() == *sig);
807   }
808 
809   *out = variant;
810   return wasAdded;
811 }
812 
813 // Set a flag for --trace-symbol so that we can print out a log message
814 // if a new symbol with the same name is inserted into the symbol table.
trace(StringRef name)815 void SymbolTable::trace(StringRef name) {
816   symMap.insert({CachedHashStringRef(name), -1});
817 }
818 
wrap(Symbol * sym,Symbol * real,Symbol * wrap)819 void SymbolTable::wrap(Symbol *sym, Symbol *real, Symbol *wrap) {
820   // Swap symbols as instructed by -wrap.
821   int &origIdx = symMap[CachedHashStringRef(sym->getName())];
822   int &realIdx= symMap[CachedHashStringRef(real->getName())];
823   int &wrapIdx = symMap[CachedHashStringRef(wrap->getName())];
824   LLVM_DEBUG(dbgs() << "wrap: " << sym->getName() << "\n");
825 
826   // Anyone looking up __real symbols should get the original
827   realIdx = origIdx;
828   // Anyone looking up the original should get the __wrap symbol
829   origIdx = wrapIdx;
830 }
831 
// Encoded body used for synthetic stub functions: a size-prefixed function
// body with no locals whose only instruction is `unreachable`.
static const uint8_t unreachableFn[] = {
    0x03, // ULEB length
    0x00, // ULEB num locals
    0x00, // opcode unreachable
    0x0b, // opcode end
};
836 
837 // Replace the given symbol body with an unreachable function.
838 // This is used by handleWeakUndefines in order to generate a callable
839 // equivalent of an undefined function and also handleSymbolVariants for
840 // undefined functions that don't match the signature of the definition.
replaceWithUnreachable(Symbol * sym,const WasmSignature & sig,StringRef debugName)841 InputFunction *SymbolTable::replaceWithUnreachable(Symbol *sym,
842                                                    const WasmSignature &sig,
843                                                    StringRef debugName) {
844   auto *func = make<SyntheticFunction>(sig, sym->getName(), debugName);
845   func->setBody(unreachableFn);
846   syntheticFunctions.emplace_back(func);
847   // Mark new symbols as local. For relocatable output we don't want them
848   // to be exported outside the object file.
849   replaceSymbol<DefinedFunction>(sym, debugName, WASM_SYMBOL_BINDING_LOCAL,
850                                  nullptr, func);
851   // Ensure the stub function doesn't get a table entry.  Its address
852   // should always compare equal to the null pointer.
853   sym->isStub = true;
854   return func;
855 }
856 
replaceWithUndefined(Symbol * sym)857 void SymbolTable::replaceWithUndefined(Symbol *sym) {
858   // Add a synthetic dummy for weak undefined functions.  These dummies will
859   // be GC'd if not used as the target of any "call" instructions.
860   StringRef debugName = saver().save("undefined_weak:" + toString(*sym));
861   replaceWithUnreachable(sym, *sym->getSignature(), debugName);
862   // Hide our dummy to prevent export.
863   sym->setHidden(true);
864 }
865 
866 // For weak undefined functions, there may be "call" instructions that reference
867 // the symbol. In this case, we need to synthesise a dummy/stub function that
868 // will abort at runtime, so that relocations can still provided an operand to
869 // the call instruction that passes Wasm validation.
handleWeakUndefines()870 void SymbolTable::handleWeakUndefines() {
871   for (Symbol *sym : symbols()) {
872     if (sym->isUndefWeak() && sym->isUsedInRegularObj) {
873       if (sym->getSignature()) {
874         replaceWithUndefined(sym);
875       } else {
876         // It is possible for undefined functions not to have a signature (eg.
877         // if added via "--undefined"), but weak undefined ones do have a
878         // signature.  Lazy symbols may not be functions and therefore Sig can
879         // still be null in some circumstance.
880         assert(!isa<FunctionSymbol>(sym));
881       }
882     }
883   }
884 }
885 
createUndefinedStub(const WasmSignature & sig)886 DefinedFunction *SymbolTable::createUndefinedStub(const WasmSignature &sig) {
887   if (stubFunctions.count(sig))
888     return stubFunctions[sig];
889   LLVM_DEBUG(dbgs() << "createUndefinedStub: " << toString(sig) << "\n");
890   auto *sym = reinterpret_cast<DefinedFunction *>(make<SymbolUnion>());
891   sym->isUsedInRegularObj = true;
892   sym->canInline = true;
893   sym->traced = false;
894   sym->forceExport = false;
895   sym->signature = &sig;
896   replaceSymbol<DefinedFunction>(
897       sym, "undefined_stub", WASM_SYMBOL_VISIBILITY_HIDDEN, nullptr, nullptr);
898   replaceWithUnreachable(sym, sig, "undefined_stub");
899   stubFunctions[sig] = sym;
900   return sym;
901 }
902 
reportFunctionSignatureMismatch(StringRef symName,FunctionSymbol * a,FunctionSymbol * b,bool isError)903 static void reportFunctionSignatureMismatch(StringRef symName,
904                                             FunctionSymbol *a,
905                                             FunctionSymbol *b, bool isError) {
906   std::string msg = ("function signature mismatch: " + symName +
907                      "\n>>> defined as " + toString(*a->signature) + " in " +
908                      toString(a->getFile()) + "\n>>> defined as " +
909                      toString(*b->signature) + " in " + toString(b->getFile()))
910                         .str();
911   if (isError)
912     error(msg);
913   else
914     warn(msg);
915 }
916 
917 // Remove any variant symbols that were created due to function signature
918 // mismatches.
handleSymbolVariants()919 void SymbolTable::handleSymbolVariants() {
920   for (auto pair : symVariants) {
921     // Push the initial symbol onto the list of variants.
922     StringRef symName = pair.first.val();
923     std::vector<Symbol *> &variants = pair.second;
924 
925 #ifndef NDEBUG
926     LLVM_DEBUG(dbgs() << "symbol with (" << variants.size()
927                       << ") variants: " << symName << "\n");
928     for (auto *s: variants) {
929       auto *f = cast<FunctionSymbol>(s);
930       LLVM_DEBUG(dbgs() << " variant: " + f->getName() << " "
931                         << toString(*f->signature) << "\n");
932     }
933 #endif
934 
935     // Find the one definition.
936     DefinedFunction *defined = nullptr;
937     for (auto *symbol : variants) {
938       if (auto f = dyn_cast<DefinedFunction>(symbol)) {
939         defined = f;
940         break;
941       }
942     }
943 
944     // If there are no definitions, and the undefined symbols disagree on
945     // the signature, there is not we can do since we don't know which one
946     // to use as the signature on the import.
947     if (!defined) {
948       reportFunctionSignatureMismatch(symName,
949                                       cast<FunctionSymbol>(variants[0]),
950                                       cast<FunctionSymbol>(variants[1]), true);
951       return;
952     }
953 
954     for (auto *symbol : variants) {
955       if (symbol != defined) {
956         auto *f = cast<FunctionSymbol>(symbol);
957         reportFunctionSignatureMismatch(symName, f, defined, false);
958         StringRef debugName =
959             saver().save("signature_mismatch:" + toString(*f));
960         replaceWithUnreachable(f, *f->signature, debugName);
961       }
962     }
963   }
964 }
965 
966 } // namespace wasm
967 } // namespace lld
968