1 //===- SymbolTable.cpp ----------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "SymbolTable.h"
10 #include "COFFLinkerContext.h"
11 #include "Config.h"
12 #include "Driver.h"
13 #include "LTO.h"
14 #include "PDB.h"
15 #include "Symbols.h"
16 #include "lld/Common/ErrorHandler.h"
17 #include "lld/Common/Memory.h"
18 #include "lld/Common/Timer.h"
19 #include "llvm/DebugInfo/DIContext.h"
20 #include "llvm/IR/LLVMContext.h"
21 #include "llvm/LTO/LTO.h"
22 #include "llvm/Object/WindowsMachineFlag.h"
23 #include "llvm/Support/Debug.h"
24 #include "llvm/Support/raw_ostream.h"
25 #include <utility>
26 
27 using namespace llvm;
28 
29 namespace lld {
30 namespace coff {
31 
32 StringRef ltrim1(StringRef s, const char *chars) {
33   if (!s.empty() && strchr(chars, s[0]))
34     return s.substr(1);
35   return s;
36 }
37 
38 void SymbolTable::addFile(InputFile *file) {
39   log("Reading " + toString(file));
40   if (file->lazy) {
41     if (auto *f = dyn_cast<BitcodeFile>(file))
42       f->parseLazy();
43     else
44       cast<ObjFile>(file)->parseLazy();
45   } else {
46     file->parse();
47     if (auto *f = dyn_cast<ObjFile>(file)) {
48       ctx.objFileInstances.push_back(f);
49     } else if (auto *f = dyn_cast<BitcodeFile>(file)) {
50       ctx.bitcodeFileInstances.push_back(f);
51     } else if (auto *f = dyn_cast<ImportFile>(file)) {
52       ctx.importFileInstances.push_back(f);
53     }
54   }
55 
56   MachineTypes mt = file->getMachineType();
57   if (config->machine == IMAGE_FILE_MACHINE_UNKNOWN) {
58     config->machine = mt;
59     driver->addWinSysRootLibSearchPaths();
60   } else if (mt != IMAGE_FILE_MACHINE_UNKNOWN && config->machine != mt) {
61     error(toString(file) + ": machine type " + machineToStr(mt) +
62           " conflicts with " + machineToStr(config->machine));
63     return;
64   }
65 
66   driver->parseDirectives(file);
67 }
68 
69 static void errorOrWarn(const Twine &s) {
70   if (config->forceUnresolved)
71     warn(s);
72   else
73     error(s);
74 }
75 
76 // Causes the file associated with a lazy symbol to be linked in.
77 static void forceLazy(Symbol *s) {
78   s->pendingArchiveLoad = true;
79   switch (s->kind()) {
80   case Symbol::Kind::LazyArchiveKind: {
81     auto *l = cast<LazyArchive>(s);
82     l->file->addMember(l->sym);
83     break;
84   }
85   case Symbol::Kind::LazyObjectKind: {
86     InputFile *file = cast<LazyObject>(s)->file;
87     file->ctx.symtab.addFile(file);
88     break;
89   }
90   case Symbol::Kind::LazyDLLSymbolKind: {
91     auto *l = cast<LazyDLLSymbol>(s);
92     l->file->makeImport(l->sym);
93     break;
94   }
95   default:
96     llvm_unreachable(
97         "symbol passed to forceLazy is not a LazyArchive or LazyObject");
98   }
99 }
100 
101 // Returns the symbol in SC whose value is <= Addr that is closest to Addr.
102 // This is generally the global variable or function whose definition contains
103 // Addr.
104 static Symbol *getSymbol(SectionChunk *sc, uint32_t addr) {
105   DefinedRegular *candidate = nullptr;
106 
107   for (Symbol *s : sc->file->getSymbols()) {
108     auto *d = dyn_cast_or_null<DefinedRegular>(s);
109     if (!d || !d->data || d->file != sc->file || d->getChunk() != sc ||
110         d->getValue() > addr ||
111         (candidate && d->getValue() < candidate->getValue()))
112       continue;
113 
114     candidate = d;
115   }
116 
117   return candidate;
118 }
119 
120 static std::vector<std::string> getSymbolLocations(BitcodeFile *file) {
121   std::string res("\n>>> referenced by ");
122   StringRef source = file->obj->getSourceFileName();
123   if (!source.empty())
124     res += source.str() + "\n>>>               ";
125   res += toString(file);
126   return {res};
127 }
128 
129 static Optional<std::pair<StringRef, uint32_t>>
130 getFileLineDwarf(const SectionChunk *c, uint32_t addr) {
131   Optional<DILineInfo> optionalLineInfo =
132       c->file->getDILineInfo(addr, c->getSectionNumber() - 1);
133   if (!optionalLineInfo)
134     return None;
135   const DILineInfo &lineInfo = *optionalLineInfo;
136   if (lineInfo.FileName == DILineInfo::BadString)
137     return None;
138   return std::make_pair(saver().save(lineInfo.FileName), lineInfo.Line);
139 }
140 
141 static Optional<std::pair<StringRef, uint32_t>>
142 getFileLine(const SectionChunk *c, uint32_t addr) {
143   // MinGW can optionally use codeview, even if the default is dwarf.
144   Optional<std::pair<StringRef, uint32_t>> fileLine =
145       getFileLineCodeView(c, addr);
146   // If codeview didn't yield any result, check dwarf in MinGW mode.
147   if (!fileLine && config->mingw)
148     fileLine = getFileLineDwarf(c, addr);
149   return fileLine;
150 }
151 
152 // Given a file and the index of a symbol in that file, returns a description
153 // of all references to that symbol from that file. If no debug information is
154 // available, returns just the name of the file, else one string per actual
155 // reference as described in the debug info.
156 // Returns up to maxStrings string descriptions, along with the total number of
157 // locations found.
158 static std::pair<std::vector<std::string>, size_t>
159 getSymbolLocations(ObjFile *file, uint32_t symIndex, size_t maxStrings) {
160   struct Location {
161     Symbol *sym;
162     std::pair<StringRef, uint32_t> fileLine;
163   };
164   std::vector<Location> locations;
165   size_t numLocations = 0;
166 
167   for (Chunk *c : file->getChunks()) {
168     auto *sc = dyn_cast<SectionChunk>(c);
169     if (!sc)
170       continue;
171     for (const coff_relocation &r : sc->getRelocs()) {
172       if (r.SymbolTableIndex != symIndex)
173         continue;
174       numLocations++;
175       if (locations.size() >= maxStrings)
176         continue;
177 
178       Optional<std::pair<StringRef, uint32_t>> fileLine =
179           getFileLine(sc, r.VirtualAddress);
180       Symbol *sym = getSymbol(sc, r.VirtualAddress);
181       if (fileLine)
182         locations.push_back({sym, *fileLine});
183       else if (sym)
184         locations.push_back({sym, {"", 0}});
185     }
186   }
187 
188   if (maxStrings == 0)
189     return std::make_pair(std::vector<std::string>(), numLocations);
190 
191   if (numLocations == 0)
192     return std::make_pair(
193         std::vector<std::string>{"\n>>> referenced by " + toString(file)}, 1);
194 
195   std::vector<std::string> symbolLocations(locations.size());
196   size_t i = 0;
197   for (Location loc : locations) {
198     llvm::raw_string_ostream os(symbolLocations[i++]);
199     os << "\n>>> referenced by ";
200     if (!loc.fileLine.first.empty())
201       os << loc.fileLine.first << ":" << loc.fileLine.second
202          << "\n>>>               ";
203     os << toString(file);
204     if (loc.sym)
205       os << ":(" << toString(*loc.sym) << ')';
206   }
207   return std::make_pair(symbolLocations, numLocations);
208 }
209 
210 std::vector<std::string> getSymbolLocations(ObjFile *file, uint32_t symIndex) {
211   return getSymbolLocations(file, symIndex, SIZE_MAX).first;
212 }
213 
214 static std::pair<std::vector<std::string>, size_t>
215 getSymbolLocations(InputFile *file, uint32_t symIndex, size_t maxStrings) {
216   if (auto *o = dyn_cast<ObjFile>(file))
217     return getSymbolLocations(o, symIndex, maxStrings);
218   if (auto *b = dyn_cast<BitcodeFile>(file)) {
219     std::vector<std::string> symbolLocations = getSymbolLocations(b);
220     size_t numLocations = symbolLocations.size();
221     if (symbolLocations.size() > maxStrings)
222       symbolLocations.resize(maxStrings);
223     return std::make_pair(symbolLocations, numLocations);
224   }
225   llvm_unreachable("unsupported file type passed to getSymbolLocations");
226   return std::make_pair(std::vector<std::string>(), (size_t)0);
227 }
228 
// For an undefined symbol, stores all files referencing it and the index of
// the undefined symbol in each file.
struct UndefinedDiag {
  // The undefined symbol being reported.
  Symbol *sym;
  // One reference site: a file plus the position of `sym` in that file's
  // symbol list.
  struct File {
    InputFile *file;
    uint32_t symIndex;
  };
  // All reference sites collected for `sym`, in the order they were found.
  std::vector<File> files;
};
239 
240 static void reportUndefinedSymbol(const UndefinedDiag &undefDiag) {
241   std::string out;
242   llvm::raw_string_ostream os(out);
243   os << "undefined symbol: " << toString(*undefDiag.sym);
244 
245   const size_t maxUndefReferences = 3;
246   size_t numDisplayedRefs = 0, numRefs = 0;
247   for (const UndefinedDiag::File &ref : undefDiag.files) {
248     std::vector<std::string> symbolLocations;
249     size_t totalLocations = 0;
250     std::tie(symbolLocations, totalLocations) = getSymbolLocations(
251         ref.file, ref.symIndex, maxUndefReferences - numDisplayedRefs);
252 
253     numRefs += totalLocations;
254     numDisplayedRefs += symbolLocations.size();
255     for (const std::string &s : symbolLocations) {
256       os << s;
257     }
258   }
259   if (numDisplayedRefs < numRefs)
260     os << "\n>>> referenced " << numRefs - numDisplayedRefs << " more times";
261   errorOrWarn(os.str());
262 }
263 
// MinGW-specific pass over all still-undefined symbols without a weak alias.
// It performs up to two fixups per symbol:
//  1. stdcall fixup (i386 only, when config->stdcallFixup is set): resolve a
//     decorated symbol by aliasing it to the undecorated "_name" symbol.
//  2. automatic import (when config->autoImport is set): if a lazy
//     "__imp_<name>" symbol exists, force it to be loaded.
// NOTE(review): forceLazy() is called while symMap is being iterated; confirm
// that the loads it triggers cannot insert into symMap during this loop.
void SymbolTable::loadMinGWSymbols() {
  for (auto &i : symMap) {
    Symbol *sym = i.second;
    auto *undef = dyn_cast<Undefined>(sym);
    if (!undef)
      continue;
    // Symbols that already have a weak alias need no fixup.
    if (undef->getWeakAlias())
      continue;

    StringRef name = undef->getName();

    if (config->machine == I386 && config->stdcallFixup) {
      // Check if we can resolve an undefined decorated symbol by finding
      // the intended target as an undecorated symbol (only with a leading
      // underscore).
      StringRef origName = name;
      StringRef baseName = name;
      // Trim down stdcall/fastcall/vectorcall symbols to the base name.
      baseName = ltrim1(baseName, "_@");
      baseName = baseName.substr(0, baseName.find('@'));
      // Add a leading underscore, as it would be in cdecl form.
      std::string newName = ("_" + baseName).str();
      Symbol *l;
      // Skip symbols that were not decorated in the first place.
      if (newName != origName && (l = find(newName)) != nullptr) {
        // If we found a symbol and it is lazy; load it.
        if (l->isLazy() && !l->pendingArchiveLoad) {
          log("Loading lazy " + l->getName() + " from " +
              l->getFile()->getName() + " for stdcall fixup");
          forceLazy(l);
        }
        // If it's lazy or already defined, hook it up as weak alias.
        if (l->isLazy() || isa<Defined>(l)) {
          if (config->warnStdcallFixup)
            warn("Resolving " + origName + " by linking to " + newName);
          else
            log("Resolving " + origName + " by linking to " + newName);
          undef->weakAlias = l;
          // The symbol is handled; do not also try automatic import.
          continue;
        }
      }
    }

    if (config->autoImport) {
      // "__imp_" symbols are themselves import symbols; nothing to look up.
      if (name.startswith("__imp_"))
        continue;
      // If we have an undefined symbol, but we have a lazy symbol we could
      // load, load it.
      Symbol *l = find(("__imp_" + name).str());
      if (!l || l->pendingArchiveLoad || !l->isLazy())
        continue;

      log("Loading lazy " + l->getName() + " from " + l->getFile()->getName() +
          " for automatic import");
      forceLazy(l);
    }
  }
}
321 
322 Defined *SymbolTable::impSymbol(StringRef name) {
323   if (name.startswith("__imp_"))
324     return nullptr;
325   return dyn_cast_or_null<Defined>(find(("__imp_" + name).str()));
326 }
327 
328 bool SymbolTable::handleMinGWAutomaticImport(Symbol *sym, StringRef name) {
329   Defined *imp = impSymbol(name);
330   if (!imp)
331     return false;
332 
333   // Replace the reference directly to a variable with a reference
334   // to the import address table instead. This obviously isn't right,
335   // but we mark the symbol as isRuntimePseudoReloc, and a later pass
336   // will add runtime pseudo relocations for every relocation against
337   // this Symbol. The runtime pseudo relocation framework expects the
338   // reference itself to point at the IAT entry.
339   size_t impSize = 0;
340   if (isa<DefinedImportData>(imp)) {
341     log("Automatically importing " + name + " from " +
342         cast<DefinedImportData>(imp)->getDLLName());
343     impSize = sizeof(DefinedImportData);
344   } else if (isa<DefinedRegular>(imp)) {
345     log("Automatically importing " + name + " from " +
346         toString(cast<DefinedRegular>(imp)->file));
347     impSize = sizeof(DefinedRegular);
348   } else {
349     warn("unable to automatically import " + name + " from " + imp->getName() +
350          " from " + toString(cast<DefinedRegular>(imp)->file) +
351          "; unexpected symbol type");
352     return false;
353   }
354   sym->replaceKeepingName(imp, impSize);
355   sym->isRuntimePseudoReloc = true;
356 
357   // There may exist symbols named .refptr.<name> which only consist
358   // of a single pointer to <name>. If it turns out <name> is
359   // automatically imported, we don't need to keep the .refptr.<name>
360   // pointer at all, but redirect all accesses to it to the IAT entry
361   // for __imp_<name> instead, and drop the whole .refptr.<name> chunk.
362   DefinedRegular *refptr =
363       dyn_cast_or_null<DefinedRegular>(find((".refptr." + name).str()));
364   if (refptr && refptr->getChunk()->getSize() == config->wordsize) {
365     SectionChunk *sc = dyn_cast_or_null<SectionChunk>(refptr->getChunk());
366     if (sc && sc->getRelocs().size() == 1 && *sc->symbols().begin() == sym) {
367       log("Replacing .refptr." + name + " with " + imp->getName());
368       refptr->getChunk()->live = false;
369       refptr->replaceKeepingName(imp, impSize);
370     }
371   }
372   return true;
373 }
374 
375 /// Helper function for reportUnresolvable and resolveRemainingUndefines.
376 /// This function emits an "undefined symbol" diagnostic for each symbol in
377 /// undefs. If localImports is not nullptr, it also emits a "locally
378 /// defined symbol imported" diagnostic for symbols in localImports.
379 /// objFiles and bitcodeFiles (if not nullptr) are used to report where
380 /// undefined symbols are referenced.
381 static void reportProblemSymbols(
382     const COFFLinkerContext &ctx, const SmallPtrSetImpl<Symbol *> &undefs,
383     const DenseMap<Symbol *, Symbol *> *localImports, bool needBitcodeFiles) {
384   // Return early if there is nothing to report (which should be
385   // the common case).
386   if (undefs.empty() && (!localImports || localImports->empty()))
387     return;
388 
389   for (Symbol *b : config->gcroot) {
390     if (undefs.count(b))
391       errorOrWarn("<root>: undefined symbol: " + toString(*b));
392     if (localImports)
393       if (Symbol *imp = localImports->lookup(b))
394         warn("<root>: locally defined symbol imported: " + toString(*imp) +
395              " (defined in " + toString(imp->getFile()) + ") [LNK4217]");
396   }
397 
398   std::vector<UndefinedDiag> undefDiags;
399   DenseMap<Symbol *, int> firstDiag;
400 
401   auto processFile = [&](InputFile *file, ArrayRef<Symbol *> symbols) {
402     uint32_t symIndex = (uint32_t)-1;
403     for (Symbol *sym : symbols) {
404       ++symIndex;
405       if (!sym)
406         continue;
407       if (undefs.count(sym)) {
408         auto it = firstDiag.find(sym);
409         if (it == firstDiag.end()) {
410           firstDiag[sym] = undefDiags.size();
411           undefDiags.push_back({sym, {{file, symIndex}}});
412         } else {
413           undefDiags[it->second].files.push_back({file, symIndex});
414         }
415       }
416       if (localImports)
417         if (Symbol *imp = localImports->lookup(sym))
418           warn(toString(file) +
419                ": locally defined symbol imported: " + toString(*imp) +
420                " (defined in " + toString(imp->getFile()) + ") [LNK4217]");
421     }
422   };
423 
424   for (ObjFile *file : ctx.objFileInstances)
425     processFile(file, file->getSymbols());
426 
427   if (needBitcodeFiles)
428     for (BitcodeFile *file : ctx.bitcodeFileInstances)
429       processFile(file, file->getSymbols());
430 
431   for (const UndefinedDiag &undefDiag : undefDiags)
432     reportUndefinedSymbol(undefDiag);
433 }
434 
435 void SymbolTable::reportUnresolvable() {
436   SmallPtrSet<Symbol *, 8> undefs;
437   for (auto &i : symMap) {
438     Symbol *sym = i.second;
439     auto *undef = dyn_cast<Undefined>(sym);
440     if (!undef || sym->deferUndefined)
441       continue;
442     if (undef->getWeakAlias())
443       continue;
444     StringRef name = undef->getName();
445     if (name.startswith("__imp_")) {
446       Symbol *imp = find(name.substr(strlen("__imp_")));
447       if (imp && isa<Defined>(imp))
448         continue;
449     }
450     if (name.contains("_PchSym_"))
451       continue;
452     if (config->autoImport && impSymbol(name))
453       continue;
454     undefs.insert(sym);
455   }
456 
457   reportProblemSymbols(ctx, undefs,
458                        /* localImports */ nullptr, true);
459 }
460 
// Last-chance resolution of every symbol that is still undefined and used in
// a regular object. Each such symbol is, in this order:
//  - replaced by its weak alias target, if that resolves to a Defined symbol;
//  - turned into a DefinedLocalImport if it is "__imp_<x>" and <x> is defined;
//  - skipped if its name contains "_PchSym_";
//  - resolved by MinGW automatic import, if enabled and possible;
//  - otherwise recorded as undefined (and, when config->forceUnresolved is
//    set, replaced by an absolute symbol with value 0).
// The collected undefined symbols and, if enabled, the local imports are then
// passed to reportProblemSymbols (without scanning bitcode files).
void SymbolTable::resolveRemainingUndefines() {
  SmallPtrSet<Symbol *, 8> undefs;
  // Maps each symbol replaced by a local import to the symbol it now imports.
  DenseMap<Symbol *, Symbol *> localImports;

  for (auto &i : symMap) {
    Symbol *sym = i.second;
    auto *undef = dyn_cast<Undefined>(sym);
    if (!undef)
      continue;
    // Undefined symbols not used by any regular object are ignored here.
    if (!sym->isUsedInRegularObj)
      continue;

    StringRef name = undef->getName();

    // A weak alias may have been resolved, so check for that.
    if (Defined *d = undef->getWeakAlias()) {
      // We want to replace Sym with D. However, we can't just blindly
      // copy sizeof(SymbolUnion) bytes from D to Sym because D may be an
      // internal symbol, and internal symbols are stored as "unparented"
      // Symbols. For that reason we need to check which type of symbol we
      // are dealing with and copy the correct number of bytes.
      if (isa<DefinedRegular>(d))
        memcpy(sym, d, sizeof(DefinedRegular));
      else if (isa<DefinedAbsolute>(d))
        memcpy(sym, d, sizeof(DefinedAbsolute));
      else
        memcpy(sym, d, sizeof(SymbolUnion));
      continue;
    }

    // If we can resolve a symbol by removing __imp_ prefix, do that.
    // This odd rule is for compatibility with MSVC linker.
    if (name.startswith("__imp_")) {
      Symbol *imp = find(name.substr(strlen("__imp_")));
      if (imp && isa<Defined>(imp)) {
        auto *d = cast<Defined>(imp);
        replaceSymbol<DefinedLocalImport>(sym, name, d);
        // Remember the chunk created for the local import and record the
        // pairing so that a warning can be emitted for it later.
        localImportChunks.push_back(cast<DefinedLocalImport>(sym)->getChunk());
        localImports[sym] = d;
        continue;
      }
    }

    // We don't want to report missing Microsoft precompiled headers symbols.
    // A proper message will be emitted instead in PDBLinker::aquirePrecompObj
    if (name.contains("_PchSym_"))
      continue;

    if (config->autoImport && handleMinGWAutomaticImport(sym, name))
      continue;

    // Remaining undefined symbols are not fatal if /force is specified.
    // They are replaced with dummy defined symbols.
    if (config->forceUnresolved)
      replaceSymbol<DefinedAbsolute>(sym, name, 0);
    // The symbol is reported in either case (errorOrWarn decides severity).
    undefs.insert(sym);
  }

  reportProblemSymbols(
      ctx, undefs, config->warnLocallyDefinedImported ? &localImports : nullptr,
      false);
}
523 
524 std::pair<Symbol *, bool> SymbolTable::insert(StringRef name) {
525   bool inserted = false;
526   Symbol *&sym = symMap[CachedHashStringRef(name)];
527   if (!sym) {
528     sym = reinterpret_cast<Symbol *>(make<SymbolUnion>());
529     sym->isUsedInRegularObj = false;
530     sym->pendingArchiveLoad = false;
531     sym->canInline = true;
532     inserted = true;
533   }
534   return {sym, inserted};
535 }
536 
537 std::pair<Symbol *, bool> SymbolTable::insert(StringRef name, InputFile *file) {
538   std::pair<Symbol *, bool> result = insert(name);
539   if (!file || !isa<BitcodeFile>(file))
540     result.first->isUsedInRegularObj = true;
541   return result;
542 }
543 
544 Symbol *SymbolTable::addUndefined(StringRef name, InputFile *f,
545                                   bool isWeakAlias) {
546   Symbol *s;
547   bool wasInserted;
548   std::tie(s, wasInserted) = insert(name, f);
549   if (wasInserted || (s->isLazy() && isWeakAlias)) {
550     replaceSymbol<Undefined>(s, name);
551     return s;
552   }
553   if (s->isLazy())
554     forceLazy(s);
555   return s;
556 }
557 
558 void SymbolTable::addLazyArchive(ArchiveFile *f, const Archive::Symbol &sym) {
559   StringRef name = sym.getName();
560   Symbol *s;
561   bool wasInserted;
562   std::tie(s, wasInserted) = insert(name);
563   if (wasInserted) {
564     replaceSymbol<LazyArchive>(s, f, sym);
565     return;
566   }
567   auto *u = dyn_cast<Undefined>(s);
568   if (!u || u->weakAlias || s->pendingArchiveLoad)
569     return;
570   s->pendingArchiveLoad = true;
571   f->addMember(sym);
572 }
573 
574 void SymbolTable::addLazyObject(InputFile *f, StringRef n) {
575   assert(f->lazy);
576   Symbol *s;
577   bool wasInserted;
578   std::tie(s, wasInserted) = insert(n, f);
579   if (wasInserted) {
580     replaceSymbol<LazyObject>(s, f, n);
581     return;
582   }
583   auto *u = dyn_cast<Undefined>(s);
584   if (!u || u->weakAlias || s->pendingArchiveLoad)
585     return;
586   s->pendingArchiveLoad = true;
587   f->lazy = false;
588   addFile(f);
589 }
590 
591 void SymbolTable::addLazyDLLSymbol(DLLFile *f, DLLFile::Symbol *sym,
592                                    StringRef n) {
593   Symbol *s;
594   bool wasInserted;
595   std::tie(s, wasInserted) = insert(n);
596   if (wasInserted) {
597     replaceSymbol<LazyDLLSymbol>(s, f, sym, n);
598     return;
599   }
600   auto *u = dyn_cast<Undefined>(s);
601   if (!u || u->weakAlias || s->pendingArchiveLoad)
602     return;
603   s->pendingArchiveLoad = true;
604   f->makeImport(sym);
605 }
606 
607 static std::string getSourceLocationBitcode(BitcodeFile *file) {
608   std::string res("\n>>> defined at ");
609   StringRef source = file->obj->getSourceFileName();
610   if (!source.empty())
611     res += source.str() + "\n>>>            ";
612   res += toString(file);
613   return res;
614 }
615 
616 static std::string getSourceLocationObj(ObjFile *file, SectionChunk *sc,
617                                         uint32_t offset, StringRef name) {
618   Optional<std::pair<StringRef, uint32_t>> fileLine;
619   if (sc)
620     fileLine = getFileLine(sc, offset);
621   if (!fileLine)
622     fileLine = file->getVariableLocation(name);
623 
624   std::string res;
625   llvm::raw_string_ostream os(res);
626   os << "\n>>> defined at ";
627   if (fileLine)
628     os << fileLine->first << ":" << fileLine->second << "\n>>>            ";
629   os << toString(file);
630   return os.str();
631 }
632 
633 static std::string getSourceLocation(InputFile *file, SectionChunk *sc,
634                                      uint32_t offset, StringRef name) {
635   if (!file)
636     return "";
637   if (auto *o = dyn_cast<ObjFile>(file))
638     return getSourceLocationObj(o, sc, offset, name);
639   if (auto *b = dyn_cast<BitcodeFile>(file))
640     return getSourceLocationBitcode(b);
641   return "\n>>> defined at " + toString(file);
642 }
643 
644 // Construct and print an error message in the form of:
645 //
646 //   lld-link: error: duplicate symbol: foo
647 //   >>> defined at bar.c:30
648 //   >>>            bar.o
649 //   >>> defined at baz.c:563
650 //   >>>            baz.o
651 void SymbolTable::reportDuplicate(Symbol *existing, InputFile *newFile,
652                                   SectionChunk *newSc,
653                                   uint32_t newSectionOffset) {
654   std::string msg;
655   llvm::raw_string_ostream os(msg);
656   os << "duplicate symbol: " << toString(*existing);
657 
658   DefinedRegular *d = dyn_cast<DefinedRegular>(existing);
659   if (d && isa<ObjFile>(d->getFile())) {
660     os << getSourceLocation(d->getFile(), d->getChunk(), d->getValue(),
661                             existing->getName());
662   } else {
663     os << getSourceLocation(existing->getFile(), nullptr, 0, "");
664   }
665   os << getSourceLocation(newFile, newSc, newSectionOffset,
666                           existing->getName());
667 
668   if (config->forceMultiple)
669     warn(os.str());
670   else
671     error(os.str());
672 }
673 
674 Symbol *SymbolTable::addAbsolute(StringRef n, COFFSymbolRef sym) {
675   Symbol *s;
676   bool wasInserted;
677   std::tie(s, wasInserted) = insert(n, nullptr);
678   s->isUsedInRegularObj = true;
679   if (wasInserted || isa<Undefined>(s) || s->isLazy())
680     replaceSymbol<DefinedAbsolute>(s, n, sym);
681   else if (auto *da = dyn_cast<DefinedAbsolute>(s)) {
682     if (da->getVA() != sym.getValue())
683       reportDuplicate(s, nullptr);
684   } else if (!isa<DefinedCOFF>(s))
685     reportDuplicate(s, nullptr);
686   return s;
687 }
688 
689 Symbol *SymbolTable::addAbsolute(StringRef n, uint64_t va) {
690   Symbol *s;
691   bool wasInserted;
692   std::tie(s, wasInserted) = insert(n, nullptr);
693   s->isUsedInRegularObj = true;
694   if (wasInserted || isa<Undefined>(s) || s->isLazy())
695     replaceSymbol<DefinedAbsolute>(s, n, va);
696   else if (auto *da = dyn_cast<DefinedAbsolute>(s)) {
697     if (da->getVA() != va)
698       reportDuplicate(s, nullptr);
699   } else if (!isa<DefinedCOFF>(s))
700     reportDuplicate(s, nullptr);
701   return s;
702 }
703 
704 Symbol *SymbolTable::addSynthetic(StringRef n, Chunk *c) {
705   Symbol *s;
706   bool wasInserted;
707   std::tie(s, wasInserted) = insert(n, nullptr);
708   s->isUsedInRegularObj = true;
709   if (wasInserted || isa<Undefined>(s) || s->isLazy())
710     replaceSymbol<DefinedSynthetic>(s, n, c);
711   else if (!isa<DefinedCOFF>(s))
712     reportDuplicate(s, nullptr);
713   return s;
714 }
715 
716 Symbol *SymbolTable::addRegular(InputFile *f, StringRef n,
717                                 const coff_symbol_generic *sym, SectionChunk *c,
718                                 uint32_t sectionOffset) {
719   Symbol *s;
720   bool wasInserted;
721   std::tie(s, wasInserted) = insert(n, f);
722   if (wasInserted || !isa<DefinedRegular>(s))
723     replaceSymbol<DefinedRegular>(s, f, n, /*IsCOMDAT*/ false,
724                                   /*IsExternal*/ true, sym, c);
725   else
726     reportDuplicate(s, f, c, sectionOffset);
727   return s;
728 }
729 
730 std::pair<DefinedRegular *, bool>
731 SymbolTable::addComdat(InputFile *f, StringRef n,
732                        const coff_symbol_generic *sym) {
733   Symbol *s;
734   bool wasInserted;
735   std::tie(s, wasInserted) = insert(n, f);
736   if (wasInserted || !isa<DefinedRegular>(s)) {
737     replaceSymbol<DefinedRegular>(s, f, n, /*IsCOMDAT*/ true,
738                                   /*IsExternal*/ true, sym, nullptr);
739     return {cast<DefinedRegular>(s), true};
740   }
741   auto *existingSymbol = cast<DefinedRegular>(s);
742   if (!existingSymbol->isCOMDAT)
743     reportDuplicate(s, f);
744   return {existingSymbol, false};
745 }
746 
747 Symbol *SymbolTable::addCommon(InputFile *f, StringRef n, uint64_t size,
748                                const coff_symbol_generic *sym, CommonChunk *c) {
749   Symbol *s;
750   bool wasInserted;
751   std::tie(s, wasInserted) = insert(n, f);
752   if (wasInserted || !isa<DefinedCOFF>(s))
753     replaceSymbol<DefinedCommon>(s, f, n, size, sym, c);
754   else if (auto *dc = dyn_cast<DefinedCommon>(s))
755     if (size > dc->getSize())
756       replaceSymbol<DefinedCommon>(s, f, n, size, sym, c);
757   return s;
758 }
759 
760 Symbol *SymbolTable::addImportData(StringRef n, ImportFile *f) {
761   Symbol *s;
762   bool wasInserted;
763   std::tie(s, wasInserted) = insert(n, nullptr);
764   s->isUsedInRegularObj = true;
765   if (wasInserted || isa<Undefined>(s) || s->isLazy()) {
766     replaceSymbol<DefinedImportData>(s, n, f);
767     return s;
768   }
769 
770   reportDuplicate(s, f);
771   return nullptr;
772 }
773 
774 Symbol *SymbolTable::addImportThunk(StringRef name, DefinedImportData *id,
775                                     uint16_t machine) {
776   Symbol *s;
777   bool wasInserted;
778   std::tie(s, wasInserted) = insert(name, nullptr);
779   s->isUsedInRegularObj = true;
780   if (wasInserted || isa<Undefined>(s) || s->isLazy()) {
781     replaceSymbol<DefinedImportThunk>(s, name, id, machine);
782     return s;
783   }
784 
785   reportDuplicate(s, id->file);
786   return nullptr;
787 }
788 
789 void SymbolTable::addLibcall(StringRef name) {
790   Symbol *sym = findUnderscore(name);
791   if (!sym)
792     return;
793 
794   if (auto *l = dyn_cast<LazyArchive>(sym)) {
795     MemoryBufferRef mb = l->getMemberBuffer();
796     if (isBitcode(mb))
797       addUndefined(sym->getName());
798   } else if (LazyObject *o = dyn_cast<LazyObject>(sym)) {
799     if (isBitcode(o->file->mb))
800       addUndefined(sym->getName());
801   }
802 }
803 
804 std::vector<Chunk *> SymbolTable::getChunks() const {
805   std::vector<Chunk *> res;
806   for (ObjFile *file : ctx.objFileInstances) {
807     ArrayRef<Chunk *> v = file->getChunks();
808     res.insert(res.end(), v.begin(), v.end());
809   }
810   return res;
811 }
812 
813 Symbol *SymbolTable::find(StringRef name) const {
814   return symMap.lookup(CachedHashStringRef(name));
815 }
816 
817 Symbol *SymbolTable::findUnderscore(StringRef name) const {
818   if (config->machine == I386)
819     return find(("_" + name).str());
820   return find(name);
821 }
822 
823 // Return all symbols that start with Prefix, possibly ignoring the first
824 // character of Prefix or the first character symbol.
825 std::vector<Symbol *> SymbolTable::getSymsWithPrefix(StringRef prefix) {
826   std::vector<Symbol *> syms;
827   for (auto pair : symMap) {
828     StringRef name = pair.first.val();
829     if (name.startswith(prefix) || name.startswith(prefix.drop_front()) ||
830         name.drop_front().startswith(prefix) ||
831         name.drop_front().startswith(prefix.drop_front())) {
832       syms.push_back(pair.second);
833     }
834   }
835   return syms;
836 }
837 
838 Symbol *SymbolTable::findMangle(StringRef name) {
839   if (Symbol *sym = find(name))
840     if (!isa<Undefined>(sym))
841       return sym;
842 
843   // Efficient fuzzy string lookup is impossible with a hash table, so iterate
844   // the symbol table once and collect all possibly matching symbols into this
845   // vector. Then compare each possibly matching symbol with each possible
846   // mangling.
847   std::vector<Symbol *> syms = getSymsWithPrefix(name);
848   auto findByPrefix = [&syms](const Twine &t) -> Symbol * {
849     std::string prefix = t.str();
850     for (auto *s : syms)
851       if (s->getName().startswith(prefix))
852         return s;
853     return nullptr;
854   };
855 
856   // For non-x86, just look for C++ functions.
857   if (config->machine != I386)
858     return findByPrefix("?" + name + "@@Y");
859 
860   if (!name.startswith("_"))
861     return nullptr;
862   // Search for x86 stdcall function.
863   if (Symbol *s = findByPrefix(name + "@"))
864     return s;
865   // Search for x86 fastcall function.
866   if (Symbol *s = findByPrefix("@" + name.substr(1) + "@"))
867     return s;
868   // Search for x86 vectorcall function.
869   if (Symbol *s = findByPrefix(name.substr(1) + "@@"))
870     return s;
871   // Search for x86 C++ non-member function.
872   return findByPrefix("?" + name.substr(1) + "@@Y");
873 }
874 
875 Symbol *SymbolTable::addUndefined(StringRef name) {
876   return addUndefined(name, nullptr, false);
877 }
878 
879 void SymbolTable::compileBitcodeFiles() {
880   if (ctx.bitcodeFileInstances.empty())
881     return;
882 
883   ScopedTimer t(ctx.ltoTimer);
884   lto.reset(new BitcodeCompiler());
885   for (BitcodeFile *f : ctx.bitcodeFileInstances)
886     lto->add(*f);
887   for (InputFile *newObj : lto->compile(ctx)) {
888     ObjFile *obj = cast<ObjFile>(newObj);
889     obj->parse();
890     ctx.objFileInstances.push_back(obj);
891   }
892 }
893 
894 } // namespace coff
895 } // namespace lld
896