1 //===- SymbolTable.cpp ----------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "SymbolTable.h"
10 #include "ConcatOutputSection.h"
11 #include "Config.h"
12 #include "InputFiles.h"
13 #include "InputSection.h"
14 #include "Symbols.h"
15 #include "SyntheticSections.h"
16 #include "lld/Common/ErrorHandler.h"
17 #include "lld/Common/Memory.h"
18 #include "llvm/Demangle/Demangle.h"
19 
20 using namespace llvm;
21 using namespace lld;
22 using namespace lld::macho;
23 
24 Symbol *SymbolTable::find(CachedHashStringRef cachedName) {
25   auto it = symMap.find(cachedName);
26   if (it == symMap.end())
27     return nullptr;
28   return symVector[it->second];
29 }
30 
31 std::pair<Symbol *, bool> SymbolTable::insert(StringRef name,
32                                               const InputFile *file) {
33   auto p = symMap.insert({CachedHashStringRef(name), (int)symVector.size()});
34 
35   Symbol *sym;
36   if (!p.second) {
37     // Name already present in the symbol table.
38     sym = symVector[p.first->second];
39   } else {
40     // Name is a new symbol.
41     sym = reinterpret_cast<Symbol *>(make<SymbolUnion>());
42     symVector.push_back(sym);
43   }
44 
45   sym->isUsedInRegularObj |= !file || isa<ObjFile>(file);
46   return {sym, p.second};
47 }
48 
49 namespace {
50 struct DuplicateSymbolDiag {
51   // Pair containing source location and source file
52   const std::pair<std::string, std::string> src1;
53   const std::pair<std::string, std::string> src2;
54   const Symbol *sym;
55 
56   DuplicateSymbolDiag(const std::pair<std::string, std::string> src1,
57                       const std::pair<std::string, std::string> src2,
58                       const Symbol *sym)
59       : src1(src1), src2(src2), sym(sym) {}
60 };
61 SmallVector<DuplicateSymbolDiag> dupSymDiags;
62 } // namespace
63 
64 Defined *SymbolTable::addDefined(StringRef name, InputFile *file,
65                                  InputSection *isec, uint64_t value,
66                                  uint64_t size, bool isWeakDef,
67                                  bool isPrivateExtern, bool isThumb,
68                                  bool isReferencedDynamically, bool noDeadStrip,
69                                  bool isWeakDefCanBeHidden) {
70   bool overridesWeakDef = false;
71   auto [s, wasInserted] = insert(name, file);
72 
73   assert(!file || !isa<BitcodeFile>(file) || !isec);
74 
75   if (!wasInserted) {
76     if (auto *defined = dyn_cast<Defined>(s)) {
77       if (isWeakDef) {
78         // See further comment in createDefined() in InputFiles.cpp
79         if (defined->isWeakDef()) {
80           defined->privateExtern &= isPrivateExtern;
81           defined->weakDefCanBeHidden &= isWeakDefCanBeHidden;
82           defined->referencedDynamically |= isReferencedDynamically;
83           defined->noDeadStrip |= noDeadStrip;
84         }
85         // FIXME: Handle this for bitcode files.
86         if (auto concatIsec = dyn_cast_or_null<ConcatInputSection>(isec))
87           concatIsec->wasCoalesced = true;
88         return defined;
89       }
90 
91       if (defined->isWeakDef()) {
92         // FIXME: Handle this for bitcode files.
93         if (auto concatIsec =
94                 dyn_cast_or_null<ConcatInputSection>(defined->isec)) {
95           concatIsec->wasCoalesced = true;
96           concatIsec->symbols.erase(llvm::find(concatIsec->symbols, defined));
97         }
98       } else {
99         std::string srcLoc1 = defined->getSourceLocation();
100         std::string srcLoc2 = isec ? isec->getSourceLocation(value) : "";
101         std::string srcFile1 = toString(defined->getFile());
102         std::string srcFile2 = toString(file);
103 
104         dupSymDiags.push_back({make_pair(srcLoc1, srcFile1),
105                                make_pair(srcLoc2, srcFile2), defined});
106       }
107 
108     } else if (auto *dysym = dyn_cast<DylibSymbol>(s)) {
109       overridesWeakDef = !isWeakDef && dysym->isWeakDef();
110       dysym->unreference();
111     } else if (auto *undef = dyn_cast<Undefined>(s)) {
112       // Preserve the original bitcode file name (instead of using the object
113       // file name).
114       if (undef->wasBitcodeSymbol)
115         file = undef->getFile();
116     }
117     // Defined symbols take priority over other types of symbols, so in case
118     // of a name conflict, we fall through to the replaceSymbol() call below.
119   }
120 
121   // With -flat_namespace, all extern symbols in dylibs are interposable.
122   // FIXME: Add support for `-interposable` (PR53680).
123   bool interposable = config->namespaceKind == NamespaceKind::flat &&
124                       config->outputType != MachO::MH_EXECUTE &&
125                       !isPrivateExtern;
126   Defined *defined = replaceSymbol<Defined>(
127       s, name, file, isec, value, size, isWeakDef, /*isExternal=*/true,
128       isPrivateExtern, /*includeInSymtab=*/true, isThumb,
129       isReferencedDynamically, noDeadStrip, overridesWeakDef,
130       isWeakDefCanBeHidden, interposable);
131   return defined;
132 }
133 
134 Defined *SymbolTable::aliasDefined(Defined *src, StringRef target,
135                                    InputFile *newFile, bool makePrivateExtern) {
136   bool isPrivateExtern = makePrivateExtern || src->privateExtern;
137   return addDefined(target, newFile, src->isec, src->value, src->size,
138                     src->isWeakDef(), isPrivateExtern, src->thumb,
139                     src->referencedDynamically, src->noDeadStrip,
140                     src->weakDefCanBeHidden);
141 }
142 
143 Symbol *SymbolTable::addUndefined(StringRef name, InputFile *file,
144                                   bool isWeakRef) {
145   auto [s, wasInserted] = insert(name, file);
146 
147   RefState refState = isWeakRef ? RefState::Weak : RefState::Strong;
148 
149   if (wasInserted)
150     replaceSymbol<Undefined>(s, name, file, refState,
151                              /*wasBitcodeSymbol=*/false);
152   else if (auto *lazy = dyn_cast<LazyArchive>(s))
153     lazy->fetchArchiveMember();
154   else if (isa<LazyObject>(s))
155     extract(*s->getFile(), s->getName());
156   else if (auto *dynsym = dyn_cast<DylibSymbol>(s))
157     dynsym->reference(refState);
158   else if (auto *undefined = dyn_cast<Undefined>(s))
159     undefined->refState = std::max(undefined->refState, refState);
160   return s;
161 }
162 
163 Symbol *SymbolTable::addCommon(StringRef name, InputFile *file, uint64_t size,
164                                uint32_t align, bool isPrivateExtern) {
165   auto [s, wasInserted] = insert(name, file);
166 
167   if (!wasInserted) {
168     if (auto *common = dyn_cast<CommonSymbol>(s)) {
169       if (size < common->size)
170         return s;
171     } else if (isa<Defined>(s)) {
172       return s;
173     }
174     // Common symbols take priority over all non-Defined symbols, so in case of
175     // a name conflict, we fall through to the replaceSymbol() call below.
176   }
177 
178   replaceSymbol<CommonSymbol>(s, name, file, size, align, isPrivateExtern);
179   return s;
180 }
181 
182 Symbol *SymbolTable::addDylib(StringRef name, DylibFile *file, bool isWeakDef,
183                               bool isTlv) {
184   auto [s, wasInserted] = insert(name, file);
185 
186   RefState refState = RefState::Unreferenced;
187   if (!wasInserted) {
188     if (auto *defined = dyn_cast<Defined>(s)) {
189       if (isWeakDef && !defined->isWeakDef())
190         defined->overridesWeakDef = true;
191     } else if (auto *undefined = dyn_cast<Undefined>(s)) {
192       refState = undefined->refState;
193     } else if (auto *dysym = dyn_cast<DylibSymbol>(s)) {
194       refState = dysym->getRefState();
195     }
196   }
197 
198   bool isDynamicLookup = file == nullptr;
199   if (wasInserted || isa<Undefined>(s) ||
200       (isa<DylibSymbol>(s) &&
201        ((!isWeakDef && s->isWeakDef()) ||
202         (!isDynamicLookup && cast<DylibSymbol>(s)->isDynamicLookup())))) {
203     if (auto *dynsym = dyn_cast<DylibSymbol>(s))
204       dynsym->unreference();
205     replaceSymbol<DylibSymbol>(s, file, name, isWeakDef, refState, isTlv);
206   }
207 
208   return s;
209 }
210 
211 Symbol *SymbolTable::addDynamicLookup(StringRef name) {
212   return addDylib(name, /*file=*/nullptr, /*isWeakDef=*/false, /*isTlv=*/false);
213 }
214 
215 Symbol *SymbolTable::addLazyArchive(StringRef name, ArchiveFile *file,
216                                     const object::Archive::Symbol &sym) {
217   auto [s, wasInserted] = insert(name, file);
218 
219   if (wasInserted) {
220     replaceSymbol<LazyArchive>(s, file, sym);
221   } else if (isa<Undefined>(s)) {
222     file->fetch(sym);
223   } else if (auto *dysym = dyn_cast<DylibSymbol>(s)) {
224     if (dysym->isWeakDef()) {
225       if (dysym->getRefState() != RefState::Unreferenced)
226         file->fetch(sym);
227       else
228         replaceSymbol<LazyArchive>(s, file, sym);
229     }
230   }
231   return s;
232 }
233 
234 Symbol *SymbolTable::addLazyObject(StringRef name, InputFile &file) {
235   auto [s, wasInserted] = insert(name, &file);
236 
237   if (wasInserted) {
238     replaceSymbol<LazyObject>(s, file, name);
239   } else if (isa<Undefined>(s)) {
240     extract(file, name);
241   } else if (auto *dysym = dyn_cast<DylibSymbol>(s)) {
242     if (dysym->isWeakDef()) {
243       if (dysym->getRefState() != RefState::Unreferenced)
244         extract(file, name);
245       else
246         replaceSymbol<LazyObject>(s, file, name);
247     }
248   }
249   return s;
250 }
251 
252 Defined *SymbolTable::addSynthetic(StringRef name, InputSection *isec,
253                                    uint64_t value, bool isPrivateExtern,
254                                    bool includeInSymtab,
255                                    bool referencedDynamically) {
256   assert(!isec || !isec->getFile()); // See makeSyntheticInputSection().
257   Defined *s =
258       addDefined(name, /*file=*/nullptr, isec, value, /*size=*/0,
259                  /*isWeakDef=*/false, isPrivateExtern, /*isThumb=*/false,
260                  referencedDynamically, /*noDeadStrip=*/false,
261                  /*isWeakDefCanBeHidden=*/false);
262   s->includeInSymtab = includeInSymtab;
263   return s;
264 }
265 
266 enum class Boundary {
267   Start,
268   End,
269 };
270 
271 static Defined *createBoundarySymbol(const Undefined &sym) {
272   return symtab->addSynthetic(
273       sym.getName(), /*isec=*/nullptr, /*value=*/-1, /*isPrivateExtern=*/true,
274       /*includeInSymtab=*/false, /*referencedDynamically=*/false);
275 }
276 
277 static void handleSectionBoundarySymbol(const Undefined &sym, StringRef segSect,
278                                         Boundary which) {
279   auto [segName, sectName] = segSect.split('$');
280 
281   // Attach the symbol to any InputSection that will end up in the right
282   // OutputSection -- it doesn't matter which one we pick.
283   // Don't bother looking through inputSections for a matching
284   // ConcatInputSection -- we need to create ConcatInputSection for
285   // non-existing sections anyways, and that codepath works even if we should
286   // already have a ConcatInputSection with the right name.
287 
288   OutputSection *osec = nullptr;
289   // This looks for __TEXT,__cstring etc.
290   for (SyntheticSection *ssec : syntheticSections)
291     if (ssec->segname == segName && ssec->name == sectName) {
292       osec = ssec->isec->parent;
293       break;
294     }
295 
296   if (!osec) {
297     ConcatInputSection *isec = makeSyntheticInputSection(segName, sectName);
298 
299     // This runs after markLive() and is only called for Undefineds that are
300     // live. Marking the isec live ensures an OutputSection is created that the
301     // start/end symbol can refer to.
302     assert(sym.isLive());
303     isec->live = true;
304 
305     // This runs after gatherInputSections(), so need to explicitly set parent
306     // and add to inputSections.
307     osec = isec->parent = ConcatOutputSection::getOrCreateForInput(isec);
308     inputSections.push_back(isec);
309   }
310 
311   if (which == Boundary::Start)
312     osec->sectionStartSymbols.push_back(createBoundarySymbol(sym));
313   else
314     osec->sectionEndSymbols.push_back(createBoundarySymbol(sym));
315 }
316 
317 static void handleSegmentBoundarySymbol(const Undefined &sym, StringRef segName,
318                                         Boundary which) {
319   OutputSegment *seg = getOrCreateOutputSegment(segName);
320   if (which == Boundary::Start)
321     seg->segmentStartSymbols.push_back(createBoundarySymbol(sym));
322   else
323     seg->segmentEndSymbols.push_back(createBoundarySymbol(sym));
324 }
325 
326 // Try to find a definition for an undefined symbol.
327 // Returns true if a definition was found and no diagnostics are needed.
328 static bool recoverFromUndefinedSymbol(const Undefined &sym) {
329   // Handle start/end symbols.
330   StringRef name = sym.getName();
331   if (name.consume_front("section$start$")) {
332     handleSectionBoundarySymbol(sym, name, Boundary::Start);
333     return true;
334   }
335   if (name.consume_front("section$end$")) {
336     handleSectionBoundarySymbol(sym, name, Boundary::End);
337     return true;
338   }
339   if (name.consume_front("segment$start$")) {
340     handleSegmentBoundarySymbol(sym, name, Boundary::Start);
341     return true;
342   }
343   if (name.consume_front("segment$end$")) {
344     handleSegmentBoundarySymbol(sym, name, Boundary::End);
345     return true;
346   }
347 
348   // Leave dtrace symbols, since we will handle them when we do the relocation
349   if (name.startswith("___dtrace_"))
350     return true;
351 
352   // Handle -U.
353   if (config->explicitDynamicLookups.count(sym.getName())) {
354     symtab->addDynamicLookup(sym.getName());
355     return true;
356   }
357 
358   // Handle -undefined.
359   if (config->undefinedSymbolTreatment ==
360           UndefinedSymbolTreatment::dynamic_lookup ||
361       config->undefinedSymbolTreatment == UndefinedSymbolTreatment::suppress) {
362     symtab->addDynamicLookup(sym.getName());
363     return true;
364   }
365 
366   // We do not return true here, as we still need to print diagnostics.
367   if (config->undefinedSymbolTreatment == UndefinedSymbolTreatment::warning)
368     symtab->addDynamicLookup(sym.getName());
369 
370   return false;
371 }
372 
373 namespace {
374 struct UndefinedDiag {
375   struct SectionAndOffset {
376     const InputSection *isec;
377     uint64_t offset;
378   };
379 
380   std::vector<SectionAndOffset> codeReferences;
381   std::vector<std::string> otherReferences;
382 };
383 
384 MapVector<const Undefined *, UndefinedDiag> undefs;
385 }
386 
387 void macho::reportPendingDuplicateSymbols() {
388   for (const auto &duplicate : dupSymDiags) {
389     if (!config->deadStripDuplicates || duplicate.sym->isLive()) {
390       std::string message =
391           "duplicate symbol: " + toString(*duplicate.sym) + "\n>>> defined in ";
392       if (!duplicate.src1.first.empty())
393         message += duplicate.src1.first + "\n>>>            ";
394       message += duplicate.src1.second + "\n>>> defined in ";
395       if (!duplicate.src2.first.empty())
396         message += duplicate.src2.first + "\n>>>            ";
397       error(message + duplicate.src2.second);
398     }
399   }
400 }
401 
402 // Check whether the definition name def is a mangled function name that matches
403 // the reference name ref.
404 static bool canSuggestExternCForCXX(StringRef ref, StringRef def) {
405   llvm::ItaniumPartialDemangler d;
406   std::string name = def.str();
407   if (d.partialDemangle(name.c_str()))
408     return false;
409   char *buf = d.getFunctionName(nullptr, nullptr);
410   if (!buf)
411     return false;
412   bool ret = ref == buf;
413   free(buf);
414   return ret;
415 }
416 
417 // Suggest an alternative spelling of an "undefined symbol" diagnostic. Returns
418 // the suggested symbol, which is either in the symbol table, or in the same
419 // file of sym.
420 static const Symbol *getAlternativeSpelling(const Undefined &sym,
421                                             std::string &pre_hint,
422                                             std::string &post_hint) {
423   DenseMap<StringRef, const Symbol *> map;
424   if (sym.getFile() && sym.getFile()->kind() == InputFile::ObjKind) {
425     // Build a map of local defined symbols.
426     for (const Symbol *s : sym.getFile()->symbols)
427       if (auto *defined = dyn_cast_or_null<Defined>(s))
428         if (!defined->isExternal())
429           map.try_emplace(s->getName(), s);
430   }
431 
432   auto suggest = [&](StringRef newName) -> const Symbol * {
433     // If defined locally.
434     if (const Symbol *s = map.lookup(newName))
435       return s;
436 
437     // If in the symbol table and not undefined.
438     if (const Symbol *s = symtab->find(newName))
439       if (dyn_cast<Undefined>(s) == nullptr)
440         return s;
441 
442     return nullptr;
443   };
444 
445   // This loop enumerates all strings of Levenshtein distance 1 as typo
446   // correction candidates and suggests the one that exists as a non-undefined
447   // symbol.
448   StringRef name = sym.getName();
449   for (size_t i = 0, e = name.size(); i != e + 1; ++i) {
450     // Insert a character before name[i].
451     std::string newName = (name.substr(0, i) + "0" + name.substr(i)).str();
452     for (char c = '0'; c <= 'z'; ++c) {
453       newName[i] = c;
454       if (const Symbol *s = suggest(newName))
455         return s;
456     }
457     if (i == e)
458       break;
459 
460     // Substitute name[i].
461     newName = std::string(name);
462     for (char c = '0'; c <= 'z'; ++c) {
463       newName[i] = c;
464       if (const Symbol *s = suggest(newName))
465         return s;
466     }
467 
468     // Transpose name[i] and name[i+1]. This is of edit distance 2 but it is
469     // common.
470     if (i + 1 < e) {
471       newName[i] = name[i + 1];
472       newName[i + 1] = name[i];
473       if (const Symbol *s = suggest(newName))
474         return s;
475     }
476 
477     // Delete name[i].
478     newName = (name.substr(0, i) + name.substr(i + 1)).str();
479     if (const Symbol *s = suggest(newName))
480       return s;
481   }
482 
483   // Case mismatch, e.g. Foo vs FOO.
484   for (auto &it : map)
485     if (name.equals_insensitive(it.first))
486       return it.second;
487   for (Symbol *sym : symtab->getSymbols())
488     if (dyn_cast<Undefined>(sym) == nullptr &&
489         name.equals_insensitive(sym->getName()))
490       return sym;
491 
492   // The reference may be a mangled name while the definition is not. Suggest a
493   // missing extern "C".
494   if (name.startswith("__Z")) {
495     std::string buf = name.str();
496     llvm::ItaniumPartialDemangler d;
497     if (!d.partialDemangle(buf.c_str()))
498       if (char *buf = d.getFunctionName(nullptr, nullptr)) {
499         const Symbol *s = suggest((Twine("_") + buf).str());
500         free(buf);
501         if (s) {
502           pre_hint = ": extern \"C\" ";
503           return s;
504         }
505       }
506   } else {
507     StringRef name_without_underscore = name;
508     name_without_underscore.consume_front("_");
509     const Symbol *s = nullptr;
510     for (auto &it : map)
511       if (canSuggestExternCForCXX(name_without_underscore, it.first)) {
512         s = it.second;
513         break;
514       }
515     if (!s)
516       for (Symbol *sym : symtab->getSymbols())
517         if (canSuggestExternCForCXX(name_without_underscore, sym->getName())) {
518           s = sym;
519           break;
520         }
521     if (s) {
522       pre_hint = " to declare ";
523       post_hint = " as extern \"C\"?";
524       return s;
525     }
526   }
527 
528   return nullptr;
529 }
530 
531 static void reportUndefinedSymbol(const Undefined &sym,
532                                   const UndefinedDiag &locations,
533                                   bool correctSpelling) {
534   std::string message = "undefined symbol";
535   if (config->archMultiple)
536     message += (" for arch " + getArchitectureName(config->arch())).str();
537   message += ": " + toString(sym);
538 
539   const size_t maxUndefinedReferences = 3;
540   size_t i = 0;
541   for (const std::string &loc : locations.otherReferences) {
542     if (i >= maxUndefinedReferences)
543       break;
544     message += "\n>>> referenced by " + loc;
545     ++i;
546   }
547 
548   for (const UndefinedDiag::SectionAndOffset &loc : locations.codeReferences) {
549     if (i >= maxUndefinedReferences)
550       break;
551     message += "\n>>> referenced by ";
552     std::string src = loc.isec->getSourceLocation(loc.offset);
553     if (!src.empty())
554       message += src + "\n>>>               ";
555     message += loc.isec->getLocation(loc.offset);
556     ++i;
557   }
558 
559   size_t totalReferences =
560       locations.otherReferences.size() + locations.codeReferences.size();
561   if (totalReferences > i)
562     message +=
563         ("\n>>> referenced " + Twine(totalReferences - i) + " more times")
564             .str();
565 
566   if (correctSpelling) {
567     std::string pre_hint = ": ", post_hint;
568     if (const Symbol *corrected =
569             getAlternativeSpelling(sym, pre_hint, post_hint)) {
570       message +=
571           "\n>>> did you mean" + pre_hint + toString(*corrected) + post_hint;
572       if (corrected->getFile())
573         message += "\n>>> defined in: " + toString(corrected->getFile());
574     }
575   }
576 
577   if (config->undefinedSymbolTreatment == UndefinedSymbolTreatment::error)
578     error(message);
579   else if (config->undefinedSymbolTreatment ==
580            UndefinedSymbolTreatment::warning)
581     warn(message);
582   else
583     assert(false && "diagnostics make sense for -undefined error|warning only");
584 }
585 
586 void macho::reportPendingUndefinedSymbols() {
587   // Enable spell corrector for the first 2 diagnostics.
588   for (const auto &[i, undef] : llvm::enumerate(undefs))
589     reportUndefinedSymbol(*undef.first, undef.second, i < 2);
590 
591   // This function is called multiple times during execution. Clear the printed
592   // diagnostics to avoid printing the same things again the next time.
593   undefs.clear();
594 }
595 
596 void macho::treatUndefinedSymbol(const Undefined &sym, StringRef source) {
597   if (recoverFromUndefinedSymbol(sym))
598     return;
599 
600   undefs[&sym].otherReferences.push_back(source.str());
601 }
602 
603 void macho::treatUndefinedSymbol(const Undefined &sym, const InputSection *isec,
604                                  uint64_t offset) {
605   if (recoverFromUndefinedSymbol(sym))
606     return;
607 
608   undefs[&sym].codeReferences.push_back({isec, offset});
609 }
610 
611 std::unique_ptr<SymbolTable> macho::symtab;
612