1 //===- lib/Linker/LinkModules.cpp - Module Linker Implementation ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the LLVM module linker.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "LinkDiagnosticInfo.h"
14 #include "llvm-c/Linker.h"
15 #include "llvm/ADT/SetVector.h"
16 #include "llvm/IR/Comdat.h"
17 #include "llvm/IR/DiagnosticPrinter.h"
18 #include "llvm/IR/GlobalValue.h"
19 #include "llvm/IR/LLVMContext.h"
20 #include "llvm/IR/Module.h"
21 #include "llvm/Linker/Linker.h"
22 #include "llvm/Support/Error.h"
23 using namespace llvm;
24 
25 namespace {
26 
27 /// This is an implementation class for the LinkModules function, which is the
28 /// entrypoint for this file.
29 class ModuleLinker {
30   IRMover &Mover;
31   std::unique_ptr<Module> SrcM;
32 
33   SetVector<GlobalValue *> ValuesToLink;
34 
35   /// For symbol clashes, prefer those from Src.
36   unsigned Flags;
37 
38   /// List of global value names that should be internalized.
39   StringSet<> Internalize;
40 
41   /// Function that will perform the actual internalization. The reason for a
42   /// callback is that the linker cannot call internalizeModule without
43   /// creating a circular dependency between IPO and the linker.
44   std::function<void(Module &, const StringSet<> &)> InternalizeCallback;
45 
46   /// Used as the callback for lazy linking.
47   /// The mover has just hit GV and we have to decide if it, and other members
48   /// of the same comdat, should be linked. Every member to be linked is passed
49   /// to Add.
50   void addLazyFor(GlobalValue &GV, const IRMover::ValueAdder &Add);
51 
52   bool shouldOverrideFromSrc() { return Flags & Linker::OverrideFromSrc; }
53   bool shouldLinkOnlyNeeded() { return Flags & Linker::LinkOnlyNeeded; }
54 
55   bool shouldLinkFromSource(bool &LinkFromSrc, const GlobalValue &Dest,
56                             const GlobalValue &Src);
57 
58   /// Should we have mover and linker error diag info?
59   bool emitError(const Twine &Message) {
60     SrcM->getContext().diagnose(LinkDiagnosticInfo(DS_Error, Message));
61     return true;
62   }
63 
64   bool getComdatLeader(Module &M, StringRef ComdatName,
65                        const GlobalVariable *&GVar);
66   bool computeResultingSelectionKind(StringRef ComdatName,
67                                      Comdat::SelectionKind Src,
68                                      Comdat::SelectionKind Dst,
69                                      Comdat::SelectionKind &Result,
70                                      bool &LinkFromSrc);
71   std::map<const Comdat *, std::pair<Comdat::SelectionKind, bool>>
72       ComdatsChosen;
73   bool getComdatResult(const Comdat *SrcC, Comdat::SelectionKind &SK,
74                        bool &LinkFromSrc);
75   // Keep track of the lazy linked global members of each comdat in source.
76   DenseMap<const Comdat *, std::vector<GlobalValue *>> LazyComdatMembers;
77 
78   /// Given a global in the source module, return the global in the
79   /// destination module that is being linked to, if any.
80   GlobalValue *getLinkedToGlobal(const GlobalValue *SrcGV) {
81     Module &DstM = Mover.getModule();
82     // If the source has no name it can't link.  If it has local linkage,
83     // there is no name match-up going on.
84     if (!SrcGV->hasName() || GlobalValue::isLocalLinkage(SrcGV->getLinkage()))
85       return nullptr;
86 
87     // Otherwise see if we have a match in the destination module's symtab.
88     GlobalValue *DGV = DstM.getNamedValue(SrcGV->getName());
89     if (!DGV)
90       return nullptr;
91 
92     // If we found a global with the same name in the dest module, but it has
93     // internal linkage, we are really not doing any linkage here.
94     if (DGV->hasLocalLinkage())
95       return nullptr;
96 
97     // Otherwise, we do in fact link to the destination global.
98     return DGV;
99   }
100 
101   /// Drop GV if it is a member of a comdat that we are dropping.
102   /// This can happen with COFF's largest selection kind.
103   void dropReplacedComdat(GlobalValue &GV,
104                           const DenseSet<const Comdat *> &ReplacedDstComdats);
105 
106   bool linkIfNeeded(GlobalValue &GV);
107 
108 public:
109   ModuleLinker(IRMover &Mover, std::unique_ptr<Module> SrcM, unsigned Flags,
110                std::function<void(Module &, const StringSet<> &)>
111                    InternalizeCallback = {})
112       : Mover(Mover), SrcM(std::move(SrcM)), Flags(Flags),
113         InternalizeCallback(std::move(InternalizeCallback)) {}
114 
115   bool run();
116 };
117 }
118 
119 static GlobalValue::VisibilityTypes
120 getMinVisibility(GlobalValue::VisibilityTypes A,
121                  GlobalValue::VisibilityTypes B) {
122   if (A == GlobalValue::HiddenVisibility || B == GlobalValue::HiddenVisibility)
123     return GlobalValue::HiddenVisibility;
124   if (A == GlobalValue::ProtectedVisibility ||
125       B == GlobalValue::ProtectedVisibility)
126     return GlobalValue::ProtectedVisibility;
127   return GlobalValue::DefaultVisibility;
128 }
129 
130 bool ModuleLinker::getComdatLeader(Module &M, StringRef ComdatName,
131                                    const GlobalVariable *&GVar) {
132   const GlobalValue *GVal = M.getNamedValue(ComdatName);
133   if (const auto *GA = dyn_cast_or_null<GlobalAlias>(GVal)) {
134     GVal = GA->getBaseObject();
135     if (!GVal)
136       // We cannot resolve the size of the aliasee yet.
137       return emitError("Linking COMDATs named '" + ComdatName +
138                        "': COMDAT key involves incomputable alias size.");
139   }
140 
141   GVar = dyn_cast_or_null<GlobalVariable>(GVal);
142   if (!GVar)
143     return emitError(
144         "Linking COMDATs named '" + ComdatName +
145         "': GlobalVariable required for data dependent selection!");
146 
147   return false;
148 }
149 
150 bool ModuleLinker::computeResultingSelectionKind(StringRef ComdatName,
151                                                  Comdat::SelectionKind Src,
152                                                  Comdat::SelectionKind Dst,
153                                                  Comdat::SelectionKind &Result,
154                                                  bool &LinkFromSrc) {
155   Module &DstM = Mover.getModule();
156   // The ability to mix Comdat::SelectionKind::Any with
157   // Comdat::SelectionKind::Largest is a behavior that comes from COFF.
158   bool DstAnyOrLargest = Dst == Comdat::SelectionKind::Any ||
159                          Dst == Comdat::SelectionKind::Largest;
160   bool SrcAnyOrLargest = Src == Comdat::SelectionKind::Any ||
161                          Src == Comdat::SelectionKind::Largest;
162   if (DstAnyOrLargest && SrcAnyOrLargest) {
163     if (Dst == Comdat::SelectionKind::Largest ||
164         Src == Comdat::SelectionKind::Largest)
165       Result = Comdat::SelectionKind::Largest;
166     else
167       Result = Comdat::SelectionKind::Any;
168   } else if (Src == Dst) {
169     Result = Dst;
170   } else {
171     return emitError("Linking COMDATs named '" + ComdatName +
172                      "': invalid selection kinds!");
173   }
174 
175   switch (Result) {
176   case Comdat::SelectionKind::Any:
177     // Go with Dst.
178     LinkFromSrc = false;
179     break;
180   case Comdat::SelectionKind::NoDuplicates:
181     return emitError("Linking COMDATs named '" + ComdatName +
182                      "': noduplicates has been violated!");
183   case Comdat::SelectionKind::ExactMatch:
184   case Comdat::SelectionKind::Largest:
185   case Comdat::SelectionKind::SameSize: {
186     const GlobalVariable *DstGV;
187     const GlobalVariable *SrcGV;
188     if (getComdatLeader(DstM, ComdatName, DstGV) ||
189         getComdatLeader(*SrcM, ComdatName, SrcGV))
190       return true;
191 
192     const DataLayout &DstDL = DstM.getDataLayout();
193     const DataLayout &SrcDL = SrcM->getDataLayout();
194     uint64_t DstSize = DstDL.getTypeAllocSize(DstGV->getValueType());
195     uint64_t SrcSize = SrcDL.getTypeAllocSize(SrcGV->getValueType());
196     if (Result == Comdat::SelectionKind::ExactMatch) {
197       if (SrcGV->getInitializer() != DstGV->getInitializer())
198         return emitError("Linking COMDATs named '" + ComdatName +
199                          "': ExactMatch violated!");
200       LinkFromSrc = false;
201     } else if (Result == Comdat::SelectionKind::Largest) {
202       LinkFromSrc = SrcSize > DstSize;
203     } else if (Result == Comdat::SelectionKind::SameSize) {
204       if (SrcSize != DstSize)
205         return emitError("Linking COMDATs named '" + ComdatName +
206                          "': SameSize violated!");
207       LinkFromSrc = false;
208     } else {
209       llvm_unreachable("unknown selection kind");
210     }
211     break;
212   }
213   }
214 
215   return false;
216 }
217 
218 bool ModuleLinker::getComdatResult(const Comdat *SrcC,
219                                    Comdat::SelectionKind &Result,
220                                    bool &LinkFromSrc) {
221   Module &DstM = Mover.getModule();
222   Comdat::SelectionKind SSK = SrcC->getSelectionKind();
223   StringRef ComdatName = SrcC->getName();
224   Module::ComdatSymTabType &ComdatSymTab = DstM.getComdatSymbolTable();
225   Module::ComdatSymTabType::iterator DstCI = ComdatSymTab.find(ComdatName);
226 
227   if (DstCI == ComdatSymTab.end()) {
228     // Use the comdat if it is only available in one of the modules.
229     LinkFromSrc = true;
230     Result = SSK;
231     return false;
232   }
233 
234   const Comdat *DstC = &DstCI->second;
235   Comdat::SelectionKind DSK = DstC->getSelectionKind();
236   return computeResultingSelectionKind(ComdatName, SSK, DSK, Result,
237                                        LinkFromSrc);
238 }
239 
240 bool ModuleLinker::shouldLinkFromSource(bool &LinkFromSrc,
241                                         const GlobalValue &Dest,
242                                         const GlobalValue &Src) {
243 
244   // Should we unconditionally use the Src?
245   if (shouldOverrideFromSrc()) {
246     LinkFromSrc = true;
247     return false;
248   }
249 
250   // We always have to add Src if it has appending linkage.
251   if (Src.hasAppendingLinkage()) {
252     LinkFromSrc = true;
253     return false;
254   }
255 
256   bool SrcIsDeclaration = Src.isDeclarationForLinker();
257   bool DestIsDeclaration = Dest.isDeclarationForLinker();
258 
259   if (SrcIsDeclaration) {
260     // If Src is external or if both Src & Dest are external..  Just link the
261     // external globals, we aren't adding anything.
262     if (Src.hasDLLImportStorageClass()) {
263       // If one of GVs is marked as DLLImport, result should be dllimport'ed.
264       LinkFromSrc = DestIsDeclaration;
265       return false;
266     }
267     // If the Dest is weak, use the source linkage.
268     if (Dest.hasExternalWeakLinkage()) {
269       LinkFromSrc = true;
270       return false;
271     }
272     // Link an available_externally over a declaration.
273     LinkFromSrc = !Src.isDeclaration() && Dest.isDeclaration();
274     return false;
275   }
276 
277   if (DestIsDeclaration) {
278     // If Dest is external but Src is not:
279     LinkFromSrc = true;
280     return false;
281   }
282 
283   if (Src.hasCommonLinkage()) {
284     if (Dest.hasLinkOnceLinkage() || Dest.hasWeakLinkage()) {
285       LinkFromSrc = true;
286       return false;
287     }
288 
289     if (!Dest.hasCommonLinkage()) {
290       LinkFromSrc = false;
291       return false;
292     }
293 
294     const DataLayout &DL = Dest.getParent()->getDataLayout();
295     uint64_t DestSize = DL.getTypeAllocSize(Dest.getValueType());
296     uint64_t SrcSize = DL.getTypeAllocSize(Src.getValueType());
297     LinkFromSrc = SrcSize > DestSize;
298     return false;
299   }
300 
301   if (Src.isWeakForLinker()) {
302     assert(!Dest.hasExternalWeakLinkage());
303     assert(!Dest.hasAvailableExternallyLinkage());
304 
305     if (Dest.hasLinkOnceLinkage() && Src.hasWeakLinkage()) {
306       LinkFromSrc = true;
307       return false;
308     }
309 
310     LinkFromSrc = false;
311     return false;
312   }
313 
314   if (Dest.isWeakForLinker()) {
315     assert(Src.hasExternalLinkage());
316     LinkFromSrc = true;
317     return false;
318   }
319 
320   assert(!Src.hasExternalWeakLinkage());
321   assert(!Dest.hasExternalWeakLinkage());
322   assert(Dest.hasExternalLinkage() && Src.hasExternalLinkage() &&
323          "Unexpected linkage type!");
324   return emitError("Linking globals named '" + Src.getName() +
325                    "': symbol multiply defined!");
326 }
327 
328 bool ModuleLinker::linkIfNeeded(GlobalValue &GV) {
329   GlobalValue *DGV = getLinkedToGlobal(&GV);
330 
331   if (shouldLinkOnlyNeeded()) {
332     // Always import variables with appending linkage.
333     if (!GV.hasAppendingLinkage()) {
334       // Don't import globals unless they are referenced by the destination
335       // module.
336       if (!DGV)
337         return false;
338       // Don't import globals that are already defined in the destination module
339       if (!DGV->isDeclaration())
340         return false;
341     }
342   }
343 
344   if (DGV && !GV.hasLocalLinkage() && !GV.hasAppendingLinkage()) {
345     auto *DGVar = dyn_cast<GlobalVariable>(DGV);
346     auto *SGVar = dyn_cast<GlobalVariable>(&GV);
347     if (DGVar && SGVar) {
348       if (DGVar->isDeclaration() && SGVar->isDeclaration() &&
349           (!DGVar->isConstant() || !SGVar->isConstant())) {
350         DGVar->setConstant(false);
351         SGVar->setConstant(false);
352       }
353       if (DGVar->hasCommonLinkage() && SGVar->hasCommonLinkage()) {
354         MaybeAlign Align(
355             std::max(DGVar->getAlignment(), SGVar->getAlignment()));
356         SGVar->setAlignment(Align);
357         DGVar->setAlignment(Align);
358       }
359     }
360 
361     GlobalValue::VisibilityTypes Visibility =
362         getMinVisibility(DGV->getVisibility(), GV.getVisibility());
363     DGV->setVisibility(Visibility);
364     GV.setVisibility(Visibility);
365 
366     GlobalValue::UnnamedAddr UnnamedAddr = GlobalValue::getMinUnnamedAddr(
367         DGV->getUnnamedAddr(), GV.getUnnamedAddr());
368     DGV->setUnnamedAddr(UnnamedAddr);
369     GV.setUnnamedAddr(UnnamedAddr);
370   }
371 
372   if (!DGV && !shouldOverrideFromSrc() &&
373       (GV.hasLocalLinkage() || GV.hasLinkOnceLinkage() ||
374        GV.hasAvailableExternallyLinkage()))
375     return false;
376 
377   if (GV.isDeclaration())
378     return false;
379 
380   if (const Comdat *SC = GV.getComdat()) {
381     bool LinkFromSrc;
382     Comdat::SelectionKind SK;
383     std::tie(SK, LinkFromSrc) = ComdatsChosen[SC];
384     if (!LinkFromSrc)
385       return false;
386   }
387 
388   bool LinkFromSrc = true;
389   if (DGV && shouldLinkFromSource(LinkFromSrc, *DGV, GV))
390     return true;
391   if (LinkFromSrc)
392     ValuesToLink.insert(&GV);
393   return false;
394 }
395 
396 void ModuleLinker::addLazyFor(GlobalValue &GV, const IRMover::ValueAdder &Add) {
397   // Add these to the internalize list
398   if (!GV.hasLinkOnceLinkage() && !GV.hasAvailableExternallyLinkage() &&
399       !shouldLinkOnlyNeeded())
400     return;
401 
402   if (InternalizeCallback)
403     Internalize.insert(GV.getName());
404   Add(GV);
405 
406   const Comdat *SC = GV.getComdat();
407   if (!SC)
408     return;
409   for (GlobalValue *GV2 : LazyComdatMembers[SC]) {
410     GlobalValue *DGV = getLinkedToGlobal(GV2);
411     bool LinkFromSrc = true;
412     if (DGV && shouldLinkFromSource(LinkFromSrc, *DGV, *GV2))
413       return;
414     if (!LinkFromSrc)
415       continue;
416     if (InternalizeCallback)
417       Internalize.insert(GV2->getName());
418     Add(*GV2);
419   }
420 }
421 
422 void ModuleLinker::dropReplacedComdat(
423     GlobalValue &GV, const DenseSet<const Comdat *> &ReplacedDstComdats) {
424   Comdat *C = GV.getComdat();
425   if (!C)
426     return;
427   if (!ReplacedDstComdats.count(C))
428     return;
429   if (GV.use_empty()) {
430     GV.eraseFromParent();
431     return;
432   }
433 
434   if (auto *F = dyn_cast<Function>(&GV)) {
435     F->deleteBody();
436   } else if (auto *Var = dyn_cast<GlobalVariable>(&GV)) {
437     Var->setInitializer(nullptr);
438   } else {
439     auto &Alias = cast<GlobalAlias>(GV);
440     Module &M = *Alias.getParent();
441     PointerType &Ty = *cast<PointerType>(Alias.getType());
442     GlobalValue *Declaration;
443     if (auto *FTy = dyn_cast<FunctionType>(Alias.getValueType())) {
444       Declaration = Function::Create(FTy, GlobalValue::ExternalLinkage, "", &M);
445     } else {
446       Declaration =
447           new GlobalVariable(M, Ty.getElementType(), /*isConstant*/ false,
448                              GlobalValue::ExternalLinkage,
449                              /*Initializer*/ nullptr);
450     }
451     Declaration->takeName(&Alias);
452     Alias.replaceAllUsesWith(Declaration);
453     Alias.eraseFromParent();
454   }
455 }
456 
457 bool ModuleLinker::run() {
458   Module &DstM = Mover.getModule();
459   DenseSet<const Comdat *> ReplacedDstComdats;
460 
461   for (const auto &SMEC : SrcM->getComdatSymbolTable()) {
462     const Comdat &C = SMEC.getValue();
463     if (ComdatsChosen.count(&C))
464       continue;
465     Comdat::SelectionKind SK;
466     bool LinkFromSrc;
467     if (getComdatResult(&C, SK, LinkFromSrc))
468       return true;
469     ComdatsChosen[&C] = std::make_pair(SK, LinkFromSrc);
470 
471     if (!LinkFromSrc)
472       continue;
473 
474     Module::ComdatSymTabType &ComdatSymTab = DstM.getComdatSymbolTable();
475     Module::ComdatSymTabType::iterator DstCI = ComdatSymTab.find(C.getName());
476     if (DstCI == ComdatSymTab.end())
477       continue;
478 
479     // The source comdat is replacing the dest one.
480     const Comdat *DstC = &DstCI->second;
481     ReplacedDstComdats.insert(DstC);
482   }
483 
484   // Alias have to go first, since we are not able to find their comdats
485   // otherwise.
486   for (auto I = DstM.alias_begin(), E = DstM.alias_end(); I != E;) {
487     GlobalAlias &GV = *I++;
488     dropReplacedComdat(GV, ReplacedDstComdats);
489   }
490 
491   for (auto I = DstM.global_begin(), E = DstM.global_end(); I != E;) {
492     GlobalVariable &GV = *I++;
493     dropReplacedComdat(GV, ReplacedDstComdats);
494   }
495 
496   for (auto I = DstM.begin(), E = DstM.end(); I != E;) {
497     Function &GV = *I++;
498     dropReplacedComdat(GV, ReplacedDstComdats);
499   }
500 
501   for (GlobalVariable &GV : SrcM->globals())
502     if (GV.hasLinkOnceLinkage())
503       if (const Comdat *SC = GV.getComdat())
504         LazyComdatMembers[SC].push_back(&GV);
505 
506   for (Function &SF : *SrcM)
507     if (SF.hasLinkOnceLinkage())
508       if (const Comdat *SC = SF.getComdat())
509         LazyComdatMembers[SC].push_back(&SF);
510 
511   for (GlobalAlias &GA : SrcM->aliases())
512     if (GA.hasLinkOnceLinkage())
513       if (const Comdat *SC = GA.getComdat())
514         LazyComdatMembers[SC].push_back(&GA);
515 
516   // Insert all of the globals in src into the DstM module... without linking
517   // initializers (which could refer to functions not yet mapped over).
518   for (GlobalVariable &GV : SrcM->globals())
519     if (linkIfNeeded(GV))
520       return true;
521 
522   for (Function &SF : *SrcM)
523     if (linkIfNeeded(SF))
524       return true;
525 
526   for (GlobalAlias &GA : SrcM->aliases())
527     if (linkIfNeeded(GA))
528       return true;
529 
530   for (unsigned I = 0; I < ValuesToLink.size(); ++I) {
531     GlobalValue *GV = ValuesToLink[I];
532     const Comdat *SC = GV->getComdat();
533     if (!SC)
534       continue;
535     for (GlobalValue *GV2 : LazyComdatMembers[SC]) {
536       GlobalValue *DGV = getLinkedToGlobal(GV2);
537       bool LinkFromSrc = true;
538       if (DGV && shouldLinkFromSource(LinkFromSrc, *DGV, *GV2))
539         return true;
540       if (LinkFromSrc)
541         ValuesToLink.insert(GV2);
542     }
543   }
544 
545   if (InternalizeCallback) {
546     for (GlobalValue *GV : ValuesToLink)
547       Internalize.insert(GV->getName());
548   }
549 
550   // FIXME: Propagate Errors through to the caller instead of emitting
551   // diagnostics.
552   bool HasErrors = false;
553   if (Error E = Mover.move(std::move(SrcM), ValuesToLink.getArrayRef(),
554                            [this](GlobalValue &GV, IRMover::ValueAdder Add) {
555                              addLazyFor(GV, Add);
556                            },
557                            /* IsPerformingImport */ false)) {
558     handleAllErrors(std::move(E), [&](ErrorInfoBase &EIB) {
559       DstM.getContext().diagnose(LinkDiagnosticInfo(DS_Error, EIB.message()));
560       HasErrors = true;
561     });
562   }
563   if (HasErrors)
564     return true;
565 
566   if (InternalizeCallback)
567     InternalizeCallback(DstM, Internalize);
568 
569   return false;
570 }
571 
572 Linker::Linker(Module &M) : Mover(M) {}
573 
574 bool Linker::linkInModule(
575     std::unique_ptr<Module> Src, unsigned Flags,
576     std::function<void(Module &, const StringSet<> &)> InternalizeCallback) {
577   ModuleLinker ModLinker(Mover, std::move(Src), Flags,
578                          std::move(InternalizeCallback));
579   return ModLinker.run();
580 }
581 
582 //===----------------------------------------------------------------------===//
583 // LinkModules entrypoint.
584 //===----------------------------------------------------------------------===//
585 
586 /// This function links two modules together, with the resulting Dest module
587 /// modified to be the composite of the two input modules. If an error occurs,
588 /// true is returned and ErrorMsg (if not null) is set to indicate the problem.
589 /// Upon failure, the Dest module could be in a modified state, and shouldn't be
590 /// relied on to be consistent.
591 bool Linker::linkModules(
592     Module &Dest, std::unique_ptr<Module> Src, unsigned Flags,
593     std::function<void(Module &, const StringSet<> &)> InternalizeCallback) {
594   Linker L(Dest);
595   return L.linkInModule(std::move(Src), Flags, std::move(InternalizeCallback));
596 }
597 
598 //===----------------------------------------------------------------------===//
599 // C API.
600 //===----------------------------------------------------------------------===//
601 
602 LLVMBool LLVMLinkModules2(LLVMModuleRef Dest, LLVMModuleRef Src) {
603   Module *D = unwrap(Dest);
604   std::unique_ptr<Module> M(unwrap(Src));
605   return Linker::linkModules(*D, std::move(M));
606 }
607