1 //===- FunctionImport.cpp - ThinLTO Summary-based Function Import ---------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements Function import based on summaries.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/Transforms/IPO/FunctionImport.h"
14 #include "llvm/ADT/ArrayRef.h"
15 #include "llvm/ADT/STLExtras.h"
16 #include "llvm/ADT/SetVector.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/ADT/Statistic.h"
19 #include "llvm/ADT/StringRef.h"
20 #include "llvm/Bitcode/BitcodeReader.h"
21 #include "llvm/IR/AutoUpgrade.h"
22 #include "llvm/IR/Constants.h"
23 #include "llvm/IR/Function.h"
24 #include "llvm/IR/GlobalAlias.h"
25 #include "llvm/IR/GlobalObject.h"
26 #include "llvm/IR/GlobalValue.h"
27 #include "llvm/IR/GlobalVariable.h"
28 #include "llvm/IR/Metadata.h"
29 #include "llvm/IR/Module.h"
30 #include "llvm/IR/ModuleSummaryIndex.h"
31 #include "llvm/IRReader/IRReader.h"
32 #include "llvm/Linker/IRMover.h"
33 #include "llvm/Support/Casting.h"
34 #include "llvm/Support/CommandLine.h"
35 #include "llvm/Support/Debug.h"
36 #include "llvm/Support/Errc.h"
37 #include "llvm/Support/Error.h"
38 #include "llvm/Support/ErrorHandling.h"
39 #include "llvm/Support/FileSystem.h"
40 #include "llvm/Support/JSON.h"
41 #include "llvm/Support/SourceMgr.h"
42 #include "llvm/Support/raw_ostream.h"
43 #include "llvm/Transforms/IPO/Internalize.h"
44 #include "llvm/Transforms/Utils/Cloning.h"
45 #include "llvm/Transforms/Utils/FunctionImportUtils.h"
46 #include "llvm/Transforms/Utils/ValueMapper.h"
47 #include <cassert>
48 #include <memory>
49 #include <set>
50 #include <string>
51 #include <system_error>
52 #include <tuple>
53 #include <utility>
54 
55 using namespace llvm;
56 
57 #define DEBUG_TYPE "function-import"
58 
// Counters kept by this file. The "...ThinLink" ones record import decisions
// taken during the thin link; the others record what was imported in the
// backend and how many index symbols were found dead or live.
STATISTIC(NumImportedFunctionsThinLink,
          "Number of functions thin link decided to import");
STATISTIC(NumImportedHotFunctionsThinLink,
          "Number of hot functions thin link decided to import");
STATISTIC(NumImportedCriticalFunctionsThinLink,
          "Number of critical functions thin link decided to import");
STATISTIC(NumImportedGlobalVarsThinLink,
          "Number of global variables thin link decided to import");
STATISTIC(NumImportedFunctions, "Number of functions imported in backend");
STATISTIC(NumImportedGlobalVars,
          "Number of global variables imported in backend");
STATISTIC(NumImportedModules, "Number of modules imported from");
STATISTIC(NumDeadSymbols, "Number of dead stripped symbols in index");
STATISTIC(NumLiveSymbols, "Number of live symbols in index");
73 
/// Limit on instruction count of imported functions.
static cl::opt<unsigned> ImportInstrLimit(
    "import-instr-limit", cl::init(100), cl::Hidden, cl::value_desc("N"),
    cl::desc("Only import functions with less than N instructions"));

/// Upper bound on how many functions get imported. It only takes effect when
/// N >= 0; the default is -1.
static cl::opt<int> ImportCutoff(
    "import-cutoff", cl::init(-1), cl::Hidden, cl::value_desc("N"),
    cl::desc("Only import first N functions if N>=0 (default -1)"));

/// When set, selectCallee() no longer rejects candidates for exceeding the
/// size threshold or for being marked noinline.
static cl::opt<bool>
    ForceImportAll("force-import-all", cl::init(false), cl::Hidden,
                   cl::desc("Import functions with noinline attribute"));

/// Factor applied to the `import-instr-limit` threshold before processing
/// newly imported functions.
static cl::opt<float>
    ImportInstrFactor("import-instr-evolution-factor", cl::init(0.7),
                      cl::Hidden, cl::value_desc("x"),
                      cl::desc("As we import functions, multiply the "
                               "`import-instr-limit` threshold by this factor "
                               "before processing newly imported functions"));

/// Same as `import-instr-evolution-factor`, but for functions called from a
/// hot callsite.
static cl::opt<float> ImportHotInstrFactor(
    "import-hot-evolution-factor", cl::init(1.0), cl::Hidden,
    cl::value_desc("x"),
    cl::desc("As we import functions called from hot callsite, multiply the "
             "`import-instr-limit` threshold by this factor "
             "before processing newly imported functions"));

/// Threshold multiplier for hot callsites.
static cl::opt<float> ImportHotMultiplier(
    "import-hot-multiplier", cl::init(10.0), cl::Hidden, cl::value_desc("x"),
    cl::desc("Multiply the `import-instr-limit` threshold for hot callsites"));

/// Threshold multiplier for critical callsites.
static cl::opt<float> ImportCriticalMultiplier(
    "import-critical-multiplier", cl::init(100.0), cl::Hidden,
    cl::value_desc("x"),
    cl::desc(
        "Multiply the `import-instr-limit` threshold for critical callsites"));

/// Threshold multiplier for cold callsites; it defaults to 0.
// FIXME: This multiplier was not really tuned up.
// NOTE(review): the value description here is "N" while the other float
// multipliers use "x" - confirm whether that is intentional.
static cl::opt<float> ImportColdMultiplier(
    "import-cold-multiplier", cl::init(0), cl::Hidden, cl::value_desc("N"),
    cl::desc("Multiply the `import-instr-limit` threshold for cold callsites"));

/// Print the functions that were imported.
static cl::opt<bool> PrintImports("print-imports", cl::init(false), cl::Hidden,
                                  cl::desc("Print imported functions"));

/// Print information about the functions that were rejected for importing.
static cl::opt<bool> PrintImportFailures(
    "print-import-failures", cl::init(false), cl::Hidden,
    cl::desc("Print information for functions rejected for importing"));

/// Whether dead symbols are computed; enabled by default.
static cl::opt<bool> ComputeDead("compute-dead", cl::init(true), cl::Hidden,
                                 cl::desc("Compute dead symbols"));

/// Whether import metadata such as 'thinlto_src_module' is enabled; disabled
/// by default.
static cl::opt<bool> EnableImportMetadata(
    "enable-import-metadata", cl::init(false), cl::Hidden,
    cl::desc("Enable import metadata like 'thinlto_src_module'"));

/// Summary file to use for function importing when using -function-import from
/// the command line.
static cl::opt<std::string>
    SummaryFile("summary-file",
                cl::desc("The summary file to use for function importing."));

/// Used when testing importing from distributed indexes via opt
/// -function-import.
static cl::opt<bool>
    ImportAllIndex("import-all-index",
                   cl::desc("Import all external functions in index."));

/// Pass a workload description file - an example of workload would be the
/// functions executed to satisfy a RPC request. A workload is defined by a root
/// function and the list of functions that are (frequently) needed to satisfy
/// it. The module that defines the root will have all those functions imported.
/// The file contains a JSON dictionary. The keys are root functions, the values
/// are lists of functions to import in the module defining the root. It is
/// assumed -funique-internal-linkage-names was used, thus ensuring function
/// names are unique even for local linkage ones.
static cl::opt<std::string> WorkloadDefinitions(
    "thinlto-workload-def",
    cl::desc("Pass a workload definition. This is a file containing a JSON "
             "dictionary. The keys are root functions, the values are lists of "
             "functions to import in the module defining the root. It is "
             "assumed -funique-internal-linkage-names was used, to ensure "
             "local linkage functions have unique names. For example: \n"
             "{\n"
             "  \"rootFunction_1\": [\"function_to_import_1\", "
             "\"function_to_import_2\"], \n"
             "  \"rootFunction_2\": [\"function_to_import_3\", "
             "\"function_to_import_4\"] \n"
             "}"),
    cl::Hidden);
164 
165 // Load lazily a module from \p FileName in \p Context.
loadFile(const std::string & FileName,LLVMContext & Context)166 static std::unique_ptr<Module> loadFile(const std::string &FileName,
167                                         LLVMContext &Context) {
168   SMDiagnostic Err;
169   LLVM_DEBUG(dbgs() << "Loading '" << FileName << "'\n");
170   // Metadata isn't loaded until functions are imported, to minimize
171   // the memory overhead.
172   std::unique_ptr<Module> Result =
173       getLazyIRFileModule(FileName, Err, Context,
174                           /* ShouldLazyLoadMetadata = */ true);
175   if (!Result) {
176     Err.print("function-import", errs());
177     report_fatal_error("Abort");
178   }
179 
180   return Result;
181 }
182 
183 /// Given a list of possible callee implementation for a call site, qualify the
184 /// legality of importing each. The return is a range of pairs. Each pair
185 /// corresponds to a candidate. The first value is the ImportFailureReason for
186 /// that candidate, the second is the candidate.
qualifyCalleeCandidates(const ModuleSummaryIndex & Index,ArrayRef<std::unique_ptr<GlobalValueSummary>> CalleeSummaryList,StringRef CallerModulePath)187 static auto qualifyCalleeCandidates(
188     const ModuleSummaryIndex &Index,
189     ArrayRef<std::unique_ptr<GlobalValueSummary>> CalleeSummaryList,
190     StringRef CallerModulePath) {
191   return llvm::map_range(
192       CalleeSummaryList,
193       [&Index, CalleeSummaryList,
194        CallerModulePath](const std::unique_ptr<GlobalValueSummary> &SummaryPtr)
195           -> std::pair<FunctionImporter::ImportFailureReason,
196                        const GlobalValueSummary *> {
197         auto *GVSummary = SummaryPtr.get();
198         if (!Index.isGlobalValueLive(GVSummary))
199           return {FunctionImporter::ImportFailureReason::NotLive, GVSummary};
200 
201         if (GlobalValue::isInterposableLinkage(GVSummary->linkage()))
202           return {FunctionImporter::ImportFailureReason::InterposableLinkage,
203                   GVSummary};
204 
205         auto *Summary = dyn_cast<FunctionSummary>(GVSummary->getBaseObject());
206 
207         // Ignore any callees that aren't actually functions. This could happen
208         // in the case of GUID hash collisions. It could also happen in theory
209         // for SamplePGO profiles collected on old versions of the code after
210         // renaming, since we synthesize edges to any inlined callees appearing
211         // in the profile.
212         if (!Summary)
213           return {FunctionImporter::ImportFailureReason::GlobalVar, GVSummary};
214 
215         // If this is a local function, make sure we import the copy
216         // in the caller's module. The only time a local function can
217         // share an entry in the index is if there is a local with the same name
218         // in another module that had the same source file name (in a different
219         // directory), where each was compiled in their own directory so there
220         // was not distinguishing path.
221         // However, do the import from another module if there is only one
222         // entry in the list - in that case this must be a reference due
223         // to indirect call profile data, since a function pointer can point to
224         // a local in another module.
225         if (GlobalValue::isLocalLinkage(Summary->linkage()) &&
226             CalleeSummaryList.size() > 1 &&
227             Summary->modulePath() != CallerModulePath)
228           return {
229               FunctionImporter::ImportFailureReason::LocalLinkageNotInModule,
230               GVSummary};
231 
232         // Skip if it isn't legal to import (e.g. may reference unpromotable
233         // locals).
234         if (Summary->notEligibleToImport())
235           return {FunctionImporter::ImportFailureReason::NotEligible,
236                   GVSummary};
237 
238         return {FunctionImporter::ImportFailureReason::None, GVSummary};
239       });
240 }
241 
242 /// Given a list of possible callee implementation for a call site, select one
243 /// that fits the \p Threshold. If none are found, the Reason will give the last
244 /// reason for the failure (last, in the order of CalleeSummaryList entries).
245 ///
246 /// FIXME: select "best" instead of first that fits. But what is "best"?
247 /// - The smallest: more likely to be inlined.
248 /// - The one with the least outgoing edges (already well optimized).
249 /// - One from a module already being imported from in order to reduce the
250 ///   number of source modules parsed/linked.
251 /// - One that has PGO data attached.
252 /// - [insert you fancy metric here]
253 static const GlobalValueSummary *
selectCallee(const ModuleSummaryIndex & Index,ArrayRef<std::unique_ptr<GlobalValueSummary>> CalleeSummaryList,unsigned Threshold,StringRef CallerModulePath,FunctionImporter::ImportFailureReason & Reason)254 selectCallee(const ModuleSummaryIndex &Index,
255              ArrayRef<std::unique_ptr<GlobalValueSummary>> CalleeSummaryList,
256              unsigned Threshold, StringRef CallerModulePath,
257              FunctionImporter::ImportFailureReason &Reason) {
258   auto QualifiedCandidates =
259       qualifyCalleeCandidates(Index, CalleeSummaryList, CallerModulePath);
260   for (auto QualifiedValue : QualifiedCandidates) {
261     Reason = QualifiedValue.first;
262     if (Reason != FunctionImporter::ImportFailureReason::None)
263       continue;
264     auto *Summary =
265         cast<FunctionSummary>(QualifiedValue.second->getBaseObject());
266 
267     if ((Summary->instCount() > Threshold) && !Summary->fflags().AlwaysInline &&
268         !ForceImportAll) {
269       Reason = FunctionImporter::ImportFailureReason::TooLarge;
270       continue;
271     }
272 
273     // Don't bother importing if we can't inline it anyway.
274     if (Summary->fflags().NoInline && !ForceImportAll) {
275       Reason = FunctionImporter::ImportFailureReason::NoInline;
276       continue;
277     }
278 
279     return Summary;
280   }
281   return nullptr;
282 }
283 
namespace {

/// A function summary paired with an import threshold.
using EdgeInfo = std::tuple<const FunctionSummary *, unsigned /* Threshold */>;

} // anonymous namespace
289 
290 /// Import globals referenced by a function or other globals that are being
291 /// imported, if importing such global is possible.
292 class GlobalsImporter final {
293   const ModuleSummaryIndex &Index;
294   const GVSummaryMapTy &DefinedGVSummaries;
295   function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
296       IsPrevailing;
297   FunctionImporter::ImportMapTy &ImportList;
298   DenseMap<StringRef, FunctionImporter::ExportSetTy> *const ExportLists;
299 
shouldImportGlobal(const ValueInfo & VI)300   bool shouldImportGlobal(const ValueInfo &VI) {
301     const auto &GVS = DefinedGVSummaries.find(VI.getGUID());
302     if (GVS == DefinedGVSummaries.end())
303       return true;
304     // We should not skip import if the module contains a non-prevailing
305     // definition with interposable linkage type. This is required for
306     // correctness in the situation where there is a prevailing def available
307     // for import and marked read-only. In this case, the non-prevailing def
308     // will be converted to a declaration, while the prevailing one becomes
309     // internal, thus no definitions will be available for linking. In order to
310     // prevent undefined symbol link error, the prevailing definition must be
311     // imported.
312     // FIXME: Consider adding a check that the suitable prevailing definition
313     // exists and marked read-only.
314     if (VI.getSummaryList().size() > 1 &&
315         GlobalValue::isInterposableLinkage(GVS->second->linkage()) &&
316         !IsPrevailing(VI.getGUID(), GVS->second))
317       return true;
318 
319     return false;
320   }
321 
322   void
onImportingSummaryImpl(const GlobalValueSummary & Summary,SmallVectorImpl<const GlobalVarSummary * > & Worklist)323   onImportingSummaryImpl(const GlobalValueSummary &Summary,
324                          SmallVectorImpl<const GlobalVarSummary *> &Worklist) {
325     for (const auto &VI : Summary.refs()) {
326       if (!shouldImportGlobal(VI)) {
327         LLVM_DEBUG(
328             dbgs() << "Ref ignored! Target already in destination module.\n");
329         continue;
330       }
331 
332       LLVM_DEBUG(dbgs() << " ref -> " << VI << "\n");
333 
334       // If this is a local variable, make sure we import the copy
335       // in the caller's module. The only time a local variable can
336       // share an entry in the index is if there is a local with the same name
337       // in another module that had the same source file name (in a different
338       // directory), where each was compiled in their own directory so there
339       // was not distinguishing path.
340       auto LocalNotInModule =
341           [&](const GlobalValueSummary *RefSummary) -> bool {
342         return GlobalValue::isLocalLinkage(RefSummary->linkage()) &&
343                RefSummary->modulePath() != Summary.modulePath();
344       };
345 
346       for (const auto &RefSummary : VI.getSummaryList()) {
347         const auto *GVS = dyn_cast<GlobalVarSummary>(RefSummary.get());
348         // Functions could be referenced by global vars - e.g. a vtable; but we
349         // don't currently imagine a reason those would be imported here, rather
350         // than as part of the logic deciding which functions to import (i.e.
351         // based on profile information). Should we decide to handle them here,
352         // we can refactor accordingly at that time.
353         if (!GVS || !Index.canImportGlobalVar(GVS, /* AnalyzeRefs */ true) ||
354             LocalNotInModule(GVS))
355           continue;
356         auto ILI = ImportList[RefSummary->modulePath()].insert(VI.getGUID());
357         // Only update stat and exports if we haven't already imported this
358         // variable.
359         if (!ILI.second)
360           break;
361         NumImportedGlobalVarsThinLink++;
362         // Any references made by this variable will be marked exported
363         // later, in ComputeCrossModuleImport, after import decisions are
364         // complete, which is more efficient than adding them here.
365         if (ExportLists)
366           (*ExportLists)[RefSummary->modulePath()].insert(VI);
367 
368         // If variable is not writeonly we attempt to recursively analyze
369         // its references in order to import referenced constants.
370         if (!Index.isWriteOnly(GVS))
371           Worklist.emplace_back(GVS);
372         break;
373       }
374     }
375   }
376 
377 public:
GlobalsImporter(const ModuleSummaryIndex & Index,const GVSummaryMapTy & DefinedGVSummaries,function_ref<bool (GlobalValue::GUID,const GlobalValueSummary *)> IsPrevailing,FunctionImporter::ImportMapTy & ImportList,DenseMap<StringRef,FunctionImporter::ExportSetTy> * ExportLists)378   GlobalsImporter(
379       const ModuleSummaryIndex &Index, const GVSummaryMapTy &DefinedGVSummaries,
380       function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
381           IsPrevailing,
382       FunctionImporter::ImportMapTy &ImportList,
383       DenseMap<StringRef, FunctionImporter::ExportSetTy> *ExportLists)
384       : Index(Index), DefinedGVSummaries(DefinedGVSummaries),
385         IsPrevailing(IsPrevailing), ImportList(ImportList),
386         ExportLists(ExportLists) {}
387 
onImportingSummary(const GlobalValueSummary & Summary)388   void onImportingSummary(const GlobalValueSummary &Summary) {
389     SmallVector<const GlobalVarSummary *, 128> Worklist;
390     onImportingSummaryImpl(Summary, Worklist);
391     while (!Worklist.empty())
392       onImportingSummaryImpl(*Worklist.pop_back_val(), Worklist);
393   }
394 };
395 
// Forward declaration: used by the debug output of WorkloadImportsManager,
// which precedes the definition further down in this file.
static const char *getFailureName(FunctionImporter::ImportFailureReason Reason);
397 
/// Determine the list of imports and exports for each module.
///
/// The constructor is protected; instances are obtained through create().
class ModuleImportsManager {
protected:
  /// Callback answering whether the given summary is the prevailing copy of
  /// the symbol with the given GUID.
  function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
      IsPrevailing;
  /// The summary index that import decisions are based on.
  const ModuleSummaryIndex &Index;
  /// Optional export sets, keyed by module path. May be null, in which case
  /// exports are not recorded.
  DenseMap<StringRef, FunctionImporter::ExportSetTy> *const ExportLists;

  /// Store the callback, index and (optional) export lists; see create().
  ModuleImportsManager(
      function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
          IsPrevailing,
      const ModuleSummaryIndex &Index,
      DenseMap<StringRef, FunctionImporter::ExportSetTy> *ExportLists = nullptr)
      : IsPrevailing(IsPrevailing), Index(Index), ExportLists(ExportLists) {}

public:
  virtual ~ModuleImportsManager() = default;

  /// Given the list of globals defined in a module, compute the list of imports
  /// as well as the list of "exports", i.e. the list of symbols referenced from
  /// another module (that may require promotion).
  virtual void
  computeImportForModule(const GVSummaryMapTy &DefinedGVSummaries,
                         StringRef ModName,
                         FunctionImporter::ImportMapTy &ImportList);

  /// Factory: returns a WorkloadImportsManager when a workload definition was
  /// passed (-thinlto-workload-def), and a plain ModuleImportsManager
  /// otherwise.
  static std::unique_ptr<ModuleImportsManager>
  create(function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
             IsPrevailing,
         const ModuleSummaryIndex &Index,
         DenseMap<StringRef, FunctionImporter::ExportSetTy> *ExportLists =
             nullptr);
};
431 
432 /// A ModuleImportsManager that operates based on a workload definition (see
433 /// -thinlto-workload-def). For modules that do not define workload roots, it
434 /// applies the base ModuleImportsManager import policy.
435 class WorkloadImportsManager : public ModuleImportsManager {
436   // Keep a module name -> value infos to import association. We use it to
437   // determine if a module's import list should be done by the base
438   // ModuleImportsManager or by us.
439   StringMap<DenseSet<ValueInfo>> Workloads;
440 
441   void
computeImportForModule(const GVSummaryMapTy & DefinedGVSummaries,StringRef ModName,FunctionImporter::ImportMapTy & ImportList)442   computeImportForModule(const GVSummaryMapTy &DefinedGVSummaries,
443                          StringRef ModName,
444                          FunctionImporter::ImportMapTy &ImportList) override {
445     auto SetIter = Workloads.find(ModName);
446     if (SetIter == Workloads.end()) {
447       LLVM_DEBUG(dbgs() << "[Workload] " << ModName
448                         << " does not contain the root of any context.\n");
449       return ModuleImportsManager::computeImportForModule(DefinedGVSummaries,
450                                                           ModName, ImportList);
451     }
452     LLVM_DEBUG(dbgs() << "[Workload] " << ModName
453                       << " contains the root(s) of context(s).\n");
454 
455     GlobalsImporter GVI(Index, DefinedGVSummaries, IsPrevailing, ImportList,
456                         ExportLists);
457     auto &ValueInfos = SetIter->second;
458     SmallVector<EdgeInfo, 128> GlobWorklist;
459     for (auto &VI : llvm::make_early_inc_range(ValueInfos)) {
460       auto It = DefinedGVSummaries.find(VI.getGUID());
461       if (It != DefinedGVSummaries.end() &&
462           IsPrevailing(VI.getGUID(), It->second)) {
463         LLVM_DEBUG(
464             dbgs() << "[Workload] " << VI.name()
465                    << " has the prevailing variant already in the module "
466                    << ModName << ". No need to import\n");
467         continue;
468       }
469       auto Candidates =
470           qualifyCalleeCandidates(Index, VI.getSummaryList(), ModName);
471 
472       const GlobalValueSummary *GVS = nullptr;
473       auto PotentialCandidates = llvm::map_range(
474           llvm::make_filter_range(
475               Candidates,
476               [&](const auto &Candidate) {
477                 LLVM_DEBUG(dbgs() << "[Workflow] Candidate for " << VI.name()
478                                   << " from " << Candidate.second->modulePath()
479                                   << " ImportFailureReason: "
480                                   << getFailureName(Candidate.first) << "\n");
481                 return Candidate.first ==
482                         FunctionImporter::ImportFailureReason::None;
483               }),
484           [](const auto &Candidate) { return Candidate.second; });
485       if (PotentialCandidates.empty()) {
486         LLVM_DEBUG(dbgs() << "[Workload] Not importing " << VI.name()
487                           << " because can't find eligible Callee. Guid is: "
488                           << Function::getGUID(VI.name()) << "\n");
489         continue;
490       }
491       /// We will prefer importing the prevailing candidate, if not, we'll
492       /// still pick the first available candidate. The reason we want to make
493       /// sure we do import the prevailing candidate is because the goal of
494       /// workload-awareness is to enable optimizations specializing the call
495       /// graph of that workload. Suppose a function is already defined in the
496       /// module, but it's not the prevailing variant. Suppose also we do not
497       /// inline it (in fact, if it were interposable, we can't inline it),
498       /// but we could specialize it to the workload in other ways. However,
499       /// the linker would drop it in the favor of the prevailing copy.
500       /// Instead, by importing the prevailing variant (assuming also the use
501       /// of `-avail-extern-to-local`), we keep the specialization. We could
502       /// alteranatively make the non-prevailing variant local, but the
503       /// prevailing one is also the one for which we would have previously
504       /// collected profiles, making it preferrable.
505       auto PrevailingCandidates = llvm::make_filter_range(
506           PotentialCandidates, [&](const auto *Candidate) {
507             return IsPrevailing(VI.getGUID(), Candidate);
508           });
509       if (PrevailingCandidates.empty()) {
510         GVS = *PotentialCandidates.begin();
511         if (!llvm::hasSingleElement(PotentialCandidates) &&
512             GlobalValue::isLocalLinkage(GVS->linkage()))
513           LLVM_DEBUG(
514               dbgs()
515               << "[Workload] Found multiple non-prevailing candidates for "
516               << VI.name()
517               << ". This is unexpected. Are module paths passed to the "
518                  "compiler unique for the modules passed to the linker?");
519         // We could in theory have multiple (interposable) copies of a symbol
520         // when there is no prevailing candidate, if say the prevailing copy was
521         // in a native object being linked in. However, we should in theory be
522         // marking all of these non-prevailing IR copies dead in that case, in
523         // which case they won't be candidates.
524         assert(GVS->isLive());
525       } else {
526         assert(llvm::hasSingleElement(PrevailingCandidates));
527         GVS = *PrevailingCandidates.begin();
528       }
529 
530       auto ExportingModule = GVS->modulePath();
531       // We checked that for the prevailing case, but if we happen to have for
532       // example an internal that's defined in this module, it'd have no
533       // PrevailingCandidates.
534       if (ExportingModule == ModName) {
535         LLVM_DEBUG(dbgs() << "[Workload] Not importing " << VI.name()
536                           << " because its defining module is the same as the "
537                              "current module\n");
538         continue;
539       }
540       LLVM_DEBUG(dbgs() << "[Workload][Including]" << VI.name() << " from "
541                         << ExportingModule << " : "
542                         << Function::getGUID(VI.name()) << "\n");
543       ImportList[ExportingModule].insert(VI.getGUID());
544       GVI.onImportingSummary(*GVS);
545       if (ExportLists)
546         (*ExportLists)[ExportingModule].insert(VI);
547     }
548     LLVM_DEBUG(dbgs() << "[Workload] Done\n");
549   }
550 
551 public:
WorkloadImportsManager(function_ref<bool (GlobalValue::GUID,const GlobalValueSummary *)> IsPrevailing,const ModuleSummaryIndex & Index,DenseMap<StringRef,FunctionImporter::ExportSetTy> * ExportLists)552   WorkloadImportsManager(
553       function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
554           IsPrevailing,
555       const ModuleSummaryIndex &Index,
556       DenseMap<StringRef, FunctionImporter::ExportSetTy> *ExportLists)
557       : ModuleImportsManager(IsPrevailing, Index, ExportLists) {
558     // Since the workload def uses names, we need a quick lookup
559     // name->ValueInfo.
560     StringMap<ValueInfo> NameToValueInfo;
561     StringSet<> AmbiguousNames;
562     for (auto &I : Index) {
563       ValueInfo VI = Index.getValueInfo(I);
564       if (!NameToValueInfo.insert(std::make_pair(VI.name(), VI)).second)
565         LLVM_DEBUG(AmbiguousNames.insert(VI.name()));
566     }
567     auto DbgReportIfAmbiguous = [&](StringRef Name) {
568       LLVM_DEBUG(if (AmbiguousNames.count(Name) > 0) {
569         dbgs() << "[Workload] Function name " << Name
570                << " present in the workload definition is ambiguous. Consider "
571                   "compiling with -funique-internal-linkage-names.";
572       });
573     };
574     std::error_code EC;
575     auto BufferOrErr = MemoryBuffer::getFileOrSTDIN(WorkloadDefinitions);
576     if (std::error_code EC = BufferOrErr.getError()) {
577       report_fatal_error("Failed to open context file");
578       return;
579     }
580     auto Buffer = std::move(BufferOrErr.get());
581     std::map<std::string, std::vector<std::string>> WorkloadDefs;
582     json::Path::Root NullRoot;
583     // The JSON is supposed to contain a dictionary matching the type of
584     // WorkloadDefs. For example:
585     // {
586     //   "rootFunction_1": ["function_to_import_1", "function_to_import_2"],
587     //   "rootFunction_2": ["function_to_import_3", "function_to_import_4"]
588     // }
589     auto Parsed = json::parse(Buffer->getBuffer());
590     if (!Parsed)
591       report_fatal_error(Parsed.takeError());
592     if (!json::fromJSON(*Parsed, WorkloadDefs, NullRoot))
593       report_fatal_error("Invalid thinlto contextual profile format.");
594     for (const auto &Workload : WorkloadDefs) {
595       const auto &Root = Workload.first;
596       DbgReportIfAmbiguous(Root);
597       LLVM_DEBUG(dbgs() << "[Workload] Root: " << Root << "\n");
598       const auto &AllCallees = Workload.second;
599       auto RootIt = NameToValueInfo.find(Root);
600       if (RootIt == NameToValueInfo.end()) {
601         LLVM_DEBUG(dbgs() << "[Workload] Root " << Root
602                           << " not found in this linkage unit.\n");
603         continue;
604       }
605       auto RootVI = RootIt->second;
606       if (RootVI.getSummaryList().size() != 1) {
607         LLVM_DEBUG(dbgs() << "[Workload] Root " << Root
608                           << " should have exactly one summary, but has "
609                           << RootVI.getSummaryList().size() << ". Skipping.\n");
610         continue;
611       }
612       StringRef RootDefiningModule =
613           RootVI.getSummaryList().front()->modulePath();
614       LLVM_DEBUG(dbgs() << "[Workload] Root defining module for " << Root
615                         << " is : " << RootDefiningModule << "\n");
616       auto &Set = Workloads[RootDefiningModule];
617       for (const auto &Callee : AllCallees) {
618         LLVM_DEBUG(dbgs() << "[Workload] " << Callee << "\n");
619         DbgReportIfAmbiguous(Callee);
620         auto ElemIt = NameToValueInfo.find(Callee);
621         if (ElemIt == NameToValueInfo.end()) {
622           LLVM_DEBUG(dbgs() << "[Workload] " << Callee << " not found\n");
623           continue;
624         }
625         Set.insert(ElemIt->second);
626       }
627       LLVM_DEBUG({
628         dbgs() << "[Workload] Root: " << Root << " we have " << Set.size()
629                << " distinct callees.\n";
630         for (const auto &VI : Set) {
631           dbgs() << "[Workload] Root: " << Root
632                  << " Would include: " << VI.getGUID() << "\n";
633         }
634       });
635     }
636   }
637 };
638 
create(function_ref<bool (GlobalValue::GUID,const GlobalValueSummary *)> IsPrevailing,const ModuleSummaryIndex & Index,DenseMap<StringRef,FunctionImporter::ExportSetTy> * ExportLists)639 std::unique_ptr<ModuleImportsManager> ModuleImportsManager::create(
640     function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
641         IsPrevailing,
642     const ModuleSummaryIndex &Index,
643     DenseMap<StringRef, FunctionImporter::ExportSetTy> *ExportLists) {
644   if (WorkloadDefinitions.empty()) {
645     LLVM_DEBUG(dbgs() << "[Workload] Using the regular imports manager.\n");
646     return std::unique_ptr<ModuleImportsManager>(
647         new ModuleImportsManager(IsPrevailing, Index, ExportLists));
648   }
649   LLVM_DEBUG(dbgs() << "[Workload] Using the contextual imports manager.\n");
650   return std::make_unique<WorkloadImportsManager>(IsPrevailing, Index,
651                                                   ExportLists);
652 }
653 
654 static const char *
getFailureName(FunctionImporter::ImportFailureReason Reason)655 getFailureName(FunctionImporter::ImportFailureReason Reason) {
656   switch (Reason) {
657   case FunctionImporter::ImportFailureReason::None:
658     return "None";
659   case FunctionImporter::ImportFailureReason::GlobalVar:
660     return "GlobalVar";
661   case FunctionImporter::ImportFailureReason::NotLive:
662     return "NotLive";
663   case FunctionImporter::ImportFailureReason::TooLarge:
664     return "TooLarge";
665   case FunctionImporter::ImportFailureReason::InterposableLinkage:
666     return "InterposableLinkage";
667   case FunctionImporter::ImportFailureReason::LocalLinkageNotInModule:
668     return "LocalLinkageNotInModule";
669   case FunctionImporter::ImportFailureReason::NotEligible:
670     return "NotEligible";
671   case FunctionImporter::ImportFailureReason::NoInline:
672     return "NoInline";
673   }
674   llvm_unreachable("invalid reason");
675 }
676 
/// Compute the list of functions to import for a given caller.
///
/// Walks every call edge of \p Summary. For each callee that is not already
/// defined in the destination module, selectCallee() is asked for a summary
/// that qualifies under a hotness-scaled threshold. A selected callee is
/// recorded in \p ImportList under its defining module and, when
/// \p ExportLists is non-null, in that module's export set. It is then pushed
/// on \p Worklist so that its own callees get examined by a later call.
/// What the callee itself calls or references is not exported here; see the
/// comment next to the ExportLists update below.
///
/// \param Summary            Caller whose call edges are walked.
/// \param Index              Combined index, forwarded to selectCallee().
/// \param Threshold          Base threshold for this caller; scaled per edge
///                           by a hotness-dependent multiplier.
/// \param DefinedGVSummaries Callees whose GUID is a key of this map are
///                           skipped as already present in the destination.
/// \param isPrevailing       Not referenced in this function body.
/// \param Worklist           Receives a (callee summary, adjusted threshold)
///                           entry for each edge that is not skipped.
/// \param GVImporter         Notified of \p Summary via onImportingSummary().
/// \param ImportList         Per-exporting-module sets of GUIDs to import.
/// \param ExportLists        Optional per-module export sets; may be null.
/// \param ImportThresholds   Per-GUID memo of (threshold processed so far,
///                           selected callee summary or null, failure info).
static void computeImportForFunction(
    const FunctionSummary &Summary, const ModuleSummaryIndex &Index,
    const unsigned Threshold, const GVSummaryMapTy &DefinedGVSummaries,
    function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
        isPrevailing,
    SmallVectorImpl<EdgeInfo> &Worklist, GlobalsImporter &GVImporter,
    FunctionImporter::ImportMapTy &ImportList,
    DenseMap<StringRef, FunctionImporter::ExportSetTy> *ExportLists,
    FunctionImporter::ImportThresholdsTy &ImportThresholds) {
  GVImporter.onImportingSummary(Summary);
  // Function-local static: the count is shared by every invocation of this
  // function, not reset per caller or per module.
  static int ImportCount = 0;
  for (const auto &Edge : Summary.calls()) {
    ValueInfo VI = Edge.first;
    LLVM_DEBUG(dbgs() << " edge -> " << VI << " Threshold:" << Threshold
                      << "\n");

    // A negative ImportCutoff disables the cutoff; otherwise stop considering
    // edges once ImportCount has reached it.
    if (ImportCutoff >= 0 && ImportCount >= ImportCutoff) {
      LLVM_DEBUG(dbgs() << "ignored! import-cutoff value of " << ImportCutoff
                        << " reached.\n");
      continue;
    }

    if (DefinedGVSummaries.count(VI.getGUID())) {
      // FIXME: Consider not skipping import if the module contains
      // a non-prevailing def with interposable linkage. The prevailing copy
      // can safely be imported (see shouldImportGlobal()).
      LLVM_DEBUG(dbgs() << "ignored! Target already in destination module.\n");
      continue;
    }

    // Pick the threshold multiplier for the edge's hotness; any hotness other
    // than Hot, Cold or Critical leaves the threshold unscaled.
    auto GetBonusMultiplier = [](CalleeInfo::HotnessType Hotness) -> float {
      if (Hotness == CalleeInfo::HotnessType::Hot)
        return ImportHotMultiplier;
      if (Hotness == CalleeInfo::HotnessType::Cold)
        return ImportColdMultiplier;
      if (Hotness == CalleeInfo::HotnessType::Critical)
        return ImportCriticalMultiplier;
      return 1.0;
    };

    const auto NewThreshold =
        Threshold * GetBonusMultiplier(Edge.second.getHotness());

    // Try to create the memo entry for this GUID. IT.second is false when an
    // entry already existed, i.e. the callee was considered before. The three
    // references below alias the stored tuple, so assignments through them
    // update the memo in place.
    auto IT = ImportThresholds.insert(std::make_pair(
        VI.getGUID(), std::make_tuple(NewThreshold, nullptr, nullptr)));
    bool PreviouslyVisited = !IT.second;
    auto &ProcessedThreshold = std::get<0>(IT.first->second);
    auto &CalleeSummary = std::get<1>(IT.first->second);
    auto &FailureInfo = std::get<2>(IT.first->second);

    bool IsHotCallsite =
        Edge.second.getHotness() == CalleeInfo::HotnessType::Hot;
    bool IsCriticalCallsite =
        Edge.second.getHotness() == CalleeInfo::HotnessType::Critical;

    const FunctionSummary *ResolvedCalleeSummary = nullptr;
    if (CalleeSummary) {
      // A non-null memoized summary means this callee was already selected
      // for import on an earlier visit.
      assert(PreviouslyVisited);
      // Since the traversal of the call graph is DFS, we can revisit a function
      // a second time with a higher threshold. In this case, it is added back
      // to the worklist with the new threshold (so that its own callee chains
      // can be considered with the higher threshold).
      if (NewThreshold <= ProcessedThreshold) {
        LLVM_DEBUG(
            dbgs() << "ignored! Target was already imported with Threshold "
                   << ProcessedThreshold << "\n");
        continue;
      }
      // Update with new larger threshold.
      ProcessedThreshold = NewThreshold;
      ResolvedCalleeSummary = cast<FunctionSummary>(CalleeSummary);
    } else {
      // If we already rejected importing a callee at the same or higher
      // threshold, don't waste time calling selectCallee.
      if (PreviouslyVisited && NewThreshold <= ProcessedThreshold) {
        LLVM_DEBUG(
            dbgs() << "ignored! Target was already rejected with Threshold "
            << ProcessedThreshold << "\n");
        if (PrintImportFailures) {
          assert(FailureInfo &&
                 "Expected FailureInfo for previously rejected candidate");
          // Only the attempt count is bumped on this path; Reason and
          // MaxHotness keep the values from the last selectCallee() attempt.
          FailureInfo->Attempts++;
        }
        continue;
      }

      FunctionImporter::ImportFailureReason Reason{};
      CalleeSummary = selectCallee(Index, VI.getSummaryList(), NewThreshold,
                                   Summary.modulePath(), Reason);
      if (!CalleeSummary) {
        // Update with new larger threshold if this was a retry (otherwise
        // we would have already inserted with NewThreshold above). Also
        // update failure info if requested.
        if (PreviouslyVisited) {
          ProcessedThreshold = NewThreshold;
          if (PrintImportFailures) {
            assert(FailureInfo &&
                   "Expected FailureInfo for previously rejected candidate");
            FailureInfo->Reason = Reason;
            FailureInfo->Attempts++;
            FailureInfo->MaxHotness =
                std::max(FailureInfo->MaxHotness, Edge.second.getHotness());
          }
        } else if (PrintImportFailures) {
          assert(!FailureInfo &&
                 "Expected no FailureInfo for newly rejected candidate");
          FailureInfo = std::make_unique<FunctionImporter::ImportFailureInfo>(
              VI, Edge.second.getHotness(), Reason, 1);
        }
        if (ForceImportAll) {
          // With forced importing a rejection is reported on errs(), and the
          // break abandons the remaining call edges of this caller.
          std::string Msg = std::string("Failed to import function ") +
                            VI.name().str() + " due to " +
                            getFailureName(Reason);
          auto Error = make_error<StringError>(
              Msg, make_error_code(errc::not_supported));
          logAllUnhandledErrors(std::move(Error), errs(),
                                "Error importing module: ");
          break;
        } else {
          LLVM_DEBUG(dbgs()
                     << "ignored! No qualifying callee with summary found.\n");
          continue;
        }
      }

      // "Resolve" the summary
      // The memo entry is overwritten with the base object, so later visits
      // see the resolved summary rather than what selectCallee() returned.
      CalleeSummary = CalleeSummary->getBaseObject();
      ResolvedCalleeSummary = cast<FunctionSummary>(CalleeSummary);

      assert((ResolvedCalleeSummary->fflags().AlwaysInline || ForceImportAll ||
              (ResolvedCalleeSummary->instCount() <= NewThreshold)) &&
             "selectCallee() didn't honor the threshold");

      auto ExportModulePath = ResolvedCalleeSummary->modulePath();
      auto ILI = ImportList[ExportModulePath].insert(VI.getGUID());
      // We previously decided to import this GUID definition if it was already
      // inserted in the set of imports from the exporting module.
      bool PreviouslyImported = !ILI.second;
      if (!PreviouslyImported) {
        // Statistics are only bumped the first time a GUID enters ImportList.
        NumImportedFunctionsThinLink++;
        if (IsHotCallsite)
          NumImportedHotFunctionsThinLink++;
        if (IsCriticalCallsite)
          NumImportedCriticalFunctionsThinLink++;
      }

      // Any calls/references made by this function will be marked exported
      // later, in ComputeCrossModuleImport, after import decisions are
      // complete, which is more efficient than adding them here.
      if (ExportLists)
        (*ExportLists)[ExportModulePath].insert(VI);
    }

    auto GetAdjustedThreshold = [](unsigned Threshold, bool IsHotCallsite) {
      // Adjust the threshold for next level of imported functions.
      // The threshold is different for hot callsites because we can then
      // inline chains of hot calls.
      if (IsHotCallsite)
        return Threshold * ImportHotInstrFactor;
      return Threshold * ImportInstrFactor;
    };

    // Note that the adjustment is applied to the caller's base Threshold, not
    // to the hotness-scaled NewThreshold.
    const auto AdjThreshold = GetAdjustedThreshold(Threshold, IsHotCallsite);

    // Counted once for every edge that reaches this point, including
    // revisits of an already imported callee with a higher threshold.
    ImportCount++;

    // Insert the newly imported function to the worklist.
    Worklist.emplace_back(ResolvedCalleeSummary, AdjThreshold);
  }
}
850 
computeImportForModule(const GVSummaryMapTy & DefinedGVSummaries,StringRef ModName,FunctionImporter::ImportMapTy & ImportList)851 void ModuleImportsManager::computeImportForModule(
852     const GVSummaryMapTy &DefinedGVSummaries, StringRef ModName,
853     FunctionImporter::ImportMapTy &ImportList) {
854   // Worklist contains the list of function imported in this module, for which
855   // we will analyse the callees and may import further down the callgraph.
856   SmallVector<EdgeInfo, 128> Worklist;
857   GlobalsImporter GVI(Index, DefinedGVSummaries, IsPrevailing, ImportList,
858                       ExportLists);
859   FunctionImporter::ImportThresholdsTy ImportThresholds;
860 
861   // Populate the worklist with the import for the functions in the current
862   // module
863   for (const auto &GVSummary : DefinedGVSummaries) {
864 #ifndef NDEBUG
865     // FIXME: Change the GVSummaryMapTy to hold ValueInfo instead of GUID
866     // so this map look up (and possibly others) can be avoided.
867     auto VI = Index.getValueInfo(GVSummary.first);
868 #endif
869     if (!Index.isGlobalValueLive(GVSummary.second)) {
870       LLVM_DEBUG(dbgs() << "Ignores Dead GUID: " << VI << "\n");
871       continue;
872     }
873     auto *FuncSummary =
874         dyn_cast<FunctionSummary>(GVSummary.second->getBaseObject());
875     if (!FuncSummary)
876       // Skip import for global variables
877       continue;
878     LLVM_DEBUG(dbgs() << "Initialize import for " << VI << "\n");
879     computeImportForFunction(*FuncSummary, Index, ImportInstrLimit,
880                              DefinedGVSummaries, IsPrevailing, Worklist, GVI,
881                              ImportList, ExportLists, ImportThresholds);
882   }
883 
884   // Process the newly imported functions and add callees to the worklist.
885   while (!Worklist.empty()) {
886     auto GVInfo = Worklist.pop_back_val();
887     auto *Summary = std::get<0>(GVInfo);
888     auto Threshold = std::get<1>(GVInfo);
889 
890     if (auto *FS = dyn_cast<FunctionSummary>(Summary))
891       computeImportForFunction(*FS, Index, Threshold, DefinedGVSummaries,
892                                IsPrevailing, Worklist, GVI, ImportList,
893                                ExportLists, ImportThresholds);
894   }
895 
896   // Print stats about functions considered but rejected for importing
897   // when requested.
898   if (PrintImportFailures) {
899     dbgs() << "Missed imports into module " << ModName << "\n";
900     for (auto &I : ImportThresholds) {
901       auto &ProcessedThreshold = std::get<0>(I.second);
902       auto &CalleeSummary = std::get<1>(I.second);
903       auto &FailureInfo = std::get<2>(I.second);
904       if (CalleeSummary)
905         continue; // We are going to import.
906       assert(FailureInfo);
907       FunctionSummary *FS = nullptr;
908       if (!FailureInfo->VI.getSummaryList().empty())
909         FS = dyn_cast<FunctionSummary>(
910             FailureInfo->VI.getSummaryList()[0]->getBaseObject());
911       dbgs() << FailureInfo->VI
912              << ": Reason = " << getFailureName(FailureInfo->Reason)
913              << ", Threshold = " << ProcessedThreshold
914              << ", Size = " << (FS ? (int)FS->instCount() : -1)
915              << ", MaxHotness = " << getHotnessName(FailureInfo->MaxHotness)
916              << ", Attempts = " << FailureInfo->Attempts << "\n";
917     }
918   }
919 }
920 
921 #ifndef NDEBUG
isGlobalVarSummary(const ModuleSummaryIndex & Index,ValueInfo VI)922 static bool isGlobalVarSummary(const ModuleSummaryIndex &Index, ValueInfo VI) {
923   auto SL = VI.getSummaryList();
924   return SL.empty()
925              ? false
926              : SL[0]->getSummaryKind() == GlobalValueSummary::GlobalVarKind;
927 }
928 
isGlobalVarSummary(const ModuleSummaryIndex & Index,GlobalValue::GUID G)929 static bool isGlobalVarSummary(const ModuleSummaryIndex &Index,
930                                GlobalValue::GUID G) {
931   if (const auto &VI = Index.getValueInfo(G))
932     return isGlobalVarSummary(Index, VI);
933   return false;
934 }
935 
936 template <class T>
numGlobalVarSummaries(const ModuleSummaryIndex & Index,T & Cont)937 static unsigned numGlobalVarSummaries(const ModuleSummaryIndex &Index,
938                                       T &Cont) {
939   unsigned NumGVS = 0;
940   for (auto &V : Cont)
941     if (isGlobalVarSummary(Index, V))
942       ++NumGVS;
943   return NumGVS;
944 }
945 #endif
946 
947 #ifndef NDEBUG
checkVariableImport(const ModuleSummaryIndex & Index,DenseMap<StringRef,FunctionImporter::ImportMapTy> & ImportLists,DenseMap<StringRef,FunctionImporter::ExportSetTy> & ExportLists)948 static bool checkVariableImport(
949     const ModuleSummaryIndex &Index,
950     DenseMap<StringRef, FunctionImporter::ImportMapTy> &ImportLists,
951     DenseMap<StringRef, FunctionImporter::ExportSetTy> &ExportLists) {
952 
953   DenseSet<GlobalValue::GUID> FlattenedImports;
954 
955   for (auto &ImportPerModule : ImportLists)
956     for (auto &ExportPerModule : ImportPerModule.second)
957       FlattenedImports.insert(ExportPerModule.second.begin(),
958                               ExportPerModule.second.end());
959 
960   // Checks that all GUIDs of read/writeonly vars we see in export lists
961   // are also in the import lists. Otherwise we my face linker undefs,
962   // because readonly and writeonly vars are internalized in their
963   // source modules. The exception would be if it has a linkage type indicating
964   // that there may have been a copy existing in the importing module (e.g.
965   // linkonce_odr). In that case we cannot accurately do this checking.
966   auto IsReadOrWriteOnlyVarNeedingImporting = [&](StringRef ModulePath,
967                                                   const ValueInfo &VI) {
968     auto *GVS = dyn_cast_or_null<GlobalVarSummary>(
969         Index.findSummaryInModule(VI, ModulePath));
970     return GVS && (Index.isReadOnly(GVS) || Index.isWriteOnly(GVS)) &&
971            !(GVS->linkage() == GlobalValue::AvailableExternallyLinkage ||
972              GVS->linkage() == GlobalValue::WeakODRLinkage ||
973              GVS->linkage() == GlobalValue::LinkOnceODRLinkage);
974   };
975 
976   for (auto &ExportPerModule : ExportLists)
977     for (auto &VI : ExportPerModule.second)
978       if (!FlattenedImports.count(VI.getGUID()) &&
979           IsReadOrWriteOnlyVarNeedingImporting(ExportPerModule.first, VI))
980         return false;
981 
982   return true;
983 }
984 #endif
985 
986 /// Compute all the import and export for every module using the Index.
ComputeCrossModuleImport(const ModuleSummaryIndex & Index,const DenseMap<StringRef,GVSummaryMapTy> & ModuleToDefinedGVSummaries,function_ref<bool (GlobalValue::GUID,const GlobalValueSummary *)> isPrevailing,DenseMap<StringRef,FunctionImporter::ImportMapTy> & ImportLists,DenseMap<StringRef,FunctionImporter::ExportSetTy> & ExportLists)987 void llvm::ComputeCrossModuleImport(
988     const ModuleSummaryIndex &Index,
989     const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
990     function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
991         isPrevailing,
992     DenseMap<StringRef, FunctionImporter::ImportMapTy> &ImportLists,
993     DenseMap<StringRef, FunctionImporter::ExportSetTy> &ExportLists) {
994   auto MIS = ModuleImportsManager::create(isPrevailing, Index, &ExportLists);
995   // For each module that has function defined, compute the import/export lists.
996   for (const auto &DefinedGVSummaries : ModuleToDefinedGVSummaries) {
997     auto &ImportList = ImportLists[DefinedGVSummaries.first];
998     LLVM_DEBUG(dbgs() << "Computing import for Module '"
999                       << DefinedGVSummaries.first << "'\n");
1000     MIS->computeImportForModule(DefinedGVSummaries.second,
1001                                 DefinedGVSummaries.first, ImportList);
1002   }
1003 
1004   // When computing imports we only added the variables and functions being
1005   // imported to the export list. We also need to mark any references and calls
1006   // they make as exported as well. We do this here, as it is more efficient
1007   // since we may import the same values multiple times into different modules
1008   // during the import computation.
1009   for (auto &ELI : ExportLists) {
1010     FunctionImporter::ExportSetTy NewExports;
1011     const auto &DefinedGVSummaries =
1012         ModuleToDefinedGVSummaries.lookup(ELI.first);
1013     for (auto &EI : ELI.second) {
1014       // Find the copy defined in the exporting module so that we can mark the
1015       // values it references in that specific definition as exported.
1016       // Below we will add all references and called values, without regard to
1017       // whether they are also defined in this module. We subsequently prune the
1018       // list to only include those defined in the exporting module, see comment
1019       // there as to why.
1020       auto DS = DefinedGVSummaries.find(EI.getGUID());
1021       // Anything marked exported during the import computation must have been
1022       // defined in the exporting module.
1023       assert(DS != DefinedGVSummaries.end());
1024       auto *S = DS->getSecond();
1025       S = S->getBaseObject();
1026       if (auto *GVS = dyn_cast<GlobalVarSummary>(S)) {
1027         // Export referenced functions and variables. We don't export/promote
1028         // objects referenced by writeonly variable initializer, because
1029         // we convert such variables initializers to "zeroinitializer".
1030         // See processGlobalForThinLTO.
1031         if (!Index.isWriteOnly(GVS))
1032           for (const auto &VI : GVS->refs())
1033             NewExports.insert(VI);
1034       } else {
1035         auto *FS = cast<FunctionSummary>(S);
1036         for (const auto &Edge : FS->calls())
1037           NewExports.insert(Edge.first);
1038         for (const auto &Ref : FS->refs())
1039           NewExports.insert(Ref);
1040       }
1041     }
1042     // Prune list computed above to only include values defined in the exporting
1043     // module. We do this after the above insertion since we may hit the same
1044     // ref/call target multiple times in above loop, and it is more efficient to
1045     // avoid a set lookup each time.
1046     for (auto EI = NewExports.begin(); EI != NewExports.end();) {
1047       if (!DefinedGVSummaries.count(EI->getGUID()))
1048         NewExports.erase(EI++);
1049       else
1050         ++EI;
1051     }
1052     ELI.second.insert(NewExports.begin(), NewExports.end());
1053   }
1054 
1055   assert(checkVariableImport(Index, ImportLists, ExportLists));
1056 #ifndef NDEBUG
1057   LLVM_DEBUG(dbgs() << "Import/Export lists for " << ImportLists.size()
1058                     << " modules:\n");
1059   for (auto &ModuleImports : ImportLists) {
1060     auto ModName = ModuleImports.first;
1061     auto &Exports = ExportLists[ModName];
1062     unsigned NumGVS = numGlobalVarSummaries(Index, Exports);
1063     LLVM_DEBUG(dbgs() << "* Module " << ModName << " exports "
1064                       << Exports.size() - NumGVS << " functions and " << NumGVS
1065                       << " vars. Imports from " << ModuleImports.second.size()
1066                       << " modules.\n");
1067     for (auto &Src : ModuleImports.second) {
1068       auto SrcModName = Src.first;
1069       unsigned NumGVSPerMod = numGlobalVarSummaries(Index, Src.second);
1070       LLVM_DEBUG(dbgs() << " - " << Src.second.size() - NumGVSPerMod
1071                         << " functions imported from " << SrcModName << "\n");
1072       LLVM_DEBUG(dbgs() << " - " << NumGVSPerMod
1073                         << " global vars imported from " << SrcModName << "\n");
1074     }
1075   }
1076 #endif
1077 }
1078 
1079 #ifndef NDEBUG
dumpImportListForModule(const ModuleSummaryIndex & Index,StringRef ModulePath,FunctionImporter::ImportMapTy & ImportList)1080 static void dumpImportListForModule(const ModuleSummaryIndex &Index,
1081                                     StringRef ModulePath,
1082                                     FunctionImporter::ImportMapTy &ImportList) {
1083   LLVM_DEBUG(dbgs() << "* Module " << ModulePath << " imports from "
1084                     << ImportList.size() << " modules.\n");
1085   for (auto &Src : ImportList) {
1086     auto SrcModName = Src.first;
1087     unsigned NumGVSPerMod = numGlobalVarSummaries(Index, Src.second);
1088     LLVM_DEBUG(dbgs() << " - " << Src.second.size() - NumGVSPerMod
1089                       << " functions imported from " << SrcModName << "\n");
1090     LLVM_DEBUG(dbgs() << " - " << NumGVSPerMod << " vars imported from "
1091                       << SrcModName << "\n");
1092   }
1093 }
1094 #endif
1095 
1096 /// Compute all the imports for the given module using the Index.
1097 ///
1098 /// \p isPrevailing is a callback that will be called with a global value's GUID
1099 /// and summary and should return whether the module corresponding to the
1100 /// summary contains the linker-prevailing copy of that value.
1101 ///
1102 /// \p ImportList will be populated with a map that can be passed to
1103 /// FunctionImporter::importFunctions() above (see description there).
ComputeCrossModuleImportForModuleForTest(StringRef ModulePath,function_ref<bool (GlobalValue::GUID,const GlobalValueSummary *)> isPrevailing,const ModuleSummaryIndex & Index,FunctionImporter::ImportMapTy & ImportList)1104 static void ComputeCrossModuleImportForModuleForTest(
1105     StringRef ModulePath,
1106     function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
1107         isPrevailing,
1108     const ModuleSummaryIndex &Index,
1109     FunctionImporter::ImportMapTy &ImportList) {
1110   // Collect the list of functions this module defines.
1111   // GUID -> Summary
1112   GVSummaryMapTy FunctionSummaryMap;
1113   Index.collectDefinedFunctionsForModule(ModulePath, FunctionSummaryMap);
1114 
1115   // Compute the import list for this module.
1116   LLVM_DEBUG(dbgs() << "Computing import for Module '" << ModulePath << "'\n");
1117   auto MIS = ModuleImportsManager::create(isPrevailing, Index);
1118   MIS->computeImportForModule(FunctionSummaryMap, ModulePath, ImportList);
1119 
1120 #ifndef NDEBUG
1121   dumpImportListForModule(Index, ModulePath, ImportList);
1122 #endif
1123 }
1124 
1125 /// Mark all external summaries in \p Index for import into the given module.
1126 /// Used for testing the case of distributed builds using a distributed index.
1127 ///
1128 /// \p ImportList will be populated with a map that can be passed to
1129 /// FunctionImporter::importFunctions() above (see description there).
ComputeCrossModuleImportForModuleFromIndexForTest(StringRef ModulePath,const ModuleSummaryIndex & Index,FunctionImporter::ImportMapTy & ImportList)1130 static void ComputeCrossModuleImportForModuleFromIndexForTest(
1131     StringRef ModulePath, const ModuleSummaryIndex &Index,
1132     FunctionImporter::ImportMapTy &ImportList) {
1133   for (const auto &GlobalList : Index) {
1134     // Ignore entries for undefined references.
1135     if (GlobalList.second.SummaryList.empty())
1136       continue;
1137 
1138     auto GUID = GlobalList.first;
1139     assert(GlobalList.second.SummaryList.size() == 1 &&
1140            "Expected individual combined index to have one summary per GUID");
1141     auto &Summary = GlobalList.second.SummaryList[0];
1142     // Skip the summaries for the importing module. These are included to
1143     // e.g. record required linkage changes.
1144     if (Summary->modulePath() == ModulePath)
1145       continue;
1146     // Add an entry to provoke importing by thinBackend.
1147     ImportList[Summary->modulePath()].insert(GUID);
1148   }
1149 #ifndef NDEBUG
1150   dumpImportListForModule(Index, ModulePath, ImportList);
1151 #endif
1152 }
1153 
1154 // For SamplePGO, the indirect call targets for local functions will
1155 // have its original name annotated in profile. We try to find the
1156 // corresponding PGOFuncName as the GUID, and fix up the edges
1157 // accordingly.
updateValueInfoForIndirectCalls(ModuleSummaryIndex & Index,FunctionSummary * FS)1158 void updateValueInfoForIndirectCalls(ModuleSummaryIndex &Index,
1159                                      FunctionSummary *FS) {
1160   for (auto &EI : FS->mutableCalls()) {
1161     if (!EI.first.getSummaryList().empty())
1162       continue;
1163     auto GUID = Index.getGUIDFromOriginalID(EI.first.getGUID());
1164     if (GUID == 0)
1165       continue;
1166     // Update the edge to point directly to the correct GUID.
1167     auto VI = Index.getValueInfo(GUID);
1168     if (llvm::any_of(
1169             VI.getSummaryList(),
1170             [&](const std::unique_ptr<GlobalValueSummary> &SummaryPtr) {
1171               // The mapping from OriginalId to GUID may return a GUID
1172               // that corresponds to a static variable. Filter it out here.
1173               // This can happen when
1174               // 1) There is a call to a library function which is not defined
1175               // in the index.
1176               // 2) There is a static variable with the  OriginalGUID identical
1177               // to the GUID of the library function in 1);
1178               // When this happens the static variable in 2) will be found,
1179               // which needs to be filtered out.
1180               return SummaryPtr->getSummaryKind() ==
1181                      GlobalValueSummary::GlobalVarKind;
1182             }))
1183       continue;
1184     EI.first = VI;
1185   }
1186 }
1187 
updateIndirectCalls(ModuleSummaryIndex & Index)1188 void llvm::updateIndirectCalls(ModuleSummaryIndex &Index) {
1189   for (const auto &Entry : Index) {
1190     for (const auto &S : Entry.second.SummaryList) {
1191       if (auto *FS = dyn_cast<FunctionSummary>(S.get()))
1192         updateValueInfoForIndirectCalls(Index, FS);
1193     }
1194   }
1195 }
1196 
computeDeadSymbolsAndUpdateIndirectCalls(ModuleSummaryIndex & Index,const DenseSet<GlobalValue::GUID> & GUIDPreservedSymbols,function_ref<PrevailingType (GlobalValue::GUID)> isPrevailing)1197 void llvm::computeDeadSymbolsAndUpdateIndirectCalls(
1198     ModuleSummaryIndex &Index,
1199     const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols,
1200     function_ref<PrevailingType(GlobalValue::GUID)> isPrevailing) {
1201   assert(!Index.withGlobalValueDeadStripping());
1202   if (!ComputeDead ||
1203       // Don't do anything when nothing is live, this is friendly with tests.
1204       GUIDPreservedSymbols.empty()) {
1205     // Still need to update indirect calls.
1206     updateIndirectCalls(Index);
1207     return;
1208   }
1209   unsigned LiveSymbols = 0;
1210   SmallVector<ValueInfo, 128> Worklist;
1211   Worklist.reserve(GUIDPreservedSymbols.size() * 2);
1212   for (auto GUID : GUIDPreservedSymbols) {
1213     ValueInfo VI = Index.getValueInfo(GUID);
1214     if (!VI)
1215       continue;
1216     for (const auto &S : VI.getSummaryList())
1217       S->setLive(true);
1218   }
1219 
1220   // Add values flagged in the index as live roots to the worklist.
1221   for (const auto &Entry : Index) {
1222     auto VI = Index.getValueInfo(Entry);
1223     for (const auto &S : Entry.second.SummaryList) {
1224       if (auto *FS = dyn_cast<FunctionSummary>(S.get()))
1225         updateValueInfoForIndirectCalls(Index, FS);
1226       if (S->isLive()) {
1227         LLVM_DEBUG(dbgs() << "Live root: " << VI << "\n");
1228         Worklist.push_back(VI);
1229         ++LiveSymbols;
1230         break;
1231       }
1232     }
1233   }
1234 
1235   // Make value live and add it to the worklist if it was not live before.
1236   auto visit = [&](ValueInfo VI, bool IsAliasee) {
1237     // FIXME: If we knew which edges were created for indirect call profiles,
1238     // we could skip them here. Any that are live should be reached via
1239     // other edges, e.g. reference edges. Otherwise, using a profile collected
1240     // on a slightly different binary might provoke preserving, importing
1241     // and ultimately promoting calls to functions not linked into this
1242     // binary, which increases the binary size unnecessarily. Note that
1243     // if this code changes, the importer needs to change so that edges
1244     // to functions marked dead are skipped.
1245 
1246     if (llvm::any_of(VI.getSummaryList(),
1247                      [](const std::unique_ptr<llvm::GlobalValueSummary> &S) {
1248                        return S->isLive();
1249                      }))
1250       return;
1251 
    // We only keep live symbols that are known to be non-prevailing if any are
    // available_externally, linkonceodr, weakodr. Those symbols are discarded
    // later in the EliminateAvailableExternally pass and setting them to
    // not-live could break downstream users of liveness information (PR36483)
    // or limit optimization opportunities.
1257     if (isPrevailing(VI.getGUID()) == PrevailingType::No) {
1258       bool KeepAliveLinkage = false;
1259       bool Interposable = false;
1260       for (const auto &S : VI.getSummaryList()) {
1261         if (S->linkage() == GlobalValue::AvailableExternallyLinkage ||
1262             S->linkage() == GlobalValue::WeakODRLinkage ||
1263             S->linkage() == GlobalValue::LinkOnceODRLinkage)
1264           KeepAliveLinkage = true;
1265         else if (GlobalValue::isInterposableLinkage(S->linkage()))
1266           Interposable = true;
1267       }
1268 
1269       if (!IsAliasee) {
1270         if (!KeepAliveLinkage)
1271           return;
1272 
1273         if (Interposable)
1274           report_fatal_error(
1275               "Interposable and available_externally/linkonce_odr/weak_odr "
1276               "symbol");
1277       }
1278     }
1279 
1280     for (const auto &S : VI.getSummaryList())
1281       S->setLive(true);
1282     ++LiveSymbols;
1283     Worklist.push_back(VI);
1284   };
1285 
1286   while (!Worklist.empty()) {
1287     auto VI = Worklist.pop_back_val();
1288     for (const auto &Summary : VI.getSummaryList()) {
1289       if (auto *AS = dyn_cast<AliasSummary>(Summary.get())) {
1290         // If this is an alias, visit the aliasee VI to ensure that all copies
1291         // are marked live and it is added to the worklist for further
1292         // processing of its references.
1293         visit(AS->getAliaseeVI(), true);
1294         continue;
1295       }
1296       for (auto Ref : Summary->refs())
1297         visit(Ref, false);
1298       if (auto *FS = dyn_cast<FunctionSummary>(Summary.get()))
1299         for (auto Call : FS->calls())
1300           visit(Call.first, false);
1301     }
1302   }
1303   Index.setWithGlobalValueDeadStripping();
1304 
1305   unsigned DeadSymbols = Index.size() - LiveSymbols;
1306   LLVM_DEBUG(dbgs() << LiveSymbols << " symbols Live, and " << DeadSymbols
1307                     << " symbols Dead \n");
1308   NumDeadSymbols += DeadSymbols;
1309   NumLiveSymbols += LiveSymbols;
1310 }
1311 
1312 // Compute dead symbols and propagate constants in combined index.
computeDeadSymbolsWithConstProp(ModuleSummaryIndex & Index,const DenseSet<GlobalValue::GUID> & GUIDPreservedSymbols,function_ref<PrevailingType (GlobalValue::GUID)> isPrevailing,bool ImportEnabled)1313 void llvm::computeDeadSymbolsWithConstProp(
1314     ModuleSummaryIndex &Index,
1315     const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols,
1316     function_ref<PrevailingType(GlobalValue::GUID)> isPrevailing,
1317     bool ImportEnabled) {
1318   computeDeadSymbolsAndUpdateIndirectCalls(Index, GUIDPreservedSymbols,
1319                                            isPrevailing);
1320   if (ImportEnabled)
1321     Index.propagateAttributes(GUIDPreservedSymbols);
1322 }
1323 
1324 /// Compute the set of summaries needed for a ThinLTO backend compilation of
1325 /// \p ModulePath.
gatherImportedSummariesForModule(StringRef ModulePath,const DenseMap<StringRef,GVSummaryMapTy> & ModuleToDefinedGVSummaries,const FunctionImporter::ImportMapTy & ImportList,std::map<std::string,GVSummaryMapTy> & ModuleToSummariesForIndex)1326 void llvm::gatherImportedSummariesForModule(
1327     StringRef ModulePath,
1328     const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
1329     const FunctionImporter::ImportMapTy &ImportList,
1330     std::map<std::string, GVSummaryMapTy> &ModuleToSummariesForIndex) {
1331   // Include all summaries from the importing module.
1332   ModuleToSummariesForIndex[std::string(ModulePath)] =
1333       ModuleToDefinedGVSummaries.lookup(ModulePath);
1334   // Include summaries for imports.
1335   for (const auto &ILI : ImportList) {
1336     auto &SummariesForIndex = ModuleToSummariesForIndex[std::string(ILI.first)];
1337     const auto &DefinedGVSummaries =
1338         ModuleToDefinedGVSummaries.lookup(ILI.first);
1339     for (const auto &GI : ILI.second) {
1340       const auto &DS = DefinedGVSummaries.find(GI);
1341       assert(DS != DefinedGVSummaries.end() &&
1342              "Expected a defined summary for imported global value");
1343       SummariesForIndex[GI] = DS->second;
1344     }
1345   }
1346 }
1347 
1348 /// Emit the files \p ModulePath will import from into \p OutputFilename.
EmitImportsFiles(StringRef ModulePath,StringRef OutputFilename,const std::map<std::string,GVSummaryMapTy> & ModuleToSummariesForIndex)1349 std::error_code llvm::EmitImportsFiles(
1350     StringRef ModulePath, StringRef OutputFilename,
1351     const std::map<std::string, GVSummaryMapTy> &ModuleToSummariesForIndex) {
1352   std::error_code EC;
1353   raw_fd_ostream ImportsOS(OutputFilename, EC, sys::fs::OpenFlags::OF_None);
1354   if (EC)
1355     return EC;
1356   for (const auto &ILI : ModuleToSummariesForIndex)
1357     // The ModuleToSummariesForIndex map includes an entry for the current
1358     // Module (needed for writing out the index files). We don't want to
1359     // include it in the imports file, however, so filter it out.
1360     if (ILI.first != ModulePath)
1361       ImportsOS << ILI.first << "\n";
1362   return std::error_code();
1363 }
1364 
convertToDeclaration(GlobalValue & GV)1365 bool llvm::convertToDeclaration(GlobalValue &GV) {
1366   LLVM_DEBUG(dbgs() << "Converting to a declaration: `" << GV.getName()
1367                     << "\n");
1368   if (Function *F = dyn_cast<Function>(&GV)) {
1369     F->deleteBody();
1370     F->clearMetadata();
1371     F->setComdat(nullptr);
1372   } else if (GlobalVariable *V = dyn_cast<GlobalVariable>(&GV)) {
1373     V->setInitializer(nullptr);
1374     V->setLinkage(GlobalValue::ExternalLinkage);
1375     V->clearMetadata();
1376     V->setComdat(nullptr);
1377   } else {
1378     GlobalValue *NewGV;
1379     if (GV.getValueType()->isFunctionTy())
1380       NewGV =
1381           Function::Create(cast<FunctionType>(GV.getValueType()),
1382                            GlobalValue::ExternalLinkage, GV.getAddressSpace(),
1383                            "", GV.getParent());
1384     else
1385       NewGV =
1386           new GlobalVariable(*GV.getParent(), GV.getValueType(),
1387                              /*isConstant*/ false, GlobalValue::ExternalLinkage,
1388                              /*init*/ nullptr, "",
1389                              /*insertbefore*/ nullptr, GV.getThreadLocalMode(),
1390                              GV.getType()->getAddressSpace());
1391     NewGV->takeName(&GV);
1392     GV.replaceAllUsesWith(NewGV);
1393     return false;
1394   }
1395   if (!GV.isImplicitDSOLocal())
1396     GV.setDSOLocal(false);
1397   return true;
1398 }
1399 
/// Apply the per-symbol results recorded in \p DefinedGlobals to the global
/// values of \p TheModule.
///
/// For each function, global variable and alias that has an entry in
/// \p DefinedGlobals this updates visibility and linkage from the summary, or
/// converts the value to a declaration when a non-prevailing interposable
/// definition would otherwise become available_externally. When
/// \p PropagateAttrs is set, the function flags stored in function summaries
/// are additionally copied onto the corresponding functions. Members of
/// comdats found to be non-prevailing are afterwards taken out of the comdat
/// and made available_externally, together with aliases that resolve to them.
void llvm::thinLTOFinalizeInModule(Module &TheModule,
                                   const GVSummaryMapTy &DefinedGlobals,
                                   bool PropagateAttrs) {
  // Comdats whose same-named member ended up as a declaration for the linker;
  // filled in by FinalizeInModule and consumed after the three loops below.
  DenseSet<Comdat *> NonPrevailingComdats;
  auto FinalizeInModule = [&](GlobalValue &GV, bool Propagate = false) {
    // See if the global summary analysis computed a new resolved linkage.
    const auto &GS = DefinedGlobals.find(GV.getGUID());
    if (GS == DefinedGlobals.end())
      return;

    if (Propagate)
      if (FunctionSummary *FS = dyn_cast<FunctionSummary>(GS->second)) {
        if (Function *F = dyn_cast<Function>(&GV)) {
          // Copy each flag recorded in the summary onto the function, unless
          // the function already has the corresponding property.
          if (FS->fflags().ReadNone && !F->doesNotAccessMemory())
            F->setDoesNotAccessMemory();

          if (FS->fflags().ReadOnly && !F->onlyReadsMemory())
            F->setOnlyReadsMemory();

          if (FS->fflags().NoRecurse && !F->doesNotRecurse())
            F->setDoesNotRecurse();

          if (FS->fflags().NoUnwind && !F->doesNotThrow())
            F->setDoesNotThrow();
        }
      }

    auto NewLinkage = GS->second->linkage();
    if (GlobalValue::isLocalLinkage(GV.getLinkage()) ||
        // Don't internalize anything here, because the code below
        // lacks necessary correctness checks. Leave this job to
        // LLVM 'internalize' pass.
        GlobalValue::isLocalLinkage(NewLinkage) ||
        // In case it was dead and already converted to declaration.
        GV.isDeclaration())
      return;

    // Set the potentially more constraining visibility computed from summaries.
    // The DefaultVisibility condition is because older GlobalValueSummary does
    // not record DefaultVisibility and we don't want to change protected/hidden
    // to default.
    if (GS->second->getVisibility() != GlobalValue::DefaultVisibility)
      GV.setVisibility(GS->second->getVisibility());

    // Nothing further to do when the summary agrees with the current linkage.
    if (NewLinkage == GV.getLinkage())
      return;

    // Check for a non-prevailing def that has interposable linkage
    // (e.g. non-odr weak or linkonce). In that case we can't simply
    // convert to available_externally, since it would lose the
    // interposable property and possibly get inlined. Simply drop
    // the definition in that case.
    if (GlobalValue::isAvailableExternallyLinkage(NewLinkage) &&
        GlobalValue::isInterposableLinkage(GV.getLinkage())) {
      if (!convertToDeclaration(GV))
        // FIXME: Change this to collect replaced GVs and later erase
        // them from the parent module once thinLTOResolvePrevailingGUID is
        // changed to enable this for aliases.
        llvm_unreachable("Expected GV to be converted");
    } else {
      // If all copies of the original symbol had global unnamed addr and
      // linkonce_odr linkage, or if all of them had local unnamed addr linkage
      // and are constants, then it should be an auto hide symbol. In that case
      // the thin link would have marked it as CanAutoHide. Add hidden
      // visibility to the symbol to preserve the property.
      if (NewLinkage == GlobalValue::WeakODRLinkage &&
          GS->second->canAutoHide()) {
        assert(GV.canBeOmittedFromSymbolTable());
        GV.setVisibility(GlobalValue::HiddenVisibility);
      }

      LLVM_DEBUG(dbgs() << "ODR fixing up linkage for `" << GV.getName()
                        << "` from " << GV.getLinkage() << " to " << NewLinkage
                        << "\n");
      GV.setLinkage(NewLinkage);
    }
    // Remove declarations from comdats, including available_externally
    // as this is a declaration for the linker, and will be dropped eventually.
    // It is illegal for comdats to contain declarations.
    auto *GO = dyn_cast_or_null<GlobalObject>(&GV);
    if (GO && GO->isDeclarationForLinker() && GO->hasComdat()) {
      // Only a member named like its comdat marks the whole comdat as
      // non-prevailing.
      if (GO->getComdat()->getName() == GO->getName())
        NonPrevailingComdats.insert(GO->getComdat());
      GO->setComdat(nullptr);
    }
  };

  // Process functions and global now. Attribute propagation is only requested
  // for the function list; the other two loops use the lambda's default.
  for (auto &GV : TheModule)
    FinalizeInModule(GV, PropagateAttrs);
  for (auto &GV : TheModule.globals())
    FinalizeInModule(GV);
  for (auto &GV : TheModule.aliases())
    FinalizeInModule(GV);

  // For a non-prevailing comdat, all its members must be available_externally.
  // FinalizeInModule has handled non-local-linkage GlobalValues. Here we handle
  // local linkage GlobalValues.
  if (NonPrevailingComdats.empty())
    return;
  for (auto &GO : TheModule.global_objects()) {
    if (auto *C = GO.getComdat(); C && NonPrevailingComdats.count(C)) {
      GO.setComdat(nullptr);
      GO.setLinkage(GlobalValue::AvailableExternallyLinkage);
    }
  }
  // Iterate to a fixed point: the loop repeats until a full pass over the
  // aliases changes nothing.
  bool Changed;
  do {
    Changed = false;
    // If an alias references a GlobalValue in a non-prevailing comdat, change
    // it to available_externally. For simplicity we only handle GlobalValue and
    // ConstantExpr with a base object. ConstantExpr without a base object is
    // unlikely used in a COMDAT.
    for (auto &GA : TheModule.aliases()) {
      if (GA.hasAvailableExternallyLinkage())
        continue;
      GlobalObject *Obj = GA.getAliaseeObject();
      assert(Obj && "aliasee without an base object is unimplemented");
      if (Obj->hasAvailableExternallyLinkage()) {
        GA.setLinkage(GlobalValue::AvailableExternallyLinkage);
        Changed = true;
      }
    }
  } while (Changed);
}
1526 
1527 /// Run internalization on \p TheModule based on symmary analysis.
thinLTOInternalizeModule(Module & TheModule,const GVSummaryMapTy & DefinedGlobals)1528 void llvm::thinLTOInternalizeModule(Module &TheModule,
1529                                     const GVSummaryMapTy &DefinedGlobals) {
1530   // Declare a callback for the internalize pass that will ask for every
1531   // candidate GlobalValue if it can be internalized or not.
1532   auto MustPreserveGV = [&](const GlobalValue &GV) -> bool {
1533     // It may be the case that GV is on a chain of an ifunc, its alias and
1534     // subsequent aliases. In this case, the summary for the value is not
1535     // available.
1536     if (isa<GlobalIFunc>(&GV) ||
1537         (isa<GlobalAlias>(&GV) &&
1538          isa<GlobalIFunc>(cast<GlobalAlias>(&GV)->getAliaseeObject())))
1539       return true;
1540 
1541     // Lookup the linkage recorded in the summaries during global analysis.
1542     auto GS = DefinedGlobals.find(GV.getGUID());
1543     if (GS == DefinedGlobals.end()) {
1544       // Must have been promoted (possibly conservatively). Find original
1545       // name so that we can access the correct summary and see if it can
1546       // be internalized again.
1547       // FIXME: Eventually we should control promotion instead of promoting
1548       // and internalizing again.
1549       StringRef OrigName =
1550           ModuleSummaryIndex::getOriginalNameBeforePromote(GV.getName());
1551       std::string OrigId = GlobalValue::getGlobalIdentifier(
1552           OrigName, GlobalValue::InternalLinkage,
1553           TheModule.getSourceFileName());
1554       GS = DefinedGlobals.find(GlobalValue::getGUID(OrigId));
1555       if (GS == DefinedGlobals.end()) {
1556         // Also check the original non-promoted non-globalized name. In some
1557         // cases a preempted weak value is linked in as a local copy because
1558         // it is referenced by an alias (IRLinker::linkGlobalValueProto).
1559         // In that case, since it was originally not a local value, it was
1560         // recorded in the index using the original name.
1561         // FIXME: This may not be needed once PR27866 is fixed.
1562         GS = DefinedGlobals.find(GlobalValue::getGUID(OrigName));
1563         assert(GS != DefinedGlobals.end());
1564       }
1565     }
1566     return !GlobalValue::isLocalLinkage(GS->second->linkage());
1567   };
1568 
1569   // FIXME: See if we can just internalize directly here via linkage changes
1570   // based on the index, rather than invoking internalizeModule.
1571   internalizeModule(TheModule, MustPreserveGV);
1572 }
1573 
1574 /// Make alias a clone of its aliasee.
replaceAliasWithAliasee(Module * SrcModule,GlobalAlias * GA)1575 static Function *replaceAliasWithAliasee(Module *SrcModule, GlobalAlias *GA) {
1576   Function *Fn = cast<Function>(GA->getAliaseeObject());
1577 
1578   ValueToValueMapTy VMap;
1579   Function *NewFn = CloneFunction(Fn, VMap);
1580   // Clone should use the original alias's linkage, visibility and name, and we
1581   // ensure all uses of alias instead use the new clone (casted if necessary).
1582   NewFn->setLinkage(GA->getLinkage());
1583   NewFn->setVisibility(GA->getVisibility());
1584   GA->replaceAllUsesWith(NewFn);
1585   NewFn->takeName(GA);
1586   return NewFn;
1587 }
1588 
1589 // Internalize values that we marked with specific attribute
1590 // in processGlobalForThinLTO.
internalizeGVsAfterImport(Module & M)1591 static void internalizeGVsAfterImport(Module &M) {
1592   for (auto &GV : M.globals())
1593     // Skip GVs which have been converted to declarations
1594     // by dropDeadSymbols.
1595     if (!GV.isDeclaration() && GV.hasAttribute("thinlto-internalize")) {
1596       GV.setLinkage(GlobalValue::InternalLinkage);
1597       GV.setVisibility(GlobalValue::DefaultVisibility);
1598     }
1599 }
1600 
// Automatically import functions in Module \p DestModule based on the summaries
// index.
//
// \p ImportList maps each source module to the GUIDs to import from it. Source
// modules are loaded through ModuleLoader and processed in sorted name order.
// For each one, the matching functions, global variables and aliases are
// materialized and moved into \p DestModule; an imported alias is brought in
// as a clone of its aliasee.
//
// Returns an Error if loading a module, materializing a value or moving the
// globals fails. Returns true early if renameModuleForThinLTO reports a
// failure. Otherwise returns the total number of imported globals converted
// to bool.
Expected<bool> FunctionImporter::importFunctions(
    Module &DestModule, const FunctionImporter::ImportMapTy &ImportList) {
  LLVM_DEBUG(dbgs() << "Starting import for Module "
                    << DestModule.getModuleIdentifier() << "\n");
  // ImportedCount covers every imported global; ImportedGVCount only the
  // global variables, so their difference is the function count.
  unsigned ImportedCount = 0, ImportedGVCount = 0;

  IRMover Mover(DestModule);
  // Do the actual import of functions now, one Module at a time
  // The std::set gives a sorted iteration order over the module names.
  std::set<StringRef> ModuleNameOrderedList;
  for (const auto &FunctionsToImportPerModule : ImportList) {
    ModuleNameOrderedList.insert(FunctionsToImportPerModule.first);
  }
  for (const auto &Name : ModuleNameOrderedList) {
    // Get the module for the import
    const auto &FunctionsToImportPerModule = ImportList.find(Name);
    assert(FunctionsToImportPerModule != ImportList.end());
    Expected<std::unique_ptr<Module>> SrcModuleOrErr = ModuleLoader(Name);
    if (!SrcModuleOrErr)
      return SrcModuleOrErr.takeError();
    std::unique_ptr<Module> SrcModule = std::move(*SrcModuleOrErr);
    assert(&DestModule.getContext() == &SrcModule->getContext() &&
           "Context mismatch");

    // If modules were created with lazy metadata loading, materialize it
    // now, before linking it (otherwise this will be a noop).
    if (Error Err = SrcModule->materializeMetadata())
      return std::move(Err);

    auto &ImportGUIDs = FunctionsToImportPerModule->second;
    // Find the globals to import
    SetVector<GlobalValue *> GlobalsToImport;
    // Pass 1: functions. Unnamed functions are never considered for import.
    for (Function &F : *SrcModule) {
      if (!F.hasName())
        continue;
      auto GUID = F.getGUID();
      auto Import = ImportGUIDs.count(GUID);
      LLVM_DEBUG(dbgs() << (Import ? "Is" : "Not") << " importing function "
                        << GUID << " " << F.getName() << " from "
                        << SrcModule->getSourceFileName() << "\n");
      if (Import) {
        if (Error Err = F.materialize())
          return std::move(Err);
        if (EnableImportMetadata) {
          // Add 'thinlto_src_module' metadata for statistics and debugging.
          F.setMetadata(
              "thinlto_src_module",
              MDNode::get(DestModule.getContext(),
                          {MDString::get(DestModule.getContext(),
                                         SrcModule->getSourceFileName())}));
        }
        GlobalsToImport.insert(&F);
      }
    }
    // Pass 2: global variables, again skipping unnamed ones.
    for (GlobalVariable &GV : SrcModule->globals()) {
      if (!GV.hasName())
        continue;
      auto GUID = GV.getGUID();
      auto Import = ImportGUIDs.count(GUID);
      LLVM_DEBUG(dbgs() << (Import ? "Is" : "Not") << " importing global "
                        << GUID << " " << GV.getName() << " from "
                        << SrcModule->getSourceFileName() << "\n");
      if (Import) {
        if (Error Err = GV.materialize())
          return std::move(Err);
        // The variable count is advanced by the result of insert().
        ImportedGVCount += GlobalsToImport.insert(&GV);
      }
    }
    // Pass 3: aliases. Unnamed aliases and aliases whose aliasee object is an
    // ifunc are skipped.
    for (GlobalAlias &GA : SrcModule->aliases()) {
      if (!GA.hasName() || isa<GlobalIFunc>(GA.getAliaseeObject()))
        continue;
      auto GUID = GA.getGUID();
      auto Import = ImportGUIDs.count(GUID);
      LLVM_DEBUG(dbgs() << (Import ? "Is" : "Not") << " importing alias "
                        << GUID << " " << GA.getName() << " from "
                        << SrcModule->getSourceFileName() << "\n");
      if (Import) {
        if (Error Err = GA.materialize())
          return std::move(Err);
        // Import alias as a copy of its aliasee.
        GlobalObject *GO = GA.getAliaseeObject();
        if (Error Err = GO->materialize())
          return std::move(Err);
        // The clone replaces the alias and is what actually gets imported.
        auto *Fn = replaceAliasWithAliasee(SrcModule.get(), &GA);
        LLVM_DEBUG(dbgs() << "Is importing aliasee fn " << GO->getGUID() << " "
                          << GO->getName() << " from "
                          << SrcModule->getSourceFileName() << "\n");
        if (EnableImportMetadata) {
          // Add 'thinlto_src_module' metadata for statistics and debugging.
          Fn->setMetadata(
              "thinlto_src_module",
              MDNode::get(DestModule.getContext(),
                          {MDString::get(DestModule.getContext(),
                                         SrcModule->getSourceFileName())}));
        }
        GlobalsToImport.insert(Fn);
      }
    }

    // Upgrade debug info after we're done materializing all the globals and we
    // have loaded all the required metadata!
    UpgradeDebugInfo(*SrcModule);

    // Set the partial sample profile ratio in the profile summary module flag
    // of the imported source module, if applicable, so that the profile summary
    // module flag will match with that of the destination module when it's
    // imported.
    SrcModule->setPartialSampleProfileRatio(Index);

    // Link in the specified functions.
    // A true result from renameModuleForThinLTO ends the import early; note
    // that this path returns a success value, not an Error.
    if (renameModuleForThinLTO(*SrcModule, Index, ClearDSOLocalOnDeclarations,
                               &GlobalsToImport))
      return true;

    if (PrintImports) {
      for (const auto *GV : GlobalsToImport)
        dbgs() << DestModule.getSourceFileName() << ": Import " << GV->getName()
               << " from " << SrcModule->getSourceFileName() << "\n";
    }

    // Ownership of SrcModule is handed to the mover here; it must not be used
    // after this call.
    if (Error Err = Mover.move(std::move(SrcModule),
                               GlobalsToImport.getArrayRef(), nullptr,
                               /*IsPerformingImport=*/true))
      return createStringError(errc::invalid_argument,
                               Twine("Function Import: link error: ") +
                                   toString(std::move(Err)));

    ImportedCount += GlobalsToImport.size();
    NumImportedModules++;
  }

  internalizeGVsAfterImport(DestModule);

  // Update the statistics: functions are everything that is not a variable.
  NumImportedFunctions += (ImportedCount - ImportedGVCount);
  NumImportedGlobalVars += ImportedGVCount;

  LLVM_DEBUG(dbgs() << "Imported " << ImportedCount - ImportedGVCount
                    << " functions for Module "
                    << DestModule.getModuleIdentifier() << "\n");
  LLVM_DEBUG(dbgs() << "Imported " << ImportedGVCount
                    << " global variables for Module "
                    << DestModule.getModuleIdentifier() << "\n");
  // Converted to bool by the return type: true iff anything was imported.
  return ImportedCount;
}
1746 
doImportingForModuleForTest(Module & M,function_ref<bool (GlobalValue::GUID,const GlobalValueSummary *)> isPrevailing)1747 static bool doImportingForModuleForTest(
1748     Module &M, function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
1749                    isPrevailing) {
1750   if (SummaryFile.empty())
1751     report_fatal_error("error: -function-import requires -summary-file\n");
1752   Expected<std::unique_ptr<ModuleSummaryIndex>> IndexPtrOrErr =
1753       getModuleSummaryIndexForFile(SummaryFile);
1754   if (!IndexPtrOrErr) {
1755     logAllUnhandledErrors(IndexPtrOrErr.takeError(), errs(),
1756                           "Error loading file '" + SummaryFile + "': ");
1757     return false;
1758   }
1759   std::unique_ptr<ModuleSummaryIndex> Index = std::move(*IndexPtrOrErr);
1760 
1761   // First step is collecting the import list.
1762   FunctionImporter::ImportMapTy ImportList;
1763   // If requested, simply import all functions in the index. This is used
1764   // when testing distributed backend handling via the opt tool, when
1765   // we have distributed indexes containing exactly the summaries to import.
1766   if (ImportAllIndex)
1767     ComputeCrossModuleImportForModuleFromIndexForTest(M.getModuleIdentifier(),
1768                                                       *Index, ImportList);
1769   else
1770     ComputeCrossModuleImportForModuleForTest(M.getModuleIdentifier(),
1771                                              isPrevailing, *Index, ImportList);
1772 
1773   // Conservatively mark all internal values as promoted. This interface is
1774   // only used when doing importing via the function importing pass. The pass
1775   // is only enabled when testing importing via the 'opt' tool, which does
1776   // not do the ThinLink that would normally determine what values to promote.
1777   for (auto &I : *Index) {
1778     for (auto &S : I.second.SummaryList) {
1779       if (GlobalValue::isLocalLinkage(S->linkage()))
1780         S->setLinkage(GlobalValue::ExternalLinkage);
1781     }
1782   }
1783 
1784   // Next we need to promote to global scope and rename any local values that
1785   // are potentially exported to other modules.
1786   if (renameModuleForThinLTO(M, *Index, /*ClearDSOLocalOnDeclarations=*/false,
1787                              /*GlobalsToImport=*/nullptr)) {
1788     errs() << "Error renaming module\n";
1789     return true;
1790   }
1791 
1792   // Perform the import now.
1793   auto ModuleLoader = [&M](StringRef Identifier) {
1794     return loadFile(std::string(Identifier), M.getContext());
1795   };
1796   FunctionImporter Importer(*Index, ModuleLoader,
1797                             /*ClearDSOLocalOnDeclarations=*/false);
1798   Expected<bool> Result = Importer.importFunctions(M, ImportList);
1799 
1800   // FIXME: Probably need to propagate Errors through the pass manager.
1801   if (!Result) {
1802     logAllUnhandledErrors(Result.takeError(), errs(),
1803                           "Error importing module: ");
1804     return true;
1805   }
1806 
1807   return true;
1808 }
1809 
run(Module & M,ModuleAnalysisManager & AM)1810 PreservedAnalyses FunctionImportPass::run(Module &M,
1811                                           ModuleAnalysisManager &AM) {
1812   // This is only used for testing the function import pass via opt, where we
1813   // don't have prevailing information from the LTO context available, so just
1814   // conservatively assume everything is prevailing (which is fine for the very
1815   // limited use of prevailing checking in this pass).
1816   auto isPrevailing = [](GlobalValue::GUID, const GlobalValueSummary *) {
1817     return true;
1818   };
1819   if (!doImportingForModuleForTest(M, isPrevailing))
1820     return PreservedAnalyses::all();
1821 
1822   return PreservedAnalyses::none();
1823 }
1824