109467b48Spatrick //===-- Internalize.cpp - Mark functions internal -------------------------===//
209467b48Spatrick //
309467b48Spatrick // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
409467b48Spatrick // See https://llvm.org/LICENSE.txt for license information.
509467b48Spatrick // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
609467b48Spatrick //
709467b48Spatrick //===----------------------------------------------------------------------===//
809467b48Spatrick //
909467b48Spatrick // This pass loops over all of the functions and variables in the input module.
1009467b48Spatrick // If the function or variable does not need to be preserved according to the
1109467b48Spatrick // client supplied callback, it is marked as internal.
1209467b48Spatrick //
1309467b48Spatrick // This transformation would not be legal in a regular compilation, but it gets
1409467b48Spatrick // extra information from the linker about what is safe.
1509467b48Spatrick //
1609467b48Spatrick // For example: Internalizing a function with external linkage. Only if we are
1709467b48Spatrick // told it is only used from within this module, it is safe to do it.
1809467b48Spatrick //
1909467b48Spatrick //===----------------------------------------------------------------------===//
2009467b48Spatrick 
2109467b48Spatrick #include "llvm/Transforms/IPO/Internalize.h"
2209467b48Spatrick #include "llvm/ADT/Statistic.h"
2309467b48Spatrick #include "llvm/ADT/StringSet.h"
2473471bf0Spatrick #include "llvm/ADT/Triple.h"
2509467b48Spatrick #include "llvm/Analysis/CallGraph.h"
2609467b48Spatrick #include "llvm/IR/Module.h"
2709467b48Spatrick #include "llvm/InitializePasses.h"
2809467b48Spatrick #include "llvm/Pass.h"
2909467b48Spatrick #include "llvm/Support/CommandLine.h"
3009467b48Spatrick #include "llvm/Support/Debug.h"
31*d415bd75Srobert #include "llvm/Support/GlobPattern.h"
3209467b48Spatrick #include "llvm/Support/LineIterator.h"
3309467b48Spatrick #include "llvm/Support/MemoryBuffer.h"
3409467b48Spatrick #include "llvm/Support/raw_ostream.h"
3509467b48Spatrick #include "llvm/Transforms/IPO.h"
3609467b48Spatrick using namespace llvm;
3709467b48Spatrick 
3809467b48Spatrick #define DEBUG_TYPE "internalize"
3909467b48Spatrick 
4009467b48Spatrick STATISTIC(NumAliases, "Number of aliases internalized");
4109467b48Spatrick STATISTIC(NumFunctions, "Number of functions internalized");
4209467b48Spatrick STATISTIC(NumGlobals, "Number of global vars internalized");
4309467b48Spatrick 
44*d415bd75Srobert // APIFile - A file which contains a list of symbol glob patterns that should
45*d415bd75Srobert // not be marked external.
4609467b48Spatrick static cl::opt<std::string>
4709467b48Spatrick     APIFile("internalize-public-api-file", cl::value_desc("filename"),
4809467b48Spatrick             cl::desc("A file containing list of symbol names to preserve"));
4909467b48Spatrick 
50*d415bd75Srobert // APIList - A list of symbol glob patterns that should not be marked internal.
5109467b48Spatrick static cl::list<std::string>
5209467b48Spatrick     APIList("internalize-public-api-list", cl::value_desc("list"),
5309467b48Spatrick             cl::desc("A list of symbol names to preserve"), cl::CommaSeparated);
5409467b48Spatrick 
5509467b48Spatrick namespace {
5609467b48Spatrick // Helper to load an API list to preserve from file and expose it as a functor
5709467b48Spatrick // for internalization.
5809467b48Spatrick class PreserveAPIList {
5909467b48Spatrick public:
PreserveAPIList()6009467b48Spatrick   PreserveAPIList() {
6109467b48Spatrick     if (!APIFile.empty())
6209467b48Spatrick       LoadFile(APIFile);
63*d415bd75Srobert     for (StringRef Pattern : APIList)
64*d415bd75Srobert       addGlob(Pattern);
6509467b48Spatrick   }
6609467b48Spatrick 
operator ()(const GlobalValue & GV)6709467b48Spatrick   bool operator()(const GlobalValue &GV) {
68*d415bd75Srobert     return llvm::any_of(
69*d415bd75Srobert         ExternalNames, [&](GlobPattern &GP) { return GP.match(GV.getName()); });
7009467b48Spatrick   }
7109467b48Spatrick 
7209467b48Spatrick private:
7309467b48Spatrick   // Contains the set of symbols loaded from file
74*d415bd75Srobert   SmallVector<GlobPattern> ExternalNames;
75*d415bd75Srobert 
addGlob(StringRef Pattern)76*d415bd75Srobert   void addGlob(StringRef Pattern) {
77*d415bd75Srobert     auto GlobOrErr = GlobPattern::create(Pattern);
78*d415bd75Srobert     if (!GlobOrErr) {
79*d415bd75Srobert       errs() << "WARNING: when loading pattern: '"
80*d415bd75Srobert              << toString(GlobOrErr.takeError()) << "' ignoring";
81*d415bd75Srobert       return;
82*d415bd75Srobert     }
83*d415bd75Srobert     ExternalNames.emplace_back(std::move(*GlobOrErr));
84*d415bd75Srobert   }
8509467b48Spatrick 
LoadFile(StringRef Filename)8609467b48Spatrick   void LoadFile(StringRef Filename) {
8709467b48Spatrick     // Load the APIFile...
88*d415bd75Srobert     ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr =
8909467b48Spatrick         MemoryBuffer::getFile(Filename);
90*d415bd75Srobert     if (!BufOrErr) {
9109467b48Spatrick       errs() << "WARNING: Internalize couldn't load file '" << Filename
9209467b48Spatrick              << "'! Continuing as if it's empty.\n";
9309467b48Spatrick       return; // Just continue as if the file were empty
9409467b48Spatrick     }
95*d415bd75Srobert     Buf = std::move(*BufOrErr);
96*d415bd75Srobert     for (line_iterator I(*Buf, true), E; I != E; ++I)
97*d415bd75Srobert       addGlob(*I);
9809467b48Spatrick   }
99*d415bd75Srobert 
100*d415bd75Srobert   std::shared_ptr<MemoryBuffer> Buf;
10109467b48Spatrick };
10209467b48Spatrick } // end anonymous namespace
10309467b48Spatrick 
shouldPreserveGV(const GlobalValue & GV)10409467b48Spatrick bool InternalizePass::shouldPreserveGV(const GlobalValue &GV) {
10509467b48Spatrick   // Function must be defined here
10609467b48Spatrick   if (GV.isDeclaration())
10709467b48Spatrick     return true;
10809467b48Spatrick 
10909467b48Spatrick   // Available externally is really just a "declaration with a body".
11009467b48Spatrick   if (GV.hasAvailableExternallyLinkage())
11109467b48Spatrick     return true;
11209467b48Spatrick 
11309467b48Spatrick   // Assume that dllexported symbols are referenced elsewhere
11409467b48Spatrick   if (GV.hasDLLExportStorageClass())
11509467b48Spatrick     return true;
11609467b48Spatrick 
11773471bf0Spatrick   // As the name suggests, externally initialized variables need preserving as
11873471bf0Spatrick   // they would be initialized elsewhere externally.
11973471bf0Spatrick   if (const auto *G = dyn_cast<GlobalVariable>(&GV))
12073471bf0Spatrick     if (G->isExternallyInitialized())
12173471bf0Spatrick       return true;
12273471bf0Spatrick 
12309467b48Spatrick   // Already local, has nothing to do.
12409467b48Spatrick   if (GV.hasLocalLinkage())
12509467b48Spatrick     return false;
12609467b48Spatrick 
12709467b48Spatrick   // Check some special cases
12809467b48Spatrick   if (AlwaysPreserved.count(GV.getName()))
12909467b48Spatrick     return true;
13009467b48Spatrick 
13109467b48Spatrick   return MustPreserveGV(GV);
13209467b48Spatrick }
13309467b48Spatrick 
maybeInternalize(GlobalValue & GV,DenseMap<const Comdat *,ComdatInfo> & ComdatMap)13409467b48Spatrick bool InternalizePass::maybeInternalize(
13573471bf0Spatrick     GlobalValue &GV, DenseMap<const Comdat *, ComdatInfo> &ComdatMap) {
13673471bf0Spatrick   SmallString<0> ComdatName;
13709467b48Spatrick   if (Comdat *C = GV.getComdat()) {
13873471bf0Spatrick     // For GlobalAlias, C is the aliasee object's comdat which may have been
13973471bf0Spatrick     // redirected. So ComdatMap may not contain C.
14073471bf0Spatrick     if (ComdatMap.lookup(C).External)
14109467b48Spatrick       return false;
14209467b48Spatrick 
14373471bf0Spatrick     if (auto *GO = dyn_cast<GlobalObject>(&GV)) {
14473471bf0Spatrick       // If a comdat with one member is not externally visible, we can drop it.
14573471bf0Spatrick       // Otherwise, the comdat can be used to establish dependencies among the
14673471bf0Spatrick       // group of sections. Thus we have to keep the comdat but switch it to
14773471bf0Spatrick       // nodeduplicate.
14873471bf0Spatrick       // Note: nodeduplicate is not necessary for COFF. wasm doesn't support
14973471bf0Spatrick       // nodeduplicate.
15073471bf0Spatrick       ComdatInfo &Info = ComdatMap.find(C)->second;
15173471bf0Spatrick       if (Info.Size == 1)
15209467b48Spatrick         GO->setComdat(nullptr);
15373471bf0Spatrick       else if (!IsWasm)
15473471bf0Spatrick         C->setSelectionKind(Comdat::NoDeduplicate);
15573471bf0Spatrick     }
15609467b48Spatrick 
15709467b48Spatrick     if (GV.hasLocalLinkage())
15809467b48Spatrick       return false;
15909467b48Spatrick   } else {
16009467b48Spatrick     if (GV.hasLocalLinkage())
16109467b48Spatrick       return false;
16209467b48Spatrick 
16309467b48Spatrick     if (shouldPreserveGV(GV))
16409467b48Spatrick       return false;
16509467b48Spatrick   }
16609467b48Spatrick 
16709467b48Spatrick   GV.setVisibility(GlobalValue::DefaultVisibility);
16809467b48Spatrick   GV.setLinkage(GlobalValue::InternalLinkage);
16909467b48Spatrick   return true;
17009467b48Spatrick }
17109467b48Spatrick 
17273471bf0Spatrick // If GV is part of a comdat and is externally visible, update the comdat size
17373471bf0Spatrick // and keep track of its comdat so that we don't internalize any of its members.
checkComdat(GlobalValue & GV,DenseMap<const Comdat *,ComdatInfo> & ComdatMap)17473471bf0Spatrick void InternalizePass::checkComdat(
17573471bf0Spatrick     GlobalValue &GV, DenseMap<const Comdat *, ComdatInfo> &ComdatMap) {
17609467b48Spatrick   Comdat *C = GV.getComdat();
17709467b48Spatrick   if (!C)
17809467b48Spatrick     return;
17909467b48Spatrick 
18073471bf0Spatrick   ComdatInfo &Info = ComdatMap.try_emplace(C).first->second;
18173471bf0Spatrick   ++Info.Size;
18209467b48Spatrick   if (shouldPreserveGV(GV))
18373471bf0Spatrick     Info.External = true;
18409467b48Spatrick }
18509467b48Spatrick 
internalizeModule(Module & M,CallGraph * CG)18609467b48Spatrick bool InternalizePass::internalizeModule(Module &M, CallGraph *CG) {
18709467b48Spatrick   bool Changed = false;
18809467b48Spatrick   CallGraphNode *ExternalNode = CG ? CG->getExternalCallingNode() : nullptr;
18909467b48Spatrick 
19073471bf0Spatrick   SmallVector<GlobalValue *, 4> Used;
19109467b48Spatrick   collectUsedGlobalVariables(M, Used, false);
19209467b48Spatrick 
19373471bf0Spatrick   // Collect comdat size and visiblity information for the module.
19473471bf0Spatrick   DenseMap<const Comdat *, ComdatInfo> ComdatMap;
19509467b48Spatrick   if (!M.getComdatSymbolTable().empty()) {
19609467b48Spatrick     for (Function &F : M)
19773471bf0Spatrick       checkComdat(F, ComdatMap);
19809467b48Spatrick     for (GlobalVariable &GV : M.globals())
19973471bf0Spatrick       checkComdat(GV, ComdatMap);
20009467b48Spatrick     for (GlobalAlias &GA : M.aliases())
20173471bf0Spatrick       checkComdat(GA, ComdatMap);
20209467b48Spatrick   }
20309467b48Spatrick 
20409467b48Spatrick   // We must assume that globals in llvm.used have a reference that not even
20509467b48Spatrick   // the linker can see, so we don't internalize them.
20609467b48Spatrick   // For llvm.compiler.used the situation is a bit fuzzy. The assembler and
20709467b48Spatrick   // linker can drop those symbols. If this pass is running as part of LTO,
20809467b48Spatrick   // one might think that it could just drop llvm.compiler.used. The problem
20909467b48Spatrick   // is that even in LTO llvm doesn't see every reference. For example,
21009467b48Spatrick   // we don't see references from function local inline assembly. To be
21109467b48Spatrick   // conservative, we internalize symbols in llvm.compiler.used, but we
21209467b48Spatrick   // keep llvm.compiler.used so that the symbol is not deleted by llvm.
21309467b48Spatrick   for (GlobalValue *V : Used) {
21409467b48Spatrick     AlwaysPreserved.insert(V->getName());
21509467b48Spatrick   }
21609467b48Spatrick 
21709467b48Spatrick   // Never internalize the llvm.used symbol.  It is used to implement
21809467b48Spatrick   // attribute((used)).
21909467b48Spatrick   // FIXME: Shouldn't this just filter on llvm.metadata section??
22009467b48Spatrick   AlwaysPreserved.insert("llvm.used");
22109467b48Spatrick   AlwaysPreserved.insert("llvm.compiler.used");
22209467b48Spatrick 
22309467b48Spatrick   // Never internalize anchors used by the machine module info, else the info
22409467b48Spatrick   // won't find them.  (see MachineModuleInfo.)
22509467b48Spatrick   AlwaysPreserved.insert("llvm.global_ctors");
22609467b48Spatrick   AlwaysPreserved.insert("llvm.global_dtors");
22709467b48Spatrick   AlwaysPreserved.insert("llvm.global.annotations");
22809467b48Spatrick 
22909467b48Spatrick   // Never internalize symbols code-gen inserts.
23009467b48Spatrick   // FIXME: We should probably add this (and the __stack_chk_guard) via some
23109467b48Spatrick   // type of call-back in CodeGen.
23209467b48Spatrick   AlwaysPreserved.insert("__stack_chk_fail");
23373471bf0Spatrick   if (Triple(M.getTargetTriple()).isOSAIX())
23473471bf0Spatrick     AlwaysPreserved.insert("__ssp_canary_word");
23573471bf0Spatrick   else
23609467b48Spatrick     AlwaysPreserved.insert("__stack_chk_guard");
23709467b48Spatrick 
238*d415bd75Srobert   // Mark all functions not in the api as internal.
239*d415bd75Srobert   IsWasm = Triple(M.getTargetTriple()).isOSBinFormatWasm();
240*d415bd75Srobert   for (Function &I : M) {
241*d415bd75Srobert     if (!maybeInternalize(I, ComdatMap))
242*d415bd75Srobert       continue;
243*d415bd75Srobert     Changed = true;
244*d415bd75Srobert 
245*d415bd75Srobert     if (ExternalNode)
246*d415bd75Srobert       // Remove a callgraph edge from the external node to this function.
247*d415bd75Srobert       ExternalNode->removeOneAbstractEdgeTo((*CG)[&I]);
248*d415bd75Srobert 
249*d415bd75Srobert     ++NumFunctions;
250*d415bd75Srobert     LLVM_DEBUG(dbgs() << "Internalizing func " << I.getName() << "\n");
251*d415bd75Srobert   }
252*d415bd75Srobert 
25309467b48Spatrick   // Mark all global variables with initializers that are not in the api as
25409467b48Spatrick   // internal as well.
25509467b48Spatrick   for (auto &GV : M.globals()) {
25673471bf0Spatrick     if (!maybeInternalize(GV, ComdatMap))
25709467b48Spatrick       continue;
25809467b48Spatrick     Changed = true;
25909467b48Spatrick 
26009467b48Spatrick     ++NumGlobals;
26109467b48Spatrick     LLVM_DEBUG(dbgs() << "Internalized gvar " << GV.getName() << "\n");
26209467b48Spatrick   }
26309467b48Spatrick 
26409467b48Spatrick   // Mark all aliases that are not in the api as internal as well.
26509467b48Spatrick   for (auto &GA : M.aliases()) {
26673471bf0Spatrick     if (!maybeInternalize(GA, ComdatMap))
26709467b48Spatrick       continue;
26809467b48Spatrick     Changed = true;
26909467b48Spatrick 
27009467b48Spatrick     ++NumAliases;
27109467b48Spatrick     LLVM_DEBUG(dbgs() << "Internalized alias " << GA.getName() << "\n");
27209467b48Spatrick   }
27309467b48Spatrick 
27409467b48Spatrick   return Changed;
27509467b48Spatrick }
27609467b48Spatrick 
InternalizePass()27709467b48Spatrick InternalizePass::InternalizePass() : MustPreserveGV(PreserveAPIList()) {}
27809467b48Spatrick 
run(Module & M,ModuleAnalysisManager & AM)27909467b48Spatrick PreservedAnalyses InternalizePass::run(Module &M, ModuleAnalysisManager &AM) {
28009467b48Spatrick   if (!internalizeModule(M, AM.getCachedResult<CallGraphAnalysis>(M)))
28109467b48Spatrick     return PreservedAnalyses::all();
28209467b48Spatrick 
28309467b48Spatrick   PreservedAnalyses PA;
28409467b48Spatrick   PA.preserve<CallGraphAnalysis>();
28509467b48Spatrick   return PA;
28609467b48Spatrick }
28709467b48Spatrick 
28809467b48Spatrick namespace {
28909467b48Spatrick class InternalizeLegacyPass : public ModulePass {
29009467b48Spatrick   // Client supplied callback to control wheter a symbol must be preserved.
29109467b48Spatrick   std::function<bool(const GlobalValue &)> MustPreserveGV;
29209467b48Spatrick 
29309467b48Spatrick public:
29409467b48Spatrick   static char ID; // Pass identification, replacement for typeid
29509467b48Spatrick 
InternalizeLegacyPass()29609467b48Spatrick   InternalizeLegacyPass() : ModulePass(ID), MustPreserveGV(PreserveAPIList()) {}
29709467b48Spatrick 
InternalizeLegacyPass(std::function<bool (const GlobalValue &)> MustPreserveGV)29809467b48Spatrick   InternalizeLegacyPass(std::function<bool(const GlobalValue &)> MustPreserveGV)
29909467b48Spatrick       : ModulePass(ID), MustPreserveGV(std::move(MustPreserveGV)) {
30009467b48Spatrick     initializeInternalizeLegacyPassPass(*PassRegistry::getPassRegistry());
30109467b48Spatrick   }
30209467b48Spatrick 
runOnModule(Module & M)30309467b48Spatrick   bool runOnModule(Module &M) override {
30409467b48Spatrick     if (skipModule(M))
30509467b48Spatrick       return false;
30609467b48Spatrick 
30709467b48Spatrick     CallGraphWrapperPass *CGPass =
30809467b48Spatrick         getAnalysisIfAvailable<CallGraphWrapperPass>();
30909467b48Spatrick     CallGraph *CG = CGPass ? &CGPass->getCallGraph() : nullptr;
31009467b48Spatrick     return internalizeModule(M, MustPreserveGV, CG);
31109467b48Spatrick   }
31209467b48Spatrick 
getAnalysisUsage(AnalysisUsage & AU) const31309467b48Spatrick   void getAnalysisUsage(AnalysisUsage &AU) const override {
31409467b48Spatrick     AU.setPreservesCFG();
31509467b48Spatrick     AU.addPreserved<CallGraphWrapperPass>();
31609467b48Spatrick   }
31709467b48Spatrick };
31809467b48Spatrick }
31909467b48Spatrick 
32009467b48Spatrick char InternalizeLegacyPass::ID = 0;
32109467b48Spatrick INITIALIZE_PASS(InternalizeLegacyPass, "internalize",
32209467b48Spatrick                 "Internalize Global Symbols", false, false)
32309467b48Spatrick 
createInternalizePass()32409467b48Spatrick ModulePass *llvm::createInternalizePass() {
32509467b48Spatrick   return new InternalizeLegacyPass();
32609467b48Spatrick }
32709467b48Spatrick 
createInternalizePass(std::function<bool (const GlobalValue &)> MustPreserveGV)32809467b48Spatrick ModulePass *llvm::createInternalizePass(
32909467b48Spatrick     std::function<bool(const GlobalValue &)> MustPreserveGV) {
33009467b48Spatrick   return new InternalizeLegacyPass(std::move(MustPreserveGV));
33109467b48Spatrick }
332