109467b48Spatrick //===- PartialInlining.cpp - Inline parts of functions --------------------===//
209467b48Spatrick //
309467b48Spatrick // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
409467b48Spatrick // See https://llvm.org/LICENSE.txt for license information.
509467b48Spatrick // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
609467b48Spatrick //
709467b48Spatrick //===----------------------------------------------------------------------===//
809467b48Spatrick //
909467b48Spatrick // This pass performs partial inlining, typically by inlining an if statement
1009467b48Spatrick // that surrounds the body of the function.
1109467b48Spatrick //
1209467b48Spatrick //===----------------------------------------------------------------------===//
1309467b48Spatrick 
1409467b48Spatrick #include "llvm/Transforms/IPO/PartialInlining.h"
1509467b48Spatrick #include "llvm/ADT/DenseMap.h"
1609467b48Spatrick #include "llvm/ADT/DenseSet.h"
1709467b48Spatrick #include "llvm/ADT/STLExtras.h"
1809467b48Spatrick #include "llvm/ADT/SmallVector.h"
1909467b48Spatrick #include "llvm/ADT/Statistic.h"
2009467b48Spatrick #include "llvm/Analysis/BlockFrequencyInfo.h"
2109467b48Spatrick #include "llvm/Analysis/BranchProbabilityInfo.h"
2209467b48Spatrick #include "llvm/Analysis/InlineCost.h"
2309467b48Spatrick #include "llvm/Analysis/LoopInfo.h"
2409467b48Spatrick #include "llvm/Analysis/OptimizationRemarkEmitter.h"
2509467b48Spatrick #include "llvm/Analysis/ProfileSummaryInfo.h"
2609467b48Spatrick #include "llvm/Analysis/TargetLibraryInfo.h"
2709467b48Spatrick #include "llvm/Analysis/TargetTransformInfo.h"
2809467b48Spatrick #include "llvm/IR/Attributes.h"
2909467b48Spatrick #include "llvm/IR/BasicBlock.h"
3009467b48Spatrick #include "llvm/IR/CFG.h"
3109467b48Spatrick #include "llvm/IR/DebugLoc.h"
3209467b48Spatrick #include "llvm/IR/DiagnosticInfo.h"
3309467b48Spatrick #include "llvm/IR/Dominators.h"
3409467b48Spatrick #include "llvm/IR/Function.h"
3509467b48Spatrick #include "llvm/IR/InstrTypes.h"
3609467b48Spatrick #include "llvm/IR/Instruction.h"
3709467b48Spatrick #include "llvm/IR/Instructions.h"
3809467b48Spatrick #include "llvm/IR/IntrinsicInst.h"
3909467b48Spatrick #include "llvm/IR/Intrinsics.h"
4009467b48Spatrick #include "llvm/IR/Module.h"
41*d415bd75Srobert #include "llvm/IR/Operator.h"
42*d415bd75Srobert #include "llvm/IR/ProfDataUtils.h"
4309467b48Spatrick #include "llvm/IR/User.h"
4409467b48Spatrick #include "llvm/InitializePasses.h"
4509467b48Spatrick #include "llvm/Pass.h"
4609467b48Spatrick #include "llvm/Support/BlockFrequency.h"
4709467b48Spatrick #include "llvm/Support/BranchProbability.h"
4809467b48Spatrick #include "llvm/Support/Casting.h"
4909467b48Spatrick #include "llvm/Support/CommandLine.h"
5009467b48Spatrick #include "llvm/Support/ErrorHandling.h"
5109467b48Spatrick #include "llvm/Transforms/IPO.h"
5209467b48Spatrick #include "llvm/Transforms/Utils/Cloning.h"
5309467b48Spatrick #include "llvm/Transforms/Utils/CodeExtractor.h"
5409467b48Spatrick #include "llvm/Transforms/Utils/ValueMapper.h"
5509467b48Spatrick #include <algorithm>
5609467b48Spatrick #include <cassert>
5709467b48Spatrick #include <cstdint>
5809467b48Spatrick #include <memory>
5909467b48Spatrick #include <tuple>
6009467b48Spatrick #include <vector>
6109467b48Spatrick 
6209467b48Spatrick using namespace llvm;
6309467b48Spatrick 
6409467b48Spatrick #define DEBUG_TYPE "partial-inlining"
6509467b48Spatrick 
6609467b48Spatrick STATISTIC(NumPartialInlined,
6709467b48Spatrick           "Number of callsites functions partially inlined into.");
6809467b48Spatrick STATISTIC(NumColdOutlinePartialInlined, "Number of times functions with "
6909467b48Spatrick                                         "cold outlined regions were partially "
7009467b48Spatrick                                         "inlined into its caller(s).");
7109467b48Spatrick STATISTIC(NumColdRegionsFound,
7209467b48Spatrick            "Number of cold single entry/exit regions found.");
7309467b48Spatrick STATISTIC(NumColdRegionsOutlined,
7409467b48Spatrick            "Number of cold single entry/exit regions outlined.");
7509467b48Spatrick 
7609467b48Spatrick // Command line option to disable partial-inlining. The default is false:
7709467b48Spatrick static cl::opt<bool>
7809467b48Spatrick     DisablePartialInlining("disable-partial-inlining", cl::init(false),
7909467b48Spatrick                            cl::Hidden, cl::desc("Disable partial inlining"));
8009467b48Spatrick // Command line option to disable multi-region partial-inlining. The default is
8109467b48Spatrick // false:
8209467b48Spatrick static cl::opt<bool> DisableMultiRegionPartialInline(
8309467b48Spatrick     "disable-mr-partial-inlining", cl::init(false), cl::Hidden,
8409467b48Spatrick     cl::desc("Disable multi-region partial inlining"));
8509467b48Spatrick 
8609467b48Spatrick // Command line option to force outlining in regions with live exit variables.
8709467b48Spatrick // The default is false:
8809467b48Spatrick static cl::opt<bool>
8909467b48Spatrick     ForceLiveExit("pi-force-live-exit-outline", cl::init(false), cl::Hidden,
9009467b48Spatrick                cl::desc("Force outline regions with live exits"));
9109467b48Spatrick 
9209467b48Spatrick // Command line option to enable marking outline functions with Cold Calling
9309467b48Spatrick // Convention. The default is false:
9409467b48Spatrick static cl::opt<bool>
9509467b48Spatrick     MarkOutlinedColdCC("pi-mark-coldcc", cl::init(false), cl::Hidden,
9609467b48Spatrick                        cl::desc("Mark outline function calls with ColdCC"));
9709467b48Spatrick 
9809467b48Spatrick // This is an option used by testing:
9909467b48Spatrick static cl::opt<bool> SkipCostAnalysis("skip-partial-inlining-cost-analysis",
100*d415bd75Srobert 
10109467b48Spatrick                                       cl::ReallyHidden,
10209467b48Spatrick                                       cl::desc("Skip Cost Analysis"));
10309467b48Spatrick // Used to determine if a cold region is worth outlining based on
10409467b48Spatrick // its inlining cost compared to the original function.  Default is set at 10%.
10509467b48Spatrick // ie. if the cold region reduces the inlining cost of the original function by
10609467b48Spatrick // at least 10%.
10709467b48Spatrick static cl::opt<float> MinRegionSizeRatio(
10809467b48Spatrick     "min-region-size-ratio", cl::init(0.1), cl::Hidden,
10909467b48Spatrick     cl::desc("Minimum ratio comparing relative sizes of each "
11009467b48Spatrick              "outline candidate and original function"));
11109467b48Spatrick // Used to tune the minimum number of execution counts needed in the predecessor
11209467b48Spatrick // block to the cold edge. ie. confidence interval.
11309467b48Spatrick static cl::opt<unsigned>
11409467b48Spatrick     MinBlockCounterExecution("min-block-execution", cl::init(100), cl::Hidden,
11509467b48Spatrick                              cl::desc("Minimum block executions to consider "
11609467b48Spatrick                                       "its BranchProbabilityInfo valid"));
11709467b48Spatrick // Used to determine when an edge is considered cold. Default is set to 10%. ie.
11809467b48Spatrick // if the branch probability is 10% or less, then it is deemed as 'cold'.
11909467b48Spatrick static cl::opt<float> ColdBranchRatio(
12009467b48Spatrick     "cold-branch-ratio", cl::init(0.1), cl::Hidden,
12109467b48Spatrick     cl::desc("Minimum BranchProbability to consider a region cold."));
12209467b48Spatrick 
12309467b48Spatrick static cl::opt<unsigned> MaxNumInlineBlocks(
12409467b48Spatrick     "max-num-inline-blocks", cl::init(5), cl::Hidden,
12509467b48Spatrick     cl::desc("Max number of blocks to be partially inlined"));
12609467b48Spatrick 
12709467b48Spatrick // Command line option to set the maximum number of partial inlining allowed
12809467b48Spatrick // for the module. The default value of -1 means no limit.
12909467b48Spatrick static cl::opt<int> MaxNumPartialInlining(
130*d415bd75Srobert     "max-partial-inlining", cl::init(-1), cl::Hidden,
13109467b48Spatrick     cl::desc("Max number of partial inlining. The default is unlimited"));
13209467b48Spatrick 
13309467b48Spatrick // Used only when PGO or user annotated branch data is absent. It is
13409467b48Spatrick // the least value that is used to weigh the outline region. If BFI
13509467b48Spatrick // produces larger value, the BFI value will be used.
13609467b48Spatrick static cl::opt<int>
13709467b48Spatrick     OutlineRegionFreqPercent("outline-region-freq-percent", cl::init(75),
138*d415bd75Srobert                              cl::Hidden,
13909467b48Spatrick                              cl::desc("Relative frequency of outline region to "
14009467b48Spatrick                                       "the entry block"));
14109467b48Spatrick 
14209467b48Spatrick static cl::opt<unsigned> ExtraOutliningPenalty(
14309467b48Spatrick     "partial-inlining-extra-penalty", cl::init(0), cl::Hidden,
14409467b48Spatrick     cl::desc("A debug option to add additional penalty to the computed one."));
14509467b48Spatrick 
14609467b48Spatrick namespace {
14709467b48Spatrick 
14809467b48Spatrick struct FunctionOutliningInfo {
14909467b48Spatrick   FunctionOutliningInfo() = default;
15009467b48Spatrick 
15109467b48Spatrick   // Returns the number of blocks to be inlined including all blocks
15209467b48Spatrick   // in Entries and one return block.
getNumInlinedBlocks__anon4298a4970111::FunctionOutliningInfo15373471bf0Spatrick   unsigned getNumInlinedBlocks() const { return Entries.size() + 1; }
15409467b48Spatrick 
15509467b48Spatrick   // A set of blocks including the function entry that guard
15609467b48Spatrick   // the region to be outlined.
15709467b48Spatrick   SmallVector<BasicBlock *, 4> Entries;
15809467b48Spatrick 
15909467b48Spatrick   // The return block that is not included in the outlined region.
16009467b48Spatrick   BasicBlock *ReturnBlock = nullptr;
16109467b48Spatrick 
16209467b48Spatrick   // The dominating block of the region to be outlined.
16309467b48Spatrick   BasicBlock *NonReturnBlock = nullptr;
16409467b48Spatrick 
16509467b48Spatrick   // The set of blocks in Entries that that are predecessors to ReturnBlock
16609467b48Spatrick   SmallVector<BasicBlock *, 4> ReturnBlockPreds;
16709467b48Spatrick };
16809467b48Spatrick 
16909467b48Spatrick struct FunctionOutliningMultiRegionInfo {
170*d415bd75Srobert   FunctionOutliningMultiRegionInfo() = default;
17109467b48Spatrick 
17209467b48Spatrick   // Container for outline regions
17309467b48Spatrick   struct OutlineRegionInfo {
OutlineRegionInfo__anon4298a4970111::FunctionOutliningMultiRegionInfo::OutlineRegionInfo17409467b48Spatrick     OutlineRegionInfo(ArrayRef<BasicBlock *> Region,
17509467b48Spatrick                       BasicBlock *EntryBlock, BasicBlock *ExitBlock,
17609467b48Spatrick                       BasicBlock *ReturnBlock)
17709467b48Spatrick         : Region(Region.begin(), Region.end()), EntryBlock(EntryBlock),
17809467b48Spatrick           ExitBlock(ExitBlock), ReturnBlock(ReturnBlock) {}
17909467b48Spatrick     SmallVector<BasicBlock *, 8> Region;
18009467b48Spatrick     BasicBlock *EntryBlock;
18109467b48Spatrick     BasicBlock *ExitBlock;
18209467b48Spatrick     BasicBlock *ReturnBlock;
18309467b48Spatrick   };
18409467b48Spatrick 
18509467b48Spatrick   SmallVector<OutlineRegionInfo, 4> ORI;
18609467b48Spatrick };
18709467b48Spatrick 
18809467b48Spatrick struct PartialInlinerImpl {
18909467b48Spatrick 
PartialInlinerImpl__anon4298a4970111::PartialInlinerImpl19009467b48Spatrick   PartialInlinerImpl(
191097a140dSpatrick       function_ref<AssumptionCache &(Function &)> GetAC,
19209467b48Spatrick       function_ref<AssumptionCache *(Function &)> LookupAC,
193097a140dSpatrick       function_ref<TargetTransformInfo &(Function &)> GTTI,
194097a140dSpatrick       function_ref<const TargetLibraryInfo &(Function &)> GTLI,
195097a140dSpatrick       ProfileSummaryInfo &ProfSI,
196097a140dSpatrick       function_ref<BlockFrequencyInfo &(Function &)> GBFI = nullptr)
19709467b48Spatrick       : GetAssumptionCache(GetAC), LookupAssumptionCache(LookupAC),
198097a140dSpatrick         GetTTI(GTTI), GetBFI(GBFI), GetTLI(GTLI), PSI(ProfSI) {}
19909467b48Spatrick 
20009467b48Spatrick   bool run(Module &M);
20109467b48Spatrick   // Main part of the transformation that calls helper functions to find
20209467b48Spatrick   // outlining candidates, clone & outline the function, and attempt to
20309467b48Spatrick   // partially inline the resulting function. Returns true if
20409467b48Spatrick   // inlining was successful, false otherwise.  Also returns the outline
20509467b48Spatrick   // function (only if we partially inlined early returns) as there is a
20609467b48Spatrick   // possibility to further "peel" early return statements that were left in the
20709467b48Spatrick   // outline function due to code size.
20873471bf0Spatrick   std::pair<bool, Function *> unswitchFunction(Function &F);
20909467b48Spatrick 
21009467b48Spatrick   // This class speculatively clones the function to be partial inlined.
21109467b48Spatrick   // At the end of partial inlining, the remaining callsites to the cloned
21209467b48Spatrick   // function that are not partially inlined will be fixed up to reference
21309467b48Spatrick   // the original function, and the cloned function will be erased.
21409467b48Spatrick   struct FunctionCloner {
21509467b48Spatrick     // Two constructors, one for single region outlining, the other for
21609467b48Spatrick     // multi-region outlining.
21709467b48Spatrick     FunctionCloner(Function *F, FunctionOutliningInfo *OI,
21809467b48Spatrick                    OptimizationRemarkEmitter &ORE,
21973471bf0Spatrick                    function_ref<AssumptionCache *(Function &)> LookupAC,
22073471bf0Spatrick                    function_ref<TargetTransformInfo &(Function &)> GetTTI);
22109467b48Spatrick     FunctionCloner(Function *F, FunctionOutliningMultiRegionInfo *OMRI,
22209467b48Spatrick                    OptimizationRemarkEmitter &ORE,
22373471bf0Spatrick                    function_ref<AssumptionCache *(Function &)> LookupAC,
22473471bf0Spatrick                    function_ref<TargetTransformInfo &(Function &)> GetTTI);
22573471bf0Spatrick 
22609467b48Spatrick     ~FunctionCloner();
22709467b48Spatrick 
22809467b48Spatrick     // Prepare for function outlining: making sure there is only
22909467b48Spatrick     // one incoming edge from the extracted/outlined region to
23009467b48Spatrick     // the return block.
23173471bf0Spatrick     void normalizeReturnBlock() const;
23209467b48Spatrick 
23309467b48Spatrick     // Do function outlining for cold regions.
23409467b48Spatrick     bool doMultiRegionFunctionOutlining();
23509467b48Spatrick     // Do function outlining for region after early return block(s).
23609467b48Spatrick     // NOTE: For vararg functions that do the vararg handling in the outlined
23709467b48Spatrick     //       function, we temporarily generate IR that does not properly
23809467b48Spatrick     //       forward varargs to the outlined function. Calling InlineFunction
23909467b48Spatrick     //       will update calls to the outlined functions to properly forward
24009467b48Spatrick     //       the varargs.
24109467b48Spatrick     Function *doSingleRegionFunctionOutlining();
24209467b48Spatrick 
24309467b48Spatrick     Function *OrigFunc = nullptr;
24409467b48Spatrick     Function *ClonedFunc = nullptr;
24509467b48Spatrick 
24609467b48Spatrick     typedef std::pair<Function *, BasicBlock *> FuncBodyCallerPair;
24709467b48Spatrick     // Keep track of Outlined Functions and the basic block they're called from.
24809467b48Spatrick     SmallVector<FuncBodyCallerPair, 4> OutlinedFunctions;
24909467b48Spatrick 
25009467b48Spatrick     // ClonedFunc is inlined in one of its callers after function
25109467b48Spatrick     // outlining.
25209467b48Spatrick     bool IsFunctionInlined = false;
25309467b48Spatrick     // The cost of the region to be outlined.
25473471bf0Spatrick     InstructionCost OutlinedRegionCost = 0;
25509467b48Spatrick     // ClonedOI is specific to outlining non-early return blocks.
25609467b48Spatrick     std::unique_ptr<FunctionOutliningInfo> ClonedOI = nullptr;
25709467b48Spatrick     // ClonedOMRI is specific to outlining cold regions.
25809467b48Spatrick     std::unique_ptr<FunctionOutliningMultiRegionInfo> ClonedOMRI = nullptr;
25909467b48Spatrick     std::unique_ptr<BlockFrequencyInfo> ClonedFuncBFI = nullptr;
26009467b48Spatrick     OptimizationRemarkEmitter &ORE;
26109467b48Spatrick     function_ref<AssumptionCache *(Function &)> LookupAC;
26273471bf0Spatrick     function_ref<TargetTransformInfo &(Function &)> GetTTI;
26309467b48Spatrick   };
26409467b48Spatrick 
26509467b48Spatrick private:
26609467b48Spatrick   int NumPartialInlining = 0;
267097a140dSpatrick   function_ref<AssumptionCache &(Function &)> GetAssumptionCache;
26809467b48Spatrick   function_ref<AssumptionCache *(Function &)> LookupAssumptionCache;
269097a140dSpatrick   function_ref<TargetTransformInfo &(Function &)> GetTTI;
270097a140dSpatrick   function_ref<BlockFrequencyInfo &(Function &)> GetBFI;
271097a140dSpatrick   function_ref<const TargetLibraryInfo &(Function &)> GetTLI;
272097a140dSpatrick   ProfileSummaryInfo &PSI;
27309467b48Spatrick 
27409467b48Spatrick   // Return the frequency of the OutlininingBB relative to F's entry point.
27509467b48Spatrick   // The result is no larger than 1 and is represented using BP.
27609467b48Spatrick   // (Note that the outlined region's 'head' block can only have incoming
27709467b48Spatrick   // edges from the guarding entry blocks).
27873471bf0Spatrick   BranchProbability
27973471bf0Spatrick   getOutliningCallBBRelativeFreq(FunctionCloner &Cloner) const;
28009467b48Spatrick 
281097a140dSpatrick   // Return true if the callee of CB should be partially inlined with
28209467b48Spatrick   // profit.
283097a140dSpatrick   bool shouldPartialInline(CallBase &CB, FunctionCloner &Cloner,
28409467b48Spatrick                            BlockFrequency WeightedOutliningRcost,
28573471bf0Spatrick                            OptimizationRemarkEmitter &ORE) const;
28609467b48Spatrick 
28709467b48Spatrick   // Try to inline DuplicateFunction (cloned from F with call to
28809467b48Spatrick   // the OutlinedFunction into its callers. Return true
28909467b48Spatrick   // if there is any successful inlining.
29009467b48Spatrick   bool tryPartialInline(FunctionCloner &Cloner);
29109467b48Spatrick 
29209467b48Spatrick   // Compute the mapping from use site of DuplicationFunction to the enclosing
29309467b48Spatrick   // BB's profile count.
29473471bf0Spatrick   void
29573471bf0Spatrick   computeCallsiteToProfCountMap(Function *DuplicateFunction,
29673471bf0Spatrick                                 DenseMap<User *, uint64_t> &SiteCountMap) const;
29709467b48Spatrick 
isLimitReached__anon4298a4970111::PartialInlinerImpl29873471bf0Spatrick   bool isLimitReached() const {
29909467b48Spatrick     return (MaxNumPartialInlining != -1 &&
30009467b48Spatrick             NumPartialInlining >= MaxNumPartialInlining);
30109467b48Spatrick   }
30209467b48Spatrick 
getSupportedCallBase__anon4298a4970111::PartialInlinerImpl303097a140dSpatrick   static CallBase *getSupportedCallBase(User *U) {
304097a140dSpatrick     if (isa<CallInst>(U) || isa<InvokeInst>(U))
305097a140dSpatrick       return cast<CallBase>(U);
30609467b48Spatrick     llvm_unreachable("All uses must be calls");
307097a140dSpatrick     return nullptr;
30809467b48Spatrick   }
30909467b48Spatrick 
getOneCallSiteTo__anon4298a4970111::PartialInlinerImpl31073471bf0Spatrick   static CallBase *getOneCallSiteTo(Function &F) {
31173471bf0Spatrick     User *User = *F.user_begin();
312097a140dSpatrick     return getSupportedCallBase(User);
31309467b48Spatrick   }
31409467b48Spatrick 
getOneDebugLoc__anon4298a4970111::PartialInlinerImpl31573471bf0Spatrick   std::tuple<DebugLoc, BasicBlock *> getOneDebugLoc(Function &F) const {
316097a140dSpatrick     CallBase *CB = getOneCallSiteTo(F);
317097a140dSpatrick     DebugLoc DLoc = CB->getDebugLoc();
318097a140dSpatrick     BasicBlock *Block = CB->getParent();
31909467b48Spatrick     return std::make_tuple(DLoc, Block);
32009467b48Spatrick   }
32109467b48Spatrick 
32209467b48Spatrick   // Returns the costs associated with function outlining:
32309467b48Spatrick   // - The first value is the non-weighted runtime cost for making the call
32409467b48Spatrick   //   to the outlined function, including the addtional  setup cost in the
32509467b48Spatrick   //    outlined function itself;
32609467b48Spatrick   // - The second value is the estimated size of the new call sequence in
32709467b48Spatrick   //   basic block Cloner.OutliningCallBB;
32873471bf0Spatrick   std::tuple<InstructionCost, InstructionCost>
32973471bf0Spatrick   computeOutliningCosts(FunctionCloner &Cloner) const;
33009467b48Spatrick 
33109467b48Spatrick   // Compute the 'InlineCost' of block BB. InlineCost is a proxy used to
33209467b48Spatrick   // approximate both the size and runtime cost (Note that in the current
33309467b48Spatrick   // inline cost analysis, there is no clear distinction there either).
33473471bf0Spatrick   static InstructionCost computeBBInlineCost(BasicBlock *BB,
33573471bf0Spatrick                                              TargetTransformInfo *TTI);
33609467b48Spatrick 
33773471bf0Spatrick   std::unique_ptr<FunctionOutliningInfo>
33873471bf0Spatrick   computeOutliningInfo(Function &F) const;
33973471bf0Spatrick 
34009467b48Spatrick   std::unique_ptr<FunctionOutliningMultiRegionInfo>
34173471bf0Spatrick   computeOutliningColdRegionsInfo(Function &F,
34273471bf0Spatrick                                   OptimizationRemarkEmitter &ORE) const;
34309467b48Spatrick };
34409467b48Spatrick 
34509467b48Spatrick struct PartialInlinerLegacyPass : public ModulePass {
34609467b48Spatrick   static char ID; // Pass identification, replacement for typeid
34709467b48Spatrick 
PartialInlinerLegacyPass__anon4298a4970111::PartialInlinerLegacyPass34809467b48Spatrick   PartialInlinerLegacyPass() : ModulePass(ID) {
34909467b48Spatrick     initializePartialInlinerLegacyPassPass(*PassRegistry::getPassRegistry());
35009467b48Spatrick   }
35109467b48Spatrick 
getAnalysisUsage__anon4298a4970111::PartialInlinerLegacyPass35209467b48Spatrick   void getAnalysisUsage(AnalysisUsage &AU) const override {
35309467b48Spatrick     AU.addRequired<AssumptionCacheTracker>();
35409467b48Spatrick     AU.addRequired<ProfileSummaryInfoWrapperPass>();
35509467b48Spatrick     AU.addRequired<TargetTransformInfoWrapperPass>();
356097a140dSpatrick     AU.addRequired<TargetLibraryInfoWrapperPass>();
35709467b48Spatrick   }
35809467b48Spatrick 
runOnModule__anon4298a4970111::PartialInlinerLegacyPass35909467b48Spatrick   bool runOnModule(Module &M) override {
36009467b48Spatrick     if (skipModule(M))
36109467b48Spatrick       return false;
36209467b48Spatrick 
36309467b48Spatrick     AssumptionCacheTracker *ACT = &getAnalysis<AssumptionCacheTracker>();
36409467b48Spatrick     TargetTransformInfoWrapperPass *TTIWP =
36509467b48Spatrick         &getAnalysis<TargetTransformInfoWrapperPass>();
366097a140dSpatrick     ProfileSummaryInfo &PSI =
367097a140dSpatrick         getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
36809467b48Spatrick 
369097a140dSpatrick     auto GetAssumptionCache = [&ACT](Function &F) -> AssumptionCache & {
37009467b48Spatrick       return ACT->getAssumptionCache(F);
37109467b48Spatrick     };
37209467b48Spatrick 
37309467b48Spatrick     auto LookupAssumptionCache = [ACT](Function &F) -> AssumptionCache * {
37409467b48Spatrick       return ACT->lookupAssumptionCache(F);
37509467b48Spatrick     };
37609467b48Spatrick 
377097a140dSpatrick     auto GetTTI = [&TTIWP](Function &F) -> TargetTransformInfo & {
37809467b48Spatrick       return TTIWP->getTTI(F);
37909467b48Spatrick     };
38009467b48Spatrick 
381097a140dSpatrick     auto GetTLI = [this](Function &F) -> TargetLibraryInfo & {
382097a140dSpatrick       return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
383097a140dSpatrick     };
384097a140dSpatrick 
385097a140dSpatrick     return PartialInlinerImpl(GetAssumptionCache, LookupAssumptionCache, GetTTI,
386097a140dSpatrick                               GetTLI, PSI)
38709467b48Spatrick         .run(M);
38809467b48Spatrick   }
38909467b48Spatrick };
39009467b48Spatrick 
39109467b48Spatrick } // end anonymous namespace
39209467b48Spatrick 
39309467b48Spatrick std::unique_ptr<FunctionOutliningMultiRegionInfo>
computeOutliningColdRegionsInfo(Function & F,OptimizationRemarkEmitter & ORE) const39473471bf0Spatrick PartialInlinerImpl::computeOutliningColdRegionsInfo(
39573471bf0Spatrick     Function &F, OptimizationRemarkEmitter &ORE) const {
39673471bf0Spatrick   BasicBlock *EntryBlock = &F.front();
39709467b48Spatrick 
39873471bf0Spatrick   DominatorTree DT(F);
39909467b48Spatrick   LoopInfo LI(DT);
40073471bf0Spatrick   BranchProbabilityInfo BPI(F, LI);
40109467b48Spatrick   std::unique_ptr<BlockFrequencyInfo> ScopedBFI;
40209467b48Spatrick   BlockFrequencyInfo *BFI;
40309467b48Spatrick   if (!GetBFI) {
40473471bf0Spatrick     ScopedBFI.reset(new BlockFrequencyInfo(F, BPI, LI));
40509467b48Spatrick     BFI = ScopedBFI.get();
40609467b48Spatrick   } else
40773471bf0Spatrick     BFI = &(GetBFI(F));
40809467b48Spatrick 
40909467b48Spatrick   // Return if we don't have profiling information.
410097a140dSpatrick   if (!PSI.hasInstrumentationProfile())
41109467b48Spatrick     return std::unique_ptr<FunctionOutliningMultiRegionInfo>();
41209467b48Spatrick 
41309467b48Spatrick   std::unique_ptr<FunctionOutliningMultiRegionInfo> OutliningInfo =
41409467b48Spatrick       std::make_unique<FunctionOutliningMultiRegionInfo>();
41509467b48Spatrick 
41609467b48Spatrick   auto IsSingleExit =
41709467b48Spatrick       [&ORE](SmallVectorImpl<BasicBlock *> &BlockList) -> BasicBlock * {
41809467b48Spatrick     BasicBlock *ExitBlock = nullptr;
41909467b48Spatrick     for (auto *Block : BlockList) {
42073471bf0Spatrick       for (BasicBlock *Succ : successors(Block)) {
42173471bf0Spatrick         if (!is_contained(BlockList, Succ)) {
42209467b48Spatrick           if (ExitBlock) {
42309467b48Spatrick             ORE.emit([&]() {
42409467b48Spatrick               return OptimizationRemarkMissed(DEBUG_TYPE, "MultiExitRegion",
42573471bf0Spatrick                                               &Succ->front())
42609467b48Spatrick                      << "Region dominated by "
42709467b48Spatrick                      << ore::NV("Block", BlockList.front()->getName())
42809467b48Spatrick                      << " has more than one region exit edge.";
42909467b48Spatrick             });
43009467b48Spatrick             return nullptr;
43173471bf0Spatrick           }
43273471bf0Spatrick 
43309467b48Spatrick           ExitBlock = Block;
43409467b48Spatrick         }
43509467b48Spatrick       }
43609467b48Spatrick     }
43709467b48Spatrick     return ExitBlock;
43809467b48Spatrick   };
43909467b48Spatrick 
44009467b48Spatrick   auto BBProfileCount = [BFI](BasicBlock *BB) {
441*d415bd75Srobert     return BFI->getBlockProfileCount(BB).value_or(0);
44209467b48Spatrick   };
44309467b48Spatrick 
44409467b48Spatrick   // Use the same computeBBInlineCost function to compute the cost savings of
44509467b48Spatrick   // the outlining the candidate region.
44673471bf0Spatrick   TargetTransformInfo *FTTI = &GetTTI(F);
44773471bf0Spatrick   InstructionCost OverallFunctionCost = 0;
44873471bf0Spatrick   for (auto &BB : F)
44973471bf0Spatrick     OverallFunctionCost += computeBBInlineCost(&BB, FTTI);
45009467b48Spatrick 
45173471bf0Spatrick   LLVM_DEBUG(dbgs() << "OverallFunctionCost = " << OverallFunctionCost
45273471bf0Spatrick                     << "\n";);
45373471bf0Spatrick 
45473471bf0Spatrick   InstructionCost MinOutlineRegionCost = OverallFunctionCost.map(
45573471bf0Spatrick       [&](auto Cost) { return Cost * MinRegionSizeRatio; });
45673471bf0Spatrick 
45709467b48Spatrick   BranchProbability MinBranchProbability(
45809467b48Spatrick       static_cast<int>(ColdBranchRatio * MinBlockCounterExecution),
45909467b48Spatrick       MinBlockCounterExecution);
46009467b48Spatrick   bool ColdCandidateFound = false;
46109467b48Spatrick   BasicBlock *CurrEntry = EntryBlock;
46209467b48Spatrick   std::vector<BasicBlock *> DFS;
46309467b48Spatrick   DenseMap<BasicBlock *, bool> VisitedMap;
46409467b48Spatrick   DFS.push_back(CurrEntry);
46509467b48Spatrick   VisitedMap[CurrEntry] = true;
46673471bf0Spatrick 
46709467b48Spatrick   // Use Depth First Search on the basic blocks to find CFG edges that are
46809467b48Spatrick   // considered cold.
46909467b48Spatrick   // Cold regions considered must also have its inline cost compared to the
47009467b48Spatrick   // overall inline cost of the original function.  The region is outlined only
47109467b48Spatrick   // if it reduced the inline cost of the function by 'MinOutlineRegionCost' or
47209467b48Spatrick   // more.
47309467b48Spatrick   while (!DFS.empty()) {
47473471bf0Spatrick     auto *ThisBB = DFS.back();
47509467b48Spatrick     DFS.pop_back();
47609467b48Spatrick     // Only consider regions with predecessor blocks that are considered
47709467b48Spatrick     // not-cold (default: part of the top 99.99% of all block counters)
47809467b48Spatrick     // AND greater than our minimum block execution count (default: 100).
47973471bf0Spatrick     if (PSI.isColdBlock(ThisBB, BFI) ||
48073471bf0Spatrick         BBProfileCount(ThisBB) < MinBlockCounterExecution)
48109467b48Spatrick       continue;
48273471bf0Spatrick     for (auto SI = succ_begin(ThisBB); SI != succ_end(ThisBB); ++SI) {
48309467b48Spatrick       if (VisitedMap[*SI])
48409467b48Spatrick         continue;
48509467b48Spatrick       VisitedMap[*SI] = true;
48609467b48Spatrick       DFS.push_back(*SI);
48709467b48Spatrick       // If branch isn't cold, we skip to the next one.
48873471bf0Spatrick       BranchProbability SuccProb = BPI.getEdgeProbability(ThisBB, *SI);
48909467b48Spatrick       if (SuccProb > MinBranchProbability)
49009467b48Spatrick         continue;
49173471bf0Spatrick 
49273471bf0Spatrick       LLVM_DEBUG(dbgs() << "Found cold edge: " << ThisBB->getName() << "->"
49373471bf0Spatrick                         << SI->getName()
49473471bf0Spatrick                         << "\nBranch Probability = " << SuccProb << "\n";);
49573471bf0Spatrick 
49609467b48Spatrick       SmallVector<BasicBlock *, 8> DominateVector;
49709467b48Spatrick       DT.getDescendants(*SI, DominateVector);
49873471bf0Spatrick       assert(!DominateVector.empty() &&
49973471bf0Spatrick              "SI should be reachable and have at least itself as descendant");
50073471bf0Spatrick 
50109467b48Spatrick       // We can only outline single entry regions (for now).
50273471bf0Spatrick       if (!DominateVector.front()->hasNPredecessors(1)) {
50373471bf0Spatrick         LLVM_DEBUG(dbgs() << "ABORT: Block " << SI->getName()
50473471bf0Spatrick                           << " doesn't have a single predecessor in the "
50573471bf0Spatrick                              "dominator tree\n";);
50609467b48Spatrick         continue;
50773471bf0Spatrick       }
50873471bf0Spatrick 
50909467b48Spatrick       BasicBlock *ExitBlock = nullptr;
51009467b48Spatrick       // We can only outline single exit regions (for now).
51173471bf0Spatrick       if (!(ExitBlock = IsSingleExit(DominateVector))) {
51273471bf0Spatrick         LLVM_DEBUG(dbgs() << "ABORT: Block " << SI->getName()
51373471bf0Spatrick                           << " doesn't have a unique successor\n";);
51409467b48Spatrick         continue;
51573471bf0Spatrick       }
51673471bf0Spatrick 
51773471bf0Spatrick       InstructionCost OutlineRegionCost = 0;
51809467b48Spatrick       for (auto *BB : DominateVector)
51973471bf0Spatrick         OutlineRegionCost += computeBBInlineCost(BB, &GetTTI(*BB->getParent()));
52009467b48Spatrick 
52173471bf0Spatrick       LLVM_DEBUG(dbgs() << "OutlineRegionCost = " << OutlineRegionCost
52273471bf0Spatrick                         << "\n";);
52309467b48Spatrick 
52473471bf0Spatrick       if (!SkipCostAnalysis && OutlineRegionCost < MinOutlineRegionCost) {
52509467b48Spatrick         ORE.emit([&]() {
52609467b48Spatrick           return OptimizationRemarkAnalysis(DEBUG_TYPE, "TooCostly",
52709467b48Spatrick                                             &SI->front())
52873471bf0Spatrick                  << ore::NV("Callee", &F)
52973471bf0Spatrick                  << " inline cost-savings smaller than "
53009467b48Spatrick                  << ore::NV("Cost", MinOutlineRegionCost);
53109467b48Spatrick         });
53273471bf0Spatrick 
53373471bf0Spatrick         LLVM_DEBUG(dbgs() << "ABORT: Outline region cost is smaller than "
53473471bf0Spatrick                           << MinOutlineRegionCost << "\n";);
53509467b48Spatrick         continue;
53609467b48Spatrick       }
53773471bf0Spatrick 
53809467b48Spatrick       // For now, ignore blocks that belong to a SISE region that is a
53909467b48Spatrick       // candidate for outlining.  In the future, we may want to look
54009467b48Spatrick       // at inner regions because the outer region may have live-exit
54109467b48Spatrick       // variables.
54209467b48Spatrick       for (auto *BB : DominateVector)
54309467b48Spatrick         VisitedMap[BB] = true;
54473471bf0Spatrick 
54509467b48Spatrick       // ReturnBlock here means the block after the outline call
54609467b48Spatrick       BasicBlock *ReturnBlock = ExitBlock->getSingleSuccessor();
54709467b48Spatrick       FunctionOutliningMultiRegionInfo::OutlineRegionInfo RegInfo(
54809467b48Spatrick           DominateVector, DominateVector.front(), ExitBlock, ReturnBlock);
54909467b48Spatrick       OutliningInfo->ORI.push_back(RegInfo);
55073471bf0Spatrick       LLVM_DEBUG(dbgs() << "Found Cold Candidate starting at block: "
55173471bf0Spatrick                         << DominateVector.front()->getName() << "\n";);
55209467b48Spatrick       ColdCandidateFound = true;
55309467b48Spatrick       NumColdRegionsFound++;
55409467b48Spatrick     }
55509467b48Spatrick   }
55673471bf0Spatrick 
55709467b48Spatrick   if (ColdCandidateFound)
55809467b48Spatrick     return OutliningInfo;
55973471bf0Spatrick 
56009467b48Spatrick   return std::unique_ptr<FunctionOutliningMultiRegionInfo>();
56109467b48Spatrick }
56209467b48Spatrick 
56309467b48Spatrick std::unique_ptr<FunctionOutliningInfo>
computeOutliningInfo(Function & F) const56473471bf0Spatrick PartialInlinerImpl::computeOutliningInfo(Function &F) const {
56573471bf0Spatrick   BasicBlock *EntryBlock = &F.front();
56609467b48Spatrick   BranchInst *BR = dyn_cast<BranchInst>(EntryBlock->getTerminator());
56709467b48Spatrick   if (!BR || BR->isUnconditional())
56809467b48Spatrick     return std::unique_ptr<FunctionOutliningInfo>();
56909467b48Spatrick 
57009467b48Spatrick   // Returns true if Succ is BB's successor
57109467b48Spatrick   auto IsSuccessor = [](BasicBlock *Succ, BasicBlock *BB) {
57209467b48Spatrick     return is_contained(successors(BB), Succ);
57309467b48Spatrick   };
57409467b48Spatrick 
57509467b48Spatrick   auto IsReturnBlock = [](BasicBlock *BB) {
57609467b48Spatrick     Instruction *TI = BB->getTerminator();
57709467b48Spatrick     return isa<ReturnInst>(TI);
57809467b48Spatrick   };
57909467b48Spatrick 
58009467b48Spatrick   auto GetReturnBlock = [&](BasicBlock *Succ1, BasicBlock *Succ2) {
58109467b48Spatrick     if (IsReturnBlock(Succ1))
58209467b48Spatrick       return std::make_tuple(Succ1, Succ2);
58309467b48Spatrick     if (IsReturnBlock(Succ2))
58409467b48Spatrick       return std::make_tuple(Succ2, Succ1);
58509467b48Spatrick 
58609467b48Spatrick     return std::make_tuple<BasicBlock *, BasicBlock *>(nullptr, nullptr);
58709467b48Spatrick   };
58809467b48Spatrick 
58909467b48Spatrick   // Detect a triangular shape:
59009467b48Spatrick   auto GetCommonSucc = [&](BasicBlock *Succ1, BasicBlock *Succ2) {
59109467b48Spatrick     if (IsSuccessor(Succ1, Succ2))
59209467b48Spatrick       return std::make_tuple(Succ1, Succ2);
59309467b48Spatrick     if (IsSuccessor(Succ2, Succ1))
59409467b48Spatrick       return std::make_tuple(Succ2, Succ1);
59509467b48Spatrick 
59609467b48Spatrick     return std::make_tuple<BasicBlock *, BasicBlock *>(nullptr, nullptr);
59709467b48Spatrick   };
59809467b48Spatrick 
59909467b48Spatrick   std::unique_ptr<FunctionOutliningInfo> OutliningInfo =
60009467b48Spatrick       std::make_unique<FunctionOutliningInfo>();
60109467b48Spatrick 
60209467b48Spatrick   BasicBlock *CurrEntry = EntryBlock;
60309467b48Spatrick   bool CandidateFound = false;
60409467b48Spatrick   do {
60509467b48Spatrick     // The number of blocks to be inlined has already reached
60609467b48Spatrick     // the limit. When MaxNumInlineBlocks is set to 0 or 1, this
60709467b48Spatrick     // disables partial inlining for the function.
60873471bf0Spatrick     if (OutliningInfo->getNumInlinedBlocks() >= MaxNumInlineBlocks)
60909467b48Spatrick       break;
61009467b48Spatrick 
61109467b48Spatrick     if (succ_size(CurrEntry) != 2)
61209467b48Spatrick       break;
61309467b48Spatrick 
61409467b48Spatrick     BasicBlock *Succ1 = *succ_begin(CurrEntry);
61509467b48Spatrick     BasicBlock *Succ2 = *(succ_begin(CurrEntry) + 1);
61609467b48Spatrick 
61709467b48Spatrick     BasicBlock *ReturnBlock, *NonReturnBlock;
61809467b48Spatrick     std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2);
61909467b48Spatrick 
62009467b48Spatrick     if (ReturnBlock) {
62109467b48Spatrick       OutliningInfo->Entries.push_back(CurrEntry);
62209467b48Spatrick       OutliningInfo->ReturnBlock = ReturnBlock;
62309467b48Spatrick       OutliningInfo->NonReturnBlock = NonReturnBlock;
62409467b48Spatrick       CandidateFound = true;
62509467b48Spatrick       break;
62609467b48Spatrick     }
62709467b48Spatrick 
62873471bf0Spatrick     BasicBlock *CommSucc, *OtherSucc;
62909467b48Spatrick     std::tie(CommSucc, OtherSucc) = GetCommonSucc(Succ1, Succ2);
63009467b48Spatrick 
63109467b48Spatrick     if (!CommSucc)
63209467b48Spatrick       break;
63309467b48Spatrick 
63409467b48Spatrick     OutliningInfo->Entries.push_back(CurrEntry);
63509467b48Spatrick     CurrEntry = OtherSucc;
63609467b48Spatrick   } while (true);
63709467b48Spatrick 
63809467b48Spatrick   if (!CandidateFound)
63909467b48Spatrick     return std::unique_ptr<FunctionOutliningInfo>();
64009467b48Spatrick 
641*d415bd75Srobert   // There should not be any successors (not in the entry set) other than
64209467b48Spatrick   // {ReturnBlock, NonReturnBlock}
64373471bf0Spatrick   assert(OutliningInfo->Entries[0] == &F.front() &&
64409467b48Spatrick          "Function Entry must be the first in Entries vector");
64509467b48Spatrick   DenseSet<BasicBlock *> Entries;
64609467b48Spatrick   for (BasicBlock *E : OutliningInfo->Entries)
64709467b48Spatrick     Entries.insert(E);
64809467b48Spatrick 
64909467b48Spatrick   // Returns true of BB has Predecessor which is not
65009467b48Spatrick   // in Entries set.
65109467b48Spatrick   auto HasNonEntryPred = [Entries](BasicBlock *BB) {
65273471bf0Spatrick     for (auto *Pred : predecessors(BB)) {
65309467b48Spatrick       if (!Entries.count(Pred))
65409467b48Spatrick         return true;
65509467b48Spatrick     }
65609467b48Spatrick     return false;
65709467b48Spatrick   };
65809467b48Spatrick   auto CheckAndNormalizeCandidate =
65909467b48Spatrick       [Entries, HasNonEntryPred](FunctionOutliningInfo *OutliningInfo) {
66009467b48Spatrick         for (BasicBlock *E : OutliningInfo->Entries) {
66173471bf0Spatrick           for (auto *Succ : successors(E)) {
66209467b48Spatrick             if (Entries.count(Succ))
66309467b48Spatrick               continue;
66409467b48Spatrick             if (Succ == OutliningInfo->ReturnBlock)
66509467b48Spatrick               OutliningInfo->ReturnBlockPreds.push_back(E);
66609467b48Spatrick             else if (Succ != OutliningInfo->NonReturnBlock)
66709467b48Spatrick               return false;
66809467b48Spatrick           }
66909467b48Spatrick           // There should not be any outside incoming edges either:
67009467b48Spatrick           if (HasNonEntryPred(E))
67109467b48Spatrick             return false;
67209467b48Spatrick         }
67309467b48Spatrick         return true;
67409467b48Spatrick       };
67509467b48Spatrick 
67609467b48Spatrick   if (!CheckAndNormalizeCandidate(OutliningInfo.get()))
67709467b48Spatrick     return std::unique_ptr<FunctionOutliningInfo>();
67809467b48Spatrick 
67909467b48Spatrick   // Now further growing the candidate's inlining region by
68009467b48Spatrick   // peeling off dominating blocks from the outlining region:
68173471bf0Spatrick   while (OutliningInfo->getNumInlinedBlocks() < MaxNumInlineBlocks) {
68209467b48Spatrick     BasicBlock *Cand = OutliningInfo->NonReturnBlock;
68309467b48Spatrick     if (succ_size(Cand) != 2)
68409467b48Spatrick       break;
68509467b48Spatrick 
68609467b48Spatrick     if (HasNonEntryPred(Cand))
68709467b48Spatrick       break;
68809467b48Spatrick 
68909467b48Spatrick     BasicBlock *Succ1 = *succ_begin(Cand);
69009467b48Spatrick     BasicBlock *Succ2 = *(succ_begin(Cand) + 1);
69109467b48Spatrick 
69209467b48Spatrick     BasicBlock *ReturnBlock, *NonReturnBlock;
69309467b48Spatrick     std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2);
69409467b48Spatrick     if (!ReturnBlock || ReturnBlock != OutliningInfo->ReturnBlock)
69509467b48Spatrick       break;
69609467b48Spatrick 
69709467b48Spatrick     if (NonReturnBlock->getSinglePredecessor() != Cand)
69809467b48Spatrick       break;
69909467b48Spatrick 
70009467b48Spatrick     // Now grow and update OutlininigInfo:
70109467b48Spatrick     OutliningInfo->Entries.push_back(Cand);
70209467b48Spatrick     OutliningInfo->NonReturnBlock = NonReturnBlock;
70309467b48Spatrick     OutliningInfo->ReturnBlockPreds.push_back(Cand);
70409467b48Spatrick     Entries.insert(Cand);
70509467b48Spatrick   }
70609467b48Spatrick 
70709467b48Spatrick   return OutliningInfo;
70809467b48Spatrick }
70909467b48Spatrick 
71009467b48Spatrick // Check if there is PGO data or user annotated branch data:
hasProfileData(const Function & F,const FunctionOutliningInfo & OI)71173471bf0Spatrick static bool hasProfileData(const Function &F, const FunctionOutliningInfo &OI) {
71273471bf0Spatrick   if (F.hasProfileData())
71309467b48Spatrick     return true;
71409467b48Spatrick   // Now check if any of the entry block has MD_prof data:
71573471bf0Spatrick   for (auto *E : OI.Entries) {
71609467b48Spatrick     BranchInst *BR = dyn_cast<BranchInst>(E->getTerminator());
71709467b48Spatrick     if (!BR || BR->isUnconditional())
71809467b48Spatrick       continue;
719*d415bd75Srobert     if (hasBranchWeightMD(*BR))
72009467b48Spatrick       return true;
72109467b48Spatrick   }
72209467b48Spatrick   return false;
72309467b48Spatrick }
72409467b48Spatrick 
getOutliningCallBBRelativeFreq(FunctionCloner & Cloner) const72573471bf0Spatrick BranchProbability PartialInlinerImpl::getOutliningCallBBRelativeFreq(
72673471bf0Spatrick     FunctionCloner &Cloner) const {
72709467b48Spatrick   BasicBlock *OutliningCallBB = Cloner.OutlinedFunctions.back().second;
72809467b48Spatrick   auto EntryFreq =
72909467b48Spatrick       Cloner.ClonedFuncBFI->getBlockFreq(&Cloner.ClonedFunc->getEntryBlock());
73009467b48Spatrick   auto OutliningCallFreq =
73109467b48Spatrick       Cloner.ClonedFuncBFI->getBlockFreq(OutliningCallBB);
73209467b48Spatrick   // FIXME Hackery needed because ClonedFuncBFI is based on the function BEFORE
73309467b48Spatrick   // we outlined any regions, so we may encounter situations where the
73409467b48Spatrick   // OutliningCallFreq is *slightly* bigger than the EntryFreq.
73573471bf0Spatrick   if (OutliningCallFreq.getFrequency() > EntryFreq.getFrequency())
73609467b48Spatrick     OutliningCallFreq = EntryFreq;
73773471bf0Spatrick 
73809467b48Spatrick   auto OutlineRegionRelFreq = BranchProbability::getBranchProbability(
73909467b48Spatrick       OutliningCallFreq.getFrequency(), EntryFreq.getFrequency());
74009467b48Spatrick 
741*d415bd75Srobert   if (hasProfileData(*Cloner.OrigFunc, *Cloner.ClonedOI))
74209467b48Spatrick     return OutlineRegionRelFreq;
74309467b48Spatrick 
74409467b48Spatrick   // When profile data is not available, we need to be conservative in
74509467b48Spatrick   // estimating the overall savings. Static branch prediction can usually
74609467b48Spatrick   // guess the branch direction right (taken/non-taken), but the guessed
74709467b48Spatrick   // branch probability is usually not biased enough. In case when the
74809467b48Spatrick   // outlined region is predicted to be likely, its probability needs
74909467b48Spatrick   // to be made higher (more biased) to not under-estimate the cost of
75009467b48Spatrick   // function outlining. On the other hand, if the outlined region
75109467b48Spatrick   // is predicted to be less likely, the predicted probablity is usually
75209467b48Spatrick   // higher than the actual. For instance, the actual probability of the
75309467b48Spatrick   // less likely target is only 5%, but the guessed probablity can be
754*d415bd75Srobert   // 40%. In the latter case, there is no need for further adjustment.
75509467b48Spatrick   // FIXME: add an option for this.
75609467b48Spatrick   if (OutlineRegionRelFreq < BranchProbability(45, 100))
75709467b48Spatrick     return OutlineRegionRelFreq;
75809467b48Spatrick 
75909467b48Spatrick   OutlineRegionRelFreq = std::max(
76009467b48Spatrick       OutlineRegionRelFreq, BranchProbability(OutlineRegionFreqPercent, 100));
76109467b48Spatrick 
76209467b48Spatrick   return OutlineRegionRelFreq;
76309467b48Spatrick }
76409467b48Spatrick 
shouldPartialInline(CallBase & CB,FunctionCloner & Cloner,BlockFrequency WeightedOutliningRcost,OptimizationRemarkEmitter & ORE) const76509467b48Spatrick bool PartialInlinerImpl::shouldPartialInline(
766097a140dSpatrick     CallBase &CB, FunctionCloner &Cloner, BlockFrequency WeightedOutliningRcost,
76773471bf0Spatrick     OptimizationRemarkEmitter &ORE) const {
76809467b48Spatrick   using namespace ore;
76909467b48Spatrick 
770097a140dSpatrick   Function *Callee = CB.getCalledFunction();
77109467b48Spatrick   assert(Callee == Cloner.ClonedFunc);
77209467b48Spatrick 
77309467b48Spatrick   if (SkipCostAnalysis)
774097a140dSpatrick     return isInlineViable(*Callee).isSuccess();
77509467b48Spatrick 
776097a140dSpatrick   Function *Caller = CB.getCaller();
777097a140dSpatrick   auto &CalleeTTI = GetTTI(*Callee);
77809467b48Spatrick   bool RemarksEnabled =
77909467b48Spatrick       Callee->getContext().getDiagHandlerPtr()->isMissedOptRemarkEnabled(
78009467b48Spatrick           DEBUG_TYPE);
781097a140dSpatrick   InlineCost IC =
782097a140dSpatrick       getInlineCost(CB, getInlineParams(), CalleeTTI, GetAssumptionCache,
783097a140dSpatrick                     GetTLI, GetBFI, &PSI, RemarksEnabled ? &ORE : nullptr);
78409467b48Spatrick 
78509467b48Spatrick   if (IC.isAlways()) {
78609467b48Spatrick     ORE.emit([&]() {
787097a140dSpatrick       return OptimizationRemarkAnalysis(DEBUG_TYPE, "AlwaysInline", &CB)
78809467b48Spatrick              << NV("Callee", Cloner.OrigFunc)
78909467b48Spatrick              << " should always be fully inlined, not partially";
79009467b48Spatrick     });
79109467b48Spatrick     return false;
79209467b48Spatrick   }
79309467b48Spatrick 
79409467b48Spatrick   if (IC.isNever()) {
79509467b48Spatrick     ORE.emit([&]() {
796097a140dSpatrick       return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", &CB)
79709467b48Spatrick              << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
79809467b48Spatrick              << NV("Caller", Caller)
79909467b48Spatrick              << " because it should never be inlined (cost=never)";
80009467b48Spatrick     });
80109467b48Spatrick     return false;
80209467b48Spatrick   }
80309467b48Spatrick 
80409467b48Spatrick   if (!IC) {
80509467b48Spatrick     ORE.emit([&]() {
806097a140dSpatrick       return OptimizationRemarkAnalysis(DEBUG_TYPE, "TooCostly", &CB)
80709467b48Spatrick              << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
80809467b48Spatrick              << NV("Caller", Caller) << " because too costly to inline (cost="
80909467b48Spatrick              << NV("Cost", IC.getCost()) << ", threshold="
81009467b48Spatrick              << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")";
81109467b48Spatrick     });
81209467b48Spatrick     return false;
81309467b48Spatrick   }
81409467b48Spatrick   const DataLayout &DL = Caller->getParent()->getDataLayout();
81509467b48Spatrick 
81609467b48Spatrick   // The savings of eliminating the call:
817097a140dSpatrick   int NonWeightedSavings = getCallsiteCost(CB, DL);
81809467b48Spatrick   BlockFrequency NormWeightedSavings(NonWeightedSavings);
81909467b48Spatrick 
82009467b48Spatrick   // Weighted saving is smaller than weighted cost, return false
82109467b48Spatrick   if (NormWeightedSavings < WeightedOutliningRcost) {
82209467b48Spatrick     ORE.emit([&]() {
82309467b48Spatrick       return OptimizationRemarkAnalysis(DEBUG_TYPE, "OutliningCallcostTooHigh",
824097a140dSpatrick                                         &CB)
82509467b48Spatrick              << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
82609467b48Spatrick              << NV("Caller", Caller) << " runtime overhead (overhead="
82709467b48Spatrick              << NV("Overhead", (unsigned)WeightedOutliningRcost.getFrequency())
82809467b48Spatrick              << ", savings="
82909467b48Spatrick              << NV("Savings", (unsigned)NormWeightedSavings.getFrequency())
83009467b48Spatrick              << ")"
83109467b48Spatrick              << " of making the outlined call is too high";
83209467b48Spatrick     });
83309467b48Spatrick 
83409467b48Spatrick     return false;
83509467b48Spatrick   }
83609467b48Spatrick 
83709467b48Spatrick   ORE.emit([&]() {
838097a140dSpatrick     return OptimizationRemarkAnalysis(DEBUG_TYPE, "CanBePartiallyInlined", &CB)
83909467b48Spatrick            << NV("Callee", Cloner.OrigFunc) << " can be partially inlined into "
84009467b48Spatrick            << NV("Caller", Caller) << " with cost=" << NV("Cost", IC.getCost())
84109467b48Spatrick            << " (threshold="
84209467b48Spatrick            << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")";
84309467b48Spatrick   });
84409467b48Spatrick   return true;
84509467b48Spatrick }
84609467b48Spatrick 
84709467b48Spatrick // TODO: Ideally  we should share Inliner's InlineCost Analysis code.
84809467b48Spatrick // For now use a simplified version. The returned 'InlineCost' will be used
84909467b48Spatrick // to esimate the size cost as well as runtime cost of the BB.
85073471bf0Spatrick InstructionCost
computeBBInlineCost(BasicBlock * BB,TargetTransformInfo * TTI)85173471bf0Spatrick PartialInlinerImpl::computeBBInlineCost(BasicBlock *BB,
85273471bf0Spatrick                                         TargetTransformInfo *TTI) {
85373471bf0Spatrick   InstructionCost InlineCost = 0;
85409467b48Spatrick   const DataLayout &DL = BB->getParent()->getParent()->getDataLayout();
855*d415bd75Srobert   int InstrCost = InlineConstants::getInstrCost();
85609467b48Spatrick   for (Instruction &I : BB->instructionsWithoutDebug()) {
85709467b48Spatrick     // Skip free instructions.
85809467b48Spatrick     switch (I.getOpcode()) {
85909467b48Spatrick     case Instruction::BitCast:
86009467b48Spatrick     case Instruction::PtrToInt:
86109467b48Spatrick     case Instruction::IntToPtr:
86209467b48Spatrick     case Instruction::Alloca:
86309467b48Spatrick     case Instruction::PHI:
86409467b48Spatrick       continue;
86509467b48Spatrick     case Instruction::GetElementPtr:
86609467b48Spatrick       if (cast<GetElementPtrInst>(&I)->hasAllZeroIndices())
86709467b48Spatrick         continue;
86809467b48Spatrick       break;
86909467b48Spatrick     default:
87009467b48Spatrick       break;
87109467b48Spatrick     }
87209467b48Spatrick 
87309467b48Spatrick     if (I.isLifetimeStartOrEnd())
87409467b48Spatrick       continue;
87509467b48Spatrick 
87673471bf0Spatrick     if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
87773471bf0Spatrick       Intrinsic::ID IID = II->getIntrinsicID();
87873471bf0Spatrick       SmallVector<Type *, 4> Tys;
87973471bf0Spatrick       FastMathFlags FMF;
88073471bf0Spatrick       for (Value *Val : II->args())
88173471bf0Spatrick         Tys.push_back(Val->getType());
88273471bf0Spatrick 
88373471bf0Spatrick       if (auto *FPMO = dyn_cast<FPMathOperator>(II))
88473471bf0Spatrick         FMF = FPMO->getFastMathFlags();
88573471bf0Spatrick 
88673471bf0Spatrick       IntrinsicCostAttributes ICA(IID, II->getType(), Tys, FMF);
88773471bf0Spatrick       InlineCost += TTI->getIntrinsicInstrCost(ICA, TTI::TCK_SizeAndLatency);
88873471bf0Spatrick       continue;
88973471bf0Spatrick     }
89073471bf0Spatrick 
89109467b48Spatrick     if (CallInst *CI = dyn_cast<CallInst>(&I)) {
89209467b48Spatrick       InlineCost += getCallsiteCost(*CI, DL);
89309467b48Spatrick       continue;
89409467b48Spatrick     }
89509467b48Spatrick 
89609467b48Spatrick     if (InvokeInst *II = dyn_cast<InvokeInst>(&I)) {
89709467b48Spatrick       InlineCost += getCallsiteCost(*II, DL);
89809467b48Spatrick       continue;
89909467b48Spatrick     }
90009467b48Spatrick 
90109467b48Spatrick     if (SwitchInst *SI = dyn_cast<SwitchInst>(&I)) {
902*d415bd75Srobert       InlineCost += (SI->getNumCases() + 1) * InstrCost;
90309467b48Spatrick       continue;
90409467b48Spatrick     }
905*d415bd75Srobert     InlineCost += InstrCost;
90609467b48Spatrick   }
90773471bf0Spatrick 
90809467b48Spatrick   return InlineCost;
90909467b48Spatrick }
91009467b48Spatrick 
91173471bf0Spatrick std::tuple<InstructionCost, InstructionCost>
computeOutliningCosts(FunctionCloner & Cloner) const91273471bf0Spatrick PartialInlinerImpl::computeOutliningCosts(FunctionCloner &Cloner) const {
91373471bf0Spatrick   InstructionCost OutliningFuncCallCost = 0, OutlinedFunctionCost = 0;
91409467b48Spatrick   for (auto FuncBBPair : Cloner.OutlinedFunctions) {
91509467b48Spatrick     Function *OutlinedFunc = FuncBBPair.first;
91609467b48Spatrick     BasicBlock* OutliningCallBB = FuncBBPair.second;
91709467b48Spatrick     // Now compute the cost of the call sequence to the outlined function
91809467b48Spatrick     // 'OutlinedFunction' in BB 'OutliningCallBB':
91973471bf0Spatrick     auto *OutlinedFuncTTI = &GetTTI(*OutlinedFunc);
92073471bf0Spatrick     OutliningFuncCallCost +=
92173471bf0Spatrick         computeBBInlineCost(OutliningCallBB, OutlinedFuncTTI);
92209467b48Spatrick 
92309467b48Spatrick     // Now compute the cost of the extracted/outlined function itself:
92409467b48Spatrick     for (BasicBlock &BB : *OutlinedFunc)
92573471bf0Spatrick       OutlinedFunctionCost += computeBBInlineCost(&BB, OutlinedFuncTTI);
92609467b48Spatrick   }
92709467b48Spatrick   assert(OutlinedFunctionCost >= Cloner.OutlinedRegionCost &&
92809467b48Spatrick          "Outlined function cost should be no less than the outlined region");
92909467b48Spatrick 
93009467b48Spatrick   // The code extractor introduces a new root and exit stub blocks with
93109467b48Spatrick   // additional unconditional branches. Those branches will be eliminated
93209467b48Spatrick   // later with bb layout. The cost should be adjusted accordingly:
93309467b48Spatrick   OutlinedFunctionCost -=
934*d415bd75Srobert       2 * InlineConstants::getInstrCost() * Cloner.OutlinedFunctions.size();
93509467b48Spatrick 
93673471bf0Spatrick   InstructionCost OutliningRuntimeOverhead =
93709467b48Spatrick       OutliningFuncCallCost +
93809467b48Spatrick       (OutlinedFunctionCost - Cloner.OutlinedRegionCost) +
93973471bf0Spatrick       ExtraOutliningPenalty.getValue();
94009467b48Spatrick 
94109467b48Spatrick   return std::make_tuple(OutliningFuncCallCost, OutliningRuntimeOverhead);
94209467b48Spatrick }
94309467b48Spatrick 
94409467b48Spatrick // Create the callsite to profile count map which is
94509467b48Spatrick // used to update the original function's entry count,
94609467b48Spatrick // after the function is partially inlined into the callsite.
computeCallsiteToProfCountMap(Function * DuplicateFunction,DenseMap<User *,uint64_t> & CallSiteToProfCountMap) const94709467b48Spatrick void PartialInlinerImpl::computeCallsiteToProfCountMap(
94809467b48Spatrick     Function *DuplicateFunction,
94973471bf0Spatrick     DenseMap<User *, uint64_t> &CallSiteToProfCountMap) const {
95009467b48Spatrick   std::vector<User *> Users(DuplicateFunction->user_begin(),
95109467b48Spatrick                             DuplicateFunction->user_end());
95209467b48Spatrick   Function *CurrentCaller = nullptr;
95309467b48Spatrick   std::unique_ptr<BlockFrequencyInfo> TempBFI;
95409467b48Spatrick   BlockFrequencyInfo *CurrentCallerBFI = nullptr;
95509467b48Spatrick 
95609467b48Spatrick   auto ComputeCurrBFI = [&,this](Function *Caller) {
95709467b48Spatrick       // For the old pass manager:
95809467b48Spatrick       if (!GetBFI) {
95909467b48Spatrick         DominatorTree DT(*Caller);
96009467b48Spatrick         LoopInfo LI(DT);
96109467b48Spatrick         BranchProbabilityInfo BPI(*Caller, LI);
96209467b48Spatrick         TempBFI.reset(new BlockFrequencyInfo(*Caller, BPI, LI));
96309467b48Spatrick         CurrentCallerBFI = TempBFI.get();
96409467b48Spatrick       } else {
96509467b48Spatrick         // New pass manager:
966097a140dSpatrick         CurrentCallerBFI = &(GetBFI(*Caller));
96709467b48Spatrick       }
96809467b48Spatrick   };
96909467b48Spatrick 
97009467b48Spatrick   for (User *User : Users) {
971*d415bd75Srobert     // Don't bother with BlockAddress used by CallBr for asm goto.
972*d415bd75Srobert     if (isa<BlockAddress>(User))
973*d415bd75Srobert       continue;
974097a140dSpatrick     CallBase *CB = getSupportedCallBase(User);
975097a140dSpatrick     Function *Caller = CB->getCaller();
97609467b48Spatrick     if (CurrentCaller != Caller) {
97709467b48Spatrick       CurrentCaller = Caller;
97809467b48Spatrick       ComputeCurrBFI(Caller);
97909467b48Spatrick     } else {
98009467b48Spatrick       assert(CurrentCallerBFI && "CallerBFI is not set");
98109467b48Spatrick     }
982097a140dSpatrick     BasicBlock *CallBB = CB->getParent();
98309467b48Spatrick     auto Count = CurrentCallerBFI->getBlockProfileCount(CallBB);
98409467b48Spatrick     if (Count)
98509467b48Spatrick       CallSiteToProfCountMap[User] = *Count;
98609467b48Spatrick     else
98709467b48Spatrick       CallSiteToProfCountMap[User] = 0;
98809467b48Spatrick   }
98909467b48Spatrick }
99009467b48Spatrick 
FunctionCloner(Function * F,FunctionOutliningInfo * OI,OptimizationRemarkEmitter & ORE,function_ref<AssumptionCache * (Function &)> LookupAC,function_ref<TargetTransformInfo & (Function &)> GetTTI)99109467b48Spatrick PartialInlinerImpl::FunctionCloner::FunctionCloner(
99209467b48Spatrick     Function *F, FunctionOutliningInfo *OI, OptimizationRemarkEmitter &ORE,
99373471bf0Spatrick     function_ref<AssumptionCache *(Function &)> LookupAC,
99473471bf0Spatrick     function_ref<TargetTransformInfo &(Function &)> GetTTI)
99573471bf0Spatrick     : OrigFunc(F), ORE(ORE), LookupAC(LookupAC), GetTTI(GetTTI) {
99609467b48Spatrick   ClonedOI = std::make_unique<FunctionOutliningInfo>();
99709467b48Spatrick 
99809467b48Spatrick   // Clone the function, so that we can hack away on it.
99909467b48Spatrick   ValueToValueMapTy VMap;
100009467b48Spatrick   ClonedFunc = CloneFunction(F, VMap);
100109467b48Spatrick 
100209467b48Spatrick   ClonedOI->ReturnBlock = cast<BasicBlock>(VMap[OI->ReturnBlock]);
100309467b48Spatrick   ClonedOI->NonReturnBlock = cast<BasicBlock>(VMap[OI->NonReturnBlock]);
100473471bf0Spatrick   for (BasicBlock *BB : OI->Entries)
100509467b48Spatrick     ClonedOI->Entries.push_back(cast<BasicBlock>(VMap[BB]));
100673471bf0Spatrick 
100709467b48Spatrick   for (BasicBlock *E : OI->ReturnBlockPreds) {
100809467b48Spatrick     BasicBlock *NewE = cast<BasicBlock>(VMap[E]);
100909467b48Spatrick     ClonedOI->ReturnBlockPreds.push_back(NewE);
101009467b48Spatrick   }
101109467b48Spatrick   // Go ahead and update all uses to the duplicate, so that we can just
101209467b48Spatrick   // use the inliner functionality when we're done hacking.
101309467b48Spatrick   F->replaceAllUsesWith(ClonedFunc);
101409467b48Spatrick }
101509467b48Spatrick 
FunctionCloner(Function * F,FunctionOutliningMultiRegionInfo * OI,OptimizationRemarkEmitter & ORE,function_ref<AssumptionCache * (Function &)> LookupAC,function_ref<TargetTransformInfo & (Function &)> GetTTI)101609467b48Spatrick PartialInlinerImpl::FunctionCloner::FunctionCloner(
101709467b48Spatrick     Function *F, FunctionOutliningMultiRegionInfo *OI,
101809467b48Spatrick     OptimizationRemarkEmitter &ORE,
101973471bf0Spatrick     function_ref<AssumptionCache *(Function &)> LookupAC,
102073471bf0Spatrick     function_ref<TargetTransformInfo &(Function &)> GetTTI)
102173471bf0Spatrick     : OrigFunc(F), ORE(ORE), LookupAC(LookupAC), GetTTI(GetTTI) {
102209467b48Spatrick   ClonedOMRI = std::make_unique<FunctionOutliningMultiRegionInfo>();
102309467b48Spatrick 
102409467b48Spatrick   // Clone the function, so that we can hack away on it.
102509467b48Spatrick   ValueToValueMapTy VMap;
102609467b48Spatrick   ClonedFunc = CloneFunction(F, VMap);
102709467b48Spatrick 
102809467b48Spatrick   // Go through all Outline Candidate Regions and update all BasicBlock
102909467b48Spatrick   // information.
103009467b48Spatrick   for (FunctionOutliningMultiRegionInfo::OutlineRegionInfo RegionInfo :
103109467b48Spatrick        OI->ORI) {
103209467b48Spatrick     SmallVector<BasicBlock *, 8> Region;
103373471bf0Spatrick     for (BasicBlock *BB : RegionInfo.Region)
103409467b48Spatrick       Region.push_back(cast<BasicBlock>(VMap[BB]));
103573471bf0Spatrick 
103609467b48Spatrick     BasicBlock *NewEntryBlock = cast<BasicBlock>(VMap[RegionInfo.EntryBlock]);
103709467b48Spatrick     BasicBlock *NewExitBlock = cast<BasicBlock>(VMap[RegionInfo.ExitBlock]);
103809467b48Spatrick     BasicBlock *NewReturnBlock = nullptr;
103909467b48Spatrick     if (RegionInfo.ReturnBlock)
104009467b48Spatrick       NewReturnBlock = cast<BasicBlock>(VMap[RegionInfo.ReturnBlock]);
104109467b48Spatrick     FunctionOutliningMultiRegionInfo::OutlineRegionInfo MappedRegionInfo(
104209467b48Spatrick         Region, NewEntryBlock, NewExitBlock, NewReturnBlock);
104309467b48Spatrick     ClonedOMRI->ORI.push_back(MappedRegionInfo);
104409467b48Spatrick   }
104509467b48Spatrick   // Go ahead and update all uses to the duplicate, so that we can just
104609467b48Spatrick   // use the inliner functionality when we're done hacking.
104709467b48Spatrick   F->replaceAllUsesWith(ClonedFunc);
104809467b48Spatrick }
104909467b48Spatrick 
normalizeReturnBlock() const105073471bf0Spatrick void PartialInlinerImpl::FunctionCloner::normalizeReturnBlock() const {
105173471bf0Spatrick   auto GetFirstPHI = [](BasicBlock *BB) {
105209467b48Spatrick     BasicBlock::iterator I = BB->begin();
105309467b48Spatrick     PHINode *FirstPhi = nullptr;
105409467b48Spatrick     while (I != BB->end()) {
105509467b48Spatrick       PHINode *Phi = dyn_cast<PHINode>(I);
105609467b48Spatrick       if (!Phi)
105709467b48Spatrick         break;
105809467b48Spatrick       if (!FirstPhi) {
105909467b48Spatrick         FirstPhi = Phi;
106009467b48Spatrick         break;
106109467b48Spatrick       }
106209467b48Spatrick     }
106309467b48Spatrick     return FirstPhi;
106409467b48Spatrick   };
106509467b48Spatrick 
106609467b48Spatrick   // Shouldn't need to normalize PHIs if we're not outlining non-early return
106709467b48Spatrick   // blocks.
106809467b48Spatrick   if (!ClonedOI)
106909467b48Spatrick     return;
107009467b48Spatrick 
107109467b48Spatrick   // Special hackery is needed with PHI nodes that have inputs from more than
107209467b48Spatrick   // one extracted block.  For simplicity, just split the PHIs into a two-level
107309467b48Spatrick   // sequence of PHIs, some of which will go in the extracted region, and some
107409467b48Spatrick   // of which will go outside.
107509467b48Spatrick   BasicBlock *PreReturn = ClonedOI->ReturnBlock;
107609467b48Spatrick   // only split block when necessary:
107773471bf0Spatrick   PHINode *FirstPhi = GetFirstPHI(PreReturn);
107809467b48Spatrick   unsigned NumPredsFromEntries = ClonedOI->ReturnBlockPreds.size();
107909467b48Spatrick 
108009467b48Spatrick   if (!FirstPhi || FirstPhi->getNumIncomingValues() <= NumPredsFromEntries + 1)
108109467b48Spatrick     return;
108209467b48Spatrick 
108309467b48Spatrick   auto IsTrivialPhi = [](PHINode *PN) -> Value * {
1084*d415bd75Srobert     if (llvm::all_equal(PN->incoming_values()))
1085*d415bd75Srobert       return PN->getIncomingValue(0);
108609467b48Spatrick     return nullptr;
108709467b48Spatrick   };
108809467b48Spatrick 
108909467b48Spatrick   ClonedOI->ReturnBlock = ClonedOI->ReturnBlock->splitBasicBlock(
109009467b48Spatrick       ClonedOI->ReturnBlock->getFirstNonPHI()->getIterator());
109109467b48Spatrick   BasicBlock::iterator I = PreReturn->begin();
109209467b48Spatrick   Instruction *Ins = &ClonedOI->ReturnBlock->front();
109309467b48Spatrick   SmallVector<Instruction *, 4> DeadPhis;
109409467b48Spatrick   while (I != PreReturn->end()) {
109509467b48Spatrick     PHINode *OldPhi = dyn_cast<PHINode>(I);
109609467b48Spatrick     if (!OldPhi)
109709467b48Spatrick       break;
109809467b48Spatrick 
109909467b48Spatrick     PHINode *RetPhi =
110009467b48Spatrick         PHINode::Create(OldPhi->getType(), NumPredsFromEntries + 1, "", Ins);
110109467b48Spatrick     OldPhi->replaceAllUsesWith(RetPhi);
110209467b48Spatrick     Ins = ClonedOI->ReturnBlock->getFirstNonPHI();
110309467b48Spatrick 
110409467b48Spatrick     RetPhi->addIncoming(&*I, PreReturn);
110509467b48Spatrick     for (BasicBlock *E : ClonedOI->ReturnBlockPreds) {
110609467b48Spatrick       RetPhi->addIncoming(OldPhi->getIncomingValueForBlock(E), E);
110709467b48Spatrick       OldPhi->removeIncomingValue(E);
110809467b48Spatrick     }
110909467b48Spatrick 
111009467b48Spatrick     // After incoming values splitting, the old phi may become trivial.
111109467b48Spatrick     // Keeping the trivial phi can introduce definition inside the outline
111209467b48Spatrick     // region which is live-out, causing necessary overhead (load, store
111309467b48Spatrick     // arg passing etc).
111409467b48Spatrick     if (auto *OldPhiVal = IsTrivialPhi(OldPhi)) {
111509467b48Spatrick       OldPhi->replaceAllUsesWith(OldPhiVal);
111609467b48Spatrick       DeadPhis.push_back(OldPhi);
111709467b48Spatrick     }
111809467b48Spatrick     ++I;
111909467b48Spatrick   }
112009467b48Spatrick   for (auto *DP : DeadPhis)
112109467b48Spatrick     DP->eraseFromParent();
112209467b48Spatrick 
112373471bf0Spatrick   for (auto *E : ClonedOI->ReturnBlockPreds)
112409467b48Spatrick     E->getTerminator()->replaceUsesOfWith(PreReturn, ClonedOI->ReturnBlock);
112509467b48Spatrick }
112609467b48Spatrick 
doMultiRegionFunctionOutlining()112709467b48Spatrick bool PartialInlinerImpl::FunctionCloner::doMultiRegionFunctionOutlining() {
112809467b48Spatrick 
112973471bf0Spatrick   auto ComputeRegionCost =
113073471bf0Spatrick       [&](SmallVectorImpl<BasicBlock *> &Region) -> InstructionCost {
113173471bf0Spatrick     InstructionCost Cost = 0;
113209467b48Spatrick     for (BasicBlock* BB : Region)
113373471bf0Spatrick       Cost += computeBBInlineCost(BB, &GetTTI(*BB->getParent()));
113409467b48Spatrick     return Cost;
113509467b48Spatrick   };
113609467b48Spatrick 
113709467b48Spatrick   assert(ClonedOMRI && "Expecting OutlineInfo for multi region outline");
113809467b48Spatrick 
113909467b48Spatrick   if (ClonedOMRI->ORI.empty())
114009467b48Spatrick     return false;
114109467b48Spatrick 
114209467b48Spatrick   // The CodeExtractor needs a dominator tree.
114309467b48Spatrick   DominatorTree DT;
114409467b48Spatrick   DT.recalculate(*ClonedFunc);
114509467b48Spatrick 
114609467b48Spatrick   // Manually calculate a BlockFrequencyInfo and BranchProbabilityInfo.
114709467b48Spatrick   LoopInfo LI(DT);
114809467b48Spatrick   BranchProbabilityInfo BPI(*ClonedFunc, LI);
114909467b48Spatrick   ClonedFuncBFI.reset(new BlockFrequencyInfo(*ClonedFunc, BPI, LI));
115009467b48Spatrick 
115109467b48Spatrick   // Cache and recycle the CodeExtractor analysis to avoid O(n^2) compile-time.
115209467b48Spatrick   CodeExtractorAnalysisCache CEAC(*ClonedFunc);
115309467b48Spatrick 
115409467b48Spatrick   SetVector<Value *> Inputs, Outputs, Sinks;
115509467b48Spatrick   for (FunctionOutliningMultiRegionInfo::OutlineRegionInfo RegionInfo :
115609467b48Spatrick        ClonedOMRI->ORI) {
115773471bf0Spatrick     InstructionCost CurrentOutlinedRegionCost =
115873471bf0Spatrick         ComputeRegionCost(RegionInfo.Region);
115909467b48Spatrick 
116009467b48Spatrick     CodeExtractor CE(RegionInfo.Region, &DT, /*AggregateArgs*/ false,
116109467b48Spatrick                      ClonedFuncBFI.get(), &BPI,
116209467b48Spatrick                      LookupAC(*RegionInfo.EntryBlock->getParent()),
116309467b48Spatrick                      /* AllowVarargs */ false);
116409467b48Spatrick 
116509467b48Spatrick     CE.findInputsOutputs(Inputs, Outputs, Sinks);
116609467b48Spatrick 
116773471bf0Spatrick     LLVM_DEBUG({
116809467b48Spatrick       dbgs() << "inputs: " << Inputs.size() << "\n";
116909467b48Spatrick       dbgs() << "outputs: " << Outputs.size() << "\n";
117009467b48Spatrick       for (Value *value : Inputs)
117109467b48Spatrick         dbgs() << "value used in func: " << *value << "\n";
117209467b48Spatrick       for (Value *output : Outputs)
117309467b48Spatrick         dbgs() << "instr used in func: " << *output << "\n";
117473471bf0Spatrick     });
117573471bf0Spatrick 
117609467b48Spatrick     // Do not extract regions that have live exit variables.
117709467b48Spatrick     if (Outputs.size() > 0 && !ForceLiveExit)
117809467b48Spatrick       continue;
117909467b48Spatrick 
118073471bf0Spatrick     if (Function *OutlinedFunc = CE.extractCodeRegion(CEAC)) {
118173471bf0Spatrick       CallBase *OCS = PartialInlinerImpl::getOneCallSiteTo(*OutlinedFunc);
1182097a140dSpatrick       BasicBlock *OutliningCallBB = OCS->getParent();
118309467b48Spatrick       assert(OutliningCallBB->getParent() == ClonedFunc);
118409467b48Spatrick       OutlinedFunctions.push_back(std::make_pair(OutlinedFunc,OutliningCallBB));
118509467b48Spatrick       NumColdRegionsOutlined++;
118609467b48Spatrick       OutlinedRegionCost += CurrentOutlinedRegionCost;
118709467b48Spatrick 
118809467b48Spatrick       if (MarkOutlinedColdCC) {
118909467b48Spatrick         OutlinedFunc->setCallingConv(CallingConv::Cold);
1190097a140dSpatrick         OCS->setCallingConv(CallingConv::Cold);
119109467b48Spatrick       }
119209467b48Spatrick     } else
119309467b48Spatrick       ORE.emit([&]() {
119409467b48Spatrick         return OptimizationRemarkMissed(DEBUG_TYPE, "ExtractFailed",
119509467b48Spatrick                                         &RegionInfo.Region.front()->front())
119609467b48Spatrick                << "Failed to extract region at block "
119709467b48Spatrick                << ore::NV("Block", RegionInfo.Region.front());
119809467b48Spatrick       });
119909467b48Spatrick   }
120009467b48Spatrick 
120109467b48Spatrick   return !OutlinedFunctions.empty();
120209467b48Spatrick }
120309467b48Spatrick 
120409467b48Spatrick Function *
doSingleRegionFunctionOutlining()120509467b48Spatrick PartialInlinerImpl::FunctionCloner::doSingleRegionFunctionOutlining() {
120609467b48Spatrick   // Returns true if the block is to be partial inlined into the caller
120709467b48Spatrick   // (i.e. not to be extracted to the out of line function)
120809467b48Spatrick   auto ToBeInlined = [&, this](BasicBlock *BB) {
120909467b48Spatrick     return BB == ClonedOI->ReturnBlock ||
121073471bf0Spatrick            llvm::is_contained(ClonedOI->Entries, BB);
121109467b48Spatrick   };
121209467b48Spatrick 
121309467b48Spatrick   assert(ClonedOI && "Expecting OutlineInfo for single region outline");
121409467b48Spatrick   // The CodeExtractor needs a dominator tree.
121509467b48Spatrick   DominatorTree DT;
121609467b48Spatrick   DT.recalculate(*ClonedFunc);
121709467b48Spatrick 
121809467b48Spatrick   // Manually calculate a BlockFrequencyInfo and BranchProbabilityInfo.
121909467b48Spatrick   LoopInfo LI(DT);
122009467b48Spatrick   BranchProbabilityInfo BPI(*ClonedFunc, LI);
122109467b48Spatrick   ClonedFuncBFI.reset(new BlockFrequencyInfo(*ClonedFunc, BPI, LI));
122209467b48Spatrick 
122309467b48Spatrick   // Gather up the blocks that we're going to extract.
122409467b48Spatrick   std::vector<BasicBlock *> ToExtract;
122573471bf0Spatrick   auto *ClonedFuncTTI = &GetTTI(*ClonedFunc);
122609467b48Spatrick   ToExtract.push_back(ClonedOI->NonReturnBlock);
122773471bf0Spatrick   OutlinedRegionCost += PartialInlinerImpl::computeBBInlineCost(
122873471bf0Spatrick       ClonedOI->NonReturnBlock, ClonedFuncTTI);
122909467b48Spatrick   for (BasicBlock &BB : *ClonedFunc)
123009467b48Spatrick     if (!ToBeInlined(&BB) && &BB != ClonedOI->NonReturnBlock) {
123109467b48Spatrick       ToExtract.push_back(&BB);
123209467b48Spatrick       // FIXME: the code extractor may hoist/sink more code
123309467b48Spatrick       // into the outlined function which may make the outlining
123409467b48Spatrick       // overhead (the difference of the outlined function cost
123509467b48Spatrick       // and OutliningRegionCost) look larger.
123673471bf0Spatrick       OutlinedRegionCost += computeBBInlineCost(&BB, ClonedFuncTTI);
123709467b48Spatrick     }
123809467b48Spatrick 
123909467b48Spatrick   // Extract the body of the if.
124009467b48Spatrick   CodeExtractorAnalysisCache CEAC(*ClonedFunc);
124109467b48Spatrick   Function *OutlinedFunc =
124209467b48Spatrick       CodeExtractor(ToExtract, &DT, /*AggregateArgs*/ false,
124309467b48Spatrick                     ClonedFuncBFI.get(), &BPI, LookupAC(*ClonedFunc),
124409467b48Spatrick                     /* AllowVarargs */ true)
124509467b48Spatrick           .extractCodeRegion(CEAC);
124609467b48Spatrick 
124709467b48Spatrick   if (OutlinedFunc) {
124809467b48Spatrick     BasicBlock *OutliningCallBB =
124973471bf0Spatrick         PartialInlinerImpl::getOneCallSiteTo(*OutlinedFunc)->getParent();
125009467b48Spatrick     assert(OutliningCallBB->getParent() == ClonedFunc);
125109467b48Spatrick     OutlinedFunctions.push_back(std::make_pair(OutlinedFunc, OutliningCallBB));
125209467b48Spatrick   } else
125309467b48Spatrick     ORE.emit([&]() {
125409467b48Spatrick       return OptimizationRemarkMissed(DEBUG_TYPE, "ExtractFailed",
125509467b48Spatrick                                       &ToExtract.front()->front())
125609467b48Spatrick              << "Failed to extract region at block "
125709467b48Spatrick              << ore::NV("Block", ToExtract.front());
125809467b48Spatrick     });
125909467b48Spatrick 
126009467b48Spatrick   return OutlinedFunc;
126109467b48Spatrick }
126209467b48Spatrick 
~FunctionCloner()126309467b48Spatrick PartialInlinerImpl::FunctionCloner::~FunctionCloner() {
126409467b48Spatrick   // Ditch the duplicate, since we're done with it, and rewrite all remaining
126509467b48Spatrick   // users (function pointers, etc.) back to the original function.
126609467b48Spatrick   ClonedFunc->replaceAllUsesWith(OrigFunc);
126709467b48Spatrick   ClonedFunc->eraseFromParent();
126809467b48Spatrick   if (!IsFunctionInlined) {
126909467b48Spatrick     // Remove each function that was speculatively created if there is no
127009467b48Spatrick     // reference.
127109467b48Spatrick     for (auto FuncBBPair : OutlinedFunctions) {
127209467b48Spatrick       Function *Func = FuncBBPair.first;
127309467b48Spatrick       Func->eraseFromParent();
127409467b48Spatrick     }
127509467b48Spatrick   }
127609467b48Spatrick }
127709467b48Spatrick 
unswitchFunction(Function & F)127873471bf0Spatrick std::pair<bool, Function *> PartialInlinerImpl::unswitchFunction(Function &F) {
127973471bf0Spatrick   if (F.hasAddressTaken())
128009467b48Spatrick     return {false, nullptr};
128109467b48Spatrick 
128209467b48Spatrick   // Let inliner handle it
128373471bf0Spatrick   if (F.hasFnAttribute(Attribute::AlwaysInline))
128409467b48Spatrick     return {false, nullptr};
128509467b48Spatrick 
128673471bf0Spatrick   if (F.hasFnAttribute(Attribute::NoInline))
128709467b48Spatrick     return {false, nullptr};
128809467b48Spatrick 
128973471bf0Spatrick   if (PSI.isFunctionEntryCold(&F))
129009467b48Spatrick     return {false, nullptr};
129109467b48Spatrick 
129273471bf0Spatrick   if (F.users().empty())
129309467b48Spatrick     return {false, nullptr};
129409467b48Spatrick 
129573471bf0Spatrick   OptimizationRemarkEmitter ORE(&F);
129609467b48Spatrick 
129709467b48Spatrick   // Only try to outline cold regions if we have a profile summary, which
129809467b48Spatrick   // implies we have profiling information.
129973471bf0Spatrick   if (PSI.hasProfileSummary() && F.hasProfileData() &&
130009467b48Spatrick       !DisableMultiRegionPartialInline) {
130109467b48Spatrick     std::unique_ptr<FunctionOutliningMultiRegionInfo> OMRI =
130209467b48Spatrick         computeOutliningColdRegionsInfo(F, ORE);
130309467b48Spatrick     if (OMRI) {
130473471bf0Spatrick       FunctionCloner Cloner(&F, OMRI.get(), ORE, LookupAssumptionCache, GetTTI);
130509467b48Spatrick 
130673471bf0Spatrick       LLVM_DEBUG({
1307097a140dSpatrick         dbgs() << "HotCountThreshold = " << PSI.getHotCountThreshold() << "\n";
1308097a140dSpatrick         dbgs() << "ColdCountThreshold = " << PSI.getColdCountThreshold()
130909467b48Spatrick                << "\n";
131073471bf0Spatrick       });
131173471bf0Spatrick 
131209467b48Spatrick       bool DidOutline = Cloner.doMultiRegionFunctionOutlining();
131309467b48Spatrick 
131409467b48Spatrick       if (DidOutline) {
131573471bf0Spatrick         LLVM_DEBUG({
131609467b48Spatrick           dbgs() << ">>>>>> Outlined (Cloned) Function >>>>>>\n";
131709467b48Spatrick           Cloner.ClonedFunc->print(dbgs());
131809467b48Spatrick           dbgs() << "<<<<<< Outlined (Cloned) Function <<<<<<\n";
131973471bf0Spatrick         });
132009467b48Spatrick 
132109467b48Spatrick         if (tryPartialInline(Cloner))
132209467b48Spatrick           return {true, nullptr};
132309467b48Spatrick       }
132409467b48Spatrick     }
132509467b48Spatrick   }
132609467b48Spatrick 
132709467b48Spatrick   // Fall-thru to regular partial inlining if we:
132809467b48Spatrick   //    i) can't find any cold regions to outline, or
132909467b48Spatrick   //   ii) can't inline the outlined function anywhere.
133009467b48Spatrick   std::unique_ptr<FunctionOutliningInfo> OI = computeOutliningInfo(F);
133109467b48Spatrick   if (!OI)
133209467b48Spatrick     return {false, nullptr};
133309467b48Spatrick 
133473471bf0Spatrick   FunctionCloner Cloner(&F, OI.get(), ORE, LookupAssumptionCache, GetTTI);
133573471bf0Spatrick   Cloner.normalizeReturnBlock();
133609467b48Spatrick 
133709467b48Spatrick   Function *OutlinedFunction = Cloner.doSingleRegionFunctionOutlining();
133809467b48Spatrick 
133909467b48Spatrick   if (!OutlinedFunction)
134009467b48Spatrick     return {false, nullptr};
134109467b48Spatrick 
134273471bf0Spatrick   if (tryPartialInline(Cloner))
134309467b48Spatrick     return {true, OutlinedFunction};
134409467b48Spatrick 
134509467b48Spatrick   return {false, nullptr};
134609467b48Spatrick }
134709467b48Spatrick 
tryPartialInline(FunctionCloner & Cloner)134809467b48Spatrick bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) {
134909467b48Spatrick   if (Cloner.OutlinedFunctions.empty())
135009467b48Spatrick     return false;
135109467b48Spatrick 
135273471bf0Spatrick   auto OutliningCosts = computeOutliningCosts(Cloner);
135373471bf0Spatrick 
1354*d415bd75Srobert   InstructionCost SizeCost = std::get<0>(OutliningCosts);
1355*d415bd75Srobert   InstructionCost NonWeightedRcost = std::get<1>(OutliningCosts);
1356*d415bd75Srobert 
1357*d415bd75Srobert   assert(SizeCost.isValid() && NonWeightedRcost.isValid() &&
1358*d415bd75Srobert          "Expected valid costs");
135909467b48Spatrick 
136009467b48Spatrick   // Only calculate RelativeToEntryFreq when we are doing single region
136109467b48Spatrick   // outlining.
136209467b48Spatrick   BranchProbability RelativeToEntryFreq;
136373471bf0Spatrick   if (Cloner.ClonedOI)
136409467b48Spatrick     RelativeToEntryFreq = getOutliningCallBBRelativeFreq(Cloner);
136573471bf0Spatrick   else
136609467b48Spatrick     // RelativeToEntryFreq doesn't make sense when we have more than one
136709467b48Spatrick     // outlined call because each call will have a different relative frequency
136809467b48Spatrick     // to the entry block.  We can consider using the average, but the
136909467b48Spatrick     // usefulness of that information is questionable. For now, assume we never
137009467b48Spatrick     // execute the calls to outlined functions.
137109467b48Spatrick     RelativeToEntryFreq = BranchProbability(0, 1);
137209467b48Spatrick 
1373*d415bd75Srobert   BlockFrequency WeightedRcost =
1374*d415bd75Srobert       BlockFrequency(*NonWeightedRcost.getValue()) * RelativeToEntryFreq;
137509467b48Spatrick 
137609467b48Spatrick   // The call sequence(s) to the outlined function(s) are larger than the sum of
137709467b48Spatrick   // the original outlined region size(s), it does not increase the chances of
137809467b48Spatrick   // inlining the function with outlining (The inliner uses the size increase to
137909467b48Spatrick   // model the cost of inlining a callee).
138009467b48Spatrick   if (!SkipCostAnalysis && Cloner.OutlinedRegionCost < SizeCost) {
138109467b48Spatrick     OptimizationRemarkEmitter OrigFuncORE(Cloner.OrigFunc);
138209467b48Spatrick     DebugLoc DLoc;
138309467b48Spatrick     BasicBlock *Block;
138473471bf0Spatrick     std::tie(DLoc, Block) = getOneDebugLoc(*Cloner.ClonedFunc);
138509467b48Spatrick     OrigFuncORE.emit([&]() {
138609467b48Spatrick       return OptimizationRemarkAnalysis(DEBUG_TYPE, "OutlineRegionTooSmall",
138709467b48Spatrick                                         DLoc, Block)
138809467b48Spatrick              << ore::NV("Function", Cloner.OrigFunc)
138909467b48Spatrick              << " not partially inlined into callers (Original Size = "
139009467b48Spatrick              << ore::NV("OutlinedRegionOriginalSize", Cloner.OutlinedRegionCost)
139109467b48Spatrick              << ", Size of call sequence to outlined function = "
139209467b48Spatrick              << ore::NV("NewSize", SizeCost) << ")";
139309467b48Spatrick     });
139409467b48Spatrick     return false;
139509467b48Spatrick   }
139609467b48Spatrick 
139709467b48Spatrick   assert(Cloner.OrigFunc->users().empty() &&
139809467b48Spatrick          "F's users should all be replaced!");
139909467b48Spatrick 
140009467b48Spatrick   std::vector<User *> Users(Cloner.ClonedFunc->user_begin(),
140109467b48Spatrick                             Cloner.ClonedFunc->user_end());
140209467b48Spatrick 
140309467b48Spatrick   DenseMap<User *, uint64_t> CallSiteToProfCountMap;
140409467b48Spatrick   auto CalleeEntryCount = Cloner.OrigFunc->getEntryCount();
140509467b48Spatrick   if (CalleeEntryCount)
140609467b48Spatrick     computeCallsiteToProfCountMap(Cloner.ClonedFunc, CallSiteToProfCountMap);
140709467b48Spatrick 
140809467b48Spatrick   uint64_t CalleeEntryCountV =
1409*d415bd75Srobert       (CalleeEntryCount ? CalleeEntryCount->getCount() : 0);
141009467b48Spatrick 
141109467b48Spatrick   bool AnyInline = false;
141209467b48Spatrick   for (User *User : Users) {
1413*d415bd75Srobert     // Don't bother with BlockAddress used by CallBr for asm goto.
1414*d415bd75Srobert     if (isa<BlockAddress>(User))
1415*d415bd75Srobert       continue;
1416*d415bd75Srobert 
1417097a140dSpatrick     CallBase *CB = getSupportedCallBase(User);
141809467b48Spatrick 
141973471bf0Spatrick     if (isLimitReached())
142009467b48Spatrick       continue;
142109467b48Spatrick 
1422097a140dSpatrick     OptimizationRemarkEmitter CallerORE(CB->getCaller());
1423097a140dSpatrick     if (!shouldPartialInline(*CB, Cloner, WeightedRcost, CallerORE))
142409467b48Spatrick       continue;
142509467b48Spatrick 
142609467b48Spatrick     // Construct remark before doing the inlining, as after successful inlining
142709467b48Spatrick     // the callsite is removed.
1428097a140dSpatrick     OptimizationRemark OR(DEBUG_TYPE, "PartiallyInlined", CB);
142909467b48Spatrick     OR << ore::NV("Callee", Cloner.OrigFunc) << " partially inlined into "
1430097a140dSpatrick        << ore::NV("Caller", CB->getCaller());
143109467b48Spatrick 
1432097a140dSpatrick     InlineFunctionInfo IFI(nullptr, GetAssumptionCache, &PSI);
143309467b48Spatrick     // We can only forward varargs when we outlined a single region, else we
143409467b48Spatrick     // bail on vararg functions.
1435*d415bd75Srobert     if (!InlineFunction(*CB, IFI, /*MergeAttributes=*/false, nullptr, true,
143609467b48Spatrick                         (Cloner.ClonedOI ? Cloner.OutlinedFunctions.back().first
1437097a140dSpatrick                                          : nullptr))
1438097a140dSpatrick              .isSuccess())
143909467b48Spatrick       continue;
144009467b48Spatrick 
144109467b48Spatrick     CallerORE.emit(OR);
144209467b48Spatrick 
144309467b48Spatrick     // Now update the entry count:
144409467b48Spatrick     if (CalleeEntryCountV && CallSiteToProfCountMap.count(User)) {
144509467b48Spatrick       uint64_t CallSiteCount = CallSiteToProfCountMap[User];
144609467b48Spatrick       CalleeEntryCountV -= std::min(CalleeEntryCountV, CallSiteCount);
144709467b48Spatrick     }
144809467b48Spatrick 
144909467b48Spatrick     AnyInline = true;
145009467b48Spatrick     NumPartialInlining++;
145109467b48Spatrick     // Update the stats
145209467b48Spatrick     if (Cloner.ClonedOI)
145309467b48Spatrick       NumPartialInlined++;
145409467b48Spatrick     else
145509467b48Spatrick       NumColdOutlinePartialInlined++;
145609467b48Spatrick   }
145709467b48Spatrick 
145809467b48Spatrick   if (AnyInline) {
145909467b48Spatrick     Cloner.IsFunctionInlined = true;
146009467b48Spatrick     if (CalleeEntryCount)
1461*d415bd75Srobert       Cloner.OrigFunc->setEntryCount(Function::ProfileCount(
1462*d415bd75Srobert           CalleeEntryCountV, CalleeEntryCount->getType()));
146309467b48Spatrick     OptimizationRemarkEmitter OrigFuncORE(Cloner.OrigFunc);
146409467b48Spatrick     OrigFuncORE.emit([&]() {
146509467b48Spatrick       return OptimizationRemark(DEBUG_TYPE, "PartiallyInlined", Cloner.OrigFunc)
146609467b48Spatrick              << "Partially inlined into at least one caller";
146709467b48Spatrick     });
146809467b48Spatrick   }
146909467b48Spatrick 
147009467b48Spatrick   return AnyInline;
147109467b48Spatrick }
147209467b48Spatrick 
run(Module & M)147309467b48Spatrick bool PartialInlinerImpl::run(Module &M) {
147409467b48Spatrick   if (DisablePartialInlining)
147509467b48Spatrick     return false;
147609467b48Spatrick 
147709467b48Spatrick   std::vector<Function *> Worklist;
147809467b48Spatrick   Worklist.reserve(M.size());
147909467b48Spatrick   for (Function &F : M)
148009467b48Spatrick     if (!F.use_empty() && !F.isDeclaration())
148109467b48Spatrick       Worklist.push_back(&F);
148209467b48Spatrick 
148309467b48Spatrick   bool Changed = false;
148409467b48Spatrick   while (!Worklist.empty()) {
148509467b48Spatrick     Function *CurrFunc = Worklist.back();
148609467b48Spatrick     Worklist.pop_back();
148709467b48Spatrick 
148809467b48Spatrick     if (CurrFunc->use_empty())
148909467b48Spatrick       continue;
149009467b48Spatrick 
149173471bf0Spatrick     std::pair<bool, Function *> Result = unswitchFunction(*CurrFunc);
149209467b48Spatrick     if (Result.second)
149309467b48Spatrick       Worklist.push_back(Result.second);
149409467b48Spatrick     Changed |= Result.first;
149509467b48Spatrick   }
149609467b48Spatrick 
149709467b48Spatrick   return Changed;
149809467b48Spatrick }
149909467b48Spatrick 
150009467b48Spatrick char PartialInlinerLegacyPass::ID = 0;
150109467b48Spatrick 
150209467b48Spatrick INITIALIZE_PASS_BEGIN(PartialInlinerLegacyPass, "partial-inliner",
150309467b48Spatrick                       "Partial Inliner", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)150409467b48Spatrick INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
150509467b48Spatrick INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
150609467b48Spatrick INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
1507097a140dSpatrick INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
150809467b48Spatrick INITIALIZE_PASS_END(PartialInlinerLegacyPass, "partial-inliner",
150909467b48Spatrick                     "Partial Inliner", false, false)
151009467b48Spatrick 
151109467b48Spatrick ModulePass *llvm::createPartialInliningPass() {
151209467b48Spatrick   return new PartialInlinerLegacyPass();
151309467b48Spatrick }
151409467b48Spatrick 
run(Module & M,ModuleAnalysisManager & AM)151509467b48Spatrick PreservedAnalyses PartialInlinerPass::run(Module &M,
151609467b48Spatrick                                           ModuleAnalysisManager &AM) {
151709467b48Spatrick   auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
151809467b48Spatrick 
1519097a140dSpatrick   auto GetAssumptionCache = [&FAM](Function &F) -> AssumptionCache & {
152009467b48Spatrick     return FAM.getResult<AssumptionAnalysis>(F);
152109467b48Spatrick   };
152209467b48Spatrick 
152309467b48Spatrick   auto LookupAssumptionCache = [&FAM](Function &F) -> AssumptionCache * {
152409467b48Spatrick     return FAM.getCachedResult<AssumptionAnalysis>(F);
152509467b48Spatrick   };
152609467b48Spatrick 
1527097a140dSpatrick   auto GetBFI = [&FAM](Function &F) -> BlockFrequencyInfo & {
152809467b48Spatrick     return FAM.getResult<BlockFrequencyAnalysis>(F);
152909467b48Spatrick   };
153009467b48Spatrick 
1531097a140dSpatrick   auto GetTTI = [&FAM](Function &F) -> TargetTransformInfo & {
153209467b48Spatrick     return FAM.getResult<TargetIRAnalysis>(F);
153309467b48Spatrick   };
153409467b48Spatrick 
1535097a140dSpatrick   auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
1536097a140dSpatrick     return FAM.getResult<TargetLibraryAnalysis>(F);
1537097a140dSpatrick   };
153809467b48Spatrick 
1539097a140dSpatrick   ProfileSummaryInfo &PSI = AM.getResult<ProfileSummaryAnalysis>(M);
1540097a140dSpatrick 
1541097a140dSpatrick   if (PartialInlinerImpl(GetAssumptionCache, LookupAssumptionCache, GetTTI,
1542097a140dSpatrick                          GetTLI, PSI, GetBFI)
154309467b48Spatrick           .run(M))
154409467b48Spatrick     return PreservedAnalyses::none();
154509467b48Spatrick   return PreservedAnalyses::all();
154609467b48Spatrick }
1547