109467b48Spatrick //===- PartialInlining.cpp - Inline parts of functions --------------------===//
209467b48Spatrick //
309467b48Spatrick // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
409467b48Spatrick // See https://llvm.org/LICENSE.txt for license information.
509467b48Spatrick // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
609467b48Spatrick //
709467b48Spatrick //===----------------------------------------------------------------------===//
809467b48Spatrick //
909467b48Spatrick // This pass performs partial inlining, typically by inlining an if statement
1009467b48Spatrick // that surrounds the body of the function.
1109467b48Spatrick //
1209467b48Spatrick //===----------------------------------------------------------------------===//
1309467b48Spatrick
1409467b48Spatrick #include "llvm/Transforms/IPO/PartialInlining.h"
1509467b48Spatrick #include "llvm/ADT/DenseMap.h"
1609467b48Spatrick #include "llvm/ADT/DenseSet.h"
1709467b48Spatrick #include "llvm/ADT/STLExtras.h"
1809467b48Spatrick #include "llvm/ADT/SmallVector.h"
1909467b48Spatrick #include "llvm/ADT/Statistic.h"
2009467b48Spatrick #include "llvm/Analysis/BlockFrequencyInfo.h"
2109467b48Spatrick #include "llvm/Analysis/BranchProbabilityInfo.h"
2209467b48Spatrick #include "llvm/Analysis/InlineCost.h"
2309467b48Spatrick #include "llvm/Analysis/LoopInfo.h"
2409467b48Spatrick #include "llvm/Analysis/OptimizationRemarkEmitter.h"
2509467b48Spatrick #include "llvm/Analysis/ProfileSummaryInfo.h"
2609467b48Spatrick #include "llvm/Analysis/TargetLibraryInfo.h"
2709467b48Spatrick #include "llvm/Analysis/TargetTransformInfo.h"
2809467b48Spatrick #include "llvm/IR/Attributes.h"
2909467b48Spatrick #include "llvm/IR/BasicBlock.h"
3009467b48Spatrick #include "llvm/IR/CFG.h"
3109467b48Spatrick #include "llvm/IR/DebugLoc.h"
3209467b48Spatrick #include "llvm/IR/DiagnosticInfo.h"
3309467b48Spatrick #include "llvm/IR/Dominators.h"
3409467b48Spatrick #include "llvm/IR/Function.h"
3509467b48Spatrick #include "llvm/IR/InstrTypes.h"
3609467b48Spatrick #include "llvm/IR/Instruction.h"
3709467b48Spatrick #include "llvm/IR/Instructions.h"
3809467b48Spatrick #include "llvm/IR/IntrinsicInst.h"
3909467b48Spatrick #include "llvm/IR/Intrinsics.h"
4009467b48Spatrick #include "llvm/IR/Module.h"
41*d415bd75Srobert #include "llvm/IR/Operator.h"
42*d415bd75Srobert #include "llvm/IR/ProfDataUtils.h"
4309467b48Spatrick #include "llvm/IR/User.h"
4409467b48Spatrick #include "llvm/InitializePasses.h"
4509467b48Spatrick #include "llvm/Pass.h"
4609467b48Spatrick #include "llvm/Support/BlockFrequency.h"
4709467b48Spatrick #include "llvm/Support/BranchProbability.h"
4809467b48Spatrick #include "llvm/Support/Casting.h"
4909467b48Spatrick #include "llvm/Support/CommandLine.h"
5009467b48Spatrick #include "llvm/Support/ErrorHandling.h"
5109467b48Spatrick #include "llvm/Transforms/IPO.h"
5209467b48Spatrick #include "llvm/Transforms/Utils/Cloning.h"
5309467b48Spatrick #include "llvm/Transforms/Utils/CodeExtractor.h"
5409467b48Spatrick #include "llvm/Transforms/Utils/ValueMapper.h"
5509467b48Spatrick #include <algorithm>
5609467b48Spatrick #include <cassert>
5709467b48Spatrick #include <cstdint>
5809467b48Spatrick #include <memory>
5909467b48Spatrick #include <tuple>
6009467b48Spatrick #include <vector>
6109467b48Spatrick
6209467b48Spatrick using namespace llvm;
6309467b48Spatrick
6409467b48Spatrick #define DEBUG_TYPE "partial-inlining"
6509467b48Spatrick
6609467b48Spatrick STATISTIC(NumPartialInlined,
6709467b48Spatrick "Number of callsites functions partially inlined into.");
6809467b48Spatrick STATISTIC(NumColdOutlinePartialInlined, "Number of times functions with "
6909467b48Spatrick "cold outlined regions were partially "
7009467b48Spatrick "inlined into its caller(s).");
7109467b48Spatrick STATISTIC(NumColdRegionsFound,
7209467b48Spatrick "Number of cold single entry/exit regions found.");
7309467b48Spatrick STATISTIC(NumColdRegionsOutlined,
7409467b48Spatrick "Number of cold single entry/exit regions outlined.");
7509467b48Spatrick
7609467b48Spatrick // Command line option to disable partial-inlining. The default is false:
7709467b48Spatrick static cl::opt<bool>
7809467b48Spatrick DisablePartialInlining("disable-partial-inlining", cl::init(false),
7909467b48Spatrick cl::Hidden, cl::desc("Disable partial inlining"));
8009467b48Spatrick // Command line option to disable multi-region partial-inlining. The default is
8109467b48Spatrick // false:
8209467b48Spatrick static cl::opt<bool> DisableMultiRegionPartialInline(
8309467b48Spatrick "disable-mr-partial-inlining", cl::init(false), cl::Hidden,
8409467b48Spatrick cl::desc("Disable multi-region partial inlining"));
8509467b48Spatrick
8609467b48Spatrick // Command line option to force outlining in regions with live exit variables.
8709467b48Spatrick // The default is false:
8809467b48Spatrick static cl::opt<bool>
8909467b48Spatrick ForceLiveExit("pi-force-live-exit-outline", cl::init(false), cl::Hidden,
9009467b48Spatrick cl::desc("Force outline regions with live exits"));
9109467b48Spatrick
9209467b48Spatrick // Command line option to enable marking outline functions with Cold Calling
9309467b48Spatrick // Convention. The default is false:
9409467b48Spatrick static cl::opt<bool>
9509467b48Spatrick MarkOutlinedColdCC("pi-mark-coldcc", cl::init(false), cl::Hidden,
9609467b48Spatrick cl::desc("Mark outline function calls with ColdCC"));
9709467b48Spatrick
9809467b48Spatrick // This is an option used by testing:
9909467b48Spatrick static cl::opt<bool> SkipCostAnalysis("skip-partial-inlining-cost-analysis",
100*d415bd75Srobert
10109467b48Spatrick cl::ReallyHidden,
10209467b48Spatrick cl::desc("Skip Cost Analysis"));
10309467b48Spatrick // Used to determine if a cold region is worth outlining based on
10409467b48Spatrick // its inlining cost compared to the original function. Default is set at 10%.
10509467b48Spatrick // ie. if the cold region reduces the inlining cost of the original function by
10609467b48Spatrick // at least 10%.
10709467b48Spatrick static cl::opt<float> MinRegionSizeRatio(
10809467b48Spatrick "min-region-size-ratio", cl::init(0.1), cl::Hidden,
10909467b48Spatrick cl::desc("Minimum ratio comparing relative sizes of each "
11009467b48Spatrick "outline candidate and original function"));
11109467b48Spatrick // Used to tune the minimum number of execution counts needed in the predecessor
11209467b48Spatrick // block to the cold edge. ie. confidence interval.
11309467b48Spatrick static cl::opt<unsigned>
11409467b48Spatrick MinBlockCounterExecution("min-block-execution", cl::init(100), cl::Hidden,
11509467b48Spatrick cl::desc("Minimum block executions to consider "
11609467b48Spatrick "its BranchProbabilityInfo valid"));
11709467b48Spatrick // Used to determine when an edge is considered cold. Default is set to 10%. ie.
11809467b48Spatrick // if the branch probability is 10% or less, then it is deemed as 'cold'.
11909467b48Spatrick static cl::opt<float> ColdBranchRatio(
12009467b48Spatrick "cold-branch-ratio", cl::init(0.1), cl::Hidden,
12109467b48Spatrick cl::desc("Minimum BranchProbability to consider a region cold."));
12209467b48Spatrick
12309467b48Spatrick static cl::opt<unsigned> MaxNumInlineBlocks(
12409467b48Spatrick "max-num-inline-blocks", cl::init(5), cl::Hidden,
12509467b48Spatrick cl::desc("Max number of blocks to be partially inlined"));
12609467b48Spatrick
12709467b48Spatrick // Command line option to set the maximum number of partial inlining allowed
12809467b48Spatrick // for the module. The default value of -1 means no limit.
12909467b48Spatrick static cl::opt<int> MaxNumPartialInlining(
130*d415bd75Srobert "max-partial-inlining", cl::init(-1), cl::Hidden,
13109467b48Spatrick cl::desc("Max number of partial inlining. The default is unlimited"));
13209467b48Spatrick
13309467b48Spatrick // Used only when PGO or user annotated branch data is absent. It is
13409467b48Spatrick // the least value that is used to weigh the outline region. If BFI
13509467b48Spatrick // produces larger value, the BFI value will be used.
13609467b48Spatrick static cl::opt<int>
13709467b48Spatrick OutlineRegionFreqPercent("outline-region-freq-percent", cl::init(75),
138*d415bd75Srobert cl::Hidden,
13909467b48Spatrick cl::desc("Relative frequency of outline region to "
14009467b48Spatrick "the entry block"));
14109467b48Spatrick
14209467b48Spatrick static cl::opt<unsigned> ExtraOutliningPenalty(
14309467b48Spatrick "partial-inlining-extra-penalty", cl::init(0), cl::Hidden,
14409467b48Spatrick cl::desc("A debug option to add additional penalty to the computed one."));
14509467b48Spatrick
14609467b48Spatrick namespace {
14709467b48Spatrick
14809467b48Spatrick struct FunctionOutliningInfo {
14909467b48Spatrick FunctionOutliningInfo() = default;
15009467b48Spatrick
15109467b48Spatrick // Returns the number of blocks to be inlined including all blocks
15209467b48Spatrick // in Entries and one return block.
getNumInlinedBlocks__anon4298a4970111::FunctionOutliningInfo15373471bf0Spatrick unsigned getNumInlinedBlocks() const { return Entries.size() + 1; }
15409467b48Spatrick
15509467b48Spatrick // A set of blocks including the function entry that guard
15609467b48Spatrick // the region to be outlined.
15709467b48Spatrick SmallVector<BasicBlock *, 4> Entries;
15809467b48Spatrick
15909467b48Spatrick // The return block that is not included in the outlined region.
16009467b48Spatrick BasicBlock *ReturnBlock = nullptr;
16109467b48Spatrick
16209467b48Spatrick // The dominating block of the region to be outlined.
16309467b48Spatrick BasicBlock *NonReturnBlock = nullptr;
16409467b48Spatrick
16509467b48Spatrick // The set of blocks in Entries that that are predecessors to ReturnBlock
16609467b48Spatrick SmallVector<BasicBlock *, 4> ReturnBlockPreds;
16709467b48Spatrick };
16809467b48Spatrick
16909467b48Spatrick struct FunctionOutliningMultiRegionInfo {
170*d415bd75Srobert FunctionOutliningMultiRegionInfo() = default;
17109467b48Spatrick
17209467b48Spatrick // Container for outline regions
17309467b48Spatrick struct OutlineRegionInfo {
OutlineRegionInfo__anon4298a4970111::FunctionOutliningMultiRegionInfo::OutlineRegionInfo17409467b48Spatrick OutlineRegionInfo(ArrayRef<BasicBlock *> Region,
17509467b48Spatrick BasicBlock *EntryBlock, BasicBlock *ExitBlock,
17609467b48Spatrick BasicBlock *ReturnBlock)
17709467b48Spatrick : Region(Region.begin(), Region.end()), EntryBlock(EntryBlock),
17809467b48Spatrick ExitBlock(ExitBlock), ReturnBlock(ReturnBlock) {}
17909467b48Spatrick SmallVector<BasicBlock *, 8> Region;
18009467b48Spatrick BasicBlock *EntryBlock;
18109467b48Spatrick BasicBlock *ExitBlock;
18209467b48Spatrick BasicBlock *ReturnBlock;
18309467b48Spatrick };
18409467b48Spatrick
18509467b48Spatrick SmallVector<OutlineRegionInfo, 4> ORI;
18609467b48Spatrick };
18709467b48Spatrick
18809467b48Spatrick struct PartialInlinerImpl {
18909467b48Spatrick
PartialInlinerImpl__anon4298a4970111::PartialInlinerImpl19009467b48Spatrick PartialInlinerImpl(
191097a140dSpatrick function_ref<AssumptionCache &(Function &)> GetAC,
19209467b48Spatrick function_ref<AssumptionCache *(Function &)> LookupAC,
193097a140dSpatrick function_ref<TargetTransformInfo &(Function &)> GTTI,
194097a140dSpatrick function_ref<const TargetLibraryInfo &(Function &)> GTLI,
195097a140dSpatrick ProfileSummaryInfo &ProfSI,
196097a140dSpatrick function_ref<BlockFrequencyInfo &(Function &)> GBFI = nullptr)
19709467b48Spatrick : GetAssumptionCache(GetAC), LookupAssumptionCache(LookupAC),
198097a140dSpatrick GetTTI(GTTI), GetBFI(GBFI), GetTLI(GTLI), PSI(ProfSI) {}
19909467b48Spatrick
20009467b48Spatrick bool run(Module &M);
20109467b48Spatrick // Main part of the transformation that calls helper functions to find
20209467b48Spatrick // outlining candidates, clone & outline the function, and attempt to
20309467b48Spatrick // partially inline the resulting function. Returns true if
20409467b48Spatrick // inlining was successful, false otherwise. Also returns the outline
20509467b48Spatrick // function (only if we partially inlined early returns) as there is a
20609467b48Spatrick // possibility to further "peel" early return statements that were left in the
20709467b48Spatrick // outline function due to code size.
20873471bf0Spatrick std::pair<bool, Function *> unswitchFunction(Function &F);
20909467b48Spatrick
21009467b48Spatrick // This class speculatively clones the function to be partial inlined.
21109467b48Spatrick // At the end of partial inlining, the remaining callsites to the cloned
21209467b48Spatrick // function that are not partially inlined will be fixed up to reference
21309467b48Spatrick // the original function, and the cloned function will be erased.
21409467b48Spatrick struct FunctionCloner {
21509467b48Spatrick // Two constructors, one for single region outlining, the other for
21609467b48Spatrick // multi-region outlining.
21709467b48Spatrick FunctionCloner(Function *F, FunctionOutliningInfo *OI,
21809467b48Spatrick OptimizationRemarkEmitter &ORE,
21973471bf0Spatrick function_ref<AssumptionCache *(Function &)> LookupAC,
22073471bf0Spatrick function_ref<TargetTransformInfo &(Function &)> GetTTI);
22109467b48Spatrick FunctionCloner(Function *F, FunctionOutliningMultiRegionInfo *OMRI,
22209467b48Spatrick OptimizationRemarkEmitter &ORE,
22373471bf0Spatrick function_ref<AssumptionCache *(Function &)> LookupAC,
22473471bf0Spatrick function_ref<TargetTransformInfo &(Function &)> GetTTI);
22573471bf0Spatrick
22609467b48Spatrick ~FunctionCloner();
22709467b48Spatrick
22809467b48Spatrick // Prepare for function outlining: making sure there is only
22909467b48Spatrick // one incoming edge from the extracted/outlined region to
23009467b48Spatrick // the return block.
23173471bf0Spatrick void normalizeReturnBlock() const;
23209467b48Spatrick
23309467b48Spatrick // Do function outlining for cold regions.
23409467b48Spatrick bool doMultiRegionFunctionOutlining();
23509467b48Spatrick // Do function outlining for region after early return block(s).
23609467b48Spatrick // NOTE: For vararg functions that do the vararg handling in the outlined
23709467b48Spatrick // function, we temporarily generate IR that does not properly
23809467b48Spatrick // forward varargs to the outlined function. Calling InlineFunction
23909467b48Spatrick // will update calls to the outlined functions to properly forward
24009467b48Spatrick // the varargs.
24109467b48Spatrick Function *doSingleRegionFunctionOutlining();
24209467b48Spatrick
24309467b48Spatrick Function *OrigFunc = nullptr;
24409467b48Spatrick Function *ClonedFunc = nullptr;
24509467b48Spatrick
24609467b48Spatrick typedef std::pair<Function *, BasicBlock *> FuncBodyCallerPair;
24709467b48Spatrick // Keep track of Outlined Functions and the basic block they're called from.
24809467b48Spatrick SmallVector<FuncBodyCallerPair, 4> OutlinedFunctions;
24909467b48Spatrick
25009467b48Spatrick // ClonedFunc is inlined in one of its callers after function
25109467b48Spatrick // outlining.
25209467b48Spatrick bool IsFunctionInlined = false;
25309467b48Spatrick // The cost of the region to be outlined.
25473471bf0Spatrick InstructionCost OutlinedRegionCost = 0;
25509467b48Spatrick // ClonedOI is specific to outlining non-early return blocks.
25609467b48Spatrick std::unique_ptr<FunctionOutliningInfo> ClonedOI = nullptr;
25709467b48Spatrick // ClonedOMRI is specific to outlining cold regions.
25809467b48Spatrick std::unique_ptr<FunctionOutliningMultiRegionInfo> ClonedOMRI = nullptr;
25909467b48Spatrick std::unique_ptr<BlockFrequencyInfo> ClonedFuncBFI = nullptr;
26009467b48Spatrick OptimizationRemarkEmitter &ORE;
26109467b48Spatrick function_ref<AssumptionCache *(Function &)> LookupAC;
26273471bf0Spatrick function_ref<TargetTransformInfo &(Function &)> GetTTI;
26309467b48Spatrick };
26409467b48Spatrick
26509467b48Spatrick private:
26609467b48Spatrick int NumPartialInlining = 0;
267097a140dSpatrick function_ref<AssumptionCache &(Function &)> GetAssumptionCache;
26809467b48Spatrick function_ref<AssumptionCache *(Function &)> LookupAssumptionCache;
269097a140dSpatrick function_ref<TargetTransformInfo &(Function &)> GetTTI;
270097a140dSpatrick function_ref<BlockFrequencyInfo &(Function &)> GetBFI;
271097a140dSpatrick function_ref<const TargetLibraryInfo &(Function &)> GetTLI;
272097a140dSpatrick ProfileSummaryInfo &PSI;
27309467b48Spatrick
27409467b48Spatrick // Return the frequency of the OutlininingBB relative to F's entry point.
27509467b48Spatrick // The result is no larger than 1 and is represented using BP.
27609467b48Spatrick // (Note that the outlined region's 'head' block can only have incoming
27709467b48Spatrick // edges from the guarding entry blocks).
27873471bf0Spatrick BranchProbability
27973471bf0Spatrick getOutliningCallBBRelativeFreq(FunctionCloner &Cloner) const;
28009467b48Spatrick
281097a140dSpatrick // Return true if the callee of CB should be partially inlined with
28209467b48Spatrick // profit.
283097a140dSpatrick bool shouldPartialInline(CallBase &CB, FunctionCloner &Cloner,
28409467b48Spatrick BlockFrequency WeightedOutliningRcost,
28573471bf0Spatrick OptimizationRemarkEmitter &ORE) const;
28609467b48Spatrick
28709467b48Spatrick // Try to inline DuplicateFunction (cloned from F with call to
28809467b48Spatrick // the OutlinedFunction into its callers. Return true
28909467b48Spatrick // if there is any successful inlining.
29009467b48Spatrick bool tryPartialInline(FunctionCloner &Cloner);
29109467b48Spatrick
29209467b48Spatrick // Compute the mapping from use site of DuplicationFunction to the enclosing
29309467b48Spatrick // BB's profile count.
29473471bf0Spatrick void
29573471bf0Spatrick computeCallsiteToProfCountMap(Function *DuplicateFunction,
29673471bf0Spatrick DenseMap<User *, uint64_t> &SiteCountMap) const;
29709467b48Spatrick
isLimitReached__anon4298a4970111::PartialInlinerImpl29873471bf0Spatrick bool isLimitReached() const {
29909467b48Spatrick return (MaxNumPartialInlining != -1 &&
30009467b48Spatrick NumPartialInlining >= MaxNumPartialInlining);
30109467b48Spatrick }
30209467b48Spatrick
getSupportedCallBase__anon4298a4970111::PartialInlinerImpl303097a140dSpatrick static CallBase *getSupportedCallBase(User *U) {
304097a140dSpatrick if (isa<CallInst>(U) || isa<InvokeInst>(U))
305097a140dSpatrick return cast<CallBase>(U);
30609467b48Spatrick llvm_unreachable("All uses must be calls");
307097a140dSpatrick return nullptr;
30809467b48Spatrick }
30909467b48Spatrick
getOneCallSiteTo__anon4298a4970111::PartialInlinerImpl31073471bf0Spatrick static CallBase *getOneCallSiteTo(Function &F) {
31173471bf0Spatrick User *User = *F.user_begin();
312097a140dSpatrick return getSupportedCallBase(User);
31309467b48Spatrick }
31409467b48Spatrick
getOneDebugLoc__anon4298a4970111::PartialInlinerImpl31573471bf0Spatrick std::tuple<DebugLoc, BasicBlock *> getOneDebugLoc(Function &F) const {
316097a140dSpatrick CallBase *CB = getOneCallSiteTo(F);
317097a140dSpatrick DebugLoc DLoc = CB->getDebugLoc();
318097a140dSpatrick BasicBlock *Block = CB->getParent();
31909467b48Spatrick return std::make_tuple(DLoc, Block);
32009467b48Spatrick }
32109467b48Spatrick
32209467b48Spatrick // Returns the costs associated with function outlining:
32309467b48Spatrick // - The first value is the non-weighted runtime cost for making the call
32409467b48Spatrick // to the outlined function, including the addtional setup cost in the
32509467b48Spatrick // outlined function itself;
32609467b48Spatrick // - The second value is the estimated size of the new call sequence in
32709467b48Spatrick // basic block Cloner.OutliningCallBB;
32873471bf0Spatrick std::tuple<InstructionCost, InstructionCost>
32973471bf0Spatrick computeOutliningCosts(FunctionCloner &Cloner) const;
33009467b48Spatrick
33109467b48Spatrick // Compute the 'InlineCost' of block BB. InlineCost is a proxy used to
33209467b48Spatrick // approximate both the size and runtime cost (Note that in the current
33309467b48Spatrick // inline cost analysis, there is no clear distinction there either).
33473471bf0Spatrick static InstructionCost computeBBInlineCost(BasicBlock *BB,
33573471bf0Spatrick TargetTransformInfo *TTI);
33609467b48Spatrick
33773471bf0Spatrick std::unique_ptr<FunctionOutliningInfo>
33873471bf0Spatrick computeOutliningInfo(Function &F) const;
33973471bf0Spatrick
34009467b48Spatrick std::unique_ptr<FunctionOutliningMultiRegionInfo>
34173471bf0Spatrick computeOutliningColdRegionsInfo(Function &F,
34273471bf0Spatrick OptimizationRemarkEmitter &ORE) const;
34309467b48Spatrick };
34409467b48Spatrick
34509467b48Spatrick struct PartialInlinerLegacyPass : public ModulePass {
34609467b48Spatrick static char ID; // Pass identification, replacement for typeid
34709467b48Spatrick
PartialInlinerLegacyPass__anon4298a4970111::PartialInlinerLegacyPass34809467b48Spatrick PartialInlinerLegacyPass() : ModulePass(ID) {
34909467b48Spatrick initializePartialInlinerLegacyPassPass(*PassRegistry::getPassRegistry());
35009467b48Spatrick }
35109467b48Spatrick
getAnalysisUsage__anon4298a4970111::PartialInlinerLegacyPass35209467b48Spatrick void getAnalysisUsage(AnalysisUsage &AU) const override {
35309467b48Spatrick AU.addRequired<AssumptionCacheTracker>();
35409467b48Spatrick AU.addRequired<ProfileSummaryInfoWrapperPass>();
35509467b48Spatrick AU.addRequired<TargetTransformInfoWrapperPass>();
356097a140dSpatrick AU.addRequired<TargetLibraryInfoWrapperPass>();
35709467b48Spatrick }
35809467b48Spatrick
runOnModule__anon4298a4970111::PartialInlinerLegacyPass35909467b48Spatrick bool runOnModule(Module &M) override {
36009467b48Spatrick if (skipModule(M))
36109467b48Spatrick return false;
36209467b48Spatrick
36309467b48Spatrick AssumptionCacheTracker *ACT = &getAnalysis<AssumptionCacheTracker>();
36409467b48Spatrick TargetTransformInfoWrapperPass *TTIWP =
36509467b48Spatrick &getAnalysis<TargetTransformInfoWrapperPass>();
366097a140dSpatrick ProfileSummaryInfo &PSI =
367097a140dSpatrick getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
36809467b48Spatrick
369097a140dSpatrick auto GetAssumptionCache = [&ACT](Function &F) -> AssumptionCache & {
37009467b48Spatrick return ACT->getAssumptionCache(F);
37109467b48Spatrick };
37209467b48Spatrick
37309467b48Spatrick auto LookupAssumptionCache = [ACT](Function &F) -> AssumptionCache * {
37409467b48Spatrick return ACT->lookupAssumptionCache(F);
37509467b48Spatrick };
37609467b48Spatrick
377097a140dSpatrick auto GetTTI = [&TTIWP](Function &F) -> TargetTransformInfo & {
37809467b48Spatrick return TTIWP->getTTI(F);
37909467b48Spatrick };
38009467b48Spatrick
381097a140dSpatrick auto GetTLI = [this](Function &F) -> TargetLibraryInfo & {
382097a140dSpatrick return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
383097a140dSpatrick };
384097a140dSpatrick
385097a140dSpatrick return PartialInlinerImpl(GetAssumptionCache, LookupAssumptionCache, GetTTI,
386097a140dSpatrick GetTLI, PSI)
38709467b48Spatrick .run(M);
38809467b48Spatrick }
38909467b48Spatrick };
39009467b48Spatrick
39109467b48Spatrick } // end anonymous namespace
39209467b48Spatrick
39309467b48Spatrick std::unique_ptr<FunctionOutliningMultiRegionInfo>
computeOutliningColdRegionsInfo(Function & F,OptimizationRemarkEmitter & ORE) const39473471bf0Spatrick PartialInlinerImpl::computeOutliningColdRegionsInfo(
39573471bf0Spatrick Function &F, OptimizationRemarkEmitter &ORE) const {
39673471bf0Spatrick BasicBlock *EntryBlock = &F.front();
39709467b48Spatrick
39873471bf0Spatrick DominatorTree DT(F);
39909467b48Spatrick LoopInfo LI(DT);
40073471bf0Spatrick BranchProbabilityInfo BPI(F, LI);
40109467b48Spatrick std::unique_ptr<BlockFrequencyInfo> ScopedBFI;
40209467b48Spatrick BlockFrequencyInfo *BFI;
40309467b48Spatrick if (!GetBFI) {
40473471bf0Spatrick ScopedBFI.reset(new BlockFrequencyInfo(F, BPI, LI));
40509467b48Spatrick BFI = ScopedBFI.get();
40609467b48Spatrick } else
40773471bf0Spatrick BFI = &(GetBFI(F));
40809467b48Spatrick
40909467b48Spatrick // Return if we don't have profiling information.
410097a140dSpatrick if (!PSI.hasInstrumentationProfile())
41109467b48Spatrick return std::unique_ptr<FunctionOutliningMultiRegionInfo>();
41209467b48Spatrick
41309467b48Spatrick std::unique_ptr<FunctionOutliningMultiRegionInfo> OutliningInfo =
41409467b48Spatrick std::make_unique<FunctionOutliningMultiRegionInfo>();
41509467b48Spatrick
41609467b48Spatrick auto IsSingleExit =
41709467b48Spatrick [&ORE](SmallVectorImpl<BasicBlock *> &BlockList) -> BasicBlock * {
41809467b48Spatrick BasicBlock *ExitBlock = nullptr;
41909467b48Spatrick for (auto *Block : BlockList) {
42073471bf0Spatrick for (BasicBlock *Succ : successors(Block)) {
42173471bf0Spatrick if (!is_contained(BlockList, Succ)) {
42209467b48Spatrick if (ExitBlock) {
42309467b48Spatrick ORE.emit([&]() {
42409467b48Spatrick return OptimizationRemarkMissed(DEBUG_TYPE, "MultiExitRegion",
42573471bf0Spatrick &Succ->front())
42609467b48Spatrick << "Region dominated by "
42709467b48Spatrick << ore::NV("Block", BlockList.front()->getName())
42809467b48Spatrick << " has more than one region exit edge.";
42909467b48Spatrick });
43009467b48Spatrick return nullptr;
43173471bf0Spatrick }
43273471bf0Spatrick
43309467b48Spatrick ExitBlock = Block;
43409467b48Spatrick }
43509467b48Spatrick }
43609467b48Spatrick }
43709467b48Spatrick return ExitBlock;
43809467b48Spatrick };
43909467b48Spatrick
44009467b48Spatrick auto BBProfileCount = [BFI](BasicBlock *BB) {
441*d415bd75Srobert return BFI->getBlockProfileCount(BB).value_or(0);
44209467b48Spatrick };
44309467b48Spatrick
44409467b48Spatrick // Use the same computeBBInlineCost function to compute the cost savings of
44509467b48Spatrick // the outlining the candidate region.
44673471bf0Spatrick TargetTransformInfo *FTTI = &GetTTI(F);
44773471bf0Spatrick InstructionCost OverallFunctionCost = 0;
44873471bf0Spatrick for (auto &BB : F)
44973471bf0Spatrick OverallFunctionCost += computeBBInlineCost(&BB, FTTI);
45009467b48Spatrick
45173471bf0Spatrick LLVM_DEBUG(dbgs() << "OverallFunctionCost = " << OverallFunctionCost
45273471bf0Spatrick << "\n";);
45373471bf0Spatrick
45473471bf0Spatrick InstructionCost MinOutlineRegionCost = OverallFunctionCost.map(
45573471bf0Spatrick [&](auto Cost) { return Cost * MinRegionSizeRatio; });
45673471bf0Spatrick
45709467b48Spatrick BranchProbability MinBranchProbability(
45809467b48Spatrick static_cast<int>(ColdBranchRatio * MinBlockCounterExecution),
45909467b48Spatrick MinBlockCounterExecution);
46009467b48Spatrick bool ColdCandidateFound = false;
46109467b48Spatrick BasicBlock *CurrEntry = EntryBlock;
46209467b48Spatrick std::vector<BasicBlock *> DFS;
46309467b48Spatrick DenseMap<BasicBlock *, bool> VisitedMap;
46409467b48Spatrick DFS.push_back(CurrEntry);
46509467b48Spatrick VisitedMap[CurrEntry] = true;
46673471bf0Spatrick
46709467b48Spatrick // Use Depth First Search on the basic blocks to find CFG edges that are
46809467b48Spatrick // considered cold.
46909467b48Spatrick // Cold regions considered must also have its inline cost compared to the
47009467b48Spatrick // overall inline cost of the original function. The region is outlined only
47109467b48Spatrick // if it reduced the inline cost of the function by 'MinOutlineRegionCost' or
47209467b48Spatrick // more.
47309467b48Spatrick while (!DFS.empty()) {
47473471bf0Spatrick auto *ThisBB = DFS.back();
47509467b48Spatrick DFS.pop_back();
47609467b48Spatrick // Only consider regions with predecessor blocks that are considered
47709467b48Spatrick // not-cold (default: part of the top 99.99% of all block counters)
47809467b48Spatrick // AND greater than our minimum block execution count (default: 100).
47973471bf0Spatrick if (PSI.isColdBlock(ThisBB, BFI) ||
48073471bf0Spatrick BBProfileCount(ThisBB) < MinBlockCounterExecution)
48109467b48Spatrick continue;
48273471bf0Spatrick for (auto SI = succ_begin(ThisBB); SI != succ_end(ThisBB); ++SI) {
48309467b48Spatrick if (VisitedMap[*SI])
48409467b48Spatrick continue;
48509467b48Spatrick VisitedMap[*SI] = true;
48609467b48Spatrick DFS.push_back(*SI);
48709467b48Spatrick // If branch isn't cold, we skip to the next one.
48873471bf0Spatrick BranchProbability SuccProb = BPI.getEdgeProbability(ThisBB, *SI);
48909467b48Spatrick if (SuccProb > MinBranchProbability)
49009467b48Spatrick continue;
49173471bf0Spatrick
49273471bf0Spatrick LLVM_DEBUG(dbgs() << "Found cold edge: " << ThisBB->getName() << "->"
49373471bf0Spatrick << SI->getName()
49473471bf0Spatrick << "\nBranch Probability = " << SuccProb << "\n";);
49573471bf0Spatrick
49609467b48Spatrick SmallVector<BasicBlock *, 8> DominateVector;
49709467b48Spatrick DT.getDescendants(*SI, DominateVector);
49873471bf0Spatrick assert(!DominateVector.empty() &&
49973471bf0Spatrick "SI should be reachable and have at least itself as descendant");
50073471bf0Spatrick
50109467b48Spatrick // We can only outline single entry regions (for now).
50273471bf0Spatrick if (!DominateVector.front()->hasNPredecessors(1)) {
50373471bf0Spatrick LLVM_DEBUG(dbgs() << "ABORT: Block " << SI->getName()
50473471bf0Spatrick << " doesn't have a single predecessor in the "
50573471bf0Spatrick "dominator tree\n";);
50609467b48Spatrick continue;
50773471bf0Spatrick }
50873471bf0Spatrick
50909467b48Spatrick BasicBlock *ExitBlock = nullptr;
51009467b48Spatrick // We can only outline single exit regions (for now).
51173471bf0Spatrick if (!(ExitBlock = IsSingleExit(DominateVector))) {
51273471bf0Spatrick LLVM_DEBUG(dbgs() << "ABORT: Block " << SI->getName()
51373471bf0Spatrick << " doesn't have a unique successor\n";);
51409467b48Spatrick continue;
51573471bf0Spatrick }
51673471bf0Spatrick
51773471bf0Spatrick InstructionCost OutlineRegionCost = 0;
51809467b48Spatrick for (auto *BB : DominateVector)
51973471bf0Spatrick OutlineRegionCost += computeBBInlineCost(BB, &GetTTI(*BB->getParent()));
52009467b48Spatrick
52173471bf0Spatrick LLVM_DEBUG(dbgs() << "OutlineRegionCost = " << OutlineRegionCost
52273471bf0Spatrick << "\n";);
52309467b48Spatrick
52473471bf0Spatrick if (!SkipCostAnalysis && OutlineRegionCost < MinOutlineRegionCost) {
52509467b48Spatrick ORE.emit([&]() {
52609467b48Spatrick return OptimizationRemarkAnalysis(DEBUG_TYPE, "TooCostly",
52709467b48Spatrick &SI->front())
52873471bf0Spatrick << ore::NV("Callee", &F)
52973471bf0Spatrick << " inline cost-savings smaller than "
53009467b48Spatrick << ore::NV("Cost", MinOutlineRegionCost);
53109467b48Spatrick });
53273471bf0Spatrick
53373471bf0Spatrick LLVM_DEBUG(dbgs() << "ABORT: Outline region cost is smaller than "
53473471bf0Spatrick << MinOutlineRegionCost << "\n";);
53509467b48Spatrick continue;
53609467b48Spatrick }
53773471bf0Spatrick
53809467b48Spatrick // For now, ignore blocks that belong to a SISE region that is a
53909467b48Spatrick // candidate for outlining. In the future, we may want to look
54009467b48Spatrick // at inner regions because the outer region may have live-exit
54109467b48Spatrick // variables.
54209467b48Spatrick for (auto *BB : DominateVector)
54309467b48Spatrick VisitedMap[BB] = true;
54473471bf0Spatrick
54509467b48Spatrick // ReturnBlock here means the block after the outline call
54609467b48Spatrick BasicBlock *ReturnBlock = ExitBlock->getSingleSuccessor();
54709467b48Spatrick FunctionOutliningMultiRegionInfo::OutlineRegionInfo RegInfo(
54809467b48Spatrick DominateVector, DominateVector.front(), ExitBlock, ReturnBlock);
54909467b48Spatrick OutliningInfo->ORI.push_back(RegInfo);
55073471bf0Spatrick LLVM_DEBUG(dbgs() << "Found Cold Candidate starting at block: "
55173471bf0Spatrick << DominateVector.front()->getName() << "\n";);
55209467b48Spatrick ColdCandidateFound = true;
55309467b48Spatrick NumColdRegionsFound++;
55409467b48Spatrick }
55509467b48Spatrick }
55673471bf0Spatrick
55709467b48Spatrick if (ColdCandidateFound)
55809467b48Spatrick return OutliningInfo;
55973471bf0Spatrick
56009467b48Spatrick return std::unique_ptr<FunctionOutliningMultiRegionInfo>();
56109467b48Spatrick }
56209467b48Spatrick
56309467b48Spatrick std::unique_ptr<FunctionOutliningInfo>
computeOutliningInfo(Function & F) const56473471bf0Spatrick PartialInlinerImpl::computeOutliningInfo(Function &F) const {
56573471bf0Spatrick BasicBlock *EntryBlock = &F.front();
56609467b48Spatrick BranchInst *BR = dyn_cast<BranchInst>(EntryBlock->getTerminator());
56709467b48Spatrick if (!BR || BR->isUnconditional())
56809467b48Spatrick return std::unique_ptr<FunctionOutliningInfo>();
56909467b48Spatrick
57009467b48Spatrick // Returns true if Succ is BB's successor
57109467b48Spatrick auto IsSuccessor = [](BasicBlock *Succ, BasicBlock *BB) {
57209467b48Spatrick return is_contained(successors(BB), Succ);
57309467b48Spatrick };
57409467b48Spatrick
57509467b48Spatrick auto IsReturnBlock = [](BasicBlock *BB) {
57609467b48Spatrick Instruction *TI = BB->getTerminator();
57709467b48Spatrick return isa<ReturnInst>(TI);
57809467b48Spatrick };
57909467b48Spatrick
58009467b48Spatrick auto GetReturnBlock = [&](BasicBlock *Succ1, BasicBlock *Succ2) {
58109467b48Spatrick if (IsReturnBlock(Succ1))
58209467b48Spatrick return std::make_tuple(Succ1, Succ2);
58309467b48Spatrick if (IsReturnBlock(Succ2))
58409467b48Spatrick return std::make_tuple(Succ2, Succ1);
58509467b48Spatrick
58609467b48Spatrick return std::make_tuple<BasicBlock *, BasicBlock *>(nullptr, nullptr);
58709467b48Spatrick };
58809467b48Spatrick
58909467b48Spatrick // Detect a triangular shape:
59009467b48Spatrick auto GetCommonSucc = [&](BasicBlock *Succ1, BasicBlock *Succ2) {
59109467b48Spatrick if (IsSuccessor(Succ1, Succ2))
59209467b48Spatrick return std::make_tuple(Succ1, Succ2);
59309467b48Spatrick if (IsSuccessor(Succ2, Succ1))
59409467b48Spatrick return std::make_tuple(Succ2, Succ1);
59509467b48Spatrick
59609467b48Spatrick return std::make_tuple<BasicBlock *, BasicBlock *>(nullptr, nullptr);
59709467b48Spatrick };
59809467b48Spatrick
59909467b48Spatrick std::unique_ptr<FunctionOutliningInfo> OutliningInfo =
60009467b48Spatrick std::make_unique<FunctionOutliningInfo>();
60109467b48Spatrick
60209467b48Spatrick BasicBlock *CurrEntry = EntryBlock;
60309467b48Spatrick bool CandidateFound = false;
60409467b48Spatrick do {
60509467b48Spatrick // The number of blocks to be inlined has already reached
60609467b48Spatrick // the limit. When MaxNumInlineBlocks is set to 0 or 1, this
60709467b48Spatrick // disables partial inlining for the function.
60873471bf0Spatrick if (OutliningInfo->getNumInlinedBlocks() >= MaxNumInlineBlocks)
60909467b48Spatrick break;
61009467b48Spatrick
61109467b48Spatrick if (succ_size(CurrEntry) != 2)
61209467b48Spatrick break;
61309467b48Spatrick
61409467b48Spatrick BasicBlock *Succ1 = *succ_begin(CurrEntry);
61509467b48Spatrick BasicBlock *Succ2 = *(succ_begin(CurrEntry) + 1);
61609467b48Spatrick
61709467b48Spatrick BasicBlock *ReturnBlock, *NonReturnBlock;
61809467b48Spatrick std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2);
61909467b48Spatrick
62009467b48Spatrick if (ReturnBlock) {
62109467b48Spatrick OutliningInfo->Entries.push_back(CurrEntry);
62209467b48Spatrick OutliningInfo->ReturnBlock = ReturnBlock;
62309467b48Spatrick OutliningInfo->NonReturnBlock = NonReturnBlock;
62409467b48Spatrick CandidateFound = true;
62509467b48Spatrick break;
62609467b48Spatrick }
62709467b48Spatrick
62873471bf0Spatrick BasicBlock *CommSucc, *OtherSucc;
62909467b48Spatrick std::tie(CommSucc, OtherSucc) = GetCommonSucc(Succ1, Succ2);
63009467b48Spatrick
63109467b48Spatrick if (!CommSucc)
63209467b48Spatrick break;
63309467b48Spatrick
63409467b48Spatrick OutliningInfo->Entries.push_back(CurrEntry);
63509467b48Spatrick CurrEntry = OtherSucc;
63609467b48Spatrick } while (true);
63709467b48Spatrick
63809467b48Spatrick if (!CandidateFound)
63909467b48Spatrick return std::unique_ptr<FunctionOutliningInfo>();
64009467b48Spatrick
641*d415bd75Srobert // There should not be any successors (not in the entry set) other than
64209467b48Spatrick // {ReturnBlock, NonReturnBlock}
64373471bf0Spatrick assert(OutliningInfo->Entries[0] == &F.front() &&
64409467b48Spatrick "Function Entry must be the first in Entries vector");
64509467b48Spatrick DenseSet<BasicBlock *> Entries;
64609467b48Spatrick for (BasicBlock *E : OutliningInfo->Entries)
64709467b48Spatrick Entries.insert(E);
64809467b48Spatrick
64909467b48Spatrick // Returns true of BB has Predecessor which is not
65009467b48Spatrick // in Entries set.
65109467b48Spatrick auto HasNonEntryPred = [Entries](BasicBlock *BB) {
65273471bf0Spatrick for (auto *Pred : predecessors(BB)) {
65309467b48Spatrick if (!Entries.count(Pred))
65409467b48Spatrick return true;
65509467b48Spatrick }
65609467b48Spatrick return false;
65709467b48Spatrick };
65809467b48Spatrick auto CheckAndNormalizeCandidate =
65909467b48Spatrick [Entries, HasNonEntryPred](FunctionOutliningInfo *OutliningInfo) {
66009467b48Spatrick for (BasicBlock *E : OutliningInfo->Entries) {
66173471bf0Spatrick for (auto *Succ : successors(E)) {
66209467b48Spatrick if (Entries.count(Succ))
66309467b48Spatrick continue;
66409467b48Spatrick if (Succ == OutliningInfo->ReturnBlock)
66509467b48Spatrick OutliningInfo->ReturnBlockPreds.push_back(E);
66609467b48Spatrick else if (Succ != OutliningInfo->NonReturnBlock)
66709467b48Spatrick return false;
66809467b48Spatrick }
66909467b48Spatrick // There should not be any outside incoming edges either:
67009467b48Spatrick if (HasNonEntryPred(E))
67109467b48Spatrick return false;
67209467b48Spatrick }
67309467b48Spatrick return true;
67409467b48Spatrick };
67509467b48Spatrick
67609467b48Spatrick if (!CheckAndNormalizeCandidate(OutliningInfo.get()))
67709467b48Spatrick return std::unique_ptr<FunctionOutliningInfo>();
67809467b48Spatrick
67909467b48Spatrick // Now further growing the candidate's inlining region by
68009467b48Spatrick // peeling off dominating blocks from the outlining region:
68173471bf0Spatrick while (OutliningInfo->getNumInlinedBlocks() < MaxNumInlineBlocks) {
68209467b48Spatrick BasicBlock *Cand = OutliningInfo->NonReturnBlock;
68309467b48Spatrick if (succ_size(Cand) != 2)
68409467b48Spatrick break;
68509467b48Spatrick
68609467b48Spatrick if (HasNonEntryPred(Cand))
68709467b48Spatrick break;
68809467b48Spatrick
68909467b48Spatrick BasicBlock *Succ1 = *succ_begin(Cand);
69009467b48Spatrick BasicBlock *Succ2 = *(succ_begin(Cand) + 1);
69109467b48Spatrick
69209467b48Spatrick BasicBlock *ReturnBlock, *NonReturnBlock;
69309467b48Spatrick std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2);
69409467b48Spatrick if (!ReturnBlock || ReturnBlock != OutliningInfo->ReturnBlock)
69509467b48Spatrick break;
69609467b48Spatrick
69709467b48Spatrick if (NonReturnBlock->getSinglePredecessor() != Cand)
69809467b48Spatrick break;
69909467b48Spatrick
70009467b48Spatrick // Now grow and update OutlininigInfo:
70109467b48Spatrick OutliningInfo->Entries.push_back(Cand);
70209467b48Spatrick OutliningInfo->NonReturnBlock = NonReturnBlock;
70309467b48Spatrick OutliningInfo->ReturnBlockPreds.push_back(Cand);
70409467b48Spatrick Entries.insert(Cand);
70509467b48Spatrick }
70609467b48Spatrick
70709467b48Spatrick return OutliningInfo;
70809467b48Spatrick }
70909467b48Spatrick
71009467b48Spatrick // Check if there is PGO data or user annotated branch data:
hasProfileData(const Function & F,const FunctionOutliningInfo & OI)71173471bf0Spatrick static bool hasProfileData(const Function &F, const FunctionOutliningInfo &OI) {
71273471bf0Spatrick if (F.hasProfileData())
71309467b48Spatrick return true;
71409467b48Spatrick // Now check if any of the entry block has MD_prof data:
71573471bf0Spatrick for (auto *E : OI.Entries) {
71609467b48Spatrick BranchInst *BR = dyn_cast<BranchInst>(E->getTerminator());
71709467b48Spatrick if (!BR || BR->isUnconditional())
71809467b48Spatrick continue;
719*d415bd75Srobert if (hasBranchWeightMD(*BR))
72009467b48Spatrick return true;
72109467b48Spatrick }
72209467b48Spatrick return false;
72309467b48Spatrick }
72409467b48Spatrick
getOutliningCallBBRelativeFreq(FunctionCloner & Cloner) const72573471bf0Spatrick BranchProbability PartialInlinerImpl::getOutliningCallBBRelativeFreq(
72673471bf0Spatrick FunctionCloner &Cloner) const {
72709467b48Spatrick BasicBlock *OutliningCallBB = Cloner.OutlinedFunctions.back().second;
72809467b48Spatrick auto EntryFreq =
72909467b48Spatrick Cloner.ClonedFuncBFI->getBlockFreq(&Cloner.ClonedFunc->getEntryBlock());
73009467b48Spatrick auto OutliningCallFreq =
73109467b48Spatrick Cloner.ClonedFuncBFI->getBlockFreq(OutliningCallBB);
73209467b48Spatrick // FIXME Hackery needed because ClonedFuncBFI is based on the function BEFORE
73309467b48Spatrick // we outlined any regions, so we may encounter situations where the
73409467b48Spatrick // OutliningCallFreq is *slightly* bigger than the EntryFreq.
73573471bf0Spatrick if (OutliningCallFreq.getFrequency() > EntryFreq.getFrequency())
73609467b48Spatrick OutliningCallFreq = EntryFreq;
73773471bf0Spatrick
73809467b48Spatrick auto OutlineRegionRelFreq = BranchProbability::getBranchProbability(
73909467b48Spatrick OutliningCallFreq.getFrequency(), EntryFreq.getFrequency());
74009467b48Spatrick
741*d415bd75Srobert if (hasProfileData(*Cloner.OrigFunc, *Cloner.ClonedOI))
74209467b48Spatrick return OutlineRegionRelFreq;
74309467b48Spatrick
74409467b48Spatrick // When profile data is not available, we need to be conservative in
74509467b48Spatrick // estimating the overall savings. Static branch prediction can usually
74609467b48Spatrick // guess the branch direction right (taken/non-taken), but the guessed
74709467b48Spatrick // branch probability is usually not biased enough. In case when the
74809467b48Spatrick // outlined region is predicted to be likely, its probability needs
74909467b48Spatrick // to be made higher (more biased) to not under-estimate the cost of
75009467b48Spatrick // function outlining. On the other hand, if the outlined region
75109467b48Spatrick // is predicted to be less likely, the predicted probablity is usually
75209467b48Spatrick // higher than the actual. For instance, the actual probability of the
75309467b48Spatrick // less likely target is only 5%, but the guessed probablity can be
754*d415bd75Srobert // 40%. In the latter case, there is no need for further adjustment.
75509467b48Spatrick // FIXME: add an option for this.
75609467b48Spatrick if (OutlineRegionRelFreq < BranchProbability(45, 100))
75709467b48Spatrick return OutlineRegionRelFreq;
75809467b48Spatrick
75909467b48Spatrick OutlineRegionRelFreq = std::max(
76009467b48Spatrick OutlineRegionRelFreq, BranchProbability(OutlineRegionFreqPercent, 100));
76109467b48Spatrick
76209467b48Spatrick return OutlineRegionRelFreq;
76309467b48Spatrick }
76409467b48Spatrick
shouldPartialInline(CallBase & CB,FunctionCloner & Cloner,BlockFrequency WeightedOutliningRcost,OptimizationRemarkEmitter & ORE) const76509467b48Spatrick bool PartialInlinerImpl::shouldPartialInline(
766097a140dSpatrick CallBase &CB, FunctionCloner &Cloner, BlockFrequency WeightedOutliningRcost,
76773471bf0Spatrick OptimizationRemarkEmitter &ORE) const {
76809467b48Spatrick using namespace ore;
76909467b48Spatrick
770097a140dSpatrick Function *Callee = CB.getCalledFunction();
77109467b48Spatrick assert(Callee == Cloner.ClonedFunc);
77209467b48Spatrick
77309467b48Spatrick if (SkipCostAnalysis)
774097a140dSpatrick return isInlineViable(*Callee).isSuccess();
77509467b48Spatrick
776097a140dSpatrick Function *Caller = CB.getCaller();
777097a140dSpatrick auto &CalleeTTI = GetTTI(*Callee);
77809467b48Spatrick bool RemarksEnabled =
77909467b48Spatrick Callee->getContext().getDiagHandlerPtr()->isMissedOptRemarkEnabled(
78009467b48Spatrick DEBUG_TYPE);
781097a140dSpatrick InlineCost IC =
782097a140dSpatrick getInlineCost(CB, getInlineParams(), CalleeTTI, GetAssumptionCache,
783097a140dSpatrick GetTLI, GetBFI, &PSI, RemarksEnabled ? &ORE : nullptr);
78409467b48Spatrick
78509467b48Spatrick if (IC.isAlways()) {
78609467b48Spatrick ORE.emit([&]() {
787097a140dSpatrick return OptimizationRemarkAnalysis(DEBUG_TYPE, "AlwaysInline", &CB)
78809467b48Spatrick << NV("Callee", Cloner.OrigFunc)
78909467b48Spatrick << " should always be fully inlined, not partially";
79009467b48Spatrick });
79109467b48Spatrick return false;
79209467b48Spatrick }
79309467b48Spatrick
79409467b48Spatrick if (IC.isNever()) {
79509467b48Spatrick ORE.emit([&]() {
796097a140dSpatrick return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", &CB)
79709467b48Spatrick << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
79809467b48Spatrick << NV("Caller", Caller)
79909467b48Spatrick << " because it should never be inlined (cost=never)";
80009467b48Spatrick });
80109467b48Spatrick return false;
80209467b48Spatrick }
80309467b48Spatrick
80409467b48Spatrick if (!IC) {
80509467b48Spatrick ORE.emit([&]() {
806097a140dSpatrick return OptimizationRemarkAnalysis(DEBUG_TYPE, "TooCostly", &CB)
80709467b48Spatrick << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
80809467b48Spatrick << NV("Caller", Caller) << " because too costly to inline (cost="
80909467b48Spatrick << NV("Cost", IC.getCost()) << ", threshold="
81009467b48Spatrick << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")";
81109467b48Spatrick });
81209467b48Spatrick return false;
81309467b48Spatrick }
81409467b48Spatrick const DataLayout &DL = Caller->getParent()->getDataLayout();
81509467b48Spatrick
81609467b48Spatrick // The savings of eliminating the call:
817097a140dSpatrick int NonWeightedSavings = getCallsiteCost(CB, DL);
81809467b48Spatrick BlockFrequency NormWeightedSavings(NonWeightedSavings);
81909467b48Spatrick
82009467b48Spatrick // Weighted saving is smaller than weighted cost, return false
82109467b48Spatrick if (NormWeightedSavings < WeightedOutliningRcost) {
82209467b48Spatrick ORE.emit([&]() {
82309467b48Spatrick return OptimizationRemarkAnalysis(DEBUG_TYPE, "OutliningCallcostTooHigh",
824097a140dSpatrick &CB)
82509467b48Spatrick << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
82609467b48Spatrick << NV("Caller", Caller) << " runtime overhead (overhead="
82709467b48Spatrick << NV("Overhead", (unsigned)WeightedOutliningRcost.getFrequency())
82809467b48Spatrick << ", savings="
82909467b48Spatrick << NV("Savings", (unsigned)NormWeightedSavings.getFrequency())
83009467b48Spatrick << ")"
83109467b48Spatrick << " of making the outlined call is too high";
83209467b48Spatrick });
83309467b48Spatrick
83409467b48Spatrick return false;
83509467b48Spatrick }
83609467b48Spatrick
83709467b48Spatrick ORE.emit([&]() {
838097a140dSpatrick return OptimizationRemarkAnalysis(DEBUG_TYPE, "CanBePartiallyInlined", &CB)
83909467b48Spatrick << NV("Callee", Cloner.OrigFunc) << " can be partially inlined into "
84009467b48Spatrick << NV("Caller", Caller) << " with cost=" << NV("Cost", IC.getCost())
84109467b48Spatrick << " (threshold="
84209467b48Spatrick << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")";
84309467b48Spatrick });
84409467b48Spatrick return true;
84509467b48Spatrick }
84609467b48Spatrick
84709467b48Spatrick // TODO: Ideally we should share Inliner's InlineCost Analysis code.
84809467b48Spatrick // For now use a simplified version. The returned 'InlineCost' will be used
84909467b48Spatrick // to esimate the size cost as well as runtime cost of the BB.
85073471bf0Spatrick InstructionCost
computeBBInlineCost(BasicBlock * BB,TargetTransformInfo * TTI)85173471bf0Spatrick PartialInlinerImpl::computeBBInlineCost(BasicBlock *BB,
85273471bf0Spatrick TargetTransformInfo *TTI) {
85373471bf0Spatrick InstructionCost InlineCost = 0;
85409467b48Spatrick const DataLayout &DL = BB->getParent()->getParent()->getDataLayout();
855*d415bd75Srobert int InstrCost = InlineConstants::getInstrCost();
85609467b48Spatrick for (Instruction &I : BB->instructionsWithoutDebug()) {
85709467b48Spatrick // Skip free instructions.
85809467b48Spatrick switch (I.getOpcode()) {
85909467b48Spatrick case Instruction::BitCast:
86009467b48Spatrick case Instruction::PtrToInt:
86109467b48Spatrick case Instruction::IntToPtr:
86209467b48Spatrick case Instruction::Alloca:
86309467b48Spatrick case Instruction::PHI:
86409467b48Spatrick continue;
86509467b48Spatrick case Instruction::GetElementPtr:
86609467b48Spatrick if (cast<GetElementPtrInst>(&I)->hasAllZeroIndices())
86709467b48Spatrick continue;
86809467b48Spatrick break;
86909467b48Spatrick default:
87009467b48Spatrick break;
87109467b48Spatrick }
87209467b48Spatrick
87309467b48Spatrick if (I.isLifetimeStartOrEnd())
87409467b48Spatrick continue;
87509467b48Spatrick
87673471bf0Spatrick if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
87773471bf0Spatrick Intrinsic::ID IID = II->getIntrinsicID();
87873471bf0Spatrick SmallVector<Type *, 4> Tys;
87973471bf0Spatrick FastMathFlags FMF;
88073471bf0Spatrick for (Value *Val : II->args())
88173471bf0Spatrick Tys.push_back(Val->getType());
88273471bf0Spatrick
88373471bf0Spatrick if (auto *FPMO = dyn_cast<FPMathOperator>(II))
88473471bf0Spatrick FMF = FPMO->getFastMathFlags();
88573471bf0Spatrick
88673471bf0Spatrick IntrinsicCostAttributes ICA(IID, II->getType(), Tys, FMF);
88773471bf0Spatrick InlineCost += TTI->getIntrinsicInstrCost(ICA, TTI::TCK_SizeAndLatency);
88873471bf0Spatrick continue;
88973471bf0Spatrick }
89073471bf0Spatrick
89109467b48Spatrick if (CallInst *CI = dyn_cast<CallInst>(&I)) {
89209467b48Spatrick InlineCost += getCallsiteCost(*CI, DL);
89309467b48Spatrick continue;
89409467b48Spatrick }
89509467b48Spatrick
89609467b48Spatrick if (InvokeInst *II = dyn_cast<InvokeInst>(&I)) {
89709467b48Spatrick InlineCost += getCallsiteCost(*II, DL);
89809467b48Spatrick continue;
89909467b48Spatrick }
90009467b48Spatrick
90109467b48Spatrick if (SwitchInst *SI = dyn_cast<SwitchInst>(&I)) {
902*d415bd75Srobert InlineCost += (SI->getNumCases() + 1) * InstrCost;
90309467b48Spatrick continue;
90409467b48Spatrick }
905*d415bd75Srobert InlineCost += InstrCost;
90609467b48Spatrick }
90773471bf0Spatrick
90809467b48Spatrick return InlineCost;
90909467b48Spatrick }
91009467b48Spatrick
91173471bf0Spatrick std::tuple<InstructionCost, InstructionCost>
computeOutliningCosts(FunctionCloner & Cloner) const91273471bf0Spatrick PartialInlinerImpl::computeOutliningCosts(FunctionCloner &Cloner) const {
91373471bf0Spatrick InstructionCost OutliningFuncCallCost = 0, OutlinedFunctionCost = 0;
91409467b48Spatrick for (auto FuncBBPair : Cloner.OutlinedFunctions) {
91509467b48Spatrick Function *OutlinedFunc = FuncBBPair.first;
91609467b48Spatrick BasicBlock* OutliningCallBB = FuncBBPair.second;
91709467b48Spatrick // Now compute the cost of the call sequence to the outlined function
91809467b48Spatrick // 'OutlinedFunction' in BB 'OutliningCallBB':
91973471bf0Spatrick auto *OutlinedFuncTTI = &GetTTI(*OutlinedFunc);
92073471bf0Spatrick OutliningFuncCallCost +=
92173471bf0Spatrick computeBBInlineCost(OutliningCallBB, OutlinedFuncTTI);
92209467b48Spatrick
92309467b48Spatrick // Now compute the cost of the extracted/outlined function itself:
92409467b48Spatrick for (BasicBlock &BB : *OutlinedFunc)
92573471bf0Spatrick OutlinedFunctionCost += computeBBInlineCost(&BB, OutlinedFuncTTI);
92609467b48Spatrick }
92709467b48Spatrick assert(OutlinedFunctionCost >= Cloner.OutlinedRegionCost &&
92809467b48Spatrick "Outlined function cost should be no less than the outlined region");
92909467b48Spatrick
93009467b48Spatrick // The code extractor introduces a new root and exit stub blocks with
93109467b48Spatrick // additional unconditional branches. Those branches will be eliminated
93209467b48Spatrick // later with bb layout. The cost should be adjusted accordingly:
93309467b48Spatrick OutlinedFunctionCost -=
934*d415bd75Srobert 2 * InlineConstants::getInstrCost() * Cloner.OutlinedFunctions.size();
93509467b48Spatrick
93673471bf0Spatrick InstructionCost OutliningRuntimeOverhead =
93709467b48Spatrick OutliningFuncCallCost +
93809467b48Spatrick (OutlinedFunctionCost - Cloner.OutlinedRegionCost) +
93973471bf0Spatrick ExtraOutliningPenalty.getValue();
94009467b48Spatrick
94109467b48Spatrick return std::make_tuple(OutliningFuncCallCost, OutliningRuntimeOverhead);
94209467b48Spatrick }
94309467b48Spatrick
94409467b48Spatrick // Create the callsite to profile count map which is
94509467b48Spatrick // used to update the original function's entry count,
94609467b48Spatrick // after the function is partially inlined into the callsite.
computeCallsiteToProfCountMap(Function * DuplicateFunction,DenseMap<User *,uint64_t> & CallSiteToProfCountMap) const94709467b48Spatrick void PartialInlinerImpl::computeCallsiteToProfCountMap(
94809467b48Spatrick Function *DuplicateFunction,
94973471bf0Spatrick DenseMap<User *, uint64_t> &CallSiteToProfCountMap) const {
95009467b48Spatrick std::vector<User *> Users(DuplicateFunction->user_begin(),
95109467b48Spatrick DuplicateFunction->user_end());
95209467b48Spatrick Function *CurrentCaller = nullptr;
95309467b48Spatrick std::unique_ptr<BlockFrequencyInfo> TempBFI;
95409467b48Spatrick BlockFrequencyInfo *CurrentCallerBFI = nullptr;
95509467b48Spatrick
95609467b48Spatrick auto ComputeCurrBFI = [&,this](Function *Caller) {
95709467b48Spatrick // For the old pass manager:
95809467b48Spatrick if (!GetBFI) {
95909467b48Spatrick DominatorTree DT(*Caller);
96009467b48Spatrick LoopInfo LI(DT);
96109467b48Spatrick BranchProbabilityInfo BPI(*Caller, LI);
96209467b48Spatrick TempBFI.reset(new BlockFrequencyInfo(*Caller, BPI, LI));
96309467b48Spatrick CurrentCallerBFI = TempBFI.get();
96409467b48Spatrick } else {
96509467b48Spatrick // New pass manager:
966097a140dSpatrick CurrentCallerBFI = &(GetBFI(*Caller));
96709467b48Spatrick }
96809467b48Spatrick };
96909467b48Spatrick
97009467b48Spatrick for (User *User : Users) {
971*d415bd75Srobert // Don't bother with BlockAddress used by CallBr for asm goto.
972*d415bd75Srobert if (isa<BlockAddress>(User))
973*d415bd75Srobert continue;
974097a140dSpatrick CallBase *CB = getSupportedCallBase(User);
975097a140dSpatrick Function *Caller = CB->getCaller();
97609467b48Spatrick if (CurrentCaller != Caller) {
97709467b48Spatrick CurrentCaller = Caller;
97809467b48Spatrick ComputeCurrBFI(Caller);
97909467b48Spatrick } else {
98009467b48Spatrick assert(CurrentCallerBFI && "CallerBFI is not set");
98109467b48Spatrick }
982097a140dSpatrick BasicBlock *CallBB = CB->getParent();
98309467b48Spatrick auto Count = CurrentCallerBFI->getBlockProfileCount(CallBB);
98409467b48Spatrick if (Count)
98509467b48Spatrick CallSiteToProfCountMap[User] = *Count;
98609467b48Spatrick else
98709467b48Spatrick CallSiteToProfCountMap[User] = 0;
98809467b48Spatrick }
98909467b48Spatrick }
99009467b48Spatrick
FunctionCloner(Function * F,FunctionOutliningInfo * OI,OptimizationRemarkEmitter & ORE,function_ref<AssumptionCache * (Function &)> LookupAC,function_ref<TargetTransformInfo & (Function &)> GetTTI)99109467b48Spatrick PartialInlinerImpl::FunctionCloner::FunctionCloner(
99209467b48Spatrick Function *F, FunctionOutliningInfo *OI, OptimizationRemarkEmitter &ORE,
99373471bf0Spatrick function_ref<AssumptionCache *(Function &)> LookupAC,
99473471bf0Spatrick function_ref<TargetTransformInfo &(Function &)> GetTTI)
99573471bf0Spatrick : OrigFunc(F), ORE(ORE), LookupAC(LookupAC), GetTTI(GetTTI) {
99609467b48Spatrick ClonedOI = std::make_unique<FunctionOutliningInfo>();
99709467b48Spatrick
99809467b48Spatrick // Clone the function, so that we can hack away on it.
99909467b48Spatrick ValueToValueMapTy VMap;
100009467b48Spatrick ClonedFunc = CloneFunction(F, VMap);
100109467b48Spatrick
100209467b48Spatrick ClonedOI->ReturnBlock = cast<BasicBlock>(VMap[OI->ReturnBlock]);
100309467b48Spatrick ClonedOI->NonReturnBlock = cast<BasicBlock>(VMap[OI->NonReturnBlock]);
100473471bf0Spatrick for (BasicBlock *BB : OI->Entries)
100509467b48Spatrick ClonedOI->Entries.push_back(cast<BasicBlock>(VMap[BB]));
100673471bf0Spatrick
100709467b48Spatrick for (BasicBlock *E : OI->ReturnBlockPreds) {
100809467b48Spatrick BasicBlock *NewE = cast<BasicBlock>(VMap[E]);
100909467b48Spatrick ClonedOI->ReturnBlockPreds.push_back(NewE);
101009467b48Spatrick }
101109467b48Spatrick // Go ahead and update all uses to the duplicate, so that we can just
101209467b48Spatrick // use the inliner functionality when we're done hacking.
101309467b48Spatrick F->replaceAllUsesWith(ClonedFunc);
101409467b48Spatrick }
101509467b48Spatrick
FunctionCloner(Function * F,FunctionOutliningMultiRegionInfo * OI,OptimizationRemarkEmitter & ORE,function_ref<AssumptionCache * (Function &)> LookupAC,function_ref<TargetTransformInfo & (Function &)> GetTTI)101609467b48Spatrick PartialInlinerImpl::FunctionCloner::FunctionCloner(
101709467b48Spatrick Function *F, FunctionOutliningMultiRegionInfo *OI,
101809467b48Spatrick OptimizationRemarkEmitter &ORE,
101973471bf0Spatrick function_ref<AssumptionCache *(Function &)> LookupAC,
102073471bf0Spatrick function_ref<TargetTransformInfo &(Function &)> GetTTI)
102173471bf0Spatrick : OrigFunc(F), ORE(ORE), LookupAC(LookupAC), GetTTI(GetTTI) {
102209467b48Spatrick ClonedOMRI = std::make_unique<FunctionOutliningMultiRegionInfo>();
102309467b48Spatrick
102409467b48Spatrick // Clone the function, so that we can hack away on it.
102509467b48Spatrick ValueToValueMapTy VMap;
102609467b48Spatrick ClonedFunc = CloneFunction(F, VMap);
102709467b48Spatrick
102809467b48Spatrick // Go through all Outline Candidate Regions and update all BasicBlock
102909467b48Spatrick // information.
103009467b48Spatrick for (FunctionOutliningMultiRegionInfo::OutlineRegionInfo RegionInfo :
103109467b48Spatrick OI->ORI) {
103209467b48Spatrick SmallVector<BasicBlock *, 8> Region;
103373471bf0Spatrick for (BasicBlock *BB : RegionInfo.Region)
103409467b48Spatrick Region.push_back(cast<BasicBlock>(VMap[BB]));
103573471bf0Spatrick
103609467b48Spatrick BasicBlock *NewEntryBlock = cast<BasicBlock>(VMap[RegionInfo.EntryBlock]);
103709467b48Spatrick BasicBlock *NewExitBlock = cast<BasicBlock>(VMap[RegionInfo.ExitBlock]);
103809467b48Spatrick BasicBlock *NewReturnBlock = nullptr;
103909467b48Spatrick if (RegionInfo.ReturnBlock)
104009467b48Spatrick NewReturnBlock = cast<BasicBlock>(VMap[RegionInfo.ReturnBlock]);
104109467b48Spatrick FunctionOutliningMultiRegionInfo::OutlineRegionInfo MappedRegionInfo(
104209467b48Spatrick Region, NewEntryBlock, NewExitBlock, NewReturnBlock);
104309467b48Spatrick ClonedOMRI->ORI.push_back(MappedRegionInfo);
104409467b48Spatrick }
104509467b48Spatrick // Go ahead and update all uses to the duplicate, so that we can just
104609467b48Spatrick // use the inliner functionality when we're done hacking.
104709467b48Spatrick F->replaceAllUsesWith(ClonedFunc);
104809467b48Spatrick }
104909467b48Spatrick
normalizeReturnBlock() const105073471bf0Spatrick void PartialInlinerImpl::FunctionCloner::normalizeReturnBlock() const {
105173471bf0Spatrick auto GetFirstPHI = [](BasicBlock *BB) {
105209467b48Spatrick BasicBlock::iterator I = BB->begin();
105309467b48Spatrick PHINode *FirstPhi = nullptr;
105409467b48Spatrick while (I != BB->end()) {
105509467b48Spatrick PHINode *Phi = dyn_cast<PHINode>(I);
105609467b48Spatrick if (!Phi)
105709467b48Spatrick break;
105809467b48Spatrick if (!FirstPhi) {
105909467b48Spatrick FirstPhi = Phi;
106009467b48Spatrick break;
106109467b48Spatrick }
106209467b48Spatrick }
106309467b48Spatrick return FirstPhi;
106409467b48Spatrick };
106509467b48Spatrick
106609467b48Spatrick // Shouldn't need to normalize PHIs if we're not outlining non-early return
106709467b48Spatrick // blocks.
106809467b48Spatrick if (!ClonedOI)
106909467b48Spatrick return;
107009467b48Spatrick
107109467b48Spatrick // Special hackery is needed with PHI nodes that have inputs from more than
107209467b48Spatrick // one extracted block. For simplicity, just split the PHIs into a two-level
107309467b48Spatrick // sequence of PHIs, some of which will go in the extracted region, and some
107409467b48Spatrick // of which will go outside.
107509467b48Spatrick BasicBlock *PreReturn = ClonedOI->ReturnBlock;
107609467b48Spatrick // only split block when necessary:
107773471bf0Spatrick PHINode *FirstPhi = GetFirstPHI(PreReturn);
107809467b48Spatrick unsigned NumPredsFromEntries = ClonedOI->ReturnBlockPreds.size();
107909467b48Spatrick
108009467b48Spatrick if (!FirstPhi || FirstPhi->getNumIncomingValues() <= NumPredsFromEntries + 1)
108109467b48Spatrick return;
108209467b48Spatrick
108309467b48Spatrick auto IsTrivialPhi = [](PHINode *PN) -> Value * {
1084*d415bd75Srobert if (llvm::all_equal(PN->incoming_values()))
1085*d415bd75Srobert return PN->getIncomingValue(0);
108609467b48Spatrick return nullptr;
108709467b48Spatrick };
108809467b48Spatrick
108909467b48Spatrick ClonedOI->ReturnBlock = ClonedOI->ReturnBlock->splitBasicBlock(
109009467b48Spatrick ClonedOI->ReturnBlock->getFirstNonPHI()->getIterator());
109109467b48Spatrick BasicBlock::iterator I = PreReturn->begin();
109209467b48Spatrick Instruction *Ins = &ClonedOI->ReturnBlock->front();
109309467b48Spatrick SmallVector<Instruction *, 4> DeadPhis;
109409467b48Spatrick while (I != PreReturn->end()) {
109509467b48Spatrick PHINode *OldPhi = dyn_cast<PHINode>(I);
109609467b48Spatrick if (!OldPhi)
109709467b48Spatrick break;
109809467b48Spatrick
109909467b48Spatrick PHINode *RetPhi =
110009467b48Spatrick PHINode::Create(OldPhi->getType(), NumPredsFromEntries + 1, "", Ins);
110109467b48Spatrick OldPhi->replaceAllUsesWith(RetPhi);
110209467b48Spatrick Ins = ClonedOI->ReturnBlock->getFirstNonPHI();
110309467b48Spatrick
110409467b48Spatrick RetPhi->addIncoming(&*I, PreReturn);
110509467b48Spatrick for (BasicBlock *E : ClonedOI->ReturnBlockPreds) {
110609467b48Spatrick RetPhi->addIncoming(OldPhi->getIncomingValueForBlock(E), E);
110709467b48Spatrick OldPhi->removeIncomingValue(E);
110809467b48Spatrick }
110909467b48Spatrick
111009467b48Spatrick // After incoming values splitting, the old phi may become trivial.
111109467b48Spatrick // Keeping the trivial phi can introduce definition inside the outline
111209467b48Spatrick // region which is live-out, causing necessary overhead (load, store
111309467b48Spatrick // arg passing etc).
111409467b48Spatrick if (auto *OldPhiVal = IsTrivialPhi(OldPhi)) {
111509467b48Spatrick OldPhi->replaceAllUsesWith(OldPhiVal);
111609467b48Spatrick DeadPhis.push_back(OldPhi);
111709467b48Spatrick }
111809467b48Spatrick ++I;
111909467b48Spatrick }
112009467b48Spatrick for (auto *DP : DeadPhis)
112109467b48Spatrick DP->eraseFromParent();
112209467b48Spatrick
112373471bf0Spatrick for (auto *E : ClonedOI->ReturnBlockPreds)
112409467b48Spatrick E->getTerminator()->replaceUsesOfWith(PreReturn, ClonedOI->ReturnBlock);
112509467b48Spatrick }
112609467b48Spatrick
doMultiRegionFunctionOutlining()112709467b48Spatrick bool PartialInlinerImpl::FunctionCloner::doMultiRegionFunctionOutlining() {
112809467b48Spatrick
112973471bf0Spatrick auto ComputeRegionCost =
113073471bf0Spatrick [&](SmallVectorImpl<BasicBlock *> &Region) -> InstructionCost {
113173471bf0Spatrick InstructionCost Cost = 0;
113209467b48Spatrick for (BasicBlock* BB : Region)
113373471bf0Spatrick Cost += computeBBInlineCost(BB, &GetTTI(*BB->getParent()));
113409467b48Spatrick return Cost;
113509467b48Spatrick };
113609467b48Spatrick
113709467b48Spatrick assert(ClonedOMRI && "Expecting OutlineInfo for multi region outline");
113809467b48Spatrick
113909467b48Spatrick if (ClonedOMRI->ORI.empty())
114009467b48Spatrick return false;
114109467b48Spatrick
114209467b48Spatrick // The CodeExtractor needs a dominator tree.
114309467b48Spatrick DominatorTree DT;
114409467b48Spatrick DT.recalculate(*ClonedFunc);
114509467b48Spatrick
114609467b48Spatrick // Manually calculate a BlockFrequencyInfo and BranchProbabilityInfo.
114709467b48Spatrick LoopInfo LI(DT);
114809467b48Spatrick BranchProbabilityInfo BPI(*ClonedFunc, LI);
114909467b48Spatrick ClonedFuncBFI.reset(new BlockFrequencyInfo(*ClonedFunc, BPI, LI));
115009467b48Spatrick
115109467b48Spatrick // Cache and recycle the CodeExtractor analysis to avoid O(n^2) compile-time.
115209467b48Spatrick CodeExtractorAnalysisCache CEAC(*ClonedFunc);
115309467b48Spatrick
115409467b48Spatrick SetVector<Value *> Inputs, Outputs, Sinks;
115509467b48Spatrick for (FunctionOutliningMultiRegionInfo::OutlineRegionInfo RegionInfo :
115609467b48Spatrick ClonedOMRI->ORI) {
115773471bf0Spatrick InstructionCost CurrentOutlinedRegionCost =
115873471bf0Spatrick ComputeRegionCost(RegionInfo.Region);
115909467b48Spatrick
116009467b48Spatrick CodeExtractor CE(RegionInfo.Region, &DT, /*AggregateArgs*/ false,
116109467b48Spatrick ClonedFuncBFI.get(), &BPI,
116209467b48Spatrick LookupAC(*RegionInfo.EntryBlock->getParent()),
116309467b48Spatrick /* AllowVarargs */ false);
116409467b48Spatrick
116509467b48Spatrick CE.findInputsOutputs(Inputs, Outputs, Sinks);
116609467b48Spatrick
116773471bf0Spatrick LLVM_DEBUG({
116809467b48Spatrick dbgs() << "inputs: " << Inputs.size() << "\n";
116909467b48Spatrick dbgs() << "outputs: " << Outputs.size() << "\n";
117009467b48Spatrick for (Value *value : Inputs)
117109467b48Spatrick dbgs() << "value used in func: " << *value << "\n";
117209467b48Spatrick for (Value *output : Outputs)
117309467b48Spatrick dbgs() << "instr used in func: " << *output << "\n";
117473471bf0Spatrick });
117573471bf0Spatrick
117609467b48Spatrick // Do not extract regions that have live exit variables.
117709467b48Spatrick if (Outputs.size() > 0 && !ForceLiveExit)
117809467b48Spatrick continue;
117909467b48Spatrick
118073471bf0Spatrick if (Function *OutlinedFunc = CE.extractCodeRegion(CEAC)) {
118173471bf0Spatrick CallBase *OCS = PartialInlinerImpl::getOneCallSiteTo(*OutlinedFunc);
1182097a140dSpatrick BasicBlock *OutliningCallBB = OCS->getParent();
118309467b48Spatrick assert(OutliningCallBB->getParent() == ClonedFunc);
118409467b48Spatrick OutlinedFunctions.push_back(std::make_pair(OutlinedFunc,OutliningCallBB));
118509467b48Spatrick NumColdRegionsOutlined++;
118609467b48Spatrick OutlinedRegionCost += CurrentOutlinedRegionCost;
118709467b48Spatrick
118809467b48Spatrick if (MarkOutlinedColdCC) {
118909467b48Spatrick OutlinedFunc->setCallingConv(CallingConv::Cold);
1190097a140dSpatrick OCS->setCallingConv(CallingConv::Cold);
119109467b48Spatrick }
119209467b48Spatrick } else
119309467b48Spatrick ORE.emit([&]() {
119409467b48Spatrick return OptimizationRemarkMissed(DEBUG_TYPE, "ExtractFailed",
119509467b48Spatrick &RegionInfo.Region.front()->front())
119609467b48Spatrick << "Failed to extract region at block "
119709467b48Spatrick << ore::NV("Block", RegionInfo.Region.front());
119809467b48Spatrick });
119909467b48Spatrick }
120009467b48Spatrick
120109467b48Spatrick return !OutlinedFunctions.empty();
120209467b48Spatrick }
120309467b48Spatrick
120409467b48Spatrick Function *
doSingleRegionFunctionOutlining()120509467b48Spatrick PartialInlinerImpl::FunctionCloner::doSingleRegionFunctionOutlining() {
120609467b48Spatrick // Returns true if the block is to be partial inlined into the caller
120709467b48Spatrick // (i.e. not to be extracted to the out of line function)
120809467b48Spatrick auto ToBeInlined = [&, this](BasicBlock *BB) {
120909467b48Spatrick return BB == ClonedOI->ReturnBlock ||
121073471bf0Spatrick llvm::is_contained(ClonedOI->Entries, BB);
121109467b48Spatrick };
121209467b48Spatrick
121309467b48Spatrick assert(ClonedOI && "Expecting OutlineInfo for single region outline");
121409467b48Spatrick // The CodeExtractor needs a dominator tree.
121509467b48Spatrick DominatorTree DT;
121609467b48Spatrick DT.recalculate(*ClonedFunc);
121709467b48Spatrick
121809467b48Spatrick // Manually calculate a BlockFrequencyInfo and BranchProbabilityInfo.
121909467b48Spatrick LoopInfo LI(DT);
122009467b48Spatrick BranchProbabilityInfo BPI(*ClonedFunc, LI);
122109467b48Spatrick ClonedFuncBFI.reset(new BlockFrequencyInfo(*ClonedFunc, BPI, LI));
122209467b48Spatrick
122309467b48Spatrick // Gather up the blocks that we're going to extract.
122409467b48Spatrick std::vector<BasicBlock *> ToExtract;
122573471bf0Spatrick auto *ClonedFuncTTI = &GetTTI(*ClonedFunc);
122609467b48Spatrick ToExtract.push_back(ClonedOI->NonReturnBlock);
122773471bf0Spatrick OutlinedRegionCost += PartialInlinerImpl::computeBBInlineCost(
122873471bf0Spatrick ClonedOI->NonReturnBlock, ClonedFuncTTI);
122909467b48Spatrick for (BasicBlock &BB : *ClonedFunc)
123009467b48Spatrick if (!ToBeInlined(&BB) && &BB != ClonedOI->NonReturnBlock) {
123109467b48Spatrick ToExtract.push_back(&BB);
123209467b48Spatrick // FIXME: the code extractor may hoist/sink more code
123309467b48Spatrick // into the outlined function which may make the outlining
123409467b48Spatrick // overhead (the difference of the outlined function cost
123509467b48Spatrick // and OutliningRegionCost) look larger.
123673471bf0Spatrick OutlinedRegionCost += computeBBInlineCost(&BB, ClonedFuncTTI);
123709467b48Spatrick }
123809467b48Spatrick
123909467b48Spatrick // Extract the body of the if.
124009467b48Spatrick CodeExtractorAnalysisCache CEAC(*ClonedFunc);
124109467b48Spatrick Function *OutlinedFunc =
124209467b48Spatrick CodeExtractor(ToExtract, &DT, /*AggregateArgs*/ false,
124309467b48Spatrick ClonedFuncBFI.get(), &BPI, LookupAC(*ClonedFunc),
124409467b48Spatrick /* AllowVarargs */ true)
124509467b48Spatrick .extractCodeRegion(CEAC);
124609467b48Spatrick
124709467b48Spatrick if (OutlinedFunc) {
124809467b48Spatrick BasicBlock *OutliningCallBB =
124973471bf0Spatrick PartialInlinerImpl::getOneCallSiteTo(*OutlinedFunc)->getParent();
125009467b48Spatrick assert(OutliningCallBB->getParent() == ClonedFunc);
125109467b48Spatrick OutlinedFunctions.push_back(std::make_pair(OutlinedFunc, OutliningCallBB));
125209467b48Spatrick } else
125309467b48Spatrick ORE.emit([&]() {
125409467b48Spatrick return OptimizationRemarkMissed(DEBUG_TYPE, "ExtractFailed",
125509467b48Spatrick &ToExtract.front()->front())
125609467b48Spatrick << "Failed to extract region at block "
125709467b48Spatrick << ore::NV("Block", ToExtract.front());
125809467b48Spatrick });
125909467b48Spatrick
126009467b48Spatrick return OutlinedFunc;
126109467b48Spatrick }
126209467b48Spatrick
~FunctionCloner()126309467b48Spatrick PartialInlinerImpl::FunctionCloner::~FunctionCloner() {
126409467b48Spatrick // Ditch the duplicate, since we're done with it, and rewrite all remaining
126509467b48Spatrick // users (function pointers, etc.) back to the original function.
126609467b48Spatrick ClonedFunc->replaceAllUsesWith(OrigFunc);
126709467b48Spatrick ClonedFunc->eraseFromParent();
126809467b48Spatrick if (!IsFunctionInlined) {
126909467b48Spatrick // Remove each function that was speculatively created if there is no
127009467b48Spatrick // reference.
127109467b48Spatrick for (auto FuncBBPair : OutlinedFunctions) {
127209467b48Spatrick Function *Func = FuncBBPair.first;
127309467b48Spatrick Func->eraseFromParent();
127409467b48Spatrick }
127509467b48Spatrick }
127609467b48Spatrick }
127709467b48Spatrick
unswitchFunction(Function & F)127873471bf0Spatrick std::pair<bool, Function *> PartialInlinerImpl::unswitchFunction(Function &F) {
127973471bf0Spatrick if (F.hasAddressTaken())
128009467b48Spatrick return {false, nullptr};
128109467b48Spatrick
128209467b48Spatrick // Let inliner handle it
128373471bf0Spatrick if (F.hasFnAttribute(Attribute::AlwaysInline))
128409467b48Spatrick return {false, nullptr};
128509467b48Spatrick
128673471bf0Spatrick if (F.hasFnAttribute(Attribute::NoInline))
128709467b48Spatrick return {false, nullptr};
128809467b48Spatrick
128973471bf0Spatrick if (PSI.isFunctionEntryCold(&F))
129009467b48Spatrick return {false, nullptr};
129109467b48Spatrick
129273471bf0Spatrick if (F.users().empty())
129309467b48Spatrick return {false, nullptr};
129409467b48Spatrick
129573471bf0Spatrick OptimizationRemarkEmitter ORE(&F);
129609467b48Spatrick
129709467b48Spatrick // Only try to outline cold regions if we have a profile summary, which
129809467b48Spatrick // implies we have profiling information.
129973471bf0Spatrick if (PSI.hasProfileSummary() && F.hasProfileData() &&
130009467b48Spatrick !DisableMultiRegionPartialInline) {
130109467b48Spatrick std::unique_ptr<FunctionOutliningMultiRegionInfo> OMRI =
130209467b48Spatrick computeOutliningColdRegionsInfo(F, ORE);
130309467b48Spatrick if (OMRI) {
130473471bf0Spatrick FunctionCloner Cloner(&F, OMRI.get(), ORE, LookupAssumptionCache, GetTTI);
130509467b48Spatrick
130673471bf0Spatrick LLVM_DEBUG({
1307097a140dSpatrick dbgs() << "HotCountThreshold = " << PSI.getHotCountThreshold() << "\n";
1308097a140dSpatrick dbgs() << "ColdCountThreshold = " << PSI.getColdCountThreshold()
130909467b48Spatrick << "\n";
131073471bf0Spatrick });
131173471bf0Spatrick
131209467b48Spatrick bool DidOutline = Cloner.doMultiRegionFunctionOutlining();
131309467b48Spatrick
131409467b48Spatrick if (DidOutline) {
131573471bf0Spatrick LLVM_DEBUG({
131609467b48Spatrick dbgs() << ">>>>>> Outlined (Cloned) Function >>>>>>\n";
131709467b48Spatrick Cloner.ClonedFunc->print(dbgs());
131809467b48Spatrick dbgs() << "<<<<<< Outlined (Cloned) Function <<<<<<\n";
131973471bf0Spatrick });
132009467b48Spatrick
132109467b48Spatrick if (tryPartialInline(Cloner))
132209467b48Spatrick return {true, nullptr};
132309467b48Spatrick }
132409467b48Spatrick }
132509467b48Spatrick }
132609467b48Spatrick
132709467b48Spatrick // Fall-thru to regular partial inlining if we:
132809467b48Spatrick // i) can't find any cold regions to outline, or
132909467b48Spatrick // ii) can't inline the outlined function anywhere.
133009467b48Spatrick std::unique_ptr<FunctionOutliningInfo> OI = computeOutliningInfo(F);
133109467b48Spatrick if (!OI)
133209467b48Spatrick return {false, nullptr};
133309467b48Spatrick
133473471bf0Spatrick FunctionCloner Cloner(&F, OI.get(), ORE, LookupAssumptionCache, GetTTI);
133573471bf0Spatrick Cloner.normalizeReturnBlock();
133609467b48Spatrick
133709467b48Spatrick Function *OutlinedFunction = Cloner.doSingleRegionFunctionOutlining();
133809467b48Spatrick
133909467b48Spatrick if (!OutlinedFunction)
134009467b48Spatrick return {false, nullptr};
134109467b48Spatrick
134273471bf0Spatrick if (tryPartialInline(Cloner))
134309467b48Spatrick return {true, OutlinedFunction};
134409467b48Spatrick
134509467b48Spatrick return {false, nullptr};
134609467b48Spatrick }
134709467b48Spatrick
tryPartialInline(FunctionCloner & Cloner)134809467b48Spatrick bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) {
134909467b48Spatrick if (Cloner.OutlinedFunctions.empty())
135009467b48Spatrick return false;
135109467b48Spatrick
135273471bf0Spatrick auto OutliningCosts = computeOutliningCosts(Cloner);
135373471bf0Spatrick
1354*d415bd75Srobert InstructionCost SizeCost = std::get<0>(OutliningCosts);
1355*d415bd75Srobert InstructionCost NonWeightedRcost = std::get<1>(OutliningCosts);
1356*d415bd75Srobert
1357*d415bd75Srobert assert(SizeCost.isValid() && NonWeightedRcost.isValid() &&
1358*d415bd75Srobert "Expected valid costs");
135909467b48Spatrick
136009467b48Spatrick // Only calculate RelativeToEntryFreq when we are doing single region
136109467b48Spatrick // outlining.
136209467b48Spatrick BranchProbability RelativeToEntryFreq;
136373471bf0Spatrick if (Cloner.ClonedOI)
136409467b48Spatrick RelativeToEntryFreq = getOutliningCallBBRelativeFreq(Cloner);
136573471bf0Spatrick else
136609467b48Spatrick // RelativeToEntryFreq doesn't make sense when we have more than one
136709467b48Spatrick // outlined call because each call will have a different relative frequency
136809467b48Spatrick // to the entry block. We can consider using the average, but the
136909467b48Spatrick // usefulness of that information is questionable. For now, assume we never
137009467b48Spatrick // execute the calls to outlined functions.
137109467b48Spatrick RelativeToEntryFreq = BranchProbability(0, 1);
137209467b48Spatrick
1373*d415bd75Srobert BlockFrequency WeightedRcost =
1374*d415bd75Srobert BlockFrequency(*NonWeightedRcost.getValue()) * RelativeToEntryFreq;
137509467b48Spatrick
137609467b48Spatrick // The call sequence(s) to the outlined function(s) are larger than the sum of
137709467b48Spatrick // the original outlined region size(s), it does not increase the chances of
137809467b48Spatrick // inlining the function with outlining (The inliner uses the size increase to
137909467b48Spatrick // model the cost of inlining a callee).
138009467b48Spatrick if (!SkipCostAnalysis && Cloner.OutlinedRegionCost < SizeCost) {
138109467b48Spatrick OptimizationRemarkEmitter OrigFuncORE(Cloner.OrigFunc);
138209467b48Spatrick DebugLoc DLoc;
138309467b48Spatrick BasicBlock *Block;
138473471bf0Spatrick std::tie(DLoc, Block) = getOneDebugLoc(*Cloner.ClonedFunc);
138509467b48Spatrick OrigFuncORE.emit([&]() {
138609467b48Spatrick return OptimizationRemarkAnalysis(DEBUG_TYPE, "OutlineRegionTooSmall",
138709467b48Spatrick DLoc, Block)
138809467b48Spatrick << ore::NV("Function", Cloner.OrigFunc)
138909467b48Spatrick << " not partially inlined into callers (Original Size = "
139009467b48Spatrick << ore::NV("OutlinedRegionOriginalSize", Cloner.OutlinedRegionCost)
139109467b48Spatrick << ", Size of call sequence to outlined function = "
139209467b48Spatrick << ore::NV("NewSize", SizeCost) << ")";
139309467b48Spatrick });
139409467b48Spatrick return false;
139509467b48Spatrick }
139609467b48Spatrick
139709467b48Spatrick assert(Cloner.OrigFunc->users().empty() &&
139809467b48Spatrick "F's users should all be replaced!");
139909467b48Spatrick
140009467b48Spatrick std::vector<User *> Users(Cloner.ClonedFunc->user_begin(),
140109467b48Spatrick Cloner.ClonedFunc->user_end());
140209467b48Spatrick
140309467b48Spatrick DenseMap<User *, uint64_t> CallSiteToProfCountMap;
140409467b48Spatrick auto CalleeEntryCount = Cloner.OrigFunc->getEntryCount();
140509467b48Spatrick if (CalleeEntryCount)
140609467b48Spatrick computeCallsiteToProfCountMap(Cloner.ClonedFunc, CallSiteToProfCountMap);
140709467b48Spatrick
140809467b48Spatrick uint64_t CalleeEntryCountV =
1409*d415bd75Srobert (CalleeEntryCount ? CalleeEntryCount->getCount() : 0);
141009467b48Spatrick
141109467b48Spatrick bool AnyInline = false;
141209467b48Spatrick for (User *User : Users) {
1413*d415bd75Srobert // Don't bother with BlockAddress used by CallBr for asm goto.
1414*d415bd75Srobert if (isa<BlockAddress>(User))
1415*d415bd75Srobert continue;
1416*d415bd75Srobert
1417097a140dSpatrick CallBase *CB = getSupportedCallBase(User);
141809467b48Spatrick
141973471bf0Spatrick if (isLimitReached())
142009467b48Spatrick continue;
142109467b48Spatrick
1422097a140dSpatrick OptimizationRemarkEmitter CallerORE(CB->getCaller());
1423097a140dSpatrick if (!shouldPartialInline(*CB, Cloner, WeightedRcost, CallerORE))
142409467b48Spatrick continue;
142509467b48Spatrick
142609467b48Spatrick // Construct remark before doing the inlining, as after successful inlining
142709467b48Spatrick // the callsite is removed.
1428097a140dSpatrick OptimizationRemark OR(DEBUG_TYPE, "PartiallyInlined", CB);
142909467b48Spatrick OR << ore::NV("Callee", Cloner.OrigFunc) << " partially inlined into "
1430097a140dSpatrick << ore::NV("Caller", CB->getCaller());
143109467b48Spatrick
1432097a140dSpatrick InlineFunctionInfo IFI(nullptr, GetAssumptionCache, &PSI);
143309467b48Spatrick // We can only forward varargs when we outlined a single region, else we
143409467b48Spatrick // bail on vararg functions.
1435*d415bd75Srobert if (!InlineFunction(*CB, IFI, /*MergeAttributes=*/false, nullptr, true,
143609467b48Spatrick (Cloner.ClonedOI ? Cloner.OutlinedFunctions.back().first
1437097a140dSpatrick : nullptr))
1438097a140dSpatrick .isSuccess())
143909467b48Spatrick continue;
144009467b48Spatrick
144109467b48Spatrick CallerORE.emit(OR);
144209467b48Spatrick
144309467b48Spatrick // Now update the entry count:
144409467b48Spatrick if (CalleeEntryCountV && CallSiteToProfCountMap.count(User)) {
144509467b48Spatrick uint64_t CallSiteCount = CallSiteToProfCountMap[User];
144609467b48Spatrick CalleeEntryCountV -= std::min(CalleeEntryCountV, CallSiteCount);
144709467b48Spatrick }
144809467b48Spatrick
144909467b48Spatrick AnyInline = true;
145009467b48Spatrick NumPartialInlining++;
145109467b48Spatrick // Update the stats
145209467b48Spatrick if (Cloner.ClonedOI)
145309467b48Spatrick NumPartialInlined++;
145409467b48Spatrick else
145509467b48Spatrick NumColdOutlinePartialInlined++;
145609467b48Spatrick }
145709467b48Spatrick
145809467b48Spatrick if (AnyInline) {
145909467b48Spatrick Cloner.IsFunctionInlined = true;
146009467b48Spatrick if (CalleeEntryCount)
1461*d415bd75Srobert Cloner.OrigFunc->setEntryCount(Function::ProfileCount(
1462*d415bd75Srobert CalleeEntryCountV, CalleeEntryCount->getType()));
146309467b48Spatrick OptimizationRemarkEmitter OrigFuncORE(Cloner.OrigFunc);
146409467b48Spatrick OrigFuncORE.emit([&]() {
146509467b48Spatrick return OptimizationRemark(DEBUG_TYPE, "PartiallyInlined", Cloner.OrigFunc)
146609467b48Spatrick << "Partially inlined into at least one caller";
146709467b48Spatrick });
146809467b48Spatrick }
146909467b48Spatrick
147009467b48Spatrick return AnyInline;
147109467b48Spatrick }
147209467b48Spatrick
run(Module & M)147309467b48Spatrick bool PartialInlinerImpl::run(Module &M) {
147409467b48Spatrick if (DisablePartialInlining)
147509467b48Spatrick return false;
147609467b48Spatrick
147709467b48Spatrick std::vector<Function *> Worklist;
147809467b48Spatrick Worklist.reserve(M.size());
147909467b48Spatrick for (Function &F : M)
148009467b48Spatrick if (!F.use_empty() && !F.isDeclaration())
148109467b48Spatrick Worklist.push_back(&F);
148209467b48Spatrick
148309467b48Spatrick bool Changed = false;
148409467b48Spatrick while (!Worklist.empty()) {
148509467b48Spatrick Function *CurrFunc = Worklist.back();
148609467b48Spatrick Worklist.pop_back();
148709467b48Spatrick
148809467b48Spatrick if (CurrFunc->use_empty())
148909467b48Spatrick continue;
149009467b48Spatrick
149173471bf0Spatrick std::pair<bool, Function *> Result = unswitchFunction(*CurrFunc);
149209467b48Spatrick if (Result.second)
149309467b48Spatrick Worklist.push_back(Result.second);
149409467b48Spatrick Changed |= Result.first;
149509467b48Spatrick }
149609467b48Spatrick
149709467b48Spatrick return Changed;
149809467b48Spatrick }
149909467b48Spatrick
150009467b48Spatrick char PartialInlinerLegacyPass::ID = 0;
150109467b48Spatrick
150209467b48Spatrick INITIALIZE_PASS_BEGIN(PartialInlinerLegacyPass, "partial-inliner",
150309467b48Spatrick "Partial Inliner", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)150409467b48Spatrick INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
150509467b48Spatrick INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
150609467b48Spatrick INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
1507097a140dSpatrick INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
150809467b48Spatrick INITIALIZE_PASS_END(PartialInlinerLegacyPass, "partial-inliner",
150909467b48Spatrick "Partial Inliner", false, false)
151009467b48Spatrick
151109467b48Spatrick ModulePass *llvm::createPartialInliningPass() {
151209467b48Spatrick return new PartialInlinerLegacyPass();
151309467b48Spatrick }
151409467b48Spatrick
run(Module & M,ModuleAnalysisManager & AM)151509467b48Spatrick PreservedAnalyses PartialInlinerPass::run(Module &M,
151609467b48Spatrick ModuleAnalysisManager &AM) {
151709467b48Spatrick auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
151809467b48Spatrick
1519097a140dSpatrick auto GetAssumptionCache = [&FAM](Function &F) -> AssumptionCache & {
152009467b48Spatrick return FAM.getResult<AssumptionAnalysis>(F);
152109467b48Spatrick };
152209467b48Spatrick
152309467b48Spatrick auto LookupAssumptionCache = [&FAM](Function &F) -> AssumptionCache * {
152409467b48Spatrick return FAM.getCachedResult<AssumptionAnalysis>(F);
152509467b48Spatrick };
152609467b48Spatrick
1527097a140dSpatrick auto GetBFI = [&FAM](Function &F) -> BlockFrequencyInfo & {
152809467b48Spatrick return FAM.getResult<BlockFrequencyAnalysis>(F);
152909467b48Spatrick };
153009467b48Spatrick
1531097a140dSpatrick auto GetTTI = [&FAM](Function &F) -> TargetTransformInfo & {
153209467b48Spatrick return FAM.getResult<TargetIRAnalysis>(F);
153309467b48Spatrick };
153409467b48Spatrick
1535097a140dSpatrick auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
1536097a140dSpatrick return FAM.getResult<TargetLibraryAnalysis>(F);
1537097a140dSpatrick };
153809467b48Spatrick
1539097a140dSpatrick ProfileSummaryInfo &PSI = AM.getResult<ProfileSummaryAnalysis>(M);
1540097a140dSpatrick
1541097a140dSpatrick if (PartialInlinerImpl(GetAssumptionCache, LookupAssumptionCache, GetTTI,
1542097a140dSpatrick GetTLI, PSI, GetBFI)
154309467b48Spatrick .run(M))
154409467b48Spatrick return PreservedAnalyses::none();
154509467b48Spatrick return PreservedAnalyses::all();
154609467b48Spatrick }
1547