//===- MLInlineAdvisor.cpp - machine learned InlineAdvisor ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the interface between the inliner and a learned model.
// It delegates model evaluation to either the AOT compiled model (the
// 'release' mode) or a runtime-loaded model (the 'development' case).
//
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/MLInlineAdvisor.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/FunctionPropertiesAnalysis.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/InlineModelFeatureMaps.h"
#include "llvm/Analysis/LazyCallGraph.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MLModelRunner.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Support/CommandLine.h"

using namespace llvm;

#if defined(LLVM_HAVE_TF_AOT_INLINERSIZEMODEL)
#include "llvm/Analysis/ReleaseModeModelRunner.h"
// codegen-ed file
#include "InlinerSizeModel.h" // NOLINT

std::unique_ptr<InlineAdvisor>
llvm::getReleaseModeAdvisor(Module &M, ModuleAnalysisManager &MAM) {
  auto AOTRunner =
      std::make_unique<ReleaseModeModelRunner<llvm::InlinerSizeModel>>(
          M.getContext(), FeatureMap, DecisionName);
  return std::make_unique<MLInlineAdvisor>(M, MAM, std::move(AOTRunner));
}
#endif
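
// Note: the factory above is the 'release' mode entry point mentioned in the
// file header, available only when the AOT-compiled model was built in. In
// upstream LLVM it is typically selected with -mllvm -enable-ml-inliner=release
// (that flag is defined elsewhere, in InlineAdvisor.cpp; mentioned here for
// orientation only).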

#define DEBUG_TYPE "inline-ml"

static cl::opt<float> SizeIncreaseThreshold(
    "ml-advisor-size-increase-threshold", cl::Hidden,
    cl::desc("Maximum factor by which expected native size may increase before "
             "blocking any further inlining."),
    cl::init(2.0));

static cl::opt<bool> KeepFPICache(
    "ml-advisor-keep-fpi-cache", cl::Hidden,
    cl::desc(
        "For test - keep the ML Inline advisor's FunctionPropertiesInfo cache"),
    cl::init(false));

// clang-format off
const std::array<TensorSpec, NumberOfFeatures> llvm::FeatureMap{
#define POPULATE_NAMES(_, NAME) TensorSpec::createSpec<int64_t>(NAME, {1} ),
// InlineCost features - these must come first
  INLINE_COST_FEATURE_ITERATOR(POPULATE_NAMES)
#undef POPULATE_NAMES

// Non-cost features
#define POPULATE_NAMES(_, NAME, __) TensorSpec::createSpec<int64_t>(NAME, {1} ),
  INLINE_FEATURE_ITERATOR(POPULATE_NAMES)
#undef POPULATE_NAMES
};
// clang-format on

const char *const llvm::DecisionName = "inlining_decision";
const char *const llvm::DefaultDecisionName = "inlining_default";
const char *const llvm::RewardName = "delta_size";

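// If I is a direct call to a function defined in this module, return the call
// site; otherwise return nullptr. Only such call sites are interesting to the
// advisor's bookkeeping.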
CallBase *getInlinableCS(Instruction &I) {
  if (auto *CS = dyn_cast<CallBase>(&I))
    if (Function *Callee = CS->getCalledFunction()) {
      if (!Callee->isDeclaration()) {
        return CS;
      }
    }
  return nullptr;
}

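// Construct the advisor and compute the initial module-wide state: the set of
// call graph nodes, the node and edge counts, the initial IR size, and the
// per-function 'level' backing the call site height feature.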
MLInlineAdvisor::MLInlineAdvisor(Module &M, ModuleAnalysisManager &MAM,
                                 std::unique_ptr<MLModelRunner> Runner)
    : InlineAdvisor(
          M, MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager()),
      ModelRunner(std::move(Runner)),
      CG(MAM.getResult<LazyCallGraphAnalysis>(M)),
      InitialIRSize(getModuleIRSize()), CurrentIRSize(InitialIRSize) {
  assert(ModelRunner);

  // Extract the 'call site height' feature - the position of a call site
  // relative to the farthest statically reachable SCC node. We don't mutate
  // this value while inlining happens. Empirically, this feature proved
  // critical in behavioral cloning - i.e. training a model to mimic the manual
  // heuristic's decisions - and, thus, equally important for training for
  // improvement.
  CallGraph CGraph(M);
  for (auto I = scc_begin(&CGraph); !I.isAtEnd(); ++I) {
    const std::vector<CallGraphNode *> &CGNodes = *I;
    unsigned Level = 0;
    for (auto *CGNode : CGNodes) {
      Function *F = CGNode->getFunction();
      if (!F || F->isDeclaration())
        continue;
      for (auto &I : instructions(F)) {
        if (auto *CS = getInlinableCS(I)) {
          auto *Called = CS->getCalledFunction();
          auto Pos = FunctionLevels.find(&CG.get(*Called));
          // In bottom-up traversal, an inlinable callee is either in the same
          // SCC, or in an already-visited SCC. So not finding its level means
          // we haven't visited it yet, meaning it's in this SCC.
          if (Pos == FunctionLevels.end())
            continue;
          Level = std::max(Level, Pos->second + 1);
        }
      }
    }
    for (auto *CGNode : CGNodes) {
      Function *F = CGNode->getFunction();
      if (F && !F->isDeclaration())
        FunctionLevels[&CG.get(*F)] = Level;
    }
  }
  for (auto KVP : FunctionLevels) {
    AllNodes.insert(KVP.first);
    EdgeCount += getLocalCalls(KVP.first->getFunction());
  }
  NodeCount = AllNodes.size();
}

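// Return the precomputed level of F's SCC, or 0 if F isn't in the call graph.
// As an illustration: for a module where main calls f and f calls a leaf
// function g (all defined in the module), the levels are g = 0, f = 1,
// main = 2, so a call site inside main reports a height of 2.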
unsigned MLInlineAdvisor::getInitialFunctionLevel(const Function &F) const {
  return CG.lookup(F) ? FunctionLevels.at(CG.lookup(F)) : 0;
}

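// Called when the inliner starts processing an SCC: refresh the node and edge
// counts to account for changes made by function passes since the last run.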
void MLInlineAdvisor::onPassEntry(LazyCallGraph::SCC *LastSCC) {
  if (!LastSCC || ForceStop)
    return;
  FPICache.clear();
  // Function passes executed between InlinerPass runs may have changed the
  // module-wide features.
  // The cgscc pass manager rules are such that:
  // - if a pass leads to merging SCCs, then the pipeline is restarted on the
  //   merged SCC
  // - if a pass leads to splitting the SCC, then we continue with one of the
  //   splits
  // This means that NodesInLastSCC is a (not necessarily strict) superset of
  // the nodes that subsequent passes would have processed
  // - in addition, if new Nodes were created by a pass (e.g. CoroSplit),
  //   they'd be adjacent to Nodes in the last SCC. So we just need to check the
  //   boundary of Nodes in NodesInLastSCC for Nodes we haven't seen. We don't
  //   care about the nature of the Edge (call or ref).
  NodeCount -= static_cast<int64_t>(NodesInLastSCC.size());
  while (!NodesInLastSCC.empty()) {
    const auto *N = *NodesInLastSCC.begin();
    NodesInLastSCC.erase(N);
    // The Function wrapped by N could have been deleted since we last saw it.
    if (N->isDead()) {
      assert(!N->getFunction().isDeclaration());
      continue;
    }
    ++NodeCount;
    EdgeCount += getLocalCalls(N->getFunction());
    for (const auto &E : *(*N)) {
      const auto *AdjNode = &E.getNode();
      assert(!AdjNode->isDead() && !AdjNode->getFunction().isDeclaration());
      auto I = AllNodes.insert(AdjNode);
      if (I.second)
        NodesInLastSCC.insert(AdjNode);
    }
  }

  EdgeCount -= EdgesOfLastSeenNodes;
  EdgesOfLastSeenNodes = 0;

  // (Re)use NodesInLastSCC to remember the nodes in the SCC right now,
  // in case the SCC is split before onPassExit and some nodes are split out.
  assert(NodesInLastSCC.empty());
  for (const auto &N : *LastSCC)
    NodesInLastSCC.insert(&N);
}

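// Counterpart of onPassEntry, called when the inliner finishes an SCC. The
// module-wide totals must remain upper bounds for what onPassEntry rediscovers
// next time, which the asserts at the end check.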
void MLInlineAdvisor::onPassExit(LazyCallGraph::SCC *LastSCC) {
  // No need to keep this around - function passes will invalidate it.
  if (!KeepFPICache)
    FPICache.clear();
  if (!LastSCC || ForceStop)
    return;
  // Keep track of the nodes and edges we last saw. Then, in onPassEntry,
  // we update the node count and edge count from the subset of these nodes
  // that survived.
  EdgesOfLastSeenNodes = 0;

  // Check on nodes that were in SCC onPassEntry
  for (auto I = NodesInLastSCC.begin(); I != NodesInLastSCC.end();) {
    if ((*I)->isDead())
      NodesInLastSCC.erase(*I++);
    else
      EdgesOfLastSeenNodes += getLocalCalls((*I++)->getFunction());
  }

  // Check on nodes that may have got added to SCC
  for (const auto &N : *LastSCC) {
    assert(!N.isDead());
    auto I = NodesInLastSCC.insert(&N);
    if (I.second)
      EdgesOfLastSeenNodes += getLocalCalls(N.getFunction());
  }
  assert(NodeCount >= NodesInLastSCC.size());
  assert(EdgeCount >= EdgesOfLastSeenNodes);
}

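// Number of direct calls from F to functions defined in this module, read
// from the cached FunctionPropertiesInfo.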
int64_t MLInlineAdvisor::getLocalCalls(Function &F) {
  return getCachedFPI(F).DirectCallsToDefinedFunctions;
}

// Update the internal state of the advisor, and force invalidate feature
// analysis. Currently, we maintain minimal (and very simple) global state - the
// number of functions and the number of static calls. We also keep track of the
// total IR size in this module, to stop misbehaving policies at a certain bloat
// factor (SizeIncreaseThreshold).
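// For example (illustrative numbers): if before inlining the caller made 3
// local calls and the callee 2, CallerAndCalleeEdges was 5; if after inlining
// the caller makes 4 local calls and the callee was deleted, the new total is
// 4, so EdgeCount is adjusted by 4 - 5 = -1.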
void MLInlineAdvisor::onSuccessfulInlining(const MLInlineAdvice &Advice,
                                           bool CalleeWasDeleted) {
  assert(!ForceStop);
  Function *Caller = Advice.getCaller();
  Function *Callee = Advice.getCallee();
  // The caller features aren't valid anymore.
  {
    PreservedAnalyses PA = PreservedAnalyses::all();
    PA.abandon<FunctionPropertiesAnalysis>();
    PA.abandon<DominatorTreeAnalysis>();
    PA.abandon<LoopAnalysis>();
    FAM.invalidate(*Caller, PA);
  }
  Advice.updateCachedCallerFPI(FAM);
  int64_t IRSizeAfter =
      getIRSize(*Caller) + (CalleeWasDeleted ? 0 : Advice.CalleeIRSize);
  CurrentIRSize += IRSizeAfter - (Advice.CallerIRSize + Advice.CalleeIRSize);
  if (CurrentIRSize > SizeIncreaseThreshold * InitialIRSize)
    ForceStop = true;

  // We can delta-update module-wide features. We know the inlining only changed
  // the caller, and maybe the callee (by deleting the latter).
  // Nodes are simple to update.
  // For edges, we 'forget' the edges that the caller and callee used to have
  // before inlining, and add back what they currently have together.
  int64_t NewCallerAndCalleeEdges =
      getCachedFPI(*Caller).DirectCallsToDefinedFunctions;

  if (CalleeWasDeleted)
    --NodeCount;
  else
    NewCallerAndCalleeEdges +=
        getCachedFPI(*Callee).DirectCallsToDefinedFunctions;
  EdgeCount += (NewCallerAndCalleeEdges - Advice.CallerAndCalleeEdges);
  assert(CurrentIRSize >= 0 && EdgeCount >= 0 && NodeCount >= 0);
}

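// Total IR size of the module: the sum of getIRSize over all functions with a
// definition.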
int64_t MLInlineAdvisor::getModuleIRSize() const {
  int64_t Ret = 0;
  for (auto &F : M)
    if (!F.isDeclaration())
      Ret += getIRSize(F);
  return Ret;
}

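// Return the cached FunctionPropertiesInfo for F, computing it via
// FunctionPropertiesAnalysis on a cache miss.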
FunctionPropertiesInfo &MLInlineAdvisor::getCachedFPI(Function &F) const {
  auto InsertPair =
      FPICache.insert(std::make_pair(&F, FunctionPropertiesInfo()));
  if (!InsertPair.second)
    return InsertPair.first->second;
  InsertPair.first->second = FAM.getResult<FunctionPropertiesAnalysis>(F);
  return InsertPair.first->second;
}

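// The main entry point: filter out the cases the model shouldn't see
// (unreachable call sites, 'never'/'always' inline kinds, recursion,
// ForceStop), then populate the feature tensors and ask the model for a
// recommendation.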
std::unique_ptr<InlineAdvice> MLInlineAdvisor::getAdviceImpl(CallBase &CB) {
  if (auto Skip = getSkipAdviceIfUnreachableCallsite(CB))
    return Skip;

  auto &Caller = *CB.getCaller();
  auto &Callee = *CB.getCalledFunction();

  auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & {
    return FAM.getResult<AssumptionAnalysis>(F);
  };
  auto &TIR = FAM.getResult<TargetIRAnalysis>(Callee);
  auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(Caller);

  auto MandatoryKind = InlineAdvisor::getMandatoryKind(CB, FAM, ORE);
  // If this is a "never inline" case, there won't be any changes to internal
  // state we need to track, so we can just return the base InlineAdvice, which
  // will do nothing interesting.
  // Same thing if this is a recursive case.
  if (MandatoryKind == InlineAdvisor::MandatoryInliningKind::Never ||
      &Caller == &Callee)
    return getMandatoryAdvice(CB, false);

  bool Mandatory =
      MandatoryKind == InlineAdvisor::MandatoryInliningKind::Always;

  // If we need to stop, we won't want to track any further state changes, so
  // we just return the base InlineAdvice, which acts as a noop.
  if (ForceStop) {
    ORE.emit([&] {
      return OptimizationRemarkMissed(DEBUG_TYPE, "ForceStop", &CB)
             << "Won't attempt inlining because module size grew too much.";
    });
    return std::make_unique<InlineAdvice>(this, CB, ORE, Mandatory);
  }

  int CostEstimate = 0;
  if (!Mandatory) {
    auto IsCallSiteInlinable =
        llvm::getInliningCostEstimate(CB, TIR, GetAssumptionCache);
    if (!IsCallSiteInlinable) {
      // We can't inline this for correctness reasons, so return the base
      // InlineAdvice, as we don't care about tracking any state changes (which
      // won't happen).
      return std::make_unique<InlineAdvice>(this, CB, ORE, false);
    }
    CostEstimate = *IsCallSiteInlinable;
  }

  const auto CostFeatures =
      llvm::getInliningCostFeatures(CB, TIR, GetAssumptionCache);
  if (!CostFeatures) {
    return std::make_unique<InlineAdvice>(this, CB, ORE, false);
  }

  if (Mandatory)
    return getMandatoryAdvice(CB, true);

  auto NrCtantParams = 0;
  for (auto I = CB.arg_begin(), E = CB.arg_end(); I != E; ++I) {
    NrCtantParams += (isa<Constant>(*I));
  }

  auto &CallerBefore = getCachedFPI(Caller);
  auto &CalleeBefore = getCachedFPI(Callee);

  *ModelRunner->getTensor<int64_t>(FeatureIndex::CalleeBasicBlockCount) =
      CalleeBefore.BasicBlockCount;
  *ModelRunner->getTensor<int64_t>(FeatureIndex::CallSiteHeight) =
      getInitialFunctionLevel(Caller);
  *ModelRunner->getTensor<int64_t>(FeatureIndex::NodeCount) = NodeCount;
  *ModelRunner->getTensor<int64_t>(FeatureIndex::NrCtantParams) = NrCtantParams;
  *ModelRunner->getTensor<int64_t>(FeatureIndex::EdgeCount) = EdgeCount;
  *ModelRunner->getTensor<int64_t>(FeatureIndex::CallerUsers) =
      CallerBefore.Uses;
  *ModelRunner->getTensor<int64_t>(
      FeatureIndex::CallerConditionallyExecutedBlocks) =
      CallerBefore.BlocksReachedFromConditionalInstruction;
  *ModelRunner->getTensor<int64_t>(FeatureIndex::CallerBasicBlockCount) =
      CallerBefore.BasicBlockCount;
  *ModelRunner->getTensor<int64_t>(
      FeatureIndex::CalleeConditionallyExecutedBlocks) =
      CalleeBefore.BlocksReachedFromConditionalInstruction;
  *ModelRunner->getTensor<int64_t>(FeatureIndex::CalleeUsers) =
      CalleeBefore.Uses;
  *ModelRunner->getTensor<int64_t>(FeatureIndex::CostEstimate) = CostEstimate;

  // Add the cost features
  for (size_t I = 0;
       I < static_cast<size_t>(InlineCostFeatureIndex::NumberOfFeatures); ++I) {
    *ModelRunner->getTensor<int64_t>(inlineCostFeatureToMlFeature(
        static_cast<InlineCostFeatureIndex>(I))) = CostFeatures->at(I);
  }

  return getAdviceFromModel(CB, ORE);
}

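// Run the model: a nonzero value in the decision tensor means "inline". This
// is also the hook development-mode advisors override to add training-log
// output.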
std::unique_ptr<MLInlineAdvice>
MLInlineAdvisor::getAdviceFromModel(CallBase &CB,
                                    OptimizationRemarkEmitter &ORE) {
  return std::make_unique<MLInlineAdvice>(
      this, CB, ORE, static_cast<bool>(ModelRunner->evaluate<int64_t>()));
}

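// Call sites in blocks unreachable from the function entry won't really be
// inlined (they'll just be deleted), so return a "don't inline" advice that
// skips state tracking; return nullptr if the call site is reachable.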
std::unique_ptr<InlineAdvice>
MLInlineAdvisor::getSkipAdviceIfUnreachableCallsite(CallBase &CB) {
  if (!FAM.getResult<DominatorTreeAnalysis>(*CB.getCaller())
           .isReachableFromEntry(CB.getParent()))
    return std::make_unique<InlineAdvice>(this, CB, getCallerORE(CB), false);
  return nullptr;
}

std::unique_ptr<InlineAdvice> MLInlineAdvisor::getMandatoryAdvice(CallBase &CB,
                                                                  bool Advice) {
  // Make sure we track inlinings in all cases - mandatory or not.
  if (auto Skip = getSkipAdviceIfUnreachableCallsite(CB))
    return Skip;
  if (Advice && !ForceStop)
    return getMandatoryAdviceImpl(CB);

  // If this is a "never inline" case, there won't be any changes to internal
  // state we need to track, so we can just return the base InlineAdvice, which
  // will do nothing interesting.
  // Same if we are forced to stop - we don't track any further.
  return std::make_unique<InlineAdvice>(this, CB, getCallerORE(CB), Advice);
}

std::unique_ptr<MLInlineAdvice>
MLInlineAdvisor::getMandatoryAdviceImpl(CallBase &CB) {
  return std::make_unique<MLInlineAdvice>(this, CB, getCallerORE(CB), true);
}

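// Debug printout of the advisor's module-wide state and the contents of the
// FunctionPropertiesInfo cache.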
void MLInlineAdvisor::print(raw_ostream &OS) const {
  OS << "[MLInlineAdvisor] Nodes: " << NodeCount << " Edges: " << EdgeCount
     << " EdgesOfLastSeenNodes: " << EdgesOfLastSeenNodes << "\n";
  OS << "[MLInlineAdvisor] FPI:\n";
  for (auto I : FPICache) {
    OS << I.first->getName() << ":\n";
    I.second.print(OS);
    OS << "\n";
  }
  OS << "\n";
}

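// Capture the pre-inlining sizes and local-call counts of the caller and the
// callee; onSuccessfulInlining uses them to delta-update the advisor's
// module-wide state. When the advisor is forced to stop, these are left at 0
// since they won't be consumed.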
MLInlineAdvice::MLInlineAdvice(MLInlineAdvisor *Advisor, CallBase &CB,
                               OptimizationRemarkEmitter &ORE,
                               bool Recommendation)
    : InlineAdvice(Advisor, CB, ORE, Recommendation),
      CallerIRSize(Advisor->isForcedToStop() ? 0 : Advisor->getIRSize(*Caller)),
      CalleeIRSize(Advisor->isForcedToStop() ? 0 : Advisor->getIRSize(*Callee)),
      CallerAndCalleeEdges(Advisor->isForcedToStop()
                               ? 0
                               : (Advisor->getLocalCalls(*Caller) +
                                  Advisor->getLocalCalls(*Callee))),
      PreInlineCallerFPI(Advisor->getCachedFPI(*Caller)) {
  if (Recommendation)
    FPU.emplace(Advisor->getCachedFPI(*getCaller()), CB);
}

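// Attach the callee name, the full feature vector, and the decision to an
// optimization remark, so the model's inputs can be inspected offline.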
void MLInlineAdvice::reportContextForRemark(
    DiagnosticInfoOptimizationBase &OR) {
  using namespace ore;
  OR << NV("Callee", Callee->getName());
  for (size_t I = 0; I < NumberOfFeatures; ++I)
    OR << NV(FeatureMap[I].name(),
             *getAdvisor()->getModelRunner().getTensor<int64_t>(I));
  OR << NV("ShouldInline", isInliningRecommended());
}

void MLInlineAdvice::updateCachedCallerFPI(FunctionAnalysisManager &FAM) const {
  FPU->finish(FAM);
}

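// The record*Impl callbacks below emit a remark describing the outcome and,
// where the advisor's state is affected, either notify it
// (onSuccessfulInlining) or roll back the caller's cached properties.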
void MLInlineAdvice::recordInliningImpl() {
  ORE.emit([&]() {
    OptimizationRemark R(DEBUG_TYPE, "InliningSuccess", DLoc, Block);
    reportContextForRemark(R);
    return R;
  });
  getAdvisor()->onSuccessfulInlining(*this, /*CalleeWasDeleted*/ false);
}

void MLInlineAdvice::recordInliningWithCalleeDeletedImpl() {
  ORE.emit([&]() {
    OptimizationRemark R(DEBUG_TYPE, "InliningSuccessWithCalleeDeleted", DLoc,
                         Block);
    reportContextForRemark(R);
    return R;
  });
  getAdvisor()->onSuccessfulInlining(*this, /*CalleeWasDeleted*/ true);
}

void MLInlineAdvice::recordUnsuccessfulInliningImpl(
    const InlineResult &Result) {
  getAdvisor()->getCachedFPI(*Caller) = PreInlineCallerFPI;
  ORE.emit([&]() {
    OptimizationRemarkMissed R(DEBUG_TYPE, "InliningAttemptedAndUnsuccessful",
                               DLoc, Block);
    reportContextForRemark(R);
    return R;
  });
}

void MLInlineAdvice::recordUnattemptedInliningImpl() {
  assert(!FPU);
  ORE.emit([&]() {
    OptimizationRemarkMissed R(DEBUG_TYPE, "InliningNotAttempted", DLoc, Block);
    reportContextForRemark(R);
    return R;
  });
}