1 /*========================== begin_copyright_notice ============================
2 
3 Copyright (C) 2017-2021 Intel Corporation
4 
5 SPDX-License-Identifier: MIT
6 
7 ============================= end_copyright_notice ===========================*/
8 
9 #include "GenCodeGenModule.h"
10 #include "EstimateFunctionSize.h"
11 #include "AdaptorCommon/ImplicitArgs.hpp"
12 #include "Compiler/CISACodeGen/helper.h"
13 #include "Compiler/DebugInfo/ScalarVISAModule.h"
14 #include "Compiler/CodeGenContextWrapper.hpp"
15 #include "Compiler/IGCPassSupport.h"
16 #include "Compiler/MetaDataUtilsWrapper.h"
17 #include "common/igc_regkeys.hpp"
18 #include "common/LLVMWarningsPush.hpp"
19 #include "llvm/Config/llvm-config.h"
20 #include "llvm/IR/Argument.h"
21 #include "llvm/IR/Attributes.h"
22 #include "llvmWrapper/Analysis/InlineCost.h"
23 #include "llvm/ADT/SetVector.h"
24 #include "llvm/ADT/SCCIterator.h"
25 #include "llvm/Analysis/ValueTracking.h"
26 #include "llvmWrapper/IR/CallSite.h"
27 #include "llvm/IR/Module.h"
28 #include "llvm/IR/Function.h"
29 #include "llvm/IR/Instructions.h"
30 #include "llvm/Transforms/IPO.h"
31 #include "llvm/Transforms/IPO/Inliner.h"
32 #include "llvmWrapper/Transforms/Utils/Cloning.h"
33 #include "llvm/IR/DebugInfo.h"
34 #include "llvm/IR/DIBuilder.h"
35 #include "common/LLVMWarningsPop.hpp"
36 #include "DebugInfo/VISADebugEmitter.hpp"
37 #include <numeric>
38 #include <utility>
39 #include "Probe/Assertion.h"
40 
41 using namespace llvm;
42 using namespace IGC;
43 
44 char GenXCodeGenModule::ID = 0;
45 
46 IGC_INITIALIZE_PASS_BEGIN(GenXCodeGenModule, "GenXCodeGenModule", "GenXCodeGenModule", false, false)
IGC_INITIALIZE_PASS_DEPENDENCY(MetaDataUtilsWrapper)47 IGC_INITIALIZE_PASS_DEPENDENCY(MetaDataUtilsWrapper)
48 IGC_INITIALIZE_PASS_DEPENDENCY(GenXFunctionGroupAnalysis)
49 IGC_INITIALIZE_PASS_END(GenXCodeGenModule, "GenXCodeGenModule", "GenXCodeGenModule", false, false)
50 
51 llvm::ModulePass* IGC::createGenXCodeGenModulePass()
52 {
53     initializeGenXCodeGenModulePass(*PassRegistry::getPassRegistry());
54     return new GenXCodeGenModule;
55 }
56 
GenXCodeGenModule()57 GenXCodeGenModule::GenXCodeGenModule()
58     : llvm::ModulePass(ID), FGA(nullptr), pMdUtils(nullptr), Modified(false)
59 {
60     initializeGenXCodeGenModulePass(*PassRegistry::getPassRegistry());
61 }
62 
~GenXCodeGenModule()63 GenXCodeGenModule::~GenXCodeGenModule() {}
64 
getAnalysisUsage(AnalysisUsage & AU) const65 void GenXCodeGenModule::getAnalysisUsage(AnalysisUsage& AU) const
66 {
67     AU.addRequired<MetaDataUtilsWrapper>();
68     AU.addRequired<GenXFunctionGroupAnalysis>();
69     AU.addRequired<CodeGenContextWrapper>();
70     AU.addRequired<llvm::CallGraphWrapperPass>();
71 }
72 
73 // Update cloned function's metadata.
CloneFuncMetadata(IGCMD::MetaDataUtils * pM,llvm::Function * ClonedF,llvm::Function * F)74 static inline void CloneFuncMetadata(IGCMD::MetaDataUtils* pM,
75     llvm::Function* ClonedF,
76     llvm::Function* F)
77 {
78     using namespace IGC::IGCMD;
79     auto Info = pM->getFunctionsInfoItem(F);
80     auto NewInfo = FunctionInfoMetaDataHandle(FunctionInfoMetaData::get());
81 
82     // Copy function type info.
83     if (Info->isTypeHasValue())
84     {
85         NewInfo->setType(Info->getType());
86     }
87 
88     // Copy explicit argument info, if any.
89     unsigned i = 0;
90     for (auto AI = Info->begin_ArgInfoList(), AE = Info->begin_ArgInfoList();
91         AI != AE; ++AI)
92     {
93         NewInfo->addArgInfoListItem(Info->getArgInfoListItem(i));
94         i++;
95     }
96 
97     // Copy implicit argument info, if any.
98     i = 0;
99     for (auto AI = Info->begin_ImplicitArgInfoList(),
100         AE = Info->end_ImplicitArgInfoList();
101         AI != AE; ++AI)
102     {
103         NewInfo->addImplicitArgInfoListItem(Info->getImplicitArgInfoListItem(i));
104         i++;
105     }
106 
107     pM->setFunctionsInfoItem(ClonedF, Info);
108 }
109 
cloneFunc(Function * F)110 Function* GenXCodeGenModule::cloneFunc(Function* F)
111 {
112     ValueToValueMapTy VMap;
113 
114     Function* ClonedFunc = CloneFunction(F, VMap);
115     //if the function is not added as part of clone, add it
116     if (!F->getParent()->getFunction(ClonedFunc->getName()))
117         F->getParent()->getFunctionList().push_back(ClonedFunc);
118     CloneFuncMetadata(pMdUtils, ClonedFunc, F);
119 
120     return ClonedFunc;
121 }
122 
getCallerFunc(Value * user)123 inline Function* getCallerFunc(Value* user)
124 {
125     IGC_ASSERT(nullptr != user);
126     Function* caller = nullptr;
127     if (CallInst * CI = dyn_cast<CallInst>(user))
128     {
129         IGC_ASSERT(nullptr != CI->getParent());
130         caller = CI->getParent()->getParent();
131     }
132     IGC_ASSERT_MESSAGE(caller, "cannot be indirect call");
133     return caller;
134 }
135 
processFunction(Function & F)136 void GenXCodeGenModule::processFunction(Function& F)
137 {
138     // See what FunctionGroups this Function is called from.
139     SetVector<std::pair<FunctionGroup*, Function*>> CallerFGs;
140     for (auto U : F.users())
141     {
142         Function* Caller = getCallerFunc(U);
143         FunctionGroup* FG = FGA->getGroup(Caller);
144         Function* SubGrpH = FGA->useStackCall(&F) ? (&F) : FGA->getSubGroupMap(Caller);
145         if (FG == nullptr || SubGrpH == nullptr)
146             continue;
147         CallerFGs.insert(std::make_pair(FG, SubGrpH));
148     }
149 
150     IGC_ASSERT(CallerFGs.size() >= 1);
151 
152     // Get the cloning threshold. If the number of function groups a function belongs to
153     // exceeds the threshold, instead of cloning the function N times, make it an indirect call
154     // and use relocation instead. The function will only be compiled once and runtime must relocate
155     // its address for each caller. This greatly saves on compile time when there are many function
156     // groups that all call the same function.
157     auto cloneTheshold = IGC_GET_FLAG_VALUE(FunctionCloningThreshold);
158     if (F.hasFnAttribute("visaStackCall") &&
159         cloneTheshold > 0 && CallerFGs.size() > cloneTheshold)
160     {
161         auto pCtx = getAnalysis<CodeGenContextWrapper>().getCodeGenContext();
162         auto IFG = FGA->getIndirectCallGroup();
163         IGC_ASSERT(IFG);
164         F.addFnAttr("referenced-indirectly");
165         pCtx->m_enableFunctionPointer = true;
166         FGA->addToFunctionGroup(&F, IFG, &F);
167         return;
168     }
169 
170     bool FirstPair = true;
171     for (auto FGPair : CallerFGs)
172     {
173         if (FirstPair)
174         {
175             FGA->addToFunctionGroup(&F, FGPair.first, FGPair.second);
176             FirstPair = false;
177         }
178         else
179         {
180             // clone the function, add to this function group
181             auto FCloned = cloneFunc(&F);
182 
183             // Copy F's property over
184             copyFuncProperties(FCloned, &F);
185 
186             Function* SubGrpH = FGA->useStackCall(&F) ? FCloned : FGPair.second;
187             FGA->addToFunctionGroup(FCloned, FGPair.first, SubGrpH);
188             Modified = true;
189             // update the edge after clone, it can handle self-recursion
190             for (auto UI = F.use_begin(), UE = F.use_end(); UI != UE; /*empty*/)
191             {
192                 // Increment iterator after setting U to change the use.
193                 Use* U = &*UI++;
194                 Function* Caller = cast<CallInst>(U->getUser())->getParent()->getParent();
195                 FunctionGroup* InFG = FGA->getGroup(Caller);
196                 Function* InSubGrpH = FGA->useStackCall(&F) ? FCloned : FGA->getSubGroupMap(Caller);
197                 if (InFG == FGPair.first && InSubGrpH == SubGrpH)
198                 {
199                     *U = FCloned;
200                 }
201             }
202         }
203     }
204 }
205 
processSCC(std::vector<llvm::CallGraphNode * > * SCCNodes)206 void GenXCodeGenModule::processSCC(std::vector<llvm::CallGraphNode*>* SCCNodes)
207 {
208     // force stack-call for every function in SCC
209     for (CallGraphNode* Node : (*SCCNodes))
210     {
211         Function* F = Node->getFunction();
212         F->addFnAttr("visaStackCall");
213     }
214 
215     // See what FunctionGroups this SCC is called from.
216     SetVector<FunctionGroup*> CallerFGs;
217     for (CallGraphNode* Node : (*SCCNodes))
218     {
219         Function* F = Node->getFunction();
220         for (auto U : F->users())
221         {
222             Function* Caller = getCallerFunc(U);
223             FunctionGroup* FG = FGA->getGroup(Caller);
224             if (FG == nullptr)
225                 continue;
226             CallerFGs.insert(FG);
227         }
228     }
229     IGC_ASSERT(CallerFGs.size() >= 1);
230 
231     // Use the same cloning threshold for single function SCCs, but making every stack function
232     // in the SCC indirect calls to prevent cloning the entire SCC N times.
233     auto cloneTheshold = IGC_GET_FLAG_VALUE(FunctionCloningThreshold);
234     if (cloneTheshold > 0 && CallerFGs.size() > cloneTheshold)
235     {
236         auto pCtx = getAnalysis<CodeGenContextWrapper>().getCodeGenContext();
237         for (CallGraphNode* Node : (*SCCNodes))
238         {
239             Function* F = Node->getFunction();
240             auto IFG = FGA->getIndirectCallGroup();
241             IGC_ASSERT(IFG && F->hasFnAttribute("visaStackCall"));
242             F->addFnAttr("referenced-indirectly");
243             pCtx->m_enableFunctionPointer = true;
244             FGA->addToFunctionGroup(F, IFG, F);
245         }
246         return;
247     }
248 
249     bool FirstPair = true;
250     for (auto FG : CallerFGs)
251     {
252         if (FirstPair)
253         {
254             for (CallGraphNode* Node : (*SCCNodes))
255             {
256                 Function* F = Node->getFunction();
257                 FGA->addToFunctionGroup(F, FG, F);
258             }
259             FirstPair = false;
260         }
261         else
262         {
263             // clone the functions in scc, add to this function group
264             llvm::DenseMap<Function*, Function*> CloneMap;
265             for (CallGraphNode* Node : (*SCCNodes))
266             {
267                 Function* F = Node->getFunction();
268                 auto FCloned = cloneFunc(F);
269 
270                 // Copy properties
271                 copyFuncProperties(FCloned, F);
272 
273                 FGA->addToFunctionGroup(FCloned, FG, FCloned);
274                 CloneMap.insert(std::make_pair(F, FCloned));
275             }
276             Modified = true;
277             // update the call-edges for every function in SCC,
278             // move edges to the cloned SCC, including the recursion edge
279             for (CallGraphNode* Node : (*SCCNodes))
280             {
281                 Function* F = Node->getFunction();
282                 for (auto UI = F->use_begin(), UE = F->use_end(); UI != UE; /*empty*/)
283                 {
284                     // Increment iterator after setting U to change the use.
285                     Use* U = &*UI++;
286                     Function* Caller = cast<CallInst>(U->getUser())->getParent()->getParent();
287                     FunctionGroup* InFG = FGA->getGroup(Caller);
288                     if (InFG == FG)
289                     {
290                         *U = CloneMap[F];
291                     }
292                 }
293             }
294         }
295     }
296 }
297 
setFuncProperties(CallGraph & CG)298 void GenXCodeGenModule::setFuncProperties(CallGraph& CG)
299 {
300     for (auto I = scc_begin(&CG), IE = scc_end(&CG); I != IE; ++I)
301     {
302         const std::vector<CallGraphNode*>& SCCNodes = (*I);
303         for (CallGraphNode* Node : SCCNodes)
304         {
305             Function* F = Node->getFunction();
306             if (F != nullptr && !F->isDeclaration())
307             {
308                 if (Node->empty()) {
309                     FGA->setLeafFunc(F);
310                 }
311             }
312         }
313     }
314 }
315 
copyFuncProperties(llvm::Function * To,llvm::Function * From)316 void GenXCodeGenModule::copyFuncProperties(llvm::Function* To, llvm::Function* From)
317 {
318     FGA->copyFuncProperties(To, From);
319 }
320 
321 /// Deduce non-null argument attributes for subroutines.
DeduceNonNullAttribute(Module & M)322 static bool DeduceNonNullAttribute(Module& M)
323 {
324     bool Modifided = false;
325     for (auto& F : M.getFunctionList())
326     {
327         if (F.isDeclaration() || F.hasExternalLinkage() || F.hasAddressTaken())
328             continue;
329 
330         bool Skip = false;
331         for (auto U : F.users())
332         {
333             if (!isa<CallInst>(U))
334             {
335                 Skip = true;
336                 break;
337             }
338         }
339         if (Skip)
340             continue;
341 
342         for (auto& Arg : F.args())
343         {
344             // Only for used pointer arguments.
345             if (Arg.use_empty() || !Arg.getType()->isPointerTy())
346                 continue;
347 
348             // If all call sites are passing a non-null value to this argument, then
349             // this argument cannot be a null ptr.
350             bool NotNull = true;
351             for (auto U : F.users())
352             {
353                 auto CI = cast<CallInst>(U);
354                 Value* V = CI->getArgOperand(Arg.getArgNo());
355                 auto DL = F.getParent()->getDataLayout();
356                 if (!isKnownNonZero(V, DL))
357                 {
358                     NotNull = false;
359                     break;
360                 }
361             }
362 
363             if (NotNull) {
364                 // FIXME: Below lines possibly can be refactored to be simpler.
365                 AttributeList attrSet = AttributeList::get(Arg.getParent()->getContext(), Arg.getArgNo() + 1, llvm::Attribute::NonNull);
366                 Arg.addAttr(attrSet.getAttribute(Arg.getArgNo() + 1, llvm::Attribute::NonNull));
367                 Modifided = true;
368             }
369         }
370     }
371     return Modifided;
372 }
373 
runOnModule(Module & M)374 bool GenXCodeGenModule::runOnModule(Module& M)
375 {
376     FGA = &getAnalysis<GenXFunctionGroupAnalysis>();
377 
378     // Already built.
379     if (FGA->getModule())
380         return false;
381 
382     pMdUtils = getAnalysis<MetaDataUtilsWrapper>().getMetaDataUtils();
383     CallGraph& CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
384 
385     setFuncProperties(CG);
386 
387     std::vector<std::vector<CallGraphNode*>*> SCCVec;
388     for (auto I = scc_begin(&CG), IE = scc_end(&CG); I != IE; ++I)
389     {
390         std::vector<CallGraphNode*>* SCCNodes = new std::vector<CallGraphNode*>((*I));
391         SCCVec.push_back(SCCNodes);
392     }
393 
394     // Add all indirect functions to the default kernel group
395     FGA->addIndirectFuncsToKernelGroup(&M);
396 
397     for (auto I = SCCVec.rbegin(), IE = SCCVec.rend(); I != IE; ++I)
398     {
399         std::vector<CallGraphNode*>* SCCNodes = (*I);
400         for (CallGraphNode* Node : (*SCCNodes))
401         {
402             Function* F = Node->getFunction();
403             if (!F || F->isDeclaration()) continue;
404             // skip functions belonging to the indirect call group
405             if (FGA->isIndirectCallGroup(F)) continue;
406 
407             if (isEntryFunc(pMdUtils, F))
408             {
409                 FGA->setSubGroupMap(F, F);
410                 FGA->createFunctionGroup(F);
411             }
412             else if (SCCNodes->size() == 1)
413             {
414                 processFunction(*F);
415             }
416             else
417             {
418                 processSCC(SCCNodes);
419                 break;
420             }
421         }
422         delete SCCNodes;
423     }
424 
425     this->pMdUtils->save(M.getContext());
426 
427     // Check and set FG attribute flags
428     FGA->setGroupAttributes();
429 
430     // By swapping, we sort the function list to ensure codegen order for
431     // functions. This relies on llvm module pass manager's implementation detail.
432     SmallVector<Function*, 16> OrderedList;
433     for (auto GI = FGA->begin(), GE = FGA->end(); GI != GE; ++GI)
434     {
435         for (auto SubGI = (*GI)->Functions.begin(), SubGE = (*GI)->Functions.end();
436             SubGI != SubGE; ++SubGI)
437         {
438             for (auto FI = (*SubGI)->begin(), FE = (*SubGI)->end(); FI != FE; ++FI)
439             {
440                 OrderedList.push_back(*FI);
441             }
442         }
443     }
444 
445     //  Input L1 = [A, B, C, D, E]       // Functions in groups
446     //  Input L2 = [A, C, G, B, D, E, F] // Functions in the module
447     // Output L2 = [A, B, C, D, E, G, F] // Ordered functions in the module
448     IGC_ASSERT_MESSAGE(OrderedList.size() <= M.size(), "out of sync");
449     Function* CurF = &(*M.begin());
450     for (auto I = OrderedList.begin(), E = OrderedList.end(); I != E; ++I)
451     {
452         IGC_ASSERT(nullptr != CurF);
453         Function* F = *I;
454         if (CurF != F)
455             // Move F before CurF. This just works See BasicBlock::moveBefore.
456             // CurF remains the same for the next iteration.
457             M.getFunctionList().splice(CurF->getIterator(), M.getFunctionList(), F);
458         else
459         {
460             auto it = CurF->getIterator();
461             CurF = &*(++it);
462         }
463     }
464 
465     // Changing simd size from 32 to 16 for function groups with function calls due to slicing
466     for (auto GI = FGA->begin(), GE = FGA->end(); GI != GE; ++GI)
467     {
468         FunctionGroup* FG = *GI;
469         if (!FG->isSingle() || FG->hasStackCall())
470         {
471             Function* Kernel = FG->getHead();
472             IGC::IGCMD::FunctionInfoMetaDataHandle funcInfoMD = pMdUtils->getFunctionsInfoItem(Kernel);
473             int simd_size = funcInfoMD->getSubGroupSize()->getSIMD_size();
474             if (simd_size == 32)
475                 funcInfoMD->getSubGroupSize()->setSIMD_size(16);
476         }
477     }
478 
479     IGC_ASSERT(FGA->verify());
480 
481     FGA->setModule(&M);
482     Modified |= DeduceNonNullAttribute(M);
483 
484     return Modified;
485 }
486 
487 ////////////////////////////////////////////////////////////////////////////////
488 /// GenXFunctionGroupAnalysis implementation detail
489 ////////////////////////////////////////////////////////////////////////////////
490 
491 char GenXFunctionGroupAnalysis::ID = 0;
492 
493 IGC_INITIALIZE_PASS_BEGIN(GenXFunctionGroupAnalysis, "GenXFunctionGroupAnalysis", "GenXFunctionGroupAnalysis", false, true)
IGC_INITIALIZE_PASS_DEPENDENCY(CodeGenContextWrapper)494 IGC_INITIALIZE_PASS_DEPENDENCY(CodeGenContextWrapper)
495 IGC_INITIALIZE_PASS_DEPENDENCY(MetaDataUtilsWrapper)
496 IGC_INITIALIZE_PASS_END(GenXFunctionGroupAnalysis, "GenXFunctionGroupAnalysis", "GenXFunctionGroupAnalysis", false, true)
497 
498 llvm::ImmutablePass* IGC::createGenXFunctionGroupAnalysisPass() {
499     initializeGenXFunctionGroupAnalysisPass(*PassRegistry::getPassRegistry());
500     return new GenXFunctionGroupAnalysis;
501 }
502 
GenXFunctionGroupAnalysis()503 GenXFunctionGroupAnalysis::GenXFunctionGroupAnalysis()
504     : ImmutablePass(ID), M(nullptr) {
505     initializeGenXFunctionGroupAnalysisPass(*PassRegistry::getPassRegistry());
506 }
507 
getAnalysisUsage(AnalysisUsage & AU) const508 void GenXFunctionGroupAnalysis::getAnalysisUsage(AnalysisUsage& AU) const {
509     AU.addRequired<CodeGenContextWrapper>();
510     AU.addRequired<MetaDataUtilsWrapper>();
511     AU.setPreservesAll();
512 }
513 
verify()514 bool GenXFunctionGroupAnalysis::verify()
515 {
516     for (auto GI = begin(), GE = end(); GI != GE; ++GI)
517     {
518         for (auto SubGI = (*GI)->Functions.begin(), SubGE = (*GI)->Functions.end();
519             SubGI != SubGE; ++SubGI)
520         {
521             for (auto FI = (*SubGI)->begin(), FE = (*SubGI)->end(); FI != FE; ++FI)
522             {
523                 Function* F = *FI;
524                 if (F->hasFnAttribute("referenced-indirectly"))
525                 {
526                     continue;
527                 }
528                 // If F is an unused non-kernel function, although it should have been
529                 // deleted, that is fine.
530                 for (auto U : F->users())
531                 {
532                     Function* Caller = getCallerFunc(U);
533                     FunctionGroup* CallerFG = getGroup(Caller);
534                     // Caller's FG should be the same as FG. Otherwise, something is wrong.
535                     if (CallerFG != (*GI))
536                     {
537                         printf("%s\n", F->getName().data());
538                         printf("%s\n", Caller->getName().data());
539                         return false;
540                     }
541                     Function* SubGrpH = getSubGroupMap(F);
542                     // Caller's sub-group header must be the first element of the sub-vector
543                     if (SubGrpH != (*SubGI)->front())
544                         return false;
545                 }
546             }
547         }
548     }
549     // Everything is OK.
550     return true;
551 }
552 
useStackCall(llvm::Function * F)553 bool GenXFunctionGroupAnalysis::useStackCall(llvm::Function* F)
554 {
555     return (F->hasFnAttribute("visaStackCall"));
556 }
557 
setGroupAttributes()558 void GenXFunctionGroupAnalysis::setGroupAttributes()
559 {
560     for (auto FG : Groups)
561     {
562         // Ignore the Indirect Call Group, as there is no actual callgraph
563         if (FG == IndirectCallGroup) continue;
564 
565         // Set stackcalls for function groups with more than one function
566         FG->m_hasStackCall = (FG->Functions.size() > 1);
567 
568         for (auto FI = FG->begin(), FE = FG->end(); FI != FE; ++FI)
569         {
570             Function* F = *FI;
571 
572             // check all functions in the group to see if there's an vla alloca
573             // function attribute "hasVLA" should be set at ProcessFuncAttributes pass
574             if (F->hasFnAttribute("hasVLA"))
575             {
576                 FG->m_hasVaribleLengthAlloca = true;
577             }
578 
579             // check if FG uses recursion. The "forceRecurse" attribute is set in
580             // ProcessFuncAttributes pass by using Tarjan's algorithm to find recursion.
581             if (F->hasFnAttribute("forceRecurse"))
582             {
583                 FG->m_hasRecursion = true;
584             }
585 
586             // For the remaining attributes we need to loop through all the call instructions
587             for (auto ii = inst_begin(*FI), ei = inst_end(*FI); ii != ei; ii++)
588             {
589                 if (CallInst* call = dyn_cast<CallInst>(&*ii))
590                 {
591                     Function* calledF = call->getCalledFunction();
592                     if (call->isInlineAsm())
593                     {
594                         // Uses inline asm call
595                         FG->m_hasInlineAsm = true;
596                     }
597                     else if (calledF && calledF->hasFnAttribute("referenced-indirectly"))
598                     {
599                         // This is the case where the callee has the "referenced-indirectly" attribute, but we still
600                         // see the callgraph. The callee may not belong to the same FG as the caller, but it still
601                         // counts as a stackcall.
602                         FG->m_hasStackCall = true;
603                     }
604                     else if (!calledF || (calledF->isDeclaration() && calledF->hasFnAttribute("referenced-indirectly")))
605                     {
606                         // This is the true indirect call case, where either the callee's address is taken, or it belongs
607                         // to an external module. We do not know the callgraph in this case, so set the indirectcall flag.
608                         FG->m_hasStackCall = true;
609                         FG->m_hasIndirectCall = true;
610                     }
611                 }
612             }
613         }
614     }
615 }
616 
addIndirectFuncsToKernelGroup(llvm::Module * pModule)617 void GenXFunctionGroupAnalysis::addIndirectFuncsToKernelGroup(llvm::Module* pModule)
618 {
619     auto pMdUtils = getAnalysis<MetaDataUtilsWrapper>().getMetaDataUtils();
620 
621     Function* defaultKernel = IGC::getIntelSymbolTableVoidProgram(pModule);
622     if (!defaultKernel)
623     {
624         return;
625     }
626 
627     IGC_ASSERT(getGroup(defaultKernel) == nullptr);
628     setSubGroupMap(defaultKernel, defaultKernel);
629     IndirectCallGroup = createFunctionGroup(defaultKernel);
630 
631     // Add all externally linked functions into the default kernel group
632     for (auto I = pModule->begin(), E = pModule->end(); I != E; ++I)
633     {
634         Function* F = &(*I);
635         if (F->isDeclaration() || isEntryFunc(pMdUtils, F)) continue;
636 
637         if (F->hasFnAttribute("referenced-indirectly") || F->use_empty())
638         {
639             IGC_ASSERT(getGroup(F) == nullptr);
640             addToFunctionGroup(F, IndirectCallGroup, F);
641         }
642     }
643 }
644 
rebuild(llvm::Module * Mod)645 bool GenXFunctionGroupAnalysis::rebuild(llvm::Module* Mod) {
646     clear();
647     auto pMdUtils = getAnalysis<MetaDataUtilsWrapper>().getMetaDataUtils();
648 
649     // Re-add all indirect functions to the default kernel group
650     addIndirectFuncsToKernelGroup(Mod);
651 
652     // Build and verify function list layout.
653     // Given a list of functions, [K1, A, B, K2, C, K3, D, E, F], we build groups
654     // [K1, A, B], [K2, C], [K3, D, E, F] and verify that none of CallInst escapes
655     // from its group. It is rather cheap to build and verify when there is no
656     // subroutine in this module.
657     //
658     FunctionGroup* CurFG = nullptr;
659     Function* CurSubGrpH = nullptr;
660     for (auto I = Mod->begin(), E = Mod->end(); I != E; ++I)
661     {
662         Function* F = &(*I);
663 
664         // Skip declarations.
665         if (F->empty())
666             continue;
667         // Skip functions belonging to the indirect call group
668         if (isIndirectCallGroup(F))
669             continue;
670 
671         if (isEntryFunc(pMdUtils, F))
672         {
673             CurFG = createFunctionGroup(F);
674             CurSubGrpH = F;
675         }
676         else
677         {
678             if (useStackCall(F))
679                 CurSubGrpH = F;
680             if (CurFG && CurSubGrpH)
681                 addToFunctionGroup(F, CurFG, CurSubGrpH);
682             else
683             {
684                 clear();
685                 return false;
686             }
687         }
688     }
689 
690     // Reset attribute flags
691     setGroupAttributes();
692 
693     // Verification.
694     if (!verify())
695     {
696         clear();
697         return false;
698     }
699 
700     // Commit.
701     M = Mod;
702     return true;
703 }
704 
replaceEntryFunc(Function * OF,Function * NF)705 void GenXFunctionGroupAnalysis::replaceEntryFunc(Function* OF, Function* NF)
706 {
707     auto groupMapIter = GroupMap.find(OF);
708     FunctionGroup* FG = groupMapIter->second;
709     GroupMap.erase(groupMapIter);
710     GroupMap.insert(std::make_pair(NF, FG));
711 
712     FG->replaceGroupHead(OF, NF);
713 
714     // For Entry func, SubGroupMap needs to be updated as well
715     auto SGIter = SubGroupMap.find(OF);
716     if (SGIter != SubGroupMap.end())
717     {
718         SubGroupMap.erase(SGIter);
719         SubGroupMap.insert(std::make_pair(NF, NF));
720     }
721     DenseMap<const Function*, Function*>::iterator SGII, SGIE;
722     for (SGII = SubGroupMap.begin(), SGIE = SubGroupMap.end();
723         SGII != SGIE; ++SGII)
724     {
725         Function* SGH = SGII->second;
726         if (SGH == OF)
727         {
728             SGII->second = NF;
729         }
730     }
731 }
732 
clear()733 void GenXFunctionGroupAnalysis::clear()
734 {
735     GroupMap.clear();
736     SubGroupMap.clear();
737     for (auto I = begin(), E = end(); I != E; ++I)
738         delete* I;
739     Groups.clear();
740     IndirectCallGroup = nullptr;
741     M = nullptr;
742 }
743 
getGroup(const Function * F)744 FunctionGroup* GenXFunctionGroupAnalysis::getGroup(const Function* F)
745 {
746     auto I = GroupMap.find(F);
747     if (I == GroupMap.end())
748         return nullptr;
749     return I->second;
750 }
751 
getGroupForHead(const Function * F)752 FunctionGroup* GenXFunctionGroupAnalysis::getGroupForHead(const Function* F)
753 {
754     auto FG = getGroup(F);
755     if (FG->getHead() == F)
756         return FG;
757     return nullptr;
758 }
759 
addToFunctionGroup(Function * F,FunctionGroup * FG,Function * SubGrpH)760 void GenXFunctionGroupAnalysis::addToFunctionGroup(Function* F,
761     FunctionGroup* FG,
762     Function* SubGrpH)
763 {
764     IGC_ASSERT_MESSAGE(!GroupMap[F], "Function already attached to FunctionGroup");
765     GroupMap[F] = FG;
766     setSubGroupMap(F, SubGrpH);
767     if (F == SubGrpH)
768     {
769         auto* SubGrp = new llvm::SmallVector<llvm::AssertingVH<llvm::Function>, 8>();
770         SubGrp->push_back(F);
771         FG->Functions.push_back(SubGrp);
772     }
773     else
774     {
775         for (auto I = FG->Functions.begin(), E = FG->Functions.end(); I != E; I++)
776         {
777             auto* SubGrp = (*I);
778             IGC_ASSERT(nullptr != SubGrp);
779             if (SubGrp->front() == SubGrpH)
780             {
781                 SubGrp->push_back(F);
782                 return;
783             }
784         }
785         IGC_ASSERT(0);
786     }
787 }
788 
createFunctionGroup(Function * F)789 FunctionGroup* GenXFunctionGroupAnalysis::createFunctionGroup(Function* F)
790 {
791     auto FG = new FunctionGroup;
792     Groups.push_back(FG);
793     addToFunctionGroup(F, FG, F);
794     return FG;
795 }
796 
print(raw_ostream & os)797 void GenXFunctionGroupAnalysis::print(raw_ostream& os)
798 {
799     if (!M)
800     {
801         os << "(nil)\n";
802         return;
803     }
804 
805     unsigned TotalSize = 0;
806     for (auto GI = begin(), GE = end(); GI != GE; ++GI)
807     {
808         for (auto SubGI = (*GI)->Functions.begin(), SubGE = (*GI)->Functions.end();
809             SubGI != SubGE; ++SubGI)
810         {
811             for (auto FI = (*SubGI)->begin(), FE = (*SubGI)->end(); FI != FE; ++FI)
812             {
813                 Function* F = *FI;
814                 unsigned Size = std::accumulate(
815                     F->begin(), F->end(), 0,
816                     [](unsigned s, BasicBlock& BB) { return (unsigned)BB.size() + s; });
817                 TotalSize += Size;
818                 if (F == (*GI)->getHead())
819                     os << "K: " << F->getName() << " [" << Size << "]\n";
820                 else if (F == (*SubGI)->front())
821                     os << "  F: " << F->getName() << " [" << Size << "]\n";
822                 else
823                     os << "     " << F->getName() << " [" << Size << "]\n";
824             }
825         }
826     }
827     os << "Module: " << M->getModuleIdentifier() << " [" << TotalSize << "]\n";
828 }
829 
830 #if defined(_DEBUG)
dump()831 void GenXFunctionGroupAnalysis::dump() {
832     print(llvm::errs());
833 }
834 #endif
835 
836 namespace {
837 
838     /// \brief Custom inliner for subroutines.
839     class SubroutineInliner : public LegacyInlinerBase
840     {
841         EstimateFunctionSize* FSA;
842     public:
843         static char ID; // Pass identification, replacement for typeid
844 
845         // Use extremely low threshold.
SubroutineInliner()846         SubroutineInliner()
847             : LegacyInlinerBase(ID, /*InsertLifetime*/ false),
848             FSA(nullptr) {}
849 
850         InlineCost getInlineCost(IGCLLVM::CallSiteRef CS) override;
851 
852         void getAnalysisUsage(AnalysisUsage& AU) const override;
853         bool runOnSCC(CallGraphSCC& SCC) override;
854 
855         using llvm::Pass::doFinalization;
doFinalization(CallGraph & CG)856         bool doFinalization(CallGraph& CG) override {
857             return removeDeadFunctions(CG);
858         }
859     };
860 
861 } // namespace
862 
863 IGC_INITIALIZE_PASS_BEGIN(SubroutineInliner, "SubroutineInliner", "SubroutineInliner", false, false)
864 IGC_INITIALIZE_PASS_DEPENDENCY(EstimateFunctionSize)
865 IGC_INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
866 IGC_INITIALIZE_PASS_END(SubroutineInliner, "SubroutineInliner", "SubroutineInliner", false, false)
867 
868 char SubroutineInliner::ID = 0;
869 
getAnalysisUsage(AnalysisUsage & AU) const870 void SubroutineInliner::getAnalysisUsage(AnalysisUsage& AU) const
871 {
872     AU.addRequired<EstimateFunctionSize>();
873     AU.addRequired<CodeGenContextWrapper>();
874     LegacyInlinerBase::getAnalysisUsage(AU);
875 }
876 
runOnSCC(CallGraphSCC & SCC)877 bool SubroutineInliner::runOnSCC(CallGraphSCC& SCC)
878 {
879     FSA = &getAnalysis<EstimateFunctionSize>();
880     return LegacyInlinerBase::runOnSCC(SCC);
881 }
882 
883 /// \brief Get the inline cost for the subroutine-inliner.
884 ///
getInlineCost(IGCLLVM::CallSiteRef CS)885 InlineCost SubroutineInliner::getInlineCost(IGCLLVM::CallSiteRef CS)
886 {
887     Function* Callee = CS.getCalledFunction();
888     Function* Caller = CS.getCaller();
889     CodeGenContext* pCtx = getAnalysis<CodeGenContextWrapper>().getCodeGenContext();
890 
891     // Inline direct calls to functions with always inline attribute or a function
892     // whose estimated size is under certain predefined limit.
893     if (Callee && !Callee->isDeclaration() && isInlineViable(*Callee)
894 #if LLVM_VERSION_MAJOR >= 11
895         .isSuccess()
896 #endif
897         )
898     {
899         if (CS.hasFnAttr(llvm::Attribute::AlwaysInline))
900             return IGCLLVM::InlineCost::getAlways();
901 
902         int FCtrl = IGC_GET_FLAG_VALUE(FunctionControl);
903 
904         if (IGC::ForceAlwaysInline())
905             return IGCLLVM::InlineCost::getAlways();
906 
907         if (pCtx->m_enableSubroutine == false)
908             return IGCLLVM::InlineCost::getAlways();
909 
910         if (pCtx->type == ShaderType::OPENCL_SHADER &&
911             Callee->hasFnAttribute(llvm::Attribute::NoInline))
912             return IGCLLVM::InlineCost::getNever();
913 
914         if (Callee->hasFnAttribute("KMPLOCK"))
915             return IGCLLVM::InlineCost::getNever();
916 
917         if (Callee->hasFnAttribute("igc-force-stackcall"))
918             return IGCLLVM::InlineCost::getNever();
919 
920         if (FCtrl == FLAG_FCALL_DEFAULT)
921         {
922             std::size_t PerFuncThreshold = IGC_GET_FLAG_VALUE(SubroutineInlinerThreshold);
923 
924             // A single block function containing only a few instructions.
925             auto isTrivialCall = [](const llvm::Function* F) {
926                 if (!F->empty() && F->size() == 1)
927                     return F->front().size() <= 5;
928                 return false;
929             };
930 
931             if (FSA->getExpandedSize(Caller) <= PerFuncThreshold)
932             {
933                 return IGCLLVM::InlineCost::getAlways();
934             }
935             else if (isTrivialCall(Callee) || FSA->onlyCalledOnce(Callee))
936             {
937                 return IGCLLVM::InlineCost::getAlways();
938             }
939             else if (!FSA->shouldEnableSubroutine())
940             {
941                 // This function returns true if the estimated total inlining size exceeds some module threshold.
942                 // If we don't exceed it, and there's no preference on inline vs noinline, we just inline.
943                 return IGCLLVM::InlineCost::getAlways();
944             }
945         }
946     }
947 
948     return IGCLLVM::InlineCost::getNever();
949 }
950 
createSubroutineInlinerPass()951 Pass* IGC::createSubroutineInlinerPass()
952 {
953     initializeSubroutineInlinerPass(*PassRegistry::getPassRegistry());
954     return new SubroutineInliner();
955 }
956