1 /*========================== begin_copyright_notice ============================
2
3 Copyright (C) 2017-2021 Intel Corporation
4
5 SPDX-License-Identifier: MIT
6
7 ============================= end_copyright_notice ===========================*/
8
9 #include "GenCodeGenModule.h"
10 #include "EstimateFunctionSize.h"
11 #include "AdaptorCommon/ImplicitArgs.hpp"
12 #include "Compiler/CISACodeGen/helper.h"
13 #include "Compiler/DebugInfo/ScalarVISAModule.h"
14 #include "Compiler/CodeGenContextWrapper.hpp"
15 #include "Compiler/IGCPassSupport.h"
16 #include "Compiler/MetaDataUtilsWrapper.h"
17 #include "common/igc_regkeys.hpp"
18 #include "common/LLVMWarningsPush.hpp"
19 #include "llvm/Config/llvm-config.h"
20 #include "llvm/IR/Argument.h"
21 #include "llvm/IR/Attributes.h"
22 #include "llvmWrapper/Analysis/InlineCost.h"
23 #include "llvm/ADT/SetVector.h"
24 #include "llvm/ADT/SCCIterator.h"
25 #include "llvm/Analysis/ValueTracking.h"
26 #include "llvmWrapper/IR/CallSite.h"
27 #include "llvm/IR/Module.h"
28 #include "llvm/IR/Function.h"
29 #include "llvm/IR/Instructions.h"
30 #include "llvm/Transforms/IPO.h"
31 #include "llvm/Transforms/IPO/Inliner.h"
32 #include "llvmWrapper/Transforms/Utils/Cloning.h"
33 #include "llvm/IR/DebugInfo.h"
34 #include "llvm/IR/DIBuilder.h"
35 #include "common/LLVMWarningsPop.hpp"
36 #include "DebugInfo/VISADebugEmitter.hpp"
37 #include <numeric>
38 #include <utility>
39 #include "Probe/Assertion.h"
40
41 using namespace llvm;
42 using namespace IGC;
43
44 char GenXCodeGenModule::ID = 0;
45
46 IGC_INITIALIZE_PASS_BEGIN(GenXCodeGenModule, "GenXCodeGenModule", "GenXCodeGenModule", false, false)
IGC_INITIALIZE_PASS_DEPENDENCY(MetaDataUtilsWrapper)47 IGC_INITIALIZE_PASS_DEPENDENCY(MetaDataUtilsWrapper)
48 IGC_INITIALIZE_PASS_DEPENDENCY(GenXFunctionGroupAnalysis)
49 IGC_INITIALIZE_PASS_END(GenXCodeGenModule, "GenXCodeGenModule", "GenXCodeGenModule", false, false)
50
51 llvm::ModulePass* IGC::createGenXCodeGenModulePass()
52 {
53 initializeGenXCodeGenModulePass(*PassRegistry::getPassRegistry());
54 return new GenXCodeGenModule;
55 }
56
GenXCodeGenModule()57 GenXCodeGenModule::GenXCodeGenModule()
58 : llvm::ModulePass(ID), FGA(nullptr), pMdUtils(nullptr), Modified(false)
59 {
60 initializeGenXCodeGenModulePass(*PassRegistry::getPassRegistry());
61 }
62
~GenXCodeGenModule()63 GenXCodeGenModule::~GenXCodeGenModule() {}
64
getAnalysisUsage(AnalysisUsage & AU) const65 void GenXCodeGenModule::getAnalysisUsage(AnalysisUsage& AU) const
66 {
67 AU.addRequired<MetaDataUtilsWrapper>();
68 AU.addRequired<GenXFunctionGroupAnalysis>();
69 AU.addRequired<CodeGenContextWrapper>();
70 AU.addRequired<llvm::CallGraphWrapperPass>();
71 }
72
73 // Update cloned function's metadata.
CloneFuncMetadata(IGCMD::MetaDataUtils * pM,llvm::Function * ClonedF,llvm::Function * F)74 static inline void CloneFuncMetadata(IGCMD::MetaDataUtils* pM,
75 llvm::Function* ClonedF,
76 llvm::Function* F)
77 {
78 using namespace IGC::IGCMD;
79 auto Info = pM->getFunctionsInfoItem(F);
80 auto NewInfo = FunctionInfoMetaDataHandle(FunctionInfoMetaData::get());
81
82 // Copy function type info.
83 if (Info->isTypeHasValue())
84 {
85 NewInfo->setType(Info->getType());
86 }
87
88 // Copy explicit argument info, if any.
89 unsigned i = 0;
90 for (auto AI = Info->begin_ArgInfoList(), AE = Info->begin_ArgInfoList();
91 AI != AE; ++AI)
92 {
93 NewInfo->addArgInfoListItem(Info->getArgInfoListItem(i));
94 i++;
95 }
96
97 // Copy implicit argument info, if any.
98 i = 0;
99 for (auto AI = Info->begin_ImplicitArgInfoList(),
100 AE = Info->end_ImplicitArgInfoList();
101 AI != AE; ++AI)
102 {
103 NewInfo->addImplicitArgInfoListItem(Info->getImplicitArgInfoListItem(i));
104 i++;
105 }
106
107 pM->setFunctionsInfoItem(ClonedF, Info);
108 }
109
cloneFunc(Function * F)110 Function* GenXCodeGenModule::cloneFunc(Function* F)
111 {
112 ValueToValueMapTy VMap;
113
114 Function* ClonedFunc = CloneFunction(F, VMap);
115 //if the function is not added as part of clone, add it
116 if (!F->getParent()->getFunction(ClonedFunc->getName()))
117 F->getParent()->getFunctionList().push_back(ClonedFunc);
118 CloneFuncMetadata(pMdUtils, ClonedFunc, F);
119
120 return ClonedFunc;
121 }
122
getCallerFunc(Value * user)123 inline Function* getCallerFunc(Value* user)
124 {
125 IGC_ASSERT(nullptr != user);
126 Function* caller = nullptr;
127 if (CallInst * CI = dyn_cast<CallInst>(user))
128 {
129 IGC_ASSERT(nullptr != CI->getParent());
130 caller = CI->getParent()->getParent();
131 }
132 IGC_ASSERT_MESSAGE(caller, "cannot be indirect call");
133 return caller;
134 }
135
processFunction(Function & F)136 void GenXCodeGenModule::processFunction(Function& F)
137 {
138 // See what FunctionGroups this Function is called from.
139 SetVector<std::pair<FunctionGroup*, Function*>> CallerFGs;
140 for (auto U : F.users())
141 {
142 Function* Caller = getCallerFunc(U);
143 FunctionGroup* FG = FGA->getGroup(Caller);
144 Function* SubGrpH = FGA->useStackCall(&F) ? (&F) : FGA->getSubGroupMap(Caller);
145 if (FG == nullptr || SubGrpH == nullptr)
146 continue;
147 CallerFGs.insert(std::make_pair(FG, SubGrpH));
148 }
149
150 IGC_ASSERT(CallerFGs.size() >= 1);
151
152 // Get the cloning threshold. If the number of function groups a function belongs to
153 // exceeds the threshold, instead of cloning the function N times, make it an indirect call
154 // and use relocation instead. The function will only be compiled once and runtime must relocate
155 // its address for each caller. This greatly saves on compile time when there are many function
156 // groups that all call the same function.
157 auto cloneTheshold = IGC_GET_FLAG_VALUE(FunctionCloningThreshold);
158 if (F.hasFnAttribute("visaStackCall") &&
159 cloneTheshold > 0 && CallerFGs.size() > cloneTheshold)
160 {
161 auto pCtx = getAnalysis<CodeGenContextWrapper>().getCodeGenContext();
162 auto IFG = FGA->getIndirectCallGroup();
163 IGC_ASSERT(IFG);
164 F.addFnAttr("referenced-indirectly");
165 pCtx->m_enableFunctionPointer = true;
166 FGA->addToFunctionGroup(&F, IFG, &F);
167 return;
168 }
169
170 bool FirstPair = true;
171 for (auto FGPair : CallerFGs)
172 {
173 if (FirstPair)
174 {
175 FGA->addToFunctionGroup(&F, FGPair.first, FGPair.second);
176 FirstPair = false;
177 }
178 else
179 {
180 // clone the function, add to this function group
181 auto FCloned = cloneFunc(&F);
182
183 // Copy F's property over
184 copyFuncProperties(FCloned, &F);
185
186 Function* SubGrpH = FGA->useStackCall(&F) ? FCloned : FGPair.second;
187 FGA->addToFunctionGroup(FCloned, FGPair.first, SubGrpH);
188 Modified = true;
189 // update the edge after clone, it can handle self-recursion
190 for (auto UI = F.use_begin(), UE = F.use_end(); UI != UE; /*empty*/)
191 {
192 // Increment iterator after setting U to change the use.
193 Use* U = &*UI++;
194 Function* Caller = cast<CallInst>(U->getUser())->getParent()->getParent();
195 FunctionGroup* InFG = FGA->getGroup(Caller);
196 Function* InSubGrpH = FGA->useStackCall(&F) ? FCloned : FGA->getSubGroupMap(Caller);
197 if (InFG == FGPair.first && InSubGrpH == SubGrpH)
198 {
199 *U = FCloned;
200 }
201 }
202 }
203 }
204 }
205
processSCC(std::vector<llvm::CallGraphNode * > * SCCNodes)206 void GenXCodeGenModule::processSCC(std::vector<llvm::CallGraphNode*>* SCCNodes)
207 {
208 // force stack-call for every function in SCC
209 for (CallGraphNode* Node : (*SCCNodes))
210 {
211 Function* F = Node->getFunction();
212 F->addFnAttr("visaStackCall");
213 }
214
215 // See what FunctionGroups this SCC is called from.
216 SetVector<FunctionGroup*> CallerFGs;
217 for (CallGraphNode* Node : (*SCCNodes))
218 {
219 Function* F = Node->getFunction();
220 for (auto U : F->users())
221 {
222 Function* Caller = getCallerFunc(U);
223 FunctionGroup* FG = FGA->getGroup(Caller);
224 if (FG == nullptr)
225 continue;
226 CallerFGs.insert(FG);
227 }
228 }
229 IGC_ASSERT(CallerFGs.size() >= 1);
230
231 // Use the same cloning threshold for single function SCCs, but making every stack function
232 // in the SCC indirect calls to prevent cloning the entire SCC N times.
233 auto cloneTheshold = IGC_GET_FLAG_VALUE(FunctionCloningThreshold);
234 if (cloneTheshold > 0 && CallerFGs.size() > cloneTheshold)
235 {
236 auto pCtx = getAnalysis<CodeGenContextWrapper>().getCodeGenContext();
237 for (CallGraphNode* Node : (*SCCNodes))
238 {
239 Function* F = Node->getFunction();
240 auto IFG = FGA->getIndirectCallGroup();
241 IGC_ASSERT(IFG && F->hasFnAttribute("visaStackCall"));
242 F->addFnAttr("referenced-indirectly");
243 pCtx->m_enableFunctionPointer = true;
244 FGA->addToFunctionGroup(F, IFG, F);
245 }
246 return;
247 }
248
249 bool FirstPair = true;
250 for (auto FG : CallerFGs)
251 {
252 if (FirstPair)
253 {
254 for (CallGraphNode* Node : (*SCCNodes))
255 {
256 Function* F = Node->getFunction();
257 FGA->addToFunctionGroup(F, FG, F);
258 }
259 FirstPair = false;
260 }
261 else
262 {
263 // clone the functions in scc, add to this function group
264 llvm::DenseMap<Function*, Function*> CloneMap;
265 for (CallGraphNode* Node : (*SCCNodes))
266 {
267 Function* F = Node->getFunction();
268 auto FCloned = cloneFunc(F);
269
270 // Copy properties
271 copyFuncProperties(FCloned, F);
272
273 FGA->addToFunctionGroup(FCloned, FG, FCloned);
274 CloneMap.insert(std::make_pair(F, FCloned));
275 }
276 Modified = true;
277 // update the call-edges for every function in SCC,
278 // move edges to the cloned SCC, including the recursion edge
279 for (CallGraphNode* Node : (*SCCNodes))
280 {
281 Function* F = Node->getFunction();
282 for (auto UI = F->use_begin(), UE = F->use_end(); UI != UE; /*empty*/)
283 {
284 // Increment iterator after setting U to change the use.
285 Use* U = &*UI++;
286 Function* Caller = cast<CallInst>(U->getUser())->getParent()->getParent();
287 FunctionGroup* InFG = FGA->getGroup(Caller);
288 if (InFG == FG)
289 {
290 *U = CloneMap[F];
291 }
292 }
293 }
294 }
295 }
296 }
297
setFuncProperties(CallGraph & CG)298 void GenXCodeGenModule::setFuncProperties(CallGraph& CG)
299 {
300 for (auto I = scc_begin(&CG), IE = scc_end(&CG); I != IE; ++I)
301 {
302 const std::vector<CallGraphNode*>& SCCNodes = (*I);
303 for (CallGraphNode* Node : SCCNodes)
304 {
305 Function* F = Node->getFunction();
306 if (F != nullptr && !F->isDeclaration())
307 {
308 if (Node->empty()) {
309 FGA->setLeafFunc(F);
310 }
311 }
312 }
313 }
314 }
315
copyFuncProperties(llvm::Function * To,llvm::Function * From)316 void GenXCodeGenModule::copyFuncProperties(llvm::Function* To, llvm::Function* From)
317 {
318 FGA->copyFuncProperties(To, From);
319 }
320
321 /// Deduce non-null argument attributes for subroutines.
DeduceNonNullAttribute(Module & M)322 static bool DeduceNonNullAttribute(Module& M)
323 {
324 bool Modifided = false;
325 for (auto& F : M.getFunctionList())
326 {
327 if (F.isDeclaration() || F.hasExternalLinkage() || F.hasAddressTaken())
328 continue;
329
330 bool Skip = false;
331 for (auto U : F.users())
332 {
333 if (!isa<CallInst>(U))
334 {
335 Skip = true;
336 break;
337 }
338 }
339 if (Skip)
340 continue;
341
342 for (auto& Arg : F.args())
343 {
344 // Only for used pointer arguments.
345 if (Arg.use_empty() || !Arg.getType()->isPointerTy())
346 continue;
347
348 // If all call sites are passing a non-null value to this argument, then
349 // this argument cannot be a null ptr.
350 bool NotNull = true;
351 for (auto U : F.users())
352 {
353 auto CI = cast<CallInst>(U);
354 Value* V = CI->getArgOperand(Arg.getArgNo());
355 auto DL = F.getParent()->getDataLayout();
356 if (!isKnownNonZero(V, DL))
357 {
358 NotNull = false;
359 break;
360 }
361 }
362
363 if (NotNull) {
364 // FIXME: Below lines possibly can be refactored to be simpler.
365 AttributeList attrSet = AttributeList::get(Arg.getParent()->getContext(), Arg.getArgNo() + 1, llvm::Attribute::NonNull);
366 Arg.addAttr(attrSet.getAttribute(Arg.getArgNo() + 1, llvm::Attribute::NonNull));
367 Modifided = true;
368 }
369 }
370 }
371 return Modifided;
372 }
373
runOnModule(Module & M)374 bool GenXCodeGenModule::runOnModule(Module& M)
375 {
376 FGA = &getAnalysis<GenXFunctionGroupAnalysis>();
377
378 // Already built.
379 if (FGA->getModule())
380 return false;
381
382 pMdUtils = getAnalysis<MetaDataUtilsWrapper>().getMetaDataUtils();
383 CallGraph& CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
384
385 setFuncProperties(CG);
386
387 std::vector<std::vector<CallGraphNode*>*> SCCVec;
388 for (auto I = scc_begin(&CG), IE = scc_end(&CG); I != IE; ++I)
389 {
390 std::vector<CallGraphNode*>* SCCNodes = new std::vector<CallGraphNode*>((*I));
391 SCCVec.push_back(SCCNodes);
392 }
393
394 // Add all indirect functions to the default kernel group
395 FGA->addIndirectFuncsToKernelGroup(&M);
396
397 for (auto I = SCCVec.rbegin(), IE = SCCVec.rend(); I != IE; ++I)
398 {
399 std::vector<CallGraphNode*>* SCCNodes = (*I);
400 for (CallGraphNode* Node : (*SCCNodes))
401 {
402 Function* F = Node->getFunction();
403 if (!F || F->isDeclaration()) continue;
404 // skip functions belonging to the indirect call group
405 if (FGA->isIndirectCallGroup(F)) continue;
406
407 if (isEntryFunc(pMdUtils, F))
408 {
409 FGA->setSubGroupMap(F, F);
410 FGA->createFunctionGroup(F);
411 }
412 else if (SCCNodes->size() == 1)
413 {
414 processFunction(*F);
415 }
416 else
417 {
418 processSCC(SCCNodes);
419 break;
420 }
421 }
422 delete SCCNodes;
423 }
424
425 this->pMdUtils->save(M.getContext());
426
427 // Check and set FG attribute flags
428 FGA->setGroupAttributes();
429
430 // By swapping, we sort the function list to ensure codegen order for
431 // functions. This relies on llvm module pass manager's implementation detail.
432 SmallVector<Function*, 16> OrderedList;
433 for (auto GI = FGA->begin(), GE = FGA->end(); GI != GE; ++GI)
434 {
435 for (auto SubGI = (*GI)->Functions.begin(), SubGE = (*GI)->Functions.end();
436 SubGI != SubGE; ++SubGI)
437 {
438 for (auto FI = (*SubGI)->begin(), FE = (*SubGI)->end(); FI != FE; ++FI)
439 {
440 OrderedList.push_back(*FI);
441 }
442 }
443 }
444
445 // Input L1 = [A, B, C, D, E] // Functions in groups
446 // Input L2 = [A, C, G, B, D, E, F] // Functions in the module
447 // Output L2 = [A, B, C, D, E, G, F] // Ordered functions in the module
448 IGC_ASSERT_MESSAGE(OrderedList.size() <= M.size(), "out of sync");
449 Function* CurF = &(*M.begin());
450 for (auto I = OrderedList.begin(), E = OrderedList.end(); I != E; ++I)
451 {
452 IGC_ASSERT(nullptr != CurF);
453 Function* F = *I;
454 if (CurF != F)
455 // Move F before CurF. This just works See BasicBlock::moveBefore.
456 // CurF remains the same for the next iteration.
457 M.getFunctionList().splice(CurF->getIterator(), M.getFunctionList(), F);
458 else
459 {
460 auto it = CurF->getIterator();
461 CurF = &*(++it);
462 }
463 }
464
465 // Changing simd size from 32 to 16 for function groups with function calls due to slicing
466 for (auto GI = FGA->begin(), GE = FGA->end(); GI != GE; ++GI)
467 {
468 FunctionGroup* FG = *GI;
469 if (!FG->isSingle() || FG->hasStackCall())
470 {
471 Function* Kernel = FG->getHead();
472 IGC::IGCMD::FunctionInfoMetaDataHandle funcInfoMD = pMdUtils->getFunctionsInfoItem(Kernel);
473 int simd_size = funcInfoMD->getSubGroupSize()->getSIMD_size();
474 if (simd_size == 32)
475 funcInfoMD->getSubGroupSize()->setSIMD_size(16);
476 }
477 }
478
479 IGC_ASSERT(FGA->verify());
480
481 FGA->setModule(&M);
482 Modified |= DeduceNonNullAttribute(M);
483
484 return Modified;
485 }
486
487 ////////////////////////////////////////////////////////////////////////////////
488 /// GenXFunctionGroupAnalysis implementation detail
489 ////////////////////////////////////////////////////////////////////////////////
490
491 char GenXFunctionGroupAnalysis::ID = 0;
492
493 IGC_INITIALIZE_PASS_BEGIN(GenXFunctionGroupAnalysis, "GenXFunctionGroupAnalysis", "GenXFunctionGroupAnalysis", false, true)
IGC_INITIALIZE_PASS_DEPENDENCY(CodeGenContextWrapper)494 IGC_INITIALIZE_PASS_DEPENDENCY(CodeGenContextWrapper)
495 IGC_INITIALIZE_PASS_DEPENDENCY(MetaDataUtilsWrapper)
496 IGC_INITIALIZE_PASS_END(GenXFunctionGroupAnalysis, "GenXFunctionGroupAnalysis", "GenXFunctionGroupAnalysis", false, true)
497
498 llvm::ImmutablePass* IGC::createGenXFunctionGroupAnalysisPass() {
499 initializeGenXFunctionGroupAnalysisPass(*PassRegistry::getPassRegistry());
500 return new GenXFunctionGroupAnalysis;
501 }
502
GenXFunctionGroupAnalysis()503 GenXFunctionGroupAnalysis::GenXFunctionGroupAnalysis()
504 : ImmutablePass(ID), M(nullptr) {
505 initializeGenXFunctionGroupAnalysisPass(*PassRegistry::getPassRegistry());
506 }
507
getAnalysisUsage(AnalysisUsage & AU) const508 void GenXFunctionGroupAnalysis::getAnalysisUsage(AnalysisUsage& AU) const {
509 AU.addRequired<CodeGenContextWrapper>();
510 AU.addRequired<MetaDataUtilsWrapper>();
511 AU.setPreservesAll();
512 }
513
verify()514 bool GenXFunctionGroupAnalysis::verify()
515 {
516 for (auto GI = begin(), GE = end(); GI != GE; ++GI)
517 {
518 for (auto SubGI = (*GI)->Functions.begin(), SubGE = (*GI)->Functions.end();
519 SubGI != SubGE; ++SubGI)
520 {
521 for (auto FI = (*SubGI)->begin(), FE = (*SubGI)->end(); FI != FE; ++FI)
522 {
523 Function* F = *FI;
524 if (F->hasFnAttribute("referenced-indirectly"))
525 {
526 continue;
527 }
528 // If F is an unused non-kernel function, although it should have been
529 // deleted, that is fine.
530 for (auto U : F->users())
531 {
532 Function* Caller = getCallerFunc(U);
533 FunctionGroup* CallerFG = getGroup(Caller);
534 // Caller's FG should be the same as FG. Otherwise, something is wrong.
535 if (CallerFG != (*GI))
536 {
537 printf("%s\n", F->getName().data());
538 printf("%s\n", Caller->getName().data());
539 return false;
540 }
541 Function* SubGrpH = getSubGroupMap(F);
542 // Caller's sub-group header must be the first element of the sub-vector
543 if (SubGrpH != (*SubGI)->front())
544 return false;
545 }
546 }
547 }
548 }
549 // Everything is OK.
550 return true;
551 }
552
useStackCall(llvm::Function * F)553 bool GenXFunctionGroupAnalysis::useStackCall(llvm::Function* F)
554 {
555 return (F->hasFnAttribute("visaStackCall"));
556 }
557
setGroupAttributes()558 void GenXFunctionGroupAnalysis::setGroupAttributes()
559 {
560 for (auto FG : Groups)
561 {
562 // Ignore the Indirect Call Group, as there is no actual callgraph
563 if (FG == IndirectCallGroup) continue;
564
565 // Set stackcalls for function groups with more than one function
566 FG->m_hasStackCall = (FG->Functions.size() > 1);
567
568 for (auto FI = FG->begin(), FE = FG->end(); FI != FE; ++FI)
569 {
570 Function* F = *FI;
571
572 // check all functions in the group to see if there's an vla alloca
573 // function attribute "hasVLA" should be set at ProcessFuncAttributes pass
574 if (F->hasFnAttribute("hasVLA"))
575 {
576 FG->m_hasVaribleLengthAlloca = true;
577 }
578
579 // check if FG uses recursion. The "forceRecurse" attribute is set in
580 // ProcessFuncAttributes pass by using Tarjan's algorithm to find recursion.
581 if (F->hasFnAttribute("forceRecurse"))
582 {
583 FG->m_hasRecursion = true;
584 }
585
586 // For the remaining attributes we need to loop through all the call instructions
587 for (auto ii = inst_begin(*FI), ei = inst_end(*FI); ii != ei; ii++)
588 {
589 if (CallInst* call = dyn_cast<CallInst>(&*ii))
590 {
591 Function* calledF = call->getCalledFunction();
592 if (call->isInlineAsm())
593 {
594 // Uses inline asm call
595 FG->m_hasInlineAsm = true;
596 }
597 else if (calledF && calledF->hasFnAttribute("referenced-indirectly"))
598 {
599 // This is the case where the callee has the "referenced-indirectly" attribute, but we still
600 // see the callgraph. The callee may not belong to the same FG as the caller, but it still
601 // counts as a stackcall.
602 FG->m_hasStackCall = true;
603 }
604 else if (!calledF || (calledF->isDeclaration() && calledF->hasFnAttribute("referenced-indirectly")))
605 {
606 // This is the true indirect call case, where either the callee's address is taken, or it belongs
607 // to an external module. We do not know the callgraph in this case, so set the indirectcall flag.
608 FG->m_hasStackCall = true;
609 FG->m_hasIndirectCall = true;
610 }
611 }
612 }
613 }
614 }
615 }
616
addIndirectFuncsToKernelGroup(llvm::Module * pModule)617 void GenXFunctionGroupAnalysis::addIndirectFuncsToKernelGroup(llvm::Module* pModule)
618 {
619 auto pMdUtils = getAnalysis<MetaDataUtilsWrapper>().getMetaDataUtils();
620
621 Function* defaultKernel = IGC::getIntelSymbolTableVoidProgram(pModule);
622 if (!defaultKernel)
623 {
624 return;
625 }
626
627 IGC_ASSERT(getGroup(defaultKernel) == nullptr);
628 setSubGroupMap(defaultKernel, defaultKernel);
629 IndirectCallGroup = createFunctionGroup(defaultKernel);
630
631 // Add all externally linked functions into the default kernel group
632 for (auto I = pModule->begin(), E = pModule->end(); I != E; ++I)
633 {
634 Function* F = &(*I);
635 if (F->isDeclaration() || isEntryFunc(pMdUtils, F)) continue;
636
637 if (F->hasFnAttribute("referenced-indirectly") || F->use_empty())
638 {
639 IGC_ASSERT(getGroup(F) == nullptr);
640 addToFunctionGroup(F, IndirectCallGroup, F);
641 }
642 }
643 }
644
rebuild(llvm::Module * Mod)645 bool GenXFunctionGroupAnalysis::rebuild(llvm::Module* Mod) {
646 clear();
647 auto pMdUtils = getAnalysis<MetaDataUtilsWrapper>().getMetaDataUtils();
648
649 // Re-add all indirect functions to the default kernel group
650 addIndirectFuncsToKernelGroup(Mod);
651
652 // Build and verify function list layout.
653 // Given a list of functions, [K1, A, B, K2, C, K3, D, E, F], we build groups
654 // [K1, A, B], [K2, C], [K3, D, E, F] and verify that none of CallInst escapes
655 // from its group. It is rather cheap to build and verify when there is no
656 // subroutine in this module.
657 //
658 FunctionGroup* CurFG = nullptr;
659 Function* CurSubGrpH = nullptr;
660 for (auto I = Mod->begin(), E = Mod->end(); I != E; ++I)
661 {
662 Function* F = &(*I);
663
664 // Skip declarations.
665 if (F->empty())
666 continue;
667 // Skip functions belonging to the indirect call group
668 if (isIndirectCallGroup(F))
669 continue;
670
671 if (isEntryFunc(pMdUtils, F))
672 {
673 CurFG = createFunctionGroup(F);
674 CurSubGrpH = F;
675 }
676 else
677 {
678 if (useStackCall(F))
679 CurSubGrpH = F;
680 if (CurFG && CurSubGrpH)
681 addToFunctionGroup(F, CurFG, CurSubGrpH);
682 else
683 {
684 clear();
685 return false;
686 }
687 }
688 }
689
690 // Reset attribute flags
691 setGroupAttributes();
692
693 // Verification.
694 if (!verify())
695 {
696 clear();
697 return false;
698 }
699
700 // Commit.
701 M = Mod;
702 return true;
703 }
704
replaceEntryFunc(Function * OF,Function * NF)705 void GenXFunctionGroupAnalysis::replaceEntryFunc(Function* OF, Function* NF)
706 {
707 auto groupMapIter = GroupMap.find(OF);
708 FunctionGroup* FG = groupMapIter->second;
709 GroupMap.erase(groupMapIter);
710 GroupMap.insert(std::make_pair(NF, FG));
711
712 FG->replaceGroupHead(OF, NF);
713
714 // For Entry func, SubGroupMap needs to be updated as well
715 auto SGIter = SubGroupMap.find(OF);
716 if (SGIter != SubGroupMap.end())
717 {
718 SubGroupMap.erase(SGIter);
719 SubGroupMap.insert(std::make_pair(NF, NF));
720 }
721 DenseMap<const Function*, Function*>::iterator SGII, SGIE;
722 for (SGII = SubGroupMap.begin(), SGIE = SubGroupMap.end();
723 SGII != SGIE; ++SGII)
724 {
725 Function* SGH = SGII->second;
726 if (SGH == OF)
727 {
728 SGII->second = NF;
729 }
730 }
731 }
732
clear()733 void GenXFunctionGroupAnalysis::clear()
734 {
735 GroupMap.clear();
736 SubGroupMap.clear();
737 for (auto I = begin(), E = end(); I != E; ++I)
738 delete* I;
739 Groups.clear();
740 IndirectCallGroup = nullptr;
741 M = nullptr;
742 }
743
getGroup(const Function * F)744 FunctionGroup* GenXFunctionGroupAnalysis::getGroup(const Function* F)
745 {
746 auto I = GroupMap.find(F);
747 if (I == GroupMap.end())
748 return nullptr;
749 return I->second;
750 }
751
getGroupForHead(const Function * F)752 FunctionGroup* GenXFunctionGroupAnalysis::getGroupForHead(const Function* F)
753 {
754 auto FG = getGroup(F);
755 if (FG->getHead() == F)
756 return FG;
757 return nullptr;
758 }
759
addToFunctionGroup(Function * F,FunctionGroup * FG,Function * SubGrpH)760 void GenXFunctionGroupAnalysis::addToFunctionGroup(Function* F,
761 FunctionGroup* FG,
762 Function* SubGrpH)
763 {
764 IGC_ASSERT_MESSAGE(!GroupMap[F], "Function already attached to FunctionGroup");
765 GroupMap[F] = FG;
766 setSubGroupMap(F, SubGrpH);
767 if (F == SubGrpH)
768 {
769 auto* SubGrp = new llvm::SmallVector<llvm::AssertingVH<llvm::Function>, 8>();
770 SubGrp->push_back(F);
771 FG->Functions.push_back(SubGrp);
772 }
773 else
774 {
775 for (auto I = FG->Functions.begin(), E = FG->Functions.end(); I != E; I++)
776 {
777 auto* SubGrp = (*I);
778 IGC_ASSERT(nullptr != SubGrp);
779 if (SubGrp->front() == SubGrpH)
780 {
781 SubGrp->push_back(F);
782 return;
783 }
784 }
785 IGC_ASSERT(0);
786 }
787 }
788
createFunctionGroup(Function * F)789 FunctionGroup* GenXFunctionGroupAnalysis::createFunctionGroup(Function* F)
790 {
791 auto FG = new FunctionGroup;
792 Groups.push_back(FG);
793 addToFunctionGroup(F, FG, F);
794 return FG;
795 }
796
print(raw_ostream & os)797 void GenXFunctionGroupAnalysis::print(raw_ostream& os)
798 {
799 if (!M)
800 {
801 os << "(nil)\n";
802 return;
803 }
804
805 unsigned TotalSize = 0;
806 for (auto GI = begin(), GE = end(); GI != GE; ++GI)
807 {
808 for (auto SubGI = (*GI)->Functions.begin(), SubGE = (*GI)->Functions.end();
809 SubGI != SubGE; ++SubGI)
810 {
811 for (auto FI = (*SubGI)->begin(), FE = (*SubGI)->end(); FI != FE; ++FI)
812 {
813 Function* F = *FI;
814 unsigned Size = std::accumulate(
815 F->begin(), F->end(), 0,
816 [](unsigned s, BasicBlock& BB) { return (unsigned)BB.size() + s; });
817 TotalSize += Size;
818 if (F == (*GI)->getHead())
819 os << "K: " << F->getName() << " [" << Size << "]\n";
820 else if (F == (*SubGI)->front())
821 os << " F: " << F->getName() << " [" << Size << "]\n";
822 else
823 os << " " << F->getName() << " [" << Size << "]\n";
824 }
825 }
826 }
827 os << "Module: " << M->getModuleIdentifier() << " [" << TotalSize << "]\n";
828 }
829
830 #if defined(_DEBUG)
dump()831 void GenXFunctionGroupAnalysis::dump() {
832 print(llvm::errs());
833 }
834 #endif
835
836 namespace {
837
838 /// \brief Custom inliner for subroutines.
839 class SubroutineInliner : public LegacyInlinerBase
840 {
841 EstimateFunctionSize* FSA;
842 public:
843 static char ID; // Pass identification, replacement for typeid
844
845 // Use extremely low threshold.
SubroutineInliner()846 SubroutineInliner()
847 : LegacyInlinerBase(ID, /*InsertLifetime*/ false),
848 FSA(nullptr) {}
849
850 InlineCost getInlineCost(IGCLLVM::CallSiteRef CS) override;
851
852 void getAnalysisUsage(AnalysisUsage& AU) const override;
853 bool runOnSCC(CallGraphSCC& SCC) override;
854
855 using llvm::Pass::doFinalization;
doFinalization(CallGraph & CG)856 bool doFinalization(CallGraph& CG) override {
857 return removeDeadFunctions(CG);
858 }
859 };
860
861 } // namespace
862
863 IGC_INITIALIZE_PASS_BEGIN(SubroutineInliner, "SubroutineInliner", "SubroutineInliner", false, false)
864 IGC_INITIALIZE_PASS_DEPENDENCY(EstimateFunctionSize)
865 IGC_INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
866 IGC_INITIALIZE_PASS_END(SubroutineInliner, "SubroutineInliner", "SubroutineInliner", false, false)
867
868 char SubroutineInliner::ID = 0;
869
getAnalysisUsage(AnalysisUsage & AU) const870 void SubroutineInliner::getAnalysisUsage(AnalysisUsage& AU) const
871 {
872 AU.addRequired<EstimateFunctionSize>();
873 AU.addRequired<CodeGenContextWrapper>();
874 LegacyInlinerBase::getAnalysisUsage(AU);
875 }
876
runOnSCC(CallGraphSCC & SCC)877 bool SubroutineInliner::runOnSCC(CallGraphSCC& SCC)
878 {
879 FSA = &getAnalysis<EstimateFunctionSize>();
880 return LegacyInlinerBase::runOnSCC(SCC);
881 }
882
883 /// \brief Get the inline cost for the subroutine-inliner.
884 ///
getInlineCost(IGCLLVM::CallSiteRef CS)885 InlineCost SubroutineInliner::getInlineCost(IGCLLVM::CallSiteRef CS)
886 {
887 Function* Callee = CS.getCalledFunction();
888 Function* Caller = CS.getCaller();
889 CodeGenContext* pCtx = getAnalysis<CodeGenContextWrapper>().getCodeGenContext();
890
891 // Inline direct calls to functions with always inline attribute or a function
892 // whose estimated size is under certain predefined limit.
893 if (Callee && !Callee->isDeclaration() && isInlineViable(*Callee)
894 #if LLVM_VERSION_MAJOR >= 11
895 .isSuccess()
896 #endif
897 )
898 {
899 if (CS.hasFnAttr(llvm::Attribute::AlwaysInline))
900 return IGCLLVM::InlineCost::getAlways();
901
902 int FCtrl = IGC_GET_FLAG_VALUE(FunctionControl);
903
904 if (IGC::ForceAlwaysInline())
905 return IGCLLVM::InlineCost::getAlways();
906
907 if (pCtx->m_enableSubroutine == false)
908 return IGCLLVM::InlineCost::getAlways();
909
910 if (pCtx->type == ShaderType::OPENCL_SHADER &&
911 Callee->hasFnAttribute(llvm::Attribute::NoInline))
912 return IGCLLVM::InlineCost::getNever();
913
914 if (Callee->hasFnAttribute("KMPLOCK"))
915 return IGCLLVM::InlineCost::getNever();
916
917 if (Callee->hasFnAttribute("igc-force-stackcall"))
918 return IGCLLVM::InlineCost::getNever();
919
920 if (FCtrl == FLAG_FCALL_DEFAULT)
921 {
922 std::size_t PerFuncThreshold = IGC_GET_FLAG_VALUE(SubroutineInlinerThreshold);
923
924 // A single block function containing only a few instructions.
925 auto isTrivialCall = [](const llvm::Function* F) {
926 if (!F->empty() && F->size() == 1)
927 return F->front().size() <= 5;
928 return false;
929 };
930
931 if (FSA->getExpandedSize(Caller) <= PerFuncThreshold)
932 {
933 return IGCLLVM::InlineCost::getAlways();
934 }
935 else if (isTrivialCall(Callee) || FSA->onlyCalledOnce(Callee))
936 {
937 return IGCLLVM::InlineCost::getAlways();
938 }
939 else if (!FSA->shouldEnableSubroutine())
940 {
941 // This function returns true if the estimated total inlining size exceeds some module threshold.
942 // If we don't exceed it, and there's no preference on inline vs noinline, we just inline.
943 return IGCLLVM::InlineCost::getAlways();
944 }
945 }
946 }
947
948 return IGCLLVM::InlineCost::getNever();
949 }
950
createSubroutineInlinerPass()951 Pass* IGC::createSubroutineInlinerPass()
952 {
953 initializeSubroutineInlinerPass(*PassRegistry::getPassRegistry());
954 return new SubroutineInliner();
955 }
956