1 /*========================== begin_copyright_notice ============================
2 
3 Copyright (C) 2020-2021 Intel Corporation
4 
5 SPDX-License-Identifier: MIT
6 
7 ============================= end_copyright_notice ===========================*/
8 
9 #include "vc/GenXCodeGen/GenXOCLRuntimeInfo.h"
10 
11 #include "ConstantEncoder.h"
12 #include "GenX.h"
13 #include "GenXModule.h"
14 #include "GenXSubtarget.h"
15 #include "GenXTargetMachine.h"
16 #include "GenXUtil.h"
17 #include "OCLRuntimeInfoPrinter.h"
18 
19 #include "vc/GenXOpts/Utils/InternalMetadata.h"
20 
21 #include "llvm/GenXIntrinsics/GenXIntrinsics.h"
22 
23 #include <visaBuilder_interface.h>
24 
25 #include <llvm/CodeGen/TargetPassConfig.h>
26 #include <llvm/IR/Argument.h>
27 #include <llvm/IR/Constants.h>
28 #include <llvm/IR/DataLayout.h>
29 #include <llvm/IR/Type.h>
30 #include <llvm/IR/Value.h>
31 #include <llvm/InitializePasses.h>
32 
33 #include <algorithm>
34 #include <cctype>
35 #include <functional>
36 #include <iterator>
37 #include <stack>
38 
39 #include "Probe/Assertion.h"
40 
// Evaluates the VISA API call \p c and asserts that it returned zero.
//
// The do/while(0) wrapper intentionally has no trailing semicolon: the caller
// supplies it, so `CISA_CALL(x);` expands to exactly one statement and remains
// usable as the body of an unbraced if/else.
#define CISA_CALL(c)                                                           \
  do {                                                                         \
    auto Result = (c);                                                         \
    (void)Result;                                                              \
    IGC_ASSERT_MESSAGE(Result == 0, "Call to VISA API failed: " #c);           \
  } while (0)
47 
48 using namespace llvm;
49 
// Definition of the static pass identifier of GenXOCLRuntimeInfo.
char GenXOCLRuntimeInfo::ID = 0;
51 
52 //===----------------------------------------------------------------------===//
53 //
54 // Kernel argument info implementation.
55 //
56 //===----------------------------------------------------------------------===//
// Spellings of the kernel argument attributes that are recognized in the
// argument type descriptor.
struct OCLAttributes {
  // Access qualifiers for resources.
  static constexpr const char *ReadOnly = "read_only";
  static constexpr const char *WriteOnly = "write_only";
  static constexpr const char *ReadWrite = "read_write";

  // Buffer surface.
  static constexpr const char *Buffer = "buffer_t";
  // SVM pointer to buffer.
  static constexpr const char *SVM = "svmptr_t";

  // OpenCL-like types.
  static constexpr const char *Sampler = "sampler_t";
  static constexpr const char *Image1d = "image1d_t";
  static constexpr const char *Image1dArray = "image1d_array_t";
  // Same as 1D image. Seems that there is no difference in runtime.
  static constexpr const char *Image1dBuffer = "image1d_buffer_t";
  static constexpr const char *Image2d = "image2d_t";
  static constexpr const char *Image2dArray = "image2d_array_t";
  static constexpr const char *Image2dMediaBlock = "image2d_media_block_t";
  static constexpr const char *Image3d = "image3d_t";
};
79 
80 namespace llvm {
81 class KernelArgBuilder final {
82   using ArgKindType = GenXOCLRuntimeInfo::KernelArgInfo::KindType;
83   using ArgAccessKindType = GenXOCLRuntimeInfo::KernelArgInfo::AccessKindType;
84 
85   const genx::KernelMetadata &KM;
86   const DataLayout &DL;
87   const GenXSubtarget &ST;
88   const GenXBackendConfig &BC;
89 
90 public:
KernelArgBuilder(const genx::KernelMetadata & KMIn,const DataLayout & DLIn,const GenXSubtarget & STIn,const GenXBackendConfig & BCIn)91   KernelArgBuilder(const genx::KernelMetadata &KMIn, const DataLayout &DLIn,
92                    const GenXSubtarget &STIn, const GenXBackendConfig &BCIn)
93       : KM(KMIn), DL(DLIn), ST(STIn), BC(BCIn) {}
94 
95   GenXOCLRuntimeInfo::KernelArgInfo
96   translateArgument(const Argument &Arg) const;
97 
98 private:
getStrPred(const char * Attr)99   static auto getStrPred(const char *Attr) {
100     return [Attr](StringRef Token) { return Token == Attr; };
101   }
102 
103   ArgKindType getOCLArgKind(ArrayRef<StringRef> Tokens, unsigned ArgNo) const;
104   ArgAccessKindType getOCLArgAccessKind(ArrayRef<StringRef> Tokens,
105                                         ArgKindType Kind) const;
106   std::pair<ArgKindType, ArgAccessKindType>
107   translateArgDesc(unsigned ArgNo) const;
108   unsigned getArgSizeInBytes(const Argument &Arg) const;
109 };
110 } // namespace llvm
111 
112 KernelArgBuilder::ArgAccessKindType
getOCLArgAccessKind(ArrayRef<StringRef> Tokens,ArgKindType Kind) const113 KernelArgBuilder::getOCLArgAccessKind(ArrayRef<StringRef> Tokens,
114                                       ArgKindType Kind) const {
115   switch (Kind) {
116   case ArgKindType::Buffer:
117   case ArgKindType::Image1D:
118   case ArgKindType::Image1DArray:
119   case ArgKindType::Image2D:
120   case ArgKindType::Image2DArray:
121   case ArgKindType::Image2DMediaBlock:
122   case ArgKindType::Image3D:
123   case ArgKindType::SVM:
124   case ArgKindType::BindlessBuffer:
125     if (any_of(Tokens, getStrPred(OCLAttributes::ReadOnly)))
126       return ArgAccessKindType::ReadOnly;
127     if (any_of(Tokens, getStrPred(OCLAttributes::WriteOnly)))
128       return ArgAccessKindType::WriteOnly;
129     return ArgAccessKindType::ReadWrite;
130   default:
131     return ArgAccessKindType::None;
132   }
133 }
134 
135 KernelArgBuilder::ArgKindType
getOCLArgKind(ArrayRef<StringRef> Tokens,unsigned ArgNo) const136 KernelArgBuilder::getOCLArgKind(ArrayRef<StringRef> Tokens,
137                                 unsigned ArgNo) const {
138   unsigned RawKind = KM.getArgKind(ArgNo);
139 
140   // Implicit arguments.
141   genx::KernelArgInfo KAI{RawKind};
142   if (KAI.isLocalSize())
143     return ArgKindType::LocalSize;
144   if (KAI.isGroupCount())
145     return ArgKindType::GroupCount;
146   if (KAI.isPrintBuffer())
147     return ArgKindType::PrintBuffer;
148   if (KAI.isPrivateBase())
149     return ArgKindType::PrivateBase;
150   if (KAI.isByValSVM())
151     return ArgKindType::ByValSVM;
152 
153   // Explicit arguments.
154   switch (KM.getArgCategory(ArgNo)) {
155   default:
156     return ArgKindType::General;
157   case genx::RegCategory::GENERAL:
158     if (any_of(Tokens, getStrPred(OCLAttributes::SVM)))
159       return ArgKindType::SVM;
160     // Bindless buffers have general category but buffer annotation.
161     if (any_of(Tokens, getStrPred(OCLAttributes::Buffer)))
162       return ArgKindType::BindlessBuffer;
163     return ArgKindType::General;
164   case genx::RegCategory::SURFACE:
165     if (any_of(Tokens, getStrPred(OCLAttributes::Image1d)))
166       return ArgKindType::Image1D;
167     if (any_of(Tokens, getStrPred(OCLAttributes::Image1dArray)))
168       return ArgKindType::Image1DArray;
169     if (any_of(Tokens, getStrPred(OCLAttributes::Image1dBuffer)))
170       return ArgKindType::Image1D;
171     if (any_of(Tokens, getStrPred(OCLAttributes::Image2d))) {
172       if (BC.usePlain2DImages())
173         return ArgKindType::Image2D;
174       // Legacy behavior to treat all 2d images as media block.
175       return ArgKindType::Image2DMediaBlock;
176     }
177     if (any_of(Tokens, getStrPred(OCLAttributes::Image2dArray)))
178       return ArgKindType::Image2DArray;
179     if (any_of(Tokens, getStrPred(OCLAttributes::Image2dMediaBlock))) {
180       return ArgKindType::Image2DMediaBlock;
181     }
182     if (any_of(Tokens, getStrPred(OCLAttributes::Image3d)))
183       return ArgKindType::Image3D;
184     return ArgKindType::Buffer;
185   case genx::RegCategory::SAMPLER:
186     return ArgKindType::Sampler;
187   }
188 }
189 
190 // Retrieve Kind and AccessKind from given ArgTypeDesc in metadata.
191 std::pair<KernelArgBuilder::ArgKindType, KernelArgBuilder::ArgAccessKindType>
translateArgDesc(unsigned ArgNo) const192 KernelArgBuilder::translateArgDesc(unsigned ArgNo) const {
193   std::string Translated{KM.getArgTypeDesc(ArgNo)};
194   // Transform each separator to space.
195   std::transform(Translated.begin(), Translated.end(), Translated.begin(),
196                  [](char C) {
197                    if (C != '-' && C != '_' && C != '=' && !std::isalnum(C))
198                      return ' ';
199                    return C;
200                  });
201 
202   // Split and delete duplicates.
203   SmallVector<StringRef, 4> Tokens;
204   StringRef(Translated)
205       .split(Tokens, ' ', -1 /* MaxSplit */, false /* AllowEmpty */);
206   std::sort(Tokens.begin(), Tokens.end());
207   Tokens.erase(std::unique(Tokens.begin(), Tokens.end()), Tokens.end());
208 
209   const ArgKindType Kind = getOCLArgKind(Tokens, ArgNo);
210   const ArgAccessKindType AccessKind = getOCLArgAccessKind(Tokens, Kind);
211   return {Kind, AccessKind};
212 }
213 
getArgSizeInBytes(const Argument & Arg) const214 unsigned KernelArgBuilder::getArgSizeInBytes(const Argument &Arg) const {
215   Type *ArgTy = Arg.getType();
216   if (ArgTy->isPointerTy())
217     return DL.getPointerTypeSize(ArgTy);
218   if (KM.isBufferType(Arg.getArgNo()))
219     return DL.getPointerSize();
220   return ArgTy->getPrimitiveSizeInBits() / genx::ByteBits;
221 }
222 
223 GenXOCLRuntimeInfo::KernelArgInfo
translateArgument(const Argument & Arg) const224 KernelArgBuilder::translateArgument(const Argument &Arg) const {
225   GenXOCLRuntimeInfo::KernelArgInfo Info;
226   const unsigned ArgNo = Arg.getArgNo();
227   std::tie(Info.Kind, Info.AccessKind) = translateArgDesc(ArgNo);
228   Info.Offset = KM.getArgOffset(ArgNo);
229   Info.SizeInBytes = getArgSizeInBytes(Arg);
230   Info.BTI = KM.getBTI(ArgNo);
231   // For implicit arguments that are byval argument linearization, index !=
232   // ArgNo in the IR function.
233   Info.Index = KM.getArgIndex(ArgNo);
234   // Linearization arguments have a non-zero offset in the original explicit
235   // byval arg.
236   Info.OffsetInArg = KM.getOffsetInArg(ArgNo);
237 
238   return Info;
239 }
240 
241 //===----------------------------------------------------------------------===//
242 //
243 // Kernel info implementation.
244 //
245 //===----------------------------------------------------------------------===//
246 // Just perform linear instructions scan to find usage stats.
setInstructionUsageProperties(const FunctionGroup & FG,const GenXBackendConfig & BC)247 void GenXOCLRuntimeInfo::KernelInfo::setInstructionUsageProperties(
248     const FunctionGroup &FG, const GenXBackendConfig &BC) {
249   for (Function *F : FG) {
250     for (BasicBlock &BB : *F) {
251       for (Instruction &I : BB) {
252         switch (GenXIntrinsic::getGenXIntrinsicID(&I)) {
253         default:
254           break;
255         case GenXIntrinsic::genx_group_id_x:
256         case GenXIntrinsic::genx_group_id_y:
257         case GenXIntrinsic::genx_group_id_z:
258           UsesGroupId = true;
259           break;
260         case GenXIntrinsic::genx_barrier:
261         case GenXIntrinsic::genx_sbarrier:
262           UsesBarriers = true;
263           break;
264         case GenXIntrinsic::genx_ssdp4a:
265         case GenXIntrinsic::genx_sudp4a:
266         case GenXIntrinsic::genx_usdp4a:
267         case GenXIntrinsic::genx_uudp4a:
268         case GenXIntrinsic::genx_ssdp4a_sat:
269         case GenXIntrinsic::genx_sudp4a_sat:
270         case GenXIntrinsic::genx_usdp4a_sat:
271         case GenXIntrinsic::genx_uudp4a_sat:
272         case GenXIntrinsic::genx_dpas:
273         case GenXIntrinsic::genx_dpas2:
274         case GenXIntrinsic::genx_dpasw:
275         case GenXIntrinsic::genx_dpas_nosrc0:
276         case GenXIntrinsic::genx_dpasw_nosrc0:
277           UsesDPAS = true;
278           break;
279 #if 0
280         // ThreadPrivateMemSize was not copied to igcmc structures
281         // always defaulting to zero and everything worked. After
282         // removal of igcmc structures TPMSize started to be
283         // initialized to values other than zero and some ispc tests
284         // started to fail.
285         // Restore old behavior as temporary fix until proper
286         // investigation will be performed. This is really strange.
287         case GenXIntrinsic::genx_alloca:
288           ThreadPrivateMemSize = BC.getStackSurfaceMaxSize();
289           break;
290 #endif
291         }
292       }
293     }
294   }
295 }
296 
setMetadataProperties(genx::KernelMetadata & KM,const GenXSubtarget & ST)297 void GenXOCLRuntimeInfo::KernelInfo::setMetadataProperties(
298     genx::KernelMetadata &KM, const GenXSubtarget &ST) {
299   Name = KM.getName().str();
300   SLMSize = KM.getSLMSize();
301 
302 }
303 
setArgumentProperties(const Function & Kernel,const genx::KernelMetadata & KM,const GenXSubtarget & ST,const GenXBackendConfig & BC)304 void GenXOCLRuntimeInfo::KernelInfo::setArgumentProperties(
305     const Function &Kernel, const genx::KernelMetadata &KM,
306     const GenXSubtarget &ST, const GenXBackendConfig &BC) {
307   IGC_ASSERT_MESSAGE(Kernel.arg_size() == KM.getNumArgs(),
308     "Expected same number of arguments");
309   // Some arguments are part of thread payload and do not require
310   // entries in arguments info for OCL runtime.
311   auto NonPayloadArgs =
312       make_filter_range(Kernel.args(), [&KM](const Argument &Arg) {
313         uint32_t ArgKind = KM.getArgKind(Arg.getArgNo());
314         genx::KernelArgInfo KAI(ArgKind);
315         return !KAI.isLocalIDs();
316       });
317   KernelArgBuilder ArgBuilder{KM, Kernel.getParent()->getDataLayout(), ST, BC};
318   transform(NonPayloadArgs, std::back_inserter(ArgInfos),
319             [&ArgBuilder](const Argument &Arg) {
320               return ArgBuilder.translateArgument(Arg);
321             });
322   UsesReadWriteImages = std::any_of(
323       ArgInfos.begin(), ArgInfos.end(), [](const KernelArgInfo &AI) {
324         return AI.isImage() &&
325                AI.getAccessKind() == KernelArgInfo::AccessKindType::ReadWrite;
326       });
327 }
328 
setPrintStrings(const Module & KernelModule)329 void GenXOCLRuntimeInfo::KernelInfo::setPrintStrings(
330     const Module &KernelModule) {
331   const auto *StringsMeta = KernelModule.getNamedMetadata("cm_print_strings");
332   if (!StringsMeta)
333     return;
334   std::transform(StringsMeta->op_begin(), StringsMeta->op_end(),
335                  std::back_inserter(PrintStrings), [](const auto *StringMeta) {
336                    StringRef Str =
337                        cast<MDString>(StringMeta->getOperand(0))->getString();
338                    return std::string{Str.begin(), Str.end()};
339                  });
340 }
341 
KernelInfo(const GenXSubtarget & ST)342 GenXOCLRuntimeInfo::KernelInfo::KernelInfo(const GenXSubtarget &ST)
343     : Name{"Intel_Symbol_Table_Void_Program"}, GRFSizeInBytes{
344                                                    ST.getGRFByteSize()} {}
345 
KernelInfo(const FunctionGroup & FG,const GenXSubtarget & ST,const GenXBackendConfig & BC)346 GenXOCLRuntimeInfo::KernelInfo::KernelInfo(const FunctionGroup &FG,
347                                            const GenXSubtarget &ST,
348                                            const GenXBackendConfig &BC) {
349   setInstructionUsageProperties(FG, BC);
350 
351   GRFSizeInBytes = ST.getGRFByteSize();
352 
353   int StackAmount = genx::getStackAmount(FG.getHead());
354   if (StackAmount == genx::VC_STACK_USAGE_UNKNOWN)
355     StackAmount = BC.getStatelessPrivateMemSize();
356   StatelessPrivateMemSize = StackAmount;
357 
358   SupportsDebugging = BC.emitDebuggableKernels();
359 
360   genx::KernelMetadata KM{FG.getHead()};
361   IGC_ASSERT_MESSAGE(KM.isKernel(), "Expected kernel as head of function group");
362   setMetadataProperties(KM, ST);
363   setArgumentProperties(*FG.getHead(), KM, ST, BC);
364   setPrintStrings(*FG.getHead()->getParent());
365 }
366 
367 //===----------------------------------------------------------------------===//
368 //
369 // Compiled kernel implementation.
370 //
371 //===----------------------------------------------------------------------===//
CompiledKernel(KernelInfo && KI,const FINALIZER_INFO & JI,const GTPinInfo & GI,std::vector<char> DbgInfoIn)372 GenXOCLRuntimeInfo::CompiledKernel::CompiledKernel(KernelInfo &&KI,
373                                                    const FINALIZER_INFO &JI,
374                                                    const GTPinInfo &GI,
375                                                    std::vector<char> DbgInfoIn)
376     : CompilerInfo(std::move(KI)), JitterInfo(JI),
377       GtpinInfo(GI),
378       DebugInfo{std::move(DbgInfoIn)} {
379 }
380 
381 //===----------------------------------------------------------------------===//
382 //
383 // Runtime info pass implementation.
384 //
385 //===----------------------------------------------------------------------===//
386 namespace {
387 
// Relates to GenXOCLRuntimeInfo::SectionInfo. GenXOCLRuntimeInfo::SectionInfo
// can be created from this struct.
struct RawSectionInfo {
  // Encoded section bytes; each appended chunk is keyed by the global
  // variable it was produced from.
  genx::BinaryDataAccumulator<const GlobalVariable *> Data;
  // Relocations of the section, with offsets relative to the section start.
  GenXOCLRuntimeInfo::RelocationSeq Relocations;
};
394 
// A global variable to encode, paired with the information needed to pad the
// data that follows it.
struct GVEncodingInfo {
  // The global variable whose initializer is encoded.
  const GlobalVariable *GV;
  // Alignment requirements of a global variable that will be encoded after
  // the considered GV variable.
  unsigned NextGVAlignment;
};
401 
402 struct ModuleDataT {
403   RawSectionInfo Constant;
404   RawSectionInfo Global;
405 
406   ModuleDataT() = default;
407   ModuleDataT(const Module &M);
408 };
409 
410 template <vISA::GenSymType SymbolClass, typename InputIter, typename OutputIter>
constructSymbols(InputIter First,InputIter Last,OutputIter Out)411 void constructSymbols(InputIter First, InputIter Last, OutputIter Out) {
412   std::transform(First, Last, Out, [](const auto &Section) -> vISA::ZESymEntry {
413     return {SymbolClass, static_cast<uint32_t>(Section.Info.Offset),
414             static_cast<uint32_t>(Section.Info.getSize()),
415             Section.Key->getName().str()};
416   });
417 }
418 
constructFunctionSymbols(genx::BinaryDataAccumulator<const Function * > & GenBinary,bool HasKernel)419 static GenXOCLRuntimeInfo::SymbolSeq constructFunctionSymbols(
420     genx::BinaryDataAccumulator<const Function *> &GenBinary, bool HasKernel) {
421   GenXOCLRuntimeInfo::SymbolSeq Symbols;
422   Symbols.reserve(GenBinary.getNumSections());
423   if (HasKernel) {
424     auto &KernelSection = GenBinary.front();
425     Symbols.emplace_back(vISA::GenSymType::S_KERNEL, KernelSection.Info.Offset,
426                          KernelSection.Info.getSize(),
427                          KernelSection.Key->getName().str());
428   }
429 
430   // Skipping first section if binary has a kernel.
431   constructSymbols<vISA::GenSymType::S_FUNC>(
432       HasKernel ? std::next(GenBinary.begin()) : GenBinary.begin(),
433       GenBinary.end(), std::back_inserter(Symbols));
434 
435   return Symbols;
436 }
437 
438 } // namespace
439 
440 // Appends the binary of function/kernel represented by \p Func and \p BuiltFunc
441 // to \p GenBinary.
442 static void
appendFuncBinary(genx::BinaryDataAccumulator<const Function * > & GenBinary,const Function & Func,const VISAKernel & BuiltFunc)443 appendFuncBinary(genx::BinaryDataAccumulator<const Function *> &GenBinary,
444                  const Function &Func, const VISAKernel &BuiltFunc) {
445   void *GenBin = nullptr;
446   int GenBinSize = 0;
447   CISA_CALL(BuiltFunc.GetGenxBinary(GenBin, GenBinSize));
448   IGC_ASSERT_MESSAGE(GenBin,
449       "Unexpected null buffer or zero-sized kernel (compilation failed?)");
450   IGC_ASSERT_MESSAGE(GenBinSize,
451       "Unexpected null buffer or zero-sized kernel (compilation failed?)");
452   GenBinary.append(&Func, ArrayRef<uint8_t>{static_cast<uint8_t *>(GenBin),
453                                             static_cast<size_t>(GenBinSize)});
454   freeBlock(GenBin);
455 }
456 
457 // Loads if it is possible external files.
458 // Returns the success status of the loading.
459 static bool
loadGenBinaryFromFile(genx::BinaryDataAccumulator<const Function * > & GenBinary,const Function & F,vc::ShaderOverrider const & Loader,vc::ShaderOverrider::Extensions Ext)460 loadGenBinaryFromFile(genx::BinaryDataAccumulator<const Function *> &GenBinary,
461                       const Function &F, vc::ShaderOverrider const &Loader,
462                       vc::ShaderOverrider::Extensions Ext) {
463   void *GenBin = nullptr;
464   int GenBinSize = 0;
465 
466   if (!Loader.override(GenBin, GenBinSize, F.getName(), Ext))
467     return false;
468 
469   if (!GenBin || !GenBinSize) {
470     llvm::errs()
471         << "Unexpected null buffer or zero-sized kernel (loading failed?)\n";
472     return false;
473   }
474 
475   GenBinary.append(&F, ArrayRef<uint8_t>{static_cast<uint8_t *>(GenBin),
476                                          static_cast<size_t>(GenBinSize)});
477   freeBlock(GenBin);
478   return true;
479 }
480 
481 // Constructs gen binary for Function but loading is from injected file.
482 // Returns the success status of the overriding.
483 static bool
tryOverrideBinary(genx::BinaryDataAccumulator<const Function * > & GenBinary,const Function & F,vc::ShaderOverrider const & Loader)484 tryOverrideBinary(genx::BinaryDataAccumulator<const Function *> &GenBinary,
485                   const Function &F, vc::ShaderOverrider const &Loader) {
486   using Extensions = vc::ShaderOverrider::Extensions;
487 
488   // Attempts to override .asm
489   if (loadGenBinaryFromFile(GenBinary, F, Loader, Extensions::ASM))
490     return true;
491 
492   // If it has failed then attempts to override .dat file
493   return loadGenBinaryFromFile(GenBinary, F, Loader, Extensions::DAT);
494 }
495 
496 // Either loads binary from VISABuilder or overrides from file.
loadBinary(genx::BinaryDataAccumulator<const Function * > & GenBinary,VISABuilder & VB,const Function & F,GenXBackendConfig const & BC)497 static void loadBinary(genx::BinaryDataAccumulator<const Function *> &GenBinary,
498                        VISABuilder &VB, const Function &F,
499                        GenXBackendConfig const &BC) {
500   // Attempt to override
501   if (BC.hasShaderOverrider() &&
502       tryOverrideBinary(GenBinary, F, BC.getShaderOverrider()))
503     return;
504 
505   // If there is no overriding or attemp fails, then gets binary from compilation
506   VISAKernel *BuiltKernel = VB.GetVISAKernel(F.getName().str());
507   IGC_ASSERT_MESSAGE(BuiltKernel, "Kernel is null");
508   appendFuncBinary(GenBinary, F, *BuiltKernel);
509 }
510 
511 template <typename UnaryPred>
collectCalledFunctions(const FunctionGroup & FG,UnaryPred && Pred)512 std::vector<const Function *> collectCalledFunctions(const FunctionGroup &FG,
513                                                      UnaryPred &&Pred) {
514   std::vector<const Function *> Collected;
515   std::set<const FunctionGroup *> Visited;
516   std::stack<const FunctionGroup *> Worklist;
517   Worklist.push(&FG);
518 
519   while (!Worklist.empty()) {
520     const FunctionGroup *CurFG = Worklist.top();
521     Worklist.pop();
522     if (Visited.count(CurFG))
523       continue;
524 
525     for (const FunctionGroup *SubFG : CurFG->subgroups())
526       Worklist.push(SubFG);
527     Visited.insert(CurFG);
528 
529     const Function *SubgroupHead = CurFG->getHead();
530     if (Pred(SubgroupHead))
531       Collected.push_back(SubgroupHead);
532   }
533 
534   return Collected;
535 }
536 
537 // Constructs gen binary for provided function group \p FG.
538 static genx::BinaryDataAccumulator<const Function *>
getGenBinary(const FunctionGroup & FG,VISABuilder & VB,GenXBackendConfig const & BC)539 getGenBinary(const FunctionGroup &FG, VISABuilder &VB,
540              GenXBackendConfig const &BC) {
541   Function const *Kernel = FG.getHead();
542   genx::BinaryDataAccumulator<const Function *> GenBinary;
543   // load kernel
544   loadBinary(GenBinary, VB, *Kernel, BC);
545   return std::move(GenBinary);
546 }
547 
appendGlobalVariableData(RawSectionInfo & Sect,GVEncodingInfo GVInfo,const DataLayout & DL)548 static void appendGlobalVariableData(RawSectionInfo &Sect,
549                                      GVEncodingInfo GVInfo,
550                                      const DataLayout &DL) {
551   std::vector<char> Data;
552   GenXOCLRuntimeInfo::RelocationSeq Relocations;
553   vc::encodeConstant(*GVInfo.GV->getInitializer(), DL, std::back_inserter(Data),
554                      std::back_inserter(Relocations));
555 
556   const auto CurrentGVAddress = Sect.Data.getFullSize();
557   const auto UnalignedNextGVAddress = CurrentGVAddress + Data.size();
558   const auto AlignedNextGVAddress =
559       alignTo(UnalignedNextGVAddress, GVInfo.NextGVAlignment);
560 
561   // Pad before the next global.
562   std::fill_n(std::back_inserter(Data),
563               AlignedNextGVAddress - UnalignedNextGVAddress, 0);
564 
565   // vc::encodeConstant calculates offsets relative to GV. Need to make it
566   // relative to section start.
567   vc::shiftRelocations(std::make_move_iterator(Relocations.begin()),
568                        std::make_move_iterator(Relocations.end()),
569                        std::back_inserter(Sect.Relocations), CurrentGVAddress);
570 
571   Sect.Data.append(GVInfo.GV, Data.begin(), Data.end());
572 }
573 
getAlignment(const GlobalVariable & GV)574 static unsigned getAlignment(const GlobalVariable &GV) {
575   unsigned Align = GV.getAlignment();
576   if (Align)
577     return Align;
578   return GV.getParent()->getDataLayout().getABITypeAlignment(GV.getValueType());
579 }
580 
581 template <typename GlobalsRangeT>
582 std::vector<GVEncodingInfo>
prepareGlobalInfosForEncoding(GlobalsRangeT && Globals)583 prepareGlobalInfosForEncoding(GlobalsRangeT &&Globals) {
584   auto RealGlobals = make_filter_range(Globals, [](const GlobalVariable &GV) {
585     return genx::isRealGlobalVariable(GV);
586   });
587   if (RealGlobals.begin() == RealGlobals.end())
588     return {};
589   std::vector<GVEncodingInfo> Infos;
590   std::transform(RealGlobals.begin(), std::prev(RealGlobals.end()),
591                  std::next(RealGlobals.begin()), std::back_inserter(Infos),
592                  [](const GlobalVariable &GV, const GlobalVariable &NextGV) {
593                    return GVEncodingInfo{&GV, getAlignment(NextGV)};
594                  });
595   Infos.push_back({&*std::prev(RealGlobals.end()), 1u});
596   return std::move(Infos);
597 }
598 
ModuleDataT(const Module & M)599 ModuleDataT::ModuleDataT(const Module &M) {
600   std::vector<GVEncodingInfo> GVInfos =
601       prepareGlobalInfosForEncoding(M.globals());
602   for (auto GVInfo : GVInfos) {
603     if (GVInfo.GV->isConstant())
604       appendGlobalVariableData(Constant, GVInfo, M.getDataLayout());
605     else
606       appendGlobalVariableData(Global, GVInfo, M.getDataLayout());
607   }
608 }
609 
getModuleInfo(const Module & M)610 static GenXOCLRuntimeInfo::ModuleInfoT getModuleInfo(const Module &M) {
611   ModuleDataT ModuleData{M};
612   GenXOCLRuntimeInfo::ModuleInfoT ModuleInfo;
613 
614   constructSymbols<vISA::GenSymType::S_GLOBAL_VAR_CONST>(
615       ModuleData.Constant.Data.begin(), ModuleData.Constant.Data.end(),
616       std::back_inserter(ModuleInfo.Constant.Symbols));
617   constructSymbols<vISA::GenSymType::S_GLOBAL_VAR>(
618       ModuleData.Global.Data.begin(), ModuleData.Global.Data.end(),
619       std::back_inserter(ModuleInfo.Global.Symbols));
620 
621   ModuleInfo.Constant.Relocations = std::move(ModuleData.Constant.Relocations);
622   ModuleInfo.Global.Relocations = std::move(ModuleData.Global.Relocations);
623 
624   ModuleInfo.Constant.Data.Buffer =
625       std::move(ModuleData.Constant.Data).emitConsolidatedData();
626   // IGC always sets 0
627   ModuleInfo.Constant.Data.Alignment = 0;
628   ModuleInfo.Constant.Data.AdditionalZeroedSpace = 0;
629 
630   ModuleInfo.Global.Data.Buffer =
631       std::move(ModuleData.Global.Data).emitConsolidatedData();
632   ModuleInfo.Global.Data.Alignment = 0;
633   ModuleInfo.Global.Data.AdditionalZeroedSpace = 0;
634 
635   return std::move(ModuleInfo);
636 }
637 
638 namespace {
639 
640 class RuntimeInfoCollector final {
641   const FunctionGroupAnalysis &FGA;
642   const GenXBackendConfig &BC;
643   VISABuilder &VB;
644   const GenXSubtarget &ST;
645   const Module &M;
646   const GenXDebugInfo &DBG;
647 
648 public:
649   using KernelStorageTy = GenXOCLRuntimeInfo::KernelStorageTy;
650   using CompiledKernel = GenXOCLRuntimeInfo::CompiledKernel;
651   using CompiledModuleT = GenXOCLRuntimeInfo::CompiledModuleT;
652 
653 public:
RuntimeInfoCollector(const FunctionGroupAnalysis & InFGA,const GenXBackendConfig & InBC,VISABuilder & InVB,const GenXSubtarget & InST,const Module & InM,const GenXDebugInfo & InDbg)654   RuntimeInfoCollector(const FunctionGroupAnalysis &InFGA,
655                        const GenXBackendConfig &InBC, VISABuilder &InVB,
656                        const GenXSubtarget &InST, const Module &InM,
657                        const GenXDebugInfo &InDbg)
658       : FGA{InFGA}, BC{InBC}, VB{InVB}, ST{InST}, M{InM}, DBG{InDbg} {}
659 
660   CompiledModuleT run();
661 
662 private:
663   CompiledKernel collectFunctionGroupInfo(const FunctionGroup &FG) const;
664   CompiledKernel collectFunctionSubgroupsInfo(
665       const std::vector<FunctionGroup *> &Subgroups) const;
666 };
667 
668 } // namespace
669 
run()670 RuntimeInfoCollector::CompiledModuleT RuntimeInfoCollector::run() {
671   KernelStorageTy Kernels;
672   std::transform(FGA.begin(), FGA.end(), std::back_inserter(Kernels),
673                  [this](const FunctionGroup *FG) {
674                    return collectFunctionGroupInfo(*FG);
675                  });
676   std::vector<FunctionGroup *> IndirectlyReferencedFuncs;
677   std::copy_if(FGA.subgroup_begin(), FGA.subgroup_end(),
678                std::back_inserter(IndirectlyReferencedFuncs),
679                [](const FunctionGroup *FG) {
680                  return genx::isReferencedIndirectly(FG->getHead());
681                });
682   if (!IndirectlyReferencedFuncs.empty())
683     Kernels.push_back(collectFunctionSubgroupsInfo(IndirectlyReferencedFuncs));
684   return {getModuleInfo(M), std::move(Kernels),
685           M.getDataLayout().getPointerSize()};
686 }
687 
688 RuntimeInfoCollector::CompiledKernel
collectFunctionGroupInfo(const FunctionGroup & FG) const689 RuntimeInfoCollector::collectFunctionGroupInfo(const FunctionGroup &FG) const {
690   using KernelInfo = GenXOCLRuntimeInfo::KernelInfo;
691   using GTPinInfo = GenXOCLRuntimeInfo::GTPinInfo;
692   using CompiledKernel = GenXOCLRuntimeInfo::CompiledKernel;
693 
694   // Compiler info.
695   KernelInfo Info{FG, ST, BC};
696 
697   const Function *KernelFunction = FG.getHead();
698   const std::string KernelName = KernelFunction->getName().str();
699   VISAKernel *VK = VB.GetVISAKernel(KernelName);
700   IGC_ASSERT_MESSAGE(VK, "Kernel is null");
701   FINALIZER_INFO *JitInfo = nullptr;
702   CISA_CALL(VK->GetJitInfo(JitInfo));
703   IGC_ASSERT_MESSAGE(JitInfo, "Jit info is not set by finalizer");
704   // TODO: this a temporary solution for spill mem size
705   // calculation. This has to be redesign properly, maybe w/ multiple
706   // KernelInfos or by introducing FunctionInfos
707   const auto StackCalls = collectCalledFunctions(
708       FG, [](const Function *F) { return genx::requiresStackCall(F); });
709   for (const Function *F : StackCalls) {
710     const std::string FuncName = F->getName().str();
711     VISAKernel *VF = VB.GetVISAKernel(FuncName);
712     IGC_ASSERT_MESSAGE(VF, "Function is null");
713     FINALIZER_INFO *FuncJitInfo = nullptr;
714     CISA_CALL(VF->GetJitInfo(FuncJitInfo));
715     IGC_ASSERT_MESSAGE(FuncJitInfo, "Func jit info is not set by finalizer");
716     JitInfo->isSpill |= FuncJitInfo->isSpill;
717     JitInfo->hasStackcalls |= FuncJitInfo->hasStackcalls;
718     JitInfo->spillMemUsed += FuncJitInfo->spillMemUsed;
719   }
720 
721   genx::BinaryDataAccumulator<const Function *> GenBinary =
722       getGenBinary(FG, VB, BC);
723 
724   const auto& Dbg = DBG.getModuleDebug();
725   auto DbgIt = Dbg.find(KernelFunction);
726   std::vector<char> DebugData;
727   if (DbgIt != std::end(Dbg)) {
728     const auto &ElfImage = DbgIt->second;
729     DebugData = {ElfImage.begin(), ElfImage.end()};
730   }
731   CISA_CALL(VK->GetRelocations(Info.Func.Relocations));
732   // Still have to duplicate function relocations because they are constructed
733   // inside Finalizer.
734   CISA_CALL(VK->GetGenRelocEntryBuffer(Info.LegacyFuncRelocations.Buffer,
735                                        Info.LegacyFuncRelocations.Size,
736                                        Info.LegacyFuncRelocations.Entries));
737   Info.Func.Symbols = constructFunctionSymbols(GenBinary, /*HasKernel=*/true);
738 
739   void *GTPinBuffer = nullptr;
740   unsigned GTPinBufferSize = 0;
741   CISA_CALL(VK->GetGTPinBuffer(GTPinBuffer, GTPinBufferSize));
742 
743   auto *GTPinBytes = static_cast<char *>(GTPinBuffer);
744   GTPinInfo gtpin{GTPinBytes, GTPinBytes + GTPinBufferSize};
745 
746   Info.Func.Data.Buffer = std::move(GenBinary).emitConsolidatedData();
747   return CompiledKernel{std::move(Info), *JitInfo, std::move(gtpin),
748                         std::move(DebugData)};
749 }
750 
751 RuntimeInfoCollector::CompiledKernel
collectFunctionSubgroupsInfo(const std::vector<FunctionGroup * > & Subgroups) const752 RuntimeInfoCollector::collectFunctionSubgroupsInfo(
753     const std::vector<FunctionGroup *> &Subgroups) const {
754   using KernelInfo = GenXOCLRuntimeInfo::KernelInfo;
755   using CompiledKernel = GenXOCLRuntimeInfo::CompiledKernel;
756 
757   IGC_ASSERT(!Subgroups.empty());
758   KernelInfo Info{ST};
759 
760   genx::BinaryDataAccumulator<const Function *> GenBinary;
761   for (auto *FG : Subgroups) {
762     auto *Func = FG->getHead();
763     IGC_ASSERT(genx::fg::isSubGroupHead(*Func));
764     loadBinary(GenBinary, VB, *Func, BC);
765   }
766   Info.Func.Symbols = constructFunctionSymbols(GenBinary, /*HasKernel*/false);
767   Info.Func.Data.Buffer = GenBinary.emitConsolidatedData();
768 
769   return CompiledKernel{std::move(Info), FINALIZER_INFO{}, /*GtpinInfo*/ {},
770                         /*DebugInfo*/ {}};
771 }
772 
// Declares the analyses that runOnModule queries and reports that this pass
// preserves all analyses.
void GenXOCLRuntimeInfo::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<FunctionGroupAnalysis>();
  AU.addRequired<GenXBackendConfig>();
  AU.addRequired<GenXModule>();
  AU.addRequired<GenXDebugInfo>();
  AU.addRequired<TargetPassConfig>();
  AU.setPreservesAll();
}
781 
runOnModule(Module & M)782 bool GenXOCLRuntimeInfo::runOnModule(Module &M) {
783   const auto &FGA = getAnalysis<FunctionGroupAnalysis>();
784   const auto &BC = getAnalysis<GenXBackendConfig>();
785   // Getters for builders are not constant.
786   auto &GM = getAnalysis<GenXModule>();
787   const auto &ST = getAnalysis<TargetPassConfig>()
788                        .getTM<GenXTargetMachine>()
789                        .getGenXSubtarget();
790   const auto &DBG = getAnalysis<GenXDebugInfo>();
791 
792   VISABuilder &VB =
793       *(GM.HasInlineAsm() ? GM.GetVISAAsmReader() : GM.GetCisaBuilder());
794 
795   CompiledModule = RuntimeInfoCollector{FGA, BC, VB, ST, M, DBG}.run();
796   return false;
797 }
798 
// Prints the collected CompiledModule to \p OS by means of
// vc::printOCLRuntimeInfo. \p M is not used.
void GenXOCLRuntimeInfo::print(raw_ostream &OS, const Module *M) const {
  vc::printOCLRuntimeInfo(OS, CompiledModule);
}
802 
// Pass initialization for GenXOCLRuntimeInfo. The listed dependencies are the
// same passes that getAnalysisUsage marks as required.
INITIALIZE_PASS_BEGIN(GenXOCLRuntimeInfo, "GenXOCLRuntimeInfo",
                      "GenXOCLRuntimeInfo", false, true)
INITIALIZE_PASS_DEPENDENCY(FunctionGroupAnalysis);
INITIALIZE_PASS_DEPENDENCY(GenXBackendConfig);
INITIALIZE_PASS_DEPENDENCY(GenXModule);
INITIALIZE_PASS_DEPENDENCY(GenXDebugInfo);
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig);
INITIALIZE_PASS_END(GenXOCLRuntimeInfo, "GenXOCLRuntimeInfo",
                    "GenXOCLRuntimeInfo", false, true)
812