1 /*========================== begin_copyright_notice ============================
2
3 Copyright (C) 2020-2021 Intel Corporation
4
5 SPDX-License-Identifier: MIT
6
7 ============================= end_copyright_notice ===========================*/
8
9 #include "vc/GenXCodeGen/GenXOCLRuntimeInfo.h"
10
11 #include "ConstantEncoder.h"
12 #include "GenX.h"
13 #include "GenXModule.h"
14 #include "GenXSubtarget.h"
15 #include "GenXTargetMachine.h"
16 #include "GenXUtil.h"
17 #include "OCLRuntimeInfoPrinter.h"
18
19 #include "vc/GenXOpts/Utils/InternalMetadata.h"
20
21 #include "llvm/GenXIntrinsics/GenXIntrinsics.h"
22
23 #include <visaBuilder_interface.h>
24
25 #include <llvm/CodeGen/TargetPassConfig.h>
26 #include <llvm/IR/Argument.h>
27 #include <llvm/IR/Constants.h>
28 #include <llvm/IR/DataLayout.h>
29 #include <llvm/IR/Type.h>
30 #include <llvm/IR/Value.h>
31 #include <llvm/InitializePasses.h>
32
33 #include <algorithm>
34 #include <cctype>
35 #include <functional>
36 #include <iterator>
37 #include <stack>
38
39 #include "Probe/Assertion.h"
40
// Evaluates the vISA API call \p c once and asserts that it returned 0; the
// assertion message embeds the stringified call expression.
// `(void)Result` keeps the variable "used" even if IGC_ASSERT_MESSAGE expands
// to nothing (presumably in builds without assertions).
// The macro intentionally does NOT end with a semicolon: the caller supplies
// it, so `CISA_CALL(x);` is exactly one statement and can be used safely in
// an unbraced if/else.
#define CISA_CALL(c)                                                           \
  do {                                                                         \
    auto Result = (c);                                                         \
    (void)Result;                                                              \
    IGC_ASSERT_MESSAGE(Result == 0, "Call to VISA API failed: " #c);           \
  } while (0)
47
48 using namespace llvm;
49
// Storage for the pass identifier of GenXOCLRuntimeInfo.
// NOTE(review): presumably the pass is identified by the address of this
// variable rather than by its value -- confirm against the pass declaration.
char GenXOCLRuntimeInfo::ID = 0;
51
52 //===----------------------------------------------------------------------===//
53 //
54 // Kernel argument info implementation.
55 //
56 //===----------------------------------------------------------------------===//
// Kernel argument attribute tokens recognized in this file. Each constant is
// the exact token text that is matched against an argument type description.
struct OCLAttributes {
  // Access qualifiers for resources.
  static constexpr const char *ReadOnly = "read_only";
  static constexpr const char *WriteOnly = "write_only";
  static constexpr const char *ReadWrite = "read_write";

  // Buffer surface.
  static constexpr const char *Buffer = "buffer_t";
  // SVM pointer to buffer.
  static constexpr const char *SVM = "svmptr_t";

  // OpenCL-like types.
  static constexpr const char *Sampler = "sampler_t";
  static constexpr const char *Image1d = "image1d_t";
  static constexpr const char *Image1dArray = "image1d_array_t";
  // Same as 1D image. Seems that there is no difference in runtime.
  static constexpr const char *Image1dBuffer = "image1d_buffer_t";
  static constexpr const char *Image2d = "image2d_t";
  static constexpr const char *Image2dArray = "image2d_array_t";
  static constexpr const char *Image2dMediaBlock = "image2d_media_block_t";
  static constexpr const char *Image3d = "image3d_t";
};
79
80 namespace llvm {
81 class KernelArgBuilder final {
82 using ArgKindType = GenXOCLRuntimeInfo::KernelArgInfo::KindType;
83 using ArgAccessKindType = GenXOCLRuntimeInfo::KernelArgInfo::AccessKindType;
84
85 const genx::KernelMetadata &KM;
86 const DataLayout &DL;
87 const GenXSubtarget &ST;
88 const GenXBackendConfig &BC;
89
90 public:
KernelArgBuilder(const genx::KernelMetadata & KMIn,const DataLayout & DLIn,const GenXSubtarget & STIn,const GenXBackendConfig & BCIn)91 KernelArgBuilder(const genx::KernelMetadata &KMIn, const DataLayout &DLIn,
92 const GenXSubtarget &STIn, const GenXBackendConfig &BCIn)
93 : KM(KMIn), DL(DLIn), ST(STIn), BC(BCIn) {}
94
95 GenXOCLRuntimeInfo::KernelArgInfo
96 translateArgument(const Argument &Arg) const;
97
98 private:
getStrPred(const char * Attr)99 static auto getStrPred(const char *Attr) {
100 return [Attr](StringRef Token) { return Token == Attr; };
101 }
102
103 ArgKindType getOCLArgKind(ArrayRef<StringRef> Tokens, unsigned ArgNo) const;
104 ArgAccessKindType getOCLArgAccessKind(ArrayRef<StringRef> Tokens,
105 ArgKindType Kind) const;
106 std::pair<ArgKindType, ArgAccessKindType>
107 translateArgDesc(unsigned ArgNo) const;
108 unsigned getArgSizeInBytes(const Argument &Arg) const;
109 };
110 } // namespace llvm
111
112 KernelArgBuilder::ArgAccessKindType
getOCLArgAccessKind(ArrayRef<StringRef> Tokens,ArgKindType Kind) const113 KernelArgBuilder::getOCLArgAccessKind(ArrayRef<StringRef> Tokens,
114 ArgKindType Kind) const {
115 switch (Kind) {
116 case ArgKindType::Buffer:
117 case ArgKindType::Image1D:
118 case ArgKindType::Image1DArray:
119 case ArgKindType::Image2D:
120 case ArgKindType::Image2DArray:
121 case ArgKindType::Image2DMediaBlock:
122 case ArgKindType::Image3D:
123 case ArgKindType::SVM:
124 case ArgKindType::BindlessBuffer:
125 if (any_of(Tokens, getStrPred(OCLAttributes::ReadOnly)))
126 return ArgAccessKindType::ReadOnly;
127 if (any_of(Tokens, getStrPred(OCLAttributes::WriteOnly)))
128 return ArgAccessKindType::WriteOnly;
129 return ArgAccessKindType::ReadWrite;
130 default:
131 return ArgAccessKindType::None;
132 }
133 }
134
135 KernelArgBuilder::ArgKindType
getOCLArgKind(ArrayRef<StringRef> Tokens,unsigned ArgNo) const136 KernelArgBuilder::getOCLArgKind(ArrayRef<StringRef> Tokens,
137 unsigned ArgNo) const {
138 unsigned RawKind = KM.getArgKind(ArgNo);
139
140 // Implicit arguments.
141 genx::KernelArgInfo KAI{RawKind};
142 if (KAI.isLocalSize())
143 return ArgKindType::LocalSize;
144 if (KAI.isGroupCount())
145 return ArgKindType::GroupCount;
146 if (KAI.isPrintBuffer())
147 return ArgKindType::PrintBuffer;
148 if (KAI.isPrivateBase())
149 return ArgKindType::PrivateBase;
150 if (KAI.isByValSVM())
151 return ArgKindType::ByValSVM;
152
153 // Explicit arguments.
154 switch (KM.getArgCategory(ArgNo)) {
155 default:
156 return ArgKindType::General;
157 case genx::RegCategory::GENERAL:
158 if (any_of(Tokens, getStrPred(OCLAttributes::SVM)))
159 return ArgKindType::SVM;
160 // Bindless buffers have general category but buffer annotation.
161 if (any_of(Tokens, getStrPred(OCLAttributes::Buffer)))
162 return ArgKindType::BindlessBuffer;
163 return ArgKindType::General;
164 case genx::RegCategory::SURFACE:
165 if (any_of(Tokens, getStrPred(OCLAttributes::Image1d)))
166 return ArgKindType::Image1D;
167 if (any_of(Tokens, getStrPred(OCLAttributes::Image1dArray)))
168 return ArgKindType::Image1DArray;
169 if (any_of(Tokens, getStrPred(OCLAttributes::Image1dBuffer)))
170 return ArgKindType::Image1D;
171 if (any_of(Tokens, getStrPred(OCLAttributes::Image2d))) {
172 if (BC.usePlain2DImages())
173 return ArgKindType::Image2D;
174 // Legacy behavior to treat all 2d images as media block.
175 return ArgKindType::Image2DMediaBlock;
176 }
177 if (any_of(Tokens, getStrPred(OCLAttributes::Image2dArray)))
178 return ArgKindType::Image2DArray;
179 if (any_of(Tokens, getStrPred(OCLAttributes::Image2dMediaBlock))) {
180 return ArgKindType::Image2DMediaBlock;
181 }
182 if (any_of(Tokens, getStrPred(OCLAttributes::Image3d)))
183 return ArgKindType::Image3D;
184 return ArgKindType::Buffer;
185 case genx::RegCategory::SAMPLER:
186 return ArgKindType::Sampler;
187 }
188 }
189
190 // Retrieve Kind and AccessKind from given ArgTypeDesc in metadata.
191 std::pair<KernelArgBuilder::ArgKindType, KernelArgBuilder::ArgAccessKindType>
translateArgDesc(unsigned ArgNo) const192 KernelArgBuilder::translateArgDesc(unsigned ArgNo) const {
193 std::string Translated{KM.getArgTypeDesc(ArgNo)};
194 // Transform each separator to space.
195 std::transform(Translated.begin(), Translated.end(), Translated.begin(),
196 [](char C) {
197 if (C != '-' && C != '_' && C != '=' && !std::isalnum(C))
198 return ' ';
199 return C;
200 });
201
202 // Split and delete duplicates.
203 SmallVector<StringRef, 4> Tokens;
204 StringRef(Translated)
205 .split(Tokens, ' ', -1 /* MaxSplit */, false /* AllowEmpty */);
206 std::sort(Tokens.begin(), Tokens.end());
207 Tokens.erase(std::unique(Tokens.begin(), Tokens.end()), Tokens.end());
208
209 const ArgKindType Kind = getOCLArgKind(Tokens, ArgNo);
210 const ArgAccessKindType AccessKind = getOCLArgAccessKind(Tokens, Kind);
211 return {Kind, AccessKind};
212 }
213
getArgSizeInBytes(const Argument & Arg) const214 unsigned KernelArgBuilder::getArgSizeInBytes(const Argument &Arg) const {
215 Type *ArgTy = Arg.getType();
216 if (ArgTy->isPointerTy())
217 return DL.getPointerTypeSize(ArgTy);
218 if (KM.isBufferType(Arg.getArgNo()))
219 return DL.getPointerSize();
220 return ArgTy->getPrimitiveSizeInBits() / genx::ByteBits;
221 }
222
223 GenXOCLRuntimeInfo::KernelArgInfo
translateArgument(const Argument & Arg) const224 KernelArgBuilder::translateArgument(const Argument &Arg) const {
225 GenXOCLRuntimeInfo::KernelArgInfo Info;
226 const unsigned ArgNo = Arg.getArgNo();
227 std::tie(Info.Kind, Info.AccessKind) = translateArgDesc(ArgNo);
228 Info.Offset = KM.getArgOffset(ArgNo);
229 Info.SizeInBytes = getArgSizeInBytes(Arg);
230 Info.BTI = KM.getBTI(ArgNo);
231 // For implicit arguments that are byval argument linearization, index !=
232 // ArgNo in the IR function.
233 Info.Index = KM.getArgIndex(ArgNo);
234 // Linearization arguments have a non-zero offset in the original explicit
235 // byval arg.
236 Info.OffsetInArg = KM.getOffsetInArg(ArgNo);
237
238 return Info;
239 }
240
241 //===----------------------------------------------------------------------===//
242 //
243 // Kernel info implementation.
244 //
245 //===----------------------------------------------------------------------===//
246 // Just perform linear instructions scan to find usage stats.
setInstructionUsageProperties(const FunctionGroup & FG,const GenXBackendConfig & BC)247 void GenXOCLRuntimeInfo::KernelInfo::setInstructionUsageProperties(
248 const FunctionGroup &FG, const GenXBackendConfig &BC) {
249 for (Function *F : FG) {
250 for (BasicBlock &BB : *F) {
251 for (Instruction &I : BB) {
252 switch (GenXIntrinsic::getGenXIntrinsicID(&I)) {
253 default:
254 break;
255 case GenXIntrinsic::genx_group_id_x:
256 case GenXIntrinsic::genx_group_id_y:
257 case GenXIntrinsic::genx_group_id_z:
258 UsesGroupId = true;
259 break;
260 case GenXIntrinsic::genx_barrier:
261 case GenXIntrinsic::genx_sbarrier:
262 UsesBarriers = true;
263 break;
264 case GenXIntrinsic::genx_ssdp4a:
265 case GenXIntrinsic::genx_sudp4a:
266 case GenXIntrinsic::genx_usdp4a:
267 case GenXIntrinsic::genx_uudp4a:
268 case GenXIntrinsic::genx_ssdp4a_sat:
269 case GenXIntrinsic::genx_sudp4a_sat:
270 case GenXIntrinsic::genx_usdp4a_sat:
271 case GenXIntrinsic::genx_uudp4a_sat:
272 case GenXIntrinsic::genx_dpas:
273 case GenXIntrinsic::genx_dpas2:
274 case GenXIntrinsic::genx_dpasw:
275 case GenXIntrinsic::genx_dpas_nosrc0:
276 case GenXIntrinsic::genx_dpasw_nosrc0:
277 UsesDPAS = true;
278 break;
279 #if 0
280 // ThreadPrivateMemSize was not copied to igcmc structures
281 // always defaulting to zero and everything worked. After
282 // removal of igcmc structures TPMSize started to be
283 // initialized to values other than zero and some ispc tests
284 // started to fail.
285 // Restore old behavior as temporary fix until proper
286 // investigation will be performed. This is really strange.
287 case GenXIntrinsic::genx_alloca:
288 ThreadPrivateMemSize = BC.getStackSurfaceMaxSize();
289 break;
290 #endif
291 }
292 }
293 }
294 }
295 }
296
setMetadataProperties(genx::KernelMetadata & KM,const GenXSubtarget & ST)297 void GenXOCLRuntimeInfo::KernelInfo::setMetadataProperties(
298 genx::KernelMetadata &KM, const GenXSubtarget &ST) {
299 Name = KM.getName().str();
300 SLMSize = KM.getSLMSize();
301
302 }
303
setArgumentProperties(const Function & Kernel,const genx::KernelMetadata & KM,const GenXSubtarget & ST,const GenXBackendConfig & BC)304 void GenXOCLRuntimeInfo::KernelInfo::setArgumentProperties(
305 const Function &Kernel, const genx::KernelMetadata &KM,
306 const GenXSubtarget &ST, const GenXBackendConfig &BC) {
307 IGC_ASSERT_MESSAGE(Kernel.arg_size() == KM.getNumArgs(),
308 "Expected same number of arguments");
309 // Some arguments are part of thread payload and do not require
310 // entries in arguments info for OCL runtime.
311 auto NonPayloadArgs =
312 make_filter_range(Kernel.args(), [&KM](const Argument &Arg) {
313 uint32_t ArgKind = KM.getArgKind(Arg.getArgNo());
314 genx::KernelArgInfo KAI(ArgKind);
315 return !KAI.isLocalIDs();
316 });
317 KernelArgBuilder ArgBuilder{KM, Kernel.getParent()->getDataLayout(), ST, BC};
318 transform(NonPayloadArgs, std::back_inserter(ArgInfos),
319 [&ArgBuilder](const Argument &Arg) {
320 return ArgBuilder.translateArgument(Arg);
321 });
322 UsesReadWriteImages = std::any_of(
323 ArgInfos.begin(), ArgInfos.end(), [](const KernelArgInfo &AI) {
324 return AI.isImage() &&
325 AI.getAccessKind() == KernelArgInfo::AccessKindType::ReadWrite;
326 });
327 }
328
setPrintStrings(const Module & KernelModule)329 void GenXOCLRuntimeInfo::KernelInfo::setPrintStrings(
330 const Module &KernelModule) {
331 const auto *StringsMeta = KernelModule.getNamedMetadata("cm_print_strings");
332 if (!StringsMeta)
333 return;
334 std::transform(StringsMeta->op_begin(), StringsMeta->op_end(),
335 std::back_inserter(PrintStrings), [](const auto *StringMeta) {
336 StringRef Str =
337 cast<MDString>(StringMeta->getOperand(0))->getString();
338 return std::string{Str.begin(), Str.end()};
339 });
340 }
341
KernelInfo(const GenXSubtarget & ST)342 GenXOCLRuntimeInfo::KernelInfo::KernelInfo(const GenXSubtarget &ST)
343 : Name{"Intel_Symbol_Table_Void_Program"}, GRFSizeInBytes{
344 ST.getGRFByteSize()} {}
345
KernelInfo(const FunctionGroup & FG,const GenXSubtarget & ST,const GenXBackendConfig & BC)346 GenXOCLRuntimeInfo::KernelInfo::KernelInfo(const FunctionGroup &FG,
347 const GenXSubtarget &ST,
348 const GenXBackendConfig &BC) {
349 setInstructionUsageProperties(FG, BC);
350
351 GRFSizeInBytes = ST.getGRFByteSize();
352
353 int StackAmount = genx::getStackAmount(FG.getHead());
354 if (StackAmount == genx::VC_STACK_USAGE_UNKNOWN)
355 StackAmount = BC.getStatelessPrivateMemSize();
356 StatelessPrivateMemSize = StackAmount;
357
358 SupportsDebugging = BC.emitDebuggableKernels();
359
360 genx::KernelMetadata KM{FG.getHead()};
361 IGC_ASSERT_MESSAGE(KM.isKernel(), "Expected kernel as head of function group");
362 setMetadataProperties(KM, ST);
363 setArgumentProperties(*FG.getHead(), KM, ST, BC);
364 setPrintStrings(*FG.getHead()->getParent());
365 }
366
367 //===----------------------------------------------------------------------===//
368 //
369 // Compiled kernel implementation.
370 //
371 //===----------------------------------------------------------------------===//
CompiledKernel(KernelInfo && KI,const FINALIZER_INFO & JI,const GTPinInfo & GI,std::vector<char> DbgInfoIn)372 GenXOCLRuntimeInfo::CompiledKernel::CompiledKernel(KernelInfo &&KI,
373 const FINALIZER_INFO &JI,
374 const GTPinInfo &GI,
375 std::vector<char> DbgInfoIn)
376 : CompilerInfo(std::move(KI)), JitterInfo(JI),
377 GtpinInfo(GI),
378 DebugInfo{std::move(DbgInfoIn)} {
379 }
380
381 //===----------------------------------------------------------------------===//
382 //
383 // Runtime info pass implementation.
384 //
385 //===----------------------------------------------------------------------===//
386 namespace {
387
// Relates to GenXOCLRuntimeInfo::SectionInfo. GenXOCLRuntimeInfo::SectionInfo
// can be created from this struct.
// Holds the not yet consolidated contents of one data section.
struct RawSectionInfo {
  // Encoded bytes of the section, accumulated per global variable.
  genx::BinaryDataAccumulator<const GlobalVariable *> Data;
  // Relocations that belong to the bytes stored in Data.
  GenXOCLRuntimeInfo::RelocationSeq Relocations;
};
394
// Describes one global variable that is about to be encoded into a section.
struct GVEncodingInfo {
  // The global variable to encode.
  const GlobalVariable *GV;
  // Alignment requirements of a global variable that will be encoded after
  // the considered GV variable.
  unsigned NextGVAlignment;
};
401
// Encoded data of all module-level global variables, split into two sections
// depending on whether the variable is constant.
struct ModuleDataT {
  // Section for global variables for which isConstant() holds.
  RawSectionInfo Constant;
  // Section for all remaining global variables.
  RawSectionInfo Global;

  ModuleDataT() = default;
  // Encodes the global variables of \p M into the two sections.
  ModuleDataT(const Module &M);
};
409
410 template <vISA::GenSymType SymbolClass, typename InputIter, typename OutputIter>
constructSymbols(InputIter First,InputIter Last,OutputIter Out)411 void constructSymbols(InputIter First, InputIter Last, OutputIter Out) {
412 std::transform(First, Last, Out, [](const auto &Section) -> vISA::ZESymEntry {
413 return {SymbolClass, static_cast<uint32_t>(Section.Info.Offset),
414 static_cast<uint32_t>(Section.Info.getSize()),
415 Section.Key->getName().str()};
416 });
417 }
418
constructFunctionSymbols(genx::BinaryDataAccumulator<const Function * > & GenBinary,bool HasKernel)419 static GenXOCLRuntimeInfo::SymbolSeq constructFunctionSymbols(
420 genx::BinaryDataAccumulator<const Function *> &GenBinary, bool HasKernel) {
421 GenXOCLRuntimeInfo::SymbolSeq Symbols;
422 Symbols.reserve(GenBinary.getNumSections());
423 if (HasKernel) {
424 auto &KernelSection = GenBinary.front();
425 Symbols.emplace_back(vISA::GenSymType::S_KERNEL, KernelSection.Info.Offset,
426 KernelSection.Info.getSize(),
427 KernelSection.Key->getName().str());
428 }
429
430 // Skipping first section if binary has a kernel.
431 constructSymbols<vISA::GenSymType::S_FUNC>(
432 HasKernel ? std::next(GenBinary.begin()) : GenBinary.begin(),
433 GenBinary.end(), std::back_inserter(Symbols));
434
435 return Symbols;
436 }
437
438 } // namespace
439
440 // Appends the binary of function/kernel represented by \p Func and \p BuiltFunc
441 // to \p GenBinary.
442 static void
appendFuncBinary(genx::BinaryDataAccumulator<const Function * > & GenBinary,const Function & Func,const VISAKernel & BuiltFunc)443 appendFuncBinary(genx::BinaryDataAccumulator<const Function *> &GenBinary,
444 const Function &Func, const VISAKernel &BuiltFunc) {
445 void *GenBin = nullptr;
446 int GenBinSize = 0;
447 CISA_CALL(BuiltFunc.GetGenxBinary(GenBin, GenBinSize));
448 IGC_ASSERT_MESSAGE(GenBin,
449 "Unexpected null buffer or zero-sized kernel (compilation failed?)");
450 IGC_ASSERT_MESSAGE(GenBinSize,
451 "Unexpected null buffer or zero-sized kernel (compilation failed?)");
452 GenBinary.append(&Func, ArrayRef<uint8_t>{static_cast<uint8_t *>(GenBin),
453 static_cast<size_t>(GenBinSize)});
454 freeBlock(GenBin);
455 }
456
457 // Loads if it is possible external files.
458 // Returns the success status of the loading.
459 static bool
loadGenBinaryFromFile(genx::BinaryDataAccumulator<const Function * > & GenBinary,const Function & F,vc::ShaderOverrider const & Loader,vc::ShaderOverrider::Extensions Ext)460 loadGenBinaryFromFile(genx::BinaryDataAccumulator<const Function *> &GenBinary,
461 const Function &F, vc::ShaderOverrider const &Loader,
462 vc::ShaderOverrider::Extensions Ext) {
463 void *GenBin = nullptr;
464 int GenBinSize = 0;
465
466 if (!Loader.override(GenBin, GenBinSize, F.getName(), Ext))
467 return false;
468
469 if (!GenBin || !GenBinSize) {
470 llvm::errs()
471 << "Unexpected null buffer or zero-sized kernel (loading failed?)\n";
472 return false;
473 }
474
475 GenBinary.append(&F, ArrayRef<uint8_t>{static_cast<uint8_t *>(GenBin),
476 static_cast<size_t>(GenBinSize)});
477 freeBlock(GenBin);
478 return true;
479 }
480
481 // Constructs gen binary for Function but loading is from injected file.
482 // Returns the success status of the overriding.
483 static bool
tryOverrideBinary(genx::BinaryDataAccumulator<const Function * > & GenBinary,const Function & F,vc::ShaderOverrider const & Loader)484 tryOverrideBinary(genx::BinaryDataAccumulator<const Function *> &GenBinary,
485 const Function &F, vc::ShaderOverrider const &Loader) {
486 using Extensions = vc::ShaderOverrider::Extensions;
487
488 // Attempts to override .asm
489 if (loadGenBinaryFromFile(GenBinary, F, Loader, Extensions::ASM))
490 return true;
491
492 // If it has failed then attempts to override .dat file
493 return loadGenBinaryFromFile(GenBinary, F, Loader, Extensions::DAT);
494 }
495
496 // Either loads binary from VISABuilder or overrides from file.
loadBinary(genx::BinaryDataAccumulator<const Function * > & GenBinary,VISABuilder & VB,const Function & F,GenXBackendConfig const & BC)497 static void loadBinary(genx::BinaryDataAccumulator<const Function *> &GenBinary,
498 VISABuilder &VB, const Function &F,
499 GenXBackendConfig const &BC) {
500 // Attempt to override
501 if (BC.hasShaderOverrider() &&
502 tryOverrideBinary(GenBinary, F, BC.getShaderOverrider()))
503 return;
504
505 // If there is no overriding or attemp fails, then gets binary from compilation
506 VISAKernel *BuiltKernel = VB.GetVISAKernel(F.getName().str());
507 IGC_ASSERT_MESSAGE(BuiltKernel, "Kernel is null");
508 appendFuncBinary(GenBinary, F, *BuiltKernel);
509 }
510
511 template <typename UnaryPred>
collectCalledFunctions(const FunctionGroup & FG,UnaryPred && Pred)512 std::vector<const Function *> collectCalledFunctions(const FunctionGroup &FG,
513 UnaryPred &&Pred) {
514 std::vector<const Function *> Collected;
515 std::set<const FunctionGroup *> Visited;
516 std::stack<const FunctionGroup *> Worklist;
517 Worklist.push(&FG);
518
519 while (!Worklist.empty()) {
520 const FunctionGroup *CurFG = Worklist.top();
521 Worklist.pop();
522 if (Visited.count(CurFG))
523 continue;
524
525 for (const FunctionGroup *SubFG : CurFG->subgroups())
526 Worklist.push(SubFG);
527 Visited.insert(CurFG);
528
529 const Function *SubgroupHead = CurFG->getHead();
530 if (Pred(SubgroupHead))
531 Collected.push_back(SubgroupHead);
532 }
533
534 return Collected;
535 }
536
537 // Constructs gen binary for provided function group \p FG.
538 static genx::BinaryDataAccumulator<const Function *>
getGenBinary(const FunctionGroup & FG,VISABuilder & VB,GenXBackendConfig const & BC)539 getGenBinary(const FunctionGroup &FG, VISABuilder &VB,
540 GenXBackendConfig const &BC) {
541 Function const *Kernel = FG.getHead();
542 genx::BinaryDataAccumulator<const Function *> GenBinary;
543 // load kernel
544 loadBinary(GenBinary, VB, *Kernel, BC);
545 return std::move(GenBinary);
546 }
547
appendGlobalVariableData(RawSectionInfo & Sect,GVEncodingInfo GVInfo,const DataLayout & DL)548 static void appendGlobalVariableData(RawSectionInfo &Sect,
549 GVEncodingInfo GVInfo,
550 const DataLayout &DL) {
551 std::vector<char> Data;
552 GenXOCLRuntimeInfo::RelocationSeq Relocations;
553 vc::encodeConstant(*GVInfo.GV->getInitializer(), DL, std::back_inserter(Data),
554 std::back_inserter(Relocations));
555
556 const auto CurrentGVAddress = Sect.Data.getFullSize();
557 const auto UnalignedNextGVAddress = CurrentGVAddress + Data.size();
558 const auto AlignedNextGVAddress =
559 alignTo(UnalignedNextGVAddress, GVInfo.NextGVAlignment);
560
561 // Pad before the next global.
562 std::fill_n(std::back_inserter(Data),
563 AlignedNextGVAddress - UnalignedNextGVAddress, 0);
564
565 // vc::encodeConstant calculates offsets relative to GV. Need to make it
566 // relative to section start.
567 vc::shiftRelocations(std::make_move_iterator(Relocations.begin()),
568 std::make_move_iterator(Relocations.end()),
569 std::back_inserter(Sect.Relocations), CurrentGVAddress);
570
571 Sect.Data.append(GVInfo.GV, Data.begin(), Data.end());
572 }
573
getAlignment(const GlobalVariable & GV)574 static unsigned getAlignment(const GlobalVariable &GV) {
575 unsigned Align = GV.getAlignment();
576 if (Align)
577 return Align;
578 return GV.getParent()->getDataLayout().getABITypeAlignment(GV.getValueType());
579 }
580
581 template <typename GlobalsRangeT>
582 std::vector<GVEncodingInfo>
prepareGlobalInfosForEncoding(GlobalsRangeT && Globals)583 prepareGlobalInfosForEncoding(GlobalsRangeT &&Globals) {
584 auto RealGlobals = make_filter_range(Globals, [](const GlobalVariable &GV) {
585 return genx::isRealGlobalVariable(GV);
586 });
587 if (RealGlobals.begin() == RealGlobals.end())
588 return {};
589 std::vector<GVEncodingInfo> Infos;
590 std::transform(RealGlobals.begin(), std::prev(RealGlobals.end()),
591 std::next(RealGlobals.begin()), std::back_inserter(Infos),
592 [](const GlobalVariable &GV, const GlobalVariable &NextGV) {
593 return GVEncodingInfo{&GV, getAlignment(NextGV)};
594 });
595 Infos.push_back({&*std::prev(RealGlobals.end()), 1u});
596 return std::move(Infos);
597 }
598
ModuleDataT(const Module & M)599 ModuleDataT::ModuleDataT(const Module &M) {
600 std::vector<GVEncodingInfo> GVInfos =
601 prepareGlobalInfosForEncoding(M.globals());
602 for (auto GVInfo : GVInfos) {
603 if (GVInfo.GV->isConstant())
604 appendGlobalVariableData(Constant, GVInfo, M.getDataLayout());
605 else
606 appendGlobalVariableData(Global, GVInfo, M.getDataLayout());
607 }
608 }
609
getModuleInfo(const Module & M)610 static GenXOCLRuntimeInfo::ModuleInfoT getModuleInfo(const Module &M) {
611 ModuleDataT ModuleData{M};
612 GenXOCLRuntimeInfo::ModuleInfoT ModuleInfo;
613
614 constructSymbols<vISA::GenSymType::S_GLOBAL_VAR_CONST>(
615 ModuleData.Constant.Data.begin(), ModuleData.Constant.Data.end(),
616 std::back_inserter(ModuleInfo.Constant.Symbols));
617 constructSymbols<vISA::GenSymType::S_GLOBAL_VAR>(
618 ModuleData.Global.Data.begin(), ModuleData.Global.Data.end(),
619 std::back_inserter(ModuleInfo.Global.Symbols));
620
621 ModuleInfo.Constant.Relocations = std::move(ModuleData.Constant.Relocations);
622 ModuleInfo.Global.Relocations = std::move(ModuleData.Global.Relocations);
623
624 ModuleInfo.Constant.Data.Buffer =
625 std::move(ModuleData.Constant.Data).emitConsolidatedData();
626 // IGC always sets 0
627 ModuleInfo.Constant.Data.Alignment = 0;
628 ModuleInfo.Constant.Data.AdditionalZeroedSpace = 0;
629
630 ModuleInfo.Global.Data.Buffer =
631 std::move(ModuleData.Global.Data).emitConsolidatedData();
632 ModuleInfo.Global.Data.Alignment = 0;
633 ModuleInfo.Global.Data.AdditionalZeroedSpace = 0;
634
635 return std::move(ModuleInfo);
636 }
637
638 namespace {
639
640 class RuntimeInfoCollector final {
641 const FunctionGroupAnalysis &FGA;
642 const GenXBackendConfig &BC;
643 VISABuilder &VB;
644 const GenXSubtarget &ST;
645 const Module &M;
646 const GenXDebugInfo &DBG;
647
648 public:
649 using KernelStorageTy = GenXOCLRuntimeInfo::KernelStorageTy;
650 using CompiledKernel = GenXOCLRuntimeInfo::CompiledKernel;
651 using CompiledModuleT = GenXOCLRuntimeInfo::CompiledModuleT;
652
653 public:
RuntimeInfoCollector(const FunctionGroupAnalysis & InFGA,const GenXBackendConfig & InBC,VISABuilder & InVB,const GenXSubtarget & InST,const Module & InM,const GenXDebugInfo & InDbg)654 RuntimeInfoCollector(const FunctionGroupAnalysis &InFGA,
655 const GenXBackendConfig &InBC, VISABuilder &InVB,
656 const GenXSubtarget &InST, const Module &InM,
657 const GenXDebugInfo &InDbg)
658 : FGA{InFGA}, BC{InBC}, VB{InVB}, ST{InST}, M{InM}, DBG{InDbg} {}
659
660 CompiledModuleT run();
661
662 private:
663 CompiledKernel collectFunctionGroupInfo(const FunctionGroup &FG) const;
664 CompiledKernel collectFunctionSubgroupsInfo(
665 const std::vector<FunctionGroup *> &Subgroups) const;
666 };
667
668 } // namespace
669
run()670 RuntimeInfoCollector::CompiledModuleT RuntimeInfoCollector::run() {
671 KernelStorageTy Kernels;
672 std::transform(FGA.begin(), FGA.end(), std::back_inserter(Kernels),
673 [this](const FunctionGroup *FG) {
674 return collectFunctionGroupInfo(*FG);
675 });
676 std::vector<FunctionGroup *> IndirectlyReferencedFuncs;
677 std::copy_if(FGA.subgroup_begin(), FGA.subgroup_end(),
678 std::back_inserter(IndirectlyReferencedFuncs),
679 [](const FunctionGroup *FG) {
680 return genx::isReferencedIndirectly(FG->getHead());
681 });
682 if (!IndirectlyReferencedFuncs.empty())
683 Kernels.push_back(collectFunctionSubgroupsInfo(IndirectlyReferencedFuncs));
684 return {getModuleInfo(M), std::move(Kernels),
685 M.getDataLayout().getPointerSize()};
686 }
687
688 RuntimeInfoCollector::CompiledKernel
collectFunctionGroupInfo(const FunctionGroup & FG) const689 RuntimeInfoCollector::collectFunctionGroupInfo(const FunctionGroup &FG) const {
690 using KernelInfo = GenXOCLRuntimeInfo::KernelInfo;
691 using GTPinInfo = GenXOCLRuntimeInfo::GTPinInfo;
692 using CompiledKernel = GenXOCLRuntimeInfo::CompiledKernel;
693
694 // Compiler info.
695 KernelInfo Info{FG, ST, BC};
696
697 const Function *KernelFunction = FG.getHead();
698 const std::string KernelName = KernelFunction->getName().str();
699 VISAKernel *VK = VB.GetVISAKernel(KernelName);
700 IGC_ASSERT_MESSAGE(VK, "Kernel is null");
701 FINALIZER_INFO *JitInfo = nullptr;
702 CISA_CALL(VK->GetJitInfo(JitInfo));
703 IGC_ASSERT_MESSAGE(JitInfo, "Jit info is not set by finalizer");
704 // TODO: this a temporary solution for spill mem size
705 // calculation. This has to be redesign properly, maybe w/ multiple
706 // KernelInfos or by introducing FunctionInfos
707 const auto StackCalls = collectCalledFunctions(
708 FG, [](const Function *F) { return genx::requiresStackCall(F); });
709 for (const Function *F : StackCalls) {
710 const std::string FuncName = F->getName().str();
711 VISAKernel *VF = VB.GetVISAKernel(FuncName);
712 IGC_ASSERT_MESSAGE(VF, "Function is null");
713 FINALIZER_INFO *FuncJitInfo = nullptr;
714 CISA_CALL(VF->GetJitInfo(FuncJitInfo));
715 IGC_ASSERT_MESSAGE(FuncJitInfo, "Func jit info is not set by finalizer");
716 JitInfo->isSpill |= FuncJitInfo->isSpill;
717 JitInfo->hasStackcalls |= FuncJitInfo->hasStackcalls;
718 JitInfo->spillMemUsed += FuncJitInfo->spillMemUsed;
719 }
720
721 genx::BinaryDataAccumulator<const Function *> GenBinary =
722 getGenBinary(FG, VB, BC);
723
724 const auto& Dbg = DBG.getModuleDebug();
725 auto DbgIt = Dbg.find(KernelFunction);
726 std::vector<char> DebugData;
727 if (DbgIt != std::end(Dbg)) {
728 const auto &ElfImage = DbgIt->second;
729 DebugData = {ElfImage.begin(), ElfImage.end()};
730 }
731 CISA_CALL(VK->GetRelocations(Info.Func.Relocations));
732 // Still have to duplicate function relocations because they are constructed
733 // inside Finalizer.
734 CISA_CALL(VK->GetGenRelocEntryBuffer(Info.LegacyFuncRelocations.Buffer,
735 Info.LegacyFuncRelocations.Size,
736 Info.LegacyFuncRelocations.Entries));
737 Info.Func.Symbols = constructFunctionSymbols(GenBinary, /*HasKernel=*/true);
738
739 void *GTPinBuffer = nullptr;
740 unsigned GTPinBufferSize = 0;
741 CISA_CALL(VK->GetGTPinBuffer(GTPinBuffer, GTPinBufferSize));
742
743 auto *GTPinBytes = static_cast<char *>(GTPinBuffer);
744 GTPinInfo gtpin{GTPinBytes, GTPinBytes + GTPinBufferSize};
745
746 Info.Func.Data.Buffer = std::move(GenBinary).emitConsolidatedData();
747 return CompiledKernel{std::move(Info), *JitInfo, std::move(gtpin),
748 std::move(DebugData)};
749 }
750
751 RuntimeInfoCollector::CompiledKernel
collectFunctionSubgroupsInfo(const std::vector<FunctionGroup * > & Subgroups) const752 RuntimeInfoCollector::collectFunctionSubgroupsInfo(
753 const std::vector<FunctionGroup *> &Subgroups) const {
754 using KernelInfo = GenXOCLRuntimeInfo::KernelInfo;
755 using CompiledKernel = GenXOCLRuntimeInfo::CompiledKernel;
756
757 IGC_ASSERT(!Subgroups.empty());
758 KernelInfo Info{ST};
759
760 genx::BinaryDataAccumulator<const Function *> GenBinary;
761 for (auto *FG : Subgroups) {
762 auto *Func = FG->getHead();
763 IGC_ASSERT(genx::fg::isSubGroupHead(*Func));
764 loadBinary(GenBinary, VB, *Func, BC);
765 }
766 Info.Func.Symbols = constructFunctionSymbols(GenBinary, /*HasKernel*/false);
767 Info.Func.Data.Buffer = GenBinary.emitConsolidatedData();
768
769 return CompiledKernel{std::move(Info), FINALIZER_INFO{}, /*GtpinInfo*/ {},
770 /*DebugInfo*/ {}};
771 }
772
// Declares the analyses this pass requires; these are the ones queried via
// getAnalysis<> in runOnModule. setPreservesAll() is called because this pass
// only collects information (runOnModule returns false).
void GenXOCLRuntimeInfo::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<FunctionGroupAnalysis>();
  AU.addRequired<GenXBackendConfig>();
  AU.addRequired<GenXModule>();
  AU.addRequired<GenXDebugInfo>();
  AU.addRequired<TargetPassConfig>();
  AU.setPreservesAll();
}
781
runOnModule(Module & M)782 bool GenXOCLRuntimeInfo::runOnModule(Module &M) {
783 const auto &FGA = getAnalysis<FunctionGroupAnalysis>();
784 const auto &BC = getAnalysis<GenXBackendConfig>();
785 // Getters for builders are not constant.
786 auto &GM = getAnalysis<GenXModule>();
787 const auto &ST = getAnalysis<TargetPassConfig>()
788 .getTM<GenXTargetMachine>()
789 .getGenXSubtarget();
790 const auto &DBG = getAnalysis<GenXDebugInfo>();
791
792 VISABuilder &VB =
793 *(GM.HasInlineAsm() ? GM.GetVISAAsmReader() : GM.GetCisaBuilder());
794
795 CompiledModule = RuntimeInfoCollector{FGA, BC, VB, ST, M, DBG}.run();
796 return false;
797 }
798
print(raw_ostream & OS,const Module * M) const799 void GenXOCLRuntimeInfo::print(raw_ostream &OS, const Module *M) const {
800 vc::printOCLRuntimeInfo(OS, CompiledModule);
801 }
802
// Pass registration. The dependency list matches the analyses requested in
// getAnalysisUsage.
// NOTE(review): the trailing `false, true` arguments are presumably the
// "CFG only" and "is analysis" flags of INITIALIZE_PASS -- confirm against
// the LLVM version in use.
INITIALIZE_PASS_BEGIN(GenXOCLRuntimeInfo, "GenXOCLRuntimeInfo",
                      "GenXOCLRuntimeInfo", false, true)
INITIALIZE_PASS_DEPENDENCY(FunctionGroupAnalysis);
INITIALIZE_PASS_DEPENDENCY(GenXBackendConfig);
INITIALIZE_PASS_DEPENDENCY(GenXModule);
INITIALIZE_PASS_DEPENDENCY(GenXDebugInfo);
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig);
INITIALIZE_PASS_END(GenXOCLRuntimeInfo, "GenXOCLRuntimeInfo",
                    "GenXOCLRuntimeInfo", false, true)
812