1 /*========================== begin_copyright_notice ============================
2 
3 Copyright (C) 2020-2021 Intel Corporation
4 
5 SPDX-License-Identifier: MIT
6 
7 ============================= end_copyright_notice ===========================*/
8 
9 #include "zebin_builder.hpp"
10 
11 #include "../../../Compiler/CodeGenPublic.h"
12 
13 #include "common/LLVMWarningsPush.hpp"
14 #include "llvm/ADT/SmallVector.h"
15 #include "llvm/MC/MCELFObjectWriter.h"
16 #include "common/LLVMWarningsPop.hpp"
17 #include "Probe/Assertion.h"
18 
19 using namespace IGC;
20 using namespace iOpenCL;
21 using namespace zebin;
22 using namespace CLElfLib;   // ElfReader related typedefs
23 using namespace llvm;
24 
ZEBinaryBuilder(const PLATFORM plat,bool is64BitPointer,const IGC::SOpenCLProgramInfo & programInfo,const uint8_t * spvData,uint32_t spvSize)25 ZEBinaryBuilder::ZEBinaryBuilder(
26     const PLATFORM plat, bool is64BitPointer, const IGC::SOpenCLProgramInfo& programInfo,
27     const uint8_t* spvData, uint32_t spvSize)
28     : mPlatform(plat), mBuilder(is64BitPointer)
29 {
30     G6HWC::InitializeCapsGen8(&mHWCaps);
31 
32     // FIXME: Most fields leaves as 0
33     TargetMetadata metadata;
34     metadata.generatorSpecificFlags = TargetMetadata::GeneratorSpecificFlags::NONE;
35     metadata.minHwRevisionId = plat.usRevId;
36     metadata.maxHwRevisionId = plat.usRevId;
37     metadata.generatorId = TargetMetadata::GeneratorId::IGC;
38     mBuilder.setTargetMetadata(metadata);
39 
40     addProgramScopeInfo(programInfo);
41 
42     if (spvData != nullptr)
43         addSPIRV(spvData, spvSize);
44 }
45 
setProductFamily(PRODUCT_FAMILY value)46 void ZEBinaryBuilder::setProductFamily(PRODUCT_FAMILY value)
47 {
48     mBuilder.setProductFamily(value);
49 }
50 
setGfxCoreFamily(GFXCORE_FAMILY value)51 void ZEBinaryBuilder::setGfxCoreFamily(GFXCORE_FAMILY value)
52 {
53     mBuilder.setGfxCoreFamily(value);
54 }
55 
createKernel(const char * rawIsaBinary,unsigned int rawIsaBinarySize,const SOpenCLKernelInfo & annotations,const uint32_t grfSize,const CBTILayout & layout,const std::string & visaasm,bool isProgramDebuggable)56 void ZEBinaryBuilder::createKernel(
57     const char*  rawIsaBinary,
58     unsigned int rawIsaBinarySize,
59     const SOpenCLKernelInfo& annotations,
60     const uint32_t grfSize,
61     const CBTILayout& layout,
62     const std::string& visaasm,
63     bool isProgramDebuggable)
64 {
65     ZEELFObjectBuilder::SectionID textID =
66         addKernelBinary(annotations.m_kernelName, rawIsaBinary, rawIsaBinarySize);
67     addKernelSymbols(textID, annotations);
68     addKernelRelocations(textID, annotations);
69 
70     zeInfoKernel& zeKernel = mZEInfoBuilder.createKernel(annotations.m_kernelName);
71     addKernelExecEnv(annotations, zeKernel);
72     addKernelExperimentalProperties(annotations, zeKernel);
73     if (annotations.m_threadPayload.HasLocalIDx ||
74         annotations.m_threadPayload.HasLocalIDy ||
75         annotations.m_threadPayload.HasLocalIDz) {
76         addLocalIds(annotations.m_executionEnivronment.CompiledSIMDSize,
77             grfSize,
78             annotations.m_threadPayload.HasLocalIDx,
79             annotations.m_threadPayload.HasLocalIDy,
80             annotations.m_threadPayload.HasLocalIDz,
81             zeKernel);
82     }
83     addPayloadArgsAndBTI(annotations, zeKernel);
84     addMemoryBuffer(annotations, zeKernel);
85     addGTPinInfo(annotations);
86     if (!visaasm.empty())
87         addKernelVISAAsm(annotations.m_kernelName, visaasm);
88     if (isProgramDebuggable)
89         addKernelDebugEnv(annotations, layout, zeKernel);
90 }
91 
addGTPinInfo(const IGC::SOpenCLKernelInfo & annotations)92 void ZEBinaryBuilder::addGTPinInfo(const IGC::SOpenCLKernelInfo& annotations)
93 {
94     const IGC::SKernelProgram* program = &(annotations.m_kernelProgram);
95     uint8_t* buffer = nullptr;
96     uint32_t size = 0;
97     switch (annotations.m_executionEnivronment.CompiledSIMDSize) {
98     case 1:
99         buffer = (uint8_t*)program->simd1.m_gtpinBuffer;
100         size = program->simd1.m_gtpinBufferSize;
101         break;
102     case 8:
103         buffer = (uint8_t*)program->simd8.m_gtpinBuffer;
104         size = program->simd8.m_gtpinBufferSize;
105         break;
106     case 16:
107         buffer = (uint8_t*)program->simd16.m_gtpinBuffer;
108         size = program->simd16.m_gtpinBufferSize;
109         break;
110     case 32:
111         buffer = (uint8_t*)program->simd32.m_gtpinBuffer;
112         size = program->simd32.m_gtpinBufferSize;
113         break;
114     }
115 
116     if (buffer != nullptr && size)
117         mBuilder.addSectionGTPinInfo(annotations.m_kernelName, buffer, size);
118 }
119 
addProgramScopeInfo(const IGC::SOpenCLProgramInfo & programInfo)120 void ZEBinaryBuilder::addProgramScopeInfo(const IGC::SOpenCLProgramInfo& programInfo)
121 {
122     addGlobalConstants(programInfo);
123     addGlobals(programInfo);
124     addProgramSymbols(programInfo);
125     addProgramRelocations(programInfo);
126 }
127 
addGlobalConstants(const IGC::SOpenCLProgramInfo & annotations)128 void ZEBinaryBuilder::addGlobalConstants(const IGC::SOpenCLProgramInfo& annotations)
129 {
130     // General constants: .data.const and .bss.const
131     // create a data section for global constant variables
132     if (annotations.m_initConstantAnnotation && annotations.m_initConstantAnnotation->AllocSize) {
133         auto& ca = annotations.m_initConstantAnnotation;
134         // the normal .data.const size
135         uint32_t dataSize = ca->InlineData.size();
136         // the zero-initialize variables size, the .bss.const size
137         uint32_t bssSize = ca->AllocSize - dataSize;
138         uint32_t alignment = ca->Alignment;
139 
140         if (IGC_IS_FLAG_ENABLED(AllocateZeroInitializedVarsInBss)) {
141             zebin::ZEELFObjectBuilder::SectionID normal_id = -1, bss_id = -1;
142             if (dataSize) {
143                 // if the bss section existed, we leave the alignment in bss section.
144                 // that in our design the entire global buffer is the size of normal section (.const) plus bss section
145                 // we do not want to add the alignment twice on the both sections
146                 // Alos set the padding size to 0 that we always put the padding into bss section
147                 uint32_t normal_alignment = bssSize ? 0 : alignment;
148                 normal_id = mBuilder.addSectionData("const", (const uint8_t*)ca->InlineData.data(),
149                     dataSize, 0, normal_alignment);
150             }
151             if (bssSize) {
152                 bss_id = mBuilder.addSectionBss("const", bssSize, alignment);
153             }
154 
155             // set mGlobalConstSectID to normal_id if existed, and bss_id if not.
156             // mGlobalConstSectID will be used for symbol section reference. We always refer to normal_id section
157             // even if the the symbol is defeind in bss section when normal_id section exists
158             mGlobalConstSectID = dataSize ? normal_id : bss_id;
159         } else {
160             // before runtime can support bss section, we create all 0s in .const.data section by adding
161             // bssSize of padding
162             mGlobalConstSectID = mBuilder.addSectionData("const", (const uint8_t*)ca->InlineData.data(),
163                 dataSize, bssSize, alignment);
164         }
165     }
166 
167     // String literals for printf: .data.const.string
168     if (annotations.m_initConstantStringAnnotation &&
169         annotations.m_initConstantStringAnnotation->AllocSize) {
170         auto& caString = annotations.m_initConstantStringAnnotation;
171         uint32_t dataSize = caString->InlineData.size();
172         uint32_t paddingSize = caString->AllocSize - dataSize;
173         uint32_t alignment = caString->Alignment;
174         mConstStringSectID = mBuilder.addSectionData("const.string", (const uint8_t*)caString->InlineData.data(),
175             dataSize, paddingSize, alignment);
176     }
177 }
178 
addGlobals(const IGC::SOpenCLProgramInfo & annotations)179 void ZEBinaryBuilder::addGlobals(const IGC::SOpenCLProgramInfo& annotations)
180 {
181     if (annotations.m_initGlobalAnnotation == nullptr)
182         return;
183 
184     // create a data section for global variables
185     auto& ca = annotations.m_initGlobalAnnotation;
186 
187     if (!ca->AllocSize)
188         return;
189 
190     uint32_t dataSize = ca->InlineData.size();
191     uint32_t bssSize = ca->AllocSize - dataSize;
192     uint32_t alignment = ca->Alignment;
193 
194     if (IGC_IS_FLAG_ENABLED(AllocateZeroInitializedVarsInBss)) {
195         // The .bss.global section size is the bssSize (ca->AllocSize - ca->InlineData.size()),
196         // and the normal .data.global size is dataSize (ca->InlineData.size())
197         zebin::ZEELFObjectBuilder::SectionID normal_id = -1, bss_id = -1;
198         if (dataSize) {
199             uint32_t normal_alignment = bssSize ? 0 : alignment;
200             normal_id = mBuilder.addSectionData("global", (const uint8_t*)ca->InlineData.data(),
201                 dataSize, 0, normal_alignment);
202         }
203         if (bssSize) {
204             bss_id = mBuilder.addSectionBss("global", bssSize, alignment);
205         }
206         // mGlobalSectID is the section id that will be referenced by global symbols.
207         // It should be .data.global if existed. If there's only .bss.global section, then all global
208         // symbols reference to .bss.global section, so set the mGlobalConstSectID to it
209         mGlobalSectID = dataSize ? normal_id : bss_id;
210     } else {
211         // before runtime can support bss section, we create all 0s in .global.data section by adding
212         // bssSize of padding
213         mGlobalSectID = mBuilder.addSectionData("global", (const uint8_t*)ca->InlineData.data(),
214             dataSize, bssSize, alignment);
215     }
216 }
217 
addSPIRV(const uint8_t * data,uint32_t size)218 void ZEBinaryBuilder::addSPIRV(const uint8_t* data, uint32_t size)
219 {
220     mBuilder.addSectionSpirv("", data, size);
221 }
222 
addKernelBinary(const std::string & kernelName,const char * kernelBinary,unsigned int kernelBinarySize)223 ZEELFObjectBuilder::SectionID ZEBinaryBuilder::addKernelBinary(const std::string& kernelName,
224     const char* kernelBinary, unsigned int kernelBinarySize)
225 {
226     return mBuilder.addSectionText(kernelName, (const uint8_t*)kernelBinary,
227         kernelBinarySize, mHWCaps.InstructionCachePrefetchSize, sizeof(DWORD));
228 }
229 
addPayloadArgsAndBTI(const SOpenCLKernelInfo & annotations,zeInfoKernel & zeinfoKernel)230 void ZEBinaryBuilder::addPayloadArgsAndBTI(
231     const SOpenCLKernelInfo& annotations,
232     zeInfoKernel& zeinfoKernel)
233 {
234     // copy the payload arguments into zeinfoKernel
235     zeinfoKernel.payload_arguments.insert(
236         zeinfoKernel.payload_arguments.end(),
237         annotations.m_zePayloadArgs.begin(),
238         annotations.m_zePayloadArgs.end());
239 
240     // copy the bit table into zeinfoKernel
241     zeinfoKernel.binding_table_indices.insert(
242         zeinfoKernel.binding_table_indices.end(),
243         annotations.m_zeBTIArgs.begin(),
244         annotations.m_zeBTIArgs.end());
245 }
246 
addMemoryBuffer(const IGC::SOpenCLKernelInfo & annotations,zebin::zeInfoKernel & zeinfoKernel)247 void ZEBinaryBuilder::addMemoryBuffer(
248     const IGC::SOpenCLKernelInfo& annotations,
249     zebin::zeInfoKernel& zeinfoKernel)
250 {
251     // scracth0 is either
252     //  - contains privates and both igc and vISA stack, or
253     //  - contains only vISA stack
254     uint32_t scratch0 =
255         annotations.m_executionEnivronment.PerThreadScratchSpace;
256     // scratch1 is privates on stack
257     uint32_t scratch1 =
258         annotations.m_executionEnivronment.PerThreadScratchSpaceSlot1;
259     // private_on_global: privates and IGC stack on stateless
260     uint32_t private_on_global =
261         annotations.m_executionEnivronment.PerThreadPrivateOnStatelessSize;
262 
263     //  single scratch space have everything
264     if (scratch0 && !scratch1 && !private_on_global) {
265         ZEInfoBuilder::addScratchPerThreadMemoryBuffer(zeinfoKernel.per_thread_memory_buffers,
266             PreDefinedAttrGetter::MemBufferUsage::single_space,
267             0,
268             scratch0
269         );
270         return;
271     }
272 
273     if (scratch0)
274         ZEInfoBuilder::addScratchPerThreadMemoryBuffer(zeinfoKernel.per_thread_memory_buffers,
275             PreDefinedAttrGetter::MemBufferUsage::spill_fill_space,
276             0,
277             scratch0);
278     if (scratch1)
279         ZEInfoBuilder::addScratchPerThreadMemoryBuffer(zeinfoKernel.per_thread_memory_buffers,
280             PreDefinedAttrGetter::MemBufferUsage::private_space,
281             1,
282             scratch1);
283     if (private_on_global) {
284         ZEInfoBuilder::addPerSIMTThreadGlobalMemoryBuffer(zeinfoKernel.per_thread_memory_buffers,
285             PreDefinedAttrGetter::MemBufferUsage::private_space,
286             private_on_global);
287         // FIXME: IGC currently generate global buffer with size assume to be per-simt-thread
288         // ZEInfoBuilder::addPerThreadMemoryBuffer(zeinfoKernel.per_thread_memory_buffers,
289         //    PreDefinedAttrGetter::MemBufferType::global,
290         //    PreDefinedAttrGetter::MemBufferUsage::private_space,
291         //    private_on_global);
292     }
293 }
294 
getSymbolElfType(const vISA::ZESymEntry & sym)295 uint8_t ZEBinaryBuilder::getSymbolElfType(const vISA::ZESymEntry& sym)
296 {
297     switch (sym.s_type) {
298     case vISA::GenSymType::S_NOTYPE:
299         return llvm::ELF::STT_NOTYPE;
300 
301     case vISA::GenSymType::S_UNDEF:
302         return llvm::ELF::STT_NOTYPE;
303 
304     case vISA::GenSymType::S_FUNC:
305     case vISA::GenSymType::S_KERNEL:
306         return llvm::ELF::STT_FUNC;
307 
308     case vISA::GenSymType::S_GLOBAL_VAR:
309     case vISA::GenSymType::S_GLOBAL_VAR_CONST:
310     case vISA::GenSymType::S_CONST_SAMPLER:
311         return llvm::ELF::STT_OBJECT;
312     default:
313         break;
314     }
315     return llvm::ELF::STT_NOTYPE;
316 }
317 
getSymbolElfBinding(const vISA::ZESymEntry & sym)318 uint8_t ZEBinaryBuilder::getSymbolElfBinding(const vISA::ZESymEntry& sym)
319 {
320     // all symbols we have now that could be exposed must have
321     // global binding
322     switch (sym.s_type) {
323     case vISA::GenSymType::S_KERNEL:
324         return llvm::ELF::STB_LOCAL;
325 
326     case vISA::GenSymType::S_NOTYPE:
327     case vISA::GenSymType::S_UNDEF:
328     case vISA::GenSymType::S_FUNC:
329     case vISA::GenSymType::S_GLOBAL_VAR:
330     case vISA::GenSymType::S_GLOBAL_VAR_CONST:
331     case vISA::GenSymType::S_CONST_SAMPLER:
332         return llvm::ELF::STB_GLOBAL;
333     default:
334         break;
335     }
336     IGC_ASSERT(0);
337     return llvm::ELF::STB_GLOBAL;
338 }
339 
addSymbol(const vISA::ZESymEntry & sym,ZEELFObjectBuilder::SectionID targetSect)340 void ZEBinaryBuilder::addSymbol(const vISA::ZESymEntry& sym, ZEELFObjectBuilder::SectionID targetSect)
341 {
342     mBuilder.addSymbol(sym.s_name, sym.s_offset, sym.s_size,
343         getSymbolElfBinding(sym), getSymbolElfType(sym),
344         (sym.s_type == vISA::GenSymType::S_UNDEF) ? -1 : targetSect);
345 }
346 
addProgramSymbols(const IGC::SOpenCLProgramInfo & annotations)347 void ZEBinaryBuilder::addProgramSymbols(const IGC::SOpenCLProgramInfo& annotations)
348 {
349     const IGC::SOpenCLProgramInfo::ZEBinProgramSymbolTable& symbols = annotations.m_zebinSymbolTable;
350 
351     // add symbols defined in global constant section
352     IGC_ASSERT(symbols.globalConst.empty() || mGlobalConstSectID != -1);
353     for (auto sym : symbols.globalConst)
354         addSymbol(sym, mGlobalConstSectID);
355 
356     // add symbols defined in global string constant section
357     IGC_ASSERT(symbols.globalStringConst.empty() || mConstStringSectID != -1);
358     for (auto sym : symbols.globalStringConst)
359         addSymbol(sym, mConstStringSectID);
360 
361     // add symbols defined in global section
362     IGC_ASSERT(symbols.global.empty() || mGlobalSectID != -1);
363     for (auto sym : symbols.global)
364         addSymbol(sym, mGlobalSectID);
365 
366 }
367 
addKernelSymbols(ZEELFObjectBuilder::SectionID kernelSectId,const IGC::SOpenCLKernelInfo & annotations)368 void ZEBinaryBuilder::addKernelSymbols(
369     ZEELFObjectBuilder::SectionID kernelSectId,
370     const IGC::SOpenCLKernelInfo& annotations)
371 {
372     // get symbol list from the current process SKernelProgram
373     auto symbols = [](int simdSize, const IGC::SKernelProgram& program) {
374         if (simdSize == 8)
375             return program.simd8.m_symbols;
376         else if (simdSize == 16)
377             return program.simd16.m_symbols;
378         else if (simdSize == 32)
379             return program.simd32.m_symbols;
380         else
381             return program.simd1.m_symbols;
382     } (annotations.m_executionEnivronment.CompiledSIMDSize,
383         annotations.m_kernelProgram);
384 
385     // add local symbols of this kernel binary
386     for (auto sym : symbols.local) {
387         IGC_ASSERT(sym.s_type != vISA::GenSymType::S_UNDEF);
388         addSymbol(sym, kernelSectId);
389     }
390 
391     // add function symbols defined in kernel text
392     for (auto sym : symbols.function)
393         addSymbol(sym, kernelSectId);
394 
395     // we do not support sampler symbols now
396     IGC_ASSERT(symbols.sampler.empty());
397 }
398 
addProgramRelocations(const IGC::SOpenCLProgramInfo & annotations)399 void ZEBinaryBuilder::addProgramRelocations(const IGC::SOpenCLProgramInfo& annotations)
400 {
401     const IGC::SOpenCLProgramInfo::ZEBinRelocTable& relocs = annotations.m_GlobalPointerAddressRelocAnnotation;
402 
403     // FIXME: For r_type, zebin::R_TYPE_ZEBIN should have the same enum value as visa::GenRelocType.
404     // Take the value directly
405     IGC_ASSERT(relocs.globalConstReloc.empty() || mGlobalConstSectID != -1);
406     for (auto reloc : relocs.globalConstReloc)
407         mBuilder.addRelRelocation(reloc.r_offset, reloc.r_symbol, static_cast<zebin::R_TYPE_ZEBIN>(reloc.r_type), mGlobalConstSectID);
408 
409     IGC_ASSERT(relocs.globalReloc.empty() || mGlobalSectID != -1);
410     for (auto reloc : relocs.globalReloc)
411         mBuilder.addRelRelocation(reloc.r_offset, reloc.r_symbol, static_cast<zebin::R_TYPE_ZEBIN>(reloc.r_type), mGlobalSectID);
412 }
413 
addKernelRelocations(ZEELFObjectBuilder::SectionID targetId,const IGC::SOpenCLKernelInfo & annotations)414 void ZEBinaryBuilder::addKernelRelocations(
415     ZEELFObjectBuilder::SectionID targetId,
416     const IGC::SOpenCLKernelInfo& annotations)
417 {
418     // get relocation list from the current process SKernelProgram
419     auto relocs = [](int simdSize, const IGC::SKernelProgram& program) {
420         if (simdSize == 8)
421             return program.simd8.m_relocs;
422         else if (simdSize == 16)
423             return program.simd16.m_relocs;
424         else if (simdSize == 32)
425             return program.simd32.m_relocs;
426         else
427             return program.simd1.m_relocs;
428     } (annotations.m_executionEnivronment.CompiledSIMDSize, annotations.m_kernelProgram);
429 
430     // FIXME: For r_type, zebin::R_TYPE_ZEBIN should have the same enum value as visa::GenRelocType.
431     // Take the value directly
432     if (!relocs.empty())
433         for (auto reloc : relocs)
434             mBuilder.addRelRelocation(reloc.r_offset, reloc.r_symbol, (zebin::R_TYPE_ZEBIN)reloc.r_type, targetId);
435 }
436 
addKernelExperimentalProperties(const SOpenCLKernelInfo & annotations,zeInfoKernel & zeinfoKernel)437 void ZEBinaryBuilder::addKernelExperimentalProperties(const SOpenCLKernelInfo& annotations,
438     zeInfoKernel& zeinfoKernel)
439 {
440     // Write to zeinfoKernel only when the attribute is enabled
441     if (IGC_IS_FLAG_ENABLED(DumpHasNonKernelArgLdSt)) {
442         ZEInfoBuilder::addExpPropertiesHasNonKernelArgLdSt(zeinfoKernel,
443             annotations.m_hasNonKernelArgLoad,
444             annotations.m_hasNonKernelArgStore,
445             annotations.m_hasNonKernelArgAtomic);
446     }
447 }
448 
addKernelExecEnv(const SOpenCLKernelInfo & annotations,zeInfoKernel & zeinfoKernel)449 void ZEBinaryBuilder::addKernelExecEnv(const SOpenCLKernelInfo& annotations,
450     zeInfoKernel& zeinfoKernel)
451 {
452     zeInfoExecutionEnv& env = zeinfoKernel.execution_env;
453 
454     env.barrier_count = annotations.m_executionEnivronment.HasBarriers;
455     env.disable_mid_thread_preemption = annotations.m_executionEnivronment.DisableMidThreadPreemption;
456     env.grf_count = annotations.m_executionEnivronment.NumGRFRequired;
457     env.has_4gb_buffers = annotations.m_executionEnivronment.CompiledForGreaterThan4GBBuffers;
458     env.has_device_enqueue = annotations.m_executionEnivronment.HasDeviceEnqueue;
459     env.has_fence_for_image_access = annotations.m_executionEnivronment.HasReadWriteImages;
460     env.has_global_atomics = annotations.m_executionEnivronment.HasGlobalAtomics;
461     env.has_stack_calls = annotations.m_executionEnivronment.HasStackCalls;
462     env.inline_data_payload_size = annotations.m_threadPayload.PassInlineDataSize;
463     env.offset_to_skip_per_thread_data_load = annotations.m_threadPayload.OffsetToSkipPerThreadDataLoad;;
464     env.offset_to_skip_set_ffid_gp = annotations.m_threadPayload.OffsetToSkipSetFFIDGP;;
465     env.required_sub_group_size = annotations.m_executionEnivronment.CompiledSubGroupsNumber;
466     if(annotations.m_executionEnivronment.HasFixedWorkGroupSize)
467     {
468         env.required_work_group_size.push_back(annotations.m_executionEnivronment.FixedWorkgroupSize[0]);
469         env.required_work_group_size.push_back(annotations.m_executionEnivronment.FixedWorkgroupSize[1]);
470         env.required_work_group_size.push_back(annotations.m_executionEnivronment.FixedWorkgroupSize[2]);
471     }
472     env.simd_size = annotations.m_executionEnivronment.CompiledSIMDSize;
473     // set slm size to inline local size
474     env.slm_size = annotations.m_executionEnivronment.SumFixedTGSMSizes ;
475     env.subgroup_independent_forward_progress = annotations.m_executionEnivronment.SubgroupIndependentForwardProgressRequired;
476     if (annotations.m_executionEnivronment.WorkgroupWalkOrder[0] ||
477         annotations.m_executionEnivronment.WorkgroupWalkOrder[1] ||
478         annotations.m_executionEnivronment.WorkgroupWalkOrder[2]) {
479         env.work_group_walk_order_dimensions.push_back(annotations.m_executionEnivronment.WorkgroupWalkOrder[0]);
480         env.work_group_walk_order_dimensions.push_back(annotations.m_executionEnivronment.WorkgroupWalkOrder[1]);
481         env.work_group_walk_order_dimensions.push_back(annotations.m_executionEnivronment.WorkgroupWalkOrder[2]);
482     }
483 }
484 
addLocalIds(uint32_t simdSize,uint32_t grfSize,bool has_local_id_x,bool has_local_id_y,bool has_local_id_z,zebin::zeInfoKernel & zeinfoKernel)485 void ZEBinaryBuilder::addLocalIds(uint32_t simdSize, uint32_t grfSize,
486     bool has_local_id_x, bool has_local_id_y, bool has_local_id_z,
487     zebin::zeInfoKernel& zeinfoKernel)
488 {
489     // simdSize 1 is CM kernel, using arg_type::packed_local_ids format
490     if (simdSize == 1) {
491         // Currently there's only one kind of per-thread argument, hard-coded the
492         // offset to 0 and for packed_local_ids, its size is 6 bytes (int16*3) always
493         mZEInfoBuilder.addPerThreadPayloadArgument(
494             zeinfoKernel.per_thread_payload_arguments,
495             PreDefinedAttrGetter::ArgType::packed_local_ids, 0, 6);
496         return;
497     }
498     // otherwise, using arg_type::local_id format
499     IGC_ASSERT(simdSize);
500     IGC_ASSERT(grfSize);
501     // each id takes 2 bytes
502     int32_t per_id_size = 2 * simdSize;
503     // byte size for one id have to be grf align
504     per_id_size = (per_id_size % grfSize) == 0 ?
505         per_id_size : ((per_id_size / grfSize) + 1) * grfSize;
506     // total_size = num_of_ids * per_id_size
507     int32_t total_size = per_id_size * ((has_local_id_x ? 1 : 0) +
508         (has_local_id_y ? 1 : 0) + (has_local_id_z ? 1 : 0));
509     mZEInfoBuilder.addPerThreadPayloadArgument(
510         zeinfoKernel.per_thread_payload_arguments,
511         PreDefinedAttrGetter::ArgType::local_id, 0, total_size);
512 }
513 
514 // Calculate correct (pure) size of ELF binary, because debugDataSize taken from pOutput->m_debugDataVISASize
515 // contains something else.
516 // If ELF is validated successfully then return a calculated size. Othwerwise, return 0.
calcElfSize(void * elfBin,size_t elfSize)517 size_t ZEBinaryBuilder::calcElfSize(void* elfBin, size_t elfSize)
518 {
519     SElf64Header* elf64Header = (SElf64Header*)elfBin;
520     size_t elfBinSize = 0; // Correct (pure) size of ELF binary to be calculated
521 
522     if (elfSize == 0)
523     {
524         IGC_ASSERT_MESSAGE(false, "Empty ELF file - nothing to be transfered to zeBinary");
525         return 0; // ELF binary incorrect
526     }
527 
528     if ((elfSize < ID_IDX_NUM_BYTES) ||
529         (elf64Header->Identity[ID_IDX_MAGIC0] != ELF_MAG0) || (elf64Header->Identity[ID_IDX_MAGIC1] != ELF_MAG1) ||
530         (elf64Header->Identity[ID_IDX_MAGIC2] != ELF_MAG2) || (elf64Header->Identity[ID_IDX_MAGIC3] != ELF_MAG3) ||
531         (elf64Header->Identity[ID_IDX_CLASS] != EH_CLASS_64))
532     {
533         IGC_ASSERT_MESSAGE(false, "ELF file header incorrect - nothing to be transfered to zeBinary");
534         return 0; // ELF binary incorrect
535     }
536 
537     size_t idxSectionHdrOffset = 0; // Indexed section header offset
538     SElf64SectionHeader* sectionHeader = NULL;
539 
540     // Calculate correct (pure) size of ELF binary, because debugDataSize i.e. pOutput->m_debugDataVISASize
541     // contains something else.
542     elfBinSize += elf64Header->ElfHeaderSize;
543 
544     // ELF binary scanning to calculate a size of elf binary w/o alignment and additional data overhead.
545     for (unsigned int i = 0; i < elf64Header->NumSectionHeaderEntries; i++)
546     {
547         idxSectionHdrOffset = (size_t)elf64Header->SectionHeadersOffset + (i * elf64Header->SectionHeaderEntrySize);
548         sectionHeader = (SElf64SectionHeader*)((char*)elf64Header + idxSectionHdrOffset);
549 
550         // Tally up the sizes
551         elfBinSize += (size_t)sectionHeader->DataSize;
552         elfBinSize += (size_t)elf64Header->SectionHeaderEntrySize;
553     }
554 
555     return elfBinSize;
556 }
557 
558 // Finds a symbol name in ELF binary and returns a symbol entry
559 // that will later be transformed to ZE binary format
getElfSymbol(CElfReader * elfReader,const unsigned int symtabIdx,ELF::Elf64_Sym & symtabEntry,char * & symName)560 void ZEBinaryBuilder::getElfSymbol(CElfReader* elfReader, const unsigned int symtabIdx, ELF::Elf64_Sym &symtabEntry,
561     char* &symName)
562 {
563     IGC_ASSERT_MESSAGE(elfReader->GetElfHeader()->SectionHeaderEntrySize == 64, "ELF entry size 64 supported only");
564 
565     // To find a symbol name for example for relocation first we have to do
566     // a lookup into .symtab (to find an index of the string in the .strtab)
567     // then we have to find this name in .strtab.
568 
569     // Get data of .symtab and .strtab sections in ELF binary.
570     char* symtabData = NULL;
571     size_t symtabDataSize = 0;
572     elfReader->GetSectionData(".symtab", symtabData, symtabDataSize);
573     char* strtabData = NULL;
574     size_t strtabDataSize = 0;
575     elfReader->GetSectionData(".strtab", strtabData, strtabDataSize);
576     if (strtabDataSize <= 1)
577         elfReader->GetSectionData(".shstrtab", strtabData, strtabDataSize);
578 
579     if (!symtabData || !strtabData)
580     {
581         return;
582     }
583 
584     // Perform lookup into .symtab.
585     unsigned int symtabEntrySize = sizeof(llvm::ELF::Elf64_Sym);
586     symtabEntry = *(llvm::ELF::Elf64_Sym*)(symtabData + symtabIdx * symtabEntrySize);
587 
588     // Then find the name in .strtab (String Table), where data may look as showed below:
589     //  .debug_abbrev .text.stackcall .debug_ranges .debug_str .debug_info
590     // ^NULL         ^NULL           ^NULL         ^NULL      ^NULL       ^NULL
591     //
592     // Each symtab entry contains 'st_shndx' filed, which is an index of a name (not a byte offset)
593     // located in the String Table. To find for example a symbol name indexed as 3, the 3rd NULL
594     // character must be found in the String Table, which is followed by the name of this symbol
595     // ('.debug_ranges' in the example above).
596 
597     unsigned int ndx = symtabEntry.st_shndx; // No. of NULL characters to be skipped in .strtab
598     while (ndx--)              // Iterate thru names/strings from the beginning of .strtab data
599     {
600         while (*strtabData++); // Find \0 terminator at the end of a given name
601         strtabData++;          // Move a pointer to the first character of the next name
602     }
603     strtabData--;              // When a symbol name found, location of the \0 terminator is returned
604                                // (not location of a name following this)
605     symName = strtabData;
606 }
607 
608 // Copy every section of ELF file (a buffer in memory) to zeBinary
addElfSections(void * elfBin,size_t elfSize)609 void ZEBinaryBuilder::addElfSections(void* elfBin, size_t elfSize)
610 {
611     // Correct (pure) size of ELF binary to be calculated
612     size_t pureElfBinSize = calcElfSize(elfBin, elfSize);
613     if (!pureElfBinSize)
614     {
615         return; // ELF file incorrect
616     }
617 
618     SElf64Header* elf64Header = (SElf64Header*)elfBin;
619     size_t entrySize = elf64Header->SectionHeaderEntrySize;  // Get the section header entry size
620 
621     CElfReader* elfReader = CElfReader::Create((char*)elfBin, pureElfBinSize);
622     RAIIElf ElfObj(elfReader);
623 
624     if (!elfReader || !elfReader->IsValidElf64(elfBin, pureElfBinSize))
625     {
626         IGC_ASSERT_MESSAGE(false, "ELF file invalid - nothing to be transfered to zeBinary");
627         return;
628     }
629 
630     // Find .symtab and .strtab (or shstrtab) sections in ELF binary.
631     const SElf64SectionHeader* symtabSectionHeader = elfReader->GetSectionHeader(".symtab");
632     const SElf64SectionHeader* strtabSectionHeader = elfReader->GetSectionHeader(".strtab");
633     if (strtabSectionHeader->DataSize <= 1)
634     {
635         strtabSectionHeader = elfReader->GetSectionHeader(".shstrtab");
636     }
637 
638     if (!strtabSectionHeader || !symtabSectionHeader)
639     {
640         IGC_ASSERT_MESSAGE(false, "Some ELF file sections not found - nothing to be transfered to zeBinary");
641         return;
642     }
643 
644     ZEELFObjectBuilder::SectionID zeBinSectionID = 0;
645 
646     char* secData = NULL;
647     size_t secDataSize = 0;
648     std::vector<std::string> zeBinSymbols;      // ELF symbols added to zeBinary for a given section; to avoid duplicated symbols.
649 
650     // ELF binary scanning sections with copying whole sections one by one to zeBinary, except:
651     // - empty sections
652     // - Text section
653     // - relocation sections
654     // Also adjusting relocations found in relocation (.rela) sections.
655     // Note:
656     // - 64-bit ELF supported only
657     // - .rel sections not supported
658 
659     for (unsigned int elfSectionIdx = 1; elfSectionIdx < elf64Header->NumSectionHeaderEntries; elfSectionIdx++)
660     {
661         if (elfReader->GetSectionData(elfSectionIdx, secData, secDataSize) != SUCCESS)
662         {
663             IGC_ASSERT_MESSAGE(false, "ELF file section data not found");
664             continue;
665         }
666 
667         if (secDataSize > 0) //pSectionHeader->DataSize > 0)
668         {
669             // Get section header to filter some section types.
670             const SElf64SectionHeader* sectionHeader = elfReader->GetSectionHeader(elfSectionIdx);
671             if (sectionHeader != nullptr)
672             {
673                 if (sectionHeader->Type == ELF::SHT_REL)
674                 {
675                     IGC_ASSERT_MESSAGE(false, "ELF file relocation sections w/o addend not supported");
676                     continue;
677                 }
678                 else if (sectionHeader->Type == ELF::SHT_RELA)
679                 {
680                     int relocEntrySize = (entrySize == 64) ? sizeof(struct ELF::Elf64_Rela) : sizeof(struct ELF::Elf32_Rela);
681                     IGC_ASSERT_MESSAGE((secDataSize % relocEntrySize) == 0, "Incorrect relocation section size");
682                     IGC_ASSERT_MESSAGE((entrySize == 64) || (entrySize == 32), "Incorrect relocation entry size");
683 
684                     // If .rela.foo is being processed then find zeBinary section ID of previously added .foo section
685                     ZEELFObjectBuilder::SectionID nonRelaSectionID =
686                         mBuilder.getSectionIDBySectionName(elfReader->GetSectionName(elfSectionIdx) + sizeof(".rela") - 1);
687                     // Local symbols with the same name are allowed in zebinary if defined in different sections.
688                     zeBinSymbols.clear();
689 
690                     if (entrySize == 64)
691                     {
692                         uint64_t relocEntryNum = secDataSize / relocEntrySize;
693                         struct ELF::Elf64_Rela relocEntry;
694 
695                         for (uint64_t i = 0; i < relocEntryNum; i++)
696                         {
697                             relocEntry = *(struct ELF::Elf64_Rela*)(secData + i * relocEntrySize);
698                             const uint32_t symtabEntrySize = sizeof(ELF::Elf64_Sym);
699                             uint64_t symtabEntryNum = symtabSectionHeader->DataSize / symtabEntrySize;
700 
701                             if ((relocEntry.r_info >> 32) < symtabEntryNum)  // index
702                             {
703                                 ELF::Elf64_Sym symtabEntry;
704                                 char* symName = NULL;
705                                 // To find a symbol name of relocation for adding to zeBinary, first we have to do
706                                 // a lookup into .symtab then we have to find this name in .strtab.
707                                 getElfSymbol(elfReader, relocEntry.r_info >> 32 /*index*/, symtabEntry, symName);
708 
709                                 vISA::ZESymEntry zeSym(
710                                     (vISA::GenSymType)symtabEntry.st_info,
711                                     (uint32_t)symtabEntry.st_value,
712                                     (uint32_t)symtabEntry.st_size,
713                                     symName);  // Symbol's name
714 
715                                 // Avoid symbol duplications - check whether a current symbol has been previously added.
716                                 bool isSymbolAdded = false;
717                                 for (auto zeBinSym : zeBinSymbols)
718                                 {
719                                     if (!zeBinSym.compare(zeSym.s_name))
720                                     {
721                                         isSymbolAdded = true;  // A current symbol has been previously added.
722                                         break;
723                                     }
724                                 }
725 
726                                 // Add either a non-global symbol, or a global symbol which is not duplicated.
727                                 if (!isSymbolAdded)
728                                 {
729                                     // A current symbol has not been previously added so do it now.
730                                     // Note: All symbols in ELF are local.
731                                     mBuilder.addSymbol(
732                                         zeSym.s_name, zeSym.s_offset, zeSym.s_size, ELF::STB_LOCAL, getSymbolElfType(zeSym), nonRelaSectionID);
733                                     zeBinSymbols.push_back(zeSym.s_name);
734                                 }
735 
736                                 unsigned int relocType = relocEntry.r_info & 0xF;
737                                 zebin::R_TYPE_ZEBIN zebinType = R_ZE_NONE;
738 
739                                 if (relocType == ELF::R_X86_64_64)
740                                     zebinType = R_ZE_SYM_ADDR;
741                                 else if (relocType == ELF::R_X86_64_32)
742                                     zebinType = R_ZE_SYM_ADDR_32;
743                                 else
744                                     IGC_ASSERT_MESSAGE(false, "Unsupported ELF relocation type");
745 
746                                 mBuilder.addRelaRelocation(
747                                     relocEntry.r_offset, zeSym.s_name, zebinType, relocEntry.r_addend, nonRelaSectionID);
748                             }
749                         }
750                     }
751                     else // entrySize == 32
752                     {
753                         IGC_ASSERT_MESSAGE(false, "ELF 64-bit entry size supported only");
754                     }
755                 }
756                 else if (const char* sectionName = elfReader->GetSectionName(elfSectionIdx))
757                 {
758                     if (!memcmp(sectionName, ".debug", sizeof(".debug") - 1))
759                     {
760                         // Non-empty, non-relocation and non-text debug section to be copied from ELF to zeBinary.
761                         zeBinSectionID = mBuilder.addSectionDebug(sectionName, (uint8_t*)secData, secDataSize); // no padding, no alignment
762                     }
763                 }
764             }
765         }
766     }
767 }
768 
getBinaryObject(llvm::raw_pwrite_stream & os)769 void ZEBinaryBuilder::getBinaryObject(llvm::raw_pwrite_stream& os)
770 {
771     if (!mZEInfoBuilder.empty())
772         mBuilder.addSectionZEInfo(mZEInfoBuilder.getZEInfoContainer());
773     mBuilder.finalize(os);
774 }
775 
getBinaryObject(Util::BinaryStream & outputStream)776 void ZEBinaryBuilder::getBinaryObject(Util::BinaryStream& outputStream)
777 {
778     llvm::SmallVector<char, 64> buf;
779     llvm::raw_svector_ostream llvm_os(buf);
780     getBinaryObject(llvm_os);
781     outputStream.Write(buf.data(), buf.size());
782 }
783 
printBinaryObject(const std::string & filename)784 void ZEBinaryBuilder::printBinaryObject(const std::string& filename)
785 {
786     std::error_code EC;
787     llvm::raw_fd_ostream os(filename, EC);
788     mBuilder.finalize(os);
789     os.close();
790 }
791 
addKernelDebugEnv(const SOpenCLKernelInfo & annotations,const CBTILayout & layout,zeInfoKernel & zeinfoKernel)792 void ZEBinaryBuilder::addKernelDebugEnv(const SOpenCLKernelInfo& annotations,
793                                         const CBTILayout& layout,
794                                         zeInfoKernel& zeinfoKernel)
795 {
796     zeInfoDebugEnv& env = zeinfoKernel.debug_env;
797     env.sip_surface_bti = layout.GetSystemThreadBindingTableIndex();
798     // Now set the sip surface offset to 0 directly. Currently the surface offset
799     // is computed locally when creating patch tokens.
800     env.sip_surface_offset = 0;
801 }
802 
addKernelVISAAsm(const std::string & kernel,const std::string & visaasm)803 void ZEBinaryBuilder::addKernelVISAAsm(const std::string& kernel,
804                                        const std::string& visaasm)
805 {
806     IGC_ASSERT(!visaasm.empty());
807     mBuilder.addSectionVISAAsm(
808         kernel,
809         reinterpret_cast<const uint8_t*>(visaasm.data()),
810         visaasm.size());
811 }
812