1 /*
2  * Copyright (C) 2020-2021 Intel Corporation
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  */
7 
8 #include "level_zero/core/source/module/module_imp.h"
9 
10 #include "shared/source/compiler_interface/compiler_warnings/compiler_warnings.h"
11 #include "shared/source/compiler_interface/intermediate_representations.h"
12 #include "shared/source/compiler_interface/linker.h"
13 #include "shared/source/device/device.h"
14 #include "shared/source/device_binary_format/debug_zebin.h"
15 #include "shared/source/device_binary_format/device_binary_formats.h"
16 #include "shared/source/device_binary_format/elf/elf.h"
17 #include "shared/source/device_binary_format/elf/elf_encoder.h"
18 #include "shared/source/device_binary_format/elf/ocl_elf.h"
19 #include "shared/source/helpers/api_specific_config.h"
20 #include "shared/source/helpers/constants.h"
21 #include "shared/source/helpers/kernel_helpers.h"
22 #include "shared/source/helpers/string.h"
23 #include "shared/source/memory_manager/memory_manager.h"
24 #include "shared/source/memory_manager/memory_operations_handler.h"
25 #include "shared/source/memory_manager/unified_memory_manager.h"
26 #include "shared/source/program/kernel_info.h"
27 #include "shared/source/program/program_initialization.h"
28 #include "shared/source/source_level_debugger/source_level_debugger.h"
29 
30 #include "level_zero/core/source/device/device.h"
31 #include "level_zero/core/source/kernel/kernel.h"
32 #include "level_zero/core/source/module/module_build_log.h"
33 
34 #include "compiler_options.h"
35 #include "program_debug_data.h"
36 
37 #include <memory>
38 #include <unordered_map>
39 
40 namespace L0 {
41 
42 namespace BuildOptions {
43 NEO::ConstStringRef optDisable = "-ze-opt-disable";
44 NEO::ConstStringRef optLevel = "-ze-opt-level";
45 NEO::ConstStringRef greaterThan4GbRequired = "-ze-opt-greater-than-4GB-buffer-required";
46 NEO::ConstStringRef hasBufferOffsetArg = "-ze-intel-has-buffer-offset-arg";
47 NEO::ConstStringRef debugKernelEnable = "-ze-kernel-debug-enable";
48 } // namespace BuildOptions
49 
ModuleTranslationUnit(L0::Device * device)50 ModuleTranslationUnit::ModuleTranslationUnit(L0::Device *device)
51     : device(device) {
52 }
53 
~ModuleTranslationUnit()54 ModuleTranslationUnit::~ModuleTranslationUnit() {
55     if (globalConstBuffer) {
56         auto svmAllocsManager = device->getDriverHandle()->getSvmAllocsManager();
57 
58         if (svmAllocsManager->getSVMAlloc(reinterpret_cast<void *>(globalConstBuffer->getGpuAddress()))) {
59             svmAllocsManager->freeSVMAlloc(reinterpret_cast<void *>(globalConstBuffer->getGpuAddress()));
60         } else {
61             this->device->getNEODevice()->getExecutionEnvironment()->memoryManager->checkGpuUsageAndDestroyGraphicsAllocations(globalConstBuffer);
62         }
63     }
64 
65     if (globalVarBuffer) {
66         auto svmAllocsManager = device->getDriverHandle()->getSvmAllocsManager();
67 
68         if (svmAllocsManager->getSVMAlloc(reinterpret_cast<void *>(globalVarBuffer->getGpuAddress()))) {
69             svmAllocsManager->freeSVMAlloc(reinterpret_cast<void *>(globalVarBuffer->getGpuAddress()));
70         } else {
71             this->device->getNEODevice()->getExecutionEnvironment()->memoryManager->checkGpuUsageAndDestroyGraphicsAllocations(globalVarBuffer);
72         }
73     }
74 
75     if (this->debugData != nullptr) {
76         for (std::vector<char *>::iterator iter = alignedvIsas.begin(); iter != alignedvIsas.end(); ++iter) {
77             alignedFree(static_cast<void *>(*iter));
78         }
79     }
80 }
81 
generateElfFromSpirV(std::vector<const char * > inputSpirVs,std::vector<uint32_t> inputModuleSizes)82 std::vector<uint8_t> ModuleTranslationUnit::generateElfFromSpirV(std::vector<const char *> inputSpirVs, std::vector<uint32_t> inputModuleSizes) {
83     NEO::Elf::ElfEncoder<> elfEncoder(true, false, 1U);
84     elfEncoder.getElfFileHeader().type = NEO::Elf::ET_OPENCL_OBJECTS;
85 
86     StackVec<uint32_t, 64> specConstIds;
87     StackVec<uint64_t, 64> specConstValues;
88     for (uint32_t i = 0; i < static_cast<uint32_t>(inputSpirVs.size()); i++) {
89         if (specConstantsValues.size() > 0) {
90             specConstIds.clear();
91             specConstValues.clear();
92             specConstIds.reserve(specConstantsValues.size());
93             specConstValues.reserve(specConstantsValues.size());
94             for (const auto &specConst : specConstantsValues) {
95                 specConstIds.push_back(specConst.first);
96                 specConstValues.push_back(specConst.second);
97             }
98             elfEncoder.appendSection(NEO::Elf::SHT_OPENCL_SPIRV_SC_IDS, NEO::Elf::SectionNamesOpenCl::spirvSpecConstIds,
99                                      ArrayRef<const uint8_t>::fromAny(specConstIds.begin(), specConstIds.size()));
100             elfEncoder.appendSection(NEO::Elf::SHT_OPENCL_SPIRV_SC_VALUES, NEO::Elf::SectionNamesOpenCl::spirvSpecConstValues,
101                                      ArrayRef<const uint8_t>::fromAny(specConstValues.begin(), specConstValues.size()));
102         }
103 
104         auto sectionType = NEO::Elf::SHT_OPENCL_SPIRV;
105         NEO::ConstStringRef sectionName = NEO::Elf::SectionNamesOpenCl::spirvObject;
106         elfEncoder.appendSection(sectionType, sectionName, ArrayRef<const uint8_t>(reinterpret_cast<const uint8_t *>(inputSpirVs[i]), inputModuleSizes[i]));
107     }
108 
109     return elfEncoder.encode();
110 }
111 
generateCompilerOptions(const char * buildOptions,const char * internalBuildOptions)112 std::string ModuleTranslationUnit::generateCompilerOptions(const char *buildOptions, const char *internalBuildOptions) {
113     if (nullptr != buildOptions) {
114         options = buildOptions;
115     }
116     std::string internalOptions = NEO::CompilerOptions::concatenate(internalBuildOptions, BuildOptions::hasBufferOffsetArg);
117 
118     if (device->getNEODevice()->getDeviceInfo().debuggerActive) {
119         if (NEO::SourceLevelDebugger::shouldAppendOptDisable(*device->getSourceLevelDebugger())) {
120             NEO::CompilerOptions::concatenateAppend(options, BuildOptions::optDisable);
121         }
122 
123         options = NEO::CompilerOptions::concatenate(options, NEO::CompilerOptions::generateDebugInfo);
124         internalOptions = NEO::CompilerOptions::concatenate(internalOptions, BuildOptions::debugKernelEnable);
125     }
126 
127     if (NEO::DebugManager.flags.DisableStatelessToStatefulOptimization.get() ||
128         device->getNEODevice()->areSharedSystemAllocationsAllowed()) {
129         internalOptions = NEO::CompilerOptions::concatenate(internalOptions, NEO::CompilerOptions::greaterThan4gbBuffersRequired);
130     }
131 
132     return internalOptions;
133 }
134 
processSpecConstantInfo(NEO::CompilerInterface * compilerInterface,const ze_module_constants_t * pConstants,const char * input,uint32_t inputSize)135 bool ModuleTranslationUnit::processSpecConstantInfo(NEO::CompilerInterface *compilerInterface, const ze_module_constants_t *pConstants, const char *input, uint32_t inputSize) {
136     if (pConstants) {
137         NEO::SpecConstantInfo specConstInfo;
138         auto retVal = compilerInterface->getSpecConstantsInfo(*device->getNEODevice(), ArrayRef<const char>(input, inputSize), specConstInfo);
139         if (retVal != NEO::TranslationOutput::ErrorCode::Success) {
140             return false;
141         }
142         for (uint32_t i = 0; i < pConstants->numConstants; i++) {
143             uint64_t specConstantValue = 0;
144             uint32_t specConstantId = pConstants->pConstantIds[i];
145             auto atributeSize = 0u;
146             uint32_t j;
147             for (j = 0; j < specConstInfo.sizesBuffer->GetSize<uint32_t>(); j++) {
148                 if (specConstantId == specConstInfo.idsBuffer->GetMemory<uint32_t>()[j]) {
149                     atributeSize = specConstInfo.sizesBuffer->GetMemory<uint32_t>()[j];
150                     break;
151                 }
152             }
153             if (j == specConstInfo.sizesBuffer->GetSize<uint32_t>()) {
154                 return false;
155             }
156             memcpy_s(&specConstantValue, sizeof(uint64_t),
157                      const_cast<void *>(pConstants->pConstantValues[i]), atributeSize);
158             specConstantsValues[specConstantId] = specConstantValue;
159         }
160     }
161     return true;
162 }
163 
compileGenBinary(NEO::TranslationInput inputArgs,bool staticLink)164 bool ModuleTranslationUnit::compileGenBinary(NEO::TranslationInput inputArgs, bool staticLink) {
165     auto compilerInterface = device->getNEODevice()->getCompilerInterface();
166     UNRECOVERABLE_IF(nullptr == compilerInterface);
167 
168     inputArgs.specializedValues = this->specConstantsValues;
169 
170     NEO::TranslationOutput compilerOuput = {};
171     NEO::TranslationOutput::ErrorCode compilerErr;
172 
173     if (staticLink) {
174         compilerErr = compilerInterface->link(*device->getNEODevice(), inputArgs, compilerOuput);
175     } else {
176         compilerErr = compilerInterface->build(*device->getNEODevice(), inputArgs, compilerOuput);
177     }
178 
179     this->updateBuildLog(compilerOuput.frontendCompilerLog);
180     this->updateBuildLog(compilerOuput.backendCompilerLog);
181 
182     if (NEO::TranslationOutput::ErrorCode::Success != compilerErr) {
183         return false;
184     }
185 
186     this->irBinary = std::move(compilerOuput.intermediateRepresentation.mem);
187     this->irBinarySize = compilerOuput.intermediateRepresentation.size;
188     this->unpackedDeviceBinary = std::move(compilerOuput.deviceBinary.mem);
189     this->unpackedDeviceBinarySize = compilerOuput.deviceBinary.size;
190     this->debugData = std::move(compilerOuput.debugData.mem);
191     this->debugDataSize = compilerOuput.debugData.size;
192 
193     return processUnpackedBinary();
194 }
195 
staticLinkSpirV(std::vector<const char * > inputSpirVs,std::vector<uint32_t> inputModuleSizes,const char * buildOptions,const char * internalBuildOptions,std::vector<const ze_module_constants_t * > specConstants)196 bool ModuleTranslationUnit::staticLinkSpirV(std::vector<const char *> inputSpirVs, std::vector<uint32_t> inputModuleSizes, const char *buildOptions, const char *internalBuildOptions,
197                                             std::vector<const ze_module_constants_t *> specConstants) {
198     auto compilerInterface = device->getNEODevice()->getCompilerInterface();
199     UNRECOVERABLE_IF(nullptr == compilerInterface);
200 
201     std::string internalOptions = this->generateCompilerOptions(buildOptions, internalBuildOptions);
202 
203     for (uint32_t i = 0; i < static_cast<uint32_t>(specConstants.size()); i++) {
204         auto specConstantResult = this->processSpecConstantInfo(compilerInterface, specConstants[i], inputSpirVs[i], inputModuleSizes[i]);
205         if (!specConstantResult) {
206             return false;
207         }
208     }
209 
210     NEO::TranslationInput linkInputArgs = {IGC::CodeType::elf, IGC::CodeType::oclGenBin};
211 
212     auto spirvElfSource = generateElfFromSpirV(inputSpirVs, inputModuleSizes);
213 
214     linkInputArgs.src = ArrayRef<const char>(reinterpret_cast<const char *>(spirvElfSource.data()), spirvElfSource.size());
215     linkInputArgs.apiOptions = ArrayRef<const char>(options.c_str(), options.length());
216     linkInputArgs.internalOptions = ArrayRef<const char>(internalOptions.c_str(), internalOptions.length());
217     return this->compileGenBinary(linkInputArgs, true);
218 }
219 
buildFromSpirV(const char * input,uint32_t inputSize,const char * buildOptions,const char * internalBuildOptions,const ze_module_constants_t * pConstants)220 bool ModuleTranslationUnit::buildFromSpirV(const char *input, uint32_t inputSize, const char *buildOptions, const char *internalBuildOptions,
221                                            const ze_module_constants_t *pConstants) {
222     auto compilerInterface = device->getNEODevice()->getCompilerInterface();
223     UNRECOVERABLE_IF(nullptr == compilerInterface);
224 
225     std::string internalOptions = this->generateCompilerOptions(buildOptions, internalBuildOptions);
226 
227     auto specConstantResult = this->processSpecConstantInfo(compilerInterface, pConstants, input, inputSize);
228     if (!specConstantResult)
229         return false;
230 
231     NEO::TranslationInput inputArgs = {IGC::CodeType::spirV, IGC::CodeType::oclGenBin};
232 
233     inputArgs.src = ArrayRef<const char>(input, inputSize);
234     inputArgs.apiOptions = ArrayRef<const char>(options.c_str(), options.length());
235     inputArgs.internalOptions = ArrayRef<const char>(internalOptions.c_str(), internalOptions.length());
236     return this->compileGenBinary(inputArgs, false);
237 }
238 
createFromNativeBinary(const char * input,size_t inputSize)239 bool ModuleTranslationUnit::createFromNativeBinary(const char *input, size_t inputSize) {
240     UNRECOVERABLE_IF((nullptr == device) || (nullptr == device->getNEODevice()));
241     auto productAbbreviation = NEO::hardwarePrefix[device->getNEODevice()->getHardwareInfo().platform.eProductFamily];
242 
243     NEO::TargetDevice targetDevice = {};
244     targetDevice.coreFamily = device->getNEODevice()->getHardwareInfo().platform.eRenderCoreFamily;
245     targetDevice.productFamily = device->getNEODevice()->getHardwareInfo().platform.eProductFamily;
246     targetDevice.stepping = device->getNEODevice()->getHardwareInfo().platform.usRevId;
247     targetDevice.maxPointerSizeInBytes = sizeof(uintptr_t);
248     std::string decodeErrors;
249     std::string decodeWarnings;
250     ArrayRef<const uint8_t> archive(reinterpret_cast<const uint8_t *>(input), inputSize);
251     auto singleDeviceBinary = unpackSingleDeviceBinary(archive, NEO::ConstStringRef(productAbbreviation, strlen(productAbbreviation)), targetDevice,
252                                                        decodeErrors, decodeWarnings);
253     if (decodeWarnings.empty() == false) {
254         PRINT_DEBUG_STRING(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "%s\n", decodeWarnings.c_str());
255     }
256 
257     if (singleDeviceBinary.intermediateRepresentation.empty() && singleDeviceBinary.deviceBinary.empty()) {
258         PRINT_DEBUG_STRING(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "%s\n", decodeErrors.c_str());
259         return false;
260     } else {
261         this->irBinary = makeCopy(reinterpret_cast<const char *>(singleDeviceBinary.intermediateRepresentation.begin()), singleDeviceBinary.intermediateRepresentation.size());
262         this->irBinarySize = singleDeviceBinary.intermediateRepresentation.size();
263         this->options = singleDeviceBinary.buildOptions.str();
264 
265         if (false == singleDeviceBinary.debugData.empty()) {
266             this->debugData = makeCopy(reinterpret_cast<const char *>(singleDeviceBinary.debugData.begin()), singleDeviceBinary.debugData.size());
267             this->debugDataSize = singleDeviceBinary.debugData.size();
268         }
269 
270         bool rebuild = NEO::DebugManager.flags.RebuildPrecompiledKernels.get() && irBinarySize != 0;
271         if ((false == singleDeviceBinary.deviceBinary.empty()) && (false == rebuild)) {
272             this->unpackedDeviceBinary = makeCopy<char>(reinterpret_cast<const char *>(singleDeviceBinary.deviceBinary.begin()), singleDeviceBinary.deviceBinary.size());
273             this->unpackedDeviceBinarySize = singleDeviceBinary.deviceBinary.size();
274             this->packedDeviceBinary = makeCopy<char>(reinterpret_cast<const char *>(archive.begin()), archive.size());
275             this->packedDeviceBinarySize = archive.size();
276         }
277     }
278 
279     if (nullptr == this->unpackedDeviceBinary) {
280         if (!shouldSuppressRebuildWarning) {
281             updateBuildLog(NEO::CompilerWarnings::recompiledFromIr.str());
282         }
283 
284         return buildFromSpirV(this->irBinary.get(), static_cast<uint32_t>(this->irBinarySize), this->options.c_str(), "", nullptr);
285     } else {
286         return processUnpackedBinary();
287     }
288 }
289 
processUnpackedBinary()290 bool ModuleTranslationUnit::processUnpackedBinary() {
291     if (0 == unpackedDeviceBinarySize) {
292         return false;
293     }
294     auto blob = ArrayRef<const uint8_t>(reinterpret_cast<const uint8_t *>(this->unpackedDeviceBinary.get()), this->unpackedDeviceBinarySize);
295     NEO::SingleDeviceBinary binary = {};
296     binary.deviceBinary = blob;
297     binary.targetDevice.grfSize = device->getHwInfo().capabilityTable.grfSize;
298     std::string decodeErrors;
299     std::string decodeWarnings;
300 
301     NEO::DecodeError decodeError;
302     NEO::DeviceBinaryFormat singleDeviceBinaryFormat;
303     programInfo.levelZeroDynamicLinkProgram = true;
304     std::tie(decodeError, singleDeviceBinaryFormat) = NEO::decodeSingleDeviceBinary(programInfo, binary, decodeErrors, decodeWarnings);
305     if (decodeWarnings.empty() == false) {
306         PRINT_DEBUG_STRING(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "%s\n", decodeWarnings.c_str());
307     }
308 
309     if (NEO::DecodeError::Success != decodeError) {
310         PRINT_DEBUG_STRING(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "%s\n", decodeErrors.c_str());
311         return false;
312     }
313 
314     if (programInfo.decodedElf.elfFileHeader) {
315         NEO::LinkerInput::SectionNameToSegmentIdMap nameToKernelId;
316 
317         uint32_t id = 0;
318         for (auto &kernelInfo : this->programInfo.kernelInfos) {
319             nameToKernelId[kernelInfo->kernelDescriptor.kernelMetadata.kernelName] = id;
320             id++;
321         }
322         programInfo.prepareLinkerInputStorage();
323         programInfo.linkerInput->undefinedSymbolsAllowed = programInfo.levelZeroDynamicLinkProgram;
324         programInfo.linkerInput->decodeElfSymbolTableAndRelocations(programInfo.decodedElf, nameToKernelId);
325     }
326 
327     processDebugData();
328 
329     size_t slmNeeded = NEO::getMaxInlineSlmNeeded(programInfo);
330     size_t slmAvailable = 0U;
331     NEO::DeviceInfoKernelPayloadConstants deviceInfoConstants;
332     slmAvailable = static_cast<size_t>(device->getDeviceInfo().localMemSize);
333     deviceInfoConstants.maxWorkGroupSize = static_cast<uint32_t>(device->getDeviceInfo().maxWorkGroupSize);
334     deviceInfoConstants.computeUnitsUsedForScratch = static_cast<uint32_t>(device->getDeviceInfo().computeUnitsUsedForScratch);
335     deviceInfoConstants.slmWindowSize = static_cast<uint32_t>(device->getDeviceInfo().localMemSize);
336     if (NEO::requiresLocalMemoryWindowVA(programInfo)) {
337         deviceInfoConstants.slmWindow = device->getNEODevice()->getExecutionEnvironment()->memoryManager->getReservedMemory(MemoryConstants::slmWindowSize, MemoryConstants::slmWindowAlignment);
338     }
339 
340     if (slmNeeded > slmAvailable) {
341         return false;
342     }
343 
344     auto svmAllocsManager = device->getDriverHandle()->getSvmAllocsManager();
345     if (programInfo.globalConstants.size != 0) {
346         this->globalConstBuffer = NEO::allocateGlobalsSurface(svmAllocsManager, *device->getNEODevice(), programInfo.globalConstants.size, true, programInfo.linkerInput.get(), programInfo.globalConstants.initData);
347     }
348 
349     if (programInfo.globalVariables.size != 0) {
350         this->globalVarBuffer = NEO::allocateGlobalsSurface(svmAllocsManager, *device->getNEODevice(), programInfo.globalVariables.size, false, programInfo.linkerInput.get(), programInfo.globalVariables.initData);
351     }
352 
353     for (auto &kernelInfo : this->programInfo.kernelInfos) {
354         kernelInfo->apply(deviceInfoConstants);
355     }
356 
357     auto gfxCore = device->getNEODevice()->getHardwareInfo().platform.eRenderCoreFamily;
358     auto stepping = device->getNEODevice()->getHardwareInfo().platform.usRevId;
359 
360     if (this->packedDeviceBinary != nullptr) {
361         return true;
362     }
363 
364     NEO::SingleDeviceBinary singleDeviceBinary;
365     singleDeviceBinary.buildOptions = this->options;
366     singleDeviceBinary.targetDevice.coreFamily = gfxCore;
367     singleDeviceBinary.targetDevice.stepping = stepping;
368     singleDeviceBinary.deviceBinary = ArrayRef<const uint8_t>(reinterpret_cast<const uint8_t *>(this->unpackedDeviceBinary.get()), this->unpackedDeviceBinarySize);
369     singleDeviceBinary.intermediateRepresentation = ArrayRef<const uint8_t>(reinterpret_cast<const uint8_t *>(this->irBinary.get()), this->irBinarySize);
370     singleDeviceBinary.debugData = ArrayRef<const uint8_t>(reinterpret_cast<const uint8_t *>(this->debugData.get()), this->debugDataSize);
371     std::string packWarnings;
372     std::string packErrors;
373     auto packedDeviceBinary = NEO::packDeviceBinary(singleDeviceBinary, packErrors, packWarnings);
374     if (packedDeviceBinary.empty()) {
375         DEBUG_BREAK_IF(true);
376         return false;
377     }
378     this->packedDeviceBinary = makeCopy(packedDeviceBinary.data(), packedDeviceBinary.size());
379     this->packedDeviceBinarySize = packedDeviceBinary.size();
380 
381     return true;
382 }
383 
updateBuildLog(const std::string & newLogEntry)384 void ModuleTranslationUnit::updateBuildLog(const std::string &newLogEntry) {
385     if (newLogEntry.empty() || ('\0' == newLogEntry[0])) {
386         return;
387     }
388 
389     buildLog += newLogEntry.c_str();
390     if ('\n' != *buildLog.rbegin()) {
391         buildLog.append("\n");
392     }
393 }
394 
processDebugData()395 void ModuleTranslationUnit::processDebugData() {
396     if (this->debugData != nullptr) {
397         iOpenCL::SProgramDebugDataHeaderIGC *programDebugHeader = reinterpret_cast<iOpenCL::SProgramDebugDataHeaderIGC *>(debugData.get());
398 
399         DEBUG_BREAK_IF(programDebugHeader->NumberOfKernels != programInfo.kernelInfos.size());
400 
401         const iOpenCL::SKernelDebugDataHeaderIGC *kernelDebugHeader = reinterpret_cast<iOpenCL::SKernelDebugDataHeaderIGC *>(
402             ptrOffset(programDebugHeader, sizeof(iOpenCL::SProgramDebugDataHeaderIGC)));
403 
404         const char *kernelName = nullptr;
405         const char *kernelDebugData = nullptr;
406 
407         for (uint32_t i = 0; i < programDebugHeader->NumberOfKernels; i++) {
408             kernelName = reinterpret_cast<const char *>(ptrOffset(kernelDebugHeader, sizeof(iOpenCL::SKernelDebugDataHeaderIGC)));
409 
410             auto kernelInfo = programInfo.kernelInfos[i];
411             UNRECOVERABLE_IF(kernelInfo->kernelDescriptor.kernelMetadata.kernelName.compare(0, kernelInfo->kernelDescriptor.kernelMetadata.kernelName.size(), kernelName) != 0);
412 
413             kernelDebugData = ptrOffset(kernelName, kernelDebugHeader->KernelNameSize);
414 
415             kernelInfo->kernelDescriptor.external.debugData = std::make_unique<NEO::DebugData>();
416 
417             char *alignedAlloc = static_cast<char *>(alignedMalloc(kernelDebugHeader->SizeVisaDbgInBytes, MemoryConstants::pageSize));
418             memcpy_s(static_cast<void *>(alignedAlloc), kernelDebugHeader->SizeVisaDbgInBytes, kernelDebugData, kernelDebugHeader->SizeVisaDbgInBytes);
419 
420             kernelInfo->kernelDescriptor.external.debugData->vIsa = alignedAlloc;
421             kernelInfo->kernelDescriptor.external.debugData->genIsa = ptrOffset(kernelDebugData, kernelDebugHeader->SizeVisaDbgInBytes);
422             kernelInfo->kernelDescriptor.external.debugData->vIsaSize = kernelDebugHeader->SizeVisaDbgInBytes;
423             kernelInfo->kernelDescriptor.external.debugData->genIsaSize = kernelDebugHeader->SizeGenIsaDbgInBytes;
424 
425             kernelDebugData = ptrOffset(kernelDebugData, static_cast<size_t>(kernelDebugHeader->SizeVisaDbgInBytes) + kernelDebugHeader->SizeGenIsaDbgInBytes);
426             kernelDebugHeader = reinterpret_cast<const iOpenCL::SKernelDebugDataHeaderIGC *>(kernelDebugData);
427             alignedvIsas.push_back(alignedAlloc);
428         }
429     }
430 }
431 
ModuleImp(Device * device,ModuleBuildLog * moduleBuildLog,ModuleType type)432 ModuleImp::ModuleImp(Device *device, ModuleBuildLog *moduleBuildLog, ModuleType type)
433     : device(device), translationUnit(std::make_unique<ModuleTranslationUnit>(device)),
434       moduleBuildLog(moduleBuildLog), type(type) {
435     productFamily = device->getHwInfo().platform.eProductFamily;
436 }
437 
~ModuleImp()438 ModuleImp::~ModuleImp() {
439     kernelImmDatas.clear();
440 }
441 
getZebinSegments()442 NEO::Debug::Segments ModuleImp::getZebinSegments() {
443     NEO::Debug::Segments segments;
444 
445     auto varBuffer = translationUnit->globalVarBuffer;
446     if (varBuffer) {
447         segments.varData = {varBuffer->getGpuAddressToPatch(), {reinterpret_cast<uint8_t *>(varBuffer->getUnderlyingBuffer()), varBuffer->getUnderlyingBufferSize()}};
448     }
449 
450     auto constBuffer = translationUnit->globalConstBuffer;
451     if (constBuffer) {
452         segments.constData = {constBuffer->getGpuAddressToPatch(), {reinterpret_cast<uint8_t *>(constBuffer->getUnderlyingBuffer()), constBuffer->getUnderlyingBufferSize()}};
453     }
454 
455     auto stringBuffer = translationUnit->programInfo.globalStrings;
456     if (stringBuffer.initData) {
457         segments.stringData = {reinterpret_cast<uintptr_t>(stringBuffer.initData),
458                                {reinterpret_cast<const uint8_t *>(stringBuffer.initData), stringBuffer.size}};
459     }
460 
461     for (auto &kernImmData : this->kernelImmDatas) {
462         const auto &isa = kernImmData->getIsaGraphicsAllocation();
463         NEO::Debug::Segments::Segment kernelSegment = {isa->getGpuAddressToPatch(), {reinterpret_cast<uint8_t *>(isa->getUnderlyingBuffer()), isa->getUnderlyingBufferSize()}};
464         segments.nameToSegMap.insert(std::pair(kernImmData->getDescriptor().kernelMetadata.kernelName, kernelSegment));
465     }
466 
467     return segments;
468 }
469 
initialize(const ze_module_desc_t * desc,NEO::Device * neoDevice)470 bool ModuleImp::initialize(const ze_module_desc_t *desc, NEO::Device *neoDevice) {
471     bool success = true;
472 
473     std::string buildOptions;
474     std::string internalBuildOptions;
475 
476     if (desc->pNext) {
477         const ze_base_desc_t *expDesc = reinterpret_cast<const ze_base_desc_t *>(desc->pNext);
478         if (expDesc->stype == ZE_STRUCTURE_TYPE_MODULE_PROGRAM_EXP_DESC) {
479             if (desc->format != ZE_MODULE_FORMAT_IL_SPIRV) {
480                 return false;
481             }
482             const ze_module_program_exp_desc_t *programExpDesc =
483                 reinterpret_cast<const ze_module_program_exp_desc_t *>(expDesc);
484             std::vector<const char *> inputSpirVs;
485             std::vector<uint32_t> inputModuleSizes;
486             std::vector<const ze_module_constants_t *> specConstants;
487 
488             this->createBuildOptions(nullptr, buildOptions, internalBuildOptions);
489 
490             for (uint32_t i = 0; i < static_cast<uint32_t>(programExpDesc->count); i++) {
491                 std::string tmpBuildOptions;
492                 std::string tmpInternalBuildOptions;
493                 inputSpirVs.push_back(reinterpret_cast<const char *>(programExpDesc->pInputModules[i]));
494                 auto inputSizesInfo = const_cast<size_t *>(programExpDesc->inputSizes);
495                 uint32_t inputSize = static_cast<uint32_t>(inputSizesInfo[i]);
496                 inputModuleSizes.push_back(inputSize);
497                 if (programExpDesc->pConstants) {
498                     specConstants.push_back(programExpDesc->pConstants[i]);
499                 }
500                 if (programExpDesc->pBuildFlags) {
501                     this->createBuildOptions(programExpDesc->pBuildFlags[i], tmpBuildOptions, tmpInternalBuildOptions);
502                     buildOptions = buildOptions + tmpBuildOptions;
503                     internalBuildOptions = internalBuildOptions + tmpInternalBuildOptions;
504                 }
505             }
506 
507             success = this->translationUnit->staticLinkSpirV(inputSpirVs,
508                                                              inputModuleSizes,
509                                                              buildOptions.c_str(),
510                                                              internalBuildOptions.c_str(),
511                                                              specConstants);
512         } else {
513             return false;
514         }
515     } else {
516         std::string buildFlagsInput{desc->pBuildFlags != nullptr ? desc->pBuildFlags : ""};
517         this->translationUnit->shouldSuppressRebuildWarning = NEO::CompilerOptions::extract(NEO::CompilerOptions::noRecompiledFromIr, buildFlagsInput);
518         this->createBuildOptions(buildFlagsInput.c_str(), buildOptions, internalBuildOptions);
519 
520         if (type == ModuleType::User && NEO::DebugManager.flags.InjectInternalBuildOptions.get() != "unk") {
521             NEO::CompilerOptions::concatenateAppend(internalBuildOptions, NEO::DebugManager.flags.InjectInternalBuildOptions.get());
522         }
523 
524         if (desc->format == ZE_MODULE_FORMAT_NATIVE) {
525             success = this->translationUnit->createFromNativeBinary(
526                 reinterpret_cast<const char *>(desc->pInputModule), desc->inputSize);
527         } else if (desc->format == ZE_MODULE_FORMAT_IL_SPIRV) {
528             success = this->translationUnit->buildFromSpirV(reinterpret_cast<const char *>(desc->pInputModule),
529                                                             static_cast<uint32_t>(desc->inputSize),
530                                                             buildOptions.c_str(),
531                                                             internalBuildOptions.c_str(),
532                                                             desc->pConstants);
533         } else {
534             return false;
535         }
536     }
537 
538     this->updateBuildLog(neoDevice);
539     verifyDebugCapabilities();
540 
541     if (false == success) {
542         return false;
543     }
544 
545     kernelImmDatas.reserve(this->translationUnit->programInfo.kernelInfos.size());
546     for (auto &ki : this->translationUnit->programInfo.kernelInfos) {
547         std::unique_ptr<KernelImmutableData> kernelImmData{new KernelImmutableData(this->device)};
548         kernelImmData->initialize(ki, device, device->getNEODevice()->getDeviceInfo().computeUnitsUsedForScratch,
549                                   this->translationUnit->globalConstBuffer, this->translationUnit->globalVarBuffer,
550                                   this->type == ModuleType::Builtin);
551         kernelImmDatas.push_back(std::move(kernelImmData));
552     }
553     this->maxGroupSize = static_cast<uint32_t>(this->translationUnit->device->getNEODevice()->getDeviceInfo().maxWorkGroupSize);
554 
555     checkIfPrivateMemoryPerDispatchIsNeeded();
556 
557     success = this->linkBinary();
558 
559     if (debugEnabled) {
560         passDebugData();
561     }
562 
563     auto &hwInfo = neoDevice->getHardwareInfo();
564     auto &hwHelper = NEO::HwHelper::get(hwInfo.platform.eRenderCoreFamily);
565 
566     if (this->isFullyLinked) {
567         for (auto &ki : kernelImmDatas) {
568 
569             if (this->type == ModuleType::User && !ki->isIsaCopiedToAllocation()) {
570 
571                 NEO::MemoryTransferHelper::transferMemoryToAllocation(hwHelper.isBlitCopyRequiredForLocalMemory(hwInfo, *ki->getIsaGraphicsAllocation()),
572                                                                       *neoDevice, ki->getIsaGraphicsAllocation(), 0, ki->getKernelInfo()->heapInfo.pKernelHeap,
573                                                                       static_cast<size_t>(ki->getKernelInfo()->heapInfo.KernelHeapSize));
574 
575                 ki->setIsaCopiedToAllocation();
576             }
577 
578             if (device->getL0Debugger()) {
579                 NEO::MemoryOperationsHandler *memoryOperationsIface = neoDevice->getRootDeviceEnvironment().memoryOperationsInterface.get();
580                 if (memoryOperationsIface) {
581                     auto allocation = ki->getIsaGraphicsAllocation();
582                     memoryOperationsIface->makeResident(neoDevice, ArrayRef<NEO::GraphicsAllocation *>(&allocation, 1));
583                 }
584             }
585         }
586     }
587     return success;
588 }
589 
createDebugZebin()590 void ModuleImp::createDebugZebin() {
591     auto refBin = ArrayRef<const uint8_t>(reinterpret_cast<const uint8_t *>(translationUnit->unpackedDeviceBinary.get()), translationUnit->unpackedDeviceBinarySize);
592     auto segments = getZebinSegments();
593     auto debugZebin = NEO::Debug::createDebugZebin(refBin, segments);
594 
595     translationUnit->debugDataSize = debugZebin.size();
596     translationUnit->debugData.reset(new char[translationUnit->debugDataSize]);
597     memcpy_s(translationUnit->debugData.get(), translationUnit->debugDataSize,
598              debugZebin.data(), debugZebin.size());
599 }
600 
passDebugData()601 void ModuleImp::passDebugData() {
602     auto refBin = ArrayRef<const uint8_t>(reinterpret_cast<const uint8_t *>(translationUnit->unpackedDeviceBinary.get()), translationUnit->unpackedDeviceBinarySize);
603     if (NEO::isDeviceBinaryFormat<NEO::DeviceBinaryFormat::Zebin>(refBin)) {
604         createDebugZebin();
605         if (device->getSourceLevelDebugger()) {
606             NEO::DebugData debugData; // pass debug zebin in vIsa field
607             debugData.vIsa = reinterpret_cast<const char *>(translationUnit->debugData.get());
608             debugData.vIsaSize = static_cast<uint32_t>(translationUnit->debugDataSize);
609             device->getSourceLevelDebugger()->notifyKernelDebugData(&debugData, "debug_zebin", nullptr, 0);
610         }
611     } else {
612         if (device->getSourceLevelDebugger()) {
613             for (auto kernelInfo : this->translationUnit->programInfo.kernelInfos) {
614                 NEO::DebugData *notifyDebugData = kernelInfo->kernelDescriptor.external.debugData.get();
615                 NEO::DebugData relocatedDebugData;
616 
617                 if (kernelInfo->kernelDescriptor.external.relocatedDebugData.get()) {
618                     relocatedDebugData.genIsa = kernelInfo->kernelDescriptor.external.debugData->genIsa;
619                     relocatedDebugData.genIsaSize = kernelInfo->kernelDescriptor.external.debugData->genIsaSize;
620                     relocatedDebugData.vIsa = reinterpret_cast<char *>(kernelInfo->kernelDescriptor.external.relocatedDebugData.get());
621                     relocatedDebugData.vIsaSize = kernelInfo->kernelDescriptor.external.debugData->vIsaSize;
622                     notifyDebugData = &relocatedDebugData;
623                 }
624 
625                 device->getSourceLevelDebugger()->notifyKernelDebugData(notifyDebugData,
626                                                                         kernelInfo->kernelDescriptor.kernelMetadata.kernelName,
627                                                                         kernelInfo->heapInfo.pKernelHeap,
628                                                                         kernelInfo->heapInfo.KernelHeapSize);
629             }
630         }
631     }
632 }
633 
getKernelImmutableData(const char * functionName) const634 const KernelImmutableData *ModuleImp::getKernelImmutableData(const char *functionName) const {
635     for (auto &kernelImmData : kernelImmDatas) {
636         if (kernelImmData->getDescriptor().kernelMetadata.kernelName.compare(functionName) == 0) {
637             return kernelImmData.get();
638         }
639     }
640     return nullptr;
641 }
642 
createBuildOptions(const char * pBuildFlags,std::string & apiOptions,std::string & internalBuildOptions)643 void ModuleImp::createBuildOptions(const char *pBuildFlags, std::string &apiOptions, std::string &internalBuildOptions) {
644     if (pBuildFlags != nullptr) {
645         std::string buildFlags(pBuildFlags);
646 
647         apiOptions = pBuildFlags;
648         moveBuildOption(apiOptions, apiOptions, NEO::CompilerOptions::optDisable, BuildOptions::optDisable);
649         moveBuildOption(apiOptions, apiOptions, NEO::CompilerOptions::optLevel, BuildOptions::optLevel);
650         moveBuildOption(internalBuildOptions, apiOptions, NEO::CompilerOptions::greaterThan4gbBuffersRequired, BuildOptions::greaterThan4GbRequired);
651         moveBuildOption(internalBuildOptions, apiOptions, NEO::CompilerOptions::allowZebin, NEO::CompilerOptions::allowZebin);
652 
653         createBuildExtraOptions(apiOptions, internalBuildOptions);
654     }
655     if (NEO::ApiSpecificConfig::getBindlessConfiguration()) {
656         NEO::CompilerOptions::concatenateAppend(internalBuildOptions, NEO::CompilerOptions::bindlessMode.str());
657     }
658 }
659 
updateBuildLog(NEO::Device * neoDevice)660 void ModuleImp::updateBuildLog(NEO::Device *neoDevice) {
661     if (this->moduleBuildLog) {
662         moduleBuildLog->appendString(this->translationUnit->buildLog.c_str(), this->translationUnit->buildLog.size());
663     }
664 }
665 
createKernel(const ze_kernel_desc_t * desc,ze_kernel_handle_t * phFunction)666 ze_result_t ModuleImp::createKernel(const ze_kernel_desc_t *desc,
667                                     ze_kernel_handle_t *phFunction) {
668     ze_result_t res;
669     if (!isFullyLinked) {
670         return ZE_RESULT_ERROR_INVALID_MODULE_UNLINKED;
671     }
672     auto kernel = Kernel::create(productFamily, this, desc, &res);
673 
674     if (res == ZE_RESULT_SUCCESS) {
675         *phFunction = kernel->toHandle();
676     }
677 
678     return res;
679 }
680 
getNativeBinary(size_t * pSize,uint8_t * pModuleNativeBinary)681 ze_result_t ModuleImp::getNativeBinary(size_t *pSize, uint8_t *pModuleNativeBinary) {
682     auto genBinary = this->translationUnit->packedDeviceBinary.get();
683 
684     *pSize = this->translationUnit->packedDeviceBinarySize;
685     if (pModuleNativeBinary != nullptr) {
686         memcpy_s(pModuleNativeBinary, this->translationUnit->packedDeviceBinarySize, genBinary, this->translationUnit->packedDeviceBinarySize);
687     }
688     return ZE_RESULT_SUCCESS;
689 }
690 
getDebugInfo(size_t * pDebugDataSize,uint8_t * pDebugData)691 ze_result_t ModuleImp::getDebugInfo(size_t *pDebugDataSize, uint8_t *pDebugData) {
692     if (translationUnit == nullptr) {
693         return ZE_RESULT_ERROR_UNINITIALIZED;
694     }
695     auto refBin = ArrayRef<const uint8_t>(reinterpret_cast<const uint8_t *>(translationUnit->unpackedDeviceBinary.get()), translationUnit->unpackedDeviceBinarySize);
696     if (nullptr == translationUnit->debugData.get() && NEO::isDeviceBinaryFormat<NEO::DeviceBinaryFormat::Zebin>(refBin)) {
697         createDebugZebin();
698     }
699     if (pDebugData != nullptr) {
700         if (*pDebugDataSize < translationUnit->debugDataSize) {
701             return ZE_RESULT_ERROR_INVALID_ARGUMENT;
702         }
703         memcpy_s(pDebugData, *pDebugDataSize, translationUnit->debugData.get(), translationUnit->debugDataSize);
704     }
705     *pDebugDataSize = translationUnit->debugDataSize;
706     return ZE_RESULT_SUCCESS;
707 }
708 
copyPatchedSegments(const NEO::Linker::PatchableSegments & isaSegmentsForPatching)709 void ModuleImp::copyPatchedSegments(const NEO::Linker::PatchableSegments &isaSegmentsForPatching) {
710     if (this->translationUnit->programInfo.linkerInput && this->translationUnit->programInfo.linkerInput->getTraits().requiresPatchingOfInstructionSegments) {
711         for (auto &kernelImmData : this->kernelImmDatas) {
712             if (nullptr == kernelImmData->getIsaGraphicsAllocation()) {
713                 continue;
714             }
715 
716             UNRECOVERABLE_IF(kernelImmData->isIsaCopiedToAllocation());
717 
718             kernelImmData->getIsaGraphicsAllocation()->setTbxWritable(true, std::numeric_limits<uint32_t>::max());
719             kernelImmData->getIsaGraphicsAllocation()->setAubWritable(true, std::numeric_limits<uint32_t>::max());
720             auto segmentId = &kernelImmData - &this->kernelImmDatas[0];
721             this->device->getDriverHandle()->getMemoryManager()->copyMemoryToAllocation(kernelImmData->getIsaGraphicsAllocation(), 0,
722                                                                                         isaSegmentsForPatching[segmentId].hostPointer,
723                                                                                         isaSegmentsForPatching[segmentId].segmentSize);
724 
725             kernelImmData->setIsaCopiedToAllocation();
726         }
727     }
728 }
729 
linkBinary()730 bool ModuleImp::linkBinary() {
731     using namespace NEO;
732     auto linkerInput = this->translationUnit->programInfo.linkerInput.get();
733     if (linkerInput == nullptr) {
734         isFullyLinked = true;
735         return true;
736     }
737     Linker linker(*linkerInput);
738     Linker::SegmentInfo globals;
739     Linker::SegmentInfo constants;
740     Linker::SegmentInfo exportedFunctions;
741     Linker::SegmentInfo strings;
742     GraphicsAllocation *globalsForPatching = translationUnit->globalVarBuffer;
743     GraphicsAllocation *constantsForPatching = translationUnit->globalConstBuffer;
744     if (globalsForPatching != nullptr) {
745         globals.gpuAddress = static_cast<uintptr_t>(globalsForPatching->getGpuAddress());
746         globals.segmentSize = globalsForPatching->getUnderlyingBufferSize();
747     }
748     if (constantsForPatching != nullptr) {
749         constants.gpuAddress = static_cast<uintptr_t>(constantsForPatching->getGpuAddress());
750         constants.segmentSize = constantsForPatching->getUnderlyingBufferSize();
751     }
752     if (translationUnit->programInfo.globalStrings.initData != nullptr) {
753         strings.gpuAddress = reinterpret_cast<uintptr_t>(translationUnit->programInfo.globalStrings.initData);
754         strings.segmentSize = translationUnit->programInfo.globalStrings.size;
755     }
756     if (linkerInput->getExportedFunctionsSegmentId() >= 0) {
757         auto exportedFunctionHeapId = linkerInput->getExportedFunctionsSegmentId();
758         this->exportedFunctionsSurface = this->kernelImmDatas[exportedFunctionHeapId]->getIsaGraphicsAllocation();
759         exportedFunctions.gpuAddress = static_cast<uintptr_t>(exportedFunctionsSurface->getGpuAddressToPatch());
760         exportedFunctions.segmentSize = exportedFunctionsSurface->getUnderlyingBufferSize();
761     }
762     Linker::PatchableSegments isaSegmentsForPatching;
763     std::vector<std::vector<char>> patchedIsaTempStorage;
764     Linker::KernelDescriptorsT kernelDescriptors;
765     if (linkerInput->getTraits().requiresPatchingOfInstructionSegments) {
766         patchedIsaTempStorage.reserve(this->kernelImmDatas.size());
767         kernelDescriptors.reserve(this->kernelImmDatas.size());
768         for (const auto &kernelInfo : this->translationUnit->programInfo.kernelInfos) {
769             auto &kernHeapInfo = kernelInfo->heapInfo;
770             const char *originalIsa = reinterpret_cast<const char *>(kernHeapInfo.pKernelHeap);
771             patchedIsaTempStorage.push_back(std::vector<char>(originalIsa, originalIsa + kernHeapInfo.KernelHeapSize));
772             isaSegmentsForPatching.push_back(Linker::PatchableSegment{patchedIsaTempStorage.rbegin()->data(), kernHeapInfo.KernelHeapSize});
773             kernelDescriptors.push_back(&kernelInfo->kernelDescriptor);
774         }
775     }
776 
777     auto linkStatus = linker.link(globals, constants, exportedFunctions, strings,
778                                   globalsForPatching, constantsForPatching,
779                                   isaSegmentsForPatching, unresolvedExternalsInfo, this->device->getNEODevice(),
780                                   translationUnit->programInfo.globalConstants.initData,
781                                   translationUnit->programInfo.globalVariables.initData, kernelDescriptors);
782     this->symbols = linker.extractRelocatedSymbols();
783     if (LinkingStatus::LinkedFully != linkStatus) {
784         if (moduleBuildLog) {
785             std::vector<std::string> kernelNames;
786             for (const auto &kernelInfo : this->translationUnit->programInfo.kernelInfos) {
787                 kernelNames.push_back("kernel : " + kernelInfo->kernelDescriptor.kernelMetadata.kernelName);
788             }
789             auto error = constructLinkerErrorMessage(unresolvedExternalsInfo, kernelNames);
790             moduleBuildLog->appendString(error.c_str(), error.size());
791         }
792         isFullyLinked = false;
793         return LinkingStatus::LinkedPartially == linkStatus;
794     } else if (type != ModuleType::Builtin) {
795         copyPatchedSegments(isaSegmentsForPatching);
796     }
797     DBG_LOG(PrintRelocations, NEO::constructRelocationsDebugMessage(this->symbols));
798     isFullyLinked = true;
799     for (auto kernelId = 0u; kernelId < kernelImmDatas.size(); kernelId++) {
800         auto &kernImmData = kernelImmDatas[kernelId];
801 
802         kernImmData->getResidencyContainer().reserve(kernImmData->getResidencyContainer().size() +
803                                                      ((this->exportedFunctionsSurface != nullptr) ? 1 : 0) + this->importedSymbolAllocations.size());
804 
805         if (nullptr != this->exportedFunctionsSurface) {
806             kernImmData->getResidencyContainer().push_back(this->exportedFunctionsSurface);
807         }
808         kernImmData->getResidencyContainer().insert(kernImmData->getResidencyContainer().end(), this->importedSymbolAllocations.begin(),
809                                                     this->importedSymbolAllocations.end());
810     }
811     return true;
812 }
813 
getFunctionPointer(const char * pFunctionName,void ** pfnFunction)814 ze_result_t ModuleImp::getFunctionPointer(const char *pFunctionName, void **pfnFunction) {
815     auto symbolIt = symbols.find(pFunctionName);
816     if ((symbolIt == symbols.end()) || (symbolIt->second.symbol.segment != NEO::SegmentType::Instructions)) {
817         return ZE_RESULT_ERROR_INVALID_ARGUMENT;
818     }
819 
820     *pfnFunction = reinterpret_cast<void *>(symbolIt->second.gpuAddress);
821     return ZE_RESULT_SUCCESS;
822 }
823 
getGlobalPointer(const char * pGlobalName,size_t * pSize,void ** pPtr)824 ze_result_t ModuleImp::getGlobalPointer(const char *pGlobalName, size_t *pSize, void **pPtr) {
825     auto symbolIt = symbols.find(pGlobalName);
826     if ((symbolIt == symbols.end()) || (symbolIt->second.symbol.segment == NEO::SegmentType::Instructions)) {
827         return ZE_RESULT_ERROR_INVALID_ARGUMENT;
828     }
829     if (pPtr) {
830         *pPtr = reinterpret_cast<void *>(symbolIt->second.gpuAddress);
831     }
832     if (pSize) {
833         *pSize = symbolIt->second.symbol.size;
834     }
835     return ZE_RESULT_SUCCESS;
836 }
837 
create(Device * device,const ze_module_desc_t * desc,ModuleBuildLog * moduleBuildLog,ModuleType type)838 Module *Module::create(Device *device, const ze_module_desc_t *desc,
839                        ModuleBuildLog *moduleBuildLog, ModuleType type) {
840     auto module = new ModuleImp(device, moduleBuildLog, type);
841 
842     bool success = module->initialize(desc, device->getNEODevice());
843     if (success == false) {
844         module->destroy();
845         return nullptr;
846     }
847 
848     return module;
849 }
850 
getKernelNames(uint32_t * pCount,const char ** pNames)851 ze_result_t ModuleImp::getKernelNames(uint32_t *pCount, const char **pNames) {
852     auto &kernelImmDatas = this->getKernelImmutableDataVector();
853     if (*pCount == 0) {
854         *pCount = static_cast<uint32_t>(kernelImmDatas.size());
855         return ZE_RESULT_SUCCESS;
856     }
857 
858     if (*pCount > static_cast<uint32_t>(kernelImmDatas.size())) {
859         *pCount = static_cast<uint32_t>(kernelImmDatas.size());
860     }
861 
862     uint32_t outCount = 0;
863     for (auto &kernelImmData : kernelImmDatas) {
864         *(pNames + outCount) = kernelImmData->getDescriptor().kernelMetadata.kernelName.c_str();
865         outCount++;
866         if (outCount == *pCount) {
867             break;
868         }
869     }
870 
871     return ZE_RESULT_SUCCESS;
872 }
873 
isDebugEnabled() const874 bool ModuleImp::isDebugEnabled() const {
875     return debugEnabled;
876 }
877 
verifyDebugCapabilities()878 void ModuleImp::verifyDebugCapabilities() {
879     bool debugCapabilities = device->getNEODevice()->getDebugger() != nullptr;
880 
881     if (debugCapabilities) {
882         //verify all kernels are debuggable
883         for (auto kernelInfo : this->translationUnit->programInfo.kernelInfos) {
884             bool systemThreadSurfaceAvailable = NEO::isValidOffset(kernelInfo->kernelDescriptor.payloadMappings.implicitArgs.systemThreadSurfaceAddress.bindful) ||
885                                                 NEO::isValidOffset(kernelInfo->kernelDescriptor.payloadMappings.implicitArgs.systemThreadSurfaceAddress.bindless);
886 
887             debugCapabilities &= systemThreadSurfaceAvailable;
888         }
889     }
890     debugEnabled = debugCapabilities;
891 }
892 
checkIfPrivateMemoryPerDispatchIsNeeded()893 void ModuleImp::checkIfPrivateMemoryPerDispatchIsNeeded() {
894     size_t modulePrivateMemorySize = 0;
895     for (auto &kernelImmData : this->kernelImmDatas) {
896         if (0 == kernelImmData->getDescriptor().kernelAttributes.perHwThreadPrivateMemorySize) {
897             continue;
898         }
899         auto kernelPrivateMemorySize = NEO::KernelHelper::getPrivateSurfaceSize(kernelImmData->getDescriptor().kernelAttributes.perHwThreadPrivateMemorySize,
900                                                                                 this->device->getNEODevice()->getDeviceInfo().computeUnitsUsedForScratch);
901         modulePrivateMemorySize += kernelPrivateMemorySize;
902     }
903 
904     this->allocatePrivateMemoryPerDispatch = false;
905     if (modulePrivateMemorySize > 0U) {
906         auto globalMemorySize = device->getNEODevice()->getRootDevice()->getGlobalMemorySize(static_cast<uint32_t>(device->getNEODevice()->getDeviceBitfield().to_ulong()));
907         this->allocatePrivateMemoryPerDispatch = modulePrivateMemorySize > globalMemorySize;
908     }
909 }
910 
getProperties(ze_module_properties_t * pModuleProperties)911 ze_result_t ModuleImp::getProperties(ze_module_properties_t *pModuleProperties) {
912 
913     pModuleProperties->flags = 0;
914 
915     if (!unresolvedExternalsInfo.empty()) {
916         pModuleProperties->flags |= ZE_MODULE_PROPERTY_FLAG_IMPORTS;
917     }
918 
919     return ZE_RESULT_SUCCESS;
920 }
921 
performDynamicLink(uint32_t numModules,ze_module_handle_t * phModules,ze_module_build_log_handle_t * phLinkLog)922 ze_result_t ModuleImp::performDynamicLink(uint32_t numModules,
923                                           ze_module_handle_t *phModules,
924                                           ze_module_build_log_handle_t *phLinkLog) {
925     ModuleBuildLog *moduleLinkLog = nullptr;
926     if (phLinkLog) {
927         moduleLinkLog = ModuleBuildLog::create();
928         *phLinkLog = moduleLinkLog->toHandle();
929     }
930     for (auto i = 0u; i < numModules; i++) {
931         auto moduleId = static_cast<ModuleImp *>(Module::fromHandle(phModules[i]));
932         if (moduleId->isFullyLinked) {
933             continue;
934         }
935         NEO::Linker::PatchableSegments isaSegmentsForPatching;
936         std::vector<std::vector<char>> patchedIsaTempStorage;
937         uint32_t numPatchedSymbols = 0u;
938         std::vector<std::string> unresolvedSymbolLogMessages;
939         if (moduleId->translationUnit->programInfo.linkerInput && moduleId->translationUnit->programInfo.linkerInput->getTraits().requiresPatchingOfInstructionSegments) {
940             patchedIsaTempStorage.reserve(moduleId->kernelImmDatas.size());
941             for (const auto &kernelInfo : moduleId->translationUnit->programInfo.kernelInfos) {
942                 auto &kernHeapInfo = kernelInfo->heapInfo;
943                 const char *originalIsa = reinterpret_cast<const char *>(kernHeapInfo.pKernelHeap);
944                 patchedIsaTempStorage.push_back(std::vector<char>(originalIsa, originalIsa + kernHeapInfo.KernelHeapSize));
945                 isaSegmentsForPatching.push_back(NEO::Linker::PatchableSegment{patchedIsaTempStorage.rbegin()->data(), kernHeapInfo.KernelHeapSize});
946             }
947             for (const auto &unresolvedExternal : moduleId->unresolvedExternalsInfo) {
948                 if (moduleLinkLog) {
949                     std::stringstream logMessage;
950                     logMessage << "Module <" << moduleId << ">: "
951                                << " Unresolved Symbol <" << unresolvedExternal.unresolvedRelocation.symbolName << ">";
952                     unresolvedSymbolLogMessages.push_back(logMessage.str());
953                 }
954                 for (auto i = 0u; i < numModules; i++) {
955                     auto moduleHandle = static_cast<ModuleImp *>(Module::fromHandle(phModules[i]));
956                     auto symbolIt = moduleHandle->symbols.find(unresolvedExternal.unresolvedRelocation.symbolName);
957                     if (symbolIt != moduleHandle->symbols.end()) {
958                         auto relocAddress = ptrOffset(isaSegmentsForPatching[unresolvedExternal.instructionsSegmentId].hostPointer,
959                                                       static_cast<uintptr_t>(unresolvedExternal.unresolvedRelocation.offset));
960 
961                         NEO::Linker::patchAddress(relocAddress, symbolIt->second, unresolvedExternal.unresolvedRelocation);
962                         numPatchedSymbols++;
963                         moduleId->importedSymbolAllocations.insert(moduleHandle->exportedFunctionsSurface);
964 
965                         if (moduleLinkLog) {
966                             std::stringstream logMessage;
967                             logMessage << " Successfully Resolved Thru Dynamic Link to Module <" << moduleHandle << ">";
968                             unresolvedSymbolLogMessages.back().append(logMessage.str());
969                         }
970 
971                         // Apply the exported functions surface state from the export module to the import module if it exists.
972                         // Enables import modules to access the exported functions during kernel execution.
973                         for (auto &kernImmData : moduleId->kernelImmDatas) {
974                             kernImmData->getResidencyContainer().reserve(kernImmData->getResidencyContainer().size() +
975                                                                          ((moduleHandle->exportedFunctionsSurface != nullptr) ? 1 : 0) + moduleId->importedSymbolAllocations.size());
976 
977                             if (nullptr != moduleHandle->exportedFunctionsSurface) {
978                                 kernImmData->getResidencyContainer().push_back(moduleHandle->exportedFunctionsSurface);
979                             }
980                             kernImmData->getResidencyContainer().insert(kernImmData->getResidencyContainer().end(), moduleId->importedSymbolAllocations.begin(),
981                                                                         moduleId->importedSymbolAllocations.end());
982                         }
983                         break;
984                     }
985                 }
986             }
987         }
988         if (moduleLinkLog) {
989             for (int i = 0; i < (int)unresolvedSymbolLogMessages.size(); i++) {
990                 moduleLinkLog->appendString(unresolvedSymbolLogMessages[i].c_str(), unresolvedSymbolLogMessages[i].size());
991             }
992         }
993         if (numPatchedSymbols != moduleId->unresolvedExternalsInfo.size()) {
994             return ZE_RESULT_ERROR_MODULE_LINK_FAILURE;
995         }
996         moduleId->copyPatchedSegments(isaSegmentsForPatching);
997         moduleId->isFullyLinked = true;
998     }
999     return ZE_RESULT_SUCCESS;
1000 }
1001 
moveBuildOption(std::string & dstOptionsSet,std::string & srcOptionSet,NEO::ConstStringRef dstOptionName,NEO::ConstStringRef srcOptionName)1002 bool moveBuildOption(std::string &dstOptionsSet, std::string &srcOptionSet, NEO::ConstStringRef dstOptionName, NEO::ConstStringRef srcOptionName) {
1003     const char optDelim = ' ';
1004     const char valDelim = '=';
1005 
1006     auto optInSrcPos = srcOptionSet.find(srcOptionName.begin());
1007     if (std::string::npos == optInSrcPos) {
1008         return false;
1009     }
1010 
1011     std::string dstOptionStr(dstOptionName);
1012     auto optInSrcEndPos = srcOptionSet.find(optDelim, optInSrcPos);
1013     if (srcOptionName == BuildOptions::optLevel) {
1014         auto valInSrcPos = srcOptionSet.find(valDelim, optInSrcPos);
1015         if (std::string::npos == valInSrcPos) {
1016             return false;
1017         }
1018         dstOptionStr += srcOptionSet.substr(valInSrcPos + 1, optInSrcEndPos);
1019     }
1020     srcOptionSet.erase(optInSrcPos, (optInSrcEndPos - optInSrcPos));
1021     NEO::CompilerOptions::concatenateAppend(dstOptionsSet, dstOptionStr);
1022     return true;
1023 }
1024 
1025 } // namespace L0
1026