1 /*
2 * Copyright (C) 2020-2021 Intel Corporation
3 *
4 * SPDX-License-Identifier: MIT
5 *
6 */
7
8 #include "level_zero/core/source/module/module_imp.h"
9
10 #include "shared/source/compiler_interface/compiler_warnings/compiler_warnings.h"
11 #include "shared/source/compiler_interface/intermediate_representations.h"
12 #include "shared/source/compiler_interface/linker.h"
13 #include "shared/source/device/device.h"
14 #include "shared/source/device_binary_format/debug_zebin.h"
15 #include "shared/source/device_binary_format/device_binary_formats.h"
16 #include "shared/source/device_binary_format/elf/elf.h"
17 #include "shared/source/device_binary_format/elf/elf_encoder.h"
18 #include "shared/source/device_binary_format/elf/ocl_elf.h"
19 #include "shared/source/helpers/api_specific_config.h"
20 #include "shared/source/helpers/constants.h"
21 #include "shared/source/helpers/kernel_helpers.h"
22 #include "shared/source/helpers/string.h"
23 #include "shared/source/memory_manager/memory_manager.h"
24 #include "shared/source/memory_manager/memory_operations_handler.h"
25 #include "shared/source/memory_manager/unified_memory_manager.h"
26 #include "shared/source/program/kernel_info.h"
27 #include "shared/source/program/program_initialization.h"
28 #include "shared/source/source_level_debugger/source_level_debugger.h"
29
30 #include "level_zero/core/source/device/device.h"
31 #include "level_zero/core/source/kernel/kernel.h"
32 #include "level_zero/core/source/module/module_build_log.h"
33
34 #include "compiler_options.h"
35 #include "program_debug_data.h"
36
37 #include <memory>
38 #include <unordered_map>
39
40 namespace L0 {
41
// Level Zero ("-ze-*") build option strings. Within this file they are used
// when assembling internal compiler options (generateCompilerOptions) and when
// translating user-supplied build flags (ModuleImp::createBuildOptions).
namespace BuildOptions {
NEO::ConstStringRef optDisable = "-ze-opt-disable";
NEO::ConstStringRef optLevel = "-ze-opt-level";
NEO::ConstStringRef greaterThan4GbRequired = "-ze-opt-greater-than-4GB-buffer-required";
NEO::ConstStringRef hasBufferOffsetArg = "-ze-intel-has-buffer-offset-arg";
NEO::ConstStringRef debugKernelEnable = "-ze-kernel-debug-enable";
} // namespace BuildOptions
49
ModuleTranslationUnit(L0::Device * device)50 ModuleTranslationUnit::ModuleTranslationUnit(L0::Device *device)
51 : device(device) {
52 }
53
~ModuleTranslationUnit()54 ModuleTranslationUnit::~ModuleTranslationUnit() {
55 if (globalConstBuffer) {
56 auto svmAllocsManager = device->getDriverHandle()->getSvmAllocsManager();
57
58 if (svmAllocsManager->getSVMAlloc(reinterpret_cast<void *>(globalConstBuffer->getGpuAddress()))) {
59 svmAllocsManager->freeSVMAlloc(reinterpret_cast<void *>(globalConstBuffer->getGpuAddress()));
60 } else {
61 this->device->getNEODevice()->getExecutionEnvironment()->memoryManager->checkGpuUsageAndDestroyGraphicsAllocations(globalConstBuffer);
62 }
63 }
64
65 if (globalVarBuffer) {
66 auto svmAllocsManager = device->getDriverHandle()->getSvmAllocsManager();
67
68 if (svmAllocsManager->getSVMAlloc(reinterpret_cast<void *>(globalVarBuffer->getGpuAddress()))) {
69 svmAllocsManager->freeSVMAlloc(reinterpret_cast<void *>(globalVarBuffer->getGpuAddress()));
70 } else {
71 this->device->getNEODevice()->getExecutionEnvironment()->memoryManager->checkGpuUsageAndDestroyGraphicsAllocations(globalVarBuffer);
72 }
73 }
74
75 if (this->debugData != nullptr) {
76 for (std::vector<char *>::iterator iter = alignedvIsas.begin(); iter != alignedvIsas.end(); ++iter) {
77 alignedFree(static_cast<void *>(*iter));
78 }
79 }
80 }
81
generateElfFromSpirV(std::vector<const char * > inputSpirVs,std::vector<uint32_t> inputModuleSizes)82 std::vector<uint8_t> ModuleTranslationUnit::generateElfFromSpirV(std::vector<const char *> inputSpirVs, std::vector<uint32_t> inputModuleSizes) {
83 NEO::Elf::ElfEncoder<> elfEncoder(true, false, 1U);
84 elfEncoder.getElfFileHeader().type = NEO::Elf::ET_OPENCL_OBJECTS;
85
86 StackVec<uint32_t, 64> specConstIds;
87 StackVec<uint64_t, 64> specConstValues;
88 for (uint32_t i = 0; i < static_cast<uint32_t>(inputSpirVs.size()); i++) {
89 if (specConstantsValues.size() > 0) {
90 specConstIds.clear();
91 specConstValues.clear();
92 specConstIds.reserve(specConstantsValues.size());
93 specConstValues.reserve(specConstantsValues.size());
94 for (const auto &specConst : specConstantsValues) {
95 specConstIds.push_back(specConst.first);
96 specConstValues.push_back(specConst.second);
97 }
98 elfEncoder.appendSection(NEO::Elf::SHT_OPENCL_SPIRV_SC_IDS, NEO::Elf::SectionNamesOpenCl::spirvSpecConstIds,
99 ArrayRef<const uint8_t>::fromAny(specConstIds.begin(), specConstIds.size()));
100 elfEncoder.appendSection(NEO::Elf::SHT_OPENCL_SPIRV_SC_VALUES, NEO::Elf::SectionNamesOpenCl::spirvSpecConstValues,
101 ArrayRef<const uint8_t>::fromAny(specConstValues.begin(), specConstValues.size()));
102 }
103
104 auto sectionType = NEO::Elf::SHT_OPENCL_SPIRV;
105 NEO::ConstStringRef sectionName = NEO::Elf::SectionNamesOpenCl::spirvObject;
106 elfEncoder.appendSection(sectionType, sectionName, ArrayRef<const uint8_t>(reinterpret_cast<const uint8_t *>(inputSpirVs[i]), inputModuleSizes[i]));
107 }
108
109 return elfEncoder.encode();
110 }
111
generateCompilerOptions(const char * buildOptions,const char * internalBuildOptions)112 std::string ModuleTranslationUnit::generateCompilerOptions(const char *buildOptions, const char *internalBuildOptions) {
113 if (nullptr != buildOptions) {
114 options = buildOptions;
115 }
116 std::string internalOptions = NEO::CompilerOptions::concatenate(internalBuildOptions, BuildOptions::hasBufferOffsetArg);
117
118 if (device->getNEODevice()->getDeviceInfo().debuggerActive) {
119 if (NEO::SourceLevelDebugger::shouldAppendOptDisable(*device->getSourceLevelDebugger())) {
120 NEO::CompilerOptions::concatenateAppend(options, BuildOptions::optDisable);
121 }
122
123 options = NEO::CompilerOptions::concatenate(options, NEO::CompilerOptions::generateDebugInfo);
124 internalOptions = NEO::CompilerOptions::concatenate(internalOptions, BuildOptions::debugKernelEnable);
125 }
126
127 if (NEO::DebugManager.flags.DisableStatelessToStatefulOptimization.get() ||
128 device->getNEODevice()->areSharedSystemAllocationsAllowed()) {
129 internalOptions = NEO::CompilerOptions::concatenate(internalOptions, NEO::CompilerOptions::greaterThan4gbBuffersRequired);
130 }
131
132 return internalOptions;
133 }
134
processSpecConstantInfo(NEO::CompilerInterface * compilerInterface,const ze_module_constants_t * pConstants,const char * input,uint32_t inputSize)135 bool ModuleTranslationUnit::processSpecConstantInfo(NEO::CompilerInterface *compilerInterface, const ze_module_constants_t *pConstants, const char *input, uint32_t inputSize) {
136 if (pConstants) {
137 NEO::SpecConstantInfo specConstInfo;
138 auto retVal = compilerInterface->getSpecConstantsInfo(*device->getNEODevice(), ArrayRef<const char>(input, inputSize), specConstInfo);
139 if (retVal != NEO::TranslationOutput::ErrorCode::Success) {
140 return false;
141 }
142 for (uint32_t i = 0; i < pConstants->numConstants; i++) {
143 uint64_t specConstantValue = 0;
144 uint32_t specConstantId = pConstants->pConstantIds[i];
145 auto atributeSize = 0u;
146 uint32_t j;
147 for (j = 0; j < specConstInfo.sizesBuffer->GetSize<uint32_t>(); j++) {
148 if (specConstantId == specConstInfo.idsBuffer->GetMemory<uint32_t>()[j]) {
149 atributeSize = specConstInfo.sizesBuffer->GetMemory<uint32_t>()[j];
150 break;
151 }
152 }
153 if (j == specConstInfo.sizesBuffer->GetSize<uint32_t>()) {
154 return false;
155 }
156 memcpy_s(&specConstantValue, sizeof(uint64_t),
157 const_cast<void *>(pConstants->pConstantValues[i]), atributeSize);
158 specConstantsValues[specConstantId] = specConstantValue;
159 }
160 }
161 return true;
162 }
163
compileGenBinary(NEO::TranslationInput inputArgs,bool staticLink)164 bool ModuleTranslationUnit::compileGenBinary(NEO::TranslationInput inputArgs, bool staticLink) {
165 auto compilerInterface = device->getNEODevice()->getCompilerInterface();
166 UNRECOVERABLE_IF(nullptr == compilerInterface);
167
168 inputArgs.specializedValues = this->specConstantsValues;
169
170 NEO::TranslationOutput compilerOuput = {};
171 NEO::TranslationOutput::ErrorCode compilerErr;
172
173 if (staticLink) {
174 compilerErr = compilerInterface->link(*device->getNEODevice(), inputArgs, compilerOuput);
175 } else {
176 compilerErr = compilerInterface->build(*device->getNEODevice(), inputArgs, compilerOuput);
177 }
178
179 this->updateBuildLog(compilerOuput.frontendCompilerLog);
180 this->updateBuildLog(compilerOuput.backendCompilerLog);
181
182 if (NEO::TranslationOutput::ErrorCode::Success != compilerErr) {
183 return false;
184 }
185
186 this->irBinary = std::move(compilerOuput.intermediateRepresentation.mem);
187 this->irBinarySize = compilerOuput.intermediateRepresentation.size;
188 this->unpackedDeviceBinary = std::move(compilerOuput.deviceBinary.mem);
189 this->unpackedDeviceBinarySize = compilerOuput.deviceBinary.size;
190 this->debugData = std::move(compilerOuput.debugData.mem);
191 this->debugDataSize = compilerOuput.debugData.size;
192
193 return processUnpackedBinary();
194 }
195
staticLinkSpirV(std::vector<const char * > inputSpirVs,std::vector<uint32_t> inputModuleSizes,const char * buildOptions,const char * internalBuildOptions,std::vector<const ze_module_constants_t * > specConstants)196 bool ModuleTranslationUnit::staticLinkSpirV(std::vector<const char *> inputSpirVs, std::vector<uint32_t> inputModuleSizes, const char *buildOptions, const char *internalBuildOptions,
197 std::vector<const ze_module_constants_t *> specConstants) {
198 auto compilerInterface = device->getNEODevice()->getCompilerInterface();
199 UNRECOVERABLE_IF(nullptr == compilerInterface);
200
201 std::string internalOptions = this->generateCompilerOptions(buildOptions, internalBuildOptions);
202
203 for (uint32_t i = 0; i < static_cast<uint32_t>(specConstants.size()); i++) {
204 auto specConstantResult = this->processSpecConstantInfo(compilerInterface, specConstants[i], inputSpirVs[i], inputModuleSizes[i]);
205 if (!specConstantResult) {
206 return false;
207 }
208 }
209
210 NEO::TranslationInput linkInputArgs = {IGC::CodeType::elf, IGC::CodeType::oclGenBin};
211
212 auto spirvElfSource = generateElfFromSpirV(inputSpirVs, inputModuleSizes);
213
214 linkInputArgs.src = ArrayRef<const char>(reinterpret_cast<const char *>(spirvElfSource.data()), spirvElfSource.size());
215 linkInputArgs.apiOptions = ArrayRef<const char>(options.c_str(), options.length());
216 linkInputArgs.internalOptions = ArrayRef<const char>(internalOptions.c_str(), internalOptions.length());
217 return this->compileGenBinary(linkInputArgs, true);
218 }
219
buildFromSpirV(const char * input,uint32_t inputSize,const char * buildOptions,const char * internalBuildOptions,const ze_module_constants_t * pConstants)220 bool ModuleTranslationUnit::buildFromSpirV(const char *input, uint32_t inputSize, const char *buildOptions, const char *internalBuildOptions,
221 const ze_module_constants_t *pConstants) {
222 auto compilerInterface = device->getNEODevice()->getCompilerInterface();
223 UNRECOVERABLE_IF(nullptr == compilerInterface);
224
225 std::string internalOptions = this->generateCompilerOptions(buildOptions, internalBuildOptions);
226
227 auto specConstantResult = this->processSpecConstantInfo(compilerInterface, pConstants, input, inputSize);
228 if (!specConstantResult)
229 return false;
230
231 NEO::TranslationInput inputArgs = {IGC::CodeType::spirV, IGC::CodeType::oclGenBin};
232
233 inputArgs.src = ArrayRef<const char>(input, inputSize);
234 inputArgs.apiOptions = ArrayRef<const char>(options.c_str(), options.length());
235 inputArgs.internalOptions = ArrayRef<const char>(internalOptions.c_str(), internalOptions.length());
236 return this->compileGenBinary(inputArgs, false);
237 }
238
createFromNativeBinary(const char * input,size_t inputSize)239 bool ModuleTranslationUnit::createFromNativeBinary(const char *input, size_t inputSize) {
240 UNRECOVERABLE_IF((nullptr == device) || (nullptr == device->getNEODevice()));
241 auto productAbbreviation = NEO::hardwarePrefix[device->getNEODevice()->getHardwareInfo().platform.eProductFamily];
242
243 NEO::TargetDevice targetDevice = {};
244 targetDevice.coreFamily = device->getNEODevice()->getHardwareInfo().platform.eRenderCoreFamily;
245 targetDevice.productFamily = device->getNEODevice()->getHardwareInfo().platform.eProductFamily;
246 targetDevice.stepping = device->getNEODevice()->getHardwareInfo().platform.usRevId;
247 targetDevice.maxPointerSizeInBytes = sizeof(uintptr_t);
248 std::string decodeErrors;
249 std::string decodeWarnings;
250 ArrayRef<const uint8_t> archive(reinterpret_cast<const uint8_t *>(input), inputSize);
251 auto singleDeviceBinary = unpackSingleDeviceBinary(archive, NEO::ConstStringRef(productAbbreviation, strlen(productAbbreviation)), targetDevice,
252 decodeErrors, decodeWarnings);
253 if (decodeWarnings.empty() == false) {
254 PRINT_DEBUG_STRING(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "%s\n", decodeWarnings.c_str());
255 }
256
257 if (singleDeviceBinary.intermediateRepresentation.empty() && singleDeviceBinary.deviceBinary.empty()) {
258 PRINT_DEBUG_STRING(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "%s\n", decodeErrors.c_str());
259 return false;
260 } else {
261 this->irBinary = makeCopy(reinterpret_cast<const char *>(singleDeviceBinary.intermediateRepresentation.begin()), singleDeviceBinary.intermediateRepresentation.size());
262 this->irBinarySize = singleDeviceBinary.intermediateRepresentation.size();
263 this->options = singleDeviceBinary.buildOptions.str();
264
265 if (false == singleDeviceBinary.debugData.empty()) {
266 this->debugData = makeCopy(reinterpret_cast<const char *>(singleDeviceBinary.debugData.begin()), singleDeviceBinary.debugData.size());
267 this->debugDataSize = singleDeviceBinary.debugData.size();
268 }
269
270 bool rebuild = NEO::DebugManager.flags.RebuildPrecompiledKernels.get() && irBinarySize != 0;
271 if ((false == singleDeviceBinary.deviceBinary.empty()) && (false == rebuild)) {
272 this->unpackedDeviceBinary = makeCopy<char>(reinterpret_cast<const char *>(singleDeviceBinary.deviceBinary.begin()), singleDeviceBinary.deviceBinary.size());
273 this->unpackedDeviceBinarySize = singleDeviceBinary.deviceBinary.size();
274 this->packedDeviceBinary = makeCopy<char>(reinterpret_cast<const char *>(archive.begin()), archive.size());
275 this->packedDeviceBinarySize = archive.size();
276 }
277 }
278
279 if (nullptr == this->unpackedDeviceBinary) {
280 if (!shouldSuppressRebuildWarning) {
281 updateBuildLog(NEO::CompilerWarnings::recompiledFromIr.str());
282 }
283
284 return buildFromSpirV(this->irBinary.get(), static_cast<uint32_t>(this->irBinarySize), this->options.c_str(), "", nullptr);
285 } else {
286 return processUnpackedBinary();
287 }
288 }
289
// Decodes the unpacked device binary into programInfo, prepares linker input
// for ELF-based binaries, processes debug data, allocates the global
// constant/variable surfaces, applies device constants to every kernel and,
// unless a packed binary already exists, packs the device binary together with
// IR and debug data.
//
// Returns false when there is no device binary, decoding fails, the program
// needs more SLM than the device reports, or packing yields an empty result.
bool ModuleTranslationUnit::processUnpackedBinary() {
    if (0 == unpackedDeviceBinarySize) {
        return false;
    }
    auto blob = ArrayRef<const uint8_t>(reinterpret_cast<const uint8_t *>(this->unpackedDeviceBinary.get()), this->unpackedDeviceBinarySize);
    NEO::SingleDeviceBinary binary = {};
    binary.deviceBinary = blob;
    binary.targetDevice.grfSize = device->getHwInfo().capabilityTable.grfSize;
    std::string decodeErrors;
    std::string decodeWarnings;

    NEO::DecodeError decodeError;
    NEO::DeviceBinaryFormat singleDeviceBinaryFormat;
    // Set before decoding; the same flag later decides whether undefined
    // symbols are allowed in the linker input.
    programInfo.levelZeroDynamicLinkProgram = true;
    std::tie(decodeError, singleDeviceBinaryFormat) = NEO::decodeSingleDeviceBinary(programInfo, binary, decodeErrors, decodeWarnings);
    if (decodeWarnings.empty() == false) {
        PRINT_DEBUG_STRING(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "%s\n", decodeWarnings.c_str());
    }

    if (NEO::DecodeError::Success != decodeError) {
        PRINT_DEBUG_STRING(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "%s\n", decodeErrors.c_str());
        return false;
    }

    // Only binaries that were decoded as ELF carry a symbol table and
    // relocations to feed into the linker input.
    if (programInfo.decodedElf.elfFileHeader) {
        NEO::LinkerInput::SectionNameToSegmentIdMap nameToKernelId;

        // Kernel name -> index of the kernel within programInfo.kernelInfos.
        uint32_t id = 0;
        for (auto &kernelInfo : this->programInfo.kernelInfos) {
            nameToKernelId[kernelInfo->kernelDescriptor.kernelMetadata.kernelName] = id;
            id++;
        }
        programInfo.prepareLinkerInputStorage();
        programInfo.linkerInput->undefinedSymbolsAllowed = programInfo.levelZeroDynamicLinkProgram;
        programInfo.linkerInput->decodeElfSymbolTableAndRelocations(programInfo.decodedElf, nameToKernelId);
    }

    processDebugData();

    size_t slmNeeded = NEO::getMaxInlineSlmNeeded(programInfo);
    size_t slmAvailable = 0U;
    NEO::DeviceInfoKernelPayloadConstants deviceInfoConstants;
    slmAvailable = static_cast<size_t>(device->getDeviceInfo().localMemSize);
    deviceInfoConstants.maxWorkGroupSize = static_cast<uint32_t>(device->getDeviceInfo().maxWorkGroupSize);
    deviceInfoConstants.computeUnitsUsedForScratch = static_cast<uint32_t>(device->getDeviceInfo().computeUnitsUsedForScratch);
    deviceInfoConstants.slmWindowSize = static_cast<uint32_t>(device->getDeviceInfo().localMemSize);
    if (NEO::requiresLocalMemoryWindowVA(programInfo)) {
        deviceInfoConstants.slmWindow = device->getNEODevice()->getExecutionEnvironment()->memoryManager->getReservedMemory(MemoryConstants::slmWindowSize, MemoryConstants::slmWindowAlignment);
    }

    // NOTE(review): this check runs after the SLM window reservation above.
    if (slmNeeded > slmAvailable) {
        return false;
    }

    auto svmAllocsManager = device->getDriverHandle()->getSvmAllocsManager();
    if (programInfo.globalConstants.size != 0) {
        this->globalConstBuffer = NEO::allocateGlobalsSurface(svmAllocsManager, *device->getNEODevice(), programInfo.globalConstants.size, true, programInfo.linkerInput.get(), programInfo.globalConstants.initData);
    }

    if (programInfo.globalVariables.size != 0) {
        this->globalVarBuffer = NEO::allocateGlobalsSurface(svmAllocsManager, *device->getNEODevice(), programInfo.globalVariables.size, false, programInfo.linkerInput.get(), programInfo.globalVariables.initData);
    }

    for (auto &kernelInfo : this->programInfo.kernelInfos) {
        kernelInfo->apply(deviceInfoConstants);
    }

    auto gfxCore = device->getNEODevice()->getHardwareInfo().platform.eRenderCoreFamily;
    auto stepping = device->getNEODevice()->getHardwareInfo().platform.usRevId;

    // A packed binary is already present when the module was created from a
    // native binary (see createFromNativeBinary); nothing to pack then.
    if (this->packedDeviceBinary != nullptr) {
        return true;
    }

    NEO::SingleDeviceBinary singleDeviceBinary;
    singleDeviceBinary.buildOptions = this->options;
    singleDeviceBinary.targetDevice.coreFamily = gfxCore;
    singleDeviceBinary.targetDevice.stepping = stepping;
    singleDeviceBinary.deviceBinary = ArrayRef<const uint8_t>(reinterpret_cast<const uint8_t *>(this->unpackedDeviceBinary.get()), this->unpackedDeviceBinarySize);
    singleDeviceBinary.intermediateRepresentation = ArrayRef<const uint8_t>(reinterpret_cast<const uint8_t *>(this->irBinary.get()), this->irBinarySize);
    singleDeviceBinary.debugData = ArrayRef<const uint8_t>(reinterpret_cast<const uint8_t *>(this->debugData.get()), this->debugDataSize);
    std::string packWarnings;
    std::string packErrors;
    auto packedDeviceBinary = NEO::packDeviceBinary(singleDeviceBinary, packErrors, packWarnings);
    if (packedDeviceBinary.empty()) {
        DEBUG_BREAK_IF(true);
        return false;
    }
    // Keep an owned copy of the packed binary; getNativeBinary() serves it.
    this->packedDeviceBinary = makeCopy(packedDeviceBinary.data(), packedDeviceBinary.size());
    this->packedDeviceBinarySize = packedDeviceBinary.size();

    return true;
}
383
updateBuildLog(const std::string & newLogEntry)384 void ModuleTranslationUnit::updateBuildLog(const std::string &newLogEntry) {
385 if (newLogEntry.empty() || ('\0' == newLogEntry[0])) {
386 return;
387 }
388
389 buildLog += newLogEntry.c_str();
390 if ('\n' != *buildLog.rbegin()) {
391 buildLog.append("\n");
392 }
393 }
394
// Splits the program-level debug data blob into per-kernel debug data.
//
// As walked by this code, the blob starts with a program header, followed for
// each kernel by: a kernel header, the kernel name (KernelNameSize bytes), the
// vISA debug data (SizeVisaDbgInBytes) and the gen ISA debug data
// (SizeGenIsaDbgInBytes). The vISA part is copied into a page-aligned
// allocation that is remembered in alignedvIsas (released in the destructor);
// the gen ISA pointer refers directly into debugData.
void ModuleTranslationUnit::processDebugData() {
    if (this->debugData != nullptr) {
        iOpenCL::SProgramDebugDataHeaderIGC *programDebugHeader = reinterpret_cast<iOpenCL::SProgramDebugDataHeaderIGC *>(debugData.get());

        // NOTE(review): a kernel-count mismatch is only caught by this debug
        // break; the loop below indexes kernelInfos by NumberOfKernels.
        DEBUG_BREAK_IF(programDebugHeader->NumberOfKernels != programInfo.kernelInfos.size());

        // The first kernel header follows the program header immediately.
        const iOpenCL::SKernelDebugDataHeaderIGC *kernelDebugHeader = reinterpret_cast<iOpenCL::SKernelDebugDataHeaderIGC *>(
            ptrOffset(programDebugHeader, sizeof(iOpenCL::SProgramDebugDataHeaderIGC)));

        const char *kernelName = nullptr;
        const char *kernelDebugData = nullptr;

        for (uint32_t i = 0; i < programDebugHeader->NumberOfKernels; i++) {
            kernelName = reinterpret_cast<const char *>(ptrOffset(kernelDebugHeader, sizeof(iOpenCL::SKernelDebugDataHeaderIGC)));

            // Debug entries are expected in the same order as kernelInfos.
            auto kernelInfo = programInfo.kernelInfos[i];
            UNRECOVERABLE_IF(kernelInfo->kernelDescriptor.kernelMetadata.kernelName.compare(0, kernelInfo->kernelDescriptor.kernelMetadata.kernelName.size(), kernelName) != 0);

            kernelDebugData = ptrOffset(kernelName, kernelDebugHeader->KernelNameSize);

            kernelInfo->kernelDescriptor.external.debugData = std::make_unique<NEO::DebugData>();

            // NOTE(review): the alignedMalloc result is used without a null check.
            char *alignedAlloc = static_cast<char *>(alignedMalloc(kernelDebugHeader->SizeVisaDbgInBytes, MemoryConstants::pageSize));
            memcpy_s(static_cast<void *>(alignedAlloc), kernelDebugHeader->SizeVisaDbgInBytes, kernelDebugData, kernelDebugHeader->SizeVisaDbgInBytes);

            kernelInfo->kernelDescriptor.external.debugData->vIsa = alignedAlloc;
            kernelInfo->kernelDescriptor.external.debugData->genIsa = ptrOffset(kernelDebugData, kernelDebugHeader->SizeVisaDbgInBytes);
            kernelInfo->kernelDescriptor.external.debugData->vIsaSize = kernelDebugHeader->SizeVisaDbgInBytes;
            kernelInfo->kernelDescriptor.external.debugData->genIsaSize = kernelDebugHeader->SizeGenIsaDbgInBytes;

            // Advance past vISA + gen ISA data to the next kernel's header.
            kernelDebugData = ptrOffset(kernelDebugData, static_cast<size_t>(kernelDebugHeader->SizeVisaDbgInBytes) + kernelDebugHeader->SizeGenIsaDbgInBytes);
            kernelDebugHeader = reinterpret_cast<const iOpenCL::SKernelDebugDataHeaderIGC *>(kernelDebugData);
            alignedvIsas.push_back(alignedAlloc);
        }
    }
}
431
ModuleImp(Device * device,ModuleBuildLog * moduleBuildLog,ModuleType type)432 ModuleImp::ModuleImp(Device *device, ModuleBuildLog *moduleBuildLog, ModuleType type)
433 : device(device), translationUnit(std::make_unique<ModuleTranslationUnit>(device)),
434 moduleBuildLog(moduleBuildLog), type(type) {
435 productFamily = device->getHwInfo().platform.eProductFamily;
436 }
437
// Explicitly drops all per-kernel immutable data in the destructor body,
// i.e. before the remaining members are destroyed.
ModuleImp::~ModuleImp() {
    kernelImmDatas.clear();
}
441
getZebinSegments()442 NEO::Debug::Segments ModuleImp::getZebinSegments() {
443 NEO::Debug::Segments segments;
444
445 auto varBuffer = translationUnit->globalVarBuffer;
446 if (varBuffer) {
447 segments.varData = {varBuffer->getGpuAddressToPatch(), {reinterpret_cast<uint8_t *>(varBuffer->getUnderlyingBuffer()), varBuffer->getUnderlyingBufferSize()}};
448 }
449
450 auto constBuffer = translationUnit->globalConstBuffer;
451 if (constBuffer) {
452 segments.constData = {constBuffer->getGpuAddressToPatch(), {reinterpret_cast<uint8_t *>(constBuffer->getUnderlyingBuffer()), constBuffer->getUnderlyingBufferSize()}};
453 }
454
455 auto stringBuffer = translationUnit->programInfo.globalStrings;
456 if (stringBuffer.initData) {
457 segments.stringData = {reinterpret_cast<uintptr_t>(stringBuffer.initData),
458 {reinterpret_cast<const uint8_t *>(stringBuffer.initData), stringBuffer.size}};
459 }
460
461 for (auto &kernImmData : this->kernelImmDatas) {
462 const auto &isa = kernImmData->getIsaGraphicsAllocation();
463 NEO::Debug::Segments::Segment kernelSegment = {isa->getGpuAddressToPatch(), {reinterpret_cast<uint8_t *>(isa->getUnderlyingBuffer()), isa->getUnderlyingBufferSize()}};
464 segments.nameToSegMap.insert(std::pair(kernImmData->getDescriptor().kernelMetadata.kernelName, kernelSegment));
465 }
466
467 return segments;
468 }
469
initialize(const ze_module_desc_t * desc,NEO::Device * neoDevice)470 bool ModuleImp::initialize(const ze_module_desc_t *desc, NEO::Device *neoDevice) {
471 bool success = true;
472
473 std::string buildOptions;
474 std::string internalBuildOptions;
475
476 if (desc->pNext) {
477 const ze_base_desc_t *expDesc = reinterpret_cast<const ze_base_desc_t *>(desc->pNext);
478 if (expDesc->stype == ZE_STRUCTURE_TYPE_MODULE_PROGRAM_EXP_DESC) {
479 if (desc->format != ZE_MODULE_FORMAT_IL_SPIRV) {
480 return false;
481 }
482 const ze_module_program_exp_desc_t *programExpDesc =
483 reinterpret_cast<const ze_module_program_exp_desc_t *>(expDesc);
484 std::vector<const char *> inputSpirVs;
485 std::vector<uint32_t> inputModuleSizes;
486 std::vector<const ze_module_constants_t *> specConstants;
487
488 this->createBuildOptions(nullptr, buildOptions, internalBuildOptions);
489
490 for (uint32_t i = 0; i < static_cast<uint32_t>(programExpDesc->count); i++) {
491 std::string tmpBuildOptions;
492 std::string tmpInternalBuildOptions;
493 inputSpirVs.push_back(reinterpret_cast<const char *>(programExpDesc->pInputModules[i]));
494 auto inputSizesInfo = const_cast<size_t *>(programExpDesc->inputSizes);
495 uint32_t inputSize = static_cast<uint32_t>(inputSizesInfo[i]);
496 inputModuleSizes.push_back(inputSize);
497 if (programExpDesc->pConstants) {
498 specConstants.push_back(programExpDesc->pConstants[i]);
499 }
500 if (programExpDesc->pBuildFlags) {
501 this->createBuildOptions(programExpDesc->pBuildFlags[i], tmpBuildOptions, tmpInternalBuildOptions);
502 buildOptions = buildOptions + tmpBuildOptions;
503 internalBuildOptions = internalBuildOptions + tmpInternalBuildOptions;
504 }
505 }
506
507 success = this->translationUnit->staticLinkSpirV(inputSpirVs,
508 inputModuleSizes,
509 buildOptions.c_str(),
510 internalBuildOptions.c_str(),
511 specConstants);
512 } else {
513 return false;
514 }
515 } else {
516 std::string buildFlagsInput{desc->pBuildFlags != nullptr ? desc->pBuildFlags : ""};
517 this->translationUnit->shouldSuppressRebuildWarning = NEO::CompilerOptions::extract(NEO::CompilerOptions::noRecompiledFromIr, buildFlagsInput);
518 this->createBuildOptions(buildFlagsInput.c_str(), buildOptions, internalBuildOptions);
519
520 if (type == ModuleType::User && NEO::DebugManager.flags.InjectInternalBuildOptions.get() != "unk") {
521 NEO::CompilerOptions::concatenateAppend(internalBuildOptions, NEO::DebugManager.flags.InjectInternalBuildOptions.get());
522 }
523
524 if (desc->format == ZE_MODULE_FORMAT_NATIVE) {
525 success = this->translationUnit->createFromNativeBinary(
526 reinterpret_cast<const char *>(desc->pInputModule), desc->inputSize);
527 } else if (desc->format == ZE_MODULE_FORMAT_IL_SPIRV) {
528 success = this->translationUnit->buildFromSpirV(reinterpret_cast<const char *>(desc->pInputModule),
529 static_cast<uint32_t>(desc->inputSize),
530 buildOptions.c_str(),
531 internalBuildOptions.c_str(),
532 desc->pConstants);
533 } else {
534 return false;
535 }
536 }
537
538 this->updateBuildLog(neoDevice);
539 verifyDebugCapabilities();
540
541 if (false == success) {
542 return false;
543 }
544
545 kernelImmDatas.reserve(this->translationUnit->programInfo.kernelInfos.size());
546 for (auto &ki : this->translationUnit->programInfo.kernelInfos) {
547 std::unique_ptr<KernelImmutableData> kernelImmData{new KernelImmutableData(this->device)};
548 kernelImmData->initialize(ki, device, device->getNEODevice()->getDeviceInfo().computeUnitsUsedForScratch,
549 this->translationUnit->globalConstBuffer, this->translationUnit->globalVarBuffer,
550 this->type == ModuleType::Builtin);
551 kernelImmDatas.push_back(std::move(kernelImmData));
552 }
553 this->maxGroupSize = static_cast<uint32_t>(this->translationUnit->device->getNEODevice()->getDeviceInfo().maxWorkGroupSize);
554
555 checkIfPrivateMemoryPerDispatchIsNeeded();
556
557 success = this->linkBinary();
558
559 if (debugEnabled) {
560 passDebugData();
561 }
562
563 auto &hwInfo = neoDevice->getHardwareInfo();
564 auto &hwHelper = NEO::HwHelper::get(hwInfo.platform.eRenderCoreFamily);
565
566 if (this->isFullyLinked) {
567 for (auto &ki : kernelImmDatas) {
568
569 if (this->type == ModuleType::User && !ki->isIsaCopiedToAllocation()) {
570
571 NEO::MemoryTransferHelper::transferMemoryToAllocation(hwHelper.isBlitCopyRequiredForLocalMemory(hwInfo, *ki->getIsaGraphicsAllocation()),
572 *neoDevice, ki->getIsaGraphicsAllocation(), 0, ki->getKernelInfo()->heapInfo.pKernelHeap,
573 static_cast<size_t>(ki->getKernelInfo()->heapInfo.KernelHeapSize));
574
575 ki->setIsaCopiedToAllocation();
576 }
577
578 if (device->getL0Debugger()) {
579 NEO::MemoryOperationsHandler *memoryOperationsIface = neoDevice->getRootDeviceEnvironment().memoryOperationsInterface.get();
580 if (memoryOperationsIface) {
581 auto allocation = ki->getIsaGraphicsAllocation();
582 memoryOperationsIface->makeResident(neoDevice, ArrayRef<NEO::GraphicsAllocation *>(&allocation, 1));
583 }
584 }
585 }
586 }
587 return success;
588 }
589
createDebugZebin()590 void ModuleImp::createDebugZebin() {
591 auto refBin = ArrayRef<const uint8_t>(reinterpret_cast<const uint8_t *>(translationUnit->unpackedDeviceBinary.get()), translationUnit->unpackedDeviceBinarySize);
592 auto segments = getZebinSegments();
593 auto debugZebin = NEO::Debug::createDebugZebin(refBin, segments);
594
595 translationUnit->debugDataSize = debugZebin.size();
596 translationUnit->debugData.reset(new char[translationUnit->debugDataSize]);
597 memcpy_s(translationUnit->debugData.get(), translationUnit->debugDataSize,
598 debugZebin.data(), debugZebin.size());
599 }
600
passDebugData()601 void ModuleImp::passDebugData() {
602 auto refBin = ArrayRef<const uint8_t>(reinterpret_cast<const uint8_t *>(translationUnit->unpackedDeviceBinary.get()), translationUnit->unpackedDeviceBinarySize);
603 if (NEO::isDeviceBinaryFormat<NEO::DeviceBinaryFormat::Zebin>(refBin)) {
604 createDebugZebin();
605 if (device->getSourceLevelDebugger()) {
606 NEO::DebugData debugData; // pass debug zebin in vIsa field
607 debugData.vIsa = reinterpret_cast<const char *>(translationUnit->debugData.get());
608 debugData.vIsaSize = static_cast<uint32_t>(translationUnit->debugDataSize);
609 device->getSourceLevelDebugger()->notifyKernelDebugData(&debugData, "debug_zebin", nullptr, 0);
610 }
611 } else {
612 if (device->getSourceLevelDebugger()) {
613 for (auto kernelInfo : this->translationUnit->programInfo.kernelInfos) {
614 NEO::DebugData *notifyDebugData = kernelInfo->kernelDescriptor.external.debugData.get();
615 NEO::DebugData relocatedDebugData;
616
617 if (kernelInfo->kernelDescriptor.external.relocatedDebugData.get()) {
618 relocatedDebugData.genIsa = kernelInfo->kernelDescriptor.external.debugData->genIsa;
619 relocatedDebugData.genIsaSize = kernelInfo->kernelDescriptor.external.debugData->genIsaSize;
620 relocatedDebugData.vIsa = reinterpret_cast<char *>(kernelInfo->kernelDescriptor.external.relocatedDebugData.get());
621 relocatedDebugData.vIsaSize = kernelInfo->kernelDescriptor.external.debugData->vIsaSize;
622 notifyDebugData = &relocatedDebugData;
623 }
624
625 device->getSourceLevelDebugger()->notifyKernelDebugData(notifyDebugData,
626 kernelInfo->kernelDescriptor.kernelMetadata.kernelName,
627 kernelInfo->heapInfo.pKernelHeap,
628 kernelInfo->heapInfo.KernelHeapSize);
629 }
630 }
631 }
632 }
633
getKernelImmutableData(const char * functionName) const634 const KernelImmutableData *ModuleImp::getKernelImmutableData(const char *functionName) const {
635 for (auto &kernelImmData : kernelImmDatas) {
636 if (kernelImmData->getDescriptor().kernelMetadata.kernelName.compare(functionName) == 0) {
637 return kernelImmData.get();
638 }
639 }
640 return nullptr;
641 }
642
createBuildOptions(const char * pBuildFlags,std::string & apiOptions,std::string & internalBuildOptions)643 void ModuleImp::createBuildOptions(const char *pBuildFlags, std::string &apiOptions, std::string &internalBuildOptions) {
644 if (pBuildFlags != nullptr) {
645 std::string buildFlags(pBuildFlags);
646
647 apiOptions = pBuildFlags;
648 moveBuildOption(apiOptions, apiOptions, NEO::CompilerOptions::optDisable, BuildOptions::optDisable);
649 moveBuildOption(apiOptions, apiOptions, NEO::CompilerOptions::optLevel, BuildOptions::optLevel);
650 moveBuildOption(internalBuildOptions, apiOptions, NEO::CompilerOptions::greaterThan4gbBuffersRequired, BuildOptions::greaterThan4GbRequired);
651 moveBuildOption(internalBuildOptions, apiOptions, NEO::CompilerOptions::allowZebin, NEO::CompilerOptions::allowZebin);
652
653 createBuildExtraOptions(apiOptions, internalBuildOptions);
654 }
655 if (NEO::ApiSpecificConfig::getBindlessConfiguration()) {
656 NEO::CompilerOptions::concatenateAppend(internalBuildOptions, NEO::CompilerOptions::bindlessMode.str());
657 }
658 }
659
updateBuildLog(NEO::Device * neoDevice)660 void ModuleImp::updateBuildLog(NEO::Device *neoDevice) {
661 if (this->moduleBuildLog) {
662 moduleBuildLog->appendString(this->translationUnit->buildLog.c_str(), this->translationUnit->buildLog.size());
663 }
664 }
665
createKernel(const ze_kernel_desc_t * desc,ze_kernel_handle_t * phFunction)666 ze_result_t ModuleImp::createKernel(const ze_kernel_desc_t *desc,
667 ze_kernel_handle_t *phFunction) {
668 ze_result_t res;
669 if (!isFullyLinked) {
670 return ZE_RESULT_ERROR_INVALID_MODULE_UNLINKED;
671 }
672 auto kernel = Kernel::create(productFamily, this, desc, &res);
673
674 if (res == ZE_RESULT_SUCCESS) {
675 *phFunction = kernel->toHandle();
676 }
677
678 return res;
679 }
680
getNativeBinary(size_t * pSize,uint8_t * pModuleNativeBinary)681 ze_result_t ModuleImp::getNativeBinary(size_t *pSize, uint8_t *pModuleNativeBinary) {
682 auto genBinary = this->translationUnit->packedDeviceBinary.get();
683
684 *pSize = this->translationUnit->packedDeviceBinarySize;
685 if (pModuleNativeBinary != nullptr) {
686 memcpy_s(pModuleNativeBinary, this->translationUnit->packedDeviceBinarySize, genBinary, this->translationUnit->packedDeviceBinarySize);
687 }
688 return ZE_RESULT_SUCCESS;
689 }
690
getDebugInfo(size_t * pDebugDataSize,uint8_t * pDebugData)691 ze_result_t ModuleImp::getDebugInfo(size_t *pDebugDataSize, uint8_t *pDebugData) {
692 if (translationUnit == nullptr) {
693 return ZE_RESULT_ERROR_UNINITIALIZED;
694 }
695 auto refBin = ArrayRef<const uint8_t>(reinterpret_cast<const uint8_t *>(translationUnit->unpackedDeviceBinary.get()), translationUnit->unpackedDeviceBinarySize);
696 if (nullptr == translationUnit->debugData.get() && NEO::isDeviceBinaryFormat<NEO::DeviceBinaryFormat::Zebin>(refBin)) {
697 createDebugZebin();
698 }
699 if (pDebugData != nullptr) {
700 if (*pDebugDataSize < translationUnit->debugDataSize) {
701 return ZE_RESULT_ERROR_INVALID_ARGUMENT;
702 }
703 memcpy_s(pDebugData, *pDebugDataSize, translationUnit->debugData.get(), translationUnit->debugDataSize);
704 }
705 *pDebugDataSize = translationUnit->debugDataSize;
706 return ZE_RESULT_SUCCESS;
707 }
708
copyPatchedSegments(const NEO::Linker::PatchableSegments & isaSegmentsForPatching)709 void ModuleImp::copyPatchedSegments(const NEO::Linker::PatchableSegments &isaSegmentsForPatching) {
710 if (this->translationUnit->programInfo.linkerInput && this->translationUnit->programInfo.linkerInput->getTraits().requiresPatchingOfInstructionSegments) {
711 for (auto &kernelImmData : this->kernelImmDatas) {
712 if (nullptr == kernelImmData->getIsaGraphicsAllocation()) {
713 continue;
714 }
715
716 UNRECOVERABLE_IF(kernelImmData->isIsaCopiedToAllocation());
717
718 kernelImmData->getIsaGraphicsAllocation()->setTbxWritable(true, std::numeric_limits<uint32_t>::max());
719 kernelImmData->getIsaGraphicsAllocation()->setAubWritable(true, std::numeric_limits<uint32_t>::max());
720 auto segmentId = &kernelImmData - &this->kernelImmDatas[0];
721 this->device->getDriverHandle()->getMemoryManager()->copyMemoryToAllocation(kernelImmData->getIsaGraphicsAllocation(), 0,
722 isaSegmentsForPatching[segmentId].hostPointer,
723 isaSegmentsForPatching[segmentId].segmentSize);
724
725 kernelImmData->setIsaCopiedToAllocation();
726 }
727 }
728 }
729
linkBinary()730 bool ModuleImp::linkBinary() {
731 using namespace NEO;
732 auto linkerInput = this->translationUnit->programInfo.linkerInput.get();
733 if (linkerInput == nullptr) {
734 isFullyLinked = true;
735 return true;
736 }
737 Linker linker(*linkerInput);
738 Linker::SegmentInfo globals;
739 Linker::SegmentInfo constants;
740 Linker::SegmentInfo exportedFunctions;
741 Linker::SegmentInfo strings;
742 GraphicsAllocation *globalsForPatching = translationUnit->globalVarBuffer;
743 GraphicsAllocation *constantsForPatching = translationUnit->globalConstBuffer;
744 if (globalsForPatching != nullptr) {
745 globals.gpuAddress = static_cast<uintptr_t>(globalsForPatching->getGpuAddress());
746 globals.segmentSize = globalsForPatching->getUnderlyingBufferSize();
747 }
748 if (constantsForPatching != nullptr) {
749 constants.gpuAddress = static_cast<uintptr_t>(constantsForPatching->getGpuAddress());
750 constants.segmentSize = constantsForPatching->getUnderlyingBufferSize();
751 }
752 if (translationUnit->programInfo.globalStrings.initData != nullptr) {
753 strings.gpuAddress = reinterpret_cast<uintptr_t>(translationUnit->programInfo.globalStrings.initData);
754 strings.segmentSize = translationUnit->programInfo.globalStrings.size;
755 }
756 if (linkerInput->getExportedFunctionsSegmentId() >= 0) {
757 auto exportedFunctionHeapId = linkerInput->getExportedFunctionsSegmentId();
758 this->exportedFunctionsSurface = this->kernelImmDatas[exportedFunctionHeapId]->getIsaGraphicsAllocation();
759 exportedFunctions.gpuAddress = static_cast<uintptr_t>(exportedFunctionsSurface->getGpuAddressToPatch());
760 exportedFunctions.segmentSize = exportedFunctionsSurface->getUnderlyingBufferSize();
761 }
762 Linker::PatchableSegments isaSegmentsForPatching;
763 std::vector<std::vector<char>> patchedIsaTempStorage;
764 Linker::KernelDescriptorsT kernelDescriptors;
765 if (linkerInput->getTraits().requiresPatchingOfInstructionSegments) {
766 patchedIsaTempStorage.reserve(this->kernelImmDatas.size());
767 kernelDescriptors.reserve(this->kernelImmDatas.size());
768 for (const auto &kernelInfo : this->translationUnit->programInfo.kernelInfos) {
769 auto &kernHeapInfo = kernelInfo->heapInfo;
770 const char *originalIsa = reinterpret_cast<const char *>(kernHeapInfo.pKernelHeap);
771 patchedIsaTempStorage.push_back(std::vector<char>(originalIsa, originalIsa + kernHeapInfo.KernelHeapSize));
772 isaSegmentsForPatching.push_back(Linker::PatchableSegment{patchedIsaTempStorage.rbegin()->data(), kernHeapInfo.KernelHeapSize});
773 kernelDescriptors.push_back(&kernelInfo->kernelDescriptor);
774 }
775 }
776
777 auto linkStatus = linker.link(globals, constants, exportedFunctions, strings,
778 globalsForPatching, constantsForPatching,
779 isaSegmentsForPatching, unresolvedExternalsInfo, this->device->getNEODevice(),
780 translationUnit->programInfo.globalConstants.initData,
781 translationUnit->programInfo.globalVariables.initData, kernelDescriptors);
782 this->symbols = linker.extractRelocatedSymbols();
783 if (LinkingStatus::LinkedFully != linkStatus) {
784 if (moduleBuildLog) {
785 std::vector<std::string> kernelNames;
786 for (const auto &kernelInfo : this->translationUnit->programInfo.kernelInfos) {
787 kernelNames.push_back("kernel : " + kernelInfo->kernelDescriptor.kernelMetadata.kernelName);
788 }
789 auto error = constructLinkerErrorMessage(unresolvedExternalsInfo, kernelNames);
790 moduleBuildLog->appendString(error.c_str(), error.size());
791 }
792 isFullyLinked = false;
793 return LinkingStatus::LinkedPartially == linkStatus;
794 } else if (type != ModuleType::Builtin) {
795 copyPatchedSegments(isaSegmentsForPatching);
796 }
797 DBG_LOG(PrintRelocations, NEO::constructRelocationsDebugMessage(this->symbols));
798 isFullyLinked = true;
799 for (auto kernelId = 0u; kernelId < kernelImmDatas.size(); kernelId++) {
800 auto &kernImmData = kernelImmDatas[kernelId];
801
802 kernImmData->getResidencyContainer().reserve(kernImmData->getResidencyContainer().size() +
803 ((this->exportedFunctionsSurface != nullptr) ? 1 : 0) + this->importedSymbolAllocations.size());
804
805 if (nullptr != this->exportedFunctionsSurface) {
806 kernImmData->getResidencyContainer().push_back(this->exportedFunctionsSurface);
807 }
808 kernImmData->getResidencyContainer().insert(kernImmData->getResidencyContainer().end(), this->importedSymbolAllocations.begin(),
809 this->importedSymbolAllocations.end());
810 }
811 return true;
812 }
813
getFunctionPointer(const char * pFunctionName,void ** pfnFunction)814 ze_result_t ModuleImp::getFunctionPointer(const char *pFunctionName, void **pfnFunction) {
815 auto symbolIt = symbols.find(pFunctionName);
816 if ((symbolIt == symbols.end()) || (symbolIt->second.symbol.segment != NEO::SegmentType::Instructions)) {
817 return ZE_RESULT_ERROR_INVALID_ARGUMENT;
818 }
819
820 *pfnFunction = reinterpret_cast<void *>(symbolIt->second.gpuAddress);
821 return ZE_RESULT_SUCCESS;
822 }
823
getGlobalPointer(const char * pGlobalName,size_t * pSize,void ** pPtr)824 ze_result_t ModuleImp::getGlobalPointer(const char *pGlobalName, size_t *pSize, void **pPtr) {
825 auto symbolIt = symbols.find(pGlobalName);
826 if ((symbolIt == symbols.end()) || (symbolIt->second.symbol.segment == NEO::SegmentType::Instructions)) {
827 return ZE_RESULT_ERROR_INVALID_ARGUMENT;
828 }
829 if (pPtr) {
830 *pPtr = reinterpret_cast<void *>(symbolIt->second.gpuAddress);
831 }
832 if (pSize) {
833 *pSize = symbolIt->second.symbol.size;
834 }
835 return ZE_RESULT_SUCCESS;
836 }
837
create(Device * device,const ze_module_desc_t * desc,ModuleBuildLog * moduleBuildLog,ModuleType type)838 Module *Module::create(Device *device, const ze_module_desc_t *desc,
839 ModuleBuildLog *moduleBuildLog, ModuleType type) {
840 auto module = new ModuleImp(device, moduleBuildLog, type);
841
842 bool success = module->initialize(desc, device->getNEODevice());
843 if (success == false) {
844 module->destroy();
845 return nullptr;
846 }
847
848 return module;
849 }
850
getKernelNames(uint32_t * pCount,const char ** pNames)851 ze_result_t ModuleImp::getKernelNames(uint32_t *pCount, const char **pNames) {
852 auto &kernelImmDatas = this->getKernelImmutableDataVector();
853 if (*pCount == 0) {
854 *pCount = static_cast<uint32_t>(kernelImmDatas.size());
855 return ZE_RESULT_SUCCESS;
856 }
857
858 if (*pCount > static_cast<uint32_t>(kernelImmDatas.size())) {
859 *pCount = static_cast<uint32_t>(kernelImmDatas.size());
860 }
861
862 uint32_t outCount = 0;
863 for (auto &kernelImmData : kernelImmDatas) {
864 *(pNames + outCount) = kernelImmData->getDescriptor().kernelMetadata.kernelName.c_str();
865 outCount++;
866 if (outCount == *pCount) {
867 break;
868 }
869 }
870
871 return ZE_RESULT_SUCCESS;
872 }
873
isDebugEnabled() const874 bool ModuleImp::isDebugEnabled() const {
875 return debugEnabled;
876 }
877
verifyDebugCapabilities()878 void ModuleImp::verifyDebugCapabilities() {
879 bool debugCapabilities = device->getNEODevice()->getDebugger() != nullptr;
880
881 if (debugCapabilities) {
882 //verify all kernels are debuggable
883 for (auto kernelInfo : this->translationUnit->programInfo.kernelInfos) {
884 bool systemThreadSurfaceAvailable = NEO::isValidOffset(kernelInfo->kernelDescriptor.payloadMappings.implicitArgs.systemThreadSurfaceAddress.bindful) ||
885 NEO::isValidOffset(kernelInfo->kernelDescriptor.payloadMappings.implicitArgs.systemThreadSurfaceAddress.bindless);
886
887 debugCapabilities &= systemThreadSurfaceAvailable;
888 }
889 }
890 debugEnabled = debugCapabilities;
891 }
892
checkIfPrivateMemoryPerDispatchIsNeeded()893 void ModuleImp::checkIfPrivateMemoryPerDispatchIsNeeded() {
894 size_t modulePrivateMemorySize = 0;
895 for (auto &kernelImmData : this->kernelImmDatas) {
896 if (0 == kernelImmData->getDescriptor().kernelAttributes.perHwThreadPrivateMemorySize) {
897 continue;
898 }
899 auto kernelPrivateMemorySize = NEO::KernelHelper::getPrivateSurfaceSize(kernelImmData->getDescriptor().kernelAttributes.perHwThreadPrivateMemorySize,
900 this->device->getNEODevice()->getDeviceInfo().computeUnitsUsedForScratch);
901 modulePrivateMemorySize += kernelPrivateMemorySize;
902 }
903
904 this->allocatePrivateMemoryPerDispatch = false;
905 if (modulePrivateMemorySize > 0U) {
906 auto globalMemorySize = device->getNEODevice()->getRootDevice()->getGlobalMemorySize(static_cast<uint32_t>(device->getNEODevice()->getDeviceBitfield().to_ulong()));
907 this->allocatePrivateMemoryPerDispatch = modulePrivateMemorySize > globalMemorySize;
908 }
909 }
910
getProperties(ze_module_properties_t * pModuleProperties)911 ze_result_t ModuleImp::getProperties(ze_module_properties_t *pModuleProperties) {
912
913 pModuleProperties->flags = 0;
914
915 if (!unresolvedExternalsInfo.empty()) {
916 pModuleProperties->flags |= ZE_MODULE_PROPERTY_FLAG_IMPORTS;
917 }
918
919 return ZE_RESULT_SUCCESS;
920 }
921
performDynamicLink(uint32_t numModules,ze_module_handle_t * phModules,ze_module_build_log_handle_t * phLinkLog)922 ze_result_t ModuleImp::performDynamicLink(uint32_t numModules,
923 ze_module_handle_t *phModules,
924 ze_module_build_log_handle_t *phLinkLog) {
925 ModuleBuildLog *moduleLinkLog = nullptr;
926 if (phLinkLog) {
927 moduleLinkLog = ModuleBuildLog::create();
928 *phLinkLog = moduleLinkLog->toHandle();
929 }
930 for (auto i = 0u; i < numModules; i++) {
931 auto moduleId = static_cast<ModuleImp *>(Module::fromHandle(phModules[i]));
932 if (moduleId->isFullyLinked) {
933 continue;
934 }
935 NEO::Linker::PatchableSegments isaSegmentsForPatching;
936 std::vector<std::vector<char>> patchedIsaTempStorage;
937 uint32_t numPatchedSymbols = 0u;
938 std::vector<std::string> unresolvedSymbolLogMessages;
939 if (moduleId->translationUnit->programInfo.linkerInput && moduleId->translationUnit->programInfo.linkerInput->getTraits().requiresPatchingOfInstructionSegments) {
940 patchedIsaTempStorage.reserve(moduleId->kernelImmDatas.size());
941 for (const auto &kernelInfo : moduleId->translationUnit->programInfo.kernelInfos) {
942 auto &kernHeapInfo = kernelInfo->heapInfo;
943 const char *originalIsa = reinterpret_cast<const char *>(kernHeapInfo.pKernelHeap);
944 patchedIsaTempStorage.push_back(std::vector<char>(originalIsa, originalIsa + kernHeapInfo.KernelHeapSize));
945 isaSegmentsForPatching.push_back(NEO::Linker::PatchableSegment{patchedIsaTempStorage.rbegin()->data(), kernHeapInfo.KernelHeapSize});
946 }
947 for (const auto &unresolvedExternal : moduleId->unresolvedExternalsInfo) {
948 if (moduleLinkLog) {
949 std::stringstream logMessage;
950 logMessage << "Module <" << moduleId << ">: "
951 << " Unresolved Symbol <" << unresolvedExternal.unresolvedRelocation.symbolName << ">";
952 unresolvedSymbolLogMessages.push_back(logMessage.str());
953 }
954 for (auto i = 0u; i < numModules; i++) {
955 auto moduleHandle = static_cast<ModuleImp *>(Module::fromHandle(phModules[i]));
956 auto symbolIt = moduleHandle->symbols.find(unresolvedExternal.unresolvedRelocation.symbolName);
957 if (symbolIt != moduleHandle->symbols.end()) {
958 auto relocAddress = ptrOffset(isaSegmentsForPatching[unresolvedExternal.instructionsSegmentId].hostPointer,
959 static_cast<uintptr_t>(unresolvedExternal.unresolvedRelocation.offset));
960
961 NEO::Linker::patchAddress(relocAddress, symbolIt->second, unresolvedExternal.unresolvedRelocation);
962 numPatchedSymbols++;
963 moduleId->importedSymbolAllocations.insert(moduleHandle->exportedFunctionsSurface);
964
965 if (moduleLinkLog) {
966 std::stringstream logMessage;
967 logMessage << " Successfully Resolved Thru Dynamic Link to Module <" << moduleHandle << ">";
968 unresolvedSymbolLogMessages.back().append(logMessage.str());
969 }
970
971 // Apply the exported functions surface state from the export module to the import module if it exists.
972 // Enables import modules to access the exported functions during kernel execution.
973 for (auto &kernImmData : moduleId->kernelImmDatas) {
974 kernImmData->getResidencyContainer().reserve(kernImmData->getResidencyContainer().size() +
975 ((moduleHandle->exportedFunctionsSurface != nullptr) ? 1 : 0) + moduleId->importedSymbolAllocations.size());
976
977 if (nullptr != moduleHandle->exportedFunctionsSurface) {
978 kernImmData->getResidencyContainer().push_back(moduleHandle->exportedFunctionsSurface);
979 }
980 kernImmData->getResidencyContainer().insert(kernImmData->getResidencyContainer().end(), moduleId->importedSymbolAllocations.begin(),
981 moduleId->importedSymbolAllocations.end());
982 }
983 break;
984 }
985 }
986 }
987 }
988 if (moduleLinkLog) {
989 for (int i = 0; i < (int)unresolvedSymbolLogMessages.size(); i++) {
990 moduleLinkLog->appendString(unresolvedSymbolLogMessages[i].c_str(), unresolvedSymbolLogMessages[i].size());
991 }
992 }
993 if (numPatchedSymbols != moduleId->unresolvedExternalsInfo.size()) {
994 return ZE_RESULT_ERROR_MODULE_LINK_FAILURE;
995 }
996 moduleId->copyPatchedSegments(isaSegmentsForPatching);
997 moduleId->isFullyLinked = true;
998 }
999 return ZE_RESULT_SUCCESS;
1000 }
1001
moveBuildOption(std::string & dstOptionsSet,std::string & srcOptionSet,NEO::ConstStringRef dstOptionName,NEO::ConstStringRef srcOptionName)1002 bool moveBuildOption(std::string &dstOptionsSet, std::string &srcOptionSet, NEO::ConstStringRef dstOptionName, NEO::ConstStringRef srcOptionName) {
1003 const char optDelim = ' ';
1004 const char valDelim = '=';
1005
1006 auto optInSrcPos = srcOptionSet.find(srcOptionName.begin());
1007 if (std::string::npos == optInSrcPos) {
1008 return false;
1009 }
1010
1011 std::string dstOptionStr(dstOptionName);
1012 auto optInSrcEndPos = srcOptionSet.find(optDelim, optInSrcPos);
1013 if (srcOptionName == BuildOptions::optLevel) {
1014 auto valInSrcPos = srcOptionSet.find(valDelim, optInSrcPos);
1015 if (std::string::npos == valInSrcPos) {
1016 return false;
1017 }
1018 dstOptionStr += srcOptionSet.substr(valInSrcPos + 1, optInSrcEndPos);
1019 }
1020 srcOptionSet.erase(optInSrcPos, (optInSrcEndPos - optInSrcPos));
1021 NEO::CompilerOptions::concatenateAppend(dstOptionsSet, dstOptionStr);
1022 return true;
1023 }
1024
1025 } // namespace L0
1026