1 /*
2  * Copyright (C) 2020-2021 Intel Corporation
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  */
7 
8 #include "level_zero/core/source/device/device_imp.h"
9 
10 #include "shared/source/built_ins/sip.h"
11 #include "shared/source/command_container/implicit_scaling.h"
12 #include "shared/source/debug_settings/debug_settings_manager.h"
13 #include "shared/source/device/device.h"
14 #include "shared/source/device/device_info.h"
15 #include "shared/source/device/sub_device.h"
16 #include "shared/source/execution_environment/execution_environment.h"
17 #include "shared/source/execution_environment/root_device_environment.h"
18 #include "shared/source/gmm_helper/gmm_helper.h"
19 #include "shared/source/helpers/common_types.h"
20 #include "shared/source/helpers/constants.h"
21 #include "shared/source/helpers/engine_node_helper.h"
22 #include "shared/source/helpers/hw_helper.h"
23 #include "shared/source/helpers/string.h"
24 #include "shared/source/helpers/topology_map.h"
25 #include "shared/source/kernel/grf_config.h"
26 #include "shared/source/memory_manager/memory_manager.h"
27 #include "shared/source/os_interface/hw_info_config.h"
28 #include "shared/source/os_interface/os_interface.h"
29 #include "shared/source/os_interface/os_time.h"
30 #include "shared/source/source_level_debugger/source_level_debugger.h"
31 #include "shared/source/utilities/debug_settings_reader_creator.h"
32 
33 #include "level_zero/core/source/builtin/builtin_functions_lib.h"
34 #include "level_zero/core/source/cache/cache_reservation.h"
35 #include "level_zero/core/source/cmdlist/cmdlist.h"
36 #include "level_zero/core/source/cmdqueue/cmdqueue.h"
37 #include "level_zero/core/source/context/context_imp.h"
38 #include "level_zero/core/source/driver/driver_handle_imp.h"
39 #include "level_zero/core/source/event/event.h"
40 #include "level_zero/core/source/hw_helpers/l0_hw_helper.h"
41 #include "level_zero/core/source/image/image.h"
42 #include "level_zero/core/source/module/module.h"
43 #include "level_zero/core/source/printf_handler/printf_handler.h"
44 #include "level_zero/core/source/sampler/sampler.h"
45 #include "level_zero/tools/source/debug/debug_session.h"
46 #include "level_zero/tools/source/metrics/metric.h"
47 #include "level_zero/tools/source/sysman/sysman.h"
48 
// Forward declaration of a NEO-side hook; its definition is not part of this
// section of the file.
namespace NEO {
bool releaseFP64Override();
} // namespace NEO
52 
53 namespace L0 {
54 
getDriverHandle()55 DriverHandle *DeviceImp::getDriverHandle() {
56     return this->driverHandle;
57 }
58 
setDriverHandle(DriverHandle * driverHandle)59 void DeviceImp::setDriverHandle(DriverHandle *driverHandle) {
60     this->driverHandle = driverHandle;
61 }
62 
canAccessPeer(ze_device_handle_t hPeerDevice,ze_bool_t * value)63 ze_result_t DeviceImp::canAccessPeer(ze_device_handle_t hPeerDevice, ze_bool_t *value) {
64     *value = false;
65 
66     DeviceImp *pPeerDevice = static_cast<DeviceImp *>(Device::fromHandle(hPeerDevice));
67     uint32_t peerRootDeviceIndex = pPeerDevice->getNEODevice()->getRootDeviceIndex();
68 
69     if (this->crossAccessEnabledDevices.find(peerRootDeviceIndex) != this->crossAccessEnabledDevices.end()) {
70         *value = this->crossAccessEnabledDevices[peerRootDeviceIndex];
71     } else if (this->getNEODevice()->getRootDeviceIndex() == peerRootDeviceIndex) {
72         *value = true;
73     } else {
74         ze_command_list_handle_t commandList = nullptr;
75         ze_command_list_desc_t listDescriptor = {};
76         listDescriptor.stype = ZE_STRUCTURE_TYPE_COMMAND_LIST_DESC;
77         listDescriptor.pNext = nullptr;
78         listDescriptor.flags = 0;
79         listDescriptor.commandQueueGroupOrdinal = 0;
80 
81         ze_command_queue_handle_t commandQueue = nullptr;
82         ze_command_queue_desc_t queueDescriptor = {};
83         queueDescriptor.stype = ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC;
84         queueDescriptor.pNext = nullptr;
85         queueDescriptor.flags = 0;
86         queueDescriptor.mode = ZE_COMMAND_QUEUE_MODE_DEFAULT;
87         queueDescriptor.priority = ZE_COMMAND_QUEUE_PRIORITY_NORMAL;
88         queueDescriptor.ordinal = 0;
89         queueDescriptor.index = 0;
90 
91         this->createCommandList(&listDescriptor, &commandList);
92         this->createCommandQueue(&queueDescriptor, &commandQueue);
93 
94         auto driverHandle = this->getDriverHandle();
95         DriverHandleImp *driverHandleImp = static_cast<DriverHandleImp *>(driverHandle);
96 
97         ze_context_handle_t context;
98         ze_context_desc_t contextDesc = {};
99         contextDesc.stype = ZE_STRUCTURE_TYPE_CONTEXT_DESC;
100         driverHandleImp->createContext(&contextDesc, 0u, nullptr, &context);
101         ContextImp *contextImp = static_cast<ContextImp *>(context);
102 
103         void *memory = nullptr;
104         void *peerMemory = nullptr;
105 
106         ze_device_mem_alloc_desc_t deviceDesc = {};
107         deviceDesc.stype = ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC;
108         deviceDesc.ordinal = 0;
109         deviceDesc.flags = 0;
110         deviceDesc.pNext = nullptr;
111 
112         ze_device_mem_alloc_desc_t peerDeviceDesc = {};
113         peerDeviceDesc.stype = ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC;
114         peerDeviceDesc.ordinal = 0;
115         peerDeviceDesc.flags = 0;
116         peerDeviceDesc.pNext = nullptr;
117 
118         contextImp->allocDeviceMem(this->toHandle(), &deviceDesc, 8, 1, &memory);
119         contextImp->allocDeviceMem(hPeerDevice, &peerDeviceDesc, 8, 1, &peerMemory);
120 
121         auto ret = L0::CommandList::fromHandle(commandList)->appendMemoryCopy(peerMemory, memory, 8, nullptr, 0, nullptr);
122         L0::CommandList::fromHandle(commandList)->close();
123 
124         if (ret == ZE_RESULT_SUCCESS) {
125             ret = L0::CommandQueue::fromHandle(commandQueue)->executeCommandLists(1, &commandList, nullptr, true);
126             if (ret == ZE_RESULT_SUCCESS) {
127                 this->crossAccessEnabledDevices[peerRootDeviceIndex] = true;
128                 pPeerDevice->crossAccessEnabledDevices[this->getNEODevice()->getRootDeviceIndex()] = true;
129                 L0::CommandQueue::fromHandle(commandQueue)->synchronize(std::numeric_limits<uint64_t>::max());
130                 *value = true;
131             }
132         }
133 
134         contextImp->freeMem(peerMemory);
135         contextImp->freeMem(memory);
136 
137         L0::Context::fromHandle(context)->destroy();
138         L0::CommandQueue::fromHandle(commandQueue)->destroy();
139         L0::CommandList::fromHandle(commandList)->destroy();
140     }
141 
142     return ZE_RESULT_SUCCESS;
143 }
144 
createCommandList(const ze_command_list_desc_t * desc,ze_command_list_handle_t * commandList)145 ze_result_t DeviceImp::createCommandList(const ze_command_list_desc_t *desc,
146                                          ze_command_list_handle_t *commandList) {
147     auto &engineGroups = getActiveDevice()->getRegularEngineGroups();
148     if (desc->commandQueueGroupOrdinal >= engineGroups.size()) {
149         return ZE_RESULT_ERROR_INVALID_ARGUMENT;
150     }
151     auto productFamily = neoDevice->getHardwareInfo().platform.eProductFamily;
152     ze_result_t returnValue = ZE_RESULT_SUCCESS;
153     auto engineGroupType = engineGroups[desc->commandQueueGroupOrdinal].engineGroupType;
154     *commandList = CommandList::create(productFamily, this, engineGroupType, desc->flags, returnValue);
155 
156     return returnValue;
157 }
158 
createCommandListImmediate(const ze_command_queue_desc_t * desc,ze_command_list_handle_t * phCommandList)159 ze_result_t DeviceImp::createCommandListImmediate(const ze_command_queue_desc_t *desc,
160                                                   ze_command_list_handle_t *phCommandList) {
161     auto &engineGroups = getActiveDevice()->getRegularEngineGroups();
162     if (desc->ordinal >= engineGroups.size()) {
163         return ZE_RESULT_ERROR_INVALID_ARGUMENT;
164     }
165     auto productFamily = neoDevice->getHardwareInfo().platform.eProductFamily;
166     auto engineGroupType = engineGroups[desc->ordinal].engineGroupType;
167     ze_result_t returnValue = ZE_RESULT_SUCCESS;
168     *phCommandList = CommandList::createImmediate(productFamily, this, desc, false, engineGroupType, returnValue);
169 
170     return returnValue;
171 }
172 
createCommandQueue(const ze_command_queue_desc_t * desc,ze_command_queue_handle_t * commandQueue)173 ze_result_t DeviceImp::createCommandQueue(const ze_command_queue_desc_t *desc,
174                                           ze_command_queue_handle_t *commandQueue) {
175     auto &platform = neoDevice->getHardwareInfo().platform;
176 
177     NEO::CommandStreamReceiver *csr = nullptr;
178     auto &engineGroups = getActiveDevice()->getRegularEngineGroups();
179     if (desc->ordinal >= engineGroups.size()) {
180         return ZE_RESULT_ERROR_INVALID_ARGUMENT;
181     }
182     if (desc->priority == ZE_COMMAND_QUEUE_PRIORITY_PRIORITY_LOW) {
183         getCsrForLowPriority(&csr);
184     } else {
185         auto ret = getCsrForOrdinalAndIndex(&csr, desc->ordinal, desc->index);
186         if (ret != ZE_RESULT_SUCCESS) {
187             return ret;
188         }
189     }
190 
191     UNRECOVERABLE_IF(csr == nullptr);
192 
193     auto &hwHelper = NEO::HwHelper::get(platform.eRenderCoreFamily);
194     bool isCopyOnly = hwHelper.isCopyOnlyEngineType(engineGroups[desc->ordinal].engineGroupType);
195 
196     ze_result_t returnValue = ZE_RESULT_SUCCESS;
197     *commandQueue = CommandQueue::create(platform.eProductFamily, this, csr, desc, isCopyOnly, false, returnValue);
198 
199     return returnValue;
200 }
201 
getCommandQueueGroupProperties(uint32_t * pCount,ze_command_queue_group_properties_t * pCommandQueueGroupProperties)202 ze_result_t DeviceImp::getCommandQueueGroupProperties(uint32_t *pCount,
203                                                       ze_command_queue_group_properties_t *pCommandQueueGroupProperties) {
204     NEO::Device *activeDevice = getActiveDevice();
205     auto &engineGroups = activeDevice->getRegularEngineGroups();
206     uint32_t numEngineGroups = static_cast<uint32_t>(engineGroups.size());
207 
208     if (*pCount == 0) {
209         *pCount = numEngineGroups;
210         return ZE_RESULT_SUCCESS;
211     }
212 
213     const auto &hardwareInfo = this->neoDevice->getHardwareInfo();
214     auto &hwHelper = NEO::HwHelper::get(hardwareInfo.platform.eRenderCoreFamily);
215     auto &l0HwHelper = L0HwHelper::get(hardwareInfo.platform.eRenderCoreFamily);
216 
217     *pCount = std::min(numEngineGroups, *pCount);
218     for (uint32_t i = 0; i < *pCount; i++) {
219         if (engineGroups[i].engineGroupType == NEO::EngineGroupType::RenderCompute) {
220             pCommandQueueGroupProperties[i].flags = ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE |
221                                                     ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COPY |
222                                                     ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_METRICS |
223                                                     ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COOPERATIVE_KERNELS;
224             pCommandQueueGroupProperties[i].maxMemoryFillPatternSize = std::numeric_limits<size_t>::max();
225         }
226         if (engineGroups[i].engineGroupType == NEO::EngineGroupType::Compute) {
227             pCommandQueueGroupProperties[i].flags = ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE |
228                                                     ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COPY |
229                                                     ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COOPERATIVE_KERNELS;
230             pCommandQueueGroupProperties[i].maxMemoryFillPatternSize = std::numeric_limits<size_t>::max();
231         }
232         if (engineGroups[i].engineGroupType == NEO::EngineGroupType::Copy) {
233             pCommandQueueGroupProperties[i].flags = ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COPY;
234             pCommandQueueGroupProperties[i].maxMemoryFillPatternSize = hwHelper.getMaxFillPaternSizeForCopyEngine();
235         }
236         l0HwHelper.setAdditionalGroupProperty(pCommandQueueGroupProperties[i], engineGroups[i].engineGroupType);
237         pCommandQueueGroupProperties[i].numQueues = static_cast<uint32_t>(engineGroups[i].engines.size());
238     }
239 
240     return ZE_RESULT_SUCCESS;
241 }
242 
createImage(const ze_image_desc_t * desc,ze_image_handle_t * phImage)243 ze_result_t DeviceImp::createImage(const ze_image_desc_t *desc, ze_image_handle_t *phImage) {
244     auto productFamily = neoDevice->getHardwareInfo().platform.eProductFamily;
245     Image *pImage = nullptr;
246     auto result = Image::create(productFamily, this, desc, &pImage);
247     if (result == ZE_RESULT_SUCCESS) {
248         *phImage = pImage->toHandle();
249     }
250 
251     return result;
252 }
253 
createSampler(const ze_sampler_desc_t * desc,ze_sampler_handle_t * sampler)254 ze_result_t DeviceImp::createSampler(const ze_sampler_desc_t *desc,
255                                      ze_sampler_handle_t *sampler) {
256     auto productFamily = neoDevice->getHardwareInfo().platform.eProductFamily;
257     *sampler = Sampler::create(productFamily, this, desc);
258 
259     return ZE_RESULT_SUCCESS;
260 }
261 
createModule(const ze_module_desc_t * desc,ze_module_handle_t * module,ze_module_build_log_handle_t * buildLog,ModuleType type)262 ze_result_t DeviceImp::createModule(const ze_module_desc_t *desc, ze_module_handle_t *module,
263                                     ze_module_build_log_handle_t *buildLog, ModuleType type) {
264     ModuleBuildLog *moduleBuildLog = nullptr;
265 
266     if (buildLog) {
267         moduleBuildLog = ModuleBuildLog::create();
268         *buildLog = moduleBuildLog->toHandle();
269     }
270     auto modulePtr = Module::create(this, desc, moduleBuildLog, type);
271     if (modulePtr == nullptr) {
272         return ZE_RESULT_ERROR_MODULE_BUILD_FAILURE;
273     }
274 
275     *module = modulePtr;
276 
277     return ZE_RESULT_SUCCESS;
278 }
279 
getComputeProperties(ze_device_compute_properties_t * pComputeProperties)280 ze_result_t DeviceImp::getComputeProperties(ze_device_compute_properties_t *pComputeProperties) {
281     const auto &deviceInfo = this->neoDevice->getDeviceInfo();
282 
283     pComputeProperties->maxTotalGroupSize = static_cast<uint32_t>(deviceInfo.maxWorkGroupSize);
284 
285     pComputeProperties->maxGroupSizeX = static_cast<uint32_t>(deviceInfo.maxWorkItemSizes[0]);
286     pComputeProperties->maxGroupSizeY = static_cast<uint32_t>(deviceInfo.maxWorkItemSizes[1]);
287     pComputeProperties->maxGroupSizeZ = static_cast<uint32_t>(deviceInfo.maxWorkItemSizes[2]);
288 
289     pComputeProperties->maxGroupCountX = std::numeric_limits<uint32_t>::max();
290     pComputeProperties->maxGroupCountY = std::numeric_limits<uint32_t>::max();
291     pComputeProperties->maxGroupCountZ = std::numeric_limits<uint32_t>::max();
292 
293     pComputeProperties->maxSharedLocalMemory = static_cast<uint32_t>(deviceInfo.localMemSize);
294 
295     pComputeProperties->numSubGroupSizes = static_cast<uint32_t>(deviceInfo.maxSubGroups.size());
296 
297     for (uint32_t i = 0; i < pComputeProperties->numSubGroupSizes; ++i) {
298         pComputeProperties->subGroupSizes[i] = static_cast<uint32_t>(deviceInfo.maxSubGroups[i]);
299     }
300 
301     return ZE_RESULT_SUCCESS;
302 }
303 
getP2PProperties(ze_device_handle_t hPeerDevice,ze_device_p2p_properties_t * pP2PProperties)304 ze_result_t DeviceImp::getP2PProperties(ze_device_handle_t hPeerDevice,
305                                         ze_device_p2p_properties_t *pP2PProperties) {
306     pP2PProperties->flags = 0;
307     return ZE_RESULT_SUCCESS;
308 }
309 
getMemoryProperties(uint32_t * pCount,ze_device_memory_properties_t * pMemProperties)310 ze_result_t DeviceImp::getMemoryProperties(uint32_t *pCount, ze_device_memory_properties_t *pMemProperties) {
311     if (*pCount == 0) {
312         *pCount = 1;
313         return ZE_RESULT_SUCCESS;
314     }
315 
316     if (*pCount > 1) {
317         *pCount = 1;
318     }
319 
320     if (nullptr == pMemProperties) {
321         return ZE_RESULT_ERROR_INVALID_ARGUMENT;
322     }
323 
324     const auto &deviceInfo = this->neoDevice->getDeviceInfo();
325     auto &hwInfo = this->getHwInfo();
326     auto &hwInfoConfig = *NEO::HwInfoConfig::get(hwInfo.platform.eProductFamily);
327     strcpy_s(pMemProperties->name, ZE_MAX_DEVICE_NAME, hwInfoConfig.getDeviceMemoryName().c_str());
328     pMemProperties->maxClockRate = hwInfoConfig.getDeviceMemoryMaxClkRate(&hwInfo);
329     pMemProperties->maxBusWidth = deviceInfo.addressBits;
330     if (this->isImplicitScalingCapable() ||
331         this->getNEODevice()->getNumGenericSubDevices() == 0) {
332         pMemProperties->totalSize = deviceInfo.globalMemSize;
333     } else {
334         pMemProperties->totalSize = deviceInfo.globalMemSize / this->numSubDevices;
335     }
336 
337     pMemProperties->flags = 0;
338     return ZE_RESULT_SUCCESS;
339 }
340 
getMemoryAccessProperties(ze_device_memory_access_properties_t * pMemAccessProperties)341 ze_result_t DeviceImp::getMemoryAccessProperties(ze_device_memory_access_properties_t *pMemAccessProperties) {
342     auto &hwInfo = this->getHwInfo();
343     auto &hwInfoConfig = *NEO::HwInfoConfig::get(hwInfo.platform.eProductFamily);
344     pMemAccessProperties->hostAllocCapabilities =
345         static_cast<ze_memory_access_cap_flags_t>(hwInfoConfig.getHostMemCapabilities(&hwInfo));
346     pMemAccessProperties->deviceAllocCapabilities =
347         ZE_MEMORY_ACCESS_CAP_FLAG_RW | ZE_MEMORY_ACCESS_CAP_FLAG_ATOMIC;
348     pMemAccessProperties->sharedSingleDeviceAllocCapabilities =
349         ZE_MEMORY_ACCESS_CAP_FLAG_RW | ZE_MEMORY_ACCESS_CAP_FLAG_ATOMIC;
350     pMemAccessProperties->sharedCrossDeviceAllocCapabilities = 0;
351     pMemAccessProperties->sharedSystemAllocCapabilities = 0;
352 
353     return ZE_RESULT_SUCCESS;
354 }
355 
356 static constexpr ze_device_fp_flags_t defaultFpFlags = static_cast<ze_device_fp_flags_t>(ZE_DEVICE_FP_FLAG_ROUND_TO_NEAREST |
357                                                                                          ZE_DEVICE_FP_FLAG_ROUND_TO_ZERO |
358                                                                                          ZE_DEVICE_FP_FLAG_ROUND_TO_INF |
359                                                                                          ZE_DEVICE_FP_FLAG_INF_NAN |
360                                                                                          ZE_DEVICE_FP_FLAG_DENORM |
361                                                                                          ZE_DEVICE_FP_FLAG_FMA);
362 
getKernelProperties(ze_device_module_properties_t * pKernelProperties)363 ze_result_t DeviceImp::getKernelProperties(ze_device_module_properties_t *pKernelProperties) {
364     const auto &hardwareInfo = this->neoDevice->getHardwareInfo();
365     const auto &deviceInfo = this->neoDevice->getDeviceInfo();
366     auto &hwHelper = NEO::HwHelper::get(hardwareInfo.platform.eRenderCoreFamily);
367 
368     std::string ilVersion = deviceInfo.ilVersion;
369     size_t majorVersionPos = ilVersion.find('_');
370     size_t minorVersionPos = ilVersion.find('.');
371 
372     if (majorVersionPos != std::string::npos && minorVersionPos != std::string::npos) {
373         uint32_t majorSpirvVersion = static_cast<uint32_t>(std::stoul(ilVersion.substr(majorVersionPos + 1, minorVersionPos)));
374         uint32_t minorSpirvVersion = static_cast<uint32_t>(std::stoul(ilVersion.substr(minorVersionPos + 1)));
375         pKernelProperties->spirvVersionSupported = ZE_MAKE_VERSION(majorSpirvVersion, minorSpirvVersion);
376     } else {
377         DEBUG_BREAK_IF(true);
378         return ZE_RESULT_ERROR_UNKNOWN;
379     }
380 
381     pKernelProperties->flags = ZE_DEVICE_MODULE_FLAG_FP16;
382     if (hardwareInfo.capabilityTable.ftrSupportsInteger64BitAtomics) {
383         pKernelProperties->flags |= ZE_DEVICE_MODULE_FLAG_INT64_ATOMICS;
384     }
385     pKernelProperties->fp16flags = defaultFpFlags;
386     pKernelProperties->fp32flags = defaultFpFlags;
387 
388     if (NEO::DebugManager.flags.OverrideDefaultFP64Settings.get() == 1) {
389         pKernelProperties->flags |= ZE_DEVICE_MODULE_FLAG_FP64;
390         pKernelProperties->fp64flags = defaultFpFlags | ZE_DEVICE_FP_FLAG_ROUNDED_DIVIDE_SQRT;
391         pKernelProperties->fp32flags |= ZE_DEVICE_FP_FLAG_ROUNDED_DIVIDE_SQRT;
392     } else {
393         pKernelProperties->fp64flags = 0;
394         if (hardwareInfo.capabilityTable.ftrSupportsFP64) {
395             pKernelProperties->flags |= ZE_DEVICE_MODULE_FLAG_FP64;
396             pKernelProperties->fp64flags |= defaultFpFlags;
397             if (hardwareInfo.capabilityTable.ftrSupports64BitMath) {
398                 pKernelProperties->fp64flags |= ZE_DEVICE_FP_FLAG_ROUNDED_DIVIDE_SQRT;
399                 pKernelProperties->fp32flags |= ZE_DEVICE_FP_FLAG_ROUNDED_DIVIDE_SQRT;
400             }
401         }
402     }
403 
404     pKernelProperties->nativeKernelSupported.id[0] = 0;
405 
406     processAdditionalKernelProperties(hwHelper, pKernelProperties);
407 
408     pKernelProperties->maxArgumentsSize = static_cast<uint32_t>(this->neoDevice->getDeviceInfo().maxParameterSize);
409 
410     pKernelProperties->printfBufferSize = static_cast<uint32_t>(this->neoDevice->getDeviceInfo().printfBufferSize);
411 
412     auto &hwInfo = this->getHwInfo();
413     auto &hwInfoConfig = *NEO::HwInfoConfig::get(hwInfo.platform.eProductFamily);
414 
415     void *pNext = pKernelProperties->pNext;
416     while (pNext) {
417         ze_base_desc_t *extendedProperties = reinterpret_cast<ze_base_desc_t *>(pKernelProperties->pNext);
418         if (extendedProperties->stype == ZE_STRUCTURE_TYPE_FLOAT_ATOMIC_EXT_PROPERTIES) {
419             ze_float_atomic_ext_properties_t *floatProperties =
420                 reinterpret_cast<ze_float_atomic_ext_properties_t *>(extendedProperties);
421             hwInfoConfig.getKernelExtendedProperties(&floatProperties->fp16Flags,
422                                                      &floatProperties->fp32Flags,
423                                                      &floatProperties->fp64Flags);
424         } else if (extendedProperties->stype == ZE_STRUCTURE_TYPE_SCHEDULING_HINT_EXP_PROPERTIES) {
425             ze_scheduling_hint_exp_properties_t *hintProperties =
426                 reinterpret_cast<ze_scheduling_hint_exp_properties_t *>(extendedProperties);
427             auto supportedThreadArbitrationPolicies = hwInfoConfig.getKernelSupportedThreadArbitrationPolicies();
428             hintProperties->schedulingHintFlags = 0;
429             for (uint32_t &p : supportedThreadArbitrationPolicies) {
430                 switch (p) {
431                 case NEO::ThreadArbitrationPolicy::AgeBased:
432                     hintProperties->schedulingHintFlags |= ZE_SCHEDULING_HINT_EXP_FLAG_OLDEST_FIRST;
433                     break;
434                 case NEO::ThreadArbitrationPolicy::RoundRobin:
435                     hintProperties->schedulingHintFlags |= ZE_SCHEDULING_HINT_EXP_FLAG_ROUND_ROBIN;
436                     break;
437                 case NEO::ThreadArbitrationPolicy::RoundRobinAfterDependency:
438                     hintProperties->schedulingHintFlags |= ZE_SCHEDULING_HINT_EXP_FLAG_STALL_BASED_ROUND_ROBIN;
439                     break;
440                 }
441             }
442         }
443         pNext = const_cast<void *>(extendedProperties->pNext);
444     }
445 
446     return ZE_RESULT_SUCCESS;
447 }
448 
getProperties(ze_device_properties_t * pDeviceProperties)449 ze_result_t DeviceImp::getProperties(ze_device_properties_t *pDeviceProperties) {
450     const auto &deviceInfo = this->neoDevice->getDeviceInfo();
451     const auto &hardwareInfo = this->neoDevice->getHardwareInfo();
452     auto &hwHelper = NEO::HwHelper::get(hardwareInfo.platform.eRenderCoreFamily);
453 
454     pDeviceProperties->type = ZE_DEVICE_TYPE_GPU;
455 
456     pDeviceProperties->vendorId = deviceInfo.vendorId;
457 
458     pDeviceProperties->deviceId = hardwareInfo.platform.usDeviceID;
459 
460     pDeviceProperties->flags = 0u;
461 
462     std::array<uint8_t, NEO::HwInfoConfig::uuidSize> deviceUuid;
463     if (this->neoDevice->getUuid(deviceUuid)) {
464         std::copy_n(std::begin(deviceUuid), ZE_MAX_DEVICE_UUID_SIZE, std::begin(pDeviceProperties->uuid.id));
465     } else {
466 
467         uint32_t rootDeviceIndex = this->neoDevice->getRootDeviceIndex();
468 
469         memset(pDeviceProperties->uuid.id, 0, ZE_MAX_DEVICE_UUID_SIZE);
470         memcpy_s(pDeviceProperties->uuid.id, sizeof(uint32_t), &deviceInfo.vendorId, sizeof(deviceInfo.vendorId));
471         memcpy_s(pDeviceProperties->uuid.id + sizeof(uint32_t), sizeof(uint32_t), &hardwareInfo.platform.usDeviceID, sizeof(hardwareInfo.platform.usDeviceID));
472         memcpy_s(pDeviceProperties->uuid.id + (2 * sizeof(uint32_t)), sizeof(uint32_t), &rootDeviceIndex, sizeof(rootDeviceIndex));
473     }
474 
475     pDeviceProperties->subdeviceId = isSubdevice ? static_cast<NEO::SubDevice *>(neoDevice)->getSubDeviceIndex() : 0;
476 
477     pDeviceProperties->coreClockRate = deviceInfo.maxClockFrequency;
478 
479     pDeviceProperties->maxMemAllocSize = this->neoDevice->getDeviceInfo().maxMemAllocSize;
480 
481     pDeviceProperties->maxCommandQueuePriority = 0;
482 
483     pDeviceProperties->maxHardwareContexts = 1024 * 64;
484 
485     pDeviceProperties->numThreadsPerEU = deviceInfo.numThreadsPerEU;
486 
487     pDeviceProperties->physicalEUSimdWidth = hwHelper.getMinimalSIMDSize();
488 
489     pDeviceProperties->numEUsPerSubslice = hardwareInfo.gtSystemInfo.MaxEuPerSubSlice;
490 
491     if (NEO::DebugManager.flags.DebugApiUsed.get() == 1) {
492         pDeviceProperties->numSubslicesPerSlice = hardwareInfo.gtSystemInfo.MaxSubSlicesSupported / hardwareInfo.gtSystemInfo.MaxSlicesSupported;
493     } else {
494         pDeviceProperties->numSubslicesPerSlice = hardwareInfo.gtSystemInfo.SubSliceCount / hardwareInfo.gtSystemInfo.SliceCount;
495     }
496 
497     pDeviceProperties->numSlices = hardwareInfo.gtSystemInfo.SliceCount;
498 
499     if (isImplicitScalingCapable()) {
500         pDeviceProperties->numSlices *= neoDevice->getNumGenericSubDevices();
501     }
502 
503     if ((NEO::DebugManager.flags.UseCyclesPerSecondTimer.get() == 1) ||
504         (pDeviceProperties->stype == ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES_1_2)) {
505         pDeviceProperties->timerResolution = this->neoDevice->getDeviceInfo().outProfilingTimerClock;
506     } else {
507         pDeviceProperties->timerResolution = this->neoDevice->getDeviceInfo().outProfilingTimerResolution;
508     }
509 
510     pDeviceProperties->timestampValidBits = hardwareInfo.capabilityTable.timestampValidBits;
511 
512     pDeviceProperties->kernelTimestampValidBits = hardwareInfo.capabilityTable.kernelTimestampValidBits;
513 
514     if (hardwareInfo.capabilityTable.isIntegratedDevice) {
515         pDeviceProperties->flags |= ZE_DEVICE_PROPERTY_FLAG_INTEGRATED;
516     }
517 
518     if (isSubdevice) {
519         pDeviceProperties->flags |= ZE_DEVICE_PROPERTY_FLAG_SUBDEVICE;
520     }
521 
522     if (this->neoDevice->getDeviceInfo().errorCorrectionSupport) {
523         pDeviceProperties->flags |= ZE_DEVICE_PROPERTY_FLAG_ECC;
524     }
525 
526     if (hardwareInfo.capabilityTable.supportsOnDemandPageFaults) {
527         pDeviceProperties->flags |= ZE_DEVICE_PROPERTY_FLAG_ONDEMANDPAGING;
528     }
529 
530     memset(pDeviceProperties->name, 0, ZE_MAX_DEVICE_NAME);
531 
532     std::string name = getNEODevice()->getDeviceInfo().name;
533     memcpy_s(pDeviceProperties->name, name.length(), name.c_str(), name.length());
534 
535     return ZE_RESULT_SUCCESS;
536 }
537 
getExternalMemoryProperties(ze_device_external_memory_properties_t * pExternalMemoryProperties)538 ze_result_t DeviceImp::getExternalMemoryProperties(ze_device_external_memory_properties_t *pExternalMemoryProperties) {
539     pExternalMemoryProperties->imageExportTypes = 0u;
540     pExternalMemoryProperties->imageImportTypes = 0u;
541     pExternalMemoryProperties->memoryAllocationExportTypes = ZE_EXTERNAL_MEMORY_TYPE_FLAG_DMA_BUF;
542     pExternalMemoryProperties->memoryAllocationImportTypes = ZE_EXTERNAL_MEMORY_TYPE_FLAG_DMA_BUF;
543 
544     return ZE_RESULT_SUCCESS;
545 }
546 
getGlobalTimestamps(uint64_t * hostTimestamp,uint64_t * deviceTimestamp)547 ze_result_t DeviceImp::getGlobalTimestamps(uint64_t *hostTimestamp, uint64_t *deviceTimestamp) {
548     NEO::TimeStampData queueTimeStamp;
549     bool retVal = this->neoDevice->getOSTime()->getCpuGpuTime(&queueTimeStamp);
550     if (!retVal)
551         return ZE_RESULT_ERROR_DEVICE_LOST;
552 
553     *deviceTimestamp = queueTimeStamp.GPUTimeStamp;
554 
555     retVal = this->neoDevice->getOSTime()->getCpuTime(hostTimestamp);
556     if (!retVal)
557         return ZE_RESULT_ERROR_DEVICE_LOST;
558 
559     return ZE_RESULT_SUCCESS;
560 }
561 
getSubDevices(uint32_t * pCount,ze_device_handle_t * phSubdevices)562 ze_result_t DeviceImp::getSubDevices(uint32_t *pCount, ze_device_handle_t *phSubdevices) {
563     if (*pCount == 0) {
564         *pCount = this->numSubDevices;
565         return ZE_RESULT_SUCCESS;
566     }
567 
568     if (phSubdevices == nullptr) {
569         return ZE_RESULT_ERROR_INVALID_ARGUMENT;
570     }
571 
572     if (*pCount > this->numSubDevices) {
573         *pCount = this->numSubDevices;
574     }
575 
576     for (uint32_t i = 0; i < *pCount; i++) {
577         phSubdevices[i] = this->subDevices[i];
578     }
579 
580     return ZE_RESULT_SUCCESS;
581 }
582 
getCacheProperties(uint32_t * pCount,ze_device_cache_properties_t * pCacheProperties)583 ze_result_t DeviceImp::getCacheProperties(uint32_t *pCount, ze_device_cache_properties_t *pCacheProperties) {
584     if (*pCount == 0) {
585         *pCount = 1;
586         return ZE_RESULT_SUCCESS;
587     }
588 
589     if (*pCount > 1) {
590         *pCount = 1;
591     }
592 
593     const auto &hardwareInfo = this->getHwInfo();
594     pCacheProperties[0].cacheSize = hardwareInfo.gtSystemInfo.L3BankCount * 128 * KB;
595     pCacheProperties[0].flags = 0;
596 
597     if (pCacheProperties->pNext) {
598         auto extendedProperties = reinterpret_cast<ze_device_cache_properties_t *>(pCacheProperties->pNext);
599         if (extendedProperties->stype == ZE_STRUCTURE_TYPE_CACHE_RESERVATION_EXT_DESC) {
600             auto cacheReservationProperties = reinterpret_cast<ze_cache_reservation_ext_desc_t *>(extendedProperties);
601             cacheReservationProperties->maxCacheReservationSize = cacheReservation->getMaxCacheReservationSize();
602         } else {
603             return ZE_RESULT_ERROR_UNSUPPORTED_ENUMERATION;
604         }
605     }
606 
607     return ZE_RESULT_SUCCESS;
608 }
609 
reserveCache(size_t cacheLevel,size_t cacheReservationSize)610 ze_result_t DeviceImp::reserveCache(size_t cacheLevel, size_t cacheReservationSize) {
611     if (cacheReservation->getMaxCacheReservationSize() == 0) {
612         return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
613     }
614 
615     if (cacheLevel == 0) {
616         cacheLevel = 3;
617     }
618 
619     auto result = cacheReservation->reserveCache(cacheLevel, cacheReservationSize);
620     if (result == false) {
621         return ZE_RESULT_ERROR_UNINITIALIZED;
622     }
623 
624     return ZE_RESULT_SUCCESS;
625 }
626 
setCacheAdvice(void * ptr,size_t regionSize,ze_cache_ext_region_t cacheRegion)627 ze_result_t DeviceImp::setCacheAdvice(void *ptr, size_t regionSize, ze_cache_ext_region_t cacheRegion) {
628     if (cacheReservation->getMaxCacheReservationSize() == 0) {
629         return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
630     }
631 
632     if (cacheRegion == ze_cache_ext_region_t::ZE_CACHE_EXT_REGION_ZE_CACHE_REGION_DEFAULT) {
633         cacheRegion = ze_cache_ext_region_t::ZE_CACHE_EXT_REGION_ZE_CACHE_NON_RESERVED_REGION;
634     }
635 
636     auto result = cacheReservation->setCacheAdvice(ptr, regionSize, cacheRegion);
637     if (result == false) {
638         return ZE_RESULT_ERROR_UNINITIALIZED;
639     }
640 
641     return ZE_RESULT_SUCCESS;
642 }
643 
imageGetProperties(const ze_image_desc_t * desc,ze_image_properties_t * pImageProperties)644 ze_result_t DeviceImp::imageGetProperties(const ze_image_desc_t *desc,
645                                           ze_image_properties_t *pImageProperties) {
646     const auto &deviceInfo = this->neoDevice->getDeviceInfo();
647 
648     if (deviceInfo.imageSupport) {
649         pImageProperties->samplerFilterFlags = ZE_IMAGE_SAMPLER_FILTER_FLAG_LINEAR;
650     } else {
651         pImageProperties->samplerFilterFlags = 0;
652     }
653 
654     return ZE_RESULT_SUCCESS;
655 }
656 
getDeviceImageProperties(ze_device_image_properties_t * pDeviceImageProperties)657 ze_result_t DeviceImp::getDeviceImageProperties(ze_device_image_properties_t *pDeviceImageProperties) {
658     const auto &deviceInfo = this->neoDevice->getDeviceInfo();
659 
660     if (deviceInfo.imageSupport) {
661         pDeviceImageProperties->maxImageDims1D = static_cast<uint32_t>(deviceInfo.image2DMaxWidth);
662         pDeviceImageProperties->maxImageDims2D = static_cast<uint32_t>(deviceInfo.image2DMaxHeight);
663         pDeviceImageProperties->maxImageDims3D = static_cast<uint32_t>(deviceInfo.image3DMaxDepth);
664         pDeviceImageProperties->maxImageBufferSize = deviceInfo.imageMaxBufferSize;
665         pDeviceImageProperties->maxImageArraySlices = static_cast<uint32_t>(deviceInfo.imageMaxArraySize);
666         pDeviceImageProperties->maxSamplers = deviceInfo.maxSamplers;
667         pDeviceImageProperties->maxReadImageArgs = deviceInfo.maxReadImageArgs;
668         pDeviceImageProperties->maxWriteImageArgs = deviceInfo.maxWriteImageArgs;
669     } else {
670         pDeviceImageProperties->maxImageDims1D = 0u;
671         pDeviceImageProperties->maxImageDims2D = 0u;
672         pDeviceImageProperties->maxImageDims3D = 0u;
673         pDeviceImageProperties->maxImageBufferSize = 0u;
674         pDeviceImageProperties->maxImageArraySlices = 0u;
675         pDeviceImageProperties->maxSamplers = 0u;
676         pDeviceImageProperties->maxReadImageArgs = 0u;
677         pDeviceImageProperties->maxWriteImageArgs = 0u;
678     }
679 
680     return ZE_RESULT_SUCCESS;
681 }
682 
getDebugProperties(zet_device_debug_properties_t * pDebugProperties)683 ze_result_t DeviceImp::getDebugProperties(zet_device_debug_properties_t *pDebugProperties) {
684     bool isDebugAttachAvailable = getOsInterface().isDebugAttachAvailable();
685     auto &stateSaveAreaHeader = NEO::SipKernel::getBindlessDebugSipKernel(*this->getNEODevice()).getStateSaveAreaHeader();
686 
687     if (stateSaveAreaHeader.size() == 0) {
688         PRINT_DEBUGGER_INFO_LOG("Context state save area header missing", "");
689         isDebugAttachAvailable = false;
690     }
691 
692     if (isDebugAttachAvailable && !isSubdevice) {
693         pDebugProperties->flags = zet_device_debug_property_flag_t::ZET_DEVICE_DEBUG_PROPERTY_FLAG_ATTACH;
694     } else {
695         pDebugProperties->flags = 0;
696     }
697     return ZE_RESULT_SUCCESS;
698 }
699 
systemBarrier()700 ze_result_t DeviceImp::systemBarrier() { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; }
701 
activateMetricGroupsDeferred(uint32_t count,zet_metric_group_handle_t * phMetricGroups)702 ze_result_t DeviceImp::activateMetricGroupsDeferred(uint32_t count,
703                                                     zet_metric_group_handle_t *phMetricGroups) {
704     ze_result_t result = ZE_RESULT_ERROR_UNKNOWN;
705     if (!this->isSubdevice && this->isImplicitScalingCapable()) {
706         for (auto subDevice : this->subDevices) {
707             result = subDevice->getMetricContext().activateMetricGroupsDeferred(count, phMetricGroups);
708             if (result != ZE_RESULT_SUCCESS)
709                 break;
710         }
711     } else {
712         result = metricContext->activateMetricGroupsDeferred(count, phMetricGroups);
713     }
714     return result;
715 }
716 
getExecEnvironment()717 void *DeviceImp::getExecEnvironment() { return execEnvironment; }
718 
getBuiltinFunctionsLib()719 BuiltinFunctionsLib *DeviceImp::getBuiltinFunctionsLib() { return builtins.get(); }
720 
getMOCS(bool l3enabled,bool l1enabled)721 uint32_t DeviceImp::getMOCS(bool l3enabled, bool l1enabled) {
722     return getHwHelper().getMocsIndex(*getNEODevice()->getGmmHelper(), l3enabled, l1enabled) << 1;
723 }
724 
getHwHelper()725 NEO::HwHelper &DeviceImp::getHwHelper() {
726     const auto &hardwareInfo = neoDevice->getHardwareInfo();
727     return NEO::HwHelper::get(hardwareInfo.platform.eRenderCoreFamily);
728 }
729 
getOsInterface()730 NEO::OSInterface &DeviceImp::getOsInterface() { return *neoDevice->getRootDeviceEnvironment().osInterface; }
731 
getPlatformInfo() const732 uint32_t DeviceImp::getPlatformInfo() const {
733     const auto &hardwareInfo = neoDevice->getHardwareInfo();
734     return hardwareInfo.platform.eRenderCoreFamily;
735 }
736 
getMetricContext()737 MetricContext &DeviceImp::getMetricContext() { return *metricContext; }
738 
activateMetricGroups()739 void DeviceImp::activateMetricGroups() {
740     if (metricContext != nullptr) {
741         if (metricContext->isImplicitScalingCapable()) {
742             for (uint32_t i = 0; i < numSubDevices; i++) {
743                 subDevices[i]->getMetricContext().activateMetricGroups();
744             }
745         } else {
746             metricContext->activateMetricGroups();
747         }
748     }
749 }
getMaxNumHwThreads() const750 uint32_t DeviceImp::getMaxNumHwThreads() const { return maxNumHwThreads; }
751 
getHwInfo() const752 const NEO::HardwareInfo &DeviceImp::getHwInfo() const { return neoDevice->getHardwareInfo(); }
753 
754 // Use this method to reinitialize L0::Device *device, that was created during zeInit, with the help of Device::create
deviceReinit(DriverHandle * driverHandle,L0::Device * device,std::unique_ptr<NEO::Device> & neoDevice,ze_result_t * returnValue)755 Device *Device::deviceReinit(DriverHandle *driverHandle, L0::Device *device, std::unique_ptr<NEO::Device> &neoDevice, ze_result_t *returnValue) {
756     auto pNeoDevice = neoDevice.release();
757 
758     return Device::create(driverHandle, pNeoDevice, false, returnValue, device);
759 }
760 
create(DriverHandle * driverHandle,NEO::Device * neoDevice,bool isSubDevice,ze_result_t * returnValue)761 Device *Device::create(DriverHandle *driverHandle, NEO::Device *neoDevice, bool isSubDevice, ze_result_t *returnValue) {
762     return Device::create(driverHandle, neoDevice, isSubDevice, returnValue, nullptr);
763 }
764 
// Builds the L0 device wrapper around neoDevice, or re-initializes deviceL0
// when one is supplied.
//
// driverHandle - driver handle stored on the device.
// neoDevice    - underlying NEO device; an internal reference is taken on it.
// isSubDevice  - true when the device being created is a sub-device.
// returnValue  - written only when the NEO device has no compiler interface
//                (ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE); creation continues
//                in that case.
// deviceL0     - existing device object to reuse, or nullptr to allocate one.
//
// Returns the initialized device, or nullptr if creating a sub-device
// returned nullptr.
Device *Device::create(DriverHandle *driverHandle, NEO::Device *neoDevice, bool isSubDevice, ze_result_t *returnValue, L0::Device *deviceL0) {
    L0::DeviceImp *device = nullptr;
    // Reuse the caller-supplied device object when given, otherwise allocate a new one.
    if (deviceL0 == nullptr) {
        device = new DeviceImp;
    } else {
        device = static_cast<L0::DeviceImp *>(deviceL0);
    }

    UNRECOVERABLE_IF(device == nullptr);

    device->setDriverHandle(driverHandle);
    neoDevice->setSpecializedDevice(device);

    device->neoDevice = neoDevice;
    // This internal reference is dropped again in DeviceImp::releaseResources().
    neoDevice->incRefInternal();

    device->execEnvironment = (void *)neoDevice->getExecutionEnvironment();
    device->allocationsForReuse = std::make_unique<NEO::AllocationsList>();
    device->implicitScalingCapable = NEO::ImplicitScalingHelper::isImplicitScalingEnabled(neoDevice->getDeviceBitfield(), true);
    device->metricContext = MetricContext::create(*device);
    device->builtins = BuiltinFunctionsLib::create(
        device, neoDevice->getBuiltIns());
    device->cacheReservation = CacheReservation::create(*device);
    device->maxNumHwThreads = NEO::HwHelper::getMaxThreadsForVfe(neoDevice->getHardwareInfo());

    // Defaults used when no SIP kernel gets initialized below.
    auto debugSurfaceSize = NEO::SipKernel::maxDbgSurfaceSize;
    std::vector<char> stateSaveAreaHeader;

    if (neoDevice->getCompilerInterface()) {
        // The SIP kernel is initialized only for mid-thread preemption or when the NEO device has a debugger.
        if (neoDevice->getPreemptionMode() == NEO::PreemptionMode::MidThread || neoDevice->getDebugger()) {
            bool ret = NEO::SipKernel::initSipKernel(NEO::SipKernel::getSipKernelType(*neoDevice), *neoDevice);
            UNRECOVERABLE_IF(!ret);

            stateSaveAreaHeader = NEO::SipKernel::getSipKernel(*neoDevice).getStateSaveAreaHeader();
            debugSurfaceSize = NEO::SipKernel::getSipKernel(*neoDevice).getStateSaveAreaSize();
        }
    } else {
        // Missing compiler interface is reported to the caller, but device creation continues.
        *returnValue = ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE;
    }

    // Only a root device allocates the debug surface; sub-devices receive the root's surface in the loop below.
    const bool allocateDebugSurface = (device->getL0Debugger() || neoDevice->getDeviceInfo().debuggerActive) && !isSubDevice;
    NEO::GraphicsAllocation *debugSurface = nullptr;
    if (allocateDebugSurface) {
        debugSurface = neoDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties(
            {device->getRootDeviceIndex(), true,
             debugSurfaceSize,
             NEO::GraphicsAllocation::AllocationType::DEBUG_CONTEXT_SAVE_AREA,
             false,
             false,
             device->getNEODevice()->getDeviceBitfield()});
        device->setDebugSurface(debugSurface);
    }

    // Copy the state save area header to offset 0 of the debug surface when both exist.
    if (debugSurface && stateSaveAreaHeader.size() > 0) {
        auto &hwInfo = neoDevice->getHardwareInfo();
        auto &hwHelper = NEO::HwHelper::get(hwInfo.platform.eRenderCoreFamily);
        NEO::MemoryTransferHelper::transferMemoryToAllocation(hwHelper.isBlitCopyRequiredForLocalMemory(hwInfo, *debugSurface),
                                                              *neoDevice, debugSurface, 0, stateSaveAreaHeader.data(),
                                                              stateSaveAreaHeader.size());
    }

    // Create an L0 device for every non-null NEO sub-device.
    for (auto &neoSubDevice : neoDevice->getSubDevices()) {
        if (!neoSubDevice) {
            continue;
        }

        ze_device_handle_t subDevice = Device::create(driverHandle,
                                                      neoSubDevice,
                                                      true, returnValue, nullptr);
        if (subDevice == nullptr) {
            // NOTE(review): `device` is not released on this path - confirm whether this branch is reachable.
            return nullptr;
        }
        static_cast<DeviceImp *>(subDevice)->isSubdevice = true;
        static_cast<DeviceImp *>(subDevice)->setDebugSurface(debugSurface);
        device->subDevices.push_back(static_cast<Device *>(subDevice));
    }
    device->numSubDevices = static_cast<uint32_t>(device->subDevices.size());

    // Defaults to whether local memory is supported; the debug flag overrides it unless it is -1.
    auto supportDualStorageSharedMemory = neoDevice->getMemoryManager()->isLocalMemorySupported(device->neoDevice->getRootDeviceIndex());
    if (NEO::DebugManager.flags.AllocateSharedAllocationsWithCpuAndGpuStorage.get() != -1) {
        supportDualStorageSharedMemory = NEO::DebugManager.flags.AllocateSharedAllocationsWithCpuAndGpuStorage.get();
    }

    if (supportDualStorageSharedMemory) {
        // Synchronous immediate command list on the copy engine group, stored as pageFaultCommandList.
        ze_command_queue_desc_t cmdQueueDesc = {};
        cmdQueueDesc.ordinal = 0;
        cmdQueueDesc.index = 0;
        cmdQueueDesc.flags = 0;
        cmdQueueDesc.stype = ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC;
        cmdQueueDesc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS;
        // NOTE(review): resultValue is not inspected after the call.
        ze_result_t resultValue = ZE_RESULT_SUCCESS;
        device->pageFaultCommandList =
            CommandList::createImmediate(
                device->neoDevice->getHardwareInfo().platform.eProductFamily, device, &cmdQueueDesc, true, NEO::EngineGroupType::Copy, resultValue);
    }

    // Tell the source level debugger about the new device; the handle is 0 when there is no OS interface.
    if (device->getSourceLevelDebugger()) {
        auto osInterface = neoDevice->getRootDeviceEnvironment().osInterface.get();
        device->getSourceLevelDebugger()
            ->notifyNewDevice(osInterface ? osInterface->getDriverModel()->getDeviceHandle() : 0);
    }
    device->createSysmanHandle(isSubDevice);
    // Re-arms releaseResources(), which returns early while this flag is true.
    device->resourcesReleased = false;
    return device;
}
870 
releaseResources()871 void DeviceImp::releaseResources() {
872     if (resourcesReleased) {
873         return;
874     }
875     if (neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[neoDevice->getRootDeviceIndex()]->debugger.get() &&
876         !neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[neoDevice->getRootDeviceIndex()]->debugger->isLegacy()) {
877         neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[neoDevice->getRootDeviceIndex()]->debugger.reset(nullptr);
878     }
879     for (uint32_t i = 0; i < this->numSubDevices; i++) {
880         delete this->subDevices[i];
881     }
882     this->subDevices.clear();
883     this->numSubDevices = 0;
884 
885     if (this->pageFaultCommandList) {
886         this->pageFaultCommandList->destroy();
887         this->pageFaultCommandList = nullptr;
888     }
889     metricContext.reset();
890     builtins.reset();
891     cacheReservation.reset();
892 
893     if (allocationsForReuse.get()) {
894         allocationsForReuse->freeAllGraphicsAllocations(neoDevice);
895         allocationsForReuse.reset();
896     }
897 
898     if (getSourceLevelDebugger()) {
899         getSourceLevelDebugger()->notifyDeviceDestruction();
900     }
901 
902     if (!isSubdevice) {
903         if (this->debugSurface) {
904             this->neoDevice->getMemoryManager()->freeGraphicsMemory(this->debugSurface);
905             this->debugSurface = nullptr;
906         }
907     }
908 
909     if (neoDevice) {
910         neoDevice->decRefInternal();
911         neoDevice = nullptr;
912     }
913 
914     resourcesReleased = true;
915 }
916 
~DeviceImp()917 DeviceImp::~DeviceImp() {
918     releaseResources();
919 
920     if (!isSubdevice) {
921         if (pSysmanDevice != nullptr) {
922             delete pSysmanDevice;
923             pSysmanDevice = nullptr;
924         }
925     }
926 }
927 
getDevicePreemptionMode() const928 NEO::PreemptionMode DeviceImp::getDevicePreemptionMode() const {
929     return neoDevice->getPreemptionMode();
930 }
931 
getDeviceInfo() const932 const NEO::DeviceInfo &DeviceImp::getDeviceInfo() const {
933     return neoDevice->getDeviceInfo();
934 }
935 
// Returns a graphics allocation backing the host range [buffer, buffer + size).
//
// If the driver handle reports an existing allocation as found for the range
// (allocFound), that allocation is returned unchanged. If other allocations
// are returned without allocFound being set, each of them is removed and
// freed, and the requested range is extended to cover it before a new
// BUFFER_HOST_MEMORY allocation is created and registered with the SVM
// allocations manager.
//
// commandList must be non-null when existing allocations have to be replaced;
// the freed allocations are erased from its deallocation and residency
// containers.
//
// Returns nullptr when the new allocation cannot be created.
NEO::GraphicsAllocation *DeviceImp::allocateManagedMemoryFromHostPtr(void *buffer, size_t size, struct CommandList *commandList) {
    char *baseAddress = reinterpret_cast<char *>(buffer);
    NEO::GraphicsAllocation *allocation = nullptr;
    bool allocFound = false;
    std::vector<NEO::SvmAllocationData *> allocDataArray = driverHandle->findAllocationsWithinRange(buffer, size, &allocFound);
    if (allocFound) {
        // Reuse the first reported allocation for this root device.
        return allocDataArray[0]->gpuAllocations.getGraphicsAllocation(getRootDeviceIndex());
    }

    if (!allocDataArray.empty()) {
        UNRECOVERABLE_IF(commandList == nullptr);
        for (auto allocData : allocDataArray) {
            allocation = allocData->gpuAllocations.getGraphicsAllocation(getRootDeviceIndex());
            // Capture address and size before the allocation and its bookkeeping entry are removed.
            char *allocAddress = reinterpret_cast<char *>(allocation->getGpuAddress());
            size_t allocSize = allocData->size;

            driverHandle->getSvmAllocsManager()->removeSVMAlloc(*allocData);
            neoDevice->getMemoryManager()->freeGraphicsMemory(allocation);
            // `allocation` has been freed; from here it is only passed on as a lookup key.
            commandList->eraseDeallocationContainerEntry(allocation);
            commandList->eraseResidencyContainerEntry(allocation);

            if (allocAddress < baseAddress) {
                // Old allocation starts below the current range: move the start down and
                // recompute the size up to the current range end (presumably ptrDiff(a, b)
                // is a - b; verify against its definition).
                buffer = reinterpret_cast<void *>(allocAddress);
                baseAddress += size;
                size = ptrDiff(baseAddress, allocAddress);
                baseAddress = reinterpret_cast<char *>(buffer);
            } else {
                // Old allocation starts inside the current range: grow the range only if
                // the old allocation ends past the current range end.
                allocAddress += allocSize;
                baseAddress += size;
                if (allocAddress > baseAddress) {
                    baseAddress = reinterpret_cast<char *>(buffer);
                    size = ptrDiff(allocAddress, baseAddress);
                } else {
                    baseAddress = reinterpret_cast<char *>(buffer);
                }
            }
        }
    }

    // Create the allocation over the (possibly extended) host range.
    allocation = neoDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties(
        {getRootDeviceIndex(), false, size, NEO::GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY, false, neoDevice->getDeviceBitfield()},
        buffer);

    if (allocation == nullptr) {
        return allocation;
    }

    // Register the new allocation with the SVM manager, with no CPU allocation, no device and an unspecified memory type.
    NEO::SvmAllocationData allocData(getRootDeviceIndex());
    allocData.gpuAllocations.addAllocation(allocation);
    allocData.cpuAllocation = nullptr;
    allocData.size = size;
    allocData.memoryType = InternalMemoryType::NOT_SPECIFIED;
    allocData.device = nullptr;
    driverHandle->getSvmAllocsManager()->insertSVMAlloc(allocData);

    return allocation;
}
993 
allocateMemoryFromHostPtr(const void * buffer,size_t size,bool hostCopyAllowed)994 NEO::GraphicsAllocation *DeviceImp::allocateMemoryFromHostPtr(const void *buffer, size_t size, bool hostCopyAllowed) {
995     NEO::AllocationProperties properties = {getRootDeviceIndex(), false, size,
996                                             NEO::GraphicsAllocation::AllocationType::EXTERNAL_HOST_PTR,
997                                             false, neoDevice->getDeviceBitfield()};
998     properties.flags.flushL3RequiredForRead = properties.flags.flushL3RequiredForWrite = true;
999     auto allocation = neoDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties(properties,
1000                                                                                           buffer);
1001     if (allocation == nullptr && hostCopyAllowed) {
1002         allocation = neoDevice->getMemoryManager()->allocateInternalGraphicsMemoryWithHostCopy(neoDevice->getRootDeviceIndex(),
1003                                                                                                neoDevice->getDeviceBitfield(),
1004                                                                                                buffer,
1005                                                                                                size);
1006     }
1007 
1008     return allocation;
1009 }
1010 
obtainReusableAllocation(size_t requiredSize,NEO::GraphicsAllocation::AllocationType type)1011 NEO::GraphicsAllocation *DeviceImp::obtainReusableAllocation(size_t requiredSize, NEO::GraphicsAllocation::AllocationType type) {
1012     auto alloc = allocationsForReuse->detachAllocation(requiredSize, nullptr, nullptr, type);
1013     if (alloc == nullptr)
1014         return nullptr;
1015     else
1016         return alloc.release();
1017 }
1018 
storeReusableAllocation(NEO::GraphicsAllocation & alloc)1019 void DeviceImp::storeReusableAllocation(NEO::GraphicsAllocation &alloc) {
1020     allocationsForReuse->pushFrontOne(alloc);
1021 }
1022 
getCsrForOrdinalAndIndex(NEO::CommandStreamReceiver ** csr,uint32_t ordinal,uint32_t index)1023 ze_result_t DeviceImp::getCsrForOrdinalAndIndex(NEO::CommandStreamReceiver **csr, uint32_t ordinal, uint32_t index) {
1024     auto &engineGroups = getActiveDevice()->getRegularEngineGroups();
1025     if ((ordinal >= engineGroups.size()) ||
1026         (index >= engineGroups[ordinal].engines.size())) {
1027         return ZE_RESULT_ERROR_INVALID_ARGUMENT;
1028     }
1029     *csr = engineGroups[ordinal].engines[index].commandStreamReceiver;
1030     return ZE_RESULT_SUCCESS;
1031 }
1032 
getCsrForLowPriority(NEO::CommandStreamReceiver ** csr)1033 ze_result_t DeviceImp::getCsrForLowPriority(NEO::CommandStreamReceiver **csr) {
1034     NEO::Device *activeDevice = getActiveDevice();
1035     for (auto &it : activeDevice->getAllEngines()) {
1036         if (it.osContext->isLowPriority()) {
1037             *csr = it.commandStreamReceiver;
1038             return ZE_RESULT_SUCCESS;
1039         }
1040     }
1041     // if the code falls through, we have no low priority context created by neoDevice.
1042     UNRECOVERABLE_IF(true);
1043     return ZE_RESULT_ERROR_UNKNOWN;
1044 }
1045 
getDebugSession(const zet_debug_config_t & config)1046 DebugSession *DeviceImp::getDebugSession(const zet_debug_config_t &config) {
1047     return debugSession.get();
1048 }
1049 
createDebugSession(const zet_debug_config_t & config,ze_result_t & result)1050 DebugSession *DeviceImp::createDebugSession(const zet_debug_config_t &config, ze_result_t &result) {
1051     if (!this->isSubdevice) {
1052         auto session = DebugSession::create(config, this, result);
1053         debugSession.reset(session);
1054     } else {
1055         result = ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
1056     }
1057     return debugSession.get();
1058 }
1059 
toPhysicalSliceId(const NEO::TopologyMap & topologyMap,uint32_t & slice,uint32_t & deviceIndex)1060 bool DeviceImp::toPhysicalSliceId(const NEO::TopologyMap &topologyMap, uint32_t &slice, uint32_t &deviceIndex) {
1061     auto hwInfo = neoDevice->getRootDeviceEnvironment().getHardwareInfo();
1062     uint32_t subDeviceCount = NEO::HwHelper::getSubDevicesCount(hwInfo);
1063     auto deviceBitfield = neoDevice->getDeviceBitfield();
1064 
1065     if (topologyMap.size() == subDeviceCount && !isSubdevice) {
1066         uint32_t sliceId = slice;
1067         for (uint32_t i = 0; i < topologyMap.size(); i++) {
1068             if (sliceId < topologyMap.at(i).sliceIndices.size()) {
1069                 slice = topologyMap.at(i).sliceIndices[sliceId];
1070                 deviceIndex = i;
1071                 return true;
1072             }
1073             sliceId = sliceId - static_cast<uint32_t>(topologyMap.at(i).sliceIndices.size());
1074         }
1075     } else if (isSubdevice) {
1076         UNRECOVERABLE_IF(!deviceBitfield.any());
1077         uint32_t subDeviceIndex = Math::log2(static_cast<uint32_t>(deviceBitfield.to_ulong()));
1078 
1079         if (topologyMap.find(subDeviceIndex) != topologyMap.end()) {
1080             if (slice < topologyMap.at(subDeviceIndex).sliceIndices.size()) {
1081                 deviceIndex = subDeviceIndex;
1082                 slice = topologyMap.at(subDeviceIndex).sliceIndices[slice];
1083                 return true;
1084             }
1085         }
1086     }
1087 
1088     return false;
1089 }
1090 
toApiSliceId(const NEO::TopologyMap & topologyMap,uint32_t & slice,uint32_t deviceIndex)1091 bool DeviceImp::toApiSliceId(const NEO::TopologyMap &topologyMap, uint32_t &slice, uint32_t deviceIndex) {
1092     auto deviceBitfield = neoDevice->getDeviceBitfield();
1093 
1094     if (isSubdevice) {
1095         UNRECOVERABLE_IF(!deviceBitfield.any());
1096         deviceIndex = Math::log2(static_cast<uint32_t>(deviceBitfield.to_ulong()));
1097     }
1098 
1099     if (topologyMap.find(deviceIndex) != topologyMap.end()) {
1100         uint32_t apiSliceId = 0;
1101         if (!isSubdevice) {
1102             for (uint32_t devId = 0; devId < deviceIndex; devId++) {
1103                 apiSliceId += static_cast<uint32_t>(topologyMap.at(devId).sliceIndices.size());
1104             }
1105         }
1106 
1107         for (uint32_t i = 0; i < topologyMap.at(deviceIndex).sliceIndices.size(); i++) {
1108             if (static_cast<uint32_t>(topologyMap.at(deviceIndex).sliceIndices[i]) == slice) {
1109                 apiSliceId += i;
1110                 slice = apiSliceId;
1111                 return true;
1112             }
1113         }
1114     }
1115 
1116     return false;
1117 }
1118 
getActiveDevice() const1119 NEO::Device *DeviceImp::getActiveDevice() const {
1120     if (neoDevice->getNumGenericSubDevices() > 1u) {
1121         if (isImplicitScalingCapable()) {
1122             return this->neoDevice;
1123         }
1124         return this->neoDevice->getSubDevice(0);
1125     }
1126     return this->neoDevice;
1127 }
1128 
1129 } // namespace L0
1130