1 /*
2 * Copyright (C) 2020-2021 Intel Corporation
3 *
4 * SPDX-License-Identifier: MIT
5 *
6 */
7
8 #include "level_zero/core/source/device/device_imp.h"
9
10 #include "shared/source/built_ins/sip.h"
11 #include "shared/source/command_container/implicit_scaling.h"
12 #include "shared/source/debug_settings/debug_settings_manager.h"
13 #include "shared/source/device/device.h"
14 #include "shared/source/device/device_info.h"
15 #include "shared/source/device/sub_device.h"
16 #include "shared/source/execution_environment/execution_environment.h"
17 #include "shared/source/execution_environment/root_device_environment.h"
18 #include "shared/source/gmm_helper/gmm_helper.h"
19 #include "shared/source/helpers/common_types.h"
20 #include "shared/source/helpers/constants.h"
21 #include "shared/source/helpers/engine_node_helper.h"
22 #include "shared/source/helpers/hw_helper.h"
23 #include "shared/source/helpers/string.h"
24 #include "shared/source/helpers/topology_map.h"
25 #include "shared/source/kernel/grf_config.h"
26 #include "shared/source/memory_manager/memory_manager.h"
27 #include "shared/source/os_interface/hw_info_config.h"
28 #include "shared/source/os_interface/os_interface.h"
29 #include "shared/source/os_interface/os_time.h"
30 #include "shared/source/source_level_debugger/source_level_debugger.h"
31 #include "shared/source/utilities/debug_settings_reader_creator.h"
32
33 #include "level_zero/core/source/builtin/builtin_functions_lib.h"
34 #include "level_zero/core/source/cache/cache_reservation.h"
35 #include "level_zero/core/source/cmdlist/cmdlist.h"
36 #include "level_zero/core/source/cmdqueue/cmdqueue.h"
37 #include "level_zero/core/source/context/context_imp.h"
38 #include "level_zero/core/source/driver/driver_handle_imp.h"
39 #include "level_zero/core/source/event/event.h"
40 #include "level_zero/core/source/hw_helpers/l0_hw_helper.h"
41 #include "level_zero/core/source/image/image.h"
42 #include "level_zero/core/source/module/module.h"
43 #include "level_zero/core/source/printf_handler/printf_handler.h"
44 #include "level_zero/core/source/sampler/sampler.h"
45 #include "level_zero/tools/source/debug/debug_session.h"
46 #include "level_zero/tools/source/metrics/metric.h"
47 #include "level_zero/tools/source/sysman/sysman.h"
48
// Forward declaration only; the definition is not part of this translation unit's visible source.
namespace NEO {
bool releaseFP64Override();
} // namespace NEO
52
53 namespace L0 {
54
getDriverHandle()55 DriverHandle *DeviceImp::getDriverHandle() {
56 return this->driverHandle;
57 }
58
setDriverHandle(DriverHandle * driverHandle)59 void DeviceImp::setDriverHandle(DriverHandle *driverHandle) {
60 this->driverHandle = driverHandle;
61 }
62
canAccessPeer(ze_device_handle_t hPeerDevice,ze_bool_t * value)63 ze_result_t DeviceImp::canAccessPeer(ze_device_handle_t hPeerDevice, ze_bool_t *value) {
64 *value = false;
65
66 DeviceImp *pPeerDevice = static_cast<DeviceImp *>(Device::fromHandle(hPeerDevice));
67 uint32_t peerRootDeviceIndex = pPeerDevice->getNEODevice()->getRootDeviceIndex();
68
69 if (this->crossAccessEnabledDevices.find(peerRootDeviceIndex) != this->crossAccessEnabledDevices.end()) {
70 *value = this->crossAccessEnabledDevices[peerRootDeviceIndex];
71 } else if (this->getNEODevice()->getRootDeviceIndex() == peerRootDeviceIndex) {
72 *value = true;
73 } else {
74 ze_command_list_handle_t commandList = nullptr;
75 ze_command_list_desc_t listDescriptor = {};
76 listDescriptor.stype = ZE_STRUCTURE_TYPE_COMMAND_LIST_DESC;
77 listDescriptor.pNext = nullptr;
78 listDescriptor.flags = 0;
79 listDescriptor.commandQueueGroupOrdinal = 0;
80
81 ze_command_queue_handle_t commandQueue = nullptr;
82 ze_command_queue_desc_t queueDescriptor = {};
83 queueDescriptor.stype = ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC;
84 queueDescriptor.pNext = nullptr;
85 queueDescriptor.flags = 0;
86 queueDescriptor.mode = ZE_COMMAND_QUEUE_MODE_DEFAULT;
87 queueDescriptor.priority = ZE_COMMAND_QUEUE_PRIORITY_NORMAL;
88 queueDescriptor.ordinal = 0;
89 queueDescriptor.index = 0;
90
91 this->createCommandList(&listDescriptor, &commandList);
92 this->createCommandQueue(&queueDescriptor, &commandQueue);
93
94 auto driverHandle = this->getDriverHandle();
95 DriverHandleImp *driverHandleImp = static_cast<DriverHandleImp *>(driverHandle);
96
97 ze_context_handle_t context;
98 ze_context_desc_t contextDesc = {};
99 contextDesc.stype = ZE_STRUCTURE_TYPE_CONTEXT_DESC;
100 driverHandleImp->createContext(&contextDesc, 0u, nullptr, &context);
101 ContextImp *contextImp = static_cast<ContextImp *>(context);
102
103 void *memory = nullptr;
104 void *peerMemory = nullptr;
105
106 ze_device_mem_alloc_desc_t deviceDesc = {};
107 deviceDesc.stype = ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC;
108 deviceDesc.ordinal = 0;
109 deviceDesc.flags = 0;
110 deviceDesc.pNext = nullptr;
111
112 ze_device_mem_alloc_desc_t peerDeviceDesc = {};
113 peerDeviceDesc.stype = ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC;
114 peerDeviceDesc.ordinal = 0;
115 peerDeviceDesc.flags = 0;
116 peerDeviceDesc.pNext = nullptr;
117
118 contextImp->allocDeviceMem(this->toHandle(), &deviceDesc, 8, 1, &memory);
119 contextImp->allocDeviceMem(hPeerDevice, &peerDeviceDesc, 8, 1, &peerMemory);
120
121 auto ret = L0::CommandList::fromHandle(commandList)->appendMemoryCopy(peerMemory, memory, 8, nullptr, 0, nullptr);
122 L0::CommandList::fromHandle(commandList)->close();
123
124 if (ret == ZE_RESULT_SUCCESS) {
125 ret = L0::CommandQueue::fromHandle(commandQueue)->executeCommandLists(1, &commandList, nullptr, true);
126 if (ret == ZE_RESULT_SUCCESS) {
127 this->crossAccessEnabledDevices[peerRootDeviceIndex] = true;
128 pPeerDevice->crossAccessEnabledDevices[this->getNEODevice()->getRootDeviceIndex()] = true;
129 L0::CommandQueue::fromHandle(commandQueue)->synchronize(std::numeric_limits<uint64_t>::max());
130 *value = true;
131 }
132 }
133
134 contextImp->freeMem(peerMemory);
135 contextImp->freeMem(memory);
136
137 L0::Context::fromHandle(context)->destroy();
138 L0::CommandQueue::fromHandle(commandQueue)->destroy();
139 L0::CommandList::fromHandle(commandList)->destroy();
140 }
141
142 return ZE_RESULT_SUCCESS;
143 }
144
createCommandList(const ze_command_list_desc_t * desc,ze_command_list_handle_t * commandList)145 ze_result_t DeviceImp::createCommandList(const ze_command_list_desc_t *desc,
146 ze_command_list_handle_t *commandList) {
147 auto &engineGroups = getActiveDevice()->getRegularEngineGroups();
148 if (desc->commandQueueGroupOrdinal >= engineGroups.size()) {
149 return ZE_RESULT_ERROR_INVALID_ARGUMENT;
150 }
151 auto productFamily = neoDevice->getHardwareInfo().platform.eProductFamily;
152 ze_result_t returnValue = ZE_RESULT_SUCCESS;
153 auto engineGroupType = engineGroups[desc->commandQueueGroupOrdinal].engineGroupType;
154 *commandList = CommandList::create(productFamily, this, engineGroupType, desc->flags, returnValue);
155
156 return returnValue;
157 }
158
createCommandListImmediate(const ze_command_queue_desc_t * desc,ze_command_list_handle_t * phCommandList)159 ze_result_t DeviceImp::createCommandListImmediate(const ze_command_queue_desc_t *desc,
160 ze_command_list_handle_t *phCommandList) {
161 auto &engineGroups = getActiveDevice()->getRegularEngineGroups();
162 if (desc->ordinal >= engineGroups.size()) {
163 return ZE_RESULT_ERROR_INVALID_ARGUMENT;
164 }
165 auto productFamily = neoDevice->getHardwareInfo().platform.eProductFamily;
166 auto engineGroupType = engineGroups[desc->ordinal].engineGroupType;
167 ze_result_t returnValue = ZE_RESULT_SUCCESS;
168 *phCommandList = CommandList::createImmediate(productFamily, this, desc, false, engineGroupType, returnValue);
169
170 return returnValue;
171 }
172
createCommandQueue(const ze_command_queue_desc_t * desc,ze_command_queue_handle_t * commandQueue)173 ze_result_t DeviceImp::createCommandQueue(const ze_command_queue_desc_t *desc,
174 ze_command_queue_handle_t *commandQueue) {
175 auto &platform = neoDevice->getHardwareInfo().platform;
176
177 NEO::CommandStreamReceiver *csr = nullptr;
178 auto &engineGroups = getActiveDevice()->getRegularEngineGroups();
179 if (desc->ordinal >= engineGroups.size()) {
180 return ZE_RESULT_ERROR_INVALID_ARGUMENT;
181 }
182 if (desc->priority == ZE_COMMAND_QUEUE_PRIORITY_PRIORITY_LOW) {
183 getCsrForLowPriority(&csr);
184 } else {
185 auto ret = getCsrForOrdinalAndIndex(&csr, desc->ordinal, desc->index);
186 if (ret != ZE_RESULT_SUCCESS) {
187 return ret;
188 }
189 }
190
191 UNRECOVERABLE_IF(csr == nullptr);
192
193 auto &hwHelper = NEO::HwHelper::get(platform.eRenderCoreFamily);
194 bool isCopyOnly = hwHelper.isCopyOnlyEngineType(engineGroups[desc->ordinal].engineGroupType);
195
196 ze_result_t returnValue = ZE_RESULT_SUCCESS;
197 *commandQueue = CommandQueue::create(platform.eProductFamily, this, csr, desc, isCopyOnly, false, returnValue);
198
199 return returnValue;
200 }
201
getCommandQueueGroupProperties(uint32_t * pCount,ze_command_queue_group_properties_t * pCommandQueueGroupProperties)202 ze_result_t DeviceImp::getCommandQueueGroupProperties(uint32_t *pCount,
203 ze_command_queue_group_properties_t *pCommandQueueGroupProperties) {
204 NEO::Device *activeDevice = getActiveDevice();
205 auto &engineGroups = activeDevice->getRegularEngineGroups();
206 uint32_t numEngineGroups = static_cast<uint32_t>(engineGroups.size());
207
208 if (*pCount == 0) {
209 *pCount = numEngineGroups;
210 return ZE_RESULT_SUCCESS;
211 }
212
213 const auto &hardwareInfo = this->neoDevice->getHardwareInfo();
214 auto &hwHelper = NEO::HwHelper::get(hardwareInfo.platform.eRenderCoreFamily);
215 auto &l0HwHelper = L0HwHelper::get(hardwareInfo.platform.eRenderCoreFamily);
216
217 *pCount = std::min(numEngineGroups, *pCount);
218 for (uint32_t i = 0; i < *pCount; i++) {
219 if (engineGroups[i].engineGroupType == NEO::EngineGroupType::RenderCompute) {
220 pCommandQueueGroupProperties[i].flags = ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE |
221 ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COPY |
222 ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_METRICS |
223 ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COOPERATIVE_KERNELS;
224 pCommandQueueGroupProperties[i].maxMemoryFillPatternSize = std::numeric_limits<size_t>::max();
225 }
226 if (engineGroups[i].engineGroupType == NEO::EngineGroupType::Compute) {
227 pCommandQueueGroupProperties[i].flags = ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE |
228 ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COPY |
229 ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COOPERATIVE_KERNELS;
230 pCommandQueueGroupProperties[i].maxMemoryFillPatternSize = std::numeric_limits<size_t>::max();
231 }
232 if (engineGroups[i].engineGroupType == NEO::EngineGroupType::Copy) {
233 pCommandQueueGroupProperties[i].flags = ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COPY;
234 pCommandQueueGroupProperties[i].maxMemoryFillPatternSize = hwHelper.getMaxFillPaternSizeForCopyEngine();
235 }
236 l0HwHelper.setAdditionalGroupProperty(pCommandQueueGroupProperties[i], engineGroups[i].engineGroupType);
237 pCommandQueueGroupProperties[i].numQueues = static_cast<uint32_t>(engineGroups[i].engines.size());
238 }
239
240 return ZE_RESULT_SUCCESS;
241 }
242
createImage(const ze_image_desc_t * desc,ze_image_handle_t * phImage)243 ze_result_t DeviceImp::createImage(const ze_image_desc_t *desc, ze_image_handle_t *phImage) {
244 auto productFamily = neoDevice->getHardwareInfo().platform.eProductFamily;
245 Image *pImage = nullptr;
246 auto result = Image::create(productFamily, this, desc, &pImage);
247 if (result == ZE_RESULT_SUCCESS) {
248 *phImage = pImage->toHandle();
249 }
250
251 return result;
252 }
253
createSampler(const ze_sampler_desc_t * desc,ze_sampler_handle_t * sampler)254 ze_result_t DeviceImp::createSampler(const ze_sampler_desc_t *desc,
255 ze_sampler_handle_t *sampler) {
256 auto productFamily = neoDevice->getHardwareInfo().platform.eProductFamily;
257 *sampler = Sampler::create(productFamily, this, desc);
258
259 return ZE_RESULT_SUCCESS;
260 }
261
createModule(const ze_module_desc_t * desc,ze_module_handle_t * module,ze_module_build_log_handle_t * buildLog,ModuleType type)262 ze_result_t DeviceImp::createModule(const ze_module_desc_t *desc, ze_module_handle_t *module,
263 ze_module_build_log_handle_t *buildLog, ModuleType type) {
264 ModuleBuildLog *moduleBuildLog = nullptr;
265
266 if (buildLog) {
267 moduleBuildLog = ModuleBuildLog::create();
268 *buildLog = moduleBuildLog->toHandle();
269 }
270 auto modulePtr = Module::create(this, desc, moduleBuildLog, type);
271 if (modulePtr == nullptr) {
272 return ZE_RESULT_ERROR_MODULE_BUILD_FAILURE;
273 }
274
275 *module = modulePtr;
276
277 return ZE_RESULT_SUCCESS;
278 }
279
getComputeProperties(ze_device_compute_properties_t * pComputeProperties)280 ze_result_t DeviceImp::getComputeProperties(ze_device_compute_properties_t *pComputeProperties) {
281 const auto &deviceInfo = this->neoDevice->getDeviceInfo();
282
283 pComputeProperties->maxTotalGroupSize = static_cast<uint32_t>(deviceInfo.maxWorkGroupSize);
284
285 pComputeProperties->maxGroupSizeX = static_cast<uint32_t>(deviceInfo.maxWorkItemSizes[0]);
286 pComputeProperties->maxGroupSizeY = static_cast<uint32_t>(deviceInfo.maxWorkItemSizes[1]);
287 pComputeProperties->maxGroupSizeZ = static_cast<uint32_t>(deviceInfo.maxWorkItemSizes[2]);
288
289 pComputeProperties->maxGroupCountX = std::numeric_limits<uint32_t>::max();
290 pComputeProperties->maxGroupCountY = std::numeric_limits<uint32_t>::max();
291 pComputeProperties->maxGroupCountZ = std::numeric_limits<uint32_t>::max();
292
293 pComputeProperties->maxSharedLocalMemory = static_cast<uint32_t>(deviceInfo.localMemSize);
294
295 pComputeProperties->numSubGroupSizes = static_cast<uint32_t>(deviceInfo.maxSubGroups.size());
296
297 for (uint32_t i = 0; i < pComputeProperties->numSubGroupSizes; ++i) {
298 pComputeProperties->subGroupSizes[i] = static_cast<uint32_t>(deviceInfo.maxSubGroups[i]);
299 }
300
301 return ZE_RESULT_SUCCESS;
302 }
303
getP2PProperties(ze_device_handle_t hPeerDevice,ze_device_p2p_properties_t * pP2PProperties)304 ze_result_t DeviceImp::getP2PProperties(ze_device_handle_t hPeerDevice,
305 ze_device_p2p_properties_t *pP2PProperties) {
306 pP2PProperties->flags = 0;
307 return ZE_RESULT_SUCCESS;
308 }
309
getMemoryProperties(uint32_t * pCount,ze_device_memory_properties_t * pMemProperties)310 ze_result_t DeviceImp::getMemoryProperties(uint32_t *pCount, ze_device_memory_properties_t *pMemProperties) {
311 if (*pCount == 0) {
312 *pCount = 1;
313 return ZE_RESULT_SUCCESS;
314 }
315
316 if (*pCount > 1) {
317 *pCount = 1;
318 }
319
320 if (nullptr == pMemProperties) {
321 return ZE_RESULT_ERROR_INVALID_ARGUMENT;
322 }
323
324 const auto &deviceInfo = this->neoDevice->getDeviceInfo();
325 auto &hwInfo = this->getHwInfo();
326 auto &hwInfoConfig = *NEO::HwInfoConfig::get(hwInfo.platform.eProductFamily);
327 strcpy_s(pMemProperties->name, ZE_MAX_DEVICE_NAME, hwInfoConfig.getDeviceMemoryName().c_str());
328 pMemProperties->maxClockRate = hwInfoConfig.getDeviceMemoryMaxClkRate(&hwInfo);
329 pMemProperties->maxBusWidth = deviceInfo.addressBits;
330 if (this->isImplicitScalingCapable() ||
331 this->getNEODevice()->getNumGenericSubDevices() == 0) {
332 pMemProperties->totalSize = deviceInfo.globalMemSize;
333 } else {
334 pMemProperties->totalSize = deviceInfo.globalMemSize / this->numSubDevices;
335 }
336
337 pMemProperties->flags = 0;
338 return ZE_RESULT_SUCCESS;
339 }
340
getMemoryAccessProperties(ze_device_memory_access_properties_t * pMemAccessProperties)341 ze_result_t DeviceImp::getMemoryAccessProperties(ze_device_memory_access_properties_t *pMemAccessProperties) {
342 auto &hwInfo = this->getHwInfo();
343 auto &hwInfoConfig = *NEO::HwInfoConfig::get(hwInfo.platform.eProductFamily);
344 pMemAccessProperties->hostAllocCapabilities =
345 static_cast<ze_memory_access_cap_flags_t>(hwInfoConfig.getHostMemCapabilities(&hwInfo));
346 pMemAccessProperties->deviceAllocCapabilities =
347 ZE_MEMORY_ACCESS_CAP_FLAG_RW | ZE_MEMORY_ACCESS_CAP_FLAG_ATOMIC;
348 pMemAccessProperties->sharedSingleDeviceAllocCapabilities =
349 ZE_MEMORY_ACCESS_CAP_FLAG_RW | ZE_MEMORY_ACCESS_CAP_FLAG_ATOMIC;
350 pMemAccessProperties->sharedCrossDeviceAllocCapabilities = 0;
351 pMemAccessProperties->sharedSystemAllocCapabilities = 0;
352
353 return ZE_RESULT_SUCCESS;
354 }
355
356 static constexpr ze_device_fp_flags_t defaultFpFlags = static_cast<ze_device_fp_flags_t>(ZE_DEVICE_FP_FLAG_ROUND_TO_NEAREST |
357 ZE_DEVICE_FP_FLAG_ROUND_TO_ZERO |
358 ZE_DEVICE_FP_FLAG_ROUND_TO_INF |
359 ZE_DEVICE_FP_FLAG_INF_NAN |
360 ZE_DEVICE_FP_FLAG_DENORM |
361 ZE_DEVICE_FP_FLAG_FMA);
362
getKernelProperties(ze_device_module_properties_t * pKernelProperties)363 ze_result_t DeviceImp::getKernelProperties(ze_device_module_properties_t *pKernelProperties) {
364 const auto &hardwareInfo = this->neoDevice->getHardwareInfo();
365 const auto &deviceInfo = this->neoDevice->getDeviceInfo();
366 auto &hwHelper = NEO::HwHelper::get(hardwareInfo.platform.eRenderCoreFamily);
367
368 std::string ilVersion = deviceInfo.ilVersion;
369 size_t majorVersionPos = ilVersion.find('_');
370 size_t minorVersionPos = ilVersion.find('.');
371
372 if (majorVersionPos != std::string::npos && minorVersionPos != std::string::npos) {
373 uint32_t majorSpirvVersion = static_cast<uint32_t>(std::stoul(ilVersion.substr(majorVersionPos + 1, minorVersionPos)));
374 uint32_t minorSpirvVersion = static_cast<uint32_t>(std::stoul(ilVersion.substr(minorVersionPos + 1)));
375 pKernelProperties->spirvVersionSupported = ZE_MAKE_VERSION(majorSpirvVersion, minorSpirvVersion);
376 } else {
377 DEBUG_BREAK_IF(true);
378 return ZE_RESULT_ERROR_UNKNOWN;
379 }
380
381 pKernelProperties->flags = ZE_DEVICE_MODULE_FLAG_FP16;
382 if (hardwareInfo.capabilityTable.ftrSupportsInteger64BitAtomics) {
383 pKernelProperties->flags |= ZE_DEVICE_MODULE_FLAG_INT64_ATOMICS;
384 }
385 pKernelProperties->fp16flags = defaultFpFlags;
386 pKernelProperties->fp32flags = defaultFpFlags;
387
388 if (NEO::DebugManager.flags.OverrideDefaultFP64Settings.get() == 1) {
389 pKernelProperties->flags |= ZE_DEVICE_MODULE_FLAG_FP64;
390 pKernelProperties->fp64flags = defaultFpFlags | ZE_DEVICE_FP_FLAG_ROUNDED_DIVIDE_SQRT;
391 pKernelProperties->fp32flags |= ZE_DEVICE_FP_FLAG_ROUNDED_DIVIDE_SQRT;
392 } else {
393 pKernelProperties->fp64flags = 0;
394 if (hardwareInfo.capabilityTable.ftrSupportsFP64) {
395 pKernelProperties->flags |= ZE_DEVICE_MODULE_FLAG_FP64;
396 pKernelProperties->fp64flags |= defaultFpFlags;
397 if (hardwareInfo.capabilityTable.ftrSupports64BitMath) {
398 pKernelProperties->fp64flags |= ZE_DEVICE_FP_FLAG_ROUNDED_DIVIDE_SQRT;
399 pKernelProperties->fp32flags |= ZE_DEVICE_FP_FLAG_ROUNDED_DIVIDE_SQRT;
400 }
401 }
402 }
403
404 pKernelProperties->nativeKernelSupported.id[0] = 0;
405
406 processAdditionalKernelProperties(hwHelper, pKernelProperties);
407
408 pKernelProperties->maxArgumentsSize = static_cast<uint32_t>(this->neoDevice->getDeviceInfo().maxParameterSize);
409
410 pKernelProperties->printfBufferSize = static_cast<uint32_t>(this->neoDevice->getDeviceInfo().printfBufferSize);
411
412 auto &hwInfo = this->getHwInfo();
413 auto &hwInfoConfig = *NEO::HwInfoConfig::get(hwInfo.platform.eProductFamily);
414
415 void *pNext = pKernelProperties->pNext;
416 while (pNext) {
417 ze_base_desc_t *extendedProperties = reinterpret_cast<ze_base_desc_t *>(pKernelProperties->pNext);
418 if (extendedProperties->stype == ZE_STRUCTURE_TYPE_FLOAT_ATOMIC_EXT_PROPERTIES) {
419 ze_float_atomic_ext_properties_t *floatProperties =
420 reinterpret_cast<ze_float_atomic_ext_properties_t *>(extendedProperties);
421 hwInfoConfig.getKernelExtendedProperties(&floatProperties->fp16Flags,
422 &floatProperties->fp32Flags,
423 &floatProperties->fp64Flags);
424 } else if (extendedProperties->stype == ZE_STRUCTURE_TYPE_SCHEDULING_HINT_EXP_PROPERTIES) {
425 ze_scheduling_hint_exp_properties_t *hintProperties =
426 reinterpret_cast<ze_scheduling_hint_exp_properties_t *>(extendedProperties);
427 auto supportedThreadArbitrationPolicies = hwInfoConfig.getKernelSupportedThreadArbitrationPolicies();
428 hintProperties->schedulingHintFlags = 0;
429 for (uint32_t &p : supportedThreadArbitrationPolicies) {
430 switch (p) {
431 case NEO::ThreadArbitrationPolicy::AgeBased:
432 hintProperties->schedulingHintFlags |= ZE_SCHEDULING_HINT_EXP_FLAG_OLDEST_FIRST;
433 break;
434 case NEO::ThreadArbitrationPolicy::RoundRobin:
435 hintProperties->schedulingHintFlags |= ZE_SCHEDULING_HINT_EXP_FLAG_ROUND_ROBIN;
436 break;
437 case NEO::ThreadArbitrationPolicy::RoundRobinAfterDependency:
438 hintProperties->schedulingHintFlags |= ZE_SCHEDULING_HINT_EXP_FLAG_STALL_BASED_ROUND_ROBIN;
439 break;
440 }
441 }
442 }
443 pNext = const_cast<void *>(extendedProperties->pNext);
444 }
445
446 return ZE_RESULT_SUCCESS;
447 }
448
getProperties(ze_device_properties_t * pDeviceProperties)449 ze_result_t DeviceImp::getProperties(ze_device_properties_t *pDeviceProperties) {
450 const auto &deviceInfo = this->neoDevice->getDeviceInfo();
451 const auto &hardwareInfo = this->neoDevice->getHardwareInfo();
452 auto &hwHelper = NEO::HwHelper::get(hardwareInfo.platform.eRenderCoreFamily);
453
454 pDeviceProperties->type = ZE_DEVICE_TYPE_GPU;
455
456 pDeviceProperties->vendorId = deviceInfo.vendorId;
457
458 pDeviceProperties->deviceId = hardwareInfo.platform.usDeviceID;
459
460 pDeviceProperties->flags = 0u;
461
462 std::array<uint8_t, NEO::HwInfoConfig::uuidSize> deviceUuid;
463 if (this->neoDevice->getUuid(deviceUuid)) {
464 std::copy_n(std::begin(deviceUuid), ZE_MAX_DEVICE_UUID_SIZE, std::begin(pDeviceProperties->uuid.id));
465 } else {
466
467 uint32_t rootDeviceIndex = this->neoDevice->getRootDeviceIndex();
468
469 memset(pDeviceProperties->uuid.id, 0, ZE_MAX_DEVICE_UUID_SIZE);
470 memcpy_s(pDeviceProperties->uuid.id, sizeof(uint32_t), &deviceInfo.vendorId, sizeof(deviceInfo.vendorId));
471 memcpy_s(pDeviceProperties->uuid.id + sizeof(uint32_t), sizeof(uint32_t), &hardwareInfo.platform.usDeviceID, sizeof(hardwareInfo.platform.usDeviceID));
472 memcpy_s(pDeviceProperties->uuid.id + (2 * sizeof(uint32_t)), sizeof(uint32_t), &rootDeviceIndex, sizeof(rootDeviceIndex));
473 }
474
475 pDeviceProperties->subdeviceId = isSubdevice ? static_cast<NEO::SubDevice *>(neoDevice)->getSubDeviceIndex() : 0;
476
477 pDeviceProperties->coreClockRate = deviceInfo.maxClockFrequency;
478
479 pDeviceProperties->maxMemAllocSize = this->neoDevice->getDeviceInfo().maxMemAllocSize;
480
481 pDeviceProperties->maxCommandQueuePriority = 0;
482
483 pDeviceProperties->maxHardwareContexts = 1024 * 64;
484
485 pDeviceProperties->numThreadsPerEU = deviceInfo.numThreadsPerEU;
486
487 pDeviceProperties->physicalEUSimdWidth = hwHelper.getMinimalSIMDSize();
488
489 pDeviceProperties->numEUsPerSubslice = hardwareInfo.gtSystemInfo.MaxEuPerSubSlice;
490
491 if (NEO::DebugManager.flags.DebugApiUsed.get() == 1) {
492 pDeviceProperties->numSubslicesPerSlice = hardwareInfo.gtSystemInfo.MaxSubSlicesSupported / hardwareInfo.gtSystemInfo.MaxSlicesSupported;
493 } else {
494 pDeviceProperties->numSubslicesPerSlice = hardwareInfo.gtSystemInfo.SubSliceCount / hardwareInfo.gtSystemInfo.SliceCount;
495 }
496
497 pDeviceProperties->numSlices = hardwareInfo.gtSystemInfo.SliceCount;
498
499 if (isImplicitScalingCapable()) {
500 pDeviceProperties->numSlices *= neoDevice->getNumGenericSubDevices();
501 }
502
503 if ((NEO::DebugManager.flags.UseCyclesPerSecondTimer.get() == 1) ||
504 (pDeviceProperties->stype == ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES_1_2)) {
505 pDeviceProperties->timerResolution = this->neoDevice->getDeviceInfo().outProfilingTimerClock;
506 } else {
507 pDeviceProperties->timerResolution = this->neoDevice->getDeviceInfo().outProfilingTimerResolution;
508 }
509
510 pDeviceProperties->timestampValidBits = hardwareInfo.capabilityTable.timestampValidBits;
511
512 pDeviceProperties->kernelTimestampValidBits = hardwareInfo.capabilityTable.kernelTimestampValidBits;
513
514 if (hardwareInfo.capabilityTable.isIntegratedDevice) {
515 pDeviceProperties->flags |= ZE_DEVICE_PROPERTY_FLAG_INTEGRATED;
516 }
517
518 if (isSubdevice) {
519 pDeviceProperties->flags |= ZE_DEVICE_PROPERTY_FLAG_SUBDEVICE;
520 }
521
522 if (this->neoDevice->getDeviceInfo().errorCorrectionSupport) {
523 pDeviceProperties->flags |= ZE_DEVICE_PROPERTY_FLAG_ECC;
524 }
525
526 if (hardwareInfo.capabilityTable.supportsOnDemandPageFaults) {
527 pDeviceProperties->flags |= ZE_DEVICE_PROPERTY_FLAG_ONDEMANDPAGING;
528 }
529
530 memset(pDeviceProperties->name, 0, ZE_MAX_DEVICE_NAME);
531
532 std::string name = getNEODevice()->getDeviceInfo().name;
533 memcpy_s(pDeviceProperties->name, name.length(), name.c_str(), name.length());
534
535 return ZE_RESULT_SUCCESS;
536 }
537
getExternalMemoryProperties(ze_device_external_memory_properties_t * pExternalMemoryProperties)538 ze_result_t DeviceImp::getExternalMemoryProperties(ze_device_external_memory_properties_t *pExternalMemoryProperties) {
539 pExternalMemoryProperties->imageExportTypes = 0u;
540 pExternalMemoryProperties->imageImportTypes = 0u;
541 pExternalMemoryProperties->memoryAllocationExportTypes = ZE_EXTERNAL_MEMORY_TYPE_FLAG_DMA_BUF;
542 pExternalMemoryProperties->memoryAllocationImportTypes = ZE_EXTERNAL_MEMORY_TYPE_FLAG_DMA_BUF;
543
544 return ZE_RESULT_SUCCESS;
545 }
546
getGlobalTimestamps(uint64_t * hostTimestamp,uint64_t * deviceTimestamp)547 ze_result_t DeviceImp::getGlobalTimestamps(uint64_t *hostTimestamp, uint64_t *deviceTimestamp) {
548 NEO::TimeStampData queueTimeStamp;
549 bool retVal = this->neoDevice->getOSTime()->getCpuGpuTime(&queueTimeStamp);
550 if (!retVal)
551 return ZE_RESULT_ERROR_DEVICE_LOST;
552
553 *deviceTimestamp = queueTimeStamp.GPUTimeStamp;
554
555 retVal = this->neoDevice->getOSTime()->getCpuTime(hostTimestamp);
556 if (!retVal)
557 return ZE_RESULT_ERROR_DEVICE_LOST;
558
559 return ZE_RESULT_SUCCESS;
560 }
561
getSubDevices(uint32_t * pCount,ze_device_handle_t * phSubdevices)562 ze_result_t DeviceImp::getSubDevices(uint32_t *pCount, ze_device_handle_t *phSubdevices) {
563 if (*pCount == 0) {
564 *pCount = this->numSubDevices;
565 return ZE_RESULT_SUCCESS;
566 }
567
568 if (phSubdevices == nullptr) {
569 return ZE_RESULT_ERROR_INVALID_ARGUMENT;
570 }
571
572 if (*pCount > this->numSubDevices) {
573 *pCount = this->numSubDevices;
574 }
575
576 for (uint32_t i = 0; i < *pCount; i++) {
577 phSubdevices[i] = this->subDevices[i];
578 }
579
580 return ZE_RESULT_SUCCESS;
581 }
582
getCacheProperties(uint32_t * pCount,ze_device_cache_properties_t * pCacheProperties)583 ze_result_t DeviceImp::getCacheProperties(uint32_t *pCount, ze_device_cache_properties_t *pCacheProperties) {
584 if (*pCount == 0) {
585 *pCount = 1;
586 return ZE_RESULT_SUCCESS;
587 }
588
589 if (*pCount > 1) {
590 *pCount = 1;
591 }
592
593 const auto &hardwareInfo = this->getHwInfo();
594 pCacheProperties[0].cacheSize = hardwareInfo.gtSystemInfo.L3BankCount * 128 * KB;
595 pCacheProperties[0].flags = 0;
596
597 if (pCacheProperties->pNext) {
598 auto extendedProperties = reinterpret_cast<ze_device_cache_properties_t *>(pCacheProperties->pNext);
599 if (extendedProperties->stype == ZE_STRUCTURE_TYPE_CACHE_RESERVATION_EXT_DESC) {
600 auto cacheReservationProperties = reinterpret_cast<ze_cache_reservation_ext_desc_t *>(extendedProperties);
601 cacheReservationProperties->maxCacheReservationSize = cacheReservation->getMaxCacheReservationSize();
602 } else {
603 return ZE_RESULT_ERROR_UNSUPPORTED_ENUMERATION;
604 }
605 }
606
607 return ZE_RESULT_SUCCESS;
608 }
609
reserveCache(size_t cacheLevel,size_t cacheReservationSize)610 ze_result_t DeviceImp::reserveCache(size_t cacheLevel, size_t cacheReservationSize) {
611 if (cacheReservation->getMaxCacheReservationSize() == 0) {
612 return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
613 }
614
615 if (cacheLevel == 0) {
616 cacheLevel = 3;
617 }
618
619 auto result = cacheReservation->reserveCache(cacheLevel, cacheReservationSize);
620 if (result == false) {
621 return ZE_RESULT_ERROR_UNINITIALIZED;
622 }
623
624 return ZE_RESULT_SUCCESS;
625 }
626
setCacheAdvice(void * ptr,size_t regionSize,ze_cache_ext_region_t cacheRegion)627 ze_result_t DeviceImp::setCacheAdvice(void *ptr, size_t regionSize, ze_cache_ext_region_t cacheRegion) {
628 if (cacheReservation->getMaxCacheReservationSize() == 0) {
629 return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
630 }
631
632 if (cacheRegion == ze_cache_ext_region_t::ZE_CACHE_EXT_REGION_ZE_CACHE_REGION_DEFAULT) {
633 cacheRegion = ze_cache_ext_region_t::ZE_CACHE_EXT_REGION_ZE_CACHE_NON_RESERVED_REGION;
634 }
635
636 auto result = cacheReservation->setCacheAdvice(ptr, regionSize, cacheRegion);
637 if (result == false) {
638 return ZE_RESULT_ERROR_UNINITIALIZED;
639 }
640
641 return ZE_RESULT_SUCCESS;
642 }
643
imageGetProperties(const ze_image_desc_t * desc,ze_image_properties_t * pImageProperties)644 ze_result_t DeviceImp::imageGetProperties(const ze_image_desc_t *desc,
645 ze_image_properties_t *pImageProperties) {
646 const auto &deviceInfo = this->neoDevice->getDeviceInfo();
647
648 if (deviceInfo.imageSupport) {
649 pImageProperties->samplerFilterFlags = ZE_IMAGE_SAMPLER_FILTER_FLAG_LINEAR;
650 } else {
651 pImageProperties->samplerFilterFlags = 0;
652 }
653
654 return ZE_RESULT_SUCCESS;
655 }
656
getDeviceImageProperties(ze_device_image_properties_t * pDeviceImageProperties)657 ze_result_t DeviceImp::getDeviceImageProperties(ze_device_image_properties_t *pDeviceImageProperties) {
658 const auto &deviceInfo = this->neoDevice->getDeviceInfo();
659
660 if (deviceInfo.imageSupport) {
661 pDeviceImageProperties->maxImageDims1D = static_cast<uint32_t>(deviceInfo.image2DMaxWidth);
662 pDeviceImageProperties->maxImageDims2D = static_cast<uint32_t>(deviceInfo.image2DMaxHeight);
663 pDeviceImageProperties->maxImageDims3D = static_cast<uint32_t>(deviceInfo.image3DMaxDepth);
664 pDeviceImageProperties->maxImageBufferSize = deviceInfo.imageMaxBufferSize;
665 pDeviceImageProperties->maxImageArraySlices = static_cast<uint32_t>(deviceInfo.imageMaxArraySize);
666 pDeviceImageProperties->maxSamplers = deviceInfo.maxSamplers;
667 pDeviceImageProperties->maxReadImageArgs = deviceInfo.maxReadImageArgs;
668 pDeviceImageProperties->maxWriteImageArgs = deviceInfo.maxWriteImageArgs;
669 } else {
670 pDeviceImageProperties->maxImageDims1D = 0u;
671 pDeviceImageProperties->maxImageDims2D = 0u;
672 pDeviceImageProperties->maxImageDims3D = 0u;
673 pDeviceImageProperties->maxImageBufferSize = 0u;
674 pDeviceImageProperties->maxImageArraySlices = 0u;
675 pDeviceImageProperties->maxSamplers = 0u;
676 pDeviceImageProperties->maxReadImageArgs = 0u;
677 pDeviceImageProperties->maxWriteImageArgs = 0u;
678 }
679
680 return ZE_RESULT_SUCCESS;
681 }
682
getDebugProperties(zet_device_debug_properties_t * pDebugProperties)683 ze_result_t DeviceImp::getDebugProperties(zet_device_debug_properties_t *pDebugProperties) {
684 bool isDebugAttachAvailable = getOsInterface().isDebugAttachAvailable();
685 auto &stateSaveAreaHeader = NEO::SipKernel::getBindlessDebugSipKernel(*this->getNEODevice()).getStateSaveAreaHeader();
686
687 if (stateSaveAreaHeader.size() == 0) {
688 PRINT_DEBUGGER_INFO_LOG("Context state save area header missing", "");
689 isDebugAttachAvailable = false;
690 }
691
692 if (isDebugAttachAvailable && !isSubdevice) {
693 pDebugProperties->flags = zet_device_debug_property_flag_t::ZET_DEVICE_DEBUG_PROPERTY_FLAG_ATTACH;
694 } else {
695 pDebugProperties->flags = 0;
696 }
697 return ZE_RESULT_SUCCESS;
698 }
699
systemBarrier()700 ze_result_t DeviceImp::systemBarrier() { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; }
701
activateMetricGroupsDeferred(uint32_t count,zet_metric_group_handle_t * phMetricGroups)702 ze_result_t DeviceImp::activateMetricGroupsDeferred(uint32_t count,
703 zet_metric_group_handle_t *phMetricGroups) {
704 ze_result_t result = ZE_RESULT_ERROR_UNKNOWN;
705 if (!this->isSubdevice && this->isImplicitScalingCapable()) {
706 for (auto subDevice : this->subDevices) {
707 result = subDevice->getMetricContext().activateMetricGroupsDeferred(count, phMetricGroups);
708 if (result != ZE_RESULT_SUCCESS)
709 break;
710 }
711 } else {
712 result = metricContext->activateMetricGroupsDeferred(count, phMetricGroups);
713 }
714 return result;
715 }
716
getExecEnvironment()717 void *DeviceImp::getExecEnvironment() { return execEnvironment; }
718
getBuiltinFunctionsLib()719 BuiltinFunctionsLib *DeviceImp::getBuiltinFunctionsLib() { return builtins.get(); }
720
getMOCS(bool l3enabled,bool l1enabled)721 uint32_t DeviceImp::getMOCS(bool l3enabled, bool l1enabled) {
722 return getHwHelper().getMocsIndex(*getNEODevice()->getGmmHelper(), l3enabled, l1enabled) << 1;
723 }
724
getHwHelper()725 NEO::HwHelper &DeviceImp::getHwHelper() {
726 const auto &hardwareInfo = neoDevice->getHardwareInfo();
727 return NEO::HwHelper::get(hardwareInfo.platform.eRenderCoreFamily);
728 }
729
getOsInterface()730 NEO::OSInterface &DeviceImp::getOsInterface() { return *neoDevice->getRootDeviceEnvironment().osInterface; }
731
getPlatformInfo() const732 uint32_t DeviceImp::getPlatformInfo() const {
733 const auto &hardwareInfo = neoDevice->getHardwareInfo();
734 return hardwareInfo.platform.eRenderCoreFamily;
735 }
736
getMetricContext()737 MetricContext &DeviceImp::getMetricContext() { return *metricContext; }
738
activateMetricGroups()739 void DeviceImp::activateMetricGroups() {
740 if (metricContext != nullptr) {
741 if (metricContext->isImplicitScalingCapable()) {
742 for (uint32_t i = 0; i < numSubDevices; i++) {
743 subDevices[i]->getMetricContext().activateMetricGroups();
744 }
745 } else {
746 metricContext->activateMetricGroups();
747 }
748 }
749 }
getMaxNumHwThreads() const750 uint32_t DeviceImp::getMaxNumHwThreads() const { return maxNumHwThreads; }
751
getHwInfo() const752 const NEO::HardwareInfo &DeviceImp::getHwInfo() const { return neoDevice->getHardwareInfo(); }
753
754 // Use this method to reinitialize L0::Device *device, that was created during zeInit, with the help of Device::create
deviceReinit(DriverHandle * driverHandle,L0::Device * device,std::unique_ptr<NEO::Device> & neoDevice,ze_result_t * returnValue)755 Device *Device::deviceReinit(DriverHandle *driverHandle, L0::Device *device, std::unique_ptr<NEO::Device> &neoDevice, ze_result_t *returnValue) {
756 auto pNeoDevice = neoDevice.release();
757
758 return Device::create(driverHandle, pNeoDevice, false, returnValue, device);
759 }
760
create(DriverHandle * driverHandle,NEO::Device * neoDevice,bool isSubDevice,ze_result_t * returnValue)761 Device *Device::create(DriverHandle *driverHandle, NEO::Device *neoDevice, bool isSubDevice, ze_result_t *returnValue) {
762 return Device::create(driverHandle, neoDevice, isSubDevice, returnValue, nullptr);
763 }
764
// Creates an L0 device on top of neoDevice, or re-initializes deviceL0 in
// place when it is non-null.
//
// driverHandle - driver handle stored on the device and passed to sub-devices.
// neoDevice    - underlying NEO device; its internal reference count is
//                incremented and it is linked back to the L0 device.
// isSubDevice  - true when the device being created is a sub-device; such a
//                device does not allocate its own debug surface.
// returnValue  - written only when the NEO device has no compiler interface
//                (ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE); it is not set on
//                any other path of this function.
// deviceL0     - optional existing L0 device object to reuse.
//
// Returns the initialized device, or nullptr if creating a sub-device returned
// nullptr.
Device *Device::create(DriverHandle *driverHandle, NEO::Device *neoDevice, bool isSubDevice, ze_result_t *returnValue, L0::Device *deviceL0) {
    L0::DeviceImp *device = nullptr;
    if (deviceL0 == nullptr) {
        device = new DeviceImp;
    } else {
        // Reuse the caller-provided object (see Device::deviceReinit).
        device = static_cast<L0::DeviceImp *>(deviceL0);
    }

    UNRECOVERABLE_IF(device == nullptr);

    device->setDriverHandle(driverHandle);
    neoDevice->setSpecializedDevice(device);

    // The L0 device keeps a counted reference to the NEO device; it is dropped
    // again in DeviceImp::releaseResources.
    device->neoDevice = neoDevice;
    neoDevice->incRefInternal();

    device->execEnvironment = (void *)neoDevice->getExecutionEnvironment();
    device->allocationsForReuse = std::make_unique<NEO::AllocationsList>();
    device->implicitScalingCapable = NEO::ImplicitScalingHelper::isImplicitScalingEnabled(neoDevice->getDeviceBitfield(), true);
    device->metricContext = MetricContext::create(*device);
    device->builtins = BuiltinFunctionsLib::create(
        device, neoDevice->getBuiltIns());
    device->cacheReservation = CacheReservation::create(*device);
    device->maxNumHwThreads = NEO::HwHelper::getMaxThreadsForVfe(neoDevice->getHardwareInfo());

    // Defaults used when the SIP kernel is not initialized below: maximum debug
    // surface size and an empty state save area header.
    auto debugSurfaceSize = NEO::SipKernel::maxDbgSurfaceSize;
    std::vector<char> stateSaveAreaHeader;

    if (neoDevice->getCompilerInterface()) {
        // The SIP kernel is initialized only for mid-thread preemption or when
        // a debugger is present.
        if (neoDevice->getPreemptionMode() == NEO::PreemptionMode::MidThread || neoDevice->getDebugger()) {
            bool ret = NEO::SipKernel::initSipKernel(NEO::SipKernel::getSipKernelType(*neoDevice), *neoDevice);
            UNRECOVERABLE_IF(!ret);

            stateSaveAreaHeader = NEO::SipKernel::getSipKernel(*neoDevice).getStateSaveAreaHeader();
            debugSurfaceSize = NEO::SipKernel::getSipKernel(*neoDevice).getStateSaveAreaSize();
        }
    } else {
        // No compiler interface: report it, but keep initializing the device.
        *returnValue = ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE;
    }

    // Only a non-sub-device allocates the debug surface, and only when an L0
    // debugger exists or the device info reports an active debugger.
    const bool allocateDebugSurface = (device->getL0Debugger() || neoDevice->getDeviceInfo().debuggerActive) && !isSubDevice;
    NEO::GraphicsAllocation *debugSurface = nullptr;
    if (allocateDebugSurface) {
        debugSurface = neoDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties(
            {device->getRootDeviceIndex(), true,
             debugSurfaceSize,
             NEO::GraphicsAllocation::AllocationType::DEBUG_CONTEXT_SAVE_AREA,
             false,
             false,
             device->getNEODevice()->getDeviceBitfield()});
        device->setDebugSurface(debugSurface);
    }

    // Copy the state save area header to offset 0 of the debug surface when
    // both exist.
    if (debugSurface && stateSaveAreaHeader.size() > 0) {
        auto &hwInfo = neoDevice->getHardwareInfo();
        auto &hwHelper = NEO::HwHelper::get(hwInfo.platform.eRenderCoreFamily);
        NEO::MemoryTransferHelper::transferMemoryToAllocation(hwHelper.isBlitCopyRequiredForLocalMemory(hwInfo, *debugSurface),
                                                              *neoDevice, debugSurface, 0, stateSaveAreaHeader.data(),
                                                              stateSaveAreaHeader.size());
    }

    // Create one L0 sub-device per non-null NEO sub-device. Each sub-device
    // receives the same debugSurface pointer as this device (possibly nullptr).
    for (auto &neoSubDevice : neoDevice->getSubDevices()) {
        if (!neoSubDevice) {
            continue;
        }

        ze_device_handle_t subDevice = Device::create(driverHandle,
                                                      neoSubDevice,
                                                      true, returnValue, nullptr);
        if (subDevice == nullptr) {
            // NOTE(review): `device` is not released on this path — confirm
            // whether this branch is reachable and who owns cleanup.
            return nullptr;
        }
        static_cast<DeviceImp *>(subDevice)->isSubdevice = true;
        static_cast<DeviceImp *>(subDevice)->setDebugSurface(debugSurface);
        device->subDevices.push_back(static_cast<Device *>(subDevice));
    }
    device->numSubDevices = static_cast<uint32_t>(device->subDevices.size());

    // Dual-storage shared memory defaults to "local memory supported" and can
    // be overridden by the AllocateSharedAllocationsWithCpuAndGpuStorage debug
    // flag (any value other than -1).
    auto supportDualStorageSharedMemory = neoDevice->getMemoryManager()->isLocalMemorySupported(device->neoDevice->getRootDeviceIndex());
    if (NEO::DebugManager.flags.AllocateSharedAllocationsWithCpuAndGpuStorage.get() != -1) {
        supportDualStorageSharedMemory = NEO::DebugManager.flags.AllocateSharedAllocationsWithCpuAndGpuStorage.get();
    }

    if (supportDualStorageSharedMemory) {
        // Synchronous immediate command list on the Copy engine group, stored
        // as the device's page-fault command list. resultValue is not inspected
        // after the call.
        ze_command_queue_desc_t cmdQueueDesc = {};
        cmdQueueDesc.ordinal = 0;
        cmdQueueDesc.index = 0;
        cmdQueueDesc.flags = 0;
        cmdQueueDesc.stype = ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC;
        cmdQueueDesc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS;
        ze_result_t resultValue = ZE_RESULT_SUCCESS;
        device->pageFaultCommandList =
            CommandList::createImmediate(
                device->neoDevice->getHardwareInfo().platform.eProductFamily, device, &cmdQueueDesc, true, NEO::EngineGroupType::Copy, resultValue);
    }

    if (device->getSourceLevelDebugger()) {
        // Notify the source level debugger, passing the driver model's device
        // handle, or 0 when there is no OS interface.
        auto osInterface = neoDevice->getRootDeviceEnvironment().osInterface.get();
        device->getSourceLevelDebugger()
            ->notifyNewDevice(osInterface ? osInterface->getDriverModel()->getDeviceHandle() : 0);
    }
    device->createSysmanHandle(isSubDevice);
    // Re-arm releaseResources() for this (possibly reused) device object.
    device->resourcesReleased = false;
    return device;
}
870
releaseResources()871 void DeviceImp::releaseResources() {
872 if (resourcesReleased) {
873 return;
874 }
875 if (neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[neoDevice->getRootDeviceIndex()]->debugger.get() &&
876 !neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[neoDevice->getRootDeviceIndex()]->debugger->isLegacy()) {
877 neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[neoDevice->getRootDeviceIndex()]->debugger.reset(nullptr);
878 }
879 for (uint32_t i = 0; i < this->numSubDevices; i++) {
880 delete this->subDevices[i];
881 }
882 this->subDevices.clear();
883 this->numSubDevices = 0;
884
885 if (this->pageFaultCommandList) {
886 this->pageFaultCommandList->destroy();
887 this->pageFaultCommandList = nullptr;
888 }
889 metricContext.reset();
890 builtins.reset();
891 cacheReservation.reset();
892
893 if (allocationsForReuse.get()) {
894 allocationsForReuse->freeAllGraphicsAllocations(neoDevice);
895 allocationsForReuse.reset();
896 }
897
898 if (getSourceLevelDebugger()) {
899 getSourceLevelDebugger()->notifyDeviceDestruction();
900 }
901
902 if (!isSubdevice) {
903 if (this->debugSurface) {
904 this->neoDevice->getMemoryManager()->freeGraphicsMemory(this->debugSurface);
905 this->debugSurface = nullptr;
906 }
907 }
908
909 if (neoDevice) {
910 neoDevice->decRefInternal();
911 neoDevice = nullptr;
912 }
913
914 resourcesReleased = true;
915 }
916
~DeviceImp()917 DeviceImp::~DeviceImp() {
918 releaseResources();
919
920 if (!isSubdevice) {
921 if (pSysmanDevice != nullptr) {
922 delete pSysmanDevice;
923 pSysmanDevice = nullptr;
924 }
925 }
926 }
927
getDevicePreemptionMode() const928 NEO::PreemptionMode DeviceImp::getDevicePreemptionMode() const {
929 return neoDevice->getPreemptionMode();
930 }
931
getDeviceInfo() const932 const NEO::DeviceInfo &DeviceImp::getDeviceInfo() const {
933 return neoDevice->getDeviceInfo();
934 }
935
// Returns a graphics allocation for the host range [buffer, buffer + size),
// reusing or replacing SVM allocations already registered with the driver.
//
// buffer      - start of the host range.
// size        - size of the host range in bytes.
// commandList - command list whose deallocation and residency containers are
//               purged of any allocation that gets replaced; must be non-null
//               when existing allocations are replaced.
//
// Behavior:
//  - If the driver reports a match (allocFound), the GPU allocation of the
//    first returned entry for this root device is returned as is.
//  - Otherwise every returned allocation is removed and freed, the requested
//    range is adjusted against each of them, and a single BUFFER_HOST_MEMORY
//    allocation is created for the resulting range and registered with the
//    SVM allocations manager.
//
// Returns nullptr when the new allocation cannot be created.
NEO::GraphicsAllocation *DeviceImp::allocateManagedMemoryFromHostPtr(void *buffer, size_t size, struct CommandList *commandList) {
    char *baseAddress = reinterpret_cast<char *>(buffer);
    NEO::GraphicsAllocation *allocation = nullptr;
    bool allocFound = false;
    // NOTE(review): allocFound presumably means an existing allocation already
    // satisfies the whole range — confirm against findAllocationsWithinRange.
    std::vector<NEO::SvmAllocationData *> allocDataArray = driverHandle->findAllocationsWithinRange(buffer, size, &allocFound);
    if (allocFound) {
        return allocDataArray[0]->gpuAllocations.getGraphicsAllocation(getRootDeviceIndex());
    }

    if (!allocDataArray.empty()) {
        UNRECOVERABLE_IF(commandList == nullptr);
        for (auto allocData : allocDataArray) {
            allocation = allocData->gpuAllocations.getGraphicsAllocation(getRootDeviceIndex());
            // Capture address and size before the allocation is removed.
            char *allocAddress = reinterpret_cast<char *>(allocation->getGpuAddress());
            size_t allocSize = allocData->size;

            // Unregister and free the old allocation, then drop the command
            // list's container entries for it. `allocation` is passed to the
            // erase calls after it has been freed.
            driverHandle->getSvmAllocsManager()->removeSVMAlloc(*allocData);
            neoDevice->getMemoryManager()->freeGraphicsMemory(allocation);
            commandList->eraseDeallocationContainerEntry(allocation);
            commandList->eraseResidencyContainerEntry(allocation);

            if (allocAddress < baseAddress) {
                // Old allocation starts before the requested range: move the
                // start down to it and keep the current end.
                buffer = reinterpret_cast<void *>(allocAddress);
                baseAddress += size;
                size = ptrDiff(baseAddress, allocAddress);
                baseAddress = reinterpret_cast<char *>(buffer);
            } else {
                // Old allocation starts at or after the requested start:
                // compare the two end addresses.
                allocAddress += allocSize;
                baseAddress += size;
                if (allocAddress > baseAddress) {
                    // Old allocation ends later: extend the range to its end.
                    baseAddress = reinterpret_cast<char *>(buffer);
                    size = ptrDiff(allocAddress, baseAddress);
                } else {
                    // Old allocation lies inside the range: only restore
                    // baseAddress to the range start.
                    baseAddress = reinterpret_cast<char *>(buffer);
                }
            }
        }
    }

    // Create one host-memory buffer allocation for the (possibly adjusted)
    // range.
    allocation = neoDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties(
        {getRootDeviceIndex(), false, size, NEO::GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY, false, neoDevice->getDeviceBitfield()},
        buffer);

    if (allocation == nullptr) {
        return allocation;
    }

    // Register the new allocation with the SVM allocations manager. It has no
    // CPU allocation, no associated device, and memory type NOT_SPECIFIED.
    NEO::SvmAllocationData allocData(getRootDeviceIndex());
    allocData.gpuAllocations.addAllocation(allocation);
    allocData.cpuAllocation = nullptr;
    allocData.size = size;
    allocData.memoryType = InternalMemoryType::NOT_SPECIFIED;
    allocData.device = nullptr;
    driverHandle->getSvmAllocsManager()->insertSVMAlloc(allocData);

    return allocation;
}
993
allocateMemoryFromHostPtr(const void * buffer,size_t size,bool hostCopyAllowed)994 NEO::GraphicsAllocation *DeviceImp::allocateMemoryFromHostPtr(const void *buffer, size_t size, bool hostCopyAllowed) {
995 NEO::AllocationProperties properties = {getRootDeviceIndex(), false, size,
996 NEO::GraphicsAllocation::AllocationType::EXTERNAL_HOST_PTR,
997 false, neoDevice->getDeviceBitfield()};
998 properties.flags.flushL3RequiredForRead = properties.flags.flushL3RequiredForWrite = true;
999 auto allocation = neoDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties(properties,
1000 buffer);
1001 if (allocation == nullptr && hostCopyAllowed) {
1002 allocation = neoDevice->getMemoryManager()->allocateInternalGraphicsMemoryWithHostCopy(neoDevice->getRootDeviceIndex(),
1003 neoDevice->getDeviceBitfield(),
1004 buffer,
1005 size);
1006 }
1007
1008 return allocation;
1009 }
1010
obtainReusableAllocation(size_t requiredSize,NEO::GraphicsAllocation::AllocationType type)1011 NEO::GraphicsAllocation *DeviceImp::obtainReusableAllocation(size_t requiredSize, NEO::GraphicsAllocation::AllocationType type) {
1012 auto alloc = allocationsForReuse->detachAllocation(requiredSize, nullptr, nullptr, type);
1013 if (alloc == nullptr)
1014 return nullptr;
1015 else
1016 return alloc.release();
1017 }
1018
storeReusableAllocation(NEO::GraphicsAllocation & alloc)1019 void DeviceImp::storeReusableAllocation(NEO::GraphicsAllocation &alloc) {
1020 allocationsForReuse->pushFrontOne(alloc);
1021 }
1022
getCsrForOrdinalAndIndex(NEO::CommandStreamReceiver ** csr,uint32_t ordinal,uint32_t index)1023 ze_result_t DeviceImp::getCsrForOrdinalAndIndex(NEO::CommandStreamReceiver **csr, uint32_t ordinal, uint32_t index) {
1024 auto &engineGroups = getActiveDevice()->getRegularEngineGroups();
1025 if ((ordinal >= engineGroups.size()) ||
1026 (index >= engineGroups[ordinal].engines.size())) {
1027 return ZE_RESULT_ERROR_INVALID_ARGUMENT;
1028 }
1029 *csr = engineGroups[ordinal].engines[index].commandStreamReceiver;
1030 return ZE_RESULT_SUCCESS;
1031 }
1032
getCsrForLowPriority(NEO::CommandStreamReceiver ** csr)1033 ze_result_t DeviceImp::getCsrForLowPriority(NEO::CommandStreamReceiver **csr) {
1034 NEO::Device *activeDevice = getActiveDevice();
1035 for (auto &it : activeDevice->getAllEngines()) {
1036 if (it.osContext->isLowPriority()) {
1037 *csr = it.commandStreamReceiver;
1038 return ZE_RESULT_SUCCESS;
1039 }
1040 }
1041 // if the code falls through, we have no low priority context created by neoDevice.
1042 UNRECOVERABLE_IF(true);
1043 return ZE_RESULT_ERROR_UNKNOWN;
1044 }
1045
getDebugSession(const zet_debug_config_t & config)1046 DebugSession *DeviceImp::getDebugSession(const zet_debug_config_t &config) {
1047 return debugSession.get();
1048 }
1049
createDebugSession(const zet_debug_config_t & config,ze_result_t & result)1050 DebugSession *DeviceImp::createDebugSession(const zet_debug_config_t &config, ze_result_t &result) {
1051 if (!this->isSubdevice) {
1052 auto session = DebugSession::create(config, this, result);
1053 debugSession.reset(session);
1054 } else {
1055 result = ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
1056 }
1057 return debugSession.get();
1058 }
1059
toPhysicalSliceId(const NEO::TopologyMap & topologyMap,uint32_t & slice,uint32_t & deviceIndex)1060 bool DeviceImp::toPhysicalSliceId(const NEO::TopologyMap &topologyMap, uint32_t &slice, uint32_t &deviceIndex) {
1061 auto hwInfo = neoDevice->getRootDeviceEnvironment().getHardwareInfo();
1062 uint32_t subDeviceCount = NEO::HwHelper::getSubDevicesCount(hwInfo);
1063 auto deviceBitfield = neoDevice->getDeviceBitfield();
1064
1065 if (topologyMap.size() == subDeviceCount && !isSubdevice) {
1066 uint32_t sliceId = slice;
1067 for (uint32_t i = 0; i < topologyMap.size(); i++) {
1068 if (sliceId < topologyMap.at(i).sliceIndices.size()) {
1069 slice = topologyMap.at(i).sliceIndices[sliceId];
1070 deviceIndex = i;
1071 return true;
1072 }
1073 sliceId = sliceId - static_cast<uint32_t>(topologyMap.at(i).sliceIndices.size());
1074 }
1075 } else if (isSubdevice) {
1076 UNRECOVERABLE_IF(!deviceBitfield.any());
1077 uint32_t subDeviceIndex = Math::log2(static_cast<uint32_t>(deviceBitfield.to_ulong()));
1078
1079 if (topologyMap.find(subDeviceIndex) != topologyMap.end()) {
1080 if (slice < topologyMap.at(subDeviceIndex).sliceIndices.size()) {
1081 deviceIndex = subDeviceIndex;
1082 slice = topologyMap.at(subDeviceIndex).sliceIndices[slice];
1083 return true;
1084 }
1085 }
1086 }
1087
1088 return false;
1089 }
1090
toApiSliceId(const NEO::TopologyMap & topologyMap,uint32_t & slice,uint32_t deviceIndex)1091 bool DeviceImp::toApiSliceId(const NEO::TopologyMap &topologyMap, uint32_t &slice, uint32_t deviceIndex) {
1092 auto deviceBitfield = neoDevice->getDeviceBitfield();
1093
1094 if (isSubdevice) {
1095 UNRECOVERABLE_IF(!deviceBitfield.any());
1096 deviceIndex = Math::log2(static_cast<uint32_t>(deviceBitfield.to_ulong()));
1097 }
1098
1099 if (topologyMap.find(deviceIndex) != topologyMap.end()) {
1100 uint32_t apiSliceId = 0;
1101 if (!isSubdevice) {
1102 for (uint32_t devId = 0; devId < deviceIndex; devId++) {
1103 apiSliceId += static_cast<uint32_t>(topologyMap.at(devId).sliceIndices.size());
1104 }
1105 }
1106
1107 for (uint32_t i = 0; i < topologyMap.at(deviceIndex).sliceIndices.size(); i++) {
1108 if (static_cast<uint32_t>(topologyMap.at(deviceIndex).sliceIndices[i]) == slice) {
1109 apiSliceId += i;
1110 slice = apiSliceId;
1111 return true;
1112 }
1113 }
1114 }
1115
1116 return false;
1117 }
1118
getActiveDevice() const1119 NEO::Device *DeviceImp::getActiveDevice() const {
1120 if (neoDevice->getNumGenericSubDevices() > 1u) {
1121 if (isImplicitScalingCapable()) {
1122 return this->neoDevice;
1123 }
1124 return this->neoDevice->getSubDevice(0);
1125 }
1126 return this->neoDevice;
1127 }
1128
1129 } // namespace L0
1130