1 /*
2 * Copyright (C) 2018-2021 Intel Corporation
3 *
4 * SPDX-License-Identifier: MIT
5 *
6 */
7
8 #include "opencl/source/context/context.h"
9
10 #include "shared/source/built_ins/built_ins.h"
11 #include "shared/source/command_stream/command_stream_receiver.h"
12 #include "shared/source/compiler_interface/compiler_interface.h"
13 #include "shared/source/debug_settings/debug_settings_manager.h"
14 #include "shared/source/helpers/get_info.h"
15 #include "shared/source/helpers/ptr_math.h"
16 #include "shared/source/helpers/string.h"
17 #include "shared/source/memory_manager/deferred_deleter.h"
18 #include "shared/source/memory_manager/memory_manager.h"
19 #include "shared/source/memory_manager/unified_memory_manager.h"
20
21 #include "opencl/source/cl_device/cl_device.h"
22 #include "opencl/source/command_queue/command_queue.h"
23 #include "opencl/source/device_queue/device_queue.h"
24 #include "opencl/source/execution_environment/cl_execution_environment.h"
25 #include "opencl/source/gtpin/gtpin_notify.h"
26 #include "opencl/source/helpers/get_info_status_mapper.h"
27 #include "opencl/source/helpers/surface_formats.h"
28 #include "opencl/source/mem_obj/image.h"
29 #include "opencl/source/platform/platform.h"
30 #include "opencl/source/scheduler/scheduler_kernel.h"
31 #include "opencl/source/sharings/sharing.h"
32 #include "opencl/source/sharings/sharing_factory.h"
33
34 #include "d3d_sharing_functions.h"
35
36 #include <algorithm>
37 #include <memory>
38
39 namespace NEO {
40
Context(void (CL_CALLBACK * funcNotify)(const char *,const void *,size_t,void *),void * data)41 Context::Context(
42 void(CL_CALLBACK *funcNotify)(const char *, const void *, size_t, void *),
43 void *data) {
44 contextCallback = funcNotify;
45 userData = data;
46 sharingFunctions.resize(SharingType::MAX_SHARING_VALUE);
47 schedulerBuiltIn = std::make_unique<BuiltInKernel>();
48 }
49
Context::~Context() {
    // Frees the raw property array copied in createImpl(); delete[] on nullptr is a no-op.
    delete[] properties;

    // Destroy the per-root-device special (internal) command queues first,
    // while devices and memory manager are still alive.
    for (auto rootDeviceIndex = 0u; rootDeviceIndex < specialQueues.size(); rootDeviceIndex++) {
        if (specialQueues[rootDeviceIndex]) {
            delete specialQueues[rootDeviceIndex];
        }
    }
    if (svmAllocsManager) {
        delete svmAllocsManager;
    }
    if (driverDiagnostics) {
        delete driverDiagnostics;
    }
    // Balance the addClient() registered in createImpl() when async deletion is enabled.
    if (memoryManager && memoryManager->isAsyncDeleterEnabled()) {
        memoryManager->getDeferredDeleter()->removeClient();
    }
    gtpinNotifyContextDestroy((cl_context)this);
    // Fire user-registered clSetContextDestructorCallback callbacks.
    destructorCallbacks.invoke(this);
    // Drop the internal references taken on each device in createImpl().
    for (auto &device : devices) {
        device->decRefInternal();
    }
    // Tear down the lazily-created scheduler kernel/program pair (see getSchedulerKernel()).
    delete static_cast<SchedulerKernel *>(schedulerBuiltIn->pKernel);
    delete schedulerBuiltIn->pProgram;
    schedulerBuiltIn->pKernel = nullptr;
    schedulerBuiltIn->pProgram = nullptr;
}
77
setDestructorCallback(void (CL_CALLBACK * funcNotify)(cl_context,void *),void * userData)78 cl_int Context::setDestructorCallback(void(CL_CALLBACK *funcNotify)(cl_context, void *),
79 void *userData) {
80 std::unique_lock<std::mutex> theLock(mtx);
81 destructorCallbacks.add(funcNotify, userData);
82 return CL_SUCCESS;
83 }
84
tryGetExistingHostPtrAllocation(const void * ptr,size_t size,uint32_t rootDeviceIndex,GraphicsAllocation * & allocation,InternalMemoryType & memoryType,bool & isCpuCopyAllowed)85 cl_int Context::tryGetExistingHostPtrAllocation(const void *ptr,
86 size_t size,
87 uint32_t rootDeviceIndex,
88 GraphicsAllocation *&allocation,
89 InternalMemoryType &memoryType,
90 bool &isCpuCopyAllowed) {
91 cl_int retVal = tryGetExistingSvmAllocation(ptr, size, rootDeviceIndex, allocation, memoryType, isCpuCopyAllowed);
92 if (retVal != CL_SUCCESS || allocation != nullptr) {
93 return retVal;
94 }
95
96 retVal = tryGetExistingMapAllocation(ptr, size, allocation);
97 return retVal;
98 }
99
tryGetExistingSvmAllocation(const void * ptr,size_t size,uint32_t rootDeviceIndex,GraphicsAllocation * & allocation,InternalMemoryType & memoryType,bool & isCpuCopyAllowed)100 cl_int Context::tryGetExistingSvmAllocation(const void *ptr,
101 size_t size,
102 uint32_t rootDeviceIndex,
103 GraphicsAllocation *&allocation,
104 InternalMemoryType &memoryType,
105 bool &isCpuCopyAllowed) {
106 if (getSVMAllocsManager()) {
107 SvmAllocationData *svmEntry = getSVMAllocsManager()->getSVMAlloc(ptr);
108 if (svmEntry) {
109 memoryType = svmEntry->memoryType;
110 if ((svmEntry->gpuAllocations.getGraphicsAllocation(rootDeviceIndex)->getGpuAddress() + svmEntry->size) < (castToUint64(ptr) + size)) {
111 return CL_INVALID_OPERATION;
112 }
113 allocation = svmEntry->cpuAllocation ? svmEntry->cpuAllocation : svmEntry->gpuAllocations.getGraphicsAllocation(rootDeviceIndex);
114 if (isCpuCopyAllowed) {
115 if (svmEntry->memoryType == DEVICE_UNIFIED_MEMORY) {
116 isCpuCopyAllowed = false;
117 }
118 }
119 }
120 }
121 return CL_SUCCESS;
122 }
123
tryGetExistingMapAllocation(const void * ptr,size_t size,GraphicsAllocation * & allocation)124 cl_int Context::tryGetExistingMapAllocation(const void *ptr,
125 size_t size,
126 GraphicsAllocation *&allocation) {
127 if (MapInfo mapInfo = {}; mapOperationsStorage.getInfoForHostPtr(ptr, size, mapInfo)) {
128 if (mapInfo.graphicsAllocation) {
129 allocation = mapInfo.graphicsAllocation;
130 }
131 }
132 return CL_SUCCESS;
133 }
134
// Returns the set of root-device indices covered by this context (populated in createImpl()).
const std::set<uint32_t> &Context::getRootDeviceIndices() const {
    return rootDeviceIndices;
}
138
// Returns the largest root-device index in this context (computed in createImpl()).
uint32_t Context::getMaxRootDeviceIndex() const {
    return maxRootDeviceIndex;
}
142
// Returns the context's default device queue, or nullptr if none was set.
DeviceQueue *Context::getDefaultDeviceQueue() {
    return defaultDeviceQueue;
}
146
// Records `queue` as the context's default device queue (no ownership taken here).
void Context::setDefaultDeviceQueue(DeviceQueue *queue) {
    defaultDeviceQueue = queue;
}
150
// Returns the internal "special" command queue for the given root device.
// NOTE(review): no bounds check - callers must pass an index < specialQueues.size().
CommandQueue *Context::getSpecialQueue(uint32_t rootDeviceIndex) {
    return specialQueues[rootDeviceIndex];
}
154
// Stores `commandQueue` as the special queue for the given root device
// (slot must exist; specialQueues is sized in createImpl()).
void Context::setSpecialQueue(CommandQueue *commandQueue, uint32_t rootDeviceIndex) {
    specialQueues[rootDeviceIndex] = commandQueue;
}
overrideSpecialQueueAndDecrementRefCount(CommandQueue * commandQueue,uint32_t rootDeviceIndex)158 void Context::overrideSpecialQueueAndDecrementRefCount(CommandQueue *commandQueue, uint32_t rootDeviceIndex) {
159 setSpecialQueue(commandQueue, rootDeviceIndex);
160 commandQueue->setIsSpecialCommandQueue(true);
161 //decrement ref count that special queue added
162 this->decRefInternal();
163 };
164
// Multi-storage (banked) allocations are preferred for every context type
// except specialized (sub-device-only) contexts.
bool Context::areMultiStorageAllocationsPreferred() {
    return this->contextType != ContextType::CONTEXT_TYPE_SPECIALIZED;
}
168
// Performs the real context construction: parses the cl_context_properties
// list, wires up sharing, copies the property array, registers devices, and
// creates per-root-device special queues. Returns false (with errcodeRet set)
// on failure. `funcNotify`/`data` are part of the clCreateContext signature;
// they are not consumed here (the callback is stored by the constructor).
bool Context::createImpl(const cl_context_properties *properties,
                         const ClDeviceVector &inputDevices,
                         void(CL_CALLBACK *funcNotify)(const char *, const void *, size_t, void *),
                         void *data, cl_int &errcodeRet) {

    auto propertiesCurrent = properties;
    bool interopUserSync = false;
    int32_t driverDiagnosticsUsed = -1;
    auto sharingBuilder = sharingFactory.build();

    std::unique_ptr<DriverDiagnostics> driverDiagnostics;
    // Walk the zero-terminated list of (type, value) property pairs.
    while (propertiesCurrent && *propertiesCurrent) {
        errcodeRet = CL_SUCCESS;

        auto propertyType = propertiesCurrent[0];
        auto propertyValue = propertiesCurrent[1];
        propertiesCurrent += 2;

        switch (propertyType) {
        case CL_CONTEXT_PLATFORM:
            break;
        case CL_CONTEXT_SHOW_DIAGNOSTICS_INTEL:
            driverDiagnosticsUsed = static_cast<int32_t>(propertyValue);
            break;
        case CL_CONTEXT_INTEROP_USER_SYNC:
            interopUserSync = propertyValue > 0;
            break;
        default:
            // Unknown properties are offered to the sharing builder (GL/D3D/VA);
            // anything it rejects is an invalid property.
            if (!sharingBuilder->processProperties(propertyType, propertyValue)) {
                errcodeRet = CL_INVALID_PROPERTY;
                return false;
            }
            break;
        }
    }

    auto numProperties = ptrDiff(propertiesCurrent, properties) / sizeof(cl_context_properties);
    cl_context_properties *propertiesNew = nullptr;

    // copy the user properties if there are any
    if (numProperties) {
        propertiesNew = new cl_context_properties[numProperties + 1];
        memcpy_s(propertiesNew, (numProperties + 1) * sizeof(cl_context_properties), properties, numProperties * sizeof(cl_context_properties));
        propertiesNew[numProperties] = 0;
        numProperties++;
    }

    // Debug flag overrides whatever the property list requested.
    if (DebugManager.flags.PrintDriverDiagnostics.get() != -1) {
        driverDiagnosticsUsed = DebugManager.flags.PrintDriverDiagnostics.get();
    }
    if (driverDiagnosticsUsed >= 0) {
        driverDiagnostics.reset(new DriverDiagnostics((cl_diagnostics_verbose_level)driverDiagnosticsUsed));
    }

    this->numProperties = numProperties;
    this->properties = propertiesNew;
    this->setInteropUserSyncEnabled(interopUserSync);

    if (!sharingBuilder->finalizeProperties(*this, errcodeRet)) {
        return false;
    }

    bool containsDeviceWithSubdevices = false;
    for (const auto &device : inputDevices) {
        rootDeviceIndices.insert(device->getRootDeviceIndex());
        containsDeviceWithSubdevices |= device->getNumGenericSubDevices() > 1;
    }

    // Ownership of the diagnostics object passes to the context (freed in ~Context).
    this->driverDiagnostics = driverDiagnostics.release();
    if (rootDeviceIndices.size() > 1 && containsDeviceWithSubdevices && !DebugManager.flags.EnableMultiRootDeviceContexts.get()) {
        DEBUG_BREAK_IF("No support for context with multiple devices with subdevices");
        errcodeRet = CL_OUT_OF_HOST_MEMORY;
        return false;
    }

    devices = inputDevices;
    // Build the aggregated device bitfield for each root device in the context.
    for (auto &rootDeviceIndex : rootDeviceIndices) {
        DeviceBitfield deviceBitfield{};
        for (const auto &pDevice : devices) {
            if (pDevice->getRootDeviceIndex() == rootDeviceIndex) {
                deviceBitfield |= pDevice->getDeviceBitfield();
            }
        }
        deviceBitfields.insert({rootDeviceIndex, deviceBitfield});
    }

    if (devices.size() > 0) {
        maxRootDeviceIndex = *std::max_element(rootDeviceIndices.begin(), rootDeviceIndices.end(), std::less<uint32_t const>());
        specialQueues.resize(maxRootDeviceIndex + 1u);
        auto device = this->getDevice(0);
        this->memoryManager = device->getMemoryManager();
        // Paired with removeClient() in the destructor.
        if (memoryManager->isAsyncDeleterEnabled()) {
            memoryManager->getDeferredDeleter()->addClient();
        }

        bool anySvmSupport = false;
        for (auto &device : devices) {
            device->incRefInternal();
            anySvmSupport |= device->getHardwareInfo().capabilityTable.ftrSvm;
        }

        setupContextType();
        // SVM manager is only created when at least one device supports SVM.
        if (anySvmSupport) {
            this->svmAllocsManager = new SVMAllocsManager(this->memoryManager,
                                                          this->areMultiStorageAllocationsPreferred());
        }
    }

    // Create the internal special command queue for every root device that lacks one.
    for (auto &device : devices) {
        if (!specialQueues[device->getRootDeviceIndex()]) {
            auto commandQueue = CommandQueue::create(this, device, nullptr, true, errcodeRet); // NOLINT
            DEBUG_BREAK_IF(commandQueue == nullptr);
            overrideSpecialQueueAndDecrementRefCount(commandQueue, device->getRootDeviceIndex());
        }
    }

    return true;
}
287
// Implements clGetContextInfo: writes the requested parameter into
// `paramValue` (when large enough) and/or reports the required size via
// `paramValueSizeRet`. Unknown params are delegated to getOsContextInfo().
cl_int Context::getInfo(cl_context_info paramName, size_t paramValueSize,
                        void *paramValue, size_t *paramValueSizeRet) {
    cl_int retVal;
    size_t valueSize = GetInfo::invalidSourceSize;
    const void *pValue = nullptr;
    cl_uint numDevices;
    cl_uint refCount = 0;
    std::vector<cl_device_id> devIDs;
    auto callGetinfo = true;

    switch (paramName) {
    case CL_CONTEXT_DEVICES:
        valueSize = devices.size() * sizeof(cl_device_id);
        devices.toDeviceIDs(devIDs);
        pValue = devIDs.data();
        break;

    case CL_CONTEXT_NUM_DEVICES:
        numDevices = (cl_uint)(devices.size());
        valueSize = sizeof(numDevices);
        pValue = &numDevices;
        break;

    case CL_CONTEXT_PROPERTIES:
        valueSize = this->numProperties * sizeof(cl_context_properties);
        pValue = this->properties;
        // A context created without properties legitimately reports size 0;
        // skip the copy so GetInfo does not treat it as an error.
        if (valueSize == 0) {
            callGetinfo = false;
        }

        break;

    case CL_CONTEXT_REFERENCE_COUNT:
        refCount = static_cast<cl_uint>(this->getReference());
        valueSize = sizeof(refCount);
        pValue = &refCount;
        break;

    default:
        pValue = getOsContextInfo(paramName, &valueSize);
        break;
    }

    GetInfoStatus getInfoStatus = GetInfoStatus::SUCCESS;
    if (callGetinfo) {
        getInfoStatus = GetInfo::getInfo(paramValue, paramValueSize, pValue, valueSize);
    }

    retVal = changeGetInfoStatusToCLResultType(getInfoStatus);
    GetInfo::setParamValueReturnSize(paramValueSizeRet, valueSize, getInfoStatus);

    return retVal;
}
341
// Returns the number of devices associated with this context.
size_t Context::getNumDevices() const {
    return devices.size();
}
345
// True when more than one sub-device of the given root device participates in
// this context. Throws std::out_of_range if the root device is not part of it.
bool Context::containsMultipleSubDevices(uint32_t rootDeviceIndex) const {
    return deviceBitfields.at(rootDeviceIndex).count() > 1;
}
349
getDevice(size_t deviceOrdinal) const350 ClDevice *Context::getDevice(size_t deviceOrdinal) const {
351 return (ClDevice *)devices[deviceOrdinal];
352 }
353
getSupportedImageFormats(Device * device,cl_mem_flags flags,cl_mem_object_type imageType,cl_uint numEntries,cl_image_format * imageFormats,cl_uint * numImageFormatsReturned)354 cl_int Context::getSupportedImageFormats(
355 Device *device,
356 cl_mem_flags flags,
357 cl_mem_object_type imageType,
358 cl_uint numEntries,
359 cl_image_format *imageFormats,
360 cl_uint *numImageFormatsReturned) {
361 size_t numImageFormats = 0;
362
363 const bool nv12ExtensionEnabled = device->getSpecializedDevice<ClDevice>()->getDeviceInfo().nv12Extension;
364 const bool packedYuvExtensionEnabled = device->getSpecializedDevice<ClDevice>()->getDeviceInfo().packedYuvExtension;
365
366 auto appendImageFormats = [&](ArrayRef<const ClSurfaceFormatInfo> formats) {
367 if (imageFormats) {
368 size_t offset = numImageFormats;
369 for (size_t i = 0; i < formats.size() && offset < numEntries; ++i) {
370 imageFormats[offset++] = formats[i].OCLImageFormat;
371 }
372 }
373 numImageFormats += formats.size();
374 };
375
376 if (flags & CL_MEM_READ_ONLY) {
377 if (this->getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features) {
378 appendImageFormats(SurfaceFormats::readOnly20());
379 } else {
380 appendImageFormats(SurfaceFormats::readOnly12());
381 }
382 if (Image::isImage2d(imageType) && nv12ExtensionEnabled) {
383 appendImageFormats(SurfaceFormats::planarYuv());
384 }
385 if (Image::isImage2dOr2dArray(imageType)) {
386 appendImageFormats(SurfaceFormats::readOnlyDepth());
387 }
388 if (Image::isImage2d(imageType) && packedYuvExtensionEnabled) {
389 appendImageFormats(SurfaceFormats::packedYuv());
390 }
391 } else if (flags & CL_MEM_WRITE_ONLY) {
392 appendImageFormats(SurfaceFormats::writeOnly());
393 if (Image::isImage2dOr2dArray(imageType)) {
394 appendImageFormats(SurfaceFormats::readWriteDepth());
395 }
396 } else if (nv12ExtensionEnabled && (flags & CL_MEM_NO_ACCESS_INTEL)) {
397 if (this->getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features) {
398 appendImageFormats(SurfaceFormats::readOnly20());
399 } else {
400 appendImageFormats(SurfaceFormats::readOnly12());
401 }
402 if (Image::isImage2d(imageType)) {
403 appendImageFormats(SurfaceFormats::planarYuv());
404 }
405 } else {
406 appendImageFormats(SurfaceFormats::readWrite());
407 if (Image::isImage2dOr2dArray(imageType)) {
408 appendImageFormats(SurfaceFormats::readWriteDepth());
409 }
410 }
411 if (numImageFormatsReturned) {
412 *numImageFormatsReturned = static_cast<cl_uint>(numImageFormats);
413 }
414 return CL_SUCCESS;
415 }
416
// Returns the device-enqueue scheduler kernel, building it on first use.
// Initialization is guarded by std::call_once so concurrent callers get the
// same instance; the kernel/program are freed in ~Context.
SchedulerKernel &Context::getSchedulerKernel() {
    // Fast path: already built by a previous call.
    if (schedulerBuiltIn->pKernel) {
        return *static_cast<SchedulerKernel *>(schedulerBuiltIn->pKernel);
    }

    auto initializeSchedulerProgramAndKernel = [&] {
        cl_int retVal = CL_SUCCESS;
        auto clDevice = getDevice(0);
        auto src = SchedulerKernel::loadSchedulerKernel(&clDevice->getDevice());

        // Build the scheduler program from the embedded gen binary.
        auto program = Program::createBuiltInFromGenBinary(this,
                                                           devices,
                                                           src.resource.data(),
                                                           src.resource.size(),
                                                           &retVal);
        DEBUG_BREAK_IF(retVal != CL_SUCCESS);
        DEBUG_BREAK_IF(!program);

        retVal = program->processGenBinary(*clDevice);
        DEBUG_BREAK_IF(retVal != CL_SUCCESS);

        schedulerBuiltIn->pProgram = program;

        auto kernelInfo = schedulerBuiltIn->pProgram->getKernelInfo(SchedulerKernel::schedulerName, clDevice->getRootDeviceIndex());
        DEBUG_BREAK_IF(!kernelInfo);

        schedulerBuiltIn->pKernel = Kernel::create<SchedulerKernel>(
            schedulerBuiltIn->pProgram,
            *kernelInfo,
            *clDevice,
            &retVal);

        // The scheduler kernel must not require scratch space.
        UNRECOVERABLE_IF(schedulerBuiltIn->pKernel->getScratchSize() != 0);

        DEBUG_BREAK_IF(retVal != CL_SUCCESS);
    };
    // Thread-safe one-time initialization.
    std::call_once(schedulerBuiltIn->programIsInitialized, initializeSchedulerProgramAndKernel);

    UNRECOVERABLE_IF(schedulerBuiltIn->pKernel == nullptr);
    return *static_cast<SchedulerKernel *>(schedulerBuiltIn->pKernel);
}
458
isDeviceAssociated(const ClDevice & clDevice) const459 bool Context::isDeviceAssociated(const ClDevice &clDevice) const {
460 for (const auto &pDevice : devices) {
461 if (pDevice == &clDevice) {
462 return true;
463 }
464 }
465 return false;
466 }
467
getSubDeviceByIndex(uint32_t subDeviceIndex) const468 ClDevice *Context::getSubDeviceByIndex(uint32_t subDeviceIndex) const {
469
470 auto isExpectedSubDevice = [subDeviceIndex](ClDevice *pClDevice) -> bool {
471 bool isSubDevice = (pClDevice->getDeviceInfo().parentDevice != nullptr);
472 if (isSubDevice == false) {
473 return false;
474 }
475
476 auto &subDevice = static_cast<SubDevice &>(pClDevice->getDevice());
477 return (subDevice.getSubDeviceIndex() == subDeviceIndex);
478 };
479
480 auto foundDeviceIterator = std::find_if(devices.begin(), devices.end(), isExpectedSubDevice);
481 return (foundDeviceIterator != devices.end() ? *foundDeviceIterator : nullptr);
482 }
483
// Returns the process-wide async events handler owned by the (Cl)ExecutionEnvironment
// reachable through the first device.
AsyncEventsHandler &Context::getAsyncEventsHandler() const {
    return *static_cast<ClExecutionEnvironment *>(devices[0]->getExecutionEnvironment())->getAsyncEventsHandler();
}
487
// Returns the aggregated device bitfield for the given root device (built in
// createImpl()). Throws std::out_of_range for a root device not in this context.
DeviceBitfield Context::getDeviceBitfieldForAllocation(uint32_t rootDeviceIndex) const {
    return deviceBitfields.at(rootDeviceIndex);
}
491
setupContextType()492 void Context::setupContextType() {
493 if (contextType == ContextType::CONTEXT_TYPE_DEFAULT) {
494 if (devices.size() > 1) {
495 for (const auto &pDevice : devices) {
496 if (!pDevice->getDeviceInfo().parentDevice) {
497 contextType = ContextType::CONTEXT_TYPE_UNRESTRICTIVE;
498 return;
499 }
500 }
501 }
502 if (devices[0]->getDeviceInfo().parentDevice) {
503 contextType = ContextType::CONTEXT_TYPE_SPECIALIZED;
504 }
505 }
506 }
507
getPlatformFromProperties(const cl_context_properties * properties,cl_int & errcode)508 Platform *Context::getPlatformFromProperties(const cl_context_properties *properties, cl_int &errcode) {
509 errcode = CL_SUCCESS;
510 auto propertiesCurrent = properties;
511 while (propertiesCurrent && *propertiesCurrent) {
512 auto propertyType = propertiesCurrent[0];
513 auto propertyValue = propertiesCurrent[1];
514 propertiesCurrent += 2;
515 if (CL_CONTEXT_PLATFORM == propertyType) {
516 Platform *pPlatform = nullptr;
517 errcode = validateObject(WithCastToInternal(reinterpret_cast<cl_platform_id>(propertyValue), &pPlatform));
518 return pPlatform;
519 }
520 }
521 return nullptr;
522 }
523
isSingleDeviceContext()524 bool Context::isSingleDeviceContext() {
525 return devices[0]->getNumGenericSubDevices() == 0 && getNumDevices() == 1;
526 }
527 } // namespace NEO
528