1 /*
2  * Copyright (C) 2018-2021 Intel Corporation
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  */
7 
8 #include "opencl/source/context/context.h"
9 
10 #include "shared/source/built_ins/built_ins.h"
11 #include "shared/source/command_stream/command_stream_receiver.h"
12 #include "shared/source/compiler_interface/compiler_interface.h"
13 #include "shared/source/debug_settings/debug_settings_manager.h"
14 #include "shared/source/helpers/get_info.h"
15 #include "shared/source/helpers/ptr_math.h"
16 #include "shared/source/helpers/string.h"
17 #include "shared/source/memory_manager/deferred_deleter.h"
18 #include "shared/source/memory_manager/memory_manager.h"
19 #include "shared/source/memory_manager/unified_memory_manager.h"
20 
21 #include "opencl/source/cl_device/cl_device.h"
22 #include "opencl/source/command_queue/command_queue.h"
23 #include "opencl/source/device_queue/device_queue.h"
24 #include "opencl/source/execution_environment/cl_execution_environment.h"
25 #include "opencl/source/gtpin/gtpin_notify.h"
26 #include "opencl/source/helpers/get_info_status_mapper.h"
27 #include "opencl/source/helpers/surface_formats.h"
28 #include "opencl/source/mem_obj/image.h"
29 #include "opencl/source/platform/platform.h"
30 #include "opencl/source/scheduler/scheduler_kernel.h"
31 #include "opencl/source/sharings/sharing.h"
32 #include "opencl/source/sharings/sharing_factory.h"
33 
34 #include "d3d_sharing_functions.h"
35 
36 #include <algorithm>
37 #include <memory>
38 
39 namespace NEO {
40 
Context(void (CL_CALLBACK * funcNotify)(const char *,const void *,size_t,void *),void * data)41 Context::Context(
42     void(CL_CALLBACK *funcNotify)(const char *, const void *, size_t, void *),
43     void *data) {
44     contextCallback = funcNotify;
45     userData = data;
46     sharingFunctions.resize(SharingType::MAX_SHARING_VALUE);
47     schedulerBuiltIn = std::make_unique<BuiltInKernel>();
48 }
49 
~Context()50 Context::~Context() {
51     delete[] properties;
52 
53     for (auto rootDeviceIndex = 0u; rootDeviceIndex < specialQueues.size(); rootDeviceIndex++) {
54         if (specialQueues[rootDeviceIndex]) {
55             delete specialQueues[rootDeviceIndex];
56         }
57     }
58     if (svmAllocsManager) {
59         delete svmAllocsManager;
60     }
61     if (driverDiagnostics) {
62         delete driverDiagnostics;
63     }
64     if (memoryManager && memoryManager->isAsyncDeleterEnabled()) {
65         memoryManager->getDeferredDeleter()->removeClient();
66     }
67     gtpinNotifyContextDestroy((cl_context)this);
68     destructorCallbacks.invoke(this);
69     for (auto &device : devices) {
70         device->decRefInternal();
71     }
72     delete static_cast<SchedulerKernel *>(schedulerBuiltIn->pKernel);
73     delete schedulerBuiltIn->pProgram;
74     schedulerBuiltIn->pKernel = nullptr;
75     schedulerBuiltIn->pProgram = nullptr;
76 }
77 
setDestructorCallback(void (CL_CALLBACK * funcNotify)(cl_context,void *),void * userData)78 cl_int Context::setDestructorCallback(void(CL_CALLBACK *funcNotify)(cl_context, void *),
79                                       void *userData) {
80     std::unique_lock<std::mutex> theLock(mtx);
81     destructorCallbacks.add(funcNotify, userData);
82     return CL_SUCCESS;
83 }
84 
tryGetExistingHostPtrAllocation(const void * ptr,size_t size,uint32_t rootDeviceIndex,GraphicsAllocation * & allocation,InternalMemoryType & memoryType,bool & isCpuCopyAllowed)85 cl_int Context::tryGetExistingHostPtrAllocation(const void *ptr,
86                                                 size_t size,
87                                                 uint32_t rootDeviceIndex,
88                                                 GraphicsAllocation *&allocation,
89                                                 InternalMemoryType &memoryType,
90                                                 bool &isCpuCopyAllowed) {
91     cl_int retVal = tryGetExistingSvmAllocation(ptr, size, rootDeviceIndex, allocation, memoryType, isCpuCopyAllowed);
92     if (retVal != CL_SUCCESS || allocation != nullptr) {
93         return retVal;
94     }
95 
96     retVal = tryGetExistingMapAllocation(ptr, size, allocation);
97     return retVal;
98 }
99 
// Looks up an existing SVM allocation covering the host pointer `ptr`.
// On a hit: writes the allocation (CPU-side one preferred when present)
// and its memory type to the out-parameters, and clears isCpuCopyAllowed
// for device-only unified memory. Returns CL_INVALID_OPERATION when the
// requested [ptr, ptr + size) range extends past the end of the SVM
// allocation; CL_SUCCESS otherwise (including a miss, which leaves
// `allocation` untouched).
cl_int Context::tryGetExistingSvmAllocation(const void *ptr,
                                            size_t size,
                                            uint32_t rootDeviceIndex,
                                            GraphicsAllocation *&allocation,
                                            InternalMemoryType &memoryType,
                                            bool &isCpuCopyAllowed) {
    if (getSVMAllocsManager()) {
        SvmAllocationData *svmEntry = getSVMAllocsManager()->getSVMAlloc(ptr);
        if (svmEntry) {
            memoryType = svmEntry->memoryType;
            // NOTE(review): range check compares the GPU VA of the allocation
            // against the host pointer — presumably valid because SVM keeps
            // host and device addresses equal; confirm for all SVM flavors.
            if ((svmEntry->gpuAllocations.getGraphicsAllocation(rootDeviceIndex)->getGpuAddress() + svmEntry->size) < (castToUint64(ptr) + size)) {
                return CL_INVALID_OPERATION;
            }
            // Prefer the CPU-side allocation when one exists.
            allocation = svmEntry->cpuAllocation ? svmEntry->cpuAllocation : svmEntry->gpuAllocations.getGraphicsAllocation(rootDeviceIndex);
            if (isCpuCopyAllowed) {
                if (svmEntry->memoryType == DEVICE_UNIFIED_MEMORY) {
                    // Device-only USM is not CPU-accessible.
                    isCpuCopyAllowed = false;
                }
            }
        }
    }
    return CL_SUCCESS;
}
123 
tryGetExistingMapAllocation(const void * ptr,size_t size,GraphicsAllocation * & allocation)124 cl_int Context::tryGetExistingMapAllocation(const void *ptr,
125                                             size_t size,
126                                             GraphicsAllocation *&allocation) {
127     if (MapInfo mapInfo = {}; mapOperationsStorage.getInfoForHostPtr(ptr, size, mapInfo)) {
128         if (mapInfo.graphicsAllocation) {
129             allocation = mapInfo.graphicsAllocation;
130         }
131     }
132     return CL_SUCCESS;
133 }
134 
getRootDeviceIndices() const135 const std::set<uint32_t> &Context::getRootDeviceIndices() const {
136     return rootDeviceIndices;
137 }
138 
getMaxRootDeviceIndex() const139 uint32_t Context::getMaxRootDeviceIndex() const {
140     return maxRootDeviceIndex;
141 }
142 
getDefaultDeviceQueue()143 DeviceQueue *Context::getDefaultDeviceQueue() {
144     return defaultDeviceQueue;
145 }
146 
setDefaultDeviceQueue(DeviceQueue * queue)147 void Context::setDefaultDeviceQueue(DeviceQueue *queue) {
148     defaultDeviceQueue = queue;
149 }
150 
getSpecialQueue(uint32_t rootDeviceIndex)151 CommandQueue *Context::getSpecialQueue(uint32_t rootDeviceIndex) {
152     return specialQueues[rootDeviceIndex];
153 }
154 
setSpecialQueue(CommandQueue * commandQueue,uint32_t rootDeviceIndex)155 void Context::setSpecialQueue(CommandQueue *commandQueue, uint32_t rootDeviceIndex) {
156     specialQueues[rootDeviceIndex] = commandQueue;
157 }
overrideSpecialQueueAndDecrementRefCount(CommandQueue * commandQueue,uint32_t rootDeviceIndex)158 void Context::overrideSpecialQueueAndDecrementRefCount(CommandQueue *commandQueue, uint32_t rootDeviceIndex) {
159     setSpecialQueue(commandQueue, rootDeviceIndex);
160     commandQueue->setIsSpecialCommandQueue(true);
161     //decrement ref count that special queue added
162     this->decRefInternal();
163 };
164 
areMultiStorageAllocationsPreferred()165 bool Context::areMultiStorageAllocationsPreferred() {
166     return this->contextType != ContextType::CONTEXT_TYPE_SPECIALIZED;
167 }
168 
// Initializes the context from user properties and the input device list.
// Steps: parse the zero-terminated (type, value) property pairs; copy them
// for later CL_CONTEXT_PROPERTIES queries; let the sharing builder finalize
// sharing-specific properties; record the root device indices and per-root
// device bitfields; create the SVM manager when any device supports SVM;
// and create one special (internal) command queue per root device.
// Returns false and sets errcodeRet on failure.
bool Context::createImpl(const cl_context_properties *properties,
                         const ClDeviceVector &inputDevices,
                         void(CL_CALLBACK *funcNotify)(const char *, const void *, size_t, void *),
                         void *data, cl_int &errcodeRet) {

    auto propertiesCurrent = properties;
    bool interopUserSync = false;
    int32_t driverDiagnosticsUsed = -1; // -1 = diagnostics not requested
    auto sharingBuilder = sharingFactory.build();

    std::unique_ptr<DriverDiagnostics> driverDiagnostics;
    // Properties are (type, value) pairs terminated by a zero entry.
    while (propertiesCurrent && *propertiesCurrent) {
        errcodeRet = CL_SUCCESS;

        auto propertyType = propertiesCurrent[0];
        auto propertyValue = propertiesCurrent[1];
        propertiesCurrent += 2;

        switch (propertyType) {
        case CL_CONTEXT_PLATFORM:
            // Platform was already validated by the API layer; nothing to do.
            break;
        case CL_CONTEXT_SHOW_DIAGNOSTICS_INTEL:
            driverDiagnosticsUsed = static_cast<int32_t>(propertyValue);
            break;
        case CL_CONTEXT_INTEROP_USER_SYNC:
            interopUserSync = propertyValue > 0;
            break;
        default:
            // Unknown properties are delegated to the sharing extensions.
            if (!sharingBuilder->processProperties(propertyType, propertyValue)) {
                errcodeRet = CL_INVALID_PROPERTY;
                return false;
            }
            break;
        }
    }

    auto numProperties = ptrDiff(propertiesCurrent, properties) / sizeof(cl_context_properties);
    cl_context_properties *propertiesNew = nullptr;

    // copy the user properties if there are any
    if (numProperties) {
        propertiesNew = new cl_context_properties[numProperties + 1];
        memcpy_s(propertiesNew, (numProperties + 1) * sizeof(cl_context_properties), properties, numProperties * sizeof(cl_context_properties));
        propertiesNew[numProperties] = 0; // keep the zero terminator
        numProperties++;
    }

    // Debug flag overrides whatever the user requested.
    if (DebugManager.flags.PrintDriverDiagnostics.get() != -1) {
        driverDiagnosticsUsed = DebugManager.flags.PrintDriverDiagnostics.get();
    }
    if (driverDiagnosticsUsed >= 0) {
        driverDiagnostics.reset(new DriverDiagnostics((cl_diagnostics_verbose_level)driverDiagnosticsUsed));
    }

    this->numProperties = numProperties;
    this->properties = propertiesNew;
    this->setInteropUserSyncEnabled(interopUserSync);

    if (!sharingBuilder->finalizeProperties(*this, errcodeRet)) {
        return false;
    }

    bool containsDeviceWithSubdevices = false;
    for (const auto &device : inputDevices) {
        rootDeviceIndices.insert(device->getRootDeviceIndex());
        containsDeviceWithSubdevices |= device->getNumGenericSubDevices() > 1;
    }

    // Ownership of the diagnostics object passes to the context.
    this->driverDiagnostics = driverDiagnostics.release();
    // Multi-root-device contexts with subdevice-carrying devices are only
    // allowed behind the EnableMultiRootDeviceContexts debug flag.
    if (rootDeviceIndices.size() > 1 && containsDeviceWithSubdevices && !DebugManager.flags.EnableMultiRootDeviceContexts.get()) {
        DEBUG_BREAK_IF("No support for context with multiple devices with subdevices");
        errcodeRet = CL_OUT_OF_HOST_MEMORY;
        return false;
    }

    devices = inputDevices;
    // Aggregate the sub-device bitfield of every device per root device.
    for (auto &rootDeviceIndex : rootDeviceIndices) {
        DeviceBitfield deviceBitfield{};
        for (const auto &pDevice : devices) {
            if (pDevice->getRootDeviceIndex() == rootDeviceIndex) {
                deviceBitfield |= pDevice->getDeviceBitfield();
            }
        }
        deviceBitfields.insert({rootDeviceIndex, deviceBitfield});
    }

    if (devices.size() > 0) {
        maxRootDeviceIndex = *std::max_element(rootDeviceIndices.begin(), rootDeviceIndices.end(), std::less<uint32_t const>());
        specialQueues.resize(maxRootDeviceIndex + 1u);
        auto device = this->getDevice(0);
        this->memoryManager = device->getMemoryManager();
        // Balanced by removeClient() in the destructor.
        if (memoryManager->isAsyncDeleterEnabled()) {
            memoryManager->getDeferredDeleter()->addClient();
        }

        bool anySvmSupport = false;
        for (auto &device : devices) {
            // Internal references are released in the destructor.
            device->incRefInternal();
            anySvmSupport |= device->getHardwareInfo().capabilityTable.ftrSvm;
        }

        setupContextType();
        if (anySvmSupport) {
            this->svmAllocsManager = new SVMAllocsManager(this->memoryManager,
                                                          this->areMultiStorageAllocationsPreferred());
        }
    }

    // One special (internal) queue per root device; the queue's context
    // reference is dropped so it does not keep the context alive.
    for (auto &device : devices) {
        if (!specialQueues[device->getRootDeviceIndex()]) {
            auto commandQueue = CommandQueue::create(this, device, nullptr, true, errcodeRet); // NOLINT
            DEBUG_BREAK_IF(commandQueue == nullptr);
            overrideSpecialQueueAndDecrementRefCount(commandQueue, device->getRootDeviceIndex());
        }
    }

    return true;
}
287 
// Implements clGetContextInfo for this context. Selects the source buffer
// and size for the requested paramName, then delegates the copy/size
// negotiation to GetInfo::getInfo. Unknown parameters fall through to
// getOsContextInfo; valueSize stays at invalidSourceSize when nothing
// matched, which GetInfo::getInfo maps to an error status.
cl_int Context::getInfo(cl_context_info paramName, size_t paramValueSize,
                        void *paramValue, size_t *paramValueSizeRet) {
    cl_int retVal;
    size_t valueSize = GetInfo::invalidSourceSize;
    const void *pValue = nullptr;
    cl_uint numDevices;
    cl_uint refCount = 0;
    std::vector<cl_device_id> devIDs;
    auto callGetinfo = true;

    switch (paramName) {
    case CL_CONTEXT_DEVICES:
        valueSize = devices.size() * sizeof(cl_device_id);
        devIDs.data()' stays valid until the function returns.
        devices.toDeviceIDs(devIDs);
        pValue = devIDs.data();
        break;

    case CL_CONTEXT_NUM_DEVICES:
        numDevices = (cl_uint)(devices.size());
        valueSize = sizeof(numDevices);
        pValue = &numDevices;
        break;

    case CL_CONTEXT_PROPERTIES:
        valueSize = this->numProperties * sizeof(cl_context_properties);
        pValue = this->properties;
        // No stored properties: report size 0 without copying anything.
        if (valueSize == 0) {
            callGetinfo = false;
        }

        break;

    case CL_CONTEXT_REFERENCE_COUNT:
        refCount = static_cast<cl_uint>(this->getReference());
        valueSize = sizeof(refCount);
        pValue = &refCount;
        break;

    default:
        pValue = getOsContextInfo(paramName, &valueSize);
        break;
    }

    GetInfoStatus getInfoStatus = GetInfoStatus::SUCCESS;
    if (callGetinfo) {
        getInfoStatus = GetInfo::getInfo(paramValue, paramValueSize, pValue, valueSize);
    }

    retVal = changeGetInfoStatusToCLResultType(getInfoStatus);
    // paramValueSizeRet is set even when the copy failed or was skipped.
    GetInfo::setParamValueReturnSize(paramValueSizeRet, valueSize, getInfoStatus);

    return retVal;
}
341 
getNumDevices() const342 size_t Context::getNumDevices() const {
343     return devices.size();
344 }
345 
containsMultipleSubDevices(uint32_t rootDeviceIndex) const346 bool Context::containsMultipleSubDevices(uint32_t rootDeviceIndex) const {
347     return deviceBitfields.at(rootDeviceIndex).count() > 1;
348 }
349 
getDevice(size_t deviceOrdinal) const350 ClDevice *Context::getDevice(size_t deviceOrdinal) const {
351     return (ClDevice *)devices[deviceOrdinal];
352 }
353 
// Implements clGetSupportedImageFormats. Appends the format tables that
// match the access flags, image type, and enabled extensions into
// imageFormats (up to numEntries entries), and reports the total number of
// matching formats via numImageFormatsReturned. Always returns CL_SUCCESS.
cl_int Context::getSupportedImageFormats(
    Device *device,
    cl_mem_flags flags,
    cl_mem_object_type imageType,
    cl_uint numEntries,
    cl_image_format *imageFormats,
    cl_uint *numImageFormatsReturned) {
    size_t numImageFormats = 0;

    const bool nv12ExtensionEnabled = device->getSpecializedDevice<ClDevice>()->getDeviceInfo().nv12Extension;
    const bool packedYuvExtensionEnabled = device->getSpecializedDevice<ClDevice>()->getDeviceInfo().packedYuvExtension;

    // Copies formats into the caller buffer (bounded by numEntries) while
    // always counting the full number of matching formats.
    auto appendImageFormats = [&](ArrayRef<const ClSurfaceFormatInfo> formats) {
        if (imageFormats) {
            size_t offset = numImageFormats;
            for (size_t i = 0; i < formats.size() && offset < numEntries; ++i) {
                imageFormats[offset++] = formats[i].OCLImageFormat;
            }
        }
        numImageFormats += formats.size();
    };

    if (flags & CL_MEM_READ_ONLY) {
        // OCL 2.1+ capable devices expose the larger read-only format set.
        if (this->getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features) {
            appendImageFormats(SurfaceFormats::readOnly20());
        } else {
            appendImageFormats(SurfaceFormats::readOnly12());
        }
        if (Image::isImage2d(imageType) && nv12ExtensionEnabled) {
            appendImageFormats(SurfaceFormats::planarYuv());
        }
        if (Image::isImage2dOr2dArray(imageType)) {
            appendImageFormats(SurfaceFormats::readOnlyDepth());
        }
        if (Image::isImage2d(imageType) && packedYuvExtensionEnabled) {
            appendImageFormats(SurfaceFormats::packedYuv());
        }
    } else if (flags & CL_MEM_WRITE_ONLY) {
        appendImageFormats(SurfaceFormats::writeOnly());
        if (Image::isImage2dOr2dArray(imageType)) {
            appendImageFormats(SurfaceFormats::readWriteDepth());
        }
    } else if (nv12ExtensionEnabled && (flags & CL_MEM_NO_ACCESS_INTEL)) {
        // NV12 no-access path mirrors the read-only table selection.
        if (this->getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features) {
            appendImageFormats(SurfaceFormats::readOnly20());
        } else {
            appendImageFormats(SurfaceFormats::readOnly12());
        }
        if (Image::isImage2d(imageType)) {
            appendImageFormats(SurfaceFormats::planarYuv());
        }
    } else {
        // Default: read-write access.
        appendImageFormats(SurfaceFormats::readWrite());
        if (Image::isImage2dOr2dArray(imageType)) {
            appendImageFormats(SurfaceFormats::readWriteDepth());
        }
    }
    if (numImageFormatsReturned) {
        *numImageFormatsReturned = static_cast<cl_uint>(numImageFormats);
    }
    return CL_SUCCESS;
}
416 
// Returns the device-enqueue scheduler kernel, building its program and
// kernel on first use (guarded by std::call_once on programIsInitialized).
SchedulerKernel &Context::getSchedulerKernel() {
    // Fast path once initialization has completed.
    // NOTE(review): this read of pKernel is not synchronized with the
    // call_once below — presumably callers reach this only after a
    // happens-before edge exists; confirm thread-safety expectations.
    if (schedulerBuiltIn->pKernel) {
        return *static_cast<SchedulerKernel *>(schedulerBuiltIn->pKernel);
    }

    auto initializeSchedulerProgramAndKernel = [&] {
        cl_int retVal = CL_SUCCESS;
        auto clDevice = getDevice(0);
        // Load the precompiled scheduler binary for this device.
        auto src = SchedulerKernel::loadSchedulerKernel(&clDevice->getDevice());

        auto program = Program::createBuiltInFromGenBinary(this,
                                                           devices,
                                                           src.resource.data(),
                                                           src.resource.size(),
                                                           &retVal);
        DEBUG_BREAK_IF(retVal != CL_SUCCESS);
        DEBUG_BREAK_IF(!program);

        retVal = program->processGenBinary(*clDevice);
        DEBUG_BREAK_IF(retVal != CL_SUCCESS);

        // Program and kernel are owned by the context (freed in ~Context).
        schedulerBuiltIn->pProgram = program;

        auto kernelInfo = schedulerBuiltIn->pProgram->getKernelInfo(SchedulerKernel::schedulerName, clDevice->getRootDeviceIndex());
        DEBUG_BREAK_IF(!kernelInfo);

        schedulerBuiltIn->pKernel = Kernel::create<SchedulerKernel>(
            schedulerBuiltIn->pProgram,
            *kernelInfo,
            *clDevice,
            &retVal);

        // Scheduler kernel must not require scratch space.
        UNRECOVERABLE_IF(schedulerBuiltIn->pKernel->getScratchSize() != 0);

        DEBUG_BREAK_IF(retVal != CL_SUCCESS);
    };
    std::call_once(schedulerBuiltIn->programIsInitialized, initializeSchedulerProgramAndKernel);

    UNRECOVERABLE_IF(schedulerBuiltIn->pKernel == nullptr);
    return *static_cast<SchedulerKernel *>(schedulerBuiltIn->pKernel);
}
458 
isDeviceAssociated(const ClDevice & clDevice) const459 bool Context::isDeviceAssociated(const ClDevice &clDevice) const {
460     for (const auto &pDevice : devices) {
461         if (pDevice == &clDevice) {
462             return true;
463         }
464     }
465     return false;
466 }
467 
getSubDeviceByIndex(uint32_t subDeviceIndex) const468 ClDevice *Context::getSubDeviceByIndex(uint32_t subDeviceIndex) const {
469 
470     auto isExpectedSubDevice = [subDeviceIndex](ClDevice *pClDevice) -> bool {
471         bool isSubDevice = (pClDevice->getDeviceInfo().parentDevice != nullptr);
472         if (isSubDevice == false) {
473             return false;
474         }
475 
476         auto &subDevice = static_cast<SubDevice &>(pClDevice->getDevice());
477         return (subDevice.getSubDeviceIndex() == subDeviceIndex);
478     };
479 
480     auto foundDeviceIterator = std::find_if(devices.begin(), devices.end(), isExpectedSubDevice);
481     return (foundDeviceIterator != devices.end() ? *foundDeviceIterator : nullptr);
482 }
483 
getAsyncEventsHandler() const484 AsyncEventsHandler &Context::getAsyncEventsHandler() const {
485     return *static_cast<ClExecutionEnvironment *>(devices[0]->getExecutionEnvironment())->getAsyncEventsHandler();
486 }
487 
getDeviceBitfieldForAllocation(uint32_t rootDeviceIndex) const488 DeviceBitfield Context::getDeviceBitfieldForAllocation(uint32_t rootDeviceIndex) const {
489     return deviceBitfields.at(rootDeviceIndex);
490 }
491 
setupContextType()492 void Context::setupContextType() {
493     if (contextType == ContextType::CONTEXT_TYPE_DEFAULT) {
494         if (devices.size() > 1) {
495             for (const auto &pDevice : devices) {
496                 if (!pDevice->getDeviceInfo().parentDevice) {
497                     contextType = ContextType::CONTEXT_TYPE_UNRESTRICTIVE;
498                     return;
499                 }
500             }
501         }
502         if (devices[0]->getDeviceInfo().parentDevice) {
503             contextType = ContextType::CONTEXT_TYPE_SPECIALIZED;
504         }
505     }
506 }
507 
getPlatformFromProperties(const cl_context_properties * properties,cl_int & errcode)508 Platform *Context::getPlatformFromProperties(const cl_context_properties *properties, cl_int &errcode) {
509     errcode = CL_SUCCESS;
510     auto propertiesCurrent = properties;
511     while (propertiesCurrent && *propertiesCurrent) {
512         auto propertyType = propertiesCurrent[0];
513         auto propertyValue = propertiesCurrent[1];
514         propertiesCurrent += 2;
515         if (CL_CONTEXT_PLATFORM == propertyType) {
516             Platform *pPlatform = nullptr;
517             errcode = validateObject(WithCastToInternal(reinterpret_cast<cl_platform_id>(propertyValue), &pPlatform));
518             return pPlatform;
519         }
520     }
521     return nullptr;
522 }
523 
isSingleDeviceContext()524 bool Context::isSingleDeviceContext() {
525     return devices[0]->getNumGenericSubDevices() == 0 && getNumDevices() == 1;
526 }
527 } // namespace NEO
528