1 /*
2  * Copyright (C) 2018-2021 Intel Corporation
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  */
7 
8 #include "opencl/source/mem_obj/mem_obj.h"
9 
10 #include "shared/source/command_stream/command_stream_receiver.h"
11 #include "shared/source/gmm_helper/gmm.h"
12 #include "shared/source/gmm_helper/resource_info.h"
13 #include "shared/source/helpers/aligned_memory.h"
14 #include "shared/source/helpers/bit_helpers.h"
15 #include "shared/source/helpers/get_info.h"
16 #include "shared/source/memory_manager/deferred_deleter.h"
17 #include "shared/source/memory_manager/internal_allocation_storage.h"
18 #include "shared/source/memory_manager/memory_manager.h"
19 #include "shared/source/os_interface/os_context.h"
20 
21 #include "opencl/source/cl_device/cl_device.h"
22 #include "opencl/source/command_queue/command_queue.h"
23 #include "opencl/source/context/context.h"
24 #include "opencl/source/helpers/get_info_status_mapper.h"
25 
26 #include <algorithm>
27 
28 namespace NEO {
29 
MemObj(Context * context,cl_mem_object_type memObjectType,const MemoryProperties & memoryProperties,cl_mem_flags flags,cl_mem_flags_intel flagsIntel,size_t size,void * memoryStorage,void * hostPtr,MultiGraphicsAllocation multiGraphicsAllocation,bool zeroCopy,bool isHostPtrSVM,bool isObjectRedescribed)30 MemObj::MemObj(Context *context,
31                cl_mem_object_type memObjectType,
32                const MemoryProperties &memoryProperties,
33                cl_mem_flags flags,
34                cl_mem_flags_intel flagsIntel,
35                size_t size,
36                void *memoryStorage,
37                void *hostPtr,
38                MultiGraphicsAllocation multiGraphicsAllocation,
39                bool zeroCopy,
40                bool isHostPtrSVM,
41                bool isObjectRedescribed)
42     : context(context), memObjectType(memObjectType), memoryProperties(memoryProperties), flags(flags), flagsIntel(flagsIntel), size(size),
43       memoryStorage(memoryStorage), hostPtr(hostPtr),
44       isZeroCopy(zeroCopy), isHostPtrSVM(isHostPtrSVM), isObjectRedescribed(isObjectRedescribed),
45       multiGraphicsAllocation(std::move(multiGraphicsAllocation)),
46       mapAllocations(static_cast<uint32_t>(this->multiGraphicsAllocation.getGraphicsAllocations().size() - 1)) {
47     if (context) {
48         context->incRefInternal();
49         memoryManager = context->getMemoryManager();
50         auto device = context->getDevice(0);
51         executionEnvironment = device->getExecutionEnvironment();
52     }
53 }
54 
~MemObj()55 MemObj::~MemObj() {
56     if (!context) {
57         return;
58     }
59 
60     bool needWait = false;
61 
62     if (allocatedMapPtr != nullptr) {
63         needWait = true;
64     }
65 
66     if (auto mapOperationsHandler = getMapOperationsHandlerIfExists(); mapOperationsHandler != nullptr) {
67         if (mapOperationsHandler->size() > 0 && !getCpuAddressForMapping()) {
68             needWait = true;
69         }
70         context->getMapOperationsStorage().removeHandler(this);
71     }
72 
73     if (!destructorCallbacks.empty()) {
74         needWait = true;
75     }
76 
77     if (!isObjectRedescribed) {
78         if (peekSharingHandler()) {
79             peekSharingHandler()->releaseReusedGraphicsAllocation();
80         }
81 
82         for (auto graphicsAllocation : multiGraphicsAllocation.getGraphicsAllocations()) {
83             auto rootDeviceIndex = graphicsAllocation ? graphicsAllocation->getRootDeviceIndex() : 0;
84             bool doAsyncDestructions = DebugManager.flags.EnableAsyncDestroyAllocations.get();
85             if (graphicsAllocation && !associatedMemObject && !isHostPtrSVM && graphicsAllocation->peekReuseCount() == 0) {
86                 memoryManager->removeAllocationFromHostPtrManager(graphicsAllocation);
87                 if (!doAsyncDestructions) {
88                     needWait = true;
89                 }
90                 if (needWait && graphicsAllocation->isUsed()) {
91                     memoryManager->waitForEnginesCompletion(*graphicsAllocation);
92                 }
93                 destroyGraphicsAllocation(graphicsAllocation, doAsyncDestructions);
94                 graphicsAllocation = nullptr;
95             }
96             if (!associatedMemObject) {
97                 releaseMapAllocation(rootDeviceIndex, doAsyncDestructions);
98             }
99             if (mcsAllocation) {
100                 destroyGraphicsAllocation(mcsAllocation, false);
101             }
102             if (graphicsAllocation && associatedMemObject) {
103                 if (associatedMemObject->getGraphicsAllocation(graphicsAllocation->getRootDeviceIndex()) != graphicsAllocation) {
104                     destroyGraphicsAllocation(graphicsAllocation, false);
105                 }
106                 associatedMemObject->decRefInternal();
107             }
108         }
109         if (!associatedMemObject) {
110             releaseAllocatedMapPtr();
111         }
112     }
113     destructorCallbacks.invoke(this);
114 
115     context->decRefInternal();
116 }
117 
getMemObjectInfo(cl_mem_info paramName,size_t paramValueSize,void * paramValue,size_t * paramValueSizeRet)118 cl_int MemObj::getMemObjectInfo(cl_mem_info paramName,
119                                 size_t paramValueSize,
120                                 void *paramValue,
121                                 size_t *paramValueSizeRet) {
122     cl_int retVal;
123     size_t srcParamSize = GetInfo::invalidSourceSize;
124     void *srcParam = nullptr;
125     cl_bool usesSVMPointer;
126     cl_uint refCnt = 0;
127     cl_uint mapCount = 0;
128     cl_mem clAssociatedMemObject = static_cast<cl_mem>(this->associatedMemObject);
129     cl_context ctx = nullptr;
130     uint64_t internalHandle = 0llu;
131     auto allocation = getMultiGraphicsAllocation().getDefaultGraphicsAllocation();
132     cl_bool usesCompression;
133 
134     switch (paramName) {
135     case CL_MEM_TYPE:
136         srcParamSize = sizeof(memObjectType);
137         srcParam = &memObjectType;
138         break;
139 
140     case CL_MEM_FLAGS:
141         srcParamSize = sizeof(flags);
142         srcParam = &flags;
143         break;
144 
145     case CL_MEM_SIZE:
146         srcParamSize = sizeof(size);
147         srcParam = &size;
148         break;
149 
150     case CL_MEM_HOST_PTR:
151         srcParamSize = sizeof(hostPtr);
152         srcParam = &hostPtr;
153         break;
154 
155     case CL_MEM_CONTEXT:
156         srcParamSize = sizeof(context);
157         ctx = context;
158         srcParam = &ctx;
159         break;
160 
161     case CL_MEM_USES_SVM_POINTER:
162         usesSVMPointer = isHostPtrSVM && isValueSet(flags, CL_MEM_USE_HOST_PTR);
163         srcParamSize = sizeof(cl_bool);
164         srcParam = &usesSVMPointer;
165         break;
166 
167     case CL_MEM_OFFSET:
168         srcParamSize = sizeof(offset);
169         srcParam = &offset;
170         break;
171 
172     case CL_MEM_ASSOCIATED_MEMOBJECT:
173         srcParamSize = sizeof(clAssociatedMemObject);
174         srcParam = &clAssociatedMemObject;
175         break;
176 
177     case CL_MEM_MAP_COUNT:
178         srcParamSize = sizeof(mapCount);
179         mapCount = static_cast<cl_uint>(getMapOperationsHandler().size());
180         srcParam = &mapCount;
181         break;
182 
183     case CL_MEM_REFERENCE_COUNT:
184         refCnt = static_cast<cl_uint>(this->getReference());
185         srcParamSize = sizeof(refCnt);
186         srcParam = &refCnt;
187         break;
188 
189     case CL_MEM_ALLOCATION_HANDLE_INTEL:
190         internalHandle = multiGraphicsAllocation.getDefaultGraphicsAllocation()->peekInternalHandle(this->memoryManager);
191         srcParamSize = sizeof(internalHandle);
192         srcParam = &internalHandle;
193         break;
194 
195     case CL_MEM_USES_COMPRESSION_INTEL:
196         usesCompression = allocation->isCompressionEnabled();
197         srcParam = &usesCompression;
198         srcParamSize = sizeof(cl_bool);
199         break;
200 
201     case CL_MEM_PROPERTIES:
202         srcParamSize = propertiesVector.size() * sizeof(cl_mem_properties);
203         srcParam = propertiesVector.data();
204         break;
205 
206     default:
207         getOsSpecificMemObjectInfo(paramName, &srcParamSize, &srcParam);
208         break;
209     }
210 
211     auto getInfoStatus = GetInfo::getInfo(paramValue, paramValueSize, srcParam, srcParamSize);
212     retVal = changeGetInfoStatusToCLResultType(getInfoStatus);
213     GetInfo::setParamValueReturnSize(paramValueSizeRet, srcParamSize, getInfoStatus);
214 
215     return retVal;
216 }
217 
setDestructorCallback(void (CL_CALLBACK * funcNotify)(cl_mem,void *),void * userData)218 cl_int MemObj::setDestructorCallback(void(CL_CALLBACK *funcNotify)(cl_mem, void *),
219                                      void *userData) {
220     std::unique_lock<std::mutex> theLock(mtx);
221     destructorCallbacks.add(funcNotify, userData);
222     return CL_SUCCESS;
223 }
224 
getCpuAddress() const225 void *MemObj::getCpuAddress() const {
226     return memoryStorage;
227 }
228 
getHostPtr() const229 void *MemObj::getHostPtr() const {
230     return hostPtr;
231 }
232 
getSize() const233 size_t MemObj::getSize() const {
234     return size;
235 }
236 
setAllocatedMapPtr(void * allocatedMapPtr)237 void MemObj::setAllocatedMapPtr(void *allocatedMapPtr) {
238     this->allocatedMapPtr = allocatedMapPtr;
239 }
240 
isMemObjZeroCopy() const241 bool MemObj::isMemObjZeroCopy() const {
242     return isZeroCopy;
243 }
244 
isMemObjWithHostPtrSVM() const245 bool MemObj::isMemObjWithHostPtrSVM() const {
246     return isHostPtrSVM;
247 }
248 
isMemObjUncacheable() const249 bool MemObj::isMemObjUncacheable() const {
250     return isValueSet(flagsIntel, CL_MEM_LOCALLY_UNCACHED_RESOURCE);
251 }
252 
isMemObjUncacheableForSurfaceState() const253 bool MemObj::isMemObjUncacheableForSurfaceState() const {
254     return isAnyBitSet(flagsIntel, CL_MEM_LOCALLY_UNCACHED_SURFACE_STATE_RESOURCE | CL_MEM_LOCALLY_UNCACHED_RESOURCE);
255 }
256 
getGraphicsAllocation(uint32_t rootDeviceIndex) const257 GraphicsAllocation *MemObj::getGraphicsAllocation(uint32_t rootDeviceIndex) const {
258     return multiGraphicsAllocation.getGraphicsAllocation(rootDeviceIndex);
259 }
260 
checkUsageAndReleaseOldAllocation(uint32_t rootDeviceIndex)261 void MemObj::checkUsageAndReleaseOldAllocation(uint32_t rootDeviceIndex) {
262     auto graphicsAllocation = getGraphicsAllocation(rootDeviceIndex);
263     if (graphicsAllocation != nullptr && (peekSharingHandler() == nullptr || graphicsAllocation->peekReuseCount() == 0)) {
264         memoryManager->checkGpuUsageAndDestroyGraphicsAllocations(graphicsAllocation);
265     }
266 }
267 
resetGraphicsAllocation(GraphicsAllocation * newGraphicsAllocation)268 void MemObj::resetGraphicsAllocation(GraphicsAllocation *newGraphicsAllocation) {
269     TakeOwnershipWrapper<MemObj> lock(*this);
270     checkUsageAndReleaseOldAllocation(newGraphicsAllocation->getRootDeviceIndex());
271     multiGraphicsAllocation.addAllocation(newGraphicsAllocation);
272 }
273 
removeGraphicsAllocation(uint32_t rootDeviceIndex)274 void MemObj::removeGraphicsAllocation(uint32_t rootDeviceIndex) {
275     TakeOwnershipWrapper<MemObj> lock(*this);
276     checkUsageAndReleaseOldAllocation(rootDeviceIndex);
277     multiGraphicsAllocation.removeAllocation(rootDeviceIndex);
278 }
279 
readMemObjFlagsInvalid()280 bool MemObj::readMemObjFlagsInvalid() {
281     return isValueSet(flags, CL_MEM_HOST_WRITE_ONLY) || isValueSet(flags, CL_MEM_HOST_NO_ACCESS);
282 }
283 
writeMemObjFlagsInvalid()284 bool MemObj::writeMemObjFlagsInvalid() {
285     return isValueSet(flags, CL_MEM_HOST_READ_ONLY) || isValueSet(flags, CL_MEM_HOST_NO_ACCESS);
286 }
287 
mapMemObjFlagsInvalid(cl_map_flags mapFlags)288 bool MemObj::mapMemObjFlagsInvalid(cl_map_flags mapFlags) {
289     return (writeMemObjFlagsInvalid() && (mapFlags & CL_MAP_WRITE)) ||
290            (readMemObjFlagsInvalid() && (mapFlags & CL_MAP_READ));
291 }
292 
setHostPtrMinSize(size_t size)293 void MemObj::setHostPtrMinSize(size_t size) {
294     hostPtrMinSize = size;
295 }
296 
getCpuAddressForMapping()297 void *MemObj::getCpuAddressForMapping() {
298     void *ptrToReturn = nullptr;
299     if (isValueSet(flags, CL_MEM_USE_HOST_PTR)) {
300         ptrToReturn = this->hostPtr;
301     } else {
302         ptrToReturn = this->memoryStorage;
303     }
304     return ptrToReturn;
305 }
getCpuAddressForMemoryTransfer()306 void *MemObj::getCpuAddressForMemoryTransfer() {
307     void *ptrToReturn = nullptr;
308     if (isValueSet(flags, CL_MEM_USE_HOST_PTR) && this->isMemObjZeroCopy()) {
309         ptrToReturn = this->hostPtr;
310     } else {
311         ptrToReturn = this->memoryStorage;
312     }
313     return ptrToReturn;
314 }
releaseAllocatedMapPtr()315 void MemObj::releaseAllocatedMapPtr() {
316     if (allocatedMapPtr) {
317         DEBUG_BREAK_IF(isValueSet(flags, CL_MEM_USE_HOST_PTR));
318         memoryManager->freeSystemMemory(allocatedMapPtr);
319     }
320     allocatedMapPtr = nullptr;
321 }
322 
releaseMapAllocation(uint32_t rootDeviceIndex,bool asyncDestroy)323 void MemObj::releaseMapAllocation(uint32_t rootDeviceIndex, bool asyncDestroy) {
324     auto mapAllocation = mapAllocations.getGraphicsAllocation(rootDeviceIndex);
325     if (mapAllocation && !isHostPtrSVM) {
326         if (asyncDestroy && !isValueSet(flags, CL_MEM_USE_HOST_PTR)) {
327             destroyGraphicsAllocation(mapAllocation, true);
328         } else {
329             if (mapAllocation->isUsed()) {
330                 memoryManager->waitForEnginesCompletion(*mapAllocation);
331             }
332             destroyGraphicsAllocation(mapAllocation, false);
333         }
334     }
335 }
336 
destroyGraphicsAllocation(GraphicsAllocation * allocation,bool asyncDestroy)337 void MemObj::destroyGraphicsAllocation(GraphicsAllocation *allocation, bool asyncDestroy) {
338     if (asyncDestroy) {
339         memoryManager->checkGpuUsageAndDestroyGraphicsAllocations(allocation);
340     } else {
341         memoryManager->freeGraphicsMemory(allocation);
342     }
343 }
344 
checkIfMemoryTransferIsRequired(size_t offsetInMemObject,size_t offsetInHostPtr,const void * hostPtr,cl_command_type cmdType)345 bool MemObj::checkIfMemoryTransferIsRequired(size_t offsetInMemObject, size_t offsetInHostPtr, const void *hostPtr, cl_command_type cmdType) {
346     auto bufferStorage = ptrOffset(this->getCpuAddressForMemoryTransfer(), offsetInMemObject);
347     auto hostStorage = ptrOffset(hostPtr, offsetInHostPtr);
348     auto isMemTransferNeeded = !((bufferStorage == hostStorage) &&
349                                  (cmdType == CL_COMMAND_WRITE_BUFFER || cmdType == CL_COMMAND_READ_BUFFER ||
350                                   cmdType == CL_COMMAND_WRITE_BUFFER_RECT || cmdType == CL_COMMAND_READ_BUFFER_RECT ||
351                                   cmdType == CL_COMMAND_WRITE_IMAGE || cmdType == CL_COMMAND_READ_IMAGE));
352     return isMemTransferNeeded;
353 }
354 
getBasePtrForMap(uint32_t rootDeviceIndex)355 void *MemObj::getBasePtrForMap(uint32_t rootDeviceIndex) {
356     if (associatedMemObject) {
357         return associatedMemObject->getBasePtrForMap(rootDeviceIndex);
358     }
359     if (getFlags() & CL_MEM_USE_HOST_PTR) {
360         return getHostPtr();
361     } else {
362         TakeOwnershipWrapper<MemObj> memObjOwnership(*this);
363         if (getMapAllocation(rootDeviceIndex)) {
364             return getMapAllocation(rootDeviceIndex)->getUnderlyingBuffer();
365         } else {
366             auto memory = getAllocatedMapPtr();
367             if (!memory) {
368                 memory = memoryManager->allocateSystemMemory(getSize(), MemoryConstants::pageSize);
369                 setAllocatedMapPtr(memory);
370             }
371             AllocationProperties properties{rootDeviceIndex,
372                                             false, // allocateMemory
373                                             getSize(), GraphicsAllocation::AllocationType::MAP_ALLOCATION,
374                                             false, //isMultiStorageAllocation
375                                             context->getDeviceBitfieldForAllocation(rootDeviceIndex)};
376 
377             auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(properties, memory);
378             setMapAllocation(allocation);
379             return getAllocatedMapPtr();
380         }
381     }
382 }
383 
getMapOperationsHandler()384 MapOperationsHandler &MemObj::getMapOperationsHandler() {
385     return context->getMapOperationsStorage().getHandler(this);
386 }
387 
getMapOperationsHandlerIfExists()388 MapOperationsHandler *MemObj::getMapOperationsHandlerIfExists() {
389     return context->getMapOperationsStorage().getHandlerIfExists(this);
390 }
391 
addMappedPtr(void * ptr,size_t ptrLength,cl_map_flags & mapFlags,MemObjSizeArray & size,MemObjOffsetArray & offset,uint32_t mipLevel,GraphicsAllocation * graphicsAllocation)392 bool MemObj::addMappedPtr(void *ptr, size_t ptrLength, cl_map_flags &mapFlags,
393                           MemObjSizeArray &size, MemObjOffsetArray &offset,
394                           uint32_t mipLevel, GraphicsAllocation *graphicsAllocation) {
395     return getMapOperationsHandler().add(ptr, ptrLength, mapFlags, size, offset, mipLevel, graphicsAllocation);
396 }
397 
findMappedPtr(void * mappedPtr,MapInfo & outMapInfo)398 bool MemObj::findMappedPtr(void *mappedPtr, MapInfo &outMapInfo) {
399     return getMapOperationsHandler().find(mappedPtr, outMapInfo);
400 }
401 
removeMappedPtr(void * mappedPtr)402 void MemObj::removeMappedPtr(void *mappedPtr) {
403     getMapOperationsHandler().remove(mappedPtr);
404 }
405 
isTiledAllocation() const406 bool MemObj::isTiledAllocation() const {
407     auto graphicsAllocation = multiGraphicsAllocation.getDefaultGraphicsAllocation();
408     auto gmm = graphicsAllocation->getDefaultGmm();
409     return gmm && (gmm->gmmResourceInfo->getTileModeSurfaceState() != 0);
410 }
411 
mappingOnCpuAllowed() const412 bool MemObj::mappingOnCpuAllowed() const {
413     auto graphicsAllocation = multiGraphicsAllocation.getDefaultGraphicsAllocation();
414     return !isTiledAllocation() && !peekSharingHandler() && !isMipMapped(this) && !DebugManager.flags.DisableZeroCopyForBuffers.get() &&
415            !graphicsAllocation->isCompressionEnabled() && MemoryPool::isSystemMemoryPool(graphicsAllocation->getMemoryPool());
416 }
417 
storeProperties(const cl_mem_properties * properties)418 void MemObj::storeProperties(const cl_mem_properties *properties) {
419     if (properties) {
420         for (size_t i = 0; properties[i] != 0; i += 2) {
421             propertiesVector.push_back(properties[i]);
422             propertiesVector.push_back(properties[i + 1]);
423         }
424         propertiesVector.push_back(0);
425     }
426 }
427 
cleanAllGraphicsAllocations(Context & context,MemoryManager & memoryManager,AllocationInfoType & allocationInfo,bool isParentObject)428 void MemObj::cleanAllGraphicsAllocations(Context &context, MemoryManager &memoryManager, AllocationInfoType &allocationInfo, bool isParentObject) {
429     if (!isParentObject) {
430         for (auto &index : context.getRootDeviceIndices()) {
431             if (allocationInfo[index].memory) {
432                 memoryManager.removeAllocationFromHostPtrManager(allocationInfo[index].memory);
433                 memoryManager.freeGraphicsMemory(allocationInfo[index].memory);
434             }
435         }
436     }
437 }
438 
439 } // namespace NEO
440